Add GPU profiling (#304)

* Add GPU profiling

* Fix conditional compilation for `headless`

* Add full profiling

* Productionise

* Fix MacOS trace file

* Try to make it easier to tell what's important

* Resolve CI issues
This commit is contained in:
Daniel McNab 2023-06-01 16:10:27 +01:00 committed by GitHub
parent 03545e5d9a
commit 6d57093cc2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 336 additions and 16 deletions

1
.gitignore vendored
View file

@ -1,5 +1,6 @@
/target /target
Cargo.lock Cargo.lock
/trace.json
examples/assets/downloads/* examples/assets/downloads/*
!examples/assets/downloads/.tracked !examples/assets/downloads/.tracked

View file

@ -50,6 +50,7 @@ futures-intrusive = "0.5.0"
parking_lot = "0.12" parking_lot = "0.12"
smallvec = "1.8.0" smallvec = "1.8.0"
vello_encoding = { path = "crates/encoding" } vello_encoding = { path = "crates/encoding" }
wgpu-profiler = { workspace = true, optional = true }
[workspace.dependencies] [workspace.dependencies]
bytemuck = { version = "1.12.1", features = ["derive"] } bytemuck = { version = "1.12.1", features = ["derive"] }
@ -57,8 +58,10 @@ fello = { git = "https://github.com/dfrg/fount", rev = "58a284eaae67512fb61cf761
peniko = { git = "https://github.com/linebender/peniko", rev = "cafdac9a211a0fb2fec5656bd663d1ac770bcc81" } peniko = { git = "https://github.com/linebender/peniko", rev = "cafdac9a211a0fb2fec5656bd663d1ac770bcc81" }
wgpu = "0.16" # NOTE: Make sure to keep this in sync with the version badge in README.md wgpu = "0.16" # NOTE: Make sure to keep this in sync with the version badge in README.md
# Used for examples # Used for examples
clap = "4.1.0" clap = "4.1.0"
anyhow = "1.0" anyhow = "1.0"
instant = { version = "0.1.12", features = [ "wasm-bindgen" ] } instant = { version = "0.1.12", features = ["wasm-bindgen"] }
pollster = "0.3.0" pollster = "0.3.0"
wgpu-profiler = "0.12.1"

View file

@ -89,6 +89,7 @@ async fn render(mut scenes: SceneSet, index: usize, args: &Args) -> Result<()> {
device, device,
&RendererOptions { &RendererOptions {
surface_format: None, surface_format: None,
timestamp_period: queue.get_timestamp_period(),
}, },
) )
.or_else(|_| bail!("Got non-Send/Sync error from creating renderer"))?; .or_else(|_| bail!("Got non-Send/Sync error from creating renderer"))?;

View file

@ -22,11 +22,14 @@ struct VelloRenderer(Renderer);
impl FromWorld for VelloRenderer { impl FromWorld for VelloRenderer {
fn from_world(world: &mut World) -> Self { fn from_world(world: &mut World) -> Self {
let device = world.get_resource::<RenderDevice>().unwrap(); let device = world.get_resource::<RenderDevice>().unwrap();
let queue = world.get_resource::<RenderQueue>().unwrap();
VelloRenderer( VelloRenderer(
Renderer::new( Renderer::new(
device.wgpu_device(), device.wgpu_device(),
&RendererOptions { &RendererOptions {
surface_format: None, surface_format: None,
timestamp_period: queue.0.get_timestamp_period(),
}, },
) )
.unwrap(), .unwrap(),

View file

@ -20,12 +20,13 @@ name = "with_winit_bin"
path = "src/main.rs" path = "src/main.rs"
[dependencies] [dependencies]
vello = { path = "../../", features = ["buffer_labels"] } vello = { path = "../../", features = ["buffer_labels", "wgpu-profiler"] }
scenes = { path = "../scenes" } scenes = { path = "../scenes" }
anyhow = { workspace = true } anyhow = { workspace = true }
clap = { workspace = true, features = ["derive"] } clap = { workspace = true, features = ["derive"] }
instant = { workspace = true } instant = { workspace = true }
pollster = { workspace = true } pollster = { workspace = true }
wgpu-profiler = { workspace = true }
wgpu = { workspace = true } wgpu = { workspace = true }
winit = "0.28.1" winit = "0.28.1"

View file

@ -14,7 +14,7 @@
// //
// Also licensed under MIT license, at your choice. // Also licensed under MIT license, at your choice.
use instant::Instant; use instant::{Duration, Instant};
use std::collections::HashSet; use std::collections::HashSet;
use anyhow::Result; use anyhow::Result;
@ -83,6 +83,7 @@ fn run(
&render_cx.devices[id].device, &render_cx.devices[id].device,
&RendererOptions { &RendererOptions {
surface_format: Some(render_state.surface.format), surface_format: Some(render_state.surface.format),
timestamp_period: render_cx.devices[id].queue.get_timestamp_period(),
}, },
) )
.expect("Could create renderer"), .expect("Could create renderer"),
@ -121,7 +122,9 @@ fn run(
if let Some(set_scene) = args.scene { if let Some(set_scene) = args.scene {
scene_ix = set_scene; scene_ix = set_scene;
} }
let mut profile_stored = None;
let mut prev_scene_ix = scene_ix - 1; let mut prev_scene_ix = scene_ix - 1;
let mut profile_taken = Instant::now();
// _event_loop is used on non-wasm platforms to create new windows // _event_loop is used on non-wasm platforms to create new windows
event_loop.run(move |event, _event_loop, control_flow| match event { event_loop.run(move |event, _event_loop, control_flow| match event {
Event::WindowEvent { Event::WindowEvent {
@ -163,6 +166,29 @@ fn run(
Some(VirtualKeyCode::C) => { Some(VirtualKeyCode::C) => {
stats.clear_min_and_max(); stats.clear_min_and_max();
} }
Some(VirtualKeyCode::P) => {
if let Some(renderer) = &renderers[render_state.surface.dev_id] {
if let Some(profile_result) = &renderer
.profile_result
.as_ref()
.or(profile_stored.as_ref())
{
// There can be empty results if the required features aren't supported
if !profile_result.is_empty() {
let path = std::path::Path::new("trace.json");
match wgpu_profiler::chrometrace::write_chrometrace(
path,
profile_result,
) {
Ok(()) => {
println!("Wrote trace to path {path:?}")
}
Err(e) => eprintln!("Failed to write trace {e}"),
}
}
}
}
}
Some(VirtualKeyCode::V) => { Some(VirtualKeyCode::V) => {
vsync_on = !vsync_on; vsync_on = !vsync_on;
render_cx.set_present_mode( render_cx.set_present_mode(
@ -342,6 +368,25 @@ fn run(
complexity_shown.then_some(scene_complexity).flatten(), complexity_shown.then_some(scene_complexity).flatten(),
vsync_on, vsync_on,
); );
if let Some(profiling_result) = renderers[render_state.surface.dev_id]
.as_mut()
.and_then(|it| it.profile_result.take())
{
if profile_stored.is_none() || profile_taken.elapsed() > Duration::from_secs(1)
{
profile_stored = Some(profiling_result);
profile_taken = Instant::now();
}
}
if let Some(profiling_result) = profile_stored.as_ref() {
stats::draw_gpu_profiling(
&mut builder,
scene_params.text,
width as f64,
height as f64,
profiling_result,
)
}
} }
let surface_texture = render_state let surface_texture = render_state
.surface .surface
@ -438,6 +483,9 @@ fn run(
&render_cx.devices[id].device, &render_cx.devices[id].device,
&RendererOptions { &RendererOptions {
surface_format: Some(render_state.surface.format), surface_format: Some(render_state.surface.format),
timestamp_period: render_cx.devices[id]
.queue
.get_timestamp_period(),
}, },
) )
.expect("Could create renderer") .expect("Could create renderer")

View file

@ -15,12 +15,13 @@
// Also licensed under MIT license, at your choice. // Also licensed under MIT license, at your choice.
use scenes::SimpleText; use scenes::SimpleText;
use std::collections::VecDeque; use std::{collections::VecDeque, time::Duration};
use vello::{ use vello::{
kurbo::{Affine, PathEl, Rect}, kurbo::{Affine, Line, PathEl, Rect},
peniko::{Brush, Color, Fill, Stroke}, peniko::{Brush, Color, Fill, Stroke},
BumpAllocators, SceneBuilder, BumpAllocators, SceneBuilder,
}; };
use wgpu_profiler::GpuTimerScopeResult;
const SLIDING_WINDOW_SIZE: usize = 100; const SLIDING_WINDOW_SIZE: usize = 100;
@ -247,3 +248,204 @@ impl Stats {
fn round_up(n: usize, f: usize) -> usize { fn round_up(n: usize, f: usize) -> usize {
n - 1 - (n - 1) % f + f n - 1 - (n - 1) % f + f
} }
/// Palette used to colour the scopes drawn by `draw_gpu_profiling`.
///
/// Entries are picked by scope index modulo the palette length, so the
/// colours repeat once there are more scopes than entries.
const COLORS: &[Color] = &[
Color::AQUA,
Color::RED,
Color::ALICE_BLUE,
Color::YELLOW,
Color::GREEN,
Color::BLUE,
Color::ORANGE,
Color::WHITE,
];
/// Draws a GPU profiling overlay along the left edge, at the bottom of the viewport.
///
/// The overlay consists of a translucent background, a header with the total
/// GPU time and a usage hint, and a vertical timeline in which every profiling
/// scope is drawn as a coloured bar (indented by nesting depth) next to a text
/// label showing its duration and name.
///
/// * `sb` - scene builder that receives all fills, strokes and glyphs.
/// * `text` - text helper used to lay out the labels.
/// * `viewport_width`, `viewport_height` - size of the viewport; the overlay
///   takes 30% of the width (clamped to `150..=450`) and is 1.5 times as tall
///   as it is wide.
/// * `profiles` - root profiling scopes of one frame. Nothing is drawn if this
///   is empty.
pub fn draw_gpu_profiling(
    sb: &mut SceneBuilder,
    text: &mut SimpleText,
    viewport_width: f64,
    viewport_height: f64,
    profiles: &[GpuTimerScopeResult],
) {
    if profiles.is_empty() {
        return;
    }
    let width = (viewport_width * 0.3).clamp(150., 450.);
    let height = width * 1.5;
    let y_offset = viewport_height - height;
    let offset = Affine::translate((0., y_offset));

    // Draw the background
    sb.fill(
        Fill::NonZero,
        offset,
        &Brush::Solid(Color::rgba8(0, 0, 0, 200)),
        None,
        &Rect::new(0., 0., width, height),
    );

    // Find the range of the samples, so we can normalise them
    let mut min = f64::MAX;
    let mut max = f64::MIN;
    let mut max_depth = 0;
    let mut depth = 0;
    let mut count = 0;
    traverse_profiling(profiles, &mut |profile, stage| {
        match stage {
            TraversalStage::Enter => {
                count += 1;
                min = min.min(profile.time.start);
                max = max.max(profile.time.end);
                max_depth = max_depth.max(depth);
                // Apply a higher depth to the children
                depth += 1;
            }
            TraversalStage::Leave => depth -= 1,
        }
    });
    let total_time = max - min;

    {
        let labels = [
            format!("GPU Time: {:.2?}", Duration::from_secs_f64(total_time)),
            "Press P to save a trace".to_string(),
        ];

        // height / 5 is dedicated to the text labels and the rest is filled by the frame time.
        let text_height = height * 0.2 / (1 + labels.len()) as f64;
        let left_margin = width * 0.01;
        let text_size = (text_height * 0.9) as f32;
        // Each label is drawn exactly once.
        for (i, label) in labels.iter().enumerate() {
            text.add(
                sb,
                None,
                text_size,
                Some(&Brush::Solid(Color::WHITE)),
                offset * Affine::translate((left_margin, (i + 1) as f64 * text_height)),
                label,
            );
        }
    }

    let timeline_start_y = height * 0.21;
    let timeline_range_y = height * 0.78;
    let timeline_range_end = timeline_start_y + timeline_range_y;

    // Maps a timestamp to a y position inside the timeline. If every sample
    // has the same timestamp the range is empty; collapse everything onto the
    // start of the timeline instead of dividing by zero and producing NaN.
    let normalise = |time: f64| {
        if total_time > 0. {
            ((time - min) / total_time) * timeline_range_y + timeline_start_y
        } else {
            timeline_start_y
        }
    };

    // Add 6 items worth of margin
    let text_height = timeline_range_y / (6 + count) as f64;
    let left_margin = width * 0.35;
    let mut cur_text_y = timeline_start_y;
    let mut cur_index = 0;
    let mut depth = 0;
    // Leave 1 bar's worth of margin
    let depth_width = width * 0.28 / (max_depth + 1) as f64;
    let depth_size = depth_width * 0.8;
    traverse_profiling(profiles, &mut |profile, stage| {
        if let TraversalStage::Enter = stage {
            let start_normalised = normalise(profile.time.start);
            let end_normalised = normalise(profile.time.end);

            let color = COLORS[cur_index % COLORS.len()];
            let x = width * 0.01 + (depth as f64 * depth_width);
            sb.fill(
                Fill::NonZero,
                offset,
                &Brush::Solid(color),
                None,
                &Rect::new(x, start_normalised, x + depth_size, end_normalised),
            );

            let mut text_start = start_normalised;
            let nested = !profile.nested_scopes.is_empty();
            if nested {
                // If we have children, leave some more space for them
                text_start -= text_height * 0.7;
            }
            let this_time = profile.time.end - profile.time.start;
            // Highlight as important if at least 5% of the total time, or at least 1ms
            let slow = this_time * 20. >= total_time || this_time >= 0.001;
            let text_y = text_start
                // Ensure that we don't overlap the previous item
                .max(cur_text_y)
                // Ensure that all remaining items can fit
                .min(timeline_range_end - (count - cur_index) as f64 * text_height);
            // Unimportant scopes get a smaller, dimmer label
            let (text_height, text_color) = if slow {
                (text_height, Color::WHITE)
            } else {
                (text_height * 0.6, Color::LIGHT_GRAY)
            };
            let text_size = (text_height * 0.9) as f32;
            // Text is specified by the baseline, but the y positions all refer to the top of the text
            cur_text_y = text_y + text_height;
            let label = format!(
                "{:.2?} - {:.30}",
                Duration::from_secs_f64(this_time),
                profile.label
            );
            // Colour swatch tying the label to its bar
            sb.fill(
                Fill::NonZero,
                offset,
                &Brush::Solid(color),
                None,
                &Rect::new(
                    width * 0.31,
                    cur_text_y - text_size as f64 * 0.7,
                    width * 0.34,
                    cur_text_y,
                ),
            );
            text.add(
                sb,
                None,
                text_size,
                Some(&Brush::Solid(text_color)),
                offset * Affine::translate((left_margin, cur_text_y)),
                &label,
            );
            if !nested && slow {
                // Connect the middle of the bar to its swatch
                sb.stroke(
                    &Stroke::new(2.),
                    offset,
                    &Brush::Solid(color),
                    None,
                    &Line::new(
                        (x + depth_size, (end_normalised + start_normalised) / 2.),
                        (width * 0.31, cur_text_y - text_size as f64 * 0.35),
                    ),
                );
            }
            cur_index += 1;
            // Higher depth applies only to the children
            depth += 1;
        } else {
            depth -= 1;
        }
    });
}
/// Tells a `traverse_profiling` callback which side of a scope's children it
/// is being invoked on.
enum TraversalStage {
/// The scope is being visited, before any of its nested scopes.
Enter,
/// All nested scopes of the scope have been visited.
Leave,
}
/// Walks a forest of profiling scopes depth-first.
///
/// For every scope, `callback` is invoked with [`TraversalStage::Enter`], then
/// the scope's `nested_scopes` are walked recursively, and finally `callback`
/// is invoked again for the same scope with [`TraversalStage::Leave`].
fn traverse_profiling(
    profiles: &[GpuTimerScopeResult],
    callback: &mut impl FnMut(&GpuTimerScopeResult, TraversalStage),
) {
    profiles.iter().for_each(|scope| {
        callback(scope, TraversalStage::Enter);
        // Reborrow so the same callback can be reused after the recursion
        traverse_profiling(&scope.nested_scopes, &mut *callback);
        callback(scope, TraversalStage::Leave);
    });
}

View file

@ -22,8 +22,8 @@ use std::{
}; };
use wgpu::{ use wgpu::{
BindGroup, BindGroupLayout, Buffer, BufferUsages, ComputePipeline, Device, Queue, Texture, BindGroup, BindGroupLayout, Buffer, BufferUsages, CommandEncoderDescriptor, ComputePipeline,
TextureAspect, TextureUsages, TextureView, TextureViewDimension, Device, Queue, Texture, TextureAspect, TextureUsages, TextureView, TextureViewDimension,
}; };
pub type Error = Box<dyn std::error::Error>; pub type Error = Box<dyn std::error::Error>;
@ -46,6 +46,7 @@ pub struct Engine {
struct Shader { struct Shader {
pipeline: ComputePipeline, pipeline: ComputePipeline,
bind_group_layout: BindGroupLayout, bind_group_layout: BindGroupLayout,
label: &'static str,
} }
#[derive(Default)] #[derive(Default)]
@ -238,6 +239,7 @@ impl Engine {
let shader = Shader { let shader = Shader {
pipeline, pipeline,
bind_group_layout, bind_group_layout,
label,
}; };
let id = self.shaders.len(); let id = self.shaders.len();
self.shaders.push(shader); self.shaders.push(shader);
@ -250,11 +252,16 @@ impl Engine {
queue: &Queue, queue: &Queue,
recording: &Recording, recording: &Recording,
external_resources: &[ExternalResource], external_resources: &[ExternalResource],
label: &'static str,
#[cfg(feature = "wgpu-profiler")] profiler: &mut wgpu_profiler::GpuProfiler,
) -> Result<(), Error> { ) -> Result<(), Error> {
let mut free_bufs: HashSet<Id> = Default::default(); let mut free_bufs: HashSet<Id> = Default::default();
let mut free_images: HashSet<Id> = Default::default(); let mut free_images: HashSet<Id> = Default::default();
let mut encoder = device.create_command_encoder(&Default::default()); let mut encoder =
device.create_command_encoder(&CommandEncoderDescriptor { label: Some(label) });
#[cfg(feature = "wgpu-profiler")]
profiler.begin_scope(label, &mut encoder, device);
for command in &recording.commands { for command in &recording.commands {
match command { match command {
Command::Upload(buf_proxy, bytes) => { Command::Upload(buf_proxy, bytes) => {
@ -366,9 +373,13 @@ impl Engine {
&mut self.pool, &mut self.pool,
)?; )?;
let mut cpass = encoder.begin_compute_pass(&Default::default()); let mut cpass = encoder.begin_compute_pass(&Default::default());
#[cfg(feature = "wgpu-profiler")]
profiler.begin_scope(shader.label, &mut cpass, device);
cpass.set_pipeline(&shader.pipeline); cpass.set_pipeline(&shader.pipeline);
cpass.set_bind_group(0, &bind_group, &[]); cpass.set_bind_group(0, &bind_group, &[]);
cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2); cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2);
#[cfg(feature = "wgpu-profiler")]
profiler.end_scope(&mut cpass);
} }
Command::Download(proxy) => { Command::Download(proxy) => {
let src_buf = self let src_buf = self
@ -407,6 +418,8 @@ impl Engine {
} }
} }
} }
#[cfg(feature = "wgpu-profiler")]
profiler.end_scope(&mut encoder);
queue.submit(Some(encoder.finish())); queue.submit(Some(encoder.finish()));
for id in free_bufs { for id in free_bufs {
if let Some(buf) = self.bind_map.buf_map.remove(&id) { if let Some(buf) = self.bind_map.buf_map.remove(&id) {

View file

@ -40,6 +40,8 @@ use shaders::FullShaders;
/// Temporary export, used in with_winit for stats /// Temporary export, used in with_winit for stats
pub use vello_encoding::BumpAllocators; pub use vello_encoding::BumpAllocators;
use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView}; use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView};
#[cfg(feature = "wgpu-profiler")]
use wgpu_profiler::GpuProfiler;
/// Catch-all error type. /// Catch-all error type.
pub type Error = Box<dyn std::error::Error>; pub type Error = Box<dyn std::error::Error>;
@ -53,6 +55,10 @@ pub struct Renderer {
shaders: FullShaders, shaders: FullShaders,
blit: Option<BlitPipeline>, blit: Option<BlitPipeline>,
target: Option<TargetTexture>, target: Option<TargetTexture>,
#[cfg(feature = "wgpu-profiler")]
profiler: GpuProfiler,
#[cfg(feature = "wgpu-profiler")]
pub profile_result: Option<Vec<wgpu_profiler::GpuTimerScopeResult>>,
} }
/// Parameters used in a single render that are configurable by the client. /// Parameters used in a single render that are configurable by the client.
@ -70,6 +76,9 @@ pub struct RendererOptions {
/// The format of the texture used for surfaces with this renderer/device /// The format of the texture used for surfaces with this renderer/device
/// If None, the renderer cannot be used with surfaces /// If None, the renderer cannot be used with surfaces
pub surface_format: Option<TextureFormat>, pub surface_format: Option<TextureFormat>,
/// The timestamp period from [`wgpu::Queue::get_timestamp_period`]
/// Used when the wgpu-profiler feature is enabled
pub timestamp_period: f32,
} }
impl Renderer { impl Renderer {
@ -85,6 +94,11 @@ impl Renderer {
shaders, shaders,
blit, blit,
target: None, target: None,
// Use 3 pending frames
#[cfg(feature = "wgpu-profiler")]
profiler: GpuProfiler::new(3, render_options.timestamp_period, device.features()),
#[cfg(feature = "wgpu-profiler")]
profile_result: None,
}) })
} }
@ -106,8 +120,15 @@ impl Renderer {
*target.as_image().unwrap(), *target.as_image().unwrap(),
texture, texture,
)]; )];
self.engine self.engine.run_recording(
.run_recording(device, queue, &recording, &external_resources)?; device,
queue,
&recording,
&external_resources,
"render_to_texture",
#[cfg(feature = "wgpu-profiler")]
&mut self.profiler,
)?;
Ok(()) Ok(())
} }
@ -217,7 +238,15 @@ impl Renderer {
let recording = render.render_encoding_coarse(encoding, &self.shaders, params, true); let recording = render.render_encoding_coarse(encoding, &self.shaders, params, true);
let target = render.out_image(); let target = render.out_image();
let bump_buf = render.bump_buf(); let bump_buf = render.bump_buf();
self.engine.run_recording(device, queue, &recording, &[])?; self.engine.run_recording(
device,
queue,
&recording,
&[],
"t_async_coarse",
#[cfg(feature = "wgpu-profiler")]
&mut self.profiler,
)?;
let mut bump: Option<BumpAllocators> = None; let mut bump: Option<BumpAllocators> = None;
if let Some(bump_buf) = self.engine.get_download(bump_buf) { if let Some(bump_buf) = self.engine.get_download(bump_buf) {
@ -239,8 +268,15 @@ impl Renderer {
let mut recording = Recording::default(); let mut recording = Recording::default();
render.record_fine(&self.shaders, &mut recording); render.record_fine(&self.shaders, &mut recording);
let external_resources = [ExternalResource::Image(target, texture)]; let external_resources = [ExternalResource::Image(target, texture)];
self.engine self.engine.run_recording(
.run_recording(device, queue, &recording, &external_resources)?; device,
queue,
&recording,
&external_resources,
"t_async_fine",
#[cfg(feature = "wgpu-profiler")]
&mut self.profiler,
)?;
Ok(bump) Ok(bump)
} }
@ -301,8 +337,16 @@ impl Renderer {
render_pass.set_bind_group(0, &bind_group, &[]); render_pass.set_bind_group(0, &bind_group, &[]);
render_pass.draw(0..6, 0..1); render_pass.draw(0..6, 0..1);
} }
#[cfg(feature = "wgpu-profiler")]
self.profiler.resolve_queries(&mut encoder);
queue.submit(Some(encoder.finish())); queue.submit(Some(encoder.finish()));
self.target = Some(target); self.target = Some(target);
#[cfg(feature = "wgpu-profiler")]
self.profiler.end_frame().unwrap();
#[cfg(feature = "wgpu-profiler")]
if let Some(result) = self.profiler.process_finished_frame() {
self.profile_result = Some(result);
}
Ok(bump) Ok(bump)
} }
} }

View file

@ -134,12 +134,16 @@ impl RenderContext {
.await?; .await?;
let features = adapter.features(); let features = adapter.features();
let limits = Limits::default(); let limits = Limits::default();
let mut maybe_features = wgpu::Features::CLEAR_TEXTURE;
#[cfg(feature = "wgpu-profiler")]
{
maybe_features |= wgpu_profiler::GpuProfiler::ALL_WGPU_TIMER_FEATURES;
};
let (device, queue) = adapter let (device, queue) = adapter
.request_device( .request_device(
&wgpu::DeviceDescriptor { &wgpu::DeviceDescriptor {
label: None, label: None,
features: features features: features & maybe_features,
& (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::CLEAR_TEXTURE),
limits, limits,
}, },
None, None,