mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-23 09:56:34 +11:00
Add GPU profiling (#304)
* Add GPU profiling * Fix conditional compilation for `headless` * Add full profiling * Productionise * Fix MacOS trace file * Try to make it easier to tell what's important * Resolve CI issues
This commit is contained in:
parent
03545e5d9a
commit
6d57093cc2
10 changed files with 336 additions and 16 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,5 +1,6 @@
|
||||||
/target
|
/target
|
||||||
Cargo.lock
|
Cargo.lock
|
||||||
|
/trace.json
|
||||||
|
|
||||||
examples/assets/downloads/*
|
examples/assets/downloads/*
|
||||||
!examples/assets/downloads/.tracked
|
!examples/assets/downloads/.tracked
|
||||||
|
|
|
@ -50,6 +50,7 @@ futures-intrusive = "0.5.0"
|
||||||
parking_lot = "0.12"
|
parking_lot = "0.12"
|
||||||
smallvec = "1.8.0"
|
smallvec = "1.8.0"
|
||||||
vello_encoding = { path = "crates/encoding" }
|
vello_encoding = { path = "crates/encoding" }
|
||||||
|
wgpu-profiler = { workspace = true, optional = true }
|
||||||
|
|
||||||
[workspace.dependencies]
|
[workspace.dependencies]
|
||||||
bytemuck = { version = "1.12.1", features = ["derive"] }
|
bytemuck = { version = "1.12.1", features = ["derive"] }
|
||||||
|
@ -57,8 +58,10 @@ fello = { git = "https://github.com/dfrg/fount", rev = "58a284eaae67512fb61cf761
|
||||||
peniko = { git = "https://github.com/linebender/peniko", rev = "cafdac9a211a0fb2fec5656bd663d1ac770bcc81" }
|
peniko = { git = "https://github.com/linebender/peniko", rev = "cafdac9a211a0fb2fec5656bd663d1ac770bcc81" }
|
||||||
wgpu = "0.16" # NOTE: Make sure to keep this in sync with the version badge in README.md
|
wgpu = "0.16" # NOTE: Make sure to keep this in sync with the version badge in README.md
|
||||||
|
|
||||||
|
|
||||||
# Used for examples
|
# Used for examples
|
||||||
clap = "4.1.0"
|
clap = "4.1.0"
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
instant = { version = "0.1.12", features = [ "wasm-bindgen" ] }
|
instant = { version = "0.1.12", features = ["wasm-bindgen"] }
|
||||||
pollster = "0.3.0"
|
pollster = "0.3.0"
|
||||||
|
wgpu-profiler = "0.12.1"
|
||||||
|
|
|
@ -89,6 +89,7 @@ async fn render(mut scenes: SceneSet, index: usize, args: &Args) -> Result<()> {
|
||||||
device,
|
device,
|
||||||
&RendererOptions {
|
&RendererOptions {
|
||||||
surface_format: None,
|
surface_format: None,
|
||||||
|
timestamp_period: queue.get_timestamp_period(),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.or_else(|_| bail!("Got non-Send/Sync error from creating renderer"))?;
|
.or_else(|_| bail!("Got non-Send/Sync error from creating renderer"))?;
|
||||||
|
|
|
@ -22,11 +22,14 @@ struct VelloRenderer(Renderer);
|
||||||
impl FromWorld for VelloRenderer {
|
impl FromWorld for VelloRenderer {
|
||||||
fn from_world(world: &mut World) -> Self {
|
fn from_world(world: &mut World) -> Self {
|
||||||
let device = world.get_resource::<RenderDevice>().unwrap();
|
let device = world.get_resource::<RenderDevice>().unwrap();
|
||||||
|
let queue = world.get_resource::<RenderQueue>().unwrap();
|
||||||
|
|
||||||
VelloRenderer(
|
VelloRenderer(
|
||||||
Renderer::new(
|
Renderer::new(
|
||||||
device.wgpu_device(),
|
device.wgpu_device(),
|
||||||
&RendererOptions {
|
&RendererOptions {
|
||||||
surface_format: None,
|
surface_format: None,
|
||||||
|
timestamp_period: queue.0.get_timestamp_period(),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.unwrap(),
|
.unwrap(),
|
||||||
|
|
|
@ -20,12 +20,13 @@ name = "with_winit_bin"
|
||||||
path = "src/main.rs"
|
path = "src/main.rs"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
vello = { path = "../../", features = ["buffer_labels"] }
|
vello = { path = "../../", features = ["buffer_labels", "wgpu-profiler"] }
|
||||||
scenes = { path = "../scenes" }
|
scenes = { path = "../scenes" }
|
||||||
anyhow = { workspace = true }
|
anyhow = { workspace = true }
|
||||||
clap = { workspace = true, features = ["derive"] }
|
clap = { workspace = true, features = ["derive"] }
|
||||||
instant = { workspace = true }
|
instant = { workspace = true }
|
||||||
pollster = { workspace = true }
|
pollster = { workspace = true }
|
||||||
|
wgpu-profiler = { workspace = true }
|
||||||
|
|
||||||
wgpu = { workspace = true }
|
wgpu = { workspace = true }
|
||||||
winit = "0.28.1"
|
winit = "0.28.1"
|
||||||
|
|
|
@ -14,7 +14,7 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
use instant::Instant;
|
use instant::{Duration, Instant};
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
@ -83,6 +83,7 @@ fn run(
|
||||||
&render_cx.devices[id].device,
|
&render_cx.devices[id].device,
|
||||||
&RendererOptions {
|
&RendererOptions {
|
||||||
surface_format: Some(render_state.surface.format),
|
surface_format: Some(render_state.surface.format),
|
||||||
|
timestamp_period: render_cx.devices[id].queue.get_timestamp_period(),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.expect("Could create renderer"),
|
.expect("Could create renderer"),
|
||||||
|
@ -121,7 +122,9 @@ fn run(
|
||||||
if let Some(set_scene) = args.scene {
|
if let Some(set_scene) = args.scene {
|
||||||
scene_ix = set_scene;
|
scene_ix = set_scene;
|
||||||
}
|
}
|
||||||
|
let mut profile_stored = None;
|
||||||
let mut prev_scene_ix = scene_ix - 1;
|
let mut prev_scene_ix = scene_ix - 1;
|
||||||
|
let mut profile_taken = Instant::now();
|
||||||
// _event_loop is used on non-wasm platforms to create new windows
|
// _event_loop is used on non-wasm platforms to create new windows
|
||||||
event_loop.run(move |event, _event_loop, control_flow| match event {
|
event_loop.run(move |event, _event_loop, control_flow| match event {
|
||||||
Event::WindowEvent {
|
Event::WindowEvent {
|
||||||
|
@ -163,6 +166,29 @@ fn run(
|
||||||
Some(VirtualKeyCode::C) => {
|
Some(VirtualKeyCode::C) => {
|
||||||
stats.clear_min_and_max();
|
stats.clear_min_and_max();
|
||||||
}
|
}
|
||||||
|
Some(VirtualKeyCode::P) => {
|
||||||
|
if let Some(renderer) = &renderers[render_state.surface.dev_id] {
|
||||||
|
if let Some(profile_result) = &renderer
|
||||||
|
.profile_result
|
||||||
|
.as_ref()
|
||||||
|
.or(profile_stored.as_ref())
|
||||||
|
{
|
||||||
|
// There can be empty results if the required features aren't supported
|
||||||
|
if !profile_result.is_empty() {
|
||||||
|
let path = std::path::Path::new("trace.json");
|
||||||
|
match wgpu_profiler::chrometrace::write_chrometrace(
|
||||||
|
path,
|
||||||
|
profile_result,
|
||||||
|
) {
|
||||||
|
Ok(()) => {
|
||||||
|
println!("Wrote trace to path {path:?}")
|
||||||
|
}
|
||||||
|
Err(e) => eprintln!("Failed to write trace {e}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Some(VirtualKeyCode::V) => {
|
Some(VirtualKeyCode::V) => {
|
||||||
vsync_on = !vsync_on;
|
vsync_on = !vsync_on;
|
||||||
render_cx.set_present_mode(
|
render_cx.set_present_mode(
|
||||||
|
@ -342,6 +368,25 @@ fn run(
|
||||||
complexity_shown.then_some(scene_complexity).flatten(),
|
complexity_shown.then_some(scene_complexity).flatten(),
|
||||||
vsync_on,
|
vsync_on,
|
||||||
);
|
);
|
||||||
|
if let Some(profiling_result) = renderers[render_state.surface.dev_id]
|
||||||
|
.as_mut()
|
||||||
|
.and_then(|it| it.profile_result.take())
|
||||||
|
{
|
||||||
|
if profile_stored.is_none() || profile_taken.elapsed() > Duration::from_secs(1)
|
||||||
|
{
|
||||||
|
profile_stored = Some(profiling_result);
|
||||||
|
profile_taken = Instant::now();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(profiling_result) = profile_stored.as_ref() {
|
||||||
|
stats::draw_gpu_profiling(
|
||||||
|
&mut builder,
|
||||||
|
scene_params.text,
|
||||||
|
width as f64,
|
||||||
|
height as f64,
|
||||||
|
profiling_result,
|
||||||
|
)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
let surface_texture = render_state
|
let surface_texture = render_state
|
||||||
.surface
|
.surface
|
||||||
|
@ -438,6 +483,9 @@ fn run(
|
||||||
&render_cx.devices[id].device,
|
&render_cx.devices[id].device,
|
||||||
&RendererOptions {
|
&RendererOptions {
|
||||||
surface_format: Some(render_state.surface.format),
|
surface_format: Some(render_state.surface.format),
|
||||||
|
timestamp_period: render_cx.devices[id]
|
||||||
|
.queue
|
||||||
|
.get_timestamp_period(),
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
.expect("Could create renderer")
|
.expect("Could create renderer")
|
||||||
|
|
|
@ -15,12 +15,13 @@
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
use scenes::SimpleText;
|
use scenes::SimpleText;
|
||||||
use std::collections::VecDeque;
|
use std::{collections::VecDeque, time::Duration};
|
||||||
use vello::{
|
use vello::{
|
||||||
kurbo::{Affine, PathEl, Rect},
|
kurbo::{Affine, Line, PathEl, Rect},
|
||||||
peniko::{Brush, Color, Fill, Stroke},
|
peniko::{Brush, Color, Fill, Stroke},
|
||||||
BumpAllocators, SceneBuilder,
|
BumpAllocators, SceneBuilder,
|
||||||
};
|
};
|
||||||
|
use wgpu_profiler::GpuTimerScopeResult;
|
||||||
|
|
||||||
const SLIDING_WINDOW_SIZE: usize = 100;
|
const SLIDING_WINDOW_SIZE: usize = 100;
|
||||||
|
|
||||||
|
@ -247,3 +248,204 @@ impl Stats {
|
||||||
fn round_up(n: usize, f: usize) -> usize {
|
fn round_up(n: usize, f: usize) -> usize {
|
||||||
n - 1 - (n - 1) % f + f
|
n - 1 - (n - 1) % f + f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const COLORS: &[Color] = &[
|
||||||
|
Color::AQUA,
|
||||||
|
Color::RED,
|
||||||
|
Color::ALICE_BLUE,
|
||||||
|
Color::YELLOW,
|
||||||
|
Color::GREEN,
|
||||||
|
Color::BLUE,
|
||||||
|
Color::ORANGE,
|
||||||
|
Color::WHITE,
|
||||||
|
];
|
||||||
|
|
||||||
|
pub fn draw_gpu_profiling(
|
||||||
|
sb: &mut SceneBuilder,
|
||||||
|
text: &mut SimpleText,
|
||||||
|
viewport_width: f64,
|
||||||
|
viewport_height: f64,
|
||||||
|
profiles: &[GpuTimerScopeResult],
|
||||||
|
) {
|
||||||
|
if profiles.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let width = (viewport_width * 0.3).clamp(150., 450.);
|
||||||
|
let height = width * 1.5;
|
||||||
|
let y_offset = viewport_height - height;
|
||||||
|
let offset = Affine::translate((0., y_offset));
|
||||||
|
|
||||||
|
// Draw the background
|
||||||
|
sb.fill(
|
||||||
|
Fill::NonZero,
|
||||||
|
offset,
|
||||||
|
&Brush::Solid(Color::rgba8(0, 0, 0, 200)),
|
||||||
|
None,
|
||||||
|
&Rect::new(0., 0., width, height),
|
||||||
|
);
|
||||||
|
// Find the range of the samples, so we can normalise them
|
||||||
|
let mut min = f64::MAX;
|
||||||
|
let mut max = f64::MIN;
|
||||||
|
let mut max_depth = 0;
|
||||||
|
let mut depth = 0;
|
||||||
|
let mut count = 0;
|
||||||
|
traverse_profiling(profiles, &mut |profile, stage| {
|
||||||
|
match stage {
|
||||||
|
TraversalStage::Enter => {
|
||||||
|
count += 1;
|
||||||
|
min = min.min(profile.time.start);
|
||||||
|
max = max.max(profile.time.end);
|
||||||
|
max_depth = max_depth.max(depth);
|
||||||
|
// Apply a higher depth to the children
|
||||||
|
depth += 1;
|
||||||
|
}
|
||||||
|
TraversalStage::Leave => depth -= 1,
|
||||||
|
}
|
||||||
|
});
|
||||||
|
let total_time = max - min;
|
||||||
|
{
|
||||||
|
let labels = [
|
||||||
|
format!("GPU Time: {:.2?}", Duration::from_secs_f64(total_time)),
|
||||||
|
"Press P to save a trace".to_string(),
|
||||||
|
];
|
||||||
|
|
||||||
|
// height / 5 is dedicated to the text labels and the rest is filled by the frame time.
|
||||||
|
let text_height = height * 0.2 / (1 + labels.len()) as f64;
|
||||||
|
let left_margin = width * 0.01;
|
||||||
|
let text_size = (text_height * 0.9) as f32;
|
||||||
|
for (i, label) in labels.iter().enumerate() {
|
||||||
|
text.add(
|
||||||
|
sb,
|
||||||
|
None,
|
||||||
|
text_size,
|
||||||
|
Some(&Brush::Solid(Color::WHITE)),
|
||||||
|
offset * Affine::translate((left_margin, (i + 1) as f64 * text_height)),
|
||||||
|
label,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
let text_size = (text_height * 0.9) as f32;
|
||||||
|
for (i, label) in labels.iter().enumerate() {
|
||||||
|
text.add(
|
||||||
|
sb,
|
||||||
|
None,
|
||||||
|
text_size,
|
||||||
|
Some(&Brush::Solid(Color::WHITE)),
|
||||||
|
offset * Affine::translate((left_margin, (i + 1) as f64 * text_height)),
|
||||||
|
label,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let timeline_start_y = height * 0.21;
|
||||||
|
let timeline_range_y = height * 0.78;
|
||||||
|
let timeline_range_end = timeline_start_y + timeline_range_y;
|
||||||
|
|
||||||
|
// Add 6 items worth of margin
|
||||||
|
let text_height = timeline_range_y / (6 + count) as f64;
|
||||||
|
let left_margin = width * 0.35;
|
||||||
|
let mut cur_text_y = timeline_start_y;
|
||||||
|
let mut cur_index = 0;
|
||||||
|
let mut depth = 0;
|
||||||
|
// Leave 1 bar's worth of margin
|
||||||
|
let depth_width = width * 0.28 / (max_depth + 1) as f64;
|
||||||
|
let depth_size = depth_width * 0.8;
|
||||||
|
traverse_profiling(profiles, &mut |profile, stage| {
|
||||||
|
if let TraversalStage::Enter = stage {
|
||||||
|
let start_normalised =
|
||||||
|
((profile.time.start - min) / total_time) * timeline_range_y + timeline_start_y;
|
||||||
|
let end_normalised =
|
||||||
|
((profile.time.end - min) / total_time) * timeline_range_y + timeline_start_y;
|
||||||
|
|
||||||
|
let color = COLORS[cur_index % COLORS.len()];
|
||||||
|
let x = width * 0.01 + (depth as f64 * depth_width);
|
||||||
|
sb.fill(
|
||||||
|
Fill::NonZero,
|
||||||
|
offset,
|
||||||
|
&Brush::Solid(color),
|
||||||
|
None,
|
||||||
|
&Rect::new(x, start_normalised, x + depth_size, end_normalised),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut text_start = start_normalised;
|
||||||
|
let nested = !profile.nested_scopes.is_empty();
|
||||||
|
if nested {
|
||||||
|
// If we have children, leave some more space for them
|
||||||
|
text_start -= text_height * 0.7;
|
||||||
|
}
|
||||||
|
let this_time = profile.time.end - profile.time.start;
|
||||||
|
// Highlight as important if more than 10% of the total time, or more than 1ms
|
||||||
|
let slow = this_time * 20. >= total_time || this_time >= 0.001;
|
||||||
|
let text_y = text_start
|
||||||
|
// Ensure that we don't overlap the previous item
|
||||||
|
.max(cur_text_y)
|
||||||
|
// Ensure that all remaining items can fit
|
||||||
|
.min(timeline_range_end - (count - cur_index) as f64 * text_height);
|
||||||
|
let (text_height, text_color) = if slow {
|
||||||
|
(text_height, Color::WHITE)
|
||||||
|
} else {
|
||||||
|
(text_height * 0.6, Color::LIGHT_GRAY)
|
||||||
|
};
|
||||||
|
let text_size = (text_height * 0.9) as f32;
|
||||||
|
// Text is specified by the baseline, but the y positions all refer to the top of the text
|
||||||
|
cur_text_y = text_y + text_height;
|
||||||
|
let label = format!(
|
||||||
|
"{:.2?} - {:.30}",
|
||||||
|
Duration::from_secs_f64(this_time),
|
||||||
|
profile.label
|
||||||
|
);
|
||||||
|
sb.fill(
|
||||||
|
Fill::NonZero,
|
||||||
|
offset,
|
||||||
|
&Brush::Solid(color),
|
||||||
|
None,
|
||||||
|
&Rect::new(
|
||||||
|
width * 0.31,
|
||||||
|
cur_text_y - text_size as f64 * 0.7,
|
||||||
|
width * 0.34,
|
||||||
|
cur_text_y,
|
||||||
|
),
|
||||||
|
);
|
||||||
|
text.add(
|
||||||
|
sb,
|
||||||
|
None,
|
||||||
|
text_size,
|
||||||
|
Some(&Brush::Solid(text_color)),
|
||||||
|
offset * Affine::translate((left_margin, cur_text_y)),
|
||||||
|
&label,
|
||||||
|
);
|
||||||
|
if !nested && slow {
|
||||||
|
sb.stroke(
|
||||||
|
&Stroke::new(2.),
|
||||||
|
offset,
|
||||||
|
&Brush::Solid(color),
|
||||||
|
None,
|
||||||
|
&Line::new(
|
||||||
|
(x + depth_size, (end_normalised + start_normalised) / 2.),
|
||||||
|
(width * 0.31, cur_text_y - text_size as f64 * 0.35),
|
||||||
|
),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
cur_index += 1;
|
||||||
|
// Higher depth applies only to the children
|
||||||
|
depth += 1;
|
||||||
|
} else {
|
||||||
|
depth -= 1;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Which side of a scope's subtree a traversal callback is being invoked on.
enum TraversalStage {
    /// The scope is being visited, before any of its nested scopes.
    Enter,
    /// All of the scope's nested scopes have been visited.
    Leave,
}
|
||||||
|
|
||||||
|
fn traverse_profiling(
|
||||||
|
profiles: &[GpuTimerScopeResult],
|
||||||
|
callback: &mut impl FnMut(&GpuTimerScopeResult, TraversalStage),
|
||||||
|
) {
|
||||||
|
for profile in profiles {
|
||||||
|
callback(profile, TraversalStage::Enter);
|
||||||
|
traverse_profiling(&profile.nested_scopes, &mut *callback);
|
||||||
|
callback(profile, TraversalStage::Leave);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -22,8 +22,8 @@ use std::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use wgpu::{
|
use wgpu::{
|
||||||
BindGroup, BindGroupLayout, Buffer, BufferUsages, ComputePipeline, Device, Queue, Texture,
|
BindGroup, BindGroupLayout, Buffer, BufferUsages, CommandEncoderDescriptor, ComputePipeline,
|
||||||
TextureAspect, TextureUsages, TextureView, TextureViewDimension,
|
Device, Queue, Texture, TextureAspect, TextureUsages, TextureView, TextureViewDimension,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type Error = Box<dyn std::error::Error>;
|
pub type Error = Box<dyn std::error::Error>;
|
||||||
|
@ -46,6 +46,7 @@ pub struct Engine {
|
||||||
struct Shader {
|
struct Shader {
|
||||||
pipeline: ComputePipeline,
|
pipeline: ComputePipeline,
|
||||||
bind_group_layout: BindGroupLayout,
|
bind_group_layout: BindGroupLayout,
|
||||||
|
label: &'static str,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -238,6 +239,7 @@ impl Engine {
|
||||||
let shader = Shader {
|
let shader = Shader {
|
||||||
pipeline,
|
pipeline,
|
||||||
bind_group_layout,
|
bind_group_layout,
|
||||||
|
label,
|
||||||
};
|
};
|
||||||
let id = self.shaders.len();
|
let id = self.shaders.len();
|
||||||
self.shaders.push(shader);
|
self.shaders.push(shader);
|
||||||
|
@ -250,11 +252,16 @@ impl Engine {
|
||||||
queue: &Queue,
|
queue: &Queue,
|
||||||
recording: &Recording,
|
recording: &Recording,
|
||||||
external_resources: &[ExternalResource],
|
external_resources: &[ExternalResource],
|
||||||
|
label: &'static str,
|
||||||
|
#[cfg(feature = "wgpu-profiler")] profiler: &mut wgpu_profiler::GpuProfiler,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let mut free_bufs: HashSet<Id> = Default::default();
|
let mut free_bufs: HashSet<Id> = Default::default();
|
||||||
let mut free_images: HashSet<Id> = Default::default();
|
let mut free_images: HashSet<Id> = Default::default();
|
||||||
|
|
||||||
let mut encoder = device.create_command_encoder(&Default::default());
|
let mut encoder =
|
||||||
|
device.create_command_encoder(&CommandEncoderDescriptor { label: Some(label) });
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
profiler.begin_scope(label, &mut encoder, device);
|
||||||
for command in &recording.commands {
|
for command in &recording.commands {
|
||||||
match command {
|
match command {
|
||||||
Command::Upload(buf_proxy, bytes) => {
|
Command::Upload(buf_proxy, bytes) => {
|
||||||
|
@ -366,9 +373,13 @@ impl Engine {
|
||||||
&mut self.pool,
|
&mut self.pool,
|
||||||
)?;
|
)?;
|
||||||
let mut cpass = encoder.begin_compute_pass(&Default::default());
|
let mut cpass = encoder.begin_compute_pass(&Default::default());
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
profiler.begin_scope(shader.label, &mut cpass, device);
|
||||||
cpass.set_pipeline(&shader.pipeline);
|
cpass.set_pipeline(&shader.pipeline);
|
||||||
cpass.set_bind_group(0, &bind_group, &[]);
|
cpass.set_bind_group(0, &bind_group, &[]);
|
||||||
cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2);
|
cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2);
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
profiler.end_scope(&mut cpass);
|
||||||
}
|
}
|
||||||
Command::Download(proxy) => {
|
Command::Download(proxy) => {
|
||||||
let src_buf = self
|
let src_buf = self
|
||||||
|
@ -407,6 +418,8 @@ impl Engine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
profiler.end_scope(&mut encoder);
|
||||||
queue.submit(Some(encoder.finish()));
|
queue.submit(Some(encoder.finish()));
|
||||||
for id in free_bufs {
|
for id in free_bufs {
|
||||||
if let Some(buf) = self.bind_map.buf_map.remove(&id) {
|
if let Some(buf) = self.bind_map.buf_map.remove(&id) {
|
||||||
|
|
54
src/lib.rs
54
src/lib.rs
|
@ -40,6 +40,8 @@ use shaders::FullShaders;
|
||||||
/// Temporary export, used in with_winit for stats
|
/// Temporary export, used in with_winit for stats
|
||||||
pub use vello_encoding::BumpAllocators;
|
pub use vello_encoding::BumpAllocators;
|
||||||
use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView};
|
use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView};
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
use wgpu_profiler::GpuProfiler;
|
||||||
|
|
||||||
/// Catch-all error type.
|
/// Catch-all error type.
|
||||||
pub type Error = Box<dyn std::error::Error>;
|
pub type Error = Box<dyn std::error::Error>;
|
||||||
|
@ -53,6 +55,10 @@ pub struct Renderer {
|
||||||
shaders: FullShaders,
|
shaders: FullShaders,
|
||||||
blit: Option<BlitPipeline>,
|
blit: Option<BlitPipeline>,
|
||||||
target: Option<TargetTexture>,
|
target: Option<TargetTexture>,
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
profiler: GpuProfiler,
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
pub profile_result: Option<Vec<wgpu_profiler::GpuTimerScopeResult>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parameters used in a single render that are configurable by the client.
|
/// Parameters used in a single render that are configurable by the client.
|
||||||
|
@ -70,6 +76,9 @@ pub struct RendererOptions {
|
||||||
/// The format of the texture used for surfaces with this renderer/device
|
/// The format of the texture used for surfaces with this renderer/device
|
||||||
/// If None, the renderer cannot be used with surfaces
|
/// If None, the renderer cannot be used with surfaces
|
||||||
pub surface_format: Option<TextureFormat>,
|
pub surface_format: Option<TextureFormat>,
|
||||||
|
/// The timestamp period from [`wgpu::Queue::get_timestamp_period`]
|
||||||
|
/// Used when the wgpu-profiler feature is enabled
|
||||||
|
pub timestamp_period: f32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Renderer {
|
impl Renderer {
|
||||||
|
@ -85,6 +94,11 @@ impl Renderer {
|
||||||
shaders,
|
shaders,
|
||||||
blit,
|
blit,
|
||||||
target: None,
|
target: None,
|
||||||
|
// Use 3 pending frames
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
profiler: GpuProfiler::new(3, render_options.timestamp_period, device.features()),
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
profile_result: None,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -106,8 +120,15 @@ impl Renderer {
|
||||||
*target.as_image().unwrap(),
|
*target.as_image().unwrap(),
|
||||||
texture,
|
texture,
|
||||||
)];
|
)];
|
||||||
self.engine
|
self.engine.run_recording(
|
||||||
.run_recording(device, queue, &recording, &external_resources)?;
|
device,
|
||||||
|
queue,
|
||||||
|
&recording,
|
||||||
|
&external_resources,
|
||||||
|
"render_to_texture",
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
&mut self.profiler,
|
||||||
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -217,7 +238,15 @@ impl Renderer {
|
||||||
let recording = render.render_encoding_coarse(encoding, &self.shaders, params, true);
|
let recording = render.render_encoding_coarse(encoding, &self.shaders, params, true);
|
||||||
let target = render.out_image();
|
let target = render.out_image();
|
||||||
let bump_buf = render.bump_buf();
|
let bump_buf = render.bump_buf();
|
||||||
self.engine.run_recording(device, queue, &recording, &[])?;
|
self.engine.run_recording(
|
||||||
|
device,
|
||||||
|
queue,
|
||||||
|
&recording,
|
||||||
|
&[],
|
||||||
|
"t_async_coarse",
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
&mut self.profiler,
|
||||||
|
)?;
|
||||||
|
|
||||||
let mut bump: Option<BumpAllocators> = None;
|
let mut bump: Option<BumpAllocators> = None;
|
||||||
if let Some(bump_buf) = self.engine.get_download(bump_buf) {
|
if let Some(bump_buf) = self.engine.get_download(bump_buf) {
|
||||||
|
@ -239,8 +268,15 @@ impl Renderer {
|
||||||
let mut recording = Recording::default();
|
let mut recording = Recording::default();
|
||||||
render.record_fine(&self.shaders, &mut recording);
|
render.record_fine(&self.shaders, &mut recording);
|
||||||
let external_resources = [ExternalResource::Image(target, texture)];
|
let external_resources = [ExternalResource::Image(target, texture)];
|
||||||
self.engine
|
self.engine.run_recording(
|
||||||
.run_recording(device, queue, &recording, &external_resources)?;
|
device,
|
||||||
|
queue,
|
||||||
|
&recording,
|
||||||
|
&external_resources,
|
||||||
|
"t_async_fine",
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
&mut self.profiler,
|
||||||
|
)?;
|
||||||
Ok(bump)
|
Ok(bump)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -301,8 +337,16 @@ impl Renderer {
|
||||||
render_pass.set_bind_group(0, &bind_group, &[]);
|
render_pass.set_bind_group(0, &bind_group, &[]);
|
||||||
render_pass.draw(0..6, 0..1);
|
render_pass.draw(0..6, 0..1);
|
||||||
}
|
}
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
self.profiler.resolve_queries(&mut encoder);
|
||||||
queue.submit(Some(encoder.finish()));
|
queue.submit(Some(encoder.finish()));
|
||||||
self.target = Some(target);
|
self.target = Some(target);
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
self.profiler.end_frame().unwrap();
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
if let Some(result) = self.profiler.process_finished_frame() {
|
||||||
|
self.profile_result = Some(result);
|
||||||
|
}
|
||||||
Ok(bump)
|
Ok(bump)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -134,12 +134,16 @@ impl RenderContext {
|
||||||
.await?;
|
.await?;
|
||||||
let features = adapter.features();
|
let features = adapter.features();
|
||||||
let limits = Limits::default();
|
let limits = Limits::default();
|
||||||
|
let mut maybe_features = wgpu::Features::CLEAR_TEXTURE;
|
||||||
|
#[cfg(feature = "wgpu-profiler")]
|
||||||
|
{
|
||||||
|
maybe_features |= wgpu_profiler::GpuProfiler::ALL_WGPU_TIMER_FEATURES;
|
||||||
|
};
|
||||||
let (device, queue) = adapter
|
let (device, queue) = adapter
|
||||||
.request_device(
|
.request_device(
|
||||||
&wgpu::DeviceDescriptor {
|
&wgpu::DeviceDescriptor {
|
||||||
label: None,
|
label: None,
|
||||||
features: features
|
features: features & maybe_features,
|
||||||
& (wgpu::Features::TIMESTAMP_QUERY | wgpu::Features::CLEAR_TEXTURE),
|
|
||||||
limits,
|
limits,
|
||||||
},
|
},
|
||||||
None,
|
None,
|
||||||
|
|
Loading…
Add table
Reference in a new issue