From 02cc8679506471811b707bdb6fd74ee69cd41fac Mon Sep 17 00:00:00 2001 From: chad Date: Thu, 21 Apr 2022 04:20:54 -0400 Subject: [PATCH] command style metal timer queries + compute pass This commit adds timestamps to compute pass boundaries for command style timer queries on metal. It also updates the code in piet-gpu/stages, piet-gpu/lib.rs and tests/ to use the new ComputePass type. --- piet-gpu-hal/src/metal.rs | 90 +++++++++++++++++++++----------- piet-gpu/bin/winit.rs | 13 +++-- piet-gpu/src/lib.rs | 63 +++++++++++++--------- piet-gpu/src/stages.rs | 10 ++-- piet-gpu/src/stages/clip.rs | 12 ++--- piet-gpu/src/stages/draw.rs | 14 ++--- piet-gpu/src/stages/path.rs | 16 +++--- piet-gpu/src/stages/transform.rs | 14 ++--- tests/src/clip.rs | 6 +-- tests/src/draw.rs | 6 +-- tests/src/linkedlist.rs | 6 +-- tests/src/message_passing.rs | 6 +-- tests/src/path.rs | 6 +-- tests/src/prefix.rs | 6 +-- tests/src/prefix_tree.rs | 14 ++--- tests/src/transform.rs | 6 +-- 16 files changed, 166 insertions(+), 122 deletions(-) diff --git a/piet-gpu-hal/src/metal.rs b/piet-gpu-hal/src/metal.rs index 9b4468c..b2189e4 100644 --- a/piet-gpu-hal/src/metal.rs +++ b/piet-gpu-hal/src/metal.rs @@ -107,7 +107,7 @@ pub struct CmdBuf { enum Encoder { None, - Compute(metal::ComputeCommandEncoder), + Compute(metal::ComputeCommandEncoder, Option<(id, u32)>), Blit(metal::BlitCommandEncoder), } @@ -578,31 +578,52 @@ impl crate::backend::CmdBuf for CmdBuf { //debug_assert!(matches!(self.cur_encoder, Encoder::None)); self.flush_encoder(); autoreleasepool(|| { - let encoder = if let Some(queries) = &desc.timer_queries { - let descriptor: id = - msg_send![class!(MTLComputePassDescriptor), computePassDescriptor]; - let attachments: id = msg_send![descriptor, sampleBufferAttachments]; - let index: NSUInteger = 0; - let attachment: id = msg_send![attachments, objectAtIndexedSubscript: index]; - // Here we break the hub/mux separation a bit, for expedience - #[allow(irrefutable_let_patterns)] - if let crate::hub::QueryPool::Mtl(query_pool) = queries.0 { - if let Some(sample_buf) = &query_pool.counter_sample_buf { - let () = msg_send![attachment, setSampleBuffer: sample_buf.id()]; + let (encoder, end_query) = match (&desc.timer_queries, self.counter_style) { + (Some(queries), CounterStyle::Stage) => { + let descriptor: id = + msg_send![class!(MTLComputePassDescriptor), computePassDescriptor]; + let attachments: id = msg_send![descriptor, sampleBufferAttachments]; + let index: NSUInteger = 0; + let attachment: id = msg_send![attachments, objectAtIndexedSubscript: index]; + // Here we break the hub/mux separation a bit, for expedience + #[allow(irrefutable_let_patterns)] + if let crate::hub::QueryPool::Mtl(query_pool) = queries.0 { + if let Some(sample_buf) = &query_pool.counter_sample_buf { + let () = msg_send![attachment, setSampleBuffer: sample_buf.id()]; + } } + let start_index = queries.1 as NSUInteger; + let end_index = queries.2 as NSInteger; + let () = msg_send![attachment, setStartOfEncoderSampleIndex: start_index]; + let () = msg_send![attachment, setEndOfEncoderSampleIndex: end_index]; + ( + msg_send![ + self.cmd_buf, + computeCommandEncoderWithDescriptor: descriptor + ], + None, + ) } - let start_index = queries.1 as NSUInteger; - let end_index = queries.2 as NSInteger; - let () = msg_send![attachment, setStartOfEncoderSampleIndex: start_index]; - let () = msg_send![attachment, setEndOfEncoderSampleIndex: end_index]; - msg_send![ - self.cmd_buf, - computeCommandEncoderWithDescriptor: descriptor - ] - } else { - self.cmd_buf.new_compute_command_encoder() + (Some(queries), CounterStyle::Command) => { + let encoder = self.cmd_buf.new_compute_command_encoder(); + #[allow(irrefutable_let_patterns)] + let end_query = if let crate::hub::QueryPool::Mtl(query_pool) = queries.0 { + if let Some(sample_buf) = &query_pool.counter_sample_buf { + let sample_index = queries.1 as NSUInteger; + let sample_buf = sample_buf.id(); + let () = msg_send![encoder, sampleCountersInBuffer: sample_buf atSampleIndex: sample_index withBarrier: true]; + Some((sample_buf, queries.2)) + } else { + None + } + } else { + None + }; + (encoder, end_query) + } + _ => (self.cmd_buf.new_compute_command_encoder(), None), }; - self.cur_encoder = Encoder::Compute(encoder.to_owned()); + self.cur_encoder = Encoder::Compute(encoder.to_owned(), end_query); }); } @@ -663,7 +684,7 @@ impl crate::backend::CmdBuf for CmdBuf { let size = size.unwrap_or(buffer.size); let _ = self.compute_command_encoder(); // Getting this directly is a workaround for a borrow checker issue. - if let Encoder::Compute(e) = &self.cur_encoder { + if let Encoder::Compute(e, _) = &self.cur_encoder { clear::encode_clear(e, &self.helpers.clear_pipeline, &buffer.buffer, size); } } @@ -752,12 +773,12 @@ impl crate::backend::CmdBuf for CmdBuf { if let Some(buf) = &pool.counter_sample_buf { if matches!(self.cur_encoder, Encoder::None) { self.cur_encoder = - Encoder::Compute(self.cmd_buf.new_compute_command_encoder().to_owned()); + Encoder::Compute(self.cmd_buf.new_compute_command_encoder().to_owned(), None); } let sample_index = query as NSUInteger; if self.counter_style == CounterStyle::Command { match &self.cur_encoder { - Encoder::Compute(e) => { + Encoder::Compute(e, _) => { let () = msg_send![e.as_ptr(), sampleCountersInBuffer: buf.id() atSampleIndex: sample_index withBarrier: true]; } Encoder::None => unreachable!(), @@ -765,7 +786,7 @@ impl crate::backend::CmdBuf for CmdBuf { } } else if self.counter_style == CounterStyle::Stage { match &self.cur_encoder { - Encoder::Compute(_e) => { + Encoder::Compute(_e, _) => { println!("write_timestamp is not supported for stage-style encoders"); } _ => (), @@ -777,12 +798,12 @@ impl crate::backend::CmdBuf for CmdBuf { impl CmdBuf { fn compute_command_encoder(&mut self) -> &metal::ComputeCommandEncoder { - if !matches!(self.cur_encoder, Encoder::Compute(_)) { + if !matches!(self.cur_encoder, Encoder::Compute(..)) { self.flush_encoder(); self.cur_encoder = - Encoder::Compute(self.cmd_buf.new_compute_command_encoder().to_owned()); + Encoder::Compute(self.cmd_buf.new_compute_command_encoder().to_owned(), None); } - if let Encoder::Compute(e) = &self.cur_encoder { + if let Encoder::Compute(e, _) = &self.cur_encoder { e } else { unreachable!() @@ -803,7 +824,14 @@ impl CmdBuf { fn flush_encoder(&mut self) { match std::mem::replace(&mut self.cur_encoder, Encoder::None) { - Encoder::Compute(e) => e.end_encoding(), + Encoder::Compute(e, Some((sample_buf, end_query))) => { + let sample_index = end_query as NSUInteger; + unsafe { + let () = msg_send![e.as_ptr(), sampleCountersInBuffer: sample_buf atSampleIndex: sample_index withBarrier: true]; + } + e.end_encoding(); + } + Encoder::Compute(e, None) => e.end_encoding(), Encoder::Blit(e) => e.end_encoding(), Encoder::None => (), } diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index b1db5e0..1642026 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -70,7 +70,7 @@ fn main() -> Result<(), Error> { .map(|_| session.create_semaphore()) .collect::, Error>>()?; let query_pools = (0..NUM_FRAMES) - .map(|_| session.create_query_pool(8)) + .map(|_| session.create_query_pool(12)) .collect::, Error>>()?; let mut cmd_bufs: [Option; NUM_FRAMES] = Default::default(); let mut submitted: [Option; NUM_FRAMES] = Default::default(); @@ -112,22 +112,23 @@ fn main() -> Result<(), Error> { if !ts.is_empty() { info_string = format!( "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", - ts[6] * 1e3, + ts[10] * 1e3, ts[0] * 1e3, (ts[1] - ts[0]) * 1e3, (ts[2] - ts[1]) * 1e3, - (ts[3] - ts[2]) * 1e3, (ts[4] - ts[3]) * 1e3, - (ts[5] - ts[4]) * 1e3, (ts[6] - ts[5]) * 1e3, + (ts[8] - ts[7]) * 1e3, + (ts[10] - ts[9]) * 1e3, ); } } let mut ctx = PietGpuRenderContext::new(); + let test_blend = false; if let Some(svg) = &svg { test_scenes::render_svg(&mut ctx, svg); - } else { + } else if test_blend { use piet_gpu::{Blend, BlendMode::*, CompositionMode::*}; let blends = [ Blend::new(Normal, SrcOver), @@ -163,6 +164,8 @@ fn main() -> Result<(), Error> { let blend = blends[mode % blends.len()]; test_scenes::render_blend_test(&mut ctx, current_frame, blend); info_string = format!("{:?}", blend); + } else { + test_scenes::render_anim_frame(&mut ctx, current_frame); } render_info_string(&mut ctx, &info_string); if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) { diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 249735a..acbd28d 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -17,8 +17,8 @@ use piet::kurbo::Vec2; use piet::{ImageFormat, RenderContext}; use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Error, Image, - ImageLayout, Pipeline, QueryPool, Session, + include_shader, BindType, Buffer, BufferUsage, CmdBuf, ComputePassDescriptor, DescriptorSet, + Error, Image, ImageLayout, Pipeline, QueryPool, Session, }; pub use pico_svg::PicoSvg; @@ -423,10 +423,11 @@ impl Renderer { cmd_buf.copy_buffer_to_image(&self.gradient_bufs[buf_ix], &self.gradients); cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General); cmd_buf.reset_query_pool(&query_pool); - cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.begin_debug_label("Element bounding box calculation"); + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 0, 1)); + // cmd_buf.write_timestamp(&query_pool, 0); self.element_stage.record( - cmd_buf, + &mut pass, &self.element_code, &self.element_bindings[buf_ix], self.n_transform as u64, @@ -434,56 +435,64 @@ impl Renderer { self.n_pathtag as u32, self.n_drawobj as u64, ); + pass.end(); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 1); + // cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Clip bounding box calculation"); + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 2, 3)); self.clip_binding - .record(cmd_buf, &self.clip_code, self.n_clip as u32); - cmd_buf.end_debug_label(); - cmd_buf.begin_debug_label("Element binning"); - cmd_buf.dispatch( + .record(&mut pass, &self.clip_code, self.n_clip as u32); + // cmd_buf.end_debug_label(); + // cmd_buf.begin_debug_label("Element binning"); + pass.dispatch( &self.bin_pipeline, &self.bin_ds, (((self.n_paths + 255) / 256) as u32, 1, 1), (256, 1, 1), ); - cmd_buf.end_debug_label(); - cmd_buf.memory_barrier(); - cmd_buf.begin_debug_label("Tile allocation"); - cmd_buf.dispatch( + // cmd_buf.end_debug_label(); + pass.memory_barrier(); + // cmd_buf.begin_debug_label("Tile allocation"); + pass.dispatch( &self.tile_pipeline, &self.tile_ds[buf_ix], (((self.n_paths + 255) / 256) as u32, 1, 1), (256, 1, 1), ); - cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 2); - cmd_buf.memory_barrier(); + // cmd_buf.end_debug_label(); + pass.end(); + // cmd_buf.write_timestamp(&query_pool, 2); cmd_buf.begin_debug_label("Path flattening"); - cmd_buf.dispatch( + cmd_buf.memory_barrier(); + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 4, 5)); + pass.dispatch( &self.path_pipeline, &self.path_ds, (((self.n_pathseg + 31) / 32) as u32, 1, 1), (32, 1, 1), ); + pass.end(); + // cmd_buf.write_timestamp(&query_pool, 3); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 3); cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Backdrop propagation"); - cmd_buf.dispatch( + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 6, 7)); + pass.dispatch( &self.backdrop_pipeline, &self.backdrop_ds, (((self.n_paths + 255) / 256) as u32, 1, 1), (256, self.backdrop_y, 1), ); + pass.end(); + // cmd_buf.write_timestamp(&query_pool, 4); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 4); // TODO: redo query accounting - cmd_buf.write_timestamp(&query_pool, 5); + // cmd_buf.write_timestamp(&query_pool, 5); cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Coarse raster"); - cmd_buf.dispatch( + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 8, 9)); + pass.dispatch( &self.coarse_pipeline, &self.coarse_ds[buf_ix], ( @@ -493,11 +502,14 @@ impl Renderer { ), (256, 1, 1), ); + pass.end(); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 6); + // cmd_buf.write_timestamp(&query_pool, 6); cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Fine raster"); - cmd_buf.dispatch( + let mut pass = + cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 10, 11)); + pass.dispatch( &self.k4_pipeline, &self.k4_ds, ( @@ -507,8 +519,9 @@ impl Renderer { ), (8, 4, 1), ); + pass.end(); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 7); + // cmd_buf.write_timestamp(&query_pool, 7); cmd_buf.memory_barrier(); cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc); } diff --git a/piet-gpu/src/stages.rs b/piet-gpu/src/stages.rs index 52b8bf1..5442ba3 100644 --- a/piet-gpu/src/stages.rs +++ b/piet-gpu/src/stages.rs @@ -26,7 +26,7 @@ use bytemuck::{Pod, Zeroable}; pub use clip::{ClipBinding, ClipCode, CLIP_PART_SIZE}; pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE}; -use piet_gpu_hal::{Buffer, CmdBuf, Session}; +use piet_gpu_hal::{Buffer, ComputePass, Session}; pub use transform::{ Transform, TransformBinding, TransformCode, TransformStage, TRANSFORM_PART_SIZE, }; @@ -140,7 +140,7 @@ impl ElementStage { pub unsafe fn record( &self, - cmd_buf: &mut CmdBuf, + pass: &mut ComputePass, code: &ElementCode, binding: &ElementBinding, n_transform: u64, @@ -149,14 +149,14 @@ impl ElementStage { n_drawobj: u64, ) { self.transform_stage.record( - cmd_buf, + pass, &code.transform_code, &binding.transform_binding, n_transform, ); // No memory barrier needed here; path has at least one before pathseg self.path_stage.record( - cmd_buf, + pass, &code.path_code, &binding.path_binding, n_paths, @@ -164,6 +164,6 @@ impl ElementStage { ); // No memory barrier needed here; draw has at least one before draw_leaf self.draw_stage - .record(cmd_buf, &code.draw_code, &binding.draw_binding, n_drawobj); + .record(pass, &code.draw_code, &binding.draw_binding, n_drawobj); } } diff --git a/piet-gpu/src/stages/clip.rs b/piet-gpu/src/stages/clip.rs index e4bc3db..2fd195b 100644 --- a/piet-gpu/src/stages/clip.rs +++ b/piet-gpu/src/stages/clip.rs @@ -16,7 +16,7 @@ //! The clip processing stage (includes substages). -use piet_gpu_hal::{include_shader, BindType, Buffer, CmdBuf, DescriptorSet, Pipeline, Session}; +use piet_gpu_hal::{include_shader, BindType, Buffer, ComputePass, DescriptorSet, Pipeline, Session}; // Note that this isn't the code/stage/binding pattern of most of the other stages // in the new element processing pipeline. We want to move those temporary buffers @@ -69,26 +69,26 @@ impl ClipBinding { /// Record the clip dispatches. /// /// Assumes memory barrier on entry. Provides memory barrier on exit. - pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, code: &ClipCode, n_clip: u32) { + pub unsafe fn record(&self, pass: &mut ComputePass, code: &ClipCode, n_clip: u32) { let n_wg_reduce = n_clip.saturating_sub(1) / CLIP_PART_SIZE; if n_wg_reduce > 0 { - cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &self.reduce_ds, (n_wg_reduce, 1, 1), (CLIP_PART_SIZE, 1, 1), ); - cmd_buf.memory_barrier(); + pass.memory_barrier(); } let n_wg = (n_clip + CLIP_PART_SIZE - 1) / CLIP_PART_SIZE; if n_wg > 0 { - cmd_buf.dispatch( + pass.dispatch( &code.leaf_pipeline, &self.leaf_ds, (n_wg, 1, 1), (CLIP_PART_SIZE, 1, 1), ); - cmd_buf.memory_barrier(); + pass.memory_barrier(); } } } diff --git a/piet-gpu/src/stages/draw.rs b/piet-gpu/src/stages/draw.rs index 21312a4..f0ee2b6 100644 --- a/piet-gpu/src/stages/draw.rs +++ b/piet-gpu/src/stages/draw.rs @@ -19,7 +19,7 @@ use bytemuck::{Pod, Zeroable}; use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session, + include_shader, BindType, Buffer, BufferUsage, ComputePass, DescriptorSet, Pipeline, Session, }; /// The output element of the draw object stage. @@ -130,7 +130,7 @@ impl DrawStage { pub unsafe fn record( &self, - cmd_buf: &mut CmdBuf, + pass: &mut ComputePass, code: &DrawCode, binding: &DrawBinding, size: u64, @@ -140,22 +140,22 @@ impl DrawStage { } let n_workgroups = (size + DRAW_PART_SIZE - 1) / DRAW_PART_SIZE; if n_workgroups > 1 { - cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &binding.reduce_ds, (n_workgroups as u32, 1, 1), (DRAW_WG as u32, 1, 1), ); - cmd_buf.memory_barrier(); - cmd_buf.dispatch( + pass.memory_barrier(); + pass.dispatch( &code.root_pipeline, &self.root_ds, (1, 1, 1), (DRAW_WG as u32, 1, 1), ); } - cmd_buf.memory_barrier(); - cmd_buf.dispatch( + pass.memory_barrier(); + pass.dispatch( &code.leaf_pipeline, &binding.leaf_ds, (n_workgroups as u32, 1, 1), diff --git a/piet-gpu/src/stages/path.rs b/piet-gpu/src/stages/path.rs index 6c524a2..be33041 100644 --- a/piet-gpu/src/stages/path.rs +++ b/piet-gpu/src/stages/path.rs @@ -17,7 +17,7 @@ //! The path stage (includes substages). use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session, + include_shader, BindType, Buffer, BufferUsage, ComputePass, DescriptorSet, Pipeline, Session, }; pub struct PathCode { @@ -148,7 +148,7 @@ impl PathStage { /// those are consumed. Result is written without barrier. pub unsafe fn record( &self, - cmd_buf: &mut CmdBuf, + pass: &mut ComputePass, code: &PathCode, binding: &PathBinding, n_paths: u32, @@ -166,15 +166,15 @@ impl PathStage { let reduce_part_tags = REDUCE_PART_SIZE * 4; let n_wg_tag_reduce = (n_tags + reduce_part_tags - 1) / reduce_part_tags; if n_wg_tag_reduce > 1 { - cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &binding.reduce_ds, (n_wg_tag_reduce, 1, 1), (REDUCE_WG, 1, 1), ); // I think we can skip root if n_wg_tag_reduce == 2 - cmd_buf.memory_barrier(); - cmd_buf.dispatch( + pass.memory_barrier(); + pass.dispatch( &code.tag_root_pipeline, &self.tag_root_ds, (1, 1, 1), @@ -183,15 +183,15 @@ impl PathStage { // No barrier needed here; clear doesn't depend on path tags } let n_wg_clear = (n_paths + CLEAR_WG - 1) / CLEAR_WG; - cmd_buf.dispatch( + pass.dispatch( &code.clear_pipeline, &binding.clear_ds, (n_wg_clear, 1, 1), (CLEAR_WG, 1, 1), ); - cmd_buf.memory_barrier(); + pass.memory_barrier(); let n_wg_pathseg = (n_tags + SCAN_PART_SIZE - 1) / SCAN_PART_SIZE; - cmd_buf.dispatch( + pass.dispatch( &code.pathseg_pipeline, &binding.path_ds, (n_wg_pathseg, 1, 1), diff --git a/piet-gpu/src/stages/transform.rs b/piet-gpu/src/stages/transform.rs index b21712f..8de7cee 100644 --- a/piet-gpu/src/stages/transform.rs +++ b/piet-gpu/src/stages/transform.rs @@ -20,7 +20,7 @@ use bytemuck::{Pod, Zeroable}; use piet::kurbo::Affine; use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session, + include_shader, BindType, Buffer, BufferUsage, ComputePass, DescriptorSet, Pipeline, Session, }; /// An affine transform. @@ -132,7 +132,7 @@ impl TransformStage { pub unsafe fn record( &self, - cmd_buf: &mut CmdBuf, + pass: &mut ComputePass, code: &TransformCode, binding: &TransformBinding, size: u64, @@ -142,22 +142,22 @@ impl TransformStage { } let n_workgroups = (size + TRANSFORM_PART_SIZE - 1) / TRANSFORM_PART_SIZE; if n_workgroups > 1 { - cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &binding.reduce_ds, (n_workgroups as u32, 1, 1), (TRANSFORM_WG as u32, 1, 1), ); - cmd_buf.memory_barrier(); - cmd_buf.dispatch( + pass.memory_barrier(); + pass.dispatch( &code.root_pipeline, &self.root_ds, (1, 1, 1), (TRANSFORM_WG as u32, 1, 1), ); - cmd_buf.memory_barrier(); + pass.memory_barrier(); } - cmd_buf.dispatch( + pass.dispatch( &code.leaf_pipeline, &binding.leaf_ds, (n_workgroups as u32, 1, 1), diff --git a/tests/src/clip.rs b/tests/src/clip.rs index 4a38949..b1f8613 100644 --- a/tests/src/clip.rs +++ b/tests/src/clip.rs @@ -58,11 +58,11 @@ pub unsafe fn clip_test(runner: &mut Runner, config: &Config) -> TestResult { let binding = ClipBinding::new(&runner.session, &code, &config_buf, &memory.dev_buf); let mut commands = runner.commands(); - commands.write_timestamp(0); commands.upload(&memory); - binding.record(&mut commands.cmd_buf, &code, n_clip as u32); + let mut pass = commands.compute_pass(0, 1); + binding.record(&mut pass, &code, n_clip as u32); + pass.end(); commands.download(&memory); - commands.write_timestamp(1); runner.submit(commands); let dst = memory.map_read(..); if let Some(failure) = data.verify(&dst) { diff --git a/tests/src/draw.rs b/tests/src/draw.rs index 4372da4..dc82572 100644 --- a/tests/src/draw.rs +++ b/tests/src/draw.rs @@ -77,9 +77,9 @@ pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult { let n_iter = config.n_iter; for i in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); - stage.record(&mut commands.cmd_buf, &code, &binding, n_tag); - commands.write_timestamp(1); + let mut pass = commands.compute_pass(0, 1); + stage.record(&mut pass, &code, &binding, n_tag); + pass.end(); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&memory); diff --git a/tests/src/linkedlist.rs b/tests/src/linkedlist.rs index 5767806..e24adcb 100644 --- a/tests/src/linkedlist.rs +++ b/tests/src/linkedlist.rs @@ -45,9 +45,7 @@ pub unsafe fn run_linkedlist_test(runner: &mut Runner, config: &Config) -> TestR for i in 0..n_iter { let mut commands = runner.commands(); // Might clear only buckets to save time. - commands.write_timestamp(0); stage.record(&mut commands, &code, &binding, &mem_buf.dev_buf); - commands.write_timestamp(1); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&mem_buf); @@ -107,12 +105,14 @@ impl LinkedListStage { commands.cmd_buf.clear_buffer(out_buf, None); commands.cmd_buf.memory_barrier(); let n_workgroups = N_BUCKETS / WG_SIZE; - commands.cmd_buf.dispatch( + let mut pass = commands.compute_pass(0, 1); + pass.dispatch( &code.pipeline, &bindings.descriptor_set, (n_workgroups as u32, 1, 1), (WG_SIZE as u32, 1, 1), ); + pass.end(); } } diff --git a/tests/src/message_passing.rs b/tests/src/message_passing.rs index c5d989b..39e71dc 100644 --- a/tests/src/message_passing.rs +++ b/tests/src/message_passing.rs @@ -59,9 +59,7 @@ pub unsafe fn run_message_passing_test( let mut failures = 0; for _ in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); stage.record(&mut commands, &code, &binding, &out_buf.dev_buf); - commands.write_timestamp(1); commands.cmd_buf.memory_barrier(); commands.download(&out_buf); total_elapsed += runner.submit(commands); @@ -128,11 +126,13 @@ impl MessagePassingStage { commands.cmd_buf.clear_buffer(&self.data_buf, None); commands.cmd_buf.clear_buffer(out_buf, None); commands.cmd_buf.memory_barrier(); - commands.cmd_buf.dispatch( + let mut pass = commands.compute_pass(0, 1); + pass.dispatch( &code.pipeline, &bindings.descriptor_set, (256, 1, 1), (256, 1, 1), ); + pass.end(); } } diff --git a/tests/src/path.rs b/tests/src/path.rs index bf72c68..9d794e1 100644 --- a/tests/src/path.rs +++ b/tests/src/path.rs @@ -105,15 +105,15 @@ pub unsafe fn path_test(runner: &mut Runner, config: &Config) -> TestResult { let mut commands = runner.commands(); commands.cmd_buf.copy_buffer(&memory_init, &memory.dev_buf); commands.cmd_buf.memory_barrier(); - commands.write_timestamp(0); + let mut pass = commands.compute_pass(0, 1); stage.record( - &mut commands.cmd_buf, + &mut pass, &code, &binding, path_data.n_path, path_data.tags.len() as u32, ); - commands.write_timestamp(1); + pass.end(); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&memory); diff --git a/tests/src/prefix.rs b/tests/src/prefix.rs index 4174d8d..dbaf256 100644 --- a/tests/src/prefix.rs +++ b/tests/src/prefix.rs @@ -85,9 +85,7 @@ pub unsafe fn run_prefix_test( let mut total_elapsed = 0.0; for i in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); stage.record(&mut commands, &code, &binding); - commands.write_timestamp(1); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&out_buf); @@ -159,12 +157,14 @@ impl PrefixStage { let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG; commands.cmd_buf.clear_buffer(&self.state_buf, None); commands.cmd_buf.memory_barrier(); - commands.cmd_buf.dispatch( + let mut pass = commands.compute_pass(0, 1); + pass.dispatch( &code.pipeline, &bindings.descriptor_set, (n_workgroups as u32, 1, 1), (WG_SIZE as u32, 1, 1), ); + pass.end(); // One thing that's missing here is registering the buffers so // they can be safely dropped by Rust code before the execution // of the command buffer completes. diff --git a/tests/src/prefix_tree.rs b/tests/src/prefix_tree.rs index 24be2af..3c9c813 100644 --- a/tests/src/prefix_tree.rs +++ b/tests/src/prefix_tree.rs @@ -66,9 +66,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul let mut commands = runner.commands(); commands.cmd_buf.copy_buffer(&data_buf, &out_buf.dev_buf); commands.cmd_buf.memory_barrier(); - commands.write_timestamp(0); stage.record(&mut commands, &code, &binding); - commands.write_timestamp(1); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&out_buf); @@ -175,33 +173,35 @@ impl PrefixTreeStage { code: &PrefixTreeCode, bindings: &PrefixTreeBinding, ) { + let mut pass = commands.compute_pass(0, 1); let n = self.tmp_bufs.len(); for i in 0..n { let n_workgroups = self.sizes[i + 1]; - commands.cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &bindings.descriptor_sets[i], (n_workgroups as u32, 1, 1), (WG_SIZE as u32, 1, 1), ); - commands.cmd_buf.memory_barrier(); + pass.memory_barrier(); } - commands.cmd_buf.dispatch( + pass.dispatch( &code.root_pipeline, &bindings.descriptor_sets[n], (1, 1, 1), (WG_SIZE as u32, 1, 1), ); for i in (0..n).rev() { - commands.cmd_buf.memory_barrier(); + pass.memory_barrier(); let n_workgroups = self.sizes[i + 1]; - commands.cmd_buf.dispatch( + pass.dispatch( &code.scan_pipeline, &bindings.descriptor_sets[2 * n - i], (n_workgroups as u32, 1, 1), (WG_SIZE as u32, 1, 1), ); } + pass.end(); } } diff --git a/tests/src/transform.rs b/tests/src/transform.rs index 6edcc3f..43bfc67 100644 --- a/tests/src/transform.rs +++ b/tests/src/transform.rs @@ -61,9 +61,9 @@ pub unsafe fn transform_test(runner: &mut Runner, config: &Config) -> TestResult let n_iter = config.n_iter; for i in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); - stage.record(&mut commands.cmd_buf, &code, &binding, n_elements); - commands.write_timestamp(1); + let mut pass = commands.compute_pass(0, 1); + stage.record(&mut pass, &code, &binding, n_elements); + pass.end(); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&memory);