Fixes for macOS (Metal backend)

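Fix incorrect workgroup sizes passed at dispatch, and change the strategy
for assigning binding numbers: texture indices now continue from the last
buffer index instead of restarting at 0. Ultimately we should get the
correct values for those from shader compilation, but this works for now.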
This commit is contained in:
Raph Levien 2021-12-03 20:25:42 -08:00
parent 395d400cd1
commit 3ee81ad5a9
4 changed files with 39 additions and 32 deletions

View file

@ -277,7 +277,8 @@ impl crate::backend::Device for MtlDevice {
} }
unsafe fn destroy_image(&self, _image: &Self::Image) -> Result<(), Error> { unsafe fn destroy_image(&self, _image: &Self::Image) -> Result<(), Error> {
todo!() // TODO figure out what we want to do here
Ok(())
} }
unsafe fn create_compute_pipeline( unsafe fn create_compute_pipeline(
@ -429,7 +430,7 @@ impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
encoder.set_buffer(buf_ix, Some(&buffer.buffer), 0); encoder.set_buffer(buf_ix, Some(&buffer.buffer), 0);
buf_ix += 1; buf_ix += 1;
} }
let mut img_ix = 0; let mut img_ix = buf_ix;
for image in &descriptor_set.images { for image in &descriptor_set.images {
encoder.set_texture(img_ix, Some(&image.texture)); encoder.set_texture(img_ix, Some(&image.texture));
img_ix += 1; img_ix += 1;

View file

@ -264,22 +264,23 @@ fn main() -> Result<(), Error> {
submitted.wait()?; submitted.wait()?;
println!("elapsed = {:?}", start.elapsed()); println!("elapsed = {:?}", start.elapsed());
let ts = session.fetch_query_pool(&query_pool).unwrap(); let ts = session.fetch_query_pool(&query_pool).unwrap();
println!("Element kernel time: {:.3}ms", ts[0] * 1e3); if !ts.is_empty() {
println!( println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
"Tile allocation kernel time: {:.3}ms", println!(
(ts[1] - ts[0]) * 1e3 "Tile allocation kernel time: {:.3}ms",
); (ts[1] - ts[0]) * 1e3
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3); );
println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3); println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3); println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3); println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3); println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
}
/* /*
let mut data: Vec<u32> = Default::default(); let mut data: Vec<u32> = Default::default();
renderer.memory_buf_dev.read(&mut data).unwrap(); renderer.memory_buf_dev.read(&mut data).unwrap();
piet_gpu::dump_k1_data(&data[2..]); piet_gpu::dump_k1_data(&data[2..]);
trace_ptcl(&data);
*/ */
let mut img_data: Vec<u8> = Default::default(); let mut img_data: Vec<u8> = Default::default();

View file

@ -79,17 +79,19 @@ fn main() -> Result<(), Error> {
if let Some(submitted) = submitted[frame_idx].take() { if let Some(submitted) = submitted[frame_idx].take() {
cmd_bufs[frame_idx] = submitted.wait().unwrap(); cmd_bufs[frame_idx] = submitted.wait().unwrap();
let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap(); let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
info_string = format!( if !ts.is_empty() {
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", info_string = format!(
ts[6] * 1e3, "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
ts[0] * 1e3, ts[6] * 1e3,
(ts[1] - ts[0]) * 1e3, ts[0] * 1e3,
(ts[2] - ts[1]) * 1e3, (ts[1] - ts[0]) * 1e3,
(ts[3] - ts[2]) * 1e3, (ts[2] - ts[1]) * 1e3,
(ts[4] - ts[3]) * 1e3, (ts[3] - ts[2]) * 1e3,
(ts[5] - ts[4]) * 1e3, (ts[4] - ts[3]) * 1e3,
(ts[6] - ts[5]) * 1e3, (ts[5] - ts[4]) * 1e3,
); (ts[6] - ts[5]) * 1e3,
);
}
} }
let mut ctx = PietGpuRenderContext::new(); let mut ctx = PietGpuRenderContext::new();

View file

@ -81,6 +81,7 @@ pub struct Renderer {
backdrop_pipeline: Pipeline, backdrop_pipeline: Pipeline,
backdrop_ds: DescriptorSet, backdrop_ds: DescriptorSet,
backdrop_y: u32,
bin_pipeline: Pipeline, bin_pipeline: Pipeline,
bin_ds: DescriptorSet, bin_ds: DescriptorSet,
@ -170,12 +171,13 @@ impl Renderer {
let path_ds = session let path_ds = session
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?; .create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf])?;
let backdrop_code = if session.gpu_info().workgroup_limits.max_invocations >= 1024 { let (backdrop_code, backdrop_y) =
include_shader!(session, "../shader/gen/backdrop_lg") if session.gpu_info().workgroup_limits.max_invocations >= 1024 {
} else { (include_shader!(session, "../shader/gen/backdrop_lg"), 4)
println!("using small workgroup backdrop kernel"); } else {
include_shader!(session, "../shader/gen/backdrop") println!("using small workgroup backdrop kernel");
}; (include_shader!(session, "../shader/gen/backdrop"), 1)
};
let backdrop_pipeline = session let backdrop_pipeline = session
.create_compute_pipeline(backdrop_code, &[BindType::Buffer, BindType::BufReadOnly])?; .create_compute_pipeline(backdrop_code, &[BindType::Buffer, BindType::BufReadOnly])?;
let backdrop_ds = session let backdrop_ds = session
@ -243,6 +245,7 @@ impl Renderer {
path_ds, path_ds,
backdrop_pipeline, backdrop_pipeline,
backdrop_ds, backdrop_ds,
backdrop_y,
bin_pipeline, bin_pipeline,
bin_ds, bin_ds,
coarse_pipeline, coarse_pipeline,
@ -367,7 +370,7 @@ impl Renderer {
&self.backdrop_pipeline, &self.backdrop_pipeline,
&self.backdrop_ds, &self.backdrop_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1), (((self.n_paths + 255) / 256) as u32, 1, 1),
(256, 1, 1), (256, self.backdrop_y, 1),
); );
cmd_buf.write_timestamp(&query_pool, 4); cmd_buf.write_timestamp(&query_pool, 4);
// Note: this barrier is not needed as an actual dependency between // Note: this barrier is not needed as an actual dependency between
@ -390,7 +393,7 @@ impl Renderer {
(self.height as u32 + 255) / 256, (self.height as u32 + 255) / 256,
1, 1,
), ),
(256, 256, 1), (256, 1, 1),
); );
cmd_buf.write_timestamp(&query_pool, 6); cmd_buf.write_timestamp(&query_pool, 6);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();