diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index 3568732..0b471b3 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -37,7 +37,7 @@ fn main() -> Result<(), Error> { .map(|_| device.create_cmd_buf()) .collect::, Error>>()?; let query_pools = (0..NUM_FRAMES) - .map(|_| device.create_query_pool(5)) + .map(|_| device.create_query_pool(8)) .collect::, Error>>()?; let mut ctx = PietGpuRenderContext::new(); @@ -70,13 +70,17 @@ fn main() -> Result<(), Error> { if current_frame >= NUM_FRAMES { device.wait_and_reset(&[frame_fences[frame_idx]]).unwrap(); - let timestamps = device.reap_query_pool(query_pool).unwrap(); + let ts = device.reap_query_pool(query_pool).unwrap(); window.set_title(&format!( - "e: {:.3}ms, b: {:.3}ms, c: {:.3}ms, f: {:.3}ms", - timestamps[0] * 1e3, - (timestamps[1] - timestamps[0]) * 1e3, - (timestamps[2] - timestamps[1]) * 1e3, - (timestamps[3] - timestamps[2]) * 1e3, + "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", + ts[6] * 1e3, + ts[0] * 1e3, + (ts[1] - ts[0]) * 1e3, + (ts[2] - ts[1]) * 1e3, + (ts[3] - ts[2]) * 1e3, + (ts[4] - ts[3]) * 1e3, + (ts[5] - ts[4]) * 1e3, + (ts[6] - ts[5]) * 1e3, )); } @@ -84,8 +88,6 @@ fn main() -> Result<(), Error> { let swap_image = swapchain.image(image_idx); let cmd_buf = &mut cmd_buffers[frame_idx]; cmd_buf.begin(); - cmd_buf.reset_query_pool(&query_pool); - renderer.record(cmd_buf, &query_pool); // Image -> Swapchain diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index e00320b..a88bc3d 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -8,9 +8,11 @@ #extension GL_GOOGLE_include_directive : enable #extension GL_KHR_shader_subgroup_basic : enable +#include "setup.h" + #define CHUNK 8 -#define CHUNK_DY (16 / CHUNK) -layout(local_size_x = 16, local_size_y = 2) in; +#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK) +layout(local_size_x = TILE_WIDTH_PX, local_size_y = CHUNK_DY) in; // Same concern that this should be readonly as in kernel 3. layout(set = 0, binding = 0) buffer PtclBuf { @@ -26,8 +28,6 @@ layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image; #include "ptcl.h" #include "tile.h" -#include "setup.h" - void main() { uint tile_ix = gl_WorkGroupID.y * WIDTH_IN_TILES + gl_WorkGroupID.x; CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 5813ed3..8c720a2 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -263,8 +263,8 @@ impl Renderer { let k4_code = include_bytes!("../shader/kernel4.spv"); let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 2, 1)?; let k4_ds = device.create_descriptor_set( - &k4_pipeline, - &[&ptcl_buf, &tile_buf], + &k4_pipeline, + &[&ptcl_buf, &tile_buf], &[&image_dev] )?;