Fix performance issues

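Use a larger workgroup for tile initialization (256 invocations instead of 32), since utilization was poor with the smaller size. Provide the correct element count to the coarse rasterizer (the number of paths, `n_paths`, rather than `n_elements`).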
2025-01-09 20:31:29 +11:00 · 2020-06-03 15:32:58 -07:00 · 2020-06-03 15:32:58 -07:00 · 63ba45c774
parent ff8cee059c
commit 63ba45c774
3 changed files with 3 additions and 3 deletions
--- a/piet-gpu/shader/tile_alloc.comp
+++ b/piet-gpu/shader/tile_alloc.comp
@@ -5,7 +5,7 @@
 #include "setup.h"
-#define LG_TILE_ALLOC_WG 5
+#define LG_TILE_ALLOC_WG 8
 #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
 layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
--- a/piet-gpu/shader/tile_alloc.spv
+++ b/piet-gpu/shader/tile_alloc.spv
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -247,7 +247,7 @@ impl<D: Device> Renderer<D> {
        let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
        device.write_buffer(
            &coarse_alloc_buf_host,
-            &[n_elements as u32, coarse_alloc_start as u32],
+            &[n_paths as u32, coarse_alloc_start as u32],
        )?;
        let coarse_code = include_bytes!("../shader/coarse.spv");
        let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 5, 0)?;
@@ -323,7 +323,7 @@ impl<D: Device> Renderer<D> {
        cmd_buf.dispatch(
            &self.tile_pipeline,
            &self.tile_ds,
-            (((self.n_paths + 31) / 32) as u32, 1, 1),
+            (((self.n_paths + 255) / 256) as u32, 1, 1),
        );
        cmd_buf.write_timestamp(&query_pool, 2);
        cmd_buf.memory_barrier();