Fix performance issues

Use a larger workgroup for tile initialization (256 threads instead of 32; utilization was poor).
Provide the correct element count (the number of paths, `n_paths`, rather than `n_elements`) to the coarse rasterizer.
This commit is contained in:
Raph Levien 2020-06-03 15:32:58 -07:00
parent ff8cee059c
commit 63ba45c774
3 changed files with 3 additions and 3 deletions

View file

@ -5,7 +5,7 @@
#include "setup.h" #include "setup.h"
#define LG_TILE_ALLOC_WG 5 #define LG_TILE_ALLOC_WG 8
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG) #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in; layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;

Binary file not shown.

View file

@ -247,7 +247,7 @@ impl<D: Device> Renderer<D> {
let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
device.write_buffer( device.write_buffer(
&coarse_alloc_buf_host, &coarse_alloc_buf_host,
&[n_elements as u32, coarse_alloc_start as u32], &[n_paths as u32, coarse_alloc_start as u32],
)?; )?;
let coarse_code = include_bytes!("../shader/coarse.spv"); let coarse_code = include_bytes!("../shader/coarse.spv");
let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 5, 0)?; let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 5, 0)?;
@ -323,7 +323,7 @@ impl<D: Device> Renderer<D> {
cmd_buf.dispatch( cmd_buf.dispatch(
&self.tile_pipeline, &self.tile_pipeline,
&self.tile_ds, &self.tile_ds,
(((self.n_paths + 31) / 32) as u32, 1, 1), (((self.n_paths + 255) / 256) as u32, 1, 1),
); );
cmd_buf.write_timestamp(&query_pool, 2); cmd_buf.write_timestamp(&query_pool, 2);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();