From 63ba45c774d55434bcb70f9c002de89febab7072 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Wed, 3 Jun 2020 15:32:58 -0700 Subject: [PATCH] Fix performance issues Use larger workgroup for tile initialization (utilization was poor). Provide correct element count to coarse rasterizer. --- piet-gpu/shader/tile_alloc.comp | 2 +- piet-gpu/shader/tile_alloc.spv | Bin 8752 -> 8736 bytes piet-gpu/src/lib.rs | 4 ++-- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp index e1dff63..0d25274 100644 --- a/piet-gpu/shader/tile_alloc.comp +++ b/piet-gpu/shader/tile_alloc.comp @@ -5,7 +5,7 @@ #include "setup.h" -#define LG_TILE_ALLOC_WG 5 +#define LG_TILE_ALLOC_WG 8 #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG) layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in; diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv index 204cc903c46ce5145057abaf6954277ec7c912ae..37276476010468de9292b5620d8e2973b61fb095 100644 GIT binary patch delta 1298 zcmZ9KOHWf#6ovO~Z3}7Ygn>zoq9MwopkgiZ5TsgPC8)Ll~!vO(|dJm}mgvvFa z?UQB1yXDVI+m`RQxcTzSjjze9S7%~U$CP$Ro@vAUScvacoXK6hQ=G{ir=H2feCj_w z9q_!~;hkBdntI(n*ekI|)-L<0^0*rH33tmzg|jkpCM;@)<@@}BJ(+sH5Db-Tz{hnB zs*ec2mH(t;KFc2!N2Akk&=((=L|bbd3)Mj#N}FYT-_#T2p~ z@DV*;L|Q?(OP(5H?gcLh^MWj?yUaylba$Cc!f2Dqqj#NELv#g~g?Wu{!A-9SH?^RE zRYY_7Gs1jZnz5E}KEqXcJ`w(xE;T1VFXKo2S}X3qN~h`z!qmE+*M*sDLo={7fGJ0|b*wx+?W=6+v3Detsx!c0zpy&0VId*VSd{)IePY z{eTK%enI_}?)m{81fl2AZ^P6KoAti$I_$Ofw?2%ljWpF{+Ur9|LIeN&>!z7ZTln%| zx^}iJ387FdF3#tdR|-ojp_cz4w3UV8{rvLky@jU%Wm_0Vdt6v5JPcd>>_5CVoHu82 zrBKYzFRnh?sxw;NQ`ek~ReI`jNitVisDD?(wdG9XSW;V_XlzYd%TF4c`?8d;qwIc4 zG+^7{r@{0aS~&VUuouu;v0X%$v2&GAjk%=c1cvq#wij#n)x5ik@51jS(Qi2?iQIfA zspg~q@v)Q-^Db{gsN|boZSOvS*vWB*AEY}^7Tm!(3C?kh84NXHA2wTV*^wWx99CN3@HXHdJu(jhSoCZ%NyuXfNp0 Renderer { let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; device.write_buffer( &coarse_alloc_buf_host, - &[n_elements as u32, coarse_alloc_start as u32], + &[n_paths as u32, coarse_alloc_start as u32], )?; let coarse_code = include_bytes!("../shader/coarse.spv"); let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 5, 0)?; @@ -323,7 +323,7 @@ impl Renderer { cmd_buf.dispatch( &self.tile_pipeline, &self.tile_ds, - (((self.n_paths + 31) / 32) as u32, 1, 1), + (((self.n_paths + 255) / 256) as u32, 1, 1), ); cmd_buf.write_timestamp(&query_pool, 2); cmd_buf.memory_barrier();