shader: introduce a crude setting for adjusting the maximum workgroup size

Both the Vulkan and OpenGL ES specs allow implementations to limit workgroups to
128 threads. Add an LG_WG_FACTOR setting for easy switching between 128 and 256
threads, with 256 kept as the default setting.

Manually tested that LG_WG_FACTOR = 0 (128 threads) works as expected.

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur 2020-09-13 12:58:47 +02:00
parent 326f7f0d03
commit ac3ac3ddff
5 changed files with 10 additions and 5 deletions

View file

@ -15,7 +15,7 @@
#include "setup.h" #include "setup.h"
#define LG_BACKDROP_WG 8 #define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG) #define BACKDROP_WG (1 << LG_BACKDROP_WG)
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in; layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;

View file

@ -41,7 +41,7 @@ layout(set = 0, binding = 4) buffer PtclBuf {
#include "tile.h" #include "tile.h"
#include "ptcl.h" #include "ptcl.h"
#define LG_N_PART_READ 8 #define LG_N_PART_READ (7 + LG_WG_FACTOR)
#define N_PART_READ (1 << LG_N_PART_READ) #define N_PART_READ (1 << LG_N_PART_READ)
shared uint sh_elements[N_TILE]; shared uint sh_elements[N_TILE];

Binary file not shown.

View file

@ -3,6 +3,11 @@
// Much of this will be made dynamic in various ways, but for now it's easiest // Much of this will be made dynamic in various ways, but for now it's easiest
// to hardcode and keep all in one place. // to hardcode and keep all in one place.
// An LG_WG_FACTOR of n scales workgroup sizes by 2^n. Use 0 for a
// maximum workgroup size of 128, or 1 for a maximum size of 256.
#define LG_WG_FACTOR 1
#define WG_FACTOR (1<<LG_WG_FACTOR)
// TODO: compute all these // TODO: compute all these
#define WIDTH_IN_TILES 128 #define WIDTH_IN_TILES 128
@ -17,7 +22,7 @@
// of that kernel is equal to the number of bins, but should probably // of that kernel is equal to the number of bins, but should probably
// be more flexible (it's 512 in the K&L paper). // be more flexible (it's 512 in the K&L paper).
#define N_TILE_X 16 #define N_TILE_X 16
#define N_TILE_Y 16 #define N_TILE_Y (8 * WG_FACTOR)
#define N_TILE (N_TILE_X * N_TILE_Y) #define N_TILE (N_TILE_X * N_TILE_Y)
#define LG_N_TILE 8 #define LG_N_TILE (7 + LG_WG_FACTOR)
#define N_SLICE (N_TILE / 32) #define N_SLICE (N_TILE / 32)

View file

@ -5,7 +5,7 @@
#include "setup.h" #include "setup.h"
#define LG_TILE_ALLOC_WG 8 #define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG) #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in; layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;