mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
shader: introduce a crude setting for adjusting the maximum workgroup size
Both the Vulkan and OpenGL ES specs allow implementations to limit workgroups to 128 threads. Add a LG_WG_FACTOR setting for easy switching between 128 and 256 threads, with 256 being kept as the default setting. Manually tested that LG_WG_FACTOR = 0 (128 threads) works as expected. Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
parent
326f7f0d03
commit
ac3ac3ddff
|
@ -15,7 +15,7 @@
|
||||||
|
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define LG_BACKDROP_WG 8
|
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
|
||||||
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
|
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
|
||||||
|
|
||||||
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
||||||
|
|
|
@ -41,7 +41,7 @@ layout(set = 0, binding = 4) buffer PtclBuf {
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
|
|
||||||
#define LG_N_PART_READ 8
|
#define LG_N_PART_READ (7 + LG_WG_FACTOR)
|
||||||
#define N_PART_READ (1 << LG_N_PART_READ)
|
#define N_PART_READ (1 << LG_N_PART_READ)
|
||||||
|
|
||||||
shared uint sh_elements[N_TILE];
|
shared uint sh_elements[N_TILE];
|
||||||
|
|
Binary file not shown.
|
@ -3,6 +3,11 @@
|
||||||
// Much of this will be made dynamic in various ways, but for now it's easiest
|
// Much of this will be made dynamic in various ways, but for now it's easiest
|
||||||
// to hardcode and keep all in one place.
|
// to hardcode and keep all in one place.
|
||||||
|
|
||||||
|
// A LG_WG_FACTOR of n scales workgroup sizes by 2^n. Use 0 for a
|
||||||
|
// maximum workgroup size of 128, or 1 for a maximum size of 256.
|
||||||
|
#define LG_WG_FACTOR 1
|
||||||
|
#define WG_FACTOR (1<<LG_WG_FACTOR)
|
||||||
|
|
||||||
// TODO: compute all these
|
// TODO: compute all these
|
||||||
|
|
||||||
#define WIDTH_IN_TILES 128
|
#define WIDTH_IN_TILES 128
|
||||||
|
@ -17,7 +22,7 @@
|
||||||
// of that kernel is equal to the number of bins, but should probably
|
// of that kernel is equal to the number of bins, but should probably
|
||||||
// be more flexible (it's 512 in the K&L paper).
|
// be more flexible (it's 512 in the K&L paper).
|
||||||
#define N_TILE_X 16
|
#define N_TILE_X 16
|
||||||
#define N_TILE_Y 16
|
#define N_TILE_Y (8 * WG_FACTOR)
|
||||||
#define N_TILE (N_TILE_X * N_TILE_Y)
|
#define N_TILE (N_TILE_X * N_TILE_Y)
|
||||||
#define LG_N_TILE 8
|
#define LG_N_TILE (7 + LG_WG_FACTOR)
|
||||||
#define N_SLICE (N_TILE / 32)
|
#define N_SLICE (N_TILE / 32)
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define LG_TILE_ALLOC_WG 8
|
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
|
||||||
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
||||||
|
|
||||||
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
||||||
|
|
Loading…
Reference in a new issue