Merge pull request #32 from eliasnaur/master

Clean up, add maximum workgroup size control
This commit is contained in:
Elias Naur 2020-09-16 08:44:48 +02:00 committed by GitHub
commit 02a6bfbb6c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 13 additions and 37 deletions

View file

@ -15,7 +15,7 @@
#include "setup.h" #include "setup.h"
#define LG_BACKDROP_WG 8 #define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG) #define BACKDROP_WG (1 << LG_BACKDROP_WG)
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in; layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;

View file

@ -17,8 +17,6 @@ layout(set = 0, binding = 0) buffer AnnotatedBuf {
layout(set = 0, binding = 1) buffer AllocBuf { layout(set = 0, binding = 1) buffer AllocBuf {
uint n_elements; // paths uint n_elements; // paths
// Will be incremented atomically to claim tiles
uint tile_ix;
uint alloc; uint alloc;
}; };
@ -42,8 +40,6 @@ shared uint bitmaps[N_SLICE][N_TILE];
shared uint count[N_SLICE][N_TILE]; shared uint count[N_SLICE][N_TILE];
shared uint sh_chunk_start[N_TILE]; shared uint sh_chunk_start[N_TILE];
shared float sh_right_edge[N_TILE];
void main() { void main() {
uint my_n_elements = n_elements; uint my_n_elements = n_elements;
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;

Binary file not shown.

View file

@ -41,7 +41,7 @@ layout(set = 0, binding = 4) buffer PtclBuf {
#include "tile.h" #include "tile.h"
#include "ptcl.h" #include "ptcl.h"
#define LG_N_PART_READ 8 #define LG_N_PART_READ (7 + LG_WG_FACTOR)
#define N_PART_READ (1 << LG_N_PART_READ) #define N_PART_READ (1 << LG_N_PART_READ)
shared uint sh_elements[N_TILE]; shared uint sh_elements[N_TILE];

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -108,27 +108,9 @@ void main() {
PathStrokeLine line; PathStrokeLine line;
float dx; float dx;
switch (tag) { switch (tag) {
/*
case PathSeg_FillLine:
case PathSeg_StrokeLine:
line = PathSeg_StrokeLine_read(ref);
xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
dx = line.p1.x - line.p0.x;
float dy = line.p1.y - line.p0.y;
// Set up for per-scanline coverage formula, below.
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
b = invslope; // Note: assumes square tiles, otherwise scale.
a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
break;
*/
case PathSeg_FillCubic: case PathSeg_FillCubic:
case PathSeg_StrokeCubic: case PathSeg_StrokeCubic:
PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref); PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
// Commented out code is for computing error bound on conversion to quadratics
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3; vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
float err = err_v.x * err_v.x + err_v.y * err_v.y; float err = err_v.x * err_v.x + err_v.y * err_v.y;
// The number of quadratics. // The number of quadratics.

View file

@ -3,6 +3,11 @@
// Much of this will be made dynamic in various ways, but for now it's easiest // Much of this will be made dynamic in various ways, but for now it's easiest
// to hardcode and keep all in one place. // to hardcode and keep all in one place.
// A LG_WG_FACTOR of n scales workgroup sizes by 2^n. Use 0 for a
// maximum workgroup size of 128, or 1 for a maximum size of 256.
#define LG_WG_FACTOR 1
#define WG_FACTOR (1<<LG_WG_FACTOR)
// TODO: compute all these // TODO: compute all these
#define WIDTH_IN_TILES 128 #define WIDTH_IN_TILES 128
@ -10,21 +15,14 @@
#define TILE_WIDTH_PX 16 #define TILE_WIDTH_PX 16
#define TILE_HEIGHT_PX 16 #define TILE_HEIGHT_PX 16
// TODO: make the image size dynamic.
#define IMAGE_WIDTH (WIDTH_IN_TILES*TILE_WIDTH_PX)
#define IMAGE_HEIGHT (HEIGHT_IN_TILES*TILE_HEIGHT_PX)
#define PTCL_INITIAL_ALLOC 1024 #define PTCL_INITIAL_ALLOC 1024
// Stuff for new algorithm follows; some of the above should get
// deleted.
// These should probably be renamed and/or reworked. In the binning // These should probably be renamed and/or reworked. In the binning
// kernel, they represent the number of bins. Also, the workgroup size // kernel, they represent the number of bins. Also, the workgroup size
// of that kernel is equal to the number of bins, but should probably // of that kernel is equal to the number of bins, but should probably
// be more flexible (it's 512 in the K&L paper). // be more flexible (it's 512 in the K&L paper).
#define N_TILE_X 16 #define N_TILE_X 16
#define N_TILE_Y 16 #define N_TILE_Y (8 * WG_FACTOR)
#define N_TILE (N_TILE_X * N_TILE_Y) #define N_TILE (N_TILE_X * N_TILE_Y)
#define LG_N_TILE 8 #define LG_N_TILE (7 + LG_WG_FACTOR)
#define N_SLICE (N_TILE / 32) #define N_SLICE (N_TILE / 32)

View file

@ -5,7 +5,7 @@
#include "setup.h" #include "setup.h"
#define LG_TILE_ALLOC_WG 8 #define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG) #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in; layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;

Binary file not shown.

View file

@ -227,14 +227,14 @@ impl<D: Device> Renderer<D> {
&[], &[],
)?; )?;
let bin_alloc_buf_host = device.create_buffer(12, host)?; let bin_alloc_buf_host = device.create_buffer(8, host)?;
let bin_alloc_buf_dev = device.create_buffer(12, dev)?; let bin_alloc_buf_dev = device.create_buffer(8, dev)?;
// TODO: constants // TODO: constants
let bin_alloc_start = ((n_paths + 255) & !255) * 8; let bin_alloc_start = ((n_paths + 255) & !255) * 8;
device.write_buffer( device.write_buffer(
&bin_alloc_buf_host, &bin_alloc_buf_host,
&[n_paths as u32, 0, bin_alloc_start as u32], &[n_paths as u32, bin_alloc_start as u32],
)?; )?;
let bin_code = include_bytes!("../shader/binning.spv"); let bin_code = include_bytes!("../shader/binning.spv");
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?; let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?;