diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp index afe4d62..beba683 100644 --- a/piet-gpu/shader/backdrop.comp +++ b/piet-gpu/shader/backdrop.comp @@ -15,7 +15,7 @@ #include "setup.h" -#define LG_BACKDROP_WG 8 +#define LG_BACKDROP_WG (7 + LG_WG_FACTOR) #define BACKDROP_WG (1 << LG_BACKDROP_WG) layout(local_size_x = BACKDROP_WG, local_size_y = 1) in; diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index 3ed507b..046c4fb 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -17,8 +17,6 @@ layout(set = 0, binding = 0) buffer AnnotatedBuf { layout(set = 0, binding = 1) buffer AllocBuf { uint n_elements; // paths - // Will be incremented atomically to claim tiles - uint tile_ix; uint alloc; }; @@ -42,8 +40,6 @@ shared uint bitmaps[N_SLICE][N_TILE]; shared uint count[N_SLICE][N_TILE]; shared uint sh_chunk_start[N_TILE]; -shared float sh_right_edge[N_TILE]; - void main() { uint my_n_elements = n_elements; uint my_partition = gl_WorkGroupID.x; diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index 1b31cd1..98f1d4e 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 56b3c07..57cbc8b 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -41,7 +41,7 @@ layout(set = 0, binding = 4) buffer PtclBuf { #include "tile.h" #include "ptcl.h" -#define LG_N_PART_READ 8 +#define LG_N_PART_READ (7 + LG_WG_FACTOR) #define N_PART_READ (1 << LG_N_PART_READ) shared uint sh_elements[N_TILE]; diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 692b6c1..757b8b2 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv index 5bd0650..54c5cc0 100644 Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ 
diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 52ba572..6f50746 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/path_coarse.comp b/piet-gpu/shader/path_coarse.comp index b17f3e3..658af0e 100644 --- a/piet-gpu/shader/path_coarse.comp +++ b/piet-gpu/shader/path_coarse.comp @@ -108,27 +108,9 @@ void main() { PathStrokeLine line; float dx; switch (tag) { - /* - case PathSeg_FillLine: - case PathSeg_StrokeLine: - line = PathSeg_StrokeLine_read(ref); - xmin = min(line.p0.x, line.p1.x) - line.stroke.x; - xmax = max(line.p0.x, line.p1.x) + line.stroke.x; - ymin = min(line.p0.y, line.p1.y) - line.stroke.y; - ymax = max(line.p0.y, line.p1.y) + line.stroke.y; - dx = line.p1.x - line.p0.x; - float dy = line.p1.y - line.p0.y; - // Set up for per-scanline coverage formula, below. - float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy; - c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX; - b = invslope; // Note: assumes square tiles, otherwise scale. - a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX; - break; - */ case PathSeg_FillCubic: case PathSeg_StrokeCubic: PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref); - // Commented out code is for computing error bound on conversion to quadratics vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3; float err = err_v.x * err_v.x + err_v.y * err_v.y; // The number of quadratics. diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index 75bed8e..6bdde03 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -3,6 +3,11 @@ // Much of this will be made dynamic in various ways, but for now it's easiest // to hardcode and keep all in one place. +// A LG_WG_FACTOR of n scales workgroup sizes by 2^n. Use 0 for a +// maximum workgroup size of 128, or 1 for a maximum size of 256. 
+#define LG_WG_FACTOR 1 +#define WG_FACTOR (1<<LG_WG_FACTOR) Renderer { &[], )?; - let bin_alloc_buf_host = device.create_buffer(12, host)?; - let bin_alloc_buf_dev = device.create_buffer(12, dev)?; + let bin_alloc_buf_host = device.create_buffer(8, host)?; + let bin_alloc_buf_dev = device.create_buffer(8, dev)?; // TODO: constants let bin_alloc_start = ((n_paths + 255) & !255) * 8; device.write_buffer( &bin_alloc_buf_host, - &[n_paths as u32, 0, bin_alloc_start as u32], + &[n_paths as u32, bin_alloc_start as u32], )?; let bin_code = include_bytes!("../shader/binning.spv"); let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?;