mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Merge pull request #32 from eliasnaur/master
Clean up, add maximum workgroup size control
This commit is contained in:
commit
02a6bfbb6c
|
@ -15,7 +15,7 @@
|
|||
|
||||
#include "setup.h"
|
||||
|
||||
#define LG_BACKDROP_WG 8
|
||||
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
|
||||
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
|
||||
|
||||
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
||||
|
|
|
@ -17,8 +17,6 @@ layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
|||
|
||||
layout(set = 0, binding = 1) buffer AllocBuf {
|
||||
uint n_elements; // paths
|
||||
// Will be incremented atomically to claim tiles
|
||||
uint tile_ix;
|
||||
uint alloc;
|
||||
};
|
||||
|
||||
|
@ -42,8 +40,6 @@ shared uint bitmaps[N_SLICE][N_TILE];
|
|||
shared uint count[N_SLICE][N_TILE];
|
||||
shared uint sh_chunk_start[N_TILE];
|
||||
|
||||
shared float sh_right_edge[N_TILE];
|
||||
|
||||
void main() {
|
||||
uint my_n_elements = n_elements;
|
||||
uint my_partition = gl_WorkGroupID.x;
|
||||
|
|
Binary file not shown.
|
@ -41,7 +41,7 @@ layout(set = 0, binding = 4) buffer PtclBuf {
|
|||
#include "tile.h"
|
||||
#include "ptcl.h"
|
||||
|
||||
#define LG_N_PART_READ 8
|
||||
#define LG_N_PART_READ (7 + LG_WG_FACTOR)
|
||||
#define N_PART_READ (1 << LG_N_PART_READ)
|
||||
|
||||
shared uint sh_elements[N_TILE];
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -108,27 +108,9 @@ void main() {
|
|||
PathStrokeLine line;
|
||||
float dx;
|
||||
switch (tag) {
|
||||
/*
|
||||
case PathSeg_FillLine:
|
||||
case PathSeg_StrokeLine:
|
||||
line = PathSeg_StrokeLine_read(ref);
|
||||
xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
|
||||
xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
|
||||
ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
|
||||
ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
|
||||
dx = line.p1.x - line.p0.x;
|
||||
float dy = line.p1.y - line.p0.y;
|
||||
// Set up for per-scanline coverage formula, below.
|
||||
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
||||
c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
|
||||
b = invslope; // Note: assumes square tiles, otherwise scale.
|
||||
a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
|
||||
break;
|
||||
*/
|
||||
case PathSeg_FillCubic:
|
||||
case PathSeg_StrokeCubic:
|
||||
PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
|
||||
// Commented out code is for computing error bound on conversion to quadratics
|
||||
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
|
||||
float err = err_v.x * err_v.x + err_v.y * err_v.y;
|
||||
// The number of quadratics.
|
||||
|
|
|
@ -3,6 +3,11 @@
|
|||
// Much of this will be made dynamic in various ways, but for now it's easiest
|
||||
// to hardcode and keep all in one place.
|
||||
|
||||
// A LG_WG_FACTOR of n scales workgroup sizes by 2^n. Use 0 for a
|
||||
// maximum workgroup size of 128, or 1 for a maximum size of 256.
|
||||
#define LG_WG_FACTOR 1
|
||||
#define WG_FACTOR (1<<LG_WG_FACTOR)
|
||||
|
||||
// TODO: compute all these
|
||||
|
||||
#define WIDTH_IN_TILES 128
|
||||
|
@ -10,21 +15,14 @@
|
|||
#define TILE_WIDTH_PX 16
|
||||
#define TILE_HEIGHT_PX 16
|
||||
|
||||
// TODO: make the image size dynamic.
|
||||
#define IMAGE_WIDTH (WIDTH_IN_TILES*TILE_WIDTH_PX)
|
||||
#define IMAGE_HEIGHT (HEIGHT_IN_TILES*TILE_HEIGHT_PX)
|
||||
|
||||
#define PTCL_INITIAL_ALLOC 1024
|
||||
|
||||
// Stuff for new algorithm follows; some of the above should get
|
||||
// deleted.
|
||||
|
||||
// These should probably be renamed and/or reworked. In the binning
|
||||
// kernel, they represent the number of bins. Also, the workgroup size
|
||||
// of that kernel is equal to the number of bins, but should probably
|
||||
// be more flexible (it's 512 in the K&L paper).
|
||||
#define N_TILE_X 16
|
||||
#define N_TILE_Y 16
|
||||
#define N_TILE_Y (8 * WG_FACTOR)
|
||||
#define N_TILE (N_TILE_X * N_TILE_Y)
|
||||
#define LG_N_TILE 8
|
||||
#define LG_N_TILE (7 + LG_WG_FACTOR)
|
||||
#define N_SLICE (N_TILE / 32)
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
|
||||
#include "setup.h"
|
||||
|
||||
#define LG_TILE_ALLOC_WG 8
|
||||
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
|
||||
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
||||
|
||||
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
||||
|
|
Binary file not shown.
|
@ -227,14 +227,14 @@ impl<D: Device> Renderer<D> {
|
|||
&[],
|
||||
)?;
|
||||
|
||||
let bin_alloc_buf_host = device.create_buffer(12, host)?;
|
||||
let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
|
||||
let bin_alloc_buf_host = device.create_buffer(8, host)?;
|
||||
let bin_alloc_buf_dev = device.create_buffer(8, dev)?;
|
||||
|
||||
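// TODO: replace the hard-coded 255 mask (round up to a multiple of 256) and the factor of 8 below with named constants; NOTE(review): the 256 presumably has to track N_TILE in setup.h now that it depends on LG_WG_FACTOR — confirm.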
|
||||
let bin_alloc_start = ((n_paths + 255) & !255) * 8;
|
||||
device.write_buffer(
|
||||
&bin_alloc_buf_host,
|
||||
&[n_paths as u32, 0, bin_alloc_start as u32],
|
||||
&[n_paths as u32, bin_alloc_start as u32],
|
||||
)?;
|
||||
let bin_code = include_bytes!("../shader/binning.spv");
|
||||
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?;
|
||||
|
|
Loading…
Reference in a new issue