mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Merge pull request #32 from eliasnaur/master
Clean up, add maximum workgroup size control
This commit is contained in:
commit
02a6bfbb6c
|
@ -15,7 +15,7 @@
|
||||||
|
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define LG_BACKDROP_WG 8
|
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
|
||||||
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
|
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
|
||||||
|
|
||||||
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
||||||
|
|
|
@ -17,8 +17,6 @@ layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer AllocBuf {
|
layout(set = 0, binding = 1) buffer AllocBuf {
|
||||||
uint n_elements; // paths
|
uint n_elements; // paths
|
||||||
// Will be incremented atomically to claim tiles
|
|
||||||
uint tile_ix;
|
|
||||||
uint alloc;
|
uint alloc;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -42,8 +40,6 @@ shared uint bitmaps[N_SLICE][N_TILE];
|
||||||
shared uint count[N_SLICE][N_TILE];
|
shared uint count[N_SLICE][N_TILE];
|
||||||
shared uint sh_chunk_start[N_TILE];
|
shared uint sh_chunk_start[N_TILE];
|
||||||
|
|
||||||
shared float sh_right_edge[N_TILE];
|
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint my_n_elements = n_elements;
|
uint my_n_elements = n_elements;
|
||||||
uint my_partition = gl_WorkGroupID.x;
|
uint my_partition = gl_WorkGroupID.x;
|
||||||
|
|
Binary file not shown.
|
@ -41,7 +41,7 @@ layout(set = 0, binding = 4) buffer PtclBuf {
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
|
|
||||||
#define LG_N_PART_READ 8
|
#define LG_N_PART_READ (7 + LG_WG_FACTOR)
|
||||||
#define N_PART_READ (1 << LG_N_PART_READ)
|
#define N_PART_READ (1 << LG_N_PART_READ)
|
||||||
|
|
||||||
shared uint sh_elements[N_TILE];
|
shared uint sh_elements[N_TILE];
|
||||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -108,27 +108,9 @@ void main() {
|
||||||
PathStrokeLine line;
|
PathStrokeLine line;
|
||||||
float dx;
|
float dx;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
/*
|
|
||||||
case PathSeg_FillLine:
|
|
||||||
case PathSeg_StrokeLine:
|
|
||||||
line = PathSeg_StrokeLine_read(ref);
|
|
||||||
xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
|
|
||||||
xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
|
|
||||||
ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
|
|
||||||
ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
|
|
||||||
dx = line.p1.x - line.p0.x;
|
|
||||||
float dy = line.p1.y - line.p0.y;
|
|
||||||
// Set up for per-scanline coverage formula, below.
|
|
||||||
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
|
||||||
c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
|
|
||||||
b = invslope; // Note: assumes square tiles, otherwise scale.
|
|
||||||
a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
|
|
||||||
break;
|
|
||||||
*/
|
|
||||||
case PathSeg_FillCubic:
|
case PathSeg_FillCubic:
|
||||||
case PathSeg_StrokeCubic:
|
case PathSeg_StrokeCubic:
|
||||||
PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
|
PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
|
||||||
// Commented out code is for computing error bound on conversion to quadratics
|
|
||||||
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
|
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
|
||||||
float err = err_v.x * err_v.x + err_v.y * err_v.y;
|
float err = err_v.x * err_v.x + err_v.y * err_v.y;
|
||||||
// The number of quadratics.
|
// The number of quadratics.
|
||||||
|
|
|
@ -3,6 +3,11 @@
|
||||||
// Much of this will be made dynamic in various ways, but for now it's easiest
|
// Much of this will be made dynamic in various ways, but for now it's easiest
|
||||||
// to hardcode and keep all in one place.
|
// to hardcode and keep all in one place.
|
||||||
|
|
||||||
|
// A LG_WG_FACTOR of n scales workgroup sizes by 2^n. Use 0 for a
|
||||||
|
// maximum workgroup size of 128, or 1 for a maximum size of 256.
|
||||||
|
#define LG_WG_FACTOR 1
|
||||||
|
#define WG_FACTOR (1<<LG_WG_FACTOR)
|
||||||
|
|
||||||
// TODO: compute all these
|
// TODO: compute all these
|
||||||
|
|
||||||
#define WIDTH_IN_TILES 128
|
#define WIDTH_IN_TILES 128
|
||||||
|
@ -10,21 +15,14 @@
|
||||||
#define TILE_WIDTH_PX 16
|
#define TILE_WIDTH_PX 16
|
||||||
#define TILE_HEIGHT_PX 16
|
#define TILE_HEIGHT_PX 16
|
||||||
|
|
||||||
// TODO: make the image size dynamic.
|
|
||||||
#define IMAGE_WIDTH (WIDTH_IN_TILES*TILE_WIDTH_PX)
|
|
||||||
#define IMAGE_HEIGHT (HEIGHT_IN_TILES*TILE_HEIGHT_PX)
|
|
||||||
|
|
||||||
#define PTCL_INITIAL_ALLOC 1024
|
#define PTCL_INITIAL_ALLOC 1024
|
||||||
|
|
||||||
// Stuff for new algorithm follows; some of the above should get
|
|
||||||
// deleted.
|
|
||||||
|
|
||||||
// These should probably be renamed and/or reworked. In the binning
|
// These should probably be renamed and/or reworked. In the binning
|
||||||
// kernel, they represent the number of bins. Also, the workgroup size
|
// kernel, they represent the number of bins. Also, the workgroup size
|
||||||
// of that kernel is equal to the number of bins, but should probably
|
// of that kernel is equal to the number of bins, but should probably
|
||||||
// be more flexible (it's 512 in the K&L paper).
|
// be more flexible (it's 512 in the K&L paper).
|
||||||
#define N_TILE_X 16
|
#define N_TILE_X 16
|
||||||
#define N_TILE_Y 16
|
#define N_TILE_Y (8 * WG_FACTOR)
|
||||||
#define N_TILE (N_TILE_X * N_TILE_Y)
|
#define N_TILE (N_TILE_X * N_TILE_Y)
|
||||||
#define LG_N_TILE 8
|
#define LG_N_TILE (7 + LG_WG_FACTOR)
|
||||||
#define N_SLICE (N_TILE / 32)
|
#define N_SLICE (N_TILE / 32)
|
||||||
|
|
|
@ -5,7 +5,7 @@
|
||||||
|
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define LG_TILE_ALLOC_WG 8
|
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
|
||||||
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
||||||
|
|
||||||
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
||||||
|
|
Binary file not shown.
|
@ -227,14 +227,14 @@ impl<D: Device> Renderer<D> {
|
||||||
&[],
|
&[],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let bin_alloc_buf_host = device.create_buffer(12, host)?;
|
let bin_alloc_buf_host = device.create_buffer(8, host)?;
|
||||||
let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
|
let bin_alloc_buf_dev = device.create_buffer(8, dev)?;
|
||||||
|
|
||||||
// TODO: constants
|
// TODO: constants
|
||||||
let bin_alloc_start = ((n_paths + 255) & !255) * 8;
|
let bin_alloc_start = ((n_paths + 255) & !255) * 8;
|
||||||
device.write_buffer(
|
device.write_buffer(
|
||||||
&bin_alloc_buf_host,
|
&bin_alloc_buf_host,
|
||||||
&[n_paths as u32, 0, bin_alloc_start as u32],
|
&[n_paths as u32, bin_alloc_start as u32],
|
||||||
)?;
|
)?;
|
||||||
let bin_code = include_bytes!("../shader/binning.spv");
|
let bin_code = include_bytes!("../shader/binning.spv");
|
||||||
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?;
|
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?;
|
||||||
|
|
Loading…
Reference in a new issue