mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Faster coarse raster
Store a lot more tile context in shared memory and do the work from that.
This commit is contained in:
parent
e1aa9b2f5d
commit
877da4a98e
|
@ -51,6 +51,10 @@ shared uint sh_tile_width[N_TILE];
|
||||||
shared uint sh_tile_x0[N_TILE];
|
shared uint sh_tile_x0[N_TILE];
|
||||||
shared uint sh_tile_y0[N_TILE];
|
shared uint sh_tile_y0[N_TILE];
|
||||||
|
|
||||||
|
// These are set up so that base + (tile_y * stride + tile_x) * Tile_size is the offset of a Tile, with tile_x and tile_y relative to the bin.
|
||||||
|
shared uint sh_tile_base[N_TILE];
|
||||||
|
shared uint sh_tile_stride[N_TILE];
|
||||||
|
|
||||||
// scale factors useful for converting coordinates to tiles
|
// scale factors useful for converting coordinates to tiles
|
||||||
#define SX (1.0 / float(TILE_WIDTH_PX))
|
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||||
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||||
|
@ -76,9 +80,12 @@ void main() {
|
||||||
vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
|
|
||||||
uint tile_x = N_TILE_X * gl_WorkGroupID.x + gl_LocalInvocationID.x % N_TILE_X;
|
// Coordinates of top left of bin, in tiles.
|
||||||
uint tile_y = N_TILE_Y * gl_WorkGroupID.y + gl_LocalInvocationID.x / N_TILE_X;
|
uint bin_tile_x = N_TILE_X * gl_WorkGroupID.x;
|
||||||
uint this_tile_ix = tile_y * WIDTH_IN_TILES + tile_x;
|
uint bin_tile_y = N_TILE_Y * gl_WorkGroupID.y;
|
||||||
|
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
|
||||||
|
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
|
||||||
|
uint this_tile_ix = (bin_tile_y + tile_y) * WIDTH_IN_TILES + bin_tile_x + tile_x;
|
||||||
CmdRef cmd_ref = CmdRef(this_tile_ix * PTCL_INITIAL_ALLOC);
|
CmdRef cmd_ref = CmdRef(this_tile_ix * PTCL_INITIAL_ALLOC);
|
||||||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
||||||
|
|
||||||
|
@ -151,53 +158,48 @@ void main() {
|
||||||
|
|
||||||
// Read one element, compute coverage.
|
// Read one element, compute coverage.
|
||||||
uint tag = Annotated_Nop;
|
uint tag = Annotated_Nop;
|
||||||
|
uint element_ix;
|
||||||
AnnotatedRef ref;
|
AnnotatedRef ref;
|
||||||
float right_edge = 0.0;
|
float right_edge = 0.0;
|
||||||
if (th_ix + rd_ix < wr_ix) {
|
if (th_ix + rd_ix < wr_ix) {
|
||||||
uint element_ix = sh_elements[th_ix];
|
element_ix = sh_elements[th_ix];
|
||||||
right_edge = sh_right_edge[th_ix];
|
right_edge = sh_right_edge[th_ix];
|
||||||
ref = AnnotatedRef(element_ix * Annotated_size);
|
ref = AnnotatedRef(element_ix * Annotated_size);
|
||||||
tag = Annotated_tag(ref);
|
tag = Annotated_tag(ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Bounding box of element in pixel coordinates.
|
// Bounding box of element in pixel coordinates.
|
||||||
float xmin, xmax, ymin, ymax;
|
uint tile_count;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Annotated_Fill:
|
case Annotated_Fill:
|
||||||
case Annotated_Stroke:
|
case Annotated_Stroke:
|
||||||
// Note: we take advantage of the fact that fills and strokes
|
// Because the only elements we're processing right now are
|
||||||
// have compatible layout.
|
// paths, we can just use the element index as the path index.
|
||||||
AnnoFill fill = Annotated_Fill_read(ref);
|
// In future, when we're doing a bunch of stuff, the path index
|
||||||
xmin = fill.bbox.x;
|
// should probably be stored in the annotated element.
|
||||||
xmax = fill.bbox.z;
|
uint path_ix = element_ix;
|
||||||
ymin = fill.bbox.y;
|
Path path = Path_read(PathRef(path_ix * Path_size));
|
||||||
ymax = fill.bbox.w;
|
uint stride = path.bbox.z - path.bbox.x;
|
||||||
|
sh_tile_stride[th_ix] = stride;
|
||||||
|
int dx = int(path.bbox.x) - int(bin_tile_x);
|
||||||
|
int dy = int(path.bbox.y) - int(bin_tile_y);
|
||||||
|
int x0 = clamp(dx, 0, N_TILE_X);
|
||||||
|
int y0 = clamp(dy, 0, N_TILE_Y);
|
||||||
|
int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, N_TILE_X);
|
||||||
|
int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, N_TILE_Y);
|
||||||
|
sh_tile_width[th_ix] = uint(x1 - x0);
|
||||||
|
sh_tile_x0[th_ix] = x0;
|
||||||
|
sh_tile_y0[th_ix] = y0;
|
||||||
|
tile_count = uint(x1 - x0) * uint(y1 - y0);
|
||||||
|
// base relative to bin
|
||||||
|
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
||||||
|
sh_tile_base[th_ix] = base;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ymin = 0;
|
tile_count = 0;
|
||||||
ymax = 0;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Draw the coverage area into the bitmasks. This uses an algorithm
|
|
||||||
// that computes the coverage of a span for given scanline.
|
|
||||||
|
|
||||||
// Compute bounding box in tiles and clip to this bin.
|
|
||||||
int x0 = int(floor((xmin - xy0.x) * SX));
|
|
||||||
int x1 = int(ceil((xmax - xy0.x) * SX));
|
|
||||||
int y0 = int(floor((ymin - xy0.y) * SY));
|
|
||||||
int y1 = int(ceil((ymax - xy0.y) * SY));
|
|
||||||
|
|
||||||
x0 = clamp(x0, 0, N_TILE_X);
|
|
||||||
x1 = clamp(x1, x0, N_TILE_X);
|
|
||||||
y0 = clamp(y0, 0, N_TILE_Y);
|
|
||||||
y1 = clamp(y1, y0, N_TILE_Y);
|
|
||||||
|
|
||||||
uint tile_count = uint((x1 - x0) * (y1 - y0));
|
|
||||||
sh_tile_width[th_ix] = uint(x1 - x0);
|
|
||||||
sh_tile_x0[th_ix] = uint(x0);
|
|
||||||
sh_tile_y0[th_ix] = uint(y0);
|
|
||||||
|
|
||||||
// Prefix sum of sh_tile_count
|
// Prefix sum of sh_tile_count
|
||||||
sh_tile_count[th_ix] = tile_count;
|
sh_tile_count[th_ix] = tile_count;
|
||||||
for (uint i = 0; i < LG_N_TILE; i++) {
|
for (uint i = 0; i < LG_N_TILE; i++) {
|
||||||
|
@ -223,19 +225,11 @@ void main() {
|
||||||
uint width = sh_tile_width[el_ix];
|
uint width = sh_tile_width[el_ix];
|
||||||
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||||
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||||
uint tile_x = x + gl_WorkGroupID.x * N_TILE_X;
|
Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||||
uint tile_y = y + gl_WorkGroupID.y * N_TILE_Y;
|
if (tile.tile.offset != 0) {
|
||||||
uint element_ix = sh_elements[el_ix];
|
uint el_slice = el_ix / 32;
|
||||||
Path path = Path_read(PathRef(element_ix * Path_size));
|
uint el_mask = 1 << (el_ix & 31);
|
||||||
if (tile_x >= path.bbox.x && tile_x < path.bbox.z && tile_y >= path.bbox.y && tile_y < path.bbox.w) {
|
atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
|
||||||
uint stride = path.bbox.z - path.bbox.x;
|
|
||||||
uint tile_subix = (tile_y - path.bbox.y) * stride + tile_x - path.bbox.x;
|
|
||||||
Tile tile = Tile_read(Tile_index(path.tiles, tile_subix));
|
|
||||||
if (tile.tile.offset != 0) {
|
|
||||||
uint el_slice = el_ix / 32;
|
|
||||||
uint el_mask = 1 << (el_ix & 31);
|
|
||||||
atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -407,25 +401,16 @@ void main() {
|
||||||
break;
|
break;
|
||||||
*/
|
*/
|
||||||
case Annotated_Stroke:
|
case Annotated_Stroke:
|
||||||
// Because the only elements we're processing right now are
|
Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||||
// paths, we can just use the element index as the path index.
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
// In future, when we're doing a bunch of stuff, the path index
|
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
||||||
// should probably be stored in the annotated element.
|
CmdStroke cmd_stroke;
|
||||||
uint path_ix = element_ix;
|
cmd_stroke.tile_ref = tile.tile.offset;
|
||||||
Path path = Path_read(PathRef(path_ix * Path_size));
|
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
||||||
uint stride = path.bbox.z - path.bbox.x;
|
cmd_stroke.rgba_color = stroke.rgba_color;
|
||||||
uint tile_subix = (tile_y - path.bbox.y) * stride + tile_x - path.bbox.x;
|
alloc_cmd(cmd_ref, cmd_limit);
|
||||||
Tile tile = Tile_read(Tile_index(path.tiles, tile_subix));
|
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
||||||
if (tile.tile.offset != 0) {
|
cmd_ref.offset += Cmd_size;
|
||||||
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
|
||||||
CmdStroke cmd_stroke;
|
|
||||||
cmd_stroke.tile_ref = tile.tile.offset;
|
|
||||||
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
|
||||||
cmd_stroke.rgba_color = stroke.rgba_color;
|
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Loading…
Reference in a new issue