mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
commit
fa38101e39
|
@ -33,8 +33,6 @@ layout(set = 0, binding = 2) buffer BinsBuf {
|
||||||
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
|
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
|
||||||
#define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX))
|
#define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX))
|
||||||
|
|
||||||
#define TSY (1.0 / float(TILE_HEIGHT_PX))
|
|
||||||
|
|
||||||
// Constant not available in GLSL. Also consider uintBitsToFloat(0x7f800000)
|
// Constant not available in GLSL. Also consider uintBitsToFloat(0x7f800000)
|
||||||
#define INFINITY (1.0 / 0.0)
|
#define INFINITY (1.0 / 0.0)
|
||||||
|
|
||||||
|
@ -47,7 +45,6 @@ shared uint sh_chunk_start[N_TILE];
|
||||||
shared float sh_right_edge[N_TILE];
|
shared float sh_right_edge[N_TILE];
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
uint chunk_n = 0;
|
|
||||||
uint my_n_elements = n_elements;
|
uint my_n_elements = n_elements;
|
||||||
uint my_partition = gl_WorkGroupID.x;
|
uint my_partition = gl_WorkGroupID.x;
|
||||||
|
|
||||||
|
@ -65,17 +62,7 @@ void main() {
|
||||||
}
|
}
|
||||||
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
||||||
float my_right_edge = INFINITY;
|
float my_right_edge = INFINITY;
|
||||||
// bool crosses_edge = false;
|
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
// case Annotated_FillLine:
|
|
||||||
// case Annotated_StrokeLine:
|
|
||||||
// AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
|
|
||||||
// x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX));
|
|
||||||
// y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY));
|
|
||||||
// x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX));
|
|
||||||
// y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY));
|
|
||||||
// crosses_edge = tag == Annotated_FillLine && ceil(line.p0.y * TSY) != ceil(line.p1.y * TSY);
|
|
||||||
// break;
|
|
||||||
case Annotated_Fill:
|
case Annotated_Fill:
|
||||||
case Annotated_Stroke:
|
case Annotated_Stroke:
|
||||||
// Note: we take advantage of the fact that fills and strokes
|
// Note: we take advantage of the fact that fills and strokes
|
||||||
|
|
|
@ -63,10 +63,6 @@ shared uint sh_tile_y0[N_TILE];
|
||||||
shared uint sh_tile_base[N_TILE];
|
shared uint sh_tile_base[N_TILE];
|
||||||
shared uint sh_tile_stride[N_TILE];
|
shared uint sh_tile_stride[N_TILE];
|
||||||
|
|
||||||
// scale factors useful for converting coordinates to tiles
|
|
||||||
#define SX (1.0 / float(TILE_WIDTH_PX))
|
|
||||||
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
|
||||||
|
|
||||||
// Perhaps cmd_limit should be a global? This is a style question.
|
// Perhaps cmd_limit should be a global? This is a style question.
|
||||||
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
|
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
|
||||||
if (cmd_ref.offset > cmd_limit) {
|
if (cmd_ref.offset > cmd_limit) {
|
||||||
|
@ -84,8 +80,6 @@ void main() {
|
||||||
uint bin_ix = N_TILE_X * gl_WorkGroupID.y + gl_WorkGroupID.x;
|
uint bin_ix = N_TILE_X * gl_WorkGroupID.y + gl_WorkGroupID.x;
|
||||||
uint partition_ix = 0;
|
uint partition_ix = 0;
|
||||||
uint n_partitions = (n_elements + N_TILE - 1) / N_TILE;
|
uint n_partitions = (n_elements + N_TILE - 1) / N_TILE;
|
||||||
// Top left coordinates of this bin.
|
|
||||||
vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
|
|
||||||
// Coordinates of top left of bin, in tiles.
|
// Coordinates of top left of bin, in tiles.
|
||||||
|
@ -105,7 +99,6 @@ void main() {
|
||||||
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
|
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
|
||||||
uint part_start_ix = 0;
|
uint part_start_ix = 0;
|
||||||
uint ready_ix = 0;
|
uint ready_ix = 0;
|
||||||
int backdrop = 0;
|
|
||||||
while (true) {
|
while (true) {
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
sh_bitmaps[i][th_ix] = 0;
|
sh_bitmaps[i][th_ix] = 0;
|
||||||
|
@ -243,99 +236,6 @@ void main() {
|
||||||
|
|
||||||
barrier();
|
barrier();
|
||||||
|
|
||||||
// We've computed coverage and other info for each element in the input, now for
|
|
||||||
// the output stage. We'll do segments first using a more parallel algorithm.
|
|
||||||
|
|
||||||
/*
|
|
||||||
uint seg_count = 0;
|
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
|
||||||
seg_count += bitCount(sh_bitmaps[i][th_ix] & sh_is_segment[i]);
|
|
||||||
}
|
|
||||||
sh_seg_count[th_ix] = seg_count;
|
|
||||||
// Prefix sum of sh_seg_count
|
|
||||||
for (uint i = 0; i < LG_N_TILE; i++) {
|
|
||||||
barrier();
|
|
||||||
if (th_ix >= (1 << i)) {
|
|
||||||
seg_count += sh_seg_count[th_ix - (1 << i)];
|
|
||||||
}
|
|
||||||
barrier();
|
|
||||||
sh_seg_count[th_ix] = seg_count;
|
|
||||||
}
|
|
||||||
if (th_ix == N_TILE - 1) {
|
|
||||||
sh_seg_alloc = atomicAdd(alloc, seg_count * Segment_size);
|
|
||||||
}
|
|
||||||
barrier();
|
|
||||||
uint total_seg_count = sh_seg_count[N_TILE - 1];
|
|
||||||
uint seg_alloc = sh_seg_alloc;
|
|
||||||
|
|
||||||
// Output buffer is allocated as segments for each tile laid end-to-end.
|
|
||||||
|
|
||||||
for (uint ix = th_ix; ix < total_seg_count; ix += N_TILE) {
|
|
||||||
// Find the work item; this thread is now not bound to an element or tile.
|
|
||||||
// First find the tile (by binary search)
|
|
||||||
uint tile_ix = 0;
|
|
||||||
for (uint i = 0; i < LG_N_TILE; i++) {
|
|
||||||
uint probe = tile_ix + ((N_TILE / 2) >> i);
|
|
||||||
if (ix >= sh_seg_count[probe - 1]) {
|
|
||||||
tile_ix = probe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Now, sh_seg_count[tile_ix - 1] <= ix < sh_seg_count[tile_ix].
|
|
||||||
// (considering sh_seg_count[-1] == 0)
|
|
||||||
|
|
||||||
// Index of segment within tile's segments
|
|
||||||
uint seq_ix = ix;
|
|
||||||
// Maybe consider a sentinel value to avoid the conditional?
|
|
||||||
if (tile_ix > 0) {
|
|
||||||
seq_ix -= sh_seg_count[tile_ix - 1];
|
|
||||||
}
|
|
||||||
// Find the segment. This is done by linear scan through the bitmaps of the
|
|
||||||
// tile, accelerated by bit counting. Binary search might help, maybe not.
|
|
||||||
uint slice_ix = 0;
|
|
||||||
uint seq_bits;
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
seq_bits = sh_bitmaps[slice_ix][tile_ix] & sh_is_segment[slice_ix];
|
|
||||||
uint this_count = bitCount(seq_bits);
|
|
||||||
if (this_count > seq_ix) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
seq_ix -= this_count;
|
|
||||||
slice_ix++;
|
|
||||||
}
|
|
||||||
// Now find position of nth bit set (n = seq_ix) in seq_bits; binary search
|
|
||||||
uint bit_ix = 0;
|
|
||||||
for (int i = 0; i < 5; i++) {
|
|
||||||
uint probe = bit_ix + (16 >> i);
|
|
||||||
if (seq_ix >= bitCount(seq_bits & ((1 << probe) - 1))) {
|
|
||||||
bit_ix = probe;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
uint out_offset = seg_alloc + Segment_size * ix + SegChunk_size;
|
|
||||||
uint rd_el_ix = slice_ix * 32 + bit_ix;
|
|
||||||
uint element_ix = sh_elements[rd_el_ix];
|
|
||||||
ref = AnnotatedRef(element_ix * Annotated_size);
|
|
||||||
AnnoFillLineSeg line = Annotated_FillLine_read(ref);
|
|
||||||
float y_edge = 0.0;
|
|
||||||
// This is basically the same logic as piet-metal, but should be made numerically robust.
|
|
||||||
if (Annotated_tag(ref) == Annotated_FillLine) {
|
|
||||||
vec2 tile_xy = xy0 + vec2((tile_ix % N_TILE_X) * TILE_WIDTH_PX, (tile_ix / N_TILE_X) * TILE_HEIGHT_PX);
|
|
||||||
y_edge = mix(line.p0.y, line.p1.y, (tile_xy.x - line.p0.x) / (line.p1.x - line.p0.x));
|
|
||||||
if (min(line.p0.x, line.p1.x) < tile_xy.x && y_edge >= tile_xy.y && y_edge < tile_xy.y + TILE_HEIGHT_PX) {
|
|
||||||
if (line.p0.x > line.p1.x) {
|
|
||||||
line.p1 = vec2(tile_xy.x, y_edge);
|
|
||||||
} else {
|
|
||||||
line.p0 = vec2(tile_xy.x, y_edge);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
y_edge = 1e9;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Segment seg = Segment(line.p0, line.p1, y_edge);
|
|
||||||
Segment_write(SegmentRef(seg_alloc + Segment_size * ix), seg);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
// Output non-segment elements for this tile. The thread does a sequential walk
|
// Output non-segment elements for this tile. The thread does a sequential walk
|
||||||
// through the non-segment elements, and for segments, count and backdrop are
|
// through the non-segment elements, and for segments, count and backdrop are
|
||||||
// aggregated using bit counting.
|
// aggregated using bit counting.
|
||||||
|
|
|
@ -6,7 +6,6 @@
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
#extension GL_KHR_shader_subgroup_basic : enable
|
|
||||||
|
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
|
@ -34,8 +33,6 @@ void main() {
|
||||||
|
|
||||||
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
||||||
vec2 xy = vec2(xy_uint);
|
vec2 xy = vec2(xy_uint);
|
||||||
vec2 uv = xy * vec2(1.0 / IMAGE_WIDTH, 1.0 / IMAGE_HEIGHT);
|
|
||||||
//vec3 rgb = uv.xyy;
|
|
||||||
vec3 rgb[CHUNK];
|
vec3 rgb[CHUNK];
|
||||||
for (uint i = 0; i < CHUNK; i++) {
|
for (uint i = 0; i < CHUNK; i++) {
|
||||||
rgb[i] = vec3(0.5);
|
rgb[i] = vec3(0.5);
|
||||||
|
|
|
@ -65,7 +65,6 @@ void main() {
|
||||||
Path path;
|
Path path;
|
||||||
path.bbox = uvec4(x0, y0, x1, y1);
|
path.bbox = uvec4(x0, y0, x1, y1);
|
||||||
uint tile_count = (x1 - x0) * (y1 - y0);
|
uint tile_count = (x1 - x0) * (y1 - y0);
|
||||||
uint n_tiles = tile_count;
|
|
||||||
|
|
||||||
sh_tile_count[th_ix] = tile_count;
|
sh_tile_count[th_ix] = tile_count;
|
||||||
// Prefix sum of sh_tile_count
|
// Prefix sum of sh_tile_count
|
||||||
|
|
Loading…
Reference in a new issue