mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
138 lines
6.1 KiB
Plaintext
138 lines
6.1 KiB
Plaintext
// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke
|
|
// (polyline) items in the scene and generates a list of segments for each, for
|
|
// each tile.
|
|
|
|
#version 450
|
|
#extension GL_GOOGLE_include_directive : enable
|
|
|
|
layout(local_size_x = 32) in;
|
|
|
|
// Scene input. Declared readonly: this kernel never stores to it.
layout(set = 0, binding = 0) readonly buffer SceneBuf {
    uint scene[];
};

// Per-tilegroup data. main() reads the stroke count for its tilegroup
// directly from this array.
layout(set = 0, binding = 1) buffer TilegroupBuf {
    uint tilegroup[];
};

// Output storage, exposed as raw 32-bit words.
// NOTE(review): presumably the *_write helpers from segment.h store into
// this array -- confirm against that header.
layout(set = 0, binding = 2) buffer SegmentBuf {
    uint segment[];
};

// Single shared counter. main() advances it with atomicAdd and uses the
// returned value as the offset of the newly reserved region.
layout(set = 0, binding = 3) buffer AllocBuf {
    uint alloc;
};
|
|
|
|
#include "scene.h"
|
|
#include "tilegroup.h"
|
|
#include "segment.h"
|
|
|
|
#include "setup.h"
|
|
|
|
// Entry point. Each invocation handles the tile at gl_GlobalInvocationID.xy:
// it walks the chunked list of stroke instances recorded for the tile's
// tilegroup and, for every stroke polyline, writes the segments that may
// touch this tile (padded by half the stroke width) into a linked list of
// segment chunks. One ItemHeader per stroke item points at the first chunk
// of that item's list (or at 0 when no segment hits the tile).
void main() {
    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
    // Top-left corner of this tile, in pixels.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START);
    // The offset is in bytes; the buffer is indexed in 32-bit words.
    uint stroke_n = tilegroup[stroke_start.offset >> 2];

    TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size);
    if (stroke_n > 0) {
        // The first chunk header follows the 4-byte stroke count.
        ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4);
        Chunk chunk = Chunk_read(chunk_ref);
        InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
        // Reserve one item header per stroke item up front.
        ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
        TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));

        // Cursor and limit of the current segment allocation. Both live
        // outside the item loop so that space left over after one item is
        // reused by the next. They must not be redeclared inside the loop:
        // an uninitialized shadow copy would make the allocation test below
        // read an undefined value and would discard the cursor advance.
        SegChunkRef seg_chunk_ref = SegChunkRef(0);
        uint seg_limit = 0;

        // Iterate through items. The loop ends when the current chunk is
        // exhausted and its `next` reference is 0.
        while (true) {
            if (chunk.chunk_n == 0) {
                chunk_ref = chunk.next;
                if (chunk_ref.offset == 0) {
                    break;
                }
                chunk = Chunk_read(chunk_ref);
                stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
            }
            Instance ins = Instance_read(stroke_ref);
            PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref));

            // Process the stroke polyline item.
            // A polyline with zero points has no segments; guard the
            // subtraction so it cannot wrap around to 0xFFFFFFFF.
            uint n_points = poly.n_points;
            uint max_n_segs = n_points > 0u ? n_points - 1u : 0u;
            uint chunk_n_segs = 0;

            // Tile boundaries padded by half-width. These depend only on
            // the item, so compute them once rather than per segment.
            float half_width = 0.5 * poly.width;
            float xmin = xy0.x - half_width;
            float ymin = xy0.y - half_width;
            float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width;
            float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width;

            vec2 start = Point_read(poly.points).xy;
            for (uint j = 0; j < max_n_segs; j++) {
                poly.points.offset += Point_size;
                vec2 end = Point_read(poly.points).xy;

                // Process one segment.

                // This logic just tests for collision. What we probably want to do
                // is a clipping algorithm like Liang-Barsky, and then store coords
                // relative to the tile in f16. See also:
                // https://tavianator.com/fast-branchless-raybounding-box-intersections/

                // Also note that when we go to the fancy version, we want to compute
                // the (horizontal projection of) the bounding box of the intersection
                // once per tilegroup, so we can assign work to individual tiles.

                // Implicit line through start and end: a * x + b * y + c = 0.
                float a = end.y - start.y;
                float b = start.x - end.x;
                float c = -(a * start.x + b * start.y);
                // Which side of the line each padded tile corner lies on.
                float s00 = sign(b * ymin + a * xmin + c);
                float s01 = sign(b * ymin + a * xmax + c);
                float s10 = sign(b * ymax + a * xmin + c);
                float s11 = sign(b * ymax + a * xmax + c);
                // If bounding boxes intersect and not all four corners are on the same side, hit.
                // Also note: this is designed to be false on NAN input.
                if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax)
                    && max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
                {
                    // Allocate a chunk if needed.
                    if (chunk_n_segs == 0) {
                        // First hit for this item: start a new chunk at the
                        // cursor, or grab a fresh allocation if too little
                        // room remains.
                        // NOTE(review): 40 is an unexplained byte count,
                        // presumably the minimum room worth starting a chunk
                        // in -- confirm against SegChunk_size / Segment_size.
                        if (seg_chunk_ref.offset + 40 > seg_limit) {
                            seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
                            seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
                        }
                        ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
                    } else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
                        // The current chunk is full: close it with a link to
                        // a freshly allocated chunk and continue there.
                        uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
                        seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
                        SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
                        seg_chunk_ref.offset = new_chunk_ref;
                        chunk_n_segs = 0;
                    }
                    Segment seg = Segment(start, end);
                    Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
                    chunk_n_segs++;
                }

                start = end;
            }
            if (chunk_n_segs == 0) {
                // No segment of this item touches the tile.
                ItemHeader_write(item_header, ItemHeader(SegChunkRef(0)));
            } else {
                // Terminate the item's chunk list and advance the cursor
                // past the chunk just written.
                SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
                seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
            }

            stroke_ref.offset += Instance_size;
            chunk.chunk_n--;
            item_header.offset += ItemHeader_size;
        }
    } else {
        // As an optimization, we could just write 0 for the size.
        TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0)));
    }
}
|