mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Use linked list strategy for segments
Trying to allocate them contiguously wasn't good.
This commit is contained in:
parent
cb06b1bc3d
commit
b23fe25177
|
@ -13,8 +13,7 @@ piet_gpu! {
|
|||
end: [f32; 2],
|
||||
}
|
||||
struct CmdStroke {
|
||||
n_segs: u32,
|
||||
// Should be Ref<Segment> if we had cross-module references.
|
||||
// Should be Ref<SegChunk> if we had cross-module references.
|
||||
seg_ref: u32,
|
||||
half_width: f32,
|
||||
rgba_color: u32,
|
||||
|
|
|
@ -12,8 +12,7 @@ piet_gpu! {
|
|||
|
||||
// Note: this is only suitable for strokes, fills require backdrop.
|
||||
struct ItemHeader {
|
||||
n: u32,
|
||||
segments: Ref<Segment>,
|
||||
segments: Ref<SegChunk>,
|
||||
}
|
||||
|
||||
// TODO: strongly consider using f16. If so, these would be
|
||||
|
@ -23,5 +22,11 @@ piet_gpu! {
|
|||
start: [f32; 2],
|
||||
end: [f32; 2],
|
||||
}
|
||||
|
||||
struct SegChunk {
|
||||
n: u32,
|
||||
next: Ref<SegChunk>,
|
||||
// Segments follow (could represent this as a variable sized array).
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -13,6 +13,6 @@ build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
|
|||
|
||||
build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
|
||||
|
||||
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h ptcl.h setup.h
|
||||
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h ptcl.h setup.h
|
||||
|
||||
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
|
||||
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h setup.h
|
||||
|
|
|
@ -44,12 +44,15 @@ void main() {
|
|||
InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
|
||||
ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
|
||||
TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
|
||||
SegmentRef seg_ref = SegmentRef(0);
|
||||
SegChunkRef seg_chunk_ref = SegChunkRef(0);
|
||||
uint seg_limit = 0;
|
||||
// Iterate through items; stroke_n holds count remaining.
|
||||
while (true) {
|
||||
if (chunk.chunk_n == 0) {
|
||||
chunk_ref = chunk.next;
|
||||
if (chunk_ref.offset == 0) {
|
||||
break;
|
||||
}
|
||||
chunk = Chunk_read(chunk_ref);
|
||||
stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
|
||||
}
|
||||
|
@ -58,16 +61,8 @@ void main() {
|
|||
|
||||
// Process the stroke polyline item.
|
||||
uint max_n_segs = poly.n_points - 1;
|
||||
uint reserve = max_n_segs * Segment_size;
|
||||
if (seg_ref.offset + reserve > seg_limit) {
|
||||
// This is a heuristic to balance atomic bandwidth and utilization.
|
||||
// The output always gets a contiguous allocation. We might use
|
||||
// all, some, or none of the capacity.
|
||||
uint capacity_bytes = stroke_n > 1 ? reserve * 2 + 128 : reserve;
|
||||
seg_ref.offset = atomicAdd(alloc, capacity_bytes);
|
||||
seg_limit = seg_ref.offset + capacity_bytes;
|
||||
}
|
||||
uint n_segs = 0;
|
||||
uint chunk_n_segs = 0;
|
||||
SegChunkRef seg_chunk_ref;
|
||||
vec2 start = Point_read(poly.points).xy;
|
||||
for (uint j = 0; j < max_n_segs; j++) {
|
||||
poly.points.offset += Point_size;
|
||||
|
@ -103,18 +98,31 @@ void main() {
|
|||
&& max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
|
||||
&& s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
|
||||
{
|
||||
// Allocate a chunk if needed.
|
||||
if (chunk_n_segs == 0) {
|
||||
if (seg_chunk_ref.offset + 40 > seg_limit) {
|
||||
seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
|
||||
seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
|
||||
}
|
||||
ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
|
||||
} else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
|
||||
uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
|
||||
seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
|
||||
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
|
||||
seg_chunk_ref.offset = new_chunk_ref;
|
||||
chunk_n_segs = 0;
|
||||
}
|
||||
Segment seg = Segment(start, end);
|
||||
Segment_write(Segment_index(seg_ref, n_segs), seg);
|
||||
n_segs++;
|
||||
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
|
||||
chunk_n_segs++;
|
||||
}
|
||||
|
||||
start = end;
|
||||
}
|
||||
ItemHeader_write(item_header, ItemHeader(n_segs, seg_ref));
|
||||
if (--stroke_n == 0) {
|
||||
break;
|
||||
if (chunk_n_segs > 0) {
|
||||
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
|
||||
seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
|
||||
}
|
||||
seg_ref.offset += n_segs * Segment_size;
|
||||
|
||||
stroke_ref.offset += Instance_size;
|
||||
chunk.chunk_n--;
|
||||
|
|
Binary file not shown.
|
@ -87,10 +87,9 @@ void main() {
|
|||
case PietItem_Poly:
|
||||
ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
|
||||
stroke_th.items.offset += ItemHeader_size;
|
||||
if (stroke_item.n > 0) {
|
||||
if (stroke_item.segments.offset != 0) {
|
||||
PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
|
||||
CmdStroke cmd = CmdStroke(
|
||||
stroke_item.n,
|
||||
stroke_item.segments.offset,
|
||||
0.5 * poly.width,
|
||||
poly.rgba_color
|
||||
|
|
Binary file not shown.
|
@ -54,13 +54,18 @@ void main() {
|
|||
case Cmd_Stroke:
|
||||
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
||||
float df = 1e9;
|
||||
for (int i = 0; i < stroke.n_segs; i++) {
|
||||
Segment seg = Segment_read(Segment_index(SegmentRef(stroke.seg_ref), i));
|
||||
vec2 line_vec = seg.end - seg.start;
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
df = min(df, length(line_vec * t - dpos));
|
||||
}
|
||||
SegChunkRef seg_chunk_ref = SegChunkRef(stroke.seg_ref);
|
||||
do {
|
||||
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
|
||||
for (int i = 0; i < seg_chunk.n; i++) {
|
||||
Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
|
||||
vec2 line_vec = seg.end - seg.start;
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
df = min(df, length(line_vec * t - dpos));
|
||||
}
|
||||
seg_chunk_ref = seg_chunk.next;
|
||||
} while (seg_chunk_ref.offset != 0);
|
||||
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
|
||||
alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0);
|
||||
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
|
|
Binary file not shown.
|
@ -60,13 +60,12 @@ CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
|
|||
}
|
||||
|
||||
struct CmdStroke {
|
||||
uint n_segs;
|
||||
uint seg_ref;
|
||||
float half_width;
|
||||
uint rgba_color;
|
||||
};
|
||||
|
||||
#define CmdStroke_size 16
|
||||
#define CmdStroke_size 12
|
||||
|
||||
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
|
||||
return CmdStrokeRef(ref.offset + index * CmdStroke_size);
|
||||
|
@ -187,21 +186,18 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
|||
uint raw0 = ptcl[ix + 0];
|
||||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
uint raw3 = ptcl[ix + 3];
|
||||
CmdStroke s;
|
||||
s.n_segs = raw0;
|
||||
s.seg_ref = raw1;
|
||||
s.half_width = uintBitsToFloat(raw2);
|
||||
s.rgba_color = raw3;
|
||||
s.seg_ref = raw0;
|
||||
s.half_width = uintBitsToFloat(raw1);
|
||||
s.rgba_color = raw2;
|
||||
return s;
|
||||
}
|
||||
|
||||
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.n_segs;
|
||||
ptcl[ix + 1] = s.seg_ref;
|
||||
ptcl[ix + 2] = floatBitsToUint(s.half_width);
|
||||
ptcl[ix + 3] = s.rgba_color;
|
||||
ptcl[ix + 0] = s.seg_ref;
|
||||
ptcl[ix + 1] = floatBitsToUint(s.half_width);
|
||||
ptcl[ix + 2] = s.rgba_color;
|
||||
}
|
||||
|
||||
CmdFill CmdFill_read(CmdFillRef ref) {
|
||||
|
|
|
@ -12,6 +12,10 @@ struct SegmentRef {
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct SegChunkRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct TileHeader {
|
||||
uint n;
|
||||
ItemHeaderRef items;
|
||||
|
@ -24,11 +28,10 @@ TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) {
|
|||
}
|
||||
|
||||
struct ItemHeader {
|
||||
uint n;
|
||||
SegmentRef segments;
|
||||
SegChunkRef segments;
|
||||
};
|
||||
|
||||
#define ItemHeader_size 8
|
||||
#define ItemHeader_size 4
|
||||
|
||||
ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) {
|
||||
return ItemHeaderRef(ref.offset + index * ItemHeader_size);
|
||||
|
@ -45,6 +48,17 @@ SegmentRef Segment_index(SegmentRef ref, uint index) {
|
|||
return SegmentRef(ref.offset + index * Segment_size);
|
||||
}
|
||||
|
||||
struct SegChunk {
|
||||
uint n;
|
||||
SegChunkRef next;
|
||||
};
|
||||
|
||||
#define SegChunk_size 8
|
||||
|
||||
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
|
||||
return SegChunkRef(ref.offset + index * SegChunk_size);
|
||||
}
|
||||
|
||||
TileHeader TileHeader_read(TileHeaderRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = segment[ix + 0];
|
||||
|
@ -64,17 +78,14 @@ void TileHeader_write(TileHeaderRef ref, TileHeader s) {
|
|||
ItemHeader ItemHeader_read(ItemHeaderRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = segment[ix + 0];
|
||||
uint raw1 = segment[ix + 1];
|
||||
ItemHeader s;
|
||||
s.n = raw0;
|
||||
s.segments = SegmentRef(raw1);
|
||||
s.segments = SegChunkRef(raw0);
|
||||
return s;
|
||||
}
|
||||
|
||||
void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
segment[ix + 0] = s.n;
|
||||
segment[ix + 1] = s.segments.offset;
|
||||
segment[ix + 0] = s.segments.offset;
|
||||
}
|
||||
|
||||
Segment Segment_read(SegmentRef ref) {
|
||||
|
@ -97,3 +108,19 @@ void Segment_write(SegmentRef ref, Segment s) {
|
|||
segment[ix + 3] = floatBitsToUint(s.end.y);
|
||||
}
|
||||
|
||||
SegChunk SegChunk_read(SegChunkRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = segment[ix + 0];
|
||||
uint raw1 = segment[ix + 1];
|
||||
SegChunk s;
|
||||
s.n = raw0;
|
||||
s.next = SegChunkRef(raw1);
|
||||
return s;
|
||||
}
|
||||
|
||||
void SegChunk_write(SegChunkRef ref, SegChunk s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
segment[ix + 0] = s.n;
|
||||
segment[ix + 1] = s.next.offset;
|
||||
}
|
||||
|
||||
|
|
|
@ -32,3 +32,7 @@
|
|||
#define TILE_HEIGHT_PX 16
|
||||
|
||||
#define PTCL_INITIAL_ALLOC 1024
|
||||
|
||||
// Maximum number of segments in a SegChunk
|
||||
#define SEG_CHUNK_N 32
|
||||
#define SEG_CHUNK_ALLOC 512
|
|
@ -53,7 +53,7 @@ fn render_scene(rc: &mut impl RenderContext) {
|
|||
}
|
||||
|
||||
fn render_cardioid(rc: &mut impl RenderContext) {
|
||||
let n = 100;
|
||||
let n = 91;
|
||||
let dth = std::f64::consts::PI * 2.0 / (n as f64);
|
||||
let center = Point::new(1024.0, 768.0);
|
||||
let r = 750.0;
|
||||
|
|
Loading…
Reference in a new issue