Merge pull request #3 from linebender/chunk_segments

Use linked list strategy for segments
This commit is contained in:
Raph Levien 2020-04-30 21:40:04 -07:00 committed by GitHub
commit 19ecd0a158
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 95 additions and 52 deletions

View file

@ -13,8 +13,7 @@ piet_gpu! {
end: [f32; 2], end: [f32; 2],
} }
struct CmdStroke { struct CmdStroke {
n_segs: u32, // Should be Ref<SegChunk> if we had cross-module references.
// Should be Ref<Segment> if we had cross-module references.
seg_ref: u32, seg_ref: u32,
half_width: f32, half_width: f32,
rgba_color: u32, rgba_color: u32,

View file

@ -12,8 +12,7 @@ piet_gpu! {
// Note: this is only suitable for strokes, fills require backdrop. // Note: this is only suitable for strokes, fills require backdrop.
struct ItemHeader { struct ItemHeader {
n: u32, segments: Ref<SegChunk>,
segments: Ref<Segment>,
} }
// TODO: strongly consider using f16. If so, these would be // TODO: strongly consider using f16. If so, these would be
@ -23,5 +22,11 @@ piet_gpu! {
start: [f32; 2], start: [f32; 2],
end: [f32; 2], end: [f32; 2],
} }
// Header of one chunk in a linked list of segment chunks.
struct SegChunk {
// Number of segments stored in this chunk.
n: u32,
// Reference to the next chunk in the list; an offset of 0 ends the list.
next: Ref<SegChunk>,
// Segments follow (could represent this as a variable-sized array).
}
} }
} }

View file

@ -13,6 +13,6 @@ build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h ptcl.h setup.h build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h setup.h

View file

@ -44,12 +44,15 @@ void main() {
InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size); InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size)); ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header)); TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
SegmentRef seg_ref = SegmentRef(0); SegChunkRef seg_chunk_ref = SegChunkRef(0);
uint seg_limit = 0; uint seg_limit = 0;
// Iterate through items; stroke_n holds count remaining. // Iterate through items; stroke_n holds count remaining.
while (true) { while (true) {
if (chunk.chunk_n == 0) { if (chunk.chunk_n == 0) {
chunk_ref = chunk.next; chunk_ref = chunk.next;
if (chunk_ref.offset == 0) {
break;
}
chunk = Chunk_read(chunk_ref); chunk = Chunk_read(chunk_ref);
stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size); stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
} }
@ -58,16 +61,8 @@ void main() {
// Process the stroke polyline item. // Process the stroke polyline item.
uint max_n_segs = poly.n_points - 1; uint max_n_segs = poly.n_points - 1;
uint reserve = max_n_segs * Segment_size; uint chunk_n_segs = 0;
if (seg_ref.offset + reserve > seg_limit) { SegChunkRef seg_chunk_ref;
// This is a heuristic to balance atomic bandwidth and utilization.
// The output always gets a contiguous allocation. We might use
// all, some, or none of the capacity.
uint capacity_bytes = stroke_n > 1 ? reserve * 2 + 128 : reserve;
seg_ref.offset = atomicAdd(alloc, capacity_bytes);
seg_limit = seg_ref.offset + capacity_bytes;
}
uint n_segs = 0;
vec2 start = Point_read(poly.points).xy; vec2 start = Point_read(poly.points).xy;
for (uint j = 0; j < max_n_segs; j++) { for (uint j = 0; j < max_n_segs; j++) {
poly.points.offset += Point_size; poly.points.offset += Point_size;
@ -103,18 +98,31 @@ void main() {
&& max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax) && max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
&& s00 * s01 + s00 * s10 + s00 * s11 < 3.0) && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
{ {
// Allocate a chunk if needed.
if (chunk_n_segs == 0) {
if (seg_chunk_ref.offset + 40 > seg_limit) {
seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
}
ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
} else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
seg_chunk_ref.offset = new_chunk_ref;
chunk_n_segs = 0;
}
Segment seg = Segment(start, end); Segment seg = Segment(start, end);
Segment_write(Segment_index(seg_ref, n_segs), seg); Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
n_segs++; chunk_n_segs++;
} }
start = end; start = end;
} }
ItemHeader_write(item_header, ItemHeader(n_segs, seg_ref)); if (chunk_n_segs > 0) {
if (--stroke_n == 0) { SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
break; seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
} }
seg_ref.offset += n_segs * Segment_size;
stroke_ref.offset += Instance_size; stroke_ref.offset += Instance_size;
chunk.chunk_n--; chunk.chunk_n--;

Binary file not shown.

View file

@ -87,10 +87,9 @@ void main() {
case PietItem_Poly: case PietItem_Poly:
ItemHeader stroke_item = ItemHeader_read(stroke_th.items); ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
stroke_th.items.offset += ItemHeader_size; stroke_th.items.offset += ItemHeader_size;
if (stroke_item.n > 0) { if (stroke_item.segments.offset != 0) {
PietStrokePolyLine poly = PietItem_Poly_read(item_ref); PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
CmdStroke cmd = CmdStroke( CmdStroke cmd = CmdStroke(
stroke_item.n,
stroke_item.segments.offset, stroke_item.segments.offset,
0.5 * poly.width, 0.5 * poly.width,
poly.rgba_color poly.rgba_color

Binary file not shown.

View file

@ -54,13 +54,18 @@ void main() {
case Cmd_Stroke: case Cmd_Stroke:
CmdStroke stroke = Cmd_Stroke_read(cmd_ref); CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
float df = 1e9; float df = 1e9;
for (int i = 0; i < stroke.n_segs; i++) { SegChunkRef seg_chunk_ref = SegChunkRef(stroke.seg_ref);
Segment seg = Segment_read(Segment_index(SegmentRef(stroke.seg_ref), i)); do {
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
for (int i = 0; i < seg_chunk.n; i++) {
Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
vec2 line_vec = seg.end - seg.start; vec2 line_vec = seg.end - seg.start;
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start; vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
df = min(df, length(line_vec * t - dpos)); df = min(df, length(line_vec * t - dpos));
} }
seg_chunk_ref = seg_chunk.next;
} while (seg_chunk_ref.offset != 0);
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx; fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0); alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0);
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);

Binary file not shown.

View file

@ -60,13 +60,12 @@ CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
} }
struct CmdStroke { struct CmdStroke {
uint n_segs;
uint seg_ref; uint seg_ref;
float half_width; float half_width;
uint rgba_color; uint rgba_color;
}; };
#define CmdStroke_size 16 #define CmdStroke_size 12
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) { CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
return CmdStrokeRef(ref.offset + index * CmdStroke_size); return CmdStrokeRef(ref.offset + index * CmdStroke_size);
@ -187,21 +186,18 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
uint raw0 = ptcl[ix + 0]; uint raw0 = ptcl[ix + 0];
uint raw1 = ptcl[ix + 1]; uint raw1 = ptcl[ix + 1];
uint raw2 = ptcl[ix + 2]; uint raw2 = ptcl[ix + 2];
uint raw3 = ptcl[ix + 3];
CmdStroke s; CmdStroke s;
s.n_segs = raw0; s.seg_ref = raw0;
s.seg_ref = raw1; s.half_width = uintBitsToFloat(raw1);
s.half_width = uintBitsToFloat(raw2); s.rgba_color = raw2;
s.rgba_color = raw3;
return s; return s;
} }
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) { void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.n_segs; ptcl[ix + 0] = s.seg_ref;
ptcl[ix + 1] = s.seg_ref; ptcl[ix + 1] = floatBitsToUint(s.half_width);
ptcl[ix + 2] = floatBitsToUint(s.half_width); ptcl[ix + 2] = s.rgba_color;
ptcl[ix + 3] = s.rgba_color;
} }
CmdFill CmdFill_read(CmdFillRef ref) { CmdFill CmdFill_read(CmdFillRef ref) {

View file

@ -12,6 +12,10 @@ struct SegmentRef {
uint offset; uint offset;
}; };
// Typed reference to a SegChunk. `offset` is a byte offset: the accessors
// shift it right by 2 to index the uint array `segment`.
struct SegChunkRef {
uint offset;
};
struct TileHeader { struct TileHeader {
uint n; uint n;
ItemHeaderRef items; ItemHeaderRef items;
@ -24,11 +28,10 @@ TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) {
} }
struct ItemHeader { struct ItemHeader {
uint n; SegChunkRef segments;
SegmentRef segments;
}; };
#define ItemHeader_size 8 #define ItemHeader_size 4
ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) { ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) {
return ItemHeaderRef(ref.offset + index * ItemHeader_size); return ItemHeaderRef(ref.offset + index * ItemHeader_size);
@ -45,6 +48,17 @@ SegmentRef Segment_index(SegmentRef ref, uint index) {
return SegmentRef(ref.offset + index * Segment_size); return SegmentRef(ref.offset + index * Segment_size);
} }
// Header of one chunk in a linked list of segment chunks.
struct SegChunk {
// Number of segments stored in this chunk.
uint n;
// Reference to the next chunk; an offset of 0 terminates the list.
SegChunkRef next;
};
// Size of the SegChunk header in bytes (two 32-bit words: n and next).
#define SegChunk_size 8
// Returns a reference to the index-th SegChunk_size-sized slot counted from ref.
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
    uint byte_offset = ref.offset + index * SegChunk_size;
    return SegChunkRef(byte_offset);
}
TileHeader TileHeader_read(TileHeaderRef ref) { TileHeader TileHeader_read(TileHeaderRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = segment[ix + 0]; uint raw0 = segment[ix + 0];
@ -64,17 +78,14 @@ void TileHeader_write(TileHeaderRef ref, TileHeader s) {
ItemHeader ItemHeader_read(ItemHeaderRef ref) { ItemHeader ItemHeader_read(ItemHeaderRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = segment[ix + 0]; uint raw0 = segment[ix + 0];
uint raw1 = segment[ix + 1];
ItemHeader s; ItemHeader s;
s.n = raw0; s.segments = SegChunkRef(raw0);
s.segments = SegmentRef(raw1);
return s; return s;
} }
void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) { void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
segment[ix + 0] = s.n; segment[ix + 0] = s.segments.offset;
segment[ix + 1] = s.segments.offset;
} }
Segment Segment_read(SegmentRef ref) { Segment Segment_read(SegmentRef ref) {
@ -97,3 +108,19 @@ void Segment_write(SegmentRef ref, Segment s) {
segment[ix + 3] = floatBitsToUint(s.end.y); segment[ix + 3] = floatBitsToUint(s.end.y);
} }
// Loads the SegChunk header located at ref from the `segment` array.
SegChunk SegChunk_read(SegChunkRef ref) {
    // ref.offset is in bytes; `segment` is indexed in 32-bit words.
    uint word = ref.offset >> 2;
    SegChunk chunk;
    chunk.n = segment[word];
    chunk.next = SegChunkRef(segment[word + 1]);
    return chunk;
}
// Stores the SegChunk header s at ref in the `segment` array.
void SegChunk_write(SegChunkRef ref, SegChunk s) {
    // ref.offset is in bytes; `segment` is indexed in 32-bit words.
    uint word = ref.offset >> 2;
    segment[word] = s.n;
    segment[word + 1] = s.next.offset;
}

View file

@ -32,3 +32,7 @@
#define TILE_HEIGHT_PX 16 #define TILE_HEIGHT_PX 16
#define PTCL_INITIAL_ALLOC 1024 #define PTCL_INITIAL_ALLOC 1024
// Maximum number of segments in a SegChunk
// NOTE(review): assuming Segment_size is 16 bytes, a 512-byte allocation with
// the 8-byte SegChunk header holds at most 31 segments, not 32 — confirm which
// value is intended before relying on SEG_CHUNK_N.
#define SEG_CHUNK_N 32
// Size in bytes of each allocation made for segment chunks.
#define SEG_CHUNK_ALLOC 512

View file

@ -53,7 +53,7 @@ fn render_scene(rc: &mut impl RenderContext) {
} }
fn render_cardioid(rc: &mut impl RenderContext) { fn render_cardioid(rc: &mut impl RenderContext) {
let n = 100; let n = 91;
let dth = std::f64::consts::PI * 2.0 / (n as f64); let dth = std::f64::consts::PI * 2.0 / (n as f64);
let center = Point::new(1024.0, 768.0); let center = Point::new(1024.0, 768.0);
let r = 750.0; let r = 750.0;