Use linked list strategy for segments

Trying to allocate them contiguously wasn't good.
This commit is contained in:
Raph Levien 2020-04-28 22:25:57 -07:00
parent cb06b1bc3d
commit b23fe25177
13 changed files with 95 additions and 52 deletions

View file

@ -13,8 +13,7 @@ piet_gpu! {
end: [f32; 2],
}
struct CmdStroke {
n_segs: u32,
// Should be Ref<Segment> if we had cross-module references.
// Should be Ref<SegChunk> if we had cross-module references.
seg_ref: u32,
half_width: f32,
rgba_color: u32,

View file

@ -12,8 +12,7 @@ piet_gpu! {
// Note: this is only suitable for strokes, fills require backdrop.
struct ItemHeader {
n: u32,
segments: Ref<Segment>,
segments: Ref<SegChunk>,
}
// TODO: strongly consider using f16. If so, these would be
@ -23,5 +22,11 @@ piet_gpu! {
start: [f32; 2],
end: [f32; 2],
}
struct SegChunk {
n: u32,
next: Ref<SegChunk>,
// Segments follow (could represent this as a variable sized array).
}
}
}

View file

@ -13,6 +13,6 @@ build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h ptcl.h setup.h
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h setup.h

View file

@ -44,12 +44,15 @@ void main() {
InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
SegmentRef seg_ref = SegmentRef(0);
SegChunkRef seg_chunk_ref = SegChunkRef(0);
uint seg_limit = 0;
// Iterate through items; stroke_n holds count remaining.
while (true) {
if (chunk.chunk_n == 0) {
chunk_ref = chunk.next;
if (chunk_ref.offset == 0) {
break;
}
chunk = Chunk_read(chunk_ref);
stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
}
@ -58,16 +61,8 @@ void main() {
// Process the stroke polyline item.
uint max_n_segs = poly.n_points - 1;
uint reserve = max_n_segs * Segment_size;
if (seg_ref.offset + reserve > seg_limit) {
// This is a heuristic to balance atomic bandwidth and utilization.
// The output always gets a contiguous allocation. We might use
// all, some, or none of the capacity.
uint capacity_bytes = stroke_n > 1 ? reserve * 2 + 128 : reserve;
seg_ref.offset = atomicAdd(alloc, capacity_bytes);
seg_limit = seg_ref.offset + capacity_bytes;
}
uint n_segs = 0;
uint chunk_n_segs = 0;
SegChunkRef seg_chunk_ref;
vec2 start = Point_read(poly.points).xy;
for (uint j = 0; j < max_n_segs; j++) {
poly.points.offset += Point_size;
@ -103,18 +98,31 @@ void main() {
&& max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
&& s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
{
// Allocate a chunk if needed.
if (chunk_n_segs == 0) {
if (seg_chunk_ref.offset + 40 > seg_limit) {
seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
}
ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
} else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
seg_chunk_ref.offset = new_chunk_ref;
chunk_n_segs = 0;
}
Segment seg = Segment(start, end);
Segment_write(Segment_index(seg_ref, n_segs), seg);
n_segs++;
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
chunk_n_segs++;
}
start = end;
}
ItemHeader_write(item_header, ItemHeader(n_segs, seg_ref));
if (--stroke_n == 0) {
break;
if (chunk_n_segs > 0) {
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
}
seg_ref.offset += n_segs * Segment_size;
stroke_ref.offset += Instance_size;
chunk.chunk_n--;

Binary file not shown.

View file

@ -87,10 +87,9 @@ void main() {
case PietItem_Poly:
ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
stroke_th.items.offset += ItemHeader_size;
if (stroke_item.n > 0) {
if (stroke_item.segments.offset != 0) {
PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
CmdStroke cmd = CmdStroke(
stroke_item.n,
stroke_item.segments.offset,
0.5 * poly.width,
poly.rgba_color

Binary file not shown.

View file

@ -54,13 +54,18 @@ void main() {
case Cmd_Stroke:
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
float df = 1e9;
for (int i = 0; i < stroke.n_segs; i++) {
Segment seg = Segment_read(Segment_index(SegmentRef(stroke.seg_ref), i));
vec2 line_vec = seg.end - seg.start;
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
df = min(df, length(line_vec * t - dpos));
}
SegChunkRef seg_chunk_ref = SegChunkRef(stroke.seg_ref);
do {
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
for (int i = 0; i < seg_chunk.n; i++) {
Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
vec2 line_vec = seg.end - seg.start;
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
df = min(df, length(line_vec * t - dpos));
}
seg_chunk_ref = seg_chunk.next;
} while (seg_chunk_ref.offset != 0);
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0);
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);

Binary file not shown.

View file

@ -60,13 +60,12 @@ CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
}
struct CmdStroke {
uint n_segs;
uint seg_ref;
float half_width;
uint rgba_color;
};
#define CmdStroke_size 16
#define CmdStroke_size 12
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
return CmdStrokeRef(ref.offset + index * CmdStroke_size);
@ -187,21 +186,18 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
uint raw0 = ptcl[ix + 0];
uint raw1 = ptcl[ix + 1];
uint raw2 = ptcl[ix + 2];
uint raw3 = ptcl[ix + 3];
CmdStroke s;
s.n_segs = raw0;
s.seg_ref = raw1;
s.half_width = uintBitsToFloat(raw2);
s.rgba_color = raw3;
s.seg_ref = raw0;
s.half_width = uintBitsToFloat(raw1);
s.rgba_color = raw2;
return s;
}
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.n_segs;
ptcl[ix + 1] = s.seg_ref;
ptcl[ix + 2] = floatBitsToUint(s.half_width);
ptcl[ix + 3] = s.rgba_color;
ptcl[ix + 0] = s.seg_ref;
ptcl[ix + 1] = floatBitsToUint(s.half_width);
ptcl[ix + 2] = s.rgba_color;
}
CmdFill CmdFill_read(CmdFillRef ref) {

View file

@ -12,6 +12,10 @@ struct SegmentRef {
uint offset;
};
// Typed reference to a SegChunk, stored as an offset into the segment buffer.
// The accessors shift this offset right by 2 to obtain a 32-bit word index.
// Code that walks chunk lists treats an offset of 0 as "no chunk".
struct SegChunkRef {
uint offset;
};
struct TileHeader {
uint n;
ItemHeaderRef items;
@ -24,11 +28,10 @@ TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) {
}
struct ItemHeader {
uint n;
SegmentRef segments;
SegChunkRef segments;
};
#define ItemHeader_size 8
#define ItemHeader_size 4
ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) {
return ItemHeaderRef(ref.offset + index * ItemHeader_size);
@ -45,6 +48,17 @@ SegmentRef Segment_index(SegmentRef ref, uint index) {
return SegmentRef(ref.offset + index * Segment_size);
}
// Header of one chunk in a linked list of segment chunks.
struct SegChunk {
// Number of segments stored in this chunk.
uint n;
// Reference to the next chunk in the list; an offset of 0 terminates the list.
SegChunkRef next;
};
// Size in bytes of the header alone (two 32-bit words). The segments of a
// chunk are stored immediately after the header.
#define SegChunk_size 8
// Returns a reference advanced from `ref` by `index` SegChunk headers
// (i.e. by `index * SegChunk_size` bytes).
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
    uint advanced = ref.offset + index * SegChunk_size;
    return SegChunkRef(advanced);
}
TileHeader TileHeader_read(TileHeaderRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = segment[ix + 0];
@ -64,17 +78,14 @@ void TileHeader_write(TileHeaderRef ref, TileHeader s) {
ItemHeader ItemHeader_read(ItemHeaderRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = segment[ix + 0];
uint raw1 = segment[ix + 1];
ItemHeader s;
s.n = raw0;
s.segments = SegmentRef(raw1);
s.segments = SegChunkRef(raw0);
return s;
}
void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) {
uint ix = ref.offset >> 2;
segment[ix + 0] = s.n;
segment[ix + 1] = s.segments.offset;
segment[ix + 0] = s.segments.offset;
}
Segment Segment_read(SegmentRef ref) {
@ -97,3 +108,19 @@ void Segment_write(SegmentRef ref, Segment s) {
segment[ix + 3] = floatBitsToUint(s.end.y);
}
// Loads the SegChunk header located at `ref` from the `segment` buffer.
// Word 0 holds the segment count and word 1 the offset of the next chunk.
SegChunk SegChunk_read(SegChunkRef ref) {
    // Convert the byte offset into an index of 32-bit words.
    uint base = ref.offset >> 2;
    uint count = segment[base];
    uint next_offset = segment[base + 1];
    return SegChunk(count, SegChunkRef(next_offset));
}
// Stores the SegChunk header `s` at `ref` in the `segment` buffer:
// the segment count goes in word 0 and the next-chunk offset in word 1.
void SegChunk_write(SegChunkRef ref, SegChunk s) {
    // Convert the byte offset into an index of 32-bit words.
    uint base = ref.offset >> 2;
    segment[base] = s.n;
    segment[base + 1] = s.next.offset;
}

View file

@ -32,3 +32,7 @@
#define TILE_HEIGHT_PX 16
#define PTCL_INITIAL_ALLOC 1024
// Maximum number of segments in a SegChunk
// NOTE(review): if Segment_size is 16 bytes (two vec2 fields), a header of
// SegChunk_size (8) bytes plus 32 segments needs 8 + 32 * 16 = 520 bytes,
// which exceeds SEG_CHUNK_ALLOC — confirm the intended capacity.
#define SEG_CHUNK_N 32
// Number of bytes requested from the allocator for each new segment chunk.
#define SEG_CHUNK_ALLOC 512

View file

@ -53,7 +53,7 @@ fn render_scene(rc: &mut impl RenderContext) {
}
fn render_cardioid(rc: &mut impl RenderContext) {
let n = 100;
let n = 91;
let dth = std::f64::consts::PI * 2.0 / (n as f64);
let center = Point::new(1024.0, 768.0);
let r = 750.0;