mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 12:21:31 +11:00
Merge pull request #13 from linebender/par_coarse
Parallelized segment output in coarse raster
This commit is contained in:
commit
37a6f6aa51
|
@ -13,14 +13,13 @@ piet_gpu! {
|
|||
end: [f32; 2],
|
||||
}
|
||||
struct CmdStroke {
|
||||
// Should be Ref<SegChunk>
|
||||
seg_ref: u32,
|
||||
// Consider a specialization to one segment.
|
||||
seg_ref: Ref<SegChunk>,
|
||||
half_width: f32,
|
||||
rgba_color: u32,
|
||||
}
|
||||
struct CmdFill {
|
||||
// Should be Ref<FillSegChunk>
|
||||
seg_ref: u32,
|
||||
seg_ref: Ref<SegChunk>,
|
||||
backdrop: i32,
|
||||
rgba_color: u32,
|
||||
}
|
||||
|
@ -58,12 +57,17 @@ piet_gpu! {
|
|||
struct Segment {
|
||||
start: [f32; 2],
|
||||
end: [f32; 2],
|
||||
|
||||
// This is used for fills only, but we're including it in
|
||||
// the general structure for simplicity.
|
||||
y_edge: f32,
|
||||
}
|
||||
|
||||
struct SegChunk {
|
||||
n: u32,
|
||||
next: Ref<SegChunk>,
|
||||
// Segments follow (could represent this as a variable sized array).
|
||||
// Actually a reference to a variable-sized slice.
|
||||
segs: Ref<Segment>,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,6 +75,82 @@ fn trace_merge(buf: &[u32]) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Interpret the output of the coarse raster stage, for diagnostic purposes.
///
/// `buf` is the per-tile command list buffer viewed as 32-bit words; all
/// offsets stored in it are byte offsets, hence the `/ 4` when indexing.
/// For every tile of the (hard-coded) 128 x 96 grid this prints the commands
/// found at the tile's slot until a zero tag is reached. Fill (tag 3) and
/// stroke (tag 4) commands additionally dump every chunk of their segment
/// list, not just the first one.
///
/// # Panics
///
/// Panics if an offset read from `buf` points outside of it.
#[allow(unused)]
fn trace_ptcl(buf: &[u32]) {
    // Grid size and per-tile slot size are fixed here; they have to match
    // what the coarse raster stage actually wrote.
    const WIDTH_IN_TILES: usize = 128;
    const HEIGHT_IN_TILES: usize = 96;
    const TILE_SLOT_BYTES: usize = 1024;
    // Distance between consecutive commands within a tile's list.
    const CMD_STRIDE_BYTES: usize = 20;

    for y in 0..HEIGHT_IN_TILES {
        for x in 0..WIDTH_IN_TILES {
            let tile_ix = y * WIDTH_IN_TILES + x;
            println!("tile {} @({}, {})", tile_ix, x, y);
            let mut tile_offset = tile_ix * TILE_SLOT_BYTES;
            loop {
                let cmd = tile_offset / 4;
                let tag = buf[cmd];
                match tag {
                    // A zero tag terminates the tile's command list.
                    0 => break,
                    3 => {
                        let backdrop = buf[cmd + 2];
                        let rgba_color = buf[cmd + 3];
                        println!(" {:x}: fill {:x} {}", tile_offset, rgba_color, backdrop);
                        trace_seg_chunks(buf, buf[cmd + 1] as usize);
                    }
                    4 => {
                        let line_width = f32::from_bits(buf[cmd + 2]);
                        let rgba_color = buf[cmd + 3];
                        println!(" {:x}: stroke {:x} {}", tile_offset, rgba_color, line_width);
                        trace_seg_chunks(buf, buf[cmd + 1] as usize);
                    }
                    _ => {
                        println!("{:x}: {}", tile_offset, tag);
                    }
                }
                // Tag 8 carries the byte offset at which the list continues
                // (presumably a jump command -- confirm against the shader);
                // every other command is followed directly by the next one.
                tile_offset = if tag == 8 {
                    buf[cmd + 1] as usize
                } else {
                    tile_offset + CMD_STRIDE_BYTES
                };
            }
        }
    }
}

/// Print every chunk of the segment list starting at byte offset `first_chunk`.
///
/// A chunk is three words: segment count, byte offset of the next chunk
/// (0 ends the list) and byte offset of its segments. Each segment is five
/// `f32` words: start x/y, end x/y and `y_edge`.
///
/// NOTE(review): a list whose `next` links form a cycle makes this loop
/// forever; the buffer is trusted diagnostic input.
#[allow(unused)]
fn trace_seg_chunks(buf: &[u32], first_chunk: usize) {
    const SEGMENT_WORDS: usize = 5;

    let mut seg_chunk = first_chunk;
    loop {
        let chunk = seg_chunk / 4;
        let n = buf[chunk] as usize;
        let segs = buf[chunk + 2] as usize;
        println!(" chunk @{:x}: n={}, segs @{:x}", seg_chunk, n, segs);
        for i in 0..n {
            let seg = segs / 4 + i * SEGMENT_WORDS;
            let x0 = f32::from_bits(buf[seg]);
            let y0 = f32::from_bits(buf[seg + 1]);
            let x1 = f32::from_bits(buf[seg + 2]);
            let y1 = f32::from_bits(buf[seg + 3]);
            let y_edge = f32::from_bits(buf[seg + 4]);
            println!(" ({:.3}, {:.3}) - ({:.3}, {:.3}) | {:.3}", x0, y0, x1, y1, y_edge);
        }
        seg_chunk = buf[chunk + 1] as usize;
        if seg_chunk == 0 {
            break;
        }
    }
}
|
||||
|
||||
|
||||
fn main() -> Result<(), Error> {
|
||||
let (instance, _) = VkInstance::new(None)?;
|
||||
unsafe {
|
||||
|
@ -107,9 +183,9 @@ fn main() -> Result<(), Error> {
|
|||
|
||||
/*
|
||||
let mut data: Vec<u32> = Default::default();
|
||||
device.read_buffer(&renderer.bin_buf, &mut data).unwrap();
|
||||
device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap();
|
||||
//piet_gpu::dump_k1_data(&data);
|
||||
//trace_merge(&data);
|
||||
trace_ptcl(&data);
|
||||
*/
|
||||
|
||||
let mut img_data: Vec<u8> = Default::default();
|
||||
|
|
|
@ -44,6 +44,14 @@ shared uint sh_elements_ref;
|
|||
shared uint sh_bitmaps[N_SLICE][N_TILE];
|
||||
shared uint sh_backdrop[N_SLICE][N_TILE];
|
||||
shared uint sh_bd_sign[N_SLICE];
|
||||
shared uint sh_is_segment[N_SLICE];
|
||||
|
||||
// Shared state for parallel segment output stage
|
||||
|
||||
// Count of total number of segments in each tile, then
|
||||
// inclusive prefix sum of same.
|
||||
shared uint sh_seg_count[N_TILE];
|
||||
shared uint sh_seg_alloc;
|
||||
|
||||
// scale factors useful for converting coordinates to tiles
|
||||
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||
|
@ -60,24 +68,20 @@ void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
|
|||
}
|
||||
}
|
||||
|
||||
// Ensure that there is space to encode a segment.
|
||||
void alloc_chunk(inout uint chunk_n_segs, inout SegChunkRef seg_chunk_ref,
|
||||
inout SegChunkRef first_seg_chunk, inout uint seg_limit)
|
||||
{
|
||||
// TODO: Reduce divergence of atomic alloc?
|
||||
if (chunk_n_segs == 0) {
|
||||
if (seg_chunk_ref.offset + 40 > seg_limit) {
|
||||
seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
|
||||
seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
|
||||
}
|
||||
first_seg_chunk = seg_chunk_ref;
|
||||
} else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
|
||||
uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
|
||||
seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
|
||||
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
|
||||
seg_chunk_ref.offset = new_chunk_ref;
|
||||
chunk_n_segs = 0;
|
||||
#define CHUNK_ALLOC_SLAB 16
|
||||
|
||||
uint alloc_chunk_remaining;
|
||||
uint alloc_chunk_offset;
|
||||
|
||||
// Return a reference to a fresh SegChunk-sized slot.
//
// Slots are reserved CHUNK_ALLOC_SLAB at a time with a single atomicAdd on
// `alloc`, then handed out one by one from that reservation.
SegChunkRef alloc_seg_chunk() {
    if (alloc_chunk_remaining == 0) {
        // Current reservation is used up; grab a new slab.
        alloc_chunk_offset = atomicAdd(alloc, CHUNK_ALLOC_SLAB * SegChunk_size);
        alloc_chunk_remaining = CHUNK_ALLOC_SLAB;
    }
    SegChunkRef chunk_ref = SegChunkRef(alloc_chunk_offset);
    // Advance past the slot just handed out.
    alloc_chunk_remaining -= 1;
    alloc_chunk_offset += SegChunk_size;
    return chunk_ref;
}
|
||||
|
||||
// Accumulate delta to backdrop.
|
||||
|
@ -103,10 +107,11 @@ void main() {
|
|||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
||||
|
||||
// Allocation and management of segment output
|
||||
SegChunkRef seg_chunk_ref = SegChunkRef(0);
|
||||
SegChunkRef first_seg_chunk = SegChunkRef(0);
|
||||
uint seg_limit = 0;
|
||||
uint chunk_n_segs = 0;
|
||||
SegChunkRef last_chunk_ref = SegChunkRef(0);
|
||||
uint last_chunk_n = 0;
|
||||
SegmentRef last_chunk_segs = SegmentRef(0);
|
||||
alloc_chunk_remaining = 0;
|
||||
|
||||
uint wr_ix = 0;
|
||||
uint rd_ix = 0;
|
||||
|
@ -129,6 +134,9 @@ void main() {
|
|||
sh_bitmaps[i][th_ix] = 0;
|
||||
sh_backdrop[i][th_ix] = 0;
|
||||
}
|
||||
if (th_ix < N_SLICE) {
|
||||
sh_is_segment[th_ix] = 0;
|
||||
}
|
||||
|
||||
while (wr_ix - rd_ix <= N_TILE) {
|
||||
// Choose segment with least element.
|
||||
|
@ -219,6 +227,7 @@ void main() {
|
|||
atomicAnd(sh_bd_sign[my_slice], ~my_mask);
|
||||
}
|
||||
}
|
||||
atomicOr(sh_is_segment[my_slice], my_mask);
|
||||
// Set up for per-scanline coverage formula, below.
|
||||
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
||||
c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
|
||||
|
@ -279,14 +288,112 @@ void main() {
|
|||
}
|
||||
barrier();
|
||||
|
||||
// Output elements for this tile, based on bitmaps.
|
||||
// We've computed coverage and other info for each element in the input, now for
|
||||
// the output stage. We'll do segments first using a more parallel algorithm.
|
||||
|
||||
uint seg_count = 0;
|
||||
for (uint i = 0; i < N_SLICE; i++) {
|
||||
seg_count += bitCount(sh_bitmaps[i][th_ix] & sh_is_segment[i]);
|
||||
}
|
||||
sh_seg_count[th_ix] = seg_count;
|
||||
// Prefix sum of sh_seg_count
|
||||
for (uint i = 0; i < LG_N_TILE; i++) {
|
||||
barrier();
|
||||
if (th_ix >= (1 << i)) {
|
||||
seg_count += sh_seg_count[th_ix - (1 << i)];
|
||||
}
|
||||
barrier();
|
||||
sh_seg_count[th_ix] = seg_count;
|
||||
}
|
||||
if (th_ix == N_TILE - 1) {
|
||||
sh_seg_alloc = atomicAdd(alloc, seg_count * Segment_size);
|
||||
}
|
||||
barrier();
|
||||
uint total_seg_count = sh_seg_count[N_TILE - 1];
|
||||
uint seg_alloc = sh_seg_alloc;
|
||||
|
||||
// Output buffer is allocated as segments for each tile laid end-to-end.
|
||||
|
||||
for (uint ix = th_ix; ix < total_seg_count; ix += N_TILE) {
|
||||
// Find the work item; this thread is now not bound to an element or tile.
|
||||
// First find the tile (by binary search)
|
||||
uint tile_ix = 0;
|
||||
for (uint i = 0; i < LG_N_TILE; i++) {
|
||||
uint probe = tile_ix + ((N_TILE / 2) >> i);
|
||||
if (ix >= sh_seg_count[probe - 1]) {
|
||||
tile_ix = probe;
|
||||
}
|
||||
}
|
||||
// Now, sh_seg_count[tile_ix - 1] <= ix < sh_seg_count[tile_ix].
|
||||
// (considering sh_seg_count[-1] == 0)
|
||||
|
||||
// Index of segment within tile's segments
|
||||
uint seq_ix = ix;
|
||||
// Maybe consider a sentinel value to avoid the conditional?
|
||||
if (tile_ix > 0) {
|
||||
seq_ix -= sh_seg_count[tile_ix - 1];
|
||||
}
|
||||
// Find the segment. This is done by linear scan through the bitmaps of the
|
||||
// tile, accelerated by bit counting. Binary search might help, maybe not.
|
||||
uint slice_ix = 0;
|
||||
uint seq_bits;
|
||||
|
||||
while (true) {
|
||||
seq_bits = sh_bitmaps[slice_ix][tile_ix] & sh_is_segment[slice_ix];
|
||||
uint this_count = bitCount(seq_bits);
|
||||
if (this_count > seq_ix) {
|
||||
break;
|
||||
}
|
||||
seq_ix -= this_count;
|
||||
slice_ix++;
|
||||
}
|
||||
// Now find position of nth bit set (n = seq_ix) in seq_bits; binary search
|
||||
uint bit_ix = 0;
|
||||
for (int i = 0; i < 5; i++) {
|
||||
uint probe = bit_ix + (16 >> i);
|
||||
if (seq_ix >= bitCount(seq_bits & ((1 << probe) - 1))) {
|
||||
bit_ix = probe;
|
||||
}
|
||||
}
|
||||
uint out_offset = seg_alloc + Segment_size * ix + SegChunk_size;
|
||||
uint rd_el_ix = (rd_ix + slice_ix * 32 + bit_ix) % N_RINGBUF;
|
||||
uint element_ix = sh_elements[rd_el_ix];
|
||||
ref = AnnotatedRef(element_ix * Annotated_size);
|
||||
AnnoFillLineSeg line = Annotated_FillLine_read(ref);
|
||||
float y_edge = 0.0;
|
||||
// This is basically the same logic as piet-metal, but should be made numerically robust.
|
||||
if (Annotated_tag(ref) == Annotated_FillLine) {
|
||||
vec2 tile_xy = xy0 + vec2((tile_ix % N_TILE_X) * TILE_WIDTH_PX, (tile_ix / N_TILE_X) * TILE_HEIGHT_PX);
|
||||
y_edge = mix(line.p0.y, line.p1.y, (tile_xy.x - line.p0.x) / (line.p1.x - line.p0.x));
|
||||
if (min(line.p0.x, line.p1.x) < tile_xy.x && y_edge >= tile_xy.y && y_edge < tile_xy.y + TILE_HEIGHT_PX) {
|
||||
if (line.p0.x > line.p1.x) {
|
||||
line.p1 = vec2(tile_xy.x, y_edge);
|
||||
} else {
|
||||
line.p0 = vec2(tile_xy.x, y_edge);
|
||||
}
|
||||
} else {
|
||||
y_edge = 1e9;
|
||||
}
|
||||
}
|
||||
Segment seg = Segment(line.p0, line.p1, y_edge);
|
||||
Segment_write(SegmentRef(seg_alloc + Segment_size * ix), seg);
|
||||
}
|
||||
|
||||
// Output non-segment elements for this tile. The thread does a sequential walk
|
||||
// through the non-segment elements, and for segments, count and backdrop are
|
||||
// aggregated using bit counting.
|
||||
uint slice_ix = 0;
|
||||
uint bitmap = sh_bitmaps[0][th_ix];
|
||||
uint bd_bitmap = sh_backdrop[0][th_ix];
|
||||
uint bd_sign = sh_bd_sign[0];
|
||||
uint is_segment = sh_is_segment[0];
|
||||
uint seg_start = th_ix == 0 ? 0 : sh_seg_count[th_ix - 1];
|
||||
seg_count = 0;
|
||||
while (true) {
|
||||
if (bitmap == 0) {
|
||||
uint nonseg_bitmap = bitmap & ~is_segment;
|
||||
if (nonseg_bitmap == 0) {
|
||||
backdrop += count_backdrop(bd_bitmap, bd_sign);
|
||||
seg_count += bitCount(bitmap & is_segment);
|
||||
slice_ix++;
|
||||
if (slice_ix == N_SLICE) {
|
||||
break;
|
||||
|
@ -294,16 +401,19 @@ void main() {
|
|||
bitmap = sh_bitmaps[slice_ix][th_ix];
|
||||
bd_bitmap = sh_backdrop[slice_ix][th_ix];
|
||||
bd_sign = sh_bd_sign[slice_ix];
|
||||
if (bitmap == 0) {
|
||||
is_segment = sh_is_segment[slice_ix];
|
||||
nonseg_bitmap = bitmap & ~is_segment;
|
||||
if (nonseg_bitmap == 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
uint element_ref_ix = slice_ix * 32 + findLSB(bitmap);
|
||||
uint element_ref_ix = slice_ix * 32 + findLSB(nonseg_bitmap);
|
||||
uint element_ix = sh_elements[(rd_ix + element_ref_ix) % N_RINGBUF];
|
||||
|
||||
// Bits up to and including the lsb
|
||||
uint bd_mask = (bitmap - 1) ^ bitmap;
|
||||
uint bd_mask = (nonseg_bitmap - 1) ^ nonseg_bitmap;
|
||||
backdrop += count_backdrop(bd_bitmap & bd_mask, bd_sign);
|
||||
seg_count += bitCount(bitmap & bd_mask & is_segment);
|
||||
// Clear bits that have been consumed.
|
||||
bd_bitmap &= ~bd_mask;
|
||||
bitmap &= ~bd_mask;
|
||||
|
@ -315,76 +425,104 @@ void main() {
|
|||
tag = Annotated_tag(ref);
|
||||
|
||||
switch (tag) {
|
||||
case Annotated_FillLine:
|
||||
AnnoFillLineSeg fill_line = Annotated_FillLine_read(ref);
|
||||
// This is basically the same logic as piet-metal, but should be made numerically robust.
|
||||
vec2 tile_xy = vec2(tile_x * TILE_WIDTH_PX, tile_y * TILE_HEIGHT_PX);
|
||||
float yEdge = mix(fill_line.p0.y, fill_line.p1.y, (tile_xy.x - fill_line.p0.x) / (fill_line.p1.x - fill_line.p0.x));
|
||||
if (min(fill_line.p0.x, fill_line.p1.x) < tile_xy.x && yEdge >= tile_xy.y && yEdge < tile_xy.y + TILE_HEIGHT_PX) {
|
||||
Segment edge_seg;
|
||||
if (fill_line.p0.x > fill_line.p1.x) {
|
||||
fill_line.p1 = vec2(tile_xy.x, yEdge);
|
||||
edge_seg.start = fill_line.p1;
|
||||
edge_seg.end = vec2(tile_xy.x, tile_xy.y + TILE_HEIGHT_PX);
|
||||
} else {
|
||||
fill_line.p0 = vec2(tile_xy.x, yEdge);
|
||||
edge_seg.start = vec2(tile_xy.x, tile_xy.y + TILE_HEIGHT_PX);
|
||||
edge_seg.end = fill_line.p0;
|
||||
}
|
||||
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
|
||||
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), edge_seg);
|
||||
chunk_n_segs++;
|
||||
}
|
||||
Segment fill_seg = Segment(fill_line.p0, fill_line.p1);
|
||||
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
|
||||
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), fill_seg);
|
||||
chunk_n_segs++;
|
||||
break;
|
||||
case Annotated_StrokeLine:
|
||||
AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
|
||||
Segment seg = Segment(line.p0, line.p1);
|
||||
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
|
||||
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
|
||||
chunk_n_segs++;
|
||||
break;
|
||||
case Annotated_Fill:
|
||||
if (chunk_n_segs > 0) {
|
||||
if (last_chunk_n > 0 || seg_count > 0) {
|
||||
SegChunkRef chunk_ref = SegChunkRef(0);
|
||||
if (seg_count > 0) {
|
||||
chunk_ref = alloc_seg_chunk();
|
||||
SegChunk chunk;
|
||||
chunk.n = seg_count;
|
||||
chunk.next = SegChunkRef(0);
|
||||
uint seg_offset = seg_alloc + seg_start * Segment_size;
|
||||
chunk.segs = SegmentRef(seg_offset);
|
||||
SegChunk_write(chunk_ref, chunk);
|
||||
}
|
||||
if (last_chunk_n > 0) {
|
||||
SegChunk chunk;
|
||||
chunk.n = last_chunk_n;
|
||||
chunk.next = chunk_ref;
|
||||
chunk.segs = last_chunk_segs;
|
||||
SegChunk_write(last_chunk_ref, chunk);
|
||||
} else {
|
||||
first_seg_chunk = chunk_ref;
|
||||
}
|
||||
|
||||
AnnoFill fill = Annotated_Fill_read(ref);
|
||||
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
|
||||
seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
|
||||
CmdFill cmd_fill;
|
||||
cmd_fill.seg_ref = first_seg_chunk.offset;
|
||||
cmd_fill.seg_ref = first_seg_chunk;
|
||||
cmd_fill.backdrop = backdrop;
|
||||
cmd_fill.rgba_color = fill.rgba_color;
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Fill_write(cmd_ref, cmd_fill);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
chunk_n_segs = 0;
|
||||
last_chunk_n = 0;
|
||||
} else if (backdrop != 0) {
|
||||
AnnoFill fill = Annotated_Fill_read(ref);
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
||||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
seg_start += seg_count;
|
||||
seg_count = 0;
|
||||
backdrop = 0;
|
||||
break;
|
||||
case Annotated_Stroke:
|
||||
if (chunk_n_segs > 0) {
|
||||
// TODO: reduce divergence & code duplication? Much of the
|
||||
// fill and stroke processing is in common.
|
||||
if (last_chunk_n > 0 || seg_count > 0) {
|
||||
SegChunkRef chunk_ref = SegChunkRef(0);
|
||||
if (seg_count > 0) {
|
||||
chunk_ref = alloc_seg_chunk();
|
||||
SegChunk chunk;
|
||||
chunk.n = seg_count;
|
||||
chunk.next = SegChunkRef(0);
|
||||
uint seg_offset = seg_alloc + seg_start * Segment_size;
|
||||
chunk.segs = SegmentRef(seg_offset);
|
||||
SegChunk_write(chunk_ref, chunk);
|
||||
}
|
||||
if (last_chunk_n > 0) {
|
||||
SegChunk chunk;
|
||||
chunk.n = last_chunk_n;
|
||||
chunk.next = chunk_ref;
|
||||
chunk.segs = last_chunk_segs;
|
||||
SegChunk_write(last_chunk_ref, chunk);
|
||||
} else {
|
||||
first_seg_chunk = chunk_ref;
|
||||
}
|
||||
|
||||
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
||||
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
|
||||
seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
|
||||
CmdStroke cmd_stroke;
|
||||
cmd_stroke.seg_ref = first_seg_chunk.offset;
|
||||
cmd_stroke.seg_ref = first_seg_chunk;
|
||||
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
||||
cmd_stroke.rgba_color = stroke.rgba_color;
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
chunk_n_segs = 0;
|
||||
last_chunk_n = 0;
|
||||
}
|
||||
seg_start += seg_count;
|
||||
seg_count = 0;
|
||||
break;
|
||||
default:
|
||||
// This shouldn't happen, but just in case.
|
||||
seg_start++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (seg_count > 0) {
|
||||
SegChunkRef chunk_ref = alloc_seg_chunk();
|
||||
if (last_chunk_n > 0) {
|
||||
SegChunk_write(last_chunk_ref, SegChunk(last_chunk_n, chunk_ref, last_chunk_segs));
|
||||
} else {
|
||||
first_seg_chunk = chunk_ref;
|
||||
}
|
||||
// TODO: free two registers by writing count and segments ref now,
|
||||
// as opposed to deferring SegChunk write until all fields are known.
|
||||
last_chunk_ref = chunk_ref;
|
||||
last_chunk_n = seg_count;
|
||||
uint seg_offset = seg_alloc + seg_start * Segment_size;
|
||||
last_chunk_segs = SegmentRef(seg_offset);
|
||||
}
|
||||
barrier();
|
||||
|
||||
rd_ix += N_TILE;
|
||||
|
|
Binary file not shown.
|
@ -47,11 +47,12 @@ void main() {
|
|||
case Cmd_Stroke:
|
||||
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
||||
float df = 1e9;
|
||||
SegChunkRef seg_chunk_ref = SegChunkRef(stroke.seg_ref);
|
||||
SegChunkRef seg_chunk_ref = stroke.seg_ref;
|
||||
do {
|
||||
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
|
||||
SegmentRef segs = seg_chunk.segs;
|
||||
for (int i = 0; i < seg_chunk.n; i++) {
|
||||
Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
|
||||
Segment seg = Segment_read(Segment_index(segs, i));
|
||||
vec2 line_vec = seg.end - seg.start;
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
|
@ -67,11 +68,12 @@ void main() {
|
|||
CmdFill fill = Cmd_Fill_read(cmd_ref);
|
||||
// Probably better to store as float, but conversion is no doubt cheap.
|
||||
float area = float(fill.backdrop);
|
||||
SegChunkRef fill_seg_chunk_ref = SegChunkRef(fill.seg_ref);
|
||||
SegChunkRef fill_seg_chunk_ref = fill.seg_ref;
|
||||
do {
|
||||
SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
|
||||
SegmentRef segs = seg_chunk.segs;
|
||||
for (int i = 0; i < seg_chunk.n; i++) {
|
||||
Segment seg = Segment_read(SegmentRef(fill_seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
|
||||
Segment seg = Segment_read(Segment_index(segs, i));
|
||||
vec2 start = seg.start - xy;
|
||||
vec2 end = seg.end - xy;
|
||||
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
||||
|
@ -86,6 +88,7 @@ void main() {
|
|||
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
||||
area += a * (window.x - window.y);
|
||||
}
|
||||
area += sign(end.x - start.x) * clamp(xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
||||
}
|
||||
fill_seg_chunk_ref = seg_chunk.next;
|
||||
} while (fill_seg_chunk_ref.offset != 0);
|
||||
|
|
Binary file not shown.
|
@ -68,7 +68,7 @@ CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
|
|||
}
|
||||
|
||||
struct CmdStroke {
|
||||
uint seg_ref;
|
||||
SegChunkRef seg_ref;
|
||||
float half_width;
|
||||
uint rgba_color;
|
||||
};
|
||||
|
@ -80,7 +80,7 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
|
|||
}
|
||||
|
||||
struct CmdFill {
|
||||
uint seg_ref;
|
||||
SegChunkRef seg_ref;
|
||||
int backdrop;
|
||||
uint rgba_color;
|
||||
};
|
||||
|
@ -152,9 +152,10 @@ CmdRef Cmd_index(CmdRef ref, uint index) {
|
|||
struct Segment {
|
||||
vec2 start;
|
||||
vec2 end;
|
||||
float y_edge;
|
||||
};
|
||||
|
||||
#define Segment_size 16
|
||||
#define Segment_size 20
|
||||
|
||||
SegmentRef Segment_index(SegmentRef ref, uint index) {
|
||||
return SegmentRef(ref.offset + index * Segment_size);
|
||||
|
@ -163,9 +164,10 @@ SegmentRef Segment_index(SegmentRef ref, uint index) {
|
|||
struct SegChunk {
|
||||
uint n;
|
||||
SegChunkRef next;
|
||||
SegmentRef segs;
|
||||
};
|
||||
|
||||
#define SegChunk_size 8
|
||||
#define SegChunk_size 12
|
||||
|
||||
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
|
||||
return SegChunkRef(ref.offset + index * SegChunk_size);
|
||||
|
@ -218,7 +220,7 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
|||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
CmdStroke s;
|
||||
s.seg_ref = raw0;
|
||||
s.seg_ref = SegChunkRef(raw0);
|
||||
s.half_width = uintBitsToFloat(raw1);
|
||||
s.rgba_color = raw2;
|
||||
return s;
|
||||
|
@ -226,7 +228,7 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
|||
|
||||
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.seg_ref;
|
||||
ptcl[ix + 0] = s.seg_ref.offset;
|
||||
ptcl[ix + 1] = floatBitsToUint(s.half_width);
|
||||
ptcl[ix + 2] = s.rgba_color;
|
||||
}
|
||||
|
@ -237,7 +239,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
|
|||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
CmdFill s;
|
||||
s.seg_ref = raw0;
|
||||
s.seg_ref = SegChunkRef(raw0);
|
||||
s.backdrop = int(raw1);
|
||||
s.rgba_color = raw2;
|
||||
return s;
|
||||
|
@ -245,7 +247,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
|
|||
|
||||
void CmdFill_write(CmdFillRef ref, CmdFill s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.seg_ref;
|
||||
ptcl[ix + 0] = s.seg_ref.offset;
|
||||
ptcl[ix + 1] = uint(s.backdrop);
|
||||
ptcl[ix + 2] = s.rgba_color;
|
||||
}
|
||||
|
@ -398,9 +400,11 @@ Segment Segment_read(SegmentRef ref) {
|
|||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
uint raw3 = ptcl[ix + 3];
|
||||
uint raw4 = ptcl[ix + 4];
|
||||
Segment s;
|
||||
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.y_edge = uintBitsToFloat(raw4);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -410,15 +414,18 @@ void Segment_write(SegmentRef ref, Segment s) {
|
|||
ptcl[ix + 1] = floatBitsToUint(s.start.y);
|
||||
ptcl[ix + 2] = floatBitsToUint(s.end.x);
|
||||
ptcl[ix + 3] = floatBitsToUint(s.end.y);
|
||||
ptcl[ix + 4] = floatBitsToUint(s.y_edge);
|
||||
}
|
||||
|
||||
// Load a SegChunk from `ptcl` at the byte offset held in `ref`.
// Word 0 is the segment count, word 1 the next chunk's offset and
// word 2 the offset of this chunk's segments.
SegChunk SegChunk_read(SegChunkRef ref) {
    uint base = ref.offset >> 2;
    SegChunk chunk;
    chunk.n = ptcl[base + 0];
    chunk.next = SegChunkRef(ptcl[base + 1]);
    chunk.segs = SegmentRef(ptcl[base + 2]);
    return chunk;
}
|
||||
|
||||
|
@ -426,5 +433,6 @@ void SegChunk_write(SegChunkRef ref, SegChunk s) {
|
|||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.n;
|
||||
ptcl[ix + 1] = s.next.offset;
|
||||
ptcl[ix + 2] = s.segs.offset;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,8 +46,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
|
|||
let circle = Circle::new(center, radius);
|
||||
rc.fill(circle, &color);
|
||||
}
|
||||
let mut path = BezPath::new();
|
||||
/*
|
||||
let mut path = BezPath::new();
|
||||
path.move_to((100.0, 1150.0));
|
||||
path.line_to((200.0, 1200.0));
|
||||
path.line_to((150.0, 1250.0));
|
||||
|
|
Loading…
Reference in a new issue