Delete old-style kernels and buffers

Pave the way for the coarse raster pass to write to the ptcl buffer.
Raph Levien 2020-05-15 15:20:25 -07:00
parent 3a6428238b
commit 1240da3870
20 changed files with 91 additions and 965 deletions


@@ -1,37 +0,0 @@
use piet_gpu_derive::piet_gpu;
// Structures representing segments for fill items.
// There is some cut'n'paste here from stroke segments, which can be
// traced to the fact that buffers in GLSL are basically global.
// Maybe there's a way to address that, but in the meantime living
// with the duplication is easiest.
piet_gpu! {
#[gpu_write]
mod fill_seg {
struct FillTileHeader {
n: u32,
items: Ref<FillItemHeader>,
}
struct FillItemHeader {
backdrop: i32,
segments: Ref<FillSegChunk>,
}
// TODO: strongly consider using f16. If so, these would be
// relative to the tile. We're doing f32 for now to minimize
// divergence from piet-metal originals.
struct FillSegment {
start: [f32; 2],
end: [f32; 2],
}
struct FillSegChunk {
n: u32,
next: Ref<FillSegChunk>,
// Segments follow (could represent this as a variable sized array).
}
}
}


@@ -3,10 +3,8 @@
pub mod annotated;
pub mod bins;
pub mod encoder;
pub mod fill_seg;
pub mod ptcl;
pub mod scene;
pub mod segment;
pub mod state;
pub mod test;
pub mod tilegroup;


@@ -9,8 +9,6 @@ fn main() {
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
"bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
"segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
"fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()),
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
_ => println!("Oops, unknown module name"),


@@ -13,13 +13,13 @@ piet_gpu! {
end: [f32; 2],
}
struct CmdStroke {
// Should be Ref<SegChunk> if we had cross-module references.
// Should be Ref<SegChunk>
seg_ref: u32,
half_width: f32,
rgba_color: u32,
}
struct CmdFill {
// Should be Ref<FillSegChunk> if we had cross-module references.
// Should be Ref<FillSegChunk>
seg_ref: u32,
backdrop: i32,
rgba_color: u32,
@@ -51,5 +51,19 @@ piet_gpu! {
Jump(CmdJump),
Bail,
}
// TODO: strongly consider using f16. If so, these would be
// relative to the tile. We're doing f32 for now to minimize
// divergence from piet-metal originals.
struct Segment {
start: [f32; 2],
end: [f32; 2],
}
struct SegChunk {
n: u32,
next: Ref<SegChunk>,
// Segments follow (could represent this as a variable sized array).
}
}
}

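The Segment and SegChunk types added here keep the linked-chunk encoding of the modules being deleted. As a minimal sketch, the implied layout of one chunk in the ptcl buffer is (sizes from the generated ptcl.h further down; the writer will be the upcoming coarse raster pass, so this describes intended use rather than code in this commit):

// [SegChunk header]  8 bytes (SegChunk_size): n, next
// [Segment 0]       16 bytes (Segment_size): start.xy, end.xy
// ...
// [Segment n-1]
// `next` holds the byte offset of the following chunk header; 0 terminates the list.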

@@ -1,32 +0,0 @@
use piet_gpu_derive::piet_gpu;
// Structures representing segments for stroke/fill items.
piet_gpu! {
#[gpu_write]
mod segment {
struct TileHeader {
n: u32,
items: Ref<ItemHeader>,
}
// Note: this is only suitable for strokes, fills require backdrop.
struct ItemHeader {
segments: Ref<SegChunk>,
}
// TODO: strongly consider using f16. If so, these would be
// relative to the tile. We're doing f32 for now to minimize
// divergence from piet-metal originals.
struct Segment {
start: [f32; 2],
end: [f32; 2],
}
struct SegChunk {
n: u32,
next: Ref<SegChunk>,
// Segments follow (could represent this as a variable sized array).
}
}
}


@@ -9,19 +9,11 @@ rule glsl
build image.spv: glsl image.comp | scene.h
build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
build kernel2f.spv: glsl kernel2f.comp | scene.h tilegroup.h fill_seg.h setup.h
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
build binning.spv: glsl binning.comp | annotated.h bins.h setup.h
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h


@@ -1,130 +0,0 @@
// Code auto-generated by piet-gpu-derive
struct FillTileHeaderRef {
uint offset;
};
struct FillItemHeaderRef {
uint offset;
};
struct FillSegmentRef {
uint offset;
};
struct FillSegChunkRef {
uint offset;
};
struct FillTileHeader {
uint n;
FillItemHeaderRef items;
};
#define FillTileHeader_size 8
FillTileHeaderRef FillTileHeader_index(FillTileHeaderRef ref, uint index) {
return FillTileHeaderRef(ref.offset + index * FillTileHeader_size);
}
struct FillItemHeader {
int backdrop;
FillSegChunkRef segments;
};
#define FillItemHeader_size 8
FillItemHeaderRef FillItemHeader_index(FillItemHeaderRef ref, uint index) {
return FillItemHeaderRef(ref.offset + index * FillItemHeader_size);
}
struct FillSegment {
vec2 start;
vec2 end;
};
#define FillSegment_size 16
FillSegmentRef FillSegment_index(FillSegmentRef ref, uint index) {
return FillSegmentRef(ref.offset + index * FillSegment_size);
}
struct FillSegChunk {
uint n;
FillSegChunkRef next;
};
#define FillSegChunk_size 8
FillSegChunkRef FillSegChunk_index(FillSegChunkRef ref, uint index) {
return FillSegChunkRef(ref.offset + index * FillSegChunk_size);
}
FillTileHeader FillTileHeader_read(FillTileHeaderRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = fill_seg[ix + 0];
uint raw1 = fill_seg[ix + 1];
FillTileHeader s;
s.n = raw0;
s.items = FillItemHeaderRef(raw1);
return s;
}
void FillTileHeader_write(FillTileHeaderRef ref, FillTileHeader s) {
uint ix = ref.offset >> 2;
fill_seg[ix + 0] = s.n;
fill_seg[ix + 1] = s.items.offset;
}
FillItemHeader FillItemHeader_read(FillItemHeaderRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = fill_seg[ix + 0];
uint raw1 = fill_seg[ix + 1];
FillItemHeader s;
s.backdrop = int(raw0);
s.segments = FillSegChunkRef(raw1);
return s;
}
void FillItemHeader_write(FillItemHeaderRef ref, FillItemHeader s) {
uint ix = ref.offset >> 2;
fill_seg[ix + 0] = uint(s.backdrop);
fill_seg[ix + 1] = s.segments.offset;
}
FillSegment FillSegment_read(FillSegmentRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = fill_seg[ix + 0];
uint raw1 = fill_seg[ix + 1];
uint raw2 = fill_seg[ix + 2];
uint raw3 = fill_seg[ix + 3];
FillSegment s;
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s;
}
void FillSegment_write(FillSegmentRef ref, FillSegment s) {
uint ix = ref.offset >> 2;
fill_seg[ix + 0] = floatBitsToUint(s.start.x);
fill_seg[ix + 1] = floatBitsToUint(s.start.y);
fill_seg[ix + 2] = floatBitsToUint(s.end.x);
fill_seg[ix + 3] = floatBitsToUint(s.end.y);
}
FillSegChunk FillSegChunk_read(FillSegChunkRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = fill_seg[ix + 0];
uint raw1 = fill_seg[ix + 1];
FillSegChunk s;
s.n = raw0;
s.next = FillSegChunkRef(raw1);
return s;
}
void FillSegChunk_write(FillSegChunkRef ref, FillSegChunk s) {
uint ix = ref.offset >> 2;
fill_seg[ix + 0] = s.n;
fill_seg[ix + 1] = s.next.offset;
}

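All generated accessors above treat a ref as a byte offset into a buffer of 32-bit words, which is why every read and write starts with `ref.offset >> 2`. A small worked example (illustrative values, not from the source):

// Reading the FillSegment at byte offset 32 (the third element of an
// array based at 0, since FillSegment_size is 16):
uint ix = 32 >> 2; // word index 8
// raw0..raw3 are then fill_seg[8]..fill_seg[11], the bit patterns of
// start.x, start.y, end.x and end.y, decoded with uintBitsToFloat.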

@@ -1,161 +0,0 @@
// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
// and outputs "instances" (references to item + translation) for each item
// that intersects the tilegroup.
//
// This implementation is simplistic and leaves a lot of performance on the
// table. A fancier implementation would use threadgroup shared memory or
// subgroups (or possibly both) to parallelize the reading of the input and
// the computation of tilegroup intersection.
//
// In addition, there are some features currently missing, such as support
// for clipping.
#version 450
#extension GL_GOOGLE_include_directive : enable
// It's possible we should lay this out with x and do our own math.
layout(local_size_x = 1, local_size_y = 32) in;
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
layout(set = 0, binding = 2) buffer AllocBuf {
uint alloc;
};
#include "scene.h"
#include "tilegroup.h"
#include "setup.h"
#define MAX_STACK 8
struct StackElement {
PietItemRef group;
uint index;
vec2 offset;
};
void main() {
StackElement stack[MAX_STACK];
uint stack_ix = 0;
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
// State for stroke references.
TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START);
ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4);
InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size);
uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_STROKE_ALLOC - Instance_size;
uint stroke_chunk_n = 0;
uint stroke_n = 0;
// State for fill references. All this is a bit cut'n'paste, but making a
// proper abstraction isn't easy.
TileGroupRef fill_start = TileGroupRef(tg_ref.offset + TILEGROUP_FILL_START);
ChunkRef fill_chunk_start = ChunkRef(fill_start.offset + 4);
InstanceRef fill_ref = InstanceRef(fill_chunk_start.offset + Chunk_size);
uint fill_limit = fill_start.offset + TILEGROUP_INITIAL_FILL_ALLOC - Instance_size;
uint fill_chunk_n = 0;
uint fill_n = 0;
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
PietItemRef root = PietItemRef(0);
SimpleGroup group = PietItem_Group_read(root);
StackElement tos = StackElement(root, 0, group.offset.xy);
while (true) {
if (tos.index < group.n_items) {
Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
&& max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
bool is_group = false;
uint tag;
if (hit) {
PietItemRef item_ref = PietItem_index(group.items, tos.index);
tag = PietItem_tag(item_ref);
is_group = tag == PietItem_Group;
}
if (hit && !is_group) {
PietItemRef item_ref = PietItem_index(group.items, tos.index);
Instance ins = Instance(item_ref.offset, tos.offset);
if (tg_ref.offset > tg_limit) {
// Allocation exceeded; do atomic bump alloc.
uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
Jump jump = Jump(TileGroupRef(new_tg));
TileGroup_Jump_write(tg_ref, jump);
tg_ref = TileGroupRef(new_tg);
tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
}
TileGroup_Instance_write(tg_ref, ins);
tg_ref.offset += TileGroup_size;
if (tag == PietItem_Poly) {
if (stroke_ref.offset > stroke_limit) {
uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC);
Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(new_stroke)));
stroke_chunk_start = ChunkRef(new_stroke);
stroke_ref = InstanceRef(new_stroke + Chunk_size);
stroke_n += stroke_chunk_n;
stroke_chunk_n = 0;
stroke_limit = new_stroke + TILEGROUP_STROKE_ALLOC - Instance_size;
}
Instance_write(stroke_ref, ins);
stroke_chunk_n++;
stroke_ref.offset += Instance_size;
} else if (tag == PietItem_Fill) {
if (fill_ref.offset > fill_limit) {
uint new_fill = atomicAdd(alloc, TILEGROUP_FILL_ALLOC);
Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(new_fill)));
fill_chunk_start = ChunkRef(new_fill);
fill_ref = InstanceRef(new_fill + Chunk_size);
fill_n += fill_chunk_n;
fill_chunk_n = 0;
fill_limit = new_fill + TILEGROUP_FILL_ALLOC - Instance_size;
}
Instance_write(fill_ref, ins);
fill_chunk_n++;
fill_ref.offset += Instance_size;
}
}
if (is_group) {
PietItemRef item_ref = PietItem_index(group.items, tos.index);
tos.index++;
if (tos.index < group.n_items) {
stack[stack_ix++] = tos;
}
group = PietItem_Group_read(item_ref);
tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
} else {
tos.index++;
}
} else {
// processed all items in this group; pop the stack
if (stack_ix == 0) {
break;
}
tos = stack[--stack_ix];
group = PietItem_Group_read(tos.group);
}
}
TileGroup_End_write(tg_ref);
stroke_n += stroke_chunk_n;
if (stroke_n > 0) {
Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0)));
}
tilegroup[stroke_start.offset >> 2] = stroke_n;
fill_n += fill_chunk_n;
if (fill_n > 0) {
Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(0)));
}
tilegroup[fill_start.offset >> 2] = fill_n;
}

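All of the deleted stages share the bump-allocation idiom seen above whenever a region fills up. A minimal sketch of the pattern, extracted from the stroke branch (same buffer and constant names as in the code):

// `alloc` is a single uint watermark shared by every invocation.
// atomicAdd returns the previous value, so each caller receives a
// private region [new_stroke, new_stroke + TILEGROUP_STROKE_ALLOC).
uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC);
// The filled chunk is then closed out with a header pointing at the
// new region, forming a linked list that the next stage follows.
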
Binary file not shown.


@@ -1,167 +0,0 @@
// This is "kernel 2" (fill) in a 4-kernel pipeline. It processes the fill
// (polyline) items in the scene and generates a list of segments for each, for
// each tile.
#version 450
#extension GL_GOOGLE_include_directive : enable
layout(local_size_x = 32) in;
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
layout(set = 0, binding = 2) buffer FillSegBuf {
uint[] fill_seg;
};
layout(set = 0, binding = 3) buffer AllocBuf {
uint alloc;
};
#include "scene.h"
#include "tilegroup.h"
#include "fill_seg.h"
#include "setup.h"
// Ensure that there is space to encode a segment.
void alloc_chunk(inout uint chunk_n_segs, inout FillSegChunkRef seg_chunk_ref,
inout FillSegChunkRef first_seg_chunk, inout uint seg_limit)
{
if (chunk_n_segs == 0) {
if (seg_chunk_ref.offset + 40 > seg_limit) {
seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - FillSegment_size;
}
first_seg_chunk = seg_chunk_ref;
} else if (seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs > seg_limit) {
uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - FillSegment_size;
FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(new_chunk_ref)));
seg_chunk_ref.offset = new_chunk_ref;
chunk_n_segs = 0;
}
}
void main() {
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
+ (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
TileGroupRef fill_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_FILL_START);
uint fill_n = tilegroup[fill_start.offset >> 2];
FillTileHeaderRef tile_header_ref = FillTileHeaderRef(tile_ix * FillTileHeader_size);
if (fill_n > 0) {
ChunkRef chunk_ref = ChunkRef(fill_start.offset + 4);
Chunk chunk = Chunk_read(chunk_ref);
InstanceRef fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
FillItemHeaderRef item_header = FillItemHeaderRef(atomicAdd(alloc, fill_n * FillItemHeader_size));
FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, item_header));
FillSegChunkRef seg_chunk_ref = FillSegChunkRef(0);
uint seg_limit = 0;
// Iterate through items; fill_n holds count remaining.
while (true) {
if (chunk.chunk_n == 0) {
chunk_ref = chunk.next;
if (chunk_ref.offset == 0) {
break;
}
chunk = Chunk_read(chunk_ref);
fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
}
Instance ins = Instance_read(fill_ref);
PietFill fill = PietItem_Fill_read(PietItemRef(ins.item_ref));
// Process the fill polyline item.
uint max_n_segs = fill.n_points - 1;
uint chunk_n_segs = 0;
int backdrop = 0;
FillSegChunkRef seg_chunk_ref;
FillSegChunkRef first_seg_chunk = FillSegChunkRef(0);
vec2 start = Point_read(fill.points).xy;
for (uint j = 0; j < max_n_segs; j++) {
fill.points.offset += Point_size;
vec2 end = Point_read(fill.points).xy;
// Process one segment.
// TODO: I think this would go more smoothly (and be easier to
// make numerically robust) if it were based on clipping the line
// to the tile box. See:
// https://tavianator.com/fast-branchless-raybounding-box-intersections/
vec2 xymin = min(start, end);
vec2 xymax = max(start, end);
float a = end.y - start.y;
float b = start.x - end.x;
float c = -(a * start.x + b * start.y);
vec2 xy1 = xy0 + vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
float ytop = max(xy0.y, xymin.y);
float ybot = min(xy1.y, xymax.y);
float s00 = sign(b * ytop + a * xy0.x + c);
float s01 = sign(b * ytop + a * xy1.x + c);
float s10 = sign(b * ybot + a * xy0.x + c);
float s11 = sign(b * ybot + a * xy1.x + c);
float sTopLeft = sign(b * xy0.y + a * xy0.x + c);
if (sTopLeft == sign(a) && xymin.y <= xy0.y && xymax.y > xy0.y) {
backdrop -= int(s00);
}
// This is adapted from piet-metal but could be improved.
if (max(xymin.x, xy0.x) < min(xymax.x, xy1.x)
&& ytop < ybot
&& s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
{
// avoid overwriting `end` so that it can be used as start
vec2 enc_end = end;
if (xymin.x < xy0.x) {
float yEdge = mix(start.y, end.y, (start.x - xy0.x) / b);
if (yEdge >= xy0.y && yEdge < xy1.y) {
// This is encoded the same as a general fill segment, but could be
// special-cased, either here or in rendering. (It was special-cased
// in piet-metal).
FillSegment edge_seg;
if (b > 0.0) {
enc_end = vec2(xy0.x, yEdge);
edge_seg.start = enc_end;
edge_seg.end = vec2(xy0.x, xy1.y);
} else {
start = vec2(xy0.x, yEdge);
edge_seg.start = vec2(xy0.x, xy1.y);
edge_seg.end = start;
}
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), edge_seg);
chunk_n_segs++;
}
}
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
FillSegment seg = FillSegment(start, enc_end);
FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), seg);
chunk_n_segs++;
}
start = end;
}
FillItemHeader_write(item_header, FillItemHeader(backdrop, first_seg_chunk));
if (chunk_n_segs != 0) {
FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(0)));
seg_chunk_ref.offset += FillSegChunk_size + FillSegment_size * chunk_n_segs;
}
fill_ref.offset += Instance_size;
chunk.chunk_n--;
item_header.offset += FillItemHeader_size;
}
} else {
// As an optimization, we could just write 0 for the size.
FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, FillItemHeaderRef(0)));
}
}

Binary file not shown.


@@ -1,137 +0,0 @@
// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke
// (polyline) items in the scene and generates a list of segments for each, for
// each tile.
#version 450
#extension GL_GOOGLE_include_directive : enable
layout(local_size_x = 32) in;
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
layout(set = 0, binding = 2) buffer SegmentBuf {
uint[] segment;
};
layout(set = 0, binding = 3) buffer AllocBuf {
uint alloc;
};
#include "scene.h"
#include "tilegroup.h"
#include "segment.h"
#include "setup.h"
void main() {
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
+ (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START);
uint stroke_n = tilegroup[stroke_start.offset >> 2];
TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size);
if (stroke_n > 0) {
ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4);
Chunk chunk = Chunk_read(chunk_ref);
InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
SegChunkRef seg_chunk_ref = SegChunkRef(0);
uint seg_limit = 0;
// Iterate through items; stroke_n holds count remaining.
while (true) {
if (chunk.chunk_n == 0) {
chunk_ref = chunk.next;
if (chunk_ref.offset == 0) {
break;
}
chunk = Chunk_read(chunk_ref);
stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
}
Instance ins = Instance_read(stroke_ref);
PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref));
// Process the stroke polyline item.
uint max_n_segs = poly.n_points - 1;
uint chunk_n_segs = 0;
SegChunkRef seg_chunk_ref;
vec2 start = Point_read(poly.points).xy;
for (uint j = 0; j < max_n_segs; j++) {
poly.points.offset += Point_size;
vec2 end = Point_read(poly.points).xy;
// Process one segment.
// This logic just tests for collision. What we probably want to do
// is a clipping algorithm like Liang-Barsky, and then store coords
// relative to the tile in f16. See also:
// https://tavianator.com/fast-branchless-raybounding-box-intersections/
// Also note that when we go to the fancy version, we want to compute
// the (horizontal projection of) the bounding box of the intersection
// once per tilegroup, so we can assign work to individual tiles.
float a = end.y - start.y;
float b = start.x - end.x;
float c = -(a * start.x + b * start.y);
float half_width = 0.5 * poly.width;
// Tile boundaries padded by half-width.
float xmin = xy0.x - half_width;
float ymin = xy0.y - half_width;
float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width;
float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width;
float s00 = sign(b * ymin + a * xmin + c);
float s01 = sign(b * ymin + a * xmax + c);
float s10 = sign(b * ymax + a * xmin + c);
float s11 = sign(b * ymax + a * xmax + c);
// If bounding boxes intersect and not all four corners are on the same side, hit.
// Also note: this is designed to be false on NAN input.
if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax)
&& max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
&& s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
{
// Allocate a chunk if needed.
if (chunk_n_segs == 0) {
if (seg_chunk_ref.offset + 40 > seg_limit) {
seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
}
ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
} else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
seg_chunk_ref.offset = new_chunk_ref;
chunk_n_segs = 0;
}
Segment seg = Segment(start, end);
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
chunk_n_segs++;
}
start = end;
}
if (chunk_n_segs == 0) {
ItemHeader_write(item_header, ItemHeader(SegChunkRef(0)));
} else {
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
}
stroke_ref.offset += Instance_size;
chunk.chunk_n--;
item_header.offset += ItemHeader_size;
}
} else {
// As an optimization, we could just write 0 for the size.
TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0)));
}
}

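The corner-sign expression in the hit test above is terse; a short note on why it works (reasoning added here, not in the source):

// s00..s11 are sign(line equation) at the four padded-tile corners,
// each -1, 0, or +1. When all four corners lie strictly on one side of
// the line, every product s00*sXY is 1 and the sum is exactly 3; any
// corner on the other side (or on the line, contributing 0) drops the
// sum below 3. Comparisons involving NaN are false, which is why the
// test is "designed to be false on NAN input".
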
Binary file not shown.


@@ -1,135 +0,0 @@
// This is "kernel 3" in a 4-kernel pipeline. It walks the active items
// for the tilegroup and produces a per-tile command list for each tile.
#version 450
#extension GL_GOOGLE_include_directive : enable
layout(local_size_x = 32, local_size_y = 1) in;
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
// TODO: this should have a `readonly` qualifier, but then inclusion
// of ptcl.h would fail because of the writers.
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
// Used readonly
layout(set = 0, binding = 2) buffer SegmentBuf {
uint[] segment;
};
// Used readonly
layout(set = 0, binding = 3) buffer FillSegmentBuf {
uint[] fill_seg;
};
layout(set = 0, binding = 4) buffer PtclBuf {
uint[] ptcl;
};
layout(set = 0, binding = 5) buffer AllocBuf {
uint alloc;
};
#include "scene.h"
#include "tilegroup.h"
#include "segment.h"
#include "fill_seg.h"
#include "ptcl.h"
#include "setup.h"
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
if (cmd_ref.offset > cmd_limit) {
uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
CmdJump jump = CmdJump(new_cmd);
Cmd_Jump_write(cmd_ref, jump);
cmd_ref = CmdRef(new_cmd);
cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
}
}
void main() {
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
+ (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));
FillTileHeader fill_th = FillTileHeader_read(FillTileHeaderRef(tile_ix * FillTileHeader_size));
while (true) {
uint tg_tag = TileGroup_tag(tg_ref);
if (tg_tag == TileGroup_End) {
break;
}
if (tg_tag == TileGroup_Jump) {
tg_ref = TileGroup_Jump_read(tg_ref).new_ref;
continue;
}
// Assume tg_tag is `Instance`, though there will be more cases.
Instance ins = TileGroup_Instance_read(tg_ref);
PietItemRef item_ref = PietItemRef(ins.item_ref);
uint item_tag = PietItem_tag(item_ref);
switch (item_tag) {
case PietItem_Circle:
PietCircle circle = PietItem_Circle_read(item_ref);
vec2 center = ins.offset + circle.center.xy;
float r = circle.radius;
if (max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX))
&& max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
{
CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
alloc_cmd(cmd_ref, cmd_limit);
Cmd_Circle_write(cmd_ref, cmd);
cmd_ref.offset += Cmd_size;
}
break;
case PietItem_Poly:
ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
stroke_th.items.offset += ItemHeader_size;
if (stroke_item.segments.offset != 0) {
PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
CmdStroke cmd = CmdStroke(
stroke_item.segments.offset,
0.5 * poly.width,
poly.rgba_color
);
alloc_cmd(cmd_ref, cmd_limit);
Cmd_Stroke_write(cmd_ref, cmd);
cmd_ref.offset += Cmd_size;
}
break;
case PietItem_Fill:
FillItemHeader fill_item = FillItemHeader_read(fill_th.items);
fill_th.items.offset += FillItemHeader_size;
// TODO: handle segments == 0 but backdrop != 0 specially, it's a solid tile.
if (fill_item.segments.offset != 0) {
PietFill fill = PietItem_Fill_read(item_ref);
CmdFill cmd = CmdFill(
fill_item.segments.offset,
fill_item.backdrop,
fill.rgba_color
);
alloc_cmd(cmd_ref, cmd_limit);
Cmd_Fill_write(cmd_ref, cmd);
cmd_ref.offset += Cmd_size;
} else if (fill_item.backdrop != 0) {
// TODO: truncate existing cmd list if alpha is opaque
PietFill fill = PietItem_Fill_read(item_ref);
alloc_cmd(cmd_ref, cmd_limit);
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
cmd_ref.offset += Cmd_size;
}
break;
}
tg_ref.offset += TileGroup_size;
}
Cmd_End_write(cmd_ref);
}

Binary file not shown.


@@ -9,26 +9,14 @@
layout(local_size_x = 16, local_size_y = 16) in;
// Same concern that this should be readonly as in kernel 3.
// This should be annotated readonly but infra doesn't support that yet.
layout(set = 0, binding = 0) buffer PtclBuf {
uint[] ptcl;
};
// Used readonly
layout(set = 0, binding = 1) buffer SegmentBuf {
uint[] segment;
};
// Used readonly
layout(set = 0, binding = 2) buffer FillSegBuf {
uint[] fill_seg;
};
layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;
layout(rgba8, set = 0, binding = 1) uniform writeonly image2D image;
#include "ptcl.h"
#include "segment.h"
#include "fill_seg.h"
#include "setup.h"
@@ -79,11 +67,11 @@ void main() {
CmdFill fill = Cmd_Fill_read(cmd_ref);
// Probably better to store as float, but conversion is no doubt cheap.
float area = float(fill.backdrop);
FillSegChunkRef fill_seg_chunk_ref = FillSegChunkRef(fill.seg_ref);
SegChunkRef fill_seg_chunk_ref = SegChunkRef(fill.seg_ref);
do {
FillSegChunk seg_chunk = FillSegChunk_read(fill_seg_chunk_ref);
SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
for (int i = 0; i < seg_chunk.n; i++) {
FillSegment seg = FillSegment_read(FillSegmentRef(fill_seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * i));
Segment seg = Segment_read(SegmentRef(fill_seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
vec2 start = seg.start - xy;
vec2 end = seg.end - xy;
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);

Binary file not shown.


@@ -36,6 +36,14 @@ struct CmdRef {
uint offset;
};
struct SegmentRef {
uint offset;
};
struct SegChunkRef {
uint offset;
};
struct CmdCircle {
vec2 center;
float radius;
@@ -141,6 +149,28 @@ CmdRef Cmd_index(CmdRef ref, uint index) {
return CmdRef(ref.offset + index * Cmd_size);
}
struct Segment {
vec2 start;
vec2 end;
};
#define Segment_size 16
SegmentRef Segment_index(SegmentRef ref, uint index) {
return SegmentRef(ref.offset + index * Segment_size);
}
struct SegChunk {
uint n;
SegChunkRef next;
};
#define SegChunk_size 8
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
return SegChunkRef(ref.offset + index * SegChunk_size);
}
CmdCircle CmdCircle_read(CmdCircleRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0];
@@ -362,3 +392,39 @@ void Cmd_Bail_write(CmdRef ref) {
ptcl[ref.offset >> 2] = Cmd_Bail;
}
Segment Segment_read(SegmentRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0];
uint raw1 = ptcl[ix + 1];
uint raw2 = ptcl[ix + 2];
uint raw3 = ptcl[ix + 3];
Segment s;
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s;
}
void Segment_write(SegmentRef ref, Segment s) {
uint ix = ref.offset >> 2;
ptcl[ix + 0] = floatBitsToUint(s.start.x);
ptcl[ix + 1] = floatBitsToUint(s.start.y);
ptcl[ix + 2] = floatBitsToUint(s.end.x);
ptcl[ix + 3] = floatBitsToUint(s.end.y);
}
SegChunk SegChunk_read(SegChunkRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0];
uint raw1 = ptcl[ix + 1];
SegChunk s;
s.n = raw0;
s.next = SegChunkRef(raw1);
return s;
}
void SegChunk_write(SegChunkRef ref, SegChunk s) {
uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.n;
ptcl[ix + 1] = s.next.offset;
}


@@ -1,126 +0,0 @@
// Code auto-generated by piet-gpu-derive
struct TileHeaderRef {
uint offset;
};
struct ItemHeaderRef {
uint offset;
};
struct SegmentRef {
uint offset;
};
struct SegChunkRef {
uint offset;
};
struct TileHeader {
uint n;
ItemHeaderRef items;
};
#define TileHeader_size 8
TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) {
return TileHeaderRef(ref.offset + index * TileHeader_size);
}
struct ItemHeader {
SegChunkRef segments;
};
#define ItemHeader_size 4
ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) {
return ItemHeaderRef(ref.offset + index * ItemHeader_size);
}
struct Segment {
vec2 start;
vec2 end;
};
#define Segment_size 16
SegmentRef Segment_index(SegmentRef ref, uint index) {
return SegmentRef(ref.offset + index * Segment_size);
}
struct SegChunk {
uint n;
SegChunkRef next;
};
#define SegChunk_size 8
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
return SegChunkRef(ref.offset + index * SegChunk_size);
}
TileHeader TileHeader_read(TileHeaderRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = segment[ix + 0];
uint raw1 = segment[ix + 1];
TileHeader s;
s.n = raw0;
s.items = ItemHeaderRef(raw1);
return s;
}
void TileHeader_write(TileHeaderRef ref, TileHeader s) {
uint ix = ref.offset >> 2;
segment[ix + 0] = s.n;
segment[ix + 1] = s.items.offset;
}
ItemHeader ItemHeader_read(ItemHeaderRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = segment[ix + 0];
ItemHeader s;
s.segments = SegChunkRef(raw0);
return s;
}
void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) {
uint ix = ref.offset >> 2;
segment[ix + 0] = s.segments.offset;
}
Segment Segment_read(SegmentRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = segment[ix + 0];
uint raw1 = segment[ix + 1];
uint raw2 = segment[ix + 2];
uint raw3 = segment[ix + 3];
Segment s;
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s;
}
void Segment_write(SegmentRef ref, Segment s) {
uint ix = ref.offset >> 2;
segment[ix + 0] = floatBitsToUint(s.start.x);
segment[ix + 1] = floatBitsToUint(s.start.y);
segment[ix + 2] = floatBitsToUint(s.end.x);
segment[ix + 3] = floatBitsToUint(s.end.y);
}
SegChunk SegChunk_read(SegChunkRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = segment[ix + 0];
uint raw1 = segment[ix + 1];
SegChunk s;
s.n = raw0;
s.next = SegChunkRef(raw1);
return s;
}
void SegChunk_write(SegChunkRef ref, SegChunk s) {
uint ix = ref.offset >> 2;
segment[ix + 0] = s.n;
segment[ix + 1] = s.next.offset;
}


@@ -209,16 +209,11 @@ impl<D: Device> Renderer<D> {
&[],
)?;
// These will probably be combined with the ptcl buf, as they're all written by the
// same kernel now.
let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
let k4_code = include_bytes!("../shader/kernel4.spv");
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1)?;
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 1, 1)?;
let k4_ds = device.create_descriptor_set(
&k4_pipeline,
&[&ptcl_buf, &segment_buf, &fill_seg_buf],
&[&ptcl_buf],
&[&image_dev],
)?;