mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
Delete old-style kernels and buffers
Pave the way for the coarse raster pass to write to the ptcl buffer.
This commit is contained in:
parent
3a6428238b
commit
1240da3870
|
@ -1,37 +0,0 @@
|
||||||
use piet_gpu_derive::piet_gpu;
|
|
||||||
|
|
||||||
// Structures representing segments for fill items.
|
|
||||||
|
|
||||||
// There is some cut'n'paste here from stroke segments, which can be
|
|
||||||
// traced to the fact that buffers in GLSL are basically global.
|
|
||||||
// Maybe there's a way to address that, but in the meantime living
|
|
||||||
// with the duplication is easiest.
|
|
||||||
|
|
||||||
// Declares the `fill_seg` GPU data module through the `piet_gpu!` macro
// (imported from `piet_gpu_derive` at the top of this file). These types
// describe the per-tile fill segment lists written by the fill kernel.
piet_gpu! {
|
|
||||||
// NOTE(review): presumably asks the derive macro to also emit `*_write`
// accessors for these types -- confirm in piet-gpu-derive.
#[gpu_write]
|
|
||||||
mod fill_seg {
|
|
||||||
// Per-tile header: `n` is the number of fill items for the tile and
// `items` references the first of `n` consecutive `FillItemHeader`s.
struct FillTileHeader {
|
|
||||||
n: u32,
|
|
||||||
items: Ref<FillItemHeader>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Per-item header: the accumulated backdrop value for the tile plus a
// reference to the first chunk of the item's segment list.
struct FillItemHeader {
|
|
||||||
backdrop: i32,
|
|
||||||
segments: Ref<FillSegChunk>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: strongly consider using f16. If so, these would be
|
|
||||||
// relative to the tile. We're doing f32 for now to minimize
|
|
||||||
// divergence from piet-metal originals.
|
|
||||||
// One line segment, given by its two end points.
struct FillSegment {
|
|
||||||
start: [f32; 2],
|
|
||||||
end: [f32; 2],
|
|
||||||
}
|
|
||||||
|
|
||||||
// Header of one chunk in a linked list of segments: `n` segments are
// stored directly after the header, `next` references the following chunk.
struct FillSegChunk {
|
|
||||||
n: u32,
|
|
||||||
next: Ref<FillSegChunk>,
|
|
||||||
// Segments follow (could represent this as a variable sized array).
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -3,10 +3,8 @@
|
||||||
pub mod annotated;
|
pub mod annotated;
|
||||||
pub mod bins;
|
pub mod bins;
|
||||||
pub mod encoder;
|
pub mod encoder;
|
||||||
pub mod fill_seg;
|
|
||||||
pub mod ptcl;
|
pub mod ptcl;
|
||||||
pub mod scene;
|
pub mod scene;
|
||||||
pub mod segment;
|
|
||||||
pub mod state;
|
pub mod state;
|
||||||
pub mod test;
|
pub mod test;
|
||||||
pub mod tilegroup;
|
pub mod tilegroup;
|
||||||
|
|
|
@ -9,8 +9,6 @@ fn main() {
|
||||||
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
|
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
|
||||||
"bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
|
"bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
|
||||||
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
||||||
"segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
|
|
||||||
"fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()),
|
|
||||||
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
|
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
|
||||||
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
|
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
|
||||||
_ => println!("Oops, unknown module name"),
|
_ => println!("Oops, unknown module name"),
|
||||||
|
|
|
@ -13,13 +13,13 @@ piet_gpu! {
|
||||||
end: [f32; 2],
|
end: [f32; 2],
|
||||||
}
|
}
|
||||||
struct CmdStroke {
|
struct CmdStroke {
|
||||||
// Should be Ref<SegChunk> if we had cross-module references.
|
// Should be Ref<SegChunk>
|
||||||
seg_ref: u32,
|
seg_ref: u32,
|
||||||
half_width: f32,
|
half_width: f32,
|
||||||
rgba_color: u32,
|
rgba_color: u32,
|
||||||
}
|
}
|
||||||
struct CmdFill {
|
struct CmdFill {
|
||||||
// Should be Ref<FillSegChunk> if we had cross-module references.
|
// Should be Ref<FillSegChunk>
|
||||||
seg_ref: u32,
|
seg_ref: u32,
|
||||||
backdrop: i32,
|
backdrop: i32,
|
||||||
rgba_color: u32,
|
rgba_color: u32,
|
||||||
|
@ -51,5 +51,19 @@ piet_gpu! {
|
||||||
Jump(CmdJump),
|
Jump(CmdJump),
|
||||||
Bail,
|
Bail,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: strongly consider using f16. If so, these would be
|
||||||
|
// relative to the tile. We're doing f32 for now to minimize
|
||||||
|
// divergence from piet-metal originals.
|
||||||
|
struct Segment {
|
||||||
|
start: [f32; 2],
|
||||||
|
end: [f32; 2],
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SegChunk {
|
||||||
|
n: u32,
|
||||||
|
next: Ref<SegChunk>,
|
||||||
|
// Segments follow (could represent this as a variable sized array).
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,32 +0,0 @@
|
||||||
use piet_gpu_derive::piet_gpu;
|
|
||||||
|
|
||||||
// Structures representing segments for stroke/fill items.
|
|
||||||
|
|
||||||
// Declares the `segment` GPU data module through the `piet_gpu!` macro
// (imported from `piet_gpu_derive` at the top of this file). These types
// describe the per-tile stroke segment lists.
piet_gpu! {
|
|
||||||
// NOTE(review): presumably asks the derive macro to also emit `*_write`
// accessors for these types -- confirm in piet-gpu-derive.
#[gpu_write]
|
|
||||||
mod segment {
|
|
||||||
// Per-tile header: `n` is the number of items for the tile and `items`
// references the first of `n` consecutive `ItemHeader`s.
struct TileHeader {
|
|
||||||
n: u32,
|
|
||||||
items: Ref<ItemHeader>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note: this is only suitable for strokes, fills require backdrop.
|
|
||||||
// Per-item header: reference to the first chunk of the item's segments.
struct ItemHeader {
|
|
||||||
segments: Ref<SegChunk>,
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: strongly consider using f16. If so, these would be
|
|
||||||
// relative to the tile. We're doing f32 for now to minimize
|
|
||||||
// divergence from piet-metal originals.
|
|
||||||
// One line segment, given by its two end points.
struct Segment {
|
|
||||||
start: [f32; 2],
|
|
||||||
end: [f32; 2],
|
|
||||||
}
|
|
||||||
|
|
||||||
// Header of one chunk in a linked list of segments: `n` segments are
// stored directly after the header, `next` references the following chunk.
struct SegChunk {
|
|
||||||
n: u32,
|
|
||||||
next: Ref<SegChunk>,
|
|
||||||
// Segments follow (could represent this as a variable sized array).
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -9,19 +9,11 @@ rule glsl
|
||||||
|
|
||||||
build image.spv: glsl image.comp | scene.h
|
build image.spv: glsl image.comp | scene.h
|
||||||
|
|
||||||
build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
|
|
||||||
|
|
||||||
build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
|
|
||||||
|
|
||||||
build kernel2f.spv: glsl kernel2f.comp | scene.h tilegroup.h fill_seg.h setup.h
|
|
||||||
|
|
||||||
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h ptcl.h setup.h
|
|
||||||
|
|
||||||
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h
|
|
||||||
|
|
||||||
|
|
||||||
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
||||||
|
|
||||||
build binning.spv: glsl binning.comp | annotated.h bins.h setup.h
|
build binning.spv: glsl binning.comp | annotated.h bins.h setup.h
|
||||||
|
|
||||||
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
||||||
|
|
||||||
|
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
|
||||||
|
|
|
@ -1,130 +0,0 @@
|
||||||
// Code auto-generated by piet-gpu-derive
|
|
||||||
|
|
||||||
// Typed byte offsets into the `fill_seg` buffer, one wrapper per record type.
struct FillTileHeaderRef {
    uint offset;
};

struct FillItemHeaderRef {
    uint offset;
};

struct FillSegmentRef {
    uint offset;
};

struct FillSegChunkRef {
    uint offset;
};

// Per-tile header: item count and reference to the first item header.
struct FillTileHeader {
    uint n;
    FillItemHeaderRef items;
};

#define FillTileHeader_size 8

// Returns a reference to the `index`-th FillTileHeader counted from `ref`.
FillTileHeaderRef FillTileHeader_index(FillTileHeaderRef ref, uint index) {
    ref.offset += index * FillTileHeader_size;
    return ref;
}

// Per-item header: backdrop value and reference to the first segment chunk.
struct FillItemHeader {
    int backdrop;
    FillSegChunkRef segments;
};

#define FillItemHeader_size 8

// Returns a reference to the `index`-th FillItemHeader counted from `ref`.
FillItemHeaderRef FillItemHeader_index(FillItemHeaderRef ref, uint index) {
    ref.offset += index * FillItemHeader_size;
    return ref;
}

// A line segment stored as two points.
struct FillSegment {
    vec2 start;
    vec2 end;
};

#define FillSegment_size 16

// Returns a reference to the `index`-th FillSegment counted from `ref`.
FillSegmentRef FillSegment_index(FillSegmentRef ref, uint index) {
    ref.offset += index * FillSegment_size;
    return ref;
}

// Chunk header: segment count and reference to the next chunk.
struct FillSegChunk {
    uint n;
    FillSegChunkRef next;
};

#define FillSegChunk_size 8

// Returns a reference to the `index`-th FillSegChunk counted from `ref`.
FillSegChunkRef FillSegChunk_index(FillSegChunkRef ref, uint index) {
    ref.offset += index * FillSegChunk_size;
    return ref;
}
|
|
||||||
|
|
||||||
// Accessors below address the `fill_seg` buffer as an array of 32-bit words;
// the including shader must declare `uint[] fill_seg` before this header.
// `ref.offset` is a byte offset, hence the shift by two to get a word index.

// Loads a FillTileHeader from the buffer.
FillTileHeader FillTileHeader_read(FillTileHeaderRef ref) {
    uint base = ref.offset >> 2;
    uint word_n = fill_seg[base];
    uint word_items = fill_seg[base + 1];
    return FillTileHeader(word_n, FillItemHeaderRef(word_items));
}

// Stores a FillTileHeader into the buffer.
void FillTileHeader_write(FillTileHeaderRef ref, FillTileHeader s) {
    uint base = ref.offset >> 2;
    fill_seg[base] = s.n;
    fill_seg[base + 1] = s.items.offset;
}

// Loads a FillItemHeader; the backdrop word is reinterpreted as signed.
FillItemHeader FillItemHeader_read(FillItemHeaderRef ref) {
    uint base = ref.offset >> 2;
    uint word_backdrop = fill_seg[base];
    uint word_segments = fill_seg[base + 1];
    return FillItemHeader(int(word_backdrop), FillSegChunkRef(word_segments));
}

// Stores a FillItemHeader; the signed backdrop is stored as its uint bits.
void FillItemHeader_write(FillItemHeaderRef ref, FillItemHeader s) {
    uint base = ref.offset >> 2;
    fill_seg[base] = uint(s.backdrop);
    fill_seg[base + 1] = s.segments.offset;
}

// Loads a FillSegment: four words bit-cast to floats (start.xy, end.xy).
FillSegment FillSegment_read(FillSegmentRef ref) {
    uint base = ref.offset >> 2;
    uvec2 start_bits = uvec2(fill_seg[base], fill_seg[base + 1]);
    uvec2 end_bits = uvec2(fill_seg[base + 2], fill_seg[base + 3]);
    return FillSegment(uintBitsToFloat(start_bits), uintBitsToFloat(end_bits));
}

// Stores a FillSegment as four bit-cast words (start.xy, end.xy).
void FillSegment_write(FillSegmentRef ref, FillSegment s) {
    uint base = ref.offset >> 2;
    uvec2 start_bits = floatBitsToUint(s.start);
    uvec2 end_bits = floatBitsToUint(s.end);
    fill_seg[base] = start_bits.x;
    fill_seg[base + 1] = start_bits.y;
    fill_seg[base + 2] = end_bits.x;
    fill_seg[base + 3] = end_bits.y;
}

// Loads a FillSegChunk header.
FillSegChunk FillSegChunk_read(FillSegChunkRef ref) {
    uint base = ref.offset >> 2;
    uint word_n = fill_seg[base];
    uint word_next = fill_seg[base + 1];
    return FillSegChunk(word_n, FillSegChunkRef(word_next));
}

// Stores a FillSegChunk header.
void FillSegChunk_write(FillSegChunkRef ref, FillSegChunk s) {
    uint base = ref.offset >> 2;
    fill_seg[base] = s.n;
    fill_seg[base + 1] = s.next.offset;
}
|
|
||||||
|
|
|
@ -1,161 +0,0 @@
|
||||||
// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
|
|
||||||
// and outputs "instances" (references to item + translation) for each item
|
|
||||||
// that intersects the tilegroup.
|
|
||||||
//
|
|
||||||
// This implementation is simplistic and leaves a lot of performance on the
|
|
||||||
// table. A fancier implementation would use threadgroup shared memory or
|
|
||||||
// subgroups (or possibly both) to parallelize the reading of the input and
|
|
||||||
// the computation of tilegroup intersection.
|
|
||||||
//
|
|
||||||
// In addition, there are some features currently missing, such as support
|
|
||||||
// for clipping.
|
|
||||||
|
|
||||||
#version 450
|
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
|
||||||
|
|
||||||
// It's possible we should lay this out with x and do our own math.
|
|
||||||
layout(local_size_x = 1, local_size_y = 32) in;
|
|
||||||
|
|
||||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
|
||||||
uint[] scene;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
|
||||||
uint[] tilegroup;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer AllocBuf {
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
#include "scene.h"
|
|
||||||
#include "tilegroup.h"
|
|
||||||
|
|
||||||
#include "setup.h"
|
|
||||||
|
|
||||||
#define MAX_STACK 8
|
|
||||||
|
|
||||||
// One saved position in the scene-graph traversal performed by main().
struct StackElement {
|
|
||||||
// Group item whose children are being walked.
PietItemRef group;
|
|
||||||
// Index of the next child of `group` to visit.
uint index;
|
|
||||||
// Translation accumulated from the root down to `group`.
vec2 offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Appends `ins` to a chunked instance list in the tilegroup buffer. When the
// write cursor has passed `limit`, a fresh region of `grow_bytes` is bump
// allocated, the current chunk header is finalized to link to it, and the
// cursor moves to the first instance slot of the new chunk.
void append_instance(Instance ins, uint grow_bytes, inout ChunkRef chunk_head,
                     inout InstanceRef cursor, inout uint limit,
                     inout uint chunk_count, inout uint total_count)
{
    if (cursor.offset > limit) {
        uint fresh = atomicAdd(alloc, grow_bytes);
        Chunk_write(chunk_head, Chunk(chunk_count, ChunkRef(fresh)));
        chunk_head = ChunkRef(fresh);
        cursor = InstanceRef(fresh + Chunk_size);
        total_count += chunk_count;
        chunk_count = 0;
        limit = fresh + grow_bytes - Instance_size;
    }
    Instance_write(cursor, ins);
    chunk_count++;
    cursor.offset += Instance_size;
}

// Entry point: one invocation per tilegroup. Walks the scene graph depth
// first and records every non-group item whose bounding box overlaps this
// tilegroup, both in the general instance list and in the per-kind
// (stroke / fill) chunked lists.
void main() {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;
    uvec2 gid = gl_GlobalInvocationID.xy;
    uint tilegroup_ix = gid.y * WIDTH_IN_TILEGROUPS + gid.x;
    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
    // Keep room for a trailing Jump or End record.
    uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;

    // Stroke list: a count word, then a chunk header, then instances.
    TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START);
    ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4);
    InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size);
    uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_STROKE_ALLOC - Instance_size;
    uint stroke_chunk_n = 0;
    uint stroke_n = 0;

    // Fill list: same layout as the stroke list.
    TileGroupRef fill_start = TileGroupRef(tg_ref.offset + TILEGROUP_FILL_START);
    ChunkRef fill_chunk_start = ChunkRef(fill_start.offset + 4);
    InstanceRef fill_ref = InstanceRef(fill_chunk_start.offset + Chunk_size);
    uint fill_limit = fill_start.offset + TILEGROUP_INITIAL_FILL_ALLOC - Instance_size;
    uint fill_chunk_n = 0;
    uint fill_n = 0;

    // Pixel-space corners of this tilegroup.
    vec2 xy0 = vec2(gid) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
    vec2 xy1 = xy0 + vec2(float(TILEGROUP_WIDTH_PX), float(TILEGROUP_HEIGHT_PX));

    PietItemRef root = PietItemRef(0);
    SimpleGroup group = PietItem_Group_read(root);
    StackElement tos = StackElement(root, 0, group.offset.xy);

    while (true) {
        if (tos.index >= group.n_items) {
            // Processed all items in this group; pop the stack.
            if (stack_ix == 0) {
                break;
            }
            stack_ix--;
            tos = stack[stack_ix];
            group = PietItem_Group_read(tos.group);
            continue;
        }

        Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
        vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
        bool hit = all(lessThan(max(bb.xy, xy0), min(bb.zw, xy1)));
        if (!hit) {
            tos.index++;
            continue;
        }

        PietItemRef item_ref = PietItem_index(group.items, tos.index);
        uint tag = PietItem_tag(item_ref);

        if (tag == PietItem_Group) {
            // Descend. The parent is only saved when it still has children
            // left to visit after this one.
            tos.index++;
            if (tos.index < group.n_items) {
                stack[stack_ix] = tos;
                stack_ix++;
            }
            group = PietItem_Group_read(item_ref);
            tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
            continue;
        }

        Instance ins = Instance(item_ref.offset, tos.offset);
        if (tg_ref.offset > tg_limit) {
            // Allocation exceeded; do atomic bump alloc and link with a Jump.
            uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
            TileGroup_Jump_write(tg_ref, Jump(TileGroupRef(new_tg)));
            tg_ref = TileGroupRef(new_tg);
            tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
        }
        TileGroup_Instance_write(tg_ref, ins);
        tg_ref.offset += TileGroup_size;

        if (tag == PietItem_Poly) {
            append_instance(ins, TILEGROUP_STROKE_ALLOC, stroke_chunk_start,
                            stroke_ref, stroke_limit, stroke_chunk_n, stroke_n);
        } else if (tag == PietItem_Fill) {
            append_instance(ins, TILEGROUP_FILL_ALLOC, fill_chunk_start,
                            fill_ref, fill_limit, fill_chunk_n, fill_n);
        }
        tos.index++;
    }
    TileGroup_End_write(tg_ref);

    // Finalize the last stroke chunk and publish the total count.
    stroke_n += stroke_chunk_n;
    if (stroke_n > 0) {
        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0)));
    }
    tilegroup[stroke_start.offset >> 2] = stroke_n;

    // Finalize the last fill chunk and publish the total count.
    fill_n += fill_chunk_n;
    if (fill_n > 0) {
        Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(0)));
    }
    tilegroup[fill_start.offset >> 2] = fill_n;
}
|
|
Binary file not shown.
|
@ -1,167 +0,0 @@
|
||||||
// This is "kernel 2" (fill) in a 4-kernel pipeline. It processes the fill
|
|
||||||
// (polyline) items in the scene and generates a list of segments for each, for
|
|
||||||
// each tile.
|
|
||||||
|
|
||||||
#version 450
|
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
|
||||||
|
|
||||||
layout(local_size_x = 32) in;
|
|
||||||
|
|
||||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
|
||||||
uint[] scene;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
|
||||||
uint[] tilegroup;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer FillSegBuf {
|
|
||||||
uint[] fill_seg;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 3) buffer AllocBuf {
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
#include "scene.h"
|
|
||||||
#include "tilegroup.h"
|
|
||||||
#include "fill_seg.h"
|
|
||||||
|
|
||||||
#include "setup.h"
|
|
||||||
|
|
||||||
// Ensure that there is space to encode a segment.
|
|
||||||
// Ensure that there is space to encode a segment.
//
// chunk_n_segs    - segments already written to the current chunk; reset to 0
//                   when a continuation chunk is started.
// seg_chunk_ref   - current chunk; moved to freshly allocated memory if needed.
// first_seg_chunk - set to the chunk that receives an item's first segment.
// seg_limit       - highest offset at which a segment may still be written
//                   inside the current allocation.
void alloc_chunk(inout uint chunk_n_segs, inout FillSegChunkRef seg_chunk_ref,
    inout FillSegChunkRef first_seg_chunk, inout uint seg_limit)
{
    if (chunk_n_segs == 0) {
        // Starting an item's first chunk. 40 bytes equals the chunk header
        // plus two segments with the sizes defined in fill_seg.h.
        bool too_tight = seg_chunk_ref.offset + 40 > seg_limit;
        if (too_tight) {
            seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
            seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - FillSegment_size;
        }
        first_seg_chunk = seg_chunk_ref;
        return;
    }

    uint next_slot = seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs;
    if (next_slot <= seg_limit) {
        return;
    }

    // Current allocation is full: close this chunk with a link to a new one.
    uint fresh = atomicAdd(alloc, SEG_CHUNK_ALLOC);
    seg_limit = fresh + SEG_CHUNK_ALLOC - FillSegment_size;
    FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(fresh)));
    seg_chunk_ref.offset = fresh;
    chunk_n_segs = 0;
}
|
|
||||||
|
|
||||||
// Entry point: one invocation per tile. Reads the fill instance list of the
// enclosing tilegroup and, for every fill item, writes a FillItemHeader
// (backdrop + first segment chunk) and the chunked list of segments that
// touch this tile.
void main() {
    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    TileGroupRef fill_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_FILL_START);
    uint fill_n = tilegroup[fill_start.offset >> 2];

    FillTileHeaderRef tile_header_ref = FillTileHeaderRef(tile_ix * FillTileHeader_size);
    if (fill_n > 0) {
        ChunkRef chunk_ref = ChunkRef(fill_start.offset + 4);
        Chunk chunk = Chunk_read(chunk_ref);
        InstanceRef fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
        FillItemHeaderRef item_header = FillItemHeaderRef(atomicAdd(alloc, fill_n * FillItemHeader_size));
        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, item_header));
        // Write cursor into the segment allocation, shared by all items of
        // this tile. seg_limit == 0 forces an allocation on first use.
        FillSegChunkRef seg_chunk_ref = FillSegChunkRef(0);
        uint seg_limit = 0;
        // Iterate through items; chunk.chunk_n counts the instances left in
        // the current chunk, and a zero `next` reference ends the list.
        while (true) {
            if (chunk.chunk_n == 0) {
                chunk_ref = chunk.next;
                if (chunk_ref.offset == 0) {
                    break;
                }
                chunk = Chunk_read(chunk_ref);
                fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
            }
            Instance ins = Instance_read(fill_ref);
            PietFill fill = PietItem_Fill_read(PietItemRef(ins.item_ref));

            // Process the fill polyline item.
            // NOTE(review): n_points == 0 would wrap max_n_segs; presumably the
            // scene encoder never emits an empty polyline -- confirm.
            uint max_n_segs = fill.n_points - 1;
            uint chunk_n_segs = 0;
            int backdrop = 0;
            // Deliberately no per-item `seg_chunk_ref` declaration here: the
            // original re-declared it uninitialized, which shadowed the cursor
            // above, made alloc_chunk read an undefined offset and discarded
            // the cursor advance done at the end of each item.
            FillSegChunkRef first_seg_chunk = FillSegChunkRef(0);
            vec2 start = Point_read(fill.points).xy;
            for (uint j = 0; j < max_n_segs; j++) {
                fill.points.offset += Point_size;
                vec2 end = Point_read(fill.points).xy;

                // Process one segment.

                // TODO: I think this would go more smoothly (and be easier to
                // make numerically robust) if it were based on clipping the line
                // to the tile box. See:
                // https://tavianator.com/fast-branchless-raybounding-box-intersections/
                vec2 xymin = min(start, end);
                vec2 xymax = max(start, end);
                // Implicit line equation a*x + b*y + c = 0 through start/end.
                float a = end.y - start.y;
                float b = start.x - end.x;
                float c = -(a * start.x + b * start.y);
                vec2 xy1 = xy0 + vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
                float ytop = max(xy0.y, xymin.y);
                float ybot = min(xy1.y, xymax.y);
                // Side of the line for each corner of the y-clipped tile box.
                float s00 = sign(b * ytop + a * xy0.x + c);
                float s01 = sign(b * ytop + a * xy1.x + c);
                float s10 = sign(b * ybot + a * xy0.x + c);
                float s11 = sign(b * ybot + a * xy1.x + c);
                float sTopLeft = sign(b * xy0.y + a * xy0.x + c);
                if (sTopLeft == sign(a) && xymin.y <= xy0.y && xymax.y > xy0.y) {
                    backdrop -= int(s00);
                }

                // This is adapted from piet-metal but could be improved.

                // Hit when the bounding boxes overlap and the four corners are
                // not all on the same side of the line.
                if (max(xymin.x, xy0.x) < min(xymax.x, xy1.x)
                    && ytop < ybot
                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
                {
                    // avoid overwriting `end` so that it can be used as start
                    vec2 enc_end = end;
                    if (xymin.x < xy0.x) {
                        // y coordinate where the segment crosses the left edge.
                        float yEdge = mix(start.y, end.y, (start.x - xy0.x) / b);
                        if (yEdge >= xy0.y && yEdge < xy1.y) {
                            // This is encoded the same as a general fill segment, but could be
                            // special-cased, either here or in rendering. (It was special-cased
                            // in piet-metal).
                            FillSegment edge_seg;
                            if (b > 0.0) {
                                enc_end = vec2(xy0.x, yEdge);
                                edge_seg.start = enc_end;
                                edge_seg.end = vec2(xy0.x, xy1.y);
                            } else {
                                start = vec2(xy0.x, yEdge);
                                edge_seg.start = vec2(xy0.x, xy1.y);
                                edge_seg.end = start;
                            }
                            alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
                            FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), edge_seg);
                            chunk_n_segs++;
                        }
                    }
                    alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
                    FillSegment seg = FillSegment(start, enc_end);
                    FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), seg);
                    chunk_n_segs++;
                }

                start = end;
            }
            FillItemHeader_write(item_header, FillItemHeader(backdrop, first_seg_chunk));
            if (chunk_n_segs != 0) {
                // Terminate the item's last chunk and move the shared cursor
                // past it so the next item continues in the same allocation.
                FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(0)));
                seg_chunk_ref.offset += FillSegChunk_size + FillSegment_size * chunk_n_segs;
            }

            fill_ref.offset += Instance_size;
            chunk.chunk_n--;
            item_header.offset += FillItemHeader_size;
        }
    } else {
        // As an optimization, we could just write 0 for the size.
        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, FillItemHeaderRef(0)));
    }
}
|
|
Binary file not shown.
|
@ -1,137 +0,0 @@
|
||||||
// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke
|
|
||||||
// (polyline) items in the scene and generates a list of segments for each, for
|
|
||||||
// each tile.
|
|
||||||
|
|
||||||
#version 450
|
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
|
||||||
|
|
||||||
layout(local_size_x = 32) in;
|
|
||||||
|
|
||||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
|
||||||
uint[] scene;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
|
||||||
uint[] tilegroup;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer SegmentBuf {
|
|
||||||
uint[] segment;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 3) buffer AllocBuf {
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
#include "scene.h"
|
|
||||||
#include "tilegroup.h"
|
|
||||||
#include "segment.h"
|
|
||||||
|
|
||||||
#include "setup.h"
|
|
||||||
|
|
||||||
// Entry point: one invocation per tile. Reads the stroke instance list of the
// enclosing tilegroup and, for every stroke item, writes an ItemHeader that
// references the chunked list of segments whose padded extent touches this
// tile (or a zero reference when there are none).
void main() {
    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START);
    uint stroke_n = tilegroup[stroke_start.offset >> 2];

    TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size);
    if (stroke_n > 0) {
        ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4);
        Chunk chunk = Chunk_read(chunk_ref);
        InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
        ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
        TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
        // Write cursor into the segment allocation, shared by all items of
        // this tile. seg_limit == 0 forces an allocation on first use.
        SegChunkRef seg_chunk_ref = SegChunkRef(0);
        uint seg_limit = 0;
        // Iterate through items; chunk.chunk_n counts the instances left in
        // the current chunk, and a zero `next` reference ends the list.
        while (true) {
            if (chunk.chunk_n == 0) {
                chunk_ref = chunk.next;
                if (chunk_ref.offset == 0) {
                    break;
                }
                chunk = Chunk_read(chunk_ref);
                stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
            }
            Instance ins = Instance_read(stroke_ref);
            PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref));

            // Process the stroke polyline item.
            // NOTE(review): n_points == 0 would wrap max_n_segs; presumably the
            // scene encoder never emits an empty polyline -- confirm.
            uint max_n_segs = poly.n_points - 1;
            uint chunk_n_segs = 0;
            // Deliberately no per-item `seg_chunk_ref` declaration here: the
            // original re-declared it uninitialized, which shadowed the cursor
            // above, made the space check read an undefined offset and
            // discarded the cursor advance done at the end of each item.
            vec2 start = Point_read(poly.points).xy;
            for (uint j = 0; j < max_n_segs; j++) {
                poly.points.offset += Point_size;
                vec2 end = Point_read(poly.points).xy;

                // Process one segment.

                // This logic just tests for collision. What we probably want to do
                // is a clipping algorithm like Liang-Barsky, and then store coords
                // relative to the tile in f16. See also:
                // https://tavianator.com/fast-branchless-raybounding-box-intersections/

                // Also note that when we go to the fancy version, we want to compute
                // the (horizontal projection of) the bounding box of the intersection
                // once per tilegroup, so we can assign work to individual tiles.

                // Implicit line equation a*x + b*y + c = 0 through start/end.
                float a = end.y - start.y;
                float b = start.x - end.x;
                float c = -(a * start.x + b * start.y);
                float half_width = 0.5 * poly.width;
                // Tile boundaries padded by half-width.
                float xmin = xy0.x - half_width;
                float ymin = xy0.y - half_width;
                float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width;
                float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width;
                float s00 = sign(b * ymin + a * xmin + c);
                float s01 = sign(b * ymin + a * xmax + c);
                float s10 = sign(b * ymax + a * xmin + c);
                float s11 = sign(b * ymax + a * xmax + c);
                // If bounding boxes intersect and not all four corners are on the same side, hit.
                // Also note: this is designed to be false on NAN input.
                if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax)
                    && max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
                {
                    // Allocate a chunk if needed.
                    if (chunk_n_segs == 0) {
                        // 40 bytes equals the chunk header plus two segments
                        // given the sizes declared for SegChunk and Segment.
                        if (seg_chunk_ref.offset + 40 > seg_limit) {
                            seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
                            seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
                        }
                        ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
                    } else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
                        // Allocation is full: close this chunk with a link to
                        // a freshly allocated one.
                        uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
                        seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
                        SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
                        seg_chunk_ref.offset = new_chunk_ref;
                        chunk_n_segs = 0;
                    }
                    Segment seg = Segment(start, end);
                    Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
                    chunk_n_segs++;
                }

                start = end;
            }
            if (chunk_n_segs == 0) {
                ItemHeader_write(item_header, ItemHeader(SegChunkRef(0)));
            } else {
                // Terminate the item's last chunk and move the shared cursor
                // past it so the next item continues in the same allocation.
                SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
                seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
            }

            stroke_ref.offset += Instance_size;
            chunk.chunk_n--;
            item_header.offset += ItemHeader_size;
        }
    } else {
        // As an optimization, we could just write 0 for the size.
        TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0)));
    }
}
|
|
Binary file not shown.
|
@ -1,135 +0,0 @@
|
||||||
// This is "kernel 3" in a 4-kernel pipeline. It walks the active items
|
|
||||||
// for the tilegroup and produces a per-tile command list for each tile.
|
|
||||||
|
|
||||||
#version 450
|
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
|
||||||
|
|
||||||
layout(local_size_x = 32, local_size_y = 1) in;
|
|
||||||
|
|
||||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
|
||||||
uint[] scene;
|
|
||||||
};
|
|
||||||
|
|
||||||
// TODO: this should have a `readonly` qualifier, but then inclusion
|
|
||||||
// of ptcl.h would fail because of the writers.
|
|
||||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
|
||||||
uint[] tilegroup;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Used readonly
|
|
||||||
layout(set = 0, binding = 2) buffer SegmentBuf {
|
|
||||||
uint[] segment;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Used readonly
|
|
||||||
layout(set = 0, binding = 3) buffer FillSegmentBuf {
|
|
||||||
uint[] fill_seg;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 4) buffer PtclBuf {
|
|
||||||
uint[] ptcl;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(set = 0, binding = 5) buffer AllocBuf {
|
|
||||||
uint alloc;
|
|
||||||
};
|
|
||||||
|
|
||||||
#include "scene.h"
|
|
||||||
#include "tilegroup.h"
|
|
||||||
#include "segment.h"
|
|
||||||
#include "fill_seg.h"
|
|
||||||
#include "ptcl.h"
|
|
||||||
|
|
||||||
#include "setup.h"
|
|
||||||
|
|
||||||
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
|
|
||||||
if (cmd_ref.offset > cmd_limit) {
|
|
||||||
uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
|
|
||||||
CmdJump jump = CmdJump(new_cmd);
|
|
||||||
Cmd_Jump_write(cmd_ref, jump);
|
|
||||||
cmd_ref = CmdRef(new_cmd);
|
|
||||||
cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void main() {
|
|
||||||
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
|
|
||||||
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
|
|
||||||
+ (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
|
|
||||||
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
|
|
||||||
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
|
|
||||||
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
|
|
||||||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
|
||||||
|
|
||||||
TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));
|
|
||||||
FillTileHeader fill_th = FillTileHeader_read(FillTileHeaderRef(tile_ix * FillTileHeader_size));
|
|
||||||
|
|
||||||
while (true) {
|
|
||||||
uint tg_tag = TileGroup_tag(tg_ref);
|
|
||||||
if (tg_tag == TileGroup_End) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (tg_tag == TileGroup_Jump) {
|
|
||||||
tg_ref = TileGroup_Jump_read(tg_ref).new_ref;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Assume tg_tag is `Instance`, though there will be more cases.
|
|
||||||
Instance ins = TileGroup_Instance_read(tg_ref);
|
|
||||||
PietItemRef item_ref = PietItemRef(ins.item_ref);
|
|
||||||
uint item_tag = PietItem_tag(item_ref);
|
|
||||||
switch (item_tag) {
|
|
||||||
case PietItem_Circle:
|
|
||||||
PietCircle circle = PietItem_Circle_read(item_ref);
|
|
||||||
vec2 center = ins.offset + circle.center.xy;
|
|
||||||
float r = circle.radius;
|
|
||||||
if (max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX))
|
|
||||||
&& max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
|
|
||||||
{
|
|
||||||
CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
|
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Circle_write(cmd_ref, cmd);
|
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case PietItem_Poly:
|
|
||||||
ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
|
|
||||||
stroke_th.items.offset += ItemHeader_size;
|
|
||||||
if (stroke_item.segments.offset != 0) {
|
|
||||||
PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
|
|
||||||
CmdStroke cmd = CmdStroke(
|
|
||||||
stroke_item.segments.offset,
|
|
||||||
0.5 * poly.width,
|
|
||||||
poly.rgba_color
|
|
||||||
);
|
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Stroke_write(cmd_ref, cmd);
|
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case PietItem_Fill:
|
|
||||||
FillItemHeader fill_item = FillItemHeader_read(fill_th.items);
|
|
||||||
fill_th.items.offset += FillItemHeader_size;
|
|
||||||
// TODO: handle segments == 0 but backdrop != specially, it's a solid tile.
|
|
||||||
if (fill_item.segments.offset != 0) {
|
|
||||||
PietFill fill = PietItem_Fill_read(item_ref);
|
|
||||||
CmdFill cmd = CmdFill(
|
|
||||||
fill_item.segments.offset,
|
|
||||||
fill_item.backdrop,
|
|
||||||
fill.rgba_color
|
|
||||||
);
|
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Fill_write(cmd_ref, cmd);
|
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
} else if (fill_item.backdrop != 0) {
|
|
||||||
// TODO: truncate existing cmd list if alpha is opaque
|
|
||||||
PietFill fill = PietItem_Fill_read(item_ref);
|
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
tg_ref.offset += TileGroup_size;
|
|
||||||
}
|
|
||||||
Cmd_End_write(cmd_ref);
|
|
||||||
}
|
|
Binary file not shown.
|
@ -9,26 +9,14 @@
|
||||||
|
|
||||||
layout(local_size_x = 16, local_size_y = 16) in;
|
layout(local_size_x = 16, local_size_y = 16) in;
|
||||||
|
|
||||||
// Same concern that this should be readonly as in kernel 3.
|
// This should be annotated readonly but infra doesn't support that yet.
|
||||||
layout(set = 0, binding = 0) buffer PtclBuf {
|
layout(set = 0, binding = 0) buffer PtclBuf {
|
||||||
uint[] ptcl;
|
uint[] ptcl;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Used readonly
|
layout(rgba8, set = 0, binding = 1) uniform writeonly image2D image;
|
||||||
layout(set = 0, binding = 1) buffer SegmentBuf {
|
|
||||||
uint[] segment;
|
|
||||||
};
|
|
||||||
|
|
||||||
// Used readonly
|
|
||||||
layout(set = 0, binding = 2) buffer FillSegBuf {
|
|
||||||
uint[] fill_seg;
|
|
||||||
};
|
|
||||||
|
|
||||||
layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;
|
|
||||||
|
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
#include "segment.h"
|
|
||||||
#include "fill_seg.h"
|
|
||||||
|
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
|
@ -79,11 +67,11 @@ void main() {
|
||||||
CmdFill fill = Cmd_Fill_read(cmd_ref);
|
CmdFill fill = Cmd_Fill_read(cmd_ref);
|
||||||
// Probably better to store as float, but conversion is no doubt cheap.
|
// Probably better to store as float, but conversion is no doubt cheap.
|
||||||
float area = float(fill.backdrop);
|
float area = float(fill.backdrop);
|
||||||
FillSegChunkRef fill_seg_chunk_ref = FillSegChunkRef(fill.seg_ref);
|
SegChunkRef fill_seg_chunk_ref = SegChunkRef(fill.seg_ref);
|
||||||
do {
|
do {
|
||||||
FillSegChunk seg_chunk = FillSegChunk_read(fill_seg_chunk_ref);
|
SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
|
||||||
for (int i = 0; i < seg_chunk.n; i++) {
|
for (int i = 0; i < seg_chunk.n; i++) {
|
||||||
FillSegment seg = FillSegment_read(FillSegmentRef(fill_seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * i));
|
Segment seg = Segment_read(SegmentRef(fill_seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
|
||||||
vec2 start = seg.start - xy;
|
vec2 start = seg.start - xy;
|
||||||
vec2 end = seg.end - xy;
|
vec2 end = seg.end - xy;
|
||||||
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
||||||
|
|
Binary file not shown.
|
@ -36,6 +36,14 @@ struct CmdRef {
|
||||||
uint offset;
|
uint offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct SegmentRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct SegChunkRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
struct CmdCircle {
|
struct CmdCircle {
|
||||||
vec2 center;
|
vec2 center;
|
||||||
float radius;
|
float radius;
|
||||||
|
@ -141,6 +149,28 @@ CmdRef Cmd_index(CmdRef ref, uint index) {
|
||||||
return CmdRef(ref.offset + index * Cmd_size);
|
return CmdRef(ref.offset + index * Cmd_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct Segment {
|
||||||
|
vec2 start;
|
||||||
|
vec2 end;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define Segment_size 16
|
||||||
|
|
||||||
|
SegmentRef Segment_index(SegmentRef ref, uint index) {
|
||||||
|
return SegmentRef(ref.offset + index * Segment_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SegChunk {
|
||||||
|
uint n;
|
||||||
|
SegChunkRef next;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define SegChunk_size 8
|
||||||
|
|
||||||
|
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
|
||||||
|
return SegChunkRef(ref.offset + index * SegChunk_size);
|
||||||
|
}
|
||||||
|
|
||||||
CmdCircle CmdCircle_read(CmdCircleRef ref) {
|
CmdCircle CmdCircle_read(CmdCircleRef ref) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
uint raw0 = ptcl[ix + 0];
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
@ -362,3 +392,39 @@ void Cmd_Bail_write(CmdRef ref) {
|
||||||
ptcl[ref.offset >> 2] = Cmd_Bail;
|
ptcl[ref.offset >> 2] = Cmd_Bail;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Segment Segment_read(SegmentRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
uint raw1 = ptcl[ix + 1];
|
||||||
|
uint raw2 = ptcl[ix + 2];
|
||||||
|
uint raw3 = ptcl[ix + 3];
|
||||||
|
Segment s;
|
||||||
|
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Segment_write(SegmentRef ref, Segment s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = floatBitsToUint(s.start.x);
|
||||||
|
ptcl[ix + 1] = floatBitsToUint(s.start.y);
|
||||||
|
ptcl[ix + 2] = floatBitsToUint(s.end.x);
|
||||||
|
ptcl[ix + 3] = floatBitsToUint(s.end.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
SegChunk SegChunk_read(SegChunkRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
uint raw1 = ptcl[ix + 1];
|
||||||
|
SegChunk s;
|
||||||
|
s.n = raw0;
|
||||||
|
s.next = SegChunkRef(raw1);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void SegChunk_write(SegChunkRef ref, SegChunk s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = s.n;
|
||||||
|
ptcl[ix + 1] = s.next.offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,126 +0,0 @@
|
||||||
// Code auto-generated by piet-gpu-derive
|
|
||||||
|
|
||||||
struct TileHeaderRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct ItemHeaderRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SegmentRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct SegChunkRef {
|
|
||||||
uint offset;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct TileHeader {
|
|
||||||
uint n;
|
|
||||||
ItemHeaderRef items;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define TileHeader_size 8
|
|
||||||
|
|
||||||
TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) {
|
|
||||||
return TileHeaderRef(ref.offset + index * TileHeader_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct ItemHeader {
|
|
||||||
SegChunkRef segments;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define ItemHeader_size 4
|
|
||||||
|
|
||||||
ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) {
|
|
||||||
return ItemHeaderRef(ref.offset + index * ItemHeader_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct Segment {
|
|
||||||
vec2 start;
|
|
||||||
vec2 end;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define Segment_size 16
|
|
||||||
|
|
||||||
SegmentRef Segment_index(SegmentRef ref, uint index) {
|
|
||||||
return SegmentRef(ref.offset + index * Segment_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
struct SegChunk {
|
|
||||||
uint n;
|
|
||||||
SegChunkRef next;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define SegChunk_size 8
|
|
||||||
|
|
||||||
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
|
|
||||||
return SegChunkRef(ref.offset + index * SegChunk_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
TileHeader TileHeader_read(TileHeaderRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = segment[ix + 0];
|
|
||||||
uint raw1 = segment[ix + 1];
|
|
||||||
TileHeader s;
|
|
||||||
s.n = raw0;
|
|
||||||
s.items = ItemHeaderRef(raw1);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void TileHeader_write(TileHeaderRef ref, TileHeader s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
segment[ix + 0] = s.n;
|
|
||||||
segment[ix + 1] = s.items.offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
ItemHeader ItemHeader_read(ItemHeaderRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = segment[ix + 0];
|
|
||||||
ItemHeader s;
|
|
||||||
s.segments = SegChunkRef(raw0);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
segment[ix + 0] = s.segments.offset;
|
|
||||||
}
|
|
||||||
|
|
||||||
Segment Segment_read(SegmentRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = segment[ix + 0];
|
|
||||||
uint raw1 = segment[ix + 1];
|
|
||||||
uint raw2 = segment[ix + 2];
|
|
||||||
uint raw3 = segment[ix + 3];
|
|
||||||
Segment s;
|
|
||||||
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
|
||||||
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void Segment_write(SegmentRef ref, Segment s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
segment[ix + 0] = floatBitsToUint(s.start.x);
|
|
||||||
segment[ix + 1] = floatBitsToUint(s.start.y);
|
|
||||||
segment[ix + 2] = floatBitsToUint(s.end.x);
|
|
||||||
segment[ix + 3] = floatBitsToUint(s.end.y);
|
|
||||||
}
|
|
||||||
|
|
||||||
SegChunk SegChunk_read(SegChunkRef ref) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
uint raw0 = segment[ix + 0];
|
|
||||||
uint raw1 = segment[ix + 1];
|
|
||||||
SegChunk s;
|
|
||||||
s.n = raw0;
|
|
||||||
s.next = SegChunkRef(raw1);
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
void SegChunk_write(SegChunkRef ref, SegChunk s) {
|
|
||||||
uint ix = ref.offset >> 2;
|
|
||||||
segment[ix + 0] = s.n;
|
|
||||||
segment[ix + 1] = s.next.offset;
|
|
||||||
}
|
|
||||||
|
|
|
@ -209,16 +209,11 @@ impl<D: Device> Renderer<D> {
|
||||||
&[],
|
&[],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// These will probably be combined with the ptcl buf, as they're all written by the
|
|
||||||
// same kernel now.
|
|
||||||
let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
|
||||||
let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
|
||||||
|
|
||||||
let k4_code = include_bytes!("../shader/kernel4.spv");
|
let k4_code = include_bytes!("../shader/kernel4.spv");
|
||||||
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1)?;
|
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 1, 1)?;
|
||||||
let k4_ds = device.create_descriptor_set(
|
let k4_ds = device.create_descriptor_set(
|
||||||
&k4_pipeline,
|
&k4_pipeline,
|
||||||
&[&ptcl_buf, &segment_buf, &fill_seg_buf],
|
&[&ptcl_buf],
|
||||||
&[&image_dev],
|
&[&image_dev],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue