mirror of https://github.com/italicsjenga/vello.git (synced 2025-01-10 12:41:30 +11:00)
Delete old-style kernels and buffers
Pave the way for the coarse raster pass to write to the ptcl buffer.
This commit is contained in:
parent 3a6428238b
commit 1240da3870
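For orientation: after this commit, per-tile segment data is meant to live in the ptcl buffer itself, as linked SegChunk lists that CmdStroke/CmdFill reference by offset. A minimal GLSL sketch of the consuming side, mirroring the fill loop in the kernel4.comp hunk below (`cmd` is a hypothetical stand-in for a command already read from the command list):

    // Sketch only: walk a segment list stored in the ptcl buffer.
    SegChunkRef chunk_ref = SegChunkRef(cmd.seg_ref);
    do {
        SegChunk chunk = SegChunk_read(chunk_ref);
        for (uint i = 0; i < chunk.n; i++) {
            // Segments are packed immediately after each chunk header.
            Segment seg = Segment_read(SegmentRef(chunk_ref.offset + SegChunk_size + Segment_size * i));
            // ... rasterize seg ...
        }
        chunk_ref = chunk.next;
    } while (chunk_ref.offset != 0);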
piet-gpu-types/src/fill_seg.rs
@@ -1,37 +0,0 @@
-use piet_gpu_derive::piet_gpu;
-
-// Structures representing segments for fill items.
-
-// There is some cut'n'paste here from stroke segments, which can be
-// traced to the fact that buffers in GLSL are basically global.
-// Maybe there's a way to address that, but in the meantime living
-// with the duplication is easiest.
-
-piet_gpu! {
-    #[gpu_write]
-    mod fill_seg {
-        struct FillTileHeader {
-            n: u32,
-            items: Ref<FillItemHeader>,
-        }
-
-        struct FillItemHeader {
-            backdrop: i32,
-            segments: Ref<FillSegChunk>,
-        }
-
-        // TODO: strongly consider using f16. If so, these would be
-        // relative to the tile. We're doing f32 for now to minimize
-        // divergence from piet-metal originals.
-        struct FillSegment {
-            start: [f32; 2],
-            end: [f32; 2],
-        }
-
-        struct FillSegChunk {
-            n: u32,
-            next: Ref<FillSegChunk>,
-            // Segments follow (could represent this as a variable sized array).
-        }
-    }
-}
piet-gpu-types/src/lib.rs
@@ -3,10 +3,8 @@
 pub mod annotated;
 pub mod bins;
 pub mod encoder;
-pub mod fill_seg;
 pub mod ptcl;
 pub mod scene;
-pub mod segment;
 pub mod state;
 pub mod test;
 pub mod tilegroup;
piet-gpu-types/src/main.rs
@@ -9,8 +9,6 @@ fn main() {
         "annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
         "bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
         "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
-        "segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
-        "fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()),
         "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
         "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
         _ => println!("Oops, unknown module name"),
piet-gpu-types/src/ptcl.rs
@@ -13,13 +13,13 @@ piet_gpu! {
             end: [f32; 2],
         }
         struct CmdStroke {
-            // Should be Ref<SegChunk> if we had cross-module references.
+            // Should be Ref<SegChunk>
             seg_ref: u32,
             half_width: f32,
             rgba_color: u32,
         }
         struct CmdFill {
-            // Should be Ref<FillSegChunk> if we had cross-module references.
+            // Should be Ref<FillSegChunk>
             seg_ref: u32,
             backdrop: i32,
             rgba_color: u32,
@@ -51,5 +51,19 @@ piet_gpu! {
             Jump(CmdJump),
             Bail,
         }
+
+        // TODO: strongly consider using f16. If so, these would be
+        // relative to the tile. We're doing f32 for now to minimize
+        // divergence from piet-metal originals.
+        struct Segment {
+            start: [f32; 2],
+            end: [f32; 2],
+        }
+
+        struct SegChunk {
+            n: u32,
+            next: Ref<SegChunk>,
+            // Segments follow (could represent this as a variable sized array).
+        }
     }
 }
piet-gpu-types/src/segment.rs
@@ -1,32 +0,0 @@
-use piet_gpu_derive::piet_gpu;
-
-// Structures representing segments for stroke/fill items.
-
-piet_gpu! {
-    #[gpu_write]
-    mod segment {
-        struct TileHeader {
-            n: u32,
-            items: Ref<ItemHeader>,
-        }
-
-        // Note: this is only suitable for strokes, fills require backdrop.
-        struct ItemHeader {
-            segments: Ref<SegChunk>,
-        }
-
-        // TODO: strongly consider using f16. If so, these would be
-        // relative to the tile. We're doing f32 for now to minimize
-        // divergence from piet-metal originals.
-        struct Segment {
-            start: [f32; 2],
-            end: [f32; 2],
-        }
-
-        struct SegChunk {
-            n: u32,
-            next: Ref<SegChunk>,
-            // Segments follow (could represent this as a variable sized array).
-        }
-    }
-}
piet-gpu/shader/build.ninja
@@ -9,19 +9,11 @@ rule glsl
 
 build image.spv: glsl image.comp | scene.h
 
-build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
-
-build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
-
-build kernel2f.spv: glsl kernel2f.comp | scene.h tilegroup.h fill_seg.h setup.h
-
-build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h ptcl.h setup.h
-
-build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h
-
 build elements.spv: glsl elements.comp | scene.h state.h annotated.h
 
 build binning.spv: glsl binning.comp | annotated.h bins.h setup.h
 
 build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
 
+build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
piet-gpu/shader/fill_seg.h
@@ -1,130 +0,0 @@
-// Code auto-generated by piet-gpu-derive
-
-struct FillTileHeaderRef {
-    uint offset;
-};
-
-struct FillItemHeaderRef {
-    uint offset;
-};
-
-struct FillSegmentRef {
-    uint offset;
-};
-
-struct FillSegChunkRef {
-    uint offset;
-};
-
-struct FillTileHeader {
-    uint n;
-    FillItemHeaderRef items;
-};
-
-#define FillTileHeader_size 8
-
-FillTileHeaderRef FillTileHeader_index(FillTileHeaderRef ref, uint index) {
-    return FillTileHeaderRef(ref.offset + index * FillTileHeader_size);
-}
-
-struct FillItemHeader {
-    int backdrop;
-    FillSegChunkRef segments;
-};
-
-#define FillItemHeader_size 8
-
-FillItemHeaderRef FillItemHeader_index(FillItemHeaderRef ref, uint index) {
-    return FillItemHeaderRef(ref.offset + index * FillItemHeader_size);
-}
-
-struct FillSegment {
-    vec2 start;
-    vec2 end;
-};
-
-#define FillSegment_size 16
-
-FillSegmentRef FillSegment_index(FillSegmentRef ref, uint index) {
-    return FillSegmentRef(ref.offset + index * FillSegment_size);
-}
-
-struct FillSegChunk {
-    uint n;
-    FillSegChunkRef next;
-};
-
-#define FillSegChunk_size 8
-
-FillSegChunkRef FillSegChunk_index(FillSegChunkRef ref, uint index) {
-    return FillSegChunkRef(ref.offset + index * FillSegChunk_size);
-}
-
-FillTileHeader FillTileHeader_read(FillTileHeaderRef ref) {
-    uint ix = ref.offset >> 2;
-    uint raw0 = fill_seg[ix + 0];
-    uint raw1 = fill_seg[ix + 1];
-    FillTileHeader s;
-    s.n = raw0;
-    s.items = FillItemHeaderRef(raw1);
-    return s;
-}
-
-void FillTileHeader_write(FillTileHeaderRef ref, FillTileHeader s) {
-    uint ix = ref.offset >> 2;
-    fill_seg[ix + 0] = s.n;
-    fill_seg[ix + 1] = s.items.offset;
-}
-
-FillItemHeader FillItemHeader_read(FillItemHeaderRef ref) {
-    uint ix = ref.offset >> 2;
-    uint raw0 = fill_seg[ix + 0];
-    uint raw1 = fill_seg[ix + 1];
-    FillItemHeader s;
-    s.backdrop = int(raw0);
-    s.segments = FillSegChunkRef(raw1);
-    return s;
-}
-
-void FillItemHeader_write(FillItemHeaderRef ref, FillItemHeader s) {
-    uint ix = ref.offset >> 2;
-    fill_seg[ix + 0] = uint(s.backdrop);
-    fill_seg[ix + 1] = s.segments.offset;
-}
-
-FillSegment FillSegment_read(FillSegmentRef ref) {
-    uint ix = ref.offset >> 2;
-    uint raw0 = fill_seg[ix + 0];
-    uint raw1 = fill_seg[ix + 1];
-    uint raw2 = fill_seg[ix + 2];
-    uint raw3 = fill_seg[ix + 3];
-    FillSegment s;
-    s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
-    s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
-    return s;
-}
-
-void FillSegment_write(FillSegmentRef ref, FillSegment s) {
-    uint ix = ref.offset >> 2;
-    fill_seg[ix + 0] = floatBitsToUint(s.start.x);
-    fill_seg[ix + 1] = floatBitsToUint(s.start.y);
-    fill_seg[ix + 2] = floatBitsToUint(s.end.x);
-    fill_seg[ix + 3] = floatBitsToUint(s.end.y);
-}
-
-FillSegChunk FillSegChunk_read(FillSegChunkRef ref) {
-    uint ix = ref.offset >> 2;
-    uint raw0 = fill_seg[ix + 0];
-    uint raw1 = fill_seg[ix + 1];
-    FillSegChunk s;
-    s.n = raw0;
-    s.next = FillSegChunkRef(raw1);
-    return s;
-}
-
-void FillSegChunk_write(FillSegChunkRef ref, FillSegChunk s) {
-    uint ix = ref.offset >> 2;
-    fill_seg[ix + 0] = s.n;
-    fill_seg[ix + 1] = s.next.offset;
-}
piet-gpu/shader/kernel1.comp
@@ -1,161 +0,0 @@
-// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
-// and outputs "instances" (references to item + translation) for each item
-// that intersects the tilegroup.
-//
-// This implementation is simplistic and leaves a lot of performance on the
-// table. A fancier implementation would use threadgroup shared memory or
-// subgroups (or possibly both) to parallelize the reading of the input and
-// the computation of tilegroup intersection.
-//
-// In addition, there are some features currently missing, such as support
-// for clipping.
-
-#version 450
-#extension GL_GOOGLE_include_directive : enable
-
-// It's possible we should lay this out with x and do our own math.
-layout(local_size_x = 1, local_size_y = 32) in;
-
-layout(set = 0, binding = 0) readonly buffer SceneBuf {
-    uint[] scene;
-};
-
-layout(set = 0, binding = 1) buffer TilegroupBuf {
-    uint[] tilegroup;
-};
-
-layout(set = 0, binding = 2) buffer AllocBuf {
-    uint alloc;
-};
-
-#include "scene.h"
-#include "tilegroup.h"
-
-#include "setup.h"
-
-#define MAX_STACK 8
-
-struct StackElement {
-    PietItemRef group;
-    uint index;
-    vec2 offset;
-};
-
-void main() {
-    StackElement stack[MAX_STACK];
-    uint stack_ix = 0;
-    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
-    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
-    uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
-
-    // State for stroke references.
-    TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START);
-    ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4);
-    InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size);
-    uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_STROKE_ALLOC - Instance_size;
-    uint stroke_chunk_n = 0;
-    uint stroke_n = 0;
-
-    // State for fill references. All this is a bit cut'n'paste, but making a
-    // proper abstraction isn't easy.
-    TileGroupRef fill_start = TileGroupRef(tg_ref.offset + TILEGROUP_FILL_START);
-    ChunkRef fill_chunk_start = ChunkRef(fill_start.offset + 4);
-    InstanceRef fill_ref = InstanceRef(fill_chunk_start.offset + Chunk_size);
-    uint fill_limit = fill_start.offset + TILEGROUP_INITIAL_FILL_ALLOC - Instance_size;
-    uint fill_chunk_n = 0;
-    uint fill_n = 0;
-
-    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
-    PietItemRef root = PietItemRef(0);
-    SimpleGroup group = PietItem_Group_read(root);
-    StackElement tos = StackElement(root, 0, group.offset.xy);
-
-    while (true) {
-        if (tos.index < group.n_items) {
-            Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
-            vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
-            bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
-                && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
-            bool is_group = false;
-            uint tag;
-            if (hit) {
-                PietItemRef item_ref = PietItem_index(group.items, tos.index);
-                tag = PietItem_tag(item_ref);
-                is_group = tag == PietItem_Group;
-            }
-            if (hit && !is_group) {
-                PietItemRef item_ref = PietItem_index(group.items, tos.index);
-                Instance ins = Instance(item_ref.offset, tos.offset);
-                if (tg_ref.offset > tg_limit) {
-                    // Allocation exceeded; do atomic bump alloc.
-                    uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
-                    Jump jump = Jump(TileGroupRef(new_tg));
-                    TileGroup_Jump_write(tg_ref, jump);
-                    tg_ref = TileGroupRef(new_tg);
-                    tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
-                }
-                TileGroup_Instance_write(tg_ref, ins);
-                tg_ref.offset += TileGroup_size;
-                if (tag == PietItem_Poly) {
-                    if (stroke_ref.offset > stroke_limit) {
-                        uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC);
-                        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(new_stroke)));
-                        stroke_chunk_start = ChunkRef(new_stroke);
-                        stroke_ref = InstanceRef(new_stroke + Chunk_size);
-                        stroke_n += stroke_chunk_n;
-                        stroke_chunk_n = 0;
-                        stroke_limit = new_stroke + TILEGROUP_STROKE_ALLOC - Instance_size;
-                    }
-                    Instance_write(stroke_ref, ins);
-                    stroke_chunk_n++;
-                    stroke_ref.offset += Instance_size;
-                } else if (tag == PietItem_Fill) {
-                    if (fill_ref.offset > fill_limit) {
-                        uint new_fill = atomicAdd(alloc, TILEGROUP_FILL_ALLOC);
-                        Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(new_fill)));
-                        fill_chunk_start = ChunkRef(new_fill);
-                        fill_ref = InstanceRef(new_fill + Chunk_size);
-                        fill_n += fill_chunk_n;
-                        fill_chunk_n = 0;
-                        fill_limit = new_fill + TILEGROUP_FILL_ALLOC - Instance_size;
-                    }
-                    Instance_write(fill_ref, ins);
-                    fill_chunk_n++;
-                    fill_ref.offset += Instance_size;
-
-                }
-            }
-            if (is_group) {
-                PietItemRef item_ref = PietItem_index(group.items, tos.index);
-                tos.index++;
-                if (tos.index < group.n_items) {
-                    stack[stack_ix++] = tos;
-                }
-                group = PietItem_Group_read(item_ref);
-                tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
-            } else {
-                tos.index++;
-            }
-        } else {
-            // processed all items in this group; pop the stack
-            if (stack_ix == 0) {
-                break;
-            }
-            tos = stack[--stack_ix];
-            group = PietItem_Group_read(tos.group);
-        }
-    }
-    TileGroup_End_write(tg_ref);
-
-    stroke_n += stroke_chunk_n;
-    if (stroke_n > 0) {
-        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0)));
-    }
-    tilegroup[stroke_start.offset >> 2] = stroke_n;
-
-    fill_n += fill_chunk_n;
-    if (fill_n > 0) {
-        Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(0)));
-    }
-    tilegroup[fill_start.offset >> 2] = fill_n;
-}

piet-gpu/shader/kernel1.spv
Binary file not shown.
piet-gpu/shader/kernel2f.comp
@@ -1,167 +0,0 @@
-// This is "kernel 2" (fill) in a 4-kernel pipeline. It processes the fill
-// (polyline) items in the scene and generates a list of segments for each, for
-// each tile.
-
-#version 450
-#extension GL_GOOGLE_include_directive : enable
-
-layout(local_size_x = 32) in;
-
-layout(set = 0, binding = 0) readonly buffer SceneBuf {
-    uint[] scene;
-};
-
-layout(set = 0, binding = 1) buffer TilegroupBuf {
-    uint[] tilegroup;
-};
-
-layout(set = 0, binding = 2) buffer FillSegBuf {
-    uint[] fill_seg;
-};
-
-layout(set = 0, binding = 3) buffer AllocBuf {
-    uint alloc;
-};
-
-#include "scene.h"
-#include "tilegroup.h"
-#include "fill_seg.h"
-
-#include "setup.h"
-
-// Ensure that there is space to encode a segment.
-void alloc_chunk(inout uint chunk_n_segs, inout FillSegChunkRef seg_chunk_ref,
-    inout FillSegChunkRef first_seg_chunk, inout uint seg_limit)
-{
-    if (chunk_n_segs == 0) {
-        if (seg_chunk_ref.offset + 40 > seg_limit) {
-            seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
-            seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - FillSegment_size;
-        }
-        first_seg_chunk = seg_chunk_ref;
-    } else if (seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs > seg_limit) {
-        uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
-        seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - FillSegment_size;
-        FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(new_chunk_ref)));
-        seg_chunk_ref.offset = new_chunk_ref;
-        chunk_n_segs = 0;
-    }
-
-}
-
-void main() {
-    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
-    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
-        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
-    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
-    TileGroupRef fill_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_FILL_START);
-    uint fill_n = tilegroup[fill_start.offset >> 2];
-
-    FillTileHeaderRef tile_header_ref = FillTileHeaderRef(tile_ix * FillTileHeader_size);
-    if (fill_n > 0) {
-        ChunkRef chunk_ref = ChunkRef(fill_start.offset + 4);
-        Chunk chunk = Chunk_read(chunk_ref);
-        InstanceRef fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
-        FillItemHeaderRef item_header = FillItemHeaderRef(atomicAdd(alloc, fill_n * FillItemHeader_size));
-        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, item_header));
-        FillSegChunkRef seg_chunk_ref = FillSegChunkRef(0);
-        uint seg_limit = 0;
-        // Iterate through items; fill_n holds count remaining.
-        while (true) {
-            if (chunk.chunk_n == 0) {
-                chunk_ref = chunk.next;
-                if (chunk_ref.offset == 0) {
-                    break;
-                }
-                chunk = Chunk_read(chunk_ref);
-                fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
-            }
-            Instance ins = Instance_read(fill_ref);
-            PietFill fill = PietItem_Fill_read(PietItemRef(ins.item_ref));
-
-            // Process the fill polyline item.
-            uint max_n_segs = fill.n_points - 1;
-            uint chunk_n_segs = 0;
-            int backdrop = 0;
-            FillSegChunkRef seg_chunk_ref;
-            FillSegChunkRef first_seg_chunk = FillSegChunkRef(0);
-            vec2 start = Point_read(fill.points).xy;
-            for (uint j = 0; j < max_n_segs; j++) {
-                fill.points.offset += Point_size;
-                vec2 end = Point_read(fill.points).xy;
-
-                // Process one segment.
-
-                // TODO: I think this would go more smoothly (and be easier to
-                // make numerically robust) if it were based on clipping the line
-                // to the tile box. See:
-                // https://tavianator.com/fast-branchless-raybounding-box-intersections/
-                vec2 xymin = min(start, end);
-                vec2 xymax = max(start, end);
-                float a = end.y - start.y;
-                float b = start.x - end.x;
-                float c = -(a * start.x + b * start.y);
-                vec2 xy1 = xy0 + vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
-                float ytop = max(xy0.y, xymin.y);
-                float ybot = min(xy1.y, xymax.y);
-                float s00 = sign(b * ytop + a * xy0.x + c);
-                float s01 = sign(b * ytop + a * xy1.x + c);
-                float s10 = sign(b * ybot + a * xy0.x + c);
-                float s11 = sign(b * ybot + a * xy1.x + c);
-                float sTopLeft = sign(b * xy0.y + a * xy0.x + c);
-                if (sTopLeft == sign(a) && xymin.y <= xy0.y && xymax.y > xy0.y) {
-                    backdrop -= int(s00);
-                }
-
-                // This is adapted from piet-metal but could be improved.
-                if (max(xymin.x, xy0.x) < min(xymax.x, xy1.x)
-                    && ytop < ybot
-                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
-                {
-                    // avoid overwriting `end` so that it can be used as start
-                    vec2 enc_end = end;
-                    if (xymin.x < xy0.x) {
-                        float yEdge = mix(start.y, end.y, (start.x - xy0.x) / b);
-                        if (yEdge >= xy0.y && yEdge < xy1.y) {
-                            // This is encoded the same as a general fill segment, but could be
-                            // special-cased, either here or in rendering. (It was special-cased
-                            // in piet-metal).
-                            FillSegment edge_seg;
-                            if (b > 0.0) {
-                                enc_end = vec2(xy0.x, yEdge);
-                                edge_seg.start = enc_end;
-                                edge_seg.end = vec2(xy0.x, xy1.y);
-                            } else {
-                                start = vec2(xy0.x, yEdge);
-                                edge_seg.start = vec2(xy0.x, xy1.y);
-                                edge_seg.end = start;
-                            }
-                            alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
-                            FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), edge_seg);
-                            chunk_n_segs++;
-                        }
-                    }
-                    alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
-                    FillSegment seg = FillSegment(start, enc_end);
-                    FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), seg);
-                    chunk_n_segs++;
-                }
-
-                start = end;
-            }
-            FillItemHeader_write(item_header, FillItemHeader(backdrop, first_seg_chunk));
-            if (chunk_n_segs != 0) {
-                FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(0)));
-                seg_chunk_ref.offset += FillSegChunk_size + FillSegment_size * chunk_n_segs;
-            }
-
-            fill_ref.offset += Instance_size;
-            chunk.chunk_n--;
-            item_header.offset += FillItemHeader_size;
-        }
-    } else {
-        // As an optimization, we could just write 0 for the size.
-        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, FillItemHeaderRef(0)));
-    }
-}

piet-gpu/shader/kernel2f.spv
Binary file not shown.
piet-gpu/shader/kernel2s.comp
@@ -1,137 +0,0 @@
-// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke
-// (polyline) items in the scene and generates a list of segments for each, for
-// each tile.
-
-#version 450
-#extension GL_GOOGLE_include_directive : enable
-
-layout(local_size_x = 32) in;
-
-layout(set = 0, binding = 0) readonly buffer SceneBuf {
-    uint[] scene;
-};
-
-layout(set = 0, binding = 1) buffer TilegroupBuf {
-    uint[] tilegroup;
-};
-
-layout(set = 0, binding = 2) buffer SegmentBuf {
-    uint[] segment;
-};
-
-layout(set = 0, binding = 3) buffer AllocBuf {
-    uint alloc;
-};
-
-#include "scene.h"
-#include "tilegroup.h"
-#include "segment.h"
-
-#include "setup.h"
-
-void main() {
-    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
-    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
-        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
-    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
-    TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START);
-    uint stroke_n = tilegroup[stroke_start.offset >> 2];
-
-    TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size);
-    if (stroke_n > 0) {
-        ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4);
-        Chunk chunk = Chunk_read(chunk_ref);
-        InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
-        ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
-        TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
-        SegChunkRef seg_chunk_ref = SegChunkRef(0);
-        uint seg_limit = 0;
-        // Iterate through items; stroke_n holds count remaining.
-        while (true) {
-            if (chunk.chunk_n == 0) {
-                chunk_ref = chunk.next;
-                if (chunk_ref.offset == 0) {
-                    break;
-                }
-                chunk = Chunk_read(chunk_ref);
-                stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
-            }
-            Instance ins = Instance_read(stroke_ref);
-            PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref));
-
-            // Process the stroke polyline item.
-            uint max_n_segs = poly.n_points - 1;
-            uint chunk_n_segs = 0;
-            SegChunkRef seg_chunk_ref;
-            vec2 start = Point_read(poly.points).xy;
-            for (uint j = 0; j < max_n_segs; j++) {
-                poly.points.offset += Point_size;
-                vec2 end = Point_read(poly.points).xy;
-
-                // Process one segment.
-
-                // This logic just tests for collision. What we probably want to do
-                // is a clipping algorithm like Liang-Barsky, and then store coords
-                // relative to the tile in f16. See also:
-                // https://tavianator.com/fast-branchless-raybounding-box-intersections/
-
-                // Also note that when we go to the fancy version, we want to compute
-                // the (horizontal projection of) the bounding box of the intersection
-                // once per tilegroup, so we can assign work to individual tiles.
-
-                float a = end.y - start.y;
-                float b = start.x - end.x;
-                float c = -(a * start.x + b * start.y);
-                float half_width = 0.5 * poly.width;
-                // Tile boundaries padded by half-width.
-                float xmin = xy0.x - half_width;
-                float ymin = xy0.y - half_width;
-                float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width;
-                float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width;
-                float s00 = sign(b * ymin + a * xmin + c);
-                float s01 = sign(b * ymin + a * xmax + c);
-                float s10 = sign(b * ymax + a * xmin + c);
-                float s11 = sign(b * ymax + a * xmax + c);
-                // If bounding boxes intersect and not all four corners are on the same side, hit.
-                // Also note: this is designed to be false on NAN input.
-                if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax)
-                    && max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
-                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
-                {
-                    // Allocate a chunk if needed.
-                    if (chunk_n_segs == 0) {
-                        if (seg_chunk_ref.offset + 40 > seg_limit) {
-                            seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
-                            seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
-                        }
-                        ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
-                    } else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
-                        uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
-                        seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
-                        SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
-                        seg_chunk_ref.offset = new_chunk_ref;
-                        chunk_n_segs = 0;
-                    }
-                    Segment seg = Segment(start, end);
-                    Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
-                    chunk_n_segs++;
-                }
-
-                start = end;
-            }
-            if (chunk_n_segs == 0) {
-                ItemHeader_write(item_header, ItemHeader(SegChunkRef(0)));
-            } else {
-                SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
-                seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
-            }
-
-            stroke_ref.offset += Instance_size;
-            chunk.chunk_n--;
-            item_header.offset += ItemHeader_size;
-        }
-    } else {
-        // As an optimization, we could just write 0 for the size.
-        TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0)));
-    }
-}

piet-gpu/shader/kernel2s.spv
Binary file not shown.
piet-gpu/shader/kernel3.comp
@@ -1,135 +0,0 @@
-// This is "kernel 3" in a 4-kernel pipeline. It walks the active items
-// for the tilegroup and produces a per-tile command list for each tile.
-
-#version 450
-#extension GL_GOOGLE_include_directive : enable
-
-layout(local_size_x = 32, local_size_y = 1) in;
-
-layout(set = 0, binding = 0) readonly buffer SceneBuf {
-    uint[] scene;
-};
-
-// TODO: this should have a `readonly` qualifier, but then inclusion
-// of ptcl.h would fail because of the writers.
-layout(set = 0, binding = 1) buffer TilegroupBuf {
-    uint[] tilegroup;
-};
-
-// Used readonly
-layout(set = 0, binding = 2) buffer SegmentBuf {
-    uint[] segment;
-};
-
-// Used readonly
-layout(set = 0, binding = 3) buffer FillSegmentBuf {
-    uint[] fill_seg;
-};
-
-layout(set = 0, binding = 4) buffer PtclBuf {
-    uint[] ptcl;
-};
-
-layout(set = 0, binding = 5) buffer AllocBuf {
-    uint alloc;
-};
-
-#include "scene.h"
-#include "tilegroup.h"
-#include "segment.h"
-#include "fill_seg.h"
-#include "ptcl.h"
-
-#include "setup.h"
-
-void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
-    if (cmd_ref.offset > cmd_limit) {
-        uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
-        CmdJump jump = CmdJump(new_cmd);
-        Cmd_Jump_write(cmd_ref, jump);
-        cmd_ref = CmdRef(new_cmd);
-        cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
-    }
-}
-
-void main() {
-    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
-    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
-        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
-    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
-    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
-    CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
-    uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
-
-    TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));
-    FillTileHeader fill_th = FillTileHeader_read(FillTileHeaderRef(tile_ix * FillTileHeader_size));
-
-    while (true) {
-        uint tg_tag = TileGroup_tag(tg_ref);
-        if (tg_tag == TileGroup_End) {
-            break;
-        }
-        if (tg_tag == TileGroup_Jump) {
-            tg_ref = TileGroup_Jump_read(tg_ref).new_ref;
-            continue;
-        }
-        // Assume tg_tag is `Instance`, though there will be more cases.
-        Instance ins = TileGroup_Instance_read(tg_ref);
-        PietItemRef item_ref = PietItemRef(ins.item_ref);
-        uint item_tag = PietItem_tag(item_ref);
-        switch (item_tag) {
-        case PietItem_Circle:
-            PietCircle circle = PietItem_Circle_read(item_ref);
-            vec2 center = ins.offset + circle.center.xy;
-            float r = circle.radius;
-            if (max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX))
-                && max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
-            {
-                CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
-                alloc_cmd(cmd_ref, cmd_limit);
-                Cmd_Circle_write(cmd_ref, cmd);
-                cmd_ref.offset += Cmd_size;
-            }
-            break;
-        case PietItem_Poly:
-            ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
-            stroke_th.items.offset += ItemHeader_size;
-            if (stroke_item.segments.offset != 0) {
-                PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
-                CmdStroke cmd = CmdStroke(
-                    stroke_item.segments.offset,
-                    0.5 * poly.width,
-                    poly.rgba_color
-                );
-                alloc_cmd(cmd_ref, cmd_limit);
-                Cmd_Stroke_write(cmd_ref, cmd);
-                cmd_ref.offset += Cmd_size;
-            }
-            break;
-        case PietItem_Fill:
-            FillItemHeader fill_item = FillItemHeader_read(fill_th.items);
-            fill_th.items.offset += FillItemHeader_size;
-            // TODO: handle segments == 0 but backdrop != specially, it's a solid tile.
-            if (fill_item.segments.offset != 0) {
-                PietFill fill = PietItem_Fill_read(item_ref);
-                CmdFill cmd = CmdFill(
-                    fill_item.segments.offset,
-                    fill_item.backdrop,
-                    fill.rgba_color
-                );
-                alloc_cmd(cmd_ref, cmd_limit);
-                Cmd_Fill_write(cmd_ref, cmd);
-                cmd_ref.offset += Cmd_size;
-            } else if (fill_item.backdrop != 0) {
-                // TODO: truncate existing cmd list if alpha is opaque
-                PietFill fill = PietItem_Fill_read(item_ref);
-                alloc_cmd(cmd_ref, cmd_limit);
-                Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
-                cmd_ref.offset += Cmd_size;
-            }
-            break;
-        }
-        tg_ref.offset += TileGroup_size;
-    }
-    Cmd_End_write(cmd_ref);
-}

piet-gpu/shader/kernel3.spv
Binary file not shown.
piet-gpu/shader/kernel4.comp
@@ -9,26 +9,14 @@
 
 layout(local_size_x = 16, local_size_y = 16) in;
 
-// Same concern that this should be readonly as in kernel 3.
+// This should be annotated readonly but infra doesn't support that yet.
 layout(set = 0, binding = 0) buffer PtclBuf {
     uint[] ptcl;
 };
 
-// Used readonly
-layout(set = 0, binding = 1) buffer SegmentBuf {
-    uint[] segment;
-};
-
-// Used readonly
-layout(set = 0, binding = 2) buffer FillSegBuf {
-    uint[] fill_seg;
-};
-
-layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;
+layout(rgba8, set = 0, binding = 1) uniform writeonly image2D image;
 
 #include "ptcl.h"
-#include "segment.h"
-#include "fill_seg.h"
 
 #include "setup.h"
@@ -79,11 +67,11 @@ void main() {
             CmdFill fill = Cmd_Fill_read(cmd_ref);
             // Probably better to store as float, but conversion is no doubt cheap.
             float area = float(fill.backdrop);
-            FillSegChunkRef fill_seg_chunk_ref = FillSegChunkRef(fill.seg_ref);
+            SegChunkRef fill_seg_chunk_ref = SegChunkRef(fill.seg_ref);
             do {
-                FillSegChunk seg_chunk = FillSegChunk_read(fill_seg_chunk_ref);
+                SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
                 for (int i = 0; i < seg_chunk.n; i++) {
-                    FillSegment seg = FillSegment_read(FillSegmentRef(fill_seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * i));
+                    Segment seg = Segment_read(SegmentRef(fill_seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
                     vec2 start = seg.start - xy;
                     vec2 end = seg.end - xy;
                     vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);

piet-gpu/shader/kernel4.spv
Binary file not shown.
piet-gpu/shader/ptcl.h
@@ -36,6 +36,14 @@ struct CmdRef {
     uint offset;
 };
 
+struct SegmentRef {
+    uint offset;
+};
+
+struct SegChunkRef {
+    uint offset;
+};
+
 struct CmdCircle {
     vec2 center;
     float radius;
@@ -141,6 +149,28 @@ CmdRef Cmd_index(CmdRef ref, uint index) {
     return CmdRef(ref.offset + index * Cmd_size);
 }
 
+struct Segment {
+    vec2 start;
+    vec2 end;
+};
+
+#define Segment_size 16
+
+SegmentRef Segment_index(SegmentRef ref, uint index) {
+    return SegmentRef(ref.offset + index * Segment_size);
+}
+
+struct SegChunk {
+    uint n;
+    SegChunkRef next;
+};
+
+#define SegChunk_size 8
+
+SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
+    return SegChunkRef(ref.offset + index * SegChunk_size);
+}
+
 CmdCircle CmdCircle_read(CmdCircleRef ref) {
     uint ix = ref.offset >> 2;
     uint raw0 = ptcl[ix + 0];
@@ -362,3 +392,39 @@ void Cmd_Bail_write(CmdRef ref) {
     ptcl[ref.offset >> 2] = Cmd_Bail;
 }
 
+Segment Segment_read(SegmentRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    uint raw1 = ptcl[ix + 1];
+    uint raw2 = ptcl[ix + 2];
+    uint raw3 = ptcl[ix + 3];
+    Segment s;
+    s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
+    s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
+    return s;
+}
+
+void Segment_write(SegmentRef ref, Segment s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = floatBitsToUint(s.start.x);
+    ptcl[ix + 1] = floatBitsToUint(s.start.y);
+    ptcl[ix + 2] = floatBitsToUint(s.end.x);
+    ptcl[ix + 3] = floatBitsToUint(s.end.y);
+}
+
+SegChunk SegChunk_read(SegChunkRef ref) {
+    uint ix = ref.offset >> 2;
+    uint raw0 = ptcl[ix + 0];
+    uint raw1 = ptcl[ix + 1];
+    SegChunk s;
+    s.n = raw0;
+    s.next = SegChunkRef(raw1);
+    return s;
+}
+
+void SegChunk_write(SegChunkRef ref, SegChunk s) {
+    uint ix = ref.offset >> 2;
+    ptcl[ix + 0] = s.n;
+    ptcl[ix + 1] = s.next.offset;
+}
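The Segment/SegChunk readers and writers added to ptcl.h above are what will let the coarse pass emit segments straight into the ptcl buffer. A hedged sketch of the producer side, reusing the atomic bump-allocation idiom the deleted kernels used (`n_segs` and `segs` are hypothetical locals; `alloc` and SEG_CHUNK_ALLOC follow the existing setup.h conventions):

    // Sketch only: write one terminated chunk of segments into ptcl.
    uint chunk_offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
    SegChunk_write(SegChunkRef(chunk_offset), SegChunk(n_segs, SegChunkRef(0)));
    for (uint i = 0; i < n_segs; i++) {
        Segment_write(SegmentRef(chunk_offset + SegChunk_size + Segment_size * i), segs[i]);
    }
    // A command such as CmdStroke/CmdFill then records chunk_offset as its seg_ref.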
piet-gpu/shader/segment.h
@@ -1,126 +0,0 @@
-// Code auto-generated by piet-gpu-derive
-
-struct TileHeaderRef {
-    uint offset;
-};
-
-struct ItemHeaderRef {
-    uint offset;
-};
-
-struct SegmentRef {
-    uint offset;
-};
-
-struct SegChunkRef {
-    uint offset;
-};
-
-struct TileHeader {
-    uint n;
-    ItemHeaderRef items;
-};
-
-#define TileHeader_size 8
-
-TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) {
-    return TileHeaderRef(ref.offset + index * TileHeader_size);
-}
-
-struct ItemHeader {
-    SegChunkRef segments;
-};
-
-#define ItemHeader_size 4
-
-ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) {
-    return ItemHeaderRef(ref.offset + index * ItemHeader_size);
-}
-
-struct Segment {
-    vec2 start;
-    vec2 end;
-};
-
-#define Segment_size 16
-
-SegmentRef Segment_index(SegmentRef ref, uint index) {
-    return SegmentRef(ref.offset + index * Segment_size);
-}
-
-struct SegChunk {
-    uint n;
-    SegChunkRef next;
-};
-
-#define SegChunk_size 8
-
-SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
-    return SegChunkRef(ref.offset + index * SegChunk_size);
-}
-
-TileHeader TileHeader_read(TileHeaderRef ref) {
-    uint ix = ref.offset >> 2;
-    uint raw0 = segment[ix + 0];
-    uint raw1 = segment[ix + 1];
-    TileHeader s;
-    s.n = raw0;
-    s.items = ItemHeaderRef(raw1);
-    return s;
-}
-
-void TileHeader_write(TileHeaderRef ref, TileHeader s) {
-    uint ix = ref.offset >> 2;
-    segment[ix + 0] = s.n;
-    segment[ix + 1] = s.items.offset;
-}
-
-ItemHeader ItemHeader_read(ItemHeaderRef ref) {
-    uint ix = ref.offset >> 2;
-    uint raw0 = segment[ix + 0];
-    ItemHeader s;
-    s.segments = SegChunkRef(raw0);
-    return s;
-}
-
-void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) {
-    uint ix = ref.offset >> 2;
-    segment[ix + 0] = s.segments.offset;
-}
-
-Segment Segment_read(SegmentRef ref) {
-    uint ix = ref.offset >> 2;
-    uint raw0 = segment[ix + 0];
-    uint raw1 = segment[ix + 1];
-    uint raw2 = segment[ix + 2];
-    uint raw3 = segment[ix + 3];
-    Segment s;
-    s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
-    s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
-    return s;
-}
-
-void Segment_write(SegmentRef ref, Segment s) {
-    uint ix = ref.offset >> 2;
-    segment[ix + 0] = floatBitsToUint(s.start.x);
-    segment[ix + 1] = floatBitsToUint(s.start.y);
-    segment[ix + 2] = floatBitsToUint(s.end.x);
-    segment[ix + 3] = floatBitsToUint(s.end.y);
-}
-
-SegChunk SegChunk_read(SegChunkRef ref) {
-    uint ix = ref.offset >> 2;
-    uint raw0 = segment[ix + 0];
-    uint raw1 = segment[ix + 1];
-    SegChunk s;
-    s.n = raw0;
-    s.next = SegChunkRef(raw1);
-    return s;
-}
-
-void SegChunk_write(SegChunkRef ref, SegChunk s) {
-    uint ix = ref.offset >> 2;
-    segment[ix + 0] = s.n;
-    segment[ix + 1] = s.next.offset;
-}
piet-gpu/src/lib.rs
@@ -209,16 +209,11 @@ impl<D: Device> Renderer<D> {
             &[],
         )?;
 
-        // These will probably be combined with the ptcl buf, as they're all written by the
-        // same kernel now.
-        let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
-        let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
-
        let k4_code = include_bytes!("../shader/kernel4.spv");
-        let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1)?;
+        let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 1, 1)?;
         let k4_ds = device.create_descriptor_set(
             &k4_pipeline,
-            &[&ptcl_buf, &segment_buf, &fill_seg_buf],
+            &[&ptcl_buf],
             &[&image_dev],
         )?;
 