Adds fills, and has more or less working tiger render (with artifacts).
This commit is contained in:
Raph Levien 2020-04-30 17:06:01 -07:00
parent 064ee86a45
commit aa83d782ed
22 changed files with 785 additions and 35 deletions

16
Cargo.lock generated
View file

@ -139,6 +139,7 @@ dependencies = [
"piet-gpu-types", "piet-gpu-types",
"png", "png",
"rand", "rand",
"roxmltree",
] ]
[[package]] [[package]]
@ -243,6 +244,15 @@ dependencies = [
"rand_core", "rand_core",
] ]
[[package]]
name = "roxmltree"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d5001f134077069d87f77c8b9452b690df2445f7a43f1c7ca4a1af8dd505789d"
dependencies = [
"xmlparser",
]
[[package]] [[package]]
name = "syn" name = "syn"
version = "1.0.17" version = "1.0.17"
@ -287,3 +297,9 @@ name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0" version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "xmlparser"
version = "0.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccb4240203dadf40be2de9369e5c6dec1bf427528115b030baca3334c18362d7"

View file

@ -0,0 +1,37 @@
use piet_gpu_derive::piet_gpu;
// Structures representing segments for fill items.
// There is some cut'n'paste here from stroke segments, which can be
// traced to the fact that buffers in GLSL are basically global.
// Maybe there's a way to address that, but in the meantime living
// with the duplication is easiest.
// GPU-side layout of the per-tile fill data produced by the fill kernel
// (kernel2f) and consumed by the later kernels. The macro generates the
// matching GLSL accessors (see shader/fill_seg.h).
piet_gpu! {
#[gpu_write]
mod fill_seg {
// One per tile, stored at `tile_ix * size`: the number of fill items
// touching the tile and a reference to the first of their headers.
struct FillTileHeader {
n: u32,
items: Ref<FillItemHeader>,
}
// One per (tile, fill item) pair: the accumulated backdrop value for the
// tile and the head of the item's segment chunk list (0 when the item
// contributed no segments to this tile).
struct FillItemHeader {
backdrop: i32,
segments: Ref<FillSegChunk>,
}
// TODO: strongly consider using f16. If so, these would be
// relative to the tile. We're doing f32 for now to minimize
// divergence from piet-metal originals.
struct FillSegment {
start: [f32; 2],
end: [f32; 2],
}
// Header of a chunk in a singly linked list of segments: `n` segments are
// stored directly after this header, `next` is the following chunk (the
// fill kernel writes 0 for the last one).
struct FillSegChunk {
n: u32,
next: Ref<FillSegChunk>,
// Segments follow (could represent this as a variable sized array).
}
}
}

View file

@ -1,4 +1,5 @@
pub mod encoder; pub mod encoder;
pub mod fill_seg;
pub mod ptcl; pub mod ptcl;
pub mod scene; pub mod scene;
pub mod segment; pub mod segment;

View file

@ -7,6 +7,7 @@ fn main() {
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()), "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
"segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()), "segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
"fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()),
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()), "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()), "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
_ => println!("Oops, unknown module name"), _ => println!("Oops, unknown module name"),

View file

@ -19,8 +19,10 @@ piet_gpu! {
rgba_color: u32, rgba_color: u32,
} }
struct CmdFill { struct CmdFill {
start: [f32; 2], // Should be Ref<FillSegChunk> if we had cross-module references.
end: [f32; 2], seg_ref: u32,
backdrop: i32,
rgba_color: u32,
} }
struct CmdFillEdge { struct CmdFillEdge {
// The sign is only one bit. // The sign is only one bit.

View file

@ -17,3 +17,4 @@ kurbo = "0.5.11"
piet = "0.0.12" piet = "0.0.12"
png = "0.16.2" png = "0.16.2"
rand = "0.7.3" rand = "0.7.3"
roxmltree = "0.11"

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 62 KiB

View file

@ -13,6 +13,8 @@ build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h ptcl.h setup.h build kernel2f.spv: glsl kernel2f.comp | scene.h tilegroup.h fill_seg.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h setup.h build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h

130
piet-gpu/shader/fill_seg.h Normal file
View file

@ -0,0 +1,130 @@
// Code auto-generated by piet-gpu-derive
// Typed references into the `fill_seg` buffer. Each one wraps a byte offset;
// the read/write helpers turn it into a uint index with `offset >> 2`.
struct FillTileHeaderRef {
uint offset;
};
struct FillItemHeaderRef {
uint offset;
};
struct FillSegmentRef {
uint offset;
};
struct FillSegChunkRef {
uint offset;
};
// Per-tile header: item count plus a reference to the item headers.
struct FillTileHeader {
uint n;
FillItemHeaderRef items;
};
// Encoded size in bytes (two 32-bit words).
#define FillTileHeader_size 8
// Returns a reference to the `index`-th FillTileHeader of an array at `ref`.
FillTileHeaderRef FillTileHeader_index(FillTileHeaderRef ref, uint index) {
return FillTileHeaderRef(ref.offset + index * FillTileHeader_size);
}
// Per-item header for one tile: signed backdrop value and a reference to the
// first segment chunk.
struct FillItemHeader {
int backdrop;
FillSegChunkRef segments;
};
// Encoded size in bytes (two 32-bit words).
#define FillItemHeader_size 8
// Returns a reference to the `index`-th FillItemHeader of an array at `ref`.
FillItemHeaderRef FillItemHeader_index(FillItemHeaderRef ref, uint index) {
return FillItemHeaderRef(ref.offset + index * FillItemHeader_size);
}
// One line segment of a fill outline, stored as two float pairs.
struct FillSegment {
vec2 start;
vec2 end;
};
// Encoded size in bytes (four 32-bit words).
#define FillSegment_size 16
// Returns a reference to the `index`-th FillSegment of an array at `ref`.
FillSegmentRef FillSegment_index(FillSegmentRef ref, uint index) {
return FillSegmentRef(ref.offset + index * FillSegment_size);
}
// Header of a chunk of segments: segment count plus a link to the next chunk.
struct FillSegChunk {
uint n;
FillSegChunkRef next;
};
// Encoded size in bytes of the header only (two 32-bit words).
#define FillSegChunk_size 8
// Returns a reference `index` header-sizes past `ref`.
FillSegChunkRef FillSegChunk_index(FillSegChunkRef ref, uint index) {
return FillSegChunkRef(ref.offset + index * FillSegChunk_size);
}
// Decodes the FillTileHeader stored at byte offset `ref.offset` of `fill_seg`.
FillTileHeader FillTileHeader_read(FillTileHeaderRef ref) {
// Byte offset -> index into the uint array.
uint ix = ref.offset >> 2;
uint raw0 = fill_seg[ix + 0];
uint raw1 = fill_seg[ix + 1];
FillTileHeader s;
s.n = raw0;
s.items = FillItemHeaderRef(raw1);
return s;
}
// Encodes `s` into `fill_seg` at byte offset `ref.offset`.
void FillTileHeader_write(FillTileHeaderRef ref, FillTileHeader s) {
uint ix = ref.offset >> 2;
fill_seg[ix + 0] = s.n;
fill_seg[ix + 1] = s.items.offset;
}
// Decodes the FillItemHeader stored at byte offset `ref.offset` of `fill_seg`.
FillItemHeader FillItemHeader_read(FillItemHeaderRef ref) {
// Byte offset -> index into the uint array.
uint ix = ref.offset >> 2;
uint raw0 = fill_seg[ix + 0];
uint raw1 = fill_seg[ix + 1];
FillItemHeader s;
// The backdrop is stored as the raw bits of a signed integer.
s.backdrop = int(raw0);
s.segments = FillSegChunkRef(raw1);
return s;
}
// Encodes `s` into `fill_seg` at byte offset `ref.offset`.
void FillItemHeader_write(FillItemHeaderRef ref, FillItemHeader s) {
uint ix = ref.offset >> 2;
fill_seg[ix + 0] = uint(s.backdrop);
fill_seg[ix + 1] = s.segments.offset;
}
// Decodes the FillSegment stored at byte offset `ref.offset` of `fill_seg`.
FillSegment FillSegment_read(FillSegmentRef ref) {
// Byte offset -> index into the uint array.
uint ix = ref.offset >> 2;
uint raw0 = fill_seg[ix + 0];
uint raw1 = fill_seg[ix + 1];
uint raw2 = fill_seg[ix + 2];
uint raw3 = fill_seg[ix + 3];
FillSegment s;
// Coordinates are stored as the bit patterns of 32-bit floats.
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s;
}
// Encodes `s` into `fill_seg` at byte offset `ref.offset`.
void FillSegment_write(FillSegmentRef ref, FillSegment s) {
uint ix = ref.offset >> 2;
fill_seg[ix + 0] = floatBitsToUint(s.start.x);
fill_seg[ix + 1] = floatBitsToUint(s.start.y);
fill_seg[ix + 2] = floatBitsToUint(s.end.x);
fill_seg[ix + 3] = floatBitsToUint(s.end.y);
}
// Decodes the FillSegChunk header stored at byte offset `ref.offset` of
// `fill_seg`. Only the header is read; the segments are not.
FillSegChunk FillSegChunk_read(FillSegChunkRef ref) {
// Byte offset -> index into the uint array.
uint ix = ref.offset >> 2;
uint raw0 = fill_seg[ix + 0];
uint raw1 = fill_seg[ix + 1];
FillSegChunk s;
s.n = raw0;
s.next = FillSegChunkRef(raw1);
return s;
}
// Encodes the chunk header `s` into `fill_seg` at byte offset `ref.offset`.
void FillSegChunk_write(FillSegChunkRef ref, FillSegChunk s) {
uint ix = ref.offset >> 2;
fill_seg[ix + 0] = s.n;
fill_seg[ix + 1] = s.next.offset;
}

View file

@ -52,10 +52,19 @@ void main() {
TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START); TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START);
ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4); ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4);
InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size); InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size);
uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_ALLOC - Instance_size; uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_STROKE_ALLOC - Instance_size;
uint stroke_chunk_n = 0; uint stroke_chunk_n = 0;
uint stroke_n = 0; uint stroke_n = 0;
// State for fill references. All this is a bit cut'n'paste, but making a
// proper abstraction isn't easy.
TileGroupRef fill_start = TileGroupRef(tg_ref.offset + TILEGROUP_FILL_START);
ChunkRef fill_chunk_start = ChunkRef(fill_start.offset + 4);
InstanceRef fill_ref = InstanceRef(fill_chunk_start.offset + Chunk_size);
uint fill_limit = fill_start.offset + TILEGROUP_INITIAL_FILL_ALLOC - Instance_size;
uint fill_chunk_n = 0;
uint fill_n = 0;
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX); vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
PietItemRef root = PietItemRef(0); PietItemRef root = PietItemRef(0);
SimpleGroup group = PietItem_Group_read(root); SimpleGroup group = PietItem_Group_read(root);
@ -100,6 +109,20 @@ void main() {
Instance_write(stroke_ref, ins); Instance_write(stroke_ref, ins);
stroke_chunk_n++; stroke_chunk_n++;
stroke_ref.offset += Instance_size; stroke_ref.offset += Instance_size;
} else if (tag == PietItem_Fill) {
if (fill_ref.offset > fill_limit) {
uint new_fill = atomicAdd(alloc, TILEGROUP_FILL_ALLOC);
Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(new_fill)));
fill_chunk_start = ChunkRef(new_fill);
fill_ref = InstanceRef(new_fill + Chunk_size);
fill_n += fill_chunk_n;
fill_chunk_n = 0;
fill_limit = new_fill + TILEGROUP_FILL_ALLOC - Instance_size;
}
Instance_write(fill_ref, ins);
fill_chunk_n++;
fill_ref.offset += Instance_size;
} }
} }
if (is_group) { if (is_group) {
@ -129,4 +152,10 @@ void main() {
Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0))); Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0)));
} }
tilegroup[stroke_start.offset >> 2] = stroke_n; tilegroup[stroke_start.offset >> 2] = stroke_n;
fill_n += fill_chunk_n;
if (fill_n > 0) {
Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(0)));
}
tilegroup[fill_start.offset >> 2] = fill_n;
} }

Binary file not shown.

View file

@ -0,0 +1,165 @@
// This is "kernel 2" (fill) in a 4-kernel pipeline. It processes the fill
// (polyline) items in the scene and generates a list of segments for each, for
// each tile.
#version 450
#extension GL_GOOGLE_include_directive : enable
layout(local_size_x = 32) in;
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
layout(set = 0, binding = 2) buffer FillSegBuf {
uint[] fill_seg;
};
layout(set = 0, binding = 3) buffer AllocBuf {
uint alloc;
};
#include "scene.h"
#include "tilegroup.h"
#include "fill_seg.h"
#include "setup.h"
// Guarantees that one more FillSegment can be written to the chunk currently
// being built, bump-allocating a fresh SEG_CHUNK_ALLOC region from `alloc`
// when the current region is exhausted.
//
// chunk_n_segs    - segments already written to the open chunk; reset to 0
//                   when the open chunk is sealed and a new one is started.
// seg_chunk_ref   - header position of the open chunk; updated on allocation.
// first_seg_chunk - set to the chunk position when an item's first chunk starts.
// seg_limit       - highest offset at which a segment may still be written
//                   inside the current region; updated on allocation.
void alloc_chunk(inout uint chunk_n_segs, inout FillSegChunkRef seg_chunk_ref,
    inout FillSegChunkRef first_seg_chunk, inout uint seg_limit)
{
    if (chunk_n_segs != 0) {
        // Continuing an open chunk: where would the next segment land?
        uint write_pos = seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs;
        if (write_pos > seg_limit) {
            // Region exhausted: seal the open chunk with a link to a freshly
            // allocated region and continue there with an empty chunk.
            uint fresh = atomicAdd(alloc, SEG_CHUNK_ALLOC);
            FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(fresh)));
            seg_limit = fresh + SEG_CHUNK_ALLOC - FillSegment_size;
            seg_chunk_ref.offset = fresh;
            chunk_n_segs = 0;
        }
        return;
    }

    // First segment of an item: start its chunk at the current cursor, moving
    // to a new region first when too little room is left.
    // NOTE(review): 40 presumably stands for a chunk header plus two segments
    // (8 + 2 * 16) - confirm before replacing it with a named expression.
    if (seg_chunk_ref.offset + 40 > seg_limit) {
        uint fresh = atomicAdd(alloc, SEG_CHUNK_ALLOC);
        seg_chunk_ref.offset = fresh;
        seg_limit = fresh + SEG_CHUNK_ALLOC - FillSegment_size;
    }
    first_seg_chunk = seg_chunk_ref;
}
// Entry point, one invocation per tile. Walks the fill instance list stored
// for this tile's tilegroup and, for every fill item, writes a FillItemHeader
// holding the tile's backdrop and the list of segments that touch the tile.
void main() {
    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
    // Top-left corner of this tile in pixels.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    TileGroupRef fill_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_FILL_START);
    uint fill_n = tilegroup[fill_start.offset >> 2];

    FillTileHeaderRef tile_header_ref = FillTileHeaderRef(tile_ix * FillTileHeader_size);
    if (fill_n > 0) {
        ChunkRef chunk_ref = ChunkRef(fill_start.offset + 4);
        Chunk chunk = Chunk_read(chunk_ref);
        InstanceRef fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
        FillItemHeaderRef item_header = FillItemHeaderRef(atomicAdd(alloc, fill_n * FillItemHeader_size));
        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, item_header));
        // Segment allocation cursor. It is shared by all items of this tile so
        // that consecutive items pack into the same allocated region; it must
        // therefore NOT be redeclared inside the item loop.
        FillSegChunkRef seg_chunk_ref = FillSegChunkRef(0);
        uint seg_limit = 0;
        // Iterate through items by walking the instance chunk list; the loop
        // ends when the current chunk is drained and has no successor.
        while (true) {
            if (chunk.chunk_n == 0) {
                chunk_ref = chunk.next;
                if (chunk_ref.offset == 0) {
                    break;
                }
                chunk = Chunk_read(chunk_ref);
                fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
            }
            Instance ins = Instance_read(fill_ref);
            PietFill fill = PietItem_Fill_read(PietItemRef(ins.item_ref));

            // Process the fill polyline item.
            // Guard against an empty polyline so the unsigned subtraction
            // cannot wrap around.
            uint max_n_segs = fill.n_points > 0 ? fill.n_points - 1 : 0;
            uint chunk_n_segs = 0;
            int backdrop = 0;
            FillSegChunkRef first_seg_chunk = FillSegChunkRef(0);
            vec2 start = Point_read(fill.points).xy;
            for (uint j = 0; j < max_n_segs; j++) {
                fill.points.offset += Point_size;
                vec2 end = Point_read(fill.points).xy;
                // Remember the true vertex: `end` may be clipped to the tile
                // edge below, but the next segment must start at the vertex.
                vec2 next_start = end;

                // Process one segment.

                // TODO: I think this would go more smoothly (and be easier to
                // make numerically robust) if it were based on clipping the line
                // to the tile box. See:
                // https://tavianator.com/fast-branchless-raybounding-box-intersections/
                vec2 xymin = min(start, end);
                vec2 xymax = max(start, end);
                // Implicit line equation a * x + b * y + c = 0 through start/end.
                float a = end.y - start.y;
                float b = start.x - end.x;
                float c = -(a * start.x + b * start.y);
                vec2 xy1 = xy0 + vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
                float ytop = max(xy0.y, xymin.y);
                float ybot = min(xy1.y, xymax.y);
                // Side of the line on which each corner of the clipped tile box lies.
                float s00 = sign(b * ytop + a * xy0.x + c);
                float s01 = sign(b * ytop + a * xy1.x + c);
                float s10 = sign(b * ybot + a * xy0.x + c);
                float s11 = sign(b * ybot + a * xy1.x + c);
                float sTopLeft = sign(b * xy0.y + a * xy0.x + c);
                if (sTopLeft == sign(a) && xymin.y <= xy0.y && xymax.y > xy0.y) {
                    backdrop -= int(s00);
                }

                // This is adapted from piet-metal but could be improved.

                if (max(xymin.x, xy0.x) < min(xymax.x, xy1.x)
                    && ytop < ybot
                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
                {
                    if (xymin.x < xy0.x) {
                        // The segment crosses the tile's left edge at yEdge.
                        float yEdge = mix(start.y, end.y, (start.x - xy0.x) / b);
                        if (yEdge >= xy0.y && yEdge < xy1.y) {
                            // This is encoded the same as a general fill segment, but could be
                            // special-cased, either here or in rendering. (It was special-cased
                            // in piet-metal).
                            FillSegment edge_seg;
                            if (b > 0.0) {
                                end = vec2(xy0.x, yEdge);
                                edge_seg.start = end;
                                edge_seg.end = vec2(xy0.x, xy1.y);
                            } else {
                                start = vec2(xy0.x, yEdge);
                                edge_seg.start = vec2(xy0.x, xy1.y);
                                edge_seg.end = start;
                            }
                            alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
                            FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), edge_seg);
                            chunk_n_segs++;
                        }
                    }
                    alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
                    FillSegment seg = FillSegment(start, end);
                    FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), seg);
                    chunk_n_segs++;
                }

                start = next_start;
            }
            FillItemHeader_write(item_header, FillItemHeader(backdrop, first_seg_chunk));
            if (chunk_n_segs != 0) {
                // Seal the last chunk and move the cursor past it so the next
                // item starts right after this one's segments.
                FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(0)));
                seg_chunk_ref.offset += FillSegChunk_size + FillSegment_size * chunk_n_segs;
            }

            fill_ref.offset += Instance_size;
            chunk.chunk_n--;
            item_header.offset += FillItemHeader_size;
        }
    } else {
        // As an optimization, we could just write 0 for the size.
        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, FillItemHeaderRef(0)));
    }
}

Binary file not shown.

View file

@ -21,17 +21,23 @@ layout(set = 0, binding = 2) buffer SegmentBuf {
uint[] segment; uint[] segment;
}; };
layout(set = 0, binding = 3) buffer PtclBuf { // Used readonly
layout(set = 0, binding = 3) buffer FillSegmentBuf {
uint[] fill_seg;
};
layout(set = 0, binding = 4) buffer PtclBuf {
uint[] ptcl; uint[] ptcl;
}; };
layout(set = 0, binding = 4) buffer AllocBuf { layout(set = 0, binding = 5) buffer AllocBuf {
uint alloc; uint alloc;
}; };
#include "scene.h" #include "scene.h"
#include "tilegroup.h" #include "tilegroup.h"
#include "segment.h" #include "segment.h"
#include "fill_seg.h"
#include "ptcl.h" #include "ptcl.h"
#include "setup.h" #include "setup.h"
@ -56,6 +62,7 @@ void main() {
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size; uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size)); TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));
FillTileHeader fill_th = FillTileHeader_read(FillTileHeaderRef(tile_ix * FillTileHeader_size));
while (true) { while (true) {
uint tg_tag = TileGroup_tag(tg_ref); uint tg_tag = TileGroup_tag(tg_ref);
@ -99,6 +106,22 @@ void main() {
cmd_ref.offset += Cmd_size; cmd_ref.offset += Cmd_size;
} }
break; break;
case PietItem_Fill:
FillItemHeader fill_item = FillItemHeader_read(fill_th.items);
fill_th.items.offset += FillItemHeader_size;
// TODO: handle segments == 0 but backdrop != 0 specially, it's a solid tile.
if (fill_item.segments.offset != 0 || fill_item.backdrop != 0) {
PietFill fill = PietItem_Fill_read(item_ref);
CmdFill cmd = CmdFill(
fill_item.segments.offset,
fill_item.backdrop,
fill.rgba_color
);
alloc_cmd(cmd_ref, cmd_limit);
Cmd_Fill_write(cmd_ref, cmd);
cmd_ref.offset += Cmd_size;
}
break;
} }
tg_ref.offset += TileGroup_size; tg_ref.offset += TileGroup_size;
} }

Binary file not shown.

View file

@ -19,12 +19,18 @@ layout(set = 0, binding = 1) buffer SegmentBuf {
uint[] segment; uint[] segment;
}; };
layout(set = 0, binding = 2) buffer ImageBuf { // Used readonly
layout(set = 0, binding = 2) buffer FillSegBuf {
uint[] fill_seg;
};
layout(set = 0, binding = 3) buffer ImageBuf {
uint[] image; uint[] image;
}; };
#include "ptcl.h" #include "ptcl.h"
#include "segment.h" #include "segment.h"
#include "fill_seg.h"
#include "setup.h" #include "setup.h"
@ -70,6 +76,36 @@ void main() {
alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0); alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0);
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
break; break;
case Cmd_Fill:
    // Accumulates signed area coverage for this pixel from the fill's
    // backdrop and the segments recorded for the tile, then blends the color.
    CmdFill fill = Cmd_Fill_read(cmd_ref);
    // Probably better to store as float, but conversion is no doubt cheap.
    float area = float(fill.backdrop);
    FillSegChunkRef fill_seg_chunk_ref = FillSegChunkRef(fill.seg_ref);
    // A seg_ref of 0 means "no segments" (the command is still emitted when
    // only the backdrop is non-zero). Offset 0 of the buffer holds a tile
    // header rather than a chunk, so the list must be tested before the first
    // read instead of after it.
    while (fill_seg_chunk_ref.offset != 0) {
        FillSegChunk seg_chunk = FillSegChunk_read(fill_seg_chunk_ref);
        for (int i = 0; i < seg_chunk.n; i++) {
            FillSegment seg = FillSegment_read(FillSegmentRef(fill_seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * i));
            // Work in coordinates relative to this pixel.
            vec2 start = seg.start - xy;
            vec2 end = seg.end - xy;
            // Vertical extent of the segment clipped to the pixel's [0, 1] range.
            vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
            if (window.x != window.y) {
                vec2 t = (window - start.y) / (end.y - start.y);
                vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
                // The small bias keeps xmax - xmin non-zero for vertical segments.
                float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
                float xmax = max(xs.x, xs.y);
                float b = min(xmax, 1.0);
                float c = max(b, 0.0);
                float d = max(xmin, 0.0);
                float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
                area += a * (window.x - window.y);
            }
        }
        fill_seg_chunk_ref = seg_chunk.next;
    }
    fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
    alpha = min(abs(area), 1.0);
    rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
    break;
case Cmd_Jump: case Cmd_Jump:
cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref); cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref);
continue; continue;

Binary file not shown.

View file

@ -72,11 +72,12 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
} }
struct CmdFill { struct CmdFill {
vec2 start; uint seg_ref;
vec2 end; int backdrop;
uint rgba_color;
}; };
#define CmdFill_size 16 #define CmdFill_size 12
CmdFillRef CmdFill_index(CmdFillRef ref, uint index) { CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
return CmdFillRef(ref.offset + index * CmdFill_size); return CmdFillRef(ref.offset + index * CmdFill_size);
@ -205,19 +206,18 @@ CmdFill CmdFill_read(CmdFillRef ref) {
uint raw0 = ptcl[ix + 0]; uint raw0 = ptcl[ix + 0];
uint raw1 = ptcl[ix + 1]; uint raw1 = ptcl[ix + 1];
uint raw2 = ptcl[ix + 2]; uint raw2 = ptcl[ix + 2];
uint raw3 = ptcl[ix + 3];
CmdFill s; CmdFill s;
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.seg_ref = raw0;
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.backdrop = int(raw1);
s.rgba_color = raw2;
return s; return s;
} }
void CmdFill_write(CmdFillRef ref, CmdFill s) { void CmdFill_write(CmdFillRef ref, CmdFill s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = floatBitsToUint(s.start.x); ptcl[ix + 0] = s.seg_ref;
ptcl[ix + 1] = floatBitsToUint(s.start.y); ptcl[ix + 1] = uint(s.backdrop);
ptcl[ix + 2] = floatBitsToUint(s.end.x); ptcl[ix + 2] = s.rgba_color;
ptcl[ix + 3] = floatBitsToUint(s.end.y);
} }
CmdFillEdge CmdFillEdge_read(CmdFillEdgeRef ref) { CmdFillEdge CmdFillEdge_read(CmdFillEdgeRef ref) {

View file

@ -19,10 +19,14 @@
// there is a region of size TILEGROUP_STRIDE for each tilegroup. // there is a region of size TILEGROUP_STRIDE for each tilegroup.
// At offset 0 are the main instances, encoded with Jump. At offset // At offset 0 are the main instances, encoded with Jump. At offset
// TILEGROUP_STROKE_START are the stroke instances, encoded with // TILEGROUP_STROKE_START are the stroke instances, encoded with
// Head and Link. // Head and Link. Similarly for fill.
#define TILEGROUP_STRIDE 2048 #define TILEGROUP_STRIDE 2048
#define TILEGROUP_STROKE_START 1024 #define TILEGROUP_STROKE_START 1024
#define TILEGROUP_FILL_START 1536
#define TILEGROUP_STROKE_ALLOC 1024 #define TILEGROUP_STROKE_ALLOC 1024
#define TILEGROUP_FILL_ALLOC 1024
#define TILEGROUP_INITIAL_STROKE_ALLOC 512
#define TILEGROUP_INITIAL_FILL_ALLOC 512
// TODO: compute all these // TODO: compute all these

View file

@ -10,9 +10,11 @@ use piet::{Color, RenderContext};
use piet_gpu_hal::vulkan::VkInstance; use piet_gpu_hal::vulkan::VkInstance;
use piet_gpu_hal::{CmdBuf, Device, MemFlags}; use piet_gpu_hal::{CmdBuf, Device, MemFlags};
mod pico_svg;
mod render_ctx; mod render_ctx;
use render_ctx::PietGpuRenderContext; use render_ctx::PietGpuRenderContext;
use pico_svg::PicoSvg;
const WIDTH: usize = 2048; const WIDTH: usize = 2048;
const HEIGHT: usize = 1536; const HEIGHT: usize = 1536;
@ -44,14 +46,22 @@ fn render_scene(rc: &mut impl RenderContext) {
let circle = Circle::new(center, radius); let circle = Circle::new(center, radius);
rc.fill(circle, &color); rc.fill(circle, &color);
} }
let mut path = BezPath::new();
path.move_to((100.0, 1150.0));
path.line_to((200.0, 1200.0));
path.line_to((150.0, 1250.0));
path.close_path();
rc.fill(path, &Color::rgb8(128, 0, 128));
rc.stroke( rc.stroke(
Line::new((100.0, 100.0), (200.0, 150.0)), Line::new((100.0, 100.0), (200.0, 150.0)),
&Color::WHITE, &Color::WHITE,
5.0, 5.0,
); );
render_cardioid(rc); //render_cardioid(rc);
render_tiger(rc);
} }
#[allow(unused)]
fn render_cardioid(rc: &mut impl RenderContext) { fn render_cardioid(rc: &mut impl RenderContext) {
let n = 91; let n = 91;
let dth = std::f64::consts::PI * 2.0 / (n as f64); let dth = std::f64::consts::PI * 2.0 / (n as f64);
@ -69,6 +79,17 @@ fn render_cardioid(rc: &mut impl RenderContext) {
rc.stroke(&path, &Color::BLACK, 2.0); rc.stroke(&path, &Color::BLACK, 2.0);
} }
/// Renders the bundled Ghostscript tiger SVG into `rc`, printing how long
/// parsing and flattening/encoding each take.
///
/// # Panics
///
/// Panics if the bundled SVG cannot be loaded by [`PicoSvg::load`].
fn render_tiger(rc: &mut impl RenderContext) {
    // `include_str!` validates UTF-8 at compile time, so no runtime
    // conversion (and no runtime `unwrap`) is needed for the asset itself.
    let xml_str = include_str!("../Ghostscript_Tiger.svg");

    let start = std::time::Instant::now();
    let svg = PicoSvg::load(xml_str, 8.0).expect("bundled tiger SVG should load");
    println!("parsing time: {:?}", start.elapsed());

    let start = std::time::Instant::now();
    svg.render(rc);
    println!("flattening and encoding time: {:?}", start.elapsed());
}
#[allow(unused)] #[allow(unused)]
fn dump_scene(buf: &[u8]) { fn dump_scene(buf: &[u8]) {
for i in 0..(buf.len() / 4) { for i in 0..(buf.len() / 4) {
@ -107,6 +128,7 @@ fn main() {
let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap(); let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap();
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev).unwrap(); let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev).unwrap();
let segment_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap(); let segment_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap();
let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev).unwrap();
let image_buf = device let image_buf = device
.create_buffer((WIDTH * HEIGHT * 4) as u64, host) .create_buffer((WIDTH * HEIGHT * 4) as u64, host)
.unwrap(); .unwrap();
@ -144,6 +166,26 @@ fn main() {
) )
.unwrap(); .unwrap();
let k2f_alloc_buf_host = device.create_buffer(4, host).unwrap();
let k2f_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
let k2f_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE;
device
.write_buffer(&k2f_alloc_buf_host, &[k2f_alloc_start as u32])
.unwrap();
let k2f_code = include_bytes!("../shader/kernel2f.spv");
let k2f_pipeline = device.create_simple_compute_pipeline(k2f_code, 4).unwrap();
let k2f_ds = device
.create_descriptor_set(
&k2f_pipeline,
&[
&scene_dev,
&tilegroup_buf,
&fill_seg_buf,
&k2f_alloc_buf_dev,
],
)
.unwrap();
let k3_alloc_buf_host = device.create_buffer(4, host).unwrap(); let k3_alloc_buf_host = device.create_buffer(4, host).unwrap();
let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
@ -151,7 +193,7 @@ fn main() {
.write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32]) .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])
.unwrap(); .unwrap();
let k3_code = include_bytes!("../shader/kernel3.spv"); let k3_code = include_bytes!("../shader/kernel3.spv");
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 5).unwrap(); let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6).unwrap();
let k3_ds = device let k3_ds = device
.create_descriptor_set( .create_descriptor_set(
&k3_pipeline, &k3_pipeline,
@ -159,6 +201,7 @@ fn main() {
&scene_dev, &scene_dev,
&tilegroup_buf, &tilegroup_buf,
&segment_buf, &segment_buf,
&fill_seg_buf,
&ptcl_buf, &ptcl_buf,
&k3_alloc_buf_dev, &k3_alloc_buf_dev,
], ],
@ -166,18 +209,26 @@ fn main() {
.unwrap(); .unwrap();
let k4_code = include_bytes!("../shader/kernel4.spv"); let k4_code = include_bytes!("../shader/kernel4.spv");
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3).unwrap(); let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 4).unwrap();
let k4_ds = device let k4_ds = device
.create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &segment_buf, &image_dev]) .create_descriptor_set(
&k4_pipeline,
&[&ptcl_buf, &segment_buf, &fill_seg_buf, &image_dev],
)
.unwrap(); .unwrap();
let query_pool = device.create_query_pool(5).unwrap(); let query_pool = device.create_query_pool(6).unwrap();
let mut cmd_buf = device.create_cmd_buf().unwrap(); let mut cmd_buf = device.create_cmd_buf().unwrap();
cmd_buf.begin(); cmd_buf.begin();
cmd_buf.copy_buffer(&scene_buf, &scene_dev); cmd_buf.copy_buffer(&scene_buf, &scene_dev);
// Note: we could use one alloc buf and reuse it. But we'll stick with
// multiple ones for clarity.
cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev); cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev);
cmd_buf.copy_buffer(&k2s_alloc_buf_host, &k2s_alloc_buf_dev); cmd_buf.copy_buffer(&k2s_alloc_buf_host, &k2s_alloc_buf_dev);
cmd_buf.copy_buffer(&k2f_alloc_buf_host, &k2f_alloc_buf_dev);
cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev); cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev);
// Note: these clears aren't necessary, and are here to make inspection
// of the buffers cleaner. Can likely be removed.
cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.clear_buffer(&tilegroup_buf);
cmd_buf.clear_buffer(&ptcl_buf); cmd_buf.clear_buffer(&ptcl_buf);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
@ -196,20 +247,30 @@ fn main() {
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1), ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
); );
cmd_buf.write_timestamp(&query_pool, 2); cmd_buf.write_timestamp(&query_pool, 2);
// Note: this barrier is not necessary (k2f does not depend on
// k2s output), but I'm keeping it here to increase transparency
// of performance.
cmd_buf.memory_barrier();
// kernel2f indexes tiles with gl_GlobalInvocationID.xy only, so a single
// layer of workgroups in z covers every tile exactly once; more layers
// would repeat the work and the allocations for each tile.
cmd_buf.dispatch(
    &k2f_pipeline,
    &k2f_ds,
    ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 3);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.dispatch( cmd_buf.dispatch(
&k3_pipeline, &k3_pipeline,
&k3_ds, &k3_ds,
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1), ((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 3),
); );
cmd_buf.write_timestamp(&query_pool, 3); cmd_buf.write_timestamp(&query_pool, 4);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.dispatch( cmd_buf.dispatch(
&k4_pipeline, &k4_pipeline,
&k4_ds, &k4_ds,
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1), ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
); );
cmd_buf.write_timestamp(&query_pool, 4); cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.copy_buffer(&image_dev, &image_buf); cmd_buf.copy_buffer(&image_dev, &image_buf);
cmd_buf.finish(); cmd_buf.finish();
@ -217,17 +278,21 @@ fn main() {
let timestamps = device.reap_query_pool(query_pool).unwrap(); let timestamps = device.reap_query_pool(query_pool).unwrap();
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3); println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
println!( println!(
"Kernel 2 time: {:.3}ms", "Kernel 2s time: {:.3}ms",
(timestamps[1] - timestamps[0]) * 1e3 (timestamps[1] - timestamps[0]) * 1e3
); );
println!( println!(
"Kernel 3 time: {:.3}ms", "Kernel 2f time: {:.3}ms",
(timestamps[2] - timestamps[1]) * 1e3 (timestamps[2] - timestamps[1]) * 1e3
); );
println!( println!(
"Render time: {:.3}ms", "Kernel 3 time: {:.3}ms",
(timestamps[3] - timestamps[2]) * 1e3 (timestamps[3] - timestamps[2]) * 1e3
); );
println!(
"Render time: {:.3}ms",
(timestamps[4] - timestamps[3]) * 1e3
);
/* /*
let mut k1_data: Vec<u32> = Default::default(); let mut k1_data: Vec<u32> = Default::default();

80
piet-gpu/src/pico_svg.rs Normal file
View file

@ -0,0 +1,80 @@
//! A loader for a tiny fragment of SVG
use std::str::FromStr;
use roxmltree::Document;
use kurbo::BezPath;
use piet::{Color, RenderContext};
/// A parsed SVG fragment: the fill and stroke items in document order.
pub struct PicoSvg {
items: Vec<Item>,
}
/// A single drawable produced from a path element.
pub enum Item {
// A path painted with a solid fill color.
Fill(FillItem),
// A path outlined with a solid color and a stroke width.
Stroke(StrokeItem),
}
/// A stroked path. `width` is the value of the element's `stroke-width`
/// attribute as parsed; `path` has already been scaled by the load factor.
pub struct StrokeItem {
width: f64,
color: Color,
path: BezPath,
}
/// A filled path. `path` has already been scaled by the load factor.
pub struct FillItem {
color: Color,
path: BezPath,
}
impl PicoSvg {
    /// Loads the tiny SVG subset this module understands.
    ///
    /// The first element child of the document root is treated as a group;
    /// every element inside it must carry a `d` path attribute. A `fill`
    /// attribute yields a [`FillItem`], a `stroke` attribute (together with
    /// `stroke-width`) yields a [`StrokeItem`]. The paint keyword `none`
    /// means "do not paint" and produces no item. All paths are scaled
    /// uniformly by `scale`.
    ///
    /// # Errors
    ///
    /// Returns an error if the XML is malformed, the root has no element
    /// child, an element lacks `d`, the path data does not parse, or a
    /// stroked element has a missing or non-numeric `stroke-width`.
    pub fn load(xml_string: &str, scale: f64) -> Result<PicoSvg, Box<dyn std::error::Error>> {
        let doc = Document::parse(xml_string)?;
        let root = doc.root_element();
        let g = root.first_element_child().ok_or("no root element")?;
        let mut items = Vec::new();
        for el in g.children() {
            if !el.is_element() {
                continue;
            }
            let d = el.attribute("d").ok_or("missing 'd' attribute")?;
            let bp = BezPath::from_svg(d)?;
            let path = kurbo::Affine::scale(scale) * bp;
            // `none` is the SVG keyword for "no paint", not a color.
            let fill_paint = el.attribute("fill").filter(|paint| *paint != "none");
            let stroke_paint = el.attribute("stroke").filter(|paint| *paint != "none");
            if let Some(fill_color) = fill_paint {
                let color = parse_color(fill_color);
                items.push(Item::Fill(FillItem { color, path: path.clone() }));
            }
            if let Some(stroke_color) = stroke_paint {
                let width = f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?;
                let color = parse_color(stroke_color);
                items.push(Item::Stroke(StrokeItem { width, color, path }));
            }
        }
        Ok(PicoSvg { items })
    }

    /// Draws every item into `rc` in document order.
    pub fn render(&self, rc: &mut impl RenderContext) {
        for item in &self.items {
            match item {
                Item::Fill(fill_item) => {
                    rc.fill(&fill_item.path, &fill_item.color);
                }
                Item::Stroke(stroke_item) => {
                    rc.stroke(&stroke_item.path, &stroke_item.color, stroke_item.width);
                }
            }
        }
    }
}
/// Parses an SVG color written as `#rgb` or `#rrggbb` into an opaque color.
///
/// Any other input (named colors, functional notation, an empty string, or a
/// `#` followed by text that is not hexadecimal) yields a semi-transparent
/// magenta placeholder instead of panicking, so unsupported paints remain
/// visible in the output without aborting the load.
fn parse_color(color: &str) -> Color {
    // RGBA, alpha in the low byte: magenta at roughly half opacity.
    const PLACEHOLDER_RGBA: u32 = 0xff00ff80;

    if !color.starts_with('#') {
        return Color::from_rgba32_u32(PLACEHOLDER_RGBA);
    }
    // '#' is a single byte, so index 1 is always a char boundary.
    match u32::from_str_radix(&color[1..], 16) {
        Ok(mut hex) => {
            if color.len() == 4 {
                // Expand shorthand `#rgb` to `#rrggbb` by doubling each nibble.
                hex = (hex >> 8) * 0x110000 + ((hex >> 4) & 0xf) * 0x1100 + (hex & 0xf) * 0x11;
            }
            // Shift RGB into the top three bytes and append an opaque alpha.
            Color::from_rgba32_u32((hex << 8) + 0xff)
        }
        Err(_) => Color::from_rgba32_u32(PLACEHOLDER_RGBA),
    }
}

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use piet_gpu_types::encoder::{Encode, Encoder, Ref}; use piet_gpu_types::encoder::{Encode, Encoder, Ref};
use piet_gpu_types::scene; use piet_gpu_types::scene;
use piet_gpu_types::scene::{Bbox, PietCircle, PietItem, PietStrokePolyLine, SimpleGroup}; use piet_gpu_types::scene::{Bbox, PietCircle, PietFill, PietItem, PietStrokePolyLine, SimpleGroup};
use piet::kurbo::{Affine, PathEl, Point, Rect, Shape}; use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
@ -119,6 +119,7 @@ impl RenderContext for PietGpuRenderContext {
n_points, n_points,
points, points,
}; };
let bbox = bbox.inset(-0.5 * width);
self.push_item(PietItem::Poly(poly_line), bbox); self.push_item(PietItem::Poly(poly_line), bbox);
} }
_ => (), _ => (),
@ -135,10 +136,11 @@ impl RenderContext for PietGpuRenderContext {
} }
fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) { fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
let bbox = shape.bounding_box();
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
match shape.as_circle() { if let Some(circle) = shape.as_circle() {
Some(circle) => match brush { match brush {
PietGpuBrush::Solid(rgba_color) => { PietGpuBrush::Solid(rgba_color) => {
let piet_circle = PietCircle { let piet_circle = PietCircle {
rgba_color, rgba_color,
@ -149,8 +151,22 @@ impl RenderContext for PietGpuRenderContext {
self.push_item(PietItem::Circle(piet_circle), bbox); self.push_item(PietItem::Circle(piet_circle), bbox);
} }
_ => {} _ => {}
}, }
None => {} return;
}
let path = shape.to_bez_path(TOLERANCE);
let (n_points, points) = flatten_shape(&mut self.encoder, path);
match brush {
PietGpuBrush::Solid(rgba_color) => {
let fill = PietFill {
flags: 0,
rgba_color,
n_points,
points,
};
self.push_item(PietItem::Fill(fill), bbox);
}
_ => (),
} }
} }