Make fills work

The backdrop propagation is slow but it does work.
This commit is contained in:
Raph Levien 2020-06-05 15:07:02 -07:00
parent f9f5961428
commit af0a1af8e1
16 changed files with 174 additions and 90 deletions

View file

@ -20,7 +20,8 @@ piet_gpu! {
rgba_color: u32,
}
struct CmdFill {
seg_ref: Ref<SegChunk>,
// As above, really Ref<Tile>
tile_ref: u32,
backdrop: i32,
rgba_color: u32,
}

View file

@ -15,6 +15,7 @@ piet_gpu! {
// A line segment recorded for a single tile. Segments for the same tile
// form a singly linked list through `next`.
struct TileSeg {
// Segment start point (x, y).
// NOTE(review): appears to be in pixel coordinates, since the fill loop
// subtracts the pixel position from it — confirm.
start: [f32; 2],
// Segment end point (x, y), same coordinate space as `start`.
end: [f32; 2],
// For fill segments: the y coordinate at which the line crosses the left
// edge of the tile. The shader that writes these records stores 1e9 when
// there is no such crossing inside the tile, and 0.0 for non-fill segments.
y_edge: f32,
// Next segment in this tile's list; the fill loop stops walking when the
// offset is 0.
next: Ref<TileSeg>,
}
}

View file

@ -171,7 +171,7 @@ fn main() -> Result<(), Error> {
let fence = device.create_fence(false)?;
let mut cmd_buf = device.create_cmd_buf()?;
let query_pool = device.create_query_pool(7)?;
let query_pool = device.create_query_pool(8)?;
let mut ctx = PietGpuRenderContext::new();
if let Some(input) = matches.value_of("INPUT") {
@ -204,9 +204,10 @@ fn main() -> Result<(), Error> {
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
println!("Tile allocation kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
println!("Binning kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
println!("Coarse raster kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
/*
let mut data: Vec<u32> = Default::default();

View file

@ -0,0 +1,56 @@
// Propagation of tile backdrop for filling.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#define BACKDROP_WG 256
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer AnnotatedBuf {
uint[] annotated;
};
// This is really only used for n_elements; maybe we can handle that
// a different way, but it's convenient to have the same signature as
// tile allocation.
layout(set = 0, binding = 1) buffer AllocBuf {
uint n_elements;
uint n_pathseg;
uint alloc;
};
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
};
#include "annotated.h"
#include "tile.h"
// Backdrop propagation: one invocation per annotated element. For every fill
// element, turn the per-tile backdrop deltas into a running sum, left to
// right, independently for each row of tiles in the path's bounding box.
void main() {
    uint ix = gl_GlobalInvocationID.x;

    // Invocations past the end of the element list are treated as Nop.
    uint kind = Annotated_Nop;
    if (ix < n_elements) {
        kind = Annotated_tag(AnnotatedRef(ix * Annotated_size));
    }
    if (kind != Annotated_Fill) {
        return;
    }

    Path path = Path_read(PathRef(ix * Path_size));
    uint n_cols = path.bbox.z - path.bbox.x;
    uint n_rows = path.bbox.w - path.bbox.y;

    // Hand-rolled addressing of the tile structure: convert the byte offset
    // to a uint index and add 1 to land on the word being summed, then step
    // by 2 uints per tile.
    // NOTE(review): assumes each Tile is two 32-bit words with the backdrop
    // second — confirm against tile.h.
    uint backdrop_ix = (path.tiles.offset >> 2) + 1;
    for (uint row = 0; row < n_rows; row++) {
        // The accumulator restarts at the beginning of every row; the index
        // keeps advancing, so rows are assumed to be stored back to back.
        uint running = 0;
        for (uint col = 0; col < n_cols; col++) {
            running += tile[backdrop_ix];
            tile[backdrop_ix] = running;
            backdrop_ix += 2;
        }
    }
}

Binary file not shown.

View file

@ -18,6 +18,8 @@ build tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h
build path_coarse.spv: glsl path_coarse.comp | annotated.h tile.h setup.h
build backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h

View file

@ -226,7 +226,7 @@ void main() {
uint x = sh_tile_x0[el_ix] + seq_ix % width;
uint y = sh_tile_y0[el_ix] + seq_ix / width;
Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
if (tile.tile.offset != 0) {
if (tile.tile.offset != 0 || tile.backdrop != 0) {
uint el_slice = el_ix / 32;
uint el_mask = 1 << (el_ix & 31);
atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
@ -357,52 +357,26 @@ void main() {
tag = Annotated_tag(ref);
switch (tag) {
/*
case Annotated_Fill:
if (last_chunk_n > 0 || seg_count > 0) {
SegChunkRef chunk_ref = SegChunkRef(0);
if (seg_count > 0) {
chunk_ref = alloc_seg_chunk();
SegChunk chunk;
chunk.n = seg_count;
chunk.next = SegChunkRef(0);
uint seg_offset = seg_alloc + seg_start * Segment_size;
chunk.segs = SegmentRef(seg_offset);
SegChunk_write(chunk_ref, chunk);
}
if (last_chunk_n > 0) {
SegChunk chunk;
chunk.n = last_chunk_n;
chunk.next = chunk_ref;
chunk.segs = last_chunk_segs;
SegChunk_write(last_chunk_ref, chunk);
} else {
first_seg_chunk = chunk_ref;
}
AnnoFill fill = Annotated_Fill_read(ref);
CmdFill cmd_fill;
cmd_fill.seg_ref = first_seg_chunk;
cmd_fill.backdrop = backdrop;
cmd_fill.rgba_color = fill.rgba_color;
alloc_cmd(cmd_ref, cmd_limit);
Cmd_Fill_write(cmd_ref, cmd_fill);
cmd_ref.offset += Cmd_size;
last_chunk_n = 0;
} else if (backdrop != 0) {
AnnoFill fill = Annotated_Fill_read(ref);
alloc_cmd(cmd_ref, cmd_limit);
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
cmd_ref.offset += Cmd_size;
}
seg_start += seg_count;
seg_count = 0;
backdrop = 0;
break;
*/
case Annotated_Stroke:
Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoFill fill = Annotated_Fill_read(ref);
alloc_cmd(cmd_ref, cmd_limit);
if (tile.tile.offset != 0) {
CmdFill cmd_fill;
cmd_fill.tile_ref = tile.tile.offset;
cmd_fill.backdrop = tile.backdrop;
cmd_fill.rgba_color = fill.rgba_color;
Cmd_Fill_write(cmd_ref, cmd_fill);
} else {
AnnoFill fill = Annotated_Fill_read(ref);
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
}
cmd_ref.offset += Cmd_size;
break;
case Annotated_Stroke:
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoStroke stroke = Annotated_Stroke_read(ref);
CmdStroke cmd_stroke;
cmd_stroke.tile_ref = tile.tile.offset;

Binary file not shown.

View file

@ -80,46 +80,40 @@ void main() {
rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
}
break;
/*
case Cmd_Fill:
CmdFill fill = Cmd_Fill_read(cmd_ref);
// Probably better to store as float, but conversion is no doubt cheap.
float area[CHUNK];
for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
SegChunkRef fill_seg_chunk_ref = fill.seg_ref;
tile_seg_ref = TileSegRef(fill.tile_ref);
do {
SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
SegmentRef segs = seg_chunk.segs;
for (int i = 0; i < seg_chunk.n; i++) {
Segment seg = Segment_read(Segment_index(segs, i));
for (uint k = 0; k < CHUNK; k++) {
vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
vec2 start = seg.start - my_xy;
vec2 end = seg.end - my_xy;
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
if (window.x != window.y) {
vec2 t = (window - start.y) / (end.y - start.y);
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
float xmax = max(xs.x, xs.y);
float b = min(xmax, 1.0);
float c = max(b, 0.0);
float d = max(xmin, 0.0);
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
area[k] += a * (window.x - window.y);
}
area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
TileSeg seg = TileSeg_read(tile_seg_ref);
for (uint k = 0; k < CHUNK; k++) {
vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
vec2 start = seg.start - my_xy;
vec2 end = seg.end - my_xy;
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
if (window.x != window.y) {
vec2 t = (window - start.y) / (end.y - start.y);
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
float xmax = max(xs.x, xs.y);
float b = min(xmax, 1.0);
float c = max(b, 0.0);
float d = max(xmin, 0.0);
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
area[k] += a * (window.x - window.y);
}
area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
}
fill_seg_chunk_ref = seg_chunk.next;
} while (fill_seg_chunk_ref.offset != 0);
tile_seg_ref = seg.next;
} while (tile_seg_ref.offset != 0);
fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
for (uint k = 0; k < CHUNK; k++) {
float alpha = min(abs(area[k]), 1.0);
rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
}
break;
*/
case Cmd_Solid:
CmdSolid solid = Cmd_Solid_read(cmd_ref);
fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;

Binary file not shown.

View file

@ -36,9 +36,11 @@ layout(set = 0, binding = 2) buffer TileBuf {
shared uint sh_tile_count[COARSE_WG];
shared uint sh_width[COARSE_WG];
shared uint sh_draw_width[COARSE_WG];
shared uint sh_tag[COARSE_WG];
shared vec2 sh_p0[COARSE_WG];
shared vec2 sh_p1[COARSE_WG];
shared int sh_x0[COARSE_WG];
shared int sh_bbox_x1[COARSE_WG];
shared int sh_y0[COARSE_WG];
shared float sh_a[COARSE_WG];
shared float sh_b[COARSE_WG];
@ -56,6 +58,7 @@ void main() {
if (element_ix < n_pathseg) {
tag = PathSeg_tag(ref);
}
sh_tag[th_ix] = tag;
// Setup for coverage algorithm.
float a, b, c;
// Bounding box of element in pixel coordinates.
@ -96,6 +99,7 @@ void main() {
x1 = clamp(x1, bbox.x, bbox.z);
y1 = clamp(y1, bbox.y, bbox.w);
sh_x0[th_ix] = x0;
sh_bbox_x1[th_ix] = bbox.z;
// TODO: can get rid of this (fold into base), with care (also need to update `a`)
sh_y0[th_ix] = y0;
int stride = bbox.z - bbox.x;
@ -138,7 +142,8 @@ void main() {
int x1 = x0 + int(sh_width[el_ix]);
int dx = int(seq_ix % draw_width);
uint y = sh_y0[el_ix] + seq_ix / draw_width;
float t = sh_a[el_ix] + sh_b[el_ix] * float(y);
float b = sh_b[el_ix];
float t = sh_a[el_ix] + b * float(y);
float c = sh_c[el_ix];
int xx0 = clamp(int(floor(t - c)), x0, x1);
int xx1 = clamp(int(ceil(t + c)), x0, x1);
@ -148,8 +153,34 @@ void main() {
uint tile_el = (sh_base[el_ix] + uint(y * sh_stride[el_ix] + x) * Tile_size) >> 2;
uint old = atomicExchange(tile[tile_el], tile_offset);
TileSeg tile_seg;
tile_seg.start = sh_p0[el_ix];
tile_seg.end = sh_p1[el_ix];
vec2 p0 = sh_p0[el_ix];
vec2 p1 = sh_p1[el_ix];
float y_edge = 0.0;
if (sh_tag[el_ix] == PathSeg_FillLine) {
vec2 tile_xy = vec2(x * TILE_WIDTH_PX, y * TILE_HEIGHT_PX);
if (dx == 0 && min(p0.y, p1.y) <= tile_xy.y) {
// TODO: need a little more work to make sure this triggers even
// when line is to the left of bbox.
int xray = max(int(ceil(t - 0.5 * b)), x0);
if (xray < sh_bbox_x1[el_ix]) {
int backdrop = p1.y < p0.y ? 1 : -1;
atomicAdd(tile[tile_el + 1 + 2 * (xray - x)], backdrop);
}
}
y_edge = mix(p0.y, p1.y, (tile_xy.x - p0.x) / (p1.x - p0.x));
if (min(p0.x, p1.x) < tile_xy.x && y_edge >= tile_xy.y && y_edge < tile_xy.y + TILE_HEIGHT_PX) {
if (p0.x > p1.x) {
p1 = vec2(tile_xy.x, y_edge);
} else {
p0 = vec2(tile_xy.x, y_edge);
}
} else {
y_edge = 1e9;
}
}
tile_seg.start = p0;
tile_seg.end = p1;
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
TileSeg_write(TileSegRef(tile_offset), tile_seg);
}

Binary file not shown.

View file

@ -80,7 +80,7 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
}
struct CmdFill {
SegChunkRef seg_ref;
uint tile_ref;
int backdrop;
uint rgba_color;
};
@ -239,7 +239,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
uint raw1 = ptcl[ix + 1];
uint raw2 = ptcl[ix + 2];
CmdFill s;
s.seg_ref = SegChunkRef(raw0);
s.tile_ref = raw0;
s.backdrop = int(raw1);
s.rgba_color = raw2;
return s;
@ -247,7 +247,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
void CmdFill_write(CmdFillRef ref, CmdFill s) {
uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.seg_ref.offset;
ptcl[ix + 0] = s.tile_ref;
ptcl[ix + 1] = uint(s.backdrop);
ptcl[ix + 2] = s.rgba_color;
}

View file

@ -37,10 +37,11 @@ TileRef Tile_index(TileRef ref, uint index) {
struct TileSeg {
vec2 start;
vec2 end;
float y_edge;
TileSegRef next;
};
#define TileSeg_size 20
#define TileSeg_size 24
TileSegRef TileSeg_index(TileSegRef ref, uint index) {
return TileSegRef(ref.offset + index * TileSeg_size);
@ -87,10 +88,12 @@ TileSeg TileSeg_read(TileSegRef ref) {
uint raw2 = tile[ix + 2];
uint raw3 = tile[ix + 3];
uint raw4 = tile[ix + 4];
uint raw5 = tile[ix + 5];
TileSeg s;
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.next = TileSegRef(raw4);
s.y_edge = uintBitsToFloat(raw4);
s.next = TileSegRef(raw5);
return s;
}
@ -100,6 +103,7 @@ void TileSeg_write(TileSegRef ref, TileSeg s) {
tile[ix + 1] = floatBitsToUint(s.start.y);
tile[ix + 2] = floatBitsToUint(s.end.x);
tile[ix + 3] = floatBitsToUint(s.end.y);
tile[ix + 4] = s.next.offset;
tile[ix + 4] = floatBitsToUint(s.y_edge);
tile[ix + 5] = s.next.offset;
}

View file

@ -57,8 +57,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
let circle = Circle::new(center, radius);
rc.fill(circle, &color);
}
/*
let mut path = BezPath::new();
/*
path.move_to((100.0, 1150.0));
path.line_to((200.0, 1200.0));
path.line_to((150.0, 1250.0));
@ -143,6 +143,9 @@ pub struct Renderer<D: Device> {
path_pipeline: D::Pipeline,
path_ds: D::DescriptorSet,
backdrop_pipeline: D::Pipeline,
backdrop_ds: D::DescriptorSet,
tile_alloc_buf_host: D::Buffer,
tile_alloc_buf_dev: D::Buffer,
@ -224,6 +227,14 @@ impl<D: Device> Renderer<D> {
&[],
)?;
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
let backdrop_pipeline = device.create_simple_compute_pipeline(backdrop_alloc_code, 3, 0)?;
let backdrop_ds = device.create_descriptor_set(
&backdrop_pipeline,
&[&anno_buf, &tile_alloc_buf_dev, &tile_buf],
&[],
)?;
let bin_alloc_buf_host = device.create_buffer(12, host)?;
let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
@ -275,6 +286,8 @@ impl<D: Device> Renderer<D> {
tile_ds,
path_pipeline,
path_ds,
backdrop_pipeline,
backdrop_ds,
bin_pipeline,
bin_ds,
coarse_pipeline,
@ -333,6 +346,13 @@ impl<D: Device> Renderer<D> {
(((self.n_pathseg + 31) / 32) as u32, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 3);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.backdrop_pipeline,
&self.backdrop_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 4);
// Note: this barrier is not needed as an actual dependency between
// pipeline stages, but I am keeping it in so that timer queries are
// easier to interpret.
@ -342,21 +362,21 @@ impl<D: Device> Renderer<D> {
&self.bin_ds,
(((self.n_paths + 255) / 256) as u32, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 4);
cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.coarse_pipeline,
&self.coarse_ds,
(WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
);
cmd_buf.write_timestamp(&query_pool, 5);
cmd_buf.write_timestamp(&query_pool, 6);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.k4_pipeline,
&self.k4_ds,
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 6);
cmd_buf.write_timestamp(&query_pool, 7);
cmd_buf.memory_barrier();
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
}

View file

@ -49,8 +49,8 @@ impl PicoSvg {
for item in &self.items {
match item {
Item::Fill(fill_item) => {
//rc.fill(&fill_item.path, &fill_item.color);
rc.stroke(&fill_item.path, &fill_item.color, 1.0);
rc.fill(&fill_item.path, &fill_item.color);
//rc.stroke(&fill_item.path, &fill_item.color, 1.0);
}
Item::Stroke(stroke_item) => {
rc.stroke(&stroke_item.path, &stroke_item.color, stroke_item.width);