Rework right_edge computation in elements

Trying to fit the right_edge computation into the fancy monad doesn't
really work, so use a more straightforward approach to compute it from
the aggregate.

Also add yEdge logic (basically copying piet-metal). With a fix to
ELEMENT_BINNING_RATIO (which I had simply gotten wrong), the example
renders almost correctly, with small bounding box artifacts.
This commit is contained in:
Raph Levien 2020-05-20 16:36:09 -07:00
parent ed4ed30708
commit a616b4d010
12 changed files with 76 additions and 38 deletions

View file

@ -8,7 +8,6 @@ piet_gpu! {
translate: [f32; 2],
bbox: [f32; 4],
linewidth: f32,
right_edge: f32,
flags: u32,
}
}

View file

@ -50,10 +50,10 @@ shared uint sh_chunk_jump[N_TILE];
shared float sh_right_edge[N_TILE];
#define StateBuf_stride (4 + 2 * State_size)
#define StateBuf_stride (8 + 2 * State_size)
StateRef state_aggregate_ref(uint partition_ix) {
return StateRef(8 + partition_ix * StateBuf_stride);
uint state_right_edge_index(uint partition_ix) {
return 2 + partition_ix * (StateBuf_stride / 4);
}
void main() {
@ -120,8 +120,7 @@ void main() {
// look-forward is small (performance may degrade in the case
// of massively complex paths).
do {
StateRef agg_ref = state_aggregate_ref(aggregate_ix);
my_right_edge = State_read(agg_ref).right_edge;
my_right_edge = uintBitsToFloat(state[state_right_edge_index(aggregate_ix)]);
aggregate_ix++;
} while (isinf(my_right_edge));
}

Binary file not shown.

View file

@ -12,7 +12,7 @@ build image.spv: glsl image.comp | scene.h
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
build binning.spv: glsl binning.comp | annotated.h bins.h setup.h
build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h

View file

@ -310,6 +310,30 @@ void main() {
switch (tag) {
case Annotated_FillLine:
AnnoFillLineSeg fill_line = Annotated_FillLine_read(ref);
// This is basically the same logic as piet-metal, but should be made numerically robust.
vec2 tile_xy = vec2(tile_x * TILE_WIDTH_PX, tile_y * TILE_HEIGHT_PX);
float yEdge = mix(fill_line.p0.y, fill_line.p1.y, (tile_xy.x - fill_line.p0.x) / (fill_line.p1.x - fill_line.p0.x));
if (min(fill_line.p0.x, fill_line.p1.x) < tile_xy.x && yEdge >= tile_xy.y && yEdge < tile_xy.y + TILE_HEIGHT_PX) {
Segment edge_seg;
if (fill_line.p0.x > fill_line.p1.x) {
fill_line.p1 = vec2(tile_xy.x, yEdge);
edge_seg.start = fill_line.p1;
edge_seg.end = vec2(tile_xy.x, tile_xy.y + TILE_HEIGHT_PX);
} else {
fill_line.p0 = vec2(tile_xy.x, yEdge);
edge_seg.start = vec2(tile_xy.x, tile_xy.y + TILE_HEIGHT_PX);
edge_seg.end = fill_line.p0;
}
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), edge_seg);
chunk_n_segs++;
}
Segment fill_seg = Segment(fill_line.p0, fill_line.p1);
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), fill_seg);
chunk_n_segs++;
break;
case Annotated_StrokeLine:
AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
Segment seg = Segment(line.p0, line.p1);

Binary file not shown.

View file

@ -34,14 +34,14 @@ layout(set = 0, binding = 2) buffer AnnotatedBuf {
#include "state.h"
#include "annotated.h"
#define StateBuf_stride (4 + 2 * State_size)
#define StateBuf_stride (8 + 2 * State_size)
StateRef state_aggregate_ref(uint partition_ix) {
return StateRef(8 + partition_ix * StateBuf_stride);
return StateRef(12 + partition_ix * StateBuf_stride);
}
StateRef state_prefix_ref(uint partition_ix) {
return StateRef(8 + partition_ix * StateBuf_stride + State_size);
return StateRef(12 + partition_ix * StateBuf_stride + State_size);
}
uint state_flag_index(uint partition_ix) {
@ -81,13 +81,12 @@ State combine_state(State a, State b) {
c.translate.x = a.mat.x * b.translate.x + a.mat.z * b.translate.y + a.translate.x;
c.translate.y = a.mat.y * b.translate.x + a.mat.w * b.translate.y + a.translate.y;
c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth;
c.right_edge = (a.flags & FLAG_SET_BBOX) != 0 ? a.right_edge : (a.flags & FLAG_RESET_BBOX) != 0 ? a.bbox.z : c.right_edge;
c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX)) | b.flags;
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
return c;
}
State map_element(ElementRef ref) {
State map_element(ElementRef ref, inout bool is_fill) {
// TODO: it would *probably* be more efficient to make the memory read patterns less
// divergent, though it would be more wasted memory.
uint tag = Element_tag(ref);
@ -97,6 +96,7 @@ State map_element(ElementRef ref) {
c.translate = vec2(0.0, 0.0);
c.linewidth = 1.0; // TODO should be 0.0
c.flags = 0;
is_fill = false;
switch (tag) {
case Element_FillLine:
case Element_StrokeLine:
@ -115,6 +115,8 @@ State map_element(ElementRef ref) {
c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
break;
case Element_Fill:
is_fill = true;
// fall-through
case Element_Stroke:
c.flags = FLAG_RESET_BBOX;
break;
@ -145,9 +147,10 @@ shared vec4 sh_mat[WG_SIZE];
shared vec2 sh_translate[WG_SIZE];
shared vec4 sh_bbox[WG_SIZE];
shared float sh_width[WG_SIZE];
shared float sh_right_edge[WG_SIZE];
shared uint sh_flags[WG_SIZE];
shared uint sh_min_fill;
shared uint sh_tile_ix;
shared State sh_prefix;
@ -157,6 +160,7 @@ void main() {
// 4.4 of prefix sum paper).
if (gl_LocalInvocationID.x == 0) {
sh_tile_ix = atomicAdd(state[0], 1);
sh_min_fill = ~0;
}
barrier();
uint tile_ix = sh_tile_ix;
@ -164,18 +168,24 @@ void main() {
uint ix = tile_ix * PARTITION_SIZE + gl_LocalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
th_state[0] = map_element(ref);
bool is_fill;
uint my_min_fill = ~0;
th_state[0] = map_element(ref, is_fill);
if (is_fill) my_min_fill = ix;
for (uint i = 1; i < N_ROWS; i++) {
// discussion question: would it be faster to load using more coherent patterns
// into thread memory? This is kinda strided.
th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i), is_fill));
if (is_fill && my_min_fill == ~0) {
my_min_fill = ix + i;
}
}
atomicMin(sh_min_fill, my_min_fill);
State agg = th_state[N_ROWS - 1];
sh_mat[gl_LocalInvocationID.x] = agg.mat;
sh_translate[gl_LocalInvocationID.x] = agg.translate;
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
sh_right_edge[gl_LocalInvocationID.x] = agg.right_edge;
sh_flags[gl_LocalInvocationID.x] = agg.flags;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
@ -194,7 +204,6 @@ void main() {
sh_translate[gl_LocalInvocationID.x] = agg.translate;
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
sh_right_edge[gl_LocalInvocationID.x] = agg.right_edge;
sh_flags[gl_LocalInvocationID.x] = agg.flags;
}
@ -203,7 +212,6 @@ void main() {
exclusive.mat = vec4(1.0, 0.0, 0.0, 1.0);
exclusive.translate = vec2(0.0, 0.0);
exclusive.linewidth = 1.0; //TODO should be 0.0
exclusive.right_edge = 0.0;
exclusive.flags = 0;
// Publish aggregate for this partition
@ -244,6 +252,7 @@ void main() {
}
}
barrier();
my_min_fill = sh_min_fill;
if (tile_ix != 0) {
exclusive = sh_prefix;
}
@ -256,12 +265,17 @@ void main() {
other.translate = sh_translate[ix];
other.bbox = sh_bbox[ix];
other.linewidth = sh_width[ix];
other.right_edge = sh_right_edge[ix];
other.flags = sh_flags[ix];
row = combine_state(row, other);
}
if (my_min_fill == ~0 && gl_LocalInvocationID.x == 0) {
state[state_flag_index(tile_ix) + 1] = 0x7f800000; // infinity
}
for (uint i = 0; i < N_ROWS; i++) {
State st = combine_state(row, th_state[i]);
if (my_min_fill == ix + i) {
state[state_flag_index(tile_ix) + 1] = floatBitsToUint(st.bbox.z);
}
// We write the state now for development purposes, but the
// actual goal is to write transformed and annotated elements.
//State_write(StateRef((ix + i) * State_size), st);

Binary file not shown.

View file

@ -58,7 +58,7 @@
// This is the ratio of the number of elements in a binning workgroup
// over the number of elements in a partition workgroup.
#define ELEMENT_BINNING_RATIO 4
#define ELEMENT_BINNING_RATIO 2
#define BIN_INITIAL_ALLOC 64
#define BIN_ALLOC 256

View file

@ -9,11 +9,10 @@ struct State {
vec2 translate;
vec4 bbox;
float linewidth;
float right_edge;
uint flags;
};
#define State_size 52
#define State_size 48
StateRef State_index(StateRef ref, uint index) {
return StateRef(ref.offset + index * State_size);
@ -33,14 +32,12 @@ State State_read(StateRef ref) {
uint raw9 = state[ix + 9];
uint raw10 = state[ix + 10];
uint raw11 = state[ix + 11];
uint raw12 = state[ix + 12];
State s;
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9));
s.linewidth = uintBitsToFloat(raw10);
s.right_edge = uintBitsToFloat(raw11);
s.flags = raw12;
s.flags = raw11;
return s;
}
@ -57,7 +54,6 @@ void State_write(StateRef ref, State s) {
state[ix + 8] = floatBitsToUint(s.bbox.z);
state[ix + 9] = floatBitsToUint(s.bbox.w);
state[ix + 10] = floatBitsToUint(s.linewidth);
state[ix + 11] = floatBitsToUint(s.right_edge);
state[ix + 12] = s.flags;
state[ix + 11] = s.flags;
}

View file

@ -46,8 +46,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
let circle = Circle::new(center, radius);
rc.fill(circle, &color);
}
/*
let mut path = BezPath::new();
/*
path.move_to((100.0, 1150.0));
path.line_to((200.0, 1200.0));
path.line_to((150.0, 1250.0));

View file

@ -215,6 +215,7 @@ impl PietGpuRenderContext {
match el {
PathEl::MoveTo(p) => {
let scene_pt = to_f32_2(p);
start_pt = Some(scene_pt);
last_pt = Some(scene_pt);
}
PathEl::LineTo(p) => {
@ -228,11 +229,13 @@ impl PietGpuRenderContext {
}
PathEl::ClosePath => {
if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) {
let seg = LineSeg {
p0: last,
p1: start,
};
self.encode_line_seg(seg, is_fill);
if last != start {
let seg = LineSeg {
p0: last,
p1: start,
};
self.encode_line_seg(seg, is_fill);
}
}
}
_ => (),
@ -246,6 +249,7 @@ impl PietGpuRenderContext {
match el {
PathEl::MoveTo(p) => {
let scene_pt = to_f32_2(p);
start_pt = Some(scene_pt);
last_pt = Some(scene_pt);
}
PathEl::LineTo(p) => {
@ -283,11 +287,13 @@ impl PietGpuRenderContext {
}
PathEl::ClosePath => {
if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) {
let seg = LineSeg {
p0: last,
p1: start,
};
self.encode_line_seg(seg, is_fill);
if last != start {
let seg = LineSeg {
p0: last,
p1: start,
};
self.encode_line_seg(seg, is_fill);
}
}
}
}