mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Rework right_edge computation in elements
Trying to fit it into the fancy monoid doesn't really work, so use a more straightforward approach to compute it from the aggregate. Also add yEdge logic (basically copying piet-metal). With a fix to ELEMENT_BINNING_RATIO (which I had simply gotten wrong), the example renders almost correctly, with small bounding box artifacts.
This commit is contained in:
parent
ed4ed30708
commit
a616b4d010
|
@ -8,7 +8,6 @@ piet_gpu! {
|
|||
translate: [f32; 2],
|
||||
bbox: [f32; 4],
|
||||
linewidth: f32,
|
||||
right_edge: f32,
|
||||
flags: u32,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -50,10 +50,10 @@ shared uint sh_chunk_jump[N_TILE];
|
|||
|
||||
shared float sh_right_edge[N_TILE];
|
||||
|
||||
#define StateBuf_stride (4 + 2 * State_size)
|
||||
#define StateBuf_stride (8 + 2 * State_size)
|
||||
|
||||
StateRef state_aggregate_ref(uint partition_ix) {
|
||||
return StateRef(8 + partition_ix * StateBuf_stride);
|
||||
uint state_right_edge_index(uint partition_ix) {
|
||||
return 2 + partition_ix * (StateBuf_stride / 4);
|
||||
}
|
||||
|
||||
void main() {
|
||||
|
@ -120,8 +120,7 @@ void main() {
|
|||
// look-forward is small (performance may degrade in the case
|
||||
// of massively complex paths).
|
||||
do {
|
||||
StateRef agg_ref = state_aggregate_ref(aggregate_ix);
|
||||
my_right_edge = State_read(agg_ref).right_edge;
|
||||
my_right_edge = uintBitsToFloat(state[state_right_edge_index(aggregate_ix)]);
|
||||
aggregate_ix++;
|
||||
} while (isinf(my_right_edge));
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -12,7 +12,7 @@ build image.spv: glsl image.comp | scene.h
|
|||
|
||||
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
||||
|
||||
build binning.spv: glsl binning.comp | annotated.h bins.h setup.h
|
||||
build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h
|
||||
|
||||
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
||||
|
||||
|
|
|
@ -310,6 +310,30 @@ void main() {
|
|||
|
||||
switch (tag) {
|
||||
case Annotated_FillLine:
|
||||
AnnoFillLineSeg fill_line = Annotated_FillLine_read(ref);
|
||||
// This is basically the same logic as piet-metal, but should be made numerically robust.
|
||||
vec2 tile_xy = vec2(tile_x * TILE_WIDTH_PX, tile_y * TILE_HEIGHT_PX);
|
||||
float yEdge = mix(fill_line.p0.y, fill_line.p1.y, (tile_xy.x - fill_line.p0.x) / (fill_line.p1.x - fill_line.p0.x));
|
||||
if (min(fill_line.p0.x, fill_line.p1.x) < tile_xy.x && yEdge >= tile_xy.y && yEdge < tile_xy.y + TILE_HEIGHT_PX) {
|
||||
Segment edge_seg;
|
||||
if (fill_line.p0.x > fill_line.p1.x) {
|
||||
fill_line.p1 = vec2(tile_xy.x, yEdge);
|
||||
edge_seg.start = fill_line.p1;
|
||||
edge_seg.end = vec2(tile_xy.x, tile_xy.y + TILE_HEIGHT_PX);
|
||||
} else {
|
||||
fill_line.p0 = vec2(tile_xy.x, yEdge);
|
||||
edge_seg.start = vec2(tile_xy.x, tile_xy.y + TILE_HEIGHT_PX);
|
||||
edge_seg.end = fill_line.p0;
|
||||
}
|
||||
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
|
||||
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), edge_seg);
|
||||
chunk_n_segs++;
|
||||
}
|
||||
Segment fill_seg = Segment(fill_line.p0, fill_line.p1);
|
||||
alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
|
||||
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), fill_seg);
|
||||
chunk_n_segs++;
|
||||
break;
|
||||
case Annotated_StrokeLine:
|
||||
AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
|
||||
Segment seg = Segment(line.p0, line.p1);
|
||||
|
|
Binary file not shown.
|
@ -34,14 +34,14 @@ layout(set = 0, binding = 2) buffer AnnotatedBuf {
|
|||
#include "state.h"
|
||||
#include "annotated.h"
|
||||
|
||||
#define StateBuf_stride (4 + 2 * State_size)
|
||||
#define StateBuf_stride (8 + 2 * State_size)
|
||||
|
||||
StateRef state_aggregate_ref(uint partition_ix) {
|
||||
return StateRef(8 + partition_ix * StateBuf_stride);
|
||||
return StateRef(12 + partition_ix * StateBuf_stride);
|
||||
}
|
||||
|
||||
StateRef state_prefix_ref(uint partition_ix) {
|
||||
return StateRef(8 + partition_ix * StateBuf_stride + State_size);
|
||||
return StateRef(12 + partition_ix * StateBuf_stride + State_size);
|
||||
}
|
||||
|
||||
uint state_flag_index(uint partition_ix) {
|
||||
|
@ -81,13 +81,12 @@ State combine_state(State a, State b) {
|
|||
c.translate.x = a.mat.x * b.translate.x + a.mat.z * b.translate.y + a.translate.x;
|
||||
c.translate.y = a.mat.y * b.translate.x + a.mat.w * b.translate.y + a.translate.y;
|
||||
c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth;
|
||||
c.right_edge = (a.flags & FLAG_SET_BBOX) != 0 ? a.right_edge : (a.flags & FLAG_RESET_BBOX) != 0 ? a.bbox.z : c.right_edge;
|
||||
c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX)) | b.flags;
|
||||
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
|
||||
return c;
|
||||
}
|
||||
|
||||
State map_element(ElementRef ref) {
|
||||
State map_element(ElementRef ref, inout bool is_fill) {
|
||||
// TODO: it would *probably* be more efficient to make the memory read patterns less
|
||||
// divergent, though it would be more wasted memory.
|
||||
uint tag = Element_tag(ref);
|
||||
|
@ -97,6 +96,7 @@ State map_element(ElementRef ref) {
|
|||
c.translate = vec2(0.0, 0.0);
|
||||
c.linewidth = 1.0; // TODO should be 0.0
|
||||
c.flags = 0;
|
||||
is_fill = false;
|
||||
switch (tag) {
|
||||
case Element_FillLine:
|
||||
case Element_StrokeLine:
|
||||
|
@ -115,6 +115,8 @@ State map_element(ElementRef ref) {
|
|||
c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
|
||||
break;
|
||||
case Element_Fill:
|
||||
is_fill = true;
|
||||
// fall-through
|
||||
case Element_Stroke:
|
||||
c.flags = FLAG_RESET_BBOX;
|
||||
break;
|
||||
|
@ -145,9 +147,10 @@ shared vec4 sh_mat[WG_SIZE];
|
|||
shared vec2 sh_translate[WG_SIZE];
|
||||
shared vec4 sh_bbox[WG_SIZE];
|
||||
shared float sh_width[WG_SIZE];
|
||||
shared float sh_right_edge[WG_SIZE];
|
||||
shared uint sh_flags[WG_SIZE];
|
||||
|
||||
shared uint sh_min_fill;
|
||||
|
||||
shared uint sh_tile_ix;
|
||||
shared State sh_prefix;
|
||||
|
||||
|
@ -157,6 +160,7 @@ void main() {
|
|||
// 4.4 of prefix sum paper).
|
||||
if (gl_LocalInvocationID.x == 0) {
|
||||
sh_tile_ix = atomicAdd(state[0], 1);
|
||||
sh_min_fill = ~0;
|
||||
}
|
||||
barrier();
|
||||
uint tile_ix = sh_tile_ix;
|
||||
|
@ -164,18 +168,24 @@ void main() {
|
|||
uint ix = tile_ix * PARTITION_SIZE + gl_LocalInvocationID.x * N_ROWS;
|
||||
ElementRef ref = ElementRef(ix * Element_size);
|
||||
|
||||
th_state[0] = map_element(ref);
|
||||
bool is_fill;
|
||||
uint my_min_fill = ~0;
|
||||
th_state[0] = map_element(ref, is_fill);
|
||||
if (is_fill) my_min_fill = ix;
|
||||
for (uint i = 1; i < N_ROWS; i++) {
|
||||
// discussion question: would it be faster to load using more coherent patterns
|
||||
// into thread memory? This is kinda strided.
|
||||
th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
|
||||
th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i), is_fill));
|
||||
if (is_fill && my_min_fill == ~0) {
|
||||
my_min_fill = ix + i;
|
||||
}
|
||||
}
|
||||
atomicMin(sh_min_fill, my_min_fill);
|
||||
State agg = th_state[N_ROWS - 1];
|
||||
sh_mat[gl_LocalInvocationID.x] = agg.mat;
|
||||
sh_translate[gl_LocalInvocationID.x] = agg.translate;
|
||||
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
|
||||
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
|
||||
sh_right_edge[gl_LocalInvocationID.x] = agg.right_edge;
|
||||
sh_flags[gl_LocalInvocationID.x] = agg.flags;
|
||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
barrier();
|
||||
|
@ -194,7 +204,6 @@ void main() {
|
|||
sh_translate[gl_LocalInvocationID.x] = agg.translate;
|
||||
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
|
||||
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
|
||||
sh_right_edge[gl_LocalInvocationID.x] = agg.right_edge;
|
||||
sh_flags[gl_LocalInvocationID.x] = agg.flags;
|
||||
}
|
||||
|
||||
|
@ -203,7 +212,6 @@ void main() {
|
|||
exclusive.mat = vec4(1.0, 0.0, 0.0, 1.0);
|
||||
exclusive.translate = vec2(0.0, 0.0);
|
||||
exclusive.linewidth = 1.0; //TODO should be 0.0
|
||||
exclusive.right_edge = 0.0;
|
||||
exclusive.flags = 0;
|
||||
|
||||
// Publish aggregate for this partition
|
||||
|
@ -244,6 +252,7 @@ void main() {
|
|||
}
|
||||
}
|
||||
barrier();
|
||||
my_min_fill = sh_min_fill;
|
||||
if (tile_ix != 0) {
|
||||
exclusive = sh_prefix;
|
||||
}
|
||||
|
@ -256,12 +265,17 @@ void main() {
|
|||
other.translate = sh_translate[ix];
|
||||
other.bbox = sh_bbox[ix];
|
||||
other.linewidth = sh_width[ix];
|
||||
other.right_edge = sh_right_edge[ix];
|
||||
other.flags = sh_flags[ix];
|
||||
row = combine_state(row, other);
|
||||
}
|
||||
if (my_min_fill == ~0 && gl_LocalInvocationID.x == 0) {
|
||||
state[state_flag_index(tile_ix) + 1] = 0x7f800000; // infinity
|
||||
}
|
||||
for (uint i = 0; i < N_ROWS; i++) {
|
||||
State st = combine_state(row, th_state[i]);
|
||||
if (my_min_fill == ix + i) {
|
||||
state[state_flag_index(tile_ix) + 1] = floatBitsToUint(st.bbox.z);
|
||||
}
|
||||
// We write the state now for development purposes, but the
|
||||
// actual goal is to write transformed and annotated elements.
|
||||
//State_write(StateRef((ix + i) * State_size), st);
|
||||
|
|
Binary file not shown.
|
@ -58,7 +58,7 @@
|
|||
|
||||
// This is the ratio of the number of elements in a binning workgroup
|
||||
// over the number of elements in a partition workgroup.
|
||||
#define ELEMENT_BINNING_RATIO 4
|
||||
#define ELEMENT_BINNING_RATIO 2
|
||||
|
||||
#define BIN_INITIAL_ALLOC 64
|
||||
#define BIN_ALLOC 256
|
||||
|
|
|
@ -9,11 +9,10 @@ struct State {
|
|||
vec2 translate;
|
||||
vec4 bbox;
|
||||
float linewidth;
|
||||
float right_edge;
|
||||
uint flags;
|
||||
};
|
||||
|
||||
#define State_size 52
|
||||
#define State_size 48
|
||||
|
||||
StateRef State_index(StateRef ref, uint index) {
|
||||
return StateRef(ref.offset + index * State_size);
|
||||
|
@ -33,14 +32,12 @@ State State_read(StateRef ref) {
|
|||
uint raw9 = state[ix + 9];
|
||||
uint raw10 = state[ix + 10];
|
||||
uint raw11 = state[ix + 11];
|
||||
uint raw12 = state[ix + 12];
|
||||
State s;
|
||||
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||
s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9));
|
||||
s.linewidth = uintBitsToFloat(raw10);
|
||||
s.right_edge = uintBitsToFloat(raw11);
|
||||
s.flags = raw12;
|
||||
s.flags = raw11;
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -57,7 +54,6 @@ void State_write(StateRef ref, State s) {
|
|||
state[ix + 8] = floatBitsToUint(s.bbox.z);
|
||||
state[ix + 9] = floatBitsToUint(s.bbox.w);
|
||||
state[ix + 10] = floatBitsToUint(s.linewidth);
|
||||
state[ix + 11] = floatBitsToUint(s.right_edge);
|
||||
state[ix + 12] = s.flags;
|
||||
state[ix + 11] = s.flags;
|
||||
}
|
||||
|
||||
|
|
|
@ -46,8 +46,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
|
|||
let circle = Circle::new(center, radius);
|
||||
rc.fill(circle, &color);
|
||||
}
|
||||
/*
|
||||
let mut path = BezPath::new();
|
||||
/*
|
||||
path.move_to((100.0, 1150.0));
|
||||
path.line_to((200.0, 1200.0));
|
||||
path.line_to((150.0, 1250.0));
|
||||
|
|
|
@ -215,6 +215,7 @@ impl PietGpuRenderContext {
|
|||
match el {
|
||||
PathEl::MoveTo(p) => {
|
||||
let scene_pt = to_f32_2(p);
|
||||
start_pt = Some(scene_pt);
|
||||
last_pt = Some(scene_pt);
|
||||
}
|
||||
PathEl::LineTo(p) => {
|
||||
|
@ -228,11 +229,13 @@ impl PietGpuRenderContext {
|
|||
}
|
||||
PathEl::ClosePath => {
|
||||
if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) {
|
||||
let seg = LineSeg {
|
||||
p0: last,
|
||||
p1: start,
|
||||
};
|
||||
self.encode_line_seg(seg, is_fill);
|
||||
if last != start {
|
||||
let seg = LineSeg {
|
||||
p0: last,
|
||||
p1: start,
|
||||
};
|
||||
self.encode_line_seg(seg, is_fill);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
|
@ -246,6 +249,7 @@ impl PietGpuRenderContext {
|
|||
match el {
|
||||
PathEl::MoveTo(p) => {
|
||||
let scene_pt = to_f32_2(p);
|
||||
start_pt = Some(scene_pt);
|
||||
last_pt = Some(scene_pt);
|
||||
}
|
||||
PathEl::LineTo(p) => {
|
||||
|
@ -283,11 +287,13 @@ impl PietGpuRenderContext {
|
|||
}
|
||||
PathEl::ClosePath => {
|
||||
if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) {
|
||||
let seg = LineSeg {
|
||||
p0: last,
|
||||
p1: start,
|
||||
};
|
||||
self.encode_line_seg(seg, is_fill);
|
||||
if last != start {
|
||||
let seg = LineSeg {
|
||||
p0: last,
|
||||
p1: start,
|
||||
};
|
||||
self.encode_line_seg(seg, is_fill);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue