mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 04:31:30 +11:00
Merge branch 'master' into image_work
This commit is contained in:
commit
634530fb91
|
@ -13,8 +13,8 @@ piet_gpu! {
|
|||
}
|
||||
// Segments within a tile are represented as a linked list.
|
||||
struct TileSeg {
|
||||
start: [f32; 2],
|
||||
end: [f32; 2],
|
||||
origin: [f32; 2],
|
||||
vector: [f32; 2],
|
||||
y_edge: f32,
|
||||
next: Ref<TileSeg>,
|
||||
}
|
||||
|
|
|
@ -84,11 +84,22 @@ void main() {
|
|||
// Coordinates of top left of bin, in tiles.
|
||||
uint bin_tile_x = N_TILE_X * gl_WorkGroupID.x;
|
||||
uint bin_tile_y = N_TILE_Y * gl_WorkGroupID.y;
|
||||
|
||||
// Per-tile state
|
||||
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
|
||||
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
|
||||
uint this_tile_ix = (bin_tile_y + tile_y) * WIDTH_IN_TILES + bin_tile_x + tile_x;
|
||||
CmdRef cmd_ref = CmdRef(this_tile_ix * PTCL_INITIAL_ALLOC);
|
||||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
||||
// The nesting depth of the clip stack
|
||||
uint clip_depth = 0;
|
||||
// State for the "clip zero" optimization. If it's nonzero, then we are
|
||||
// currently in a clip for which the entire tile has an alpha of zero, and
|
||||
// the value is the depth after the "begin clip" of that element.
|
||||
uint clip_zero_depth = 0;
|
||||
// State for the "clip one" optimization. If bit `i` is set, then that means
|
||||
// that the clip pushed at depth `i` has an alpha of all one.
|
||||
uint clip_one_mask = 0;
|
||||
|
||||
// I'm sure we can figure out how to do this with at least one fewer register...
|
||||
// Items up to rd_ix have been read from sh_elements
|
||||
|
@ -98,6 +109,7 @@ void main() {
|
|||
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
|
||||
uint part_start_ix = 0;
|
||||
uint ready_ix = 0;
|
||||
|
||||
while (true) {
|
||||
for (uint i = 0; i < N_SLICE; i++) {
|
||||
sh_bitmaps[i][th_ix] = 0;
|
||||
|
@ -270,56 +282,84 @@ void main() {
|
|||
ref = AnnotatedRef(element_ix * Annotated_size);
|
||||
tag = Annotated_tag(ref);
|
||||
|
||||
switch (tag) {
|
||||
case Annotated_Fill:
|
||||
Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
AnnoFill fill = Annotated_Fill_read(ref);
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
if (tile.tile.offset != 0) {
|
||||
CmdFill cmd_fill;
|
||||
cmd_fill.tile_ref = tile.tile.offset;
|
||||
cmd_fill.backdrop = tile.backdrop;
|
||||
cmd_fill.rgba_color = fill.rgba_color;
|
||||
Cmd_Fill_write(cmd_ref, cmd_fill);
|
||||
} else {
|
||||
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
||||
if (clip_zero_depth == 0) {
|
||||
switch (tag) {
|
||||
case Annotated_Fill:
|
||||
Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
AnnoFill fill = Annotated_Fill_read(ref);
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
if (tile.tile.offset != 0) {
|
||||
CmdFill cmd_fill;
|
||||
cmd_fill.tile_ref = tile.tile.offset;
|
||||
cmd_fill.backdrop = tile.backdrop;
|
||||
cmd_fill.rgba_color = fill.rgba_color;
|
||||
Cmd_Fill_write(cmd_ref, cmd_fill);
|
||||
} else {
|
||||
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
||||
}
|
||||
cmd_ref.offset += Cmd_size;
|
||||
break;
|
||||
case Annotated_BeginClip:
|
||||
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
||||
clip_zero_depth = clip_depth + 1;
|
||||
} else if (tile.tile.offset == 0 && clip_depth < 32) {
|
||||
clip_one_mask |= (1 << clip_depth);
|
||||
} else {
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
if (tile.tile.offset != 0) {
|
||||
CmdBeginClip cmd_begin_clip;
|
||||
cmd_begin_clip.tile_ref = tile.tile.offset;
|
||||
cmd_begin_clip.backdrop = tile.backdrop;
|
||||
Cmd_BeginClip_write(cmd_ref, cmd_begin_clip);
|
||||
} else {
|
||||
// TODO: here is where a bunch of optimization magic should happen
|
||||
float alpha = tile.backdrop == 0 ? 0.0 : 1.0;
|
||||
Cmd_BeginSolidClip_write(cmd_ref, CmdBeginSolidClip(alpha));
|
||||
}
|
||||
cmd_ref.offset += Cmd_size;
|
||||
if (clip_depth < 32) {
|
||||
clip_one_mask &= ~(1 << clip_depth);
|
||||
}
|
||||
}
|
||||
clip_depth++;
|
||||
break;
|
||||
case Annotated_EndClip:
|
||||
clip_depth--;
|
||||
if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) {
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_EndClip_write(cmd_ref, CmdEndClip(1.0));
|
||||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
break;
|
||||
case Annotated_Stroke:
|
||||
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
||||
CmdStroke cmd_stroke;
|
||||
cmd_stroke.tile_ref = tile.tile.offset;
|
||||
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
||||
cmd_stroke.rgba_color = stroke.rgba_color;
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
break;
|
||||
}
|
||||
cmd_ref.offset += Cmd_size;
|
||||
break;
|
||||
case Annotated_BeginClip:
|
||||
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
if (tile.tile.offset != 0) {
|
||||
CmdBeginClip cmd_begin_clip;
|
||||
cmd_begin_clip.tile_ref = tile.tile.offset;
|
||||
cmd_begin_clip.backdrop = tile.backdrop;
|
||||
Cmd_BeginClip_write(cmd_ref, cmd_begin_clip);
|
||||
} else {
|
||||
// TODO: here is where a bunch of optimization magic should happen
|
||||
float alpha = tile.backdrop == 0 ? 0.0 : 1.0;
|
||||
Cmd_BeginSolidClip_write(cmd_ref, CmdBeginSolidClip(alpha));
|
||||
} else {
|
||||
// In "clip zero" state, suppress all drawing
|
||||
switch (tag) {
|
||||
case Annotated_BeginClip:
|
||||
clip_depth++;
|
||||
break;
|
||||
case Annotated_EndClip:
|
||||
if (clip_depth == clip_zero_depth) {
|
||||
clip_zero_depth = 0;
|
||||
}
|
||||
clip_depth--;
|
||||
break;
|
||||
}
|
||||
cmd_ref.offset += Cmd_size;
|
||||
break;
|
||||
case Annotated_EndClip:
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_EndClip_write(cmd_ref, CmdEndClip(1.0));
|
||||
cmd_ref.offset += Cmd_size;
|
||||
break;
|
||||
case Annotated_Stroke:
|
||||
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
||||
CmdStroke cmd_stroke;
|
||||
cmd_stroke.tile_ref = tile.tile.offset;
|
||||
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
||||
cmd_stroke.rgba_color = stroke.rgba_color;
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
break;
|
||||
}
|
||||
}
|
||||
barrier();
|
||||
|
|
Binary file not shown.
|
@ -68,8 +68,8 @@ float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
|
|||
TileSeg seg = TileSeg_read(tile_seg_ref);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
|
||||
vec2 start = seg.start - my_xy;
|
||||
vec2 end = seg.end - my_xy;
|
||||
vec2 start = seg.origin - my_xy;
|
||||
vec2 end = start + seg.vector;
|
||||
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
||||
if (window.x != window.y) {
|
||||
vec2 t = (window - start.y) / (end.y - start.y);
|
||||
|
@ -82,7 +82,7 @@ float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
|
|||
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
||||
area[k] += a * (window.x - window.y);
|
||||
}
|
||||
area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
||||
area[k] += sign(seg.vector.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
||||
}
|
||||
tile_seg_ref = seg.next;
|
||||
} while (tile_seg_ref.offset != 0);
|
||||
|
@ -137,9 +137,9 @@ void main() {
|
|||
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
||||
do {
|
||||
TileSeg seg = TileSeg_read(tile_seg_ref);
|
||||
vec2 line_vec = seg.end - seg.start;
|
||||
vec2 line_vec = seg.vector;
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
||||
dpos.y += float(k * CHUNK_DY);
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
df[k] = min(df[k], length(line_vec * t - dpos));
|
||||
|
|
Binary file not shown.
|
@ -101,12 +101,6 @@ void main() {
|
|||
if (element_ix < n_pathseg) {
|
||||
tag = PathSeg_tag(ref);
|
||||
}
|
||||
// Setup for coverage algorithm.
|
||||
float a, b, c;
|
||||
// Bounding box of element in pixel coordinates.
|
||||
float xmin, xmax, ymin, ymax;
|
||||
PathStrokeLine line;
|
||||
float dx;
|
||||
switch (tag) {
|
||||
case PathSeg_FillCubic:
|
||||
case PathSeg_StrokeCubic:
|
||||
|
@ -162,22 +156,24 @@ void main() {
|
|||
}
|
||||
|
||||
// Output line segment
|
||||
xmin = min(p0.x, p1.x) - cubic.stroke.x;
|
||||
xmax = max(p0.x, p1.x) + cubic.stroke.x;
|
||||
ymin = min(p0.y, p1.y) - cubic.stroke.y;
|
||||
ymax = max(p0.y, p1.y) + cubic.stroke.y;
|
||||
|
||||
// Bounding box of element in pixel coordinates.
|
||||
float xmin = min(p0.x, p1.x) - cubic.stroke.x;
|
||||
float xmax = max(p0.x, p1.x) + cubic.stroke.x;
|
||||
float ymin = min(p0.y, p1.y) - cubic.stroke.y;
|
||||
float ymax = max(p0.y, p1.y) + cubic.stroke.y;
|
||||
float dx = p1.x - p0.x;
|
||||
float dy = p1.y - p0.y;
|
||||
// Set up for per-scanline coverage formula, below.
|
||||
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
||||
c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
|
||||
b = invslope; // Note: assumes square tiles, otherwise scale.
|
||||
a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
|
||||
float c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
|
||||
float b = invslope; // Note: assumes square tiles, otherwise scale.
|
||||
float a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
|
||||
|
||||
int x0 = int(floor((xmin) * SX));
|
||||
int x1 = int(ceil((xmax) * SX));
|
||||
int y0 = int(floor((ymin) * SY));
|
||||
int y1 = int(ceil((ymax) * SY));
|
||||
int x0 = int(floor(xmin * SX));
|
||||
int x1 = int(floor(xmax * SX) + 1);
|
||||
int y0 = int(floor(ymin * SY));
|
||||
int y1 = int(floor(ymax * SY) + 1);
|
||||
|
||||
x0 = clamp(x0, bbox.x, bbox.z);
|
||||
y0 = clamp(y0, bbox.y, bbox.w);
|
||||
|
@ -191,36 +187,69 @@ void main() {
|
|||
// Consider using subgroups to aggregate atomic add.
|
||||
uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);
|
||||
TileSeg tile_seg;
|
||||
|
||||
int xray = int(floor(p0.x*SX));
|
||||
int last_xray = int(floor(p1.x*SX));
|
||||
if (p0.y > p1.y) {
|
||||
int tmp = xray;
|
||||
xray = last_xray;
|
||||
last_xray = tmp;
|
||||
}
|
||||
for (int y = y0; y < y1; y++) {
|
||||
float tile_y0 = float(y * TILE_HEIGHT_PX);
|
||||
if (tag == PathSeg_FillCubic && min(p0.y, p1.y) <= tile_y0) {
|
||||
int xray = max(int(ceil(xc - 0.5 * b)), bbox.x);
|
||||
if (xray < bbox.z) {
|
||||
int backdrop = p1.y < p0.y ? 1 : -1;
|
||||
TileRef tile_ref = Tile_index(path.tiles, uint(base + xray));
|
||||
uint tile_el = tile_ref.offset >> 2;
|
||||
atomicAdd(tile[tile_el + 1], backdrop);
|
||||
}
|
||||
int xbackdrop = max(xray + 1, bbox.x);
|
||||
if (tag == PathSeg_FillCubic && y > y0 && xbackdrop < bbox.z) {
|
||||
int backdrop = p1.y < p0.y ? 1 : -1;
|
||||
TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
|
||||
uint tile_el = tile_ref.offset >> 2;
|
||||
atomicAdd(tile[tile_el + 1], backdrop);
|
||||
}
|
||||
|
||||
int xx0 = clamp(int(floor(xc - c)), x0, x1);
|
||||
int xx1 = clamp(int(ceil(xc + c)), x0, x1);
|
||||
xx1 = max(xx1, xray + 1);
|
||||
|
||||
// next_xray is the xray for the next scanline; it is derived
|
||||
// by left edge intersections computed below.
|
||||
int next_xray = xray;
|
||||
for (int x = xx0; x < xx1; x++) {
|
||||
float tile_x0 = float(x * TILE_WIDTH_PX);
|
||||
TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
|
||||
uint tile_el = tile_ref.offset >> 2;
|
||||
uint old = atomicExchange(tile[tile_el], tile_offset);
|
||||
tile_seg.start = p0;
|
||||
tile_seg.end = p1;
|
||||
tile_seg.origin = p0;
|
||||
tile_seg.vector = p1 - p0;
|
||||
float y_edge = 0.0;
|
||||
if (tag == PathSeg_FillCubic) {
|
||||
float tile_y0 = float(y * TILE_HEIGHT_PX);
|
||||
y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
|
||||
if (min(p0.x, p1.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) {
|
||||
// Left edge intersection.
|
||||
vec2 p = vec2(tile_x0, y_edge);
|
||||
if (p0.x > p1.x) {
|
||||
tile_seg.end = vec2(tile_x0, y_edge);
|
||||
tile_seg.vector = p - p0;
|
||||
} else {
|
||||
tile_seg.start = vec2(tile_x0, y_edge);
|
||||
tile_seg.origin = p;
|
||||
tile_seg.vector = p1 - p;
|
||||
}
|
||||
} else {
|
||||
// kernel4 uses sign(vector.x) for the sign of the intersection backdrop.
|
||||
// Nudge zeroes towards the intended sign.
|
||||
if (tile_seg.vector.x == 0) {
|
||||
tile_seg.vector.x += sign(p1.x - p0.x)*1e-9;
|
||||
}
|
||||
// Move next_xray consistently with previous intersections.
|
||||
if (x > next_xray && next_xray >= xray) {
|
||||
next_xray = x;
|
||||
} else if (x <= next_xray && next_xray <= xray) {
|
||||
next_xray = x - 1;
|
||||
}
|
||||
}
|
||||
// Force last xray on the last scanline for consistency with later
|
||||
// line segments.
|
||||
if (y == y1 - 1) {
|
||||
next_xray = last_xray;
|
||||
}
|
||||
// Drop inconsistent intersections.
|
||||
if (x <= min(xray, next_xray) || max(xray, next_xray) < x) {
|
||||
y_edge = 1e9;
|
||||
}
|
||||
}
|
||||
|
@ -231,6 +260,7 @@ void main() {
|
|||
}
|
||||
xc += b;
|
||||
base += stride;
|
||||
xray = next_xray;
|
||||
}
|
||||
|
||||
n_out += 1;
|
||||
|
|
Binary file not shown.
|
@ -35,8 +35,8 @@ TileRef Tile_index(TileRef ref, uint index) {
|
|||
}
|
||||
|
||||
struct TileSeg {
|
||||
vec2 start;
|
||||
vec2 end;
|
||||
vec2 origin;
|
||||
vec2 vector;
|
||||
float y_edge;
|
||||
TileSegRef next;
|
||||
};
|
||||
|
@ -90,8 +90,8 @@ TileSeg TileSeg_read(TileSegRef ref) {
|
|||
uint raw4 = tile[ix + 4];
|
||||
uint raw5 = tile[ix + 5];
|
||||
TileSeg s;
|
||||
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.y_edge = uintBitsToFloat(raw4);
|
||||
s.next = TileSegRef(raw5);
|
||||
return s;
|
||||
|
@ -99,10 +99,10 @@ TileSeg TileSeg_read(TileSegRef ref) {
|
|||
|
||||
void TileSeg_write(TileSegRef ref, TileSeg s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
tile[ix + 0] = floatBitsToUint(s.start.x);
|
||||
tile[ix + 1] = floatBitsToUint(s.start.y);
|
||||
tile[ix + 2] = floatBitsToUint(s.end.x);
|
||||
tile[ix + 3] = floatBitsToUint(s.end.y);
|
||||
tile[ix + 0] = floatBitsToUint(s.origin.x);
|
||||
tile[ix + 1] = floatBitsToUint(s.origin.y);
|
||||
tile[ix + 2] = floatBitsToUint(s.vector.x);
|
||||
tile[ix + 3] = floatBitsToUint(s.vector.y);
|
||||
tile[ix + 4] = floatBitsToUint(s.y_edge);
|
||||
tile[ix + 5] = s.next.offset;
|
||||
}
|
||||
|
|
|
@ -99,9 +99,11 @@ fn render_cardioid(rc: &mut impl RenderContext) {
|
|||
fn render_clip_test(rc: &mut impl RenderContext) {
|
||||
const N: usize = 16;
|
||||
const X0: f64 = 50.0;
|
||||
const Y0: f64 = 50.0;
|
||||
const X1: f64 = 100.0;
|
||||
const Y1: f64 = 100.0;
|
||||
const Y0: f64 = 450.0;
|
||||
// Note: if it gets much larger, it will exceed the 1MB scratch buffer.
|
||||
// But this is a pretty demanding test.
|
||||
const X1: f64 = 550.0;
|
||||
const Y1: f64 = 950.0;
|
||||
let step = 1.0 / ((N + 1) as f64);
|
||||
for i in 0..N {
|
||||
let t = ((i + 1) as f64) * step;
|
||||
|
|
Loading…
Reference in a new issue