mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
Merge branch 'master' into android2
This commit is contained in:
commit
6602d58054
|
@ -35,16 +35,13 @@ shared Alloc sh_row_alloc[BACKDROP_WG];
|
||||||
shared uint sh_row_width[BACKDROP_WG];
|
shared uint sh_row_width[BACKDROP_WG];
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||||
|
|
||||||
// Work assignment: 1 thread : 1 path element
|
// Work assignment: 1 thread : 1 path element
|
||||||
uint row_count = 0;
|
uint row_count = 0;
|
||||||
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
if (element_ix < conf.n_elements) {
|
if (element_ix < conf.n_elements) {
|
||||||
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
|
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
|
||||||
switch (tag.tag) {
|
switch (tag.tag) {
|
||||||
|
@ -67,7 +64,7 @@ void main() {
|
||||||
// long as it doesn't cross the left edge.
|
// long as it doesn't cross the left edge.
|
||||||
row_count = 0;
|
row_count = 0;
|
||||||
}
|
}
|
||||||
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
sh_row_alloc[th_ix] = path_alloc;
|
sh_row_alloc[th_ix] = path_alloc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -95,7 +92,7 @@ void main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint width = sh_row_width[el_ix];
|
uint width = sh_row_width[el_ix];
|
||||||
if (width > 0) {
|
if (width > 0 && mem_ok) {
|
||||||
// Process one row sequentially
|
// Process one row sequentially
|
||||||
// Read backdrop value per tile and prefix sum it
|
// Read backdrop value per tile and prefix sum it
|
||||||
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||||
|
|
Binary file not shown.
|
@ -36,10 +36,6 @@ shared Alloc sh_chunk_alloc[N_TILE];
|
||||||
shared bool sh_alloc_failed;
|
shared bool sh_alloc_failed;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint my_n_elements = conf.n_elements;
|
uint my_n_elements = conf.n_elements;
|
||||||
uint my_partition = gl_WorkGroupID.x;
|
uint my_partition = gl_WorkGroupID.x;
|
||||||
|
|
||||||
|
@ -105,7 +101,7 @@ void main() {
|
||||||
count[i][gl_LocalInvocationID.x] = element_count;
|
count[i][gl_LocalInvocationID.x] = element_count;
|
||||||
}
|
}
|
||||||
// element_count is number of elements covering bin for this invocation.
|
// element_count is number of elements covering bin for this invocation.
|
||||||
Alloc chunk_alloc = new_alloc(0, 0);
|
Alloc chunk_alloc = new_alloc(0, 0, true);
|
||||||
if (element_count != 0) {
|
if (element_count != 0) {
|
||||||
// TODO: aggregate atomic adds (subgroup is probably fastest)
|
// TODO: aggregate atomic adds (subgroup is probably fastest)
|
||||||
MallocResult chunk = malloc(element_count * BinInstance_size);
|
MallocResult chunk = malloc(element_count * BinInstance_size);
|
||||||
|
@ -122,7 +118,7 @@ void main() {
|
||||||
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
|
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
|
||||||
|
|
||||||
barrier();
|
barrier();
|
||||||
if (sh_alloc_failed) {
|
if (sh_alloc_failed || mem_error != NO_ERROR) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -56,7 +56,7 @@ void write_tile_alloc(uint el_ix, Alloc a) {
|
||||||
sh_tile_alloc[el_ix] = a;
|
sh_tile_alloc[el_ix] = a;
|
||||||
}
|
}
|
||||||
|
|
||||||
Alloc read_tile_alloc(uint el_ix) {
|
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||||
return sh_tile_alloc[el_ix];
|
return sh_tile_alloc[el_ix];
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
|
@ -64,9 +64,9 @@ void write_tile_alloc(uint el_ix, Alloc a) {
|
||||||
// No-op
|
// No-op
|
||||||
}
|
}
|
||||||
|
|
||||||
Alloc read_tile_alloc(uint el_ix) {
|
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||||
// All memory.
|
// All memory.
|
||||||
return new_alloc(0, memory.length()*4);
|
return new_alloc(0, memory.length()*4, mem_ok);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -109,10 +109,6 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Could use either linear or 2d layouts for both dispatch and
|
// Could use either linear or 2d layouts for both dispatch and
|
||||||
// invocations within the workgroup. We'll use variables to abstract.
|
// invocations within the workgroup. We'll use variables to abstract.
|
||||||
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
|
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
|
||||||
|
@ -158,6 +154,7 @@ void main() {
|
||||||
|
|
||||||
uint num_begin_slots = 0;
|
uint num_begin_slots = 0;
|
||||||
uint begin_slot = 0;
|
uint begin_slot = 0;
|
||||||
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
while (true) {
|
while (true) {
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
sh_bitmaps[i][th_ix] = 0;
|
sh_bitmaps[i][th_ix] = 0;
|
||||||
|
@ -172,7 +169,7 @@ void main() {
|
||||||
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
||||||
count = read_mem(conf.bin_alloc, in_ix);
|
count = read_mem(conf.bin_alloc, in_ix);
|
||||||
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
|
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
|
||||||
sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size);
|
sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size, mem_ok);
|
||||||
}
|
}
|
||||||
// prefix sum of counts
|
// prefix sum of counts
|
||||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||||
|
@ -196,7 +193,7 @@ void main() {
|
||||||
}
|
}
|
||||||
// use binary search to find element to read
|
// use binary search to find element to read
|
||||||
uint ix = rd_ix + th_ix;
|
uint ix = rd_ix + th_ix;
|
||||||
if (ix >= wr_ix && ix < ready_ix) {
|
if (ix >= wr_ix && ix < ready_ix && mem_ok) {
|
||||||
uint part_ix = 0;
|
uint part_ix = 0;
|
||||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||||
uint probe = part_ix + ((N_PART_READ / 2) >> i);
|
uint probe = part_ix + ((N_PART_READ / 2) >> i);
|
||||||
|
@ -253,7 +250,7 @@ void main() {
|
||||||
// base relative to bin
|
// base relative to bin
|
||||||
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
||||||
sh_tile_base[th_ix] = base;
|
sh_tile_base[th_ix] = base;
|
||||||
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
write_tile_alloc(th_ix, path_alloc);
|
write_tile_alloc(th_ix, path_alloc);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -288,11 +285,11 @@ void main() {
|
||||||
uint width = sh_tile_width[el_ix];
|
uint width = sh_tile_width[el_ix];
|
||||||
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||||
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||||
bool include_tile;
|
bool include_tile = false;
|
||||||
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
||||||
include_tile = true;
|
include_tile = true;
|
||||||
} else {
|
} else if (mem_ok) {
|
||||||
Tile tile = Tile_read(read_tile_alloc(el_ix), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||||
// Include the path in the tile if
|
// Include the path in the tile if
|
||||||
// - the tile contains at least a segment (tile offset non-zero)
|
// - the tile contains at least a segment (tile offset non-zero)
|
||||||
// - the tile is completely covered (backdrop non-zero)
|
// - the tile is completely covered (backdrop non-zero)
|
||||||
|
@ -311,7 +308,7 @@ void main() {
|
||||||
// through the non-segment elements.
|
// through the non-segment elements.
|
||||||
uint slice_ix = 0;
|
uint slice_ix = 0;
|
||||||
uint bitmap = sh_bitmaps[0][th_ix];
|
uint bitmap = sh_bitmaps[0][th_ix];
|
||||||
while (true) {
|
while (mem_ok) {
|
||||||
if (bitmap == 0) {
|
if (bitmap == 0) {
|
||||||
slice_ix++;
|
slice_ix++;
|
||||||
if (slice_ix == N_SLICE) {
|
if (slice_ix == N_SLICE) {
|
||||||
|
@ -337,7 +334,7 @@ void main() {
|
||||||
if (clip_zero_depth == 0) {
|
if (clip_zero_depth == 0) {
|
||||||
switch (tag.tag) {
|
switch (tag.tag) {
|
||||||
case Annotated_Color:
|
case Annotated_Color:
|
||||||
Tile tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
|
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
|
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
|
@ -348,7 +345,7 @@ void main() {
|
||||||
cmd_ref.offset += 4 + CmdColor_size;
|
cmd_ref.offset += 4 + CmdColor_size;
|
||||||
break;
|
break;
|
||||||
case Annotated_Image:
|
case Annotated_Image:
|
||||||
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
|
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
|
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||||
|
@ -359,7 +356,7 @@ void main() {
|
||||||
cmd_ref.offset += 4 + CmdImage_size;
|
cmd_ref.offset += 4 + CmdImage_size;
|
||||||
break;
|
break;
|
||||||
case Annotated_BeginClip:
|
case Annotated_BeginClip:
|
||||||
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
|
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
||||||
clip_zero_depth = clip_depth + 1;
|
clip_zero_depth = clip_depth + 1;
|
||||||
|
|
Binary file not shown.
|
@ -176,10 +176,6 @@ shared uint sh_part_ix;
|
||||||
shared State sh_prefix;
|
shared State sh_prefix;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
State th_state[N_ROWS];
|
State th_state[N_ROWS];
|
||||||
// Determine partition to process by atomic counter (described in Section
|
// Determine partition to process by atomic counter (described in Section
|
||||||
// 4.4 of prefix sum paper).
|
// 4.4 of prefix sum paper).
|
||||||
|
@ -392,7 +388,6 @@ void main() {
|
||||||
vec2 lw = get_linewidth(st);
|
vec2 lw = get_linewidth(st);
|
||||||
anno_begin_clip.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
|
anno_begin_clip.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
|
||||||
} else {
|
} else {
|
||||||
anno_begin_clip.bbox = begin_clip.bbox;
|
|
||||||
anno_fill.linewidth = 0.0;
|
anno_fill.linewidth = 0.0;
|
||||||
}
|
}
|
||||||
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
|
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
|
||||||
|
|
Binary file not shown.
|
@ -22,45 +22,45 @@
|
||||||
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
|
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
|
||||||
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
|
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf {
|
||||||
Config conf;
|
Config conf;
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
|
layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image;
|
||||||
|
|
||||||
#ifdef ENABLE_IMAGE_INDICES
|
#ifdef ENABLE_IMAGE_INDICES
|
||||||
layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[];
|
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[];
|
||||||
#else
|
#else
|
||||||
layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1];
|
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[1];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
|
|
||||||
vec3 tosRGB(vec3 rgb) {
|
mediump vec3 tosRGB(mediump vec3 rgb) {
|
||||||
bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
|
bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
|
||||||
vec3 below = vec3(12.92)*rgb;
|
mediump vec3 below = vec3(12.92)*rgb;
|
||||||
vec3 above = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
|
mediump vec3 above = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
|
||||||
return mix(below, above, cutoff);
|
return mix(below, above, cutoff);
|
||||||
}
|
}
|
||||||
|
|
||||||
vec3 fromsRGB(vec3 srgb) {
|
mediump vec3 fromsRGB(mediump vec3 srgb) {
|
||||||
// Formula from EXT_sRGB.
|
// Formula from EXT_sRGB.
|
||||||
bvec3 cutoff = greaterThanEqual(srgb, vec3(0.04045));
|
bvec3 cutoff = greaterThanEqual(srgb, vec3(0.04045));
|
||||||
vec3 below = srgb/vec3(12.92);
|
mediump vec3 below = srgb/vec3(12.92);
|
||||||
vec3 above = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
|
mediump vec3 above = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
|
||||||
return mix(below, above, cutoff);
|
return mix(below, above, cutoff);
|
||||||
}
|
}
|
||||||
|
|
||||||
// unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color
|
// unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color
|
||||||
// space.
|
// space.
|
||||||
vec4 unpacksRGB(uint srgba) {
|
mediump vec4 unpacksRGB(uint srgba) {
|
||||||
vec4 color = unpackUnorm4x8(srgba).wzyx;
|
mediump vec4 color = unpackUnorm4x8(srgba).wzyx;
|
||||||
return vec4(fromsRGB(color.rgb), color.a);
|
return vec4(fromsRGB(color.rgb), color.a);
|
||||||
}
|
}
|
||||||
|
|
||||||
// packsRGB packs a color in the linear color space into its 8-bit sRGB equivalent.
|
// packsRGB packs a color in the linear color space into its 8-bit sRGB equivalent.
|
||||||
uint packsRGB(vec4 rgba) {
|
uint packsRGB(mediump vec4 rgba) {
|
||||||
rgba = vec4(tosRGB(rgba.rgb), rgba.a);
|
rgba = vec4(tosRGB(rgba.rgb), rgba.a);
|
||||||
return packUnorm4x8(rgba.wzyx);
|
return packUnorm4x8(rgba.wzyx);
|
||||||
}
|
}
|
||||||
|
@ -69,14 +69,15 @@ uvec2 chunk_offset(uint i) {
|
||||||
return uvec2(i % CHUNK_X * CHUNK_DX, i / CHUNK_X * CHUNK_DY);
|
return uvec2(i % CHUNK_X * CHUNK_DX, i / CHUNK_X * CHUNK_DY);
|
||||||
}
|
}
|
||||||
|
|
||||||
vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
|
mediump vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
|
||||||
vec4 rgba[CHUNK];
|
mediump vec4 rgba[CHUNK];
|
||||||
for (uint i = 0; i < CHUNK; i++) {
|
for (uint i = 0; i < CHUNK; i++) {
|
||||||
ivec2 uv = ivec2(xy + chunk_offset(i)) + cmd_img.offset;
|
ivec2 uv = ivec2(xy + chunk_offset(i)) + cmd_img.offset;
|
||||||
|
mediump vec4 fg_rgba;
|
||||||
#ifdef ENABLE_IMAGE_INDICES
|
#ifdef ENABLE_IMAGE_INDICES
|
||||||
vec4 fg_rgba = imageLoad(images[cmd_img.index], uv);
|
fg_rgba = imageLoad(images[cmd_img.index], uv);
|
||||||
#else
|
#else
|
||||||
vec4 fg_rgba = imageLoad(images[0], uv);
|
fg_rgba = imageLoad(images[0], uv);
|
||||||
#endif
|
#endif
|
||||||
fg_rgba.rgb = fromsRGB(fg_rgba.rgb);
|
fg_rgba.rgb = fromsRGB(fg_rgba.rgb);
|
||||||
rgba[i] = fg_rgba;
|
rgba[i] = fg_rgba;
|
||||||
|
@ -85,10 +86,6 @@ vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
|
uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
|
||||||
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
||||||
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
||||||
|
@ -99,7 +96,7 @@ void main() {
|
||||||
|
|
||||||
uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
||||||
vec2 xy = vec2(xy_uint);
|
vec2 xy = vec2(xy_uint);
|
||||||
vec4 rgba[CHUNK];
|
mediump vec4 rgba[CHUNK];
|
||||||
for (uint i = 0; i < CHUNK; i++) {
|
for (uint i = 0; i < CHUNK; i++) {
|
||||||
rgba[i] = vec4(0.0);
|
rgba[i] = vec4(0.0);
|
||||||
// TODO: remove this debug image support when the actual image method is plumbed.
|
// TODO: remove this debug image support when the actual image method is plumbed.
|
||||||
|
@ -116,9 +113,10 @@ void main() {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
float area[CHUNK];
|
mediump float area[CHUNK];
|
||||||
uint clip_depth = 0;
|
uint clip_depth = 0;
|
||||||
while (true) {
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
|
while (mem_ok) {
|
||||||
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
||||||
if (tag == Cmd_End) {
|
if (tag == Cmd_End) {
|
||||||
break;
|
break;
|
||||||
|
@ -127,11 +125,11 @@ void main() {
|
||||||
case Cmd_Stroke:
|
case Cmd_Stroke:
|
||||||
// Calculate distance field from all the line segments in this tile.
|
// Calculate distance field from all the line segments in this tile.
|
||||||
CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
|
CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
|
||||||
float df[CHUNK];
|
mediump float df[CHUNK];
|
||||||
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
||||||
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
||||||
do {
|
do {
|
||||||
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
||||||
vec2 line_vec = seg.vector;
|
vec2 line_vec = seg.vector;
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
||||||
|
@ -152,7 +150,7 @@ void main() {
|
||||||
tile_seg_ref = TileSegRef(fill.tile_ref);
|
tile_seg_ref = TileSegRef(fill.tile_ref);
|
||||||
// Calculate coverage based on backdrop + coverage of each line segment
|
// Calculate coverage based on backdrop + coverage of each line segment
|
||||||
do {
|
do {
|
||||||
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec2 my_xy = xy + vec2(chunk_offset(k));
|
vec2 my_xy = xy + vec2(chunk_offset(k));
|
||||||
vec2 start = seg.origin - my_xy;
|
vec2 start = seg.origin - my_xy;
|
||||||
|
@ -193,18 +191,18 @@ void main() {
|
||||||
break;
|
break;
|
||||||
case Cmd_Color:
|
case Cmd_Color:
|
||||||
CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
|
CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
|
||||||
vec4 fg = unpacksRGB(color.rgba_color);
|
mediump vec4 fg = unpacksRGB(color.rgba_color);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec4 fg_k = fg * area[k];
|
mediump vec4 fg_k = fg * area[k];
|
||||||
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += 4 + CmdColor_size;
|
cmd_ref.offset += 4 + CmdColor_size;
|
||||||
break;
|
break;
|
||||||
case Cmd_Image:
|
case Cmd_Image:
|
||||||
CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
|
CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
|
||||||
vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
|
mediump vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec4 fg_k = img[k] * area[k];
|
mediump vec4 fg_k = img[k] * area[k];
|
||||||
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += 4 + CmdImage_size;
|
cmd_ref.offset += 4 + CmdImage_size;
|
||||||
|
@ -215,7 +213,7 @@ void main() {
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
uvec2 offset = chunk_offset(k);
|
uvec2 offset = chunk_offset(k);
|
||||||
uint srgb = packsRGB(vec4(rgba[k]));
|
uint srgb = packsRGB(vec4(rgba[k]));
|
||||||
float alpha = clamp(abs(area[k]), 0.0, 1.0);
|
mediump float alpha = clamp(abs(area[k]), 0.0, 1.0);
|
||||||
write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
|
write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
|
||||||
write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
|
write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
|
||||||
rgba[k] = vec4(0.0);
|
rgba[k] = vec4(0.0);
|
||||||
|
@ -231,8 +229,8 @@ void main() {
|
||||||
uvec2 offset = chunk_offset(k);
|
uvec2 offset = chunk_offset(k);
|
||||||
uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
|
uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
|
||||||
uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
|
uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
|
||||||
vec4 bg = unpacksRGB(srgb);
|
mediump vec4 bg = unpacksRGB(srgb);
|
||||||
vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
|
mediump vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
|
||||||
rgba[k] = bg * (1.0 - fg.a) + fg;
|
rgba[k] = bg * (1.0 - fg.a) + fg;
|
||||||
}
|
}
|
||||||
cmd_ref.offset += 4;
|
cmd_ref.offset += 4;
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -44,11 +44,15 @@ struct MallocResult {
|
||||||
};
|
};
|
||||||
|
|
||||||
// new_alloc synthesizes an Alloc from an offset and size.
|
// new_alloc synthesizes an Alloc from an offset and size.
|
||||||
Alloc new_alloc(uint offset, uint size) {
|
Alloc new_alloc(uint offset, uint size, bool mem_ok) {
|
||||||
Alloc a;
|
Alloc a;
|
||||||
a.offset = offset;
|
a.offset = offset;
|
||||||
#ifdef MEM_DEBUG
|
#ifdef MEM_DEBUG
|
||||||
|
if (mem_ok) {
|
||||||
a.size = size;
|
a.size = size;
|
||||||
|
} else {
|
||||||
|
a.size = 0;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
@ -56,11 +60,10 @@ Alloc new_alloc(uint offset, uint size) {
|
||||||
// malloc allocates size bytes of memory.
|
// malloc allocates size bytes of memory.
|
||||||
MallocResult malloc(uint size) {
|
MallocResult malloc(uint size) {
|
||||||
MallocResult r;
|
MallocResult r;
|
||||||
r.failed = false;
|
|
||||||
uint offset = atomicAdd(mem_offset, size);
|
uint offset = atomicAdd(mem_offset, size);
|
||||||
r.alloc = new_alloc(offset, size);
|
r.failed = offset + size > memory.length() * 4;
|
||||||
if (offset + size > memory.length() * 4) {
|
r.alloc = new_alloc(offset, size, !r.failed);
|
||||||
r.failed = true;
|
if (r.failed) {
|
||||||
atomicMax(mem_error, ERR_MALLOC_FAILED);
|
atomicMax(mem_error, ERR_MALLOC_FAILED);
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
@ -119,8 +122,10 @@ Alloc slice_mem(Alloc a, uint offset, uint size) {
|
||||||
// but never written.
|
// but never written.
|
||||||
return Alloc(0, 0);
|
return Alloc(0, 0);
|
||||||
}
|
}
|
||||||
|
return Alloc(a.offset + offset, size);
|
||||||
|
#else
|
||||||
|
return Alloc(a.offset + offset);
|
||||||
#endif
|
#endif
|
||||||
return new_alloc(a.offset + offset, size);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// alloc_write writes alloc to memory at offset bytes.
|
// alloc_write writes alloc to memory at offset bytes.
|
||||||
|
|
|
@ -87,10 +87,6 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
|
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
|
||||||
|
|
||||||
|
@ -98,6 +94,7 @@ void main() {
|
||||||
if (element_ix < conf.n_pathseg) {
|
if (element_ix < conf.n_pathseg) {
|
||||||
tag = PathSeg_tag(conf.pathseg_alloc, ref);
|
tag = PathSeg_tag(conf.pathseg_alloc, ref);
|
||||||
}
|
}
|
||||||
|
bool mem_ok = mem_error == NO_ERROR;
|
||||||
switch (tag.tag) {
|
switch (tag.tag) {
|
||||||
case PathSeg_Cubic:
|
case PathSeg_Cubic:
|
||||||
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
|
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
|
||||||
|
@ -135,7 +132,7 @@ void main() {
|
||||||
bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
|
bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
|
||||||
uint path_ix = cubic.path_ix;
|
uint path_ix = cubic.path_ix;
|
||||||
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
||||||
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
|
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||||
ivec4 bbox = ivec4(path.bbox);
|
ivec4 bbox = ivec4(path.bbox);
|
||||||
vec2 p0 = cubic.p0;
|
vec2 p0 = cubic.p0;
|
||||||
qp0 = cubic.p0;
|
qp0 = cubic.p0;
|
||||||
|
@ -195,7 +192,7 @@ void main() {
|
||||||
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
||||||
// Consider using subgroups to aggregate atomic add.
|
// Consider using subgroups to aggregate atomic add.
|
||||||
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
|
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
|
||||||
if (tile_alloc.failed) {
|
if (tile_alloc.failed || !mem_ok) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
uint tile_offset = tile_alloc.alloc.offset;
|
uint tile_offset = tile_alloc.alloc.offset;
|
||||||
|
|
Binary file not shown.
|
@ -28,10 +28,6 @@ shared uint sh_tile_count[TILE_ALLOC_WG];
|
||||||
shared MallocResult sh_tile_alloc;
|
shared MallocResult sh_tile_alloc;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
if (mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
|
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
|
||||||
|
@ -86,7 +82,7 @@ void main() {
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
MallocResult alloc_start = sh_tile_alloc;
|
MallocResult alloc_start = sh_tile_alloc;
|
||||||
if (alloc_start.failed) {
|
if (alloc_start.failed || mem_error != NO_ERROR) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
Loading…
Reference in a new issue