Implement robust dynamic memory

This is the core logic for robust dynamic memory. There are changes to both shaders and the driver logic.

On the shader side, failure information is more useful and fine-grained. In particular, the shaders now report which stage failed and how much memory would have been required for that stage to succeed.
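
A sketch of how the driver can consume that report (the MemoryHeader layout and STAGE_* bits mirror the definitions added later in this commit; the readback plumbing itself is omitted):

    // Stage bits, matching mem.h.
    const STAGE_BINNING: u32 = 1 << 0;
    const STAGE_TILE_ALLOC: u32 = 1 << 1;
    const STAGE_PATH_COARSE: u32 = 1 << 2;
    const STAGE_COARSE: u32 = 1 << 3;

    // Mirrors the header at the start of the memory buffer.
    #[repr(C)]
    #[derive(Clone, Copy, Debug)]
    struct MemoryHeader {
        mem_offset: u32,   // next free offset in bytes; high-water mark after a failed run
        mem_error: u32,    // bitmask of STAGE_* bits whose allocation failed
        blend_offset: u32, // bytes of blend scratch requested by coarse
    }

    // Returns the buffer size (in bytes) needed for a retry, or None on success.
    fn realloc_target(header: &MemoryHeader) -> Option<u32> {
        if header.mem_error == 0 {
            None
        } else {
            // mem_offset keeps advancing even when an allocation fails, so it is
            // a lower bound on the size needed for the failing stages to succeed.
            Some(header.mem_offset)
        }
    }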

On the driver side, there is a new RenderDriver abstraction which owns command buffers (and their associated query pools) and runs the logic to retry and reallocate buffers when necessary. There is also a fairly significant rework of the logic that produces the config block, as that overlaps with the robust-memory work.
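
Roughly, the retry loop has this shape (record_coarse, record_fine, and upload_config appear later in this diff; the *_assumed helpers stand in for command-buffer and readback plumbing in render_driver.rs and are not the actual API):

    // Run coarse rasterization, growing the memory buffer until it succeeds,
    // then run fine rasterization once.
    unsafe fn render_with_retry(
        renderer: &mut Renderer,
        query_pool: &QueryPool,
        buf_ix: usize,
    ) -> Result<(), Error> {
        loop {
            let mut cmd_buf = new_cmd_buf_assumed()?;
            renderer.record_coarse(&mut cmd_buf, query_pool, buf_ix);
            submit_and_wait_assumed(cmd_buf)?;
            // Maps memory_buf_readback and returns the MemoryHeader.
            let header = read_memory_header_assumed(renderer)?;
            if header.mem_error == 0 {
                break;
            }
            // Sizing heuristic; the commit notes this could be smarter.
            let new_size = u64::from(header.mem_offset).next_power_of_two();
            realloc_memory_assumed(renderer, new_size)?;
            // The config carries mem_size, so it must be re-uploaded after a resize.
            renderer.upload_config(buf_ix)?;
        }
        let mut cmd_buf = new_cmd_buf_assumed()?;
        renderer.record_fine(&mut cmd_buf, query_pool, /* query_start */ 10);
        submit_and_wait_assumed(cmd_buf)?;
        Ok(())
    }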

The RenderDriver abstraction may not stay. It was done this way to minimize code disruption, but arguably it should just be combined with Renderer.

Another change: the GLSL length() method on a buffer requires additional infrastructure (at least on Metal, where it needs a binding of its own), so the memory buffer's size is now passed in as a field in the config instead.
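
Concretely, the Config struct now begins with that size (Rust-side sketch showing only the leading fields, names per setup.h; the full struct lives in the stages module):

    #[repr(C)]
    #[derive(Clone, Copy, Default)]
    struct Config {
        mem_size: u32,       // in bytes; replaces memory.length() * 4 in the shaders
        n_elements: u32,     // paths
        n_pathseg: u32,
        width_in_tiles: u32,
        // ... remaining fields unchanged ...
    }

The driver fills it from the size of the buffer it actually allocated (see config.mem_size in upload_config later in this diff), which is also what makes the retry-and-reallocate path work without the shader querying the buffer.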

This also moves blend memory to its own buffer. This worked out well because coarse rasterization simply reports how much blend memory is needed, and the blend buffer can be reallocated without rerunning the pipeline. Previously, blend allocations and ptcl writes were interleaved in coarse rasterization, so a failure of the former required rerunning coarse. This should fix #83 (finally!).
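
Driver-side, that means blend capacity can be checked (and the blend buffer grown) between coarse and fine, using the blend_offset reported in the memory header. A sketch, with placeholder names for the pieces not shown here:

    // Coarse only counts blend scratch (an atomicAdd on blend_offset) and writes
    // the resulting offsets into the ptcl, so growing the blend buffer does not
    // invalidate anything coarse produced.
    fn blend_realloc_target(header: &MemoryHeader, blend_buf_size: u64) -> Option<u64> {
        let needed = u64::from(header.blend_offset);
        if needed > blend_buf_size {
            // Caller reallocates the blend buffer and rebuilds the fine-raster
            // descriptor set (it is bound at binding 2 in kernel4), then records
            // fine rasterization as usual; coarse does not need to be re-run.
            Some(needed.next_power_of_two())
        } else {
            None
        }
    }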

There are a few loose ends. The binaries haven't been updated yet (I've been testing with a hand-written test program). Gradients weren't touched, so they still have a fixed-size allocation. And the logic to calculate the new buffer size on allocation failure could be smarter.

Closes #175
Raph Levien 2022-06-23 08:48:26 -07:00
parent 64e6268059
commit 240f44a228
18 changed files with 865 additions and 514 deletions


@ -45,12 +45,15 @@ shared Alloc sh_row_alloc[BACKDROP_WG];
shared uint sh_row_width[BACKDROP_WG]; shared uint sh_row_width[BACKDROP_WG];
void main() { void main() {
if (!check_deps(STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) {
return;
}
uint th_ix = gl_LocalInvocationIndex; uint th_ix = gl_LocalInvocationIndex;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
// Work assignment: 1 thread : 1 path element // Work assignment: 1 thread : 1 path element
uint row_count = 0; uint row_count = 0;
bool mem_ok = mem_error == NO_ERROR;
if (gl_LocalInvocationID.y == 0) { if (gl_LocalInvocationID.y == 0) {
if (element_ix < conf.n_elements) { if (element_ix < conf.n_elements) {
// Possible TODO: it's not necessary to process backdrops of stroked paths. // Possible TODO: it's not necessary to process backdrops of stroked paths.
@ -68,7 +71,7 @@ void main() {
row_count = 0; row_count = 0;
} }
Alloc path_alloc = new_alloc( Alloc path_alloc = new_alloc(
path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok); path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, true);
sh_row_alloc[th_ix] = path_alloc; sh_row_alloc[th_ix] = path_alloc;
} }
sh_row_count[th_ix] = row_count; sh_row_count[th_ix] = row_count;
@ -98,7 +101,7 @@ void main() {
} }
} }
uint width = sh_row_width[el_ix]; uint width = sh_row_width[el_ix];
if (width > 0 && mem_ok) { if (width > 0) {
// Process one row sequentially // Process one row sequentially
// Read backdrop value per tile and prefix sum it // Read backdrop value per tile and prefix sum it
Alloc tiles_alloc = sh_row_alloc[el_ix]; Alloc tiles_alloc = sh_row_alloc[el_ix];


@ -32,8 +32,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps) // Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
shared uint bitmaps[N_SLICE][N_TILE]; shared uint bitmaps[N_SLICE][N_TILE];
shared uint count[N_SLICE][N_TILE]; shared uint count[N_SLICE][N_TILE];
shared Alloc sh_chunk_alloc[N_TILE]; shared uint sh_chunk_offset[N_TILE];
shared bool sh_alloc_failed;
DrawMonoid load_draw_monoid(uint element_ix) { DrawMonoid load_draw_monoid(uint element_ix) {
uint base = (conf.drawmonoid_alloc.offset >> 2) + 4 * element_ix; uint base = (conf.drawmonoid_alloc.offset >> 2) + 4 * element_ix;
@ -84,10 +83,6 @@ void main() {
for (uint i = 0; i < N_SLICE; i++) { for (uint i = 0; i < N_SLICE; i++) {
bitmaps[i][gl_LocalInvocationID.x] = 0; bitmaps[i][gl_LocalInvocationID.x] = 0;
} }
if (gl_LocalInvocationID.x == 0) {
sh_alloc_failed = false;
}
barrier();
// Read inputs and determine coverage of bins // Read inputs and determine coverage of bins
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x; uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
@ -148,26 +143,18 @@ void main() {
count[i][gl_LocalInvocationID.x] = element_count; count[i][gl_LocalInvocationID.x] = element_count;
} }
// element_count is number of elements covering bin for this invocation. // element_count is number of elements covering bin for this invocation.
Alloc chunk_alloc = new_alloc(0, 0, true); uint chunk_offset = 0;
if (element_count != 0) { if (element_count != 0) {
// TODO: aggregate atomic adds (subgroup is probably fastest) chunk_offset = malloc_stage(element_count * BinInstance_size, conf.mem_size, STAGE_BINNING);
MallocResult chunk = malloc(element_count * BinInstance_size); sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed) {
sh_alloc_failed = true;
}
} }
// Note: it might be more efficient for reading to do this in the // Note: it might be more efficient for reading to do this in the
// other order (each bin is a contiguous sequence of partitions) // other order (each bin is a contiguous sequence of partitions)
uint out_ix = (conf.bin_alloc.offset >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2; uint out_ix = (conf.bin_alloc.offset >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
write_mem(conf.bin_alloc, out_ix, element_count); write_mem(conf.bin_alloc, out_ix, element_count);
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset); write_mem(conf.bin_alloc, out_ix + 1, chunk_offset);
barrier(); barrier();
if (sh_alloc_failed || mem_error != NO_ERROR) {
return;
}
// Use similar strategy as Laine & Karras paper; loop over bbox of bins // Use similar strategy as Laine & Karras paper; loop over bbox of bins
// touched by this element // touched by this element
@ -181,9 +168,10 @@ void main() {
if (my_slice > 0) { if (my_slice > 0) {
idx += count[my_slice - 1][bin_ix]; idx += count[my_slice - 1][bin_ix];
} }
Alloc out_alloc = sh_chunk_alloc[bin_ix]; uint chunk_offset = sh_chunk_offset[bin_ix];
uint out_offset = out_alloc.offset + idx * BinInstance_size; if (chunk_offset != MALLOC_FAILED) {
BinInstance_write(out_alloc, BinInstanceRef(out_offset), BinInstance(element_ix)); memory[(chunk_offset >> 2) + idx] = element_ix;
}
} }
x++; x++;
if (x == x1) { if (x == x1) {


@ -72,49 +72,62 @@ void write_tile_alloc(uint el_ix, Alloc a) {
Alloc read_tile_alloc(uint el_ix, bool mem_ok) { Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
// All memory. // All memory.
return new_alloc(0, memory.length() * 4, mem_ok); return new_alloc(0, conf.mem_size, mem_ok);
} }
#endif #endif
// The maximum number of commands per annotated element. // The maximum number of commands per annotated element.
#define ANNO_COMMANDS 2 #define ANNO_COMMANDS 2
// Perhaps cmd_alloc should be a global? This is a style question. // All writes to the output must be gated by mem_ok.
bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) { bool mem_ok = true;
// Perhaps cmd allocations should be a global? This is a style question.
void alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
if (cmd_ref.offset < cmd_limit) { if (cmd_ref.offset < cmd_limit) {
return true; return;
} }
MallocResult new_cmd = malloc(PTCL_INITIAL_ALLOC); uint new_cmd = malloc_stage(PTCL_INITIAL_ALLOC, conf.mem_size, STAGE_COARSE);
if (new_cmd.failed) { if (new_cmd == MALLOC_FAILED) {
return false; mem_ok = false;
} }
CmdJump jump = CmdJump(new_cmd.alloc.offset); if (mem_ok) {
CmdJump jump = CmdJump(new_cmd);
Cmd_Jump_write(cmd_alloc, cmd_ref, jump); Cmd_Jump_write(cmd_alloc, cmd_ref, jump);
cmd_alloc = new_cmd.alloc; }
cmd_ref = CmdRef(cmd_alloc.offset); cmd_alloc = new_alloc(new_cmd, PTCL_INITIAL_ALLOC, true);
cmd_ref = CmdRef(new_cmd);
// Reserve space for the maximum number of commands and a potential jump. // Reserve space for the maximum number of commands and a potential jump.
cmd_limit = cmd_alloc.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size; cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
return true;
} }
void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) { void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) {
if (linewidth < 0.0) { if (linewidth < 0.0) {
if (tile.tile.offset != 0) { if (tile.tile.offset != 0) {
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
if (mem_ok) {
Cmd_Fill_write(alloc, cmd_ref, cmd_fill); Cmd_Fill_write(alloc, cmd_ref, cmd_fill);
}
cmd_ref.offset += 4 + CmdFill_size; cmd_ref.offset += 4 + CmdFill_size;
} else { } else {
if (mem_ok) {
Cmd_Solid_write(alloc, cmd_ref); Cmd_Solid_write(alloc, cmd_ref);
}
cmd_ref.offset += 4; cmd_ref.offset += 4;
} }
} else { } else {
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * linewidth); CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * linewidth);
if (mem_ok) {
Cmd_Stroke_write(alloc, cmd_ref, cmd_stroke); Cmd_Stroke_write(alloc, cmd_ref, cmd_stroke);
}
cmd_ref.offset += 4 + CmdStroke_size; cmd_ref.offset += 4 + CmdStroke_size;
} }
} }
void main() { void main() {
if (!check_deps(STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) {
return;
}
// Could use either linear or 2d layouts for both dispatch and // Could use either linear or 2d layouts for both dispatch and
// invocations within the workgroup. We'll use variables to abstract. // invocations within the workgroup. We'll use variables to abstract.
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1) / N_TILE_X; uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1) / N_TILE_X;
@ -161,7 +174,6 @@ void main() {
uint drawtag_start = conf.drawtag_offset >> 2; uint drawtag_start = conf.drawtag_offset >> 2;
uint drawdata_start = conf.drawdata_offset >> 2; uint drawdata_start = conf.drawdata_offset >> 2;
uint drawinfo_start = conf.drawinfo_alloc.offset >> 2; uint drawinfo_start = conf.drawinfo_alloc.offset >> 2;
bool mem_ok = mem_error == NO_ERROR;
while (true) { while (true) {
for (uint i = 0; i < N_SLICE; i++) { for (uint i = 0; i < N_SLICE; i++) {
sh_bitmaps[i][th_ix] = 0; sh_bitmaps[i][th_ix] = 0;
@ -176,7 +188,7 @@ void main() {
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2; uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
count = read_mem(conf.bin_alloc, in_ix); count = read_mem(conf.bin_alloc, in_ix);
uint offset = read_mem(conf.bin_alloc, in_ix + 1); uint offset = read_mem(conf.bin_alloc, in_ix + 1);
sh_part_elements[th_ix] = new_alloc(offset, count * BinInstance_size, mem_ok); sh_part_elements[th_ix] = new_alloc(offset, count * BinInstance_size, true);
} }
// prefix sum of counts // prefix sum of counts
for (uint i = 0; i < LG_N_PART_READ; i++) { for (uint i = 0; i < LG_N_PART_READ; i++) {
@ -200,7 +212,7 @@ void main() {
} }
// use binary search to find element to read // use binary search to find element to read
uint ix = rd_ix + th_ix; uint ix = rd_ix + th_ix;
if (ix >= wr_ix && ix < ready_ix && mem_ok) { if (ix >= wr_ix && ix < ready_ix) {
uint part_ix = 0; uint part_ix = 0;
for (uint i = 0; i < LG_N_PART_READ; i++) { for (uint i = 0; i < LG_N_PART_READ; i++) {
uint probe = part_ix + (uint(N_PART_READ / 2) >> i); uint probe = part_ix + (uint(N_PART_READ / 2) >> i);
@ -257,7 +269,7 @@ void main() {
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size; uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
sh_tile_base[th_ix] = base; sh_tile_base[th_ix] = base;
Alloc path_alloc = new_alloc(path.tiles.offset, Alloc path_alloc = new_alloc(path.tiles.offset,
(path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok); (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, true);
write_tile_alloc(th_ix, path_alloc); write_tile_alloc(th_ix, path_alloc);
break; break;
default: default:
@ -293,8 +305,7 @@ void main() {
uint x = sh_tile_x0[el_ix] + seq_ix % width; uint x = sh_tile_x0[el_ix] + seq_ix % width;
uint y = sh_tile_y0[el_ix] + seq_ix / width; uint y = sh_tile_y0[el_ix] + seq_ix / width;
bool include_tile = false; bool include_tile = false;
if (mem_ok) { Tile tile = Tile_read(read_tile_alloc(el_ix, true),
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size)); TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
bool is_clip = (tag & 1) != 0; bool is_clip = (tag & 1) != 0;
// Always include the tile if it contains a path segment. // Always include the tile if it contains a path segment.
@ -313,7 +324,6 @@ void main() {
} }
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip
|| is_blend; || is_blend;
}
if (include_tile) { if (include_tile) {
uint el_slice = el_ix / 32; uint el_slice = el_ix / 32;
uint el_mask = 1u << (el_ix & 31); uint el_mask = 1u << (el_ix & 31);
@ -327,7 +337,7 @@ void main() {
// through the draw objects. // through the draw objects.
uint slice_ix = 0; uint slice_ix = 0;
uint bitmap = sh_bitmaps[0][th_ix]; uint bitmap = sh_bitmaps[0][th_ix];
while (mem_ok) { while (true) {
if (bitmap == 0) { if (bitmap == 0) {
slice_ix++; slice_ix++;
if (slice_ix == N_SLICE) { if (slice_ix == N_SLICE) {
@ -347,7 +357,7 @@ void main() {
uint drawtag = scene[drawtag_start + element_ix]; uint drawtag = scene[drawtag_start + element_ix];
if (clip_zero_depth == 0) { if (clip_zero_depth == 0) {
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), Tile tile = Tile_read(read_tile_alloc(element_ref_ix, true),
TileRef(sh_tile_base[element_ref_ix] + TileRef(sh_tile_base[element_ref_ix] +
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
uint drawmonoid_base = drawmonoid_start + 4 * element_ix; uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
@ -358,18 +368,16 @@ void main() {
switch (drawtag) { switch (drawtag) {
case Drawtag_FillColor: case Drawtag_FillColor:
float linewidth = uintBitsToFloat(memory[di]); float linewidth = uintBitsToFloat(memory[di]);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
break;
}
write_fill(cmd_alloc, cmd_ref, tile, linewidth); write_fill(cmd_alloc, cmd_ref, tile, linewidth);
uint rgba = scene[dd]; uint rgba = scene[dd];
if (mem_ok) {
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(rgba)); Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(rgba));
}
cmd_ref.offset += 4 + CmdColor_size; cmd_ref.offset += 4 + CmdColor_size;
break; break;
case Drawtag_FillLinGradient: case Drawtag_FillLinGradient:
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
break;
}
linewidth = uintBitsToFloat(memory[di]); linewidth = uintBitsToFloat(memory[di]);
write_fill(cmd_alloc, cmd_ref, tile, linewidth); write_fill(cmd_alloc, cmd_ref, tile, linewidth);
CmdLinGrad cmd_lin; CmdLinGrad cmd_lin;
@ -377,13 +385,13 @@ void main() {
cmd_lin.line_x = uintBitsToFloat(memory[di + 1]); cmd_lin.line_x = uintBitsToFloat(memory[di + 1]);
cmd_lin.line_y = uintBitsToFloat(memory[di + 2]); cmd_lin.line_y = uintBitsToFloat(memory[di + 2]);
cmd_lin.line_c = uintBitsToFloat(memory[di + 3]); cmd_lin.line_c = uintBitsToFloat(memory[di + 3]);
if (mem_ok) {
Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin); Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin);
}
cmd_ref.offset += 4 + CmdLinGrad_size; cmd_ref.offset += 4 + CmdLinGrad_size;
break; break;
case Drawtag_FillRadGradient: case Drawtag_FillRadGradient:
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
break;
}
linewidth = uintBitsToFloat(memory[di]); linewidth = uintBitsToFloat(memory[di]);
write_fill(cmd_alloc, cmd_ref, tile, linewidth); write_fill(cmd_alloc, cmd_ref, tile, linewidth);
CmdRadGrad cmd_rad; CmdRadGrad cmd_rad;
@ -396,29 +404,31 @@ void main() {
cmd_rad.c1 = uintBitsToFloat(uvec2(memory[di + 7], memory[di + 8])); cmd_rad.c1 = uintBitsToFloat(uvec2(memory[di + 7], memory[di + 8]));
cmd_rad.ra = uintBitsToFloat(memory[di + 9]); cmd_rad.ra = uintBitsToFloat(memory[di + 9]);
cmd_rad.roff = uintBitsToFloat(memory[di + 10]); cmd_rad.roff = uintBitsToFloat(memory[di + 10]);
if (mem_ok) {
Cmd_RadGrad_write(cmd_alloc, cmd_ref, cmd_rad); Cmd_RadGrad_write(cmd_alloc, cmd_ref, cmd_rad);
}
cmd_ref.offset += 4 + CmdRadGrad_size; cmd_ref.offset += 4 + CmdRadGrad_size;
break; break;
case Drawtag_FillImage: case Drawtag_FillImage:
alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
linewidth = uintBitsToFloat(memory[di]); linewidth = uintBitsToFloat(memory[di]);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tile, linewidth); write_fill(cmd_alloc, cmd_ref, tile, linewidth);
uint index = scene[dd]; uint index = scene[dd];
uint raw1 = scene[dd + 1]; uint raw1 = scene[dd + 1];
ivec2 offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16); ivec2 offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
if (mem_ok) {
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(index, offset)); Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(index, offset));
}
cmd_ref.offset += 4 + CmdImage_size; cmd_ref.offset += 4 + CmdImage_size;
break; break;
case Drawtag_BeginClip: case Drawtag_BeginClip:
if (tile.tile.offset == 0 && tile.backdrop == 0) { if (tile.tile.offset == 0 && tile.backdrop == 0) {
clip_zero_depth = clip_depth + 1; clip_zero_depth = clip_depth + 1;
} else { } else {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
break; if (mem_ok) {
}
Cmd_BeginClip_write(cmd_alloc, cmd_ref); Cmd_BeginClip_write(cmd_alloc, cmd_ref);
}
cmd_ref.offset += 4; cmd_ref.offset += 4;
render_blend_depth++; render_blend_depth++;
max_blend_depth = max(max_blend_depth, render_blend_depth); max_blend_depth = max(max_blend_depth, render_blend_depth);
@ -427,12 +437,11 @@ void main() {
break; break;
case Drawtag_EndClip: case Drawtag_EndClip:
clip_depth--; clip_depth--;
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tile, -1.0); write_fill(cmd_alloc, cmd_ref, tile, -1.0);
uint blend = scene[dd]; uint blend = scene[dd];
if (mem_ok) {
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend)); Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend));
}
cmd_ref.offset += 4 + CmdEndClip_size; cmd_ref.offset += 4 + CmdEndClip_size;
render_blend_depth--; render_blend_depth--;
break; break;
@ -459,11 +468,13 @@ void main() {
break; break;
} }
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) { if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
if (mem_ok) {
Cmd_End_write(cmd_alloc, cmd_ref); Cmd_End_write(cmd_alloc, cmd_ref);
}
if (max_blend_depth > BLEND_STACK_SPLIT) { if (max_blend_depth > BLEND_STACK_SPLIT) {
uint scratch_size = max_blend_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4; uint scratch_size = max_blend_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
MallocResult scratch = malloc(scratch_size); uint scratch = atomicAdd(blend_offset, scratch_size);
alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc); write_mem(scratch_alloc, scratch_alloc.offset >> 2, scratch);
} }
} }
} }

BIN piet-gpu/shader/image.png (new file, 337 KiB; binary content not shown)


@ -14,6 +14,7 @@
// higher quality antialiasing among other things). // higher quality antialiasing among other things).
#define DO_SRGB_CONVERSION 0 #define DO_SRGB_CONVERSION 0
// TODO: the binding of the main buffer can be readonly
#include "mem.h" #include "mem.h"
#include "setup.h" #include "setup.h"
@ -24,19 +25,23 @@
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y) #define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in; layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf { layout(binding = 1) restrict readonly buffer ConfigBuf {
Config conf; Config conf;
}; };
layout(binding = 2) buffer BlendBuf {
uint blend_mem[];
};
#ifdef GRAY #ifdef GRAY
layout(r8, set = 0, binding = 2) uniform restrict writeonly image2D image; layout(r8, binding = 3) uniform restrict writeonly image2D image;
#else #else
layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image; layout(rgba8, binding = 3) uniform restrict writeonly image2D image;
#endif #endif
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D image_atlas; layout(rgba8, binding = 4) uniform restrict readonly image2D image_atlas;
layout(rgba8, set = 0, binding = 4) uniform restrict readonly image2D gradients; layout(rgba8, binding = 5) uniform restrict readonly image2D gradients;
#include "ptcl.h" #include "ptcl.h"
#include "tile.h" #include "tile.h"
@ -114,8 +119,9 @@ void main() {
mediump float area[CHUNK]; mediump float area[CHUNK];
uint clip_depth = 0; uint clip_depth = 0;
bool mem_ok = mem_error == NO_ERROR; // Previously we would early-out if there was a memory failure, so we wouldn't try to read corrupt
while (mem_ok) { // tiles. But now we assume this is checked CPU-side before launching fine rasterization.
while (true) {
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag; uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
if (tag == Cmd_End) { if (tag == Cmd_End) {
break; break;
@ -129,7 +135,7 @@ void main() {
df[k] = 1e9; df[k] = 1e9;
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref); TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
do { do {
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref); TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, true), tile_seg_ref);
vec2 line_vec = seg.vector; vec2 line_vec = seg.vector;
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin; vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
@ -151,7 +157,7 @@ void main() {
tile_seg_ref = TileSegRef(fill.tile_ref); tile_seg_ref = TileSegRef(fill.tile_ref);
// Calculate coverage based on backdrop + coverage of each line segment // Calculate coverage based on backdrop + coverage of each line segment
do { do {
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref); TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, true), tile_seg_ref);
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
vec2 my_xy = xy + vec2(chunk_offset(k)); vec2 my_xy = xy + vec2(chunk_offset(k));
vec2 start = seg.origin - my_xy; vec2 start = seg.origin - my_xy;
@ -248,7 +254,7 @@ void main() {
uint base_ix = (blend_offset >> 2) + (clip_depth - BLEND_STACK_SPLIT) * TILE_HEIGHT_PX * TILE_WIDTH_PX + uint base_ix = (blend_offset >> 2) + (clip_depth - BLEND_STACK_SPLIT) * TILE_HEIGHT_PX * TILE_WIDTH_PX +
CHUNK * (gl_LocalInvocationID.x + CHUNK_DX * gl_LocalInvocationID.y); CHUNK * (gl_LocalInvocationID.x + CHUNK_DX * gl_LocalInvocationID.y);
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
memory[base_ix + k] = packsRGB(vec4(rgba[k])); blend_mem[base_ix + k] = packsRGB(vec4(rgba[k]));
rgba[k] = vec4(0.0); rgba[k] = vec4(0.0);
} }
} }
@ -268,7 +274,7 @@ void main() {
if (clip_depth < BLEND_STACK_SPLIT) { if (clip_depth < BLEND_STACK_SPLIT) {
bg_rgba = blend_stack[clip_depth][k]; bg_rgba = blend_stack[clip_depth][k];
} else { } else {
bg_rgba = memory[base_ix + k]; bg_rgba = blend_mem[base_ix + k];
} }
mediump vec4 bg = unpacksRGB(bg_rgba); mediump vec4 bg = unpacksRGB(bg_rgba);
mediump vec4 fg = rgba[k] * area[k]; mediump vec4 fg = rgba[k] * area[k];


@ -3,27 +3,23 @@
layout(set = 0, binding = 0) buffer Memory { layout(set = 0, binding = 0) buffer Memory {
// offset into memory of the next allocation, initialized by the user. // offset into memory of the next allocation, initialized by the user.
uint mem_offset; uint mem_offset;
// mem_error tracks the status of memory accesses, initialized to NO_ERROR // mem_error is a bitmask of stages that have failed allocation.
// by the user. ERR_MALLOC_FAILED is reported for insufficient memory.
// If MEM_DEBUG is defined the following errors are reported:
// - ERR_OUT_OF_BOUNDS is reported for out of bounds writes.
// - ERR_UNALIGNED_ACCESS for memory access not aligned to 32-bit words.
uint mem_error; uint mem_error;
// offset into blend memory of allocations for blend stack.
uint blend_offset;
uint[] memory; uint[] memory;
}; };
// Uncomment this line to add the size field to Alloc and enable memory checks. // Uncomment this line to add the size field to Alloc and enable memory checks.
// Note that the Config struct in setup.h grows size fields as well. // Note that the Config struct in setup.h grows size fields as well.
//#define MEM_DEBUG
#define NO_ERROR 0 // This setting is not working and the mechanism will be removed.
#define ERR_MALLOC_FAILED 1 //#define MEM_DEBUG
#define ERR_OUT_OF_BOUNDS 2
#define ERR_UNALIGNED_ACCESS 3
#ifdef MEM_DEBUG #ifdef MEM_DEBUG
#define Alloc_size 16 #define Alloc_size 16
#else #else
// TODO: this seems wrong
#define Alloc_size 8 #define Alloc_size 8
#endif #endif
@ -37,12 +33,6 @@ struct Alloc {
#endif #endif
}; };
struct MallocResult {
Alloc alloc;
// failed is true if the allocation overflowed memory.
bool failed;
};
// new_alloc synthesizes an Alloc from an offset and size. // new_alloc synthesizes an Alloc from an offset and size.
Alloc new_alloc(uint offset, uint size, bool mem_ok) { Alloc new_alloc(uint offset, uint size, bool mem_ok) {
Alloc a; Alloc a;
@ -57,24 +47,32 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) {
return a; return a;
} }
// malloc allocates size bytes of memory. #define STAGE_BINNING (1u << 0)
MallocResult malloc(uint size) { #define STAGE_TILE_ALLOC (1u << 1)
MallocResult r; #define STAGE_PATH_COARSE (1u << 2)
#define STAGE_COARSE (1u << 3)
// Allocations in main memory will never be 0, and this might be slightly
// faster to test against than some other value.
#define MALLOC_FAILED 0
// Check that previous dependent stages have succeeded.
bool check_deps(uint dep_stage) {
// TODO: this should be an atomic relaxed load, but that involves
// bringing in "memory scope semantics"
return (atomicOr(mem_error, 0) & dep_stage) == 0;
}
// Allocate size bytes of memory, offset in bytes.
// Note: with a bit of rearrangement of header files, we could make the
// mem_size argument go away (it comes from the config binding).
uint malloc_stage(uint size, uint mem_size, uint stage) {
uint offset = atomicAdd(mem_offset, size); uint offset = atomicAdd(mem_offset, size);
r.failed = offset + size > memory.length() * 4; if (offset + size > mem_size) {
r.alloc = new_alloc(offset, size, !r.failed); atomicOr(mem_error, stage);
if (r.failed) { offset = MALLOC_FAILED;
atomicMax(mem_error, ERR_MALLOC_FAILED);
return r;
} }
#ifdef MEM_DEBUG return offset;
if ((size & 3) != 0) {
r.failed = true;
atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
return r;
}
#endif
return r;
} }
// touch_mem checks whether access to the memory word at offset is valid. // touch_mem checks whether access to the memory word at offset is valid.


@ -87,7 +87,13 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
return SubdivResult(val, a0, a2); return SubdivResult(val, a0, a2);
} }
// All writes to the output must be gated by mem_ok.
bool mem_ok = true;
void main() { void main() {
if (!check_deps(STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) {
return;
}
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size); PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
@ -95,24 +101,10 @@ void main() {
if (element_ix < conf.n_pathseg) { if (element_ix < conf.n_pathseg) {
tag = PathSeg_tag(conf.pathseg_alloc, ref); tag = PathSeg_tag(conf.pathseg_alloc, ref);
} }
bool mem_ok = mem_error == NO_ERROR;
switch (tag.tag) { switch (tag.tag) {
case PathSeg_Cubic: case PathSeg_Cubic:
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref); PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
// Affine transform is now applied in pathseg
/*
uint trans_ix = cubic.trans_ix;
if (trans_ix > 0) {
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref);
cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate;
cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate;
cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate;
cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate;
}
*/
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3; vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
float err = err_v.x * err_v.x + err_v.y * err_v.y; float err = err_v.x * err_v.x + err_v.y * err_v.y;
// The number of quadratics. // The number of quadratics.
@ -140,7 +132,7 @@ void main() {
uint path_ix = cubic.path_ix; uint path_ix = cubic.path_ix;
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size)); Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
Alloc path_alloc = Alloc path_alloc =
new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok); new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, true);
ivec4 bbox = ivec4(path.bbox); ivec4 bbox = ivec4(path.bbox);
vec2 p0 = cubic.p0; vec2 p0 = cubic.p0;
qp0 = cubic.p0; qp0 = cubic.p0;
@ -199,11 +191,12 @@ void main() {
// TODO: can be tighter, use c to bound width // TODO: can be tighter, use c to bound width
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
// Consider using subgroups to aggregate atomic add. // Consider using subgroups to aggregate atomic add.
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size); uint malloc_size = n_tile_alloc * TileSeg_size;
if (tile_alloc.failed || !mem_ok) { uint tile_offset = malloc_stage(malloc_size, conf.mem_size, STAGE_PATH_COARSE);
return; if (tile_offset == MALLOC_FAILED) {
mem_ok = false;
} }
uint tile_offset = tile_alloc.alloc.offset; Alloc tile_alloc = new_alloc(tile_offset, malloc_size, true);
TileSeg tile_seg; TileSeg tile_seg;
@ -221,10 +214,8 @@ void main() {
int backdrop = p1.y < p0.y ? 1 : -1; int backdrop = p1.y < p0.y ? 1 : -1;
TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop)); TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
uint tile_el = tile_ref.offset >> 2; uint tile_el = tile_ref.offset >> 2;
if (touch_mem(path_alloc, tile_el + 1)) {
atomicAdd(memory[tile_el + 1], backdrop); atomicAdd(memory[tile_el + 1], backdrop);
} }
}
// next_xray is the xray for the next scanline; the line segment intersects // next_xray is the xray for the next scanline; the line segment intersects
// all tiles between xray and next_xray. // all tiles between xray and next_xray.
@ -247,9 +238,7 @@ void main() {
TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x)); TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
uint tile_el = tile_ref.offset >> 2; uint tile_el = tile_ref.offset >> 2;
uint old = 0; uint old = 0;
if (touch_mem(path_alloc, tile_el)) {
old = atomicExchange(memory[tile_el], tile_offset); old = atomicExchange(memory[tile_el], tile_offset);
}
tile_seg.origin = p0; tile_seg.origin = p0;
tile_seg.vector = p1 - p0; tile_seg.vector = p1 - p0;
float y_edge = 0.0; float y_edge = 0.0;
@ -276,7 +265,9 @@ void main() {
} }
tile_seg.y_edge = y_edge; tile_seg.y_edge = y_edge;
tile_seg.next.offset = old; tile_seg.next.offset = old;
TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg); if (mem_ok) {
TileSeg_write(tile_alloc, TileSegRef(tile_offset), tile_seg);
}
tile_offset += TileSeg_size; tile_offset += TileSeg_size;
} }
xc += b; xc += b;


@ -31,8 +31,9 @@
// to memory for the overflow. // to memory for the overflow.
#define BLEND_STACK_SPLIT 4 #define BLEND_STACK_SPLIT 4
#ifdef ERR_MALLOC_FAILED #ifdef MALLOC_FAILED
struct Config { struct Config {
uint mem_size; // in bytes
uint n_elements; // paths uint n_elements; // paths
uint n_pathseg; uint n_pathseg;
uint width_in_tiles; uint width_in_tiles;


@ -29,7 +29,7 @@ layout(binding = 2) readonly buffer SceneBuf {
#define SY (1.0 / float(TILE_HEIGHT_PX)) #define SY (1.0 / float(TILE_HEIGHT_PX))
shared uint sh_tile_count[TILE_ALLOC_WG]; shared uint sh_tile_count[TILE_ALLOC_WG];
shared MallocResult sh_tile_alloc; shared uint sh_tile_offset;
vec4 load_draw_bbox(uint draw_ix) { vec4 load_draw_bbox(uint draw_ix) {
uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix; uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix;
@ -42,6 +42,9 @@ vec4 load_draw_bbox(uint draw_ix) {
} }
void main() { void main() {
if (!check_deps(STAGE_BINNING)) {
return;
}
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
// At the moment, element_ix == path_ix. The clip-intersected bounding boxes // At the moment, element_ix == path_ix. The clip-intersected bounding boxes
@ -86,27 +89,24 @@ void main() {
sh_tile_count[th_ix] = total_tile_count; sh_tile_count[th_ix] = total_tile_count;
} }
if (th_ix == TILE_ALLOC_WG - 1) { if (th_ix == TILE_ALLOC_WG - 1) {
sh_tile_alloc = malloc(total_tile_count * Tile_size); sh_tile_offset = malloc_stage(total_tile_count * Tile_size, conf.mem_size, STAGE_TILE_ALLOC);
} }
barrier(); barrier();
MallocResult alloc_start = sh_tile_alloc; uint offset_start = sh_tile_offset;
if (alloc_start.failed || mem_error != NO_ERROR) { if (offset_start == MALLOC_FAILED) {
return; return;
} }
if (element_ix < conf.n_elements) { if (element_ix < conf.n_elements) {
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0; uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count); path.tiles = TileRef(offset_start + Tile_size * tile_subix);
path.tiles = TileRef(tiles_alloc.offset);
Path_write(conf.tile_alloc, path_ref, path); Path_write(conf.tile_alloc, path_ref, path);
} }
// Zero out allocated tiles efficiently // Zero out allocated tiles efficiently
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4); uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
uint start_ix = alloc_start.alloc.offset >> 2; uint start_ix = offset_start >> 2;
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) { for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
// Note: this interleaving is faster than using Tile_write memory[start_ix + i] = 0;
// by a significant amount.
write_mem(alloc_start.alloc, start_ix + i, 0);
} }
} }


@ -16,13 +16,12 @@
//! Low-level scene encoding. //! Low-level scene encoding.
use crate::Blend; use crate::{Blend, SceneStats, DRAWTAG_SIZE, TRANSFORM_SIZE};
use bytemuck::{Pod, Zeroable}; use bytemuck::{Pod, Zeroable};
use piet_gpu_hal::BufWrite; use piet_gpu_hal::BufWrite;
use crate::stages::{ use crate::stages::{
self, Config, PathEncoder, Transform, CLIP_PART_SIZE, DRAW_PART_SIZE, PATHSEG_PART_SIZE, self, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE,
TRANSFORM_PART_SIZE,
}; };
pub struct Encoder { pub struct Encoder {
@ -52,86 +51,19 @@ pub struct EncodedSceneRef<'a, T: Copy + Pod> {
} }
impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> { impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> {
/// Return a config for the element processing pipeline. pub(crate) fn stats(&self) -> SceneStats {
/// SceneStats {
/// This does not include further pipeline processing. Also returns the n_drawobj: self.drawtag_stream.len(),
/// beginning of free memory. drawdata_len: self.drawdata_stream.len(),
pub fn stage_config(&self) -> (Config, usize) { n_transform: self.transform_stream.len(),
// Layout of scene buffer linewidth_len: std::mem::size_of_val(self.linewidth_stream),
let drawtag_offset = 0; pathseg_len: self.pathseg_stream.len(),
let n_drawobj = self.n_drawobj(); n_pathtag: self.tag_stream.len(),
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
let trans_offset = drawdata_offset + self.drawdata_stream.len();
let n_trans = self.transform_stream.len();
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
let n_linewidth = self.linewidth_stream.len();
let pathtag_offset = linewidth_offset + n_linewidth * LINEWIDTH_SIZE;
let n_pathtag = self.tag_stream.len();
let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
let pathseg_offset = pathtag_offset + n_pathtag_padded;
// Layout of memory
let mut alloc = 0;
let trans_alloc = alloc;
alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE;
let pathseg_alloc = alloc;
alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE;
let path_bbox_alloc = alloc;
let n_path = self.n_path as usize;
alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE;
let drawmonoid_alloc = alloc;
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
let anno_alloc = alloc;
alloc += n_drawobj * ANNOTATED_SIZE;
let clip_alloc = alloc;
let n_clip = self.n_clip as usize;
const CLIP_SIZE: usize = 4;
alloc += n_clip * CLIP_SIZE;
let clip_bic_alloc = alloc;
const CLIP_BIC_SIZE: usize = 8;
// This can round down, as we only reduce the prefix
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
let clip_stack_alloc = alloc;
const CLIP_EL_SIZE: usize = 20;
alloc += n_clip * CLIP_EL_SIZE;
let clip_bbox_alloc = alloc;
const CLIP_BBOX_SIZE: usize = 16;
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
let draw_bbox_alloc = alloc;
alloc += n_drawobj * DRAW_BBOX_SIZE;
let drawinfo_alloc = alloc;
// TODO: not optimized; it can be accumulated during encoding or summed from drawtags
const MAX_DRAWINFO_SIZE: usize = 44;
alloc += n_drawobj * MAX_DRAWINFO_SIZE;
let config = Config {
n_elements: n_drawobj as u32,
n_pathseg: self.n_pathseg,
pathseg_alloc: pathseg_alloc as u32,
anno_alloc: anno_alloc as u32,
trans_alloc: trans_alloc as u32,
path_bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32,
draw_bbox_alloc: draw_bbox_alloc as u32,
drawinfo_alloc: drawinfo_alloc as u32,
n_trans: n_trans as u32,
n_path: self.n_path, n_path: self.n_path,
n_pathseg: self.n_pathseg,
n_clip: self.n_clip, n_clip: self.n_clip,
trans_offset: trans_offset as u32, }
linewidth_offset: linewidth_offset as u32,
pathtag_offset: pathtag_offset as u32,
pathseg_offset: pathseg_offset as u32,
drawtag_offset: drawtag_offset as u32,
drawdata_offset: drawdata_offset as u32,
..Default::default()
};
(config, alloc)
} }
pub fn write_scene(&self, buf: &mut BufWrite) { pub fn write_scene(&self, buf: &mut BufWrite) {
@ -148,34 +80,6 @@ impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> {
buf.fill_zero(padding(n_pathtag, PATHSEG_PART_SIZE as usize)); buf.fill_zero(padding(n_pathtag, PATHSEG_PART_SIZE as usize));
buf.extend_slice(&self.pathseg_stream); buf.extend_slice(&self.pathseg_stream);
} }
/// The number of draw objects in the draw object stream.
pub(crate) fn n_drawobj(&self) -> usize {
self.drawtag_stream.len()
}
/// The number of paths.
pub(crate) fn n_path(&self) -> u32 {
self.n_path
}
/// The number of path segments.
pub(crate) fn n_pathseg(&self) -> u32 {
self.n_pathseg
}
pub(crate) fn n_transform(&self) -> usize {
self.transform_stream.len()
}
/// The number of tags in the path stream.
pub(crate) fn n_pathtag(&self) -> usize {
self.tag_stream.len()
}
pub(crate) fn n_clip(&self) -> u32 {
self.n_clip
}
} }
/// A scene fragment encoding a glyph. /// A scene fragment encoding a glyph.
@ -191,15 +95,6 @@ pub struct GlyphEncoder {
n_pathseg: u32, n_pathseg: u32,
} }
const TRANSFORM_SIZE: usize = 24;
const LINEWIDTH_SIZE: usize = 4;
const PATHSEG_SIZE: usize = 52;
const PATH_BBOX_SIZE: usize = 24;
const DRAWMONOID_SIZE: usize = 16;
const DRAW_BBOX_SIZE: usize = 16;
const DRAWTAG_SIZE: usize = 4;
const ANNOTATED_SIZE: usize = 40;
// Tags for draw objects. See shader/drawtag.h for the authoritative source. // Tags for draw objects. See shader/drawtag.h for the authoritative source.
const DRAWTAG_FILLCOLOR: u32 = 0x44; const DRAWTAG_FILLCOLOR: u32 = 0x44;
const DRAWTAG_FILLLINGRADIENT: u32 = 0x114; const DRAWTAG_FILLLINGRADIENT: u32 = 0x114;
@ -343,88 +238,6 @@ impl Encoder {
self.n_clip += 1; self.n_clip += 1;
} }
/// Return a config for the element processing pipeline.
///
/// This does not include further pipeline processing. Also returns the
/// beginning of free memory.
pub fn stage_config(&self) -> (Config, usize) {
// Layout of scene buffer
let drawtag_offset = 0;
let n_drawobj = self.n_drawobj();
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
let trans_offset = drawdata_offset + self.drawdata_stream.len();
let n_trans = self.transform_stream.len();
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
let n_linewidth = self.linewidth_stream.len();
let pathtag_offset = linewidth_offset + n_linewidth * LINEWIDTH_SIZE;
let n_pathtag = self.tag_stream.len();
let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
let pathseg_offset = pathtag_offset + n_pathtag_padded;
// Layout of memory
let mut alloc = 0;
let trans_alloc = alloc;
alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE;
let pathseg_alloc = alloc;
alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE;
let path_bbox_alloc = alloc;
let n_path = self.n_path as usize;
alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE;
let drawmonoid_alloc = alloc;
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
let anno_alloc = alloc;
alloc += n_drawobj * ANNOTATED_SIZE;
let clip_alloc = alloc;
let n_clip = self.n_clip as usize;
const CLIP_SIZE: usize = 4;
alloc += n_clip * CLIP_SIZE;
let clip_bic_alloc = alloc;
const CLIP_BIC_SIZE: usize = 8;
// This can round down, as we only reduce the prefix
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
let clip_stack_alloc = alloc;
const CLIP_EL_SIZE: usize = 20;
alloc += n_clip * CLIP_EL_SIZE;
let clip_bbox_alloc = alloc;
const CLIP_BBOX_SIZE: usize = 16;
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
let draw_bbox_alloc = alloc;
alloc += n_drawobj * DRAW_BBOX_SIZE;
let drawinfo_alloc = alloc;
// TODO: not optimized; it can be accumulated during encoding or summed from drawtags
const MAX_DRAWINFO_SIZE: usize = 44;
alloc += n_drawobj * MAX_DRAWINFO_SIZE;
let config = Config {
n_elements: n_drawobj as u32,
n_pathseg: self.n_pathseg,
pathseg_alloc: pathseg_alloc as u32,
anno_alloc: anno_alloc as u32,
trans_alloc: trans_alloc as u32,
path_bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32,
draw_bbox_alloc: draw_bbox_alloc as u32,
drawinfo_alloc: drawinfo_alloc as u32,
n_trans: n_trans as u32,
n_path: self.n_path,
n_clip: self.n_clip,
trans_offset: trans_offset as u32,
linewidth_offset: linewidth_offset as u32,
pathtag_offset: pathtag_offset as u32,
pathseg_offset: pathseg_offset as u32,
drawtag_offset: drawtag_offset as u32,
drawdata_offset: drawdata_offset as u32,
..Default::default()
};
(config, alloc)
}
pub fn write_scene(&self, buf: &mut BufWrite) { pub fn write_scene(&self, buf: &mut BufWrite) {
buf.extend_slice(&self.drawtag_stream); buf.extend_slice(&self.drawtag_stream);
let n_drawobj = self.drawtag_stream.len(); let n_drawobj = self.drawtag_stream.len();
@ -440,32 +253,19 @@ impl Encoder {
buf.extend_slice(&self.pathseg_stream); buf.extend_slice(&self.pathseg_stream);
} }
/// The number of draw objects in the draw object stream. pub(crate) fn stats(&self) -> SceneStats {
pub(crate) fn n_drawobj(&self) -> usize { SceneStats {
self.drawtag_stream.len() n_drawobj: self.drawtag_stream.len(),
} drawdata_len: self.drawdata_stream.len(),
n_transform: self.transform_stream.len(),
linewidth_len: std::mem::size_of_val(&*self.linewidth_stream),
n_pathtag: self.tag_stream.len(),
pathseg_len: self.pathseg_stream.len(),
/// The number of paths. n_path: self.n_path,
pub(crate) fn n_path(&self) -> u32 { n_pathseg: self.n_pathseg,
self.n_path n_clip: self.n_clip,
} }
/// The number of path segments.
pub(crate) fn n_pathseg(&self) -> u32 {
self.n_pathseg
}
pub(crate) fn n_transform(&self) -> usize {
self.transform_stream.len()
}
/// The number of tags in the path stream.
pub(crate) fn n_pathtag(&self) -> usize {
self.tag_stream.len()
}
pub(crate) fn n_clip(&self) -> u32 {
self.n_clip
} }
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) { pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
@ -478,11 +278,6 @@ impl Encoder {
} }
} }
fn align_up(x: usize, align: usize) -> usize {
debug_assert!(align.is_power_of_two());
(x + align - 1) & !(align - 1)
}
fn padding(x: usize, align: usize) -> usize { fn padding(x: usize, align: usize) -> usize {
x.wrapping_neg() & (align - 1) x.wrapping_neg() & (align - 1)
} }


@ -4,17 +4,19 @@ pub mod glyph_render;
mod gradient; mod gradient;
mod pico_svg; mod pico_svg;
mod render_ctx; mod render_ctx;
mod render_driver;
pub mod stages; pub mod stages;
pub mod test_scenes; pub mod test_scenes;
mod text; mod text;
use bytemuck::Pod; use bytemuck::{Pod, Zeroable};
use std::convert::TryInto; use std::convert::TryInto;
pub use blend::{Blend, BlendMode, CompositionMode}; pub use blend::{Blend, BlendMode, CompositionMode};
pub use encoder::EncodedSceneRef; pub use encoder::EncodedSceneRef;
pub use gradient::Colrv1RadialGradient; pub use gradient::Colrv1RadialGradient;
pub use render_ctx::PietGpuRenderContext; pub use render_ctx::PietGpuRenderContext;
pub use render_driver::RenderDriver;
use piet::kurbo::Vec2; use piet::kurbo::Vec2;
use piet::{ImageFormat, RenderContext}; use piet::{ImageFormat, RenderContext};
@ -25,9 +27,12 @@ use piet_gpu_hal::{
}; };
pub use pico_svg::PicoSvg; pub use pico_svg::PicoSvg;
use stages::{ClipBinding, ElementBinding, ElementCode}; use stages::{
ClipBinding, ElementBinding, ElementCode, DRAW_PART_SIZE, PATHSEG_PART_SIZE,
TRANSFORM_PART_SIZE,
};
use crate::stages::{ClipCode, Config, ElementStage}; use crate::stages::{ClipCode, Config, ElementStage, CLIP_PART_SIZE};
const TILE_W: usize = 16; const TILE_W: usize = 16;
const TILE_H: usize = 16; const TILE_H: usize = 16;
@ -64,6 +69,31 @@ pub enum PixelFormat {
Rgba8, Rgba8,
} }
#[repr(C)]
#[derive(Clone, Copy, Debug, Zeroable, Pod)]
pub(crate) struct MemoryHeader {
mem_offset: u32,
mem_error: u32,
blend_offset: u32,
}
/// The sizes of various objects in the encoded scene, needed for memory layout.
#[derive(Default)]
pub(crate) struct SceneStats {
// Slices of scene encoding, in order
pub n_drawobj: usize,
pub drawdata_len: usize,
pub n_transform: usize,
pub linewidth_len: usize,
pub pathseg_len: usize,
pub n_pathtag: usize,
// Additional stats needed for memory layout & dispatch
pub n_path: u32,
pub n_pathseg: u32,
pub n_clip: u32,
}
pub struct Renderer { pub struct Renderer {
// These sizes are aligned to tile boundaries, though at some point // These sizes are aligned to tile boundaries, though at some point
// we'll want to have a good strategy for dealing with odd sizes. // we'll want to have a good strategy for dealing with odd sizes.
@ -72,18 +102,23 @@ pub struct Renderer {
pub image_dev: Image, // resulting image pub image_dev: Image, // resulting image
// The reference is held by the pipelines. We will be changing // TODO: two changes needed here. First, if we're fencing on the coarse
// this to make the scene upload dynamic. // pipeline, then we only need one copy (this changes if we also bind the
// scene buffer in fine rasterization, which might be a good idea to reduce
// copying). Second, there should be a staging buffer for discrete cards.
scene_bufs: Vec<Buffer>, scene_bufs: Vec<Buffer>,
memory_buf_host: Vec<Buffer>, memory_buf_host: Vec<Buffer>,
memory_buf_dev: Buffer, memory_buf_dev: Buffer,
memory_buf_readback: Buffer,
// Staging buffers // Staging buffers
config_bufs: Vec<Buffer>, config_bufs: Vec<Buffer>,
// Device config buf // Device config buf
config_buf: Buffer, config_buf: Buffer,
blend_buf: Buffer,
// New element pipeline // New element pipeline
element_code: ElementCode, element_code: ElementCode,
element_stage: ElementStage, element_stage: ElementStage,
@ -111,6 +146,8 @@ pub struct Renderer {
k4_pipeline: Pipeline, k4_pipeline: Pipeline,
k4_ds: DescriptorSet, k4_ds: DescriptorSet,
scene_stats: SceneStats,
// TODO: the following stats are now redundant and can be removed.
n_transform: usize, n_transform: usize,
n_drawobj: usize, n_drawobj: usize,
n_paths: usize, n_paths: usize,
@ -166,12 +203,18 @@ impl Renderer {
let width = width + (width.wrapping_neg() & (TILE_W - 1)); let width = width + (width.wrapping_neg() & (TILE_W - 1));
let height = height + (height.wrapping_neg() & (TILE_W - 1)); let height = height + (height.wrapping_neg() & (TILE_W - 1));
let dev = BufferUsage::STORAGE | BufferUsage::COPY_DST; let dev = BufferUsage::STORAGE | BufferUsage::COPY_DST;
let host_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC; let usage_mem_dev = BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::COPY_SRC;
let usage_blend = BufferUsage::STORAGE;
let usage_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC;
let usage_readback = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
// This may be inadequate for very complex scenes (paris etc)
// TODO: separate staging buffer (if needed) // TODO: separate staging buffer (if needed)
let scene_bufs = (0..n_bufs) let scene_bufs = (0..n_bufs)
.map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap()) .map(|_| {
session
.create_buffer(8 * 1024 * 1024, usage_upload)
.unwrap()
})
.collect::<Vec<_>>(); .collect::<Vec<_>>();
let image_format = match config.format { let image_format = match config.format {
@ -185,15 +228,22 @@ impl Renderer {
let config_bufs = (0..n_bufs) let config_bufs = (0..n_bufs)
.map(|_| { .map(|_| {
session session
.create_buffer(CONFIG_BUFFER_SIZE, host_upload) .create_buffer(CONFIG_BUFFER_SIZE, usage_upload)
.unwrap() .unwrap()
}) })
.collect(); .collect();
let memory_buf_host = (0..n_bufs) let memory_buf_host = (0..n_bufs)
.map(|_| session.create_buffer(2 * 4, host_upload).unwrap()) .map(|_| {
session
.create_buffer(std::mem::size_of::<MemoryHeader>() as u64, usage_upload)
.unwrap()
})
.collect(); .collect();
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?; let memory_buf_dev = session.create_buffer(16 * 1024 * 1024, usage_mem_dev)?;
let memory_buf_readback =
session.create_buffer(std::mem::size_of::<MemoryHeader>() as u64, usage_readback)?;
let blend_buf = session.create_buffer(16 * 1024 * 1024, usage_blend)?;
let element_code = ElementCode::new(session); let element_code = ElementCode::new(session);
let element_stage = ElementStage::new(session, &element_code); let element_stage = ElementStage::new(session, &element_code);
@ -282,7 +332,7 @@ impl Renderer {
let gradient_bufs = (0..n_bufs) let gradient_bufs = (0..n_bufs)
.map(|_| { .map(|_| {
session session
.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload) .create_buffer(GRADIENT_BUF_SIZE as u64, usage_upload)
.unwrap() .unwrap()
}) })
.collect(); .collect();
@ -297,6 +347,7 @@ impl Renderer {
&[ &[
BindType::Buffer, BindType::Buffer,
BindType::BufReadOnly, BindType::BufReadOnly,
BindType::Buffer,
BindType::Image, BindType::Image,
BindType::ImageRead, BindType::ImageRead,
BindType::ImageRead, BindType::ImageRead,
@ -304,19 +355,22 @@ impl Renderer {
)?; )?;
let k4_ds = session let k4_ds = session
.descriptor_set_builder() .descriptor_set_builder()
.add_buffers(&[&memory_buf_dev, &config_buf]) .add_buffers(&[&memory_buf_dev, &config_buf, &blend_buf])
.add_images(&[&image_dev]) .add_images(&[&image_dev])
.add_textures(&[&bg_image, &gradients]) .add_textures(&[&bg_image, &gradients])
.build(&session, &k4_pipeline)?; .build(&session, &k4_pipeline)?;
let scene_stats = Default::default();
Ok(Renderer { Ok(Renderer {
width, width,
height, height,
scene_bufs, scene_bufs,
memory_buf_host, memory_buf_host,
memory_buf_dev, memory_buf_dev,
memory_buf_readback,
config_buf, config_buf,
config_bufs, config_bufs,
blend_buf,
image_dev, image_dev,
element_code, element_code,
element_stage, element_stage,
@ -336,6 +390,7 @@ impl Renderer {
coarse_ds, coarse_ds,
k4_pipeline, k4_pipeline,
k4_ds, k4_ds,
scene_stats,
n_transform: 0, n_transform: 0,
n_drawobj: 0, n_drawobj: 0,
n_paths: 0, n_paths: 0,
@ -358,43 +413,14 @@ impl Renderer {
render_ctx: &mut PietGpuRenderContext, render_ctx: &mut PietGpuRenderContext,
buf_ix: usize, buf_ix: usize,
) -> Result<(), Error> { ) -> Result<(), Error> {
let (mut config, mut alloc) = render_ctx.stage_config(); self.scene_stats = render_ctx.stats();
let n_drawobj = render_ctx.n_drawobj();
// TODO: be more consistent in size types
let n_path = render_ctx.n_path() as usize;
self.n_paths = n_path;
self.n_transform = render_ctx.n_transform();
self.n_drawobj = render_ctx.n_drawobj();
self.n_pathseg = render_ctx.n_pathseg() as usize;
self.n_pathtag = render_ctx.n_pathtag();
self.n_clip = render_ctx.n_clip();
// These constants depend on encoding and may need to be updated.
// Perhaps we can plumb these from piet-gpu-derive?
const PATH_SIZE: usize = 12;
const BIN_SIZE: usize = 8;
let width_in_tiles = self.width / TILE_W;
let height_in_tiles = self.height / TILE_H;
let tile_base = alloc;
alloc += ((n_path + 3) & !3) * PATH_SIZE;
let bin_base = alloc;
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
let ptcl_base = alloc;
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
config.width_in_tiles = width_in_tiles as u32;
config.height_in_tiles = height_in_tiles as u32;
config.tile_alloc = tile_base as u32;
config.bin_alloc = bin_base as u32;
config.ptcl_alloc = ptcl_base as u32;
unsafe { unsafe {
// TODO: reallocate scene buffer if size is inadequate self.upload_config(buf_ix)?;
{ {
let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?; let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?;
render_ctx.write_scene(&mut mapped_scene); render_ctx.write_scene(&mut mapped_scene);
} }
self.config_bufs[buf_ix].write(&[config])?;
self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
// Upload gradient data. // Upload gradient data.
let ramp_data = render_ctx.get_ramp_data(); let ramp_data = render_ctx.get_ramp_data();
@ -414,43 +440,14 @@ impl Renderer {
scene: &EncodedSceneRef<T>, scene: &EncodedSceneRef<T>,
buf_ix: usize, buf_ix: usize,
) -> Result<(), Error> { ) -> Result<(), Error> {
let (mut config, mut alloc) = scene.stage_config(); self.scene_stats = scene.stats();
let n_drawobj = scene.n_drawobj();
// TODO: be more consistent in size types
let n_path = scene.n_path() as usize;
self.n_paths = n_path;
self.n_transform = scene.n_transform();
self.n_drawobj = scene.n_drawobj();
self.n_pathseg = scene.n_pathseg() as usize;
self.n_pathtag = scene.n_pathtag();
self.n_clip = scene.n_clip();
// These constants depend on encoding and may need to be updated.
// Perhaps we can plumb these from piet-gpu-derive?
const PATH_SIZE: usize = 12;
const BIN_SIZE: usize = 8;
let width_in_tiles = self.width / TILE_W;
let height_in_tiles = self.height / TILE_H;
let tile_base = alloc;
alloc += ((n_path + 3) & !3) * PATH_SIZE;
let bin_base = alloc;
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
let ptcl_base = alloc;
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
config.width_in_tiles = width_in_tiles as u32;
config.height_in_tiles = height_in_tiles as u32;
config.tile_alloc = tile_base as u32;
config.bin_alloc = bin_base as u32;
config.ptcl_alloc = ptcl_base as u32;
unsafe { unsafe {
// TODO: reallocate scene buffer if size is inadequate self.upload_config(buf_ix)?;
{ {
let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?; let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?;
scene.write_scene(&mut mapped_scene); scene.write_scene(&mut mapped_scene);
} }
self.config_bufs[buf_ix].write(&[config])?;
self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
// Upload gradient data. // Upload gradient data.
if !scene.ramp_data.is_empty() { if !scene.ramp_data.is_empty() {
@ -464,7 +461,44 @@ impl Renderer {
Ok(()) Ok(())
} }
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) { // Note: configuration has to be re-uploaded when memory buffer is resized
pub(crate) unsafe fn upload_config(
&mut self,
buf_ix: usize,
) -> Result<(), Error> {
let stats = &self.scene_stats;
let n_path = stats.n_path as usize;
self.n_paths = n_path;
self.n_transform = stats.n_transform;
self.n_drawobj = stats.n_drawobj;
self.n_pathseg = stats.n_pathseg as usize;
self.n_pathtag = stats.n_pathtag;
self.n_clip = stats.n_clip;
let (mut config, alloc) = stats.config(self.width, self.height);
config.mem_size = self.memory_buf_size() as u32;
self.config_bufs[buf_ix].write(&[config])?;
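// blend_offset starts at 0 here; the coarse pipeline writes the blend buffer
// size it needs into this field, and run_coarse reads it back to decide
// whether the blend buffer must be reallocated.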
let mem_header = MemoryHeader {
mem_offset: alloc as u32,
mem_error: 0,
blend_offset: 0,
};
// Note: we could skip doing this on realloc, but probably not worth the bother
self.memory_buf_host[buf_ix].write(&[mem_header])?;
Ok(())
}
/// Get the size of memory needed for the allocations known in advance.
pub(crate) fn memory_size(&self, stats: &SceneStats) -> usize {
stats.config(self.width, self.height).1
}
/// Record the coarse part of a render pipeline.
pub unsafe fn record_coarse(
&self,
cmd_buf: &mut CmdBuf,
query_pool: &QueryPool,
buf_ix: usize,
) {
cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf); cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf);
cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev); cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
@ -558,9 +592,20 @@ impl Renderer {
pass.end(); pass.end();
cmd_buf.end_debug_label(); cmd_buf.end_debug_label();
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
}
pub unsafe fn record_fine(
&self,
cmd_buf: &mut CmdBuf,
query_pool: &QueryPool,
query_start: u32,
) {
cmd_buf.begin_debug_label("Fine raster"); cmd_buf.begin_debug_label("Fine raster");
let mut pass = let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(
cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 10, 11)); &query_pool,
query_start,
query_start + 1,
));
pass.dispatch( pass.dispatch(
&self.k4_pipeline, &self.k4_pipeline,
&self.k4_ds, &self.k4_ds,
@ -577,6 +622,19 @@ impl Renderer {
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc); cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
} }
pub unsafe fn record_readback(&self, cmd_buf: &mut CmdBuf) {
cmd_buf.copy_buffer(&self.memory_buf_dev, &self.memory_buf_readback);
cmd_buf.memory_barrier();
}
/// Record a render pipeline.
///
/// This *assumes* the buffers are adequately sized.
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) {
self.record_coarse(cmd_buf, query_pool, buf_ix);
self.record_fine(cmd_buf, query_pool, 10);
}
pub fn make_image( pub fn make_image(
session: &Session, session: &Session,
width: usize, width: usize,
@ -636,4 +694,210 @@ impl Renderer {
.unwrap() .unwrap()
} }
} }
pub(crate) unsafe fn realloc_scene_if_needed(
&mut self,
session: &Session,
new_size: u64,
buf_ix: usize,
) -> Result<(), Error> {
if new_size <= self.scene_bufs[buf_ix].size() {
return Ok(());
}
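// Round the requested size up to a 64 KiB boundary so small increases in
// scene size don't force a reallocation every frame.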
const ALIGN: u64 = 0x10000;
let new_size = (new_size + ALIGN - 1) & ALIGN.wrapping_neg();
println!(
"reallocating scene buf[{}] {} -> {}",
buf_ix,
self.scene_bufs[buf_ix].size(),
new_size
);
let usage_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC;
let scene_buf = session.create_buffer(new_size, usage_upload)?;
self.element_bindings[buf_ix].rebind_scene(session, &scene_buf);
session.update_buffer_descriptor(&mut self.tile_ds[buf_ix], 2, &scene_buf);
session.update_buffer_descriptor(&mut self.coarse_ds[buf_ix], 2, &scene_buf);
self.scene_bufs[buf_ix] = scene_buf;
Ok(())
}
/// Get the size of the memory buffer.
///
/// This is the usable size (not including the header).
pub(crate) fn memory_buf_size(&self) -> u64 {
self.memory_buf_dev.size() - std::mem::size_of::<MemoryHeader>() as u64
}
pub(crate) unsafe fn realloc_memory(
&mut self,
session: &Session,
new_size: u64,
) -> Result<(), Error> {
println!(
"reallocating memory buf {} -> {}",
self.memory_buf_dev.size(),
new_size
);
let usage_mem_dev = BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::COPY_SRC;
let memory_buf_dev = session.create_buffer(new_size, usage_mem_dev)?;
for element_binding in &mut self.element_bindings {
element_binding.rebind_memory(session, &memory_buf_dev);
}
self.clip_binding.rebind_memory(session, &memory_buf_dev);
for tile_ds in &mut self.tile_ds {
session.update_buffer_descriptor(tile_ds, 0, &memory_buf_dev);
}
session.update_buffer_descriptor(&mut self.path_ds, 0, &memory_buf_dev);
session.update_buffer_descriptor(&mut self.backdrop_ds, 0, &memory_buf_dev);
session.update_buffer_descriptor(&mut self.bin_ds, 0, &memory_buf_dev);
for coarse_ds in &mut self.coarse_ds {
session.update_buffer_descriptor(coarse_ds, 0, &memory_buf_dev);
}
session.update_buffer_descriptor(&mut self.k4_ds, 0, &memory_buf_dev);
self.memory_buf_dev = memory_buf_dev;
Ok(())
}
pub(crate) fn blend_size(&self) -> u64 {
self.blend_buf.size()
}
pub(crate) unsafe fn realloc_blend(
&mut self,
session: &Session,
new_size: u64,
) -> Result<(), Error> {
println!(
"reallocating blend buf {} -> {}",
self.blend_size(),
new_size
);
let usage_blend = BufferUsage::STORAGE;
let blend_buf = session.create_buffer(new_size, usage_blend)?;
session.update_buffer_descriptor(&mut self.k4_ds, 2, &blend_buf);
self.blend_buf = blend_buf;
Ok(())
}
}
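// For reference, the shape of the `MemoryHeader` written by `upload_config`
// and read back by the render driver. The real definition lives elsewhere in
// this change; the repr/derives below are assumptions, but the field names
// and meanings follow from the usage above.
//
//     #[repr(C)]
//     #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
//     pub struct MemoryHeader {
//         mem_offset: u32,   // first free byte of dynamic memory; after a
//                            // failed run, a hint for the required size
//         mem_error: u32,    // 0 on success, nonzero if a stage failed
//         blend_offset: u32, // blend memory size reported by coarse raster
//     }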
const TRANSFORM_SIZE: usize = 24;
const PATHSEG_SIZE: usize = 52;
const PATH_BBOX_SIZE: usize = 24;
const DRAWMONOID_SIZE: usize = 16;
const DRAW_BBOX_SIZE: usize = 16;
const DRAWTAG_SIZE: usize = 4;
const ANNOTATED_SIZE: usize = 40;
impl SceneStats {
pub(crate) fn scene_size(&self) -> usize {
align_up(self.n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE
+ self.drawdata_len
+ align_up(self.n_transform, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE
+ self.linewidth_len
+ align_up(self.n_pathtag, PATHSEG_PART_SIZE as usize)
+ self.pathseg_len
}
/// Return a config for a scene with these stats.
///
/// Also returns the beginning of free (dynamic) memory.
fn config(&self, width: usize, height: usize) -> (Config, usize) {
// Layout of scene buffer
let drawtag_offset = 0;
let n_drawobj = self.n_drawobj;
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
let trans_offset = drawdata_offset + self.drawdata_len;
let n_trans = self.n_transform;
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
let pathtag_offset = linewidth_offset + self.linewidth_len;
let n_pathtag = self.n_pathtag;
let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
let pathseg_offset = pathtag_offset + n_pathtag_padded;
// Layout of memory
let mut alloc = 0;
let trans_alloc = alloc;
alloc += n_trans_padded * TRANSFORM_SIZE;
let pathseg_alloc = alloc;
alloc += self.n_pathseg as usize * PATHSEG_SIZE;
let path_bbox_alloc = alloc;
let n_path = self.n_path as usize;
alloc += n_path * PATH_BBOX_SIZE;
let drawmonoid_alloc = alloc;
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
let anno_alloc = alloc;
alloc += n_drawobj * ANNOTATED_SIZE;
let clip_alloc = alloc;
let n_clip = self.n_clip as usize;
const CLIP_SIZE: usize = 4;
alloc += n_clip * CLIP_SIZE;
let clip_bic_alloc = alloc;
const CLIP_BIC_SIZE: usize = 8;
// This can round down, as we only reduce the prefix
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
let clip_stack_alloc = alloc;
const CLIP_EL_SIZE: usize = 20;
alloc += n_clip * CLIP_EL_SIZE;
let clip_bbox_alloc = alloc;
const CLIP_BBOX_SIZE: usize = 16;
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
let draw_bbox_alloc = alloc;
alloc += n_drawobj * DRAW_BBOX_SIZE;
let drawinfo_alloc = alloc;
// TODO: not optimized; it can be accumulated during encoding or summed from drawtags
const MAX_DRAWINFO_SIZE: usize = 44;
alloc += n_drawobj * MAX_DRAWINFO_SIZE;
// These constants depend on encoding and may need to be updated.
const PATH_SIZE: usize = 12;
const BIN_SIZE: usize = 8;
let width_in_tiles = width / TILE_W;
let height_in_tiles = height / TILE_H;
let tile_base = alloc;
alloc += ((n_path + 3) & !3) * PATH_SIZE;
let bin_base = alloc;
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
let ptcl_base = alloc;
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
let config = Config {
mem_size: 0, // to be filled in later
n_elements: n_drawobj as u32,
n_pathseg: self.n_pathseg,
pathseg_alloc: pathseg_alloc as u32,
anno_alloc: anno_alloc as u32,
trans_alloc: trans_alloc as u32,
path_bbox_alloc: path_bbox_alloc as u32,
drawmonoid_alloc: drawmonoid_alloc as u32,
clip_alloc: clip_alloc as u32,
clip_bic_alloc: clip_bic_alloc as u32,
clip_stack_alloc: clip_stack_alloc as u32,
clip_bbox_alloc: clip_bbox_alloc as u32,
draw_bbox_alloc: draw_bbox_alloc as u32,
drawinfo_alloc: drawinfo_alloc as u32,
n_trans: n_trans as u32,
n_path: self.n_path,
n_clip: self.n_clip,
trans_offset: trans_offset as u32,
linewidth_offset: linewidth_offset as u32,
pathtag_offset: pathtag_offset as u32,
pathseg_offset: pathseg_offset as u32,
drawtag_offset: drawtag_offset as u32,
drawdata_offset: drawdata_offset as u32,
width_in_tiles: width_in_tiles as u32,
height_in_tiles: height_in_tiles as u32,
tile_alloc: tile_base as u32,
bin_alloc: bin_base as u32,
ptcl_alloc: ptcl_base as u32,
};
(config, alloc)
}
}
fn align_up(x: usize, align: usize) -> usize {
debug_assert!(align.is_power_of_two());
(x + align - 1) & !(align - 1)
} }


@ -4,7 +4,7 @@ const DO_SRGB_CONVERSION: bool = false;
use std::borrow::Cow; use std::borrow::Cow;
use crate::encoder::GlyphEncoder; use crate::encoder::GlyphEncoder;
use crate::stages::{Config, Transform}; use crate::stages::Transform;
use piet::kurbo::{Affine, PathEl, Point, Rect, Shape}; use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
use piet::{ use piet::{
Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext, Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext,
@ -18,7 +18,7 @@ use piet_gpu_types::scene::Element;
use crate::gradient::{Colrv1RadialGradient, LinearGradient, RadialGradient, RampCache}; use crate::gradient::{Colrv1RadialGradient, LinearGradient, RadialGradient, RampCache};
use crate::text::Font; use crate::text::Font;
pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder}; pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder};
use crate::Blend; use crate::{Blend, SceneStats};
pub struct PietGpuImage; pub struct PietGpuImage;
@ -95,44 +95,15 @@ impl PietGpuRenderContext {
} }
} }
pub fn stage_config(&self) -> (Config, usize) { pub(crate) fn stats(&self) -> SceneStats {
self.new_encoder.stage_config() self.new_encoder.stats()
}
/// Number of draw objects.
///
/// This is for the new element processing pipeline. It's not necessarily the
/// same as the number of paths (as in the old pipeline), but it might take a
/// while to sort that out.
pub fn n_drawobj(&self) -> usize {
self.new_encoder.n_drawobj()
}
/// Number of paths.
pub fn n_path(&self) -> u32 {
self.new_encoder.n_path()
}
pub fn n_pathseg(&self) -> u32 {
self.new_encoder.n_pathseg()
}
pub fn n_pathtag(&self) -> usize {
self.new_encoder.n_pathtag()
}
pub fn n_transform(&self) -> usize {
self.new_encoder.n_transform()
}
pub fn n_clip(&self) -> u32 {
self.new_encoder.n_clip()
} }
pub fn write_scene(&self, buf: &mut BufWrite) { pub fn write_scene(&self, buf: &mut BufWrite) {
self.new_encoder.write_scene(buf); self.new_encoder.write_scene(buf);
} }
// TODO: delete
pub fn get_scene_buf(&mut self) -> &[u8] { pub fn get_scene_buf(&mut self) -> &[u8] {
const ALIGN: usize = 128; const ALIGN: usize = 128;
let padded_size = (self.elements.len() + (ALIGN - 1)) & ALIGN.wrapping_neg(); let padded_size = (self.elements.len() + (ALIGN - 1)) & ALIGN.wrapping_neg();
@ -194,7 +165,6 @@ impl RenderContext for PietGpuRenderContext {
let rad = self.ramp_cache.add_radial_gradient(&rad); let rad = self.ramp_cache.add_radial_gradient(&rad);
Ok(PietGpuBrush::RadGradient(rad)) Ok(PietGpuBrush::RadGradient(rad))
} }
_ => todo!("don't do radial gradients yet"),
} }
} }


@ -0,0 +1,267 @@
// Copyright 2022 The piet-gpu authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
use bytemuck::Pod;
use piet_gpu_hal::{CmdBuf, Error, Image, QueryPool, Semaphore, Session, SubmittedCmdBuf};
use crate::{EncodedSceneRef, MemoryHeader, PietGpuRenderContext, Renderer, SceneStats};
/// Additional logic for sequencing rendering operations, specifically
/// for handling failure and reallocation.
///
/// It may be that this shouldn't be a separate object from Renderer.
pub struct RenderDriver {
frames: Vec<RenderFrame>,
renderer: Renderer,
buf_ix: usize,
/// The index of a pending fine rasterization submission.
pending: Option<usize>,
}
pub struct TargetState<'a> {
pub cmd_buf: &'a mut CmdBuf,
pub image: &'a Image,
}
struct RenderFrame {
cmd_buf: CmdBufState,
query_pool: QueryPool,
}
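/// Lifecycle of a per-frame command buffer: it starts in `Start` (nothing
/// allocated yet), moves to `Ready` once a command buffer exists and can be
/// recorded, and to `Submitted` while it is in flight; `wait` (or the next
/// call to `cmd_buf`) brings it back to `Ready`.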
enum CmdBufState {
Start,
Submitted(SubmittedCmdBuf),
Ready(CmdBuf),
}
impl RenderDriver {
/// Create a new render driver.
///
/// Should probably be fallible.
///
/// We can get n from the renderer as well.
pub fn new(session: &Session, n: usize, renderer: Renderer) -> RenderDriver {
let frames = (0..n)
.map(|_| {
// Maybe should allocate here so it doesn't happen on first frame?
let cmd_buf = CmdBufState::default();
let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?;
Ok(RenderFrame {
cmd_buf,
query_pool,
})
})
.collect::<Result<_, Error>>()
.unwrap();
RenderDriver {
frames,
renderer,
buf_ix: 0,
pending: None,
}
}
pub fn upload_render_ctx(
&mut self,
session: &Session,
render_ctx: &mut PietGpuRenderContext,
) -> Result<(), Error> {
let stats = render_ctx.stats();
self.ensure_scene_buffers(session, &stats)?;
self.renderer.upload_render_ctx(render_ctx, self.buf_ix)
}
pub fn upload_scene<T: Copy + Pod>(
&mut self,
session: &Session,
scene: &EncodedSceneRef<T>,
) -> Result<(), Error> {
let stats = scene.stats();
self.ensure_scene_buffers(session, &stats)?;
self.renderer.upload_scene(scene, self.buf_ix)
}
fn ensure_scene_buffers(&mut self, session: &Session, stats: &SceneStats) -> Result<(), Error> {
let scene_size = stats.scene_size();
unsafe {
self.renderer
.realloc_scene_if_needed(session, scene_size as u64, self.buf_ix)?;
}
let memory_size = self.renderer.memory_size(&stats);
// TODO: better estimate of additional memory needed
// Note: if we were to cover the worst-case binning output, we could make the
// binning stage infallible and cut checking logic. It also may not be a bad
// estimate for the rest.
let estimated_needed = memory_size as u64 + (1 << 20);
if estimated_needed > self.renderer.memory_buf_size() {
if let Some(pending) = self.pending.take() {
// There might be a fine rasterization task that binds the memory buffer
// still in flight.
self.frames[pending].cmd_buf.wait();
}
unsafe {
self.renderer.realloc_memory(session, estimated_needed)?;
}
}
Ok(())
}
/// Run one try of the coarse rendering pipeline.
pub(crate) fn try_run_coarse(&mut self, session: &Session) -> Result<MemoryHeader, Error> {
let frame = &mut self.frames[self.buf_ix];
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
unsafe {
cmd_buf.begin();
// TODO: probably want to return query results as well
self.renderer
.record_coarse(cmd_buf, &frame.query_pool, self.buf_ix);
self.renderer.record_readback(cmd_buf);
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
cmd_buf.finish_timestamps(&frame.query_pool);
cmd_buf.host_barrier();
cmd_buf.finish();
frame.cmd_buf.submit(session, &[], &[])?;
frame.cmd_buf.wait();
let mut result = Vec::new();
// TODO: consider read method for single POD value
self.renderer.memory_buf_readback.read(&mut result)?;
Ok(result[0])
}
}
/// Run the coarse render pipeline, ensuring enough memory for intermediate buffers.
pub fn run_coarse(&mut self, session: &Session) -> Result<(), Error> {
loop {
let mem_header = self.try_run_coarse(session)?;
println!("{:?}", mem_header);
if mem_header.mem_error == 0 {
let blend_needed = mem_header.blend_offset as u64;
if blend_needed > self.renderer.blend_size() {
unsafe {
self.renderer.realloc_blend(session, blend_needed)?;
}
}
return Ok(());
}
// Not enough memory, reallocate and retry.
// TODO: be smarter (multiplier for early stages)
let mem_size = mem_header.mem_offset + 4096;
// Safety rationalization: no command buffers containing the buffer are
// in flight.
unsafe {
self.renderer.realloc_memory(session, mem_size.into())?;
self.renderer.upload_config(self.buf_ix)?;
}
}
}
/// Record the fine rasterizer, leaving the command buffer open.
pub fn record_fine(&mut self, session: &Session) -> Result<TargetState, Error> {
let frame = &mut self.frames[self.buf_ix];
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
unsafe {
self.renderer.record_fine(cmd_buf, &frame.query_pool, 0);
}
let image = &self.renderer.image_dev;
Ok(TargetState { cmd_buf, image })
}
/// Submit the current command buffer.
pub fn submit(
&mut self,
session: &Session,
wait_semaphores: &[&Semaphore],
signal_semaphores: &[&Semaphore],
) -> Result<(), Error> {
let frame = &mut self.frames[self.buf_ix];
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
unsafe {
cmd_buf.finish_timestamps(&frame.query_pool);
cmd_buf.host_barrier();
cmd_buf.finish();
frame
.cmd_buf
.submit(session, wait_semaphores, signal_semaphores)?
}
self.pending = Some(self.buf_ix);
Ok(())
}
pub fn wait(&mut self) {
self.frames[self.buf_ix].cmd_buf.wait();
self.pending = None;
}
/// Move to the next buffer.
pub fn next_buffer(&mut self) {
self.buf_ix = (self.buf_ix + 1) % self.frames.len()
}
}
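// A minimal sketch of how a caller might drive the types above for one frame.
// The function name and the swapchain-related semaphores are hypothetical;
// the RenderDriver calls are the ones defined in this file.
#[allow(dead_code)]
fn drive_one_frame(
    driver: &mut RenderDriver,
    session: &Session,
    ctx: &mut PietGpuRenderContext,
    acquire: &Semaphore,
    render_done: &Semaphore,
) -> Result<(), Error> {
    // Upload the scene, growing the scene/memory buffers up front if the
    // size estimate says they are too small.
    driver.upload_render_ctx(session, ctx)?;
    // Run the coarse pipeline, retrying with larger buffers until it succeeds.
    driver.run_coarse(session)?;
    // record_fine leaves the command buffer open; a real caller would use the
    // returned TargetState (cmd_buf + image) to blit into its surface here.
    let _ = driver.record_fine(session)?;
    driver.submit(session, &[acquire], &[render_done])?;
    driver.next_buffer();
    Ok(())
}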
impl Default for CmdBufState {
fn default() -> Self {
CmdBufState::Start
}
}
impl CmdBufState {
/// Get a command buffer suitable for recording.
///
/// If the command buffer is submitted, wait.
fn cmd_buf(&mut self, session: &Session) -> Result<&mut CmdBuf, Error> {
if let CmdBufState::Ready(cmd_buf) = self {
return Ok(cmd_buf);
}
if let CmdBufState::Submitted(submitted) = std::mem::take(self) {
if let Ok(Some(cmd_buf)) = submitted.wait() {
*self = CmdBufState::Ready(cmd_buf);
}
}
if matches!(self, CmdBufState::Start) {
*self = CmdBufState::Ready(session.cmd_buf()?);
}
if let CmdBufState::Ready(cmd_buf) = self {
Ok(cmd_buf)
} else {
unreachable!()
}
}
unsafe fn submit(
&mut self,
session: &Session,
wait_semaphores: &[&Semaphore],
signal_semaphores: &[&Semaphore],
) -> Result<(), Error> {
if let CmdBufState::Ready(cmd_buf) = std::mem::take(self) {
let submitted = session.run_cmd_buf(cmd_buf, wait_semaphores, signal_semaphores)?;
*self = CmdBufState::Submitted(submitted);
Ok(())
} else {
Err("Tried to submit CmdBufState not in ready state".into())
}
}
fn wait(&mut self) {
if matches!(self, CmdBufState::Submitted(_)) {
if let CmdBufState::Submitted(submitted) = std::mem::take(self) {
if let Ok(Some(cmd_buf)) = submitted.wait() {
*self = CmdBufState::Ready(cmd_buf);
}
}
}
}
}


@ -37,6 +37,7 @@ pub use transform::{
#[repr(C)] #[repr(C)]
#[derive(Clone, Copy, Default, Debug, Zeroable, Pod)] #[derive(Clone, Copy, Default, Debug, Zeroable, Pod)]
pub struct Config { pub struct Config {
pub mem_size: u32,
pub n_elements: u32, // paths pub n_elements: u32, // paths
pub n_pathseg: u32, pub n_pathseg: u32,
pub width_in_tiles: u32, pub width_in_tiles: u32,
@ -167,3 +168,17 @@ impl ElementStage {
.record(pass, &code.draw_code, &binding.draw_binding, n_drawobj); .record(pass, &code.draw_code, &binding.draw_binding, n_drawobj);
} }
} }
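// When the memory or scene buffer is reallocated, every descriptor set that
// binds it still points at the old buffer, so the rebind methods below update
// those bindings in place rather than rebuilding the descriptor sets.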
impl ElementBinding {
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
self.transform_binding.rebind_memory(session, memory);
self.path_binding.rebind_memory(session, memory);
self.draw_binding.rebind_memory(session, memory);
}
pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
self.transform_binding.rebind_scene(session, scene);
self.path_binding.rebind_scene(session, scene);
self.draw_binding.rebind_scene(session, scene);
}
}


@ -93,4 +93,9 @@ impl ClipBinding {
pass.memory_barrier(); pass.memory_barrier();
} }
} }
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
session.update_buffer_descriptor(&mut self.leaf_ds, 0, memory);
}
} }


@ -163,3 +163,15 @@ impl DrawStage {
); );
} }
} }
impl DrawBinding {
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
session.update_buffer_descriptor(&mut self.leaf_ds, 0, memory);
}
pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
session.update_buffer_descriptor(&mut self.reduce_ds, 2, scene);
session.update_buffer_descriptor(&mut self.leaf_ds, 2, scene);
}
}


@ -200,6 +200,19 @@ impl PathStage {
} }
} }
impl PathBinding {
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
session.update_buffer_descriptor(&mut self.clear_ds, 0, memory);
session.update_buffer_descriptor(&mut self.path_ds, 0, memory);
}
pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
session.update_buffer_descriptor(&mut self.reduce_ds, 2, scene);
session.update_buffer_descriptor(&mut self.path_ds, 2, scene);
}
}
pub struct PathEncoder<'a> { pub struct PathEncoder<'a> {
tag_stream: &'a mut Vec<u8>, tag_stream: &'a mut Vec<u8>,
// If we're never going to use the i16 encoding, it might be // If we're never going to use the i16 encoding, it might be


@ -166,6 +166,18 @@ impl TransformStage {
} }
} }
impl TransformBinding {
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
session.update_buffer_descriptor(&mut self.leaf_ds, 0, memory);
}
pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
session.update_buffer_descriptor(&mut self.reduce_ds, 2, scene);
session.update_buffer_descriptor(&mut self.leaf_ds, 2, scene);
}
}
impl Transform { impl Transform {
pub const IDENTITY: Transform = Transform { pub const IDENTITY: Transform = Transform {
mat: [1.0, 0.0, 0.0, 1.0], mat: [1.0, 0.0, 0.0, 1.0],