unify GPU memory management

Merge all static and dynamic buffers into just one, "memory". Add a malloc
function for dynamic allocations.

Unify static allocation offsets into a "config" buffer containing scene setup
(number of paths, number of path segments), as well as the memory offsets of
the static allocations.

Finally, set an overflow flag when an allocation fails, and make sure to exit
shader execution as soon as that triggers. Add checks before beginning
execution in case the client wants to run two or more shaders before checking
the flag.

The "state" buffer is left alone because it needs zeroing and because it is
accessed with the "volatile" keyword.

Fixes #40

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur 2020-12-11 18:30:20 +01:00
parent a2a2d12c5d
commit 4de67d9081
23 changed files with 463 additions and 448 deletions

View file

@ -31,17 +31,22 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
for name in &module.def_names { for name in &module.def_names {
let def = module.defs.get(name).unwrap(); let def = module.defs.get(name).unwrap();
let mem = &"memory".to_owned();
let mut buf_name = &module.name;
if !module.name.eq(&"state") && !module.name.eq(&"scene") {
buf_name = mem;
}
match def { match def {
(_size, LayoutTypeDef::Struct(fields)) => { (_size, LayoutTypeDef::Struct(fields)) => {
gen_struct_read(&mut r, &module.name, &name, fields); gen_struct_read(&mut r, buf_name, &name, fields);
if module.gpu_write { if module.gpu_write {
gen_struct_write(&mut r, &module.name, &name, fields); gen_struct_write(&mut r, buf_name, &name, fields);
} }
} }
(_size, LayoutTypeDef::Enum(en)) => { (_size, LayoutTypeDef::Enum(en)) => {
gen_enum_read(&mut r, &module.name, &name, en); gen_enum_read(&mut r, buf_name, &name, en);
if module.gpu_write { if module.gpu_write {
gen_enum_write(&mut r, &module.name, &name, en); gen_enum_write(&mut r, buf_name, &name, en);
} }
} }
} }

View file

@ -64,11 +64,11 @@ AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {
AnnoFill AnnoFill_read(AnnoFillRef ref) { AnnoFill AnnoFill_read(AnnoFillRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = annotated[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = annotated[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = annotated[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = annotated[ix + 3]; uint raw3 = memory[ix + 3];
uint raw4 = annotated[ix + 4]; uint raw4 = memory[ix + 4];
AnnoFill s; AnnoFill s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.rgba_color = raw4; s.rgba_color = raw4;
@ -77,21 +77,21 @@ AnnoFill AnnoFill_read(AnnoFillRef ref) {
void AnnoFill_write(AnnoFillRef ref, AnnoFill s) { void AnnoFill_write(AnnoFillRef ref, AnnoFill s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
annotated[ix + 0] = floatBitsToUint(s.bbox.x); memory[ix + 0] = floatBitsToUint(s.bbox.x);
annotated[ix + 1] = floatBitsToUint(s.bbox.y); memory[ix + 1] = floatBitsToUint(s.bbox.y);
annotated[ix + 2] = floatBitsToUint(s.bbox.z); memory[ix + 2] = floatBitsToUint(s.bbox.z);
annotated[ix + 3] = floatBitsToUint(s.bbox.w); memory[ix + 3] = floatBitsToUint(s.bbox.w);
annotated[ix + 4] = s.rgba_color; memory[ix + 4] = s.rgba_color;
} }
AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) { AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = annotated[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = annotated[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = annotated[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = annotated[ix + 3]; uint raw3 = memory[ix + 3];
uint raw4 = annotated[ix + 4]; uint raw4 = memory[ix + 4];
uint raw5 = annotated[ix + 5]; uint raw5 = memory[ix + 5];
AnnoStroke s; AnnoStroke s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.rgba_color = raw4; s.rgba_color = raw4;
@ -101,20 +101,20 @@ AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) {
void AnnoStroke_write(AnnoStrokeRef ref, AnnoStroke s) { void AnnoStroke_write(AnnoStrokeRef ref, AnnoStroke s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
annotated[ix + 0] = floatBitsToUint(s.bbox.x); memory[ix + 0] = floatBitsToUint(s.bbox.x);
annotated[ix + 1] = floatBitsToUint(s.bbox.y); memory[ix + 1] = floatBitsToUint(s.bbox.y);
annotated[ix + 2] = floatBitsToUint(s.bbox.z); memory[ix + 2] = floatBitsToUint(s.bbox.z);
annotated[ix + 3] = floatBitsToUint(s.bbox.w); memory[ix + 3] = floatBitsToUint(s.bbox.w);
annotated[ix + 4] = s.rgba_color; memory[ix + 4] = s.rgba_color;
annotated[ix + 5] = floatBitsToUint(s.linewidth); memory[ix + 5] = floatBitsToUint(s.linewidth);
} }
AnnoClip AnnoClip_read(AnnoClipRef ref) { AnnoClip AnnoClip_read(AnnoClipRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = annotated[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = annotated[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = annotated[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = annotated[ix + 3]; uint raw3 = memory[ix + 3];
AnnoClip s; AnnoClip s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s; return s;
@ -122,14 +122,14 @@ AnnoClip AnnoClip_read(AnnoClipRef ref) {
void AnnoClip_write(AnnoClipRef ref, AnnoClip s) { void AnnoClip_write(AnnoClipRef ref, AnnoClip s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
annotated[ix + 0] = floatBitsToUint(s.bbox.x); memory[ix + 0] = floatBitsToUint(s.bbox.x);
annotated[ix + 1] = floatBitsToUint(s.bbox.y); memory[ix + 1] = floatBitsToUint(s.bbox.y);
annotated[ix + 2] = floatBitsToUint(s.bbox.z); memory[ix + 2] = floatBitsToUint(s.bbox.z);
annotated[ix + 3] = floatBitsToUint(s.bbox.w); memory[ix + 3] = floatBitsToUint(s.bbox.w);
} }
uint Annotated_tag(AnnotatedRef ref) { uint Annotated_tag(AnnotatedRef ref) {
return annotated[ref.offset >> 2]; return memory[ref.offset >> 2];
} }
AnnoStroke Annotated_Stroke_read(AnnotatedRef ref) { AnnoStroke Annotated_Stroke_read(AnnotatedRef ref) {
@ -149,26 +149,26 @@ AnnoClip Annotated_EndClip_read(AnnotatedRef ref) {
} }
void Annotated_Nop_write(AnnotatedRef ref) { void Annotated_Nop_write(AnnotatedRef ref) {
annotated[ref.offset >> 2] = Annotated_Nop; memory[ref.offset >> 2] = Annotated_Nop;
} }
void Annotated_Stroke_write(AnnotatedRef ref, AnnoStroke s) { void Annotated_Stroke_write(AnnotatedRef ref, AnnoStroke s) {
annotated[ref.offset >> 2] = Annotated_Stroke; memory[ref.offset >> 2] = Annotated_Stroke;
AnnoStroke_write(AnnoStrokeRef(ref.offset + 4), s); AnnoStroke_write(AnnoStrokeRef(ref.offset + 4), s);
} }
void Annotated_Fill_write(AnnotatedRef ref, AnnoFill s) { void Annotated_Fill_write(AnnotatedRef ref, AnnoFill s) {
annotated[ref.offset >> 2] = Annotated_Fill; memory[ref.offset >> 2] = Annotated_Fill;
AnnoFill_write(AnnoFillRef(ref.offset + 4), s); AnnoFill_write(AnnoFillRef(ref.offset + 4), s);
} }
void Annotated_BeginClip_write(AnnotatedRef ref, AnnoClip s) { void Annotated_BeginClip_write(AnnotatedRef ref, AnnoClip s) {
annotated[ref.offset >> 2] = Annotated_BeginClip; memory[ref.offset >> 2] = Annotated_BeginClip;
AnnoClip_write(AnnoClipRef(ref.offset + 4), s); AnnoClip_write(AnnoClipRef(ref.offset + 4), s);
} }
void Annotated_EndClip_write(AnnotatedRef ref, AnnoClip s) { void Annotated_EndClip_write(AnnotatedRef ref, AnnoClip s) {
annotated[ref.offset >> 2] = Annotated_EndClip; memory[ref.offset >> 2] = Annotated_EndClip;
AnnoClip_write(AnnoClipRef(ref.offset + 4), s); AnnoClip_write(AnnoClipRef(ref.offset + 4), s);
} }

View file

@ -16,27 +16,15 @@
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#include "setup.h" #include "setup.h"
#include "mem.h"
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR) #define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG) #define BACKDROP_WG (1 << LG_BACKDROP_WG)
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in; layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer AnnotatedBuf { layout(set = 0, binding = 1) readonly buffer ConfigBuf {
uint[] annotated; Config conf;
};
// This is really only used for n_elements; maybe we can handle that
// a different way, but it's convenient to have the same signature as
// tile allocation.
layout(set = 0, binding = 1) readonly buffer AllocBuf {
uint n_elements; // paths
uint n_pathseg;
uint alloc;
};
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
}; };
#include "annotated.h" #include "annotated.h"
@ -47,18 +35,22 @@ shared uint sh_row_base[BACKDROP_WG];
shared uint sh_row_width[BACKDROP_WG]; shared uint sh_row_width[BACKDROP_WG];
void main() { void main() {
if (mem_overflow) {
return;
}
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size); AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
// Work assignment: 1 thread : 1 path element // Work assignment: 1 thread : 1 path element
uint row_count = 0; uint row_count = 0;
if (element_ix < n_elements) { if (element_ix < conf.n_elements) {
uint tag = Annotated_tag(ref); uint tag = Annotated_tag(ref);
switch (tag) { switch (tag) {
case Annotated_Fill: case Annotated_Fill:
case Annotated_BeginClip: case Annotated_BeginClip:
PathRef path_ref = PathRef(element_ix * Path_size); PathRef path_ref = PathRef(conf.tile_base + element_ix * Path_size);
Path path = Path_read(path_ref); Path path = Path_read(path_ref);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x; sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y; row_count = path.bbox.w - path.bbox.y;
@ -98,11 +90,11 @@ void main() {
// Process one row sequentially // Process one row sequentially
// Read backdrop value per tile and prefix sum it // Read backdrop value per tile and prefix sum it
uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width; uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width;
uint sum = tile[tile_el_ix]; uint sum = memory[tile_el_ix];
for (uint x = 1; x < width; x++) { for (uint x = 1; x < width; x++) {
tile_el_ix += 2; tile_el_ix += 2;
sum += tile[tile_el_ix]; sum += memory[tile_el_ix];
tile[tile_el_ix] = sum; memory[tile_el_ix] = sum;
} }
} }
} }

Binary file not shown.

View file

@ -10,20 +10,12 @@
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#include "setup.h" #include "setup.h"
#include "mem.h"
layout(local_size_x = N_TILE, local_size_y = 1) in; layout(local_size_x = N_TILE, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer AnnotatedBuf { layout(set = 0, binding = 1) readonly buffer ConfigBuf {
uint[] annotated; Config conf;
};
layout(set = 0, binding = 1) buffer AllocBuf {
uint n_elements; // paths
uint alloc;
};
layout(set = 0, binding = 2) buffer BinsBuf {
uint[] bins;
}; };
#include "annotated.h" #include "annotated.h"
@ -41,19 +33,27 @@ layout(set = 0, binding = 2) buffer BinsBuf {
shared uint bitmaps[N_SLICE][N_TILE]; shared uint bitmaps[N_SLICE][N_TILE];
shared uint count[N_SLICE][N_TILE]; shared uint count[N_SLICE][N_TILE];
shared uint sh_chunk_start[N_TILE]; shared uint sh_chunk_start[N_TILE];
shared bool sh_alloc_failed;
void main() { void main() {
uint my_n_elements = n_elements; if (mem_overflow) {
return;
}
uint my_n_elements = conf.n_elements;
uint my_partition = gl_WorkGroupID.x; uint my_partition = gl_WorkGroupID.x;
for (uint i = 0; i < N_SLICE; i++) { for (uint i = 0; i < N_SLICE; i++) {
bitmaps[i][gl_LocalInvocationID.x] = 0; bitmaps[i][gl_LocalInvocationID.x] = 0;
} }
if (gl_LocalInvocationID.x == 0) {
sh_alloc_failed = false;
}
barrier(); barrier();
// Read inputs and determine coverage of bins // Read inputs and determine coverage of bins
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x; uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size); AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
uint tag = Annotated_Nop; uint tag = Annotated_Nop;
if (element_ix < my_n_elements) { if (element_ix < my_n_elements) {
tag = Annotated_tag(ref); tag = Annotated_tag(ref);
@ -103,19 +103,26 @@ void main() {
count[i][gl_LocalInvocationID.x] = element_count; count[i][gl_LocalInvocationID.x] = element_count;
} }
// element_count is number of elements covering bin for this invocation. // element_count is number of elements covering bin for this invocation.
uint chunk_start = 0; Alloc chunk_alloc = Alloc(0, false);
if (element_count != 0) { if (element_count != 0) {
// TODO: aggregate atomic adds (subgroup is probably fastest) // TODO: aggregate atomic adds (subgroup is probably fastest)
chunk_start = atomicAdd(alloc, element_count * BinInstance_size); chunk_alloc = malloc(element_count * BinInstance_size);
sh_chunk_start[gl_LocalInvocationID.x] = chunk_start; sh_chunk_start[gl_LocalInvocationID.x] = chunk_alloc.offset;
if (chunk_alloc.failed) {
sh_alloc_failed = true;
}
} }
// Note: it might be more efficient for reading to do this in the // Note: it might be more efficient for reading to do this in the
// other order (each bin is a contiguous sequence of partitions) // other order (each bin is a contiguous sequence of partitions)
uint out_ix = (my_partition * N_TILE + gl_LocalInvocationID.x) * 2; uint out_ix = (conf.bin_base >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
bins[out_ix] = element_count; memory[out_ix] = element_count;
bins[out_ix + 1] = chunk_start; memory[out_ix + 1] = chunk_alloc.offset;
barrier(); barrier();
if (sh_alloc_failed) {
return;
}
// Use similar strategy as Laine & Karras paper; loop over bbox of bins // Use similar strategy as Laine & Karras paper; loop over bbox of bins
// touched by this element // touched by this element
x = x0; x = x0;

Binary file not shown.

View file

@ -18,7 +18,7 @@ BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) {
BinInstance BinInstance_read(BinInstanceRef ref) { BinInstance BinInstance_read(BinInstanceRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = bins[ix + 0]; uint raw0 = memory[ix + 0];
BinInstance s; BinInstance s;
s.element_ix = raw0; s.element_ix = raw0;
return s; return s;
@ -26,6 +26,6 @@ BinInstance BinInstance_read(BinInstanceRef ref) {
void BinInstance_write(BinInstanceRef ref, BinInstance s) { void BinInstance_write(BinInstanceRef ref, BinInstance s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
bins[ix + 0] = s.element_ix; memory[ix + 0] = s.element_ix;
} }

View file

@ -14,28 +14,12 @@
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#include "setup.h" #include "setup.h"
#include "mem.h"
layout(local_size_x = N_TILE, local_size_y = 1) in; layout(local_size_x = N_TILE, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer AnnotatedBuf { layout(set = 0, binding = 1) readonly buffer ConfigBuf {
uint[] annotated; Config conf;
};
layout(set = 0, binding = 1) buffer BinsBuf {
uint[] bins;
};
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
};
layout(set = 0, binding = 3) buffer AllocBuf {
uint n_elements;
uint alloc;
};
layout(set = 0, binding = 4) buffer PtclBuf {
uint[] ptcl;
}; };
#include "annotated.h" #include "annotated.h"
@ -65,22 +49,31 @@ shared uint sh_tile_base[N_TILE];
shared uint sh_tile_stride[N_TILE]; shared uint sh_tile_stride[N_TILE];
// Perhaps cmd_limit should be a global? This is a style question. // Perhaps cmd_limit should be a global? This is a style question.
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) { bool alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
if (cmd_ref.offset > cmd_limit) { if (cmd_ref.offset < cmd_limit) {
uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC); return true;
CmdJump jump = CmdJump(new_cmd);
Cmd_Jump_write(cmd_ref, jump);
cmd_ref = CmdRef(new_cmd);
cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
} }
Alloc new_cmd = malloc(PTCL_INITIAL_ALLOC);
if (new_cmd.failed) {
return false;
}
CmdJump jump = CmdJump(new_cmd.offset);
Cmd_Jump_write(cmd_ref, jump);
cmd_ref = CmdRef(new_cmd.offset);
cmd_limit = new_cmd.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
return true;
} }
void main() { void main() {
if (mem_overflow) {
return;
}
// Could use either linear or 2d layouts for both dispatch and // Could use either linear or 2d layouts for both dispatch and
// invocations within the workgroup. We'll use variables to abstract. // invocations within the workgroup. We'll use variables to abstract.
uint bin_ix = N_TILE_X * gl_WorkGroupID.y + gl_WorkGroupID.x; uint bin_ix = N_TILE_X * gl_WorkGroupID.y + gl_WorkGroupID.x;
uint partition_ix = 0; uint partition_ix = 0;
uint n_partitions = (n_elements + N_TILE - 1) / N_TILE; uint n_partitions = (conf.n_elements + N_TILE - 1) / N_TILE;
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
// Coordinates of top left of bin, in tiles. // Coordinates of top left of bin, in tiles.
@ -91,7 +84,7 @@ void main() {
uint tile_x = gl_LocalInvocationID.x % N_TILE_X; uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
uint tile_y = gl_LocalInvocationID.x / N_TILE_X; uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
uint this_tile_ix = (bin_tile_y + tile_y) * WIDTH_IN_TILES + bin_tile_x + tile_x; uint this_tile_ix = (bin_tile_y + tile_y) * WIDTH_IN_TILES + bin_tile_x + tile_x;
CmdRef cmd_ref = CmdRef(this_tile_ix * PTCL_INITIAL_ALLOC); CmdRef cmd_ref = CmdRef(conf.ptcl_base + this_tile_ix * PTCL_INITIAL_ALLOC);
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size; uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
// The nesting depth of the clip stack // The nesting depth of the clip stack
uint clip_depth = 0; uint clip_depth = 0;
@ -123,9 +116,9 @@ void main() {
part_start_ix = ready_ix; part_start_ix = ready_ix;
uint count = 0; uint count = 0;
if (th_ix < N_PART_READ && partition_ix + th_ix < n_partitions) { if (th_ix < N_PART_READ && partition_ix + th_ix < n_partitions) {
uint in_ix = ((partition_ix + th_ix) * N_TILE + bin_ix) * 2; uint in_ix = (conf.bin_base >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
count = bins[in_ix]; count = memory[in_ix];
sh_part_elements[th_ix] = bins[in_ix + 1]; sh_part_elements[th_ix] = memory[in_ix + 1];
} }
// prefix sum of counts // prefix sum of counts
for (uint i = 0; i < LG_N_PART_READ; i++) { for (uint i = 0; i < LG_N_PART_READ; i++) {
@ -175,7 +168,7 @@ void main() {
AnnotatedRef ref; AnnotatedRef ref;
if (th_ix + rd_ix < wr_ix) { if (th_ix + rd_ix < wr_ix) {
element_ix = sh_elements[th_ix]; element_ix = sh_elements[th_ix];
ref = AnnotatedRef(element_ix * Annotated_size); ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
tag = Annotated_tag(ref); tag = Annotated_tag(ref);
} }
@ -189,7 +182,7 @@ void main() {
// We have one "path" for each element, even if the element isn't // We have one "path" for each element, even if the element isn't
// actually a path (currently EndClip, but images etc in the future). // actually a path (currently EndClip, but images etc in the future).
uint path_ix = element_ix; uint path_ix = element_ix;
Path path = Path_read(PathRef(path_ix * Path_size)); Path path = Path_read(PathRef(conf.tile_base + path_ix * Path_size));
uint stride = path.bbox.z - path.bbox.x; uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride; sh_tile_stride[th_ix] = stride;
int dx = int(path.bbox.x) - int(bin_tile_x); int dx = int(path.bbox.x) - int(bin_tile_x);
@ -232,7 +225,7 @@ void main() {
el_ix = probe; el_ix = probe;
} }
} }
AnnotatedRef ref = AnnotatedRef(sh_elements[el_ix] * Annotated_size); AnnotatedRef ref = AnnotatedRef(conf.anno_base + sh_elements[el_ix] * Annotated_size);
uint tag = Annotated_tag(ref); uint tag = Annotated_tag(ref);
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0); uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
uint width = sh_tile_width[el_ix]; uint width = sh_tile_width[el_ix];
@ -281,7 +274,7 @@ void main() {
// At this point, we read the element again from global memory. // At this point, we read the element again from global memory.
// If that turns out to be expensive, maybe we can pack it into // If that turns out to be expensive, maybe we can pack it into
// shared memory (or perhaps just the tag). // shared memory (or perhaps just the tag).
ref = AnnotatedRef(element_ix * Annotated_size); ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
tag = Annotated_tag(ref); tag = Annotated_tag(ref);
if (clip_zero_depth == 0) { if (clip_zero_depth == 0) {
@ -290,7 +283,9 @@ void main() {
Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix] Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoFill fill = Annotated_Fill_read(ref); AnnoFill fill = Annotated_Fill_read(ref);
alloc_cmd(cmd_ref, cmd_limit); if (!alloc_cmd(cmd_ref, cmd_limit)) {
break;
}
if (tile.tile.offset != 0) { if (tile.tile.offset != 0) {
CmdFill cmd_fill; CmdFill cmd_fill;
cmd_fill.tile_ref = tile.tile.offset; cmd_fill.tile_ref = tile.tile.offset;
@ -310,7 +305,9 @@ void main() {
} else if (tile.tile.offset == 0 && clip_depth < 32) { } else if (tile.tile.offset == 0 && clip_depth < 32) {
clip_one_mask |= (1 << clip_depth); clip_one_mask |= (1 << clip_depth);
} else { } else {
alloc_cmd(cmd_ref, cmd_limit); if (!alloc_cmd(cmd_ref, cmd_limit)) {
break;
}
if (tile.tile.offset != 0) { if (tile.tile.offset != 0) {
CmdBeginClip cmd_begin_clip; CmdBeginClip cmd_begin_clip;
cmd_begin_clip.tile_ref = tile.tile.offset; cmd_begin_clip.tile_ref = tile.tile.offset;
@ -331,7 +328,9 @@ void main() {
case Annotated_EndClip: case Annotated_EndClip:
clip_depth--; clip_depth--;
if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) { if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) {
alloc_cmd(cmd_ref, cmd_limit); if (!alloc_cmd(cmd_ref, cmd_limit)) {
break;
}
Cmd_EndClip_write(cmd_ref, CmdEndClip(1.0)); Cmd_EndClip_write(cmd_ref, CmdEndClip(1.0));
cmd_ref.offset += Cmd_size; cmd_ref.offset += Cmd_size;
} }
@ -344,7 +343,9 @@ void main() {
cmd_stroke.tile_ref = tile.tile.offset; cmd_stroke.tile_ref = tile.tile.offset;
cmd_stroke.half_width = 0.5 * stroke.linewidth; cmd_stroke.half_width = 0.5 * stroke.linewidth;
cmd_stroke.rgba_color = stroke.rgba_color; cmd_stroke.rgba_color = stroke.rgba_color;
alloc_cmd(cmd_ref, cmd_limit); if (!alloc_cmd(cmd_ref, cmd_limit)) {
break;
}
Cmd_Stroke_write(cmd_ref, cmd_stroke); Cmd_Stroke_write(cmd_ref, cmd_stroke);
cmd_ref.offset += Cmd_size; cmd_ref.offset += Cmd_size;
break; break;

Binary file not shown.

View file

@ -9,6 +9,9 @@
#version 450 #version 450
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#include "mem.h"
#define N_ROWS 4 #define N_ROWS 4
#define WG_SIZE 32 #define WG_SIZE 32
#define LG_WG_SIZE 5 #define LG_WG_SIZE 5
@ -16,28 +19,22 @@
layout(local_size_x = WG_SIZE, local_size_y = 1) in; layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(set = 0, binding = 0) readonly buffer SceneBuf { layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(set = 0, binding = 2) readonly buffer SceneBuf {
uint[] scene; uint[] scene;
}; };
// It would be better to use the Vulkan memory model than // It would be better to use the Vulkan memory model than
// "volatile" but shooting for compatibility here rather // "volatile" but shooting for compatibility here rather
// than doing things right. // than doing things right.
layout(set = 0, binding = 1) volatile buffer StateBuf { layout(set = 0, binding = 3) volatile buffer StateBuf {
uint part_counter; uint part_counter;
uint[] state; uint[] state;
}; };
// The annotated results are stored here.
layout(set = 0, binding = 2) buffer AnnotatedBuf {
uint[] annotated;
};
// Path segments are stored here.
layout(set = 0, binding = 3) buffer PathSegBuf {
uint[] pathseg;
};
#include "scene.h" #include "scene.h"
#include "state.h" #include "state.h"
#include "annotated.h" #include "annotated.h"
@ -175,6 +172,10 @@ shared uint sh_part_ix;
shared State sh_prefix; shared State sh_prefix;
void main() { void main() {
if (mem_overflow) {
return;
}
State th_state[N_ROWS]; State th_state[N_ROWS];
// Determine partition to process by atomic counter (described in Section // Determine partition to process by atomic counter (described in Section
// 4.4 of prefix sum paper). // 4.4 of prefix sum paper).
@ -341,9 +342,9 @@ void main() {
} }
// We do encoding a bit by hand to minimize divergence. Another approach // We do encoding a bit by hand to minimize divergence. Another approach
// would be to have a fill/stroke bool. // would be to have a fill/stroke bool.
PathSegRef path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size); PathSegRef path_out_ref = PathSegRef(conf.pathseg_base + (st.pathseg_count - 1) * PathSeg_size);
uint out_tag = tag == Element_FillLine ? PathSeg_FillCubic : PathSeg_StrokeCubic; uint out_tag = tag == Element_FillLine ? PathSeg_FillCubic : PathSeg_StrokeCubic;
pathseg[path_out_ref.offset >> 2] = out_tag; memory[path_out_ref.offset >> 2] = out_tag;
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic); PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
break; break;
case Element_FillQuad: case Element_FillQuad:
@ -365,9 +366,9 @@ void main() {
} }
// We do encoding a bit by hand to minimize divergence. Another approach // We do encoding a bit by hand to minimize divergence. Another approach
// would be to have a fill/stroke bool. // would be to have a fill/stroke bool.
path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size); path_out_ref = PathSegRef(conf.pathseg_base + (st.pathseg_count - 1) * PathSeg_size);
out_tag = tag == Element_FillQuad ? PathSeg_FillCubic : PathSeg_StrokeCubic; out_tag = tag == Element_FillQuad ? PathSeg_FillCubic : PathSeg_StrokeCubic;
pathseg[path_out_ref.offset >> 2] = out_tag; memory[path_out_ref.offset >> 2] = out_tag;
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic); PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
break; break;
case Element_FillCubic: case Element_FillCubic:
@ -386,9 +387,9 @@ void main() {
} }
// We do encoding a bit by hand to minimize divergence. Another approach // We do encoding a bit by hand to minimize divergence. Another approach
// would be to have a fill/stroke bool. // would be to have a fill/stroke bool.
path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size); path_out_ref = PathSegRef(conf.pathseg_base + (st.pathseg_count - 1) * PathSeg_size);
out_tag = tag == Element_FillCubic ? PathSeg_FillCubic : PathSeg_StrokeCubic; out_tag = tag == Element_FillCubic ? PathSeg_FillCubic : PathSeg_StrokeCubic;
pathseg[path_out_ref.offset >> 2] = out_tag; memory[path_out_ref.offset >> 2] = out_tag;
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic); PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
break; break;
case Element_Stroke: case Element_Stroke:
@ -398,7 +399,7 @@ void main() {
vec2 lw = get_linewidth(st); vec2 lw = get_linewidth(st);
anno_stroke.bbox = st.bbox + vec4(-lw, lw); anno_stroke.bbox = st.bbox + vec4(-lw, lw);
anno_stroke.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z)); anno_stroke.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
AnnotatedRef out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size); AnnotatedRef out_ref = AnnotatedRef(conf.anno_base + (st.path_count - 1) * Annotated_size);
Annotated_Stroke_write(out_ref, anno_stroke); Annotated_Stroke_write(out_ref, anno_stroke);
break; break;
case Element_Fill: case Element_Fill:
@ -406,7 +407,7 @@ void main() {
AnnoFill anno_fill; AnnoFill anno_fill;
anno_fill.rgba_color = fill.rgba_color; anno_fill.rgba_color = fill.rgba_color;
anno_fill.bbox = st.bbox; anno_fill.bbox = st.bbox;
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size); out_ref = AnnotatedRef(conf.anno_base + (st.path_count - 1) * Annotated_size);
Annotated_Fill_write(out_ref, anno_fill); Annotated_Fill_write(out_ref, anno_fill);
break; break;
case Element_BeginClip: case Element_BeginClip:
@ -414,14 +415,14 @@ void main() {
AnnoClip anno_begin_clip = AnnoClip(begin_clip.bbox); AnnoClip anno_begin_clip = AnnoClip(begin_clip.bbox);
// This is the absolute bbox, it's been transformed during encoding. // This is the absolute bbox, it's been transformed during encoding.
anno_begin_clip.bbox = begin_clip.bbox; anno_begin_clip.bbox = begin_clip.bbox;
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size); out_ref = AnnotatedRef(conf.anno_base + (st.path_count - 1) * Annotated_size);
Annotated_BeginClip_write(out_ref, anno_begin_clip); Annotated_BeginClip_write(out_ref, anno_begin_clip);
break; break;
case Element_EndClip: case Element_EndClip:
Clip end_clip = Element_EndClip_read(this_ref); Clip end_clip = Element_EndClip_read(this_ref);
// This bbox is expected to be the same as the begin one. // This bbox is expected to be the same as the begin one.
AnnoClip anno_end_clip = AnnoClip(end_clip.bbox); AnnoClip anno_end_clip = AnnoClip(end_clip.bbox);
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size); out_ref = AnnotatedRef(conf.anno_base + (st.path_count - 1) * Annotated_size);
Annotated_EndClip_write(out_ref, anno_end_clip); Annotated_EndClip_write(out_ref, anno_end_clip);
break; break;
} }

Binary file not shown.

View file

@ -11,50 +11,42 @@
#extension GL_EXT_nonuniform_qualifier : enable #extension GL_EXT_nonuniform_qualifier : enable
#include "setup.h" #include "setup.h"
#include "mem.h"
#define CHUNK 8 #define CHUNK 8
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK) #define CHUNK_DY (TILE_HEIGHT_PX / CHUNK)
layout(local_size_x = TILE_WIDTH_PX, local_size_y = CHUNK_DY) in; layout(local_size_x = TILE_WIDTH_PX, local_size_y = CHUNK_DY) in;
// Same concern that this should be readonly as in kernel 3. layout(set = 0, binding = 1) readonly buffer ConfigBuf {
layout(set = 0, binding = 0) buffer PtclBuf { Config conf;
uint[] ptcl;
}; };
layout(set = 0, binding = 1) buffer TileBuf { layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
uint[] tile;
};
layout(set = 0, binding = 2) buffer ClipScratchBuf { layout(set = 0, binding = 3) uniform sampler2D textures[];
uint[] clip_scratch;
};
layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;
layout(set = 0, binding = 4) uniform sampler2D textures[];
#include "ptcl.h" #include "ptcl.h"
#include "tile.h" #include "tile.h"
#define BLEND_STACK_SIZE 4 #define BLEND_STACK_SIZE 4
// Layout of clip_scratch buffer: // Layout of a clip scratch frame:
// [0] is the alloc bump offset (in units of 32 bit words, initially 0)
// Starting at 1 is a sequence of frames.
// Each frame is WIDTH * HEIGHT 32-bit words, then a link reference. // Each frame is WIDTH * HEIGHT 32-bit words, then a link reference.
// Link offset and frame size in 32-bit words.
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX) #define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1) #define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)
shared uint sh_clip_alloc; shared Alloc sh_clip_alloc;
// Allocate a scratch buffer for clipping. Unlike offsets in the rest of the code, // Allocate a scratch buffer for clipping.
// it counts 32-bit words. Alloc alloc_clip_buf(uint link) {
uint alloc_clip_buf(uint link) {
if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) { if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {
uint alloc = atomicAdd(clip_scratch[0], CLIP_BUF_SIZE) + 1; Alloc alloc = malloc(CLIP_BUF_SIZE * 4);
if (!alloc.failed) {
memory[(alloc.offset >> 2) + CLIP_LINK_OFFSET] = link;
}
sh_clip_alloc = alloc; sh_clip_alloc = alloc;
clip_scratch[alloc + CLIP_LINK_OFFSET] = link;
} }
barrier(); barrier();
return sh_clip_alloc; return sh_clip_alloc;
@ -95,8 +87,12 @@ float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
} }
void main() { void main() {
if (mem_overflow) {
return;
}
uint tile_ix = gl_WorkGroupID.y * WIDTH_IN_TILES + gl_WorkGroupID.x; uint tile_ix = gl_WorkGroupID.y * WIDTH_IN_TILES + gl_WorkGroupID.x;
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); CmdRef cmd_ref = CmdRef(conf.ptcl_base + tile_ix * PTCL_INITIAL_ALLOC);
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y); uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
vec2 xy = vec2(xy_uint); vec2 xy = vec2(xy_uint);
@ -168,10 +164,14 @@ void main() {
uint blend_slot = blend_sp % BLEND_STACK_SIZE; uint blend_slot = blend_sp % BLEND_STACK_SIZE;
if (blend_sp == blend_spill + BLEND_STACK_SIZE) { if (blend_sp == blend_spill + BLEND_STACK_SIZE) {
// spill to scratch buffer // spill to scratch buffer
clip_tos = alloc_clip_buf(clip_tos); Alloc alloc = alloc_clip_buf(clip_tos);
uint base_ix = clip_tos + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y; if (alloc.failed) {
return;
}
clip_tos = alloc.offset;
uint base_ix = (clip_tos >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
clip_scratch[base_ix + k * TILE_WIDTH_PX * CHUNK_DY] = blend_stack[blend_slot][k]; memory[base_ix + k * TILE_WIDTH_PX * CHUNK_DY] = blend_stack[blend_slot][k];
} }
blend_spill++; blend_spill++;
} }
@ -194,11 +194,11 @@ void main() {
CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref); CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref);
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE; blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
if (blend_sp == blend_spill) { if (blend_sp == blend_spill) {
uint base_ix = clip_tos + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y; uint base_ix = (clip_tos >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = clip_scratch[base_ix + k * TILE_WIDTH_PX * CHUNK_DY]; blend_stack[blend_slot][k] = memory[base_ix + k * TILE_WIDTH_PX * CHUNK_DY];
} }
clip_tos = clip_scratch[clip_tos + CLIP_LINK_OFFSET]; clip_tos = memory[(clip_tos >> 2) + CLIP_LINK_OFFSET];
blend_spill--; blend_spill--;
} }
blend_sp--; blend_sp--;

Binary file not shown.

29
piet-gpu/shader/mem.h Normal file
View file

@ -0,0 +1,29 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Memory is the single storage buffer that the generated accessors
// (memory[ix + n]) and malloc operate on.
layout(set = 0, binding = 0) buffer Memory {
// offset into memory of the next allocation, initialized by the user.
// malloc advances it with atomicAdd; it is measured in bytes.
uint mem_offset;
// set to true by malloc when an allocation does not fit in memory. The
// shader entry points test it first and return without doing any work.
bool mem_overflow;
// backing storage, addressed in 32-bit words: byte offsets are shifted
// right by 2 before being used as an index.
uint[] memory;
};
// Alloc represents a memory allocation, as returned by malloc.
struct Alloc {
// offset in bytes into memory. Shift right by 2 to obtain an index into
// the memory word array.
uint offset;
// failed is true if the allocation overflowed memory. Callers check this
// before touching the allocation.
bool failed;
};
// malloc allocates size bytes of memory.
//
// size is rounded up to a multiple of 4 so that every allocation starts on a
// 32-bit word. The returned Alloc carries the byte offset of the reservation.
// failed is true when the reservation does not fit inside memory; in that case
// mem_overflow is raised as well and the offset must not be dereferenced.
// mem_offset is advanced unconditionally, including for failed allocations.
Alloc malloc(uint size) {
    Alloc a;
    // Round up to nearest 32-bit word.
    size = (size + 3) & ~3;
    a.offset = atomicAdd(mem_offset, size);
    // Capacity of memory in bytes; length() counts 32-bit words.
    uint capacity = uint(memory.length()) * 4;
    // Test against the remaining capacity instead of forming a.offset + size:
    // that sum wraps around in 32-bit unsigned arithmetic once mem_offset has
    // grown large (e.g. after earlier failed allocations), which would make an
    // out-of-range allocation look valid.
    a.failed = size > capacity || a.offset > capacity - size;
    if (a.failed) {
        mem_overflow = true;
    }
    return a;
}

View file

@ -8,24 +8,15 @@
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#include "setup.h" #include "setup.h"
#include "mem.h"
#define LG_COARSE_WG 5 #define LG_COARSE_WG 5
#define COARSE_WG (1 << LG_COARSE_WG) #define COARSE_WG (1 << LG_COARSE_WG)
layout(local_size_x = COARSE_WG, local_size_y = 1) in; layout(local_size_x = COARSE_WG, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer PathSegBuf { layout(set = 0, binding = 1) readonly buffer ConfigBuf {
uint[] pathseg; Config conf;
};
layout(set = 0, binding = 1) buffer AllocBuf {
uint n_paths;
uint n_pathseg;
uint alloc;
};
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
}; };
#include "pathseg.h" #include "pathseg.h"
@ -96,11 +87,15 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
} }
void main() { void main() {
if (mem_overflow) {
return;
}
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
PathSegRef ref = PathSegRef(element_ix * PathSeg_size); PathSegRef ref = PathSegRef(conf.pathseg_base + element_ix * PathSeg_size);
uint tag = PathSeg_Nop; uint tag = PathSeg_Nop;
if (element_ix < n_pathseg) { if (element_ix < conf.n_pathseg) {
tag = PathSeg_tag(ref); tag = PathSeg_tag(ref);
} }
switch (tag) { switch (tag) {
@ -128,7 +123,7 @@ void main() {
uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1); uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1);
uint path_ix = cubic.path_ix; uint path_ix = cubic.path_ix;
Path path = Path_read(PathRef(path_ix * Path_size)); Path path = Path_read(PathRef(conf.tile_base + path_ix * Path_size));
ivec4 bbox = ivec4(path.bbox); ivec4 bbox = ivec4(path.bbox);
vec2 p0 = cubic.p0; vec2 p0 = cubic.p0;
qp0 = cubic.p0; qp0 = cubic.p0;
@ -187,7 +182,12 @@ void main() {
// TODO: can be tighter, use c to bound width // TODO: can be tighter, use c to bound width
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
// Consider using subgroups to aggregate atomic add. // Consider using subgroups to aggregate atomic add.
uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size); Alloc tile_alloc = malloc(n_tile_alloc * TileSeg_size);
if (tile_alloc.failed) {
return;
}
uint tile_offset = tile_alloc.offset;
TileSeg tile_seg; TileSeg tile_seg;
int xray = int(floor(p0.x*SX)); int xray = int(floor(p0.x*SX));
@ -204,7 +204,7 @@ void main() {
int backdrop = p1.y < p0.y ? 1 : -1; int backdrop = p1.y < p0.y ? 1 : -1;
TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop)); TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
uint tile_el = tile_ref.offset >> 2; uint tile_el = tile_ref.offset >> 2;
atomicAdd(tile[tile_el + 1], backdrop); atomicAdd(memory[tile_el + 1], backdrop);
} }
// next_xray is the xray for the next scanline; the line segment intersects // next_xray is the xray for the next scanline; the line segment intersects
@ -227,7 +227,7 @@ void main() {
float tile_x0 = float(x * TILE_WIDTH_PX); float tile_x0 = float(x * TILE_WIDTH_PX);
TileRef tile_ref = Tile_index(path.tiles, uint(base + x)); TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
uint tile_el = tile_ref.offset >> 2; uint tile_el = tile_ref.offset >> 2;
uint old = atomicExchange(tile[tile_el], tile_offset); uint old = atomicExchange(memory[tile_el], tile_offset);
tile_seg.origin = p0; tile_seg.origin = p0;
tile_seg.vector = p1 - p0; tile_seg.vector = p1 - p0;
float y_edge = 0.0; float y_edge = 0.0;

Binary file not shown.

View file

@ -89,11 +89,11 @@ PathSegRef PathSeg_index(PathSegRef ref, uint index) {
PathFillLine PathFillLine_read(PathFillLineRef ref) { PathFillLine PathFillLine_read(PathFillLineRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = pathseg[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = pathseg[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = pathseg[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = pathseg[ix + 3]; uint raw3 = memory[ix + 3];
uint raw4 = pathseg[ix + 4]; uint raw4 = memory[ix + 4];
PathFillLine s; PathFillLine s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -103,22 +103,22 @@ PathFillLine PathFillLine_read(PathFillLineRef ref) {
void PathFillLine_write(PathFillLineRef ref, PathFillLine s) { void PathFillLine_write(PathFillLineRef ref, PathFillLine s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
pathseg[ix + 0] = floatBitsToUint(s.p0.x); memory[ix + 0] = floatBitsToUint(s.p0.x);
pathseg[ix + 1] = floatBitsToUint(s.p0.y); memory[ix + 1] = floatBitsToUint(s.p0.y);
pathseg[ix + 2] = floatBitsToUint(s.p1.x); memory[ix + 2] = floatBitsToUint(s.p1.x);
pathseg[ix + 3] = floatBitsToUint(s.p1.y); memory[ix + 3] = floatBitsToUint(s.p1.y);
pathseg[ix + 4] = s.path_ix; memory[ix + 4] = s.path_ix;
} }
PathStrokeLine PathStrokeLine_read(PathStrokeLineRef ref) { PathStrokeLine PathStrokeLine_read(PathStrokeLineRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = pathseg[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = pathseg[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = pathseg[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = pathseg[ix + 3]; uint raw3 = memory[ix + 3];
uint raw4 = pathseg[ix + 4]; uint raw4 = memory[ix + 4];
uint raw5 = pathseg[ix + 5]; uint raw5 = memory[ix + 5];
uint raw6 = pathseg[ix + 6]; uint raw6 = memory[ix + 6];
PathStrokeLine s; PathStrokeLine s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -129,26 +129,26 @@ PathStrokeLine PathStrokeLine_read(PathStrokeLineRef ref) {
void PathStrokeLine_write(PathStrokeLineRef ref, PathStrokeLine s) { void PathStrokeLine_write(PathStrokeLineRef ref, PathStrokeLine s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
pathseg[ix + 0] = floatBitsToUint(s.p0.x); memory[ix + 0] = floatBitsToUint(s.p0.x);
pathseg[ix + 1] = floatBitsToUint(s.p0.y); memory[ix + 1] = floatBitsToUint(s.p0.y);
pathseg[ix + 2] = floatBitsToUint(s.p1.x); memory[ix + 2] = floatBitsToUint(s.p1.x);
pathseg[ix + 3] = floatBitsToUint(s.p1.y); memory[ix + 3] = floatBitsToUint(s.p1.y);
pathseg[ix + 4] = s.path_ix; memory[ix + 4] = s.path_ix;
pathseg[ix + 5] = floatBitsToUint(s.stroke.x); memory[ix + 5] = floatBitsToUint(s.stroke.x);
pathseg[ix + 6] = floatBitsToUint(s.stroke.y); memory[ix + 6] = floatBitsToUint(s.stroke.y);
} }
PathFillCubic PathFillCubic_read(PathFillCubicRef ref) { PathFillCubic PathFillCubic_read(PathFillCubicRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = pathseg[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = pathseg[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = pathseg[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = pathseg[ix + 3]; uint raw3 = memory[ix + 3];
uint raw4 = pathseg[ix + 4]; uint raw4 = memory[ix + 4];
uint raw5 = pathseg[ix + 5]; uint raw5 = memory[ix + 5];
uint raw6 = pathseg[ix + 6]; uint raw6 = memory[ix + 6];
uint raw7 = pathseg[ix + 7]; uint raw7 = memory[ix + 7];
uint raw8 = pathseg[ix + 8]; uint raw8 = memory[ix + 8];
PathFillCubic s; PathFillCubic s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -160,30 +160,30 @@ PathFillCubic PathFillCubic_read(PathFillCubicRef ref) {
void PathFillCubic_write(PathFillCubicRef ref, PathFillCubic s) { void PathFillCubic_write(PathFillCubicRef ref, PathFillCubic s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
pathseg[ix + 0] = floatBitsToUint(s.p0.x); memory[ix + 0] = floatBitsToUint(s.p0.x);
pathseg[ix + 1] = floatBitsToUint(s.p0.y); memory[ix + 1] = floatBitsToUint(s.p0.y);
pathseg[ix + 2] = floatBitsToUint(s.p1.x); memory[ix + 2] = floatBitsToUint(s.p1.x);
pathseg[ix + 3] = floatBitsToUint(s.p1.y); memory[ix + 3] = floatBitsToUint(s.p1.y);
pathseg[ix + 4] = floatBitsToUint(s.p2.x); memory[ix + 4] = floatBitsToUint(s.p2.x);
pathseg[ix + 5] = floatBitsToUint(s.p2.y); memory[ix + 5] = floatBitsToUint(s.p2.y);
pathseg[ix + 6] = floatBitsToUint(s.p3.x); memory[ix + 6] = floatBitsToUint(s.p3.x);
pathseg[ix + 7] = floatBitsToUint(s.p3.y); memory[ix + 7] = floatBitsToUint(s.p3.y);
pathseg[ix + 8] = s.path_ix; memory[ix + 8] = s.path_ix;
} }
PathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref) { PathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = pathseg[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = pathseg[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = pathseg[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = pathseg[ix + 3]; uint raw3 = memory[ix + 3];
uint raw4 = pathseg[ix + 4]; uint raw4 = memory[ix + 4];
uint raw5 = pathseg[ix + 5]; uint raw5 = memory[ix + 5];
uint raw6 = pathseg[ix + 6]; uint raw6 = memory[ix + 6];
uint raw7 = pathseg[ix + 7]; uint raw7 = memory[ix + 7];
uint raw8 = pathseg[ix + 8]; uint raw8 = memory[ix + 8];
uint raw9 = pathseg[ix + 9]; uint raw9 = memory[ix + 9];
uint raw10 = pathseg[ix + 10]; uint raw10 = memory[ix + 10];
PathStrokeCubic s; PathStrokeCubic s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -196,21 +196,21 @@ PathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref) {
void PathStrokeCubic_write(PathStrokeCubicRef ref, PathStrokeCubic s) { void PathStrokeCubic_write(PathStrokeCubicRef ref, PathStrokeCubic s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
pathseg[ix + 0] = floatBitsToUint(s.p0.x); memory[ix + 0] = floatBitsToUint(s.p0.x);
pathseg[ix + 1] = floatBitsToUint(s.p0.y); memory[ix + 1] = floatBitsToUint(s.p0.y);
pathseg[ix + 2] = floatBitsToUint(s.p1.x); memory[ix + 2] = floatBitsToUint(s.p1.x);
pathseg[ix + 3] = floatBitsToUint(s.p1.y); memory[ix + 3] = floatBitsToUint(s.p1.y);
pathseg[ix + 4] = floatBitsToUint(s.p2.x); memory[ix + 4] = floatBitsToUint(s.p2.x);
pathseg[ix + 5] = floatBitsToUint(s.p2.y); memory[ix + 5] = floatBitsToUint(s.p2.y);
pathseg[ix + 6] = floatBitsToUint(s.p3.x); memory[ix + 6] = floatBitsToUint(s.p3.x);
pathseg[ix + 7] = floatBitsToUint(s.p3.y); memory[ix + 7] = floatBitsToUint(s.p3.y);
pathseg[ix + 8] = s.path_ix; memory[ix + 8] = s.path_ix;
pathseg[ix + 9] = floatBitsToUint(s.stroke.x); memory[ix + 9] = floatBitsToUint(s.stroke.x);
pathseg[ix + 10] = floatBitsToUint(s.stroke.y); memory[ix + 10] = floatBitsToUint(s.stroke.y);
} }
uint PathSeg_tag(PathSegRef ref) { uint PathSeg_tag(PathSegRef ref) {
return pathseg[ref.offset >> 2]; return memory[ref.offset >> 2];
} }
PathFillLine PathSeg_FillLine_read(PathSegRef ref) { PathFillLine PathSeg_FillLine_read(PathSegRef ref) {
@ -230,26 +230,26 @@ PathStrokeCubic PathSeg_StrokeCubic_read(PathSegRef ref) {
} }
void PathSeg_Nop_write(PathSegRef ref) { void PathSeg_Nop_write(PathSegRef ref) {
pathseg[ref.offset >> 2] = PathSeg_Nop; memory[ref.offset >> 2] = PathSeg_Nop;
} }
void PathSeg_FillLine_write(PathSegRef ref, PathFillLine s) { void PathSeg_FillLine_write(PathSegRef ref, PathFillLine s) {
pathseg[ref.offset >> 2] = PathSeg_FillLine; memory[ref.offset >> 2] = PathSeg_FillLine;
PathFillLine_write(PathFillLineRef(ref.offset + 4), s); PathFillLine_write(PathFillLineRef(ref.offset + 4), s);
} }
void PathSeg_StrokeLine_write(PathSegRef ref, PathStrokeLine s) { void PathSeg_StrokeLine_write(PathSegRef ref, PathStrokeLine s) {
pathseg[ref.offset >> 2] = PathSeg_StrokeLine; memory[ref.offset >> 2] = PathSeg_StrokeLine;
PathStrokeLine_write(PathStrokeLineRef(ref.offset + 4), s); PathStrokeLine_write(PathStrokeLineRef(ref.offset + 4), s);
} }
void PathSeg_FillCubic_write(PathSegRef ref, PathFillCubic s) { void PathSeg_FillCubic_write(PathSegRef ref, PathFillCubic s) {
pathseg[ref.offset >> 2] = PathSeg_FillCubic; memory[ref.offset >> 2] = PathSeg_FillCubic;
PathFillCubic_write(PathFillCubicRef(ref.offset + 4), s); PathFillCubic_write(PathFillCubicRef(ref.offset + 4), s);
} }
void PathSeg_StrokeCubic_write(PathSegRef ref, PathStrokeCubic s) { void PathSeg_StrokeCubic_write(PathSegRef ref, PathStrokeCubic s) {
pathseg[ref.offset >> 2] = PathSeg_StrokeCubic; memory[ref.offset >> 2] = PathSeg_StrokeCubic;
PathStrokeCubic_write(PathStrokeCubicRef(ref.offset + 4), s); PathStrokeCubic_write(PathStrokeCubicRef(ref.offset + 4), s);
} }

View file

@ -173,10 +173,10 @@ CmdRef Cmd_index(CmdRef ref, uint index) {
CmdCircle CmdCircle_read(CmdCircleRef ref) { CmdCircle CmdCircle_read(CmdCircleRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = ptcl[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = ptcl[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = ptcl[ix + 3]; uint raw3 = memory[ix + 3];
CmdCircle s; CmdCircle s;
s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.radius = uintBitsToFloat(raw2); s.radius = uintBitsToFloat(raw2);
@ -186,18 +186,18 @@ CmdCircle CmdCircle_read(CmdCircleRef ref) {
void CmdCircle_write(CmdCircleRef ref, CmdCircle s) { void CmdCircle_write(CmdCircleRef ref, CmdCircle s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = floatBitsToUint(s.center.x); memory[ix + 0] = floatBitsToUint(s.center.x);
ptcl[ix + 1] = floatBitsToUint(s.center.y); memory[ix + 1] = floatBitsToUint(s.center.y);
ptcl[ix + 2] = floatBitsToUint(s.radius); memory[ix + 2] = floatBitsToUint(s.radius);
ptcl[ix + 3] = s.rgba_color; memory[ix + 3] = s.rgba_color;
} }
CmdLine CmdLine_read(CmdLineRef ref) { CmdLine CmdLine_read(CmdLineRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = ptcl[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = ptcl[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = ptcl[ix + 3]; uint raw3 = memory[ix + 3];
CmdLine s; CmdLine s;
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -206,17 +206,17 @@ CmdLine CmdLine_read(CmdLineRef ref) {
void CmdLine_write(CmdLineRef ref, CmdLine s) { void CmdLine_write(CmdLineRef ref, CmdLine s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = floatBitsToUint(s.start.x); memory[ix + 0] = floatBitsToUint(s.start.x);
ptcl[ix + 1] = floatBitsToUint(s.start.y); memory[ix + 1] = floatBitsToUint(s.start.y);
ptcl[ix + 2] = floatBitsToUint(s.end.x); memory[ix + 2] = floatBitsToUint(s.end.x);
ptcl[ix + 3] = floatBitsToUint(s.end.y); memory[ix + 3] = floatBitsToUint(s.end.y);
} }
CmdStroke CmdStroke_read(CmdStrokeRef ref) { CmdStroke CmdStroke_read(CmdStrokeRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = ptcl[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = ptcl[ix + 2]; uint raw2 = memory[ix + 2];
CmdStroke s; CmdStroke s;
s.tile_ref = raw0; s.tile_ref = raw0;
s.half_width = uintBitsToFloat(raw1); s.half_width = uintBitsToFloat(raw1);
@ -226,16 +226,16 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) { void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.tile_ref; memory[ix + 0] = s.tile_ref;
ptcl[ix + 1] = floatBitsToUint(s.half_width); memory[ix + 1] = floatBitsToUint(s.half_width);
ptcl[ix + 2] = s.rgba_color; memory[ix + 2] = s.rgba_color;
} }
CmdFill CmdFill_read(CmdFillRef ref) { CmdFill CmdFill_read(CmdFillRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = ptcl[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = ptcl[ix + 2]; uint raw2 = memory[ix + 2];
CmdFill s; CmdFill s;
s.tile_ref = raw0; s.tile_ref = raw0;
s.backdrop = int(raw1); s.backdrop = int(raw1);
@ -245,15 +245,15 @@ CmdFill CmdFill_read(CmdFillRef ref) {
void CmdFill_write(CmdFillRef ref, CmdFill s) { void CmdFill_write(CmdFillRef ref, CmdFill s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.tile_ref; memory[ix + 0] = s.tile_ref;
ptcl[ix + 1] = uint(s.backdrop); memory[ix + 1] = uint(s.backdrop);
ptcl[ix + 2] = s.rgba_color; memory[ix + 2] = s.rgba_color;
} }
CmdBeginClip CmdBeginClip_read(CmdBeginClipRef ref) { CmdBeginClip CmdBeginClip_read(CmdBeginClipRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = ptcl[ix + 1]; uint raw1 = memory[ix + 1];
CmdBeginClip s; CmdBeginClip s;
s.tile_ref = raw0; s.tile_ref = raw0;
s.backdrop = int(raw1); s.backdrop = int(raw1);
@ -262,13 +262,13 @@ CmdBeginClip CmdBeginClip_read(CmdBeginClipRef ref) {
void CmdBeginClip_write(CmdBeginClipRef ref, CmdBeginClip s) { void CmdBeginClip_write(CmdBeginClipRef ref, CmdBeginClip s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.tile_ref; memory[ix + 0] = s.tile_ref;
ptcl[ix + 1] = uint(s.backdrop); memory[ix + 1] = uint(s.backdrop);
} }
CmdBeginSolidClip CmdBeginSolidClip_read(CmdBeginSolidClipRef ref) { CmdBeginSolidClip CmdBeginSolidClip_read(CmdBeginSolidClipRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
CmdBeginSolidClip s; CmdBeginSolidClip s;
s.alpha = uintBitsToFloat(raw0); s.alpha = uintBitsToFloat(raw0);
return s; return s;
@ -276,12 +276,12 @@ CmdBeginSolidClip CmdBeginSolidClip_read(CmdBeginSolidClipRef ref) {
void CmdBeginSolidClip_write(CmdBeginSolidClipRef ref, CmdBeginSolidClip s) { void CmdBeginSolidClip_write(CmdBeginSolidClipRef ref, CmdBeginSolidClip s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = floatBitsToUint(s.alpha); memory[ix + 0] = floatBitsToUint(s.alpha);
} }
CmdEndClip CmdEndClip_read(CmdEndClipRef ref) { CmdEndClip CmdEndClip_read(CmdEndClipRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
CmdEndClip s; CmdEndClip s;
s.alpha = uintBitsToFloat(raw0); s.alpha = uintBitsToFloat(raw0);
return s; return s;
@ -289,12 +289,12 @@ CmdEndClip CmdEndClip_read(CmdEndClipRef ref) {
void CmdEndClip_write(CmdEndClipRef ref, CmdEndClip s) { void CmdEndClip_write(CmdEndClipRef ref, CmdEndClip s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = floatBitsToUint(s.alpha); memory[ix + 0] = floatBitsToUint(s.alpha);
} }
CmdSolid CmdSolid_read(CmdSolidRef ref) { CmdSolid CmdSolid_read(CmdSolidRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
CmdSolid s; CmdSolid s;
s.rgba_color = raw0; s.rgba_color = raw0;
return s; return s;
@ -302,12 +302,12 @@ CmdSolid CmdSolid_read(CmdSolidRef ref) {
void CmdSolid_write(CmdSolidRef ref, CmdSolid s) { void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.rgba_color; memory[ix + 0] = s.rgba_color;
} }
CmdSolidMask CmdSolidMask_read(CmdSolidMaskRef ref) { CmdSolidMask CmdSolidMask_read(CmdSolidMaskRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
CmdSolidMask s; CmdSolidMask s;
s.mask = uintBitsToFloat(raw0); s.mask = uintBitsToFloat(raw0);
return s; return s;
@ -315,12 +315,12 @@ CmdSolidMask CmdSolidMask_read(CmdSolidMaskRef ref) {
void CmdSolidMask_write(CmdSolidMaskRef ref, CmdSolidMask s) { void CmdSolidMask_write(CmdSolidMaskRef ref, CmdSolidMask s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = floatBitsToUint(s.mask); memory[ix + 0] = floatBitsToUint(s.mask);
} }
CmdJump CmdJump_read(CmdJumpRef ref) { CmdJump CmdJump_read(CmdJumpRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0]; uint raw0 = memory[ix + 0];
CmdJump s; CmdJump s;
s.new_ref = raw0; s.new_ref = raw0;
return s; return s;
@ -328,11 +328,11 @@ CmdJump CmdJump_read(CmdJumpRef ref) {
void CmdJump_write(CmdJumpRef ref, CmdJump s) { void CmdJump_write(CmdJumpRef ref, CmdJump s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.new_ref; memory[ix + 0] = s.new_ref;
} }
uint Cmd_tag(CmdRef ref) { uint Cmd_tag(CmdRef ref) {
return ptcl[ref.offset >> 2]; return memory[ref.offset >> 2];
} }
CmdCircle Cmd_Circle_read(CmdRef ref) { CmdCircle Cmd_Circle_read(CmdRef ref) {
@ -376,56 +376,56 @@ CmdJump Cmd_Jump_read(CmdRef ref) {
} }
void Cmd_End_write(CmdRef ref) { void Cmd_End_write(CmdRef ref) {
ptcl[ref.offset >> 2] = Cmd_End; memory[ref.offset >> 2] = Cmd_End;
} }
void Cmd_Circle_write(CmdRef ref, CmdCircle s) { void Cmd_Circle_write(CmdRef ref, CmdCircle s) {
ptcl[ref.offset >> 2] = Cmd_Circle; memory[ref.offset >> 2] = Cmd_Circle;
CmdCircle_write(CmdCircleRef(ref.offset + 4), s); CmdCircle_write(CmdCircleRef(ref.offset + 4), s);
} }
void Cmd_Line_write(CmdRef ref, CmdLine s) { void Cmd_Line_write(CmdRef ref, CmdLine s) {
ptcl[ref.offset >> 2] = Cmd_Line; memory[ref.offset >> 2] = Cmd_Line;
CmdLine_write(CmdLineRef(ref.offset + 4), s); CmdLine_write(CmdLineRef(ref.offset + 4), s);
} }
void Cmd_Fill_write(CmdRef ref, CmdFill s) { void Cmd_Fill_write(CmdRef ref, CmdFill s) {
ptcl[ref.offset >> 2] = Cmd_Fill; memory[ref.offset >> 2] = Cmd_Fill;
CmdFill_write(CmdFillRef(ref.offset + 4), s); CmdFill_write(CmdFillRef(ref.offset + 4), s);
} }
void Cmd_BeginClip_write(CmdRef ref, CmdBeginClip s) { void Cmd_BeginClip_write(CmdRef ref, CmdBeginClip s) {
ptcl[ref.offset >> 2] = Cmd_BeginClip; memory[ref.offset >> 2] = Cmd_BeginClip;
CmdBeginClip_write(CmdBeginClipRef(ref.offset + 4), s); CmdBeginClip_write(CmdBeginClipRef(ref.offset + 4), s);
} }
void Cmd_BeginSolidClip_write(CmdRef ref, CmdBeginSolidClip s) { void Cmd_BeginSolidClip_write(CmdRef ref, CmdBeginSolidClip s) {
ptcl[ref.offset >> 2] = Cmd_BeginSolidClip; memory[ref.offset >> 2] = Cmd_BeginSolidClip;
CmdBeginSolidClip_write(CmdBeginSolidClipRef(ref.offset + 4), s); CmdBeginSolidClip_write(CmdBeginSolidClipRef(ref.offset + 4), s);
} }
void Cmd_EndClip_write(CmdRef ref, CmdEndClip s) { void Cmd_EndClip_write(CmdRef ref, CmdEndClip s) {
ptcl[ref.offset >> 2] = Cmd_EndClip; memory[ref.offset >> 2] = Cmd_EndClip;
CmdEndClip_write(CmdEndClipRef(ref.offset + 4), s); CmdEndClip_write(CmdEndClipRef(ref.offset + 4), s);
} }
void Cmd_Stroke_write(CmdRef ref, CmdStroke s) { void Cmd_Stroke_write(CmdRef ref, CmdStroke s) {
ptcl[ref.offset >> 2] = Cmd_Stroke; memory[ref.offset >> 2] = Cmd_Stroke;
CmdStroke_write(CmdStrokeRef(ref.offset + 4), s); CmdStroke_write(CmdStrokeRef(ref.offset + 4), s);
} }
void Cmd_Solid_write(CmdRef ref, CmdSolid s) { void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
ptcl[ref.offset >> 2] = Cmd_Solid; memory[ref.offset >> 2] = Cmd_Solid;
CmdSolid_write(CmdSolidRef(ref.offset + 4), s); CmdSolid_write(CmdSolidRef(ref.offset + 4), s);
} }
void Cmd_SolidMask_write(CmdRef ref, CmdSolidMask s) { void Cmd_SolidMask_write(CmdRef ref, CmdSolidMask s) {
ptcl[ref.offset >> 2] = Cmd_SolidMask; memory[ref.offset >> 2] = Cmd_SolidMask;
CmdSolidMask_write(CmdSolidMaskRef(ref.offset + 4), s); CmdSolidMask_write(CmdSolidMaskRef(ref.offset + 4), s);
} }
void Cmd_Jump_write(CmdRef ref, CmdJump s) { void Cmd_Jump_write(CmdRef ref, CmdJump s) {
ptcl[ref.offset >> 2] = Cmd_Jump; memory[ref.offset >> 2] = Cmd_Jump;
CmdJump_write(CmdJumpRef(ref.offset + 4), s); CmdJump_write(CmdJumpRef(ref.offset + 4), s);
} }

View file

@ -28,3 +28,13 @@
#define N_TILE (N_TILE_X * N_TILE_Y) #define N_TILE (N_TILE_X * N_TILE_Y)
#define LG_N_TILE (7 + LG_WG_FACTOR) #define LG_N_TILE (7 + LG_WG_FACTOR)
#define N_SLICE (N_TILE / 32) #define N_SLICE (N_TILE / 32)
// Config holds the scene setup and the locations of the static allocations.
// The *_base fields are added to an element index times the element size to
// build refs, whose offsets are in bytes (accessors shift them right by 2).
struct Config {
// number of elements; upper bound for element_ix when reading Annotated tags.
uint n_elements; // paths
// number of path segments; upper bound for element_ix when reading PathSeg tags.
uint n_pathseg;
// base of the Path array (PathRef(conf.tile_base + ix * Path_size)).
uint tile_base;
// NOTE(review): not referenced in this part of the file; presumably the base
// of the binning output - confirm against the binning shader.
uint bin_base;
// base of the per-tile command lists (CmdRef(conf.ptcl_base + tile_ix * PTCL_INITIAL_ALLOC)).
uint ptcl_base;
// base of the PathSeg array (PathSegRef(conf.pathseg_base + ix * PathSeg_size)).
uint pathseg_base;
// base of the Annotated array (AnnotatedRef(conf.anno_base + ix * Annotated_size)).
uint anno_base;
};

View file

@ -51,9 +51,9 @@ TileSegRef TileSeg_index(TileSegRef ref, uint index) {
Path Path_read(PathRef ref) { Path Path_read(PathRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = tile[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = tile[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = tile[ix + 2]; uint raw2 = memory[ix + 2];
Path s; Path s;
s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16); s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16);
s.tiles = TileRef(raw2); s.tiles = TileRef(raw2);
@ -62,15 +62,15 @@ Path Path_read(PathRef ref) {
void Path_write(PathRef ref, Path s) { void Path_write(PathRef ref, Path s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
tile[ix + 0] = s.bbox.x | (s.bbox.y << 16); memory[ix + 0] = s.bbox.x | (s.bbox.y << 16);
tile[ix + 1] = s.bbox.z | (s.bbox.w << 16); memory[ix + 1] = s.bbox.z | (s.bbox.w << 16);
tile[ix + 2] = s.tiles.offset; memory[ix + 2] = s.tiles.offset;
} }
Tile Tile_read(TileRef ref) { Tile Tile_read(TileRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = tile[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = tile[ix + 1]; uint raw1 = memory[ix + 1];
Tile s; Tile s;
s.tile = TileSegRef(raw0); s.tile = TileSegRef(raw0);
s.backdrop = int(raw1); s.backdrop = int(raw1);
@ -79,18 +79,18 @@ Tile Tile_read(TileRef ref) {
void Tile_write(TileRef ref, Tile s) { void Tile_write(TileRef ref, Tile s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
tile[ix + 0] = s.tile.offset; memory[ix + 0] = s.tile.offset;
tile[ix + 1] = uint(s.backdrop); memory[ix + 1] = uint(s.backdrop);
} }
TileSeg TileSeg_read(TileSegRef ref) { TileSeg TileSeg_read(TileSegRef ref) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
uint raw0 = tile[ix + 0]; uint raw0 = memory[ix + 0];
uint raw1 = tile[ix + 1]; uint raw1 = memory[ix + 1];
uint raw2 = tile[ix + 2]; uint raw2 = memory[ix + 2];
uint raw3 = tile[ix + 3]; uint raw3 = memory[ix + 3];
uint raw4 = tile[ix + 4]; uint raw4 = memory[ix + 4];
uint raw5 = tile[ix + 5]; uint raw5 = memory[ix + 5];
TileSeg s; TileSeg s;
s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -101,11 +101,11 @@ TileSeg TileSeg_read(TileSegRef ref) {
void TileSeg_write(TileSegRef ref, TileSeg s) { void TileSeg_write(TileSegRef ref, TileSeg s) {
uint ix = ref.offset >> 2; uint ix = ref.offset >> 2;
tile[ix + 0] = floatBitsToUint(s.origin.x); memory[ix + 0] = floatBitsToUint(s.origin.x);
tile[ix + 1] = floatBitsToUint(s.origin.y); memory[ix + 1] = floatBitsToUint(s.origin.y);
tile[ix + 2] = floatBitsToUint(s.vector.x); memory[ix + 2] = floatBitsToUint(s.vector.x);
tile[ix + 3] = floatBitsToUint(s.vector.y); memory[ix + 3] = floatBitsToUint(s.vector.y);
tile[ix + 4] = floatBitsToUint(s.y_edge); memory[ix + 4] = floatBitsToUint(s.y_edge);
tile[ix + 5] = s.next.offset; memory[ix + 5] = s.next.offset;
} }

View file

@ -6,24 +6,15 @@
#extension GL_GOOGLE_include_directive : enable #extension GL_GOOGLE_include_directive : enable
#include "setup.h" #include "setup.h"
#include "mem.h"
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR) #define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG) #define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in; layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer AnnotatedBuf { layout(set = 0, binding = 1) readonly buffer ConfigBuf {
uint[] annotated; Config conf;
};
layout(set = 0, binding = 1) buffer AllocBuf {
uint n_elements;
uint n_pathseg;
uint alloc;
};
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
}; };
#include "annotated.h" #include "annotated.h"
@ -34,16 +25,20 @@ layout(set = 0, binding = 2) buffer TileBuf {
#define SY (1.0 / float(TILE_HEIGHT_PX)) #define SY (1.0 / float(TILE_HEIGHT_PX))
shared uint sh_tile_count[TILE_ALLOC_WG]; shared uint sh_tile_count[TILE_ALLOC_WG];
shared uint sh_tile_alloc; shared Alloc sh_tile_alloc;
// Entry point of the tile allocation shader. Only the start and end of the
// function are visible here; the middle is elided by the diff.
// Visible behaviour: each invocation handles element gl_GlobalInvocationID.x,
// the last invocation of the workgroup reserves tile_count * Tile_size bytes,
// every invocation in range records its TileRef in its Path, and the
// workgroup then zeroes the reserved words cooperatively.
// Lines with two statements show the pre-change code followed by the
// post-change code; single-statement lines exist on one side only.
void main() { void main() {
// Post-change only: do nothing if the overflow flag is already set.
// NOTE(review): mem_overflow is declared outside this view (presumably in
// mem.h). A barrier() follows below, so confirm all invocations of a workgroup
// observe the same value here; otherwise some would skip the barrier.
if (mem_overflow) {
return;
}
uint th_ix = gl_LocalInvocationID.x; uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x; uint element_ix = gl_GlobalInvocationID.x;
// Post-change, the Path and Annotated refs are offset by conf.tile_base and
// conf.anno_base; pre-change each lived at offset 0 of its own buffer.
PathRef path_ref = PathRef(element_ix * Path_size); PathRef path_ref = PathRef(conf.tile_base + element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size); AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
// Invocations at or past the element count keep the Nop tag and never read the annotation.
uint tag = Annotated_Nop; uint tag = Annotated_Nop;
if (element_ix < n_elements) { if (element_ix < conf.n_elements) {
tag = Annotated_tag(ref); tag = Annotated_tag(ref);
} }
int x0 = 0, y0 = 0, x1 = 0, y1 = 0; int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
@ -86,23 +81,26 @@ void main() {
sh_tile_count[th_ix] = tile_count; sh_tile_count[th_ix] = tile_count;
} }
// Only the last invocation of the workgroup allocates. Its tile_count is
// presumably the workgroup total from the elided accumulation above — TODO confirm.
// Pre-change: atomicAdd on the `alloc` counter; post-change: malloc().
if (th_ix == TILE_ALLOC_WG - 1) { if (th_ix == TILE_ALLOC_WG - 1) {
sh_tile_alloc = atomicAdd(alloc, tile_count * Tile_size); sh_tile_alloc = malloc(tile_count * Tile_size);
} }
// After the barrier every invocation reads the allocation result from shared memory.
barrier(); barrier();
uint alloc_start = sh_tile_alloc; Alloc alloc_start = sh_tile_alloc;
// Post-change only: stop if the allocation failed. The pre-change atomicAdd path had no such check.
if (alloc_start.failed) {
return;
}
if (element_ix < n_elements) { if (element_ix < conf.n_elements) {
// This element's tiles start after those counted for lower-indexed invocations
// (sh_tile_count[th_ix - 1], or 0 for the first invocation).
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0; uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
path.tiles = TileRef(alloc_start + Tile_size * tile_subix); path.tiles = TileRef(alloc_start.offset + Tile_size * tile_subix);
Path_write(path_ref, path); Path_write(path_ref, path);
} }
// Zero out allocated tiles efficiently // Zero out allocated tiles efficiently
// total_count is in 32-bit words (Tile_size / 4 words per tile); start_ix is the
// allocation offset shifted right by 2 to give a word index.
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4); uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
uint start_ix = alloc_start >> 2; uint start_ix = alloc_start.offset >> 2;
// Each invocation starts at word th_ix and advances by TILE_ALLOC_WG per iteration.
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) { for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
// Note: this interleaving is faster than using Tile_write // Note: this interleaving is faster than using Tile_write
// by a significant amount. // by a significant amount.
tile[start_ix + i] = 0; memory[start_ix + i] = 0;
} }
} }

Binary file not shown.

View file

@ -156,15 +156,16 @@ pub fn dump_k1_data(k1_buf: &[u32]) {
pub struct Renderer { pub struct Renderer {
pub image_dev: hub::Image, // resulting image pub image_dev: hub::Image, // resulting image
scene_buf: hub::Buffer, scene_buf_host: hub::Buffer,
scene_dev: hub::Buffer, scene_buf_dev: hub::Buffer,
pub state_buf: hub::Buffer, memory_buf_host: hub::Buffer,
pub anno_buf: hub::Buffer, memory_buf_dev: hub::Buffer,
pub pathseg_buf: hub::Buffer,
pub tile_buf: hub::Buffer, state_buf: hub::Buffer,
pub bin_buf: hub::Buffer,
pub ptcl_buf: hub::Buffer, config_buf_host: hub::Buffer,
config_buf_dev: hub::Buffer,
el_pipeline: hub::Pipeline, el_pipeline: hub::Pipeline,
el_ds: hub::DescriptorSet, el_ds: hub::DescriptorSet,
@ -178,23 +179,12 @@ pub struct Renderer {
backdrop_pipeline: hub::Pipeline, backdrop_pipeline: hub::Pipeline,
backdrop_ds: hub::DescriptorSet, backdrop_ds: hub::DescriptorSet,
tile_alloc_buf_host: hub::Buffer,
tile_alloc_buf_dev: hub::Buffer,
bin_pipeline: hub::Pipeline, bin_pipeline: hub::Pipeline,
bin_ds: hub::DescriptorSet, bin_ds: hub::DescriptorSet,
bin_alloc_buf_host: hub::Buffer,
bin_alloc_buf_dev: hub::Buffer,
coarse_pipeline: hub::Pipeline, coarse_pipeline: hub::Pipeline,
coarse_ds: hub::DescriptorSet, coarse_ds: hub::DescriptorSet,
coarse_alloc_buf_host: hub::Buffer,
coarse_alloc_buf_dev: hub::Buffer,
clip_scratch_buf: hub::Buffer,
k4_pipeline: hub::Pipeline, k4_pipeline: hub::Pipeline,
k4_ds: hub::DescriptorSet, k4_ds: hub::DescriptorSet,
@ -221,88 +211,83 @@ impl Renderer {
n_elements, n_paths, n_pathseg n_elements, n_paths, n_pathseg
); );
let mut scene_buf = session let mut scene_buf_host = session
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host) .create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
.unwrap(); .unwrap();
let scene_dev = session let scene_buf_dev = session
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
.unwrap(); .unwrap();
scene_buf.write(&scene)?; scene_buf_host.write(&scene)?;
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?; let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
let anno_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
let pathseg_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
let tile_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
let bin_buf = session.create_buffer(64 * 1024 * 1024, dev)?;
let ptcl_buf = session.create_buffer(48 * 1024 * 1024, dev)?;
let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?; let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
let mut config_buf_host = session.create_buffer(7*4, host)?;
let config_buf_dev = session.create_buffer(7*4, dev)?;
// TODO: constants
const PATH_SIZE: usize = 12;
const BIN_SIZE: usize = 8;
const PATHSEG_SIZE: usize = 48;
const ANNO_SIZE: usize = 28;
let mut alloc = 0;
let tile_base = alloc;
alloc += ((n_paths + 3) & !3) * PATH_SIZE;
let bin_base = alloc;
alloc += ((n_paths + 255) & !255) * BIN_SIZE;
let ptcl_base = alloc;
alloc += WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
let pathseg_base = alloc;
alloc += (n_pathseg * PATHSEG_SIZE + 3) & !3;
let anno_base = alloc;
alloc += (n_paths * ANNO_SIZE + 3) & !3;
config_buf_host.write(&[n_paths as u32, n_pathseg as u32, tile_base as u32, bin_base as u32, ptcl_base as u32, pathseg_base as u32, anno_base as u32])?;
let mut memory_buf_host = session.create_buffer(2*4, host)?;
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
memory_buf_host.write(&[alloc as u32, 0 /* Overflow flag */])?;
let el_code = include_bytes!("../shader/elements.spv"); let el_code = include_bytes!("../shader/elements.spv");
let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?; let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
let el_ds = session.create_simple_descriptor_set( let el_ds = session.create_simple_descriptor_set(
&el_pipeline, &el_pipeline,
&[&scene_dev, &state_buf, &anno_buf, &pathseg_buf], &[&memory_buf_dev, &config_buf_dev, &scene_buf_dev, &state_buf],
)?; )?;
let mut tile_alloc_buf_host = session.create_buffer(12, host)?;
let tile_alloc_buf_dev = session.create_buffer(12, dev)?;
// TODO: constants
const PATH_SIZE: usize = 12;
let tile_alloc_start = ((n_paths + 31) & !31) * PATH_SIZE;
tile_alloc_buf_host.write(&[n_paths as u32, n_pathseg as u32, tile_alloc_start as u32])?;
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv"); let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 3)?; let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
let tile_ds = session.create_simple_descriptor_set( let tile_ds = session.create_simple_descriptor_set(
&tile_pipeline, &tile_pipeline,
&[&anno_buf, &tile_alloc_buf_dev, &tile_buf], &[&memory_buf_dev, &config_buf_dev],
)?; )?;
let path_alloc_code = include_bytes!("../shader/path_coarse.spv"); let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 3)?; let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?;
let path_ds = session.create_simple_descriptor_set( let path_ds = session.create_simple_descriptor_set(
&path_pipeline, &path_pipeline,
&[&pathseg_buf, &tile_alloc_buf_dev, &tile_buf], &[&memory_buf_dev, &config_buf_dev],
)?; )?;
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv"); let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_alloc_code, 3)?; let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_alloc_code, 2)?;
let backdrop_ds = session.create_simple_descriptor_set( let backdrop_ds = session.create_simple_descriptor_set(
&backdrop_pipeline, &backdrop_pipeline,
&[&anno_buf, &tile_alloc_buf_dev, &tile_buf], &[&memory_buf_dev, &config_buf_dev],
)?; )?;
let mut bin_alloc_buf_host = session.create_buffer(8, host)?;
let bin_alloc_buf_dev = session.create_buffer(8, dev)?;
// TODO: constants // TODO: constants
let bin_alloc_start = ((n_paths + 255) & !255) * 8;
bin_alloc_buf_host.write(&[n_paths as u32, bin_alloc_start as u32])?;
let bin_code = include_bytes!("../shader/binning.spv"); let bin_code = include_bytes!("../shader/binning.spv");
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 3)?; let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?;
let bin_ds = session.create_simple_descriptor_set( let bin_ds = session.create_simple_descriptor_set(
&bin_pipeline, &bin_pipeline,
&[&anno_buf, &bin_alloc_buf_dev, &bin_buf], &[&memory_buf_dev, &config_buf_dev],
)?; )?;
let clip_scratch_buf = session.create_buffer(1024 * 1024, dev)?;
let mut coarse_alloc_buf_host = session.create_buffer(8, host)?;
let coarse_alloc_buf_dev = session.create_buffer(8, dev)?;
let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
coarse_alloc_buf_host.write(&[n_paths as u32, coarse_alloc_start as u32])?;
let coarse_code = include_bytes!("../shader/coarse.spv"); let coarse_code = include_bytes!("../shader/coarse.spv");
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 5)?; let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?;
let coarse_ds = session.create_simple_descriptor_set( let coarse_ds = session.create_simple_descriptor_set(
&coarse_pipeline, &coarse_pipeline,
&[ &[&memory_buf_dev, &config_buf_dev],
&anno_buf,
&bin_buf,
&tile_buf,
&coarse_alloc_buf_dev,
&ptcl_buf,
],
)?; )?;
let bg_image = Self::make_test_bg_image(&session); let bg_image = Self::make_test_bg_image(&session);
@ -318,20 +303,25 @@ impl Renderer {
let sampler = session.create_sampler(SamplerParams::Linear)?; let sampler = session.create_sampler(SamplerParams::Linear)?;
let k4_pipeline = session let k4_pipeline = session
.pipeline_builder() .pipeline_builder()
.add_buffers(3) .add_buffers(2)
.add_images(1) .add_images(1)
.add_textures(max_textures) .add_textures(max_textures)
.create_compute_pipeline(&session, k4_code)?; .create_compute_pipeline(&session, k4_code)?;
let k4_ds = session let k4_ds = session
.descriptor_set_builder() .descriptor_set_builder()
.add_buffers(&[&ptcl_buf, &tile_buf, &clip_scratch_buf]) .add_buffers(&[&memory_buf_dev, &config_buf_dev])
.add_images(&[&image_dev]) .add_images(&[&image_dev])
.add_textures(&[&bg_image], &sampler) .add_textures(&[&bg_image], &sampler)
.build(&session, &k4_pipeline)?; .build(&session, &k4_pipeline)?;
Ok(Renderer { Ok(Renderer {
scene_buf, scene_buf_host,
scene_dev, scene_buf_dev,
memory_buf_host,
memory_buf_dev,
state_buf,
config_buf_host,
config_buf_dev,
image_dev, image_dev,
el_pipeline, el_pipeline,
el_ds, el_ds,
@ -347,19 +337,6 @@ impl Renderer {
coarse_ds, coarse_ds,
k4_pipeline, k4_pipeline,
k4_ds, k4_ds,
state_buf,
anno_buf,
pathseg_buf,
tile_buf,
bin_buf,
ptcl_buf,
tile_alloc_buf_host,
tile_alloc_buf_dev,
bin_alloc_buf_host,
bin_alloc_buf_dev,
coarse_alloc_buf_host,
coarse_alloc_buf_dev,
clip_scratch_buf,
n_elements, n_elements,
n_paths, n_paths,
n_pathseg, n_pathseg,
@ -368,21 +345,16 @@ impl Renderer {
} }
pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) { pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) {
cmd_buf.copy_buffer(self.scene_buf.vk_buffer(), self.scene_dev.vk_buffer()); cmd_buf.copy_buffer(self.scene_buf_host.vk_buffer(), self.scene_buf_dev.vk_buffer());
cmd_buf.copy_buffer( cmd_buf.copy_buffer(
self.tile_alloc_buf_host.vk_buffer(), self.config_buf_host.vk_buffer(),
self.tile_alloc_buf_dev.vk_buffer(), self.config_buf_dev.vk_buffer(),
); );
cmd_buf.copy_buffer( cmd_buf.copy_buffer(
self.bin_alloc_buf_host.vk_buffer(), self.memory_buf_host.vk_buffer(),
self.bin_alloc_buf_dev.vk_buffer(), self.memory_buf_dev.vk_buffer(),
);
cmd_buf.copy_buffer(
self.coarse_alloc_buf_host.vk_buffer(),
self.coarse_alloc_buf_dev.vk_buffer(),
); );
cmd_buf.clear_buffer(self.state_buf.vk_buffer(), None); cmd_buf.clear_buffer(self.state_buf.vk_buffer(), None);
cmd_buf.clear_buffer(self.clip_scratch_buf.vk_buffer(), Some(4));
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.image_barrier( cmd_buf.image_barrier(
self.image_dev.vk_image(), self.image_dev.vk_image(),