all: add optional memory checks

Defining MEM_DEBUG in mem.h will add a size field to Alloc and enable
bounds and alignment checks for every memory read and write.

Notes:
- Deriving an Alloc from Path.tiles is unsound, but it's more trouble to
  convert Path.tiles from TileRef to a variable-sized Alloc.
- elements.comp notes that "We should be able to use an array of structs but the
  NV shader compiler doesn't seem to like it". If that's still relevant, do
  shared arrays of Allocs work?

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur 2020-12-24 12:00:53 +01:00
parent ee67a0a515
commit 6a4e26ef2a
22 changed files with 710 additions and 507 deletions

View file

@ -31,22 +31,18 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
for name in &module.def_names {
let def = module.defs.get(name).unwrap();
let mem = &"memory".to_owned();
let mut buf_name = &module.name;
if !module.name.eq(&"state") && !module.name.eq(&"scene") {
buf_name = mem;
}
let is_mem = !module.name.eq(&"state") && !module.name.eq(&"scene");
match def {
(_size, LayoutTypeDef::Struct(fields)) => {
gen_struct_read(&mut r, buf_name, &name, fields);
gen_struct_read(&mut r, &module.name, &name, is_mem, fields);
if module.gpu_write {
gen_struct_write(&mut r, buf_name, &name, fields);
gen_struct_write(&mut r, &module.name, &name, is_mem, fields);
}
}
(_size, LayoutTypeDef::Enum(en)) => {
gen_enum_read(&mut r, buf_name, &name, en);
gen_enum_read(&mut r, &module.name, &name, is_mem, en);
if module.gpu_write {
gen_enum_write(&mut r, buf_name, &name, en);
gen_enum_write(&mut r, &module.name, &name, is_mem, en);
}
}
}
@ -96,14 +92,23 @@ fn gen_struct_read(
r: &mut String,
bufname: &str,
name: &str,
is_mem: bool,
fields: &[(String, usize, LayoutType)],
) {
writeln!(r, "{} {}_read({}Ref ref) {{", name, name, name).unwrap();
write!(r, "{} {}_read(", name, name).unwrap();
if is_mem {
write!(r, "Alloc a, ").unwrap();
}
writeln!(r, "{}Ref ref) {{", name).unwrap();
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
let coverage = crate::layout::struct_coverage(fields, false);
for (i, fields) in coverage.iter().enumerate() {
if !fields.is_empty() {
writeln!(r, " uint raw{} = {}[ix + {}];", i, bufname, i).unwrap();
if is_mem {
writeln!(r, " uint raw{} = read_mem(a, ix + {});", i, i).unwrap();
} else {
writeln!(r, " uint raw{} = {}[ix + {}];", i, bufname, i).unwrap();
}
}
}
writeln!(r, " {} s;", name).unwrap();
@ -130,26 +135,47 @@ fn gen_enum_read(
r: &mut String,
bufname: &str,
name: &str,
is_mem: bool,
variants: &[(String, Vec<(usize, LayoutType)>)],
) {
writeln!(r, "uint {}_tag({}Ref ref) {{", name, name).unwrap();
writeln!(r, " return {}[ref.offset >> 2];", bufname).unwrap();
if is_mem {
writeln!(r, "uint {}_tag(Alloc a, {}Ref ref) {{", name, name).unwrap();
writeln!(r, " return read_mem(a, ref.offset >> 2);").unwrap();
} else {
writeln!(r, "uint {}_tag({}Ref ref) {{", name, name).unwrap();
writeln!(r, " return {}[ref.offset >> 2];", bufname).unwrap();
}
writeln!(r, "}}\n").unwrap();
for (var_name, payload) in variants {
if payload.len() == 1 {
if let GpuType::InlineStruct(structname) = &payload[0].1.ty {
writeln!(
r,
"{} {}_{}_read({}Ref ref) {{",
structname, name, var_name, name
)
.unwrap();
writeln!(
r,
" return {}_read({}Ref(ref.offset + {}));",
structname, structname, payload[0].0
)
.unwrap();
if is_mem {
writeln!(
r,
"{} {}_{}_read(Alloc a, {}Ref ref) {{",
structname, name, var_name, name
)
.unwrap();
writeln!(
r,
" return {}_read(a, {}Ref(ref.offset + {}));",
structname, structname, payload[0].0
)
.unwrap();
} else {
writeln!(
r,
"{} {}_{}_read({}Ref ref) {{",
structname, name, var_name, name
)
.unwrap();
writeln!(
r,
" return {}_read({}Ref(ref.offset + {}));",
structname, structname, payload[0].0
)
.unwrap();
}
writeln!(r, "}}\n").unwrap();
}
}
@ -303,9 +329,14 @@ fn gen_struct_write(
r: &mut String,
bufname: &str,
name: &str,
is_mem: bool,
fields: &[(String, usize, LayoutType)],
) {
writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap();
write!(r, "void {}_write(", name).unwrap();
if is_mem {
write!(r, "Alloc a, ").unwrap();
}
writeln!(r, "{}Ref ref, {} s) {{", name, name).unwrap();
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
let coverage = crate::layout::struct_coverage(fields, true);
@ -381,13 +412,20 @@ fn gen_struct_write(
}
if !pieces.is_empty() {
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
if is_mem {
write!(r, " write_mem(a, ix + {}, ", i).unwrap();
} else {
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
}
for (j, piece) in pieces.iter().enumerate() {
if j != 0 {
write!(r, " | ").unwrap();
}
write!(r, "{}", piece).unwrap();
}
if is_mem {
write!(r, ")").unwrap();
}
writeln!(r, ";").unwrap();
}
}
@ -429,38 +467,70 @@ fn gen_enum_write(
r: &mut String,
bufname: &str,
name: &str,
is_mem: bool,
variants: &[(String, Vec<(usize, LayoutType)>)],
) {
for (var_name, payload) in variants {
if payload.is_empty() {
writeln!(r, "void {}_{}_write({}Ref ref) {{", name, var_name, name).unwrap();
writeln!(
r,
" {}[ref.offset >> 2] = {}_{};",
bufname, name, var_name
)
.unwrap();
writeln!(r, "}}\n").unwrap();
} else if payload.len() == 1 {
if let GpuType::InlineStruct(structname) = &payload[0].1.ty {
if is_mem {
writeln!(r, "void {}_{}_write(Alloc a, {}Ref ref) {{", name, var_name, name).unwrap();
writeln!(
r,
"void {}_{}_write({}Ref ref, {} s) {{",
name, var_name, name, structname
" write_mem(a, ref.offset >> 2, {}_{});",
name, var_name
)
.unwrap();
} else {
writeln!(r, "void {}_{}_write({}Ref ref) {{", name, var_name, name).unwrap();
writeln!(
r,
" {}[ref.offset >> 2] = {}_{};",
bufname, name, var_name
)
.unwrap();
writeln!(
r,
" {}_write({}Ref(ref.offset + {}), s);",
structname, structname, payload[0].0
)
.unwrap();
}
writeln!(r, "}}\n").unwrap();
} else if payload.len() == 1 {
if let GpuType::InlineStruct(structname) = &payload[0].1.ty {
if is_mem {
writeln!(
r,
"void {}_{}_write(Alloc a, {}Ref ref, {} s) {{",
name, var_name, name, structname
)
.unwrap();
writeln!(
r,
" write_mem(a, ref.offset >> 2, {}_{});",
name, var_name
)
.unwrap();
writeln!(
r,
" {}_write(a, {}Ref(ref.offset + {}), s);",
structname, structname, payload[0].0
)
.unwrap();
} else {
writeln!(
r,
"void {}_{}_write(Alloc a, {}Ref ref, {} s) {{",
name, var_name, name, structname
)
.unwrap();
writeln!(
r,
" {}[ref.offset >> 2] = {}_{};",
bufname, name, var_name
)
.unwrap();
writeln!(
r,
" {}_write({}Ref(ref.offset + {}), s);",
structname, structname, payload[0].0
)
.unwrap();
}
writeln!(r, "}}\n").unwrap();
}
}

View file

@ -62,36 +62,36 @@ AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {
return AnnotatedRef(ref.offset + index * Annotated_size);
}
AnnoFill AnnoFill_read(AnnoFillRef ref) {
AnnoFill AnnoFill_read(Alloc a, AnnoFillRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
AnnoFill s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.rgba_color = raw4;
return s;
}
void AnnoFill_write(AnnoFillRef ref, AnnoFill s) {
void AnnoFill_write(Alloc a, AnnoFillRef ref, AnnoFill s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.bbox.x);
memory[ix + 1] = floatBitsToUint(s.bbox.y);
memory[ix + 2] = floatBitsToUint(s.bbox.z);
memory[ix + 3] = floatBitsToUint(s.bbox.w);
memory[ix + 4] = s.rgba_color;
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
write_mem(a, ix + 4, s.rgba_color);
}
AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) {
AnnoStroke AnnoStroke_read(Alloc a, AnnoStrokeRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
uint raw5 = memory[ix + 5];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
AnnoStroke s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.rgba_color = raw4;
@ -99,76 +99,76 @@ AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) {
return s;
}
void AnnoStroke_write(AnnoStrokeRef ref, AnnoStroke s) {
void AnnoStroke_write(Alloc a, AnnoStrokeRef ref, AnnoStroke s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.bbox.x);
memory[ix + 1] = floatBitsToUint(s.bbox.y);
memory[ix + 2] = floatBitsToUint(s.bbox.z);
memory[ix + 3] = floatBitsToUint(s.bbox.w);
memory[ix + 4] = s.rgba_color;
memory[ix + 5] = floatBitsToUint(s.linewidth);
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
write_mem(a, ix + 4, s.rgba_color);
write_mem(a, ix + 5, floatBitsToUint(s.linewidth));
}
AnnoClip AnnoClip_read(AnnoClipRef ref) {
AnnoClip AnnoClip_read(Alloc a, AnnoClipRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
AnnoClip s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s;
}
void AnnoClip_write(AnnoClipRef ref, AnnoClip s) {
void AnnoClip_write(Alloc a, AnnoClipRef ref, AnnoClip s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.bbox.x);
memory[ix + 1] = floatBitsToUint(s.bbox.y);
memory[ix + 2] = floatBitsToUint(s.bbox.z);
memory[ix + 3] = floatBitsToUint(s.bbox.w);
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
}
uint Annotated_tag(AnnotatedRef ref) {
return memory[ref.offset >> 2];
uint Annotated_tag(Alloc a, AnnotatedRef ref) {
return read_mem(a, ref.offset >> 2);
}
AnnoStroke Annotated_Stroke_read(AnnotatedRef ref) {
return AnnoStroke_read(AnnoStrokeRef(ref.offset + 4));
AnnoStroke Annotated_Stroke_read(Alloc a, AnnotatedRef ref) {
return AnnoStroke_read(a, AnnoStrokeRef(ref.offset + 4));
}
AnnoFill Annotated_Fill_read(AnnotatedRef ref) {
return AnnoFill_read(AnnoFillRef(ref.offset + 4));
AnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref) {
return AnnoFill_read(a, AnnoFillRef(ref.offset + 4));
}
AnnoClip Annotated_BeginClip_read(AnnotatedRef ref) {
return AnnoClip_read(AnnoClipRef(ref.offset + 4));
AnnoClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref) {
return AnnoClip_read(a, AnnoClipRef(ref.offset + 4));
}
AnnoClip Annotated_EndClip_read(AnnotatedRef ref) {
return AnnoClip_read(AnnoClipRef(ref.offset + 4));
AnnoClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) {
return AnnoClip_read(a, AnnoClipRef(ref.offset + 4));
}
void Annotated_Nop_write(AnnotatedRef ref) {
memory[ref.offset >> 2] = Annotated_Nop;
void Annotated_Nop_write(Alloc a, AnnotatedRef ref) {
write_mem(a, ref.offset >> 2, Annotated_Nop);
}
void Annotated_Stroke_write(AnnotatedRef ref, AnnoStroke s) {
memory[ref.offset >> 2] = Annotated_Stroke;
AnnoStroke_write(AnnoStrokeRef(ref.offset + 4), s);
void Annotated_Stroke_write(Alloc a, AnnotatedRef ref, AnnoStroke s) {
write_mem(a, ref.offset >> 2, Annotated_Stroke);
AnnoStroke_write(a, AnnoStrokeRef(ref.offset + 4), s);
}
void Annotated_Fill_write(AnnotatedRef ref, AnnoFill s) {
memory[ref.offset >> 2] = Annotated_Fill;
AnnoFill_write(AnnoFillRef(ref.offset + 4), s);
void Annotated_Fill_write(Alloc a, AnnotatedRef ref, AnnoFill s) {
write_mem(a, ref.offset >> 2, Annotated_Fill);
AnnoFill_write(a, AnnoFillRef(ref.offset + 4), s);
}
void Annotated_BeginClip_write(AnnotatedRef ref, AnnoClip s) {
memory[ref.offset >> 2] = Annotated_BeginClip;
AnnoClip_write(AnnoClipRef(ref.offset + 4), s);
void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, AnnoClip s) {
write_mem(a, ref.offset >> 2, Annotated_BeginClip);
AnnoClip_write(a, AnnoClipRef(ref.offset + 4), s);
}
void Annotated_EndClip_write(AnnotatedRef ref, AnnoClip s) {
memory[ref.offset >> 2] = Annotated_EndClip;
AnnoClip_write(AnnoClipRef(ref.offset + 4), s);
void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoClip s) {
write_mem(a, ref.offset >> 2, Annotated_EndClip);
AnnoClip_write(a, AnnoClipRef(ref.offset + 4), s);
}

View file

@ -15,8 +15,8 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#include "mem.h"
#include "setup.h"
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
@ -31,27 +31,27 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
#include "tile.h"
shared uint sh_row_count[BACKDROP_WG];
shared uint sh_row_base[BACKDROP_WG];
shared Alloc sh_row_alloc[BACKDROP_WG];
shared uint sh_row_width[BACKDROP_WG];
void main() {
if (mem_overflow) {
if (mem_error != NO_ERROR) {
return;
}
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
// Work assignment: 1 thread : 1 path element
uint row_count = 0;
if (element_ix < conf.n_elements) {
uint tag = Annotated_tag(ref);
uint tag = Annotated_tag(conf.anno_alloc, ref);
switch (tag) {
case Annotated_Fill:
case Annotated_BeginClip:
PathRef path_ref = PathRef(conf.tile_base + element_ix * Path_size);
Path path = Path_read(path_ref);
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
Path path = Path_read(conf.tile_alloc, path_ref);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
// Paths that don't cross tile top edges don't have backdrops.
@ -62,7 +62,8 @@ void main() {
// long as it doesn't cross the left edge.
row_count = 0;
}
sh_row_base[th_ix] = (path.tiles.offset >> 2) + 1;
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
sh_row_alloc[th_ix] = path_alloc;
}
}
@ -92,13 +93,14 @@ void main() {
if (width > 0) {
// Process one row sequentially
// Read backdrop value per tile and prefix sum it
Alloc tiles_alloc = sh_row_alloc[el_ix];
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width;
uint sum = memory[tile_el_ix];
uint tile_el_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
uint sum = read_mem(tiles_alloc, tile_el_ix);
for (uint x = 1; x < width; x++) {
tile_el_ix += 2;
sum += memory[tile_el_ix];
memory[tile_el_ix] = sum;
sum += read_mem(tiles_alloc, tile_el_ix);
write_mem(tiles_alloc, tile_el_ix, sum);
}
}
}

Binary file not shown.

View file

@ -9,8 +9,8 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#include "mem.h"
#include "setup.h"
layout(local_size_x = N_TILE, local_size_y = 1) in;
@ -32,11 +32,11 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
shared uint bitmaps[N_SLICE][N_TILE];
shared uint count[N_SLICE][N_TILE];
shared uint sh_chunk_start[N_TILE];
shared Alloc sh_chunk_alloc[N_TILE];
shared bool sh_alloc_failed;
void main() {
if (mem_overflow) {
if (mem_error != NO_ERROR) {
return;
}
@ -53,10 +53,10 @@ void main() {
// Read inputs and determine coverage of bins
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
uint tag = Annotated_Nop;
if (element_ix < my_n_elements) {
tag = Annotated_tag(ref);
tag = Annotated_tag(conf.anno_alloc, ref);
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
@ -66,7 +66,7 @@ void main() {
case Annotated_EndClip:
// Note: we take advantage of the fact that these drawing elements
// have the bbox at the same place in their layout.
AnnoFill fill = Annotated_Fill_read(ref);
AnnoFill fill = Annotated_Fill_read(conf.anno_alloc, ref);
x0 = int(floor(fill.bbox.x * SX));
y0 = int(floor(fill.bbox.y * SY));
x1 = int(ceil(fill.bbox.z * SX));
@ -105,20 +105,21 @@ void main() {
count[i][gl_LocalInvocationID.x] = element_count;
}
// element_count is number of elements covering bin for this invocation.
Alloc chunk_alloc = Alloc(0, false);
Alloc chunk_alloc = new_alloc(0, 0);
if (element_count != 0) {
// TODO: aggregate atomic adds (subgroup is probably fastest)
chunk_alloc = malloc(element_count * BinInstance_size);
sh_chunk_start[gl_LocalInvocationID.x] = chunk_alloc.offset;
if (chunk_alloc.failed) {
MallocResult chunk = malloc(element_count * BinInstance_size);
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed) {
sh_alloc_failed = true;
}
}
// Note: it might be more efficient for reading to do this in the
// other order (each bin is a contiguous sequence of partitions)
uint out_ix = (conf.bin_base >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
memory[out_ix] = element_count;
memory[out_ix + 1] = chunk_alloc.offset;
uint out_ix = (conf.bin_alloc.offset >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
write_mem(conf.bin_alloc, out_ix, element_count);
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
barrier();
if (sh_alloc_failed) {
@ -137,8 +138,9 @@ void main() {
if (my_slice > 0) {
idx += count[my_slice - 1][bin_ix];
}
uint out_offset = sh_chunk_start[bin_ix] + idx * BinInstance_size;
BinInstance_write(BinInstanceRef(out_offset), BinInstance(element_ix));
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + idx * BinInstance_size;
BinInstance_write(out_alloc, BinInstanceRef(out_offset), BinInstance(element_ix));
}
x++;
if (x == x1) {

Binary file not shown.

View file

@ -16,16 +16,16 @@ BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) {
return BinInstanceRef(ref.offset + index * BinInstance_size);
}
BinInstance BinInstance_read(BinInstanceRef ref) {
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw0 = read_mem(a, ix + 0);
BinInstance s;
s.element_ix = raw0;
return s;
}
void BinInstance_write(BinInstanceRef ref, BinInstance s) {
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = s.element_ix;
write_mem(a, ix + 0, s.element_ix);
}

View file

@ -13,8 +13,8 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#include "mem.h"
#include "setup.h"
layout(local_size_x = N_TILE, local_size_y = 1) in;
@ -34,7 +34,7 @@ shared uint sh_elements[N_TILE];
// Number of elements in the partition; prefix sum.
shared uint sh_part_count[N_PART_READ];
shared uint sh_part_elements[N_PART_READ];
shared Alloc sh_part_elements[N_PART_READ];
shared uint sh_bitmaps[N_SLICE][N_TILE];
@ -48,24 +48,47 @@ shared uint sh_tile_y0[N_TILE];
shared uint sh_tile_base[N_TILE];
shared uint sh_tile_stride[N_TILE];
// Perhaps cmd_limit should be a global? This is a style question.
bool alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
// Per-element tile allocation bookkeeping. Two interchangeable implementations
// of write_tile_alloc/read_tile_alloc are selected by MEM_DEBUG so that call
// sites do not need their own #ifdefs.
#ifdef MEM_DEBUG
// Store allocs only when MEM_DEBUG to save shared memory traffic.
shared Alloc sh_tile_alloc[N_TILE];
// Record the Alloc `a` for slot `el_ix` in workgroup-shared storage.
void write_tile_alloc(uint el_ix, Alloc a) {
sh_tile_alloc[el_ix] = a;
}
// Return the Alloc previously recorded for slot `el_ix`.
Alloc read_tile_alloc(uint el_ix) {
return sh_tile_alloc[el_ix];
}
#else
// Without MEM_DEBUG nothing is stored; the argument is ignored.
void write_tile_alloc(uint el_ix, Alloc a) {
// No-op
}
// Without MEM_DEBUG every slot yields the same Alloc, starting at offset 0.
Alloc read_tile_alloc(uint el_ix) {
// All memory.
// NOTE(review): the *4 presumably converts an element count to bytes,
// assuming `memory` is an array of 32-bit words — confirm against mem.h.
return new_alloc(0, memory.length()*4);
}
#endif
// Perhaps cmd_alloc should be a global? This is a style question.
bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
if (cmd_ref.offset < cmd_limit) {
return true;
}
Alloc new_cmd = malloc(PTCL_INITIAL_ALLOC);
MallocResult new_cmd = malloc(PTCL_INITIAL_ALLOC);
if (new_cmd.failed) {
return false;
}
CmdJump jump = CmdJump(new_cmd.offset);
Cmd_Jump_write(cmd_ref, jump);
cmd_ref = CmdRef(new_cmd.offset);
cmd_limit = new_cmd.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
CmdJump jump = CmdJump(new_cmd.alloc.offset);
Cmd_Jump_write(cmd_alloc, cmd_ref, jump);
cmd_alloc = new_cmd.alloc;
cmd_ref = CmdRef(cmd_alloc.offset);
cmd_limit = cmd_alloc.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
return true;
}
void main() {
if (mem_overflow) {
if (mem_error != NO_ERROR) {
return;
}
@ -85,7 +108,8 @@ void main() {
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
uint this_tile_ix = (bin_tile_y + tile_y) * conf.width_in_tiles + bin_tile_x + tile_x;
CmdRef cmd_ref = CmdRef(conf.ptcl_base + this_tile_ix * PTCL_INITIAL_ALLOC);
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, this_tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
// The nesting depth of the clip stack
uint clip_depth = 0;
@ -117,9 +141,10 @@ void main() {
part_start_ix = ready_ix;
uint count = 0;
if (th_ix < N_PART_READ && partition_ix + th_ix < n_partitions) {
uint in_ix = (conf.bin_base >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
count = memory[in_ix];
sh_part_elements[th_ix] = memory[in_ix + 1];
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
count = read_mem(conf.bin_alloc, in_ix);
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size);
}
// prefix sum of counts
for (uint i = 0; i < LG_N_PART_READ; i++) {
@ -152,8 +177,9 @@ void main() {
}
}
ix -= part_ix > 0 ? sh_part_count[part_ix - 1] : part_start_ix;
BinInstanceRef inst_ref = BinInstanceRef(sh_part_elements[part_ix]);
BinInstance inst = BinInstance_read(BinInstance_index(inst_ref, ix));
Alloc bin_alloc = sh_part_elements[part_ix];
BinInstanceRef inst_ref = BinInstanceRef(bin_alloc.offset);
BinInstance inst = BinInstance_read(bin_alloc, BinInstance_index(inst_ref, ix));
sh_elements[th_ix] = inst.element_ix;
}
barrier();
@ -169,8 +195,8 @@ void main() {
AnnotatedRef ref;
if (th_ix + rd_ix < wr_ix) {
element_ix = sh_elements[th_ix];
ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
tag = Annotated_tag(ref);
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
tag = Annotated_tag(conf.anno_alloc, ref);
}
// Bounding box of element in pixel coordinates.
@ -183,7 +209,7 @@ void main() {
// We have one "path" for each element, even if the element isn't
// actually a path (currently EndClip, but images etc in the future).
uint path_ix = element_ix;
Path path = Path_read(PathRef(conf.tile_base + path_ix * Path_size));
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride;
int dx = int(path.bbox.x) - int(bin_tile_x);
@ -199,6 +225,8 @@ void main() {
// base relative to bin
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
sh_tile_base[th_ix] = base;
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
write_tile_alloc(th_ix, path_alloc);
break;
default:
tile_count = 0;
@ -226,8 +254,8 @@ void main() {
el_ix = probe;
}
}
AnnotatedRef ref = AnnotatedRef(conf.anno_base + sh_elements[el_ix] * Annotated_size);
uint tag = Annotated_tag(ref);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size);
uint tag = Annotated_tag(conf.anno_alloc, ref);
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
uint width = sh_tile_width[el_ix];
uint x = sh_tile_x0[el_ix] + seq_ix % width;
@ -236,7 +264,7 @@ void main() {
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
include_tile = true;
} else {
Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
Tile tile = Tile_read(read_tile_alloc(el_ix), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
// Include the path in the tile if
// - the tile contains at least a segment (tile offset non-zero)
// - the tile is completely covered (backdrop non-zero)
@ -275,16 +303,16 @@ void main() {
// At this point, we read the element again from global memory.
// If that turns out to be expensive, maybe we can pack it into
// shared memory (or perhaps just the tag).
ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
tag = Annotated_tag(ref);
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
tag = Annotated_tag(conf.anno_alloc, ref);
if (clip_zero_depth == 0) {
switch (tag) {
case Annotated_Fill:
Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
Tile tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoFill fill = Annotated_Fill_read(ref);
if (!alloc_cmd(cmd_ref, cmd_limit)) {
AnnoFill fill = Annotated_Fill_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
if (tile.tile.offset != 0) {
@ -292,32 +320,32 @@ void main() {
cmd_fill.tile_ref = tile.tile.offset;
cmd_fill.backdrop = tile.backdrop;
cmd_fill.rgba_color = fill.rgba_color;
Cmd_Fill_write(cmd_ref, cmd_fill);
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
} else {
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
Cmd_Solid_write(cmd_alloc, cmd_ref, CmdSolid(fill.rgba_color));
}
cmd_ref.offset += Cmd_size;
break;
case Annotated_BeginClip:
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
if (tile.tile.offset == 0 && tile.backdrop == 0) {
clip_zero_depth = clip_depth + 1;
} else if (tile.tile.offset == 0 && clip_depth < 32) {
clip_one_mask |= (1 << clip_depth);
} else {
if (!alloc_cmd(cmd_ref, cmd_limit)) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
if (tile.tile.offset != 0) {
CmdBeginClip cmd_begin_clip;
cmd_begin_clip.tile_ref = tile.tile.offset;
cmd_begin_clip.backdrop = tile.backdrop;
Cmd_BeginClip_write(cmd_ref, cmd_begin_clip);
Cmd_BeginClip_write(cmd_alloc, cmd_ref, cmd_begin_clip);
} else {
// TODO: here is where a bunch of optimization magic should happen
float alpha = tile.backdrop == 0 ? 0.0 : 1.0;
Cmd_BeginSolidClip_write(cmd_ref, CmdBeginSolidClip(alpha));
Cmd_BeginSolidClip_write(cmd_alloc, cmd_ref, CmdBeginSolidClip(alpha));
}
cmd_ref.offset += Cmd_size;
if (clip_depth < 32) {
@ -329,25 +357,25 @@ void main() {
case Annotated_EndClip:
clip_depth--;
if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) {
if (!alloc_cmd(cmd_ref, cmd_limit)) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
Cmd_EndClip_write(cmd_ref, CmdEndClip(1.0));
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(1.0));
cmd_ref.offset += Cmd_size;
}
break;
case Annotated_Stroke:
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoStroke stroke = Annotated_Stroke_read(ref);
AnnoStroke stroke = Annotated_Stroke_read(conf.anno_alloc, ref);
CmdStroke cmd_stroke;
cmd_stroke.tile_ref = tile.tile.offset;
cmd_stroke.half_width = 0.5 * stroke.linewidth;
cmd_stroke.rgba_color = stroke.rgba_color;
if (!alloc_cmd(cmd_ref, cmd_limit)) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
Cmd_Stroke_write(cmd_ref, cmd_stroke);
Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke);
cmd_ref.offset += Cmd_size;
break;
}
@ -372,6 +400,6 @@ void main() {
if (rd_ix >= ready_ix && partition_ix >= n_partitions) break;
}
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
Cmd_End_write(cmd_ref);
Cmd_End_write(cmd_alloc, cmd_ref);
}
}

Binary file not shown.

View file

@ -9,8 +9,8 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#include "mem.h"
#include "setup.h"
#define N_ROWS 4
#define WG_SIZE 32
@ -172,7 +172,7 @@ shared uint sh_part_ix;
shared State sh_prefix;
void main() {
if (mem_overflow) {
if (mem_error != NO_ERROR) {
return;
}
@ -342,10 +342,10 @@ void main() {
}
// We do encoding a bit by hand to minimize divergence. Another approach
// would be to have a fill/stroke bool.
PathSegRef path_out_ref = PathSegRef(conf.pathseg_base + (st.pathseg_count - 1) * PathSeg_size);
PathSegRef path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
uint out_tag = tag == Element_FillLine ? PathSeg_FillCubic : PathSeg_StrokeCubic;
memory[path_out_ref.offset >> 2] = out_tag;
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
write_mem(conf.pathseg_alloc, path_out_ref.offset >> 2, out_tag);
PathStrokeCubic_write(conf.pathseg_alloc, PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
break;
case Element_FillQuad:
case Element_StrokeQuad:
@ -366,10 +366,10 @@ void main() {
}
// We do encoding a bit by hand to minimize divergence. Another approach
// would be to have a fill/stroke bool.
path_out_ref = PathSegRef(conf.pathseg_base + (st.pathseg_count - 1) * PathSeg_size);
path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
out_tag = tag == Element_FillQuad ? PathSeg_FillCubic : PathSeg_StrokeCubic;
memory[path_out_ref.offset >> 2] = out_tag;
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
write_mem(conf.pathseg_alloc, path_out_ref.offset >> 2, out_tag);
PathStrokeCubic_write(conf.pathseg_alloc, PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
break;
case Element_FillCubic:
case Element_StrokeCubic:
@ -387,10 +387,10 @@ void main() {
}
// We do encoding a bit by hand to minimize divergence. Another approach
// would be to have a fill/stroke bool.
path_out_ref = PathSegRef(conf.pathseg_base + (st.pathseg_count - 1) * PathSeg_size);
path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
out_tag = tag == Element_FillCubic ? PathSeg_FillCubic : PathSeg_StrokeCubic;
memory[path_out_ref.offset >> 2] = out_tag;
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
write_mem(conf.pathseg_alloc, path_out_ref.offset >> 2, out_tag);
PathStrokeCubic_write(conf.pathseg_alloc, PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
break;
case Element_Stroke:
Stroke stroke = Element_Stroke_read(this_ref);
@ -399,31 +399,31 @@ void main() {
vec2 lw = get_linewidth(st);
anno_stroke.bbox = st.bbox + vec4(-lw, lw);
anno_stroke.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
AnnotatedRef out_ref = AnnotatedRef(conf.anno_base + (st.path_count - 1) * Annotated_size);
Annotated_Stroke_write(out_ref, anno_stroke);
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
Annotated_Stroke_write(conf.anno_alloc, out_ref, anno_stroke);
break;
case Element_Fill:
Fill fill = Element_Fill_read(this_ref);
AnnoFill anno_fill;
anno_fill.rgba_color = fill.rgba_color;
anno_fill.bbox = st.bbox;
out_ref = AnnotatedRef(conf.anno_base + (st.path_count - 1) * Annotated_size);
Annotated_Fill_write(out_ref, anno_fill);
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
Annotated_Fill_write(conf.anno_alloc, out_ref, anno_fill);
break;
case Element_BeginClip:
Clip begin_clip = Element_BeginClip_read(this_ref);
AnnoClip anno_begin_clip = AnnoClip(begin_clip.bbox);
// This is the absolute bbox, it's been transformed during encoding.
anno_begin_clip.bbox = begin_clip.bbox;
out_ref = AnnotatedRef(conf.anno_base + (st.path_count - 1) * Annotated_size);
Annotated_BeginClip_write(out_ref, anno_begin_clip);
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
Annotated_BeginClip_write(conf.anno_alloc, out_ref, anno_begin_clip);
break;
case Element_EndClip:
Clip end_clip = Element_EndClip_read(this_ref);
// This bbox is expected to be the same as the begin one.
AnnoClip anno_end_clip = AnnoClip(end_clip.bbox);
out_ref = AnnotatedRef(conf.anno_base + (st.path_count - 1) * Annotated_size);
Annotated_EndClip_write(out_ref, anno_end_clip);
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
break;
}
}

Binary file not shown.

View file

@ -10,8 +10,8 @@
#extension GL_GOOGLE_include_directive : enable
#extension GL_EXT_nonuniform_qualifier : enable
#include "setup.h"
#include "mem.h"
#include "setup.h"
#define CHUNK 8
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK)
@ -37,16 +37,16 @@ layout(set = 0, binding = 3) uniform sampler2D textures[];
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)
shared Alloc sh_clip_alloc;
shared MallocResult sh_clip_alloc;
// Allocate a scratch buffer for clipping.
Alloc alloc_clip_buf(uint link) {
MallocResult alloc_clip_buf(uint link) {
if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {
Alloc alloc = malloc(CLIP_BUF_SIZE * 4);
if (!alloc.failed) {
memory[(alloc.offset >> 2) + CLIP_LINK_OFFSET] = link;
MallocResult m = malloc(CLIP_BUF_SIZE * 4);
if (!m.failed) {
write_mem(m.alloc, (m.alloc.offset >> 2) + CLIP_LINK_OFFSET, link);
}
sh_clip_alloc = alloc;
sh_clip_alloc = m;
}
barrier();
return sh_clip_alloc;
@ -59,7 +59,7 @@ float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
for (uint k = 0; k < CHUNK; k++) area[k] = float(backdrop);
TileSegRef tile_seg_ref = TileSegRef(tile_ref);
do {
TileSeg seg = TileSeg_read(tile_seg_ref);
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
for (uint k = 0; k < CHUNK; k++) {
vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
vec2 start = seg.origin - my_xy;
@ -87,12 +87,13 @@ float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
}
void main() {
if (mem_overflow) {
if (mem_error != NO_ERROR) {
return;
}
uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
CmdRef cmd_ref = CmdRef(conf.ptcl_base + tile_ix * PTCL_INITIAL_ALLOC);
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
vec2 xy = vec2(xy_uint);
@ -101,7 +102,7 @@ void main() {
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
uint blend_spill = 0;
uint blend_sp = 0;
uint clip_tos = 0;
Alloc clip_tos = new_alloc(0, 0);
for (uint i = 0; i < CHUNK; i++) {
rgb[i] = vec3(0.5);
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
@ -111,13 +112,13 @@ void main() {
}
while (true) {
uint tag = Cmd_tag(cmd_ref);
uint tag = Cmd_tag(cmd_alloc, cmd_ref);
if (tag == Cmd_End) {
break;
}
switch (tag) {
case Cmd_Circle:
CmdCircle circle = Cmd_Circle_read(cmd_ref);
CmdCircle circle = Cmd_Circle_read(cmd_alloc, cmd_ref);
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color).wzyx;
for (uint i = 0; i < CHUNK; i++) {
float dy = float(i * CHUNK_DY);
@ -129,12 +130,12 @@ void main() {
break;
case Cmd_Stroke:
// Calculate distance field from all the line segments in this tile.
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
float df[CHUNK];
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
do {
TileSeg seg = TileSeg_read(tile_seg_ref);
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref);
vec2 line_vec = seg.vector;
for (uint k = 0; k < CHUNK; k++) {
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
@ -151,7 +152,7 @@ void main() {
}
break;
case Cmd_Fill:
CmdFill fill = Cmd_Fill_read(cmd_ref);
CmdFill fill = Cmd_Fill_read(cmd_alloc, cmd_ref);
float area[CHUNK];
area = computeArea(xy, fill.backdrop, fill.tile_ref);
fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
@ -164,25 +165,25 @@ void main() {
uint blend_slot = blend_sp % BLEND_STACK_SIZE;
if (blend_sp == blend_spill + BLEND_STACK_SIZE) {
// spill to scratch buffer
Alloc alloc = alloc_clip_buf(clip_tos);
if (alloc.failed) {
MallocResult m = alloc_clip_buf(clip_tos.offset);
if (m.failed) {
return;
}
clip_tos = alloc.offset;
uint base_ix = (clip_tos >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
clip_tos = m.alloc;
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
for (uint k = 0; k < CHUNK; k++) {
memory[base_ix + k * TILE_WIDTH_PX * CHUNK_DY] = blend_stack[blend_slot][k];
write_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY, blend_stack[blend_slot][k]);
}
blend_spill++;
}
if (tag == Cmd_BeginClip) {
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref);
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_alloc, cmd_ref);
area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref);
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
}
} else {
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref);
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_alloc, cmd_ref);
float solid_alpha = begin_solid_clip.alpha;
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], solid_alpha));
@ -191,14 +192,14 @@ void main() {
blend_sp++;
break;
case Cmd_EndClip:
CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref);
CmdEndClip end_clip = Cmd_EndClip_read(cmd_alloc, cmd_ref);
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
if (blend_sp == blend_spill) {
uint base_ix = (clip_tos >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = memory[base_ix + k * TILE_WIDTH_PX * CHUNK_DY];
blend_stack[blend_slot][k] = read_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY);
}
clip_tos = memory[(clip_tos >> 2) + CLIP_LINK_OFFSET];
clip_tos.offset = read_mem(clip_tos, (clip_tos.offset >> 2) + CLIP_LINK_OFFSET);
blend_spill--;
}
blend_sp--;
@ -208,20 +209,21 @@ void main() {
}
break;
case Cmd_Solid:
CmdSolid solid = Cmd_Solid_read(cmd_ref);
CmdSolid solid = Cmd_Solid_read(cmd_alloc, cmd_ref);
fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;
for (uint k = 0; k < CHUNK; k++) {
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * fg_rgba.a);
}
break;
case Cmd_SolidMask:
CmdSolidMask solid_mask = Cmd_SolidMask_read(cmd_ref);
CmdSolidMask solid_mask = Cmd_SolidMask_read(cmd_alloc, cmd_ref);
for (uint k = 0; k < CHUNK; k++) {
mask[k] = solid_mask.mask;
}
break;
case Cmd_Jump:
cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref);
cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref);
cmd_alloc.offset = cmd_ref.offset;
continue;
}
cmd_ref.offset += Cmd_size;

Binary file not shown.

View file

@ -3,27 +3,118 @@
layout(set = 0, binding = 0) buffer Memory {
// offset into memory of the next allocation, initialized by the user.
uint mem_offset;
bool mem_overflow;
// mem_error tracks the status of memory accesses, initialized to NO_ERROR
// by the user. ERR_MALLOC_FAILED is reported for insufficient memory.
// If MEM_DEBUG is defined the following errors are also reported:
// - ERR_OUT_OF_BOUNDS for reads and writes outside the bounds of an Alloc.
// - ERR_UNALIGNED_ACCESS for offsets or sizes not aligned to 32-bit words.
uint mem_error;
uint[] memory;
};
// Uncomment this line to add the size field to Alloc and enable memory checks.
// Note that the Config struct in setup.h grows size fields as well.
//#define MEM_DEBUG
#define NO_ERROR 0
#define ERR_MALLOC_FAILED 1
#define ERR_OUT_OF_BOUNDS 2
#define ERR_UNALIGNED_ACCESS 3
#define Alloc_size 8
// Alloc represents a memory allocation: a region of the memory buffer
// identified by its byte offset. The size field only exists when MEM_DEBUG is
// defined, where touch_mem and slice_mem use it to validate accesses.
// NOTE(review): Alloc_size is defined as 8, which matches this struct only
// when MEM_DEBUG adds the size field — confirm this is intended.
struct Alloc {
// offset in bytes into memory.
uint offset;
#ifdef MEM_DEBUG
// size in bytes of the allocation; present only when MEM_DEBUG is defined.
uint size;
#endif
};
// MallocResult is the value returned by malloc: the allocation together with
// a flag telling whether the request succeeded.
struct MallocResult {
// alloc is the allocated region; it should only be used when failed is false.
Alloc alloc;
// failed is true if the allocation overflowed memory or, when MEM_DEBUG is
// defined, if the requested size was not a multiple of 4 bytes.
bool failed;
};
// malloc allocates size bytes of memory.
Alloc malloc(uint size) {
// new_alloc synthesizes an Alloc when its offset and size are derived.
Alloc new_alloc(uint offset, uint size) {
Alloc a;
// Round up to nearest 32-bit word.
size = (size + 3) & ~3;
a.offset = atomicAdd(mem_offset, size);
a.failed = a.offset + size > memory.length() * 4;
if (a.failed) {
mem_overflow = true;
}
a.offset = offset;
#ifdef MEM_DEBUG
a.size = size;
#endif
return a;
}
// malloc allocates size bytes of memory by atomically advancing mem_offset.
//
// The returned MallocResult has failed set, and mem_error raised to
// ERR_MALLOC_FAILED, when the request does not fit in the memory buffer.
// If MEM_DEBUG is defined, a size that is not a multiple of 4 also fails,
// with ERR_UNALIGNED_ACCESS. Callers must check failed before using alloc.
MallocResult malloc(uint size) {
    MallocResult r;
    r.failed = false;
    uint offset = atomicAdd(mem_offset, size);
    r.alloc = new_alloc(offset, size);
    // Capacity of the memory buffer in bytes (memory holds 32-bit words).
    uint capacity = uint(memory.length()) * 4u;
    // Compare without forming offset + size: that sum wraps around in 32-bit
    // unsigned arithmetic, which would let an oversized request appear to fit.
    if (offset > capacity || size > capacity - offset) {
        r.failed = true;
        atomicMax(mem_error, ERR_MALLOC_FAILED);
        return r;
    }
#ifdef MEM_DEBUG
    // Allocations are expected to be whole 32-bit words.
    if ((size & 3) != 0) {
        r.failed = true;
        atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
        return r;
    }
#endif
    return r;
}
// touch_mem reports whether the memory word at offset may be accessed through
// alloc. Offset is a word index into memory, not a byte offset.
// Without MEM_DEBUG every access is accepted. With MEM_DEBUG an offset outside
// [alloc.offset/4, (alloc.offset + alloc.size)/4) raises ERR_OUT_OF_BOUNDS in
// mem_error and yields false.
bool touch_mem(Alloc alloc, uint offset) {
#ifdef MEM_DEBUG
    uint first_word = alloc.offset / 4;
    uint end_word = (alloc.offset + alloc.size) / 4;
    bool in_bounds = offset >= first_word && offset < end_word;
    if (!in_bounds) {
        atomicMax(mem_error, ERR_OUT_OF_BOUNDS);
    }
    return in_bounds;
#else
    return true;
#endif
}
// write_mem stores val in the memory word at offset, provided touch_mem
// accepts the access through alloc; a rejected access is silently dropped.
// Offset is in words.
void write_mem(Alloc alloc, uint offset, uint val) {
    if (touch_mem(alloc, offset)) {
        memory[offset] = val;
    }
}
// read_mem loads the memory word at offset, provided touch_mem accepts the
// access through alloc; a rejected access yields 0.
// Offset is in words.
uint read_mem(Alloc alloc, uint offset) {
    uint word = 0;
    if (touch_mem(alloc, offset)) {
        word = memory[offset];
    }
    return word;
}
// slice_mem carves a sub-allocation of size bytes out of a, starting offset
// bytes past a.offset. Both offset and size are in bytes.
// With MEM_DEBUG, an offset or size that is not a multiple of 4 raises
// ERR_UNALIGNED_ACCESS and yields an empty Alloc; a slice extending past
// a.size yields an empty Alloc without raising an error.
Alloc slice_mem(Alloc a, uint offset, uint size) {
#ifdef MEM_DEBUG
    bool unaligned = ((offset | size) & 3) != 0;
    if (unaligned) {
        atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
        return Alloc(0, 0);
    }
    uint slice_end = offset + size;
    if (slice_end > a.size) {
        // slice_mem is sometimes used for slices outside bounds that are
        // never written, so this case is not reported as an error.
        return Alloc(0, 0);
    }
#endif
    uint base = a.offset + offset;
    return new_alloc(base, size);
}

View file

@ -7,8 +7,8 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#include "mem.h"
#include "setup.h"
#define LG_COARSE_WG 5
#define COARSE_WG (1 << LG_COARSE_WG)
@ -87,21 +87,21 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
}
void main() {
if (mem_overflow) {
if (mem_error != NO_ERROR) {
return;
}
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef ref = PathSegRef(conf.pathseg_base + element_ix * PathSeg_size);
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
uint tag = PathSeg_Nop;
if (element_ix < conf.n_pathseg) {
tag = PathSeg_tag(ref);
tag = PathSeg_tag(conf.pathseg_alloc, ref);
}
switch (tag) {
case PathSeg_FillCubic:
case PathSeg_StrokeCubic:
PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
PathStrokeCubic cubic = PathSeg_StrokeCubic_read(conf.pathseg_alloc, ref);
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
float err = err_v.x * err_v.x + err_v.y * err_v.y;
// The number of quadratics.
@ -123,7 +123,8 @@ void main() {
uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1);
uint path_ix = cubic.path_ix;
Path path = Path_read(PathRef(conf.tile_base + path_ix * Path_size));
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size);
ivec4 bbox = ivec4(path.bbox);
vec2 p0 = cubic.p0;
qp0 = cubic.p0;
@ -182,11 +183,11 @@ void main() {
// TODO: can be tighter, use c to bound width
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
// Consider using subgroups to aggregate atomic add.
Alloc tile_alloc = malloc(n_tile_alloc * TileSeg_size);
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
if (tile_alloc.failed) {
return;
}
uint tile_offset = tile_alloc.offset;
uint tile_offset = tile_alloc.alloc.offset;
TileSeg tile_seg;
@ -204,7 +205,9 @@ void main() {
int backdrop = p1.y < p0.y ? 1 : -1;
TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
uint tile_el = tile_ref.offset >> 2;
atomicAdd(memory[tile_el + 1], backdrop);
if (touch_mem(path_alloc, tile_el + 1)) {
atomicAdd(memory[tile_el + 1], backdrop);
}
}
// next_xray is the xray for the next scanline; the line segment intersects
@ -225,9 +228,12 @@ void main() {
for (int x = xx0; x < xx1; x++) {
float tile_x0 = float(x * TILE_WIDTH_PX);
TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
uint tile_el = tile_ref.offset >> 2;
uint old = atomicExchange(memory[tile_el], tile_offset);
uint old = 0;
if (touch_mem(path_alloc, tile_el)) {
old = atomicExchange(memory[tile_el], tile_offset);
}
tile_seg.origin = p0;
tile_seg.vector = p1 - p0;
float y_edge = 0.0;
@ -254,7 +260,7 @@ void main() {
}
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
TileSeg_write(TileSegRef(tile_offset), tile_seg);
TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg);
tile_offset += TileSeg_size;
}
xc += b;

Binary file not shown.

View file

@ -87,13 +87,13 @@ PathSegRef PathSeg_index(PathSegRef ref, uint index) {
return PathSegRef(ref.offset + index * PathSeg_size);
}
PathFillLine PathFillLine_read(PathFillLineRef ref) {
PathFillLine PathFillLine_read(Alloc a, PathFillLineRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
PathFillLine s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -101,24 +101,24 @@ PathFillLine PathFillLine_read(PathFillLineRef ref) {
return s;
}
void PathFillLine_write(PathFillLineRef ref, PathFillLine s) {
void PathFillLine_write(Alloc a, PathFillLineRef ref, PathFillLine s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.p0.x);
memory[ix + 1] = floatBitsToUint(s.p0.y);
memory[ix + 2] = floatBitsToUint(s.p1.x);
memory[ix + 3] = floatBitsToUint(s.p1.y);
memory[ix + 4] = s.path_ix;
write_mem(a, ix + 0, floatBitsToUint(s.p0.x));
write_mem(a, ix + 1, floatBitsToUint(s.p0.y));
write_mem(a, ix + 2, floatBitsToUint(s.p1.x));
write_mem(a, ix + 3, floatBitsToUint(s.p1.y));
write_mem(a, ix + 4, s.path_ix);
}
PathStrokeLine PathStrokeLine_read(PathStrokeLineRef ref) {
PathStrokeLine PathStrokeLine_read(Alloc a, PathStrokeLineRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
uint raw5 = memory[ix + 5];
uint raw6 = memory[ix + 6];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
uint raw6 = read_mem(a, ix + 6);
PathStrokeLine s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -127,28 +127,28 @@ PathStrokeLine PathStrokeLine_read(PathStrokeLineRef ref) {
return s;
}
void PathStrokeLine_write(PathStrokeLineRef ref, PathStrokeLine s) {
void PathStrokeLine_write(Alloc a, PathStrokeLineRef ref, PathStrokeLine s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.p0.x);
memory[ix + 1] = floatBitsToUint(s.p0.y);
memory[ix + 2] = floatBitsToUint(s.p1.x);
memory[ix + 3] = floatBitsToUint(s.p1.y);
memory[ix + 4] = s.path_ix;
memory[ix + 5] = floatBitsToUint(s.stroke.x);
memory[ix + 6] = floatBitsToUint(s.stroke.y);
write_mem(a, ix + 0, floatBitsToUint(s.p0.x));
write_mem(a, ix + 1, floatBitsToUint(s.p0.y));
write_mem(a, ix + 2, floatBitsToUint(s.p1.x));
write_mem(a, ix + 3, floatBitsToUint(s.p1.y));
write_mem(a, ix + 4, s.path_ix);
write_mem(a, ix + 5, floatBitsToUint(s.stroke.x));
write_mem(a, ix + 6, floatBitsToUint(s.stroke.y));
}
PathFillCubic PathFillCubic_read(PathFillCubicRef ref) {
PathFillCubic PathFillCubic_read(Alloc a, PathFillCubicRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
uint raw5 = memory[ix + 5];
uint raw6 = memory[ix + 6];
uint raw7 = memory[ix + 7];
uint raw8 = memory[ix + 8];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
uint raw6 = read_mem(a, ix + 6);
uint raw7 = read_mem(a, ix + 7);
uint raw8 = read_mem(a, ix + 8);
PathFillCubic s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -158,32 +158,32 @@ PathFillCubic PathFillCubic_read(PathFillCubicRef ref) {
return s;
}
void PathFillCubic_write(PathFillCubicRef ref, PathFillCubic s) {
void PathFillCubic_write(Alloc a, PathFillCubicRef ref, PathFillCubic s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.p0.x);
memory[ix + 1] = floatBitsToUint(s.p0.y);
memory[ix + 2] = floatBitsToUint(s.p1.x);
memory[ix + 3] = floatBitsToUint(s.p1.y);
memory[ix + 4] = floatBitsToUint(s.p2.x);
memory[ix + 5] = floatBitsToUint(s.p2.y);
memory[ix + 6] = floatBitsToUint(s.p3.x);
memory[ix + 7] = floatBitsToUint(s.p3.y);
memory[ix + 8] = s.path_ix;
write_mem(a, ix + 0, floatBitsToUint(s.p0.x));
write_mem(a, ix + 1, floatBitsToUint(s.p0.y));
write_mem(a, ix + 2, floatBitsToUint(s.p1.x));
write_mem(a, ix + 3, floatBitsToUint(s.p1.y));
write_mem(a, ix + 4, floatBitsToUint(s.p2.x));
write_mem(a, ix + 5, floatBitsToUint(s.p2.y));
write_mem(a, ix + 6, floatBitsToUint(s.p3.x));
write_mem(a, ix + 7, floatBitsToUint(s.p3.y));
write_mem(a, ix + 8, s.path_ix);
}
PathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref) {
PathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
uint raw5 = memory[ix + 5];
uint raw6 = memory[ix + 6];
uint raw7 = memory[ix + 7];
uint raw8 = memory[ix + 8];
uint raw9 = memory[ix + 9];
uint raw10 = memory[ix + 10];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
uint raw6 = read_mem(a, ix + 6);
uint raw7 = read_mem(a, ix + 7);
uint raw8 = read_mem(a, ix + 8);
uint raw9 = read_mem(a, ix + 9);
uint raw10 = read_mem(a, ix + 10);
PathStrokeCubic s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
@ -194,62 +194,62 @@ PathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref) {
return s;
}
void PathStrokeCubic_write(PathStrokeCubicRef ref, PathStrokeCubic s) {
void PathStrokeCubic_write(Alloc a, PathStrokeCubicRef ref, PathStrokeCubic s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.p0.x);
memory[ix + 1] = floatBitsToUint(s.p0.y);
memory[ix + 2] = floatBitsToUint(s.p1.x);
memory[ix + 3] = floatBitsToUint(s.p1.y);
memory[ix + 4] = floatBitsToUint(s.p2.x);
memory[ix + 5] = floatBitsToUint(s.p2.y);
memory[ix + 6] = floatBitsToUint(s.p3.x);
memory[ix + 7] = floatBitsToUint(s.p3.y);
memory[ix + 8] = s.path_ix;
memory[ix + 9] = floatBitsToUint(s.stroke.x);
memory[ix + 10] = floatBitsToUint(s.stroke.y);
write_mem(a, ix + 0, floatBitsToUint(s.p0.x));
write_mem(a, ix + 1, floatBitsToUint(s.p0.y));
write_mem(a, ix + 2, floatBitsToUint(s.p1.x));
write_mem(a, ix + 3, floatBitsToUint(s.p1.y));
write_mem(a, ix + 4, floatBitsToUint(s.p2.x));
write_mem(a, ix + 5, floatBitsToUint(s.p2.y));
write_mem(a, ix + 6, floatBitsToUint(s.p3.x));
write_mem(a, ix + 7, floatBitsToUint(s.p3.y));
write_mem(a, ix + 8, s.path_ix);
write_mem(a, ix + 9, floatBitsToUint(s.stroke.x));
write_mem(a, ix + 10, floatBitsToUint(s.stroke.y));
}
uint PathSeg_tag(PathSegRef ref) {
return memory[ref.offset >> 2];
uint PathSeg_tag(Alloc a, PathSegRef ref) {
return read_mem(a, ref.offset >> 2);
}
PathFillLine PathSeg_FillLine_read(PathSegRef ref) {
return PathFillLine_read(PathFillLineRef(ref.offset + 4));
PathFillLine PathSeg_FillLine_read(Alloc a, PathSegRef ref) {
return PathFillLine_read(a, PathFillLineRef(ref.offset + 4));
}
PathStrokeLine PathSeg_StrokeLine_read(PathSegRef ref) {
return PathStrokeLine_read(PathStrokeLineRef(ref.offset + 4));
PathStrokeLine PathSeg_StrokeLine_read(Alloc a, PathSegRef ref) {
return PathStrokeLine_read(a, PathStrokeLineRef(ref.offset + 4));
}
PathFillCubic PathSeg_FillCubic_read(PathSegRef ref) {
return PathFillCubic_read(PathFillCubicRef(ref.offset + 4));
PathFillCubic PathSeg_FillCubic_read(Alloc a, PathSegRef ref) {
return PathFillCubic_read(a, PathFillCubicRef(ref.offset + 4));
}
PathStrokeCubic PathSeg_StrokeCubic_read(PathSegRef ref) {
return PathStrokeCubic_read(PathStrokeCubicRef(ref.offset + 4));
PathStrokeCubic PathSeg_StrokeCubic_read(Alloc a, PathSegRef ref) {
return PathStrokeCubic_read(a, PathStrokeCubicRef(ref.offset + 4));
}
void PathSeg_Nop_write(PathSegRef ref) {
memory[ref.offset >> 2] = PathSeg_Nop;
void PathSeg_Nop_write(Alloc a, PathSegRef ref) {
write_mem(a, ref.offset >> 2, PathSeg_Nop);
}
void PathSeg_FillLine_write(PathSegRef ref, PathFillLine s) {
memory[ref.offset >> 2] = PathSeg_FillLine;
PathFillLine_write(PathFillLineRef(ref.offset + 4), s);
void PathSeg_FillLine_write(Alloc a, PathSegRef ref, PathFillLine s) {
write_mem(a, ref.offset >> 2, PathSeg_FillLine);
PathFillLine_write(a, PathFillLineRef(ref.offset + 4), s);
}
void PathSeg_StrokeLine_write(PathSegRef ref, PathStrokeLine s) {
memory[ref.offset >> 2] = PathSeg_StrokeLine;
PathStrokeLine_write(PathStrokeLineRef(ref.offset + 4), s);
void PathSeg_StrokeLine_write(Alloc a, PathSegRef ref, PathStrokeLine s) {
write_mem(a, ref.offset >> 2, PathSeg_StrokeLine);
PathStrokeLine_write(a, PathStrokeLineRef(ref.offset + 4), s);
}
void PathSeg_FillCubic_write(PathSegRef ref, PathFillCubic s) {
memory[ref.offset >> 2] = PathSeg_FillCubic;
PathFillCubic_write(PathFillCubicRef(ref.offset + 4), s);
void PathSeg_FillCubic_write(Alloc a, PathSegRef ref, PathFillCubic s) {
write_mem(a, ref.offset >> 2, PathSeg_FillCubic);
PathFillCubic_write(a, PathFillCubicRef(ref.offset + 4), s);
}
void PathSeg_StrokeCubic_write(PathSegRef ref, PathStrokeCubic s) {
memory[ref.offset >> 2] = PathSeg_StrokeCubic;
PathStrokeCubic_write(PathStrokeCubicRef(ref.offset + 4), s);
void PathSeg_StrokeCubic_write(Alloc a, PathSegRef ref, PathStrokeCubic s) {
write_mem(a, ref.offset >> 2, PathSeg_StrokeCubic);
PathStrokeCubic_write(a, PathStrokeCubicRef(ref.offset + 4), s);
}

View file

@ -171,12 +171,12 @@ CmdRef Cmd_index(CmdRef ref, uint index) {
return CmdRef(ref.offset + index * Cmd_size);
}
CmdCircle CmdCircle_read(CmdCircleRef ref) {
CmdCircle CmdCircle_read(Alloc a, CmdCircleRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
CmdCircle s;
s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.radius = uintBitsToFloat(raw2);
@ -184,39 +184,39 @@ CmdCircle CmdCircle_read(CmdCircleRef ref) {
return s;
}
void CmdCircle_write(CmdCircleRef ref, CmdCircle s) {
void CmdCircle_write(Alloc a, CmdCircleRef ref, CmdCircle s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.center.x);
memory[ix + 1] = floatBitsToUint(s.center.y);
memory[ix + 2] = floatBitsToUint(s.radius);
memory[ix + 3] = s.rgba_color;
write_mem(a, ix + 0, floatBitsToUint(s.center.x));
write_mem(a, ix + 1, floatBitsToUint(s.center.y));
write_mem(a, ix + 2, floatBitsToUint(s.radius));
write_mem(a, ix + 3, s.rgba_color);
}
CmdLine CmdLine_read(CmdLineRef ref) {
CmdLine CmdLine_read(Alloc a, CmdLineRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
CmdLine s;
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s;
}
void CmdLine_write(CmdLineRef ref, CmdLine s) {
void CmdLine_write(Alloc a, CmdLineRef ref, CmdLine s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.start.x);
memory[ix + 1] = floatBitsToUint(s.start.y);
memory[ix + 2] = floatBitsToUint(s.end.x);
memory[ix + 3] = floatBitsToUint(s.end.y);
write_mem(a, ix + 0, floatBitsToUint(s.start.x));
write_mem(a, ix + 1, floatBitsToUint(s.start.y));
write_mem(a, ix + 2, floatBitsToUint(s.end.x));
write_mem(a, ix + 3, floatBitsToUint(s.end.y));
}
CmdStroke CmdStroke_read(CmdStrokeRef ref) {
CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
CmdStroke s;
s.tile_ref = raw0;
s.half_width = uintBitsToFloat(raw1);
@ -224,18 +224,18 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
return s;
}
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
void CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = s.tile_ref;
memory[ix + 1] = floatBitsToUint(s.half_width);
memory[ix + 2] = s.rgba_color;
write_mem(a, ix + 0, s.tile_ref);
write_mem(a, ix + 1, floatBitsToUint(s.half_width));
write_mem(a, ix + 2, s.rgba_color);
}
CmdFill CmdFill_read(CmdFillRef ref) {
CmdFill CmdFill_read(Alloc a, CmdFillRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
CmdFill s;
s.tile_ref = raw0;
s.backdrop = int(raw1);
@ -243,189 +243,189 @@ CmdFill CmdFill_read(CmdFillRef ref) {
return s;
}
void CmdFill_write(CmdFillRef ref, CmdFill s) {
void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = s.tile_ref;
memory[ix + 1] = uint(s.backdrop);
memory[ix + 2] = s.rgba_color;
write_mem(a, ix + 0, s.tile_ref);
write_mem(a, ix + 1, uint(s.backdrop));
write_mem(a, ix + 2, s.rgba_color);
}
CmdBeginClip CmdBeginClip_read(CmdBeginClipRef ref) {
CmdBeginClip CmdBeginClip_read(Alloc a, CmdBeginClipRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
CmdBeginClip s;
s.tile_ref = raw0;
s.backdrop = int(raw1);
return s;
}
void CmdBeginClip_write(CmdBeginClipRef ref, CmdBeginClip s) {
void CmdBeginClip_write(Alloc a, CmdBeginClipRef ref, CmdBeginClip s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = s.tile_ref;
memory[ix + 1] = uint(s.backdrop);
write_mem(a, ix + 0, s.tile_ref);
write_mem(a, ix + 1, uint(s.backdrop));
}
CmdBeginSolidClip CmdBeginSolidClip_read(CmdBeginSolidClipRef ref) {
CmdBeginSolidClip CmdBeginSolidClip_read(Alloc a, CmdBeginSolidClipRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw0 = read_mem(a, ix + 0);
CmdBeginSolidClip s;
s.alpha = uintBitsToFloat(raw0);
return s;
}
void CmdBeginSolidClip_write(CmdBeginSolidClipRef ref, CmdBeginSolidClip s) {
void CmdBeginSolidClip_write(Alloc a, CmdBeginSolidClipRef ref, CmdBeginSolidClip s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.alpha);
write_mem(a, ix + 0, floatBitsToUint(s.alpha));
}
CmdEndClip CmdEndClip_read(CmdEndClipRef ref) {
CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw0 = read_mem(a, ix + 0);
CmdEndClip s;
s.alpha = uintBitsToFloat(raw0);
return s;
}
void CmdEndClip_write(CmdEndClipRef ref, CmdEndClip s) {
void CmdEndClip_write(Alloc a, CmdEndClipRef ref, CmdEndClip s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.alpha);
write_mem(a, ix + 0, floatBitsToUint(s.alpha));
}
CmdSolid CmdSolid_read(CmdSolidRef ref) {
CmdSolid CmdSolid_read(Alloc a, CmdSolidRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw0 = read_mem(a, ix + 0);
CmdSolid s;
s.rgba_color = raw0;
return s;
}
void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
void CmdSolid_write(Alloc a, CmdSolidRef ref, CmdSolid s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = s.rgba_color;
write_mem(a, ix + 0, s.rgba_color);
}
CmdSolidMask CmdSolidMask_read(CmdSolidMaskRef ref) {
CmdSolidMask CmdSolidMask_read(Alloc a, CmdSolidMaskRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw0 = read_mem(a, ix + 0);
CmdSolidMask s;
s.mask = uintBitsToFloat(raw0);
return s;
}
void CmdSolidMask_write(CmdSolidMaskRef ref, CmdSolidMask s) {
void CmdSolidMask_write(Alloc a, CmdSolidMaskRef ref, CmdSolidMask s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.mask);
write_mem(a, ix + 0, floatBitsToUint(s.mask));
}
CmdJump CmdJump_read(CmdJumpRef ref) {
CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw0 = read_mem(a, ix + 0);
CmdJump s;
s.new_ref = raw0;
return s;
}
void CmdJump_write(CmdJumpRef ref, CmdJump s) {
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) {
uint ix = ref.offset >> 2;
memory[ix + 0] = s.new_ref;
write_mem(a, ix + 0, s.new_ref);
}
uint Cmd_tag(CmdRef ref) {
return memory[ref.offset >> 2];
uint Cmd_tag(Alloc a, CmdRef ref) {
return read_mem(a, ref.offset >> 2);
}
CmdCircle Cmd_Circle_read(CmdRef ref) {
return CmdCircle_read(CmdCircleRef(ref.offset + 4));
CmdCircle Cmd_Circle_read(Alloc a, CmdRef ref) {
return CmdCircle_read(a, CmdCircleRef(ref.offset + 4));
}
CmdLine Cmd_Line_read(CmdRef ref) {
return CmdLine_read(CmdLineRef(ref.offset + 4));
CmdLine Cmd_Line_read(Alloc a, CmdRef ref) {
return CmdLine_read(a, CmdLineRef(ref.offset + 4));
}
CmdFill Cmd_Fill_read(CmdRef ref) {
return CmdFill_read(CmdFillRef(ref.offset + 4));
CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) {
return CmdFill_read(a, CmdFillRef(ref.offset + 4));
}
CmdBeginClip Cmd_BeginClip_read(CmdRef ref) {
return CmdBeginClip_read(CmdBeginClipRef(ref.offset + 4));
CmdBeginClip Cmd_BeginClip_read(Alloc a, CmdRef ref) {
return CmdBeginClip_read(a, CmdBeginClipRef(ref.offset + 4));
}
CmdBeginSolidClip Cmd_BeginSolidClip_read(CmdRef ref) {
return CmdBeginSolidClip_read(CmdBeginSolidClipRef(ref.offset + 4));
CmdBeginSolidClip Cmd_BeginSolidClip_read(Alloc a, CmdRef ref) {
return CmdBeginSolidClip_read(a, CmdBeginSolidClipRef(ref.offset + 4));
}
CmdEndClip Cmd_EndClip_read(CmdRef ref) {
return CmdEndClip_read(CmdEndClipRef(ref.offset + 4));
CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) {
return CmdEndClip_read(a, CmdEndClipRef(ref.offset + 4));
}
CmdStroke Cmd_Stroke_read(CmdRef ref) {
return CmdStroke_read(CmdStrokeRef(ref.offset + 4));
CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) {
return CmdStroke_read(a, CmdStrokeRef(ref.offset + 4));
}
CmdSolid Cmd_Solid_read(CmdRef ref) {
return CmdSolid_read(CmdSolidRef(ref.offset + 4));
CmdSolid Cmd_Solid_read(Alloc a, CmdRef ref) {
return CmdSolid_read(a, CmdSolidRef(ref.offset + 4));
}
CmdSolidMask Cmd_SolidMask_read(CmdRef ref) {
return CmdSolidMask_read(CmdSolidMaskRef(ref.offset + 4));
CmdSolidMask Cmd_SolidMask_read(Alloc a, CmdRef ref) {
return CmdSolidMask_read(a, CmdSolidMaskRef(ref.offset + 4));
}
CmdJump Cmd_Jump_read(CmdRef ref) {
return CmdJump_read(CmdJumpRef(ref.offset + 4));
CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) {
return CmdJump_read(a, CmdJumpRef(ref.offset + 4));
}
// Cmd_End_write: store the Cmd_End tag at `ref`; this variant has no payload.
// Rendered diff: the signature/body that assigns to `memory[]` directly is
// the version being removed; the `Alloc a` version replaces it and performs
// the same store through write_mem.
// ref.offset >> 2 converts the byte offset into a 32-bit word index.
void Cmd_End_write(CmdRef ref) {
memory[ref.offset >> 2] = Cmd_End;
void Cmd_End_write(Alloc a, CmdRef ref) {
write_mem(a, ref.offset >> 2, Cmd_End);
}
// Cmd_Circle_write: store the Cmd_Circle tag at `ref`, then the payload `s`
// right after it.
// Rendered diff: the first signature and the two statements that use
// `memory[]` / the Alloc-less CmdCircle_write are the removed version; the
// `Alloc a` signature and its two statements are the replacement.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_Circle_write(CmdRef ref, CmdCircle s) {
memory[ref.offset >> 2] = Cmd_Circle;
CmdCircle_write(CmdCircleRef(ref.offset + 4), s);
void Cmd_Circle_write(Alloc a, CmdRef ref, CmdCircle s) {
write_mem(a, ref.offset >> 2, Cmd_Circle);
CmdCircle_write(a, CmdCircleRef(ref.offset + 4), s);
}
// Cmd_Line_write: store the Cmd_Line tag at `ref`, then the payload `s`
// right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_Line_write(CmdRef ref, CmdLine s) {
memory[ref.offset >> 2] = Cmd_Line;
CmdLine_write(CmdLineRef(ref.offset + 4), s);
void Cmd_Line_write(Alloc a, CmdRef ref, CmdLine s) {
write_mem(a, ref.offset >> 2, Cmd_Line);
CmdLine_write(a, CmdLineRef(ref.offset + 4), s);
}
// Cmd_Fill_write: store the Cmd_Fill tag at `ref`, then the payload `s`
// right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_Fill_write(CmdRef ref, CmdFill s) {
memory[ref.offset >> 2] = Cmd_Fill;
CmdFill_write(CmdFillRef(ref.offset + 4), s);
void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) {
write_mem(a, ref.offset >> 2, Cmd_Fill);
CmdFill_write(a, CmdFillRef(ref.offset + 4), s);
}
// Cmd_BeginClip_write: store the Cmd_BeginClip tag at `ref`, then the
// payload `s` right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_BeginClip_write(CmdRef ref, CmdBeginClip s) {
memory[ref.offset >> 2] = Cmd_BeginClip;
CmdBeginClip_write(CmdBeginClipRef(ref.offset + 4), s);
void Cmd_BeginClip_write(Alloc a, CmdRef ref, CmdBeginClip s) {
write_mem(a, ref.offset >> 2, Cmd_BeginClip);
CmdBeginClip_write(a, CmdBeginClipRef(ref.offset + 4), s);
}
// Cmd_BeginSolidClip_write: store the Cmd_BeginSolidClip tag at `ref`, then
// the payload `s` right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_BeginSolidClip_write(CmdRef ref, CmdBeginSolidClip s) {
memory[ref.offset >> 2] = Cmd_BeginSolidClip;
CmdBeginSolidClip_write(CmdBeginSolidClipRef(ref.offset + 4), s);
void Cmd_BeginSolidClip_write(Alloc a, CmdRef ref, CmdBeginSolidClip s) {
write_mem(a, ref.offset >> 2, Cmd_BeginSolidClip);
CmdBeginSolidClip_write(a, CmdBeginSolidClipRef(ref.offset + 4), s);
}
// Cmd_EndClip_write: store the Cmd_EndClip tag at `ref`, then the payload
// `s` right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_EndClip_write(CmdRef ref, CmdEndClip s) {
memory[ref.offset >> 2] = Cmd_EndClip;
CmdEndClip_write(CmdEndClipRef(ref.offset + 4), s);
void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) {
write_mem(a, ref.offset >> 2, Cmd_EndClip);
CmdEndClip_write(a, CmdEndClipRef(ref.offset + 4), s);
}
// Cmd_Stroke_write: store the Cmd_Stroke tag at `ref`, then the payload `s`
// right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_Stroke_write(CmdRef ref, CmdStroke s) {
memory[ref.offset >> 2] = Cmd_Stroke;
CmdStroke_write(CmdStrokeRef(ref.offset + 4), s);
void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) {
write_mem(a, ref.offset >> 2, Cmd_Stroke);
CmdStroke_write(a, CmdStrokeRef(ref.offset + 4), s);
}
// Cmd_Solid_write: store the Cmd_Solid tag at `ref`, then the payload `s`
// right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
memory[ref.offset >> 2] = Cmd_Solid;
CmdSolid_write(CmdSolidRef(ref.offset + 4), s);
void Cmd_Solid_write(Alloc a, CmdRef ref, CmdSolid s) {
write_mem(a, ref.offset >> 2, Cmd_Solid);
CmdSolid_write(a, CmdSolidRef(ref.offset + 4), s);
}
// Cmd_SolidMask_write: store the Cmd_SolidMask tag at `ref`, then the
// payload `s` right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_SolidMask_write(CmdRef ref, CmdSolidMask s) {
memory[ref.offset >> 2] = Cmd_SolidMask;
CmdSolidMask_write(CmdSolidMaskRef(ref.offset + 4), s);
void Cmd_SolidMask_write(Alloc a, CmdRef ref, CmdSolidMask s) {
write_mem(a, ref.offset >> 2, Cmd_SolidMask);
CmdSolidMask_write(a, CmdSolidMaskRef(ref.offset + 4), s);
}
// Cmd_Jump_write: store the Cmd_Jump tag at `ref`, then the payload `s`
// right after it.
// Rendered diff: removed (direct `memory[]`) version first, followed by the
// replacement that takes `Alloc a` and goes through write_mem.
// Tag word index is ref.offset >> 2; payload starts at ref.offset + 4.
void Cmd_Jump_write(CmdRef ref, CmdJump s) {
memory[ref.offset >> 2] = Cmd_Jump;
CmdJump_write(CmdJumpRef(ref.offset + 4), s);
void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) {
write_mem(a, ref.offset >> 2, Cmd_Jump);
CmdJump_write(a, CmdJumpRef(ref.offset + 4), s);
}

View file

@ -30,9 +30,9 @@ struct Config {
uint n_pathseg;
uint width_in_tiles;
uint height_in_tiles;
uint tile_base;
uint bin_base;
uint ptcl_base;
uint pathseg_base;
uint anno_base;
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
};

View file

@ -49,48 +49,48 @@ TileSegRef TileSeg_index(TileSegRef ref, uint index) {
return TileSegRef(ref.offset + index * TileSeg_size);
}
// Path_read: decode a Path from three consecutive 32-bit words at `ref`.
// Rendered diff: the Alloc-less signature and the `memory[...]` loads are the
// version being removed; the `Alloc a` signature and the read_mem loads
// replace them. The decoding below the loads is shared by both versions.
Path Path_read(PathRef ref) {
Path Path_read(Alloc a, PathRef ref) {
// Byte offset -> word index.
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
Path s;
// bbox is packed as four 16-bit fields: x/y in the low/high halves of
// word 0, z/w in the low/high halves of word 1.
s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16);
// Word 2 is the offset carried by the TileRef.
s.tiles = TileRef(raw2);
return s;
}
// Path_write: encode Path `s` into three consecutive 32-bit words at `ref`.
// Rendered diff: the Alloc-less signature and the `memory[...] =` stores are
// the version being removed; the `Alloc a` signature and the write_mem
// calls replace them with the same values at the same word indices.
// Packing mirrors Path_read: bbox.x | bbox.y << 16, bbox.z | bbox.w << 16,
// then tiles.offset. The bbox components are OR-ed in unmasked, so
// NOTE(review): callers presumably keep each component within 16 bits.
void Path_write(PathRef ref, Path s) {
void Path_write(Alloc a, PathRef ref, Path s) {
// Byte offset -> word index.
uint ix = ref.offset >> 2;
memory[ix + 0] = s.bbox.x | (s.bbox.y << 16);
memory[ix + 1] = s.bbox.z | (s.bbox.w << 16);
memory[ix + 2] = s.tiles.offset;
write_mem(a, ix + 0, s.bbox.x | (s.bbox.y << 16));
write_mem(a, ix + 1, s.bbox.z | (s.bbox.w << 16));
write_mem(a, ix + 2, s.tiles.offset);
}
// Tile_read: decode a Tile from two consecutive 32-bit words at `ref`.
// Rendered diff: the Alloc-less signature and the `memory[...]` loads are the
// version being removed; the `Alloc a` signature and the read_mem loads
// replace them.
Tile Tile_read(TileRef ref) {
Tile Tile_read(Alloc a, TileRef ref) {
// Byte offset -> word index.
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
Tile s;
// Word 0 is the offset carried by the TileSegRef.
s.tile = TileSegRef(raw0);
// Word 1 is converted to a signed int.
s.backdrop = int(raw1);
return s;
}
// Tile_write: encode Tile `s` into two consecutive 32-bit words at `ref`.
// Rendered diff: the Alloc-less signature and the `memory[...] =` stores are
// the version being removed; the `Alloc a` signature and the write_mem
// calls replace them.
// Layout mirrors Tile_read: tile.offset, then backdrop converted to uint.
void Tile_write(TileRef ref, Tile s) {
void Tile_write(Alloc a, TileRef ref, Tile s) {
// Byte offset -> word index.
uint ix = ref.offset >> 2;
memory[ix + 0] = s.tile.offset;
memory[ix + 1] = uint(s.backdrop);
write_mem(a, ix + 0, s.tile.offset);
write_mem(a, ix + 1, uint(s.backdrop));
}
// TileSeg_read: decode a TileSeg from six consecutive 32-bit words at `ref`.
// Rendered diff: the Alloc-less signature and the `memory[...]` loads are the
// version being removed; the `Alloc a` signature and the read_mem loads
// replace them.
TileSeg TileSeg_read(TileSegRef ref) {
TileSeg TileSeg_read(Alloc a, TileSegRef ref) {
// Byte offset -> word index.
uint ix = ref.offset >> 2;
uint raw0 = memory[ix + 0];
uint raw1 = memory[ix + 1];
uint raw2 = memory[ix + 2];
uint raw3 = memory[ix + 3];
uint raw4 = memory[ix + 4];
uint raw5 = memory[ix + 5];
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
TileSeg s;
// Words 0-3 are float bit patterns: origin.xy, then vector.xy.
s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
// NOTE(review): the decoding of raw4/raw5 falls in the lines elided by the
// hunk boundary below; TileSeg_write stores y_edge and next.offset in
// those two words, so presumably they are restored here. Confirm.
@ -99,13 +99,13 @@ TileSeg TileSeg_read(TileSegRef ref) {
return s;
}
// TileSeg_write: encode TileSeg `s` into six consecutive 32-bit words at `ref`.
// Rendered diff: the Alloc-less signature and the `memory[...] =` stores are
// the version being removed; the `Alloc a` signature and the write_mem
// calls replace them with the same values at the same word indices.
// Word layout: origin.x, origin.y, vector.x, vector.y, y_edge (all stored as
// float bit patterns), then next.offset.
void TileSeg_write(TileSegRef ref, TileSeg s) {
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) {
// Byte offset -> word index.
uint ix = ref.offset >> 2;
memory[ix + 0] = floatBitsToUint(s.origin.x);
memory[ix + 1] = floatBitsToUint(s.origin.y);
memory[ix + 2] = floatBitsToUint(s.vector.x);
memory[ix + 3] = floatBitsToUint(s.vector.y);
memory[ix + 4] = floatBitsToUint(s.y_edge);
memory[ix + 5] = s.next.offset;
write_mem(a, ix + 0, floatBitsToUint(s.origin.x));
write_mem(a, ix + 1, floatBitsToUint(s.origin.y));
write_mem(a, ix + 2, floatBitsToUint(s.vector.x));
write_mem(a, ix + 3, floatBitsToUint(s.vector.y));
write_mem(a, ix + 4, floatBitsToUint(s.y_edge));
write_mem(a, ix + 5, s.next.offset);
}

View file

@ -5,8 +5,8 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#include "mem.h"
#include "setup.h"
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
@ -25,21 +25,21 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
#define SY (1.0 / float(TILE_HEIGHT_PX))
// Workgroup-shared scratch for main(): one tile count per invocation, which
// main() turns into a running sum in place.
shared uint sh_tile_count[TILE_ALLOC_WG];
// Result of the single malloc performed per workgroup. Rendered diff: the
// `Alloc` declaration is the version being removed; the `MallocResult`
// declaration replaces it (main() reads its `.failed` and `.alloc` fields).
shared Alloc sh_tile_alloc;
shared MallocResult sh_tile_alloc;
// Entry point of the tile allocation pass. Each invocation handles one
// element: it reads the element's annotation bounding box, the workgroup sums
// the per-element tile counts, one invocation allocates tile storage for the
// whole workgroup, each element records its slice in its Path, and finally
// all invocations cooperatively zero the new tiles.
// This listing is a rendered diff: wherever two near-identical lines follow
// each other, the first is the version being removed and the second (using
// conf.*_alloc, read_mem/write_mem, MallocResult) is its replacement. Lines
// starting with `@ -` are hunk boundaries; code between hunks is not shown.
void main() {
// Bail out if an earlier stage already reported a memory problem.
if (mem_overflow) {
if (mem_error != NO_ERROR) {
return;
}
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef(conf.tile_base + element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_base + element_ix * Annotated_size);
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
// Invocations past the last element keep the Nop tag and never read memory.
uint tag = Annotated_Nop;
if (element_ix < conf.n_elements) {
tag = Annotated_tag(ref);
tag = Annotated_tag(conf.anno_alloc, ref);
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
@ -49,7 +49,7 @@ void main() {
case Annotated_EndClip:
// Note: we take advantage of the fact that fills, strokes, and
// clips have compatible layout.
AnnoFill fill = Annotated_Fill_read(ref);
AnnoFill fill = Annotated_Fill_read(conf.anno_alloc, ref);
// Scale the bbox by SX/SY, rounding the min corner down and the max
// corner up.
x0 = int(floor(fill.bbox.x * SX));
y0 = int(floor(fill.bbox.y * SY));
x1 = int(ceil(fill.bbox.z * SX));
@ -71,36 +71,38 @@ void main() {
}
// NOTE(review): tile_count and `path` are set up in lines elided by the hunk
// boundary above; presumably tile_count is this element's own tile count.
sh_tile_count[th_ix] = tile_count;
// The replacement code accumulates into total_tile_count so that tile_count
// keeps this element's own count, which slice_mem needs further down.
uint total_tile_count = tile_count;
// Prefix sum of sh_tile_count
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
barrier();
if (th_ix >= (1 << i)) {
tile_count += sh_tile_count[th_ix - (1 << i)];
total_tile_count += sh_tile_count[th_ix - (1 << i)];
}
barrier();
sh_tile_count[th_ix] = tile_count;
sh_tile_count[th_ix] = total_tile_count;
}
// Only the last invocation allocates; after the loop its running sum covers
// every element of the workgroup.
if (th_ix == TILE_ALLOC_WG - 1) {
sh_tile_alloc = malloc(tile_count * Tile_size);
sh_tile_alloc = malloc(total_tile_count * Tile_size);
}
barrier();
Alloc alloc_start = sh_tile_alloc;
MallocResult alloc_start = sh_tile_alloc;
// Every invocation reads the same shared result, so on failure the whole
// workgroup returns here.
if (alloc_start.failed) {
return;
}
if (element_ix < conf.n_elements) {
// Sum of the counts of all lower-indexed invocations = this element's
// first tile within the workgroup allocation.
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
path.tiles = TileRef(alloc_start.offset + Tile_size * tile_subix);
Path_write(path_ref, path);
// NOTE(review): slice_mem is presumably (alloc, byte offset, byte size);
// confirm against mem.h.
Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
path.tiles = TileRef(tiles_alloc.offset);
Path_write(conf.tile_alloc, path_ref, path);
}
// Zero out allocated tiles efficiently
// total_count is in 32-bit words: workgroup tile total * (Tile_size / 4).
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
uint start_ix = alloc_start.offset >> 2;
uint start_ix = alloc_start.alloc.offset >> 2;
// Invocation th_ix clears words th_ix, th_ix + TILE_ALLOC_WG, ...
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
// Note: this interleaving is faster than using Tile_write
// by a significant amount.
memory[start_ix + i] = 0;
write_mem(alloc_start.alloc, start_ix + i, 0);
}
}

Binary file not shown.