Dynamic allocation of intermediate buffers

When the initial allocation is exceeded, do an atomic bump allocation.
This is done for both tilegroup instances and per-tile command lists.
This commit is contained in:
Raph Levien 2020-04-25 10:15:22 -07:00
parent e1c0e448ef
commit 55e35dd879
14 changed files with 165 additions and 15 deletions

View file

@ -71,6 +71,11 @@ pub trait CmdBuf<D: Device> {
unsafe fn memory_barrier(&mut self); unsafe fn memory_barrier(&mut self);
/// Clear the buffer.
///
/// This is readily supported in Vulkan, but for portability it is remarkably
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
/// kernel, or organize the code not to need it.
unsafe fn clear_buffer(&self, buffer: &D::Buffer); unsafe fn clear_buffer(&self, buffer: &D::Buffer);
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer); unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);

View file

@ -33,6 +33,9 @@ piet_gpu! {
struct CmdSolid { struct CmdSolid {
rgba_color: u32, rgba_color: u32,
} }
struct CmdJump {
new_ref: u32,
}
enum Cmd { enum Cmd {
End, End,
Circle(CmdCircle), Circle(CmdCircle),
@ -42,6 +45,7 @@ piet_gpu! {
FillEdge(CmdFillEdge), FillEdge(CmdFillEdge),
DrawFill(CmdDrawFill), DrawFill(CmdDrawFill),
Solid(CmdSolid), Solid(CmdSolid),
Jump(CmdJump),
Bail, Bail,
} }
} }

View file

@ -10,8 +10,12 @@ piet_gpu! {
// A better type would be Point. // A better type would be Point.
offset: [f32; 2], offset: [f32; 2],
} }
struct Jump {
new_ref: u32,
}
enum TileGroup { enum TileGroup {
Instance(Instance), Instance(Instance),
Jump(Jump),
End, End,
} }
} }

View file

@ -25,6 +25,10 @@ layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup; uint[] tilegroup;
}; };
layout(set = 0, binding = 2) buffer AllocBuf {
uint alloc;
};
#include "scene.h" #include "scene.h"
#include "tilegroup.h" #include "tilegroup.h"
@ -43,6 +47,7 @@ void main() {
uint stack_ix = 0; uint stack_ix = 0;
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x; uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC); TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX); vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
PietItemRef root = PietItemRef(0); PietItemRef root = PietItemRef(0);
SimpleGroup group = PietItem_Group_read(root); SimpleGroup group = PietItem_Group_read(root);
@ -62,9 +67,16 @@ void main() {
if (hit && !is_group) { if (hit && !is_group) {
PietItemRef item_ref = PietItem_index(group.items, tos.index); PietItemRef item_ref = PietItem_index(group.items, tos.index);
Instance ins = Instance(item_ref.offset, tos.offset); Instance ins = Instance(item_ref.offset, tos.offset);
if (tg_ref.offset > tg_limit) {
// Allocation exceeded; do atomic bump alloc.
uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
Jump jump = Jump(new_tg);
TileGroup_Jump_write(tg_ref, jump);
tg_ref = TileGroupRef(new_tg);
tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
}
TileGroup_Instance_write(tg_ref, ins); TileGroup_Instance_write(tg_ref, ins);
tg_ref.offset += TileGroup_size; tg_ref.offset += TileGroup_size;
// TODO: bump allocate if allocation exceeded
} }
if (is_group) { if (is_group) {
PietItemRef item_ref = PietItem_index(group.items, tos.index); PietItemRef item_ref = PietItem_index(group.items, tos.index);

Binary file not shown.

View file

@ -20,12 +20,26 @@ layout(set = 0, binding = 2) buffer PtclBuf {
uint[] ptcl; uint[] ptcl;
}; };
layout(set = 0, binding = 3) buffer AllocBuf {
uint alloc;
};
#include "scene.h" #include "scene.h"
#include "tilegroup.h" #include "tilegroup.h"
#include "ptcl.h" #include "ptcl.h"
#include "setup.h" #include "setup.h"
// Make sure `cmd_ref` has room for another command, chaining to a freshly
// allocated chunk when the current one is exhausted.
//
// If `cmd_ref.offset` has passed `cmd_limit`, this reserves another
// PTCL_INITIAL_ALLOC units by atomically bumping the shared `alloc` counter,
// writes a `Jump` command at the current position pointing to the new chunk,
// and updates `cmd_ref` and `cmd_limit` in place to describe that chunk.
// Otherwise both arguments are left unchanged.
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
if (cmd_ref.offset > cmd_limit) {
// atomicAdd yields the counter's value before the add, i.e. the start of
// the region this invocation now owns.
uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
CmdJump jump = CmdJump(new_cmd);
Cmd_Jump_write(cmd_ref, jump);
cmd_ref = CmdRef(new_cmd);
// The limit sits two command slots short of the chunk end; presumably this
// keeps space for a trailing Jump/End command -- TODO confirm.
cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
}
}
void main() { void main() {
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x; uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
@ -33,12 +47,17 @@ void main() {
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC); TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
while (true) { while (true) {
uint tg_tag = TileGroup_tag(tg_ref); uint tg_tag = TileGroup_tag(tg_ref);
if (tg_tag == TileGroup_End) { if (tg_tag == TileGroup_End) {
break; break;
} }
if (tg_tag == TileGroup_Jump) {
tg_ref = TileGroupRef(TileGroup_Jump_read(tg_ref).new_ref);
continue;
}
// Assume tg_tag is `Instance`, though there will be more cases. // Assume tg_tag is `Instance`, though there will be more cases.
Instance ins = TileGroup_Instance_read(tg_ref); Instance ins = TileGroup_Instance_read(tg_ref);
PietItemRef item_ref = PietItemRef(ins.item_ref); PietItemRef item_ref = PietItemRef(ins.item_ref);
@ -52,6 +71,7 @@ void main() {
&& max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX))) && max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
{ {
CmdCircle cmd = CmdCircle(center, r, circle.rgba_color); CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
alloc_cmd(cmd_ref, cmd_limit);
Cmd_Circle_write(cmd_ref, cmd); Cmd_Circle_write(cmd_ref, cmd);
cmd_ref.offset += Cmd_size; cmd_ref.offset += Cmd_size;
} }

Binary file not shown.

View file

@ -44,6 +44,10 @@ void main() {
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color); vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color);
// TODO: sRGB // TODO: sRGB
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
break;
case Cmd_Jump:
cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref);
continue;
} }
cmd_ref.offset += Cmd_size; cmd_ref.offset += Cmd_size;
} }

Binary file not shown.

View file

@ -28,6 +28,10 @@ struct CmdSolidRef {
uint offset; uint offset;
}; };
struct CmdJumpRef {
uint offset;
};
struct CmdRef { struct CmdRef {
uint offset; uint offset;
}; };
@ -109,6 +113,16 @@ CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
return CmdSolidRef(ref.offset + index * CmdSolid_size); return CmdSolidRef(ref.offset + index * CmdSolid_size);
} }
struct CmdJump {
uint new_ref;
};
#define CmdJump_size 4
// Return a reference `index` elements past `ref`, stepping by CmdJump_size
// per element.
CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) {
return CmdJumpRef(ref.offset + index * CmdJump_size);
}
#define Cmd_End 0 #define Cmd_End 0
#define Cmd_Circle 1 #define Cmd_Circle 1
#define Cmd_Line 2 #define Cmd_Line 2
@ -117,7 +131,8 @@ CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
#define Cmd_FillEdge 5 #define Cmd_FillEdge 5
#define Cmd_DrawFill 6 #define Cmd_DrawFill 6
#define Cmd_Solid 7 #define Cmd_Solid 7
#define Cmd_Bail 8 #define Cmd_Jump 8
#define Cmd_Bail 9
#define Cmd_size 20 #define Cmd_size 20
CmdRef Cmd_index(CmdRef ref, uint index) { CmdRef Cmd_index(CmdRef ref, uint index) {
@ -246,6 +261,19 @@ void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
ptcl[ix + 0] = s.rgba_color; ptcl[ix + 0] = s.rgba_color;
} }
// Load a `CmdJump` from the `ptcl` buffer at the location given by `ref`.
CmdJump CmdJump_read(CmdJumpRef ref) {
// `ptcl` is an array of uints, so the offset is shifted down by 2 bits to
// turn it into an array index.
uint ix = ref.offset >> 2;
uint raw0 = ptcl[ix + 0];
CmdJump s;
s.new_ref = raw0;
return s;
}
// Store the `CmdJump` value `s` into the `ptcl` buffer at the location given
// by `ref`.
void CmdJump_write(CmdJumpRef ref, CmdJump s) {
// `ptcl` is an array of uints, so the offset is shifted down by 2 bits to
// turn it into an array index.
uint ix = ref.offset >> 2;
ptcl[ix + 0] = s.new_ref;
}
uint Cmd_tag(CmdRef ref) { uint Cmd_tag(CmdRef ref) {
return ptcl[ref.offset >> 2]; return ptcl[ref.offset >> 2];
} }
@ -278,6 +306,10 @@ CmdSolid Cmd_Solid_read(CmdRef ref) {
return CmdSolid_read(CmdSolidRef(ref.offset + 4)); return CmdSolid_read(CmdSolidRef(ref.offset + 4));
} }
// Read the payload of a `Jump` command located at `ref`.
//
// The payload is read from 4 past `ref.offset`, skipping the tag word. The
// tag itself is not checked here.
CmdJump Cmd_Jump_read(CmdRef ref) {
return CmdJump_read(CmdJumpRef(ref.offset + 4));
}
void Cmd_End_write(CmdRef ref) { void Cmd_End_write(CmdRef ref) {
ptcl[ref.offset >> 2] = Cmd_End; ptcl[ref.offset >> 2] = Cmd_End;
} }
@ -317,6 +349,11 @@ void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
CmdSolid_write(CmdSolidRef(ref.offset + 4), s); CmdSolid_write(CmdSolidRef(ref.offset + 4), s);
} }
// Write a complete `Jump` command at `ref`: the Cmd_Jump tag word first,
// followed by the `CmdJump` payload `s` at offset + 4.
void Cmd_Jump_write(CmdRef ref, CmdJump s) {
ptcl[ref.offset >> 2] = Cmd_Jump;
CmdJump_write(CmdJumpRef(ref.offset + 4), s);
}
void Cmd_Bail_write(CmdRef ref) { void Cmd_Bail_write(CmdRef ref) {
ptcl[ref.offset >> 2] = Cmd_Bail; ptcl[ref.offset >> 2] = Cmd_Bail;
} }

View file

@ -22,4 +22,4 @@
#define TILE_WIDTH_PX 16 #define TILE_WIDTH_PX 16
#define TILE_HEIGHT_PX 16 #define TILE_HEIGHT_PX 16
#define PTCL_INITIAL_ALLOC 4096 #define PTCL_INITIAL_ALLOC 1024

View file

@ -4,6 +4,10 @@ struct InstanceRef {
uint offset; uint offset;
}; };
struct JumpRef {
uint offset;
};
struct TileGroupRef { struct TileGroupRef {
uint offset; uint offset;
}; };
@ -19,8 +23,19 @@ InstanceRef Instance_index(InstanceRef ref, uint index) {
return InstanceRef(ref.offset + index * Instance_size); return InstanceRef(ref.offset + index * Instance_size);
} }
struct Jump {
uint new_ref;
};
#define Jump_size 4
// Return a reference `index` elements past `ref`, stepping by Jump_size per
// element.
JumpRef Jump_index(JumpRef ref, uint index) {
return JumpRef(ref.offset + index * Jump_size);
}
#define TileGroup_Instance 0 #define TileGroup_Instance 0
#define TileGroup_End 1 #define TileGroup_Jump 1
#define TileGroup_End 2
#define TileGroup_size 16 #define TileGroup_size 16
TileGroupRef TileGroup_index(TileGroupRef ref, uint index) { TileGroupRef TileGroup_index(TileGroupRef ref, uint index) {
@ -45,6 +60,19 @@ void Instance_write(InstanceRef ref, Instance s) {
tilegroup[ix + 2] = floatBitsToUint(s.offset.y); tilegroup[ix + 2] = floatBitsToUint(s.offset.y);
} }
// Load a `Jump` from the `tilegroup` buffer at the location given by `ref`.
Jump Jump_read(JumpRef ref) {
// `tilegroup` is an array of uints, so the offset is shifted down by 2 bits
// to turn it into an array index.
uint ix = ref.offset >> 2;
uint raw0 = tilegroup[ix + 0];
Jump s;
s.new_ref = raw0;
return s;
}
// Store the `Jump` value `s` into the `tilegroup` buffer at the location
// given by `ref`.
void Jump_write(JumpRef ref, Jump s) {
// `tilegroup` is an array of uints, so the offset is shifted down by 2 bits
// to turn it into an array index.
uint ix = ref.offset >> 2;
tilegroup[ix + 0] = s.new_ref;
}
uint TileGroup_tag(TileGroupRef ref) { uint TileGroup_tag(TileGroupRef ref) {
return tilegroup[ref.offset >> 2]; return tilegroup[ref.offset >> 2];
} }
@ -53,11 +81,20 @@ Instance TileGroup_Instance_read(TileGroupRef ref) {
return Instance_read(InstanceRef(ref.offset + 4)); return Instance_read(InstanceRef(ref.offset + 4));
} }
// Read the payload of a `Jump` tilegroup entry located at `ref`.
//
// The payload is read from 4 past `ref.offset`, skipping the tag word. The
// tag itself is not checked here.
Jump TileGroup_Jump_read(TileGroupRef ref) {
return Jump_read(JumpRef(ref.offset + 4));
}
void TileGroup_Instance_write(TileGroupRef ref, Instance s) { void TileGroup_Instance_write(TileGroupRef ref, Instance s) {
tilegroup[ref.offset >> 2] = TileGroup_Instance; tilegroup[ref.offset >> 2] = TileGroup_Instance;
Instance_write(InstanceRef(ref.offset + 4), s); Instance_write(InstanceRef(ref.offset + 4), s);
} }
// Write a complete `Jump` tilegroup entry at `ref`: the TileGroup_Jump tag
// word first, followed by the `Jump` payload `s` at offset + 4.
void TileGroup_Jump_write(TileGroupRef ref, Jump s) {
tilegroup[ref.offset >> 2] = TileGroup_Jump;
Jump_write(JumpRef(ref.offset + 4), s);
}
void TileGroup_End_write(TileGroupRef ref) { void TileGroup_End_write(TileGroupRef ref) {
tilegroup[ref.offset >> 2] = TileGroup_End; tilegroup[ref.offset >> 2] = TileGroup_End;
} }

View file

@ -20,7 +20,15 @@ const HEIGHT: usize = 1536;
const TILE_W: usize = 16; const TILE_W: usize = 16;
const TILE_H: usize = 16; const TILE_H: usize = 16;
const N_CIRCLES: usize = 3000; const WIDTH_IN_TILEGROUPS: usize = 4;
const HEIGHT_IN_TILEGROUPS: usize = 96;
const TILEGROUP_INITIAL_ALLOC: usize = 1024;
const WIDTH_IN_TILES: usize = 124;
const HEIGHT_IN_TILES: usize = 96;
const PTCL_INITIAL_ALLOC: usize = 1024;
const N_CIRCLES: usize = 10_000;
fn render_scene(rc: &mut impl RenderContext) { fn render_scene(rc: &mut impl RenderContext) {
let mut rng = rand::thread_rng(); let mut rng = rand::thread_rng();
@ -71,8 +79,7 @@ fn main() {
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
.unwrap(); .unwrap();
device.write_buffer(&scene_buf, &scene).unwrap(); device.write_buffer(&scene_buf, &scene).unwrap();
// These should only be on the host if we're going to examine them from Rust. let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap();
let tilegroup_buf = device.create_buffer(384 * 1024, dev).unwrap();
let ptcl_buf = device.create_buffer(12 * 1024 * 4096, dev).unwrap(); let ptcl_buf = device.create_buffer(12 * 1024 * 4096, dev).unwrap();
let image_buf = device let image_buf = device
.create_buffer((WIDTH * HEIGHT * 4) as u64, host) .create_buffer((WIDTH * HEIGHT * 4) as u64, host)
@ -81,16 +88,34 @@ fn main() {
.create_buffer((WIDTH * HEIGHT * 4) as u64, dev) .create_buffer((WIDTH * HEIGHT * 4) as u64, dev)
.unwrap(); .unwrap();
let k1_alloc_buf_host = device.create_buffer(4, host).unwrap();
let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_INITIAL_ALLOC;
device
.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])
.unwrap();
let k1_code = include_bytes!("../shader/kernel1.spv"); let k1_code = include_bytes!("../shader/kernel1.spv");
let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 2).unwrap(); let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3).unwrap();
let k1_ds = device let k1_ds = device
.create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf]) .create_descriptor_set(
&k1_pipeline,
&[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev],
)
.unwrap(); .unwrap();
let k3_alloc_buf_host = device.create_buffer(4, host).unwrap();
let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
device
.write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])
.unwrap();
let k3_code = include_bytes!("../shader/kernel3.spv"); let k3_code = include_bytes!("../shader/kernel3.spv");
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 3).unwrap(); let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 4).unwrap();
let k3_ds = device let k3_ds = device
.create_descriptor_set(&k3_pipeline, &[&scene_dev, &tilegroup_buf, &ptcl_buf]) .create_descriptor_set(
&k3_pipeline,
&[&scene_dev, &tilegroup_buf, &ptcl_buf, &k3_alloc_buf_dev],
)
.unwrap(); .unwrap();
let k4_code = include_bytes!("../shader/kernel4.spv"); let k4_code = include_bytes!("../shader/kernel4.spv");
@ -102,6 +127,8 @@ fn main() {
let mut cmd_buf = device.create_cmd_buf().unwrap(); let mut cmd_buf = device.create_cmd_buf().unwrap();
cmd_buf.begin(); cmd_buf.begin();
cmd_buf.copy_buffer(&scene_buf, &scene_dev); cmd_buf.copy_buffer(&scene_buf, &scene_dev);
cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev);
cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev);
cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.clear_buffer(&tilegroup_buf);
cmd_buf.clear_buffer(&ptcl_buf); cmd_buf.clear_buffer(&ptcl_buf);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();

View file

@ -238,7 +238,9 @@ fn flatten_shape(
let scene_pt = to_scene_point(p); let scene_pt = to_scene_point(p);
start_pt = Some(clone_scene_pt(&scene_pt)); start_pt = Some(clone_scene_pt(&scene_pt));
if !points.is_empty() { if !points.is_empty() {
points.push(scene::Point { xy: [std::f32::NAN, std::f32::NAN ]}); points.push(scene::Point {
xy: [std::f32::NAN, std::f32::NAN],
});
} }
last_pt = Some(clone_scene_pt(&scene_pt)); last_pt = Some(clone_scene_pt(&scene_pt));
points.push(scene_pt); points.push(scene_pt);
@ -350,7 +352,5 @@ fn to_scene_point(point: Point) -> scene::Point {
// TODO: allow #[derive(Clone)] in piet-gpu-derive. // TODO: allow #[derive(Clone)] in piet-gpu-derive.
fn clone_scene_pt(p: &scene::Point) -> scene::Point { fn clone_scene_pt(p: &scene::Point) -> scene::Point {
scene::Point { scene::Point { xy: p.xy }
xy: p.xy
}
} }