mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Dynamic allocation of intermediate buffers
When the initial allocation is exceeded, do an atomic bump allocation. This is done for both tilegroup instances and per-tile command lists.
This commit is contained in:
parent
e1c0e448ef
commit
55e35dd879
|
@ -71,6 +71,11 @@ pub trait CmdBuf<D: Device> {
|
|||
|
||||
unsafe fn memory_barrier(&mut self);
|
||||
|
||||
/// Clear the buffer.
|
||||
///
|
||||
/// This is readily supported in Vulkan, but for portability it is remarkably
|
||||
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
|
||||
/// kernel, or organize the code not to need it.
|
||||
unsafe fn clear_buffer(&self, buffer: &D::Buffer);
|
||||
|
||||
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
|
||||
|
|
|
@ -33,6 +33,9 @@ piet_gpu! {
|
|||
struct CmdSolid {
|
||||
rgba_color: u32,
|
||||
}
|
||||
struct CmdJump {
|
||||
new_ref: u32,
|
||||
}
|
||||
enum Cmd {
|
||||
End,
|
||||
Circle(CmdCircle),
|
||||
|
@ -42,6 +45,7 @@ piet_gpu! {
|
|||
FillEdge(CmdFillEdge),
|
||||
DrawFill(CmdDrawFill),
|
||||
Solid(CmdSolid),
|
||||
Jump(CmdJump),
|
||||
Bail,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -10,8 +10,12 @@ piet_gpu! {
|
|||
// A better type would be Point.
|
||||
offset: [f32; 2],
|
||||
}
|
||||
struct Jump {
|
||||
new_ref: u32,
|
||||
}
|
||||
enum TileGroup {
|
||||
Instance(Instance),
|
||||
Jump(Jump),
|
||||
End,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,6 +25,10 @@ layout(set = 0, binding = 1) buffer TilegroupBuf {
|
|||
uint[] tilegroup;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 2) buffer AllocBuf {
|
||||
uint alloc;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tilegroup.h"
|
||||
|
||||
|
@ -43,6 +47,7 @@ void main() {
|
|||
uint stack_ix = 0;
|
||||
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
|
||||
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
|
||||
uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
|
||||
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
|
||||
PietItemRef root = PietItemRef(0);
|
||||
SimpleGroup group = PietItem_Group_read(root);
|
||||
|
@ -62,9 +67,16 @@ void main() {
|
|||
if (hit && !is_group) {
|
||||
PietItemRef item_ref = PietItem_index(group.items, tos.index);
|
||||
Instance ins = Instance(item_ref.offset, tos.offset);
|
||||
if (tg_ref.offset > tg_limit) {
|
||||
// Allocation exceeded; do atomic bump alloc.
|
||||
uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
|
||||
Jump jump = Jump(new_tg);
|
||||
TileGroup_Jump_write(tg_ref, jump);
|
||||
tg_ref = TileGroupRef(new_tg);
|
||||
tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
|
||||
}
|
||||
TileGroup_Instance_write(tg_ref, ins);
|
||||
tg_ref.offset += TileGroup_size;
|
||||
// TODO: bump allocate if allocation exceeded
|
||||
}
|
||||
if (is_group) {
|
||||
PietItemRef item_ref = PietItem_index(group.items, tos.index);
|
||||
|
|
Binary file not shown.
|
@ -20,12 +20,26 @@ layout(set = 0, binding = 2) buffer PtclBuf {
|
|||
uint[] ptcl;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 3) buffer AllocBuf {
|
||||
uint alloc;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tilegroup.h"
|
||||
#include "ptcl.h"
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
// Ensure there is room to write another command at `cmd_ref`.
//
// When `cmd_ref.offset` has moved past `cmd_limit`, a fresh chunk of
// PTCL_INITIAL_ALLOC is claimed from the shared `alloc` counter, a Jump
// command pointing at the new chunk is written at the current position, and
// both `cmd_ref` and `cmd_limit` are updated to refer to the new chunk.
// Otherwise both arguments are left untouched.
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
|
||||
if (cmd_ref.offset > cmd_limit) {
|
||||
// atomicAdd returns the counter value from before the addition, which is
// used as the start offset of the newly claimed chunk.
uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
|
||||
CmdJump jump = CmdJump(new_cmd);
|
||||
// Link the old chunk to the new one so a reader can follow the list.
Cmd_Jump_write(cmd_ref, jump);
|
||||
cmd_ref = CmdRef(new_cmd);
|
||||
// The limit sits two Cmd_size slots short of the chunk end; presumably this
// keeps the final slot free for the next Jump record (TODO confirm).
cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
|
||||
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
|
||||
|
@ -33,12 +47,17 @@ void main() {
|
|||
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
|
||||
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
|
||||
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
|
||||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
||||
|
||||
while (true) {
|
||||
uint tg_tag = TileGroup_tag(tg_ref);
|
||||
if (tg_tag == TileGroup_End) {
|
||||
break;
|
||||
}
|
||||
if (tg_tag == TileGroup_Jump) {
|
||||
tg_ref = TileGroupRef(TileGroup_Jump_read(tg_ref).new_ref);
|
||||
continue;
|
||||
}
|
||||
// Assume tg_tag is `Instance`, though there will be more cases.
|
||||
Instance ins = TileGroup_Instance_read(tg_ref);
|
||||
PietItemRef item_ref = PietItemRef(ins.item_ref);
|
||||
|
@ -52,6 +71,7 @@ void main() {
|
|||
&& max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
|
||||
{
|
||||
CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Circle_write(cmd_ref, cmd);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -44,6 +44,10 @@ void main() {
|
|||
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color);
|
||||
// TODO: sRGB
|
||||
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
break;
|
||||
case Cmd_Jump:
|
||||
cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref);
|
||||
continue;
|
||||
}
|
||||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -28,6 +28,10 @@ struct CmdSolidRef {
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct CmdJumpRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct CmdRef {
|
||||
uint offset;
|
||||
};
|
||||
|
@ -109,6 +113,16 @@ CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
|
|||
return CmdSolidRef(ref.offset + index * CmdSolid_size);
|
||||
}
|
||||
|
||||
struct CmdJump {
|
||||
uint new_ref;
|
||||
};
|
||||
|
||||
#define CmdJump_size 4
|
||||
|
||||
CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) {
|
||||
return CmdJumpRef(ref.offset + index * CmdJump_size);
|
||||
}
|
||||
|
||||
#define Cmd_End 0
|
||||
#define Cmd_Circle 1
|
||||
#define Cmd_Line 2
|
||||
|
@ -117,7 +131,8 @@ CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
|
|||
#define Cmd_FillEdge 5
|
||||
#define Cmd_DrawFill 6
|
||||
#define Cmd_Solid 7
|
||||
#define Cmd_Bail 8
|
||||
#define Cmd_Jump 8
|
||||
#define Cmd_Bail 9
|
||||
#define Cmd_size 20
|
||||
|
||||
CmdRef Cmd_index(CmdRef ref, uint index) {
|
||||
|
@ -246,6 +261,19 @@ void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
|
|||
ptcl[ix + 0] = s.rgba_color;
|
||||
}
|
||||
|
||||
// Read a CmdJump record from the `ptcl` buffer at `ref`.
// Returns a CmdJump whose `new_ref` is the single word stored there.
CmdJump CmdJump_read(CmdJumpRef ref) {
|
||||
// `ref.offset` is divided by 4 to obtain an index into the uint array.
uint ix = ref.offset >> 2;
|
||||
uint raw0 = ptcl[ix + 0];
|
||||
CmdJump s;
|
||||
s.new_ref = raw0;
|
||||
return s;
|
||||
}
|
||||
|
||||
// Store the CmdJump record `s` into the `ptcl` buffer at `ref`.
// Only one word is written: `s.new_ref`.
void CmdJump_write(CmdJumpRef ref, CmdJump s) {
|
||||
// `ref.offset` is divided by 4 to obtain an index into the uint array.
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.new_ref;
|
||||
}
|
||||
|
||||
uint Cmd_tag(CmdRef ref) {
|
||||
return ptcl[ref.offset >> 2];
|
||||
}
|
||||
|
@ -278,6 +306,10 @@ CmdSolid Cmd_Solid_read(CmdRef ref) {
|
|||
return CmdSolid_read(CmdSolidRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
// Read the CmdJump payload of the command at `ref`.
// The payload is read starting 4 past `ref.offset`, i.e. after the tag word
// that Cmd_tag reads at `ref.offset`. The tag itself is not checked here.
CmdJump Cmd_Jump_read(CmdRef ref) {
|
||||
return CmdJump_read(CmdJumpRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
void Cmd_End_write(CmdRef ref) {
|
||||
ptcl[ref.offset >> 2] = Cmd_End;
|
||||
}
|
||||
|
@ -317,6 +349,11 @@ void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
|
|||
CmdSolid_write(CmdSolidRef(ref.offset + 4), s);
|
||||
}
|
||||
|
||||
// Write a Jump command at `ref`: the Cmd_Jump tag word first, then the
// CmdJump payload `s` starting 4 past `ref.offset`.
void Cmd_Jump_write(CmdRef ref, CmdJump s) {
|
||||
ptcl[ref.offset >> 2] = Cmd_Jump;
|
||||
CmdJump_write(CmdJumpRef(ref.offset + 4), s);
|
||||
}
|
||||
|
||||
void Cmd_Bail_write(CmdRef ref) {
|
||||
ptcl[ref.offset >> 2] = Cmd_Bail;
|
||||
}
|
||||
|
|
|
@ -22,4 +22,4 @@
|
|||
#define TILE_WIDTH_PX 16
|
||||
#define TILE_HEIGHT_PX 16
|
||||
|
||||
#define PTCL_INITIAL_ALLOC 4096
|
||||
#define PTCL_INITIAL_ALLOC 1024
|
||||
|
|
|
@ -4,6 +4,10 @@ struct InstanceRef {
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct JumpRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct TileGroupRef {
|
||||
uint offset;
|
||||
};
|
||||
|
@ -19,8 +23,19 @@ InstanceRef Instance_index(InstanceRef ref, uint index) {
|
|||
return InstanceRef(ref.offset + index * Instance_size);
|
||||
}
|
||||
|
||||
struct Jump {
|
||||
uint new_ref;
|
||||
};
|
||||
|
||||
#define Jump_size 4
|
||||
|
||||
JumpRef Jump_index(JumpRef ref, uint index) {
|
||||
return JumpRef(ref.offset + index * Jump_size);
|
||||
}
|
||||
|
||||
#define TileGroup_Instance 0
|
||||
#define TileGroup_End 1
|
||||
#define TileGroup_Jump 1
|
||||
#define TileGroup_End 2
|
||||
#define TileGroup_size 16
|
||||
|
||||
TileGroupRef TileGroup_index(TileGroupRef ref, uint index) {
|
||||
|
@ -45,6 +60,19 @@ void Instance_write(InstanceRef ref, Instance s) {
|
|||
tilegroup[ix + 2] = floatBitsToUint(s.offset.y);
|
||||
}
|
||||
|
||||
// Read a Jump record from the `tilegroup` buffer at `ref`.
// Returns a Jump whose `new_ref` is the single word stored there.
Jump Jump_read(JumpRef ref) {
|
||||
// `ref.offset` is divided by 4 to obtain an index into the uint array.
uint ix = ref.offset >> 2;
|
||||
uint raw0 = tilegroup[ix + 0];
|
||||
Jump s;
|
||||
s.new_ref = raw0;
|
||||
return s;
|
||||
}
|
||||
|
||||
// Store the Jump record `s` into the `tilegroup` buffer at `ref`.
// Only one word is written: `s.new_ref`.
void Jump_write(JumpRef ref, Jump s) {
|
||||
// `ref.offset` is divided by 4 to obtain an index into the uint array.
uint ix = ref.offset >> 2;
|
||||
tilegroup[ix + 0] = s.new_ref;
|
||||
}
|
||||
|
||||
uint TileGroup_tag(TileGroupRef ref) {
|
||||
return tilegroup[ref.offset >> 2];
|
||||
}
|
||||
|
@ -53,11 +81,20 @@ Instance TileGroup_Instance_read(TileGroupRef ref) {
|
|||
return Instance_read(InstanceRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
// Read the Jump payload of the tilegroup element at `ref`.
// The payload is read starting 4 past `ref.offset`, i.e. after the tag word
// that TileGroup_tag reads at `ref.offset`. The tag itself is not checked here.
Jump TileGroup_Jump_read(TileGroupRef ref) {
|
||||
return Jump_read(JumpRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
void TileGroup_Instance_write(TileGroupRef ref, Instance s) {
|
||||
tilegroup[ref.offset >> 2] = TileGroup_Instance;
|
||||
Instance_write(InstanceRef(ref.offset + 4), s);
|
||||
}
|
||||
|
||||
// Write a Jump element at `ref`: the TileGroup_Jump tag word first, then the
// Jump payload `s` starting 4 past `ref.offset`.
void TileGroup_Jump_write(TileGroupRef ref, Jump s) {
|
||||
tilegroup[ref.offset >> 2] = TileGroup_Jump;
|
||||
Jump_write(JumpRef(ref.offset + 4), s);
|
||||
}
|
||||
|
||||
void TileGroup_End_write(TileGroupRef ref) {
|
||||
tilegroup[ref.offset >> 2] = TileGroup_End;
|
||||
}
|
||||
|
|
|
@ -20,7 +20,15 @@ const HEIGHT: usize = 1536;
|
|||
const TILE_W: usize = 16;
|
||||
const TILE_H: usize = 16;
|
||||
|
||||
const N_CIRCLES: usize = 3000;
|
||||
const WIDTH_IN_TILEGROUPS: usize = 4;
|
||||
const HEIGHT_IN_TILEGROUPS: usize = 96;
|
||||
const TILEGROUP_INITIAL_ALLOC: usize = 1024;
|
||||
|
||||
const WIDTH_IN_TILES: usize = 124;
|
||||
const HEIGHT_IN_TILES: usize = 96;
|
||||
const PTCL_INITIAL_ALLOC: usize = 1024;
|
||||
|
||||
const N_CIRCLES: usize = 10_000;
|
||||
|
||||
fn render_scene(rc: &mut impl RenderContext) {
|
||||
let mut rng = rand::thread_rng();
|
||||
|
@ -71,8 +79,7 @@ fn main() {
|
|||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
|
||||
.unwrap();
|
||||
device.write_buffer(&scene_buf, &scene).unwrap();
|
||||
// These should only be on the host if we're going to examine them from Rust.
|
||||
let tilegroup_buf = device.create_buffer(384 * 1024, dev).unwrap();
|
||||
let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap();
|
||||
let ptcl_buf = device.create_buffer(12 * 1024 * 4096, dev).unwrap();
|
||||
let image_buf = device
|
||||
.create_buffer((WIDTH * HEIGHT * 4) as u64, host)
|
||||
|
@ -81,16 +88,34 @@ fn main() {
|
|||
.create_buffer((WIDTH * HEIGHT * 4) as u64, dev)
|
||||
.unwrap();
|
||||
|
||||
let k1_alloc_buf_host = device.create_buffer(4, host).unwrap();
|
||||
let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
|
||||
let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_INITIAL_ALLOC;
|
||||
device
|
||||
.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])
|
||||
.unwrap();
|
||||
let k1_code = include_bytes!("../shader/kernel1.spv");
|
||||
let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 2).unwrap();
|
||||
let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3).unwrap();
|
||||
let k1_ds = device
|
||||
.create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf])
|
||||
.create_descriptor_set(
|
||||
&k1_pipeline,
|
||||
&[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let k3_alloc_buf_host = device.create_buffer(4, host).unwrap();
|
||||
let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap();
|
||||
let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
|
||||
device
|
||||
.write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])
|
||||
.unwrap();
|
||||
let k3_code = include_bytes!("../shader/kernel3.spv");
|
||||
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 3).unwrap();
|
||||
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 4).unwrap();
|
||||
let k3_ds = device
|
||||
.create_descriptor_set(&k3_pipeline, &[&scene_dev, &tilegroup_buf, &ptcl_buf])
|
||||
.create_descriptor_set(
|
||||
&k3_pipeline,
|
||||
&[&scene_dev, &tilegroup_buf, &ptcl_buf, &k3_alloc_buf_dev],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let k4_code = include_bytes!("../shader/kernel4.spv");
|
||||
|
@ -102,6 +127,8 @@ fn main() {
|
|||
let mut cmd_buf = device.create_cmd_buf().unwrap();
|
||||
cmd_buf.begin();
|
||||
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
|
||||
cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev);
|
||||
cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev);
|
||||
cmd_buf.clear_buffer(&tilegroup_buf);
|
||||
cmd_buf.clear_buffer(&ptcl_buf);
|
||||
cmd_buf.memory_barrier();
|
||||
|
|
|
@ -238,7 +238,9 @@ fn flatten_shape(
|
|||
let scene_pt = to_scene_point(p);
|
||||
start_pt = Some(clone_scene_pt(&scene_pt));
|
||||
if !points.is_empty() {
|
||||
points.push(scene::Point { xy: [std::f32::NAN, std::f32::NAN ]});
|
||||
points.push(scene::Point {
|
||||
xy: [std::f32::NAN, std::f32::NAN],
|
||||
});
|
||||
}
|
||||
last_pt = Some(clone_scene_pt(&scene_pt));
|
||||
points.push(scene_pt);
|
||||
|
@ -350,7 +352,5 @@ fn to_scene_point(point: Point) -> scene::Point {
|
|||
|
||||
// TODO: allow #[derive(Clone)] in piet-gpu-derive.
|
||||
fn clone_scene_pt(p: &scene::Point) -> scene::Point {
|
||||
scene::Point {
|
||||
xy: p.xy
|
||||
}
|
||||
scene::Point { xy: p.xy }
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue