diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index c62678f..77170c0 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -71,6 +71,11 @@ pub trait CmdBuf { unsafe fn memory_barrier(&mut self); + /// Clear the buffer. + /// + /// This is readily supported in Vulkan, but for portability it is remarkably + /// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute + /// kernel, or organize the code not to need it. unsafe fn clear_buffer(&self, buffer: &D::Buffer); unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer); diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs index b6df77d..ed72e42 100644 --- a/piet-gpu-types/src/ptcl.rs +++ b/piet-gpu-types/src/ptcl.rs @@ -33,6 +33,9 @@ piet_gpu! { struct CmdSolid { rgba_color: u32, } + struct CmdJump { + new_ref: u32, + } enum Cmd { End, Circle(CmdCircle), @@ -42,6 +45,7 @@ piet_gpu! { FillEdge(CmdFillEdge), DrawFill(CmdDrawFill), Solid(CmdSolid), + Jump(CmdJump), Bail, } } diff --git a/piet-gpu-types/src/tilegroup.rs b/piet-gpu-types/src/tilegroup.rs index 4824178..5912154 100644 --- a/piet-gpu-types/src/tilegroup.rs +++ b/piet-gpu-types/src/tilegroup.rs @@ -10,8 +10,12 @@ piet_gpu! { // A better type would be Point. offset: [f32; 2], } + struct Jump { + new_ref: u32, + } enum TileGroup { Instance(Instance), + Jump(Jump), End, } } diff --git a/piet-gpu/shader/kernel1.comp b/piet-gpu/shader/kernel1.comp index dbdd492..82ccb8f 100644 --- a/piet-gpu/shader/kernel1.comp +++ b/piet-gpu/shader/kernel1.comp @@ -25,6 +25,10 @@ layout(set = 0, binding = 1) buffer TilegroupBuf { uint[] tilegroup; }; +layout(set = 0, binding = 2) buffer AllocBuf { + uint alloc; +}; + #include "scene.h" #include "tilegroup.h" @@ -43,6 +47,7 @@ void main() { uint stack_ix = 0; uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x; TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC); + uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size; vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX); PietItemRef root = PietItemRef(0); SimpleGroup group = PietItem_Group_read(root); @@ -62,9 +67,16 @@ void main() { if (hit && !is_group) { PietItemRef item_ref = PietItem_index(group.items, tos.index); Instance ins = Instance(item_ref.offset, tos.offset); + if (tg_ref.offset > tg_limit) { + // Allocation exceeded; do atomic bump alloc. + uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC); + Jump jump = Jump(new_tg); + TileGroup_Jump_write(tg_ref, jump); + tg_ref = TileGroupRef(new_tg); + tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size; + } TileGroup_Instance_write(tg_ref, ins); tg_ref.offset += TileGroup_size; - // TODO: bump allocate if allocation exceeded } if (is_group) { PietItemRef item_ref = PietItem_index(group.items, tos.index); diff --git a/piet-gpu/shader/kernel1.spv b/piet-gpu/shader/kernel1.spv index 0e9a497..9ac3593 100644 Binary files a/piet-gpu/shader/kernel1.spv and b/piet-gpu/shader/kernel1.spv differ diff --git a/piet-gpu/shader/kernel3.comp b/piet-gpu/shader/kernel3.comp index cb344c0..ef3faef 100644 --- a/piet-gpu/shader/kernel3.comp +++ b/piet-gpu/shader/kernel3.comp @@ -20,12 +20,26 @@ layout(set = 0, binding = 2) buffer PtclBuf { uint[] ptcl; }; +layout(set = 0, binding = 3) buffer AllocBuf { + uint alloc; +}; + #include "scene.h" #include "tilegroup.h" #include "ptcl.h" #include "setup.h" +void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) { + if (cmd_ref.offset > cmd_limit) { + uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC); + CmdJump jump = CmdJump(new_cmd); + Cmd_Jump_write(cmd_ref, jump); + cmd_ref = CmdRef(new_cmd); + cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size; + } +} + void main() { uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x; uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS @@ -33,12 +47,17 @@ void main() { vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX); TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC); CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC); + uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size; while (true) { uint tg_tag = TileGroup_tag(tg_ref); if (tg_tag == TileGroup_End) { break; } + if (tg_tag == TileGroup_Jump) { + tg_ref = TileGroupRef(TileGroup_Jump_read(tg_ref).new_ref); + continue; + } // Assume tg_tag is `Instance`, though there will be more cases. Instance ins = TileGroup_Instance_read(tg_ref); PietItemRef item_ref = PietItemRef(ins.item_ref); @@ -52,6 +71,7 @@ void main() { && max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX))) { CmdCircle cmd = CmdCircle(center, r, circle.rgba_color); + alloc_cmd(cmd_ref, cmd_limit); Cmd_Circle_write(cmd_ref, cmd); cmd_ref.offset += Cmd_size; } diff --git a/piet-gpu/shader/kernel3.spv b/piet-gpu/shader/kernel3.spv index 23a7c3e..cd56c48 100644 Binary files a/piet-gpu/shader/kernel3.spv and b/piet-gpu/shader/kernel3.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 6e2392b..cdde198 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -44,6 +44,10 @@ void main() { vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color); // TODO: sRGB rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a); + break; + case Cmd_Jump: + cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref); + continue; } cmd_ref.offset += Cmd_size; } diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index c2cb755..caef463 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h index 583cc10..cc43594 100644 --- a/piet-gpu/shader/ptcl.h +++ b/piet-gpu/shader/ptcl.h @@ -28,6 +28,10 @@ struct CmdSolidRef { uint offset; }; +struct CmdJumpRef { + uint offset; +}; + struct CmdRef { uint offset; }; @@ -109,6 +113,16 @@ CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) { return CmdSolidRef(ref.offset + index * CmdSolid_size); } +struct CmdJump { + uint new_ref; +}; + +#define CmdJump_size 4 + +CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) { + return CmdJumpRef(ref.offset + index * CmdJump_size); +} + #define Cmd_End 0 #define Cmd_Circle 1 #define Cmd_Line 2 @@ -117,7 +131,8 @@ CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) { #define Cmd_FillEdge 5 #define Cmd_DrawFill 6 #define Cmd_Solid 7 -#define Cmd_Bail 8 +#define Cmd_Jump 8 +#define Cmd_Bail 9 #define Cmd_size 20 CmdRef Cmd_index(CmdRef ref, uint index) { @@ -246,6 +261,19 @@ void CmdSolid_write(CmdSolidRef ref, CmdSolid s) { ptcl[ix + 0] = s.rgba_color; } +CmdJump CmdJump_read(CmdJumpRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = ptcl[ix + 0]; + CmdJump s; + s.new_ref = raw0; + return s; +} + +void CmdJump_write(CmdJumpRef ref, CmdJump s) { + uint ix = ref.offset >> 2; + ptcl[ix + 0] = s.new_ref; +} + uint Cmd_tag(CmdRef ref) { return ptcl[ref.offset >> 2]; } @@ -278,6 +306,10 @@ CmdSolid Cmd_Solid_read(CmdRef ref) { return CmdSolid_read(CmdSolidRef(ref.offset + 4)); } +CmdJump Cmd_Jump_read(CmdRef ref) { + return CmdJump_read(CmdJumpRef(ref.offset + 4)); +} + void Cmd_End_write(CmdRef ref) { ptcl[ref.offset >> 2] = Cmd_End; } @@ -317,6 +349,11 @@ void Cmd_Solid_write(CmdRef ref, CmdSolid s) { CmdSolid_write(CmdSolidRef(ref.offset + 4), s); } +void Cmd_Jump_write(CmdRef ref, CmdJump s) { + ptcl[ref.offset >> 2] = Cmd_Jump; + CmdJump_write(CmdJumpRef(ref.offset + 4), s); +} + void Cmd_Bail_write(CmdRef ref) { ptcl[ref.offset >> 2] = Cmd_Bail; } diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index f04462b..9ce2de6 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -22,4 +22,4 @@ #define TILE_WIDTH_PX 16 #define TILE_HEIGHT_PX 16 -#define PTCL_INITIAL_ALLOC 4096 +#define PTCL_INITIAL_ALLOC 1024 diff --git a/piet-gpu/shader/tilegroup.h b/piet-gpu/shader/tilegroup.h index f1d646f..64b27d3 100644 --- a/piet-gpu/shader/tilegroup.h +++ b/piet-gpu/shader/tilegroup.h @@ -4,6 +4,10 @@ struct InstanceRef { uint offset; }; +struct JumpRef { + uint offset; +}; + struct TileGroupRef { uint offset; }; @@ -19,8 +23,19 @@ InstanceRef Instance_index(InstanceRef ref, uint index) { return InstanceRef(ref.offset + index * Instance_size); } +struct Jump { + uint new_ref; +}; + +#define Jump_size 4 + +JumpRef Jump_index(JumpRef ref, uint index) { + return JumpRef(ref.offset + index * Jump_size); +} + #define TileGroup_Instance 0 -#define TileGroup_End 1 +#define TileGroup_Jump 1 +#define TileGroup_End 2 #define TileGroup_size 16 TileGroupRef TileGroup_index(TileGroupRef ref, uint index) { @@ -45,6 +60,19 @@ void Instance_write(InstanceRef ref, Instance s) { tilegroup[ix + 2] = floatBitsToUint(s.offset.y); } +Jump Jump_read(JumpRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = tilegroup[ix + 0]; + Jump s; + s.new_ref = raw0; + return s; +} + +void Jump_write(JumpRef ref, Jump s) { + uint ix = ref.offset >> 2; + tilegroup[ix + 0] = s.new_ref; +} + uint TileGroup_tag(TileGroupRef ref) { return tilegroup[ref.offset >> 2]; } @@ -53,11 +81,20 @@ Instance TileGroup_Instance_read(TileGroupRef ref) { return Instance_read(InstanceRef(ref.offset + 4)); } +Jump TileGroup_Jump_read(TileGroupRef ref) { + return Jump_read(JumpRef(ref.offset + 4)); +} + void TileGroup_Instance_write(TileGroupRef ref, Instance s) { tilegroup[ref.offset >> 2] = TileGroup_Instance; Instance_write(InstanceRef(ref.offset + 4), s); } +void TileGroup_Jump_write(TileGroupRef ref, Jump s) { + tilegroup[ref.offset >> 2] = TileGroup_Jump; + Jump_write(JumpRef(ref.offset + 4), s); +} + void TileGroup_End_write(TileGroupRef ref) { tilegroup[ref.offset >> 2] = TileGroup_End; } diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs index 6a243e9..703e156 100644 --- a/piet-gpu/src/main.rs +++ b/piet-gpu/src/main.rs @@ -20,7 +20,15 @@ const HEIGHT: usize = 1536; const TILE_W: usize = 16; const TILE_H: usize = 16; -const N_CIRCLES: usize = 3000; +const WIDTH_IN_TILEGROUPS: usize = 4; +const HEIGHT_IN_TILEGROUPS: usize = 96; +const TILEGROUP_INITIAL_ALLOC: usize = 1024; + +const WIDTH_IN_TILES: usize = 124; +const HEIGHT_IN_TILES: usize = 96; +const PTCL_INITIAL_ALLOC: usize = 1024; + +const N_CIRCLES: usize = 10_000; fn render_scene(rc: &mut impl RenderContext) { let mut rng = rand::thread_rng(); @@ -71,8 +79,7 @@ fn main() { .create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev) .unwrap(); device.write_buffer(&scene_buf, &scene).unwrap(); - // These should only be on the host if we're going to examine them from Rust. - let tilegroup_buf = device.create_buffer(384 * 1024, dev).unwrap(); + let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev).unwrap(); let ptcl_buf = device.create_buffer(12 * 1024 * 4096, dev).unwrap(); let image_buf = device .create_buffer((WIDTH * HEIGHT * 4) as u64, host) @@ -81,16 +88,34 @@ fn main() { .create_buffer((WIDTH * HEIGHT * 4) as u64, dev) .unwrap(); + let k1_alloc_buf_host = device.create_buffer(4, host).unwrap(); + let k1_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); + let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_INITIAL_ALLOC; + device + .write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32]) + .unwrap(); let k1_code = include_bytes!("../shader/kernel1.spv"); - let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 2).unwrap(); + let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 3).unwrap(); let k1_ds = device - .create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf]) + .create_descriptor_set( + &k1_pipeline, + &[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev], + ) .unwrap(); + let k3_alloc_buf_host = device.create_buffer(4, host).unwrap(); + let k3_alloc_buf_dev = device.create_buffer(4, dev).unwrap(); + let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC; + device + .write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32]) + .unwrap(); let k3_code = include_bytes!("../shader/kernel3.spv"); - let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 3).unwrap(); + let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 4).unwrap(); let k3_ds = device - .create_descriptor_set(&k3_pipeline, &[&scene_dev, &tilegroup_buf, &ptcl_buf]) + .create_descriptor_set( + &k3_pipeline, + &[&scene_dev, &tilegroup_buf, &ptcl_buf, &k3_alloc_buf_dev], + ) .unwrap(); let k4_code = include_bytes!("../shader/kernel4.spv"); @@ -102,6 +127,8 @@ fn main() { let mut cmd_buf = device.create_cmd_buf().unwrap(); cmd_buf.begin(); cmd_buf.copy_buffer(&scene_buf, &scene_dev); + cmd_buf.copy_buffer(&k1_alloc_buf_host, &k1_alloc_buf_dev); + cmd_buf.copy_buffer(&k3_alloc_buf_host, &k3_alloc_buf_dev); cmd_buf.clear_buffer(&tilegroup_buf); cmd_buf.clear_buffer(&ptcl_buf); cmd_buf.memory_barrier(); diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 4e9a567..eb67132 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -238,7 +238,9 @@ fn flatten_shape( let scene_pt = to_scene_point(p); start_pt = Some(clone_scene_pt(&scene_pt)); if !points.is_empty() { - points.push(scene::Point { xy: [std::f32::NAN, std::f32::NAN ]}); + points.push(scene::Point { + xy: [std::f32::NAN, std::f32::NAN], + }); } last_pt = Some(clone_scene_pt(&scene_pt)); points.push(scene_pt); @@ -350,7 +352,5 @@ fn to_scene_point(point: Point) -> scene::Point { // TODO: allow #[derive(Clone)] in piet-gpu-derive. fn clone_scene_pt(p: &scene::Point) -> scene::Point { - scene::Point { - xy: p.xy - } + scene::Point { xy: p.xy } }