mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Merge branch 'master' into master
This commit is contained in:
commit
818d5b2047
|
@ -300,6 +300,7 @@ fn gen_struct_write(
|
|||
fields: &[(String, usize, LayoutType)],
|
||||
) {
|
||||
writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap();
|
||||
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
|
||||
let coverage = crate::layout::struct_coverage(fields, true);
|
||||
|
||||
for (i, field_ixs) in coverage.iter().enumerate() {
|
||||
|
@ -374,7 +375,7 @@ fn gen_struct_write(
|
|||
}
|
||||
|
||||
if !pieces.is_empty() {
|
||||
write!(r, " {}[{}] = ", bufname, i).unwrap();
|
||||
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
|
||||
for (j, piece) in pieces.iter().enumerate() {
|
||||
if j != 0 {
|
||||
write!(r, " | ").unwrap();
|
||||
|
|
|
@ -71,6 +71,10 @@ pub trait CmdBuf<D: Device> {
|
|||
|
||||
unsafe fn memory_barrier(&mut self);
|
||||
|
||||
unsafe fn clear_buffer(&self, buffer: &D::Buffer);
|
||||
|
||||
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
|
||||
|
||||
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
|
||||
}
|
||||
|
||||
|
|
|
@ -142,7 +142,11 @@ impl crate::Device for VkDevice {
|
|||
let buffer = device.create_buffer(
|
||||
&vk::BufferCreateInfo::builder()
|
||||
.size(size)
|
||||
.usage(vk::BufferUsageFlags::STORAGE_BUFFER)
|
||||
.usage(
|
||||
vk::BufferUsageFlags::STORAGE_BUFFER
|
||||
| vk::BufferUsageFlags::TRANSFER_SRC
|
||||
| vk::BufferUsageFlags::TRANSFER_DST,
|
||||
)
|
||||
.sharing_mode(vk::SharingMode::EXCLUSIVE),
|
||||
None,
|
||||
)?;
|
||||
|
@ -443,6 +447,22 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
|
|||
);
|
||||
}
|
||||
|
||||
unsafe fn clear_buffer(&self, buffer: &Buffer) {
|
||||
let device = &self.device.device;
|
||||
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0);
|
||||
}
|
||||
|
||||
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
|
||||
let device = &self.device.device;
|
||||
let size = src.size.min(dst.size);
|
||||
device.cmd_copy_buffer(
|
||||
self.cmd_buf,
|
||||
src.buffer,
|
||||
dst.buffer,
|
||||
&[vk::BufferCopy::builder().size(size).build()],
|
||||
);
|
||||
}
|
||||
|
||||
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
|
||||
let device = &self.device.device;
|
||||
device.cmd_write_timestamp(
|
||||
|
|
|
@ -2,3 +2,4 @@ pub mod encoder;
|
|||
pub mod ptcl;
|
||||
pub mod scene;
|
||||
pub mod test;
|
||||
pub mod tilegroup;
|
||||
|
|
|
@ -5,6 +5,7 @@ fn main() {
|
|||
.expect("provide a module name");
|
||||
match mod_name.as_str() {
|
||||
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
|
||||
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
||||
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
|
||||
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
|
||||
_ => println!("Oops, unknown module name"),
|
||||
|
|
|
@ -8,8 +8,7 @@ piet_gpu! {
|
|||
#[rust_encode]
|
||||
mod scene {
|
||||
struct Bbox {
|
||||
// TODO: this should be i16
|
||||
bbox: [u16; 4],
|
||||
bbox: [i16; 4],
|
||||
}
|
||||
struct Point {
|
||||
xy: [f32; 2],
|
||||
|
@ -19,6 +18,7 @@ piet_gpu! {
|
|||
// Note: both of the following items are actually arrays
|
||||
items: Ref<PietItem>,
|
||||
bboxes: Ref<Bbox>,
|
||||
offset: Point,
|
||||
}
|
||||
struct PietCircle {
|
||||
rgba_color: u32,
|
||||
|
@ -45,6 +45,7 @@ piet_gpu! {
|
|||
points: Ref<Point>,
|
||||
}
|
||||
enum PietItem {
|
||||
Group(SimpleGroup),
|
||||
Circle(PietCircle),
|
||||
Line(PietStrokeLine),
|
||||
Fill(PietFill),
|
||||
|
|
18
piet-gpu-types/src/tilegroup.rs
Normal file
18
piet-gpu-types/src/tilegroup.rs
Normal file
|
@ -0,0 +1,18 @@
|
|||
use piet_gpu_derive::piet_gpu;
|
||||
|
||||
// Layout of the per-tilegroup item list stored in the `tilegroup` buffer.
// NOTE(review): `#[gpu_write]` presumably asks piet-gpu-derive to emit GLSL
// `*_write` functions in addition to the readers — confirm against the
// derive crate.
piet_gpu! {
|
||||
#[gpu_write]
|
||||
mod tilegroup {
|
||||
// One scene item recorded for a tilegroup.
struct Instance {
|
||||
// Note: a better type would be `Ref<PietItem>` but to do that we
|
||||
// would need cross-module references. Punt for now.
|
||||
// kernel1.comp stores the item's byte offset in the scene buffer here.
item_ref: u32,
|
||||
// A better type would be Point.
|
||||
// kernel1.comp stores the accumulated group offset for the item here.
offset: [f32; 2],
|
||||
}
|
||||
// Tagged element of a tilegroup's list.
enum TileGroup {
|
||||
// An item entry.
Instance(Instance),
|
||||
// Terminator; kernel1.comp writes it once after the last entry.
End,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -8,3 +8,5 @@ rule glsl
|
|||
command = $glslang_validator -V -o $out $in
|
||||
|
||||
build image.spv: glsl image.comp | scene.h
|
||||
|
||||
build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h
|
||||
|
|
|
@ -32,7 +32,7 @@ void main() {
|
|||
// which is horribly wasteful, but the goal is to get *some* output and
|
||||
// then optimize.
|
||||
|
||||
SimpleGroup group = SimpleGroup_read(SimpleGroupRef(0));
|
||||
SimpleGroup group = PietItem_Group_read(PietItemRef(0));
|
||||
for (uint i = 0; i < group.n_items; i++) {
|
||||
PietItemRef item_ref = PietItem_index(group.items, i);
|
||||
uint tag = PietItem_tag(item_ref);
|
||||
|
|
Binary file not shown.
83
piet-gpu/shader/kernel1.comp
Normal file
83
piet-gpu/shader/kernel1.comp
Normal file
|
@ -0,0 +1,83 @@
|
|||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
// It's possible we should lay this out with x and do our own math.
|
||||
layout(local_size_x = 1, local_size_y = 32) in;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
||||
uint[] tilegroup;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tilegroup.h"
|
||||
|
||||
// TODO: compute this
|
||||
#define WIDTH_IN_TILEGROUPS 4
|
||||
|
||||
#define TILEGROUP_WIDTH 512
|
||||
#define TILEGROUP_HEIGHT 16
|
||||
|
||||
#define INITIAL_ALLOC 1024
|
||||
|
||||
#define MAX_STACK 8
|
||||
|
||||
// Traversal state for one group of the scene hierarchy, as used by main().
// Field order matters: main() builds values with the positional
// constructor StackElement(group, index, offset).
struct StackElement {
|
||||
// Item reference of the group being traversed; re-read with
// PietItem_Group_read when this element is popped.
PietItemRef group;
|
||||
// Index of the next child of `group` to examine.
uint index;
|
||||
// Translation accumulated from the root down to this group; added to
// child bounding boxes before the hit test.
vec2 offset;
|
||||
};
|
||||
|
||||
// Kernel 1: each invocation owns one tilegroup. It walks the scene's group
// hierarchy depth-first and appends an Instance record to its tilegroup list
// for every non-group item whose bounding box overlaps the tilegroup, then
// terminates the list with an End record.
void main() {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;

    // Each tilegroup gets a fixed INITIAL_ALLOC-byte region of the output buffer.
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * INITIAL_ALLOC);

    // Top-left corner of this tilegroup in pixel coordinates.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH, TILEGROUP_HEIGHT);

    // The root item lives at offset 0 of the scene buffer and is read as a group.
    PietItemRef root = PietItemRef(0);
    SimpleGroup group = PietItem_Group_read(root);
    StackElement tos = StackElement(root, 0, group.offset.xy);

    while (true) {
        if (tos.index >= group.n_items) {
            // processed all items in this group; pop the stack
            if (stack_ix == 0) {
                break;
            }
            tos = stack[--stack_ix];
            group = PietItem_Group_read(tos.group);
            continue;
        }

        // Bounding box of the current child, translated by the accumulated offset.
        Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
        vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
        bool hit_x = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH));
        bool hit_y = max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT));

        // Resolve the child's reference, then step past it; `tos` now points at
        // the next sibling, which is what must be saved if we descend.
        PietItemRef item_ref = PietItem_index(group.items, tos.index);
        tos.index++;

        if (!(hit_x && hit_y)) {
            // No overlap: the item (and, for a group, its whole subtree) is skipped.
            continue;
        }

        if (PietItem_tag(item_ref) == PietItem_Group) {
            // Descend into the child group. The parent is only saved when it
            // still has siblings left to visit.
            // NOTE(review): nothing checks stack_ix against MAX_STACK here.
            if (tos.index < group.n_items) {
                stack[stack_ix++] = tos;
            }
            group = PietItem_Group_read(item_ref);
            tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
        } else {
            Instance ins = Instance(item_ref.offset, tos.offset);
            TileGroup_Instance_write(tg_ref, ins);
            tg_ref.offset += TileGroup_size;
            // TODO: bump allocate if allocation exceeded
        }
    }
    TileGroup_End_write(tg_ref);
}
|
BIN
piet-gpu/shader/kernel1.spv
Normal file
BIN
piet-gpu/shader/kernel1.spv
Normal file
Binary file not shown.
|
@ -33,7 +33,7 @@ struct PietItemRef {
|
|||
};
|
||||
|
||||
struct Bbox {
|
||||
uvec4 bbox;
|
||||
ivec4 bbox;
|
||||
};
|
||||
|
||||
#define Bbox_size 8
|
||||
|
@ -56,9 +56,10 @@ struct SimpleGroup {
|
|||
uint n_items;
|
||||
PietItemRef items;
|
||||
BboxRef bboxes;
|
||||
Point offset;
|
||||
};
|
||||
|
||||
#define SimpleGroup_size 12
|
||||
#define SimpleGroup_size 20
|
||||
|
||||
SimpleGroupRef SimpleGroup_index(SimpleGroupRef ref, uint index) {
|
||||
return SimpleGroupRef(ref.offset + index * SimpleGroup_size);
|
||||
|
@ -116,10 +117,11 @@ PietStrokePolyLineRef PietStrokePolyLine_index(PietStrokePolyLineRef ref, uint i
|
|||
return PietStrokePolyLineRef(ref.offset + index * PietStrokePolyLine_size);
|
||||
}
|
||||
|
||||
#define PietItem_Circle 0
|
||||
#define PietItem_Line 1
|
||||
#define PietItem_Fill 2
|
||||
#define PietItem_Poly 3
|
||||
#define PietItem_Group 0
|
||||
#define PietItem_Circle 1
|
||||
#define PietItem_Line 2
|
||||
#define PietItem_Fill 3
|
||||
#define PietItem_Poly 4
|
||||
#define PietItem_size 32
|
||||
|
||||
PietItemRef PietItem_index(PietItemRef ref, uint index) {
|
||||
|
@ -131,7 +133,7 @@ Bbox Bbox_read(BboxRef ref) {
|
|||
uint raw0 = scene[ix + 0];
|
||||
uint raw1 = scene[ix + 1];
|
||||
Bbox s;
|
||||
s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16);
|
||||
s.bbox = ivec4(int(raw0 << 16) >> 16, int(raw0) >> 16, int(raw1 << 16) >> 16, int(raw1) >> 16);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -153,6 +155,7 @@ SimpleGroup SimpleGroup_read(SimpleGroupRef ref) {
|
|||
s.n_items = raw0;
|
||||
s.items = PietItemRef(raw1);
|
||||
s.bboxes = BboxRef(raw2);
|
||||
s.offset = Point_read(PointRef(ref.offset + 12));
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -213,6 +216,10 @@ uint PietItem_tag(PietItemRef ref) {
|
|||
return scene[ref.offset >> 2];
|
||||
}
|
||||
|
||||
// Reads the SimpleGroup payload of an item. The payload starts 4 bytes past
// the item's offset, after the word that PietItem_tag reads.
SimpleGroup PietItem_Group_read(PietItemRef ref) {
    SimpleGroupRef payload = SimpleGroupRef(ref.offset + 4);
    return SimpleGroup_read(payload);
}
|
||||
|
||||
// Reads the PietCircle payload of an item. The payload starts 4 bytes past
// the item's offset, after the word that PietItem_tag reads.
PietCircle PietItem_Circle_read(PietItemRef ref) {
    PietCircleRef payload = PietCircleRef(ref.offset + 4);
    return PietCircle_read(payload);
}
|
||||
|
|
64
piet-gpu/shader/tilegroup.h
Normal file
64
piet-gpu/shader/tilegroup.h
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// Accessors for the `tilegroup` buffer, which the including shader must
// declare as an array of uint. All `offset` values are byte offsets; they are
// shifted right by 2 to index 32-bit words.

// Byte-offset handle to an Instance.
struct InstanceRef {
    uint offset;
};

// Byte-offset handle to a tagged TileGroup element.
struct TileGroupRef {
    uint offset;
};

struct Instance {
    uint item_ref;
    vec2 offset;
};

#define Instance_size 12

// Returns a handle to the `index`-th Instance of an array starting at `ref`.
InstanceRef Instance_index(InstanceRef ref, uint index) {
    uint byte_offset = ref.offset + index * Instance_size;
    return InstanceRef(byte_offset);
}

// Tag values stored in the first word of a TileGroup element.
#define TileGroup_Instance 0
#define TileGroup_End 1
#define TileGroup_size 16

// Returns a handle to the `index`-th TileGroup element of an array starting at `ref`.
TileGroupRef TileGroup_index(TileGroupRef ref, uint index) {
    uint byte_offset = ref.offset + index * TileGroup_size;
    return TileGroupRef(byte_offset);
}

// Decodes an Instance from three consecutive words.
Instance Instance_read(InstanceRef ref) {
    uint base = ref.offset >> 2;
    uint word_item = tilegroup[base + 0];
    uint word_x = tilegroup[base + 1];
    uint word_y = tilegroup[base + 2];
    Instance result;
    result.item_ref = word_item;
    result.offset = vec2(uintBitsToFloat(word_x), uintBitsToFloat(word_y));
    return result;
}

// Encodes an Instance into three consecutive words.
void Instance_write(InstanceRef ref, Instance s) {
    uint base = ref.offset >> 2;
    tilegroup[base + 0] = s.item_ref;
    tilegroup[base + 1] = floatBitsToUint(s.offset.x);
    tilegroup[base + 2] = floatBitsToUint(s.offset.y);
}

// Returns the tag word of a TileGroup element.
uint TileGroup_tag(TileGroupRef ref) {
    uint tag_word = ref.offset >> 2;
    return tilegroup[tag_word];
}

// Reads the Instance payload, which follows the 4-byte tag word.
Instance TileGroup_Instance_read(TileGroupRef ref) {
    InstanceRef payload = InstanceRef(ref.offset + 4);
    return Instance_read(payload);
}

// Writes the Instance tag followed by the Instance payload.
void TileGroup_Instance_write(TileGroupRef ref, Instance s) {
    uint tag_word = ref.offset >> 2;
    tilegroup[tag_word] = TileGroup_Instance;
    InstanceRef payload = InstanceRef(ref.offset + 4);
    Instance_write(payload, s);
}

// Writes the End tag; this variant has no payload.
void TileGroup_End_write(TileGroupRef ref) {
    uint tag_word = ref.offset >> 2;
    tilegroup[tag_word] = TileGroup_End;
}
|
||||
|
|
@ -21,7 +21,7 @@ const N_CIRCLES: usize = 100;
|
|||
fn make_scene() -> Vec<u8> {
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut encoder = Encoder::new();
|
||||
let _reserve_root = encoder.alloc_chunk(SimpleGroup::fixed_size() as u32);
|
||||
let _reserve_root = encoder.alloc_chunk(PietItem::fixed_size() as u32);
|
||||
|
||||
let mut items = Vec::new();
|
||||
let mut bboxes = Vec::new();
|
||||
|
@ -36,23 +36,30 @@ fn make_scene() -> Vec<u8> {
|
|||
},
|
||||
radius: rng.gen_range(0.0, 50.0),
|
||||
};
|
||||
items.push(PietItem::Circle(circle));
|
||||
let bbox = Bbox {
|
||||
// TODO: real bbox
|
||||
bbox: [0, 0, 0, 0],
|
||||
bbox: [
|
||||
(circle.center.xy[0] - circle.radius).floor() as i16,
|
||||
(circle.center.xy[1] - circle.radius).floor() as i16,
|
||||
(circle.center.xy[0] + circle.radius).ceil() as i16,
|
||||
(circle.center.xy[1] + circle.radius).ceil() as i16,
|
||||
],
|
||||
};
|
||||
items.push(PietItem::Circle(circle));
|
||||
bboxes.push(bbox);
|
||||
}
|
||||
|
||||
let n_items = bboxes.len() as u32;
|
||||
let bboxes = bboxes.encode(&mut encoder).transmute();
|
||||
let items = items.encode(&mut encoder).transmute();
|
||||
let offset = Point { xy: [0.0, 0.0] };
|
||||
let simple_group = SimpleGroup {
|
||||
n_items,
|
||||
bboxes,
|
||||
items,
|
||||
offset,
|
||||
};
|
||||
simple_group.encode_to(&mut encoder.buf_mut()[0..SimpleGroup::fixed_size()]);
|
||||
let root_item = PietItem::Group(simple_group);
|
||||
root_item.encode_to(&mut encoder.buf_mut()[0..PietItem::fixed_size()]);
|
||||
// We should avoid this clone.
|
||||
encoder.buf().to_owned()
|
||||
}
|
||||
|
@ -66,39 +73,80 @@ fn dump_scene(buf: &[u8]) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Prints every non-zero word of `k1_buf` to stdout, one per line.
///
/// Each line shows the word's index in the slice followed by its value, both
/// in hexadecimal. Zero words are skipped so that a mostly-empty buffer
/// produces a short dump.
fn dump_k1_data(k1_buf: &[u32]) {
    // Iterate by element rather than by index: no per-access bounds check and
    // no way for the index to drift out of sync with the slice.
    for (i, &word) in k1_buf.iter().enumerate() {
        if word != 0 {
            println!("{:4x}: {:8x}", i, word);
        }
    }
}
|
||||
|
||||
fn main() {
|
||||
let instance = VkInstance::new().unwrap();
|
||||
unsafe {
|
||||
let device = instance.device().unwrap();
|
||||
let mem_flags = MemFlags::host_coherent();
|
||||
let host = MemFlags::host_coherent();
|
||||
let dev = MemFlags::device_local();
|
||||
let scene = make_scene();
|
||||
//dump_scene(&scene);
|
||||
let scene_buf = device
|
||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, mem_flags)
|
||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
|
||||
.unwrap();
|
||||
let scene_dev = device
|
||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
|
||||
.unwrap();
|
||||
device.write_buffer(&scene_buf, &scene).unwrap();
|
||||
let tilegroup_buf = device.create_buffer(384 * 1024, host).unwrap();
|
||||
let image_buf = device
|
||||
.create_buffer((WIDTH * HEIGHT * 4) as u64, mem_flags)
|
||||
.create_buffer((WIDTH * HEIGHT * 4) as u64, host)
|
||||
.unwrap();
|
||||
let image_dev = device
|
||||
.create_buffer((WIDTH * HEIGHT * 4) as u64, dev)
|
||||
.unwrap();
|
||||
|
||||
let k1_code = include_bytes!("../shader/kernel1.spv");
|
||||
let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 2).unwrap();
|
||||
let k1_ds = device
|
||||
.create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf])
|
||||
.unwrap();
|
||||
|
||||
let code = include_bytes!("../shader/image.spv");
|
||||
let pipeline = device.create_simple_compute_pipeline(code, 2).unwrap();
|
||||
let descriptor_set = device
|
||||
.create_descriptor_set(&pipeline, &[&scene_buf, &image_buf])
|
||||
.create_descriptor_set(&pipeline, &[&scene_dev, &image_dev])
|
||||
.unwrap();
|
||||
let query_pool = device.create_query_pool(2).unwrap();
|
||||
let query_pool = device.create_query_pool(3).unwrap();
|
||||
let mut cmd_buf = device.create_cmd_buf().unwrap();
|
||||
cmd_buf.begin();
|
||||
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
|
||||
cmd_buf.clear_buffer(&tilegroup_buf);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.write_timestamp(&query_pool, 0);
|
||||
cmd_buf.dispatch(
|
||||
&k1_pipeline,
|
||||
&k1_ds,
|
||||
((WIDTH / 512) as u32, (HEIGHT / 512) as u32, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 1);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&pipeline,
|
||||
&descriptor_set,
|
||||
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 1);
|
||||
cmd_buf.write_timestamp(&query_pool, 2);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.copy_buffer(&image_dev, &image_buf);
|
||||
cmd_buf.finish();
|
||||
device.run_cmd_buf(&cmd_buf).unwrap();
|
||||
let timestamps = device.reap_query_pool(query_pool).unwrap();
|
||||
println!("Render time: {:.3}ms", timestamps[0] * 1e3);
|
||||
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
|
||||
println!("Render time: {:.3}ms", (timestamps[1] - timestamps[0]) * 1e3);
|
||||
|
||||
let mut k1_data: Vec<u32> = Default::default();
|
||||
device.read_buffer(&tilegroup_buf, &mut k1_data).unwrap();
|
||||
dump_k1_data(&k1_data);
|
||||
|
||||
let mut img_data: Vec<u8> = Default::default();
|
||||
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
|
||||
// (probably passing a slice into a closure). But for now: keep it simple.
|
||||
|
|
Loading…
Reference in a new issue