Merge branch 'master' into master

This commit is contained in:
Brian Merchant 2020-04-21 15:18:51 -07:00 committed by GitHub
commit 818d5b2047
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 274 additions and 24 deletions

View file

@ -300,6 +300,7 @@ fn gen_struct_write(
fields: &[(String, usize, LayoutType)],
) {
writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap();
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
let coverage = crate::layout::struct_coverage(fields, true);
for (i, field_ixs) in coverage.iter().enumerate() {
@ -374,7 +375,7 @@ fn gen_struct_write(
}
if !pieces.is_empty() {
write!(r, " {}[{}] = ", bufname, i).unwrap();
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
for (j, piece) in pieces.iter().enumerate() {
if j != 0 {
write!(r, " | ").unwrap();

View file

@ -71,6 +71,10 @@ pub trait CmdBuf<D: Device> {
unsafe fn memory_barrier(&mut self);
unsafe fn clear_buffer(&self, buffer: &D::Buffer);
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
unsafe fn write_timestamp(&mut self, pool: &D::QueryPool, query: u32);
}

View file

@ -142,7 +142,11 @@ impl crate::Device for VkDevice {
let buffer = device.create_buffer(
&vk::BufferCreateInfo::builder()
.size(size)
.usage(vk::BufferUsageFlags::STORAGE_BUFFER)
.usage(
vk::BufferUsageFlags::STORAGE_BUFFER
| vk::BufferUsageFlags::TRANSFER_SRC
| vk::BufferUsageFlags::TRANSFER_DST,
)
.sharing_mode(vk::SharingMode::EXCLUSIVE),
None,
)?;
@ -443,6 +447,22 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
);
}
/// Record a command that fills `buffer` with zero words.
///
/// The fill starts at offset 0 and uses `vk::WHOLE_SIZE`, so the entire
/// buffer is covered regardless of its length.
///
/// # Safety
///
/// NOTE(review): presumably the command buffer must be in the recording
/// state and `buffer` must have been created with transfer-destination
/// usage — confirm against the Vulkan valid-usage rules.
unsafe fn clear_buffer(&self, buffer: &Buffer) {
    const FILL_WORD: u32 = 0;
    self.device
        .device
        .cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, FILL_WORD);
}
/// Record a command that copies the contents of `src` into `dst`.
///
/// When the two buffers differ in size only the common prefix is copied:
/// the byte count is the smaller of `src.size` and `dst.size`. Source and
/// destination offsets are left at the builder's defaults.
///
/// # Safety
///
/// NOTE(review): presumably the command buffer must be in the recording
/// state and both buffers must carry the matching transfer usage flags —
/// confirm against the Vulkan valid-usage rules.
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
    // Never read past the end of `src` nor write past the end of `dst`.
    let byte_count = std::cmp::min(src.size, dst.size);
    let region = vk::BufferCopy::builder().size(byte_count).build();
    self.device
        .device
        .cmd_copy_buffer(self.cmd_buf, src.buffer, dst.buffer, &[region]);
}
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
let device = &self.device.device;
device.cmd_write_timestamp(

View file

@ -2,3 +2,4 @@ pub mod encoder;
pub mod ptcl;
pub mod scene;
pub mod test;
pub mod tilegroup;

View file

@ -5,6 +5,7 @@ fn main() {
.expect("provide a module name");
match mod_name.as_str() {
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
_ => println!("Oops, unknown module name"),

View file

@ -8,8 +8,7 @@ piet_gpu! {
#[rust_encode]
mod scene {
struct Bbox {
// TODO: this should be i16
bbox: [u16; 4],
bbox: [i16; 4],
}
struct Point {
xy: [f32; 2],
@ -19,6 +18,7 @@ piet_gpu! {
// Note: both of the following items are actually arrays
items: Ref<PietItem>,
bboxes: Ref<Bbox>,
offset: Point,
}
struct PietCircle {
rgba_color: u32,
@ -45,6 +45,7 @@ piet_gpu! {
points: Ref<Point>,
}
enum PietItem {
Group(SimpleGroup),
Circle(PietCircle),
Line(PietStrokeLine),
Fill(PietFill),

View file

@ -0,0 +1,18 @@
use piet_gpu_derive::piet_gpu;
// Layout of the per-tilegroup instance list produced by kernel 1.
//
// NOTE(review): `#[gpu_write]` presumably asks the derive macro to emit GLSL
// `*_write` functions in addition to the `*_read` ones (the generated
// tilegroup.h contains both) — confirm against piet-gpu-derive.
piet_gpu! {
#[gpu_write]
mod tilegroup {
// A reference to one scene item, together with the offset to apply to it.
struct Instance {
// Byte offset of the item in the scene buffer.
// Note: a better type would be `Ref<PietItem>` but to do that we
// would need cross-module references. Punt for now.
item_ref: u32,
// Offset (x, y) associated with the item.
// A better type would be Point.
offset: [f32; 2],
}
// One entry of a tilegroup's list: either an item instance or the
// terminator that marks the end of the list.
enum TileGroup {
Instance(Instance),
End,
}
}
}

View file

@ -8,3 +8,5 @@ rule glsl
command = $glslang_validator -V -o $out $in
build image.spv: glsl image.comp | scene.h
build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h

View file

@ -32,7 +32,7 @@ void main() {
// which is horribly wasteful, but the goal is to get *some* output and
// then optimize.
SimpleGroup group = SimpleGroup_read(SimpleGroupRef(0));
SimpleGroup group = PietItem_Group_read(PietItemRef(0));
for (uint i = 0; i < group.n_items; i++) {
PietItemRef item_ref = PietItem_index(group.items, i);
uint tag = PietItem_tag(item_ref);

Binary file not shown.

View file

@ -0,0 +1,83 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
// Each workgroup is 1 x 32 invocations; every invocation handles one tilegroup.
// It's possible we should lay this out along x only and do our own index math.
layout(local_size_x = 1, local_size_y = 32) in;
// Input: the encoded scene, addressed as 32-bit words (read-only).
layout(set = 0, binding = 0) readonly buffer SceneBuf {
uint[] scene;
};
// Output: the per-tilegroup instance lists, addressed as 32-bit words.
layout(set = 0, binding = 1) buffer TilegroupBuf {
uint[] tilegroup;
};
// Generated accessors; they read/write the `scene` and `tilegroup` arrays above.
#include "scene.h"
#include "tilegroup.h"
// Number of tilegroups per row, used to linearize the 2D invocation id.
// TODO: compute this
#define WIDTH_IN_TILEGROUPS 4
// Size of one tilegroup, in the same units as the scene bounding boxes.
#define TILEGROUP_WIDTH 512
#define TILEGROUP_HEIGHT 16
// Bytes of the tilegroup buffer given to each tilegroup's list.
#define INITIAL_ALLOC 1024
// Capacity of the traversal stack of suspended parent groups.
#define MAX_STACK 8
// Traversal state for one group in the scene graph.
struct StackElement {
// The group item being traversed.
PietItemRef group;
// Index of the next child of `group` to visit.
uint index;
// Accumulated offset applied to the children of `group`.
vec2 offset;
};
// Kernel 1: each invocation handles one tilegroup. It walks the scene graph
// depth-first starting at the root group (the item at scene offset 0) and
// appends an Instance to this tilegroup's list for every non-group item whose
// bounding box overlaps the tilegroup rectangle. Groups whose bounding box
// misses the rectangle are skipped without descending. The list is terminated
// with an End entry.
void main() {
    // Parent groups that still have children left to visit.
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
    // Each tilegroup owns the byte range [tg_ref.offset, tg_end) of the output.
    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * INITIAL_ALLOC);
    uint tg_end = tg_ref.offset + INITIAL_ALLOC;
    // Top-left and bottom-right corners of this tilegroup's rectangle.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH, TILEGROUP_HEIGHT);
    vec2 xy1 = xy0 + vec2(TILEGROUP_WIDTH, TILEGROUP_HEIGHT);
    PietItemRef root = PietItemRef(0);
    SimpleGroup group = PietItem_Group_read(root);
    StackElement tos = StackElement(root, 0, group.offset.xy);
    while (true) {
        if (tos.index >= group.n_items) {
            // Processed all items in this group; pop the stack.
            if (stack_ix == 0) {
                break;
            }
            tos = stack[--stack_ix];
            group = PietItem_Group_read(tos.group);
            continue;
        }
        Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
        vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
        // The rectangles overlap iff their intersection is non-empty on both axes.
        bool hit = max(bb.x, xy0.x) < min(bb.z, xy1.x)
            && max(bb.y, xy0.y) < min(bb.w, xy1.y);
        uint child_ix = tos.index;
        tos.index++;
        if (!hit) {
            continue;
        }
        PietItemRef item_ref = PietItem_index(group.items, child_ix);
        if (PietItem_tag(item_ref) == PietItem_Group) {
            // The parent only needs to be resumed if it has children left.
            bool resume_parent = tos.index < group.n_items;
            // If the parent would have to be pushed but the stack is full, skip
            // the nested group instead of writing past the end of `stack`.
            if (!resume_parent || stack_ix < MAX_STACK) {
                if (resume_parent) {
                    stack[stack_ix++] = tos;
                }
                group = PietItem_Group_read(item_ref);
                tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
            }
        } else if (tg_ref.offset + 2 * TileGroup_size <= tg_end) {
            // Only append while there is room for this entry plus the End
            // marker, so a full list never spills into the next tilegroup.
            // TODO: bump allocate if allocation exceeded
            Instance ins = Instance(item_ref.offset, tos.offset);
            TileGroup_Instance_write(tg_ref, ins);
            tg_ref.offset += TileGroup_size;
        }
    }
    TileGroup_End_write(tg_ref);
}

BIN
piet-gpu/shader/kernel1.spv Normal file

Binary file not shown.

View file

@ -33,7 +33,7 @@ struct PietItemRef {
};
struct Bbox {
uvec4 bbox;
ivec4 bbox;
};
#define Bbox_size 8
@ -56,9 +56,10 @@ struct SimpleGroup {
uint n_items;
PietItemRef items;
BboxRef bboxes;
Point offset;
};
#define SimpleGroup_size 12
#define SimpleGroup_size 20
SimpleGroupRef SimpleGroup_index(SimpleGroupRef ref, uint index) {
return SimpleGroupRef(ref.offset + index * SimpleGroup_size);
@ -116,10 +117,11 @@ PietStrokePolyLineRef PietStrokePolyLine_index(PietStrokePolyLineRef ref, uint i
return PietStrokePolyLineRef(ref.offset + index * PietStrokePolyLine_size);
}
#define PietItem_Circle 0
#define PietItem_Line 1
#define PietItem_Fill 2
#define PietItem_Poly 3
#define PietItem_Group 0
#define PietItem_Circle 1
#define PietItem_Line 2
#define PietItem_Fill 3
#define PietItem_Poly 4
#define PietItem_size 32
PietItemRef PietItem_index(PietItemRef ref, uint index) {
@ -131,7 +133,7 @@ Bbox Bbox_read(BboxRef ref) {
uint raw0 = scene[ix + 0];
uint raw1 = scene[ix + 1];
Bbox s;
s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16);
s.bbox = ivec4(int(raw0 << 16) >> 16, int(raw0) >> 16, int(raw1 << 16) >> 16, int(raw1) >> 16);
return s;
}
@ -153,6 +155,7 @@ SimpleGroup SimpleGroup_read(SimpleGroupRef ref) {
s.n_items = raw0;
s.items = PietItemRef(raw1);
s.bboxes = BboxRef(raw2);
s.offset = Point_read(PointRef(ref.offset + 12));
return s;
}
@ -213,6 +216,10 @@ uint PietItem_tag(PietItemRef ref) {
return scene[ref.offset >> 2];
}
// Read the SimpleGroup payload of a PietItem. The payload starts 4 bytes
// after the item's offset; the first word holds the tag (see PietItem_tag).
// The tag is not checked here, so the caller must know the item is a group.
SimpleGroup PietItem_Group_read(PietItemRef ref) {
return SimpleGroup_read(SimpleGroupRef(ref.offset + 4));
}
PietCircle PietItem_Circle_read(PietItemRef ref) {
return PietCircle_read(PietCircleRef(ref.offset + 4));
}

View file

@ -0,0 +1,64 @@
// Code auto-generated by piet-gpu-derive
// Accessors for the `tilegroup` buffer, which the including shader must
// declare as an array of uint. All `offset` values are byte offsets; they are
// shifted right by 2 to index 32-bit words.
// Byte offset of an Instance in the tilegroup buffer.
struct InstanceRef {
uint offset;
};
// Byte offset of a TileGroup entry in the tilegroup buffer.
struct TileGroupRef {
uint offset;
};
// Decoded form of an Instance: a scene item reference plus a 2D offset.
struct Instance {
uint item_ref;
vec2 offset;
};
// Encoded size in bytes: three 32-bit words.
#define Instance_size 12
// Reference to the `index`-th Instance of an array starting at `ref`.
InstanceRef Instance_index(InstanceRef ref, uint index) {
return InstanceRef(ref.offset + index * Instance_size);
}
// Tag values stored in the first word of a TileGroup entry.
#define TileGroup_Instance 0
#define TileGroup_End 1
// Encoded size in bytes of one TileGroup entry (tag word plus payload).
#define TileGroup_size 16
// Reference to the `index`-th TileGroup entry of an array starting at `ref`.
TileGroupRef TileGroup_index(TileGroupRef ref, uint index) {
return TileGroupRef(ref.offset + index * TileGroup_size);
}
// Decode the Instance stored at `ref`.
Instance Instance_read(InstanceRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = tilegroup[ix + 0];
uint raw1 = tilegroup[ix + 1];
uint raw2 = tilegroup[ix + 2];
Instance s;
s.item_ref = raw0;
// The offset is stored as the raw bit patterns of two floats.
s.offset = vec2(uintBitsToFloat(raw1), uintBitsToFloat(raw2));
return s;
}
// Encode `s` into the three words starting at `ref`.
void Instance_write(InstanceRef ref, Instance s) {
uint ix = ref.offset >> 2;
tilegroup[ix + 0] = s.item_ref;
tilegroup[ix + 1] = floatBitsToUint(s.offset.x);
tilegroup[ix + 2] = floatBitsToUint(s.offset.y);
}
// Read the tag word of the entry at `ref`.
uint TileGroup_tag(TileGroupRef ref) {
return tilegroup[ref.offset >> 2];
}
// Read the Instance payload, which follows the 4-byte tag. The tag itself is
// not checked.
Instance TileGroup_Instance_read(TileGroupRef ref) {
return Instance_read(InstanceRef(ref.offset + 4));
}
// Write an Instance entry at `ref`: the tag word followed by the payload.
void TileGroup_Instance_write(TileGroupRef ref, Instance s) {
tilegroup[ref.offset >> 2] = TileGroup_Instance;
Instance_write(InstanceRef(ref.offset + 4), s);
}
// Write an End entry at `ref`; only the tag word is written.
void TileGroup_End_write(TileGroupRef ref) {
tilegroup[ref.offset >> 2] = TileGroup_End;
}

View file

@ -21,7 +21,7 @@ const N_CIRCLES: usize = 100;
fn make_scene() -> Vec<u8> {
let mut rng = rand::thread_rng();
let mut encoder = Encoder::new();
let _reserve_root = encoder.alloc_chunk(SimpleGroup::fixed_size() as u32);
let _reserve_root = encoder.alloc_chunk(PietItem::fixed_size() as u32);
let mut items = Vec::new();
let mut bboxes = Vec::new();
@ -36,23 +36,30 @@ fn make_scene() -> Vec<u8> {
},
radius: rng.gen_range(0.0, 50.0),
};
items.push(PietItem::Circle(circle));
let bbox = Bbox {
// TODO: real bbox
bbox: [0, 0, 0, 0],
bbox: [
(circle.center.xy[0] - circle.radius).floor() as i16,
(circle.center.xy[1] - circle.radius).floor() as i16,
(circle.center.xy[0] + circle.radius).ceil() as i16,
(circle.center.xy[1] + circle.radius).ceil() as i16,
],
};
items.push(PietItem::Circle(circle));
bboxes.push(bbox);
}
let n_items = bboxes.len() as u32;
let bboxes = bboxes.encode(&mut encoder).transmute();
let items = items.encode(&mut encoder).transmute();
let offset = Point { xy: [0.0, 0.0] };
let simple_group = SimpleGroup {
n_items,
bboxes,
items,
offset,
};
simple_group.encode_to(&mut encoder.buf_mut()[0..SimpleGroup::fixed_size()]);
let root_item = PietItem::Group(simple_group);
root_item.encode_to(&mut encoder.buf_mut()[0..PietItem::fixed_size()]);
// We should avoid this clone.
encoder.buf().to_owned()
}
@ -66,39 +73,80 @@ fn dump_scene(buf: &[u8]) {
}
}
/// Print every non-zero word of the kernel 1 output buffer to stdout.
///
/// Each line shows the word's index and its value, both in hexadecimal.
/// Zero words are skipped to keep the dump short.
fn dump_k1_data(k1_buf: &[u32]) {
    for (word_ix, &word) in k1_buf.iter().enumerate() {
        if word == 0 {
            continue;
        }
        println!("{:4x}: {:8x}", word_ix, word);
    }
}
fn main() {
let instance = VkInstance::new().unwrap();
unsafe {
let device = instance.device().unwrap();
let mem_flags = MemFlags::host_coherent();
let host = MemFlags::host_coherent();
let dev = MemFlags::device_local();
let scene = make_scene();
//dump_scene(&scene);
let scene_buf = device
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, mem_flags)
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
.unwrap();
let scene_dev = device
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
.unwrap();
device.write_buffer(&scene_buf, &scene).unwrap();
let tilegroup_buf = device.create_buffer(384 * 1024, host).unwrap();
let image_buf = device
.create_buffer((WIDTH * HEIGHT * 4) as u64, mem_flags)
.create_buffer((WIDTH * HEIGHT * 4) as u64, host)
.unwrap();
let image_dev = device
.create_buffer((WIDTH * HEIGHT * 4) as u64, dev)
.unwrap();
let k1_code = include_bytes!("../shader/kernel1.spv");
let k1_pipeline = device.create_simple_compute_pipeline(k1_code, 2).unwrap();
let k1_ds = device
.create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf])
.unwrap();
let code = include_bytes!("../shader/image.spv");
let pipeline = device.create_simple_compute_pipeline(code, 2).unwrap();
let descriptor_set = device
.create_descriptor_set(&pipeline, &[&scene_buf, &image_buf])
.create_descriptor_set(&pipeline, &[&scene_dev, &image_dev])
.unwrap();
let query_pool = device.create_query_pool(2).unwrap();
let query_pool = device.create_query_pool(3).unwrap();
let mut cmd_buf = device.create_cmd_buf().unwrap();
cmd_buf.begin();
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
cmd_buf.clear_buffer(&tilegroup_buf);
cmd_buf.memory_barrier();
cmd_buf.write_timestamp(&query_pool, 0);
cmd_buf.dispatch(
&k1_pipeline,
&k1_ds,
((WIDTH / 512) as u32, (HEIGHT / 512) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&pipeline,
&descriptor_set,
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
);
cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.write_timestamp(&query_pool, 2);
cmd_buf.memory_barrier();
cmd_buf.copy_buffer(&image_dev, &image_buf);
cmd_buf.finish();
device.run_cmd_buf(&cmd_buf).unwrap();
let timestamps = device.reap_query_pool(query_pool).unwrap();
println!("Render time: {:.3}ms", timestamps[0] * 1e3);
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
println!("Render time: {:.3}ms", (timestamps[1] - timestamps[0]) * 1e3);
let mut k1_data: Vec<u32> = Default::default();
device.read_buffer(&tilegroup_buf, &mut k1_data).unwrap();
dump_k1_data(&k1_data);
let mut img_data: Vec<u8> = Default::default();
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
// (probably passing a slice into a closure). But for now: keep it simple.