mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Binning stage
Adds a binning stage. This is a first draft, and a number of loose ends exist.
This commit is contained in:
parent
736f883f66
commit
343e4c3075
19
piet-gpu-types/src/bins.rs
Normal file
19
piet-gpu-types/src/bins.rs
Normal file
|
@ -0,0 +1,19 @@
|
||||||
|
use piet_gpu_derive::piet_gpu;
|
||||||
|
|
||||||
|
// The output of the binning stage, organized as a linked list of chunks.
|
||||||
|
|
||||||
|
// Declares the GPU-side data layout for the binning stage output. The
// `piet_gpu!` macro consumes this module description; the GLSL accessors in
// `bins.h` are marked as generated by piet-gpu-derive.
piet_gpu! {
|
||||||
|
// NOTE(review): presumably requests generation of `*_write` accessors in
// addition to `*_read` (bins.h contains both) — confirm against piet-gpu-derive.
#[gpu_write]
|
||||||
|
mod bins {
|
||||||
|
// One entry in a bin: identifies a single element that covers the bin.
struct BinInstance {
|
||||||
|
// Index of the covering element.
element_ix: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Header of one chunk in a bin's linked list.
struct BinChunk {
|
||||||
|
// First chunk can have n = 0, subsequent ones not.
|
||||||
|
// Number of `BinInstance` entries stored in this chunk.
n: u32,
|
||||||
|
// Reference to the next chunk; the binning shader writes offset 0 to
// terminate the list.
next: Ref<BinChunk>,
|
||||||
|
// Instances follow
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,4 +1,7 @@
|
||||||
|
// Structures used only internally probably don't need to be pub.
|
||||||
|
|
||||||
pub mod annotated;
|
pub mod annotated;
|
||||||
|
pub mod bins;
|
||||||
pub mod encoder;
|
pub mod encoder;
|
||||||
pub mod fill_seg;
|
pub mod fill_seg;
|
||||||
pub mod ptcl;
|
pub mod ptcl;
|
||||||
|
|
|
@ -7,6 +7,7 @@ fn main() {
|
||||||
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
|
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
|
||||||
"state" => print!("{}", piet_gpu_types::state::gen_gpu_state()),
|
"state" => print!("{}", piet_gpu_types::state::gen_gpu_state()),
|
||||||
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
|
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
|
||||||
|
"bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
|
||||||
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
||||||
"segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
|
"segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
|
||||||
"fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()),
|
"fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()),
|
||||||
|
|
|
@ -41,7 +41,7 @@ fn main() -> Result<(), Error> {
|
||||||
|
|
||||||
let fence = device.create_fence(false)?;
|
let fence = device.create_fence(false)?;
|
||||||
let mut cmd_buf = device.create_cmd_buf()?;
|
let mut cmd_buf = device.create_cmd_buf()?;
|
||||||
let query_pool = device.create_query_pool(2)?;
|
let query_pool = device.create_query_pool(3)?;
|
||||||
|
|
||||||
let mut ctx = PietGpuRenderContext::new();
|
let mut ctx = PietGpuRenderContext::new();
|
||||||
render_scene(&mut ctx);
|
render_scene(&mut ctx);
|
||||||
|
@ -58,13 +58,14 @@ fn main() -> Result<(), Error> {
|
||||||
cmd_buf.finish();
|
cmd_buf.finish();
|
||||||
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?;
|
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?;
|
||||||
device.wait_and_reset(&[fence])?;
|
device.wait_and_reset(&[fence])?;
|
||||||
let timestamps = device.reap_query_pool(&query_pool).unwrap();
|
let ts = device.reap_query_pool(&query_pool).unwrap();
|
||||||
println!("Element kernel time: {:.3}ms", timestamps[0] * 1e3);
|
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
|
||||||
|
println!("Binning kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
let mut data: Vec<u8> = Default::default();
|
let mut data: Vec<u32> = Default::default();
|
||||||
device.read_buffer(&renderer.state_buf, &mut data).unwrap();
|
device.read_buffer(&renderer.bin_buf, &mut data).unwrap();
|
||||||
dump_state(&data);
|
piet_gpu::dump_k1_data(&data);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
let mut img_data: Vec<u8> = Default::default();
|
let mut img_data: Vec<u8> = Default::default();
|
||||||
|
|
169
piet-gpu/shader/binning.comp
Normal file
169
piet-gpu/shader/binning.comp
Normal file
|
@ -0,0 +1,169 @@
|
||||||
|
// The binning stage of the pipeline.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
// NOTE(review): N_ROWS, WG_SIZE, LG_WG_SIZE and TILE_SIZE are not referenced
// by main() in this shader; presumably carried over from another kernel —
// confirm before relying on them.
#define N_ROWS 4
|
||||||
|
#define WG_SIZE 32
|
||||||
|
#define LG_WG_SIZE 5
|
||||||
|
#define TILE_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
// TODO: move these to setup file
|
||||||
|
// Bin grid dimensions; bin coordinates are clamped to these in main().
#define N_TILE_X 16
|
||||||
|
#define N_TILE_Y 16
|
||||||
|
// Total number of bins. Also used as the workgroup size (local_size_x) and
// therefore as the number of elements processed per batch.
#define N_TILE (N_TILE_X * N_TILE_Y)
|
||||||
|
// Number of 32-bit words needed to hold one bit per invocation of a workgroup.
#define N_SLICE (N_TILE / 32)
|
||||||
|
// Must agree with the host-side dispatch size (the Rust side defines its own N_WG).
#define N_WG 16 // Number of workgroups, should be 1 per SM
|
||||||
|
|
||||||
|
// Size in bytes of the statically placed first chunk of every (bin, workgroup) pair.
#define BIN_INITIAL_ALLOC 64
|
||||||
|
// Size in bytes of each chunk claimed later by bumping `alloc`.
#define BIN_ALLOC 256
|
||||||
|
|
||||||
|
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
||||||
|
|
||||||
|
// Input: annotated elements, accessed through the accessors in annotated.h.
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
||||||
|
uint[] annotated;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Shared counters driving work distribution and output allocation.
layout(set = 0, binding = 1) buffer AllocBuf {
|
||||||
|
// Number of input elements; batches starting at or beyond it end the main loop.
uint n_elements;
|
||||||
|
// Will be incremented atomically to claim tiles
|
||||||
|
uint tile_ix;
|
||||||
|
// Bump allocator for output chunks: byte offset into `bins` of the next free
// BIN_ALLOC-sized block, advanced with atomicAdd.
uint alloc;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Output: per-bin linked lists of chunks, accessed through the accessors in bins.h.
layout(set = 0, binding = 2) buffer BinsBuf {
|
||||||
|
uint[] bins;
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "annotated.h"
|
||||||
|
#include "bins.h"
|
||||||
|
#include "setup.h"
|
||||||
|
|
||||||
|
// scale factors useful for converting coordinates to bins
|
||||||
|
// NOTE(review): TILE_WIDTH_PX / TILE_HEIGHT_PX are not defined in this file;
// presumably they come from setup.h — confirm.
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
|
||||||
|
#define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX))
|
||||||
|
|
||||||
|
// Note: cudaraster has N_TILE + 1 to cut down on bank conflicts.
|
||||||
|
// Coverage bitmaps for the current batch: bitmaps[slice][bin] has bit b set
// when the element handled by invocation (slice * 32 + b) covers `bin`.
shared uint bitmaps[N_SLICE][N_TILE];
|
||||||
|
// Batch index claimed by invocation 0 and broadcast to the whole workgroup.
shared uint sh_my_tile;
|
||||||
|
|
||||||
|
// Entry point of the binning stage.
//
// Each workgroup repeatedly claims a batch of N_TILE consecutive elements by
// atomically incrementing `tile_ix`. Within a batch every invocation first
// computes the bin coverage of one element and records it in the shared
// `bitmaps`. Every invocation then acts as the writer for the bin whose index
// equals its local invocation id, appending the indices of all covering
// elements to that bin's linked list of chunks in `bins`.
void main() {
    // Every (bin, workgroup) pair starts with a private, statically placed
    // chunk of BIN_INITIAL_ALLOC bytes; later chunks are claimed from `alloc`.
    BinChunkRef chunk_ref = BinChunkRef((gl_LocalInvocationID.x * N_WG + gl_WorkGroupID.x) * BIN_INITIAL_ALLOC);
    // Highest byte offset at which one more instance still fits in the
    // current allocation.
    uint chunk_limit = chunk_ref.offset + BIN_INITIAL_ALLOC - BinInstance_size;
    // Number of instances written to the current chunk so far.
    uint chunk_n = 0;
    BinInstanceRef instance_ref = BinInstanceRef(chunk_ref.offset + BinChunk_size);
    while (true) {
        // One invocation claims the next batch on behalf of the workgroup.
        if (gl_LocalInvocationID.x == 0) {
            sh_my_tile = atomicAdd(tile_ix, 1);
        }
        barrier();
        uint my_tile = sh_my_tile;
        // my_tile is uniform across the workgroup, so all invocations leave
        // the loop together and the barriers below stay in uniform control flow.
        if (my_tile * N_TILE >= n_elements) {
            break;
        }

        // Clear the bitmap column owned by this invocation.
        for (uint i = 0; i < N_SLICE; i++) {
            bitmaps[i][gl_LocalInvocationID.x] = 0;
        }
        barrier();

        // Read inputs and determine coverage of bins
        uint element_ix = my_tile * N_TILE + gl_LocalInvocationID.x;
        AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
        int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
        // The last batch may be only partially filled. Invocations past the
        // end of the input must not interpret whatever happens to be in the
        // buffer; they keep an empty bounding box and contribute no coverage.
        if (element_ix < n_elements) {
            uint tag = Annotated_tag(ref);
            switch (tag) {
            case Annotated_Line:
                AnnoLineSeg line = Annotated_Line_read(ref);
                x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX));
                y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY));
                x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX));
                y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY));
                break;
            case Annotated_Fill:
            case Annotated_Stroke:
                // Note: we take advantage of the fact that fills and strokes
                // have compatible layout.
                AnnoFill fill = Annotated_Fill_read(ref);
                x0 = int(floor(fill.bbox.x * SX));
                y0 = int(floor(fill.bbox.y * SY));
                x1 = int(ceil(fill.bbox.z * SX));
                y1 = int(ceil(fill.bbox.w * SY));
                break;
            }
        }
        // At this point, we run an iterator over the coverage area,
        // trying to keep divergence low.
        // Right now, it's just a bbox, but we'll get finer with
        // segments.
        x0 = clamp(x0, 0, N_TILE_X);
        x1 = clamp(x1, x0, N_TILE_X);
        y0 = clamp(y0, 0, N_TILE_Y);
        y1 = clamp(y1, y0, N_TILE_Y);
        // An empty x range means no coverage at all; collapse the y range
        // too so the loop below does not run.
        if (x0 == x1) y1 = y0;
        int x = x0, y = y0;
        // Word and bit within the bitmaps that identify this invocation's element.
        uint my_slice = gl_LocalInvocationID.x / 32;
        uint my_mask = 1 << (gl_LocalInvocationID.x & 31);
        while (y < y1) {
            atomicOr(bitmaps[my_slice][y * N_TILE_X + x], my_mask);
            x++;
            if (x == x1) {
                x = x0;
                y++;
            }
        }

        barrier();
        // Allocate output segments.
        uint element_count = 0;
        for (uint i = 0; i < N_SLICE; i++) {
            element_count += bitCount(bitmaps[i][gl_LocalInvocationID.x]);
        }
        // element_count is number of elements covering bin for this invocation.
        if (element_count > 0 && chunk_n > 0) {
            // Close the chunk in progress and start a new one, directly after
            // the instances already written if the allocation has room.
            uint new_chunk = instance_ref.offset;
            // The new chunk needs its header plus up to 8 instances (at least
            // one). Omitting the header from this check could leave a chunk
            // with room for the header but no instance, producing an empty
            // non-first chunk.
            if (new_chunk + BinChunk_size + min(32, element_count * 4) > chunk_limit + BinInstance_size) {
                new_chunk = atomicAdd(alloc, BIN_ALLOC);
                chunk_limit = new_chunk + BIN_ALLOC - BinInstance_size;
            }
            BinChunk_write(chunk_ref, BinChunk(chunk_n, BinChunkRef(new_chunk)));
            chunk_ref = BinChunkRef(new_chunk);
            instance_ref = BinInstanceRef(new_chunk + BinChunk_size);
            chunk_n = 0;
        }
        // TODO: allocate output here

        // Iterate over bits set.
        uint slice_ix = 0;
        uint bitmap = bitmaps[0][gl_LocalInvocationID.x];
        while (true) {
            if (bitmap == 0) {
                // Current word exhausted; advance to the next slice.
                slice_ix++;
                if (slice_ix == N_SLICE) {
                    break;
                }
                bitmap = bitmaps[slice_ix][gl_LocalInvocationID.x];
                if (bitmap == 0) {
                    continue;
                }
            }
            element_ix = my_tile * N_TILE + slice_ix * 32 + findLSB(bitmap);
            // At this point, element_ix refers to an element that covers this bin.

            // TODO: batch allocate based on element_count; this is divergent
            if (instance_ref.offset > chunk_limit) {
                // Current allocation is full: link to a freshly claimed chunk.
                uint new_chunk = atomicAdd(alloc, BIN_ALLOC);
                BinChunk_write(chunk_ref, BinChunk(chunk_n, BinChunkRef(new_chunk)));
                chunk_ref = BinChunkRef(new_chunk);
                instance_ref = BinInstanceRef(new_chunk + BinChunk_size);
                chunk_n = 0;
                chunk_limit = new_chunk + BIN_ALLOC - BinInstance_size;
            }
            BinInstance_write(instance_ref, BinInstance(element_ix));
            chunk_n++;
            instance_ref.offset += BinInstance_size;
            // clear LSB
            bitmap &= bitmap - 1;
        }
    }
    // Terminate the list: the final chunk has no successor.
    BinChunk_write(chunk_ref, BinChunk(chunk_n, BinChunkRef(0)));
}
|
BIN
piet-gpu/shader/binning.spv
Normal file
BIN
piet-gpu/shader/binning.spv
Normal file
Binary file not shown.
60
piet-gpu/shader/bins.h
Normal file
60
piet-gpu/shader/bins.h
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
|
// Typed handle to a BinInstance: a byte offset into the `bins` buffer.
struct BinInstanceRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Typed handle to a BinChunk: a byte offset into the `bins` buffer.
struct BinChunkRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
// In-register form of one bin entry.
struct BinInstance {
|
||||||
|
// Index of the element this entry refers to.
uint element_ix;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Size in bytes of a serialized BinInstance (one uint).
#define BinInstance_size 4
|
||||||
|
|
||||||
|
// Returns a reference to the `index`-th BinInstance of an array starting at `ref`.
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) {
    uint byte_delta = index * BinInstance_size;
    return BinInstanceRef(ref.offset + byte_delta);
}
|
||||||
|
|
||||||
|
// In-register form of a chunk header.
struct BinChunk {
|
||||||
|
// Number of instances stored in the chunk.
uint n;
|
||||||
|
// Reference to the following chunk in the list.
BinChunkRef next;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Size in bytes of a serialized BinChunk header (two uints).
#define BinChunk_size 8
|
||||||
|
|
||||||
|
// Returns a reference to the `index`-th BinChunk of an array starting at `ref`.
BinChunkRef BinChunk_index(BinChunkRef ref, uint index) {
    uint byte_delta = index * BinChunk_size;
    return BinChunkRef(ref.offset + byte_delta);
}
|
||||||
|
|
||||||
|
// Loads the BinInstance stored at byte offset `ref.offset` of `bins`.
BinInstance BinInstance_read(BinInstanceRef ref) {
    // `bins` is indexed in 32-bit words, so turn the byte offset into a word index.
    uint word = ref.offset >> 2;
    BinInstance result;
    result.element_ix = bins[word + 0];
    return result;
}
|
||||||
|
|
||||||
|
// Stores `s` at byte offset `ref.offset` of `bins`.
void BinInstance_write(BinInstanceRef ref, BinInstance s) {
    // `bins` is indexed in 32-bit words, so turn the byte offset into a word index.
    uint word = ref.offset >> 2;
    bins[word + 0] = s.element_ix;
}
|
||||||
|
|
||||||
|
// Loads the BinChunk header stored at byte offset `ref.offset` of `bins`.
BinChunk BinChunk_read(BinChunkRef ref) {
    // `bins` is indexed in 32-bit words, so turn the byte offset into a word index.
    uint word = ref.offset >> 2;
    uint count_word = bins[word + 0];
    uint next_word = bins[word + 1];
    BinChunk header;
    header.n = count_word;
    header.next = BinChunkRef(next_word);
    return header;
}
|
||||||
|
|
||||||
|
// Stores the chunk header `s` at byte offset `ref.offset` of `bins`.
void BinChunk_write(BinChunkRef ref, BinChunk s) {
    // `bins` is indexed in 32-bit words, so turn the byte offset into a word index.
    uint word = ref.offset >> 2;
    bins[word + 0] = s.n;
    bins[word + 1] = s.next.offset;
}
|
||||||
|
|
|
@ -20,4 +20,6 @@ build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h
|
||||||
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h
|
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h
|
||||||
|
|
||||||
|
|
||||||
build elements.spv: glsl elements.comp | scene.h state.h
|
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
||||||
|
|
||||||
|
# binning.comp includes annotated.h, bins.h and setup.h; all three are listed
# as implicit dependencies so that a change to any of them rebuilds the shader.
build binning.spv: glsl binning.comp | annotated.h bins.h setup.h
|
||||||
|
|
|
@ -1,3 +1,9 @@
|
||||||
|
// The element processing stage, first in the pipeline.
|
||||||
|
//
|
||||||
|
// This stage is primarily about applying transforms and computing bounding
|
||||||
|
// boxes. It is organized as a scan over the input elements, producing
|
||||||
|
// annotated output elements.
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
@ -208,6 +214,13 @@ void main() {
|
||||||
anno_stroke.linewidth = st.linewidth * sqrt(st.mat.x * st.mat.w - st.mat.y * st.mat.z);
|
anno_stroke.linewidth = st.linewidth * sqrt(st.mat.x * st.mat.w - st.mat.y * st.mat.z);
|
||||||
Annotated_Stroke_write(out_ref, anno_stroke);
|
Annotated_Stroke_write(out_ref, anno_stroke);
|
||||||
break;
|
break;
|
||||||
|
case Element_Fill:
|
||||||
|
Fill fill = Element_Fill_read(this_ref);
|
||||||
|
AnnoFill anno_fill;
|
||||||
|
anno_fill.rgba_color = fill.rgba_color;
|
||||||
|
anno_fill.bbox = st.bbox;
|
||||||
|
Annotated_Fill_write(out_ref, anno_fill);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
Annotated_Nop_write(out_ref);
|
Annotated_Nop_write(out_ref);
|
||||||
break;
|
break;
|
||||||
|
|
Binary file not shown.
|
@ -32,6 +32,8 @@ const K2_PER_TILE_SIZE: usize = 8;
|
||||||
|
|
||||||
const N_CIRCLES: usize = 1;
|
const N_CIRCLES: usize = 1;
|
||||||
|
|
||||||
|
const N_WG: u32 = 16;
|
||||||
|
|
||||||
pub fn render_scene(rc: &mut impl RenderContext) {
|
pub fn render_scene(rc: &mut impl RenderContext) {
|
||||||
let mut rng = rand::thread_rng();
|
let mut rng = rand::thread_rng();
|
||||||
for _ in 0..N_CIRCLES {
|
for _ in 0..N_CIRCLES {
|
||||||
|
@ -98,10 +100,10 @@ fn dump_scene(buf: &[u8]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
fn dump_k1_data(k1_buf: &[u32]) {
|
pub fn dump_k1_data(k1_buf: &[u32]) {
|
||||||
for i in 0..k1_buf.len() {
|
for i in 0..k1_buf.len() {
|
||||||
if k1_buf[i] != 0 {
|
if k1_buf[i] != 0 {
|
||||||
println!("{:4x}: {:8x}", i, k1_buf[i]);
|
println!("{:4x}: {:8x}", i * 4, k1_buf[i]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -114,10 +116,17 @@ pub struct Renderer<D: Device> {
|
||||||
|
|
||||||
pub state_buf: D::Buffer,
|
pub state_buf: D::Buffer,
|
||||||
pub anno_buf: D::Buffer,
|
pub anno_buf: D::Buffer,
|
||||||
|
pub bin_buf: D::Buffer,
|
||||||
|
|
||||||
el_pipeline: D::Pipeline,
|
el_pipeline: D::Pipeline,
|
||||||
el_ds: D::DescriptorSet,
|
el_ds: D::DescriptorSet,
|
||||||
|
|
||||||
|
bin_pipeline: D::Pipeline,
|
||||||
|
bin_ds: D::DescriptorSet,
|
||||||
|
|
||||||
|
bin_alloc_buf_host: D::Buffer,
|
||||||
|
bin_alloc_buf_dev: D::Buffer,
|
||||||
|
|
||||||
/*
|
/*
|
||||||
k1_alloc_buf_host: D::Buffer,
|
k1_alloc_buf_host: D::Buffer,
|
||||||
k1_alloc_buf_dev: D::Buffer,
|
k1_alloc_buf_dev: D::Buffer,
|
||||||
|
@ -149,6 +158,9 @@ impl<D: Device> Renderer<D> {
|
||||||
let host = MemFlags::host_coherent();
|
let host = MemFlags::host_coherent();
|
||||||
let dev = MemFlags::device_local();
|
let dev = MemFlags::device_local();
|
||||||
|
|
||||||
|
let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
|
||||||
|
println!("scene: {} elements", n_elements);
|
||||||
|
|
||||||
let scene_buf = device
|
let scene_buf = device
|
||||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
|
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -159,6 +171,7 @@ impl<D: Device> Renderer<D> {
|
||||||
|
|
||||||
let state_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
let state_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||||
let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||||
|
let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||||
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
|
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
|
||||||
|
|
||||||
let el_code = include_bytes!("../shader/elements.spv");
|
let el_code = include_bytes!("../shader/elements.spv");
|
||||||
|
@ -169,8 +182,25 @@ impl<D: Device> Renderer<D> {
|
||||||
&[],
|
&[],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
|
let bin_alloc_buf_host = device.create_buffer(12, host)?;
|
||||||
println!("scene: {} elements", n_elements);
|
let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
|
||||||
|
|
||||||
|
// TODO: constants
|
||||||
|
let bin_alloc_start = 256 * 64 * N_WG;
|
||||||
|
device
|
||||||
|
.write_buffer(&bin_alloc_buf_host, &[
|
||||||
|
n_elements as u32,
|
||||||
|
0,
|
||||||
|
bin_alloc_start,
|
||||||
|
])
|
||||||
|
?;
|
||||||
|
let bin_code = include_bytes!("../shader/binning.spv");
|
||||||
|
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 3, 0)?;
|
||||||
|
let bin_ds = device.create_descriptor_set(
|
||||||
|
&bin_pipeline,
|
||||||
|
&[&anno_buf, &bin_alloc_buf_dev, &bin_buf],
|
||||||
|
&[],
|
||||||
|
)?;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?;
|
let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?;
|
||||||
|
@ -253,14 +283,20 @@ impl<D: Device> Renderer<D> {
|
||||||
image_dev,
|
image_dev,
|
||||||
el_pipeline,
|
el_pipeline,
|
||||||
el_ds,
|
el_ds,
|
||||||
|
bin_pipeline,
|
||||||
|
bin_ds,
|
||||||
state_buf,
|
state_buf,
|
||||||
anno_buf,
|
anno_buf,
|
||||||
|
bin_buf,
|
||||||
|
bin_alloc_buf_host,
|
||||||
|
bin_alloc_buf_dev,
|
||||||
n_elements,
|
n_elements,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf<D>, query_pool: &D::QueryPool) {
|
pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf<D>, query_pool: &D::QueryPool) {
|
||||||
cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev);
|
cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev);
|
||||||
|
cmd_buf.copy_buffer(&self.bin_alloc_buf_host, &self.bin_alloc_buf_dev);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.image_barrier(
|
cmd_buf.image_barrier(
|
||||||
&self.image_dev,
|
&self.image_dev,
|
||||||
|
@ -276,6 +312,13 @@ impl<D: Device> Renderer<D> {
|
||||||
);
|
);
|
||||||
cmd_buf.write_timestamp(&query_pool, 1);
|
cmd_buf.write_timestamp(&query_pool, 1);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
|
cmd_buf.dispatch(
|
||||||
|
&self.bin_pipeline,
|
||||||
|
&self.bin_ds,
|
||||||
|
(N_WG, 1, 1),
|
||||||
|
);
|
||||||
|
cmd_buf.write_timestamp(&query_pool, 2);
|
||||||
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
|
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue