mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Prototype of buffer reuse
This helps performance, but not all performance issues have been resolved. A nontrivial amount of CPU time is still spent in write_buffer, and it's also possible that CPU and GPU work do not overlap enough.
This commit is contained in:
parent
a9aa3f9cab
commit
4907186de4
|
@ -399,8 +399,8 @@ fn main(
|
||||||
}
|
}
|
||||||
workgroupBarrier();
|
workgroupBarrier();
|
||||||
}
|
}
|
||||||
if bin_tile_x < config.width_in_tiles && bin_tile_y < config.height_in_tiles {
|
if bin_tile_x + tile_x < config.width_in_tiles && bin_tile_y + tile_y < config.height_in_tiles {
|
||||||
//ptcl[cmd_offset] = CMD_END;
|
ptcl[cmd_offset] = CMD_END;
|
||||||
// TODO: blend stack allocation
|
// TODO: blend stack allocation
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
100
src/engine.rs
100
src/engine.rs
|
@ -26,7 +26,7 @@ use parking_lot::RawMutex;
|
||||||
use wgpu::{
|
use wgpu::{
|
||||||
util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice, BufferView,
|
util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice, BufferView,
|
||||||
ComputePipeline, Device, Queue, Texture, TextureAspect, TextureFormat, TextureUsages,
|
ComputePipeline, Device, Queue, Texture, TextureAspect, TextureFormat, TextureUsages,
|
||||||
TextureView, TextureViewDimension,
|
TextureView, TextureViewDimension, BufferUsages,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type Error = Box<dyn std::error::Error>;
|
pub type Error = Box<dyn std::error::Error>;
|
||||||
|
@ -41,6 +41,7 @@ static ID_COUNTER: AtomicU64 = AtomicU64::new(0);
|
||||||
|
|
||||||
pub struct Engine {
|
pub struct Engine {
|
||||||
shaders: Vec<Shader>,
|
shaders: Vec<Shader>,
|
||||||
|
pool: ResourcePool,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Shader {
|
struct Shader {
|
||||||
|
@ -123,9 +124,17 @@ struct BindMap {
|
||||||
image_map: HashMap<Id, (Texture, TextureView)>,
|
image_map: HashMap<Id, (Texture, TextureView)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pool of GPU buffers kept around for reuse instead of being re-created.
#[derive(Default)]
|
||||||
|
struct ResourcePool {
|
||||||
|
/// Free buffers, grouped by `(size in bytes, usage flags)`.
bufs: HashMap<(u64, BufferUsages), Vec<Buffer>>,
|
||||||
|
}
|
||||||
|
|
||||||
impl Engine {
|
impl Engine {
|
||||||
pub fn new() -> Engine {
|
pub fn new() -> Engine {
|
||||||
Engine { shaders: vec![] }
|
Engine {
|
||||||
|
shaders: vec![],
|
||||||
|
pool: Default::default(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a shader.
|
/// Add a shader.
|
||||||
|
@ -233,21 +242,18 @@ impl Engine {
|
||||||
for command in &recording.commands {
|
for command in &recording.commands {
|
||||||
match command {
|
match command {
|
||||||
Command::Upload(buf_proxy, bytes) => {
|
Command::Upload(buf_proxy, bytes) => {
|
||||||
let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
|
||||||
label: None,
|
let buf = self.pool.get_buf(bytes.len() as u64, usage, device);
|
||||||
contents: &bytes,
|
// TODO: if buffer is newly created, might be better to make it mapped at creation
|
||||||
usage: wgpu::BufferUsages::STORAGE
|
// and copy. However, we expect reuse will be most common.
|
||||||
| wgpu::BufferUsages::COPY_DST
|
queue.write_buffer(&buf, 0, bytes);
|
||||||
| wgpu::BufferUsages::COPY_SRC,
|
|
||||||
});
|
|
||||||
bind_map.insert_buf(buf_proxy.id, buf);
|
bind_map.insert_buf(buf_proxy.id, buf);
|
||||||
}
|
}
|
||||||
Command::UploadUniform(buf_proxy, bytes) => {
|
Command::UploadUniform(buf_proxy, bytes) => {
|
||||||
let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
let usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
|
||||||
label: None,
|
// Same consideration as above
|
||||||
contents: &bytes,
|
let buf = self.pool.get_buf(bytes.len() as u64, usage, device);
|
||||||
usage: wgpu::BufferUsages::UNIFORM,
|
queue.write_buffer(&buf, 0, bytes);
|
||||||
});
|
|
||||||
bind_map.insert_buf(buf_proxy.id, buf);
|
bind_map.insert_buf(buf_proxy.id, buf);
|
||||||
}
|
}
|
||||||
Command::UploadImage(image_proxy, bytes) => {
|
Command::UploadImage(image_proxy, bytes) => {
|
||||||
|
@ -310,6 +316,7 @@ impl Engine {
|
||||||
&shader.bind_group_layout,
|
&shader.bind_group_layout,
|
||||||
bindings,
|
bindings,
|
||||||
external_resources,
|
external_resources,
|
||||||
|
&mut self.pool,
|
||||||
)?;
|
)?;
|
||||||
let mut cpass = encoder.begin_compute_pass(&Default::default());
|
let mut cpass = encoder.begin_compute_pass(&Default::default());
|
||||||
cpass.set_pipeline(&shader.pipeline);
|
cpass.set_pipeline(&shader.pipeline);
|
||||||
|
@ -328,12 +335,13 @@ impl Engine {
|
||||||
downloads.buf_map.insert(proxy.id, buf);
|
downloads.buf_map.insert(proxy.id, buf);
|
||||||
}
|
}
|
||||||
Command::Clear(proxy, offset, size) => {
|
Command::Clear(proxy, offset, size) => {
|
||||||
let buffer = bind_map.get_or_create(*proxy, device)?;
|
let buffer = bind_map.get_or_create(*proxy, device, &mut self.pool)?;
|
||||||
encoder.clear_buffer(buffer, *offset, *size);
|
encoder.clear_buffer(buffer, *offset, *size);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
queue.submit(Some(encoder.finish()));
|
queue.submit(Some(encoder.finish()));
|
||||||
|
self.pool.reap_bindmap(bind_map);
|
||||||
Ok(downloads)
|
Ok(downloads)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -481,6 +489,7 @@ impl BindMap {
|
||||||
layout: &BindGroupLayout,
|
layout: &BindGroupLayout,
|
||||||
bindings: &[ResourceProxy],
|
bindings: &[ResourceProxy],
|
||||||
external_resources: &[ExternalResource],
|
external_resources: &[ExternalResource],
|
||||||
|
pool: &mut ResourcePool,
|
||||||
) -> Result<BindGroup, Error> {
|
) -> Result<BindGroup, Error> {
|
||||||
// These functions are ugly and linear, but the remap array should generally be
|
// These functions are ugly and linear, but the remap array should generally be
|
||||||
// small. Should find a better solution for this.
|
// small. Should find a better solution for this.
|
||||||
|
@ -519,14 +528,8 @@ impl BindMap {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
|
if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
|
||||||
let buf = device.create_buffer(&wgpu::BufferDescriptor {
|
let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
|
||||||
label: None,
|
let buf = pool.get_buf(proxy.size, usage, device);
|
||||||
size: proxy.size,
|
|
||||||
usage: wgpu::BufferUsages::STORAGE
|
|
||||||
| wgpu::BufferUsages::COPY_DST
|
|
||||||
| wgpu::BufferUsages::COPY_SRC,
|
|
||||||
mapped_at_creation: false,
|
|
||||||
});
|
|
||||||
v.insert(buf);
|
v.insert(buf);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -595,18 +598,12 @@ impl BindMap {
|
||||||
Ok(bind_group)
|
Ok(bind_group)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_or_create(&mut self, proxy: BufProxy, device: &Device) -> Result<&Buffer, Error> {
|
fn get_or_create(&mut self, proxy: BufProxy, device: &Device, pool: &mut ResourcePool) -> Result<&Buffer, Error> {
|
||||||
match self.buf_map.entry(proxy.id) {
|
match self.buf_map.entry(proxy.id) {
|
||||||
Entry::Occupied(occupied) => Ok(occupied.into_mut()),
|
Entry::Occupied(occupied) => Ok(occupied.into_mut()),
|
||||||
Entry::Vacant(vacant) => {
|
Entry::Vacant(vacant) => {
|
||||||
let buf = device.create_buffer(&wgpu::BufferDescriptor {
|
let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
|
||||||
label: None,
|
let buf = pool.get_buf(proxy.size, usage, device);
|
||||||
size: proxy.size,
|
|
||||||
usage: wgpu::BufferUsages::STORAGE
|
|
||||||
| wgpu::BufferUsages::COPY_DST
|
|
||||||
| wgpu::BufferUsages::COPY_SRC,
|
|
||||||
mapped_at_creation: false,
|
|
||||||
});
|
|
||||||
Ok(vacant.insert(buf))
|
Ok(vacant.insert(buf))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -648,3 +645,42 @@ impl<'a> DownloadsMapped<'a> {
|
||||||
Ok(slice.get_mapped_range())
|
Ok(slice.get_mapped_range())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// `bits` argument handed to `ResourcePool::size_class` when rounding a requested
/// buffer size up to a size class.
const SIZE_CLASS_BITS: u32 = 1;
|
||||||
|
|
||||||
|
impl ResourcePool {
|
||||||
|
/// Get a buffer from the pool or create one.
|
||||||
|
fn get_buf(&mut self, size: u64, usage: BufferUsages, device: &Device) -> Buffer {
|
||||||
|
let rounded_size = Self::size_class(size, SIZE_CLASS_BITS);
|
||||||
|
if let Some(buf_vec) = self.bufs.get_mut(&(rounded_size, usage)) {
|
||||||
|
if let Some(buf) = buf_vec.pop() {
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
device.create_buffer(&wgpu::BufferDescriptor {
|
||||||
|
label: None,
|
||||||
|
size: rounded_size,
|
||||||
|
usage,
|
||||||
|
mapped_at_creation: false,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Take ownership of `bind_map` and return every buffer it holds to the pool.
///
/// Each buffer is filed under the `(size, usage)` pair it reports about itself.
/// The rest of the bind map is dropped.
fn reap_bindmap(&mut self, bind_map: BindMap) {
    for buf in bind_map.buf_map.into_values() {
        let key = (buf.size(), buf.usage());
        self.bufs.entry(key).or_default().push(buf);
    }
}
|
||||||
|
|
||||||
|
/// Quantize a size up to the nearest size class.
///
/// Every `x` up to and including `1 << bits` maps to `1 << bits`. For larger
/// values the leading one bit of `x - 1` and the `bits` bits directly below it
/// are kept, and everything lower is rounded upward. That yields `1 << bits`
/// size classes per power of two; with `bits == 1` the classes are
/// 2, 3, 4, 6, 8, 12, 16, ...
///
/// If the next size class does not fit in a `u64`, the result saturates at
/// `u64::MAX` instead of overflowing (which would panic in debug builds and
/// wrap around to 0 in release builds).
fn size_class(x: u64, bits: u32) -> u64 {
    let smallest: u64 = 1 << bits;
    if x <= smallest {
        return smallest;
    }
    // Work on `x - 1` so that exact class sizes map to themselves.
    let top = x - 1;
    // Ones in every position below the `bits` bits that follow the leading one.
    let low_mask = ((u64::MAX / 2) >> bits) >> top.leading_zeros();
    (top | low_mask).saturating_add(1)
}
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue