mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Squeeze pipeline to fit
This commit reduces the workgroup shared memory of binning to fit in 16k (by packing two u16's in a u32), and moves the config binding to uniform, from readonly storage. Progress toward #202
This commit is contained in:
parent
2bd75ac86f
commit
e8f8ebbd14
|
@ -9,7 +9,7 @@ struct Tile {
|
|||
#import config
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage, read_write> tiles: array<Tile>;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#import tile
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> paths: array<Path>;
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
#import config
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
struct PathBbox {
|
||||
x0: i32,
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#import bump
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> draw_monoids: array<DrawMonoid>;
|
||||
|
@ -46,9 +46,11 @@ let SY = 0.00390625;
|
|||
let WG_SIZE = 256u;
|
||||
let N_SLICE = 8u;
|
||||
//let N_SLICE = WG_SIZE / 32u;
|
||||
let N_SUBSLICE = 4u;
|
||||
|
||||
var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>;
|
||||
var<workgroup> sh_count: array<array<u32, N_TILE>, N_SLICE>;
|
||||
// store count values packed two u16's to a u32
|
||||
var<workgroup> sh_count: array<array<u32, N_TILE>, N_SUBSLICE>;
|
||||
var<workgroup> sh_chunk_offset: array<u32, N_TILE>;
|
||||
|
||||
@compute @workgroup_size(256)
|
||||
|
@ -115,9 +117,13 @@ fn main(
|
|||
workgroupBarrier();
|
||||
// Allocate output segments
|
||||
var element_count = 0u;
|
||||
for (var i = 0u; i < N_SLICE; i += 1u) {
|
||||
element_count += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x]));
|
||||
sh_count[i][local_id.x] = element_count;
|
||||
for (var i = 0u; i < N_SUBSLICE; i += 1u) {
|
||||
element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u][local_id.x]));
|
||||
let element_count_lo = element_count;
|
||||
element_count += countOneBits(atomicLoad(&sh_bitmaps[i * 2u + 1u][local_id.x]));
|
||||
let element_count_hi = element_count;
|
||||
let element_count_packed = element_count_lo | (element_count_hi << 16u);
|
||||
sh_count[i][local_id.x] = element_count_packed;
|
||||
}
|
||||
// element_count is the number of draw objects covering this thread's bin
|
||||
let chunk_offset = atomicAdd(&bump.binning, element_count);
|
||||
|
@ -136,7 +142,9 @@ fn main(
|
|||
if (out_mask & my_mask) != 0u {
|
||||
var idx = countOneBits(out_mask & (my_mask - 1u));
|
||||
if my_slice > 0u {
|
||||
idx += sh_count[my_slice - 1u][bin_ix];
|
||||
let count_ix = my_slice - 1u;
|
||||
let count_packed = sh_count[count_ix / 2u][bin_ix];
|
||||
idx += (count_packed >> (16u * (count_ix & 1u))) & 0xffffu;
|
||||
}
|
||||
let offset = sh_chunk_offset[bin_ix];
|
||||
bin_data[offset + idx] = element_ix;
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#import drawtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> clip_inp: array<ClipInp>;
|
||||
|
|
|
@ -5,7 +5,7 @@
|
|||
#import clip
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> clip_inp: array<ClipInp>;
|
||||
|
|
|
@ -9,7 +9,7 @@
|
|||
#import tile
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#import bbox
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#import drawtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -13,7 +13,7 @@ struct Tile {
|
|||
#import config
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> tiles: array<Tile>;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#import pathtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#import bump
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
#import cubic
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#import pathtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
#import pathtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
#import tile
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
var<uniform> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
|
|
@ -86,6 +86,7 @@ pub enum ExternalResource<'a> {
|
|||
|
||||
pub enum Command {
|
||||
Upload(BufProxy, Vec<u8>),
|
||||
UploadUniform(BufProxy, Vec<u8>),
|
||||
UploadImage(ImageProxy, Vec<u8>),
|
||||
// Discussion question: third argument is vec of resources?
|
||||
// Maybe use tricks to make more ergonomic?
|
||||
|
@ -107,6 +108,8 @@ pub enum BindType {
|
|||
Buffer,
|
||||
/// A storage buffer with read only access.
|
||||
BufReadOnly,
|
||||
/// A small storage buffer to be used as uniforms.
|
||||
Uniform,
|
||||
/// A storage image.
|
||||
Image(ImageFormat),
|
||||
/// A storage image with read only access.
|
||||
|
@ -158,6 +161,16 @@ impl Engine {
|
|||
},
|
||||
count: None,
|
||||
},
|
||||
BindType::Uniform => wgpu::BindGroupLayoutEntry {
|
||||
binding: i as u32,
|
||||
visibility: wgpu::ShaderStages::COMPUTE,
|
||||
ty: wgpu::BindingType::Buffer {
|
||||
ty: wgpu::BufferBindingType::Uniform,
|
||||
has_dynamic_offset: false,
|
||||
min_binding_size: None,
|
||||
},
|
||||
count: None,
|
||||
},
|
||||
BindType::Image(format) | BindType::ImageRead(format) => {
|
||||
wgpu::BindGroupLayoutEntry {
|
||||
binding: i as u32,
|
||||
|
@ -229,6 +242,14 @@ impl Engine {
|
|||
});
|
||||
bind_map.insert_buf(buf_proxy.id, buf);
|
||||
}
|
||||
Command::UploadUniform(buf_proxy, bytes) => {
|
||||
let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||
label: None,
|
||||
contents: &bytes,
|
||||
usage: wgpu::BufferUsages::UNIFORM,
|
||||
});
|
||||
bind_map.insert_buf(buf_proxy.id, buf);
|
||||
}
|
||||
Command::UploadImage(image_proxy, bytes) => {
|
||||
let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||
label: None,
|
||||
|
@ -329,6 +350,13 @@ impl Recording {
|
|||
buf_proxy
|
||||
}
|
||||
|
||||
pub fn upload_uniform(&mut self, data: impl Into<Vec<u8>>) -> BufProxy {
|
||||
let data = data.into();
|
||||
let buf_proxy = BufProxy::new(data.len() as u64);
|
||||
self.push(Command::UploadUniform(buf_proxy, data));
|
||||
buf_proxy
|
||||
}
|
||||
|
||||
pub fn upload_image(
|
||||
&mut self,
|
||||
width: u32,
|
||||
|
|
|
@ -85,7 +85,7 @@ fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
|||
..Default::default()
|
||||
};
|
||||
let scene_buf = recording.upload(scene);
|
||||
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
|
||||
let config_buf = recording.upload_uniform(bytemuck::bytes_of(&config));
|
||||
|
||||
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE);
|
||||
// TODO: really only need pathtag_wgs - 1
|
||||
|
@ -231,7 +231,7 @@ pub fn render_full(
|
|||
};
|
||||
// println!("{:?}", config);
|
||||
let scene_buf = ResourceProxy::Buf(recording.upload(scene));
|
||||
let config_buf = ResourceProxy::Buf(recording.upload(bytemuck::bytes_of(&config).to_owned()));
|
||||
let config_buf = ResourceProxy::Buf(recording.upload_uniform(bytemuck::bytes_of(&config)));
|
||||
|
||||
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
||||
let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE);
|
||||
|
|
|
@ -71,17 +71,13 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
|||
let pathtag_reduce = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_reduce"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
],
|
||||
&[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer],
|
||||
)?;
|
||||
let pathtag_scan = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_scan"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
|
@ -94,7 +90,7 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
|||
device,
|
||||
preprocess::preprocess(shader!("path_coarse"), &path_coarse_config, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
|
@ -104,13 +100,13 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
|||
let backdrop = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("backdrop"), &empty, &imports).into(),
|
||||
&[BindType::BufReadOnly, BindType::Buffer],
|
||||
&[BindType::Uniform, BindType::Buffer],
|
||||
)?;
|
||||
let fine = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("fine"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
|
@ -136,17 +132,13 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
let pathtag_reduce = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_reduce"), &full_config, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
],
|
||||
&[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer],
|
||||
)?;
|
||||
let pathtag_scan = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_scan"), &full_config, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
|
@ -155,13 +147,13 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
let bbox_clear = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("bbox_clear"), &empty, &imports).into(),
|
||||
&[BindType::BufReadOnly, BindType::Buffer],
|
||||
&[BindType::Uniform, BindType::Buffer],
|
||||
)?;
|
||||
let pathseg = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathseg"), &full_config, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
|
@ -171,17 +163,13 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
let draw_reduce = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("draw_reduce"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
],
|
||||
&[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer],
|
||||
)?;
|
||||
let draw_leaf = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("draw_leaf"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
|
@ -194,7 +182,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
device,
|
||||
preprocess::preprocess(shader!("clip_reduce"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
|
@ -205,7 +193,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
device,
|
||||
preprocess::preprocess(shader!("clip_leaf"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
|
@ -218,7 +206,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
device,
|
||||
preprocess::preprocess(shader!("binning"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
|
@ -232,7 +220,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
device,
|
||||
preprocess::preprocess(shader!("tile_alloc"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
|
@ -245,7 +233,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
device,
|
||||
preprocess::preprocess(shader!("path_coarse_full"), &full_config, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
|
@ -258,17 +246,13 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
let backdrop = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("backdrop_dyn"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
],
|
||||
&[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer],
|
||||
)?;
|
||||
let coarse = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("coarse"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
|
@ -284,7 +268,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
device,
|
||||
preprocess::preprocess(shader!("fine"), &full_config, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Image(ImageFormat::Rgba8),
|
||||
|
|
Loading…
Reference in a new issue