mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Unify scene buffer
All streams of the scene are combined into a single buffer. This is very much like the existing piet-gpu; however, the various outputs from the compute stages (whether computed on CPU or GPU) will retain their separate bindings, which is more native to WGSL. There's a slight loss of ergonomics (in particular, when we do transforms we'll need to unmarshal them by hand), but I think overall it's not too bad.
This commit is contained in:
parent
5c6ec1efa3
commit
7ac327c684
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
|
@ -8,5 +8,6 @@
|
||||||
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl",
|
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl",
|
||||||
"ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl"
|
"ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl"
|
||||||
},
|
},
|
||||||
"wgsl-analyzer.diagnostics.nagaVersion": "main"
|
"wgsl-analyzer.diagnostics.nagaVersion": "main",
|
||||||
|
"wgsl-analyzer.preprocessor.shaderDefs": ["full"]
|
||||||
}
|
}
|
||||||
|
|
|
@ -44,6 +44,17 @@ var<storage, read_write> output: array<u32>;
|
||||||
|
|
||||||
@group(0) @binding(4)
|
@group(0) @binding(4)
|
||||||
var<storage> ptcl: array<u32>;
|
var<storage> ptcl: array<u32>;
|
||||||
|
|
||||||
|
fn read_fill(cmd_ix: u32) -> CmdFill {
|
||||||
|
let tile = ptcl[cmd_ix + 1u];
|
||||||
|
let backdrop = i32(ptcl[cmd_ix + 2u]);
|
||||||
|
return CmdFill(tile, backdrop);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_color(cmd_ix: u32) -> CmdColor {
|
||||||
|
let rgba_color = ptcl[cmd_ix + 1u];
|
||||||
|
return CmdColor(rgba_color);
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
let PIXELS_PER_THREAD = 4u;
|
let PIXELS_PER_THREAD = 4u;
|
||||||
|
@ -103,7 +114,53 @@ fn main(
|
||||||
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
|
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
|
||||||
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
|
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
|
||||||
let tile = tiles[tile_ix];
|
let tile = tiles[tile_ix];
|
||||||
|
#ifdef full
|
||||||
|
var rgba: array<vec4<f32>, PIXELS_PER_THREAD>;
|
||||||
|
var area: array<f32, PIXELS_PER_THREAD>;
|
||||||
|
var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC;
|
||||||
|
|
||||||
|
// main interpretation loop
|
||||||
|
while true {
|
||||||
|
let tag = ptcl[cmd_ix];
|
||||||
|
if tag == CMD_END {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch tag {
|
||||||
|
// CMD_FILL
|
||||||
|
case 1u: {
|
||||||
|
let fill = read_fill(cmd_ix);
|
||||||
|
let tile = Tile(fill.backdrop, fill.tile);
|
||||||
|
area = fill_path(tile, xy);
|
||||||
|
cmd_ix += 3u;
|
||||||
|
}
|
||||||
|
// CMD_SOLID
|
||||||
|
case 3u: {
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
area[i] = 1.0;
|
||||||
|
}
|
||||||
|
cmd_ix += 1u;
|
||||||
|
}
|
||||||
|
// CMD_COLOR
|
||||||
|
case 5u: {
|
||||||
|
let color = read_color(cmd_ix);
|
||||||
|
let fg = unpack4x8unorm(color.rgba_color);
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
let fg_i = fg * area[i];
|
||||||
|
rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
|
||||||
|
}
|
||||||
|
cmd_ix += 1u;
|
||||||
|
}
|
||||||
|
// CMD_JUMP
|
||||||
|
case 11u: {
|
||||||
|
cmd_ix = ptcl[cmd_ix + 1u];
|
||||||
|
}
|
||||||
|
default: {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
let area = fill_path(tile, xy);
|
let area = fill_path(tile, xy);
|
||||||
|
#endif
|
||||||
|
|
||||||
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
|
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
|
||||||
let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;
|
let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;
|
||||||
|
|
|
@ -14,24 +14,22 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
#import config
|
||||||
#import pathtag
|
#import pathtag
|
||||||
|
|
||||||
@group(0) @binding(0)
|
@group(0) @binding(0)
|
||||||
var<storage> path_tags: array<u32>;
|
var<storage> config: Config;
|
||||||
|
|
||||||
@group(0) @binding(1)
|
@group(0) @binding(1)
|
||||||
var<storage> tag_monoids: array<TagMonoid>;
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
// TODO: should probably have single "scene" binding.
|
|
||||||
@group(0) @binding(2)
|
@group(0) @binding(2)
|
||||||
var<storage> path_data: array<u32>;
|
var<storage> tag_monoids: array<TagMonoid>;
|
||||||
|
|
||||||
#ifdef cubics_out
|
#ifdef cubics_out
|
||||||
@group(0) @binding(3)
|
@group(0) @binding(3)
|
||||||
var<storage, read_write> output: array<vec2<f32>>;
|
var<storage, read_write> output: array<vec2<f32>>;
|
||||||
#else
|
#else
|
||||||
#import config
|
|
||||||
|
|
||||||
struct Tile {
|
struct Tile {
|
||||||
backdrop: atomic<i32>,
|
backdrop: atomic<i32>,
|
||||||
segments: atomic<u32>,
|
segments: atomic<u32>,
|
||||||
|
@ -39,25 +37,23 @@ struct Tile {
|
||||||
|
|
||||||
#import segment
|
#import segment
|
||||||
|
|
||||||
// Should probably be uniform binding
|
|
||||||
@group(0) @binding(3)
|
@group(0) @binding(3)
|
||||||
var<storage> config: Config;
|
|
||||||
|
|
||||||
@group(0) @binding(4)
|
|
||||||
var<storage, read_write> tiles: array<Tile>;
|
var<storage, read_write> tiles: array<Tile>;
|
||||||
|
|
||||||
@group(0) @binding(5)
|
@group(0) @binding(4)
|
||||||
var<storage, read_write> segments: array<Segment>;
|
var<storage, read_write> segments: array<Segment>;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
var<private> pathdata_base: u32;
|
||||||
|
|
||||||
fn read_f32_point(ix: u32) -> vec2<f32> {
|
fn read_f32_point(ix: u32) -> vec2<f32> {
|
||||||
let x = bitcast<f32>(path_data[ix]);
|
let x = bitcast<f32>(scene[pathdata_base + ix]);
|
||||||
let y = bitcast<f32>(path_data[ix + 1u]);
|
let y = bitcast<f32>(scene[pathdata_base + ix + 1u]);
|
||||||
return vec2<f32>(x, y);
|
return vec2<f32>(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_i16_point(ix: u32) -> vec2<f32> {
|
fn read_i16_point(ix: u32) -> vec2<f32> {
|
||||||
let raw = path_data[ix];
|
let raw = scene[pathdata_base + ix];
|
||||||
let x = f32(i32(raw << 16u) >> 16u);
|
let x = f32(i32(raw << 16u) >> 16u);
|
||||||
let y = f32(i32(raw) >> 16u);
|
let y = f32(i32(raw) >> 16u);
|
||||||
return vec2<f32>(x, y);
|
return vec2<f32>(x, y);
|
||||||
|
@ -133,7 +129,8 @@ fn main(
|
||||||
) {
|
) {
|
||||||
// Obtain exclusive prefix sum of tag monoid
|
// Obtain exclusive prefix sum of tag monoid
|
||||||
let ix = global_id.x;
|
let ix = global_id.x;
|
||||||
let tag_word = path_tags[ix >> 2u];
|
let tag_word = scene[config.pathtag_base + (ix >> 2u)];
|
||||||
|
pathdata_base = config.pathdata_base;
|
||||||
let shift = (ix & 3u) * 8u;
|
let shift = (ix & 3u) * 8u;
|
||||||
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
|
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
|
||||||
tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);
|
tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);
|
||||||
|
|
|
@ -14,14 +14,16 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
#import config
|
||||||
#import pathtag
|
#import pathtag
|
||||||
|
|
||||||
// Note: should have a single scene binding, path_tags are a slice
|
|
||||||
// in that; need a config uniform.
|
|
||||||
@group(0) @binding(0)
|
@group(0) @binding(0)
|
||||||
var<storage> path_tags: array<u32>;
|
var<storage> config: Config;
|
||||||
|
|
||||||
@group(0) @binding(1)
|
@group(0) @binding(1)
|
||||||
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
var<storage, read_write> reduced: array<TagMonoid>;
|
var<storage, read_write> reduced: array<TagMonoid>;
|
||||||
|
|
||||||
let LG_WG_SIZE = 8u;
|
let LG_WG_SIZE = 8u;
|
||||||
|
@ -35,7 +37,7 @@ fn main(
|
||||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
) {
|
) {
|
||||||
let ix = global_id.x;
|
let ix = global_id.x;
|
||||||
let tag_word = path_tags[ix];
|
let tag_word = scene[config.pathtag_base + ix];
|
||||||
var agg = reduce_tag(tag_word);
|
var agg = reduce_tag(tag_word);
|
||||||
sh_scratch[local_id.x] = agg;
|
sh_scratch[local_id.x] = agg;
|
||||||
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
|
|
@ -14,15 +14,19 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
#import config
|
||||||
#import pathtag
|
#import pathtag
|
||||||
|
|
||||||
@group(0) @binding(0)
|
@group(0) @binding(0)
|
||||||
var<storage> path_tags: array<u32>;
|
var<storage> config: Config;
|
||||||
|
|
||||||
@group(0) @binding(1)
|
@group(0) @binding(1)
|
||||||
var<storage> reduced: array<TagMonoid>;
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
@group(0) @binding(2)
|
@group(0) @binding(2)
|
||||||
|
var<storage> reduced: array<TagMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
var<storage, read_write> tag_monoids: array<TagMonoid>;
|
var<storage, read_write> tag_monoids: array<TagMonoid>;
|
||||||
|
|
||||||
let LG_WG_SIZE = 8u;
|
let LG_WG_SIZE = 8u;
|
||||||
|
@ -54,7 +58,7 @@ fn main(
|
||||||
}
|
}
|
||||||
|
|
||||||
let ix = global_id.x;
|
let ix = global_id.x;
|
||||||
let tag_word = path_tags[ix];
|
let tag_word = scene[config.pathtag_base + ix];
|
||||||
agg = reduce_tag(tag_word);
|
agg = reduce_tag(tag_word);
|
||||||
sh_monoid[local_id.x] = agg;
|
sh_monoid[local_id.x] = agg;
|
||||||
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
||||||
|
|
|
@ -22,6 +22,9 @@ struct Config {
|
||||||
|
|
||||||
// offsets within scene buffer (in u32 units)
|
// offsets within scene buffer (in u32 units)
|
||||||
// Note: this is a difference from piet-gpu, which is in bytes
|
// Note: this is a difference from piet-gpu, which is in bytes
|
||||||
|
pathtag_base: u32,
|
||||||
|
pathdata_base: u32,
|
||||||
|
|
||||||
drawtag_base: u32,
|
drawtag_base: u32,
|
||||||
drawdata_base: u32,
|
drawdata_base: u32,
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,8 @@ struct Config {
|
||||||
width_in_tiles: u32,
|
width_in_tiles: u32,
|
||||||
height_in_tiles: u32,
|
height_in_tiles: u32,
|
||||||
n_drawobj: u32,
|
n_drawobj: u32,
|
||||||
|
pathtag_base: u32,
|
||||||
|
pathdata_base: u32,
|
||||||
drawtag_base: u32,
|
drawtag_base: u32,
|
||||||
drawdata_base: u32,
|
drawdata_base: u32,
|
||||||
}
|
}
|
||||||
|
@ -29,22 +31,39 @@ pub struct PathSegment {
|
||||||
next: u32,
|
next: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn size_to_words(byte_size: usize) -> u32 {
|
||||||
|
(byte_size / std::mem::size_of::<u32>()) as u32
|
||||||
|
}
|
||||||
|
|
||||||
pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
||||||
let mut recording = Recording::default();
|
let mut recording = Recording::default();
|
||||||
let data = scene.data();
|
let data = scene.data();
|
||||||
let n_pathtag = data.tag_stream.len();
|
let n_pathtag = data.tag_stream.len();
|
||||||
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
|
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
|
||||||
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
||||||
let mut tag_data: Vec<u8> = Vec::with_capacity(pathtag_padded);
|
let mut scene: Vec<u8> = Vec::with_capacity(pathtag_padded);
|
||||||
tag_data.extend(&data.tag_stream);
|
let pathtag_base = size_to_words(scene.len());
|
||||||
tag_data.resize(pathtag_padded, 0);
|
scene.extend(&data.tag_stream);
|
||||||
let pathtag_buf = recording.upload(tag_data);
|
scene.resize(pathtag_padded, 0);
|
||||||
|
let pathdata_base = size_to_words(scene.len());
|
||||||
|
scene.extend(&data.pathseg_stream);
|
||||||
|
|
||||||
|
let config = Config {
|
||||||
|
width_in_tiles: 64,
|
||||||
|
height_in_tiles: 64,
|
||||||
|
pathtag_base,
|
||||||
|
pathdata_base,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let scene_buf = recording.upload(scene);
|
||||||
|
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
|
||||||
|
|
||||||
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE);
|
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE);
|
||||||
// TODO: really only need pathtag_wgs - 1
|
// TODO: really only need pathtag_wgs - 1
|
||||||
recording.dispatch(
|
recording.dispatch(
|
||||||
shaders.pathtag_reduce,
|
shaders.pathtag_reduce,
|
||||||
(pathtag_wgs as u32, 1, 1),
|
(pathtag_wgs as u32, 1, 1),
|
||||||
[pathtag_buf, reduced_buf],
|
[config_buf, scene_buf, reduced_buf],
|
||||||
);
|
);
|
||||||
|
|
||||||
let tagmonoid_buf =
|
let tagmonoid_buf =
|
||||||
|
@ -52,20 +71,11 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
||||||
recording.dispatch(
|
recording.dispatch(
|
||||||
shaders.pathtag_scan,
|
shaders.pathtag_scan,
|
||||||
(pathtag_wgs as u32, 1, 1),
|
(pathtag_wgs as u32, 1, 1),
|
||||||
[pathtag_buf, reduced_buf, tagmonoid_buf],
|
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
|
||||||
);
|
);
|
||||||
|
|
||||||
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
|
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
|
||||||
// The clone here is kinda BS, think about reducing copies
|
|
||||||
// Of course, we'll probably end up concatenating into a single scene binding.
|
|
||||||
let pathdata_buf = recording.upload(data.pathseg_stream.clone());
|
|
||||||
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
|
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
|
||||||
let config = Config {
|
|
||||||
width_in_tiles: 64,
|
|
||||||
height_in_tiles: 64,
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
|
|
||||||
// TODO: more principled size calc
|
// TODO: more principled size calc
|
||||||
let tiles_buf = BufProxy::new(4097 * 8);
|
let tiles_buf = BufProxy::new(4097 * 8);
|
||||||
let segments_buf = BufProxy::new(256 * 24);
|
let segments_buf = BufProxy::new(256 * 24);
|
||||||
|
@ -74,10 +84,9 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
||||||
shaders.path_coarse,
|
shaders.path_coarse,
|
||||||
(path_coarse_wgs, 1, 1),
|
(path_coarse_wgs, 1, 1),
|
||||||
[
|
[
|
||||||
pathtag_buf,
|
|
||||||
tagmonoid_buf,
|
|
||||||
pathdata_buf,
|
|
||||||
config_buf,
|
config_buf,
|
||||||
|
scene_buf,
|
||||||
|
tagmonoid_buf,
|
||||||
tiles_buf,
|
tiles_buf,
|
||||||
segments_buf,
|
segments_buf,
|
||||||
],
|
],
|
||||||
|
|
|
@ -44,12 +44,13 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
||||||
let pathtag_reduce = engine.add_shader(
|
let pathtag_reduce = engine.add_shader(
|
||||||
device,
|
device,
|
||||||
preprocess::preprocess(&read_shader("pathtag_reduce"), &empty, &imports).into(),
|
preprocess::preprocess(&read_shader("pathtag_reduce"), &empty, &imports).into(),
|
||||||
&[BindType::BufReadOnly, BindType::Buffer],
|
&[BindType::BufReadOnly, BindType::BufReadOnly, BindType::Buffer],
|
||||||
)?;
|
)?;
|
||||||
let pathtag_scan = engine.add_shader(
|
let pathtag_scan = engine.add_shader(
|
||||||
device,
|
device,
|
||||||
preprocess::preprocess(&read_shader("pathtag_scan"), &empty, &imports).into(),
|
preprocess::preprocess(&read_shader("pathtag_scan"), &empty, &imports).into(),
|
||||||
&[
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::Buffer,
|
BindType::Buffer,
|
||||||
|
@ -65,7 +66,6 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
|
||||||
BindType::Buffer,
|
BindType::Buffer,
|
||||||
BindType::Buffer,
|
BindType::Buffer,
|
||||||
],
|
],
|
||||||
|
|
Loading…
Reference in a new issue