mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Unify scene buffer
All streams of the scene are combined into a single buffer. This is very much like the existing piet-gpu; however, the various outputs from the compute stages (whether computed on CPU or GPU) will retain their separate bindings, which is more native to WGSL. There's a slight loss of ergonomics (in particular, when we do transforms we'll need to unmarshal them by hand), but I think overall it's not too bad.
This commit is contained in:
parent
5c6ec1efa3
commit
7ac327c684
3
.vscode/settings.json
vendored
3
.vscode/settings.json
vendored
|
@ -8,5 +8,6 @@
|
|||
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl",
|
||||
"ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl"
|
||||
},
|
||||
"wgsl-analyzer.diagnostics.nagaVersion": "main"
|
||||
"wgsl-analyzer.diagnostics.nagaVersion": "main",
|
||||
"wgsl-analyzer.preprocessor.shaderDefs": ["full"]
|
||||
}
|
||||
|
|
|
@ -44,6 +44,17 @@ var<storage, read_write> output: array<u32>;
|
|||
|
||||
@group(0) @binding(4)
|
||||
var<storage> ptcl: array<u32>;
|
||||
|
||||
// Decodes a fill command from the `ptcl` buffer. The word at `cmd_ix` is the
// command tag; the two words after it are passed to `CmdFill` as the tile
// value and the backdrop (converted to i32).
fn read_fill(cmd_ix: u32) -> CmdFill {
    let payload_ix = cmd_ix + 1u;
    let tile_word = ptcl[payload_ix];
    let backdrop_word = ptcl[payload_ix + 1u];
    return CmdFill(tile_word, i32(backdrop_word));
}
|
||||
|
||||
// Decodes a color command from the `ptcl` buffer. The word at `cmd_ix` is the
// command tag; the single word after it is the packed color handed to
// `CmdColor`.
fn read_color(cmd_ix: u32) -> CmdColor {
    return CmdColor(ptcl[cmd_ix + 1u]);
}
|
||||
#endif
|
||||
|
||||
let PIXELS_PER_THREAD = 4u;
|
||||
|
@ -103,7 +114,53 @@ fn main(
|
|||
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
|
||||
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
|
||||
let tile = tiles[tile_ix];
|
||||
#ifdef full
|
||||
var rgba: array<vec4<f32>, PIXELS_PER_THREAD>;
|
||||
var area: array<f32, PIXELS_PER_THREAD>;
|
||||
var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC;
|
||||
|
||||
// main interpretation loop
|
||||
while true {
|
||||
let tag = ptcl[cmd_ix];
|
||||
if tag == CMD_END {
|
||||
break;
|
||||
}
|
||||
switch tag {
|
||||
// CMD_FILL
|
||||
case 1u: {
|
||||
let fill = read_fill(cmd_ix);
|
||||
let tile = Tile(fill.backdrop, fill.tile);
|
||||
area = fill_path(tile, xy);
|
||||
cmd_ix += 3u;
|
||||
}
|
||||
// CMD_SOLID
|
||||
case 3u: {
|
||||
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||
area[i] = 1.0;
|
||||
}
|
||||
cmd_ix += 1u;
|
||||
}
|
||||
// CMD_COLOR
case 5u: {
    let color = read_color(cmd_ix);
    let fg = unpack4x8unorm(color.rgba_color);
    // Blend the color, scaled by each pixel's area value, over the
    // color accumulated so far for that pixel.
    for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
        let fg_i = fg * area[i];
        rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
    }
    // A color command occupies two words: the tag at `cmd_ix` and the
    // packed color at `cmd_ix + 1u` (see read_color). Advance past both so
    // the next iteration reads a tag rather than the color payload.
    cmd_ix += 2u;
}
|
||||
// CMD_JUMP
|
||||
case 11u: {
|
||||
cmd_ix = ptcl[cmd_ix + 1u];
|
||||
}
|
||||
default: {}
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
let area = fill_path(tile, xy);
|
||||
#endif
|
||||
|
||||
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
|
||||
let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;
|
||||
|
|
|
@ -14,24 +14,22 @@
|
|||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
#import config
|
||||
#import pathtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> path_tags: array<u32>;
|
||||
var<storage> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> tag_monoids: array<TagMonoid>;
|
||||
var<storage> scene: array<u32>;
|
||||
|
||||
// TODO: should probably have single "scene" binding.
|
||||
@group(0) @binding(2)
|
||||
var<storage> path_data: array<u32>;
|
||||
var<storage> tag_monoids: array<TagMonoid>;
|
||||
|
||||
#ifdef cubics_out
|
||||
@group(0) @binding(3)
|
||||
var<storage, read_write> output: array<vec2<f32>>;
|
||||
#else
|
||||
#import config
|
||||
|
||||
struct Tile {
|
||||
backdrop: atomic<i32>,
|
||||
segments: atomic<u32>,
|
||||
|
@ -39,25 +37,23 @@ struct Tile {
|
|||
|
||||
#import segment
|
||||
|
||||
// Should probably be uniform binding
|
||||
@group(0) @binding(3)
|
||||
var<storage> config: Config;
|
||||
|
||||
@group(0) @binding(4)
|
||||
var<storage, read_write> tiles: array<Tile>;
|
||||
|
||||
@group(0) @binding(5)
|
||||
@group(0) @binding(4)
|
||||
var<storage, read_write> segments: array<Segment>;
|
||||
#endif
|
||||
|
||||
var<private> pathdata_base: u32;
|
||||
|
||||
fn read_f32_point(ix: u32) -> vec2<f32> {
|
||||
let x = bitcast<f32>(path_data[ix]);
|
||||
let y = bitcast<f32>(path_data[ix + 1u]);
|
||||
let x = bitcast<f32>(scene[pathdata_base + ix]);
|
||||
let y = bitcast<f32>(scene[pathdata_base + ix + 1u]);
|
||||
return vec2<f32>(x, y);
|
||||
}
|
||||
|
||||
fn read_i16_point(ix: u32) -> vec2<f32> {
|
||||
let raw = path_data[ix];
|
||||
let raw = scene[pathdata_base + ix];
|
||||
let x = f32(i32(raw << 16u) >> 16u);
|
||||
let y = f32(i32(raw) >> 16u);
|
||||
return vec2<f32>(x, y);
|
||||
|
@ -133,7 +129,8 @@ fn main(
|
|||
) {
|
||||
// Obtain exclusive prefix sum of tag monoid
|
||||
let ix = global_id.x;
|
||||
let tag_word = path_tags[ix >> 2u];
|
||||
let tag_word = scene[config.pathtag_base + (ix >> 2u)];
|
||||
pathdata_base = config.pathdata_base;
|
||||
let shift = (ix & 3u) * 8u;
|
||||
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
|
||||
tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);
|
||||
|
|
|
@ -14,14 +14,16 @@
|
|||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
#import config
|
||||
#import pathtag
|
||||
|
||||
// Note: should have a single scene binding, path_tags are a slice
|
||||
// in that; need a config uniform.
|
||||
@group(0) @binding(0)
|
||||
var<storage> path_tags: array<u32>;
|
||||
var<storage> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
||||
@group(0) @binding(2)
|
||||
var<storage, read_write> reduced: array<TagMonoid>;
|
||||
|
||||
let LG_WG_SIZE = 8u;
|
||||
|
@ -35,7 +37,7 @@ fn main(
|
|||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||
) {
|
||||
let ix = global_id.x;
|
||||
let tag_word = path_tags[ix];
|
||||
let tag_word = scene[config.pathtag_base + ix];
|
||||
var agg = reduce_tag(tag_word);
|
||||
sh_scratch[local_id.x] = agg;
|
||||
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||
|
|
|
@ -14,15 +14,19 @@
|
|||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
#import config
|
||||
#import pathtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> path_tags: array<u32>;
|
||||
var<storage> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> reduced: array<TagMonoid>;
|
||||
var<storage> scene: array<u32>;
|
||||
|
||||
@group(0) @binding(2)
|
||||
var<storage> reduced: array<TagMonoid>;
|
||||
|
||||
@group(0) @binding(3)
|
||||
var<storage, read_write> tag_monoids: array<TagMonoid>;
|
||||
|
||||
let LG_WG_SIZE = 8u;
|
||||
|
@ -54,7 +58,7 @@ fn main(
|
|||
}
|
||||
|
||||
let ix = global_id.x;
|
||||
let tag_word = path_tags[ix];
|
||||
let tag_word = scene[config.pathtag_base + ix];
|
||||
agg = reduce_tag(tag_word);
|
||||
sh_monoid[local_id.x] = agg;
|
||||
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
||||
|
|
|
@ -22,6 +22,9 @@ struct Config {
|
|||
|
||||
// offsets within scene buffer (in u32 units)
|
||||
// Note: this is a difference from piet-gpu, which is in bytes
|
||||
pathtag_base: u32,
|
||||
pathdata_base: u32,
|
||||
|
||||
drawtag_base: u32,
|
||||
drawdata_base: u32,
|
||||
}
|
||||
|
|
|
@ -16,6 +16,8 @@ struct Config {
|
|||
width_in_tiles: u32,
|
||||
height_in_tiles: u32,
|
||||
n_drawobj: u32,
|
||||
pathtag_base: u32,
|
||||
pathdata_base: u32,
|
||||
drawtag_base: u32,
|
||||
drawdata_base: u32,
|
||||
}
|
||||
|
@ -29,22 +31,39 @@ pub struct PathSegment {
|
|||
next: u32,
|
||||
}
|
||||
|
||||
/// Converts a size in bytes into a count of whole 32-bit words.
///
/// The result is used for offsets into the scene buffer, which are expressed
/// in `u32` units rather than bytes. Any trailing bytes that do not fill a
/// complete word are dropped (the division rounds down), so callers should
/// pass a multiple of four.
///
/// # Panics
///
/// Panics if the word count does not fit in a `u32`. A plain `as u32` cast
/// would silently wrap here and yield an offset pointing at the wrong data.
fn size_to_words(byte_size: usize) -> u32 {
    let words = byte_size / std::mem::size_of::<u32>();
    // Fully qualified so the conversion resolves without relying on the
    // edition's prelude contents.
    <u32 as std::convert::TryFrom<usize>>::try_from(words)
        .expect("size in 32-bit words does not fit in a u32")
}
|
||||
|
||||
pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
||||
let mut recording = Recording::default();
|
||||
let data = scene.data();
|
||||
let n_pathtag = data.tag_stream.len();
|
||||
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
|
||||
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
||||
let mut tag_data: Vec<u8> = Vec::with_capacity(pathtag_padded);
|
||||
tag_data.extend(&data.tag_stream);
|
||||
tag_data.resize(pathtag_padded, 0);
|
||||
let pathtag_buf = recording.upload(tag_data);
|
||||
let mut scene: Vec<u8> = Vec::with_capacity(pathtag_padded);
|
||||
let pathtag_base = size_to_words(scene.len());
|
||||
scene.extend(&data.tag_stream);
|
||||
scene.resize(pathtag_padded, 0);
|
||||
let pathdata_base = size_to_words(scene.len());
|
||||
scene.extend(&data.pathseg_stream);
|
||||
|
||||
let config = Config {
|
||||
width_in_tiles: 64,
|
||||
height_in_tiles: 64,
|
||||
pathtag_base,
|
||||
pathdata_base,
|
||||
..Default::default()
|
||||
};
|
||||
let scene_buf = recording.upload(scene);
|
||||
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
|
||||
|
||||
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE);
|
||||
// TODO: really only need pathtag_wgs - 1
|
||||
recording.dispatch(
|
||||
shaders.pathtag_reduce,
|
||||
(pathtag_wgs as u32, 1, 1),
|
||||
[pathtag_buf, reduced_buf],
|
||||
[config_buf, scene_buf, reduced_buf],
|
||||
);
|
||||
|
||||
let tagmonoid_buf =
|
||||
|
@ -52,20 +71,11 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
|||
recording.dispatch(
|
||||
shaders.pathtag_scan,
|
||||
(pathtag_wgs as u32, 1, 1),
|
||||
[pathtag_buf, reduced_buf, tagmonoid_buf],
|
||||
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
|
||||
);
|
||||
|
||||
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
|
||||
// The clone here is kinda BS, think about reducing copies
|
||||
// Of course, we'll probably end up concatenating into a single scene binding.
|
||||
let pathdata_buf = recording.upload(data.pathseg_stream.clone());
|
||||
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
|
||||
let config = Config {
|
||||
width_in_tiles: 64,
|
||||
height_in_tiles: 64,
|
||||
..Default::default()
|
||||
};
|
||||
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
|
||||
// TODO: more principled size calc
|
||||
let tiles_buf = BufProxy::new(4097 * 8);
|
||||
let segments_buf = BufProxy::new(256 * 24);
|
||||
|
@ -74,10 +84,9 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
|||
shaders.path_coarse,
|
||||
(path_coarse_wgs, 1, 1),
|
||||
[
|
||||
pathtag_buf,
|
||||
tagmonoid_buf,
|
||||
pathdata_buf,
|
||||
config_buf,
|
||||
scene_buf,
|
||||
tagmonoid_buf,
|
||||
tiles_buf,
|
||||
segments_buf,
|
||||
],
|
||||
|
|
|
@ -44,12 +44,13 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
|||
let pathtag_reduce = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(&read_shader("pathtag_reduce"), &empty, &imports).into(),
|
||||
&[BindType::BufReadOnly, BindType::Buffer],
|
||||
&[BindType::BufReadOnly, BindType::BufReadOnly, BindType::Buffer],
|
||||
)?;
|
||||
let pathtag_scan = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(&read_shader("pathtag_scan"), &empty, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
|
@ -65,7 +66,6 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
|||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
BindType::Buffer,
|
||||
],
|
||||
|
|
Loading…
Reference in a new issue