Unify scene buffer

All streams of the scene are combined into a single buffer. This is very much like existing piet-gpu; however, the various outputs from the compute stages (whether computed on CPU or GPU) will retain their separate bindings, which is more native to WGSL.

There's a touch of ergonomics loss; in particular, when we do transforms we'll need to unmarshal them by hand, but I think overall it's not too bad.
This commit is contained in:
Raph Levien 2022-11-02 18:07:32 -07:00
parent 5c6ec1efa3
commit 7ac327c684
8 changed files with 116 additions and 43 deletions

View file

@ -8,5 +8,6 @@
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl",
"ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl"
},
"wgsl-analyzer.diagnostics.nagaVersion": "main"
"wgsl-analyzer.diagnostics.nagaVersion": "main",
"wgsl-analyzer.preprocessor.shaderDefs": ["full"]
}

View file

@ -44,6 +44,17 @@ var<storage, read_write> output: array<u32>;
@group(0) @binding(4)
var<storage> ptcl: array<u32>;
// Decode the fill command whose tag word sits at `cmd_ix` in `ptcl`.
// The two words following the tag are read as the tile value and the
// backdrop (the latter converted to i32).
fn read_fill(cmd_ix: u32) -> CmdFill {
    let payload_ix = cmd_ix + 1u;
    let tile_word = ptcl[payload_ix];
    let backdrop_word = ptcl[payload_ix + 1u];
    return CmdFill(tile_word, i32(backdrop_word));
}
// Decode the color command whose tag word sits at `cmd_ix` in `ptcl`.
// The single word following the tag is the packed RGBA color.
fn read_color(cmd_ix: u32) -> CmdColor {
    return CmdColor(ptcl[cmd_ix + 1u]);
}
#endif
let PIXELS_PER_THREAD = 4u;
@ -103,7 +114,53 @@ fn main(
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
let tile = tiles[tile_ix];
#ifdef full
var rgba: array<vec4<f32>, PIXELS_PER_THREAD>;
var area: array<f32, PIXELS_PER_THREAD>;
var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC;
// Main interpretation loop: walk this tile's command list in `ptcl`,
// starting at `cmd_ix`, until a CMD_END tag is read. Each case must advance
// `cmd_ix` past the tag word and all of its payload words so that the next
// iteration reads a tag, not payload data.
while true {
    let tag = ptcl[cmd_ix];
    if tag == CMD_END {
        break;
    }
    switch tag {
        // CMD_FILL: tag word + 2 payload words (tile, backdrop).
        case 1u: {
            let fill = read_fill(cmd_ix);
            let tile = Tile(fill.backdrop, fill.tile);
            area = fill_path(tile, xy);
            cmd_ix += 3u;
        }
        // CMD_SOLID: tag word only; every pixel gets full coverage.
        case 3u: {
            for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
                area[i] = 1.0;
            }
            cmd_ix += 1u;
        }
        // CMD_COLOR: tag word + 1 payload word (packed RGBA color).
        case 5u: {
            let color = read_color(cmd_ix);
            let fg = unpack4x8unorm(color.rgba_color);
            for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
                // Scale the color by the pixel's coverage, then blend it
                // over the color accumulated so far.
                let fg_i = fg * area[i];
                rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
            }
            // read_color consumes the word at cmd_ix + 1u, so the command
            // is two words long. Advancing by only one word would make the
            // next iteration interpret the packed color as a command tag.
            cmd_ix += 2u;
        }
        // CMD_JUMP: continue at the index stored in the payload word.
        case 11u: {
            cmd_ix = ptcl[cmd_ix + 1u];
        }
        // NOTE(review): an unrecognized tag leaves cmd_ix unchanged, so the
        // loop would not terminate — confirm that only the tags handled
        // above (and CMD_END) can appear in ptcl.
        default: {}
    }
}
#else
let area = fill_path(tile, xy);
#endif
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;

View file

@ -14,24 +14,22 @@
//
// Also licensed under MIT license, at your choice.
#import config
#import pathtag
@group(0) @binding(0)
var<storage> path_tags: array<u32>;
var<storage> config: Config;
@group(0) @binding(1)
var<storage> tag_monoids: array<TagMonoid>;
var<storage> scene: array<u32>;
// TODO: should probably have single "scene" binding.
@group(0) @binding(2)
var<storage> path_data: array<u32>;
var<storage> tag_monoids: array<TagMonoid>;
#ifdef cubics_out
@group(0) @binding(3)
var<storage, read_write> output: array<vec2<f32>>;
#else
#import config
struct Tile {
backdrop: atomic<i32>,
segments: atomic<u32>,
@ -39,25 +37,23 @@ struct Tile {
#import segment
// Should probably be uniform binding
@group(0) @binding(3)
var<storage> config: Config;
@group(0) @binding(4)
var<storage, read_write> tiles: array<Tile>;
@group(0) @binding(5)
@group(0) @binding(4)
var<storage, read_write> segments: array<Segment>;
#endif
var<private> pathdata_base: u32;
fn read_f32_point(ix: u32) -> vec2<f32> {
let x = bitcast<f32>(path_data[ix]);
let y = bitcast<f32>(path_data[ix + 1u]);
let x = bitcast<f32>(scene[pathdata_base + ix]);
let y = bitcast<f32>(scene[pathdata_base + ix + 1u]);
return vec2<f32>(x, y);
}
fn read_i16_point(ix: u32) -> vec2<f32> {
let raw = path_data[ix];
let raw = scene[pathdata_base + ix];
let x = f32(i32(raw << 16u) >> 16u);
let y = f32(i32(raw) >> 16u);
return vec2<f32>(x, y);
@ -133,7 +129,8 @@ fn main(
) {
// Obtain exclusive prefix sum of tag monoid
let ix = global_id.x;
let tag_word = path_tags[ix >> 2u];
let tag_word = scene[config.pathtag_base + (ix >> 2u)];
pathdata_base = config.pathdata_base;
let shift = (ix & 3u) * 8u;
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);

View file

@ -14,14 +14,16 @@
//
// Also licensed under MIT license, at your choice.
#import config
#import pathtag
// Note: should have a single scene binding, path_tags are a slice
// in that; need a config uniform.
@group(0) @binding(0)
var<storage> path_tags: array<u32>;
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage, read_write> reduced: array<TagMonoid>;
let LG_WG_SIZE = 8u;
@ -35,7 +37,7 @@ fn main(
@builtin(local_invocation_id) local_id: vec3<u32>,
) {
let ix = global_id.x;
let tag_word = path_tags[ix];
let tag_word = scene[config.pathtag_base + ix];
var agg = reduce_tag(tag_word);
sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {

View file

@ -14,15 +14,19 @@
//
// Also licensed under MIT license, at your choice.
#import config
#import pathtag
@group(0) @binding(0)
var<storage> path_tags: array<u32>;
var<storage> config: Config;
@group(0) @binding(1)
var<storage> reduced: array<TagMonoid>;
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> reduced: array<TagMonoid>;
@group(0) @binding(3)
var<storage, read_write> tag_monoids: array<TagMonoid>;
let LG_WG_SIZE = 8u;
@ -54,7 +58,7 @@ fn main(
}
let ix = global_id.x;
let tag_word = path_tags[ix];
let tag_word = scene[config.pathtag_base + ix];
agg = reduce_tag(tag_word);
sh_monoid[local_id.x] = agg;
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {

View file

@ -22,6 +22,9 @@ struct Config {
// offsets within scene buffer (in u32 units)
// Note: this is a difference from piet-gpu, which is in bytes
pathtag_base: u32,
pathdata_base: u32,
drawtag_base: u32,
drawdata_base: u32,
}

View file

@ -16,6 +16,8 @@ struct Config {
width_in_tiles: u32,
height_in_tiles: u32,
n_drawobj: u32,
pathtag_base: u32,
pathdata_base: u32,
drawtag_base: u32,
drawdata_base: u32,
}
@ -29,22 +31,39 @@ pub struct PathSegment {
next: u32,
}
/// Converts a length in bytes into a length in `u32` words.
///
/// The byte count is divided by the size of a `u32`; any remainder is
/// discarded. The quotient is narrowed to `u32` with an `as` cast, so a
/// quotient that does not fit in 32 bits is truncated.
fn size_to_words(byte_size: usize) -> u32 {
    const WORD_BYTES: usize = std::mem::size_of::<u32>();
    let word_count = byte_size / WORD_BYTES;
    word_count as u32
}
pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
let mut recording = Recording::default();
let data = scene.data();
let n_pathtag = data.tag_stream.len();
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
let mut tag_data: Vec<u8> = Vec::with_capacity(pathtag_padded);
tag_data.extend(&data.tag_stream);
tag_data.resize(pathtag_padded, 0);
let pathtag_buf = recording.upload(tag_data);
let mut scene: Vec<u8> = Vec::with_capacity(pathtag_padded);
let pathtag_base = size_to_words(scene.len());
scene.extend(&data.tag_stream);
scene.resize(pathtag_padded, 0);
let pathdata_base = size_to_words(scene.len());
scene.extend(&data.pathseg_stream);
let config = Config {
width_in_tiles: 64,
height_in_tiles: 64,
pathtag_base,
pathdata_base,
..Default::default()
};
let scene_buf = recording.upload(scene);
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE);
// TODO: really only need pathtag_wgs - 1
recording.dispatch(
shaders.pathtag_reduce,
(pathtag_wgs as u32, 1, 1),
[pathtag_buf, reduced_buf],
[config_buf, scene_buf, reduced_buf],
);
let tagmonoid_buf =
@ -52,20 +71,11 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
recording.dispatch(
shaders.pathtag_scan,
(pathtag_wgs as u32, 1, 1),
[pathtag_buf, reduced_buf, tagmonoid_buf],
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
);
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
// The clone here is kinda BS, think about reducing copies
// Of course, we'll probably end up concatenating into a single scene binding.
let pathdata_buf = recording.upload(data.pathseg_stream.clone());
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
let config = Config {
width_in_tiles: 64,
height_in_tiles: 64,
..Default::default()
};
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
// TODO: more principled size calc
let tiles_buf = BufProxy::new(4097 * 8);
let segments_buf = BufProxy::new(256 * 24);
@ -74,10 +84,9 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
shaders.path_coarse,
(path_coarse_wgs, 1, 1),
[
pathtag_buf,
tagmonoid_buf,
pathdata_buf,
config_buf,
scene_buf,
tagmonoid_buf,
tiles_buf,
segments_buf,
],

View file

@ -44,12 +44,13 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
let pathtag_reduce = engine.add_shader(
device,
preprocess::preprocess(&read_shader("pathtag_reduce"), &empty, &imports).into(),
&[BindType::BufReadOnly, BindType::Buffer],
&[BindType::BufReadOnly, BindType::BufReadOnly, BindType::Buffer],
)?;
let pathtag_scan = engine.add_shader(
device,
preprocess::preprocess(&read_shader("pathtag_scan"), &empty, &imports).into(),
&[
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
@ -65,7 +66,6 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
BindType::Buffer,
],