Checkpoint

Felt like checkpointing what I have before trying to run the pipeline. Theoretically everything should work.
This commit is contained in:
Raph Levien 2022-11-03 19:33:11 -07:00
parent 5851ef1417
commit 06fa3cb9ab
10 changed files with 265 additions and 65 deletions

View file

@ -3,6 +3,7 @@
"bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl",
"bump": "${workspaceFolder}/piet-wgsl/shader/shared/bump.wgsl",
"config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl",
"cubic": "${workspaceFolder}/piet-wgsl/shader/shared/cubic.wgsl",
"drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl",
"segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl",
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl",

View file

@ -40,6 +40,25 @@ var<storage, read_write> info: array<u32>;
let WG_SIZE = 256u;
// Possibly dedup?
struct Transform {
matrx: vec4<f32>,
translate: vec2<f32>,
}
fn read_transform(transform_base: u32, ix: u32) -> Transform {
let base = transform_base + ix * 6u;
let c0 = bitcast<f32>(scene[base]);
let c1 = bitcast<f32>(scene[base] + 1u);
let c2 = bitcast<f32>(scene[base] + 2u);
let c3 = bitcast<f32>(scene[base] + 3u);
let c4 = bitcast<f32>(scene[base] + 4u);
let c5 = bitcast<f32>(scene[base] + 5u);
let matrx = vec4<f32>(c0, c1, c2, c3);
let translate = vec2<f32>(c4, c5);
return Transform(matrx, translate);
}
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;
@compute @workgroup_size(256)
@ -80,17 +99,20 @@ fn main(
tag_word == DRAWTAG_BEGIN_CLIP
{
let bbox = path_bbox[m.path_ix];
let x0 = f32(bbox.x0) - 32768.0;
let y0 = f32(bbox.y0) - 32768.0;
let x1 = f32(bbox.x1) - 32768.0;
let y1 = f32(bbox.y1) - 32768.0;
let bbox_f = vec4(x0, y0, x1, y1);
// TODO: bbox is mostly yagni here, sort that out. Maybe clips?
// let x0 = f32(bbox.x0);
// let y0 = f32(bbox.y0);
// let x1 = f32(bbox.x1);
// let y1 = f32(bbox.y1);
// let bbox_f = vec4(x0, y0, x1, y1);
let fill_mode = u32(bbox.linewidth >= 0.0);
var matrx: vec4<f32>;
var translate: vec2<f32>;
var linewidth = bbox.linewidth;
if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT {
// TODO: retrieve transform from scene. Packed?
let transform = read_transform(config.transform_base, bbox.trans_ix);
matrx = transform.matrx;
translate = transform.translate;
}
if linewidth >= 0.0 {
// Note: doesn't deal with anisotropic case

View file

@ -157,12 +157,16 @@ fn main(
default: {}
}
}
let out_ix = global_id.y * (config.width_in_tiles * TILE_WIDTH) + global_id.x * PIXELS_PER_THREAD;
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
let bytes = pack4x8unorm(rgba[i]);
output[out_ix + i] = bytes;
}
#else
let area = fill_path(tile, xy);
#endif
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;
output[out_ix] = bytes;
#endif
}

View file

@ -20,6 +20,8 @@
#import pathtag
#import tile
#import segment
#import cubic
#import bump
@group(0) @binding(0)
var<storage> config: Config;
@ -27,14 +29,6 @@ var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
// Maybe dedup?
struct Cubic {
p0: vec2<f32>,
p1: vec2<f32>,
p2: vec2<f32>,
p3: vec2<f32>,
}
@group(0) @binding(2)
var<storage> tag_monoids: array<TagMonoid>;
@ -51,9 +45,12 @@ struct AtomicTile {
}
@group(0) @binding(5)
var<storage, read_write> tiles: array<AtomicTile>;
var<storage, read_write> bump: BumpAllocators;
@group(0) @binding(6)
var<storage, read_write> tiles: array<AtomicTile>;
@group(0) @binding(7)
var<storage, read_write> segments: array<Segment>;
struct SubdivResult {
@ -110,9 +107,7 @@ fn eval_cubic(p0: vec2<f32>, p1: vec2<f32>, p2: vec2<f32>, p3: vec2<f32>, t: f32
}
fn alloc_segment() -> u32 {
// Use 0-index segment (address is sentinel) as counter
// TODO: separate small buffer binding for this?
return atomicAdd(&tiles[4096].segments, 1u) + 1u;
return atomicAdd(&bump.segments, 1u) + 1u;
}
let MAX_QUADS = 16u;
@ -126,12 +121,13 @@ fn main(
let shift = (ix & 3u) * 8u;
var tag_byte = (tag_word >> shift) & 0xffu;
// Reconstruct path_ix from monoid or store in cubic?
if (tag_byte & PATH_TAG_SEG_TYPE) != 0u {
let path_ix = 42u; // BIG GIANT TODO
let path = paths[path_ix];
let bbox = vec4<i32>(path.bbox);
// Discussion question: it might actually be cheaper to do the path segment
// decoding & transform again rather than store the result in a buffer;
// classic memory vs ALU tradeoff.
let cubic = cubics[global_id.x];
let path = paths[cubic.path_ix];
let bbox = vec4<i32>(path.bbox);
let p0 = cubic.p0;
let p1 = cubic.p1;
let p2 = cubic.p2;
@ -201,11 +197,13 @@ fn main(
var x1 = i32(floor(xymax.x * SX) + 1.0);
var y0 = i32(floor(xymin.y * SY));
var y1 = i32(floor(xymax.y * SY) + 1.0);
x0 = clamp(x0, 0, i32(config.width_in_tiles));
x1 = clamp(x1, 0, i32(config.width_in_tiles));
y0 = clamp(y0, 0, i32(config.height_in_tiles));
y1 = clamp(y1, 0, i32(config.height_in_tiles));
x0 = clamp(x0, bbox.x, bbox.z);
x1 = clamp(x1, bbox.x, bbox.z);
y0 = clamp(y0, bbox.y, bbox.w);
y1 = clamp(y1, bbox.y, bbox.w);
var xc = a + b * f32(y0);
let stride = bbox.z - bbox.x;
var base = i32(path.tiles) + (y0 - bbox.y) * stride - bbox.x;
var xray = i32(floor(lp0.x * SX));
var last_xray = i32(floor(lp1.x * SX));
if dp.y < 0.0 {
@ -218,7 +216,7 @@ fn main(
let xbackdrop = max(xray + 1, 0);
if xymin.y < tile_y0 && xbackdrop < i32(config.width_in_tiles) {
let backdrop = select(-1, 1, dp.y < 0.0);
let tile_ix = y * i32(config.width_in_tiles) + xbackdrop;
let tile_ix = base + xbackdrop;
atomicAdd(&tiles[tile_ix].backdrop, backdrop);
}
var next_xray = last_xray;
@ -236,7 +234,7 @@ fn main(
var tile_seg: Segment;
for (var x = xx0; x < xx1; x += 1) {
let tile_x0 = f32(x) * f32(TILE_WIDTH);
let tile_ix = y * i32(config.width_in_tiles) + x;
let tile_ix = base + x;
// allocate segment, insert linked list
let seg_ix = alloc_segment();
let old = atomicExchange(&tiles[tile_ix].segments, seg_ix);
@ -263,6 +261,7 @@ fn main(
segments[seg_ix] = tile_seg;
}
xc += b;
base += stride;
xray = next_xray;
}
n_out += 1u;

View file

@ -27,6 +27,7 @@
#import config
#import pathtag
#import cubic
@group(0) @binding(0)
var<storage> config: Config;
@ -49,12 +50,6 @@ struct AtomicPathBbox {
@group(0) @binding(3)
var<storage, read_write> path_bboxes: array<AtomicPathBbox>;
struct Cubic {
p0: vec2<f32>,
p1: vec2<f32>,
p2: vec2<f32>,
p3: vec2<f32>,
}
@group(0) @binding(4)
var<storage, read_write> cubics: array<Cubic>;
@ -90,7 +85,6 @@ var<storage, read_write> cubics: array<Cubic>;
// }
var<private> pathdata_base: u32;
var<private> transform_base: u32;
fn read_f32_point(ix: u32) -> vec2<f32> {
let x = bitcast<f32>(scene[pathdata_base + ix]);
@ -110,7 +104,7 @@ struct Transform {
translate: vec2<f32>,
}
fn read_transform(ix: u32) -> Transform {
fn read_transform(transform_base: u32, ix: u32) -> Transform {
let base = transform_base + ix * 6u;
let c0 = bitcast<f32>(scene[base]);
let c1 = bitcast<f32>(scene[base] + 1u);
@ -142,7 +136,6 @@ fn main(
) {
let ix = global_id.x;
let tag_word = scene[config.pathtag_base + (ix >> 2u)];
// TODO: set transform_base
pathdata_base = config.pathdata_base;
let shift = (ix & 3u) * 8u;
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
@ -180,7 +173,7 @@ fn main(
}
}
}
let transform = read_transform(tm.trans_ix);
let transform = read_transform(config.transform_base, tm.trans_ix);
p0 = transform_apply(transform, p0);
p1 = transform_apply(transform, p1);
var bbox = vec4<f32>(min(p0, p1), max(p0, p1));
@ -201,15 +194,14 @@ fn main(
p1 = mix(p1, p0, 1.0 / 3.0);
}
}
cubics[global_id.x] = Cubic(p0, p1, p2, p3);
cubics[global_id.x] = Cubic(p0, p1, p2, p3, tm.path_ix, 0u);
// Update bounding box using atomics only. Computing a monoid is a
// potential future optimization.
if bbox.z > bbox.x && bbox.w > bbox.y {
if bbox.z > bbox.x || bbox.w > bbox.y {
atomicMin(&(*out).x0, round_down(bbox.x));
atomicMin(&(*out).y0, round_down(bbox.y));
atomicMax(&(*out).x1, round_up(bbox.z));
atomicMax(&(*out).y1, round_up(bbox.w));
}
}
}

View file

@ -19,4 +19,5 @@ struct BumpAllocators {
binning: atomic<u32>,
ptcl: atomic<u32>,
tile: atomic<u32>,
segments: atomic<u32>,
}

View file

@ -28,6 +28,8 @@ struct Config {
drawtag_base: u32,
drawdata_base: u32,
transform_base: u32,
}
// Geometry of tiles and bins

View file

@ -22,7 +22,7 @@ use engine::Engine;
use render::render;
use test_scene::dump_scene_info;
use wgpu::{Device, Queue, Limits};
use wgpu::{Device, Limits, Queue};
mod engine;
mod render;

View file

@ -5,10 +5,18 @@ use piet_scene::Scene;
use crate::{
engine::{BufProxy, Recording},
shaders::{self, Shaders},
shaders::{self, FullShaders, Shaders},
};
const TAG_MONOID_SIZE: u64 = 12;
const TAG_MONOID_FULL_SIZE: u64 = 20;
const PATH_BBOX_SIZE: u64 = 24;
const CUBIC_SIZE: u64 = 40;
const DRAWMONOID_SIZE: u64 = 16;
const MAX_DRAWINFO_SIZE: u64 = 44;
const PATH_SIZE: u64 = 8;
const DRAW_BBOX_SIZE: u64 = 16;
const BUMP_SIZE: u64 = 16;
#[repr(C)]
#[derive(Clone, Copy, Default, Zeroable, Pod)]
@ -21,6 +29,7 @@ struct Config {
pathdata_base: u32,
drawtag_base: u32,
drawdata_base: u32,
transform_base: u32,
}
#[repr(C)]
@ -109,6 +118,168 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
(recording, out_buf)
}
pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy) {
let mut recording = Recording::default();
let data = scene.data();
let n_pathtag = data.tag_stream.len();
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
let mut scene: Vec<u8> = Vec::with_capacity(pathtag_padded);
let pathtag_base = size_to_words(scene.len());
scene.extend(&data.tag_stream);
scene.resize(pathtag_padded, 0);
let pathdata_base = size_to_words(scene.len());
scene.extend(&data.pathseg_stream);
let drawtag_base = size_to_words(scene.len());
scene.extend(bytemuck::cast_slice(&data.drawtag_stream));
let drawdata_base = size_to_words(scene.len());
scene.extend(&data.drawdata_stream);
let transform_base = size_to_words(scene.len());
scene.extend(bytemuck::cast_slice(&data.transform_stream));
let n_path = data.n_path;
// TODO: calculate for real when we do rectangles
let n_drawobj = n_path;
let config = Config {
width_in_tiles: 64,
height_in_tiles: 64,
n_drawobj,
n_path,
pathtag_base,
pathdata_base,
drawtag_base,
drawdata_base,
transform_base,
};
let scene_buf = recording.upload(scene);
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE);
// TODO: really only need pathtag_wgs - 1
recording.dispatch(
shaders.pathtag_reduce,
(pathtag_wgs as u32, 1, 1),
[config_buf, scene_buf, reduced_buf],
);
let tagmonoid_buf =
BufProxy::new(pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_SIZE);
recording.dispatch(
shaders.pathtag_scan,
(pathtag_wgs as u32, 1, 1),
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
);
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
let path_bbox_buf = BufProxy::new(n_path as u64 * PATH_BBOX_SIZE);
recording.dispatch(
shaders.bbox_clear,
(drawobj_wgs, 1, 1),
[config_buf, path_bbox_buf],
);
let cubic_buf = BufProxy::new(n_path as u64 * CUBIC_SIZE);
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
recording.dispatch(
shaders.pathseg,
(path_coarse_wgs, 1, 1),
[
config_buf,
scene_buf,
tagmonoid_buf,
path_bbox_buf,
cubic_buf,
],
);
let draw_reduced_buf = BufProxy::new(drawobj_wgs as u64 * DRAWMONOID_SIZE);
recording.dispatch(
shaders.draw_reduce,
(drawobj_wgs, 1, 1),
[config_buf, scene_buf, draw_reduced_buf],
);
let draw_monoid_buf = BufProxy::new(n_drawobj as u64 * DRAWMONOID_SIZE);
let info_buf = BufProxy::new(n_drawobj as u64 * MAX_DRAWINFO_SIZE);
recording.dispatch(
shaders.draw_leaf,
(drawobj_wgs, 1, 1),
[
config_buf,
scene_buf,
draw_reduced_buf,
path_bbox_buf,
draw_monoid_buf,
info_buf,
],
);
let draw_bbox_buf = BufProxy::new(n_path as u64 * DRAW_BBOX_SIZE);
let bump_buf = BufProxy::new(BUMP_SIZE);
// Not actually used yet.
let clip_bbox_buf = BufProxy::new(1024);
let bin_data_buf = BufProxy::new(1 << 16);
recording.clear_all(bump_buf);
recording.dispatch(
shaders.binning,
(drawobj_wgs, 1, 1),
[
config_buf,
draw_monoid_buf,
path_bbox_buf,
clip_bbox_buf,
draw_bbox_buf,
bump_buf,
bin_data_buf,
],
);
let path_buf = BufProxy::new(n_path as u64 * PATH_SIZE);
let tile_buf = BufProxy::new(1 << 20);
let path_wgs = (n_path + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
recording.dispatch(
shaders.tile_alloc,
(path_wgs, 1, 1),
[
config_buf,
scene_buf,
draw_bbox_buf,
bump_buf,
path_buf,
tile_buf,
],
);
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
// TODO: more principled size calc
let tiles_buf = BufProxy::new(4097 * 8);
let segments_buf = BufProxy::new(256 * 24);
recording.clear_all(tiles_buf);
recording.dispatch(
shaders.path_coarse,
(path_coarse_wgs, 1, 1),
[
config_buf,
scene_buf,
tagmonoid_buf,
cubic_buf,
path_buf,
bump_buf,
tiles_buf,
segments_buf,
],
);
recording.dispatch(
shaders.backdrop,
(path_wgs, 1, 1),
[config_buf, path_buf, tiles_buf],
);
let out_buf_size = config.width_in_tiles * config.height_in_tiles * 256;
let out_buf = BufProxy::new(out_buf_size as u64);
recording.dispatch(
shaders.fine,
(config.width_in_tiles, config.height_in_tiles, 1),
[config_buf, tiles_buf, segments_buf, out_buf],
);
recording.download(out_buf);
(recording, out_buf)
}
pub fn align_up(len: usize, alignment: u32) -> usize {
len + (len.wrapping_neg() & alignment as usize - 1)
}

View file

@ -25,7 +25,9 @@ use wgpu::Device;
use crate::engine::{BindType, Engine, Error, ShaderId};
pub const PATHTAG_REDUCE_WG: u32 = 256;
pub const PATH_BBOX_WG: u32 = 256;
pub const PATH_COARSE_WG: u32 = 256;
pub const PATH_DRAWOBJ_WG: u32 = 256;
pub struct Shaders {
pub pathtag_reduce: ShaderId,
@ -43,8 +45,8 @@ pub struct FullShaders {
pub pathseg: ShaderId,
pub draw_reduce: ShaderId,
pub draw_leaf: ShaderId,
pub tile_alloc: ShaderId,
pub binning: ShaderId,
pub tile_alloc: ShaderId,
pub path_coarse: ShaderId,
pub backdrop: ShaderId,
pub coarse: ShaderId,
@ -60,7 +62,11 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
let pathtag_reduce = engine.add_shader(
device,
preprocess::preprocess(&read_shader("pathtag_reduce"), &empty, &imports).into(),
&[BindType::BufReadOnly, BindType::BufReadOnly, BindType::Buffer],
&[
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
],
)?;
let pathtag_scan = engine.add_shader(
device,
@ -121,7 +127,11 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
let pathtag_reduce = engine.add_shader(
device,
preprocess::preprocess(&read_shader("pathtag_reduce"), &full_config, &imports).into(),
&[BindType::BufReadOnly, BindType::BufReadOnly, BindType::Buffer],
&[
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
],
)?;
let pathtag_scan = engine.add_shader(
device,
@ -136,10 +146,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
let bbox_clear = engine.add_shader(
device,
preprocess::preprocess(&read_shader("bbox_clear"), &empty, &imports).into(),
&[
BindType::BufReadOnly,
BindType::Buffer,
],
&[BindType::BufReadOnly, BindType::Buffer],
)?;
let pathseg = engine.add_shader(
device,
@ -173,18 +180,6 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
BindType::Buffer,
],
)?;
let tile_alloc = engine.add_shader(
device,
preprocess::preprocess(&read_shader("tile_alloc"), &empty, &imports).into(),
&[
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
BindType::Buffer,
BindType::Buffer,
],
)?;
let binning = engine.add_shader(
device,
preprocess::preprocess(&read_shader("binning"), &empty, &imports).into(),
@ -199,6 +194,18 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
BindType::Buffer,
],
)?;
let tile_alloc = engine.add_shader(
device,
preprocess::preprocess(&read_shader("tile_alloc"), &empty, &imports).into(),
&[
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
BindType::Buffer,
BindType::Buffer,
],
)?;
let path_coarse = engine.add_shader(
device,
@ -211,6 +218,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
BindType::BufReadOnly,
BindType::Buffer,
BindType::Buffer,
BindType::Buffer,
],
)?;
let backdrop = engine.add_shader(
@ -251,8 +259,8 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
pathseg,
draw_reduce,
draw_leaf,
tile_alloc,
binning,
tile_alloc,
path_coarse,
backdrop,
coarse,