From c3d81e0985a9593951e7ae16e4dcaa9c8b1d6a3f Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 3 Nov 2022 22:00:52 -0700 Subject: [PATCH] Mostly working path rendering It draws multiple paths and applies affine transformations. One problem: RGBA writing is byte-reversed and premultiplied. --- piet-wgsl/shader/backdrop_dyn.wgsl | 21 +++------- piet-wgsl/shader/binning.wgsl | 2 - piet-wgsl/shader/coarse.wgsl | 13 +++--- piet-wgsl/shader/draw_leaf.wgsl | 14 +++---- piet-wgsl/shader/fine.wgsl | 4 +- piet-wgsl/shader/path_coarse_full.wgsl | 2 +- piet-wgsl/shader/pathseg.wgsl | 11 +++--- piet-wgsl/shader/shared/cubic.wgsl | 25 ++++++++++++ piet-wgsl/shader/shared/pathtag.wgsl | 1 + piet-wgsl/shader/tile_alloc.wgsl | 10 +++-- piet-wgsl/src/engine.rs | 1 + piet-wgsl/src/main.rs | 33 ++++++++++++---- piet-wgsl/src/render.rs | 55 ++++++++++++++++++-------- piet-wgsl/src/shaders.rs | 8 +++- piet-wgsl/src/test_scene.rs | 12 ++++++ 15 files changed, 143 insertions(+), 69 deletions(-) create mode 100644 piet-wgsl/shader/shared/cubic.wgsl diff --git a/piet-wgsl/shader/backdrop_dyn.wgsl b/piet-wgsl/shader/backdrop_dyn.wgsl index c6c7c8d..ab094e6 100644 --- a/piet-wgsl/shader/backdrop_dyn.wgsl +++ b/piet-wgsl/shader/backdrop_dyn.wgsl @@ -17,20 +17,7 @@ // Prefix sum for dynamically allocated backdrops #import config - -// TODO: dedup & put this in the right place -struct Path { - // bounding box in pixels - bbox: vec4, - // offset (in u32's) to tile rectangle - tiles: u32, -} - -// TODO: -> shared -struct Tile { - backdrop: i32, - segments: u32, -} +#import tile @group(0) @binding(0) var config: Config; @@ -45,6 +32,7 @@ let WG_SIZE = 256u; var sh_row_width: array; var sh_row_count: array; +var sh_offset: array; @compute @workgroup_size(256) fn main( @@ -58,8 +46,9 @@ fn main( let path = paths[drawobj_ix]; sh_row_width[local_id.x] = path.bbox.z - path.bbox.x; row_count = path.bbox.w - path.bbox.y; - sh_row_count[local_id.x] = row_count; + sh_offset[local_id.x] = path.tiles; } + sh_row_count[local_id.x] = row_count; // Prefix sum of row counts for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { @@ -83,7 +72,7 @@ fn main( let width = sh_row_width[el_ix]; if width > 0u { var seq_ix = row - select(0u, sh_row_count[el_ix - 1u], el_ix > 0u); - var tile_ix = seq_ix * width; + var tile_ix = sh_offset[el_ix] + seq_ix * width; var sum = tiles[tile_ix].backdrop; for (var x = 1u; x < width; x += 1u) { tile_ix += 1u; diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl index 3d5a053..6fd6284 100644 --- a/piet-wgsl/shader/binning.wgsl +++ b/piet-wgsl/shader/binning.wgsl @@ -36,8 +36,6 @@ var clip_bbox_buf: array>; @group(0) @binding(4) var intersected_bbox: array>; -// TODO: put into shared include - @group(0) @binding(5) var bump: BumpAllocators; diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 8c59ba6..7a2ecf9 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -41,13 +41,13 @@ struct BinHeader { var bin_headers: array; @group(0) @binding(4) -var paths: array; +var bin_data: array; @group(0) @binding(5) -var tiles: array; +var paths: array; @group(0) @binding(6) -var bin_data: array; +var tiles: array; @group(0) @binding(7) var bump: BumpAllocators; @@ -109,7 +109,7 @@ fn write_path(tile: Tile, linewidth: f32) { fn write_color(color: CmdColor) { alloc_cmd(2u); - ptcl[cmd_offset] = CMD_FILL; + ptcl[cmd_offset] = CMD_COLOR; ptcl[cmd_offset + 1u] = color.rgba_color; cmd_offset += 2u; @@ -117,7 +117,6 @@ fn write_color(color: CmdColor) { @compute @workgroup_size(256) fn main( - @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3, @builtin(workgroup_id) wg_id: vec3, ) { @@ -130,7 +129,7 @@ fn main( let bin_tile_y = N_TILE_Y * wg_id.y; let tile_x = local_id.x % N_TILE_X; - let tile_y = local_id.y % N_TILE_Y; + let tile_y = local_id.x / N_TILE_X; let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x; cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC; cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM); @@ -313,7 +312,7 @@ fn main( workgroupBarrier(); } if bin_tile_x < config.width_in_tiles && bin_tile_y < config.height_in_tiles { - ptcl[cmd_offset] = CMD_END; + //ptcl[cmd_offset] = CMD_END; // TODO: blend stack allocation } } diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index bce04f8..14a1163 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -49,11 +49,11 @@ struct Transform { fn read_transform(transform_base: u32, ix: u32) -> Transform { let base = transform_base + ix * 6u; let c0 = bitcast(scene[base]); - let c1 = bitcast(scene[base] + 1u); - let c2 = bitcast(scene[base] + 2u); - let c3 = bitcast(scene[base] + 3u); - let c4 = bitcast(scene[base] + 4u); - let c5 = bitcast(scene[base] + 5u); + let c1 = bitcast(scene[base + 1u]); + let c2 = bitcast(scene[base + 2u]); + let c3 = bitcast(scene[base + 3u]); + let c4 = bitcast(scene[base + 4u]); + let c5 = bitcast(scene[base + 5u]); let matrx = vec4(c0, c1, c2, c3); let translate = vec2(c4, c5); return Transform(matrx, translate); @@ -73,8 +73,8 @@ fn main( sh_scratch[local_id.x] = agg; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); - if local_id.x + (1u << i) < WG_SIZE { - let other = sh_scratch[local_id.x + (1u << i)]; + if local_id.x >= 1u << i { + let other = sh_scratch[local_id.x - (1u << i)]; agg = combine_draw_monoid(agg, other); } workgroupBarrier(); diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 751e408..4431848 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -113,7 +113,6 @@ fn main( ) { let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x; let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); - let tile = tiles[tile_ix]; #ifdef full var rgba: array, PIXELS_PER_THREAD>; var area: array; @@ -148,7 +147,7 @@ fn main( let fg_i = fg * area[i]; rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; } - cmd_ix += 1u; + cmd_ix += 2u; } // CMD_JUMP case 11u: { @@ -163,6 +162,7 @@ fn main( output[out_ix + i] = bytes; } #else + let tile = tiles[tile_ix]; let area = fill_path(tile, xy); let bytes = pack4x8unorm(vec4(area[0], area[1], area[2], area[3])); diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl index 6b6e9b8..fa3609e 100644 --- a/piet-wgsl/shader/path_coarse_full.wgsl +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -214,7 +214,7 @@ fn main( for (var y = y0; y < y1; y += 1) { let tile_y0 = f32(y) * f32(TILE_HEIGHT); let xbackdrop = max(xray + 1, 0); - if xymin.y < tile_y0 && xbackdrop < i32(config.width_in_tiles) { + if xymin.y < tile_y0 && xbackdrop < bbox.z { let backdrop = select(-1, 1, dp.y < 0.0); let tile_ix = base + xbackdrop; atomicAdd(&tiles[tile_ix].backdrop, backdrop); diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl index 1326fce..d2c6a6a 100644 --- a/piet-wgsl/shader/pathseg.wgsl +++ b/piet-wgsl/shader/pathseg.wgsl @@ -107,11 +107,11 @@ struct Transform { fn read_transform(transform_base: u32, ix: u32) -> Transform { let base = transform_base + ix * 6u; let c0 = bitcast(scene[base]); - let c1 = bitcast(scene[base] + 1u); - let c2 = bitcast(scene[base] + 2u); - let c3 = bitcast(scene[base] + 3u); - let c4 = bitcast(scene[base] + 4u); - let c5 = bitcast(scene[base] + 5u); + let c1 = bitcast(scene[base + 1u]); + let c2 = bitcast(scene[base + 2u]); + let c3 = bitcast(scene[base + 3u]); + let c4 = bitcast(scene[base + 4u]); + let c5 = bitcast(scene[base + 5u]); let matrx = vec4(c0, c1, c2, c3); let translate = vec2(c4, c5); return Transform(matrx, translate); @@ -174,6 +174,7 @@ fn main( } } let transform = read_transform(config.transform_base, tm.trans_ix); + //let transform = Transform(vec4(1.0, 0.0, 0.0, 1.0), vec2()); p0 = transform_apply(transform, p0); p1 = transform_apply(transform, p1); var bbox = vec4(min(p0, p1), max(p0, p1)); diff --git a/piet-wgsl/shader/shared/cubic.wgsl b/piet-wgsl/shader/shared/cubic.wgsl new file mode 100644 index 0000000..ffb85b4 --- /dev/null +++ b/piet-wgsl/shader/shared/cubic.wgsl @@ -0,0 +1,25 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +struct Cubic { + p0: vec2, + p1: vec2, + p2: vec2, + p3: vec2, + path_ix: u32, + // Needed? + padding: u32, +} diff --git a/piet-wgsl/shader/shared/pathtag.wgsl b/piet-wgsl/shader/shared/pathtag.wgsl index b248e18..fed16fd 100644 --- a/piet-wgsl/shader/shared/pathtag.wgsl +++ b/piet-wgsl/shader/shared/pathtag.wgsl @@ -16,6 +16,7 @@ struct TagMonoid { trans_ix: u32, + // TODO: I don't think pathseg_ix is used. pathseg_ix: u32, pathseg_offset: u32, #ifdef full diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl index 0bd550e..1c27c83 100644 --- a/piet-wgsl/shader/tile_alloc.wgsl +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -79,14 +79,17 @@ fn main( sh_tile_count[local_id.x] = tile_count; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); - if local_id.x < (1u << i) { + if local_id.x >= (1u << i) { total_tile_count += sh_tile_count[local_id.x - (1u << i)]; } workgroupBarrier(); sh_tile_count[local_id.x] = total_tile_count; } - if local_id.x == WG_SIZE - 1u { - sh_tile_offset = atomicAdd(&bump.tile, total_tile_count); + workgroupBarrier(); + // should be able to avoid a barrier by adding total_tile count from + // thread WG_SIZE - 1, but it doesn't work + if local_id.x == 0u { + sh_tile_offset = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]); } workgroupBarrier(); let tile_offset = sh_tile_offset; @@ -94,6 +97,7 @@ fn main( let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u); let bbox = vec4(ux0, uy0, ux1, uy1); let path = Path(bbox, tile_offset + tile_subix); + paths[drawobj_ix] = path; } // zero allocated memory diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs index 2be08db..7d1c854 100644 --- a/piet-wgsl/src/engine.rs +++ b/piet-wgsl/src/engine.rs @@ -183,6 +183,7 @@ impl Engine { bind_map.insert_buf(buf_proxy.id, buf); } Command::Dispatch(shader_id, wg_size, bindings) => { + println!("dispatching {:?} with {} bindings", wg_size, bindings.len()); let shader = &self.shaders[shader_id.0]; let bind_group = bind_map.create_bind_group(device, &shader.bind_group_layout, bindings)?; diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index 355ea33..d8d53ad 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -20,7 +20,6 @@ use std::{fs::File, io::BufWriter}; use engine::Engine; -use render::render; use test_scene::dump_scene_info; use wgpu::{Device, Limits, Queue}; @@ -51,6 +50,20 @@ async fn run() -> Result<(), Box> { Ok(()) } +fn dump_buf(buf: &[u32]) { + for (i, val) in buf.iter().enumerate() { + if *val != 0 { + let lo = val & 0x7fff_ffff; + if lo >= 0x3000_0000 && lo < 0x5000_0000 { + println!("{}: {:x} {}", i, val, f32::from_bits(*val)); + } else { + println!("{}: {:x}", i, val); + + } + } + } +} + async fn do_render( device: &Device, queue: &Queue, @@ -60,17 +73,23 @@ async fn do_render( let full_shaders = shaders::full_shaders(device, engine)?; let scene = test_scene::gen_test_scene(); dump_scene_info(&scene); - let (recording, buf) = render(&scene, &shaders); + //let (recording, buf) = render::render(&scene, &shaders); + let (recording, buf) = render::render_full(&scene, &full_shaders); let downloads = engine.run_recording(&device, &queue, &recording)?; let mapped = downloads.map(); device.poll(wgpu::Maintain::Wait); let buf = mapped.get_mapped(buf).await?; - let file = File::create("image.png")?; - let w = BufWriter::new(file); - let encoder = png::Encoder::new(w, 1024, 1024); - let mut writer = encoder.write_header()?; - writer.write_image_data(&buf)?; + if false { + dump_buf(bytemuck::cast_slice(&buf)); + } else { + let file = File::create("image.png")?; + let w = BufWriter::new(file); + let mut encoder = png::Encoder::new(w, 1024, 1024); + encoder.set_color(png::ColorType::Rgba); + let mut writer = encoder.write_header()?; + writer.write_image_data(&buf)?; + } Ok(()) } diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index d019886..9202ef0 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -14,12 +14,13 @@ const PATH_BBOX_SIZE: u64 = 24; const CUBIC_SIZE: u64 = 40; const DRAWMONOID_SIZE: u64 = 16; const MAX_DRAWINFO_SIZE: u64 = 44; -const PATH_SIZE: u64 = 8; +const PATH_SIZE: u64 = 32; const DRAW_BBOX_SIZE: u64 = 16; const BUMP_SIZE: u64 = 16; +const BIN_HEADER_SIZE: u64 = 8; #[repr(C)] -#[derive(Clone, Copy, Default, Zeroable, Pod)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Config { width_in_tiles: u32, height_in_tiles: u32, @@ -84,8 +85,8 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { [config_buf, scene_buf, reduced_buf, tagmonoid_buf], ); - let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; - //let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32); + let n_pathtag = data.pathseg_stream.len(); + let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; // TODO: more principled size calc let tiles_buf = BufProxy::new(4097 * 8); let segments_buf = BufProxy::new(256 * 24); @@ -151,6 +152,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy drawdata_base, transform_base, }; + println!("{:?}", config); let scene_buf = recording.upload(scene); let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); @@ -176,8 +178,9 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy (drawobj_wgs, 1, 1), [config_buf, path_bbox_buf], ); - let cubic_buf = BufProxy::new(n_path as u64 * CUBIC_SIZE); - let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; + let n_pathtag = data.pathseg_stream.len(); + let cubic_buf = BufProxy::new(n_pathtag as u64 * CUBIC_SIZE); + let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; recording.dispatch( shaders.pathseg, (path_coarse_wgs, 1, 1), @@ -214,6 +217,10 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy // Not actually used yet. let clip_bbox_buf = BufProxy::new(1024); let bin_data_buf = BufProxy::new(1 << 16); + let width_in_bins = (config.width_in_tiles + 15) / 16; + let height_in_bins = (config.height_in_tiles + 15) / 16; + let n_bins = width_in_bins * height_in_bins; + let bin_header_buf = BufProxy::new((n_bins * drawobj_wgs) as u64 * BIN_HEADER_SIZE); recording.clear_all(bump_buf); recording.dispatch( shaders.binning, @@ -226,6 +233,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy draw_bbox_buf, bump_buf, bin_data_buf, + bin_header_buf, ], ); let path_buf = BufProxy::new(n_path as u64 * PATH_SIZE); @@ -244,11 +252,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy ], ); - //let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32); - // TODO: more principled size calc - let tiles_buf = BufProxy::new(4097 * 8); - let segments_buf = BufProxy::new(256 * 24); - recording.clear_all(tiles_buf); + let segments_buf = BufProxy::new(1 << 20); recording.dispatch( shaders.path_coarse, (path_coarse_wgs, 1, 1), @@ -259,25 +263,42 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy cubic_buf, path_buf, bump_buf, - tiles_buf, + tile_buf, segments_buf, ], ); recording.dispatch( shaders.backdrop, (path_wgs, 1, 1), - [config_buf, path_buf, tiles_buf], + [config_buf, path_buf, tile_buf], ); - let out_buf_size = config.width_in_tiles * config.height_in_tiles * 256; + let ptcl_buf = BufProxy::new(1 << 20); + recording.dispatch( + shaders.coarse, + (width_in_bins, height_in_bins, 1), + [ + config_buf, + scene_buf, + draw_monoid_buf, + bin_header_buf, + bin_data_buf, + path_buf, + tile_buf, + bump_buf, + ptcl_buf, + ], + ); + let out_buf_size = config.width_in_tiles * config.height_in_tiles * 1024; let out_buf = BufProxy::new(out_buf_size as u64); recording.dispatch( shaders.fine, (config.width_in_tiles, config.height_in_tiles, 1), - [config_buf, tiles_buf, segments_buf, out_buf], + [config_buf, tile_buf, segments_buf, out_buf, ptcl_buf], ); - recording.download(out_buf); - (recording, out_buf) + let download_buf = out_buf; + recording.download(download_buf); + (recording, download_buf) } pub fn align_up(len: usize, alignment: u32) -> usize { diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index 939c58e..f918d96 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -223,8 +223,12 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result Scene { ]; let brush = Brush::Solid(Color::rgb8(0x80, 0x80, 0x80)); builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); + let transform = Affine::translate(10.0, 200.0); + /* + let path = [ + PathElement::MoveTo(Point::new(100.0, 300.0)), + PathElement::LineTo(Point::new(500.0, 320.0)), + PathElement::LineTo(Point::new(300.0, 350.0)), + PathElement::LineTo(Point::new(200.0, 460.0)), + PathElement::LineTo(Point::new(150.0, 410.0)), + PathElement::Close, + ]; + */ + builder.fill(Fill::NonZero, transform, &brush, None, &path); scene }