Mostly working path rendering

It draws multiple paths and applies affine transformations.

One problem: RGBA writing is byte-reversed and premultiplied.
This commit is contained in:
Raph Levien 2022-11-03 22:00:52 -07:00
parent 06fa3cb9ab
commit c3d81e0985
15 changed files with 143 additions and 69 deletions

View file

@ -17,20 +17,7 @@
// Prefix sum for dynamically allocated backdrops
#import config
// TODO: dedup & put this in the right place
struct Path {
// bounding box in pixels
bbox: vec4<u32>,
// offset (in u32's) to tile rectangle
tiles: u32,
}
// TODO: -> shared
struct Tile {
backdrop: i32,
segments: u32,
}
#import tile
@group(0) @binding(0)
var<storage> config: Config;
@ -45,6 +32,7 @@ let WG_SIZE = 256u;
var<workgroup> sh_row_width: array<u32, WG_SIZE>;
var<workgroup> sh_row_count: array<u32, WG_SIZE>;
var<workgroup> sh_offset: array<u32, WG_SIZE>;
@compute @workgroup_size(256)
fn main(
@ -58,8 +46,9 @@ fn main(
let path = paths[drawobj_ix];
sh_row_width[local_id.x] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
sh_row_count[local_id.x] = row_count;
sh_offset[local_id.x] = path.tiles;
}
sh_row_count[local_id.x] = row_count;
// Prefix sum of row counts
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
@ -83,7 +72,7 @@ fn main(
let width = sh_row_width[el_ix];
if width > 0u {
var seq_ix = row - select(0u, sh_row_count[el_ix - 1u], el_ix > 0u);
var tile_ix = seq_ix * width;
var tile_ix = sh_offset[el_ix] + seq_ix * width;
var sum = tiles[tile_ix].backdrop;
for (var x = 1u; x < width; x += 1u) {
tile_ix += 1u;

View file

@ -36,8 +36,6 @@ var<storage> clip_bbox_buf: array<vec4<f32>>;
@group(0) @binding(4)
var<storage, read_write> intersected_bbox: array<vec4<f32>>;
// TODO: put into shared include
@group(0) @binding(5)
var<storage, read_write> bump: BumpAllocators;

View file

@ -41,13 +41,13 @@ struct BinHeader {
var<storage> bin_headers: array<BinHeader>;
@group(0) @binding(4)
var<storage> paths: array<Path>;
var<storage> bin_data: array<u32>;
@group(0) @binding(5)
var<storage> tiles: array<Tile>;
var<storage> paths: array<Path>;
@group(0) @binding(6)
var<storage> bin_data: array<u32>;
var<storage> tiles: array<Tile>;
@group(0) @binding(7)
var<storage, read_write> bump: BumpAllocators;
@ -109,7 +109,7 @@ fn write_path(tile: Tile, linewidth: f32) {
fn write_color(color: CmdColor) {
alloc_cmd(2u);
ptcl[cmd_offset] = CMD_FILL;
ptcl[cmd_offset] = CMD_COLOR;
ptcl[cmd_offset + 1u] = color.rgba_color;
cmd_offset += 2u;
@ -117,7 +117,6 @@ fn write_color(color: CmdColor) {
@compute @workgroup_size(256)
fn main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
@ -130,7 +129,7 @@ fn main(
let bin_tile_y = N_TILE_Y * wg_id.y;
let tile_x = local_id.x % N_TILE_X;
let tile_y = local_id.y % N_TILE_Y;
let tile_y = local_id.x / N_TILE_X;
let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x;
cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC;
cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM);
@ -313,7 +312,7 @@ fn main(
workgroupBarrier();
}
if bin_tile_x < config.width_in_tiles && bin_tile_y < config.height_in_tiles {
ptcl[cmd_offset] = CMD_END;
//ptcl[cmd_offset] = CMD_END;
// TODO: blend stack allocation
}
}

View file

@ -49,11 +49,11 @@ struct Transform {
fn read_transform(transform_base: u32, ix: u32) -> Transform {
let base = transform_base + ix * 6u;
let c0 = bitcast<f32>(scene[base]);
let c1 = bitcast<f32>(scene[base] + 1u);
let c2 = bitcast<f32>(scene[base] + 2u);
let c3 = bitcast<f32>(scene[base] + 3u);
let c4 = bitcast<f32>(scene[base] + 4u);
let c5 = bitcast<f32>(scene[base] + 5u);
let c1 = bitcast<f32>(scene[base + 1u]);
let c2 = bitcast<f32>(scene[base + 2u]);
let c3 = bitcast<f32>(scene[base + 3u]);
let c4 = bitcast<f32>(scene[base + 4u]);
let c5 = bitcast<f32>(scene[base + 5u]);
let matrx = vec4<f32>(c0, c1, c2, c3);
let translate = vec2<f32>(c4, c5);
return Transform(matrx, translate);
@ -73,8 +73,8 @@ fn main(
sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier();
if local_id.x + (1u << i) < WG_SIZE {
let other = sh_scratch[local_id.x + (1u << i)];
if local_id.x >= 1u << i {
let other = sh_scratch[local_id.x - (1u << i)];
agg = combine_draw_monoid(agg, other);
}
workgroupBarrier();

View file

@ -113,7 +113,6 @@ fn main(
) {
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
let tile = tiles[tile_ix];
#ifdef full
var rgba: array<vec4<f32>, PIXELS_PER_THREAD>;
var area: array<f32, PIXELS_PER_THREAD>;
@ -148,7 +147,7 @@ fn main(
let fg_i = fg * area[i];
rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
}
cmd_ix += 1u;
cmd_ix += 2u;
}
// CMD_JUMP
case 11u: {
@ -163,6 +162,7 @@ fn main(
output[out_ix + i] = bytes;
}
#else
let tile = tiles[tile_ix];
let area = fill_path(tile, xy);
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));

View file

@ -214,7 +214,7 @@ fn main(
for (var y = y0; y < y1; y += 1) {
let tile_y0 = f32(y) * f32(TILE_HEIGHT);
let xbackdrop = max(xray + 1, 0);
if xymin.y < tile_y0 && xbackdrop < i32(config.width_in_tiles) {
if xymin.y < tile_y0 && xbackdrop < bbox.z {
let backdrop = select(-1, 1, dp.y < 0.0);
let tile_ix = base + xbackdrop;
atomicAdd(&tiles[tile_ix].backdrop, backdrop);

View file

@ -107,11 +107,11 @@ struct Transform {
fn read_transform(transform_base: u32, ix: u32) -> Transform {
let base = transform_base + ix * 6u;
let c0 = bitcast<f32>(scene[base]);
let c1 = bitcast<f32>(scene[base] + 1u);
let c2 = bitcast<f32>(scene[base] + 2u);
let c3 = bitcast<f32>(scene[base] + 3u);
let c4 = bitcast<f32>(scene[base] + 4u);
let c5 = bitcast<f32>(scene[base] + 5u);
let c1 = bitcast<f32>(scene[base + 1u]);
let c2 = bitcast<f32>(scene[base + 2u]);
let c3 = bitcast<f32>(scene[base + 3u]);
let c4 = bitcast<f32>(scene[base + 4u]);
let c5 = bitcast<f32>(scene[base + 5u]);
let matrx = vec4<f32>(c0, c1, c2, c3);
let translate = vec2<f32>(c4, c5);
return Transform(matrx, translate);
@ -174,6 +174,7 @@ fn main(
}
}
let transform = read_transform(config.transform_base, tm.trans_ix);
//let transform = Transform(vec4<f32>(1.0, 0.0, 0.0, 1.0), vec2<f32>());
p0 = transform_apply(transform, p0);
p1 = transform_apply(transform, p1);
var bbox = vec4<f32>(min(p0, p1), max(p0, p1));

View file

@ -0,0 +1,25 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
struct Cubic {
p0: vec2<f32>,
p1: vec2<f32>,
p2: vec2<f32>,
p3: vec2<f32>,
path_ix: u32,
// Needed?
padding: u32,
}

View file

@ -16,6 +16,7 @@
struct TagMonoid {
trans_ix: u32,
// TODO: I don't think pathseg_ix is used.
pathseg_ix: u32,
pathseg_offset: u32,
#ifdef full

View file

@ -79,14 +79,17 @@ fn main(
sh_tile_count[local_id.x] = tile_count;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier();
if local_id.x < (1u << i) {
if local_id.x >= (1u << i) {
total_tile_count += sh_tile_count[local_id.x - (1u << i)];
}
workgroupBarrier();
sh_tile_count[local_id.x] = total_tile_count;
}
if local_id.x == WG_SIZE - 1u {
sh_tile_offset = atomicAdd(&bump.tile, total_tile_count);
workgroupBarrier();
// should be able to avoid a barrier by adding total_tile count from
// thread WG_SIZE - 1, but it doesn't work
if local_id.x == 0u {
sh_tile_offset = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]);
}
workgroupBarrier();
let tile_offset = sh_tile_offset;
@ -94,6 +97,7 @@ fn main(
let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u);
let bbox = vec4<u32>(ux0, uy0, ux1, uy1);
let path = Path(bbox, tile_offset + tile_subix);
paths[drawobj_ix] = path;
}
// zero allocated memory

View file

@ -183,6 +183,7 @@ impl Engine {
bind_map.insert_buf(buf_proxy.id, buf);
}
Command::Dispatch(shader_id, wg_size, bindings) => {
println!("dispatching {:?} with {} bindings", wg_size, bindings.len());
let shader = &self.shaders[shader_id.0];
let bind_group =
bind_map.create_bind_group(device, &shader.bind_group_layout, bindings)?;

View file

@ -20,7 +20,6 @@ use std::{fs::File, io::BufWriter};
use engine::Engine;
use render::render;
use test_scene::dump_scene_info;
use wgpu::{Device, Limits, Queue};
@ -51,6 +50,20 @@ async fn run() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}
fn dump_buf(buf: &[u32]) {
for (i, val) in buf.iter().enumerate() {
if *val != 0 {
let lo = val & 0x7fff_ffff;
if lo >= 0x3000_0000 && lo < 0x5000_0000 {
println!("{}: {:x} {}", i, val, f32::from_bits(*val));
} else {
println!("{}: {:x}", i, val);
}
}
}
}
async fn do_render(
device: &Device,
queue: &Queue,
@ -60,17 +73,23 @@ async fn do_render(
let full_shaders = shaders::full_shaders(device, engine)?;
let scene = test_scene::gen_test_scene();
dump_scene_info(&scene);
let (recording, buf) = render(&scene, &shaders);
//let (recording, buf) = render::render(&scene, &shaders);
let (recording, buf) = render::render_full(&scene, &full_shaders);
let downloads = engine.run_recording(&device, &queue, &recording)?;
let mapped = downloads.map();
device.poll(wgpu::Maintain::Wait);
let buf = mapped.get_mapped(buf).await?;
let file = File::create("image.png")?;
let w = BufWriter::new(file);
let encoder = png::Encoder::new(w, 1024, 1024);
let mut writer = encoder.write_header()?;
writer.write_image_data(&buf)?;
if false {
dump_buf(bytemuck::cast_slice(&buf));
} else {
let file = File::create("image.png")?;
let w = BufWriter::new(file);
let mut encoder = png::Encoder::new(w, 1024, 1024);
encoder.set_color(png::ColorType::Rgba);
let mut writer = encoder.write_header()?;
writer.write_image_data(&buf)?;
}
Ok(())
}

View file

@ -14,12 +14,13 @@ const PATH_BBOX_SIZE: u64 = 24;
const CUBIC_SIZE: u64 = 40;
const DRAWMONOID_SIZE: u64 = 16;
const MAX_DRAWINFO_SIZE: u64 = 44;
const PATH_SIZE: u64 = 8;
const PATH_SIZE: u64 = 32;
const DRAW_BBOX_SIZE: u64 = 16;
const BUMP_SIZE: u64 = 16;
const BIN_HEADER_SIZE: u64 = 8;
#[repr(C)]
#[derive(Clone, Copy, Default, Zeroable, Pod)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
struct Config {
width_in_tiles: u32,
height_in_tiles: u32,
@ -84,8 +85,8 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
);
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
let n_pathtag = data.pathseg_stream.len();
let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
// TODO: more principled size calc
let tiles_buf = BufProxy::new(4097 * 8);
let segments_buf = BufProxy::new(256 * 24);
@ -151,6 +152,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy
drawdata_base,
transform_base,
};
println!("{:?}", config);
let scene_buf = recording.upload(scene);
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
@ -176,8 +178,9 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy
(drawobj_wgs, 1, 1),
[config_buf, path_bbox_buf],
);
let cubic_buf = BufProxy::new(n_path as u64 * CUBIC_SIZE);
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
let n_pathtag = data.pathseg_stream.len();
let cubic_buf = BufProxy::new(n_pathtag as u64 * CUBIC_SIZE);
let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
recording.dispatch(
shaders.pathseg,
(path_coarse_wgs, 1, 1),
@ -214,6 +217,10 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy
// Not actually used yet.
let clip_bbox_buf = BufProxy::new(1024);
let bin_data_buf = BufProxy::new(1 << 16);
let width_in_bins = (config.width_in_tiles + 15) / 16;
let height_in_bins = (config.height_in_tiles + 15) / 16;
let n_bins = width_in_bins * height_in_bins;
let bin_header_buf = BufProxy::new((n_bins * drawobj_wgs) as u64 * BIN_HEADER_SIZE);
recording.clear_all(bump_buf);
recording.dispatch(
shaders.binning,
@ -226,6 +233,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy
draw_bbox_buf,
bump_buf,
bin_data_buf,
bin_header_buf,
],
);
let path_buf = BufProxy::new(n_path as u64 * PATH_SIZE);
@ -244,11 +252,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy
],
);
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
// TODO: more principled size calc
let tiles_buf = BufProxy::new(4097 * 8);
let segments_buf = BufProxy::new(256 * 24);
recording.clear_all(tiles_buf);
let segments_buf = BufProxy::new(1 << 20);
recording.dispatch(
shaders.path_coarse,
(path_coarse_wgs, 1, 1),
@ -259,25 +263,42 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy
cubic_buf,
path_buf,
bump_buf,
tiles_buf,
tile_buf,
segments_buf,
],
);
recording.dispatch(
shaders.backdrop,
(path_wgs, 1, 1),
[config_buf, path_buf, tiles_buf],
[config_buf, path_buf, tile_buf],
);
let out_buf_size = config.width_in_tiles * config.height_in_tiles * 256;
let ptcl_buf = BufProxy::new(1 << 20);
recording.dispatch(
shaders.coarse,
(width_in_bins, height_in_bins, 1),
[
config_buf,
scene_buf,
draw_monoid_buf,
bin_header_buf,
bin_data_buf,
path_buf,
tile_buf,
bump_buf,
ptcl_buf,
],
);
let out_buf_size = config.width_in_tiles * config.height_in_tiles * 1024;
let out_buf = BufProxy::new(out_buf_size as u64);
recording.dispatch(
shaders.fine,
(config.width_in_tiles, config.height_in_tiles, 1),
[config_buf, tiles_buf, segments_buf, out_buf],
[config_buf, tile_buf, segments_buf, out_buf, ptcl_buf],
);
recording.download(out_buf);
(recording, out_buf)
let download_buf = out_buf;
recording.download(download_buf);
(recording, download_buf)
}
pub fn align_up(len: usize, alignment: u32) -> usize {

View file

@ -223,8 +223,12 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
)?;
let backdrop = engine.add_shader(
device,
preprocess::preprocess(&read_shader("backdrop"), &empty, &imports).into(),
&[BindType::BufReadOnly, BindType::Buffer],
preprocess::preprocess(&read_shader("backdrop_dyn"), &empty, &imports).into(),
&[
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
],
)?;
let coarse = engine.add_shader(
device,

View file

@ -29,6 +29,18 @@ pub fn gen_test_scene() -> Scene {
];
let brush = Brush::Solid(Color::rgb8(0x80, 0x80, 0x80));
builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path);
let transform = Affine::translate(10.0, 200.0);
/*
let path = [
PathElement::MoveTo(Point::new(100.0, 300.0)),
PathElement::LineTo(Point::new(500.0, 320.0)),
PathElement::LineTo(Point::new(300.0, 350.0)),
PathElement::LineTo(Point::new(200.0, 460.0)),
PathElement::LineTo(Point::new(150.0, 410.0)),
PathElement::Close,
];
*/
builder.fill(Fill::NonZero, transform, &brush, None, &path);
scene
}