mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 04:31:30 +11:00
Checkpoint
Many shader stages written.
This commit is contained in:
parent
40416fd2ea
commit
5c6ec1efa3
95
piet-wgsl/shader/backdrop_dyn.wgsl
Normal file
95
piet-wgsl/shader/backdrop_dyn.wgsl
Normal file
|
@ -0,0 +1,95 @@
|
|||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
// Prefix sum for dynamically allocated backdrops
|
||||
|
||||
#import config
|
||||
|
||||
// TODO: dedup & put this in the right place
|
||||
struct Path {
|
||||
// bounding box in pixels
|
||||
bbox: vec4<u32>,
|
||||
// offset (in u32's) to tile rectangle
|
||||
tiles: u32,
|
||||
}
|
||||
|
||||
// TODO: -> shared
|
||||
struct Tile {
|
||||
backdrop: i32,
|
||||
segments: u32,
|
||||
}
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> paths: array<Path>;
|
||||
|
||||
@group(0) @binding(2)
|
||||
var<storage, read_write> tiles: array<Tile>;
|
||||
|
||||
let WG_SIZE = 256u;
|
||||
|
||||
var<workgroup> sh_row_width: array<u32, WG_SIZE>;
|
||||
var<workgroup> sh_row_count: array<u32, WG_SIZE>;
|
||||
|
||||
@compute @workgroup_size(256)
|
||||
fn main(
|
||||
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||
) {
|
||||
let drawobj_ix = global_id.x;
|
||||
var row_count = 0u;
|
||||
if drawobj_ix < config.n_drawobj {
|
||||
// TODO: when rectangles, path and draw obj are not the same
|
||||
let path = paths[drawobj_ix];
|
||||
sh_row_width[local_id.x] = path.bbox.z - path.bbox.x;
|
||||
row_count = path.bbox.w - path.bbox.y;
|
||||
sh_row_count[local_id.x] = row_count;
|
||||
}
|
||||
|
||||
// Prefix sum of row counts
|
||||
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||
workgroupBarrier();
|
||||
if local_id.x >= (1u << i) {
|
||||
row_count += sh_row_count[local_id.x - (1u << i)];
|
||||
}
|
||||
workgroupBarrier();
|
||||
sh_row_count[local_id.x] = row_count;
|
||||
}
|
||||
workgroupBarrier();
|
||||
let total_rows = sh_row_count[WG_SIZE - 1u];
|
||||
for (var row = local_id.x; row < total_rows; row += WG_SIZE) {
|
||||
var el_ix = 0u;
|
||||
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||
let probe = el_ix + ((WG_SIZE / 2u) >> i);
|
||||
if row >= sh_row_count[probe - 1u] {
|
||||
el_ix = probe;
|
||||
}
|
||||
}
|
||||
let width = sh_row_width[el_ix];
|
||||
if width > 0u {
|
||||
var seq_ix = row - select(0u, sh_row_count[el_ix - 1u], el_ix > 0u);
|
||||
var tile_ix = seq_ix * width;
|
||||
var sum = tiles[tile_ix].backdrop;
|
||||
for (var x = 1u; x < width; x += 1u) {
|
||||
tile_ix += 1u;
|
||||
sum += tiles[tile_ix].backdrop;
|
||||
tiles[tile_ix].backdrop = sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -14,6 +14,9 @@
|
|||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
// Fine rasterizer. This can run in simple (just path rendering) and full
|
||||
// modes, controllable by #define.
|
||||
|
||||
// This is a cut'n'paste w/ backdrop.
|
||||
struct Tile {
|
||||
backdrop: i32,
|
||||
|
@ -36,17 +39,16 @@ var<storage> segments: array<Segment>;
|
|||
@group(0) @binding(3)
|
||||
var<storage, read_write> output: array<u32>;
|
||||
|
||||
#ifdef full
|
||||
#import ptcl
|
||||
|
||||
@group(0) @binding(4)
|
||||
var<storage> ptcl: array<u32>;
|
||||
#endif
|
||||
|
||||
let PIXELS_PER_THREAD = 4u;
|
||||
|
||||
@compute @workgroup_size(4, 16)
|
||||
fn main(
|
||||
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||
) {
|
||||
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
|
||||
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
|
||||
let tile = tiles[tile_ix];
|
||||
fn fill_path(tile: Tile, xy: vec2<f32>) -> array<f32, PIXELS_PER_THREAD> {
|
||||
var area: array<f32, PIXELS_PER_THREAD>;
|
||||
let backdrop_f = f32(tile.backdrop);
|
||||
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||
|
@ -89,6 +91,19 @@ fn main(
|
|||
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||
area[i] = abs(area[i]);
|
||||
}
|
||||
return area;
|
||||
}
|
||||
|
||||
@compute @workgroup_size(4, 16)
|
||||
fn main(
|
||||
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||
) {
|
||||
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
|
||||
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
|
||||
let tile = tiles[tile_ix];
|
||||
let area = fill_path(tile, xy);
|
||||
|
||||
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
|
||||
let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;
|
||||
|
|
|
@ -18,4 +18,5 @@
|
|||
struct BumpAllocators {
|
||||
binning: atomic<u32>,
|
||||
ptcl: atomic<u32>,
|
||||
tile: atomic<u32>,
|
||||
}
|
||||
|
|
122
piet-wgsl/shader/tile_alloc.wgsl
Normal file
122
piet-wgsl/shader/tile_alloc.wgsl
Normal file
|
@ -0,0 +1,122 @@
|
|||
// Copyright 2022 Google LLC
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
// Tile allocation (and zeroing of tiles)
|
||||
|
||||
#import config
|
||||
#import bump
|
||||
#import drawtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> config: Config;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> scene: array<u32>;
|
||||
|
||||
@group(0) @binding(2)
|
||||
var<storage> draw_bboxes: array<vec4<f32>>;
|
||||
|
||||
@group(0) @binding(3)
|
||||
var<storage, read_write> bump: BumpAllocators;
|
||||
|
||||
// TODO: put this in the right place, dedup
|
||||
struct Path {
|
||||
// bounding box in pixels
|
||||
bbox: vec4<u32>,
|
||||
// offset (in u32's) to tile rectangle
|
||||
tiles: u32,
|
||||
}
|
||||
|
||||
struct Tile {
|
||||
backdrop: i32,
|
||||
segments: u32,
|
||||
}
|
||||
|
||||
@group(0) @binding(4)
|
||||
var<storage, read_write> paths: array<Path>;
|
||||
|
||||
@group(0) @binding(5)
|
||||
var<storage, read_write> tiles: array<Tile>;
|
||||
|
||||
let WG_SIZE = 256u;
|
||||
|
||||
var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
|
||||
var<workgroup> sh_tile_offset: u32;
|
||||
|
||||
@compute @workgroup_size(256)
|
||||
fn main(
|
||||
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||
) {
|
||||
// scale factors useful for converting coordinates to tiles
|
||||
// TODO: make into constants
|
||||
let SX = 1.0 / f32(TILE_WIDTH);
|
||||
let SY = 1.0 / f32(TILE_HEIGHT);
|
||||
|
||||
let drawobj_ix = global_id.x;
|
||||
var drawtag = DRAWTAG_NOP;
|
||||
if drawobj_ix < config.n_drawobj {
|
||||
drawtag = scene[config.drawtag_base + drawobj_ix];
|
||||
}
|
||||
var x0 = 0;
|
||||
var y0 = 0;
|
||||
var x1 = 0;
|
||||
var y1 = 0;
|
||||
if drawtag != DRAWTAG_NOP && drawtag != DRAWTAG_END_CLIP {
|
||||
let bbox = draw_bboxes[drawobj_ix];
|
||||
x0 = i32(floor(bbox.x * SX));
|
||||
y0 = i32(floor(bbox.y * SY));
|
||||
x1 = i32(ceil(bbox.z * SX));
|
||||
y1 = i32(ceil(bbox.w * SY));
|
||||
}
|
||||
let ux0 = u32(clamp(x0, 0, i32(config.width_in_tiles)));
|
||||
let uy0 = u32(clamp(y0, 0, i32(config.height_in_tiles)));
|
||||
let ux1 = u32(clamp(x1, 0, i32(config.width_in_tiles)));
|
||||
let uy1 = u32(clamp(y1, 0, i32(config.height_in_tiles)));
|
||||
let tile_count = (ux1 - ux0) * (uy1 - uy0);
|
||||
var total_tile_count = tile_count;
|
||||
sh_tile_count[local_id.x] = tile_count;
|
||||
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||
workgroupBarrier();
|
||||
if local_id.x < (1u << i) {
|
||||
total_tile_count += sh_tile_count[local_id.x - (1u << i)];
|
||||
}
|
||||
workgroupBarrier();
|
||||
sh_tile_count[local_id.x] = total_tile_count;
|
||||
}
|
||||
if local_id.x == WG_SIZE - 1u {
|
||||
sh_tile_offset = atomicAdd(&bump.tile, total_tile_count);
|
||||
}
|
||||
workgroupBarrier();
|
||||
let tile_offset = sh_tile_offset;
|
||||
if drawobj_ix < config.n_drawobj {
|
||||
let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u);
|
||||
let bbox = vec4<u32>(ux0, uy0, ux1, uy1);
|
||||
let path = Path(bbox, tile_offset + tile_subix);
|
||||
}
|
||||
|
||||
// zero allocated memory
|
||||
// Note: if the number of draw objects is small, utilization will be poor.
|
||||
// There are two things that can be done to improve that. One would be a
|
||||
// separate (indirect) dispatch. Another would be to have each workgroup
|
||||
// process fewer draw objects than the number of threads in the wg.
|
||||
let total_count = sh_tile_count[WG_SIZE - 1u];
|
||||
for (var i = local_id.x; i < total_count; i += WG_SIZE) {
|
||||
// Note: could format output buffer as u32 for even better load
|
||||
// balancing, as does piet-gpu.
|
||||
tiles[tile_offset + i] = Tile(0, 0u);
|
||||
}
|
||||
}
|
|
@ -11,10 +11,13 @@ use crate::{
|
|||
const TAG_MONOID_SIZE: u64 = 12;
|
||||
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Zeroable, Pod)]
|
||||
#[derive(Clone, Copy, Default, Zeroable, Pod)]
|
||||
struct Config {
|
||||
width_in_tiles: u32,
|
||||
height_in_tiles: u32,
|
||||
n_drawobj: u32,
|
||||
drawtag_base: u32,
|
||||
drawdata_base: u32,
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
|
@ -60,6 +63,7 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
|||
let config = Config {
|
||||
width_in_tiles: 64,
|
||||
height_in_tiles: 64,
|
||||
..Default::default()
|
||||
};
|
||||
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
|
||||
// TODO: more principled size calc
|
||||
|
|
Loading…
Reference in a new issue