Merge pull request #203 from linebender/clip_checkpoint

Extend pipeline
This commit is contained in:
Chad Brokaw 2022-11-18 17:28:59 -05:00 committed by GitHub
commit de998220db
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
39 changed files with 4277 additions and 122 deletions

12
.vscode/settings.json vendored
View file

@ -1,8 +1,16 @@
{
"wgsl-analyzer.customImports": {
"bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl",
"bump": "${workspaceFolder}/piet-wgsl/shader/shared/bump.wgsl",
"clip": "${workspaceFolder}/piet-wgsl/shader/shared/clip.wgsl",
"config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl",
"cubic": "${workspaceFolder}/piet-wgsl/shader/shared/cubic.wgsl",
"drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl",
"segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl",
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl",
"ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl",
"tile": "${workspaceFolder}/piet-wgsl/shader/shared/tile.wgsl"
},
"wgsl-analyzer.diagnostics.nagaVersion": "main",
"wgsl-analyzer.preprocessor.shaderDefs": ["full"]
}

2
Cargo.lock generated
View file

@ -1174,10 +1174,12 @@ dependencies = [
"bytemuck",
"env_logger",
"futures-intrusive",
"kurbo 0.8.3",
"parking_lot",
"piet-scene",
"png",
"pollster",
"roxmltree",
"wgpu",
]

View file

@ -14,4 +14,8 @@ parking_lot = "0.12"
bytemuck = { version = "1.12.1", features = ["derive"] }
png = "0.17.6"
piet-scene = { path = "../piet-scene", features = ["kurbo"] }
# for picosvg, should be split out
roxmltree = "0.13"
kurbo = "0.8.3"

View file

@ -0,0 +1,84 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Prefix sum for dynamically allocated backdrops
#import config
#import tile
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> paths: array<Path>;
@group(0) @binding(2)
var<storage, read_write> tiles: array<Tile>;
let WG_SIZE = 256u;
var<workgroup> sh_row_width: array<u32, WG_SIZE>;
var<workgroup> sh_row_count: array<u32, WG_SIZE>;
var<workgroup> sh_offset: array<u32, WG_SIZE>;
// Propagate backdrop counts across each tile row of every path.
// One thread per draw object; rows of all paths in the workgroup are then
// processed cooperatively (a thread may work on rows of another thread's path).
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
) {
    let drawobj_ix = global_id.x;
    var row_count = 0u;
    if drawobj_ix < config.n_drawobj {
        // TODO: when rectangles, path and draw obj are not the same
        let path = paths[drawobj_ix];
        // Width/height of the path's tile bbox, in tiles.
        sh_row_width[local_id.x] = path.bbox.z - path.bbox.x;
        row_count = path.bbox.w - path.bbox.y;
        // Start of this path's tile allocation.
        sh_offset[local_id.x] = path.tiles;
    }
    sh_row_count[local_id.x] = row_count;
    // Prefix sum of row counts (inclusive Hillis-Steele scan over the workgroup)
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        workgroupBarrier();
        if local_id.x >= (1u << i) {
            row_count += sh_row_count[local_id.x - (1u << i)];
        }
        workgroupBarrier();
        sh_row_count[local_id.x] = row_count;
    }
    workgroupBarrier();
    let total_rows = sh_row_count[WG_SIZE - 1u];
    // Each thread strides over rows; binary-search the scanned counts to find
    // which element (path) owns row `row`.
    for (var row = local_id.x; row < total_rows; row += WG_SIZE) {
        var el_ix = 0u;
        for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
            let probe = el_ix + ((WG_SIZE / 2u) >> i);
            if row >= sh_row_count[probe - 1u] {
                el_ix = probe;
            }
        }
        let width = sh_row_width[el_ix];
        if width > 0u {
            // Row index within this element's bbox.
            var seq_ix = row - select(0u, sh_row_count[el_ix - 1u], el_ix > 0u);
            var tile_ix = sh_offset[el_ix] + seq_ix * width;
            // Running (inclusive) sum of backdrops left-to-right across the row;
            // the first tile keeps its own value, subsequent tiles accumulate.
            var sum = tiles[tile_ix].backdrop;
            for (var x = 1u; x < width; x += 1u) {
                tile_ix += 1u;
                sum += tiles[tile_ix].backdrop;
                tiles[tile_ix].backdrop = sum;
            }
        }
    }
}

View file

@ -0,0 +1,45 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
#import config
@group(0) @binding(0)
var<storage> config: Config;
// Per-path bounding box plus draw parameters.
// The main entry below resets x0/y0 to i32 max and x1/y1 to i32 min, so the
// box starts "empty"; presumably later stages accumulate min/max into it —
// TODO confirm against the pathseg stage.
struct PathBbox {
    // Top-left corner (minimums).
    x0: i32,
    y0: i32,
    // Bottom-right corner (maximums).
    x1: i32,
    y1: i32,
    // Stroke line width; semantics of negative values not visible here
    // (elsewhere in this commit, linewidth < 0.0 selects the fill path).
    linewidth: f32,
    // Index of the associated transform.
    trans_ix: u32,
}
@group(0) @binding(1)
var<storage, read_write> path_bboxes: array<PathBbox>;
// Reset one PathBbox per thread to sentinel values: mins at i32 max,
// maxes at i32 min, so any subsequent min/max accumulation starts from
// an empty box.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
) {
    let path_ix = global_id.x;
    // Guard against threads beyond the path count.
    if path_ix >= config.n_path {
        return;
    }
    path_bboxes[path_ix].x0 = 0x7fffffff;
    path_bboxes[path_ix].y0 = 0x7fffffff;
    path_bboxes[path_ix].x1 = -0x80000000;
    path_bboxes[path_ix].y1 = -0x80000000;
}

View file

@ -0,0 +1,164 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// The binning stage
#import config
#import drawtag
#import bbox
#import bump
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> draw_monoids: array<DrawMonoid>;
@group(0) @binding(2)
var<storage> path_bbox_buf: array<PathBbox>;
@group(0) @binding(3)
var<storage> clip_bbox_buf: array<vec4<f32>>;
@group(0) @binding(4)
var<storage, read_write> intersected_bbox: array<vec4<f32>>;
@group(0) @binding(5)
var<storage, read_write> bump: BumpAllocators;
@group(0) @binding(6)
var<storage, read_write> bin_data: array<u32>;
// TODO: put in common place
// TODO: put in common place
// Per-(bin, partition) header written by the binning stage:
// element_count draw objects for this bin, stored in bin_data starting
// at chunk_offset (allocated from bump.binning below).
struct BinHeader {
    element_count: u32,
    chunk_offset: u32,
}
@group(0) @binding(7)
var<storage, read_write> bin_header: array<BinHeader>;
// conversion factors from coordinates to bin
let SX = 0.00390625;
let SY = 0.00390625;
//let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH);
//let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT);
let WG_SIZE = 256u;
let N_SLICE = 8u;
//let N_SLICE = WG_SIZE / 32u;
var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>;
var<workgroup> sh_count: array<array<u32, N_TILE>, N_SLICE>;
var<workgroup> sh_chunk_offset: array<u32, N_TILE>;
// Bin draw objects into N_TILE_X x N_TILE_Y bins: one thread per draw
// object, one workgroup per partition of N_TILE draw objects. Coverage is
// recorded in shared bitmaps (bit = draw object, slot = bin), then compacted
// into bin_data with per-bin offsets from the bump allocator.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) wg_id: vec3<u32>,
) {
    // Clear coverage bitmaps for this workgroup.
    for (var i = 0u; i < N_SLICE; i += 1u) {
        atomicStore(&sh_bitmaps[i][local_id.x], 0u);
    }
    // Read inputs and determine coverage of bins
    let element_ix = global_id.x;
    var x0 = 0;
    var y0 = 0;
    var x1 = 0;
    var y1 = 0;
    if element_ix < config.n_drawobj {
        let draw_monoid = draw_monoids[element_ix];
        // Default clip box is effectively unbounded.
        var clip_bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
        if draw_monoid.clip_ix > 0u {
            clip_bbox = clip_bbox_buf[draw_monoid.clip_ix - 1u];
        }
        // For clip elements, clip_box is the bbox of the clip path,
        // intersected with enclosing clips.
        // For other elements, it is the bbox of the enclosing clips.
        // TODO check this is true
        let path_bbox = path_bbox_buf[draw_monoid.path_ix];
        let pb = vec4<f32>(vec4<i32>(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1));
        let bbox_raw = bbox_intersect(clip_bbox, pb);
        // TODO(naga): clunky expression a workaround for broken lhs swizzle
        // Clamp zw >= xy so an empty intersection yields a zero-area box.
        let bbox = vec4<f32>(bbox_raw.xy, max(bbox_raw.xy, bbox_raw.zw));
        intersected_bbox[element_ix] = bbox;
        // Convert to bin coordinates (SX/SY are 1 / bin size in pixels).
        x0 = i32(floor(bbox.x * SX));
        y0 = i32(floor(bbox.y * SY));
        x1 = i32(ceil(bbox.z * SX));
        y1 = i32(ceil(bbox.w * SY));
    }
    let width_in_bins = i32((config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X);
    let height_in_bins = i32((config.height_in_tiles + N_TILE_Y - 1u) / N_TILE_Y);
    x0 = clamp(x0, 0, width_in_bins);
    y0 = clamp(y0, 0, height_in_bins);
    x1 = clamp(x1, 0, width_in_bins);
    y1 = clamp(y1, 0, height_in_bins);
    // Zero-width range: collapse y too, otherwise the wrap-around walk
    // below (x never reaches x1) would not terminate.
    if x0 == x1 {
        y1 = y0;
    }
    var x = x0;
    var y = y0;
    let my_slice = local_id.x / 32u;
    let my_mask = 1u << (local_id.x & 31u);
    // Walk the covered bin rectangle row-major, setting this object's bit.
    while y < y1 {
        atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask);
        x += 1;
        if x == x1 {
            x = x0;
            y += 1;
        }
    }
    workgroupBarrier();
    // Allocate output segments
    // Per-bin inclusive count across bitmap slices (this thread now acts
    // as a bin, not a draw object).
    var element_count = 0u;
    for (var i = 0u; i < N_SLICE; i += 1u) {
        element_count += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x]));
        sh_count[i][local_id.x] = element_count;
    }
    // element_count is the number of draw objects covering this thread's bin
    let chunk_offset = atomicAdd(&bump.binning, element_count);
    sh_chunk_offset[local_id.x] = chunk_offset;
    bin_header[global_id.x].element_count = element_count;
    bin_header[global_id.x].chunk_offset = chunk_offset;
    workgroupBarrier();
    // loop over bbox of bins touched by this draw object
    x = x0;
    y = y0;
    while y < y1 {
        let bin_ix = y * width_in_bins + x;
        let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]);
        // I think this predicate will always be true...
        if (out_mask & my_mask) != 0u {
            // Rank of this object among lower-numbered objects in the bin.
            var idx = countOneBits(out_mask & (my_mask - 1u));
            if my_slice > 0u {
                idx += sh_count[my_slice - 1u][bin_ix];
            }
            let offset = sh_chunk_offset[bin_ix];
            bin_data[offset + idx] = element_ix;
        }
        x += 1;
        if x == x1 {
            x = x0;
            y += 1;
        }
    }
}

View file

@ -0,0 +1,199 @@
#import config
#import bbox
#import clip
#import drawtag
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> clip_inp: array<i32>;
@group(0) @binding(2)
var<storage> path_bboxes: array<PathBbox>;
@group(0) @binding(3)
var<storage> reduced: array<Bic>;
@group(0) @binding(4)
var<storage> clip_els: array<ClipEl>;
@group(0) @binding(5)
var<storage, read_write> draw_monoids: array<DrawMonoid>;
@group(0) @binding(6)
var<storage, read_write> clip_bboxes: array<vec4<f32>>;
let WG_SIZE = 256u;
var<workgroup> sh_bic: array<Bic, 510 >;
var<workgroup> sh_stack: array<u32, WG_SIZE>;
var<workgroup> sh_stack_bbox: array<vec4<f32>, WG_SIZE>;
var<workgroup> sh_bbox: array<vec4<f32>, WG_SIZE>;
var<workgroup> sh_link: array<i32, WG_SIZE>;
// Find the link (predecessor) for stack element `ix` by walking the Bic
// sweep-up tree stored in sh_bic (leaves at [0, WG_SIZE), interior levels
// packed above). Accumulates combined Bic into *bic along the way.
// Returns the predecessor index - 1 when found within the workgroup, or a
// negative encoding (~(*bic).a) when the match lies in the cross-workgroup
// prefix stack.
fn search_link(bic: ptr<function, Bic>, ix: u32) -> i32 {
    var ix = ix;
    var j = 0u;
    // Ascend: combine left siblings until a combined Bic with open pushes
    // (b > 0) is found, meaning the match is under that subtree.
    while j < firstTrailingBit(WG_SIZE) {
        // Base offset of tree level j in sh_bic's packed layout.
        let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j));
        if ((ix >> j) & 1u) != 0u {
            let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic);
            if test.b > 0u {
                break;
            }
            *bic = test;
            ix -= 1u << j;
        }
        j += 1u;
    }
    if ix > 0u {
        // Descend back down, skipping subtrees whose combination leaves
        // no open pushes (b == 0).
        while j > 0u {
            j -= 1u;
            let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j));
            let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic);
            if test.b == 0u {
                *bic = test;
                ix -= 1u << j;
            }
        }
    }
    if ix > 0u {
        return i32(ix) - 1;
    } else {
        // Not found locally: encode depth into the prefix stack as a
        // negative number (~a).
        return i32(~0u - (*bic).a);
    }
}
// Load a clip input element, padding reads past n_clip with i32::MIN,
// which acts as a sentinel pop that can never match a push.
fn load_clip_inp(ix: u32) -> i32 {
    if ix >= config.n_clip {
        // Decimal form of i32::MIN; the hex literal 0x80000000 overflows
        // the i32 literal range in WGSL.
        return -2147483648;
    }
    return clip_inp[ix];
}
// Clip leaf stage: resolve each clip element's parent and compute the
// intersected clip bbox per element. Works on one workgroup's worth of
// clip stream elements; cross-workgroup state comes from `reduced` (Bic
// aggregates) and `clip_els` (the open-push stack snapshot).
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) wg_id: vec3<u32>,
) {
    // Suffix-combine the reduced Bic values of preceding workgroups.
    var bic: Bic;
    if local_id.x < wg_id.x {
        bic = reduced[local_id.x];
    }
    sh_bic[local_id.x] = bic;
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        workgroupBarrier();
        if local_id.x + (1u << i) < WG_SIZE {
            let other = sh_bic[local_id.x + (1u << i)];
            bic = bic_combine(bic, other);
        }
        workgroupBarrier();
        sh_bic[local_id.x] = bic;
    }
    workgroupBarrier();
    // Number of clip pushes still open when this workgroup starts.
    let stack_size = sh_bic[0].b;
    // TODO: if stack depth > WG_SIZE desired, scan here
    // binary search in stack
    let sp = WG_SIZE - 1u - local_id.x;
    var ix = 0u;
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        let probe = ix + ((WG_SIZE / 2u) >> i);
        if sp < sh_bic[probe].b {
            ix = probe;
        }
    }
    let b = sh_bic[ix].b;
    // "Unbounded" bbox; bbox_intersect with it is a no-op.
    var bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
    if sp < b {
        // Load the stack element this thread represents (deepest entries
        // at the highest local indices).
        let el = clip_els[ix * WG_SIZE + b - sp - 1u];
        sh_stack[local_id.x] = el.parent_ix;
        bbox = el.bbox;
    }
    // forward scan of bbox values of prefix stack
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        sh_stack_bbox[local_id.x] = bbox;
        workgroupBarrier();
        if local_id.x >= (1u << i) {
            bbox = bbox_intersect(sh_stack_bbox[local_id.x - (1u << i)], bbox);
        }
        workgroupBarrier();
    }
    sh_stack_bbox[local_id.x] = bbox;
    // Read input and compute Bic binary tree
    let inp = load_clip_inp(global_id.x);
    // Non-negative input = BeginClip (value is path index); negative = EndClip.
    let is_push = inp >= 0;
    // NOTE(review): `var bic` re-declares the variable first declared at the
    // top of this function in the same scope — confirm naga accepts this
    // shadowing; strict WGSL front ends may reject it.
    var bic = Bic(1u - u32(is_push), u32(is_push));
    sh_bic[local_id.x] = bic;
    if is_push {
        let path_bbox = path_bboxes[inp];
        bbox = vec4<f32>(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1));
    } else {
        bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
    }
    // Build the sweep-up Bic tree that search_link consumes; interior
    // levels are packed above the WG_SIZE leaves in sh_bic.
    var inbase = 0u;
    for (var i = 0u; i < firstTrailingBit(WG_SIZE) - 1u; i += 1u) {
        let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i));
        workgroupBarrier();
        if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) {
            let in_off = inbase + local_id.x * 2u;
            sh_bic[outbase + local_id.x] = bic_combine(sh_bic[in_off], sh_bic[in_off + 1u]);
        }
        inbase = outbase;
    }
    workgroupBarrier();
    // search for predecessor node
    bic = Bic();
    var link = search_link(&bic, local_id.x);
    sh_link[local_id.x] = link;
    workgroupBarrier();
    // NOTE(review): select evaluates both operands, so sh_link[link] is read
    // even when link < 0 — relies on clamped/robust indexing; confirm.
    let grandparent = select(link - 1, sh_link[link], link >= 0);
    var parent: i32;
    if link >= 0 {
        // Parent within this workgroup.
        parent = i32(wg_id.x * WG_SIZE) + link;
    } else if link + i32(stack_size) >= 0 {
        // Parent on the cross-workgroup stack (link is negative depth).
        parent = i32(sh_stack[i32(WG_SIZE) + link]);
    } else {
        parent = -1;
    }
    // bbox scan (intersect) across parent links
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        if i != 0u {
            sh_link[local_id.x] = link;
        }
        sh_bbox[local_id.x] = bbox;
        workgroupBarrier();
        if link >= 0 {
            // Pointer-jumping: intersect with ancestor's bbox and hop the link.
            bbox = bbox_intersect(sh_bbox[link], bbox);
            link = sh_link[link];
        }
        workgroupBarrier();
    }
    if link + i32(stack_size) >= 0 {
        bbox = bbox_intersect(sh_stack_bbox[i32(WG_SIZE) + link], bbox);
    }
    // At this point, bbox is the intersection of bboxes on the path to the root
    sh_bbox[local_id.x] = bbox;
    workgroupBarrier();
    if !is_push && global_id.x < config.n_clip {
        // Fix up drawmonoid so path_ix of EndClip matches BeginClip
        let path_ix = clip_inp[parent];
        // EndClip input is the bitwise-negated draw monoid index.
        draw_monoids[~inp].path_ix = u32(path_ix);
        // For an EndClip, output the bbox of the *enclosing* clip (grandparent),
        // not the clip being closed.
        if grandparent >= 0 {
            bbox = sh_bbox[grandparent];
        } else if grandparent + i32(stack_size) >= 0 {
            bbox = sh_stack_bbox[i32(WG_SIZE) + grandparent];
        } else {
            bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
        }
    }
    clip_bboxes[global_id.x] = bbox;
}

View file

@ -0,0 +1,66 @@
#import config
#import bbox
#import clip
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> clip_inp: array<i32>;
@group(0) @binding(2)
var<storage> path_bboxes: array<PathBbox>;
@group(0) @binding(3)
var<storage, read_write> reduced: array<Bic>;
@group(0) @binding(4)
var<storage, read_write> clip_out: array<ClipEl>;
let WG_SIZE = 256u;
var<workgroup> sh_bic: array<Bic, WG_SIZE>;
var<workgroup> sh_parent: array<u32, WG_SIZE>;
var<workgroup> sh_path_ix: array<u32, WG_SIZE>;
// Clip reduce stage: suffix-scan the clip stream's bicyclic (push/pop)
// monoid per workgroup, write the workgroup aggregate to `reduced`, and
// emit the workgroup's unmatched pushes (its open-clip stack) to clip_out
// in stack order for consumption by the leaf stage.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) wg_id: vec3<u32>,
) {
    let inp = clip_inp[global_id.x];
    // Non-negative = BeginClip (value is a path index); negative = EndClip.
    let is_push = inp >= 0;
    var bic = Bic(1u - u32(is_push), u32(is_push));
    // reverse scan of bicyclic semigroup
    sh_bic[local_id.x] = bic;
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        workgroupBarrier();
        if local_id.x + (1u << i) < WG_SIZE {
            let other = sh_bic[local_id.x + (1u << i)];
            bic = bic_combine(bic, other);
        }
        workgroupBarrier();
        sh_bic[local_id.x] = bic;
    }
    if local_id.x == 0u {
        reduced[wg_id.x] = bic;
    }
    workgroupBarrier();
    // Number of pushes left open at the end of this workgroup.
    let size = sh_bic[0].b;
    // Load the suffix aggregate of the elements strictly *after* this one;
    // bic.a is then the pops remaining after this element and bic.b its
    // depth on the end-of-workgroup stack.
    // Fix: previously bic was left as the identity here, so `bic.a == 0u`
    // held for every thread and local_ix collapsed to size - 1u, clobbering
    // one output slot and leaving the rest of the stack uninitialized.
    bic = Bic();
    if local_id.x + 1u < WG_SIZE {
        bic = sh_bic[local_id.x + 1u];
    }
    if is_push && bic.a == 0u {
        // This push is unmatched within the workgroup; record it in stack
        // order (deepest at index 0, most recent at size - 1u).
        let local_ix = size - bic.b - 1u;
        sh_parent[local_ix] = local_id.x;
        sh_path_ix[local_ix] = u32(inp);
    }
    workgroupBarrier();
    // TODO: possibly do forward scan here if depth can exceed wg size
    if local_id.x < size {
        let path_ix = sh_path_ix[local_id.x];
        let path_bbox = path_bboxes[path_ix];
        // Parent index is global (offset by this workgroup's base).
        let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE;
        let bbox = vec4<f32>(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1));
        clip_out[global_id.x] = ClipEl(parent_ix, bbox);
    }
}

View file

@ -0,0 +1,448 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// The coarse rasterization stage.
#import config
#import bump
#import drawtag
#import ptcl
#import tile
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> draw_monoids: array<DrawMonoid>;
// TODO: dedup
// TODO: dedup
// Mirrors the BinHeader written by the binning stage: element_count draw
// objects for a (bin, partition), located in bin_data at chunk_offset.
struct BinHeader {
    element_count: u32,
    chunk_offset: u32,
}
@group(0) @binding(3)
var<storage> bin_headers: array<BinHeader>;
@group(0) @binding(4)
var<storage> bin_data: array<u32>;
@group(0) @binding(5)
var<storage> paths: array<Path>;
@group(0) @binding(6)
var<storage> tiles: array<Tile>;
@group(0) @binding(7)
var<storage> info: array<u32>;
@group(0) @binding(8)
var<storage, read_write> bump: BumpAllocators;
@group(0) @binding(9)
var<storage, read_write> ptcl: array<u32>;
// Much of this code assumes WG_SIZE == N_TILE. If these diverge, then
// a fair amount of fixup is needed.
let WG_SIZE = 256u;
//let N_SLICE = WG_SIZE / 32u;
let N_SLICE = 8u;
var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>;
var<workgroup> sh_part_count: array<u32, WG_SIZE>;
var<workgroup> sh_part_offsets: array<u32, WG_SIZE>;
var<workgroup> sh_drawobj_ix: array<u32, WG_SIZE>;
var<workgroup> sh_tile_stride: array<u32, WG_SIZE>;
var<workgroup> sh_tile_width: array<u32, WG_SIZE>;
var<workgroup> sh_tile_x0: array<u32, WG_SIZE>;
var<workgroup> sh_tile_y0: array<u32, WG_SIZE>;
var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
var<workgroup> sh_tile_base: array<u32, WG_SIZE>;
// helper functions for writing ptcl
var<private> cmd_offset: u32;
var<private> cmd_limit: u32;
// Make sure there is space for a command of given size, plus a jump if needed
// Make sure there is space for a command of given size, plus a jump if needed
fn alloc_cmd(size: u32) {
    // Fast path: current chunk still has room (headroom already reserved
    // in cmd_limit for the trailing jump).
    if cmd_offset + size < cmd_limit {
        return;
    }
    // We might be able to save a little bit of computation here
    // by setting the initial value of the bump allocator.
    let ptcl_dyn_start = config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC;
    // Grab a fresh chunk from the global bump allocator and chain to it.
    let new_cmd = ptcl_dyn_start + atomicAdd(&bump.ptcl, PTCL_INCREMENT);
    // TODO: robust memory
    ptcl[cmd_offset] = CMD_JUMP;
    ptcl[cmd_offset + 1u] = new_cmd;
    cmd_offset = new_cmd;
    cmd_limit = cmd_offset + (PTCL_INCREMENT - PTCL_HEADROOM);
}
// Emit the path-drawing command for a tile: fill (or solid) when
// linewidth < 0.0, stroke otherwise.
fn write_path(tile: Tile, linewidth: f32) {
    // TODO: take flags
    alloc_cmd(3u);
    if linewidth < 0.0 {
        if tile.segments == 0u {
            // No segments in this tile: presumably fully covered by the
            // backdrop, so a solid command suffices.
            ptcl[cmd_offset] = CMD_SOLID;
            cmd_offset += 1u;
        } else {
            let fill = CmdFill(tile.segments, tile.backdrop);
            ptcl[cmd_offset] = CMD_FILL;
            ptcl[cmd_offset + 1u] = fill.tile;
            ptcl[cmd_offset + 2u] = u32(fill.backdrop);
            cmd_offset += 3u;
        }
    } else {
        let stroke = CmdStroke(tile.segments, 0.5 * linewidth);
        ptcl[cmd_offset] = CMD_STROKE;
        ptcl[cmd_offset + 1u] = stroke.tile;
        ptcl[cmd_offset + 2u] = bitcast<u32>(stroke.half_width);
        cmd_offset += 3u;
    }
}
// Emit a solid-color command.
fn write_color(color: CmdColor) {
    alloc_cmd(2u);
    let base = cmd_offset;
    ptcl[base] = CMD_COLOR;
    ptcl[base + 1u] = color.rgba_color;
    cmd_offset = base + 2u;
}
// Discussion point: these are basically copying from info to ptcl. We
// could just write an info offset and have fine bind that buffer and read
// from it.
// Emit a linear-gradient command (index plus implicit line equation).
fn write_lin_grad(lin: CmdLinGrad) {
    alloc_cmd(5u);
    let base = cmd_offset;
    ptcl[base] = CMD_LIN_GRAD;
    ptcl[base + 1u] = lin.index;
    ptcl[base + 2u] = bitcast<u32>(lin.line_x);
    ptcl[base + 3u] = bitcast<u32>(lin.line_y);
    ptcl[base + 4u] = bitcast<u32>(lin.line_c);
    cmd_offset = base + 5u;
}
// Emit a radial-gradient command (index, 2x2 matrix, translation,
// center, radius parameters).
fn write_rad_grad(rad: CmdRadGrad) {
    alloc_cmd(12u);
    let base = cmd_offset;
    ptcl[base] = CMD_RAD_GRAD;
    ptcl[base + 1u] = rad.index;
    ptcl[base + 2u] = bitcast<u32>(rad.matrx.x);
    ptcl[base + 3u] = bitcast<u32>(rad.matrx.y);
    ptcl[base + 4u] = bitcast<u32>(rad.matrx.z);
    ptcl[base + 5u] = bitcast<u32>(rad.matrx.w);
    ptcl[base + 6u] = bitcast<u32>(rad.xlat.x);
    ptcl[base + 7u] = bitcast<u32>(rad.xlat.y);
    ptcl[base + 8u] = bitcast<u32>(rad.c1.x);
    ptcl[base + 9u] = bitcast<u32>(rad.c1.y);
    ptcl[base + 10u] = bitcast<u32>(rad.ra);
    ptcl[base + 11u] = bitcast<u32>(rad.roff);
    cmd_offset = base + 12u;
}
// Emit a begin-clip command (tag only, no payload).
fn write_begin_clip() {
    alloc_cmd(1u);
    let base = cmd_offset;
    ptcl[base] = CMD_BEGIN_CLIP;
    cmd_offset = base + 1u;
}
// Emit an end-clip command carrying the blend word.
fn write_end_clip(blend: u32) {
    alloc_cmd(2u);
    let base = cmd_offset;
    ptcl[base] = CMD_END_CLIP;
    ptcl[base + 1u] = blend;
    cmd_offset = base + 2u;
}
// Coarse rasterization: one workgroup per bin (N_TILE_X x N_TILE_Y tiles),
// one thread per tile. Streams the bin's draw objects (merged from all
// binning partitions), builds per-tile coverage bitmaps, and writes the
// per-tile command list (ptcl) for the fine stage.
@compute @workgroup_size(256)
fn main(
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) wg_id: vec3<u32>,
) {
    let width_in_bins = (config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X;
    let bin_ix = width_in_bins * wg_id.y + wg_id.x;
    let n_partitions = (config.n_drawobj + N_TILE - 1u) / N_TILE;
    // Coordinates of the top left of this bin, in tiles.
    let bin_tile_x = N_TILE_X * wg_id.x;
    let bin_tile_y = N_TILE_Y * wg_id.y;
    let tile_x = local_id.x % N_TILE_X;
    let tile_y = local_id.x / N_TILE_X;
    let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x;
    cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC;
    cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM);
    // clip state
    var clip_zero_depth = 0u;
    var clip_depth = 0u;
    // Stream cursors: rd_ix..wr_ix is the merged window of draw objects;
    // ready_ix counts objects whose partition headers have been scanned.
    var partition_ix = 0u;
    var rd_ix = 0u;
    var wr_ix = 0u;
    var part_start_ix = 0u;
    var ready_ix = 0u;
    // blend state
    var render_blend_depth = 0u;
    var max_blend_depth = 0u;
    while true {
        // Reset coverage bitmaps for this batch of up to N_TILE objects.
        for (var i = 0u; i < N_SLICE; i += 1u) {
            atomicStore(&sh_bitmaps[i][local_id.x], 0u);
        }
        // Inner loop: merge bin contents from partitions until a full
        // batch of N_TILE objects is staged (or input is exhausted).
        while true {
            if ready_ix == wr_ix && partition_ix < n_partitions {
                part_start_ix = ready_ix;
                var count = 0u;
                if partition_ix + local_id.x < n_partitions {
                    let in_ix = (partition_ix + local_id.x) * N_TILE + bin_ix;
                    let bin_header = bin_headers[in_ix];
                    count = bin_header.element_count;
                    sh_part_offsets[local_id.x] = bin_header.chunk_offset;
                }
                // prefix sum the element counts
                for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
                    sh_part_count[local_id.x] = count;
                    workgroupBarrier();
                    if local_id.x >= (1u << i) {
                        count += sh_part_count[local_id.x - (1u << i)];
                    }
                    workgroupBarrier();
                }
                sh_part_count[local_id.x] = part_start_ix + count;
                workgroupBarrier();
                ready_ix = sh_part_count[WG_SIZE - 1u];
                partition_ix += WG_SIZE;
            }
            // use binary search to find draw object to read
            var ix = rd_ix + local_id.x;
            if ix >= wr_ix && ix < ready_ix {
                var part_ix = 0u;
                for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
                    let probe = part_ix + ((N_TILE / 2u) >> i);
                    if ix >= sh_part_count[probe - 1u] {
                        part_ix = probe;
                    }
                }
                // Convert to an index within the found partition's chunk.
                ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u);
                let offset = sh_part_offsets[part_ix];
                sh_drawobj_ix[local_id.x] = bin_data[offset + ix];
            }
            wr_ix = min(rd_ix + N_TILE, ready_ix);
            if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) {
                break;
            }
        }
        // At this point, sh_drawobj_ix[0.. wr_ix - rd_ix] contains merged binning results.
        var tag = DRAWTAG_NOP;
        var drawobj_ix: u32;
        if local_id.x + rd_ix < wr_ix {
            drawobj_ix = sh_drawobj_ix[local_id.x];
            tag = scene[config.drawtag_base + drawobj_ix];
        }
        // Stage per-object tile geometry (bbox clipped to this bin).
        var tile_count = 0u;
        // I think this predicate is the same as the last, maybe they can be combined
        if tag != DRAWTAG_NOP {
            let path_ix = draw_monoids[drawobj_ix].path_ix;
            let path = paths[path_ix];
            let stride = path.bbox.z - path.bbox.x;
            sh_tile_stride[local_id.x] = stride;
            let dx = i32(path.bbox.x) - i32(bin_tile_x);
            let dy = i32(path.bbox.y) - i32(bin_tile_y);
            let x0 = clamp(dx, 0, i32(N_TILE_X));
            let y0 = clamp(dy, 0, i32(N_TILE_Y));
            let x1 = clamp(i32(path.bbox.z) - i32(bin_tile_x), 0, i32(N_TILE_X));
            let y1 = clamp(i32(path.bbox.w) - i32(bin_tile_y), 0, i32(N_TILE_Y));
            sh_tile_width[local_id.x] = u32(x1 - x0);
            sh_tile_x0[local_id.x] = u32(x0);
            sh_tile_y0[local_id.x] = u32(y0);
            tile_count = u32(x1 - x0) * u32(y1 - y0);
            // base relative to bin
            let base = path.tiles - u32(dy * i32(stride) + dx);
            sh_tile_base[local_id.x] = base;
            // TODO: there's a write_tile_alloc here in the source, not sure what it's supposed to do
        }
        // Prefix sum of tile counts
        sh_tile_count[local_id.x] = tile_count;
        for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) {
            workgroupBarrier();
            if local_id.x >= (1u << i) {
                tile_count += sh_tile_count[local_id.x - (1u << i)];
            }
            workgroupBarrier();
            sh_tile_count[local_id.x] = tile_count;
        }
        workgroupBarrier();
        let total_tile_count = sh_tile_count[N_TILE - 1u];
        // Parallel iteration over all tiles
        for (var ix = local_id.x; ix < total_tile_count; ix += N_TILE) {
            // Binary search to find draw object which contains this tile
            var el_ix = 0u;
            for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) {
                let probe = el_ix + ((N_TILE / 2u) >> i);
                if ix >= sh_tile_count[probe - 1u] {
                    el_ix = probe;
                }
            }
            drawobj_ix = sh_drawobj_ix[el_ix];
            tag = scene[config.drawtag_base + drawobj_ix];
            // TODO: clip logic
            let seq_ix = ix - select(0u, sh_tile_count[el_ix - 1u], el_ix > 0u);
            let width = sh_tile_width[el_ix];
            let x = sh_tile_x0[el_ix] + seq_ix % width;
            let y = sh_tile_y0[el_ix] + seq_ix / width;
            let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * y + x;
            let tile = tiles[tile_ix];
            let is_clip = (tag & 1u) != 0u;
            var is_blend = false;
            if is_clip {
                // Default (non-default blends force tile inclusion below).
                let BLEND_CLIP = (128u << 8u) | 3u;
                let scene_offset = draw_monoids[drawobj_ix].scene_offset;
                let dd = config.drawdata_base + scene_offset;
                let blend = scene[dd];
                is_blend = blend != BLEND_CLIP;
            }
            // Include tiles with segments, zero-backdrop clips / nonzero-
            // backdrop fills, or non-default blends.
            let include_tile = tile.segments != 0u || (tile.backdrop == 0) == is_clip || is_blend;
            if include_tile {
                let el_slice = el_ix / 32u;
                let el_mask = 1u << (el_ix & 31u);
                atomicOr(&sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
            }
        }
        workgroupBarrier();
        // At this point bit drawobj % 32 is set in sh_bitmaps[drawobj / 32][y * N_TILE_X + x]
        // if drawobj touches tile (x, y).
        // Write per-tile command list for this tile
        var slice_ix = 0u;
        var bitmap = atomicLoad(&sh_bitmaps[0u][local_id.x]);
        while true {
            if bitmap == 0u {
                slice_ix += 1u;
                // potential optimization: make iteration limit dynamic
                if slice_ix == N_SLICE {
                    break;
                }
                bitmap = atomicLoad(&sh_bitmaps[slice_ix][local_id.x]);
                if bitmap == 0u {
                    continue;
                }
            }
            let el_ix = slice_ix * 32u + firstTrailingBit(bitmap);
            drawobj_ix = sh_drawobj_ix[el_ix];
            // clear LSB of bitmap, using bit magic
            bitmap &= bitmap - 1u;
            let drawtag = scene[config.drawtag_base + drawobj_ix];
            let dm = draw_monoids[drawobj_ix];
            let dd = config.drawdata_base + dm.scene_offset;
            let di = dm.info_offset;
            if clip_zero_depth == 0u {
                let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x;
                let tile = tiles[tile_ix];
                switch drawtag {
                    // DRAWTAG_FILL_COLOR
                    case 0x44u: {
                        let linewidth = bitcast<f32>(info[di]);
                        write_path(tile, linewidth);
                        let rgba_color = scene[dd];
                        write_color(CmdColor(rgba_color));
                    }
                    // DRAWTAG_FILL_LIN_GRADIENT
                    case 0x114u: {
                        let linewidth = bitcast<f32>(info[di]);
                        write_path(tile, linewidth);
                        var lin: CmdLinGrad;
                        lin.index = scene[dd];
                        lin.line_x = bitcast<f32>(info[di + 1u]);
                        lin.line_y = bitcast<f32>(info[di + 2u]);
                        lin.line_c = bitcast<f32>(info[di + 3u]);
                        write_lin_grad(lin);
                    }
                    // DRAWTAG_FILL_RAD_GRADIENT
                    case 0x2dcu: {
                        let linewidth = bitcast<f32>(info[di]);
                        write_path(tile, linewidth);
                        var rad: CmdRadGrad;
                        rad.index = scene[dd];
                        let m0 = bitcast<f32>(info[di + 1u]);
                        let m1 = bitcast<f32>(info[di + 2u]);
                        let m2 = bitcast<f32>(info[di + 3u]);
                        let m3 = bitcast<f32>(info[di + 4u]);
                        rad.matrx = vec4<f32>(m0, m1, m2, m3);
                        rad.xlat = vec2<f32>(bitcast<f32>(info[di + 5u]), bitcast<f32>(info[di + 6u]));
                        rad.c1 = vec2<f32>(bitcast<f32>(info[di + 7u]), bitcast<f32>(info[di + 8u]));
                        rad.ra = bitcast<f32>(info[di + 9u]);
                        rad.roff = bitcast<f32>(info[di + 10u]);
                        write_rad_grad(rad);
                    }
                    // DRAWTAG_BEGIN_CLIP
                    case 0x05u: {
                        // Empty clip tile: everything until the matching
                        // EndClip is suppressed ("clip zero" state).
                        if tile.segments == 0u && tile.backdrop == 0 {
                            clip_zero_depth = clip_depth + 1u;
                        } else {
                            write_begin_clip();
                            render_blend_depth += 1u;
                            max_blend_depth = max(max_blend_depth, render_blend_depth);
                        }
                        clip_depth += 1u;
                    }
                    // DRAWTAG_END_CLIP
                    case 0x25u: {
                        clip_depth -= 1u;
                        write_path(tile, -1.0);
                        write_end_clip(scene[dd]);
                        render_blend_depth -= 1u;
                    }
                    default: {}
                }
            } else {
                // In "clip zero" state, suppress all drawing
                switch drawtag {
                    // DRAWTAG_BEGIN_CLIP
                    case 0x05u: {
                        clip_depth += 1u;
                    }
                    // DRAWTAG_END_CLIP
                    case 0x25u: {
                        if clip_depth == clip_zero_depth {
                            clip_zero_depth = 0u;
                        }
                        clip_depth -= 1u;
                    }
                    default: {}
                }
            }
        }
        rd_ix += N_TILE;
        if rd_ix >= ready_ix && partition_ix >= n_partitions {
            break;
        }
        workgroupBarrier();
    }
    if bin_tile_x < config.width_in_tiles && bin_tile_y < config.height_in_tiles {
        //ptcl[cmd_offset] = CMD_END;
        // TODO: blend stack allocation
    }
}

View file

@ -0,0 +1,197 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Finish prefix sum of drawtags, decode draw objects.
#import config
#import clip
#import drawtag
#import bbox
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> reduced: array<DrawMonoid>;
@group(0) @binding(3)
var<storage> path_bbox: array<PathBbox>;
@group(0) @binding(4)
var<storage, read_write> draw_monoid: array<DrawMonoid>;
@group(0) @binding(5)
var<storage, read_write> info: array<u32>;
@group(0) @binding(6)
var<storage, read_write> clip_inp: array<i32>;
let WG_SIZE = 256u;
// Possibly dedup?
// Possibly dedup?
// Affine transform: 2x2 linear part packed in matrx plus a translation.
// Component-to-entry mapping follows the scene encoding read in
// read_transform (6 consecutive f32 words); the determinant is computed
// elsewhere as matrx.x * matrx.w - matrx.y * matrx.z.
struct Transform {
    matrx: vec4<f32>,
    translate: vec2<f32>,
}
// Decode the ix-th transform from the scene buffer: six consecutive
// f32 words starting at transform_base (four matrix coefficients, then
// the translation).
fn read_transform(transform_base: u32, ix: u32) -> Transform {
    let base = transform_base + ix * 6u;
    let matrx = vec4<f32>(
        bitcast<f32>(scene[base]),
        bitcast<f32>(scene[base + 1u]),
        bitcast<f32>(scene[base + 2u]),
        bitcast<f32>(scene[base + 3u])
    );
    let translate = vec2<f32>(
        bitcast<f32>(scene[base + 4u]),
        bitcast<f32>(scene[base + 5u])
    );
    return Transform(matrx, translate);
}
// Workgroup memory shared by the two scan passes below (scan of preceding
// workgroups' reductions, then intra-workgroup scan of this block's tags).
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;

// Completes the prefix sum of draw monoids, then decodes each draw object:
// writes the exclusive monoid, per-object info (linewidth / gradient
// parameters) and the clip stream input.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) wg_id: vec3<u32>,
) {
    let ix = global_id.x;
    // Reduce prefix of workgroups up to this one.
    // NOTE(review): only the first WG_SIZE partial reductions are loaded,
    // so this assumes a dispatch of at most 256 workgroups — confirm.
    var agg = draw_monoid_identity();
    if local_id.x < wg_id.x {
        agg = reduced[local_id.x];
    }
    sh_scratch[local_id.x] = agg;
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        workgroupBarrier();
        if local_id.x + (1u << i) < WG_SIZE {
            let other = sh_scratch[local_id.x + (1u << i)];
            agg = combine_draw_monoid(agg, other);
        }
        workgroupBarrier();
        sh_scratch[local_id.x] = agg;
    }
    // Two barriers can be eliminated if we use separate shared arrays
    // for prefix and intra-workgroup prefix sum.
    workgroupBarrier();
    // Lane 0 now holds the combined monoid of all preceding workgroups.
    var m = sh_scratch[0];
    workgroupBarrier();
    let tag_word = scene[config.drawtag_base + ix];
    agg = map_draw_tag(tag_word);
    sh_scratch[local_id.x] = agg;
    // Intra-workgroup inclusive prefix scan (Hillis-Steele).
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        workgroupBarrier();
        if local_id.x >= 1u << i {
            let other = sh_scratch[local_id.x - (1u << i)];
            agg = combine_draw_monoid(agg, other);
        }
        workgroupBarrier();
        sh_scratch[local_id.x] = agg;
    }
    workgroupBarrier();
    if local_id.x > 0u {
        m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]);
    }
    // m now contains exclusive prefix sum of draw monoid
    draw_monoid[ix] = m;
    // Offsets into the scene draw data stream and the output info buffer.
    let dd = config.drawdata_base + m.scene_offset;
    let di = m.info_offset;
    if tag_word == DRAWTAG_FILL_COLOR || tag_word == DRAWTAG_FILL_LIN_GRADIENT ||
        tag_word == DRAWTAG_FILL_RAD_GRADIENT || tag_word == DRAWTAG_FILL_IMAGE ||
        tag_word == DRAWTAG_BEGIN_CLIP
    {
        let bbox = path_bbox[m.path_ix];
        // TODO: bbox is mostly yagni here, sort that out. Maybe clips?
        // let x0 = f32(bbox.x0);
        // let y0 = f32(bbox.y0);
        // let x1 = f32(bbox.x1);
        // let y1 = f32(bbox.y1);
        // let bbox_f = vec4(x0, y0, x1, y1);
        var matrx: vec4<f32>;
        var translate: vec2<f32>;
        // Non-negative linewidth marks a stroke; fills carry a negative value.
        var linewidth = bbox.linewidth;
        if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT {
            let transform = read_transform(config.transform_base, bbox.trans_ix);
            matrx = transform.matrx;
            translate = transform.translate;
        }
        if linewidth >= 0.0 {
            // Scale stroke width by the mean scale factor (sqrt |det|).
            // Note: doesn't deal with anisotropic case
            linewidth *= sqrt(abs(matrx.x * matrx.w - matrx.y * matrx.z));
        }
        switch tag_word {
            // DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE
            case 0x44u, 0x48u: {
                info[di] = bitcast<u32>(linewidth);
            }
            // DRAWTAG_FILL_LIN_GRADIENT
            case 0x114u: {
                info[di] = bitcast<u32>(linewidth);
                // Transform the two gradient endpoints to device space, then
                // store the line equation t = dot(p, line_xy) + line_c used
                // by fine rasterization to index the ramp.
                var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
                var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
                p0 = matrx.xy * p0.x + matrx.zw * p0.y + translate;
                p1 = matrx.xy * p1.x + matrx.zw * p1.y + translate;
                let dxy = p1 - p0;
                let scale = 1.0 / dot(dxy, dxy);
                let line_xy = dxy * scale;
                let line_c = -dot(p0, line_xy);
                info[di + 1u] = bitcast<u32>(line_xy.x);
                info[di + 2u] = bitcast<u32>(line_xy.y);
                info[di + 3u] = bitcast<u32>(line_c);
            }
            // DRAWTAG_FILL_RAD_GRADIENT
            case 0x2dcu: {
                info[di] = bitcast<u32>(linewidth);
                // Two-circle radial gradient: precompute the inverse
                // transform and focal parameters consumed by read_rad_grad
                // in fine rasterization.
                var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
                var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
                let r0 = bitcast<f32>(scene[dd + 5u]);
                let r1 = bitcast<f32>(scene[dd + 6u]);
                let inv_det = 1.0 / (matrx.x * matrx.w - matrx.y * matrx.z);
                let inv_mat = inv_det * vec4<f32>(matrx.w, -matrx.y, -matrx.z, matrx.x);
                var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y;
                inv_tr += p0;
                let center1 = p1 - p0;
                let rr = r1 / (r1 - r0);
                let ra_inv = rr / (r1 * r1 - dot(center1, center1));
                let c1 = center1 * ra_inv;
                let ra = rr * ra_inv;
                let roff = rr - 1.0;
                info[di + 1u] = bitcast<u32>(inv_mat.x);
                info[di + 2u] = bitcast<u32>(inv_mat.y);
                info[di + 3u] = bitcast<u32>(inv_mat.z);
                info[di + 4u] = bitcast<u32>(inv_mat.w);
                info[di + 5u] = bitcast<u32>(inv_tr.x);
                info[di + 6u] = bitcast<u32>(inv_tr.y);
                info[di + 7u] = bitcast<u32>(c1.x);
                info[di + 8u] = bitcast<u32>(c1.y);
                info[di + 9u] = bitcast<u32>(ra);
                info[di + 10u] = bitcast<u32>(roff);
            }
            default: {}
        }
    }
    if tag_word == DRAWTAG_BEGIN_CLIP || tag_word == DRAWTAG_END_CLIP {
        // End clips have no path of their own: the bitwise complement of the
        // draw index (negative when reinterpreted as i32) marks them for the
        // downstream clip stages.
        var path_ix = ~ix;
        if tag_word == DRAWTAG_BEGIN_CLIP {
            path_ix = m.path_ix;
        }
        clip_inp[m.clip_ix] = i32(path_ix);
    }
}

View file

@ -0,0 +1,54 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
#import config
#import drawtag
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage, read_write> reduced: array<DrawMonoid>;
let WG_SIZE = 256u;
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;
// Workgroup-level reduction of draw tag monoids: each thread maps one tag
// word to its monoid contribution, the workgroup combines them, and lane 0
// writes the workgroup total to `reduced` for the later scan stage.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
) {
    let ix = global_id.x;
    let tag_word = scene[config.drawtag_base + ix];
    var agg = map_draw_tag(tag_word);
    sh_scratch[local_id.x] = agg;
    // Tree-style reduction gathering from higher lanes; after
    // firstTrailingBit(WG_SIZE) = 8 rounds, lane 0 holds the full sum.
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        workgroupBarrier();
        if local_id.x + (1u << i) < WG_SIZE {
            let other = sh_scratch[local_id.x + (1u << i)];
            agg = combine_draw_monoid(agg, other);
        }
        workgroupBarrier();
        sh_scratch[local_id.x] = agg;
    }
    if local_id.x == 0u {
        // One output element per workgroup.
        reduced[ix >> firstTrailingBit(WG_SIZE)] = agg;
    }
}

View file

@ -14,6 +14,9 @@
//
// Also licensed under MIT license, at your choice.
// Fine rasterizer. This can run in simple (just path rendering) and full
// modes, controllable by #define.
// This is a cut'n'paste w/ backdrop.
struct Tile {
backdrop: i32,
@ -36,17 +39,64 @@ var<storage> segments: array<Segment>;
@group(0) @binding(3)
var<storage, read_write> output: array<u32>;
#ifdef full
#import blend
#import ptcl
let GRADIENT_WIDTH = 512;
let BLEND_STACK_SPLIT = 4u;
@group(0) @binding(4)
var<storage> ptcl: array<u32>;
@group(0) @binding(5)
var gradients: texture_2d<f32>;
// Decodes a CMD_FILL payload (tile segment index + backdrop) from the ptcl
// stream; the command occupies 3 words including the tag.
fn read_fill(cmd_ix: u32) -> CmdFill {
    let tile_word = ptcl[cmd_ix + 1u];
    let backdrop_word = i32(ptcl[cmd_ix + 2u]);
    return CmdFill(tile_word, backdrop_word);
}
// Decodes a CMD_STROKE payload (tile segment index + half line width) from
// the ptcl stream; the command occupies 3 words including the tag.
fn read_stroke(cmd_ix: u32) -> CmdStroke {
    let tile_word = ptcl[cmd_ix + 1u];
    let width_word = bitcast<f32>(ptcl[cmd_ix + 2u]);
    return CmdStroke(tile_word, width_word);
}
// Decodes a CMD_COLOR payload: a single packed RGBA word follows the tag.
fn read_color(cmd_ix: u32) -> CmdColor {
    let packed_color = ptcl[cmd_ix + 1u];
    return CmdColor(packed_color);
}
// Decodes a CMD_LIN_GRAD payload: gradient ramp index followed by the three
// line-equation coefficients (t = x * line_x + y * line_y + line_c).
fn read_lin_grad(cmd_ix: u32) -> CmdLinGrad {
    let index = ptcl[cmd_ix + 1u];
    let coef_x = bitcast<f32>(ptcl[cmd_ix + 2u]);
    let coef_y = bitcast<f32>(ptcl[cmd_ix + 3u]);
    let coef_c = bitcast<f32>(ptcl[cmd_ix + 4u]);
    return CmdLinGrad(index, coef_x, coef_y, coef_c);
}
// Decodes a CMD_RAD_GRAD payload: ramp index, inverse 2x2 matrix, inverse
// translation, focal center, and the ra/roff scalars (11 payload words).
fn read_rad_grad(cmd_ix: u32) -> CmdRadGrad {
    let index = ptcl[cmd_ix + 1u];
    let matrx = vec4<f32>(
        bitcast<f32>(ptcl[cmd_ix + 2u]),
        bitcast<f32>(ptcl[cmd_ix + 3u]),
        bitcast<f32>(ptcl[cmd_ix + 4u]),
        bitcast<f32>(ptcl[cmd_ix + 5u])
    );
    let xlat = vec2<f32>(
        bitcast<f32>(ptcl[cmd_ix + 6u]),
        bitcast<f32>(ptcl[cmd_ix + 7u])
    );
    let c1 = vec2<f32>(
        bitcast<f32>(ptcl[cmd_ix + 8u]),
        bitcast<f32>(ptcl[cmd_ix + 9u])
    );
    let ra = bitcast<f32>(ptcl[cmd_ix + 10u]);
    let roff = bitcast<f32>(ptcl[cmd_ix + 11u]);
    return CmdRadGrad(index, matrx, xlat, c1, ra, roff);
}
#endif
let PIXELS_PER_THREAD = 4u;
@compute @workgroup_size(4, 16)
fn main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
let tile = tiles[tile_ix];
fn fill_path(tile: Tile, xy: vec2<f32>) -> array<f32, PIXELS_PER_THREAD> {
var area: array<f32, PIXELS_PER_THREAD>;
let backdrop_f = f32(tile.backdrop);
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
@ -89,8 +139,168 @@ fn main(
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
area[i] = abs(area[i]);
}
return area;
}
// Computes per-pixel stroke coverage for this thread's run of pixels by
// walking the tile's linked list of line segments (starting at `seg`,
// 0 terminates) and taking the minimum distance to any segment; alpha is
// then half_width + 0.5 - distance, clamped to [0, 1].
fn stroke_path(seg: u32, half_width: f32, xy: vec2<f32>) -> array<f32, PIXELS_PER_THREAD> {
    var df: array<f32, PIXELS_PER_THREAD>;
    for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
        df[i] = 1e9;
    }
    var segment_ix = seg;
    while segment_ix != 0u {
        let segment = segments[segment_ix];
        let delta = segment.delta;
        // Vector from segment origin to the first pixel's center (+0.5).
        let dpos0 = xy + vec2<f32>(0.5, 0.5) - segment.origin;
        let scale = 1.0 / dot(delta, delta);
        for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
            let dpos = vec2<f32>(dpos0.x + f32(i), dpos0.y);
            // Project the pixel onto the segment, clamped to its endpoints.
            let t = clamp(dot(dpos, delta) * scale, 0.0, 1.0);
            // performance idea: hoist sqrt out of loop
            df[i] = min(df[i], length(delta * t - dpos));
        }
        segment_ix = segment.next;
    }
    for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
        // reuse array; return alpha rather than distance
        df[i] = clamp(half_width + 0.5 - df[i], 0.0, 1.0);
    }
    return df;
}
// Fine rasterization entry point. Each workgroup renders one tile; each
// thread covers PIXELS_PER_THREAD horizontally adjacent pixels. In "full"
// mode it interprets the per-tile command list (ptcl); otherwise it renders
// plain path coverage.
@compute @workgroup_size(4, 16)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) wg_id: vec3<u32>,
) {
    let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
    let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
#ifdef full
    // Premultiplied RGBA accumulator, one entry per covered pixel.
    var rgba: array<vec4<f32>, PIXELS_PER_THREAD>;
    // In-register clip/blend stack; nesting beyond BLEND_STACK_SPLIT needs
    // the (not yet implemented) memory spill — see TODOs below.
    var blend_stack: array<array<u32, BLEND_STACK_SPLIT>, PIXELS_PER_THREAD>;
    var clip_depth = 0u;
    // Coverage set by fill/stroke/solid commands, consumed by paint commands.
    var area: array<f32, PIXELS_PER_THREAD>;
    var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC;
    // main interpretation loop
    while true {
        let tag = ptcl[cmd_ix];
        if tag == CMD_END {
            break;
        }
        switch tag {
            // CMD_FILL: compute coverage from the tile's fill segments.
            case 1u: {
                let fill = read_fill(cmd_ix);
                let tile = Tile(fill.backdrop, fill.tile);
                area = fill_path(tile, xy);
                cmd_ix += 3u;
            }
            // CMD_STROKE: compute coverage from stroke distance.
            case 2u: {
                let stroke = read_stroke(cmd_ix);
                area = stroke_path(stroke.tile, stroke.half_width, xy);
                cmd_ix += 3u;
            }
            // CMD_SOLID: tile is fully covered.
            case 3u: {
                for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
                    area[i] = 1.0;
                }
                cmd_ix += 1u;
            }
            // CMD_COLOR: composite a solid color with current coverage.
            case 5u: {
                let color = read_color(cmd_ix);
                // Unpack the color and reverse byte order so red lands in x.
                let fg = unpack4x8unorm(color.rgba_color).wzyx;
                for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
                    let fg_i = fg * area[i];
                    // Premultiplied source-over.
                    rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
                }
                cmd_ix += 2u;
            }
            // CMD_LIN_GRAD: sample the gradient ramp texture by the signed
            // line-equation parameter.
            case 6u: {
                let lin = read_lin_grad(cmd_ix);
                let d = lin.line_x * xy.x + lin.line_y * xy.y + lin.line_c;
                for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
                    let my_d = d + lin.line_x * f32(i);
                    let x = i32(round(clamp(my_d, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1)));
                    let fg_rgba = textureLoad(gradients, vec2<i32>(x, i32(lin.index)), 0);
                    let fg_i = fg_rgba * area[i];
                    rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
                }
                cmd_ix += 5u;
            }
            // CMD_RAD_GRAD: two-point radial gradient.
            case 7u: {
                let rad = read_rad_grad(cmd_ix);
                for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
                    let my_xy = vec2<f32>(xy.x + f32(i), xy.y);
                    // TODO: can hoist y, but for now stick to piet-gpu
                    let xy_xformed = rad.matrx.xz * my_xy.x + rad.matrx.yw * my_xy.y - rad.xlat;
                    let ba = dot(xy_xformed, rad.c1);
                    let ca = rad.ra * dot(xy_xformed, xy_xformed);
                    let t = sqrt(ba * ba + ca) - ba - rad.roff;
                    let x = i32(round(clamp(t, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1)));
                    let fg_rgba = textureLoad(gradients, vec2<i32>(x, i32(rad.index)), 0);
                    let fg_i = fg_rgba * area[i];
                    rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
                }
                cmd_ix += 12u;
            }
            // CMD_BEGIN_CLIP: push current color, restart compositing into a
            // fresh transparent layer.
            case 9u: {
                if clip_depth < BLEND_STACK_SPLIT {
                    for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
                        blend_stack[clip_depth][i] = pack4x8unorm(rgba[i]);
                        rgba[i] = vec4<f32>(0.0);
                    }
                } else {
                    // TODO: spill to memory
                }
                clip_depth += 1u;
                cmd_ix += 1u;
            }
            // CMD_END_CLIP: pop the stack and blend the clipped layer,
            // masked by the clip path's coverage, over the background.
            case 10u: {
                let blend = ptcl[cmd_ix + 1u];
                clip_depth -= 1u;
                for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
                    var bg_rgba: u32;
                    if clip_depth < BLEND_STACK_SPLIT {
                        bg_rgba = blend_stack[clip_depth][i];
                    } else {
                        // load from memory
                        // NOTE(review): until the spill path exists, bg_rgba
                        // keeps its zero initial value on this branch.
                    }
                    let bg = unpack4x8unorm(bg_rgba);
                    let fg = rgba[i] * area[i];
                    rgba[i] = blend_mix_compose(bg, fg, blend);
                }
                cmd_ix += 2u;
            }
            // CMD_JUMP: follow the link to the next ptcl chunk.
            case 11u: {
                cmd_ix = ptcl[cmd_ix + 1u];
            }
            default: {}
        }
    }
    // Write out un-premultiplied RGBA8; the epsilon guards divide-by-zero
    // for fully transparent pixels.
    let out_ix = global_id.y * (config.width_in_tiles * TILE_WIDTH) + global_id.x * PIXELS_PER_THREAD;
    for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
        let fg = rgba[i];
        let a_inv = 1.0 / (fg.a + 1e-6);
        let rgba_sep = vec4<f32>(fg.r * a_inv, fg.g * a_inv, fg.b * a_inv, fg.a);
        let bytes = pack4x8unorm(rgba_sep);
        output[out_ix + i] = bytes;
    }
#else
    // Simple mode: fill coverage only, four alpha bytes packed per word.
    let tile = tiles[tile_ix];
    let area = fill_path(tile, xy);
    let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
    let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;
    output[out_ix] = bytes;
#endif
}

View file

@ -14,59 +14,53 @@
//
// Also licensed under MIT license, at your choice.
#import config
#import pathtag
@group(0) @binding(0)
var<storage> path_tags: array<u32>;
var<storage> config: Config;
@group(0) @binding(1)
var<storage> tag_monoids: array<TagMonoid>;
var<storage> scene: array<u32>;
// TODO: should probably have single "scene" binding.
@group(0) @binding(2)
var<storage> path_data: array<u32>;
var<storage> tag_monoids: array<TagMonoid>;
#ifdef cubics_out
@group(0) @binding(3)
var<storage, read_write> output: array<vec2<f32>>;
#else
#import config
struct Tile {
// We don't get this from import as it's the atomic version
struct AtomicTile {
backdrop: atomic<i32>,
segments: atomic<u32>,
}
#import segment
// Should probably be uniform binding
@group(0) @binding(3)
var<storage> config: Config;
var<storage, read_write> tiles: array<AtomicTile>;
@group(0) @binding(4)
var<storage, read_write> tiles: array<Tile>;
@group(0) @binding(5)
var<storage, read_write> segments: array<Segment>;
#endif
var<private> pathdata_base: u32;
fn read_f32_point(ix: u32) -> vec2<f32> {
let x = bitcast<f32>(path_data[ix]);
let y = bitcast<f32>(path_data[ix + 1u]);
let x = bitcast<f32>(scene[pathdata_base + ix]);
let y = bitcast<f32>(scene[pathdata_base + ix + 1u]);
return vec2<f32>(x, y);
}
fn read_i16_point(ix: u32) -> vec2<f32> {
let raw = path_data[ix];
let raw = scene[pathdata_base + ix];
let x = f32(i32(raw << 16u) >> 16u);
let y = f32(i32(raw) >> 16u);
return vec2<f32>(x, y);
}
#ifndef cubics_out
let TILE_WIDTH = 16u;
let TILE_HEIGHT = 16u;
struct SubdivResult {
val: f32,
a0: f32,
@ -136,7 +130,8 @@ fn main(
) {
// Obtain exclusive prefix sum of tag monoid
let ix = global_id.x;
let tag_word = path_tags[ix >> 2u];
let tag_word = scene[config.pathtag_base + (ix >> 2u)];
pathdata_base = config.pathdata_base;
let shift = (ix & 3u) * 8u;
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);

View file

@ -0,0 +1,275 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Path coarse rasterization for the full implementation.
#import config
#import pathtag
#import tile
#import segment
#import cubic
#import bump
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> tag_monoids: array<TagMonoid>;
@group(0) @binding(3)
var<storage> cubics: array<Cubic>;
@group(0) @binding(4)
var<storage> paths: array<Path>;
// We don't get this from import as it's the atomic version
struct AtomicTile {
backdrop: atomic<i32>,
segments: atomic<u32>,
}
@group(0) @binding(5)
var<storage, read_write> bump: BumpAllocators;
@group(0) @binding(6)
var<storage, read_write> tiles: array<AtomicTile>;
@group(0) @binding(7)
var<storage, read_write> segments: array<Segment>;
// Result of analyzing one quadratic for flattening: `val` is the
// (unnormalized) subdivision-count measure, `a0`/`a2` the approximate
// parabola integral values at the quadratic's endpoints, reused when
// emitting subdivision points.
struct SubdivResult {
    val: f32,
    a0: f32,
    a2: f32,
}
// Tuning constant for the approximate parabola arc-length integral.
let D = 0.67;

// Approximation of the integral used to parametrize a parabola by arc
// length (Levien's flattening method).
fn approx_parabola_integral(x: f32) -> f32 {
    let inner = D * D * D * D + 0.25 * x * x;
    let denom_sq = sqrt(1.0 - D + inner);
    return x * inverseSqrt(denom_sq);
}
// Tuning constant for the approximate inverse parabola integral.
let B = 0.39;

// Approximate inverse of approx_parabola_integral; maps arc-length
// parameter back to the parabola parameter.
fn approx_parabola_inv_integral(x: f32) -> f32 {
    let inner = B * B + 0.5 * x * x;
    return x * sqrt(1.0 - B + inner);
}
// Estimates how finely one quadratic Bezier must be subdivided to flatten
// it to tolerance sqrt_tol^2, using the approximate parabola arc-length
// parametrization. Returns the subdivision measure plus the integral values
// at the endpoints for reuse in the emission pass.
fn estimate_subdiv(p0: vec2<f32>, p1: vec2<f32>, p2: vec2<f32>, sqrt_tol: f32) -> SubdivResult {
    let d01 = p1 - p0;
    let d12 = p2 - p1;
    let dd = d01 - d12;
    let cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x;
    let cross_inv = 1.0 / cross;
    // Parabola parameter values at the two endpoints.
    let x0 = dot(d01, dd) * cross_inv;
    let x2 = dot(d12, dd) * cross_inv;
    let scale = abs(cross / (length(dd) * (x2 - x0)));
    let a0 = approx_parabola_integral(x0);
    let a2 = approx_parabola_integral(x2);
    var val = 0.0;
    // Skip numerically degenerate (near-linear) quadratics.
    if scale < 1e9 {
        let da = abs(a2 - a0);
        let sqrt_scale = sqrt(scale);
        if sign(x0) == sign(x2) {
            val = sqrt_scale;
        } else {
            // Endpoints straddle the parabola vertex (cusp-like case).
            let xmin = sqrt_tol / sqrt_scale;
            val = sqrt_tol / approx_parabola_integral(xmin);
        }
        val *= da;
    }
    return SubdivResult(val, a0, a2);
}
// Evaluates a quadratic Bezier at parameter t.
fn eval_quad(p0: vec2<f32>, p1: vec2<f32>, p2: vec2<f32>, t: f32) -> vec2<f32> {
    let omt = 1.0 - t;
    let tail = p1 * (omt * 2.0) + p2 * t;
    return p0 * (omt * omt) + tail * t;
}
// Evaluates a cubic Bezier at parameter t.
fn eval_cubic(p0: vec2<f32>, p1: vec2<f32>, p2: vec2<f32>, p3: vec2<f32>, t: f32) -> vec2<f32> {
    let omt = 1.0 - t;
    let inner = p2 * (omt * 3.0) + p3 * t;
    let mid = p1 * (omt * omt * 3.0) + inner * t;
    return p0 * (omt * omt * omt) + mid * t;
}
// Bump-allocates one segment slot. Index 0 is reserved as the linked-list
// terminator, so returned indices start at 1.
fn alloc_segment() -> u32 {
    let offset = atomicAdd(&bump.segments, 1u);
    return offset + 1u;
}
// Upper bound on quadratics used to approximate one cubic.
let MAX_QUADS = 16u;

// Coarse path rasterization (full pipeline): flattens each cubic into line
// segments and bins them into tiles, building per-tile linked lists of
// segments and accumulating winding-number backdrops via atomics.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
) {
    let ix = global_id.x;
    // Four tag bytes are packed per word; extract this thread's byte.
    let tag_word = scene[config.pathtag_base + (ix >> 2u)];
    let shift = (ix & 3u) * 8u;
    var tag_byte = (tag_word >> shift) & 0xffu;

    if (tag_byte & PATH_TAG_SEG_TYPE) != 0u {
        // Discussion question: it might actually be cheaper to do the path segment
        // decoding & transform again rather than store the result in a buffer;
        // classic memory vs ALU tradeoff.
        let cubic = cubics[global_id.x];
        let path = paths[cubic.path_ix];
        let bbox = vec4<i32>(path.bbox);
        let p0 = cubic.p0;
        let p1 = cubic.p1;
        let p2 = cubic.p2;
        let p3 = cubic.p3;
        // Estimate how many quadratics are needed to approximate the cubic
        // within the accuracy budget.
        let err_v = 3.0 * (p2 - p1) + p0 - p3;
        let err = dot(err_v, err_v);
        let ACCURACY = 0.25;
        let Q_ACCURACY = ACCURACY * 0.1;
        let REM_ACCURACY = (ACCURACY - Q_ACCURACY);
        let MAX_HYPOT2 = 432.0 * Q_ACCURACY * Q_ACCURACY;
        var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1u);
        n_quads = min(n_quads, MAX_QUADS);
        // Pass 1: per-quadratic subdivision estimates and their total.
        var keep_params: array<SubdivResult, MAX_QUADS>;
        var val = 0.0;
        var qp0 = p0;
        let step = 1.0 / f32(n_quads);
        for (var i = 0u; i < n_quads; i += 1u) {
            let t = f32(i + 1u) * step;
            let qp2 = eval_cubic(p0, p1, p2, p3, t);
            var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step);
            // Adjust the midpoint so the quadratic interpolates the cubic.
            qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
            let params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY));
            keep_params[i] = params;
            val += params.val;
            qp0 = qp2;
        }
        // Total number of line segments to emit for this cubic.
        let n = max(u32(ceil(val * (0.5 / sqrt(REM_ACCURACY)))), 1u);
        // Pass 2: emit n lines at (approximately) equal arc-length spacing,
        // binning each into the tiles it touches.
        var lp0 = p0;
        qp0 = p0;
        let v_step = val / f32(n);
        var n_out = 1u;
        var val_sum = 0.0;
        for (var i = 0u; i < n_quads; i += 1u) {
            let t = f32(i + 1u) * step;
            let qp2 = eval_cubic(p0, p1, p2, p3, t);
            var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step);
            qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
            let params = keep_params[i];
            let u0 = approx_parabola_inv_integral(params.a0);
            let u2 = approx_parabola_inv_integral(params.a2);
            let uscale = 1.0 / (u2 - u0);
            var val_target = f32(n_out) * v_step;
            while n_out == n || val_target < val_sum + params.val {
                var lp1: vec2<f32>;
                if n_out == n {
                    // Last segment snaps exactly to the cubic's endpoint.
                    lp1 = p3;
                } else {
                    // Invert the arc-length parametrization to find the point.
                    let u = (val_target - val_sum) / params.val;
                    let a = mix(params.a0, params.a2, u);
                    let au = approx_parabola_inv_integral(a);
                    let t = (au - u0) * uscale;
                    lp1 = eval_quad(qp0, qp1, qp2, t);
                }
                // Output line segment lp0..lp1
                let xymin = min(lp0, lp1);
                let xymax = max(lp0, lp1);
                let dp = lp1 - lp0;
                let recip_dx = 1.0 / dp.x;
                // Guard near-horizontal lines against divide-by-zero.
                let invslope = select(dp.x / dp.y, 1.0e9, abs(dp.y) < 1.0e-9);
                let c = 0.5 * abs(invslope);
                let b = invslope;
                let SX = 1.0 / f32(TILE_WIDTH);
                let SY = 1.0 / f32(TILE_HEIGHT);
                // x-intercept (in tile units) of the line at a tile row's center.
                let a = (lp0.x - (lp0.y - 0.5 * f32(TILE_HEIGHT)) * b) * SX;
                var x0 = i32(floor(xymin.x * SX));
                var x1 = i32(floor(xymax.x * SX) + 1.0);
                var y0 = i32(floor(xymin.y * SY));
                var y1 = i32(floor(xymax.y * SY) + 1.0);
                x0 = clamp(x0, bbox.x, bbox.z);
                x1 = clamp(x1, bbox.x, bbox.z);
                y0 = clamp(y0, bbox.y, bbox.w);
                y1 = clamp(y1, bbox.y, bbox.w);
                var xc = a + b * f32(y0);
                let stride = bbox.z - bbox.x;
                var base = i32(path.tiles) + (y0 - bbox.y) * stride - bbox.x;
                // "xray": tile column where the segment crosses each row's
                // top edge; used for backdrop (winding) bookkeeping.
                var xray = i32(floor(lp0.x * SX));
                var last_xray = i32(floor(lp1.x * SX));
                if dp.y < 0.0 {
                    let tmp = xray;
                    xray = last_xray;
                    last_xray = tmp;
                }
                for (var y = y0; y < y1; y += 1) {
                    let tile_y0 = f32(y) * f32(TILE_HEIGHT);
                    let xbackdrop = max(xray + 1, bbox.x);
                    // Segment crosses this row's top edge: bump the backdrop
                    // of the tile to its right, signed by direction.
                    if xymin.y < tile_y0 && xbackdrop < bbox.z {
                        let backdrop = select(-1, 1, dp.y < 0.0);
                        let tile_ix = base + xbackdrop;
                        atomicAdd(&tiles[tile_ix].backdrop, backdrop);
                    }
                    var next_xray = last_xray;
                    if y + 1 < y1 {
                        let tile_y1 = f32(y + 1) * f32(TILE_HEIGHT);
                        let x_edge = lp0.x + (tile_y1 - lp0.y) * invslope;
                        next_xray = i32(floor(x_edge * SX));
                    }
                    let min_xray = min(xray, next_xray);
                    let max_xray = max(xray, next_xray);
                    var xx0 = min(i32(floor(xc - c)), min_xray);
                    var xx1 = max(i32(ceil(xc + c)), max_xray + 1);
                    xx0 = clamp(xx0, x0, x1);
                    xx1 = clamp(xx1, x0, x1);
                    var tile_seg: Segment;
                    for (var x = xx0; x < xx1; x += 1) {
                        let tile_x0 = f32(x) * f32(TILE_WIDTH);
                        let tile_ix = base + x;
                        // allocate segment, insert linked list
                        let seg_ix = alloc_segment();
                        let old = atomicExchange(&tiles[tile_ix].segments, seg_ix);
                        tile_seg.origin = lp0;
                        tile_seg.delta = dp;
                        // Clip the segment to the tile's left edge; y_edge
                        // records where it enters for the fill rule.
                        var y_edge = mix(lp0.y, lp1.y, (tile_x0 - lp0.x) * recip_dx);
                        if xymin.x < tile_x0 {
                            let p = vec2<f32>(tile_x0, y_edge);
                            if dp.x < 0.0 {
                                tile_seg.delta = p - lp0;
                            } else {
                                tile_seg.origin = p;
                                tile_seg.delta = lp1 - p;
                            }
                            // Avoid exactly-vertical clipped deltas.
                            if tile_seg.delta.x == 0.0 {
                                tile_seg.delta.x = sign(dp.x) * 1e-9;
                            }
                        }
                        if x <= min_xray || max_xray < x {
                            // Sentinel: no top-edge crossing in this tile.
                            y_edge = 1e9;
                        }
                        tile_seg.y_edge = y_edge;
                        tile_seg.next = old;
                        segments[seg_ix] = tile_seg;
                    }
                    xc += b;
                    base += stride;
                    xray = next_xray;
                }
                n_out += 1u;
                val_target += v_step;
                lp0 = lp1;
            }
            val_sum += params.val;
            qp0 = qp2;
        }
    }
}

View file

@ -0,0 +1,217 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Path segment decoding for the full case.
// In the simple case, path segments are decoded as part of the coarse
// path rendering stage. In the full case, they are separated, as the
// decoding process also generates bounding boxes, and those in turn are
// used for tile allocation and clipping; actual coarse path rasterization
// can't proceed until those are complete.
// There's some duplication of the decoding code but we won't worry about
// that just now. Perhaps it could be factored more nicely later.
#import config
#import pathtag
#import cubic
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> tag_monoids: array<TagMonoid>;
// Atomic counterpart of the shared PathBbox: the integer bounds are grown
// with atomicMin/atomicMax by the many segment threads of one path, while
// linewidth/trans_ix are plain fields written in the PATH_TAG_PATH branch.
struct AtomicPathBbox {
    x0: atomic<i32>,
    y0: atomic<i32>,
    x1: atomic<i32>,
    y1: atomic<i32>,
    linewidth: f32,
    trans_ix: u32,
}
@group(0) @binding(3)
var<storage, read_write> path_bboxes: array<AtomicPathBbox>;
@group(0) @binding(4)
var<storage, read_write> cubics: array<Cubic>;
// Monoid is yagni, for future optimization
// struct BboxMonoid {
// bbox: vec4<f32>,
// flags: u32,
// }
// let FLAG_RESET_BBOX = 1u;
// let FLAG_SET_BBOX = 2u;
// fn combine_bbox_monoid(a: BboxMonoid, b: BboxMonoid) -> BboxMonoid {
// var c: BboxMonoid;
// c.bbox = b.bbox;
// // TODO: previous-me thought this should be gated on b & SET_BBOX == false also
// if (a.flags & FLAG_RESET_BBOX) == 0u && b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y {
// c.bbox = a.bbox;
// } else if (a.flags & FLAG_RESET_BBOX) == 0u && (b.flags & FLAG_SET_BBOX) == 0u ||
// (a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y)
// {
// c.bbox = vec4<f32>(min(a.bbox.xy, c.bbox.xy), max(a.bbox.xw, c.bbox.zw));
// }
// c.flags = (a.flags & FLAG_SET_BBOX) | b.flags;
// c.flags |= (a.flags & FLAG_RESET_BBOX) << 1u;
// return c;
// }
// fn bbox_monoid_identity() -> BboxMonoid {
// return BboxMonoid();
// }
var<private> pathdata_base: u32;
// Reads a point stored as two consecutive f32 words in the scene's path
// data stream, at word offset ix from pathdata_base.
fn read_f32_point(ix: u32) -> vec2<f32> {
    let word_ix = pathdata_base + ix;
    let px = bitcast<f32>(scene[word_ix]);
    let py = bitcast<f32>(scene[word_ix + 1u]);
    return vec2<f32>(px, py);
}
// Reads a point stored as two signed 16-bit coordinates packed into one
// scene word (x in the low half, y in the high half).
fn read_i16_point(ix: u32) -> vec2<f32> {
    let packed = scene[pathdata_base + ix];
    // Shift-pair sign-extends each 16-bit half.
    let px = i32(packed << 16u) >> 16u;
    let py = i32(packed) >> 16u;
    return vec2<f32>(f32(px), f32(py));
}
// Affine transform (duplicated from draw_leaf; see "possibly dedup" note
// there): `matrx` is the 2x2 linear part, `translate` the offset, applied
// as matrx.xy * p.x + matrx.zw * p.y + translate (see transform_apply).
struct Transform {
    matrx: vec4<f32>,
    translate: vec2<f32>,
}
// Loads the six-f32 affine transform at index ix from the scene buffer.
fn read_transform(transform_base: u32, ix: u32) -> Transform {
    let base = transform_base + ix * 6u;
    var m: vec4<f32>;
    m.x = bitcast<f32>(scene[base]);
    m.y = bitcast<f32>(scene[base + 1u]);
    m.z = bitcast<f32>(scene[base + 2u]);
    m.w = bitcast<f32>(scene[base + 3u]);
    let t = vec2<f32>(bitcast<f32>(scene[base + 4u]), bitcast<f32>(scene[base + 5u]));
    return Transform(m, t);
}
// Applies an affine transform to a point.
fn transform_apply(transform: Transform, p: vec2<f32>) -> vec2<f32> {
    let m = transform.matrx;
    let linear_part = m.xy * p.x + m.zw * p.y;
    return linear_part + transform.translate;
}
// Rounds toward negative infinity; used for bbox min corners.
fn round_down(x: f32) -> i32 {
    return i32(floor(x));
}

// Rounds toward positive infinity; used for bbox max corners.
fn round_up(x: f32) -> i32 {
    return i32(ceil(x));
}
// Path segment decoding: one thread per tag byte. Reads the segment's
// points (f32 or packed i16), applies the transform, degree-raises lines
// and quadratics to cubics, writes the cubic out, and grows the path's
// integer bounding box atomically.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
) {
    let ix = global_id.x;
    // Four tag bytes per word; recover this byte and the exclusive monoid
    // prefix up to it (word-level monoid plus partial in-word reduction).
    let tag_word = scene[config.pathtag_base + (ix >> 2u)];
    pathdata_base = config.pathdata_base;
    let shift = (ix & 3u) * 8u;
    var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
    tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);
    var tag_byte = (tag_word >> shift) & 0xffu;

    let out = &path_bboxes[tm.path_ix];
    // NOTE(review): when this byte lacks PATH_TAG_PATH, linewidth keeps its
    // zero initial value, so the stroke bbox expansion below is a no-op —
    // confirm that is the intended behavior for mid-path segments.
    var linewidth: f32;
    if (tag_byte & PATH_TAG_PATH) != 0u {
        linewidth = bitcast<f32>(scene[config.linewidth_base + tm.linewidth_ix]);
        (*out).linewidth = linewidth;
        (*out).trans_ix = tm.trans_ix;
    }
    // Decode path data
    let seg_type = tag_byte & PATH_TAG_SEG_TYPE;
    if seg_type != 0u {
        var p0: vec2<f32>;
        var p1: vec2<f32>;
        var p2: vec2<f32>;
        var p3: vec2<f32>;
        // f32 points occupy two words each; i16 points one word each.
        if (tag_byte & PATH_TAG_F32) != 0u {
            p0 = read_f32_point(tm.pathseg_offset);
            p1 = read_f32_point(tm.pathseg_offset + 2u);
            if seg_type >= PATH_TAG_QUADTO {
                p2 = read_f32_point(tm.pathseg_offset + 4u);
                if seg_type == PATH_TAG_CUBICTO {
                    p3 = read_f32_point(tm.pathseg_offset + 6u);
                }
            }
        } else {
            p0 = read_i16_point(tm.pathseg_offset);
            p1 = read_i16_point(tm.pathseg_offset + 1u);
            if seg_type >= PATH_TAG_QUADTO {
                p2 = read_i16_point(tm.pathseg_offset + 2u);
                if seg_type == PATH_TAG_CUBICTO {
                    p3 = read_i16_point(tm.pathseg_offset + 3u);
                }
            }
        }
        let transform = read_transform(config.transform_base, tm.trans_ix);
        //let transform = Transform(vec4<f32>(1.0, 0.0, 0.0, 1.0), vec2<f32>());
        p0 = transform_apply(transform, p0);
        p1 = transform_apply(transform, p1);
        var bbox = vec4<f32>(min(p0, p1), max(p0, p1));
        // Degree-raise
        if seg_type == PATH_TAG_LINETO {
            // Line -> cubic with control points at the 1/3 and 2/3 marks.
            p3 = p1;
            p2 = mix(p3, p0, 1.0 / 3.0);
            p1 = mix(p0, p3, 1.0 / 3.0);
        } else if seg_type >= PATH_TAG_QUADTO {
            p2 = transform_apply(transform, p2);
            bbox = vec4<f32>(min(bbox.xy, p2), max(bbox.zw, p2));
            if seg_type == PATH_TAG_CUBICTO {
                p3 = transform_apply(transform, p3);
                bbox = vec4<f32>(min(bbox.xy, p3), max(bbox.zw, p3));
            } else {
                // Quadratic -> cubic: standard degree elevation.
                p3 = p2;
                p2 = mix(p1, p2, 1.0 / 3.0);
                p1 = mix(p1, p0, 1.0 / 3.0);
            }
        }
        if linewidth >= 0.0 {
            // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm
            // This is the correct bounding box, but we're not handling rendering
            // in the anisotropic case, so it may mismatch.
            // NOTE(review): the comment above originally said "isotropic";
            // stroke scaling elsewhere (draw_leaf) is isotropic, so the
            // mismatch is presumably for anisotropic transforms — confirm.
            let stroke = 0.5 * linewidth * vec2<f32>(length(transform.matrx.xz), length(transform.matrx.yw));
            bbox += vec4<f32>(-stroke, stroke);
        }
        cubics[global_id.x] = Cubic(p0, p1, p2, p3, tm.path_ix, 0u);
        // Update bounding box using atomics only. Computing a monoid is a
        // potential future optimization.
        if bbox.z > bbox.x || bbox.w > bbox.y {
            atomicMin(&(*out).x0, round_down(bbox.x));
            atomicMin(&(*out).y0, round_down(bbox.y));
            atomicMax(&(*out).x1, round_up(bbox.z));
            atomicMax(&(*out).y1, round_up(bbox.w));
        }
    }
}

View file

@ -14,14 +14,16 @@
//
// Also licensed under MIT license, at your choice.
#import config
#import pathtag
// Note: should have a single scene binding, path_tags are a slice
// in that; need a config uniform.
@group(0) @binding(0)
var<storage> path_tags: array<u32>;
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage, read_write> reduced: array<TagMonoid>;
let LG_WG_SIZE = 8u;
@ -35,7 +37,7 @@ fn main(
@builtin(local_invocation_id) local_id: vec3<u32>,
) {
let ix = global_id.x;
let tag_word = path_tags[ix];
let tag_word = scene[config.pathtag_base + ix];
var agg = reduce_tag(tag_word);
sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {

View file

@ -14,15 +14,19 @@
//
// Also licensed under MIT license, at your choice.
#import config
#import pathtag
@group(0) @binding(0)
var<storage> path_tags: array<u32>;
var<storage> config: Config;
@group(0) @binding(1)
var<storage> reduced: array<TagMonoid>;
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> reduced: array<TagMonoid>;
@group(0) @binding(3)
var<storage, read_write> tag_monoids: array<TagMonoid>;
let LG_WG_SIZE = 8u;
@ -39,13 +43,13 @@ fn main(
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
var agg = tag_monoid_identity();
if (local_id.x < wg_id.x) {
if local_id.x < wg_id.x {
agg = reduced[local_id.x];
}
sh_parent[local_id.x] = agg;
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
workgroupBarrier();
if (local_id.x + (1u << i) < WG_SIZE) {
if local_id.x + (1u << i) < WG_SIZE {
let other = sh_parent[local_id.x + (1u << i)];
agg = combine_tag_monoid(agg, other);
}
@ -54,12 +58,12 @@ fn main(
}
let ix = global_id.x;
let tag_word = path_tags[ix];
let tag_word = scene[config.pathtag_base + ix];
agg = reduce_tag(tag_word);
sh_monoid[local_id.x] = agg;
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
workgroupBarrier();
if (local_id.x >= 1u << i) {
if local_id.x >= 1u << i {
let other = sh_monoid[local_id.x - (1u << i)];
agg = combine_tag_monoid(other, agg);
}
@ -68,7 +72,7 @@ fn main(
}
// prefix up to this workgroup
var tm = sh_parent[0];
if (local_id.x > 0u) {
if local_id.x > 0u {
tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]);
}
// exclusive prefix sum, granularity of 4 tag bytes

View file

@ -0,0 +1,32 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// The annotated bounding box for a path. It has been transformed,
// but contains a link to the active transform, mostly for gradients.
// Coordinates are integer pixels (for the convenience of atomic update)
// but will probably become fixed-point fractions for rectangles.
struct PathBbox {
    // Integer pixel bounds: (x0, y0) min corner, (x1, y1) max corner.
    x0: i32,
    y0: i32,
    x1: i32,
    y1: i32,
    // Stroke width; negative values appear to encode fills (the consumers
    // test `linewidth >= 0.0` for stroke handling) — confirm.
    linewidth: f32,
    // Index of the transform active for this path (mostly for gradients).
    trans_ix: u32,
}
// Intersects two bboxes in (min.xy, max.zw) form: keep the larger min
// corner and the smaller max corner. May yield an empty (inverted) box.
fn bbox_intersect(a: vec4<f32>, b: vec4<f32>) -> vec4<f32> {
    let lo = max(a.xy, b.xy);
    let hi = min(a.zw, b.zw);
    return vec4(lo, hi);
}

View file

@ -0,0 +1,351 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Color mixing modes
let MIX_NORMAL = 0u;
let MIX_MULTIPLY = 1u;
let MIX_SCREEN = 2u;
let MIX_OVERLAY = 3u;
let MIX_DARKEN = 4u;
let MIX_LIGHTEN = 5u;
let MIX_COLOR_DODGE = 6u;
let MIX_COLOR_BURN = 7u;
let MIX_HARD_LIGHT = 8u;
let MIX_SOFT_LIGHT = 9u;
let MIX_DIFFERENCE = 10u;
let MIX_EXCLUSION = 11u;
let MIX_HUE = 12u;
let MIX_SATURATION = 13u;
let MIX_COLOR = 14u;
let MIX_LUMINOSITY = 15u;
let MIX_CLIP = 128u;
// Screen blend mode: the complement of the product of complements,
// written in expanded form.
fn screen(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
    return cb + cs - cb * cs;
}
// Color-dodge blend for a single channel: brightens the backdrop `cb`
// based on the source `cs`. The cb == 0 case is checked first so that
// cb == 0, cs == 1 returns 0 rather than hitting a 0/0 division.
fn color_dodge(cb: f32, cs: f32) -> f32 {
    if cb == 0.0 {
        return 0.0;
    } else if cs == 1.0 {
        return 1.0;
    } else {
        return min(1.0, cb / (1.0 - cs));
    }
}
// Color-burn blend for a single channel: darkens the backdrop `cb`
// based on the source `cs`. The cb == 1 case is checked first so that
// cb == 1, cs == 0 returns 1 rather than hitting a 0/0 division.
fn color_burn(cb: f32, cs: f32) -> f32 {
    if cb == 1.0 {
        return 1.0;
    } else if cs == 0.0 {
        return 0.0;
    } else {
        return 1.0 - min(1.0, (1.0 - cb) / cs);
    }
}
// Hard-light blend: per channel, multiply where the source is dark
// (cs <= 0.5) and screen where it is light. The comparison yields a
// boolean vector, converted to 0.0/1.0 so mix() acts as a per-channel
// selector between the two precomputed results.
fn hard_light(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
    return mix(
        screen(cb, 2.0 * cs - 1.0),
        cb * 2.0 * cs,
        vec3<f32>(cs <= vec3<f32>(0.5))
    );
}
// Soft-light blend. `d` is the D(cb) helper of the soft-light formula:
// a cubic polynomial for dark backdrops (cb <= 0.25) and sqrt otherwise,
// selected per channel via the 0/1 mix trick. The outer mix() then picks
// the darkening or lightening form depending on whether cs <= 0.5.
fn soft_light(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
    let d = mix(
        sqrt(cb),
        ((16.0 * cb - vec3(12.0)) * cb + vec3(4.0)) * cb,
        vec3<f32>(cb <= vec3<f32>(0.25))
    );
    return mix(
        cb + (2.0 * cs - vec3(1.0)) * (d - cb),
        cb - (vec3(1.0) - 2.0 * cs) * cb * (vec3(1.0) - cb),
        vec3<f32>(cs <= vec3<f32>(0.5))
    );
}
// Saturation of an RGB color: the spread between its largest and
// smallest channel.
fn sat(c: vec3<f32>) -> f32 {
    let mx = max(c.x, max(c.y, c.z));
    let mn = min(c.x, min(c.y, c.z));
    return mx - mn;
}
// Luminosity of an RGB color, as a weighted sum of channels
// (weights 0.3/0.59/0.11, matching the original's `f` vector).
fn lum(c: vec3<f32>) -> f32 {
    return dot(c, vec3<f32>(0.3, 0.59, 0.11));
}
// Clamp a color into displayable range while preserving its luminosity:
// out-of-range channels are pulled back toward the luminosity value `l`
// by uniformly scaling each channel's offset from `l`.
fn clip_color(c: vec3<f32>) -> vec3<f32> {
    var c = c;
    let l = lum(c);
    // Smallest and largest channels determine how far out of range we are.
    let n = min(c.x, min(c.y, c.z));
    let x = max(c.x, max(c.y, c.z));
    if n < 0.0 {
        c = l + (((c - l) * l) / (l - n));
    }
    if x > 1.0 {
        c = l + (((c - l) * (1.0 - l)) / (x - l));
    }
    return c;
}
// Replace the luminosity of `c` with `l` by shifting all channels
// uniformly, then clip back into displayable range.
fn set_lum(c: vec3<f32>, l: f32) -> vec3<f32> {
    return clip_color(c + (l - lum(c)));
}
// Helper for set_sat. Takes the three channels already ordered
// (*cmin <= *cmid <= *cmax) and rescales them in place so that the
// resulting saturation (cmax - cmin) equals `s`, with cmin at 0.
// An achromatic input (cmax == cmin) collapses to all zeros.
fn set_sat_inner(
    cmin: ptr<function, f32>,
    cmid: ptr<function, f32>,
    cmax: ptr<function, f32>,
    s: f32
) {
    if *cmax > *cmin {
        *cmid = ((*cmid - *cmin) * s) / (*cmax - *cmin);
        *cmax = s;
    } else {
        *cmid = 0.0;
        *cmax = 0.0;
    }
    *cmin = 0.0;
}
// Set the saturation of `c` to `s`. The nested conditionals enumerate the
// six possible channel orderings, passing r/g/b to set_sat_inner in
// (min, mid, max) order so it can rescale them in place.
fn set_sat(c: vec3<f32>, s: f32) -> vec3<f32> {
    var r = c.r;
    var g = c.g;
    var b = c.b;
    if r <= g {
        if g <= b {
            // r <= g <= b
            set_sat_inner(&r, &g, &b, s);
        } else {
            if r <= b {
                // r <= b < g
                set_sat_inner(&r, &b, &g, s);
            } else {
                // b < r <= g
                set_sat_inner(&b, &r, &g, s);
            }
        }
    } else {
        if r <= b {
            // g < r <= b
            set_sat_inner(&g, &r, &b, s);
        } else {
            if g <= b {
                // g <= b < r
                set_sat_inner(&g, &b, &r, s);
            } else {
                // b < g < r
                set_sat_inner(&b, &g, &r, s);
            }
        }
    }
    return vec3<f32>(r, g, b);
}
// Blends two RGB colors together. The colors are assumed to be in sRGB
// color space, and this function does not take alpha into account.
// `mode` is one of the MIX_* constants above; each case computes the
// blended color per the corresponding separable or non-separable mode.
fn blend_mix(cb: vec3<f32>, cs: vec3<f32>, mode: u32) -> vec3<f32> {
    var b = vec3<f32>(0.0);
    switch mode {
        // MIX_MULTIPLY
        case 1u: {
            b = cb * cs;
        }
        // MIX_SCREEN
        case 2u: {
            b = screen(cb, cs);
        }
        // MIX_OVERLAY
        case 3u: {
            // Overlay is hard-light with the operands swapped.
            b = hard_light(cs, cb);
        }
        // MIX_DARKEN
        case 4u: {
            b = min(cb, cs);
        }
        // MIX_LIGHTEN
        case 5u: {
            b = max(cb, cs);
        }
        // MIX_COLOR_DODGE
        case 6u: {
            b = vec3<f32>(color_dodge(cb.x, cs.x), color_dodge(cb.y, cs.y), color_dodge(cb.z, cs.z));
        }
        // MIX_COLOR_BURN
        case 7u: {
            b = vec3<f32>(color_burn(cb.x, cs.x), color_burn(cb.y, cs.y), color_burn(cb.z, cs.z));
        }
        // MIX_HARD_LIGHT
        case 8u: {
            b = hard_light(cb, cs);
        }
        // MIX_SOFT_LIGHT
        case 9u: {
            b = soft_light(cb, cs);
        }
        // MIX_DIFFERENCE
        case 10u: {
            b = abs(cb - cs);
        }
        // MIX_EXCLUSION
        case 11u: {
            b = cb + cs - 2.0 * cb * cs;
        }
        // MIX_HUE
        case 12u: {
            b = set_lum(set_sat(cs, sat(cb)), lum(cb));
        }
        // MIX_SATURATION
        case 13u: {
            b = set_lum(set_sat(cb, sat(cs)), lum(cb));
        }
        // MIX_COLOR
        case 14u: {
            b = set_lum(cs, lum(cb));
        }
        // MIX_LUMINOSITY
        case 15u: {
            b = set_lum(cb, lum(cs));
        }
        // MIX_NORMAL and any unrecognized mode: source passes through.
        default: {
            b = cs;
        }
    }
    return b;
}
// Composition modes
let COMPOSE_CLEAR = 0u;
let COMPOSE_COPY = 1u;
let COMPOSE_DEST = 2u;
let COMPOSE_SRC_OVER = 3u;
let COMPOSE_DEST_OVER = 4u;
let COMPOSE_SRC_IN = 5u;
let COMPOSE_DEST_IN = 6u;
let COMPOSE_SRC_OUT = 7u;
let COMPOSE_DEST_OUT = 8u;
let COMPOSE_SRC_ATOP = 9u;
let COMPOSE_DEST_ATOP = 10u;
let COMPOSE_XOR = 11u;
let COMPOSE_PLUS = 12u;
let COMPOSE_PLUS_LIGHTER = 13u;
// Apply general compositing operation.
// Inputs are separated colors and alpha, output is premultiplied.
// `fa` and `fb` are the Porter-Duff coefficients applied to the source
// and backdrop contributions respectively; each COMPOSE_* case just
// selects its coefficient pair. The default (including COMPOSE_CLEAR,
// mode 0) leaves both at 0, producing transparent black.
fn blend_compose(
    cb: vec3<f32>,
    cs: vec3<f32>,
    ab: f32,
    as_: f32,
    mode: u32
) -> vec4<f32> {
    var fa = 0.0;
    var fb = 0.0;
    switch mode {
        // COMPOSE_COPY
        case 1u: {
            fa = 1.0;
            fb = 0.0;
        }
        // COMPOSE_DEST
        case 2u: {
            fa = 0.0;
            fb = 1.0;
        }
        // COMPOSE_SRC_OVER
        case 3u: {
            fa = 1.0;
            fb = 1.0 - as_;
        }
        // COMPOSE_DEST_OVER
        case 4u: {
            fa = 1.0 - ab;
            fb = 1.0;
        }
        // COMPOSE_SRC_IN
        case 5u: {
            fa = ab;
            fb = 0.0;
        }
        // COMPOSE_DEST_IN
        case 6u: {
            fa = 0.0;
            fb = as_;
        }
        // COMPOSE_SRC_OUT
        case 7u: {
            fa = 1.0 - ab;
            fb = 0.0;
        }
        // COMPOSE_DEST_OUT
        case 8u: {
            fa = 0.0;
            fb = 1.0 - as_;
        }
        // COMPOSE_SRC_ATOP
        case 9u: {
            fa = ab;
            fb = 1.0 - as_;
        }
        // COMPOSE_DEST_ATOP
        case 10u: {
            fa = 1.0 - ab;
            fb = as_;
        }
        // COMPOSE_XOR
        case 11u: {
            fa = 1.0 - ab;
            fb = 1.0 - as_;
        }
        // COMPOSE_PLUS
        case 12u: {
            fa = 1.0;
            fb = 1.0;
        }
        // COMPOSE_PLUS_LIGHTER
        case 13u: {
            // Saturating add of premultiplied colors; returns directly
            // rather than going through the fa/fb path.
            return min(vec4<f32>(1.0), vec4<f32>(as_ * cs + ab * cb, as_ + ab));
        }
        default: {}
    }
    // Weighted sum of the (un-premultiplied) inputs yields the
    // premultiplied output color and its alpha.
    let as_fa = as_ * fa;
    let ab_fb = ab * fb;
    let co = as_fa * cs + ab_fb * cb;
    return vec4<f32>(co, as_fa + ab_fb);
}
// Apply color mixing and composition. Both input and output colors are
// premultiplied RGB.
// `mode` packs the mix mode in bits 8 and up and the compose mode in the
// low byte; bit 15 corresponds to the MIX_CLIP flag (MIX_CLIP << 8u) and
// is stripped by the 0x7fff mask so clips take the fast path too.
fn blend_mix_compose(backdrop: vec4<f32>, src: vec4<f32>, mode: u32) -> vec4<f32> {
    let BLEND_DEFAULT = ((MIX_NORMAL << 8u) | COMPOSE_SRC_OVER);
    // Guards the divisions below against zero alpha.
    let EPSILON = 1e-15;
    if (mode & 0x7fffu) == BLEND_DEFAULT {
        // Both normal+src_over blend and clip case
        return backdrop * (1.0 - src.a) + src;
    }
    // Un-premultiply colors for blending
    let inv_src_a = 1.0 / (src.a + EPSILON);
    var cs = src.rgb * inv_src_a;
    let inv_backdrop_a = 1.0 / (backdrop.a + EPSILON);
    let cb = backdrop.rgb * inv_backdrop_a;
    let mix_mode = mode >> 8u;
    let mixed = blend_mix(cb, cs, mix_mode);
    // Interpolate toward the mixed color by backdrop coverage.
    cs = mix(cs, mixed, backdrop.a);
    let compose_mode = mode & 0xffu;
    if compose_mode == COMPOSE_SRC_OVER {
        // Fast path for the common compose mode.
        let co = mix(backdrop.rgb, cs, src.a);
        return vec4<f32>(co, src.a + backdrop.a * (1.0 - src.a));
    } else {
        return blend_compose(cb, cs, backdrop.a, src.a, compose_mode);
    }
}

View file

@ -0,0 +1,23 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// TODO: robust memory (failure flags)
// Bump allocators for the pipeline's dynamically sized outputs. Each
// field is the allocation high-water mark of one buffer, advanced with
// atomicAdd by the stage that allocates from it (e.g. tile_alloc bumps
// `tile`).
struct BumpAllocators {
    binning: atomic<u32>,
    ptcl: atomic<u32>,
    tile: atomic<u32>,
    segments: atomic<u32>,
}

View file

@ -0,0 +1,14 @@
// Element for the clip matching scan. Presumably `a` counts unmatched
// end-clips and `b` unmatched begin-clips in a range — TODO confirm
// against the clip stages that consume this.
struct Bic {
    a: u32,
    b: u32,
}
// Associative combine: begins carried by `x` cancel against ends in `y`.
fn bic_combine(x: Bic, y: Bic) -> Bic {
    let m = min(x.b, y.a);
    return Bic(x.a + y.a - m, x.b + y.b - m);
}
// One clip stack element: index of the parent clip plus its bounding box.
struct ClipEl {
    parent_ix: u32,
    bbox: vec4<f32>,
}

View file

@ -17,4 +17,29 @@
// Global render configuration, shared by all pipeline stages.
struct Config {
    // Target dimensions, in tiles.
    width_in_tiles: u32,
    height_in_tiles: u32,
    // Element counts in the scene.
    n_drawobj: u32,
    n_path: u32,
    n_clip: u32,
    // offsets within scene buffer (in u32 units)
    // Note: this is a difference from piet-gpu, which is in bytes
    pathtag_base: u32,
    pathdata_base: u32,
    drawtag_base: u32,
    drawdata_base: u32,
    transform_base: u32,
    linewidth_base: u32,
}
// Geometry of tiles and bins
// Tile size in pixels.
let TILE_WIDTH = 16u;
let TILE_HEIGHT = 16u;
// Number of tiles per bin
let N_TILE_X = 16u;
let N_TILE_Y = 16u;
//let N_TILE = N_TILE_X * N_TILE_Y;
// Must stay equal to N_TILE_X * N_TILE_Y; written as a literal (the
// product form above is commented out — presumably rejected by the
// shader compiler; confirm before changing).
let N_TILE = 256u;

View file

@ -0,0 +1,25 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// One cubic Bézier segment (control points p0..p3), already transformed,
// tagged with the path it belongs to.
struct Cubic {
    p0: vec2<f32>,
    p1: vec2<f32>,
    p2: vec2<f32>,
    p3: vec2<f32>,
    path_ix: u32,
    // Needed?
    // NOTE(review): appears to exist only to round out the struct size —
    // confirm against the host-side layout before removing.
    padding: u32,
}

View file

@ -0,0 +1,60 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// The DrawMonoid is computed as a prefix sum to aid in decoding
// the variable-length encoding of draw objects.
struct DrawMonoid {
    // The number of paths preceding this draw object.
    path_ix: u32,
    // The number of clip operations preceding this draw object.
    clip_ix: u32,
    // The offset of the encoded draw object in the scene (u32s).
    scene_offset: u32,
    // The offset of the associated info.
    info_offset: u32,
}
// Each draw object has a 32-bit draw tag, which is a bit-packed
// version of the draw monoid.
// Per map_draw_tag below: bit 0 marks a clip op, bits 2-4 give the
// number of scene words, bits 6-9 the number of info words; a zero tag
// contributes nothing.
let DRAWTAG_NOP = 0u;
let DRAWTAG_FILL_COLOR = 0x44u;
let DRAWTAG_FILL_LIN_GRADIENT = 0x114u;
let DRAWTAG_FILL_RAD_GRADIENT = 0x2dcu;
let DRAWTAG_FILL_IMAGE = 0x48u;
let DRAWTAG_BEGIN_CLIP = 0x05u;
let DRAWTAG_END_CLIP = 0x25u;
// Identity of combine_draw_monoid: the zero-initialized monoid.
fn draw_monoid_identity() -> DrawMonoid {
    return DrawMonoid();
}
// Monoid combine: all four counters add componentwise.
fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) -> DrawMonoid {
    return DrawMonoid(
        a.path_ix + b.path_ix,
        a.clip_ix + b.clip_ix,
        a.scene_offset + b.scene_offset,
        a.info_offset + b.info_offset,
    );
}
// Unpack one draw tag word into its monoid contribution.
fn map_draw_tag(tag_word: u32) -> DrawMonoid {
    var c: DrawMonoid;
    // Every tag except NOP introduces one path.
    c.path_ix = u32(tag_word != DRAWTAG_NOP);
    // Bit 0 is set for clip operations (BEGIN_CLIP = 0x05, END_CLIP = 0x25).
    c.clip_ix = tag_word & 1u;
    // Bits 2-4: u32 words of scene data consumed by this object.
    c.scene_offset = (tag_word >> 2u) & 0x07u;
    // Bits 6-9: u32 words of per-object info.
    c.info_offset = (tag_word >> 6u) & 0x0fu;
    return c;
}

View file

@ -16,9 +16,13 @@
struct TagMonoid {
trans_ix: u32,
// TODO: I don't think pathseg_ix is used.
pathseg_ix: u32,
pathseg_offset: u32,
// Note: piet-gpu has linewidth and path, but not needed here
#ifdef full
linewidth_ix: u32,
path_ix: u32,
#endif
}
let PATH_TAG_SEG_TYPE = 3u;
@ -26,15 +30,14 @@ let PATH_TAG_LINETO = 1u;
let PATH_TAG_QUADTO = 2u;
let PATH_TAG_CUBICTO = 3u;
let PATH_TAG_F32 = 8u;
let PATH_TAG_PATH = 0x10u;
let PATH_TAG_TRANSFORM = 0x20u;
#ifdef full
let PATH_TAG_PATH = 0x10u;
let PATH_TAG_LINEWIDTH = 0x40u;
#endif
fn tag_monoid_identity() -> TagMonoid {
var c: TagMonoid;
c.trans_ix = 0u;
c.pathseg_ix = 0u;
c.pathseg_offset = 0u;
return c;
return TagMonoid();
}
fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid {
@ -42,6 +45,10 @@ fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid {
c.trans_ix = a.trans_ix + b.trans_ix;
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
#ifdef full
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
c.path_ix = a.path_ix + b.path_ix;
#endif
return c;
}
@ -55,5 +62,9 @@ fn reduce_tag(tag_word: u32) -> TagMonoid {
a += a >> 8u;
a += a >> 16u;
c.pathseg_offset = a & 0xffu;
#ifdef full
c.path_ix = countOneBits(tag_word & (PATH_TAG_PATH * 0x1010101u));
c.linewidth_ix = countOneBits(tag_word & (PATH_TAG_LINEWIDTH * 0x1010101u));
#endif
return c;
}

View file

@ -0,0 +1,72 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Layout of per-tile command list
// Initial allocation, in u32's.
let PTCL_INITIAL_ALLOC = 64u;
// Size of each continuation chunk allocated when a tile's list overflows.
let PTCL_INCREMENT = 256u;
// Amount of space taken by jump
let PTCL_HEADROOM = 2u;
// Tags for PTCL commands
// CMD_END terminates a tile's list; CMD_JUMP links to a continuation chunk.
let CMD_END = 0u;
let CMD_FILL = 1u;
let CMD_STROKE = 2u;
let CMD_SOLID = 3u;
let CMD_COLOR = 5u;
let CMD_LIN_GRAD = 6u;
let CMD_RAD_GRAD = 7u;
let CMD_BEGIN_CLIP = 9u;
let CMD_END_CLIP = 10u;
let CMD_JUMP = 11u;
// The individual PTCL structs are written here, but read/write is by
// hand in the relevant shaders
// Payload of CMD_FILL: segment list offset and winding backdrop.
struct CmdFill {
    tile: u32,
    backdrop: i32,
}
// Payload of CMD_STROKE.
struct CmdStroke {
    tile: u32,
    half_width: f32,
}
// Payload of CMD_JUMP: index of the continuation chunk.
struct CmdJump {
    new_ix: u32,
}
// Payload of CMD_COLOR: packed RGBA.
struct CmdColor {
    rgba_color: u32,
}
// Payload of CMD_LIN_GRAD: ramp index plus line equation coefficients.
struct CmdLinGrad {
    index: u32,
    line_x: f32,
    line_y: f32,
    line_c: f32,
}
// Payload of CMD_RAD_GRAD: ramp index plus the radial mapping.
struct CmdRadGrad {
    index: u32,
    // NOTE(review): `matrx` looks like a typo for `matrix` (the host-side
    // debug struct spells it `matrix`); harmless since reads/writes are by
    // hand, but worth renaming.
    matrx: vec4<f32>,
    xlat: vec2<f32>,
    c1: vec2<f32>,
    ra: f32,
    roff: f32,
}

View file

@ -0,0 +1,29 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Common datatypes for path and tile intermediate info.
// Per-path tile allocation record, written by tile_alloc.
struct Path {
    // bounding box in tiles
    bbox: vec4<u32>,
    // offset (in u32's) to tile rectangle
    tiles: u32,
}
// Per-tile state for one path.
struct Tile {
    // Signed winding backdrop for the tile; presumably the winding number
    // carried in from the left edge — confirm against the backdrop stage.
    backdrop: i32,
    // Reference into the segments buffer (count or offset — confirm
    // against path_coarse/fine).
    segments: u32,
}

View file

@ -0,0 +1,114 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Tile allocation (and zeroing of tiles)
#import config
#import bump
#import drawtag
#import tile
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> draw_bboxes: array<vec4<f32>>;
@group(0) @binding(3)
var<storage, read_write> bump: BumpAllocators;
@group(0) @binding(4)
var<storage, read_write> paths: array<Path>;
@group(0) @binding(5)
var<storage, read_write> tiles: array<Tile>;
let WG_SIZE = 256u;
var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
var<workgroup> sh_tile_offset: u32;
// Allocates the tile rectangle for each draw object (one thread per
// object), then zeroes the freshly allocated tiles cooperatively.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
) {
    // scale factors useful for converting coordinates to tiles
    // TODO: make into constants
    let SX = 1.0 / f32(TILE_WIDTH);
    let SY = 1.0 / f32(TILE_HEIGHT);
    let drawobj_ix = global_id.x;
    // Out-of-range threads act as NOP objects (empty bbox, zero tiles).
    var drawtag = DRAWTAG_NOP;
    if drawobj_ix < config.n_drawobj {
        drawtag = scene[config.drawtag_base + drawobj_ix];
    }
    var x0 = 0;
    var y0 = 0;
    var x1 = 0;
    var y1 = 0;
    // END_CLIP draws no geometry of its own, so it gets an empty bbox.
    if drawtag != DRAWTAG_NOP && drawtag != DRAWTAG_END_CLIP {
        let bbox = draw_bboxes[drawobj_ix];
        // Convert the pixel bbox to an inclusive-exclusive tile range.
        x0 = i32(floor(bbox.x * SX));
        y0 = i32(floor(bbox.y * SY));
        x1 = i32(ceil(bbox.z * SX));
        y1 = i32(ceil(bbox.w * SY));
    }
    // Clamp to the viewport; an off-screen bbox collapses to zero tiles.
    let ux0 = u32(clamp(x0, 0, i32(config.width_in_tiles)));
    let uy0 = u32(clamp(y0, 0, i32(config.height_in_tiles)));
    let ux1 = u32(clamp(x1, 0, i32(config.width_in_tiles)));
    let uy1 = u32(clamp(y1, 0, i32(config.height_in_tiles)));
    let tile_count = (ux1 - ux0) * (uy1 - uy0);
    var total_tile_count = tile_count;
    sh_tile_count[local_id.x] = tile_count;
    // Hillis-Steele inclusive prefix sum of tile counts over the workgroup.
    for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
        workgroupBarrier();
        if local_id.x >= (1u << i) {
            total_tile_count += sh_tile_count[local_id.x - (1u << i)];
        }
        workgroupBarrier();
        sh_tile_count[local_id.x] = total_tile_count;
    }
    // The last thread bump-allocates tiles for the whole workgroup and
    // stashes the base offset in its own paths slot for broadcast below.
    if local_id.x == WG_SIZE - 1u {
        paths[drawobj_ix].tiles = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]);
    }
    // Using storage barriers is a workaround for what appears to be a miscompilation
    // when a normal workgroup-shared variable is used to broadcast the value.
    storageBarrier();
    // drawobj_ix | (WG_SIZE - 1u) is the last lane's index: workgroups are
    // WG_SIZE-aligned, so OR-ing in the low bits reaches that slot.
    let tile_offset = paths[drawobj_ix | (WG_SIZE - 1u)].tiles;
    storageBarrier();
    if drawobj_ix < config.n_drawobj {
        // Exclusive prefix for this object = inclusive prefix of previous lane.
        let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u);
        let bbox = vec4<u32>(ux0, uy0, ux1, uy1);
        let path = Path(bbox, tile_offset + tile_subix);
        paths[drawobj_ix] = path;
    }
    // zero allocated memory
    // Note: if the number of draw objects is small, utilization will be poor.
    // There are two things that can be done to improve that. One would be a
    // separate (indirect) dispatch. Another would be to have each workgroup
    // process fewer draw objects than the number of threads in the wg.
    let total_count = sh_tile_count[WG_SIZE - 1u];
    for (var i = local_id.x; i < total_count; i += WG_SIZE) {
        // Note: could format output buffer as u32 for even better load
        // balancing, as does piet-gpu.
        tiles[tile_offset + i] = Tile(0, 0u);
    }
}

5
piet-wgsl/src/debug.rs Normal file
View file

@ -0,0 +1,5 @@
#![allow(dead_code)]
pub mod clip;
pub mod draw;
pub mod fine;

View file

@ -0,0 +1,13 @@
use bytemuck::{Pod, Zeroable};
/// Host-side mirror of the GPU `ClipEl` struct (shader/shared/clip.wgsl).
#[derive(Copy, Clone, Debug, Zeroable, Pod)]
#[repr(C)]
pub struct ClipEl {
    /// Index of the parent clip in the clip stack.
    pub parent_ix: u32,
    /// Explicit padding; presumably matches the GPU struct's vec4
    /// alignment (bbox at a 16-byte offset) — confirm against WGSL layout.
    pub pad: [u32; 3],
    /// Clip bounding box, (x0, y0, x1, y1).
    pub bbox: [f32; 4],
}
/// Reinterpret a raw byte buffer (e.g. a downloaded GPU buffer) as a
/// vector of clip elements. Panics if the length or alignment doesn't
/// match `ClipEl`.
pub fn parse_clip_els(data: &[u8]) -> Vec<ClipEl> {
    bytemuck::cast_slice(data).to_vec()
}

View file

@ -0,0 +1,14 @@
use bytemuck::{Pod, Zeroable};
/// Host-side mirror of the GPU `DrawMonoid` struct
/// (shader/shared/drawtag.wgsl).
#[derive(Copy, Clone, Debug, Zeroable, Pod)]
#[repr(C)]
pub struct DrawMonoid {
    /// Number of paths preceding this draw object.
    pub path_ix: u32,
    /// Number of clip operations preceding this draw object.
    pub clip_ix: u32,
    /// Offset of the encoded draw object in the scene (u32s).
    pub scene_offset: u32,
    /// Offset of the associated info.
    pub info_offset: u32,
}
/// Reinterpret a raw byte buffer (e.g. a downloaded GPU buffer) as a
/// vector of draw monoids. Panics if the length or alignment doesn't
/// match `DrawMonoid`.
pub fn parse_draw_monoids(data: &[u8]) -> Vec<DrawMonoid> {
    bytemuck::cast_slice(data).to_vec()
}

153
piet-wgsl/src/debug/fine.rs Normal file
View file

@ -0,0 +1,153 @@
/// Decoded payload of CMD_FILL (see shader/shared/ptcl.wgsl).
#[derive(Copy, Clone, Debug)]
#[repr(C)]
pub struct Fill {
    pub tile: u32,
    pub backdrop: i32,
}
/// Decoded payload of CMD_STROKE.
#[derive(Copy, Clone, Debug)]
#[repr(C)]
pub struct Stroke {
    pub tile: u32,
    pub half_width: f32,
}
/// Decoded payload of CMD_COLOR.
#[derive(Copy, Clone, Debug)]
#[repr(C)]
pub struct Color {
    // Packed color bytes as stored in the PTCL word.
    abgr: [u8; 4],
}
/// Decoded payload of CMD_LIN_GRAD: ramp index plus line equation.
#[derive(Copy, Clone, Debug)]
#[repr(C)]
pub struct LinGrad {
    pub index: u32,
    pub line_x: f32,
    pub line_y: f32,
    pub line_c: f32,
}
/// Decoded payload of CMD_RAD_GRAD: ramp index plus the radial mapping.
#[derive(Copy, Clone, Debug)]
#[repr(C)]
pub struct RadGrad {
    pub index: u32,
    pub matrix: [f32; 4],
    pub xlat: [f32; 2],
    pub c1: [f32; 2],
    pub ra: f32,
    pub roff: f32,
}
/// One decoded PTCL command; variants mirror the CMD_* tags in
/// shader/shared/ptcl.wgsl.
#[derive(Copy, Clone, Debug)]
pub enum Command {
    Fill(Fill),
    Stroke(Stroke),
    Solid,
    Color(Color),
    LinGrad(LinGrad),
    RadGrad(RadGrad),
    BeginClip,
    // Payload is the blend word of CMD_END_CLIP.
    EndClip(u32),
    End,
}
// Words per tile slot; must match PTCL_INITIAL_ALLOC in ptcl.wgsl.
const PTCL_INITIAL_ALLOC: usize = 64;
/// The decoded command lists of all non-empty tiles.
#[derive(Debug)]
pub struct CommandList {
    // (tile x, tile y, commands) triples, in row-major tile order.
    pub tiles: Vec<(u32, u32, Vec<Command>)>,
}
impl CommandList {
    /// Decode the per-tile command lists from a raw PTCL buffer.
    ///
    /// `width` and `height` are in pixels; tiles are 16x16 (TILE_WIDTH /
    /// TILE_HEIGHT in the shaders). Tiles whose command list is empty are
    /// omitted from the result.
    pub fn parse(width: usize, height: usize, ptcl: &[u8]) -> Self {
        let width_tiles = width / 16;
        let height_tiles = height / 16;
        let mut tiles = Vec::new();
        // Walk the tile grid in row-major order; each tile's list starts
        // at a fixed slot of PTCL_INITIAL_ALLOC words.
        for tile_ix in 0..width_tiles * height_tiles {
            let x = tile_ix % width_tiles;
            let y = tile_ix / width_tiles;
            let commands = parse_commands(ptcl, tile_ix * PTCL_INITIAL_ALLOC);
            if !commands.is_empty() {
                tiles.push((x as u32, y as u32, commands));
            }
        }
        Self { tiles }
    }
}
/// Decode one tile's PTCL command stream, starting at word index `ix`.
///
/// Tag values mirror the CMD_* constants in shader/shared/ptcl.wgsl.
/// CMD_JUMP (11) is followed transparently into the continuation chunk;
/// CMD_END (0) or the end of the buffer terminates the list.
///
/// NOTE(review): payload words are read with unchecked indexing, so a
/// truncated or corrupt buffer will panic — acceptable for a debug tool.
fn parse_commands(ptcl: &[u8], mut ix: usize) -> Vec<Command> {
    let mut commands = vec![];
    let words: &[u32] = bytemuck::cast_slice(ptcl);
    while ix < words.len() {
        let tag = words[ix];
        ix += 1;
        match tag {
            // CMD_END
            0 => break,
            // CMD_FILL
            1 => {
                commands.push(Command::Fill(Fill {
                    tile: words[ix],
                    backdrop: words[ix + 1] as i32,
                }));
                ix += 2;
            }
            // CMD_STROKE
            2 => {
                commands.push(Command::Stroke(Stroke {
                    tile: words[ix],
                    half_width: bytemuck::cast(words[ix + 1]),
                }));
                ix += 2;
            }
            // CMD_SOLID (no payload)
            3 => {
                commands.push(Command::Solid);
            }
            // CMD_COLOR
            5 => {
                commands.push(Command::Color(Color {
                    abgr: bytemuck::cast(words[ix]),
                }));
                ix += 1;
            }
            // CMD_LIN_GRAD
            6 => {
                commands.push(Command::LinGrad(LinGrad {
                    index: words[ix],
                    line_x: bytemuck::cast(words[ix + 1]),
                    line_y: bytemuck::cast(words[ix + 2]),
                    line_c: bytemuck::cast(words[ix + 3]),
                }));
                ix += 4;
            }
            // CMD_RAD_GRAD (11 payload words; see CmdRadGrad in ptcl.wgsl)
            7 => {
                let matrix = [
                    bytemuck::cast(words[ix + 1]),
                    bytemuck::cast(words[ix + 2]),
                    bytemuck::cast(words[ix + 3]),
                    bytemuck::cast(words[ix + 4]),
                ];
                let xlat = [bytemuck::cast(words[ix + 5]), bytemuck::cast(words[ix + 6])];
                let c1 = [bytemuck::cast(words[ix + 7]), bytemuck::cast(words[ix + 8])];
                commands.push(Command::RadGrad(RadGrad {
                    index: words[ix],
                    matrix,
                    xlat,
                    c1,
                    ra: bytemuck::cast(words[ix + 9]),
                    roff: bytemuck::cast(words[ix + 10]),
                }));
                ix += 11;
            }
            // CMD_BEGIN_CLIP (no payload)
            9 => {
                commands.push(Command::BeginClip);
            }
            // CMD_END_CLIP (payload: blend word)
            10 => {
                commands.push(Command::EndClip(words[ix]));
                ix += 1;
            }
            // CMD_JUMP: continue decoding at the continuation chunk.
            11 => {
                ix = words[ix] as usize;
            }
            // Unknown tags (including unused 4 and 8) are skipped silently.
            _ => {}
        }
    }
    commands
}

View file

@ -17,7 +17,7 @@
use std::{
borrow::Cow,
collections::{hash_map::Entry, HashMap},
num::NonZeroU64,
num::{NonZeroU32, NonZeroU64},
sync::atomic::{AtomicU64, Ordering},
};
@ -25,7 +25,8 @@ use futures_intrusive::channel::shared::GenericOneshotReceiver;
use parking_lot::RawMutex;
use wgpu::{
util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice, BufferView,
ComputePipeline, Device, Queue,
ComputePipeline, Device, Queue, Texture, TextureAspect, TextureFormat, TextureUsages,
TextureView, TextureViewDimension,
};
pub type Error = Box<dyn std::error::Error>;
@ -58,12 +59,27 @@ pub struct BufProxy {
id: Id,
}
#[derive(Clone, Copy)]
pub struct ImageProxy {
width: u32,
height: u32,
// TODO: format
id: Id,
}
#[derive(Clone, Copy)]
pub enum ResourceProxy {
Buf(BufProxy),
Image(ImageProxy),
}
pub enum Command {
Upload(BufProxy, Vec<u8>),
UploadImage(ImageProxy, Vec<u8>),
// Discussion question: third argument is vec of resources?
// Maybe use tricks to make more ergonomic?
// Alternative: provide bufs & images as separate sequences, like piet-gpu.
Dispatch(ShaderId, (u32, u32, u32), Vec<BufProxy>),
Dispatch(ShaderId, (u32, u32, u32), Vec<ResourceProxy>),
Download(BufProxy),
Clear(BufProxy, u64, Option<NonZeroU64>),
}
@ -92,6 +108,7 @@ pub enum BindType {
#[derive(Default)]
struct BindMap {
buf_map: HashMap<Id, Buffer>,
image_map: HashMap<Id, (Texture, TextureView)>,
}
impl Engine {
@ -132,6 +149,16 @@ impl Engine {
},
count: None,
},
BindType::ImageRead => wgpu::BindGroupLayoutEntry {
binding: i as u32,
visibility: wgpu::ShaderStages::COMPUTE,
ty: wgpu::BindingType::Texture {
sample_type: wgpu::TextureSampleType::Float { filterable: true },
view_dimension: wgpu::TextureViewDimension::D2,
multisampled: false,
},
count: None,
},
_ => todo!(),
})
.collect::<Vec<_>>();
@ -182,7 +209,60 @@ impl Engine {
});
bind_map.insert_buf(buf_proxy.id, buf);
}
Command::UploadImage(image_proxy, bytes) => {
let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
label: None,
contents: &bytes,
usage: wgpu::BufferUsages::COPY_SRC,
});
let texture = device.create_texture(&wgpu::TextureDescriptor {
label: None,
size: wgpu::Extent3d {
width: image_proxy.width,
height: image_proxy.height,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST,
format: TextureFormat::Rgba8Unorm,
});
let texture_view = texture.create_view(&wgpu::TextureViewDescriptor {
label: None,
dimension: Some(TextureViewDimension::D2),
aspect: TextureAspect::All,
mip_level_count: None,
base_mip_level: 0,
base_array_layer: 0,
array_layer_count: None,
format: Some(TextureFormat::Rgba8Unorm),
});
encoder.copy_buffer_to_texture(
wgpu::ImageCopyBuffer {
buffer: &buf,
layout: wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: NonZeroU32::new(image_proxy.width * 4),
rows_per_image: None,
},
},
wgpu::ImageCopyTexture {
texture: &texture,
mip_level: 0,
origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
aspect: TextureAspect::All,
},
wgpu::Extent3d {
width: image_proxy.width,
height: image_proxy.height,
depth_or_array_layers: 1,
},
);
bind_map.insert_image(image_proxy.id, texture, texture_view)
}
Command::Dispatch(shader_id, wg_size, bindings) => {
println!("dispatching {:?} with {} bindings", wg_size, bindings.len());
let shader = &self.shaders[shader_id.0];
let bind_group =
bind_map.create_bind_group(device, &shader.bind_group_layout, bindings)?;
@ -225,13 +305,28 @@ impl Recording {
buf_proxy
}
pub fn dispatch(
pub fn upload_image(
&mut self,
shader: ShaderId,
wg_size: (u32, u32, u32),
resources: impl Into<Vec<BufProxy>>,
) {
self.push(Command::Dispatch(shader, wg_size, resources.into()));
width: u32,
height: u32,
data: impl Into<Vec<u8>>,
) -> ImageProxy {
let data = data.into();
let image_proxy = ImageProxy::new(width, height);
self.push(Command::UploadImage(image_proxy, data));
image_proxy
}
pub fn dispatch<R>(&mut self, shader: ShaderId, wg_size: (u32, u32, u32), resources: R)
where
R: IntoIterator,
R::Item: Into<ResourceProxy>,
{
self.push(Command::Dispatch(
shader,
wg_size,
resources.into_iter().map(|r| r.into()).collect(),
));
}
pub fn download(&mut self, buf: BufProxy) {
@ -246,7 +341,53 @@ impl Recording {
impl BufProxy {
pub fn new(size: u64) -> Self {
let id = Id::next();
BufProxy { id, size }
BufProxy {
id,
size: size.max(16),
}
}
}
impl ImageProxy {
pub fn new(width: u32, height: u32) -> Self {
let id = Id::next();
ImageProxy { width, height, id }
}
}
impl ResourceProxy {
pub fn new_buf(size: u64) -> Self {
Self::Buf(BufProxy::new(size))
}
pub fn new_image(width: u32, height: u32) -> Self {
Self::Image(ImageProxy::new(width, height))
}
pub fn as_buf(&self) -> Option<&BufProxy> {
match self {
Self::Buf(proxy) => Some(&proxy),
_ => None,
}
}
pub fn as_image(&self) -> Option<&ImageProxy> {
match self {
Self::Image(proxy) => Some(&proxy),
_ => None,
}
}
}
impl From<BufProxy> for ResourceProxy {
fn from(value: BufProxy) -> Self {
Self::Buf(value)
}
}
impl From<ImageProxy> for ResourceProxy {
fn from(value: ImageProxy) -> Self {
Self::Image(value)
}
}
@ -263,34 +404,79 @@ impl BindMap {
self.buf_map.insert(id, buf);
}
fn insert_image(&mut self, id: Id, image: Texture, image_view: TextureView) {
self.image_map.insert(id, (image, image_view));
}
fn create_bind_group(
&mut self,
device: &Device,
layout: &BindGroupLayout,
bindings: &[BufProxy],
bindings: &[ResourceProxy],
) -> Result<BindGroup, Error> {
for proxy in bindings {
if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
let buf = device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: proxy.size,
usage: wgpu::BufferUsages::STORAGE
| wgpu::BufferUsages::COPY_DST
| wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});
v.insert(buf);
match proxy {
ResourceProxy::Buf(proxy) => {
if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
let buf = device.create_buffer(&wgpu::BufferDescriptor {
label: None,
size: proxy.size,
usage: wgpu::BufferUsages::STORAGE
| wgpu::BufferUsages::COPY_DST
| wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});
v.insert(buf);
}
}
ResourceProxy::Image(proxy) => {
if let Entry::Vacant(v) = self.image_map.entry(proxy.id) {
let texture = device.create_texture(&wgpu::TextureDescriptor {
label: None,
size: wgpu::Extent3d {
width: proxy.width,
height: proxy.height,
depth_or_array_layers: 1,
},
mip_level_count: 1,
sample_count: 1,
dimension: wgpu::TextureDimension::D2,
usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST,
format: TextureFormat::Rgba8Unorm,
});
let texture_view = texture.create_view(&wgpu::TextureViewDescriptor {
label: None,
dimension: Some(TextureViewDimension::D2),
aspect: TextureAspect::All,
mip_level_count: None,
base_mip_level: 0,
base_array_layer: 0,
array_layer_count: None,
format: Some(TextureFormat::Rgba8Unorm),
});
v.insert((texture, texture_view));
}
}
}
}
let entries = bindings
.iter()
.enumerate()
.map(|(i, proxy)| {
let buf = self.buf_map.get(&proxy.id).unwrap();
Ok(wgpu::BindGroupEntry {
binding: i as u32,
resource: buf.as_entire_binding(),
})
.map(|(i, proxy)| match proxy {
ResourceProxy::Buf(proxy) => {
let buf = self.buf_map.get(&proxy.id).unwrap();
Ok(wgpu::BindGroupEntry {
binding: i as u32,
resource: buf.as_entire_binding(),
})
}
ResourceProxy::Image(proxy) => {
let texture = self.image_map.get(&proxy.id).unwrap();
Ok(wgpu::BindGroupEntry {
binding: i as u32,
resource: wgpu::BindingResource::TextureView(&texture.1),
})
}
})
.collect::<Result<Vec<_>, Error>>()?;
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {

View file

@ -20,11 +20,12 @@ use std::{fs::File, io::BufWriter};
use engine::Engine;
use render::render;
use test_scene::dump_scene_info;
use wgpu::{Device, Queue};
use wgpu::{Device, Limits, Queue};
mod debug;
mod engine;
mod pico_svg;
mod ramp;
mod render;
mod shaders;
mod test_scene;
@ -33,12 +34,14 @@ async fn run() -> Result<(), Box<dyn std::error::Error>> {
let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY);
let adapter = instance.request_adapter(&Default::default()).await.unwrap();
let features = adapter.features();
let mut limits = Limits::default();
limits.max_storage_buffers_per_shader_stage = 16;
let (device, queue) = adapter
.request_device(
&wgpu::DeviceDescriptor {
label: None,
features: features & wgpu::Features::TIMESTAMP_QUERY,
limits: Default::default(),
limits,
},
None,
)
@ -49,25 +52,46 @@ async fn run() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}
fn dump_buf(buf: &[u32]) {
for (i, val) in buf.iter().enumerate() {
if *val != 0 {
let lo = val & 0x7fff_ffff;
if lo >= 0x3000_0000 && lo < 0x5000_0000 {
println!("{}: {:x} {}", i, val, f32::from_bits(*val));
} else {
println!("{}: {:x}", i, val);
}
}
}
}
async fn do_render(
device: &Device,
queue: &Queue,
engine: &mut Engine,
) -> Result<(), Box<dyn std::error::Error>> {
#[allow(unused)]
let shaders = shaders::init_shaders(device, engine)?;
let full_shaders = shaders::full_shaders(device, engine)?;
let scene = test_scene::gen_test_scene();
dump_scene_info(&scene);
let (recording, buf) = render(&scene, &shaders);
//test_scene::dump_scene_info(&scene);
//let (recording, buf) = render::render(&scene, &shaders);
let (recording, buf) = render::render_full(&scene, &full_shaders);
let downloads = engine.run_recording(&device, &queue, &recording)?;
let mapped = downloads.map();
device.poll(wgpu::Maintain::Wait);
let buf = mapped.get_mapped(buf).await?;
let file = File::create("image.png")?;
let w = BufWriter::new(file);
let encoder = png::Encoder::new(w, 1024, 1024);
let mut writer = encoder.write_header()?;
writer.write_image_data(&buf)?;
if false {
dump_buf(bytemuck::cast_slice(&buf));
} else {
let file = File::create("image.png")?;
let w = BufWriter::new(file);
let mut encoder = png::Encoder::new(w, 1024, 1024);
encoder.set_color(png::ColorType::Rgba);
let mut writer = encoder.write_header()?;
writer.write_image_data(&buf)?;
}
Ok(())
}

140
piet-wgsl/src/pico_svg.rs Normal file
View file

@ -0,0 +1,140 @@
//! A loader for a tiny fragment of SVG
use std::str::FromStr;
use roxmltree::{Document, Node};
use kurbo::{Affine, BezPath};
use piet_scene::Color;
/// A parsed SVG document, reduced to a flat display list of items.
pub struct PicoSvg {
    // Items in document (paint) order.
    pub items: Vec<Item>,
}
/// One drawable item: a filled or a stroked path.
pub enum Item {
    Fill(FillItem),
    Stroke(StrokeItem),
}
/// A stroked path with its stroke width (already scaled) and color.
pub struct StrokeItem {
    pub width: f64,
    pub color: Color,
    pub path: BezPath,
}
/// A filled path with its color.
pub struct FillItem {
    pub color: Color,
    pub path: BezPath,
}
/// Recursive-descent parser state: the output list plus the global scale.
struct Parser<'a> {
    scale: f64,
    items: &'a mut Vec<Item>,
}
impl PicoSvg {
    /// Parses an SVG string into a flat list of fill/stroke items.
    ///
    /// `scale` is applied uniformly to all geometry; a negative value
    /// additionally flips the y axis (see `Parser::rec_parse`).
    ///
    /// # Errors
    ///
    /// Returns an error if the XML is malformed, a path's `d` attribute
    /// is missing or unparseable, or a stroked path lacks `stroke-width`.
    pub fn load(xml_string: &str, scale: f64) -> Result<PicoSvg, Box<dyn std::error::Error>> {
        let doc = Document::parse(xml_string)?;
        let root = doc.root_element();
        let mut items = Vec::new();
        let mut parser = Parser::new(&mut items, scale);
        for node in root.children() {
            parser.rec_parse(node)?;
        }
        Ok(PicoSvg { items })
    }
}
impl<'a> Parser<'a> {
    /// Creates a parser that appends parsed items into `items`.
    fn new(items: &'a mut Vec<Item>, scale: f64) -> Parser<'a> {
        Parser { scale, items }
    }
    /// Recursively parses one XML node, descending into `g` groups and
    /// emitting fill/stroke items for `path` elements. All other element
    /// kinds are silently ignored.
    fn rec_parse(&mut self, node: Node) -> Result<(), Box<dyn std::error::Error>> {
        // NOTE(review): a negative scale flips the y axis and translates by
        // 1536 — presumably tuned for a specific test asset; confirm before
        // relying on it for other documents.
        let transform = if self.scale >= 0.0 {
            Affine::scale(self.scale)
        } else {
            Affine::new([-self.scale, 0.0, 0.0, self.scale, 0.0, 1536.0])
        };
        if node.is_element() {
            match node.tag_name().name() {
                "g" => {
                    // Group: recurse into children. Group-level transforms
                    // and styles are not handled by this tiny loader.
                    for child in node.children() {
                        self.rec_parse(child)?;
                    }
                }
                "path" => {
                    let d = node.attribute("d").ok_or("missing 'd' attribute")?;
                    let bp = BezPath::from_svg(d)?;
                    // Bake the global transform into the path geometry.
                    let path = transform * bp;
                    // TODO: default fill color is black, but this is overridden in tiger to this logic.
                    if let Some(fill_color) = node.attribute("fill") {
                        if fill_color != "none" {
                            let color = parse_color(fill_color);
                            let color = modify_opacity(color, "fill-opacity", node);
                            // Path is cloned because a stroke may also use it.
                            self.items.push(Item::Fill(FillItem {
                                color,
                                path: path.clone(),
                            }));
                        }
                    }
                    if let Some(stroke_color) = node.attribute("stroke") {
                        if stroke_color != "none" {
                            // Stroke width is in user units; scale it like
                            // the geometry (abs: a negative scale only flips).
                            let width = self.scale.abs()
                                * f64::from_str(
                                    node.attribute("stroke-width").ok_or("missing width")?,
                                )?;
                            let color = parse_color(stroke_color);
                            let color = modify_opacity(color, "stroke-opacity", node);
                            self.items
                                .push(Item::Stroke(StrokeItem { width, color, path }));
                        }
                    }
                }
                _ => (),
            }
        }
        Ok(())
    }
}
/// Parses a small subset of SVG/CSS color syntax.
///
/// Supported forms: `#rgb`, `#rrggbb`, and `rgb(r,g,b)` with decimal
/// components. Previously this panicked on empty input, non-hex digits
/// after `#`, and malformed `rgb(...)` strings; all malformed input now
/// falls back to the same loud semi-transparent magenta already used for
/// unrecognized syntax, so bad colors are visible instead of crashing.
fn parse_color(color: &str) -> Color {
    if let Some(hex_str) = color.strip_prefix('#') {
        if let Ok(mut hex) = u32::from_str_radix(hex_str, 16) {
            if color.len() == 4 {
                // Expand #rgb shorthand: each nibble is doubled (0xf -> 0xff).
                hex = (hex >> 8) * 0x110000 + ((hex >> 4) & 0xf) * 0x1100 + (hex & 0xf) * 0x11;
            }
            // Append full alpha, then split the packed RGBA into channels.
            let rgba = (hex << 8) + 0xff;
            let (r, g, b, a) = (
                ((rgba >> 24) & 255) as u8,
                ((rgba >> 16) & 255) as u8,
                ((rgba >> 8) & 255) as u8,
                (rgba & 255) as u8,
            );
            return Color::rgba8(r, g, b, a);
        }
    } else if let Some(args) = color
        .strip_prefix("rgb(")
        .and_then(|s| s.strip_suffix(')'))
    {
        let mut iter = args.split(',');
        // Each component is a decimal u8; surrounding whitespace is tolerated.
        let mut channel = || iter.next().and_then(|s| u8::from_str(s.trim()).ok());
        if let (Some(r), Some(g), Some(b)) = (channel(), channel(), channel()) {
            return Color::rgb8(r, g, b);
        }
    }
    // Unknown or malformed color: loud semi-transparent magenta.
    Color::rgba8(255, 0, 255, 0x80)
}
/// Applies an optional opacity attribute (`fill-opacity`/`stroke-opacity`)
/// to `color`. The value may be a plain number in [0, 1] or a percentage
/// ending in `%`; unparseable values fall back to fully opaque. The alpha
/// is clamped to [0, 1] before being scaled to a rounded `u8`.
fn modify_opacity(mut color: Color, attr_name: &str, node: Node) -> Color {
    match node.attribute(attr_name) {
        None => color,
        Some(opacity) => {
            let alpha: f64 = match opacity.strip_suffix('%') {
                // Percentage form: strip the sign and rescale to [0, 1].
                Some(pct) => pct.parse().unwrap_or(100.0) * 0.01,
                None => opacity.parse().unwrap_or(1.0),
            };
            color.a = (alpha.min(1.0).max(0.0) * 255.0).round() as u8;
            color
        }
    }
}

137
piet-wgsl/src/ramp.rs Normal file
View file

@ -0,0 +1,137 @@
use piet_scene::{Color, GradientStop, GradientStops};
use std::collections::HashMap;
// Number of texels in each baked gradient ramp row.
const N_SAMPLES: usize = 512;
// Number of ramps retained across epochs before eviction kicks in.
const RETAINED_COUNT: usize = 64;
/// Cache of baked gradient ramps, deduplicated by stop list.
///
/// Each ramp occupies `N_SAMPLES` premultiplied RGBA8 words in `data`;
/// `map` records, per stop list, the ramp id and the epoch in which it
/// was last used (for eviction in `advance`/`add`).
#[derive(Default)]
pub struct RampCache {
    epoch: u64,
    map: HashMap<GradientStops, (u32, u64)>,
    data: Vec<u32>,
}
impl RampCache {
    /// Starts a new epoch (typically once per frame) and evicts ramps
    /// beyond the retained set.
    ///
    /// NOTE(review): eviction retains entries with id < RETAINED_COUNT and
    /// truncates the data to match — this assumes ids above the threshold
    /// were allocated most recently; confirm against the allocation order
    /// in `add`.
    pub fn advance(&mut self) {
        self.epoch += 1;
        if self.map.len() > RETAINED_COUNT {
            self.map
                .retain(|_key, value| value.0 < RETAINED_COUNT as u32);
            self.data.truncate(RETAINED_COUNT * N_SAMPLES);
        }
    }
    /// Returns the ramp id for `stops`, baking and inserting it if absent.
    ///
    /// Cache hits refresh the entry's epoch. Beyond `RETAINED_COUNT`
    /// entries, a slot unused for at least two epochs is rewritten in
    /// place; otherwise the data array grows.
    pub fn add(&mut self, stops: &[GradientStop]) -> u32 {
        if let Some(entry) = self.map.get_mut(stops) {
            // Hit: mark as used this epoch.
            entry.1 = self.epoch;
            entry.0
        } else if self.map.len() < RETAINED_COUNT {
            // Miss with room in the retained set: append a new ramp.
            let id = (self.data.len() / N_SAMPLES) as u32;
            self.data.extend(make_ramp(stops));
            self.map.insert(stops.into(), (id, self.epoch));
            id
        } else {
            // Miss with the retained set full: look for a stale slot
            // (unused for >= 2 epochs) to rewrite in place.
            let mut reuse = None;
            for (stops, (id, epoch)) in &self.map {
                if *epoch + 2 < self.epoch {
                    reuse = Some((stops.to_owned(), *id));
                    break;
                }
            }
            if let Some((old_stops, id)) = reuse {
                self.map.remove(&old_stops);
                let start = id as usize * N_SAMPLES;
                // Overwrite the old ramp's texels with the new ramp.
                for (dst, src) in self.data[start..start + N_SAMPLES]
                    .iter_mut()
                    .zip(make_ramp(stops))
                {
                    *dst = src;
                }
                self.map.insert(stops.into(), (id, self.epoch));
                id
            } else {
                // Nothing stale: grow the data array anyway.
                let id = (self.data.len() / N_SAMPLES) as u32;
                self.data.extend(make_ramp(stops));
                self.map.insert(stops.into(), (id, self.epoch));
                id
            }
        }
    }
    /// Raw texel data for all ramps, row-major (one ramp per row).
    pub fn data(&self) -> &[u32] {
        &self.data
    }
    /// Width in texels of the ramp texture (samples per ramp).
    pub fn width(&self) -> u32 {
        N_SAMPLES as u32
    }
    /// Height in texels of the ramp texture (number of ramps).
    pub fn height(&self) -> u32 {
        (self.data.len() / N_SAMPLES) as u32
    }
}
/// Bakes a gradient stop list into `N_SAMPLES` premultiplied RGBA8 texels.
///
/// Samples u uniformly over [0, 1] while walking the stop list once,
/// linearly interpolating between the two stops that bracket each sample.
/// NOTE(review): offsets below `stops[0].offset` are treated as if the
/// first stop sat at 0, and samples past the last stop hold its color —
/// confirm this matches the intended gradient semantics. Panics if
/// `stops` is empty.
fn make_ramp<'a>(stops: &'a [GradientStop]) -> impl Iterator<Item = u32> + 'a {
    let mut last_u = 0.0;
    let mut last_c = ColorF64::from_color(stops[0].color);
    let mut this_u = last_u;
    let mut this_c = last_c;
    let mut j = 0;
    (0..N_SAMPLES).map(move |i| {
        let u = (i as f64) / (N_SAMPLES - 1) as f64;
        // Advance the bracketing pair until this_u >= u, or run off the
        // end of the stop list (in which case the last color is held).
        while u > this_u {
            last_u = this_u;
            last_c = this_c;
            if let Some(s) = stops.get(j + 1) {
                this_u = s.offset as f64;
                this_c = ColorF64::from_color(s.color);
                j += 1;
            } else {
                break;
            }
        }
        let du = this_u - last_u;
        // Guard against zero-width intervals (coincident stops).
        let c = if du < 1e-9 {
            this_c
        } else {
            last_c.lerp(&this_c, (u - last_u) / du)
        };
        c.to_premul_u32()
    })
}
/// A color held as four f64 components in [0, 1] (r, g, b, a), used for
/// precise ramp interpolation before quantizing back to 8 bits.
#[derive(Copy, Clone, Debug)]
struct ColorF64([f64; 4]);
impl ColorF64 {
    /// Converts an 8-bit-per-channel color to normalized f64 components.
    fn from_color(color: Color) -> Self {
        let bytes = [color.r, color.g, color.b, color.a];
        let mut channels = [0.0; 4];
        for i in 0..4 {
            channels[i] = bytes[i] as f64 / 255.0;
        }
        Self(channels)
    }
    /// Componentwise linear interpolation: `self` at a = 0, `other` at a = 1.
    fn lerp(&self, other: &Self, a: f64) -> Self {
        let mut mixed = [0.0; 4];
        for i in 0..4 {
            mixed[i] = self.0[i] * (1.0 - a) + other.0[i] * a;
        }
        Self(mixed)
    }
    /// Packs as premultiplied RGBA8, r in the low byte. Channels are
    /// clamped to [0, 1] and truncated (not rounded) when scaled to u8,
    /// matching the original conversion.
    fn to_premul_u32(&self) -> u32 {
        let alpha = self.0[3].min(1.0).max(0.0);
        let premul = |c: f64| ((c * alpha).min(1.0).max(0.0) * 255.0) as u32;
        let r = premul(self.0[0]);
        let g = premul(self.0[1]);
        let b = premul(self.0[2]);
        let a = (alpha * 255.0) as u32;
        r | (g << 8) | (b << 16) | (a << 24)
    }
}

View file

@ -4,17 +4,39 @@ use bytemuck::{Pod, Zeroable};
use piet_scene::Scene;
use crate::{
engine::{BufProxy, Recording},
shaders::{self, Shaders},
engine::{BufProxy, Recording, ResourceProxy},
shaders::{self, FullShaders, Shaders},
};
const TAG_MONOID_SIZE: u64 = 12;
const TAG_MONOID_FULL_SIZE: u64 = 20;
const PATH_BBOX_SIZE: u64 = 24;
const CUBIC_SIZE: u64 = 40;
const DRAWMONOID_SIZE: u64 = 16;
const MAX_DRAWINFO_SIZE: u64 = 44;
const CLIP_BIC_SIZE: u64 = 8;
const CLIP_EL_SIZE: u64 = 32;
const CLIP_INP_SIZE: u64 = 4;
const CLIP_BBOX_SIZE: u64 = 16;
const PATH_SIZE: u64 = 32;
const DRAW_BBOX_SIZE: u64 = 16;
const BUMP_SIZE: u64 = 16;
const BIN_HEADER_SIZE: u64 = 8;
#[repr(C)]
#[derive(Clone, Copy, Zeroable, Pod)]
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
struct Config {
width_in_tiles: u32,
height_in_tiles: u32,
n_drawobj: u32,
n_path: u32,
n_clip: u32,
pathtag_base: u32,
pathdata_base: u32,
drawtag_base: u32,
drawdata_base: u32,
transform_base: u32,
linewidth_base: u32,
}
#[repr(C)]
@ -26,22 +48,39 @@ pub struct PathSegment {
next: u32,
}
/// Converts a byte length into a count of u32 words (any trailing
/// partial word is truncated); scene stream base offsets are in words.
fn size_to_words(byte_size: usize) -> u32 {
    let word = std::mem::size_of::<u32>();
    (byte_size / word) as u32
}
pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
let mut recording = Recording::default();
let data = scene.data();
let n_pathtag = data.tag_stream.len();
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
let mut tag_data: Vec<u8> = Vec::with_capacity(pathtag_padded);
tag_data.extend(&data.tag_stream);
tag_data.resize(pathtag_padded, 0);
let pathtag_buf = recording.upload(tag_data);
let mut scene: Vec<u8> = Vec::with_capacity(pathtag_padded);
let pathtag_base = size_to_words(scene.len());
scene.extend(&data.tag_stream);
scene.resize(pathtag_padded, 0);
let pathdata_base = size_to_words(scene.len());
scene.extend(&data.pathseg_stream);
let config = Config {
width_in_tiles: 64,
height_in_tiles: 64,
pathtag_base,
pathdata_base,
..Default::default()
};
let scene_buf = recording.upload(scene);
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE);
// TODO: really only need pathtag_wgs - 1
recording.dispatch(
shaders.pathtag_reduce,
(pathtag_wgs as u32, 1, 1),
[pathtag_buf, reduced_buf],
[config_buf, scene_buf, reduced_buf],
);
let tagmonoid_buf =
@ -49,19 +88,11 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
recording.dispatch(
shaders.pathtag_scan,
(pathtag_wgs as u32, 1, 1),
[pathtag_buf, reduced_buf, tagmonoid_buf],
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
);
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
// The clone here is kinda BS, think about reducing copies
// Of course, we'll probably end up concatenating into a single scene binding.
let pathdata_buf = recording.upload(data.pathseg_stream.clone());
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
let config = Config {
width_in_tiles: 64,
height_in_tiles: 64,
};
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
let path_coarse_wgs =
(n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
// TODO: more principled size calc
let tiles_buf = BufProxy::new(4097 * 8);
let segments_buf = BufProxy::new(256 * 24);
@ -70,10 +101,9 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
shaders.path_coarse,
(path_coarse_wgs, 1, 1),
[
pathtag_buf,
tagmonoid_buf,
pathdata_buf,
config_buf,
scene_buf,
tagmonoid_buf,
tiles_buf,
segments_buf,
],
@ -95,6 +125,280 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
(recording, out_buf)
}
/// Records the full rendering pipeline for `scene`, returning the
/// recording and a proxy for the final RGBA8 output buffer (which is
/// also scheduled for download).
///
/// Stage order: gradient ramp resolution, scene stream packing, path tag
/// reduce/scan, bbox clear, path segment processing, draw object
/// reduce/leaf, clip reduce/leaf, binning, tile allocation, coarse path
/// rasterization, backdrop, coarse command list, and fine rasterization.
/// Buffer sizes and binding orders must match the corresponding shaders.
pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy) {
    let mut recording = Recording::default();
    // Resolve gradient resource patches into ramp ids; the drawdata
    // stream is patched with these ids while being packed below.
    let mut ramps = crate::ramp::RampCache::default();
    let mut drawdata_patches: Vec<(usize, u32)> = vec![];
    let data = scene.data();
    let stop_data = &data.resources.stops;
    for patch in &data.resources.patches {
        use piet_scene::ResourcePatch;
        match patch {
            ResourcePatch::Ramp { offset, stops } => {
                let ramp_id = ramps.add(&stop_data[stops.clone()]);
                drawdata_patches.push((*offset, ramp_id));
            }
        }
    }
    let gradient_image = if drawdata_patches.is_empty() {
        // Dummy 1x1 image so the fine shader's image binding is always valid.
        ResourceProxy::new_image(1, 1)
    } else {
        let data = ramps.data();
        let width = ramps.width();
        let height = ramps.height();
        let data: &[u8] = bytemuck::cast_slice(data);
        println!(
            "gradient image: {}x{} ({} bytes)",
            width,
            height,
            data.len()
        );
        ResourceProxy::Image(recording.upload_image(width, height, data))
    };
    // Pack all scene streams into one buffer, recording each stream's
    // base offset in u32 words. Path tags are zero-padded to a whole
    // number of reduce workgroups.
    let n_pathtag = data.tag_stream.len();
    let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
    // TODO: can compute size accurately, avoid reallocation
    let mut scene: Vec<u8> = Vec::with_capacity(pathtag_padded);
    let pathtag_base = size_to_words(scene.len());
    scene.extend(&data.tag_stream);
    scene.resize(pathtag_padded, 0);
    let pathdata_base = size_to_words(scene.len());
    scene.extend(&data.pathseg_stream);
    let drawtag_base = size_to_words(scene.len());
    scene.extend(bytemuck::cast_slice(&data.drawtag_stream));
    let drawdata_base = size_to_words(scene.len());
    if !drawdata_patches.is_empty() {
        // Splice each 4-byte ramp id into the drawdata stream at its
        // patch offset (patches are assumed to be in ascending order).
        let mut pos = 0;
        for patch in drawdata_patches {
            let offset = patch.0;
            let value = patch.1;
            if pos < offset {
                scene.extend_from_slice(&data.drawdata_stream[pos..offset]);
            }
            scene.extend_from_slice(bytemuck::bytes_of(&value));
            pos = offset + 4;
        }
        if pos < data.drawdata_stream.len() {
            scene.extend_from_slice(&data.drawdata_stream[pos..])
        }
    } else {
        scene.extend(&data.drawdata_stream);
    }
    let transform_base = size_to_words(scene.len());
    scene.extend(bytemuck::cast_slice(&data.transform_stream));
    let linewidth_base = size_to_words(scene.len());
    scene.extend(bytemuck::cast_slice(&data.linewidth_stream));
    let n_path = data.n_path;
    // TODO: calculate for real when we do rectangles
    let n_drawobj = n_path;
    let n_clip = data.n_clip;
    let config = Config {
        width_in_tiles: 64,
        height_in_tiles: 64,
        n_drawobj,
        n_path,
        n_clip,
        pathtag_base,
        pathdata_base,
        drawtag_base,
        drawdata_base,
        transform_base,
        linewidth_base,
    };
    println!("{:?}", config);
    let scene_buf = ResourceProxy::Buf(recording.upload(scene));
    let config_buf = ResourceProxy::Buf(recording.upload(bytemuck::bytes_of(&config).to_owned()));
    // Path tag monoid reduce + scan over the padded tag stream.
    let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
    let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE);
    // TODO: really only need pathtag_wgs - 1
    recording.dispatch(
        shaders.pathtag_reduce,
        (pathtag_wgs as u32, 1, 1),
        [config_buf, scene_buf, reduced_buf],
    );
    let tagmonoid_buf = ResourceProxy::new_buf(
        pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE,
    );
    recording.dispatch(
        shaders.pathtag_scan,
        (pathtag_wgs as u32, 1, 1),
        [config_buf, scene_buf, reduced_buf, tagmonoid_buf],
    );
    // Clear per-path bounding boxes, then flatten path segments into
    // cubics while accumulating bboxes.
    let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
    let path_bbox_buf = ResourceProxy::new_buf(n_path as u64 * PATH_BBOX_SIZE);
    recording.dispatch(
        shaders.bbox_clear,
        (drawobj_wgs, 1, 1),
        [config_buf, path_bbox_buf],
    );
    let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE);
    let path_coarse_wgs =
        (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
    recording.dispatch(
        shaders.pathseg,
        (path_coarse_wgs, 1, 1),
        [
            config_buf,
            scene_buf,
            tagmonoid_buf,
            path_bbox_buf,
            cubic_buf,
        ],
    );
    // Draw object monoid reduce + leaf.
    let draw_reduced_buf = ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE);
    recording.dispatch(
        shaders.draw_reduce,
        (drawobj_wgs, 1, 1),
        [config_buf, scene_buf, draw_reduced_buf],
    );
    let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE);
    let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE);
    let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE);
    recording.dispatch(
        shaders.draw_leaf,
        (drawobj_wgs, 1, 1),
        [
            config_buf,
            scene_buf,
            draw_reduced_buf,
            path_bbox_buf,
            draw_monoid_buf,
            info_buf,
            clip_inp_buf,
        ],
    );
    // Clip stack reduce + leaf; both dispatches are skipped entirely
    // when the scene has no (or too few) clips.
    let clip_el_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_EL_SIZE);
    let clip_bic_buf =
        ResourceProxy::new_buf((n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE);
    let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG;
    if clip_wg_reduce > 0 {
        recording.dispatch(
            shaders.clip_reduce,
            (clip_wg_reduce, 1, 1),
            [
                config_buf,
                clip_inp_buf,
                path_bbox_buf,
                clip_bic_buf,
                clip_el_buf,
            ],
        );
    }
    let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG;
    let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE);
    if clip_wg > 0 {
        recording.dispatch(
            shaders.clip_leaf,
            (clip_wg, 1, 1),
            [
                config_buf,
                clip_inp_buf,
                path_bbox_buf,
                clip_bic_buf,
                clip_el_buf,
                draw_monoid_buf,
                clip_bbox_buf,
            ],
        );
    }
    // Binning: assign draw objects to 16x16-tile bins. The bump buffer
    // holds shared atomic allocation counters and must start zeroed.
    let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE);
    let bump_buf = BufProxy::new(BUMP_SIZE);
    let bin_data_buf = ResourceProxy::new_buf(1 << 20);
    let width_in_bins = (config.width_in_tiles + 15) / 16;
    let height_in_bins = (config.height_in_tiles + 15) / 16;
    let n_bins = width_in_bins * height_in_bins;
    let bin_header_buf = ResourceProxy::new_buf((n_bins * drawobj_wgs) as u64 * BIN_HEADER_SIZE);
    recording.clear_all(bump_buf);
    let bump_buf = ResourceProxy::Buf(bump_buf);
    recording.dispatch(
        shaders.binning,
        (drawobj_wgs, 1, 1),
        [
            config_buf,
            draw_monoid_buf,
            path_bbox_buf,
            clip_bbox_buf,
            draw_bbox_buf,
            bump_buf,
            bin_data_buf,
            bin_header_buf,
        ],
    );
    // Tile allocation, coarse path rasterization, and backdrop.
    // NOTE(review): the 1 << 20 / 1 << 24 buffer sizes are fixed guesses
    // (see "more principled size calc" TODOs elsewhere) — large scenes
    // could presumably overflow them.
    let path_buf = ResourceProxy::new_buf(n_path as u64 * PATH_SIZE);
    let tile_buf = ResourceProxy::new_buf(1 << 20);
    let path_wgs = (n_path + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
    recording.dispatch(
        shaders.tile_alloc,
        (path_wgs, 1, 1),
        [
            config_buf,
            scene_buf,
            draw_bbox_buf,
            bump_buf,
            path_buf,
            tile_buf,
        ],
    );
    let segments_buf = ResourceProxy::new_buf(1 << 24);
    recording.dispatch(
        shaders.path_coarse,
        (path_coarse_wgs, 1, 1),
        [
            config_buf,
            scene_buf,
            tagmonoid_buf,
            cubic_buf,
            path_buf,
            bump_buf,
            tile_buf,
            segments_buf,
        ],
    );
    recording.dispatch(
        shaders.backdrop,
        (path_wgs, 1, 1),
        [config_buf, path_buf, tile_buf],
    );
    // Coarse rasterization produces the per-tile command list (ptcl),
    // consumed by the fine rasterizer, one workgroup per tile.
    let ptcl_buf = ResourceProxy::new_buf(1 << 24);
    recording.dispatch(
        shaders.coarse,
        (width_in_bins, height_in_bins, 1),
        [
            config_buf,
            scene_buf,
            draw_monoid_buf,
            bin_header_buf,
            bin_data_buf,
            path_buf,
            tile_buf,
            info_buf,
            bump_buf,
            ptcl_buf,
        ],
    );
    // 1024 = bytes per 16x16 tile of RGBA8 pixels.
    let out_buf_size = config.width_in_tiles * config.height_in_tiles * 1024;
    let out_buf = BufProxy::new(out_buf_size as u64);
    recording.dispatch(
        shaders.fine,
        (config.width_in_tiles, config.height_in_tiles, 1),
        [
            config_buf,
            tile_buf,
            segments_buf,
            ResourceProxy::Buf(out_buf),
            ptcl_buf,
            gradient_image,
        ],
    );
    let download_buf = out_buf;
    recording.download(download_buf);
    (recording, download_buf)
}
/// Rounds `len` up to the next multiple of `alignment`.
///
/// `alignment` must be a nonzero power of two — the mask trick below
/// relies on it (now checked in debug builds). Values that are already
/// aligned, including 0, are returned unchanged.
pub fn align_up(len: usize, alignment: u32) -> usize {
    debug_assert!(
        alignment.is_power_of_two(),
        "alignment must be a power of two"
    );
    let mask = alignment as usize - 1;
    // (-len) mod alignment is the padding needed to reach the next multiple.
    len + (len.wrapping_neg() & mask)
}

View file

@ -25,7 +25,10 @@ use wgpu::Device;
use crate::engine::{BindType, Engine, Error, ShaderId};
// Compute workgroup sizes used when computing dispatch counts.
// NOTE(review): these presumably must match the `@workgroup_size`
// declarations in the corresponding WGSL shaders — confirm when changing.
pub const PATHTAG_REDUCE_WG: u32 = 256;
pub const PATH_BBOX_WG: u32 = 256;
pub const PATH_COARSE_WG: u32 = 256;
pub const PATH_DRAWOBJ_WG: u32 = 256;
pub const CLIP_REDUCE_WG: u32 = 256;
pub struct Shaders {
pub pathtag_reduce: ShaderId,
@ -35,6 +38,24 @@ pub struct Shaders {
pub fine: ShaderId,
}
/// Shader handles for the full rendering pipeline, listed in the order
/// they are dispatched by `render::render_full`.
pub struct FullShaders {
    pub pathtag_reduce: ShaderId,
    pub pathtag_scan: ShaderId,
    pub bbox_clear: ShaderId,
    pub pathseg: ShaderId,
    pub draw_reduce: ShaderId,
    pub draw_leaf: ShaderId,
    pub clip_reduce: ShaderId,
    pub clip_leaf: ShaderId,
    pub binning: ShaderId,
    pub tile_alloc: ShaderId,
    pub path_coarse: ShaderId,
    pub backdrop: ShaderId,
    pub coarse: ShaderId,
    pub fine: ShaderId,
}
pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Error> {
let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader"));
let imports = preprocess::get_imports(shader_dir);
@ -44,12 +65,17 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
let pathtag_reduce = engine.add_shader(
device,
preprocess::preprocess(&read_shader("pathtag_reduce"), &empty, &imports).into(),
&[BindType::BufReadOnly, BindType::Buffer],
&[
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
],
)?;
let pathtag_scan = engine.add_shader(
device,
preprocess::preprocess(&read_shader("pathtag_scan"), &empty, &imports).into(),
&[
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
@ -65,7 +91,6 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::BufReadOnly,
BindType::Buffer,
BindType::Buffer,
],
@ -93,3 +118,188 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
fine,
})
}
/// Compiles and registers all shaders of the full pipeline.
///
/// Shaders are preprocessed with the `full` shader-def enabled where the
/// source differs between the test and full pipelines. Each `BindType`
/// slice lists the shader's bindings in binding-index order and must
/// match both the WGSL declarations and the resource arrays passed to
/// `Recording::dispatch` in `render::render_full`.
///
/// # Errors
///
/// Returns an error if the engine fails to create any shader.
pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders, Error> {
    let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader"));
    let imports = preprocess::get_imports(shader_dir);
    let read_shader =
        |path: &str| fs::read_to_string(shader_dir.join(path.to_string() + ".wgsl")).unwrap();
    let empty = HashSet::new();
    // Shader-def set enabling the `full` pipeline variants.
    let mut full_config = HashSet::new();
    full_config.insert("full".into());
    let pathtag_reduce = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("pathtag_reduce"), &full_config, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
        ],
    )?;
    let pathtag_scan = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("pathtag_scan"), &full_config, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
        ],
    )?;
    let bbox_clear = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("bbox_clear"), &empty, &imports).into(),
        &[BindType::BufReadOnly, BindType::Buffer],
    )?;
    let pathseg = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("pathseg"), &full_config, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
        ],
    )?;
    let draw_reduce = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("draw_reduce"), &empty, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
        ],
    )?;
    let draw_leaf = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("draw_leaf"), &empty, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
            BindType::Buffer,
        ],
    )?;
    let clip_reduce = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("clip_reduce"), &empty, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
        ],
    )?;
    let clip_leaf = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("clip_leaf"), &empty, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
        ],
    )?;
    let binning = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("binning"), &empty, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
            BindType::Buffer,
            BindType::Buffer,
        ],
    )?;
    let tile_alloc = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("tile_alloc"), &empty, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
            BindType::Buffer,
        ],
    )?;
    // Note: uses the `path_coarse_full` source, unlike the test pipeline.
    let path_coarse = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("path_coarse_full"), &full_config, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
            BindType::Buffer,
        ],
    )?;
    let backdrop = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("backdrop_dyn"), &empty, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
        ],
    )?;
    let coarse = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("coarse"), &empty, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::Buffer,
        ],
    )?;
    // Fine rasterization additionally samples the gradient ramp image.
    let fine = engine.add_shader(
        device,
        preprocess::preprocess(&read_shader("fine"), &full_config, &imports).into(),
        &[
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::BufReadOnly,
            BindType::Buffer,
            BindType::BufReadOnly,
            BindType::ImageRead,
        ],
    )?;
    Ok(FullShaders {
        pathtag_reduce,
        pathtag_scan,
        bbox_clear,
        pathseg,
        draw_reduce,
        draw_leaf,
        clip_reduce,
        clip_leaf,
        binning,
        tile_alloc,
        path_coarse,
        backdrop,
        coarse,
        fine,
    })
}

View file

@ -14,24 +14,54 @@
//
// Also licensed under MIT license, at your choice.
use piet_scene::{Affine, Brush, Color, Fill, PathElement, Point, Scene, SceneBuilder};
use kurbo::BezPath;
use piet_scene::{
Affine, BlendMode, Brush, Color, Compose, ExtendMode, Fill, GradientStop, LinearGradient, Mix,
PathElement, Point, RadialGradient, Rect, Scene, SceneBuilder, SceneFragment, Stroke,
};
use crate::pico_svg::PicoSvg;
pub fn gen_test_scene() -> Scene {
let mut scene = Scene::default();
let mut builder = SceneBuilder::for_scene(&mut scene);
let path = [
PathElement::MoveTo(Point::new(100.0, 100.0)),
PathElement::LineTo(Point::new(500.0, 120.0)),
PathElement::LineTo(Point::new(300.0, 150.0)),
PathElement::LineTo(Point::new(200.0, 260.0)),
PathElement::LineTo(Point::new(150.0, 210.0)),
PathElement::Close,
];
let brush = Brush::Solid(Color::rgb8(0x80, 0x80, 0x80));
builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path);
let scene_ix = 1;
match scene_ix {
0 => {
let path = [
PathElement::MoveTo(Point::new(100.0, 100.0)),
PathElement::LineTo(Point::new(500.0, 120.0)),
PathElement::LineTo(Point::new(300.0, 150.0)),
PathElement::LineTo(Point::new(200.0, 260.0)),
PathElement::LineTo(Point::new(150.0, 210.0)),
PathElement::Close,
];
let brush = Brush::Solid(Color::rgb8(0x40, 0x40, 0xff));
builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path);
let transform = Affine::translate(50.0, 50.0);
let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80));
builder.fill(Fill::NonZero, transform, &brush, None, &path);
let transform = Affine::translate(100.0, 100.0);
let style = simple_stroke(1.0);
let brush = Brush::Solid(Color::rgb8(0xa0, 0x00, 0x00));
builder.stroke(&style, transform, &brush, None, &path);
}
1 => {
render_blend_grid(&mut builder);
}
_ => {
let xml_str =
std::str::from_utf8(include_bytes!("../../piet-gpu/Ghostscript_Tiger.svg"))
.unwrap();
let svg = PicoSvg::load(xml_str, 6.0).unwrap();
render_svg(&mut builder, &svg, false);
}
}
builder.finish();
scene
}
#[allow(unused)]
pub fn dump_scene_info(scene: &Scene) {
let data = scene.data();
println!("tags {:?}", data.tag_stream);
@ -40,3 +70,212 @@ pub fn dump_scene_info(scene: &Scene) {
bytemuck::cast_slice::<u8, f32>(&data.pathseg_stream)
);
}
pub fn render_svg(sb: &mut SceneBuilder, svg: &PicoSvg, print_stats: bool) {
use crate::pico_svg::*;
let start = std::time::Instant::now();
for item in svg.items.iter() {
match item {
Item::Fill(fill) => {
sb.fill(
Fill::NonZero,
Affine::IDENTITY,
&fill.color.into(),
None,
convert_bez_path(&fill.path),
);
}
Item::Stroke(stroke) => {
sb.stroke(
&simple_stroke(stroke.width as f32),
Affine::IDENTITY,
&stroke.color.into(),
None,
convert_bez_path(&stroke.path),
);
}
}
}
if print_stats {
println!("flattening and encoding time: {:?}", start.elapsed());
}
}
/// Adapts a kurbo path to the cloneable iterator of `PathElement`s the
/// scene builder consumes, converting each element lazily.
fn convert_bez_path<'a>(path: &'a BezPath) -> impl Iterator<Item = PathElement> + 'a + Clone {
    path.elements().iter().copied().map(PathElement::from_kurbo)
}
/// Builds a round-capped, round-joined stroke of the given width with no
/// dashing (the `[f32; 0]` type parameter is the empty dash pattern).
// NOTE(review): `scale: true` presumably means the stroke width is
// transformed along with the geometry — confirm against piet-scene.
fn simple_stroke(width: f32) -> Stroke<[f32; 0]> {
    Stroke {
        width,
        join: piet_scene::Join::Round,
        miter_limit: 1.4,
        start_cap: piet_scene::Cap::Round,
        end_cap: piet_scene::Cap::Round,
        dash_pattern: [],
        dash_offset: 0.0,
        scale: true,
    }
}
/// Renders a 4x4 grid of blend-test squares, one per supported `Mix`
/// blend mode, as a visual regression scene for blending.
#[allow(unused)]
pub fn render_blend_grid(sb: &mut SceneBuilder) {
    const BLEND_MODES: &[Mix] = &[
        Mix::Normal,
        Mix::Multiply,
        Mix::Darken,
        Mix::Screen,
        Mix::Lighten,
        Mix::Overlay,
        Mix::ColorDodge,
        Mix::ColorBurn,
        Mix::HardLight,
        Mix::SoftLight,
        Mix::Difference,
        Mix::Exclusion,
        Mix::Hue,
        Mix::Saturation,
        Mix::Color,
        Mix::Luminosity,
    ];
    for (ix, &blend) in BLEND_MODES.iter().enumerate() {
        // Lay the 16 modes out 4-wide, each cell 225 units apart
        // (200-unit squares plus a 25-unit gutter).
        let i = ix % 4;
        let j = ix / 4;
        let transform = Affine::translate(i as f32 * 225., j as f32 * 225.);
        let square = blend_square(blend.into());
        sb.append(&square, Some(transform));
    }
}
/// Renders one 200x200 blend-test square under `transform`: a horizontal
/// grayscale gradient background, three overlapping radial color glows,
/// and three rotated gradient ellipses composited with `blend` inside an
/// isolating layer.
#[allow(unused)]
fn render_blend_square(sb: &mut SceneBuilder, blend: BlendMode, transform: Affine) {
    // Inspired by https://developer.mozilla.org/en-US/docs/Web/CSS/mix-blend-mode
    let rect = Rect::from_origin_size(Point::new(0., 0.), 200., 200.);
    // Background: black-to-white horizontal linear gradient.
    let stops = &[
        GradientStop {
            color: Color::rgb8(0, 0, 0),
            offset: 0.0,
        },
        GradientStop {
            color: Color::rgb8(255, 255, 255),
            offset: 1.0,
        },
    ][..];
    let linear = Brush::LinearGradient(LinearGradient {
        start: Point::new(0.0, 0.0),
        end: Point::new(200.0, 0.0),
        stops: stops.into(),
        extend: ExtendMode::Pad,
    });
    sb.fill(Fill::NonZero, transform, &linear, None, rect.elements());
    // Three radial "glows" fading to transparent at their edges.
    const GRADIENTS: &[(f32, f32, Color)] = &[
        (150., 0., Color::rgb8(255, 240, 64)),
        (175., 100., Color::rgb8(255, 96, 240)),
        (125., 200., Color::rgb8(64, 192, 255)),
    ];
    for (x, y, c) in GRADIENTS {
        // Same hue at the rim, but fully transparent.
        let mut color2 = c.clone();
        color2.a = 0;
        let stops = &[
            GradientStop {
                color: c.clone(),
                offset: 0.0,
            },
            GradientStop {
                color: color2,
                offset: 1.0,
            },
        ][..];
        let rad = Brush::RadialGradient(RadialGradient {
            center0: Point::new(*x, *y),
            center1: Point::new(*x, *y),
            radius0: 0.0,
            radius1: 100.0,
            stops: stops.into(),
            extend: ExtendMode::Pad,
        });
        sb.fill(Fill::NonZero, transform, &rad, None, rect.elements());
    }
    const COLORS: &[Color] = &[
        Color::rgb8(255, 0, 0),
        Color::rgb8(0, 255, 0),
        Color::rgb8(0, 0, 255),
    ];
    // Outer layer isolates the blended ellipses from the background.
    sb.push_layer(Mix::Normal.into(), transform, rect.elements());
    for (i, c) in COLORS.iter().enumerate() {
        let stops = &[
            GradientStop {
                color: Color::rgb8(255, 255, 255),
                offset: 0.0,
            },
            GradientStop {
                color: c.clone(),
                offset: 1.0,
            },
        ][..];
        let linear = Brush::LinearGradient(LinearGradient {
            start: Point::new(0.0, 0.0),
            end: Point::new(0.0, 200.0),
            stops: stops.into(),
            extend: ExtendMode::Pad,
        });
        // Inner layer applies the blend mode under test.
        sb.push_layer(blend, transform, rect.elements());
        // squash the ellipse
        let a = transform
            * Affine::translate(100., 100.)
            * Affine::rotate(std::f32::consts::FRAC_PI_3 * (i * 2 + 1) as f32)
            * Affine::scale(1.0, 0.357)
            * Affine::translate(-100., -100.);
        sb.fill(
            Fill::NonZero,
            a,
            &linear,
            None,
            make_ellipse(100., 100., 90., 90.),
        );
        sb.pop_layer();
    }
    sb.pop_layer();
}
/// Builds a standalone scene fragment containing one blend-test square
/// (rendered at the identity transform), ready for `SceneBuilder::append`.
#[allow(unused)]
fn blend_square(blend: BlendMode) -> SceneFragment {
    let mut fragment = SceneFragment::default();
    let mut builder = SceneBuilder::for_fragment(&mut fragment);
    render_blend_square(&mut builder, blend, Affine::IDENTITY);
    builder.finish();
    fragment
}
/// Approximates an axis-aligned ellipse centered at `(cx, cy)` with radii
/// `rx`/`ry` as four cubic Bezier quarter-arcs, returned as a cloneable
/// path-element iterator.
fn make_ellipse(cx: f32, cy: f32, rx: f32, ry: f32) -> impl Iterator<Item = PathElement> + Clone {
    // Classic control-point offset for a cubic quarter-circle approximation.
    let kappa = 0.551915024494;
    let kx = kappa * rx;
    let ky = kappa * ry;
    let elements = [
        PathElement::MoveTo(Point::new(cx + rx, cy)),
        PathElement::CurveTo(
            Point::new(cx + rx, cy + ky),
            Point::new(cx + kx, cy + ry),
            Point::new(cx, cy + ry),
        ),
        PathElement::CurveTo(
            Point::new(cx - kx, cy + ry),
            Point::new(cx - rx, cy + ky),
            Point::new(cx - rx, cy),
        ),
        PathElement::CurveTo(
            Point::new(cx - rx, cy - ky),
            Point::new(cx - kx, cy - ry),
            Point::new(cx, cy - ry),
        ),
        PathElement::CurveTo(
            Point::new(cx + kx, cy - ry),
            Point::new(cx + rx, cy - ky),
            Point::new(cx + rx, cy),
        ),
        PathElement::Close,
    ];
    // Yield by index so the returned iterator is `Clone` without relying
    // on array-by-value `IntoIterator` semantics.
    (0..elements.len()).map(move |i| elements[i])
}