diff --git a/.vscode/settings.json b/.vscode/settings.json index 323cd81..b8fa532 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,8 +1,16 @@ { "wgsl-analyzer.customImports": { + "bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl", + "bump": "${workspaceFolder}/piet-wgsl/shader/shared/bump.wgsl", + "clip": "${workspaceFolder}/piet-wgsl/shader/shared/clip.wgsl", "config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl", + "cubic": "${workspaceFolder}/piet-wgsl/shader/shared/cubic.wgsl", + "drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl", "segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl", - "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl" + "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl", + "ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl", + "tile": "${workspaceFolder}/piet-wgsl/shader/shared/tile.wgsl" }, - "wgsl-analyzer.diagnostics.nagaVersion": "main" + "wgsl-analyzer.diagnostics.nagaVersion": "main", + "wgsl-analyzer.preprocessor.shaderDefs": ["full"] } diff --git a/Cargo.lock b/Cargo.lock index f2669e2..d82ba57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1174,10 +1174,12 @@ dependencies = [ "bytemuck", "env_logger", "futures-intrusive", + "kurbo 0.8.3", "parking_lot", "piet-scene", "png", "pollster", + "roxmltree", "wgpu", ] diff --git a/piet-wgsl/Cargo.toml b/piet-wgsl/Cargo.toml index e7fa42d..b785eac 100644 --- a/piet-wgsl/Cargo.toml +++ b/piet-wgsl/Cargo.toml @@ -14,4 +14,8 @@ parking_lot = "0.12" bytemuck = { version = "1.12.1", features = ["derive"] } png = "0.17.6" -piet-scene = { path = "../piet-scene" } +piet-scene = { path = "../piet-scene", features = ["kurbo"] } + +# for picosvg, should be split out +roxmltree = "0.13" +kurbo = "0.8.3" diff --git a/piet-wgsl/shader/backdrop_dyn.wgsl b/piet-wgsl/shader/backdrop_dyn.wgsl new file mode 100644 index 0000000..ab094e6 --- /dev/null +++ b/piet-wgsl/shader/backdrop_dyn.wgsl @@ -0,0 +1,84 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Prefix sum for dynamically allocated backdrops + +#import config +#import tile + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var paths: array; + +@group(0) @binding(2) +var tiles: array; + +let WG_SIZE = 256u; + +var sh_row_width: array; +var sh_row_count: array; +var sh_offset: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + let drawobj_ix = global_id.x; + var row_count = 0u; + if drawobj_ix < config.n_drawobj { + // TODO: when rectangles, path and draw obj are not the same + let path = paths[drawobj_ix]; + sh_row_width[local_id.x] = path.bbox.z - path.bbox.x; + row_count = path.bbox.w - path.bbox.y; + sh_offset[local_id.x] = path.tiles; + } + sh_row_count[local_id.x] = row_count; + + // Prefix sum of row counts + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x >= (1u << i) { + row_count += sh_row_count[local_id.x - (1u << i)]; + } + workgroupBarrier(); + sh_row_count[local_id.x] = row_count; + } + workgroupBarrier(); + let total_rows = sh_row_count[WG_SIZE - 1u]; + for (var row = local_id.x; row < total_rows; row += WG_SIZE) { + var el_ix = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let probe = el_ix + ((WG_SIZE / 2u) >> i); + if row >= sh_row_count[probe - 1u] { + el_ix = probe; + } + } + let width = sh_row_width[el_ix]; + if width > 0u { + var seq_ix = row - select(0u, sh_row_count[el_ix - 1u], el_ix > 0u); + var tile_ix = sh_offset[el_ix] + seq_ix * width; + var sum = tiles[tile_ix].backdrop; + for (var x = 1u; x < width; x += 1u) { + tile_ix += 1u; + sum += tiles[tile_ix].backdrop; + tiles[tile_ix].backdrop = sum; + } + } + } +} diff --git a/piet-wgsl/shader/bbox_clear.wgsl b/piet-wgsl/shader/bbox_clear.wgsl new file mode 100644 index 0000000..69a8b63 --- /dev/null +++ b/piet-wgsl/shader/bbox_clear.wgsl @@ -0,0 +1,45 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +#import config + +@group(0) @binding(0) +var config: Config; + +struct PathBbox { + x0: i32, + y0: i32, + x1: i32, + y1: i32, + linewidth: f32, + trans_ix: u32, +} + +@group(0) @binding(1) +var path_bboxes: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, +) { + let ix = global_id.x; + if ix < config.n_path { + path_bboxes[ix].x0 = 0x7fffffff; + path_bboxes[ix].y0 = 0x7fffffff; + path_bboxes[ix].x1 = -0x80000000; + path_bboxes[ix].y1 = -0x80000000; + } +} diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl new file mode 100644 index 0000000..6fd6284 --- /dev/null +++ b/piet-wgsl/shader/binning.wgsl @@ -0,0 +1,164 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// The binning stage + +#import config +#import drawtag +#import bbox +#import bump + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var draw_monoids: array; + +@group(0) @binding(2) +var path_bbox_buf: array; + +@group(0) @binding(3) +var clip_bbox_buf: array>; + +@group(0) @binding(4) +var intersected_bbox: array>; + +@group(0) @binding(5) +var bump: BumpAllocators; + +@group(0) @binding(6) +var bin_data: array; + +// TODO: put in common place +struct BinHeader { + element_count: u32, + chunk_offset: u32, +} + +@group(0) @binding(7) +var bin_header: array; + +// conversion factors from coordinates to bin +let SX = 0.00390625; +let SY = 0.00390625; +//let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); +//let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); + +let WG_SIZE = 256u; +let N_SLICE = 8u; +//let N_SLICE = WG_SIZE / 32u; + +var sh_bitmaps: array, N_TILE>, N_SLICE>; +var sh_count: array, N_SLICE>; +var sh_chunk_offset: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + for (var i = 0u; i < N_SLICE; i += 1u) { + atomicStore(&sh_bitmaps[i][local_id.x], 0u); + } + + // Read inputs and determine coverage of bins + let element_ix = global_id.x; + var x0 = 0; + var y0 = 0; + var x1 = 0; + var y1 = 0; + if element_ix < config.n_drawobj { + let draw_monoid = draw_monoids[element_ix]; + var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); + if draw_monoid.clip_ix > 0u { + clip_bbox = clip_bbox_buf[draw_monoid.clip_ix - 1u]; + } + // For clip elements, clip_box is the bbox of the clip path, + // intersected with enclosing clips. + // For other elements, it is the bbox of the enclosing clips. + // TODO check this is true + + let path_bbox = path_bbox_buf[draw_monoid.path_ix]; + let pb = vec4(vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1)); + let bbox_raw = bbox_intersect(clip_bbox, pb); + // TODO(naga): clunky expression a workaround for broken lhs swizzle + let bbox = vec4(bbox_raw.xy, max(bbox_raw.xy, bbox_raw.zw)); + + intersected_bbox[element_ix] = bbox; + x0 = i32(floor(bbox.x * SX)); + y0 = i32(floor(bbox.y * SY)); + x1 = i32(ceil(bbox.z * SX)); + y1 = i32(ceil(bbox.w * SY)); + } + let width_in_bins = i32((config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X); + let height_in_bins = i32((config.height_in_tiles + N_TILE_Y - 1u) / N_TILE_Y); + x0 = clamp(x0, 0, width_in_bins); + y0 = clamp(y0, 0, height_in_bins); + x1 = clamp(x1, 0, width_in_bins); + y1 = clamp(y1, 0, height_in_bins); + if x0 == x1 { + y1 = y0; + } + var x = x0; + var y = y0; + let my_slice = local_id.x / 32u; + let my_mask = 1u << (local_id.x & 31u); + while y < y1 { + atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask); + x += 1; + if x == x1 { + x = x0; + y += 1; + } + } + + workgroupBarrier(); + // Allocate output segments + var element_count = 0u; + for (var i = 0u; i < N_SLICE; i += 1u) { + element_count += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); + sh_count[i][local_id.x] = element_count; + } + // element_count is the number of draw objects covering this thread's bin + let chunk_offset = atomicAdd(&bump.binning, element_count); + sh_chunk_offset[local_id.x] = chunk_offset; + bin_header[global_id.x].element_count = element_count; + bin_header[global_id.x].chunk_offset = chunk_offset; + workgroupBarrier(); + + // loop over bbox of bins touched by this draw object + x = x0; + y = y0; + while y < y1 { + let bin_ix = y * width_in_bins + x; + let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]); + // I think this predicate will always be true... + if (out_mask & my_mask) != 0u { + var idx = countOneBits(out_mask & (my_mask - 1u)); + if my_slice > 0u { + idx += sh_count[my_slice - 1u][bin_ix]; + } + let offset = sh_chunk_offset[bin_ix]; + bin_data[offset + idx] = element_ix; + } + x += 1; + if x == x1 { + x = x0; + y += 1; + } + } +} diff --git a/piet-wgsl/shader/clip_leaf.wgsl b/piet-wgsl/shader/clip_leaf.wgsl new file mode 100644 index 0000000..d935550 --- /dev/null +++ b/piet-wgsl/shader/clip_leaf.wgsl @@ -0,0 +1,199 @@ + +#import config +#import bbox +#import clip +#import drawtag + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var clip_inp: array; + +@group(0) @binding(2) +var path_bboxes: array; + +@group(0) @binding(3) +var reduced: array; + +@group(0) @binding(4) +var clip_els: array; + +@group(0) @binding(5) +var draw_monoids: array; + +@group(0) @binding(6) +var clip_bboxes: array>; + +let WG_SIZE = 256u; +var sh_bic: array; +var sh_stack: array; +var sh_stack_bbox: array, WG_SIZE>; +var sh_bbox: array, WG_SIZE>; +var sh_link: array; + +fn search_link(bic: ptr, ix: u32) -> i32 { + var ix = ix; + var j = 0u; + while j < firstTrailingBit(WG_SIZE) { + let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); + if ((ix >> j) & 1u) != 0u { + let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); + if test.b > 0u { + break; + } + *bic = test; + ix -= 1u << j; + } + j += 1u; + } + if ix > 0u { + while j > 0u { + j -= 1u; + let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); + let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); + if test.b == 0u { + *bic = test; + ix -= 1u << j; + } + } + } + if ix > 0u { + return i32(ix) - 1; + } else { + return i32(~0u - (*bic).a); + } +} + +fn load_clip_inp(ix: u32) -> i32 { + if ix < config.n_clip { + return clip_inp[ix]; + } else { + return -2147483648; + // literal too large? + // return 0x80000000; + } +} + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + var bic: Bic; + if local_id.x < wg_id.x { + bic = reduced[local_id.x]; + } + sh_bic[local_id.x] = bic; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_bic[local_id.x + (1u << i)]; + bic = bic_combine(bic, other); + } + workgroupBarrier(); + sh_bic[local_id.x] = bic; + } + workgroupBarrier(); + let stack_size = sh_bic[0].b; + // TODO: if stack depth > WG_SIZE desired, scan here + + // binary search in stack + let sp = WG_SIZE - 1u - local_id.x; + var ix = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let probe = ix + ((WG_SIZE / 2u) >> i); + if sp < sh_bic[probe].b { + ix = probe; + } + } + let b = sh_bic[ix].b; + var bbox = vec4(-1e9, -1e9, 1e9, 1e9); + if sp < b { + let el = clip_els[ix * WG_SIZE + b - sp - 1u]; + sh_stack[local_id.x] = el.parent_ix; + bbox = el.bbox; + } + // forward scan of bbox values of prefix stack + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + sh_stack_bbox[local_id.x] = bbox; + workgroupBarrier(); + if local_id.x >= (1u << i) { + bbox = bbox_intersect(sh_stack_bbox[local_id.x - (1u << i)], bbox); + } + workgroupBarrier(); + } + sh_stack_bbox[local_id.x] = bbox; + + // Read input and compute Bic binary tree + let inp = load_clip_inp(global_id.x); + let is_push = inp >= 0; + var bic = Bic(1u - u32(is_push), u32(is_push)); + sh_bic[local_id.x] = bic; + if is_push { + let path_bbox = path_bboxes[inp]; + bbox = vec4(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1)); + } else { + bbox = vec4(-1e9, -1e9, 1e9, 1e9); + } + var inbase = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE) - 1u; i += 1u) { + let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i)); + workgroupBarrier(); + if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) { + let in_off = inbase + local_id.x * 2u; + sh_bic[outbase + local_id.x] = bic_combine(sh_bic[in_off], sh_bic[in_off + 1u]); + } + inbase = outbase; + } + workgroupBarrier(); + // search for predecessor node + bic = Bic(); + var link = search_link(&bic, local_id.x); + sh_link[local_id.x] = link; + workgroupBarrier(); + let grandparent = select(link - 1, sh_link[link], link >= 0); + var parent: i32; + if link >= 0 { + parent = i32(wg_id.x * WG_SIZE) + link; + } else if link + i32(stack_size) >= 0 { + parent = i32(sh_stack[i32(WG_SIZE) + link]); + } else { + parent = -1; + } + // bbox scan (intersect) across parent links + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + if i != 0u { + sh_link[local_id.x] = link; + } + sh_bbox[local_id.x] = bbox; + workgroupBarrier(); + if link >= 0 { + bbox = bbox_intersect(sh_bbox[link], bbox); + link = sh_link[link]; + } + workgroupBarrier(); + } + if link + i32(stack_size) >= 0 { + bbox = bbox_intersect(sh_stack_bbox[i32(WG_SIZE) + link], bbox); + } + // At this point, bbox is the intersection of bboxes on the path to the root + sh_bbox[local_id.x] = bbox; + workgroupBarrier(); + + if !is_push && global_id.x < config.n_clip { + // Fix up drawmonoid so path_ix of EndClip matches BeginClip + let path_ix = clip_inp[parent]; + draw_monoids[~inp].path_ix = u32(path_ix); + + if grandparent >= 0 { + bbox = sh_bbox[grandparent]; + } else if grandparent + i32(stack_size) >= 0 { + bbox = sh_stack_bbox[i32(WG_SIZE) + grandparent]; + } else { + bbox = vec4(-1e9, -1e9, 1e9, 1e9); + } + } + clip_bboxes[global_id.x] = bbox; +} diff --git a/piet-wgsl/shader/clip_reduce.wgsl b/piet-wgsl/shader/clip_reduce.wgsl new file mode 100644 index 0000000..50c6402 --- /dev/null +++ b/piet-wgsl/shader/clip_reduce.wgsl @@ -0,0 +1,66 @@ + +#import config +#import bbox +#import clip + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var clip_inp: array; + +@group(0) @binding(2) +var path_bboxes: array; + +@group(0) @binding(3) +var reduced: array; + +@group(0) @binding(4) +var clip_out: array; + +let WG_SIZE = 256u; +var sh_bic: array; +var sh_parent: array; +var sh_path_ix: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let inp = clip_inp[global_id.x]; + let is_push = inp >= 0; + var bic = Bic(1u - u32(is_push), u32(is_push)); + // reverse scan of bicyclic semigroup + sh_bic[local_id.x] = bic; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_bic[local_id.x + (1u << i)]; + bic = bic_combine(bic, other); + } + workgroupBarrier(); + sh_bic[local_id.x] = bic; + } + if local_id.x == 0u { + reduced[wg_id.x] = bic; + } + workgroupBarrier(); + let size = sh_bic[0].b; + bic = Bic(); + if is_push && bic.a == 0u { + let local_ix = size - bic.b - 1u; + sh_parent[local_ix] = local_id.x; + sh_path_ix[local_ix] = u32(inp); + } + workgroupBarrier(); + // TODO: possibly do forward scan here if depth can exceed wg size + if local_id.x < size { + let path_ix = sh_path_ix[local_id.x]; + let path_bbox = path_bboxes[path_ix]; + let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE; + let bbox = vec4(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1)); + clip_out[global_id.x] = ClipEl(parent_ix, bbox); + } +} diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl new file mode 100644 index 0000000..b27a215 --- /dev/null +++ b/piet-wgsl/shader/coarse.wgsl @@ -0,0 +1,448 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// The coarse rasterization stage. + +#import config +#import bump +#import drawtag +#import ptcl +#import tile + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var draw_monoids: array; + +// TODO: dedup +struct BinHeader { + element_count: u32, + chunk_offset: u32, +} + +@group(0) @binding(3) +var bin_headers: array; + +@group(0) @binding(4) +var bin_data: array; + +@group(0) @binding(5) +var paths: array; + +@group(0) @binding(6) +var tiles: array; + +@group(0) @binding(7) +var info: array; + +@group(0) @binding(8) +var bump: BumpAllocators; + +@group(0) @binding(9) +var ptcl: array; + + + +// Much of this code assumes WG_SIZE == N_TILE. If these diverge, then +// a fair amount of fixup is needed. +let WG_SIZE = 256u; +//let N_SLICE = WG_SIZE / 32u; +let N_SLICE = 8u; + +var sh_bitmaps: array, N_TILE>, N_SLICE>; +var sh_part_count: array; +var sh_part_offsets: array; +var sh_drawobj_ix: array; +var sh_tile_stride: array; +var sh_tile_width: array; +var sh_tile_x0: array; +var sh_tile_y0: array; +var sh_tile_count: array; +var sh_tile_base: array; + +// helper functions for writing ptcl + +var cmd_offset: u32; +var cmd_limit: u32; + +// Make sure there is space for a command of given size, plus a jump if needed +fn alloc_cmd(size: u32) { + if cmd_offset + size >= cmd_limit { + // We might be able to save a little bit of computation here + // by setting the initial value of the bump allocator. + let ptcl_dyn_start = config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC; + let new_cmd = ptcl_dyn_start + atomicAdd(&bump.ptcl, PTCL_INCREMENT); + // TODO: robust memory + ptcl[cmd_offset] = CMD_JUMP; + ptcl[cmd_offset + 1u] = new_cmd; + cmd_offset = new_cmd; + cmd_limit = cmd_offset + (PTCL_INCREMENT - PTCL_HEADROOM); + } +} + +fn write_path(tile: Tile, linewidth: f32) { + // TODO: take flags + alloc_cmd(3u); + if linewidth < 0.0 { + if tile.segments != 0u { + let fill = CmdFill(tile.segments, tile.backdrop); + ptcl[cmd_offset] = CMD_FILL; + ptcl[cmd_offset + 1u] = fill.tile; + ptcl[cmd_offset + 2u] = u32(fill.backdrop); + cmd_offset += 3u; + } else { + ptcl[cmd_offset] = CMD_SOLID; + cmd_offset += 1u; + } + } else { + let stroke = CmdStroke(tile.segments, 0.5 * linewidth); + ptcl[cmd_offset] = CMD_STROKE; + ptcl[cmd_offset + 1u] = stroke.tile; + ptcl[cmd_offset + 2u] = bitcast(stroke.half_width); + cmd_offset += 3u; + } +} + +fn write_color(color: CmdColor) { + alloc_cmd(2u); + ptcl[cmd_offset] = CMD_COLOR; + ptcl[cmd_offset + 1u] = color.rgba_color; + cmd_offset += 2u; +} + +// Discussion point: these are basically copying from info to ptcl. We +// could just write an info offset and have fine bind that buffer and read +// from it. + +fn write_lin_grad(lin: CmdLinGrad) { + alloc_cmd(5u); + ptcl[cmd_offset] = CMD_LIN_GRAD; + ptcl[cmd_offset + 1u] = lin.index; + ptcl[cmd_offset + 2u] = bitcast(lin.line_x); + ptcl[cmd_offset + 3u] = bitcast(lin.line_y); + ptcl[cmd_offset + 4u] = bitcast(lin.line_c); + cmd_offset += 5u; +} + +fn write_rad_grad(rad: CmdRadGrad) { + alloc_cmd(12u); + ptcl[cmd_offset] = CMD_RAD_GRAD; + ptcl[cmd_offset + 1u] = rad.index; + ptcl[cmd_offset + 2u] = bitcast(rad.matrx.x); + ptcl[cmd_offset + 3u] = bitcast(rad.matrx.y); + ptcl[cmd_offset + 4u] = bitcast(rad.matrx.z); + ptcl[cmd_offset + 5u] = bitcast(rad.matrx.w); + ptcl[cmd_offset + 6u] = bitcast(rad.xlat.x); + ptcl[cmd_offset + 7u] = bitcast(rad.xlat.y); + ptcl[cmd_offset + 8u] = bitcast(rad.c1.x); + ptcl[cmd_offset + 9u] = bitcast(rad.c1.y); + ptcl[cmd_offset + 10u] = bitcast(rad.ra); + ptcl[cmd_offset + 11u] = bitcast(rad.roff); + cmd_offset += 12u; +} + +fn write_begin_clip() { + alloc_cmd(1u); + ptcl[cmd_offset] = CMD_BEGIN_CLIP; + cmd_offset += 1u; +} + +fn write_end_clip(blend: u32) { + alloc_cmd(2u); + ptcl[cmd_offset] = CMD_END_CLIP; + ptcl[cmd_offset + 1u] = blend; + cmd_offset += 2u; +} + +@compute @workgroup_size(256) +fn main( + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let width_in_bins = (config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X; + let bin_ix = width_in_bins * wg_id.y + wg_id.x; + let n_partitions = (config.n_drawobj + N_TILE - 1u) / N_TILE; + + // Coordinates of the top left of this bin, in tiles. + let bin_tile_x = N_TILE_X * wg_id.x; + let bin_tile_y = N_TILE_Y * wg_id.y; + + let tile_x = local_id.x % N_TILE_X; + let tile_y = local_id.x / N_TILE_X; + let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x; + cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC; + cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM); + + // clip state + var clip_zero_depth = 0u; + var clip_depth = 0u; + + var partition_ix = 0u; + var rd_ix = 0u; + var wr_ix = 0u; + var part_start_ix = 0u; + var ready_ix = 0u; + + // blend state + var render_blend_depth = 0u; + var max_blend_depth = 0u; + + while true { + for (var i = 0u; i < N_SLICE; i += 1u) { + atomicStore(&sh_bitmaps[i][local_id.x], 0u); + } + + while true { + if ready_ix == wr_ix && partition_ix < n_partitions { + part_start_ix = ready_ix; + var count = 0u; + if partition_ix + local_id.x < n_partitions { + let in_ix = (partition_ix + local_id.x) * N_TILE + bin_ix; + let bin_header = bin_headers[in_ix]; + count = bin_header.element_count; + sh_part_offsets[local_id.x] = bin_header.chunk_offset; + } + // prefix sum the element counts + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + sh_part_count[local_id.x] = count; + workgroupBarrier(); + if local_id.x >= (1u << i) { + count += sh_part_count[local_id.x - (1u << i)]; + } + workgroupBarrier(); + } + sh_part_count[local_id.x] = part_start_ix + count; + workgroupBarrier(); + ready_ix = sh_part_count[WG_SIZE - 1u]; + partition_ix += WG_SIZE; + } + // use binary search to find draw object to read + var ix = rd_ix + local_id.x; + if ix >= wr_ix && ix < ready_ix { + var part_ix = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let probe = part_ix + ((N_TILE / 2u) >> i); + if ix >= sh_part_count[probe - 1u] { + part_ix = probe; + } + } + ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u); + let offset = sh_part_offsets[part_ix]; + sh_drawobj_ix[local_id.x] = bin_data[offset + ix]; + } + wr_ix = min(rd_ix + N_TILE, ready_ix); + if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) { + break; + } + } + // At this point, sh_drawobj_ix[0.. wr_ix - rd_ix] contains merged binning results. + var tag = DRAWTAG_NOP; + var drawobj_ix: u32; + if local_id.x + rd_ix < wr_ix { + drawobj_ix = sh_drawobj_ix[local_id.x]; + tag = scene[config.drawtag_base + drawobj_ix]; + } + + var tile_count = 0u; + // I think this predicate is the same as the last, maybe they can be combined + if tag != DRAWTAG_NOP { + let path_ix = draw_monoids[drawobj_ix].path_ix; + let path = paths[path_ix]; + let stride = path.bbox.z - path.bbox.x; + sh_tile_stride[local_id.x] = stride; + let dx = i32(path.bbox.x) - i32(bin_tile_x); + let dy = i32(path.bbox.y) - i32(bin_tile_y); + let x0 = clamp(dx, 0, i32(N_TILE_X)); + let y0 = clamp(dy, 0, i32(N_TILE_Y)); + let x1 = clamp(i32(path.bbox.z) - i32(bin_tile_x), 0, i32(N_TILE_X)); + let y1 = clamp(i32(path.bbox.w) - i32(bin_tile_y), 0, i32(N_TILE_Y)); + sh_tile_width[local_id.x] = u32(x1 - x0); + sh_tile_x0[local_id.x] = u32(x0); + sh_tile_y0[local_id.x] = u32(y0); + tile_count = u32(x1 - x0) * u32(y1 - y0); + // base relative to bin + let base = path.tiles - u32(dy * i32(stride) + dx); + sh_tile_base[local_id.x] = base; + // TODO: there's a write_tile_alloc here in the source, not sure what it's supposed to do + } + + // Prefix sum of tile counts + sh_tile_count[local_id.x] = tile_count; + for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) { + workgroupBarrier(); + if local_id.x >= (1u << i) { + tile_count += sh_tile_count[local_id.x - (1u << i)]; + } + workgroupBarrier(); + sh_tile_count[local_id.x] = tile_count; + } + workgroupBarrier(); + let total_tile_count = sh_tile_count[N_TILE - 1u]; + // Parallel iteration over all tiles + for (var ix = local_id.x; ix < total_tile_count; ix += N_TILE) { + // Binary search to find draw object which contains this tile + var el_ix = 0u; + for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) { + let probe = el_ix + ((N_TILE / 2u) >> i); + if ix >= sh_tile_count[probe - 1u] { + el_ix = probe; + } + } + drawobj_ix = sh_drawobj_ix[el_ix]; + tag = scene[config.drawtag_base + drawobj_ix]; + // TODO: clip logic + let seq_ix = ix - select(0u, sh_tile_count[el_ix - 1u], el_ix > 0u); + let width = sh_tile_width[el_ix]; + let x = sh_tile_x0[el_ix] + seq_ix % width; + let y = sh_tile_y0[el_ix] + seq_ix / width; + let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * y + x; + let tile = tiles[tile_ix]; + let is_clip = (tag & 1u) != 0u; + var is_blend = false; + if is_clip { + let BLEND_CLIP = (128u << 8u) | 3u; + let scene_offset = draw_monoids[drawobj_ix].scene_offset; + let dd = config.drawdata_base + scene_offset; + let blend = scene[dd]; + is_blend = blend != BLEND_CLIP; + } + let include_tile = tile.segments != 0u || (tile.backdrop == 0) == is_clip || is_blend; + if include_tile { + let el_slice = el_ix / 32u; + let el_mask = 1u << (el_ix & 31u); + atomicOr(&sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask); + } + } + workgroupBarrier(); + // At this point bit drawobj % 32 is set in sh_bitmaps[drawobj / 32][y * N_TILE_X + x] + // if drawobj touches tile (x, y). + + // Write per-tile command list for this tile + var slice_ix = 0u; + var bitmap = atomicLoad(&sh_bitmaps[0u][local_id.x]); + while true { + if bitmap == 0u { + slice_ix += 1u; + // potential optimization: make iteration limit dynamic + if slice_ix == N_SLICE { + break; + } + bitmap = atomicLoad(&sh_bitmaps[slice_ix][local_id.x]); + if bitmap == 0u { + continue; + } + } + let el_ix = slice_ix * 32u + firstTrailingBit(bitmap); + drawobj_ix = sh_drawobj_ix[el_ix]; + // clear LSB of bitmap, using bit magic + bitmap &= bitmap - 1u; + let drawtag = scene[config.drawtag_base + drawobj_ix]; + let dm = draw_monoids[drawobj_ix]; + let dd = config.drawdata_base + dm.scene_offset; + let di = dm.info_offset; + if clip_zero_depth == 0u { + let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x; + let tile = tiles[tile_ix]; + switch drawtag { + // DRAWTAG_FILL_COLOR + case 0x44u: { + let linewidth = bitcast(info[di]); + write_path(tile, linewidth); + let rgba_color = scene[dd]; + write_color(CmdColor(rgba_color)); + } + // DRAWTAG_FILL_LIN_GRADIENT + case 0x114u: { + let linewidth = bitcast(info[di]); + write_path(tile, linewidth); + var lin: CmdLinGrad; + lin.index = scene[dd]; + lin.line_x = bitcast(info[di + 1u]); + lin.line_y = bitcast(info[di + 2u]); + lin.line_c = bitcast(info[di + 3u]); + write_lin_grad(lin); + } + // DRAWTAG_FILL_RAD_GRADIENT + case 0x2dcu: { + let linewidth = bitcast(info[di]); + write_path(tile, linewidth); + var rad: CmdRadGrad; + rad.index = scene[dd]; + let m0 = bitcast(info[di + 1u]); + let m1 = bitcast(info[di + 2u]); + let m2 = bitcast(info[di + 3u]); + let m3 = bitcast(info[di + 4u]); + rad.matrx = vec4(m0, m1, m2, m3); + rad.xlat = vec2(bitcast(info[di + 5u]), bitcast(info[di + 6u])); + rad.c1 = vec2(bitcast(info[di + 7u]), bitcast(info[di + 8u])); + rad.ra = bitcast(info[di + 9u]); + rad.roff = bitcast(info[di + 10u]); + write_rad_grad(rad); + } + // DRAWTAG_BEGIN_CLIP + case 0x05u: { + if tile.segments == 0u && tile.backdrop == 0 { + clip_zero_depth = clip_depth + 1u; + } else { + write_begin_clip(); + render_blend_depth += 1u; + max_blend_depth = max(max_blend_depth, render_blend_depth); + } + clip_depth += 1u; + } + // DRAWTAG_END_CLIP + case 0x25u: { + clip_depth -= 1u; + write_path(tile, -1.0); + write_end_clip(scene[dd]); + render_blend_depth -= 1u; + } + default: {} + } + } else { + // In "clip zero" state, suppress all drawing + switch drawtag { + // DRAWTAG_BEGIN_CLIP + case 0x05u: { + clip_depth += 1u; + } + // DRAWTAG_END_CLIP + case 0x25u: { + if clip_depth == clip_zero_depth { + clip_zero_depth = 0u; + } + clip_depth -= 1u; + } + default: {} + } + } + } + + rd_ix += N_TILE; + if rd_ix >= ready_ix && partition_ix >= n_partitions { + break; + } + workgroupBarrier(); + } + if bin_tile_x < config.width_in_tiles && bin_tile_y < config.height_in_tiles { + //ptcl[cmd_offset] = CMD_END; + // TODO: blend stack allocation + } +} diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl new file mode 100644 index 0000000..f5140a4 --- /dev/null +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -0,0 +1,197 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Finish prefix sum of drawtags, decode draw objects. + +#import config +#import clip +#import drawtag +#import bbox + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var reduced: array; + +@group(0) @binding(3) +var path_bbox: array; + +@group(0) @binding(4) +var draw_monoid: array; + +@group(0) @binding(5) +var info: array; + +@group(0) @binding(6) +var clip_inp: array; + +let WG_SIZE = 256u; + +// Possibly dedup? +struct Transform { + matrx: vec4, + translate: vec2, +} + +fn read_transform(transform_base: u32, ix: u32) -> Transform { + let base = transform_base + ix * 6u; + let c0 = bitcast(scene[base]); + let c1 = bitcast(scene[base + 1u]); + let c2 = bitcast(scene[base + 2u]); + let c3 = bitcast(scene[base + 3u]); + let c4 = bitcast(scene[base + 4u]); + let c5 = bitcast(scene[base + 5u]); + let matrx = vec4(c0, c1, c2, c3); + let translate = vec2(c4, c5); + return Transform(matrx, translate); +} + +var sh_scratch: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let ix = global_id.x; + // Reduce prefix of workgroups up to this one + var agg = draw_monoid_identity(); + if local_id.x < wg_id.x { + agg = reduced[local_id.x]; + } + sh_scratch[local_id.x] = agg; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_scratch[local_id.x + (1u << i)]; + agg = combine_draw_monoid(agg, other); + } + workgroupBarrier(); + sh_scratch[local_id.x] = agg; + } + // Two barriers can be eliminated if we use separate shared arrays + // for prefix and intra-workgroup prefix sum. + workgroupBarrier(); + var m = sh_scratch[0]; + workgroupBarrier(); + let tag_word = scene[config.drawtag_base + ix]; + agg = map_draw_tag(tag_word); + sh_scratch[local_id.x] = agg; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x >= 1u << i { + let other = sh_scratch[local_id.x - (1u << i)]; + agg = combine_draw_monoid(agg, other); + } + workgroupBarrier(); + sh_scratch[local_id.x] = agg; + } + workgroupBarrier(); + if local_id.x > 0u { + m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]); + } + // m now contains exclusive prefix sum of draw monoid + draw_monoid[ix] = m; + let dd = config.drawdata_base + m.scene_offset; + let di = m.info_offset; + if tag_word == DRAWTAG_FILL_COLOR || tag_word == DRAWTAG_FILL_LIN_GRADIENT || + tag_word == DRAWTAG_FILL_RAD_GRADIENT || tag_word == DRAWTAG_FILL_IMAGE || + tag_word == DRAWTAG_BEGIN_CLIP + { + let bbox = path_bbox[m.path_ix]; + // TODO: bbox is mostly yagni here, sort that out. Maybe clips? + // let x0 = f32(bbox.x0); + // let y0 = f32(bbox.y0); + // let x1 = f32(bbox.x1); + // let y1 = f32(bbox.y1); + // let bbox_f = vec4(x0, y0, x1, y1); + let fill_mode = u32(bbox.linewidth >= 0.0); + var matrx: vec4; + var translate: vec2; + var linewidth = bbox.linewidth; + if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT { + let transform = read_transform(config.transform_base, bbox.trans_ix); + matrx = transform.matrx; + translate = transform.translate; + } + if linewidth >= 0.0 { + // Note: doesn't deal with anisotropic case + linewidth *= sqrt(abs(matrx.x * matrx.w - matrx.y * matrx.z)); + } + switch tag_word { + // DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE + case 0x44u, 0x48u: { + info[di] = bitcast(linewidth); + } + // DRAWTAG_FILL_LIN_GRADIENT + case 0x114u: { + info[di] = bitcast(linewidth); + var p0 = bitcast>(vec2(scene[dd + 1u], scene[dd + 2u])); + var p1 = bitcast>(vec2(scene[dd + 3u], scene[dd + 4u])); + p0 = matrx.xy * p0.x + matrx.zw * p0.y + translate; + p1 = matrx.xy * p1.x + matrx.zw * p1.y + translate; + let dxy = p1 - p0; + let scale = 1.0 / dot(dxy, dxy); + let line_xy = dxy * scale; + let line_c = -dot(p0, line_xy); + info[di + 1u] = bitcast(line_xy.x); + info[di + 2u] = bitcast(line_xy.y); + info[di + 3u] = bitcast(line_c); + } + // DRAWTAG_FILL_RAD_GRADIENT + case 0x2dcu: { + info[di] = bitcast(linewidth); + var p0 = bitcast>(vec2(scene[dd + 1u], scene[dd + 2u])); + var p1 = bitcast>(vec2(scene[dd + 3u], scene[dd + 4u])); + let r0 = bitcast(scene[dd + 5u]); + let r1 = bitcast(scene[dd + 6u]); + let inv_det = 1.0 / (matrx.x * matrx.w - matrx.y * matrx.z); + let inv_mat = inv_det * vec4(matrx.w, -matrx.y, -matrx.z, matrx.x); + var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y; + inv_tr += p0; + let center1 = p1 - p0; + let rr = r1 / (r1 - r0); + let ra_inv = rr / (r1 * r1 - dot(center1, center1)); + let c1 = center1 * ra_inv; + let ra = rr * ra_inv; + let roff = rr - 1.0; + info[di + 1u] = bitcast(inv_mat.x); + info[di + 2u] = bitcast(inv_mat.y); + info[di + 3u] = bitcast(inv_mat.z); + info[di + 4u] = bitcast(inv_mat.w); + info[di + 5u] = bitcast(inv_tr.x); + info[di + 6u] = bitcast(inv_tr.y); + info[di + 7u] = bitcast(c1.x); + info[di + 8u] = bitcast(c1.y); + info[di + 9u] = bitcast(ra); + info[di + 10u] = bitcast(roff); + } + default: {} + } + } + if tag_word == DRAWTAG_BEGIN_CLIP || tag_word == DRAWTAG_END_CLIP { + var path_ix = ~ix; + if tag_word == DRAWTAG_BEGIN_CLIP { + path_ix = m.path_ix; + } + clip_inp[m.clip_ix] = i32(path_ix); + } +} diff --git a/piet-wgsl/shader/draw_reduce.wgsl b/piet-wgsl/shader/draw_reduce.wgsl new file mode 100644 index 0000000..8ff1a44 --- /dev/null +++ b/piet-wgsl/shader/draw_reduce.wgsl @@ -0,0 +1,54 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +#import config +#import drawtag + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var reduced: array; + +let WG_SIZE = 256u; + +var sh_scratch: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.drawtag_base + ix]; + var agg = map_draw_tag(tag_word); + sh_scratch[local_id.x] = agg; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_scratch[local_id.x + (1u << i)]; + agg = combine_draw_monoid(agg, other); + } + workgroupBarrier(); + sh_scratch[local_id.x] = agg; + } + if local_id.x == 0u { + reduced[ix >> firstTrailingBit(WG_SIZE)] = agg; + } +} diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 9d62793..5488e01 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -14,6 +14,9 @@ // // Also licensed under MIT license, at your choice. +// Fine rasterizer. This can run in simple (just path rendering) and full +// modes, controllable by #define. + // This is a cut'n'paste w/ backdrop. struct Tile { backdrop: i32, @@ -36,17 +39,64 @@ var segments: array; @group(0) @binding(3) var output: array; +#ifdef full + +#import blend +#import ptcl + +let GRADIENT_WIDTH = 512; +let BLEND_STACK_SPLIT = 4u; + +@group(0) @binding(4) +var ptcl: array; + +@group(0) @binding(5) +var gradients: texture_2d; + +fn read_fill(cmd_ix: u32) -> CmdFill { + let tile = ptcl[cmd_ix + 1u]; + let backdrop = i32(ptcl[cmd_ix + 2u]); + return CmdFill(tile, backdrop); +} + +fn read_stroke(cmd_ix: u32) -> CmdStroke { + let tile = ptcl[cmd_ix + 1u]; + let half_width = bitcast(ptcl[cmd_ix + 2u]); + return CmdStroke(tile, half_width); +} + +fn read_color(cmd_ix: u32) -> CmdColor { + let rgba_color = ptcl[cmd_ix + 1u]; + return CmdColor(rgba_color); +} + +fn read_lin_grad(cmd_ix: u32) -> CmdLinGrad { + let index = ptcl[cmd_ix + 1u]; + let line_x = bitcast(ptcl[cmd_ix + 2u]); + let line_y = bitcast(ptcl[cmd_ix + 3u]); + let line_c = bitcast(ptcl[cmd_ix + 4u]); + return CmdLinGrad(index, line_x, line_y, line_c); +} + +fn read_rad_grad(cmd_ix: u32) -> CmdRadGrad { + let index = ptcl[cmd_ix + 1u]; + let m0 = bitcast(ptcl[cmd_ix + 2u]); + let m1 = bitcast(ptcl[cmd_ix + 3u]); + let m2 = bitcast(ptcl[cmd_ix + 4u]); + let m3 = bitcast(ptcl[cmd_ix + 5u]); + let matrx = vec4(m0, m1, m2, m3); + let xlat = vec2(bitcast(ptcl[cmd_ix + 6u]), bitcast(ptcl[cmd_ix + 7u])); + let c1 = vec2(bitcast(ptcl[cmd_ix + 8u]), bitcast(ptcl[cmd_ix + 9u])); + let ra = bitcast(ptcl[cmd_ix + 10u]); + let roff = bitcast(ptcl[cmd_ix + 11u]); + return CmdRadGrad(index, matrx, xlat, c1, ra, roff); +} + +#endif + let PIXELS_PER_THREAD = 4u; -@compute @workgroup_size(4, 16) -fn main( - @builtin(global_invocation_id) global_id: vec3, - @builtin(local_invocation_id) local_id: vec3, - @builtin(workgroup_id) wg_id: vec3, -) { - let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x; - let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); - let tile = tiles[tile_ix]; +fn fill_path(tile: Tile, xy: vec2) -> array { var area: array; let backdrop_f = f32(tile.backdrop); for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { @@ -89,8 +139,168 @@ fn main( for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { area[i] = abs(area[i]); } + return area; +} + +fn stroke_path(seg: u32, half_width: f32, xy: vec2) -> array { + var df: array; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + df[i] = 1e9; + } + var segment_ix = seg; + while segment_ix != 0u { + let segment = segments[segment_ix]; + let delta = segment.delta; + let dpos0 = xy + vec2(0.5, 0.5) - segment.origin; + let scale = 1.0 / dot(delta, delta); + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let dpos = vec2(dpos0.x + f32(i), dpos0.y); + let t = clamp(dot(dpos, delta) * scale, 0.0, 1.0); + // performance idea: hoist sqrt out of loop + df[i] = min(df[i], length(delta * t - dpos)); + } + segment_ix = segment.next; + } + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + // reuse array; return alpha rather than distance + df[i] = clamp(half_width + 0.5 - df[i], 0.0, 1.0); + } + return df; +} + +@compute @workgroup_size(4, 16) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x; + let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); +#ifdef full + var rgba: array, PIXELS_PER_THREAD>; + var blend_stack: array, PIXELS_PER_THREAD>; + var clip_depth = 0u; + var area: array; + var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC; + + // main interpretation loop + while true { + let tag = ptcl[cmd_ix]; + if tag == CMD_END { + break; + } + switch tag { + // CMD_FILL + case 1u: { + let fill = read_fill(cmd_ix); + let tile = Tile(fill.backdrop, fill.tile); + area = fill_path(tile, xy); + cmd_ix += 3u; + } + // CMD_STROKE + case 2u: { + let stroke = read_stroke(cmd_ix); + area = stroke_path(stroke.tile, stroke.half_width, xy); + cmd_ix += 3u; + } + // CMD_SOLID + case 3u: { + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + area[i] = 1.0; + } + cmd_ix += 1u; + } + // CMD_COLOR + case 5u: { + let color = read_color(cmd_ix); + let fg = unpack4x8unorm(color.rgba_color).wzyx; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let fg_i = fg * area[i]; + rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; + } + cmd_ix += 2u; + } + // CMD_LIN_GRAD + case 6u: { + let lin = read_lin_grad(cmd_ix); + let d = lin.line_x * xy.x + lin.line_y * xy.y + lin.line_c; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let my_d = d + lin.line_x * f32(i); + let x = i32(round(clamp(my_d, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1))); + let fg_rgba = textureLoad(gradients, vec2(x, i32(lin.index)), 0); + let fg_i = fg_rgba * area[i]; + rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; + } + cmd_ix += 5u; + } + // CMD_RAD_GRAD + case 7u: { + let rad = read_rad_grad(cmd_ix); + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let my_xy = vec2(xy.x + f32(i), xy.y); + // TODO: can hoist y, but for now stick to piet-gpu + let xy_xformed = rad.matrx.xz * my_xy.x + rad.matrx.yw * my_xy.y - rad.xlat; + let ba = dot(xy_xformed, rad.c1); + let ca = rad.ra * dot(xy_xformed, xy_xformed); + let t = sqrt(ba * ba + ca) - ba - rad.roff; + let x = i32(round(clamp(t, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1))); + let fg_rgba = textureLoad(gradients, vec2(x, i32(rad.index)), 0); + let fg_i = fg_rgba * area[i]; + rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; + } + cmd_ix += 12u; + } + // CMD_BEGIN_CLIP + case 9u: { + if clip_depth < BLEND_STACK_SPLIT { + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + blend_stack[clip_depth][i] = pack4x8unorm(rgba[i]); + rgba[i] = vec4(0.0); + } + } else { + // TODO: spill to memory + } + clip_depth += 1u; + cmd_ix += 1u; + } + // CMD_END_CLIP + case 10u: { + let blend = ptcl[cmd_ix + 1u]; + clip_depth -= 1u; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + var bg_rgba: u32; + if clip_depth < BLEND_STACK_SPLIT { + bg_rgba = blend_stack[clip_depth][i]; + } else { + // load from memory + } + let bg = unpack4x8unorm(bg_rgba); + let fg = rgba[i] * area[i]; + rgba[i] = blend_mix_compose(bg, fg, blend); + } + cmd_ix += 2u; + } + // CMD_JUMP + case 11u: { + cmd_ix = ptcl[cmd_ix + 1u]; + } + default: {} + } + } + let out_ix = global_id.y * (config.width_in_tiles * TILE_WIDTH) + global_id.x * PIXELS_PER_THREAD; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let fg = rgba[i]; + let a_inv = 1.0 / (fg.a + 1e-6); + let rgba_sep = vec4(fg.r * a_inv, fg.g * a_inv, fg.b * a_inv, fg.a); + let bytes = pack4x8unorm(rgba_sep); + output[out_ix + i] = bytes; + } +#else + let tile = tiles[tile_ix]; + let area = fill_path(tile, xy); let bytes = pack4x8unorm(vec4(area[0], area[1], area[2], area[3])); let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x; output[out_ix] = bytes; +#endif } diff --git a/piet-wgsl/shader/path_coarse.wgsl b/piet-wgsl/shader/path_coarse.wgsl index 23590d5..ffdd8cf 100644 --- a/piet-wgsl/shader/path_coarse.wgsl +++ b/piet-wgsl/shader/path_coarse.wgsl @@ -14,59 +14,53 @@ // // Also licensed under MIT license, at your choice. +#import config #import pathtag @group(0) @binding(0) -var path_tags: array; +var config: Config; @group(0) @binding(1) -var tag_monoids: array; +var scene: array; -// TODO: should probably have single "scene" binding. @group(0) @binding(2) -var path_data: array; +var tag_monoids: array; #ifdef cubics_out @group(0) @binding(3) var output: array>; #else -#import config - -struct Tile { +// We don't get this from import as it's the atomic version +struct AtomicTile { backdrop: atomic, segments: atomic, } #import segment -// Should probably be uniform binding @group(0) @binding(3) -var config: Config; +var tiles: array; @group(0) @binding(4) -var tiles: array; - -@group(0) @binding(5) var segments: array; #endif +var pathdata_base: u32; + fn read_f32_point(ix: u32) -> vec2 { - let x = bitcast(path_data[ix]); - let y = bitcast(path_data[ix + 1u]); + let x = bitcast(scene[pathdata_base + ix]); + let y = bitcast(scene[pathdata_base + ix + 1u]); return vec2(x, y); } fn read_i16_point(ix: u32) -> vec2 { - let raw = path_data[ix]; + let raw = scene[pathdata_base + ix]; let x = f32(i32(raw << 16u) >> 16u); let y = f32(i32(raw) >> 16u); return vec2(x, y); } #ifndef cubics_out -let TILE_WIDTH = 16u; -let TILE_HEIGHT = 16u; - struct SubdivResult { val: f32, a0: f32, @@ -136,7 +130,8 @@ fn main( ) { // Obtain exclusive prefix sum of tag monoid let ix = global_id.x; - let tag_word = path_tags[ix >> 2u]; + let tag_word = scene[config.pathtag_base + (ix >> 2u)]; + pathdata_base = config.pathdata_base; let shift = (ix & 3u) * 8u; var tm = reduce_tag(tag_word & ((1u << shift) - 1u)); tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm); diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl new file mode 100644 index 0000000..d607bac --- /dev/null +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -0,0 +1,275 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Path coarse rasterization for the full implementation. + +#import config +#import pathtag +#import tile +#import segment +#import cubic +#import bump + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var tag_monoids: array; + +@group(0) @binding(3) +var cubics: array; + +@group(0) @binding(4) +var paths: array; + +// We don't get this from import as it's the atomic version +struct AtomicTile { + backdrop: atomic, + segments: atomic, +} + +@group(0) @binding(5) +var bump: BumpAllocators; + +@group(0) @binding(6) +var tiles: array; + +@group(0) @binding(7) +var segments: array; + +struct SubdivResult { + val: f32, + a0: f32, + a2: f32, +} + +let D = 0.67; +fn approx_parabola_integral(x: f32) -> f32 { + return x * inverseSqrt(sqrt(1.0 - D + (D * D * D * D + 0.25 * x * x))); +} + +let B = 0.39; +fn approx_parabola_inv_integral(x: f32) -> f32 { + return x * sqrt(1.0 - B + (B * B + 0.5 * x * x)); +} + +fn estimate_subdiv(p0: vec2, p1: vec2, p2: vec2, sqrt_tol: f32) -> SubdivResult { + let d01 = p1 - p0; + let d12 = p2 - p1; + let dd = d01 - d12; + let cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x; + let cross_inv = 1.0 / cross; + let x0 = dot(d01, dd) * cross_inv; + let x2 = dot(d12, dd) * cross_inv; + let scale = abs(cross / (length(dd) * (x2 - x0))); + + let a0 = approx_parabola_integral(x0); + let a2 = approx_parabola_integral(x2); + var val = 0.0; + if scale < 1e9 { + let da = abs(a2 - a0); + let sqrt_scale = sqrt(scale); + if sign(x0) == sign(x2) { + val = sqrt_scale; + } else { + let xmin = sqrt_tol / sqrt_scale; + val = sqrt_tol / approx_parabola_integral(xmin); + } + val *= da; + } + return SubdivResult(val, a0, a2); +} + +fn eval_quad(p0: vec2, p1: vec2, p2: vec2, t: f32) -> vec2 { + let mt = 1.0 - t; + return p0 * (mt * mt) + (p1 * (mt * 2.0) + p2 * t) * t; +} + +fn eval_cubic(p0: vec2, p1: vec2, p2: vec2, p3: vec2, t: f32) -> vec2 { + let mt = 1.0 - t; + return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t; +} + +fn alloc_segment() -> u32 { + return atomicAdd(&bump.segments, 1u) + 1u; +} + +let MAX_QUADS = 16u; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.pathtag_base + (ix >> 2u)]; + let shift = (ix & 3u) * 8u; + var tag_byte = (tag_word >> shift) & 0xffu; + + if (tag_byte & PATH_TAG_SEG_TYPE) != 0u { + // Discussion question: it might actually be cheaper to do the path segment + // decoding & transform again rather than store the result in a buffer; + // classic memory vs ALU tradeoff. + let cubic = cubics[global_id.x]; + let path = paths[cubic.path_ix]; + let bbox = vec4(path.bbox); + let p0 = cubic.p0; + let p1 = cubic.p1; + let p2 = cubic.p2; + let p3 = cubic.p3; + let err_v = 3.0 * (p2 - p1) + p0 - p3; + let err = dot(err_v, err_v); + let ACCURACY = 0.25; + let Q_ACCURACY = ACCURACY * 0.1; + let REM_ACCURACY = (ACCURACY - Q_ACCURACY); + let MAX_HYPOT2 = 432.0 * Q_ACCURACY * Q_ACCURACY; + var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1u); + n_quads = min(n_quads, MAX_QUADS); + var keep_params: array; + var val = 0.0; + var qp0 = p0; + let step = 1.0 / f32(n_quads); + for (var i = 0u; i < n_quads; i += 1u) { + let t = f32(i + 1u) * step; + let qp2 = eval_cubic(p0, p1, p2, p3, t); + var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step); + qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); + let params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY)); + keep_params[i] = params; + val += params.val; + qp0 = qp2; + } + let n = max(u32(ceil(val * (0.5 / sqrt(REM_ACCURACY)))), 1u); + var lp0 = p0; + qp0 = p0; + let v_step = val / f32(n); + var n_out = 1u; + var val_sum = 0.0; + for (var i = 0u; i < n_quads; i += 1u) { + let t = f32(i + 1u) * step; + let qp2 = eval_cubic(p0, p1, p2, p3, t); + var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step); + qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); + let params = keep_params[i]; + let u0 = approx_parabola_inv_integral(params.a0); + let u2 = approx_parabola_inv_integral(params.a2); + let uscale = 1.0 / (u2 - u0); + var val_target = f32(n_out) * v_step; + while n_out == n || val_target < val_sum + params.val { + var lp1: vec2; + if n_out == n { + lp1 = p3; + } else { + let u = (val_target - val_sum) / params.val; + let a = mix(params.a0, params.a2, u); + let au = approx_parabola_inv_integral(a); + let t = (au - u0) * uscale; + lp1 = eval_quad(qp0, qp1, qp2, t); + } + + // Output line segment lp0..lp1 + let xymin = min(lp0, lp1); + let xymax = max(lp0, lp1); + let dp = lp1 - lp0; + let recip_dx = 1.0 / dp.x; + let invslope = select(dp.x / dp.y, 1.0e9, abs(dp.y) < 1.0e-9); + let c = 0.5 * abs(invslope); + let b = invslope; + let SX = 1.0 / f32(TILE_WIDTH); + let SY = 1.0 / f32(TILE_HEIGHT); + let a = (lp0.x - (lp0.y - 0.5 * f32(TILE_HEIGHT)) * b) * SX; + var x0 = i32(floor(xymin.x * SX)); + var x1 = i32(floor(xymax.x * SX) + 1.0); + var y0 = i32(floor(xymin.y * SY)); + var y1 = i32(floor(xymax.y * SY) + 1.0); + x0 = clamp(x0, bbox.x, bbox.z); + x1 = clamp(x1, bbox.x, bbox.z); + y0 = clamp(y0, bbox.y, bbox.w); + y1 = clamp(y1, bbox.y, bbox.w); + var xc = a + b * f32(y0); + let stride = bbox.z - bbox.x; + var base = i32(path.tiles) + (y0 - bbox.y) * stride - bbox.x; + var xray = i32(floor(lp0.x * SX)); + var last_xray = i32(floor(lp1.x * SX)); + if dp.y < 0.0 { + let tmp = xray; + xray = last_xray; + last_xray = tmp; + } + for (var y = y0; y < y1; y += 1) { + let tile_y0 = f32(y) * f32(TILE_HEIGHT); + let xbackdrop = max(xray + 1, bbox.x); + if xymin.y < tile_y0 && xbackdrop < bbox.z { + let backdrop = select(-1, 1, dp.y < 0.0); + let tile_ix = base + xbackdrop; + atomicAdd(&tiles[tile_ix].backdrop, backdrop); + } + var next_xray = last_xray; + if y + 1 < y1 { + let tile_y1 = f32(y + 1) * f32(TILE_HEIGHT); + let x_edge = lp0.x + (tile_y1 - lp0.y) * invslope; + next_xray = i32(floor(x_edge * SX)); + } + let min_xray = min(xray, next_xray); + let max_xray = max(xray, next_xray); + var xx0 = min(i32(floor(xc - c)), min_xray); + var xx1 = max(i32(ceil(xc + c)), max_xray + 1); + xx0 = clamp(xx0, x0, x1); + xx1 = clamp(xx1, x0, x1); + var tile_seg: Segment; + for (var x = xx0; x < xx1; x += 1) { + let tile_x0 = f32(x) * f32(TILE_WIDTH); + let tile_ix = base + x; + // allocate segment, insert linked list + let seg_ix = alloc_segment(); + let old = atomicExchange(&tiles[tile_ix].segments, seg_ix); + tile_seg.origin = lp0; + tile_seg.delta = dp; + var y_edge = mix(lp0.y, lp1.y, (tile_x0 - lp0.x) * recip_dx); + if xymin.x < tile_x0 { + let p = vec2(tile_x0, y_edge); + if dp.x < 0.0 { + tile_seg.delta = p - lp0; + } else { + tile_seg.origin = p; + tile_seg.delta = lp1 - p; + } + if tile_seg.delta.x == 0.0 { + tile_seg.delta.x = sign(dp.x) * 1e-9; + } + } + if x <= min_xray || max_xray < x { + y_edge = 1e9; + } + tile_seg.y_edge = y_edge; + tile_seg.next = old; + segments[seg_ix] = tile_seg; + } + xc += b; + base += stride; + xray = next_xray; + } + n_out += 1u; + val_target += v_step; + lp0 = lp1; + } + val_sum += params.val; + qp0 = qp2; + } + } +} diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl new file mode 100644 index 0000000..b4f83e1 --- /dev/null +++ b/piet-wgsl/shader/pathseg.wgsl @@ -0,0 +1,217 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Path segment decoding for the full case. + +// In the simple case, path segments are decoded as part of the coarse +// path rendering stage. In the full case, they are separated, as the +// decoding process also generates bounding boxes, and those in turn are +// used for tile allocation and clipping; actual coarse path rasterization +// can't proceed until those are complete. + +// There's some duplication of the decoding code but we won't worry about +// that just now. Perhaps it could be factored more nicely later. + +#import config +#import pathtag +#import cubic + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var tag_monoids: array; + +struct AtomicPathBbox { + x0: atomic, + y0: atomic, + x1: atomic, + y1: atomic, + linewidth: f32, + trans_ix: u32, +} + +@group(0) @binding(3) +var path_bboxes: array; + + +@group(0) @binding(4) +var cubics: array; + +// Monoid is yagni, for future optimization + +// struct BboxMonoid { +// bbox: vec4, +// flags: u32, +// } + +// let FLAG_RESET_BBOX = 1u; +// let FLAG_SET_BBOX = 2u; + +// fn combine_bbox_monoid(a: BboxMonoid, b: BboxMonoid) -> BboxMonoid { +// var c: BboxMonoid; +// c.bbox = b.bbox; +// // TODO: previous-me thought this should be gated on b & SET_BBOX == false also +// if (a.flags & FLAG_RESET_BBOX) == 0u && b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y { +// c.bbox = a.bbox; +// } else if (a.flags & FLAG_RESET_BBOX) == 0u && (b.flags & FLAG_SET_BBOX) == 0u || +// (a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y) +// { +// c.bbox = vec4(min(a.bbox.xy, c.bbox.xy), max(a.bbox.xw, c.bbox.zw)); +// } +// c.flags = (a.flags & FLAG_SET_BBOX) | b.flags; +// c.flags |= (a.flags & FLAG_RESET_BBOX) << 1u; +// return c; +// } + +// fn bbox_monoid_identity() -> BboxMonoid { +// return BboxMonoid(); +// } + +var pathdata_base: u32; + +fn read_f32_point(ix: u32) -> vec2 { + let x = bitcast(scene[pathdata_base + ix]); + let y = bitcast(scene[pathdata_base + ix + 1u]); + return vec2(x, y); +} + +fn read_i16_point(ix: u32) -> vec2 { + let raw = scene[pathdata_base + ix]; + let x = f32(i32(raw << 16u) >> 16u); + let y = f32(i32(raw) >> 16u); + return vec2(x, y); +} + +struct Transform { + matrx: vec4, + translate: vec2, +} + +fn read_transform(transform_base: u32, ix: u32) -> Transform { + let base = transform_base + ix * 6u; + let c0 = bitcast(scene[base]); + let c1 = bitcast(scene[base + 1u]); + let c2 = bitcast(scene[base + 2u]); + let c3 = bitcast(scene[base + 3u]); + let c4 = bitcast(scene[base + 4u]); + let c5 = bitcast(scene[base + 5u]); + let matrx = vec4(c0, c1, c2, c3); + let translate = vec2(c4, c5); + return Transform(matrx, translate); +} + +fn transform_apply(transform: Transform, p: vec2) -> vec2 { + return transform.matrx.xy * p.x + transform.matrx.zw * p.y + transform.translate; +} + +fn round_down(x: f32) -> i32 { + return i32(floor(x)); +} + +fn round_up(x: f32) -> i32 { + return i32(ceil(x)); +} + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.pathtag_base + (ix >> 2u)]; + pathdata_base = config.pathdata_base; + let shift = (ix & 3u) * 8u; + var tm = reduce_tag(tag_word & ((1u << shift) - 1u)); + tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm); + var tag_byte = (tag_word >> shift) & 0xffu; + + let out = &path_bboxes[tm.path_ix]; + var linewidth: f32; + if (tag_byte & PATH_TAG_PATH) != 0u { + linewidth = bitcast(scene[config.linewidth_base + tm.linewidth_ix]); + (*out).linewidth = linewidth; + (*out).trans_ix = tm.trans_ix; + } + // Decode path data + let seg_type = tag_byte & PATH_TAG_SEG_TYPE; + if seg_type != 0u { + var p0: vec2; + var p1: vec2; + var p2: vec2; + var p3: vec2; + if (tag_byte & PATH_TAG_F32) != 0u { + p0 = read_f32_point(tm.pathseg_offset); + p1 = read_f32_point(tm.pathseg_offset + 2u); + if seg_type >= PATH_TAG_QUADTO { + p2 = read_f32_point(tm.pathseg_offset + 4u); + if seg_type == PATH_TAG_CUBICTO { + p3 = read_f32_point(tm.pathseg_offset + 6u); + } + } + } else { + p0 = read_i16_point(tm.pathseg_offset); + p1 = read_i16_point(tm.pathseg_offset + 1u); + if seg_type >= PATH_TAG_QUADTO { + p2 = read_i16_point(tm.pathseg_offset + 2u); + if seg_type == PATH_TAG_CUBICTO { + p3 = read_i16_point(tm.pathseg_offset + 3u); + } + } + } + let transform = read_transform(config.transform_base, tm.trans_ix); + //let transform = Transform(vec4(1.0, 0.0, 0.0, 1.0), vec2()); + p0 = transform_apply(transform, p0); + p1 = transform_apply(transform, p1); + var bbox = vec4(min(p0, p1), max(p0, p1)); + // Degree-raise + if seg_type == PATH_TAG_LINETO { + p3 = p1; + p2 = mix(p3, p0, 1.0 / 3.0); + p1 = mix(p0, p3, 1.0 / 3.0); + } else if seg_type >= PATH_TAG_QUADTO { + p2 = transform_apply(transform, p2); + bbox = vec4(min(bbox.xy, p2), max(bbox.zw, p2)); + if seg_type == PATH_TAG_CUBICTO { + p3 = transform_apply(transform, p3); + bbox = vec4(min(bbox.xy, p3), max(bbox.zw, p3)); + } else { + p3 = p2; + p2 = mix(p1, p2, 1.0 / 3.0); + p1 = mix(p1, p0, 1.0 / 3.0); + } + } + if linewidth >= 0.0 { + // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm + // This is the correct bounding box, but we're not handling rendering + // in the isotropic case, so it may mismatch. + let stroke = 0.5 * linewidth * vec2(length(transform.matrx.xz), length(transform.matrx.yw)); + bbox += vec4(-stroke, stroke); + } + cubics[global_id.x] = Cubic(p0, p1, p2, p3, tm.path_ix, 0u); + // Update bounding box using atomics only. Computing a monoid is a + // potential future optimization. + if bbox.z > bbox.x || bbox.w > bbox.y { + atomicMin(&(*out).x0, round_down(bbox.x)); + atomicMin(&(*out).y0, round_down(bbox.y)); + atomicMax(&(*out).x1, round_up(bbox.z)); + atomicMax(&(*out).y1, round_up(bbox.w)); + } + } +} diff --git a/piet-wgsl/shader/pathtag_reduce.wgsl b/piet-wgsl/shader/pathtag_reduce.wgsl index 92bf20d..bbf8528 100644 --- a/piet-wgsl/shader/pathtag_reduce.wgsl +++ b/piet-wgsl/shader/pathtag_reduce.wgsl @@ -14,14 +14,16 @@ // // Also licensed under MIT license, at your choice. +#import config #import pathtag -// Note: should have a single scene binding, path_tags are a slice -// in that; need a config uniform. @group(0) @binding(0) -var path_tags: array; +var config: Config; @group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) var reduced: array; let LG_WG_SIZE = 8u; @@ -35,7 +37,7 @@ fn main( @builtin(local_invocation_id) local_id: vec3, ) { let ix = global_id.x; - let tag_word = path_tags[ix]; + let tag_word = scene[config.pathtag_base + ix]; var agg = reduce_tag(tag_word); sh_scratch[local_id.x] = agg; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { diff --git a/piet-wgsl/shader/pathtag_scan.wgsl b/piet-wgsl/shader/pathtag_scan.wgsl index 058ce04..fe87750 100644 --- a/piet-wgsl/shader/pathtag_scan.wgsl +++ b/piet-wgsl/shader/pathtag_scan.wgsl @@ -14,15 +14,19 @@ // // Also licensed under MIT license, at your choice. +#import config #import pathtag @group(0) @binding(0) -var path_tags: array; +var config: Config; @group(0) @binding(1) -var reduced: array; +var scene: array; @group(0) @binding(2) +var reduced: array; + +@group(0) @binding(3) var tag_monoids: array; let LG_WG_SIZE = 8u; @@ -39,13 +43,13 @@ fn main( @builtin(workgroup_id) wg_id: vec3, ) { var agg = tag_monoid_identity(); - if (local_id.x < wg_id.x) { + if local_id.x < wg_id.x { agg = reduced[local_id.x]; } sh_parent[local_id.x] = agg; for (var i = 0u; i < LG_WG_SIZE; i += 1u) { workgroupBarrier(); - if (local_id.x + (1u << i) < WG_SIZE) { + if local_id.x + (1u << i) < WG_SIZE { let other = sh_parent[local_id.x + (1u << i)]; agg = combine_tag_monoid(agg, other); } @@ -54,12 +58,12 @@ fn main( } let ix = global_id.x; - let tag_word = path_tags[ix]; + let tag_word = scene[config.pathtag_base + ix]; agg = reduce_tag(tag_word); sh_monoid[local_id.x] = agg; for (var i = 0u; i < LG_WG_SIZE; i += 1u) { workgroupBarrier(); - if (local_id.x >= 1u << i) { + if local_id.x >= 1u << i { let other = sh_monoid[local_id.x - (1u << i)]; agg = combine_tag_monoid(other, agg); } @@ -68,7 +72,7 @@ fn main( } // prefix up to this workgroup var tm = sh_parent[0]; - if (local_id.x > 0u) { + if local_id.x > 0u { tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]); } // exclusive prefix sum, granularity of 4 tag bytes diff --git a/piet-wgsl/shader/shared/bbox.wgsl b/piet-wgsl/shader/shared/bbox.wgsl new file mode 100644 index 0000000..c260df9 --- /dev/null +++ b/piet-wgsl/shader/shared/bbox.wgsl @@ -0,0 +1,32 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// The annotated bounding box for a path. It has been transformed, +// but contains a link to the active transform, mostly for gradients. +// Coordinates are integer pixels (for the convenience of atomic update) +// but will probably become fixed-point fractions for rectangles. +struct PathBbox { + x0: i32, + y0: i32, + x1: i32, + y1: i32, + linewidth: f32, + trans_ix: u32, +} + +fn bbox_intersect(a: vec4, b: vec4) -> vec4 { + return vec4(max(a.xy, b.xy), min(a.zw, b.zw)); +} diff --git a/piet-wgsl/shader/shared/blend.wgsl b/piet-wgsl/shader/shared/blend.wgsl new file mode 100644 index 0000000..34cb55b --- /dev/null +++ b/piet-wgsl/shader/shared/blend.wgsl @@ -0,0 +1,351 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + + +// Color mixing modes + +let MIX_NORMAL = 0u; +let MIX_MULTIPLY = 1u; +let MIX_SCREEN = 2u; +let MIX_OVERLAY = 3u; +let MIX_DARKEN = 4u; +let MIX_LIGHTEN = 5u; +let MIX_COLOR_DODGE = 6u; +let MIX_COLOR_BURN = 7u; +let MIX_HARD_LIGHT = 8u; +let MIX_SOFT_LIGHT = 9u; +let MIX_DIFFERENCE = 10u; +let MIX_EXCLUSION = 11u; +let MIX_HUE = 12u; +let MIX_SATURATION = 13u; +let MIX_COLOR = 14u; +let MIX_LUMINOSITY = 15u; +let MIX_CLIP = 128u; + +fn screen(cb: vec3, cs: vec3) -> vec3 { + return cb + cs - (cb * cs); +} + +fn color_dodge(cb: f32, cs: f32) -> f32 { + if cb == 0.0 { + return 0.0; + } else if cs == 1.0 { + return 1.0; + } else { + return min(1.0, cb / (1.0 - cs)); + } +} + +fn color_burn(cb: f32, cs: f32) -> f32 { + if cb == 1.0 { + return 1.0; + } else if cs == 0.0 { + return 0.0; + } else { + return 1.0 - min(1.0, (1.0 - cb) / cs); + } +} + +fn hard_light(cb: vec3, cs: vec3) -> vec3 { + return mix( + screen(cb, 2.0 * cs - 1.0), + cb * 2.0 * cs, + vec3(cs <= vec3(0.5)) + ); +} + +fn soft_light(cb: vec3, cs: vec3) -> vec3 { + let d = mix( + sqrt(cb), + ((16.0 * cb - vec3(12.0)) * cb + vec3(4.0)) * cb, + vec3(cb <= vec3(0.25)) + ); + return mix( + cb + (2.0 * cs - vec3(1.0)) * (d - cb), + cb - (vec3(1.0) - 2.0 * cs) * cb * (vec3(1.0) - cb), + vec3(cs <= vec3(0.5)) + ); +} + +fn sat(c: vec3) -> f32 { + return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); +} + +fn lum(c: vec3) -> f32 { + let f = vec3(0.3, 0.59, 0.11); + return dot(c, f); +} + +fn clip_color(c: vec3) -> vec3 { + var c = c; + let l = lum(c); + let n = min(c.x, min(c.y, c.z)); + let x = max(c.x, max(c.y, c.z)); + if n < 0.0 { + c = l + (((c - l) * l) / (l - n)); + } + if x > 1.0 { + c = l + (((c - l) * (1.0 - l)) / (x - l)); + } + return c; +} + +fn set_lum(c: vec3, l: f32) -> vec3 { + return clip_color(c + (l - lum(c))); +} + +fn set_sat_inner( + cmin: ptr, + cmid: ptr, + cmax: ptr, + s: f32 +) { + if *cmax > *cmin { + *cmid = ((*cmid - *cmin) * s) / (*cmax - *cmin); + *cmax = s; + } else { + *cmid = 0.0; + *cmax = 0.0; + } + *cmin = 0.0; +} + +fn set_sat(c: vec3, s: f32) -> vec3 { + var r = c.r; + var g = c.g; + var b = c.b; + if r <= g { + if g <= b { + set_sat_inner(&r, &g, &b, s); + } else { + if r <= b { + set_sat_inner(&r, &b, &g, s); + } else { + set_sat_inner(&b, &r, &g, s); + } + } + } else { + if r <= b { + set_sat_inner(&g, &r, &b, s); + } else { + if g <= b { + set_sat_inner(&g, &b, &r, s); + } else { + set_sat_inner(&b, &g, &r, s); + } + } + } + return vec3(r, g, b); +} + +// Blends two RGB colors together. The colors are assumed to be in sRGB +// color space, and this function does not take alpha into account. +fn blend_mix(cb: vec3, cs: vec3, mode: u32) -> vec3 { + var b = vec3(0.0); + switch mode { + // MIX_MULTIPLY + case 1u: { + b = cb * cs; + } + // MIX_SCREEN + case 2u: { + b = screen(cb, cs); + } + // MIX_OVERLAY + case 3u: { + b = hard_light(cs, cb); + } + // MIX_DARKEN + case 4u: { + b = min(cb, cs); + } + // MIX_LIGHTEN + case 5u: { + b = max(cb, cs); + } + // MIX_COLOR_DODGE + case 6u: { + b = vec3(color_dodge(cb.x, cs.x), color_dodge(cb.y, cs.y), color_dodge(cb.z, cs.z)); + } + // MIX_COLOR_BURN + case 7u: { + b = vec3(color_burn(cb.x, cs.x), color_burn(cb.y, cs.y), color_burn(cb.z, cs.z)); + } + // MIX_HARD_LIGHT + case 8u: { + b = hard_light(cb, cs); + } + // MIX_SOFT_LIGHT + case 9u: { + b = soft_light(cb, cs); + } + // MIX_DIFFERENCE + case 10u: { + b = abs(cb - cs); + } + // MIX_EXCLUSION + case 11u: { + b = cb + cs - 2.0 * cb * cs; + } + // MIX_HUE + case 12u: { + b = set_lum(set_sat(cs, sat(cb)), lum(cb)); + } + // MIX_SATURATION + case 13u: { + b = set_lum(set_sat(cb, sat(cs)), lum(cb)); + } + // MIX_COLOR + case 14u: { + b = set_lum(cs, lum(cb)); + } + // MIX_LUMINOSITY + case 15u: { + b = set_lum(cb, lum(cs)); + } + default: { + b = cs; + } + } + return b; +} + +// Composition modes + +let COMPOSE_CLEAR = 0u; +let COMPOSE_COPY = 1u; +let COMPOSE_DEST = 2u; +let COMPOSE_SRC_OVER = 3u; +let COMPOSE_DEST_OVER = 4u; +let COMPOSE_SRC_IN = 5u; +let COMPOSE_DEST_IN = 6u; +let COMPOSE_SRC_OUT = 7u; +let COMPOSE_DEST_OUT = 8u; +let COMPOSE_SRC_ATOP = 9u; +let COMPOSE_DEST_ATOP = 10u; +let COMPOSE_XOR = 11u; +let COMPOSE_PLUS = 12u; +let COMPOSE_PLUS_LIGHTER = 13u; + +// Apply general compositing operation. +// Inputs are separated colors and alpha, output is premultiplied. +fn blend_compose( + cb: vec3, + cs: vec3, + ab: f32, + as_: f32, + mode: u32 +) -> vec4 { + var fa = 0.0; + var fb = 0.0; + switch mode { + // COMPOSE_COPY + case 1u: { + fa = 1.0; + fb = 0.0; + } + // COMPOSE_DEST + case 2u: { + fa = 0.0; + fb = 1.0; + } + // COMPOSE_SRC_OVER + case 3u: { + fa = 1.0; + fb = 1.0 - as_; + } + // COMPOSE_DEST_OVER + case 4u: { + fa = 1.0 - ab; + fb = 1.0; + } + // COMPOSE_SRC_IN + case 5u: { + fa = ab; + fb = 0.0; + } + // COMPOSE_DEST_IN + case 6u: { + fa = 0.0; + fb = as_; + } + // COMPOSE_SRC_OUT + case 7u: { + fa = 1.0 - ab; + fb = 0.0; + } + // COMPOSE_DEST_OUT + case 8u: { + fa = 0.0; + fb = 1.0 - as_; + } + // COMPOSE_SRC_ATOP + case 9u: { + fa = ab; + fb = 1.0 - as_; + } + // COMPOSE_DEST_ATOP + case 10u: { + fa = 1.0 - ab; + fb = as_; + } + // COMPOSE_XOR + case 11u: { + fa = 1.0 - ab; + fb = 1.0 - as_; + } + // COMPOSE_PLUS + case 12u: { + fa = 1.0; + fb = 1.0; + } + // COMPOSE_PLUS_LIGHTER + case 13u: { + return min(vec4(1.0), vec4(as_ * cs + ab * cb, as_ + ab)); + } + default: {} + } + let as_fa = as_ * fa; + let ab_fb = ab * fb; + let co = as_fa * cs + ab_fb * cb; + return vec4(co, as_fa + ab_fb); +} + +// Apply color mixing and composition. Both input and output colors are +// premultiplied RGB. +fn blend_mix_compose(backdrop: vec4, src: vec4, mode: u32) -> vec4 { + let BLEND_DEFAULT = ((MIX_NORMAL << 8u) | COMPOSE_SRC_OVER); + let EPSILON = 1e-15; + if (mode & 0x7fffu) == BLEND_DEFAULT { + // Both normal+src_over blend and clip case + return backdrop * (1.0 - src.a) + src; + } + // Un-premultiply colors for blending + let inv_src_a = 1.0 / (src.a + EPSILON); + var cs = src.rgb * inv_src_a; + let inv_backdrop_a = 1.0 / (backdrop.a + EPSILON); + let cb = backdrop.rgb * inv_backdrop_a; + let mix_mode = mode >> 8u; + let mixed = blend_mix(cb, cs, mix_mode); + cs = mix(cs, mixed, backdrop.a); + let compose_mode = mode & 0xffu; + if compose_mode == COMPOSE_SRC_OVER { + let co = mix(backdrop.rgb, cs, src.a); + return vec4(co, src.a + backdrop.a * (1.0 - src.a)); + } else { + return blend_compose(cb, cs, backdrop.a, src.a, compose_mode); + } +} diff --git a/piet-wgsl/shader/shared/bump.wgsl b/piet-wgsl/shader/shared/bump.wgsl new file mode 100644 index 0000000..b33ffaa --- /dev/null +++ b/piet-wgsl/shader/shared/bump.wgsl @@ -0,0 +1,23 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// TODO: robust memory (failure flags) +struct BumpAllocators { + binning: atomic, + ptcl: atomic, + tile: atomic, + segments: atomic, +} diff --git a/piet-wgsl/shader/shared/clip.wgsl b/piet-wgsl/shader/shared/clip.wgsl new file mode 100644 index 0000000..608e9bf --- /dev/null +++ b/piet-wgsl/shader/shared/clip.wgsl @@ -0,0 +1,14 @@ +struct Bic { + a: u32, + b: u32, +} + +fn bic_combine(x: Bic, y: Bic) -> Bic { + let m = min(x.b, y.a); + return Bic(x.a + y.a - m, x.b + y.b - m); +} + +struct ClipEl { + parent_ix: u32, + bbox: vec4, +} diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index 704a608..5db894b 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -17,4 +17,29 @@ struct Config { width_in_tiles: u32, height_in_tiles: u32, + + n_drawobj: u32, + n_path: u32, + n_clip: u32, + + // offsets within scene buffer (in u32 units) + // Note: this is a difference from piet-gpu, which is in bytes + pathtag_base: u32, + pathdata_base: u32, + + drawtag_base: u32, + drawdata_base: u32, + + transform_base: u32, + linewidth_base: u32, } + +// Geometry of tiles and bins + +let TILE_WIDTH = 16u; +let TILE_HEIGHT = 16u; +// Number of tiles per bin +let N_TILE_X = 16u; +let N_TILE_Y = 16u; +//let N_TILE = N_TILE_X * N_TILE_Y; +let N_TILE = 256u; diff --git a/piet-wgsl/shader/shared/cubic.wgsl b/piet-wgsl/shader/shared/cubic.wgsl new file mode 100644 index 0000000..ffb85b4 --- /dev/null +++ b/piet-wgsl/shader/shared/cubic.wgsl @@ -0,0 +1,25 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +struct Cubic { + p0: vec2, + p1: vec2, + p2: vec2, + p3: vec2, + path_ix: u32, + // Needed? + padding: u32, +} diff --git a/piet-wgsl/shader/shared/drawtag.wgsl b/piet-wgsl/shader/shared/drawtag.wgsl new file mode 100644 index 0000000..749b211 --- /dev/null +++ b/piet-wgsl/shader/shared/drawtag.wgsl @@ -0,0 +1,60 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// The DrawMonoid is computed as a prefix sum to aid in decoding +// the variable-length encoding of draw objects. +struct DrawMonoid { + // The number of paths preceding this draw object. + path_ix: u32, + // The number of clip operations preceding this draw object. + clip_ix: u32, + // The offset of the encoded draw object in the scene (u32s). + scene_offset: u32, + // The offset of the associated info. + info_offset: u32, +} + +// Each draw object has a 32-bit draw tag, which is a bit-packed +// version of the draw monoid. +let DRAWTAG_NOP = 0u; +let DRAWTAG_FILL_COLOR = 0x44u; +let DRAWTAG_FILL_LIN_GRADIENT = 0x114u; +let DRAWTAG_FILL_RAD_GRADIENT = 0x2dcu; +let DRAWTAG_FILL_IMAGE = 0x48u; +let DRAWTAG_BEGIN_CLIP = 0x05u; +let DRAWTAG_END_CLIP = 0x25u; + +fn draw_monoid_identity() -> DrawMonoid { + return DrawMonoid(); +} + +fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) -> DrawMonoid { + var c: DrawMonoid; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; + return c; +} + +fn map_draw_tag(tag_word: u32) -> DrawMonoid { + var c: DrawMonoid; + c.path_ix = u32(tag_word != DRAWTAG_NOP); + c.clip_ix = tag_word & 1u; + c.scene_offset = (tag_word >> 2u) & 0x07u; + c.info_offset = (tag_word >> 6u) & 0x0fu; + return c; +} diff --git a/piet-wgsl/shader/shared/pathtag.wgsl b/piet-wgsl/shader/shared/pathtag.wgsl index e4cfda3..8e46979 100644 --- a/piet-wgsl/shader/shared/pathtag.wgsl +++ b/piet-wgsl/shader/shared/pathtag.wgsl @@ -16,9 +16,13 @@ struct TagMonoid { trans_ix: u32, + // TODO: I don't think pathseg_ix is used. pathseg_ix: u32, pathseg_offset: u32, - // Note: piet-gpu has linewidth and path, but not needed here +#ifdef full + linewidth_ix: u32, + path_ix: u32, +#endif } let PATH_TAG_SEG_TYPE = 3u; @@ -26,15 +30,14 @@ let PATH_TAG_LINETO = 1u; let PATH_TAG_QUADTO = 2u; let PATH_TAG_CUBICTO = 3u; let PATH_TAG_F32 = 8u; -let PATH_TAG_PATH = 0x10u; let PATH_TAG_TRANSFORM = 0x20u; +#ifdef full +let PATH_TAG_PATH = 0x10u; +let PATH_TAG_LINEWIDTH = 0x40u; +#endif fn tag_monoid_identity() -> TagMonoid { - var c: TagMonoid; - c.trans_ix = 0u; - c.pathseg_ix = 0u; - c.pathseg_offset = 0u; - return c; + return TagMonoid(); } fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid { @@ -42,6 +45,10 @@ fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid { c.trans_ix = a.trans_ix + b.trans_ix; c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; +#ifdef full + c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; + c.path_ix = a.path_ix + b.path_ix; +#endif return c; } @@ -55,5 +62,9 @@ fn reduce_tag(tag_word: u32) -> TagMonoid { a += a >> 8u; a += a >> 16u; c.pathseg_offset = a & 0xffu; +#ifdef full + c.path_ix = countOneBits(tag_word & (PATH_TAG_PATH * 0x1010101u)); + c.linewidth_ix = countOneBits(tag_word & (PATH_TAG_LINEWIDTH * 0x1010101u)); +#endif return c; } diff --git a/piet-wgsl/shader/shared/ptcl.wgsl b/piet-wgsl/shader/shared/ptcl.wgsl new file mode 100644 index 0000000..92316cc --- /dev/null +++ b/piet-wgsl/shader/shared/ptcl.wgsl @@ -0,0 +1,72 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Layout of per-tile command list +// Initial allocation, in u32's. +let PTCL_INITIAL_ALLOC = 64u; +let PTCL_INCREMENT = 256u; + +// Amount of space taken by jump +let PTCL_HEADROOM = 2u; + +// Tags for PTCL commands +let CMD_END = 0u; +let CMD_FILL = 1u; +let CMD_STROKE = 2u; +let CMD_SOLID = 3u; +let CMD_COLOR = 5u; +let CMD_LIN_GRAD = 6u; +let CMD_RAD_GRAD = 7u; +let CMD_BEGIN_CLIP = 9u; +let CMD_END_CLIP = 10u; +let CMD_JUMP = 11u; + +// The individual PTCL structs are written here, but read/write is by +// hand in the relevant shaders + +struct CmdFill { + tile: u32, + backdrop: i32, +} + +struct CmdStroke { + tile: u32, + half_width: f32, +} + +struct CmdJump { + new_ix: u32, +} + +struct CmdColor { + rgba_color: u32, +} + +struct CmdLinGrad { + index: u32, + line_x: f32, + line_y: f32, + line_c: f32, +} + +struct CmdRadGrad { + index: u32, + matrx: vec4, + xlat: vec2, + c1: vec2, + ra: f32, + roff: f32, +} diff --git a/piet-wgsl/shader/shared/tile.wgsl b/piet-wgsl/shader/shared/tile.wgsl new file mode 100644 index 0000000..f575e50 --- /dev/null +++ b/piet-wgsl/shader/shared/tile.wgsl @@ -0,0 +1,29 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Common datatypes for path and tile intermediate info. + +struct Path { + // bounding box in tiles + bbox: vec4, + // offset (in u32's) to tile rectangle + tiles: u32, +} + +struct Tile { + backdrop: i32, + segments: u32, +} \ No newline at end of file diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl new file mode 100644 index 0000000..56771d6 --- /dev/null +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -0,0 +1,114 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Tile allocation (and zeroing of tiles) + +#import config +#import bump +#import drawtag +#import tile + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var draw_bboxes: array>; + +@group(0) @binding(3) +var bump: BumpAllocators; + +@group(0) @binding(4) +var paths: array; + +@group(0) @binding(5) +var tiles: array; + +let WG_SIZE = 256u; + +var sh_tile_count: array; +var sh_tile_offset: u32; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + // scale factors useful for converting coordinates to tiles + // TODO: make into constants + let SX = 1.0 / f32(TILE_WIDTH); + let SY = 1.0 / f32(TILE_HEIGHT); + + let drawobj_ix = global_id.x; + var drawtag = DRAWTAG_NOP; + if drawobj_ix < config.n_drawobj { + drawtag = scene[config.drawtag_base + drawobj_ix]; + } + var x0 = 0; + var y0 = 0; + var x1 = 0; + var y1 = 0; + if drawtag != DRAWTAG_NOP && drawtag != DRAWTAG_END_CLIP { + let bbox = draw_bboxes[drawobj_ix]; + x0 = i32(floor(bbox.x * SX)); + y0 = i32(floor(bbox.y * SY)); + x1 = i32(ceil(bbox.z * SX)); + y1 = i32(ceil(bbox.w * SY)); + } + let ux0 = u32(clamp(x0, 0, i32(config.width_in_tiles))); + let uy0 = u32(clamp(y0, 0, i32(config.height_in_tiles))); + let ux1 = u32(clamp(x1, 0, i32(config.width_in_tiles))); + let uy1 = u32(clamp(y1, 0, i32(config.height_in_tiles))); + let tile_count = (ux1 - ux0) * (uy1 - uy0); + var total_tile_count = tile_count; + sh_tile_count[local_id.x] = tile_count; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x >= (1u << i) { + total_tile_count += sh_tile_count[local_id.x - (1u << i)]; + } + workgroupBarrier(); + sh_tile_count[local_id.x] = total_tile_count; + } + if local_id.x == WG_SIZE - 1u { + paths[drawobj_ix].tiles = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]); + } + // Using storage barriers is a workaround for what appears to be a miscompilation + // when a normal workgroup-shared variable is used to broadcast the value. + storageBarrier(); + let tile_offset = paths[drawobj_ix | (WG_SIZE - 1u)].tiles; + storageBarrier(); + if drawobj_ix < config.n_drawobj { + let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u); + let bbox = vec4(ux0, uy0, ux1, uy1); + let path = Path(bbox, tile_offset + tile_subix); + paths[drawobj_ix] = path; + } + + // zero allocated memory + // Note: if the number of draw objects is small, utilization will be poor. + // There are two things that can be done to improve that. One would be a + // separate (indirect) dispatch. Another would be to have each workgroup + // process fewer draw objects than the number of threads in the wg. + let total_count = sh_tile_count[WG_SIZE - 1u]; + for (var i = local_id.x; i < total_count; i += WG_SIZE) { + // Note: could format output buffer as u32 for even better load + // balancing, as does piet-gpu. + tiles[tile_offset + i] = Tile(0, 0u); + } +} diff --git a/piet-wgsl/src/debug.rs b/piet-wgsl/src/debug.rs new file mode 100644 index 0000000..964c8ee --- /dev/null +++ b/piet-wgsl/src/debug.rs @@ -0,0 +1,5 @@ +#![allow(dead_code)] + +pub mod clip; +pub mod draw; +pub mod fine; diff --git a/piet-wgsl/src/debug/clip.rs b/piet-wgsl/src/debug/clip.rs new file mode 100644 index 0000000..8b2a0e3 --- /dev/null +++ b/piet-wgsl/src/debug/clip.rs @@ -0,0 +1,13 @@ +use bytemuck::{Pod, Zeroable}; + +#[derive(Copy, Clone, Debug, Zeroable, Pod)] +#[repr(C)] +pub struct ClipEl { + pub parent_ix: u32, + pub pad: [u32; 3], + pub bbox: [f32; 4], +} + +pub fn parse_clip_els(data: &[u8]) -> Vec { + Vec::from(bytemuck::cast_slice(data)) +} diff --git a/piet-wgsl/src/debug/draw.rs b/piet-wgsl/src/debug/draw.rs new file mode 100644 index 0000000..ab56ed5 --- /dev/null +++ b/piet-wgsl/src/debug/draw.rs @@ -0,0 +1,14 @@ +use bytemuck::{Pod, Zeroable}; + +#[derive(Copy, Clone, Debug, Zeroable, Pod)] +#[repr(C)] +pub struct DrawMonoid { + pub path_ix: u32, + pub clip_ix: u32, + pub scene_offset: u32, + pub info_offset: u32, +} + +pub fn parse_draw_monoids(data: &[u8]) -> Vec { + Vec::from(bytemuck::cast_slice(data)) +} diff --git a/piet-wgsl/src/debug/fine.rs b/piet-wgsl/src/debug/fine.rs new file mode 100644 index 0000000..d9f05f0 --- /dev/null +++ b/piet-wgsl/src/debug/fine.rs @@ -0,0 +1,153 @@ +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct Fill { + pub tile: u32, + pub backdrop: i32, +} + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct Stroke { + pub tile: u32, + pub half_width: f32, +} + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct Color { + abgr: [u8; 4], +} + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct LinGrad { + pub index: u32, + pub line_x: f32, + pub line_y: f32, + pub line_c: f32, +} + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct RadGrad { + pub index: u32, + pub matrix: [f32; 4], + pub xlat: [f32; 2], + pub c1: [f32; 2], + pub ra: f32, + pub roff: f32, +} + +#[derive(Copy, Clone, Debug)] +pub enum Command { + Fill(Fill), + Stroke(Stroke), + Solid, + Color(Color), + LinGrad(LinGrad), + RadGrad(RadGrad), + BeginClip, + EndClip(u32), + End, +} + +const PTCL_INITIAL_ALLOC: usize = 64; + +#[derive(Debug)] +pub struct CommandList { + pub tiles: Vec<(u32, u32, Vec)>, +} + +impl CommandList { + pub fn parse(width: usize, height: usize, ptcl: &[u8]) -> Self { + let mut tiles = vec![]; + let width_tiles = width / 16; + let height_tiles = height / 16; + for y in 0..height_tiles { + for x in 0..width_tiles { + let tile_ix = y * width_tiles + x; + let ix = tile_ix * PTCL_INITIAL_ALLOC; + let commands = parse_commands(ptcl, ix); + if !commands.is_empty() { + tiles.push((x as u32, y as u32, commands)); + } + } + } + Self { tiles } + } +} + +fn parse_commands(ptcl: &[u8], mut ix: usize) -> Vec { + let mut commands = vec![]; + let words: &[u32] = bytemuck::cast_slice(ptcl); + while ix < words.len() { + let tag = words[ix]; + ix += 1; + match tag { + 0 => break, + 1 => { + commands.push(Command::Fill(Fill { + tile: words[ix], + backdrop: words[ix + 1] as i32, + })); + ix += 2; + } + 2 => { + commands.push(Command::Stroke(Stroke { + tile: words[ix], + half_width: bytemuck::cast(words[ix + 1]), + })); + ix += 2; + } + 3 => { + commands.push(Command::Solid); + } + 5 => { + commands.push(Command::Color(Color { + abgr: bytemuck::cast(words[ix]), + })); + ix += 1; + } + 6 => { + commands.push(Command::LinGrad(LinGrad { + index: words[ix], + line_x: bytemuck::cast(words[ix + 1]), + line_y: bytemuck::cast(words[ix + 2]), + line_c: bytemuck::cast(words[ix + 3]), + })); + ix += 4; + } + 7 => { + let matrix = [ + bytemuck::cast(words[ix + 1]), + bytemuck::cast(words[ix + 2]), + bytemuck::cast(words[ix + 3]), + bytemuck::cast(words[ix + 4]), + ]; + let xlat = [bytemuck::cast(words[ix + 5]), bytemuck::cast(words[ix + 6])]; + let c1 = [bytemuck::cast(words[ix + 7]), bytemuck::cast(words[ix + 8])]; + commands.push(Command::RadGrad(RadGrad { + index: words[ix], + matrix, + xlat, + c1, + ra: bytemuck::cast(words[ix + 9]), + roff: bytemuck::cast(words[ix + 10]), + })); + ix += 11; + } + 9 => { + commands.push(Command::BeginClip); + } + 10 => { + commands.push(Command::EndClip(words[ix])); + ix += 1; + } + 11 => { + ix = words[ix] as usize; + } + _ => {} + } + } + commands +} diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs index 2be08db..049fc80 100644 --- a/piet-wgsl/src/engine.rs +++ b/piet-wgsl/src/engine.rs @@ -17,7 +17,7 @@ use std::{ borrow::Cow, collections::{hash_map::Entry, HashMap}, - num::NonZeroU64, + num::{NonZeroU32, NonZeroU64}, sync::atomic::{AtomicU64, Ordering}, }; @@ -25,7 +25,8 @@ use futures_intrusive::channel::shared::GenericOneshotReceiver; use parking_lot::RawMutex; use wgpu::{ util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice, BufferView, - ComputePipeline, Device, Queue, + ComputePipeline, Device, Queue, Texture, TextureAspect, TextureFormat, TextureUsages, + TextureView, TextureViewDimension, }; pub type Error = Box; @@ -58,12 +59,27 @@ pub struct BufProxy { id: Id, } +#[derive(Clone, Copy)] +pub struct ImageProxy { + width: u32, + height: u32, + // TODO: format + id: Id, +} + +#[derive(Clone, Copy)] +pub enum ResourceProxy { + Buf(BufProxy), + Image(ImageProxy), +} + pub enum Command { Upload(BufProxy, Vec), + UploadImage(ImageProxy, Vec), // Discussion question: third argument is vec of resources? // Maybe use tricks to make more ergonomic? // Alternative: provide bufs & images as separate sequences, like piet-gpu. - Dispatch(ShaderId, (u32, u32, u32), Vec), + Dispatch(ShaderId, (u32, u32, u32), Vec), Download(BufProxy), Clear(BufProxy, u64, Option), } @@ -92,6 +108,7 @@ pub enum BindType { #[derive(Default)] struct BindMap { buf_map: HashMap, + image_map: HashMap, } impl Engine { @@ -132,6 +149,16 @@ impl Engine { }, count: None, }, + BindType::ImageRead => wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, _ => todo!(), }) .collect::>(); @@ -182,7 +209,60 @@ impl Engine { }); bind_map.insert_buf(buf_proxy.id, buf); } + Command::UploadImage(image_proxy, bytes) => { + let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: None, + contents: &bytes, + usage: wgpu::BufferUsages::COPY_SRC, + }); + let texture = device.create_texture(&wgpu::TextureDescriptor { + label: None, + size: wgpu::Extent3d { + width: image_proxy.width, + height: image_proxy.height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST, + format: TextureFormat::Rgba8Unorm, + }); + let texture_view = texture.create_view(&wgpu::TextureViewDescriptor { + label: None, + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + mip_level_count: None, + base_mip_level: 0, + base_array_layer: 0, + array_layer_count: None, + format: Some(TextureFormat::Rgba8Unorm), + }); + encoder.copy_buffer_to_texture( + wgpu::ImageCopyBuffer { + buffer: &buf, + layout: wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: NonZeroU32::new(image_proxy.width * 4), + rows_per_image: None, + }, + }, + wgpu::ImageCopyTexture { + texture: &texture, + mip_level: 0, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + aspect: TextureAspect::All, + }, + wgpu::Extent3d { + width: image_proxy.width, + height: image_proxy.height, + depth_or_array_layers: 1, + }, + ); + bind_map.insert_image(image_proxy.id, texture, texture_view) + } Command::Dispatch(shader_id, wg_size, bindings) => { + println!("dispatching {:?} with {} bindings", wg_size, bindings.len()); let shader = &self.shaders[shader_id.0]; let bind_group = bind_map.create_bind_group(device, &shader.bind_group_layout, bindings)?; @@ -225,13 +305,28 @@ impl Recording { buf_proxy } - pub fn dispatch( + pub fn upload_image( &mut self, - shader: ShaderId, - wg_size: (u32, u32, u32), - resources: impl Into>, - ) { - self.push(Command::Dispatch(shader, wg_size, resources.into())); + width: u32, + height: u32, + data: impl Into>, + ) -> ImageProxy { + let data = data.into(); + let image_proxy = ImageProxy::new(width, height); + self.push(Command::UploadImage(image_proxy, data)); + image_proxy + } + + pub fn dispatch(&mut self, shader: ShaderId, wg_size: (u32, u32, u32), resources: R) + where + R: IntoIterator, + R::Item: Into, + { + self.push(Command::Dispatch( + shader, + wg_size, + resources.into_iter().map(|r| r.into()).collect(), + )); } pub fn download(&mut self, buf: BufProxy) { @@ -246,7 +341,53 @@ impl Recording { impl BufProxy { pub fn new(size: u64) -> Self { let id = Id::next(); - BufProxy { id, size } + BufProxy { + id, + size: size.max(16), + } + } +} + +impl ImageProxy { + pub fn new(width: u32, height: u32) -> Self { + let id = Id::next(); + ImageProxy { width, height, id } + } +} + +impl ResourceProxy { + pub fn new_buf(size: u64) -> Self { + Self::Buf(BufProxy::new(size)) + } + + pub fn new_image(width: u32, height: u32) -> Self { + Self::Image(ImageProxy::new(width, height)) + } + + pub fn as_buf(&self) -> Option<&BufProxy> { + match self { + Self::Buf(proxy) => Some(&proxy), + _ => None, + } + } + + pub fn as_image(&self) -> Option<&ImageProxy> { + match self { + Self::Image(proxy) => Some(&proxy), + _ => None, + } + } +} + +impl From for ResourceProxy { + fn from(value: BufProxy) -> Self { + Self::Buf(value) + } +} + +impl From for ResourceProxy { + fn from(value: ImageProxy) -> Self { + Self::Image(value) } } @@ -263,34 +404,79 @@ impl BindMap { self.buf_map.insert(id, buf); } + fn insert_image(&mut self, id: Id, image: Texture, image_view: TextureView) { + self.image_map.insert(id, (image, image_view)); + } + fn create_bind_group( &mut self, device: &Device, layout: &BindGroupLayout, - bindings: &[BufProxy], + bindings: &[ResourceProxy], ) -> Result { for proxy in bindings { - if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) { - let buf = device.create_buffer(&wgpu::BufferDescriptor { - label: None, - size: proxy.size, - usage: wgpu::BufferUsages::STORAGE - | wgpu::BufferUsages::COPY_DST - | wgpu::BufferUsages::COPY_SRC, - mapped_at_creation: false, - }); - v.insert(buf); + match proxy { + ResourceProxy::Buf(proxy) => { + if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) { + let buf = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: proxy.size, + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_DST + | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + v.insert(buf); + } + } + ResourceProxy::Image(proxy) => { + if let Entry::Vacant(v) = self.image_map.entry(proxy.id) { + let texture = device.create_texture(&wgpu::TextureDescriptor { + label: None, + size: wgpu::Extent3d { + width: proxy.width, + height: proxy.height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST, + format: TextureFormat::Rgba8Unorm, + }); + let texture_view = texture.create_view(&wgpu::TextureViewDescriptor { + label: None, + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + mip_level_count: None, + base_mip_level: 0, + base_array_layer: 0, + array_layer_count: None, + format: Some(TextureFormat::Rgba8Unorm), + }); + v.insert((texture, texture_view)); + } + } } } let entries = bindings .iter() .enumerate() - .map(|(i, proxy)| { - let buf = self.buf_map.get(&proxy.id).unwrap(); - Ok(wgpu::BindGroupEntry { - binding: i as u32, - resource: buf.as_entire_binding(), - }) + .map(|(i, proxy)| match proxy { + ResourceProxy::Buf(proxy) => { + let buf = self.buf_map.get(&proxy.id).unwrap(); + Ok(wgpu::BindGroupEntry { + binding: i as u32, + resource: buf.as_entire_binding(), + }) + } + ResourceProxy::Image(proxy) => { + let texture = self.image_map.get(&proxy.id).unwrap(); + Ok(wgpu::BindGroupEntry { + binding: i as u32, + resource: wgpu::BindingResource::TextureView(&texture.1), + }) + } }) .collect::, Error>>()?; let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index 5cf960d..4e10486 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -20,11 +20,12 @@ use std::{fs::File, io::BufWriter}; use engine::Engine; -use render::render; -use test_scene::dump_scene_info; -use wgpu::{Device, Queue}; +use wgpu::{Device, Limits, Queue}; +mod debug; mod engine; +mod pico_svg; +mod ramp; mod render; mod shaders; mod test_scene; @@ -33,12 +34,14 @@ async fn run() -> Result<(), Box> { let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY); let adapter = instance.request_adapter(&Default::default()).await.unwrap(); let features = adapter.features(); + let mut limits = Limits::default(); + limits.max_storage_buffers_per_shader_stage = 16; let (device, queue) = adapter .request_device( &wgpu::DeviceDescriptor { label: None, features: features & wgpu::Features::TIMESTAMP_QUERY, - limits: Default::default(), + limits, }, None, ) @@ -49,25 +52,46 @@ async fn run() -> Result<(), Box> { Ok(()) } +fn dump_buf(buf: &[u32]) { + for (i, val) in buf.iter().enumerate() { + if *val != 0 { + let lo = val & 0x7fff_ffff; + if lo >= 0x3000_0000 && lo < 0x5000_0000 { + println!("{}: {:x} {}", i, val, f32::from_bits(*val)); + } else { + println!("{}: {:x}", i, val); + } + } + } +} + async fn do_render( device: &Device, queue: &Queue, engine: &mut Engine, ) -> Result<(), Box> { + #[allow(unused)] let shaders = shaders::init_shaders(device, engine)?; + let full_shaders = shaders::full_shaders(device, engine)?; let scene = test_scene::gen_test_scene(); - dump_scene_info(&scene); - let (recording, buf) = render(&scene, &shaders); + //test_scene::dump_scene_info(&scene); + //let (recording, buf) = render::render(&scene, &shaders); + let (recording, buf) = render::render_full(&scene, &full_shaders); let downloads = engine.run_recording(&device, &queue, &recording)?; let mapped = downloads.map(); device.poll(wgpu::Maintain::Wait); let buf = mapped.get_mapped(buf).await?; - let file = File::create("image.png")?; - let w = BufWriter::new(file); - let encoder = png::Encoder::new(w, 1024, 1024); - let mut writer = encoder.write_header()?; - writer.write_image_data(&buf)?; + if false { + dump_buf(bytemuck::cast_slice(&buf)); + } else { + let file = File::create("image.png")?; + let w = BufWriter::new(file); + let mut encoder = png::Encoder::new(w, 1024, 1024); + encoder.set_color(png::ColorType::Rgba); + let mut writer = encoder.write_header()?; + writer.write_image_data(&buf)?; + } Ok(()) } diff --git a/piet-wgsl/src/pico_svg.rs b/piet-wgsl/src/pico_svg.rs new file mode 100644 index 0000000..eebe3ec --- /dev/null +++ b/piet-wgsl/src/pico_svg.rs @@ -0,0 +1,140 @@ +//! A loader for a tiny fragment of SVG + +use std::str::FromStr; + +use roxmltree::{Document, Node}; + +use kurbo::{Affine, BezPath}; + +use piet_scene::Color; + +pub struct PicoSvg { + pub items: Vec, +} + +pub enum Item { + Fill(FillItem), + Stroke(StrokeItem), +} + +pub struct StrokeItem { + pub width: f64, + pub color: Color, + pub path: BezPath, +} + +pub struct FillItem { + pub color: Color, + pub path: BezPath, +} + +struct Parser<'a> { + scale: f64, + items: &'a mut Vec, +} + +impl PicoSvg { + pub fn load(xml_string: &str, scale: f64) -> Result> { + let doc = Document::parse(xml_string)?; + let root = doc.root_element(); + let mut items = Vec::new(); + let mut parser = Parser::new(&mut items, scale); + for node in root.children() { + parser.rec_parse(node)?; + } + Ok(PicoSvg { items }) + } +} + +impl<'a> Parser<'a> { + fn new(items: &'a mut Vec, scale: f64) -> Parser<'a> { + Parser { scale, items } + } + + fn rec_parse(&mut self, node: Node) -> Result<(), Box> { + let transform = if self.scale >= 0.0 { + Affine::scale(self.scale) + } else { + Affine::new([-self.scale, 0.0, 0.0, self.scale, 0.0, 1536.0]) + }; + if node.is_element() { + match node.tag_name().name() { + "g" => { + for child in node.children() { + self.rec_parse(child)?; + } + } + "path" => { + let d = node.attribute("d").ok_or("missing 'd' attribute")?; + let bp = BezPath::from_svg(d)?; + let path = transform * bp; + // TODO: default fill color is black, but this is overridden in tiger to this logic. + if let Some(fill_color) = node.attribute("fill") { + if fill_color != "none" { + let color = parse_color(fill_color); + let color = modify_opacity(color, "fill-opacity", node); + self.items.push(Item::Fill(FillItem { + color, + path: path.clone(), + })); + } + } + if let Some(stroke_color) = node.attribute("stroke") { + if stroke_color != "none" { + let width = self.scale.abs() + * f64::from_str( + node.attribute("stroke-width").ok_or("missing width")?, + )?; + let color = parse_color(stroke_color); + let color = modify_opacity(color, "stroke-opacity", node); + self.items + .push(Item::Stroke(StrokeItem { width, color, path })); + } + } + } + _ => (), + } + } + Ok(()) + } +} + +fn parse_color(color: &str) -> Color { + if color.as_bytes()[0] == b'#' { + let mut hex = u32::from_str_radix(&color[1..], 16).unwrap(); + if color.len() == 4 { + hex = (hex >> 8) * 0x110000 + ((hex >> 4) & 0xf) * 0x1100 + (hex & 0xf) * 0x11; + } + let rgba = (hex << 8) + 0xff; + let (r, g, b, a) = ( + (rgba >> 24 & 255) as u8, + ((rgba >> 16) & 255) as u8, + ((rgba >> 8) & 255) as u8, + (rgba & 255) as u8, + ); + Color::rgba8(r, g, b, a) + } else if color.starts_with("rgb(") { + let mut iter = color[4..color.len() - 1].split(','); + let r = u8::from_str(iter.next().unwrap()).unwrap(); + let g = u8::from_str(iter.next().unwrap()).unwrap(); + let b = u8::from_str(iter.next().unwrap()).unwrap(); + Color::rgb8(r, g, b) + } else { + Color::rgba8(255, 0, 255, 0x80) + } +} + +fn modify_opacity(mut color: Color, attr_name: &str, node: Node) -> Color { + if let Some(opacity) = node.attribute(attr_name) { + let alpha = if opacity.ends_with("%") { + let pctg = opacity[..opacity.len() - 1].parse().unwrap_or(100.0); + pctg * 0.01 + } else { + opacity.parse().unwrap_or(1.0) + } as f64; + color.a = (alpha.min(1.0).max(0.0) * 255.0).round() as u8; + color + } else { + color + } +} diff --git a/piet-wgsl/src/ramp.rs b/piet-wgsl/src/ramp.rs new file mode 100644 index 0000000..a26c3d9 --- /dev/null +++ b/piet-wgsl/src/ramp.rs @@ -0,0 +1,137 @@ +use piet_scene::{Color, GradientStop, GradientStops}; + +use std::collections::HashMap; + +const N_SAMPLES: usize = 512; +const RETAINED_COUNT: usize = 64; + +#[derive(Default)] +pub struct RampCache { + epoch: u64, + map: HashMap, + data: Vec, +} + +impl RampCache { + pub fn advance(&mut self) { + self.epoch += 1; + if self.map.len() > RETAINED_COUNT { + self.map + .retain(|_key, value| value.0 < RETAINED_COUNT as u32); + self.data.truncate(RETAINED_COUNT * N_SAMPLES); + } + } + + pub fn add(&mut self, stops: &[GradientStop]) -> u32 { + if let Some(entry) = self.map.get_mut(stops) { + entry.1 = self.epoch; + entry.0 + } else if self.map.len() < RETAINED_COUNT { + let id = (self.data.len() / N_SAMPLES) as u32; + self.data.extend(make_ramp(stops)); + self.map.insert(stops.into(), (id, self.epoch)); + id + } else { + let mut reuse = None; + for (stops, (id, epoch)) in &self.map { + if *epoch + 2 < self.epoch { + reuse = Some((stops.to_owned(), *id)); + break; + } + } + if let Some((old_stops, id)) = reuse { + self.map.remove(&old_stops); + let start = id as usize * N_SAMPLES; + for (dst, src) in self.data[start..start + N_SAMPLES] + .iter_mut() + .zip(make_ramp(stops)) + { + *dst = src; + } + self.map.insert(stops.into(), (id, self.epoch)); + id + } else { + let id = (self.data.len() / N_SAMPLES) as u32; + self.data.extend(make_ramp(stops)); + self.map.insert(stops.into(), (id, self.epoch)); + id + } + } + } + + pub fn data(&self) -> &[u32] { + &self.data + } + + pub fn width(&self) -> u32 { + N_SAMPLES as u32 + } + + pub fn height(&self) -> u32 { + (self.data.len() / N_SAMPLES) as u32 + } +} + +fn make_ramp<'a>(stops: &'a [GradientStop]) -> impl Iterator + 'a { + let mut last_u = 0.0; + let mut last_c = ColorF64::from_color(stops[0].color); + let mut this_u = last_u; + let mut this_c = last_c; + let mut j = 0; + (0..N_SAMPLES).map(move |i| { + let u = (i as f64) / (N_SAMPLES - 1) as f64; + while u > this_u { + last_u = this_u; + last_c = this_c; + if let Some(s) = stops.get(j + 1) { + this_u = s.offset as f64; + this_c = ColorF64::from_color(s.color); + j += 1; + } else { + break; + } + } + let du = this_u - last_u; + let c = if du < 1e-9 { + this_c + } else { + last_c.lerp(&this_c, (u - last_u) / du) + }; + c.to_premul_u32() + }) +} + +#[derive(Copy, Clone, Debug)] +struct ColorF64([f64; 4]); + +impl ColorF64 { + fn from_color(color: Color) -> Self { + Self([ + color.r as f64 / 255.0, + color.g as f64 / 255.0, + color.b as f64 / 255.0, + color.a as f64 / 255.0, + ]) + } + + fn lerp(&self, other: &Self, a: f64) -> Self { + fn l(x: f64, y: f64, a: f64) -> f64 { + x * (1.0 - a) + y * a + } + Self([ + l(self.0[0], other.0[0], a), + l(self.0[1], other.0[1], a), + l(self.0[2], other.0[2], a), + l(self.0[3], other.0[3], a), + ]) + } + + fn to_premul_u32(&self) -> u32 { + let a = self.0[3].min(1.0).max(0.0); + let r = ((self.0[0] * a).min(1.0).max(0.0) * 255.0) as u32; + let g = ((self.0[1] * a).min(1.0).max(0.0) * 255.0) as u32; + let b = ((self.0[2] * a).min(1.0).max(0.0) * 255.0) as u32; + let a = (a * 255.0) as u32; + r | (g << 8) | (b << 16) | (a << 24) + } +} diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 3eec030..ee13694 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -4,17 +4,39 @@ use bytemuck::{Pod, Zeroable}; use piet_scene::Scene; use crate::{ - engine::{BufProxy, Recording}, - shaders::{self, Shaders}, + engine::{BufProxy, Recording, ResourceProxy}, + shaders::{self, FullShaders, Shaders}, }; const TAG_MONOID_SIZE: u64 = 12; +const TAG_MONOID_FULL_SIZE: u64 = 20; +const PATH_BBOX_SIZE: u64 = 24; +const CUBIC_SIZE: u64 = 40; +const DRAWMONOID_SIZE: u64 = 16; +const MAX_DRAWINFO_SIZE: u64 = 44; +const CLIP_BIC_SIZE: u64 = 8; +const CLIP_EL_SIZE: u64 = 32; +const CLIP_INP_SIZE: u64 = 4; +const CLIP_BBOX_SIZE: u64 = 16; +const PATH_SIZE: u64 = 32; +const DRAW_BBOX_SIZE: u64 = 16; +const BUMP_SIZE: u64 = 16; +const BIN_HEADER_SIZE: u64 = 8; #[repr(C)] -#[derive(Clone, Copy, Zeroable, Pod)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Config { width_in_tiles: u32, height_in_tiles: u32, + n_drawobj: u32, + n_path: u32, + n_clip: u32, + pathtag_base: u32, + pathdata_base: u32, + drawtag_base: u32, + drawdata_base: u32, + transform_base: u32, + linewidth_base: u32, } #[repr(C)] @@ -26,22 +48,39 @@ pub struct PathSegment { next: u32, } +fn size_to_words(byte_size: usize) -> u32 { + (byte_size / std::mem::size_of::()) as u32 +} + pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { let mut recording = Recording::default(); let data = scene.data(); let n_pathtag = data.tag_stream.len(); let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG); let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); - let mut tag_data: Vec = Vec::with_capacity(pathtag_padded); - tag_data.extend(&data.tag_stream); - tag_data.resize(pathtag_padded, 0); - let pathtag_buf = recording.upload(tag_data); + let mut scene: Vec = Vec::with_capacity(pathtag_padded); + let pathtag_base = size_to_words(scene.len()); + scene.extend(&data.tag_stream); + scene.resize(pathtag_padded, 0); + let pathdata_base = size_to_words(scene.len()); + scene.extend(&data.pathseg_stream); + + let config = Config { + width_in_tiles: 64, + height_in_tiles: 64, + pathtag_base, + pathdata_base, + ..Default::default() + }; + let scene_buf = recording.upload(scene); + let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); + let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE); // TODO: really only need pathtag_wgs - 1 recording.dispatch( shaders.pathtag_reduce, (pathtag_wgs as u32, 1, 1), - [pathtag_buf, reduced_buf], + [config_buf, scene_buf, reduced_buf], ); let tagmonoid_buf = @@ -49,19 +88,11 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { recording.dispatch( shaders.pathtag_scan, (pathtag_wgs as u32, 1, 1), - [pathtag_buf, reduced_buf, tagmonoid_buf], + [config_buf, scene_buf, reduced_buf, tagmonoid_buf], ); - let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; - // The clone here is kinda BS, think about reducing copies - // Of course, we'll probably end up concatenating into a single scene binding. - let pathdata_buf = recording.upload(data.pathseg_stream.clone()); - //let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32); - let config = Config { - width_in_tiles: 64, - height_in_tiles: 64, - }; - let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); + let path_coarse_wgs = + (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; // TODO: more principled size calc let tiles_buf = BufProxy::new(4097 * 8); let segments_buf = BufProxy::new(256 * 24); @@ -70,10 +101,9 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { shaders.path_coarse, (path_coarse_wgs, 1, 1), [ - pathtag_buf, - tagmonoid_buf, - pathdata_buf, config_buf, + scene_buf, + tagmonoid_buf, tiles_buf, segments_buf, ], @@ -95,6 +125,280 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { (recording, out_buf) } +pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy) { + let mut recording = Recording::default(); + let mut ramps = crate::ramp::RampCache::default(); + let mut drawdata_patches: Vec<(usize, u32)> = vec![]; + let data = scene.data(); + let stop_data = &data.resources.stops; + for patch in &data.resources.patches { + use piet_scene::ResourcePatch; + match patch { + ResourcePatch::Ramp { offset, stops } => { + let ramp_id = ramps.add(&stop_data[stops.clone()]); + drawdata_patches.push((*offset, ramp_id)); + } + } + } + let gradient_image = if drawdata_patches.is_empty() { + ResourceProxy::new_image(1, 1) + } else { + let data = ramps.data(); + let width = ramps.width(); + let height = ramps.height(); + let data: &[u8] = bytemuck::cast_slice(data); + println!( + "gradient image: {}x{} ({} bytes)", + width, + height, + data.len() + ); + ResourceProxy::Image(recording.upload_image(width, height, data)) + }; + let n_pathtag = data.tag_stream.len(); + let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG); + // TODO: can compute size accurately, avoid reallocation + let mut scene: Vec = Vec::with_capacity(pathtag_padded); + let pathtag_base = size_to_words(scene.len()); + scene.extend(&data.tag_stream); + scene.resize(pathtag_padded, 0); + let pathdata_base = size_to_words(scene.len()); + scene.extend(&data.pathseg_stream); + let drawtag_base = size_to_words(scene.len()); + scene.extend(bytemuck::cast_slice(&data.drawtag_stream)); + let drawdata_base = size_to_words(scene.len()); + if !drawdata_patches.is_empty() { + let mut pos = 0; + for patch in drawdata_patches { + let offset = patch.0; + let value = patch.1; + if pos < offset { + scene.extend_from_slice(&data.drawdata_stream[pos..offset]); + } + scene.extend_from_slice(bytemuck::bytes_of(&value)); + pos = offset + 4; + } + if pos < data.drawdata_stream.len() { + scene.extend_from_slice(&data.drawdata_stream[pos..]) + } + } else { + scene.extend(&data.drawdata_stream); + } + let transform_base = size_to_words(scene.len()); + scene.extend(bytemuck::cast_slice(&data.transform_stream)); + let linewidth_base = size_to_words(scene.len()); + scene.extend(bytemuck::cast_slice(&data.linewidth_stream)); + let n_path = data.n_path; + // TODO: calculate for real when we do rectangles + let n_drawobj = n_path; + let n_clip = data.n_clip; + let config = Config { + width_in_tiles: 64, + height_in_tiles: 64, + n_drawobj, + n_path, + n_clip, + pathtag_base, + pathdata_base, + drawtag_base, + drawdata_base, + transform_base, + linewidth_base, + }; + println!("{:?}", config); + let scene_buf = ResourceProxy::Buf(recording.upload(scene)); + let config_buf = ResourceProxy::Buf(recording.upload(bytemuck::bytes_of(&config).to_owned())); + + let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); + let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE); + // TODO: really only need pathtag_wgs - 1 + recording.dispatch( + shaders.pathtag_reduce, + (pathtag_wgs as u32, 1, 1), + [config_buf, scene_buf, reduced_buf], + ); + + let tagmonoid_buf = ResourceProxy::new_buf( + pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE, + ); + recording.dispatch( + shaders.pathtag_scan, + (pathtag_wgs as u32, 1, 1), + [config_buf, scene_buf, reduced_buf, tagmonoid_buf], + ); + let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; + let path_bbox_buf = ResourceProxy::new_buf(n_path as u64 * PATH_BBOX_SIZE); + recording.dispatch( + shaders.bbox_clear, + (drawobj_wgs, 1, 1), + [config_buf, path_bbox_buf], + ); + let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE); + let path_coarse_wgs = + (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; + recording.dispatch( + shaders.pathseg, + (path_coarse_wgs, 1, 1), + [ + config_buf, + scene_buf, + tagmonoid_buf, + path_bbox_buf, + cubic_buf, + ], + ); + let draw_reduced_buf = ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE); + recording.dispatch( + shaders.draw_reduce, + (drawobj_wgs, 1, 1), + [config_buf, scene_buf, draw_reduced_buf], + ); + let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE); + let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE); + let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE); + recording.dispatch( + shaders.draw_leaf, + (drawobj_wgs, 1, 1), + [ + config_buf, + scene_buf, + draw_reduced_buf, + path_bbox_buf, + draw_monoid_buf, + info_buf, + clip_inp_buf, + ], + ); + let clip_el_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_EL_SIZE); + let clip_bic_buf = + ResourceProxy::new_buf((n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE); + let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG; + if clip_wg_reduce > 0 { + recording.dispatch( + shaders.clip_reduce, + (clip_wg_reduce, 1, 1), + [ + config_buf, + clip_inp_buf, + path_bbox_buf, + clip_bic_buf, + clip_el_buf, + ], + ); + } + let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG; + let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE); + if clip_wg > 0 { + recording.dispatch( + shaders.clip_leaf, + (clip_wg, 1, 1), + [ + config_buf, + clip_inp_buf, + path_bbox_buf, + clip_bic_buf, + clip_el_buf, + draw_monoid_buf, + clip_bbox_buf, + ], + ); + } + let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE); + let bump_buf = BufProxy::new(BUMP_SIZE); + let bin_data_buf = ResourceProxy::new_buf(1 << 20); + let width_in_bins = (config.width_in_tiles + 15) / 16; + let height_in_bins = (config.height_in_tiles + 15) / 16; + let n_bins = width_in_bins * height_in_bins; + let bin_header_buf = ResourceProxy::new_buf((n_bins * drawobj_wgs) as u64 * BIN_HEADER_SIZE); + recording.clear_all(bump_buf); + let bump_buf = ResourceProxy::Buf(bump_buf); + recording.dispatch( + shaders.binning, + (drawobj_wgs, 1, 1), + [ + config_buf, + draw_monoid_buf, + path_bbox_buf, + clip_bbox_buf, + draw_bbox_buf, + bump_buf, + bin_data_buf, + bin_header_buf, + ], + ); + let path_buf = ResourceProxy::new_buf(n_path as u64 * PATH_SIZE); + let tile_buf = ResourceProxy::new_buf(1 << 20); + let path_wgs = (n_path + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; + recording.dispatch( + shaders.tile_alloc, + (path_wgs, 1, 1), + [ + config_buf, + scene_buf, + draw_bbox_buf, + bump_buf, + path_buf, + tile_buf, + ], + ); + + let segments_buf = ResourceProxy::new_buf(1 << 24); + recording.dispatch( + shaders.path_coarse, + (path_coarse_wgs, 1, 1), + [ + config_buf, + scene_buf, + tagmonoid_buf, + cubic_buf, + path_buf, + bump_buf, + tile_buf, + segments_buf, + ], + ); + recording.dispatch( + shaders.backdrop, + (path_wgs, 1, 1), + [config_buf, path_buf, tile_buf], + ); + let ptcl_buf = ResourceProxy::new_buf(1 << 24); + recording.dispatch( + shaders.coarse, + (width_in_bins, height_in_bins, 1), + [ + config_buf, + scene_buf, + draw_monoid_buf, + bin_header_buf, + bin_data_buf, + path_buf, + tile_buf, + info_buf, + bump_buf, + ptcl_buf, + ], + ); + let out_buf_size = config.width_in_tiles * config.height_in_tiles * 1024; + let out_buf = BufProxy::new(out_buf_size as u64); + recording.dispatch( + shaders.fine, + (config.width_in_tiles, config.height_in_tiles, 1), + [ + config_buf, + tile_buf, + segments_buf, + ResourceProxy::Buf(out_buf), + ptcl_buf, + gradient_image, + ], + ); + + let download_buf = out_buf; + recording.download(download_buf); + (recording, download_buf) +} + pub fn align_up(len: usize, alignment: u32) -> usize { len + (len.wrapping_neg() & alignment as usize - 1) } diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index 64e022d..0e61710 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -25,7 +25,10 @@ use wgpu::Device; use crate::engine::{BindType, Engine, Error, ShaderId}; pub const PATHTAG_REDUCE_WG: u32 = 256; +pub const PATH_BBOX_WG: u32 = 256; pub const PATH_COARSE_WG: u32 = 256; +pub const PATH_DRAWOBJ_WG: u32 = 256; +pub const CLIP_REDUCE_WG: u32 = 256; pub struct Shaders { pub pathtag_reduce: ShaderId, @@ -35,6 +38,24 @@ pub struct Shaders { pub fine: ShaderId, } +// Shaders for the full pipeline +pub struct FullShaders { + pub pathtag_reduce: ShaderId, + pub pathtag_scan: ShaderId, + pub bbox_clear: ShaderId, + pub pathseg: ShaderId, + pub draw_reduce: ShaderId, + pub draw_leaf: ShaderId, + pub clip_reduce: ShaderId, + pub clip_leaf: ShaderId, + pub binning: ShaderId, + pub tile_alloc: ShaderId, + pub path_coarse: ShaderId, + pub backdrop: ShaderId, + pub coarse: ShaderId, + pub fine: ShaderId, +} + pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result { let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader")); let imports = preprocess::get_imports(shader_dir); @@ -44,12 +65,17 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result Result Result Result { + let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader")); + let imports = preprocess::get_imports(shader_dir); + let read_shader = + |path: &str| fs::read_to_string(shader_dir.join(path.to_string() + ".wgsl")).unwrap(); + let empty = HashSet::new(); + let mut full_config = HashSet::new(); + full_config.insert("full".into()); + let pathtag_reduce = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathtag_reduce"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let pathtag_scan = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathtag_scan"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let bbox_clear = engine.add_shader( + device, + preprocess::preprocess(&read_shader("bbox_clear"), &empty, &imports).into(), + &[BindType::BufReadOnly, BindType::Buffer], + )?; + let pathseg = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathseg"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let draw_reduce = engine.add_shader( + device, + preprocess::preprocess(&read_shader("draw_reduce"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let draw_leaf = engine.add_shader( + device, + preprocess::preprocess(&read_shader("draw_leaf"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let clip_reduce = engine.add_shader( + device, + preprocess::preprocess(&read_shader("clip_reduce"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let clip_leaf = engine.add_shader( + device, + preprocess::preprocess(&read_shader("clip_leaf"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let binning = engine.add_shader( + device, + preprocess::preprocess(&read_shader("binning"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let tile_alloc = engine.add_shader( + device, + preprocess::preprocess(&read_shader("tile_alloc"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + ], + )?; + + let path_coarse = engine.add_shader( + device, + preprocess::preprocess(&read_shader("path_coarse_full"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let backdrop = engine.add_shader( + device, + preprocess::preprocess(&read_shader("backdrop_dyn"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let coarse = engine.add_shader( + device, + preprocess::preprocess(&read_shader("coarse"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let fine = engine.add_shader( + device, + preprocess::preprocess(&read_shader("fine"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::BufReadOnly, + BindType::ImageRead, + ], + )?; + Ok(FullShaders { + pathtag_reduce, + pathtag_scan, + bbox_clear, + pathseg, + draw_reduce, + draw_leaf, + clip_reduce, + clip_leaf, + binning, + tile_alloc, + path_coarse, + backdrop, + coarse, + fine, + }) +} diff --git a/piet-wgsl/src/test_scene.rs b/piet-wgsl/src/test_scene.rs index c844a26..861ac54 100644 --- a/piet-wgsl/src/test_scene.rs +++ b/piet-wgsl/src/test_scene.rs @@ -14,24 +14,54 @@ // // Also licensed under MIT license, at your choice. -use piet_scene::{Affine, Brush, Color, Fill, PathElement, Point, Scene, SceneBuilder}; +use kurbo::BezPath; +use piet_scene::{ + Affine, BlendMode, Brush, Color, Compose, ExtendMode, Fill, GradientStop, LinearGradient, Mix, + PathElement, Point, RadialGradient, Rect, Scene, SceneBuilder, SceneFragment, Stroke, +}; + +use crate::pico_svg::PicoSvg; pub fn gen_test_scene() -> Scene { let mut scene = Scene::default(); let mut builder = SceneBuilder::for_scene(&mut scene); - let path = [ - PathElement::MoveTo(Point::new(100.0, 100.0)), - PathElement::LineTo(Point::new(500.0, 120.0)), - PathElement::LineTo(Point::new(300.0, 150.0)), - PathElement::LineTo(Point::new(200.0, 260.0)), - PathElement::LineTo(Point::new(150.0, 210.0)), - PathElement::Close, - ]; - let brush = Brush::Solid(Color::rgb8(0x80, 0x80, 0x80)); - builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); + let scene_ix = 1; + match scene_ix { + 0 => { + let path = [ + PathElement::MoveTo(Point::new(100.0, 100.0)), + PathElement::LineTo(Point::new(500.0, 120.0)), + PathElement::LineTo(Point::new(300.0, 150.0)), + PathElement::LineTo(Point::new(200.0, 260.0)), + PathElement::LineTo(Point::new(150.0, 210.0)), + PathElement::Close, + ]; + let brush = Brush::Solid(Color::rgb8(0x40, 0x40, 0xff)); + builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); + let transform = Affine::translate(50.0, 50.0); + let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80)); + builder.fill(Fill::NonZero, transform, &brush, None, &path); + let transform = Affine::translate(100.0, 100.0); + let style = simple_stroke(1.0); + let brush = Brush::Solid(Color::rgb8(0xa0, 0x00, 0x00)); + builder.stroke(&style, transform, &brush, None, &path); + } + 1 => { + render_blend_grid(&mut builder); + } + _ => { + let xml_str = + std::str::from_utf8(include_bytes!("../../piet-gpu/Ghostscript_Tiger.svg")) + .unwrap(); + let svg = PicoSvg::load(xml_str, 6.0).unwrap(); + render_svg(&mut builder, &svg, false); + } + } + builder.finish(); scene } +#[allow(unused)] pub fn dump_scene_info(scene: &Scene) { let data = scene.data(); println!("tags {:?}", data.tag_stream); @@ -40,3 +70,212 @@ pub fn dump_scene_info(scene: &Scene) { bytemuck::cast_slice::(&data.pathseg_stream) ); } + +pub fn render_svg(sb: &mut SceneBuilder, svg: &PicoSvg, print_stats: bool) { + use crate::pico_svg::*; + let start = std::time::Instant::now(); + for item in svg.items.iter() { + match item { + Item::Fill(fill) => { + sb.fill( + Fill::NonZero, + Affine::IDENTITY, + &fill.color.into(), + None, + convert_bez_path(&fill.path), + ); + } + Item::Stroke(stroke) => { + sb.stroke( + &simple_stroke(stroke.width as f32), + Affine::IDENTITY, + &stroke.color.into(), + None, + convert_bez_path(&stroke.path), + ); + } + } + } + if print_stats { + println!("flattening and encoding time: {:?}", start.elapsed()); + } +} + +fn convert_bez_path<'a>(path: &'a BezPath) -> impl Iterator + 'a + Clone { + path.elements() + .iter() + .map(|el| PathElement::from_kurbo(*el)) +} + +fn simple_stroke(width: f32) -> Stroke<[f32; 0]> { + Stroke { + width, + join: piet_scene::Join::Round, + miter_limit: 1.4, + start_cap: piet_scene::Cap::Round, + end_cap: piet_scene::Cap::Round, + dash_pattern: [], + dash_offset: 0.0, + scale: true, + } +} + +#[allow(unused)] +pub fn render_blend_grid(sb: &mut SceneBuilder) { + const BLEND_MODES: &[Mix] = &[ + Mix::Normal, + Mix::Multiply, + Mix::Darken, + Mix::Screen, + Mix::Lighten, + Mix::Overlay, + Mix::ColorDodge, + Mix::ColorBurn, + Mix::HardLight, + Mix::SoftLight, + Mix::Difference, + Mix::Exclusion, + Mix::Hue, + Mix::Saturation, + Mix::Color, + Mix::Luminosity, + ]; + for (ix, &blend) in BLEND_MODES.iter().enumerate() { + let i = ix % 4; + let j = ix / 4; + let transform = Affine::translate(i as f32 * 225., j as f32 * 225.); + let square = blend_square(blend.into()); + sb.append(&square, Some(transform)); + } +} + +#[allow(unused)] +fn render_blend_square(sb: &mut SceneBuilder, blend: BlendMode, transform: Affine) { + // Inspired by https://developer.mozilla.org/en-US/docs/Web/CSS/mix-blend-mode + let rect = Rect::from_origin_size(Point::new(0., 0.), 200., 200.); + let stops = &[ + GradientStop { + color: Color::rgb8(0, 0, 0), + offset: 0.0, + }, + GradientStop { + color: Color::rgb8(255, 255, 255), + offset: 1.0, + }, + ][..]; + let linear = Brush::LinearGradient(LinearGradient { + start: Point::new(0.0, 0.0), + end: Point::new(200.0, 0.0), + stops: stops.into(), + extend: ExtendMode::Pad, + }); + sb.fill(Fill::NonZero, transform, &linear, None, rect.elements()); + const GRADIENTS: &[(f32, f32, Color)] = &[ + (150., 0., Color::rgb8(255, 240, 64)), + (175., 100., Color::rgb8(255, 96, 240)), + (125., 200., Color::rgb8(64, 192, 255)), + ]; + for (x, y, c) in GRADIENTS { + let mut color2 = c.clone(); + color2.a = 0; + let stops = &[ + GradientStop { + color: c.clone(), + offset: 0.0, + }, + GradientStop { + color: color2, + offset: 1.0, + }, + ][..]; + let rad = Brush::RadialGradient(RadialGradient { + center0: Point::new(*x, *y), + center1: Point::new(*x, *y), + radius0: 0.0, + radius1: 100.0, + stops: stops.into(), + extend: ExtendMode::Pad, + }); + sb.fill(Fill::NonZero, transform, &rad, None, rect.elements()); + } + const COLORS: &[Color] = &[ + Color::rgb8(255, 0, 0), + Color::rgb8(0, 255, 0), + Color::rgb8(0, 0, 255), + ]; + sb.push_layer(Mix::Normal.into(), transform, rect.elements()); + for (i, c) in COLORS.iter().enumerate() { + let stops = &[ + GradientStop { + color: Color::rgb8(255, 255, 255), + offset: 0.0, + }, + GradientStop { + color: c.clone(), + offset: 1.0, + }, + ][..]; + let linear = Brush::LinearGradient(LinearGradient { + start: Point::new(0.0, 0.0), + end: Point::new(0.0, 200.0), + stops: stops.into(), + extend: ExtendMode::Pad, + }); + sb.push_layer(blend, transform, rect.elements()); + // squash the ellipse + let a = transform + * Affine::translate(100., 100.) + * Affine::rotate(std::f32::consts::FRAC_PI_3 * (i * 2 + 1) as f32) + * Affine::scale(1.0, 0.357) + * Affine::translate(-100., -100.); + sb.fill( + Fill::NonZero, + a, + &linear, + None, + make_ellipse(100., 100., 90., 90.), + ); + sb.pop_layer(); + } + sb.pop_layer(); +} + +#[allow(unused)] +fn blend_square(blend: BlendMode) -> SceneFragment { + let mut fragment = SceneFragment::default(); + let mut sb = SceneBuilder::for_fragment(&mut fragment); + render_blend_square(&mut sb, blend, Affine::IDENTITY); + sb.finish(); + fragment +} + +fn make_ellipse(cx: f32, cy: f32, rx: f32, ry: f32) -> impl Iterator + Clone { + let a = 0.551915024494; + let arx = a * rx; + let ary = a * ry; + let elements = [ + PathElement::MoveTo(Point::new(cx + rx, cy)), + PathElement::CurveTo( + Point::new(cx + rx, cy + ary), + Point::new(cx + arx, cy + ry), + Point::new(cx, cy + ry), + ), + PathElement::CurveTo( + Point::new(cx - arx, cy + ry), + Point::new(cx - rx, cy + ary), + Point::new(cx - rx, cy), + ), + PathElement::CurveTo( + Point::new(cx - rx, cy - ary), + Point::new(cx - arx, cy - ry), + Point::new(cx, cy - ry), + ), + PathElement::CurveTo( + Point::new(cx + arx, cy - ry), + Point::new(cx + rx, cy - ary), + Point::new(cx + rx, cy), + ), + PathElement::Close, + ]; + (0..elements.len()).map(move |i| elements[i]) +}