From 5851ef141780d7e58494c76a8a7493fd7b0f0875 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 3 Nov 2022 16:53:34 -0700 Subject: [PATCH] Shaders loaded This checkpoint loads the shaders for full rendering, but there's a bunch of stuff still needing to be done. --- .vscode/settings.json | 3 +- piet-wgsl/shader/bbox_clear.wgsl | 45 ++++ piet-wgsl/shader/binning.wgsl | 36 ++-- piet-wgsl/shader/coarse.wgsl | 35 ++-- piet-wgsl/shader/draw_leaf.wgsl | 6 +- piet-wgsl/shader/draw_reduce.wgsl | 6 +- piet-wgsl/shader/path_coarse.wgsl | 5 +- piet-wgsl/shader/path_coarse_full.wgsl | 276 +++++++++++++++++++++++++ piet-wgsl/shader/pathseg.wgsl | 215 +++++++++++++++++++ piet-wgsl/shader/shared/bbox.wgsl | 10 +- piet-wgsl/shader/shared/config.wgsl | 1 + piet-wgsl/shader/shared/pathtag.wgsl | 18 +- piet-wgsl/shader/shared/tile.wgsl | 29 +++ piet-wgsl/shader/tile_alloc.wgsl | 14 +- piet-wgsl/src/main.rs | 7 +- piet-wgsl/src/render.rs | 1 + piet-wgsl/src/shaders.rs | 166 +++++++++++++++ 17 files changed, 805 insertions(+), 68 deletions(-) create mode 100644 piet-wgsl/shader/bbox_clear.wgsl create mode 100644 piet-wgsl/shader/path_coarse_full.wgsl create mode 100644 piet-wgsl/shader/pathseg.wgsl create mode 100644 piet-wgsl/shader/shared/tile.wgsl diff --git a/.vscode/settings.json b/.vscode/settings.json index 4d315c9..bdb0028 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -6,7 +6,8 @@ "drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl", "segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl", "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl", - "ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl" + "ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl", + "tile": "${workspaceFolder}/piet-wgsl/shader/shared/tile.wgsl" }, "wgsl-analyzer.diagnostics.nagaVersion": "main", "wgsl-analyzer.preprocessor.shaderDefs": ["full"] diff --git a/piet-wgsl/shader/bbox_clear.wgsl b/piet-wgsl/shader/bbox_clear.wgsl new file mode 100644 index 0000000..69a8b63 --- /dev/null +++ b/piet-wgsl/shader/bbox_clear.wgsl @@ -0,0 +1,45 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +#import config + +@group(0) @binding(0) +var config: Config; + +struct PathBbox { + x0: i32, + y0: i32, + x1: i32, + y1: i32, + linewidth: f32, + trans_ix: u32, +} + +@group(0) @binding(1) +var path_bboxes: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, +) { + let ix = global_id.x; + if ix < config.n_path { + path_bboxes[ix].x0 = 0x7fffffff; + path_bboxes[ix].y0 = 0x7fffffff; + path_bboxes[ix].x1 = -0x80000000; + path_bboxes[ix].y1 = -0x80000000; + } +} diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl index d16a620..3d5a053 100644 --- a/piet-wgsl/shader/binning.wgsl +++ b/piet-wgsl/shader/binning.wgsl @@ -28,7 +28,7 @@ var config: Config; var draw_monoids: array; @group(0) @binding(2) -var path_bbox_buf: array; +var path_bbox_buf: array; @group(0) @binding(3) var clip_bbox_buf: array>; @@ -39,7 +39,7 @@ var intersected_bbox: array>; // TODO: put into shared include @group(0) @binding(5) -var bump: BumpAllocators; +var bump: BumpAllocators; @group(0) @binding(6) var bin_data: array; @@ -54,11 +54,14 @@ struct BinHeader { var bin_header: array; // conversion factors from coordinates to bin -let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); -let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); +let SX = 0.00390625; +let SY = 0.00390625; +//let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); +//let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); let WG_SIZE = 256u; -let N_SLICE = WG_SIZE / 32u; +let N_SLICE = 8u; +//let N_SLICE = WG_SIZE / 32u; var sh_bitmaps: array, N_TILE>, N_SLICE>; var sh_count: array, N_SLICE>; @@ -82,7 +85,7 @@ fn main( var y1 = 0; if element_ix < config.n_drawobj { let draw_monoid = draw_monoids[element_ix]; - var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); + var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); if draw_monoid.clip_ix > 0u { clip_bbox = clip_bbox_buf[draw_monoid.clip_ix - 1u]; } @@ -92,10 +95,11 @@ fn main( // TODO check this is true let path_bbox = path_bbox_buf[draw_monoid.path_ix]; - let pb = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); - let bbox = bbox_intersect(clip_bbox, pb); + let pb = vec4(vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1)); + let bbox_raw = bbox_intersect(clip_bbox, pb); + // TODO(naga): clunky expression a workaround for broken lhs swizzle + let bbox = vec4(bbox_raw.xy, max(bbox_raw.xy, bbox_raw.zw)); - bbox.zw = max(bbox.xy, bbox.zw); intersected_bbox[element_ix] = bbox; x0 = i32(floor(bbox.x * SX)); y0 = i32(floor(bbox.y * SY)); @@ -117,7 +121,7 @@ fn main( let my_mask = 1u << (local_id.x & 31u); while y < y1 { atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask); - x += 1u; + x += 1; if x == x1 { x = x0; y += 1; @@ -128,8 +132,8 @@ fn main( // Allocate output segments var element_count = 0u; for (var i = 0u; i < N_SLICE; i += 1u) { - elementCount += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); - sh_count[i][id_ix] = element_count; + element_count += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); + sh_count[i][local_id.x] = element_count; } // element_count is the number of draw objects covering this thread's bin let chunk_offset = atomicAdd(&bump.binning, element_count); @@ -145,18 +149,18 @@ fn main( let bin_ix = y * width_in_bins + x; let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]); // I think this predicate will always be true... - if (out_mask & my_mask) != 0 { + if (out_mask & my_mask) != 0u { var idx = countOneBits(out_mask & (my_mask - 1u)); - if my_slice > 0 { + if my_slice > 0u { idx += sh_count[my_slice - 1u][bin_ix]; } let offset = sh_chunk_offset[bin_ix]; bin_data[offset + idx] = element_ix; } - x += 1u; + x += 1; if x == x1 { x = x0; - y += 1u; + y += 1; } } } diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 4ccb0c2..8c59ba6 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -20,6 +20,7 @@ #import bump #import drawtag #import ptcl +#import tile @group(0) @binding(0) var config: Config; @@ -40,33 +41,21 @@ struct BinHeader { var bin_headers: array; @group(0) @binding(4) -var bin_data: array; - -@group(0) @binding(5) -var bump: BumpAllocators; - -@group(0) @binding(6) -var ptcl: array; - -// TODO: put this in the right place -struct Path { - // bounding box in pixels - bbox: vec4, - // offset (in u32's) to tile rectangle - tiles: u32, -} - -struct Tile { - backdrop: i32, - segments: u32, -} - -@group(0) @binding(7) var paths: array; -@group(0) @binding(8) +@group(0) @binding(5) var tiles: array; +@group(0) @binding(6) +var bin_data: array; + +@group(0) @binding(7) +var bump: BumpAllocators; + +@group(0) @binding(8) +var ptcl: array; + + // Much of this code assumes WG_SIZE == N_TILE. If these diverge, then // a fair amount of fixup is needed. diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index b962bf2..6782542 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -30,10 +30,10 @@ var scene: array; var reduced: array; @group(0) @binding(3) -var draw_monoid: array; +var path_bbox: array; @group(0) @binding(4) -var path_bbox: array; +var draw_monoid: array; @group(0) @binding(5) var info: array; @@ -64,6 +64,8 @@ fn main( workgroupBarrier(); var m = draw_monoid_identity(); if wg_id.x > 0u { + // TODO: separate dispatch to scan these, or integrate into this one? + // In the meantime, will be limited to 2 * WG draw objs. m = reduced[wg_id.x - 1u]; } if local_id.x > 0u { diff --git a/piet-wgsl/shader/draw_reduce.wgsl b/piet-wgsl/shader/draw_reduce.wgsl index 9efae7c..8ff1a44 100644 --- a/piet-wgsl/shader/draw_reduce.wgsl +++ b/piet-wgsl/shader/draw_reduce.wgsl @@ -26,7 +26,7 @@ var scene: array; @group(0) @binding(2) var reduced: array; -let WG_SIZE = 256; +let WG_SIZE = 256u; var sh_scratch: array; @@ -37,7 +37,7 @@ fn main( ) { let ix = global_id.x; let tag_word = scene[config.drawtag_base + ix]; - let agg = map_draw_tag(tag_word); + var agg = map_draw_tag(tag_word); sh_scratch[local_id.x] = agg; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); @@ -49,6 +49,6 @@ fn main( sh_scratch[local_id.x] = agg; } if local_id.x == 0u { - reduced[ix >> LG_WG_SIZE] = agg; + reduced[ix >> firstTrailingBit(WG_SIZE)] = agg; } } diff --git a/piet-wgsl/shader/path_coarse.wgsl b/piet-wgsl/shader/path_coarse.wgsl index 534963f..ffdd8cf 100644 --- a/piet-wgsl/shader/path_coarse.wgsl +++ b/piet-wgsl/shader/path_coarse.wgsl @@ -30,7 +30,8 @@ var tag_monoids: array; @group(0) @binding(3) var output: array>; #else -struct Tile { +// We don't get this from import as it's the atomic version +struct AtomicTile { backdrop: atomic, segments: atomic, } @@ -38,7 +39,7 @@ struct Tile { #import segment @group(0) @binding(3) -var tiles: array; +var tiles: array; @group(0) @binding(4) var segments: array; diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl new file mode 100644 index 0000000..5641642 --- /dev/null +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -0,0 +1,276 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Path coarse rasterization for the full implementation. + +#import config +#import pathtag +#import tile +#import segment + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +// Maybe dedup? +struct Cubic { + p0: vec2, + p1: vec2, + p2: vec2, + p3: vec2, +} + +@group(0) @binding(2) +var tag_monoids: array; + +@group(0) @binding(3) +var cubics: array; + +@group(0) @binding(4) +var paths: array; + +// We don't get this from import as it's the atomic version +struct AtomicTile { + backdrop: atomic, + segments: atomic, +} + +@group(0) @binding(5) +var tiles: array; + +@group(0) @binding(6) +var segments: array; + +struct SubdivResult { + val: f32, + a0: f32, + a2: f32, +} + +let D = 0.67; +fn approx_parabola_integral(x: f32) -> f32 { + return x * inverseSqrt(sqrt(1.0 - D + (D * D * D * D + 0.25 * x * x))); +} + +let B = 0.39; +fn approx_parabola_inv_integral(x: f32) -> f32 { + return x * sqrt(1.0 - B + (B * B + 0.5 * x * x)); +} + +fn estimate_subdiv(p0: vec2, p1: vec2, p2: vec2, sqrt_tol: f32) -> SubdivResult { + let d01 = p1 - p0; + let d12 = p2 - p1; + let dd = d01 - d12; + let cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x; + let cross_inv = 1.0 / cross; + let x0 = dot(d01, dd) * cross_inv; + let x2 = dot(d12, dd) * cross_inv; + let scale = abs(cross / (length(dd) * (x2 - x0))); + + let a0 = approx_parabola_integral(x0); + let a2 = approx_parabola_integral(x2); + var val = 0.0; + if scale < 1e9 { + let da = abs(a2 - a0); + let sqrt_scale = sqrt(scale); + if sign(x0) == sign(x2) { + val = sqrt_scale; + } else { + let xmin = sqrt_tol / sqrt_scale; + val = sqrt_tol / approx_parabola_integral(xmin); + } + val *= da; + } + return SubdivResult(val, a0, a2); +} + +fn eval_quad(p0: vec2, p1: vec2, p2: vec2, t: f32) -> vec2 { + let mt = 1.0 - t; + return p0 * (mt * mt) + (p1 * (mt * 2.0) + p2 * t) * t; +} + +fn eval_cubic(p0: vec2, p1: vec2, p2: vec2, p3: vec2, t: f32) -> vec2 { + let mt = 1.0 - t; + return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t; +} + +fn alloc_segment() -> u32 { + // Use 0-index segment (address is sentinel) as counter + // TODO: separate small buffer binding for this? + return atomicAdd(&tiles[4096].segments, 1u) + 1u; +} + +let MAX_QUADS = 16u; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.pathtag_base + (ix >> 2u)]; + let shift = (ix & 3u) * 8u; + var tag_byte = (tag_word >> shift) & 0xffu; + + // Reconstruct path_ix from monoid or store in cubic? + if (tag_byte & PATH_TAG_SEG_TYPE) != 0u { + let path_ix = 42u; // BIG GIANT TODO + let path = paths[path_ix]; + let bbox = vec4(path.bbox); + let cubic = cubics[global_id.x]; + let p0 = cubic.p0; + let p1 = cubic.p1; + let p2 = cubic.p2; + let p3 = cubic.p3; + let err_v = 3.0 * (p2 - p1) + p0 - p3; + let err = dot(err_v, err_v); + let ACCURACY = 0.25; + let Q_ACCURACY = ACCURACY * 0.1; + let REM_ACCURACY = (ACCURACY - Q_ACCURACY); + let MAX_HYPOT2 = 432.0 * Q_ACCURACY * Q_ACCURACY; + var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1u); + n_quads = min(n_quads, MAX_QUADS); + var keep_params: array; + var val = 0.0; + var qp0 = p0; + let step = 1.0 / f32(n_quads); + for (var i = 0u; i < n_quads; i += 1u) { + let t = f32(i + 1u) * step; + let qp2 = eval_cubic(p0, p1, p2, p3, t); + var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step); + qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); + let params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY)); + keep_params[i] = params; + val += params.val; + qp0 = qp2; + } + let n = max(u32(ceil(val * (0.5 / sqrt(REM_ACCURACY)))), 1u); + var lp0 = p0; + qp0 = p0; + let v_step = val / f32(n); + var n_out = 1u; + var val_sum = 0.0; + for (var i = 0u; i < n_quads; i += 1u) { + let t = f32(i + 1u) * step; + let qp2 = eval_cubic(p0, p1, p2, p3, t); + var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step); + qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); + let params = keep_params[i]; + let u0 = approx_parabola_inv_integral(params.a0); + let u2 = approx_parabola_inv_integral(params.a2); + let uscale = 1.0 / (u2 - u0); + var val_target = f32(n_out) * v_step; + while n_out == n || val_target < val_sum + params.val { + var lp1: vec2; + if n_out == n { + lp1 = p3; + } else { + let u = (val_target - val_sum) / params.val; + let a = mix(params.a0, params.a2, u); + let au = approx_parabola_inv_integral(a); + let t = (au - u0) * uscale; + lp1 = eval_quad(qp0, qp1, qp2, t); + } + + // Output line segment lp0..lp1 + let xymin = min(lp0, lp1); + let xymax = max(lp0, lp1); + let dp = lp1 - lp0; + let recip_dx = 1.0 / dp.x; + let invslope = select(dp.x / dp.y, 1.0e9, abs(dp.y) < 1.0e-9); + let c = 0.5 * abs(invslope); + let b = invslope; + let SX = 1.0 / f32(TILE_WIDTH); + let SY = 1.0 / f32(TILE_HEIGHT); + let a = (lp0.x - (lp0.y - 0.5 * f32(TILE_HEIGHT)) * b) * SX; + var x0 = i32(floor(xymin.x * SX)); + var x1 = i32(floor(xymax.x * SX) + 1.0); + var y0 = i32(floor(xymin.y * SY)); + var y1 = i32(floor(xymax.y * SY) + 1.0); + x0 = clamp(x0, 0, i32(config.width_in_tiles)); + x1 = clamp(x1, 0, i32(config.width_in_tiles)); + y0 = clamp(y0, 0, i32(config.height_in_tiles)); + y1 = clamp(y1, 0, i32(config.height_in_tiles)); + var xc = a + b * f32(y0); + var xray = i32(floor(lp0.x * SX)); + var last_xray = i32(floor(lp1.x * SX)); + if dp.y < 0.0 { + let tmp = xray; + xray = last_xray; + last_xray = tmp; + } + for (var y = y0; y < y1; y += 1) { + let tile_y0 = f32(y) * f32(TILE_HEIGHT); + let xbackdrop = max(xray + 1, 0); + if xymin.y < tile_y0 && xbackdrop < i32(config.width_in_tiles) { + let backdrop = select(-1, 1, dp.y < 0.0); + let tile_ix = y * i32(config.width_in_tiles) + xbackdrop; + atomicAdd(&tiles[tile_ix].backdrop, backdrop); + } + var next_xray = last_xray; + if y + 1 < y1 { + let tile_y1 = f32(y + 1) * f32(TILE_HEIGHT); + let x_edge = lp0.x + (tile_y1 - lp0.y) * invslope; + next_xray = i32(floor(x_edge * SX)); + } + let min_xray = min(xray, next_xray); + let max_xray = max(xray, next_xray); + var xx0 = min(i32(floor(xc - c)), min_xray); + var xx1 = max(i32(ceil(xc + c)), max_xray + 1); + xx0 = clamp(xx0, x0, x1); + xx1 = clamp(xx1, x0, x1); + var tile_seg: Segment; + for (var x = xx0; x < xx1; x += 1) { + let tile_x0 = f32(x) * f32(TILE_WIDTH); + let tile_ix = y * i32(config.width_in_tiles) + x; + // allocate segment, insert linked list + let seg_ix = alloc_segment(); + let old = atomicExchange(&tiles[tile_ix].segments, seg_ix); + tile_seg.origin = lp0; + tile_seg.delta = dp; + var y_edge = mix(lp0.y, lp1.y, (tile_x0 - lp0.x) * recip_dx); + if xymin.x < tile_x0 { + let p = vec2(tile_x0, y_edge); + if dp.x < 0.0 { + tile_seg.delta = p - lp0; + } else { + tile_seg.origin = p; + tile_seg.delta = lp1 - p; + } + if tile_seg.delta.x == 0.0 { + tile_seg.delta.x = sign(dp.x) * 1e-9; + } + } + if x <= min_xray || max_xray < x { + y_edge = 1e9; + } + tile_seg.y_edge = y_edge; + tile_seg.next = old; + segments[seg_ix] = tile_seg; + } + xc += b; + xray = next_xray; + } + n_out += 1u; + val_target += v_step; + lp0 = lp1; + } + val_sum += params.val; + qp0 = qp2; + } + } +} diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl new file mode 100644 index 0000000..03c5aa6 --- /dev/null +++ b/piet-wgsl/shader/pathseg.wgsl @@ -0,0 +1,215 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Path segment decoding for the full case. + +// In the simple case, path segments are decoded as part of the coarse +// path rendering stage. In the full case, they are separated, as the +// decoding process also generates bounding boxes, and those in turn are +// used for tile allocation and clipping; actual coarse path rasterization +// can't proceed until those are complete. + +// There's some duplication of the decoding code but we won't worry about +// that just now. Perhaps it could be factored more nicely later. + +#import config +#import pathtag + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var tag_monoids: array; + +struct AtomicPathBbox { + x0: atomic, + y0: atomic, + x1: atomic, + y1: atomic, + linewidth: f32, + trans_ix: u32, +} + +@group(0) @binding(3) +var path_bboxes: array; + +struct Cubic { + p0: vec2, + p1: vec2, + p2: vec2, + p3: vec2, +} + +@group(0) @binding(4) +var cubics: array; + +// Monoid is yagni, for future optimization + +// struct BboxMonoid { +// bbox: vec4, +// flags: u32, +// } + +// let FLAG_RESET_BBOX = 1u; +// let FLAG_SET_BBOX = 2u; + +// fn combine_bbox_monoid(a: BboxMonoid, b: BboxMonoid) -> BboxMonoid { +// var c: BboxMonoid; +// c.bbox = b.bbox; +// // TODO: previous-me thought this should be gated on b & SET_BBOX == false also +// if (a.flags & FLAG_RESET_BBOX) == 0u && b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y { +// c.bbox = a.bbox; +// } else if (a.flags & FLAG_RESET_BBOX) == 0u && (b.flags & FLAG_SET_BBOX) == 0u || +// (a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y) +// { +// c.bbox = vec4(min(a.bbox.xy, c.bbox.xy), max(a.bbox.xw, c.bbox.zw)); +// } +// c.flags = (a.flags & FLAG_SET_BBOX) | b.flags; +// c.flags |= (a.flags & FLAG_RESET_BBOX) << 1u; +// return c; +// } + +// fn bbox_monoid_identity() -> BboxMonoid { +// return BboxMonoid(); +// } + +var pathdata_base: u32; +var transform_base: u32; + +fn read_f32_point(ix: u32) -> vec2 { + let x = bitcast(scene[pathdata_base + ix]); + let y = bitcast(scene[pathdata_base + ix + 1u]); + return vec2(x, y); +} + +fn read_i16_point(ix: u32) -> vec2 { + let raw = scene[pathdata_base + ix]; + let x = f32(i32(raw << 16u) >> 16u); + let y = f32(i32(raw) >> 16u); + return vec2(x, y); +} + +struct Transform { + matrx: vec4, + translate: vec2, +} + +fn read_transform(ix: u32) -> Transform { + let base = transform_base + ix * 6u; + let c0 = bitcast(scene[base]); + let c1 = bitcast(scene[base] + 1u); + let c2 = bitcast(scene[base] + 2u); + let c3 = bitcast(scene[base] + 3u); + let c4 = bitcast(scene[base] + 4u); + let c5 = bitcast(scene[base] + 5u); + let matrx = vec4(c0, c1, c2, c3); + let translate = vec2(c4, c5); + return Transform(matrx, translate); +} + +fn transform_apply(transform: Transform, p: vec2) -> vec2 { + return transform.matrx.xy * p.x + transform.matrx.zw * p.y + transform.translate; +} + +fn round_down(x: f32) -> i32 { + return i32(floor(x)); +} + +fn round_up(x: f32) -> i32 { + return i32(ceil(x)); +} + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.pathtag_base + (ix >> 2u)]; + // TODO: set transform_base + pathdata_base = config.pathdata_base; + let shift = (ix & 3u) * 8u; + var tm = reduce_tag(tag_word & ((1u << shift) - 1u)); + tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm); + var tag_byte = (tag_word >> shift) & 0xffu; + + let out = &path_bboxes[tm.path_ix]; + if (tag_byte & PATH_TAG_PATH) != 0u { + (*out).linewidth = -1.0; // TODO: plumb linewidth + (*out).trans_ix = tm.trans_ix; + } + // Decode path data + let seg_type = tag_byte & PATH_TAG_SEG_TYPE; + if seg_type != 0u { + var p0: vec2; + var p1: vec2; + var p2: vec2; + var p3: vec2; + if (tag_byte & PATH_TAG_F32) != 0u { + p0 = read_f32_point(tm.pathseg_offset); + p1 = read_f32_point(tm.pathseg_offset + 2u); + if seg_type >= PATH_TAG_QUADTO { + p2 = read_f32_point(tm.pathseg_offset + 4u); + if seg_type == PATH_TAG_CUBICTO { + p3 = read_f32_point(tm.pathseg_offset + 6u); + } + } + } else { + p0 = read_i16_point(tm.pathseg_offset); + p1 = read_i16_point(tm.pathseg_offset + 1u); + if seg_type >= PATH_TAG_QUADTO { + p2 = read_i16_point(tm.pathseg_offset + 2u); + if seg_type == PATH_TAG_CUBICTO { + p3 = read_i16_point(tm.pathseg_offset + 3u); + } + } + } + let transform = read_transform(tm.trans_ix); + p0 = transform_apply(transform, p0); + p1 = transform_apply(transform, p1); + var bbox = vec4(min(p0, p1), max(p0, p1)); + // Degree-raise + if seg_type == PATH_TAG_LINETO { + p3 = p1; + p2 = mix(p3, p0, 1.0 / 3.0); + p1 = mix(p0, p3, 1.0 / 3.0); + } else if seg_type >= PATH_TAG_QUADTO { + p2 = transform_apply(transform, p2); + bbox = vec4(min(bbox.xy, p2), max(bbox.zw, p2)); + if seg_type == PATH_TAG_CUBICTO { + p3 = transform_apply(transform, p3); + bbox = vec4(min(bbox.xy, p3), max(bbox.zw, p3)); + } else { + p3 = p2; + p2 = mix(p1, p2, 1.0 / 3.0); + p1 = mix(p1, p0, 1.0 / 3.0); + } + } + cubics[global_id.x] = Cubic(p0, p1, p2, p3); + // Update bounding box using atomics only. Computing a monoid is a + // potential future optimization. + if bbox.z > bbox.x && bbox.w > bbox.y { + atomicMin(&(*out).x0, round_down(bbox.x)); + atomicMin(&(*out).y0, round_down(bbox.y)); + atomicMax(&(*out).x1, round_up(bbox.z)); + atomicMax(&(*out).y1, round_up(bbox.w)); + } + } + +} diff --git a/piet-wgsl/shader/shared/bbox.wgsl b/piet-wgsl/shader/shared/bbox.wgsl index fb7728b..c260df9 100644 --- a/piet-wgsl/shader/shared/bbox.wgsl +++ b/piet-wgsl/shader/shared/bbox.wgsl @@ -16,11 +16,13 @@ // The annotated bounding box for a path. It has been transformed, // but contains a link to the active transform, mostly for gradients. +// Coordinates are integer pixels (for the convenience of atomic update) +// but will probably become fixed-point fractions for rectangles. struct PathBbox { - x0: u32, - y0: u32, - x1: u32, - y1: u32, + x0: i32, + y0: i32, + x1: i32, + y1: i32, linewidth: f32, trans_ix: u32, } diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index 9058d5c..bf0839d 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -19,6 +19,7 @@ struct Config { height_in_tiles: u32, n_drawobj: u32, + n_path: u32, // offsets within scene buffer (in u32 units) // Note: this is a difference from piet-gpu, which is in bytes diff --git a/piet-wgsl/shader/shared/pathtag.wgsl b/piet-wgsl/shader/shared/pathtag.wgsl index 2424d2a..b248e18 100644 --- a/piet-wgsl/shader/shared/pathtag.wgsl +++ b/piet-wgsl/shader/shared/pathtag.wgsl @@ -18,7 +18,10 @@ struct TagMonoid { trans_ix: u32, pathseg_ix: u32, pathseg_offset: u32, - // Note: piet-gpu has linewidth and path, but not needed here +#ifdef full + linewidth_ix: u32, + path_ix: u32, +#endif } let PATH_TAG_SEG_TYPE = 3u; @@ -26,8 +29,11 @@ let PATH_TAG_LINETO = 1u; let PATH_TAG_QUADTO = 2u; let PATH_TAG_CUBICTO = 3u; let PATH_TAG_F32 = 8u; -let PATH_TAG_PATH = 0x10u; let PATH_TAG_TRANSFORM = 0x20u; +#ifdef full +let PATH_TAG_PATH = 0x10u; +let PATH_TAG_LINEWIDTH = 0x40u; +#endif fn tag_monoid_identity() -> TagMonoid { return TagMonoid(); @@ -38,6 +44,10 @@ fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid { c.trans_ix = a.trans_ix + b.trans_ix; c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; +#ifdef full + c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; + c.path_ix = a.path_ix + b.path_ix; +#endif return c; } @@ -51,5 +61,9 @@ fn reduce_tag(tag_word: u32) -> TagMonoid { a += a >> 8u; a += a >> 16u; c.pathseg_offset = a & 0xffu; +#ifdef full + c.path_ix = countOneBits(tag_word & (PATH_TAG_PATH) * 0x1010101u); + c.linewidth_ix = countOneBits(tag_word & (PATH_TAG_LINEWIDTH * 0x1010101u)); +#endif return c; } diff --git a/piet-wgsl/shader/shared/tile.wgsl b/piet-wgsl/shader/shared/tile.wgsl new file mode 100644 index 0000000..f575e50 --- /dev/null +++ b/piet-wgsl/shader/shared/tile.wgsl @@ -0,0 +1,29 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Common datatypes for path and tile intermediate info. + +struct Path { + // bounding box in tiles + bbox: vec4, + // offset (in u32's) to tile rectangle + tiles: u32, +} + +struct Tile { + backdrop: i32, + segments: u32, +} \ No newline at end of file diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl index 867db7e..0bd550e 100644 --- a/piet-wgsl/shader/tile_alloc.wgsl +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -19,6 +19,7 @@ #import config #import bump #import drawtag +#import tile @group(0) @binding(0) var config: Config; @@ -32,19 +33,6 @@ var draw_bboxes: array>; @group(0) @binding(3) var bump: BumpAllocators; -// TODO: put this in the right place, dedup -struct Path { - // bounding box in pixels - bbox: vec4, - // offset (in u32's) to tile rectangle - tiles: u32, -} - -struct Tile { - backdrop: i32, - segments: u32, -} - @group(0) @binding(4) var paths: array; diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index 5cf960d..ba0d034 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -22,7 +22,7 @@ use engine::Engine; use render::render; use test_scene::dump_scene_info; -use wgpu::{Device, Queue}; +use wgpu::{Device, Queue, Limits}; mod engine; mod render; @@ -33,12 +33,14 @@ async fn run() -> Result<(), Box> { let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY); let adapter = instance.request_adapter(&Default::default()).await.unwrap(); let features = adapter.features(); + let mut limits = Limits::default(); + limits.max_storage_buffers_per_shader_stage = 16; let (device, queue) = adapter .request_device( &wgpu::DeviceDescriptor { label: None, features: features & wgpu::Features::TIMESTAMP_QUERY, - limits: Default::default(), + limits, }, None, ) @@ -55,6 +57,7 @@ async fn do_render( engine: &mut Engine, ) -> Result<(), Box> { let shaders = shaders::init_shaders(device, engine)?; + let full_shaders = shaders::full_shaders(device, engine)?; let scene = test_scene::gen_test_scene(); dump_scene_info(&scene); let (recording, buf) = render(&scene, &shaders); diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index c3c30f1..3f9d184 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -16,6 +16,7 @@ struct Config { width_in_tiles: u32, height_in_tiles: u32, n_drawobj: u32, + n_path: u32, pathtag_base: u32, pathdata_base: u32, drawtag_base: u32, diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index d1e8847..6a7e9fb 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -35,6 +35,22 @@ pub struct Shaders { pub fine: ShaderId, } +// Shaders for the full pipeline +pub struct FullShaders { + pub pathtag_reduce: ShaderId, + pub pathtag_scan: ShaderId, + pub bbox_clear: ShaderId, + pub pathseg: ShaderId, + pub draw_reduce: ShaderId, + pub draw_leaf: ShaderId, + pub tile_alloc: ShaderId, + pub binning: ShaderId, + pub path_coarse: ShaderId, + pub backdrop: ShaderId, + pub coarse: ShaderId, + pub fine: ShaderId, +} + pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result { let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader")); let imports = preprocess::get_imports(shader_dir); @@ -93,3 +109,153 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result Result { + let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader")); + let imports = preprocess::get_imports(shader_dir); + let read_shader = + |path: &str| fs::read_to_string(shader_dir.join(path.to_string() + ".wgsl")).unwrap(); + let empty = HashSet::new(); + let mut full_config = HashSet::new(); + full_config.insert("full".into()); + let pathtag_reduce = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathtag_reduce"), &full_config, &imports).into(), + &[BindType::BufReadOnly, BindType::BufReadOnly, BindType::Buffer], + )?; + let pathtag_scan = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathtag_scan"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let bbox_clear = engine.add_shader( + device, + preprocess::preprocess(&read_shader("bbox_clear"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let pathseg = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathseg"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let draw_reduce = engine.add_shader( + device, + preprocess::preprocess(&read_shader("draw_reduce"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let draw_leaf = engine.add_shader( + device, + preprocess::preprocess(&read_shader("draw_leaf"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let tile_alloc = engine.add_shader( + device, + preprocess::preprocess(&read_shader("tile_alloc"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let binning = engine.add_shader( + device, + preprocess::preprocess(&read_shader("binning"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + ], + )?; + + let path_coarse = engine.add_shader( + device, + preprocess::preprocess(&read_shader("path_coarse_full"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let backdrop = engine.add_shader( + device, + preprocess::preprocess(&read_shader("backdrop"), &empty, &imports).into(), + &[BindType::BufReadOnly, BindType::Buffer], + )?; + let coarse = engine.add_shader( + device, + preprocess::preprocess(&read_shader("coarse"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let fine = engine.add_shader( + device, + preprocess::preprocess(&read_shader("fine"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::BufReadOnly, + ], + )?; + Ok(FullShaders { + pathtag_reduce, + pathtag_scan, + bbox_clear, + pathseg, + draw_reduce, + draw_leaf, + tile_alloc, + binning, + path_coarse, + backdrop, + coarse, + fine, + }) +}