From b6da6d958ba90dcb0847e93d2d13b4e394f0df96 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Wed, 26 Oct 2022 13:55:45 -0700 Subject: [PATCH 01/17] Write more shaders This is super WIP, but represents partially written shaders for more of the piet-gpu pipeline. Checkpointing as other work is incoming. --- .vscode/settings.json | 2 + piet-wgsl/shader/binning.wgsl | 173 +++++++++++++++++++++++++++ piet-wgsl/shader/draw_leaf.wgsl | 145 ++++++++++++++++++++++ piet-wgsl/shader/draw_reduce.wgsl | 54 +++++++++ piet-wgsl/shader/shared/bbox.wgsl | 30 +++++ piet-wgsl/shader/shared/config.wgsl | 7 ++ piet-wgsl/shader/shared/drawtag.wgsl | 60 ++++++++++ piet-wgsl/shader/shared/pathtag.wgsl | 6 +- 8 files changed, 472 insertions(+), 5 deletions(-) create mode 100644 piet-wgsl/shader/binning.wgsl create mode 100644 piet-wgsl/shader/draw_leaf.wgsl create mode 100644 piet-wgsl/shader/draw_reduce.wgsl create mode 100644 piet-wgsl/shader/shared/bbox.wgsl create mode 100644 piet-wgsl/shader/shared/drawtag.wgsl diff --git a/.vscode/settings.json b/.vscode/settings.json index 323cd81..3a9bfe5 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,8 @@ { "wgsl-analyzer.customImports": { + "bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl", "config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl", + "drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl", "segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl", "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl" }, diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl new file mode 100644 index 0000000..1282164 --- /dev/null +++ b/piet-wgsl/shader/binning.wgsl @@ -0,0 +1,173 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// The binning stage + +#import config +#import drawtag +#import bbox + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var draw_monoids: array; + + +@group(0) @binding(2) +var path_bbox_buf: array; + +@group(0) @binding(3) +var clip_bbox_buf: array>; + +@group(0) @binding(4) +var intersected_bbox: array>; + +// TODO: put into shared include +// TODO: robust memory (failure flags) +struct BumpAllocators { + binning: atomic, +} + +@group(0) @binding(5) +var bump: BumpAllocators; + +@group(0) @binding(6) +var bin_data: array; + +struct BinHeader { + element_count: u32, + chunk_offset: u32, +} + +@group(0) @binding(7) +var bin_header: array; + +// These should probably be in a common block. +let TILE_WIDTH = 16u; +let TILE_HEIGHT = 16u; +// Number of tiles per bin +let N_TILE_X = 16u; +let N_TILE_Y = 16u; +let N_TILE = N_TILE_X * N_TILE_Y; + +// conversion factors from coordinates to bin +let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); +let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); + +let WG_SIZE = 256u; +let N_SLICE = WG_SIZE / 32u; + +var sh_bitmaps: array, N_TILE>, N_SLICE>; +var sh_count: array, N_SLICE>; +var sh_chunk_offset: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + for (var i = 0u; i < N_SLICE; i += 1u) { + atomicStore(&sh_bitmaps[i][local_id.x], 0u); + } + + // Read inputs and determine coverage of bins + let element_ix = global_id.x; + var x0 = 0; + var y0 = 0; + var x1 = 0; + var y1 = 0; + if element_ix < config.n_drawobj { + let draw_monoid = draw_monoids[element_ix]; + var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); + if draw_monoid.clip_ix > 0u { + clip_bbox = clip_bbox_buf[draw_monoid.clip_ix - 1u]; + } + // For clip elements, clip_box is the bbox of the clip path, + // intersected with enclosing clips. + // For other elements, it is the bbox of the enclosing clips. + // TODO check this is true + + let path_bbox = path_bbox_buf[draw_monoid.path_ix]; + let pb = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); + let bbox = bbox_intersect(clip_bbox, pb); + + bbox.zw = max(bbox.xy, bbox.zw); + intersected_bbox[element_ix] = bbox; + x0 = i32(floor(bbox.x * SX)); + y0 = i32(floor(bbox.y * SY)); + x1 = i32(ceil(bbox.z * SX)); + y1 = i32(ceil(bbox.w * SY)); + } + let width_in_bins = i32((config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X); + let height_in_bins = i32((config.height_in_tiles + N_TILE_Y - 1u) / N_TILE_Y); + x0 = clamp(x0, 0, width_in_bins); + y0 = clamp(y0, 0, height_in_bins); + x1 = clamp(x1, 0, width_in_bins); + y1 = clamp(y1, 0, height_in_bins); + if x0 == x1 { + y1 = y0; + } + var x = x0; + var y = y0; + let my_slice = local_id.x / 32u; + let my_mask = 1u << (local_id.x & 31u); + while y < y1 { + atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask); + x += 1u; + if x == x1 { + x = x0; + y += 1; + } + } + + workgroupBarrier(); + // Allocate output segments + var element_count = 0u; + for (var i = 0u; i < N_SLICE; i += 1u) { + elementCount += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); + sh_count[i][id_ix] = element_count; + } + // element_count is the number of draw objects covering this thread's bin + let chunk_offset = atomicAdd(&bump.binning, element_count); + sh_chunk_offset[local_id.x] = chunk_offset; + bin_header[global_id.x].element_count = element_count; + bin_header[global_id.x].chunk_offset = chunk_offset; + workgroupBarrier(); + + // loop over bbox of bins touched by this draw object + x = x0; + y = y0; + while y < y1 { + let bin_ix = y * width_in_bins + x; + let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]); + // I think this predicate will always be true... + if (out_mask & my_mask) != 0 { + var idx = countOneBits(out_mask & (my_mask - 1u)); + if my_slice > 0 { + idx += sh_count[my_slice - 1u][bin_ix]; + } + let offset = sh_chunk_offset[bin_ix]; + bin_data[offset + idx] = element_ix; + } + x += 1u; + if x == x1 { + x = x0; + y += 1u; + } + } +} diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl new file mode 100644 index 0000000..2169dd4 --- /dev/null +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -0,0 +1,145 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Finish prefix sum of drawtags, decode draw objects. + +#import config +#import drawtag +#import bbox + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var reduced: array; + +@group(0) @binding(3) +var draw_monoid: array; + +@group(0) @binding(4) +var path_bbox: array; + +@group(0) @binding(5) +var info: array; + +let WG_SIZE = 256; + +var sh_scratch: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.drawtag_base + ix]; + let agg = map_draw_tag(tag_word); + sh_scratch[local_id.x] = agg; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_scratch[local_id.x + (1u << i)]; + agg = combine_draw_monoid(agg, other); + } + workgroupBarrier(); + sh_scratch[local_id.x] = agg; + } + workgroupBarrier(); + var m = draw_monoid_identity(); + if wg_id.x > 0u { + m = parent[wg_id.x - 1u]; + } + if local_id.x > 0u { + m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]); + } + // m now contains exclusive prefix sum of draw monoid + draw_monoid[ix] = m; + let dd = config.drawdata_base + m.scene_offset; + let di = m.info_offset; + if tag_word == DRAWTAG_FILL_COLOR || tag_word == DRAWTAG_FILL_LIN_GRADIENT || + tag_word == DRAWTAG_FILL_RAD_GRADIENT || tag_word == DRAWTAG_FILL_IMAGE || + tag_word == DRAWTAG_BEGIN_CLIP + { + let bbox = path_bbox[m.path_ix]; + let x0 = f32(bbox.x0) - 32768.0; + let y0 = f32(bbox.y0) - 32768.0; + let x1 = f32(bbox.x1) - 32768.0; + let y1 = f32(bbox.y1) - 32768.0; + let bbox_f = vec4(x0, y0, x1, y1); + let fill_mode = u32(bbox.linewidth >= 0.0); + var mat: vec4; + var translate: vec2; + var linewidth = bbox.linewidth; + if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT { + // TODO: retrieve transform from scene. Packed? + } + if linewidth >= 0.0 { + // Note: doesn't deal with anisotropic case + linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z)); + } + switch tag_word { + case DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE: { + info[di] = bitcast(linewidth); + } + case DRAWTAG_FILL_LIN_GRADIENT: { + info[di] = bitcast(linewidth); + var p0 = bitcast>(vec2(scene[dd + 1u], scene[dd + 2u])); + var p1 = bitcast>(vec2(scene[dd + 3u], scene[dd + 4u])); + p0 = mat.xy * p0.x + mat.zw * p0.y + translate; + p1 = mat.xy * p1.x + mat.zw * p1.y + translate; + let dxy = p1 - p0; + let scale = 1.0 / dot(dxy, dxy); + let line_xy = dxy * scale; + let line_c = -dot(p0, line_xy); + info[di + 1u] = bitcast(line_xy.x); + info[di + 2u] = bitcast(line_xy.y); + info[di + 3u] = bitcast(line_c); + } + case DRAWTAG_FILL_RAD_GRADIENT: { + info[di] = bitcast(linewidth); + var p0 = bitcast>(vec2(scene[dd + 1u], scene[dd + 2u])); + var p1 = bitcast>(vec2(scene[dd + 3u], scene[dd + 4u])); + let r0 = bitcast(scene[dd + 5u]); + let r1 = bitcast(scene[dd + 6u]); + let inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z); + let inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x); + var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y; + inv_tr += p0; + let center1 = p1 - p0; + let rr = r1 / (r1 - r0); + let ra_inv = rr / (r1 * r1 - dot(center1, center1)); + let c1 = center1 * ra_inv; + let ra = rr * ra_inv; + let roff = rr - 1.0; + info[di + 1u] = bitcast(inv_mat.x); + info[di + 2u] = bitcast(inv_mat.y); + info[di + 3u] = bitcast(inv_mat.z); + info[di + 4u] = bitcast(inv_mat.w); + info[di + 5u] = bitcast(inv_tr.x); + info[di + 6u] = bitcast(inv_tr.y); + info[di + 7u] = bitcast(c1.x); + info[di + 8u] = bitcast(c1.y); + info[di + 9u] = bitcast(ra); + info[di + 10u] = bitcast(roff); + } + default: {} + } + } +} \ No newline at end of file diff --git a/piet-wgsl/shader/draw_reduce.wgsl b/piet-wgsl/shader/draw_reduce.wgsl new file mode 100644 index 0000000..9efae7c --- /dev/null +++ b/piet-wgsl/shader/draw_reduce.wgsl @@ -0,0 +1,54 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +#import config +#import drawtag + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var reduced: array; + +let WG_SIZE = 256; + +var sh_scratch: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.drawtag_base + ix]; + let agg = map_draw_tag(tag_word); + sh_scratch[local_id.x] = agg; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_scratch[local_id.x + (1u << i)]; + agg = combine_draw_monoid(agg, other); + } + workgroupBarrier(); + sh_scratch[local_id.x] = agg; + } + if local_id.x == 0u { + reduced[ix >> LG_WG_SIZE] = agg; + } +} diff --git a/piet-wgsl/shader/shared/bbox.wgsl b/piet-wgsl/shader/shared/bbox.wgsl new file mode 100644 index 0000000..c0eeb1c --- /dev/null +++ b/piet-wgsl/shader/shared/bbox.wgsl @@ -0,0 +1,30 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// The annotated bounding box for a path. It has been transformed, +// but contains a link to the active transform, mostly for gradients. +struct PathBbox { + x0: u32, + y0: u32, + x1: u32, + y1: u32, + linewidth: f32, + trans_ix: u32, +} + +fn bbox_intersect(a: vec4, b: vec4) -> f32 { + return vec4(max(a.xy, b.xy), min(a.zyw, b.zw)); +} diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index 704a608..cdb0abc 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -17,4 +17,11 @@ struct Config { width_in_tiles: u32, height_in_tiles: u32, + + n_drawobj: u32, + + // offsets within config file (in u32 units) + // Note: this is a difference from piet-gpu, which is in bytes + drawtag_base: u32, + drawdata_base: u32, } diff --git a/piet-wgsl/shader/shared/drawtag.wgsl b/piet-wgsl/shader/shared/drawtag.wgsl new file mode 100644 index 0000000..c318e53 --- /dev/null +++ b/piet-wgsl/shader/shared/drawtag.wgsl @@ -0,0 +1,60 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// The DrawMonoid is computed as a prefix sum to aid in decoding +// the variable-length encoding of draw objects. +struct DrawMonoid { + // The number of paths preceding this draw object. + path_ix: u32, + // The number of clip operations preceding this draw object. + clip_ix: u32, + // The offset of the encoded draw object in the scene (u32s). + scene_offset: u32, + // The offset of the associated info. + info_offset: u32, +} + +// Each draw object has a 32-bit draw tag, which is a bit-packed +// version of the draw monoid. +let DRAWTAG_NOP = 0u; +let DRAWTAG_FILL_COLOR = 0x44u; +let DRAWTAG_FILL_LIN_GRADIENT = 0x114; +let DRAWTAG_FILL_RAD_GRADIENT = 0x2dc; +let DRAWTAG_FILL_IMAGE = 0x48; +let DRAWTAG_BEGIN_CLIP = 0x05; +let DRAWTAG_END_CLIP = 0x25; + +fn draw_monoid_identity() -> DrawMonoid { + return DrawMonoid(); +} + +fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) { + var c: DrawMonoid; + c.path_ix = a.path_ix + b.path_ix; + c.clip_ix = a.clip_ix + b.clip_ix; + c.scene_offset = a.scene_offset + b.scene_offset; + c.info_offset = a.info_offset + b.info_offset; +} + +fn map_draw_tag(tag_word: u32) -> DawMonoid { + let has_path = ; + var c: DrawMonoid; + c.path_ix = u32(tag_word != DRAWTAG_NOP); + c.clip_ix = tag_word & 1u; + c.scene_offset = (tag_word >> 2u) & 0x07u; + c.info_offset = (tag_word >> 6u) & 0x0fu; + return c; +} diff --git a/piet-wgsl/shader/shared/pathtag.wgsl b/piet-wgsl/shader/shared/pathtag.wgsl index e4cfda3..2424d2a 100644 --- a/piet-wgsl/shader/shared/pathtag.wgsl +++ b/piet-wgsl/shader/shared/pathtag.wgsl @@ -30,11 +30,7 @@ let PATH_TAG_PATH = 0x10u; let PATH_TAG_TRANSFORM = 0x20u; fn tag_monoid_identity() -> TagMonoid { - var c: TagMonoid; - c.trans_ix = 0u; - c.pathseg_ix = 0u; - c.pathseg_offset = 0u; - return c; + return TagMonoid(); } fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid { From 06ec395b6889271df410f8d441ec214e8d8bc8a3 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Fri, 28 Oct 2022 12:01:15 -0700 Subject: [PATCH 02/17] Checkpoint coarse rasterization The bones of coarse rasterization are in place (so far, fills only). Still not suitable for end-to-end (need to generate bounding boxes, among other things), but getting closer. --- .vscode/settings.json | 4 +- piet-wgsl/shader/binning.wgsl | 15 +- piet-wgsl/shader/coarse.wgsl | 327 +++++++++++++++++++++++++++ piet-wgsl/shader/draw_leaf.wgsl | 4 +- piet-wgsl/shader/shared/bbox.wgsl | 4 +- piet-wgsl/shader/shared/bump.wgsl | 21 ++ piet-wgsl/shader/shared/config.wgsl | 22 +- piet-wgsl/shader/shared/drawtag.wgsl | 13 +- piet-wgsl/shader/shared/ptcl.wgsl | 38 ++++ 9 files changed, 422 insertions(+), 26 deletions(-) create mode 100644 piet-wgsl/shader/coarse.wgsl create mode 100644 piet-wgsl/shader/shared/bump.wgsl create mode 100644 piet-wgsl/shader/shared/ptcl.wgsl diff --git a/.vscode/settings.json b/.vscode/settings.json index 3a9bfe5..43d84c0 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,10 +1,12 @@ { "wgsl-analyzer.customImports": { "bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl", + "bump": "${workspaceFolder}/piet-wgsl/shader/shared/bump.wgsl", "config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl", "drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl", "segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl", - "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl" + "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl", + "ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl" }, "wgsl-analyzer.diagnostics.nagaVersion": "main" } diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl index 1282164..d16a620 100644 --- a/piet-wgsl/shader/binning.wgsl +++ b/piet-wgsl/shader/binning.wgsl @@ -19,6 +19,7 @@ #import config #import drawtag #import bbox +#import bump @group(0) @binding(0) var config: Config; @@ -26,7 +27,6 @@ var config: Config; @group(0) @binding(1) var draw_monoids: array; - @group(0) @binding(2) var path_bbox_buf: array; @@ -37,10 +37,6 @@ var clip_bbox_buf: array>; var intersected_bbox: array>; // TODO: put into shared include -// TODO: robust memory (failure flags) -struct BumpAllocators { - binning: atomic, -} @group(0) @binding(5) var bump: BumpAllocators; @@ -48,6 +44,7 @@ var bump: BumpAllocators; @group(0) @binding(6) var bin_data: array; +// TODO: put in common place struct BinHeader { element_count: u32, chunk_offset: u32, @@ -56,14 +53,6 @@ struct BinHeader { @group(0) @binding(7) var bin_header: array; -// These should probably be in a common block. -let TILE_WIDTH = 16u; -let TILE_HEIGHT = 16u; -// Number of tiles per bin -let N_TILE_X = 16u; -let N_TILE_Y = 16u; -let N_TILE = N_TILE_X * N_TILE_Y; - // conversion factors from coordinates to bin let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl new file mode 100644 index 0000000..159a3d7 --- /dev/null +++ b/piet-wgsl/shader/coarse.wgsl @@ -0,0 +1,327 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// The coarse rasterization stage. + +#import config +#import bump +#import drawtag +#import ptcl + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var draw_monoids: array; + +// TODO: dedup +struct BinHeader { + element_count: u32, + chunk_offset: u32, +} + +@group(0) @binding(3) +var bin_headers: array; + +@group(0) @binding(4) +var bin_data: array; + +@group(0) @binding(5) +var bump: BumpAllocators; + +@group(0) @binding(6) +var ptcl: array; + +// TODO: put this in the right place +struct Path { + // bounding box in pixels + bbox: vec4, + // offset (in u32's) to tile rectangle + tiles: u32, +} + +struct Tile { + backdrop: i32, + segments: u32, +} + +@group(0) @binding(7) +var paths: array; + +@group(0) @binding(8) +var tiles: array; + + +// Much of this code assumes WG_SIZE == N_TILE. If these diverge, then +// a fair amount of fixup is needed. +let WG_SIZE = 256u; +let N_SLICE = WG_SIZE / 32u; + +var sh_bitmaps: array, N_TILE>, N_SLICE>; +var sh_part_count: array; +var sh_part_offsets: array; +var sh_drawobj_ix: array; +var sh_tile_stride: array; +var sh_tile_width: array; +var sh_tile_x0: array; +var sh_tile_y0: array; +var sh_tile_count: array; +var sh_tile_base: array; + +// helper functions for writing ptcl + +var cmd_offset: u32; +var cmd_limit: u32; + +// Make sure there is space for a command of given size, plus a jump if needed +fn alloc_cmd(size: u32) { + if cmd_offset + size >= cmd_limit { + let new_cmd = atomicAdd(&bump.ptcl, PTCL_INCREMENT); + // TODO: robust memory + ptcl[cmd_offset] = CMD_JUMP; + ptcl[cmd_offset + 1u] = new_cmd; + cmd_offset = new_cmd; + cmd_limit = cmd_offset + (PTCL_INCREMENT - PTCL_HEADROOM); + } +} + +fn write_path(tile: Tile, linewidth: f32) { + // TODO: take flags + // TODO: handle stroke + alloc_cmd(3u); + if tile.segments != 0u { + let fill = CmdFill(tile.segments, tile.backdrop); + ptcl[cmd_offset] = CMD_FILL; + ptcl[cmd_offset + 1u] = fill.tile; + ptcl[cmd_offset + 2u] = u32(fill.backdrop); + cmd_offset += 3u; + } else { + ptcl[cmd_offset] = CMD_SOLID; + cmd_offset += 1u; + } +} + +fn write_color(color: CmdColor) { + alloc_cmd(2u); + ptcl[cmd_offset] = CMD_FILL; + ptcl[cmd_offset + 1u] = color.rgba_color; + cmd_offset += 2u; + +} + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let width_in_bins = (config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X; + let bin_ix = width_in_bins * wg_id.y + wg_id.x; + let n_partitions = (config.n_drawobj + N_TILE - 1u) / N_TILE; + + // Coordinates of the top left of this bin, in tiles. + let bin_tile_x = N_TILE_X * wg_id.x; + let bin_tile_y = N_TILE_Y * wg_id.y; + + let tile_x = local_id.x % N_TILE_X; + let tile_y = local_id.y % N_TILE_Y; + let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x; + cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC; + cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM); + // TODO: clip state + let clip_zero_depth = 0u; + + var partition_ix = 0u; + var rd_ix = 0u; + var wr_ix = 0u; + var part_start_ix = 0u; + var ready_ix = 0u; + // TODO: blend state + + while true { + for (var i = 0u; i < N_SLICE; i += 1u) { + atomicStore(&sh_bitmaps[i][local_id.x], 0u); + } + + while true { + if ready_ix == wr_ix && partition_ix < n_partitions { + part_start_ix = ready_ix; + var count = 0u; + if partition_ix + local_id.x < n_partitions { + let in_ix = (partition_ix + local_id.x) * N_TILE + bin_ix; + let bin_header = bin_headers[in_ix]; + count = bin_header.element_count; + sh_part_offsets[local_id.x] = bin_header.chunk_offset; + } + // prefix sum the element counts + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + sh_part_count[local_id.x] = count; + workgroupBarrier(); + if local_id.x >= (1u << i) { + count += sh_part_count[local_id - (1u << i)]; + } + workgroupBarrier(); + } + sh_part_count[local_id.x] = part_start_ix + count; + workgroupBarrier(); + ready_ix = sh_part_count[WG_SIZE - 1u]; + partition_ix += WG_SIZE; + } + // use binary search to find draw object to read + var ix = rd_ix + local_id.x; + if ix >= wr_ix && ix < ready_ix { + var part_ix = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let probe = part_ix + ((N_TILE / 2u) >> i); + if ix >= sh_part_count[probe - 1u] { + part_ix = probe; + } + } + ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u); + let offset = sh_part_offsets[part_ix]; + sh_drawobj_ix[local_id.x] = bin_data[offset + ix]; + } + wr_ix = min(rd_ix + N_TILE, ready_ix); + if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) { + break; + } + } + // At this point, sh_drawobj_ix[0.. wr_ix - rd_ix] contains merged binning results. + var tag = DRAWTAG_NOP; + var drawobj_ix: u32; + if local_id.x + rd_ix < wr_ix { + drawobj_ix = sh_drawobj_ix[local_id.x]; + tag = scene[config.drawtag_base + drawobj_ix]; + } + + var tile_count = 0u; + // I think this predicate is the same as the last, maybe they can be combined + if tag != DRAWTAG_NOP { + let path_ix = draw_monoids[drawobj_ix].path_ix; + let path = paths[path_ix]; + let stride = path.bbox.z - path.bbox.x; + sh_tile_stride[local_id.x] = stride; + let dx = i32(path.bbox.x) - i32(bin_tile_x); + let dy = i32(path.bbox.y) - i32(bin_tile_y); + let x0 = clamp(dx, 0, i32(N_TILE_X)); + let y0 = clamp(dy, 0, i32(N_TILE_Y)); + let x1 = clamp(i32(path.bbox.z) - i32(bin_tile_x), 0, i32(N_TILE_X)); + let y1 = clamp(i32(path.bbox.w) - i32(bin_tile_y), 0, i32(N_TILE_Y)); + sh_tile_width[local_id.x] = u32(x1 - x0); + sh_tile_x0[local_id.x] = u32(x0); + sh_tile_y0[local_id.x] = u32(y0); + tile_count = u32(x1 - x0) * u32(y1 - y0); + // base relative to bin + let base = path.tiles - u32(dy * i32(stride) + dx); + sh_tile_base[local_id.x] = base; + // TODO: there's a write_tile_alloc here in the source, not sure what it's supposed to do + } + + // Prefix sum of tile counts + sh_tile_count[local_id.x] = tile_count; + for (var i = 0; i < firstTrailingBit(N_TILE); i += 1u) { + workgroupBarrier(); + if local_id.x >= (1u << i) { + tile_count += sh_tile_count[local_id.x - (1u << i)]; + } + workgroupBarrier(); + sh_tile_count[local_id.x] = tile_count; + } + workgroupBarrier(); + let total_tile_count = sh_tile_count[N_TILE - 1u]; + // Parallel iteration over all tiles + for (var ix = local_id.x; ix < total_tile_count; ix += N_TILE) { + // Binary search to find draw object which contains this tile + var el_ix = 0u; + for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) { + let probe = el_ix + ((N_TILE / 2u) >> i); + if ix >= sh_tile_count[probe - 1u] { + el_ix = probe; + } + } + drawobj_ix = sh_drawobj_ix[el_ix]; + tag = scene[config.drawtag_base + drawobj_ix]; + // TODO: clip logic + let seq_ix = ix - select(0u, sh_tile_count[el_ix - 1u], el_ix > 0u); + let width = sh_tile_width[el_ix]; + let x = sh_tile_x0[el_ix] + seq_ix % width; + let y = sh_tile_y0[el_ix] + seq_ix / width; + let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * y + x; + let tile = tiles[tile_ix]; + // TODO: this predicate becomes more interesting with clip + let include_tile = tile.segments != 0u || tile.backdrop != 0; + if include_tile { + let el_slice = el_ix / 32u; + let el_mask = 1u << (el_ix & 31u); + atomicOr(&sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask); + } + } + workgroupBarrier(); + // At this point bit drawobj % 32 is set in sh_bitmaps[drawobj / 32][y * N_TILE_X + x] + // if drawobj touches tile (x, y). + + // Write per-tile command list for this tile + var slice_ix = 0u; + var bitmap = atomicLoad(&sh_bitmaps[0u][local_id.x]); + while true { + if bitmap == 0u { + slice_ix += 1u; + // potential optimization: make iteration limit dynamic + if slice_ix == N_SLICE { + break; + } + bitmap = atomicLoad(&sh_bitmaps[slice_ix][local_id.x]); + if bitmap == 0u { + continue; + } + } + let el_ix = slice_ix * 32u + firstTrailingBit(bitmap); + drawobj_ix = sh_drawobj_ix[el_ix]; + // clear LSB of bitmap, using bit magic + bitmap &= bitmap - 1u; + let drawtag = scene[config.drawtag_base + drawobj_ix]; + let dm = draw_monoids[drawobj_ix]; + let dd = config.drawdata_base + dm.scene_offset; + if clip_zero_depth == 0u { + let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x; + let tile = tiles[tile_ix]; + switch drawtag { + case DRAWTAG_FILL_COLOR: { + // TODO: get linewidth from draw object + let linewidth = -1.0; + let rgba_color = scene[dd]; + write_path(tile, linewidth); + write_color(CmdColor(rgba_color)); + } + default: {} + } + } + } + + rd_ix += N_TILE; + if rd_ix >= ready_ix && partition_ix >= n_partitions { + break; + } + workgroupBarrier(); + } + if bin_tile_x < config.width_in_tiles && bin_tile_y < config.height_in_tiles { + ptcl[cmd_offset] = CMD_END; + // TODO: blend stack allocation + } +} diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index 2169dd4..21edcfe 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -38,7 +38,7 @@ var path_bbox: array; @group(0) @binding(5) var info: array; -let WG_SIZE = 256; +let WG_SIZE = 256u; var sh_scratch: array; @@ -119,7 +119,7 @@ fn main( let r0 = bitcast(scene[dd + 5u]); let r1 = bitcast(scene[dd + 6u]); let inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z); - let inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x); + let inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x); var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y; inv_tr += p0; let center1 = p1 - p0; diff --git a/piet-wgsl/shader/shared/bbox.wgsl b/piet-wgsl/shader/shared/bbox.wgsl index c0eeb1c..fb7728b 100644 --- a/piet-wgsl/shader/shared/bbox.wgsl +++ b/piet-wgsl/shader/shared/bbox.wgsl @@ -25,6 +25,6 @@ struct PathBbox { trans_ix: u32, } -fn bbox_intersect(a: vec4, b: vec4) -> f32 { - return vec4(max(a.xy, b.xy), min(a.zyw, b.zw)); +fn bbox_intersect(a: vec4, b: vec4) -> vec4 { + return vec4(max(a.xy, b.xy), min(a.zw, b.zw)); } diff --git a/piet-wgsl/shader/shared/bump.wgsl b/piet-wgsl/shader/shared/bump.wgsl new file mode 100644 index 0000000..6d9a225 --- /dev/null +++ b/piet-wgsl/shader/shared/bump.wgsl @@ -0,0 +1,21 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// TODO: robust memory (failure flags) +struct BumpAllocators { + binning: atomic, + ptcl: atomic, +} diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index cdb0abc..0a91782 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -20,8 +20,28 @@ struct Config { n_drawobj: u32, - // offsets within config file (in u32 units) + // offsets within scene buffer (in u32 units) // Note: this is a difference from piet-gpu, which is in bytes drawtag_base: u32, drawdata_base: u32, } + +// Geometry of tiles and bins + + +let TILE_WIDTH = 16u; +let TILE_HEIGHT = 16u; +// Number of tiles per bin +let N_TILE_X = 16u; +let N_TILE_Y = 16u; +let N_TILE = N_TILE_X * N_TILE_Y; + +// Should ptcl stuff move to a separate import? + +// Layout of per-tile command list +// Initial allocation, in u32's. +let PTCL_INITIAL_ALLOC = 64u; +let PTCL_INCREMENT = 256u; + +// Amount of space taken by jump +let PTCL_HEADROOM = 2u; diff --git a/piet-wgsl/shader/shared/drawtag.wgsl b/piet-wgsl/shader/shared/drawtag.wgsl index c318e53..25608bd 100644 --- a/piet-wgsl/shader/shared/drawtag.wgsl +++ b/piet-wgsl/shader/shared/drawtag.wgsl @@ -31,11 +31,11 @@ struct DrawMonoid { // version of the draw monoid. let DRAWTAG_NOP = 0u; let DRAWTAG_FILL_COLOR = 0x44u; -let DRAWTAG_FILL_LIN_GRADIENT = 0x114; -let DRAWTAG_FILL_RAD_GRADIENT = 0x2dc; -let DRAWTAG_FILL_IMAGE = 0x48; -let DRAWTAG_BEGIN_CLIP = 0x05; -let DRAWTAG_END_CLIP = 0x25; +let DRAWTAG_FILL_LIN_GRADIENT = 0x114u; +let DRAWTAG_FILL_RAD_GRADIENT = 0x2dcu; +let DRAWTAG_FILL_IMAGE = 0x48u; +let DRAWTAG_BEGIN_CLIP = 0x05u; +let DRAWTAG_END_CLIP = 0x25u; fn draw_monoid_identity() -> DrawMonoid { return DrawMonoid(); @@ -49,8 +49,7 @@ fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) { c.info_offset = a.info_offset + b.info_offset; } -fn map_draw_tag(tag_word: u32) -> DawMonoid { - let has_path = ; +fn map_draw_tag(tag_word: u32) -> DrawMonoid { var c: DrawMonoid; c.path_ix = u32(tag_word != DRAWTAG_NOP); c.clip_ix = tag_word & 1u; diff --git a/piet-wgsl/shader/shared/ptcl.wgsl b/piet-wgsl/shader/shared/ptcl.wgsl new file mode 100644 index 0000000..516189d --- /dev/null +++ b/piet-wgsl/shader/shared/ptcl.wgsl @@ -0,0 +1,38 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Tags for PTCL commands +let CMD_END = 0u; +let CMD_FILL = 1u; +let CMD_SOLID = 3u; +let CMD_COLOR = 5u; +let CMD_JUMP = 11u; + +// The individual PTCL structs are written here, but read/write is by +// hand in the relevant shaders + +struct CmdFill { + tile: u32, + backdrop: i32, +} + +struct CmdJump { + target: u32, +} + +struct CmdColor { + rgba_color: u32, +} From 40416fd2eae7676c77b1fd19af745ff1206dd4dd Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Sun, 30 Oct 2022 08:08:22 -0700 Subject: [PATCH 03/17] Another checkpoint --- piet-wgsl/shader/coarse.wgsl | 11 +++++++---- piet-wgsl/shader/draw_leaf.wgsl | 25 ++++++++++++++----------- piet-wgsl/shader/path_coarse.wgsl | 3 --- piet-wgsl/shader/shared/config.wgsl | 14 ++------------ piet-wgsl/shader/shared/drawtag.wgsl | 3 ++- piet-wgsl/shader/shared/ptcl.wgsl | 10 +++++++++- 6 files changed, 34 insertions(+), 32 deletions(-) diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 159a3d7..4ccb0c2 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -71,7 +71,8 @@ var tiles: array; // Much of this code assumes WG_SIZE == N_TILE. If these diverge, then // a fair amount of fixup is needed. let WG_SIZE = 256u; -let N_SLICE = WG_SIZE / 32u; +//let N_SLICE = WG_SIZE / 32u; +let N_SLICE = 8u; var sh_bitmaps: array, N_TILE>, N_SLICE>; var sh_part_count: array; @@ -174,7 +175,7 @@ fn main( sh_part_count[local_id.x] = count; workgroupBarrier(); if local_id.x >= (1u << i) { - count += sh_part_count[local_id - (1u << i)]; + count += sh_part_count[local_id.x - (1u << i)]; } workgroupBarrier(); } @@ -235,7 +236,7 @@ fn main( // Prefix sum of tile counts sh_tile_count[local_id.x] = tile_count; - for (var i = 0; i < firstTrailingBit(N_TILE); i += 1u) { + for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) { workgroupBarrier(); if local_id.x >= (1u << i) { tile_count += sh_tile_count[local_id.x - (1u << i)]; @@ -298,11 +299,13 @@ fn main( let drawtag = scene[config.drawtag_base + drawobj_ix]; let dm = draw_monoids[drawobj_ix]; let dd = config.drawdata_base + dm.scene_offset; + // TODO: set up draw info from monoid if clip_zero_depth == 0u { let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x; let tile = tiles[tile_ix]; switch drawtag { - case DRAWTAG_FILL_COLOR: { + // DRAWTAG_FILL_COLOR + case 0x44u: { // TODO: get linewidth from draw object let linewidth = -1.0; let rgba_color = scene[dd]; diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index 21edcfe..b962bf2 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -50,7 +50,7 @@ fn main( ) { let ix = global_id.x; let tag_word = scene[config.drawtag_base + ix]; - let agg = map_draw_tag(tag_word); + var agg = map_draw_tag(tag_word); sh_scratch[local_id.x] = agg; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); @@ -64,7 +64,7 @@ fn main( workgroupBarrier(); var m = draw_monoid_identity(); if wg_id.x > 0u { - m = parent[wg_id.x - 1u]; + m = reduced[wg_id.x - 1u]; } if local_id.x > 0u { m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]); @@ -84,7 +84,7 @@ fn main( let y1 = f32(bbox.y1) - 32768.0; let bbox_f = vec4(x0, y0, x1, y1); let fill_mode = u32(bbox.linewidth >= 0.0); - var mat: vec4; + var matrx: vec4; var translate: vec2; var linewidth = bbox.linewidth; if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT { @@ -92,18 +92,20 @@ fn main( } if linewidth >= 0.0 { // Note: doesn't deal with anisotropic case - linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z)); + linewidth *= sqrt(abs(matrx.x * matrx.w - matrx.y * matrx.z)); } switch tag_word { - case DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE: { + // DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE + case 0x44u, 0x48u: { info[di] = bitcast(linewidth); } - case DRAWTAG_FILL_LIN_GRADIENT: { + // DRAWTAG_FILL_LIN_GRADIENT + case 0x114u: { info[di] = bitcast(linewidth); var p0 = bitcast>(vec2(scene[dd + 1u], scene[dd + 2u])); var p1 = bitcast>(vec2(scene[dd + 3u], scene[dd + 4u])); - p0 = mat.xy * p0.x + mat.zw * p0.y + translate; - p1 = mat.xy * p1.x + mat.zw * p1.y + translate; + p0 = matrx.xy * p0.x + matrx.zw * p0.y + translate; + p1 = matrx.xy * p1.x + matrx.zw * p1.y + translate; let dxy = p1 - p0; let scale = 1.0 / dot(dxy, dxy); let line_xy = dxy * scale; @@ -112,14 +114,15 @@ fn main( info[di + 2u] = bitcast(line_xy.y); info[di + 3u] = bitcast(line_c); } - case DRAWTAG_FILL_RAD_GRADIENT: { + // DRAWTAG_FILL_RAD_GRADIENT + case 0x2dcu: { info[di] = bitcast(linewidth); var p0 = bitcast>(vec2(scene[dd + 1u], scene[dd + 2u])); var p1 = bitcast>(vec2(scene[dd + 3u], scene[dd + 4u])); let r0 = bitcast(scene[dd + 5u]); let r1 = bitcast(scene[dd + 6u]); - let inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z); - let inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x); + let inv_det = 1.0 / (matrx.x * matrx.w - matrx.y * matrx.z); + let inv_mat = inv_det * vec4(matrx.w, -matrx.y, -matrx.z, matrx.x); var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y; inv_tr += p0; let center1 = p1 - p0; diff --git a/piet-wgsl/shader/path_coarse.wgsl b/piet-wgsl/shader/path_coarse.wgsl index 23590d5..0a5139d 100644 --- a/piet-wgsl/shader/path_coarse.wgsl +++ b/piet-wgsl/shader/path_coarse.wgsl @@ -64,9 +64,6 @@ fn read_i16_point(ix: u32) -> vec2 { } #ifndef cubics_out -let TILE_WIDTH = 16u; -let TILE_HEIGHT = 16u; - struct SubdivResult { val: f32, a0: f32, diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index 0a91782..b31f915 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -28,20 +28,10 @@ struct Config { // Geometry of tiles and bins - let TILE_WIDTH = 16u; let TILE_HEIGHT = 16u; // Number of tiles per bin let N_TILE_X = 16u; let N_TILE_Y = 16u; -let N_TILE = N_TILE_X * N_TILE_Y; - -// Should ptcl stuff move to a separate import? - -// Layout of per-tile command list -// Initial allocation, in u32's. -let PTCL_INITIAL_ALLOC = 64u; -let PTCL_INCREMENT = 256u; - -// Amount of space taken by jump -let PTCL_HEADROOM = 2u; +//let N_TILE = N_TILE_X * N_TILE_Y; +let N_TILE = 256u; diff --git a/piet-wgsl/shader/shared/drawtag.wgsl b/piet-wgsl/shader/shared/drawtag.wgsl index 25608bd..749b211 100644 --- a/piet-wgsl/shader/shared/drawtag.wgsl +++ b/piet-wgsl/shader/shared/drawtag.wgsl @@ -41,12 +41,13 @@ fn draw_monoid_identity() -> DrawMonoid { return DrawMonoid(); } -fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) { +fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) -> DrawMonoid { var c: DrawMonoid; c.path_ix = a.path_ix + b.path_ix; c.clip_ix = a.clip_ix + b.clip_ix; c.scene_offset = a.scene_offset + b.scene_offset; c.info_offset = a.info_offset + b.info_offset; + return c; } fn map_draw_tag(tag_word: u32) -> DrawMonoid { diff --git a/piet-wgsl/shader/shared/ptcl.wgsl b/piet-wgsl/shader/shared/ptcl.wgsl index 516189d..7121f84 100644 --- a/piet-wgsl/shader/shared/ptcl.wgsl +++ b/piet-wgsl/shader/shared/ptcl.wgsl @@ -14,6 +14,14 @@ // // Also licensed under MIT license, at your choice. +// Layout of per-tile command list +// Initial allocation, in u32's. +let PTCL_INITIAL_ALLOC = 64u; +let PTCL_INCREMENT = 256u; + +// Amount of space taken by jump +let PTCL_HEADROOM = 2u; + // Tags for PTCL commands let CMD_END = 0u; let CMD_FILL = 1u; @@ -30,7 +38,7 @@ struct CmdFill { } struct CmdJump { - target: u32, + new_ix: u32, } struct CmdColor { From 5c6ec1efa3f7573d5ed51bddf1c4b8ac431c5242 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Tue, 1 Nov 2022 16:20:15 -0700 Subject: [PATCH 04/17] Checkpoint Many shader stages written. --- piet-wgsl/shader/backdrop_dyn.wgsl | 95 ++++++++++++++++++++++ piet-wgsl/shader/fine.wgsl | 33 +++++--- piet-wgsl/shader/shared/bump.wgsl | 1 + piet-wgsl/shader/tile_alloc.wgsl | 122 +++++++++++++++++++++++++++++ piet-wgsl/src/render.rs | 6 +- 5 files changed, 247 insertions(+), 10 deletions(-) create mode 100644 piet-wgsl/shader/backdrop_dyn.wgsl create mode 100644 piet-wgsl/shader/tile_alloc.wgsl diff --git a/piet-wgsl/shader/backdrop_dyn.wgsl b/piet-wgsl/shader/backdrop_dyn.wgsl new file mode 100644 index 0000000..c6c7c8d --- /dev/null +++ b/piet-wgsl/shader/backdrop_dyn.wgsl @@ -0,0 +1,95 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Prefix sum for dynamically allocated backdrops + +#import config + +// TODO: dedup & put this in the right place +struct Path { + // bounding box in pixels + bbox: vec4, + // offset (in u32's) to tile rectangle + tiles: u32, +} + +// TODO: -> shared +struct Tile { + backdrop: i32, + segments: u32, +} + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var paths: array; + +@group(0) @binding(2) +var tiles: array; + +let WG_SIZE = 256u; + +var sh_row_width: array; +var sh_row_count: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + let drawobj_ix = global_id.x; + var row_count = 0u; + if drawobj_ix < config.n_drawobj { + // TODO: when rectangles, path and draw obj are not the same + let path = paths[drawobj_ix]; + sh_row_width[local_id.x] = path.bbox.z - path.bbox.x; + row_count = path.bbox.w - path.bbox.y; + sh_row_count[local_id.x] = row_count; + } + + // Prefix sum of row counts + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x >= (1u << i) { + row_count += sh_row_count[local_id.x - (1u << i)]; + } + workgroupBarrier(); + sh_row_count[local_id.x] = row_count; + } + workgroupBarrier(); + let total_rows = sh_row_count[WG_SIZE - 1u]; + for (var row = local_id.x; row < total_rows; row += WG_SIZE) { + var el_ix = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let probe = el_ix + ((WG_SIZE / 2u) >> i); + if row >= sh_row_count[probe - 1u] { + el_ix = probe; + } + } + let width = sh_row_width[el_ix]; + if width > 0u { + var seq_ix = row - select(0u, sh_row_count[el_ix - 1u], el_ix > 0u); + var tile_ix = seq_ix * width; + var sum = tiles[tile_ix].backdrop; + for (var x = 1u; x < width; x += 1u) { + tile_ix += 1u; + sum += tiles[tile_ix].backdrop; + tiles[tile_ix].backdrop = sum; + } + } + } +} diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 9d62793..9174a21 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -14,6 +14,9 @@ // // Also licensed under MIT license, at your choice. +// Fine rasterizer. This can run in simple (just path rendering) and full +// modes, controllable by #define. + // This is a cut'n'paste w/ backdrop. struct Tile { backdrop: i32, @@ -36,17 +39,16 @@ var segments: array; @group(0) @binding(3) var output: array; +#ifdef full +#import ptcl + +@group(0) @binding(4) +var ptcl: array; +#endif + let PIXELS_PER_THREAD = 4u; -@compute @workgroup_size(4, 16) -fn main( - @builtin(global_invocation_id) global_id: vec3, - @builtin(local_invocation_id) local_id: vec3, - @builtin(workgroup_id) wg_id: vec3, -) { - let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x; - let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); - let tile = tiles[tile_ix]; +fn fill_path(tile: Tile, xy: vec2) -> array { var area: array; let backdrop_f = f32(tile.backdrop); for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { @@ -89,6 +91,19 @@ fn main( for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { area[i] = abs(area[i]); } + return area; +} + +@compute @workgroup_size(4, 16) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x; + let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); + let tile = tiles[tile_ix]; + let area = fill_path(tile, xy); let bytes = pack4x8unorm(vec4(area[0], area[1], area[2], area[3])); let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x; diff --git a/piet-wgsl/shader/shared/bump.wgsl b/piet-wgsl/shader/shared/bump.wgsl index 6d9a225..7f33ba2 100644 --- a/piet-wgsl/shader/shared/bump.wgsl +++ b/piet-wgsl/shader/shared/bump.wgsl @@ -18,4 +18,5 @@ struct BumpAllocators { binning: atomic, ptcl: atomic, + tile: atomic, } diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl new file mode 100644 index 0000000..867db7e --- /dev/null +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -0,0 +1,122 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Tile allocation (and zeroing of tiles) + +#import config +#import bump +#import drawtag + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var draw_bboxes: array>; + +@group(0) @binding(3) +var bump: BumpAllocators; + +// TODO: put this in the right place, dedup +struct Path { + // bounding box in pixels + bbox: vec4, + // offset (in u32's) to tile rectangle + tiles: u32, +} + +struct Tile { + backdrop: i32, + segments: u32, +} + +@group(0) @binding(4) +var paths: array; + +@group(0) @binding(5) +var tiles: array; + +let WG_SIZE = 256u; + +var sh_tile_count: array; +var sh_tile_offset: u32; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + // scale factors useful for converting coordinates to tiles + // TODO: make into constants + let SX = 1.0 / f32(TILE_WIDTH); + let SY = 1.0 / f32(TILE_HEIGHT); + + let drawobj_ix = global_id.x; + var drawtag = DRAWTAG_NOP; + if drawobj_ix < config.n_drawobj { + drawtag = scene[config.drawtag_base + drawobj_ix]; + } + var x0 = 0; + var y0 = 0; + var x1 = 0; + var y1 = 0; + if drawtag != DRAWTAG_NOP && drawtag != DRAWTAG_END_CLIP { + let bbox = draw_bboxes[drawobj_ix]; + x0 = i32(floor(bbox.x * SX)); + y0 = i32(floor(bbox.y * SY)); + x1 = i32(ceil(bbox.z * SX)); + y1 = i32(ceil(bbox.w * SY)); + } + let ux0 = u32(clamp(x0, 0, i32(config.width_in_tiles))); + let uy0 = u32(clamp(y0, 0, i32(config.height_in_tiles))); + let ux1 = u32(clamp(x1, 0, i32(config.width_in_tiles))); + let uy1 = u32(clamp(y1, 0, i32(config.height_in_tiles))); + let tile_count = (ux1 - ux0) * (uy1 - uy0); + var total_tile_count = tile_count; + sh_tile_count[local_id.x] = tile_count; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x < (1u << i) { + total_tile_count += sh_tile_count[local_id.x - (1u << i)]; + } + workgroupBarrier(); + sh_tile_count[local_id.x] = total_tile_count; + } + if local_id.x == WG_SIZE - 1u { + sh_tile_offset = atomicAdd(&bump.tile, total_tile_count); + } + workgroupBarrier(); + let tile_offset = sh_tile_offset; + if drawobj_ix < config.n_drawobj { + let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u); + let bbox = vec4(ux0, uy0, ux1, uy1); + let path = Path(bbox, tile_offset + tile_subix); + } + + // zero allocated memory + // Note: if the number of draw objects is small, utilization will be poor. + // There are two things that can be done to improve that. One would be a + // separate (indirect) dispatch. Another would be to have each workgroup + // process fewer draw objects than the number of threads in the wg. + let total_count = sh_tile_count[WG_SIZE - 1u]; + for (var i = local_id.x; i < total_count; i += WG_SIZE) { + // Note: could format output buffer as u32 for even better load + // balancing, as does piet-gpu. + tiles[tile_offset + i] = Tile(0, 0u); + } +} diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 3eec030..7b79236 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -11,10 +11,13 @@ use crate::{ const TAG_MONOID_SIZE: u64 = 12; #[repr(C)] -#[derive(Clone, Copy, Zeroable, Pod)] +#[derive(Clone, Copy, Default, Zeroable, Pod)] struct Config { width_in_tiles: u32, height_in_tiles: u32, + n_drawobj: u32, + drawtag_base: u32, + drawdata_base: u32, } #[repr(C)] @@ -60,6 +63,7 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { let config = Config { width_in_tiles: 64, height_in_tiles: 64, + ..Default::default() }; let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); // TODO: more principled size calc From 7ac327c68414abf06ed4c8dbfe6a30fb1ca5d05b Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Wed, 2 Nov 2022 18:07:32 -0700 Subject: [PATCH 05/17] Unify scene buffer All streams of the scene are combined into a single buffer. This is very much like existing piet-gpu, however the various outputs from the compute stages (whether computed on CPU or GPU) will retain their separate bindings, which is more native to WGSL. There's a touch of ergonomics loss, in particular when we do transforms we'll need to unmarshal them by hand, but I think overall not too bad. --- .vscode/settings.json | 3 +- piet-wgsl/shader/fine.wgsl | 57 ++++++++++++++++++++++++++++ piet-wgsl/shader/path_coarse.wgsl | 27 ++++++------- piet-wgsl/shader/pathtag_reduce.wgsl | 10 +++-- piet-wgsl/shader/pathtag_scan.wgsl | 10 +++-- piet-wgsl/shader/shared/config.wgsl | 3 ++ piet-wgsl/src/render.rs | 45 +++++++++++++--------- piet-wgsl/src/shaders.rs | 4 +- 8 files changed, 116 insertions(+), 43 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 43d84c0..4d315c9 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,5 +8,6 @@ "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl", "ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl" }, - "wgsl-analyzer.diagnostics.nagaVersion": "main" + "wgsl-analyzer.diagnostics.nagaVersion": "main", + "wgsl-analyzer.preprocessor.shaderDefs": ["full"] } diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 9174a21..7b66d04 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -44,6 +44,17 @@ var output: array; @group(0) @binding(4) var ptcl: array; + +fn read_fill(cmd_ix: u32) -> CmdFill { + let tile = ptcl[cmd_ix + 1u]; + let backdrop = i32(ptcl[cmd_ix + 2u]); + return CmdFill(tile, backdrop); +} + +fn read_color(cmd_ix: u32) -> CmdColor { + let rgba_color = ptcl[cmd_ix + 1u]; + return CmdColor(rgba_color); +} #endif let PIXELS_PER_THREAD = 4u; @@ -103,7 +114,53 @@ fn main( let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x; let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); let tile = tiles[tile_ix]; +#ifdef full + var rgba: array, PIXELS_PER_THREAD>; + var area: array; + var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC; + + // main interpretation loop + while true { + let tag = ptcl[cmd_ix]; + if tag == CMD_END { + break; + } + switch tag { + // CMD_FILL + case 1u: { + let fill = read_fill(cmd_ix); + let tile = Tile(fill.backdrop, fill.tile); + area = fill_path(tile, xy); + cmd_ix += 3u; + } + // CMD_SOLID + case 3u: { + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + area[i] = 1.0; + } + cmd_ix += 1u; + } + // CMD_COLOR + case 5u: { + let color = read_color(cmd_ix); + let fg = unpack4x8unorm(color.rgba_color); + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let fg_i = fg * area[i]; + rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; + } + cmd_ix += 1u; + } + // CMD_JUMP + case 11u: { + cmd_ix = ptcl[cmd_ix + 1u]; + } + default: {} + } + } + +#else let area = fill_path(tile, xy); +#endif let bytes = pack4x8unorm(vec4(area[0], area[1], area[2], area[3])); let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x; diff --git a/piet-wgsl/shader/path_coarse.wgsl b/piet-wgsl/shader/path_coarse.wgsl index 0a5139d..534963f 100644 --- a/piet-wgsl/shader/path_coarse.wgsl +++ b/piet-wgsl/shader/path_coarse.wgsl @@ -14,24 +14,22 @@ // // Also licensed under MIT license, at your choice. +#import config #import pathtag @group(0) @binding(0) -var path_tags: array; +var config: Config; @group(0) @binding(1) -var tag_monoids: array; +var scene: array; -// TODO: should probably have single "scene" binding. @group(0) @binding(2) -var path_data: array; +var tag_monoids: array; #ifdef cubics_out @group(0) @binding(3) var output: array>; #else -#import config - struct Tile { backdrop: atomic, segments: atomic, @@ -39,25 +37,23 @@ struct Tile { #import segment -// Should probably be uniform binding @group(0) @binding(3) -var config: Config; - -@group(0) @binding(4) var tiles: array; -@group(0) @binding(5) +@group(0) @binding(4) var segments: array; #endif +var pathdata_base: u32; + fn read_f32_point(ix: u32) -> vec2 { - let x = bitcast(path_data[ix]); - let y = bitcast(path_data[ix + 1u]); + let x = bitcast(scene[pathdata_base + ix]); + let y = bitcast(scene[pathdata_base + ix + 1u]); return vec2(x, y); } fn read_i16_point(ix: u32) -> vec2 { - let raw = path_data[ix]; + let raw = scene[pathdata_base + ix]; let x = f32(i32(raw << 16u) >> 16u); let y = f32(i32(raw) >> 16u); return vec2(x, y); @@ -133,7 +129,8 @@ fn main( ) { // Obtain exclusive prefix sum of tag monoid let ix = global_id.x; - let tag_word = path_tags[ix >> 2u]; + let tag_word = scene[config.pathtag_base + (ix >> 2u)]; + pathdata_base = config.pathdata_base; let shift = (ix & 3u) * 8u; var tm = reduce_tag(tag_word & ((1u << shift) - 1u)); tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm); diff --git a/piet-wgsl/shader/pathtag_reduce.wgsl b/piet-wgsl/shader/pathtag_reduce.wgsl index 92bf20d..bbf8528 100644 --- a/piet-wgsl/shader/pathtag_reduce.wgsl +++ b/piet-wgsl/shader/pathtag_reduce.wgsl @@ -14,14 +14,16 @@ // // Also licensed under MIT license, at your choice. +#import config #import pathtag -// Note: should have a single scene binding, path_tags are a slice -// in that; need a config uniform. @group(0) @binding(0) -var path_tags: array; +var config: Config; @group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) var reduced: array; let LG_WG_SIZE = 8u; @@ -35,7 +37,7 @@ fn main( @builtin(local_invocation_id) local_id: vec3, ) { let ix = global_id.x; - let tag_word = path_tags[ix]; + let tag_word = scene[config.pathtag_base + ix]; var agg = reduce_tag(tag_word); sh_scratch[local_id.x] = agg; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { diff --git a/piet-wgsl/shader/pathtag_scan.wgsl b/piet-wgsl/shader/pathtag_scan.wgsl index 058ce04..d18d872 100644 --- a/piet-wgsl/shader/pathtag_scan.wgsl +++ b/piet-wgsl/shader/pathtag_scan.wgsl @@ -14,15 +14,19 @@ // // Also licensed under MIT license, at your choice. +#import config #import pathtag @group(0) @binding(0) -var path_tags: array; +var config: Config; @group(0) @binding(1) -var reduced: array; +var scene: array; @group(0) @binding(2) +var reduced: array; + +@group(0) @binding(3) var tag_monoids: array; let LG_WG_SIZE = 8u; @@ -54,7 +58,7 @@ fn main( } let ix = global_id.x; - let tag_word = path_tags[ix]; + let tag_word = scene[config.pathtag_base + ix]; agg = reduce_tag(tag_word); sh_monoid[local_id.x] = agg; for (var i = 0u; i < LG_WG_SIZE; i += 1u) { diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index b31f915..9058d5c 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -22,6 +22,9 @@ struct Config { // offsets within scene buffer (in u32 units) // Note: this is a difference from piet-gpu, which is in bytes + pathtag_base: u32, + pathdata_base: u32, + drawtag_base: u32, drawdata_base: u32, } diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 7b79236..c3c30f1 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -16,6 +16,8 @@ struct Config { width_in_tiles: u32, height_in_tiles: u32, n_drawobj: u32, + pathtag_base: u32, + pathdata_base: u32, drawtag_base: u32, drawdata_base: u32, } @@ -29,22 +31,39 @@ pub struct PathSegment { next: u32, } +fn size_to_words(byte_size: usize) -> u32 { + (byte_size / std::mem::size_of::()) as u32 +} + pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { let mut recording = Recording::default(); let data = scene.data(); let n_pathtag = data.tag_stream.len(); let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG); let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); - let mut tag_data: Vec = Vec::with_capacity(pathtag_padded); - tag_data.extend(&data.tag_stream); - tag_data.resize(pathtag_padded, 0); - let pathtag_buf = recording.upload(tag_data); + let mut scene: Vec = Vec::with_capacity(pathtag_padded); + let pathtag_base = size_to_words(scene.len()); + scene.extend(&data.tag_stream); + scene.resize(pathtag_padded, 0); + let pathdata_base = size_to_words(scene.len()); + scene.extend(&data.pathseg_stream); + + let config = Config { + width_in_tiles: 64, + height_in_tiles: 64, + pathtag_base, + pathdata_base, + ..Default::default() + }; + let scene_buf = recording.upload(scene); + let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); + let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE); // TODO: really only need pathtag_wgs - 1 recording.dispatch( shaders.pathtag_reduce, (pathtag_wgs as u32, 1, 1), - [pathtag_buf, reduced_buf], + [config_buf, scene_buf, reduced_buf], ); let tagmonoid_buf = @@ -52,20 +71,11 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { recording.dispatch( shaders.pathtag_scan, (pathtag_wgs as u32, 1, 1), - [pathtag_buf, reduced_buf, tagmonoid_buf], + [config_buf, scene_buf, reduced_buf, tagmonoid_buf], ); let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; - // The clone here is kinda BS, think about reducing copies - // Of course, we'll probably end up concatenating into a single scene binding. - let pathdata_buf = recording.upload(data.pathseg_stream.clone()); //let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32); - let config = Config { - width_in_tiles: 64, - height_in_tiles: 64, - ..Default::default() - }; - let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); // TODO: more principled size calc let tiles_buf = BufProxy::new(4097 * 8); let segments_buf = BufProxy::new(256 * 24); @@ -74,10 +84,9 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { shaders.path_coarse, (path_coarse_wgs, 1, 1), [ - pathtag_buf, - tagmonoid_buf, - pathdata_buf, config_buf, + scene_buf, + tagmonoid_buf, tiles_buf, segments_buf, ], diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index 64e022d..d1e8847 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -44,12 +44,13 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result Result Date: Thu, 3 Nov 2022 16:53:34 -0700 Subject: [PATCH 06/17] Shaders loaded This checkpoint loads the shaders for full rendering, but there's a bunch of stuff still needing to be done. --- .vscode/settings.json | 3 +- piet-wgsl/shader/bbox_clear.wgsl | 45 ++++ piet-wgsl/shader/binning.wgsl | 36 ++-- piet-wgsl/shader/coarse.wgsl | 35 ++-- piet-wgsl/shader/draw_leaf.wgsl | 6 +- piet-wgsl/shader/draw_reduce.wgsl | 6 +- piet-wgsl/shader/path_coarse.wgsl | 5 +- piet-wgsl/shader/path_coarse_full.wgsl | 276 +++++++++++++++++++++++++ piet-wgsl/shader/pathseg.wgsl | 215 +++++++++++++++++++ piet-wgsl/shader/shared/bbox.wgsl | 10 +- piet-wgsl/shader/shared/config.wgsl | 1 + piet-wgsl/shader/shared/pathtag.wgsl | 18 +- piet-wgsl/shader/shared/tile.wgsl | 29 +++ piet-wgsl/shader/tile_alloc.wgsl | 14 +- piet-wgsl/src/main.rs | 7 +- piet-wgsl/src/render.rs | 1 + piet-wgsl/src/shaders.rs | 166 +++++++++++++++ 17 files changed, 805 insertions(+), 68 deletions(-) create mode 100644 piet-wgsl/shader/bbox_clear.wgsl create mode 100644 piet-wgsl/shader/path_coarse_full.wgsl create mode 100644 piet-wgsl/shader/pathseg.wgsl create mode 100644 piet-wgsl/shader/shared/tile.wgsl diff --git a/.vscode/settings.json b/.vscode/settings.json index 4d315c9..bdb0028 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -6,7 +6,8 @@ "drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl", "segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl", "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl", - "ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl" + "ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl", + "tile": "${workspaceFolder}/piet-wgsl/shader/shared/tile.wgsl" }, "wgsl-analyzer.diagnostics.nagaVersion": "main", "wgsl-analyzer.preprocessor.shaderDefs": ["full"] diff --git a/piet-wgsl/shader/bbox_clear.wgsl b/piet-wgsl/shader/bbox_clear.wgsl new file mode 100644 index 0000000..69a8b63 --- /dev/null +++ b/piet-wgsl/shader/bbox_clear.wgsl @@ -0,0 +1,45 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +#import config + +@group(0) @binding(0) +var config: Config; + +struct PathBbox { + x0: i32, + y0: i32, + x1: i32, + y1: i32, + linewidth: f32, + trans_ix: u32, +} + +@group(0) @binding(1) +var path_bboxes: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, +) { + let ix = global_id.x; + if ix < config.n_path { + path_bboxes[ix].x0 = 0x7fffffff; + path_bboxes[ix].y0 = 0x7fffffff; + path_bboxes[ix].x1 = -0x80000000; + path_bboxes[ix].y1 = -0x80000000; + } +} diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl index d16a620..3d5a053 100644 --- a/piet-wgsl/shader/binning.wgsl +++ b/piet-wgsl/shader/binning.wgsl @@ -28,7 +28,7 @@ var config: Config; var draw_monoids: array; @group(0) @binding(2) -var path_bbox_buf: array; +var path_bbox_buf: array; @group(0) @binding(3) var clip_bbox_buf: array>; @@ -39,7 +39,7 @@ var intersected_bbox: array>; // TODO: put into shared include @group(0) @binding(5) -var bump: BumpAllocators; +var bump: BumpAllocators; @group(0) @binding(6) var bin_data: array; @@ -54,11 +54,14 @@ struct BinHeader { var bin_header: array; // conversion factors from coordinates to bin -let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); -let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); +let SX = 0.00390625; +let SY = 0.00390625; +//let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); +//let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); let WG_SIZE = 256u; -let N_SLICE = WG_SIZE / 32u; +let N_SLICE = 8u; +//let N_SLICE = WG_SIZE / 32u; var sh_bitmaps: array, N_TILE>, N_SLICE>; var sh_count: array, N_SLICE>; @@ -82,7 +85,7 @@ fn main( var y1 = 0; if element_ix < config.n_drawobj { let draw_monoid = draw_monoids[element_ix]; - var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); + var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); if draw_monoid.clip_ix > 0u { clip_bbox = clip_bbox_buf[draw_monoid.clip_ix - 1u]; } @@ -92,10 +95,11 @@ fn main( // TODO check this is true let path_bbox = path_bbox_buf[draw_monoid.path_ix]; - let pb = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); - let bbox = bbox_intersect(clip_bbox, pb); + let pb = vec4(vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1)); + let bbox_raw = bbox_intersect(clip_bbox, pb); + // TODO(naga): clunky expression a workaround for broken lhs swizzle + let bbox = vec4(bbox_raw.xy, max(bbox_raw.xy, bbox_raw.zw)); - bbox.zw = max(bbox.xy, bbox.zw); intersected_bbox[element_ix] = bbox; x0 = i32(floor(bbox.x * SX)); y0 = i32(floor(bbox.y * SY)); @@ -117,7 +121,7 @@ fn main( let my_mask = 1u << (local_id.x & 31u); while y < y1 { atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask); - x += 1u; + x += 1; if x == x1 { x = x0; y += 1; @@ -128,8 +132,8 @@ fn main( // Allocate output segments var element_count = 0u; for (var i = 0u; i < N_SLICE; i += 1u) { - elementCount += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); - sh_count[i][id_ix] = element_count; + element_count += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); + sh_count[i][local_id.x] = element_count; } // element_count is the number of draw objects covering this thread's bin let chunk_offset = atomicAdd(&bump.binning, element_count); @@ -145,18 +149,18 @@ fn main( let bin_ix = y * width_in_bins + x; let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]); // I think this predicate will always be true... - if (out_mask & my_mask) != 0 { + if (out_mask & my_mask) != 0u { var idx = countOneBits(out_mask & (my_mask - 1u)); - if my_slice > 0 { + if my_slice > 0u { idx += sh_count[my_slice - 1u][bin_ix]; } let offset = sh_chunk_offset[bin_ix]; bin_data[offset + idx] = element_ix; } - x += 1u; + x += 1; if x == x1 { x = x0; - y += 1u; + y += 1; } } } diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 4ccb0c2..8c59ba6 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -20,6 +20,7 @@ #import bump #import drawtag #import ptcl +#import tile @group(0) @binding(0) var config: Config; @@ -40,33 +41,21 @@ struct BinHeader { var bin_headers: array; @group(0) @binding(4) -var bin_data: array; - -@group(0) @binding(5) -var bump: BumpAllocators; - -@group(0) @binding(6) -var ptcl: array; - -// TODO: put this in the right place -struct Path { - // bounding box in pixels - bbox: vec4, - // offset (in u32's) to tile rectangle - tiles: u32, -} - -struct Tile { - backdrop: i32, - segments: u32, -} - -@group(0) @binding(7) var paths: array; -@group(0) @binding(8) +@group(0) @binding(5) var tiles: array; +@group(0) @binding(6) +var bin_data: array; + +@group(0) @binding(7) +var bump: BumpAllocators; + +@group(0) @binding(8) +var ptcl: array; + + // Much of this code assumes WG_SIZE == N_TILE. If these diverge, then // a fair amount of fixup is needed. diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index b962bf2..6782542 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -30,10 +30,10 @@ var scene: array; var reduced: array; @group(0) @binding(3) -var draw_monoid: array; +var path_bbox: array; @group(0) @binding(4) -var path_bbox: array; +var draw_monoid: array; @group(0) @binding(5) var info: array; @@ -64,6 +64,8 @@ fn main( workgroupBarrier(); var m = draw_monoid_identity(); if wg_id.x > 0u { + // TODO: separate dispatch to scan these, or integrate into this one? + // In the meantime, will be limited to 2 * WG draw objs. m = reduced[wg_id.x - 1u]; } if local_id.x > 0u { diff --git a/piet-wgsl/shader/draw_reduce.wgsl b/piet-wgsl/shader/draw_reduce.wgsl index 9efae7c..8ff1a44 100644 --- a/piet-wgsl/shader/draw_reduce.wgsl +++ b/piet-wgsl/shader/draw_reduce.wgsl @@ -26,7 +26,7 @@ var scene: array; @group(0) @binding(2) var reduced: array; -let WG_SIZE = 256; +let WG_SIZE = 256u; var sh_scratch: array; @@ -37,7 +37,7 @@ fn main( ) { let ix = global_id.x; let tag_word = scene[config.drawtag_base + ix]; - let agg = map_draw_tag(tag_word); + var agg = map_draw_tag(tag_word); sh_scratch[local_id.x] = agg; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); @@ -49,6 +49,6 @@ fn main( sh_scratch[local_id.x] = agg; } if local_id.x == 0u { - reduced[ix >> LG_WG_SIZE] = agg; + reduced[ix >> firstTrailingBit(WG_SIZE)] = agg; } } diff --git a/piet-wgsl/shader/path_coarse.wgsl b/piet-wgsl/shader/path_coarse.wgsl index 534963f..ffdd8cf 100644 --- a/piet-wgsl/shader/path_coarse.wgsl +++ b/piet-wgsl/shader/path_coarse.wgsl @@ -30,7 +30,8 @@ var tag_monoids: array; @group(0) @binding(3) var output: array>; #else -struct Tile { +// We don't get this from import as it's the atomic version +struct AtomicTile { backdrop: atomic, segments: atomic, } @@ -38,7 +39,7 @@ struct Tile { #import segment @group(0) @binding(3) -var tiles: array; +var tiles: array; @group(0) @binding(4) var segments: array; diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl new file mode 100644 index 0000000..5641642 --- /dev/null +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -0,0 +1,276 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Path coarse rasterization for the full implementation. + +#import config +#import pathtag +#import tile +#import segment + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +// Maybe dedup? +struct Cubic { + p0: vec2, + p1: vec2, + p2: vec2, + p3: vec2, +} + +@group(0) @binding(2) +var tag_monoids: array; + +@group(0) @binding(3) +var cubics: array; + +@group(0) @binding(4) +var paths: array; + +// We don't get this from import as it's the atomic version +struct AtomicTile { + backdrop: atomic, + segments: atomic, +} + +@group(0) @binding(5) +var tiles: array; + +@group(0) @binding(6) +var segments: array; + +struct SubdivResult { + val: f32, + a0: f32, + a2: f32, +} + +let D = 0.67; +fn approx_parabola_integral(x: f32) -> f32 { + return x * inverseSqrt(sqrt(1.0 - D + (D * D * D * D + 0.25 * x * x))); +} + +let B = 0.39; +fn approx_parabola_inv_integral(x: f32) -> f32 { + return x * sqrt(1.0 - B + (B * B + 0.5 * x * x)); +} + +fn estimate_subdiv(p0: vec2, p1: vec2, p2: vec2, sqrt_tol: f32) -> SubdivResult { + let d01 = p1 - p0; + let d12 = p2 - p1; + let dd = d01 - d12; + let cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x; + let cross_inv = 1.0 / cross; + let x0 = dot(d01, dd) * cross_inv; + let x2 = dot(d12, dd) * cross_inv; + let scale = abs(cross / (length(dd) * (x2 - x0))); + + let a0 = approx_parabola_integral(x0); + let a2 = approx_parabola_integral(x2); + var val = 0.0; + if scale < 1e9 { + let da = abs(a2 - a0); + let sqrt_scale = sqrt(scale); + if sign(x0) == sign(x2) { + val = sqrt_scale; + } else { + let xmin = sqrt_tol / sqrt_scale; + val = sqrt_tol / approx_parabola_integral(xmin); + } + val *= da; + } + return SubdivResult(val, a0, a2); +} + +fn eval_quad(p0: vec2, p1: vec2, p2: vec2, t: f32) -> vec2 { + let mt = 1.0 - t; + return p0 * (mt * mt) + (p1 * (mt * 2.0) + p2 * t) * t; +} + +fn eval_cubic(p0: vec2, p1: vec2, p2: vec2, p3: vec2, t: f32) -> vec2 { + let mt = 1.0 - t; + return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t; +} + +fn alloc_segment() -> u32 { + // Use 0-index segment (address is sentinel) as counter + // TODO: separate small buffer binding for this? + return atomicAdd(&tiles[4096].segments, 1u) + 1u; +} + +let MAX_QUADS = 16u; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.pathtag_base + (ix >> 2u)]; + let shift = (ix & 3u) * 8u; + var tag_byte = (tag_word >> shift) & 0xffu; + + // Reconstruct path_ix from monoid or store in cubic? + if (tag_byte & PATH_TAG_SEG_TYPE) != 0u { + let path_ix = 42u; // BIG GIANT TODO + let path = paths[path_ix]; + let bbox = vec4(path.bbox); + let cubic = cubics[global_id.x]; + let p0 = cubic.p0; + let p1 = cubic.p1; + let p2 = cubic.p2; + let p3 = cubic.p3; + let err_v = 3.0 * (p2 - p1) + p0 - p3; + let err = dot(err_v, err_v); + let ACCURACY = 0.25; + let Q_ACCURACY = ACCURACY * 0.1; + let REM_ACCURACY = (ACCURACY - Q_ACCURACY); + let MAX_HYPOT2 = 432.0 * Q_ACCURACY * Q_ACCURACY; + var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1u); + n_quads = min(n_quads, MAX_QUADS); + var keep_params: array; + var val = 0.0; + var qp0 = p0; + let step = 1.0 / f32(n_quads); + for (var i = 0u; i < n_quads; i += 1u) { + let t = f32(i + 1u) * step; + let qp2 = eval_cubic(p0, p1, p2, p3, t); + var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step); + qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); + let params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY)); + keep_params[i] = params; + val += params.val; + qp0 = qp2; + } + let n = max(u32(ceil(val * (0.5 / sqrt(REM_ACCURACY)))), 1u); + var lp0 = p0; + qp0 = p0; + let v_step = val / f32(n); + var n_out = 1u; + var val_sum = 0.0; + for (var i = 0u; i < n_quads; i += 1u) { + let t = f32(i + 1u) * step; + let qp2 = eval_cubic(p0, p1, p2, p3, t); + var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step); + qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); + let params = keep_params[i]; + let u0 = approx_parabola_inv_integral(params.a0); + let u2 = approx_parabola_inv_integral(params.a2); + let uscale = 1.0 / (u2 - u0); + var val_target = f32(n_out) * v_step; + while n_out == n || val_target < val_sum + params.val { + var lp1: vec2; + if n_out == n { + lp1 = p3; + } else { + let u = (val_target - val_sum) / params.val; + let a = mix(params.a0, params.a2, u); + let au = approx_parabola_inv_integral(a); + let t = (au - u0) * uscale; + lp1 = eval_quad(qp0, qp1, qp2, t); + } + + // Output line segment lp0..lp1 + let xymin = min(lp0, lp1); + let xymax = max(lp0, lp1); + let dp = lp1 - lp0; + let recip_dx = 1.0 / dp.x; + let invslope = select(dp.x / dp.y, 1.0e9, abs(dp.y) < 1.0e-9); + let c = 0.5 * abs(invslope); + let b = invslope; + let SX = 1.0 / f32(TILE_WIDTH); + let SY = 1.0 / f32(TILE_HEIGHT); + let a = (lp0.x - (lp0.y - 0.5 * f32(TILE_HEIGHT)) * b) * SX; + var x0 = i32(floor(xymin.x * SX)); + var x1 = i32(floor(xymax.x * SX) + 1.0); + var y0 = i32(floor(xymin.y * SY)); + var y1 = i32(floor(xymax.y * SY) + 1.0); + x0 = clamp(x0, 0, i32(config.width_in_tiles)); + x1 = clamp(x1, 0, i32(config.width_in_tiles)); + y0 = clamp(y0, 0, i32(config.height_in_tiles)); + y1 = clamp(y1, 0, i32(config.height_in_tiles)); + var xc = a + b * f32(y0); + var xray = i32(floor(lp0.x * SX)); + var last_xray = i32(floor(lp1.x * SX)); + if dp.y < 0.0 { + let tmp = xray; + xray = last_xray; + last_xray = tmp; + } + for (var y = y0; y < y1; y += 1) { + let tile_y0 = f32(y) * f32(TILE_HEIGHT); + let xbackdrop = max(xray + 1, 0); + if xymin.y < tile_y0 && xbackdrop < i32(config.width_in_tiles) { + let backdrop = select(-1, 1, dp.y < 0.0); + let tile_ix = y * i32(config.width_in_tiles) + xbackdrop; + atomicAdd(&tiles[tile_ix].backdrop, backdrop); + } + var next_xray = last_xray; + if y + 1 < y1 { + let tile_y1 = f32(y + 1) * f32(TILE_HEIGHT); + let x_edge = lp0.x + (tile_y1 - lp0.y) * invslope; + next_xray = i32(floor(x_edge * SX)); + } + let min_xray = min(xray, next_xray); + let max_xray = max(xray, next_xray); + var xx0 = min(i32(floor(xc - c)), min_xray); + var xx1 = max(i32(ceil(xc + c)), max_xray + 1); + xx0 = clamp(xx0, x0, x1); + xx1 = clamp(xx1, x0, x1); + var tile_seg: Segment; + for (var x = xx0; x < xx1; x += 1) { + let tile_x0 = f32(x) * f32(TILE_WIDTH); + let tile_ix = y * i32(config.width_in_tiles) + x; + // allocate segment, insert linked list + let seg_ix = alloc_segment(); + let old = atomicExchange(&tiles[tile_ix].segments, seg_ix); + tile_seg.origin = lp0; + tile_seg.delta = dp; + var y_edge = mix(lp0.y, lp1.y, (tile_x0 - lp0.x) * recip_dx); + if xymin.x < tile_x0 { + let p = vec2(tile_x0, y_edge); + if dp.x < 0.0 { + tile_seg.delta = p - lp0; + } else { + tile_seg.origin = p; + tile_seg.delta = lp1 - p; + } + if tile_seg.delta.x == 0.0 { + tile_seg.delta.x = sign(dp.x) * 1e-9; + } + } + if x <= min_xray || max_xray < x { + y_edge = 1e9; + } + tile_seg.y_edge = y_edge; + tile_seg.next = old; + segments[seg_ix] = tile_seg; + } + xc += b; + xray = next_xray; + } + n_out += 1u; + val_target += v_step; + lp0 = lp1; + } + val_sum += params.val; + qp0 = qp2; + } + } +} diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl new file mode 100644 index 0000000..03c5aa6 --- /dev/null +++ b/piet-wgsl/shader/pathseg.wgsl @@ -0,0 +1,215 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Path segment decoding for the full case. + +// In the simple case, path segments are decoded as part of the coarse +// path rendering stage. In the full case, they are separated, as the +// decoding process also generates bounding boxes, and those in turn are +// used for tile allocation and clipping; actual coarse path rasterization +// can't proceed until those are complete. + +// There's some duplication of the decoding code but we won't worry about +// that just now. Perhaps it could be factored more nicely later. + +#import config +#import pathtag + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var scene: array; + +@group(0) @binding(2) +var tag_monoids: array; + +struct AtomicPathBbox { + x0: atomic, + y0: atomic, + x1: atomic, + y1: atomic, + linewidth: f32, + trans_ix: u32, +} + +@group(0) @binding(3) +var path_bboxes: array; + +struct Cubic { + p0: vec2, + p1: vec2, + p2: vec2, + p3: vec2, +} + +@group(0) @binding(4) +var cubics: array; + +// Monoid is yagni, for future optimization + +// struct BboxMonoid { +// bbox: vec4, +// flags: u32, +// } + +// let FLAG_RESET_BBOX = 1u; +// let FLAG_SET_BBOX = 2u; + +// fn combine_bbox_monoid(a: BboxMonoid, b: BboxMonoid) -> BboxMonoid { +// var c: BboxMonoid; +// c.bbox = b.bbox; +// // TODO: previous-me thought this should be gated on b & SET_BBOX == false also +// if (a.flags & FLAG_RESET_BBOX) == 0u && b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y { +// c.bbox = a.bbox; +// } else if (a.flags & FLAG_RESET_BBOX) == 0u && (b.flags & FLAG_SET_BBOX) == 0u || +// (a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y) +// { +// c.bbox = vec4(min(a.bbox.xy, c.bbox.xy), max(a.bbox.xw, c.bbox.zw)); +// } +// c.flags = (a.flags & FLAG_SET_BBOX) | b.flags; +// c.flags |= (a.flags & FLAG_RESET_BBOX) << 1u; +// return c; +// } + +// fn bbox_monoid_identity() -> BboxMonoid { +// return BboxMonoid(); +// } + +var pathdata_base: u32; +var transform_base: u32; + +fn read_f32_point(ix: u32) -> vec2 { + let x = bitcast(scene[pathdata_base + ix]); + let y = bitcast(scene[pathdata_base + ix + 1u]); + return vec2(x, y); +} + +fn read_i16_point(ix: u32) -> vec2 { + let raw = scene[pathdata_base + ix]; + let x = f32(i32(raw << 16u) >> 16u); + let y = f32(i32(raw) >> 16u); + return vec2(x, y); +} + +struct Transform { + matrx: vec4, + translate: vec2, +} + +fn read_transform(ix: u32) -> Transform { + let base = transform_base + ix * 6u; + let c0 = bitcast(scene[base]); + let c1 = bitcast(scene[base] + 1u); + let c2 = bitcast(scene[base] + 2u); + let c3 = bitcast(scene[base] + 3u); + let c4 = bitcast(scene[base] + 4u); + let c5 = bitcast(scene[base] + 5u); + let matrx = vec4(c0, c1, c2, c3); + let translate = vec2(c4, c5); + return Transform(matrx, translate); +} + +fn transform_apply(transform: Transform, p: vec2) -> vec2 { + return transform.matrx.xy * p.x + transform.matrx.zw * p.y + transform.translate; +} + +fn round_down(x: f32) -> i32 { + return i32(floor(x)); +} + +fn round_up(x: f32) -> i32 { + return i32(ceil(x)); +} + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, +) { + let ix = global_id.x; + let tag_word = scene[config.pathtag_base + (ix >> 2u)]; + // TODO: set transform_base + pathdata_base = config.pathdata_base; + let shift = (ix & 3u) * 8u; + var tm = reduce_tag(tag_word & ((1u << shift) - 1u)); + tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm); + var tag_byte = (tag_word >> shift) & 0xffu; + + let out = &path_bboxes[tm.path_ix]; + if (tag_byte & PATH_TAG_PATH) != 0u { + (*out).linewidth = -1.0; // TODO: plumb linewidth + (*out).trans_ix = tm.trans_ix; + } + // Decode path data + let seg_type = tag_byte & PATH_TAG_SEG_TYPE; + if seg_type != 0u { + var p0: vec2; + var p1: vec2; + var p2: vec2; + var p3: vec2; + if (tag_byte & PATH_TAG_F32) != 0u { + p0 = read_f32_point(tm.pathseg_offset); + p1 = read_f32_point(tm.pathseg_offset + 2u); + if seg_type >= PATH_TAG_QUADTO { + p2 = read_f32_point(tm.pathseg_offset + 4u); + if seg_type == PATH_TAG_CUBICTO { + p3 = read_f32_point(tm.pathseg_offset + 6u); + } + } + } else { + p0 = read_i16_point(tm.pathseg_offset); + p1 = read_i16_point(tm.pathseg_offset + 1u); + if seg_type >= PATH_TAG_QUADTO { + p2 = read_i16_point(tm.pathseg_offset + 2u); + if seg_type == PATH_TAG_CUBICTO { + p3 = read_i16_point(tm.pathseg_offset + 3u); + } + } + } + let transform = read_transform(tm.trans_ix); + p0 = transform_apply(transform, p0); + p1 = transform_apply(transform, p1); + var bbox = vec4(min(p0, p1), max(p0, p1)); + // Degree-raise + if seg_type == PATH_TAG_LINETO { + p3 = p1; + p2 = mix(p3, p0, 1.0 / 3.0); + p1 = mix(p0, p3, 1.0 / 3.0); + } else if seg_type >= PATH_TAG_QUADTO { + p2 = transform_apply(transform, p2); + bbox = vec4(min(bbox.xy, p2), max(bbox.zw, p2)); + if seg_type == PATH_TAG_CUBICTO { + p3 = transform_apply(transform, p3); + bbox = vec4(min(bbox.xy, p3), max(bbox.zw, p3)); + } else { + p3 = p2; + p2 = mix(p1, p2, 1.0 / 3.0); + p1 = mix(p1, p0, 1.0 / 3.0); + } + } + cubics[global_id.x] = Cubic(p0, p1, p2, p3); + // Update bounding box using atomics only. Computing a monoid is a + // potential future optimization. + if bbox.z > bbox.x && bbox.w > bbox.y { + atomicMin(&(*out).x0, round_down(bbox.x)); + atomicMin(&(*out).y0, round_down(bbox.y)); + atomicMax(&(*out).x1, round_up(bbox.z)); + atomicMax(&(*out).y1, round_up(bbox.w)); + } + } + +} diff --git a/piet-wgsl/shader/shared/bbox.wgsl b/piet-wgsl/shader/shared/bbox.wgsl index fb7728b..c260df9 100644 --- a/piet-wgsl/shader/shared/bbox.wgsl +++ b/piet-wgsl/shader/shared/bbox.wgsl @@ -16,11 +16,13 @@ // The annotated bounding box for a path. It has been transformed, // but contains a link to the active transform, mostly for gradients. +// Coordinates are integer pixels (for the convenience of atomic update) +// but will probably become fixed-point fractions for rectangles. struct PathBbox { - x0: u32, - y0: u32, - x1: u32, - y1: u32, + x0: i32, + y0: i32, + x1: i32, + y1: i32, linewidth: f32, trans_ix: u32, } diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index 9058d5c..bf0839d 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -19,6 +19,7 @@ struct Config { height_in_tiles: u32, n_drawobj: u32, + n_path: u32, // offsets within scene buffer (in u32 units) // Note: this is a difference from piet-gpu, which is in bytes diff --git a/piet-wgsl/shader/shared/pathtag.wgsl b/piet-wgsl/shader/shared/pathtag.wgsl index 2424d2a..b248e18 100644 --- a/piet-wgsl/shader/shared/pathtag.wgsl +++ b/piet-wgsl/shader/shared/pathtag.wgsl @@ -18,7 +18,10 @@ struct TagMonoid { trans_ix: u32, pathseg_ix: u32, pathseg_offset: u32, - // Note: piet-gpu has linewidth and path, but not needed here +#ifdef full + linewidth_ix: u32, + path_ix: u32, +#endif } let PATH_TAG_SEG_TYPE = 3u; @@ -26,8 +29,11 @@ let PATH_TAG_LINETO = 1u; let PATH_TAG_QUADTO = 2u; let PATH_TAG_CUBICTO = 3u; let PATH_TAG_F32 = 8u; -let PATH_TAG_PATH = 0x10u; let PATH_TAG_TRANSFORM = 0x20u; +#ifdef full +let PATH_TAG_PATH = 0x10u; +let PATH_TAG_LINEWIDTH = 0x40u; +#endif fn tag_monoid_identity() -> TagMonoid { return TagMonoid(); @@ -38,6 +44,10 @@ fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid { c.trans_ix = a.trans_ix + b.trans_ix; c.pathseg_ix = a.pathseg_ix + b.pathseg_ix; c.pathseg_offset = a.pathseg_offset + b.pathseg_offset; +#ifdef full + c.linewidth_ix = a.linewidth_ix + b.linewidth_ix; + c.path_ix = a.path_ix + b.path_ix; +#endif return c; } @@ -51,5 +61,9 @@ fn reduce_tag(tag_word: u32) -> TagMonoid { a += a >> 8u; a += a >> 16u; c.pathseg_offset = a & 0xffu; +#ifdef full + c.path_ix = countOneBits(tag_word & (PATH_TAG_PATH) * 0x1010101u); + c.linewidth_ix = countOneBits(tag_word & (PATH_TAG_LINEWIDTH * 0x1010101u)); +#endif return c; } diff --git a/piet-wgsl/shader/shared/tile.wgsl b/piet-wgsl/shader/shared/tile.wgsl new file mode 100644 index 0000000..f575e50 --- /dev/null +++ b/piet-wgsl/shader/shared/tile.wgsl @@ -0,0 +1,29 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +// Common datatypes for path and tile intermediate info. + +struct Path { + // bounding box in tiles + bbox: vec4, + // offset (in u32's) to tile rectangle + tiles: u32, +} + +struct Tile { + backdrop: i32, + segments: u32, +} \ No newline at end of file diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl index 867db7e..0bd550e 100644 --- a/piet-wgsl/shader/tile_alloc.wgsl +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -19,6 +19,7 @@ #import config #import bump #import drawtag +#import tile @group(0) @binding(0) var config: Config; @@ -32,19 +33,6 @@ var draw_bboxes: array>; @group(0) @binding(3) var bump: BumpAllocators; -// TODO: put this in the right place, dedup -struct Path { - // bounding box in pixels - bbox: vec4, - // offset (in u32's) to tile rectangle - tiles: u32, -} - -struct Tile { - backdrop: i32, - segments: u32, -} - @group(0) @binding(4) var paths: array; diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index 5cf960d..ba0d034 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -22,7 +22,7 @@ use engine::Engine; use render::render; use test_scene::dump_scene_info; -use wgpu::{Device, Queue}; +use wgpu::{Device, Queue, Limits}; mod engine; mod render; @@ -33,12 +33,14 @@ async fn run() -> Result<(), Box> { let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY); let adapter = instance.request_adapter(&Default::default()).await.unwrap(); let features = adapter.features(); + let mut limits = Limits::default(); + limits.max_storage_buffers_per_shader_stage = 16; let (device, queue) = adapter .request_device( &wgpu::DeviceDescriptor { label: None, features: features & wgpu::Features::TIMESTAMP_QUERY, - limits: Default::default(), + limits, }, None, ) @@ -55,6 +57,7 @@ async fn do_render( engine: &mut Engine, ) -> Result<(), Box> { let shaders = shaders::init_shaders(device, engine)?; + let full_shaders = shaders::full_shaders(device, engine)?; let scene = test_scene::gen_test_scene(); dump_scene_info(&scene); let (recording, buf) = render(&scene, &shaders); diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index c3c30f1..3f9d184 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -16,6 +16,7 @@ struct Config { width_in_tiles: u32, height_in_tiles: u32, n_drawobj: u32, + n_path: u32, pathtag_base: u32, pathdata_base: u32, drawtag_base: u32, diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index d1e8847..6a7e9fb 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -35,6 +35,22 @@ pub struct Shaders { pub fine: ShaderId, } +// Shaders for the full pipeline +pub struct FullShaders { + pub pathtag_reduce: ShaderId, + pub pathtag_scan: ShaderId, + pub bbox_clear: ShaderId, + pub pathseg: ShaderId, + pub draw_reduce: ShaderId, + pub draw_leaf: ShaderId, + pub tile_alloc: ShaderId, + pub binning: ShaderId, + pub path_coarse: ShaderId, + pub backdrop: ShaderId, + pub coarse: ShaderId, + pub fine: ShaderId, +} + pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result { let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader")); let imports = preprocess::get_imports(shader_dir); @@ -93,3 +109,153 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result Result { + let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader")); + let imports = preprocess::get_imports(shader_dir); + let read_shader = + |path: &str| fs::read_to_string(shader_dir.join(path.to_string() + ".wgsl")).unwrap(); + let empty = HashSet::new(); + let mut full_config = HashSet::new(); + full_config.insert("full".into()); + let pathtag_reduce = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathtag_reduce"), &full_config, &imports).into(), + &[BindType::BufReadOnly, BindType::BufReadOnly, BindType::Buffer], + )?; + let pathtag_scan = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathtag_scan"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let bbox_clear = engine.add_shader( + device, + preprocess::preprocess(&read_shader("bbox_clear"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let pathseg = engine.add_shader( + device, + preprocess::preprocess(&read_shader("pathseg"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let draw_reduce = engine.add_shader( + device, + preprocess::preprocess(&read_shader("draw_reduce"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + ], + )?; + let draw_leaf = engine.add_shader( + device, + preprocess::preprocess(&read_shader("draw_leaf"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let tile_alloc = engine.add_shader( + device, + preprocess::preprocess(&read_shader("tile_alloc"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let binning = engine.add_shader( + device, + preprocess::preprocess(&read_shader("binning"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + BindType::Buffer, + ], + )?; + + let path_coarse = engine.add_shader( + device, + preprocess::preprocess(&read_shader("path_coarse_full"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let backdrop = engine.add_shader( + device, + preprocess::preprocess(&read_shader("backdrop"), &empty, &imports).into(), + &[BindType::BufReadOnly, BindType::Buffer], + )?; + let coarse = engine.add_shader( + device, + preprocess::preprocess(&read_shader("coarse"), &empty, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::Buffer, + ], + )?; + let fine = engine.add_shader( + device, + preprocess::preprocess(&read_shader("fine"), &full_config, &imports).into(), + &[ + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::BufReadOnly, + BindType::Buffer, + BindType::BufReadOnly, + ], + )?; + Ok(FullShaders { + pathtag_reduce, + pathtag_scan, + bbox_clear, + pathseg, + draw_reduce, + draw_leaf, + tile_alloc, + binning, + path_coarse, + backdrop, + coarse, + fine, + }) +} From 06fa3cb9ab4f8b10b05419e4d58e40401e0c0203 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 3 Nov 2022 19:33:11 -0700 Subject: [PATCH 07/17] Checkpoint Felt like checkpointing what I have before trying to run the pipeline. Theoretically everything should work. --- .vscode/settings.json | 1 + piet-wgsl/shader/draw_leaf.wgsl | 34 ++++- piet-wgsl/shader/fine.wgsl | 8 +- piet-wgsl/shader/path_coarse_full.wgsl | 43 +++--- piet-wgsl/shader/pathseg.wgsl | 18 +-- piet-wgsl/shader/shared/bump.wgsl | 1 + piet-wgsl/shader/shared/config.wgsl | 2 + piet-wgsl/src/main.rs | 2 +- piet-wgsl/src/render.rs | 173 ++++++++++++++++++++++++- piet-wgsl/src/shaders.rs | 48 ++++--- 10 files changed, 265 insertions(+), 65 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index bdb0028..813fac6 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -3,6 +3,7 @@ "bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl", "bump": "${workspaceFolder}/piet-wgsl/shader/shared/bump.wgsl", "config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl", + "cubic": "${workspaceFolder}/piet-wgsl/shader/shared/cubic.wgsl", "drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl", "segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl", "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl", diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index 6782542..bce04f8 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -40,6 +40,25 @@ var info: array; let WG_SIZE = 256u; +// Possibly dedup? +struct Transform { + matrx: vec4, + translate: vec2, +} + +fn read_transform(transform_base: u32, ix: u32) -> Transform { + let base = transform_base + ix * 6u; + let c0 = bitcast(scene[base]); + let c1 = bitcast(scene[base] + 1u); + let c2 = bitcast(scene[base] + 2u); + let c3 = bitcast(scene[base] + 3u); + let c4 = bitcast(scene[base] + 4u); + let c5 = bitcast(scene[base] + 5u); + let matrx = vec4(c0, c1, c2, c3); + let translate = vec2(c4, c5); + return Transform(matrx, translate); +} + var sh_scratch: array; @compute @workgroup_size(256) @@ -80,17 +99,20 @@ fn main( tag_word == DRAWTAG_BEGIN_CLIP { let bbox = path_bbox[m.path_ix]; - let x0 = f32(bbox.x0) - 32768.0; - let y0 = f32(bbox.y0) - 32768.0; - let x1 = f32(bbox.x1) - 32768.0; - let y1 = f32(bbox.y1) - 32768.0; - let bbox_f = vec4(x0, y0, x1, y1); + // TODO: bbox is mostly yagni here, sort that out. Maybe clips? + // let x0 = f32(bbox.x0); + // let y0 = f32(bbox.y0); + // let x1 = f32(bbox.x1); + // let y1 = f32(bbox.y1); + // let bbox_f = vec4(x0, y0, x1, y1); let fill_mode = u32(bbox.linewidth >= 0.0); var matrx: vec4; var translate: vec2; var linewidth = bbox.linewidth; if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT { - // TODO: retrieve transform from scene. Packed? + let transform = read_transform(config.transform_base, bbox.trans_ix); + matrx = transform.matrx; + translate = transform.translate; } if linewidth >= 0.0 { // Note: doesn't deal with anisotropic case diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 7b66d04..751e408 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -157,12 +157,16 @@ fn main( default: {} } } - + let out_ix = global_id.y * (config.width_in_tiles * TILE_WIDTH) + global_id.x * PIXELS_PER_THREAD; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let bytes = pack4x8unorm(rgba[i]); + output[out_ix + i] = bytes; + } #else let area = fill_path(tile, xy); -#endif let bytes = pack4x8unorm(vec4(area[0], area[1], area[2], area[3])); let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x; output[out_ix] = bytes; +#endif } diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl index 5641642..6b6e9b8 100644 --- a/piet-wgsl/shader/path_coarse_full.wgsl +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -20,6 +20,8 @@ #import pathtag #import tile #import segment +#import cubic +#import bump @group(0) @binding(0) var config: Config; @@ -27,14 +29,6 @@ var config: Config; @group(0) @binding(1) var scene: array; -// Maybe dedup? -struct Cubic { - p0: vec2, - p1: vec2, - p2: vec2, - p3: vec2, -} - @group(0) @binding(2) var tag_monoids: array; @@ -51,9 +45,12 @@ struct AtomicTile { } @group(0) @binding(5) -var tiles: array; +var bump: BumpAllocators; @group(0) @binding(6) +var tiles: array; + +@group(0) @binding(7) var segments: array; struct SubdivResult { @@ -110,9 +107,7 @@ fn eval_cubic(p0: vec2, p1: vec2, p2: vec2, p3: vec2, t: f32 } fn alloc_segment() -> u32 { - // Use 0-index segment (address is sentinel) as counter - // TODO: separate small buffer binding for this? - return atomicAdd(&tiles[4096].segments, 1u) + 1u; + return atomicAdd(&bump.segments, 1u) + 1u; } let MAX_QUADS = 16u; @@ -126,12 +121,13 @@ fn main( let shift = (ix & 3u) * 8u; var tag_byte = (tag_word >> shift) & 0xffu; - // Reconstruct path_ix from monoid or store in cubic? if (tag_byte & PATH_TAG_SEG_TYPE) != 0u { - let path_ix = 42u; // BIG GIANT TODO - let path = paths[path_ix]; - let bbox = vec4(path.bbox); + // Discussion question: it might actually be cheaper to do the path segment + // decoding & transform again rather than store the result in a buffer; + // classic memory vs ALU tradeoff. let cubic = cubics[global_id.x]; + let path = paths[cubic.path_ix]; + let bbox = vec4(path.bbox); let p0 = cubic.p0; let p1 = cubic.p1; let p2 = cubic.p2; @@ -201,11 +197,13 @@ fn main( var x1 = i32(floor(xymax.x * SX) + 1.0); var y0 = i32(floor(xymin.y * SY)); var y1 = i32(floor(xymax.y * SY) + 1.0); - x0 = clamp(x0, 0, i32(config.width_in_tiles)); - x1 = clamp(x1, 0, i32(config.width_in_tiles)); - y0 = clamp(y0, 0, i32(config.height_in_tiles)); - y1 = clamp(y1, 0, i32(config.height_in_tiles)); + x0 = clamp(x0, bbox.x, bbox.z); + x1 = clamp(x1, bbox.x, bbox.z); + y0 = clamp(y0, bbox.y, bbox.w); + y1 = clamp(y1, bbox.y, bbox.w); var xc = a + b * f32(y0); + let stride = bbox.z - bbox.x; + var base = i32(path.tiles) + (y0 - bbox.y) * stride - bbox.x; var xray = i32(floor(lp0.x * SX)); var last_xray = i32(floor(lp1.x * SX)); if dp.y < 0.0 { @@ -218,7 +216,7 @@ fn main( let xbackdrop = max(xray + 1, 0); if xymin.y < tile_y0 && xbackdrop < i32(config.width_in_tiles) { let backdrop = select(-1, 1, dp.y < 0.0); - let tile_ix = y * i32(config.width_in_tiles) + xbackdrop; + let tile_ix = base + xbackdrop; atomicAdd(&tiles[tile_ix].backdrop, backdrop); } var next_xray = last_xray; @@ -236,7 +234,7 @@ fn main( var tile_seg: Segment; for (var x = xx0; x < xx1; x += 1) { let tile_x0 = f32(x) * f32(TILE_WIDTH); - let tile_ix = y * i32(config.width_in_tiles) + x; + let tile_ix = base + x; // allocate segment, insert linked list let seg_ix = alloc_segment(); let old = atomicExchange(&tiles[tile_ix].segments, seg_ix); @@ -263,6 +261,7 @@ fn main( segments[seg_ix] = tile_seg; } xc += b; + base += stride; xray = next_xray; } n_out += 1u; diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl index 03c5aa6..1326fce 100644 --- a/piet-wgsl/shader/pathseg.wgsl +++ b/piet-wgsl/shader/pathseg.wgsl @@ -27,6 +27,7 @@ #import config #import pathtag +#import cubic @group(0) @binding(0) var config: Config; @@ -49,12 +50,6 @@ struct AtomicPathBbox { @group(0) @binding(3) var path_bboxes: array; -struct Cubic { - p0: vec2, - p1: vec2, - p2: vec2, - p3: vec2, -} @group(0) @binding(4) var cubics: array; @@ -90,7 +85,6 @@ var cubics: array; // } var pathdata_base: u32; -var transform_base: u32; fn read_f32_point(ix: u32) -> vec2 { let x = bitcast(scene[pathdata_base + ix]); @@ -110,7 +104,7 @@ struct Transform { translate: vec2, } -fn read_transform(ix: u32) -> Transform { +fn read_transform(transform_base: u32, ix: u32) -> Transform { let base = transform_base + ix * 6u; let c0 = bitcast(scene[base]); let c1 = bitcast(scene[base] + 1u); @@ -142,7 +136,6 @@ fn main( ) { let ix = global_id.x; let tag_word = scene[config.pathtag_base + (ix >> 2u)]; - // TODO: set transform_base pathdata_base = config.pathdata_base; let shift = (ix & 3u) * 8u; var tm = reduce_tag(tag_word & ((1u << shift) - 1u)); @@ -180,7 +173,7 @@ fn main( } } } - let transform = read_transform(tm.trans_ix); + let transform = read_transform(config.transform_base, tm.trans_ix); p0 = transform_apply(transform, p0); p1 = transform_apply(transform, p1); var bbox = vec4(min(p0, p1), max(p0, p1)); @@ -201,15 +194,14 @@ fn main( p1 = mix(p1, p0, 1.0 / 3.0); } } - cubics[global_id.x] = Cubic(p0, p1, p2, p3); + cubics[global_id.x] = Cubic(p0, p1, p2, p3, tm.path_ix, 0u); // Update bounding box using atomics only. Computing a monoid is a // potential future optimization. - if bbox.z > bbox.x && bbox.w > bbox.y { + if bbox.z > bbox.x || bbox.w > bbox.y { atomicMin(&(*out).x0, round_down(bbox.x)); atomicMin(&(*out).y0, round_down(bbox.y)); atomicMax(&(*out).x1, round_up(bbox.z)); atomicMax(&(*out).y1, round_up(bbox.w)); } } - } diff --git a/piet-wgsl/shader/shared/bump.wgsl b/piet-wgsl/shader/shared/bump.wgsl index 7f33ba2..b33ffaa 100644 --- a/piet-wgsl/shader/shared/bump.wgsl +++ b/piet-wgsl/shader/shared/bump.wgsl @@ -19,4 +19,5 @@ struct BumpAllocators { binning: atomic, ptcl: atomic, tile: atomic, + segments: atomic, } diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index bf0839d..b43f35b 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -28,6 +28,8 @@ struct Config { drawtag_base: u32, drawdata_base: u32, + + transform_base: u32, } // Geometry of tiles and bins diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index ba0d034..355ea33 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -22,7 +22,7 @@ use engine::Engine; use render::render; use test_scene::dump_scene_info; -use wgpu::{Device, Queue, Limits}; +use wgpu::{Device, Limits, Queue}; mod engine; mod render; diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 3f9d184..d019886 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -5,10 +5,18 @@ use piet_scene::Scene; use crate::{ engine::{BufProxy, Recording}, - shaders::{self, Shaders}, + shaders::{self, FullShaders, Shaders}, }; const TAG_MONOID_SIZE: u64 = 12; +const TAG_MONOID_FULL_SIZE: u64 = 20; +const PATH_BBOX_SIZE: u64 = 24; +const CUBIC_SIZE: u64 = 40; +const DRAWMONOID_SIZE: u64 = 16; +const MAX_DRAWINFO_SIZE: u64 = 44; +const PATH_SIZE: u64 = 8; +const DRAW_BBOX_SIZE: u64 = 16; +const BUMP_SIZE: u64 = 16; #[repr(C)] #[derive(Clone, Copy, Default, Zeroable, Pod)] @@ -21,6 +29,7 @@ struct Config { pathdata_base: u32, drawtag_base: u32, drawdata_base: u32, + transform_base: u32, } #[repr(C)] @@ -109,6 +118,168 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { (recording, out_buf) } +pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy) { + let mut recording = Recording::default(); + let data = scene.data(); + let n_pathtag = data.tag_stream.len(); + let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG); + let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); + let mut scene: Vec = Vec::with_capacity(pathtag_padded); + let pathtag_base = size_to_words(scene.len()); + scene.extend(&data.tag_stream); + scene.resize(pathtag_padded, 0); + let pathdata_base = size_to_words(scene.len()); + scene.extend(&data.pathseg_stream); + let drawtag_base = size_to_words(scene.len()); + scene.extend(bytemuck::cast_slice(&data.drawtag_stream)); + let drawdata_base = size_to_words(scene.len()); + scene.extend(&data.drawdata_stream); + let transform_base = size_to_words(scene.len()); + scene.extend(bytemuck::cast_slice(&data.transform_stream)); + + let n_path = data.n_path; + // TODO: calculate for real when we do rectangles + let n_drawobj = n_path; + let config = Config { + width_in_tiles: 64, + height_in_tiles: 64, + n_drawobj, + n_path, + pathtag_base, + pathdata_base, + drawtag_base, + drawdata_base, + transform_base, + }; + let scene_buf = recording.upload(scene); + let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); + + let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE); + // TODO: really only need pathtag_wgs - 1 + recording.dispatch( + shaders.pathtag_reduce, + (pathtag_wgs as u32, 1, 1), + [config_buf, scene_buf, reduced_buf], + ); + + let tagmonoid_buf = + BufProxy::new(pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_SIZE); + recording.dispatch( + shaders.pathtag_scan, + (pathtag_wgs as u32, 1, 1), + [config_buf, scene_buf, reduced_buf, tagmonoid_buf], + ); + let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; + let path_bbox_buf = BufProxy::new(n_path as u64 * PATH_BBOX_SIZE); + recording.dispatch( + shaders.bbox_clear, + (drawobj_wgs, 1, 1), + [config_buf, path_bbox_buf], + ); + let cubic_buf = BufProxy::new(n_path as u64 * CUBIC_SIZE); + let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; + recording.dispatch( + shaders.pathseg, + (path_coarse_wgs, 1, 1), + [ + config_buf, + scene_buf, + tagmonoid_buf, + path_bbox_buf, + cubic_buf, + ], + ); + let draw_reduced_buf = BufProxy::new(drawobj_wgs as u64 * DRAWMONOID_SIZE); + recording.dispatch( + shaders.draw_reduce, + (drawobj_wgs, 1, 1), + [config_buf, scene_buf, draw_reduced_buf], + ); + let draw_monoid_buf = BufProxy::new(n_drawobj as u64 * DRAWMONOID_SIZE); + let info_buf = BufProxy::new(n_drawobj as u64 * MAX_DRAWINFO_SIZE); + recording.dispatch( + shaders.draw_leaf, + (drawobj_wgs, 1, 1), + [ + config_buf, + scene_buf, + draw_reduced_buf, + path_bbox_buf, + draw_monoid_buf, + info_buf, + ], + ); + let draw_bbox_buf = BufProxy::new(n_path as u64 * DRAW_BBOX_SIZE); + let bump_buf = BufProxy::new(BUMP_SIZE); + // Not actually used yet. + let clip_bbox_buf = BufProxy::new(1024); + let bin_data_buf = BufProxy::new(1 << 16); + recording.clear_all(bump_buf); + recording.dispatch( + shaders.binning, + (drawobj_wgs, 1, 1), + [ + config_buf, + draw_monoid_buf, + path_bbox_buf, + clip_bbox_buf, + draw_bbox_buf, + bump_buf, + bin_data_buf, + ], + ); + let path_buf = BufProxy::new(n_path as u64 * PATH_SIZE); + let tile_buf = BufProxy::new(1 << 20); + let path_wgs = (n_path + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; + recording.dispatch( + shaders.tile_alloc, + (path_wgs, 1, 1), + [ + config_buf, + scene_buf, + draw_bbox_buf, + bump_buf, + path_buf, + tile_buf, + ], + ); + + //let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32); + // TODO: more principled size calc + let tiles_buf = BufProxy::new(4097 * 8); + let segments_buf = BufProxy::new(256 * 24); + recording.clear_all(tiles_buf); + recording.dispatch( + shaders.path_coarse, + (path_coarse_wgs, 1, 1), + [ + config_buf, + scene_buf, + tagmonoid_buf, + cubic_buf, + path_buf, + bump_buf, + tiles_buf, + segments_buf, + ], + ); + recording.dispatch( + shaders.backdrop, + (path_wgs, 1, 1), + [config_buf, path_buf, tiles_buf], + ); + let out_buf_size = config.width_in_tiles * config.height_in_tiles * 256; + let out_buf = BufProxy::new(out_buf_size as u64); + recording.dispatch( + shaders.fine, + (config.width_in_tiles, config.height_in_tiles, 1), + [config_buf, tiles_buf, segments_buf, out_buf], + ); + + recording.download(out_buf); + (recording, out_buf) +} + pub fn align_up(len: usize, alignment: u32) -> usize { len + (len.wrapping_neg() & alignment as usize - 1) } diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index 6a7e9fb..939c58e 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -25,7 +25,9 @@ use wgpu::Device; use crate::engine::{BindType, Engine, Error, ShaderId}; pub const PATHTAG_REDUCE_WG: u32 = 256; +pub const PATH_BBOX_WG: u32 = 256; pub const PATH_COARSE_WG: u32 = 256; +pub const PATH_DRAWOBJ_WG: u32 = 256; pub struct Shaders { pub pathtag_reduce: ShaderId, @@ -43,8 +45,8 @@ pub struct FullShaders { pub pathseg: ShaderId, pub draw_reduce: ShaderId, pub draw_leaf: ShaderId, - pub tile_alloc: ShaderId, pub binning: ShaderId, + pub tile_alloc: ShaderId, pub path_coarse: ShaderId, pub backdrop: ShaderId, pub coarse: ShaderId, @@ -60,7 +62,11 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result Result Result Result Result Result Result Date: Thu, 3 Nov 2022 22:00:52 -0700 Subject: [PATCH 08/17] Mostly working path rendering It draws multiple paths and applies affine transformations. One problem: RGBA writing is byte-reversed and premultiplied. --- piet-wgsl/shader/backdrop_dyn.wgsl | 21 +++------- piet-wgsl/shader/binning.wgsl | 2 - piet-wgsl/shader/coarse.wgsl | 13 +++--- piet-wgsl/shader/draw_leaf.wgsl | 14 +++---- piet-wgsl/shader/fine.wgsl | 4 +- piet-wgsl/shader/path_coarse_full.wgsl | 2 +- piet-wgsl/shader/pathseg.wgsl | 11 +++--- piet-wgsl/shader/shared/cubic.wgsl | 25 ++++++++++++ piet-wgsl/shader/shared/pathtag.wgsl | 1 + piet-wgsl/shader/tile_alloc.wgsl | 10 +++-- piet-wgsl/src/engine.rs | 1 + piet-wgsl/src/main.rs | 33 ++++++++++++---- piet-wgsl/src/render.rs | 55 ++++++++++++++++++-------- piet-wgsl/src/shaders.rs | 8 +++- piet-wgsl/src/test_scene.rs | 12 ++++++ 15 files changed, 143 insertions(+), 69 deletions(-) create mode 100644 piet-wgsl/shader/shared/cubic.wgsl diff --git a/piet-wgsl/shader/backdrop_dyn.wgsl b/piet-wgsl/shader/backdrop_dyn.wgsl index c6c7c8d..ab094e6 100644 --- a/piet-wgsl/shader/backdrop_dyn.wgsl +++ b/piet-wgsl/shader/backdrop_dyn.wgsl @@ -17,20 +17,7 @@ // Prefix sum for dynamically allocated backdrops #import config - -// TODO: dedup & put this in the right place -struct Path { - // bounding box in pixels - bbox: vec4, - // offset (in u32's) to tile rectangle - tiles: u32, -} - -// TODO: -> shared -struct Tile { - backdrop: i32, - segments: u32, -} +#import tile @group(0) @binding(0) var config: Config; @@ -45,6 +32,7 @@ let WG_SIZE = 256u; var sh_row_width: array; var sh_row_count: array; +var sh_offset: array; @compute @workgroup_size(256) fn main( @@ -58,8 +46,9 @@ fn main( let path = paths[drawobj_ix]; sh_row_width[local_id.x] = path.bbox.z - path.bbox.x; row_count = path.bbox.w - path.bbox.y; - sh_row_count[local_id.x] = row_count; + sh_offset[local_id.x] = path.tiles; } + sh_row_count[local_id.x] = row_count; // Prefix sum of row counts for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { @@ -83,7 +72,7 @@ fn main( let width = sh_row_width[el_ix]; if width > 0u { var seq_ix = row - select(0u, sh_row_count[el_ix - 1u], el_ix > 0u); - var tile_ix = seq_ix * width; + var tile_ix = sh_offset[el_ix] + seq_ix * width; var sum = tiles[tile_ix].backdrop; for (var x = 1u; x < width; x += 1u) { tile_ix += 1u; diff --git a/piet-wgsl/shader/binning.wgsl b/piet-wgsl/shader/binning.wgsl index 3d5a053..6fd6284 100644 --- a/piet-wgsl/shader/binning.wgsl +++ b/piet-wgsl/shader/binning.wgsl @@ -36,8 +36,6 @@ var clip_bbox_buf: array>; @group(0) @binding(4) var intersected_bbox: array>; -// TODO: put into shared include - @group(0) @binding(5) var bump: BumpAllocators; diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 8c59ba6..7a2ecf9 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -41,13 +41,13 @@ struct BinHeader { var bin_headers: array; @group(0) @binding(4) -var paths: array; +var bin_data: array; @group(0) @binding(5) -var tiles: array; +var paths: array; @group(0) @binding(6) -var bin_data: array; +var tiles: array; @group(0) @binding(7) var bump: BumpAllocators; @@ -109,7 +109,7 @@ fn write_path(tile: Tile, linewidth: f32) { fn write_color(color: CmdColor) { alloc_cmd(2u); - ptcl[cmd_offset] = CMD_FILL; + ptcl[cmd_offset] = CMD_COLOR; ptcl[cmd_offset + 1u] = color.rgba_color; cmd_offset += 2u; @@ -117,7 +117,6 @@ fn write_color(color: CmdColor) { @compute @workgroup_size(256) fn main( - @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3, @builtin(workgroup_id) wg_id: vec3, ) { @@ -130,7 +129,7 @@ fn main( let bin_tile_y = N_TILE_Y * wg_id.y; let tile_x = local_id.x % N_TILE_X; - let tile_y = local_id.y % N_TILE_Y; + let tile_y = local_id.x / N_TILE_X; let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x; cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC; cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM); @@ -313,7 +312,7 @@ fn main( workgroupBarrier(); } if bin_tile_x < config.width_in_tiles && bin_tile_y < config.height_in_tiles { - ptcl[cmd_offset] = CMD_END; + //ptcl[cmd_offset] = CMD_END; // TODO: blend stack allocation } } diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index bce04f8..14a1163 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -49,11 +49,11 @@ struct Transform { fn read_transform(transform_base: u32, ix: u32) -> Transform { let base = transform_base + ix * 6u; let c0 = bitcast(scene[base]); - let c1 = bitcast(scene[base] + 1u); - let c2 = bitcast(scene[base] + 2u); - let c3 = bitcast(scene[base] + 3u); - let c4 = bitcast(scene[base] + 4u); - let c5 = bitcast(scene[base] + 5u); + let c1 = bitcast(scene[base + 1u]); + let c2 = bitcast(scene[base + 2u]); + let c3 = bitcast(scene[base + 3u]); + let c4 = bitcast(scene[base + 4u]); + let c5 = bitcast(scene[base + 5u]); let matrx = vec4(c0, c1, c2, c3); let translate = vec2(c4, c5); return Transform(matrx, translate); @@ -73,8 +73,8 @@ fn main( sh_scratch[local_id.x] = agg; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); - if local_id.x + (1u << i) < WG_SIZE { - let other = sh_scratch[local_id.x + (1u << i)]; + if local_id.x >= 1u << i { + let other = sh_scratch[local_id.x - (1u << i)]; agg = combine_draw_monoid(agg, other); } workgroupBarrier(); diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 751e408..4431848 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -113,7 +113,6 @@ fn main( ) { let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x; let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); - let tile = tiles[tile_ix]; #ifdef full var rgba: array, PIXELS_PER_THREAD>; var area: array; @@ -148,7 +147,7 @@ fn main( let fg_i = fg * area[i]; rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; } - cmd_ix += 1u; + cmd_ix += 2u; } // CMD_JUMP case 11u: { @@ -163,6 +162,7 @@ fn main( output[out_ix + i] = bytes; } #else + let tile = tiles[tile_ix]; let area = fill_path(tile, xy); let bytes = pack4x8unorm(vec4(area[0], area[1], area[2], area[3])); diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl index 6b6e9b8..fa3609e 100644 --- a/piet-wgsl/shader/path_coarse_full.wgsl +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -214,7 +214,7 @@ fn main( for (var y = y0; y < y1; y += 1) { let tile_y0 = f32(y) * f32(TILE_HEIGHT); let xbackdrop = max(xray + 1, 0); - if xymin.y < tile_y0 && xbackdrop < i32(config.width_in_tiles) { + if xymin.y < tile_y0 && xbackdrop < bbox.z { let backdrop = select(-1, 1, dp.y < 0.0); let tile_ix = base + xbackdrop; atomicAdd(&tiles[tile_ix].backdrop, backdrop); diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl index 1326fce..d2c6a6a 100644 --- a/piet-wgsl/shader/pathseg.wgsl +++ b/piet-wgsl/shader/pathseg.wgsl @@ -107,11 +107,11 @@ struct Transform { fn read_transform(transform_base: u32, ix: u32) -> Transform { let base = transform_base + ix * 6u; let c0 = bitcast(scene[base]); - let c1 = bitcast(scene[base] + 1u); - let c2 = bitcast(scene[base] + 2u); - let c3 = bitcast(scene[base] + 3u); - let c4 = bitcast(scene[base] + 4u); - let c5 = bitcast(scene[base] + 5u); + let c1 = bitcast(scene[base + 1u]); + let c2 = bitcast(scene[base + 2u]); + let c3 = bitcast(scene[base + 3u]); + let c4 = bitcast(scene[base + 4u]); + let c5 = bitcast(scene[base + 5u]); let matrx = vec4(c0, c1, c2, c3); let translate = vec2(c4, c5); return Transform(matrx, translate); @@ -174,6 +174,7 @@ fn main( } } let transform = read_transform(config.transform_base, tm.trans_ix); + //let transform = Transform(vec4(1.0, 0.0, 0.0, 1.0), vec2()); p0 = transform_apply(transform, p0); p1 = transform_apply(transform, p1); var bbox = vec4(min(p0, p1), max(p0, p1)); diff --git a/piet-wgsl/shader/shared/cubic.wgsl b/piet-wgsl/shader/shared/cubic.wgsl new file mode 100644 index 0000000..ffb85b4 --- /dev/null +++ b/piet-wgsl/shader/shared/cubic.wgsl @@ -0,0 +1,25 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +struct Cubic { + p0: vec2, + p1: vec2, + p2: vec2, + p3: vec2, + path_ix: u32, + // Needed? + padding: u32, +} diff --git a/piet-wgsl/shader/shared/pathtag.wgsl b/piet-wgsl/shader/shared/pathtag.wgsl index b248e18..fed16fd 100644 --- a/piet-wgsl/shader/shared/pathtag.wgsl +++ b/piet-wgsl/shader/shared/pathtag.wgsl @@ -16,6 +16,7 @@ struct TagMonoid { trans_ix: u32, + // TODO: I don't think pathseg_ix is used. pathseg_ix: u32, pathseg_offset: u32, #ifdef full diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl index 0bd550e..1c27c83 100644 --- a/piet-wgsl/shader/tile_alloc.wgsl +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -79,14 +79,17 @@ fn main( sh_tile_count[local_id.x] = tile_count; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); - if local_id.x < (1u << i) { + if local_id.x >= (1u << i) { total_tile_count += sh_tile_count[local_id.x - (1u << i)]; } workgroupBarrier(); sh_tile_count[local_id.x] = total_tile_count; } - if local_id.x == WG_SIZE - 1u { - sh_tile_offset = atomicAdd(&bump.tile, total_tile_count); + workgroupBarrier(); + // should be able to avoid a barrier by adding total_tile count from + // thread WG_SIZE - 1, but it doesn't work + if local_id.x == 0u { + sh_tile_offset = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]); } workgroupBarrier(); let tile_offset = sh_tile_offset; @@ -94,6 +97,7 @@ fn main( let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u); let bbox = vec4(ux0, uy0, ux1, uy1); let path = Path(bbox, tile_offset + tile_subix); + paths[drawobj_ix] = path; } // zero allocated memory diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs index 2be08db..7d1c854 100644 --- a/piet-wgsl/src/engine.rs +++ b/piet-wgsl/src/engine.rs @@ -183,6 +183,7 @@ impl Engine { bind_map.insert_buf(buf_proxy.id, buf); } Command::Dispatch(shader_id, wg_size, bindings) => { + println!("dispatching {:?} with {} bindings", wg_size, bindings.len()); let shader = &self.shaders[shader_id.0]; let bind_group = bind_map.create_bind_group(device, &shader.bind_group_layout, bindings)?; diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index 355ea33..d8d53ad 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -20,7 +20,6 @@ use std::{fs::File, io::BufWriter}; use engine::Engine; -use render::render; use test_scene::dump_scene_info; use wgpu::{Device, Limits, Queue}; @@ -51,6 +50,20 @@ async fn run() -> Result<(), Box> { Ok(()) } +fn dump_buf(buf: &[u32]) { + for (i, val) in buf.iter().enumerate() { + if *val != 0 { + let lo = val & 0x7fff_ffff; + if lo >= 0x3000_0000 && lo < 0x5000_0000 { + println!("{}: {:x} {}", i, val, f32::from_bits(*val)); + } else { + println!("{}: {:x}", i, val); + + } + } + } +} + async fn do_render( device: &Device, queue: &Queue, @@ -60,17 +73,23 @@ async fn do_render( let full_shaders = shaders::full_shaders(device, engine)?; let scene = test_scene::gen_test_scene(); dump_scene_info(&scene); - let (recording, buf) = render(&scene, &shaders); + //let (recording, buf) = render::render(&scene, &shaders); + let (recording, buf) = render::render_full(&scene, &full_shaders); let downloads = engine.run_recording(&device, &queue, &recording)?; let mapped = downloads.map(); device.poll(wgpu::Maintain::Wait); let buf = mapped.get_mapped(buf).await?; - let file = File::create("image.png")?; - let w = BufWriter::new(file); - let encoder = png::Encoder::new(w, 1024, 1024); - let mut writer = encoder.write_header()?; - writer.write_image_data(&buf)?; + if false { + dump_buf(bytemuck::cast_slice(&buf)); + } else { + let file = File::create("image.png")?; + let w = BufWriter::new(file); + let mut encoder = png::Encoder::new(w, 1024, 1024); + encoder.set_color(png::ColorType::Rgba); + let mut writer = encoder.write_header()?; + writer.write_image_data(&buf)?; + } Ok(()) } diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index d019886..9202ef0 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -14,12 +14,13 @@ const PATH_BBOX_SIZE: u64 = 24; const CUBIC_SIZE: u64 = 40; const DRAWMONOID_SIZE: u64 = 16; const MAX_DRAWINFO_SIZE: u64 = 44; -const PATH_SIZE: u64 = 8; +const PATH_SIZE: u64 = 32; const DRAW_BBOX_SIZE: u64 = 16; const BUMP_SIZE: u64 = 16; +const BIN_HEADER_SIZE: u64 = 8; #[repr(C)] -#[derive(Clone, Copy, Default, Zeroable, Pod)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] struct Config { width_in_tiles: u32, height_in_tiles: u32, @@ -84,8 +85,8 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { [config_buf, scene_buf, reduced_buf, tagmonoid_buf], ); - let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; - //let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32); + let n_pathtag = data.pathseg_stream.len(); + let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; // TODO: more principled size calc let tiles_buf = BufProxy::new(4097 * 8); let segments_buf = BufProxy::new(256 * 24); @@ -151,6 +152,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy drawdata_base, transform_base, }; + println!("{:?}", config); let scene_buf = recording.upload(scene); let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); @@ -176,8 +178,9 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy (drawobj_wgs, 1, 1), [config_buf, path_bbox_buf], ); - let cubic_buf = BufProxy::new(n_path as u64 * CUBIC_SIZE); - let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; + let n_pathtag = data.pathseg_stream.len(); + let cubic_buf = BufProxy::new(n_pathtag as u64 * CUBIC_SIZE); + let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; recording.dispatch( shaders.pathseg, (path_coarse_wgs, 1, 1), @@ -214,6 +217,10 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy // Not actually used yet. let clip_bbox_buf = BufProxy::new(1024); let bin_data_buf = BufProxy::new(1 << 16); + let width_in_bins = (config.width_in_tiles + 15) / 16; + let height_in_bins = (config.height_in_tiles + 15) / 16; + let n_bins = width_in_bins * height_in_bins; + let bin_header_buf = BufProxy::new((n_bins * drawobj_wgs) as u64 * BIN_HEADER_SIZE); recording.clear_all(bump_buf); recording.dispatch( shaders.binning, @@ -226,6 +233,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy draw_bbox_buf, bump_buf, bin_data_buf, + bin_header_buf, ], ); let path_buf = BufProxy::new(n_path as u64 * PATH_SIZE); @@ -244,11 +252,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy ], ); - //let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32); - // TODO: more principled size calc - let tiles_buf = BufProxy::new(4097 * 8); - let segments_buf = BufProxy::new(256 * 24); - recording.clear_all(tiles_buf); + let segments_buf = BufProxy::new(1 << 20); recording.dispatch( shaders.path_coarse, (path_coarse_wgs, 1, 1), @@ -259,25 +263,42 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy cubic_buf, path_buf, bump_buf, - tiles_buf, + tile_buf, segments_buf, ], ); recording.dispatch( shaders.backdrop, (path_wgs, 1, 1), - [config_buf, path_buf, tiles_buf], + [config_buf, path_buf, tile_buf], ); - let out_buf_size = config.width_in_tiles * config.height_in_tiles * 256; + let ptcl_buf = BufProxy::new(1 << 20); + recording.dispatch( + shaders.coarse, + (width_in_bins, height_in_bins, 1), + [ + config_buf, + scene_buf, + draw_monoid_buf, + bin_header_buf, + bin_data_buf, + path_buf, + tile_buf, + bump_buf, + ptcl_buf, + ], + ); + let out_buf_size = config.width_in_tiles * config.height_in_tiles * 1024; let out_buf = BufProxy::new(out_buf_size as u64); recording.dispatch( shaders.fine, (config.width_in_tiles, config.height_in_tiles, 1), - [config_buf, tiles_buf, segments_buf, out_buf], + [config_buf, tile_buf, segments_buf, out_buf, ptcl_buf], ); - recording.download(out_buf); - (recording, out_buf) + let download_buf = out_buf; + recording.download(download_buf); + (recording, download_buf) } pub fn align_up(len: usize, alignment: u32) -> usize { diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index 939c58e..f918d96 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -223,8 +223,12 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result Scene { ]; let brush = Brush::Solid(Color::rgb8(0x80, 0x80, 0x80)); builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); + let transform = Affine::translate(10.0, 200.0); + /* + let path = [ + PathElement::MoveTo(Point::new(100.0, 300.0)), + PathElement::LineTo(Point::new(500.0, 320.0)), + PathElement::LineTo(Point::new(300.0, 350.0)), + PathElement::LineTo(Point::new(200.0, 460.0)), + PathElement::LineTo(Point::new(150.0, 410.0)), + PathElement::Close, + ]; + */ + builder.fill(Fill::NonZero, transform, &brush, None, &path); scene } From 92d6b1188f4c7d7b688322e1e04ba0e082c78b15 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Fri, 4 Nov 2022 09:25:06 -0700 Subject: [PATCH 09/17] Fix color Get rgba order right in rendering, plus generate separated alpha for png. The latter is just for debugging, we won't generally use separated alpha. --- piet-wgsl/shader/fine.wgsl | 7 +++++-- piet-wgsl/src/test_scene.rs | 15 +++------------ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index 4431848..ad3772c 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -142,7 +142,7 @@ fn main( // CMD_COLOR case 5u: { let color = read_color(cmd_ix); - let fg = unpack4x8unorm(color.rgba_color); + let fg = unpack4x8unorm(color.rgba_color).wzyx; for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { let fg_i = fg * area[i]; rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; @@ -158,7 +158,10 @@ fn main( } let out_ix = global_id.y * (config.width_in_tiles * TILE_WIDTH) + global_id.x * PIXELS_PER_THREAD; for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { - let bytes = pack4x8unorm(rgba[i]); + let fg = rgba[i]; + let a_inv = 1.0 / (fg.a + 1e-6); + let rgba_sep = vec4(fg.r * a_inv, fg.g * a_inv, fg.b * a_inv, fg.a); + let bytes = pack4x8unorm(rgba_sep); output[out_ix + i] = bytes; } #else diff --git a/piet-wgsl/src/test_scene.rs b/piet-wgsl/src/test_scene.rs index 1029c59..c0c9390 100644 --- a/piet-wgsl/src/test_scene.rs +++ b/piet-wgsl/src/test_scene.rs @@ -27,19 +27,10 @@ pub fn gen_test_scene() -> Scene { PathElement::LineTo(Point::new(150.0, 210.0)), PathElement::Close, ]; - let brush = Brush::Solid(Color::rgb8(0x80, 0x80, 0x80)); + let brush = Brush::Solid(Color::rgb8(0x40, 0x40, 0xff)); builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); - let transform = Affine::translate(10.0, 200.0); - /* - let path = [ - PathElement::MoveTo(Point::new(100.0, 300.0)), - PathElement::LineTo(Point::new(500.0, 320.0)), - PathElement::LineTo(Point::new(300.0, 350.0)), - PathElement::LineTo(Point::new(200.0, 460.0)), - PathElement::LineTo(Point::new(150.0, 410.0)), - PathElement::Close, - ]; - */ + let transform = Affine::translate(50.0, 50.0); + let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80)); builder.fill(Fill::NonZero, transform, &brush, None, &path); scene } From 7ae5aa7491c480ddd31a9e909c908a67f992e9f3 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Fri, 4 Nov 2022 12:40:54 -0700 Subject: [PATCH 10/17] Mostly working strokes The fat line in coarse path rendering is not done, but when lines are thin that mostly looks ok. Onward to tiger! --- piet-wgsl/shader/coarse.wgsl | 34 +++++++++++++++++--------- piet-wgsl/shader/fine.wgsl | 38 +++++++++++++++++++++++++++++ piet-wgsl/shader/pathseg.wgsl | 11 ++++++++- piet-wgsl/shader/shared/config.wgsl | 1 + piet-wgsl/shader/shared/ptcl.wgsl | 6 +++++ piet-wgsl/src/render.rs | 5 ++++ piet-wgsl/src/shaders.rs | 1 + piet-wgsl/src/test_scene.rs | 15 +++++++++++- 8 files changed, 97 insertions(+), 14 deletions(-) diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 7a2ecf9..642ce73 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -50,9 +50,12 @@ var paths: array; var tiles: array; @group(0) @binding(7) -var bump: BumpAllocators; +var info: array; @group(0) @binding(8) +var bump: BumpAllocators; + +@group(0) @binding(9) var ptcl: array; @@ -95,15 +98,23 @@ fn write_path(tile: Tile, linewidth: f32) { // TODO: take flags // TODO: handle stroke alloc_cmd(3u); - if tile.segments != 0u { - let fill = CmdFill(tile.segments, tile.backdrop); - ptcl[cmd_offset] = CMD_FILL; - ptcl[cmd_offset + 1u] = fill.tile; - ptcl[cmd_offset + 2u] = u32(fill.backdrop); - cmd_offset += 3u; + if linewidth < 0.0 { + if tile.segments != 0u { + let fill = CmdFill(tile.segments, tile.backdrop); + ptcl[cmd_offset] = CMD_FILL; + ptcl[cmd_offset + 1u] = fill.tile; + ptcl[cmd_offset + 2u] = u32(fill.backdrop); + cmd_offset += 3u; + } else { + ptcl[cmd_offset] = CMD_SOLID; + cmd_offset += 1u; + } } else { - ptcl[cmd_offset] = CMD_SOLID; - cmd_offset += 1u; + let stroke = CmdStroke(tile.segments, 0.5 * linewidth); + ptcl[cmd_offset] = CMD_STROKE; + ptcl[cmd_offset + 1u] = stroke.tile; + ptcl[cmd_offset + 2u] = bitcast(stroke.half_width); + cmd_offset += 3u; } } @@ -287,15 +298,14 @@ fn main( let drawtag = scene[config.drawtag_base + drawobj_ix]; let dm = draw_monoids[drawobj_ix]; let dd = config.drawdata_base + dm.scene_offset; - // TODO: set up draw info from monoid + let di = dm.info_offset; if clip_zero_depth == 0u { let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x; let tile = tiles[tile_ix]; switch drawtag { // DRAWTAG_FILL_COLOR case 0x44u: { - // TODO: get linewidth from draw object - let linewidth = -1.0; + let linewidth = bitcast(info[di]); let rgba_color = scene[dd]; write_path(tile, linewidth); write_color(CmdColor(rgba_color)); diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index ad3772c..c347630 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -51,6 +51,12 @@ fn read_fill(cmd_ix: u32) -> CmdFill { return CmdFill(tile, backdrop); } +fn read_stroke(cmd_ix: u32) -> CmdStroke { + let tile = ptcl[cmd_ix + 1u]; + let half_width = bitcast(ptcl[cmd_ix + 2u]); + return CmdStroke(tile, half_width); +} + fn read_color(cmd_ix: u32) -> CmdColor { let rgba_color = ptcl[cmd_ix + 1u]; return CmdColor(rgba_color); @@ -105,6 +111,32 @@ fn fill_path(tile: Tile, xy: vec2) -> array { return area; } +fn stroke_path(seg: u32, half_width: f32, xy: vec2) -> array { + var df: array; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + df[i] = 1e9; + } + var segment_ix = seg; + while segment_ix != 0u { + let segment = segments[segment_ix]; + let delta = segment.delta; + let dpos0 = xy + vec2(0.5, 0.5) - segment.origin; + let scale = 1.0 / dot(delta, delta); + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let dpos = vec2(dpos0.x + f32(i), dpos0.y); + let t = clamp(dot(dpos, delta) * scale, 0.0, 1.0); + // performance idea: hoist sqrt out of loop + df[i] = min(df[i], length(delta * t - dpos)); + } + segment_ix = segment.next; + } + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + // reuse array; return alpha rather than distance + df[i] = clamp(half_width + 0.5 - df[i], 0.0, 1.0); + } + return df; +} + @compute @workgroup_size(4, 16) fn main( @builtin(global_invocation_id) global_id: vec3, @@ -132,6 +164,12 @@ fn main( area = fill_path(tile, xy); cmd_ix += 3u; } + // CMD_STROKE + case 2u: { + let stroke = read_stroke(cmd_ix); + area = stroke_path(stroke.tile, stroke.half_width, xy); + cmd_ix += 3u; + } // CMD_SOLID case 3u: { for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { diff --git a/piet-wgsl/shader/pathseg.wgsl b/piet-wgsl/shader/pathseg.wgsl index d2c6a6a..b4f83e1 100644 --- a/piet-wgsl/shader/pathseg.wgsl +++ b/piet-wgsl/shader/pathseg.wgsl @@ -143,8 +143,10 @@ fn main( var tag_byte = (tag_word >> shift) & 0xffu; let out = &path_bboxes[tm.path_ix]; + var linewidth: f32; if (tag_byte & PATH_TAG_PATH) != 0u { - (*out).linewidth = -1.0; // TODO: plumb linewidth + linewidth = bitcast(scene[config.linewidth_base + tm.linewidth_ix]); + (*out).linewidth = linewidth; (*out).trans_ix = tm.trans_ix; } // Decode path data @@ -195,6 +197,13 @@ fn main( p1 = mix(p1, p0, 1.0 / 3.0); } } + if linewidth >= 0.0 { + // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm + // This is the correct bounding box, but we're not handling rendering + // in the isotropic case, so it may mismatch. + let stroke = 0.5 * linewidth * vec2(length(transform.matrx.xz), length(transform.matrx.yw)); + bbox += vec4(-stroke, stroke); + } cubics[global_id.x] = Cubic(p0, p1, p2, p3, tm.path_ix, 0u); // Update bounding box using atomics only. Computing a monoid is a // potential future optimization. diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index b43f35b..73f4054 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -30,6 +30,7 @@ struct Config { drawdata_base: u32, transform_base: u32, + linewidth_base: u32, } // Geometry of tiles and bins diff --git a/piet-wgsl/shader/shared/ptcl.wgsl b/piet-wgsl/shader/shared/ptcl.wgsl index 7121f84..8d6e869 100644 --- a/piet-wgsl/shader/shared/ptcl.wgsl +++ b/piet-wgsl/shader/shared/ptcl.wgsl @@ -25,6 +25,7 @@ let PTCL_HEADROOM = 2u; // Tags for PTCL commands let CMD_END = 0u; let CMD_FILL = 1u; +let CMD_STROKE = 2u; let CMD_SOLID = 3u; let CMD_COLOR = 5u; let CMD_JUMP = 11u; @@ -37,6 +38,11 @@ struct CmdFill { backdrop: i32, } +struct CmdStroke { + tile: u32, + half_width: f32, +} + struct CmdJump { new_ix: u32, } diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 9202ef0..04f4232 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -31,6 +31,7 @@ struct Config { drawtag_base: u32, drawdata_base: u32, transform_base: u32, + linewidth_base: u32, } #[repr(C)] @@ -137,6 +138,8 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy scene.extend(&data.drawdata_stream); let transform_base = size_to_words(scene.len()); scene.extend(bytemuck::cast_slice(&data.transform_stream)); + let linewidth_base = size_to_words(scene.len()); + scene.extend(bytemuck::cast_slice(&data.linewidth_stream)); let n_path = data.n_path; // TODO: calculate for real when we do rectangles @@ -151,6 +154,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy drawtag_base, drawdata_base, transform_base, + linewidth_base, }; println!("{:?}", config); let scene_buf = recording.upload(scene); @@ -284,6 +288,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy bin_data_buf, path_buf, tile_buf, + info_buf, bump_buf, ptcl_buf, ], diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index f918d96..b659170 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -241,6 +241,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result Scene { let mut scene = Scene::default(); @@ -32,6 +32,19 @@ pub fn gen_test_scene() -> Scene { let transform = Affine::translate(50.0, 50.0); let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80)); builder.fill(Fill::NonZero, transform, &brush, None, &path); + let transform = Affine::translate(100.0, 100.0); + let style = Stroke { + width: 1.0, + join: piet_scene::Join::Round, + miter_limit: 1.4, + start_cap: piet_scene::Cap::Round, + end_cap: piet_scene::Cap::Round, + dash_pattern: [], + dash_offset: 0.0, + scale: true, + }; + let brush = Brush::Solid(Color::rgb8(0xa0, 0x00, 0x00)); + builder.stroke(&style, transform, &brush, None, &path); scene } From 17a74fb370f782a89386e758727e50e2d1a0a176 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Fri, 4 Nov 2022 13:15:05 -0700 Subject: [PATCH 11/17] Almost rendering tiger We cut'n'pasted the picosvg stuff, kinda ugly. It renders a number of paths of the tiger. I think the gap might be in prefix sums. --- Cargo.lock | 2 + piet-wgsl/Cargo.toml | 6 +- piet-wgsl/shader/coarse.wgsl | 5 +- piet-wgsl/src/main.rs | 1 + piet-wgsl/src/pico_svg.rs | 140 +++++++++++++++++++++++++++++++++++ piet-wgsl/src/render.rs | 6 +- piet-wgsl/src/test_scene.rs | 102 ++++++++++++++++++------- 7 files changed, 231 insertions(+), 31 deletions(-) create mode 100644 piet-wgsl/src/pico_svg.rs diff --git a/Cargo.lock b/Cargo.lock index f2669e2..d82ba57 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1174,10 +1174,12 @@ dependencies = [ "bytemuck", "env_logger", "futures-intrusive", + "kurbo 0.8.3", "parking_lot", "piet-scene", "png", "pollster", + "roxmltree", "wgpu", ] diff --git a/piet-wgsl/Cargo.toml b/piet-wgsl/Cargo.toml index e7fa42d..b785eac 100644 --- a/piet-wgsl/Cargo.toml +++ b/piet-wgsl/Cargo.toml @@ -14,4 +14,8 @@ parking_lot = "0.12" bytemuck = { version = "1.12.1", features = ["derive"] } png = "0.17.6" -piet-scene = { path = "../piet-scene" } +piet-scene = { path = "../piet-scene", features = ["kurbo"] } + +# for picosvg, should be split out +roxmltree = "0.13" +kurbo = "0.8.3" diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 642ce73..5943e51 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -85,7 +85,10 @@ var cmd_limit: u32; // Make sure there is space for a command of given size, plus a jump if needed fn alloc_cmd(size: u32) { if cmd_offset + size >= cmd_limit { - let new_cmd = atomicAdd(&bump.ptcl, PTCL_INCREMENT); + // We might be able to save a little bit of computation here + // by setting the initial value of the bump allocator. + let ptcl_dyn_start = config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC; + let new_cmd = ptcl_dyn_start + atomicAdd(&bump.ptcl, PTCL_INCREMENT); // TODO: robust memory ptcl[cmd_offset] = CMD_JUMP; ptcl[cmd_offset + 1u] = new_cmd; diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index d8d53ad..8b00a26 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -24,6 +24,7 @@ use test_scene::dump_scene_info; use wgpu::{Device, Limits, Queue}; mod engine; +mod pico_svg; mod render; mod shaders; mod test_scene; diff --git a/piet-wgsl/src/pico_svg.rs b/piet-wgsl/src/pico_svg.rs new file mode 100644 index 0000000..eebe3ec --- /dev/null +++ b/piet-wgsl/src/pico_svg.rs @@ -0,0 +1,140 @@ +//! A loader for a tiny fragment of SVG + +use std::str::FromStr; + +use roxmltree::{Document, Node}; + +use kurbo::{Affine, BezPath}; + +use piet_scene::Color; + +pub struct PicoSvg { + pub items: Vec, +} + +pub enum Item { + Fill(FillItem), + Stroke(StrokeItem), +} + +pub struct StrokeItem { + pub width: f64, + pub color: Color, + pub path: BezPath, +} + +pub struct FillItem { + pub color: Color, + pub path: BezPath, +} + +struct Parser<'a> { + scale: f64, + items: &'a mut Vec, +} + +impl PicoSvg { + pub fn load(xml_string: &str, scale: f64) -> Result> { + let doc = Document::parse(xml_string)?; + let root = doc.root_element(); + let mut items = Vec::new(); + let mut parser = Parser::new(&mut items, scale); + for node in root.children() { + parser.rec_parse(node)?; + } + Ok(PicoSvg { items }) + } +} + +impl<'a> Parser<'a> { + fn new(items: &'a mut Vec, scale: f64) -> Parser<'a> { + Parser { scale, items } + } + + fn rec_parse(&mut self, node: Node) -> Result<(), Box> { + let transform = if self.scale >= 0.0 { + Affine::scale(self.scale) + } else { + Affine::new([-self.scale, 0.0, 0.0, self.scale, 0.0, 1536.0]) + }; + if node.is_element() { + match node.tag_name().name() { + "g" => { + for child in node.children() { + self.rec_parse(child)?; + } + } + "path" => { + let d = node.attribute("d").ok_or("missing 'd' attribute")?; + let bp = BezPath::from_svg(d)?; + let path = transform * bp; + // TODO: default fill color is black, but this is overridden in tiger to this logic. + if let Some(fill_color) = node.attribute("fill") { + if fill_color != "none" { + let color = parse_color(fill_color); + let color = modify_opacity(color, "fill-opacity", node); + self.items.push(Item::Fill(FillItem { + color, + path: path.clone(), + })); + } + } + if let Some(stroke_color) = node.attribute("stroke") { + if stroke_color != "none" { + let width = self.scale.abs() + * f64::from_str( + node.attribute("stroke-width").ok_or("missing width")?, + )?; + let color = parse_color(stroke_color); + let color = modify_opacity(color, "stroke-opacity", node); + self.items + .push(Item::Stroke(StrokeItem { width, color, path })); + } + } + } + _ => (), + } + } + Ok(()) + } +} + +fn parse_color(color: &str) -> Color { + if color.as_bytes()[0] == b'#' { + let mut hex = u32::from_str_radix(&color[1..], 16).unwrap(); + if color.len() == 4 { + hex = (hex >> 8) * 0x110000 + ((hex >> 4) & 0xf) * 0x1100 + (hex & 0xf) * 0x11; + } + let rgba = (hex << 8) + 0xff; + let (r, g, b, a) = ( + (rgba >> 24 & 255) as u8, + ((rgba >> 16) & 255) as u8, + ((rgba >> 8) & 255) as u8, + (rgba & 255) as u8, + ); + Color::rgba8(r, g, b, a) + } else if color.starts_with("rgb(") { + let mut iter = color[4..color.len() - 1].split(','); + let r = u8::from_str(iter.next().unwrap()).unwrap(); + let g = u8::from_str(iter.next().unwrap()).unwrap(); + let b = u8::from_str(iter.next().unwrap()).unwrap(); + Color::rgb8(r, g, b) + } else { + Color::rgba8(255, 0, 255, 0x80) + } +} + +fn modify_opacity(mut color: Color, attr_name: &str, node: Node) -> Color { + if let Some(opacity) = node.attribute(attr_name) { + let alpha = if opacity.ends_with("%") { + let pctg = opacity[..opacity.len() - 1].parse().unwrap_or(100.0); + pctg * 0.01 + } else { + opacity.parse().unwrap_or(1.0) + } as f64; + color.a = (alpha.min(1.0).max(0.0) * 255.0).round() as u8; + color + } else { + color + } +} diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 04f4232..99c068e 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -220,7 +220,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy let bump_buf = BufProxy::new(BUMP_SIZE); // Not actually used yet. let clip_bbox_buf = BufProxy::new(1024); - let bin_data_buf = BufProxy::new(1 << 16); + let bin_data_buf = BufProxy::new(1 << 20); let width_in_bins = (config.width_in_tiles + 15) / 16; let height_in_bins = (config.height_in_tiles + 15) / 16; let n_bins = width_in_bins * height_in_bins; @@ -256,7 +256,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy ], ); - let segments_buf = BufProxy::new(1 << 20); + let segments_buf = BufProxy::new(1 << 24); recording.dispatch( shaders.path_coarse, (path_coarse_wgs, 1, 1), @@ -276,7 +276,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy (path_wgs, 1, 1), [config_buf, path_buf, tile_buf], ); - let ptcl_buf = BufProxy::new(1 << 20); + let ptcl_buf = BufProxy::new(1 << 24); recording.dispatch( shaders.coarse, (width_in_bins, height_in_bins, 1), diff --git a/piet-wgsl/src/test_scene.rs b/piet-wgsl/src/test_scene.rs index 2d28659..ab7d3e3 100644 --- a/piet-wgsl/src/test_scene.rs +++ b/piet-wgsl/src/test_scene.rs @@ -14,37 +14,37 @@ // // Also licensed under MIT license, at your choice. +use kurbo::BezPath; use piet_scene::{Affine, Brush, Color, Fill, PathElement, Point, Scene, SceneBuilder, Stroke}; +use crate::pico_svg::PicoSvg; + pub fn gen_test_scene() -> Scene { let mut scene = Scene::default(); let mut builder = SceneBuilder::for_scene(&mut scene); - let path = [ - PathElement::MoveTo(Point::new(100.0, 100.0)), - PathElement::LineTo(Point::new(500.0, 120.0)), - PathElement::LineTo(Point::new(300.0, 150.0)), - PathElement::LineTo(Point::new(200.0, 260.0)), - PathElement::LineTo(Point::new(150.0, 210.0)), - PathElement::Close, - ]; - let brush = Brush::Solid(Color::rgb8(0x40, 0x40, 0xff)); - builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); - let transform = Affine::translate(50.0, 50.0); - let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80)); - builder.fill(Fill::NonZero, transform, &brush, None, &path); - let transform = Affine::translate(100.0, 100.0); - let style = Stroke { - width: 1.0, - join: piet_scene::Join::Round, - miter_limit: 1.4, - start_cap: piet_scene::Cap::Round, - end_cap: piet_scene::Cap::Round, - dash_pattern: [], - dash_offset: 0.0, - scale: true, - }; - let brush = Brush::Solid(Color::rgb8(0xa0, 0x00, 0x00)); - builder.stroke(&style, transform, &brush, None, &path); + if false { + let path = [ + PathElement::MoveTo(Point::new(100.0, 100.0)), + PathElement::LineTo(Point::new(500.0, 120.0)), + PathElement::LineTo(Point::new(300.0, 150.0)), + PathElement::LineTo(Point::new(200.0, 260.0)), + PathElement::LineTo(Point::new(150.0, 210.0)), + PathElement::Close, + ]; + let brush = Brush::Solid(Color::rgb8(0x40, 0x40, 0xff)); + builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); + let transform = Affine::translate(50.0, 50.0); + let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80)); + builder.fill(Fill::NonZero, transform, &brush, None, &path); + let transform = Affine::translate(100.0, 100.0); + let style = simple_stroke(1.0); + let brush = Brush::Solid(Color::rgb8(0xa0, 0x00, 0x00)); + builder.stroke(&style, transform, &brush, None, &path); + } else { + let xml_str = std::str::from_utf8(include_bytes!("../../piet-gpu/Ghostscript_Tiger.svg")).unwrap(); + let svg = PicoSvg::load(xml_str, 6.0).unwrap(); + render_svg(&mut builder, &svg, false); + } scene } @@ -56,3 +56,53 @@ pub fn dump_scene_info(scene: &Scene) { bytemuck::cast_slice::(&data.pathseg_stream) ); } + +pub fn render_svg(sb: &mut SceneBuilder, svg: &PicoSvg, print_stats: bool) { + use crate::pico_svg::*; + let start = std::time::Instant::now(); + for item in svg.items.iter() { + match item { + Item::Fill(fill) => { + sb.fill( + Fill::NonZero, + Affine::IDENTITY, + &fill.color.into(), + None, + convert_bez_path(&fill.path), + ); + } + Item::Stroke(stroke) => { + sb.stroke( + &simple_stroke(stroke.width as f32), + Affine::IDENTITY, + &stroke.color.into(), + None, + convert_bez_path(&stroke.path), + ); + } + } + } + if print_stats { + println!("flattening and encoding time: {:?}", start.elapsed()); + } +} + +fn convert_bez_path<'a>(path: &'a BezPath) -> impl Iterator + 'a + Clone { + path.elements() + .iter() + .map(|el| PathElement::from_kurbo(*el)) +} + + +fn simple_stroke(width: f32) -> Stroke<[f32; 0]> { + Stroke { + width, + join: piet_scene::Join::Round, + miter_limit: 1.4, + start_cap: piet_scene::Cap::Round, + end_cap: piet_scene::Cap::Round, + dash_pattern: [], + dash_offset: 0.0, + scale: true, + } +} From 494f523c4176ea288ae14afdacd00ce40e50ab68 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Fri, 4 Nov 2022 21:41:37 -0700 Subject: [PATCH 12/17] Tiger! Still one flaw, fat lines aren't expanded with strokes in path coarse rasterization. But that's a small visual ding, and can be fixed That said, there is some really strange stuff going on in tile_alloc. It's using storage to do a uniform broadcast (the result of bump allocation for the workgroup), which is not great at all. It should be using workgroup storage, but on my mac it behaves as if the workgroup barrier is not in place. Investigating. --- piet-wgsl/shader/coarse.wgsl | 1 - piet-wgsl/shader/draw_leaf.wgsl | 28 +++++++++++++++++++++------- piet-wgsl/shader/pathtag_scan.wgsl | 8 ++++---- piet-wgsl/shader/tile_alloc.wgsl | 14 +++++++------- piet-wgsl/src/main.rs | 4 ++-- piet-wgsl/src/render.rs | 7 +++---- piet-wgsl/src/test_scene.rs | 1 + 7 files changed, 38 insertions(+), 25 deletions(-) diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 5943e51..e1e88d7 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -99,7 +99,6 @@ fn alloc_cmd(size: u32) { fn write_path(tile: Tile, linewidth: f32) { // TODO: take flags - // TODO: handle stroke alloc_cmd(3u); if linewidth < 0.0 { if tile.segments != 0u { diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index 14a1163..5909fdd 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -68,8 +68,28 @@ fn main( @builtin(workgroup_id) wg_id: vec3, ) { let ix = global_id.x; + // Reduce prefix of workgroups up to this one + var agg = draw_monoid_identity(); + if local_id.x < wg_id.x { + agg = reduced[local_id.x]; + } + sh_scratch[local_id.x] = agg; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_scratch[local_id.x + (1u << i)]; + agg = combine_draw_monoid(agg, other); + } + workgroupBarrier(); + sh_scratch[local_id.x] = agg; + } + // Two barriers can be eliminated if we use separate shared arrays + // for prefix and intra-workgroup prefix sum. + workgroupBarrier(); + var m = sh_scratch[0]; + workgroupBarrier(); let tag_word = scene[config.drawtag_base + ix]; - var agg = map_draw_tag(tag_word); + agg = map_draw_tag(tag_word); sh_scratch[local_id.x] = agg; for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { workgroupBarrier(); @@ -81,12 +101,6 @@ fn main( sh_scratch[local_id.x] = agg; } workgroupBarrier(); - var m = draw_monoid_identity(); - if wg_id.x > 0u { - // TODO: separate dispatch to scan these, or integrate into this one? - // In the meantime, will be limited to 2 * WG draw objs. - m = reduced[wg_id.x - 1u]; - } if local_id.x > 0u { m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]); } diff --git a/piet-wgsl/shader/pathtag_scan.wgsl b/piet-wgsl/shader/pathtag_scan.wgsl index d18d872..fe87750 100644 --- a/piet-wgsl/shader/pathtag_scan.wgsl +++ b/piet-wgsl/shader/pathtag_scan.wgsl @@ -43,13 +43,13 @@ fn main( @builtin(workgroup_id) wg_id: vec3, ) { var agg = tag_monoid_identity(); - if (local_id.x < wg_id.x) { + if local_id.x < wg_id.x { agg = reduced[local_id.x]; } sh_parent[local_id.x] = agg; for (var i = 0u; i < LG_WG_SIZE; i += 1u) { workgroupBarrier(); - if (local_id.x + (1u << i) < WG_SIZE) { + if local_id.x + (1u << i) < WG_SIZE { let other = sh_parent[local_id.x + (1u << i)]; agg = combine_tag_monoid(agg, other); } @@ -63,7 +63,7 @@ fn main( sh_monoid[local_id.x] = agg; for (var i = 0u; i < LG_WG_SIZE; i += 1u) { workgroupBarrier(); - if (local_id.x >= 1u << i) { + if local_id.x >= 1u << i { let other = sh_monoid[local_id.x - (1u << i)]; agg = combine_tag_monoid(other, agg); } @@ -72,7 +72,7 @@ fn main( } // prefix up to this workgroup var tm = sh_parent[0]; - if (local_id.x > 0u) { + if local_id.x > 0u { tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]); } // exclusive prefix sum, granularity of 4 tag bytes diff --git a/piet-wgsl/shader/tile_alloc.wgsl b/piet-wgsl/shader/tile_alloc.wgsl index 1c27c83..56771d6 100644 --- a/piet-wgsl/shader/tile_alloc.wgsl +++ b/piet-wgsl/shader/tile_alloc.wgsl @@ -85,14 +85,14 @@ fn main( workgroupBarrier(); sh_tile_count[local_id.x] = total_tile_count; } - workgroupBarrier(); - // should be able to avoid a barrier by adding total_tile count from - // thread WG_SIZE - 1, but it doesn't work - if local_id.x == 0u { - sh_tile_offset = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]); + if local_id.x == WG_SIZE - 1u { + paths[drawobj_ix].tiles = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]); } - workgroupBarrier(); - let tile_offset = sh_tile_offset; + // Using storage barriers is a workaround for what appears to be a miscompilation + // when a normal workgroup-shared variable is used to broadcast the value. + storageBarrier(); + let tile_offset = paths[drawobj_ix | (WG_SIZE - 1u)].tiles; + storageBarrier(); if drawobj_ix < config.n_drawobj { let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u); let bbox = vec4(ux0, uy0, ux1, uy1); diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index 8b00a26..38645db 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -20,7 +20,6 @@ use std::{fs::File, io::BufWriter}; use engine::Engine; -use test_scene::dump_scene_info; use wgpu::{Device, Limits, Queue}; mod engine; @@ -70,10 +69,11 @@ async fn do_render( queue: &Queue, engine: &mut Engine, ) -> Result<(), Box> { + #[allow(unused)] let shaders = shaders::init_shaders(device, engine)?; let full_shaders = shaders::full_shaders(device, engine)?; let scene = test_scene::gen_test_scene(); - dump_scene_info(&scene); + //test_scene::dump_scene_info(&scene); //let (recording, buf) = render::render(&scene, &shaders); let (recording, buf) = render::render_full(&scene, &full_shaders); let downloads = engine.run_recording(&device, &queue, &recording)?; diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 99c068e..690e681 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -86,7 +86,6 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { [config_buf, scene_buf, reduced_buf, tagmonoid_buf], ); - let n_pathtag = data.pathseg_stream.len(); let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; // TODO: more principled size calc let tiles_buf = BufProxy::new(4097 * 8); @@ -125,7 +124,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy let data = scene.data(); let n_pathtag = data.tag_stream.len(); let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG); - let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); + // TODO: can compute size accurately, avoid reallocation let mut scene: Vec = Vec::with_capacity(pathtag_padded); let pathtag_base = size_to_words(scene.len()); scene.extend(&data.tag_stream); @@ -160,6 +159,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy let scene_buf = recording.upload(scene); let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); + let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE); // TODO: really only need pathtag_wgs - 1 recording.dispatch( @@ -169,7 +169,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy ); let tagmonoid_buf = - BufProxy::new(pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_SIZE); + BufProxy::new(pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE); recording.dispatch( shaders.pathtag_scan, (pathtag_wgs as u32, 1, 1), @@ -182,7 +182,6 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy (drawobj_wgs, 1, 1), [config_buf, path_bbox_buf], ); - let n_pathtag = data.pathseg_stream.len(); let cubic_buf = BufProxy::new(n_pathtag as u64 * CUBIC_SIZE); let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; recording.dispatch( diff --git a/piet-wgsl/src/test_scene.rs b/piet-wgsl/src/test_scene.rs index ab7d3e3..4cfe1d4 100644 --- a/piet-wgsl/src/test_scene.rs +++ b/piet-wgsl/src/test_scene.rs @@ -48,6 +48,7 @@ pub fn gen_test_scene() -> Scene { scene } +#[allow(unused)] pub fn dump_scene_info(scene: &Scene) { let data = scene.data(); println!("tags {:?}", data.tag_stream); From 5bd3a3639f783e3fb677ec3cace14766aaa78006 Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Tue, 8 Nov 2022 19:30:47 -0800 Subject: [PATCH 13/17] Fix precedence issue Note that this is evidence in favor of https://github.com/gfx-rs/naga/issues/2098 - my code is actually wrong, and it was caught by trying to port it to run in Chrome Canary. --- piet-wgsl/shader/shared/pathtag.wgsl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/piet-wgsl/shader/shared/pathtag.wgsl b/piet-wgsl/shader/shared/pathtag.wgsl index fed16fd..8e46979 100644 --- a/piet-wgsl/shader/shared/pathtag.wgsl +++ b/piet-wgsl/shader/shared/pathtag.wgsl @@ -63,7 +63,7 @@ fn reduce_tag(tag_word: u32) -> TagMonoid { a += a >> 16u; c.pathseg_offset = a & 0xffu; #ifdef full - c.path_ix = countOneBits(tag_word & (PATH_TAG_PATH) * 0x1010101u); + c.path_ix = countOneBits(tag_word & (PATH_TAG_PATH * 0x1010101u)); c.linewidth_ix = countOneBits(tag_word & (PATH_TAG_LINEWIDTH * 0x1010101u)); #endif return c; From ef3ed3c9d73e6e220c27c8c59674e545230de68b Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 10 Nov 2022 19:48:36 -0800 Subject: [PATCH 14/17] Checkpoint of clip and gradient work This is a checkpoint of partly completed work. Much of the GPU side is done, very little of the CPU side. For clips, the clip_els bindings (binding 6 of draw_leaf) are not added. Clip logic is missing from coarse. The overflow buffer is missing from fine, as is its size calculation in coarse (but it should work as long as the max depth fits within BLEND_STACK_SPLIT). For gradients, the texture binding is missing (binding 6) is missing from fine, as is the infrastructure in engine to deal with texture resources, and of course porting over the logic to fill it. The code is not tested, bugs may lurk. --- .vscode/settings.json | 1 + piet-wgsl/shader/clip_leaf.wgsl | 195 ++++++++++++++++++++++++++++ piet-wgsl/shader/clip_reduce.wgsl | 66 ++++++++++ piet-wgsl/shader/coarse.wgsl | 48 ++++++- piet-wgsl/shader/fine.wgsl | 95 ++++++++++++++ piet-wgsl/shader/shared/clip.wgsl | 14 ++ piet-wgsl/shader/shared/config.wgsl | 1 + piet-wgsl/shader/shared/ptcl.wgsl | 20 +++ piet-wgsl/src/render.rs | 3 + 9 files changed, 441 insertions(+), 2 deletions(-) create mode 100644 piet-wgsl/shader/clip_leaf.wgsl create mode 100644 piet-wgsl/shader/clip_reduce.wgsl create mode 100644 piet-wgsl/shader/shared/clip.wgsl diff --git a/.vscode/settings.json b/.vscode/settings.json index 813fac6..b8fa532 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,6 +2,7 @@ "wgsl-analyzer.customImports": { "bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl", "bump": "${workspaceFolder}/piet-wgsl/shader/shared/bump.wgsl", + "clip": "${workspaceFolder}/piet-wgsl/shader/shared/clip.wgsl", "config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl", "cubic": "${workspaceFolder}/piet-wgsl/shader/shared/cubic.wgsl", "drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl", diff --git a/piet-wgsl/shader/clip_leaf.wgsl b/piet-wgsl/shader/clip_leaf.wgsl new file mode 100644 index 0000000..f294317 --- /dev/null +++ b/piet-wgsl/shader/clip_leaf.wgsl @@ -0,0 +1,195 @@ + +#import config +#import bbox +#import clip +#import drawtag + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var clip_inp: array; + +@group(0) @binding(2) +var path_bboxes: array; + +@group(0) @binding(3) +var reduced: array; + +@group(0) @binding(4) +var clip_els: array; + +@group(0) @binding(5) +var draw_monoids: array; + +@group(0) @binding(6) +var clip_bboxes: array>; + +let WG_SIZE = 256u; +var sh_bic: array; +var sh_stack: array; +var sh_stack_bbox: array, WG_SIZE>; +var sh_bbox: array, WG_SIZE>; +var sh_link: array; + +fn search_link(bic: ptr, ix: u32) -> i32 { + var j = 0u; + while j < firstTrailingBit(WG_SIZE) { + let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); + if ((ix >> j) & 1u) != 0u { + let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); + if test.b >= 0u { + *bic = test; + ix -= 1u << j; + } + } + j += 1u; + } + if ix > 0u { + while j > 0u { + j -= 1u; + let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); + let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); + if test.b == 0u { + *bic = test; + ix -= 1u << j; + } + } + } + if ix > 0u { + return i32(ix) - 1; + } else { + return i32(~(*bic).a); + } +} + +fn load_clip_inp(ix: u32) -> i32 { + if ix < config.n_clip { + return clip_inp[ix]; + } else { + return i32(0x80000000); + } +} + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + var bic: Bic; + if local_id.x < wg_id.x { + bic = reduced[local_id.x]; + } + sh_bic[local_id.x] = bic; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_bic[local_id.x + (1u << i)]; + bic = bic_combine(bic, other); + } + workgroupBarrier(); + sh_bic[local_id.x] = bic; + } + workgroupBarrier(); + let stack_size = sh_bic[0].b; + // TODO: if stack depth > WG_SIZE desired, scan here + + // binary search in stack + let sp = WG_SIZE - 1u - local_id.x; + var ix = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let probe = ix + ((WG_SIZE / 2u) >> i); + if sp < sh_bic[probe].b { + ix = probe; + } + } + let b = sh_bic[ix].b; + var bbox = vec4(-1e9, -1e9, 1e9, 1e9); + if sp < b { + let el = clip_els[ix * WG_SIZE + b - sp - 1u]; + sh_stack[local_id.x] = el.parent_ix; + bbox = el.bbox; + } + // forward scan of bbox values of prefix stack + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + sh_stack_bbox[local_id.x] = bbox; + workgroupBarrier(); + if local_id.x >= (1u << i) { + bbox = bbox_intersect(sh_stack_bbox[local_id.x - (1u << i)], bbox); + } + workgroupBarrier(); + } + sh_stack_bbox[local_id.x] = bbox; + + // Read input and compute Bic binary tree + let inp = load_clip_inp(global_id.x); + let is_push = inp >= 0; + var bic = Bic(1u - u32(is_push), u32(is_push)); + sh_bic[local_id.x] = bic; + if is_push { + let path_bbox = path_bboxes[inp]; + bbox = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); + } else { + bbox = vec4(-1e9, -1e9, 1e9, 1e9); + } + var inbase = 0u; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i)); + workgroupBarrier(); + if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) { + let in_off = inbase + local_id.x * 2u; + sh_bic[outbase + local_id.x] = bic_combine(sh_bic[in_off], sh_bic[in_off + 1u]); + } + inbase = outbase; + } + workgroupBarrier(); + // search for predecessor node + bic = Bic(); + var link = search_link(&bic, local_id.x); + sh_link[local_id.x] = link; + workgroupBarrier(); + let grandparent = select(link - 1, sh_link[link], link >= 0); + var parent: i32; + if link >= 0 { + parent = i32(wg_id.x * WG_SIZE) + link; + } else if link + i32(stack_size) >= 0 { + parent = i32(sh_stack[i32(WG_SIZE) + link]); + } else { + parent = -1; + } + // bbox scan (intersect) across parent links + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + if i != 0u { + sh_link[local_id.x] = link; + } + sh_bbox[local_id.x] = bbox; + workgroupBarrier(); + if link >= 0 { + bbox = bbox_intersect(sh_bbox[link], bbox); + link = sh_link[link]; + } + workgroupBarrier(); + } + if link + i32(stack_size) >= 0 { + bbox = bbox_intersect(sh_stack_bbox[i32(WG_SIZE) + link], bbox); + } + // At this point, bbox is the intersection of bboxes on the path to the root + sh_bbox[local_id.x] = bbox; + workgroupBarrier(); + + if !is_push && global_id.x < config.n_clip { + // Fix up drawmonoid so path_ix of EndClip matches BeginClip + let path_ix = clip_inp[parent]; + draw_monoids[~inp].path_ix = u32(path_ix); + + if grandparent >= 0 { + bbox = sh_bbox[grandparent]; + } else if grandparent + i32(stack_size) >= 0 { + bbox = sh_stack_bbox[i32(WG_SIZE) + grandparent]; + } else { + bbox = vec4(-1e9, -1e9, 1e9, 1e9); + } + } + clip_bboxes[global_id.x] = bbox +} diff --git a/piet-wgsl/shader/clip_reduce.wgsl b/piet-wgsl/shader/clip_reduce.wgsl new file mode 100644 index 0000000..3288b07 --- /dev/null +++ b/piet-wgsl/shader/clip_reduce.wgsl @@ -0,0 +1,66 @@ + +#import config +#import bbox +#import clip + +@group(0) @binding(0) +var config: Config; + +@group(0) @binding(1) +var clip_inp: array; + +@group(0) @binding(2) +var path_bboxes: array; + +@group(0) @binding(3) +var reduced: array; + +@group(0) @binding(4) +var clip_out: array; + +let WG_SIZE = 256u; +var sh_bic: array; +var sh_parent: array; +var sh_path_ix: array; + +@compute @workgroup_size(256) +fn main( + @builtin(global_invocation_id) global_id: vec3, + @builtin(local_invocation_id) local_id: vec3, + @builtin(workgroup_id) wg_id: vec3, +) { + let inp = clip_inp[global_id.x]; + let is_push = inp >= 0; + var bic = Bic(1u - u32(is_push), u32(is_push)); + // reverse scan of bicyclic semigroup + sh_bic[local_id.x] = bic; + for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + workgroupBarrier(); + if local_id.x + (1u << i) < WG_SIZE { + let other = sh_bic[local_id.x + (1u << i)]; + bic = bic_combine(bic, other); + } + workgroupBarrier(); + sh_bic[local_id.x] = bic; + } + if local_id.x == 0u { + reduced[wg_id.x] = bic; + } + workgroupBarrier(); + let size = sh_bic[0].b; + bic = Bic(); + if is_push && bic.a == 0u { + let local_ix = size - bic.b - 1u; + sh_parent[local_ix] = local_id.x; + sh_path_ix[local_ix] = u32(inp); + } + workgroupBarrier(); + // TODO: possibly do forward scan here if depth can exceed wg size + if local_id.x < size { + let path_ix = sh_path_ix[local_id.x]; + let path_bbox = path_bboxes[path_ix]; + let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE; + let bbox = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); + clip_out[global_id.x] = ClipEl(parent_ix, bbox); + } +} diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index e1e88d7..5741ec3 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -125,7 +125,27 @@ fn write_color(color: CmdColor) { ptcl[cmd_offset] = CMD_COLOR; ptcl[cmd_offset + 1u] = color.rgba_color; cmd_offset += 2u; +} +// Discussion point: these are basically copying from info to ptcl. We +// could just write an info offset and have fine bind that buffer and read +// from it. + +fn write_lin_grad(lin: CmdLinGrad) { + alloc_cmd(5u); + ptcl[cmd_offset] = CMD_LIN_GRAD; + ptcl[cmd_offset + 1u] = lin.index; + ptcl[cmd_offset + 2u] = bitcast(lin.line_x); + ptcl[cmd_offset + 3u] = bitcast(lin.line_y); + ptcl[cmd_offset + 4u] = bitcast(lin.line_c); + cmd_offset += 5u; +} + +fn write_rad_grad(rad: CmdRadGrad) { + alloc_cmd(12u); + ptcl[cmd_offset] = CMD_RAD_GRAD; + ptcl[cmd_offset + 1u] = rad.index; + cmd_offset += 12u; } @compute @workgroup_size(256) @@ -304,14 +324,38 @@ fn main( if clip_zero_depth == 0u { let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x; let tile = tiles[tile_ix]; + let linewidth = bitcast(info[di]); + write_path(tile, linewidth); switch drawtag { // DRAWTAG_FILL_COLOR case 0x44u: { - let linewidth = bitcast(info[di]); let rgba_color = scene[dd]; - write_path(tile, linewidth); write_color(CmdColor(rgba_color)); } + // DRAWTAG_FILL_LIN_GRADIENT + case 0x114u: { + var lin: CmdLinGrad; + lin.index = scene[dd]; + lin.line_x = bitcast(info[di + 1u]); + lin.line_y = bitcast(info[di + 2u]); + lin.line_c = bitcast(info[di + 3u]); + write_lin_grad(lin); + } + // DRAWTAG_FILL_RAD_GRADIENT + case 0x2dcu: { + var rad: CmdRadGrad; + rad.index = scene[dd]; + let m0 = bitcast(info[di + 1u]); + let m1 = bitcast(info[di + 2u]); + let m2 = bitcast(info[di + 3u]); + let m3 = bitcast(info[di + 4u]); + rad.matrx = vec4(m0, m1, m2, m3); + rad.xlat = vec2(bitcast(info[di + 5u]), bitcast(info[di + 6u])); + rad.c1 = vec2(bitcast(info[di + 7u]), bitcast(info[di + 8u])); + rad.ra = bitcast(info[di + 9u]); + rad.roff = bitcast(info[di + 10u]); + write_rad_grad(rad); + } default: {} } } diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index c347630..d76edef 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -42,9 +42,15 @@ var output: array; #ifdef full #import ptcl +let GRADIENT_WIDTH = 512; +let BLEND_STACK_SPLIT = 4u; + @group(0) @binding(4) var ptcl: array; +@group(0) @binding(5) +var gradients: texture_2d; + fn read_fill(cmd_ix: u32) -> CmdFill { let tile = ptcl[cmd_ix + 1u]; let backdrop = i32(ptcl[cmd_ix + 2u]); @@ -61,6 +67,33 @@ fn read_color(cmd_ix: u32) -> CmdColor { let rgba_color = ptcl[cmd_ix + 1u]; return CmdColor(rgba_color); } + +fn read_lin_grad(cmd_ix: u32) -> CmdLinGrad { + let index = ptcl[cmd_ix + 1u]; + let line_x = bitcast(ptcl[cmd_ix + 2u]); + let line_y = bitcast(ptcl[cmd_ix + 3u]); + let line_c = bitcast(ptcl[cmd_ix + 4u]); + return CmdLinGrad(index, line_x, line_y, line_c); +} + +fn read_rad_grad(cmd_ix: u32) -> CmdRadGrad { + let index = ptcl[cmd_ix + 1u]; + let m0 = bitcast(ptcl[cmd_ix + 2u]); + let m1 = bitcast(ptcl[cmd_ix + 3u]); + let m2 = bitcast(ptcl[cmd_ix + 4u]); + let m3 = bitcast(ptcl[cmd_ix + 5u]); + let matrx = vec4(m0, m1, m2, m3); + let xlat = vec2(bitcast(ptcl[cmd_ix + 6u]), bitcast(ptcl[cmd_ix + 7u])); + let c1 = vec2(bitcast(ptcl[cmd_ix + 8u]), bitcast(ptcl[cmd_ix + 9u])); + let ra = bitcast(ptcl[cmd_ix + 10u]); + let roff = bitcast(ptcl[cmd_ix + 11u]); + return CmdRadGrad(index, matrx, xlat, c1, ra, roff); +} + +fn mix_blend_compose(backdrop: vec4, src: vec4, mode: u32) -> vec4 { + // TODO: ALL the blend modes. This is just vanilla src-over. + return backdrop * (1.0 - src.a) + src; +} #endif let PIXELS_PER_THREAD = 4u; @@ -147,6 +180,8 @@ fn main( let xy = vec2(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y)); #ifdef full var rgba: array, PIXELS_PER_THREAD>; + var blend_stack: array, PIXELS_PER_THREAD>; + var clip_depth = 0u; var area: array; var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC; @@ -187,6 +222,66 @@ fn main( } cmd_ix += 2u; } + // CMD_LIN_GRAD + case 6u: { + let lin = read_lin_grad(cmd_ix); + let d = lin.line_x * xy.x + lin.line_y * xy.y + lin.line_c; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let my_d = d + lin.line_x * f32(i); + let x = i32(round(clamp(my_d, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1))); + let fg_rgba = textureLoad(gradients, vec2(x, i32(lin.index)), 0); + let fg_i = fg_rgba * area[i]; + rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; + } + cmd_ix += 12u; + } + // CMD_RAD_GRAD + case 7u: { + let rad = read_rad_grad(cmd_ix); + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + let my_xy = vec2(xy.x + f32(i), xy.y); + // TODO: can hoist y, but for now stick to piet-gpu + let xy_xformed = rad.matrx.xz * my_xy.x + rad.matrx.yw * my_xy.y - rad.xlat; + let ba = dot(xy_xformed, rad.c1); + let ca = rad.ra * dot(xy_xformed, xy_xformed); + let t = sqrt(ba * ba + ca) - ba - rad.roff; + let x = i32(round(clamp(t, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1))); + let fg_rgba = textureLoad(gradients, vec2(x, i32(rad.index)), 0); + let fg_i = fg_rgba * area[i]; + rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; + } + cmd_ix += 12u; + } + // CMD_BEGIN_CLIP + case 9u: { + if clip_depth < BLEND_STACK_SPLIT { + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + blend_stack[clip_depth][i] = pack4x8unorm(rgba[i]); + rgba[i] = vec4(0.0); + } + } else { + // TODO: spill to memory + } + clip_depth += 1u; + cmd_ix += 1u; + } + // CMD_END_CLIP + case 10u: { + let blend = ptcl[cmd_ix + 1u]; + clip_depth -= 1u; + for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) { + var bg_rgba: u32; + if clip_depth < BLEND_STACK_SPLIT { + bg_rgba = blend_stack[clip_depth][i]; + } else { + // load from memory + } + let bg = unpack4x8unorm(bg_rgba); + let fg = rgba[i] * area[i]; + rgba[i] = mix_blend_compose(bg, fg, blend); + } + cmd_ix += 2u; + } // CMD_JUMP case 11u: { cmd_ix = ptcl[cmd_ix + 1u]; diff --git a/piet-wgsl/shader/shared/clip.wgsl b/piet-wgsl/shader/shared/clip.wgsl new file mode 100644 index 0000000..608e9bf --- /dev/null +++ b/piet-wgsl/shader/shared/clip.wgsl @@ -0,0 +1,14 @@ +struct Bic { + a: u32, + b: u32, +} + +fn bic_combine(x: Bic, y: Bic) -> Bic { + let m = min(x.b, y.a); + return Bic(x.a + y.a - m, x.b + y.b - m); +} + +struct ClipEl { + parent_ix: u32, + bbox: vec4, +} diff --git a/piet-wgsl/shader/shared/config.wgsl b/piet-wgsl/shader/shared/config.wgsl index 73f4054..5db894b 100644 --- a/piet-wgsl/shader/shared/config.wgsl +++ b/piet-wgsl/shader/shared/config.wgsl @@ -20,6 +20,7 @@ struct Config { n_drawobj: u32, n_path: u32, + n_clip: u32, // offsets within scene buffer (in u32 units) // Note: this is a difference from piet-gpu, which is in bytes diff --git a/piet-wgsl/shader/shared/ptcl.wgsl b/piet-wgsl/shader/shared/ptcl.wgsl index 8d6e869..92316cc 100644 --- a/piet-wgsl/shader/shared/ptcl.wgsl +++ b/piet-wgsl/shader/shared/ptcl.wgsl @@ -28,6 +28,10 @@ let CMD_FILL = 1u; let CMD_STROKE = 2u; let CMD_SOLID = 3u; let CMD_COLOR = 5u; +let CMD_LIN_GRAD = 6u; +let CMD_RAD_GRAD = 7u; +let CMD_BEGIN_CLIP = 9u; +let CMD_END_CLIP = 10u; let CMD_JUMP = 11u; // The individual PTCL structs are written here, but read/write is by @@ -50,3 +54,19 @@ struct CmdJump { struct CmdColor { rgba_color: u32, } + +struct CmdLinGrad { + index: u32, + line_x: f32, + line_y: f32, + line_c: f32, +} + +struct CmdRadGrad { + index: u32, + matrx: vec4, + xlat: vec2, + c1: vec2, + ra: f32, + roff: f32, +} diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 690e681..7d052cc 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -26,6 +26,7 @@ struct Config { height_in_tiles: u32, n_drawobj: u32, n_path: u32, + n_clip: u32, pathtag_base: u32, pathdata_base: u32, drawtag_base: u32, @@ -143,11 +144,13 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy let n_path = data.n_path; // TODO: calculate for real when we do rectangles let n_drawobj = n_path; + let n_clip = 0; // TODO: wire up correctly let config = Config { width_in_tiles: 64, height_in_tiles: 64, n_drawobj, n_path, + n_clip, pathtag_base, pathdata_base, drawtag_base, From 20f7b68514758dc1743597c1600f9b99770a415c Mon Sep 17 00:00:00 2001 From: Chad Brokaw Date: Wed, 16 Nov 2022 10:49:38 -0500 Subject: [PATCH 15/17] finish gradient support --- piet-wgsl/src/engine.rs | 217 +++++++++++++++++++++++++++++++----- piet-wgsl/src/main.rs | 2 +- piet-wgsl/src/ramp.rs | 137 +++++++++++++++++++++++ piet-wgsl/src/render.rs | 96 ++++++++++++---- piet-wgsl/src/shaders.rs | 1 + piet-wgsl/src/test_scene.rs | 82 ++++++++++---- 6 files changed, 459 insertions(+), 76 deletions(-) create mode 100644 piet-wgsl/src/ramp.rs diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs index 7d1c854..5e89905 100644 --- a/piet-wgsl/src/engine.rs +++ b/piet-wgsl/src/engine.rs @@ -17,7 +17,7 @@ use std::{ borrow::Cow, collections::{hash_map::Entry, HashMap}, - num::NonZeroU64, + num::{NonZeroU32, NonZeroU64}, sync::atomic::{AtomicU64, Ordering}, }; @@ -25,7 +25,8 @@ use futures_intrusive::channel::shared::GenericOneshotReceiver; use parking_lot::RawMutex; use wgpu::{ util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice, BufferView, - ComputePipeline, Device, Queue, + ComputePipeline, Device, Queue, Texture, TextureAspect, TextureFormat, TextureUsages, + TextureView, TextureViewDimension, }; pub type Error = Box; @@ -58,12 +59,27 @@ pub struct BufProxy { id: Id, } +#[derive(Clone, Copy)] +pub struct ImageProxy { + width: u32, + height: u32, + // TODO: format + id: Id, +} + +#[derive(Clone, Copy)] +pub enum ResourceProxy { + Buf(BufProxy), + Image(ImageProxy), +} + pub enum Command { Upload(BufProxy, Vec), + UploadImage(ImageProxy, Vec), // Discussion question: third argument is vec of resources? // Maybe use tricks to make more ergonomic? // Alternative: provide bufs & images as separate sequences, like piet-gpu. - Dispatch(ShaderId, (u32, u32, u32), Vec), + Dispatch(ShaderId, (u32, u32, u32), Vec), Download(BufProxy), Clear(BufProxy, u64, Option), } @@ -92,6 +108,7 @@ pub enum BindType { #[derive(Default)] struct BindMap { buf_map: HashMap, + image_map: HashMap, } impl Engine { @@ -132,6 +149,16 @@ impl Engine { }, count: None, }, + BindType::ImageRead => wgpu::BindGroupLayoutEntry { + binding: i as u32, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Texture { + sample_type: wgpu::TextureSampleType::Float { filterable: true }, + view_dimension: wgpu::TextureViewDimension::D2, + multisampled: false, + }, + count: None, + }, _ => todo!(), }) .collect::>(); @@ -182,6 +209,58 @@ impl Engine { }); bind_map.insert_buf(buf_proxy.id, buf); } + Command::UploadImage(image_proxy, bytes) => { + let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor { + label: None, + contents: &bytes, + usage: wgpu::BufferUsages::COPY_SRC, + }); + let texture = device.create_texture(&wgpu::TextureDescriptor { + label: None, + size: wgpu::Extent3d { + width: image_proxy.width, + height: image_proxy.height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST, + format: TextureFormat::Rgba8Unorm, + }); + let texture_view = texture.create_view(&wgpu::TextureViewDescriptor { + label: None, + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + mip_level_count: None, + base_mip_level: 0, + base_array_layer: 0, + array_layer_count: None, + format: Some(TextureFormat::Rgba8Unorm), + }); + encoder.copy_buffer_to_texture( + wgpu::ImageCopyBuffer { + buffer: &buf, + layout: wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: NonZeroU32::new(image_proxy.width * 4), + rows_per_image: None, + }, + }, + wgpu::ImageCopyTexture { + texture: &texture, + mip_level: 0, + origin: wgpu::Origin3d { x: 0, y: 0, z: 0 }, + aspect: TextureAspect::All, + }, + wgpu::Extent3d { + width: image_proxy.width, + height: image_proxy.height, + depth_or_array_layers: 1, + }, + ); + bind_map.insert_image(image_proxy.id, texture, texture_view) + } Command::Dispatch(shader_id, wg_size, bindings) => { println!("dispatching {:?} with {} bindings", wg_size, bindings.len()); let shader = &self.shaders[shader_id.0]; @@ -226,13 +305,23 @@ impl Recording { buf_proxy } - pub fn dispatch( - &mut self, - shader: ShaderId, - wg_size: (u32, u32, u32), - resources: impl Into>, - ) { - self.push(Command::Dispatch(shader, wg_size, resources.into())); + pub fn upload_image(&mut self, width: u32, height: u32, data: impl Into>) -> ImageProxy { + let data = data.into(); + let image_proxy = ImageProxy::new(width, height); + self.push(Command::UploadImage(image_proxy, data)); + image_proxy + } + + pub fn dispatch(&mut self, shader: ShaderId, wg_size: (u32, u32, u32), resources: R) + where + R: IntoIterator, + R::Item: Into, + { + self.push(Command::Dispatch( + shader, + wg_size, + resources.into_iter().map(|r| r.into()).collect(), + )); } pub fn download(&mut self, buf: BufProxy) { @@ -251,6 +340,35 @@ impl BufProxy { } } +impl ImageProxy { + pub fn new(width: u32, height: u32) -> Self { + let id = Id::next(); + ImageProxy { width, height, id } + } +} + +impl ResourceProxy { + pub fn new_buf(size: u64) -> Self { + Self::Buf(BufProxy::new(size)) + } + + pub fn new_image(width: u32, height: u32) -> Self { + Self::Image(ImageProxy::new(width, height)) + } +} + +impl From for ResourceProxy { + fn from(value: BufProxy) -> Self { + Self::Buf(value) + } +} + +impl From for ResourceProxy { + fn from(value: ImageProxy) -> Self { + Self::Image(value) + } +} + impl Id { pub fn next() -> Id { let val = ID_COUNTER.fetch_add(1, Ordering::Relaxed); @@ -264,34 +382,79 @@ impl BindMap { self.buf_map.insert(id, buf); } + fn insert_image(&mut self, id: Id, image: Texture, image_view: TextureView) { + self.image_map.insert(id, (image, image_view)); + } + fn create_bind_group( &mut self, device: &Device, layout: &BindGroupLayout, - bindings: &[BufProxy], + bindings: &[ResourceProxy], ) -> Result { for proxy in bindings { - if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) { - let buf = device.create_buffer(&wgpu::BufferDescriptor { - label: None, - size: proxy.size, - usage: wgpu::BufferUsages::STORAGE - | wgpu::BufferUsages::COPY_DST - | wgpu::BufferUsages::COPY_SRC, - mapped_at_creation: false, - }); - v.insert(buf); + match proxy { + ResourceProxy::Buf(proxy) => { + if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) { + let buf = device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: proxy.size, + usage: wgpu::BufferUsages::STORAGE + | wgpu::BufferUsages::COPY_DST + | wgpu::BufferUsages::COPY_SRC, + mapped_at_creation: false, + }); + v.insert(buf); + } + } + ResourceProxy::Image(proxy) => { + if let Entry::Vacant(v) = self.image_map.entry(proxy.id) { + let texture = device.create_texture(&wgpu::TextureDescriptor { + label: None, + size: wgpu::Extent3d { + width: proxy.width, + height: proxy.height, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST, + format: TextureFormat::Rgba8Unorm, + }); + let texture_view = texture.create_view(&wgpu::TextureViewDescriptor { + label: None, + dimension: Some(TextureViewDimension::D2), + aspect: TextureAspect::All, + mip_level_count: None, + base_mip_level: 0, + base_array_layer: 0, + array_layer_count: None, + format: Some(TextureFormat::Rgba8Unorm), + }); + v.insert((texture, texture_view)); + } + } } } let entries = bindings .iter() .enumerate() - .map(|(i, proxy)| { - let buf = self.buf_map.get(&proxy.id).unwrap(); - Ok(wgpu::BindGroupEntry { - binding: i as u32, - resource: buf.as_entire_binding(), - }) + .map(|(i, proxy)| match proxy { + ResourceProxy::Buf(proxy) => { + let buf = self.buf_map.get(&proxy.id).unwrap(); + Ok(wgpu::BindGroupEntry { + binding: i as u32, + resource: buf.as_entire_binding(), + }) + } + ResourceProxy::Image(proxy) => { + let texture = self.image_map.get(&proxy.id).unwrap(); + Ok(wgpu::BindGroupEntry { + binding: i as u32, + resource: wgpu::BindingResource::TextureView(&texture.1), + }) + } }) .collect::, Error>>()?; let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index 38645db..2f1d885 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -24,6 +24,7 @@ use wgpu::{Device, Limits, Queue}; mod engine; mod pico_svg; +mod ramp; mod render; mod shaders; mod test_scene; @@ -58,7 +59,6 @@ fn dump_buf(buf: &[u32]) { println!("{}: {:x} {}", i, val, f32::from_bits(*val)); } else { println!("{}: {:x}", i, val); - } } } diff --git a/piet-wgsl/src/ramp.rs b/piet-wgsl/src/ramp.rs new file mode 100644 index 0000000..a26c3d9 --- /dev/null +++ b/piet-wgsl/src/ramp.rs @@ -0,0 +1,137 @@ +use piet_scene::{Color, GradientStop, GradientStops}; + +use std::collections::HashMap; + +const N_SAMPLES: usize = 512; +const RETAINED_COUNT: usize = 64; + +#[derive(Default)] +pub struct RampCache { + epoch: u64, + map: HashMap, + data: Vec, +} + +impl RampCache { + pub fn advance(&mut self) { + self.epoch += 1; + if self.map.len() > RETAINED_COUNT { + self.map + .retain(|_key, value| value.0 < RETAINED_COUNT as u32); + self.data.truncate(RETAINED_COUNT * N_SAMPLES); + } + } + + pub fn add(&mut self, stops: &[GradientStop]) -> u32 { + if let Some(entry) = self.map.get_mut(stops) { + entry.1 = self.epoch; + entry.0 + } else if self.map.len() < RETAINED_COUNT { + let id = (self.data.len() / N_SAMPLES) as u32; + self.data.extend(make_ramp(stops)); + self.map.insert(stops.into(), (id, self.epoch)); + id + } else { + let mut reuse = None; + for (stops, (id, epoch)) in &self.map { + if *epoch + 2 < self.epoch { + reuse = Some((stops.to_owned(), *id)); + break; + } + } + if let Some((old_stops, id)) = reuse { + self.map.remove(&old_stops); + let start = id as usize * N_SAMPLES; + for (dst, src) in self.data[start..start + N_SAMPLES] + .iter_mut() + .zip(make_ramp(stops)) + { + *dst = src; + } + self.map.insert(stops.into(), (id, self.epoch)); + id + } else { + let id = (self.data.len() / N_SAMPLES) as u32; + self.data.extend(make_ramp(stops)); + self.map.insert(stops.into(), (id, self.epoch)); + id + } + } + } + + pub fn data(&self) -> &[u32] { + &self.data + } + + pub fn width(&self) -> u32 { + N_SAMPLES as u32 + } + + pub fn height(&self) -> u32 { + (self.data.len() / N_SAMPLES) as u32 + } +} + +fn make_ramp<'a>(stops: &'a [GradientStop]) -> impl Iterator + 'a { + let mut last_u = 0.0; + let mut last_c = ColorF64::from_color(stops[0].color); + let mut this_u = last_u; + let mut this_c = last_c; + let mut j = 0; + (0..N_SAMPLES).map(move |i| { + let u = (i as f64) / (N_SAMPLES - 1) as f64; + while u > this_u { + last_u = this_u; + last_c = this_c; + if let Some(s) = stops.get(j + 1) { + this_u = s.offset as f64; + this_c = ColorF64::from_color(s.color); + j += 1; + } else { + break; + } + } + let du = this_u - last_u; + let c = if du < 1e-9 { + this_c + } else { + last_c.lerp(&this_c, (u - last_u) / du) + }; + c.to_premul_u32() + }) +} + +#[derive(Copy, Clone, Debug)] +struct ColorF64([f64; 4]); + +impl ColorF64 { + fn from_color(color: Color) -> Self { + Self([ + color.r as f64 / 255.0, + color.g as f64 / 255.0, + color.b as f64 / 255.0, + color.a as f64 / 255.0, + ]) + } + + fn lerp(&self, other: &Self, a: f64) -> Self { + fn l(x: f64, y: f64, a: f64) -> f64 { + x * (1.0 - a) + y * a + } + Self([ + l(self.0[0], other.0[0], a), + l(self.0[1], other.0[1], a), + l(self.0[2], other.0[2], a), + l(self.0[3], other.0[3], a), + ]) + } + + fn to_premul_u32(&self) -> u32 { + let a = self.0[3].min(1.0).max(0.0); + let r = ((self.0[0] * a).min(1.0).max(0.0) * 255.0) as u32; + let g = ((self.0[1] * a).min(1.0).max(0.0) * 255.0) as u32; + let b = ((self.0[2] * a).min(1.0).max(0.0) * 255.0) as u32; + let a = (a * 255.0) as u32; + r | (g << 8) | (b << 16) | (a << 24) + } +} diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 7d052cc..4cc63e3 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -4,7 +4,7 @@ use bytemuck::{Pod, Zeroable}; use piet_scene::Scene; use crate::{ - engine::{BufProxy, Recording}, + engine::{BufProxy, Recording, ResourceProxy}, shaders::{self, FullShaders, Shaders}, }; @@ -87,7 +87,8 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { [config_buf, scene_buf, reduced_buf, tagmonoid_buf], ); - let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; + let path_coarse_wgs = + (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; // TODO: more principled size calc let tiles_buf = BufProxy::new(4097 * 8); let segments_buf = BufProxy::new(256 * 24); @@ -122,7 +123,29 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) { pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy) { let mut recording = Recording::default(); + let mut ramps = crate::ramp::RampCache::default(); + let mut drawdata_patches: Vec<(usize, u32)> = vec![]; let data = scene.data(); + let stop_data = &data.resources.stops; + for patch in &data.resources.patches { + use piet_scene::ResourcePatch; + match patch { + ResourcePatch::Ramp { offset, stops } => { + let ramp_id = ramps.add(&stop_data[stops.clone()]); + drawdata_patches.push((*offset, ramp_id)); + } + } + } + let gradient_image = if drawdata_patches.is_empty() { + ResourceProxy::new_image(1, 1) + } else { + let data = ramps.data(); + let width = ramps.width(); + let height = ramps.height(); + let data: &[u8] = bytemuck::cast_slice(data); + println!("gradient image: {}x{} ({} bytes)", width, height, data.len()); + ResourceProxy::Image(recording.upload_image(width, height, data)) + }; let n_pathtag = data.tag_stream.len(); let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG); // TODO: can compute size accurately, avoid reallocation @@ -135,12 +158,27 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy let drawtag_base = size_to_words(scene.len()); scene.extend(bytemuck::cast_slice(&data.drawtag_stream)); let drawdata_base = size_to_words(scene.len()); - scene.extend(&data.drawdata_stream); + if !drawdata_patches.is_empty() { + let mut pos = 0; + for patch in drawdata_patches { + let offset = patch.0; + let value = patch.1; + if pos < offset { + scene.extend_from_slice(&data.drawdata_stream[pos..offset]); + } + scene.extend_from_slice(bytemuck::bytes_of(&value)); + pos = offset + 4; + } + if pos < data.drawdata_stream.len() { + scene.extend_from_slice(&data.drawdata_stream[pos..]) + } + } else { + scene.extend(&data.drawdata_stream); + } let transform_base = size_to_words(scene.len()); scene.extend(bytemuck::cast_slice(&data.transform_stream)); let linewidth_base = size_to_words(scene.len()); scene.extend(bytemuck::cast_slice(&data.linewidth_stream)); - let n_path = data.n_path; // TODO: calculate for real when we do rectangles let n_drawobj = n_path; @@ -159,11 +197,11 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy linewidth_base, }; println!("{:?}", config); - let scene_buf = recording.upload(scene); - let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned()); + let scene_buf = ResourceProxy::Buf(recording.upload(scene)); + let config_buf = ResourceProxy::Buf(recording.upload(bytemuck::bytes_of(&config).to_owned())); let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize); - let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE); + let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE); // TODO: really only need pathtag_wgs - 1 recording.dispatch( shaders.pathtag_reduce, @@ -171,22 +209,24 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy [config_buf, scene_buf, reduced_buf], ); - let tagmonoid_buf = - BufProxy::new(pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE); + let tagmonoid_buf = ResourceProxy::new_buf( + pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE, + ); recording.dispatch( shaders.pathtag_scan, (pathtag_wgs as u32, 1, 1), [config_buf, scene_buf, reduced_buf, tagmonoid_buf], ); let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; - let path_bbox_buf = BufProxy::new(n_path as u64 * PATH_BBOX_SIZE); + let path_bbox_buf = ResourceProxy::new_buf(n_path as u64 * PATH_BBOX_SIZE); recording.dispatch( shaders.bbox_clear, (drawobj_wgs, 1, 1), [config_buf, path_bbox_buf], ); - let cubic_buf = BufProxy::new(n_pathtag as u64 * CUBIC_SIZE); - let path_coarse_wgs = (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; + let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE); + let path_coarse_wgs = + (n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG; recording.dispatch( shaders.pathseg, (path_coarse_wgs, 1, 1), @@ -198,14 +238,14 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy cubic_buf, ], ); - let draw_reduced_buf = BufProxy::new(drawobj_wgs as u64 * DRAWMONOID_SIZE); + let draw_reduced_buf = ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE); recording.dispatch( shaders.draw_reduce, (drawobj_wgs, 1, 1), [config_buf, scene_buf, draw_reduced_buf], ); - let draw_monoid_buf = BufProxy::new(n_drawobj as u64 * DRAWMONOID_SIZE); - let info_buf = BufProxy::new(n_drawobj as u64 * MAX_DRAWINFO_SIZE); + let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE); + let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE); recording.dispatch( shaders.draw_leaf, (drawobj_wgs, 1, 1), @@ -218,16 +258,17 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy info_buf, ], ); - let draw_bbox_buf = BufProxy::new(n_path as u64 * DRAW_BBOX_SIZE); + let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE); let bump_buf = BufProxy::new(BUMP_SIZE); // Not actually used yet. - let clip_bbox_buf = BufProxy::new(1024); - let bin_data_buf = BufProxy::new(1 << 20); + let clip_bbox_buf = ResourceProxy::new_buf(1024); + let bin_data_buf = ResourceProxy::new_buf(1 << 20); let width_in_bins = (config.width_in_tiles + 15) / 16; let height_in_bins = (config.height_in_tiles + 15) / 16; let n_bins = width_in_bins * height_in_bins; - let bin_header_buf = BufProxy::new((n_bins * drawobj_wgs) as u64 * BIN_HEADER_SIZE); + let bin_header_buf = ResourceProxy::new_buf((n_bins * drawobj_wgs) as u64 * BIN_HEADER_SIZE); recording.clear_all(bump_buf); + let bump_buf = ResourceProxy::Buf(bump_buf); recording.dispatch( shaders.binning, (drawobj_wgs, 1, 1), @@ -242,8 +283,8 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy bin_header_buf, ], ); - let path_buf = BufProxy::new(n_path as u64 * PATH_SIZE); - let tile_buf = BufProxy::new(1 << 20); + let path_buf = ResourceProxy::new_buf(n_path as u64 * PATH_SIZE); + let tile_buf = ResourceProxy::new_buf(1 << 20); let path_wgs = (n_path + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG; recording.dispatch( shaders.tile_alloc, @@ -258,7 +299,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy ], ); - let segments_buf = BufProxy::new(1 << 24); + let segments_buf = ResourceProxy::new_buf(1 << 24); recording.dispatch( shaders.path_coarse, (path_coarse_wgs, 1, 1), @@ -278,7 +319,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy (path_wgs, 1, 1), [config_buf, path_buf, tile_buf], ); - let ptcl_buf = BufProxy::new(1 << 24); + let ptcl_buf = ResourceProxy::new_buf(1 << 24); recording.dispatch( shaders.coarse, (width_in_bins, height_in_bins, 1), @@ -300,7 +341,14 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy recording.dispatch( shaders.fine, (config.width_in_tiles, config.height_in_tiles, 1), - [config_buf, tile_buf, segments_buf, out_buf, ptcl_buf], + [ + config_buf, + tile_buf, + segments_buf, + ResourceProxy::Buf(out_buf), + ptcl_buf, + gradient_image, + ], ); let download_buf = out_buf; diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index b659170..8e6e89f 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -255,6 +255,7 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result Scene { let mut scene = Scene::default(); let mut builder = SceneBuilder::for_scene(&mut scene); - if false { - let path = [ - PathElement::MoveTo(Point::new(100.0, 100.0)), - PathElement::LineTo(Point::new(500.0, 120.0)), - PathElement::LineTo(Point::new(300.0, 150.0)), - PathElement::LineTo(Point::new(200.0, 260.0)), - PathElement::LineTo(Point::new(150.0, 210.0)), - PathElement::Close, - ]; - let brush = Brush::Solid(Color::rgb8(0x40, 0x40, 0xff)); - builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); - let transform = Affine::translate(50.0, 50.0); - let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80)); - builder.fill(Fill::NonZero, transform, &brush, None, &path); - let transform = Affine::translate(100.0, 100.0); - let style = simple_stroke(1.0); - let brush = Brush::Solid(Color::rgb8(0xa0, 0x00, 0x00)); - builder.stroke(&style, transform, &brush, None, &path); - } else { - let xml_str = std::str::from_utf8(include_bytes!("../../piet-gpu/Ghostscript_Tiger.svg")).unwrap(); - let svg = PicoSvg::load(xml_str, 6.0).unwrap(); - render_svg(&mut builder, &svg, false); + let scene_ix = 1; + match scene_ix { + 0 => { + let path = [ + PathElement::MoveTo(Point::new(100.0, 100.0)), + PathElement::LineTo(Point::new(500.0, 120.0)), + PathElement::LineTo(Point::new(300.0, 150.0)), + PathElement::LineTo(Point::new(200.0, 260.0)), + PathElement::LineTo(Point::new(150.0, 210.0)), + PathElement::Close, + ]; + let brush = Brush::Solid(Color::rgb8(0x40, 0x40, 0xff)); + builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path); + let transform = Affine::translate(50.0, 50.0); + let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80)); + builder.fill(Fill::NonZero, transform, &brush, None, &path); + let transform = Affine::translate(100.0, 100.0); + let style = simple_stroke(1.0); + let brush = Brush::Solid(Color::rgb8(0xa0, 0x00, 0x00)); + builder.stroke(&style, transform, &brush, None, &path); + } + 1 => { + let path = [ + PathElement::MoveTo(Point::new(100.0, 100.0)), + PathElement::LineTo(Point::new(300.0, 100.0)), + PathElement::LineTo(Point::new(300.0, 300.0)), + PathElement::LineTo(Point::new(100.0, 300.0)), + PathElement::Close, + ]; + let gradient = Brush::LinearGradient(LinearGradient { + start: Point::new(100.0, 100.0), + end: Point::new(300.0, 300.0), + extend: piet_scene::ExtendMode::Pad, + stops: vec![ + GradientStop { + offset: 0.0, + color: Color::rgb8(255, 0, 0), + }, + GradientStop { + offset: 0.5, + color: Color::rgb8(0, 255, 0), + }, + GradientStop { + offset: 1.0, + color: Color::rgb8(0, 0, 255), + }, + ].into() + }); + builder.fill(Fill::NonZero, Affine::scale(3.0, 3.0), &gradient, None, &path); + } + _ => { + let xml_str = + std::str::from_utf8(include_bytes!("../../piet-gpu/Ghostscript_Tiger.svg")) + .unwrap(); + let svg = PicoSvg::load(xml_str, 6.0).unwrap(); + render_svg(&mut builder, &svg, false); + } } scene } @@ -94,7 +129,6 @@ fn convert_bez_path<'a>(path: &'a BezPath) -> impl Iterator .map(|el| PathElement::from_kurbo(*el)) } - fn simple_stroke(width: f32) -> Stroke<[f32; 0]> { Stroke { width, From 9adeaf3e82dfb25bc8a39c29c4f28a1b464ef70e Mon Sep 17 00:00:00 2001 From: Chad Brokaw Date: Wed, 16 Nov 2022 12:02:11 -0500 Subject: [PATCH 16/17] format --- piet-wgsl/src/engine.rs | 7 ++++++- piet-wgsl/src/render.rs | 7 ++++++- piet-wgsl/src/test_scene.rs | 16 +++++++++++++--- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs index 5e89905..9a3556c 100644 --- a/piet-wgsl/src/engine.rs +++ b/piet-wgsl/src/engine.rs @@ -305,7 +305,12 @@ impl Recording { buf_proxy } - pub fn upload_image(&mut self, width: u32, height: u32, data: impl Into>) -> ImageProxy { + pub fn upload_image( + &mut self, + width: u32, + height: u32, + data: impl Into>, + ) -> ImageProxy { let data = data.into(); let image_proxy = ImageProxy::new(width, height); self.push(Command::UploadImage(image_proxy, data)); diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 4cc63e3..4550927 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -143,7 +143,12 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy let width = ramps.width(); let height = ramps.height(); let data: &[u8] = bytemuck::cast_slice(data); - println!("gradient image: {}x{} ({} bytes)", width, height, data.len()); + println!( + "gradient image: {}x{} ({} bytes)", + width, + height, + data.len() + ); ResourceProxy::Image(recording.upload_image(width, height, data)) }; let n_pathtag = data.tag_stream.len(); diff --git a/piet-wgsl/src/test_scene.rs b/piet-wgsl/src/test_scene.rs index cdbd467..bb6f7ad 100644 --- a/piet-wgsl/src/test_scene.rs +++ b/piet-wgsl/src/test_scene.rs @@ -15,7 +15,10 @@ // Also licensed under MIT license, at your choice. use kurbo::BezPath; -use piet_scene::{Affine, Brush, Color, Fill, LinearGradient, PathElement, Point, Scene, SceneBuilder, Stroke, GradientStop}; +use piet_scene::{ + Affine, Brush, Color, Fill, GradientStop, LinearGradient, PathElement, Point, Scene, + SceneBuilder, Stroke, +}; use crate::pico_svg::PicoSvg; @@ -68,9 +71,16 @@ pub fn gen_test_scene() -> Scene { offset: 1.0, color: Color::rgb8(0, 0, 255), }, - ].into() + ] + .into(), }); - builder.fill(Fill::NonZero, Affine::scale(3.0, 3.0), &gradient, None, &path); + builder.fill( + Fill::NonZero, + Affine::scale(3.0, 3.0), + &gradient, + None, + &path, + ); } _ => { let xml_str = From adc98117764a16f4cbe5bfac64c6a97f5149517b Mon Sep 17 00:00:00 2001 From: Chad Brokaw Date: Fri, 18 Nov 2022 17:26:26 -0500 Subject: [PATCH 17/17] add clips and blends --- piet-wgsl/shader/clip_leaf.wgsl | 20 +- piet-wgsl/shader/clip_reduce.wgsl | 2 +- piet-wgsl/shader/coarse.wgsl | 88 ++++++- piet-wgsl/shader/draw_leaf.wgsl | 13 +- piet-wgsl/shader/fine.wgsl | 10 +- piet-wgsl/shader/path_coarse_full.wgsl | 2 +- piet-wgsl/shader/shared/blend.wgsl | 351 +++++++++++++++++++++++++ piet-wgsl/src/debug.rs | 5 + piet-wgsl/src/debug/clip.rs | 13 + piet-wgsl/src/debug/draw.rs | 14 + piet-wgsl/src/debug/fine.rs | 153 +++++++++++ piet-wgsl/src/engine.rs | 19 +- piet-wgsl/src/main.rs | 1 + piet-wgsl/src/render.rs | 44 +++- piet-wgsl/src/shaders.rs | 30 +++ piet-wgsl/src/test_scene.rs | 200 +++++++++++--- 16 files changed, 901 insertions(+), 64 deletions(-) create mode 100644 piet-wgsl/shader/shared/blend.wgsl create mode 100644 piet-wgsl/src/debug.rs create mode 100644 piet-wgsl/src/debug/clip.rs create mode 100644 piet-wgsl/src/debug/draw.rs create mode 100644 piet-wgsl/src/debug/fine.rs diff --git a/piet-wgsl/shader/clip_leaf.wgsl b/piet-wgsl/shader/clip_leaf.wgsl index f294317..d935550 100644 --- a/piet-wgsl/shader/clip_leaf.wgsl +++ b/piet-wgsl/shader/clip_leaf.wgsl @@ -33,15 +33,17 @@ var sh_bbox: array, WG_SIZE>; var sh_link: array; fn search_link(bic: ptr, ix: u32) -> i32 { + var ix = ix; var j = 0u; while j < firstTrailingBit(WG_SIZE) { let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j)); if ((ix >> j) & 1u) != 0u { let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic); - if test.b >= 0u { - *bic = test; - ix -= 1u << j; + if test.b > 0u { + break; } + *bic = test; + ix -= 1u << j; } j += 1u; } @@ -59,7 +61,7 @@ fn search_link(bic: ptr, ix: u32) -> i32 { if ix > 0u { return i32(ix) - 1; } else { - return i32(~(*bic).a); + return i32(~0u - (*bic).a); } } @@ -67,7 +69,9 @@ fn load_clip_inp(ix: u32) -> i32 { if ix < config.n_clip { return clip_inp[ix]; } else { - return i32(0x80000000); + return -2147483648; + // literal too large? + // return 0x80000000; } } @@ -129,12 +133,12 @@ fn main( sh_bic[local_id.x] = bic; if is_push { let path_bbox = path_bboxes[inp]; - bbox = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); + bbox = vec4(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1)); } else { bbox = vec4(-1e9, -1e9, 1e9, 1e9); } var inbase = 0u; - for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { + for (var i = 0u; i < firstTrailingBit(WG_SIZE) - 1u; i += 1u) { let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i)); workgroupBarrier(); if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) { @@ -191,5 +195,5 @@ fn main( bbox = vec4(-1e9, -1e9, 1e9, 1e9); } } - clip_bboxes[global_id.x] = bbox + clip_bboxes[global_id.x] = bbox; } diff --git a/piet-wgsl/shader/clip_reduce.wgsl b/piet-wgsl/shader/clip_reduce.wgsl index 3288b07..50c6402 100644 --- a/piet-wgsl/shader/clip_reduce.wgsl +++ b/piet-wgsl/shader/clip_reduce.wgsl @@ -60,7 +60,7 @@ fn main( let path_ix = sh_path_ix[local_id.x]; let path_bbox = path_bboxes[path_ix]; let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE; - let bbox = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); + let bbox = vec4(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1)); clip_out[global_id.x] = ClipEl(parent_ix, bbox); } } diff --git a/piet-wgsl/shader/coarse.wgsl b/piet-wgsl/shader/coarse.wgsl index 5741ec3..b27a215 100644 --- a/piet-wgsl/shader/coarse.wgsl +++ b/piet-wgsl/shader/coarse.wgsl @@ -145,9 +145,32 @@ fn write_rad_grad(rad: CmdRadGrad) { alloc_cmd(12u); ptcl[cmd_offset] = CMD_RAD_GRAD; ptcl[cmd_offset + 1u] = rad.index; + ptcl[cmd_offset + 2u] = bitcast(rad.matrx.x); + ptcl[cmd_offset + 3u] = bitcast(rad.matrx.y); + ptcl[cmd_offset + 4u] = bitcast(rad.matrx.z); + ptcl[cmd_offset + 5u] = bitcast(rad.matrx.w); + ptcl[cmd_offset + 6u] = bitcast(rad.xlat.x); + ptcl[cmd_offset + 7u] = bitcast(rad.xlat.y); + ptcl[cmd_offset + 8u] = bitcast(rad.c1.x); + ptcl[cmd_offset + 9u] = bitcast(rad.c1.y); + ptcl[cmd_offset + 10u] = bitcast(rad.ra); + ptcl[cmd_offset + 11u] = bitcast(rad.roff); cmd_offset += 12u; } +fn write_begin_clip() { + alloc_cmd(1u); + ptcl[cmd_offset] = CMD_BEGIN_CLIP; + cmd_offset += 1u; +} + +fn write_end_clip(blend: u32) { + alloc_cmd(2u); + ptcl[cmd_offset] = CMD_END_CLIP; + ptcl[cmd_offset + 1u] = blend; + cmd_offset += 2u; +} + @compute @workgroup_size(256) fn main( @builtin(local_invocation_id) local_id: vec3, @@ -166,15 +189,20 @@ fn main( let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x; cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC; cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM); - // TODO: clip state - let clip_zero_depth = 0u; + + // clip state + var clip_zero_depth = 0u; + var clip_depth = 0u; var partition_ix = 0u; var rd_ix = 0u; var wr_ix = 0u; var part_start_ix = 0u; var ready_ix = 0u; - // TODO: blend state + + // blend state + var render_blend_depth = 0u; + var max_blend_depth = 0u; while true { for (var i = 0u; i < N_SLICE; i += 1u) { @@ -286,8 +314,16 @@ fn main( let y = sh_tile_y0[el_ix] + seq_ix / width; let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * y + x; let tile = tiles[tile_ix]; - // TODO: this predicate becomes more interesting with clip - let include_tile = tile.segments != 0u || tile.backdrop != 0; + let is_clip = (tag & 1u) != 0u; + var is_blend = false; + if is_clip { + let BLEND_CLIP = (128u << 8u) | 3u; + let scene_offset = draw_monoids[drawobj_ix].scene_offset; + let dd = config.drawdata_base + scene_offset; + let blend = scene[dd]; + is_blend = blend != BLEND_CLIP; + } + let include_tile = tile.segments != 0u || (tile.backdrop == 0) == is_clip || is_blend; if include_tile { let el_slice = el_ix / 32u; let el_mask = 1u << (el_ix & 31u); @@ -324,16 +360,18 @@ fn main( if clip_zero_depth == 0u { let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x; let tile = tiles[tile_ix]; - let linewidth = bitcast(info[di]); - write_path(tile, linewidth); switch drawtag { // DRAWTAG_FILL_COLOR case 0x44u: { + let linewidth = bitcast(info[di]); + write_path(tile, linewidth); let rgba_color = scene[dd]; write_color(CmdColor(rgba_color)); } // DRAWTAG_FILL_LIN_GRADIENT case 0x114u: { + let linewidth = bitcast(info[di]); + write_path(tile, linewidth); var lin: CmdLinGrad; lin.index = scene[dd]; lin.line_x = bitcast(info[di + 1u]); @@ -343,6 +381,8 @@ fn main( } // DRAWTAG_FILL_RAD_GRADIENT case 0x2dcu: { + let linewidth = bitcast(info[di]); + write_path(tile, linewidth); var rad: CmdRadGrad; rad.index = scene[dd]; let m0 = bitcast(info[di + 1u]); @@ -356,6 +396,40 @@ fn main( rad.roff = bitcast(info[di + 10u]); write_rad_grad(rad); } + // DRAWTAG_BEGIN_CLIP + case 0x05u: { + if tile.segments == 0u && tile.backdrop == 0 { + clip_zero_depth = clip_depth + 1u; + } else { + write_begin_clip(); + render_blend_depth += 1u; + max_blend_depth = max(max_blend_depth, render_blend_depth); + } + clip_depth += 1u; + } + // DRAWTAG_END_CLIP + case 0x25u: { + clip_depth -= 1u; + write_path(tile, -1.0); + write_end_clip(scene[dd]); + render_blend_depth -= 1u; + } + default: {} + } + } else { + // In "clip zero" state, suppress all drawing + switch drawtag { + // DRAWTAG_BEGIN_CLIP + case 0x05u: { + clip_depth += 1u; + } + // DRAWTAG_END_CLIP + case 0x25u: { + if clip_depth == clip_zero_depth { + clip_zero_depth = 0u; + } + clip_depth -= 1u; + } default: {} } } diff --git a/piet-wgsl/shader/draw_leaf.wgsl b/piet-wgsl/shader/draw_leaf.wgsl index 5909fdd..f5140a4 100644 --- a/piet-wgsl/shader/draw_leaf.wgsl +++ b/piet-wgsl/shader/draw_leaf.wgsl @@ -17,6 +17,7 @@ // Finish prefix sum of drawtags, decode draw objects. #import config +#import clip #import drawtag #import bbox @@ -38,6 +39,9 @@ var draw_monoid: array; @group(0) @binding(5) var info: array; +@group(0) @binding(6) +var clip_inp: array; + let WG_SIZE = 256u; // Possibly dedup? @@ -183,4 +187,11 @@ fn main( default: {} } } -} \ No newline at end of file + if tag_word == DRAWTAG_BEGIN_CLIP || tag_word == DRAWTAG_END_CLIP { + var path_ix = ~ix; + if tag_word == DRAWTAG_BEGIN_CLIP { + path_ix = m.path_ix; + } + clip_inp[m.clip_ix] = i32(path_ix); + } +} diff --git a/piet-wgsl/shader/fine.wgsl b/piet-wgsl/shader/fine.wgsl index d76edef..5488e01 100644 --- a/piet-wgsl/shader/fine.wgsl +++ b/piet-wgsl/shader/fine.wgsl @@ -40,6 +40,8 @@ var segments: array; var output: array; #ifdef full + +#import blend #import ptcl let GRADIENT_WIDTH = 512; @@ -90,10 +92,6 @@ fn read_rad_grad(cmd_ix: u32) -> CmdRadGrad { return CmdRadGrad(index, matrx, xlat, c1, ra, roff); } -fn mix_blend_compose(backdrop: vec4, src: vec4, mode: u32) -> vec4 { - // TODO: ALL the blend modes. This is just vanilla src-over. - return backdrop * (1.0 - src.a) + src; -} #endif let PIXELS_PER_THREAD = 4u; @@ -233,7 +231,7 @@ fn main( let fg_i = fg_rgba * area[i]; rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i; } - cmd_ix += 12u; + cmd_ix += 5u; } // CMD_RAD_GRAD case 7u: { @@ -278,7 +276,7 @@ fn main( } let bg = unpack4x8unorm(bg_rgba); let fg = rgba[i] * area[i]; - rgba[i] = mix_blend_compose(bg, fg, blend); + rgba[i] = blend_mix_compose(bg, fg, blend); } cmd_ix += 2u; } diff --git a/piet-wgsl/shader/path_coarse_full.wgsl b/piet-wgsl/shader/path_coarse_full.wgsl index fa3609e..d607bac 100644 --- a/piet-wgsl/shader/path_coarse_full.wgsl +++ b/piet-wgsl/shader/path_coarse_full.wgsl @@ -213,7 +213,7 @@ fn main( } for (var y = y0; y < y1; y += 1) { let tile_y0 = f32(y) * f32(TILE_HEIGHT); - let xbackdrop = max(xray + 1, 0); + let xbackdrop = max(xray + 1, bbox.x); if xymin.y < tile_y0 && xbackdrop < bbox.z { let backdrop = select(-1, 1, dp.y < 0.0); let tile_ix = base + xbackdrop; diff --git a/piet-wgsl/shader/shared/blend.wgsl b/piet-wgsl/shader/shared/blend.wgsl new file mode 100644 index 0000000..34cb55b --- /dev/null +++ b/piet-wgsl/shader/shared/blend.wgsl @@ -0,0 +1,351 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + + +// Color mixing modes + +let MIX_NORMAL = 0u; +let MIX_MULTIPLY = 1u; +let MIX_SCREEN = 2u; +let MIX_OVERLAY = 3u; +let MIX_DARKEN = 4u; +let MIX_LIGHTEN = 5u; +let MIX_COLOR_DODGE = 6u; +let MIX_COLOR_BURN = 7u; +let MIX_HARD_LIGHT = 8u; +let MIX_SOFT_LIGHT = 9u; +let MIX_DIFFERENCE = 10u; +let MIX_EXCLUSION = 11u; +let MIX_HUE = 12u; +let MIX_SATURATION = 13u; +let MIX_COLOR = 14u; +let MIX_LUMINOSITY = 15u; +let MIX_CLIP = 128u; + +fn screen(cb: vec3, cs: vec3) -> vec3 { + return cb + cs - (cb * cs); +} + +fn color_dodge(cb: f32, cs: f32) -> f32 { + if cb == 0.0 { + return 0.0; + } else if cs == 1.0 { + return 1.0; + } else { + return min(1.0, cb / (1.0 - cs)); + } +} + +fn color_burn(cb: f32, cs: f32) -> f32 { + if cb == 1.0 { + return 1.0; + } else if cs == 0.0 { + return 0.0; + } else { + return 1.0 - min(1.0, (1.0 - cb) / cs); + } +} + +fn hard_light(cb: vec3, cs: vec3) -> vec3 { + return mix( + screen(cb, 2.0 * cs - 1.0), + cb * 2.0 * cs, + vec3(cs <= vec3(0.5)) + ); +} + +fn soft_light(cb: vec3, cs: vec3) -> vec3 { + let d = mix( + sqrt(cb), + ((16.0 * cb - vec3(12.0)) * cb + vec3(4.0)) * cb, + vec3(cb <= vec3(0.25)) + ); + return mix( + cb + (2.0 * cs - vec3(1.0)) * (d - cb), + cb - (vec3(1.0) - 2.0 * cs) * cb * (vec3(1.0) - cb), + vec3(cs <= vec3(0.5)) + ); +} + +fn sat(c: vec3) -> f32 { + return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z)); +} + +fn lum(c: vec3) -> f32 { + let f = vec3(0.3, 0.59, 0.11); + return dot(c, f); +} + +fn clip_color(c: vec3) -> vec3 { + var c = c; + let l = lum(c); + let n = min(c.x, min(c.y, c.z)); + let x = max(c.x, max(c.y, c.z)); + if n < 0.0 { + c = l + (((c - l) * l) / (l - n)); + } + if x > 1.0 { + c = l + (((c - l) * (1.0 - l)) / (x - l)); + } + return c; +} + +fn set_lum(c: vec3, l: f32) -> vec3 { + return clip_color(c + (l - lum(c))); +} + +fn set_sat_inner( + cmin: ptr, + cmid: ptr, + cmax: ptr, + s: f32 +) { + if *cmax > *cmin { + *cmid = ((*cmid - *cmin) * s) / (*cmax - *cmin); + *cmax = s; + } else { + *cmid = 0.0; + *cmax = 0.0; + } + *cmin = 0.0; +} + +fn set_sat(c: vec3, s: f32) -> vec3 { + var r = c.r; + var g = c.g; + var b = c.b; + if r <= g { + if g <= b { + set_sat_inner(&r, &g, &b, s); + } else { + if r <= b { + set_sat_inner(&r, &b, &g, s); + } else { + set_sat_inner(&b, &r, &g, s); + } + } + } else { + if r <= b { + set_sat_inner(&g, &r, &b, s); + } else { + if g <= b { + set_sat_inner(&g, &b, &r, s); + } else { + set_sat_inner(&b, &g, &r, s); + } + } + } + return vec3(r, g, b); +} + +// Blends two RGB colors together. The colors are assumed to be in sRGB +// color space, and this function does not take alpha into account. +fn blend_mix(cb: vec3, cs: vec3, mode: u32) -> vec3 { + var b = vec3(0.0); + switch mode { + // MIX_MULTIPLY + case 1u: { + b = cb * cs; + } + // MIX_SCREEN + case 2u: { + b = screen(cb, cs); + } + // MIX_OVERLAY + case 3u: { + b = hard_light(cs, cb); + } + // MIX_DARKEN + case 4u: { + b = min(cb, cs); + } + // MIX_LIGHTEN + case 5u: { + b = max(cb, cs); + } + // MIX_COLOR_DODGE + case 6u: { + b = vec3(color_dodge(cb.x, cs.x), color_dodge(cb.y, cs.y), color_dodge(cb.z, cs.z)); + } + // MIX_COLOR_BURN + case 7u: { + b = vec3(color_burn(cb.x, cs.x), color_burn(cb.y, cs.y), color_burn(cb.z, cs.z)); + } + // MIX_HARD_LIGHT + case 8u: { + b = hard_light(cb, cs); + } + // MIX_SOFT_LIGHT + case 9u: { + b = soft_light(cb, cs); + } + // MIX_DIFFERENCE + case 10u: { + b = abs(cb - cs); + } + // MIX_EXCLUSION + case 11u: { + b = cb + cs - 2.0 * cb * cs; + } + // MIX_HUE + case 12u: { + b = set_lum(set_sat(cs, sat(cb)), lum(cb)); + } + // MIX_SATURATION + case 13u: { + b = set_lum(set_sat(cb, sat(cs)), lum(cb)); + } + // MIX_COLOR + case 14u: { + b = set_lum(cs, lum(cb)); + } + // MIX_LUMINOSITY + case 15u: { + b = set_lum(cb, lum(cs)); + } + default: { + b = cs; + } + } + return b; +} + +// Composition modes + +let COMPOSE_CLEAR = 0u; +let COMPOSE_COPY = 1u; +let COMPOSE_DEST = 2u; +let COMPOSE_SRC_OVER = 3u; +let COMPOSE_DEST_OVER = 4u; +let COMPOSE_SRC_IN = 5u; +let COMPOSE_DEST_IN = 6u; +let COMPOSE_SRC_OUT = 7u; +let COMPOSE_DEST_OUT = 8u; +let COMPOSE_SRC_ATOP = 9u; +let COMPOSE_DEST_ATOP = 10u; +let COMPOSE_XOR = 11u; +let COMPOSE_PLUS = 12u; +let COMPOSE_PLUS_LIGHTER = 13u; + +// Apply general compositing operation. +// Inputs are separated colors and alpha, output is premultiplied. +fn blend_compose( + cb: vec3, + cs: vec3, + ab: f32, + as_: f32, + mode: u32 +) -> vec4 { + var fa = 0.0; + var fb = 0.0; + switch mode { + // COMPOSE_COPY + case 1u: { + fa = 1.0; + fb = 0.0; + } + // COMPOSE_DEST + case 2u: { + fa = 0.0; + fb = 1.0; + } + // COMPOSE_SRC_OVER + case 3u: { + fa = 1.0; + fb = 1.0 - as_; + } + // COMPOSE_DEST_OVER + case 4u: { + fa = 1.0 - ab; + fb = 1.0; + } + // COMPOSE_SRC_IN + case 5u: { + fa = ab; + fb = 0.0; + } + // COMPOSE_DEST_IN + case 6u: { + fa = 0.0; + fb = as_; + } + // COMPOSE_SRC_OUT + case 7u: { + fa = 1.0 - ab; + fb = 0.0; + } + // COMPOSE_DEST_OUT + case 8u: { + fa = 0.0; + fb = 1.0 - as_; + } + // COMPOSE_SRC_ATOP + case 9u: { + fa = ab; + fb = 1.0 - as_; + } + // COMPOSE_DEST_ATOP + case 10u: { + fa = 1.0 - ab; + fb = as_; + } + // COMPOSE_XOR + case 11u: { + fa = 1.0 - ab; + fb = 1.0 - as_; + } + // COMPOSE_PLUS + case 12u: { + fa = 1.0; + fb = 1.0; + } + // COMPOSE_PLUS_LIGHTER + case 13u: { + return min(vec4(1.0), vec4(as_ * cs + ab * cb, as_ + ab)); + } + default: {} + } + let as_fa = as_ * fa; + let ab_fb = ab * fb; + let co = as_fa * cs + ab_fb * cb; + return vec4(co, as_fa + ab_fb); +} + +// Apply color mixing and composition. Both input and output colors are +// premultiplied RGB. +fn blend_mix_compose(backdrop: vec4, src: vec4, mode: u32) -> vec4 { + let BLEND_DEFAULT = ((MIX_NORMAL << 8u) | COMPOSE_SRC_OVER); + let EPSILON = 1e-15; + if (mode & 0x7fffu) == BLEND_DEFAULT { + // Both normal+src_over blend and clip case + return backdrop * (1.0 - src.a) + src; + } + // Un-premultiply colors for blending + let inv_src_a = 1.0 / (src.a + EPSILON); + var cs = src.rgb * inv_src_a; + let inv_backdrop_a = 1.0 / (backdrop.a + EPSILON); + let cb = backdrop.rgb * inv_backdrop_a; + let mix_mode = mode >> 8u; + let mixed = blend_mix(cb, cs, mix_mode); + cs = mix(cs, mixed, backdrop.a); + let compose_mode = mode & 0xffu; + if compose_mode == COMPOSE_SRC_OVER { + let co = mix(backdrop.rgb, cs, src.a); + return vec4(co, src.a + backdrop.a * (1.0 - src.a)); + } else { + return blend_compose(cb, cs, backdrop.a, src.a, compose_mode); + } +} diff --git a/piet-wgsl/src/debug.rs b/piet-wgsl/src/debug.rs new file mode 100644 index 0000000..964c8ee --- /dev/null +++ b/piet-wgsl/src/debug.rs @@ -0,0 +1,5 @@ +#![allow(dead_code)] + +pub mod clip; +pub mod draw; +pub mod fine; diff --git a/piet-wgsl/src/debug/clip.rs b/piet-wgsl/src/debug/clip.rs new file mode 100644 index 0000000..8b2a0e3 --- /dev/null +++ b/piet-wgsl/src/debug/clip.rs @@ -0,0 +1,13 @@ +use bytemuck::{Pod, Zeroable}; + +#[derive(Copy, Clone, Debug, Zeroable, Pod)] +#[repr(C)] +pub struct ClipEl { + pub parent_ix: u32, + pub pad: [u32; 3], + pub bbox: [f32; 4], +} + +pub fn parse_clip_els(data: &[u8]) -> Vec { + Vec::from(bytemuck::cast_slice(data)) +} diff --git a/piet-wgsl/src/debug/draw.rs b/piet-wgsl/src/debug/draw.rs new file mode 100644 index 0000000..ab56ed5 --- /dev/null +++ b/piet-wgsl/src/debug/draw.rs @@ -0,0 +1,14 @@ +use bytemuck::{Pod, Zeroable}; + +#[derive(Copy, Clone, Debug, Zeroable, Pod)] +#[repr(C)] +pub struct DrawMonoid { + pub path_ix: u32, + pub clip_ix: u32, + pub scene_offset: u32, + pub info_offset: u32, +} + +pub fn parse_draw_monoids(data: &[u8]) -> Vec { + Vec::from(bytemuck::cast_slice(data)) +} diff --git a/piet-wgsl/src/debug/fine.rs b/piet-wgsl/src/debug/fine.rs new file mode 100644 index 0000000..d9f05f0 --- /dev/null +++ b/piet-wgsl/src/debug/fine.rs @@ -0,0 +1,153 @@ +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct Fill { + pub tile: u32, + pub backdrop: i32, +} + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct Stroke { + pub tile: u32, + pub half_width: f32, +} + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct Color { + abgr: [u8; 4], +} + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct LinGrad { + pub index: u32, + pub line_x: f32, + pub line_y: f32, + pub line_c: f32, +} + +#[derive(Copy, Clone, Debug)] +#[repr(C)] +pub struct RadGrad { + pub index: u32, + pub matrix: [f32; 4], + pub xlat: [f32; 2], + pub c1: [f32; 2], + pub ra: f32, + pub roff: f32, +} + +#[derive(Copy, Clone, Debug)] +pub enum Command { + Fill(Fill), + Stroke(Stroke), + Solid, + Color(Color), + LinGrad(LinGrad), + RadGrad(RadGrad), + BeginClip, + EndClip(u32), + End, +} + +const PTCL_INITIAL_ALLOC: usize = 64; + +#[derive(Debug)] +pub struct CommandList { + pub tiles: Vec<(u32, u32, Vec)>, +} + +impl CommandList { + pub fn parse(width: usize, height: usize, ptcl: &[u8]) -> Self { + let mut tiles = vec![]; + let width_tiles = width / 16; + let height_tiles = height / 16; + for y in 0..height_tiles { + for x in 0..width_tiles { + let tile_ix = y * width_tiles + x; + let ix = tile_ix * PTCL_INITIAL_ALLOC; + let commands = parse_commands(ptcl, ix); + if !commands.is_empty() { + tiles.push((x as u32, y as u32, commands)); + } + } + } + Self { tiles } + } +} + +fn parse_commands(ptcl: &[u8], mut ix: usize) -> Vec { + let mut commands = vec![]; + let words: &[u32] = bytemuck::cast_slice(ptcl); + while ix < words.len() { + let tag = words[ix]; + ix += 1; + match tag { + 0 => break, + 1 => { + commands.push(Command::Fill(Fill { + tile: words[ix], + backdrop: words[ix + 1] as i32, + })); + ix += 2; + } + 2 => { + commands.push(Command::Stroke(Stroke { + tile: words[ix], + half_width: bytemuck::cast(words[ix + 1]), + })); + ix += 2; + } + 3 => { + commands.push(Command::Solid); + } + 5 => { + commands.push(Command::Color(Color { + abgr: bytemuck::cast(words[ix]), + })); + ix += 1; + } + 6 => { + commands.push(Command::LinGrad(LinGrad { + index: words[ix], + line_x: bytemuck::cast(words[ix + 1]), + line_y: bytemuck::cast(words[ix + 2]), + line_c: bytemuck::cast(words[ix + 3]), + })); + ix += 4; + } + 7 => { + let matrix = [ + bytemuck::cast(words[ix + 1]), + bytemuck::cast(words[ix + 2]), + bytemuck::cast(words[ix + 3]), + bytemuck::cast(words[ix + 4]), + ]; + let xlat = [bytemuck::cast(words[ix + 5]), bytemuck::cast(words[ix + 6])]; + let c1 = [bytemuck::cast(words[ix + 7]), bytemuck::cast(words[ix + 8])]; + commands.push(Command::RadGrad(RadGrad { + index: words[ix], + matrix, + xlat, + c1, + ra: bytemuck::cast(words[ix + 9]), + roff: bytemuck::cast(words[ix + 10]), + })); + ix += 11; + } + 9 => { + commands.push(Command::BeginClip); + } + 10 => { + commands.push(Command::EndClip(words[ix])); + ix += 1; + } + 11 => { + ix = words[ix] as usize; + } + _ => {} + } + } + commands +} diff --git a/piet-wgsl/src/engine.rs b/piet-wgsl/src/engine.rs index 9a3556c..049fc80 100644 --- a/piet-wgsl/src/engine.rs +++ b/piet-wgsl/src/engine.rs @@ -341,7 +341,10 @@ impl Recording { impl BufProxy { pub fn new(size: u64) -> Self { let id = Id::next(); - BufProxy { id, size } + BufProxy { + id, + size: size.max(16), + } } } @@ -360,6 +363,20 @@ impl ResourceProxy { pub fn new_image(width: u32, height: u32) -> Self { Self::Image(ImageProxy::new(width, height)) } + + pub fn as_buf(&self) -> Option<&BufProxy> { + match self { + Self::Buf(proxy) => Some(&proxy), + _ => None, + } + } + + pub fn as_image(&self) -> Option<&ImageProxy> { + match self { + Self::Image(proxy) => Some(&proxy), + _ => None, + } + } } impl From for ResourceProxy { diff --git a/piet-wgsl/src/main.rs b/piet-wgsl/src/main.rs index 2f1d885..4e10486 100644 --- a/piet-wgsl/src/main.rs +++ b/piet-wgsl/src/main.rs @@ -22,6 +22,7 @@ use engine::Engine; use wgpu::{Device, Limits, Queue}; +mod debug; mod engine; mod pico_svg; mod ramp; diff --git a/piet-wgsl/src/render.rs b/piet-wgsl/src/render.rs index 4550927..ee13694 100644 --- a/piet-wgsl/src/render.rs +++ b/piet-wgsl/src/render.rs @@ -14,6 +14,10 @@ const PATH_BBOX_SIZE: u64 = 24; const CUBIC_SIZE: u64 = 40; const DRAWMONOID_SIZE: u64 = 16; const MAX_DRAWINFO_SIZE: u64 = 44; +const CLIP_BIC_SIZE: u64 = 8; +const CLIP_EL_SIZE: u64 = 32; +const CLIP_INP_SIZE: u64 = 4; +const CLIP_BBOX_SIZE: u64 = 16; const PATH_SIZE: u64 = 32; const DRAW_BBOX_SIZE: u64 = 16; const BUMP_SIZE: u64 = 16; @@ -187,7 +191,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy let n_path = data.n_path; // TODO: calculate for real when we do rectangles let n_drawobj = n_path; - let n_clip = 0; // TODO: wire up correctly + let n_clip = data.n_clip; let config = Config { width_in_tiles: 64, height_in_tiles: 64, @@ -251,6 +255,7 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy ); let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE); let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE); + let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE); recording.dispatch( shaders.draw_leaf, (drawobj_wgs, 1, 1), @@ -261,12 +266,45 @@ pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy path_bbox_buf, draw_monoid_buf, info_buf, + clip_inp_buf, ], ); + let clip_el_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_EL_SIZE); + let clip_bic_buf = + ResourceProxy::new_buf((n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE); + let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG; + if clip_wg_reduce > 0 { + recording.dispatch( + shaders.clip_reduce, + (clip_wg_reduce, 1, 1), + [ + config_buf, + clip_inp_buf, + path_bbox_buf, + clip_bic_buf, + clip_el_buf, + ], + ); + } + let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG; + let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE); + if clip_wg > 0 { + recording.dispatch( + shaders.clip_leaf, + (clip_wg, 1, 1), + [ + config_buf, + clip_inp_buf, + path_bbox_buf, + clip_bic_buf, + clip_el_buf, + draw_monoid_buf, + clip_bbox_buf, + ], + ); + } let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE); let bump_buf = BufProxy::new(BUMP_SIZE); - // Not actually used yet. - let clip_bbox_buf = ResourceProxy::new_buf(1024); let bin_data_buf = ResourceProxy::new_buf(1 << 20); let width_in_bins = (config.width_in_tiles + 15) / 16; let height_in_bins = (config.height_in_tiles + 15) / 16; diff --git a/piet-wgsl/src/shaders.rs b/piet-wgsl/src/shaders.rs index 8e6e89f..0e61710 100644 --- a/piet-wgsl/src/shaders.rs +++ b/piet-wgsl/src/shaders.rs @@ -28,6 +28,7 @@ pub const PATHTAG_REDUCE_WG: u32 = 256; pub const PATH_BBOX_WG: u32 = 256; pub const PATH_COARSE_WG: u32 = 256; pub const PATH_DRAWOBJ_WG: u32 = 256; +pub const CLIP_REDUCE_WG: u32 = 256; pub struct Shaders { pub pathtag_reduce: ShaderId, @@ -45,6 +46,8 @@ pub struct FullShaders { pub pathseg: ShaderId, pub draw_reduce: ShaderId, pub draw_leaf: ShaderId, + pub clip_reduce: ShaderId, + pub clip_leaf: ShaderId, pub binning: ShaderId, pub tile_alloc: ShaderId, pub path_coarse: ShaderId, @@ -178,6 +181,31 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result Result Scene { builder.stroke(&style, transform, &brush, None, &path); } 1 => { - let path = [ - PathElement::MoveTo(Point::new(100.0, 100.0)), - PathElement::LineTo(Point::new(300.0, 100.0)), - PathElement::LineTo(Point::new(300.0, 300.0)), - PathElement::LineTo(Point::new(100.0, 300.0)), - PathElement::Close, - ]; - let gradient = Brush::LinearGradient(LinearGradient { - start: Point::new(100.0, 100.0), - end: Point::new(300.0, 300.0), - extend: piet_scene::ExtendMode::Pad, - stops: vec![ - GradientStop { - offset: 0.0, - color: Color::rgb8(255, 0, 0), - }, - GradientStop { - offset: 0.5, - color: Color::rgb8(0, 255, 0), - }, - GradientStop { - offset: 1.0, - color: Color::rgb8(0, 0, 255), - }, - ] - .into(), - }); - builder.fill( - Fill::NonZero, - Affine::scale(3.0, 3.0), - &gradient, - None, - &path, - ); + render_blend_grid(&mut builder); } _ => { let xml_str = @@ -90,6 +57,7 @@ pub fn gen_test_scene() -> Scene { render_svg(&mut builder, &svg, false); } } + builder.finish(); scene } @@ -151,3 +119,163 @@ fn simple_stroke(width: f32) -> Stroke<[f32; 0]> { scale: true, } } + +#[allow(unused)] +pub fn render_blend_grid(sb: &mut SceneBuilder) { + const BLEND_MODES: &[Mix] = &[ + Mix::Normal, + Mix::Multiply, + Mix::Darken, + Mix::Screen, + Mix::Lighten, + Mix::Overlay, + Mix::ColorDodge, + Mix::ColorBurn, + Mix::HardLight, + Mix::SoftLight, + Mix::Difference, + Mix::Exclusion, + Mix::Hue, + Mix::Saturation, + Mix::Color, + Mix::Luminosity, + ]; + for (ix, &blend) in BLEND_MODES.iter().enumerate() { + let i = ix % 4; + let j = ix / 4; + let transform = Affine::translate(i as f32 * 225., j as f32 * 225.); + let square = blend_square(blend.into()); + sb.append(&square, Some(transform)); + } +} + +#[allow(unused)] +fn render_blend_square(sb: &mut SceneBuilder, blend: BlendMode, transform: Affine) { + // Inspired by https://developer.mozilla.org/en-US/docs/Web/CSS/mix-blend-mode + let rect = Rect::from_origin_size(Point::new(0., 0.), 200., 200.); + let stops = &[ + GradientStop { + color: Color::rgb8(0, 0, 0), + offset: 0.0, + }, + GradientStop { + color: Color::rgb8(255, 255, 255), + offset: 1.0, + }, + ][..]; + let linear = Brush::LinearGradient(LinearGradient { + start: Point::new(0.0, 0.0), + end: Point::new(200.0, 0.0), + stops: stops.into(), + extend: ExtendMode::Pad, + }); + sb.fill(Fill::NonZero, transform, &linear, None, rect.elements()); + const GRADIENTS: &[(f32, f32, Color)] = &[ + (150., 0., Color::rgb8(255, 240, 64)), + (175., 100., Color::rgb8(255, 96, 240)), + (125., 200., Color::rgb8(64, 192, 255)), + ]; + for (x, y, c) in GRADIENTS { + let mut color2 = c.clone(); + color2.a = 0; + let stops = &[ + GradientStop { + color: c.clone(), + offset: 0.0, + }, + GradientStop { + color: color2, + offset: 1.0, + }, + ][..]; + let rad = Brush::RadialGradient(RadialGradient { + center0: Point::new(*x, *y), + center1: Point::new(*x, *y), + radius0: 0.0, + radius1: 100.0, + stops: stops.into(), + extend: ExtendMode::Pad, + }); + sb.fill(Fill::NonZero, transform, &rad, None, rect.elements()); + } + const COLORS: &[Color] = &[ + Color::rgb8(255, 0, 0), + Color::rgb8(0, 255, 0), + Color::rgb8(0, 0, 255), + ]; + sb.push_layer(Mix::Normal.into(), transform, rect.elements()); + for (i, c) in COLORS.iter().enumerate() { + let stops = &[ + GradientStop { + color: Color::rgb8(255, 255, 255), + offset: 0.0, + }, + GradientStop { + color: c.clone(), + offset: 1.0, + }, + ][..]; + let linear = Brush::LinearGradient(LinearGradient { + start: Point::new(0.0, 0.0), + end: Point::new(0.0, 200.0), + stops: stops.into(), + extend: ExtendMode::Pad, + }); + sb.push_layer(blend, transform, rect.elements()); + // squash the ellipse + let a = transform + * Affine::translate(100., 100.) + * Affine::rotate(std::f32::consts::FRAC_PI_3 * (i * 2 + 1) as f32) + * Affine::scale(1.0, 0.357) + * Affine::translate(-100., -100.); + sb.fill( + Fill::NonZero, + a, + &linear, + None, + make_ellipse(100., 100., 90., 90.), + ); + sb.pop_layer(); + } + sb.pop_layer(); +} + +#[allow(unused)] +fn blend_square(blend: BlendMode) -> SceneFragment { + let mut fragment = SceneFragment::default(); + let mut sb = SceneBuilder::for_fragment(&mut fragment); + render_blend_square(&mut sb, blend, Affine::IDENTITY); + sb.finish(); + fragment +} + +fn make_ellipse(cx: f32, cy: f32, rx: f32, ry: f32) -> impl Iterator + Clone { + let a = 0.551915024494; + let arx = a * rx; + let ary = a * ry; + let elements = [ + PathElement::MoveTo(Point::new(cx + rx, cy)), + PathElement::CurveTo( + Point::new(cx + rx, cy + ary), + Point::new(cx + arx, cy + ry), + Point::new(cx, cy + ry), + ), + PathElement::CurveTo( + Point::new(cx - arx, cy + ry), + Point::new(cx - rx, cy + ary), + Point::new(cx - rx, cy), + ), + PathElement::CurveTo( + Point::new(cx - rx, cy - ary), + Point::new(cx - arx, cy - ry), + Point::new(cx, cy - ry), + ), + PathElement::CurveTo( + Point::new(cx + arx, cy - ry), + Point::new(cx + rx, cy - ary), + Point::new(cx + rx, cy), + ), + PathElement::Close, + ]; + (0..elements.len()).map(move |i| elements[i]) +}