mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Merge pull request #203 from linebender/clip_checkpoint
Extend pipeline
This commit is contained in:
commit
de998220db
12
.vscode/settings.json
vendored
12
.vscode/settings.json
vendored
|
@ -1,8 +1,16 @@
|
||||||
{
|
{
|
||||||
"wgsl-analyzer.customImports": {
|
"wgsl-analyzer.customImports": {
|
||||||
|
"bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl",
|
||||||
|
"bump": "${workspaceFolder}/piet-wgsl/shader/shared/bump.wgsl",
|
||||||
|
"clip": "${workspaceFolder}/piet-wgsl/shader/shared/clip.wgsl",
|
||||||
"config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl",
|
"config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl",
|
||||||
|
"cubic": "${workspaceFolder}/piet-wgsl/shader/shared/cubic.wgsl",
|
||||||
|
"drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl",
|
||||||
"segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl",
|
"segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl",
|
||||||
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl"
|
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl",
|
||||||
|
"ptcl": "${workspaceFolder}/piet-wgsl/shader/shared/ptcl.wgsl",
|
||||||
|
"tile": "${workspaceFolder}/piet-wgsl/shader/shared/tile.wgsl"
|
||||||
},
|
},
|
||||||
"wgsl-analyzer.diagnostics.nagaVersion": "main"
|
"wgsl-analyzer.diagnostics.nagaVersion": "main",
|
||||||
|
"wgsl-analyzer.preprocessor.shaderDefs": ["full"]
|
||||||
}
|
}
|
||||||
|
|
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -1174,10 +1174,12 @@ dependencies = [
|
||||||
"bytemuck",
|
"bytemuck",
|
||||||
"env_logger",
|
"env_logger",
|
||||||
"futures-intrusive",
|
"futures-intrusive",
|
||||||
|
"kurbo 0.8.3",
|
||||||
"parking_lot",
|
"parking_lot",
|
||||||
"piet-scene",
|
"piet-scene",
|
||||||
"png",
|
"png",
|
||||||
"pollster",
|
"pollster",
|
||||||
|
"roxmltree",
|
||||||
"wgpu",
|
"wgpu",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -14,4 +14,8 @@ parking_lot = "0.12"
|
||||||
bytemuck = { version = "1.12.1", features = ["derive"] }
|
bytemuck = { version = "1.12.1", features = ["derive"] }
|
||||||
png = "0.17.6"
|
png = "0.17.6"
|
||||||
|
|
||||||
piet-scene = { path = "../piet-scene" }
|
piet-scene = { path = "../piet-scene", features = ["kurbo"] }
|
||||||
|
|
||||||
|
# for picosvg, should be split out
|
||||||
|
roxmltree = "0.13"
|
||||||
|
kurbo = "0.8.3"
|
||||||
|
|
84
piet-wgsl/shader/backdrop_dyn.wgsl
Normal file
84
piet-wgsl/shader/backdrop_dyn.wgsl
Normal file
|
@ -0,0 +1,84 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// Prefix sum for dynamically allocated backdrops
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import tile
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> paths: array<Path>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage, read_write> tiles: array<Tile>;
|
||||||
|
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
|
||||||
|
var<workgroup> sh_row_width: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_row_count: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_offset: array<u32, WG_SIZE>;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let drawobj_ix = global_id.x;
|
||||||
|
var row_count = 0u;
|
||||||
|
if drawobj_ix < config.n_drawobj {
|
||||||
|
// TODO: when rectangles, path and draw obj are not the same
|
||||||
|
let path = paths[drawobj_ix];
|
||||||
|
sh_row_width[local_id.x] = path.bbox.z - path.bbox.x;
|
||||||
|
row_count = path.bbox.w - path.bbox.y;
|
||||||
|
sh_offset[local_id.x] = path.tiles;
|
||||||
|
}
|
||||||
|
sh_row_count[local_id.x] = row_count;
|
||||||
|
|
||||||
|
// Prefix sum of row counts
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x >= (1u << i) {
|
||||||
|
row_count += sh_row_count[local_id.x - (1u << i)];
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_row_count[local_id.x] = row_count;
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
let total_rows = sh_row_count[WG_SIZE - 1u];
|
||||||
|
for (var row = local_id.x; row < total_rows; row += WG_SIZE) {
|
||||||
|
var el_ix = 0u;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
let probe = el_ix + ((WG_SIZE / 2u) >> i);
|
||||||
|
if row >= sh_row_count[probe - 1u] {
|
||||||
|
el_ix = probe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let width = sh_row_width[el_ix];
|
||||||
|
if width > 0u {
|
||||||
|
var seq_ix = row - select(0u, sh_row_count[el_ix - 1u], el_ix > 0u);
|
||||||
|
var tile_ix = sh_offset[el_ix] + seq_ix * width;
|
||||||
|
var sum = tiles[tile_ix].backdrop;
|
||||||
|
for (var x = 1u; x < width; x += 1u) {
|
||||||
|
tile_ix += 1u;
|
||||||
|
sum += tiles[tile_ix].backdrop;
|
||||||
|
tiles[tile_ix].backdrop = sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
45
piet-wgsl/shader/bbox_clear.wgsl
Normal file
45
piet-wgsl/shader/bbox_clear.wgsl
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
#import config
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
struct PathBbox {
|
||||||
|
x0: i32,
|
||||||
|
y0: i32,
|
||||||
|
x1: i32,
|
||||||
|
y1: i32,
|
||||||
|
linewidth: f32,
|
||||||
|
trans_ix: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage, read_write> path_bboxes: array<PathBbox>;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let ix = global_id.x;
|
||||||
|
if ix < config.n_path {
|
||||||
|
path_bboxes[ix].x0 = 0x7fffffff;
|
||||||
|
path_bboxes[ix].y0 = 0x7fffffff;
|
||||||
|
path_bboxes[ix].x1 = -0x80000000;
|
||||||
|
path_bboxes[ix].y1 = -0x80000000;
|
||||||
|
}
|
||||||
|
}
|
164
piet-wgsl/shader/binning.wgsl
Normal file
164
piet-wgsl/shader/binning.wgsl
Normal file
|
@ -0,0 +1,164 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// The binning stage
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import drawtag
|
||||||
|
#import bbox
|
||||||
|
#import bump
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> draw_monoids: array<DrawMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage> path_bbox_buf: array<PathBbox>;
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
|
var<storage> clip_bbox_buf: array<vec4<f32>>;
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage, read_write> intersected_bbox: array<vec4<f32>>;
|
||||||
|
|
||||||
|
@group(0) @binding(5)
|
||||||
|
var<storage, read_write> bump: BumpAllocators;
|
||||||
|
|
||||||
|
@group(0) @binding(6)
|
||||||
|
var<storage, read_write> bin_data: array<u32>;
|
||||||
|
|
||||||
|
// TODO: put in common place
|
||||||
|
struct BinHeader {
|
||||||
|
element_count: u32,
|
||||||
|
chunk_offset: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
@group(0) @binding(7)
|
||||||
|
var<storage, read_write> bin_header: array<BinHeader>;
|
||||||
|
|
||||||
|
// conversion factors from coordinates to bin
|
||||||
|
let SX = 0.00390625;
|
||||||
|
let SY = 0.00390625;
|
||||||
|
//let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH);
|
||||||
|
//let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT);
|
||||||
|
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
let N_SLICE = 8u;
|
||||||
|
//let N_SLICE = WG_SIZE / 32u;
|
||||||
|
|
||||||
|
var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>;
|
||||||
|
var<workgroup> sh_count: array<array<u32, N_TILE>, N_SLICE>;
|
||||||
|
var<workgroup> sh_chunk_offset: array<u32, N_TILE>;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
for (var i = 0u; i < N_SLICE; i += 1u) {
|
||||||
|
atomicStore(&sh_bitmaps[i][local_id.x], 0u);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read inputs and determine coverage of bins
|
||||||
|
let element_ix = global_id.x;
|
||||||
|
var x0 = 0;
|
||||||
|
var y0 = 0;
|
||||||
|
var x1 = 0;
|
||||||
|
var y1 = 0;
|
||||||
|
if element_ix < config.n_drawobj {
|
||||||
|
let draw_monoid = draw_monoids[element_ix];
|
||||||
|
var clip_bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
|
||||||
|
if draw_monoid.clip_ix > 0u {
|
||||||
|
clip_bbox = clip_bbox_buf[draw_monoid.clip_ix - 1u];
|
||||||
|
}
|
||||||
|
// For clip elements, clip_box is the bbox of the clip path,
|
||||||
|
// intersected with enclosing clips.
|
||||||
|
// For other elements, it is the bbox of the enclosing clips.
|
||||||
|
// TODO check this is true
|
||||||
|
|
||||||
|
let path_bbox = path_bbox_buf[draw_monoid.path_ix];
|
||||||
|
let pb = vec4<f32>(vec4<i32>(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1));
|
||||||
|
let bbox_raw = bbox_intersect(clip_bbox, pb);
|
||||||
|
// TODO(naga): clunky expression a workaround for broken lhs swizzle
|
||||||
|
let bbox = vec4<f32>(bbox_raw.xy, max(bbox_raw.xy, bbox_raw.zw));
|
||||||
|
|
||||||
|
intersected_bbox[element_ix] = bbox;
|
||||||
|
x0 = i32(floor(bbox.x * SX));
|
||||||
|
y0 = i32(floor(bbox.y * SY));
|
||||||
|
x1 = i32(ceil(bbox.z * SX));
|
||||||
|
y1 = i32(ceil(bbox.w * SY));
|
||||||
|
}
|
||||||
|
let width_in_bins = i32((config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X);
|
||||||
|
let height_in_bins = i32((config.height_in_tiles + N_TILE_Y - 1u) / N_TILE_Y);
|
||||||
|
x0 = clamp(x0, 0, width_in_bins);
|
||||||
|
y0 = clamp(y0, 0, height_in_bins);
|
||||||
|
x1 = clamp(x1, 0, width_in_bins);
|
||||||
|
y1 = clamp(y1, 0, height_in_bins);
|
||||||
|
if x0 == x1 {
|
||||||
|
y1 = y0;
|
||||||
|
}
|
||||||
|
var x = x0;
|
||||||
|
var y = y0;
|
||||||
|
let my_slice = local_id.x / 32u;
|
||||||
|
let my_mask = 1u << (local_id.x & 31u);
|
||||||
|
while y < y1 {
|
||||||
|
atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask);
|
||||||
|
x += 1;
|
||||||
|
if x == x1 {
|
||||||
|
x = x0;
|
||||||
|
y += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
workgroupBarrier();
|
||||||
|
// Allocate output segments
|
||||||
|
var element_count = 0u;
|
||||||
|
for (var i = 0u; i < N_SLICE; i += 1u) {
|
||||||
|
element_count += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x]));
|
||||||
|
sh_count[i][local_id.x] = element_count;
|
||||||
|
}
|
||||||
|
// element_count is the number of draw objects covering this thread's bin
|
||||||
|
let chunk_offset = atomicAdd(&bump.binning, element_count);
|
||||||
|
sh_chunk_offset[local_id.x] = chunk_offset;
|
||||||
|
bin_header[global_id.x].element_count = element_count;
|
||||||
|
bin_header[global_id.x].chunk_offset = chunk_offset;
|
||||||
|
workgroupBarrier();
|
||||||
|
|
||||||
|
// loop over bbox of bins touched by this draw object
|
||||||
|
x = x0;
|
||||||
|
y = y0;
|
||||||
|
while y < y1 {
|
||||||
|
let bin_ix = y * width_in_bins + x;
|
||||||
|
let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]);
|
||||||
|
// I think this predicate will always be true...
|
||||||
|
if (out_mask & my_mask) != 0u {
|
||||||
|
var idx = countOneBits(out_mask & (my_mask - 1u));
|
||||||
|
if my_slice > 0u {
|
||||||
|
idx += sh_count[my_slice - 1u][bin_ix];
|
||||||
|
}
|
||||||
|
let offset = sh_chunk_offset[bin_ix];
|
||||||
|
bin_data[offset + idx] = element_ix;
|
||||||
|
}
|
||||||
|
x += 1;
|
||||||
|
if x == x1 {
|
||||||
|
x = x0;
|
||||||
|
y += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
199
piet-wgsl/shader/clip_leaf.wgsl
Normal file
199
piet-wgsl/shader/clip_leaf.wgsl
Normal file
|
@ -0,0 +1,199 @@
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import bbox
|
||||||
|
#import clip
|
||||||
|
#import drawtag
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> clip_inp: array<i32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage> path_bboxes: array<PathBbox>;
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
|
var<storage> reduced: array<Bic>;
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage> clip_els: array<ClipEl>;
|
||||||
|
|
||||||
|
@group(0) @binding(5)
|
||||||
|
var<storage, read_write> draw_monoids: array<DrawMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(6)
|
||||||
|
var<storage, read_write> clip_bboxes: array<vec4<f32>>;
|
||||||
|
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
var<workgroup> sh_bic: array<Bic, 510 >;
|
||||||
|
var<workgroup> sh_stack: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_stack_bbox: array<vec4<f32>, WG_SIZE>;
|
||||||
|
var<workgroup> sh_bbox: array<vec4<f32>, WG_SIZE>;
|
||||||
|
var<workgroup> sh_link: array<i32, WG_SIZE>;
|
||||||
|
|
||||||
|
fn search_link(bic: ptr<function, Bic>, ix: u32) -> i32 {
|
||||||
|
var ix = ix;
|
||||||
|
var j = 0u;
|
||||||
|
while j < firstTrailingBit(WG_SIZE) {
|
||||||
|
let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j));
|
||||||
|
if ((ix >> j) & 1u) != 0u {
|
||||||
|
let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic);
|
||||||
|
if test.b > 0u {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
*bic = test;
|
||||||
|
ix -= 1u << j;
|
||||||
|
}
|
||||||
|
j += 1u;
|
||||||
|
}
|
||||||
|
if ix > 0u {
|
||||||
|
while j > 0u {
|
||||||
|
j -= 1u;
|
||||||
|
let base = 2u * WG_SIZE - (2u << (firstTrailingBit(WG_SIZE) - j));
|
||||||
|
let test = bic_combine(sh_bic[base + (ix >> j) - 1u], *bic);
|
||||||
|
if test.b == 0u {
|
||||||
|
*bic = test;
|
||||||
|
ix -= 1u << j;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ix > 0u {
|
||||||
|
return i32(ix) - 1;
|
||||||
|
} else {
|
||||||
|
return i32(~0u - (*bic).a);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn load_clip_inp(ix: u32) -> i32 {
|
||||||
|
if ix < config.n_clip {
|
||||||
|
return clip_inp[ix];
|
||||||
|
} else {
|
||||||
|
return -2147483648;
|
||||||
|
// literal too large?
|
||||||
|
// return 0x80000000;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
var bic: Bic;
|
||||||
|
if local_id.x < wg_id.x {
|
||||||
|
bic = reduced[local_id.x];
|
||||||
|
}
|
||||||
|
sh_bic[local_id.x] = bic;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x + (1u << i) < WG_SIZE {
|
||||||
|
let other = sh_bic[local_id.x + (1u << i)];
|
||||||
|
bic = bic_combine(bic, other);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_bic[local_id.x] = bic;
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
let stack_size = sh_bic[0].b;
|
||||||
|
// TODO: if stack depth > WG_SIZE desired, scan here
|
||||||
|
|
||||||
|
// binary search in stack
|
||||||
|
let sp = WG_SIZE - 1u - local_id.x;
|
||||||
|
var ix = 0u;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
let probe = ix + ((WG_SIZE / 2u) >> i);
|
||||||
|
if sp < sh_bic[probe].b {
|
||||||
|
ix = probe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let b = sh_bic[ix].b;
|
||||||
|
var bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
|
||||||
|
if sp < b {
|
||||||
|
let el = clip_els[ix * WG_SIZE + b - sp - 1u];
|
||||||
|
sh_stack[local_id.x] = el.parent_ix;
|
||||||
|
bbox = el.bbox;
|
||||||
|
}
|
||||||
|
// forward scan of bbox values of prefix stack
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
sh_stack_bbox[local_id.x] = bbox;
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x >= (1u << i) {
|
||||||
|
bbox = bbox_intersect(sh_stack_bbox[local_id.x - (1u << i)], bbox);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
}
|
||||||
|
sh_stack_bbox[local_id.x] = bbox;
|
||||||
|
|
||||||
|
// Read input and compute Bic binary tree
|
||||||
|
let inp = load_clip_inp(global_id.x);
|
||||||
|
let is_push = inp >= 0;
|
||||||
|
var bic = Bic(1u - u32(is_push), u32(is_push));
|
||||||
|
sh_bic[local_id.x] = bic;
|
||||||
|
if is_push {
|
||||||
|
let path_bbox = path_bboxes[inp];
|
||||||
|
bbox = vec4<f32>(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1));
|
||||||
|
} else {
|
||||||
|
bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
|
||||||
|
}
|
||||||
|
var inbase = 0u;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE) - 1u; i += 1u) {
|
||||||
|
let outbase = 2u * WG_SIZE - (1u << (firstTrailingBit(WG_SIZE) - i));
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x < 1u << (firstTrailingBit(WG_SIZE) - 1u - i) {
|
||||||
|
let in_off = inbase + local_id.x * 2u;
|
||||||
|
sh_bic[outbase + local_id.x] = bic_combine(sh_bic[in_off], sh_bic[in_off + 1u]);
|
||||||
|
}
|
||||||
|
inbase = outbase;
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
// search for predecessor node
|
||||||
|
bic = Bic();
|
||||||
|
var link = search_link(&bic, local_id.x);
|
||||||
|
sh_link[local_id.x] = link;
|
||||||
|
workgroupBarrier();
|
||||||
|
let grandparent = select(link - 1, sh_link[link], link >= 0);
|
||||||
|
var parent: i32;
|
||||||
|
if link >= 0 {
|
||||||
|
parent = i32(wg_id.x * WG_SIZE) + link;
|
||||||
|
} else if link + i32(stack_size) >= 0 {
|
||||||
|
parent = i32(sh_stack[i32(WG_SIZE) + link]);
|
||||||
|
} else {
|
||||||
|
parent = -1;
|
||||||
|
}
|
||||||
|
// bbox scan (intersect) across parent links
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
if i != 0u {
|
||||||
|
sh_link[local_id.x] = link;
|
||||||
|
}
|
||||||
|
sh_bbox[local_id.x] = bbox;
|
||||||
|
workgroupBarrier();
|
||||||
|
if link >= 0 {
|
||||||
|
bbox = bbox_intersect(sh_bbox[link], bbox);
|
||||||
|
link = sh_link[link];
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
}
|
||||||
|
if link + i32(stack_size) >= 0 {
|
||||||
|
bbox = bbox_intersect(sh_stack_bbox[i32(WG_SIZE) + link], bbox);
|
||||||
|
}
|
||||||
|
// At this point, bbox is the intersection of bboxes on the path to the root
|
||||||
|
sh_bbox[local_id.x] = bbox;
|
||||||
|
workgroupBarrier();
|
||||||
|
|
||||||
|
if !is_push && global_id.x < config.n_clip {
|
||||||
|
// Fix up drawmonoid so path_ix of EndClip matches BeginClip
|
||||||
|
let path_ix = clip_inp[parent];
|
||||||
|
draw_monoids[~inp].path_ix = u32(path_ix);
|
||||||
|
|
||||||
|
if grandparent >= 0 {
|
||||||
|
bbox = sh_bbox[grandparent];
|
||||||
|
} else if grandparent + i32(stack_size) >= 0 {
|
||||||
|
bbox = sh_stack_bbox[i32(WG_SIZE) + grandparent];
|
||||||
|
} else {
|
||||||
|
bbox = vec4<f32>(-1e9, -1e9, 1e9, 1e9);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
clip_bboxes[global_id.x] = bbox;
|
||||||
|
}
|
66
piet-wgsl/shader/clip_reduce.wgsl
Normal file
66
piet-wgsl/shader/clip_reduce.wgsl
Normal file
|
@ -0,0 +1,66 @@
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import bbox
|
||||||
|
#import clip
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> clip_inp: array<i32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage> path_bboxes: array<PathBbox>;
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
|
var<storage, read_write> reduced: array<Bic>;
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage, read_write> clip_out: array<ClipEl>;
|
||||||
|
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
var<workgroup> sh_bic: array<Bic, WG_SIZE>;
|
||||||
|
var<workgroup> sh_parent: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_path_ix: array<u32, WG_SIZE>;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let inp = clip_inp[global_id.x];
|
||||||
|
let is_push = inp >= 0;
|
||||||
|
var bic = Bic(1u - u32(is_push), u32(is_push));
|
||||||
|
// reverse scan of bicyclic semigroup
|
||||||
|
sh_bic[local_id.x] = bic;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x + (1u << i) < WG_SIZE {
|
||||||
|
let other = sh_bic[local_id.x + (1u << i)];
|
||||||
|
bic = bic_combine(bic, other);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_bic[local_id.x] = bic;
|
||||||
|
}
|
||||||
|
if local_id.x == 0u {
|
||||||
|
reduced[wg_id.x] = bic;
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
let size = sh_bic[0].b;
|
||||||
|
bic = Bic();
|
||||||
|
if is_push && bic.a == 0u {
|
||||||
|
let local_ix = size - bic.b - 1u;
|
||||||
|
sh_parent[local_ix] = local_id.x;
|
||||||
|
sh_path_ix[local_ix] = u32(inp);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
// TODO: possibly do forward scan here if depth can exceed wg size
|
||||||
|
if local_id.x < size {
|
||||||
|
let path_ix = sh_path_ix[local_id.x];
|
||||||
|
let path_bbox = path_bboxes[path_ix];
|
||||||
|
let parent_ix = sh_parent[local_id.x] + wg_id.x * WG_SIZE;
|
||||||
|
let bbox = vec4<f32>(f32(path_bbox.x0), f32(path_bbox.y0), f32(path_bbox.x1), f32(path_bbox.y1));
|
||||||
|
clip_out[global_id.x] = ClipEl(parent_ix, bbox);
|
||||||
|
}
|
||||||
|
}
|
448
piet-wgsl/shader/coarse.wgsl
Normal file
448
piet-wgsl/shader/coarse.wgsl
Normal file
|
@ -0,0 +1,448 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// The coarse rasterization stage.
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import bump
|
||||||
|
#import drawtag
|
||||||
|
#import ptcl
|
||||||
|
#import tile
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage> draw_monoids: array<DrawMonoid>;
|
||||||
|
|
||||||
|
// TODO: dedup
|
||||||
|
struct BinHeader {
|
||||||
|
element_count: u32,
|
||||||
|
chunk_offset: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
|
var<storage> bin_headers: array<BinHeader>;
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage> bin_data: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(5)
|
||||||
|
var<storage> paths: array<Path>;
|
||||||
|
|
||||||
|
@group(0) @binding(6)
|
||||||
|
var<storage> tiles: array<Tile>;
|
||||||
|
|
||||||
|
@group(0) @binding(7)
|
||||||
|
var<storage> info: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(8)
|
||||||
|
var<storage, read_write> bump: BumpAllocators;
|
||||||
|
|
||||||
|
@group(0) @binding(9)
|
||||||
|
var<storage, read_write> ptcl: array<u32>;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Much of this code assumes WG_SIZE == N_TILE. If these diverge, then
|
||||||
|
// a fair amount of fixup is needed.
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
//let N_SLICE = WG_SIZE / 32u;
|
||||||
|
let N_SLICE = 8u;
|
||||||
|
|
||||||
|
var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>;
|
||||||
|
var<workgroup> sh_part_count: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_part_offsets: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_drawobj_ix: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_tile_stride: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_tile_width: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_tile_x0: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_tile_y0: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_tile_base: array<u32, WG_SIZE>;
|
||||||
|
|
||||||
|
// helper functions for writing ptcl
|
||||||
|
|
||||||
|
var<private> cmd_offset: u32;
|
||||||
|
var<private> cmd_limit: u32;
|
||||||
|
|
||||||
|
// Make sure there is space for a command of given size, plus a jump if needed
|
||||||
|
fn alloc_cmd(size: u32) {
|
||||||
|
if cmd_offset + size >= cmd_limit {
|
||||||
|
// We might be able to save a little bit of computation here
|
||||||
|
// by setting the initial value of the bump allocator.
|
||||||
|
let ptcl_dyn_start = config.width_in_tiles * config.height_in_tiles * PTCL_INITIAL_ALLOC;
|
||||||
|
let new_cmd = ptcl_dyn_start + atomicAdd(&bump.ptcl, PTCL_INCREMENT);
|
||||||
|
// TODO: robust memory
|
||||||
|
ptcl[cmd_offset] = CMD_JUMP;
|
||||||
|
ptcl[cmd_offset + 1u] = new_cmd;
|
||||||
|
cmd_offset = new_cmd;
|
||||||
|
cmd_limit = cmd_offset + (PTCL_INCREMENT - PTCL_HEADROOM);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_path(tile: Tile, linewidth: f32) {
|
||||||
|
// TODO: take flags
|
||||||
|
alloc_cmd(3u);
|
||||||
|
if linewidth < 0.0 {
|
||||||
|
if tile.segments != 0u {
|
||||||
|
let fill = CmdFill(tile.segments, tile.backdrop);
|
||||||
|
ptcl[cmd_offset] = CMD_FILL;
|
||||||
|
ptcl[cmd_offset + 1u] = fill.tile;
|
||||||
|
ptcl[cmd_offset + 2u] = u32(fill.backdrop);
|
||||||
|
cmd_offset += 3u;
|
||||||
|
} else {
|
||||||
|
ptcl[cmd_offset] = CMD_SOLID;
|
||||||
|
cmd_offset += 1u;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
let stroke = CmdStroke(tile.segments, 0.5 * linewidth);
|
||||||
|
ptcl[cmd_offset] = CMD_STROKE;
|
||||||
|
ptcl[cmd_offset + 1u] = stroke.tile;
|
||||||
|
ptcl[cmd_offset + 2u] = bitcast<u32>(stroke.half_width);
|
||||||
|
cmd_offset += 3u;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_color(color: CmdColor) {
|
||||||
|
alloc_cmd(2u);
|
||||||
|
ptcl[cmd_offset] = CMD_COLOR;
|
||||||
|
ptcl[cmd_offset + 1u] = color.rgba_color;
|
||||||
|
cmd_offset += 2u;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Discussion point: these are basically copying from info to ptcl. We
|
||||||
|
// could just write an info offset and have fine bind that buffer and read
|
||||||
|
// from it.
|
||||||
|
|
||||||
|
fn write_lin_grad(lin: CmdLinGrad) {
|
||||||
|
alloc_cmd(5u);
|
||||||
|
ptcl[cmd_offset] = CMD_LIN_GRAD;
|
||||||
|
ptcl[cmd_offset + 1u] = lin.index;
|
||||||
|
ptcl[cmd_offset + 2u] = bitcast<u32>(lin.line_x);
|
||||||
|
ptcl[cmd_offset + 3u] = bitcast<u32>(lin.line_y);
|
||||||
|
ptcl[cmd_offset + 4u] = bitcast<u32>(lin.line_c);
|
||||||
|
cmd_offset += 5u;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_rad_grad(rad: CmdRadGrad) {
|
||||||
|
alloc_cmd(12u);
|
||||||
|
ptcl[cmd_offset] = CMD_RAD_GRAD;
|
||||||
|
ptcl[cmd_offset + 1u] = rad.index;
|
||||||
|
ptcl[cmd_offset + 2u] = bitcast<u32>(rad.matrx.x);
|
||||||
|
ptcl[cmd_offset + 3u] = bitcast<u32>(rad.matrx.y);
|
||||||
|
ptcl[cmd_offset + 4u] = bitcast<u32>(rad.matrx.z);
|
||||||
|
ptcl[cmd_offset + 5u] = bitcast<u32>(rad.matrx.w);
|
||||||
|
ptcl[cmd_offset + 6u] = bitcast<u32>(rad.xlat.x);
|
||||||
|
ptcl[cmd_offset + 7u] = bitcast<u32>(rad.xlat.y);
|
||||||
|
ptcl[cmd_offset + 8u] = bitcast<u32>(rad.c1.x);
|
||||||
|
ptcl[cmd_offset + 9u] = bitcast<u32>(rad.c1.y);
|
||||||
|
ptcl[cmd_offset + 10u] = bitcast<u32>(rad.ra);
|
||||||
|
ptcl[cmd_offset + 11u] = bitcast<u32>(rad.roff);
|
||||||
|
cmd_offset += 12u;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_begin_clip() {
|
||||||
|
alloc_cmd(1u);
|
||||||
|
ptcl[cmd_offset] = CMD_BEGIN_CLIP;
|
||||||
|
cmd_offset += 1u;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn write_end_clip(blend: u32) {
|
||||||
|
alloc_cmd(2u);
|
||||||
|
ptcl[cmd_offset] = CMD_END_CLIP;
|
||||||
|
ptcl[cmd_offset + 1u] = blend;
|
||||||
|
cmd_offset += 2u;
|
||||||
|
}
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let width_in_bins = (config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X;
|
||||||
|
let bin_ix = width_in_bins * wg_id.y + wg_id.x;
|
||||||
|
let n_partitions = (config.n_drawobj + N_TILE - 1u) / N_TILE;
|
||||||
|
|
||||||
|
// Coordinates of the top left of this bin, in tiles.
|
||||||
|
let bin_tile_x = N_TILE_X * wg_id.x;
|
||||||
|
let bin_tile_y = N_TILE_Y * wg_id.y;
|
||||||
|
|
||||||
|
let tile_x = local_id.x % N_TILE_X;
|
||||||
|
let tile_y = local_id.x / N_TILE_X;
|
||||||
|
let this_tile_ix = (bin_tile_y + tile_y) * config.width_in_tiles + bin_tile_x + tile_x;
|
||||||
|
cmd_offset = this_tile_ix * PTCL_INITIAL_ALLOC;
|
||||||
|
cmd_limit = cmd_offset + (PTCL_INITIAL_ALLOC - PTCL_HEADROOM);
|
||||||
|
|
||||||
|
// clip state
|
||||||
|
var clip_zero_depth = 0u;
|
||||||
|
var clip_depth = 0u;
|
||||||
|
|
||||||
|
var partition_ix = 0u;
|
||||||
|
var rd_ix = 0u;
|
||||||
|
var wr_ix = 0u;
|
||||||
|
var part_start_ix = 0u;
|
||||||
|
var ready_ix = 0u;
|
||||||
|
|
||||||
|
// blend state
|
||||||
|
var render_blend_depth = 0u;
|
||||||
|
var max_blend_depth = 0u;
|
||||||
|
|
||||||
|
while true {
|
||||||
|
for (var i = 0u; i < N_SLICE; i += 1u) {
|
||||||
|
atomicStore(&sh_bitmaps[i][local_id.x], 0u);
|
||||||
|
}
|
||||||
|
|
||||||
|
while true {
|
||||||
|
if ready_ix == wr_ix && partition_ix < n_partitions {
|
||||||
|
part_start_ix = ready_ix;
|
||||||
|
var count = 0u;
|
||||||
|
if partition_ix + local_id.x < n_partitions {
|
||||||
|
let in_ix = (partition_ix + local_id.x) * N_TILE + bin_ix;
|
||||||
|
let bin_header = bin_headers[in_ix];
|
||||||
|
count = bin_header.element_count;
|
||||||
|
sh_part_offsets[local_id.x] = bin_header.chunk_offset;
|
||||||
|
}
|
||||||
|
// prefix sum the element counts
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
sh_part_count[local_id.x] = count;
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x >= (1u << i) {
|
||||||
|
count += sh_part_count[local_id.x - (1u << i)];
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
}
|
||||||
|
sh_part_count[local_id.x] = part_start_ix + count;
|
||||||
|
workgroupBarrier();
|
||||||
|
ready_ix = sh_part_count[WG_SIZE - 1u];
|
||||||
|
partition_ix += WG_SIZE;
|
||||||
|
}
|
||||||
|
// use binary search to find draw object to read
|
||||||
|
var ix = rd_ix + local_id.x;
|
||||||
|
if ix >= wr_ix && ix < ready_ix {
|
||||||
|
var part_ix = 0u;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
let probe = part_ix + ((N_TILE / 2u) >> i);
|
||||||
|
if ix >= sh_part_count[probe - 1u] {
|
||||||
|
part_ix = probe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ix -= select(part_start_ix, sh_part_count[part_ix - 1u], part_ix > 0u);
|
||||||
|
let offset = sh_part_offsets[part_ix];
|
||||||
|
sh_drawobj_ix[local_id.x] = bin_data[offset + ix];
|
||||||
|
}
|
||||||
|
wr_ix = min(rd_ix + N_TILE, ready_ix);
|
||||||
|
if wr_ix - rd_ix >= N_TILE || (wr_ix >= ready_ix && partition_ix >= n_partitions) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// At this point, sh_drawobj_ix[0.. wr_ix - rd_ix] contains merged binning results.
|
||||||
|
var tag = DRAWTAG_NOP;
|
||||||
|
var drawobj_ix: u32;
|
||||||
|
if local_id.x + rd_ix < wr_ix {
|
||||||
|
drawobj_ix = sh_drawobj_ix[local_id.x];
|
||||||
|
tag = scene[config.drawtag_base + drawobj_ix];
|
||||||
|
}
|
||||||
|
|
||||||
|
var tile_count = 0u;
|
||||||
|
// I think this predicate is the same as the last, maybe they can be combined
|
||||||
|
if tag != DRAWTAG_NOP {
|
||||||
|
let path_ix = draw_monoids[drawobj_ix].path_ix;
|
||||||
|
let path = paths[path_ix];
|
||||||
|
let stride = path.bbox.z - path.bbox.x;
|
||||||
|
sh_tile_stride[local_id.x] = stride;
|
||||||
|
let dx = i32(path.bbox.x) - i32(bin_tile_x);
|
||||||
|
let dy = i32(path.bbox.y) - i32(bin_tile_y);
|
||||||
|
let x0 = clamp(dx, 0, i32(N_TILE_X));
|
||||||
|
let y0 = clamp(dy, 0, i32(N_TILE_Y));
|
||||||
|
let x1 = clamp(i32(path.bbox.z) - i32(bin_tile_x), 0, i32(N_TILE_X));
|
||||||
|
let y1 = clamp(i32(path.bbox.w) - i32(bin_tile_y), 0, i32(N_TILE_Y));
|
||||||
|
sh_tile_width[local_id.x] = u32(x1 - x0);
|
||||||
|
sh_tile_x0[local_id.x] = u32(x0);
|
||||||
|
sh_tile_y0[local_id.x] = u32(y0);
|
||||||
|
tile_count = u32(x1 - x0) * u32(y1 - y0);
|
||||||
|
// base relative to bin
|
||||||
|
let base = path.tiles - u32(dy * i32(stride) + dx);
|
||||||
|
sh_tile_base[local_id.x] = base;
|
||||||
|
// TODO: there's a write_tile_alloc here in the source, not sure what it's supposed to do
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prefix sum of tile counts
|
||||||
|
sh_tile_count[local_id.x] = tile_count;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x >= (1u << i) {
|
||||||
|
tile_count += sh_tile_count[local_id.x - (1u << i)];
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_tile_count[local_id.x] = tile_count;
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
let total_tile_count = sh_tile_count[N_TILE - 1u];
|
||||||
|
// Parallel iteration over all tiles
|
||||||
|
for (var ix = local_id.x; ix < total_tile_count; ix += N_TILE) {
|
||||||
|
// Binary search to find draw object which contains this tile
|
||||||
|
var el_ix = 0u;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) {
|
||||||
|
let probe = el_ix + ((N_TILE / 2u) >> i);
|
||||||
|
if ix >= sh_tile_count[probe - 1u] {
|
||||||
|
el_ix = probe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
drawobj_ix = sh_drawobj_ix[el_ix];
|
||||||
|
tag = scene[config.drawtag_base + drawobj_ix];
|
||||||
|
// TODO: clip logic
|
||||||
|
let seq_ix = ix - select(0u, sh_tile_count[el_ix - 1u], el_ix > 0u);
|
||||||
|
let width = sh_tile_width[el_ix];
|
||||||
|
let x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||||
|
let y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||||
|
let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * y + x;
|
||||||
|
let tile = tiles[tile_ix];
|
||||||
|
let is_clip = (tag & 1u) != 0u;
|
||||||
|
var is_blend = false;
|
||||||
|
if is_clip {
|
||||||
|
let BLEND_CLIP = (128u << 8u) | 3u;
|
||||||
|
let scene_offset = draw_monoids[drawobj_ix].scene_offset;
|
||||||
|
let dd = config.drawdata_base + scene_offset;
|
||||||
|
let blend = scene[dd];
|
||||||
|
is_blend = blend != BLEND_CLIP;
|
||||||
|
}
|
||||||
|
let include_tile = tile.segments != 0u || (tile.backdrop == 0) == is_clip || is_blend;
|
||||||
|
if include_tile {
|
||||||
|
let el_slice = el_ix / 32u;
|
||||||
|
let el_mask = 1u << (el_ix & 31u);
|
||||||
|
atomicOr(&sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
// At this point bit drawobj % 32 is set in sh_bitmaps[drawobj / 32][y * N_TILE_X + x]
|
||||||
|
// if drawobj touches tile (x, y).
|
||||||
|
|
||||||
|
// Write per-tile command list for this tile
|
||||||
|
var slice_ix = 0u;
|
||||||
|
var bitmap = atomicLoad(&sh_bitmaps[0u][local_id.x]);
|
||||||
|
while true {
|
||||||
|
if bitmap == 0u {
|
||||||
|
slice_ix += 1u;
|
||||||
|
// potential optimization: make iteration limit dynamic
|
||||||
|
if slice_ix == N_SLICE {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bitmap = atomicLoad(&sh_bitmaps[slice_ix][local_id.x]);
|
||||||
|
if bitmap == 0u {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let el_ix = slice_ix * 32u + firstTrailingBit(bitmap);
|
||||||
|
drawobj_ix = sh_drawobj_ix[el_ix];
|
||||||
|
// clear LSB of bitmap, using bit magic
|
||||||
|
bitmap &= bitmap - 1u;
|
||||||
|
let drawtag = scene[config.drawtag_base + drawobj_ix];
|
||||||
|
let dm = draw_monoids[drawobj_ix];
|
||||||
|
let dd = config.drawdata_base + dm.scene_offset;
|
||||||
|
let di = dm.info_offset;
|
||||||
|
if clip_zero_depth == 0u {
|
||||||
|
let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x;
|
||||||
|
let tile = tiles[tile_ix];
|
||||||
|
switch drawtag {
|
||||||
|
// DRAWTAG_FILL_COLOR
|
||||||
|
case 0x44u: {
|
||||||
|
let linewidth = bitcast<f32>(info[di]);
|
||||||
|
write_path(tile, linewidth);
|
||||||
|
let rgba_color = scene[dd];
|
||||||
|
write_color(CmdColor(rgba_color));
|
||||||
|
}
|
||||||
|
// DRAWTAG_FILL_LIN_GRADIENT
|
||||||
|
case 0x114u: {
|
||||||
|
let linewidth = bitcast<f32>(info[di]);
|
||||||
|
write_path(tile, linewidth);
|
||||||
|
var lin: CmdLinGrad;
|
||||||
|
lin.index = scene[dd];
|
||||||
|
lin.line_x = bitcast<f32>(info[di + 1u]);
|
||||||
|
lin.line_y = bitcast<f32>(info[di + 2u]);
|
||||||
|
lin.line_c = bitcast<f32>(info[di + 3u]);
|
||||||
|
write_lin_grad(lin);
|
||||||
|
}
|
||||||
|
// DRAWTAG_FILL_RAD_GRADIENT
|
||||||
|
case 0x2dcu: {
|
||||||
|
let linewidth = bitcast<f32>(info[di]);
|
||||||
|
write_path(tile, linewidth);
|
||||||
|
var rad: CmdRadGrad;
|
||||||
|
rad.index = scene[dd];
|
||||||
|
let m0 = bitcast<f32>(info[di + 1u]);
|
||||||
|
let m1 = bitcast<f32>(info[di + 2u]);
|
||||||
|
let m2 = bitcast<f32>(info[di + 3u]);
|
||||||
|
let m3 = bitcast<f32>(info[di + 4u]);
|
||||||
|
rad.matrx = vec4<f32>(m0, m1, m2, m3);
|
||||||
|
rad.xlat = vec2<f32>(bitcast<f32>(info[di + 5u]), bitcast<f32>(info[di + 6u]));
|
||||||
|
rad.c1 = vec2<f32>(bitcast<f32>(info[di + 7u]), bitcast<f32>(info[di + 8u]));
|
||||||
|
rad.ra = bitcast<f32>(info[di + 9u]);
|
||||||
|
rad.roff = bitcast<f32>(info[di + 10u]);
|
||||||
|
write_rad_grad(rad);
|
||||||
|
}
|
||||||
|
// DRAWTAG_BEGIN_CLIP
|
||||||
|
case 0x05u: {
|
||||||
|
if tile.segments == 0u && tile.backdrop == 0 {
|
||||||
|
clip_zero_depth = clip_depth + 1u;
|
||||||
|
} else {
|
||||||
|
write_begin_clip();
|
||||||
|
render_blend_depth += 1u;
|
||||||
|
max_blend_depth = max(max_blend_depth, render_blend_depth);
|
||||||
|
}
|
||||||
|
clip_depth += 1u;
|
||||||
|
}
|
||||||
|
// DRAWTAG_END_CLIP
|
||||||
|
case 0x25u: {
|
||||||
|
clip_depth -= 1u;
|
||||||
|
write_path(tile, -1.0);
|
||||||
|
write_end_clip(scene[dd]);
|
||||||
|
render_blend_depth -= 1u;
|
||||||
|
}
|
||||||
|
default: {}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// In "clip zero" state, suppress all drawing
|
||||||
|
switch drawtag {
|
||||||
|
// DRAWTAG_BEGIN_CLIP
|
||||||
|
case 0x05u: {
|
||||||
|
clip_depth += 1u;
|
||||||
|
}
|
||||||
|
// DRAWTAG_END_CLIP
|
||||||
|
case 0x25u: {
|
||||||
|
if clip_depth == clip_zero_depth {
|
||||||
|
clip_zero_depth = 0u;
|
||||||
|
}
|
||||||
|
clip_depth -= 1u;
|
||||||
|
}
|
||||||
|
default: {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rd_ix += N_TILE;
|
||||||
|
if rd_ix >= ready_ix && partition_ix >= n_partitions {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
}
|
||||||
|
if bin_tile_x < config.width_in_tiles && bin_tile_y < config.height_in_tiles {
|
||||||
|
//ptcl[cmd_offset] = CMD_END;
|
||||||
|
// TODO: blend stack allocation
|
||||||
|
}
|
||||||
|
}
|
197
piet-wgsl/shader/draw_leaf.wgsl
Normal file
197
piet-wgsl/shader/draw_leaf.wgsl
Normal file
|
@ -0,0 +1,197 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// Finish prefix sum of drawtags, decode draw objects.
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import clip
|
||||||
|
#import drawtag
|
||||||
|
#import bbox
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage> reduced: array<DrawMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
|
var<storage> path_bbox: array<PathBbox>;
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage, read_write> draw_monoid: array<DrawMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(5)
|
||||||
|
var<storage, read_write> info: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(6)
|
||||||
|
var<storage, read_write> clip_inp: array<i32>;
|
||||||
|
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
|
||||||
|
// Possibly dedup?
|
||||||
|
struct Transform {
|
||||||
|
matrx: vec4<f32>,
|
||||||
|
translate: vec2<f32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_transform(transform_base: u32, ix: u32) -> Transform {
|
||||||
|
let base = transform_base + ix * 6u;
|
||||||
|
let c0 = bitcast<f32>(scene[base]);
|
||||||
|
let c1 = bitcast<f32>(scene[base + 1u]);
|
||||||
|
let c2 = bitcast<f32>(scene[base + 2u]);
|
||||||
|
let c3 = bitcast<f32>(scene[base + 3u]);
|
||||||
|
let c4 = bitcast<f32>(scene[base + 4u]);
|
||||||
|
let c5 = bitcast<f32>(scene[base + 5u]);
|
||||||
|
let matrx = vec4<f32>(c0, c1, c2, c3);
|
||||||
|
let translate = vec2<f32>(c4, c5);
|
||||||
|
return Transform(matrx, translate);
|
||||||
|
}
|
||||||
|
|
||||||
|
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let ix = global_id.x;
|
||||||
|
// Reduce prefix of workgroups up to this one
|
||||||
|
var agg = draw_monoid_identity();
|
||||||
|
if local_id.x < wg_id.x {
|
||||||
|
agg = reduced[local_id.x];
|
||||||
|
}
|
||||||
|
sh_scratch[local_id.x] = agg;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x + (1u << i) < WG_SIZE {
|
||||||
|
let other = sh_scratch[local_id.x + (1u << i)];
|
||||||
|
agg = combine_draw_monoid(agg, other);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_scratch[local_id.x] = agg;
|
||||||
|
}
|
||||||
|
// Two barriers can be eliminated if we use separate shared arrays
|
||||||
|
// for prefix and intra-workgroup prefix sum.
|
||||||
|
workgroupBarrier();
|
||||||
|
var m = sh_scratch[0];
|
||||||
|
workgroupBarrier();
|
||||||
|
let tag_word = scene[config.drawtag_base + ix];
|
||||||
|
agg = map_draw_tag(tag_word);
|
||||||
|
sh_scratch[local_id.x] = agg;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x >= 1u << i {
|
||||||
|
let other = sh_scratch[local_id.x - (1u << i)];
|
||||||
|
agg = combine_draw_monoid(agg, other);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_scratch[local_id.x] = agg;
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x > 0u {
|
||||||
|
m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]);
|
||||||
|
}
|
||||||
|
// m now contains exclusive prefix sum of draw monoid
|
||||||
|
draw_monoid[ix] = m;
|
||||||
|
let dd = config.drawdata_base + m.scene_offset;
|
||||||
|
let di = m.info_offset;
|
||||||
|
if tag_word == DRAWTAG_FILL_COLOR || tag_word == DRAWTAG_FILL_LIN_GRADIENT ||
|
||||||
|
tag_word == DRAWTAG_FILL_RAD_GRADIENT || tag_word == DRAWTAG_FILL_IMAGE ||
|
||||||
|
tag_word == DRAWTAG_BEGIN_CLIP
|
||||||
|
{
|
||||||
|
let bbox = path_bbox[m.path_ix];
|
||||||
|
// TODO: bbox is mostly yagni here, sort that out. Maybe clips?
|
||||||
|
// let x0 = f32(bbox.x0);
|
||||||
|
// let y0 = f32(bbox.y0);
|
||||||
|
// let x1 = f32(bbox.x1);
|
||||||
|
// let y1 = f32(bbox.y1);
|
||||||
|
// let bbox_f = vec4(x0, y0, x1, y1);
|
||||||
|
let fill_mode = u32(bbox.linewidth >= 0.0);
|
||||||
|
var matrx: vec4<f32>;
|
||||||
|
var translate: vec2<f32>;
|
||||||
|
var linewidth = bbox.linewidth;
|
||||||
|
if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT {
|
||||||
|
let transform = read_transform(config.transform_base, bbox.trans_ix);
|
||||||
|
matrx = transform.matrx;
|
||||||
|
translate = transform.translate;
|
||||||
|
}
|
||||||
|
if linewidth >= 0.0 {
|
||||||
|
// Note: doesn't deal with anisotropic case
|
||||||
|
linewidth *= sqrt(abs(matrx.x * matrx.w - matrx.y * matrx.z));
|
||||||
|
}
|
||||||
|
switch tag_word {
|
||||||
|
// DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE
|
||||||
|
case 0x44u, 0x48u: {
|
||||||
|
info[di] = bitcast<u32>(linewidth);
|
||||||
|
}
|
||||||
|
// DRAWTAG_FILL_LIN_GRADIENT
|
||||||
|
case 0x114u: {
|
||||||
|
info[di] = bitcast<u32>(linewidth);
|
||||||
|
var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
|
||||||
|
var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
|
||||||
|
p0 = matrx.xy * p0.x + matrx.zw * p0.y + translate;
|
||||||
|
p1 = matrx.xy * p1.x + matrx.zw * p1.y + translate;
|
||||||
|
let dxy = p1 - p0;
|
||||||
|
let scale = 1.0 / dot(dxy, dxy);
|
||||||
|
let line_xy = dxy * scale;
|
||||||
|
let line_c = -dot(p0, line_xy);
|
||||||
|
info[di + 1u] = bitcast<u32>(line_xy.x);
|
||||||
|
info[di + 2u] = bitcast<u32>(line_xy.y);
|
||||||
|
info[di + 3u] = bitcast<u32>(line_c);
|
||||||
|
}
|
||||||
|
// DRAWTAG_FILL_RAD_GRADIENT
|
||||||
|
case 0x2dcu: {
|
||||||
|
info[di] = bitcast<u32>(linewidth);
|
||||||
|
var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
|
||||||
|
var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
|
||||||
|
let r0 = bitcast<f32>(scene[dd + 5u]);
|
||||||
|
let r1 = bitcast<f32>(scene[dd + 6u]);
|
||||||
|
let inv_det = 1.0 / (matrx.x * matrx.w - matrx.y * matrx.z);
|
||||||
|
let inv_mat = inv_det * vec4<f32>(matrx.w, -matrx.y, -matrx.z, matrx.x);
|
||||||
|
var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y;
|
||||||
|
inv_tr += p0;
|
||||||
|
let center1 = p1 - p0;
|
||||||
|
let rr = r1 / (r1 - r0);
|
||||||
|
let ra_inv = rr / (r1 * r1 - dot(center1, center1));
|
||||||
|
let c1 = center1 * ra_inv;
|
||||||
|
let ra = rr * ra_inv;
|
||||||
|
let roff = rr - 1.0;
|
||||||
|
info[di + 1u] = bitcast<u32>(inv_mat.x);
|
||||||
|
info[di + 2u] = bitcast<u32>(inv_mat.y);
|
||||||
|
info[di + 3u] = bitcast<u32>(inv_mat.z);
|
||||||
|
info[di + 4u] = bitcast<u32>(inv_mat.w);
|
||||||
|
info[di + 5u] = bitcast<u32>(inv_tr.x);
|
||||||
|
info[di + 6u] = bitcast<u32>(inv_tr.y);
|
||||||
|
info[di + 7u] = bitcast<u32>(c1.x);
|
||||||
|
info[di + 8u] = bitcast<u32>(c1.y);
|
||||||
|
info[di + 9u] = bitcast<u32>(ra);
|
||||||
|
info[di + 10u] = bitcast<u32>(roff);
|
||||||
|
}
|
||||||
|
default: {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if tag_word == DRAWTAG_BEGIN_CLIP || tag_word == DRAWTAG_END_CLIP {
|
||||||
|
var path_ix = ~ix;
|
||||||
|
if tag_word == DRAWTAG_BEGIN_CLIP {
|
||||||
|
path_ix = m.path_ix;
|
||||||
|
}
|
||||||
|
clip_inp[m.clip_ix] = i32(path_ix);
|
||||||
|
}
|
||||||
|
}
|
54
piet-wgsl/shader/draw_reduce.wgsl
Normal file
54
piet-wgsl/shader/draw_reduce.wgsl
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import drawtag
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage, read_write> reduced: array<DrawMonoid>;
|
||||||
|
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
|
||||||
|
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let ix = global_id.x;
|
||||||
|
let tag_word = scene[config.drawtag_base + ix];
|
||||||
|
var agg = map_draw_tag(tag_word);
|
||||||
|
sh_scratch[local_id.x] = agg;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x + (1u << i) < WG_SIZE {
|
||||||
|
let other = sh_scratch[local_id.x + (1u << i)];
|
||||||
|
agg = combine_draw_monoid(agg, other);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_scratch[local_id.x] = agg;
|
||||||
|
}
|
||||||
|
if local_id.x == 0u {
|
||||||
|
reduced[ix >> firstTrailingBit(WG_SIZE)] = agg;
|
||||||
|
}
|
||||||
|
}
|
|
@ -14,6 +14,9 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// Fine rasterizer. This can run in simple (just path rendering) and full
|
||||||
|
// modes, controllable by #define.
|
||||||
|
|
||||||
// This is a cut'n'paste w/ backdrop.
|
// This is a cut'n'paste w/ backdrop.
|
||||||
struct Tile {
|
struct Tile {
|
||||||
backdrop: i32,
|
backdrop: i32,
|
||||||
|
@ -36,17 +39,64 @@ var<storage> segments: array<Segment>;
|
||||||
@group(0) @binding(3)
|
@group(0) @binding(3)
|
||||||
var<storage, read_write> output: array<u32>;
|
var<storage, read_write> output: array<u32>;
|
||||||
|
|
||||||
|
#ifdef full
|
||||||
|
|
||||||
|
#import blend
|
||||||
|
#import ptcl
|
||||||
|
|
||||||
|
let GRADIENT_WIDTH = 512;
|
||||||
|
let BLEND_STACK_SPLIT = 4u;
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage> ptcl: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(5)
|
||||||
|
var gradients: texture_2d<f32>;
|
||||||
|
|
||||||
|
fn read_fill(cmd_ix: u32) -> CmdFill {
|
||||||
|
let tile = ptcl[cmd_ix + 1u];
|
||||||
|
let backdrop = i32(ptcl[cmd_ix + 2u]);
|
||||||
|
return CmdFill(tile, backdrop);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_stroke(cmd_ix: u32) -> CmdStroke {
|
||||||
|
let tile = ptcl[cmd_ix + 1u];
|
||||||
|
let half_width = bitcast<f32>(ptcl[cmd_ix + 2u]);
|
||||||
|
return CmdStroke(tile, half_width);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_color(cmd_ix: u32) -> CmdColor {
|
||||||
|
let rgba_color = ptcl[cmd_ix + 1u];
|
||||||
|
return CmdColor(rgba_color);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_lin_grad(cmd_ix: u32) -> CmdLinGrad {
|
||||||
|
let index = ptcl[cmd_ix + 1u];
|
||||||
|
let line_x = bitcast<f32>(ptcl[cmd_ix + 2u]);
|
||||||
|
let line_y = bitcast<f32>(ptcl[cmd_ix + 3u]);
|
||||||
|
let line_c = bitcast<f32>(ptcl[cmd_ix + 4u]);
|
||||||
|
return CmdLinGrad(index, line_x, line_y, line_c);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_rad_grad(cmd_ix: u32) -> CmdRadGrad {
|
||||||
|
let index = ptcl[cmd_ix + 1u];
|
||||||
|
let m0 = bitcast<f32>(ptcl[cmd_ix + 2u]);
|
||||||
|
let m1 = bitcast<f32>(ptcl[cmd_ix + 3u]);
|
||||||
|
let m2 = bitcast<f32>(ptcl[cmd_ix + 4u]);
|
||||||
|
let m3 = bitcast<f32>(ptcl[cmd_ix + 5u]);
|
||||||
|
let matrx = vec4<f32>(m0, m1, m2, m3);
|
||||||
|
let xlat = vec2<f32>(bitcast<f32>(ptcl[cmd_ix + 6u]), bitcast<f32>(ptcl[cmd_ix + 7u]));
|
||||||
|
let c1 = vec2<f32>(bitcast<f32>(ptcl[cmd_ix + 8u]), bitcast<f32>(ptcl[cmd_ix + 9u]));
|
||||||
|
let ra = bitcast<f32>(ptcl[cmd_ix + 10u]);
|
||||||
|
let roff = bitcast<f32>(ptcl[cmd_ix + 11u]);
|
||||||
|
return CmdRadGrad(index, matrx, xlat, c1, ra, roff);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
let PIXELS_PER_THREAD = 4u;
|
let PIXELS_PER_THREAD = 4u;
|
||||||
|
|
||||||
@compute @workgroup_size(4, 16)
|
fn fill_path(tile: Tile, xy: vec2<f32>) -> array<f32, PIXELS_PER_THREAD> {
|
||||||
fn main(
|
|
||||||
@builtin(global_invocation_id) global_id: vec3<u32>,
|
|
||||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
|
||||||
@builtin(workgroup_id) wg_id: vec3<u32>,
|
|
||||||
) {
|
|
||||||
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
|
|
||||||
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
|
|
||||||
let tile = tiles[tile_ix];
|
|
||||||
var area: array<f32, PIXELS_PER_THREAD>;
|
var area: array<f32, PIXELS_PER_THREAD>;
|
||||||
let backdrop_f = f32(tile.backdrop);
|
let backdrop_f = f32(tile.backdrop);
|
||||||
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
@ -89,8 +139,168 @@ fn main(
|
||||||
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
area[i] = abs(area[i]);
|
area[i] = abs(area[i]);
|
||||||
}
|
}
|
||||||
|
return area;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn stroke_path(seg: u32, half_width: f32, xy: vec2<f32>) -> array<f32, PIXELS_PER_THREAD> {
|
||||||
|
var df: array<f32, PIXELS_PER_THREAD>;
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
df[i] = 1e9;
|
||||||
|
}
|
||||||
|
var segment_ix = seg;
|
||||||
|
while segment_ix != 0u {
|
||||||
|
let segment = segments[segment_ix];
|
||||||
|
let delta = segment.delta;
|
||||||
|
let dpos0 = xy + vec2<f32>(0.5, 0.5) - segment.origin;
|
||||||
|
let scale = 1.0 / dot(delta, delta);
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
let dpos = vec2<f32>(dpos0.x + f32(i), dpos0.y);
|
||||||
|
let t = clamp(dot(dpos, delta) * scale, 0.0, 1.0);
|
||||||
|
// performance idea: hoist sqrt out of loop
|
||||||
|
df[i] = min(df[i], length(delta * t - dpos));
|
||||||
|
}
|
||||||
|
segment_ix = segment.next;
|
||||||
|
}
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
// reuse array; return alpha rather than distance
|
||||||
|
df[i] = clamp(half_width + 0.5 - df[i], 0.0, 1.0);
|
||||||
|
}
|
||||||
|
return df;
|
||||||
|
}
|
||||||
|
|
||||||
|
@compute @workgroup_size(4, 16)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let tile_ix = wg_id.y * config.width_in_tiles + wg_id.x;
|
||||||
|
let xy = vec2<f32>(f32(global_id.x * PIXELS_PER_THREAD), f32(global_id.y));
|
||||||
|
#ifdef full
|
||||||
|
var rgba: array<vec4<f32>, PIXELS_PER_THREAD>;
|
||||||
|
var blend_stack: array<array<u32, BLEND_STACK_SPLIT>, PIXELS_PER_THREAD>;
|
||||||
|
var clip_depth = 0u;
|
||||||
|
var area: array<f32, PIXELS_PER_THREAD>;
|
||||||
|
var cmd_ix = tile_ix * PTCL_INITIAL_ALLOC;
|
||||||
|
|
||||||
|
// main interpretation loop
|
||||||
|
while true {
|
||||||
|
let tag = ptcl[cmd_ix];
|
||||||
|
if tag == CMD_END {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
switch tag {
|
||||||
|
// CMD_FILL
|
||||||
|
case 1u: {
|
||||||
|
let fill = read_fill(cmd_ix);
|
||||||
|
let tile = Tile(fill.backdrop, fill.tile);
|
||||||
|
area = fill_path(tile, xy);
|
||||||
|
cmd_ix += 3u;
|
||||||
|
}
|
||||||
|
// CMD_STROKE
|
||||||
|
case 2u: {
|
||||||
|
let stroke = read_stroke(cmd_ix);
|
||||||
|
area = stroke_path(stroke.tile, stroke.half_width, xy);
|
||||||
|
cmd_ix += 3u;
|
||||||
|
}
|
||||||
|
// CMD_SOLID
|
||||||
|
case 3u: {
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
area[i] = 1.0;
|
||||||
|
}
|
||||||
|
cmd_ix += 1u;
|
||||||
|
}
|
||||||
|
// CMD_COLOR
|
||||||
|
case 5u: {
|
||||||
|
let color = read_color(cmd_ix);
|
||||||
|
let fg = unpack4x8unorm(color.rgba_color).wzyx;
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
let fg_i = fg * area[i];
|
||||||
|
rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
|
||||||
|
}
|
||||||
|
cmd_ix += 2u;
|
||||||
|
}
|
||||||
|
// CMD_LIN_GRAD
|
||||||
|
case 6u: {
|
||||||
|
let lin = read_lin_grad(cmd_ix);
|
||||||
|
let d = lin.line_x * xy.x + lin.line_y * xy.y + lin.line_c;
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
let my_d = d + lin.line_x * f32(i);
|
||||||
|
let x = i32(round(clamp(my_d, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1)));
|
||||||
|
let fg_rgba = textureLoad(gradients, vec2<i32>(x, i32(lin.index)), 0);
|
||||||
|
let fg_i = fg_rgba * area[i];
|
||||||
|
rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
|
||||||
|
}
|
||||||
|
cmd_ix += 5u;
|
||||||
|
}
|
||||||
|
// CMD_RAD_GRAD
|
||||||
|
case 7u: {
|
||||||
|
let rad = read_rad_grad(cmd_ix);
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
let my_xy = vec2<f32>(xy.x + f32(i), xy.y);
|
||||||
|
// TODO: can hoist y, but for now stick to piet-gpu
|
||||||
|
let xy_xformed = rad.matrx.xz * my_xy.x + rad.matrx.yw * my_xy.y - rad.xlat;
|
||||||
|
let ba = dot(xy_xformed, rad.c1);
|
||||||
|
let ca = rad.ra * dot(xy_xformed, xy_xformed);
|
||||||
|
let t = sqrt(ba * ba + ca) - ba - rad.roff;
|
||||||
|
let x = i32(round(clamp(t, 0.0, 1.0) * f32(GRADIENT_WIDTH - 1)));
|
||||||
|
let fg_rgba = textureLoad(gradients, vec2<i32>(x, i32(rad.index)), 0);
|
||||||
|
let fg_i = fg_rgba * area[i];
|
||||||
|
rgba[i] = rgba[i] * (1.0 - fg_i.a) + fg_i;
|
||||||
|
}
|
||||||
|
cmd_ix += 12u;
|
||||||
|
}
|
||||||
|
// CMD_BEGIN_CLIP
|
||||||
|
case 9u: {
|
||||||
|
if clip_depth < BLEND_STACK_SPLIT {
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
blend_stack[clip_depth][i] = pack4x8unorm(rgba[i]);
|
||||||
|
rgba[i] = vec4<f32>(0.0);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// TODO: spill to memory
|
||||||
|
}
|
||||||
|
clip_depth += 1u;
|
||||||
|
cmd_ix += 1u;
|
||||||
|
}
|
||||||
|
// CMD_END_CLIP
|
||||||
|
case 10u: {
|
||||||
|
let blend = ptcl[cmd_ix + 1u];
|
||||||
|
clip_depth -= 1u;
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
var bg_rgba: u32;
|
||||||
|
if clip_depth < BLEND_STACK_SPLIT {
|
||||||
|
bg_rgba = blend_stack[clip_depth][i];
|
||||||
|
} else {
|
||||||
|
// load from memory
|
||||||
|
}
|
||||||
|
let bg = unpack4x8unorm(bg_rgba);
|
||||||
|
let fg = rgba[i] * area[i];
|
||||||
|
rgba[i] = blend_mix_compose(bg, fg, blend);
|
||||||
|
}
|
||||||
|
cmd_ix += 2u;
|
||||||
|
}
|
||||||
|
// CMD_JUMP
|
||||||
|
case 11u: {
|
||||||
|
cmd_ix = ptcl[cmd_ix + 1u];
|
||||||
|
}
|
||||||
|
default: {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let out_ix = global_id.y * (config.width_in_tiles * TILE_WIDTH) + global_id.x * PIXELS_PER_THREAD;
|
||||||
|
for (var i = 0u; i < PIXELS_PER_THREAD; i += 1u) {
|
||||||
|
let fg = rgba[i];
|
||||||
|
let a_inv = 1.0 / (fg.a + 1e-6);
|
||||||
|
let rgba_sep = vec4<f32>(fg.r * a_inv, fg.g * a_inv, fg.b * a_inv, fg.a);
|
||||||
|
let bytes = pack4x8unorm(rgba_sep);
|
||||||
|
output[out_ix + i] = bytes;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
let tile = tiles[tile_ix];
|
||||||
|
let area = fill_path(tile, xy);
|
||||||
|
|
||||||
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
|
let bytes = pack4x8unorm(vec4<f32>(area[0], area[1], area[2], area[3]));
|
||||||
let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;
|
let out_ix = global_id.y * (config.width_in_tiles * 4u) + global_id.x;
|
||||||
output[out_ix] = bytes;
|
output[out_ix] = bytes;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -14,59 +14,53 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
#import config
|
||||||
#import pathtag
|
#import pathtag
|
||||||
|
|
||||||
@group(0) @binding(0)
|
@group(0) @binding(0)
|
||||||
var<storage> path_tags: array<u32>;
|
var<storage> config: Config;
|
||||||
|
|
||||||
@group(0) @binding(1)
|
@group(0) @binding(1)
|
||||||
var<storage> tag_monoids: array<TagMonoid>;
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
// TODO: should probably have single "scene" binding.
|
|
||||||
@group(0) @binding(2)
|
@group(0) @binding(2)
|
||||||
var<storage> path_data: array<u32>;
|
var<storage> tag_monoids: array<TagMonoid>;
|
||||||
|
|
||||||
#ifdef cubics_out
|
#ifdef cubics_out
|
||||||
@group(0) @binding(3)
|
@group(0) @binding(3)
|
||||||
var<storage, read_write> output: array<vec2<f32>>;
|
var<storage, read_write> output: array<vec2<f32>>;
|
||||||
#else
|
#else
|
||||||
#import config
|
// We don't get this from import as it's the atomic version
|
||||||
|
struct AtomicTile {
|
||||||
struct Tile {
|
|
||||||
backdrop: atomic<i32>,
|
backdrop: atomic<i32>,
|
||||||
segments: atomic<u32>,
|
segments: atomic<u32>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#import segment
|
#import segment
|
||||||
|
|
||||||
// Should probably be uniform binding
|
|
||||||
@group(0) @binding(3)
|
@group(0) @binding(3)
|
||||||
var<storage> config: Config;
|
var<storage, read_write> tiles: array<AtomicTile>;
|
||||||
|
|
||||||
@group(0) @binding(4)
|
@group(0) @binding(4)
|
||||||
var<storage, read_write> tiles: array<Tile>;
|
|
||||||
|
|
||||||
@group(0) @binding(5)
|
|
||||||
var<storage, read_write> segments: array<Segment>;
|
var<storage, read_write> segments: array<Segment>;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
var<private> pathdata_base: u32;
|
||||||
|
|
||||||
fn read_f32_point(ix: u32) -> vec2<f32> {
|
fn read_f32_point(ix: u32) -> vec2<f32> {
|
||||||
let x = bitcast<f32>(path_data[ix]);
|
let x = bitcast<f32>(scene[pathdata_base + ix]);
|
||||||
let y = bitcast<f32>(path_data[ix + 1u]);
|
let y = bitcast<f32>(scene[pathdata_base + ix + 1u]);
|
||||||
return vec2<f32>(x, y);
|
return vec2<f32>(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn read_i16_point(ix: u32) -> vec2<f32> {
|
fn read_i16_point(ix: u32) -> vec2<f32> {
|
||||||
let raw = path_data[ix];
|
let raw = scene[pathdata_base + ix];
|
||||||
let x = f32(i32(raw << 16u) >> 16u);
|
let x = f32(i32(raw << 16u) >> 16u);
|
||||||
let y = f32(i32(raw) >> 16u);
|
let y = f32(i32(raw) >> 16u);
|
||||||
return vec2<f32>(x, y);
|
return vec2<f32>(x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef cubics_out
|
#ifndef cubics_out
|
||||||
let TILE_WIDTH = 16u;
|
|
||||||
let TILE_HEIGHT = 16u;
|
|
||||||
|
|
||||||
struct SubdivResult {
|
struct SubdivResult {
|
||||||
val: f32,
|
val: f32,
|
||||||
a0: f32,
|
a0: f32,
|
||||||
|
@ -136,7 +130,8 @@ fn main(
|
||||||
) {
|
) {
|
||||||
// Obtain exclusive prefix sum of tag monoid
|
// Obtain exclusive prefix sum of tag monoid
|
||||||
let ix = global_id.x;
|
let ix = global_id.x;
|
||||||
let tag_word = path_tags[ix >> 2u];
|
let tag_word = scene[config.pathtag_base + (ix >> 2u)];
|
||||||
|
pathdata_base = config.pathdata_base;
|
||||||
let shift = (ix & 3u) * 8u;
|
let shift = (ix & 3u) * 8u;
|
||||||
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
|
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
|
||||||
tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);
|
tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);
|
||||||
|
|
275
piet-wgsl/shader/path_coarse_full.wgsl
Normal file
275
piet-wgsl/shader/path_coarse_full.wgsl
Normal file
|
@ -0,0 +1,275 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// Path coarse rasterization for the full implementation.
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import pathtag
|
||||||
|
#import tile
|
||||||
|
#import segment
|
||||||
|
#import cubic
|
||||||
|
#import bump
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage> tag_monoids: array<TagMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
|
var<storage> cubics: array<Cubic>;
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage> paths: array<Path>;
|
||||||
|
|
||||||
|
// We don't get this from import as it's the atomic version
|
||||||
|
struct AtomicTile {
|
||||||
|
backdrop: atomic<i32>,
|
||||||
|
segments: atomic<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
@group(0) @binding(5)
|
||||||
|
var<storage, read_write> bump: BumpAllocators;
|
||||||
|
|
||||||
|
@group(0) @binding(6)
|
||||||
|
var<storage, read_write> tiles: array<AtomicTile>;
|
||||||
|
|
||||||
|
@group(0) @binding(7)
|
||||||
|
var<storage, read_write> segments: array<Segment>;
|
||||||
|
|
||||||
|
struct SubdivResult {
|
||||||
|
val: f32,
|
||||||
|
a0: f32,
|
||||||
|
a2: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
let D = 0.67;
|
||||||
|
fn approx_parabola_integral(x: f32) -> f32 {
|
||||||
|
return x * inverseSqrt(sqrt(1.0 - D + (D * D * D * D + 0.25 * x * x)));
|
||||||
|
}
|
||||||
|
|
||||||
|
let B = 0.39;
|
||||||
|
fn approx_parabola_inv_integral(x: f32) -> f32 {
|
||||||
|
return x * sqrt(1.0 - B + (B * B + 0.5 * x * x));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn estimate_subdiv(p0: vec2<f32>, p1: vec2<f32>, p2: vec2<f32>, sqrt_tol: f32) -> SubdivResult {
|
||||||
|
let d01 = p1 - p0;
|
||||||
|
let d12 = p2 - p1;
|
||||||
|
let dd = d01 - d12;
|
||||||
|
let cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x;
|
||||||
|
let cross_inv = 1.0 / cross;
|
||||||
|
let x0 = dot(d01, dd) * cross_inv;
|
||||||
|
let x2 = dot(d12, dd) * cross_inv;
|
||||||
|
let scale = abs(cross / (length(dd) * (x2 - x0)));
|
||||||
|
|
||||||
|
let a0 = approx_parabola_integral(x0);
|
||||||
|
let a2 = approx_parabola_integral(x2);
|
||||||
|
var val = 0.0;
|
||||||
|
if scale < 1e9 {
|
||||||
|
let da = abs(a2 - a0);
|
||||||
|
let sqrt_scale = sqrt(scale);
|
||||||
|
if sign(x0) == sign(x2) {
|
||||||
|
val = sqrt_scale;
|
||||||
|
} else {
|
||||||
|
let xmin = sqrt_tol / sqrt_scale;
|
||||||
|
val = sqrt_tol / approx_parabola_integral(xmin);
|
||||||
|
}
|
||||||
|
val *= da;
|
||||||
|
}
|
||||||
|
return SubdivResult(val, a0, a2);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn eval_quad(p0: vec2<f32>, p1: vec2<f32>, p2: vec2<f32>, t: f32) -> vec2<f32> {
|
||||||
|
let mt = 1.0 - t;
|
||||||
|
return p0 * (mt * mt) + (p1 * (mt * 2.0) + p2 * t) * t;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn eval_cubic(p0: vec2<f32>, p1: vec2<f32>, p2: vec2<f32>, p3: vec2<f32>, t: f32) -> vec2<f32> {
|
||||||
|
let mt = 1.0 - t;
|
||||||
|
return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn alloc_segment() -> u32 {
|
||||||
|
return atomicAdd(&bump.segments, 1u) + 1u;
|
||||||
|
}
|
||||||
|
|
||||||
|
let MAX_QUADS = 16u;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let ix = global_id.x;
|
||||||
|
let tag_word = scene[config.pathtag_base + (ix >> 2u)];
|
||||||
|
let shift = (ix & 3u) * 8u;
|
||||||
|
var tag_byte = (tag_word >> shift) & 0xffu;
|
||||||
|
|
||||||
|
if (tag_byte & PATH_TAG_SEG_TYPE) != 0u {
|
||||||
|
// Discussion question: it might actually be cheaper to do the path segment
|
||||||
|
// decoding & transform again rather than store the result in a buffer;
|
||||||
|
// classic memory vs ALU tradeoff.
|
||||||
|
let cubic = cubics[global_id.x];
|
||||||
|
let path = paths[cubic.path_ix];
|
||||||
|
let bbox = vec4<i32>(path.bbox);
|
||||||
|
let p0 = cubic.p0;
|
||||||
|
let p1 = cubic.p1;
|
||||||
|
let p2 = cubic.p2;
|
||||||
|
let p3 = cubic.p3;
|
||||||
|
let err_v = 3.0 * (p2 - p1) + p0 - p3;
|
||||||
|
let err = dot(err_v, err_v);
|
||||||
|
let ACCURACY = 0.25;
|
||||||
|
let Q_ACCURACY = ACCURACY * 0.1;
|
||||||
|
let REM_ACCURACY = (ACCURACY - Q_ACCURACY);
|
||||||
|
let MAX_HYPOT2 = 432.0 * Q_ACCURACY * Q_ACCURACY;
|
||||||
|
var n_quads = max(u32(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1u);
|
||||||
|
n_quads = min(n_quads, MAX_QUADS);
|
||||||
|
var keep_params: array<SubdivResult, MAX_QUADS>;
|
||||||
|
var val = 0.0;
|
||||||
|
var qp0 = p0;
|
||||||
|
let step = 1.0 / f32(n_quads);
|
||||||
|
for (var i = 0u; i < n_quads; i += 1u) {
|
||||||
|
let t = f32(i + 1u) * step;
|
||||||
|
let qp2 = eval_cubic(p0, p1, p2, p3, t);
|
||||||
|
var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step);
|
||||||
|
qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
|
||||||
|
let params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY));
|
||||||
|
keep_params[i] = params;
|
||||||
|
val += params.val;
|
||||||
|
qp0 = qp2;
|
||||||
|
}
|
||||||
|
let n = max(u32(ceil(val * (0.5 / sqrt(REM_ACCURACY)))), 1u);
|
||||||
|
var lp0 = p0;
|
||||||
|
qp0 = p0;
|
||||||
|
let v_step = val / f32(n);
|
||||||
|
var n_out = 1u;
|
||||||
|
var val_sum = 0.0;
|
||||||
|
for (var i = 0u; i < n_quads; i += 1u) {
|
||||||
|
let t = f32(i + 1u) * step;
|
||||||
|
let qp2 = eval_cubic(p0, p1, p2, p3, t);
|
||||||
|
var qp1 = eval_cubic(p0, p1, p2, p3, t - 0.5 * step);
|
||||||
|
qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
|
||||||
|
let params = keep_params[i];
|
||||||
|
let u0 = approx_parabola_inv_integral(params.a0);
|
||||||
|
let u2 = approx_parabola_inv_integral(params.a2);
|
||||||
|
let uscale = 1.0 / (u2 - u0);
|
||||||
|
var val_target = f32(n_out) * v_step;
|
||||||
|
while n_out == n || val_target < val_sum + params.val {
|
||||||
|
var lp1: vec2<f32>;
|
||||||
|
if n_out == n {
|
||||||
|
lp1 = p3;
|
||||||
|
} else {
|
||||||
|
let u = (val_target - val_sum) / params.val;
|
||||||
|
let a = mix(params.a0, params.a2, u);
|
||||||
|
let au = approx_parabola_inv_integral(a);
|
||||||
|
let t = (au - u0) * uscale;
|
||||||
|
lp1 = eval_quad(qp0, qp1, qp2, t);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Output line segment lp0..lp1
|
||||||
|
let xymin = min(lp0, lp1);
|
||||||
|
let xymax = max(lp0, lp1);
|
||||||
|
let dp = lp1 - lp0;
|
||||||
|
let recip_dx = 1.0 / dp.x;
|
||||||
|
let invslope = select(dp.x / dp.y, 1.0e9, abs(dp.y) < 1.0e-9);
|
||||||
|
let c = 0.5 * abs(invslope);
|
||||||
|
let b = invslope;
|
||||||
|
let SX = 1.0 / f32(TILE_WIDTH);
|
||||||
|
let SY = 1.0 / f32(TILE_HEIGHT);
|
||||||
|
let a = (lp0.x - (lp0.y - 0.5 * f32(TILE_HEIGHT)) * b) * SX;
|
||||||
|
var x0 = i32(floor(xymin.x * SX));
|
||||||
|
var x1 = i32(floor(xymax.x * SX) + 1.0);
|
||||||
|
var y0 = i32(floor(xymin.y * SY));
|
||||||
|
var y1 = i32(floor(xymax.y * SY) + 1.0);
|
||||||
|
x0 = clamp(x0, bbox.x, bbox.z);
|
||||||
|
x1 = clamp(x1, bbox.x, bbox.z);
|
||||||
|
y0 = clamp(y0, bbox.y, bbox.w);
|
||||||
|
y1 = clamp(y1, bbox.y, bbox.w);
|
||||||
|
var xc = a + b * f32(y0);
|
||||||
|
let stride = bbox.z - bbox.x;
|
||||||
|
var base = i32(path.tiles) + (y0 - bbox.y) * stride - bbox.x;
|
||||||
|
var xray = i32(floor(lp0.x * SX));
|
||||||
|
var last_xray = i32(floor(lp1.x * SX));
|
||||||
|
if dp.y < 0.0 {
|
||||||
|
let tmp = xray;
|
||||||
|
xray = last_xray;
|
||||||
|
last_xray = tmp;
|
||||||
|
}
|
||||||
|
for (var y = y0; y < y1; y += 1) {
|
||||||
|
let tile_y0 = f32(y) * f32(TILE_HEIGHT);
|
||||||
|
let xbackdrop = max(xray + 1, bbox.x);
|
||||||
|
if xymin.y < tile_y0 && xbackdrop < bbox.z {
|
||||||
|
let backdrop = select(-1, 1, dp.y < 0.0);
|
||||||
|
let tile_ix = base + xbackdrop;
|
||||||
|
atomicAdd(&tiles[tile_ix].backdrop, backdrop);
|
||||||
|
}
|
||||||
|
var next_xray = last_xray;
|
||||||
|
if y + 1 < y1 {
|
||||||
|
let tile_y1 = f32(y + 1) * f32(TILE_HEIGHT);
|
||||||
|
let x_edge = lp0.x + (tile_y1 - lp0.y) * invslope;
|
||||||
|
next_xray = i32(floor(x_edge * SX));
|
||||||
|
}
|
||||||
|
let min_xray = min(xray, next_xray);
|
||||||
|
let max_xray = max(xray, next_xray);
|
||||||
|
var xx0 = min(i32(floor(xc - c)), min_xray);
|
||||||
|
var xx1 = max(i32(ceil(xc + c)), max_xray + 1);
|
||||||
|
xx0 = clamp(xx0, x0, x1);
|
||||||
|
xx1 = clamp(xx1, x0, x1);
|
||||||
|
var tile_seg: Segment;
|
||||||
|
for (var x = xx0; x < xx1; x += 1) {
|
||||||
|
let tile_x0 = f32(x) * f32(TILE_WIDTH);
|
||||||
|
let tile_ix = base + x;
|
||||||
|
// allocate segment, insert linked list
|
||||||
|
let seg_ix = alloc_segment();
|
||||||
|
let old = atomicExchange(&tiles[tile_ix].segments, seg_ix);
|
||||||
|
tile_seg.origin = lp0;
|
||||||
|
tile_seg.delta = dp;
|
||||||
|
var y_edge = mix(lp0.y, lp1.y, (tile_x0 - lp0.x) * recip_dx);
|
||||||
|
if xymin.x < tile_x0 {
|
||||||
|
let p = vec2<f32>(tile_x0, y_edge);
|
||||||
|
if dp.x < 0.0 {
|
||||||
|
tile_seg.delta = p - lp0;
|
||||||
|
} else {
|
||||||
|
tile_seg.origin = p;
|
||||||
|
tile_seg.delta = lp1 - p;
|
||||||
|
}
|
||||||
|
if tile_seg.delta.x == 0.0 {
|
||||||
|
tile_seg.delta.x = sign(dp.x) * 1e-9;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if x <= min_xray || max_xray < x {
|
||||||
|
y_edge = 1e9;
|
||||||
|
}
|
||||||
|
tile_seg.y_edge = y_edge;
|
||||||
|
tile_seg.next = old;
|
||||||
|
segments[seg_ix] = tile_seg;
|
||||||
|
}
|
||||||
|
xc += b;
|
||||||
|
base += stride;
|
||||||
|
xray = next_xray;
|
||||||
|
}
|
||||||
|
n_out += 1u;
|
||||||
|
val_target += v_step;
|
||||||
|
lp0 = lp1;
|
||||||
|
}
|
||||||
|
val_sum += params.val;
|
||||||
|
qp0 = qp2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
217
piet-wgsl/shader/pathseg.wgsl
Normal file
217
piet-wgsl/shader/pathseg.wgsl
Normal file
|
@ -0,0 +1,217 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// Path segment decoding for the full case.
|
||||||
|
|
||||||
|
// In the simple case, path segments are decoded as part of the coarse
|
||||||
|
// path rendering stage. In the full case, they are separated, as the
|
||||||
|
// decoding process also generates bounding boxes, and those in turn are
|
||||||
|
// used for tile allocation and clipping; actual coarse path rasterization
|
||||||
|
// can't proceed until those are complete.
|
||||||
|
|
||||||
|
// There's some duplication of the decoding code but we won't worry about
|
||||||
|
// that just now. Perhaps it could be factored more nicely later.
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import pathtag
|
||||||
|
#import cubic
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage> tag_monoids: array<TagMonoid>;
|
||||||
|
|
||||||
|
struct AtomicPathBbox {
|
||||||
|
x0: atomic<i32>,
|
||||||
|
y0: atomic<i32>,
|
||||||
|
x1: atomic<i32>,
|
||||||
|
y1: atomic<i32>,
|
||||||
|
linewidth: f32,
|
||||||
|
trans_ix: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
|
var<storage, read_write> path_bboxes: array<AtomicPathBbox>;
|
||||||
|
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage, read_write> cubics: array<Cubic>;
|
||||||
|
|
||||||
|
// Monoid is yagni, for future optimization
|
||||||
|
|
||||||
|
// struct BboxMonoid {
|
||||||
|
// bbox: vec4<f32>,
|
||||||
|
// flags: u32,
|
||||||
|
// }
|
||||||
|
|
||||||
|
// let FLAG_RESET_BBOX = 1u;
|
||||||
|
// let FLAG_SET_BBOX = 2u;
|
||||||
|
|
||||||
|
// fn combine_bbox_monoid(a: BboxMonoid, b: BboxMonoid) -> BboxMonoid {
|
||||||
|
// var c: BboxMonoid;
|
||||||
|
// c.bbox = b.bbox;
|
||||||
|
// // TODO: previous-me thought this should be gated on b & SET_BBOX == false also
|
||||||
|
// if (a.flags & FLAG_RESET_BBOX) == 0u && b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y {
|
||||||
|
// c.bbox = a.bbox;
|
||||||
|
// } else if (a.flags & FLAG_RESET_BBOX) == 0u && (b.flags & FLAG_SET_BBOX) == 0u ||
|
||||||
|
// (a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y)
|
||||||
|
// {
|
||||||
|
// c.bbox = vec4<f32>(min(a.bbox.xy, c.bbox.xy), max(a.bbox.xw, c.bbox.zw));
|
||||||
|
// }
|
||||||
|
// c.flags = (a.flags & FLAG_SET_BBOX) | b.flags;
|
||||||
|
// c.flags |= (a.flags & FLAG_RESET_BBOX) << 1u;
|
||||||
|
// return c;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// fn bbox_monoid_identity() -> BboxMonoid {
|
||||||
|
// return BboxMonoid();
|
||||||
|
// }
|
||||||
|
|
||||||
|
var<private> pathdata_base: u32;
|
||||||
|
|
||||||
|
fn read_f32_point(ix: u32) -> vec2<f32> {
|
||||||
|
let x = bitcast<f32>(scene[pathdata_base + ix]);
|
||||||
|
let y = bitcast<f32>(scene[pathdata_base + ix + 1u]);
|
||||||
|
return vec2<f32>(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_i16_point(ix: u32) -> vec2<f32> {
|
||||||
|
let raw = scene[pathdata_base + ix];
|
||||||
|
let x = f32(i32(raw << 16u) >> 16u);
|
||||||
|
let y = f32(i32(raw) >> 16u);
|
||||||
|
return vec2<f32>(x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Transform {
|
||||||
|
matrx: vec4<f32>,
|
||||||
|
translate: vec2<f32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_transform(transform_base: u32, ix: u32) -> Transform {
|
||||||
|
let base = transform_base + ix * 6u;
|
||||||
|
let c0 = bitcast<f32>(scene[base]);
|
||||||
|
let c1 = bitcast<f32>(scene[base + 1u]);
|
||||||
|
let c2 = bitcast<f32>(scene[base + 2u]);
|
||||||
|
let c3 = bitcast<f32>(scene[base + 3u]);
|
||||||
|
let c4 = bitcast<f32>(scene[base + 4u]);
|
||||||
|
let c5 = bitcast<f32>(scene[base + 5u]);
|
||||||
|
let matrx = vec4<f32>(c0, c1, c2, c3);
|
||||||
|
let translate = vec2<f32>(c4, c5);
|
||||||
|
return Transform(matrx, translate);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn transform_apply(transform: Transform, p: vec2<f32>) -> vec2<f32> {
|
||||||
|
return transform.matrx.xy * p.x + transform.matrx.zw * p.y + transform.translate;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn round_down(x: f32) -> i32 {
|
||||||
|
return i32(floor(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn round_up(x: f32) -> i32 {
|
||||||
|
return i32(ceil(x));
|
||||||
|
}
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let ix = global_id.x;
|
||||||
|
let tag_word = scene[config.pathtag_base + (ix >> 2u)];
|
||||||
|
pathdata_base = config.pathdata_base;
|
||||||
|
let shift = (ix & 3u) * 8u;
|
||||||
|
var tm = reduce_tag(tag_word & ((1u << shift) - 1u));
|
||||||
|
tm = combine_tag_monoid(tag_monoids[ix >> 2u], tm);
|
||||||
|
var tag_byte = (tag_word >> shift) & 0xffu;
|
||||||
|
|
||||||
|
let out = &path_bboxes[tm.path_ix];
|
||||||
|
var linewidth: f32;
|
||||||
|
if (tag_byte & PATH_TAG_PATH) != 0u {
|
||||||
|
linewidth = bitcast<f32>(scene[config.linewidth_base + tm.linewidth_ix]);
|
||||||
|
(*out).linewidth = linewidth;
|
||||||
|
(*out).trans_ix = tm.trans_ix;
|
||||||
|
}
|
||||||
|
// Decode path data
|
||||||
|
let seg_type = tag_byte & PATH_TAG_SEG_TYPE;
|
||||||
|
if seg_type != 0u {
|
||||||
|
var p0: vec2<f32>;
|
||||||
|
var p1: vec2<f32>;
|
||||||
|
var p2: vec2<f32>;
|
||||||
|
var p3: vec2<f32>;
|
||||||
|
if (tag_byte & PATH_TAG_F32) != 0u {
|
||||||
|
p0 = read_f32_point(tm.pathseg_offset);
|
||||||
|
p1 = read_f32_point(tm.pathseg_offset + 2u);
|
||||||
|
if seg_type >= PATH_TAG_QUADTO {
|
||||||
|
p2 = read_f32_point(tm.pathseg_offset + 4u);
|
||||||
|
if seg_type == PATH_TAG_CUBICTO {
|
||||||
|
p3 = read_f32_point(tm.pathseg_offset + 6u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
p0 = read_i16_point(tm.pathseg_offset);
|
||||||
|
p1 = read_i16_point(tm.pathseg_offset + 1u);
|
||||||
|
if seg_type >= PATH_TAG_QUADTO {
|
||||||
|
p2 = read_i16_point(tm.pathseg_offset + 2u);
|
||||||
|
if seg_type == PATH_TAG_CUBICTO {
|
||||||
|
p3 = read_i16_point(tm.pathseg_offset + 3u);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let transform = read_transform(config.transform_base, tm.trans_ix);
|
||||||
|
//let transform = Transform(vec4<f32>(1.0, 0.0, 0.0, 1.0), vec2<f32>());
|
||||||
|
p0 = transform_apply(transform, p0);
|
||||||
|
p1 = transform_apply(transform, p1);
|
||||||
|
var bbox = vec4<f32>(min(p0, p1), max(p0, p1));
|
||||||
|
// Degree-raise
|
||||||
|
if seg_type == PATH_TAG_LINETO {
|
||||||
|
p3 = p1;
|
||||||
|
p2 = mix(p3, p0, 1.0 / 3.0);
|
||||||
|
p1 = mix(p0, p3, 1.0 / 3.0);
|
||||||
|
} else if seg_type >= PATH_TAG_QUADTO {
|
||||||
|
p2 = transform_apply(transform, p2);
|
||||||
|
bbox = vec4<f32>(min(bbox.xy, p2), max(bbox.zw, p2));
|
||||||
|
if seg_type == PATH_TAG_CUBICTO {
|
||||||
|
p3 = transform_apply(transform, p3);
|
||||||
|
bbox = vec4<f32>(min(bbox.xy, p3), max(bbox.zw, p3));
|
||||||
|
} else {
|
||||||
|
p3 = p2;
|
||||||
|
p2 = mix(p1, p2, 1.0 / 3.0);
|
||||||
|
p1 = mix(p1, p0, 1.0 / 3.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if linewidth >= 0.0 {
|
||||||
|
// See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm
|
||||||
|
// This is the correct bounding box, but we're not handling rendering
|
||||||
|
// in the isotropic case, so it may mismatch.
|
||||||
|
let stroke = 0.5 * linewidth * vec2<f32>(length(transform.matrx.xz), length(transform.matrx.yw));
|
||||||
|
bbox += vec4<f32>(-stroke, stroke);
|
||||||
|
}
|
||||||
|
cubics[global_id.x] = Cubic(p0, p1, p2, p3, tm.path_ix, 0u);
|
||||||
|
// Update bounding box using atomics only. Computing a monoid is a
|
||||||
|
// potential future optimization.
|
||||||
|
if bbox.z > bbox.x || bbox.w > bbox.y {
|
||||||
|
atomicMin(&(*out).x0, round_down(bbox.x));
|
||||||
|
atomicMin(&(*out).y0, round_down(bbox.y));
|
||||||
|
atomicMax(&(*out).x1, round_up(bbox.z));
|
||||||
|
atomicMax(&(*out).y1, round_up(bbox.w));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -14,14 +14,16 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
#import config
|
||||||
#import pathtag
|
#import pathtag
|
||||||
|
|
||||||
// Note: should have a single scene binding, path_tags are a slice
|
|
||||||
// in that; need a config uniform.
|
|
||||||
@group(0) @binding(0)
|
@group(0) @binding(0)
|
||||||
var<storage> path_tags: array<u32>;
|
var<storage> config: Config;
|
||||||
|
|
||||||
@group(0) @binding(1)
|
@group(0) @binding(1)
|
||||||
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
var<storage, read_write> reduced: array<TagMonoid>;
|
var<storage, read_write> reduced: array<TagMonoid>;
|
||||||
|
|
||||||
let LG_WG_SIZE = 8u;
|
let LG_WG_SIZE = 8u;
|
||||||
|
@ -35,7 +37,7 @@ fn main(
|
||||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
) {
|
) {
|
||||||
let ix = global_id.x;
|
let ix = global_id.x;
|
||||||
let tag_word = path_tags[ix];
|
let tag_word = scene[config.pathtag_base + ix];
|
||||||
var agg = reduce_tag(tag_word);
|
var agg = reduce_tag(tag_word);
|
||||||
sh_scratch[local_id.x] = agg;
|
sh_scratch[local_id.x] = agg;
|
||||||
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
|
|
@ -14,15 +14,19 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
#import config
|
||||||
#import pathtag
|
#import pathtag
|
||||||
|
|
||||||
@group(0) @binding(0)
|
@group(0) @binding(0)
|
||||||
var<storage> path_tags: array<u32>;
|
var<storage> config: Config;
|
||||||
|
|
||||||
@group(0) @binding(1)
|
@group(0) @binding(1)
|
||||||
var<storage> reduced: array<TagMonoid>;
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
@group(0) @binding(2)
|
@group(0) @binding(2)
|
||||||
|
var<storage> reduced: array<TagMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
var<storage, read_write> tag_monoids: array<TagMonoid>;
|
var<storage, read_write> tag_monoids: array<TagMonoid>;
|
||||||
|
|
||||||
let LG_WG_SIZE = 8u;
|
let LG_WG_SIZE = 8u;
|
||||||
|
@ -39,13 +43,13 @@ fn main(
|
||||||
@builtin(workgroup_id) wg_id: vec3<u32>,
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
) {
|
) {
|
||||||
var agg = tag_monoid_identity();
|
var agg = tag_monoid_identity();
|
||||||
if (local_id.x < wg_id.x) {
|
if local_id.x < wg_id.x {
|
||||||
agg = reduced[local_id.x];
|
agg = reduced[local_id.x];
|
||||||
}
|
}
|
||||||
sh_parent[local_id.x] = agg;
|
sh_parent[local_id.x] = agg;
|
||||||
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
||||||
workgroupBarrier();
|
workgroupBarrier();
|
||||||
if (local_id.x + (1u << i) < WG_SIZE) {
|
if local_id.x + (1u << i) < WG_SIZE {
|
||||||
let other = sh_parent[local_id.x + (1u << i)];
|
let other = sh_parent[local_id.x + (1u << i)];
|
||||||
agg = combine_tag_monoid(agg, other);
|
agg = combine_tag_monoid(agg, other);
|
||||||
}
|
}
|
||||||
|
@ -54,12 +58,12 @@ fn main(
|
||||||
}
|
}
|
||||||
|
|
||||||
let ix = global_id.x;
|
let ix = global_id.x;
|
||||||
let tag_word = path_tags[ix];
|
let tag_word = scene[config.pathtag_base + ix];
|
||||||
agg = reduce_tag(tag_word);
|
agg = reduce_tag(tag_word);
|
||||||
sh_monoid[local_id.x] = agg;
|
sh_monoid[local_id.x] = agg;
|
||||||
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
||||||
workgroupBarrier();
|
workgroupBarrier();
|
||||||
if (local_id.x >= 1u << i) {
|
if local_id.x >= 1u << i {
|
||||||
let other = sh_monoid[local_id.x - (1u << i)];
|
let other = sh_monoid[local_id.x - (1u << i)];
|
||||||
agg = combine_tag_monoid(other, agg);
|
agg = combine_tag_monoid(other, agg);
|
||||||
}
|
}
|
||||||
|
@ -68,7 +72,7 @@ fn main(
|
||||||
}
|
}
|
||||||
// prefix up to this workgroup
|
// prefix up to this workgroup
|
||||||
var tm = sh_parent[0];
|
var tm = sh_parent[0];
|
||||||
if (local_id.x > 0u) {
|
if local_id.x > 0u {
|
||||||
tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]);
|
tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]);
|
||||||
}
|
}
|
||||||
// exclusive prefix sum, granularity of 4 tag bytes
|
// exclusive prefix sum, granularity of 4 tag bytes
|
||||||
|
|
32
piet-wgsl/shader/shared/bbox.wgsl
Normal file
32
piet-wgsl/shader/shared/bbox.wgsl
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// The annotated bounding box for a path. It has been transformed,
|
||||||
|
// but contains a link to the active transform, mostly for gradients.
|
||||||
|
// Coordinates are integer pixels (for the convenience of atomic update)
|
||||||
|
// but will probably become fixed-point fractions for rectangles.
|
||||||
|
struct PathBbox {
|
||||||
|
x0: i32,
|
||||||
|
y0: i32,
|
||||||
|
x1: i32,
|
||||||
|
y1: i32,
|
||||||
|
linewidth: f32,
|
||||||
|
trans_ix: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bbox_intersect(a: vec4<f32>, b: vec4<f32>) -> vec4<f32> {
|
||||||
|
return vec4(max(a.xy, b.xy), min(a.zw, b.zw));
|
||||||
|
}
|
351
piet-wgsl/shader/shared/blend.wgsl
Normal file
351
piet-wgsl/shader/shared/blend.wgsl
Normal file
|
@ -0,0 +1,351 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
|
||||||
|
// Color mixing modes
|
||||||
|
|
||||||
|
let MIX_NORMAL = 0u;
|
||||||
|
let MIX_MULTIPLY = 1u;
|
||||||
|
let MIX_SCREEN = 2u;
|
||||||
|
let MIX_OVERLAY = 3u;
|
||||||
|
let MIX_DARKEN = 4u;
|
||||||
|
let MIX_LIGHTEN = 5u;
|
||||||
|
let MIX_COLOR_DODGE = 6u;
|
||||||
|
let MIX_COLOR_BURN = 7u;
|
||||||
|
let MIX_HARD_LIGHT = 8u;
|
||||||
|
let MIX_SOFT_LIGHT = 9u;
|
||||||
|
let MIX_DIFFERENCE = 10u;
|
||||||
|
let MIX_EXCLUSION = 11u;
|
||||||
|
let MIX_HUE = 12u;
|
||||||
|
let MIX_SATURATION = 13u;
|
||||||
|
let MIX_COLOR = 14u;
|
||||||
|
let MIX_LUMINOSITY = 15u;
|
||||||
|
let MIX_CLIP = 128u;
|
||||||
|
|
||||||
|
fn screen(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
|
||||||
|
return cb + cs - (cb * cs);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn color_dodge(cb: f32, cs: f32) -> f32 {
|
||||||
|
if cb == 0.0 {
|
||||||
|
return 0.0;
|
||||||
|
} else if cs == 1.0 {
|
||||||
|
return 1.0;
|
||||||
|
} else {
|
||||||
|
return min(1.0, cb / (1.0 - cs));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn color_burn(cb: f32, cs: f32) -> f32 {
|
||||||
|
if cb == 1.0 {
|
||||||
|
return 1.0;
|
||||||
|
} else if cs == 0.0 {
|
||||||
|
return 0.0;
|
||||||
|
} else {
|
||||||
|
return 1.0 - min(1.0, (1.0 - cb) / cs);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn hard_light(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
|
||||||
|
return mix(
|
||||||
|
screen(cb, 2.0 * cs - 1.0),
|
||||||
|
cb * 2.0 * cs,
|
||||||
|
vec3<f32>(cs <= vec3<f32>(0.5))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn soft_light(cb: vec3<f32>, cs: vec3<f32>) -> vec3<f32> {
|
||||||
|
let d = mix(
|
||||||
|
sqrt(cb),
|
||||||
|
((16.0 * cb - vec3(12.0)) * cb + vec3(4.0)) * cb,
|
||||||
|
vec3<f32>(cb <= vec3<f32>(0.25))
|
||||||
|
);
|
||||||
|
return mix(
|
||||||
|
cb + (2.0 * cs - vec3(1.0)) * (d - cb),
|
||||||
|
cb - (vec3(1.0) - 2.0 * cs) * cb * (vec3(1.0) - cb),
|
||||||
|
vec3<f32>(cs <= vec3<f32>(0.5))
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn sat(c: vec3<f32>) -> f32 {
|
||||||
|
return max(c.x, max(c.y, c.z)) - min(c.x, min(c.y, c.z));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lum(c: vec3<f32>) -> f32 {
|
||||||
|
let f = vec3<f32>(0.3, 0.59, 0.11);
|
||||||
|
return dot(c, f);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn clip_color(c: vec3<f32>) -> vec3<f32> {
|
||||||
|
var c = c;
|
||||||
|
let l = lum(c);
|
||||||
|
let n = min(c.x, min(c.y, c.z));
|
||||||
|
let x = max(c.x, max(c.y, c.z));
|
||||||
|
if n < 0.0 {
|
||||||
|
c = l + (((c - l) * l) / (l - n));
|
||||||
|
}
|
||||||
|
if x > 1.0 {
|
||||||
|
c = l + (((c - l) * (1.0 - l)) / (x - l));
|
||||||
|
}
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_lum(c: vec3<f32>, l: f32) -> vec3<f32> {
|
||||||
|
return clip_color(c + (l - lum(c)));
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_sat_inner(
|
||||||
|
cmin: ptr<function, f32>,
|
||||||
|
cmid: ptr<function, f32>,
|
||||||
|
cmax: ptr<function, f32>,
|
||||||
|
s: f32
|
||||||
|
) {
|
||||||
|
if *cmax > *cmin {
|
||||||
|
*cmid = ((*cmid - *cmin) * s) / (*cmax - *cmin);
|
||||||
|
*cmax = s;
|
||||||
|
} else {
|
||||||
|
*cmid = 0.0;
|
||||||
|
*cmax = 0.0;
|
||||||
|
}
|
||||||
|
*cmin = 0.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn set_sat(c: vec3<f32>, s: f32) -> vec3<f32> {
|
||||||
|
var r = c.r;
|
||||||
|
var g = c.g;
|
||||||
|
var b = c.b;
|
||||||
|
if r <= g {
|
||||||
|
if g <= b {
|
||||||
|
set_sat_inner(&r, &g, &b, s);
|
||||||
|
} else {
|
||||||
|
if r <= b {
|
||||||
|
set_sat_inner(&r, &b, &g, s);
|
||||||
|
} else {
|
||||||
|
set_sat_inner(&b, &r, &g, s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if r <= b {
|
||||||
|
set_sat_inner(&g, &r, &b, s);
|
||||||
|
} else {
|
||||||
|
if g <= b {
|
||||||
|
set_sat_inner(&g, &b, &r, s);
|
||||||
|
} else {
|
||||||
|
set_sat_inner(&b, &g, &r, s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return vec3<f32>(r, g, b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Blends two RGB colors together. The colors are assumed to be in sRGB
|
||||||
|
// color space, and this function does not take alpha into account.
|
||||||
|
fn blend_mix(cb: vec3<f32>, cs: vec3<f32>, mode: u32) -> vec3<f32> {
|
||||||
|
var b = vec3<f32>(0.0);
|
||||||
|
switch mode {
|
||||||
|
// MIX_MULTIPLY
|
||||||
|
case 1u: {
|
||||||
|
b = cb * cs;
|
||||||
|
}
|
||||||
|
// MIX_SCREEN
|
||||||
|
case 2u: {
|
||||||
|
b = screen(cb, cs);
|
||||||
|
}
|
||||||
|
// MIX_OVERLAY
|
||||||
|
case 3u: {
|
||||||
|
b = hard_light(cs, cb);
|
||||||
|
}
|
||||||
|
// MIX_DARKEN
|
||||||
|
case 4u: {
|
||||||
|
b = min(cb, cs);
|
||||||
|
}
|
||||||
|
// MIX_LIGHTEN
|
||||||
|
case 5u: {
|
||||||
|
b = max(cb, cs);
|
||||||
|
}
|
||||||
|
// MIX_COLOR_DODGE
|
||||||
|
case 6u: {
|
||||||
|
b = vec3<f32>(color_dodge(cb.x, cs.x), color_dodge(cb.y, cs.y), color_dodge(cb.z, cs.z));
|
||||||
|
}
|
||||||
|
// MIX_COLOR_BURN
|
||||||
|
case 7u: {
|
||||||
|
b = vec3<f32>(color_burn(cb.x, cs.x), color_burn(cb.y, cs.y), color_burn(cb.z, cs.z));
|
||||||
|
}
|
||||||
|
// MIX_HARD_LIGHT
|
||||||
|
case 8u: {
|
||||||
|
b = hard_light(cb, cs);
|
||||||
|
}
|
||||||
|
// MIX_SOFT_LIGHT
|
||||||
|
case 9u: {
|
||||||
|
b = soft_light(cb, cs);
|
||||||
|
}
|
||||||
|
// MIX_DIFFERENCE
|
||||||
|
case 10u: {
|
||||||
|
b = abs(cb - cs);
|
||||||
|
}
|
||||||
|
// MIX_EXCLUSION
|
||||||
|
case 11u: {
|
||||||
|
b = cb + cs - 2.0 * cb * cs;
|
||||||
|
}
|
||||||
|
// MIX_HUE
|
||||||
|
case 12u: {
|
||||||
|
b = set_lum(set_sat(cs, sat(cb)), lum(cb));
|
||||||
|
}
|
||||||
|
// MIX_SATURATION
|
||||||
|
case 13u: {
|
||||||
|
b = set_lum(set_sat(cb, sat(cs)), lum(cb));
|
||||||
|
}
|
||||||
|
// MIX_COLOR
|
||||||
|
case 14u: {
|
||||||
|
b = set_lum(cs, lum(cb));
|
||||||
|
}
|
||||||
|
// MIX_LUMINOSITY
|
||||||
|
case 15u: {
|
||||||
|
b = set_lum(cb, lum(cs));
|
||||||
|
}
|
||||||
|
default: {
|
||||||
|
b = cs;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return b;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Composition modes
|
||||||
|
|
||||||
|
let COMPOSE_CLEAR = 0u;
|
||||||
|
let COMPOSE_COPY = 1u;
|
||||||
|
let COMPOSE_DEST = 2u;
|
||||||
|
let COMPOSE_SRC_OVER = 3u;
|
||||||
|
let COMPOSE_DEST_OVER = 4u;
|
||||||
|
let COMPOSE_SRC_IN = 5u;
|
||||||
|
let COMPOSE_DEST_IN = 6u;
|
||||||
|
let COMPOSE_SRC_OUT = 7u;
|
||||||
|
let COMPOSE_DEST_OUT = 8u;
|
||||||
|
let COMPOSE_SRC_ATOP = 9u;
|
||||||
|
let COMPOSE_DEST_ATOP = 10u;
|
||||||
|
let COMPOSE_XOR = 11u;
|
||||||
|
let COMPOSE_PLUS = 12u;
|
||||||
|
let COMPOSE_PLUS_LIGHTER = 13u;
|
||||||
|
|
||||||
|
// Apply general compositing operation.
|
||||||
|
// Inputs are separated colors and alpha, output is premultiplied.
|
||||||
|
fn blend_compose(
|
||||||
|
cb: vec3<f32>,
|
||||||
|
cs: vec3<f32>,
|
||||||
|
ab: f32,
|
||||||
|
as_: f32,
|
||||||
|
mode: u32
|
||||||
|
) -> vec4<f32> {
|
||||||
|
var fa = 0.0;
|
||||||
|
var fb = 0.0;
|
||||||
|
switch mode {
|
||||||
|
// COMPOSE_COPY
|
||||||
|
case 1u: {
|
||||||
|
fa = 1.0;
|
||||||
|
fb = 0.0;
|
||||||
|
}
|
||||||
|
// COMPOSE_DEST
|
||||||
|
case 2u: {
|
||||||
|
fa = 0.0;
|
||||||
|
fb = 1.0;
|
||||||
|
}
|
||||||
|
// COMPOSE_SRC_OVER
|
||||||
|
case 3u: {
|
||||||
|
fa = 1.0;
|
||||||
|
fb = 1.0 - as_;
|
||||||
|
}
|
||||||
|
// COMPOSE_DEST_OVER
|
||||||
|
case 4u: {
|
||||||
|
fa = 1.0 - ab;
|
||||||
|
fb = 1.0;
|
||||||
|
}
|
||||||
|
// COMPOSE_SRC_IN
|
||||||
|
case 5u: {
|
||||||
|
fa = ab;
|
||||||
|
fb = 0.0;
|
||||||
|
}
|
||||||
|
// COMPOSE_DEST_IN
|
||||||
|
case 6u: {
|
||||||
|
fa = 0.0;
|
||||||
|
fb = as_;
|
||||||
|
}
|
||||||
|
// COMPOSE_SRC_OUT
|
||||||
|
case 7u: {
|
||||||
|
fa = 1.0 - ab;
|
||||||
|
fb = 0.0;
|
||||||
|
}
|
||||||
|
// COMPOSE_DEST_OUT
|
||||||
|
case 8u: {
|
||||||
|
fa = 0.0;
|
||||||
|
fb = 1.0 - as_;
|
||||||
|
}
|
||||||
|
// COMPOSE_SRC_ATOP
|
||||||
|
case 9u: {
|
||||||
|
fa = ab;
|
||||||
|
fb = 1.0 - as_;
|
||||||
|
}
|
||||||
|
// COMPOSE_DEST_ATOP
|
||||||
|
case 10u: {
|
||||||
|
fa = 1.0 - ab;
|
||||||
|
fb = as_;
|
||||||
|
}
|
||||||
|
// COMPOSE_XOR
|
||||||
|
case 11u: {
|
||||||
|
fa = 1.0 - ab;
|
||||||
|
fb = 1.0 - as_;
|
||||||
|
}
|
||||||
|
// COMPOSE_PLUS
|
||||||
|
case 12u: {
|
||||||
|
fa = 1.0;
|
||||||
|
fb = 1.0;
|
||||||
|
}
|
||||||
|
// COMPOSE_PLUS_LIGHTER
|
||||||
|
case 13u: {
|
||||||
|
return min(vec4<f32>(1.0), vec4<f32>(as_ * cs + ab * cb, as_ + ab));
|
||||||
|
}
|
||||||
|
default: {}
|
||||||
|
}
|
||||||
|
let as_fa = as_ * fa;
|
||||||
|
let ab_fb = ab * fb;
|
||||||
|
let co = as_fa * cs + ab_fb * cb;
|
||||||
|
return vec4<f32>(co, as_fa + ab_fb);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Apply color mixing and composition. Both input and output colors are
|
||||||
|
// premultiplied RGB.
|
||||||
|
fn blend_mix_compose(backdrop: vec4<f32>, src: vec4<f32>, mode: u32) -> vec4<f32> {
|
||||||
|
let BLEND_DEFAULT = ((MIX_NORMAL << 8u) | COMPOSE_SRC_OVER);
|
||||||
|
let EPSILON = 1e-15;
|
||||||
|
if (mode & 0x7fffu) == BLEND_DEFAULT {
|
||||||
|
// Both normal+src_over blend and clip case
|
||||||
|
return backdrop * (1.0 - src.a) + src;
|
||||||
|
}
|
||||||
|
// Un-premultiply colors for blending
|
||||||
|
let inv_src_a = 1.0 / (src.a + EPSILON);
|
||||||
|
var cs = src.rgb * inv_src_a;
|
||||||
|
let inv_backdrop_a = 1.0 / (backdrop.a + EPSILON);
|
||||||
|
let cb = backdrop.rgb * inv_backdrop_a;
|
||||||
|
let mix_mode = mode >> 8u;
|
||||||
|
let mixed = blend_mix(cb, cs, mix_mode);
|
||||||
|
cs = mix(cs, mixed, backdrop.a);
|
||||||
|
let compose_mode = mode & 0xffu;
|
||||||
|
if compose_mode == COMPOSE_SRC_OVER {
|
||||||
|
let co = mix(backdrop.rgb, cs, src.a);
|
||||||
|
return vec4<f32>(co, src.a + backdrop.a * (1.0 - src.a));
|
||||||
|
} else {
|
||||||
|
return blend_compose(cb, cs, backdrop.a, src.a, compose_mode);
|
||||||
|
}
|
||||||
|
}
|
23
piet-wgsl/shader/shared/bump.wgsl
Normal file
23
piet-wgsl/shader/shared/bump.wgsl
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// TODO: robust memory (failure flags)
|
||||||
|
struct BumpAllocators {
|
||||||
|
binning: atomic<u32>,
|
||||||
|
ptcl: atomic<u32>,
|
||||||
|
tile: atomic<u32>,
|
||||||
|
segments: atomic<u32>,
|
||||||
|
}
|
14
piet-wgsl/shader/shared/clip.wgsl
Normal file
14
piet-wgsl/shader/shared/clip.wgsl
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
struct Bic {
|
||||||
|
a: u32,
|
||||||
|
b: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bic_combine(x: Bic, y: Bic) -> Bic {
|
||||||
|
let m = min(x.b, y.a);
|
||||||
|
return Bic(x.a + y.a - m, x.b + y.b - m);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct ClipEl {
|
||||||
|
parent_ix: u32,
|
||||||
|
bbox: vec4<f32>,
|
||||||
|
}
|
|
@ -17,4 +17,29 @@
|
||||||
struct Config {
|
struct Config {
|
||||||
width_in_tiles: u32,
|
width_in_tiles: u32,
|
||||||
height_in_tiles: u32,
|
height_in_tiles: u32,
|
||||||
|
|
||||||
|
n_drawobj: u32,
|
||||||
|
n_path: u32,
|
||||||
|
n_clip: u32,
|
||||||
|
|
||||||
|
// offsets within scene buffer (in u32 units)
|
||||||
|
// Note: this is a difference from piet-gpu, which is in bytes
|
||||||
|
pathtag_base: u32,
|
||||||
|
pathdata_base: u32,
|
||||||
|
|
||||||
|
drawtag_base: u32,
|
||||||
|
drawdata_base: u32,
|
||||||
|
|
||||||
|
transform_base: u32,
|
||||||
|
linewidth_base: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Geometry of tiles and bins
|
||||||
|
|
||||||
|
let TILE_WIDTH = 16u;
|
||||||
|
let TILE_HEIGHT = 16u;
|
||||||
|
// Number of tiles per bin
|
||||||
|
let N_TILE_X = 16u;
|
||||||
|
let N_TILE_Y = 16u;
|
||||||
|
//let N_TILE = N_TILE_X * N_TILE_Y;
|
||||||
|
let N_TILE = 256u;
|
||||||
|
|
25
piet-wgsl/shader/shared/cubic.wgsl
Normal file
25
piet-wgsl/shader/shared/cubic.wgsl
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
struct Cubic {
|
||||||
|
p0: vec2<f32>,
|
||||||
|
p1: vec2<f32>,
|
||||||
|
p2: vec2<f32>,
|
||||||
|
p3: vec2<f32>,
|
||||||
|
path_ix: u32,
|
||||||
|
// Needed?
|
||||||
|
padding: u32,
|
||||||
|
}
|
60
piet-wgsl/shader/shared/drawtag.wgsl
Normal file
60
piet-wgsl/shader/shared/drawtag.wgsl
Normal file
|
@ -0,0 +1,60 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// The DrawMonoid is computed as a prefix sum to aid in decoding
|
||||||
|
// the variable-length encoding of draw objects.
|
||||||
|
struct DrawMonoid {
|
||||||
|
// The number of paths preceding this draw object.
|
||||||
|
path_ix: u32,
|
||||||
|
// The number of clip operations preceding this draw object.
|
||||||
|
clip_ix: u32,
|
||||||
|
// The offset of the encoded draw object in the scene (u32s).
|
||||||
|
scene_offset: u32,
|
||||||
|
// The offset of the associated info.
|
||||||
|
info_offset: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Each draw object has a 32-bit draw tag, which is a bit-packed
|
||||||
|
// version of the draw monoid.
|
||||||
|
let DRAWTAG_NOP = 0u;
|
||||||
|
let DRAWTAG_FILL_COLOR = 0x44u;
|
||||||
|
let DRAWTAG_FILL_LIN_GRADIENT = 0x114u;
|
||||||
|
let DRAWTAG_FILL_RAD_GRADIENT = 0x2dcu;
|
||||||
|
let DRAWTAG_FILL_IMAGE = 0x48u;
|
||||||
|
let DRAWTAG_BEGIN_CLIP = 0x05u;
|
||||||
|
let DRAWTAG_END_CLIP = 0x25u;
|
||||||
|
|
||||||
|
fn draw_monoid_identity() -> DrawMonoid {
|
||||||
|
return DrawMonoid();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) -> DrawMonoid {
|
||||||
|
var c: DrawMonoid;
|
||||||
|
c.path_ix = a.path_ix + b.path_ix;
|
||||||
|
c.clip_ix = a.clip_ix + b.clip_ix;
|
||||||
|
c.scene_offset = a.scene_offset + b.scene_offset;
|
||||||
|
c.info_offset = a.info_offset + b.info_offset;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn map_draw_tag(tag_word: u32) -> DrawMonoid {
|
||||||
|
var c: DrawMonoid;
|
||||||
|
c.path_ix = u32(tag_word != DRAWTAG_NOP);
|
||||||
|
c.clip_ix = tag_word & 1u;
|
||||||
|
c.scene_offset = (tag_word >> 2u) & 0x07u;
|
||||||
|
c.info_offset = (tag_word >> 6u) & 0x0fu;
|
||||||
|
return c;
|
||||||
|
}
|
|
@ -16,9 +16,13 @@
|
||||||
|
|
||||||
struct TagMonoid {
|
struct TagMonoid {
|
||||||
trans_ix: u32,
|
trans_ix: u32,
|
||||||
|
// TODO: I don't think pathseg_ix is used.
|
||||||
pathseg_ix: u32,
|
pathseg_ix: u32,
|
||||||
pathseg_offset: u32,
|
pathseg_offset: u32,
|
||||||
// Note: piet-gpu has linewidth and path, but not needed here
|
#ifdef full
|
||||||
|
linewidth_ix: u32,
|
||||||
|
path_ix: u32,
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
let PATH_TAG_SEG_TYPE = 3u;
|
let PATH_TAG_SEG_TYPE = 3u;
|
||||||
|
@ -26,15 +30,14 @@ let PATH_TAG_LINETO = 1u;
|
||||||
let PATH_TAG_QUADTO = 2u;
|
let PATH_TAG_QUADTO = 2u;
|
||||||
let PATH_TAG_CUBICTO = 3u;
|
let PATH_TAG_CUBICTO = 3u;
|
||||||
let PATH_TAG_F32 = 8u;
|
let PATH_TAG_F32 = 8u;
|
||||||
let PATH_TAG_PATH = 0x10u;
|
|
||||||
let PATH_TAG_TRANSFORM = 0x20u;
|
let PATH_TAG_TRANSFORM = 0x20u;
|
||||||
|
#ifdef full
|
||||||
|
let PATH_TAG_PATH = 0x10u;
|
||||||
|
let PATH_TAG_LINEWIDTH = 0x40u;
|
||||||
|
#endif
|
||||||
|
|
||||||
fn tag_monoid_identity() -> TagMonoid {
|
fn tag_monoid_identity() -> TagMonoid {
|
||||||
var c: TagMonoid;
|
return TagMonoid();
|
||||||
c.trans_ix = 0u;
|
|
||||||
c.pathseg_ix = 0u;
|
|
||||||
c.pathseg_offset = 0u;
|
|
||||||
return c;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid {
|
fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid {
|
||||||
|
@ -42,6 +45,10 @@ fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid {
|
||||||
c.trans_ix = a.trans_ix + b.trans_ix;
|
c.trans_ix = a.trans_ix + b.trans_ix;
|
||||||
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
|
c.pathseg_ix = a.pathseg_ix + b.pathseg_ix;
|
||||||
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
|
c.pathseg_offset = a.pathseg_offset + b.pathseg_offset;
|
||||||
|
#ifdef full
|
||||||
|
c.linewidth_ix = a.linewidth_ix + b.linewidth_ix;
|
||||||
|
c.path_ix = a.path_ix + b.path_ix;
|
||||||
|
#endif
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,5 +62,9 @@ fn reduce_tag(tag_word: u32) -> TagMonoid {
|
||||||
a += a >> 8u;
|
a += a >> 8u;
|
||||||
a += a >> 16u;
|
a += a >> 16u;
|
||||||
c.pathseg_offset = a & 0xffu;
|
c.pathseg_offset = a & 0xffu;
|
||||||
|
#ifdef full
|
||||||
|
c.path_ix = countOneBits(tag_word & (PATH_TAG_PATH * 0x1010101u));
|
||||||
|
c.linewidth_ix = countOneBits(tag_word & (PATH_TAG_LINEWIDTH * 0x1010101u));
|
||||||
|
#endif
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
72
piet-wgsl/shader/shared/ptcl.wgsl
Normal file
72
piet-wgsl/shader/shared/ptcl.wgsl
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// Layout of per-tile command list
|
||||||
|
// Initial allocation, in u32's.
|
||||||
|
let PTCL_INITIAL_ALLOC = 64u;
|
||||||
|
let PTCL_INCREMENT = 256u;
|
||||||
|
|
||||||
|
// Amount of space taken by jump
|
||||||
|
let PTCL_HEADROOM = 2u;
|
||||||
|
|
||||||
|
// Tags for PTCL commands
|
||||||
|
let CMD_END = 0u;
|
||||||
|
let CMD_FILL = 1u;
|
||||||
|
let CMD_STROKE = 2u;
|
||||||
|
let CMD_SOLID = 3u;
|
||||||
|
let CMD_COLOR = 5u;
|
||||||
|
let CMD_LIN_GRAD = 6u;
|
||||||
|
let CMD_RAD_GRAD = 7u;
|
||||||
|
let CMD_BEGIN_CLIP = 9u;
|
||||||
|
let CMD_END_CLIP = 10u;
|
||||||
|
let CMD_JUMP = 11u;
|
||||||
|
|
||||||
|
// The individual PTCL structs are written here, but read/write is by
|
||||||
|
// hand in the relevant shaders
|
||||||
|
|
||||||
|
struct CmdFill {
|
||||||
|
tile: u32,
|
||||||
|
backdrop: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdStroke {
|
||||||
|
tile: u32,
|
||||||
|
half_width: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdJump {
|
||||||
|
new_ix: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdColor {
|
||||||
|
rgba_color: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdLinGrad {
|
||||||
|
index: u32,
|
||||||
|
line_x: f32,
|
||||||
|
line_y: f32,
|
||||||
|
line_c: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdRadGrad {
|
||||||
|
index: u32,
|
||||||
|
matrx: vec4<f32>,
|
||||||
|
xlat: vec2<f32>,
|
||||||
|
c1: vec2<f32>,
|
||||||
|
ra: f32,
|
||||||
|
roff: f32,
|
||||||
|
}
|
29
piet-wgsl/shader/shared/tile.wgsl
Normal file
29
piet-wgsl/shader/shared/tile.wgsl
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// Common datatypes for path and tile intermediate info.
|
||||||
|
|
||||||
|
struct Path {
|
||||||
|
// bounding box in tiles
|
||||||
|
bbox: vec4<u32>,
|
||||||
|
// offset (in u32's) to tile rectangle
|
||||||
|
tiles: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Tile {
|
||||||
|
backdrop: i32,
|
||||||
|
segments: u32,
|
||||||
|
}
|
114
piet-wgsl/shader/tile_alloc.wgsl
Normal file
114
piet-wgsl/shader/tile_alloc.wgsl
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
// Copyright 2022 Google LLC
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
// Tile allocation (and zeroing of tiles)
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import bump
|
||||||
|
#import drawtag
|
||||||
|
#import tile
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> config: Config;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> scene: array<u32>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage> draw_bboxes: array<vec4<f32>>;
|
||||||
|
|
||||||
|
@group(0) @binding(3)
|
||||||
|
var<storage, read_write> bump: BumpAllocators;
|
||||||
|
|
||||||
|
@group(0) @binding(4)
|
||||||
|
var<storage, read_write> paths: array<Path>;
|
||||||
|
|
||||||
|
@group(0) @binding(5)
|
||||||
|
var<storage, read_write> tiles: array<Tile>;
|
||||||
|
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
|
||||||
|
var<workgroup> sh_tile_count: array<u32, WG_SIZE>;
|
||||||
|
var<workgroup> sh_tile_offset: u32;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
// scale factors useful for converting coordinates to tiles
|
||||||
|
// TODO: make into constants
|
||||||
|
let SX = 1.0 / f32(TILE_WIDTH);
|
||||||
|
let SY = 1.0 / f32(TILE_HEIGHT);
|
||||||
|
|
||||||
|
let drawobj_ix = global_id.x;
|
||||||
|
var drawtag = DRAWTAG_NOP;
|
||||||
|
if drawobj_ix < config.n_drawobj {
|
||||||
|
drawtag = scene[config.drawtag_base + drawobj_ix];
|
||||||
|
}
|
||||||
|
var x0 = 0;
|
||||||
|
var y0 = 0;
|
||||||
|
var x1 = 0;
|
||||||
|
var y1 = 0;
|
||||||
|
if drawtag != DRAWTAG_NOP && drawtag != DRAWTAG_END_CLIP {
|
||||||
|
let bbox = draw_bboxes[drawobj_ix];
|
||||||
|
x0 = i32(floor(bbox.x * SX));
|
||||||
|
y0 = i32(floor(bbox.y * SY));
|
||||||
|
x1 = i32(ceil(bbox.z * SX));
|
||||||
|
y1 = i32(ceil(bbox.w * SY));
|
||||||
|
}
|
||||||
|
let ux0 = u32(clamp(x0, 0, i32(config.width_in_tiles)));
|
||||||
|
let uy0 = u32(clamp(y0, 0, i32(config.height_in_tiles)));
|
||||||
|
let ux1 = u32(clamp(x1, 0, i32(config.width_in_tiles)));
|
||||||
|
let uy1 = u32(clamp(y1, 0, i32(config.height_in_tiles)));
|
||||||
|
let tile_count = (ux1 - ux0) * (uy1 - uy0);
|
||||||
|
var total_tile_count = tile_count;
|
||||||
|
sh_tile_count[local_id.x] = tile_count;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x >= (1u << i) {
|
||||||
|
total_tile_count += sh_tile_count[local_id.x - (1u << i)];
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_tile_count[local_id.x] = total_tile_count;
|
||||||
|
}
|
||||||
|
if local_id.x == WG_SIZE - 1u {
|
||||||
|
paths[drawobj_ix].tiles = atomicAdd(&bump.tile, sh_tile_count[WG_SIZE - 1u]);
|
||||||
|
}
|
||||||
|
// Using storage barriers is a workaround for what appears to be a miscompilation
|
||||||
|
// when a normal workgroup-shared variable is used to broadcast the value.
|
||||||
|
storageBarrier();
|
||||||
|
let tile_offset = paths[drawobj_ix | (WG_SIZE - 1u)].tiles;
|
||||||
|
storageBarrier();
|
||||||
|
if drawobj_ix < config.n_drawobj {
|
||||||
|
let tile_subix = select(0u, sh_tile_count[local_id.x - 1u], local_id.x > 0u);
|
||||||
|
let bbox = vec4<u32>(ux0, uy0, ux1, uy1);
|
||||||
|
let path = Path(bbox, tile_offset + tile_subix);
|
||||||
|
paths[drawobj_ix] = path;
|
||||||
|
}
|
||||||
|
|
||||||
|
// zero allocated memory
|
||||||
|
// Note: if the number of draw objects is small, utilization will be poor.
|
||||||
|
// There are two things that can be done to improve that. One would be a
|
||||||
|
// separate (indirect) dispatch. Another would be to have each workgroup
|
||||||
|
// process fewer draw objects than the number of threads in the wg.
|
||||||
|
let total_count = sh_tile_count[WG_SIZE - 1u];
|
||||||
|
for (var i = local_id.x; i < total_count; i += WG_SIZE) {
|
||||||
|
// Note: could format output buffer as u32 for even better load
|
||||||
|
// balancing, as does piet-gpu.
|
||||||
|
tiles[tile_offset + i] = Tile(0, 0u);
|
||||||
|
}
|
||||||
|
}
|
5
piet-wgsl/src/debug.rs
Normal file
5
piet-wgsl/src/debug.rs
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
#![allow(dead_code)]
|
||||||
|
|
||||||
|
pub mod clip;
|
||||||
|
pub mod draw;
|
||||||
|
pub mod fine;
|
13
piet-wgsl/src/debug/clip.rs
Normal file
13
piet-wgsl/src/debug/clip.rs
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
use bytemuck::{Pod, Zeroable};
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Zeroable, Pod)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct ClipEl {
|
||||||
|
pub parent_ix: u32,
|
||||||
|
pub pad: [u32; 3],
|
||||||
|
pub bbox: [f32; 4],
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_clip_els(data: &[u8]) -> Vec<ClipEl> {
|
||||||
|
Vec::from(bytemuck::cast_slice(data))
|
||||||
|
}
|
14
piet-wgsl/src/debug/draw.rs
Normal file
14
piet-wgsl/src/debug/draw.rs
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
use bytemuck::{Pod, Zeroable};
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug, Zeroable, Pod)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct DrawMonoid {
|
||||||
|
pub path_ix: u32,
|
||||||
|
pub clip_ix: u32,
|
||||||
|
pub scene_offset: u32,
|
||||||
|
pub info_offset: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn parse_draw_monoids(data: &[u8]) -> Vec<DrawMonoid> {
|
||||||
|
Vec::from(bytemuck::cast_slice(data))
|
||||||
|
}
|
153
piet-wgsl/src/debug/fine.rs
Normal file
153
piet-wgsl/src/debug/fine.rs
Normal file
|
@ -0,0 +1,153 @@
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct Fill {
|
||||||
|
pub tile: u32,
|
||||||
|
pub backdrop: i32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct Stroke {
|
||||||
|
pub tile: u32,
|
||||||
|
pub half_width: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct Color {
|
||||||
|
abgr: [u8; 4],
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct LinGrad {
|
||||||
|
pub index: u32,
|
||||||
|
pub line_x: f32,
|
||||||
|
pub line_y: f32,
|
||||||
|
pub line_c: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
#[repr(C)]
|
||||||
|
pub struct RadGrad {
|
||||||
|
pub index: u32,
|
||||||
|
pub matrix: [f32; 4],
|
||||||
|
pub xlat: [f32; 2],
|
||||||
|
pub c1: [f32; 2],
|
||||||
|
pub ra: f32,
|
||||||
|
pub roff: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
pub enum Command {
|
||||||
|
Fill(Fill),
|
||||||
|
Stroke(Stroke),
|
||||||
|
Solid,
|
||||||
|
Color(Color),
|
||||||
|
LinGrad(LinGrad),
|
||||||
|
RadGrad(RadGrad),
|
||||||
|
BeginClip,
|
||||||
|
EndClip(u32),
|
||||||
|
End,
|
||||||
|
}
|
||||||
|
|
||||||
|
const PTCL_INITIAL_ALLOC: usize = 64;
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct CommandList {
|
||||||
|
pub tiles: Vec<(u32, u32, Vec<Command>)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CommandList {
|
||||||
|
pub fn parse(width: usize, height: usize, ptcl: &[u8]) -> Self {
|
||||||
|
let mut tiles = vec![];
|
||||||
|
let width_tiles = width / 16;
|
||||||
|
let height_tiles = height / 16;
|
||||||
|
for y in 0..height_tiles {
|
||||||
|
for x in 0..width_tiles {
|
||||||
|
let tile_ix = y * width_tiles + x;
|
||||||
|
let ix = tile_ix * PTCL_INITIAL_ALLOC;
|
||||||
|
let commands = parse_commands(ptcl, ix);
|
||||||
|
if !commands.is_empty() {
|
||||||
|
tiles.push((x as u32, y as u32, commands));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Self { tiles }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_commands(ptcl: &[u8], mut ix: usize) -> Vec<Command> {
|
||||||
|
let mut commands = vec![];
|
||||||
|
let words: &[u32] = bytemuck::cast_slice(ptcl);
|
||||||
|
while ix < words.len() {
|
||||||
|
let tag = words[ix];
|
||||||
|
ix += 1;
|
||||||
|
match tag {
|
||||||
|
0 => break,
|
||||||
|
1 => {
|
||||||
|
commands.push(Command::Fill(Fill {
|
||||||
|
tile: words[ix],
|
||||||
|
backdrop: words[ix + 1] as i32,
|
||||||
|
}));
|
||||||
|
ix += 2;
|
||||||
|
}
|
||||||
|
2 => {
|
||||||
|
commands.push(Command::Stroke(Stroke {
|
||||||
|
tile: words[ix],
|
||||||
|
half_width: bytemuck::cast(words[ix + 1]),
|
||||||
|
}));
|
||||||
|
ix += 2;
|
||||||
|
}
|
||||||
|
3 => {
|
||||||
|
commands.push(Command::Solid);
|
||||||
|
}
|
||||||
|
5 => {
|
||||||
|
commands.push(Command::Color(Color {
|
||||||
|
abgr: bytemuck::cast(words[ix]),
|
||||||
|
}));
|
||||||
|
ix += 1;
|
||||||
|
}
|
||||||
|
6 => {
|
||||||
|
commands.push(Command::LinGrad(LinGrad {
|
||||||
|
index: words[ix],
|
||||||
|
line_x: bytemuck::cast(words[ix + 1]),
|
||||||
|
line_y: bytemuck::cast(words[ix + 2]),
|
||||||
|
line_c: bytemuck::cast(words[ix + 3]),
|
||||||
|
}));
|
||||||
|
ix += 4;
|
||||||
|
}
|
||||||
|
7 => {
|
||||||
|
let matrix = [
|
||||||
|
bytemuck::cast(words[ix + 1]),
|
||||||
|
bytemuck::cast(words[ix + 2]),
|
||||||
|
bytemuck::cast(words[ix + 3]),
|
||||||
|
bytemuck::cast(words[ix + 4]),
|
||||||
|
];
|
||||||
|
let xlat = [bytemuck::cast(words[ix + 5]), bytemuck::cast(words[ix + 6])];
|
||||||
|
let c1 = [bytemuck::cast(words[ix + 7]), bytemuck::cast(words[ix + 8])];
|
||||||
|
commands.push(Command::RadGrad(RadGrad {
|
||||||
|
index: words[ix],
|
||||||
|
matrix,
|
||||||
|
xlat,
|
||||||
|
c1,
|
||||||
|
ra: bytemuck::cast(words[ix + 9]),
|
||||||
|
roff: bytemuck::cast(words[ix + 10]),
|
||||||
|
}));
|
||||||
|
ix += 11;
|
||||||
|
}
|
||||||
|
9 => {
|
||||||
|
commands.push(Command::BeginClip);
|
||||||
|
}
|
||||||
|
10 => {
|
||||||
|
commands.push(Command::EndClip(words[ix]));
|
||||||
|
ix += 1;
|
||||||
|
}
|
||||||
|
11 => {
|
||||||
|
ix = words[ix] as usize;
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
commands
|
||||||
|
}
|
|
@ -17,7 +17,7 @@
|
||||||
use std::{
|
use std::{
|
||||||
borrow::Cow,
|
borrow::Cow,
|
||||||
collections::{hash_map::Entry, HashMap},
|
collections::{hash_map::Entry, HashMap},
|
||||||
num::NonZeroU64,
|
num::{NonZeroU32, NonZeroU64},
|
||||||
sync::atomic::{AtomicU64, Ordering},
|
sync::atomic::{AtomicU64, Ordering},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -25,7 +25,8 @@ use futures_intrusive::channel::shared::GenericOneshotReceiver;
|
||||||
use parking_lot::RawMutex;
|
use parking_lot::RawMutex;
|
||||||
use wgpu::{
|
use wgpu::{
|
||||||
util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice, BufferView,
|
util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice, BufferView,
|
||||||
ComputePipeline, Device, Queue,
|
ComputePipeline, Device, Queue, Texture, TextureAspect, TextureFormat, TextureUsages,
|
||||||
|
TextureView, TextureViewDimension,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type Error = Box<dyn std::error::Error>;
|
pub type Error = Box<dyn std::error::Error>;
|
||||||
|
@ -58,12 +59,27 @@ pub struct BufProxy {
|
||||||
id: Id,
|
id: Id,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub struct ImageProxy {
|
||||||
|
width: u32,
|
||||||
|
height: u32,
|
||||||
|
// TODO: format
|
||||||
|
id: Id,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Clone, Copy)]
|
||||||
|
pub enum ResourceProxy {
|
||||||
|
Buf(BufProxy),
|
||||||
|
Image(ImageProxy),
|
||||||
|
}
|
||||||
|
|
||||||
pub enum Command {
|
pub enum Command {
|
||||||
Upload(BufProxy, Vec<u8>),
|
Upload(BufProxy, Vec<u8>),
|
||||||
|
UploadImage(ImageProxy, Vec<u8>),
|
||||||
// Discussion question: third argument is vec of resources?
|
// Discussion question: third argument is vec of resources?
|
||||||
// Maybe use tricks to make more ergonomic?
|
// Maybe use tricks to make more ergonomic?
|
||||||
// Alternative: provide bufs & images as separate sequences, like piet-gpu.
|
// Alternative: provide bufs & images as separate sequences, like piet-gpu.
|
||||||
Dispatch(ShaderId, (u32, u32, u32), Vec<BufProxy>),
|
Dispatch(ShaderId, (u32, u32, u32), Vec<ResourceProxy>),
|
||||||
Download(BufProxy),
|
Download(BufProxy),
|
||||||
Clear(BufProxy, u64, Option<NonZeroU64>),
|
Clear(BufProxy, u64, Option<NonZeroU64>),
|
||||||
}
|
}
|
||||||
|
@ -92,6 +108,7 @@ pub enum BindType {
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
struct BindMap {
|
struct BindMap {
|
||||||
buf_map: HashMap<Id, Buffer>,
|
buf_map: HashMap<Id, Buffer>,
|
||||||
|
image_map: HashMap<Id, (Texture, TextureView)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Engine {
|
impl Engine {
|
||||||
|
@ -132,6 +149,16 @@ impl Engine {
|
||||||
},
|
},
|
||||||
count: None,
|
count: None,
|
||||||
},
|
},
|
||||||
|
BindType::ImageRead => wgpu::BindGroupLayoutEntry {
|
||||||
|
binding: i as u32,
|
||||||
|
visibility: wgpu::ShaderStages::COMPUTE,
|
||||||
|
ty: wgpu::BindingType::Texture {
|
||||||
|
sample_type: wgpu::TextureSampleType::Float { filterable: true },
|
||||||
|
view_dimension: wgpu::TextureViewDimension::D2,
|
||||||
|
multisampled: false,
|
||||||
|
},
|
||||||
|
count: None,
|
||||||
|
},
|
||||||
_ => todo!(),
|
_ => todo!(),
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
@ -182,7 +209,60 @@ impl Engine {
|
||||||
});
|
});
|
||||||
bind_map.insert_buf(buf_proxy.id, buf);
|
bind_map.insert_buf(buf_proxy.id, buf);
|
||||||
}
|
}
|
||||||
|
Command::UploadImage(image_proxy, bytes) => {
|
||||||
|
let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
|
||||||
|
label: None,
|
||||||
|
contents: &bytes,
|
||||||
|
usage: wgpu::BufferUsages::COPY_SRC,
|
||||||
|
});
|
||||||
|
let texture = device.create_texture(&wgpu::TextureDescriptor {
|
||||||
|
label: None,
|
||||||
|
size: wgpu::Extent3d {
|
||||||
|
width: image_proxy.width,
|
||||||
|
height: image_proxy.height,
|
||||||
|
depth_or_array_layers: 1,
|
||||||
|
},
|
||||||
|
mip_level_count: 1,
|
||||||
|
sample_count: 1,
|
||||||
|
dimension: wgpu::TextureDimension::D2,
|
||||||
|
usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST,
|
||||||
|
format: TextureFormat::Rgba8Unorm,
|
||||||
|
});
|
||||||
|
let texture_view = texture.create_view(&wgpu::TextureViewDescriptor {
|
||||||
|
label: None,
|
||||||
|
dimension: Some(TextureViewDimension::D2),
|
||||||
|
aspect: TextureAspect::All,
|
||||||
|
mip_level_count: None,
|
||||||
|
base_mip_level: 0,
|
||||||
|
base_array_layer: 0,
|
||||||
|
array_layer_count: None,
|
||||||
|
format: Some(TextureFormat::Rgba8Unorm),
|
||||||
|
});
|
||||||
|
encoder.copy_buffer_to_texture(
|
||||||
|
wgpu::ImageCopyBuffer {
|
||||||
|
buffer: &buf,
|
||||||
|
layout: wgpu::ImageDataLayout {
|
||||||
|
offset: 0,
|
||||||
|
bytes_per_row: NonZeroU32::new(image_proxy.width * 4),
|
||||||
|
rows_per_image: None,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
wgpu::ImageCopyTexture {
|
||||||
|
texture: &texture,
|
||||||
|
mip_level: 0,
|
||||||
|
origin: wgpu::Origin3d { x: 0, y: 0, z: 0 },
|
||||||
|
aspect: TextureAspect::All,
|
||||||
|
},
|
||||||
|
wgpu::Extent3d {
|
||||||
|
width: image_proxy.width,
|
||||||
|
height: image_proxy.height,
|
||||||
|
depth_or_array_layers: 1,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
bind_map.insert_image(image_proxy.id, texture, texture_view)
|
||||||
|
}
|
||||||
Command::Dispatch(shader_id, wg_size, bindings) => {
|
Command::Dispatch(shader_id, wg_size, bindings) => {
|
||||||
|
println!("dispatching {:?} with {} bindings", wg_size, bindings.len());
|
||||||
let shader = &self.shaders[shader_id.0];
|
let shader = &self.shaders[shader_id.0];
|
||||||
let bind_group =
|
let bind_group =
|
||||||
bind_map.create_bind_group(device, &shader.bind_group_layout, bindings)?;
|
bind_map.create_bind_group(device, &shader.bind_group_layout, bindings)?;
|
||||||
|
@ -225,13 +305,28 @@ impl Recording {
|
||||||
buf_proxy
|
buf_proxy
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dispatch(
|
pub fn upload_image(
|
||||||
&mut self,
|
&mut self,
|
||||||
shader: ShaderId,
|
width: u32,
|
||||||
wg_size: (u32, u32, u32),
|
height: u32,
|
||||||
resources: impl Into<Vec<BufProxy>>,
|
data: impl Into<Vec<u8>>,
|
||||||
) {
|
) -> ImageProxy {
|
||||||
self.push(Command::Dispatch(shader, wg_size, resources.into()));
|
let data = data.into();
|
||||||
|
let image_proxy = ImageProxy::new(width, height);
|
||||||
|
self.push(Command::UploadImage(image_proxy, data));
|
||||||
|
image_proxy
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn dispatch<R>(&mut self, shader: ShaderId, wg_size: (u32, u32, u32), resources: R)
|
||||||
|
where
|
||||||
|
R: IntoIterator,
|
||||||
|
R::Item: Into<ResourceProxy>,
|
||||||
|
{
|
||||||
|
self.push(Command::Dispatch(
|
||||||
|
shader,
|
||||||
|
wg_size,
|
||||||
|
resources.into_iter().map(|r| r.into()).collect(),
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn download(&mut self, buf: BufProxy) {
|
pub fn download(&mut self, buf: BufProxy) {
|
||||||
|
@ -246,7 +341,53 @@ impl Recording {
|
||||||
impl BufProxy {
|
impl BufProxy {
|
||||||
pub fn new(size: u64) -> Self {
|
pub fn new(size: u64) -> Self {
|
||||||
let id = Id::next();
|
let id = Id::next();
|
||||||
BufProxy { id, size }
|
BufProxy {
|
||||||
|
id,
|
||||||
|
size: size.max(16),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ImageProxy {
|
||||||
|
pub fn new(width: u32, height: u32) -> Self {
|
||||||
|
let id = Id::next();
|
||||||
|
ImageProxy { width, height, id }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ResourceProxy {
|
||||||
|
pub fn new_buf(size: u64) -> Self {
|
||||||
|
Self::Buf(BufProxy::new(size))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new_image(width: u32, height: u32) -> Self {
|
||||||
|
Self::Image(ImageProxy::new(width, height))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_buf(&self) -> Option<&BufProxy> {
|
||||||
|
match self {
|
||||||
|
Self::Buf(proxy) => Some(&proxy),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn as_image(&self) -> Option<&ImageProxy> {
|
||||||
|
match self {
|
||||||
|
Self::Image(proxy) => Some(&proxy),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<BufProxy> for ResourceProxy {
|
||||||
|
fn from(value: BufProxy) -> Self {
|
||||||
|
Self::Buf(value)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<ImageProxy> for ResourceProxy {
|
||||||
|
fn from(value: ImageProxy) -> Self {
|
||||||
|
Self::Image(value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -263,34 +404,79 @@ impl BindMap {
|
||||||
self.buf_map.insert(id, buf);
|
self.buf_map.insert(id, buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn insert_image(&mut self, id: Id, image: Texture, image_view: TextureView) {
|
||||||
|
self.image_map.insert(id, (image, image_view));
|
||||||
|
}
|
||||||
|
|
||||||
fn create_bind_group(
|
fn create_bind_group(
|
||||||
&mut self,
|
&mut self,
|
||||||
device: &Device,
|
device: &Device,
|
||||||
layout: &BindGroupLayout,
|
layout: &BindGroupLayout,
|
||||||
bindings: &[BufProxy],
|
bindings: &[ResourceProxy],
|
||||||
) -> Result<BindGroup, Error> {
|
) -> Result<BindGroup, Error> {
|
||||||
for proxy in bindings {
|
for proxy in bindings {
|
||||||
if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
|
match proxy {
|
||||||
let buf = device.create_buffer(&wgpu::BufferDescriptor {
|
ResourceProxy::Buf(proxy) => {
|
||||||
label: None,
|
if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
|
||||||
size: proxy.size,
|
let buf = device.create_buffer(&wgpu::BufferDescriptor {
|
||||||
usage: wgpu::BufferUsages::STORAGE
|
label: None,
|
||||||
| wgpu::BufferUsages::COPY_DST
|
size: proxy.size,
|
||||||
| wgpu::BufferUsages::COPY_SRC,
|
usage: wgpu::BufferUsages::STORAGE
|
||||||
mapped_at_creation: false,
|
| wgpu::BufferUsages::COPY_DST
|
||||||
});
|
| wgpu::BufferUsages::COPY_SRC,
|
||||||
v.insert(buf);
|
mapped_at_creation: false,
|
||||||
|
});
|
||||||
|
v.insert(buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ResourceProxy::Image(proxy) => {
|
||||||
|
if let Entry::Vacant(v) = self.image_map.entry(proxy.id) {
|
||||||
|
let texture = device.create_texture(&wgpu::TextureDescriptor {
|
||||||
|
label: None,
|
||||||
|
size: wgpu::Extent3d {
|
||||||
|
width: proxy.width,
|
||||||
|
height: proxy.height,
|
||||||
|
depth_or_array_layers: 1,
|
||||||
|
},
|
||||||
|
mip_level_count: 1,
|
||||||
|
sample_count: 1,
|
||||||
|
dimension: wgpu::TextureDimension::D2,
|
||||||
|
usage: TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_DST,
|
||||||
|
format: TextureFormat::Rgba8Unorm,
|
||||||
|
});
|
||||||
|
let texture_view = texture.create_view(&wgpu::TextureViewDescriptor {
|
||||||
|
label: None,
|
||||||
|
dimension: Some(TextureViewDimension::D2),
|
||||||
|
aspect: TextureAspect::All,
|
||||||
|
mip_level_count: None,
|
||||||
|
base_mip_level: 0,
|
||||||
|
base_array_layer: 0,
|
||||||
|
array_layer_count: None,
|
||||||
|
format: Some(TextureFormat::Rgba8Unorm),
|
||||||
|
});
|
||||||
|
v.insert((texture, texture_view));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let entries = bindings
|
let entries = bindings
|
||||||
.iter()
|
.iter()
|
||||||
.enumerate()
|
.enumerate()
|
||||||
.map(|(i, proxy)| {
|
.map(|(i, proxy)| match proxy {
|
||||||
let buf = self.buf_map.get(&proxy.id).unwrap();
|
ResourceProxy::Buf(proxy) => {
|
||||||
Ok(wgpu::BindGroupEntry {
|
let buf = self.buf_map.get(&proxy.id).unwrap();
|
||||||
binding: i as u32,
|
Ok(wgpu::BindGroupEntry {
|
||||||
resource: buf.as_entire_binding(),
|
binding: i as u32,
|
||||||
})
|
resource: buf.as_entire_binding(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
ResourceProxy::Image(proxy) => {
|
||||||
|
let texture = self.image_map.get(&proxy.id).unwrap();
|
||||||
|
Ok(wgpu::BindGroupEntry {
|
||||||
|
binding: i as u32,
|
||||||
|
resource: wgpu::BindingResource::TextureView(&texture.1),
|
||||||
|
})
|
||||||
|
}
|
||||||
})
|
})
|
||||||
.collect::<Result<Vec<_>, Error>>()?;
|
.collect::<Result<Vec<_>, Error>>()?;
|
||||||
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
|
||||||
|
|
|
@ -20,11 +20,12 @@ use std::{fs::File, io::BufWriter};
|
||||||
|
|
||||||
use engine::Engine;
|
use engine::Engine;
|
||||||
|
|
||||||
use render::render;
|
use wgpu::{Device, Limits, Queue};
|
||||||
use test_scene::dump_scene_info;
|
|
||||||
use wgpu::{Device, Queue};
|
|
||||||
|
|
||||||
|
mod debug;
|
||||||
mod engine;
|
mod engine;
|
||||||
|
mod pico_svg;
|
||||||
|
mod ramp;
|
||||||
mod render;
|
mod render;
|
||||||
mod shaders;
|
mod shaders;
|
||||||
mod test_scene;
|
mod test_scene;
|
||||||
|
@ -33,12 +34,14 @@ async fn run() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY);
|
let instance = wgpu::Instance::new(wgpu::Backends::PRIMARY);
|
||||||
let adapter = instance.request_adapter(&Default::default()).await.unwrap();
|
let adapter = instance.request_adapter(&Default::default()).await.unwrap();
|
||||||
let features = adapter.features();
|
let features = adapter.features();
|
||||||
|
let mut limits = Limits::default();
|
||||||
|
limits.max_storage_buffers_per_shader_stage = 16;
|
||||||
let (device, queue) = adapter
|
let (device, queue) = adapter
|
||||||
.request_device(
|
.request_device(
|
||||||
&wgpu::DeviceDescriptor {
|
&wgpu::DeviceDescriptor {
|
||||||
label: None,
|
label: None,
|
||||||
features: features & wgpu::Features::TIMESTAMP_QUERY,
|
features: features & wgpu::Features::TIMESTAMP_QUERY,
|
||||||
limits: Default::default(),
|
limits,
|
||||||
},
|
},
|
||||||
None,
|
None,
|
||||||
)
|
)
|
||||||
|
@ -49,25 +52,46 @@ async fn run() -> Result<(), Box<dyn std::error::Error>> {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn dump_buf(buf: &[u32]) {
|
||||||
|
for (i, val) in buf.iter().enumerate() {
|
||||||
|
if *val != 0 {
|
||||||
|
let lo = val & 0x7fff_ffff;
|
||||||
|
if lo >= 0x3000_0000 && lo < 0x5000_0000 {
|
||||||
|
println!("{}: {:x} {}", i, val, f32::from_bits(*val));
|
||||||
|
} else {
|
||||||
|
println!("{}: {:x}", i, val);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async fn do_render(
|
async fn do_render(
|
||||||
device: &Device,
|
device: &Device,
|
||||||
queue: &Queue,
|
queue: &Queue,
|
||||||
engine: &mut Engine,
|
engine: &mut Engine,
|
||||||
) -> Result<(), Box<dyn std::error::Error>> {
|
) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
#[allow(unused)]
|
||||||
let shaders = shaders::init_shaders(device, engine)?;
|
let shaders = shaders::init_shaders(device, engine)?;
|
||||||
|
let full_shaders = shaders::full_shaders(device, engine)?;
|
||||||
let scene = test_scene::gen_test_scene();
|
let scene = test_scene::gen_test_scene();
|
||||||
dump_scene_info(&scene);
|
//test_scene::dump_scene_info(&scene);
|
||||||
let (recording, buf) = render(&scene, &shaders);
|
//let (recording, buf) = render::render(&scene, &shaders);
|
||||||
|
let (recording, buf) = render::render_full(&scene, &full_shaders);
|
||||||
let downloads = engine.run_recording(&device, &queue, &recording)?;
|
let downloads = engine.run_recording(&device, &queue, &recording)?;
|
||||||
let mapped = downloads.map();
|
let mapped = downloads.map();
|
||||||
device.poll(wgpu::Maintain::Wait);
|
device.poll(wgpu::Maintain::Wait);
|
||||||
let buf = mapped.get_mapped(buf).await?;
|
let buf = mapped.get_mapped(buf).await?;
|
||||||
|
|
||||||
let file = File::create("image.png")?;
|
if false {
|
||||||
let w = BufWriter::new(file);
|
dump_buf(bytemuck::cast_slice(&buf));
|
||||||
let encoder = png::Encoder::new(w, 1024, 1024);
|
} else {
|
||||||
let mut writer = encoder.write_header()?;
|
let file = File::create("image.png")?;
|
||||||
writer.write_image_data(&buf)?;
|
let w = BufWriter::new(file);
|
||||||
|
let mut encoder = png::Encoder::new(w, 1024, 1024);
|
||||||
|
encoder.set_color(png::ColorType::Rgba);
|
||||||
|
let mut writer = encoder.write_header()?;
|
||||||
|
writer.write_image_data(&buf)?;
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
140
piet-wgsl/src/pico_svg.rs
Normal file
140
piet-wgsl/src/pico_svg.rs
Normal file
|
@ -0,0 +1,140 @@
|
||||||
|
//! A loader for a tiny fragment of SVG
|
||||||
|
|
||||||
|
use std::str::FromStr;
|
||||||
|
|
||||||
|
use roxmltree::{Document, Node};
|
||||||
|
|
||||||
|
use kurbo::{Affine, BezPath};
|
||||||
|
|
||||||
|
use piet_scene::Color;
|
||||||
|
|
||||||
|
pub struct PicoSvg {
|
||||||
|
pub items: Vec<Item>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum Item {
|
||||||
|
Fill(FillItem),
|
||||||
|
Stroke(StrokeItem),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct StrokeItem {
|
||||||
|
pub width: f64,
|
||||||
|
pub color: Color,
|
||||||
|
pub path: BezPath,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FillItem {
|
||||||
|
pub color: Color,
|
||||||
|
pub path: BezPath,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Parser<'a> {
|
||||||
|
scale: f64,
|
||||||
|
items: &'a mut Vec<Item>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PicoSvg {
|
||||||
|
pub fn load(xml_string: &str, scale: f64) -> Result<PicoSvg, Box<dyn std::error::Error>> {
|
||||||
|
let doc = Document::parse(xml_string)?;
|
||||||
|
let root = doc.root_element();
|
||||||
|
let mut items = Vec::new();
|
||||||
|
let mut parser = Parser::new(&mut items, scale);
|
||||||
|
for node in root.children() {
|
||||||
|
parser.rec_parse(node)?;
|
||||||
|
}
|
||||||
|
Ok(PicoSvg { items })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
|
||||||
|
fn new(items: &'a mut Vec<Item>, scale: f64) -> Parser<'a> {
|
||||||
|
Parser { scale, items }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn rec_parse(&mut self, node: Node) -> Result<(), Box<dyn std::error::Error>> {
|
||||||
|
let transform = if self.scale >= 0.0 {
|
||||||
|
Affine::scale(self.scale)
|
||||||
|
} else {
|
||||||
|
Affine::new([-self.scale, 0.0, 0.0, self.scale, 0.0, 1536.0])
|
||||||
|
};
|
||||||
|
if node.is_element() {
|
||||||
|
match node.tag_name().name() {
|
||||||
|
"g" => {
|
||||||
|
for child in node.children() {
|
||||||
|
self.rec_parse(child)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"path" => {
|
||||||
|
let d = node.attribute("d").ok_or("missing 'd' attribute")?;
|
||||||
|
let bp = BezPath::from_svg(d)?;
|
||||||
|
let path = transform * bp;
|
||||||
|
// TODO: default fill color is black, but this is overridden in tiger to this logic.
|
||||||
|
if let Some(fill_color) = node.attribute("fill") {
|
||||||
|
if fill_color != "none" {
|
||||||
|
let color = parse_color(fill_color);
|
||||||
|
let color = modify_opacity(color, "fill-opacity", node);
|
||||||
|
self.items.push(Item::Fill(FillItem {
|
||||||
|
color,
|
||||||
|
path: path.clone(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(stroke_color) = node.attribute("stroke") {
|
||||||
|
if stroke_color != "none" {
|
||||||
|
let width = self.scale.abs()
|
||||||
|
* f64::from_str(
|
||||||
|
node.attribute("stroke-width").ok_or("missing width")?,
|
||||||
|
)?;
|
||||||
|
let color = parse_color(stroke_color);
|
||||||
|
let color = modify_opacity(color, "stroke-opacity", node);
|
||||||
|
self.items
|
||||||
|
.push(Item::Stroke(StrokeItem { width, color, path }));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_color(color: &str) -> Color {
|
||||||
|
if color.as_bytes()[0] == b'#' {
|
||||||
|
let mut hex = u32::from_str_radix(&color[1..], 16).unwrap();
|
||||||
|
if color.len() == 4 {
|
||||||
|
hex = (hex >> 8) * 0x110000 + ((hex >> 4) & 0xf) * 0x1100 + (hex & 0xf) * 0x11;
|
||||||
|
}
|
||||||
|
let rgba = (hex << 8) + 0xff;
|
||||||
|
let (r, g, b, a) = (
|
||||||
|
(rgba >> 24 & 255) as u8,
|
||||||
|
((rgba >> 16) & 255) as u8,
|
||||||
|
((rgba >> 8) & 255) as u8,
|
||||||
|
(rgba & 255) as u8,
|
||||||
|
);
|
||||||
|
Color::rgba8(r, g, b, a)
|
||||||
|
} else if color.starts_with("rgb(") {
|
||||||
|
let mut iter = color[4..color.len() - 1].split(',');
|
||||||
|
let r = u8::from_str(iter.next().unwrap()).unwrap();
|
||||||
|
let g = u8::from_str(iter.next().unwrap()).unwrap();
|
||||||
|
let b = u8::from_str(iter.next().unwrap()).unwrap();
|
||||||
|
Color::rgb8(r, g, b)
|
||||||
|
} else {
|
||||||
|
Color::rgba8(255, 0, 255, 0x80)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn modify_opacity(mut color: Color, attr_name: &str, node: Node) -> Color {
|
||||||
|
if let Some(opacity) = node.attribute(attr_name) {
|
||||||
|
let alpha = if opacity.ends_with("%") {
|
||||||
|
let pctg = opacity[..opacity.len() - 1].parse().unwrap_or(100.0);
|
||||||
|
pctg * 0.01
|
||||||
|
} else {
|
||||||
|
opacity.parse().unwrap_or(1.0)
|
||||||
|
} as f64;
|
||||||
|
color.a = (alpha.min(1.0).max(0.0) * 255.0).round() as u8;
|
||||||
|
color
|
||||||
|
} else {
|
||||||
|
color
|
||||||
|
}
|
||||||
|
}
|
137
piet-wgsl/src/ramp.rs
Normal file
137
piet-wgsl/src/ramp.rs
Normal file
|
@ -0,0 +1,137 @@
|
||||||
|
use piet_scene::{Color, GradientStop, GradientStops};
|
||||||
|
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
const N_SAMPLES: usize = 512;
|
||||||
|
const RETAINED_COUNT: usize = 64;
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct RampCache {
|
||||||
|
epoch: u64,
|
||||||
|
map: HashMap<GradientStops, (u32, u64)>,
|
||||||
|
data: Vec<u32>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RampCache {
|
||||||
|
pub fn advance(&mut self) {
|
||||||
|
self.epoch += 1;
|
||||||
|
if self.map.len() > RETAINED_COUNT {
|
||||||
|
self.map
|
||||||
|
.retain(|_key, value| value.0 < RETAINED_COUNT as u32);
|
||||||
|
self.data.truncate(RETAINED_COUNT * N_SAMPLES);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn add(&mut self, stops: &[GradientStop]) -> u32 {
|
||||||
|
if let Some(entry) = self.map.get_mut(stops) {
|
||||||
|
entry.1 = self.epoch;
|
||||||
|
entry.0
|
||||||
|
} else if self.map.len() < RETAINED_COUNT {
|
||||||
|
let id = (self.data.len() / N_SAMPLES) as u32;
|
||||||
|
self.data.extend(make_ramp(stops));
|
||||||
|
self.map.insert(stops.into(), (id, self.epoch));
|
||||||
|
id
|
||||||
|
} else {
|
||||||
|
let mut reuse = None;
|
||||||
|
for (stops, (id, epoch)) in &self.map {
|
||||||
|
if *epoch + 2 < self.epoch {
|
||||||
|
reuse = Some((stops.to_owned(), *id));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some((old_stops, id)) = reuse {
|
||||||
|
self.map.remove(&old_stops);
|
||||||
|
let start = id as usize * N_SAMPLES;
|
||||||
|
for (dst, src) in self.data[start..start + N_SAMPLES]
|
||||||
|
.iter_mut()
|
||||||
|
.zip(make_ramp(stops))
|
||||||
|
{
|
||||||
|
*dst = src;
|
||||||
|
}
|
||||||
|
self.map.insert(stops.into(), (id, self.epoch));
|
||||||
|
id
|
||||||
|
} else {
|
||||||
|
let id = (self.data.len() / N_SAMPLES) as u32;
|
||||||
|
self.data.extend(make_ramp(stops));
|
||||||
|
self.map.insert(stops.into(), (id, self.epoch));
|
||||||
|
id
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn data(&self) -> &[u32] {
|
||||||
|
&self.data
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn width(&self) -> u32 {
|
||||||
|
N_SAMPLES as u32
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn height(&self) -> u32 {
|
||||||
|
(self.data.len() / N_SAMPLES) as u32
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_ramp<'a>(stops: &'a [GradientStop]) -> impl Iterator<Item = u32> + 'a {
|
||||||
|
let mut last_u = 0.0;
|
||||||
|
let mut last_c = ColorF64::from_color(stops[0].color);
|
||||||
|
let mut this_u = last_u;
|
||||||
|
let mut this_c = last_c;
|
||||||
|
let mut j = 0;
|
||||||
|
(0..N_SAMPLES).map(move |i| {
|
||||||
|
let u = (i as f64) / (N_SAMPLES - 1) as f64;
|
||||||
|
while u > this_u {
|
||||||
|
last_u = this_u;
|
||||||
|
last_c = this_c;
|
||||||
|
if let Some(s) = stops.get(j + 1) {
|
||||||
|
this_u = s.offset as f64;
|
||||||
|
this_c = ColorF64::from_color(s.color);
|
||||||
|
j += 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let du = this_u - last_u;
|
||||||
|
let c = if du < 1e-9 {
|
||||||
|
this_c
|
||||||
|
} else {
|
||||||
|
last_c.lerp(&this_c, (u - last_u) / du)
|
||||||
|
};
|
||||||
|
c.to_premul_u32()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Copy, Clone, Debug)]
|
||||||
|
struct ColorF64([f64; 4]);
|
||||||
|
|
||||||
|
impl ColorF64 {
|
||||||
|
fn from_color(color: Color) -> Self {
|
||||||
|
Self([
|
||||||
|
color.r as f64 / 255.0,
|
||||||
|
color.g as f64 / 255.0,
|
||||||
|
color.b as f64 / 255.0,
|
||||||
|
color.a as f64 / 255.0,
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lerp(&self, other: &Self, a: f64) -> Self {
|
||||||
|
fn l(x: f64, y: f64, a: f64) -> f64 {
|
||||||
|
x * (1.0 - a) + y * a
|
||||||
|
}
|
||||||
|
Self([
|
||||||
|
l(self.0[0], other.0[0], a),
|
||||||
|
l(self.0[1], other.0[1], a),
|
||||||
|
l(self.0[2], other.0[2], a),
|
||||||
|
l(self.0[3], other.0[3], a),
|
||||||
|
])
|
||||||
|
}
|
||||||
|
|
||||||
|
fn to_premul_u32(&self) -> u32 {
|
||||||
|
let a = self.0[3].min(1.0).max(0.0);
|
||||||
|
let r = ((self.0[0] * a).min(1.0).max(0.0) * 255.0) as u32;
|
||||||
|
let g = ((self.0[1] * a).min(1.0).max(0.0) * 255.0) as u32;
|
||||||
|
let b = ((self.0[2] * a).min(1.0).max(0.0) * 255.0) as u32;
|
||||||
|
let a = (a * 255.0) as u32;
|
||||||
|
r | (g << 8) | (b << 16) | (a << 24)
|
||||||
|
}
|
||||||
|
}
|
|
@ -4,17 +4,39 @@ use bytemuck::{Pod, Zeroable};
|
||||||
use piet_scene::Scene;
|
use piet_scene::Scene;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
engine::{BufProxy, Recording},
|
engine::{BufProxy, Recording, ResourceProxy},
|
||||||
shaders::{self, Shaders},
|
shaders::{self, FullShaders, Shaders},
|
||||||
};
|
};
|
||||||
|
|
||||||
const TAG_MONOID_SIZE: u64 = 12;
|
const TAG_MONOID_SIZE: u64 = 12;
|
||||||
|
const TAG_MONOID_FULL_SIZE: u64 = 20;
|
||||||
|
const PATH_BBOX_SIZE: u64 = 24;
|
||||||
|
const CUBIC_SIZE: u64 = 40;
|
||||||
|
const DRAWMONOID_SIZE: u64 = 16;
|
||||||
|
const MAX_DRAWINFO_SIZE: u64 = 44;
|
||||||
|
const CLIP_BIC_SIZE: u64 = 8;
|
||||||
|
const CLIP_EL_SIZE: u64 = 32;
|
||||||
|
const CLIP_INP_SIZE: u64 = 4;
|
||||||
|
const CLIP_BBOX_SIZE: u64 = 16;
|
||||||
|
const PATH_SIZE: u64 = 32;
|
||||||
|
const DRAW_BBOX_SIZE: u64 = 16;
|
||||||
|
const BUMP_SIZE: u64 = 16;
|
||||||
|
const BIN_HEADER_SIZE: u64 = 8;
|
||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
#[derive(Clone, Copy, Zeroable, Pod)]
|
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
|
||||||
struct Config {
|
struct Config {
|
||||||
width_in_tiles: u32,
|
width_in_tiles: u32,
|
||||||
height_in_tiles: u32,
|
height_in_tiles: u32,
|
||||||
|
n_drawobj: u32,
|
||||||
|
n_path: u32,
|
||||||
|
n_clip: u32,
|
||||||
|
pathtag_base: u32,
|
||||||
|
pathdata_base: u32,
|
||||||
|
drawtag_base: u32,
|
||||||
|
drawdata_base: u32,
|
||||||
|
transform_base: u32,
|
||||||
|
linewidth_base: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
|
@ -26,22 +48,39 @@ pub struct PathSegment {
|
||||||
next: u32,
|
next: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn size_to_words(byte_size: usize) -> u32 {
|
||||||
|
(byte_size / std::mem::size_of::<u32>()) as u32
|
||||||
|
}
|
||||||
|
|
||||||
pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
||||||
let mut recording = Recording::default();
|
let mut recording = Recording::default();
|
||||||
let data = scene.data();
|
let data = scene.data();
|
||||||
let n_pathtag = data.tag_stream.len();
|
let n_pathtag = data.tag_stream.len();
|
||||||
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
|
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
|
||||||
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
||||||
let mut tag_data: Vec<u8> = Vec::with_capacity(pathtag_padded);
|
let mut scene: Vec<u8> = Vec::with_capacity(pathtag_padded);
|
||||||
tag_data.extend(&data.tag_stream);
|
let pathtag_base = size_to_words(scene.len());
|
||||||
tag_data.resize(pathtag_padded, 0);
|
scene.extend(&data.tag_stream);
|
||||||
let pathtag_buf = recording.upload(tag_data);
|
scene.resize(pathtag_padded, 0);
|
||||||
|
let pathdata_base = size_to_words(scene.len());
|
||||||
|
scene.extend(&data.pathseg_stream);
|
||||||
|
|
||||||
|
let config = Config {
|
||||||
|
width_in_tiles: 64,
|
||||||
|
height_in_tiles: 64,
|
||||||
|
pathtag_base,
|
||||||
|
pathdata_base,
|
||||||
|
..Default::default()
|
||||||
|
};
|
||||||
|
let scene_buf = recording.upload(scene);
|
||||||
|
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
|
||||||
|
|
||||||
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE);
|
let reduced_buf = BufProxy::new(pathtag_wgs as u64 * TAG_MONOID_SIZE);
|
||||||
// TODO: really only need pathtag_wgs - 1
|
// TODO: really only need pathtag_wgs - 1
|
||||||
recording.dispatch(
|
recording.dispatch(
|
||||||
shaders.pathtag_reduce,
|
shaders.pathtag_reduce,
|
||||||
(pathtag_wgs as u32, 1, 1),
|
(pathtag_wgs as u32, 1, 1),
|
||||||
[pathtag_buf, reduced_buf],
|
[config_buf, scene_buf, reduced_buf],
|
||||||
);
|
);
|
||||||
|
|
||||||
let tagmonoid_buf =
|
let tagmonoid_buf =
|
||||||
|
@ -49,19 +88,11 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
||||||
recording.dispatch(
|
recording.dispatch(
|
||||||
shaders.pathtag_scan,
|
shaders.pathtag_scan,
|
||||||
(pathtag_wgs as u32, 1, 1),
|
(pathtag_wgs as u32, 1, 1),
|
||||||
[pathtag_buf, reduced_buf, tagmonoid_buf],
|
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
|
||||||
);
|
);
|
||||||
|
|
||||||
let path_coarse_wgs = (data.n_pathseg + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
|
let path_coarse_wgs =
|
||||||
// The clone here is kinda BS, think about reducing copies
|
(n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
|
||||||
// Of course, we'll probably end up concatenating into a single scene binding.
|
|
||||||
let pathdata_buf = recording.upload(data.pathseg_stream.clone());
|
|
||||||
//let cubics_buf = BufProxy::new(data.n_pathseg as u64 * 32);
|
|
||||||
let config = Config {
|
|
||||||
width_in_tiles: 64,
|
|
||||||
height_in_tiles: 64,
|
|
||||||
};
|
|
||||||
let config_buf = recording.upload(bytemuck::bytes_of(&config).to_owned());
|
|
||||||
// TODO: more principled size calc
|
// TODO: more principled size calc
|
||||||
let tiles_buf = BufProxy::new(4097 * 8);
|
let tiles_buf = BufProxy::new(4097 * 8);
|
||||||
let segments_buf = BufProxy::new(256 * 24);
|
let segments_buf = BufProxy::new(256 * 24);
|
||||||
|
@ -70,10 +101,9 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
||||||
shaders.path_coarse,
|
shaders.path_coarse,
|
||||||
(path_coarse_wgs, 1, 1),
|
(path_coarse_wgs, 1, 1),
|
||||||
[
|
[
|
||||||
pathtag_buf,
|
|
||||||
tagmonoid_buf,
|
|
||||||
pathdata_buf,
|
|
||||||
config_buf,
|
config_buf,
|
||||||
|
scene_buf,
|
||||||
|
tagmonoid_buf,
|
||||||
tiles_buf,
|
tiles_buf,
|
||||||
segments_buf,
|
segments_buf,
|
||||||
],
|
],
|
||||||
|
@ -95,6 +125,280 @@ pub fn render(scene: &Scene, shaders: &Shaders) -> (Recording, BufProxy) {
|
||||||
(recording, out_buf)
|
(recording, out_buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn render_full(scene: &Scene, shaders: &FullShaders) -> (Recording, BufProxy) {
|
||||||
|
let mut recording = Recording::default();
|
||||||
|
let mut ramps = crate::ramp::RampCache::default();
|
||||||
|
let mut drawdata_patches: Vec<(usize, u32)> = vec![];
|
||||||
|
let data = scene.data();
|
||||||
|
let stop_data = &data.resources.stops;
|
||||||
|
for patch in &data.resources.patches {
|
||||||
|
use piet_scene::ResourcePatch;
|
||||||
|
match patch {
|
||||||
|
ResourcePatch::Ramp { offset, stops } => {
|
||||||
|
let ramp_id = ramps.add(&stop_data[stops.clone()]);
|
||||||
|
drawdata_patches.push((*offset, ramp_id));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let gradient_image = if drawdata_patches.is_empty() {
|
||||||
|
ResourceProxy::new_image(1, 1)
|
||||||
|
} else {
|
||||||
|
let data = ramps.data();
|
||||||
|
let width = ramps.width();
|
||||||
|
let height = ramps.height();
|
||||||
|
let data: &[u8] = bytemuck::cast_slice(data);
|
||||||
|
println!(
|
||||||
|
"gradient image: {}x{} ({} bytes)",
|
||||||
|
width,
|
||||||
|
height,
|
||||||
|
data.len()
|
||||||
|
);
|
||||||
|
ResourceProxy::Image(recording.upload_image(width, height, data))
|
||||||
|
};
|
||||||
|
let n_pathtag = data.tag_stream.len();
|
||||||
|
let pathtag_padded = align_up(n_pathtag, 4 * shaders::PATHTAG_REDUCE_WG);
|
||||||
|
// TODO: can compute size accurately, avoid reallocation
|
||||||
|
let mut scene: Vec<u8> = Vec::with_capacity(pathtag_padded);
|
||||||
|
let pathtag_base = size_to_words(scene.len());
|
||||||
|
scene.extend(&data.tag_stream);
|
||||||
|
scene.resize(pathtag_padded, 0);
|
||||||
|
let pathdata_base = size_to_words(scene.len());
|
||||||
|
scene.extend(&data.pathseg_stream);
|
||||||
|
let drawtag_base = size_to_words(scene.len());
|
||||||
|
scene.extend(bytemuck::cast_slice(&data.drawtag_stream));
|
||||||
|
let drawdata_base = size_to_words(scene.len());
|
||||||
|
if !drawdata_patches.is_empty() {
|
||||||
|
let mut pos = 0;
|
||||||
|
for patch in drawdata_patches {
|
||||||
|
let offset = patch.0;
|
||||||
|
let value = patch.1;
|
||||||
|
if pos < offset {
|
||||||
|
scene.extend_from_slice(&data.drawdata_stream[pos..offset]);
|
||||||
|
}
|
||||||
|
scene.extend_from_slice(bytemuck::bytes_of(&value));
|
||||||
|
pos = offset + 4;
|
||||||
|
}
|
||||||
|
if pos < data.drawdata_stream.len() {
|
||||||
|
scene.extend_from_slice(&data.drawdata_stream[pos..])
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
scene.extend(&data.drawdata_stream);
|
||||||
|
}
|
||||||
|
let transform_base = size_to_words(scene.len());
|
||||||
|
scene.extend(bytemuck::cast_slice(&data.transform_stream));
|
||||||
|
let linewidth_base = size_to_words(scene.len());
|
||||||
|
scene.extend(bytemuck::cast_slice(&data.linewidth_stream));
|
||||||
|
let n_path = data.n_path;
|
||||||
|
// TODO: calculate for real when we do rectangles
|
||||||
|
let n_drawobj = n_path;
|
||||||
|
let n_clip = data.n_clip;
|
||||||
|
let config = Config {
|
||||||
|
width_in_tiles: 64,
|
||||||
|
height_in_tiles: 64,
|
||||||
|
n_drawobj,
|
||||||
|
n_path,
|
||||||
|
n_clip,
|
||||||
|
pathtag_base,
|
||||||
|
pathdata_base,
|
||||||
|
drawtag_base,
|
||||||
|
drawdata_base,
|
||||||
|
transform_base,
|
||||||
|
linewidth_base,
|
||||||
|
};
|
||||||
|
println!("{:?}", config);
|
||||||
|
let scene_buf = ResourceProxy::Buf(recording.upload(scene));
|
||||||
|
let config_buf = ResourceProxy::Buf(recording.upload(bytemuck::bytes_of(&config).to_owned()));
|
||||||
|
|
||||||
|
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
||||||
|
let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE);
|
||||||
|
// TODO: really only need pathtag_wgs - 1
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.pathtag_reduce,
|
||||||
|
(pathtag_wgs as u32, 1, 1),
|
||||||
|
[config_buf, scene_buf, reduced_buf],
|
||||||
|
);
|
||||||
|
|
||||||
|
let tagmonoid_buf = ResourceProxy::new_buf(
|
||||||
|
pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE,
|
||||||
|
);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.pathtag_scan,
|
||||||
|
(pathtag_wgs as u32, 1, 1),
|
||||||
|
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
|
||||||
|
);
|
||||||
|
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
|
||||||
|
let path_bbox_buf = ResourceProxy::new_buf(n_path as u64 * PATH_BBOX_SIZE);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.bbox_clear,
|
||||||
|
(drawobj_wgs, 1, 1),
|
||||||
|
[config_buf, path_bbox_buf],
|
||||||
|
);
|
||||||
|
let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE);
|
||||||
|
let path_coarse_wgs =
|
||||||
|
(n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.pathseg,
|
||||||
|
(path_coarse_wgs, 1, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
scene_buf,
|
||||||
|
tagmonoid_buf,
|
||||||
|
path_bbox_buf,
|
||||||
|
cubic_buf,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
let draw_reduced_buf = ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.draw_reduce,
|
||||||
|
(drawobj_wgs, 1, 1),
|
||||||
|
[config_buf, scene_buf, draw_reduced_buf],
|
||||||
|
);
|
||||||
|
let draw_monoid_buf = ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE);
|
||||||
|
let info_buf = ResourceProxy::new_buf(n_drawobj as u64 * MAX_DRAWINFO_SIZE);
|
||||||
|
let clip_inp_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_INP_SIZE);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.draw_leaf,
|
||||||
|
(drawobj_wgs, 1, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
scene_buf,
|
||||||
|
draw_reduced_buf,
|
||||||
|
path_bbox_buf,
|
||||||
|
draw_monoid_buf,
|
||||||
|
info_buf,
|
||||||
|
clip_inp_buf,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
let clip_el_buf = ResourceProxy::new_buf(data.n_clip as u64 * CLIP_EL_SIZE);
|
||||||
|
let clip_bic_buf =
|
||||||
|
ResourceProxy::new_buf((n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE);
|
||||||
|
let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG;
|
||||||
|
if clip_wg_reduce > 0 {
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.clip_reduce,
|
||||||
|
(clip_wg_reduce, 1, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
clip_inp_buf,
|
||||||
|
path_bbox_buf,
|
||||||
|
clip_bic_buf,
|
||||||
|
clip_el_buf,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG;
|
||||||
|
let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE);
|
||||||
|
if clip_wg > 0 {
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.clip_leaf,
|
||||||
|
(clip_wg, 1, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
clip_inp_buf,
|
||||||
|
path_bbox_buf,
|
||||||
|
clip_bic_buf,
|
||||||
|
clip_el_buf,
|
||||||
|
draw_monoid_buf,
|
||||||
|
clip_bbox_buf,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
}
|
||||||
|
let draw_bbox_buf = ResourceProxy::new_buf(n_path as u64 * DRAW_BBOX_SIZE);
|
||||||
|
let bump_buf = BufProxy::new(BUMP_SIZE);
|
||||||
|
let bin_data_buf = ResourceProxy::new_buf(1 << 20);
|
||||||
|
let width_in_bins = (config.width_in_tiles + 15) / 16;
|
||||||
|
let height_in_bins = (config.height_in_tiles + 15) / 16;
|
||||||
|
let n_bins = width_in_bins * height_in_bins;
|
||||||
|
let bin_header_buf = ResourceProxy::new_buf((n_bins * drawobj_wgs) as u64 * BIN_HEADER_SIZE);
|
||||||
|
recording.clear_all(bump_buf);
|
||||||
|
let bump_buf = ResourceProxy::Buf(bump_buf);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.binning,
|
||||||
|
(drawobj_wgs, 1, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
draw_monoid_buf,
|
||||||
|
path_bbox_buf,
|
||||||
|
clip_bbox_buf,
|
||||||
|
draw_bbox_buf,
|
||||||
|
bump_buf,
|
||||||
|
bin_data_buf,
|
||||||
|
bin_header_buf,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
let path_buf = ResourceProxy::new_buf(n_path as u64 * PATH_SIZE);
|
||||||
|
let tile_buf = ResourceProxy::new_buf(1 << 20);
|
||||||
|
let path_wgs = (n_path + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.tile_alloc,
|
||||||
|
(path_wgs, 1, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
scene_buf,
|
||||||
|
draw_bbox_buf,
|
||||||
|
bump_buf,
|
||||||
|
path_buf,
|
||||||
|
tile_buf,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
let segments_buf = ResourceProxy::new_buf(1 << 24);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.path_coarse,
|
||||||
|
(path_coarse_wgs, 1, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
scene_buf,
|
||||||
|
tagmonoid_buf,
|
||||||
|
cubic_buf,
|
||||||
|
path_buf,
|
||||||
|
bump_buf,
|
||||||
|
tile_buf,
|
||||||
|
segments_buf,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.backdrop,
|
||||||
|
(path_wgs, 1, 1),
|
||||||
|
[config_buf, path_buf, tile_buf],
|
||||||
|
);
|
||||||
|
let ptcl_buf = ResourceProxy::new_buf(1 << 24);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.coarse,
|
||||||
|
(width_in_bins, height_in_bins, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
scene_buf,
|
||||||
|
draw_monoid_buf,
|
||||||
|
bin_header_buf,
|
||||||
|
bin_data_buf,
|
||||||
|
path_buf,
|
||||||
|
tile_buf,
|
||||||
|
info_buf,
|
||||||
|
bump_buf,
|
||||||
|
ptcl_buf,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
let out_buf_size = config.width_in_tiles * config.height_in_tiles * 1024;
|
||||||
|
let out_buf = BufProxy::new(out_buf_size as u64);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.fine,
|
||||||
|
(config.width_in_tiles, config.height_in_tiles, 1),
|
||||||
|
[
|
||||||
|
config_buf,
|
||||||
|
tile_buf,
|
||||||
|
segments_buf,
|
||||||
|
ResourceProxy::Buf(out_buf),
|
||||||
|
ptcl_buf,
|
||||||
|
gradient_image,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
let download_buf = out_buf;
|
||||||
|
recording.download(download_buf);
|
||||||
|
(recording, download_buf)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn align_up(len: usize, alignment: u32) -> usize {
|
pub fn align_up(len: usize, alignment: u32) -> usize {
|
||||||
len + (len.wrapping_neg() & alignment as usize - 1)
|
len + (len.wrapping_neg() & alignment as usize - 1)
|
||||||
}
|
}
|
||||||
|
|
|
@ -25,7 +25,10 @@ use wgpu::Device;
|
||||||
use crate::engine::{BindType, Engine, Error, ShaderId};
|
use crate::engine::{BindType, Engine, Error, ShaderId};
|
||||||
|
|
||||||
pub const PATHTAG_REDUCE_WG: u32 = 256;
|
pub const PATHTAG_REDUCE_WG: u32 = 256;
|
||||||
|
pub const PATH_BBOX_WG: u32 = 256;
|
||||||
pub const PATH_COARSE_WG: u32 = 256;
|
pub const PATH_COARSE_WG: u32 = 256;
|
||||||
|
pub const PATH_DRAWOBJ_WG: u32 = 256;
|
||||||
|
pub const CLIP_REDUCE_WG: u32 = 256;
|
||||||
|
|
||||||
pub struct Shaders {
|
pub struct Shaders {
|
||||||
pub pathtag_reduce: ShaderId,
|
pub pathtag_reduce: ShaderId,
|
||||||
|
@ -35,6 +38,24 @@ pub struct Shaders {
|
||||||
pub fine: ShaderId,
|
pub fine: ShaderId,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Shaders for the full pipeline
|
||||||
|
pub struct FullShaders {
|
||||||
|
pub pathtag_reduce: ShaderId,
|
||||||
|
pub pathtag_scan: ShaderId,
|
||||||
|
pub bbox_clear: ShaderId,
|
||||||
|
pub pathseg: ShaderId,
|
||||||
|
pub draw_reduce: ShaderId,
|
||||||
|
pub draw_leaf: ShaderId,
|
||||||
|
pub clip_reduce: ShaderId,
|
||||||
|
pub clip_leaf: ShaderId,
|
||||||
|
pub binning: ShaderId,
|
||||||
|
pub tile_alloc: ShaderId,
|
||||||
|
pub path_coarse: ShaderId,
|
||||||
|
pub backdrop: ShaderId,
|
||||||
|
pub coarse: ShaderId,
|
||||||
|
pub fine: ShaderId,
|
||||||
|
}
|
||||||
|
|
||||||
pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Error> {
|
pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Error> {
|
||||||
let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader"));
|
let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader"));
|
||||||
let imports = preprocess::get_imports(shader_dir);
|
let imports = preprocess::get_imports(shader_dir);
|
||||||
|
@ -44,12 +65,17 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
||||||
let pathtag_reduce = engine.add_shader(
|
let pathtag_reduce = engine.add_shader(
|
||||||
device,
|
device,
|
||||||
preprocess::preprocess(&read_shader("pathtag_reduce"), &empty, &imports).into(),
|
preprocess::preprocess(&read_shader("pathtag_reduce"), &empty, &imports).into(),
|
||||||
&[BindType::BufReadOnly, BindType::Buffer],
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
)?;
|
)?;
|
||||||
let pathtag_scan = engine.add_shader(
|
let pathtag_scan = engine.add_shader(
|
||||||
device,
|
device,
|
||||||
preprocess::preprocess(&read_shader("pathtag_scan"), &empty, &imports).into(),
|
preprocess::preprocess(&read_shader("pathtag_scan"), &empty, &imports).into(),
|
||||||
&[
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::Buffer,
|
BindType::Buffer,
|
||||||
|
@ -65,7 +91,6 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
BindType::BufReadOnly,
|
|
||||||
BindType::Buffer,
|
BindType::Buffer,
|
||||||
BindType::Buffer,
|
BindType::Buffer,
|
||||||
],
|
],
|
||||||
|
@ -93,3 +118,188 @@ pub fn init_shaders(device: &Device, engine: &mut Engine) -> Result<Shaders, Err
|
||||||
fine,
|
fine,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders, Error> {
|
||||||
|
let shader_dir = Path::new(concat!(env!("CARGO_MANIFEST_DIR"), "/shader"));
|
||||||
|
let imports = preprocess::get_imports(shader_dir);
|
||||||
|
let read_shader =
|
||||||
|
|path: &str| fs::read_to_string(shader_dir.join(path.to_string() + ".wgsl")).unwrap();
|
||||||
|
let empty = HashSet::new();
|
||||||
|
let mut full_config = HashSet::new();
|
||||||
|
full_config.insert("full".into());
|
||||||
|
let pathtag_reduce = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("pathtag_reduce"), &full_config, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let pathtag_scan = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("pathtag_scan"), &full_config, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let bbox_clear = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("bbox_clear"), &empty, &imports).into(),
|
||||||
|
&[BindType::BufReadOnly, BindType::Buffer],
|
||||||
|
)?;
|
||||||
|
let pathseg = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("pathseg"), &full_config, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let draw_reduce = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("draw_reduce"), &empty, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let draw_leaf = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("draw_leaf"), &empty, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let clip_reduce = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("clip_reduce"), &empty, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let clip_leaf = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("clip_leaf"), &empty, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let binning = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("binning"), &empty, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let tile_alloc = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("tile_alloc"), &empty, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let path_coarse = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("path_coarse_full"), &full_config, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let backdrop = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("backdrop_dyn"), &empty, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let coarse = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("coarse"), &empty, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let fine = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(&read_shader("fine"), &full_config, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::ImageRead,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
Ok(FullShaders {
|
||||||
|
pathtag_reduce,
|
||||||
|
pathtag_scan,
|
||||||
|
bbox_clear,
|
||||||
|
pathseg,
|
||||||
|
draw_reduce,
|
||||||
|
draw_leaf,
|
||||||
|
clip_reduce,
|
||||||
|
clip_leaf,
|
||||||
|
binning,
|
||||||
|
tile_alloc,
|
||||||
|
path_coarse,
|
||||||
|
backdrop,
|
||||||
|
coarse,
|
||||||
|
fine,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
|
@ -14,24 +14,54 @@
|
||||||
//
|
//
|
||||||
// Also licensed under MIT license, at your choice.
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
use piet_scene::{Affine, Brush, Color, Fill, PathElement, Point, Scene, SceneBuilder};
|
use kurbo::BezPath;
|
||||||
|
use piet_scene::{
|
||||||
|
Affine, BlendMode, Brush, Color, Compose, ExtendMode, Fill, GradientStop, LinearGradient, Mix,
|
||||||
|
PathElement, Point, RadialGradient, Rect, Scene, SceneBuilder, SceneFragment, Stroke,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::pico_svg::PicoSvg;
|
||||||
|
|
||||||
pub fn gen_test_scene() -> Scene {
|
pub fn gen_test_scene() -> Scene {
|
||||||
let mut scene = Scene::default();
|
let mut scene = Scene::default();
|
||||||
let mut builder = SceneBuilder::for_scene(&mut scene);
|
let mut builder = SceneBuilder::for_scene(&mut scene);
|
||||||
let path = [
|
let scene_ix = 1;
|
||||||
PathElement::MoveTo(Point::new(100.0, 100.0)),
|
match scene_ix {
|
||||||
PathElement::LineTo(Point::new(500.0, 120.0)),
|
0 => {
|
||||||
PathElement::LineTo(Point::new(300.0, 150.0)),
|
let path = [
|
||||||
PathElement::LineTo(Point::new(200.0, 260.0)),
|
PathElement::MoveTo(Point::new(100.0, 100.0)),
|
||||||
PathElement::LineTo(Point::new(150.0, 210.0)),
|
PathElement::LineTo(Point::new(500.0, 120.0)),
|
||||||
PathElement::Close,
|
PathElement::LineTo(Point::new(300.0, 150.0)),
|
||||||
];
|
PathElement::LineTo(Point::new(200.0, 260.0)),
|
||||||
let brush = Brush::Solid(Color::rgb8(0x80, 0x80, 0x80));
|
PathElement::LineTo(Point::new(150.0, 210.0)),
|
||||||
builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path);
|
PathElement::Close,
|
||||||
|
];
|
||||||
|
let brush = Brush::Solid(Color::rgb8(0x40, 0x40, 0xff));
|
||||||
|
builder.fill(Fill::NonZero, Affine::IDENTITY, &brush, None, &path);
|
||||||
|
let transform = Affine::translate(50.0, 50.0);
|
||||||
|
let brush = Brush::Solid(Color::rgba8(0xff, 0xff, 0x00, 0x80));
|
||||||
|
builder.fill(Fill::NonZero, transform, &brush, None, &path);
|
||||||
|
let transform = Affine::translate(100.0, 100.0);
|
||||||
|
let style = simple_stroke(1.0);
|
||||||
|
let brush = Brush::Solid(Color::rgb8(0xa0, 0x00, 0x00));
|
||||||
|
builder.stroke(&style, transform, &brush, None, &path);
|
||||||
|
}
|
||||||
|
1 => {
|
||||||
|
render_blend_grid(&mut builder);
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let xml_str =
|
||||||
|
std::str::from_utf8(include_bytes!("../../piet-gpu/Ghostscript_Tiger.svg"))
|
||||||
|
.unwrap();
|
||||||
|
let svg = PicoSvg::load(xml_str, 6.0).unwrap();
|
||||||
|
render_svg(&mut builder, &svg, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
builder.finish();
|
||||||
scene
|
scene
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
pub fn dump_scene_info(scene: &Scene) {
|
pub fn dump_scene_info(scene: &Scene) {
|
||||||
let data = scene.data();
|
let data = scene.data();
|
||||||
println!("tags {:?}", data.tag_stream);
|
println!("tags {:?}", data.tag_stream);
|
||||||
|
@ -40,3 +70,212 @@ pub fn dump_scene_info(scene: &Scene) {
|
||||||
bytemuck::cast_slice::<u8, f32>(&data.pathseg_stream)
|
bytemuck::cast_slice::<u8, f32>(&data.pathseg_stream)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn render_svg(sb: &mut SceneBuilder, svg: &PicoSvg, print_stats: bool) {
|
||||||
|
use crate::pico_svg::*;
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
for item in svg.items.iter() {
|
||||||
|
match item {
|
||||||
|
Item::Fill(fill) => {
|
||||||
|
sb.fill(
|
||||||
|
Fill::NonZero,
|
||||||
|
Affine::IDENTITY,
|
||||||
|
&fill.color.into(),
|
||||||
|
None,
|
||||||
|
convert_bez_path(&fill.path),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
Item::Stroke(stroke) => {
|
||||||
|
sb.stroke(
|
||||||
|
&simple_stroke(stroke.width as f32),
|
||||||
|
Affine::IDENTITY,
|
||||||
|
&stroke.color.into(),
|
||||||
|
None,
|
||||||
|
convert_bez_path(&stroke.path),
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if print_stats {
|
||||||
|
println!("flattening and encoding time: {:?}", start.elapsed());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn convert_bez_path<'a>(path: &'a BezPath) -> impl Iterator<Item = PathElement> + 'a + Clone {
|
||||||
|
path.elements()
|
||||||
|
.iter()
|
||||||
|
.map(|el| PathElement::from_kurbo(*el))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn simple_stroke(width: f32) -> Stroke<[f32; 0]> {
|
||||||
|
Stroke {
|
||||||
|
width,
|
||||||
|
join: piet_scene::Join::Round,
|
||||||
|
miter_limit: 1.4,
|
||||||
|
start_cap: piet_scene::Cap::Round,
|
||||||
|
end_cap: piet_scene::Cap::Round,
|
||||||
|
dash_pattern: [],
|
||||||
|
dash_offset: 0.0,
|
||||||
|
scale: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
pub fn render_blend_grid(sb: &mut SceneBuilder) {
|
||||||
|
const BLEND_MODES: &[Mix] = &[
|
||||||
|
Mix::Normal,
|
||||||
|
Mix::Multiply,
|
||||||
|
Mix::Darken,
|
||||||
|
Mix::Screen,
|
||||||
|
Mix::Lighten,
|
||||||
|
Mix::Overlay,
|
||||||
|
Mix::ColorDodge,
|
||||||
|
Mix::ColorBurn,
|
||||||
|
Mix::HardLight,
|
||||||
|
Mix::SoftLight,
|
||||||
|
Mix::Difference,
|
||||||
|
Mix::Exclusion,
|
||||||
|
Mix::Hue,
|
||||||
|
Mix::Saturation,
|
||||||
|
Mix::Color,
|
||||||
|
Mix::Luminosity,
|
||||||
|
];
|
||||||
|
for (ix, &blend) in BLEND_MODES.iter().enumerate() {
|
||||||
|
let i = ix % 4;
|
||||||
|
let j = ix / 4;
|
||||||
|
let transform = Affine::translate(i as f32 * 225., j as f32 * 225.);
|
||||||
|
let square = blend_square(blend.into());
|
||||||
|
sb.append(&square, Some(transform));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
fn render_blend_square(sb: &mut SceneBuilder, blend: BlendMode, transform: Affine) {
|
||||||
|
// Inspired by https://developer.mozilla.org/en-US/docs/Web/CSS/mix-blend-mode
|
||||||
|
let rect = Rect::from_origin_size(Point::new(0., 0.), 200., 200.);
|
||||||
|
let stops = &[
|
||||||
|
GradientStop {
|
||||||
|
color: Color::rgb8(0, 0, 0),
|
||||||
|
offset: 0.0,
|
||||||
|
},
|
||||||
|
GradientStop {
|
||||||
|
color: Color::rgb8(255, 255, 255),
|
||||||
|
offset: 1.0,
|
||||||
|
},
|
||||||
|
][..];
|
||||||
|
let linear = Brush::LinearGradient(LinearGradient {
|
||||||
|
start: Point::new(0.0, 0.0),
|
||||||
|
end: Point::new(200.0, 0.0),
|
||||||
|
stops: stops.into(),
|
||||||
|
extend: ExtendMode::Pad,
|
||||||
|
});
|
||||||
|
sb.fill(Fill::NonZero, transform, &linear, None, rect.elements());
|
||||||
|
const GRADIENTS: &[(f32, f32, Color)] = &[
|
||||||
|
(150., 0., Color::rgb8(255, 240, 64)),
|
||||||
|
(175., 100., Color::rgb8(255, 96, 240)),
|
||||||
|
(125., 200., Color::rgb8(64, 192, 255)),
|
||||||
|
];
|
||||||
|
for (x, y, c) in GRADIENTS {
|
||||||
|
let mut color2 = c.clone();
|
||||||
|
color2.a = 0;
|
||||||
|
let stops = &[
|
||||||
|
GradientStop {
|
||||||
|
color: c.clone(),
|
||||||
|
offset: 0.0,
|
||||||
|
},
|
||||||
|
GradientStop {
|
||||||
|
color: color2,
|
||||||
|
offset: 1.0,
|
||||||
|
},
|
||||||
|
][..];
|
||||||
|
let rad = Brush::RadialGradient(RadialGradient {
|
||||||
|
center0: Point::new(*x, *y),
|
||||||
|
center1: Point::new(*x, *y),
|
||||||
|
radius0: 0.0,
|
||||||
|
radius1: 100.0,
|
||||||
|
stops: stops.into(),
|
||||||
|
extend: ExtendMode::Pad,
|
||||||
|
});
|
||||||
|
sb.fill(Fill::NonZero, transform, &rad, None, rect.elements());
|
||||||
|
}
|
||||||
|
const COLORS: &[Color] = &[
|
||||||
|
Color::rgb8(255, 0, 0),
|
||||||
|
Color::rgb8(0, 255, 0),
|
||||||
|
Color::rgb8(0, 0, 255),
|
||||||
|
];
|
||||||
|
sb.push_layer(Mix::Normal.into(), transform, rect.elements());
|
||||||
|
for (i, c) in COLORS.iter().enumerate() {
|
||||||
|
let stops = &[
|
||||||
|
GradientStop {
|
||||||
|
color: Color::rgb8(255, 255, 255),
|
||||||
|
offset: 0.0,
|
||||||
|
},
|
||||||
|
GradientStop {
|
||||||
|
color: c.clone(),
|
||||||
|
offset: 1.0,
|
||||||
|
},
|
||||||
|
][..];
|
||||||
|
let linear = Brush::LinearGradient(LinearGradient {
|
||||||
|
start: Point::new(0.0, 0.0),
|
||||||
|
end: Point::new(0.0, 200.0),
|
||||||
|
stops: stops.into(),
|
||||||
|
extend: ExtendMode::Pad,
|
||||||
|
});
|
||||||
|
sb.push_layer(blend, transform, rect.elements());
|
||||||
|
// squash the ellipse
|
||||||
|
let a = transform
|
||||||
|
* Affine::translate(100., 100.)
|
||||||
|
* Affine::rotate(std::f32::consts::FRAC_PI_3 * (i * 2 + 1) as f32)
|
||||||
|
* Affine::scale(1.0, 0.357)
|
||||||
|
* Affine::translate(-100., -100.);
|
||||||
|
sb.fill(
|
||||||
|
Fill::NonZero,
|
||||||
|
a,
|
||||||
|
&linear,
|
||||||
|
None,
|
||||||
|
make_ellipse(100., 100., 90., 90.),
|
||||||
|
);
|
||||||
|
sb.pop_layer();
|
||||||
|
}
|
||||||
|
sb.pop_layer();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
fn blend_square(blend: BlendMode) -> SceneFragment {
|
||||||
|
let mut fragment = SceneFragment::default();
|
||||||
|
let mut sb = SceneBuilder::for_fragment(&mut fragment);
|
||||||
|
render_blend_square(&mut sb, blend, Affine::IDENTITY);
|
||||||
|
sb.finish();
|
||||||
|
fragment
|
||||||
|
}
|
||||||
|
|
||||||
|
fn make_ellipse(cx: f32, cy: f32, rx: f32, ry: f32) -> impl Iterator<Item = PathElement> + Clone {
|
||||||
|
let a = 0.551915024494;
|
||||||
|
let arx = a * rx;
|
||||||
|
let ary = a * ry;
|
||||||
|
let elements = [
|
||||||
|
PathElement::MoveTo(Point::new(cx + rx, cy)),
|
||||||
|
PathElement::CurveTo(
|
||||||
|
Point::new(cx + rx, cy + ary),
|
||||||
|
Point::new(cx + arx, cy + ry),
|
||||||
|
Point::new(cx, cy + ry),
|
||||||
|
),
|
||||||
|
PathElement::CurveTo(
|
||||||
|
Point::new(cx - arx, cy + ry),
|
||||||
|
Point::new(cx - rx, cy + ary),
|
||||||
|
Point::new(cx - rx, cy),
|
||||||
|
),
|
||||||
|
PathElement::CurveTo(
|
||||||
|
Point::new(cx - rx, cy - ary),
|
||||||
|
Point::new(cx - arx, cy - ry),
|
||||||
|
Point::new(cx, cy - ry),
|
||||||
|
),
|
||||||
|
PathElement::CurveTo(
|
||||||
|
Point::new(cx + arx, cy - ry),
|
||||||
|
Point::new(cx + rx, cy - ary),
|
||||||
|
Point::new(cx + rx, cy),
|
||||||
|
),
|
||||||
|
PathElement::Close,
|
||||||
|
];
|
||||||
|
(0..elements.len()).map(move |i| elements[i])
|
||||||
|
}
|
||||||
|
|
Loading…
Reference in a new issue