Write more shaders

This is super WIP, but represents partially written shaders for more of
the piet-gpu pipeline. Checkpointing as other work is incoming.
This commit is contained in:
Raph Levien 2022-10-26 13:55:45 -07:00 committed by Raph Levien
parent 1b84071d33
commit b6da6d958b
8 changed files with 472 additions and 5 deletions

View file

@ -1,6 +1,8 @@
{ {
"wgsl-analyzer.customImports": { "wgsl-analyzer.customImports": {
"bbox": "${workspaceFolder}/piet-wgsl/shader/shared/bbox.wgsl",
"config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl", "config": "${workspaceFolder}/piet-wgsl/shader/shared/config.wgsl",
"drawtag": "${workspaceFolder}/piet-wgsl/shader/shared/drawtag.wgsl",
"segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl", "segment": "${workspaceFolder}/piet-wgsl/shader/shared/segment.wgsl",
"pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl" "pathtag": "${workspaceFolder}/piet-wgsl/shader/shared/pathtag.wgsl"
}, },

View file

@ -0,0 +1,173 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// The binning stage
#import config
#import drawtag
#import bbox
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> draw_monoids: array<DrawMonoid>;
@group(0) @binding(2)
var<storage> path_bbox_buf: array<PathBBox>;
@group(0) @binding(3)
var<storage> clip_bbox_buf: array<vec4<f32>>;
@group(0) @binding(4)
var<storage, read_write> intersected_bbox: array<vec4<f32>>;
// TODO: put into shared include
// TODO: robust memory (failure flags)
struct BumpAllocators {
binning: atomic<u32>,
}
@group(0) @binding(5)
var<storate, read_write> bump: BumpAllocators;
@group(0) @binding(6)
var<storage, read_write> bin_data: array<u32>;
struct BinHeader {
element_count: u32,
chunk_offset: u32,
}
@group(0) @binding(7)
var<storage, read_write> bin_header: array<BinHeader>;
// These should probably be in a common block.
let TILE_WIDTH = 16u;
let TILE_HEIGHT = 16u;
// Number of tiles per bin
let N_TILE_X = 16u;
let N_TILE_Y = 16u;
let N_TILE = N_TILE_X * N_TILE_Y;
// conversion factors from coordinates to bin
let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH);
let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT);
let WG_SIZE = 256u;
let N_SLICE = WG_SIZE / 32u;
var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>;
var<workgroup> sh_count: array<array<u32, N_TILE>, N_SLICE>;
var<workgroup> sh_chunk_offset: array<u32, N_TILE>;
@compute @workgroup_size(256)
fn main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
for (var i = 0u; i < N_SLICE; i += 1u) {
atomicStore(&sh_bitmaps[i][local_id.x], 0u);
}
// Read inputs and determine coverage of bins
let element_ix = global_id.x;
var x0 = 0;
var y0 = 0;
var x1 = 0;
var y1 = 0;
if element_ix < config.n_drawobj {
let draw_monoid = draw_monoids[element_ix];
var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9);
if draw_monoid.clip_ix > 0u {
clip_bbox = clip_bbox_buf[draw_monoid.clip_ix - 1u];
}
// For clip elements, clip_box is the bbox of the clip path,
// intersected with enclosing clips.
// For other elements, it is the bbox of the enclosing clips.
// TODO check this is true
let path_bbox = path_bbox_buf[draw_monoid.path_ix];
let pb = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1);
let bbox = bbox_intersect(clip_bbox, pb);
bbox.zw = max(bbox.xy, bbox.zw);
intersected_bbox[element_ix] = bbox;
x0 = i32(floor(bbox.x * SX));
y0 = i32(floor(bbox.y * SY));
x1 = i32(ceil(bbox.z * SX));
y1 = i32(ceil(bbox.w * SY));
}
let width_in_bins = i32((config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X);
let height_in_bins = i32((config.height_in_tiles + N_TILE_Y - 1u) / N_TILE_Y);
x0 = clamp(x0, 0, width_in_bins);
y0 = clamp(y0, 0, height_in_bins);
x1 = clamp(x1, 0, width_in_bins);
y1 = clamp(y1, 0, height_in_bins);
if x0 == x1 {
y1 = y0;
}
var x = x0;
var y = y0;
let my_slice = local_id.x / 32u;
let my_mask = 1u << (local_id.x & 31u);
while y < y1 {
atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask);
x += 1u;
if x == x1 {
x = x0;
y += 1;
}
}
workgroupBarrier();
// Allocate output segments
var element_count = 0u;
for (var i = 0u; i < N_SLICE; i += 1u) {
elementCount += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x]));
sh_count[i][id_ix] = element_count;
}
// element_count is the number of draw objects covering this thread's bin
let chunk_offset = atomicAdd(&bump.binning, element_count);
sh_chunk_offset[local_id.x] = chunk_offset;
bin_header[global_id.x].element_count = element_count;
bin_header[global_id.x].chunk_offset = chunk_offset;
workgroupBarrier();
// loop over bbox of bins touched by this draw object
x = x0;
y = y0;
while y < y1 {
let bin_ix = y * width_in_bins + x;
let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]);
// I think this predicate will always be true...
if (out_mask & my_mask) != 0 {
var idx = countOneBits(out_mask & (my_mask - 1u));
if my_slice > 0 {
idx += sh_count[my_slice - 1u][bin_ix];
}
let offset = sh_chunk_offset[bin_ix];
bin_data[offset + idx] = element_ix;
}
x += 1u;
if x == x1 {
x = x0;
y += 1u;
}
}
}

View file

@ -0,0 +1,145 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// Finish prefix sum of drawtags, decode draw objects.
#import config
#import drawtag
#import bbox
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage> reduced: array<DrawMonoid>;
@group(0) @binding(3)
var<storage, read_write> draw_monoid: array<DrawMonoid>;
@group(0) @binding(4)
var<storage> path_bbox: array<PathBbox>;
@group(0) @binding(5)
var<storage, read_write> info: array<u32>;
let WG_SIZE = 256;
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;
@compute @workgroup_size(256)
fn main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
@builtin(workgroup_id) wg_id: vec3<u32>,
) {
let ix = global_id.x;
let tag_word = scene[config.drawtag_base + ix];
let agg = map_draw_tag(tag_word);
sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier();
if local_id.x + (1u << i) < WG_SIZE {
let other = sh_scratch[local_id.x + (1u << i)];
agg = combine_draw_monoid(agg, other);
}
workgroupBarrier();
sh_scratch[local_id.x] = agg;
}
workgroupBarrier();
var m = draw_monoid_identity();
if wg_id.x > 0u {
m = parent[wg_id.x - 1u];
}
if local_id.x > 0u {
m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]);
}
// m now contains exclusive prefix sum of draw monoid
draw_monoid[ix] = m;
let dd = config.drawdata_base + m.scene_offset;
let di = m.info_offset;
if tag_word == DRAWTAG_FILL_COLOR || tag_word == DRAWTAG_FILL_LIN_GRADIENT ||
tag_word == DRAWTAG_FILL_RAD_GRADIENT || tag_word == DRAWTAG_FILL_IMAGE ||
tag_word == DRAWTAG_BEGIN_CLIP
{
let bbox = path_bbox[m.path_ix];
let x0 = f32(bbox.x0) - 32768.0;
let y0 = f32(bbox.y0) - 32768.0;
let x1 = f32(bbox.x1) - 32768.0;
let y1 = f32(bbox.y1) - 32768.0;
let bbox_f = vec4(x0, y0, x1, y1);
let fill_mode = u32(bbox.linewidth >= 0.0);
var mat: vec4<f32>;
var translate: vec2<f32>;
var linewidth = bbox.linewidth;
if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT {
// TODO: retrieve transform from scene. Packed?
}
if linewidth >= 0.0 {
// Note: doesn't deal with anisotropic case
linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z));
}
switch tag_word {
case DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE: {
info[di] = bitcast<u32>(linewidth);
}
case DRAWTAG_FILL_LIN_GRADIENT: {
info[di] = bitcast<u32>(linewidth);
var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
p0 = mat.xy * p0.x + mat.zw * p0.y + translate;
p1 = mat.xy * p1.x + mat.zw * p1.y + translate;
let dxy = p1 - p0;
let scale = 1.0 / dot(dxy, dxy);
let line_xy = dxy * scale;
let line_c = -dot(p0, line_xy);
info[di + 1u] = bitcast<u32>(line_xy.x);
info[di + 2u] = bitcast<u32>(line_xy.y);
info[di + 3u] = bitcast<u32>(line_c);
}
case DRAWTAG_FILL_RAD_GRADIENT: {
info[di] = bitcast<u32>(linewidth);
var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
let r0 = bitcast<f32>(scene[dd + 5u]);
let r1 = bitcast<f32>(scene[dd + 6u]);
let inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z);
let inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x);
var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y;
inv_tr += p0;
let center1 = p1 - p0;
let rr = r1 / (r1 - r0);
let ra_inv = rr / (r1 * r1 - dot(center1, center1));
let c1 = center1 * ra_inv;
let ra = rr * ra_inv;
let roff = rr - 1.0;
info[di + 1u] = bitcast<u32>(inv_mat.x);
info[di + 2u] = bitcast<u32>(inv_mat.y);
info[di + 3u] = bitcast<u32>(inv_mat.z);
info[di + 4u] = bitcast<u32>(inv_mat.w);
info[di + 5u] = bitcast<u32>(inv_tr.x);
info[di + 6u] = bitcast<u32>(inv_tr.y);
info[di + 7u] = bitcast<u32>(c1.x);
info[di + 8u] = bitcast<u32>(c1.y);
info[di + 9u] = bitcast<u32>(ra);
info[di + 10u] = bitcast<u32>(roff);
}
default: {}
}
}
}

View file

@ -0,0 +1,54 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
#import config
#import drawtag
@group(0) @binding(0)
var<storage> config: Config;
@group(0) @binding(1)
var<storage> scene: array<u32>;
@group(0) @binding(2)
var<storage, read_write> reduced: array<DrawMonoid>;
let WG_SIZE = 256;
var<workgroup> sh_scratch: array<DrawMonoid, WG_SIZE>;
@compute @workgroup_size(256)
fn main(
@builtin(global_invocation_id) global_id: vec3<u32>,
@builtin(local_invocation_id) local_id: vec3<u32>,
) {
let ix = global_id.x;
let tag_word = scene[config.drawtag_base + ix];
let agg = map_draw_tag(tag_word);
sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier();
if local_id.x + (1u << i) < WG_SIZE {
let other = sh_scratch[local_id.x + (1u << i)];
agg = combine_draw_monoid(agg, other);
}
workgroupBarrier();
sh_scratch[local_id.x] = agg;
}
if local_id.x == 0u {
reduced[ix >> LG_WG_SIZE] = agg;
}
}

View file

@ -0,0 +1,30 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// The annotated bounding box for a path. It has been transformed,
// but contains a link to the active transform, mostly for gradients.
struct PathBbox {
x0: u32,
y0: u32,
x1: u32,
y1: u32,
linewidth: f32,
trans_ix: u32,
}
fn bbox_intersect(a: vec4<f32>, b: vec4<f32>) -> f32 {
return vec4(max(a.xy, b.xy), min(a.zyw, b.zw));
}

View file

@ -17,4 +17,11 @@
struct Config { struct Config {
width_in_tiles: u32, width_in_tiles: u32,
height_in_tiles: u32, height_in_tiles: u32,
n_drawobj: u32,
// offsets within config file (in u32 units)
// Note: this is a difference from piet-gpu, which is in bytes
drawtag_base: u32,
drawdata_base: u32,
} }

View file

@ -0,0 +1,60 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Also licensed under MIT license, at your choice.
// The DrawMonoid is computed as a prefix sum to aid in decoding
// the variable-length encoding of draw objects.
struct DrawMonoid {
// The number of paths preceding this draw object.
path_ix: u32,
// The number of clip operations preceding this draw object.
clip_ix: u32,
// The offset of the encoded draw object in the scene (u32s).
scene_offset: u32,
// The offset of the associated info.
info_offset: u32,
}
// Each draw object has a 32-bit draw tag, which is a bit-packed
// version of the draw monoid.
let DRAWTAG_NOP = 0u;
let DRAWTAG_FILL_COLOR = 0x44u;
let DRAWTAG_FILL_LIN_GRADIENT = 0x114;
let DRAWTAG_FILL_RAD_GRADIENT = 0x2dc;
let DRAWTAG_FILL_IMAGE = 0x48;
let DRAWTAG_BEGIN_CLIP = 0x05;
let DRAWTAG_END_CLIP = 0x25;
fn draw_monoid_identity() -> DrawMonoid {
return DrawMonoid();
}
fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) {
var c: DrawMonoid;
c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset;
}
fn map_draw_tag(tag_word: u32) -> DawMonoid {
let has_path = ;
var c: DrawMonoid;
c.path_ix = u32(tag_word != DRAWTAG_NOP);
c.clip_ix = tag_word & 1u;
c.scene_offset = (tag_word >> 2u) & 0x07u;
c.info_offset = (tag_word >> 6u) & 0x0fu;
return c;
}

View file

@ -30,11 +30,7 @@ let PATH_TAG_PATH = 0x10u;
let PATH_TAG_TRANSFORM = 0x20u; let PATH_TAG_TRANSFORM = 0x20u;
fn tag_monoid_identity() -> TagMonoid { fn tag_monoid_identity() -> TagMonoid {
var c: TagMonoid; return TagMonoid();
c.trans_ix = 0u;
c.pathseg_ix = 0u;
c.pathseg_offset = 0u;
return c;
} }
fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid { fn combine_tag_monoid(a: TagMonoid, b: TagMonoid) -> TagMonoid {