Another checkpoint

This commit is contained in:
Raph Levien 2022-10-30 08:08:22 -07:00
parent 06ec395b68
commit 40416fd2ea
6 changed files with 34 additions and 32 deletions

View file

@ -71,7 +71,8 @@ var<storage> tiles: array<Tile>;
// Much of this code assumes WG_SIZE == N_TILE. If these diverge, then // Much of this code assumes WG_SIZE == N_TILE. If these diverge, then
// a fair amount of fixup is needed. // a fair amount of fixup is needed.
let WG_SIZE = 256u; let WG_SIZE = 256u;
let N_SLICE = WG_SIZE / 32u; //let N_SLICE = WG_SIZE / 32u;
let N_SLICE = 8u;
var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>; var<workgroup> sh_bitmaps: array<array<atomic<u32>, N_TILE>, N_SLICE>;
var<workgroup> sh_part_count: array<u32, WG_SIZE>; var<workgroup> sh_part_count: array<u32, WG_SIZE>;
@ -174,7 +175,7 @@ fn main(
sh_part_count[local_id.x] = count; sh_part_count[local_id.x] = count;
workgroupBarrier(); workgroupBarrier();
if local_id.x >= (1u << i) { if local_id.x >= (1u << i) {
count += sh_part_count[local_id - (1u << i)]; count += sh_part_count[local_id.x - (1u << i)];
} }
workgroupBarrier(); workgroupBarrier();
} }
@ -235,7 +236,7 @@ fn main(
// Prefix sum of tile counts // Prefix sum of tile counts
sh_tile_count[local_id.x] = tile_count; sh_tile_count[local_id.x] = tile_count;
for (var i = 0; i < firstTrailingBit(N_TILE); i += 1u) { for (var i = 0u; i < firstTrailingBit(N_TILE); i += 1u) {
workgroupBarrier(); workgroupBarrier();
if local_id.x >= (1u << i) { if local_id.x >= (1u << i) {
tile_count += sh_tile_count[local_id.x - (1u << i)]; tile_count += sh_tile_count[local_id.x - (1u << i)];
@ -298,11 +299,13 @@ fn main(
let drawtag = scene[config.drawtag_base + drawobj_ix]; let drawtag = scene[config.drawtag_base + drawobj_ix];
let dm = draw_monoids[drawobj_ix]; let dm = draw_monoids[drawobj_ix];
let dd = config.drawdata_base + dm.scene_offset; let dd = config.drawdata_base + dm.scene_offset;
// TODO: set up draw info from monoid
if clip_zero_depth == 0u { if clip_zero_depth == 0u {
let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x; let tile_ix = sh_tile_base[el_ix] + sh_tile_stride[el_ix] * tile_y + tile_x;
let tile = tiles[tile_ix]; let tile = tiles[tile_ix];
switch drawtag { switch drawtag {
case DRAWTAG_FILL_COLOR: { // DRAWTAG_FILL_COLOR
case 0x44u: {
// TODO: get linewidth from draw object // TODO: get linewidth from draw object
let linewidth = -1.0; let linewidth = -1.0;
let rgba_color = scene[dd]; let rgba_color = scene[dd];

View file

@ -50,7 +50,7 @@ fn main(
) { ) {
let ix = global_id.x; let ix = global_id.x;
let tag_word = scene[config.drawtag_base + ix]; let tag_word = scene[config.drawtag_base + ix];
let agg = map_draw_tag(tag_word); var agg = map_draw_tag(tag_word);
sh_scratch[local_id.x] = agg; sh_scratch[local_id.x] = agg;
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) { for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
workgroupBarrier(); workgroupBarrier();
@ -64,7 +64,7 @@ fn main(
workgroupBarrier(); workgroupBarrier();
var m = draw_monoid_identity(); var m = draw_monoid_identity();
if wg_id.x > 0u { if wg_id.x > 0u {
m = parent[wg_id.x - 1u]; m = reduced[wg_id.x - 1u];
} }
if local_id.x > 0u { if local_id.x > 0u {
m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]); m = combine_draw_monoid(m, sh_scratch[local_id.x - 1u]);
@ -84,7 +84,7 @@ fn main(
let y1 = f32(bbox.y1) - 32768.0; let y1 = f32(bbox.y1) - 32768.0;
let bbox_f = vec4(x0, y0, x1, y1); let bbox_f = vec4(x0, y0, x1, y1);
let fill_mode = u32(bbox.linewidth >= 0.0); let fill_mode = u32(bbox.linewidth >= 0.0);
var mat: vec4<f32>; var matrx: vec4<f32>;
var translate: vec2<f32>; var translate: vec2<f32>;
var linewidth = bbox.linewidth; var linewidth = bbox.linewidth;
if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT { if linewidth >= 0.0 || tag_word == DRAWTAG_FILL_LIN_GRADIENT || tag_word == DRAWTAG_FILL_RAD_GRADIENT {
@ -92,18 +92,20 @@ fn main(
} }
if linewidth >= 0.0 { if linewidth >= 0.0 {
// Note: doesn't deal with anisotropic case // Note: doesn't deal with anisotropic case
linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z)); linewidth *= sqrt(abs(matrx.x * matrx.w - matrx.y * matrx.z));
} }
switch tag_word { switch tag_word {
case DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE: { // DRAWTAG_FILL_COLOR, DRAWTAG_FILL_IMAGE
case 0x44u, 0x48u: {
info[di] = bitcast<u32>(linewidth); info[di] = bitcast<u32>(linewidth);
} }
case DRAWTAG_FILL_LIN_GRADIENT: { // DRAWTAG_FILL_LIN_GRADIENT
case 0x114u: {
info[di] = bitcast<u32>(linewidth); info[di] = bitcast<u32>(linewidth);
var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u])); var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u])); var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
p0 = mat.xy * p0.x + mat.zw * p0.y + translate; p0 = matrx.xy * p0.x + matrx.zw * p0.y + translate;
p1 = mat.xy * p1.x + mat.zw * p1.y + translate; p1 = matrx.xy * p1.x + matrx.zw * p1.y + translate;
let dxy = p1 - p0; let dxy = p1 - p0;
let scale = 1.0 / dot(dxy, dxy); let scale = 1.0 / dot(dxy, dxy);
let line_xy = dxy * scale; let line_xy = dxy * scale;
@ -112,14 +114,15 @@ fn main(
info[di + 2u] = bitcast<u32>(line_xy.y); info[di + 2u] = bitcast<u32>(line_xy.y);
info[di + 3u] = bitcast<u32>(line_c); info[di + 3u] = bitcast<u32>(line_c);
} }
case DRAWTAG_FILL_RAD_GRADIENT: { // DRAWTAG_FILL_RAD_GRADIENT
case 0x2dcu: {
info[di] = bitcast<u32>(linewidth); info[di] = bitcast<u32>(linewidth);
var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u])); var p0 = bitcast<vec2<f32>>(vec2(scene[dd + 1u], scene[dd + 2u]));
var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u])); var p1 = bitcast<vec2<f32>>(vec2(scene[dd + 3u], scene[dd + 4u]));
let r0 = bitcast<f32>(scene[dd + 5u]); let r0 = bitcast<f32>(scene[dd + 5u]);
let r1 = bitcast<f32>(scene[dd + 6u]); let r1 = bitcast<f32>(scene[dd + 6u]);
let inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z); let inv_det = 1.0 / (matrx.x * matrx.w - matrx.y * matrx.z);
let inv_mat = inv_det * vec4<f32>(mat.w, -mat.y, -mat.z, mat.x); let inv_mat = inv_det * vec4<f32>(matrx.w, -matrx.y, -matrx.z, matrx.x);
var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y; var inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y;
inv_tr += p0; inv_tr += p0;
let center1 = p1 - p0; let center1 = p1 - p0;

View file

@ -64,9 +64,6 @@ fn read_i16_point(ix: u32) -> vec2<f32> {
} }
#ifndef cubics_out #ifndef cubics_out
let TILE_WIDTH = 16u;
let TILE_HEIGHT = 16u;
struct SubdivResult { struct SubdivResult {
val: f32, val: f32,
a0: f32, a0: f32,

View file

@ -28,20 +28,10 @@ struct Config {
// Geometry of tiles and bins // Geometry of tiles and bins
let TILE_WIDTH = 16u; let TILE_WIDTH = 16u;
let TILE_HEIGHT = 16u; let TILE_HEIGHT = 16u;
// Number of tiles per bin // Number of tiles per bin
let N_TILE_X = 16u; let N_TILE_X = 16u;
let N_TILE_Y = 16u; let N_TILE_Y = 16u;
let N_TILE = N_TILE_X * N_TILE_Y; //let N_TILE = N_TILE_X * N_TILE_Y;
let N_TILE = 256u;
// Should ptcl stuff move to a separate import?
// Layout of per-tile command list
// Initial allocation, in u32's.
let PTCL_INITIAL_ALLOC = 64u;
let PTCL_INCREMENT = 256u;
// Amount of space taken by jump
let PTCL_HEADROOM = 2u;

View file

@ -41,12 +41,13 @@ fn draw_monoid_identity() -> DrawMonoid {
return DrawMonoid(); return DrawMonoid();
} }
fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) { fn combine_draw_monoid(a: DrawMonoid, b: DrawMonoid) -> DrawMonoid {
var c: DrawMonoid; var c: DrawMonoid;
c.path_ix = a.path_ix + b.path_ix; c.path_ix = a.path_ix + b.path_ix;
c.clip_ix = a.clip_ix + b.clip_ix; c.clip_ix = a.clip_ix + b.clip_ix;
c.scene_offset = a.scene_offset + b.scene_offset; c.scene_offset = a.scene_offset + b.scene_offset;
c.info_offset = a.info_offset + b.info_offset; c.info_offset = a.info_offset + b.info_offset;
return c;
} }
fn map_draw_tag(tag_word: u32) -> DrawMonoid { fn map_draw_tag(tag_word: u32) -> DrawMonoid {

View file

@ -14,6 +14,14 @@
// //
// Also licensed under MIT license, at your choice. // Also licensed under MIT license, at your choice.
// Layout of per-tile command list
// Initial allocation, in u32's.
let PTCL_INITIAL_ALLOC = 64u;
let PTCL_INCREMENT = 256u;
// Amount of space taken by jump
let PTCL_HEADROOM = 2u;
// Tags for PTCL commands // Tags for PTCL commands
let CMD_END = 0u; let CMD_END = 0u;
let CMD_FILL = 1u; let CMD_FILL = 1u;
@ -30,7 +38,7 @@ struct CmdFill {
} }
struct CmdJump { struct CmdJump {
target: u32, new_ix: u32,
} }
struct CmdColor { struct CmdColor {