diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 860be2e..a4837bd 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -324,16 +324,18 @@ void main() { if (tile.tile.offset != 0) { CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill); + cmd_ref.offset += 4 + CmdFill_size; } else { Cmd_Solid_write(cmd_alloc, cmd_ref); + cmd_ref.offset += 4; } } else { CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * fill.linewidth); Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke); + cmd_ref.offset += 4 + CmdStroke_size; } - cmd_ref.offset += Cmd_size; Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color)); - cmd_ref.offset += Cmd_size; + cmd_ref.offset += 4 + CmdColor_size; break; case Annotated_Image: tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix] @@ -346,16 +348,18 @@ void main() { if (tile.tile.offset != 0) { CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill); + cmd_ref.offset += 4 + CmdFill_size; } else { Cmd_Solid_write(cmd_alloc, cmd_ref); + cmd_ref.offset += 4; } } else { CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * fill_img.linewidth); Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke); + cmd_ref.offset += 4 + CmdStroke_size; } - cmd_ref.offset += Cmd_size; Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset)); - cmd_ref.offset += Cmd_size; + cmd_ref.offset += 4 + CmdImage_size; break; case Annotated_BeginClip: tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix] @@ -373,18 +377,20 @@ void main() { if (tile.tile.offset != 0) { CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill); + cmd_ref.offset += 4 + CmdFill_size; } else { // TODO: here is where a bunch of optimization magic should happen float alpha = tile.backdrop == 0 ? 0.0 : 1.0; Cmd_Alpha_write(cmd_alloc, cmd_ref, CmdAlpha(alpha)); + cmd_ref.offset += 4 + CmdAlpha_size; } } else { CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * begin_clip.linewidth); Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke); + cmd_ref.offset += 4 + CmdStroke_size; } - cmd_ref.offset += Cmd_size; Cmd_BeginClip_write(cmd_alloc, cmd_ref); - cmd_ref.offset += Cmd_size; + cmd_ref.offset += 4; if (clip_depth < 32) { clip_one_mask &= ~(1 << clip_depth); } @@ -398,9 +404,9 @@ void main() { break; } Cmd_Solid_write(cmd_alloc, cmd_ref); - cmd_ref.offset += Cmd_size; + cmd_ref.offset += 4; Cmd_EndClip_write(cmd_alloc, cmd_ref); - cmd_ref.offset += Cmd_size; + cmd_ref.offset += 4; } break; } diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 7e85c00..763da55 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 8e26e1b..b9f59dc 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -153,6 +153,7 @@ void main() { for (uint k = 0; k < CHUNK; k++) { area[k] = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0); } + cmd_ref.offset += 4 + CmdStroke_size; break; case Cmd_Fill: CmdFill fill = Cmd_Fill_read(cmd_alloc, cmd_ref); @@ -184,17 +185,20 @@ void main() { for (uint k = 0; k < CHUNK; k++) { area[k] = min(abs(area[k]), 1.0); } + cmd_ref.offset += 4 + CmdFill_size; break; case Cmd_Solid: for (uint k = 0; k < CHUNK; k++) { area[k] = 1.0; } + cmd_ref.offset += 4; break; case Cmd_Alpha: CmdAlpha alpha = Cmd_Alpha_read(cmd_alloc, cmd_ref); for (uint k = 0; k < CHUNK; k++) { area[k] = alpha.alpha; } + cmd_ref.offset += 4 + CmdAlpha_size; break; case Cmd_Color: CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref); @@ -202,6 +206,7 @@ void main() { for (uint k = 0; k < CHUNK; k++) { rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * area[k] * fg_rgba.a); } + cmd_ref.offset += 4 + CmdColor_size; break; case Cmd_Image: CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref); @@ -209,6 +214,7 @@ void main() { for (uint k = 0; k < CHUNK; k++) { rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * area[k] * rgba[k].a); } + cmd_ref.offset += 4 + CmdImage_size; break; case Cmd_BeginClip: uint blend_slot = blend_sp % BLEND_STACK_SIZE; @@ -229,6 +235,7 @@ void main() { blend_stack[blend_slot][k] = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0))); } blend_sp++; + cmd_ref.offset += 4; break; case Cmd_EndClip: blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE; @@ -245,13 +252,13 @@ void main() { vec4 rgba = unpacksRGB(blend_stack[blend_slot][k]); rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a); } + cmd_ref.offset += 4; break; case Cmd_Jump: cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref); cmd_alloc.offset = cmd_ref.offset; continue; } - cmd_ref.offset += Cmd_size; } for (uint i = 0; i < CHUNK; i++) { diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 91e54dd..5f776e4 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ