kernel4: separate area from alpha in clip stack

This change prepares for kernel4 to output alpha. No functional changes.

Signed-off-by: Elias Naur <mail@eliasnaur.com>
2025-01-10 20:51:29 +11:00 · 2021-03-22 16:13:39 +01:00 · 2021-03-22 16:13:39 +01:00 · ee4429a26f
parent 22507dea0e
commit ee4429a26f
5 changed files with 18 additions and 12 deletions
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@@ -418,8 +418,8 @@ void main() {
    if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
        Cmd_End_write(cmd_alloc, cmd_ref);
        if (num_begin_slots > 0) {
-            // Write scratch allocation: one word per BeginClip per rasterizer chunk.
+            // Write scratch allocation: one state per BeginClip per rasterizer chunk.
-            uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * 4;
+            uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
            MallocResult scratch = malloc(scratch_size);
            // Ignore scratch.failed; we don't use the allocation and kernel4
            // checks for memory overflow before using it.
--- a/piet-gpu/shader/coarse.spv
+++ b/piet-gpu/shader/coarse.spv
--- a/piet-gpu/shader/kernel4.comp
+++ b/piet-gpu/shader/kernel4.comp
@@ -201,25 +201,28 @@ void main() {
            cmd_ref.offset += 4 + CmdImage_size;
            break;
        case Cmd_BeginClip:
-            uint base_ix = (scratch_alloc.offset >> 2) + clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
+            uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
-                gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
+                gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
            for (uint k = 0; k < CHUNK; k++) {
                uvec2 offset = chunk_offset(k);
-                uint state = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
+                uint srgb = packsRGB(vec4(rgb[k], 1.0));
-                write_mem(scratch_alloc, base_ix + offset.x + offset.y * TILE_WIDTH_PX, state);
+                float alpha = clamp(abs(area[k]), 0.0, 1.0);
+                write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
+                write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
            }
            clip_depth++;
            cmd_ref.offset += 4;
            break;
        case Cmd_EndClip:
            clip_depth--;
-            base_ix = (scratch_alloc.offset >> 2) + clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
+            base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
-                gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
+                gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
            for (uint k = 0; k < CHUNK; k++) {
                uvec2 offset = chunk_offset(k);
-                uint state = read_mem(scratch_alloc, base_ix + offset.x + offset.y * TILE_WIDTH_PX);
+                uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
-                vec4 rgba = unpacksRGB(state);
+                uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
-                rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a);
+                vec4 rgba = unpacksRGB(srgb);
+                rgb[k] = mix(rgba.rgb, rgb[k], area[k] * uintBitsToFloat(alpha));
            }
            cmd_ref.offset += 4;
            break;
--- a/piet-gpu/shader/kernel4.spv
+++ b/piet-gpu/shader/kernel4.spv
--- a/piet-gpu/shader/setup.h
+++ b/piet-gpu/shader/setup.h
@@ -44,6 +44,9 @@ struct Config {
 #define MODE_NONZERO 0
 #define MODE_STROKE 1
+// Size of kernel4 clip state, in words.
+#define CLIP_STATE_SIZE 2
 // fill_mode_from_flags extracts the fill mode from tag flags.
 uint fill_mode_from_flags(uint flags) {
    return flags & 0x1;