// Patch overview (unified git diff; the original line breaks have been
// collapsed, so the hunks below appear inline on two physical lines).
//
// Files touched: piet-gpu/shader/coarse.comp, kernel4.comp and setup.h,
// plus the coarse.spv / kernel4.spv binaries (reported only as
// "Binary files ... differ").
//
// setup.h
//   Adds `#define CLIP_STATE_SIZE 2`, described in the patch itself as the
//   size of the kernel4 clip state in words. The `return flags & 0x1;` line
//   of fill_mode_from_flags is removed and re-added with identical tokens;
//   NOTE(review): looks like a whitespace-only change -- confirm against
//   the un-collapsed patch.
//
// coarse.comp
//   The clip scratch allocation size becomes
//     num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4
//   i.e. the previous size multiplied by CLIP_STATE_SIZE.
//   NOTE(review): the trailing `* 4` presumably converts words to bytes
//   (kernel4 derives a word index via `scratch_alloc.offset >> 2`) --
//   confirm against malloc()'s contract.
//
// kernel4.comp, case Cmd_BeginClip
//   Before: one word per pixel, packsRGB(vec4(rgb[k], clamped |area[k]|)).
//   After: CLIP_STATE_SIZE words per pixel:
//     slot 0 = packsRGB(vec4(rgb[k], 1.0))
//     slot 1 = floatBitsToUint(clamp(abs(area[k]), 0.0, 1.0))
//   base_ix is now (scratch_alloc.offset >> 2) plus CLIP_STATE_SIZE times the
//   per-depth/per-invocation pixel index, and each chunk offset is likewise
//   scaled by CLIP_STATE_SIZE before the slot number (0 or 1) is added.
//
// kernel4.comp, case Cmd_EndClip
//   Reads the two slots back with the same index expressions, unpacks slot 0
//   with unpacksRGB and reinterprets slot 1 with uintBitsToFloat, then blends
//     rgb[k] = mix(rgba.rgb, rgb[k], area[k] * <stored alpha>).
//
// NOTE(review): the slot numbers 0 and 1 are literals; they fit
// CLIP_STATE_SIZE == 2, and the BeginClip write indices must stay identical
// to the EndClip read indices if the layout is changed again.
diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 3d771dd..76d7fc6 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -418,8 +418,8 @@ void main() { if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) { Cmd_End_write(cmd_alloc, cmd_ref); if (num_begin_slots > 0) { - // Write scratch allocation: one word per BeginClip per rasterizer chunk. - uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * 4; + // Write scratch allocation: one state per BeginClip per rasterizer chunk. + uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4; MallocResult scratch = malloc(scratch_size); // Ignore scratch.failed; we don't use the allocation and kernel4 // checks for memory overflow before using it. diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 5bc80ae..7d0b629 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 4fe9f76..b0c2a60 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -201,25 +201,28 @@ void main() { cmd_ref.offset += 4 + CmdImage_size; break; case Cmd_BeginClip: - uint base_ix = (scratch_alloc.offset >> 2) + clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + - gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y; + uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); for (uint k = 0; k < CHUNK; k++) { uvec2 offset = chunk_offset(k); - uint state = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0))); - write_mem(scratch_alloc, base_ix + offset.x + offset.y * TILE_WIDTH_PX, state); + uint srgb = packsRGB(vec4(rgb[k], 1.0)); + float alpha = clamp(abs(area[k]), 0.0, 1.0); + write_mem(scratch_alloc, base_ix + 0 + 
CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb); + write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha)); } clip_depth++; cmd_ref.offset += 4; break; case Cmd_EndClip: clip_depth--; - base_ix = (scratch_alloc.offset >> 2) + clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + - gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y; + base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); for (uint k = 0; k < CHUNK; k++) { uvec2 offset = chunk_offset(k); - uint state = read_mem(scratch_alloc, base_ix + offset.x + offset.y * TILE_WIDTH_PX); - vec4 rgba = unpacksRGB(state); - rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a); + uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX)); + uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX)); + vec4 rgba = unpacksRGB(srgb); + rgb[k] = mix(rgba.rgb, rgb[k], area[k] * uintBitsToFloat(alpha)); } cmd_ref.offset += 4; break; diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 5bdc0b4..71b3193 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index d26d842..9979367 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -44,7 +44,10 @@ struct Config { #define MODE_NONZERO 0 #define MODE_STROKE 1 +// Size of kernel4 clip state, in words. +#define CLIP_STATE_SIZE 2 + // fill_mode_from_flags extracts the fill mode from tag flags. uint fill_mode_from_flags(uint flags) { - return flags & 0x1; + return flags & 0x1; }