diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index b0c2a60..b7834cf 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -97,12 +97,12 @@ void main() { uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y); vec2 xy = vec2(xy_uint); - vec3 rgb[CHUNK]; + vec4 rgba[CHUNK]; for (uint i = 0; i < CHUNK; i++) { - rgb[i] = vec3(0.5); + rgba[i] = vec4(0.0); #ifdef ENABLE_IMAGE_INDICES if (xy_uint.x < 1024 && xy_uint.y < 1024) { - rgb[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4).rgb; + rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4); } #endif } @@ -187,7 +187,7 @@ void main() { vec4 fg = unpacksRGB(color.rgba_color); for (uint k = 0; k < CHUNK; k++) { vec4 fg_k = fg * area[k]; - rgb[k] = rgb[k] * (1.0 - fg_k.a) + fg_k.rgb; + rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k; } cmd_ref.offset += 4 + CmdColor_size; break; @@ -196,7 +196,7 @@ void main() { vec4 img[CHUNK] = fillImage(xy_uint, fill_img); for (uint k = 0; k < CHUNK; k++) { vec4 fg_k = img[k] * area[k]; - rgb[k] = rgb[k] * (1.0 - fg_k.a) + fg_k.rgb; + rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k; } cmd_ref.offset += 4 + CmdImage_size; break; @@ -205,10 +205,11 @@ void main() { gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); for (uint k = 0; k < CHUNK; k++) { uvec2 offset = chunk_offset(k); - uint srgb = packsRGB(vec4(rgb[k], 1.0)); + uint srgb = packsRGB(vec4(rgba[k])); float alpha = clamp(abs(area[k]), 0.0, 1.0); write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb); write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha)); + rgba[k] = vec4(0.0); } clip_depth++; cmd_ref.offset += 4; @@ -221,8 +222,9 @@ void main() { uvec2 offset = chunk_offset(k); uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX)); uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX)); - vec4 rgba = unpacksRGB(srgb); - rgb[k] = mix(rgba.rgb, rgb[k], area[k] * uintBitsToFloat(alpha)); + vec4 bg = unpacksRGB(srgb); + vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha); + rgba[k] = bg * (1.0 - fg.a) + fg; } cmd_ref.offset += 4; break; @@ -234,6 +236,6 @@ void main() { } for (uint i = 0; i < CHUNK; i++) { - imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgb[i]), 1.0)); + imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgba[i].rgb), rgba[i].a)); } } diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index 71b3193..af7fe18 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ