// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// The leaf scan pass for draw tag scan implemented as a tree reduction.
// This stage can be fused with its consumer but is separate now.

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

// Number of consecutive draw tags scanned sequentially by one invocation.
#define N_ROWS 8
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)

layout(local_size_x = WG_SIZE, local_size_y = 1) in;

layout(binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

layout(binding = 2) readonly buffer SceneBuf {
    uint[] scene;
};

#include "scene.h"
#include "tile.h"
#include "drawtag.h"
#include "blend.h"

#define Monoid DrawMonoid

// NOTE(review): presumably holds one aggregate per workgroup, already scanned
// by an earlier pass, so that entry `g - 1` is the prefix for workgroup `g` --
// confirm against the producer of this buffer.
layout(set = 0, binding = 3) readonly buffer ParentBuf {
    Monoid[] parent;
};

// One slot per invocation; used for the workgroup-wide scan below.
shared Monoid sh_scratch[WG_SIZE];

// Entry point. Each invocation handles N_ROWS consecutive draw tags:
//   1. scans its own tags sequentially (inclusive prefixes kept in `local`),
//   2. takes part in a log-step scan of the per-invocation aggregates through
//      `sh_scratch`,
//   3. combines the parent prefix, the preceding invocations' aggregate and
//      its own local prefix into an exclusive prefix per draw object, which is
//      written to the draw monoid output together with per-object draw info
//      and the clip stream.
void main() {
    Monoid local[N_ROWS];

    uint ix = gl_GlobalInvocationID.x * N_ROWS;
    // Config offsets are shifted right by 2 before indexing the uint arrays
    // (presumably byte offsets converted to u32 indices).
    uint drawtag_base = conf.drawtag_offset >> 2;

    // Sequential inclusive scan over this invocation's own tags.
    uint tag_word = scene[drawtag_base + ix];
    Monoid agg = map_tag(tag_word);
    local[0] = agg;
    for (uint i = 1; i < N_ROWS; i++) {
        tag_word = scene[drawtag_base + ix + i];
        agg = combine_draw_monoid(agg, map_tag(tag_word));
        local[i] = agg;
    }

    // Inclusive scan of the per-invocation aggregates across the workgroup.
    // The first barrier makes the previous round's writes visible before they
    // are read; the second keeps any invocation from overwriting its slot
    // while a neighbour may still be reading it.
    sh_scratch[gl_LocalInvocationID.x] = agg;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        barrier();
        if (gl_LocalInvocationID.x >= (1u << i)) {
            Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
            agg = combine_draw_monoid(other, agg);
        }
        barrier();
        sh_scratch[gl_LocalInvocationID.x] = agg;
    }

    barrier();
    // `row` becomes the exclusive prefix for this invocation's first element:
    // the parent entry of the previous workgroup (if any) combined with the
    // scanned aggregate of the previous invocation in this workgroup (if any).
    Monoid row = draw_monoid_identity();
    if (gl_WorkGroupID.x > 0) {
        row = parent[gl_WorkGroupID.x - 1];
    }
    if (gl_LocalInvocationID.x > 0) {
        row = combine_draw_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
    }

    // Loop-invariant base indices.
    uint drawdata_base = conf.drawdata_offset >> 2;
    uint drawinfo_base = conf.drawinfo_alloc.offset >> 2;
    uint path_bbox_base = conf.path_bbox_alloc.offset >> 2;
    uint trans_base = conf.trans_offset >> 2;
    uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
    // Four u32 words are written per draw object.
    uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 4;
    uint clip_out_base = conf.clip_alloc.offset >> 2;

    for (uint i = 0; i < N_ROWS; i++) {
        Monoid m = row;
        if (i > 0) {
            m = combine_draw_monoid(m, local[i - 1]);
        }
        // m now holds exclusive scan of draw monoid
        memory[out_base + i * 4] = m.path_ix;
        memory[out_base + i * 4 + 1] = m.clip_ix;
        memory[out_base + i * 4 + 2] = m.scene_offset;
        memory[out_base + i * 4 + 3] = m.info_offset;

        // u32 offset of drawobj data
        uint dd = drawdata_base + (m.scene_offset >> 2);
        uint di = drawinfo_base + (m.info_offset >> 2);

        // For compatibility, we'll generate an Annotated object, same as old
        // pipeline. However, going forward we'll get rid of that, and have
        // later stages read scene + bbox etc.
        tag_word = scene[drawtag_base + ix + i];
        if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient
            || tag_word == Drawtag_FillRadGradient || tag_word == Drawtag_FillImage
            || tag_word == Drawtag_BeginClip)
        {
            // Six u32 words per path: four bbox coordinates stored with a
            // +32768 bias, then the line width (float bits), then the
            // transform index.
            uint bbox_offset = path_bbox_base + 6 * m.path_ix;
            float bbox_l = float(memory[bbox_offset]) - 32768.0;
            float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
            float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
            float bbox_b = float(memory[bbox_offset + 3]) - 32768.0;
            vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
            // NOTE(review): a negative line width presumably marks a fill
            // rather than a stroke -- confirm against the bbox producer.
            float linewidth = uintBitsToFloat(memory[bbox_offset + 4]);

            // `mat` and `translate` are only loaded when needed; every read
            // below is guarded by the same conditions as the loads.
            vec4 mat;
            vec2 translate;
            bool is_gradient = tag_word == Drawtag_FillLinGradient
                || tag_word == Drawtag_FillRadGradient;
            if (linewidth >= 0.0 || is_gradient) {
                uint trans_ix = memory[bbox_offset + 5];
                // Six floats per transform: 2x2 matrix followed by translation.
                uint t = trans_base + trans_ix * 6;
                mat = uintBitsToFloat(uvec4(scene[t], scene[t + 1], scene[t + 2], scene[t + 3]));
                if (is_gradient) {
                    translate = uintBitsToFloat(uvec2(scene[t + 4], scene[t + 5]));
                }
            }
            if (linewidth >= 0.0) {
                // TODO: need to deal with anisotropic case
                linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z));
            }

            // Each case has its own scope so that its locals cannot leak into
            // (or be entered uninitialized from) a sibling case.
            switch (tag_word) {
            case Drawtag_FillColor:
            case Drawtag_FillImage:
                memory[di] = floatBitsToUint(linewidth);
                break;
            case Drawtag_FillLinGradient: {
                memory[di] = floatBitsToUint(linewidth);
                vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2]));
                vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4]));
                // Apply the affine transform to both end points.
                p0 = mat.xy * p0.x + mat.zw * p0.y + translate;
                p1 = mat.xy * p1.x + mat.zw * p1.y + translate;
                vec2 dxy = p1 - p0;
                // Line coefficients such that line_x * x + line_y * y + line_c
                // is 0 at p0 and 1 at p1.
                float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y);
                float line_x = dxy.x * scale;
                float line_y = dxy.y * scale;
                float line_c = -(p0.x * line_x + p0.y * line_y);
                memory[di + 1] = floatBitsToUint(line_x);
                memory[di + 2] = floatBitsToUint(line_y);
                memory[di + 3] = floatBitsToUint(line_c);
                break;
            }
            case Drawtag_FillRadGradient: {
                vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2]));
                vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4]));
                float r0 = uintBitsToFloat(scene[dd + 5]);
                float r1 = uintBitsToFloat(scene[dd + 6]);
                // Inverse of the 2x2 part of the transform.
                float inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z);
                vec4 inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x);
                vec2 inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y;
                inv_tr += p0;
                // Precomputed radial gradient parameters, stored in the info
                // words below for a later stage.
                vec2 center1 = p1 - p0;
                float rr = r1 / (r1 - r0);
                float rainv = rr / (r1 * r1 - dot(center1, center1));
                vec2 c1 = center1 * rainv;
                float ra = rr * rainv;
                float roff = rr - 1.0;
                memory[di] = floatBitsToUint(linewidth);
                memory[di + 1] = floatBitsToUint(inv_mat.x);
                memory[di + 2] = floatBitsToUint(inv_mat.y);
                memory[di + 3] = floatBitsToUint(inv_mat.z);
                memory[di + 4] = floatBitsToUint(inv_mat.w);
                memory[di + 5] = floatBitsToUint(inv_tr.x);
                memory[di + 6] = floatBitsToUint(inv_tr.y);
                memory[di + 7] = floatBitsToUint(c1.x);
                memory[di + 8] = floatBitsToUint(c1.y);
                memory[di + 9] = floatBitsToUint(ra);
                memory[di + 10] = floatBitsToUint(roff);
                break;
            }
            case Drawtag_BeginClip:
                // No draw info is written for clips.
                break;
            }
        }

        // Generate clip stream. BeginClip records the path index; EndClip
        // records the bitwise complement of the draw object index.
        if (tag_word == Drawtag_BeginClip || tag_word == Drawtag_EndClip) {
            uint path_ix = ~(out_ix + i);
            if (tag_word == Drawtag_BeginClip) {
                path_ix = m.path_ix;
            }
            memory[clip_out_base + m.clip_ix] = path_ix;
        }
    }
}