// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense

// Leaf pass of the draw tag scan, implemented as a tree reduction.
// It is a standalone stage for now, although it could be fused with its
// consumer.

#version 450
#extension GL_GOOGLE_include_directive : enable

#include "mem.h"
#include "setup.h"

// Each invocation handles N_ROWS consecutive elements; a workgroup therefore
// covers PARTITION_SIZE elements.
#define N_ROWS 8
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)

layout(local_size_x = WG_SIZE, local_size_y = 1) in;

layout(binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

layout(binding = 2) readonly buffer SceneBuf {
    uint[] scene;
};

#include "scene.h"
#include "tile.h"
#include "drawtag.h"
#include "annotated.h"
#include "blend.h"

#define Monoid DrawMonoid

// One aggregate per preceding workgroup; entry (gl_WorkGroupID.x - 1) is
// combined in front of this workgroup's own scan.
layout(set = 0, binding = 3) readonly buffer ParentBuf {
    Monoid[] parent;
};

// Workgroup-wide scratch space for the cross-invocation scan.
shared Monoid sh_scratch[WG_SIZE];

// Scans the draw monoid over this invocation's N_ROWS elements, writes the
// exclusive-scan result (path_ix, clip_ix) for each element, emits an
// Annotated record for fill / clip elements, and appends begin/end clip
// entries to the clip stream.
void main() {
    // Inclusive scan over the elements owned by this invocation.
    Monoid row_scan[N_ROWS];

    uint lane = gl_LocalInvocationID.x;
    uint elem_ix = gl_GlobalInvocationID.x * N_ROWS;
    ElementRef ref = ElementRef(elem_ix * Element_size);

    uint tag_word = Element_tag(ref).tag;
    Monoid running = map_tag(tag_word);
    row_scan[0] = running;
    for (uint i = 1; i < N_ROWS; i++) {
        tag_word = Element_tag(Element_index(ref, i)).tag;
        running = combine_tag_monoid(running, map_tag(tag_word));
        row_scan[i] = running;
    }

    // Log-step inclusive scan of the per-invocation totals across the
    // workgroup. The barrier pairs separate each round's reads of sh_scratch
    // from that round's writes.
    sh_scratch[lane] = running;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        uint stride = 1u << i;
        barrier();
        if (lane >= stride) {
            running = combine_tag_monoid(sh_scratch[lane - stride], running);
        }
        barrier();
        sh_scratch[lane] = running;
    }
    barrier();

    // Prefix contributed by everything before this invocation: preceding
    // workgroups (via parent) followed by preceding invocations in this one.
    Monoid prefix = tag_monoid_identity();
    if (gl_WorkGroupID.x > 0) {
        prefix = parent[gl_WorkGroupID.x - 1];
    }
    if (lane > 0) {
        prefix = combine_tag_monoid(prefix, sh_scratch[lane - 1]);
    }

    // Allocation offsets are shifted right by 2 before indexing `memory`
    // (presumably byte offset -> 32-bit word index; confirm against mem.h).
    uint monoid_base = (conf.drawmonoid_alloc.offset >> 2) + elem_ix * 2;
    uint clip_base = conf.clip_alloc.offset >> 2;
    AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + elem_ix * Annotated_size);

    for (uint i = 0; i < N_ROWS; i++) {
        // Exclusive scan of the draw monoid for element i.
        Monoid m = prefix;
        if (i > 0) {
            m = combine_tag_monoid(m, row_scan[i - 1]);
        }
        uint monoid_slot = monoid_base + i * 2;
        memory[monoid_slot] = m.path_ix;
        memory[monoid_slot + 1] = m.clip_ix;

        // An Annotated object is still produced here for compatibility with
        // the old pipeline. The plan is to drop it eventually and let later
        // stages read scene + bbox etc. directly.
        ElementRef this_ref = Element_index(ref, i);
        tag_word = Element_tag(this_ref).tag;

        bool is_begin_clip = tag_word == Element_BeginClip;
        bool is_end_clip = tag_word == Element_EndClip;
        bool is_lin_gradient = tag_word == Element_FillLinGradient;

        if (tag_word == Element_FillColor || is_lin_gradient
            || tag_word == Element_FillImage || is_begin_clip)
        {
            // Six words per path: four bbox coordinates, linewidth bits,
            // transform index.
            uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * m.path_ix;
            // Coordinates are read as uints and have 32768 subtracted after
            // conversion to float.
            vec4 bbox = vec4(
                float(memory[bbox_offset]) - 32768.0,
                float(memory[bbox_offset + 1]) - 32768.0,
                float(memory[bbox_offset + 2]) - 32768.0,
                float(memory[bbox_offset + 3]) - 32768.0
            );
            float linewidth = uintBitsToFloat(memory[bbox_offset + 4]);

            // Sampled once, before linewidth is rescaled and clamped below.
            bool nonneg_width = linewidth >= 0.0;
            uint fill_mode = uint(nonneg_width);

            // The transform is only fetched when something consumes it:
            // linewidth scaling (mat) or gradient endpoint mapping
            // (mat + translate).
            vec4 mat;
            vec2 translate;
            if (nonneg_width || is_lin_gradient) {
                uint trans_ix = memory[bbox_offset + 5];
                uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix;
                mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3]));
                if (is_lin_gradient) {
                    translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5]));
                }
            }
            if (nonneg_width) {
                // TODO: need to deal with anisotropic case
                linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z));
            }
            linewidth = max(linewidth, 0.0);

            switch (tag_word) {
            case Element_FillColor: {
                FillColor fill = Element_FillColor_read(this_ref);
                AnnoColor anno_fill;
                anno_fill.bbox = bbox;
                anno_fill.linewidth = linewidth;
                anno_fill.rgba_color = fill.rgba_color;
                Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill);
                break;
            }
            case Element_FillLinGradient: {
                FillLinGradient lin = Element_FillLinGradient_read(this_ref);
                AnnoLinGradient anno_lin;
                anno_lin.bbox = bbox;
                anno_lin.linewidth = linewidth;
                anno_lin.index = lin.index;
                // Map both endpoints through the transform, then store the
                // gradient as line coefficients (line_x, line_y, line_c).
                vec2 p0 = mat.xy * lin.p0.x + mat.zw * lin.p0.y + translate;
                vec2 p1 = mat.xy * lin.p1.x + mat.zw * lin.p1.y + translate;
                vec2 dxy = p1 - p0;
                // NOTE(review): no guard for coincident endpoints (dxy == 0).
                float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y);
                float line_x = dxy.x * scale;
                float line_y = dxy.y * scale;
                anno_lin.line_x = line_x;
                anno_lin.line_y = line_y;
                anno_lin.line_c = -(p0.x * line_x + p0.y * line_y);
                Annotated_LinGradient_write(conf.anno_alloc, out_ref, fill_mode, anno_lin);
                break;
            }
            case Element_FillImage: {
                FillImage fill_img = Element_FillImage_read(this_ref);
                AnnoImage anno_img;
                anno_img.bbox = bbox;
                anno_img.linewidth = linewidth;
                anno_img.index = fill_img.index;
                anno_img.offset = fill_img.offset;
                Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
                break;
            }
            case Element_BeginClip: {
                Clip begin_clip = Element_BeginClip_read(this_ref);
                AnnoBeginClip anno_begin_clip;
                anno_begin_clip.bbox = bbox;
                // Clip-with-stroke is not supported, so the width is forced
                // to zero.
                anno_begin_clip.linewidth = 0.0;
                anno_begin_clip.blend = begin_clip.blend;
                uint flags = uint(begin_clip.blend != BlendComp_default) << 1;
                Annotated_BeginClip_write(conf.anno_alloc, out_ref, flags, anno_begin_clip);
                break;
            }
            }
        } else if (is_end_clip) {
            // NOTE(review): the EndClip payload is read with the BeginClip
            // reader; presumably both share the Clip layout - confirm against
            // scene.h.
            Clip end_clip = Element_BeginClip_read(this_ref);
            AnnoEndClip anno_end_clip;
            // Placeholder bbox; the real one is reconstructed from the clip
            // stream output.
            anno_end_clip.bbox = vec4(-1e9, -1e9, 1e9, 1e9);
            anno_end_clip.blend = end_clip.blend;
            uint flags = uint(end_clip.blend != BlendComp_default) << 1;
            Annotated_EndClip_write(conf.anno_alloc, out_ref, flags, anno_end_clip);
        }

        // Generate clip stream: BeginClip stores its path index, EndClip
        // stores the bitwise complement of its element index.
        if (is_begin_clip || is_end_clip) {
            uint clip_entry = is_begin_clip ? m.path_ix : ~(elem_ix + i);
            memory[clip_base + m.clip_ix] = clip_entry;
        }

        out_ref.offset += Annotated_size;
    }
}