vello/piet-gpu/shader/draw_leaf.comp

163 lines
6.5 KiB
GLSL
Raw Normal View History

// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The leaf scan pass for draw tag scan implemented as a tree reduction.
// This stage can be fused with its consumer but is separate now.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define N_ROWS 8
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
#define WG_SIZE (1 << LG_WG_SIZE)
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
#include "scene.h"
#include "tile.h"
#include "drawtag.h"
#include "annotated.h"
#define Monoid DrawMonoid
layout(set = 0, binding = 3) readonly buffer ParentBuf {
Monoid[] parent;
};
shared Monoid sh_scratch[WG_SIZE];
void main() {
Monoid local[N_ROWS];
uint ix = gl_GlobalInvocationID.x * N_ROWS;
ElementRef ref = ElementRef(ix * Element_size);
uint tag_word = Element_tag(ref).tag;
Monoid agg = map_tag(tag_word);
local[0] = agg;
for (uint i = 1; i < N_ROWS; i++) {
tag_word = Element_tag(Element_index(ref, i)).tag;
agg = combine_tag_monoid(agg, map_tag(tag_word));
local[i] = agg;
}
sh_scratch[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (gl_LocalInvocationID.x >= (1u << i)) {
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
agg = combine_tag_monoid(other, agg);
}
barrier();
sh_scratch[gl_LocalInvocationID.x] = agg;
}
barrier();
Monoid row = tag_monoid_identity();
if (gl_WorkGroupID.x > 0) {
row = parent[gl_WorkGroupID.x - 1];
}
if (gl_LocalInvocationID.x > 0) {
row = combine_tag_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
}
uint out_ix = gl_GlobalInvocationID.x * N_ROWS;
uint out_base = (conf.drawmonoid_alloc.offset >> 2) + out_ix * 2;
AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + out_ix * Annotated_size);
for (uint i = 0; i < N_ROWS; i++) {
Monoid m = combine_tag_monoid(row, local[i]);
memory[out_base + i * 2] = m.path_ix;
memory[out_base + i * 2 + 1] = m.clip_ix;
// For compatibility, we'll generate an Annotated object, same as old
// pipeline. However, going forward we'll get rid of that, and have
// later stages read scene + bbox etc.
ElementRef this_ref = Element_index(ref, i);
tag_word = Element_tag(this_ref).tag;
if (tag_word == Element_FillColor || tag_word == Element_FillLinGradient || tag_word == Element_FillImage) {
uint bbox_offset = (conf.bbox_alloc.offset >> 2) + 6 * (m.path_ix - 1);
float bbox_l = float(memory[bbox_offset]) - 32768.0;
float bbox_t = float(memory[bbox_offset + 1]) - 32768.0;
float bbox_r = float(memory[bbox_offset + 2]) - 32768.0;
float bbox_b = float(memory[bbox_offset + 3]) - 32768.0;
vec4 bbox = vec4(bbox_l, bbox_t, bbox_r, bbox_b);
float linewidth = uintBitsToFloat(memory[bbox_offset + 4]);
uint fill_mode = uint(linewidth >= 0.0);
vec4 mat;
vec2 translate;
if (linewidth >= 0.0 || tag_word == Element_FillLinGradient) {
uint trans_ix = memory[bbox_offset + 5];
uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix;
mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3]));
if (tag_word == Element_FillLinGradient) {
translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5]));
}
}
if (linewidth >= 0.0) {
// TODO: need to deal with anisotropic case
linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z));
}
linewidth = max(linewidth, 0.0);
switch (tag_word) {
case Element_FillColor:
FillColor fill = Element_FillColor_read(this_ref);
AnnoColor anno_fill;
anno_fill.bbox = bbox;
anno_fill.linewidth = linewidth;
anno_fill.rgba_color = fill.rgba_color;
Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill);
break;
case Element_FillLinGradient:
FillLinGradient lin = Element_FillLinGradient_read(this_ref);
AnnoLinGradient anno_lin;
anno_lin.bbox = bbox;
anno_lin.linewidth = linewidth;
anno_lin.index = lin.index;
vec2 p0 = mat.xy * lin.p0.x + mat.zw * lin.p0.y + translate;
vec2 p1 = mat.xy * lin.p1.x + mat.zw * lin.p1.y + translate;
vec2 dxy = p1 - p0;
float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y);
float line_x = dxy.x * scale;
float line_y = dxy.y * scale;
anno_lin.line_x = line_x;
anno_lin.line_y = line_y;
anno_lin.line_c = -(p0.x * line_x + p0.y * line_y);
Annotated_LinGradient_write(conf.anno_alloc, out_ref, fill_mode, anno_lin);
break;
case Element_FillImage:
FillImage fill_img = Element_FillImage_read(this_ref);
AnnoImage anno_img;
anno_img.bbox = bbox;
anno_img.linewidth = linewidth;
anno_img.index = fill_img.index;
anno_img.offset = fill_img.offset;
Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
break;
}
} else if (tag_word == Element_BeginClip) {
Clip begin_clip = Element_BeginClip_read(this_ref);
AnnoBeginClip anno_begin_clip;
// This is the absolute bbox, it's been transformed during encoding.
anno_begin_clip.bbox = begin_clip.bbox;
anno_begin_clip.linewidth = 0.0; // don't support clip-with-stroke
Annotated_BeginClip_write(conf.anno_alloc, out_ref, 0, anno_begin_clip);
} else if (tag_word == Element_EndClip) {
Clip end_clip = Element_EndClip_read(this_ref);
AnnoEndClip anno_end_clip;
// This bbox is expected to be the same as the begin one.
anno_end_clip.bbox = end_clip.bbox;
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
}
out_ref.offset += Annotated_size;
}
}