// vello/piet-gpu/shader/draw_leaf.comp

// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The leaf scan pass for draw tag scan implemented as a tree reduction.
// This stage can be fused with its consumer but is separate now.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
// Number of consecutive draw tags each invocation processes sequentially in
// main().
#define N_ROWS 8
// log2 of the workgroup size. LG_WG_FACTOR is not defined in this file
// (presumably it comes from one of the included headers -- TODO confirm).
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
// Number of invocations per workgroup.
#define WG_SIZE (1 << LG_WG_SIZE)
// Number of draw tags covered by one workgroup (WG_SIZE invocations, N_ROWS
// tags each).
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
// Read-only configuration block; main() takes all of its buffer offsets
// (drawtag, drawdata, drawinfo, drawmonoid, clip, path bbox, transform) from
// here.
layout(binding = 1) readonly buffer ConfigBuf {
Config conf;
};
// Read-only scene data addressed as 32-bit words; main() reads draw tags and
// gradient parameters from it.
layout(binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
// NOTE(review): these headers are included after the buffer declarations,
// presumably because they refer to `conf` / `scene` -- confirm before
// reordering.
#include "scene.h"
#include "tile.h"
#include "drawtag.h"
#include "blend.h"
// Local alias for the draw monoid type (DrawMonoid is declared outside this
// file, presumably in drawtag.h).
#define Monoid DrawMonoid
// Read-only per-workgroup prefixes: main() uses parent[gl_WorkGroupID.x - 1]
// as the monoid preceding the first element of the current workgroup.
layout(set = 0, binding = 3) readonly buffer ParentBuf {
Monoid[] parent;
};
// One slot per invocation; used by main() to scan the per-invocation
// aggregates across the workgroup.
shared Monoid sh_scratch[WG_SIZE];
// Leaf pass of the draw tag scan.
//
// Each invocation handles N_ROWS consecutive draw tags:
//   1. Scan the invocation's own tags sequentially into `local`.
//   2. Scan the per-invocation aggregates across the workgroup through
//      `sh_scratch`.
//   3. Build the prefix preceding this invocation from `parent` (previous
//      workgroups) and `sh_scratch` (previous invocations).
//   4. For every tag, write the exclusive-scan monoid to the draw monoid
//      buffer, write the per-draw-object info words, and emit clip stream
//      entries for BeginClip / EndClip.
//
// All `>> 2` shifts turn offsets from `conf` and from the monoid into indices
// of the uint arrays `scene` / `memory` (presumably byte offsets -> 32-bit
// word indices).
//
// NOTE(review): there is no bounds check on `scene[drawtag_base + ix + i]`;
// this relies on the tag stream being valid for the whole dispatch -- confirm
// against the host-side buffer setup.
void main() {
    // Inclusive scan of this invocation's own N_ROWS elements.
    Monoid local[N_ROWS];

    uint lid = gl_LocalInvocationID.x;
    // Index of the first draw tag handled by this invocation. The same index
    // addresses the draw monoid output and is used for EndClip entries.
    uint ix = gl_GlobalInvocationID.x * N_ROWS;
    uint drawtag_base = conf.drawtag_offset >> 2;

    // Step 1: sequential scan over this invocation's tags.
    uint tag_word = scene[drawtag_base + ix];
    Monoid agg = map_tag(tag_word);
    local[0] = agg;
    for (uint i = 1; i < N_ROWS; i++) {
        tag_word = scene[drawtag_base + ix + i];
        agg = combine_draw_monoid(agg, map_tag(tag_word));
        local[i] = agg;
    }

    // Step 2: workgroup-wide inclusive scan of the per-invocation aggregates.
    // Each round combines with the value (1 << i) slots to the left. The first
    // barrier makes the previous round's writes visible before reading; the
    // second keeps any invocation from overwriting a slot that another one is
    // still reading.
    sh_scratch[lid] = agg;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        barrier();
        if (lid >= (1u << i)) {
            Monoid other = sh_scratch[lid - (1u << i)];
            agg = combine_draw_monoid(other, agg);
        }
        barrier();
        sh_scratch[lid] = agg;
    }
    barrier();

    // Step 3: prefix of everything before this invocation's first element.
    Monoid row = draw_monoid_identity();
    if (gl_WorkGroupID.x > 0) {
        row = parent[gl_WorkGroupID.x - 1];
    }
    if (lid > 0) {
        row = combine_draw_monoid(row, sh_scratch[lid - 1]);
    }

    // Loop-invariant base indices into `scene` / `memory`.
    uint drawdata_base = conf.drawdata_offset >> 2;
    uint drawinfo_base = conf.drawinfo_alloc.offset >> 2;
    uint out_base = (conf.drawmonoid_alloc.offset >> 2) + ix * 4;
    uint clip_out_base = conf.clip_alloc.offset >> 2;
    uint path_bbox_base = conf.path_bbox_alloc.offset >> 2;
    uint trans_base = conf.trans_alloc.offset >> 2;

    // Step 4: per-element output.
    for (uint i = 0; i < N_ROWS; i++) {
        Monoid m = row;
        if (i > 0) {
            m = combine_draw_monoid(m, local[i - 1]);
        }
        // m now holds exclusive scan of draw monoid
        uint mono_ix = out_base + i * 4;
        memory[mono_ix] = m.path_ix;
        memory[mono_ix + 1] = m.clip_ix;
        memory[mono_ix + 2] = m.scene_offset;
        memory[mono_ix + 3] = m.info_offset;

        // u32 offset of drawobj data
        uint dd = drawdata_base + (m.scene_offset >> 2);
        // u32 offset of this draw object's info words
        uint di = drawinfo_base + (m.info_offset >> 2);

        // For compatibility, we'll generate an Annotated object, same as old
        // pipeline. However, going forward we'll get rid of that, and have
        // later stages read scene + bbox etc.
        tag_word = scene[drawtag_base + ix + i];
        bool is_gradient =
            tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient;
        if (tag_word == Drawtag_FillColor || is_gradient ||
            tag_word == Drawtag_FillImage || tag_word == Drawtag_BeginClip) {
            // Six words per path in the bbox buffer; only word 4 (line width)
            // and word 5 (transform index) are needed here.
            uint bbox_offset = path_bbox_base + 6 * m.path_ix;
            float linewidth = uintBitsToFloat(memory[bbox_offset + 4]);

            // `mat` is only read on paths where it has been loaded below
            // (non-negative line width, or a gradient); `translate` only for
            // gradients.
            vec4 mat;
            vec2 translate;
            if (linewidth >= 0.0 || is_gradient) {
                uint trans_ix = memory[bbox_offset + 5];
                // Six words per transform: 2x2 matrix followed by translation.
                uint t = trans_base + 6 * trans_ix;
                mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3]));
                if (is_gradient) {
                    translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5]));
                }
            }
            if (linewidth >= 0.0) {
                // Scale the line width by sqrt(|det|) of the transform.
                // TODO: need to deal with anisotropic case
                linewidth *= sqrt(abs(mat.x * mat.w - mat.y * mat.z));
            }

            switch (tag_word) {
            case Drawtag_FillColor:
            case Drawtag_FillImage:
                memory[di] = floatBitsToUint(linewidth);
                break;
            case Drawtag_FillLinGradient: {
                memory[di] = floatBitsToUint(linewidth);
                // Transform both end points, then store the coefficients of
                // t = line_x * x + line_y * y + line_c, which is 0 at p0 and
                // 1 at p1.
                // NOTE(review): p0 == p1 divides by zero; not handled here.
                vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2]));
                vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4]));
                p0 = mat.xy * p0.x + mat.zw * p0.y + translate;
                p1 = mat.xy * p1.x + mat.zw * p1.y + translate;
                vec2 dxy = p1 - p0;
                float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y);
                float line_x = dxy.x * scale;
                float line_y = dxy.y * scale;
                float line_c = -(p0.x * line_x + p0.y * line_y);
                memory[di + 1] = floatBitsToUint(line_x);
                memory[di + 2] = floatBitsToUint(line_y);
                memory[di + 3] = floatBitsToUint(line_c);
                break;
            }
            case Drawtag_FillRadGradient: {
                // Stores the inverse transform (matrix and translation offset
                // by p0) followed by the radial parameters c1, ra and roff.
                // NOTE(review): a singular transform, r0 == r1, or
                // r1 * r1 == dot(center1, center1) divides by zero; not
                // handled here.
                vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2]));
                vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4]));
                float r0 = uintBitsToFloat(scene[dd + 5]);
                float r1 = uintBitsToFloat(scene[dd + 6]);
                float inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z);
                vec4 inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x);
                vec2 inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y;
                inv_tr += p0;
                vec2 center1 = p1 - p0;
                float rr = r1 / (r1 - r0);
                float rainv = rr / (r1 * r1 - dot(center1, center1));
                vec2 c1 = center1 * rainv;
                float ra = rr * rainv;
                float roff = rr - 1.0;
                memory[di] = floatBitsToUint(linewidth);
                memory[di + 1] = floatBitsToUint(inv_mat.x);
                memory[di + 2] = floatBitsToUint(inv_mat.y);
                memory[di + 3] = floatBitsToUint(inv_mat.z);
                memory[di + 4] = floatBitsToUint(inv_mat.w);
                memory[di + 5] = floatBitsToUint(inv_tr.x);
                memory[di + 6] = floatBitsToUint(inv_tr.y);
                memory[di + 7] = floatBitsToUint(c1.x);
                memory[di + 8] = floatBitsToUint(c1.y);
                memory[di + 9] = floatBitsToUint(ra);
                memory[di + 10] = floatBitsToUint(roff);
                break;
            }
            case Drawtag_BeginClip:
                // No draw info is written for BeginClip.
                break;
            }
        }

        // Generate clip stream: BeginClip stores its path index, EndClip
        // stores the bitwise complement of the draw object index.
        if (tag_word == Drawtag_BeginClip || tag_word == Drawtag_EndClip) {
            uint path_ix = (tag_word == Drawtag_BeginClip) ? m.path_ix : ~(ix + i);
            memory[clip_out_base + m.clip_ix] = path_ix;
        }
    }
}