mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 10:46:34 +11:00
d948126c16
Make max workgroup size 256 and respect LG_WG_FACTOR. Because the monoid scans only support a height of 2, this will reduce the maximum scene complexity we can render. But it also increases compatibility. Supporting larger scans is a TODO.
69 lines
1.8 KiB
GLSL
69 lines
1.8 KiB
GLSL
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
// The reduction phase for transform scan implemented as a tree reduction.
|
|
|
|
#version 450
|
|
#extension GL_GOOGLE_include_directive : enable
|
|
|
|
#include "mem.h"
|
|
#include "setup.h"
|
|
|
|
// Number of consecutive elements each invocation folds sequentially in main().
#define N_ROWS 8
// log2 of the workgroup size. LG_WG_FACTOR is not defined in this file;
// presumably it comes from one of the included headers (TODO confirm).
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
// Invocations per workgroup, i.e. 128 << LG_WG_FACTOR.
#define WG_SIZE (1 << LG_WG_SIZE)
// Elements covered by one workgroup (WG_SIZE invocations x N_ROWS each).
// Not referenced elsewhere in this file.
#define PARTITION_SIZE (WG_SIZE * N_ROWS)

// One-dimensional workgroup of WG_SIZE invocations.
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
|
|
|
// Read-only configuration block; main() reads conf.trans_offset from it.
layout(binding = 1) readonly buffer ConfigBuf {
    Config conf;
};

// Read-only scene data exposed as an unsized array of 32-bit words.
// Not accessed directly in this file; presumably read through the helpers
// declared in scene.h (TODO confirm).
layout(binding = 2) readonly buffer SceneBuf {
    uint scene[];
};
|
|
|
|
#include "scene.h"
|
|
|
|
// The monoid being reduced by this shader is the Transform struct.
#define Monoid Transform

// Output buffer: main() stores one aggregate per workgroup, indexed by
// gl_WorkGroupID.x.
layout(set = 0, binding = 3) buffer OutBuf {
    Monoid outbuf[];
};
|
|
|
|
// Returns the identity element for combine_monoid: a matrix of (1, 0, 0, 1)
// (the 2x2 identity) paired with a zero translation.
Monoid monoid_identity() {
    vec4 unit_mat = vec4(1.0, 0.0, 0.0, 1.0);
    vec2 zero_translate = vec2(0.0, 0.0);
    return Monoid(unit_mat, zero_translate);
}
|
|
|
|
// Composes two transforms. Treating mat.xy and mat.zw as the two columns of
// a 2x2 matrix, the result's matrix is the product a.mat * b.mat and its
// translation is a.mat * b.translate + a.translate. The operation is not
// commutative, so argument order matters.
Monoid combine_monoid(Monoid a, Monoid b) {
    // Contributions of a's first and second column to each result column.
    vec4 from_col0 = a.mat.xyxy * b.mat.xxzz;
    vec4 from_col1 = a.mat.zwzw * b.mat.yyww;
    // b's translation pushed through a's matrix.
    vec2 moved = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y;

    Monoid result;
    result.mat = from_col0 + from_col1;
    result.translate = moved + a.translate;
    return result;
}
|
|
|
|
// Workgroup-shared scratch array with one Monoid slot per invocation; main() indexes it by gl_LocalInvocationID.x.
shared Monoid sh_scratch[WG_SIZE];
|
|
|
|
// Reduction entry point.
//
// Each invocation folds N_ROWS consecutive transforms into one aggregate,
// the workgroup then combines those aggregates through sh_scratch, and
// invocation 0 writes the workgroup total to outbuf[gl_WorkGroupID.x].
// Transform_read / Transform_index / Transform_size are not defined in this
// file (presumably scene.h provides them).
void main() {
    uint local_ix = gl_LocalInvocationID.x;

    // Index of the first element handled by this invocation.
    uint first_elem = gl_GlobalInvocationID.x * N_ROWS;
    TransformRef base = TransformRef(conf.trans_offset + first_elem * Transform_size);

    // Sequential fold, in order, over this invocation's N_ROWS elements.
    Monoid agg = Transform_read(base);
    for (uint row = 1u; row < N_ROWS; row++) {
        TransformRef elem = Transform_index(base, row);
        agg = combine_monoid(agg, Transform_read(elem));
    }
    sh_scratch[local_ix] = agg;

    // Each step appends the aggregate held 2^step slots to the right, so the
    // span covered by an invocation doubles until slot 0 covers every slot.
    for (uint step = 0u; step < LG_WG_SIZE; step++) {
        uint partner = local_ix + (1u << step);

        // Wait until every slot holds the previous step's result.
        barrier();
        // Only in-range partners contribute; the predicate could be made
        // tighter, but it is unclear whether that would help.
        if (partner < WG_SIZE) {
            agg = combine_monoid(agg, sh_scratch[partner]);
        }
        // Wait until all reads are done before any slot is overwritten.
        barrier();
        sh_scratch[local_ix] = agg;
    }

    if (local_ix == 0u) {
        outbuf[gl_WorkGroupID.x] = agg;
    }
}
|