mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
acb3933d94
This patch switches to a variable-size encoding of draw objects. In addition to the CPU-side scene encoding, it changes the representation of intermediate per-draw-object state from the `Annotated` struct to a variable "info" encoding. In addition, the bounding boxes are moved to a separate array (for a more "structure of arrays" approach). Data that's unchanged from the scene encoding is not copied. Rather, downstream stages can access the data from the scene buffer (reducing allocation and copying). Prefix sums, computed in `DrawMonoid`, track the offset of both scene and intermediate data. The tags for the CPU-side encoding have been split into their own stream (again a change from AoS to SoA style). This is not necessarily the final form. There's some stuff (including at least one piet-gpu-derive type) that can be deleted. In addition, the linewidth field should probably move from the info to path-specific data. Also, the 1:1 correspondence between draw object and path has not yet been broken. Closes #152
62 lines
1.6 KiB
GLSL
62 lines
1.6 KiB
GLSL
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
// The reduction phase for draw scan implemented as a tree reduction.
|
|
|
|
#version 450
|
|
#extension GL_GOOGLE_include_directive : enable
|
|
|
|
#include "mem.h"
|
|
#include "setup.h"
|
|
|
|
// Number of consecutive draw tag words each invocation reads and combines
// before the workgroup-wide reduction.
#define N_ROWS 8
|
|
// log2 of the workgroup size. LG_WG_FACTOR is not defined in this file
// (presumably supplied by one of the included headers -- confirm).
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
|
// Number of invocations per workgroup; used as local_size_x below.
#define WG_SIZE (1 << LG_WG_SIZE)
|
|
// Number of tag words covered by one workgroup: WG_SIZE invocations times
// N_ROWS words each.
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
|
|
|
// One-dimensional workgroup of WG_SIZE invocations.
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
|
|
|
// Read-only storage buffer at binding 1 holding a single Config value.
// The Config type is declared outside this file (presumably in setup.h --
// confirm). This shader reads only conf.drawtag_offset from it.
layout(binding = 1) readonly buffer ConfigBuf {
|
|
Config conf;
|
|
};
|
|
|
|
// Read-only storage buffer at binding 2 exposing the scene as an array of
// uints. main() reads draw tag words from it, starting at word index
// (conf.drawtag_offset >> 2).
layout(binding = 2) readonly buffer SceneBuf {
|
|
uint[] scene;
|
|
};
|
|
|
|
#include "scene.h"
|
|
#include "drawtag.h"
|
|
|
|
// Local alias for the monoid type reduced by this shader. DrawMonoid is
// declared outside this file (presumably in drawtag.h -- confirm).
#define Monoid DrawMonoid
|
|
|
|
// Output storage buffer at set 0, binding 3. main() writes one Monoid per
// workgroup into it, indexed by gl_WorkGroupID.x.
layout(set = 0, binding = 3) buffer OutBuf {
|
|
Monoid[] outbuf;
|
|
};
|
|
|
|
// Workgroup-shared scratch array with one slot per invocation, used to
// exchange partial aggregates between invocations during the reduction.
shared Monoid sh_scratch[WG_SIZE];
|
|
|
|
// Reduce this workgroup's slice of the draw tag stream to a single monoid.
//
// Each invocation first folds N_ROWS consecutive tag words from the scene
// buffer into a private aggregate. The invocations then combine their
// aggregates through sh_scratch over LG_WG_SIZE rounds, and invocation 0
// stores its final aggregate to outbuf[gl_WorkGroupID.x].
void main() {
    uint local_ix = gl_LocalInvocationID.x;
    // Word index of the first tag handled by this invocation.
    uint first_tag = (conf.drawtag_offset >> 2) + gl_GlobalInvocationID.x * N_ROWS;

    // Sequential fold over this invocation's N_ROWS tag words, in order.
    Monoid total = map_tag(scene[first_tag]);
    for (uint row = 1; row < N_ROWS; row++) {
        total = combine_draw_monoid(total, map_tag(scene[first_tag + row]));
    }
    sh_scratch[local_ix] = total;

    // In round r, each invocation appends the aggregate currently published
    // by the invocation (1 << r) slots above it, when such a slot exists.
    for (uint round = 0; round < LG_WG_SIZE; round++) {
        uint partner = local_ix + (1u << round);
        // Wait until every invocation has published its current aggregate.
        barrier();
        // This predicate could be made tighter, but it is unclear that would help.
        if (partner < WG_SIZE) {
            total = combine_draw_monoid(total, sh_scratch[partner]);
        }
        // Wait until all reads of this round are done before overwriting.
        barrier();
        sh_scratch[local_ix] = total;
    }

    if (local_ix == 0) {
        outbuf[gl_WorkGroupID.x] = total;
    }
}
|