mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 04:31:30 +11:00
Merge pull request #24 from msiglreith/docs
Shader documentation and a slight cleanup
This commit is contained in:
commit
368721e317
|
@ -1,4 +1,14 @@
|
||||||
// Propagation of tile backdrop for filling.
|
// Propagation of tile backdrop for filling.
|
||||||
|
//
|
||||||
|
// Each thread reads one path element and calculates the number of spanned tiles
|
||||||
|
// based on the bounding box.
|
||||||
|
// In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel.
|
||||||
|
// For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
|
||||||
|
// and propagated from the left to the right (prefix summed).
|
||||||
|
//
|
||||||
|
// Output state:
|
||||||
|
// - Each path element has an array of tiles covering the whole path based on its bounding box
|
||||||
|
// - Each tile per path element contains the 'backdrop' and a list of subdivided path segments
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
@ -17,8 +27,8 @@ layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
||||||
// This is really only used for n_elements; maybe we can handle that
|
// This is really only used for n_elements; maybe we can handle that
|
||||||
// a different way, but it's convenient to have the same signature as
|
// a different way, but it's convenient to have the same signature as
|
||||||
// tile allocation.
|
// tile allocation.
|
||||||
layout(set = 0, binding = 1) buffer AllocBuf {
|
layout(set = 0, binding = 1) readonly buffer AllocBuf {
|
||||||
uint n_elements;
|
uint n_elements; // paths
|
||||||
uint n_pathseg;
|
uint n_pathseg;
|
||||||
uint alloc;
|
uint alloc;
|
||||||
};
|
};
|
||||||
|
@ -39,6 +49,7 @@ void main() {
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
|
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
|
||||||
|
|
||||||
|
// Work assignment: 1 thread : 1 path element
|
||||||
uint row_count = 0;
|
uint row_count = 0;
|
||||||
if (element_ix < n_elements) {
|
if (element_ix < n_elements) {
|
||||||
uint tag = Annotated_tag(ref);
|
uint tag = Annotated_tag(ref);
|
||||||
|
@ -67,6 +78,7 @@ void main() {
|
||||||
sh_row_count[th_ix] = row_count;
|
sh_row_count[th_ix] = row_count;
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
// Work assignment: 1 thread : 1 path element row
|
||||||
uint total_rows = sh_row_count[BACKDROP_WG - 1];
|
uint total_rows = sh_row_count[BACKDROP_WG - 1];
|
||||||
for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
|
for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
|
||||||
// Binary search to find element
|
// Binary search to find element
|
||||||
|
@ -80,6 +92,7 @@ void main() {
|
||||||
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
|
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
|
||||||
uint width = sh_row_width[el_ix];
|
uint width = sh_row_width[el_ix];
|
||||||
// Process one row sequentially
|
// Process one row sequentially
|
||||||
|
// Read backdrop value per tile and prefix sum it
|
||||||
uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width;
|
uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width;
|
||||||
uint sum = tile[tile_el_ix];
|
uint sum = tile[tile_el_ix];
|
||||||
for (uint x = 1; x < width; x++) {
|
for (uint x = 1; x < width; x++) {
|
||||||
|
|
Binary file not shown.
|
@ -1,4 +1,8 @@
|
||||||
// The binning stage of the pipeline.
|
// The binning stage of the pipeline.
|
||||||
|
//
|
||||||
|
// Each workgroup processes N_TILE paths.
|
||||||
|
// Each thread processes one path and calculates an N_TILE_X x N_TILE_Y coverage mask
|
||||||
|
// based on the path bounding box to bin the paths.
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
@ -17,7 +21,7 @@ layout(set = 0, binding = 1) buffer StateBuf {
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer AllocBuf {
|
layout(set = 0, binding = 2) buffer AllocBuf {
|
||||||
uint n_elements;
|
uint n_elements; // paths
|
||||||
// Will be incremented atomically to claim tiles
|
// Will be incremented atomically to claim tiles
|
||||||
uint tile_ix;
|
uint tile_ix;
|
||||||
uint alloc;
|
uint alloc;
|
||||||
|
@ -41,6 +45,7 @@ layout(set = 0, binding = 3) buffer BinsBuf {
|
||||||
#define INFINITY (1.0 / 0.0)
|
#define INFINITY (1.0 / 0.0)
|
||||||
|
|
||||||
// Note: cudaraster has N_TILE + 1 to cut down on bank conflicts.
|
// Note: cudaraster has N_TILE + 1 to cut down on bank conflicts.
|
||||||
|
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
|
||||||
shared uint bitmaps[N_SLICE][N_TILE];
|
shared uint bitmaps[N_SLICE][N_TILE];
|
||||||
shared uint count[N_SLICE][N_TILE];
|
shared uint count[N_SLICE][N_TILE];
|
||||||
shared uint sh_chunk_start[N_TILE];
|
shared uint sh_chunk_start[N_TILE];
|
||||||
|
@ -72,17 +77,17 @@ void main() {
|
||||||
}
|
}
|
||||||
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
||||||
float my_right_edge = INFINITY;
|
float my_right_edge = INFINITY;
|
||||||
bool crosses_edge = false;
|
// bool crosses_edge = false;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Annotated_FillLine:
|
// case Annotated_FillLine:
|
||||||
case Annotated_StrokeLine:
|
// case Annotated_StrokeLine:
|
||||||
AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
|
// AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
|
||||||
x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX));
|
// x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX));
|
||||||
y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY));
|
// y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY));
|
||||||
x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX));
|
// x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX));
|
||||||
y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY));
|
// y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY));
|
||||||
crosses_edge = tag == Annotated_FillLine && ceil(line.p0.y * TSY) != ceil(line.p1.y * TSY);
|
// crosses_edge = tag == Annotated_FillLine && ceil(line.p0.y * TSY) != ceil(line.p1.y * TSY);
|
||||||
break;
|
// break;
|
||||||
case Annotated_Fill:
|
case Annotated_Fill:
|
||||||
case Annotated_Stroke:
|
case Annotated_Stroke:
|
||||||
// Note: we take advantage of the fact that fills and strokes
|
// Note: we take advantage of the fact that fills and strokes
|
||||||
|
@ -98,6 +103,7 @@ void main() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
// If the last element in this partition is a fill edge, then we need to do a
|
// If the last element in this partition is a fill edge, then we need to do a
|
||||||
// look-forward to find the right edge of its corresponding fill. That data is
|
// look-forward to find the right edge of its corresponding fill. That data is
|
||||||
// recorded in aggregates computed in the element processing pass.
|
// recorded in aggregates computed in the element processing pass.
|
||||||
|
@ -126,6 +132,7 @@ void main() {
|
||||||
if (crosses_edge) {
|
if (crosses_edge) {
|
||||||
x1 = int(ceil(my_right_edge * SX));
|
x1 = int(ceil(my_right_edge * SX));
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// At this point, we run an iterator over the coverage area,
|
// At this point, we run an iterator over the coverage area,
|
||||||
// trying to keep divergence low.
|
// trying to keep divergence low.
|
||||||
|
|
Binary file not shown.
|
@ -1,4 +1,12 @@
|
||||||
// The coarse rasterizer stage of the pipeline.
|
// The coarse rasterizer stage of the pipeline.
|
||||||
|
//
|
||||||
|
// As input we have the ordered partitions of paths from the binning phase and
|
||||||
|
// the annotated tile list of segments and backdrop per path.
|
||||||
|
//
|
||||||
|
// Each workgroup operates on one bin by stream compacting
|
||||||
|
// the elements corresponding to the bin.
|
||||||
|
//
|
||||||
|
// As output we have an ordered command stream per tile. Every tile from a path (backdrop + segment list) will be encoded.
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
@ -369,7 +377,6 @@ void main() {
|
||||||
cmd_fill.rgba_color = fill.rgba_color;
|
cmd_fill.rgba_color = fill.rgba_color;
|
||||||
Cmd_Fill_write(cmd_ref, cmd_fill);
|
Cmd_Fill_write(cmd_ref, cmd_fill);
|
||||||
} else {
|
} else {
|
||||||
AnnoFill fill = Annotated_Fill_read(ref);
|
|
||||||
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
||||||
}
|
}
|
||||||
cmd_ref.offset += Cmd_size;
|
cmd_ref.offset += Cmd_size;
|
||||||
|
|
Binary file not shown.
|
@ -59,6 +59,7 @@ void main() {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Cmd_Stroke:
|
case Cmd_Stroke:
|
||||||
|
// Calculate distance field from all the line segments in this tile.
|
||||||
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
||||||
float df[CHUNK];
|
float df[CHUNK];
|
||||||
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
||||||
|
@ -81,6 +82,7 @@ void main() {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Cmd_Fill:
|
case Cmd_Fill:
|
||||||
|
// Calculate coverage based on backdrop + coverage of each line segment
|
||||||
CmdFill fill = Cmd_Fill_read(cmd_ref);
|
CmdFill fill = Cmd_Fill_read(cmd_ref);
|
||||||
// Probably better to store as float, but conversion is no doubt cheap.
|
// Probably better to store as float, but conversion is no doubt cheap.
|
||||||
float area[CHUNK];
|
float area[CHUNK];
|
||||||
|
|
|
@ -167,7 +167,7 @@ impl<D: Device> Renderer<D> {
|
||||||
let dev = MemFlags::device_local();
|
let dev = MemFlags::device_local();
|
||||||
|
|
||||||
let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
|
let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
|
||||||
println!("scene: {} elements", n_elements);
|
println!("scene: {} elements, {} paths, {} path_segments", n_elements, n_paths, n_pathseg);
|
||||||
|
|
||||||
let scene_buf = device
|
let scene_buf = device
|
||||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
|
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
|
||||||
|
|
Loading…
Reference in a new issue