diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp index 090db1d..afe4d62 100644 --- a/piet-gpu/shader/backdrop.comp +++ b/piet-gpu/shader/backdrop.comp @@ -1,4 +1,14 @@ // Propagation of tile backdrop for filling. +// +// Each thread reads one path element and calculates the number of spanned tiles +// based on the bounding box. +// In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel. +// For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel, +// and propagated from the left to the right (prefix summed). +// +// Output state: +// - Each path element has an array of tiles covering the whole path based on its bounding box +// - Each tile per path element contains the 'backdrop' and a list of subdivided path segments #version 450 #extension GL_GOOGLE_include_directive : enable @@ -17,8 +27,8 @@ layout(set = 0, binding = 0) buffer AnnotatedBuf { // This is really only used for n_elements; maybe we can handle that // a different way, but it's convenient to have the same signature as // tile allocation. -layout(set = 0, binding = 1) buffer AllocBuf { - uint n_elements; +layout(set = 0, binding = 1) readonly buffer AllocBuf { + uint n_elements; // paths uint n_pathseg; uint alloc; }; @@ -39,6 +49,7 @@ void main() { uint element_ix = gl_GlobalInvocationID.x; AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size); + // Work assignment: 1 thread : 1 path element uint row_count = 0; if (element_ix < n_elements) { uint tag = Annotated_tag(ref); @@ -67,6 +78,7 @@ void main() { sh_row_count[th_ix] = row_count; } barrier(); + // Work assignment: 1 thread : 1 path element row uint total_rows = sh_row_count[BACKDROP_WG - 1]; for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) { // Binary search to find element @@ -80,6 +92,7 @@ void main() { uint seq_ix = row - (el_ix > 0 ? 
sh_row_count[el_ix - 1] : 0); uint width = sh_row_width[el_ix]; // Process one row sequentially + // Read backdrop value per tile and prefix sum it uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width; uint sum = tile[tile_el_ix]; for (uint x = 1; x < width; x++) { diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv index a633c16..67d05df 100644 Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index d35c2d9..0ddeb7e 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -1,4 +1,8 @@ // The binning stage of the pipeline. +// +// Each workgroup processes N_TILE paths. +// Each thread processes one path and calculates a N_TILE_X x N_TILE_Y coverage mask +// based on the path bounding box to bin the paths. #version 450 #extension GL_GOOGLE_include_directive : enable @@ -17,7 +21,7 @@ layout(set = 0, binding = 1) buffer StateBuf { }; layout(set = 0, binding = 2) buffer AllocBuf { - uint n_elements; + uint n_elements; // paths // Will be incremented atomically to claim tiles uint tile_ix; uint alloc; @@ -41,6 +45,7 @@ layout(set = 0, binding = 3) buffer BinsBuf { #define INFINITY (1.0 / 0.0) // Note: cudaraster has N_TILE + 1 to cut down on bank conflicts. 
+// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps) shared uint bitmaps[N_SLICE][N_TILE]; shared uint count[N_SLICE][N_TILE]; shared uint sh_chunk_start[N_TILE]; @@ -72,17 +77,17 @@ void main() { } int x0 = 0, y0 = 0, x1 = 0, y1 = 0; float my_right_edge = INFINITY; - bool crosses_edge = false; + // bool crosses_edge = false; switch (tag) { - case Annotated_FillLine: - case Annotated_StrokeLine: - AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref); - x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX)); - y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY)); - x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX)); - y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY)); - crosses_edge = tag == Annotated_FillLine && ceil(line.p0.y * TSY) != ceil(line.p1.y * TSY); - break; + // case Annotated_FillLine: + // case Annotated_StrokeLine: + // AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref); + // x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX)); + // y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY)); + // x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX)); + // y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY)); + // crosses_edge = tag == Annotated_FillLine && ceil(line.p0.y * TSY) != ceil(line.p1.y * TSY); + // break; case Annotated_Fill: case Annotated_Stroke: // Note: we take advantage of the fact that fills and strokes @@ -98,6 +103,7 @@ void main() { break; } + /* // If the last element in this partition is a fill edge, then we need to do a // look-forward to find the right edge of its corresponding fill. That data is // recorded in aggregates computed in the element processing pass. @@ -126,6 +132,7 @@ void main() { if (crosses_edge) { x1 = int(ceil(my_right_edge * SX)); } + */ // At this point, we run an iterator over the coverage area, // trying to keep divergence low. 
diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index 524f9e4..0d30dfc 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index eec0bfe..3dcd856 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -1,4 +1,12 @@ // The coarse rasterizer stage of the pipeline. +// +// As input we have the ordered partitions of paths from the binning phase and +// the annotated tile list of segments and backdrop per path. +// +// Each workgroup operates on one bin by stream compacting +// the elements corresponding to the bin. +// +// As output we have an ordered command stream per tile. Every tile from a path (backdrop + segment list) will be encoded. #version 450 #extension GL_GOOGLE_include_directive : enable @@ -369,7 +377,6 @@ void main() { cmd_fill.rgba_color = fill.rgba_color; Cmd_Fill_write(cmd_ref, cmd_fill); } else { - AnnoFill fill = Annotated_Fill_read(ref); Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color)); } cmd_ref.offset += Cmd_size; diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index ad24e6b..692b6c1 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index a88bc3d..7727b2c 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -59,6 +59,7 @@ void main() { } break; case Cmd_Stroke: + // Calculate distance field from all the line segments in this tile. CmdStroke stroke = Cmd_Stroke_read(cmd_ref); float df[CHUNK]; for (uint k = 0; k < CHUNK; k++) df[k] = 1e9; @@ -81,6 +82,7 @@ void main() { } break; case Cmd_Fill: + // Calculate coverage based on backdrop + coverage of each line segment CmdFill fill = Cmd_Fill_read(cmd_ref); // Probably better to store as float, but conversion is no doubt cheap. 
float area[CHUNK]; diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 8c720a2..7c0a1fc 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -167,7 +167,7 @@ impl Renderer { let dev = MemFlags::device_local(); let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size(); - println!("scene: {} elements", n_elements); + println!("scene: {} elements, {} paths, {} path_segments", n_elements, n_paths, n_pathseg); let scene_buf = device .create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)