From de4f963ba0f07c7a2eb7d19e294e30bc431067da Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Fri, 28 Aug 2020 17:37:46 +0200 Subject: [PATCH] shader: remove dead code Signed-off-by: Elias Naur --- piet-gpu/shader/binning.comp | 13 ----- piet-gpu/shader/coarse.comp | 100 -------------------------------- piet-gpu/shader/kernel4.comp | 3 - piet-gpu/shader/tile_alloc.comp | 1 - 4 files changed, 117 deletions(-) diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index 1413927..3ed507b 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -33,8 +33,6 @@ layout(set = 0, binding = 2) buffer BinsBuf { #define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX)) #define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX)) -#define TSY (1.0 / float(TILE_HEIGHT_PX)) - // Constant not available in GLSL. Also consider uintBitsToFloat(0x7f800000) #define INFINITY (1.0 / 0.0) @@ -47,7 +45,6 @@ shared uint sh_chunk_start[N_TILE]; shared float sh_right_edge[N_TILE]; void main() { - uint chunk_n = 0; uint my_n_elements = n_elements; uint my_partition = gl_WorkGroupID.x; @@ -65,17 +62,7 @@ void main() { } int x0 = 0, y0 = 0, x1 = 0, y1 = 0; float my_right_edge = INFINITY; - // bool crosses_edge = false; switch (tag) { - // case Annotated_FillLine: - // case Annotated_StrokeLine: - // AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref); - // x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX)); - // y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY)); - // x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX)); - // y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY)); - // crosses_edge = tag == Annotated_FillLine && ceil(line.p0.y * TSY) != ceil(line.p1.y * TSY); - // break; case Annotated_Fill: case Annotated_Stroke: // Note: we take advantage of the fact that fills and strokes diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 3dcd856..56b3c07 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -63,10 +63,6 @@ shared uint sh_tile_y0[N_TILE]; shared uint sh_tile_base[N_TILE]; shared uint sh_tile_stride[N_TILE]; -// scale factors useful for converting coordinates to tiles -#define SX (1.0 / float(TILE_WIDTH_PX)) -#define SY (1.0 / float(TILE_HEIGHT_PX)) - // Perhaps cmd_limit should be a global? This is a style question. void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) { if (cmd_ref.offset > cmd_limit) { @@ -84,8 +80,6 @@ void main() { uint bin_ix = N_TILE_X * gl_WorkGroupID.y + gl_WorkGroupID.x; uint partition_ix = 0; uint n_partitions = (n_elements + N_TILE - 1) / N_TILE; - // Top left coordinates of this bin. - vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y); uint th_ix = gl_LocalInvocationID.x; // Coordinates of top left of bin, in tiles. @@ -105,7 +99,6 @@ void main() { // Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements uint part_start_ix = 0; uint ready_ix = 0; - int backdrop = 0; while (true) { for (uint i = 0; i < N_SLICE; i++) { sh_bitmaps[i][th_ix] = 0; @@ -243,99 +236,6 @@ void main() { barrier(); - // We've computed coverage and other info for each element in the input, now for - // the output stage. We'll do segments first using a more parallel algorithm. - - /* - uint seg_count = 0; - for (uint i = 0; i < N_SLICE; i++) { - seg_count += bitCount(sh_bitmaps[i][th_ix] & sh_is_segment[i]); - } - sh_seg_count[th_ix] = seg_count; - // Prefix sum of sh_seg_count - for (uint i = 0; i < LG_N_TILE; i++) { - barrier(); - if (th_ix >= (1 << i)) { - seg_count += sh_seg_count[th_ix - (1 << i)]; - } - barrier(); - sh_seg_count[th_ix] = seg_count; - } - if (th_ix == N_TILE - 1) { - sh_seg_alloc = atomicAdd(alloc, seg_count * Segment_size); - } - barrier(); - uint total_seg_count = sh_seg_count[N_TILE - 1]; - uint seg_alloc = sh_seg_alloc; - - // Output buffer is allocated as segments for each tile laid end-to-end. - - for (uint ix = th_ix; ix < total_seg_count; ix += N_TILE) { - // Find the work item; this thread is now not bound to an element or tile. - // First find the tile (by binary search) - uint tile_ix = 0; - for (uint i = 0; i < LG_N_TILE; i++) { - uint probe = tile_ix + ((N_TILE / 2) >> i); - if (ix >= sh_seg_count[probe - 1]) { - tile_ix = probe; - } - } - // Now, sh_seg_count[tile_ix - 1] <= ix < sh_seg_count[tile_ix]. - // (considering sh_seg_count[-1] == 0) - - // Index of segment within tile's segments - uint seq_ix = ix; - // Maybe consider a sentinel value to avoid the conditional? - if (tile_ix > 0) { - seq_ix -= sh_seg_count[tile_ix - 1]; - } - // Find the segment. This is done by linear scan through the bitmaps of the - // tile, accelerated by bit counting. Binary search might help, maybe not. - uint slice_ix = 0; - uint seq_bits; - - while (true) { - seq_bits = sh_bitmaps[slice_ix][tile_ix] & sh_is_segment[slice_ix]; - uint this_count = bitCount(seq_bits); - if (this_count > seq_ix) { - break; - } - seq_ix -= this_count; - slice_ix++; - } - // Now find position of nth bit set (n = seq_ix) in seq_bits; binary search - uint bit_ix = 0; - for (int i = 0; i < 5; i++) { - uint probe = bit_ix + (16 >> i); - if (seq_ix >= bitCount(seq_bits & ((1 << probe) - 1))) { - bit_ix = probe; - } - } - uint out_offset = seg_alloc + Segment_size * ix + SegChunk_size; - uint rd_el_ix = slice_ix * 32 + bit_ix; - uint element_ix = sh_elements[rd_el_ix]; - ref = AnnotatedRef(element_ix * Annotated_size); - AnnoFillLineSeg line = Annotated_FillLine_read(ref); - float y_edge = 0.0; - // This is basically the same logic as piet-metal, but should be made numerically robust. - if (Annotated_tag(ref) == Annotated_FillLine) { - vec2 tile_xy = xy0 + vec2((tile_ix % N_TILE_X) * TILE_WIDTH_PX, (tile_ix / N_TILE_X) * TILE_HEIGHT_PX); - y_edge = mix(line.p0.y, line.p1.y, (tile_xy.x - line.p0.x) / (line.p1.x - line.p0.x)); - if (min(line.p0.x, line.p1.x) < tile_xy.x && y_edge >= tile_xy.y && y_edge < tile_xy.y + TILE_HEIGHT_PX) { - if (line.p0.x > line.p1.x) { - line.p1 = vec2(tile_xy.x, y_edge); - } else { - line.p0 = vec2(tile_xy.x, y_edge); - } - } else { - y_edge = 1e9; - } - } - Segment seg = Segment(line.p0, line.p1, y_edge); - Segment_write(SegmentRef(seg_alloc + Segment_size * ix), seg); - } - */ - // Output non-segment elements for this tile. The thread does a sequential walk // through the non-segment elements, and for segments, count and backdrop are // aggregated using bit counting. diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 7727b2c..70c879f 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -6,7 +6,6 @@ #version 450 #extension GL_GOOGLE_include_directive : enable -#extension GL_KHR_shader_subgroup_basic : enable #include "setup.h" @@ -34,8 +33,6 @@ void main() { uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y); vec2 xy = vec2(xy_uint); - vec2 uv = xy * vec2(1.0 / IMAGE_WIDTH, 1.0 / IMAGE_HEIGHT); - //vec3 rgb = uv.xyy; vec3 rgb[CHUNK]; for (uint i = 0; i < CHUNK; i++) { rgb[i] = vec3(0.5); diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp index 593b87a..ee5d1db 100644 --- a/piet-gpu/shader/tile_alloc.comp +++ b/piet-gpu/shader/tile_alloc.comp @@ -65,7 +65,6 @@ void main() { Path path; path.bbox = uvec4(x0, y0, x1, y1); uint tile_count = (x1 - x0) * (y1 - y0); - uint n_tiles = tile_count; sh_tile_count[th_ix] = tile_count; // Prefix sum of sh_tile_count