// Copyright 2022 Google LLC // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Also licensed under MIT license, at your choice. // The binning stage #import config #import drawtag #import bbox @group(0) @binding(0) var config: Config; @group(0) @binding(1) var draw_monoids: array; @group(0) @binding(2) var path_bbox_buf: array; @group(0) @binding(3) var clip_bbox_buf: array>; @group(0) @binding(4) var intersected_bbox: array>; // TODO: put into shared include // TODO: robust memory (failure flags) struct BumpAllocators { binning: atomic, } @group(0) @binding(5) var bump: BumpAllocators; @group(0) @binding(6) var bin_data: array; struct BinHeader { element_count: u32, chunk_offset: u32, } @group(0) @binding(7) var bin_header: array; // These should probably be in a common block. let TILE_WIDTH = 16u; let TILE_HEIGHT = 16u; // Number of tiles per bin let N_TILE_X = 16u; let N_TILE_Y = 16u; let N_TILE = N_TILE_X * N_TILE_Y; // conversion factors from coordinates to bin let SX = 1.0 / f32(N_TILE_X * TILE_WIDTH); let SY = 1.0 / f32(N_TILE_Y * TILE_HEIGHT); let WG_SIZE = 256u; let N_SLICE = WG_SIZE / 32u; var sh_bitmaps: array, N_TILE>, N_SLICE>; var sh_count: array, N_SLICE>; var sh_chunk_offset: array; @compute @workgroup_size(256) fn main( @builtin(global_invocation_id) global_id: vec3, @builtin(local_invocation_id) local_id: vec3, @builtin(workgroup_id) wg_id: vec3, ) { for (var i = 0u; i < N_SLICE; i += 1u) { atomicStore(&sh_bitmaps[i][local_id.x], 0u); } // Read inputs and determine coverage of bins let element_ix = global_id.x; var x0 = 0; var y0 = 0; var x1 = 0; var y1 = 0; if element_ix < config.n_drawobj { let draw_monoid = draw_monoids[element_ix]; var clip_bbox = vec4(-1e9, -1e9, 1e9, 1e9); if draw_monoid.clip_ix > 0u { clip_bbox = clip_bbox_buf[draw_monoid.clip_ix - 1u]; } // For clip elements, clip_box is the bbox of the clip path, // intersected with enclosing clips. // For other elements, it is the bbox of the enclosing clips. // TODO check this is true let path_bbox = path_bbox_buf[draw_monoid.path_ix]; let pb = vec4(path_bbox.x0, path_bbox.y0, path_bbox.x1, path_bbox.y1); let bbox = bbox_intersect(clip_bbox, pb); bbox.zw = max(bbox.xy, bbox.zw); intersected_bbox[element_ix] = bbox; x0 = i32(floor(bbox.x * SX)); y0 = i32(floor(bbox.y * SY)); x1 = i32(ceil(bbox.z * SX)); y1 = i32(ceil(bbox.w * SY)); } let width_in_bins = i32((config.width_in_tiles + N_TILE_X - 1u) / N_TILE_X); let height_in_bins = i32((config.height_in_tiles + N_TILE_Y - 1u) / N_TILE_Y); x0 = clamp(x0, 0, width_in_bins); y0 = clamp(y0, 0, height_in_bins); x1 = clamp(x1, 0, width_in_bins); y1 = clamp(y1, 0, height_in_bins); if x0 == x1 { y1 = y0; } var x = x0; var y = y0; let my_slice = local_id.x / 32u; let my_mask = 1u << (local_id.x & 31u); while y < y1 { atomicOr(&sh_bitmaps[my_slice][y * width_in_bins + x], my_mask); x += 1u; if x == x1 { x = x0; y += 1; } } workgroupBarrier(); // Allocate output segments var element_count = 0u; for (var i = 0u; i < N_SLICE; i += 1u) { elementCount += countOneBits(atomicLoad(&sh_bitmaps[i][local_id.x])); sh_count[i][id_ix] = element_count; } // element_count is the number of draw objects covering this thread's bin let chunk_offset = atomicAdd(&bump.binning, element_count); sh_chunk_offset[local_id.x] = chunk_offset; bin_header[global_id.x].element_count = element_count; bin_header[global_id.x].chunk_offset = chunk_offset; workgroupBarrier(); // loop over bbox of bins touched by this draw object x = x0; y = y0; while y < y1 { let bin_ix = y * width_in_bins + x; let out_mask = atomicLoad(&sh_bitmaps[my_slice][bin_ix]); // I think this predicate will always be true... if (out_mask & my_mask) != 0 { var idx = countOneBits(out_mask & (my_mask - 1u)); if my_slice > 0 { idx += sh_count[my_slice - 1u][bin_ix]; } let offset = sh_chunk_offset[bin_ix]; bin_data[offset + idx] = element_ix; } x += 1u; if x == x1 { x = x0; y += 1u; } } }