From c749addf6cfcb693397c50c3259c19a3f4addc8e Mon Sep 17 00:00:00 2001 From: Chad Brokaw Date: Mon, 9 May 2022 22:39:59 -0400 Subject: [PATCH] rebase on timer query patch --- Cargo.lock | 1 + piet-gpu-hal/Cargo.toml | 1 + piet-gpu-hal/examples/collatz.rs | 8 +- piet-gpu-hal/src/backend.rs | 36 ++- piet-gpu-hal/src/dx12.rs | 35 ++- piet-gpu-hal/src/dx12/wrappers.rs | 3 - piet-gpu-hal/src/hub.rs | 91 +++++--- piet-gpu-hal/src/lib.rs | 24 +- piet-gpu-hal/src/metal.rs | 321 +++++++++++++++++++++++--- piet-gpu-hal/src/metal/timer.rs | 172 ++++++++++++++ piet-gpu-hal/src/mux.rs | 17 ++ piet-gpu-hal/src/vulkan.rs | 55 +++-- piet-gpu/bin/cli.rs | 8 +- piet-gpu/bin/winit.rs | 45 ++-- piet-gpu/shader/coarse.comp | 2 +- piet-gpu/shader/gen/binning.dxil | Bin 6336 -> 6336 bytes piet-gpu/shader/gen/coarse.dxil | Bin 11652 -> 11628 bytes piet-gpu/shader/gen/coarse.hlsl | 79 +++---- piet-gpu/shader/gen/coarse.msl | 61 +++-- piet-gpu/shader/gen/coarse.spv | Bin 58964 -> 58852 bytes piet-gpu/shader/gen/draw_leaf.dxil | Bin 6760 -> 6764 bytes piet-gpu/shader/gen/draw_reduce.dxil | Bin 4264 -> 4260 bytes piet-gpu/shader/gen/draw_root.dxil | Bin 4468 -> 4468 bytes piet-gpu/shader/gen/kernel4.dxil | Bin 15108 -> 15112 bytes piet-gpu/shader/gen/kernel4_gray.dxil | Bin 15016 -> 15016 bytes piet-gpu/shader/gen/tile_alloc.dxil | Bin 5136 -> 5132 bytes piet-gpu/src/encoder.rs | 156 ------------- piet-gpu/src/lib.rs | 98 ++++---- piet-gpu/src/render_ctx.rs | 6 +- piet-gpu/src/stages.rs | 10 +- piet-gpu/src/stages/clip.rs | 12 +- piet-gpu/src/stages/draw.rs | 14 +- piet-gpu/src/stages/path.rs | 16 +- piet-gpu/src/stages/transform.rs | 14 +- piet-gpu/src/test_scenes.rs | 7 +- tests/src/clear.rs | 19 +- tests/src/clip.rs | 6 +- tests/src/draw.rs | 6 +- tests/src/linkedlist.rs | 6 +- tests/src/message_passing.rs | 6 +- tests/src/path.rs | 6 +- tests/src/prefix.rs | 6 +- tests/src/prefix_tree.rs | 14 +- tests/src/runner.rs | 14 +- tests/src/transform.rs | 6 +- 45 files changed, 887 insertions(+), 494 deletions(-) create mode 100644 piet-gpu-hal/src/metal/timer.rs diff --git a/Cargo.lock b/Cargo.lock index 4e98c9e..cb6b76a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -925,6 +925,7 @@ dependencies = [ "block", "bytemuck", "cocoa-foundation", + "foreign-types", "metal", "objc", "raw-window-handle 0.3.4", diff --git a/piet-gpu-hal/Cargo.toml b/piet-gpu-hal/Cargo.toml index 29b51bd..f9b844a 100644 --- a/piet-gpu-hal/Cargo.toml +++ b/piet-gpu-hal/Cargo.toml @@ -28,3 +28,4 @@ metal = "0.22" objc = "0.2.5" block = "0.1.6" cocoa-foundation = "0.1" +foreign-types = "0.3.2" diff --git a/piet-gpu-hal/examples/collatz.rs b/piet-gpu-hal/examples/collatz.rs index dae5b31..7aff938 100644 --- a/piet-gpu-hal/examples/collatz.rs +++ b/piet-gpu-hal/examples/collatz.rs @@ -1,4 +1,4 @@ -use piet_gpu_hal::{include_shader, BindType}; +use piet_gpu_hal::{include_shader, BindType, ComputePassDescriptor}; use piet_gpu_hal::{BufferUsage, Instance, InstanceFlags, Session}; fn main() { @@ -20,9 +20,9 @@ fn main() { let mut cmd_buf = session.cmd_buf().unwrap(); cmd_buf.begin(); cmd_buf.reset_query_pool(&query_pool); - cmd_buf.write_timestamp(&query_pool, 0); - cmd_buf.dispatch(&pipeline, &descriptor_set, (256, 1, 1), (1, 1, 1)); - cmd_buf.write_timestamp(&query_pool, 1); + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 0, 1)); + pass.dispatch(&pipeline, &descriptor_set, (256, 1, 1), (1, 1, 1)); + pass.end(); cmd_buf.finish_timestamps(&query_pool); cmd_buf.host_barrier(); cmd_buf.finish(); diff --git a/piet-gpu-hal/src/backend.rs b/piet-gpu-hal/src/backend.rs index 02ac7cb..f2c67a1 100644 --- a/piet-gpu-hal/src/backend.rs +++ b/piet-gpu-hal/src/backend.rs @@ -17,7 +17,8 @@ //! The generic trait for backends to implement. use crate::{ - BindType, BufferUsage, Error, GpuInfo, ImageFormat, ImageLayout, MapMode, SamplerParams, + BindType, BufferUsage, ComputePassDescriptor, Error, GpuInfo, ImageFormat, ImageLayout, + MapMode, SamplerParams, }; pub trait Device: Sized { @@ -159,14 +160,32 @@ pub trait Device: Sized { unsafe fn create_sampler(&self, params: SamplerParams) -> Result; } +/// The trait implemented by backend command buffer implementations. +/// +/// Valid encoding is represented by a state machine (currently not validated +/// but it is easy to imagine there might be at least debug validation). Most +/// methods are only valid in a particular state, and some move it to another +/// state. pub trait CmdBuf { + /// Begin encoding. + /// + /// State: init -> ready unsafe fn begin(&mut self); + /// State: ready -> finished unsafe fn finish(&mut self); /// Return true if the command buffer is suitable for reuse. unsafe fn reset(&mut self) -> bool; + /// Begin a compute pass. + /// + /// State: ready -> in_compute_pass + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor); + + /// Dispatch + /// + /// State: in_compute_pass unsafe fn dispatch( &mut self, pipeline: &D::Pipeline, @@ -175,6 +194,9 @@ pub trait CmdBuf { workgroup_size: (u32, u32, u32), ); + /// State: in_compute_pass -> ready + unsafe fn end_compute_pass(&mut self); + /// Insert an execution and memory barrier. /// /// Compute kernels (and other actions) after this barrier may read from buffers @@ -202,16 +224,16 @@ pub trait CmdBuf { /// This is readily supported in Vulkan, but for portability it is remarkably /// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute /// kernel, or organize the code not to need it. - unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option); + unsafe fn clear_buffer(&mut self, buffer: &D::Buffer, size: Option); - unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer); + unsafe fn copy_buffer(&mut self, src: &D::Buffer, dst: &D::Buffer); - unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer); + unsafe fn copy_image_to_buffer(&mut self, src: &D::Image, dst: &D::Buffer); - unsafe fn copy_buffer_to_image(&self, src: &D::Buffer, dst: &D::Image); + unsafe fn copy_buffer_to_image(&mut self, src: &D::Buffer, dst: &D::Image); // low portability, dx12 doesn't support it natively - unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image); + unsafe fn blit_image(&mut self, src: &D::Image, dst: &D::Image); /// Reset the query pool. /// @@ -227,7 +249,7 @@ pub trait CmdBuf { unsafe fn finish_timestamps(&mut self, _pool: &D::QueryPool) {} /// Begin a labeled section for debugging and profiling purposes. - unsafe fn begin_debug_label(&mut self, label: &str) {} + unsafe fn begin_debug_label(&mut self, _label: &str) {} /// End a section opened by `begin_debug_label`. unsafe fn end_debug_label(&mut self) {} diff --git a/piet-gpu-hal/src/dx12.rs b/piet-gpu-hal/src/dx12.rs index 78ad449..c5e1e04 100644 --- a/piet-gpu-hal/src/dx12.rs +++ b/piet-gpu-hal/src/dx12.rs @@ -21,7 +21,7 @@ use raw_window_handle::{HasRawWindowHandle, RawWindowHandle}; use smallvec::SmallVec; -use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits, ImageFormat}; +use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits, ImageFormat, ComputePassDescriptor}; use self::{ descriptor::{CpuHeapRefOwned, DescriptorPool, GpuHeapRefOwned}, @@ -76,6 +76,7 @@ pub struct CmdBuf { c: wrappers::GraphicsCommandList, allocator: CommandAllocator, needs_reset: bool, + end_query: Option<(wrappers::QueryHeap, u32)>, } pub struct Pipeline { @@ -360,6 +361,7 @@ impl crate::backend::Device for Dx12Device { c, allocator, needs_reset: false, + end_query: None, }) } } @@ -388,11 +390,10 @@ impl crate::backend::Device for Dx12Device { let mapped = self.map_buffer(&pool.buf, 0, size as u64, MapMode::Read)?; std::ptr::copy_nonoverlapping(mapped, buf.as_mut_ptr() as *mut u8, size); self.unmap_buffer(&pool.buf, 0, size as u64, MapMode::Read)?; - let ts0 = buf[0]; let tsp = (self.ts_freq as f64).recip(); - let result = buf[1..] + let result = buf .iter() - .map(|ts| ts.wrapping_sub(ts0) as f64 * tsp) + .map(|ts| *ts as f64 * tsp) .collect(); Ok(result) } @@ -610,6 +611,16 @@ impl crate::backend::CmdBuf for CmdBuf { self.allocator.reset().is_ok() && self.c.reset(&self.allocator, None).is_ok() } + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) { + if let Some((pool, start, end)) = &desc.timer_queries { + #[allow(irrefutable_let_patterns)] + if let crate::hub::QueryPool::Dx12(pool) = pool { + self.write_timestamp(pool, *start); + self.end_query = Some((pool.heap.clone(), *end)); + } + } + } + unsafe fn dispatch( &mut self, pipeline: &Pipeline, @@ -628,6 +639,12 @@ impl crate::backend::CmdBuf for CmdBuf { .dispatch(workgroup_count.0, workgroup_count.1, workgroup_count.2); } + unsafe fn end_compute_pass(&mut self) { + if let Some((heap, end)) = self.end_query.take() { + self.c.end_timing_query(&heap, end); + } + } + unsafe fn memory_barrier(&mut self) { // See comments in CommandBuffer::pipeline_barrier in gfx-hal dx12 backend. // The "proper" way to do this would be to name the actual buffers participating @@ -666,7 +683,7 @@ impl crate::backend::CmdBuf for CmdBuf { self.memory_barrier(); } - unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option) { + unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option) { let cpu_ref = buffer.cpu_ref.as_ref().unwrap(); let (gpu_ref, heap) = buffer .gpu_ref @@ -684,23 +701,23 @@ impl crate::backend::CmdBuf for CmdBuf { ); } - unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { + unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) { // TODO: consider using copy_resource here (if sizes match) let size = src.size.min(dst.size); self.c.copy_buffer(&dst.resource, 0, &src.resource, 0, size); } - unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) { + unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) { self.c .copy_texture_to_buffer(&src.resource, &dst.resource, src.size.0, src.size.1); } - unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) { + unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) { self.c .copy_buffer_to_texture(&src.resource, &dst.resource, dst.size.0, dst.size.1); } - unsafe fn blit_image(&self, src: &Image, dst: &Image) { + unsafe fn blit_image(&mut self, src: &Image, dst: &Image) { self.c.copy_resource(&src.resource, &dst.resource); } diff --git a/piet-gpu-hal/src/dx12/wrappers.rs b/piet-gpu-hal/src/dx12/wrappers.rs index 4bbb86c..9a3fb90 100644 --- a/piet-gpu-hal/src/dx12/wrappers.rs +++ b/piet-gpu-hal/src/dx12/wrappers.rs @@ -79,7 +79,6 @@ pub struct Blob(pub ComPtr); #[derive(Clone)] pub struct ShaderByteCode { pub bytecode: d3d12::D3D12_SHADER_BYTECODE, - blob: Option, } #[derive(Clone)] @@ -741,7 +740,6 @@ impl ShaderByteCode { BytecodeLength: blob.0.GetBufferSize(), pShaderBytecode: blob.0.GetBufferPointer(), }, - blob: Some(blob), } } @@ -810,7 +808,6 @@ impl ShaderByteCode { BytecodeLength: bytecode.len(), pShaderBytecode: bytecode.as_ptr() as *const _, }, - blob: None, } } } diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index cc09832..ea17754 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -13,7 +13,7 @@ use std::sync::{Arc, Mutex, Weak}; use bytemuck::Pod; use smallvec::SmallVec; -use crate::{mux, BackendType, BufWrite, ImageFormat, MapMode}; +use crate::{mux, BackendType, BufWrite, ComputePassDescriptor, ImageFormat, MapMode}; use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams}; @@ -135,6 +135,11 @@ pub struct BufReadGuard<'a> { size: u64, } +/// A sub-object of a command buffer for a sequence of compute dispatches. +pub struct ComputePass<'a> { + cmd_buf: &'a mut CmdBuf, +} + impl Session { /// Create a new session, choosing the best backend. pub fn new(device: mux::Device) -> Session { @@ -370,8 +375,17 @@ impl Session { /// /// This should be called after waiting on the command buffer that wrote the /// timer queries. + /// + /// The returned vector is one shorter than the number of timer queries in the + /// pool; the first value is subtracted off. It would likely be better to return + /// the raw timestamps, but that change should be made consistently. pub unsafe fn fetch_query_pool(&self, pool: &QueryPool) -> Result, Error> { - self.0.device.fetch_query_pool(pool) + let result = self.0.device.fetch_query_pool(pool)?; + // Subtract off first timestamp. + Ok(result[1..] + .iter() + .map(|ts| *ts as f64 - result[0]) + .collect()) } #[doc(hidden)] @@ -471,23 +485,10 @@ impl CmdBuf { self.cmd_buf().finish(); } - /// Dispatch a compute shader. - /// - /// Request a compute shader to be run, using the pipeline to specify the - /// code, and the descriptor set to address the resources read and written. - /// - /// Both the workgroup count (number of workgroups) and the workgroup size - /// (number of threads in a workgroup) must be specified here, though not - /// all back-ends require the latter info. - pub unsafe fn dispatch( - &mut self, - pipeline: &Pipeline, - descriptor_set: &DescriptorSet, - workgroup_count: (u32, u32, u32), - workgroup_size: (u32, u32, u32), - ) { - self.cmd_buf() - .dispatch(pipeline, descriptor_set, workgroup_count, workgroup_size); + /// Begin a compute pass. + pub unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) -> ComputePass { + self.cmd_buf().begin_compute_pass(desc); + ComputePass { cmd_buf: self } } /// Insert an execution and memory barrier. @@ -582,13 +583,6 @@ impl CmdBuf { self.cmd_buf().reset_query_pool(pool); } - /// Write a timestamp. - /// - /// The query index must be less than the size of the query pool on creation. - pub unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { - self.cmd_buf().write_timestamp(pool, query); - } - /// Prepare the timestamps for reading. This isn't required on Vulkan but /// is required on (at least) DX12. /// @@ -692,6 +686,51 @@ impl Drop for SubmittedCmdBuf { } } +impl<'a> ComputePass<'a> { + /// Dispatch a compute shader. + /// + /// Request a compute shader to be run, using the pipeline to specify the + /// code, and the descriptor set to address the resources read and written. + /// + /// Both the workgroup count (number of workgroups) and the workgroup size + /// (number of threads in a workgroup) must be specified here, though not + /// all back-ends require the latter info. + pub unsafe fn dispatch( + &mut self, + pipeline: &Pipeline, + descriptor_set: &DescriptorSet, + workgroup_count: (u32, u32, u32), + workgroup_size: (u32, u32, u32), + ) { + self.cmd_buf + .cmd_buf() + .dispatch(pipeline, descriptor_set, workgroup_count, workgroup_size); + } + + /// Add a memory barrier. + /// + /// Inserts a memory barrier in the compute encoder. This is a convenience + /// function for calling the same function on the underlying command buffer, + /// avoiding borrow check issues. + pub unsafe fn memory_barrier(&mut self) { + self.cmd_buf.memory_barrier(); + } + + /// Begin a labeled section for debugging and profiling purposes. + pub unsafe fn begin_debug_label(&mut self, label: &str) { + self.cmd_buf.begin_debug_label(label); + } + + /// End a section opened by `begin_debug_label`. + pub unsafe fn end_debug_label(&mut self) { + self.cmd_buf.end_debug_label(); + } + + pub unsafe fn end(self) { + self.cmd_buf.cmd_buf().end_compute_pass(); + } +} + impl Drop for BufferInner { fn drop(&mut self) { if let Some(session) = Weak::upgrade(&self.session) { diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index fab7d65..a1073f4 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -21,8 +21,8 @@ pub use crate::mux::{ }; pub use bufwrite::BufWrite; pub use hub::{ - BufReadGuard, BufWriteGuard, Buffer, CmdBuf, DescriptorSetBuilder, Image, RetainResource, - Session, SubmittedCmdBuf, + BufReadGuard, BufWriteGuard, Buffer, CmdBuf, ComputePass, DescriptorSetBuilder, Image, + RetainResource, Session, SubmittedCmdBuf, }; // TODO: because these are conditionally included, "cargo fmt" does not @@ -189,3 +189,23 @@ pub struct WorkgroupLimits { /// dimension. pub max_invocations: u32, } + +/// Options for creating a compute pass. +#[derive(Default)] +pub struct ComputePassDescriptor<'a> { + // Maybe label should go here? It does in wgpu and wgpu_hal. + /// Timer query parameters. + /// + /// To record timer queries for a compute pass, set the query pool, start + /// query index, and end query index here. The indices must be less than + /// the size of the query pool. + timer_queries: Option<(&'a QueryPool, u32, u32)>, +} + +impl<'a> ComputePassDescriptor<'a> { + pub fn timer(pool: &'a QueryPool, start_query: u32, end_query: u32) -> ComputePassDescriptor { + ComputePassDescriptor { + timer_queries: Some((pool, start_query, end_query)), + } + } +} diff --git a/piet-gpu-hal/src/metal.rs b/piet-gpu-hal/src/metal.rs index e3157d4..307def8 100644 --- a/piet-gpu-hal/src/metal.rs +++ b/piet-gpu-hal/src/metal.rs @@ -15,25 +15,32 @@ // Also licensed under MIT license, at your choice. mod clear; +mod timer; mod util; use std::mem; use std::sync::{Arc, Mutex}; +use block::Block; use cocoa_foundation::base::id; use cocoa_foundation::foundation::{NSInteger, NSUInteger}; +use foreign_types::ForeignType; use objc::rc::autoreleasepool; use objc::runtime::{Object, BOOL, YES}; use objc::{class, msg_send, sel, sel_impl}; -use metal::{CGFloat, MTLFeatureSet}; +use metal::{CGFloat, CommandBufferRef, MTLFeatureSet}; use raw_window_handle::{HasRawWindowHandle, RawWindowHandle}; -use crate::{BufferUsage, Error, GpuInfo, ImageFormat, MapMode, WorkgroupLimits}; +use crate::{ + BufferUsage, ComputePassDescriptor, Error, GpuInfo, ImageFormat, MapMode, WorkgroupLimits, +}; use util::*; +use self::timer::{CounterSampleBuffer, CounterSet, TimeCalibration}; + pub struct MtlInstance; pub struct MtlDevice { @@ -41,6 +48,18 @@ pub struct MtlDevice { cmd_queue: Arc>, gpu_info: GpuInfo, helpers: Arc, + timer_set: Option, + counter_style: CounterStyle, +} + +/// Type of counter sampling. +/// +/// See https://developer.apple.com/documentation/metal/counter_sampling/sampling_gpu_data_into_counter_sample_buffers +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +enum CounterStyle { + None, + Stage, + Command, } pub struct MtlSurface { @@ -81,9 +100,22 @@ pub struct Semaphore; pub struct CmdBuf { cmd_buf: metal::CommandBuffer, helpers: Arc, + cur_encoder: Encoder, + time_calibration: Arc>, + counter_style: CounterStyle, } -pub struct QueryPool; +enum Encoder { + None, + Compute(metal::ComputeCommandEncoder, Option<(id, u32)>), + Blit(metal::BlitCommandEncoder), +} + +#[derive(Default)] +pub struct QueryPool { + counter_sample_buf: Option, + calibration: Arc>>>>, +} pub struct Pipeline(metal::ComputePipelineState); @@ -209,18 +241,43 @@ impl MtlDevice { let helpers = Arc::new(Helpers { clear_pipeline: clear::make_clear_pipeline(&device), }); + // Timer stuff + let timer_set = CounterSet::get_timer_counter_set(&device); + let counter_style = if timer_set.is_some() { + if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary) { + CounterStyle::Stage + } else if device + .supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary) + { + CounterStyle::Command + } else { + CounterStyle::None + } + } else { + CounterStyle::None + }; MtlDevice { device, cmd_queue: Arc::new(Mutex::new(cmd_queue)), gpu_info, helpers, + timer_set, + counter_style, } } pub fn cmd_buf_from_raw_mtl(&self, raw_cmd_buf: metal::CommandBuffer) -> CmdBuf { let cmd_buf = raw_cmd_buf; let helpers = self.helpers.clone(); - CmdBuf { cmd_buf, helpers } + let cur_encoder = Encoder::None; + let time_calibration = Default::default(); + CmdBuf { + cmd_buf, + helpers, + cur_encoder, + time_calibration, + counter_style: self.counter_style, + } } pub fn image_from_raw_mtl(&self, texture: metal::Texture, width: u32, height: u32) -> Image { @@ -330,11 +387,35 @@ impl crate::backend::Device for MtlDevice { fn create_cmd_buf(&self) -> Result { let cmd_queue = self.cmd_queue.lock().unwrap(); + // A discussion about autorelease pools. + // + // Autorelease pools are a sore point in Rust/Objective-C interop. Basically, + // you can have any two of correctness, ergonomics, and performance. Here we've + // chosen the first two, using the pattern of a fine grained autorelease pool + // to give the Obj-C object Rust-like lifetime semantics whenever objects are + // created as autorelease (by convention, this is any object creation with an + // Obj-C method name that doesn't begin with "new" or "alloc"). + // + // To gain back some of the performance, we'd need a way to wrap an autorelease + // pool over a chunk of work - that could be one frame of rendering, but for + // tests that iterate a number of command buffer submissions, it would need to + // be around that. On non-mac platforms, it would be a no-op. + // + // In any case, this way, the caller doesn't need to worry, and the performance + // hit might not be so bad (perhaps we should measure). + // consider new_command_buffer_with_unretained_references for performance - let cmd_buf = cmd_queue.new_command_buffer(); - let cmd_buf = autoreleasepool(|| cmd_buf.to_owned()); + let cmd_buf = autoreleasepool(|| cmd_queue.new_command_buffer().to_owned()); let helpers = self.helpers.clone(); - Ok(CmdBuf { cmd_buf, helpers }) + let cur_encoder = Encoder::None; + let time_calibration = Default::default(); + Ok(CmdBuf { + cmd_buf, + helpers, + cur_encoder, + time_calibration, + counter_style: self.counter_style, + }) } unsafe fn destroy_cmd_buf(&self, _cmd_buf: Self::CmdBuf) -> Result<(), Error> { @@ -342,12 +423,31 @@ impl crate::backend::Device for MtlDevice { } fn create_query_pool(&self, n_queries: u32) -> Result { - // TODO - Ok(QueryPool) + if let Some(timer_set) = &self.timer_set { + let pool = CounterSampleBuffer::new(&self.device, n_queries as u64, timer_set) + .ok_or("error creating timer query pool")?; + return Ok(QueryPool { + counter_sample_buf: Some(pool), + calibration: Default::default(), + }); + } + Ok(QueryPool::default()) } unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result, Error> { - // TODO + if let Some(raw) = &pool.counter_sample_buf { + let resolved = raw.resolve(); + let calibration = pool.calibration.lock().unwrap(); + if let Some(calibration) = &*calibration { + let calibration = calibration.lock().unwrap(); + let result = resolved + .iter() + .map(|time_ns| calibration.correlate(*time_ns)) + .collect(); + return Ok(result); + } + } + // Maybe should return None indicating it wasn't successful? But that might break. Ok(Vec::new()) } @@ -358,7 +458,37 @@ impl crate::backend::Device for MtlDevice { _signal_semaphores: &[&Self::Semaphore], fence: Option<&mut Self::Fence>, ) -> Result<(), Error> { + unsafe fn add_scheduled_handler( + cmd_buf: &metal::CommandBufferRef, + block: &Block<(&CommandBufferRef,), ()>, + ) { + msg_send![cmd_buf, addScheduledHandler: block] + } for cmd_buf in cmd_bufs { + let time_calibration = cmd_buf.time_calibration.clone(); + let start_block = block::ConcreteBlock::new(move |buffer: &metal::CommandBufferRef| { + let device: id = msg_send![buffer, device]; + let mut time_calibration = time_calibration.lock().unwrap(); + let cpu_ts_ptr = &mut time_calibration.cpu_start_ts as *mut _; + let gpu_ts_ptr = &mut time_calibration.gpu_start_ts as *mut _; + // TODO: only do this if supported. + let () = msg_send![device, sampleTimestamps: cpu_ts_ptr gpuTimestamp: gpu_ts_ptr]; + }) + .copy(); + add_scheduled_handler(&cmd_buf.cmd_buf, &start_block); + let time_calibration = cmd_buf.time_calibration.clone(); + let completed_block = + block::ConcreteBlock::new(move |buffer: &metal::CommandBufferRef| { + let device: id = msg_send![buffer, device]; + let mut time_calibration = time_calibration.lock().unwrap(); + let cpu_ts_ptr = &mut time_calibration.cpu_end_ts as *mut _; + let gpu_ts_ptr = &mut time_calibration.gpu_end_ts as *mut _; + // TODO: only do this if supported. + let () = + msg_send![device, sampleTimestamps: cpu_ts_ptr gpuTimestamp: gpu_ts_ptr]; + }) + .copy(); + cmd_buf.cmd_buf.add_completed_handler(&completed_block); cmd_buf.cmd_buf.commit(); } if let Some(last_cmd_buf) = cmd_bufs.last() { @@ -439,12 +569,70 @@ impl crate::backend::Device for MtlDevice { impl crate::backend::CmdBuf for CmdBuf { unsafe fn begin(&mut self) {} - unsafe fn finish(&mut self) {} + unsafe fn finish(&mut self) { + self.flush_encoder(); + } unsafe fn reset(&mut self) -> bool { false } + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) { + // TODO: we might want to get better about validation but the following + // assert is likely to trigger, and also a case can be made that + // validation should be done at the hub level, for consistency. + //debug_assert!(matches!(self.cur_encoder, Encoder::None)); + self.flush_encoder(); + autoreleasepool(|| { + let (encoder, end_query) = match (&desc.timer_queries, self.counter_style) { + (Some(queries), CounterStyle::Stage) => { + let descriptor: id = + msg_send![class!(MTLComputePassDescriptor), computePassDescriptor]; + let attachments: id = msg_send![descriptor, sampleBufferAttachments]; + let index: NSUInteger = 0; + let attachment: id = msg_send![attachments, objectAtIndexedSubscript: index]; + // Here we break the hub/mux separation a bit, for expedience + #[allow(irrefutable_let_patterns)] + if let crate::hub::QueryPool::Mtl(query_pool) = queries.0 { + if let Some(sample_buf) = &query_pool.counter_sample_buf { + let () = msg_send![attachment, setSampleBuffer: sample_buf.id()]; + } + } + let start_index = queries.1 as NSUInteger; + let end_index = queries.2 as NSInteger; + let () = msg_send![attachment, setStartOfEncoderSampleIndex: start_index]; + let () = msg_send![attachment, setEndOfEncoderSampleIndex: end_index]; + ( + msg_send![ + self.cmd_buf, + computeCommandEncoderWithDescriptor: descriptor + ], + None, + ) + } + (Some(queries), CounterStyle::Command) => { + let encoder = self.cmd_buf.new_compute_command_encoder(); + #[allow(irrefutable_let_patterns)] + let end_query = if let crate::hub::QueryPool::Mtl(query_pool) = queries.0 { + if let Some(sample_buf) = &query_pool.counter_sample_buf { + let sample_index = queries.1 as NSUInteger; + let sample_buf = sample_buf.id(); + let () = msg_send![encoder, sampleCountersInBuffer: sample_buf atSampleIndex: sample_index withBarrier: true]; + Some((sample_buf, queries.2)) + } else { + None + } + } else { + None + }; + (encoder, end_query) + } + _ => (self.cmd_buf.new_compute_command_encoder(), None), + }; + self.cur_encoder = Encoder::Compute(encoder.to_owned(), end_query); + }); + } + unsafe fn dispatch( &mut self, pipeline: &Pipeline, @@ -452,7 +640,7 @@ impl crate::backend::CmdBuf for CmdBuf { workgroup_count: (u32, u32, u32), workgroup_size: (u32, u32, u32), ) { - let encoder = self.cmd_buf.new_compute_command_encoder(); + let encoder = self.compute_command_encoder(); encoder.set_compute_pipeline_state(&pipeline.0); let mut buf_ix = 0; for buffer in &descriptor_set.buffers { @@ -475,7 +663,11 @@ impl crate::backend::CmdBuf for CmdBuf { depth: workgroup_size.2 as u64, }; encoder.dispatch_thread_groups(workgroup_count, workgroup_size); - encoder.end_encoding(); + } + + unsafe fn end_compute_pass(&mut self) { + // TODO: might validate that we are in a compute encoder state + self.flush_encoder(); } unsafe fn memory_barrier(&mut self) { @@ -494,22 +686,23 @@ impl crate::backend::CmdBuf for CmdBuf { // I think these are being tracked. } - unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option) { + unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option) { let size = size.unwrap_or(buffer.size); - let encoder = self.cmd_buf.new_compute_command_encoder(); - clear::encode_clear(&encoder, &self.helpers.clear_pipeline, &buffer.buffer, size); - encoder.end_encoding() + let _ = self.compute_command_encoder(); + // Getting this directly is a workaround for a borrow checker issue. + if let Encoder::Compute(e, _) = &self.cur_encoder { + clear::encode_clear(e, &self.helpers.clear_pipeline, &buffer.buffer, size); + } } - unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { - let encoder = self.cmd_buf.new_blit_command_encoder(); + unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) { + let encoder = self.blit_command_encoder(); let size = src.size.min(dst.size); encoder.copy_from_buffer(&src.buffer, 0, &dst.buffer, 0, size); - encoder.end_encoding(); } - unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) { - let encoder = self.cmd_buf.new_blit_command_encoder(); + unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) { + let encoder = self.blit_command_encoder(); assert_eq!(dst.size, (src.width as u64) * (src.height as u64) * 4); let bytes_per_row = (src.width * 4) as NSUInteger; let src_size = metal::MTLSize { @@ -530,11 +723,10 @@ impl crate::backend::CmdBuf for CmdBuf { bytes_per_row * src.height as NSUInteger, metal::MTLBlitOption::empty(), ); - encoder.end_encoding(); } - unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) { - let encoder = self.cmd_buf.new_blit_command_encoder(); + unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) { + let encoder = self.blit_command_encoder(); assert_eq!(src.size, (dst.width as u64) * (dst.height as u64) * 4); let bytes_per_row = (dst.width * 4) as NSUInteger; let src_size = metal::MTLSize { @@ -555,11 +747,10 @@ impl crate::backend::CmdBuf for CmdBuf { origin, metal::MTLBlitOption::empty(), ); - encoder.end_encoding(); } - unsafe fn blit_image(&self, src: &Image, dst: &Image) { - let encoder = self.cmd_buf.new_blit_command_encoder(); + unsafe fn blit_image(&mut self, src: &Image, dst: &Image) { + let encoder = self.blit_command_encoder(); let src_size = metal::MTLSize { width: src.width.min(dst.width) as NSUInteger, height: src.width.min(dst.height) as NSUInteger, @@ -577,15 +768,79 @@ impl crate::backend::CmdBuf for CmdBuf { 0, origin, ); - encoder.end_encoding(); } - unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {} + unsafe fn reset_query_pool(&mut self, pool: &QueryPool) { + let mut calibration = pool.calibration.lock().unwrap(); + *calibration = Some(self.time_calibration.clone()); + } unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { - // TODO - // This really a PITA because it's pretty different than Vulkan. - // See https://developer.apple.com/documentation/metal/counter_sampling + if let Some(buf) = &pool.counter_sample_buf { + if matches!(self.cur_encoder, Encoder::None) { + self.cur_encoder = + Encoder::Compute(self.cmd_buf.new_compute_command_encoder().to_owned(), None); + } + let sample_index = query as NSUInteger; + if self.counter_style == CounterStyle::Command { + match &self.cur_encoder { + Encoder::Compute(e, _) => { + let () = msg_send![e.as_ptr(), sampleCountersInBuffer: buf.id() atSampleIndex: sample_index withBarrier: true]; + } + Encoder::None => unreachable!(), + _ => todo!(), + } + } else if self.counter_style == CounterStyle::Stage { + match &self.cur_encoder { + Encoder::Compute(_e, _) => { + println!("write_timestamp is not supported for stage-style encoders"); + } + _ => (), + } + } + } + } +} + +impl CmdBuf { + fn compute_command_encoder(&mut self) -> &metal::ComputeCommandEncoder { + if !matches!(self.cur_encoder, Encoder::Compute(..)) { + self.flush_encoder(); + self.cur_encoder = + Encoder::Compute(self.cmd_buf.new_compute_command_encoder().to_owned(), None); + } + if let Encoder::Compute(e, _) = &self.cur_encoder { + e + } else { + unreachable!() + } + } + + fn blit_command_encoder(&mut self) -> &metal::BlitCommandEncoder { + if !matches!(self.cur_encoder, Encoder::Blit(_)) { + self.flush_encoder(); + self.cur_encoder = Encoder::Blit(self.cmd_buf.new_blit_command_encoder().to_owned()); + } + if let Encoder::Blit(e) = &self.cur_encoder { + e + } else { + unreachable!() + } + } + + fn flush_encoder(&mut self) { + match std::mem::replace(&mut self.cur_encoder, Encoder::None) { + Encoder::Compute(e, Some((sample_buf, end_query))) => { + let sample_index = end_query as NSUInteger; + unsafe { + let () = msg_send![e.as_ptr(), sampleCountersInBuffer: sample_buf atSampleIndex: sample_index withBarrier: true]; + } + e.end_encoding(); + } + Encoder::Compute(e, None) => e.end_encoding(), + Encoder::Blit(e) => e.end_encoding(), + Encoder::None => (), + } } } diff --git a/piet-gpu-hal/src/metal/timer.rs b/piet-gpu-hal/src/metal/timer.rs new file mode 100644 index 0000000..65c8026 --- /dev/null +++ b/piet-gpu-hal/src/metal/timer.rs @@ -0,0 +1,172 @@ +// Copyright 2021 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! Support for timer queries. +//! +//! Likely some of this should be upstreamed into metal-rs. + +use std::{ffi::CStr, ptr::null_mut}; + +use cocoa_foundation::{ + base::id, + foundation::{NSRange, NSUInteger}, +}; +use metal::{DeviceRef, MTLStorageMode}; +use objc::{class, msg_send, sel, sel_impl}; + +pub struct CounterSampleBuffer { + id: id, + count: u64, +} + +pub struct CounterSet { + id: id, +} + +#[derive(Default)] +pub struct TimeCalibration { + pub cpu_start_ts: u64, + pub gpu_start_ts: u64, + pub cpu_end_ts: u64, + pub gpu_end_ts: u64, +} + +impl Drop for CounterSampleBuffer { + fn drop(&mut self) { + unsafe { msg_send![self.id, release] } + } +} + +impl Clone for CounterSampleBuffer { + fn clone(&self) -> CounterSampleBuffer { + unsafe { + CounterSampleBuffer { + id: msg_send![self.id, retain], + count: self.count, + } + } + } +} + +impl CounterSampleBuffer { + pub fn id(&self) -> id { + self.id + } +} + +impl Drop for CounterSet { + fn drop(&mut self) { + unsafe { msg_send![self.id, release] } + } +} + +impl CounterSet { + pub fn get_timer_counter_set(device: &DeviceRef) -> Option { + unsafe { + // TODO: version check + let sets: id = msg_send!(device, counterSets); + let count: NSUInteger = msg_send![sets, count]; + for i in 0..count { + let set: id = msg_send![sets, objectAtIndex: i]; + let name: id = msg_send![set, name]; + let name_cstr = CStr::from_ptr(msg_send![name, UTF8String]); + if name_cstr.to_bytes() == b"timestamp" { + return Some(CounterSet { id: set }); + } + } + None + } + } +} + +// copied from metal-rs; should be in common utilities maybe? +fn nsstring_as_str(nsstr: &objc::runtime::Object) -> &str { + let bytes = unsafe { + let bytes: *const std::os::raw::c_char = msg_send![nsstr, UTF8String]; + bytes as *const u8 + }; + let len: NSUInteger = unsafe { msg_send![nsstr, length] }; + unsafe { + let bytes = std::slice::from_raw_parts(bytes, len as usize); + std::str::from_utf8(bytes).unwrap() + } +} + +impl CounterSampleBuffer { + pub fn new( + device: &DeviceRef, + count: u64, + counter_set: &CounterSet, + ) -> Option { + unsafe { + let desc_cls = class!(MTLCounterSampleBufferDescriptor); + let descriptor: id = msg_send![desc_cls, alloc]; + let _: id = msg_send![descriptor, init]; + let count = count as NSUInteger; + let () = msg_send![descriptor, setSampleCount: count]; + let () = msg_send![descriptor, setCounterSet: counter_set.id]; + let () = msg_send![ + descriptor, + setStorageMode: MTLStorageMode::Shared as NSUInteger + ]; + let mut error: id = null_mut(); + let buf: id = msg_send![device, newCounterSampleBufferWithDescriptor: descriptor error: &mut error]; + let () = msg_send![descriptor, release]; + if !error.is_null() { + let description = msg_send![error, localizedDescription]; + println!( + "error allocating sample buffer, code = {}", + nsstring_as_str(description) + ); + let () = msg_send![error, release]; + return None; + } + Some(CounterSampleBuffer { id: buf, count }) + } + } + + // Read the timestamps. + // + // Safety: the lifetime of the returned slice is wrong, it's actually autoreleased. + pub unsafe fn resolve(&self) -> &[u64] { + let range = NSRange::new(0, self.count); + let data: id = msg_send![self.id, resolveCounterRange: range]; + if data.is_null() { + &[] + } else { + let bytes: *const u64 = msg_send![data, bytes]; + std::slice::from_raw_parts(bytes, self.count as usize) + } + } +} + +impl TimeCalibration { + /// Convert GPU timestamp into CPU time base. + /// + /// See https://developer.apple.com/documentation/metal/performance_tuning/correlating_cpu_and_gpu_timestamps + pub fn correlate(&self, raw_ts: u64) -> f64 { + let delta_cpu = self.cpu_end_ts - self.cpu_start_ts; + let delta_gpu = self.gpu_end_ts - self.gpu_start_ts; + let adj_ts = if delta_gpu > 0 { + let scale = delta_cpu as f64 / delta_gpu as f64; + self.cpu_start_ts as f64 + (raw_ts as f64 - self.gpu_start_ts as f64) * scale + } else { + // Default is ns on Apple Silicon; on other hardware this will be wrong + raw_ts as f64 + }; + adj_ts * 1e-9 + } +} diff --git a/piet-gpu-hal/src/mux.rs b/piet-gpu-hal/src/mux.rs index af1702d..9795193 100644 --- a/piet-gpu-hal/src/mux.rs +++ b/piet-gpu-hal/src/mux.rs @@ -35,6 +35,7 @@ use crate::backend::DescriptorSetBuilder as DescriptorSetBuilderTrait; use crate::backend::Device as DeviceTrait; use crate::BackendType; use crate::BindType; +use crate::ComputePassDescriptor; use crate::ImageFormat; use crate::MapMode; use crate::{BufferUsage, Error, GpuInfo, ImageLayout, InstanceFlags}; @@ -658,6 +659,14 @@ impl CmdBuf { } } + pub unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) { + mux_match! { self; + CmdBuf::Vk(c) => c.begin_compute_pass(desc), + CmdBuf::Dx12(c) => c.begin_compute_pass(desc), + CmdBuf::Mtl(c) => c.begin_compute_pass(desc), + } + } + /// Dispatch a compute shader. /// /// Note that both the number of workgroups (`workgroup_count`) and the number of @@ -680,6 +689,14 @@ impl CmdBuf { } } + pub unsafe fn end_compute_pass(&mut self) { + mux_match! { self; + CmdBuf::Vk(c) => c.end_compute_pass(), + CmdBuf::Dx12(c) => c.end_compute_pass(), + CmdBuf::Mtl(c) => c.end_compute_pass(), + } + } + pub unsafe fn memory_barrier(&mut self) { mux_match! { self; CmdBuf::Vk(c) => c.memory_barrier(), diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 8392899..504d947 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -15,7 +15,7 @@ use smallvec::SmallVec; use crate::backend::Device as DeviceTrait; use crate::{ BindType, BufferUsage, Error, GpuInfo, ImageFormat, ImageLayout, MapMode, SamplerParams, SubgroupSize, - WorkgroupLimits, + WorkgroupLimits, ComputePassDescriptor, }; pub struct VkInstance { @@ -92,6 +92,7 @@ pub struct CmdBuf { cmd_buf: vk::CommandBuffer, cmd_pool: vk::CommandPool, device: Arc, + end_query: Option<(vk::QueryPool, u32)>, } pub struct QueryPool { @@ -738,6 +739,7 @@ impl crate::backend::Device for VkDevice { cmd_buf, cmd_pool, device: self.device.clone(), + end_query: None, }) } } @@ -770,11 +772,10 @@ impl crate::backend::Device for VkDevice { // results (Windows 10, AMD 5700 XT). let flags = vk::QueryResultFlags::TYPE_64 | vk::QueryResultFlags::WAIT; device.get_query_pool_results(pool.pool, 0, pool.n_queries, &mut buf, flags)?; - let ts0 = buf[0]; let tsp = self.timestamp_period as f64 * 1e-9; - let result = buf[1..] + let result = buf .iter() - .map(|ts| ts.wrapping_sub(ts0) as f64 * tsp) + .map(|ts| *ts as f64 * tsp) .collect(); Ok(result) } @@ -902,6 +903,16 @@ impl crate::backend::CmdBuf for CmdBuf { true } + unsafe fn begin_compute_pass(&mut self, desc: &ComputePassDescriptor) { + if let Some((pool, start, end)) = &desc.timer_queries { + #[allow(irrefutable_let_patterns)] + if let crate::hub::QueryPool::Vk(pool) = pool { + self.write_timestamp_raw(pool.pool, *start); + self.end_query = Some((pool.pool, *end)); + } + } + } + unsafe fn dispatch( &mut self, pipeline: &Pipeline, @@ -931,6 +942,12 @@ impl crate::backend::CmdBuf for CmdBuf { ); } + unsafe fn end_compute_pass(&mut self) { + if let Some((pool, end)) = self.end_query.take() { + self.write_timestamp_raw(pool, end); + } + } + /// Insert a pipeline barrier for all memory accesses. unsafe fn memory_barrier(&mut self) { let device = &self.device.device; @@ -995,13 +1012,13 @@ impl crate::backend::CmdBuf for CmdBuf { ); } - unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option) { + unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option) { let device = &self.device.device; let size = size.unwrap_or(vk::WHOLE_SIZE); device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, size, 0); } - unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { + unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) { let device = &self.device.device; let size = src.size.min(dst.size); device.cmd_copy_buffer( @@ -1012,7 +1029,7 @@ impl crate::backend::CmdBuf for CmdBuf { ); } - unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) { + unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) { let device = &self.device.device; device.cmd_copy_image_to_buffer( self.cmd_buf, @@ -1035,7 +1052,7 @@ impl crate::backend::CmdBuf for CmdBuf { ); } - unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) { + unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) { let device = &self.device.device; device.cmd_copy_buffer_to_image( self.cmd_buf, @@ -1058,7 +1075,7 @@ impl crate::backend::CmdBuf for CmdBuf { ); } - unsafe fn blit_image(&self, src: &Image, dst: &Image) { + unsafe fn blit_image(&mut self, src: &Image, dst: &Image) { let device = &self.device.device; device.cmd_blit_image( self.cmd_buf, @@ -1106,13 +1123,7 @@ impl crate::backend::CmdBuf for CmdBuf { } unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { - let device = &self.device.device; - device.cmd_write_timestamp( - self.cmd_buf, - vk::PipelineStageFlags::COMPUTE_SHADER, - pool.pool, - query, - ); + self.write_timestamp_raw(pool.pool, query); } unsafe fn begin_debug_label(&mut self, label: &str) { @@ -1130,6 +1141,18 @@ impl crate::backend::CmdBuf for CmdBuf { } } +impl CmdBuf { + unsafe fn write_timestamp_raw(&mut self, pool: vk::QueryPool, query: u32) { + let device = &self.device.device; + device.cmd_write_timestamp( + self.cmd_buf, + vk::PipelineStageFlags::COMPUTE_SHADER, + pool, + query, + ); + } +} + impl crate::backend::DescriptorSetBuilder for DescriptorSetBuilder { fn add_buffers(&mut self, buffers: &[&Buffer]) { self.buffers.extend(buffers.iter().map(|b| b.buffer)); diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index 70023af..abe6ae1 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -6,7 +6,7 @@ use clap::{App, Arg}; use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session}; -use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; +use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer}; const WIDTH: usize = 2048; const HEIGHT: usize = 1536; @@ -243,7 +243,11 @@ fn main() -> Result<(), Error> { if matches.is_present("flip") { scale = -scale; } - test_scenes::render_svg(&mut ctx, input, scale); + let xml_str = std::fs::read_to_string(input).unwrap(); + let start = std::time::Instant::now(); + let svg = PicoSvg::load(&xml_str, scale).unwrap(); + println!("parsing time: {:?}", start.elapsed()); + test_scenes::render_svg(&mut ctx, &svg); } else { test_scenes::render_scene(&mut ctx); } diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index 3ca0742..1642026 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -2,7 +2,7 @@ use piet::kurbo::Point; use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder}; use piet_gpu_hal::{CmdBuf, Error, ImageLayout, Instance, Session, SubmittedCmdBuf}; -use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; +use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer}; use clap::{App, Arg}; @@ -29,6 +29,25 @@ fn main() -> Result<(), Error> { ) .get_matches(); + // Collect SVG if input + let svg = match matches.value_of("INPUT") { + Some(file) => { + let mut scale = matches + .value_of("scale") + .map(|scale| scale.parse().unwrap()) + .unwrap_or(8.0); + if matches.is_present("flip") { + scale = -scale; + } + let xml_str = std::fs::read_to_string(file).unwrap(); + let start = std::time::Instant::now(); + let svg = PicoSvg::load(&xml_str, scale).unwrap(); + println!("parsing time: {:?}", start.elapsed()); + Some(svg) + } + None => None, + }; + let event_loop = EventLoop::new(); let window = WindowBuilder::new() .with_inner_size(winit::dpi::LogicalSize { @@ -51,7 +70,7 @@ fn main() -> Result<(), Error> { .map(|_| session.create_semaphore()) .collect::, Error>>()?; let query_pools = (0..NUM_FRAMES) - .map(|_| session.create_query_pool(8)) + .map(|_| session.create_query_pool(12)) .collect::, Error>>()?; let mut cmd_bufs: [Option; NUM_FRAMES] = Default::default(); let mut submitted: [Option; NUM_FRAMES] = Default::default(); @@ -93,29 +112,23 @@ fn main() -> Result<(), Error> { if !ts.is_empty() { info_string = format!( "{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms", - ts[6] * 1e3, + ts[10] * 1e3, ts[0] * 1e3, (ts[1] - ts[0]) * 1e3, (ts[2] - ts[1]) * 1e3, - (ts[3] - ts[2]) * 1e3, (ts[4] - ts[3]) * 1e3, - (ts[5] - ts[4]) * 1e3, (ts[6] - ts[5]) * 1e3, + (ts[8] - ts[7]) * 1e3, + (ts[10] - ts[9]) * 1e3, ); } } let mut ctx = PietGpuRenderContext::new(); - if let Some(input) = matches.value_of("INPUT") { - let mut scale = matches - .value_of("scale") - .map(|scale| scale.parse().unwrap()) - .unwrap_or(8.0); - if matches.is_present("flip") { - scale = -scale; - } - test_scenes::render_svg(&mut ctx, input, scale); - } else { + let test_blend = false; + if let Some(svg) = &svg { + test_scenes::render_svg(&mut ctx, svg); + } else if test_blend { use piet_gpu::{Blend, BlendMode::*, CompositionMode::*}; let blends = [ Blend::new(Normal, SrcOver), @@ -151,6 +164,8 @@ fn main() -> Result<(), Error> { let blend = blends[mode % blends.len()]; test_scenes::render_blend_test(&mut ctx, current_frame, blend); info_string = format!("{:?}", blend); + } else { + test_scenes::render_anim_frame(&mut ctx, current_frame); } render_info_string(&mut ctx, &info_string); if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) { diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index adbedfd..3abb2e0 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -306,7 +306,7 @@ void main() { is_blend = (blend != BlendComp_default); } include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip - || (is_clip && is_blend); + || is_blend; } if (include_tile) { uint el_slice = el_ix / 32; diff --git a/piet-gpu/shader/gen/binning.dxil b/piet-gpu/shader/gen/binning.dxil index 4a4f0734c4735cdf4f37ec0ade7e7e7cacccabbc..3050aa83bdb31b0655c4e6a2c8d80987840b6532 100644 GIT binary patch delta 2232 zcmYjSeNYo;8h6cK2&oPwm@CLW3vkQ1*X;+ahX zX{u1dt!){wy&zcW6njA}jvYFaK(6MhT*cBGu0vb6<8FFa&jP16(>boaU5WJepWWyA z{hsG}pWpku@A`B6InvVF;*z3!w?2Q$@9B+keEFa~aC&WAI*H;&bHVg;<~M#gcw6;w zn5oFVwusjAZ2z+Fyv1i;jZm#6fXzp{ypslK*rY-Xfbg*4_Z)_J`mjN(d-?Da1C*#x zlnx`nRV2m%6t=K-F$s8KgIF``0Twp2w&htGkYS4oeF{PPu;KSaVE+##AwPzB1Z=iw zmZ($pOXEb9o^E04vme1Fyb22T5+7g0BRyU;EfGq?|7--I3lMn%q1|yne2B~4P5|&z z_8rf!^ca~!tjwlFf@CUs$S3M2!JMnpB)1X-ZywK;z`2X5=!FT3`mHV!K^!Dif;W>1 zLXe80hKt!*GtCYU@+TM?O-1SVEq<4UZ>ZfjG33pl?iGyOoLy}{+P2HtF`U^p%#SOH zVh-lM_DAH_UPSj$cl|1_I7KSGu3kw+kJ#vD;)7UhJ7y;6fXjdd0aot*@Wb(DB1B?- zYTAS}07L>Weoo?tOT;}pFYMw@GEa1gDer?bf^)8MnaVQhh=QV+I#Hk^f!@}3Jc0r?;8NDb& zQLI`tR1LMtDz#R2)KG+?qApQ04#qB0*yzYO<;YZHF*v_FYTwJ*=4hzK8@V6K^t&M* zO%gOzFj-8%KJ5Sw%6zO=AoBl`on3wSL zVSnDzrvE!S<`h1{)Qhui--1XVh~U@za z=}r|D1dtwQM=#}}UFP@|QH9b`16P53@%eK_o9$Pwmt?_Y4%@yWx51F@zwA9ji})}w zXn}Kp6xO2^)?78%+Q>vmqHQ9vDc{=4NnBm)hCl5g5?HnDN%DWwCz6l|w)MsC-#vqW z;SNBov;{xkE$-BY>2>|E`iZ#fox9eHpPme|DkSilgqMMAvU4vk(OU0vnXP)c))Hf= z!t-`Z^2TFsfO>wV-)-Qt8EtNRT?pO*=c^ZjRSTt0U*I*#GNB|SnypMCjRwg-@a0aA zq_9u&ZEKY8mv`W9B9~hrG1^t-EAdIDs_zG@gU+1~$lr+ge6~e;c`Dfv1#}szq_h>%8)u!TjTi>t0GK-W8oCvP#yUKo2;FwBF@47pqm*XYhUEXDdk=f%-7s(VAgk@%(n9Ohg>>fz9a zfaPOL^4KF`&Rl$phzN6TBIcriTlrY!^4~%VNVm``rLiHEEvA7M+xQQE50^oBnG}`kFGO8+|e%%jK`!H+nVhv!m{_^sNoCvaD-c zzBe4GgJMg+)7^H5mz*b*_hnF+2tFaSJaXvjZfz^{pJ@QlC6{A*j4Z)5hqB?`crNNd5}T9X#F0mPIP%0}az001c}!m6$fSxRr~ZLs dmz;qce3!I+Ud+91Ubk4zWz-3S?4Kh1{0|^9yNdt- delta 2206 zcmZ8ieNYo;8h4vW=hjc?DaB6R|5NWEwfHF#mI!gc#6)|VMYbiam z2@q2ZHO%4{p1sA=k+!sk)1f%$9|<6bbJ~LE&EOqggXPrg^m<^=n|8F*+m%3XZvV+X z&+qqq{od!@gp47h+EQIq4F2yKQeQ=J)mIlzS6=^paLciKhi*Zop-U0eE^$c_=GJ$i z{%_nzH^%-Lg|#G-KoYfyM;T@SM-6J;#sWNQFq2;ZXxRt_VkHtXim-;`Fl>$*p!gja zWtR=_MjZ~MQELO%Vj=+=wTMTD%`3^Nc}V)dkWZ+z99HiJK*2$*ZMhS?iQpy=nocN!vV5dc~U0Jd{Rtjwi?*}qVv%t%wxw?i;Ake@L=ijPgvdsDhp-T(coi6aXzL;?~PtoY-)gR zPZ(}?z5SW}tUY;Fzw3;C)g)1KI_2H>FXtBy=LPys^_|e**?l~(2)hbsYg^M;#7z@T z#CjvzrWhmqG!ejCTkHNx^|*=lus4j|Ah9eI4AbNkAE!UN>+F_f+Zp}c9&(__`@pdy ztKigs?&oKcy@9zaGaW;BPFydO`PbLKp8INJZGFR`vV-*vP1`*ESkgPkcHWt_pPQ|| zGuy*_Q0#rM^*&be{HfT-C-0XO6lBg^qL6?@SxW-&yl~1YxF$zD#S|_RFLfp1JEWE_ z&;!=h!Hpk(b3e(iDtu-&!h3(lZo)26ap54O42*)RUm(F! zU!QiffygH%+X#036wn%HN7uk(h=q-lR=koNO7rftb}%z6T2%}7RZ6SJgYpk?Opil9 z!OZLSgJZ-SeHu@#G|kxo4IJgn%50q^b(SvUotje0!HPXHjJ%(@S;V=OI+ruckfw-D*cE|@wSdw6&!jU2ZzDV`rVAG()PY4U?fe3$bPJkxm@5&%* z7WYYs8+mHWO7i&24Dwoq2v%4McgC~;4OZb^+ZZzuV5qaUotTS_|Y`i-6{8 z+jcx*rK!Y!JLBk={8?})rpy+MTUb+<%8UcZjAJ1t^O0L4`8837vhv>F0c~X4mW?ap zmwsEE1OFh7uQN5!YeN^vBdk&i1DXPDy@bq;S{)_>$Zh0eRSFlBSW)0`iV~Msx5Izx zB2omk-09lK?BR4IRv10v?|2En>kC7Iw1xF_VmlS3+j?NbvbawzTb@cbjEoB^q)3`% z7l3A@eLGN5jwvy-!>oOwHXbU&O|;516#ogZk8KJ0XsM9V<#XG{;q`F7VWz!o#X%& zb(+(wvt8ZVCr<4?qadAT3a)bM3ZvV%pgXXb&^V_R&k8Cx$F#ba#3B1iYAD{gq{!BN z{hJv7l7Rim`@?XQ^U*{NJh>#Gb#Pjnus6!NoqRw^L^)R`>>I;A{WF!V7Z9L~TkfDH2_05e zRhZwqwvP*F2y{q9dhA!Zpnya~NbYHTiU?l=rBWt*4H$x*^VV8I%*i>MC++DMC74!_H&Ubg+}f46ru)gr zab`RMgnhlN=~MhUbbPuU4Dk89=gCwyMl_?a@pJ>z%ZxMPO6RJR)+EeSE7gk$KQwmR zJbQRgn;zlLX>(nMJpB_xnkYR#r{5u56Q6IpIc<5j*!1-CdZ@E{wI^-$BYmb1UEYv+ z^sTQ-0%_LHerqTDhb6Ic9QU64p580*&w0ViLf=rQCweoasg>P-8|c;~30KPIY2+n1 zIiJ*wz7AvDW*rIjar<@pv>4o-CRt^q01mfhlpoFzhB&$oYO^69#+9a6VOYa+13JTV z2M=j_WL1T8$eXkt6-ZFT-caVFid(lO-tu0RUhflg?6U-kM;JRQ#z+;@r zs|piMNkIl1#_SYgM_!-=+HMhjcuaI2Q3@foCXpE^5eVMfs1VL4i&4ANg`CnRkvz~P xl82v>3qL3nZce6_g5OL8OVoErdlx46BHUu~J2A$$}tNmS6zU zxb3!;Eat8D`4hief*(GUHgMMczpXt71B^`8rg|M}1PIfpa%&O9^o&bvJCJ2$)6 zyohxOiV=$B|M^4H?v%1xQoX8nlOTE}3_(!FX$KmPxc3xWJXv+4bpg%=@Au&)aa*p^ zazZ!K_a2ki{dG{x7b|hIAV>z`A=maVH3-Tj;q+@NPdpt6pIcyIv+ddb8gXj-yghv z{WM-}yrsbr8#c)^QrMye3K5sfmI?&|M+~o$T?n5`nC#$Fd?=4t1;%_8rJGM_XgPhq z{X$J?`Gwo7EYzIj?D^YLzS_EBb7lnQxP`;feQR0YuRSY7aTR`-b8TJ z6`$GsYntH;-Q_ng^AV}$E{vqtlWqYWQ+Sq zx5Pov9PKl@v!YBSATY8;Rb}BG(jb@_akcy--^hp(Fk@a3%2s??rr^V5hKyfj!}wkJ z_Tx-Os7v`~z6+6&#fM!PY!MvJUEtA)Gux6c*k#l&78i-9I+~a5jYI4mu(QSJ8w9vu5PD9pd zo2|JD;Y|9jabmRvjKB_je4~=d3WqUn1QJ+B*Wcmpeg+{=4Lb4hwaWhPj>v_U(M67F zOWT!93KGg*&RU~_Oz^x;Zp;q4B?=cg$Kf$j8JU-;L?t-1w5Q`P@htBU9by(>i&<=l zq*hf>&kbEx2<8{g!SV5rm8L~b=g!}4bQ zYK(pbod8Pl#l3xl9*f&hvX8iNmKXlPg_^bO>( zAGAu)`k)WcF36=6?J&2QP~cR*GxTA`i0CpLXqZ7LAKfDxP?Fv!Ksw5l7jNk&3A2Du0# zCnF+VD<#Ofa9SHDY~5dx-|1{n?JXC_)e0eIWf#+$kDph?JdZPh=GgH{EmbQ~HCQiz zr!+>A>B=v!92X*5ONM~z!nsnQf|YER`&MgPZ*44Nq2-cN+Y{Y*O1>lJw6>Bl$89_) z4DpAm``Ghk6+;en5bj@gyH#X^ol`8Gz=67|@Kqt&VFm}z&{CKz$p{9=Wf??^F=VCJ zARC{=dx)?E_-{_z4dF>qyh>sQjsYc|hb^zjeyHN9$st(rfMtx)@Yz{w1S%XREMVn6 z%kLm@TCCiS+RaQRJgUuOE{3bLKQh^2$`Y<>nGOSU+-c5Ue29+2t_tjNn~!MZv(k+~Ks~ZBB)ecmY0$TZ0wa1#o}187ZDp_^Nh= zowI`H>T7bmy+gz_+&eGec&)erK3*3WkZ2`^x|WOX@@Vw56wPHp6#1Y>^4FT%f+)=1 zIQ)8;S=SvoG}G+3h02h0oU1pOZ}QLE1qC(v3Qad~x<0yVI53l@AWG(H#uw{CGH`wz zbi(q(B$L_hx^ab$f_tG?H9vr?SQ>M_VLOdEsN#pw?*sg z;KMGHK@zzbLujQuvMXe<(4ceW%5COmxf)oRlO;+@szbnHQmTbPro)8L7HJhM z=Bg13U{|H2Y1*z9CcBO%drsQYn`FELYb@NMEAm|1LSvRF0msx`i$yqWVDb5vY^E&E zFLgE!Dx-6;q^G+Z+Pr|!^&Wo$8oR0;5m~A z8VyRaIGDS{fi>+$o7RU1N2$vlfE&3YgkTCC0>to#Lq&bLAtl%rqca%RXPtK$Kgg?0 zVdr|2u2Hg|a0YDa4C|ZK2UONr93BT?9Qn!O3Z>BC4k_F! zXY+H96ECTT^M^j`mQ}!%UUubdB-a{?1>z!7;-=oziUh0Ia8!X*=_N4`0G);D1%Lg2 z?KM;X8Nx$HAoP-UwDl#`VS%S?f`|g^25hI5>XFho4;ye^fT*8^0ix>N6OW4@B_%|Nn z04WYTYSubwoc%S<%QS2KG;5b=9?sXS_0&A%YSwZz58X9uXKEgTiAXHi2YbI>#4YON zr$9)iyvrz{5tlhVlke_4ZMnkJ>6E;t!;8FPrqgzLU*{Iu9)G7Xc_E6`9dVRyL2iri zv`9bcF!E9uY1z+})+0s2exjn^S^5TfC`zG~`$?xHI_xOl!i%0jBF5#R&*53CMPn zvJGUy49P9<-G9e2#LN0%Xw{Z@Y_c;o24gZt1l8&m=I9$f-}%qE_W} zuc(4pT-Lpa)AOB-DbAoE(A-lkzbA1GpQ|aR;teMbCCYx6A z15h5ae;Qo0BYjs||d;Fj0;c5a9~T(_y2dzo1otCIAuqlg9iD^-(tP*erwy z_z?rr0pgt%cyeK&2TsTy5w_yK<{=4hot#@_JYp+lyNoEP>iEdVe*{5n1#*euW)Ddp z$`>vO63z+|wna3I2OeW4DHKRWn)EE96OfKy5d5sP9TC&u)2%xQj#!7m0pb2nMdaEZ!ANnE@zJ=~(UX zzMsA4t6JGS_PXy;LMxE;@46BkyRuxf?Vpd`EGSkQC61@%|J^665ZZeJRp@ks4IF< zr%3;-hr~hLrp}=1lJcu0WBl`e<7*V0|ZO|!(uXNeWw(mgXb;B{v2yxHrnNBH7j`Qn-&@uDnj%VMQX zA!`vDzR{A}KB3CP;zY9eM6xI{&*p|rO$UgUO3XGghQpZO-CJb307y$IQy6k;F7#ac{;?WKB17?__l)GnHpxb57m#Y2Bc8WOk{ILR9g>S(k_;)6Yn*2U zX|xa@t1-^&JAaT`#o zsR28*2)RHNmVlu64{aBmww`|>VrluEWr~|NZ5{l)ykeL;klI3SrJNcf3)=BZ1^AH~ zlf8y*{fu8~LMO^XpOuB~kZu-)BC4fkK6_Ua`W2%oqBZn+Tj)1Zs|7$-E}*js)_IIS zp@Dr=0N$XZ+{%5NN(uUy&+XpF>6UPBcpEw#Dg89X+bm|AtvQc`|9AHa!7^Yt1O5qh z1@7CRU3uNMQg&D@IRqr7c|91fL7DArJRs**_9xnM&kNHn8oBeu^S2^MCgk<0Wa@x%4K1^XsHmOa*0Q zeQ9j1t_CeAd8KMo*(kqO6Ys&V2W4YjQH{)0*d7sktR_dLgTnUO^SLGpxOp{ji_5y47%Em&f@j?BYr@c9mYvIgB}i zLeu{j`XK#ex5wxdbP3PHB!N})uCc?I78H6YS8JBw38$&ggVl&i;6>-w@tdZ(Q9)Vq zqc?Xu`Ih+Pt+A$CV@LN=hR7T?S8r%C6U{|&V>Y>OFQrec?-L&ryK_zHBlJ|C_^7(w zCbvDdvHiix8P~`rRsXxO{m@CJ-k{VE?A1@!la89rGr3nk>XF7hrGL<%e^Dn2>3{c7aw*M2hRJYHCZTG zF`fJy^}Xr!hPK4(Z7Z+0Wi-UhYSXE2_nLww^Lbl5hp#8JBbM#*hC>(CtJP~+qb-Ar zdhoth8LIWUia0d8t%cmyK-K6YWv$F@jjTGZGGz#4w|ut~hg^S}A+_Wno_fqVSw)q- zLB-0HmpsCNv#uKlXPr*4<7&^tGZLSlOXQwSm3|=D|+dgX5kLTpPpVEmJKz5=<+;KJIzcFTJrnz42^%V-pxKoqc!@M4N$xq4;YKVIIvnOAhvQ_)N%%aGiH7Jo|cbc=+_~SLH zOyxcF3&E>3h;g%-ga23@zmchl&MvjzQEI<7vNs+PWN*O#X5RJwfgKkjc=KhEGc3g^7DC3DctL%3UM=t&ih_e})^_A=AgY-drkTMFfOL38N-=f{TLH6A3 zLcqJv5URN-{H2cCK3K=E|i-=6r_ zn`k(vh?O&JpHmg}_UbMH{!Oh}qcJ&oQMz?SInNH{)JH9T4rF}>6e=HOhmWwspYnPV zZf7W|iySq%BqDf&frmwmKoL)Q*LBp*?7o(D3dERW*5iUd*PUc4W4=|f3RJ9&$kIfl z@pU3kGETV6nZ$Qm)&I%?_X&vh;?qHr{%();vP+s3ocLk!^nJpn_@VvA@&WWZ!C!~K z-)&&J5=_^LmqVX4=RYU$%;R;7fU8z0l3H=UgZsqoO+C~^rl ze@w+XSBw(=%l^UWXV@|X-juJXIqlS($JAEJt8qeyT+w;Bz=6f^Nl>K5p(p#K=I3go#nD5b7npCav&SO zY}?#rSCW=piLV`G-LUEDG%c8MC21sOBBtm~Ov#&=Z*GDC1^A%ao$F0|JST0N$@ed< zE-8;5PeOfvcZmQWaKdEks|PFVqj{;beNvNrQvZm|2V*kx*lmY{r=yY7 z&T*`RcMkubuJ<$Q;2p!ArthECcBzRzgOJ!|F)-|`l=vh>1wOaFqnqnY)uHeEqrW%a z2QC0=wrD=k=-&pKIfSe2)=xf8-Rd)S)gEPPjEZ@1AVnI}5iqb-M{A{g7@`S0vk1(d zgC_6alb%gT6dXMmd-U<@qqPI`RDm)+_?ZO~ga0-8%Shd_>jon1km2wTo1N>K>yArq zNpa6pQqEH9=o({10u$ps;C)`mC#=cefr5KzC>U`C;7&0BD^kvXhob$hQS>|%U9%iT zpF`1zJ&OKw3cU_pdPxxqZbQLU%TTZ`1O+=<0I$E?qeV)}r7oHUMl4MZqt= z0AK=wf@K^O95Z_pDs>ZzHk702pHQ?aZfa=>y7aaw^nxilW&^P3XB132h=S+X0x+C{ bf_I0a-~kj&4?w|p=c3@hamheYpZ@*_0bBw> delta 6967 zcmZu$2~<^umFN&5DbycAPD=0Kwl>4P6X{@iSDrz9fJO*s6B8en-OXy4lc!2E=^Y7q@8tk zI8ZG(pkY!=72TE!ywm4Ih0YGL+6<*v;xsZ61nuTP&{}mheU;pr2t7o&W?mwQfN&8J zbl%+T9MXmO5W#zk`Oi&A*CHQclPF7LrsCx3iTd|`g1jzfS6}!Y;ueb5JU9xcBaWb< zE%~(RGc^ln)vN=ijY(Sw5%L-GCBdxl3( z5=Ct9Www{SFJ5f%zu=0yok#!Wnn%&i#_&w3Y{04Wiz*3?=cfKVhv%03oz;TW>yz=% zdR{ypf7ade;v_NJ(R}~2$lXUscX4-1q<~mkDs<+g(tG3lh!KHM}gLot}NF~dv>)ig>;`^DpFBT z7|2HPIkBCKwv;MYH(I*M@$;Bp8^|Zja461B2-hpvoJbs=N1}jnGWJpiVvFt5m$WF@bTopyg0n#hrk#Q3=eou1hG-aE;usI(qhwShNr7qb?ULS( zd*oK+TPDQLBb0Es5Jjb|V6+*o$#EV;;;qn|X5Q>4LCtq4BA)7ap9DT*C$F=8(!q_sIqo zl#hu}24hlfZjIosr~|D-DQ8=~@mL9m79izxJ<@w>0VxgMG5;1W6?m{I_!R5GofwT5fuV*cy`75nY zUWe%g_NpdzCF>L31aURG96B)odB`dZc3Oz|lTDtIs>Ih$da5`$Edw-8xSayyRJExS~WsHhgv~xJ-k`@wpa`UP6dT zB&-U$#q&`8!C6HC-sN$4UZ&R;A+ZDMwF>8vNSH?!QFi66EGF*J73#57B0RCZ(j^MD z@)~9itNAt#IK6tMjia19-{0tX2ZqV%#Az?!c+JE?A*_jCmSi3Ub*+%xLoAr-X{Fah zG1My8lrKwfi()$V$HSX(?3(M*ee+C?Tl8^d2GPkI^f&tF?a;!S5P8vc;B*7f{77J? zqQV$-z6oD)G;|9wz!mGTqLN}X+dUp%Y>)c}Gn=D}DY%rMelr|0IUlq4zOU1r1gccs zzI!8TM_VtPk9xH&2j{0ww{@7iqpR{EMQ>d0S5`dzcSRfr5I(rvg&tOu>hEoRaUAua z?Q+7k@LOsQ$35UPYkxaGZkY_Ci`X0x7hmB?&Gg4MMH~(mbo1S|;TLAl*KsnZ4l3wr zc7cABG&7w{gHN~cRm(Z-RR%L)S4G&FS-YAU?K+y`K55N7BSVgCFmr*rvvRGA^x2X` zBKv61dLf?+EbejDYR2Mxqs0|$7pHjj2*-MkFkVAkr)1No4twBWFfo)Xqo1^=^0Ztv z!_I%cAtAF1!U>u57J1^40JGs#A1A25o*~EOt|NI-ay6x2s5jdM$@?@bW00w$8}Y#3 zV8R1kcnA*RyVT%e{^XIkjhkUt~aHJzViit&|0h6+=xG-v?LHA5z^P2mX_nJt-75DAw)76IFVDDMmB#6w*IksSzpyq#ePY&yEtn$&52_@P3y0vF5l zj1O*Vrf-&!^jW!1{)uR`rkS`IK@-i=GzqC$IA$|39nchIvo13U3^#1f>Gjr%<`CJ` zCArSjzrJtv*O%MN2X96rWc1pjP&3?_MA@+)&ARDDIv0pn>5S%{-*;6xQox;eT~>jk z-{V#~%W^FV1h5J;ZsFE}w2Er;ccPfk`bnZQ=W{r^v>ceWk6L)z#7 z9H^cE97w*6OBR`*A#|GSA1m~N+(4ZO?HCI!4u?MH?E3Y~8G2xLF@Wk_N(UmL9H-Oc z{KxP>WKE!D7`ENfA^Z#ELFWRCI|1QOk+&V$w6%ip(@1&8ALO-j!~aB{ zcjj4GFAaZ=%os&MBFix2D2Uk8dE#dAOuXXe2j=jixH_twG(C1qftQtS4?y+RzRze; zhNUKso9<=<p_GD5ujrVH#Mc(_vWBq|)#0U0Drr+&EL@JshfHji@&iUPNFI-inq9 z;VUQDQw$?L`2`tH_$jAmvdWJHql-WIWvXQr8i>(T?8kUm?nPkt{t_(=G!osM|JM96 zO#BU-bj3ddy$hO1_E&zG8R&hkO&^_}VVKtO`kx|bu$?YilnUMSv zRsJ^~eq);P81t{GRk9#3W0vw_r}+s`*&sJc$zL*yI08rx81Jt5H7~#=PyGHw(C>?q zvarIUpLX$=(TQKMVxcJzRsNmMo&qkaGw~}B zdi#$#w#!g*Gkv#$ENaD5L@>98M=}Q~Mt7HK)*69|ktnfql(=o}SxAg3DP!_N5djR9 z<)0!xYkd^`LcB0r+`e|z-#US4E_cKyME%UppV&+Xl~-35!Tt{>h?_rf^N2S6h&wE$BT}!s(}`QH-*s zJp1>@3YD;{6Qzj*#Q_OdntSHNRAVN=OG7Pp|6CO3N`YgannQ-Yi&bz-S3b78=5nu*YT_ZF5sNiXUy+!sP;U_9S( z!aix9H#QeFox2u3;^9#_bzVP(AICE-qUN2r^DXEYL{;%`bm9vz2Kyfb>B+UlLjvfntP(rRwmPNy=l zYgvkx8>m{RiZ*tNs1Sde&sfvXTVv(C=G$r@kRz%2&lV;1VL>-6*xyfeIx!$U;Uhf} zEcx=|p%|tVnU%%6ABv>GwF?-vQPSEJi9fVK;$m{p3n@}h*3rXR(o@;e#ZeV5dV!t~ zVu!(()p51rkq*J5LL3&a>6i1QUMy`th-%hOFfew6kdwakY=|_68+-ucwANY~smms! zF;9XM6oblY_X!XoGv|;)YXvV+Bn+djVu1)gZ;I?=YWKg;tgxrqVHdK)(xWUEi{O?T z_8=I`2P>#fMO<7EqV!mPsV3}FONjG;sr^F4C7%$v-oy8DOW5VE5Z`|UKSf*)4#9c4 zU+D_FG7z$&#nj<`#FeNJk*D+3fv~I3LZUBP^;&i5vk2>t!ge4%irs*oG{7{%pVjWi z)+}wk@ zVf!4EZR(QQMgO$=x8RkyrM?qg5fcL|?@nR5b_mHjz3Ot4>hhHi;lA2~IKgPLTt|Ij z2R_L%9sWua{>qgOL9U=niTxFME|0Jh`$f_UC0H`1hLD zJXWuFugaa-Y3{c@Qij|>X)S=)XZJmFdVw@Iu(m94OdZ$|Y4)y$ZLl`Vev14MEX4&q zC(46fTL-m9n*C{_!B$pKd1T*e|J$qo8d%*J#S8@A)IrngpsW2r zP`N|;#%`8$Ue$gaD=U>t>*ViG$yK!h?>==KU1(yolR|#Es`SnNQfgi4;aYn|L-ldl z3<=2T?NwHF(bNzrr|!@k*Y*4x$HDQ*h7R771 ztFa_qbt-~ip(?{@X-G;nmZZPAmVaNXsKrXsD{>PB=;~`TU3b+13eX~bK(U#F(N_5x zne+(0YKB4yyG+>LjFUO8+8OL3u4yXwhAq^i_@fv!GB?Kr9psGGRT0We&}ZYP`dulfF%6c48b_5A8XCI}9K)Z-uvWR}RMc+i%GvbMdAeD33Thaq zHuO;&-qc;^HfmJo`>D6UDreHmL$}s%Pstjt%Np)wG)&bIn*n)~0C^jxG8#r*Wo}+F zx3;9yd+YUQX1AK&{zKB~9O@92K2+Z@)N}m6B+x-VlW}gH9qRcv{=%E#o`&J^H$IWp zP!mn<5iL)o$MF}>Ka(~-%c$Z1Y--PJkw42guDbaBi;Tt>2Wq4bOoEb@7YB~xFC88| z&^TIOlX2VB-q%-~s=NK(J7{Tkdqoa~ zBcwm4+`-w4Le4TJBkQw+{dJg0;HC%J9u-?a7A z=iN6sP|r7QIecxypW23@diUmfxJ_0eb1?Q=bav=XU1wYTjkYZ}+HPe4LD2`-X(K^= zmC*Kf?e(qs_qRT}x3xOjvALE#08Yp=baW)4ogE{}e6TX}c}!;AU31+%bOVwkpUr$Z zWB20V%gz(`GGE$kh)&sngdq%qqk_PQTTFR^PHayw*5pH9zm8c4|KJOV+_%QobbGVu z$NU)4E-YC&8eq2Kh>?S1H$A%>Big+`c6WK~?g!D%uV8lJkr-iuF{L`}J!Ts(heyjb zwmH?Y3FU}M5WlxNHXa`PvL-gC_DDjsBmoIyl2^=Mc)rtIv{$JYNS0Je(CqV@u;xzl zuaXvmdb)}6>r6cj@*O+rd1~y(Ynxs@Px#f>SWXx2zrXS7eXN|G+<+>-{){~V&z942 z`e-S`R8PBfC+u-f`a62l+Z9R8#4%7$*KJWS-q|1R6m$k?C1|yH6N=k2W~@tH3pQ1= zpV{tz9=rGXk&L_AKHDhSCSz_IJ}`Da9sY24;{M*3wx37e0ZV8C8w#Cb2vQk>a`0+% zi!`oiR?3%@W|d-FUCr=FWq6b`n&}fvve^71%$_vuqir&`V!f#FL2O}uT;b0#Y(A-$ z?*%Rex1*;IZ$gF*{01>N7rZh2vyR2xn;LwR7sVtwzu3ehkwH%8j>YZ0Sg{{75&RBj zmsQ4iVP|qmk{r1)$(S^4-P>ed$c7~`$%5{rrG8-$xKp%wO!|+oTe-B?`?_QqG2hLN z#6DALv@bc$vtR?R=q0B}&ne1}9qTzDg56IVEA##9ipFBN$;CxKR~8k=`Yr(azCrqM zS+6y*FEz1~NMB-ACez}i+`c8(feb!zoY%V8mpUN1fmm$k(*1kQs+1y29}>H-vcf41 zWLAqmghp3BmA2%Q1XW3GRmoUoNkc5`cam-37-c`DEe%P)9ei$f@HOXPYpj_+$*54P z-NEwMzT~g^lb7F1eicnE(F>Hzi{!Jx@oU=hw1k3#4=N8%S0222A4srEkL50J(aghX zM&Dn5bb4?%Ec3&Th95amlgUO~I~*&v`x;K!nUoTemJ+im708%rrW)l~dDYi&T1;hC zRn>z%H$lCGkRiA2D0xaqonM}0|bO27@Bg~)V+r@i`M&&72| zfeV0Z+TAdSnLdlZnQ~(c$7Jj{xoKO-W>8!r!5<*gkwtb}?EdDubHoBHsrmsk3tGoxj+r_Ayw4q=(hVHY#(CB=C?v!Ha_Wc-oxg~}cVCb3^ zC;(r;z^E+-em4W&gn<_qW8fAHT)hkfYeF%wy%_+P@-Xnu1=z@yGhj6a-fjiJdU*tf zp7<1?NhpSv@iBCqGp6+!46Q52&_7^kW&F(0Vc5_+XTY8_dSGD5j~JLzg@He@24Gw& d1}=-hz(W`q4aC5Y7h>R##8j|R;76K${151p0G9v& diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl index 1e610ec..04529bb 100644 --- a/piet-gpu/shader/gen/coarse.hlsl +++ b/piet-gpu/shader/gen/coarse.hlsl @@ -931,23 +931,14 @@ void comp_main() { _1701 = _1692; } - bool _1708; - if (!_1701) - { - _1708 = is_clip && is_blend; - } - else - { - _1708 = _1701; - } - include_tile = _1708; + include_tile = _1701 || is_blend; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1728; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1728); + uint _1723; + InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1723); } } GroupMemoryBarrierWithGroupSync(); @@ -976,9 +967,9 @@ void comp_main() { uint param_25 = element_ref_ix; bool param_26 = mem_ok; - TileRef _1805 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _1800 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1805; + TileRef param_28 = _1800; Tile tile_1 = Tile_read(param_27, param_28); uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); uint scene_offset_1 = _260.Load((drawmonoid_base_2 + 2u) * 4 + 8); @@ -993,11 +984,11 @@ void comp_main() Alloc param_29 = cmd_alloc; CmdRef param_30 = cmd_ref; uint param_31 = cmd_limit; - bool _1853 = alloc_cmd(param_29, param_30, param_31); + bool _1848 = alloc_cmd(param_29, param_30, param_31); cmd_alloc = param_29; cmd_ref = param_30; cmd_limit = param_31; - if (!_1853) + if (!_1848) { break; } @@ -1008,10 +999,10 @@ void comp_main() write_fill(param_32, param_33, param_34, param_35); cmd_ref = param_33; uint rgba = _1372.Load(dd_1 * 4 + 0); - CmdColor _1876 = { rgba }; + CmdColor _1871 = { rgba }; Alloc param_36 = cmd_alloc; CmdRef param_37 = cmd_ref; - CmdColor param_38 = _1876; + CmdColor param_38 = _1871; Cmd_Color_write(param_36, param_37, param_38); cmd_ref.offset += 8u; break; @@ -1021,11 +1012,11 @@ void comp_main() Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; uint param_41 = cmd_limit; - bool _1894 = alloc_cmd(param_39, param_40, param_41); + bool _1889 = alloc_cmd(param_39, param_40, param_41); cmd_alloc = param_39; cmd_ref = param_40; cmd_limit = param_41; - if (!_1894) + if (!_1889) { break; } @@ -1052,11 +1043,11 @@ void comp_main() Alloc param_49 = cmd_alloc; CmdRef param_50 = cmd_ref; uint param_51 = cmd_limit; - bool _1958 = alloc_cmd(param_49, param_50, param_51); + bool _1953 = alloc_cmd(param_49, param_50, param_51); cmd_alloc = param_49; cmd_ref = param_50; cmd_limit = param_51; - if (!_1958) + if (!_1953) { break; } @@ -1086,11 +1077,11 @@ void comp_main() Alloc param_59 = cmd_alloc; CmdRef param_60 = cmd_ref; uint param_61 = cmd_limit; - bool _2064 = alloc_cmd(param_59, param_60, param_61); + bool _2059 = alloc_cmd(param_59, param_60, param_61); cmd_alloc = param_59; cmd_ref = param_60; cmd_limit = param_61; - if (!_2064) + if (!_2059) { break; } @@ -1103,27 +1094,27 @@ void comp_main() uint index = _1372.Load(dd_1 * 4 + 0); uint raw1 = _1372.Load((dd_1 + 1u) * 4 + 0); int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - CmdImage _2103 = { index, offset_1 }; + CmdImage _2098 = { index, offset_1 }; Alloc param_66 = cmd_alloc; CmdRef param_67 = cmd_ref; - CmdImage param_68 = _2103; + CmdImage param_68 = _2098; Cmd_Image_write(param_66, param_67, param_68); cmd_ref.offset += 12u; break; } case 5u: { - bool _2117 = tile_1.tile.offset == 0u; - bool _2123; - if (_2117) + bool _2112 = tile_1.tile.offset == 0u; + bool _2118; + if (_2112) { - _2123 = tile_1.backdrop == 0; + _2118 = tile_1.backdrop == 0; } else { - _2123 = _2117; + _2118 = _2112; } - if (_2123) + if (_2118) { clip_zero_depth = clip_depth + 1u; } @@ -1132,11 +1123,11 @@ void comp_main() Alloc param_69 = cmd_alloc; CmdRef param_70 = cmd_ref; uint param_71 = cmd_limit; - bool _2135 = alloc_cmd(param_69, param_70, param_71); + bool _2130 = alloc_cmd(param_69, param_70, param_71); cmd_alloc = param_69; cmd_ref = param_70; cmd_limit = param_71; - if (!_2135) + if (!_2130) { break; } @@ -1154,11 +1145,11 @@ void comp_main() Alloc param_74 = cmd_alloc; CmdRef param_75 = cmd_ref; uint param_76 = cmd_limit; - bool _2163 = alloc_cmd(param_74, param_75, param_76); + bool _2158 = alloc_cmd(param_74, param_75, param_76); cmd_alloc = param_74; cmd_ref = param_75; cmd_limit = param_76; - if (!_2163) + if (!_2158) { break; } @@ -1169,10 +1160,10 @@ void comp_main() write_fill(param_77, param_78, param_79, param_80); cmd_ref = param_78; uint blend_1 = _1372.Load(dd_1 * 4 + 0); - CmdEndClip _2186 = { blend_1 }; + CmdEndClip _2181 = { blend_1 }; Alloc param_81 = cmd_alloc; CmdRef param_82 = cmd_ref; - CmdEndClip param_83 = _2186; + CmdEndClip param_83 = _2181; Cmd_EndClip_write(param_81, param_82, param_83); cmd_ref.offset += 8u; break; @@ -1207,17 +1198,17 @@ void comp_main() break; } } - bool _2233 = (bin_tile_x + tile_x) < _1005.Load(8); - bool _2242; - if (_2233) + bool _2228 = (bin_tile_x + tile_x) < _1005.Load(8); + bool _2237; + if (_2228) { - _2242 = (bin_tile_y + tile_y) < _1005.Load(12); + _2237 = (bin_tile_y + tile_y) < _1005.Load(12); } else { - _2242 = _2233; + _2237 = _2228; } - if (_2242) + if (_2237) { Alloc param_84 = cmd_alloc; CmdRef param_85 = cmd_ref; diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl index abd636b..55812d4 100644 --- a/piet-gpu/shader/gen/coarse.msl +++ b/piet-gpu/shader/gen/coarse.msl @@ -954,22 +954,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { _1701 = _1692; } - bool _1708; - if (!_1701) - { - _1708 = is_clip && is_blend; - } - else - { - _1708 = _1701; - } - include_tile = _1708; + include_tile = _1701 || is_blend; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1728 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); + uint _1723 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); } } threadgroup_barrier(mem_flags::mem_threadgroup); @@ -1014,11 +1005,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_29 = cmd_alloc; CmdRef param_30 = cmd_ref; uint param_31 = cmd_limit; - bool _1853 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize); + bool _1848 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize); cmd_alloc = param_29; cmd_ref = param_30; cmd_limit = param_31; - if (!_1853) + if (!_1848) { break; } @@ -1041,11 +1032,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; uint param_41 = cmd_limit; - bool _1894 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize); + bool _1889 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize); cmd_alloc = param_39; cmd_ref = param_40; cmd_limit = param_41; - if (!_1894) + if (!_1889) { break; } @@ -1072,11 +1063,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_49 = cmd_alloc; CmdRef param_50 = cmd_ref; uint param_51 = cmd_limit; - bool _1958 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize); + bool _1953 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize); cmd_alloc = param_49; cmd_ref = param_50; cmd_limit = param_51; - if (!_1958) + if (!_1953) { break; } @@ -1106,11 +1097,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_59 = cmd_alloc; CmdRef param_60 = cmd_ref; uint param_61 = cmd_limit; - bool _2064 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize); + bool _2059 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize); cmd_alloc = param_59; cmd_ref = param_60; cmd_limit = param_61; - if (!_2064) + if (!_2059) { break; } @@ -1132,17 +1123,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } case 5u: { - bool _2117 = tile_1.tile.offset == 0u; - bool _2123; - if (_2117) + bool _2112 = tile_1.tile.offset == 0u; + bool _2118; + if (_2112) { - _2123 = tile_1.backdrop == 0; + _2118 = tile_1.backdrop == 0; } else { - _2123 = _2117; + _2118 = _2112; } - if (_2123) + if (_2118) { clip_zero_depth = clip_depth + 1u; } @@ -1151,11 +1142,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_69 = cmd_alloc; CmdRef param_70 = cmd_ref; uint param_71 = cmd_limit; - bool _2135 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize); + bool _2130 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize); cmd_alloc = param_69; cmd_ref = param_70; cmd_limit = param_71; - if (!_2135) + if (!_2130) { break; } @@ -1173,11 +1164,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M Alloc param_74 = cmd_alloc; CmdRef param_75 = cmd_ref; uint param_76 = cmd_limit; - bool _2163 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize); + bool _2158 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize); cmd_alloc = param_74; cmd_ref = param_75; cmd_limit = param_76; - if (!_2163) + if (!_2158) { break; } @@ -1225,17 +1216,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M break; } } - bool _2233 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles; - bool _2242; - if (_2233) + bool _2228 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles; + bool _2237; + if (_2228) { - _2242 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles; + _2237 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles; } else { - _2242 = _2233; + _2237 = _2228; } - if (_2242) + if (_2237) { Alloc param_84 = cmd_alloc; CmdRef param_85 = cmd_ref; diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv index fdc10a08e6089784d240cac387f52eef71f95253..6d33ee70c7b65cf2d8b8fef43e5b5ea63b20cbdf 100644 GIT binary patch delta 12338 zcmZ9S3A|QiwZ`{>0~{4oJ2+iRseWdP^GuGQfwQEhT_-wn4vGRIkf>-I1<}l$zBKPO zHO+ZORGd*9OSAM^SvFZ#T53`XPSySYzwU-+V5Wb`_A^I=lZ|) zZ2wjJ_xs$SqG&5tE;@>*=C4|AyumF4i=t=JxapH7j_)b>3@F-*J9=f4yQUuxA5c8f zn7?snxd`de6K72BnyS^n;x|pJp2&79G7 z3>NLhQuvICllmeqgHP?6)-}16!3QwruWR~-Ozwyt;R*eJIkSHySq)Ye=$rvu{YsO!ILIV>6tmPd&aS?1hzu9+<$wq zO^t6?Y2 z6K$C@vLQSN-Meb*+WKu*;AMNU{R)EiV&59yzsASb_+d4EM2#O?E5TjtoiR=J%v-22o{8*V^5ZMXsLwBhsMHrjCQ zZM1#^XeQtSJ8cY{?X=-8u+xTXZ>J4cZ>J4cZ>J4+w$X-bZ=>}aKwkoO+89_OJ8ifX zI=#Y`&#rJC+IM4b0`}eT3*q+NaP958t4830#S7lsd)F;C-I*hSs<@tpat7miy1M)Z z@O5&=E_`7d&wNAUg;_((8$q&VNN;BL^C4c8-^5V%K3!O>cc$W2{#lPP5AX8Z`A3=k zUhno-WwE~#to_q1`@6u(cpj9+6D{Ea1RZTfp;9L{Wo2=4H(1Bv_ka&)%x$f|7p&|Z z`h08_!VhB1ZrR)i)=v3IQlHY!MZ6z!e0xzCK!YrP0BjOVs612gApdAE-A_~<@MABA^d)thfoJO(%24@pT~ z>=uL7>CFe0+;kpiHXY}sT)-!ppJMn?|8z_5jIXkJUY>y*+6>ZOh@W7%P)Fhf{UO+3 zH&bBG*^j`=+<+(QIj|p3l)7gb%Jd_SpC5zW`0S$WJyOp@6t0RRccfBT7W@RPEP;Ot zRyM0&Z{Qcf39Rk~hB5<--FY^oJbqs)Av%uWXW(l2e-5|&FEOi&-7moEQhs@KFN3R9 z`XyXhs`Lst$(AaA#ZYLrVzc#{$(74fM&|_jE!gC9Py7z7%o{T{zXzMl;9_7gny9aW zDRQwF16^61#f6N!P^Ze3gxl+3s|}9 z{njLp?sf2@f!vU1w(iz95GeG9i_u@3M$22t-{8u$P3e`z{w?sHBoqEN+`!q?-@)1_ z_YNCS66_tw5oGQyra=}j0jo36o6Pd){-K;eIgtN^E6X*05A4S~LEXEIi@9aIq~iD~ z--lR6gDhi~tMvBz5bSNK?gOxV1X-_)?j!K)%&t)vhaZEhm*rpZ7ulTh*!&x;99}lB z{eK|UMLvAyut<3pwfX#Eg!colLvlHM{o&duk4-yx2sSDI0JwI_d)IOO2SQYMFUN2Y z+$`0Nr5ZaEy92D;YWR)W3W)u-tj@}e!EP1sKFr#v(`G5Q%9EH6)7?pIXTQ~20xUup*%^g4tAkBoepMJ7N?&CD~o+6SlJx>G!Gl|5O_QD2!{HVvUttL$T{n` z@t2r6o&DDB*Eng;sBQN%!B&2s#&!+H;?~Ww9$1-v5^sI@ry5Ieb6{hQxd)URz&*Ei z=1stUJ}Oo#l*{*kf1Y^=J~m|jH5cFu%zGl=h?xUWY|Oj?^KfR*uJ*}kGw?7*_O>}V zdqY?D8E%0>rLBe^YQ>gdWfpN$X1Pv1*IR=(WaH|#0?XB1jc!};HZ7gMbmU29d+-h| zoxggv_Y2zy4y-rQ4hY_6)}aeENWc+b6vEWeRNjkgnc zhmwCR{VvRYYy~QIW+=0qV!OMSL#{lDjRdC>yMg6NY)znwdZ7 z6sWZHW5AYN-Dqa{2y*7q7kh%2F&j*$uGJ<%_iD*g4Y@k+pMAhX89594di~Q>RmN~X zFv?~E{+N_Uw?9~!cYHTa4*)Au@7ee=I29QSmaFq@90X294g?=GXzSh}2Saq^S#XzM z0YAj70}a*t3}Y$|X}o%NXL%^_5Jv8+uY#4?pkn#oaKrh`upV;lsu$CHDybgx%S7gJjc2V?6KA%3!6Wl6Yyxba+5Q#JORgpwaYu&=HQ+V zR^~UVl=~a-lv{n)+O5=`R`7=wcRw4V(BS^2Jp*j;d_|XQ69;F4tHI~Ml~td9bKwcD zK5Nect7~3r&R;PPqAVRoh6lP69?yjtHHksSGI2RmgRdM>&}HIxcaR97Fb=* zS$Q5rVS;}fZ1ABtGq_wE9oUvG0O# zO??Nl7$Y6JP=lOJ#u9uySi)V0i-m0Ia<_1IrWdTHBP4 zR2ukNaN5*$V1wGGZeaFfn^Jx~Lz!(#T&;xi1h@(8iOPL*GdQP5U0<8J1)?&?=T@*X z6SI5Bv)kLjDerAyd3JjzIOV+qELS&%K?Fd%3wNm zu{H^MZ%dvM%hh>F?*k8IqyqPY)27fZZ&MGTP?%KTN4lKu#u#MEc)vtV^&i$TRsZ1Op}POgJr!p^hsT-v zIQs=y-B=QTnfaFt11NWMzWe!AOYY`g0sore=0>z}|9ff`XcVWv0ap|HEnL~uNax2(feoNMyZbX(d3N^~u)E8X zMy^eKz79@8rKkT51PbR>cK26!5>lVVe*>#aqk0pp%-}xY-vS%FlTBG!xi(q(Z(-C>zF3c51x{#i(UT?YOFFfYX>82Ze>#($Wxzz;MB(#Kyr1K zrvsew_%cYIAS;9O-1fzgJkRY_I=IJtaO=>88YJLguyX5aV0i+r3f5kof#nJK39x?D z8Cag@_G;i&7(TbxU}pcl4{jB!GZgyZ7FWxwJc$hfrxL!}k|(jXz^R0dlBXJSb>6@0frnPk!us$$W6|x} zXE+RnN}c$=O&%wo1?wb-^mA}!9c_&Xmu^=2I&K3-%)bCu_5>&095wGV;I<%j5m?B1h4i2xpvjVx^e4zGZnj_^U&El94W)D$kpY~%e!@O z85HL4%cH=4{922$7|BrP-61xig{?Vgp zeu-Ic-jI7T`!S+6V;K7|j3`d(dx_hZmuYXfGQZtfc>Zc$779&3r#jG)4!oTEfpa{8GR?$3AZxg$apdK z@!1Vmman4fl(C+_c{bY4>MSR;mgFtaF>qzoCCXBhiQv@aSg<@bIUbyv90!&swaMU| zj!9rSr=$1%=M;#?Lx(QZAOTMRE6?eWC*V}D_BkE$1UwO}pPUYPPRBH`F_#r(|6&Hf zkHboZw>W{#>u0tp8sz>%%XH_rB3Qp!ene_Yi2L!Zvm#vmei?x+@sU zbS$>kI`9!?(CDsgjJ&Gz$lTxG$MPyhHBDvJG)=c!H|rSQ%UN6 zxHig@)C1ro<&WGX^&oO}N$Mf6>y>{{l>Pb=Fp+#yc^GVl*2)DY=MEMzX9wCm!cgW0 w#0IsSJO)k!eH1KLXES*moMy7vCaECL*C)W&W8|X63@=y$EL!l!HOKG%|Bv6BEdT%j delta 12561 zcmZ9S2b^71wZ?anNufzeXfFy8UqlEJnm~kz)KH`gk!F}=2AD?@LJ}z^Z~~#Ih=8Ml zpie+LNDI9a0-=dYQBhEyqJj#7N)6!q|L@#yI63#1J^TCCT6?Xv_t|@$bMMKEj}3bI z@j)vL8u*E!r3@&`mSN?w8&;^+AAS9hQo5&3nl-g&N_XKixO9}8`ef5OXB`P2TpsAX zVg2FNBBYahW>4#!q1BM`hn7|MwC?G%C(Z0WY*Np0$_JO1Yw|u>M|mAt*UZjiXLlZs zMMqfzpWQRHKjM4v8J#mbr?oTqpus;fw5+w1!HP`d!vRCe%DtUqR$p^9q_d`UPw$>| z)M1Cs>Ym*S?JSEJB&b(!R#GFsfzm+&UwsXf!XkL~H2eMCEfEs%ZazoU$6 z@U0qr#|EF!;JXYh)lv?3YZ&YapLzJ?&c2j8%6<*~#0Ed0!4GZl$qn8$v|4&bGaCl8 z8~m6CKd!;&!lz8@n#9VrE@^K=|J?>Zw`vG3Z19U4{E`O0vB7U@@LL-Ewg#{6LSV)- zJG)vLztb>S(%|nm__D*6Ucu!X{38v%#?rhh|FyKBe6+#WYVdX7J=43oIasZPKMCg$ zZJa%_0X!SsQ?+Gd{AZWqRY%!+DM3fsyTSKu@ckNmQiFFk__PK;s?PtL43Dl0;By=N z#NN)a!>82`-pLJ%Q{j`Rc2DOVx7Os0hW^Y3KdZqXZ1BYk?jL*lpaG9A*k#jOm#c21 z?u^3o7;*!aTllkfji1k!7y%!J{wXm3idScK`126Q3~cLPXzP!M#~)?4Y}D3|0XzPb zwtml2Rw;83mNM_&4RH15pi&-PaLzXS4ymlt+HOT0dnbl-qUFX*5I=4QD zzlMM7b2$Ab#R46{G8kB(!&ie-p!@j_@D+P6-Dx*Ze5=yN@^!e?Hr$a`+VG3vR@(4Z zU*p?oI8U)uLf(Q{2j)|hy5EQgUwpy8i;{^M00=vK1UK$=&$MR zoHMfe2}ovY*U|eiHRadRl$n~?)ac3${8@{38mY(^@JE?97` zEvZlMU>Ds3IkKY^189)J_kvC0brzGZ+{YhnO>!BUdKW$`WG`Qn-W9WtulvCxXh~!t zSXsyN1a`J67xoT5Vcp8Ro$fRALHIDN`tmL1A-L(jOG@fuw-~HWZ{8v0rt>hp>Da%? z1$>nLF`6IskGJ*q_^Mi)>!*;3tsosm{0Pl~Iua-76JUc~#R9u2e+E|O1l&=-0Q+%A zsrxxinSR9a^CZ}b&ns2mmU;@Ja0MJWBbCZB;Ayb31bzmrY|g;Gz`p_~u)1H;lo?p; z%rhb7@%wBA(QyR70oRxRw{VyLS$cJ`dk(BF%P)`ad2oG|eg{{UReAxOWGj`wrztdB zvDtdfcl5%tet7P;&~Lsyl#Tq|VdFZ@xN zJ^K<^hsvF$k6p^*_^)7P+p-s?vqFCZZ$7vb<+1rYSh?e0rk6+e3V6{F4&=%0)A}j` zg&w#V{i9{{VJUeHu1wo3y|UQ<6TCCYg#Qa};7sb@U~QCp!Uk6adjoPXnR~=E$ly1@ z>J0Qcy*#?NloKc$@@=@XT;q4aemn{4-l3h#k@1p>|9!qr} zfaT-JdRaYl|9OedG3w%Q0P^}}83=!p$tjP`Ah2?H?~;?2sp|HFkz@tUTj~ zg4ZCg?7LxbZIs7m8Sn^fvhvHqwNsvP%Yl(stz#U+JeNr7r@G#su4@1P$?Q=bgh?tNzd+5>j)ePjNf z)hChfQ_F5$_(6OL*`e6636?$W^pCx6*qFlZMd<^}F z0bGg=0MC*1r|EY_u^~O%p^T*;O}`Pno7e#fu&Fv)Cb}6o6IBI^JTz|Vtqq|U(dJPB^A9nGcRp5BioLHTwxWv-^! zDqnd$6sSmMJAtzjJA&ovOl)UxR$>BJu5JRlUBT{t6WawWSLf1y0ql~i+l^j6t_-UP zUCQo|x9AO~QwM95pnJ6CSq-^5VwJtXqiER+dxKM4)x~ZfusVJCEml6h@GVm(`$80Y z{=48`1S@l}dt-laR%Ac0Jjs0toE4b}mK)>%di}T;oaBMvyXYOSo%(HuvDfL#LA^(x zKD;^@cn~cI?GUgspE|tl0n3;9V>oXfhLLOM)y&28OeOVBa8f@MEKllFz)5|wby0yz z*Q6VqeeKtBdG_^TVE45S8QA*To`8qLm7AP_HWB`l^;n{<~A1BS3-FLOb3sqWv3hkPSsF1-mm=5dye!HdwB10x^yS=d2t9mRBl0EXP9(WWeL-?vS%b3KgC{#1xUgZpjzG?lbGtIM^CgVVwF;9r9)n_mn5b$EiS&)9E()wM1) z`>&h~80pY~8e|__ z&?-|uKym>sSNn%xW#`jk`M+SpXUE93s~=8p`I*W^;7sL0usl=wF*sAX*lnsnrF;5P zaLUvrV0p^aWnjyc4jI_`tu6sChbwmt3@lH;E5O>TGq61IuCz=UNTq?V1gA`01vaQ< z>Kb}KmMP^|)0A1J#PyX>o&eW^-BCF=*MYNp)b*FC>me$$eQp3NGcl`&JhNQ@&hp*} zmS?s%gR{Ihf#vEZko>LSl&M?5^8PY)JH$W+yp3L-GIa;oGG#EGI#`;&ii$Y~K-F@J@XxVgfZQ}TTaQ(0@ge$9;sR!U$ZuJ?v2&}IE znc+c5-PuEMW%aVN7#?Tp?d~x$6L<5)qDf*{rE`JKOT$?z5 z23$|wQkF)2%>h>b> z=jne(Gk|g@=Ub=Wx8+Xm1@QmToZPqpoPT$%0*&JI58!%2e}pTWQA_AWcoI^dv3~-q z3;#2??&~jbWvvH6=kHRzgdol|$l#a3>N2~(f(@WNv-=xZd1m)_u(QjZMy^eKz5-4{ zm1p!-1PV{9&F&xYB&0rrUjwU4p?V#x%;4VO{|Pqua3W1YTFFVA;@|9~IHMu(15Z$fls0AC#SX%WjL1|n15y5stoG6F5|@+3AAoR#o>n_S%l0(}ge zTduF(kt-f|75QwM94pdW9`9cys8I*;=wz@vut?(w}1 zt95{*YpQiys#5Z__bIrt`b~2^c<#{Z<92SgVbBs0d6#Q%k*Oxkz^BtwK{=55UAg+W# z9I!c9BOROy-vX?Sa&6rE+v@w1;>t6|-$yW_=aJ7;^3EJ9TXP;M0}u(YfQSIQ6Qt19Elw z&3Q+#GJbQeT4!Sd#E)k~;Qv!NO!0AVHyV7Wh;bBirgz)#XUMqM^-7uYfN4cLuLc`nysU}Y112Ft+15oDmVm1|hPhF;68 z%n@P&#c2;*S)N+eDXY(XBaTVNAL)30s_1`@G!=oux&vkHr7F|GS*dAYc~)u$I4gA& zSe`&L!PzrMgXP&X)hvjcMu!g6AOUBCm1on)6Ywiw?Xzd(33v=xKiM<#?3rW1#&pjd zZ2YUW%}s6kG{IpCaz zB=v1%%CqBp!OH4CD}M)`aR!xZSHF}SvpRC#HCC-_eJ%zGavnG@eJ%j|S;7OYa=Gtx zzEAHvo%87}F!GH5ftXX3B!383RzFqBOyB~qzYcU12R{;R{;fgRA<#%g{-e)X@39qf4JW_KsOAG1?;hqX+h z=|K7&-34B$mek$wHEK!S1J_1*lDZe1r2HnGr0zqmE=k=FcD(X;N;R-Q0Tao?&O)#m zx>gP_IcKnlJ~Pnf0h%%=AU3F_<{@xO&4XaMI!n#N;FOxh8$cA~4*dxDGK?IwnC1mb NfJFw?wp1Q2NxAh5O@lM0+CVsl+Xkp~0iY+QT2LJ+~K#u?rumN?PO(70aeq|B)SppM)%J(697|h5MuaE6~ zljR-59Nh4?9P>0fWOPIzF%Dn&(}q}VNSK1h6%%H#DVD3$+H?d?txICt61rPhPK4HJmllY&qFS(ormlMu6Zr4mKzeZr)0kb7(W1fN(< zu~rsxX5T-ou*c{*KQO%K@Auk^HTR11lkszHv;F=PA30`FE)1V_ETY0F*H@IUqkrO~ zyNYW){S%{Q^bO$sGVC|7Lmi1gLyq#u2Gw1N&wp7RwohE$281}~Slb9?haH%~L>L{I z3Gp{3e$pX%*h|4}UcB984m%9OL>NYG6L#~7(9&k^ajaq|LsCZ}Tab4KUR$78YfnMQ z`l5P1zMAW0leTqnne z0W+3uP)I;t4~=P!g`Wc!EtY^6GnoK!lbvDP)W@Qk|8hmR8Q4)GOov)zmj+#CMf|jh zpC`0E53DSkTVT}Dl(iV)H{zX8XEGm41F8~BwG*Cc%uS^XF?-UcDs!_|9=^~feoiwkga1-2R@dZLmKPt2Vj6(`x z{w8qNhBY5@+VJL1;$(mrfmv1)gB1D*%7}VsBv6J#4A=;5D0yafjg^J3<5M!Lc>$R| zD@3W!t_F4`oln&Szm0PONIUvIDK>x2f z)Z8dTb^^DbZ6BigaMKhat~Le+NmJH=WFaY%H`t(OiDu^uDKb+NFZH`{MQLCxGN5cV zq&{|vPqp_T`HDmhiI1HQ7zcc>NT%6g>mf=-DYqlej$9y!SKq=lVpr2Mcsk#I=P_$E zvkrjVQB<*bhdLdmP{4XP@+KpD+94u+PP6^&l#uPjh{AraP zmDy1(B~FgMWC|Xga%|+qLf<%c^41%S_H7%d>WkQ`sY>|7Ci-5;j7FqNR=eS_4k1ft zqLp?GyGSTmWm)Nr1t-{$J#h#$vuUYHrW2PKwdOreSkBi}P=nB@YpVHAqbJ4R$9S?= zp7$s7-;5j!MvVk{KqMVXtYGbyYV=Z{p$mDmaj-}_oJg=5r4<(GWEb|x-=LR{B-XOJ zrIlx;H@j?;A)-ST>BountRZRDp!CzO_S^DsKfrqCuu2yu?tOFK34hkdp&?XA*RVcs zGSS&G(Fpi2;x7cQLE|K_aLsF8Gze8RW*WPp&t1S0_sadV*qoGf!n)@Lr_=cod0@Oe zg-7)3wtt>q%!8d*KWpQ{y~6xU@Vqg|ec!TSDKWK`;_j1A&DxB)*7v%JbVkF;*;mZU z5n3^m9k^$=TEFMF7z-&5VhBKTHuJ(SwUHAf%kU$=3^%U!7nR~pc+tn02?16gU#@5| z$^t=cOQBe*Az78EFA%F&FAEVxebnm57xlRbp!7^+Q1h#smn?baUjp2}>L}bamD5qE zoT{ct{aP%QQ3`WH$BGRljrnG-fI4?hjs&${InG;d0h_7NGkDE2>V}iT)%Vr8CBIu5 z5d807zAbC*hxOQ#QP`bVk7b>nwMpwE@13gY>?UnRvORGNk_6kh%>+?TxQtqEiaWU6 zYseOv(EY-nr$*)tlElO1ZD@8s;{YXXHC0Bi)3N4EtMe)A z+Aoi`IWL%P`qKl{o+OZL z;y^H21KGvPo@jNdLfKyCW}AYYT}UeXMWl96)lMB&1*@GQ)A2a;Tf3=hzoz~HsVRG$ zw(Q@AEnMtj&goTxksAmsOJUDTW|=ek0RefVBd@JBbDuDN=PkKx_>$>}U4BRJ2Jz|b zOBHhRhE-vZrQlR;jIBSH$m7?;%bf`nMTsm$zT#}z!hSo)n3-P?9Zauoho(to)^I9& z$}2aQeG4wnfO2BU85k(nuD#JY^j(_|o!D_WI-YMcguZAw&(;+b+Tr9 z_sN;G;RC(@aHA3G4JT(1VcB@WytoRix z(9d3vb1OSX=^ave`#gt9&7FrJ0~N|YWWR#sh`iK?dG$0_mzfztwTyaQTh%$r(VME# z5zk&yqt6QIo4RTL0Ld3kdq=04*QSxW!o(2h&CyYEYg>+TY)&=i_dH8#wm;H$YtDWZ zSuc9v9ecq1unMg!%P@pc?~ac7w)%3Esy)@OZ>dS?SjzqXEFwgM#wMK|WIN1+>38a5|FZwoiPw`yR2zT5FpA+e#bCLN;MBj|z~|rDGW0?K delta 2961 zcmbtWdsGu=7N5!FF?qox;V}W!37{KAA#Xqt5`v0RXj%nBQ9D3EYyl%~Ij)C2c@QB; zQc?nl*1GXnyH=?i)OMxY^#lP)DXpm}B1^lVtn0RP7ipJ*b@%K963^M5vw!XUG5PK{ z_wl>GdvCsPTJ~7R&(dY8fuB7fA(waYe+Q?&lao3e(K^n)%zm)l_~U`svvgaxTR8xL z0~DZ`3;=!^Ak-1ULQm{xsy$x|SO8FP1*FqRj1m!G zUR9-;0k3GO6hF2Ngd^?lT|1YFTjdMdc{C5)q=A;ZjcjZfNVfQU=tstwz4Fz+B`a%~)hY>KfpM?7HHg3Cd6b`xpm;0F_8Ne%N6&pK>cmqTe~noID#wG7&@A9A3?DPP{4c!Sg`Nix z61(2S00ch#zXh=fK*-S4%8AKk^=$Y})K~-+NBaT$EPlY3alodbqpW1|0zh@H<}wXj z-YgQ@%XYiCxac!ujH*9#>u$;v5J!dmRqyG`7|jH@XX&8UlIpR7BqN6Eqyla?V(Tl< z1n!tZM8!sbH?`-ql9R}*#3vEp+&pAks#_aR_=Ks_`0KV7lxRVib_yj5UhQoZYr$;V zJA_~`){Hia#}Qf@NWH?+a9d++q;C@ViIpe2frU7KIc-2qe)@(cS6Iw)7K#cX}dHmHk%W_%8#@IoijDC`+T+#L-K2gTa;q885 z_mU9NeiDDU=CO-({~99~+RTaq$UVTY{SMNE%N=)h_Pfd^19yAy!=Ko9(I%rD8Nt~j z*-cT;_aEBtVoff&W2D?ZqHT)R1ogjVj|iJQa^#+#eNWe+zPv?&__HM2H&u9*8oIRq}z%KK+kKrLuEc(6>Iy=$iN+WzvkV+fk zNVn1JE#?n|qXqT9U@!!ss95Rm;HHd`5*caE?}G!8iMeqOJ(7@r$tJt~otp85@99|U8q=lrC%PoxvETyHJ}9qo+ID30?re`x`PN7I9tBPK(~ z;HQ4yxi8KkrMI+maA_0!4!)1?5(?>ALA5A>f%7%2d6m$+K;i_jGPmSTC6mJVMCiEI zjjH^TZCOdJb#1#Sc_VdmS5M`$BlS4b{uFgY9YmiEOZ?74p^}C zrN3VHyIVWRf#vy64K{K_pQ*Nt{+@}k$>9}5m4TYcZ-gnXan}*(TN1u4+&i_#OF%34 z(`d`7v80=q^N+l{~bpHFiF57t# z$O&Y-HkI!D3GVYJdkp3Uk~Lh+^=4(5CC{_fc*%jv-ge3snw2?zwo$1mP`0O*oUHhA zC&6|~=6znP{2XhlWE3)@C~-SW(!9 zrI($ms~y5<3%XR+Y*)2ts>G7&9UzO0%@j&)1q{vWaL+)B#M*rDRb<7*%kv%KzJW{I zh_>@z2z*1fYhhWc4y13uYD|y7X2sxJ&Flt4EsGvGKfVDdJS>pz>iN`TRYAm|fUkE- z9&kH{+>(SVlE>A{-11W_8!97CZ}6rxL7k}j3|==ZS$8(^9#}{j3>u{mOCE7QvCT-5 zzL!iBMsK)+MpQ*oU7~6T7W1Uyhlzg$3mZRMGBTieF;DspcgQA^635W0L95LQ zX>X$Hb6CDZDvwM0Em&y(V=z?N4(Z>y<2JoC`H=L7YM%R_)}T?nA{k0jeF3LBrKu%J zTfo8>pC|R2Q##9?wz;Jnu1M#r!`<>OmLZsxVuLzKbqn4&EZulEX&+da7XXOjRm0Lb z?laqrG~;{eGs!kR`>L=^a-y+xRoe;egJIVHC%qDI6IS7{g(QQ4`W;` HaR2@fw5$C> diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil index c101fc8c7f1997dba7d8584f3ec61030fb5e0059..be69aad2781f64f94fa4a1d109e7b980e3bf0fdb 100644 GIT binary patch delta 860 zcmZ8gU1$_n6h5<=nZ22vB-_N%yIF-9B$|YfY}Sq2+Er(gRHA5DVGSD*ytA6LlpuMq zSZzvX|Kbv}vKlp7O*_GeTj;}T0?lqzCMqH*Ws~}_C^msoA3~v(zWC6G&eV#~`*1kl zch1N8&b{2MG%dM0!|ffbTfW}^Q&UR#}L`s%8> zLvZfg_P8^=d+!AVK!AZd8-RtuY-pGqd6IwHJ>I~b zmiWH?bYBoLU2;LrrlB}V>47p?Wlb%d#HM(Bk=v)b*@xmLH1P?ELBL(z-kVYhGIQB| zUg<@jnt8k!s7d_(jR=eM)c0GPI%l0r#@?Ul0hFzvLzF>JiCweF8~J@s-{&#ts-ekq z&%lFRRP?4_1P@{I#rQgAGeyRCGE&Wf4c@?n*S{?v$o}Z_7xE2?eP-D)W*gR`8wYV) zBm*L1yKx|P^T0;79Aac8Mftw`t}Q~W!XsDxaw~q*Apd`Alh+^BLTSlXZw~1m;aF%E{EdVWQiD zS_{sCbH!ou$Kjs-Ei`80?p7I`biBUmC(-eY_#_x{+X5)=ke}u32NSiQ2`4n+hiK1h zS*QxVmOrQ}ZQ(rl78ul5Toxk}5|32jtBOU;Qqg2YLanB)1pir;;m; zVKOHL8Smvjag-z%)~Z58H3 zkcz~*h4goHsbCMZ9A;?yPV#G&29hzvmwyfDHWr delta 906 zcmZ9LQA}G^7{|YRxxKx;_qG(T0 z|8xF1-*-;VtUYhH_J{g>@V`M|&R?_M*FJuK__^ZMq4$1Ce)Z1K3-#wJ{h_1JW*Gnx zG~gfsY7BV05ho<<_xVcS+j)R}a~;gDgs#)|#NO;a{oRSlHujv|a68cWRDezgBx;sR zZ8ETJg|Cl*Iflb;BLLszI_r;{sOonvLD%7(cakOwHdQ0%<9$9bQNpImbG}}zZIpIe z7_Lx+>R`sdjMxt-Sw*?fD*6H=!hQ}MjK+K6(L<{va_*Rk5~l^&P!cvpPtA;ME98o_ za=E-5`*!Dpf#A-7yM_5oeNe1?cxUy}&7xn|mvIcgOGtmjvnPN49| zL-%Z9>(-Re=2L#!f4uvT;6Zn=+uc(58HcBe>vw+i-n>*?U-j#HVQz~XBjc$=^Bbru zs)E4K6R5ZR>9J;72eMclBRbED;y6OU$cY5aqOo218vP6!T-(kNq$HsJn5*Iau4yA! zQbf?>(Y$oJnPza>c$N5s+B($-yqkL2NbiyIGy3Zp|7B0z73p4B7IUOgTjk{h8<${N zKzT4%Il3LR6>NFP&vWO=b~S11@PLs@K3)Hfz-@?~c2I0`FO%dv8zt?D)ZSB;HzmuJ zMDUu&q96R-)6h`qXIF zV*0_JVQ(n%XF806=-otP#-hBKPs@xe1IgNSk)cOV=NM*G6+g=1KB+RVB-oO0SFAO? z3voN9-D)2;4b50^SxzKH>Y^rEtVnALfx3mmQQ&P_gs<}ppy&8yBkw-@u%@(+47UAM z9!H({nU+$9d6o4#%|(?f7FdgPYhu@*G8A)c!r8Qt;}%M6OQrm;KWkVhuuHAZ1B(T2 eagFVo|8L>CVX?##j~%*E;%-#gIf}ub2;grG!X&-` diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil index 873fa29da1ffca54fc149ad5b31f42a47e79f5ba..4ea23f7699a63ef9a4dcac871e1f234a5ea5b0ef 100644 GIT binary patch delta 855 zcmeyO^hHU;CBn%$@SV$#EerNKbX_obPUV2Oj~3jKcxEhuNBDXk8x!!k@XZ8?}k49na8O z%iFh>C*?(IMk;HImxe}$^9Emm4H<_Q6y9x9{Je|z@b}Wt8#mrW-io<&;?$iN%UB#m zjcmks8|~b=#eBOa{v zN0Xxoo2XF4Vot_@!`qeFIDl#dW-@MLxtZa_FlFM~4o0R11qqkT4VH}NvjiBXrdTI6 zFeq@a2})N4ewggV7w!Axfg6ti1Is~$Q_ZFyn9L-6IU1y1CAdSx9xVSOHH zffxgZ!)!;?cn)k}T*%98kmHbYgh4`--#{Xdf&+7IY7wG zbBc@S17}v9jZpEEEglb?6NK74XS8@eapqKkiJv{<@x-}6=$Pl6Bc3mud6i(|7g>PZ z1|c@D1uR}~nmvsfn;*74P&tq=&C!#Ir-P9d7<0S#;vX<-+*{mPNO>!Y_9?*;~ylfhDUn^&;2F!AXbSc}h?%YEh? ztK266hROGs;wDRQNyzdY4G?P$Xkck^XIRn8AfP9(%a}nVje((3f`P$nvM-l!y)esx zn+6J;hK@V5n1op#%v3PB+}0W)(Ig>tys&}y;O0gK1%??4A}pQTHkfSZWODNrV901< zaFJ%qmM{}w35}KJ%4AaNah@eDJAb>`c9R|EyNt}OCKzrpuyhN~%q_@CRdCjK*49=C zadLE&X3A@7@RV*_``L`=*7ID60}MQ|d$e+L%5wEGQ&RIvf}0j_NzUtzop~qb*6iC; z8Zw;Po+pVPOAA*sS2cJw;o;6R3tJ~nJ!!gk;?#}4oRf@FD)fp{i}Op1l2eO=nO5*g z&hwrrJvT<~O7tx0yLYBAFmDcHHDaO%1 znJiH8T35n+YC#QiRznCUgEAW@LsPdXmT`R6A=no%*hzg zyj_`%1E@w|=E1EjH!_?UrZm3oU}S1gkZ{RdV0qANmH@-l6zil01_cf_LFtmf2a~<{ zqJ1AdaN`kRU^%F8irMr-lc|I+M}yRh1b2woqh>RX<^#NL!b}Tz%x?%XvoJUsbo^uB zJE73ge6fXL#S`ALBMgiV0z68AGn$P#UQGVP=d4r9z_yvkNuWnU0_eO(Meec@6^}UaL@M?9W5CkVB9&S>#`;moT96Tip;sm~rajO9=RrV&;lzqbX(8() z_BLe|e3T!QfI#pO9*7-81Ok3W05}=+z<~D|nTAK9;CWsoIJ}(G8T)}`KIfF0fLspDH8imVD;!+z(sCnxOpw*!#WO7F5K?d<>WyKzU6fGJj zZzA1_n2bs0kui&Ds==Err1En}1BQ#pZK?S7;%xGdHh{jXL=trZG*>Jspq50Zd)rLX zih$piQiG3NNV$;IntCZAHNBeE!km`<@#=M=l9i0ishM$cj0G87E)A7f$EcT`7ep%( zISLXwpaB_JLcPl&S)`c^ajDxwENz=lWUX1)=Sm^l>$Z z^1j-1$cH?;)4pt|&6+-RbKnFfJ2>IP{nJ{QXA>@-NIjeILkx1hl6ZUOnUa^M=d5ok zSwBm64|I;%biC%4Vq4y(i)WP^AT0{3K) z49olVMdwp7c=*@heSf89BjgO#KW$Xx!9TBT1)W3j$}OqcoL?G`Y!YJP)*t>7NIKrX z1i(MM^1L)PoALGfxe6myHvRsZw>v4X^n?A6sj}b9$6k2C1Hbu_s-{R8xMPp0NSvJE zHLguk8h9xIZ~06xob8_-Mk@F5h4?ehE^)KRYl$1d&Zk1w%JkRqMf=j^f z86mZ@$-VNpnolT_Aik$%s;y0hgrTL+M4r3j1~O*$i#$AdKGSOKlnoMYh<|#?UrcY$ z;1scTM}^l?_T6?JlG|WJ5NYJhK%zm13>|;qf`^P9bLlSyN82xWJ9L)5Cp*LigZBR@ z*Z`VEgQ2VzXTJ>R0|h+9mr~IZ1CT4UdyncY0wO1a?w_va<_^K5n2a@#eCAAuuB`A{ zEAolbTLdIte}sQ}6L*%jhU-iB$A1L|fZ{#}>OGQ1+QG=QmMnJ7X5Vn=?`mu`(x>K# zFWa{hvw-Q7UMPj_vwUt-c?@Q9joo*G#3!a3z1%kiI&iK+mQx|4MU+S(?^(oJk_CCx zbQ4;I*o(wKJgmUtvA7yd`GDq!UWqPMw@7{BX!YE$p_SxaNb1m%Klp!b+N+rmvMV#~ zM!bxb$A|U~VHW6omLTUET!t;;`ZklTrs52QAH|pHQe=sww>%y*jdhTWpatiLPX4LT zD#bkzAX3unrP7E6v z4&i`LTHeiEs?D_fAIBqkv)|A87;@sDCmRp&f?>L-EK&9;>nYELQz2Bi~>y zNcMIQHfZX0bA za=JyB!8t-vpb=bVTA3mVwTH?|Ly$2F^oMi8nRw6W(K=3-xjcd$6*0`>R%`HbaX!wL zSjdN6;Onim*}6M2x}+@gxPclDVZj_kO*=|3z~xvNH2>ii1HU!YMq<{jEUx1x$%xWE zrS?Muf1h!&{NHHdjGJSa2Mg^BtcklsnYaAYg4{ zAN>hMeM4nL0(J)dBM9LxE-9_Xzpd5)e;`BBg+{8s$8f;-%!OH&aL(5*ljN?Ljyi80 z6#2fD(Sf@#h={9lRyUzPaa>7kRZEV=U*b}~0Vl?6(BF|w;$+PL?!=)Y4tfZ!{-#w< zEs4AQcWXFPT$93lPwU-&!(e~QQn_7t+QzVBl!i{=!pr#9Z0uzVB{(33fr9zedt;Zq6PT9>NQ0cZ z1(O4mky53SonBk-gr_F}YTN)svv|^tyMbYuFq=Ld%dG0*@^lC)};PZ<|ygJKRgk{)S)c(SoF}EeU7)lZRQWR2mPG zSmIh!>E3k8bD?Xo3-rE5w(E!Ly&qES?jIK4@eQ4!3@?q8zo-=zunBznD#5n5Cn#|==cE`G5 zR%o^Nx(*#jdRvFQ6IqFXfhQ6G(*{gM zMMZ#Vb5;V@c6ardx&dpoHvu`dzV(y?U~;*80SS4NrYRBu3AM3+(8f8X9V>w4_@6-9 z?1m!#K17Vqw17s#ic%8-3Cx_X`~1KHJijO@6v(vWt@08k%6#gdJWguDvYdVj0NPp4r5FQr91oTPmPU|w;OQ2FbJnn+K%AzTvFM+xT zflkucl5;5&TR#9(xefH?O}KF9fM;(om}fDf2w&Z{`x_adG&_e#1}4`Yr|F;Vl}bvo zeNkF@{T-iPD9#!%`PeF*{P3Lw19Y4DoN#;>^EvLg8WAu`r6i27GFA)-O`V;q(!#bJ z=%ekT@{1hG@T$51c4mGy`$WB3tmX!cTIC5Ndc;bbaM#UFGit9)Gc!J%?pXkVeah)h zR_f_pHUZqBAG7&6Ao%Rp5kV0rA#>n%syaiL%F=sHCA@BNdCG49}`Xz1UcL@M+t2 z{`g8>8kc*$u$&;7?4y&l?u?PK!|UZZYf8C47Hw*==LkXL+vDPj6`OP z&op~{lDYwN4wL&UDVFQ8rC~w?h@#HaGd1t1X@)U+C)h+OWgsj%Q_o+0V&2W=&w}?* zEZyizw=b5?3iE6D3D~;!Y>sPR4xV~)KHj4#3GAWw)NYY~<()3iFvh_mx!W;-4^Ekt zzVWAdj67K8&%th)zbEBQ@w@AkxiZf!GyT{3M2}Wj=HA_InHT0<%*r%Qfn|O!ajSXJ zcj`l)%_;J6j9cXJtGsl-@}Gg4L92#VcZ5iah8A~(tQ`8NBV@(Uw2lzrP)x^X=<-=p%LtZ-iC2M}z+uEP_pl zo**efof4hS<4M_f@{vXO8E6&@MS_757? z^QE_4+s2HU^N}$-#}iWI!bubA_IR9%aQ87(9g%xNfvjSfi_uLlpVbz`ay<4G#_TIR z37AHmO_tv&=Tc`glzlk)H_8oZ^5XA6{9^R3b`0fC8nqssGolxMGa$T~ZZu2$Hp?|J zFEDi412CD^8ga>%Nu%!FbkVeAS2JT#y1O}l(Fe&^?c5Q);K+awJ)>*Y_|CfT%E(E9 zYm>eD#@*a^6?IDBj#?*Z(8&jb%)BpeU_rnFyL^=2$P|5HwT5z7?4Pds__NBk4D9D4 zr{)z5GUiBT5?z#dOHqj9GhiZ&CXy?6NNNuGT%8;Ej>=j|T|s6WtTU`A6d%R?eak{n0Z6Ld9~osr=ue znq8*a_m1Fh`v2|IY*bqIXh_E*>9p)T_XZ*@*h+G38s&9xQ{vn}RMxE|ICjiVCC*QV zqe;e&BO_B2!O{HVf4+b~=pdLZAau;FC!G`$|Q5UGD!e;y% zm{G|hEBO1QL?~9tW1#)KI`~Hwi}x1jEXJn%{DMLL!bvIdrOY_La1eGR&bX6B6&}xA za=su##)@YS2<4%48gY~==v;La{b`T&oCVojOk6T3U6kxUxzrFxt8vGxENc70xJr#j zg}8}4AiM#h77pyT#{Y?0Ad#d{U==y-aJfUn1e%xo5~VWk0V7BO?JNn%0i)3gxSQV= z62UGntLh8sHtbcb7|-gyPYKu8q4X7AXZ98Wclu@iIl)oI+S37gi}Ad*2?R{n{-^H4 z^Ed<%y7$dFJ9{A7>pXUfeZRP55R75@%&s?Hw+Y)N##R5#jll67AoBk6 zc-nY#=KZ03S30or{!8_EQ=+K2Z|`QdnLu_@*=5H0)o3U&QEnYJp8iqg(xJ5HeRK1R z_suP&+p2Kow8A>)N8pMVvC%)Mc$aH9<5xuQ;`d^Gk~a_h~y}%j{c@#RJwj z7ImagWw-}cw{XjOu)5Lwbq-=o@@QS*ni1C1y92^oC%eq@j}@9}C!^?|^GR&~(+9=z z1<4?CJC70bH@#ixY8Gx9%XT$4ZhDtuHLM+x=#Tya=-lk4^S*D;7u}KXu&2AZ@38O1 z|10t>8MLjMW(Le0%m3p6px2QX^|bg>d*b1*(9YoIBnUX{t`g+H;qia+If_!O#4C<^ zzm&~W6q6V!lDY?cVy|JVf@wg<`t@@BtbD(a`DDd>9eR27BZ2o*qbe6<03)ohJ-6eH z)2&>i*^9v5%(Jt8-=m4yFV4R>AnYL16P1oSmTA>KTS!xQ>S)2D^&|Qpd%?V}8O_S= z3Jqxt0!CypF7|`WSFtDV8hBt{?7wVml+e_djM2ihQEGdanL3C4nq}8(WSZ?naXv9* zw2u1W2#dAlzlJc^g|a({!>l5ehO|dqQVqt4pwoOD)@p-UiD93^{N+06?D*H-!*rAT zh|evR~aSF%1qy?ZAUropH+{aRiJ`hCqGtM<3vI!Gf(8* zPHCv#XiUAuz>3Jk9Gmf`$K!ZKkcrEW-;W*Ee~cS+muB8vJ$%dK9A+caBEWracw(eE zwji+Wl#_P2HxIiB^nS(d2E8+bEnEpd;ScOEYvKFuJKo+4=>grpfV-DUpUQ$7w=zNZ zGrUcpyU2Sp=&ow(0o~iD_j37Zsf1yD!Ta9WcP|eA2zvj;44UWp!9?*n;#bvJqbMtO zXqmkyVMrE&xxeWGneMqz<|=(jL<4lkyJdX-M6h~%Zz!F}_0F1ZK=Zsu_-Ta`O;a32 z0b^2cd7o+#`LgZMO1pxMQx9zmW&4{@+g*oFgwZzEJwQHtO@F$^4!^#w^ z1Nw#CWI8Q&)XD<^c@{e*!mbYmxjSB5gnubWI~KXQ864Y|$p5O^03NnAJJvIwL&w)M zZx*4(284-}vJw3O@{Ca{qs*=sjDZ|(6lh)#beIOVw2c|;lzO_jgfj!UD**IO8W2j_ z=K?K{RNr=;_O;Y8KEvO=MHeEJvI1Ac2&>{!p+doXvYDZLb_#z)kF2>ZDxa(4^FRi{ zz9Z)jlX4v8p3sdi74txjshF4T@0dgn|9Y%Ov%BK&F7vAmD$R?>Wcriqq3rvTFC`TE zzpr0bY2H0vZZbnxt2G`w#A>VOXo0lTOt=OfE5*d5DhIU~6rC*8e&Kv?$j<7*q8)Xw z6iBg}m=EevV!J*eZKtCE0e(v4fCd?|>%(?yD3#)pT(Ayn0X2V-YDmpUdJTvZNhWhc zD6oV~T#|;Cj9NtyIOaHQT#{=`1WfyZc7{U#jr0ppRJj%uF~^H7WyGf}s_V4$%q$I2 zxWlA`_?$%r%VR7K|Ij9Ep@i634{+WN>eeO!gC6dkLL3kd+);wjETmDZmNug2rVa?Z z$n-p=qaZ=z3|cNnF7D!lpkEhgT*ty?33X<}ba!3k(*zxOdf=+2dDRhLvZNY=lEH4L z{LT%znR?4cI03j>i`^Oxs2y9M0Vo*qM<{UqM!u)){En|o=DXp*pi1%3^!{-6{ScrL zZcvh#$M4_vvpb^jjJZ+ke4`{vW%>L6u;DQMI?@Ws^a)F_*zA#a{s`8R_p} zm2^O*y|_w~S&|U}boxfzoi!(dogL=XX1@L(wjy+1I6?u5mK0rBU7_#TzoWah)tOBnRay8|mVJ-P3d=NvE| zY>UyIkV*(mF^#CTgi{FYX~RKOoD>_Yn_AnN3R*omIUJl}#MCx95kb3}Ty1QzmCqdml1=CJxNr;2}+4BlmdP~)5u=JTzn=<)7Y>XS$hgEAH&N}0)*q+1u z$qdrRT|5S~?~~X)jUv+-dlhg=X?r^6hq$Xg^rfiLy5_N2;Ob^DF;C~!PO*)|wSg_v z>rZlBt29qcBn$|jgiQg3U*M|7bl_;77lErPrd4MG_Dp#S`_k_^FWJ#s7ft3S4qG|p znGf9;_W!1Kk?B(7*Ffcl_Gv)n7m~IqK(Z7_re1Ez6RSN3g!YmDlHBQe17 z>BQQAHvmh2CBH_vt|i#Cgnn(aBqMF`6wm8TY_;MVVNl!LHjVsTI}RQL;J8<)>xAnJ ztj*9YPqUvQdo}UA#(>~*@L7+(PIztrST!{sE~jX4cE{OQVkvwmoUM-m`(z_?EYQQ-vPu7#9`wG=o;u31dF4bKvqEtcUn{j z#RU4f(;{J@wwOnDrA5`NtB)Be#2n)H0p!Q==nq;l`IQuU-Y!Rh++HuB!-t{1bFKsmjF{uSKM%Nzp17vp!AM{%pf3!(6lT6>swS_dQ1k0)7d^9lf7)1h5A}USu8G@iJlBZf!Z;sYAD3{|LzxN0jp!) zP+1l;1a=>i5ZI^_;mmp^<3VfxCFo3`de>mNS3R^LuywUNmR*9H15I8-SLW11qQEPw z`57SnD#Eg(y1m3Db;01Q;`BU^X5?u|LAI{ueTV!8a`{E?TH7fj!{tn}W@9rn)8$M# z*Pv^Ge&bg^zSN*Mfn#e#fQ#rl$OLu{@Bh@ioia7yxj@^-9F&os{;%(}j6FYdKz=#Y z7lH~uUis2#IFK2FQgS9cWzZXB6m*AGJHsCEl=i$cf+|*xAb;Ain%*I$*O%eY z67Anc5=TzT=%?UWRQv}F{K<$Se173RLj-Xl`dua1oFyL>G(ju;jx0vi)I~BY!54Ee z>1H%o&?v#Vz*lpD=jXQ-C))`(qkt6+&#w5e&L0{avltT1(r}kN397N zBl?Q+0b%k$_J}@kTli=J$+%lQv`7K6bx;r#kyMTXsma`t9NjPE`oNd(5>Qr88%v@7 z#$TowztSe|x=!j$Vf-R>-}aR%&XR5>Q;Eyb)2lQd34ZQ7`|MQCZ;0if&SQ&cIvxs+ ze3{_LryLNj!ps`cClVq@tpSyL#NAYzumeF)gn)-RgVCK!$cNCs1Bv6lB@wl*=UqTv zg}_P7c@S%+S=aHJjeLIXEO(_od^7qA>18sm2tTJ#+auc07h+pV#m^_>KgQt?5jkyF zDS;S{Ir(qUUHpT`B~W zX{YbeP=1J@hx<6FyA5U)hMgT@KMZ+#H{?D`yApSpF^&{fBD7%>1a(Q zP_{mY>Ezt76Joe;^{W>N%BcLBZ z;KqX003M1Y4eJ>6#Jefgnt-*mPIvM3Eoob7t}D|vJ~n&_s$tF)eBT(UEH$@8^cyhE z70{7pOL>28c}G)iV7g8!{nw2Su%hGFm;V4=m{#TmkXdTsj(6dX&r?9~+DtP@%*x9o zTcZ2^0xm&LE~Wm)C|77u!Ua^RNHpQq25mAF6~y-l3tC2E(>VC)G=oZp2H98s)S$uj zq6;$e&<0&}*Y3+Qa6yMvqE6Ru~+b z3WFWl0IYoM1c$)jn@;dH7+j!%!FDH@4TG&YF!(JDHqc@4{0IpU-ChNwi+985sXnmi zY#8003!?+^04)lK(X%39ty^I*c|Qyuae_aC!S*~DTn2*?2{1S#3I=z<;OGM|INBGM zyb=cM@?r2k7%V{~!stRLx(7x#I?-t`TDA&CJ76@VgVBr>7~Bbi6~Dq@&LLRz8W=2C z4}-0s0 zYp;!FbH(O>i~>PA>i_(p!8OhhaFsC4v97jySM}A&kzujKbX)bQjDk72a6AenLV2L7 z$S4$YjRJ5gA_3eNq#J(@0rz*dQ#>bP!zb-+in*;P>}{qJRp)7_v`iGym#ydyKn0ed zB9Ed`+h7!`sBR89^!)kd6HksM~}q?&D1W0mm`Q>#qocePSTXbz5( zxWE_fgG=a&AfzYq;S-*n>~&PaT626ZnNL~lJNqF{tWS=4V4*(Bo4ufXT|vv9kkd_j zj}QFB%WAl^jv4HQiXdQk$75setW%XicAU5+nJliM$ms+OIi6$B+sd$2qe%&uzGA0w zH>_K~x@<%Fs`ceeNvU|F_T=|JIqbK2RUcL@S;A|nP^pq}C2W4GdaYRmmDWJ?s`xy0 zqMhAp3Os?{otSckl1^{o2TI`{BjtNNk$z;wk(|J#hv<*%nJt2<--Mbk()6lykDkHl`2@4^tMe!=DXY;nCH zyuQ%nn>8}QckV5}>;=AsbA0E~Am98UsD1-a`Kg?@y8hieKH6^yr9LwNs#z4wLWT2g z!pd#u&b+9m4YW?%En*8%DkecL!KJPJ?pcb^W<5>^7wZ}OuBQm8|1B3a?ijnelp-8_ zxq2IT#3ZPkaax-G@^?o{*h1<7k!FhoS6=;gT8gl(@98#hkrs9KVv5jcJ~IzooF90d z2y1C}$(NUYf%%MmQ~$1}b?l7!={oYodiib>j3%gwnvCZTU!n=G|?>?RSC$idhB zEeG>o?fkay%DAqo&atNiXk3>R3<|}9JW(o<{(&G!p^UyDh|wnquy$HcUDYWNrh6E5vg;3avai$H|XOh51rEOv%)tSdelDcLqBMJO&dCp7s<&>=HV;6qv=P{`4AQ6 z1y%4TuH<`VypZOR>nS$Q_n7QfX&Dq7SkF&|L~U!2sUP26%w4iI)vMRH9d|QE8IXV6 zC(o2!>M^-UK8Z`G<%}+GIEVBW3Lv=ea=%ImQn#qTF$P#khM>Y4K3Y` zwhxpM*BJcGP=><`+jbA__hV;F6-DC+f<(>fQEeW}fHBcC$VMT4aVe=CPG+jM!is9I zGYLFk2 zR`Ah{f>A-9|CoXWZUwiA{rjl;8$iGNLq6jiyuV)iuLTFzlA>T@Sf3p0Ty@!w38 zD!r72+6WvkX{KA`o|}cO>C}l1lUQ{R7muomTdv7u`?DZC zaH6u&%x$<;hI{;o7c4ID2Of-C<{>HYwUms%`F`3o9V*sk5+>~r!u_IV^V}EKoxoj` zY}6~_O?I^Au1m?t&R~9_Rz9wNZRo#f*KP>$|3KA_0(ZiQQKkM`aUr#FT+7sDVO-2* zcwBw+>;aa3U$vKbLFm5_Z|g+aVqvq9`Cf8-5yP~?Wfu->gw_uPx98D(1I7hU)QVS8 z-73rX(;w*+aW0*Ww0DG2p!2e8ntkIsS5Ej7-Hyw8qTb8%__N8cmEiKX5*BNCB7YC! zZ?q7Mi(^kTdo)5tC~)C2rI;S{LXcU$5-^|w;nT$Foh)EpHbR?*gpx4Pm)hbL?N z*D%e|^$Q1C2ovvAyJdcYiMU47+(KmfzL$ASdus!3)kdA+A-rRs;WK#0Za}enyc@jv zdE7j|o?DA~lb5L*MIPHWrofw23;b=6tkqEMEZ|wJM?<*&iGu<1@ioOR8OUhSxfZ+V z^Le;GU5lN*WH7_%eo!1fzhV^LPsRN9Ujt>qXT0yk=N{lGImMol(FdFbO%`j#K5m5Q z6^CD^^Ozu$I{GDioZEwTeV$(>auKwQ^Qkc-CXWWlBOhZ=$kdZXRF{uyb57%?#E&~a z{<1#1Ce-zyKIb$ws))~I(4mnAs>O;)s?`O}k;PCQlH1-4AIw|$s~gq|9`{0k8H476 zwVp^e#-Nk(#13Uxu9>z`!Rkzz#KBJ!$R?9N?xaTGLOP6W`Jc4u8lA@mlv~f&Pd>n~ zT|`xXWC^GGkccDFas_SsJ?vVX6Rf*{jk<|{g8}iuu>y$?dqI3yh{OjO8pH<@kuXWi zDH0*v{kE$=hwY;At+pIv1PCsvI4rMEq@`gI_7~*Jt3Fb>%)apH3RMbl%iAF^DijZb z5|W}#2jbs(ySQYEANKCL)rS7q%Y>?#21_hPB;5HQq#be@cK>Hze5RiV{I{F{DQqT~ z1wkNcv|Y=+g>%`ki+mlo6*zTkIvaJu>n6;Rvyq52yErexwcyRgufy$iV(jrC6`Hx3 zebW^$liK4=uE3G{Yn-wX*x_GPGU~FLBP5eS1QwYBKJ#?3*nX(SQ?Sae(CikWvS+>Z zDE6$W_Sz7ikdOe_bRr()CPzi_nqt7r>{W7*oGf$h7`p)j8*T%p)%YB#Jp`DR`YB*7 zdvXdk0j#7V3XoGduS~cSyc}~eLc(MH-FJ`>aT+1?^m~Vn0?Vlu2Pgt*hm}h)C$joi z>M?cqXjmLD^MLW>s)gZO58bOk*$0f1McCwt9n*m>W|UN3KImfzI6~?P0@Fb}NK+6W zjdpvy^rBNu9H42&4XNcRX3K8ywE-X|oKPi;KT%!6lDtt;_wiY?_b|}@&lDY9x#`nAZ`@q zl+s&8B^NaRFcG&4zeUk!m|LQ?X`L-BZ9HaWza*op%Sx!N)zMh2#$JtMRn>;nBE_Lz z`u@nGVkemsFw(FwewYKT>X(picUdv@O5L<57K4-zas3TiS&S$EP;Jbeil*;ap~cn1PPHUCWmVI6`Yuox+S5BayWSw8(vTN5l=1-AjC#~xSG*o+;Rc)M(J9~VXMp% z!D|F78L5(qEGRT!K-%AP8qa(FSdtim`kl&v$L;%4ZpW{~yg|&nuoEvNQ-PD}eVX1Z zJ;K~2^uaj1B=u9uV73Qv)twK15Grd(2BWV>#i?m*3U9|V5{Loq&VtNf0dpV=_`w@ z5P)A0*V%`exMoi^`!J7ere(Y(|5Ao);+j(?@#U>|2Oo*K!-ijF{Sy4!F01b=rEbbI zEMw7qJU6*TCzBR1M(jrJZ~$kFxE}l0l)~e~@cmNvaQ6Q7rER^t`X$)EyR22C@0Hqo zBHirb^Og(mhg&V_=5jN0@edSrzI<*9dEv!-=*Ss9{L@C~F?!Loo6Jr)16C6x4WnU027QgrPbF)G`z0PB3|6QU zrkSi70#}$Z2uK=QgsP$tN@0Z<+ed>$c14*A&0Zu5hsi0p6LK8XP7a!w|M(iM+nc#6 zsA@4U&OCH&$*xIWT2Uo+2B` zp|^ugu5$ZYeRjBS>-Qjs<|n)t8E5b&dYnZ>di+6?yJSwDckY6KAA`u8yTvW@)4G_; zdAuA%=8{;q%z^wIj&IMHOyPT(X}#VQk1L4G8-Kag{B7RV@^pS2B68;`x5yiHanJHl z^MJ^*qlQxfJLC<9l7JochRFdtY7D=@JG6#V@bsKPNV%Uy^@y+3!xrQI8`S-_n zhGEGM=2h?r{`tcOaSf=WWv(bLzcoch)Yi8-3(7236`Jd=JMU+CoJe%#lz=d#pdfl}(rT-e->w zNh_YXhdC>r-5063x2#n=59lbPVU4cLI(+(6m23Fw_WR-M-%nM&k6P>D6Kk5yF^~Ni zNWdbFG~{*3t$X=g;nv}$1I&3l!-=FI%dK-lag#drFTW`<<b@s}40*fxs*3npo}D=!sm{dS8$PAD0PLq%eVs;fMq!g)E&k41W8&RFeZ`0G ztKj^+!Vi?v)s${}^nmsM-N&M~ygOp5ad6EpopB0qGpt5kr^uBDad~?^QvfSEeXG8o4gPh8<4hePz zFzpp!+Rfo^)4qECmD{whN!Q#%XY@6QF38~~YYxh!aO{XH%V3K7lUzgZq9^WQXwlQ8 zTo7uK9<#F^Ai;+ewCnaa-6DCDdH!Dy13;*mLQMyuW|m*kYPOU|wIX&@bATNy@(?@H z%CVv~>>|XDuMs=mnIi*e54<8gRNu%T8fAk|Fn)JhdoI_ z%X98jReGJhSZopC9~QN%2oiVxt72*1Al$%QN4TLm2e@$^a6<;TF>=72Rpi3r+pYjE z*DrZLZYnO`OL-m6puYs>cYNk1=9G(xbQiBVNlEVEw2Mh}sXY#4k&lKAP2coO(&Z*A z^iHLtyz^-MU)*pd{(|(2Qdc>!G$+Yj4#<}-N-A>QURIc7Civ9pXp`j`%+7Ca~h{yh{(Of*Ew}%S;nN7GvTzJ)6_2u-Aq%UR= zL|E3oRNRS9ghe%YBG z{jzT#iVmWtJlvbqg8H%CRW)q7dNF{saUG}{qNph`=pQevd45-PkVyfh%wsQ`BSl^3 zDv8(poIjU+k@(%*HHaSjZWQuN>?pZ0DOUNo4YT5h(5;kcM^fys4+qK3XW73Kn)kDB zYi0p!=z%o^bGbszVObRm^8x*Oucr3_Sl#1_i@gaa?)OXfesRm%u&7QKdn$rKnn&S2 zJ#knTla=HwxUgo}AbJ2I_t-Wf=gzZ2*N$S}oqy8DhjQrKJ462^!kz>4{IE_(xgO!B z=jNBcxH-AitKU7`KDGaUN4_R?baQtuHBK!EKBwlIHZr z%AlbCRnH0G;eZHom7Lpw7gA0nY3zzMYEGH@H_D_LEdmoILrD>$Lp?Z=9@A}=&w^fy zg0&h@zlMQ2V5Hl-M;+uVd!cto@E(jsQ$f{w%UYkqEhZ0fBpO{nx~$mqr73M=oo>^U z2*&g7x!mLdx=dh_pe3?Fso|M&`RK%bzJmIm2R#G0k*6rtSKCBC*`P*K;2FFw*AU zF8#V?dlL`ej(Y>5E^*34Svz?n3U4aI1-C9Ayc6HWpM<-)aT}E}y;9-Mw))2BmzT41 zn0RbZ6%{`t_?l9OFJn)tCB`AMH?hI&42Il-Ouvr~!bNgi6C>v*xaQwmE3*z1!nqHCbu28i)|AV zcA}n}f;+Yw@n%|Y&1RrQ*#tSz;=bS~M2ndUfk?hK#cRmm@ONur^@RTUaI^CA3Toq0 z)eiQDis>V8oaLBIJF$NrH@P8LCQY5`v@5m)H=^S0z>NqWYrgKIgNsM+z3BXQ$Mlhc znXit?T7yTytX4IsxA7zPHm_kr3Bw_|%Qag6u~VU&^EZjBv-e+6 z^LqC0P_s0`IG>yB(>s}Q#B&Jnxew2v-<-fru3^ZeU7jQM8Xw@LlfmX}1VrmsYMq?& z3?Oz1xXGgxGO5$^eZJjmngA8K!bMIbB!V?JTq)x?Tp?!q|C?-5BiJECFzDh+0*eSaydnBO7FlYyR*dO-@jYNb*>%sJmRYXn|8&?w7&2bl9WMLxv&o8Bk+ zYs4+dfsBiefet_9nd3~ur@NG9D%aoPt12RW-X zx;g2x+(3Ds#2f$~C)DUDC7>rXW@Bp=4tUTT(be9-A;aUcYNvgDhtQphdab-*wL5+7 zB0uH8uEaECRe%Rzd9=x$Sgw%Xi)J)e4=QxE)<9^OGb}RmxWeFdFxbsL248k1gh4M) z=gJR)WnQV#g^vX-v8w@lkle=+>Hcsf4s$qm8@Y$&y81O-y`9|CJkzyhRNOKKW1R#k zp)X<5g5drpz|FHFBB1uRn%%>9;!ku=!I^1KJ`V0o`Bp)^L@n6S0s7=KF(UyMtdK5?b{@LdA}2y8*jfRk1iB<28*@r{i(`Q-?XDT{>sMP8{$f z{oNR4ET9v9C*hZv^k}<4^p}A#WEdrYj$Jt8&k64|x}i*9RLt9MZ+mB1hWrJhlBF{m zs5E+y0aQZCNkFGPHCeKl44{+kmNmCOqfZj%{cn}{p{(OTr3*|FP-#!S2B_2xRJs%z zqV)^yR;@1utd;WfIE+vh-E8S(Px>`CaU--H6meQjRK zJ){ncRnOxlPwg9P{&st}n#(FK*X#q1+JK#gaHnW0;z?f5sqmGfIkIkIzhvOR)FBSy z{iZZ>X6wEyK;`tbqv`BP_om8UAd&}wWJAtXi?xHth2sA8KO|o>?gQ`9Vot+F#0_~f zNNOH;N!)+G2k;Il&tc*mXKY}rHj24Ubnu`)#W_K$=!xPSW||(8<~UR4)uQ)uMx8M8 z4mAPnAU|&4%pmDB%Ih@K>ur?0LVcb1L=V6NXB6Q2D7FdwMZhKiZWF`}0`NwZ*JzXm z`wg+zNPJ=hSl)o~T<5gGGkS3Ugjr)kj&PSJ20BXgaU-g@D*^q*iX)&98M~bn*k8gU zcdmz!+adv#K_WXFL`Exw1%(Gmj`;DjQb(hSiE{*u7CVr6W2af-z@w#Xg3C8Gs};xj zprtX^w&$EQ?SVWvuD-w_C^B%8sm!83j2^lZ-Nc9&%k<9atRz?~GV+o}{5w37?Vp&qd4|;-&J!KDg^VBML_+PA^M%gb)Eo!-|0339L z1Y>fv&Vr2=t9KYTIZQrm;MeLX`()Y$eUc3D4!E~4$7Y2H+~kZw17CGFhChRx4?{EB zOw<44Gc#*)ARvxTdMIpNwH)+LEHAPV&Ea3e72Pb9{1g}7H7fnG<$?vzro zPCFaeNJ$>AdFA7g?2EOI)20E7>XjRgYBaK_^e`uO`}wR@-p~k2)l2u zj#ePkcJ)a%2iAQU7gd*#l)6IE^Fq<{yQ1eM>i*xT`=hA)Jh48NZPEC?D)r5s<7Ka9cfGq6v$|{Q5vlLU;5YF~gKj6vxK)&>r`=3Dv+$cF{3@i4`Edn38OVsguL3W>G zZ-xHyX#{xoGRo*=5I!Ts7X$SRK#iW;DNV|1cG|y$LVOVbBCs$ci+f31Ba?=Nj1*8V z*UDO}e$L`9NiPdy{P{ox3bIUD%yMif*Pl6L_?WWSSx_^6fD4LpuWu-pWcZ=p?dBWQ z21-{F(?tQ)V}(?`kpiVZ(x6bs->zrxHO@sumkV(7pi+$ydEgu|X+k^cULk zOKtcO|6wpu##U=?YU!dM(61w5H|?I$v=`xqtjWH6~v-J%^&{I0GG=I)ek!_&m-vL3Yh`W=uKn zkw~T%HDPZ<{d58;C@(qk7m?lsC&Fq-Le3K2Bz5nCPEz)cW*c69n?!FyUr_} z8<^L@ijE(lzJq?HlassGfogOBnYal|thz!0IpZ;iUh>}cSm*y~B;HIW^g5c2upjh} zzT9J!URtg6#24@h_z%=~p*+D|h+X7BPG^50;jhqp)-iE!=`qOMr zKueDwzA)6NfKltfKcy8ywS*r<-}1aofzp87&;oTVHXFCLZB^bLxrlO!97Ah09W;fj zlI-!uj5eqcx;KUnM9|Av0IilH=!R+p+<|~YQV_6H2*6dpjDdp@@a-}1b_86YL%@S$ zU@ii-XCmM~5wMYgfER`V$#U~v1YKH-pyRv|(Lw~>orRzSJpsCmg`l&;5v@BAF!eJ8 zJUj;e2mue~AmCjH7!{9z1rZ4N76Ohugn%P`5Xq|$a9b_{J~#$WFv$VBXbgP^LAQ*d zQxUYX1VK9yG}MNmnaPOgE(EN8gn)TR5YcN8uy{QJw$DXGUqisg6a@VD7?Y0h+xAL2sX0rjAX2?6Dqb{f&qM*3JF%{{TW{KIs4e diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index a594d502fa09859dac2cb640e38b2553cc692204..046045f51c54cb050580f10b12ea5046b1b56a4f 100644 GIT binary patch delta 10526 zcmaKSdt8!d|34>h5JACHASozno+|L1TLfy_(u!OwJ8dYYR#tXgEs~QF8VgEobf)8D z%L=NsY#l&Iuyon7;_0zM=UQ{t(5I(itL=B)2>U+2-}m+U@dsSoysyvwxvuN;eqW#K zbJM)Bd1Fv+adwX5=ZVbKxzEBME@zn~Zy9fxD1Sy4SRdhTDOb_{tEPO22~j2N)+zX*^0Z zF^3`$n;{tjT}Vg|eu~{7G`6W;PE_r>yZFH3uwL5IeY9{JWs!FL@za;mGcPA=j(&Bt zDTI`8RHq}FsD-2-jE`AbWY`Il;wYhqK};vC)I-aSkzHS|Hg`iSG!PBSsFtTTx3!l) zdcG%b{`0(y7~I#IzE*ovN7u>gt+RZUv1QXXPuf&cwQ1S9k5R|{sfX{3Uqo5BZ^?by zLh8R75%D>SpQ9F}|2}_0#{Be*7{?&!+}L`r<0szrldbo=?&N|AoQe?1NoVi-7?Gb2xT@%h0T zA!b|B)!~dh#~Z_otwM~L9q^{r{jCyw`|a@F-!t+MGKTW6HVSh8uh+JK&XK-~%^7*z zN6kmp3o(hS4}T6U9q(TR;0)Z`&nYUh)A{xNClqPY)u}`Pe^Y$sWkz1N_1I3a5VM-T z^YUaBS;D{_dqzRxWDNf?Z&EYIE(*jfa689eJ4_Vbvu~5m`MG^%#JPW!3E&?YN--0T zI|LK*MaTrYe`beEkV)6t5>%9E1WqR1M_$51{(KOM~mZ+jo5Dg~sR+_+%l<8CeugkKgl?>OmyfMaiX97qpzmYg{9yPER}SIvw<5lFN9Th(to8SaDt(ygjj@%*_+iOoq3|+H0ZMx zbcd4Gf?GV*#lUuHS8_QGPlK4ypBijDGOz&~oFYa{$f+gFBUnh0A)T0ctiY^N2|VIM zb6RTSF%#MQNr;e6xqk*`8WOrlFp&ujL{wmWCwgg69<=Xboiu;Di59&|0tL(@G!na! zM=iIZ3PckU1M#sX-98I7Xz~ZNVDvKd2Gu1=U?S}xZzr^jvr$><`_NmR>2E8PqJGovk}4MAeP6oMZL92m?^Mli^B_f6MIVS z65Qhw#O17mZHx4`^hoj*GoorE*XJ^KX9+rL^A4naP=jA}G}H{`A_CC*2fUD8)Ix4bg^oHV4l{fVH*H;)=+w33jVv8dEI6-z$cMH-7~-Ve0( z$f@1JZ0-@V9F5?yGL7<7)E+T~orxe}E`hF*&*ep5a8L76*b$BJ6sPQzMctXI-ilby~rK2!RnxQyXhca7zD(18M_nTpEBlTe*LBV?of~W%{UQejp2cGLU!3S5KdeQV!w~j z9%`u=bJBm@ZZ&t3$NSB@-{M}Bp2;cJ+P1>M#zJiHYiF@={bG-0tExA@dYs2{x3UJa z#qhwCG(UIxnB~Vl!eT1MGNqTq1LReC~Lydi5{)G_i28;Id00{eXIclO=k)N}?4CRDGF`n3(r zF9EOyIr)p|eBg`(X2zC2<_F1u8Mgq@x_8n$c7us!0XCUGm3&kW#%E#!Zn~lhjb&gZ zHGqKK<4S*s0u!ns0MAnS>`T*6P7Tp3=R3qq4{gHt_u#598OeY)(``5pZN81oOr}Y9 z!L$iyd56*3vCE#<&4h<>d#9I)Yx|k4WJw1Y#k}=@n>vM$74_s}_^*mSs;n0H9f=yM54Us=WEgp!8Y z6-u=a(HoJy33D4Q9+_p58iJCsghekX4i>6vfNK~}9V4>yU`S!40< z7n*2BubVZHM1WB-u+T`mQvz}Vx z`v!!1q+jONA_tZhZf)z%juLSw6s^xvvB&G8$Ik;&o8DSmsfl&3R=q zJ|JFyBt`Orx<`M^80hf~C`_6l)D4!(x9Jsq5 z_#~DjtTLyMGkyTZau+zvZ~FG$K0ik|7-uD-+*i}K`;ZjBAupdm0&}iALo@u+B&o^^ zLg{1&?*%@E67AH9$13!)!}pSn&|TIb{`fA|Anv#dK^>)#lSkOuWdlNs%~p{awRvA3 zZ5KsQ?ou%8wNy?{VIJqyL6u0X;!#KKvg8p1Vwpp@>$c5`YLcp_Co$=M#SoyHJDz&| zp9OoGj@i#hp60-=XN2_EDp8daSa7C{P(88rd3J>-%x-<2>uvt@d0vHfHRZ&Lt0kJR ze^KzhF8948tv9{mT*&MOLhJV%CYWMI<#1vtBLsZM)AA0@ zoC`gsvfKHQZIoP!9491|5bo@2&_}0=754%N3YzQ_IY7tk=_C(cnlYJp-wXRfP60Gz+hYGxk7iZ5{$&YL?*1m#{&t~109g(Vi z>P0FN%~jlOvqBIj6z3e&q>#-fo{8rTgRk@d78VA|J~fj93NJ71z3v%lNKg+75S_t`d?7PY8ZVZbl5(s* zpJc4TT*S=y=d1*t&*l>$%|Jxmsb_w{QOh(_+?QYvCD#C1bf=zA0^{G$5zK`5P%Pc# z$+oXCs(;s-F-jTiDg+w#)Mr;B`jT2jFt`atD1iJg76Hrtd4o8)c7h&?vH3`#o7 zXB5CT?}vMBK2I%}6x`#sxvaozGwYv41fMIg&8FR6o7UN_xjCjuu+6n%ubbC{CO_dj z+$NvJdrgkKF31Y5y${?RUoo`2JwjYQw6Hy5+0aMr5oJSD+9QNR@$KISiqIkG8tCal zQOJw?pH$CTgq~62^KL^1Wut6vteqq()JM9stPU&m5htBL!Y|QO8GK-<;!^sXQ)?~^m{{i%i(YxF=G-JZ3{pjowgYeLR@OGBTDh}QxQ^&u=&}oms zV6HTXsy0s;^=_sMrlfhAMGLaL&C&%Qq}g?IMhqoK288Ho9Tj66Yv}cn(;uPSK##j|W)=n{Q!(UV&XcHh5%`p;V(L9~SM+QZ{^6*H(mG`}WM-;z7o2 z-E@3GxF!D5OsQL~qEx(EcFW<0f$&tZJ;@jfYgP{mXco&QYdd~Pwh(n<0Nx>Suh{Xg7e>_ZrtV)w>4eFl(P72+bR*Cp=GR;kyNpSHgP42O~@y$PZOERn5D#L z)tI>p%oW%r9oAh}_Nc?x#n3+x`xYXs9kVhBMQN~_6zw?j zc5(_hwSUam1Oe$tuyh0Im{V6A29f{^8vzms0l}z8Mh2r1t=4=B3@CR6~-($SQ zES%M)@bG%}s=u53FS{Vx>jF;t&{v`=2e^mvGpFI`O@~k}HdXvLH^RnpfawnhlW1ej zWezx=!T z1@W$ushCo#b-#s`zf6d`tmJrBPTb)Vuu%F zGf9YMiL1E1JQWdyQVEuwj<}2+faHA&-6N2T_Ypc7%@30^njfMvv@zV<-;@#)Bx&Je zS;Oil)vMZ3OKBi$z}tuglC-mr-==6|X2_-W(r?kJt}aLPSV}DTI^&a|!n3js4Pme! zr6ZwymI_}ztDrigh5P7|8270ezj@XjSK5c>=0sIiTQgIY+n|ELTrC9ZUBsB632{?iVE$rvc@5 zpCU@v|1sawT(|yRo~J2W|32MrTsa~(9DM}neBMjv(8K6fuks!C^EN||1hxKO%C~6H zQITZ@%zRh%^J74-Bf|%2NgJIP4{t@g#m!9+aM-OV&4=ajf6_U!LZtAO9}RdVohz>- zGSbD`#}Y!XaSNJlL?;A4`BtpK!blc}yl6<^{CrJ7VJ8bwE_rhD5d3*iv9iNMsb*%0Z+*;$0CYozA0&6Cp zcEY{+*!7_IYhEYlofF>86AO}m#tyUR|J~jC$AgG2(0wD^eS_q=bbRv`7U+JCzaDh2 z4%h^`E8Dt2ck|RL*IMGGIRt?rvotrSU#MzZR zB#pp4T>mYJ?l)iRIlPOjU(j9emk0#o!0HJCpezC}Aa|+}%?}t6WR{MzOmdY|-$?>w zeaZ!-?;Hn~IprLjYUqndPN)U7&2#9)nH=vrC+RQx^`{g#u;_3_H&!1pBt3%_vBs1A zm&0EC?u5OVcw#B=q8A;9xxDE|;Kgbw2c&H73Y<>hs97~m`!Ap2eKA+CKl)s0P!DJK zc33y_UV$b41%%Ok5Jt_|BYn8FlItLhNIUExjBsl;*Woa7N6{JVjRy6DoM#=@LDO#o zLUR@X9cNso3KB0=u%D=PPWRg!ruX483aY+EGzy?zb&^2K@AYStihd^PKK zIqKbjFoj$*V%SHTHfm?oI1MH5Kn^zwJUEQH zgW%kg@rH@{u3A6n*5~rMAjg!?%?ou+pfh*AYf$gr{%42v^%|x6xoD(6H-xCCR#jduZF-e$34eYGaM;^X_sl|$n<{^9|1=dD?t%+tkzOPc)_N4PD{g9 zU4+~#lVZXUn*x@{RLT6%A-qJ6aIzoc0$h|WEhP*(Ga#KXARM@-0C(#qj@os!5d$w{ zK-fW|7bslC$zr!?c>=k*gByW^wJ3qMWPT8&e^Wf4KL9(;T3siy7H5?-+-YNLX| zF_VAdMcmG~xb)pY)p@TP@ZeCYVr#XlQCblk+eF zc!YZ)PAL%hnaQtAumvOjYhdAubt?|AJHehUcwIstW{=_qgieO8`{&t?b z&$PcSUVlm=#<#>bqgLY2Ah2hR`%#JD;g|lD6YpU8wZz*{nLXgAODul*l~dJty##+Q zzFC&pBFnraTZwOR>_;VC0oZf#o#5*QZVz~eu||L$kl^(I8`pxW!kNk)L} z0q+byeS8nNeu-78M~Fdk{^&EPl&a~_Dt$kKm{esMd>vdKR|yHJBsqT%627b@S*$2D zsERlZCir9pKTTV5lB!$A!bxXB@l+k?=6-yDT?wtD652dxud~$7WqfZ5@Jc!3D<04L zXkfo-36xJ@jc0cYxnRz|mH0JmNu88}7zZT~ehjr6L;v4T1v9#VP`hSt_k{aLW|Ig?v*1eez(4jZC0>L)YLxhu98 zF~73L8xmWO0q^_7PCrw%<(#t~grvlrg}Iz~J&?W_HL5-TZYFqg^CG1{?{6kMMiSe= z7V7^iNqa?7AR^!ggg-`20(D;ys-`RuXnw6AROM4NIe|Hn0Gz{*yfU?^`!gT?tOq4g7V<`L@Z9GYJ1OhQBMJ7Cj%lYtRWg^bP#a68xa# ze0CP0k@^;3>91us@HcgMhYsJbJ1@@8d~t^F|29D*zkz?DJKr{iG^iT`zXRaJ*QlHL zn+&YOcwUz2JVWwt;rqV>hHJt5LG(@hpb=nIlq9(B{K;`-4fJ-;WY@6+yw%VZs^{{u z3YVpx=1dT;wTaJtX(0 zMOr96EZCbCm4)dl`JjsE-hvyKYmR}(s`-SksibE~=npRC2He<)p`f zWBP72d!|T*dm*gl(wWd(I|A%)I8OQur+8mX^BwoKTM;t|`1&`YGHY+8DlZScD&eo~P=xtRJhcE{eM50}t zffh4&dx;TEyY|06t7|tLlLF${%u_LWONQB~ooezzQPp(xL|_WkjzLvJE-L(Yzc3ff zI_52f-ED=y?n4xUIkF+#S+96Zv>siB&V{LVy{Pp+2(1abvfQg>-$CcYEdE2+W*>yA z!>%nCWP|jp9LtIA^cPiW!@*g_9eE+=k!K+}$-bQb59A@p6Bmw^jx#2PCzy7hH=T#3 zdxB}l4d_N#@YtiDAjnqy{#ZJ0fJg5)>9NVp*rx^Pr5Djl z&!CrnhtB!~o%L^Y7J7RFCM{EJJmK>JWBF14vX}ld7P-(3q3^g|TvPAQih-2zWX%eO zXdGo3^xk46yscAHUkA}?IFK2FQgRL_{Y3!CDCjP`Zkm(&oHl5Srihdy$RBnrr`sg- zgEb_ySodcik$a4o-A}G#Q|dlo_@0g~_bn{lYm6q$N58KFo3r?%;ufeZ_{c(3gEod$ z2R>K}iMQjxg2t9y40}BXM1EmgWt#H{h#qb2gB6lIkRG#qFFs&=*)RMytdsPj0Y zTaHJ9BVP_U@+k&{6_}YLh7^3vsGVB3N7PAi2-^|#6bM9^Ta5ayBA-D24kL`kmbm)b zYW}y#>kv4JxesD`ntc`jyh$KvoawF9=WIe>Bfd(*mHW;v)pb>`>5FhIrufb!`4$nf zQ6b#ykUve=eZ-7gI(qM72i6t(1{wMqd1Je%dJU2eJtnw9U-52i-_7#b&E2jsb|9i1 zi(S|SP5^w1oFGQmNVad&tl`lei*4A^P^Sr%yhrV`nn4fh<$?;PRHWiEN9~aXBL=P- z)TJUom1gQ5HTiNhy#VJb&Nf;DQJlRb=f0TCI;lUJ?VA?oJCE`IYo^Be7BT8d(4~z^ zP(2qja+kve^{7U7Eu9-rL2RAyj3{9Bl6qGoz~5Y^hhdg|%^QUH$U(^Sf0!pONKb32 zBH326K-u~t#>TzHI$8URUt7_RJw#guE{>H){E@cA$3d$M`GqF%`H8lK$e?k3{|YlI zrD%|~%p0aTS7|OvNv~IntH)an-%4X%(%=W8tY-*9IdxuC42qycB36MvB`t<_;~om` za05+YRn)?$V);b$Ow3CCGJc(ac$ttyZZYjO#mLg^$;Mngv>3YQ28Y4mMN9yeSHWQI zmoWGW435lz!LB?2Ry+e>@f~gijK1wge*vS5)iB!WMsr}aJs(E@0h>0`VQ^72Fm0}f z!IisV@Z><)bRG=uoB@Nwd;z$c34>?Gz)oL*!KANX@Q54y84Pw7h+%XMj7B8G=W z>!EpF^SXfCQb7*tzkf^tBLu2${lysz<3H>=cyajxl`Lm=TbOSpHa=`;Q_@WXVdoJl zQFV@nD#$|-1KEo15R_mUs`68`p$bBwK7vswN!=OJOnE?c?>#otm%}Gu_n2{FMMku( zgKBk9%{AuzOsyq@-`PU;LvwJPwAq2^09Q%4uv4)uyz#S)&(hj6FQ;T?>pADylZ11Zzh5O?%FId5 zNlavx2>5(DCar;a)N(Op8l@wVN! z&fbdI-O{~#RROgm()yt(vF>2u<_{0%_)}+g4la7u;e2+p|2R$91#It!k03DsbEdljA=;Qun2LQ{jh44j%N^quVkmWQ@F?h(ZP6 z5@~)o6lyxyth8=a;G9Tk-|S2K3dH*aG5g9af%zlj0%vvw70eDSn-MsR1_hQ3LHkzo zlpo7^%lG~LHy<4|pS&+G1k%rqV4-4oH(=#Qt*4*urVX@A+Ad-XQ7R@uF2QB5{OWP0 z&|yDJ2N$atyRKylssE9S8aIz#UCa~?{<{1l@D`JxawltPhD%=^T*eks_lh(dB)GM^ zUrx#t8vB0w2wY?*oV}0<#2Vz=EiigqYxaZPw9dode*->Cq`UC%trFapq|5G1q3eaI z^3M(!O;9mk?xrc2guXFt^3~SSn?y(=2OsxZ4r^)8w|v=mc}!Q;*c1XZrb{LUg$0(R^p!s#zX&dOpevah(9U3$;W$hD@Uu~jGFukwJ$EVo8 z?j8TC0DeS$w$#lYR~UX#X4rm=a**3o_iulO|F7bcR{J=&v8VUxc3J6@!4Y+xUjQ@6 z3?LGe|A?_iOcDqf_1Riw{KuJ*Cj$@RS3jbJBkC;5CFS(8S@d2yWL3|`(0 zzqmW+EFo7bi=iK31;2G2seambBc(s~vT<`s`^XM-Q|?V@8R}y+4l2fr9X<>6ax85U zJ+KXTBS{&u?_5B!rJ&Mh3YrBGz}7epT`#&SC=NK{i!RPBGJ9;fLL6|8>atY|P||g5 zp9{si#Mr3H?P%wdD&k5@uocR6QLx6l=pbibQVm(-3Wr3ea}v%^Ur3@;(Njp~`S^KN zyV~FkrfMUsI75-29t_UnNO`znHmWSV6g}~Nxl=*-NsPLfeO|E0eBFqqIjpE||0HaX zby8Zx9LuaqAE{Tr2iHqq35tV9^(*!2w@Dn_M>Sjr`t=M2u*a;q_`RdkC*f8i);#9M zX<;Vt6Fxpvs-(!3W!hLAFMX<4=I$G1EjiTjc6jqn({^}sjj0XS&AV<+`7(li+>e81 zeB-lUq4Uj?6=n6!y}o>r&(WrJ?a+LE=mz!dx8S836e9=c9`j^=2Yt;@mcZl2Ggf!| zJm=|r(OZNh3JPa&p>%f$`Q>t(3&o?7_6)Ljo1>Eq{Sr(JBVez)L2t9;_}o~+Feg%R zTcbn+FHZV-cBo}Oek*F+Zj3|=hk-R%EDZdfZS5CksWzsdJ`nVh!$~sj(Ko4&t{0e^Ihg&xtqKiJIFUCF46H_+=jTo1%WBUA;aZ|1SdVFW^oXF{(0HD=wop zdX!ug&Ba`T$F%=(`T)zY>kLIKj`|I8whokI0&F!iHMiv+$*JWG%VLjYK5K;54upcLaa@qQo4vkgt<_X;Ud+8EMW=K%2lwmWMVE4&3jQ;wGg4+N?8sm+^CK zb~zobLzWSr>|Mt78M=PqAPZsQt=(RcYca7`X__uXWZ)Z-hqPU*aZA_eOy9wqcbPts z!<)AQirwK|7rkA~Ee>*Z%`1Lup|w%uvuRBxyiPSc*a68}OuOBsJe&Q~NN#Z2pvNOw z^9OR9b)L;$a%>jvPtRsc<`3qYy>C>;yj}eY-b2Ow{QZ-vh!+1_DMfpEN=}7uT;g7L zX_HNEuinLt6+P$h>vTTjWKvhZgpc#Okoj0ijmSgLD$d8|+@xZfmpz<`$IGc6AJ-S2 z!o{bIIe*?ILxDcZ^F~AADQZGFpUI#@BMnrW9g|+G3!foNqPiqE{TTt6m$3Y^>-K6M z_k4&IgXV(09#1kSq0@`SE@gC)m9|E~>c|Y^;3o-WZ;?LepvK}N+s*7hX_GZNpVcU@ zn(jyMVAvk39C^)z6;o>8+bPc#0S*7NPO4{;=>#yK5UVr zL4fch62i2cauGsazpjBowui@E#zJ!}h%Q+;EU!-(ocdVHZv0K>gz-Hd+aKwG@SgzFbg>F zS^}hSSYQ?efv{mTYrAkBBX*On;Wh%VZpmSzj#6&G95xb=mR8J(^=!Dc;zf+JPK-Sq zu0k`{vu}99Wrj7y;$iosACr}hzz)ArNvKO|j*vtG5m?l<{}W%ihsLh`dSAg(r$Vz` zgjzA}rB8*g{tRWcAT=!&(CKIj$W5-J719d8%z~wWfwmpDY~6r?4L1SP^mPaK?*~lF z`WTRw{Z>{$D%eOx0wAYy)*9gwur}sAgoK9%NM9i#;uON?$@fAI0n5o%^FkC+wFi_7 zFh>jeml!a0cW77~F!Mmi@uhQOHtxSujdDIQzaNl{KDM&P#_wdI16BQf4IuD~8Vt5I8>!AckNH~Xw*!=9Oj^ppBOd&G~ z3gQ}JVHLeawCud*Ul!sf;g=}-6l-&$HoK#lrA@&s>6heocG?NGwK^J$)!3_XEv;Fd zRj%0IOWzY$Ug0KjLPi?aqzrT9(9(X1-_1@tW}i|wDS^fCD}lJdCao+<6auI=>Q8GX zZ=UNJswT^FJj1<{x1>59slz7AvVMv6cIPsWZ#_gG5|0gofAIAV>wlmf5?>bUPd%>T zpE^#5P9BJHKnc*5FQ@kLKwwC|8c-}E`H@B%M4fghhZ_r3%j&ZmwC=FN@S+Nt{um`T zhubv=Ul-5VMvzd&q$n-Fnv=Aw?pjou92QPv!>b5RJgFjtFroR~4;ann4d+#VC_Nkb z+%B`l@*2TL#;If?8ww2=aQXF&#&f>kE=!92!+6M<=BuJw5QU=xifv}1Z|z2yaW-!zL4b~l`c6i3fhINb@l-!?uf6NeSk-@(sEyteyGAVajltQe0j_5!Fyuvu<1uxzXboX(;oO- zsf&M%Wz6ltb2FNCGHEGe#A)UZhl~+7X3vUB4~M7kk$Q(q_pGWk_HON$U|)CIYhJxk zYQxbStB21U&%YUdWXrLxwKC^@Lspl_XJ(S-T)2x~|73t1$FA|&CM$~Tmt1EuSW`xl z*0~Q&J*7TI`NEpF9+dPOUJg*|vtR9D^rC4um>qB~tR_erUPTX?40>IVO4b7ROMD_3 ztSC23v)DBRt}u5HkTj|sRYN9J!U{3Aj|PdHiYgVFJ-0FjCS~G|$#I}C4UF?4yS<}I^9k4mI8%Go=@Y)?*=(ETn6IYXtn)#Z~Ts8 zMFEmSZ$?-=)%NH63St6Vz6Lq8B=wERER#Rc=PV-9=XaXiBQtYW>v;h`36VK-gIDJM zx};0Ryh23g{0Ux}k4p+Uf%ivc3g5__-0M&Fxs1rX{+CzHKW1H7o5N2=L~c3Z71>pn z{J0o33y7@x)N~?bv%JByEM)UO(_0~%^`@WU&05n5cyi$&xrF2n*|=p>kY^F(p^op>2hyU*w@t+LrwXs);JyqoWHG|iKfOZS(E z3W8hqdgu5iZY55HD{TgZi&3V_C+03Zi3*y(;u!b^OcSN?3BmwKoTh|pScLVV*Ln$u z4}tilsyflz#yZq5*`D2H-?MJ7F8(QwA)o8?QD6~)5X0# z?6G0t;z!=$uEmdgL~3rAy=Kc^9r;ysqbIRGKXszUGkkIL&CvbiM9rJ1wF*AE;W8qzA ze#VD;RB(x?>>Z1AIr;N#L9TD_^4~5-qqjK%N_b`s9}CVPxsM;cGa;rdhgo(mrtDlg zaK$6O0`>tm;fSw|9memzJ4nu>u1%ML__LVY7+w{_nA}&um9OvoSXNY<=x#`PXOOe) ztV@Dj3`~0&n09@P*R(I5e&#i;+wZD(_%iV-L>CltGc@%wDLi4slUe-B`_nzcsPaeN zVM_T==|v#aq(5Ni-$jB?VffZ3Uv!J)C#`QQogpC9oTTP}P%|y2d^uanquLQWsyV=p z)y0S%X=|~f6})o9j&8(`H+Uq~0!#|r3=5Q`k}McZtyo%anS*c3=78;6(Bw4sSX$<2 zy z7a&Qe%7F!Q)4k<@d|_F7x##vmNxD@NRHvgQ#WGk6 zi@3t}2CoixE@ZPjI^0X2>>c)Acr$#QK83Xh{z)Sq{kKAY;k3aSCHSR)aFICYiofd2 zWOMq-DFl%TE8kHDJOMY#sE?=;ITl>G_*ydXh7a(Z48%3UZBCRkLQf}wV*FjKdO=Mp zTWu))^1flw)9*y}sFUCAOm9YgyVg@Rth;g{#Ba^}plV2fGn3HYp09j*TU5^^gHq-p z#o@|S*Lh0flMhQ~vM&%FGglyb?CL7xS=b43bNYm)2S&`|Z=yDm6J6;Oe*A8bbmT1i z3E{{d_D#(+U=0JXhF~UFs5v03VPW2(U+dNM-UYjRSaG2@^=MDOWar5)d&As1-Gmdd z48K`q?oUS#$ddBY-KFPO44XuELF67iMeMruxXg2)`0~~-^s%8F^80G&dl9x3(DU6o z9r;?Um!2Dc{lUx0O_YA`P&Tpuza!sCb>rhOdeXYVwnHlcUAEr%a24Rl?M=UuM}yla zQzWNz8h3<;|5rIDgoi_7Ni}kAJ6=dWnyzswR$x=A)IXEMrZfwB$+=BrAv$UsCvMwh z$F|4eZc&6*1M1gkPzQ{3``=fG2g;rqToSwwW9~#ywRYL}6>=*`LmY`l7m_2Z@cq@2 zy{1mL?olk`Y4=QS#sFO=b!NJq=GB8CdtmI|L|{Zt+@(Ic99-2UUZZO-Tbk+Gl$tsn z=Tn)sw_n1*GX8|!PP^G|bJgm0SXhhzb>9E^2yIB8q?^i+)VQ6@E;}@Xn~^_kid5>z zzA|aPE48p{oG0nMig-N25A&S8&|8oFjQ_xMmdkVR|CgL$vm{xjdftxQdTcy#t@o(s z-v4f#!+Tun->|8Phi}9E0irH3e!Q%Wv<8K@RN*3877gA?>EwsuZmju;%9y-O;mx*3 z8=qcU#4cpwvEemT{FI2RN*%t69ac+BMrN;LgW0(Zxeb}#gAT{Vez+AZ=b!0U8Nb{C z(~FVmJbYzDr}6;)Lv~m%F&&xyhX_osj=PV{zHbf3)qHq_%GmYK?4HJ7AA{LdH<|HPTCaXR(4uOb9B9!a_&1`(RE0n!Uztf6GP#1iTHuM$FW;?K zURq3TT%g*_epfMh1WvXcl4-~H&*Ek@M98FBQ{7I*Cg4U?yc4();RDU*eRObf?~NCo z+vJ)&QabhdAz4erD=@1?4eD(?h?JCJ(=vuja+_6mmAOgf}Y;}W+ z!+-4j4LY)l{RBE9jWxf`%?Rjyi*e9*2=KWN&!FEJ$IZ|)WYSLG5vM)?c=9C)8|MdI&kXAgm;eXuUtmtpRoY!Re z5a%_$Px9A>@k5-yuk3O=|9&uCCUy9YlvZ`xHJ|HsJ{nnz3tZ|u?Ytuzof4uku)8C< zvA>H@j5-+`SaBL`ws)G-`Xm-SBT@YgXv2WFQn5W?AVx6~{YT(haD2(-W?Y42(v4)d zb1-y}vs|N_ks~V#mG?=kA>i#)y^g#L^n^xjY_Guq5BfuNr8jiQ^q^|D+qtS;=uJhB z?V1Sd>djZT19rElCg6mO7c@qlj>iqo8s@y} z)VWKy;D8?)ZYL=x06O8f5PnF?NpuQCf0-CVrdI^eu!~{*Iqq+bZYU2J74x#&-`P=> zD}RQlWa~%-D!sbH04ky6en6)k`g~bZF3`!*WiRT_?UO|N|DzH=igg&Mbe`!4RNApm z161k;DqW09@@Ev*0+seDWhCLd(cBwRN|}@}kXk76v(hGp(|R`*EgU?u7PHEF>kK;8B`U2LZMt!F=37 zGy=Jk%or)Lm3IsLKZiOfkWVlJFbncS`tpA*W!9tjYj-s0m$3IL^C8b|Wd*w)%^}2u zjNbO;Bd0Nm3eBLCKsP1{>F&)tfL;ixsm)9Dd(j18+Gv;GSV8^7!t^%%4k#Bv(>kx=m@xDOF-a2YZ#0Ed8I?EI~>a&s`C+ zrZ)n^I_rj&*X^1l?(w~K-=`(Y$C~?CuKFHH;8Wst(%Xs*^=x0ymHxI1T+u%EG%SBg zHQzFnVCX70DbI1UB@RU9tY-p{<`UbGjlX6F8;?8V+zB@Rc}F&`BJneSMj~d!d8WTW zR)p-gkz%D?*_(JX^Ghm4{soBd&xozSA+cF4Iif3HCk~ln^tzFotu}i#;^j}-Vd#lp zL;=ZtMv8KWU%Sn2;Bhl1_Kh}wyZu|NRn?dDyMUuMU?(BmDW8aV66GlszWiyStee;` z8Q42(iY zY+P*ekUzsu&Ew8b{?8i#Z@;n?lk7U}07tb~m}^8A59(923bI6x6s?#^228f=bQPu9 zKyfD=wet2i0qh_@dEoS*-zgO36qE8YL0+xCMto!d;DOT$aQ!K^3H(RE763L1k_Q2J z4T|zAL4*B**lQ*}G6O7cz}(s~Y4EfG+&^m7Sda&_i_$|~%M8gQs^rTd{S}IXpb#0o zoe|o>34ENO;aEH|WSznN;JFsBw+*1*-LMGn=$tq+a(wuY2?xQ>mbK*wDV+ ztV!ewMMIpZT{`7518`svoWX;=a=mW(yD}|dKvK%Nx{f- zLd;!Jk@{TZK~wx4MXs7g_S`1kRmh9fpIY`GVy7CapA z0d3^=qV)W4L{wD}xg;(Bj`v~4d~$ypT-U#lbUl4)o6Npis&a4%tw!wwO+8M%WdzUp=o ze+sDthNc)TlmF*6FaNDjKpgdy`#B<85QAlO!?oA#^V1wa6cdOVfkA&FEtxCCJ?97v zIvRG1l!|pb*}z6h(pb$4&J&0SoLT8QWf{=S8Za*(Jgz+Gjbx^kgQxh<%wIjVEk~WO z9E4(VpafhGx=zo`_L_T2@JMmg#fCy1E7%0(A!uEwW4R}0;jclThmtxx&NLG0UQz6Q z7%{NYahlH`bEW``sa4qu<*~y!isd{&#plK2>Xs%?Of^J62_$udQ}jQtHLU1KJFx7Emqurrnvt^asO9T&udiAD^w3pY{+8UG=a}6 z1B+$^+7bf?Zjqm#A@lZ-b61lq-zLjq#=CvS_jiPgj)ZUIM?8;=AZ=!iq@W>m{~Pe649uk;;OPXGCo=yik(dy^ryB z#wqzmtiDpMSLBwHgng9!@`N1=Fm(+vY+v`c-W%cIh3g4n~@KQ8i zow5^T_ZiL>=&yiAfM+ixzj_OV&sgz=P{V9cqZf5Z)ANtGooArPKm>pYEX&R3UexMk z(#Xh>Qu3u*Sxe2seD3_5s%XZacSWEe%abK7!bWj}nM0-z$UEJo`myJ?peWz-1=*GX ze`oNz`C7G!+?mewP=N42Ayuy-Lpk5^bbS&f>AI)nr+iKRDueeOvmqwS_i1H5_uIRA zSxZ~wh%-dFSN2RRn895yJRHqPh1?Asvt9e96WP9s{h8$|1VY_tY?8Oq9q;p6EnZ)H z2Cbv_tYpLRF@I3@Q4q@OAGlVjHix2IygOK^3e4>Y6m-O5^;N5T8KoA+ z{jvf4L6X}N^a6d&XG_)UNQSG-yj_`ry|!($eziC{pC4VsA^6Ti`ObF(0k159>aB{^ zN;#vy%!yy%z>fqEgNZV>Qrj<4(mUGJCc~Lx&NV}2(4A3mC<0!{0${Zi0XOVM!0iY)G7|y2g#cXh1HmZ= zq9YLW%~A9w1YN2_(1W9BE`oOEA?QC5(Pjn$o)Zm3TX!Ph%31`R?2m{RBH-?P1RUxM zz*Q^+To8jO-Hw2%pCI7jQSf^Rcu-!5ptmAuR0@I?#3JY}1Rb{@LB|CkqL(6QV-W(b z9|fl(;PO%MEd<;=3eG~n%4G=HjesE|0%m3);7$arzK4K$2NB6D0QkLn@hSxEoQa6O zilEJz2>SIXdSFzKLkPMV5#76W6g&qJJxBy#_6h{NY2rrpgq#OHtAWyAD9Fg(d4B#M DTaNA( diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil index 7b130e013427d747d2d4199badc1dcd7fb8e8297..7759910ca527b4ff12fbe9ba3ee03fdb6a428ca7 100644 GIT binary patch delta 1180 zcmYLJe@q)?82;{hcfE2QTuVx?EsQG^>|EWhY$A2h^a@T;HXDOk`C%-DDH=e=HnZs> zZGm#j8ZBP8q0?E0b4^yv_%R%bGi`wdHh&aEDN0DeEkRA@Vy2skAzrhh-(T-P;vA z^KO}~qOzF)zz$IG6AVD|4g@$UFsg*Sr8K;(l>#s~62TYYTLhWpQiF4_Q{p!Inuuo)o}v9vF$;nc*xEvk*2m4HZLCEfB7!C9 zueKgKw*2GLTw;0c`qG;DS}b~#+H?MLWay+zdR0mfhMZOjinickyW5H_cvc9JG0q%m zpfL?rM2n34N9aQ^JiuQ=X+_{y-^#due0Js*d-eSI?ALoFB>;@%V%0Ej&#-iu<^h`z z3d}PUguCh6v&_5<2LNgUARm(ex7~M{eKY{6xd_L?PMT_LwJTn;V-1YM2WT8y)YN&@ zAxPz=D1f^o2gEV;e<7*yQnZkTlwt?d714ML5%Z>x{bZPflAYQP+p(v!c`d2z_{7z| z7-_Nqf01~f1t>zUcODQI#H|2*i}`>ABS_SY(#T&x`WSMI#3nXHHC?R4^-YnqjBz-V zN|y97R)O^=!7uz7IZ2w)Hv)=zC?(-^+M6--Lb9mT)uNj>Bn&3R^<=0cD$K6)H5UYg zopE93@RVEcXYZs9NT4=$`1{F?F8hd2eOSaGED}a%YJSH4P#GCOBBEQ$b@~dR8&=Iv z=3|)9Ai#C%5hZ9Oa;9II3v>lmFVeWzS6T4F6&WtZfs#wP_$P6%7%A@jadw+4JPheegsZ+ZS zI#)c2oUr{Aw8g!G)s%>tzWwEW0q)=xStklurq8;X-LBPH9J<|kl(8gf22n7{D$5-^ zwcue&90a+%ORmILbD}HR&+upEG<>Ot56jhvq*vd=FUz%1^u5fjFD7zZPnp$0&{e)S zkVXd&w@C&|oX0PQ)5kogow80$7bj#QNP%0~=hiq@BshGewKp?gHab-^`c&;` zcSTiw7PCasjm!;eb;Jkw E0q+okRR910 delta 1185 zcmZ8hYfM{Z82(N>)fywCeQ zKR&-Ypw1|_n#|yT4`Fk-knujR?Bc+0q_X1Oo&r1B_DjAp7&TJ| zxS%%;HEd}W{9+^Y4$&wwrdKV8VI&6GIH*@e!f=g^F~*(GYYo=H`EwCd6=5V+OX}!; za?4Pj4r-y_UEXLN{|TKW7Wo`Y9R%kh^TG(`iIzCcUCrcrSCwn(mp^~l8Nr@ZHtbY# zb^Xuff7f0(E4(^O^tSFg+8xQh8nwFJ46!tyj@A=41=cd({4{DK^n}BLwOIOwR_9jp zhGy;#Icwl)WyG~mL`|Ih&@~~3b;JjA;M2Ny+T;Rv2cd{B2vM7m@PER z-K?_#jI}El;{%T^a2!NWE)t^KkYM(USJ0nfv9 zao1%3hN+o;se-K{5^l4WWHSBvs6qJIFr){N_@>To3w9!ll0!*m&~4vj+D%|MhudHR z=^^*kXTTr1?@_0qwAYjAGxFWv>xgo#os84oSKl=zW0Tr(2;TaG`2-r0yMyU5B*8Z- z=!qqN8E;BdjVtm)zS$VJ8P0@Ro~A%YUN5VU$Lgb3OzL6do?wcPbQ%}e>BA)r_!j|; z(XdC;)DBG5A1H1DMax6OYq=Q4!h!>4H7WtExIEgnOalWXhR;j;wmD?&t)#!6S>X1h zOjAXog5GUUcFa>2-!#A=ta9z|op^tlUz!y+Cn@zh zMG~<60Pfb>JL3k_zPj%Tr{rSXxPpzyF*KT(d9sIJc{u_0rhW^hdKGm~q~Z9IC`5d$ zez!9vg89S*sr(ywAN135+BSj|cphM9v|R0&qiQV3bKMkqlYvn?W(jAVSE@{QRb`H} zxD-G%3C{}~B~|tkB`Ad4&~`TUkywI-0Q$e;6+ zZRl*AN y({e>iD`%D}8khR2PrpvZ(xj@}wmU)Gn9-5(u~O|l8mUGd^A372$o?Jwy?+3ac!wAO diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs index 8f21485..2f4b85e 100644 --- a/piet-gpu/src/encoder.rs +++ b/piet-gpu/src/encoder.rs @@ -37,147 +37,6 @@ pub struct Encoder { n_clip: u32, } -#[derive(Copy, Clone, Debug)] -pub struct EncodedSceneRef<'a, T: Copy + Pod> { - pub transform_stream: &'a [T], - pub tag_stream: &'a [u8], - pub pathseg_stream: &'a [u8], - pub linewidth_stream: &'a [f32], - pub drawtag_stream: &'a [u32], - pub drawdata_stream: &'a [u8], - pub n_path: u32, - pub n_pathseg: u32, - pub n_clip: u32, - pub ramp_data: &'a [u32], -} - -impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> { - /// Return a config for the element processing pipeline. - /// - /// This does not include further pipeline processing. Also returns the - /// beginning of free memory. - pub fn stage_config(&self) -> (Config, usize) { - // Layout of scene buffer - let drawtag_offset = 0; - let n_drawobj = self.n_drawobj(); - let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize); - let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE; - let trans_offset = drawdata_offset + self.drawdata_stream.len(); - let n_trans = self.transform_stream.len(); - let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize); - let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE; - let n_linewidth = self.linewidth_stream.len(); - let pathtag_offset = linewidth_offset + n_linewidth * LINEWIDTH_SIZE; - let n_pathtag = self.tag_stream.len(); - let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize); - let pathseg_offset = pathtag_offset + n_pathtag_padded; - - // Layout of memory - let mut alloc = 0; - let trans_alloc = alloc; - alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE; - let pathseg_alloc = alloc; - alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE; - let path_bbox_alloc = alloc; - let n_path = self.n_path as usize; - alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE; - let drawmonoid_alloc = alloc; - alloc += n_drawobj_padded * DRAWMONOID_SIZE; - let anno_alloc = alloc; - alloc += n_drawobj * ANNOTATED_SIZE; - let clip_alloc = alloc; - let n_clip = self.n_clip as usize; - const CLIP_SIZE: usize = 4; - alloc += n_clip * CLIP_SIZE; - let clip_bic_alloc = alloc; - const CLIP_BIC_SIZE: usize = 8; - // This can round down, as we only reduce the prefix - alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE; - let clip_stack_alloc = alloc; - const CLIP_EL_SIZE: usize = 20; - alloc += n_clip * CLIP_EL_SIZE; - let clip_bbox_alloc = alloc; - const CLIP_BBOX_SIZE: usize = 16; - alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE; - let draw_bbox_alloc = alloc; - alloc += n_drawobj * DRAW_BBOX_SIZE; - let drawinfo_alloc = alloc; - // TODO: not optimized; it can be accumulated during encoding or summed from drawtags - const MAX_DRAWINFO_SIZE: usize = 44; - alloc += n_drawobj * MAX_DRAWINFO_SIZE; - - let config = Config { - n_elements: n_drawobj as u32, - n_pathseg: self.n_pathseg, - pathseg_alloc: pathseg_alloc as u32, - anno_alloc: anno_alloc as u32, - trans_alloc: trans_alloc as u32, - path_bbox_alloc: path_bbox_alloc as u32, - drawmonoid_alloc: drawmonoid_alloc as u32, - clip_alloc: clip_alloc as u32, - clip_bic_alloc: clip_bic_alloc as u32, - clip_stack_alloc: clip_stack_alloc as u32, - clip_bbox_alloc: clip_bbox_alloc as u32, - draw_bbox_alloc: draw_bbox_alloc as u32, - drawinfo_alloc: drawinfo_alloc as u32, - n_trans: n_trans as u32, - n_path: self.n_path, - n_clip: self.n_clip, - trans_offset: trans_offset as u32, - linewidth_offset: linewidth_offset as u32, - pathtag_offset: pathtag_offset as u32, - pathseg_offset: pathseg_offset as u32, - drawtag_offset: drawtag_offset as u32, - drawdata_offset: drawdata_offset as u32, - ..Default::default() - }; - (config, alloc) - } - - pub fn write_scene(&self, buf: &mut BufWrite) { - buf.extend_slice(&self.drawtag_stream); - let n_drawobj = self.drawtag_stream.len(); - buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE); - buf.extend_slice(&self.drawdata_stream); - buf.extend_slice(&self.transform_stream); - let n_trans = self.transform_stream.len(); - buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE); - buf.extend_slice(&self.linewidth_stream); - buf.extend_slice(&self.tag_stream); - let n_pathtag = self.tag_stream.len(); - buf.fill_zero(padding(n_pathtag, PATHSEG_PART_SIZE as usize)); - buf.extend_slice(&self.pathseg_stream); - } - - /// The number of draw objects in the draw object stream. - pub(crate) fn n_drawobj(&self) -> usize { - self.drawtag_stream.len() - } - - /// The number of paths. - pub(crate) fn n_path(&self) -> u32 { - self.n_path - } - - /// The number of path segments. - pub(crate) fn n_pathseg(&self) -> u32 { - self.n_pathseg - } - - pub(crate) fn n_transform(&self) -> usize { - self.transform_stream.len() - } - - /// The number of tags in the path stream. - pub(crate) fn n_pathtag(&self) -> usize { - self.tag_stream.len() - } - - pub(crate) fn n_clip(&self) -> u32 { - self.n_clip - } -} - /// A scene fragment encoding a glyph. /// /// This is a reduced version of the full encoder. @@ -471,21 +330,6 @@ impl Encoder { self.n_path += glyph.n_path; self.n_pathseg += glyph.n_pathseg; } - - pub(crate) fn scene_ref(&self) -> EncodedSceneRef { - EncodedSceneRef { - transform_stream: &self.transform_stream, - tag_stream: &self.tag_stream, - pathseg_stream: &self.pathseg_stream, - linewidth_stream: &self.linewidth_stream, - drawtag_stream: &self.drawtag_stream, - drawdata_stream: &self.drawdata_stream, - n_path: self.n_path, - n_pathseg: self.n_pathseg, - n_clip: self.n_clip, - ramp_data: &[], - } - } } fn align_up(x: usize, align: usize) -> usize { diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index b3ead90..773007d 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -10,10 +10,7 @@ mod text; use std::convert::TryInto; -use bytemuck::Pod; - pub use blend::{Blend, BlendMode, CompositionMode}; -pub use encoder::EncodedSceneRef; pub use render_ctx::PietGpuRenderContext; pub use gradient::Colrv1RadialGradient; @@ -21,11 +18,11 @@ use piet::kurbo::Vec2; use piet::{ImageFormat, RenderContext}; use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Error, Image, - ImageLayout, Pipeline, QueryPool, Session, + include_shader, BindType, Buffer, BufferUsage, CmdBuf, ComputePassDescriptor, DescriptorSet, + Error, Image, ImageLayout, Pipeline, QueryPool, Session, }; -use pico_svg::PicoSvg; +pub use pico_svg::PicoSvg; use stages::{ClipBinding, ElementBinding, ElementCode}; use crate::stages::{ClipCode, Config, ElementStage}; @@ -358,27 +355,16 @@ impl Renderer { render_ctx: &mut PietGpuRenderContext, buf_ix: usize, ) -> Result<(), Error> { - let mut scene = render_ctx.encoded_scene(); - let ramp_data = render_ctx.get_ramp_data(); - scene.ramp_data = &ramp_data; - self.upload_scene(&scene, buf_ix) - } - - pub fn upload_scene( - &mut self, - scene: &EncodedSceneRef, - buf_ix: usize, - ) -> Result<(), Error> { - let (mut config, mut alloc) = scene.stage_config(); - let n_drawobj = scene.n_drawobj(); + let (mut config, mut alloc) = render_ctx.stage_config(); + let n_drawobj = render_ctx.n_drawobj(); // TODO: be more consistent in size types - let n_path = scene.n_path() as usize; + let n_path = render_ctx.n_path() as usize; self.n_paths = n_path; - self.n_transform = scene.n_transform(); - self.n_drawobj = scene.n_drawobj(); - self.n_pathseg = scene.n_pathseg() as usize; - self.n_pathtag = scene.n_pathtag(); - self.n_clip = scene.n_clip(); + self.n_transform = render_ctx.n_transform(); + self.n_drawobj = render_ctx.n_drawobj(); + self.n_pathseg = render_ctx.n_pathseg() as usize; + self.n_pathtag = render_ctx.n_pathtag(); + self.n_clip = render_ctx.n_clip(); // These constants depend on encoding and may need to be updated. // Perhaps we can plumb these from piet-gpu-derive? @@ -402,18 +388,19 @@ impl Renderer { // TODO: reallocate scene buffer if size is inadequate { let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?; - scene.write_scene(&mut mapped_scene); + render_ctx.write_scene(&mut mapped_scene); } self.config_bufs[buf_ix].write(&[config])?; self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?; // Upload gradient data. - if !scene.ramp_data.is_empty() { + let ramp_data = render_ctx.get_ramp_data(); + if !ramp_data.is_empty() { assert!( self.gradient_bufs[buf_ix].size() as usize - >= std::mem::size_of_val(&*scene.ramp_data) + >= std::mem::size_of_val(&*ramp_data) ); - self.gradient_bufs[buf_ix].write(scene.ramp_data)?; + self.gradient_bufs[buf_ix].write(&ramp_data)?; } } Ok(()) @@ -437,10 +424,10 @@ impl Renderer { cmd_buf.copy_buffer_to_image(&self.gradient_bufs[buf_ix], &self.gradients); cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General); cmd_buf.reset_query_pool(&query_pool); - cmd_buf.write_timestamp(&query_pool, 0); cmd_buf.begin_debug_label("Element bounding box calculation"); + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 0, 1)); self.element_stage.record( - cmd_buf, + &mut pass, &self.element_code, &self.element_bindings[buf_ix], self.n_transform as u64, @@ -448,56 +435,59 @@ impl Renderer { self.n_pathtag as u32, self.n_drawobj as u64, ); + pass.end(); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.memory_barrier(); - cmd_buf.begin_debug_label("Clip bounding box calculation"); + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 2, 3)); + pass.begin_debug_label("Clip bounding box calculation"); self.clip_binding - .record(cmd_buf, &self.clip_code, self.n_clip as u32); - cmd_buf.end_debug_label(); - cmd_buf.begin_debug_label("Element binning"); - cmd_buf.dispatch( + .record(&mut pass, &self.clip_code, self.n_clip as u32); + pass.end_debug_label(); + pass.begin_debug_label("Element binning"); + pass.dispatch( &self.bin_pipeline, &self.bin_ds, (((self.n_paths + 255) / 256) as u32, 1, 1), (256, 1, 1), ); - cmd_buf.end_debug_label(); - cmd_buf.memory_barrier(); - cmd_buf.begin_debug_label("Tile allocation"); - cmd_buf.dispatch( + pass.end_debug_label(); + pass.memory_barrier(); + pass.begin_debug_label("Tile allocation"); + pass.dispatch( &self.tile_pipeline, &self.tile_ds[buf_ix], (((self.n_paths + 255) / 256) as u32, 1, 1), (256, 1, 1), ); - cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 2); - cmd_buf.memory_barrier(); + pass.end_debug_label(); + pass.end(); cmd_buf.begin_debug_label("Path flattening"); - cmd_buf.dispatch( + cmd_buf.memory_barrier(); + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 4, 5)); + pass.dispatch( &self.path_pipeline, &self.path_ds, (((self.n_pathseg + 31) / 32) as u32, 1, 1), (32, 1, 1), ); + pass.end(); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 3); cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Backdrop propagation"); - cmd_buf.dispatch( + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 6, 7)); + pass.dispatch( &self.backdrop_pipeline, &self.backdrop_ds, (((self.n_paths + 255) / 256) as u32, 1, 1), (256, self.backdrop_y, 1), ); + pass.end(); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 4); // TODO: redo query accounting - cmd_buf.write_timestamp(&query_pool, 5); cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Coarse raster"); - cmd_buf.dispatch( + let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 8, 9)); + pass.dispatch( &self.coarse_pipeline, &self.coarse_ds[buf_ix], ( @@ -507,11 +497,13 @@ impl Renderer { ), (256, 1, 1), ); + pass.end(); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 6); cmd_buf.memory_barrier(); cmd_buf.begin_debug_label("Fine raster"); - cmd_buf.dispatch( + let mut pass = + cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 10, 11)); + pass.dispatch( &self.k4_pipeline, &self.k4_ds, ( @@ -521,8 +513,8 @@ impl Renderer { ), (8, 4, 1), ); + pass.end(); cmd_buf.end_debug_label(); - cmd_buf.write_timestamp(&query_pool, 7); cmd_buf.memory_barrier(); cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc); } diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index ad608ca..dca03eb 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use crate::encoder::{EncodedSceneRef, GlyphEncoder}; +use crate::encoder::GlyphEncoder; use crate::stages::{Config, Transform}; use crate::MAX_BLEND_STACK; use piet::kurbo::{Affine, Insets, PathEl, Point, Rect, Shape}; @@ -97,10 +97,6 @@ impl PietGpuRenderContext { self.new_encoder.stage_config() } - pub fn encoded_scene(&self) -> EncodedSceneRef { - self.new_encoder.scene_ref() - } - /// Number of draw objects. /// /// This is for the new element processing pipeline. It's not necessarily the diff --git a/piet-gpu/src/stages.rs b/piet-gpu/src/stages.rs index 52b8bf1..5442ba3 100644 --- a/piet-gpu/src/stages.rs +++ b/piet-gpu/src/stages.rs @@ -26,7 +26,7 @@ use bytemuck::{Pod, Zeroable}; pub use clip::{ClipBinding, ClipCode, CLIP_PART_SIZE}; pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE}; pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE}; -use piet_gpu_hal::{Buffer, CmdBuf, Session}; +use piet_gpu_hal::{Buffer, ComputePass, Session}; pub use transform::{ Transform, TransformBinding, TransformCode, TransformStage, TRANSFORM_PART_SIZE, }; @@ -140,7 +140,7 @@ impl ElementStage { pub unsafe fn record( &self, - cmd_buf: &mut CmdBuf, + pass: &mut ComputePass, code: &ElementCode, binding: &ElementBinding, n_transform: u64, @@ -149,14 +149,14 @@ impl ElementStage { n_drawobj: u64, ) { self.transform_stage.record( - cmd_buf, + pass, &code.transform_code, &binding.transform_binding, n_transform, ); // No memory barrier needed here; path has at least one before pathseg self.path_stage.record( - cmd_buf, + pass, &code.path_code, &binding.path_binding, n_paths, @@ -164,6 +164,6 @@ impl ElementStage { ); // No memory barrier needed here; draw has at least one before draw_leaf self.draw_stage - .record(cmd_buf, &code.draw_code, &binding.draw_binding, n_drawobj); + .record(pass, &code.draw_code, &binding.draw_binding, n_drawobj); } } diff --git a/piet-gpu/src/stages/clip.rs b/piet-gpu/src/stages/clip.rs index e4bc3db..2fd195b 100644 --- a/piet-gpu/src/stages/clip.rs +++ b/piet-gpu/src/stages/clip.rs @@ -16,7 +16,7 @@ //! The clip processing stage (includes substages). -use piet_gpu_hal::{include_shader, BindType, Buffer, CmdBuf, DescriptorSet, Pipeline, Session}; +use piet_gpu_hal::{include_shader, BindType, Buffer, ComputePass, DescriptorSet, Pipeline, Session}; // Note that this isn't the code/stage/binding pattern of most of the other stages // in the new element processing pipeline. We want to move those temporary buffers @@ -69,26 +69,26 @@ impl ClipBinding { /// Record the clip dispatches. /// /// Assumes memory barrier on entry. Provides memory barrier on exit. - pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, code: &ClipCode, n_clip: u32) { + pub unsafe fn record(&self, pass: &mut ComputePass, code: &ClipCode, n_clip: u32) { let n_wg_reduce = n_clip.saturating_sub(1) / CLIP_PART_SIZE; if n_wg_reduce > 0 { - cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &self.reduce_ds, (n_wg_reduce, 1, 1), (CLIP_PART_SIZE, 1, 1), ); - cmd_buf.memory_barrier(); + pass.memory_barrier(); } let n_wg = (n_clip + CLIP_PART_SIZE - 1) / CLIP_PART_SIZE; if n_wg > 0 { - cmd_buf.dispatch( + pass.dispatch( &code.leaf_pipeline, &self.leaf_ds, (n_wg, 1, 1), (CLIP_PART_SIZE, 1, 1), ); - cmd_buf.memory_barrier(); + pass.memory_barrier(); } } } diff --git a/piet-gpu/src/stages/draw.rs b/piet-gpu/src/stages/draw.rs index 21312a4..f0ee2b6 100644 --- a/piet-gpu/src/stages/draw.rs +++ b/piet-gpu/src/stages/draw.rs @@ -19,7 +19,7 @@ use bytemuck::{Pod, Zeroable}; use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session, + include_shader, BindType, Buffer, BufferUsage, ComputePass, DescriptorSet, Pipeline, Session, }; /// The output element of the draw object stage. @@ -130,7 +130,7 @@ impl DrawStage { pub unsafe fn record( &self, - cmd_buf: &mut CmdBuf, + pass: &mut ComputePass, code: &DrawCode, binding: &DrawBinding, size: u64, @@ -140,22 +140,22 @@ impl DrawStage { } let n_workgroups = (size + DRAW_PART_SIZE - 1) / DRAW_PART_SIZE; if n_workgroups > 1 { - cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &binding.reduce_ds, (n_workgroups as u32, 1, 1), (DRAW_WG as u32, 1, 1), ); - cmd_buf.memory_barrier(); - cmd_buf.dispatch( + pass.memory_barrier(); + pass.dispatch( &code.root_pipeline, &self.root_ds, (1, 1, 1), (DRAW_WG as u32, 1, 1), ); } - cmd_buf.memory_barrier(); - cmd_buf.dispatch( + pass.memory_barrier(); + pass.dispatch( &code.leaf_pipeline, &binding.leaf_ds, (n_workgroups as u32, 1, 1), diff --git a/piet-gpu/src/stages/path.rs b/piet-gpu/src/stages/path.rs index 6c524a2..be33041 100644 --- a/piet-gpu/src/stages/path.rs +++ b/piet-gpu/src/stages/path.rs @@ -17,7 +17,7 @@ //! The path stage (includes substages). use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session, + include_shader, BindType, Buffer, BufferUsage, ComputePass, DescriptorSet, Pipeline, Session, }; pub struct PathCode { @@ -148,7 +148,7 @@ impl PathStage { /// those are consumed. Result is written without barrier. pub unsafe fn record( &self, - cmd_buf: &mut CmdBuf, + pass: &mut ComputePass, code: &PathCode, binding: &PathBinding, n_paths: u32, @@ -166,15 +166,15 @@ impl PathStage { let reduce_part_tags = REDUCE_PART_SIZE * 4; let n_wg_tag_reduce = (n_tags + reduce_part_tags - 1) / reduce_part_tags; if n_wg_tag_reduce > 1 { - cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &binding.reduce_ds, (n_wg_tag_reduce, 1, 1), (REDUCE_WG, 1, 1), ); // I think we can skip root if n_wg_tag_reduce == 2 - cmd_buf.memory_barrier(); - cmd_buf.dispatch( + pass.memory_barrier(); + pass.dispatch( &code.tag_root_pipeline, &self.tag_root_ds, (1, 1, 1), @@ -183,15 +183,15 @@ impl PathStage { // No barrier needed here; clear doesn't depend on path tags } let n_wg_clear = (n_paths + CLEAR_WG - 1) / CLEAR_WG; - cmd_buf.dispatch( + pass.dispatch( &code.clear_pipeline, &binding.clear_ds, (n_wg_clear, 1, 1), (CLEAR_WG, 1, 1), ); - cmd_buf.memory_barrier(); + pass.memory_barrier(); let n_wg_pathseg = (n_tags + SCAN_PART_SIZE - 1) / SCAN_PART_SIZE; - cmd_buf.dispatch( + pass.dispatch( &code.pathseg_pipeline, &binding.path_ds, (n_wg_pathseg, 1, 1), diff --git a/piet-gpu/src/stages/transform.rs b/piet-gpu/src/stages/transform.rs index b21712f..8de7cee 100644 --- a/piet-gpu/src/stages/transform.rs +++ b/piet-gpu/src/stages/transform.rs @@ -20,7 +20,7 @@ use bytemuck::{Pod, Zeroable}; use piet::kurbo::Affine; use piet_gpu_hal::{ - include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session, + include_shader, BindType, Buffer, BufferUsage, ComputePass, DescriptorSet, Pipeline, Session, }; /// An affine transform. @@ -132,7 +132,7 @@ impl TransformStage { pub unsafe fn record( &self, - cmd_buf: &mut CmdBuf, + pass: &mut ComputePass, code: &TransformCode, binding: &TransformBinding, size: u64, @@ -142,22 +142,22 @@ impl TransformStage { } let n_workgroups = (size + TRANSFORM_PART_SIZE - 1) / TRANSFORM_PART_SIZE; if n_workgroups > 1 { - cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &binding.reduce_ds, (n_workgroups as u32, 1, 1), (TRANSFORM_WG as u32, 1, 1), ); - cmd_buf.memory_barrier(); - cmd_buf.dispatch( + pass.memory_barrier(); + pass.dispatch( &code.root_pipeline, &self.root_ds, (1, 1, 1), (TRANSFORM_WG as u32, 1, 1), ); - cmd_buf.memory_barrier(); + pass.memory_barrier(); } - cmd_buf.dispatch( + pass.dispatch( &code.leaf_pipeline, &binding.leaf_ds, (n_workgroups as u32, 1, 1), diff --git a/piet-gpu/src/test_scenes.rs b/piet-gpu/src/test_scenes.rs index cf5a50d..bfd2af2 100644 --- a/piet-gpu/src/test_scenes.rs +++ b/piet-gpu/src/test_scenes.rs @@ -21,12 +21,7 @@ pub fn render_blend_test(rc: &mut PietGpuRenderContext, i: usize, blend: Blend) rc.restore().unwrap(); } -pub fn render_svg(rc: &mut impl RenderContext, filename: &str, scale: f64) { - let xml_str = std::fs::read_to_string(filename).unwrap(); - let start = std::time::Instant::now(); - let svg = PicoSvg::load(&xml_str, scale).unwrap(); - println!("parsing time: {:?}", start.elapsed()); - +pub fn render_svg(rc: &mut impl RenderContext, svg: &PicoSvg) { let start = std::time::Instant::now(); svg.render(rc); println!("flattening and encoding time: {:?}", start.elapsed()); diff --git a/tests/src/clear.rs b/tests/src/clear.rs index fc6f063..af4b8ea 100644 --- a/tests/src/clear.rs +++ b/tests/src/clear.rs @@ -16,11 +16,11 @@ //! Utilities (and a benchmark) for clearing buffers with compute shaders. -use piet_gpu_hal::{include_shader, BindType, BufferUsage, DescriptorSet}; +use piet_gpu_hal::{include_shader, BindType, BufferUsage, ComputePass, DescriptorSet}; use piet_gpu_hal::{Buffer, Pipeline}; use crate::config::Config; -use crate::runner::{Commands, Runner}; +use crate::runner::Runner; use crate::test_result::TestResult; const WG_SIZE: u64 = 256; @@ -52,9 +52,9 @@ pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult let mut total_elapsed = 0.0; for i in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); - stage.record(&mut commands, &code, &binding); - commands.write_timestamp(1); + let mut pass = commands.compute_pass(0, 1); + stage.record(&mut pass, &code, &binding); + pass.end(); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&out_buf); @@ -108,17 +108,12 @@ impl ClearStage { ClearBinding { descriptor_set } } - pub unsafe fn record( - &self, - commands: &mut Commands, - code: &ClearCode, - bindings: &ClearBinding, - ) { + pub unsafe fn record(&self, pass: &mut ComputePass, code: &ClearCode, bindings: &ClearBinding) { let n_workgroups = (self.n_elements + WG_SIZE - 1) / WG_SIZE; // An issue: for clearing large buffers (>16M), we need to check the // number of workgroups against the (dynamically detected) limit, and // potentially issue multiple dispatches. - commands.cmd_buf.dispatch( + pass.dispatch( &code.pipeline, &bindings.descriptor_set, (n_workgroups as u32, 1, 1), diff --git a/tests/src/clip.rs b/tests/src/clip.rs index 4a38949..b1f8613 100644 --- a/tests/src/clip.rs +++ b/tests/src/clip.rs @@ -58,11 +58,11 @@ pub unsafe fn clip_test(runner: &mut Runner, config: &Config) -> TestResult { let binding = ClipBinding::new(&runner.session, &code, &config_buf, &memory.dev_buf); let mut commands = runner.commands(); - commands.write_timestamp(0); commands.upload(&memory); - binding.record(&mut commands.cmd_buf, &code, n_clip as u32); + let mut pass = commands.compute_pass(0, 1); + binding.record(&mut pass, &code, n_clip as u32); + pass.end(); commands.download(&memory); - commands.write_timestamp(1); runner.submit(commands); let dst = memory.map_read(..); if let Some(failure) = data.verify(&dst) { diff --git a/tests/src/draw.rs b/tests/src/draw.rs index 4372da4..dc82572 100644 --- a/tests/src/draw.rs +++ b/tests/src/draw.rs @@ -77,9 +77,9 @@ pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult { let n_iter = config.n_iter; for i in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); - stage.record(&mut commands.cmd_buf, &code, &binding, n_tag); - commands.write_timestamp(1); + let mut pass = commands.compute_pass(0, 1); + stage.record(&mut pass, &code, &binding, n_tag); + pass.end(); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&memory); diff --git a/tests/src/linkedlist.rs b/tests/src/linkedlist.rs index 5767806..e24adcb 100644 --- a/tests/src/linkedlist.rs +++ b/tests/src/linkedlist.rs @@ -45,9 +45,7 @@ pub unsafe fn run_linkedlist_test(runner: &mut Runner, config: &Config) -> TestR for i in 0..n_iter { let mut commands = runner.commands(); // Might clear only buckets to save time. - commands.write_timestamp(0); stage.record(&mut commands, &code, &binding, &mem_buf.dev_buf); - commands.write_timestamp(1); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&mem_buf); @@ -107,12 +105,14 @@ impl LinkedListStage { commands.cmd_buf.clear_buffer(out_buf, None); commands.cmd_buf.memory_barrier(); let n_workgroups = N_BUCKETS / WG_SIZE; - commands.cmd_buf.dispatch( + let mut pass = commands.compute_pass(0, 1); + pass.dispatch( &code.pipeline, &bindings.descriptor_set, (n_workgroups as u32, 1, 1), (WG_SIZE as u32, 1, 1), ); + pass.end(); } } diff --git a/tests/src/message_passing.rs b/tests/src/message_passing.rs index c5d989b..39e71dc 100644 --- a/tests/src/message_passing.rs +++ b/tests/src/message_passing.rs @@ -59,9 +59,7 @@ pub unsafe fn run_message_passing_test( let mut failures = 0; for _ in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); stage.record(&mut commands, &code, &binding, &out_buf.dev_buf); - commands.write_timestamp(1); commands.cmd_buf.memory_barrier(); commands.download(&out_buf); total_elapsed += runner.submit(commands); @@ -128,11 +126,13 @@ impl MessagePassingStage { commands.cmd_buf.clear_buffer(&self.data_buf, None); commands.cmd_buf.clear_buffer(out_buf, None); commands.cmd_buf.memory_barrier(); - commands.cmd_buf.dispatch( + let mut pass = commands.compute_pass(0, 1); + pass.dispatch( &code.pipeline, &bindings.descriptor_set, (256, 1, 1), (256, 1, 1), ); + pass.end(); } } diff --git a/tests/src/path.rs b/tests/src/path.rs index bf72c68..9d794e1 100644 --- a/tests/src/path.rs +++ b/tests/src/path.rs @@ -105,15 +105,15 @@ pub unsafe fn path_test(runner: &mut Runner, config: &Config) -> TestResult { let mut commands = runner.commands(); commands.cmd_buf.copy_buffer(&memory_init, &memory.dev_buf); commands.cmd_buf.memory_barrier(); - commands.write_timestamp(0); + let mut pass = commands.compute_pass(0, 1); stage.record( - &mut commands.cmd_buf, + &mut pass, &code, &binding, path_data.n_path, path_data.tags.len() as u32, ); - commands.write_timestamp(1); + pass.end(); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&memory); diff --git a/tests/src/prefix.rs b/tests/src/prefix.rs index 4174d8d..dbaf256 100644 --- a/tests/src/prefix.rs +++ b/tests/src/prefix.rs @@ -85,9 +85,7 @@ pub unsafe fn run_prefix_test( let mut total_elapsed = 0.0; for i in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); stage.record(&mut commands, &code, &binding); - commands.write_timestamp(1); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&out_buf); @@ -159,12 +157,14 @@ impl PrefixStage { let n_workgroups = (self.n_elements + ELEMENTS_PER_WG - 1) / ELEMENTS_PER_WG; commands.cmd_buf.clear_buffer(&self.state_buf, None); commands.cmd_buf.memory_barrier(); - commands.cmd_buf.dispatch( + let mut pass = commands.compute_pass(0, 1); + pass.dispatch( &code.pipeline, &bindings.descriptor_set, (n_workgroups as u32, 1, 1), (WG_SIZE as u32, 1, 1), ); + pass.end(); // One thing that's missing here is registering the buffers so // they can be safely dropped by Rust code before the execution // of the command buffer completes. diff --git a/tests/src/prefix_tree.rs b/tests/src/prefix_tree.rs index 24be2af..3c9c813 100644 --- a/tests/src/prefix_tree.rs +++ b/tests/src/prefix_tree.rs @@ -66,9 +66,7 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul let mut commands = runner.commands(); commands.cmd_buf.copy_buffer(&data_buf, &out_buf.dev_buf); commands.cmd_buf.memory_barrier(); - commands.write_timestamp(0); stage.record(&mut commands, &code, &binding); - commands.write_timestamp(1); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&out_buf); @@ -175,33 +173,35 @@ impl PrefixTreeStage { code: &PrefixTreeCode, bindings: &PrefixTreeBinding, ) { + let mut pass = commands.compute_pass(0, 1); let n = self.tmp_bufs.len(); for i in 0..n { let n_workgroups = self.sizes[i + 1]; - commands.cmd_buf.dispatch( + pass.dispatch( &code.reduce_pipeline, &bindings.descriptor_sets[i], (n_workgroups as u32, 1, 1), (WG_SIZE as u32, 1, 1), ); - commands.cmd_buf.memory_barrier(); + pass.memory_barrier(); } - commands.cmd_buf.dispatch( + pass.dispatch( &code.root_pipeline, &bindings.descriptor_sets[n], (1, 1, 1), (WG_SIZE as u32, 1, 1), ); for i in (0..n).rev() { - commands.cmd_buf.memory_barrier(); + pass.memory_barrier(); let n_workgroups = self.sizes[i + 1]; - commands.cmd_buf.dispatch( + pass.dispatch( &code.scan_pipeline, &bindings.descriptor_sets[2 * n - i], (n_workgroups as u32, 1, 1), (WG_SIZE as u32, 1, 1), ); } + pass.end(); } } diff --git a/tests/src/runner.rs b/tests/src/runner.rs index 1fd6774..3ba8223 100644 --- a/tests/src/runner.rs +++ b/tests/src/runner.rs @@ -20,8 +20,8 @@ use std::ops::RangeBounds; use bytemuck::Pod; use piet_gpu_hal::{ - BackendType, BufReadGuard, BufWriteGuard, Buffer, BufferUsage, CmdBuf, Instance, InstanceFlags, - QueryPool, Session, + BackendType, BufReadGuard, BufWriteGuard, Buffer, BufferUsage, CmdBuf, ComputePass, + ComputePassDescriptor, Instance, InstanceFlags, QueryPool, Session, }; pub struct Runner { @@ -118,8 +118,14 @@ impl Runner { } impl Commands { - pub unsafe fn write_timestamp(&mut self, query: u32) { - self.cmd_buf.write_timestamp(&self.query_pool, query); + /// Start a compute pass with timer queries. + pub unsafe fn compute_pass(&mut self, start_query: u32, end_query: u32) -> ComputePass { + self.cmd_buf + .begin_compute_pass(&ComputePassDescriptor::timer( + &self.query_pool, + start_query, + end_query, + )) } pub unsafe fn upload(&mut self, buf: &BufStage) { diff --git a/tests/src/transform.rs b/tests/src/transform.rs index 6edcc3f..43bfc67 100644 --- a/tests/src/transform.rs +++ b/tests/src/transform.rs @@ -61,9 +61,9 @@ pub unsafe fn transform_test(runner: &mut Runner, config: &Config) -> TestResult let n_iter = config.n_iter; for i in 0..n_iter { let mut commands = runner.commands(); - commands.write_timestamp(0); - stage.record(&mut commands.cmd_buf, &code, &binding, n_elements); - commands.write_timestamp(1); + let mut pass = commands.compute_pass(0, 1); + stage.record(&mut pass, &code, &binding, n_elements); + pass.end(); if i == 0 || config.verify_all { commands.cmd_buf.memory_barrier(); commands.download(&memory);