From 4458e36efa9fe3520480ad017172b347c0b472aa Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Thu, 18 Nov 2021 19:38:31 -0800 Subject: [PATCH] Start implementing timer queries in Metal --- piet-gpu-hal/src/backend.rs | 10 +-- piet-gpu-hal/src/metal.rs | 150 +++++++++++++++++++++++++++----- piet-gpu-hal/src/metal/timer.rs | 98 +++++++++++++++++++++ 3 files changed, 229 insertions(+), 29 deletions(-) create mode 100644 piet-gpu-hal/src/metal/timer.rs diff --git a/piet-gpu-hal/src/backend.rs b/piet-gpu-hal/src/backend.rs index 02ac7cb..5715d62 100644 --- a/piet-gpu-hal/src/backend.rs +++ b/piet-gpu-hal/src/backend.rs @@ -202,16 +202,16 @@ pub trait CmdBuf { /// This is readily supported in Vulkan, but for portability it is remarkably /// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute /// kernel, or organize the code not to need it. - unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option); + unsafe fn clear_buffer(&mut self, buffer: &D::Buffer, size: Option); - unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer); + unsafe fn copy_buffer(&mut self, src: &D::Buffer, dst: &D::Buffer); - unsafe fn copy_image_to_buffer(&self, src: &D::Image, dst: &D::Buffer); + unsafe fn copy_image_to_buffer(&mut self, src: &D::Image, dst: &D::Buffer); - unsafe fn copy_buffer_to_image(&self, src: &D::Buffer, dst: &D::Image); + unsafe fn copy_buffer_to_image(&mut self, src: &D::Buffer, dst: &D::Image); // low portability, dx12 doesn't support it natively - unsafe fn blit_image(&self, src: &D::Image, dst: &D::Image); + unsafe fn blit_image(&mut self, src: &D::Image, dst: &D::Image); /// Reset the query pool. /// diff --git a/piet-gpu-hal/src/metal.rs b/piet-gpu-hal/src/metal.rs index e3157d4..c96f971 100644 --- a/piet-gpu-hal/src/metal.rs +++ b/piet-gpu-hal/src/metal.rs @@ -15,18 +15,20 @@ // Also licensed under MIT license, at your choice. mod clear; +mod timer; mod util; use std::mem; use std::sync::{Arc, Mutex}; +use block::Block; use cocoa_foundation::base::id; use cocoa_foundation::foundation::{NSInteger, NSUInteger}; use objc::rc::autoreleasepool; use objc::runtime::{Object, BOOL, YES}; use objc::{class, msg_send, sel, sel_impl}; -use metal::{CGFloat, MTLFeatureSet}; +use metal::{CGFloat, CommandBufferRef, MTLFeatureSet}; use raw_window_handle::{HasRawWindowHandle, RawWindowHandle}; @@ -81,6 +83,22 @@ pub struct Semaphore; pub struct CmdBuf { cmd_buf: metal::CommandBuffer, helpers: Arc, + cur_encoder: Encoder, + time_calibration: Arc>, +} + +enum Encoder { + None, + Compute(metal::ComputeCommandEncoder), + Blit(metal::BlitCommandEncoder), +} + +#[derive(Default)] +struct TimeCalibration { + cpu_start_ts: u64, + gpu_start_ts: u64, + cpu_end_ts: u64, + gpu_end_ts: u64, } pub struct QueryPool; @@ -209,6 +227,10 @@ impl MtlDevice { let helpers = Arc::new(Helpers { clear_pipeline: clear::make_clear_pipeline(&device), }); + // Timer stuff + if let Some(timer_set) = timer::CounterSet::get_timer_counter_set(&device) { + let timer = timer::CounterSampleBuffer::new(&device, 4, &timer_set); + } MtlDevice { device, cmd_queue: Arc::new(Mutex::new(cmd_queue)), @@ -220,7 +242,9 @@ impl MtlDevice { pub fn cmd_buf_from_raw_mtl(&self, raw_cmd_buf: metal::CommandBuffer) -> CmdBuf { let cmd_buf = raw_cmd_buf; let helpers = self.helpers.clone(); - CmdBuf { cmd_buf, helpers } + let cur_encoder = Encoder::None; + let time_calibration = Default::default(); + CmdBuf { cmd_buf, helpers, cur_encoder, time_calibration } } pub fn image_from_raw_mtl(&self, texture: metal::Texture, width: u32, height: u32) -> Image { @@ -331,10 +355,16 @@ impl crate::backend::Device for MtlDevice { fn create_cmd_buf(&self) -> Result { let cmd_queue = self.cmd_queue.lock().unwrap(); // consider new_command_buffer_with_unretained_references for performance - let cmd_buf = cmd_queue.new_command_buffer(); - let cmd_buf = autoreleasepool(|| cmd_buf.to_owned()); + let cmd_buf = cmd_queue.new_command_buffer().to_owned(); let helpers = self.helpers.clone(); - Ok(CmdBuf { cmd_buf, helpers }) + let cur_encoder = Encoder::None; + let time_calibration = Default::default(); + Ok(CmdBuf { + cmd_buf, + helpers, + cur_encoder, + time_calibration, + }) } unsafe fn destroy_cmd_buf(&self, _cmd_buf: Self::CmdBuf) -> Result<(), Error> { @@ -358,7 +388,45 @@ impl crate::backend::Device for MtlDevice { _signal_semaphores: &[&Self::Semaphore], fence: Option<&mut Self::Fence>, ) -> Result<(), Error> { + unsafe fn add_scheduled_handler( + cmd_buf: &metal::CommandBufferRef, + block: &Block<(&CommandBufferRef,), ()>, + ) { + msg_send![cmd_buf, addScheduledHandler: block] + } for cmd_buf in cmd_bufs { + let time_calibration = cmd_buf.time_calibration.clone(); + let start_block = block::ConcreteBlock::new(move |buffer: &metal::CommandBufferRef| { + let device: id = msg_send![buffer, device]; + let mut time_calibration = time_calibration.lock().unwrap(); + let cpu_ts_ptr = &mut time_calibration.cpu_start_ts as *mut _; + let gpu_ts_ptr = &mut time_calibration.gpu_start_ts as *mut _; + // TODO: only do this if supported. + let () = msg_send![device, sampleTimestamps: cpu_ts_ptr gpuTimestamp: gpu_ts_ptr]; + println!( + "scheduled, {}, {}", + time_calibration.cpu_start_ts, time_calibration.gpu_start_ts + ); + }) + .copy(); + add_scheduled_handler(&cmd_buf.cmd_buf, &start_block); + let time_calibration = cmd_buf.time_calibration.clone(); + let completed_block = + block::ConcreteBlock::new(move |buffer: &metal::CommandBufferRef| { + let device: id = msg_send![buffer, device]; + let mut time_calibration = time_calibration.lock().unwrap(); + let cpu_ts_ptr = &mut time_calibration.cpu_end_ts as *mut _; + let gpu_ts_ptr = &mut time_calibration.gpu_end_ts as *mut _; + // TODO: only do this if supported. + let () = + msg_send![device, sampleTimestamps: cpu_ts_ptr gpuTimestamp: gpu_ts_ptr]; + println!( + "completed, {}, {}", + time_calibration.cpu_end_ts, time_calibration.gpu_end_ts + ); + }) + .copy(); + cmd_buf.cmd_buf.add_completed_handler(&completed_block); cmd_buf.cmd_buf.commit(); } if let Some(last_cmd_buf) = cmd_bufs.last() { @@ -439,7 +507,9 @@ impl crate::backend::Device for MtlDevice { impl crate::backend::CmdBuf for CmdBuf { unsafe fn begin(&mut self) {} - unsafe fn finish(&mut self) {} + unsafe fn finish(&mut self) { + self.flush_encoder(); + } unsafe fn reset(&mut self) -> bool { false @@ -452,7 +522,7 @@ impl crate::backend::CmdBuf for CmdBuf { workgroup_count: (u32, u32, u32), workgroup_size: (u32, u32, u32), ) { - let encoder = self.cmd_buf.new_compute_command_encoder(); + let encoder = self.compute_command_encoder(); encoder.set_compute_pipeline_state(&pipeline.0); let mut buf_ix = 0; for buffer in &descriptor_set.buffers { @@ -475,7 +545,6 @@ impl crate::backend::CmdBuf for CmdBuf { depth: workgroup_size.2 as u64, }; encoder.dispatch_thread_groups(workgroup_count, workgroup_size); - encoder.end_encoding(); } unsafe fn memory_barrier(&mut self) { @@ -494,22 +563,23 @@ impl crate::backend::CmdBuf for CmdBuf { // I think these are being tracked. } - unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option) { + unsafe fn clear_buffer(&mut self, buffer: &Buffer, size: Option) { let size = size.unwrap_or(buffer.size); - let encoder = self.cmd_buf.new_compute_command_encoder(); - clear::encode_clear(&encoder, &self.helpers.clear_pipeline, &buffer.buffer, size); - encoder.end_encoding() + let _ = self.compute_command_encoder(); + // Getting this directly is a workaround for a borrow checker issue. + if let Encoder::Compute(e) = &self.cur_encoder { + clear::encode_clear(e, &self.helpers.clear_pipeline, &buffer.buffer, size); + } } - unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { - let encoder = self.cmd_buf.new_blit_command_encoder(); + unsafe fn copy_buffer(&mut self, src: &Buffer, dst: &Buffer) { + let encoder = self.blit_command_encoder(); let size = src.size.min(dst.size); encoder.copy_from_buffer(&src.buffer, 0, &dst.buffer, 0, size); - encoder.end_encoding(); } - unsafe fn copy_image_to_buffer(&self, src: &Image, dst: &Buffer) { - let encoder = self.cmd_buf.new_blit_command_encoder(); + unsafe fn copy_image_to_buffer(&mut self, src: &Image, dst: &Buffer) { + let encoder = self.blit_command_encoder(); assert_eq!(dst.size, (src.width as u64) * (src.height as u64) * 4); let bytes_per_row = (src.width * 4) as NSUInteger; let src_size = metal::MTLSize { @@ -530,11 +600,10 @@ impl crate::backend::CmdBuf for CmdBuf { bytes_per_row * src.height as NSUInteger, metal::MTLBlitOption::empty(), ); - encoder.end_encoding(); } - unsafe fn copy_buffer_to_image(&self, src: &Buffer, dst: &Image) { - let encoder = self.cmd_buf.new_blit_command_encoder(); + unsafe fn copy_buffer_to_image(&mut self, src: &Buffer, dst: &Image) { + let encoder = self.blit_command_encoder(); assert_eq!(src.size, (dst.width as u64) * (dst.height as u64) * 4); let bytes_per_row = (dst.width * 4) as NSUInteger; let src_size = metal::MTLSize { @@ -555,11 +624,10 @@ impl crate::backend::CmdBuf for CmdBuf { origin, metal::MTLBlitOption::empty(), ); - encoder.end_encoding(); } - unsafe fn blit_image(&self, src: &Image, dst: &Image) { - let encoder = self.cmd_buf.new_blit_command_encoder(); + unsafe fn blit_image(&mut self, src: &Image, dst: &Image) { + let encoder = self.blit_command_encoder(); let src_size = metal::MTLSize { width: src.width.min(dst.width) as NSUInteger, height: src.width.min(dst.height) as NSUInteger, @@ -577,7 +645,6 @@ impl crate::backend::CmdBuf for CmdBuf { 0, origin, ); - encoder.end_encoding(); } unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {} @@ -589,6 +656,41 @@ impl crate::backend::CmdBuf for CmdBuf { } } +impl CmdBuf { + fn compute_command_encoder(&mut self) -> &metal::ComputeCommandEncoder { + if !matches!(self.cur_encoder, Encoder::Compute(_)) { + self.flush_encoder(); + self.cur_encoder = + Encoder::Compute(self.cmd_buf.new_compute_command_encoder().to_owned()); + } + if let Encoder::Compute(e) = &self.cur_encoder { + e + } else { + unreachable!() + } + } + + fn blit_command_encoder(&mut self) -> &metal::BlitCommandEncoder { + if !matches!(self.cur_encoder, Encoder::Blit(_)) { + self.flush_encoder(); + self.cur_encoder = Encoder::Blit(self.cmd_buf.new_blit_command_encoder().to_owned()); + } + if let Encoder::Blit(e) = &self.cur_encoder { + e + } else { + unreachable!() + } + } + + fn flush_encoder(&mut self) { + match std::mem::replace(&mut self.cur_encoder, Encoder::None) { + Encoder::Compute(e) => e.end_encoding(), + Encoder::Blit(e) => e.end_encoding(), + Encoder::None => (), + } + } +} + impl crate::backend::DescriptorSetBuilder for DescriptorSetBuilder { fn add_buffers(&mut self, buffers: &[&Buffer]) { self.0.buffers.extend(buffers.iter().copied().cloned()); diff --git a/piet-gpu-hal/src/metal/timer.rs b/piet-gpu-hal/src/metal/timer.rs new file mode 100644 index 0000000..5830fee --- /dev/null +++ b/piet-gpu-hal/src/metal/timer.rs @@ -0,0 +1,98 @@ +// Copyright 2021 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! Support for timer queries. +//! +//! Likely some of this should be upstreamed into metal-rs. + +use std::{ffi::CStr, ptr::null_mut}; + +use cocoa_foundation::{base::id, foundation::NSUInteger}; +use metal::DeviceRef; +use objc::{class, msg_send, sel, sel_impl}; + +pub struct CounterSampleBuffer { + id: id, +} + +pub struct CounterSet { + id: id, +} + +impl Drop for CounterSampleBuffer { + fn drop(&mut self) { + unsafe { msg_send![self.id, release] } + } +} + +impl Clone for CounterSampleBuffer { + fn clone(&self) -> CounterSampleBuffer { + unsafe { + CounterSampleBuffer { + id: msg_send![self.id, retain], + } + } + } +} + +impl CounterSet { + pub fn get_timer_counter_set(device: &DeviceRef) -> Option { + unsafe { + // TODO: version check + let sets: id = msg_send!(device, counterSets); + let count: NSUInteger = msg_send![sets, count]; + for i in 0..count { + let set: id = msg_send![sets, objectAtIndex: i]; + let name: id = msg_send![set, name]; + let name_cstr = CStr::from_ptr(msg_send![name, UTF8String]); + if name_cstr.to_bytes() == b"timestamp" { + return Some(CounterSet { id: set }); + } + } + None + } + } +} + +impl CounterSampleBuffer { + pub fn new( + device: &DeviceRef, + count: u64, + counter_set: &CounterSet, + ) -> Option { + unsafe { + let desc_cls = class!(MTLCounterSampleBufferDescriptor); + let descriptor: id = msg_send![desc_cls, alloc]; + println!("descriptor = {:?}", descriptor); + let _: id = msg_send![descriptor, init]; + let count = count as NSUInteger; + let () = msg_send![descriptor, setSampleCount: count]; + let () = msg_send![descriptor, setCounterSet: counter_set.id]; + let mut error: id = null_mut(); + let buf: id = msg_send![device, newCounterSampleBufferWithDescriptor: descriptor error: &mut error]; + let () = msg_send![descriptor, release]; + if !error.is_null() { + let () = msg_send![error, release]; + return None; + } + Some(CounterSampleBuffer { id: buf }) + } + } + + pub fn id(&self) -> id { + self.id + } +}