More progress

This puts most of the infrastructure in place but I'm hitting an error
that "sampleCountersInBuffer is not supported on this device".

The issue is that M1 supports stage boundaries and not command boundaries.
We'll have to rework the logic a bit. (And, in the most general case, support
both)

Start implementing stage boundaries, but it will probably require an API
change.
This commit is contained in:
Raph Levien 2022-01-06 10:02:07 -08:00
parent 4458e36efa
commit 290d5d2e13
6 changed files with 177 additions and 18 deletions

1
Cargo.lock generated
View file

@ -921,6 +921,7 @@ dependencies = [
"block", "block",
"bytemuck", "bytemuck",
"cocoa-foundation", "cocoa-foundation",
"foreign-types",
"metal", "metal",
"objc", "objc",
"raw-window-handle", "raw-window-handle",

View file

@ -28,3 +28,4 @@ metal = "0.22"
objc = "0.2.5" objc = "0.2.5"
block = "0.1.6" block = "0.1.6"
cocoa-foundation = "0.1" cocoa-foundation = "0.1"
foreign-types = "0.3.2"

View file

@ -160,6 +160,8 @@ pub trait Device: Sized {
} }
pub trait CmdBuf<D: Device> { pub trait CmdBuf<D: Device> {
type ComputeEncoder;
unsafe fn begin(&mut self); unsafe fn begin(&mut self);
unsafe fn finish(&mut self); unsafe fn finish(&mut self);
@ -231,6 +233,8 @@ pub trait CmdBuf<D: Device> {
/// End a section opened by `begin_debug_label`. /// End a section opened by `begin_debug_label`.
unsafe fn end_debug_label(&mut self) {} unsafe fn end_debug_label(&mut self) {}
unsafe fn new_compute_encoder(&mut self) -> Self::ComputeEncoder;
} }
/// A builder for descriptor sets with more complex layouts. /// A builder for descriptor sets with more complex layouts.
@ -252,3 +256,16 @@ pub trait DescriptorSetBuilder<D: Device> {
fn add_textures(&mut self, images: &[&D::Image]); fn add_textures(&mut self, images: &[&D::Image]);
unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>; unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>;
} }
pub trait ComputeEncoder<D: Device> {
unsafe fn dispatch(
&mut self,
pipeline: &D::Pipeline,
descriptor_set: &D::DescriptorSet,
workgroup_count: (u32, u32, u32),
workgroup_size: (u32, u32, u32),
);
// Question: should be self?
unsafe fn finish(&mut self);
}

View file

@ -24,6 +24,7 @@ use std::sync::{Arc, Mutex};
use block::Block; use block::Block;
use cocoa_foundation::base::id; use cocoa_foundation::base::id;
use cocoa_foundation::foundation::{NSInteger, NSUInteger}; use cocoa_foundation::foundation::{NSInteger, NSUInteger};
use foreign_types::ForeignType;
use objc::rc::autoreleasepool; use objc::rc::autoreleasepool;
use objc::runtime::{Object, BOOL, YES}; use objc::runtime::{Object, BOOL, YES};
use objc::{class, msg_send, sel, sel_impl}; use objc::{class, msg_send, sel, sel_impl};
@ -36,6 +37,8 @@ use crate::{BufferUsage, Error, GpuInfo, ImageFormat, MapMode, WorkgroupLimits};
use util::*; use util::*;
use self::timer::{CounterSampleBuffer, CounterSet};
pub struct MtlInstance; pub struct MtlInstance;
pub struct MtlDevice { pub struct MtlDevice {
@ -43,6 +46,18 @@ pub struct MtlDevice {
cmd_queue: Arc<Mutex<metal::CommandQueue>>, cmd_queue: Arc<Mutex<metal::CommandQueue>>,
gpu_info: GpuInfo, gpu_info: GpuInfo,
helpers: Arc<Helpers>, helpers: Arc<Helpers>,
timer_set: Option<CounterSet>,
counter_style: CounterStyle,
}
/// Type of counter sampling.
///
/// See https://developer.apple.com/documentation/metal/counter_sampling/sampling_gpu_data_into_counter_sample_buffers
#[derive(Clone, Copy, PartialEq, Eq)]
enum CounterStyle {
None,
Stage,
Command,
} }
pub struct MtlSurface { pub struct MtlSurface {
@ -85,6 +100,7 @@ pub struct CmdBuf {
helpers: Arc<Helpers>, helpers: Arc<Helpers>,
cur_encoder: Encoder, cur_encoder: Encoder,
time_calibration: Arc<Mutex<TimeCalibration>>, time_calibration: Arc<Mutex<TimeCalibration>>,
counter_style: CounterStyle,
} }
enum Encoder { enum Encoder {
@ -101,7 +117,7 @@ struct TimeCalibration {
gpu_end_ts: u64, gpu_end_ts: u64,
} }
pub struct QueryPool; pub struct QueryPool(Option<CounterSampleBuffer>);
pub struct Pipeline(metal::ComputePipelineState); pub struct Pipeline(metal::ComputePipelineState);
@ -118,6 +134,10 @@ struct Helpers {
clear_pipeline: metal::ComputePipelineState, clear_pipeline: metal::ComputePipelineState,
} }
pub struct ComputeEncoder {
raw: metal::ComputeCommandEncoder,
}
impl MtlInstance { impl MtlInstance {
pub fn new( pub fn new(
window_handle: Option<&dyn HasRawWindowHandle>, window_handle: Option<&dyn HasRawWindowHandle>,
@ -228,14 +248,21 @@ impl MtlDevice {
clear_pipeline: clear::make_clear_pipeline(&device), clear_pipeline: clear::make_clear_pipeline(&device),
}); });
// Timer stuff // Timer stuff
if let Some(timer_set) = timer::CounterSet::get_timer_counter_set(&device) { let timer_set = CounterSet::get_timer_counter_set(&device);
let timer = timer::CounterSampleBuffer::new(&device, 4, &timer_set); let counter_style = if timer_set.is_some() {
} // TODO: M1 is stage style, but should do proper runtime detection.
CounterStyle::Stage
} else {
CounterStyle::None
};
MtlDevice { MtlDevice {
device, device,
cmd_queue: Arc::new(Mutex::new(cmd_queue)), cmd_queue: Arc::new(Mutex::new(cmd_queue)),
gpu_info, gpu_info,
helpers, helpers,
timer_set,
counter_style,
} }
} }
@ -244,7 +271,13 @@ impl MtlDevice {
let helpers = self.helpers.clone(); let helpers = self.helpers.clone();
let cur_encoder = Encoder::None; let cur_encoder = Encoder::None;
let time_calibration = Default::default(); let time_calibration = Default::default();
CmdBuf { cmd_buf, helpers, cur_encoder, time_calibration } CmdBuf {
cmd_buf,
helpers,
cur_encoder,
time_calibration,
counter_style: self.counter_style,
}
} }
pub fn image_from_raw_mtl(&self, texture: metal::Texture, width: u32, height: u32) -> Image { pub fn image_from_raw_mtl(&self, texture: metal::Texture, width: u32, height: u32) -> Image {
@ -364,6 +397,7 @@ impl crate::backend::Device for MtlDevice {
helpers, helpers,
cur_encoder, cur_encoder,
time_calibration, time_calibration,
counter_style: self.counter_style,
}) })
} }
@ -372,12 +406,19 @@ impl crate::backend::Device for MtlDevice {
} }
fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error> { fn create_query_pool(&self, n_queries: u32) -> Result<Self::QueryPool, Error> {
// TODO if let Some(timer_set) = &self.timer_set {
Ok(QueryPool) let pool = CounterSampleBuffer::new(&self.device, n_queries as u64, timer_set)
.ok_or("error creating timer query pool")?;
return Ok(QueryPool(Some(pool)));
}
Ok(QueryPool(None))
} }
unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> { unsafe fn fetch_query_pool(&self, pool: &Self::QueryPool) -> Result<Vec<f64>, Error> {
// TODO if let Some(raw) = &pool.0 {
let resolved = raw.resolve();
println!("resolved = {:?}", resolved);
}
Ok(Vec::new()) Ok(Vec::new())
} }
@ -505,6 +546,8 @@ impl crate::backend::Device for MtlDevice {
} }
impl crate::backend::CmdBuf<MtlDevice> for CmdBuf { impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
type ComputeEncoder = ComputeEncoder;
unsafe fn begin(&mut self) {} unsafe fn begin(&mut self) {}
unsafe fn finish(&mut self) { unsafe fn finish(&mut self) {
@ -647,12 +690,39 @@ impl crate::backend::CmdBuf<MtlDevice> for CmdBuf {
); );
} }
unsafe fn reset_query_pool(&mut self, pool: &QueryPool) {} unsafe fn reset_query_pool(&mut self, _pool: &QueryPool) {}
unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) { unsafe fn write_timestamp(&mut self, pool: &QueryPool, query: u32) {
// TODO if let Some(buf) = &pool.0 {
// This really a PITA because it's pretty different than Vulkan. if matches!(self.cur_encoder, Encoder::None) {
// See https://developer.apple.com/documentation/metal/counter_sampling self.cur_encoder =
Encoder::Compute(self.cmd_buf.new_compute_command_encoder().to_owned());
}
let sample_index = query as NSUInteger;
if self.counter_style == CounterStyle::Command {
match &self.cur_encoder {
Encoder::Compute(e) => {
let () = msg_send![e.as_ptr(), sampleCountersInBuffer: buf.id() atSampleIndex: sample_index withBarrier: true];
}
Encoder::None => unreachable!(),
_ => todo!(),
}
} else if self.counter_style == CounterStyle::Stage {
match &self.cur_encoder {
Encoder::Compute(e) => {
println!("here we are");
}
_ => (),
}
}
}
}
unsafe fn new_compute_encoder(&mut self) -> Self::ComputeEncoder {
let raw = self.cmd_buf.new_compute_command_encoder().to_owned();
ComputeEncoder {
raw
}
} }
} }
@ -691,6 +761,43 @@ impl CmdBuf {
} }
} }
impl crate::backend::ComputeEncoder<MtlDevice> for ComputeEncoder {
unsafe fn dispatch(
&mut self,
pipeline: &Pipeline,
descriptor_set: &DescriptorSet,
workgroup_count: (u32, u32, u32),
workgroup_size: (u32, u32, u32),
) {
self.raw.set_compute_pipeline_state(&pipeline.0);
let mut buf_ix = 0;
for buffer in &descriptor_set.buffers {
self.raw.set_buffer(buf_ix, Some(&buffer.buffer), 0);
buf_ix += 1;
}
let mut img_ix = buf_ix;
for image in &descriptor_set.images {
self.raw.set_texture(img_ix, Some(&image.texture));
img_ix += 1;
}
let workgroup_count = metal::MTLSize {
width: workgroup_count.0 as u64,
height: workgroup_count.1 as u64,
depth: workgroup_count.2 as u64,
};
let workgroup_size = metal::MTLSize {
width: workgroup_size.0 as u64,
height: workgroup_size.1 as u64,
depth: workgroup_size.2 as u64,
};
self.raw.dispatch_thread_groups(workgroup_count, workgroup_size);
}
unsafe fn finish(&mut self) {
self.raw.end_encoding();
}
}
impl crate::backend::DescriptorSetBuilder<MtlDevice> for DescriptorSetBuilder { impl crate::backend::DescriptorSetBuilder<MtlDevice> for DescriptorSetBuilder {
fn add_buffers(&mut self, buffers: &[&Buffer]) { fn add_buffers(&mut self, buffers: &[&Buffer]) {
self.0.buffers.extend(buffers.iter().copied().cloned()); self.0.buffers.extend(buffers.iter().copied().cloned());

View file

@ -20,12 +20,16 @@
use std::{ffi::CStr, ptr::null_mut}; use std::{ffi::CStr, ptr::null_mut};
use cocoa_foundation::{base::id, foundation::NSUInteger}; use cocoa_foundation::{
use metal::DeviceRef; base::id,
foundation::{NSRange, NSUInteger},
};
use metal::{DeviceRef, MTLStorageMode};
use objc::{class, msg_send, sel, sel_impl}; use objc::{class, msg_send, sel, sel_impl};
pub struct CounterSampleBuffer { pub struct CounterSampleBuffer {
id: id, id: id,
count: u64,
} }
pub struct CounterSet { pub struct CounterSet {
@ -43,11 +47,18 @@ impl Clone for CounterSampleBuffer {
unsafe { unsafe {
CounterSampleBuffer { CounterSampleBuffer {
id: msg_send![self.id, retain], id: msg_send![self.id, retain],
count: self.count,
} }
} }
} }
} }
impl CounterSampleBuffer {
pub fn id(&self) -> id {
self.id
}
}
impl CounterSet { impl CounterSet {
pub fn get_timer_counter_set(device: &DeviceRef) -> Option<CounterSet> { pub fn get_timer_counter_set(device: &DeviceRef) -> Option<CounterSet> {
unsafe { unsafe {
@ -81,6 +92,10 @@ impl CounterSampleBuffer {
let count = count as NSUInteger; let count = count as NSUInteger;
let () = msg_send![descriptor, setSampleCount: count]; let () = msg_send![descriptor, setSampleCount: count];
let () = msg_send![descriptor, setCounterSet: counter_set.id]; let () = msg_send![descriptor, setCounterSet: counter_set.id];
let () = msg_send![
descriptor,
setStorageMode: MTLStorageMode::Shared as NSUInteger
];
let mut error: id = null_mut(); let mut error: id = null_mut();
let buf: id = msg_send![device, newCounterSampleBufferWithDescriptor: descriptor error: &mut error]; let buf: id = msg_send![device, newCounterSampleBufferWithDescriptor: descriptor error: &mut error];
let () = msg_send![descriptor, release]; let () = msg_send![descriptor, release];
@ -88,11 +103,21 @@ impl CounterSampleBuffer {
let () = msg_send![error, release]; let () = msg_send![error, release];
return None; return None;
} }
Some(CounterSampleBuffer { id: buf }) Some(CounterSampleBuffer { id: buf, count })
} }
} }
pub fn id(&self) -> id { // Read the timestamps.
self.id //
// Safety: the lifetime of the returned slice is wrong, it's actually autoreleased.
pub unsafe fn resolve(&self) -> &[u64] {
let range = NSRange::new(0, self.count);
let data: id = msg_send![self.id, resolveCounterRange: range];
if data.is_null() {
&[]
} else {
let bytes: *const u64 = msg_send![data, bytes];
std::slice::from_raw_parts(bytes, self.count as usize)
}
} }
} }

View file

@ -100,6 +100,14 @@ mux_device_enum! {
QueryPool } QueryPool }
mux_device_enum! { Sampler } mux_device_enum! { Sampler }
mux_enum! {
pub enum ComputeEncoder {
Vk(<crate::vulkan::CmdBuf as crate::backend::CmdBuf<vulkan::VkDevice>>::ComputeEncoder),
Dx12(<crate::dx12::Dx12Device as crate::backend::CmdBuf<dx12::Dx12Device>>::ComputeEncoder),
Mtl(<crate::metal::CmdBuf as crate::backend::CmdBuf<metal::MtlDevice>>::ComputeEncoder),
}
}
/// The code for a shader, either as source or intermediate representation. /// The code for a shader, either as source or intermediate representation.
pub enum ShaderCode<'a> { pub enum ShaderCode<'a> {
/// SPIR-V (binary intermediate representation) /// SPIR-V (binary intermediate representation)