From a5991ecf97639ebfd2ce6a44928c8f709e810b0a Mon Sep 17 00:00:00 2001 From: Raph Levien Date: Sat, 8 May 2021 10:51:04 -0700 Subject: [PATCH] Expand runtime query of GPU capabilities Test whether the GPU supports subgroups (including size control) and memory model. This patch does all the ceremony needed for runtime query, including testing the Vulkan version and only probing the extensions when available. Thus, it should work fine on older devices (not yet tested). The reporting of capabilities follows Vulkan concepts, but is not particularly Vulkan-specific. --- piet-gpu-hal/src/hub.rs | 12 +++---- piet-gpu-hal/src/lib.rs | 29 ++++++++++++++++ piet-gpu-hal/src/vulkan.rs | 68 +++++++++++++++++++++++++++++++------- piet-gpu/src/lib.rs | 17 +++++++--- piet-gpu/src/render_ctx.rs | 26 ++++++++++----- 5 files changed, 121 insertions(+), 31 deletions(-) diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index 5649546..d848d4b 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -10,7 +10,7 @@ use std::sync::{Arc, Mutex, Weak}; use crate::vulkan; use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait; use crate::PipelineBuilder as PipelineBuilderTrait; -use crate::{Device, Error, SamplerParams}; +use crate::{Device, Error, GpuInfo, SamplerParams}; pub type MemFlags = ::MemFlags; pub type Semaphore = ::Semaphore; @@ -32,6 +32,7 @@ struct SessionInner { cmd_buf_pool: Mutex>, /// Command buffers that are still pending (so resources can't be freed). pending: Mutex>, + gpu_info: GpuInfo, } pub struct CmdBuf { @@ -72,8 +73,10 @@ pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder); impl Session { pub fn new(device: vulkan::VkDevice) -> Session { + let gpu_info = device.query_gpu_info(); Session(Arc::new(SessionInner { device, + gpu_info, cmd_buf_pool: Default::default(), pending: Default::default(), })) @@ -210,11 +213,8 @@ impl Session { self.0.device.create_sampler(params) } - /// Report whether the device supports descriptor indexing. - /// - /// As we have more queries, we might replace this with a capabilities structure. - pub fn has_descriptor_indexing(&self) -> bool { - self.0.device.has_descriptor_indexing + pub fn gpu_info(&self) -> &GpuInfo { + &self.0.gpu_info } } diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index 2848774..9dfe8da 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -29,6 +29,29 @@ pub enum SamplerParams { Linear, } +#[derive(Clone, Debug)] +/// Information about the GPU. +pub struct GpuInfo { + /// The GPU supports descriptor indexing. + pub has_descriptor_indexing: bool, + /// The GPU supports subgroups. + /// + /// Right now, this just checks for basic subgroup capability (as + /// required in Vulkan 1.1), and we should have finer grained + /// queries for shuffles, etc. + pub has_subgroups: bool, + /// Info about subgroup size control, if available. + pub subgroup_size: Option, + /// The GPU supports a real, grown-ass memory model. + pub has_memory_model: bool, +} + +#[derive(Clone, Debug)] +pub struct SubgroupSize { + min: u32, + max: u32, +} + pub trait Device: Sized { type Buffer: 'static; type Image; @@ -43,6 +66,12 @@ pub trait Device: Sized { type DescriptorSetBuilder: DescriptorSetBuilder; type Sampler; + /// Query the GPU info. + /// + /// This method may be expensive, so the hub should call it once and retain + /// the info. + fn query_gpu_info(&self) -> GpuInfo; + fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result; /// Destroy a buffer. diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 273d860..dd48dcf 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -6,10 +6,10 @@ use std::os::raw::c_char; use std::sync::Arc; use ash::extensions::{ext::DebugUtils, khr}; -use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0}; +use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1}; use ash::{vk, Device, Entry, Instance}; -use crate::{Device as DeviceTrait, Error, ImageLayout, SamplerParams}; +use crate::{Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize}; pub struct VkInstance { /// Retain the dynamic lib. @@ -17,6 +17,7 @@ pub struct VkInstance { entry: Entry, instance: Instance, get_phys_dev_props: Option, + vk_version: u32, _dbg_loader: Option, _dbg_callbk: Option, } @@ -28,8 +29,7 @@ pub struct VkDevice { queue: vk::Queue, qfi: u32, timestamp_period: f32, - /// Does the device support descriptor indexing? - pub has_descriptor_indexing: bool, + gpu_info: GpuInfo, } struct RawDevice { @@ -181,6 +181,16 @@ impl VkInstance { } } + let supported_version = entry + .try_enumerate_instance_version()? + .unwrap_or(vk::make_version(1, 0, 0)); + let vk_version = if supported_version >= vk::make_version(1, 1, 0) { + // We need Vulkan 1.1 to do subgroups; most other things can be extensions. + vk::make_version(1, 1, 0) + } else { + vk::make_version(1, 0, 0) + }; + let instance = entry.create_instance( &vk::InstanceCreateInfo::builder() .application_info( @@ -188,7 +198,7 @@ impl VkInstance { .application_name(&app_name) .application_version(0) .engine_name(&app_name) - .api_version(vk::make_version(1, 0, 0)), + .api_version(vk_version), ) .enabled_layer_names(layers.as_ptrs()) .enabled_extension_names(exts.as_ptrs()), @@ -234,6 +244,7 @@ impl VkInstance { entry, instance, get_phys_dev_props, + vk_version, _dbg_loader, _dbg_callbk, }; @@ -282,17 +293,24 @@ impl VkInstance { .descriptor_binding_variable_descriptor_count(true) .runtime_descriptor_array(true); - let mut extensions = Vec::new(); + let mut extensions = Extensions::new( + self.instance + .enumerate_device_extension_properties(pdevice)?, + ); if surface.is_some() { - extensions.push(khr::Swapchain::name().as_ptr()); + extensions.try_add(khr::Swapchain::name()); } if has_descriptor_indexing { - extensions.push(vk::KhrMaintenance3Fn::name().as_ptr()); - extensions.push(vk::ExtDescriptorIndexingFn::name().as_ptr()); + extensions.try_add(vk::KhrMaintenance3Fn::name()); + extensions.try_add(vk::ExtDescriptorIndexingFn::name()); } + let has_subgroup_size = self.vk_version >= vk::make_version(1, 1, 0) + && extensions.try_add(vk::ExtSubgroupSizeControlFn::name()); + let has_memory_model = self.vk_version >= vk::make_version(1, 1, 0) + && extensions.try_add(vk::KhrVulkanMemoryModelFn::name()); let mut create_info = vk::DeviceCreateInfo::builder() .queue_create_infos(&queue_create_infos) - .enabled_extension_names(&extensions); + .enabled_extension_names(extensions.as_ptrs()); if has_descriptor_indexing { create_info = create_info.push_next(&mut descriptor_indexing); } @@ -307,6 +325,28 @@ impl VkInstance { let props = self.instance.get_physical_device_properties(pdevice); let timestamp_period = props.limits.timestamp_period; + let subgroup_size = if has_subgroup_size { + let mut subgroup_props = vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT::default(); + let mut properties = + vk::PhysicalDeviceProperties2::builder().push_next(&mut subgroup_props); + self.instance + .get_physical_device_properties2(pdevice, &mut properties); + Some(SubgroupSize { + min: subgroup_props.min_subgroup_size, + max: subgroup_props.max_subgroup_size, + }) + } else { + None + }; + + // TODO: finer grained query of specific subgroup info. + let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0); + let gpu_info = GpuInfo { + has_descriptor_indexing, + has_subgroups, + subgroup_size, + has_memory_model, + }; Ok(VkDevice { device, @@ -315,7 +355,7 @@ impl VkInstance { qfi, queue, timestamp_period, - has_descriptor_indexing, + gpu_info, }) } @@ -414,6 +454,10 @@ impl crate::Device for VkDevice { type DescriptorSetBuilder = DescriptorSetBuilder; type Sampler = vk::Sampler; + fn query_gpu_info(&self) -> GpuInfo { + self.gpu_info.clone() + } + fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result { unsafe { let device = &self.device.device; @@ -570,7 +614,7 @@ impl crate::Device for VkDevice { bindings: Vec::new(), binding_flags: Vec::new(), max_textures: 0, - has_descriptor_indexing: self.has_descriptor_indexing, + has_descriptor_indexing: self.gpu_info.has_descriptor_indexing, } } diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 8e5be4d..6c85bef 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -128,11 +128,20 @@ fn render_clip_test(rc: &mut impl RenderContext) { #[allow(unused)] fn render_alpha_test(rc: &mut impl RenderContext) { // Alpha compositing tests. - rc.fill(diamond(Point::new(1024.0, 100.0)), &Color::Rgba32(0xff0000ff)); - rc.fill(diamond(Point::new(1024.0, 125.0)), &Color::Rgba32(0x00ff0080)); + rc.fill( + diamond(Point::new(1024.0, 100.0)), + &Color::Rgba32(0xff0000ff), + ); + rc.fill( + diamond(Point::new(1024.0, 125.0)), + &Color::Rgba32(0x00ff0080), + ); rc.save(); rc.clip(diamond(Point::new(1024.0, 150.0))); - rc.fill(diamond(Point::new(1024.0, 175.0)), &Color::Rgba32(0x0000ff80)); + rc.fill( + diamond(Point::new(1024.0, 175.0)), + &Color::Rgba32(0x0000ff80), + ); rc.restore(); } @@ -325,7 +334,7 @@ impl Renderer { let bg_image = Self::make_test_bg_image(&session); - let k4_code = if session.has_descriptor_indexing() { + let k4_code = if session.gpu_info().has_descriptor_indexing { &include_bytes!("../shader/kernel4_idx.spv")[..] } else { println!("doing non-indexed k4"); diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 74503ef..b022507 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -1,8 +1,8 @@ use std::{borrow::Cow, ops::RangeBounds}; use piet::{ - HitTestPosition, - kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size}, TextAttribute, TextStorage, + kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size}, + HitTestPosition, TextAttribute, TextStorage, }; use piet::{ Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode, @@ -143,7 +143,12 @@ impl RenderContext for PietGpuRenderContext { // // See also http://ssp.impulsetrain.com/gamma-premult.html. let (r, g, b, a) = color.as_rgba(); - let premul = Color::rgba(to_srgb(from_srgb(r) * a), to_srgb(from_srgb(g) * a), to_srgb(from_srgb(b) * a), a); + let premul = Color::rgba( + to_srgb(from_srgb(r) * a), + to_srgb(from_srgb(g) * a), + to_srgb(from_srgb(b) * a), + a, + ); PietGpuBrush::Solid(premul.as_rgba_u32()) } @@ -182,7 +187,8 @@ impl RenderContext for PietGpuRenderContext { _brush: &impl IntoBrush, _width: f64, _style: &StrokeStyle, - ) {} + ) { + } fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush) { let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); @@ -284,7 +290,8 @@ impl RenderContext for PietGpuRenderContext { _image: &Self::Image, _rect: impl Into, _interp: InterpolationMode, - ) {} + ) { + } fn draw_image_area( &mut self, @@ -292,7 +299,8 @@ impl RenderContext for PietGpuRenderContext { _src_rect: impl Into, _dst_rect: impl Into, _interp: InterpolationMode, - ) {} + ) { + } fn blurred_rect(&mut self, _rect: Rect, _blur_radius: f64, _brush: &impl IntoBrush) {} @@ -323,7 +331,7 @@ impl PietGpuRenderContext { self.pathseg_count += 1; } - fn encode_path(&mut self, path: impl Iterator, is_fill: bool) { + fn encode_path(&mut self, path: impl Iterator, is_fill: bool) { if is_fill { self.encode_path_inner( path.flat_map(|el| { @@ -341,7 +349,7 @@ impl PietGpuRenderContext { } } - fn encode_path_inner(&mut self, path: impl Iterator) { + fn encode_path_inner(&mut self, path: impl Iterator) { let flatten = false; if flatten { let mut start_pt = None; @@ -606,4 +614,4 @@ fn from_srgb(f: f64) -> f64 { let a = 0.055; f64::powf((f + a) * f64::recip(1. + a), 2.4) } -} \ No newline at end of file +}