diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index 5649546..d848d4b 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -10,7 +10,7 @@ use std::sync::{Arc, Mutex, Weak}; use crate::vulkan; use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait; use crate::PipelineBuilder as PipelineBuilderTrait; -use crate::{Device, Error, SamplerParams}; +use crate::{Device, Error, GpuInfo, SamplerParams}; pub type MemFlags = ::MemFlags; pub type Semaphore = ::Semaphore; @@ -32,6 +32,7 @@ struct SessionInner { cmd_buf_pool: Mutex>, /// Command buffers that are still pending (so resources can't be freed). pending: Mutex>, + gpu_info: GpuInfo, } pub struct CmdBuf { @@ -72,8 +73,10 @@ pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder); impl Session { pub fn new(device: vulkan::VkDevice) -> Session { + let gpu_info = device.query_gpu_info(); Session(Arc::new(SessionInner { device, + gpu_info, cmd_buf_pool: Default::default(), pending: Default::default(), })) @@ -210,11 +213,8 @@ impl Session { self.0.device.create_sampler(params) } - /// Report whether the device supports descriptor indexing. - /// - /// As we have more queries, we might replace this with a capabilities structure. - pub fn has_descriptor_indexing(&self) -> bool { - self.0.device.has_descriptor_indexing + pub fn gpu_info(&self) -> &GpuInfo { + &self.0.gpu_info } } diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index 2848774..9dfe8da 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -29,6 +29,29 @@ pub enum SamplerParams { Linear, } +#[derive(Clone, Debug)] +/// Information about the GPU. +pub struct GpuInfo { + /// The GPU supports descriptor indexing. + pub has_descriptor_indexing: bool, + /// The GPU supports subgroups. + /// + /// Right now, this just checks for basic subgroup capability (as + /// required in Vulkan 1.1), and we should have finer grained + /// queries for shuffles, etc. + pub has_subgroups: bool, + /// Info about subgroup size control, if available. + pub subgroup_size: Option, + /// The GPU supports a real, grown-ass memory model. + pub has_memory_model: bool, +} + +#[derive(Clone, Debug)] +pub struct SubgroupSize { + min: u32, + max: u32, +} + pub trait Device: Sized { type Buffer: 'static; type Image; @@ -43,6 +66,12 @@ pub trait Device: Sized { type DescriptorSetBuilder: DescriptorSetBuilder; type Sampler; + /// Query the GPU info. + /// + /// This method may be expensive, so the hub should call it once and retain + /// the info. + fn query_gpu_info(&self) -> GpuInfo; + fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result; /// Destroy a buffer. diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index 273d860..dd48dcf 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -6,10 +6,10 @@ use std::os::raw::c_char; use std::sync::Arc; use ash::extensions::{ext::DebugUtils, khr}; -use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0}; +use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1}; use ash::{vk, Device, Entry, Instance}; -use crate::{Device as DeviceTrait, Error, ImageLayout, SamplerParams}; +use crate::{Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize}; pub struct VkInstance { /// Retain the dynamic lib. @@ -17,6 +17,7 @@ pub struct VkInstance { entry: Entry, instance: Instance, get_phys_dev_props: Option, + vk_version: u32, _dbg_loader: Option, _dbg_callbk: Option, } @@ -28,8 +29,7 @@ pub struct VkDevice { queue: vk::Queue, qfi: u32, timestamp_period: f32, - /// Does the device support descriptor indexing? - pub has_descriptor_indexing: bool, + gpu_info: GpuInfo, } struct RawDevice { @@ -181,6 +181,16 @@ impl VkInstance { } } + let supported_version = entry + .try_enumerate_instance_version()? + .unwrap_or(vk::make_version(1, 0, 0)); + let vk_version = if supported_version >= vk::make_version(1, 1, 0) { + // We need Vulkan 1.1 to do subgroups; most other things can be extensions. + vk::make_version(1, 1, 0) + } else { + vk::make_version(1, 0, 0) + }; + let instance = entry.create_instance( &vk::InstanceCreateInfo::builder() .application_info( @@ -188,7 +198,7 @@ impl VkInstance { .application_name(&app_name) .application_version(0) .engine_name(&app_name) - .api_version(vk::make_version(1, 0, 0)), + .api_version(vk_version), ) .enabled_layer_names(layers.as_ptrs()) .enabled_extension_names(exts.as_ptrs()), @@ -234,6 +244,7 @@ impl VkInstance { entry, instance, get_phys_dev_props, + vk_version, _dbg_loader, _dbg_callbk, }; @@ -282,17 +293,24 @@ impl VkInstance { .descriptor_binding_variable_descriptor_count(true) .runtime_descriptor_array(true); - let mut extensions = Vec::new(); + let mut extensions = Extensions::new( + self.instance + .enumerate_device_extension_properties(pdevice)?, + ); if surface.is_some() { - extensions.push(khr::Swapchain::name().as_ptr()); + extensions.try_add(khr::Swapchain::name()); } if has_descriptor_indexing { - extensions.push(vk::KhrMaintenance3Fn::name().as_ptr()); - extensions.push(vk::ExtDescriptorIndexingFn::name().as_ptr()); + extensions.try_add(vk::KhrMaintenance3Fn::name()); + extensions.try_add(vk::ExtDescriptorIndexingFn::name()); } + let has_subgroup_size = self.vk_version >= vk::make_version(1, 1, 0) + && extensions.try_add(vk::ExtSubgroupSizeControlFn::name()); + let has_memory_model = self.vk_version >= vk::make_version(1, 1, 0) + && extensions.try_add(vk::KhrVulkanMemoryModelFn::name()); let mut create_info = vk::DeviceCreateInfo::builder() .queue_create_infos(&queue_create_infos) - .enabled_extension_names(&extensions); + .enabled_extension_names(extensions.as_ptrs()); if has_descriptor_indexing { create_info = create_info.push_next(&mut descriptor_indexing); } @@ -307,6 +325,28 @@ impl VkInstance { let props = self.instance.get_physical_device_properties(pdevice); let timestamp_period = props.limits.timestamp_period; + let subgroup_size = if has_subgroup_size { + let mut subgroup_props = vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT::default(); + let mut properties = + vk::PhysicalDeviceProperties2::builder().push_next(&mut subgroup_props); + self.instance + .get_physical_device_properties2(pdevice, &mut properties); + Some(SubgroupSize { + min: subgroup_props.min_subgroup_size, + max: subgroup_props.max_subgroup_size, + }) + } else { + None + }; + + // TODO: finer grained query of specific subgroup info. + let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0); + let gpu_info = GpuInfo { + has_descriptor_indexing, + has_subgroups, + subgroup_size, + has_memory_model, + }; Ok(VkDevice { device, @@ -315,7 +355,7 @@ impl VkInstance { qfi, queue, timestamp_period, - has_descriptor_indexing, + gpu_info, }) } @@ -414,6 +454,10 @@ impl crate::Device for VkDevice { type DescriptorSetBuilder = DescriptorSetBuilder; type Sampler = vk::Sampler; + fn query_gpu_info(&self) -> GpuInfo { + self.gpu_info.clone() + } + fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result { unsafe { let device = &self.device.device; @@ -570,7 +614,7 @@ impl crate::Device for VkDevice { bindings: Vec::new(), binding_flags: Vec::new(), max_textures: 0, - has_descriptor_indexing: self.has_descriptor_indexing, + has_descriptor_indexing: self.gpu_info.has_descriptor_indexing, } } diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 8e5be4d..6c85bef 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -128,11 +128,20 @@ fn render_clip_test(rc: &mut impl RenderContext) { #[allow(unused)] fn render_alpha_test(rc: &mut impl RenderContext) { // Alpha compositing tests. - rc.fill(diamond(Point::new(1024.0, 100.0)), &Color::Rgba32(0xff0000ff)); - rc.fill(diamond(Point::new(1024.0, 125.0)), &Color::Rgba32(0x00ff0080)); + rc.fill( + diamond(Point::new(1024.0, 100.0)), + &Color::Rgba32(0xff0000ff), + ); + rc.fill( + diamond(Point::new(1024.0, 125.0)), + &Color::Rgba32(0x00ff0080), + ); rc.save(); rc.clip(diamond(Point::new(1024.0, 150.0))); - rc.fill(diamond(Point::new(1024.0, 175.0)), &Color::Rgba32(0x0000ff80)); + rc.fill( + diamond(Point::new(1024.0, 175.0)), + &Color::Rgba32(0x0000ff80), + ); rc.restore(); } @@ -325,7 +334,7 @@ impl Renderer { let bg_image = Self::make_test_bg_image(&session); - let k4_code = if session.has_descriptor_indexing() { + let k4_code = if session.gpu_info().has_descriptor_indexing { &include_bytes!("../shader/kernel4_idx.spv")[..] } else { println!("doing non-indexed k4"); diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 74503ef..b022507 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -1,8 +1,8 @@ use std::{borrow::Cow, ops::RangeBounds}; use piet::{ - HitTestPosition, - kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size}, TextAttribute, TextStorage, + kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size}, + HitTestPosition, TextAttribute, TextStorage, }; use piet::{ Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode, @@ -143,7 +143,12 @@ impl RenderContext for PietGpuRenderContext { // // See also http://ssp.impulsetrain.com/gamma-premult.html. let (r, g, b, a) = color.as_rgba(); - let premul = Color::rgba(to_srgb(from_srgb(r) * a), to_srgb(from_srgb(g) * a), to_srgb(from_srgb(b) * a), a); + let premul = Color::rgba( + to_srgb(from_srgb(r) * a), + to_srgb(from_srgb(g) * a), + to_srgb(from_srgb(b) * a), + a, + ); PietGpuBrush::Solid(premul.as_rgba_u32()) } @@ -182,7 +187,8 @@ impl RenderContext for PietGpuRenderContext { _brush: &impl IntoBrush, _width: f64, _style: &StrokeStyle, - ) {} + ) { + } fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush) { let brush = brush.make_brush(self, || shape.bounding_box()).into_owned(); @@ -284,7 +290,8 @@ impl RenderContext for PietGpuRenderContext { _image: &Self::Image, _rect: impl Into, _interp: InterpolationMode, - ) {} + ) { + } fn draw_image_area( &mut self, @@ -292,7 +299,8 @@ impl RenderContext for PietGpuRenderContext { _src_rect: impl Into, _dst_rect: impl Into, _interp: InterpolationMode, - ) {} + ) { + } fn blurred_rect(&mut self, _rect: Rect, _blur_radius: f64, _brush: &impl IntoBrush) {} @@ -323,7 +331,7 @@ impl PietGpuRenderContext { self.pathseg_count += 1; } - fn encode_path(&mut self, path: impl Iterator, is_fill: bool) { + fn encode_path(&mut self, path: impl Iterator, is_fill: bool) { if is_fill { self.encode_path_inner( path.flat_map(|el| { @@ -341,7 +349,7 @@ impl PietGpuRenderContext { } } - fn encode_path_inner(&mut self, path: impl Iterator) { + fn encode_path_inner(&mut self, path: impl Iterator) { let flatten = false; if flatten { let mut start_pt = None; @@ -606,4 +614,4 @@ fn from_srgb(f: f64) -> f64 { let a = 0.055; f64::powf((f + a) * f64::recip(1. + a), 2.4) } -} \ No newline at end of file +}