Expand runtime query of GPU capabilities

Test whether the GPU supports subgroups (including size control) and
the memory model.

This patch does all the ceremony needed for runtime query, including
checking the Vulkan version and probing the extensions only when they
are available. Thus, it should work fine on older devices (not yet tested).

The reporting of capabilities follows Vulkan concepts, but is not
particularly Vulkan-specific.
Raph Levien 2021-05-08 10:51:04 -07:00
parent f6c2558743
commit a5991ecf97
5 changed files with 121 additions and 31 deletions
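As a quick orientation aid (not part of the diff below), a caller might consume the new query roughly as follows; the Session construction is elided, the module path piet_gpu_hal::hub is an assumption, and the field names are the ones this commit introduces:

fn report_caps(session: &piet_gpu_hal::hub::Session) {
    // GpuInfo derives Clone + Debug, so it can be dumped wholesale.
    let info = session.gpu_info();
    println!("gpu info: {:?}", info);
    // Shader selection can branch on individual capabilities, as the
    // renderer does for kernel4 further down in this diff.
    if info.has_subgroups && info.has_memory_model {
        // Hypothetical: a variant relying on subgroup ops and the Vulkan
        // memory model could be enabled here.
    }
}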


@@ -10,7 +10,7 @@ use std::sync::{Arc, Mutex, Weak};
use crate::vulkan;
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
use crate::PipelineBuilder as PipelineBuilderTrait;
use crate::{Device, Error, SamplerParams};
use crate::{Device, Error, GpuInfo, SamplerParams};
pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags;
pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore;
@@ -32,6 +32,7 @@ struct SessionInner {
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
/// Command buffers that are still pending (so resources can't be freed).
pending: Mutex<Vec<SubmittedCmdBufInner>>,
gpu_info: GpuInfo,
}
pub struct CmdBuf {
@@ -72,8 +73,10 @@ pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder);
impl Session {
pub fn new(device: vulkan::VkDevice) -> Session {
let gpu_info = device.query_gpu_info();
Session(Arc::new(SessionInner {
device,
gpu_info,
cmd_buf_pool: Default::default(),
pending: Default::default(),
}))
@@ -210,11 +213,8 @@ impl Session {
self.0.device.create_sampler(params)
}
/// Report whether the device supports descriptor indexing.
///
/// As we have more queries, we might replace this with a capabilities structure.
pub fn has_descriptor_indexing(&self) -> bool {
self.0.device.has_descriptor_indexing
pub fn gpu_info(&self) -> &GpuInfo {
&self.0.gpu_info
}
}


@@ -29,6 +29,29 @@ pub enum SamplerParams {
Linear,
}
#[derive(Clone, Debug)]
/// Information about the GPU.
pub struct GpuInfo {
/// The GPU supports descriptor indexing.
pub has_descriptor_indexing: bool,
/// The GPU supports subgroups.
///
/// Right now, this just checks for basic subgroup capability (as
/// required in Vulkan 1.1), and we should have finer grained
/// queries for shuffles, etc.
pub has_subgroups: bool,
/// Info about subgroup size control, if available.
pub subgroup_size: Option<SubgroupSize>,
/// The GPU supports a real, grown-ass memory model.
pub has_memory_model: bool,
}
#[derive(Clone, Debug)]
pub struct SubgroupSize {
min: u32,
max: u32,
}
pub trait Device: Sized {
type Buffer: 'static;
type Image;
@@ -43,6 +66,12 @@ pub trait Device: Sized {
type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
type Sampler;
/// Query the GPU info.
///
/// This method may be expensive, so the hub should call it once and retain
/// the info.
fn query_gpu_info(&self) -> GpuInfo;
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
/// Destroy a buffer.


@@ -6,10 +6,10 @@ use std::os::raw::c_char;
use std::sync::Arc;
use ash::extensions::{ext::DebugUtils, khr};
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1};
use ash::{vk, Device, Entry, Instance};
use crate::{Device as DeviceTrait, Error, ImageLayout, SamplerParams};
use crate::{Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize};
pub struct VkInstance {
/// Retain the dynamic lib.
@@ -17,6 +17,7 @@ pub struct VkInstance {
entry: Entry,
instance: Instance,
get_phys_dev_props: Option<vk::KhrGetPhysicalDeviceProperties2Fn>,
vk_version: u32,
_dbg_loader: Option<DebugUtils>,
_dbg_callbk: Option<vk::DebugUtilsMessengerEXT>,
}
@@ -28,8 +29,7 @@ pub struct VkDevice {
queue: vk::Queue,
qfi: u32,
timestamp_period: f32,
/// Does the device support descriptor indexing?
pub has_descriptor_indexing: bool,
gpu_info: GpuInfo,
}
struct RawDevice {
@@ -181,6 +181,16 @@ impl VkInstance {
}
}
let supported_version = entry
.try_enumerate_instance_version()?
.unwrap_or(vk::make_version(1, 0, 0));
let vk_version = if supported_version >= vk::make_version(1, 1, 0) {
// We need Vulkan 1.1 to do subgroups; most other things can be extensions.
vk::make_version(1, 1, 0)
} else {
vk::make_version(1, 0, 0)
};
let instance = entry.create_instance(
&vk::InstanceCreateInfo::builder()
.application_info(
@@ -188,7 +198,7 @@
.application_name(&app_name)
.application_version(0)
.engine_name(&app_name)
.api_version(vk::make_version(1, 0, 0)),
.api_version(vk_version),
)
.enabled_layer_names(layers.as_ptrs())
.enabled_extension_names(exts.as_ptrs()),
@@ -234,6 +244,7 @@ impl VkInstance {
entry,
instance,
get_phys_dev_props,
vk_version,
_dbg_loader,
_dbg_callbk,
};
@@ -282,17 +293,24 @@ impl VkInstance {
.descriptor_binding_variable_descriptor_count(true)
.runtime_descriptor_array(true);
let mut extensions = Vec::new();
let mut extensions = Extensions::new(
self.instance
.enumerate_device_extension_properties(pdevice)?,
);
if surface.is_some() {
extensions.push(khr::Swapchain::name().as_ptr());
extensions.try_add(khr::Swapchain::name());
}
if has_descriptor_indexing {
extensions.push(vk::KhrMaintenance3Fn::name().as_ptr());
extensions.push(vk::ExtDescriptorIndexingFn::name().as_ptr());
extensions.try_add(vk::KhrMaintenance3Fn::name());
extensions.try_add(vk::ExtDescriptorIndexingFn::name());
}
let has_subgroup_size = self.vk_version >= vk::make_version(1, 1, 0)
&& extensions.try_add(vk::ExtSubgroupSizeControlFn::name());
let has_memory_model = self.vk_version >= vk::make_version(1, 1, 0)
&& extensions.try_add(vk::KhrVulkanMemoryModelFn::name());
let mut create_info = vk::DeviceCreateInfo::builder()
.queue_create_infos(&queue_create_infos)
.enabled_extension_names(&extensions);
.enabled_extension_names(extensions.as_ptrs());
if has_descriptor_indexing {
create_info = create_info.push_next(&mut descriptor_indexing);
}
@@ -307,6 +325,28 @@ impl VkInstance {
let props = self.instance.get_physical_device_properties(pdevice);
let timestamp_period = props.limits.timestamp_period;
let subgroup_size = if has_subgroup_size {
let mut subgroup_props = vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT::default();
let mut properties =
vk::PhysicalDeviceProperties2::builder().push_next(&mut subgroup_props);
self.instance
.get_physical_device_properties2(pdevice, &mut properties);
Some(SubgroupSize {
min: subgroup_props.min_subgroup_size,
max: subgroup_props.max_subgroup_size,
})
} else {
None
};
// TODO: finer grained query of specific subgroup info.
let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0);
let gpu_info = GpuInfo {
has_descriptor_indexing,
has_subgroups,
subgroup_size,
has_memory_model,
};
Ok(VkDevice {
device,
@@ -315,7 +355,7 @@ impl VkInstance {
qfi,
queue,
timestamp_period,
has_descriptor_indexing,
gpu_info,
})
}
@@ -414,6 +454,10 @@ impl crate::Device for VkDevice {
type DescriptorSetBuilder = DescriptorSetBuilder;
type Sampler = vk::Sampler;
fn query_gpu_info(&self) -> GpuInfo {
self.gpu_info.clone()
}
fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
unsafe {
let device = &self.device.device;
@@ -570,7 +614,7 @@ impl crate::Device for VkDevice {
bindings: Vec::new(),
binding_flags: Vec::new(),
max_textures: 0,
has_descriptor_indexing: self.has_descriptor_indexing,
has_descriptor_indexing: self.gpu_info.has_descriptor_indexing,
}
}
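The Extensions helper referenced above (new, try_add, as_ptrs) does not appear in the hunks shown here. A hypothetical sketch of the probing pattern its call sites imply, restricted to ash APIs already used in this diff, could look like the following; the real type in the tree may differ:

use std::ffi::CStr;
use std::os::raw::c_char;
use ash::vk;

// Records an extension name only if the driver actually advertises it,
// so older devices simply skip the corresponding capability.
struct Extensions {
    supported: Vec<vk::ExtensionProperties>,
    names: Vec<*const c_char>,
}

impl Extensions {
    fn new(supported: Vec<vk::ExtensionProperties>) -> Extensions {
        Extensions { supported, names: Vec::new() }
    }

    // Returns true (and requests the extension) only when it is supported.
    fn try_add(&mut self, name: &CStr) -> bool {
        let found = self.supported.iter().any(|props| unsafe {
            CStr::from_ptr(props.extension_name.as_ptr()) == name
        });
        if found {
            self.names.push(name.as_ptr());
        }
        found
    }

    fn as_ptrs(&self) -> &[*const c_char] {
        &self.names
    }
}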


@@ -128,11 +128,20 @@ fn render_clip_test(rc: &mut impl RenderContext) {
#[allow(unused)]
fn render_alpha_test(rc: &mut impl RenderContext) {
// Alpha compositing tests.
rc.fill(diamond(Point::new(1024.0, 100.0)), &Color::Rgba32(0xff0000ff));
rc.fill(diamond(Point::new(1024.0, 125.0)), &Color::Rgba32(0x00ff0080));
rc.fill(
diamond(Point::new(1024.0, 100.0)),
&Color::Rgba32(0xff0000ff),
);
rc.fill(
diamond(Point::new(1024.0, 125.0)),
&Color::Rgba32(0x00ff0080),
);
rc.save();
rc.clip(diamond(Point::new(1024.0, 150.0)));
rc.fill(diamond(Point::new(1024.0, 175.0)), &Color::Rgba32(0x0000ff80));
rc.fill(
diamond(Point::new(1024.0, 175.0)),
&Color::Rgba32(0x0000ff80),
);
rc.restore();
}
@@ -325,7 +334,7 @@ impl Renderer {
let bg_image = Self::make_test_bg_image(&session);
let k4_code = if session.has_descriptor_indexing() {
let k4_code = if session.gpu_info().has_descriptor_indexing {
&include_bytes!("../shader/kernel4_idx.spv")[..]
} else {
println!("doing non-indexed k4");


@@ -1,8 +1,8 @@
use std::{borrow::Cow, ops::RangeBounds};
use piet::{
HitTestPosition,
kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size}, TextAttribute, TextStorage,
kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size},
HitTestPosition, TextAttribute, TextStorage,
};
use piet::{
Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode,
@@ -143,7 +143,12 @@ impl RenderContext for PietGpuRenderContext {
//
// See also http://ssp.impulsetrain.com/gamma-premult.html.
let (r, g, b, a) = color.as_rgba();
let premul = Color::rgba(to_srgb(from_srgb(r) * a), to_srgb(from_srgb(g) * a), to_srgb(from_srgb(b) * a), a);
let premul = Color::rgba(
to_srgb(from_srgb(r) * a),
to_srgb(from_srgb(g) * a),
to_srgb(from_srgb(b) * a),
a,
);
PietGpuBrush::Solid(premul.as_rgba_u32())
}
@@ -182,7 +187,8 @@ impl RenderContext for PietGpuRenderContext {
_brush: &impl IntoBrush<Self>,
_width: f64,
_style: &StrokeStyle,
) {}
) {
}
fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
@@ -284,7 +290,8 @@ impl RenderContext for PietGpuRenderContext {
_image: &Self::Image,
_rect: impl Into<Rect>,
_interp: InterpolationMode,
) {}
) {
}
fn draw_image_area(
&mut self,
@@ -292,7 +299,8 @@ impl RenderContext for PietGpuRenderContext {
_src_rect: impl Into<Rect>,
_dst_rect: impl Into<Rect>,
_interp: InterpolationMode,
) {}
) {
}
fn blurred_rect(&mut self, _rect: Rect, _blur_radius: f64, _brush: &impl IntoBrush<Self>) {}
@@ -323,7 +331,7 @@ impl PietGpuRenderContext {
self.pathseg_count += 1;
}
fn encode_path(&mut self, path: impl Iterator<Item=PathEl>, is_fill: bool) {
fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {
if is_fill {
self.encode_path_inner(
path.flat_map(|el| {
@@ -341,7 +349,7 @@ impl PietGpuRenderContext {
}
}
fn encode_path_inner(&mut self, path: impl Iterator<Item=PathEl>) {
fn encode_path_inner(&mut self, path: impl Iterator<Item = PathEl>) {
let flatten = false;
if flatten {
let mut start_pt = None;
@@ -606,4 +614,4 @@ fn from_srgb(f: f64) -> f64 {
let a = 0.055;
f64::powf((f + a) * f64::recip(1. + a), 2.4)
}
}
}