mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Expand runtime query of GPU capabilities
Test whether the GPU supports subgroups (including subgroup size control) and the Vulkan memory model. This patch does all the ceremony needed for a runtime query, including checking the Vulkan version and probing the extensions only when they are available. Thus, it should work fine on older devices (not yet tested). The reporting of capabilities follows Vulkan concepts, but is not particularly Vulkan-specific.
This commit is contained in:
parent
f6c2558743
commit
a5991ecf97
|
@ -10,7 +10,7 @@ use std::sync::{Arc, Mutex, Weak};
|
||||||
use crate::vulkan;
|
use crate::vulkan;
|
||||||
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
|
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
|
||||||
use crate::PipelineBuilder as PipelineBuilderTrait;
|
use crate::PipelineBuilder as PipelineBuilderTrait;
|
||||||
use crate::{Device, Error, SamplerParams};
|
use crate::{Device, Error, GpuInfo, SamplerParams};
|
||||||
|
|
||||||
pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags;
|
pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags;
|
||||||
pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore;
|
pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore;
|
||||||
|
@ -32,6 +32,7 @@ struct SessionInner {
|
||||||
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
|
cmd_buf_pool: Mutex<Vec<(vulkan::CmdBuf, Fence)>>,
|
||||||
/// Command buffers that are still pending (so resources can't be freed).
|
/// Command buffers that are still pending (so resources can't be freed).
|
||||||
pending: Mutex<Vec<SubmittedCmdBufInner>>,
|
pending: Mutex<Vec<SubmittedCmdBufInner>>,
|
||||||
|
gpu_info: GpuInfo,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct CmdBuf {
|
pub struct CmdBuf {
|
||||||
|
@ -72,8 +73,10 @@ pub struct DescriptorSetBuilder(vulkan::DescriptorSetBuilder);
|
||||||
|
|
||||||
impl Session {
|
impl Session {
|
||||||
pub fn new(device: vulkan::VkDevice) -> Session {
|
pub fn new(device: vulkan::VkDevice) -> Session {
|
||||||
|
let gpu_info = device.query_gpu_info();
|
||||||
Session(Arc::new(SessionInner {
|
Session(Arc::new(SessionInner {
|
||||||
device,
|
device,
|
||||||
|
gpu_info,
|
||||||
cmd_buf_pool: Default::default(),
|
cmd_buf_pool: Default::default(),
|
||||||
pending: Default::default(),
|
pending: Default::default(),
|
||||||
}))
|
}))
|
||||||
|
@ -210,11 +213,8 @@ impl Session {
|
||||||
self.0.device.create_sampler(params)
|
self.0.device.create_sampler(params)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Report whether the device supports descriptor indexing.
|
pub fn gpu_info(&self) -> &GpuInfo {
|
||||||
///
|
&self.0.gpu_info
|
||||||
/// As we have more queries, we might replace this with a capabilities structure.
|
|
||||||
pub fn has_descriptor_indexing(&self) -> bool {
|
|
||||||
self.0.device.has_descriptor_indexing
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,29 @@ pub enum SamplerParams {
|
||||||
Linear,
|
Linear,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Information about the GPU.
///
/// The fields follow Vulkan concepts, but are not meant to be
/// Vulkan-specific.
#[derive(Clone, Debug)]
pub struct GpuInfo {
    /// The GPU supports descriptor indexing.
    pub has_descriptor_indexing: bool,
    /// The GPU supports subgroups.
    ///
    /// Right now, this just checks for basic subgroup capability (as
    /// required in Vulkan 1.1), and we should have finer grained
    /// queries for shuffles, etc.
    pub has_subgroups: bool,
    /// Info about subgroup size control, or `None` if subgroup size control
    /// is not available.
    pub subgroup_size: Option<SubgroupSize>,
    /// The GPU supports a formal memory model (on Vulkan, this means the
    /// `VK_KHR_vulkan_memory_model` extension is available).
    pub has_memory_model: bool,
}

/// The range of subgroup sizes reported by the GPU when subgroup size
/// control is available.
///
/// The Vulkan backend fills these in from the `min_subgroup_size` and
/// `max_subgroup_size` members of
/// `VkPhysicalDeviceSubgroupSizeControlPropertiesEXT`.
#[derive(Clone, Debug)]
pub struct SubgroupSize {
    /// Minimum subgroup size reported by the device.
    ///
    /// Public so that code outside this crate can read the value it
    /// obtains through `GpuInfo::subgroup_size`.
    pub min: u32,
    /// Maximum subgroup size reported by the device.
    pub max: u32,
}
|
||||||
|
|
||||||
pub trait Device: Sized {
|
pub trait Device: Sized {
|
||||||
type Buffer: 'static;
|
type Buffer: 'static;
|
||||||
type Image;
|
type Image;
|
||||||
|
@ -43,6 +66,12 @@ pub trait Device: Sized {
|
||||||
type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
|
type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
|
||||||
type Sampler;
|
type Sampler;
|
||||||
|
|
||||||
|
/// Query the GPU info.
|
||||||
|
///
|
||||||
|
/// This method may be expensive, so the hub should call it once and retain
|
||||||
|
/// the info.
|
||||||
|
fn query_gpu_info(&self) -> GpuInfo;
|
||||||
|
|
||||||
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
|
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
|
||||||
|
|
||||||
/// Destroy a buffer.
|
/// Destroy a buffer.
|
||||||
|
|
|
@ -6,10 +6,10 @@ use std::os::raw::c_char;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
use ash::extensions::{ext::DebugUtils, khr};
|
use ash::extensions::{ext::DebugUtils, khr};
|
||||||
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
|
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0, InstanceV1_1};
|
||||||
use ash::{vk, Device, Entry, Instance};
|
use ash::{vk, Device, Entry, Instance};
|
||||||
|
|
||||||
use crate::{Device as DeviceTrait, Error, ImageLayout, SamplerParams};
|
use crate::{Device as DeviceTrait, Error, GpuInfo, ImageLayout, SamplerParams, SubgroupSize};
|
||||||
|
|
||||||
pub struct VkInstance {
|
pub struct VkInstance {
|
||||||
/// Retain the dynamic lib.
|
/// Retain the dynamic lib.
|
||||||
|
@ -17,6 +17,7 @@ pub struct VkInstance {
|
||||||
entry: Entry,
|
entry: Entry,
|
||||||
instance: Instance,
|
instance: Instance,
|
||||||
get_phys_dev_props: Option<vk::KhrGetPhysicalDeviceProperties2Fn>,
|
get_phys_dev_props: Option<vk::KhrGetPhysicalDeviceProperties2Fn>,
|
||||||
|
vk_version: u32,
|
||||||
_dbg_loader: Option<DebugUtils>,
|
_dbg_loader: Option<DebugUtils>,
|
||||||
_dbg_callbk: Option<vk::DebugUtilsMessengerEXT>,
|
_dbg_callbk: Option<vk::DebugUtilsMessengerEXT>,
|
||||||
}
|
}
|
||||||
|
@ -28,8 +29,7 @@ pub struct VkDevice {
|
||||||
queue: vk::Queue,
|
queue: vk::Queue,
|
||||||
qfi: u32,
|
qfi: u32,
|
||||||
timestamp_period: f32,
|
timestamp_period: f32,
|
||||||
/// Does the device support descriptor indexing?
|
gpu_info: GpuInfo,
|
||||||
pub has_descriptor_indexing: bool,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
struct RawDevice {
|
struct RawDevice {
|
||||||
|
@ -181,6 +181,16 @@ impl VkInstance {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let supported_version = entry
|
||||||
|
.try_enumerate_instance_version()?
|
||||||
|
.unwrap_or(vk::make_version(1, 0, 0));
|
||||||
|
let vk_version = if supported_version >= vk::make_version(1, 1, 0) {
|
||||||
|
// We need Vulkan 1.1 to do subgroups; most other things can be extensions.
|
||||||
|
vk::make_version(1, 1, 0)
|
||||||
|
} else {
|
||||||
|
vk::make_version(1, 0, 0)
|
||||||
|
};
|
||||||
|
|
||||||
let instance = entry.create_instance(
|
let instance = entry.create_instance(
|
||||||
&vk::InstanceCreateInfo::builder()
|
&vk::InstanceCreateInfo::builder()
|
||||||
.application_info(
|
.application_info(
|
||||||
|
@ -188,7 +198,7 @@ impl VkInstance {
|
||||||
.application_name(&app_name)
|
.application_name(&app_name)
|
||||||
.application_version(0)
|
.application_version(0)
|
||||||
.engine_name(&app_name)
|
.engine_name(&app_name)
|
||||||
.api_version(vk::make_version(1, 0, 0)),
|
.api_version(vk_version),
|
||||||
)
|
)
|
||||||
.enabled_layer_names(layers.as_ptrs())
|
.enabled_layer_names(layers.as_ptrs())
|
||||||
.enabled_extension_names(exts.as_ptrs()),
|
.enabled_extension_names(exts.as_ptrs()),
|
||||||
|
@ -234,6 +244,7 @@ impl VkInstance {
|
||||||
entry,
|
entry,
|
||||||
instance,
|
instance,
|
||||||
get_phys_dev_props,
|
get_phys_dev_props,
|
||||||
|
vk_version,
|
||||||
_dbg_loader,
|
_dbg_loader,
|
||||||
_dbg_callbk,
|
_dbg_callbk,
|
||||||
};
|
};
|
||||||
|
@ -282,17 +293,24 @@ impl VkInstance {
|
||||||
.descriptor_binding_variable_descriptor_count(true)
|
.descriptor_binding_variable_descriptor_count(true)
|
||||||
.runtime_descriptor_array(true);
|
.runtime_descriptor_array(true);
|
||||||
|
|
||||||
let mut extensions = Vec::new();
|
let mut extensions = Extensions::new(
|
||||||
|
self.instance
|
||||||
|
.enumerate_device_extension_properties(pdevice)?,
|
||||||
|
);
|
||||||
if surface.is_some() {
|
if surface.is_some() {
|
||||||
extensions.push(khr::Swapchain::name().as_ptr());
|
extensions.try_add(khr::Swapchain::name());
|
||||||
}
|
}
|
||||||
if has_descriptor_indexing {
|
if has_descriptor_indexing {
|
||||||
extensions.push(vk::KhrMaintenance3Fn::name().as_ptr());
|
extensions.try_add(vk::KhrMaintenance3Fn::name());
|
||||||
extensions.push(vk::ExtDescriptorIndexingFn::name().as_ptr());
|
extensions.try_add(vk::ExtDescriptorIndexingFn::name());
|
||||||
}
|
}
|
||||||
|
let has_subgroup_size = self.vk_version >= vk::make_version(1, 1, 0)
|
||||||
|
&& extensions.try_add(vk::ExtSubgroupSizeControlFn::name());
|
||||||
|
let has_memory_model = self.vk_version >= vk::make_version(1, 1, 0)
|
||||||
|
&& extensions.try_add(vk::KhrVulkanMemoryModelFn::name());
|
||||||
let mut create_info = vk::DeviceCreateInfo::builder()
|
let mut create_info = vk::DeviceCreateInfo::builder()
|
||||||
.queue_create_infos(&queue_create_infos)
|
.queue_create_infos(&queue_create_infos)
|
||||||
.enabled_extension_names(&extensions);
|
.enabled_extension_names(extensions.as_ptrs());
|
||||||
if has_descriptor_indexing {
|
if has_descriptor_indexing {
|
||||||
create_info = create_info.push_next(&mut descriptor_indexing);
|
create_info = create_info.push_next(&mut descriptor_indexing);
|
||||||
}
|
}
|
||||||
|
@ -307,6 +325,28 @@ impl VkInstance {
|
||||||
|
|
||||||
let props = self.instance.get_physical_device_properties(pdevice);
|
let props = self.instance.get_physical_device_properties(pdevice);
|
||||||
let timestamp_period = props.limits.timestamp_period;
|
let timestamp_period = props.limits.timestamp_period;
|
||||||
|
let subgroup_size = if has_subgroup_size {
|
||||||
|
let mut subgroup_props = vk::PhysicalDeviceSubgroupSizeControlPropertiesEXT::default();
|
||||||
|
let mut properties =
|
||||||
|
vk::PhysicalDeviceProperties2::builder().push_next(&mut subgroup_props);
|
||||||
|
self.instance
|
||||||
|
.get_physical_device_properties2(pdevice, &mut properties);
|
||||||
|
Some(SubgroupSize {
|
||||||
|
min: subgroup_props.min_subgroup_size,
|
||||||
|
max: subgroup_props.max_subgroup_size,
|
||||||
|
})
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: finer grained query of specific subgroup info.
|
||||||
|
let has_subgroups = self.vk_version >= vk::make_version(1, 1, 0);
|
||||||
|
let gpu_info = GpuInfo {
|
||||||
|
has_descriptor_indexing,
|
||||||
|
has_subgroups,
|
||||||
|
subgroup_size,
|
||||||
|
has_memory_model,
|
||||||
|
};
|
||||||
|
|
||||||
Ok(VkDevice {
|
Ok(VkDevice {
|
||||||
device,
|
device,
|
||||||
|
@ -315,7 +355,7 @@ impl VkInstance {
|
||||||
qfi,
|
qfi,
|
||||||
queue,
|
queue,
|
||||||
timestamp_period,
|
timestamp_period,
|
||||||
has_descriptor_indexing,
|
gpu_info,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -414,6 +454,10 @@ impl crate::Device for VkDevice {
|
||||||
type DescriptorSetBuilder = DescriptorSetBuilder;
|
type DescriptorSetBuilder = DescriptorSetBuilder;
|
||||||
type Sampler = vk::Sampler;
|
type Sampler = vk::Sampler;
|
||||||
|
|
||||||
|
fn query_gpu_info(&self) -> GpuInfo {
|
||||||
|
self.gpu_info.clone()
|
||||||
|
}
|
||||||
|
|
||||||
fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
|
fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
|
||||||
unsafe {
|
unsafe {
|
||||||
let device = &self.device.device;
|
let device = &self.device.device;
|
||||||
|
@ -570,7 +614,7 @@ impl crate::Device for VkDevice {
|
||||||
bindings: Vec::new(),
|
bindings: Vec::new(),
|
||||||
binding_flags: Vec::new(),
|
binding_flags: Vec::new(),
|
||||||
max_textures: 0,
|
max_textures: 0,
|
||||||
has_descriptor_indexing: self.has_descriptor_indexing,
|
has_descriptor_indexing: self.gpu_info.has_descriptor_indexing,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -128,11 +128,20 @@ fn render_clip_test(rc: &mut impl RenderContext) {
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
fn render_alpha_test(rc: &mut impl RenderContext) {
|
fn render_alpha_test(rc: &mut impl RenderContext) {
|
||||||
// Alpha compositing tests.
|
// Alpha compositing tests.
|
||||||
rc.fill(diamond(Point::new(1024.0, 100.0)), &Color::Rgba32(0xff0000ff));
|
rc.fill(
|
||||||
rc.fill(diamond(Point::new(1024.0, 125.0)), &Color::Rgba32(0x00ff0080));
|
diamond(Point::new(1024.0, 100.0)),
|
||||||
|
&Color::Rgba32(0xff0000ff),
|
||||||
|
);
|
||||||
|
rc.fill(
|
||||||
|
diamond(Point::new(1024.0, 125.0)),
|
||||||
|
&Color::Rgba32(0x00ff0080),
|
||||||
|
);
|
||||||
rc.save();
|
rc.save();
|
||||||
rc.clip(diamond(Point::new(1024.0, 150.0)));
|
rc.clip(diamond(Point::new(1024.0, 150.0)));
|
||||||
rc.fill(diamond(Point::new(1024.0, 175.0)), &Color::Rgba32(0x0000ff80));
|
rc.fill(
|
||||||
|
diamond(Point::new(1024.0, 175.0)),
|
||||||
|
&Color::Rgba32(0x0000ff80),
|
||||||
|
);
|
||||||
rc.restore();
|
rc.restore();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -325,7 +334,7 @@ impl Renderer {
|
||||||
|
|
||||||
let bg_image = Self::make_test_bg_image(&session);
|
let bg_image = Self::make_test_bg_image(&session);
|
||||||
|
|
||||||
let k4_code = if session.has_descriptor_indexing() {
|
let k4_code = if session.gpu_info().has_descriptor_indexing {
|
||||||
&include_bytes!("../shader/kernel4_idx.spv")[..]
|
&include_bytes!("../shader/kernel4_idx.spv")[..]
|
||||||
} else {
|
} else {
|
||||||
println!("doing non-indexed k4");
|
println!("doing non-indexed k4");
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use std::{borrow::Cow, ops::RangeBounds};
|
use std::{borrow::Cow, ops::RangeBounds};
|
||||||
|
|
||||||
use piet::{
|
use piet::{
|
||||||
HitTestPosition,
|
kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size},
|
||||||
kurbo::{Affine, Insets, PathEl, Point, Rect, Shape, Size}, TextAttribute, TextStorage,
|
HitTestPosition, TextAttribute, TextStorage,
|
||||||
};
|
};
|
||||||
use piet::{
|
use piet::{
|
||||||
Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode,
|
Color, Error, FixedGradient, FontFamily, HitTestPoint, ImageFormat, InterpolationMode,
|
||||||
|
@ -143,7 +143,12 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
//
|
//
|
||||||
// See also http://ssp.impulsetrain.com/gamma-premult.html.
|
// See also http://ssp.impulsetrain.com/gamma-premult.html.
|
||||||
let (r, g, b, a) = color.as_rgba();
|
let (r, g, b, a) = color.as_rgba();
|
||||||
let premul = Color::rgba(to_srgb(from_srgb(r) * a), to_srgb(from_srgb(g) * a), to_srgb(from_srgb(b) * a), a);
|
let premul = Color::rgba(
|
||||||
|
to_srgb(from_srgb(r) * a),
|
||||||
|
to_srgb(from_srgb(g) * a),
|
||||||
|
to_srgb(from_srgb(b) * a),
|
||||||
|
a,
|
||||||
|
);
|
||||||
PietGpuBrush::Solid(premul.as_rgba_u32())
|
PietGpuBrush::Solid(premul.as_rgba_u32())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -182,7 +187,8 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
_brush: &impl IntoBrush<Self>,
|
_brush: &impl IntoBrush<Self>,
|
||||||
_width: f64,
|
_width: f64,
|
||||||
_style: &StrokeStyle,
|
_style: &StrokeStyle,
|
||||||
) {}
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
|
fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
|
||||||
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
|
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
|
||||||
|
@ -284,7 +290,8 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
_image: &Self::Image,
|
_image: &Self::Image,
|
||||||
_rect: impl Into<Rect>,
|
_rect: impl Into<Rect>,
|
||||||
_interp: InterpolationMode,
|
_interp: InterpolationMode,
|
||||||
) {}
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
fn draw_image_area(
|
fn draw_image_area(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
@ -292,7 +299,8 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
_src_rect: impl Into<Rect>,
|
_src_rect: impl Into<Rect>,
|
||||||
_dst_rect: impl Into<Rect>,
|
_dst_rect: impl Into<Rect>,
|
||||||
_interp: InterpolationMode,
|
_interp: InterpolationMode,
|
||||||
) {}
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
fn blurred_rect(&mut self, _rect: Rect, _blur_radius: f64, _brush: &impl IntoBrush<Self>) {}
|
fn blurred_rect(&mut self, _rect: Rect, _blur_radius: f64, _brush: &impl IntoBrush<Self>) {}
|
||||||
|
|
||||||
|
@ -323,7 +331,7 @@ impl PietGpuRenderContext {
|
||||||
self.pathseg_count += 1;
|
self.pathseg_count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_path(&mut self, path: impl Iterator<Item=PathEl>, is_fill: bool) {
|
fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {
|
||||||
if is_fill {
|
if is_fill {
|
||||||
self.encode_path_inner(
|
self.encode_path_inner(
|
||||||
path.flat_map(|el| {
|
path.flat_map(|el| {
|
||||||
|
@ -341,7 +349,7 @@ impl PietGpuRenderContext {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_path_inner(&mut self, path: impl Iterator<Item=PathEl>) {
|
fn encode_path_inner(&mut self, path: impl Iterator<Item = PathEl>) {
|
||||||
let flatten = false;
|
let flatten = false;
|
||||||
if flatten {
|
if flatten {
|
||||||
let mut start_pt = None;
|
let mut start_pt = None;
|
||||||
|
@ -606,4 +614,4 @@ fn from_srgb(f: f64) -> f64 {
|
||||||
let a = 0.055;
|
let a = 0.055;
|
||||||
f64::powf((f + a) * f64::recip(1. + a), 2.4)
|
f64::powf((f + a) * f64::recip(1. + a), 2.4)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue