Merge pull request #79 from linebender/ext_query

Query extensions at runtime
This commit is contained in:
Raph Levien 2021-04-11 15:34:19 -07:00 committed by GitHub
commit 74f2003a1d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 240 additions and 133 deletions

1
Cargo.lock generated
View file

@ -797,7 +797,6 @@ version = "0.1.0"
dependencies = [
"ash",
"ash-window",
"once_cell",
"raw-window-handle",
]

View file

@ -157,7 +157,12 @@ fn gen_enum_read(
writeln!(r, "{}Tag {}_tag({}Ref ref) {{", name, name, name).unwrap();
writeln!(r, " uint tag_and_flags = {}[ref.offset >> 2];", bufname).unwrap();
}
writeln!(r, " return {}Tag(tag_and_flags & 0xffff, tag_and_flags >> 16);", name).unwrap();
writeln!(
r,
" return {}Tag(tag_and_flags & 0xffff, tag_and_flags >> 16);",
name
)
.unwrap();
writeln!(r, "}}\n").unwrap();
for (var_name, payload) in variants {
let payload_ix = if payload.len() == 1 {
@ -564,7 +569,9 @@ fn gen_enum_write(
}
writeln!(r, "}}\n").unwrap();
}
} else if payload.len() == 2 && matches!(payload[0].1.ty, GpuType::Scalar(GpuScalar::TagFlags)) {
} else if payload.len() == 2
&& matches!(payload[0].1.ty, GpuType::Scalar(GpuScalar::TagFlags))
{
if let GpuType::InlineStruct(structname) = &payload[1].1.ty {
if is_mem {
writeln!(

View file

@ -8,6 +8,5 @@ edition = "2018"
[dependencies]
ash = "0.31"
once_cell = "1.3.1"
ash-window = "0.5"
raw-window-handle = "0.3"

View file

@ -209,6 +209,13 @@ impl Session {
pub unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Sampler, Error> {
self.0.device.create_sampler(params)
}
/// Report whether the device supports descriptor indexing.
///
/// The flag is set at device-creation time, by querying the descriptor
/// indexing features through `VK_KHR_get_physical_device_properties2`
/// when that instance extension is available.
///
/// As we have more queries, we might replace this with a capabilities structure.
pub fn has_descriptor_indexing(&self) -> bool {
    self.0.device.has_descriptor_indexing
}
}
impl CmdBuf {

View file

@ -2,12 +2,12 @@
use std::borrow::Cow;
use std::ffi::{CStr, CString};
use std::os::raw::c_char;
use std::sync::Arc;
use ash::extensions::{ext::DebugUtils, khr};
use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
use ash::{vk, Device, Entry, Instance};
use once_cell::sync::Lazy;
use crate::{Device as DeviceTrait, Error, ImageLayout, SamplerParams};
@ -16,6 +16,7 @@ pub struct VkInstance {
#[allow(unused)]
entry: Entry,
instance: Instance,
get_phys_dev_props: Option<vk::KhrGetPhysicalDeviceProperties2Fn>,
_dbg_loader: Option<DebugUtils>,
_dbg_callbk: Option<vk::DebugUtilsMessengerEXT>,
}
@ -27,6 +28,8 @@ pub struct VkDevice {
queue: vk::Queue,
qfi: u32,
timestamp_period: f32,
/// Does the device support descriptor indexing?
pub has_descriptor_indexing: bool,
}
struct RawDevice {
@ -95,6 +98,7 @@ pub struct PipelineBuilder {
bindings: Vec<vk::DescriptorSetLayoutBinding>,
binding_flags: Vec<vk::DescriptorBindingFlags>,
max_textures: u32,
has_descriptor_indexing: bool,
}
pub struct DescriptorSetBuilder {
@ -104,6 +108,16 @@ pub struct DescriptorSetBuilder {
sampler: vk::Sampler,
}
/// Helper for negotiating instance extensions: tracks what the driver
/// reports as available and which names we actually request.
struct Extensions {
    // Name pointers for the extensions we will enable (suitable for
    // `enabled_extension_names`).
    exts: Vec<*const c_char>,
    // Extensions reported as present by the Vulkan instance.
    exist_exts: Vec<vk::ExtensionProperties>,
}
/// Helper for negotiating instance layers: tracks what the driver
/// reports as available and which names we actually request.
struct Layers {
    // Name pointers for the layers we will enable (suitable for
    // `enabled_layer_names`).
    layers: Vec<*const c_char>,
    // Layers reported as present by the Vulkan instance.
    exist_layers: Vec<vk::LayerProperties>,
}
unsafe extern "system" fn vulkan_debug_callback(
message_severity: vk::DebugUtilsMessageSeverityFlagsEXT,
message_type: vk::DebugUtilsMessageTypeFlagsEXT,
@ -133,24 +147,6 @@ unsafe extern "system" fn vulkan_debug_callback(
vk::FALSE
}
static LAYERS: Lazy<Vec<&'static CStr>> = Lazy::new(|| {
let mut layers: Vec<&'static CStr> = vec![];
if cfg!(debug_assertions) {
layers.push(CStr::from_bytes_with_nul(b"VK_LAYER_KHRONOS_validation\0").unwrap());
}
layers
});
static EXTS: Lazy<Vec<&'static CStr>> = Lazy::new(|| {
let mut exts: Vec<&'static CStr> = vec![];
if cfg!(debug_assertions) {
exts.push(DebugUtils::name());
}
// We'll need this to do runtime query of descriptor indexing.
//exts.push(vk::KhrGetPhysicalDeviceProperties2Fn::name());
exts
});
impl VkInstance {
/// Create a new instance.
///
@ -166,50 +162,24 @@ impl VkInstance {
let app_name = CString::new("VkToy").unwrap();
let entry = Entry::new()?;
let exist_layers = entry.enumerate_instance_layer_properties()?;
let layers = LAYERS
.iter()
.filter_map(|&lyr| {
exist_layers
.iter()
.find(|x| CStr::from_ptr(x.layer_name.as_ptr()) == lyr)
.map(|_| lyr.as_ptr())
.or_else(|| {
println!(
"Unable to find layer: {}, have you installed the Vulkan SDK?",
lyr.to_string_lossy()
);
None
})
})
.collect::<Vec<_>>();
let exist_exts = entry.enumerate_instance_extension_properties()?;
let mut exts = EXTS
.iter()
.filter_map(|&ext| {
exist_exts
.iter()
.find(|x| CStr::from_ptr(x.extension_name.as_ptr()) == ext)
.map(|_| ext.as_ptr())
.or_else(|| {
println!(
"Unable to find extension: {}, have you installed the Vulkan SDK?",
ext.to_string_lossy()
);
None
})
})
.collect::<Vec<_>>();
let surface_extensions = match window_handle {
Some(ref handle) => ash_window::enumerate_required_extensions(*handle)?,
None => vec![],
};
for extension in surface_extensions {
exts.push(extension.as_ptr());
let mut layers = Layers::new(entry.enumerate_instance_layer_properties()?);
if cfg!(debug_assertions) {
layers
.try_add(CStr::from_bytes_with_nul(b"VK_LAYER_KHRONOS_validation\0").unwrap());
}
let mut exts = Extensions::new(entry.enumerate_instance_extension_properties()?);
let mut has_debug_ext = false;
if cfg!(debug_assertions) {
has_debug_ext = exts.try_add(DebugUtils::name());
}
// We'll need this to do runtime query of descriptor indexing.
let has_phys_dev_props = exts.try_add(vk::KhrGetPhysicalDeviceProperties2Fn::name());
if let Some(ref handle) = window_handle {
for ext in ash_window::enumerate_required_extensions(*handle)? {
exts.try_add(ext);
}
}
exts.push(vk::KhrGetPhysicalDeviceProperties2Fn::name().as_ptr());
let instance = entry.create_instance(
&vk::InstanceCreateInfo::builder()
@ -220,12 +190,12 @@ impl VkInstance {
.engine_name(&app_name)
.api_version(vk::make_version(1, 0, 0)),
)
.enabled_layer_names(&layers)
.enabled_extension_names(&exts),
.enabled_layer_names(layers.as_ptrs())
.enabled_extension_names(exts.as_ptrs()),
None,
)?;
let (_dbg_loader, _dbg_callbk) = if cfg!(debug_assertions) {
let (_dbg_loader, _dbg_callbk) = if has_debug_ext {
let dbg_info = vk::DebugUtilsMessengerCreateInfoEXT::builder()
.message_severity(
vk::DebugUtilsMessageSeverityFlagsEXT::ERROR
@ -250,9 +220,20 @@ impl VkInstance {
None => None,
};
let get_phys_dev_props = if has_phys_dev_props {
Some(vk::KhrGetPhysicalDeviceProperties2Fn::load(|name| {
std::mem::transmute(
entry.get_instance_proc_addr(instance.handle(), name.as_ptr()),
)
}))
} else {
None
};
let vk_instance = VkInstance {
entry,
instance,
get_phys_dev_props,
_dbg_loader,
_dbg_callbk,
};
@ -273,29 +254,48 @@ impl VkInstance {
let (pdevice, qfi) =
choose_compute_device(&self.instance, &devices, surface).ok_or("no suitable device")?;
let mut has_descriptor_indexing = false;
if let Some(ref get_phys_dev_props) = self.get_phys_dev_props {
let mut descriptor_indexing_features =
vk::PhysicalDeviceDescriptorIndexingFeatures::builder();
// See https://github.com/MaikKlein/ash/issues/325 for why we do this workaround.
let mut features_v2 = vk::PhysicalDeviceFeatures2::default();
features_v2.p_next =
&mut descriptor_indexing_features as *mut _ as *mut std::ffi::c_void;
get_phys_dev_props.get_physical_device_features2_khr(pdevice, &mut features_v2);
has_descriptor_indexing = descriptor_indexing_features
.shader_storage_image_array_non_uniform_indexing
== vk::TRUE
&& descriptor_indexing_features.descriptor_binding_variable_descriptor_count
== vk::TRUE
&& descriptor_indexing_features.runtime_descriptor_array == vk::TRUE;
}
let queue_priorities = [1.0];
let queue_create_infos = [vk::DeviceQueueCreateInfo::builder()
.queue_family_index(qfi)
.queue_priorities(&queue_priorities)
.build()];
// support for descriptor indexing (maybe should be optional for compatibility)
let descriptor_indexing = vk::PhysicalDeviceDescriptorIndexingFeatures::builder()
let mut descriptor_indexing = vk::PhysicalDeviceDescriptorIndexingFeatures::builder()
.shader_storage_image_array_non_uniform_indexing(true)
.descriptor_binding_variable_descriptor_count(true)
.runtime_descriptor_array(true);
let mut extensions = match surface {
Some(_) => vec![khr::Swapchain::name().as_ptr()],
None => vec![],
};
extensions.push(vk::ExtDescriptorIndexingFn::name().as_ptr());
extensions.push(vk::KhrMaintenance3Fn::name().as_ptr());
let create_info = vk::DeviceCreateInfo::builder()
let mut extensions = Vec::new();
if surface.is_some() {
extensions.push(khr::Swapchain::name().as_ptr());
}
if has_descriptor_indexing {
extensions.push(vk::KhrMaintenance3Fn::name().as_ptr());
extensions.push(vk::ExtDescriptorIndexingFn::name().as_ptr());
}
let mut create_info = vk::DeviceCreateInfo::builder()
.queue_create_infos(&queue_create_infos)
.enabled_extension_names(&extensions)
.push_next(&mut descriptor_indexing.build())
.build();
.enabled_extension_names(&extensions);
if has_descriptor_indexing {
create_info = create_info.push_next(&mut descriptor_indexing);
}
let device = self.instance.create_device(pdevice, &create_info, None)?;
let device_mem_props = self.instance.get_physical_device_memory_properties(pdevice);
@ -315,6 +315,7 @@ impl VkInstance {
qfi,
queue,
timestamp_period,
has_descriptor_indexing,
})
}
@ -569,6 +570,7 @@ impl crate::Device for VkDevice {
bindings: Vec::new(),
binding_flags: Vec::new(),
max_textures: 0,
has_descriptor_indexing: self.has_descriptor_indexing,
}
}
@ -711,21 +713,23 @@ impl crate::Device for VkDevice {
SamplerParams::Linear => vk::Filter::LINEAR,
SamplerParams::Nearest => vk::Filter::NEAREST,
};
let sampler = device.create_sampler(&vk::SamplerCreateInfo::builder()
.mag_filter(filter)
.min_filter(filter)
.mipmap_mode(vk::SamplerMipmapMode::LINEAR)
.address_mode_u(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.address_mode_v(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.address_mode_w(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.mip_lod_bias(0.0)
.compare_op(vk::CompareOp::NEVER)
.min_lod(0.0)
.max_lod(0.0)
.border_color(vk::BorderColor::FLOAT_TRANSPARENT_BLACK)
.max_anisotropy(1.0)
.anisotropy_enable(false)
, None)?;
let sampler = device.create_sampler(
&vk::SamplerCreateInfo::builder()
.mag_filter(filter)
.min_filter(filter)
.mipmap_mode(vk::SamplerMipmapMode::LINEAR)
.address_mode_u(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.address_mode_v(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.address_mode_w(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.mip_lod_bias(0.0)
.compare_op(vk::CompareOp::NEVER)
.min_lod(0.0)
.max_lod(0.0)
.border_color(vk::BorderColor::FLOAT_TRANSPARENT_BLACK)
.max_anisotropy(1.0)
.anisotropy_enable(false),
None,
)?;
Ok(sampler)
}
}
@ -1007,8 +1011,12 @@ impl crate::PipelineBuilder<VkDevice> for PipelineBuilder {
.stage_flags(vk::ShaderStageFlags::COMPUTE)
.build(),
);
self.binding_flags
.push(vk::DescriptorBindingFlags::VARIABLE_DESCRIPTOR_COUNT);
let flags = if self.has_descriptor_indexing {
vk::DescriptorBindingFlags::VARIABLE_DESCRIPTOR_COUNT
} else {
Default::default()
};
self.binding_flags.push(flags);
self.max_textures += max_textures;
}
@ -1231,6 +1239,64 @@ impl VkSwapchain {
}
}
impl Extensions {
fn new(exist_exts: Vec<vk::ExtensionProperties>) -> Extensions {
Extensions {
exist_exts,
exts: vec![],
}
}
fn try_add(&mut self, ext: &'static CStr) -> bool {
unsafe {
if self
.exist_exts
.iter()
.find(|x| CStr::from_ptr(x.extension_name.as_ptr()) == ext)
.is_some()
{
self.exts.push(ext.as_ptr());
true
} else {
false
}
}
}
fn as_ptrs(&self) -> &[*const c_char] {
&self.exts
}
}
impl Layers {
fn new(exist_layers: Vec<vk::LayerProperties>) -> Layers {
Layers {
exist_layers,
layers: vec![],
}
}
fn try_add(&mut self, ext: &'static CStr) -> bool {
unsafe {
if self
.exist_layers
.iter()
.find(|x| CStr::from_ptr(x.layer_name.as_ptr()) == ext)
.is_some()
{
self.layers.push(ext.as_ptr());
true
} else {
false
}
}
}
fn as_ptrs(&self) -> &[*const c_char] {
&self.layers
}
}
unsafe fn choose_compute_device(
instance: &Instance,
devices: &[vk::PhysicalDevice],

View file

@ -25,7 +25,8 @@ fn main() -> Result<(), Error> {
let (instance, surface) = VkInstance::new(Some(&window))?;
unsafe {
let device = instance.device(surface.as_ref())?;
let mut swapchain = instance.swapchain(WIDTH / 2, HEIGHT / 2, &device, surface.as_ref().unwrap())?;
let mut swapchain =
instance.swapchain(WIDTH / 2, HEIGHT / 2, &device, surface.as_ref().unwrap())?;
let session = hub::Session::new(device);
let mut current_frame = 0;

View file

@ -5,7 +5,7 @@
glslang_validator = glslangValidator
rule glsl
command = $glslang_validator -V -o $out $in
command = $glslang_validator $flags -V -o $out $in
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
@ -21,3 +21,6 @@ build backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
build kernel4_idx.spv: glsl kernel4.comp | ptcl.h setup.h
flags = -DENABLE_IMAGE_INDICES

View file

@ -8,7 +8,9 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
#ifdef ENABLE_IMAGE_INDICES
#extension GL_EXT_nonuniform_qualifier : enable
#endif
#include "mem.h"
#include "setup.h"
@ -26,7 +28,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
#if GL_EXT_nonuniform_qualifier
#ifdef ENABLE_IMAGE_INDICES
layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[];
#else
layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1];
@ -100,10 +102,17 @@ void main() {
vec4 rgba[CHUNK];
for (uint i = 0; i < CHUNK; i++) {
rgba[i] = vec4(0.0);
// TODO: remove this debug image support when the actual image method is plumbed.
#ifdef DEBUG_IMAGES
#ifdef ENABLE_IMAGE_INDICES
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4);
}
#else
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
rgb[i] = imageLoad(images[0], ivec2(xy_uint + chunk_offset(i))/4).rgb;
}
#endif
#endif
}

Binary file not shown.

Binary file not shown.

View file

@ -15,7 +15,8 @@
#define PTCL_INITIAL_ALLOC 1024
#define ENABLE_IMAGE_INDICES
// This is now set in the ninja file during compilation
//#define ENABLE_IMAGE_INDICES
// These should probably be renamed and/or reworked. In the binning
// kernel, they represent the number of bins. Also, the workgroup size

View file

@ -12,7 +12,7 @@ use piet::{Color, ImageFormat, RenderContext};
use piet_gpu_types::encoder::Encode;
use piet_gpu_hal::{hub};
use piet_gpu_hal::hub;
use piet_gpu_hal::{CmdBuf, Error, ImageLayout, MemFlags};
use pico_svg::PicoSvg;
@ -248,7 +248,7 @@ impl Renderer {
let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
const CONFIG_SIZE: u64 = 10*4; // Size of Config in setup.h.
const CONFIG_SIZE: u64 = 10 * 4; // Size of Config in setup.h.
let mut config_buf_host = session.create_buffer(CONFIG_SIZE, host)?;
let config_buf_dev = session.create_buffer(CONFIG_SIZE, dev)?;
@ -271,9 +271,20 @@ impl Renderer {
alloc += (n_paths * ANNO_SIZE + 3) & !3;
let trans_base = alloc;
alloc += (n_trans * TRANS_SIZE + 3) & !3;
config_buf_host.write(&[n_paths as u32, n_pathseg as u32, WIDTH_IN_TILES as u32, HEIGHT_IN_TILES as u32, tile_base as u32, bin_base as u32, ptcl_base as u32, pathseg_base as u32, anno_base as u32, trans_base as u32])?;
config_buf_host.write(&[
n_paths as u32,
n_pathseg as u32,
WIDTH_IN_TILES as u32,
HEIGHT_IN_TILES as u32,
tile_base as u32,
bin_base as u32,
ptcl_base as u32,
pathseg_base as u32,
anno_base as u32,
trans_base as u32,
])?;
let mut memory_buf_host = session.create_buffer(2*4, host)?;
let mut memory_buf_host = session.create_buffer(2 * 4, host)?;
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
memory_buf_host.write(&[alloc as u32, 0 /* Overflow flag */])?;
@ -286,17 +297,13 @@ impl Renderer {
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
let tile_pipeline = session.create_simple_compute_pipeline(tile_alloc_code, 2)?;
let tile_ds = session.create_simple_descriptor_set(
&tile_pipeline,
&[&memory_buf_dev, &config_buf_dev],
)?;
let tile_ds = session
.create_simple_descriptor_set(&tile_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
let path_pipeline = session.create_simple_compute_pipeline(path_alloc_code, 2)?;
let path_ds = session.create_simple_descriptor_set(
&path_pipeline,
&[&memory_buf_dev, &config_buf_dev],
)?;
let path_ds = session
.create_simple_descriptor_set(&path_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
let backdrop_pipeline = session.create_simple_compute_pipeline(backdrop_alloc_code, 2)?;
@ -308,21 +315,22 @@ impl Renderer {
// TODO: constants
let bin_code = include_bytes!("../shader/binning.spv");
let bin_pipeline = session.create_simple_compute_pipeline(bin_code, 2)?;
let bin_ds = session.create_simple_descriptor_set(
&bin_pipeline,
&[&memory_buf_dev, &config_buf_dev],
)?;
let bin_ds = session
.create_simple_descriptor_set(&bin_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
let coarse_code = include_bytes!("../shader/coarse.spv");
let coarse_pipeline = session.create_simple_compute_pipeline(coarse_code, 2)?;
let coarse_ds = session.create_simple_descriptor_set(
&coarse_pipeline,
&[&memory_buf_dev, &config_buf_dev],
)?;
let coarse_ds = session
.create_simple_descriptor_set(&coarse_pipeline, &[&memory_buf_dev, &config_buf_dev])?;
let bg_image = Self::make_test_bg_image(&session);
let k4_code = include_bytes!("../shader/kernel4.spv");
let k4_code = if session.has_descriptor_indexing() {
&include_bytes!("../shader/kernel4_idx.spv")[..]
} else {
println!("doing non-indexed k4");
&include_bytes!("../shader/kernel4.spv")[..]
};
// This is an arbitrary limit on the number of textures that can be referenced by
// the fine rasterizer. To set it for real, we probably want to pay attention both
// to the device limit (maxDescriptorSetSampledImages) but also to the number of
@ -377,7 +385,10 @@ impl Renderer {
}
pub unsafe fn record(&self, cmd_buf: &mut hub::CmdBuf, query_pool: &hub::QueryPool) {
cmd_buf.copy_buffer(self.scene_buf_host.vk_buffer(), self.scene_buf_dev.vk_buffer());
cmd_buf.copy_buffer(
self.scene_buf_host.vk_buffer(),
self.scene_buf_dev.vk_buffer(),
);
cmd_buf.copy_buffer(
self.config_buf_host.vk_buffer(),
self.config_buf_dev.vk_buffer(),

View file

@ -119,8 +119,9 @@ impl PietGpuRenderContext {
fn set_fill_mode(ctx: &mut PietGpuRenderContext, fill_mode: FillMode) {
if ctx.fill_mode != fill_mode {
ctx.elements
.push(Element::SetFillMode(SetFillMode { fill_mode: fill_mode as u32 }));
ctx.elements.push(Element::SetFillMode(SetFillMode {
fill_mode: fill_mode as u32,
}));
ctx.fill_mode = fill_mode;
}
}
@ -324,14 +325,17 @@ impl PietGpuRenderContext {
fn encode_path(&mut self, path: impl Iterator<Item=PathEl>, is_fill: bool) {
if is_fill {
self.encode_path_inner(path.flat_map(|el| {
match el {
PathEl::MoveTo(..) => {
Some(PathEl::ClosePath)
self.encode_path_inner(
path.flat_map(|el| {
match el {
PathEl::MoveTo(..) => Some(PathEl::ClosePath),
_ => None,
}
_ => None
}.into_iter().chain(Some(el))
}).chain(Some(PathEl::ClosePath)))
.into_iter()
.chain(Some(el))
})
.chain(Some(PathEl::ClosePath)),
)
} else {
self.encode_path_inner(path)
}