Use sampler for texture images

Provide images to the fine rasterization kernel as read-only textures with a
sampler, rather than as storage images. That lets us use the GPU's hardware
for sampling, which should be considerably more efficient.

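A number of parameters are still hardcoded (for example the sampler's
addressing and LOD settings, and the 1024-pixel texture size assumed in the
shader), but it does seem to work.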
This commit is contained in:
Raph Levien 2020-11-25 12:43:42 -08:00
parent 047a0830d1
commit facc9e0982
6 changed files with 86 additions and 21 deletions

View file

@ -10,13 +10,14 @@ use std::sync::{Arc, Mutex, Weak};
use crate::vulkan; use crate::vulkan;
use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait; use crate::DescriptorSetBuilder as DescriptorSetBuilderTrait;
use crate::PipelineBuilder as PipelineBuilderTrait; use crate::PipelineBuilder as PipelineBuilderTrait;
use crate::{Device, Error}; use crate::{Device, Error, SamplerParams};
pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags; pub type MemFlags = <vulkan::VkDevice as Device>::MemFlags;
pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore; pub type Semaphore = <vulkan::VkDevice as Device>::Semaphore;
pub type Pipeline = <vulkan::VkDevice as Device>::Pipeline; pub type Pipeline = <vulkan::VkDevice as Device>::Pipeline;
pub type DescriptorSet = <vulkan::VkDevice as Device>::DescriptorSet; pub type DescriptorSet = <vulkan::VkDevice as Device>::DescriptorSet;
pub type QueryPool = <vulkan::VkDevice as Device>::QueryPool; pub type QueryPool = <vulkan::VkDevice as Device>::QueryPool;
pub type Sampler = <vulkan::VkDevice as Device>::Sampler;
type Fence = <vulkan::VkDevice as Device>::Fence; type Fence = <vulkan::VkDevice as Device>::Fence;
@ -204,6 +205,10 @@ impl Session {
pub unsafe fn descriptor_set_builder(&self) -> DescriptorSetBuilder { pub unsafe fn descriptor_set_builder(&self) -> DescriptorSetBuilder {
DescriptorSetBuilder(self.0.device.descriptor_set_builder()) DescriptorSetBuilder(self.0.device.descriptor_set_builder())
} }
/// Create a sampler for texture lookups.
///
/// This is a thin wrapper: `params` is forwarded unchanged to the underlying
/// device's `create_sampler`, and that call's result is returned as-is.
pub unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Sampler, Error> {
self.0.device.create_sampler(params)
}
} }
impl CmdBuf { impl CmdBuf {
@ -355,9 +360,9 @@ impl DescriptorSetBuilder {
self self
} }
pub fn add_textures<'a>(mut self, images: impl IntoRefs<'a, Image>) -> Self { pub fn add_textures<'a>(mut self, images: impl IntoRefs<'a, Image>, sampler: &Sampler) -> Self {
let vk_images = images.into_refs().map(|i| i.vk_image()).collect::<Vec<_>>(); let vk_images = images.into_refs().map(|i| i.vk_image()).collect::<Vec<_>>();
self.0.add_textures(&vk_images); self.0.add_textures(&vk_images, sampler);
self self
} }

View file

@ -16,6 +16,17 @@ pub enum ImageLayout {
BlitSrc, BlitSrc,
BlitDst, BlitDst,
General, General,
ShaderRead,
}
/// The type of sampling for image lookup.
///
/// This could take a lot more params, such as filtering, repeat, behavior
/// at edges, etc., but for now we'll keep it simple.
#[derive(Copy, Clone, Debug)]
pub enum SamplerParams {
Nearest,
Linear,
} }
pub trait Device: Sized { pub trait Device: Sized {
@ -30,6 +41,7 @@ pub trait Device: Sized {
type Semaphore; type Semaphore;
type PipelineBuilder: PipelineBuilder<Self>; type PipelineBuilder: PipelineBuilder<Self>;
type DescriptorSetBuilder: DescriptorSetBuilder<Self>; type DescriptorSetBuilder: DescriptorSetBuilder<Self>;
type Sampler;
fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>; fn create_buffer(&self, size: u64, mem_flags: Self::MemFlags) -> Result<Self::Buffer, Error>;
@ -138,6 +150,8 @@ pub trait Device: Sized {
unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>; unsafe fn create_fence(&self, signaled: bool) -> Result<Self::Fence, Error>;
unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>; unsafe fn wait_and_reset(&self, fences: &[Self::Fence]) -> Result<(), Error>;
unsafe fn get_fence_status(&self, fence: Self::Fence) -> Result<bool, Error>; unsafe fn get_fence_status(&self, fence: Self::Fence) -> Result<bool, Error>;
unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Self::Sampler, Error>;
} }
pub trait CmdBuf<D: Device> { pub trait CmdBuf<D: Device> {
@ -223,7 +237,16 @@ pub trait PipelineBuilder<D: Device> {
/// be buffers, then images, then textures. /// be buffers, then images, then textures.
pub trait DescriptorSetBuilder<D: Device> { pub trait DescriptorSetBuilder<D: Device> {
fn add_buffers(&mut self, buffers: &[&D::Buffer]); fn add_buffers(&mut self, buffers: &[&D::Buffer]);
/// Add an array of storage images.
///
/// The images need to be in `ImageLayout::General` layout.
fn add_images(&mut self, images: &[&D::Image]); fn add_images(&mut self, images: &[&D::Image]);
fn add_textures(&mut self, images: &[&D::Image]); /// Add an array of textures.
///
/// The images need to be in `ImageLayout::ShaderRead` layout.
///
/// The same sampler is used for all textures, which is not very sophisticated;
/// we should have a way to vary the sampler.
fn add_textures(&mut self, images: &[&D::Image], sampler: &D::Sampler);
unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>; unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result<D::DescriptorSet, Error>;
} }

View file

@ -9,7 +9,7 @@ use ash::version::{DeviceV1_0, EntryV1_0, InstanceV1_0};
use ash::{vk, Device, Entry, Instance}; use ash::{vk, Device, Entry, Instance};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use crate::{Device as DeviceTrait, Error, ImageLayout}; use crate::{Device as DeviceTrait, Error, ImageLayout, SamplerParams};
pub struct VkInstance { pub struct VkInstance {
/// Retain the dynamic lib. /// Retain the dynamic lib.
@ -101,6 +101,7 @@ pub struct DescriptorSetBuilder {
buffers: Vec<vk::Buffer>, buffers: Vec<vk::Buffer>,
images: Vec<vk::ImageView>, images: Vec<vk::ImageView>,
textures: Vec<vk::ImageView>, textures: Vec<vk::ImageView>,
sampler: vk::Sampler,
} }
unsafe extern "system" fn vulkan_debug_callback( unsafe extern "system" fn vulkan_debug_callback(
@ -401,6 +402,7 @@ impl crate::Device for VkDevice {
type Semaphore = vk::Semaphore; type Semaphore = vk::Semaphore;
type PipelineBuilder = PipelineBuilder; type PipelineBuilder = PipelineBuilder;
type DescriptorSetBuilder = DescriptorSetBuilder; type DescriptorSetBuilder = DescriptorSetBuilder;
type Sampler = vk::Sampler;
fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> { fn create_buffer(&self, size: u64, mem_flags: MemFlags) -> Result<Buffer, Error> {
unsafe { unsafe {
@ -461,7 +463,8 @@ impl crate::Device for VkDevice {
// want to add sampling for images and so on. // want to add sampling for images and so on.
let usage = vk::ImageUsageFlags::STORAGE let usage = vk::ImageUsageFlags::STORAGE
| vk::ImageUsageFlags::TRANSFER_SRC | vk::ImageUsageFlags::TRANSFER_SRC
| vk::ImageUsageFlags::TRANSFER_DST; | vk::ImageUsageFlags::TRANSFER_DST
| vk::ImageUsageFlags::SAMPLED;
let image = device.create_image( let image = device.create_image(
&vk::ImageCreateInfo::builder() &vk::ImageCreateInfo::builder()
.image_type(vk::ImageType::TYPE_2D) .image_type(vk::ImageType::TYPE_2D)
@ -566,6 +569,7 @@ impl crate::Device for VkDevice {
buffers: Vec::new(), buffers: Vec::new(),
images: Vec::new(), images: Vec::new(),
textures: Vec::new(), textures: Vec::new(),
sampler: vk::Sampler::null(),
} }
} }
@ -692,6 +696,30 @@ impl crate::Device for VkDevice {
device.unmap_memory(buffer.buffer_memory); device.unmap_memory(buffer.buffer_memory);
Ok(()) Ok(())
} }
/// Create a Vulkan sampler for the requested sampling mode.
///
/// `params` selects the filter, which is applied to both magnification and
/// minification. Every other sampler setting is hardcoded below: clamp-to-border
/// addressing on all three axes with a transparent-black border, a LOD range
/// pinned to 0.0, and anisotropy disabled.
///
/// A failure from the underlying `device.create_sampler` call is propagated
/// to the caller via `?`.
unsafe fn create_sampler(&self, params: SamplerParams) -> Result<Self::Sampler, Error> {
let device = &self.device.device;
// Map the backend-independent sampling mode onto the Vulkan filter enum.
let filter = match params {
SamplerParams::Linear => vk::Filter::LINEAR,
SamplerParams::Nearest => vk::Filter::NEAREST,
};
let sampler = device.create_sampler(&vk::SamplerCreateInfo::builder()
// The same filter is used whether the texture is magnified or minified.
.mag_filter(filter)
.min_filter(filter)
// NOTE(review): min_lod and max_lod are both 0.0 below, so the mipmap mode
// presumably has no visible effect — confirm.
.mipmap_mode(vk::SamplerMipmapMode::LINEAR)
// Lookups outside the image resolve to the border color set below.
.address_mode_u(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.address_mode_v(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.address_mode_w(vk::SamplerAddressMode::CLAMP_TO_BORDER)
.mip_lod_bias(0.0)
// NOTE(review): compare_enable is not set here, so this compare op is
// presumably unused — confirm.
.compare_op(vk::CompareOp::NEVER)
.min_lod(0.0)
.max_lod(0.0)
.border_color(vk::BorderColor::FLOAT_TRANSPARENT_BLACK)
.max_anisotropy(1.0)
.anisotropy_enable(false)
, None)?;
Ok(sampler)
}
} }
impl crate::CmdBuf<VkDevice> for CmdBuf { impl crate::CmdBuf<VkDevice> for CmdBuf {
@ -967,7 +995,7 @@ impl crate::PipelineBuilder<VkDevice> for PipelineBuilder {
vk::DescriptorSetLayoutBinding::builder() vk::DescriptorSetLayoutBinding::builder()
.binding(start) .binding(start)
// TODO: we do want these to be sampled images // TODO: we do want these to be sampled images
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE) .descriptor_type(vk::DescriptorType::COMBINED_IMAGE_SAMPLER)
.descriptor_count(max_textures) .descriptor_count(max_textures)
.stage_flags(vk::ShaderStageFlags::COMPUTE) .stage_flags(vk::ShaderStageFlags::COMPUTE)
.build(), .build(),
@ -1040,8 +1068,9 @@ impl crate::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
self.images.extend(images.iter().map(|i| i.image_view)); self.images.extend(images.iter().map(|i| i.image_view));
} }
fn add_textures(&mut self, images: &[&Image]) { fn add_textures(&mut self, images: &[&Image], sampler: &vk::Sampler) {
self.textures.extend(images.iter().map(|i| i.image_view)); self.textures.extend(images.iter().map(|i| i.image_view));
self.sampler = *sampler;
} }
unsafe fn build(self, device: &VkDevice, pipeline: &Pipeline) -> Result<DescriptorSet, Error> { unsafe fn build(self, device: &VkDevice, pipeline: &Pipeline) -> Result<DescriptorSet, Error> {
@ -1055,12 +1084,19 @@ impl crate::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
.build(), .build(),
); );
} }
let n_images_total = self.images.len() + pipeline.max_textures as usize; if !self.images.is_empty() {
if n_images_total > 0 {
descriptor_pool_sizes.push( descriptor_pool_sizes.push(
vk::DescriptorPoolSize::builder() vk::DescriptorPoolSize::builder()
.ty(vk::DescriptorType::STORAGE_IMAGE) .ty(vk::DescriptorType::STORAGE_IMAGE)
.descriptor_count(n_images_total as u32) .descriptor_count(self.images.len() as u32)
.build(),
);
}
if pipeline.max_textures > 0 {
descriptor_pool_sizes.push(
vk::DescriptorPoolSize::builder()
.ty(vk::DescriptorType::COMBINED_IMAGE_SAMPLER)
.descriptor_count(pipeline.max_textures)
.build(), .build(),
); );
} }
@ -1118,9 +1154,9 @@ impl crate::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
.iter() .iter()
.map(|texture| { .map(|texture| {
vk::DescriptorImageInfo::builder() vk::DescriptorImageInfo::builder()
.sampler(vk::Sampler::null()) .sampler(self.sampler)
.image_view(*texture) .image_view(*texture)
.image_layout(vk::ImageLayout::GENERAL) .image_layout(vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL)
.build() .build()
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
@ -1128,7 +1164,7 @@ impl crate::DescriptorSetBuilder<VkDevice> for DescriptorSetBuilder {
&[vk::WriteDescriptorSet::builder() &[vk::WriteDescriptorSet::builder()
.dst_set(descriptor_sets[0]) .dst_set(descriptor_sets[0])
.dst_binding(binding) .dst_binding(binding)
.descriptor_type(vk::DescriptorType::STORAGE_IMAGE) .descriptor_type(vk::DescriptorType::COMBINED_IMAGE_SAMPLER)
.image_info(&infos) .image_info(&infos)
.build()], .build()],
&[], &[],
@ -1245,5 +1281,6 @@ fn map_image_layout(layout: ImageLayout) -> vk::ImageLayout {
ImageLayout::BlitSrc => vk::ImageLayout::TRANSFER_SRC_OPTIMAL, ImageLayout::BlitSrc => vk::ImageLayout::TRANSFER_SRC_OPTIMAL,
ImageLayout::BlitDst => vk::ImageLayout::TRANSFER_DST_OPTIMAL, ImageLayout::BlitDst => vk::ImageLayout::TRANSFER_DST_OPTIMAL,
ImageLayout::General => vk::ImageLayout::GENERAL, ImageLayout::General => vk::ImageLayout::GENERAL,
ImageLayout::ShaderRead => vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL,
} }
} }

View file

@ -29,7 +29,7 @@ layout(set = 0, binding = 2) buffer ClipScratchBuf {
layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image; layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;
layout(rgba8, set = 0, binding = 4) uniform readonly image2D textures[]; layout(set = 0, binding = 4) uniform sampler2D textures[];
#include "ptcl.h" #include "ptcl.h"
#include "tile.h" #include "tile.h"
@ -106,8 +106,8 @@ void main() {
uint clip_tos = 0; uint clip_tos = 0;
for (uint i = 0; i < CHUNK; i++) { for (uint i = 0; i < CHUNK; i++) {
rgb[i] = vec3(0.5); rgb[i] = vec3(0.5);
if (xy_uint.x < 256 && xy_uint.y < 256) { if (xy_uint.x < 1024 && xy_uint.y < 1024) {
rgb[i] = imageLoad(textures[gl_WorkGroupID.x / 16], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)).rgb; rgb[i] = texture(textures[gl_WorkGroupID.x / 64], vec2(xy_uint.x, xy_uint.y + CHUNK_DY * i) / 1024.0).rgb;
} }
mask[i] = 1.0; mask[i] = 1.0;
} }

Binary file not shown.

View file

@ -12,7 +12,7 @@ use piet::{Color, ImageFormat, RenderContext};
use piet_gpu_types::encoder::Encode; use piet_gpu_types::encoder::Encode;
use piet_gpu_hal::hub; use piet_gpu_hal::{SamplerParams, hub};
use piet_gpu_hal::{CmdBuf, Error, ImageLayout, MemFlags}; use piet_gpu_hal::{CmdBuf, Error, ImageLayout, MemFlags};
use pico_svg::PicoSvg; use pico_svg::PicoSvg;
@ -313,6 +313,7 @@ impl Renderer {
// it can't be satisfied, then for compatibility we'll probably want to fall back // it can't be satisfied, then for compatibility we'll probably want to fall back
// to an atlasing approach. // to an atlasing approach.
let max_textures = 256; let max_textures = 256;
let sampler = session.create_sampler(SamplerParams::Linear)?;
let k4_pipeline = session let k4_pipeline = session
.pipeline_builder() .pipeline_builder()
.add_buffers(3) .add_buffers(3)
@ -323,7 +324,7 @@ impl Renderer {
.descriptor_set_builder() .descriptor_set_builder()
.add_buffers(&[&ptcl_buf, &tile_buf, &clip_scratch_buf]) .add_buffers(&[&ptcl_buf, &tile_buf, &clip_scratch_buf])
.add_images(&[&image_dev]) .add_images(&[&image_dev])
.add_textures(&[&bg_image]) .add_textures(&[&bg_image], &sampler)
.build(&session, &k4_pipeline)?; .build(&session, &k4_pipeline)?;
Ok(Renderer { Ok(Renderer {
@ -472,8 +473,7 @@ impl Renderer {
ImageLayout::BlitDst, ImageLayout::BlitDst,
); );
cmd_buf.copy_buffer_to_image(buffer.vk_buffer(), image.vk_image()); cmd_buf.copy_buffer_to_image(buffer.vk_buffer(), image.vk_image());
// TODO: instead of General, we might want ShaderReadOnly cmd_buf.image_barrier(image.vk_image(), ImageLayout::BlitDst, ImageLayout::ShaderRead);
cmd_buf.image_barrier(image.vk_image(), ImageLayout::BlitDst, ImageLayout::General);
cmd_buf.finish(); cmd_buf.finish();
// Make sure not to drop the buffer and image until the command buffer completes. // Make sure not to drop the buffer and image until the command buffer completes.
cmd_buf.add_resource(&buffer); cmd_buf.add_resource(&buffer);