diff --git a/piet-gpu-hal/src/backend.rs b/piet-gpu-hal/src/backend.rs
index 496a6f0..02ac7cb 100644
--- a/piet-gpu-hal/src/backend.rs
+++ b/piet-gpu-hal/src/backend.rs
@@ -16,7 +16,9 @@
 
 //! The generic trait for backends to implement.
 
-use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, SamplerParams};
+use crate::{
+    BindType, BufferUsage, Error, GpuInfo, ImageFormat, ImageLayout, MapMode, SamplerParams,
+};
 
 pub trait Device: Sized {
     type Buffer: 'static;
@@ -47,7 +49,12 @@ pub trait Device: Sized {
     /// Maybe doesn't need result return?
     unsafe fn destroy_buffer(&self, buffer: &Self::Buffer) -> Result<(), Error>;
 
-    unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error>;
+    unsafe fn create_image2d(
+        &self,
+        width: u32,
+        height: u32,
+        format: ImageFormat,
+    ) -> Result<Self::Image, Error>;
 
     /// Destroy an image.
     ///
diff --git a/piet-gpu-hal/src/dx12.rs b/piet-gpu-hal/src/dx12.rs
index 337ca04..78ad449 100644
--- a/piet-gpu-hal/src/dx12.rs
+++ b/piet-gpu-hal/src/dx12.rs
@@ -21,7 +21,7 @@ use raw_window_handle::{HasRawWindowHandle, RawWindowHandle};
 
 use smallvec::SmallVec;
 
-use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits};
+use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, WorkgroupLimits, ImageFormat};
 
 use self::{
     descriptor::{CpuHeapRefOwned, DescriptorPool, GpuHeapRefOwned},
@@ -321,8 +321,11 @@ impl crate::backend::Device for Dx12Device {
         Ok(())
     }
 
-    unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error> {
-        let format = winapi::shared::dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM;
+    unsafe fn create_image2d(&self, width: u32, height: u32, format: ImageFormat) -> Result<Self::Image, Error> {
+        let format = match format {
+            ImageFormat::A8 => winapi::shared::dxgiformat::DXGI_FORMAT_R8_UNORM,
+            ImageFormat::Rgba8 => winapi::shared::dxgiformat::DXGI_FORMAT_R8G8B8A8_UNORM,
+        };
         let resource = self
             .device
             .create_texture2d_buffer(width.into(), height, format, true)?;
diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs
index 2a7290d..cc09832 100644
--- a/piet-gpu-hal/src/hub.rs
+++ b/piet-gpu-hal/src/hub.rs
@@ -13,7 +13,7 @@ use std::sync::{Arc, Mutex, Weak};
 use bytemuck::Pod;
 use smallvec::SmallVec;
 
-use crate::{mux, BackendType, BufWrite, MapMode};
+use crate::{mux, BackendType, BufWrite, ImageFormat, MapMode};
 
 use crate::{BindType, BufferUsage, Error, GpuInfo, ImageLayout, SamplerParams};
 
@@ -308,12 +308,14 @@ impl Session {
         }
     }
 
-    /// Create an image.
-    ///
-    /// Currently this creates only a 2D image in RGBA8 format, with usage
-    /// so that it can be accessed by shaders and used for transfer.
-    pub unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Image, Error> {
-        let image = self.0.device.create_image2d(width, height)?;
+    /// Create an image of the given size and pixel format.
+    pub unsafe fn create_image2d(
+        &self,
+        width: u32,
+        height: u32,
+        format: ImageFormat,
+    ) -> Result<Image, Error> {
+        let image = self.0.device.create_image2d(width, height, format)?;
         Ok(Image(Arc::new(ImageInner {
             image,
             session: Arc::downgrade(&self.0),
@@ -401,6 +403,33 @@ impl Session {
     pub fn backend_type(&self) -> BackendType {
         self.0.device.backend_type()
     }
+
+    #[cfg(target_os = "macos")]
+    pub unsafe fn cmd_buf_from_raw_mtl(&self, raw_cmd_buf: &::metal::CommandBufferRef) -> CmdBuf {
+        let cmd_buf = Some(self.0.device.cmd_buf_from_raw_mtl(raw_cmd_buf));
+        let resources = Vec::new();
+        // Expect client to do cleanup manually.
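+        // (The Weak::new() session below can never be upgraded, so the usual
+        // resource-tracking drop logic is a no-op for this command buffer.)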
+        let session = Weak::new();
+        CmdBuf {
+            cmd_buf,
+            fence: None,
+            resources,
+            session,
+        }
+    }
+
+    #[cfg(target_os = "macos")]
+    pub unsafe fn image_from_raw_mtl(
+        &self,
+        raw_texture: &::metal::TextureRef,
+        width: u32,
+        height: u32,
+    ) -> Image {
+        let image = self.0.device.image_from_raw_mtl(raw_texture, width, height);
+        // Expect client to do cleanup manually.
+        let session = Weak::new();
+        Image(Arc::new(ImageInner { image, session }))
+    }
 }
 
 impl SessionInner {
diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs
index a0a4da3..fab7d65 100644
--- a/piet-gpu-hal/src/lib.rs
+++ b/piet-gpu-hal/src/lib.rs
@@ -16,8 +16,8 @@ mod macros;
 mod mux;
 
 pub use crate::mux::{
-    DescriptorSet, Fence, Instance, Pipeline, QueryPool, Sampler, Semaphore, ShaderCode, Surface,
-    Swapchain,
+    DescriptorSet, Device, Fence, Instance, Pipeline, QueryPool, Sampler, Semaphore, ShaderCode,
+    Surface, Swapchain,
 };
 pub use bufwrite::BufWrite;
 pub use hub::{
@@ -91,6 +91,15 @@ pub enum SamplerParams {
     Linear,
 }
 
+/// Image format.
+#[derive(Copy, Clone, Debug)]
+pub enum ImageFormat {
+    // 8 bit grayscale / alpha
+    A8,
+    // 8 bit per pixel RGBA
+    Rgba8,
+}
+
 bitflags! {
     /// The intended usage for a buffer, specified on creation.
     pub struct BufferUsage: u32 {
diff --git a/piet-gpu-hal/src/metal.rs b/piet-gpu-hal/src/metal.rs
index 45e0406..e3157d4 100644
--- a/piet-gpu-hal/src/metal.rs
+++ b/piet-gpu-hal/src/metal.rs
@@ -30,7 +30,7 @@ use metal::{CGFloat, MTLFeatureSet};
 
 use raw_window_handle::{HasRawWindowHandle, RawWindowHandle};
 
-use crate::{BufferUsage, Error, GpuInfo, MapMode, WorkgroupLimits};
+use crate::{BufferUsage, Error, GpuInfo, ImageFormat, MapMode, WorkgroupLimits};
 
 use util::*;
 
@@ -153,41 +153,7 @@ impl MtlInstance {
     pub fn device(&self, _surface: Option<&MtlSurface>) -> Result<MtlDevice, Error> {
         if let Some(device) = metal::Device::system_default() {
             let cmd_queue = device.new_command_queue();
-            let is_mac = device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1);
-            let is_ios = device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily1_v1);
-            let version = NSOperatingSystemVersion::get();
-
-            let use_staging_buffers =
-                if (is_mac && version.at_least(10, 15)) || (is_ios && version.at_least(13, 0)) {
-                    !device.has_unified_memory()
-                } else {
-                    !device.is_low_power()
-                };
-            // TODO: these are conservative; we need to derive these from
-            // supports_feature_set queries.
-            let gpu_info = GpuInfo {
-                has_descriptor_indexing: false,
-                has_subgroups: false,
-                subgroup_size: None,
-                // The workgroup limits are taken from the minimum of a desktop installation;
-                // we don't support iOS right now, but in case of testing on those devices it might
-                // need to change these (or just queried properly).
-                workgroup_limits: WorkgroupLimits {
-                    max_size: [1024, 1024, 64],
-                    max_invocations: 1024,
-                },
-                has_memory_model: false,
-                use_staging_buffers,
-            };
-            let helpers = Arc::new(Helpers {
-                clear_pipeline: clear::make_clear_pipeline(&device),
-            });
-            Ok(MtlDevice {
-                device,
-                cmd_queue: Arc::new(Mutex::new(cmd_queue)),
-                gpu_info,
-                helpers,
-            })
+            Ok(MtlDevice::new_from_raw_mtl(device, cmd_queue))
         } else {
             Err("can't create system default Metal device".into())
         }
@@ -212,6 +178,60 @@ impl MtlInstance {
     }
 }
 
+impl MtlDevice {
+    pub fn new_from_raw_mtl(device: metal::Device, cmd_queue: metal::CommandQueue) -> MtlDevice {
+        let is_mac = device.supports_feature_set(MTLFeatureSet::macOS_GPUFamily1_v1);
+        let is_ios = device.supports_feature_set(MTLFeatureSet::iOS_GPUFamily1_v1);
+        let version = NSOperatingSystemVersion::get();
+
+        let use_staging_buffers =
+            if (is_mac && version.at_least(10, 15)) || (is_ios && version.at_least(13, 0)) {
+                !device.has_unified_memory()
+            } else {
+                !device.is_low_power()
+            };
+        // TODO: these are conservative; we need to derive these from
+        // supports_feature_set queries.
+        let gpu_info = GpuInfo {
+            has_descriptor_indexing: false,
+            has_subgroups: false,
+            subgroup_size: None,
+            // The workgroup limits are taken from the minimum of a desktop installation;
+            // we don't support iOS right now, but in case of testing on those devices it might
+            // need to change these (or just queried properly).
+            workgroup_limits: WorkgroupLimits {
+                max_size: [1024, 1024, 64],
+                max_invocations: 1024,
+            },
+            has_memory_model: false,
+            use_staging_buffers,
+        };
+        let helpers = Arc::new(Helpers {
+            clear_pipeline: clear::make_clear_pipeline(&device),
+        });
+        MtlDevice {
+            device,
+            cmd_queue: Arc::new(Mutex::new(cmd_queue)),
+            gpu_info,
+            helpers,
+        }
+    }
+
+    pub fn cmd_buf_from_raw_mtl(&self, raw_cmd_buf: metal::CommandBuffer) -> CmdBuf {
+        let cmd_buf = raw_cmd_buf;
+        let helpers = self.helpers.clone();
+        CmdBuf { cmd_buf, helpers }
+    }
+
+    pub fn image_from_raw_mtl(&self, texture: metal::Texture, width: u32, height: u32) -> Image {
+        Image {
+            texture,
+            width,
+            height,
+        }
+    }
+}
+
 impl crate::backend::Device for MtlDevice {
     type Buffer = Buffer;
 
@@ -259,14 +279,23 @@ impl crate::backend::Device for MtlDevice {
         Ok(())
     }
 
-    unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error> {
+    unsafe fn create_image2d(
+        &self,
+        width: u32,
+        height: u32,
+        format: ImageFormat,
+    ) -> Result<Self::Image, Error> {
         let desc = metal::TextureDescriptor::new();
         desc.set_width(width as u64);
         desc.set_height(height as u64);
         // These are defaults so don't need to be explicitly set.
         //desc.set_depth(1);
         //desc.set_mipmap_level_count(1);
-        //desc.set_pixel_format(metal::MTLPixelFormat::RGBA8Unorm);
+        let mtl_format = match format {
+            ImageFormat::A8 => metal::MTLPixelFormat::R8Unorm,
+            ImageFormat::Rgba8 => metal::MTLPixelFormat::RGBA8Unorm,
+        };
+        desc.set_pixel_format(mtl_format);
         desc.set_usage(metal::MTLTextureUsage::ShaderRead | metal::MTLTextureUsage::ShaderWrite);
         let texture = self.device.new_texture(&desc);
         Ok(Image {
diff --git a/piet-gpu-hal/src/mux.rs b/piet-gpu-hal/src/mux.rs
index c67de86..af1702d 100644
--- a/piet-gpu-hal/src/mux.rs
+++ b/piet-gpu-hal/src/mux.rs
@@ -35,6 +35,7 @@ use crate::backend::DescriptorSetBuilder as DescriptorSetBuilderTrait;
 use crate::backend::Device as DeviceTrait;
 use crate::BackendType;
 use crate::BindType;
+use crate::ImageFormat;
 use crate::MapMode;
 use crate::{BufferUsage, Error, GpuInfo, ImageLayout, InstanceFlags};
 
@@ -208,6 +209,38 @@ impl Instance {
 // but not doing so lets us diverge more easily (at the moment, the divergence is
 // missing functionality).
 impl Device {
+    #[cfg(target_os = "macos")]
+    pub fn new_from_raw_mtl(
+        device: &::metal::DeviceRef,
+        queue: &::metal::CommandQueueRef,
+    ) -> Device {
+        Device::Mtl(metal::MtlDevice::new_from_raw_mtl(
+            device.to_owned(),
+            queue.to_owned(),
+        ))
+    }
+
+    #[cfg(target_os = "macos")]
+    pub fn cmd_buf_from_raw_mtl(&self, raw_cmd_buf: &::metal::CommandBufferRef) -> CmdBuf {
+        // Note: this will cause problems if we support multiple back-ends on mac.
+        // But it will be a compile error, so it won't go unnoticed.
+        let Device::Mtl(d) = self;
+        CmdBuf::Mtl(d.cmd_buf_from_raw_mtl(raw_cmd_buf.to_owned()))
+    }
+
+    #[cfg(target_os = "macos")]
+    pub fn image_from_raw_mtl(
+        &self,
+        raw_texture: &::metal::TextureRef,
+        width: u32,
+        height: u32,
+    ) -> Image {
+        // Note: this will cause problems if we support multiple back-ends on mac.
+        // But it will be a compile error, so it won't go unnoticed.
+        let Device::Mtl(d) = self;
+        Image::Mtl(d.image_from_raw_mtl(raw_texture.to_owned(), width, height))
+    }
+
     pub fn query_gpu_info(&self) -> GpuInfo {
         mux_match! { self;
             Device::Vk(d) => d.query_gpu_info(),
@@ -232,11 +265,16 @@ impl Device {
         }
     }
 
-    pub unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Image, Error> {
+    pub unsafe fn create_image2d(
+        &self,
+        width: u32,
+        height: u32,
+        format: ImageFormat,
+    ) -> Result<Image, Error> {
         mux_match! { self;
-            Device::Vk(d) => d.create_image2d(width, height).map(Image::Vk),
-            Device::Dx12(d) => d.create_image2d(width, height).map(Image::Dx12),
-            Device::Mtl(d) => d.create_image2d(width, height).map(Image::Mtl),
+            Device::Vk(d) => d.create_image2d(width, height, format).map(Image::Vk),
+            Device::Dx12(d) => d.create_image2d(width, height, format).map(Image::Dx12),
+            Device::Mtl(d) => d.create_image2d(width, height, format).map(Image::Mtl),
         }
     }
 
diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs
index e34981e..8392899 100644
--- a/piet-gpu-hal/src/vulkan.rs
+++ b/piet-gpu-hal/src/vulkan.rs
@@ -14,7 +14,7 @@ use smallvec::SmallVec;
 
 use crate::backend::Device as DeviceTrait;
 use crate::{
-    BindType, BufferUsage, Error, GpuInfo, ImageLayout, MapMode, SamplerParams, SubgroupSize,
+    BindType, BufferUsage, Error, GpuInfo, ImageFormat, ImageLayout, MapMode, SamplerParams, SubgroupSize,
     WorkgroupLimits,
 };
 
@@ -535,7 +535,7 @@ impl crate::backend::Device for VkDevice {
         Ok(())
     }
 
-    unsafe fn create_image2d(&self, width: u32, height: u32) -> Result<Self::Image, Error> {
+    unsafe fn create_image2d(&self, width: u32, height: u32, format: ImageFormat) -> Result<Self::Image, Error> {
         let device = &self.device.device;
         let extent = vk::Extent3D {
             width,
             height,
             depth: 1,
         };
         let usage = vk::ImageUsageFlags::STORAGE
             | vk::ImageUsageFlags::TRANSFER_SRC
             | vk::ImageUsageFlags::TRANSFER_DST;
+        let vk_format = match format {
+            ImageFormat::A8 => vk::Format::R8_UNORM,
+            ImageFormat::Rgba8 => vk::Format::R8G8B8A8_UNORM,
+        };
         let image = device.create_image(
             &vk::ImageCreateInfo::builder()
                 .image_type(vk::ImageType::TYPE_2D)
-                .format(vk::Format::R8G8B8A8_UNORM)
+                .format(vk_format)
                 .extent(extent)
                 .mip_levels(1)
                 .array_layers(1)
diff --git a/piet-gpu/shader/build.ninja b/piet-gpu/shader/build.ninja
index 1073c20..7b80f6f 100644
--- a/piet-gpu/shader/build.ninja
+++ b/piet-gpu/shader/build.ninja
@@ -58,6 +58,12 @@ build gen/kernel4.hlsl: hlsl gen/kernel4.spv
 build gen/kernel4.dxil: dxil gen/kernel4.hlsl
 build gen/kernel4.msl: msl gen/kernel4.spv
 
+build gen/kernel4_gray.spv: glsl kernel4.comp | ptcl.h setup.h
+  flags = -DGRAY
+build gen/kernel4_gray.hlsl: hlsl gen/kernel4_gray.spv
+build gen/kernel4_gray.dxil: dxil gen/kernel4_gray.hlsl
+build gen/kernel4_gray.msl: msl gen/kernel4_gray.spv
+
 # New element pipeline follows
 
 build gen/transform_reduce.spv: glsl transform_reduce.comp | scene.h setup.h mem.h
@@ -113,6 +119,6 @@ build gen/draw_leaf.hlsl: hlsl gen/draw_leaf.spv
 build gen/draw_leaf.dxil: dxil gen/draw_leaf.hlsl
 build gen/draw_leaf.msl: msl gen/draw_leaf.spv
 
-build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
-build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
-build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl
+build spv: phony gen/backdrop_lg.spv gen/backdrop.spv gen/bbox_clear.spv gen/binning.spv gen/coarse.spv gen/draw_leaf.spv gen/draw_reduce.spv gen/draw_root.spv gen/kernel4.spv gen/kernel4_gray.spv gen/path_coarse.spv gen/pathseg.spv gen/pathtag_reduce.spv gen/pathtag_root.spv gen/tile_alloc.spv gen/transform_leaf.spv gen/transform_reduce.spv gen/transform_root.spv
+build dxil: phony gen/backdrop.hlsl gen/backdrop_lg.hlsl gen/bbox_clear.hlsl gen/binning.hlsl gen/coarse.hlsl gen/draw_leaf.hlsl gen/draw_reduce.hlsl gen/draw_root.hlsl gen/kernel4.hlsl gen/kernel4_gray.hlsl gen/path_coarse.hlsl gen/pathseg.hlsl gen/pathtag_reduce.hlsl gen/pathtag_root.hlsl gen/tile_alloc.hlsl gen/transform_leaf.hlsl gen/transform_reduce.hlsl gen/transform_root.hlsl
+build msl: phony gen/backdrop_lg.msl gen/backdrop.msl gen/bbox_clear.msl gen/binning.msl gen/coarse.msl gen/draw_leaf.msl gen/draw_reduce.msl gen/draw_root.msl gen/kernel4.msl gen/kernel4_gray.msl gen/path_coarse.msl gen/pathseg.msl gen/pathtag_reduce.msl gen/pathtag_root.msl gen/tile_alloc.msl gen/transform_leaf.msl gen/transform_reduce.msl gen/transform_root.msl
diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil
new file mode 100644
index 0000000..6ec6d57
Binary files /dev/null and b/piet-gpu/shader/gen/kernel4_gray.dxil differ
diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl
new file mode 100644
index 0000000..7426758
--- /dev/null
+++ b/piet-gpu/shader/gen/kernel4_gray.hlsl
@@ -0,0 +1,688 @@
+struct Alloc
+{
+    uint offset;
+};
+
+struct CmdStrokeRef
+{
+    uint offset;
+};
+
+struct CmdStroke
+{
+    uint tile_ref;
+    float half_width;
+};
+
+struct CmdFillRef
+{
+    uint offset;
+};
+
+struct CmdFill
+{
+    uint tile_ref;
+    int backdrop;
+};
+
+struct CmdColorRef
+{
+    uint offset;
+};
+
+struct CmdColor
+{
+    uint rgba_color;
+};
+
+struct CmdLinGradRef
+{
+    uint offset;
+};
+
+struct CmdLinGrad
+{
+    uint index;
+    float line_x;
+    float line_y;
+    float line_c;
+};
+
+struct CmdImageRef
+{
+    uint offset;
+};
+
+struct CmdImage
+{
+    uint index;
+    int2 offset;
+};
+
+struct CmdAlphaRef
+{
+    uint offset;
+};
+
+struct CmdAlpha
+{
+    float alpha;
+};
+
+struct CmdJumpRef
+{
+    uint offset;
+};
+
+struct CmdJump
+{
+    uint new_ref;
+};
+
+struct CmdRef
+{
+    uint offset;
+};
+
+struct CmdTag
+{
+    uint tag;
+    uint flags;
+};
+
+struct TileSegRef
+{
+    uint offset;
+};
+
+struct TileSeg
+{
+    float2 origin;
+    float2 _vector;
+    float y_edge;
+    TileSegRef next;
+};
+
+struct Config
+{
+    uint n_elements;
+    uint n_pathseg;
+    uint width_in_tiles;
+    uint height_in_tiles;
+    Alloc tile_alloc;
+    Alloc bin_alloc;
+    Alloc ptcl_alloc;
+    Alloc pathseg_alloc;
+    Alloc anno_alloc;
+    Alloc trans_alloc;
+    Alloc bbox_alloc;
+    Alloc drawmonoid_alloc;
+    uint n_trans;
+    uint n_path;
+    uint trans_offset;
+    uint linewidth_offset;
+    uint pathtag_offset;
+    uint pathseg_offset;
+};
+
+static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
+
+RWByteAddressBuffer _202 : register(u0, space0);
+ByteAddressBuffer _723 : register(t1, space0);
+RWTexture2D<unorm float4> image_atlas : register(u3, space0);
+RWTexture2D<unorm float4> gradients : register(u4, space0);
+RWTexture2D<unorm float> image : register(u2, space0);
+
+static uint3 gl_WorkGroupID;
+static uint3 gl_LocalInvocationID;
+
+struct SPIRV_Cross_Input
+{
+    uint3 gl_WorkGroupID : SV_GroupID;
+    uint3 gl_LocalInvocationID : SV_GroupThreadID;
+};
+
+uint spvPackUnorm4x8(float4 value)
+{
+    uint4
Packed = uint4(round(saturate(value) * 255.0)); + return Packed.x | (Packed.y << 8) | (Packed.z << 16) | (Packed.w << 24); +} + +float4 spvUnpackUnorm4x8(uint value) +{ + uint4 Packed = uint4(value & 0xff, (value >> 8) & 0xff, (value >> 16) & 0xff, value >> 24); + return float4(Packed) / 255.0; +} + +Alloc slice_mem(Alloc a, uint offset, uint size) +{ + Alloc _215 = { a.offset + offset }; + return _215; +} + +bool touch_mem(Alloc alloc, uint offset) +{ + return true; +} + +uint read_mem(Alloc alloc, uint offset) +{ + Alloc param = alloc; + uint param_1 = offset; + if (!touch_mem(param, param_1)) + { + return 0u; + } + uint v = _202.Load(offset * 4 + 8); + return v; +} + +CmdTag Cmd_tag(Alloc a, CmdRef ref) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint tag_and_flags = read_mem(param, param_1); + CmdTag _432 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _432; +} + +CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + CmdStroke s; + s.tile_ref = raw0; + s.half_width = asfloat(raw1); + return s; +} + +CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) +{ + CmdStrokeRef _449 = { ref.offset + 4u }; + Alloc param = a; + CmdStrokeRef param_1 = _449; + return CmdStroke_read(param, param_1); +} + +Alloc new_alloc(uint offset, uint size, bool mem_ok) +{ + Alloc a; + a.offset = offset; + return a; +} + +TileSeg TileSeg_read(Alloc a, TileSegRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); + Alloc param_10 = a; + uint param_11 = ix + 5u; + uint raw5 = read_mem(param_10, param_11); + TileSeg s; + s.origin = float2(asfloat(raw0), asfloat(raw1)); + s._vector = float2(asfloat(raw2), asfloat(raw3)); + s.y_edge = asfloat(raw4); + TileSegRef _572 = { raw5 }; + s.next = _572; + return s; +} + +uint2 chunk_offset(uint i) +{ + return uint2((i % 2u) * 8u, (i / 2u) * 4u); +} + +CmdFill CmdFill_read(Alloc a, CmdFillRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + CmdFill s; + s.tile_ref = raw0; + s.backdrop = int(raw1); + return s; +} + +CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) +{ + CmdFillRef _439 = { ref.offset + 4u }; + Alloc param = a; + CmdFillRef param_1 = _439; + return CmdFill_read(param, param_1); +} + +CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + CmdAlpha s; + s.alpha = asfloat(raw0); + return s; +} + +CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) +{ + CmdAlphaRef _459 = { ref.offset + 4u }; + Alloc param = a; + CmdAlphaRef param_1 = _459; + return CmdAlpha_read(param, param_1); +} + +CmdColor CmdColor_read(Alloc a, CmdColorRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = 
read_mem(param, param_1); + CmdColor s; + s.rgba_color = raw0; + return s; +} + +CmdColor Cmd_Color_read(Alloc a, CmdRef ref) +{ + CmdColorRef _469 = { ref.offset + 4u }; + Alloc param = a; + CmdColorRef param_1 = _469; + return CmdColor_read(param, param_1); +} + +float3 fromsRGB(float3 srgb) +{ + bool3 cutoff = bool3(srgb.x >= 0.040449999272823333740234375f.xxx.x, srgb.y >= 0.040449999272823333740234375f.xxx.y, srgb.z >= 0.040449999272823333740234375f.xxx.z); + float3 below = srgb / 12.9200000762939453125f.xxx; + float3 above = pow((srgb + 0.054999999701976776123046875f.xxx) / 1.05499994754791259765625f.xxx, 2.400000095367431640625f.xxx); + return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? above.z : below.z); +} + +float4 unpacksRGB(uint srgba) +{ + float4 color = spvUnpackUnorm4x8(srgba).wzyx; + float3 param = color.xyz; + return float4(fromsRGB(param), color.w); +} + +CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7); + CmdLinGrad s; + s.index = raw0; + s.line_x = asfloat(raw1); + s.line_y = asfloat(raw2); + s.line_c = asfloat(raw3); + return s; +} + +CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) +{ + CmdLinGradRef _479 = { ref.offset + 4u }; + Alloc param = a; + CmdLinGradRef param_1 = _479; + return CmdLinGrad_read(param, param_1); +} + +CmdImage CmdImage_read(Alloc a, CmdImageRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + CmdImage s; + s.index = raw0; + s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + return s; +} + +CmdImage Cmd_Image_read(Alloc a, CmdRef ref) +{ + CmdImageRef _489 = { ref.offset + 4u }; + Alloc param = a; + CmdImageRef param_1 = _489; + return CmdImage_read(param, param_1); +} + +void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) +{ + float4 rgba[8]; + for (uint i = 0u; i < 8u; i++) + { + uint param = i; + int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; + float4 fg_rgba = image_atlas[uv]; + float3 param_1 = fg_rgba.xyz; + float3 _695 = fromsRGB(param_1); + fg_rgba.x = _695.x; + fg_rgba.y = _695.y; + fg_rgba.z = _695.z; + rgba[i] = fg_rgba; + } + spvReturnValue = rgba; +} + +float3 tosRGB(float3 rgb) +{ + bool3 cutoff = bool3(rgb.x >= 0.003130800090730190277099609375f.xxx.x, rgb.y >= 0.003130800090730190277099609375f.xxx.y, rgb.z >= 0.003130800090730190277099609375f.xxx.z); + float3 below = 12.9200000762939453125f.xxx * rgb; + float3 above = (1.05499994754791259765625f.xxx * pow(rgb, 0.416660010814666748046875f.xxx)) - 0.054999999701976776123046875f.xxx; + return float3(cutoff.x ? above.x : below.x, cutoff.y ? above.y : below.y, cutoff.z ? 
above.z : below.z); +} + +uint packsRGB(inout float4 rgba) +{ + float3 param = rgba.xyz; + rgba = float4(tosRGB(param), rgba.w); + return spvPackUnorm4x8(rgba.wzyx); +} + +CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + CmdJump s; + s.new_ref = raw0; + return s; +} + +CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) +{ + CmdJumpRef _499 = { ref.offset + 4u }; + Alloc param = a; + CmdJumpRef param_1 = _499; + return CmdJump_read(param, param_1); +} + +void comp_main() +{ + uint tile_ix = (gl_WorkGroupID.y * _723.Load(8)) + gl_WorkGroupID.x; + Alloc _738; + _738.offset = _723.Load(24); + Alloc param; + param.offset = _738.offset; + uint param_1 = tile_ix * 1024u; + uint param_2 = 1024u; + Alloc cmd_alloc = slice_mem(param, param_1, param_2); + CmdRef _747 = { cmd_alloc.offset }; + CmdRef cmd_ref = _747; + uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); + float2 xy = float2(xy_uint); + float4 rgba[8]; + for (uint i = 0u; i < 8u; i++) + { + rgba[i] = 0.0f.xxxx; + } + uint clip_depth = 0u; + bool mem_ok = _202.Load(4) == 0u; + float df[8]; + TileSegRef tile_seg_ref; + float area[8]; + uint blend_stack[128][8]; + float blend_alpha_stack[128][8]; + while (mem_ok) + { + Alloc param_3 = cmd_alloc; + CmdRef param_4 = cmd_ref; + uint tag = Cmd_tag(param_3, param_4).tag; + if (tag == 0u) + { + break; + } + switch (tag) + { + case 2u: + { + Alloc param_5 = cmd_alloc; + CmdRef param_6 = cmd_ref; + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6); + for (uint k = 0u; k < 8u; k++) + { + df[k] = 1000000000.0f; + } + TileSegRef _842 = { stroke.tile_ref }; + tile_seg_ref = _842; + do + { + uint param_7 = tile_seg_ref.offset; + uint param_8 = 24u; + bool param_9 = mem_ok; + Alloc param_10 = new_alloc(param_7, param_8, param_9); + TileSegRef param_11 = tile_seg_ref; + TileSeg seg = TileSeg_read(param_10, param_11); + float2 line_vec = seg._vector; + for (uint k_1 = 0u; k_1 < 8u; k_1++) + { + float2 dpos = (xy + 0.5f.xx) - seg.origin; + uint param_12 = k_1; + dpos += float2(chunk_offset(param_12)); + float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0f, 1.0f); + df[k_1] = min(df[k_1], length((line_vec * t) - dpos)); + } + tile_seg_ref = seg.next; + } while (tile_seg_ref.offset != 0u); + for (uint k_2 = 0u; k_2 < 8u; k_2++) + { + area[k_2] = clamp((stroke.half_width + 0.5f) - df[k_2], 0.0f, 1.0f); + } + cmd_ref.offset += 12u; + break; + } + case 1u: + { + Alloc param_13 = cmd_alloc; + CmdRef param_14 = cmd_ref; + CmdFill fill = Cmd_Fill_read(param_13, param_14); + for (uint k_3 = 0u; k_3 < 8u; k_3++) + { + area[k_3] = float(fill.backdrop); + } + TileSegRef _964 = { fill.tile_ref }; + tile_seg_ref = _964; + do + { + uint param_15 = tile_seg_ref.offset; + uint param_16 = 24u; + bool param_17 = mem_ok; + Alloc param_18 = new_alloc(param_15, param_16, param_17); + TileSegRef param_19 = tile_seg_ref; + TileSeg seg_1 = TileSeg_read(param_18, param_19); + for (uint k_4 = 0u; k_4 < 8u; k_4++) + { + uint param_20 = k_4; + float2 my_xy = xy + float2(chunk_offset(param_20)); + float2 start = seg_1.origin - my_xy; + float2 end = start + seg_1._vector; + float2 window = clamp(float2(start.y, end.y), 0.0f.xx, 1.0f.xx); + if (window.x != window.y) + { + float2 t_1 = (window - start.y.xx) / seg_1._vector.y.xx; + float2 xs = float2(lerp(start.x, end.x, t_1.x), lerp(start.x, end.x, t_1.y)); + float xmin = min(min(xs.x, 
xs.y), 1.0f) - 9.9999999747524270787835121154785e-07f; + float xmax = max(xs.x, xs.y); + float b = min(xmax, 1.0f); + float c = max(b, 0.0f); + float d = max(xmin, 0.0f); + float a = ((b + (0.5f * ((d * d) - (c * c)))) - xmin) / (xmax - xmin); + area[k_4] += (a * (window.x - window.y)); + } + area[k_4] += (sign(seg_1._vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0f, 0.0f, 1.0f)); + } + tile_seg_ref = seg_1.next; + } while (tile_seg_ref.offset != 0u); + for (uint k_5 = 0u; k_5 < 8u; k_5++) + { + area[k_5] = min(abs(area[k_5]), 1.0f); + } + cmd_ref.offset += 12u; + break; + } + case 3u: + { + for (uint k_6 = 0u; k_6 < 8u; k_6++) + { + area[k_6] = 1.0f; + } + cmd_ref.offset += 4u; + break; + } + case 4u: + { + Alloc param_21 = cmd_alloc; + CmdRef param_22 = cmd_ref; + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22); + for (uint k_7 = 0u; k_7 < 8u; k_7++) + { + area[k_7] = alpha.alpha; + } + cmd_ref.offset += 8u; + break; + } + case 5u: + { + Alloc param_23 = cmd_alloc; + CmdRef param_24 = cmd_ref; + CmdColor color = Cmd_Color_read(param_23, param_24); + uint param_25 = color.rgba_color; + float4 fg = unpacksRGB(param_25); + for (uint k_8 = 0u; k_8 < 8u; k_8++) + { + float4 fg_k = fg * area[k_8]; + rgba[k_8] = (rgba[k_8] * (1.0f - fg_k.w)) + fg_k; + } + cmd_ref.offset += 8u; + break; + } + case 6u: + { + Alloc param_26 = cmd_alloc; + CmdRef param_27 = cmd_ref; + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27); + float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; + for (uint k_9 = 0u; k_9 < 8u; k_9++) + { + uint param_28 = k_9; + float2 chunk_xy = float2(chunk_offset(param_28)); + float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y); + int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); + float4 fg_rgba = gradients[int2(x, int(lin.index))]; + float3 param_29 = fg_rgba.xyz; + float3 _1298 = fromsRGB(param_29); + fg_rgba.x = _1298.x; + fg_rgba.y = _1298.y; + fg_rgba.z = _1298.z; + rgba[k_9] = fg_rgba; + } + cmd_ref.offset += 20u; + break; + } + case 7u: + { + Alloc param_30 = cmd_alloc; + CmdRef param_31 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_30, param_31); + uint2 param_32 = xy_uint; + CmdImage param_33 = fill_img; + float4 _1327[8]; + fillImage(_1327, param_32, param_33); + float4 img[8] = _1327; + for (uint k_10 = 0u; k_10 < 8u; k_10++) + { + float4 fg_k_1 = img[k_10] * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_1.w)) + fg_k_1; + } + cmd_ref.offset += 12u; + break; + } + case 8u: + { + for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + uint d_2 = min(clip_depth, 127u); + float4 param_34 = float4(rgba[k_11]); + uint _1390 = packsRGB(param_34); + blend_stack[d_2][k_11] = _1390; + blend_alpha_stack[d_2][k_11] = clamp(abs(area[k_11]), 0.0f, 1.0f); + rgba[k_11] = 0.0f.xxxx; + } + clip_depth++; + cmd_ref.offset += 4u; + break; + } + case 9u: + { + clip_depth--; + for (uint k_12 = 0u; k_12 < 8u; k_12++) + { + uint d_3 = min(clip_depth, 127u); + uint param_35 = blend_stack[d_3][k_12]; + float4 bg = unpacksRGB(param_35); + float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; + rgba[k_12] = (bg * (1.0f - fg_1.w)) + fg_1; + } + cmd_ref.offset += 4u; + break; + } + case 10u: + { + Alloc param_36 = cmd_alloc; + CmdRef param_37 = cmd_ref; + CmdRef _1469 = { Cmd_Jump_read(param_36, param_37).new_ref }; + cmd_ref = _1469; + cmd_alloc.offset = cmd_ref.offset; + break; + } + } + } + for (uint i_1 = 0u; i_1 < 8u; i_1++) + { + uint param_38 = i_1; + image[int2(xy_uint + chunk_offset(param_38))] = rgba[i_1].w.x; + } +} + 
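+// Note: unlike kernel4.hlsl, comp_main above ends by storing only the alpha
+// channel (rgba[i].w) to the R8 target; this is the -DGRAY specialization of
+// kernel4.comp built by build.ninja.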
+[numthreads(8, 4, 1)]
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_WorkGroupID = stage_input.gl_WorkGroupID;
+    gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
+    comp_main();
+}
diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl
new file mode 100644
index 0000000..e672020
--- /dev/null
+++ b/piet-gpu/shader/gen/kernel4_gray.msl
@@ -0,0 +1,727 @@
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wmissing-braces"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+template<typename T, size_t Num>
+struct spvUnsafeArray
+{
+    T elements[Num ? Num : 1];
+
+    thread T& operator [] (size_t pos) thread
+    {
+        return elements[pos];
+    }
+    constexpr const thread T& operator [] (size_t pos) const thread
+    {
+        return elements[pos];
+    }
+
+    device T& operator [] (size_t pos) device
+    {
+        return elements[pos];
+    }
+    constexpr const device T& operator [] (size_t pos) const device
+    {
+        return elements[pos];
+    }
+
+    constexpr const constant T& operator [] (size_t pos) const constant
+    {
+        return elements[pos];
+    }
+
+    threadgroup T& operator [] (size_t pos) threadgroup
+    {
+        return elements[pos];
+    }
+    constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
+    {
+        return elements[pos];
+    }
+};
+
+struct Alloc
+{
+    uint offset;
+};
+
+struct CmdStrokeRef
+{
+    uint offset;
+};
+
+struct CmdStroke
+{
+    uint tile_ref;
+    float half_width;
+};
+
+struct CmdFillRef
+{
+    uint offset;
+};
+
+struct CmdFill
+{
+    uint tile_ref;
+    int backdrop;
+};
+
+struct CmdColorRef
+{
+    uint offset;
+};
+
+struct CmdColor
+{
+    uint rgba_color;
+};
+
+struct CmdLinGradRef
+{
+    uint offset;
+};
+
+struct CmdLinGrad
+{
+    uint index;
+    float line_x;
+    float line_y;
+    float line_c;
+};
+
+struct CmdImageRef
+{
+    uint offset;
+};
+
+struct CmdImage
+{
+    uint index;
+    int2 offset;
+};
+
+struct CmdAlphaRef
+{
+    uint offset;
+};
+
+struct CmdAlpha
+{
+    float alpha;
+};
+
+struct CmdJumpRef
+{
+    uint offset;
+};
+
+struct CmdJump
+{
+    uint new_ref;
+};
+
+struct CmdRef
+{
+    uint offset;
+};
+
+struct CmdTag
+{
+    uint tag;
+    uint flags;
+};
+
+struct TileSegRef
+{
+    uint offset;
+};
+
+struct TileSeg
+{
+    float2 origin;
+    float2 vector;
+    float y_edge;
+    TileSegRef next;
+};
+
+struct Memory
+{
+    uint mem_offset;
+    uint mem_error;
+    uint memory[1];
+};
+
+struct Alloc_1
+{
+    uint offset;
+};
+
+struct Config
+{
+    uint n_elements;
+    uint n_pathseg;
+    uint width_in_tiles;
+    uint height_in_tiles;
+    Alloc_1 tile_alloc;
+    Alloc_1 bin_alloc;
+    Alloc_1 ptcl_alloc;
+    Alloc_1 pathseg_alloc;
+    Alloc_1 anno_alloc;
+    Alloc_1 trans_alloc;
+    Alloc_1 bbox_alloc;
+    Alloc_1 drawmonoid_alloc;
+    uint n_trans;
+    uint n_path;
+    uint trans_offset;
+    uint linewidth_offset;
+    uint pathtag_offset;
+    uint pathseg_offset;
+};
+
+struct ConfigBuf
+{
+    Config conf;
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u);
+
+static inline __attribute__((always_inline))
+Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size)
+{
+    return Alloc{ a.offset + offset };
+}
+
+static inline __attribute__((always_inline))
+bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
+{
+    return true;
+}
+
+static inline __attribute__((always_inline))
+uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_202)
+{
+    Alloc param = alloc;
+    uint param_1 = offset;
+    if (!touch_mem(param, param_1))
+    {
+        return 0u;
+    }
+    uint v = v_202.memory[offset];
+    return v;
+}
+
+static inline __attribute__((always_inline))
+CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202)
+{
+    Alloc param = a;
+    uint param_1 = ref.offset >> uint(2);
+    uint tag_and_flags = read_mem(param, param_1, v_202);
+    return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
+}
+
+static inline __attribute__((always_inline))
+CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_202)
+{
+    uint ix = ref.offset >> uint(2);
+    Alloc param = a;
+    uint param_1 = ix + 0u;
+    uint raw0 = read_mem(param, param_1, v_202);
+    Alloc param_2 = a;
+    uint param_3 = ix + 1u;
+    uint raw1 = read_mem(param_2, param_3, v_202);
+    CmdStroke s;
+    s.tile_ref = raw0;
+    s.half_width = as_type<float>(raw1);
+    return s;
+}
+
+static inline __attribute__((always_inline))
+CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202)
+{
+    Alloc param = a;
+    CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u };
+    return CmdStroke_read(param, param_1, v_202);
+}
+
+static inline __attribute__((always_inline))
+Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
+{
+    Alloc a;
+    a.offset = offset;
+    return a;
+}
+
+static inline __attribute__((always_inline))
+TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_202)
+{
+    uint ix = ref.offset >> uint(2);
+    Alloc param = a;
+    uint param_1 = ix + 0u;
+    uint raw0 = read_mem(param, param_1, v_202);
+    Alloc param_2 = a;
+    uint param_3 = ix + 1u;
+    uint raw1 = read_mem(param_2, param_3, v_202);
+    Alloc param_4 = a;
+    uint param_5 = ix + 2u;
+    uint raw2 = read_mem(param_4, param_5, v_202);
+    Alloc param_6 = a;
+    uint param_7 = ix + 3u;
+    uint raw3 = read_mem(param_6, param_7, v_202);
+    Alloc param_8 = a;
+    uint param_9 = ix + 4u;
+    uint raw4 = read_mem(param_8, param_9, v_202);
+    Alloc param_10 = a;
+    uint param_11 = ix + 5u;
+    uint raw5 = read_mem(param_10, param_11, v_202);
+    TileSeg s;
+    s.origin = float2(as_type<float>(raw0), as_type<float>(raw1));
+    s.vector = float2(as_type<float>(raw2), as_type<float>(raw3));
+    s.y_edge = as_type<float>(raw4);
+    s.next = TileSegRef{ raw5 };
+    return s;
+}
+
+static inline __attribute__((always_inline))
+uint2 chunk_offset(thread const uint& i)
+{
+    return uint2((i % 2u) * 8u, (i / 2u) * 4u);
+}
+
+static inline __attribute__((always_inline))
+CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_202)
+{
+    uint ix = ref.offset >> uint(2);
+    Alloc param = a;
+    uint param_1 = ix + 0u;
+    uint raw0 = read_mem(param, param_1, v_202);
+    Alloc param_2 = a;
+    uint param_3 = ix + 1u;
+    uint raw1 = read_mem(param_2, param_3, v_202);
+    CmdFill s;
+    s.tile_ref = raw0;
+    s.backdrop = int(raw1);
+    return s;
+}
+
+static inline __attribute__((always_inline))
+CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202)
+{
+    Alloc param = a;
+    CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u };
+    return CmdFill_read(param, param_1, v_202);
+}
+
+static inline __attribute__((always_inline))
+CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_202)
+{
+    uint ix = ref.offset >> uint(2);
+    Alloc param = a;
+    uint param_1 = ix + 0u;
+    uint raw0 = read_mem(param, param_1, v_202);
+    CmdAlpha s;
+    s.alpha = as_type<float>(raw0);
+    return s;
+}
+
+static inline __attribute__((always_inline))
+CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202)
+{
+    Alloc param = a;
+    CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u };
+    return CmdAlpha_read(param, param_1, v_202);
+}
+
+static inline __attribute__((always_inline))
+CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_202)
+{
+    uint ix = ref.offset >> uint(2);
+    Alloc param = a;
+    uint param_1 = ix + 0u;
+    uint raw0 = read_mem(param, param_1, v_202);
+    CmdColor s;
+    s.rgba_color = raw0;
+    return s;
+}
+
+static inline __attribute__((always_inline))
+CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202)
+{
+    Alloc param = a;
+    CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u };
+    return CmdColor_read(param, param_1, v_202);
+}
+
+static inline __attribute__((always_inline))
+float3 fromsRGB(thread const float3& srgb)
+{
+    bool3 cutoff = srgb >= float3(0.040449999272823333740234375);
+    float3 below = srgb / float3(12.9200000762939453125);
+    float3 above = pow((srgb + float3(0.054999999701976776123046875)) / float3(1.05499994754791259765625), float3(2.400000095367431640625));
+    return select(below, above, cutoff);
+}
+
+static inline __attribute__((always_inline))
+float4 unpacksRGB(thread const uint& srgba)
+{
+    float4 color = unpack_unorm4x8_to_float(srgba).wzyx;
+    float3 param = color.xyz;
+    return float4(fromsRGB(param), color.w);
+}
+
+static inline __attribute__((always_inline))
+CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_202)
+{
+    uint ix = ref.offset >> uint(2);
+    Alloc param = a;
+    uint param_1 = ix + 0u;
+    uint raw0 = read_mem(param, param_1, v_202);
+    Alloc param_2 = a;
+    uint param_3 = ix + 1u;
+    uint raw1 = read_mem(param_2, param_3, v_202);
+    Alloc param_4 = a;
+    uint param_5 = ix + 2u;
+    uint raw2 = read_mem(param_4, param_5, v_202);
+    Alloc param_6 = a;
+    uint param_7 = ix + 3u;
+    uint raw3 = read_mem(param_6, param_7, v_202);
+    CmdLinGrad s;
+    s.index = raw0;
+    s.line_x = as_type<float>(raw1);
+    s.line_y = as_type<float>(raw2);
+    s.line_c = as_type<float>(raw3);
+    return s;
+}
+
+static inline __attribute__((always_inline))
+CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202)
+{
+    Alloc param = a;
+    CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u };
+    return CmdLinGrad_read(param, param_1, v_202);
+}
+
+static inline __attribute__((always_inline))
+CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_202)
+{
+    uint ix = ref.offset >> uint(2);
+    Alloc param = a;
+    uint param_1 = ix + 0u;
+    uint raw0 = read_mem(param, param_1, v_202);
+    Alloc param_2 = a;
+    uint param_3 = ix + 1u;
+    uint raw1 = read_mem(param_2, param_3, v_202);
+    CmdImage s;
+    s.index = raw0;
+    s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);
+    return s;
+}
+
+static inline __attribute__((always_inline))
+CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202)
+{
+    Alloc param = a;
+    CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u };
+    return CmdImage_read(param, param_1, v_202);
+}
+
+static inline __attribute__((always_inline))
+spvUnsafeArray<float4, 8> fillImage(thread const uint2& xy, thread const CmdImage& cmd_img, thread texture2d<float> image_atlas)
+{
+    spvUnsafeArray<float4, 8> rgba;
+    for (uint i = 0u; i < 8u; i++)
+    {
+        uint param = i;
+        int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset;
+        float4 fg_rgba = image_atlas.read(uint2(uv));
+        float3 param_1 = fg_rgba.xyz;
+        float3 _695 = fromsRGB(param_1);
+        fg_rgba.x = _695.x;
+        fg_rgba.y = _695.y;
+        fg_rgba.z = _695.z;
+        rgba[i] = fg_rgba;
+    }
+    return rgba;
+}
+
+static inline __attribute__((always_inline))
+float3 tosRGB(thread const float3& rgb)
+{
+    bool3 cutoff = rgb >= float3(0.003130800090730190277099609375);
+    float3 below = float3(12.9200000762939453125) * rgb;
+    float3 above = (float3(1.05499994754791259765625) * pow(rgb, float3(0.416660010814666748046875))) - float3(0.054999999701976776123046875);
+    return select(below, above, cutoff);
+}
+
+static inline __attribute__((always_inline))
+uint packsRGB(thread float4& rgba)
+{
+    float3 param = rgba.xyz;
+    rgba = float4(tosRGB(param), rgba.w);
+    return pack_float_to_unorm4x8(rgba.wzyx);
+}
+
+static inline __attribute__((always_inline))
+CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_202)
+{
+    uint ix = ref.offset >> uint(2);
+    Alloc param = a;
+    uint param_1 = ix + 0u;
+    uint raw0 = read_mem(param, param_1, v_202);
+    CmdJump s;
+    s.new_ref = raw0;
+    return s;
+}
+
+static inline __attribute__((always_inline))
+CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_202)
+{
+    Alloc param = a;
+    CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u };
+    return CmdJump_read(param, param_1, v_202);
+}
+
+kernel void main0(device Memory& v_202 [[buffer(0)]], const device ConfigBuf& _723 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
+{
+    uint tile_ix = (gl_WorkGroupID.y * _723.conf.width_in_tiles) + gl_WorkGroupID.x;
+    Alloc param;
+    param.offset = _723.conf.ptcl_alloc.offset;
+    uint param_1 = tile_ix * 1024u;
+    uint param_2 = 1024u;
+    Alloc cmd_alloc = slice_mem(param, param_1, param_2);
+    CmdRef cmd_ref = CmdRef{ cmd_alloc.offset };
+    uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
+    float2 xy = float2(xy_uint);
+    spvUnsafeArray<float4, 8> rgba;
+    for (uint i = 0u; i < 8u; i++)
+    {
+        rgba[i] = float4(0.0);
+    }
+    uint clip_depth = 0u;
+    bool mem_ok = v_202.mem_error == 0u;
+    spvUnsafeArray<float, 8> df;
+    TileSegRef tile_seg_ref;
+    spvUnsafeArray<float, 8> area;
+    spvUnsafeArray<spvUnsafeArray<uint, 8>, 128> blend_stack;
+    spvUnsafeArray<spvUnsafeArray<float, 8>, 128> blend_alpha_stack;
+    while (mem_ok)
+    {
+        Alloc param_3 = cmd_alloc;
+        CmdRef param_4 = cmd_ref;
+        uint tag = Cmd_tag(param_3, param_4, v_202).tag;
+        if (tag == 0u)
+        {
+            break;
+        }
+        switch (tag)
+        {
+            case 2u:
+            {
+                Alloc param_5 = cmd_alloc;
+                CmdRef param_6 = cmd_ref;
+                CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_202);
+                for (uint k = 0u; k < 8u; k++)
+                {
+                    df[k] = 1000000000.0;
+                }
+                tile_seg_ref = TileSegRef{ stroke.tile_ref };
+                do
+                {
+                    uint param_7 = tile_seg_ref.offset;
+                    uint param_8 = 24u;
+                    bool param_9 = mem_ok;
+                    Alloc param_10 = new_alloc(param_7, param_8, param_9);
+                    TileSegRef param_11 = tile_seg_ref;
+                    TileSeg seg = TileSeg_read(param_10, param_11, v_202);
+                    float2 line_vec = seg.vector;
+                    for (uint k_1 = 0u; k_1 < 8u; k_1++)
+                    {
+                        float2 dpos = (xy + float2(0.5)) - seg.origin;
+                        uint param_12 = k_1;
+                        dpos += float2(chunk_offset(param_12));
+                        float t = fast::clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
+                        df[k_1] = fast::min(df[k_1], length((line_vec * t) - dpos));
+                    }
+                    tile_seg_ref = seg.next;
+                } while (tile_seg_ref.offset != 0u);
+                for (uint k_2 = 0u; k_2 < 8u; k_2++)
+                {
+                    area[k_2] = fast::clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);
+                }
+                cmd_ref.offset += 12u;
+                break;
+            }
+            case 1u:
+            {
+                Alloc param_13 = cmd_alloc;
+                CmdRef param_14 = cmd_ref;
+                CmdFill fill = Cmd_Fill_read(param_13, param_14, v_202);
+                for (uint k_3 = 0u; k_3 < 8u; k_3++)
+                {
+                    area[k_3] = float(fill.backdrop);
+                }
+                tile_seg_ref = TileSegRef{ fill.tile_ref };
+                do
+                {
+                    uint param_15 = tile_seg_ref.offset;
+                    uint param_16 = 24u;
+                    bool param_17 = mem_ok;
+                    Alloc param_18 = new_alloc(param_15, param_16, param_17);
+                    TileSegRef param_19 = tile_seg_ref;
+                    TileSeg seg_1 = TileSeg_read(param_18, param_19, v_202);
+                    for (uint k_4 = 0u; k_4 < 8u; k_4++)
+                    {
+                        uint param_20 = k_4;
+                        float2 my_xy = xy + float2(chunk_offset(param_20));
+                        float2 start = seg_1.origin - my_xy;
+                        float2 end = start + seg_1.vector;
+                        float2 window = fast::clamp(float2(start.y, end.y), float2(0.0), float2(1.0));
+                        if ((isunordered(window.x, window.y) || window.x != window.y))
+                        {
+                            float2 t_1 = (window - float2(start.y)) / float2(seg_1.vector.y);
+                            float2 xs = float2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y));
+                            float xmin = fast::min(fast::min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;
+                            float xmax = fast::max(xs.x, xs.y);
+                            float b = fast::min(xmax, 1.0);
+                            float c = fast::max(b, 0.0);
+                            float d = fast::max(xmin, 0.0);
+                            float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin);
+                            area[k_4] += (a * (window.x - window.y));
+                        }
+                        area[k_4] += (sign(seg_1.vector.x) * fast::clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0));
+                    }
+                    tile_seg_ref = seg_1.next;
+                } while (tile_seg_ref.offset != 0u);
+                for (uint k_5 = 0u; k_5 < 8u; k_5++)
+                {
+                    area[k_5] = fast::min(abs(area[k_5]), 1.0);
+                }
+                cmd_ref.offset += 12u;
+                break;
+            }
+            case 3u:
+            {
+                for (uint k_6 = 0u; k_6 < 8u; k_6++)
+                {
+                    area[k_6] = 1.0;
+                }
+                cmd_ref.offset += 4u;
+                break;
+            }
+            case 4u:
+            {
+                Alloc param_21 = cmd_alloc;
+                CmdRef param_22 = cmd_ref;
+                CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_202);
+                for (uint k_7 = 0u; k_7 < 8u; k_7++)
+                {
+                    area[k_7] = alpha.alpha;
+                }
+                cmd_ref.offset += 8u;
+                break;
+            }
+            case 5u:
+            {
+                Alloc param_23 = cmd_alloc;
+                CmdRef param_24 = cmd_ref;
+                CmdColor color = Cmd_Color_read(param_23, param_24, v_202);
+                uint param_25 = color.rgba_color;
+                float4 fg = unpacksRGB(param_25);
+                for (uint k_8 = 0u; k_8 < 8u; k_8++)
+                {
+                    float4 fg_k = fg * area[k_8];
+                    rgba[k_8] = (rgba[k_8] * (1.0 - fg_k.w)) + fg_k;
+                }
+                cmd_ref.offset += 8u;
+                break;
+            }
+            case 6u:
+            {
+                Alloc param_26 = cmd_alloc;
+                CmdRef param_27 = cmd_ref;
+                CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_202);
+                float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c;
+                for (uint k_9 = 0u; k_9 < 8u; k_9++)
+                {
+                    uint param_28 = k_9;
+                    float2 chunk_xy = float2(chunk_offset(param_28));
+                    float my_d = (d_1 + (lin.line_x * chunk_xy.x)) + (lin.line_y * chunk_xy.y);
+                    int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
+                    float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
+                    float3 param_29 = fg_rgba.xyz;
+                    float3 _1298 = fromsRGB(param_29);
+                    fg_rgba.x = _1298.x;
+                    fg_rgba.y = _1298.y;
+                    fg_rgba.z = _1298.z;
+                    rgba[k_9] = fg_rgba;
+                }
+                cmd_ref.offset += 20u;
+                break;
+            }
+            case 7u:
+            {
+                Alloc param_30 = cmd_alloc;
+                CmdRef param_31 = cmd_ref;
+                CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_202);
+                uint2 param_32 = xy_uint;
+                CmdImage param_33 = fill_img;
+                spvUnsafeArray<float4, 8> img;
+                img = fillImage(param_32, param_33, image_atlas);
+                for (uint k_10 = 0u; k_10 < 8u; k_10++)
+                {
+                    float4 fg_k_1 = img[k_10] * area[k_10];
+                    rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_1.w)) +
fg_k_1; + } + cmd_ref.offset += 12u; + break; + } + case 8u: + { + for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + uint d_2 = min(clip_depth, 127u); + float4 param_34 = float4(rgba[k_11]); + uint _1390 = packsRGB(param_34); + blend_stack[d_2][k_11] = _1390; + blend_alpha_stack[d_2][k_11] = fast::clamp(abs(area[k_11]), 0.0, 1.0); + rgba[k_11] = float4(0.0); + } + clip_depth++; + cmd_ref.offset += 4u; + break; + } + case 9u: + { + clip_depth--; + for (uint k_12 = 0u; k_12 < 8u; k_12++) + { + uint d_3 = min(clip_depth, 127u); + uint param_35 = blend_stack[d_3][k_12]; + float4 bg = unpacksRGB(param_35); + float4 fg_1 = (rgba[k_12] * area[k_12]) * blend_alpha_stack[d_3][k_12]; + rgba[k_12] = (bg * (1.0 - fg_1.w)) + fg_1; + } + cmd_ref.offset += 4u; + break; + } + case 10u: + { + Alloc param_36 = cmd_alloc; + CmdRef param_37 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_36, param_37, v_202).new_ref }; + cmd_alloc.offset = cmd_ref.offset; + break; + } + } + } + for (uint i_1 = 0u; i_1 < 8u; i_1++) + { + uint param_38 = i_1; + image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_38)))); + } +} + diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv new file mode 100644 index 0000000..61e5b1c Binary files /dev/null and b/piet-gpu/shader/gen/kernel4_gray.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index c9c3fed..9aba204 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -23,7 +23,11 @@ layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf { Config conf; }; +#ifdef GRAY +layout(r8, set = 0, binding = 2) uniform restrict writeonly image2D image; +#else layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image; +#endif layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D image_atlas; @@ -231,6 +235,12 @@ void main() { } for (uint i = 0; i < CHUNK; i++) { +#ifdef GRAY + // Just store the alpha value; later we can specialize this kernel more to avoid + // computing unneeded RGB colors. + imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(rgba[i].a)); +#else imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgba[i].rgb), rgba[i].a)); +#endif } } diff --git a/piet-gpu/src/glyph_render.rs b/piet-gpu/src/glyph_render.rs new file mode 100644 index 0000000..8f4c626 --- /dev/null +++ b/piet-gpu/src/glyph_render.rs @@ -0,0 +1,87 @@ +// Copyright 2022 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +//! An experimental API for glyph rendering. 
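+//!
+//! Glyph outlines are scaled with swash and encoded into a
+//! `PietGpuRenderContext`; rendering into an A8 target is the intended use.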
+
+use piet::{kurbo::Affine, RenderContext};
+use swash::{scale::ScaleContext, CacheKey, FontDataRef};
+
+use crate::{encoder::GlyphEncoder, PietGpuRenderContext};
+
+pub struct GlyphRenderer {
+    pub render_ctx: PietGpuRenderContext,
+    scale_context: ScaleContext,
+}
+
+#[repr(transparent)]
+pub struct FontId(CacheKey);
+
+impl GlyphRenderer {
+    pub fn new() -> GlyphRenderer {
+        let render_ctx = PietGpuRenderContext::new();
+        let scale_context = ScaleContext::new();
+        GlyphRenderer {
+            render_ctx,
+            scale_context,
+        }
+    }
+
+    pub unsafe fn add_glyph(
+        &mut self,
+        font_data: &[u8],
+        font_id: u64,
+        glyph_id: u16,
+        transform: [f32; 6],
+    ) {
+        // This transmute is dodgy because the definition in swash isn't repr(transparent).
+        // I think the best solution is to have a from_u64 method, but we'll work that out
+        // later.
+        let font_id = FontId(std::mem::transmute(font_id));
+        let encoder = self.make_glyph(font_data, font_id, glyph_id);
+        const DEFAULT_UPEM: u16 = 2048;
+        let affine = Affine::new([
+            transform[0] as f64,
+            transform[1] as f64,
+            transform[2] as f64,
+            transform[3] as f64,
+            transform[4] as f64,
+            transform[5] as f64,
+        ]) * Affine::scale(1.0 / DEFAULT_UPEM as f64);
+        self.render_ctx.transform(affine);
+        self.render_ctx.encode_glyph(&encoder);
+        // TODO: don't fill glyph if RGBA
+        self.render_ctx.fill_glyph(0xff_ff_ff_ff);
+        self.render_ctx.transform(affine.inverse());
+    }
+
+    pub fn reset(&mut self) {
+        self.render_ctx = PietGpuRenderContext::new();
+    }
+
+    fn make_glyph(&mut self, font_data: &[u8], font_id: FontId, glyph_id: u16) -> GlyphEncoder {
+        let mut encoder = GlyphEncoder::default();
+        let font_data = FontDataRef::new(font_data).expect("invalid font");
+        let mut font_ref = font_data.get(0).expect("invalid font index");
+        font_ref.key = font_id.0;
+        let mut scaler = self.scale_context.builder(font_ref).size(2048.).build();
+        if let Some(outline) = scaler.scale_outline(glyph_id) {
+            crate::text::append_outline(&mut encoder, outline.verbs(), outline.points());
+        } else {
+            println!("failed to scale");
+        }
+        encoder
+    }
+}
diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs
index aa06c3f..97e1f28 100644
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@@ -1,4 +1,5 @@
 mod encoder;
+pub mod glyph_render;
 mod gradient;
 mod pico_svg;
 mod render_ctx;
@@ -48,6 +49,18 @@ pub fn dump_k1_data(k1_buf: &[u32]) {
     }
 }
 
+pub struct RenderConfig {
+    width: usize,
+    height: usize,
+    format: PixelFormat,
+}
+
+// Should we just use the enum from piet-gpu-hal?
+pub enum PixelFormat {
+    A8,
+    Rgba8,
+}
+
 pub struct Renderer {
     // These sizes are aligned to tile boundaries, though at some point
     // we'll want to have a good strategy for dealing with odd sizes.
@@ -105,15 +118,41 @@ pub struct Renderer {
     gradients: Image,
 }
 
+impl RenderConfig {
+    pub fn new(width: usize, height: usize) -> RenderConfig {
+        RenderConfig {
+            width,
+            height,
+            format: PixelFormat::Rgba8,
+        }
+    }
+
+    pub fn pixel_format(mut self, format: PixelFormat) -> Self {
+        self.format = format;
+        self
+    }
+}
+
 impl Renderer {
-    /// Create a new renderer.
     pub unsafe fn new(
         session: &Session,
        width: usize,
        height: usize,
        n_bufs: usize,
+    ) -> Result<Renderer, Error> {
+        let config = RenderConfig::new(width, height);
+        Self::new_from_config(session, config, n_bufs)
+    }
+
+    /// Create a new renderer.
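+    ///
+    /// The `config` supplies the target size and pixel format; `n_bufs` is the
+    /// number of host-upload buffers to allocate.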
+    pub unsafe fn new_from_config(
+        session: &Session,
+        config: RenderConfig,
+        n_bufs: usize,
+    ) -> Result<Renderer, Error> {
         // For now, round up to tile alignment
+        let width = config.width;
+        let height = config.height;
         let width = width + (width.wrapping_neg() & (TILE_W - 1));
         let height = height + (height.wrapping_neg() & (TILE_W - 1));
         let dev = BufferUsage::STORAGE | BufferUsage::COPY_DST;
@@ -125,7 +164,11 @@ impl Renderer {
             .map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap())
             .collect::<Vec<_>>();
 
-        let image_dev = session.create_image2d(width as u32, height as u32)?;
+        let image_format = match config.format {
+            PixelFormat::A8 => piet_gpu_hal::ImageFormat::A8,
+            PixelFormat::Rgba8 => piet_gpu_hal::ImageFormat::Rgba8,
+        };
+        let image_dev = session.create_image2d(width as u32, height as u32, image_format)?;
 
         // Note: this must be updated when the config struct size changes.
         const CONFIG_BUFFER_SIZE: u64 = std::mem::size_of::<Config>() as u64;
@@ -210,7 +253,10 @@ impl Renderer {
             .collect();
         let gradients = Self::make_gradient_image(&session);
 
-        let k4_code = include_shader!(session, "../shader/gen/kernel4");
+        let k4_code = match config.format {
+            PixelFormat::A8 => include_shader!(session, "../shader/gen/kernel4_gray"),
+            PixelFormat::Rgba8 => include_shader!(session, "../shader/gen/kernel4"),
+        };
         let k4_pipeline = session.create_compute_pipeline(
             k4_code,
             &[
@@ -441,7 +487,8 @@ impl Renderer {
             return Err("unsupported image format".into());
         }
         let buffer = session.create_buffer_init(&buf, BufferUsage::COPY_SRC)?;
-        let image = session.create_image2d(width.try_into()?, height.try_into()?)?;
+        const RGBA: piet_gpu_hal::ImageFormat = piet_gpu_hal::ImageFormat::Rgba8;
+        let image = session.create_image2d(width.try_into()?, height.try_into()?, RGBA)?;
         let mut cmd_buf = session.cmd_buf()?;
         cmd_buf.begin();
         cmd_buf.image_barrier(&image, ImageLayout::Undefined, ImageLayout::BlitDst);
@@ -477,8 +524,13 @@ impl Renderer {
 
     fn make_gradient_image(session: &Session) -> Image {
         unsafe {
+            const RGBA: piet_gpu_hal::ImageFormat = piet_gpu_hal::ImageFormat::Rgba8;
             session
-                .create_image2d(gradient::N_SAMPLES as u32, gradient::N_GRADIENTS as u32)
+                .create_image2d(
+                    gradient::N_SAMPLES as u32,
+                    gradient::N_GRADIENTS as u32,
+                    RGBA,
+                )
                 .unwrap()
         }
    }
diff --git a/piet-gpu/src/text.rs b/piet-gpu/src/text.rs
index dec3ffa..0fb508b 100644
--- a/piet-gpu/src/text.rs
+++ b/piet-gpu/src/text.rs
@@ -260,7 +260,7 @@ impl TextLayoutBuilder for PietGpuTextLayoutBuilder {
     }
 }
 
-fn append_outline(encoder: &mut GlyphEncoder, verbs: &[Verb], points: &[Vector]) {
+pub(crate) fn append_outline(encoder: &mut GlyphEncoder, verbs: &[Verb], points: &[Vector]) {
     let mut path_encoder = encoder.path_encoder();
     let mut i = 0;
     for verb in verbs {
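
Usage sketch (not part of the patch): how the new configuration API above fits
together when rendering to an 8-bit alpha target. This is a minimal sketch
assuming only the APIs introduced in piet-gpu/src/lib.rs (`RenderConfig`,
`PixelFormat`, `Renderer::new_from_config`); scene encoding and error handling
are elided.

```rust
use piet_gpu::{PixelFormat, RenderConfig, Renderer};
use piet_gpu_hal::Session;

// Selecting PixelFormat::A8 makes the renderer create an A8 image_dev and
// load the kernel4_gray shader variant instead of the RGBA kernel4.
unsafe fn make_a8_renderer(session: &Session) -> Result<Renderer, piet_gpu_hal::Error> {
    let config = RenderConfig::new(1024, 1024).pixel_format(PixelFormat::A8);
    Renderer::new_from_config(session, config, 1)
}
```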