diff --git a/piet-gpu-hal/src/hub.rs b/piet-gpu-hal/src/hub.rs index 53eb0c5..a52040f 100644 --- a/piet-gpu-hal/src/hub.rs +++ b/piet-gpu-hal/src/hub.rs @@ -360,9 +360,9 @@ impl DescriptorSetBuilder { self } - pub fn add_textures<'a>(mut self, images: impl IntoRefs<'a, Image>, sampler: &Sampler) -> Self { + pub fn add_textures<'a>(mut self, images: impl IntoRefs<'a, Image>) -> Self { let vk_images = images.into_refs().map(|i| i.vk_image()).collect::>(); - self.0.add_textures(&vk_images, sampler); + self.0.add_textures(&vk_images); self } diff --git a/piet-gpu-hal/src/lib.rs b/piet-gpu-hal/src/lib.rs index 5d01351..2848774 100644 --- a/piet-gpu-hal/src/lib.rs +++ b/piet-gpu-hal/src/lib.rs @@ -247,6 +247,6 @@ pub trait DescriptorSetBuilder { /// /// The same sampler is used for all textures, which is not very sophisticated; /// we should have a way to vary the sampler. - fn add_textures(&mut self, images: &[&D::Image], sampler: &D::Sampler); + fn add_textures(&mut self, images: &[&D::Image]); unsafe fn build(self, device: &D, pipeline: &D::Pipeline) -> Result; } diff --git a/piet-gpu-hal/src/vulkan.rs b/piet-gpu-hal/src/vulkan.rs index aa7972d..19b8dbe 100644 --- a/piet-gpu-hal/src/vulkan.rs +++ b/piet-gpu-hal/src/vulkan.rs @@ -209,6 +209,7 @@ impl VkInstance { for extension in surface_extensions { exts.push(extension.as_ptr()); } + exts.push(vk::KhrGetPhysicalDeviceProperties2Fn::name().as_ptr()); let instance = entry.create_instance( &vk::InstanceCreateInfo::builder() @@ -280,15 +281,16 @@ impl VkInstance { // support for descriptor indexing (maybe should be optional for compatibility) let descriptor_indexing = vk::PhysicalDeviceDescriptorIndexingFeatures::builder() + .shader_storage_image_array_non_uniform_indexing(true) .descriptor_binding_variable_descriptor_count(true) .runtime_descriptor_array(true); - let extensions = match surface { + let mut extensions = match surface { Some(_) => vec![khr::Swapchain::name().as_ptr()], None => vec![], }; - //extensions.push(vk::KhrMaintenance3Fn::name().as_ptr()); - //extensions.push(vk::ExtDescriptorIndexingFn::name().as_ptr()); + extensions.push(vk::ExtDescriptorIndexingFn::name().as_ptr()); + extensions.push(vk::KhrMaintenance3Fn::name().as_ptr()); let create_info = vk::DeviceCreateInfo::builder() .queue_create_infos(&queue_create_infos) .enabled_extension_names(&extensions) @@ -463,8 +465,7 @@ impl crate::Device for VkDevice { // want to add sampling for images and so on. let usage = vk::ImageUsageFlags::STORAGE | vk::ImageUsageFlags::TRANSFER_SRC - | vk::ImageUsageFlags::TRANSFER_DST - | vk::ImageUsageFlags::SAMPLED; + | vk::ImageUsageFlags::TRANSFER_DST; let image = device.create_image( &vk::ImageCreateInfo::builder() .image_type(vk::ImageType::TYPE_2D) @@ -994,8 +995,7 @@ impl crate::PipelineBuilder for PipelineBuilder { self.bindings.push( vk::DescriptorSetLayoutBinding::builder() .binding(start) - // TODO: we do want these to be sampled images - .descriptor_type(vk::DescriptorType::COMBINED_IMAGE_SAMPLER) + .descriptor_type(vk::DescriptorType::STORAGE_IMAGE) .descriptor_count(max_textures) .stage_flags(vk::ShaderStageFlags::COMPUTE) .build(), @@ -1068,9 +1068,8 @@ impl crate::DescriptorSetBuilder for DescriptorSetBuilder { self.images.extend(images.iter().map(|i| i.image_view)); } - fn add_textures(&mut self, images: &[&Image], sampler: &vk::Sampler) { + fn add_textures(&mut self, images: &[&Image]) { self.textures.extend(images.iter().map(|i| i.image_view)); - self.sampler = *sampler; } unsafe fn build(self, device: &VkDevice, pipeline: &Pipeline) -> Result { @@ -1095,7 +1094,7 @@ impl crate::DescriptorSetBuilder for DescriptorSetBuilder { if pipeline.max_textures > 0 { descriptor_pool_sizes.push( vk::DescriptorPoolSize::builder() - .ty(vk::DescriptorType::COMBINED_IMAGE_SAMPLER) + .ty(vk::DescriptorType::STORAGE_IMAGE) .descriptor_count(pipeline.max_textures) .build(), ); @@ -1107,11 +1106,16 @@ impl crate::DescriptorSetBuilder for DescriptorSetBuilder { None, )?; let descriptor_set_layouts = [pipeline.descriptor_set_layout]; + + let counts = &[pipeline.max_textures]; + let variable_info = vk::DescriptorSetVariableDescriptorCountAllocateInfo::builder() + .descriptor_counts(counts); let descriptor_sets = device .allocate_descriptor_sets( &vk::DescriptorSetAllocateInfo::builder() .descriptor_pool(descriptor_pool) - .set_layouts(&descriptor_set_layouts), + .set_layouts(&descriptor_set_layouts) + .push_next(&mut variable_info.build()), ) .unwrap(); let mut binding = 0; @@ -1156,7 +1160,7 @@ impl crate::DescriptorSetBuilder for DescriptorSetBuilder { vk::DescriptorImageInfo::builder() .sampler(self.sampler) .image_view(*texture) - .image_layout(vk::ImageLayout::SHADER_READ_ONLY_OPTIMAL) + .image_layout(vk::ImageLayout::GENERAL) .build() }) .collect::>(); @@ -1164,7 +1168,7 @@ impl crate::DescriptorSetBuilder for DescriptorSetBuilder { &[vk::WriteDescriptorSet::builder() .dst_set(descriptor_sets[0]) .dst_binding(binding) - .descriptor_type(vk::DescriptorType::COMBINED_IMAGE_SAMPLER) + .descriptor_type(vk::DescriptorType::STORAGE_IMAGE) .image_info(&infos) .build()], &[], diff --git a/piet-gpu-types/src/annotated.rs b/piet-gpu-types/src/annotated.rs index 304726c..2460992 100644 --- a/piet-gpu-types/src/annotated.rs +++ b/piet-gpu-types/src/annotated.rs @@ -9,6 +9,11 @@ piet_gpu! { bbox: [f32; 4], rgba_color: u32, } + struct AnnoFillImage { + bbox: [f32; 4], + index: u32, + offset: [i16; 2], + } struct AnnoStroke { bbox: [f32; 4], rgba_color: u32, @@ -23,6 +28,7 @@ piet_gpu! { Nop, Stroke(AnnoStroke), Fill(AnnoFill), + FillImage(AnnoFillImage), BeginClip(AnnoClip), EndClip(AnnoClip), } diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs index a266eb5..b35b9bf 100644 --- a/piet-gpu-types/src/ptcl.rs +++ b/piet-gpu-types/src/ptcl.rs @@ -16,6 +16,13 @@ piet_gpu! { backdrop: i32, rgba_color: u32, } + struct CmdFillImage { + // As above, really Ref + tile_ref: u32, + backdrop: i32, + index: u32, + offset: [i16; 2], + } struct CmdBeginClip { tile_ref: u32, backdrop: i32, @@ -32,17 +39,23 @@ piet_gpu! { struct CmdSolid { rgba_color: u32, } + struct CmdSolidImage { + index: u32, + offset: [i16; 2], + } struct CmdJump { new_ref: u32, } enum Cmd { End, Fill(CmdFill), + FillImage(CmdFillImage), BeginClip(CmdBeginClip), BeginSolidClip(CmdBeginSolidClip), EndClip(CmdEndClip), Stroke(CmdStroke), Solid(CmdSolid), + SolidImage(CmdSolidImage), Jump(CmdJump), } } diff --git a/piet-gpu-types/src/scene.rs b/piet-gpu-types/src/scene.rs index e0adf4e..935ea4b 100644 --- a/piet-gpu-types/src/scene.rs +++ b/piet-gpu-types/src/scene.rs @@ -25,6 +25,10 @@ piet_gpu! { struct Fill { rgba_color: u32, } + struct FillImage { + index: u32, + offset: [i16; 2], + } struct Stroke { rgba_color: u32, } @@ -58,6 +62,7 @@ piet_gpu! { Transform(Transform), BeginClip(Clip), EndClip(Clip), + FillImage(FillImage), } } } diff --git a/piet-gpu/shader/annotated.h b/piet-gpu/shader/annotated.h index 291496f..40ded79 100644 --- a/piet-gpu/shader/annotated.h +++ b/piet-gpu/shader/annotated.h @@ -6,6 +6,10 @@ struct AnnoFillRef { uint offset; }; +struct AnnoFillImageRef { + uint offset; +}; + struct AnnoStrokeRef { uint offset; }; @@ -29,6 +33,18 @@ AnnoFillRef AnnoFill_index(AnnoFillRef ref, uint index) { return AnnoFillRef(ref.offset + index * AnnoFill_size); } +struct AnnoFillImage { + vec4 bbox; + uint index; + ivec2 offset; +}; + +#define AnnoFillImage_size 24 + +AnnoFillImageRef AnnoFillImage_index(AnnoFillImageRef ref, uint index) { + return AnnoFillImageRef(ref.offset + index * AnnoFillImage_size); +} + struct AnnoStroke { vec4 bbox; uint rgba_color; @@ -54,8 +70,9 @@ AnnoClipRef AnnoClip_index(AnnoClipRef ref, uint index) { #define Annotated_Nop 0 #define Annotated_Stroke 1 #define Annotated_Fill 2 -#define Annotated_BeginClip 3 -#define Annotated_EndClip 4 +#define Annotated_FillImage 3 +#define Annotated_BeginClip 4 +#define Annotated_EndClip 5 #define Annotated_size 28 AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) { @@ -84,6 +101,31 @@ void AnnoFill_write(Alloc a, AnnoFillRef ref, AnnoFill s) { write_mem(a, ix + 4, s.rgba_color); } +AnnoFillImage AnnoFillImage_read(Alloc a, AnnoFillImageRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = read_mem(a, ix + 0); + uint raw1 = read_mem(a, ix + 1); + uint raw2 = read_mem(a, ix + 2); + uint raw3 = read_mem(a, ix + 3); + uint raw4 = read_mem(a, ix + 4); + uint raw5 = read_mem(a, ix + 5); + AnnoFillImage s; + s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); + s.index = raw4; + s.offset = ivec2(int(raw5 << 16) >> 16, int(raw5) >> 16); + return s; +} + +void AnnoFillImage_write(Alloc a, AnnoFillImageRef ref, AnnoFillImage s) { + uint ix = ref.offset >> 2; + write_mem(a, ix + 0, floatBitsToUint(s.bbox.x)); + write_mem(a, ix + 1, floatBitsToUint(s.bbox.y)); + write_mem(a, ix + 2, floatBitsToUint(s.bbox.z)); + write_mem(a, ix + 3, floatBitsToUint(s.bbox.w)); + write_mem(a, ix + 4, s.index); + write_mem(a, ix + 5, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16)); +} + AnnoStroke AnnoStroke_read(Alloc a, AnnoStrokeRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -140,6 +182,10 @@ AnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref) { return AnnoFill_read(a, AnnoFillRef(ref.offset + 4)); } +AnnoFillImage Annotated_FillImage_read(Alloc a, AnnotatedRef ref) { + return AnnoFillImage_read(a, AnnoFillImageRef(ref.offset + 4)); +} + AnnoClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref) { return AnnoClip_read(a, AnnoClipRef(ref.offset + 4)); } @@ -162,6 +208,11 @@ void Annotated_Fill_write(Alloc a, AnnotatedRef ref, AnnoFill s) { AnnoFill_write(a, AnnoFillRef(ref.offset + 4), s); } +void Annotated_FillImage_write(Alloc a, AnnotatedRef ref, AnnoFillImage s) { + write_mem(a, ref.offset >> 2, Annotated_FillImage); + AnnoFillImage_write(a, AnnoFillImageRef(ref.offset + 4), s); +} + void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, AnnoClip s) { write_mem(a, ref.offset >> 2, Annotated_BeginClip); AnnoClip_write(a, AnnoClipRef(ref.offset + 4), s); diff --git a/piet-gpu/shader/backdrop.comp b/piet-gpu/shader/backdrop.comp index 85e54e8..49de925 100644 --- a/piet-gpu/shader/backdrop.comp +++ b/piet-gpu/shader/backdrop.comp @@ -49,6 +49,7 @@ void main() { uint tag = Annotated_tag(conf.anno_alloc, ref); switch (tag) { case Annotated_Fill: + case Annotated_FillImage: case Annotated_BeginClip: PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size); Path path = Path_read(conf.tile_alloc, path_ref); diff --git a/piet-gpu/shader/backdrop.spv b/piet-gpu/shader/backdrop.spv index 48fcb48..f33a50b 100644 Binary files a/piet-gpu/shader/backdrop.spv and b/piet-gpu/shader/backdrop.spv differ diff --git a/piet-gpu/shader/binning.comp b/piet-gpu/shader/binning.comp index 8ad72c7..3a63ac2 100644 --- a/piet-gpu/shader/binning.comp +++ b/piet-gpu/shader/binning.comp @@ -61,6 +61,7 @@ void main() { int x0 = 0, y0 = 0, x1 = 0, y1 = 0; switch (tag) { case Annotated_Fill: + case Annotated_FillImage: case Annotated_Stroke: case Annotated_BeginClip: case Annotated_EndClip: diff --git a/piet-gpu/shader/binning.spv b/piet-gpu/shader/binning.spv index be8662d..6fb185d 100644 Binary files a/piet-gpu/shader/binning.spv and b/piet-gpu/shader/binning.spv differ diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 3f4e460..069367f 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -203,6 +203,7 @@ void main() { uint tile_count; switch (tag) { case Annotated_Fill: + case Annotated_FillImage: case Annotated_Stroke: case Annotated_BeginClip: case Annotated_EndClip: @@ -326,6 +327,28 @@ void main() { } cmd_ref.offset += Cmd_size; break; + case Annotated_FillImage: + tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix] + + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); + AnnoFillImage fill_img = Annotated_FillImage_read(conf.anno_alloc, ref); + if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { + break; + } + if (tile.tile.offset != 0) { + CmdFillImage cmd_fill_img; + cmd_fill_img.tile_ref = tile.tile.offset; + cmd_fill_img.backdrop = tile.backdrop; + cmd_fill_img.index = fill_img.index; + cmd_fill_img.offset = fill_img.offset; + Cmd_FillImage_write(cmd_alloc, cmd_ref, cmd_fill_img); + } else { + CmdSolidImage cmd_solid_img; + cmd_solid_img.index = fill_img.index; + cmd_solid_img.offset = fill_img.offset; + Cmd_SolidImage_write(cmd_alloc, cmd_ref, cmd_solid_img); + } + cmd_ref.offset += Cmd_size; + break; case Annotated_BeginClip: tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix] + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size)); diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 23eb962..7d0dcce 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/shader/elements.comp b/piet-gpu/shader/elements.comp index ad899e0..eb18571 100644 --- a/piet-gpu/shader/elements.comp +++ b/piet-gpu/shader/elements.comp @@ -132,6 +132,7 @@ State map_element(ElementRef ref) { c.pathseg_count = 1; break; case Element_Fill: + case Element_FillImage: case Element_Stroke: case Element_BeginClip: c.flags = FLAG_RESET_BBOX; @@ -374,6 +375,15 @@ void main() { out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size); Annotated_Fill_write(conf.anno_alloc, out_ref, anno_fill); break; + case Element_FillImage: + FillImage fill_img = Element_FillImage_read(this_ref); + AnnoFillImage anno_fill_img; + anno_fill_img.index = fill_img.index; + anno_fill_img.offset = fill_img.offset; + anno_fill_img.bbox = st.bbox; + out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size); + Annotated_FillImage_write(conf.anno_alloc, out_ref, anno_fill_img); + break; case Element_BeginClip: Clip begin_clip = Element_BeginClip_read(this_ref); AnnoClip anno_begin_clip = AnnoClip(begin_clip.bbox); diff --git a/piet-gpu/shader/elements.spv b/piet-gpu/shader/elements.spv index 70db1ae..2c61b2e 100644 Binary files a/piet-gpu/shader/elements.spv and b/piet-gpu/shader/elements.spv differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index 23e6e40..922ae83 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -23,7 +23,11 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf { layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image; -layout(set = 0, binding = 3) uniform sampler2D textures[]; +#if GL_EXT_nonuniform_qualifier +layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[]; +#else +layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1]; +#endif #include "ptcl.h" #include "tile.h" @@ -86,6 +90,49 @@ float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) { return area; } +vec3 tosRGB(vec3 rgb) { + bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308)); + vec3 below = vec3(12.92)*rgb; + vec3 above = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055); + return mix(below, above, cutoff); +} + +vec3 fromsRGB(vec3 srgb) { + // Formula from EXT_sRGB. + bvec3 cutoff = greaterThanEqual(srgb, vec3(0.04045)); + vec3 below = srgb/vec3(12.92); + vec3 above = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4)); + return mix(below, above, cutoff); +} + +// unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color +// space. +vec4 unpacksRGB(uint srgba) { + vec4 color = unpackUnorm4x8(srgba).wzyx; + return vec4(fromsRGB(color.rgb), color.a); +} + +// packsRGB packs a color in the linear color space into its 8-bit sRGB equivalent. +uint packsRGB(vec4 rgba) { + rgba = vec4(tosRGB(rgba.rgb), rgba.a); + return packUnorm4x8(rgba.wzyx); +} + +vec4[CHUNK] fillImage(uvec2 xy, CmdSolidImage cmd_img) { + vec4 rgba[CHUNK]; + for (uint i = 0; i < CHUNK; i++) { + ivec2 uv = ivec2(xy.x, xy.y + i * CHUNK_DY) + cmd_img.offset; +#ifdef ENABLE_IMAGE_INDICES + vec4 fg_rgba = imageLoad(images[cmd_img.index], uv); +#else + vec4 fg_rgba = imageLoad(images[0], uv); +#endif + fg_rgba.rgb = fromsRGB(fg_rgba.rgb); + rgba[i] = fg_rgba; + } + return rgba; +} + void main() { if (mem_error != NO_ERROR) { return; @@ -105,9 +152,11 @@ void main() { Alloc clip_tos = new_alloc(0, 0); for (uint i = 0; i < CHUNK; i++) { rgb[i] = vec3(0.5); +#ifdef ENABLE_IMAGE_INDICES if (xy_uint.x < 1024 && xy_uint.y < 1024) { - rgb[i] = texture(textures[gl_WorkGroupID.x / 64], vec2(xy_uint.x, xy_uint.y + CHUNK_DY * i) / 1024.0).rgb; + rgb[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4).rgb; } +#endif mask[i] = 1.0; } @@ -134,7 +183,7 @@ void main() { } tile_seg_ref = seg.next; } while (tile_seg_ref.offset != 0); - vec4 fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx; + vec4 fg_rgba = unpacksRGB(stroke.rgba_color); for (uint k = 0; k < CHUNK; k++) { float alpha = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0); rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * alpha * fg_rgba.a); @@ -144,11 +193,19 @@ void main() { CmdFill fill = Cmd_Fill_read(cmd_alloc, cmd_ref); float area[CHUNK]; area = computeArea(xy, fill.backdrop, fill.tile_ref); - fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx; + fg_rgba = unpacksRGB(fill.rgba_color); for (uint k = 0; k < CHUNK; k++) { rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * area[k] * fg_rgba.a); } break; + case Cmd_FillImage: + CmdFillImage fill_img = Cmd_FillImage_read(cmd_alloc, cmd_ref); + area = computeArea(xy, fill_img.backdrop, fill_img.tile_ref); + vec4 rgba[CHUNK] = fillImage(xy_uint, CmdSolidImage(fill_img.index, fill_img.offset)); + for (uint k = 0; k < CHUNK; k++) { + rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * area[k] * rgba[k].a); + } + break; case Cmd_BeginClip: case Cmd_BeginSolidClip: uint blend_slot = blend_sp % BLEND_STACK_SIZE; @@ -169,13 +226,13 @@ void main() { CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_alloc, cmd_ref); area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref); for (uint k = 0; k < CHUNK; k++) { - blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0))); + blend_stack[blend_slot][k] = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0))); } } else { CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_alloc, cmd_ref); float solid_alpha = begin_solid_clip.alpha; for (uint k = 0; k < CHUNK; k++) { - blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], solid_alpha)); + blend_stack[blend_slot][k] = packsRGB(vec4(rgb[k], solid_alpha)); } } blend_sp++; @@ -193,17 +250,24 @@ void main() { } blend_sp--; for (uint k = 0; k < CHUNK; k++) { - vec4 rgba = unpackUnorm4x8(blend_stack[blend_slot][k]); + vec4 rgba = unpacksRGB(blend_stack[blend_slot][k]); rgb[k] = mix(rgba.rgb, rgb[k], end_clip.alpha * rgba.a); } break; case Cmd_Solid: CmdSolid solid = Cmd_Solid_read(cmd_alloc, cmd_ref); - fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx; + fg_rgba = unpacksRGB(solid.rgba_color); for (uint k = 0; k < CHUNK; k++) { rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * fg_rgba.a); } break; + case Cmd_SolidImage: + CmdSolidImage solid_img = Cmd_SolidImage_read(cmd_alloc, cmd_ref); + rgba = fillImage(xy_uint, solid_img); + for (uint k = 0; k < CHUNK; k++) { + rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * rgba[k].a); + } + break; case Cmd_Jump: cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref); cmd_alloc.offset = cmd_ref.offset; @@ -212,8 +276,7 @@ void main() { cmd_ref.offset += Cmd_size; } - // TODO: sRGB for (uint i = 0; i < CHUNK; i++) { - imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(rgb[i], 1.0)); + imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgb[i]), 1.0)); } } diff --git a/piet-gpu/shader/kernel4.spv b/piet-gpu/shader/kernel4.spv index b48d661..7768659 100644 Binary files a/piet-gpu/shader/kernel4.spv and b/piet-gpu/shader/kernel4.spv differ diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h index 8e7d05e..53b9850 100644 --- a/piet-gpu/shader/ptcl.h +++ b/piet-gpu/shader/ptcl.h @@ -10,6 +10,10 @@ struct CmdFillRef { uint offset; }; +struct CmdFillImageRef { + uint offset; +}; + struct CmdBeginClipRef { uint offset; }; @@ -26,6 +30,10 @@ struct CmdSolidRef { uint offset; }; +struct CmdSolidImageRef { + uint offset; +}; + struct CmdJumpRef { uint offset; }; @@ -58,6 +66,19 @@ CmdFillRef CmdFill_index(CmdFillRef ref, uint index) { return CmdFillRef(ref.offset + index * CmdFill_size); } +struct CmdFillImage { + uint tile_ref; + int backdrop; + uint index; + ivec2 offset; +}; + +#define CmdFillImage_size 16 + +CmdFillImageRef CmdFillImage_index(CmdFillImageRef ref, uint index) { + return CmdFillImageRef(ref.offset + index * CmdFillImage_size); +} + struct CmdBeginClip { uint tile_ref; int backdrop; @@ -99,6 +120,17 @@ CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) { return CmdSolidRef(ref.offset + index * CmdSolid_size); } +struct CmdSolidImage { + uint index; + ivec2 offset; +}; + +#define CmdSolidImage_size 8 + +CmdSolidImageRef CmdSolidImage_index(CmdSolidImageRef ref, uint index) { + return CmdSolidImageRef(ref.offset + index * CmdSolidImage_size); +} + struct CmdJump { uint new_ref; }; @@ -111,13 +143,15 @@ CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) { #define Cmd_End 0 #define Cmd_Fill 1 -#define Cmd_BeginClip 2 -#define Cmd_BeginSolidClip 3 -#define Cmd_EndClip 4 -#define Cmd_Stroke 5 -#define Cmd_Solid 6 -#define Cmd_Jump 7 -#define Cmd_size 16 +#define Cmd_FillImage 2 +#define Cmd_BeginClip 3 +#define Cmd_BeginSolidClip 4 +#define Cmd_EndClip 5 +#define Cmd_Stroke 6 +#define Cmd_Solid 7 +#define Cmd_SolidImage 8 +#define Cmd_Jump 9 +#define Cmd_size 20 CmdRef Cmd_index(CmdRef ref, uint index) { return CmdRef(ref.offset + index * Cmd_size); @@ -161,6 +195,28 @@ void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) { write_mem(a, ix + 2, s.rgba_color); } +CmdFillImage CmdFillImage_read(Alloc a, CmdFillImageRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = read_mem(a, ix + 0); + uint raw1 = read_mem(a, ix + 1); + uint raw2 = read_mem(a, ix + 2); + uint raw3 = read_mem(a, ix + 3); + CmdFillImage s; + s.tile_ref = raw0; + s.backdrop = int(raw1); + s.index = raw2; + s.offset = ivec2(int(raw3 << 16) >> 16, int(raw3) >> 16); + return s; +} + +void CmdFillImage_write(Alloc a, CmdFillImageRef ref, CmdFillImage s) { + uint ix = ref.offset >> 2; + write_mem(a, ix + 0, s.tile_ref); + write_mem(a, ix + 1, uint(s.backdrop)); + write_mem(a, ix + 2, s.index); + write_mem(a, ix + 3, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16)); +} + CmdBeginClip CmdBeginClip_read(Alloc a, CmdBeginClipRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -216,6 +272,22 @@ void CmdSolid_write(Alloc a, CmdSolidRef ref, CmdSolid s) { write_mem(a, ix + 0, s.rgba_color); } +CmdSolidImage CmdSolidImage_read(Alloc a, CmdSolidImageRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = read_mem(a, ix + 0); + uint raw1 = read_mem(a, ix + 1); + CmdSolidImage s; + s.index = raw0; + s.offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16); + return s; +} + +void CmdSolidImage_write(Alloc a, CmdSolidImageRef ref, CmdSolidImage s) { + uint ix = ref.offset >> 2; + write_mem(a, ix + 0, s.index); + write_mem(a, ix + 1, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16)); +} + CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -237,6 +309,10 @@ CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { return CmdFill_read(a, CmdFillRef(ref.offset + 4)); } +CmdFillImage Cmd_FillImage_read(Alloc a, CmdRef ref) { + return CmdFillImage_read(a, CmdFillImageRef(ref.offset + 4)); +} + CmdBeginClip Cmd_BeginClip_read(Alloc a, CmdRef ref) { return CmdBeginClip_read(a, CmdBeginClipRef(ref.offset + 4)); } @@ -257,6 +333,10 @@ CmdSolid Cmd_Solid_read(Alloc a, CmdRef ref) { return CmdSolid_read(a, CmdSolidRef(ref.offset + 4)); } +CmdSolidImage Cmd_SolidImage_read(Alloc a, CmdRef ref) { + return CmdSolidImage_read(a, CmdSolidImageRef(ref.offset + 4)); +} + CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { return CmdJump_read(a, CmdJumpRef(ref.offset + 4)); } @@ -270,6 +350,11 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) { CmdFill_write(a, CmdFillRef(ref.offset + 4), s); } +void Cmd_FillImage_write(Alloc a, CmdRef ref, CmdFillImage s) { + write_mem(a, ref.offset >> 2, Cmd_FillImage); + CmdFillImage_write(a, CmdFillImageRef(ref.offset + 4), s); +} + void Cmd_BeginClip_write(Alloc a, CmdRef ref, CmdBeginClip s) { write_mem(a, ref.offset >> 2, Cmd_BeginClip); CmdBeginClip_write(a, CmdBeginClipRef(ref.offset + 4), s); @@ -295,6 +380,11 @@ void Cmd_Solid_write(Alloc a, CmdRef ref, CmdSolid s) { CmdSolid_write(a, CmdSolidRef(ref.offset + 4), s); } +void Cmd_SolidImage_write(Alloc a, CmdRef ref, CmdSolidImage s) { + write_mem(a, ref.offset >> 2, Cmd_SolidImage); + CmdSolidImage_write(a, CmdSolidImageRef(ref.offset + 4), s); +} + void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) { write_mem(a, ref.offset >> 2, Cmd_Jump); CmdJump_write(a, CmdJumpRef(ref.offset + 4), s); diff --git a/piet-gpu/shader/scene.h b/piet-gpu/shader/scene.h index d0bba6f..e6ea591 100644 --- a/piet-gpu/shader/scene.h +++ b/piet-gpu/shader/scene.h @@ -18,6 +18,10 @@ struct FillRef { uint offset; }; +struct FillImageRef { + uint offset; +}; + struct StrokeRef { uint offset; }; @@ -84,6 +88,17 @@ FillRef Fill_index(FillRef ref, uint index) { return FillRef(ref.offset + index * Fill_size); } +struct FillImage { + uint index; + ivec2 offset; +}; + +#define FillImage_size 8 + +FillImageRef FillImage_index(FillImageRef ref, uint index) { + return FillImageRef(ref.offset + index * FillImage_size); +} + struct Stroke { uint rgba_color; }; @@ -138,6 +153,7 @@ ClipRef Clip_index(ClipRef ref, uint index) { #define Element_Transform 10 #define Element_BeginClip 11 #define Element_EndClip 12 +#define Element_FillImage 13 #define Element_size 36 ElementRef Element_index(ElementRef ref, uint index) { @@ -197,6 +213,16 @@ Fill Fill_read(FillRef ref) { return s; } +FillImage FillImage_read(FillImageRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = scene[ix + 0]; + uint raw1 = scene[ix + 1]; + FillImage s; + s.index = raw0; + s.offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16); + return s; +} + Stroke Stroke_read(StrokeRef ref) { uint ix = ref.offset >> 2; uint raw0 = scene[ix + 0]; @@ -290,3 +316,7 @@ Clip Element_EndClip_read(ElementRef ref) { return Clip_read(ClipRef(ref.offset + 4)); } +FillImage Element_FillImage_read(ElementRef ref) { + return FillImage_read(FillImageRef(ref.offset + 4)); +} + diff --git a/piet-gpu/shader/setup.h b/piet-gpu/shader/setup.h index f2ca87c..7d4a3e8 100644 --- a/piet-gpu/shader/setup.h +++ b/piet-gpu/shader/setup.h @@ -15,6 +15,8 @@ #define PTCL_INITIAL_ALLOC 1024 +#define ENABLE_IMAGE_INDICES + // These should probably be renamed and/or reworked. In the binning // kernel, they represent the number of bins. Also, the workgroup size // of that kernel is equal to the number of bins, but should probably diff --git a/piet-gpu/shader/tile_alloc.comp b/piet-gpu/shader/tile_alloc.comp index f0d42da..896bb22 100644 --- a/piet-gpu/shader/tile_alloc.comp +++ b/piet-gpu/shader/tile_alloc.comp @@ -44,6 +44,7 @@ void main() { int x0 = 0, y0 = 0, x1 = 0, y1 = 0; switch (tag) { case Annotated_Fill: + case Annotated_FillImage: case Annotated_Stroke: case Annotated_BeginClip: case Annotated_EndClip: diff --git a/piet-gpu/shader/tile_alloc.spv b/piet-gpu/shader/tile_alloc.spv index f97a7d5..5fb2a95 100644 Binary files a/piet-gpu/shader/tile_alloc.spv and b/piet-gpu/shader/tile_alloc.spv differ diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 5f0f504..21860e0 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -12,7 +12,7 @@ use piet::{Color, ImageFormat, RenderContext}; use piet_gpu_types::encoder::Encode; -use piet_gpu_hal::{SamplerParams, hub}; +use piet_gpu_hal::{hub}; use piet_gpu_hal::{CmdBuf, Error, ImageLayout, MemFlags}; use pico_svg::PicoSvg; @@ -192,7 +192,8 @@ pub struct Renderer { n_paths: usize, n_pathseg: usize, - bg_image: hub::Image, + // Keep a reference to the image so that it is not destroyed. + _bg_image: hub::Image, } impl Renderer { @@ -304,8 +305,10 @@ impl Renderer { // images encoded (I believe there's an cost when allocating descriptor pools). If // it can't be satisfied, then for compatibility we'll probably want to fall back // to an atlasing approach. - let max_textures = 256; - let sampler = session.create_sampler(SamplerParams::Linear)?; + // + // However, we're adding only one texture for now. Avoid a harmless Vulkan validation + // error by using a tight bound. + let max_textures = 1; let k4_pipeline = session .pipeline_builder() .add_buffers(2) @@ -316,7 +319,7 @@ impl Renderer { .descriptor_set_builder() .add_buffers(&[&memory_buf_dev, &config_buf_dev]) .add_images(&[&image_dev]) - .add_textures(&[&bg_image], &sampler) + .add_textures(&[&bg_image]) .build(&session, &k4_pipeline)?; Ok(Renderer { @@ -345,7 +348,7 @@ impl Renderer { n_elements, n_paths, n_pathseg, - bg_image, + _bg_image: bg_image, }) } @@ -452,7 +455,7 @@ impl Renderer { ImageLayout::BlitDst, ); cmd_buf.copy_buffer_to_image(buffer.vk_buffer(), image.vk_image()); - cmd_buf.image_barrier(image.vk_image(), ImageLayout::BlitDst, ImageLayout::ShaderRead); + cmd_buf.image_barrier(image.vk_image(), ImageLayout::BlitDst, ImageLayout::General); cmd_buf.finish(); // Make sure not to drop the buffer and image until the command buffer completes. cmd_buf.add_resource(&buffer);