mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 18:56:35 +11:00
Merge pull request #45 from linebender/clip_scratch
Scratch buffer for clip stack
This commit is contained in:
commit
4bbc7dee1d
5 changed files with 107 additions and 20 deletions
|
@ -148,7 +148,7 @@ pub trait CmdBuf<D: Device> {
|
||||||
/// This is readily supported in Vulkan, but for portability it is remarkably
|
/// This is readily supported in Vulkan, but for portability it is remarkably
|
||||||
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
|
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
|
||||||
/// kernel, or organize the code not to need it.
|
/// kernel, or organize the code not to need it.
|
||||||
unsafe fn clear_buffer(&self, buffer: &D::Buffer);
|
unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option<u64>);
|
||||||
|
|
||||||
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
|
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);
|
||||||
|
|
||||||
|
|
|
@ -902,9 +902,10 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn clear_buffer(&self, buffer: &Buffer) {
|
unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
|
||||||
let device = &self.device.device;
|
let device = &self.device.device;
|
||||||
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0);
|
let size = size.unwrap_or(vk::WHOLE_SIZE);
|
||||||
|
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, size, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
|
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {
|
||||||
|
|
|
@ -22,13 +22,39 @@ layout(set = 0, binding = 1) buffer TileBuf {
|
||||||
uint[] tile;
|
uint[] tile;
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
|
layout(set = 0, binding = 2) buffer ClipScratchBuf {
|
||||||
|
uint[] clip_scratch;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;
|
||||||
|
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
|
|
||||||
#define BLEND_STACK_SIZE 4
|
#define BLEND_STACK_SIZE 4
|
||||||
|
|
||||||
|
// Layout of clip_scratch buffer:
|
||||||
|
// [0] is the alloc bump offset (in units of 32 bit words, initially 0)
|
||||||
|
// Starting at 1 is a sequence of frames.
|
||||||
|
// Each frame is WIDTH * HEIGHT 32-bit words, then a link reference.
|
||||||
|
|
||||||
|
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
|
||||||
|
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)
|
||||||
|
|
||||||
|
shared uint sh_clip_alloc;
|
||||||
|
|
||||||
|
// Allocate a scratch buffer for clipping. Unlike offsets in the rest of the code,
|
||||||
|
// it counts 32-bit words.
|
||||||
|
uint alloc_clip_buf(uint link) {
|
||||||
|
if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {
|
||||||
|
uint alloc = atomicAdd(clip_scratch[0], CLIP_BUF_SIZE) + 1;
|
||||||
|
sh_clip_alloc = alloc;
|
||||||
|
clip_scratch[alloc + CLIP_LINK_OFFSET] = link;
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
return sh_clip_alloc;
|
||||||
|
}
|
||||||
|
|
||||||
// Calculate coverage based on backdrop + coverage of each line segment
|
// Calculate coverage based on backdrop + coverage of each line segment
|
||||||
float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
|
float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
|
||||||
// Probably better to store as float, but conversion is no doubt cheap.
|
// Probably better to store as float, but conversion is no doubt cheap.
|
||||||
|
@ -72,7 +98,9 @@ void main() {
|
||||||
vec3 rgb[CHUNK];
|
vec3 rgb[CHUNK];
|
||||||
float mask[CHUNK];
|
float mask[CHUNK];
|
||||||
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
|
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
|
||||||
|
uint blend_spill = 0;
|
||||||
uint blend_sp = 0;
|
uint blend_sp = 0;
|
||||||
|
uint clip_tos = 0;
|
||||||
for (uint i = 0; i < CHUNK; i++) {
|
for (uint i = 0; i < CHUNK; i++) {
|
||||||
rgb[i] = vec3(0.5);
|
rgb[i] = vec3(0.5);
|
||||||
mask[i] = 1.0;
|
mask[i] = 1.0;
|
||||||
|
@ -142,26 +170,46 @@ void main() {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case Cmd_BeginClip:
|
case Cmd_BeginClip:
|
||||||
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref);
|
|
||||||
area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref);
|
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
|
||||||
blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
|
|
||||||
}
|
|
||||||
blend_sp++;
|
|
||||||
break;
|
|
||||||
case Cmd_BeginSolidClip:
|
case Cmd_BeginSolidClip:
|
||||||
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref);
|
uint blend_slot = blend_sp % BLEND_STACK_SIZE;
|
||||||
float solid_alpha = begin_solid_clip.alpha;
|
if (blend_sp == blend_spill + BLEND_STACK_SIZE) {
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
// spill to scratch buffer
|
||||||
blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], solid_alpha));
|
clip_tos = alloc_clip_buf(clip_tos);
|
||||||
|
uint base_ix = clip_tos + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
clip_scratch[base_ix + k * TILE_WIDTH_PX * CHUNK_DY] = blend_stack[blend_slot][k];
|
||||||
|
}
|
||||||
|
blend_spill++;
|
||||||
|
}
|
||||||
|
if (tag == Cmd_BeginClip) {
|
||||||
|
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref);
|
||||||
|
area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref);
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref);
|
||||||
|
float solid_alpha = begin_solid_clip.alpha;
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], solid_alpha));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
blend_sp++;
|
blend_sp++;
|
||||||
break;
|
break;
|
||||||
case Cmd_EndClip:
|
case Cmd_EndClip:
|
||||||
CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref);
|
CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref);
|
||||||
|
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
|
||||||
|
if (blend_sp == blend_spill) {
|
||||||
|
uint base_ix = clip_tos + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
|
||||||
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
|
blend_stack[blend_slot][k] = clip_scratch[base_ix + k * TILE_WIDTH_PX * CHUNK_DY];
|
||||||
|
}
|
||||||
|
clip_tos = clip_scratch[clip_tos + CLIP_LINK_OFFSET];
|
||||||
|
blend_spill--;
|
||||||
|
}
|
||||||
blend_sp--;
|
blend_sp--;
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec4 rgba = unpackUnorm4x8(blend_stack[blend_sp][k]);
|
vec4 rgba = unpackUnorm4x8(blend_stack[blend_slot][k]);
|
||||||
rgb[k] = mix(rgba.rgb, rgb[k], end_clip.alpha * rgba.a);
|
rgb[k] = mix(rgba.rgb, rgb[k], end_clip.alpha * rgba.a);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
Binary file not shown.
|
@ -73,7 +73,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
|
||||||
5.0,
|
5.0,
|
||||||
);
|
);
|
||||||
//render_cardioid(rc);
|
//render_cardioid(rc);
|
||||||
render_tiger(rc);
|
render_clip_test(rc);
|
||||||
|
//render_tiger(rc);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
|
@ -94,6 +95,33 @@ fn render_cardioid(rc: &mut impl RenderContext) {
|
||||||
rc.stroke(&path, &Color::BLACK, 2.0);
|
rc.stroke(&path, &Color::BLACK, 2.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
|
fn render_clip_test(rc: &mut impl RenderContext) {
|
||||||
|
const N: usize = 16;
|
||||||
|
const X0: f64 = 50.0;
|
||||||
|
const Y0: f64 = 50.0;
|
||||||
|
const X1: f64 = 100.0;
|
||||||
|
const Y1: f64 = 100.0;
|
||||||
|
let step = 1.0 / ((N + 1) as f64);
|
||||||
|
for i in 0..N {
|
||||||
|
let t = ((i + 1) as f64) * step;
|
||||||
|
rc.save();
|
||||||
|
let mut path = BezPath::new();
|
||||||
|
path.move_to((X0, Y0));
|
||||||
|
path.line_to((X1, Y0));
|
||||||
|
path.line_to((X1, Y0 + t * (Y1 - Y0)));
|
||||||
|
path.line_to((X1 + t * (X0 - X1), Y1));
|
||||||
|
path.line_to((X0, Y1));
|
||||||
|
path.close_path();
|
||||||
|
rc.clip(path);
|
||||||
|
}
|
||||||
|
let rect = piet::kurbo::Rect::new(X0, Y0, X1, Y1);
|
||||||
|
rc.fill(rect, &Color::BLACK);
|
||||||
|
for _ in 0..N {
|
||||||
|
rc.restore();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn render_tiger(rc: &mut impl RenderContext) {
|
fn render_tiger(rc: &mut impl RenderContext) {
|
||||||
let xml_str = std::str::from_utf8(include_bytes!("../Ghostscript_Tiger.svg")).unwrap();
|
let xml_str = std::str::from_utf8(include_bytes!("../Ghostscript_Tiger.svg")).unwrap();
|
||||||
let start = std::time::Instant::now();
|
let start = std::time::Instant::now();
|
||||||
|
@ -163,6 +191,8 @@ pub struct Renderer {
|
||||||
coarse_alloc_buf_host: hub::Buffer,
|
coarse_alloc_buf_host: hub::Buffer,
|
||||||
coarse_alloc_buf_dev: hub::Buffer,
|
coarse_alloc_buf_dev: hub::Buffer,
|
||||||
|
|
||||||
|
clip_scratch_buf: hub::Buffer,
|
||||||
|
|
||||||
k4_pipeline: hub::Pipeline,
|
k4_pipeline: hub::Pipeline,
|
||||||
k4_ds: hub::DescriptorSet,
|
k4_ds: hub::DescriptorSet,
|
||||||
|
|
||||||
|
@ -278,6 +308,8 @@ impl Renderer {
|
||||||
&[],
|
&[],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
let clip_scratch_buf = session.create_buffer(1024 * 1024, dev)?;
|
||||||
|
|
||||||
let mut coarse_alloc_buf_host = session.create_buffer(8, host)?;
|
let mut coarse_alloc_buf_host = session.create_buffer(8, host)?;
|
||||||
let coarse_alloc_buf_dev = session.create_buffer(8, dev)?;
|
let coarse_alloc_buf_dev = session.create_buffer(8, dev)?;
|
||||||
|
|
||||||
|
@ -298,10 +330,14 @@ impl Renderer {
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let k4_code = include_bytes!("../shader/kernel4.spv");
|
let k4_code = include_bytes!("../shader/kernel4.spv");
|
||||||
let k4_pipeline = session.create_simple_compute_pipeline(k4_code, 2, 1)?;
|
let k4_pipeline = session.create_simple_compute_pipeline(k4_code, 3, 1)?;
|
||||||
let k4_ds = session.create_descriptor_set(
|
let k4_ds = session.create_descriptor_set(
|
||||||
&k4_pipeline,
|
&k4_pipeline,
|
||||||
&[ptcl_buf.vk_buffer(), tile_buf.vk_buffer()],
|
&[
|
||||||
|
ptcl_buf.vk_buffer(),
|
||||||
|
tile_buf.vk_buffer(),
|
||||||
|
clip_scratch_buf.vk_buffer(),
|
||||||
|
],
|
||||||
&[image_dev.vk_image()],
|
&[image_dev.vk_image()],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
@ -335,6 +371,7 @@ impl Renderer {
|
||||||
bin_alloc_buf_dev,
|
bin_alloc_buf_dev,
|
||||||
coarse_alloc_buf_host,
|
coarse_alloc_buf_host,
|
||||||
coarse_alloc_buf_dev,
|
coarse_alloc_buf_dev,
|
||||||
|
clip_scratch_buf,
|
||||||
n_elements,
|
n_elements,
|
||||||
n_paths,
|
n_paths,
|
||||||
n_pathseg,
|
n_pathseg,
|
||||||
|
@ -355,7 +392,8 @@ impl Renderer {
|
||||||
self.coarse_alloc_buf_host.vk_buffer(),
|
self.coarse_alloc_buf_host.vk_buffer(),
|
||||||
self.coarse_alloc_buf_dev.vk_buffer(),
|
self.coarse_alloc_buf_dev.vk_buffer(),
|
||||||
);
|
);
|
||||||
cmd_buf.clear_buffer(self.state_buf.vk_buffer());
|
cmd_buf.clear_buffer(self.state_buf.vk_buffer(), None);
|
||||||
|
cmd_buf.clear_buffer(self.clip_scratch_buf.vk_buffer(), Some(4));
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.image_barrier(
|
cmd_buf.image_barrier(
|
||||||
self.image_dev.vk_image(),
|
self.image_dev.vk_image(),
|
||||||
|
|
Loading…
Add table
Reference in a new issue