Merge pull request #45 from linebender/clip_scratch

Scratch buffer for clip stack
This commit is contained in:
Raph Levien 2020-11-24 22:11:09 -08:00 committed by GitHub
commit 4bbc7dee1d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 107 additions and 20 deletions

View file

@ -148,7 +148,7 @@ pub trait CmdBuf<D: Device> {
/// This is readily supported in Vulkan, but for portability it is remarkably /// This is readily supported in Vulkan, but for portability it is remarkably
/// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute /// tricky (unimplemented in gfx-hal right now). Possibly best to write a compute
/// kernel, or organize the code not to need it. /// kernel, or organize the code not to need it.
unsafe fn clear_buffer(&self, buffer: &D::Buffer); unsafe fn clear_buffer(&self, buffer: &D::Buffer, size: Option<u64>);
unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer); unsafe fn copy_buffer(&self, src: &D::Buffer, dst: &D::Buffer);

View file

@ -902,9 +902,10 @@ impl crate::CmdBuf<VkDevice> for CmdBuf {
); );
} }
unsafe fn clear_buffer(&self, buffer: &Buffer) { unsafe fn clear_buffer(&self, buffer: &Buffer, size: Option<u64>) {
let device = &self.device.device; let device = &self.device.device;
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, vk::WHOLE_SIZE, 0); let size = size.unwrap_or(vk::WHOLE_SIZE);
device.cmd_fill_buffer(self.cmd_buf, buffer.buffer, 0, size, 0);
} }
unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) { unsafe fn copy_buffer(&self, src: &Buffer, dst: &Buffer) {

View file

@ -22,13 +22,39 @@ layout(set = 0, binding = 1) buffer TileBuf {
uint[] tile; uint[] tile;
}; };
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image; layout(set = 0, binding = 2) buffer ClipScratchBuf {
uint[] clip_scratch;
};
layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;
#include "ptcl.h" #include "ptcl.h"
#include "tile.h" #include "tile.h"
#define BLEND_STACK_SIZE 4 #define BLEND_STACK_SIZE 4
// Layout of clip_scratch buffer:
// [0] is the alloc bump offset (in units of 32-bit words, initially 0).
// Starting at index 1 is a sequence of frames.
// Each frame is TILE_WIDTH_PX * TILE_HEIGHT_PX 32-bit words, followed by one
// link word holding the base index of the previously allocated frame (0 if none).
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)
shared uint sh_clip_alloc;
// Allocate a scratch frame for clipping and return its base index.
//
// Unlike offsets in the rest of the code, the returned value counts 32-bit
// words: it is the index into clip_scratch of the frame's first word.
// `link` is stored in the frame's trailing link word (at CLIP_LINK_OFFSET) so
// the caller can recover the previously allocated frame when this one is freed.
//
// Only invocation (0, 0) of the workgroup performs the allocation; the result
// is broadcast to the other invocations through the shared variable
// sh_clip_alloc. Because this function executes barrier(), every invocation
// of the workgroup must call it from uniform control flow.
//
// NOTE(review): there is no check that the bump offset stays within the
// bounds of clip_scratch; the caller must size the buffer generously.
uint alloc_clip_buf(uint link) {
    if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) {
        // The +1 skips word 0 of clip_scratch, which holds the bump offset itself.
        uint alloc = atomicAdd(clip_scratch[0], CLIP_BUF_SIZE) + 1;
        sh_clip_alloc = alloc;
        clip_scratch[alloc + CLIP_LINK_OFFSET] = link;
    }
    // Make the write to sh_clip_alloc visible to the whole workgroup.
    barrier();
    uint result = sh_clip_alloc;
    // Ensure every invocation has read sh_clip_alloc before invocation (0, 0)
    // can overwrite it in a subsequent call; without this, a fast invocation 0
    // could race ahead and clobber the value while others are still reading it.
    barrier();
    return result;
}
// Calculate coverage based on backdrop + coverage of each line segment // Calculate coverage based on backdrop + coverage of each line segment
float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) { float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
// Probably better to store as float, but conversion is no doubt cheap. // Probably better to store as float, but conversion is no doubt cheap.
@ -72,7 +98,9 @@ void main() {
vec3 rgb[CHUNK]; vec3 rgb[CHUNK];
float mask[CHUNK]; float mask[CHUNK];
uint blend_stack[BLEND_STACK_SIZE][CHUNK]; uint blend_stack[BLEND_STACK_SIZE][CHUNK];
uint blend_spill = 0;
uint blend_sp = 0; uint blend_sp = 0;
uint clip_tos = 0;
for (uint i = 0; i < CHUNK; i++) { for (uint i = 0; i < CHUNK; i++) {
rgb[i] = vec3(0.5); rgb[i] = vec3(0.5);
mask[i] = 1.0; mask[i] = 1.0;
@ -142,26 +170,46 @@ void main() {
} }
break; break;
case Cmd_BeginClip: case Cmd_BeginClip:
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref);
area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref);
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
}
blend_sp++;
break;
case Cmd_BeginSolidClip: case Cmd_BeginSolidClip:
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref); uint blend_slot = blend_sp % BLEND_STACK_SIZE;
float solid_alpha = begin_solid_clip.alpha; if (blend_sp == blend_spill + BLEND_STACK_SIZE) {
for (uint k = 0; k < CHUNK; k++) { // spill to scratch buffer
blend_stack[blend_sp][k] = packUnorm4x8(vec4(rgb[k], solid_alpha)); clip_tos = alloc_clip_buf(clip_tos);
uint base_ix = clip_tos + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
for (uint k = 0; k < CHUNK; k++) {
clip_scratch[base_ix + k * TILE_WIDTH_PX * CHUNK_DY] = blend_stack[blend_slot][k];
}
blend_spill++;
}
if (tag == Cmd_BeginClip) {
CmdBeginClip begin_clip = Cmd_BeginClip_read(cmd_ref);
area = computeArea(xy, begin_clip.backdrop, begin_clip.tile_ref);
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
}
} else {
CmdBeginSolidClip begin_solid_clip = Cmd_BeginSolidClip_read(cmd_ref);
float solid_alpha = begin_solid_clip.alpha;
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = packUnorm4x8(vec4(rgb[k], solid_alpha));
}
} }
blend_sp++; blend_sp++;
break; break;
case Cmd_EndClip: case Cmd_EndClip:
CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref); CmdEndClip end_clip = Cmd_EndClip_read(cmd_ref);
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
if (blend_sp == blend_spill) {
uint base_ix = clip_tos + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = clip_scratch[base_ix + k * TILE_WIDTH_PX * CHUNK_DY];
}
clip_tos = clip_scratch[clip_tos + CLIP_LINK_OFFSET];
blend_spill--;
}
blend_sp--; blend_sp--;
for (uint k = 0; k < CHUNK; k++) { for (uint k = 0; k < CHUNK; k++) {
vec4 rgba = unpackUnorm4x8(blend_stack[blend_sp][k]); vec4 rgba = unpackUnorm4x8(blend_stack[blend_slot][k]);
rgb[k] = mix(rgba.rgb, rgb[k], end_clip.alpha * rgba.a); rgb[k] = mix(rgba.rgb, rgb[k], end_clip.alpha * rgba.a);
} }
break; break;

Binary file not shown.

View file

@ -73,7 +73,8 @@ pub fn render_scene(rc: &mut impl RenderContext) {
5.0, 5.0,
); );
//render_cardioid(rc); //render_cardioid(rc);
render_tiger(rc); render_clip_test(rc);
//render_tiger(rc);
} }
#[allow(unused)] #[allow(unused)]
@ -94,6 +95,33 @@ fn render_cardioid(rc: &mut impl RenderContext) {
rc.stroke(&path, &Color::BLACK, 2.0); rc.stroke(&path, &Color::BLACK, 2.0);
} }
/// Renders a test scene made of `N` nested clips followed by a single fill.
///
/// Each clip is a pentagon: the square spanning `(X0, Y0)`-`(X1, Y1)` with the
/// region around the `(X1, Y1)` corner cut away along a line whose endpoints
/// slide with the nesting level. After all clips are pushed, the full square is
/// filled in black, and then every saved state is restored.
///
/// Presumably intended to exercise deep clip nesting in the renderer.
#[allow(unused)]
fn render_clip_test(rc: &mut impl RenderContext) {
    const N: usize = 16;
    const X0: f64 = 50.0;
    const Y0: f64 = 50.0;
    const X1: f64 = 100.0;
    const Y1: f64 = 100.0;

    // Interpolation parameter advances by this much per nesting level, so that
    // it stays strictly inside (0, 1).
    let step = 1.0 / ((N + 1) as f64);

    for level in 1..=N {
        let t = (level as f64) * step;
        rc.save();

        // Vertices after the starting point (X0, Y0), in drawing order.
        let corners = [
            (X1, Y0),
            (X1, Y0 + t * (Y1 - Y0)),
            (X1 + t * (X0 - X1), Y1),
            (X0, Y1),
        ];
        let mut clip_path = BezPath::new();
        clip_path.move_to((X0, Y0));
        for &corner in &corners {
            clip_path.line_to(corner);
        }
        clip_path.close_path();
        rc.clip(clip_path);
    }

    // Fill the whole square; only the intersection of all clips is painted.
    rc.fill(piet::kurbo::Rect::new(X0, Y0, X1, Y1), &Color::BLACK);

    // Pop one saved state per clip pushed above.
    for _ in 0..N {
        rc.restore();
    }
}
fn render_tiger(rc: &mut impl RenderContext) { fn render_tiger(rc: &mut impl RenderContext) {
let xml_str = std::str::from_utf8(include_bytes!("../Ghostscript_Tiger.svg")).unwrap(); let xml_str = std::str::from_utf8(include_bytes!("../Ghostscript_Tiger.svg")).unwrap();
let start = std::time::Instant::now(); let start = std::time::Instant::now();
@ -163,6 +191,8 @@ pub struct Renderer {
coarse_alloc_buf_host: hub::Buffer, coarse_alloc_buf_host: hub::Buffer,
coarse_alloc_buf_dev: hub::Buffer, coarse_alloc_buf_dev: hub::Buffer,
clip_scratch_buf: hub::Buffer,
k4_pipeline: hub::Pipeline, k4_pipeline: hub::Pipeline,
k4_ds: hub::DescriptorSet, k4_ds: hub::DescriptorSet,
@ -278,6 +308,8 @@ impl Renderer {
&[], &[],
)?; )?;
let clip_scratch_buf = session.create_buffer(1024 * 1024, dev)?;
let mut coarse_alloc_buf_host = session.create_buffer(8, host)?; let mut coarse_alloc_buf_host = session.create_buffer(8, host)?;
let coarse_alloc_buf_dev = session.create_buffer(8, dev)?; let coarse_alloc_buf_dev = session.create_buffer(8, dev)?;
@ -298,10 +330,14 @@ impl Renderer {
)?; )?;
let k4_code = include_bytes!("../shader/kernel4.spv"); let k4_code = include_bytes!("../shader/kernel4.spv");
let k4_pipeline = session.create_simple_compute_pipeline(k4_code, 2, 1)?; let k4_pipeline = session.create_simple_compute_pipeline(k4_code, 3, 1)?;
let k4_ds = session.create_descriptor_set( let k4_ds = session.create_descriptor_set(
&k4_pipeline, &k4_pipeline,
&[ptcl_buf.vk_buffer(), tile_buf.vk_buffer()], &[
ptcl_buf.vk_buffer(),
tile_buf.vk_buffer(),
clip_scratch_buf.vk_buffer(),
],
&[image_dev.vk_image()], &[image_dev.vk_image()],
)?; )?;
@ -335,6 +371,7 @@ impl Renderer {
bin_alloc_buf_dev, bin_alloc_buf_dev,
coarse_alloc_buf_host, coarse_alloc_buf_host,
coarse_alloc_buf_dev, coarse_alloc_buf_dev,
clip_scratch_buf,
n_elements, n_elements,
n_paths, n_paths,
n_pathseg, n_pathseg,
@ -355,7 +392,8 @@ impl Renderer {
self.coarse_alloc_buf_host.vk_buffer(), self.coarse_alloc_buf_host.vk_buffer(),
self.coarse_alloc_buf_dev.vk_buffer(), self.coarse_alloc_buf_dev.vk_buffer(),
); );
cmd_buf.clear_buffer(self.state_buf.vk_buffer()); cmd_buf.clear_buffer(self.state_buf.vk_buffer(), None);
cmd_buf.clear_buffer(self.clip_scratch_buf.vk_buffer(), Some(4));
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.image_barrier( cmd_buf.image_barrier(
self.image_dev.vk_image(), self.image_dev.vk_image(),