mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Merge pull request #20 from linebender/sorta
A sorta-middle architecture
This commit is contained in:
commit
dc5facd198
|
@ -3,9 +3,11 @@ use piet_gpu_derive::piet_gpu;
|
||||||
piet_gpu! {
|
piet_gpu! {
|
||||||
#[gpu_write]
|
#[gpu_write]
|
||||||
mod annotated {
|
mod annotated {
|
||||||
|
// Note: path segments have moved to pathseg, delete these.
|
||||||
struct AnnoFillLineSeg {
|
struct AnnoFillLineSeg {
|
||||||
p0: [f32; 2],
|
p0: [f32; 2],
|
||||||
p1: [f32; 2],
|
p1: [f32; 2],
|
||||||
|
path_ix: u32,
|
||||||
// A note: the layout of this struct is shared with
|
// A note: the layout of this struct is shared with
|
||||||
// AnnoStrokeLineSeg. In that case, we actually write
|
// AnnoStrokeLineSeg. In that case, we actually write
|
||||||
// [0.0, 0.0] as the stroke field, to minimize divergence.
|
// [0.0, 0.0] as the stroke field, to minimize divergence.
|
||||||
|
@ -13,6 +15,7 @@ piet_gpu! {
|
||||||
struct AnnoStrokeLineSeg {
|
struct AnnoStrokeLineSeg {
|
||||||
p0: [f32; 2],
|
p0: [f32; 2],
|
||||||
p1: [f32; 2],
|
p1: [f32; 2],
|
||||||
|
path_ix: u32,
|
||||||
// halfwidth in both x and y for binning
|
// halfwidth in both x and y for binning
|
||||||
stroke: [f32; 2],
|
stroke: [f32; 2],
|
||||||
}
|
}
|
||||||
|
|
|
@ -3,8 +3,10 @@
|
||||||
pub mod annotated;
|
pub mod annotated;
|
||||||
pub mod bins;
|
pub mod bins;
|
||||||
pub mod encoder;
|
pub mod encoder;
|
||||||
|
pub mod pathseg;
|
||||||
pub mod ptcl;
|
pub mod ptcl;
|
||||||
pub mod scene;
|
pub mod scene;
|
||||||
pub mod state;
|
pub mod state;
|
||||||
pub mod test;
|
pub mod test;
|
||||||
|
pub mod tile;
|
||||||
pub mod tilegroup;
|
pub mod tilegroup;
|
||||||
|
|
|
@ -7,7 +7,9 @@ fn main() {
|
||||||
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
|
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
|
||||||
"state" => print!("{}", piet_gpu_types::state::gen_gpu_state()),
|
"state" => print!("{}", piet_gpu_types::state::gen_gpu_state()),
|
||||||
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
|
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
|
||||||
|
"pathseg" => print!("{}", piet_gpu_types::pathseg::gen_gpu_pathseg()),
|
||||||
"bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
|
"bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
|
||||||
|
"tile" => print!("{}", piet_gpu_types::tile::gen_gpu_tile()),
|
||||||
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
||||||
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
|
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
|
||||||
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
|
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
|
||||||
|
|
67
piet-gpu-types/src/pathseg.rs
Normal file
67
piet-gpu-types/src/pathseg.rs
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
use piet_gpu_derive::piet_gpu;
|
||||||
|
|
||||||
|
piet_gpu! {
|
||||||
|
#[gpu_write]
|
||||||
|
mod pathseg {
|
||||||
|
struct PathFillLine {
|
||||||
|
p0: [f32; 2],
|
||||||
|
p1: [f32; 2],
|
||||||
|
path_ix: u32,
|
||||||
|
// A note: the layout of this struct is shared with
|
||||||
|
// PathStrokeLine. In that case, we actually write
|
||||||
|
// [0.0, 0.0] as the stroke field, to minimize divergence.
|
||||||
|
}
|
||||||
|
struct PathStrokeLine {
|
||||||
|
p0: [f32; 2],
|
||||||
|
p1: [f32; 2],
|
||||||
|
path_ix: u32,
|
||||||
|
// halfwidth in both x and y for binning
|
||||||
|
stroke: [f32; 2],
|
||||||
|
}
|
||||||
|
struct PathFillCubic {
|
||||||
|
p0: [f32; 2],
|
||||||
|
p1: [f32; 2],
|
||||||
|
p2: [f32; 2],
|
||||||
|
p3: [f32; 2],
|
||||||
|
path_ix: u32,
|
||||||
|
// A note: the layout of this struct is shared with
|
||||||
|
// PathStrokeCubic. In that case, we actually write
|
||||||
|
// [0.0, 0.0] as the stroke field, to minimize divergence.
|
||||||
|
}
|
||||||
|
struct PathStrokeCubic {
|
||||||
|
p0: [f32; 2],
|
||||||
|
p1: [f32; 2],
|
||||||
|
p2: [f32; 2],
|
||||||
|
p3: [f32; 2],
|
||||||
|
path_ix: u32,
|
||||||
|
// halfwidth in both x and y for binning
|
||||||
|
stroke: [f32; 2],
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
struct PathQuad {
|
||||||
|
p0: [f32; 2],
|
||||||
|
p1: [f32; 2],
|
||||||
|
p2: [f32; 2],
|
||||||
|
stroke: [f32; 2],
|
||||||
|
}
|
||||||
|
struct PathCubic {
|
||||||
|
p0: [f32; 2],
|
||||||
|
p1: [f32; 2],
|
||||||
|
p2: [f32; 2],
|
||||||
|
p3: [f32; 2],
|
||||||
|
stroke: [f32; 2],
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
enum PathSeg {
|
||||||
|
Nop,
|
||||||
|
FillLine(PathFillLine),
|
||||||
|
StrokeLine(PathStrokeLine),
|
||||||
|
FillCubic(PathFillCubic),
|
||||||
|
StrokeCubic(PathStrokeCubic),
|
||||||
|
/*
|
||||||
|
Quad(AnnoQuadSeg),
|
||||||
|
Cubic(AnnoCubicSeg),
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -13,13 +13,15 @@ piet_gpu! {
|
||||||
end: [f32; 2],
|
end: [f32; 2],
|
||||||
}
|
}
|
||||||
struct CmdStroke {
|
struct CmdStroke {
|
||||||
// Consider a specialization to one segment.
|
// This is really a Ref<Tile>, but we don't have cross-module
|
||||||
seg_ref: Ref<SegChunk>,
|
// references.
|
||||||
|
tile_ref: u32,
|
||||||
half_width: f32,
|
half_width: f32,
|
||||||
rgba_color: u32,
|
rgba_color: u32,
|
||||||
}
|
}
|
||||||
struct CmdFill {
|
struct CmdFill {
|
||||||
seg_ref: Ref<SegChunk>,
|
// As above, really Ref<Tile>
|
||||||
|
tile_ref: u32,
|
||||||
backdrop: i32,
|
backdrop: i32,
|
||||||
rgba_color: u32,
|
rgba_color: u32,
|
||||||
}
|
}
|
||||||
|
|
|
@ -92,10 +92,10 @@ piet_gpu! {
|
||||||
StrokeLine(LineSeg),
|
StrokeLine(LineSeg),
|
||||||
FillLine(LineSeg),
|
FillLine(LineSeg),
|
||||||
|
|
||||||
// Note: we'll need to handle the stroke/fill distinction
|
StrokeQuad(QuadSeg),
|
||||||
// for these as well, when we do flattening on the GPU.
|
FillQuad(QuadSeg),
|
||||||
Quad(QuadSeg),
|
StrokeCubic(CubicSeg),
|
||||||
Cubic(CubicSeg),
|
FillCubic(CubicSeg),
|
||||||
Stroke(Stroke),
|
Stroke(Stroke),
|
||||||
Fill(Fill),
|
Fill(Fill),
|
||||||
SetLineWidth(SetLineWidth),
|
SetLineWidth(SetLineWidth),
|
||||||
|
|
|
@ -9,6 +9,8 @@ piet_gpu! {
|
||||||
bbox: [f32; 4],
|
bbox: [f32; 4],
|
||||||
linewidth: f32,
|
linewidth: f32,
|
||||||
flags: u32,
|
flags: u32,
|
||||||
|
path_count: u32,
|
||||||
|
pathseg_count: u32,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
22
piet-gpu-types/src/tile.rs
Normal file
22
piet-gpu-types/src/tile.rs
Normal file
|
@ -0,0 +1,22 @@
|
||||||
|
use piet_gpu_derive::piet_gpu;
|
||||||
|
|
||||||
|
piet_gpu! {
|
||||||
|
#[gpu_write]
|
||||||
|
mod tile {
|
||||||
|
struct Path {
|
||||||
|
bbox: [u16; 4],
|
||||||
|
tiles: Ref<Tile>,
|
||||||
|
}
|
||||||
|
struct Tile {
|
||||||
|
tile: Ref<TileSeg>,
|
||||||
|
backdrop: i32,
|
||||||
|
}
|
||||||
|
// Segments within a tile are represented as a linked list.
|
||||||
|
struct TileSeg {
|
||||||
|
start: [f32; 2],
|
||||||
|
end: [f32; 2],
|
||||||
|
y_edge: f32,
|
||||||
|
next: Ref<TileSeg>,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -171,7 +171,7 @@ fn main() -> Result<(), Error> {
|
||||||
|
|
||||||
let fence = device.create_fence(false)?;
|
let fence = device.create_fence(false)?;
|
||||||
let mut cmd_buf = device.create_cmd_buf()?;
|
let mut cmd_buf = device.create_cmd_buf()?;
|
||||||
let query_pool = device.create_query_pool(5)?;
|
let query_pool = device.create_query_pool(8)?;
|
||||||
|
|
||||||
let mut ctx = PietGpuRenderContext::new();
|
let mut ctx = PietGpuRenderContext::new();
|
||||||
if let Some(input) = matches.value_of("INPUT") {
|
if let Some(input) = matches.value_of("INPUT") {
|
||||||
|
@ -185,10 +185,12 @@ fn main() -> Result<(), Error> {
|
||||||
} else {
|
} else {
|
||||||
render_scene(&mut ctx);
|
render_scene(&mut ctx);
|
||||||
}
|
}
|
||||||
|
let n_paths = ctx.path_count();
|
||||||
|
let n_pathseg = ctx.pathseg_count();
|
||||||
let scene = ctx.get_scene_buf();
|
let scene = ctx.get_scene_buf();
|
||||||
//dump_scene(&scene);
|
//dump_scene(&scene);
|
||||||
|
|
||||||
let renderer = Renderer::new(&device, scene)?;
|
let renderer = Renderer::new(&device, scene, n_paths, n_pathseg)?;
|
||||||
let image_buf =
|
let image_buf =
|
||||||
device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?;
|
device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?;
|
||||||
|
|
||||||
|
@ -200,13 +202,16 @@ fn main() -> Result<(), Error> {
|
||||||
device.wait_and_reset(&[fence])?;
|
device.wait_and_reset(&[fence])?;
|
||||||
let ts = device.reap_query_pool(&query_pool).unwrap();
|
let ts = device.reap_query_pool(&query_pool).unwrap();
|
||||||
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
|
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
|
||||||
println!("Binning kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
|
println!("Tile allocation kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
|
||||||
println!("Coarse kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
|
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
|
||||||
println!("Render kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
|
println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
|
||||||
|
println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
|
||||||
|
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
|
||||||
|
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
let mut data: Vec<u32> = Default::default();
|
let mut data: Vec<u32> = Default::default();
|
||||||
device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap();
|
device.read_buffer(&renderer.tile_buf, &mut data).unwrap();
|
||||||
piet_gpu::dump_k1_data(&data);
|
piet_gpu::dump_k1_data(&data);
|
||||||
//trace_ptcl(&data);
|
//trace_ptcl(&data);
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -42,9 +42,11 @@ fn main() -> Result<(), Error> {
|
||||||
|
|
||||||
let mut ctx = PietGpuRenderContext::new();
|
let mut ctx = PietGpuRenderContext::new();
|
||||||
render_scene(&mut ctx);
|
render_scene(&mut ctx);
|
||||||
|
let n_paths = ctx.path_count();
|
||||||
|
let n_pathseg = ctx.pathseg_count();
|
||||||
let scene = ctx.get_scene_buf();
|
let scene = ctx.get_scene_buf();
|
||||||
|
|
||||||
let renderer = Renderer::new(&device, scene)?;
|
let renderer = Renderer::new(&device, scene, n_paths, n_pathseg)?;
|
||||||
|
|
||||||
event_loop.run(move |event, _, control_flow| {
|
event_loop.run(move |event, _, control_flow| {
|
||||||
*control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event
|
*control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event
|
||||||
|
|
|
@ -31,9 +31,10 @@ struct AnnotatedRef {
|
||||||
struct AnnoFillLineSeg {
|
struct AnnoFillLineSeg {
|
||||||
vec2 p0;
|
vec2 p0;
|
||||||
vec2 p1;
|
vec2 p1;
|
||||||
|
uint path_ix;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define AnnoFillLineSeg_size 16
|
#define AnnoFillLineSeg_size 20
|
||||||
|
|
||||||
AnnoFillLineSegRef AnnoFillLineSeg_index(AnnoFillLineSegRef ref, uint index) {
|
AnnoFillLineSegRef AnnoFillLineSeg_index(AnnoFillLineSegRef ref, uint index) {
|
||||||
return AnnoFillLineSegRef(ref.offset + index * AnnoFillLineSeg_size);
|
return AnnoFillLineSegRef(ref.offset + index * AnnoFillLineSeg_size);
|
||||||
|
@ -42,10 +43,11 @@ AnnoFillLineSegRef AnnoFillLineSeg_index(AnnoFillLineSegRef ref, uint index) {
|
||||||
struct AnnoStrokeLineSeg {
|
struct AnnoStrokeLineSeg {
|
||||||
vec2 p0;
|
vec2 p0;
|
||||||
vec2 p1;
|
vec2 p1;
|
||||||
|
uint path_ix;
|
||||||
vec2 stroke;
|
vec2 stroke;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define AnnoStrokeLineSeg_size 24
|
#define AnnoStrokeLineSeg_size 28
|
||||||
|
|
||||||
AnnoStrokeLineSegRef AnnoStrokeLineSeg_index(AnnoStrokeLineSegRef ref, uint index) {
|
AnnoStrokeLineSegRef AnnoStrokeLineSeg_index(AnnoStrokeLineSegRef ref, uint index) {
|
||||||
return AnnoStrokeLineSegRef(ref.offset + index * AnnoStrokeLineSeg_size);
|
return AnnoStrokeLineSegRef(ref.offset + index * AnnoStrokeLineSeg_size);
|
||||||
|
@ -120,9 +122,11 @@ AnnoFillLineSeg AnnoFillLineSeg_read(AnnoFillLineSegRef ref) {
|
||||||
uint raw1 = annotated[ix + 1];
|
uint raw1 = annotated[ix + 1];
|
||||||
uint raw2 = annotated[ix + 2];
|
uint raw2 = annotated[ix + 2];
|
||||||
uint raw3 = annotated[ix + 3];
|
uint raw3 = annotated[ix + 3];
|
||||||
|
uint raw4 = annotated[ix + 4];
|
||||||
AnnoFillLineSeg s;
|
AnnoFillLineSeg s;
|
||||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
s.path_ix = raw4;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -132,6 +136,7 @@ void AnnoFillLineSeg_write(AnnoFillLineSegRef ref, AnnoFillLineSeg s) {
|
||||||
annotated[ix + 1] = floatBitsToUint(s.p0.y);
|
annotated[ix + 1] = floatBitsToUint(s.p0.y);
|
||||||
annotated[ix + 2] = floatBitsToUint(s.p1.x);
|
annotated[ix + 2] = floatBitsToUint(s.p1.x);
|
||||||
annotated[ix + 3] = floatBitsToUint(s.p1.y);
|
annotated[ix + 3] = floatBitsToUint(s.p1.y);
|
||||||
|
annotated[ix + 4] = s.path_ix;
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoStrokeLineSeg AnnoStrokeLineSeg_read(AnnoStrokeLineSegRef ref) {
|
AnnoStrokeLineSeg AnnoStrokeLineSeg_read(AnnoStrokeLineSegRef ref) {
|
||||||
|
@ -142,10 +147,12 @@ AnnoStrokeLineSeg AnnoStrokeLineSeg_read(AnnoStrokeLineSegRef ref) {
|
||||||
uint raw3 = annotated[ix + 3];
|
uint raw3 = annotated[ix + 3];
|
||||||
uint raw4 = annotated[ix + 4];
|
uint raw4 = annotated[ix + 4];
|
||||||
uint raw5 = annotated[ix + 5];
|
uint raw5 = annotated[ix + 5];
|
||||||
|
uint raw6 = annotated[ix + 6];
|
||||||
AnnoStrokeLineSeg s;
|
AnnoStrokeLineSeg s;
|
||||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
s.stroke = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
s.path_ix = raw4;
|
||||||
|
s.stroke = vec2(uintBitsToFloat(raw5), uintBitsToFloat(raw6));
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -155,8 +162,9 @@ void AnnoStrokeLineSeg_write(AnnoStrokeLineSegRef ref, AnnoStrokeLineSeg s) {
|
||||||
annotated[ix + 1] = floatBitsToUint(s.p0.y);
|
annotated[ix + 1] = floatBitsToUint(s.p0.y);
|
||||||
annotated[ix + 2] = floatBitsToUint(s.p1.x);
|
annotated[ix + 2] = floatBitsToUint(s.p1.x);
|
||||||
annotated[ix + 3] = floatBitsToUint(s.p1.y);
|
annotated[ix + 3] = floatBitsToUint(s.p1.y);
|
||||||
annotated[ix + 4] = floatBitsToUint(s.stroke.x);
|
annotated[ix + 4] = s.path_ix;
|
||||||
annotated[ix + 5] = floatBitsToUint(s.stroke.y);
|
annotated[ix + 5] = floatBitsToUint(s.stroke.x);
|
||||||
|
annotated[ix + 6] = floatBitsToUint(s.stroke.y);
|
||||||
}
|
}
|
||||||
|
|
||||||
AnnoQuadSeg AnnoQuadSeg_read(AnnoQuadSegRef ref) {
|
AnnoQuadSeg AnnoQuadSeg_read(AnnoQuadSegRef ref) {
|
||||||
|
|
91
piet-gpu/shader/backdrop.comp
Normal file
91
piet-gpu/shader/backdrop.comp
Normal file
|
@ -0,0 +1,91 @@
|
||||||
|
// Propagation of tile backdrop for filling.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "setup.h"
|
||||||
|
|
||||||
|
#define LG_BACKDROP_WG 8
|
||||||
|
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
|
||||||
|
|
||||||
|
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
||||||
|
|
||||||
|
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
||||||
|
uint[] annotated;
|
||||||
|
};
|
||||||
|
|
||||||
|
// This is really only used for n_elements; maybe we can handle that
|
||||||
|
// a different way, but it's convenient to have the same signature as
|
||||||
|
// tile allocation.
|
||||||
|
layout(set = 0, binding = 1) buffer AllocBuf {
|
||||||
|
uint n_elements;
|
||||||
|
uint n_pathseg;
|
||||||
|
uint alloc;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 2) buffer TileBuf {
|
||||||
|
uint[] tile;
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "annotated.h"
|
||||||
|
#include "tile.h"
|
||||||
|
|
||||||
|
shared uint sh_row_count[BACKDROP_WG];
|
||||||
|
shared uint sh_row_base[BACKDROP_WG];
|
||||||
|
shared uint sh_row_width[BACKDROP_WG];
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
|
AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
|
||||||
|
|
||||||
|
uint row_count = 0;
|
||||||
|
if (element_ix < n_elements) {
|
||||||
|
uint tag = Annotated_tag(ref);
|
||||||
|
if (tag == Annotated_Fill) {
|
||||||
|
PathRef path_ref = PathRef(element_ix * Path_size);
|
||||||
|
Path path = Path_read(path_ref);
|
||||||
|
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
|
||||||
|
row_count = path.bbox.w - path.bbox.y;
|
||||||
|
if (row_count == 1) {
|
||||||
|
// Note: this can probably be expanded to width = 2 as
|
||||||
|
// long as it doesn't cross the left edge.
|
||||||
|
row_count = 0;
|
||||||
|
}
|
||||||
|
sh_row_base[th_ix] = (path.tiles.offset >> 2) + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
sh_row_count[th_ix] = row_count;
|
||||||
|
// Prefix sum of sh_row_count
|
||||||
|
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
|
||||||
|
barrier();
|
||||||
|
if (th_ix >= (1 << i)) {
|
||||||
|
row_count += sh_row_count[th_ix - (1 << i)];
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
sh_row_count[th_ix] = row_count;
|
||||||
|
}
|
||||||
|
barrier();
|
||||||
|
uint total_rows = sh_row_count[BACKDROP_WG - 1];
|
||||||
|
for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
|
||||||
|
// Binary search to find element
|
||||||
|
uint el_ix = 0;
|
||||||
|
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
|
||||||
|
uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
|
||||||
|
if (row >= sh_row_count[probe - 1]) {
|
||||||
|
el_ix = probe;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
|
||||||
|
uint width = sh_row_width[el_ix];
|
||||||
|
// Process one row sequentially
|
||||||
|
uint tile_el_ix = sh_row_base[el_ix] + seq_ix * 2 * width;
|
||||||
|
uint sum = tile[tile_el_ix];
|
||||||
|
for (uint x = 1; x < width; x++) {
|
||||||
|
tile_el_ix += 2;
|
||||||
|
sum += tile[tile_el_ix];
|
||||||
|
tile[tile_el_ix] = sum;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
BIN
piet-gpu/shader/backdrop.spv
Normal file
BIN
piet-gpu/shader/backdrop.spv
Normal file
Binary file not shown.
Binary file not shown.
|
@ -14,6 +14,12 @@ build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
||||||
|
|
||||||
build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h
|
build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h
|
||||||
|
|
||||||
|
build tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h
|
||||||
|
|
||||||
|
build path_coarse.spv: glsl path_coarse.comp | annotated.h pathseg.h tile.h setup.h
|
||||||
|
|
||||||
|
build backdrop.spv: glsl backdrop.comp | annotated.h tile.h setup.h
|
||||||
|
|
||||||
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
||||||
|
|
||||||
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
|
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
|
||||||
|
|
|
@ -15,17 +15,22 @@ layout(set = 0, binding = 1) buffer BinsBuf {
|
||||||
uint[] bins;
|
uint[] bins;
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(set = 0, binding = 2) buffer AllocBuf {
|
layout(set = 0, binding = 2) buffer TileBuf {
|
||||||
|
uint[] tile;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 3) buffer AllocBuf {
|
||||||
uint n_elements;
|
uint n_elements;
|
||||||
uint alloc;
|
uint alloc;
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(set = 0, binding = 3) buffer PtclBuf {
|
layout(set = 0, binding = 4) buffer PtclBuf {
|
||||||
uint[] ptcl;
|
uint[] ptcl;
|
||||||
};
|
};
|
||||||
|
|
||||||
#include "annotated.h"
|
#include "annotated.h"
|
||||||
#include "bins.h"
|
#include "bins.h"
|
||||||
|
#include "tile.h"
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
|
|
||||||
#define LG_N_PART_READ 8
|
#define LG_N_PART_READ 8
|
||||||
|
@ -39,16 +44,16 @@ shared uint sh_part_count[N_PART_READ];
|
||||||
shared uint sh_part_elements[N_PART_READ];
|
shared uint sh_part_elements[N_PART_READ];
|
||||||
|
|
||||||
shared uint sh_bitmaps[N_SLICE][N_TILE];
|
shared uint sh_bitmaps[N_SLICE][N_TILE];
|
||||||
shared uint sh_backdrop[N_SLICE][N_TILE];
|
|
||||||
shared uint sh_bd_sign[N_SLICE];
|
|
||||||
shared uint sh_is_segment[N_SLICE];
|
|
||||||
|
|
||||||
// Shared state for parallel segment output stage
|
shared uint sh_tile_count[N_TILE];
|
||||||
|
// The width of the tile rect for the element, intersected with this bin
|
||||||
|
shared uint sh_tile_width[N_TILE];
|
||||||
|
shared uint sh_tile_x0[N_TILE];
|
||||||
|
shared uint sh_tile_y0[N_TILE];
|
||||||
|
|
||||||
// Count of total number of segments in each tile, then
|
// These are set up so base + tile_y * stride + tile_x points to a Tile.
|
||||||
// inclusive prefix sum of same.
|
shared uint sh_tile_base[N_TILE];
|
||||||
shared uint sh_seg_count[N_TILE];
|
shared uint sh_tile_stride[N_TILE];
|
||||||
shared uint sh_seg_alloc;
|
|
||||||
|
|
||||||
// scale factors useful for converting coordinates to tiles
|
// scale factors useful for converting coordinates to tiles
|
||||||
#define SX (1.0 / float(TILE_WIDTH_PX))
|
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||||
|
@ -65,30 +70,6 @@ void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#define CHUNK_ALLOC_SLAB 16
|
|
||||||
|
|
||||||
uint alloc_chunk_remaining;
|
|
||||||
uint alloc_chunk_offset;
|
|
||||||
|
|
||||||
SegChunkRef alloc_seg_chunk() {
|
|
||||||
if (alloc_chunk_remaining == 0) {
|
|
||||||
alloc_chunk_offset = atomicAdd(alloc, CHUNK_ALLOC_SLAB * SegChunk_size);
|
|
||||||
alloc_chunk_remaining = CHUNK_ALLOC_SLAB;
|
|
||||||
}
|
|
||||||
uint offset = alloc_chunk_offset;
|
|
||||||
alloc_chunk_offset += SegChunk_size;
|
|
||||||
alloc_chunk_remaining--;
|
|
||||||
return SegChunkRef(offset);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Accumulate delta to backdrop.
|
|
||||||
//
|
|
||||||
// Each bit for which bd_bitmap is 1 and bd_sign is 1 counts as +1, and each
|
|
||||||
// bit for which bd_bitmap is 1 and bd_sign is 0 counts as -1.
|
|
||||||
int count_backdrop(uint bd_bitmap, uint bd_sign) {
|
|
||||||
return bitCount(bd_bitmap & bd_sign) - bitCount(bd_bitmap & ~bd_sign);
|
|
||||||
}
|
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
// Could use either linear or 2d layouts for both dispatch and
|
// Could use either linear or 2d layouts for both dispatch and
|
||||||
// invocations within the workgroup. We'll use variables to abstract.
|
// invocations within the workgroup. We'll use variables to abstract.
|
||||||
|
@ -99,19 +80,15 @@ void main() {
|
||||||
vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
|
|
||||||
uint tile_x = N_TILE_X * gl_WorkGroupID.x + gl_LocalInvocationID.x % N_TILE_X;
|
// Coordinates of top left of bin, in tiles.
|
||||||
uint tile_y = N_TILE_Y * gl_WorkGroupID.y + gl_LocalInvocationID.x / N_TILE_X;
|
uint bin_tile_x = N_TILE_X * gl_WorkGroupID.x;
|
||||||
uint this_tile_ix = tile_y * WIDTH_IN_TILES + tile_x;
|
uint bin_tile_y = N_TILE_Y * gl_WorkGroupID.y;
|
||||||
|
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
|
||||||
|
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
|
||||||
|
uint this_tile_ix = (bin_tile_y + tile_y) * WIDTH_IN_TILES + bin_tile_x + tile_x;
|
||||||
CmdRef cmd_ref = CmdRef(this_tile_ix * PTCL_INITIAL_ALLOC);
|
CmdRef cmd_ref = CmdRef(this_tile_ix * PTCL_INITIAL_ALLOC);
|
||||||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
||||||
|
|
||||||
// Allocation and management of segment output
|
|
||||||
SegChunkRef first_seg_chunk = SegChunkRef(0);
|
|
||||||
SegChunkRef last_chunk_ref = SegChunkRef(0);
|
|
||||||
uint last_chunk_n = 0;
|
|
||||||
SegmentRef last_chunk_segs = SegmentRef(0);
|
|
||||||
alloc_chunk_remaining = 0;
|
|
||||||
|
|
||||||
// I'm sure we can figure out how to do this with at least one fewer register...
|
// I'm sure we can figure out how to do this with at least one fewer register...
|
||||||
// Items up to rd_ix have been read from sh_elements
|
// Items up to rd_ix have been read from sh_elements
|
||||||
uint rd_ix = 0;
|
uint rd_ix = 0;
|
||||||
|
@ -120,17 +97,10 @@ void main() {
|
||||||
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
|
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
|
||||||
uint part_start_ix = 0;
|
uint part_start_ix = 0;
|
||||||
uint ready_ix = 0;
|
uint ready_ix = 0;
|
||||||
if (th_ix < N_SLICE) {
|
|
||||||
sh_bd_sign[th_ix] = 0;
|
|
||||||
}
|
|
||||||
int backdrop = 0;
|
int backdrop = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
sh_bitmaps[i][th_ix] = 0;
|
sh_bitmaps[i][th_ix] = 0;
|
||||||
sh_backdrop[i][th_ix] = 0;
|
|
||||||
}
|
|
||||||
if (th_ix < N_SLICE) {
|
|
||||||
sh_is_segment[th_ix] = 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// parallel read of input partitions
|
// parallel read of input partitions
|
||||||
|
@ -188,103 +158,87 @@ void main() {
|
||||||
|
|
||||||
// Read one element, compute coverage.
|
// Read one element, compute coverage.
|
||||||
uint tag = Annotated_Nop;
|
uint tag = Annotated_Nop;
|
||||||
|
uint element_ix;
|
||||||
AnnotatedRef ref;
|
AnnotatedRef ref;
|
||||||
float right_edge = 0.0;
|
float right_edge = 0.0;
|
||||||
if (th_ix + rd_ix < wr_ix) {
|
if (th_ix + rd_ix < wr_ix) {
|
||||||
uint element_ix = sh_elements[th_ix];
|
element_ix = sh_elements[th_ix];
|
||||||
right_edge = sh_right_edge[th_ix];
|
right_edge = sh_right_edge[th_ix];
|
||||||
ref = AnnotatedRef(element_ix * Annotated_size);
|
ref = AnnotatedRef(element_ix * Annotated_size);
|
||||||
tag = Annotated_tag(ref);
|
tag = Annotated_tag(ref);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Setup for coverage algorithm.
|
|
||||||
float a, b, c;
|
|
||||||
// Bounding box of element in pixel coordinates.
|
// Bounding box of element in pixel coordinates.
|
||||||
float xmin, xmax, ymin, ymax;
|
uint tile_count;
|
||||||
uint my_slice = th_ix / 32;
|
|
||||||
uint my_mask = 1 << (th_ix & 31);
|
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Annotated_FillLine:
|
|
||||||
case Annotated_StrokeLine:
|
|
||||||
AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
|
|
||||||
xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
|
|
||||||
xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
|
|
||||||
ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
|
|
||||||
ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
|
|
||||||
float dx = line.p1.x - line.p0.x;
|
|
||||||
float dy = line.p1.y - line.p0.y;
|
|
||||||
if (tag == Annotated_FillLine) {
|
|
||||||
// Set bit for backdrop sign calculation, 1 is +1, 0 is -1.
|
|
||||||
if (dy < 0) {
|
|
||||||
atomicOr(sh_bd_sign[my_slice], my_mask);
|
|
||||||
} else {
|
|
||||||
atomicAnd(sh_bd_sign[my_slice], ~my_mask);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
atomicOr(sh_is_segment[my_slice], my_mask);
|
|
||||||
// Set up for per-scanline coverage formula, below.
|
|
||||||
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
|
||||||
c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
|
|
||||||
b = invslope; // Note: assumes square tiles, otherwise scale.
|
|
||||||
a = (line.p0.x - xy0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX) - xy0.y) * b) * SX;
|
|
||||||
break;
|
|
||||||
case Annotated_Fill:
|
case Annotated_Fill:
|
||||||
case Annotated_Stroke:
|
case Annotated_Stroke:
|
||||||
// Note: we take advantage of the fact that fills and strokes
|
// Because the only elements we're processing right now are
|
||||||
// have compatible layout.
|
// paths, we can just use the element index as the path index.
|
||||||
AnnoFill fill = Annotated_Fill_read(ref);
|
// In future, when we're doing a bunch of stuff, the path index
|
||||||
xmin = fill.bbox.x;
|
// should probably be stored in the annotated element.
|
||||||
xmax = fill.bbox.z;
|
uint path_ix = element_ix;
|
||||||
ymin = fill.bbox.y;
|
Path path = Path_read(PathRef(path_ix * Path_size));
|
||||||
ymax = fill.bbox.w;
|
uint stride = path.bbox.z - path.bbox.x;
|
||||||
// Just let the clamping to xmin and xmax determine the bounds.
|
sh_tile_stride[th_ix] = stride;
|
||||||
a = 0.0;
|
int dx = int(path.bbox.x) - int(bin_tile_x);
|
||||||
b = 0.0;
|
int dy = int(path.bbox.y) - int(bin_tile_y);
|
||||||
c = 1e9;
|
int x0 = clamp(dx, 0, N_TILE_X);
|
||||||
|
int y0 = clamp(dy, 0, N_TILE_Y);
|
||||||
|
int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, N_TILE_X);
|
||||||
|
int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, N_TILE_Y);
|
||||||
|
sh_tile_width[th_ix] = uint(x1 - x0);
|
||||||
|
sh_tile_x0[th_ix] = x0;
|
||||||
|
sh_tile_y0[th_ix] = y0;
|
||||||
|
tile_count = uint(x1 - x0) * uint(y1 - y0);
|
||||||
|
// base relative to bin
|
||||||
|
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
||||||
|
sh_tile_base[th_ix] = base;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
ymin = 0;
|
tile_count = 0;
|
||||||
ymax = 0;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Draw the coverage area into the bitmasks. This uses an algorithm
|
// Prefix sum of sh_tile_count
|
||||||
// that computes the coverage of a span for given scanline.
|
sh_tile_count[th_ix] = tile_count;
|
||||||
|
for (uint i = 0; i < LG_N_TILE; i++) {
|
||||||
// Compute bounding box in tiles and clip to this bin.
|
barrier();
|
||||||
int x0 = int(floor((xmin - xy0.x) * SX));
|
if (th_ix >= (1 << i)) {
|
||||||
int x1 = int(ceil((xmax - xy0.x) * SX));
|
tile_count += sh_tile_count[th_ix - (1 << i)];
|
||||||
int xr = int(ceil((right_edge - xy0.x) * SX));
|
|
||||||
int y0 = int(floor((ymin - xy0.y) * SY));
|
|
||||||
int y1 = int(ceil((ymax - xy0.y) * SY));
|
|
||||||
x0 = clamp(x0, 0, N_TILE_X);
|
|
||||||
x1 = clamp(x1, x0, N_TILE_X);
|
|
||||||
xr = clamp(xr, 0, N_TILE_X);
|
|
||||||
y0 = clamp(y0, 0, N_TILE_Y);
|
|
||||||
y1 = clamp(y1, y0, N_TILE_Y);
|
|
||||||
float t = a + b * float(y0);
|
|
||||||
for (uint y = y0; y < y1; y++) {
|
|
||||||
uint xx0 = clamp(int(floor(t - c)), x0, x1);
|
|
||||||
uint xx1 = clamp(int(ceil(t + c)), x0, x1);
|
|
||||||
for (uint x = xx0; x < xx1; x++) {
|
|
||||||
atomicOr(sh_bitmaps[my_slice][y * N_TILE_X + x], my_mask);
|
|
||||||
}
|
}
|
||||||
if (tag == Annotated_FillLine && ymin <= xy0.y + float(y * TILE_HEIGHT_PX)) {
|
barrier();
|
||||||
// Assign backdrop to all tiles to the right of the ray crossing the
|
sh_tile_count[th_ix] = tile_count;
|
||||||
// top edge of this tile, up to the right edge of the fill bbox.
|
}
|
||||||
float xray = t - 0.5 * b;
|
barrier();
|
||||||
xx0 = max(int(ceil(xray)), 0);
|
uint total_tile_count = sh_tile_count[N_TILE - 1];
|
||||||
for (uint x = xx0; x < xr; x++) {
|
for (uint ix = th_ix; ix < total_tile_count; ix += N_TILE) {
|
||||||
atomicOr(sh_backdrop[my_slice][y * N_TILE_X + x], my_mask);
|
// Binary search to find element
|
||||||
|
uint el_ix = 0;
|
||||||
|
for (uint i = 0; i < LG_N_TILE; i++) {
|
||||||
|
uint probe = el_ix + ((N_TILE / 2) >> i);
|
||||||
|
if (ix >= sh_tile_count[probe - 1]) {
|
||||||
|
el_ix = probe;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
t += b;
|
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
|
||||||
|
uint width = sh_tile_width[el_ix];
|
||||||
|
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||||
|
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||||
|
Tile tile = Tile_read(TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||||
|
if (tile.tile.offset != 0 || tile.backdrop != 0) {
|
||||||
|
uint el_slice = el_ix / 32;
|
||||||
|
uint el_mask = 1 << (el_ix & 31);
|
||||||
|
atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
barrier();
|
barrier();
|
||||||
|
|
||||||
// We've computed coverage and other info for each element in the input, now for
|
// We've computed coverage and other info for each element in the input, now for
|
||||||
// the output stage. We'll do segments first using a more parallel algorithm.
|
// the output stage. We'll do segments first using a more parallel algorithm.
|
||||||
|
|
||||||
|
/*
|
||||||
uint seg_count = 0;
|
uint seg_count = 0;
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
seg_count += bitCount(sh_bitmaps[i][th_ix] & sh_is_segment[i]);
|
seg_count += bitCount(sh_bitmaps[i][th_ix] & sh_is_segment[i]);
|
||||||
|
@ -372,45 +326,29 @@ void main() {
|
||||||
Segment seg = Segment(line.p0, line.p1, y_edge);
|
Segment seg = Segment(line.p0, line.p1, y_edge);
|
||||||
Segment_write(SegmentRef(seg_alloc + Segment_size * ix), seg);
|
Segment_write(SegmentRef(seg_alloc + Segment_size * ix), seg);
|
||||||
}
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
// Output non-segment elements for this tile. The thread does a sequential walk
|
// Output non-segment elements for this tile. The thread does a sequential walk
|
||||||
// through the non-segment elements, and for segments, count and backdrop are
|
// through the non-segment elements, and for segments, count and backdrop are
|
||||||
// aggregated using bit counting.
|
// aggregated using bit counting.
|
||||||
uint slice_ix = 0;
|
uint slice_ix = 0;
|
||||||
uint bitmap = sh_bitmaps[0][th_ix];
|
uint bitmap = sh_bitmaps[0][th_ix];
|
||||||
uint bd_bitmap = sh_backdrop[0][th_ix];
|
|
||||||
uint bd_sign = sh_bd_sign[0];
|
|
||||||
uint is_segment = sh_is_segment[0];
|
|
||||||
uint seg_start = th_ix == 0 ? 0 : sh_seg_count[th_ix - 1];
|
|
||||||
seg_count = 0;
|
|
||||||
while (true) {
|
while (true) {
|
||||||
uint nonseg_bitmap = bitmap & ~is_segment;
|
if (bitmap == 0) {
|
||||||
if (nonseg_bitmap == 0) {
|
|
||||||
backdrop += count_backdrop(bd_bitmap, bd_sign);
|
|
||||||
seg_count += bitCount(bitmap & is_segment);
|
|
||||||
slice_ix++;
|
slice_ix++;
|
||||||
if (slice_ix == N_SLICE) {
|
if (slice_ix == N_SLICE) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
bitmap = sh_bitmaps[slice_ix][th_ix];
|
bitmap = sh_bitmaps[slice_ix][th_ix];
|
||||||
bd_bitmap = sh_backdrop[slice_ix][th_ix];
|
if (bitmap == 0) {
|
||||||
bd_sign = sh_bd_sign[slice_ix];
|
|
||||||
is_segment = sh_is_segment[slice_ix];
|
|
||||||
nonseg_bitmap = bitmap & ~is_segment;
|
|
||||||
if (nonseg_bitmap == 0) {
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint element_ref_ix = slice_ix * 32 + findLSB(nonseg_bitmap);
|
uint element_ref_ix = slice_ix * 32 + findLSB(bitmap);
|
||||||
uint element_ix = sh_elements[element_ref_ix];
|
uint element_ix = sh_elements[element_ref_ix];
|
||||||
|
|
||||||
// Bits up to and including the lsb
|
// Clear LSB
|
||||||
uint bd_mask = (nonseg_bitmap - 1) ^ nonseg_bitmap;
|
bitmap &= bitmap - 1;
|
||||||
backdrop += count_backdrop(bd_bitmap & bd_mask, bd_sign);
|
|
||||||
seg_count += bitCount(bitmap & bd_mask & is_segment);
|
|
||||||
// Clear bits that have been consumed.
|
|
||||||
bd_bitmap &= ~bd_mask;
|
|
||||||
bitmap &= ~bd_mask;
|
|
||||||
|
|
||||||
// At this point, we read the element again from global memory.
|
// At this point, we read the element again from global memory.
|
||||||
// If that turns out to be expensive, maybe we can pack it into
|
// If that turns out to be expensive, maybe we can pack it into
|
||||||
|
@ -420,103 +358,36 @@ void main() {
|
||||||
|
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Annotated_Fill:
|
case Annotated_Fill:
|
||||||
if (last_chunk_n > 0 || seg_count > 0) {
|
Tile tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||||
SegChunkRef chunk_ref = SegChunkRef(0);
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
if (seg_count > 0) {
|
AnnoFill fill = Annotated_Fill_read(ref);
|
||||||
chunk_ref = alloc_seg_chunk();
|
alloc_cmd(cmd_ref, cmd_limit);
|
||||||
SegChunk chunk;
|
if (tile.tile.offset != 0) {
|
||||||
chunk.n = seg_count;
|
|
||||||
chunk.next = SegChunkRef(0);
|
|
||||||
uint seg_offset = seg_alloc + seg_start * Segment_size;
|
|
||||||
chunk.segs = SegmentRef(seg_offset);
|
|
||||||
SegChunk_write(chunk_ref, chunk);
|
|
||||||
}
|
|
||||||
if (last_chunk_n > 0) {
|
|
||||||
SegChunk chunk;
|
|
||||||
chunk.n = last_chunk_n;
|
|
||||||
chunk.next = chunk_ref;
|
|
||||||
chunk.segs = last_chunk_segs;
|
|
||||||
SegChunk_write(last_chunk_ref, chunk);
|
|
||||||
} else {
|
|
||||||
first_seg_chunk = chunk_ref;
|
|
||||||
}
|
|
||||||
|
|
||||||
AnnoFill fill = Annotated_Fill_read(ref);
|
|
||||||
CmdFill cmd_fill;
|
CmdFill cmd_fill;
|
||||||
cmd_fill.seg_ref = first_seg_chunk;
|
cmd_fill.tile_ref = tile.tile.offset;
|
||||||
cmd_fill.backdrop = backdrop;
|
cmd_fill.backdrop = tile.backdrop;
|
||||||
cmd_fill.rgba_color = fill.rgba_color;
|
cmd_fill.rgba_color = fill.rgba_color;
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Fill_write(cmd_ref, cmd_fill);
|
Cmd_Fill_write(cmd_ref, cmd_fill);
|
||||||
cmd_ref.offset += Cmd_size;
|
} else {
|
||||||
last_chunk_n = 0;
|
|
||||||
} else if (backdrop != 0) {
|
|
||||||
AnnoFill fill = Annotated_Fill_read(ref);
|
AnnoFill fill = Annotated_Fill_read(ref);
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
}
|
}
|
||||||
seg_start += seg_count;
|
cmd_ref.offset += Cmd_size;
|
||||||
seg_count = 0;
|
|
||||||
backdrop = 0;
|
|
||||||
break;
|
break;
|
||||||
case Annotated_Stroke:
|
case Annotated_Stroke:
|
||||||
// TODO: reduce divergence & code duplication? Much of the
|
tile = Tile_read(TileRef(sh_tile_base[element_ref_ix]
|
||||||
// fill and stroke processing is in common.
|
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
if (last_chunk_n > 0 || seg_count > 0) {
|
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
||||||
SegChunkRef chunk_ref = SegChunkRef(0);
|
CmdStroke cmd_stroke;
|
||||||
if (seg_count > 0) {
|
cmd_stroke.tile_ref = tile.tile.offset;
|
||||||
chunk_ref = alloc_seg_chunk();
|
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
||||||
SegChunk chunk;
|
cmd_stroke.rgba_color = stroke.rgba_color;
|
||||||
chunk.n = seg_count;
|
alloc_cmd(cmd_ref, cmd_limit);
|
||||||
chunk.next = SegChunkRef(0);
|
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
||||||
uint seg_offset = seg_alloc + seg_start * Segment_size;
|
cmd_ref.offset += Cmd_size;
|
||||||
chunk.segs = SegmentRef(seg_offset);
|
|
||||||
SegChunk_write(chunk_ref, chunk);
|
|
||||||
}
|
|
||||||
if (last_chunk_n > 0) {
|
|
||||||
SegChunk chunk;
|
|
||||||
chunk.n = last_chunk_n;
|
|
||||||
chunk.next = chunk_ref;
|
|
||||||
chunk.segs = last_chunk_segs;
|
|
||||||
SegChunk_write(last_chunk_ref, chunk);
|
|
||||||
} else {
|
|
||||||
first_seg_chunk = chunk_ref;
|
|
||||||
}
|
|
||||||
|
|
||||||
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
|
||||||
CmdStroke cmd_stroke;
|
|
||||||
cmd_stroke.seg_ref = first_seg_chunk;
|
|
||||||
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
|
||||||
cmd_stroke.rgba_color = stroke.rgba_color;
|
|
||||||
alloc_cmd(cmd_ref, cmd_limit);
|
|
||||||
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
|
||||||
cmd_ref.offset += Cmd_size;
|
|
||||||
last_chunk_n = 0;
|
|
||||||
}
|
|
||||||
seg_start += seg_count;
|
|
||||||
seg_count = 0;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
// This shouldn't happen, but just in case.
|
|
||||||
seg_start++;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (seg_count > 0) {
|
|
||||||
SegChunkRef chunk_ref = alloc_seg_chunk();
|
|
||||||
if (last_chunk_n > 0) {
|
|
||||||
SegChunk_write(last_chunk_ref, SegChunk(last_chunk_n, chunk_ref, last_chunk_segs));
|
|
||||||
} else {
|
|
||||||
first_seg_chunk = chunk_ref;
|
|
||||||
}
|
|
||||||
// TODO: free two registers by writing count and segments ref now,
|
|
||||||
// as opposed to deferring SegChunk write until all fields are known.
|
|
||||||
last_chunk_ref = chunk_ref;
|
|
||||||
last_chunk_n = seg_count;
|
|
||||||
uint seg_offset = seg_alloc + seg_start * Segment_size;
|
|
||||||
last_chunk_segs = SegmentRef(seg_offset);
|
|
||||||
}
|
|
||||||
barrier();
|
barrier();
|
||||||
|
|
||||||
rd_ix += N_TILE;
|
rd_ix += N_TILE;
|
||||||
|
|
Binary file not shown.
|
@ -30,9 +30,15 @@ layout(set = 0, binding = 2) buffer AnnotatedBuf {
|
||||||
uint[] annotated;
|
uint[] annotated;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Path segments are stored here.
|
||||||
|
layout(set = 0, binding = 3) buffer PathSegBuf {
|
||||||
|
uint[] pathseg;
|
||||||
|
};
|
||||||
|
|
||||||
#include "scene.h"
|
#include "scene.h"
|
||||||
#include "state.h"
|
#include "state.h"
|
||||||
#include "annotated.h"
|
#include "annotated.h"
|
||||||
|
#include "pathseg.h"
|
||||||
|
|
||||||
#define StateBuf_stride (8 + 2 * State_size)
|
#define StateBuf_stride (8 + 2 * State_size)
|
||||||
|
|
||||||
|
@ -83,6 +89,8 @@ State combine_state(State a, State b) {
|
||||||
c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth;
|
c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth;
|
||||||
c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX)) | b.flags;
|
c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX)) | b.flags;
|
||||||
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
|
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
|
||||||
|
c.path_count = a.path_count + b.path_count;
|
||||||
|
c.pathseg_count = a.pathseg_count + b.pathseg_count;
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -96,6 +104,8 @@ State map_element(ElementRef ref, inout bool is_fill) {
|
||||||
c.translate = vec2(0.0, 0.0);
|
c.translate = vec2(0.0, 0.0);
|
||||||
c.linewidth = 1.0; // TODO should be 0.0
|
c.linewidth = 1.0; // TODO should be 0.0
|
||||||
c.flags = 0;
|
c.flags = 0;
|
||||||
|
c.path_count = 0;
|
||||||
|
c.pathseg_count = 0;
|
||||||
is_fill = false;
|
is_fill = false;
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Element_FillLine:
|
case Element_FillLine:
|
||||||
|
@ -103,22 +113,28 @@ State map_element(ElementRef ref, inout bool is_fill) {
|
||||||
LineSeg line = Element_FillLine_read(ref);
|
LineSeg line = Element_FillLine_read(ref);
|
||||||
c.bbox.xy = min(line.p0, line.p1);
|
c.bbox.xy = min(line.p0, line.p1);
|
||||||
c.bbox.zw = max(line.p0, line.p1);
|
c.bbox.zw = max(line.p0, line.p1);
|
||||||
|
c.pathseg_count = 1;
|
||||||
break;
|
break;
|
||||||
case Element_Quad:
|
case Element_FillQuad:
|
||||||
QuadSeg quad = Element_Quad_read(ref);
|
case Element_StrokeQuad:
|
||||||
|
QuadSeg quad = Element_FillQuad_read(ref);
|
||||||
c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2);
|
c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2);
|
||||||
c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2);
|
c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2);
|
||||||
|
c.pathseg_count = 1;
|
||||||
break;
|
break;
|
||||||
case Element_Cubic:
|
case Element_FillCubic:
|
||||||
CubicSeg cubic = Element_Cubic_read(ref);
|
case Element_StrokeCubic:
|
||||||
|
CubicSeg cubic = Element_FillCubic_read(ref);
|
||||||
c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));
|
c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));
|
||||||
c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
|
c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
|
||||||
|
c.pathseg_count = 1;
|
||||||
break;
|
break;
|
||||||
case Element_Fill:
|
case Element_Fill:
|
||||||
is_fill = true;
|
is_fill = true;
|
||||||
// fall-through
|
// fall-through
|
||||||
case Element_Stroke:
|
case Element_Stroke:
|
||||||
c.flags = FLAG_RESET_BBOX;
|
c.flags = FLAG_RESET_BBOX;
|
||||||
|
c.path_count = 1;
|
||||||
break;
|
break;
|
||||||
case Element_SetLineWidth:
|
case Element_SetLineWidth:
|
||||||
SetLineWidth lw = Element_SetLineWidth_read(ref);
|
SetLineWidth lw = Element_SetLineWidth_read(ref);
|
||||||
|
@ -148,6 +164,8 @@ shared vec2 sh_translate[WG_SIZE];
|
||||||
shared vec4 sh_bbox[WG_SIZE];
|
shared vec4 sh_bbox[WG_SIZE];
|
||||||
shared float sh_width[WG_SIZE];
|
shared float sh_width[WG_SIZE];
|
||||||
shared uint sh_flags[WG_SIZE];
|
shared uint sh_flags[WG_SIZE];
|
||||||
|
shared uint sh_path_count[WG_SIZE];
|
||||||
|
shared uint sh_pathseg_count[WG_SIZE];
|
||||||
|
|
||||||
shared uint sh_min_fill;
|
shared uint sh_min_fill;
|
||||||
|
|
||||||
|
@ -187,6 +205,8 @@ void main() {
|
||||||
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
|
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
|
||||||
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
|
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
|
||||||
sh_flags[gl_LocalInvocationID.x] = agg.flags;
|
sh_flags[gl_LocalInvocationID.x] = agg.flags;
|
||||||
|
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
|
||||||
|
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
|
||||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||||
barrier();
|
barrier();
|
||||||
if (gl_LocalInvocationID.x >= (1 << i)) {
|
if (gl_LocalInvocationID.x >= (1 << i)) {
|
||||||
|
@ -197,6 +217,8 @@ void main() {
|
||||||
other.bbox = sh_bbox[ix];
|
other.bbox = sh_bbox[ix];
|
||||||
other.linewidth = sh_width[ix];
|
other.linewidth = sh_width[ix];
|
||||||
other.flags = sh_flags[ix];
|
other.flags = sh_flags[ix];
|
||||||
|
other.path_count = sh_path_count[ix];
|
||||||
|
other.pathseg_count = sh_pathseg_count[ix];
|
||||||
agg = combine_state(other, agg);
|
agg = combine_state(other, agg);
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
@ -205,6 +227,8 @@ void main() {
|
||||||
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
|
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
|
||||||
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
|
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
|
||||||
sh_flags[gl_LocalInvocationID.x] = agg.flags;
|
sh_flags[gl_LocalInvocationID.x] = agg.flags;
|
||||||
|
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
|
||||||
|
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
State exclusive;
|
State exclusive;
|
||||||
|
@ -213,6 +237,8 @@ void main() {
|
||||||
exclusive.translate = vec2(0.0, 0.0);
|
exclusive.translate = vec2(0.0, 0.0);
|
||||||
exclusive.linewidth = 1.0; //TODO should be 0.0
|
exclusive.linewidth = 1.0; //TODO should be 0.0
|
||||||
exclusive.flags = 0;
|
exclusive.flags = 0;
|
||||||
|
exclusive.path_count = 0;
|
||||||
|
exclusive.pathseg_count = 0;
|
||||||
|
|
||||||
// Publish aggregate for this partition
|
// Publish aggregate for this partition
|
||||||
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
|
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
|
||||||
|
@ -266,6 +292,8 @@ void main() {
|
||||||
other.bbox = sh_bbox[ix];
|
other.bbox = sh_bbox[ix];
|
||||||
other.linewidth = sh_width[ix];
|
other.linewidth = sh_width[ix];
|
||||||
other.flags = sh_flags[ix];
|
other.flags = sh_flags[ix];
|
||||||
|
other.path_count = sh_path_count[ix];
|
||||||
|
other.pathseg_count = sh_pathseg_count[ix];
|
||||||
row = combine_state(row, other);
|
row = combine_state(row, other);
|
||||||
}
|
}
|
||||||
if (my_min_fill == ~0 && gl_LocalInvocationID.x == 0) {
|
if (my_min_fill == ~0 && gl_LocalInvocationID.x == 0) {
|
||||||
|
@ -284,25 +312,75 @@ void main() {
|
||||||
// gains to be had from stashing in shared memory or possibly
|
// gains to be had from stashing in shared memory or possibly
|
||||||
// registers (though register pressure is an issue).
|
// registers (though register pressure is an issue).
|
||||||
ElementRef this_ref = Element_index(ref, i);
|
ElementRef this_ref = Element_index(ref, i);
|
||||||
AnnotatedRef out_ref = AnnotatedRef((ix + i) * Annotated_size);
|
|
||||||
uint tag = Element_tag(this_ref);
|
uint tag = Element_tag(this_ref);
|
||||||
switch (tag) {
|
switch (tag) {
|
||||||
case Element_FillLine:
|
case Element_FillLine:
|
||||||
case Element_StrokeLine:
|
case Element_StrokeLine:
|
||||||
LineSeg line = Element_StrokeLine_read(this_ref);
|
LineSeg line = Element_StrokeLine_read(this_ref);
|
||||||
AnnoStrokeLineSeg anno_line;
|
vec2 p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
|
||||||
anno_line.p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
|
vec2 p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
|
||||||
anno_line.p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
|
PathStrokeCubic path_cubic;
|
||||||
|
path_cubic.p0 = p0;
|
||||||
|
path_cubic.p1 = mix(p0, p1, 1.0 / 3.0);
|
||||||
|
path_cubic.p2 = mix(p1, p0, 1.0 / 3.0);
|
||||||
|
path_cubic.p3 = p1;
|
||||||
|
path_cubic.path_ix = st.path_count;
|
||||||
if (tag == Element_StrokeLine) {
|
if (tag == Element_StrokeLine) {
|
||||||
anno_line.stroke = get_linewidth(st);
|
path_cubic.stroke = get_linewidth(st);
|
||||||
} else {
|
} else {
|
||||||
anno_line.stroke = vec2(0.0);
|
path_cubic.stroke = vec2(0.0);
|
||||||
}
|
}
|
||||||
// We do encoding a bit by hand to minimize divergence. Another approach
|
// We do encoding a bit by hand to minimize divergence. Another approach
|
||||||
// would be to have a fill/stroke bool.
|
// would be to have a fill/stroke bool.
|
||||||
uint out_tag = tag == Element_FillLine ? Annotated_FillLine : Annotated_StrokeLine;
|
PathSegRef path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
|
||||||
annotated[out_ref.offset >> 2] = out_tag;
|
uint out_tag = tag == Element_FillLine ? PathSeg_FillCubic : PathSeg_StrokeCubic;
|
||||||
AnnoStrokeLineSeg_write(AnnoStrokeLineSegRef(out_ref.offset + 4), anno_line);
|
pathseg[path_out_ref.offset >> 2] = out_tag;
|
||||||
|
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
|
||||||
|
break;
|
||||||
|
case Element_FillQuad:
|
||||||
|
case Element_StrokeQuad:
|
||||||
|
QuadSeg quad = Element_StrokeQuad_read(this_ref);
|
||||||
|
p0 = st.mat.xy * quad.p0.x + st.mat.zw * quad.p0.y + st.translate;
|
||||||
|
p1 = st.mat.xy * quad.p1.x + st.mat.zw * quad.p1.y + st.translate;
|
||||||
|
vec2 p2 = st.mat.xy * quad.p2.x + st.mat.zw * quad.p2.y + st.translate;
|
||||||
|
path_cubic;
|
||||||
|
path_cubic.p0 = p0;
|
||||||
|
path_cubic.p1 = mix(p1, p0, 1.0 / 3.0);
|
||||||
|
path_cubic.p2 = mix(p1, p2, 1.0 / 3.0);
|
||||||
|
path_cubic.p3 = p2;
|
||||||
|
path_cubic.path_ix = st.path_count;
|
||||||
|
if (tag == Element_StrokeQuad) {
|
||||||
|
path_cubic.stroke = get_linewidth(st);
|
||||||
|
} else {
|
||||||
|
path_cubic.stroke = vec2(0.0);
|
||||||
|
}
|
||||||
|
// We do encoding a bit by hand to minimize divergence. Another approach
|
||||||
|
// would be to have a fill/stroke bool.
|
||||||
|
path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
|
||||||
|
out_tag = tag == Element_FillQuad ? PathSeg_FillCubic : PathSeg_StrokeCubic;
|
||||||
|
pathseg[path_out_ref.offset >> 2] = out_tag;
|
||||||
|
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
|
||||||
|
break;
|
||||||
|
case Element_FillCubic:
|
||||||
|
case Element_StrokeCubic:
|
||||||
|
CubicSeg cubic = Element_StrokeCubic_read(this_ref);
|
||||||
|
path_cubic;
|
||||||
|
path_cubic.p0 = st.mat.xy * cubic.p0.x + st.mat.zw * cubic.p0.y + st.translate;
|
||||||
|
path_cubic.p1 = st.mat.xy * cubic.p1.x + st.mat.zw * cubic.p1.y + st.translate;
|
||||||
|
path_cubic.p2 = st.mat.xy * cubic.p2.x + st.mat.zw * cubic.p2.y + st.translate;
|
||||||
|
path_cubic.p3 = st.mat.xy * cubic.p3.x + st.mat.zw * cubic.p3.y + st.translate;
|
||||||
|
path_cubic.path_ix = st.path_count;
|
||||||
|
if (tag == Element_StrokeCubic) {
|
||||||
|
path_cubic.stroke = get_linewidth(st);
|
||||||
|
} else {
|
||||||
|
path_cubic.stroke = vec2(0.0);
|
||||||
|
}
|
||||||
|
// We do encoding a bit by hand to minimize divergence. Another approach
|
||||||
|
// would be to have a fill/stroke bool.
|
||||||
|
path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
|
||||||
|
out_tag = tag == Element_FillCubic ? PathSeg_FillCubic : PathSeg_StrokeCubic;
|
||||||
|
pathseg[path_out_ref.offset >> 2] = out_tag;
|
||||||
|
PathStrokeCubic_write(PathStrokeCubicRef(path_out_ref.offset + 4), path_cubic);
|
||||||
break;
|
break;
|
||||||
case Element_Stroke:
|
case Element_Stroke:
|
||||||
Stroke stroke = Element_Stroke_read(this_ref);
|
Stroke stroke = Element_Stroke_read(this_ref);
|
||||||
|
@ -311,6 +389,7 @@ void main() {
|
||||||
vec2 lw = get_linewidth(st);
|
vec2 lw = get_linewidth(st);
|
||||||
anno_stroke.bbox = st.bbox + vec4(-lw, lw);
|
anno_stroke.bbox = st.bbox + vec4(-lw, lw);
|
||||||
anno_stroke.linewidth = st.linewidth * sqrt(st.mat.x * st.mat.w - st.mat.y * st.mat.z);
|
anno_stroke.linewidth = st.linewidth * sqrt(st.mat.x * st.mat.w - st.mat.y * st.mat.z);
|
||||||
|
AnnotatedRef out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
|
||||||
Annotated_Stroke_write(out_ref, anno_stroke);
|
Annotated_Stroke_write(out_ref, anno_stroke);
|
||||||
break;
|
break;
|
||||||
case Element_Fill:
|
case Element_Fill:
|
||||||
|
@ -318,11 +397,9 @@ void main() {
|
||||||
AnnoFill anno_fill;
|
AnnoFill anno_fill;
|
||||||
anno_fill.rgba_color = fill.rgba_color;
|
anno_fill.rgba_color = fill.rgba_color;
|
||||||
anno_fill.bbox = st.bbox;
|
anno_fill.bbox = st.bbox;
|
||||||
|
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
|
||||||
Annotated_Fill_write(out_ref, anno_fill);
|
Annotated_Fill_write(out_ref, anno_fill);
|
||||||
break;
|
break;
|
||||||
default:
|
|
||||||
Annotated_Nop_write(out_ref);
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
|
@ -17,9 +17,14 @@ layout(set = 0, binding = 0) buffer PtclBuf {
|
||||||
uint[] ptcl;
|
uint[] ptcl;
|
||||||
};
|
};
|
||||||
|
|
||||||
layout(rgba8, set = 0, binding = 1) uniform writeonly image2D image;
|
layout(set = 0, binding = 1) buffer TileBuf {
|
||||||
|
uint[] tile;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
|
||||||
|
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
|
#include "tile.h"
|
||||||
|
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
|
@ -57,22 +62,18 @@ void main() {
|
||||||
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
||||||
float df[CHUNK];
|
float df[CHUNK];
|
||||||
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
||||||
SegChunkRef seg_chunk_ref = stroke.seg_ref;
|
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
||||||
do {
|
do {
|
||||||
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
|
TileSeg seg = TileSeg_read(tile_seg_ref);
|
||||||
SegmentRef segs = seg_chunk.segs;
|
vec2 line_vec = seg.end - seg.start;
|
||||||
for (int i = 0; i < seg_chunk.n; i++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
Segment seg = Segment_read(Segment_index(segs, i));
|
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||||
vec2 line_vec = seg.end - seg.start;
|
dpos.y += float(k * CHUNK_DY);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
df[k] = min(df[k], length(line_vec * t - dpos));
|
||||||
dpos.y += float(k * CHUNK_DY);
|
|
||||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
|
||||||
df[k] = min(df[k], length(line_vec * t - dpos));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
seg_chunk_ref = seg_chunk.next;
|
tile_seg_ref = seg.next;
|
||||||
} while (seg_chunk_ref.offset != 0);
|
} while (tile_seg_ref.offset != 0);
|
||||||
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
|
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
float alpha = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
|
float alpha = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
|
||||||
|
@ -84,33 +85,29 @@ void main() {
|
||||||
// Probably better to store as float, but conversion is no doubt cheap.
|
// Probably better to store as float, but conversion is no doubt cheap.
|
||||||
float area[CHUNK];
|
float area[CHUNK];
|
||||||
for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
|
for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
|
||||||
SegChunkRef fill_seg_chunk_ref = fill.seg_ref;
|
tile_seg_ref = TileSegRef(fill.tile_ref);
|
||||||
do {
|
do {
|
||||||
SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
|
TileSeg seg = TileSeg_read(tile_seg_ref);
|
||||||
SegmentRef segs = seg_chunk.segs;
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
for (int i = 0; i < seg_chunk.n; i++) {
|
vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
|
||||||
Segment seg = Segment_read(Segment_index(segs, i));
|
vec2 start = seg.start - my_xy;
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
vec2 end = seg.end - my_xy;
|
||||||
vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
|
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
||||||
vec2 start = seg.start - my_xy;
|
if (window.x != window.y) {
|
||||||
vec2 end = seg.end - my_xy;
|
vec2 t = (window - start.y) / (end.y - start.y);
|
||||||
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
|
||||||
if (window.x != window.y) {
|
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
|
||||||
vec2 t = (window - start.y) / (end.y - start.y);
|
float xmax = max(xs.x, xs.y);
|
||||||
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
|
float b = min(xmax, 1.0);
|
||||||
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
|
float c = max(b, 0.0);
|
||||||
float xmax = max(xs.x, xs.y);
|
float d = max(xmin, 0.0);
|
||||||
float b = min(xmax, 1.0);
|
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
||||||
float c = max(b, 0.0);
|
area[k] += a * (window.x - window.y);
|
||||||
float d = max(xmin, 0.0);
|
|
||||||
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
|
||||||
area[k] += a * (window.x - window.y);
|
|
||||||
}
|
|
||||||
area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
|
||||||
}
|
}
|
||||||
|
area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
||||||
}
|
}
|
||||||
fill_seg_chunk_ref = seg_chunk.next;
|
tile_seg_ref = seg.next;
|
||||||
} while (fill_seg_chunk_ref.offset != 0);
|
} while (tile_seg_ref.offset != 0);
|
||||||
fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
|
fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
float alpha = min(abs(area[k]), 1.0);
|
float alpha = min(abs(area[k]), 1.0);
|
||||||
|
|
Binary file not shown.
265
piet-gpu/shader/path_coarse.comp
Normal file
265
piet-gpu/shader/path_coarse.comp
Normal file
|
@ -0,0 +1,265 @@
|
||||||
|
// Coarse rasterization of path segments.
|
||||||
|
|
||||||
|
// Allocation and initialization of tiles for paths.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "setup.h"
|
||||||
|
|
||||||
|
#define LG_COARSE_WG 5
|
||||||
|
#define COARSE_WG (1 << LG_COARSE_WG)
|
||||||
|
|
||||||
|
layout(local_size_x = COARSE_WG, local_size_y = 1) in;
|
||||||
|
|
||||||
|
layout(set = 0, binding = 0) buffer PathSegBuf {
|
||||||
|
uint[] pathseg;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 1) buffer AllocBuf {
|
||||||
|
uint n_paths;
|
||||||
|
uint n_pathseg;
|
||||||
|
uint alloc;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 2) buffer TileBuf {
|
||||||
|
uint[] tile;
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "pathseg.h"
|
||||||
|
#include "tile.h"
|
||||||
|
|
||||||
|
// scale factors useful for converting coordinates to tiles
|
||||||
|
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||||
|
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||||
|
|
||||||
|
#define ACCURACY 0.25
|
||||||
|
#define Q_ACCURACY (ACCURACY * 0.1)
|
||||||
|
#define REM_ACCURACY (ACCURACY - Q_ACCURACY)
|
||||||
|
#define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY)
|
||||||
|
|
||||||
|
vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) {
|
||||||
|
float mt = 1.0 - t;
|
||||||
|
return p0 * (mt * mt) + (p1 * (mt * 2.0) + p2 * t) * t;
|
||||||
|
}
|
||||||
|
|
||||||
|
vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) {
|
||||||
|
float mt = 1.0 - t;
|
||||||
|
return p0 * (mt * mt * mt) + (p1 * (mt * mt * 3.0) + (p2 * (mt * 3.0) + p3 * t) * t) * t;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct SubdivResult {
|
||||||
|
float val;
|
||||||
|
float a0;
|
||||||
|
float a2;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// An approximation to $\int (1 + 4x^2) ^ -0.25 dx$
|
||||||
|
///
|
||||||
|
/// This is used for flattening curves.
|
||||||
|
#define D 0.67
|
||||||
|
float approx_parabola_integral(float x) {
|
||||||
|
return x * inversesqrt(sqrt(1.0 - D + (D * D * D * D + 0.25 * x * x)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An approximation to the inverse parabola integral.
|
||||||
|
#define B 0.39
|
||||||
|
float approx_parabola_inv_integral(float x) {
|
||||||
|
return x * sqrt(1.0 - B + (B * B + 0.25 * x * x));
|
||||||
|
}
|
||||||
|
|
||||||
|
SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
|
||||||
|
vec2 d01 = p1 - p0;
|
||||||
|
vec2 d12 = p2 - p1;
|
||||||
|
vec2 dd = d01 - d12;
|
||||||
|
float cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x;
|
||||||
|
float x0 = (d01.x * dd.x + d01.y * dd.y) / cross;
|
||||||
|
float x2 = (d12.x * dd.x + d12.y * dd.y) / cross;
|
||||||
|
float scale = abs(cross / (length(dd) * (x2 - x0)));
|
||||||
|
|
||||||
|
float a0 = approx_parabola_integral(x0);
|
||||||
|
float a2 = approx_parabola_integral(x2);
|
||||||
|
float val = 0.0;
|
||||||
|
if (scale < 1e9) {
|
||||||
|
float da = abs(a2 - a0);
|
||||||
|
float sqrt_scale = sqrt(scale);
|
||||||
|
if (sign(x0) == sign(x2)) {
|
||||||
|
val = da * sqrt_scale;
|
||||||
|
} else {
|
||||||
|
float xmin = sqrt_tol / sqrt_scale;
|
||||||
|
val = sqrt_tol * da / approx_parabola_integral(xmin);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return SubdivResult(val, a0, a2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Coarse path rasterization: one invocation per path segment.
//
// For a cubic segment (fill or stroke) this:
//   1. splits the cubic into n_quads quadratics and sums estimate_subdiv's
//      val over them to choose the number of output line segments n;
//   2. walks the quadratics again, emitting n line segments spaced evenly
//      in the accumulated val measure (the last one ends exactly at p3);
//   3. for every emitted line, allocates TileSeg records from `alloc`,
//      links one record into each covered tile of the owning path via an
//      atomic exchange on the tile's list head, and, for fills, adjusts the
//      backdrop word of the tile to the right of each crossed tile-row top.
//
// Segments with any other tag (including Nop, used for out-of-range
// invocations) match no case of the switch and produce no output. In
// particular line segments are not handled here.
void main() {
    uint element_ix = gl_GlobalInvocationID.x;
    PathSegRef ref = PathSegRef(element_ix * PathSeg_size);

    uint tag = PathSeg_Nop;
    if (element_ix < n_pathseg) {
        tag = PathSeg_tag(ref);
    }
    // Setup for coverage algorithm.
    float a, b, c;
    // Bounding box of element in pixel coordinates.
    float xmin, xmax, ymin, ymax;
    switch (tag) {
    case PathSeg_FillCubic:
    case PathSeg_StrokeCubic:
        // Fill and stroke cubics are both read through the stroke layout.
        PathStrokeCubic cubic = PathSeg_StrokeCubic_read(ref);
        // Loop-invariant tolerance passed to estimate_subdiv and used to
        // turn the summed estimate into a segment count.
        float sqrt_tol = sqrt(REM_ACCURACY);
        // Error measure for the conversion of the cubic to quadratics.
        vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
        float err = err_v.x * err_v.x + err_v.y * err_v.y;
        // The number of quadratics.
        uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1);
        // Iterate over quadratics and tote up the estimated number of segments.
        float val = 0.0;
        vec2 qp0 = cubic.p0;
        float step = 1.0 / float(n_quads);
        for (uint i = 0; i < n_quads; i++) {
            float t = float(i + 1) * step;
            vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
            // Derive the quadratic's control point from the cubic's midpoint
            // on this sub-interval.
            vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
            qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
            SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt_tol);
            val += params.val;

            qp0 = qp2;
        }
        // Number of output line segments; always at least one.
        uint n = max(uint(ceil(val * 0.5 / sqrt_tol)), 1);

        uint path_ix = cubic.path_ix;
        Path path = Path_read(PathRef(path_ix * Path_size));
        // Path bounding box in tile coordinates: (x0, y0, x1, y1).
        ivec4 bbox = ivec4(path.bbox);
        vec2 p0 = cubic.p0;
        qp0 = cubic.p0;
        float v_step = val / float(n);
        // Index (1-based) of the next line segment to emit.
        int n_out = 1;
        // Sum of params.val over the quadratics already passed.
        float val_sum = 0.0;
        for (uint i = 0; i < n_quads; i++) {
            float t = float(i + 1) * step;
            vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
            vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
            qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
            SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt_tol);
            float u0 = approx_parabola_inv_integral(params.a0);
            float u2 = approx_parabola_inv_integral(params.a2);
            float uscale = 1.0 / (u2 - u0);
            float target = float(n_out) * v_step;
            // Emit every segment whose end point falls inside this quadratic;
            // the final segment is always emitted and snaps to cubic.p3.
            while (n_out == n || target < val_sum + params.val) {
                vec2 p1;
                if (n_out == n) {
                    p1 = cubic.p3;
                } else {
                    // Map the target position back to a parameter on the
                    // current quadratic.
                    float u = (target - val_sum) / params.val;
                    float a = mix(params.a0, params.a2, u);
                    float au = approx_parabola_inv_integral(a);
                    float t = (au - u0) * uscale;
                    p1 = eval_quad(qp0, qp1, qp2, t);
                }

                // Output line segment
                xmin = min(p0.x, p1.x) - cubic.stroke.x;
                xmax = max(p0.x, p1.x) + cubic.stroke.x;
                ymin = min(p0.y, p1.y) - cubic.stroke.y;
                ymax = max(p0.y, p1.y) + cubic.stroke.y;
                float dx = p1.x - p0.x;
                float dy = p1.y - p0.y;
                // Set up for per-scanline coverage formula, below.
                float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
                c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
                b = invslope; // Note: assumes square tiles, otherwise scale.
                a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;

                // Tile-space bounding box of the line, clamped to the path's.
                int x0 = int(floor((xmin) * SX));
                int x1 = int(ceil((xmax) * SX));
                int y0 = int(floor((ymin) * SY));
                int y1 = int(ceil((ymax) * SY));

                x0 = clamp(x0, bbox.x, bbox.z);
                y0 = clamp(y0, bbox.y, bbox.w);
                x1 = clamp(x1, bbox.x, bbox.z);
                y1 = clamp(y1, bbox.y, bbox.w);
                // Line centre (in tiles) for the current tile row.
                float xc = a + b * float(y0);
                int stride = bbox.z - bbox.x;
                // base + x is the index of tile (x, y) within path.tiles.
                int base = (y0 - bbox.y) * stride - bbox.x;
                // TODO: can be tighter, use c to bound width
                uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
                // Consider using subgroups to aggregate atomic add.
                uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);
                TileSeg tile_seg;
                for (int y = y0; y < y1; y++) {
                    float tile_y0 = float(y * TILE_HEIGHT_PX);
                    if (tag == PathSeg_FillCubic && min(p0.y, p1.y) <= tile_y0) {
                        int xray = max(int(ceil(xc - 0.5 * b)), bbox.x);
                        if (xray < bbox.z) {
                            int backdrop = p1.y < p0.y ? 1 : -1;
                            TileRef tile_ref = Tile_index(path.tiles, uint(base + xray));
                            uint tile_el = tile_ref.offset >> 2;
                            // Word 1 of a Tile is its backdrop.
                            atomicAdd(tile[tile_el + 1], backdrop);
                        }
                    }
                    int xx0 = clamp(int(floor(xc - c)), x0, x1);
                    int xx1 = clamp(int(ceil(xc + c)), x0, x1);
                    for (int x = xx0; x < xx1; x++) {
                        float tile_x0 = float(x * TILE_WIDTH_PX);
                        TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
                        uint tile_el = tile_ref.offset >> 2;
                        // Word 0 of a Tile is the head of its segment list;
                        // push the new record and remember the old head.
                        uint old = atomicExchange(tile[tile_el], tile_offset);
                        tile_seg.start = p0;
                        tile_seg.end = p1;
                        float y_edge = 0.0;
                        if (tag == PathSeg_FillCubic) {
                            // Where the line meets the tile's left edge.
                            y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
                            if (min(p0.x, p1.x) < tile_x0 && y_edge >= tile_y0 && y_edge < tile_y0 + TILE_HEIGHT_PX) {
                                // Clip the part left of the tile.
                                if (p0.x > p1.x) {
                                    tile_seg.end = vec2(tile_x0, y_edge);
                                } else {
                                    tile_seg.start = vec2(tile_x0, y_edge);
                                }
                            } else {
                                // No left-edge crossing within this tile.
                                y_edge = 1e9;
                            }
                        }
                        tile_seg.y_edge = y_edge;
                        tile_seg.next.offset = old;
                        TileSeg_write(TileSegRef(tile_offset), tile_seg);
                        tile_offset += TileSeg_size;
                    }
                    xc += b;
                    base += stride;
                }

                n_out += 1;
                target += v_step;
                p0 = p1;
            }
            val_sum += params.val;

            qp0 = qp2;
        }

        break;
    }
}
|
BIN
piet-gpu/shader/path_coarse.spv
Normal file
BIN
piet-gpu/shader/path_coarse.spv
Normal file
Binary file not shown.
253
piet-gpu/shader/pathseg.h
Normal file
253
piet-gpu/shader/pathseg.h
Normal file
|
@ -0,0 +1,253 @@
|
||||||
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
|
struct PathFillLineRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PathStrokeLineRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PathFillCubicRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PathStrokeCubicRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PathSegRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct PathFillLine {
|
||||||
|
vec2 p0;
|
||||||
|
vec2 p1;
|
||||||
|
uint path_ix;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define PathFillLine_size 20
|
||||||
|
|
||||||
|
PathFillLineRef PathFillLine_index(PathFillLineRef ref, uint index) {
|
||||||
|
return PathFillLineRef(ref.offset + index * PathFillLine_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct PathStrokeLine {
|
||||||
|
vec2 p0;
|
||||||
|
vec2 p1;
|
||||||
|
uint path_ix;
|
||||||
|
vec2 stroke;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define PathStrokeLine_size 28
|
||||||
|
|
||||||
|
PathStrokeLineRef PathStrokeLine_index(PathStrokeLineRef ref, uint index) {
|
||||||
|
return PathStrokeLineRef(ref.offset + index * PathStrokeLine_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct PathFillCubic {
|
||||||
|
vec2 p0;
|
||||||
|
vec2 p1;
|
||||||
|
vec2 p2;
|
||||||
|
vec2 p3;
|
||||||
|
uint path_ix;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define PathFillCubic_size 36
|
||||||
|
|
||||||
|
PathFillCubicRef PathFillCubic_index(PathFillCubicRef ref, uint index) {
|
||||||
|
return PathFillCubicRef(ref.offset + index * PathFillCubic_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct PathStrokeCubic {
|
||||||
|
vec2 p0;
|
||||||
|
vec2 p1;
|
||||||
|
vec2 p2;
|
||||||
|
vec2 p3;
|
||||||
|
uint path_ix;
|
||||||
|
vec2 stroke;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define PathStrokeCubic_size 44
|
||||||
|
|
||||||
|
PathStrokeCubicRef PathStrokeCubic_index(PathStrokeCubicRef ref, uint index) {
|
||||||
|
return PathStrokeCubicRef(ref.offset + index * PathStrokeCubic_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define PathSeg_Nop 0
|
||||||
|
#define PathSeg_FillLine 1
|
||||||
|
#define PathSeg_StrokeLine 2
|
||||||
|
#define PathSeg_FillCubic 3
|
||||||
|
#define PathSeg_StrokeCubic 4
|
||||||
|
#define PathSeg_size 48
|
||||||
|
|
||||||
|
PathSegRef PathSeg_index(PathSegRef ref, uint index) {
|
||||||
|
return PathSegRef(ref.offset + index * PathSeg_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
PathFillLine PathFillLine_read(PathFillLineRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = pathseg[ix + 0];
|
||||||
|
uint raw1 = pathseg[ix + 1];
|
||||||
|
uint raw2 = pathseg[ix + 2];
|
||||||
|
uint raw3 = pathseg[ix + 3];
|
||||||
|
uint raw4 = pathseg[ix + 4];
|
||||||
|
PathFillLine s;
|
||||||
|
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
s.path_ix = raw4;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathFillLine_write(PathFillLineRef ref, PathFillLine s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
|
||||||
|
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
|
||||||
|
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
|
||||||
|
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
|
||||||
|
pathseg[ix + 4] = s.path_ix;
|
||||||
|
}
|
||||||
|
|
||||||
|
PathStrokeLine PathStrokeLine_read(PathStrokeLineRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = pathseg[ix + 0];
|
||||||
|
uint raw1 = pathseg[ix + 1];
|
||||||
|
uint raw2 = pathseg[ix + 2];
|
||||||
|
uint raw3 = pathseg[ix + 3];
|
||||||
|
uint raw4 = pathseg[ix + 4];
|
||||||
|
uint raw5 = pathseg[ix + 5];
|
||||||
|
uint raw6 = pathseg[ix + 6];
|
||||||
|
PathStrokeLine s;
|
||||||
|
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
s.path_ix = raw4;
|
||||||
|
s.stroke = vec2(uintBitsToFloat(raw5), uintBitsToFloat(raw6));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathStrokeLine_write(PathStrokeLineRef ref, PathStrokeLine s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
|
||||||
|
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
|
||||||
|
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
|
||||||
|
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
|
||||||
|
pathseg[ix + 4] = s.path_ix;
|
||||||
|
pathseg[ix + 5] = floatBitsToUint(s.stroke.x);
|
||||||
|
pathseg[ix + 6] = floatBitsToUint(s.stroke.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
PathFillCubic PathFillCubic_read(PathFillCubicRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = pathseg[ix + 0];
|
||||||
|
uint raw1 = pathseg[ix + 1];
|
||||||
|
uint raw2 = pathseg[ix + 2];
|
||||||
|
uint raw3 = pathseg[ix + 3];
|
||||||
|
uint raw4 = pathseg[ix + 4];
|
||||||
|
uint raw5 = pathseg[ix + 5];
|
||||||
|
uint raw6 = pathseg[ix + 6];
|
||||||
|
uint raw7 = pathseg[ix + 7];
|
||||||
|
uint raw8 = pathseg[ix + 8];
|
||||||
|
PathFillCubic s;
|
||||||
|
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||||
|
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
|
||||||
|
s.path_ix = raw8;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathFillCubic_write(PathFillCubicRef ref, PathFillCubic s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
|
||||||
|
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
|
||||||
|
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
|
||||||
|
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
|
||||||
|
pathseg[ix + 4] = floatBitsToUint(s.p2.x);
|
||||||
|
pathseg[ix + 5] = floatBitsToUint(s.p2.y);
|
||||||
|
pathseg[ix + 6] = floatBitsToUint(s.p3.x);
|
||||||
|
pathseg[ix + 7] = floatBitsToUint(s.p3.y);
|
||||||
|
pathseg[ix + 8] = s.path_ix;
|
||||||
|
}
|
||||||
|
|
||||||
|
PathStrokeCubic PathStrokeCubic_read(PathStrokeCubicRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = pathseg[ix + 0];
|
||||||
|
uint raw1 = pathseg[ix + 1];
|
||||||
|
uint raw2 = pathseg[ix + 2];
|
||||||
|
uint raw3 = pathseg[ix + 3];
|
||||||
|
uint raw4 = pathseg[ix + 4];
|
||||||
|
uint raw5 = pathseg[ix + 5];
|
||||||
|
uint raw6 = pathseg[ix + 6];
|
||||||
|
uint raw7 = pathseg[ix + 7];
|
||||||
|
uint raw8 = pathseg[ix + 8];
|
||||||
|
uint raw9 = pathseg[ix + 9];
|
||||||
|
uint raw10 = pathseg[ix + 10];
|
||||||
|
PathStrokeCubic s;
|
||||||
|
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||||
|
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
|
||||||
|
s.path_ix = raw8;
|
||||||
|
s.stroke = vec2(uintBitsToFloat(raw9), uintBitsToFloat(raw10));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathStrokeCubic_write(PathStrokeCubicRef ref, PathStrokeCubic s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
|
||||||
|
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
|
||||||
|
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
|
||||||
|
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
|
||||||
|
pathseg[ix + 4] = floatBitsToUint(s.p2.x);
|
||||||
|
pathseg[ix + 5] = floatBitsToUint(s.p2.y);
|
||||||
|
pathseg[ix + 6] = floatBitsToUint(s.p3.x);
|
||||||
|
pathseg[ix + 7] = floatBitsToUint(s.p3.y);
|
||||||
|
pathseg[ix + 8] = s.path_ix;
|
||||||
|
pathseg[ix + 9] = floatBitsToUint(s.stroke.x);
|
||||||
|
pathseg[ix + 10] = floatBitsToUint(s.stroke.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint PathSeg_tag(PathSegRef ref) {
|
||||||
|
return pathseg[ref.offset >> 2];
|
||||||
|
}
|
||||||
|
|
||||||
|
PathFillLine PathSeg_FillLine_read(PathSegRef ref) {
|
||||||
|
return PathFillLine_read(PathFillLineRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
PathStrokeLine PathSeg_StrokeLine_read(PathSegRef ref) {
|
||||||
|
return PathStrokeLine_read(PathStrokeLineRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
PathFillCubic PathSeg_FillCubic_read(PathSegRef ref) {
|
||||||
|
return PathFillCubic_read(PathFillCubicRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
PathStrokeCubic PathSeg_StrokeCubic_read(PathSegRef ref) {
|
||||||
|
return PathStrokeCubic_read(PathStrokeCubicRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathSeg_Nop_write(PathSegRef ref) {
|
||||||
|
pathseg[ref.offset >> 2] = PathSeg_Nop;
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathSeg_FillLine_write(PathSegRef ref, PathFillLine s) {
|
||||||
|
pathseg[ref.offset >> 2] = PathSeg_FillLine;
|
||||||
|
PathFillLine_write(PathFillLineRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathSeg_StrokeLine_write(PathSegRef ref, PathStrokeLine s) {
|
||||||
|
pathseg[ref.offset >> 2] = PathSeg_StrokeLine;
|
||||||
|
PathStrokeLine_write(PathStrokeLineRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathSeg_FillCubic_write(PathSegRef ref, PathFillCubic s) {
|
||||||
|
pathseg[ref.offset >> 2] = PathSeg_FillCubic;
|
||||||
|
PathFillCubic_write(PathFillCubicRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void PathSeg_StrokeCubic_write(PathSegRef ref, PathStrokeCubic s) {
|
||||||
|
pathseg[ref.offset >> 2] = PathSeg_StrokeCubic;
|
||||||
|
PathStrokeCubic_write(PathStrokeCubicRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
|
@ -68,7 +68,7 @@ CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct CmdStroke {
|
struct CmdStroke {
|
||||||
SegChunkRef seg_ref;
|
uint tile_ref;
|
||||||
float half_width;
|
float half_width;
|
||||||
uint rgba_color;
|
uint rgba_color;
|
||||||
};
|
};
|
||||||
|
@ -80,7 +80,7 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
|
||||||
}
|
}
|
||||||
|
|
||||||
struct CmdFill {
|
struct CmdFill {
|
||||||
SegChunkRef seg_ref;
|
uint tile_ref;
|
||||||
int backdrop;
|
int backdrop;
|
||||||
uint rgba_color;
|
uint rgba_color;
|
||||||
};
|
};
|
||||||
|
@ -220,7 +220,7 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
||||||
uint raw1 = ptcl[ix + 1];
|
uint raw1 = ptcl[ix + 1];
|
||||||
uint raw2 = ptcl[ix + 2];
|
uint raw2 = ptcl[ix + 2];
|
||||||
CmdStroke s;
|
CmdStroke s;
|
||||||
s.seg_ref = SegChunkRef(raw0);
|
s.tile_ref = raw0;
|
||||||
s.half_width = uintBitsToFloat(raw1);
|
s.half_width = uintBitsToFloat(raw1);
|
||||||
s.rgba_color = raw2;
|
s.rgba_color = raw2;
|
||||||
return s;
|
return s;
|
||||||
|
@ -228,7 +228,7 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
||||||
|
|
||||||
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
|
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
ptcl[ix + 0] = s.seg_ref.offset;
|
ptcl[ix + 0] = s.tile_ref;
|
||||||
ptcl[ix + 1] = floatBitsToUint(s.half_width);
|
ptcl[ix + 1] = floatBitsToUint(s.half_width);
|
||||||
ptcl[ix + 2] = s.rgba_color;
|
ptcl[ix + 2] = s.rgba_color;
|
||||||
}
|
}
|
||||||
|
@ -239,7 +239,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
|
||||||
uint raw1 = ptcl[ix + 1];
|
uint raw1 = ptcl[ix + 1];
|
||||||
uint raw2 = ptcl[ix + 2];
|
uint raw2 = ptcl[ix + 2];
|
||||||
CmdFill s;
|
CmdFill s;
|
||||||
s.seg_ref = SegChunkRef(raw0);
|
s.tile_ref = raw0;
|
||||||
s.backdrop = int(raw1);
|
s.backdrop = int(raw1);
|
||||||
s.rgba_color = raw2;
|
s.rgba_color = raw2;
|
||||||
return s;
|
return s;
|
||||||
|
@ -247,7 +247,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
|
||||||
|
|
||||||
void CmdFill_write(CmdFillRef ref, CmdFill s) {
|
void CmdFill_write(CmdFillRef ref, CmdFill s) {
|
||||||
uint ix = ref.offset >> 2;
|
uint ix = ref.offset >> 2;
|
||||||
ptcl[ix + 0] = s.seg_ref.offset;
|
ptcl[ix + 0] = s.tile_ref;
|
||||||
ptcl[ix + 1] = uint(s.backdrop);
|
ptcl[ix + 1] = uint(s.backdrop);
|
||||||
ptcl[ix + 2] = s.rgba_color;
|
ptcl[ix + 2] = s.rgba_color;
|
||||||
}
|
}
|
||||||
|
|
|
@ -240,12 +240,14 @@ TransformRef Transform_index(TransformRef ref, uint index) {
|
||||||
#define Element_Nop 0
|
#define Element_Nop 0
|
||||||
#define Element_StrokeLine 1
|
#define Element_StrokeLine 1
|
||||||
#define Element_FillLine 2
|
#define Element_FillLine 2
|
||||||
#define Element_Quad 3
|
#define Element_StrokeQuad 3
|
||||||
#define Element_Cubic 4
|
#define Element_FillQuad 4
|
||||||
#define Element_Stroke 5
|
#define Element_StrokeCubic 5
|
||||||
#define Element_Fill 6
|
#define Element_FillCubic 6
|
||||||
#define Element_SetLineWidth 7
|
#define Element_Stroke 7
|
||||||
#define Element_Transform 8
|
#define Element_Fill 8
|
||||||
|
#define Element_SetLineWidth 9
|
||||||
|
#define Element_Transform 10
|
||||||
#define Element_size 36
|
#define Element_size 36
|
||||||
|
|
||||||
ElementRef Element_index(ElementRef ref, uint index) {
|
ElementRef Element_index(ElementRef ref, uint index) {
|
||||||
|
@ -455,11 +457,19 @@ LineSeg Element_FillLine_read(ElementRef ref) {
|
||||||
return LineSeg_read(LineSegRef(ref.offset + 4));
|
return LineSeg_read(LineSegRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
QuadSeg Element_Quad_read(ElementRef ref) {
|
QuadSeg Element_StrokeQuad_read(ElementRef ref) {
|
||||||
return QuadSeg_read(QuadSegRef(ref.offset + 4));
|
return QuadSeg_read(QuadSegRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
CubicSeg Element_Cubic_read(ElementRef ref) {
|
QuadSeg Element_FillQuad_read(ElementRef ref) {
|
||||||
|
return QuadSeg_read(QuadSegRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CubicSeg Element_StrokeCubic_read(ElementRef ref) {
|
||||||
|
return CubicSeg_read(CubicSegRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CubicSeg Element_FillCubic_read(ElementRef ref) {
|
||||||
return CubicSeg_read(CubicSegRef(ref.offset + 4));
|
return CubicSeg_read(CubicSegRef(ref.offset + 4));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -31,6 +31,7 @@
|
||||||
// TODO: compute all these
|
// TODO: compute all these
|
||||||
|
|
||||||
#define WIDTH_IN_TILES 128
|
#define WIDTH_IN_TILES 128
|
||||||
|
#define HEIGHT_IN_TILES 96
|
||||||
#define TILEGROUP_WIDTH_TILES 32
|
#define TILEGROUP_WIDTH_TILES 32
|
||||||
#define TILE_WIDTH_PX 16
|
#define TILE_WIDTH_PX 16
|
||||||
#define TILE_HEIGHT_PX 16
|
#define TILE_HEIGHT_PX 16
|
||||||
|
|
|
@ -10,9 +10,11 @@ struct State {
|
||||||
vec4 bbox;
|
vec4 bbox;
|
||||||
float linewidth;
|
float linewidth;
|
||||||
uint flags;
|
uint flags;
|
||||||
|
uint path_count;
|
||||||
|
uint pathseg_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define State_size 48
|
#define State_size 56
|
||||||
|
|
||||||
StateRef State_index(StateRef ref, uint index) {
|
StateRef State_index(StateRef ref, uint index) {
|
||||||
return StateRef(ref.offset + index * State_size);
|
return StateRef(ref.offset + index * State_size);
|
||||||
|
@ -32,12 +34,16 @@ State State_read(StateRef ref) {
|
||||||
uint raw9 = state[ix + 9];
|
uint raw9 = state[ix + 9];
|
||||||
uint raw10 = state[ix + 10];
|
uint raw10 = state[ix + 10];
|
||||||
uint raw11 = state[ix + 11];
|
uint raw11 = state[ix + 11];
|
||||||
|
uint raw12 = state[ix + 12];
|
||||||
|
uint raw13 = state[ix + 13];
|
||||||
State s;
|
State s;
|
||||||
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||||
s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9));
|
s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9));
|
||||||
s.linewidth = uintBitsToFloat(raw10);
|
s.linewidth = uintBitsToFloat(raw10);
|
||||||
s.flags = raw11;
|
s.flags = raw11;
|
||||||
|
s.path_count = raw12;
|
||||||
|
s.pathseg_count = raw13;
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,5 +61,7 @@ void State_write(StateRef ref, State s) {
|
||||||
state[ix + 9] = floatBitsToUint(s.bbox.w);
|
state[ix + 9] = floatBitsToUint(s.bbox.w);
|
||||||
state[ix + 10] = floatBitsToUint(s.linewidth);
|
state[ix + 10] = floatBitsToUint(s.linewidth);
|
||||||
state[ix + 11] = s.flags;
|
state[ix + 11] = s.flags;
|
||||||
|
state[ix + 12] = s.path_count;
|
||||||
|
state[ix + 13] = s.pathseg_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
109
piet-gpu/shader/tile.h
Normal file
109
piet-gpu/shader/tile.h
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
|
struct PathRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TileRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct TileSegRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct Path {
|
||||||
|
uvec4 bbox;
|
||||||
|
TileRef tiles;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define Path_size 12
|
||||||
|
|
||||||
|
PathRef Path_index(PathRef ref, uint index) {
|
||||||
|
return PathRef(ref.offset + index * Path_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Tile {
|
||||||
|
TileSegRef tile;
|
||||||
|
int backdrop;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define Tile_size 8
|
||||||
|
|
||||||
|
TileRef Tile_index(TileRef ref, uint index) {
|
||||||
|
return TileRef(ref.offset + index * Tile_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct TileSeg {
|
||||||
|
vec2 start;
|
||||||
|
vec2 end;
|
||||||
|
float y_edge;
|
||||||
|
TileSegRef next;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define TileSeg_size 24
|
||||||
|
|
||||||
|
TileSegRef TileSeg_index(TileSegRef ref, uint index) {
|
||||||
|
return TileSegRef(ref.offset + index * TileSeg_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
Path Path_read(PathRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = tile[ix + 0];
|
||||||
|
uint raw1 = tile[ix + 1];
|
||||||
|
uint raw2 = tile[ix + 2];
|
||||||
|
Path s;
|
||||||
|
s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16);
|
||||||
|
s.tiles = TileRef(raw2);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Path_write(PathRef ref, Path s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
tile[ix + 0] = s.bbox.x | (s.bbox.y << 16);
|
||||||
|
tile[ix + 1] = s.bbox.z | (s.bbox.w << 16);
|
||||||
|
tile[ix + 2] = s.tiles.offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
Tile Tile_read(TileRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = tile[ix + 0];
|
||||||
|
uint raw1 = tile[ix + 1];
|
||||||
|
Tile s;
|
||||||
|
s.tile = TileSegRef(raw0);
|
||||||
|
s.backdrop = int(raw1);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Tile_write(TileRef ref, Tile s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
tile[ix + 0] = s.tile.offset;
|
||||||
|
tile[ix + 1] = uint(s.backdrop);
|
||||||
|
}
|
||||||
|
|
||||||
|
TileSeg TileSeg_read(TileSegRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = tile[ix + 0];
|
||||||
|
uint raw1 = tile[ix + 1];
|
||||||
|
uint raw2 = tile[ix + 2];
|
||||||
|
uint raw3 = tile[ix + 3];
|
||||||
|
uint raw4 = tile[ix + 4];
|
||||||
|
uint raw5 = tile[ix + 5];
|
||||||
|
TileSeg s;
|
||||||
|
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
s.y_edge = uintBitsToFloat(raw4);
|
||||||
|
s.next = TileSegRef(raw5);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void TileSeg_write(TileSegRef ref, TileSeg s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
tile[ix + 0] = floatBitsToUint(s.start.x);
|
||||||
|
tile[ix + 1] = floatBitsToUint(s.start.y);
|
||||||
|
tile[ix + 2] = floatBitsToUint(s.end.x);
|
||||||
|
tile[ix + 3] = floatBitsToUint(s.end.y);
|
||||||
|
tile[ix + 4] = floatBitsToUint(s.y_edge);
|
||||||
|
tile[ix + 5] = s.next.offset;
|
||||||
|
}
|
||||||
|
|
100
piet-gpu/shader/tile_alloc.comp
Normal file
100
piet-gpu/shader/tile_alloc.comp
Normal file
|
@ -0,0 +1,100 @@
|
||||||
|
// Allocation and initialization of tiles for paths.
|
||||||
|
|
||||||
|
#version 450
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "setup.h"
|
||||||
|
|
||||||
|
#define LG_TILE_ALLOC_WG 8
|
||||||
|
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
||||||
|
|
||||||
|
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
||||||
|
|
||||||
|
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
||||||
|
uint[] annotated;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 1) buffer AllocBuf {
|
||||||
|
uint n_elements;
|
||||||
|
uint n_pathseg;
|
||||||
|
uint alloc;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 2) buffer TileBuf {
|
||||||
|
uint[] tile;
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "annotated.h"
|
||||||
|
#include "tile.h"
|
||||||
|
|
||||||
|
// scale factors useful for converting coordinates to tiles
|
||||||
|
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||||
|
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||||
|
|
||||||
|
shared uint sh_tile_count[TILE_ALLOC_WG];
|
||||||
|
shared uint sh_tile_alloc;
|
||||||
|
|
||||||
|
// Allocates and zero-initializes the tile array of each path.
//
// Each invocation handles one annotated element. Fill and stroke elements
// get a Path record whose bbox is the element's bounding box in tile
// coordinates (clamped to the WIDTH_IN_TILES x HEIGHT_IN_TILES grid) and
// whose tiles field points at a freshly allocated run of Tile records.
// Any other element (and any invocation past n_elements) gets an empty bbox.
//
// Allocation is done once per workgroup: an inclusive prefix sum over the
// per-invocation tile counts, then a single atomicAdd on `alloc` by the
// last invocation.
void main() {
    uint local_ix = gl_LocalInvocationID.x;
    uint global_ix = gl_GlobalInvocationID.x;
    PathRef path_ref = PathRef(global_ix * Path_size);
    AnnotatedRef anno_ref = AnnotatedRef(global_ix * Annotated_size);

    // Invocations past the end of the element list are treated as Nop.
    uint tag = Annotated_Nop;
    if (global_ix < n_elements) {
        tag = Annotated_tag(anno_ref);
    }

    int x0 = 0;
    int y0 = 0;
    int x1 = 0;
    int y1 = 0;
    if (tag == Annotated_Fill || tag == Annotated_Stroke) {
        // Note: we take advantage of the fact that fills and strokes
        // have compatible layout.
        AnnoFill fill = Annotated_Fill_read(anno_ref);
        x0 = int(floor(fill.bbox.x * SX));
        y0 = int(floor(fill.bbox.y * SY));
        x1 = int(ceil(fill.bbox.z * SX));
        y1 = int(ceil(fill.bbox.w * SY));
    }
    x0 = clamp(x0, 0, WIDTH_IN_TILES);
    y0 = clamp(y0, 0, HEIGHT_IN_TILES);
    x1 = clamp(x1, 0, WIDTH_IN_TILES);
    y1 = clamp(y1, 0, HEIGHT_IN_TILES);

    Path path;
    path.bbox = uvec4(x0, y0, x1, y1);
    // Starts as this invocation's own tile count and becomes the inclusive
    // prefix sum over the workgroup.
    uint running = (x1 - x0) * (y1 - y0);

    sh_tile_count[local_ix] = running;
    // Prefix sum of sh_tile_count
    for (uint round = 0; round < LG_TILE_ALLOC_WG; round++) {
        uint reach = 1 << round;
        barrier();
        if (local_ix >= reach) {
            running += sh_tile_count[local_ix - reach];
        }
        barrier();
        sh_tile_count[local_ix] = running;
    }
    // The last invocation holds the workgroup total and reserves the space.
    if (local_ix == TILE_ALLOC_WG - 1) {
        sh_tile_alloc = atomicAdd(alloc, running * Tile_size);
    }
    barrier();
    uint alloc_start = sh_tile_alloc;

    if (global_ix < n_elements) {
        // Exclusive prefix: tiles owned by lower-indexed invocations.
        uint first_tile = 0;
        if (local_ix > 0) {
            first_tile = sh_tile_count[local_ix - 1];
        }
        path.tiles = TileRef(alloc_start + Tile_size * first_tile);
        Path_write(path_ref, path);
    }

    // Zero out allocated tiles efficiently
    uint word_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
    uint first_word = alloc_start >> 2;
    for (uint w = local_ix; w < word_count; w += TILE_ALLOC_WG) {
        // Note: this interleaving is faster than using Tile_write
        // by a significant amount.
        tile[first_word + w] = 0;
    }
}
|
BIN
piet-gpu/shader/tile_alloc.spv
Normal file
BIN
piet-gpu/shader/tile_alloc.spv
Normal file
Binary file not shown.
|
@ -121,12 +121,26 @@ pub struct Renderer<D: Device> {
|
||||||
|
|
||||||
pub state_buf: D::Buffer,
|
pub state_buf: D::Buffer,
|
||||||
pub anno_buf: D::Buffer,
|
pub anno_buf: D::Buffer,
|
||||||
|
pub pathseg_buf: D::Buffer,
|
||||||
|
pub tile_buf: D::Buffer,
|
||||||
pub bin_buf: D::Buffer,
|
pub bin_buf: D::Buffer,
|
||||||
pub ptcl_buf: D::Buffer,
|
pub ptcl_buf: D::Buffer,
|
||||||
|
|
||||||
el_pipeline: D::Pipeline,
|
el_pipeline: D::Pipeline,
|
||||||
el_ds: D::DescriptorSet,
|
el_ds: D::DescriptorSet,
|
||||||
|
|
||||||
|
tile_pipeline: D::Pipeline,
|
||||||
|
tile_ds: D::DescriptorSet,
|
||||||
|
|
||||||
|
path_pipeline: D::Pipeline,
|
||||||
|
path_ds: D::DescriptorSet,
|
||||||
|
|
||||||
|
backdrop_pipeline: D::Pipeline,
|
||||||
|
backdrop_ds: D::DescriptorSet,
|
||||||
|
|
||||||
|
tile_alloc_buf_host: D::Buffer,
|
||||||
|
tile_alloc_buf_dev: D::Buffer,
|
||||||
|
|
||||||
bin_pipeline: D::Pipeline,
|
bin_pipeline: D::Pipeline,
|
||||||
bin_ds: D::DescriptorSet,
|
bin_ds: D::DescriptorSet,
|
||||||
|
|
||||||
|
@ -143,10 +157,12 @@ pub struct Renderer<D: Device> {
|
||||||
k4_ds: D::DescriptorSet,
|
k4_ds: D::DescriptorSet,
|
||||||
|
|
||||||
n_elements: usize,
|
n_elements: usize,
|
||||||
|
n_paths: usize,
|
||||||
|
n_pathseg: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<D: Device> Renderer<D> {
|
impl<D: Device> Renderer<D> {
|
||||||
pub unsafe fn new(device: &D, scene: &[u8]) -> Result<Self, Error> {
|
pub unsafe fn new(device: &D, scene: &[u8], n_paths: usize, n_pathseg: usize) -> Result<Self, Error> {
|
||||||
let host = MemFlags::host_coherent();
|
let host = MemFlags::host_coherent();
|
||||||
let dev = MemFlags::device_local();
|
let dev = MemFlags::device_local();
|
||||||
|
|
||||||
|
@ -163,15 +179,51 @@ impl<D: Device> Renderer<D> {
|
||||||
|
|
||||||
let state_buf = device.create_buffer(1 * 1024 * 1024, dev)?;
|
let state_buf = device.create_buffer(1 * 1024 * 1024, dev)?;
|
||||||
let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||||
|
let pathseg_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||||
|
let tile_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||||
let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||||
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?;
|
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?;
|
||||||
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
|
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
|
||||||
|
|
||||||
let el_code = include_bytes!("../shader/elements.spv");
|
let el_code = include_bytes!("../shader/elements.spv");
|
||||||
let el_pipeline = device.create_simple_compute_pipeline(el_code, 3, 0)?;
|
let el_pipeline = device.create_simple_compute_pipeline(el_code, 4, 0)?;
|
||||||
let el_ds = device.create_descriptor_set(
|
let el_ds = device.create_descriptor_set(
|
||||||
&el_pipeline,
|
&el_pipeline,
|
||||||
&[&scene_dev, &state_buf, &anno_buf],
|
&[&scene_dev, &state_buf, &anno_buf, &pathseg_buf],
|
||||||
|
&[],
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let tile_alloc_buf_host = device.create_buffer(12, host)?;
|
||||||
|
let tile_alloc_buf_dev = device.create_buffer(12, dev)?;
|
||||||
|
|
||||||
|
// TODO: constants
|
||||||
|
const PATH_SIZE: usize = 12;
|
||||||
|
let tile_alloc_start = ((n_paths + 31) & !31) * PATH_SIZE;
|
||||||
|
device.write_buffer(
|
||||||
|
&tile_alloc_buf_host,
|
||||||
|
&[n_paths as u32, n_pathseg as u32, tile_alloc_start as u32],
|
||||||
|
)?;
|
||||||
|
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
|
||||||
|
let tile_pipeline = device.create_simple_compute_pipeline(tile_alloc_code, 3, 0)?;
|
||||||
|
let tile_ds = device.create_descriptor_set(
|
||||||
|
&tile_pipeline,
|
||||||
|
&[&anno_buf, &tile_alloc_buf_dev, &tile_buf],
|
||||||
|
&[],
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
|
||||||
|
let path_pipeline = device.create_simple_compute_pipeline(path_alloc_code, 3, 0)?;
|
||||||
|
let path_ds = device.create_descriptor_set(
|
||||||
|
&path_pipeline,
|
||||||
|
&[&pathseg_buf, &tile_alloc_buf_dev, &tile_buf],
|
||||||
|
&[],
|
||||||
|
)?;
|
||||||
|
|
||||||
|
let backdrop_alloc_code = include_bytes!("../shader/backdrop.spv");
|
||||||
|
let backdrop_pipeline = device.create_simple_compute_pipeline(backdrop_alloc_code, 3, 0)?;
|
||||||
|
let backdrop_ds = device.create_descriptor_set(
|
||||||
|
&backdrop_pipeline,
|
||||||
|
&[&anno_buf, &tile_alloc_buf_dev, &tile_buf],
|
||||||
&[],
|
&[],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
|
@ -179,10 +231,10 @@ impl<D: Device> Renderer<D> {
|
||||||
let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
|
let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
|
||||||
|
|
||||||
// TODO: constants
|
// TODO: constants
|
||||||
let bin_alloc_start = ((n_elements + 255) & !255) * 8;
|
let bin_alloc_start = ((n_paths + 255) & !255) * 8;
|
||||||
device.write_buffer(
|
device.write_buffer(
|
||||||
&bin_alloc_buf_host,
|
&bin_alloc_buf_host,
|
||||||
&[n_elements as u32, 0, bin_alloc_start as u32],
|
&[n_paths as u32, 0, bin_alloc_start as u32],
|
||||||
)?;
|
)?;
|
||||||
let bin_code = include_bytes!("../shader/binning.spv");
|
let bin_code = include_bytes!("../shader/binning.spv");
|
||||||
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 4, 0)?;
|
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 4, 0)?;
|
||||||
|
@ -198,19 +250,23 @@ impl<D: Device> Renderer<D> {
|
||||||
let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
|
let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
|
||||||
device.write_buffer(
|
device.write_buffer(
|
||||||
&coarse_alloc_buf_host,
|
&coarse_alloc_buf_host,
|
||||||
&[n_elements as u32, coarse_alloc_start as u32],
|
&[n_paths as u32, coarse_alloc_start as u32],
|
||||||
)?;
|
)?;
|
||||||
let coarse_code = include_bytes!("../shader/coarse.spv");
|
let coarse_code = include_bytes!("../shader/coarse.spv");
|
||||||
let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 4, 0)?;
|
let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 5, 0)?;
|
||||||
let coarse_ds = device.create_descriptor_set(
|
let coarse_ds = device.create_descriptor_set(
|
||||||
&coarse_pipeline,
|
&coarse_pipeline,
|
||||||
&[&anno_buf, &bin_buf, &coarse_alloc_buf_dev, &ptcl_buf],
|
&[&anno_buf, &bin_buf, &tile_buf, &coarse_alloc_buf_dev, &ptcl_buf],
|
||||||
&[],
|
&[],
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let k4_code = include_bytes!("../shader/kernel4.spv");
|
let k4_code = include_bytes!("../shader/kernel4.spv");
|
||||||
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 1, 1)?;
|
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 2, 1)?;
|
||||||
let k4_ds = device.create_descriptor_set(&k4_pipeline, &[&ptcl_buf], &[&image_dev])?;
|
let k4_ds = device.create_descriptor_set(
|
||||||
|
&k4_pipeline,
|
||||||
|
&[&ptcl_buf, &tile_buf],
|
||||||
|
&[&image_dev]
|
||||||
|
)?;
|
||||||
|
|
||||||
Ok(Renderer {
|
Ok(Renderer {
|
||||||
scene_buf,
|
scene_buf,
|
||||||
|
@ -218,6 +274,12 @@ impl<D: Device> Renderer<D> {
|
||||||
image_dev,
|
image_dev,
|
||||||
el_pipeline,
|
el_pipeline,
|
||||||
el_ds,
|
el_ds,
|
||||||
|
tile_pipeline,
|
||||||
|
tile_ds,
|
||||||
|
path_pipeline,
|
||||||
|
path_ds,
|
||||||
|
backdrop_pipeline,
|
||||||
|
backdrop_ds,
|
||||||
bin_pipeline,
|
bin_pipeline,
|
||||||
bin_ds,
|
bin_ds,
|
||||||
coarse_pipeline,
|
coarse_pipeline,
|
||||||
|
@ -226,18 +288,25 @@ impl<D: Device> Renderer<D> {
|
||||||
k4_ds,
|
k4_ds,
|
||||||
state_buf,
|
state_buf,
|
||||||
anno_buf,
|
anno_buf,
|
||||||
|
pathseg_buf,
|
||||||
|
tile_buf,
|
||||||
bin_buf,
|
bin_buf,
|
||||||
ptcl_buf,
|
ptcl_buf,
|
||||||
|
tile_alloc_buf_host,
|
||||||
|
tile_alloc_buf_dev,
|
||||||
bin_alloc_buf_host,
|
bin_alloc_buf_host,
|
||||||
bin_alloc_buf_dev,
|
bin_alloc_buf_dev,
|
||||||
coarse_alloc_buf_host,
|
coarse_alloc_buf_host,
|
||||||
coarse_alloc_buf_dev,
|
coarse_alloc_buf_dev,
|
||||||
n_elements,
|
n_elements,
|
||||||
|
n_paths,
|
||||||
|
n_pathseg,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf<D>, query_pool: &D::QueryPool) {
|
pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf<D>, query_pool: &D::QueryPool) {
|
||||||
cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev);
|
cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev);
|
||||||
|
cmd_buf.copy_buffer(&self.tile_alloc_buf_host, &self.tile_alloc_buf_dev);
|
||||||
cmd_buf.copy_buffer(&self.bin_alloc_buf_host, &self.bin_alloc_buf_dev);
|
cmd_buf.copy_buffer(&self.bin_alloc_buf_host, &self.bin_alloc_buf_dev);
|
||||||
cmd_buf.copy_buffer(&self.coarse_alloc_buf_host, &self.coarse_alloc_buf_dev);
|
cmd_buf.copy_buffer(&self.coarse_alloc_buf_host, &self.coarse_alloc_buf_dev);
|
||||||
cmd_buf.clear_buffer(&self.state_buf);
|
cmd_buf.clear_buffer(&self.state_buf);
|
||||||
|
@ -257,25 +326,49 @@ impl<D: Device> Renderer<D> {
|
||||||
cmd_buf.write_timestamp(&query_pool, 1);
|
cmd_buf.write_timestamp(&query_pool, 1);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.dispatch(
|
cmd_buf.dispatch(
|
||||||
&self.bin_pipeline,
|
&self.tile_pipeline,
|
||||||
&self.bin_ds,
|
&self.tile_ds,
|
||||||
(((self.n_elements + 255) / 256) as u32, 1, 1),
|
(((self.n_paths + 255) / 256) as u32, 1, 1),
|
||||||
);
|
);
|
||||||
cmd_buf.write_timestamp(&query_pool, 2);
|
cmd_buf.write_timestamp(&query_pool, 2);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
|
cmd_buf.dispatch(
|
||||||
|
&self.path_pipeline,
|
||||||
|
&self.path_ds,
|
||||||
|
(((self.n_pathseg + 31) / 32) as u32, 1, 1),
|
||||||
|
);
|
||||||
|
cmd_buf.write_timestamp(&query_pool, 3);
|
||||||
|
cmd_buf.memory_barrier();
|
||||||
|
cmd_buf.dispatch(
|
||||||
|
&self.backdrop_pipeline,
|
||||||
|
&self.backdrop_ds,
|
||||||
|
(((self.n_paths + 255) / 256) as u32, 1, 1),
|
||||||
|
);
|
||||||
|
cmd_buf.write_timestamp(&query_pool, 4);
|
||||||
|
// Note: this barrier is not needed as an actual dependency between
|
||||||
|
// pipeline stages, but I am keeping it in so that timer queries are
|
||||||
|
// easier to interpret.
|
||||||
|
cmd_buf.memory_barrier();
|
||||||
|
cmd_buf.dispatch(
|
||||||
|
&self.bin_pipeline,
|
||||||
|
&self.bin_ds,
|
||||||
|
(((self.n_paths + 255) / 256) as u32, 1, 1),
|
||||||
|
);
|
||||||
|
cmd_buf.write_timestamp(&query_pool, 5);
|
||||||
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.dispatch(
|
cmd_buf.dispatch(
|
||||||
&self.coarse_pipeline,
|
&self.coarse_pipeline,
|
||||||
&self.coarse_ds,
|
&self.coarse_ds,
|
||||||
(WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
|
(WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
|
||||||
);
|
);
|
||||||
cmd_buf.write_timestamp(&query_pool, 3);
|
cmd_buf.write_timestamp(&query_pool, 6);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.dispatch(
|
cmd_buf.dispatch(
|
||||||
&self.k4_pipeline,
|
&self.k4_pipeline,
|
||||||
&self.k4_ds,
|
&self.k4_ds,
|
||||||
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
|
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
|
||||||
);
|
);
|
||||||
cmd_buf.write_timestamp(&query_pool, 4);
|
cmd_buf.write_timestamp(&query_pool, 7);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
|
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,6 +31,10 @@ pub struct PietGpuRenderContext {
|
||||||
// Will probably need direct access to hal Device to create images etc.
|
// Will probably need direct access to hal Device to create images etc.
|
||||||
inner_text: PietGpuText,
|
inner_text: PietGpuText,
|
||||||
stroke_width: f32,
|
stroke_width: f32,
|
||||||
|
// We're tallying these cpu-side for expedience, but will probably
|
||||||
|
// move this to some kind of readback from element processing.
|
||||||
|
path_count: usize,
|
||||||
|
pathseg_count: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
|
@ -52,6 +56,8 @@ impl PietGpuRenderContext {
|
||||||
elements,
|
elements,
|
||||||
inner_text,
|
inner_text,
|
||||||
stroke_width,
|
stroke_width,
|
||||||
|
path_count: 0,
|
||||||
|
pathseg_count: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -59,6 +65,14 @@ impl PietGpuRenderContext {
|
||||||
self.elements.encode(&mut self.encoder);
|
self.elements.encode(&mut self.encoder);
|
||||||
self.encoder.buf()
|
self.encoder.buf()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the running, CPU-side tally of paths recorded by this context.
///
/// In the code visible in this diff the counter starts at 0 and is bumped
/// by one whenever a solid-brush `Element::Stroke` or `Element::Fill` is
/// pushed onto the element list.
pub fn path_count(&self) -> usize {
|
||||||
|
self.path_count
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the running, CPU-side tally of path segments recorded by this
/// context.
///
/// In the code visible in this diff the counter starts at 0 and is bumped
/// by one for every line, quadratic, or cubic segment element that is
/// pushed onto the element list.
pub fn pathseg_count(&self) -> usize {
|
||||||
|
self.pathseg_count
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RenderContext for PietGpuRenderContext {
|
impl RenderContext for PietGpuRenderContext {
|
||||||
|
@ -95,6 +109,7 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
PietGpuBrush::Solid(rgba_color) => {
|
PietGpuBrush::Solid(rgba_color) => {
|
||||||
let stroke = Stroke { rgba_color };
|
let stroke = Stroke { rgba_color };
|
||||||
self.elements.push(Element::Stroke(stroke));
|
self.elements.push(Element::Stroke(stroke));
|
||||||
|
self.path_count += 1;
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
@ -117,6 +132,7 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
PietGpuBrush::Solid(rgba_color) => {
|
PietGpuBrush::Solid(rgba_color) => {
|
||||||
let fill = Fill { rgba_color };
|
let fill = Fill { rgba_color };
|
||||||
self.elements.push(Element::Fill(fill));
|
self.elements.push(Element::Fill(fill));
|
||||||
|
self.path_count += 1;
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
@ -200,10 +216,29 @@ impl PietGpuRenderContext {
|
||||||
} else {
|
} else {
|
||||||
self.elements.push(Element::StrokeLine(seg));
|
self.elements.push(Element::StrokeLine(seg));
|
||||||
}
|
}
|
||||||
|
self.pathseg_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends one quadratic segment to the element list and counts it.
///
/// `seg` is pushed as `Element::FillQuad` when `is_fill` is true and as
/// `Element::StrokeQuad` otherwise.
fn encode_quad_seg(&mut self, seg: QuadSeg, is_fill: bool) {
|
||||||
|
if is_fill {
|
||||||
|
self.elements.push(Element::FillQuad(seg));
|
||||||
|
} else {
|
||||||
|
self.elements.push(Element::StrokeQuad(seg));
|
||||||
|
}
|
||||||
|
// Every pushed segment, fill or stroke, adds one to the segment tally.
self.pathseg_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Appends one cubic segment to the element list and counts it.
///
/// `seg` is pushed as `Element::FillCubic` when `is_fill` is true and as
/// `Element::StrokeCubic` otherwise.
fn encode_cubic_seg(&mut self, seg: CubicSeg, is_fill: bool) {
|
||||||
|
if is_fill {
|
||||||
|
self.elements.push(Element::FillCubic(seg));
|
||||||
|
} else {
|
||||||
|
self.elements.push(Element::StrokeCubic(seg));
|
||||||
|
}
|
||||||
|
// Every pushed segment, fill or stroke, adds one to the segment tally.
self.pathseg_count += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {
|
fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {
|
||||||
let flatten = true;
|
let flatten = false;
|
||||||
if flatten {
|
if flatten {
|
||||||
let mut start_pt = None;
|
let mut start_pt = None;
|
||||||
let mut last_pt = None;
|
let mut last_pt = None;
|
||||||
|
@ -265,7 +300,7 @@ impl PietGpuRenderContext {
|
||||||
p1: scene_p1,
|
p1: scene_p1,
|
||||||
p2: scene_p2,
|
p2: scene_p2,
|
||||||
};
|
};
|
||||||
self.elements.push(Element::Quad(seg));
|
self.encode_quad_seg(seg, is_fill);
|
||||||
last_pt = Some(scene_p2);
|
last_pt = Some(scene_p2);
|
||||||
}
|
}
|
||||||
PathEl::CurveTo(p1, p2, p3) => {
|
PathEl::CurveTo(p1, p2, p3) => {
|
||||||
|
@ -278,7 +313,7 @@ impl PietGpuRenderContext {
|
||||||
p2: scene_p2,
|
p2: scene_p2,
|
||||||
p3: scene_p3,
|
p3: scene_p3,
|
||||||
};
|
};
|
||||||
self.elements.push(Element::Cubic(seg));
|
self.encode_cubic_seg(seg, is_fill);
|
||||||
last_pt = Some(scene_p3);
|
last_pt = Some(scene_p3);
|
||||||
}
|
}
|
||||||
PathEl::ClosePath => {
|
PathEl::ClosePath => {
|
||||||
|
|
Loading…
Reference in a new issue