mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 12:21:31 +11:00
Merge pull request #19 from linebender/sort_middle
Bring sort_middle branch to master
This commit is contained in:
commit
73df5534a1
72
Cargo.lock
generated
72
Cargo.lock
generated
|
@ -26,6 +26,15 @@ version = "0.2.3"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "000444226fcff248f2bc4c7625be32c63caccfecc2723a2b9f78a7487a49c407"
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||
dependencies = [
|
||||
"winapi 0.3.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "approx"
|
||||
version = "0.3.2"
|
||||
|
@ -59,6 +68,17 @@ dependencies = [
|
|||
"raw-window-handle",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi 0.3.8",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.0.0"
|
||||
|
@ -106,6 +126,21 @@ version = "0.1.10"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.33.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
|
||||
dependencies = [
|
||||
"ansi_term",
|
||||
"atty",
|
||||
"bitflags",
|
||||
"strsim",
|
||||
"textwrap",
|
||||
"unicode-width",
|
||||
"vec_map",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cloudabi"
|
||||
version = "0.0.3"
|
||||
|
@ -259,6 +294,15 @@ version = "1.5.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f36b5f248235f45773d4944f555f83ea61fe07b18b561ccf99d7483d7381e54d"
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "91780f809e750b0a89f5544be56617ff6b1227ee485bcb06ebe10cdf89bd3b71"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "inflate"
|
||||
version = "0.4.5"
|
||||
|
@ -525,6 +569,7 @@ dependencies = [
|
|||
name = "piet-gpu"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"clap",
|
||||
"piet",
|
||||
"piet-gpu-hal",
|
||||
"piet-gpu-types",
|
||||
|
@ -758,6 +803,12 @@ dependencies = [
|
|||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.17"
|
||||
|
@ -769,6 +820,21 @@ dependencies = [
|
|||
"unicode-xid 0.2.0",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
||||
dependencies = [
|
||||
"unicode-width",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "caaa9d531767d1ff2150b9332433f32a24622147e5ebb1f26409d5da67afd479"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.1.0"
|
||||
|
@ -781,6 +847,12 @@ version = "0.2.0"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
||||
|
||||
[[package]]
|
||||
name = "vec_map"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
|
||||
|
||||
[[package]]
|
||||
name = "void"
|
||||
version = "1.0.2"
|
||||
|
|
53
piet-gpu-types/src/annotated.rs
Normal file
53
piet-gpu-types/src/annotated.rs
Normal file
|
@ -0,0 +1,53 @@
|
|||
use piet_gpu_derive::piet_gpu;
|
||||
|
||||
piet_gpu! {
|
||||
#[gpu_write]
|
||||
mod annotated {
|
||||
struct AnnoFillLineSeg {
|
||||
p0: [f32; 2],
|
||||
p1: [f32; 2],
|
||||
// A note: the layout of this struct is shared with
|
||||
// AnnoStrokeLineSeg. In that case, we actually write
|
||||
// [0.0, 0.0] as the stroke field, to minimize divergence.
|
||||
}
|
||||
struct AnnoStrokeLineSeg {
|
||||
p0: [f32; 2],
|
||||
p1: [f32; 2],
|
||||
// halfwidth in both x and y for binning
|
||||
stroke: [f32; 2],
|
||||
}
|
||||
struct AnnoQuadSeg {
|
||||
p0: [f32; 2],
|
||||
p1: [f32; 2],
|
||||
p2: [f32; 2],
|
||||
stroke: [f32; 2],
|
||||
}
|
||||
struct AnnoCubicSeg {
|
||||
p0: [f32; 2],
|
||||
p1: [f32; 2],
|
||||
p2: [f32; 2],
|
||||
p3: [f32; 2],
|
||||
stroke: [f32; 2],
|
||||
}
|
||||
struct AnnoFill {
|
||||
rgba_color: u32,
|
||||
bbox: [f32; 4],
|
||||
}
|
||||
struct AnnoStroke {
|
||||
rgba_color: u32,
|
||||
bbox: [f32; 4],
|
||||
// For the nonuniform scale case, this needs to be a 2x2 matrix.
|
||||
// That's expected to be uncommon, so we could special-case it.
|
||||
linewidth: f32,
|
||||
}
|
||||
enum Annotated {
|
||||
Nop,
|
||||
FillLine(AnnoFillLineSeg),
|
||||
StrokeLine(AnnoStrokeLineSeg),
|
||||
Quad(AnnoQuadSeg),
|
||||
Cubic(AnnoCubicSeg),
|
||||
Stroke(AnnoStroke),
|
||||
Fill(AnnoFill),
|
||||
}
|
||||
}
|
||||
}
|
22
piet-gpu-types/src/bins.rs
Normal file
22
piet-gpu-types/src/bins.rs
Normal file
|
@ -0,0 +1,22 @@
|
|||
use piet_gpu_derive::piet_gpu;
|
||||
|
||||
// The output of the binning stage, organized as a linked list of chunks.
|
||||
|
||||
piet_gpu! {
|
||||
#[gpu_write]
|
||||
mod bins {
|
||||
struct BinInstance {
|
||||
element_ix: u32,
|
||||
// Right edge of the bounding box of the associated fill
|
||||
// element; used in backdrop computation.
|
||||
right_edge: f32,
|
||||
}
|
||||
|
||||
struct BinChunk {
|
||||
// First chunk can have n = 0, subsequent ones not.
|
||||
n: u32,
|
||||
next: Ref<BinChunk>,
|
||||
// Instances follow
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
use piet_gpu_derive::piet_gpu;
|
||||
|
||||
// Structures representing segments for fill items.
|
||||
|
||||
// There is some cut'n'paste here from stroke segments, which can be
|
||||
// traced to the fact that buffers in GLSL are basically global.
|
||||
// Maybe there's a way to address that, but in the meantime living
|
||||
// with the duplication is easiest.
|
||||
|
||||
piet_gpu! {
|
||||
#[gpu_write]
|
||||
mod fill_seg {
|
||||
struct FillTileHeader {
|
||||
n: u32,
|
||||
items: Ref<FillItemHeader>,
|
||||
}
|
||||
|
||||
struct FillItemHeader {
|
||||
backdrop: i32,
|
||||
segments: Ref<FillSegChunk>,
|
||||
}
|
||||
|
||||
// TODO: strongly consider using f16. If so, these would be
|
||||
// relative to the tile. We're doing f32 for now to minimize
|
||||
// divergence from piet-metal originals.
|
||||
struct FillSegment {
|
||||
start: [f32; 2],
|
||||
end: [f32; 2],
|
||||
}
|
||||
|
||||
struct FillSegChunk {
|
||||
n: u32,
|
||||
next: Ref<FillSegChunk>,
|
||||
// Segments follow (could represent this as a variable sized array).
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,7 +1,10 @@
|
|||
// Structures used only internally probably don't need to be pub.
|
||||
|
||||
pub mod annotated;
|
||||
pub mod bins;
|
||||
pub mod encoder;
|
||||
pub mod fill_seg;
|
||||
pub mod ptcl;
|
||||
pub mod scene;
|
||||
pub mod segment;
|
||||
pub mod state;
|
||||
pub mod test;
|
||||
pub mod tilegroup;
|
||||
|
|
|
@ -5,9 +5,10 @@ fn main() {
|
|||
.expect("provide a module name");
|
||||
match mod_name.as_str() {
|
||||
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
|
||||
"state" => print!("{}", piet_gpu_types::state::gen_gpu_state()),
|
||||
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
|
||||
"bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
|
||||
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
||||
"segment" => print!("{}", piet_gpu_types::segment::gen_gpu_segment()),
|
||||
"fill_seg" => print!("{}", piet_gpu_types::fill_seg::gen_gpu_fill_seg()),
|
||||
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
|
||||
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
|
||||
_ => println!("Oops, unknown module name"),
|
||||
|
|
|
@ -13,14 +13,13 @@ piet_gpu! {
|
|||
end: [f32; 2],
|
||||
}
|
||||
struct CmdStroke {
|
||||
// Should be Ref<SegChunk> if we had cross-module references.
|
||||
seg_ref: u32,
|
||||
// Consider a specialization to one segment.
|
||||
seg_ref: Ref<SegChunk>,
|
||||
half_width: f32,
|
||||
rgba_color: u32,
|
||||
}
|
||||
struct CmdFill {
|
||||
// Should be Ref<FillSegChunk> if we had cross-module references.
|
||||
seg_ref: u32,
|
||||
seg_ref: Ref<SegChunk>,
|
||||
backdrop: i32,
|
||||
rgba_color: u32,
|
||||
}
|
||||
|
@ -51,5 +50,24 @@ piet_gpu! {
|
|||
Jump(CmdJump),
|
||||
Bail,
|
||||
}
|
||||
|
||||
// TODO: strongly consider using f16. If so, these would be
|
||||
// relative to the tile. We're doing f32 for now to minimize
|
||||
// divergence from piet-metal originals.
|
||||
struct Segment {
|
||||
start: [f32; 2],
|
||||
end: [f32; 2],
|
||||
|
||||
// This is used for fills only, but we're including it in
|
||||
// the general structure for simplicity.
|
||||
y_edge: f32,
|
||||
}
|
||||
|
||||
struct SegChunk {
|
||||
n: u32,
|
||||
next: Ref<SegChunk>,
|
||||
// Actually a reference to a variable-sized slice.
|
||||
segs: Ref<Segment>,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,8 @@ pub use self::scene::{
|
|||
Bbox, PietCircle, PietFill, PietItem, PietStrokeLine, PietStrokePolyLine, Point, SimpleGroup,
|
||||
};
|
||||
|
||||
pub use self::scene::{CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke, Transform};
|
||||
|
||||
piet_gpu! {
|
||||
#[rust_encode]
|
||||
mod scene {
|
||||
|
@ -51,5 +53,53 @@ piet_gpu! {
|
|||
Fill(PietFill),
|
||||
Poly(PietStrokePolyLine),
|
||||
}
|
||||
|
||||
// New approach follows (above to be deleted)
|
||||
struct LineSeg {
|
||||
p0: [f32; 2],
|
||||
p1: [f32; 2],
|
||||
}
|
||||
struct QuadSeg {
|
||||
p0: [f32; 2],
|
||||
p1: [f32; 2],
|
||||
p2: [f32; 2],
|
||||
}
|
||||
struct CubicSeg {
|
||||
p0: [f32; 2],
|
||||
p1: [f32; 2],
|
||||
p2: [f32; 2],
|
||||
p3: [f32; 2],
|
||||
}
|
||||
struct Fill {
|
||||
rgba_color: u32,
|
||||
}
|
||||
struct Stroke {
|
||||
rgba_color: u32,
|
||||
}
|
||||
struct SetLineWidth {
|
||||
width: f32,
|
||||
}
|
||||
struct Transform {
|
||||
mat: [f32; 4],
|
||||
translate: [f32; 2],
|
||||
}
|
||||
enum Element {
|
||||
Nop,
|
||||
// Another approach to encoding would be to use a single
|
||||
// variant but have a bool for fill/stroke. This could be
|
||||
// packed into the tag, so the on-the-wire representation
|
||||
// would be very similar to what's here.
|
||||
StrokeLine(LineSeg),
|
||||
FillLine(LineSeg),
|
||||
|
||||
// Note: we'll need to handle the stroke/fill distinction
|
||||
// for these as well, when we do flattening on the GPU.
|
||||
Quad(QuadSeg),
|
||||
Cubic(CubicSeg),
|
||||
Stroke(Stroke),
|
||||
Fill(Fill),
|
||||
SetLineWidth(SetLineWidth),
|
||||
Transform(Transform),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,32 +0,0 @@
|
|||
use piet_gpu_derive::piet_gpu;
|
||||
|
||||
// Structures representing segments for stroke/fill items.
|
||||
|
||||
piet_gpu! {
|
||||
#[gpu_write]
|
||||
mod segment {
|
||||
struct TileHeader {
|
||||
n: u32,
|
||||
items: Ref<ItemHeader>,
|
||||
}
|
||||
|
||||
// Note: this is only suitable for strokes, fills require backdrop.
|
||||
struct ItemHeader {
|
||||
segments: Ref<SegChunk>,
|
||||
}
|
||||
|
||||
// TODO: strongly consider using f16. If so, these would be
|
||||
// relative to the tile. We're doing f32 for now to minimize
|
||||
// divergence from piet-metal originals.
|
||||
struct Segment {
|
||||
start: [f32; 2],
|
||||
end: [f32; 2],
|
||||
}
|
||||
|
||||
struct SegChunk {
|
||||
n: u32,
|
||||
next: Ref<SegChunk>,
|
||||
// Segments follow (could represent this as a variable sized array).
|
||||
}
|
||||
}
|
||||
}
|
14
piet-gpu-types/src/state.rs
Normal file
14
piet-gpu-types/src/state.rs
Normal file
|
@ -0,0 +1,14 @@
|
|||
use piet_gpu_derive::piet_gpu;
|
||||
|
||||
piet_gpu! {
|
||||
#[gpu_write]
|
||||
mod state {
|
||||
struct State {
|
||||
mat: [f32; 4],
|
||||
translate: [f32; 2],
|
||||
bbox: [f32; 4],
|
||||
linewidth: f32,
|
||||
flags: u32,
|
||||
}
|
||||
}
|
||||
}
|
|
@ -26,3 +26,4 @@ png = "0.16.2"
|
|||
rand = "0.7.3"
|
||||
roxmltree = "0.11"
|
||||
winit = "0.22"
|
||||
clap = "2.33"
|
||||
|
|
|
@ -2,10 +2,12 @@ use std::fs::File;
|
|||
use std::io::BufWriter;
|
||||
use std::path::Path;
|
||||
|
||||
use clap::{Arg, App};
|
||||
|
||||
use piet_gpu_hal::vulkan::VkInstance;
|
||||
use piet_gpu_hal::{CmdBuf, Device, Error, MemFlags};
|
||||
|
||||
use piet_gpu::{PietGpuRenderContext, Renderer, render_scene, WIDTH, HEIGHT};
|
||||
use piet_gpu::{render_scene, render_svg, PietGpuRenderContext, Renderer, HEIGHT, WIDTH};
|
||||
|
||||
#[allow(unused)]
|
||||
fn dump_scene(buf: &[u8]) {
|
||||
|
@ -16,22 +18,179 @@ fn dump_scene(buf: &[u8]) {
|
|||
}
|
||||
}
|
||||
|
||||
/// Print every state record contained in `buf`, for diagnostic purposes.
///
/// The buffer is read as consecutive 48-byte little-endian records: eleven
/// `f32` values followed by one `u32`. This matches the field sizes of the
/// `State` struct declared in `piet-gpu-types` (`mat`, `translate`, `bbox`,
/// `linewidth`, `flags`). Trailing bytes that do not make up a whole record
/// are ignored.
///
/// One line is printed per record: the record index, the first six floats in
/// square brackets, the next four as a pair of points, the eleventh float and
/// finally the trailing word as an unsigned integer.
#[allow(unused)]
fn dump_state(buf: &[u8]) {
    const STATE_SIZE: usize = 48;
    const N_FLOATS: usize = 11;

    for (i, record) in buf.chunks_exact(STATE_SIZE).enumerate() {
        // Copy the k-th 4-byte word out of the record.
        let word = |k: usize| -> [u8; 4] {
            let mut bytes = [0u8; 4];
            bytes.copy_from_slice(&record[k * 4..k * 4 + 4]);
            bytes
        };
        let floats: Vec<f32> = (0..N_FLOATS)
            .map(|k| f32::from_le_bytes(word(k)))
            .collect();
        // The last word is a full u32; decode all four bytes rather than
        // printing only the lowest one.
        let flags = u32::from_le_bytes(word(N_FLOATS));
        println!(
            "{}: [{} {} {} {} {} {}] ({}, {})-({} {}) {} {}",
            i,
            floats[0],
            floats[1],
            floats[2],
            floats[3],
            floats[4],
            floats[5],
            floats[6],
            floats[7],
            floats[8],
            floats[9],
            floats[10],
            flags
        );
    }
}
|
||||
|
||||
/// Interpret the output of the binning stage, for diagnostic purposes.
///
/// `buf` is treated as 256 bins, each owning 16 chunk lists whose head chunks
/// sit at byte offset `(bin * 16 + list) * 64`. A chunk is read as: word 0 is
/// the item count `n`, word 1 is the byte offset of the next chunk (0 ends
/// the list), and the items start at word 2.
///
/// For each bin the lists are merged: repeatedly, the list whose current
/// chunk has the smallest first item is selected (the lowest list index wins
/// ties), its chunk is printed, and the list advances to its next chunk. A
/// list whose current chunk has `n == 0` is never selected. Merging of a bin
/// ends when no list has a selectable chunk, or when the smallest first item
/// is `0xffff_ffff`.
///
/// Reads are bounds-checked: if a chunk lies outside `buf` (truncated buffer
/// or bad `next` offset) a message is printed and the trace stops instead of
/// panicking.
#[allow(unused)]
fn trace_merge(buf: &[u32]) {
    const N_BINS: usize = 256;
    const LISTS_PER_BIN: usize = 16;
    const HEAD_STRIDE: usize = 64;
    // A first item with this value is treated as "nothing to merge".
    const NO_KEY: u32 = !0;

    for bin in 0..N_BINS {
        println!("bin {}:", bin);
        let mut starts: Vec<Option<usize>> = (0..LISTS_PER_BIN)
            .map(|i| Some((bin * LISTS_PER_BIN + i) * HEAD_STRIDE))
            .collect();
        loop {
            // Best candidate so far: (list index, chunk byte offset, n, first item).
            let mut best: Option<(usize, usize, u32, u32)> = None;
            for (i, start) in starts.iter().enumerate() {
                let st = match *start {
                    Some(st) => st,
                    None => continue,
                };
                let (n, key) = match (buf.get(st / 4), buf.get(st / 4 + 2)) {
                    // Empty chunk: not selectable, and its items are not read.
                    (Some(&0), _) => continue,
                    (Some(&n), Some(&key)) => (n, key),
                    _ => {
                        println!("chunk at {:x} is out of range, trace stopped", st);
                        return;
                    }
                };
                // Strict comparison keeps the lowest list index on ties.
                if best.map_or(true, |(_, _, _, best_key)| key < best_key) {
                    best = Some((i, st, n, key));
                }
            }
            let (selected, st, n, _) = match best {
                Some(found) if found.3 != NO_KEY => found,
                _ => break,
            };
            println!("selected {}, start {:x}", selected, st);
            let first = st / 4 + 2;
            match buf.get(first..first + n as usize) {
                Some(items) => {
                    for item in items {
                        println!("{:x}", item);
                    }
                }
                None => {
                    println!("chunk at {:x} is out of range, trace stopped", st);
                    return;
                }
            }
            // Word st/4 + 1 is in range because word st/4 + 2 was read above.
            let next = buf[st / 4 + 1] as usize;
            starts[selected] = if next == 0 { None } else { Some(next) };
        }
    }
}
|
||||
|
||||
/// Interpret the output of the coarse raster stage, for diagnostic purposes.
///
/// `buf` is walked as a grid of 128 x 96 tiles; the command list of tile
/// `tile_ix` starts at byte offset `tile_ix * 1024`. Commands are read until
/// a tag of 0 is found. Tag 3 is printed as a fill and tag 4 as a stroke,
/// each followed by a dump of its segment chunks; any other tag is printed
/// as a bare number. After tag 8 reading continues at the byte offset stored
/// in the command's second word; after any other tag it advances by 20 bytes.
///
/// For fills and strokes the command words are: 1 = byte offset of the first
/// segment chunk, 2 = backdrop (signed, fills) or line width (`f32` bits,
/// strokes), 3 = RGBA color. Every chunk in the chain is dumped, not only the
/// first one.
///
/// # Panics
///
/// Panics if a tile, command, chunk or segment offset lies outside `buf`.
#[allow(unused)]
fn trace_ptcl(buf: &[u32]) {
    const WIDTH_IN_TILES: usize = 128;
    const HEIGHT_IN_TILES: usize = 96;
    const TILE_STRIDE: usize = 1024;
    const CMD_STRIDE: usize = 20;
    const SEG_WORDS: usize = 5;

    /// Print every chunk of the segment chain that starts at byte offset
    /// `seg_chunk`. A chunk is: word 0 = n, word 1 = next chunk (0 ends the
    /// chain), word 2 = byte offset of its `n` five-word segments.
    fn trace_segs(buf: &[u32], mut seg_chunk: usize) {
        loop {
            let n = buf[seg_chunk / 4] as usize;
            let segs = buf[seg_chunk / 4 + 2] as usize;
            println!(" chunk @{:x}: n={}, segs @{:x}", seg_chunk, n, segs);
            for i in 0..n {
                let base = segs / 4 + i * SEG_WORDS;
                let x0 = f32::from_bits(buf[base]);
                let y0 = f32::from_bits(buf[base + 1]);
                let x1 = f32::from_bits(buf[base + 2]);
                let y1 = f32::from_bits(buf[base + 3]);
                let y_edge = f32::from_bits(buf[base + 4]);
                println!(
                    " ({:.3}, {:.3}) - ({:.3}, {:.3}) | {:.3}",
                    x0, y0, x1, y1, y_edge
                );
            }
            seg_chunk = buf[seg_chunk / 4 + 1] as usize;
            if seg_chunk == 0 {
                break;
            }
        }
    }

    for y in 0..HEIGHT_IN_TILES {
        for x in 0..WIDTH_IN_TILES {
            let tile_ix = y * WIDTH_IN_TILES + x;
            println!("tile {} @({}, {})", tile_ix, x, y);
            let mut tile_offset = tile_ix * TILE_STRIDE;
            loop {
                let cmd = tile_offset / 4;
                let tag = buf[cmd];
                match tag {
                    0 => break,
                    3 => {
                        // The backdrop field is signed; reinterpret the bits
                        // so negative values do not print as huge numbers.
                        let backdrop = buf[cmd + 2] as i32;
                        let rgba_color = buf[cmd + 3];
                        println!(" {:x}: fill {:x} {}", tile_offset, rgba_color, backdrop);
                        trace_segs(buf, buf[cmd + 1] as usize);
                    }
                    4 => {
                        let line_width = f32::from_bits(buf[cmd + 2]);
                        let rgba_color = buf[cmd + 3];
                        println!(" {:x}: stroke {:x} {}", tile_offset, rgba_color, line_width);
                        trace_segs(buf, buf[cmd + 1] as usize);
                    }
                    _ => {
                        println!("{:x}: {}", tile_offset, tag);
                    }
                }
                tile_offset = if tag == 8 {
                    buf[cmd + 1] as usize
                } else {
                    tile_offset + CMD_STRIDE
                };
            }
        }
    }
}
|
||||
|
||||
|
||||
fn main() -> Result<(), Error> {
|
||||
let matches = App::new("piet-gpu test")
|
||||
.arg(Arg::with_name("INPUT")
|
||||
.index(1))
|
||||
.arg(Arg::with_name("flip")
|
||||
.short("f")
|
||||
.long("flip"))
|
||||
.arg(Arg::with_name("scale")
|
||||
.short("s")
|
||||
.long("scale")
|
||||
.takes_value(true))
|
||||
.get_matches();
|
||||
let (instance, _) = VkInstance::new(None)?;
|
||||
unsafe {
|
||||
let device = instance.device(None)?;
|
||||
|
||||
let fence = device.create_fence(false)?;
|
||||
let mut cmd_buf = device.create_cmd_buf()?;
|
||||
let query_pool = device.create_query_pool(6)?;
|
||||
let query_pool = device.create_query_pool(5)?;
|
||||
|
||||
let mut ctx = PietGpuRenderContext::new();
|
||||
render_scene(&mut ctx);
|
||||
if let Some(input) = matches.value_of("INPUT") {
|
||||
let mut scale = matches.value_of("scale")
|
||||
.map(|scale| scale.parse().unwrap())
|
||||
.unwrap_or(8.0);
|
||||
if matches.is_present("flip") {
|
||||
scale = -scale;
|
||||
}
|
||||
render_svg(&mut ctx, input, scale);
|
||||
} else {
|
||||
render_scene(&mut ctx);
|
||||
}
|
||||
let scene = ctx.get_scene_buf();
|
||||
//dump_scene(&scene);
|
||||
|
||||
let renderer = Renderer::new(&device, scene)?;
|
||||
let image_buf = device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?;
|
||||
let image_buf =
|
||||
device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?;
|
||||
|
||||
cmd_buf.begin();
|
||||
renderer.record(&mut cmd_buf, &query_pool);
|
||||
|
@ -39,29 +198,17 @@ fn main() -> Result<(), Error> {
|
|||
cmd_buf.finish();
|
||||
device.run_cmd_buf(&cmd_buf, &[], &[], Some(&fence))?;
|
||||
device.wait_and_reset(&[fence])?;
|
||||
let timestamps = device.reap_query_pool(&query_pool).unwrap();
|
||||
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
|
||||
println!(
|
||||
"Kernel 2s time: {:.3}ms",
|
||||
(timestamps[1] - timestamps[0]) * 1e3
|
||||
);
|
||||
println!(
|
||||
"Kernel 2f time: {:.3}ms",
|
||||
(timestamps[2] - timestamps[1]) * 1e3
|
||||
);
|
||||
println!(
|
||||
"Kernel 3 time: {:.3}ms",
|
||||
(timestamps[3] - timestamps[2]) * 1e3
|
||||
);
|
||||
println!(
|
||||
"Render time: {:.3}ms",
|
||||
(timestamps[4] - timestamps[3]) * 1e3
|
||||
);
|
||||
let ts = device.reap_query_pool(&query_pool).unwrap();
|
||||
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
|
||||
println!("Binning kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
|
||||
println!("Coarse kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
|
||||
println!("Render kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
|
||||
|
||||
/*
|
||||
let mut k1_data: Vec<u32> = Default::default();
|
||||
device.read_buffer(&segment_buf, &mut k1_data).unwrap();
|
||||
dump_k1_data(&k1_data);
|
||||
let mut data: Vec<u32> = Default::default();
|
||||
device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap();
|
||||
piet_gpu::dump_k1_data(&data);
|
||||
//trace_ptcl(&data);
|
||||
*/
|
||||
|
||||
let mut img_data: Vec<u8> = Default::default();
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use piet_gpu_hal::vulkan::VkInstance;
|
||||
use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout};
|
||||
|
||||
use piet_gpu::{PietGpuRenderContext, Renderer, render_scene, WIDTH, HEIGHT};
|
||||
use piet_gpu::{render_scene, PietGpuRenderContext, Renderer, HEIGHT, WIDTH};
|
||||
|
||||
use winit::{
|
||||
event::{Event, WindowEvent},
|
||||
|
@ -37,7 +37,7 @@ fn main() -> Result<(), Error> {
|
|||
.map(|_| device.create_cmd_buf())
|
||||
.collect::<Result<Vec<_>, Error>>()?;
|
||||
let query_pools = (0..NUM_FRAMES)
|
||||
.map(|_| device.create_query_pool(6))
|
||||
.map(|_| device.create_query_pool(5))
|
||||
.collect::<Result<Vec<_>, Error>>()?;
|
||||
|
||||
let mut ctx = PietGpuRenderContext::new();
|
||||
|
@ -69,12 +69,12 @@ fn main() -> Result<(), Error> {
|
|||
device.wait_and_reset(&[frame_fences[frame_idx]]).unwrap();
|
||||
|
||||
let timestamps = device.reap_query_pool(query_pool).unwrap();
|
||||
window.set_title(&format!("k1: {:.3}ms, k2s: {:.3}ms, k2f: {:.3}ms, k3: {:.3}ms, k4: {:.3}ms",
|
||||
window.set_title(&format!(
|
||||
"e: {:.3}ms, b: {:.3}ms, c: {:.3}ms, f: {:.3}ms",
|
||||
timestamps[0] * 1e3,
|
||||
(timestamps[1] - timestamps[0]) * 1e3,
|
||||
(timestamps[2] - timestamps[1]) * 1e3,
|
||||
(timestamps[3] - timestamps[2]) * 1e3,
|
||||
(timestamps[4] - timestamps[3]) * 1e3,
|
||||
));
|
||||
}
|
||||
|
||||
|
@ -93,11 +93,7 @@ fn main() -> Result<(), Error> {
|
|||
ImageLayout::BlitDst,
|
||||
);
|
||||
cmd_buf.blit_image(&renderer.image_dev, &swap_image);
|
||||
cmd_buf.image_barrier(
|
||||
&swap_image,
|
||||
ImageLayout::BlitDst,
|
||||
ImageLayout::Present,
|
||||
);
|
||||
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
|
||||
cmd_buf.finish();
|
||||
|
||||
device
|
||||
|
|
335
piet-gpu/shader/annotated.h
Normal file
335
piet-gpu/shader/annotated.h
Normal file
|
@ -0,0 +1,335 @@
|
|||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
struct AnnoFillLineSegRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoStrokeLineSegRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoQuadSegRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoCubicSegRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoFillRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoStrokeRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnotatedRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoFillLineSeg {
|
||||
vec2 p0;
|
||||
vec2 p1;
|
||||
};
|
||||
|
||||
#define AnnoFillLineSeg_size 16
|
||||
|
||||
AnnoFillLineSegRef AnnoFillLineSeg_index(AnnoFillLineSegRef ref, uint index) {
|
||||
return AnnoFillLineSegRef(ref.offset + index * AnnoFillLineSeg_size);
|
||||
}
|
||||
|
||||
struct AnnoStrokeLineSeg {
|
||||
vec2 p0;
|
||||
vec2 p1;
|
||||
vec2 stroke;
|
||||
};
|
||||
|
||||
#define AnnoStrokeLineSeg_size 24
|
||||
|
||||
AnnoStrokeLineSegRef AnnoStrokeLineSeg_index(AnnoStrokeLineSegRef ref, uint index) {
|
||||
return AnnoStrokeLineSegRef(ref.offset + index * AnnoStrokeLineSeg_size);
|
||||
}
|
||||
|
||||
struct AnnoQuadSeg {
|
||||
vec2 p0;
|
||||
vec2 p1;
|
||||
vec2 p2;
|
||||
vec2 stroke;
|
||||
};
|
||||
|
||||
#define AnnoQuadSeg_size 32
|
||||
|
||||
AnnoQuadSegRef AnnoQuadSeg_index(AnnoQuadSegRef ref, uint index) {
|
||||
return AnnoQuadSegRef(ref.offset + index * AnnoQuadSeg_size);
|
||||
}
|
||||
|
||||
struct AnnoCubicSeg {
|
||||
vec2 p0;
|
||||
vec2 p1;
|
||||
vec2 p2;
|
||||
vec2 p3;
|
||||
vec2 stroke;
|
||||
};
|
||||
|
||||
#define AnnoCubicSeg_size 40
|
||||
|
||||
AnnoCubicSegRef AnnoCubicSeg_index(AnnoCubicSegRef ref, uint index) {
|
||||
return AnnoCubicSegRef(ref.offset + index * AnnoCubicSeg_size);
|
||||
}
|
||||
|
||||
struct AnnoFill {
|
||||
uint rgba_color;
|
||||
vec4 bbox;
|
||||
};
|
||||
|
||||
#define AnnoFill_size 20
|
||||
|
||||
AnnoFillRef AnnoFill_index(AnnoFillRef ref, uint index) {
|
||||
return AnnoFillRef(ref.offset + index * AnnoFill_size);
|
||||
}
|
||||
|
||||
struct AnnoStroke {
|
||||
uint rgba_color;
|
||||
vec4 bbox;
|
||||
float linewidth;
|
||||
};
|
||||
|
||||
#define AnnoStroke_size 24
|
||||
|
||||
AnnoStrokeRef AnnoStroke_index(AnnoStrokeRef ref, uint index) {
|
||||
return AnnoStrokeRef(ref.offset + index * AnnoStroke_size);
|
||||
}
|
||||
|
||||
#define Annotated_Nop 0
|
||||
#define Annotated_FillLine 1
|
||||
#define Annotated_StrokeLine 2
|
||||
#define Annotated_Quad 3
|
||||
#define Annotated_Cubic 4
|
||||
#define Annotated_Stroke 5
|
||||
#define Annotated_Fill 6
|
||||
#define Annotated_size 44
|
||||
|
||||
AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {
|
||||
return AnnotatedRef(ref.offset + index * Annotated_size);
|
||||
}
|
||||
|
||||
AnnoFillLineSeg AnnoFillLineSeg_read(AnnoFillLineSegRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = annotated[ix + 0];
|
||||
uint raw1 = annotated[ix + 1];
|
||||
uint raw2 = annotated[ix + 2];
|
||||
uint raw3 = annotated[ix + 3];
|
||||
AnnoFillLineSeg s;
|
||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
return s;
|
||||
}
|
||||
|
||||
void AnnoFillLineSeg_write(AnnoFillLineSegRef ref, AnnoFillLineSeg s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
annotated[ix + 0] = floatBitsToUint(s.p0.x);
|
||||
annotated[ix + 1] = floatBitsToUint(s.p0.y);
|
||||
annotated[ix + 2] = floatBitsToUint(s.p1.x);
|
||||
annotated[ix + 3] = floatBitsToUint(s.p1.y);
|
||||
}
|
||||
|
||||
AnnoStrokeLineSeg AnnoStrokeLineSeg_read(AnnoStrokeLineSegRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = annotated[ix + 0];
|
||||
uint raw1 = annotated[ix + 1];
|
||||
uint raw2 = annotated[ix + 2];
|
||||
uint raw3 = annotated[ix + 3];
|
||||
uint raw4 = annotated[ix + 4];
|
||||
uint raw5 = annotated[ix + 5];
|
||||
AnnoStrokeLineSeg s;
|
||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.stroke = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||
return s;
|
||||
}
|
||||
|
||||
void AnnoStrokeLineSeg_write(AnnoStrokeLineSegRef ref, AnnoStrokeLineSeg s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
annotated[ix + 0] = floatBitsToUint(s.p0.x);
|
||||
annotated[ix + 1] = floatBitsToUint(s.p0.y);
|
||||
annotated[ix + 2] = floatBitsToUint(s.p1.x);
|
||||
annotated[ix + 3] = floatBitsToUint(s.p1.y);
|
||||
annotated[ix + 4] = floatBitsToUint(s.stroke.x);
|
||||
annotated[ix + 5] = floatBitsToUint(s.stroke.y);
|
||||
}
|
||||
|
||||
AnnoQuadSeg AnnoQuadSeg_read(AnnoQuadSegRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = annotated[ix + 0];
|
||||
uint raw1 = annotated[ix + 1];
|
||||
uint raw2 = annotated[ix + 2];
|
||||
uint raw3 = annotated[ix + 3];
|
||||
uint raw4 = annotated[ix + 4];
|
||||
uint raw5 = annotated[ix + 5];
|
||||
uint raw6 = annotated[ix + 6];
|
||||
uint raw7 = annotated[ix + 7];
|
||||
AnnoQuadSeg s;
|
||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||
s.stroke = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
|
||||
return s;
|
||||
}
|
||||
|
||||
void AnnoQuadSeg_write(AnnoQuadSegRef ref, AnnoQuadSeg s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
annotated[ix + 0] = floatBitsToUint(s.p0.x);
|
||||
annotated[ix + 1] = floatBitsToUint(s.p0.y);
|
||||
annotated[ix + 2] = floatBitsToUint(s.p1.x);
|
||||
annotated[ix + 3] = floatBitsToUint(s.p1.y);
|
||||
annotated[ix + 4] = floatBitsToUint(s.p2.x);
|
||||
annotated[ix + 5] = floatBitsToUint(s.p2.y);
|
||||
annotated[ix + 6] = floatBitsToUint(s.stroke.x);
|
||||
annotated[ix + 7] = floatBitsToUint(s.stroke.y);
|
||||
}
|
||||
|
||||
AnnoCubicSeg AnnoCubicSeg_read(AnnoCubicSegRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = annotated[ix + 0];
|
||||
uint raw1 = annotated[ix + 1];
|
||||
uint raw2 = annotated[ix + 2];
|
||||
uint raw3 = annotated[ix + 3];
|
||||
uint raw4 = annotated[ix + 4];
|
||||
uint raw5 = annotated[ix + 5];
|
||||
uint raw6 = annotated[ix + 6];
|
||||
uint raw7 = annotated[ix + 7];
|
||||
uint raw8 = annotated[ix + 8];
|
||||
uint raw9 = annotated[ix + 9];
|
||||
AnnoCubicSeg s;
|
||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
|
||||
s.stroke = vec2(uintBitsToFloat(raw8), uintBitsToFloat(raw9));
|
||||
return s;
|
||||
}
|
||||
|
||||
void AnnoCubicSeg_write(AnnoCubicSegRef ref, AnnoCubicSeg s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
annotated[ix + 0] = floatBitsToUint(s.p0.x);
|
||||
annotated[ix + 1] = floatBitsToUint(s.p0.y);
|
||||
annotated[ix + 2] = floatBitsToUint(s.p1.x);
|
||||
annotated[ix + 3] = floatBitsToUint(s.p1.y);
|
||||
annotated[ix + 4] = floatBitsToUint(s.p2.x);
|
||||
annotated[ix + 5] = floatBitsToUint(s.p2.y);
|
||||
annotated[ix + 6] = floatBitsToUint(s.p3.x);
|
||||
annotated[ix + 7] = floatBitsToUint(s.p3.y);
|
||||
annotated[ix + 8] = floatBitsToUint(s.stroke.x);
|
||||
annotated[ix + 9] = floatBitsToUint(s.stroke.y);
|
||||
}
|
||||
|
||||
AnnoFill AnnoFill_read(AnnoFillRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = annotated[ix + 0];
|
||||
uint raw1 = annotated[ix + 1];
|
||||
uint raw2 = annotated[ix + 2];
|
||||
uint raw3 = annotated[ix + 3];
|
||||
uint raw4 = annotated[ix + 4];
|
||||
AnnoFill s;
|
||||
s.rgba_color = raw0;
|
||||
s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));
|
||||
return s;
|
||||
}
|
||||
|
||||
void AnnoFill_write(AnnoFillRef ref, AnnoFill s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
annotated[ix + 0] = s.rgba_color;
|
||||
annotated[ix + 1] = floatBitsToUint(s.bbox.x);
|
||||
annotated[ix + 2] = floatBitsToUint(s.bbox.y);
|
||||
annotated[ix + 3] = floatBitsToUint(s.bbox.z);
|
||||
annotated[ix + 4] = floatBitsToUint(s.bbox.w);
|
||||
}
|
||||
|
||||
AnnoStroke AnnoStroke_read(AnnoStrokeRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = annotated[ix + 0];
|
||||
uint raw1 = annotated[ix + 1];
|
||||
uint raw2 = annotated[ix + 2];
|
||||
uint raw3 = annotated[ix + 3];
|
||||
uint raw4 = annotated[ix + 4];
|
||||
uint raw5 = annotated[ix + 5];
|
||||
AnnoStroke s;
|
||||
s.rgba_color = raw0;
|
||||
s.bbox = vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4));
|
||||
s.linewidth = uintBitsToFloat(raw5);
|
||||
return s;
|
||||
}
|
||||
|
||||
// Store an AnnoStroke into `annotated`: color, bbox (4 words), line width.
void AnnoStroke_write(AnnoStrokeRef ref, AnnoStroke s) {
    uint base = ref.offset >> 2;
    uvec4 bbox_bits = floatBitsToUint(s.bbox);
    annotated[base + 0] = s.rgba_color;
    annotated[base + 1] = bbox_bits.x;
    annotated[base + 2] = bbox_bits.y;
    annotated[base + 3] = bbox_bits.z;
    annotated[base + 4] = bbox_bits.w;
    annotated[base + 5] = floatBitsToUint(s.linewidth);
}
|
||||
|
||||
// Return the variant tag, which is the first word of an Annotated element.
uint Annotated_tag(AnnotatedRef ref) {
    uint tag_ix = ref.offset >> 2;
    return annotated[tag_ix];
}
|
||||
|
||||
// Read the FillLine payload, which starts 4 bytes (one tag word) into the element.
AnnoFillLineSeg Annotated_FillLine_read(AnnotatedRef ref) {
    AnnoFillLineSegRef payload = AnnoFillLineSegRef(ref.offset + 4);
    return AnnoFillLineSeg_read(payload);
}
|
||||
|
||||
// Read the StrokeLine payload, which starts 4 bytes (one tag word) into the element.
AnnoStrokeLineSeg Annotated_StrokeLine_read(AnnotatedRef ref) {
    AnnoStrokeLineSegRef payload = AnnoStrokeLineSegRef(ref.offset + 4);
    return AnnoStrokeLineSeg_read(payload);
}
|
||||
|
||||
// Read the Quad payload, which starts 4 bytes (one tag word) into the element.
AnnoQuadSeg Annotated_Quad_read(AnnotatedRef ref) {
    AnnoQuadSegRef payload = AnnoQuadSegRef(ref.offset + 4);
    return AnnoQuadSeg_read(payload);
}
|
||||
|
||||
// Read the Cubic payload, which starts 4 bytes (one tag word) into the element.
AnnoCubicSeg Annotated_Cubic_read(AnnotatedRef ref) {
    AnnoCubicSegRef payload = AnnoCubicSegRef(ref.offset + 4);
    return AnnoCubicSeg_read(payload);
}
|
||||
|
||||
// Read the Stroke payload, which starts 4 bytes (one tag word) into the element.
AnnoStroke Annotated_Stroke_read(AnnotatedRef ref) {
    AnnoStrokeRef payload = AnnoStrokeRef(ref.offset + 4);
    return AnnoStroke_read(payload);
}
|
||||
|
||||
// Read the Fill payload, which starts 4 bytes (one tag word) into the element.
AnnoFill Annotated_Fill_read(AnnotatedRef ref) {
    AnnoFillRef payload = AnnoFillRef(ref.offset + 4);
    return AnnoFill_read(payload);
}
|
||||
|
||||
// Mark the element at `ref` as a Nop; only the tag word is written.
void Annotated_Nop_write(AnnotatedRef ref) {
    uint tag_ix = ref.offset >> 2;
    annotated[tag_ix] = Annotated_Nop;
}
|
||||
|
||||
// Write a FillLine element: tag word first, payload in the words after it.
void Annotated_FillLine_write(AnnotatedRef ref, AnnoFillLineSeg s) {
    uint tag_ix = ref.offset >> 2;
    annotated[tag_ix] = Annotated_FillLine;
    AnnoFillLineSegRef payload = AnnoFillLineSegRef(ref.offset + 4);
    AnnoFillLineSeg_write(payload, s);
}
|
||||
|
||||
// Write a StrokeLine element: tag word first, payload in the words after it.
void Annotated_StrokeLine_write(AnnotatedRef ref, AnnoStrokeLineSeg s) {
    uint tag_ix = ref.offset >> 2;
    annotated[tag_ix] = Annotated_StrokeLine;
    AnnoStrokeLineSegRef payload = AnnoStrokeLineSegRef(ref.offset + 4);
    AnnoStrokeLineSeg_write(payload, s);
}
|
||||
|
||||
// Write a Quad element: tag word first, payload in the words after it.
void Annotated_Quad_write(AnnotatedRef ref, AnnoQuadSeg s) {
    uint tag_ix = ref.offset >> 2;
    annotated[tag_ix] = Annotated_Quad;
    AnnoQuadSegRef payload = AnnoQuadSegRef(ref.offset + 4);
    AnnoQuadSeg_write(payload, s);
}
|
||||
|
||||
// Write a Cubic element: tag word first, payload in the words after it.
void Annotated_Cubic_write(AnnotatedRef ref, AnnoCubicSeg s) {
    uint tag_ix = ref.offset >> 2;
    annotated[tag_ix] = Annotated_Cubic;
    AnnoCubicSegRef payload = AnnoCubicSegRef(ref.offset + 4);
    AnnoCubicSeg_write(payload, s);
}
|
||||
|
||||
// Write a Stroke element: tag word first, payload in the words after it.
void Annotated_Stroke_write(AnnotatedRef ref, AnnoStroke s) {
    uint tag_ix = ref.offset >> 2;
    annotated[tag_ix] = Annotated_Stroke;
    AnnoStrokeRef payload = AnnoStrokeRef(ref.offset + 4);
    AnnoStroke_write(payload, s);
}
|
||||
|
||||
// Write a Fill element: tag word first, payload in the words after it.
void Annotated_Fill_write(AnnotatedRef ref, AnnoFill s) {
    uint tag_ix = ref.offset >> 2;
    annotated[tag_ix] = Annotated_Fill;
    AnnoFillRef payload = AnnoFillRef(ref.offset + 4);
    AnnoFill_write(payload, s);
}
|
||||
|
193
piet-gpu/shader/binning.comp
Normal file
193
piet-gpu/shader/binning.comp
Normal file
|
@ -0,0 +1,193 @@
|
|||
// The binning stage of the pipeline.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
||||
|
||||
// Annotated element stream, addressed as 32-bit words. This stage only reads it
// (element tags, line segments and fill/stroke bounding boxes).
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
||||
uint[] annotated;
|
||||
};
|
||||
|
||||
// This is for scanning forward for right_edge data.
|
||||
// Raw words of the state buffer. This shader only reads the per-partition
// right-edge word located by state_right_edge_index().
layout(set = 0, binding = 1) buffer StateBuf {
|
||||
uint[] state;
|
||||
};
|
||||
|
||||
// Shared counters for the binning stage.
layout(set = 0, binding = 2) buffer AllocBuf {
|
||||
// Number of valid annotated elements; invocations past this treat their element as Nop.
uint n_elements;
|
||||
// Will be incremented atomically to claim tiles
|
||||
// NOTE(review): tile_ix is not referenced in this shader's main() -- confirm whether it is still needed.
uint tile_ix;
|
||||
// Bump allocator into `bins`, advanced with atomicAdd in units of BinInstance_size.
uint alloc;
|
||||
};
|
||||
|
||||
// Output of this stage: a (count, chunk_start) pair per (partition, bin) at
// index (partition * N_TILE + bin) * 2, plus the BinInstance records that
// chunk_start points to.
layout(set = 0, binding = 3) buffer BinsBuf {
|
||||
uint[] bins;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
#include "state.h"
|
||||
#include "bins.h"
|
||||
|
||||
// scale factors useful for converting coordinates to bins
|
||||
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
|
||||
#define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX))
|
||||
|
||||
#define TSY (1.0 / float(TILE_HEIGHT_PX))
|
||||
|
||||
// Constant not available in GLSL. Also consider uintBitsToFloat(0x7f800000)
|
||||
#define INFINITY (1.0 / 0.0)
|
||||
|
||||
// Note: cudaraster has N_TILE + 1 to cut down on bank conflicts.
|
||||
shared uint bitmaps[N_SLICE][N_TILE];
|
||||
shared uint count[N_SLICE][N_TILE];
|
||||
shared uint sh_chunk_start[N_TILE];
|
||||
|
||||
shared float sh_right_edge[N_TILE];
|
||||
|
||||
#define StateBuf_stride (8 + 2 * State_size)
|
||||
|
||||
// Word index into `state` of the right-edge value recorded for the given
// partition: word 2 of that partition's StateBuf_stride-byte record.
uint state_right_edge_index(uint partition_ix) {
    uint stride_words = StateBuf_stride / 4;
    return partition_ix * stride_words + 2;
}
|
||||
|
||||
// Binning entry point.
//
// Each workgroup handles one partition of N_TILE consecutive annotated
// elements. In the first phase every invocation owns one element and marks
// the bins its bounding box touches in the shared `bitmaps`. In the second
// phase every invocation owns one bin, counts the elements that hit it and
// reserves output space. In the last phase every invocation goes back to its
// element and writes a BinInstance into each bin it covers.
void main() {
    uint my_n_elements = n_elements;
    uint my_partition = gl_WorkGroupID.x;

    // Clear this invocation's column of the coverage bitmaps.
    for (uint i = 0; i < N_SLICE; i++) {
        bitmaps[i][gl_LocalInvocationID.x] = 0;
    }
    barrier();

    // Read inputs and determine coverage of bins
    uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
    AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);
    uint tag = Annotated_Nop;
    if (element_ix < my_n_elements) {
        tag = Annotated_tag(ref);
    }
    int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    // INFINITY means "right edge not known yet".
    float my_right_edge = INFINITY;
    bool crosses_edge = false;
    switch (tag) {
    case Annotated_FillLine:
    case Annotated_StrokeLine:
        AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
        x0 = int(floor((min(line.p0.x, line.p1.x) - line.stroke.x) * SX));
        y0 = int(floor((min(line.p0.y, line.p1.y) - line.stroke.y) * SY));
        x1 = int(ceil((max(line.p0.x, line.p1.x) + line.stroke.x) * SX));
        y1 = int(ceil((max(line.p0.y, line.p1.y) + line.stroke.y) * SY));
        // A fill line whose endpoints land in different tile rows crosses a
        // horizontal tile edge; its coverage is later widened to the right edge.
        crosses_edge = tag == Annotated_FillLine && ceil(line.p0.y * TSY) != ceil(line.p1.y * TSY);
        break;
    case Annotated_Fill:
    case Annotated_Stroke:
        // Note: we take advantage of the fact that fills and strokes
        // have compatible layout.
        AnnoFill fill = Annotated_Fill_read(ref);
        x0 = int(floor(fill.bbox.x * SX));
        y0 = int(floor(fill.bbox.y * SY));
        x1 = int(ceil(fill.bbox.z * SX));
        y1 = int(ceil(fill.bbox.w * SY));
        // It probably makes more sense to track x1, to avoid having to redo
        // the rounding to tile coords.
        my_right_edge = fill.bbox.z;
        break;
    }

    // If the last element in this partition is a fill edge, then we need to do a
    // look-forward to find the right edge of its corresponding fill. That data is
    // recorded in aggregates computed in the element processing pass.
    if (gl_LocalInvocationID.x == N_TILE - 1 && tag == Annotated_FillLine) {
        uint aggregate_ix = (my_partition + 1) * ELEMENT_BINNING_RATIO;
        // This is sequential but the expectation is that the amount of
        // look-forward is small (performance may degrade in the case
        // of massively complex paths).
        // NOTE(review): the loop has no upper bound on aggregate_ix; it relies on
        // a finite right edge eventually appearing in `state`. Confirm that the
        // element stage guarantees this for every trailing fill line.
        do {
            my_right_edge = uintBitsToFloat(state[state_right_edge_index(aggregate_ix)]);
            aggregate_ix++;
        } while (isinf(my_right_edge));
    }

    // Now propagate right_edge backward, from fill to segment.
    for (uint i = 0; i < LG_N_TILE; i++) {
        // Note: we could try to cut down on write bandwidth here if the value hasn't
        // changed, but not sure it's worth the complexity to track.
        sh_right_edge[gl_LocalInvocationID.x] = my_right_edge;
        barrier();
        if (gl_LocalInvocationID.x + (1 << i) < N_TILE && isinf(my_right_edge)) {
            my_right_edge = sh_right_edge[gl_LocalInvocationID.x + (1 << i)];
        }
        barrier();
    }
    if (crosses_edge) {
        x1 = int(ceil(my_right_edge * SX));
    }

    // At this point, we run an iterator over the coverage area,
    // trying to keep divergence low.
    // Right now, it's just a bbox, but we'll get finer with
    // segments.
    x0 = clamp(x0, 0, N_TILE_X);
    x1 = clamp(x1, x0, N_TILE_X);
    y0 = clamp(y0, 0, N_TILE_Y);
    y1 = clamp(y1, y0, N_TILE_Y);
    // An empty x range must also produce an empty y range, otherwise the
    // loops below would never reach `x == x1` and advance y.
    if (x0 == x1) y1 = y0;
    int x = x0, y = y0;
    // Each element owns one bit: slice = element / 32, bit = element % 32.
    uint my_slice = gl_LocalInvocationID.x / 32;
    uint my_mask = 1 << (gl_LocalInvocationID.x & 31);
    while (y < y1) {
        atomicOr(bitmaps[my_slice][y * N_TILE_X + x], my_mask);
        x++;
        if (x == x1) {
            x = x0;
            y++;
        }
    }

    barrier();
    // Allocate output segments.
    // From here the invocation index identifies a bin rather than an element;
    // count[i][bin] is the running element count through slice i.
    uint element_count = 0;
    for (uint i = 0; i < N_SLICE; i++) {
        element_count += bitCount(bitmaps[i][gl_LocalInvocationID.x]);
        count[i][gl_LocalInvocationID.x] = element_count;
    }
    // element_count is number of elements covering bin for this invocation.
    uint chunk_start = 0;
    if (element_count != 0) {
        // TODO: aggregate atomic adds (subgroup is probably fastest)
        chunk_start = atomicAdd(alloc, element_count * BinInstance_size);
        sh_chunk_start[gl_LocalInvocationID.x] = chunk_start;
    }
    // Note: it might be more efficient for reading to do this in the
    // other order (each bin is a contiguous sequence of partitions)
    uint out_ix = (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
    bins[out_ix] = element_count;
    bins[out_ix + 1] = chunk_start;

    barrier();
    // Use similar strategy as Laine & Karras paper; loop over bbox of bins
    // touched by this element
    x = x0;
    y = y0;
    while (y < y1) {
        uint bin_ix = y * N_TILE_X + x;
        uint out_mask = bitmaps[my_slice][bin_ix];
        if ((out_mask & my_mask) != 0) {
            // Position within the bin's list: set bits below ours in this slice,
            // plus everything counted in the preceding slices.
            uint idx = bitCount(out_mask & (my_mask - 1));
            if (my_slice > 0) {
                idx += count[my_slice - 1][bin_ix];
            }
            uint out_offset = sh_chunk_start[bin_ix] + idx * BinInstance_size;
            BinInstance_write(BinInstanceRef(out_offset), BinInstance(element_ix, my_right_edge));
        }
        x++;
        if (x == x1) {
            x = x0;
            y++;
        }
    }
}
|
BIN
piet-gpu/shader/binning.spv
Normal file
BIN
piet-gpu/shader/binning.spv
Normal file
Binary file not shown.
64
piet-gpu/shader/bins.h
Normal file
64
piet-gpu/shader/bins.h
Normal file
|
@ -0,0 +1,64 @@
|
|||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// Typed reference to a BinInstance: a byte offset into the `bins` buffer
// (the accessors shift it right by 2 to get a word index).
struct BinInstanceRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// Typed reference to a BinChunk: a byte offset into the `bins` buffer
// (the accessors shift it right by 2 to get a word index).
struct BinChunkRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// One record in a bin's element list (two 32-bit words in `bins`).
struct BinInstance {
|
||||
// Index of the annotated element.
uint element_ix;
|
||||
// Right edge carried along with the element; stored as raw float bits.
float right_edge;
|
||||
};
|
||||
|
||||
#define BinInstance_size 8
|
||||
|
||||
// Reference to the `index`-th BinInstance in an array that starts at `ref`.
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) {
    uint byte_delta = index * BinInstance_size;
    return BinInstanceRef(ref.offset + byte_delta);
}
|
||||
|
||||
// Chunk header stored as two words in `bins`.
// NOTE(review): presumably a linked list of instance chunks (`n` entries, then
// `next`); none of the shaders visible here use it -- confirm before relying on it.
struct BinChunk {
|
||||
uint n;
|
||||
BinChunkRef next;
|
||||
};
|
||||
|
||||
#define BinChunk_size 8
|
||||
|
||||
// Reference to the `index`-th BinChunk in an array that starts at `ref`.
BinChunkRef BinChunk_index(BinChunkRef ref, uint index) {
    uint byte_delta = index * BinChunk_size;
    return BinChunkRef(ref.offset + byte_delta);
}
|
||||
|
||||
// Load a BinInstance from `bins`: element index, then right edge as float bits.
BinInstance BinInstance_read(BinInstanceRef ref) {
    uint base = ref.offset >> 2;
    uint element_word = bins[base + 0];
    uint edge_word = bins[base + 1];
    return BinInstance(element_word, uintBitsToFloat(edge_word));
}
|
||||
|
||||
// Store a BinInstance into `bins`: element index, then right edge as float bits.
void BinInstance_write(BinInstanceRef ref, BinInstance s) {
    uint base = ref.offset >> 2;
    uint edge_word = floatBitsToUint(s.right_edge);
    bins[base + 0] = s.element_ix;
    bins[base + 1] = edge_word;
}
|
||||
|
||||
// Load a BinChunk header from `bins`: count word, then the offset of the next chunk.
BinChunk BinChunk_read(BinChunkRef ref) {
    uint base = ref.offset >> 2;
    uint n_word = bins[base + 0];
    uint next_word = bins[base + 1];
    return BinChunk(n_word, BinChunkRef(next_word));
}
|
||||
|
||||
// Store a BinChunk header into `bins`: count word, then the offset of the next chunk.
void BinChunk_write(BinChunkRef ref, BinChunk s) {
    uint base = ref.offset >> 2;
    uint next_word = s.next.offset;
    bins[base + 0] = s.n;
    bins[base + 1] = next_word;
}
|
||||
|
|
@ -9,12 +9,11 @@ rule glsl
|
|||
|
||||
build image.spv: glsl image.comp | scene.h
|
||||
|
||||
build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h setup.h
|
||||
|
||||
build kernel2s.spv: glsl kernel2s.comp | scene.h tilegroup.h segment.h setup.h
|
||||
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
||||
|
||||
build kernel2f.spv: glsl kernel2f.comp | scene.h tilegroup.h fill_seg.h setup.h
|
||||
build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h
|
||||
|
||||
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h segment.h fill_seg.h ptcl.h setup.h
|
||||
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
||||
|
||||
build kernel4.spv: glsl kernel4.comp | ptcl.h segment.h fill_seg.h setup.h
|
||||
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
|
||||
|
|
526
piet-gpu/shader/coarse.comp
Normal file
526
piet-gpu/shader/coarse.comp
Normal file
|
@ -0,0 +1,526 @@
|
|||
// The coarse rasterizer stage of the pipeline.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
||||
|
||||
// Annotated element stream, addressed as 32-bit words. This stage reads
// element tags, line segments and fill/stroke records from it.
layout(set = 0, binding = 0) buffer AnnotatedBuf {
|
||||
uint[] annotated;
|
||||
};
|
||||
|
||||
// Binning output consumed here: a (count, chunk offset) pair per (partition, bin)
// at index (partition * N_TILE + bin) * 2, plus the BinInstance records themselves.
layout(set = 0, binding = 1) buffer BinsBuf {
|
||||
uint[] bins;
|
||||
};
|
||||
|
||||
// Shared counters for the coarse stage.
layout(set = 0, binding = 2) buffer AllocBuf {
|
||||
// Total number of annotated elements; determines how many input partitions are read.
uint n_elements;
|
||||
// Bump allocator advanced with atomicAdd for command slabs, segment chunks and segments.
uint alloc;
|
||||
};
|
||||
|
||||
// Output buffer of this stage.
// NOTE(review): presumably the target of the Cmd_*/SegChunk/Segment writers
// declared in ptcl.h (not visible here) -- confirm.
layout(set = 0, binding = 3) buffer PtclBuf {
|
||||
uint[] ptcl;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
#include "bins.h"
|
||||
#include "ptcl.h"
|
||||
|
||||
#define LG_N_PART_READ 8
|
||||
#define N_PART_READ (1 << LG_N_PART_READ)
|
||||
|
||||
shared uint sh_elements[N_TILE];
|
||||
shared float sh_right_edge[N_TILE];
|
||||
|
||||
// Number of elements in the partition; prefix sum.
|
||||
shared uint sh_part_count[N_PART_READ];
|
||||
shared uint sh_part_elements[N_PART_READ];
|
||||
|
||||
shared uint sh_bitmaps[N_SLICE][N_TILE];
|
||||
shared uint sh_backdrop[N_SLICE][N_TILE];
|
||||
shared uint sh_bd_sign[N_SLICE];
|
||||
shared uint sh_is_segment[N_SLICE];
|
||||
|
||||
// Shared state for parallel segment output stage
|
||||
|
||||
// Count of total number of segments in each tile, then
|
||||
// inclusive prefix sum of same.
|
||||
shared uint sh_seg_count[N_TILE];
|
||||
shared uint sh_seg_alloc;
|
||||
|
||||
// scale factors useful for converting coordinates to tiles
|
||||
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||
|
||||
// Perhaps cmd_limit should be a global? This is a style question.
|
||||
// Make sure there is room for another command at `cmd_ref`. When the current
// slab is exhausted, claim a new PTCL_INITIAL_ALLOC-byte slab from `alloc`,
// write a jump to it at the old position and continue from the new slab.
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
    // Still within the current slab: nothing to do.
    if (cmd_ref.offset <= cmd_limit) {
        return;
    }
    uint slab_start = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
    Cmd_Jump_write(cmd_ref, CmdJump(slab_start));
    cmd_ref = CmdRef(slab_start);
    // The limit leaves two command slots at the end of the slab.
    cmd_limit = slab_start + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
}
|
||||
|
||||
#define CHUNK_ALLOC_SLAB 16
|
||||
|
||||
uint alloc_chunk_remaining;
|
||||
uint alloc_chunk_offset;
|
||||
|
||||
// Hand out one SegChunk slot. Slots are claimed from `alloc` in slabs of
// CHUNK_ALLOC_SLAB, tracked by alloc_chunk_offset / alloc_chunk_remaining, so
// the atomic is only hit once per slab.
SegChunkRef alloc_seg_chunk() {
    if (alloc_chunk_remaining == 0) {
        // Slab exhausted (or never claimed): grab a fresh one.
        alloc_chunk_remaining = CHUNK_ALLOC_SLAB;
        alloc_chunk_offset = atomicAdd(alloc, CHUNK_ALLOC_SLAB * SegChunk_size);
    }
    SegChunkRef slot = SegChunkRef(alloc_chunk_offset);
    alloc_chunk_remaining -= 1;
    alloc_chunk_offset += SegChunk_size;
    return slot;
}
|
||||
|
||||
// Accumulate delta to backdrop.
|
||||
//
|
||||
// Each bit for which bd_bitmap is 1 and bd_sign is 1 counts as +1, and each
|
||||
// bit for which bd_bitmap is 1 and bd_sign is 0 counts as -1.
|
||||
// Net backdrop contribution of the elements selected by `bd_bitmap`:
// a selected bit counts +1 when the matching `bd_sign` bit is set, -1 when clear.
int count_backdrop(uint bd_bitmap, uint bd_sign) {
    int positive = bitCount(bd_bitmap & bd_sign);
    int negative = bitCount(bd_bitmap & ~bd_sign);
    return positive - negative;
}
|
||||
|
||||
// Coarse rasterizer entry point.
//
// One workgroup handles one bin and each invocation owns one tile of that bin.
// The outer loop repeatedly
//   1. merges up to N_TILE elements for this bin out of the per-partition
//      lists produced by binning (into sh_elements / sh_right_edge),
//   2. has each invocation compute tile coverage and backdrop bits for one
//      element (into sh_bitmaps / sh_backdrop),
//   3. writes all line segments for all tiles in parallel, and
//   4. has each invocation walk the fill/stroke elements of its own tile in
//      order and emit commands into that tile's command list.
void main() {
    // Could use either linear or 2d layouts for both dispatch and
    // invocations within the workgroup. We'll use variables to abstract.
    uint bin_ix = N_TILE_X * gl_WorkGroupID.y + gl_WorkGroupID.x;
    uint partition_ix = 0;
    uint n_partitions = (n_elements + N_TILE - 1) / N_TILE;
    // Top left coordinates of this bin.
    vec2 xy0 = vec2(N_TILE_X * TILE_WIDTH_PX * gl_WorkGroupID.x, N_TILE_Y * TILE_HEIGHT_PX * gl_WorkGroupID.y);
    uint th_ix = gl_LocalInvocationID.x;

    uint tile_x = N_TILE_X * gl_WorkGroupID.x + gl_LocalInvocationID.x % N_TILE_X;
    uint tile_y = N_TILE_Y * gl_WorkGroupID.y + gl_LocalInvocationID.x / N_TILE_X;
    uint this_tile_ix = tile_y * WIDTH_IN_TILES + tile_x;
    // Every tile starts with a pre-assigned slab of PTCL_INITIAL_ALLOC bytes.
    CmdRef cmd_ref = CmdRef(this_tile_ix * PTCL_INITIAL_ALLOC);
    uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;

    // Allocation and management of segment output
    SegChunkRef first_seg_chunk = SegChunkRef(0);
    SegChunkRef last_chunk_ref = SegChunkRef(0);
    uint last_chunk_n = 0;
    SegmentRef last_chunk_segs = SegmentRef(0);
    alloc_chunk_remaining = 0;

    // I'm sure we can figure out how to do this with at least one fewer register...
    // Items up to rd_ix have been read from sh_elements
    uint rd_ix = 0;
    // Items up to wr_ix have been written into sh_elements
    uint wr_ix = 0;
    // Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
    uint part_start_ix = 0;
    uint ready_ix = 0;
    if (th_ix < N_SLICE) {
        sh_bd_sign[th_ix] = 0;
    }
    // Backdrop accumulated for this tile since the last fill was emitted.
    int backdrop = 0;
    while (true) {
        for (uint i = 0; i < N_SLICE; i++) {
            sh_bitmaps[i][th_ix] = 0;
            sh_backdrop[i][th_ix] = 0;
        }
        if (th_ix < N_SLICE) {
            sh_is_segment[th_ix] = 0;
        }

        // parallel read of input partitions
        do {
            // The condition is uniform across the workgroup, so the barriers
            // inside are reached by all invocations or by none.
            if (ready_ix == wr_ix && partition_ix < n_partitions) {
                part_start_ix = ready_ix;
                uint count = 0;
                if (th_ix < N_PART_READ && partition_ix + th_ix < n_partitions) {
                    uint in_ix = ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
                    count = bins[in_ix];
                    sh_part_elements[th_ix] = bins[in_ix + 1];
                }
                // prefix sum of counts
                for (uint i = 0; i < LG_N_PART_READ; i++) {
                    if (th_ix < N_PART_READ) {
                        sh_part_count[th_ix] = count;
                    }
                    barrier();
                    if (th_ix < N_PART_READ) {
                        if (th_ix >= (1 << i)) {
                            count += sh_part_count[th_ix - (1 << i)];
                        }
                    }
                    barrier();
                }
                if (th_ix < N_PART_READ) {
                    sh_part_count[th_ix] = part_start_ix + count;
                }
                barrier();
                ready_ix = sh_part_count[N_PART_READ - 1];
                partition_ix += N_PART_READ;
            }
            // use binary search to find element to read
            uint ix = rd_ix + th_ix;
            if (ix >= wr_ix && ix < ready_ix) {
                uint part_ix = 0;
                for (uint i = 0; i < LG_N_PART_READ; i++) {
                    uint probe = part_ix + ((N_PART_READ / 2) >> i);
                    if (ix >= sh_part_count[probe - 1]) {
                        part_ix = probe;
                    }
                }
                // Convert to an index relative to the start of the chosen partition.
                ix -= part_ix > 0 ? sh_part_count[part_ix - 1] : part_start_ix;
                BinInstanceRef inst_ref = BinInstanceRef(sh_part_elements[part_ix]);
                BinInstance inst = BinInstance_read(BinInstance_index(inst_ref, ix));
                sh_elements[th_ix] = inst.element_ix;
                sh_right_edge[th_ix] = inst.right_edge;
            }
            barrier();

            wr_ix = min(rd_ix + N_TILE, ready_ix);
        } while (wr_ix - rd_ix < N_TILE && (wr_ix < ready_ix || partition_ix < n_partitions));

        // We've done the merge and filled the buffer.

        // Read one element, compute coverage.
        uint tag = Annotated_Nop;
        AnnotatedRef ref;
        float right_edge = 0.0;
        if (th_ix + rd_ix < wr_ix) {
            uint element_ix = sh_elements[th_ix];
            right_edge = sh_right_edge[th_ix];
            ref = AnnotatedRef(element_ix * Annotated_size);
            tag = Annotated_tag(ref);
        }

        // Setup for coverage algorithm.
        // Initialized so the Nop path below never reads undefined values
        // (its coverage loop is empty, so the results are unaffected).
        float a = 0.0, b = 0.0, c = 0.0;
        // Bounding box of element in pixel coordinates.
        float xmin = 0.0, xmax = 0.0, ymin = 0.0, ymax = 0.0;
        uint my_slice = th_ix / 32;
        uint my_mask = 1 << (th_ix & 31);
        switch (tag) {
        case Annotated_FillLine:
        case Annotated_StrokeLine:
            AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
            xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
            xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
            ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
            ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
            float dx = line.p1.x - line.p0.x;
            float dy = line.p1.y - line.p0.y;
            if (tag == Annotated_FillLine) {
                // Set bit for backdrop sign calculation, 1 is +1, 0 is -1.
                if (dy < 0) {
                    atomicOr(sh_bd_sign[my_slice], my_mask);
                } else {
                    atomicAnd(sh_bd_sign[my_slice], ~my_mask);
                }
            }
            atomicOr(sh_is_segment[my_slice], my_mask);
            // Set up for per-scanline coverage formula, below.
            float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
            c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
            b = invslope; // Note: assumes square tiles, otherwise scale.
            a = (line.p0.x - xy0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX) - xy0.y) * b) * SX;
            break;
        case Annotated_Fill:
        case Annotated_Stroke:
            // Note: we take advantage of the fact that fills and strokes
            // have compatible layout.
            AnnoFill fill = Annotated_Fill_read(ref);
            xmin = fill.bbox.x;
            xmax = fill.bbox.z;
            ymin = fill.bbox.y;
            ymax = fill.bbox.w;
            // Just let the clamping to xmin and xmax determine the bounds.
            a = 0.0;
            b = 0.0;
            c = 1e9;
            break;
        default:
            ymin = 0;
            ymax = 0;
            break;
        }

        // Draw the coverage area into the bitmasks. This uses an algorithm
        // that computes the coverage of a span for given scanline.

        // Compute bounding box in tiles and clip to this bin.
        int x0 = int(floor((xmin - xy0.x) * SX));
        int x1 = int(ceil((xmax - xy0.x) * SX));
        int xr = int(ceil((right_edge - xy0.x) * SX));
        int y0 = int(floor((ymin - xy0.y) * SY));
        int y1 = int(ceil((ymax - xy0.y) * SY));
        x0 = clamp(x0, 0, N_TILE_X);
        x1 = clamp(x1, x0, N_TILE_X);
        xr = clamp(xr, 0, N_TILE_X);
        y0 = clamp(y0, 0, N_TILE_Y);
        y1 = clamp(y1, y0, N_TILE_Y);
        // t is the span center (in tiles) on the current tile row; c its half width.
        float t = a + b * float(y0);
        for (uint y = y0; y < y1; y++) {
            uint xx0 = clamp(int(floor(t - c)), x0, x1);
            uint xx1 = clamp(int(ceil(t + c)), x0, x1);
            for (uint x = xx0; x < xx1; x++) {
                atomicOr(sh_bitmaps[my_slice][y * N_TILE_X + x], my_mask);
            }
            if (tag == Annotated_FillLine && ymin <= xy0.y + float(y * TILE_HEIGHT_PX)) {
                // Assign backdrop to all tiles to the right of the ray crossing the
                // top edge of this tile, up to the right edge of the fill bbox.
                float xray = t - 0.5 * b;
                xx0 = max(int(ceil(xray)), 0);
                for (uint x = xx0; x < xr; x++) {
                    atomicOr(sh_backdrop[my_slice][y * N_TILE_X + x], my_mask);
                }
            }
            t += b;
        }
        barrier();

        // We've computed coverage and other info for each element in the input, now for
        // the output stage. We'll do segments first using a more parallel algorithm.

        uint seg_count = 0;
        for (uint i = 0; i < N_SLICE; i++) {
            seg_count += bitCount(sh_bitmaps[i][th_ix] & sh_is_segment[i]);
        }
        sh_seg_count[th_ix] = seg_count;
        // Prefix sum of sh_seg_count
        for (uint i = 0; i < LG_N_TILE; i++) {
            barrier();
            if (th_ix >= (1 << i)) {
                seg_count += sh_seg_count[th_ix - (1 << i)];
            }
            barrier();
            sh_seg_count[th_ix] = seg_count;
        }
        // The last invocation holds the total; it reserves space for all segments.
        if (th_ix == N_TILE - 1) {
            sh_seg_alloc = atomicAdd(alloc, seg_count * Segment_size);
        }
        barrier();
        uint total_seg_count = sh_seg_count[N_TILE - 1];
        uint seg_alloc = sh_seg_alloc;

        // Output buffer is allocated as segments for each tile laid end-to-end.

        for (uint ix = th_ix; ix < total_seg_count; ix += N_TILE) {
            // Find the work item; this thread is now not bound to an element or tile.
            // First find the tile (by binary search)
            uint tile_ix = 0;
            for (uint i = 0; i < LG_N_TILE; i++) {
                uint probe = tile_ix + ((N_TILE / 2) >> i);
                if (ix >= sh_seg_count[probe - 1]) {
                    tile_ix = probe;
                }
            }
            // Now, sh_seg_count[tile_ix - 1] <= ix < sh_seg_count[tile_ix].
            // (considering sh_seg_count[-1] == 0)

            // Index of segment within tile's segments
            uint seq_ix = ix;
            // Maybe consider a sentinel value to avoid the conditional?
            if (tile_ix > 0) {
                seq_ix -= sh_seg_count[tile_ix - 1];
            }
            // Find the segment. This is done by linear scan through the bitmaps of the
            // tile, accelerated by bit counting. Binary search might help, maybe not.
            uint slice_ix = 0;
            uint seq_bits;

            while (true) {
                seq_bits = sh_bitmaps[slice_ix][tile_ix] & sh_is_segment[slice_ix];
                uint this_count = bitCount(seq_bits);
                if (this_count > seq_ix) {
                    break;
                }
                seq_ix -= this_count;
                slice_ix++;
            }
            // Now find position of nth bit set (n = seq_ix) in seq_bits; binary search
            uint bit_ix = 0;
            for (int i = 0; i < 5; i++) {
                uint probe = bit_ix + (16 >> i);
                if (seq_ix >= bitCount(seq_bits & ((1 << probe) - 1))) {
                    bit_ix = probe;
                }
            }
            uint rd_el_ix = slice_ix * 32 + bit_ix;
            uint element_ix = sh_elements[rd_el_ix];
            ref = AnnotatedRef(element_ix * Annotated_size);
            AnnoFillLineSeg line = Annotated_FillLine_read(ref);
            float y_edge = 0.0;
            // This is basically the same logic as piet-metal, but should be made numerically robust.
            if (Annotated_tag(ref) == Annotated_FillLine) {
                vec2 tile_xy = xy0 + vec2((tile_ix % N_TILE_X) * TILE_WIDTH_PX, (tile_ix / N_TILE_X) * TILE_HEIGHT_PX);
                // y at which the line meets the tile's left edge. For a vertical
                // line the division yields Inf/NaN, which fails the range test
                // below and so takes the "no crossing" branch.
                y_edge = mix(line.p0.y, line.p1.y, (tile_xy.x - line.p0.x) / (line.p1.x - line.p0.x));
                if (min(line.p0.x, line.p1.x) < tile_xy.x && y_edge >= tile_xy.y && y_edge < tile_xy.y + TILE_HEIGHT_PX) {
                    // Clip the endpoint lying left of the tile to the tile's left edge.
                    if (line.p0.x > line.p1.x) {
                        line.p1 = vec2(tile_xy.x, y_edge);
                    } else {
                        line.p0 = vec2(tile_xy.x, y_edge);
                    }
                } else {
                    y_edge = 1e9;
                }
            }
            Segment seg = Segment(line.p0, line.p1, y_edge);
            Segment_write(SegmentRef(seg_alloc + Segment_size * ix), seg);
        }

        // Output non-segment elements for this tile. The thread does a sequential walk
        // through the non-segment elements, and for segments, count and backdrop are
        // aggregated using bit counting.
        uint slice_ix = 0;
        uint bitmap = sh_bitmaps[0][th_ix];
        uint bd_bitmap = sh_backdrop[0][th_ix];
        uint bd_sign = sh_bd_sign[0];
        uint is_segment = sh_is_segment[0];
        // Index of this tile's first segment within the block reserved above.
        uint seg_start = th_ix == 0 ? 0 : sh_seg_count[th_ix - 1];
        seg_count = 0;
        while (true) {
            uint nonseg_bitmap = bitmap & ~is_segment;
            if (nonseg_bitmap == 0) {
                // Only segments remain in this slice: fold them in and move on.
                backdrop += count_backdrop(bd_bitmap, bd_sign);
                seg_count += bitCount(bitmap & is_segment);
                slice_ix++;
                if (slice_ix == N_SLICE) {
                    break;
                }
                bitmap = sh_bitmaps[slice_ix][th_ix];
                bd_bitmap = sh_backdrop[slice_ix][th_ix];
                bd_sign = sh_bd_sign[slice_ix];
                is_segment = sh_is_segment[slice_ix];
                nonseg_bitmap = bitmap & ~is_segment;
                if (nonseg_bitmap == 0) {
                    continue;
                }
            }
            uint element_ref_ix = slice_ix * 32 + findLSB(nonseg_bitmap);
            uint element_ix = sh_elements[element_ref_ix];

            // Bits up to and including the lsb
            uint bd_mask = (nonseg_bitmap - 1) ^ nonseg_bitmap;
            backdrop += count_backdrop(bd_bitmap & bd_mask, bd_sign);
            seg_count += bitCount(bitmap & bd_mask & is_segment);
            // Clear bits that have been consumed.
            bd_bitmap &= ~bd_mask;
            bitmap &= ~bd_mask;

            // At this point, we read the element again from global memory.
            // If that turns out to be expensive, maybe we can pack it into
            // shared memory (or perhaps just the tag).
            ref = AnnotatedRef(element_ix * Annotated_size);
            tag = Annotated_tag(ref);

            switch (tag) {
            case Annotated_Fill:
                if (last_chunk_n > 0 || seg_count > 0) {
                    SegChunkRef chunk_ref = SegChunkRef(0);
                    if (seg_count > 0) {
                        chunk_ref = alloc_seg_chunk();
                        SegChunk chunk;
                        chunk.n = seg_count;
                        chunk.next = SegChunkRef(0);
                        uint seg_offset = seg_alloc + seg_start * Segment_size;
                        chunk.segs = SegmentRef(seg_offset);
                        SegChunk_write(chunk_ref, chunk);
                    }
                    if (last_chunk_n > 0) {
                        // Finish the chunk deferred from an earlier pass, linking it
                        // to the chunk just created (or terminating the list).
                        SegChunk chunk;
                        chunk.n = last_chunk_n;
                        chunk.next = chunk_ref;
                        chunk.segs = last_chunk_segs;
                        SegChunk_write(last_chunk_ref, chunk);
                    } else {
                        first_seg_chunk = chunk_ref;
                    }

                    AnnoFill fill = Annotated_Fill_read(ref);
                    CmdFill cmd_fill;
                    cmd_fill.seg_ref = first_seg_chunk;
                    cmd_fill.backdrop = backdrop;
                    cmd_fill.rgba_color = fill.rgba_color;
                    alloc_cmd(cmd_ref, cmd_limit);
                    Cmd_Fill_write(cmd_ref, cmd_fill);
                    cmd_ref.offset += Cmd_size;
                    last_chunk_n = 0;
                } else if (backdrop != 0) {
                    // No segments touch this tile but it lies inside the path.
                    AnnoFill fill = Annotated_Fill_read(ref);
                    alloc_cmd(cmd_ref, cmd_limit);
                    Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
                    cmd_ref.offset += Cmd_size;
                }
                seg_start += seg_count;
                seg_count = 0;
                backdrop = 0;
                break;
            case Annotated_Stroke:
                // TODO: reduce divergence & code duplication? Much of the
                // fill and stroke processing is in common.
                if (last_chunk_n > 0 || seg_count > 0) {
                    SegChunkRef chunk_ref = SegChunkRef(0);
                    if (seg_count > 0) {
                        chunk_ref = alloc_seg_chunk();
                        SegChunk chunk;
                        chunk.n = seg_count;
                        chunk.next = SegChunkRef(0);
                        uint seg_offset = seg_alloc + seg_start * Segment_size;
                        chunk.segs = SegmentRef(seg_offset);
                        SegChunk_write(chunk_ref, chunk);
                    }
                    if (last_chunk_n > 0) {
                        SegChunk chunk;
                        chunk.n = last_chunk_n;
                        chunk.next = chunk_ref;
                        chunk.segs = last_chunk_segs;
                        SegChunk_write(last_chunk_ref, chunk);
                    } else {
                        first_seg_chunk = chunk_ref;
                    }

                    AnnoStroke stroke = Annotated_Stroke_read(ref);
                    CmdStroke cmd_stroke;
                    cmd_stroke.seg_ref = first_seg_chunk;
                    cmd_stroke.half_width = 0.5 * stroke.linewidth;
                    cmd_stroke.rgba_color = stroke.rgba_color;
                    alloc_cmd(cmd_ref, cmd_limit);
                    Cmd_Stroke_write(cmd_ref, cmd_stroke);
                    cmd_ref.offset += Cmd_size;
                    last_chunk_n = 0;
                }
                seg_start += seg_count;
                seg_count = 0;
                break;
            default:
                // This shouldn't happen, but just in case.
                seg_start++;
                break;
            }
        }
        if (seg_count > 0) {
            // Segments at the end of this pass belong to a fill/stroke that has
            // not been seen yet; start a chunk for them and defer writing it.
            SegChunkRef chunk_ref = alloc_seg_chunk();
            if (last_chunk_n > 0) {
                SegChunk_write(last_chunk_ref, SegChunk(last_chunk_n, chunk_ref, last_chunk_segs));
            } else {
                first_seg_chunk = chunk_ref;
            }
            // TODO: free two registers by writing count and segments ref now,
            // as opposed to deferring SegChunk write until all fields are known.
            last_chunk_ref = chunk_ref;
            last_chunk_n = seg_count;
            uint seg_offset = seg_alloc + seg_start * Segment_size;
            last_chunk_segs = SegmentRef(seg_offset);
        }
        barrier();

        rd_ix += N_TILE;
        if (rd_ix >= ready_ix && partition_ix >= n_partitions) break;
    }
    Cmd_End_write(cmd_ref);
}
|
BIN
piet-gpu/shader/coarse.spv
Normal file
BIN
piet-gpu/shader/coarse.spv
Normal file
Binary file not shown.
328
piet-gpu/shader/elements.comp
Normal file
328
piet-gpu/shader/elements.comp
Normal file
|
@ -0,0 +1,328 @@
|
|||
// The element processing stage, first in the pipeline.
|
||||
//
|
||||
// This stage is primarily about applying transforms and computing bounding
|
||||
// boxes. It is organized as a scan over the input elements, producing
|
||||
// annotated output elements.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#define N_ROWS 4
|
||||
#define WG_SIZE 32
|
||||
#define LG_WG_SIZE 5
|
||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||
|
||||
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
// It would be better to use the Vulkan memory model than
|
||||
// "volatile" but shooting for compatibility here rather
|
||||
// than doing things right.
|
||||
layout(set = 0, binding = 1) volatile buffer StateBuf {
|
||||
uint[] state;
|
||||
};
|
||||
|
||||
// The annotated results are stored here.
|
||||
layout(set = 0, binding = 2) buffer AnnotatedBuf {
|
||||
uint[] annotated;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "state.h"
|
||||
#include "annotated.h"
|
||||
|
||||
#define StateBuf_stride (8 + 2 * State_size)
|
||||
|
||||
// Byte offset of the aggregate State published by partition `partition_ix`.
// Partitions are StateBuf_stride bytes apart; the first aggregate sits at
// byte 12, after the word at state[0] and the partition's two leading words
// (its flag word and the word that follows it).
StateRef state_aggregate_ref(uint partition_ix) {
    uint byte_offset = partition_ix * StateBuf_stride + 12;
    return StateRef(byte_offset);
}
|
||||
|
||||
// Byte offset of the inclusive-prefix State for partition `partition_ix`.
// It is stored State_size bytes after that partition's aggregate
// (12 + partition_ix * StateBuf_stride).
StateRef state_prefix_ref(uint partition_ix) {
    uint aggregate_offset = partition_ix * StateBuf_stride + 12;
    return StateRef(aggregate_offset + State_size);
}
|
||||
|
||||
// Index (in 32-bit words, not bytes) of the status flag for partition
// `partition_ix` inside the `state` array. Word 0 is skipped; each partition
// then occupies StateBuf_stride / 4 words.
uint state_flag_index(uint partition_ix) {
    uint words_per_partition = StateBuf_stride / 4;
    return partition_ix * words_per_partition + 1;
}
|
||||
|
||||
// These correspond to X, A, P respectively in the prefix sum paper.
|
||||
#define FLAG_NOT_READY 0
|
||||
#define FLAG_AGGREGATE_READY 1
|
||||
#define FLAG_PREFIX_READY 2
|
||||
|
||||
#define FLAG_SET_LINEWIDTH 1
|
||||
#define FLAG_SET_BBOX 2
|
||||
#define FLAG_RESET_BBOX 4
|
||||
|
||||
// Scan operator: the state that results from applying `a` and then `b`.
// This is almost like a monoid (the interaction between transformation and
// bounding boxes is approximate).
State combine_state(State a, State b) {
    State c;
    bool a_keeps_bbox = (a.flags & FLAG_RESET_BBOX) == 0;
    bool b_bbox_empty = b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y;

    // Axis-aligned bounds of b's bbox after a's affine transform. mat.xy is
    // the contribution of the x coordinate, mat.zw that of the y coordinate.
    vec2 from_x0 = a.mat.xy * b.bbox.x;
    vec2 from_x1 = a.mat.xy * b.bbox.z;
    vec2 from_y0 = a.mat.zw * b.bbox.y;
    vec2 from_y1 = a.mat.zw * b.bbox.w;
    c.bbox.xy = min(from_x0, from_x1) + min(from_y0, from_y1) + a.translate;
    c.bbox.zw = max(from_x0, from_x1) + max(from_y0, from_y1) + a.translate;

    if (a_keeps_bbox && b_bbox_empty) {
        // b contributes no area: carry a's bbox through unchanged.
        c.bbox = a.bbox;
    } else if (a_keeps_bbox && (b.flags & FLAG_SET_BBOX) == 0 &&
        (a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y))
    {
        // Both sides contribute: take the union.
        c.bbox.xy = min(a.bbox.xy, c.bbox.xy);
        c.bbox.zw = max(a.bbox.zw, c.bbox.zw);
    }

    // 2x2 matrix product a.mat * b.mat, stored column-wise in a vec4, and the
    // translation of b carried through a.
    c.mat.xy = a.mat.xy * b.mat.x + a.mat.zw * b.mat.y;
    c.mat.zw = a.mat.xy * b.mat.z + a.mat.zw * b.mat.w;
    c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate;

    // The later line width wins only if b actually set one.
    if ((b.flags & FLAG_SET_LINEWIDTH) == 0) {
        c.linewidth = a.linewidth;
    } else {
        c.linewidth = b.linewidth;
    }

    // a's "set" bits are inherited; a's RESET_BBOX (4) is shifted down so it
    // lands on the SET_BBOX bit (2) of the combined state.
    uint inherited = a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX);
    uint promoted = (a.flags & FLAG_RESET_BBOX) >> 1;
    c.flags = inherited | b.flags | promoted;
    return c;
}
|
||||
|
||||
// Map a single scene element to its contribution to the scan state.
//
// ref:     the element to read from the scene buffer.
// is_fill: set to true iff the element's tag is Element_Fill. It is written
//          unconditionally and never read here, so it is an `out` parameter;
//          callers may pass an uninitialised variable.
//
// Returns a State that is the identity (zero bbox, identity matrix, zero
// translation, no flags) except for the fields the element affects.
State map_element(ElementRef ref, out bool is_fill) {
    // TODO: it would *probably* be more efficient to make the memory read patterns less
    // divergent, though it would be more wasted memory.
    uint tag = Element_tag(ref);
    State c;
    c.bbox = vec4(0.0, 0.0, 0.0, 0.0);
    c.mat = vec4(1.0, 0.0, 0.0, 1.0);
    c.translate = vec2(0.0, 0.0);
    c.linewidth = 1.0; // TODO should be 0.0
    c.flags = 0;
    is_fill = false;
    // Case bodies that declare locals are braced so each declaration has its
    // own scope and no case label jumps past an initialiser.
    switch (tag) {
    case Element_FillLine:
    case Element_StrokeLine: {
        // Both line tags are decoded with the FillLine reader.
        LineSeg line = Element_FillLine_read(ref);
        c.bbox.xy = min(line.p0, line.p1);
        c.bbox.zw = max(line.p0, line.p1);
        break;
    }
    case Element_Quad: {
        // Bounds of the control points.
        QuadSeg quad = Element_Quad_read(ref);
        c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2);
        c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2);
        break;
    }
    case Element_Cubic: {
        // Bounds of the control points.
        CubicSeg cubic = Element_Cubic_read(ref);
        c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));
        c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
        break;
    }
    case Element_Fill:
        is_fill = true;
        // fall-through
    case Element_Stroke:
        // A fill or stroke ends the current path; the bbox restarts after it.
        c.flags = FLAG_RESET_BBOX;
        break;
    case Element_SetLineWidth: {
        SetLineWidth lw = Element_SetLineWidth_read(ref);
        c.linewidth = lw.width;
        c.flags = FLAG_SET_LINEWIDTH;
        break;
    }
    case Element_Transform: {
        Transform t = Element_Transform_read(ref);
        c.mat = t.mat;
        c.translate = t.translate;
        break;
    }
    }
    return c;
}
|
||||
|
||||
// Half-extents, along x and y, of the bounding box of a circle of diameter
// st.linewidth after st.mat has turned it into an ellipse.
// See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm
vec2 get_linewidth(State st) {
    float half_width = 0.5 * st.linewidth;
    vec2 axis_scale = vec2(length(st.mat.xz), length(st.mat.yw));
    return half_width * axis_scale;
}
|
||||
|
||||
// We should be able to use an array of structs but the NV shader compiler
|
||||
// doesn't seem to like it :/
|
||||
//shared State sh_state[WG_SIZE];
|
||||
shared vec4 sh_mat[WG_SIZE];
|
||||
shared vec2 sh_translate[WG_SIZE];
|
||||
shared vec4 sh_bbox[WG_SIZE];
|
||||
shared float sh_width[WG_SIZE];
|
||||
shared uint sh_flags[WG_SIZE];
|
||||
|
||||
shared uint sh_min_fill;
|
||||
|
||||
shared uint sh_tile_ix;
|
||||
shared State sh_prefix;
|
||||
|
||||
// Entry point of the element processing stage.
//
// Each workgroup claims one partition of PARTITION_SIZE elements, scans the
// per-element states (first per thread, then across the workgroup in shared
// memory), publishes the partition aggregate, resolves its exclusive prefix
// by decoupled look-back over earlier partitions, and finally writes one
// annotated element per input element.
void main() {
    State th_state[N_ROWS];
    // Determine partition to process by atomic counter (described in Section
    // 4.4 of prefix sum paper).
    if (gl_LocalInvocationID.x == 0) {
        sh_tile_ix = atomicAdd(state[0], 1);
        sh_min_fill = ~0;
    }
    barrier();
    uint tile_ix = sh_tile_ix;

    // Each thread owns N_ROWS consecutive elements starting at `ix`.
    uint ix = tile_ix * PARTITION_SIZE + gl_LocalInvocationID.x * N_ROWS;
    ElementRef ref = ElementRef(ix * Element_size);

    // Per-thread sequential scan; also track the index of the first fill
    // element this thread sees (~0 if none).
    bool is_fill;
    uint my_min_fill = ~0;
    th_state[0] = map_element(ref, is_fill);
    if (is_fill) my_min_fill = ix;
    for (uint i = 1; i < N_ROWS; i++) {
        // discussion question: would it be faster to load using more coherent patterns
        // into thread memory? This is kinda strided.
        th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i), is_fill));
        if (is_fill && my_min_fill == ~0) {
            my_min_fill = ix + i;
        }
    }
    atomicMin(sh_min_fill, my_min_fill);

    // Workgroup-wide inclusive scan of the per-thread aggregates. The State
    // is kept as separate shared arrays, one per field.
    State agg = th_state[N_ROWS - 1];
    sh_mat[gl_LocalInvocationID.x] = agg.mat;
    sh_translate[gl_LocalInvocationID.x] = agg.translate;
    sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
    sh_width[gl_LocalInvocationID.x] = agg.linewidth;
    sh_flags[gl_LocalInvocationID.x] = agg.flags;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        barrier();
        if (gl_LocalInvocationID.x >= (1 << i)) {
            State other;
            uint src = gl_LocalInvocationID.x - (1 << i);
            other.mat = sh_mat[src];
            other.translate = sh_translate[src];
            other.bbox = sh_bbox[src];
            other.linewidth = sh_width[src];
            other.flags = sh_flags[src];
            agg = combine_state(other, agg);
        }
        // All reads of this round must finish before anyone overwrites.
        barrier();
        sh_mat[gl_LocalInvocationID.x] = agg.mat;
        sh_translate[gl_LocalInvocationID.x] = agg.translate;
        sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
        sh_width[gl_LocalInvocationID.x] = agg.linewidth;
        sh_flags[gl_LocalInvocationID.x] = agg.flags;
    }

    // Identity state; stays the exclusive prefix for partition 0.
    State exclusive;
    exclusive.bbox = vec4(0.0, 0.0, 0.0, 0.0);
    exclusive.mat = vec4(1.0, 0.0, 0.0, 1.0);
    exclusive.translate = vec2(0.0, 0.0);
    exclusive.linewidth = 1.0; //TODO should be 0.0
    exclusive.flags = 0;

    // Publish aggregate for this partition. The last thread holds the
    // aggregate of the whole workgroup after the scan above.
    if (gl_LocalInvocationID.x == WG_SIZE - 1) {
        // Note: with memory model, we'd want to generate the atomic store version of this.
        State_write(state_aggregate_ref(tile_ix), agg);
        uint flag = FLAG_AGGREGATE_READY;
        if (tile_ix == 0) {
            // Partition 0 has no predecessors: its aggregate is its prefix.
            State_write(state_prefix_ref(tile_ix), agg);
            flag = FLAG_PREFIX_READY;
        }
        // The barrier follows *both* state writes, so that the prefix written
        // for partition 0 is also ordered before the flag store that
        // advertises it.
        memoryBarrierBuffer();
        state[state_flag_index(tile_ix)] = flag;
        if (tile_ix != 0) {
            // step 4 of paper: decoupled lookback
            uint look_back_ix = tile_ix - 1;
            while (true) {
                flag = state[state_flag_index(look_back_ix)];
                if (flag == FLAG_PREFIX_READY) {
                    State their_prefix = State_read(state_prefix_ref(look_back_ix));
                    exclusive = combine_state(their_prefix, exclusive);
                    break;
                } else if (flag == FLAG_AGGREGATE_READY) {
                    State their_agg = State_read(state_aggregate_ref(look_back_ix));
                    exclusive = combine_state(their_agg, exclusive);
                    look_back_ix--;
                }
                // else spin
            }

            // step 5 of paper: compute inclusive prefix
            State inclusive_prefix = combine_state(exclusive, agg);
            sh_prefix = exclusive;
            State_write(state_prefix_ref(tile_ix), inclusive_prefix);
            memoryBarrierBuffer();
            flag = FLAG_PREFIX_READY;
            state[state_flag_index(tile_ix)] = flag;
        }
    }
    barrier();
    my_min_fill = sh_min_fill;
    if (tile_ix != 0) {
        exclusive = sh_prefix;
    }

    // `row` is the exclusive prefix for this thread's first element: the
    // partition prefix combined with the scan result of the previous thread.
    State row = exclusive;
    if (gl_LocalInvocationID.x > 0) {
        uint prev = gl_LocalInvocationID.x - 1;
        State other;
        other.mat = sh_mat[prev];
        other.translate = sh_translate[prev];
        other.bbox = sh_bbox[prev];
        other.linewidth = sh_width[prev];
        other.flags = sh_flags[prev];
        row = combine_state(row, other);
    }
    // The word after the partition flag records bbox.z at the first fill
    // element in the partition, or +infinity if the partition has no fill.
    if (my_min_fill == ~0 && gl_LocalInvocationID.x == 0) {
        state[state_flag_index(tile_ix) + 1] = 0x7f800000; // infinity
    }
    for (uint i = 0; i < N_ROWS; i++) {
        State st = combine_state(row, th_state[i]);
        if (my_min_fill == ix + i) {
            state[state_flag_index(tile_ix) + 1] = floatBitsToUint(st.bbox.z);
        }
        // We write the state now for development purposes, but the
        // actual goal is to write transformed and annotated elements.
        //State_write(StateRef((ix + i) * State_size), st);

        // Here we read again from the original scene. There may be
        // gains to be had from stashing in shared memory or possibly
        // registers (though register pressure is an issue).
        ElementRef this_ref = Element_index(ref, i);
        AnnotatedRef out_ref = AnnotatedRef((ix + i) * Annotated_size);
        uint tag = Element_tag(this_ref);
        switch (tag) {
        case Element_FillLine:
        case Element_StrokeLine: {
            LineSeg line = Element_StrokeLine_read(this_ref);
            AnnoStrokeLineSeg anno_line;
            // Apply the accumulated affine transform to both endpoints.
            anno_line.p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
            anno_line.p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
            if (tag == Element_StrokeLine) {
                anno_line.stroke = get_linewidth(st);
            } else {
                anno_line.stroke = vec2(0.0);
            }
            // We do encoding a bit by hand to minimize divergence. Another approach
            // would be to have a fill/stroke bool.
            uint out_tag = tag == Element_FillLine ? Annotated_FillLine : Annotated_StrokeLine;
            annotated[out_ref.offset >> 2] = out_tag;
            AnnoStrokeLineSeg_write(AnnoStrokeLineSegRef(out_ref.offset + 4), anno_line);
            break;
        }
        case Element_Stroke: {
            Stroke stroke = Element_Stroke_read(this_ref);
            AnnoStroke anno_stroke;
            anno_stroke.rgba_color = stroke.rgba_color;
            vec2 lw = get_linewidth(st);
            // Grow the path bbox by the stroke half-extents on every side.
            anno_stroke.bbox = st.bbox + vec4(-lw, lw);
            // Scale the line width by the square root of the area scale
            // factor. abs() keeps the argument non-negative when the
            // transform includes a reflection (negative determinant), where
            // sqrt would otherwise be undefined.
            float det = st.mat.x * st.mat.w - st.mat.y * st.mat.z;
            anno_stroke.linewidth = st.linewidth * sqrt(abs(det));
            Annotated_Stroke_write(out_ref, anno_stroke);
            break;
        }
        case Element_Fill: {
            Fill fill = Element_Fill_read(this_ref);
            AnnoFill anno_fill;
            anno_fill.rgba_color = fill.rgba_color;
            anno_fill.bbox = st.bbox;
            Annotated_Fill_write(out_ref, anno_fill);
            break;
        }
        default:
            Annotated_Nop_write(out_ref);
            break;
        }
    }
}
|
BIN
piet-gpu/shader/elements.spv
Normal file
BIN
piet-gpu/shader/elements.spv
Normal file
Binary file not shown.
|
@ -1,130 +0,0 @@
|
|||
// Code auto-generated by piet-gpu-derive

// Typed byte offsets into the `fill_seg` buffer, one wrapper per record type.
struct FillTileHeaderRef {
    uint offset;
};

struct FillItemHeaderRef {
    uint offset;
};

struct FillSegmentRef {
    uint offset;
};

struct FillSegChunkRef {
    uint offset;
};

// Per-tile header: item count and reference to the first item header.
struct FillTileHeader {
    uint n;
    FillItemHeaderRef items;
};

#define FillTileHeader_size 8

// Reference to the record `index` entries after `ref`.
FillTileHeaderRef FillTileHeader_index(FillTileHeaderRef ref, uint index) {
    ref.offset += index * FillTileHeader_size;
    return ref;
}

// Per-item header: backdrop value and reference to the first segment chunk.
struct FillItemHeader {
    int backdrop;
    FillSegChunkRef segments;
};

#define FillItemHeader_size 8

// Reference to the record `index` entries after `ref`.
FillItemHeaderRef FillItemHeader_index(FillItemHeaderRef ref, uint index) {
    ref.offset += index * FillItemHeader_size;
    return ref;
}

// One line segment.
struct FillSegment {
    vec2 start;
    vec2 end;
};

#define FillSegment_size 16

// Reference to the record `index` entries after `ref`.
FillSegmentRef FillSegment_index(FillSegmentRef ref, uint index) {
    ref.offset += index * FillSegment_size;
    return ref;
}

// Chunk header: segment count and reference to the next chunk.
struct FillSegChunk {
    uint n;
    FillSegChunkRef next;
};

#define FillSegChunk_size 8

// Reference to the record `index` entries after `ref`.
FillSegChunkRef FillSegChunk_index(FillSegChunkRef ref, uint index) {
    ref.offset += index * FillSegChunk_size;
    return ref;
}

// Readers and writers. Offsets are in bytes and `fill_seg` is an array of
// 32-bit words, hence the shift by 2.

FillTileHeader FillTileHeader_read(FillTileHeaderRef ref) {
    uint word = ref.offset >> 2;
    uint count = fill_seg[word + 0];
    uint items_offset = fill_seg[word + 1];
    return FillTileHeader(count, FillItemHeaderRef(items_offset));
}

void FillTileHeader_write(FillTileHeaderRef ref, FillTileHeader s) {
    uint word = ref.offset >> 2;
    fill_seg[word + 0] = s.n;
    fill_seg[word + 1] = s.items.offset;
}

FillItemHeader FillItemHeader_read(FillItemHeaderRef ref) {
    uint word = ref.offset >> 2;
    uint backdrop_bits = fill_seg[word + 0];
    uint segments_offset = fill_seg[word + 1];
    return FillItemHeader(int(backdrop_bits), FillSegChunkRef(segments_offset));
}

void FillItemHeader_write(FillItemHeaderRef ref, FillItemHeader s) {
    uint word = ref.offset >> 2;
    fill_seg[word + 0] = uint(s.backdrop);
    fill_seg[word + 1] = s.segments.offset;
}

FillSegment FillSegment_read(FillSegmentRef ref) {
    uint word = ref.offset >> 2;
    uvec4 bits = uvec4(fill_seg[word + 0], fill_seg[word + 1],
        fill_seg[word + 2], fill_seg[word + 3]);
    // Reinterpret the four words as floats: start.xy followed by end.xy.
    vec4 coords = uintBitsToFloat(bits);
    return FillSegment(coords.xy, coords.zw);
}

void FillSegment_write(FillSegmentRef ref, FillSegment s) {
    uint word = ref.offset >> 2;
    uvec4 bits = floatBitsToUint(vec4(s.start, s.end));
    fill_seg[word + 0] = bits.x;
    fill_seg[word + 1] = bits.y;
    fill_seg[word + 2] = bits.z;
    fill_seg[word + 3] = bits.w;
}

FillSegChunk FillSegChunk_read(FillSegChunkRef ref) {
    uint word = ref.offset >> 2;
    uint count = fill_seg[word + 0];
    uint next_offset = fill_seg[word + 1];
    return FillSegChunk(count, FillSegChunkRef(next_offset));
}

void FillSegChunk_write(FillSegChunkRef ref, FillSegChunk s) {
    uint word = ref.offset >> 2;
    fill_seg[word + 0] = s.n;
    fill_seg[word + 1] = s.next.offset;
}
|
||||
|
|
@ -1,161 +0,0 @@
|
|||
// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
|
||||
// and outputs "instances" (references to item + translation) for each item
|
||||
// that intersects the tilegroup.
|
||||
//
|
||||
// This implementation is simplistic and leaves a lot of performance on the
|
||||
// table. A fancier implementation would use threadgroup shared memory or
|
||||
// subgroups (or possibly both) to parallelize the reading of the input and
|
||||
// the computation of tilegroup intersection.
|
||||
//
|
||||
// In addition, there are some features currently missing, such as support
|
||||
// for clipping.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
// It's possible we should lay this out with x and do our own math.
|
||||
layout(local_size_x = 1, local_size_y = 32) in;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
||||
uint[] tilegroup;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 2) buffer AllocBuf {
|
||||
uint alloc;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tilegroup.h"
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
#define MAX_STACK 8
|
||||
|
||||
// One frame of the explicit traversal stack used by main() to walk nested
// groups in the scene.
struct StackElement {
|
||||
// The group item this frame iterates over; re-read with
// PietItem_Group_read when the frame is popped.
PietItemRef group;
|
||||
// Index of the next child of `group` to visit.
uint index;
|
||||
// Translation accumulated from the enclosing groups; added to each child's
// bounding box and stored in the emitted instances.
vec2 offset;
|
||||
};
|
||||
|
||||
// Kernel 1 entry point: one invocation per tilegroup. Walks the scene graph
// depth-first with an explicit stack and, for every non-group item whose
// bounding box intersects this tilegroup, writes an instance to the
// tilegroup's instance list and (for Poly / Fill items) to its stroke or
// fill chunk list.
void main() {
    StackElement stack[MAX_STACK];
    uint stack_ix = 0;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
    // Leave room for two more records (the next instance plus a Jump or End).
    uint tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;

    // State for stroke references. Layout: a count word, then the first
    // chunk header, then that chunk's instances.
    TileGroupRef stroke_start = TileGroupRef(tg_ref.offset + TILEGROUP_STROKE_START);
    ChunkRef stroke_chunk_start = ChunkRef(stroke_start.offset + 4);
    InstanceRef stroke_ref = InstanceRef(stroke_chunk_start.offset + Chunk_size);
    uint stroke_limit = stroke_start.offset + TILEGROUP_INITIAL_STROKE_ALLOC - Instance_size;
    uint stroke_chunk_n = 0;
    uint stroke_n = 0;

    // State for fill references. All this is a bit cut'n'paste, but making a
    // proper abstraction isn't easy.
    TileGroupRef fill_start = TileGroupRef(tg_ref.offset + TILEGROUP_FILL_START);
    ChunkRef fill_chunk_start = ChunkRef(fill_start.offset + 4);
    InstanceRef fill_ref = InstanceRef(fill_chunk_start.offset + Chunk_size);
    uint fill_limit = fill_start.offset + TILEGROUP_INITIAL_FILL_ALLOC - Instance_size;
    uint fill_chunk_n = 0;
    uint fill_n = 0;

    // Top-left corner of this tilegroup in pixels.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
    PietItemRef root = PietItemRef(0);
    SimpleGroup group = PietItem_Group_read(root);
    StackElement tos = StackElement(root, 0, group.offset.xy);

    while (true) {
        if (tos.index < group.n_items) {
            Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
            vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
            // Strict rectangle overlap between the item and the tilegroup.
            bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
                && max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
            bool is_group = false;
            uint tag;
            if (hit) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                tag = PietItem_tag(item_ref);
                is_group = tag == PietItem_Group;
            }
            if (hit && !is_group) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                Instance ins = Instance(item_ref.offset, tos.offset);
                if (tg_ref.offset > tg_limit) {
                    // Allocation exceeded; do atomic bump alloc.
                    uint new_tg = atomicAdd(alloc, TILEGROUP_INITIAL_ALLOC);
                    Jump jump = Jump(TileGroupRef(new_tg));
                    TileGroup_Jump_write(tg_ref, jump);
                    tg_ref = TileGroupRef(new_tg);
                    tg_limit = tg_ref.offset + TILEGROUP_INITIAL_ALLOC - 2 * TileGroup_size;
                }
                TileGroup_Instance_write(tg_ref, ins);
                tg_ref.offset += TileGroup_size;
                if (tag == PietItem_Poly) {
                    if (stroke_ref.offset > stroke_limit) {
                        // Current stroke chunk is full: close it and link a new one.
                        uint new_stroke = atomicAdd(alloc, TILEGROUP_STROKE_ALLOC);
                        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(new_stroke)));
                        stroke_chunk_start = ChunkRef(new_stroke);
                        stroke_ref = InstanceRef(new_stroke + Chunk_size);
                        stroke_n += stroke_chunk_n;
                        stroke_chunk_n = 0;
                        stroke_limit = new_stroke + TILEGROUP_STROKE_ALLOC - Instance_size;
                    }
                    Instance_write(stroke_ref, ins);
                    stroke_chunk_n++;
                    stroke_ref.offset += Instance_size;
                } else if (tag == PietItem_Fill) {
                    if (fill_ref.offset > fill_limit) {
                        // Current fill chunk is full: close it and link a new one.
                        uint new_fill = atomicAdd(alloc, TILEGROUP_FILL_ALLOC);
                        Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(new_fill)));
                        fill_chunk_start = ChunkRef(new_fill);
                        fill_ref = InstanceRef(new_fill + Chunk_size);
                        fill_n += fill_chunk_n;
                        fill_chunk_n = 0;
                        fill_limit = new_fill + TILEGROUP_FILL_ALLOC - Instance_size;
                    }
                    Instance_write(fill_ref, ins);
                    fill_chunk_n++;
                    fill_ref.offset += Instance_size;
                }
            }
            if (is_group) {
                PietItemRef item_ref = PietItem_index(group.items, tos.index);
                tos.index++;
                // The parent frame only needs saving if it has children left.
                bool need_push = tos.index < group.n_items;
                if (need_push && stack_ix >= MAX_STACK) {
                    // The traversal stack is full. Skip this nested group
                    // rather than write past the end of `stack`; the parent's
                    // remaining children are still visited.
                    continue;
                }
                if (need_push) {
                    stack[stack_ix++] = tos;
                }
                group = PietItem_Group_read(item_ref);
                tos = StackElement(item_ref, 0, tos.offset + group.offset.xy);
            } else {
                tos.index++;
            }
        } else {
            // processed all items in this group; pop the stack
            if (stack_ix == 0) {
                break;
            }
            tos = stack[--stack_ix];
            group = PietItem_Group_read(tos.group);
        }
    }
    TileGroup_End_write(tg_ref);

    // Close the last stroke chunk and record the total stroke count.
    stroke_n += stroke_chunk_n;
    if (stroke_n > 0) {
        Chunk_write(stroke_chunk_start, Chunk(stroke_chunk_n, ChunkRef(0)));
    }
    tilegroup[stroke_start.offset >> 2] = stroke_n;

    // Close the last fill chunk and record the total fill count.
    fill_n += fill_chunk_n;
    if (fill_n > 0) {
        Chunk_write(fill_chunk_start, Chunk(fill_chunk_n, ChunkRef(0)));
    }
    tilegroup[fill_start.offset >> 2] = fill_n;
}
|
Binary file not shown.
|
@ -1,167 +0,0 @@
|
|||
// This is "kernel 2" (fill) in a 4-kernel pipeline. It processes the fill
|
||||
// (polyline) items in the scene and generates a list of segments for each, for
|
||||
// each tile.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
layout(local_size_x = 32) in;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
||||
uint[] tilegroup;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 2) buffer FillSegBuf {
|
||||
uint[] fill_seg;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 3) buffer AllocBuf {
|
||||
uint alloc;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tilegroup.h"
|
||||
#include "fill_seg.h"
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
// Ensure that there is space to encode a segment.
//
// chunk_n_segs:    segments already written to the current chunk; reset to 0
//                  when a new chunk is started.
// seg_chunk_ref:   current chunk; moved to fresh memory when it is full.
// first_seg_chunk: set to the current chunk when the item has no segments yet.
// seg_limit:       end of usable space in the current allocation.
void alloc_chunk(inout uint chunk_n_segs, inout FillSegChunkRef seg_chunk_ref,
    inout FillSegChunkRef first_seg_chunk, inout uint seg_limit)
{
    if (chunk_n_segs == 0) {
        // First segment of an item: start a chunk, in fresh memory if needed.
        // NOTE(review): 40 presumably stands for FillSegChunk_size +
        // 2 * FillSegment_size (8 + 2 * 16) - confirm.
        bool out_of_room = seg_chunk_ref.offset + 40 > seg_limit;
        if (out_of_room) {
            uint fresh = atomicAdd(alloc, SEG_CHUNK_ALLOC);
            seg_chunk_ref.offset = fresh;
            seg_limit = fresh + SEG_CHUNK_ALLOC - FillSegment_size;
        }
        first_seg_chunk = seg_chunk_ref;
        return;
    }

    // Offset at which the next segment of the current chunk would be written.
    uint next_seg = seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs;
    if (next_seg <= seg_limit) {
        return;
    }

    // Current chunk is full: close it with a link to a newly allocated chunk.
    uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
    seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - FillSegment_size;
    FillSegChunk closed = FillSegChunk(chunk_n_segs, FillSegChunkRef(new_chunk_ref));
    FillSegChunk_write(seg_chunk_ref, closed);
    seg_chunk_ref.offset = new_chunk_ref;
    chunk_n_segs = 0;
}
|
||||
|
||||
// Kernel 2 (fill) entry point: one invocation per tile. For every fill item
// recorded for the enclosing tilegroup, computes the backdrop value and the
// list of line segments that touch this tile, and writes them as a per-tile
// header, per-item headers and chained segment chunks in `fill_seg`.
void main() {
    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
    // Top-left corner of this tile in pixels.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    TileGroupRef fill_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_FILL_START);
    uint fill_n = tilegroup[fill_start.offset >> 2];

    FillTileHeaderRef tile_header_ref = FillTileHeaderRef(tile_ix * FillTileHeader_size);
    if (fill_n > 0) {
        ChunkRef chunk_ref = ChunkRef(fill_start.offset + 4);
        Chunk chunk = Chunk_read(chunk_ref);
        InstanceRef fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
        FillItemHeaderRef item_header = FillItemHeaderRef(atomicAdd(alloc, fill_n * FillItemHeader_size));
        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, item_header));
        // Segment allocation state shared by all items of this tile. It
        // starts at offset 0 with limit 0, so the first alloc_chunk call
        // always allocates.
        FillSegChunkRef seg_chunk_ref = FillSegChunkRef(0);
        uint seg_limit = 0;
        // Iterate through items; fill_n holds count remaining.
        while (true) {
            if (chunk.chunk_n == 0) {
                chunk_ref = chunk.next;
                if (chunk_ref.offset == 0) {
                    break;
                }
                chunk = Chunk_read(chunk_ref);
                fill_ref = InstanceRef(chunk_ref.offset + Chunk_size);
            }
            Instance ins = Instance_read(fill_ref);
            PietFill fill = PietItem_Fill_read(PietItemRef(ins.item_ref));

            // Process the fill polyline item.
            uint max_n_segs = fill.n_points - 1;
            uint chunk_n_segs = 0;
            int backdrop = 0;
            // seg_chunk_ref is deliberately NOT redeclared here. A local
            // declaration would shadow the initialised outer variable and
            // hand alloc_chunk an undefined offset for every item.
            FillSegChunkRef first_seg_chunk = FillSegChunkRef(0);
            vec2 start = Point_read(fill.points).xy;
            for (uint j = 0; j < max_n_segs; j++) {
                fill.points.offset += Point_size;
                vec2 end = Point_read(fill.points).xy;

                // Process one segment.

                // TODO: I think this would go more smoothly (and be easier to
                // make numerically robust) if it were based on clipping the line
                // to the tile box. See:
                // https://tavianator.com/fast-branchless-raybounding-box-intersections/
                vec2 xymin = min(start, end);
                vec2 xymax = max(start, end);
                // Implicit line equation a * x + b * y + c = 0 through start/end.
                float a = end.y - start.y;
                float b = start.x - end.x;
                float c = -(a * start.x + b * start.y);
                vec2 xy1 = xy0 + vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
                float ytop = max(xy0.y, xymin.y);
                float ybot = min(xy1.y, xymax.y);
                // Side of the line on which each corner of the clipped tile lies.
                float s00 = sign(b * ytop + a * xy0.x + c);
                float s01 = sign(b * ytop + a * xy1.x + c);
                float s10 = sign(b * ybot + a * xy0.x + c);
                float s11 = sign(b * ybot + a * xy1.x + c);
                float sTopLeft = sign(b * xy0.y + a * xy0.x + c);
                if (sTopLeft == sign(a) && xymin.y <= xy0.y && xymax.y > xy0.y) {
                    backdrop -= int(s00);
                }

                // This is adapted from piet-metal but could be improved.

                if (max(xymin.x, xy0.x) < min(xymax.x, xy1.x)
                    && ytop < ybot
                    && s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
                {
                    // avoid overwriting `end` so that it can be used as start
                    vec2 enc_end = end;
                    if (xymin.x < xy0.x) {
                        float yEdge = mix(start.y, end.y, (start.x - xy0.x) / b);
                        if (yEdge >= xy0.y && yEdge < xy1.y) {
                            // This is encoded the same as a general fill segment, but could be
                            // special-cased, either here or in rendering. (It was special-cased
                            // in piet-metal).
                            FillSegment edge_seg;
                            if (b > 0.0) {
                                enc_end = vec2(xy0.x, yEdge);
                                edge_seg.start = enc_end;
                                edge_seg.end = vec2(xy0.x, xy1.y);
                            } else {
                                start = vec2(xy0.x, yEdge);
                                edge_seg.start = vec2(xy0.x, xy1.y);
                                edge_seg.end = start;
                            }
                            alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
                            FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), edge_seg);
                            chunk_n_segs++;
                        }
                    }
                    alloc_chunk(chunk_n_segs, seg_chunk_ref, first_seg_chunk, seg_limit);
                    FillSegment seg = FillSegment(start, enc_end);
                    FillSegment_write(FillSegmentRef(seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * chunk_n_segs), seg);
                    chunk_n_segs++;
                }

                start = end;
            }
            FillItemHeader_write(item_header, FillItemHeader(backdrop, first_seg_chunk));
            if (chunk_n_segs != 0) {
                // Close the item's last chunk and step past it, so the next
                // item continues in the same allocation.
                FillSegChunk_write(seg_chunk_ref, FillSegChunk(chunk_n_segs, FillSegChunkRef(0)));
                seg_chunk_ref.offset += FillSegChunk_size + FillSegment_size * chunk_n_segs;
            }

            fill_ref.offset += Instance_size;
            chunk.chunk_n--;
            item_header.offset += FillItemHeader_size;
        }
    } else {
        // As an optimization, we could just write 0 for the size.
        FillTileHeader_write(tile_header_ref, FillTileHeader(fill_n, FillItemHeaderRef(0)));
    }
}
|
Binary file not shown.
|
@ -1,137 +0,0 @@
|
|||
// This is "kernel 2" (strokes) in a 4-kernel pipeline. It processes the stroke
|
||||
// (polyline) items in the scene and generates a list of segments for each, for
|
||||
// each tile.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
layout(local_size_x = 32) in;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
||||
uint[] tilegroup;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 2) buffer SegmentBuf {
|
||||
uint[] segment;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 3) buffer AllocBuf {
|
||||
uint alloc;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tilegroup.h"
|
||||
#include "segment.h"
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
void main() {
|
||||
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
|
||||
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
|
||||
+ (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
|
||||
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
|
||||
TileGroupRef stroke_start = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE + TILEGROUP_STROKE_START);
|
||||
uint stroke_n = tilegroup[stroke_start.offset >> 2];
|
||||
|
||||
TileHeaderRef tile_header_ref = TileHeaderRef(tile_ix * TileHeader_size);
|
||||
if (stroke_n > 0) {
|
||||
ChunkRef chunk_ref = ChunkRef(stroke_start.offset + 4);
|
||||
Chunk chunk = Chunk_read(chunk_ref);
|
||||
InstanceRef stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
|
||||
ItemHeaderRef item_header = ItemHeaderRef(atomicAdd(alloc, stroke_n * ItemHeader_size));
|
||||
TileHeader_write(tile_header_ref, TileHeader(stroke_n, item_header));
|
||||
SegChunkRef seg_chunk_ref = SegChunkRef(0);
|
||||
uint seg_limit = 0;
|
||||
// Iterate through items; stroke_n holds count remaining.
|
||||
while (true) {
|
||||
if (chunk.chunk_n == 0) {
|
||||
chunk_ref = chunk.next;
|
||||
if (chunk_ref.offset == 0) {
|
||||
break;
|
||||
}
|
||||
chunk = Chunk_read(chunk_ref);
|
||||
stroke_ref = InstanceRef(chunk_ref.offset + Chunk_size);
|
||||
}
|
||||
Instance ins = Instance_read(stroke_ref);
|
||||
PietStrokePolyLine poly = PietItem_Poly_read(PietItemRef(ins.item_ref));
|
||||
|
||||
// Process the stroke polyline item.
|
||||
uint max_n_segs = poly.n_points - 1;
|
||||
uint chunk_n_segs = 0;
|
||||
SegChunkRef seg_chunk_ref;
|
||||
vec2 start = Point_read(poly.points).xy;
|
||||
for (uint j = 0; j < max_n_segs; j++) {
|
||||
poly.points.offset += Point_size;
|
||||
vec2 end = Point_read(poly.points).xy;
|
||||
|
||||
// Process one segment.
|
||||
|
||||
// This logic just tests for collision. What we probably want to do
|
||||
// is a clipping algorithm like Liang-Barsky, and then store coords
|
||||
// relative to the tile in f16. See also:
|
||||
// https://tavianator.com/fast-branchless-raybounding-box-intersections/
|
||||
|
||||
// Also note that when we go to the fancy version, we want to compute
|
||||
// the (horizontal projection of) the bounding box of the intersection
|
||||
// once per tilegroup, so we can assign work to individual tiles.
|
||||
|
||||
float a = end.y - start.y;
|
||||
float b = start.x - end.x;
|
||||
float c = -(a * start.x + b * start.y);
|
||||
float half_width = 0.5 * poly.width;
|
||||
// Tile boundaries padded by half-width.
|
||||
float xmin = xy0.x - half_width;
|
||||
float ymin = xy0.y - half_width;
|
||||
float xmax = xy0.x + float(TILE_WIDTH_PX) + half_width;
|
||||
float ymax = xy0.y + float(TILE_HEIGHT_PX) + half_width;
|
||||
float s00 = sign(b * ymin + a * xmin + c);
|
||||
float s01 = sign(b * ymin + a * xmax + c);
|
||||
float s10 = sign(b * ymax + a * xmin + c);
|
||||
float s11 = sign(b * ymax + a * xmax + c);
|
||||
// If bounding boxes intersect and not all four corners are on the same side, hit.
|
||||
// Also note: this is designed to be false on NAN input.
|
||||
if (max(min(start.x, end.x), xmin) < min(max(start.x, end.x), xmax)
|
||||
&& max(min(start.y, end.y), ymin) < min(max(start.y, end.y), ymax)
|
||||
&& s00 * s01 + s00 * s10 + s00 * s11 < 3.0)
|
||||
{
|
||||
// Allocate a chunk if needed.
|
||||
if (chunk_n_segs == 0) {
|
||||
if (seg_chunk_ref.offset + 40 > seg_limit) {
|
||||
seg_chunk_ref.offset = atomicAdd(alloc, SEG_CHUNK_ALLOC);
|
||||
seg_limit = seg_chunk_ref.offset + SEG_CHUNK_ALLOC - Segment_size;
|
||||
}
|
||||
ItemHeader_write(item_header, ItemHeader(seg_chunk_ref));
|
||||
} else if (seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs > seg_limit) {
|
||||
uint new_chunk_ref = atomicAdd(alloc, SEG_CHUNK_ALLOC);
|
||||
seg_limit = new_chunk_ref + SEG_CHUNK_ALLOC - Segment_size;
|
||||
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(new_chunk_ref)));
|
||||
seg_chunk_ref.offset = new_chunk_ref;
|
||||
chunk_n_segs = 0;
|
||||
}
|
||||
Segment seg = Segment(start, end);
|
||||
Segment_write(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * chunk_n_segs), seg);
|
||||
chunk_n_segs++;
|
||||
}
|
||||
|
||||
start = end;
|
||||
}
|
||||
if (chunk_n_segs == 0) {
|
||||
ItemHeader_write(item_header, ItemHeader(SegChunkRef(0)));
|
||||
} else {
|
||||
SegChunk_write(seg_chunk_ref, SegChunk(chunk_n_segs, SegChunkRef(0)));
|
||||
seg_chunk_ref.offset += SegChunk_size + Segment_size * chunk_n_segs;
|
||||
}
|
||||
|
||||
stroke_ref.offset += Instance_size;
|
||||
chunk.chunk_n--;
|
||||
item_header.offset += ItemHeader_size;
|
||||
}
|
||||
} else {
|
||||
// As an optimization, we could just write 0 for the size.
|
||||
TileHeader_write(tile_header_ref, TileHeader(stroke_n, ItemHeaderRef(0)));
|
||||
}
|
||||
}
|
Binary file not shown.
|
@ -1,135 +0,0 @@
|
|||
// This is "kernel 3" in a 4-kernel pipeline. It walks the active items
|
||||
// for the tilegroup and produces a per-tile command list for each tile.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
layout(local_size_x = 32, local_size_y = 1) in;
|
||||
|
||||
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
// TODO: this should have a `readonly` qualifier, but then inclusion
|
||||
// of ptcl.h would fail because of the writers.
|
||||
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
||||
uint[] tilegroup;
|
||||
};
|
||||
|
||||
// Used readonly
|
||||
layout(set = 0, binding = 2) buffer SegmentBuf {
|
||||
uint[] segment;
|
||||
};
|
||||
|
||||
// Used readonly
|
||||
layout(set = 0, binding = 3) buffer FillSegmentBuf {
|
||||
uint[] fill_seg;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 4) buffer PtclBuf {
|
||||
uint[] ptcl;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 5) buffer AllocBuf {
|
||||
uint alloc;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tilegroup.h"
|
||||
#include "segment.h"
|
||||
#include "fill_seg.h"
|
||||
#include "ptcl.h"
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
void alloc_cmd(inout CmdRef cmd_ref, inout uint cmd_limit) {
|
||||
if (cmd_ref.offset > cmd_limit) {
|
||||
uint new_cmd = atomicAdd(alloc, PTCL_INITIAL_ALLOC);
|
||||
CmdJump jump = CmdJump(new_cmd);
|
||||
Cmd_Jump_write(cmd_ref, jump);
|
||||
cmd_ref = CmdRef(new_cmd);
|
||||
cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
|
||||
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
|
||||
+ (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
|
||||
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
|
||||
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_STRIDE);
|
||||
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
|
||||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 2 * Cmd_size;
|
||||
|
||||
TileHeader stroke_th = TileHeader_read(TileHeaderRef(tile_ix * TileHeader_size));
|
||||
FillTileHeader fill_th = FillTileHeader_read(FillTileHeaderRef(tile_ix * FillTileHeader_size));
|
||||
|
||||
while (true) {
|
||||
uint tg_tag = TileGroup_tag(tg_ref);
|
||||
if (tg_tag == TileGroup_End) {
|
||||
break;
|
||||
}
|
||||
if (tg_tag == TileGroup_Jump) {
|
||||
tg_ref = TileGroup_Jump_read(tg_ref).new_ref;
|
||||
continue;
|
||||
}
|
||||
// Assume tg_tag is `Instance`, though there will be more cases.
|
||||
Instance ins = TileGroup_Instance_read(tg_ref);
|
||||
PietItemRef item_ref = PietItemRef(ins.item_ref);
|
||||
uint item_tag = PietItem_tag(item_ref);
|
||||
switch (item_tag) {
|
||||
case PietItem_Circle:
|
||||
PietCircle circle = PietItem_Circle_read(item_ref);
|
||||
vec2 center = ins.offset + circle.center.xy;
|
||||
float r = circle.radius;
|
||||
if (max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX))
|
||||
&& max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
|
||||
{
|
||||
CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Circle_write(cmd_ref, cmd);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
break;
|
||||
case PietItem_Poly:
|
||||
ItemHeader stroke_item = ItemHeader_read(stroke_th.items);
|
||||
stroke_th.items.offset += ItemHeader_size;
|
||||
if (stroke_item.segments.offset != 0) {
|
||||
PietStrokePolyLine poly = PietItem_Poly_read(item_ref);
|
||||
CmdStroke cmd = CmdStroke(
|
||||
stroke_item.segments.offset,
|
||||
0.5 * poly.width,
|
||||
poly.rgba_color
|
||||
);
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Stroke_write(cmd_ref, cmd);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
break;
|
||||
case PietItem_Fill:
|
||||
FillItemHeader fill_item = FillItemHeader_read(fill_th.items);
|
||||
fill_th.items.offset += FillItemHeader_size;
|
||||
// TODO: handle segments == 0 but backdrop != specially, it's a solid tile.
|
||||
if (fill_item.segments.offset != 0) {
|
||||
PietFill fill = PietItem_Fill_read(item_ref);
|
||||
CmdFill cmd = CmdFill(
|
||||
fill_item.segments.offset,
|
||||
fill_item.backdrop,
|
||||
fill.rgba_color
|
||||
);
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Fill_write(cmd_ref, cmd);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
} else if (fill_item.backdrop != 0) {
|
||||
// TODO: truncate existing cmd list if alpha is opaque
|
||||
PietFill fill = PietItem_Fill_read(item_ref);
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Solid_write(cmd_ref, CmdSolid(fill.rgba_color));
|
||||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
break;
|
||||
}
|
||||
tg_ref.offset += TileGroup_size;
|
||||
}
|
||||
Cmd_End_write(cmd_ref);
|
||||
}
|
Binary file not shown.
|
@ -6,29 +6,20 @@
|
|||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#extension GL_KHR_shader_subgroup_basic : enable
|
||||
|
||||
layout(local_size_x = 16, local_size_y = 16) in;
|
||||
#define CHUNK 8
|
||||
#define CHUNK_DY (16 / CHUNK)
|
||||
layout(local_size_x = 16, local_size_y = 2) in;
|
||||
|
||||
// Same concern that this should be readonly as in kernel 3.
|
||||
layout(set = 0, binding = 0) buffer PtclBuf {
|
||||
uint[] ptcl;
|
||||
};
|
||||
|
||||
// Used readonly
|
||||
layout(set = 0, binding = 1) buffer SegmentBuf {
|
||||
uint[] segment;
|
||||
};
|
||||
|
||||
// Used readonly
|
||||
layout(set = 0, binding = 2) buffer FillSegBuf {
|
||||
uint[] fill_seg;
|
||||
};
|
||||
|
||||
layout(rgba8, set = 0, binding = 3) uniform writeonly image2D image;
|
||||
layout(rgba8, set = 0, binding = 1) uniform writeonly image2D image;
|
||||
|
||||
#include "ptcl.h"
|
||||
#include "segment.h"
|
||||
#include "fill_seg.h"
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
|
@ -36,10 +27,14 @@ void main() {
|
|||
uint tile_ix = gl_WorkGroupID.y * WIDTH_IN_TILES + gl_WorkGroupID.x;
|
||||
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
|
||||
|
||||
uvec2 xy_uint = gl_GlobalInvocationID.xy;
|
||||
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
||||
vec2 xy = vec2(xy_uint);
|
||||
vec2 uv = xy * vec2(1.0 / IMAGE_WIDTH, 1.0 / IMAGE_HEIGHT);
|
||||
vec3 rgb = uv.xyy;
|
||||
//vec3 rgb = uv.xyy;
|
||||
vec3 rgb[CHUNK];
|
||||
for (uint i = 0; i < CHUNK; i++) {
|
||||
rgb[i] = vec3(0.5);
|
||||
}
|
||||
|
||||
while (true) {
|
||||
uint tag = Cmd_tag(cmd_ref);
|
||||
|
@ -49,65 +44,85 @@ void main() {
|
|||
switch (tag) {
|
||||
case Cmd_Circle:
|
||||
CmdCircle circle = Cmd_Circle_read(cmd_ref);
|
||||
float r = length(xy + vec2(0.5, 0.5) - circle.center.xy);
|
||||
float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0);
|
||||
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color).wzyx;
|
||||
// TODO: sRGB
|
||||
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
for (uint i = 0; i < CHUNK; i++) {
|
||||
float dy = float(i * CHUNK_DY);
|
||||
float r = length(vec2(xy.x, xy.y + dy) + vec2(0.5, 0.5) - circle.center.xy);
|
||||
float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0);
|
||||
// TODO: sRGB
|
||||
rgb[i] = mix(rgb[i], fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
}
|
||||
break;
|
||||
case Cmd_Stroke:
|
||||
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
||||
float df = 1e9;
|
||||
SegChunkRef seg_chunk_ref = SegChunkRef(stroke.seg_ref);
|
||||
float df[CHUNK];
|
||||
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
||||
SegChunkRef seg_chunk_ref = stroke.seg_ref;
|
||||
do {
|
||||
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
|
||||
SegmentRef segs = seg_chunk.segs;
|
||||
for (int i = 0; i < seg_chunk.n; i++) {
|
||||
Segment seg = Segment_read(SegmentRef(seg_chunk_ref.offset + SegChunk_size + Segment_size * i));
|
||||
Segment seg = Segment_read(Segment_index(segs, i));
|
||||
vec2 line_vec = seg.end - seg.start;
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
df = min(df, length(line_vec * t - dpos));
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||
dpos.y += float(k * CHUNK_DY);
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
df[k] = min(df[k], length(line_vec * t - dpos));
|
||||
}
|
||||
}
|
||||
seg_chunk_ref = seg_chunk.next;
|
||||
} while (seg_chunk_ref.offset != 0);
|
||||
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
|
||||
alpha = clamp(stroke.half_width + 0.5 - df, 0.0, 1.0);
|
||||
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
float alpha = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
|
||||
rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
}
|
||||
break;
|
||||
case Cmd_Fill:
|
||||
CmdFill fill = Cmd_Fill_read(cmd_ref);
|
||||
// Probably better to store as float, but conversion is no doubt cheap.
|
||||
float area = float(fill.backdrop);
|
||||
FillSegChunkRef fill_seg_chunk_ref = FillSegChunkRef(fill.seg_ref);
|
||||
float area[CHUNK];
|
||||
for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
|
||||
SegChunkRef fill_seg_chunk_ref = fill.seg_ref;
|
||||
do {
|
||||
FillSegChunk seg_chunk = FillSegChunk_read(fill_seg_chunk_ref);
|
||||
SegChunk seg_chunk = SegChunk_read(fill_seg_chunk_ref);
|
||||
SegmentRef segs = seg_chunk.segs;
|
||||
for (int i = 0; i < seg_chunk.n; i++) {
|
||||
FillSegment seg = FillSegment_read(FillSegmentRef(fill_seg_chunk_ref.offset + FillSegChunk_size + FillSegment_size * i));
|
||||
vec2 start = seg.start - xy;
|
||||
vec2 end = seg.end - xy;
|
||||
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
||||
if (window.x != window.y) {
|
||||
vec2 t = (window - start.y) / (end.y - start.y);
|
||||
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
|
||||
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
|
||||
float xmax = max(xs.x, xs.y);
|
||||
float b = min(xmax, 1.0);
|
||||
float c = max(b, 0.0);
|
||||
float d = max(xmin, 0.0);
|
||||
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
||||
area += a * (window.x - window.y);
|
||||
Segment seg = Segment_read(Segment_index(segs, i));
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY));
|
||||
vec2 start = seg.start - my_xy;
|
||||
vec2 end = seg.end - my_xy;
|
||||
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
||||
if (window.x != window.y) {
|
||||
vec2 t = (window - start.y) / (end.y - start.y);
|
||||
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
|
||||
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
|
||||
float xmax = max(xs.x, xs.y);
|
||||
float b = min(xmax, 1.0);
|
||||
float c = max(b, 0.0);
|
||||
float d = max(xmin, 0.0);
|
||||
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
||||
area[k] += a * (window.x - window.y);
|
||||
}
|
||||
area[k] += sign(end.x - start.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
||||
}
|
||||
}
|
||||
fill_seg_chunk_ref = seg_chunk.next;
|
||||
} while (fill_seg_chunk_ref.offset != 0);
|
||||
fg_rgba = unpackUnorm4x8(fill.rgba_color).wzyx;
|
||||
alpha = min(abs(area), 1.0);
|
||||
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
float alpha = min(abs(area[k]), 1.0);
|
||||
rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
}
|
||||
break;
|
||||
case Cmd_Solid:
|
||||
CmdSolid solid = Cmd_Solid_read(cmd_ref);
|
||||
fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;
|
||||
rgb = mix(rgb, fg_rgba.rgb, fg_rgba.a);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
rgb[k] = mix(rgb[k], fg_rgba.rgb, fg_rgba.a);
|
||||
}
|
||||
break;
|
||||
case Cmd_Jump:
|
||||
cmd_ref = CmdRef(Cmd_Jump_read(cmd_ref).new_ref);
|
||||
|
@ -116,5 +131,8 @@ void main() {
|
|||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
|
||||
imageStore(image, ivec2(xy_uint), vec4(rgb, 1.0));
|
||||
// TODO: sRGB
|
||||
for (uint i = 0; i < CHUNK; i++) {
|
||||
imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(rgb[i], 1.0));
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -36,6 +36,14 @@ struct CmdRef {
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct SegmentRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct SegChunkRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct CmdCircle {
|
||||
vec2 center;
|
||||
float radius;
|
||||
|
@ -60,7 +68,7 @@ CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
|
|||
}
|
||||
|
||||
struct CmdStroke {
|
||||
uint seg_ref;
|
||||
SegChunkRef seg_ref;
|
||||
float half_width;
|
||||
uint rgba_color;
|
||||
};
|
||||
|
@ -72,7 +80,7 @@ CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
|
|||
}
|
||||
|
||||
struct CmdFill {
|
||||
uint seg_ref;
|
||||
SegChunkRef seg_ref;
|
||||
int backdrop;
|
||||
uint rgba_color;
|
||||
};
|
||||
|
@ -141,6 +149,30 @@ CmdRef Cmd_index(CmdRef ref, uint index) {
|
|||
return CmdRef(ref.offset + index * Cmd_size);
|
||||
}
|
||||
|
||||
struct Segment {
|
||||
vec2 start;
|
||||
vec2 end;
|
||||
float y_edge;
|
||||
};
|
||||
|
||||
#define Segment_size 20
|
||||
|
||||
SegmentRef Segment_index(SegmentRef ref, uint index) {
|
||||
return SegmentRef(ref.offset + index * Segment_size);
|
||||
}
|
||||
|
||||
struct SegChunk {
|
||||
uint n;
|
||||
SegChunkRef next;
|
||||
SegmentRef segs;
|
||||
};
|
||||
|
||||
#define SegChunk_size 12
|
||||
|
||||
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
|
||||
return SegChunkRef(ref.offset + index * SegChunk_size);
|
||||
}
|
||||
|
||||
CmdCircle CmdCircle_read(CmdCircleRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = ptcl[ix + 0];
|
||||
|
@ -188,7 +220,7 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
|||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
CmdStroke s;
|
||||
s.seg_ref = raw0;
|
||||
s.seg_ref = SegChunkRef(raw0);
|
||||
s.half_width = uintBitsToFloat(raw1);
|
||||
s.rgba_color = raw2;
|
||||
return s;
|
||||
|
@ -196,7 +228,7 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
|||
|
||||
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.seg_ref;
|
||||
ptcl[ix + 0] = s.seg_ref.offset;
|
||||
ptcl[ix + 1] = floatBitsToUint(s.half_width);
|
||||
ptcl[ix + 2] = s.rgba_color;
|
||||
}
|
||||
|
@ -207,7 +239,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
|
|||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
CmdFill s;
|
||||
s.seg_ref = raw0;
|
||||
s.seg_ref = SegChunkRef(raw0);
|
||||
s.backdrop = int(raw1);
|
||||
s.rgba_color = raw2;
|
||||
return s;
|
||||
|
@ -215,7 +247,7 @@ CmdFill CmdFill_read(CmdFillRef ref) {
|
|||
|
||||
void CmdFill_write(CmdFillRef ref, CmdFill s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.seg_ref;
|
||||
ptcl[ix + 0] = s.seg_ref.offset;
|
||||
ptcl[ix + 1] = uint(s.backdrop);
|
||||
ptcl[ix + 2] = s.rgba_color;
|
||||
}
|
||||
|
@ -362,3 +394,45 @@ void Cmd_Bail_write(CmdRef ref) {
|
|||
ptcl[ref.offset >> 2] = Cmd_Bail;
|
||||
}
|
||||
|
||||
Segment Segment_read(SegmentRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = ptcl[ix + 0];
|
||||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
uint raw3 = ptcl[ix + 3];
|
||||
uint raw4 = ptcl[ix + 4];
|
||||
Segment s;
|
||||
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.y_edge = uintBitsToFloat(raw4);
|
||||
return s;
|
||||
}
|
||||
|
||||
void Segment_write(SegmentRef ref, Segment s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = floatBitsToUint(s.start.x);
|
||||
ptcl[ix + 1] = floatBitsToUint(s.start.y);
|
||||
ptcl[ix + 2] = floatBitsToUint(s.end.x);
|
||||
ptcl[ix + 3] = floatBitsToUint(s.end.y);
|
||||
ptcl[ix + 4] = floatBitsToUint(s.y_edge);
|
||||
}
|
||||
|
||||
SegChunk SegChunk_read(SegChunkRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = ptcl[ix + 0];
|
||||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
SegChunk s;
|
||||
s.n = raw0;
|
||||
s.next = SegChunkRef(raw1);
|
||||
s.segs = SegmentRef(raw2);
|
||||
return s;
|
||||
}
|
||||
|
||||
void SegChunk_write(SegChunkRef ref, SegChunk s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.n;
|
||||
ptcl[ix + 1] = s.next.offset;
|
||||
ptcl[ix + 2] = s.segs.offset;
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,38 @@ struct PietItemRef {
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct LineSegRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct QuadSegRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct CubicSegRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct FillRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct StrokeRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct SetLineWidthRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct TransformRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct ElementRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Bbox {
|
||||
ivec4 bbox;
|
||||
};
|
||||
|
@ -128,6 +160,98 @@ PietItemRef PietItem_index(PietItemRef ref, uint index) {
|
|||
return PietItemRef(ref.offset + index * PietItem_size);
|
||||
}
|
||||
|
||||
struct LineSeg {
|
||||
vec2 p0;
|
||||
vec2 p1;
|
||||
};
|
||||
|
||||
#define LineSeg_size 16
|
||||
|
||||
LineSegRef LineSeg_index(LineSegRef ref, uint index) {
|
||||
return LineSegRef(ref.offset + index * LineSeg_size);
|
||||
}
|
||||
|
||||
struct QuadSeg {
|
||||
vec2 p0;
|
||||
vec2 p1;
|
||||
vec2 p2;
|
||||
};
|
||||
|
||||
#define QuadSeg_size 24
|
||||
|
||||
QuadSegRef QuadSeg_index(QuadSegRef ref, uint index) {
|
||||
return QuadSegRef(ref.offset + index * QuadSeg_size);
|
||||
}
|
||||
|
||||
struct CubicSeg {
|
||||
vec2 p0;
|
||||
vec2 p1;
|
||||
vec2 p2;
|
||||
vec2 p3;
|
||||
};
|
||||
|
||||
#define CubicSeg_size 32
|
||||
|
||||
CubicSegRef CubicSeg_index(CubicSegRef ref, uint index) {
|
||||
return CubicSegRef(ref.offset + index * CubicSeg_size);
|
||||
}
|
||||
|
||||
struct Fill {
|
||||
uint rgba_color;
|
||||
};
|
||||
|
||||
#define Fill_size 4
|
||||
|
||||
FillRef Fill_index(FillRef ref, uint index) {
|
||||
return FillRef(ref.offset + index * Fill_size);
|
||||
}
|
||||
|
||||
struct Stroke {
|
||||
uint rgba_color;
|
||||
};
|
||||
|
||||
#define Stroke_size 4
|
||||
|
||||
StrokeRef Stroke_index(StrokeRef ref, uint index) {
|
||||
return StrokeRef(ref.offset + index * Stroke_size);
|
||||
}
|
||||
|
||||
struct SetLineWidth {
|
||||
float width;
|
||||
};
|
||||
|
||||
#define SetLineWidth_size 4
|
||||
|
||||
SetLineWidthRef SetLineWidth_index(SetLineWidthRef ref, uint index) {
|
||||
return SetLineWidthRef(ref.offset + index * SetLineWidth_size);
|
||||
}
|
||||
|
||||
struct Transform {
|
||||
vec4 mat;
|
||||
vec2 translate;
|
||||
};
|
||||
|
||||
#define Transform_size 24
|
||||
|
||||
TransformRef Transform_index(TransformRef ref, uint index) {
|
||||
return TransformRef(ref.offset + index * Transform_size);
|
||||
}
|
||||
|
||||
#define Element_Nop 0
|
||||
#define Element_StrokeLine 1
|
||||
#define Element_FillLine 2
|
||||
#define Element_Quad 3
|
||||
#define Element_Cubic 4
|
||||
#define Element_Stroke 5
|
||||
#define Element_Fill 6
|
||||
#define Element_SetLineWidth 7
|
||||
#define Element_Transform 8
|
||||
#define Element_size 36
|
||||
|
||||
ElementRef Element_index(ElementRef ref, uint index) {
|
||||
return ElementRef(ref.offset + index * Element_size);
|
||||
}
|
||||
|
||||
Bbox Bbox_read(BboxRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = scene[ix + 0];
|
||||
|
@ -236,3 +360,122 @@ PietStrokePolyLine PietItem_Poly_read(PietItemRef ref) {
|
|||
return PietStrokePolyLine_read(PietStrokePolyLineRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
LineSeg LineSeg_read(LineSegRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = scene[ix + 0];
|
||||
uint raw1 = scene[ix + 1];
|
||||
uint raw2 = scene[ix + 2];
|
||||
uint raw3 = scene[ix + 3];
|
||||
LineSeg s;
|
||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
return s;
|
||||
}
|
||||
|
||||
QuadSeg QuadSeg_read(QuadSegRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = scene[ix + 0];
|
||||
uint raw1 = scene[ix + 1];
|
||||
uint raw2 = scene[ix + 2];
|
||||
uint raw3 = scene[ix + 3];
|
||||
uint raw4 = scene[ix + 4];
|
||||
uint raw5 = scene[ix + 5];
|
||||
QuadSeg s;
|
||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||
return s;
|
||||
}
|
||||
|
||||
CubicSeg CubicSeg_read(CubicSegRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = scene[ix + 0];
|
||||
uint raw1 = scene[ix + 1];
|
||||
uint raw2 = scene[ix + 2];
|
||||
uint raw3 = scene[ix + 3];
|
||||
uint raw4 = scene[ix + 4];
|
||||
uint raw5 = scene[ix + 5];
|
||||
uint raw6 = scene[ix + 6];
|
||||
uint raw7 = scene[ix + 7];
|
||||
CubicSeg s;
|
||||
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
|
||||
return s;
|
||||
}
|
||||
|
||||
Fill Fill_read(FillRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = scene[ix + 0];
|
||||
Fill s;
|
||||
s.rgba_color = raw0;
|
||||
return s;
|
||||
}
|
||||
|
||||
Stroke Stroke_read(StrokeRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = scene[ix + 0];
|
||||
Stroke s;
|
||||
s.rgba_color = raw0;
|
||||
return s;
|
||||
}
|
||||
|
||||
SetLineWidth SetLineWidth_read(SetLineWidthRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = scene[ix + 0];
|
||||
SetLineWidth s;
|
||||
s.width = uintBitsToFloat(raw0);
|
||||
return s;
|
||||
}
|
||||
|
||||
Transform Transform_read(TransformRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = scene[ix + 0];
|
||||
uint raw1 = scene[ix + 1];
|
||||
uint raw2 = scene[ix + 2];
|
||||
uint raw3 = scene[ix + 3];
|
||||
uint raw4 = scene[ix + 4];
|
||||
uint raw5 = scene[ix + 5];
|
||||
Transform s;
|
||||
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
|
||||
return s;
|
||||
}
|
||||
|
||||
uint Element_tag(ElementRef ref) {
|
||||
return scene[ref.offset >> 2];
|
||||
}
|
||||
|
||||
LineSeg Element_StrokeLine_read(ElementRef ref) {
|
||||
return LineSeg_read(LineSegRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
LineSeg Element_FillLine_read(ElementRef ref) {
|
||||
return LineSeg_read(LineSegRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
QuadSeg Element_Quad_read(ElementRef ref) {
|
||||
return QuadSeg_read(QuadSegRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
CubicSeg Element_Cubic_read(ElementRef ref) {
|
||||
return CubicSeg_read(CubicSegRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
Stroke Element_Stroke_read(ElementRef ref) {
|
||||
return Stroke_read(StrokeRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
Fill Element_Fill_read(ElementRef ref) {
|
||||
return Fill_read(FillRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
SetLineWidth Element_SetLineWidth_read(ElementRef ref) {
|
||||
return SetLineWidth_read(SetLineWidthRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
Transform Element_Transform_read(ElementRef ref) {
|
||||
return Transform_read(TransformRef(ref.offset + 4));
|
||||
}
|
||||
|
||||
|
|
|
@ -1,126 +0,0 @@
|
|||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
struct TileHeaderRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct ItemHeaderRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct SegmentRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct SegChunkRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct TileHeader {
|
||||
uint n;
|
||||
ItemHeaderRef items;
|
||||
};
|
||||
|
||||
#define TileHeader_size 8
|
||||
|
||||
TileHeaderRef TileHeader_index(TileHeaderRef ref, uint index) {
|
||||
return TileHeaderRef(ref.offset + index * TileHeader_size);
|
||||
}
|
||||
|
||||
struct ItemHeader {
|
||||
SegChunkRef segments;
|
||||
};
|
||||
|
||||
#define ItemHeader_size 4
|
||||
|
||||
ItemHeaderRef ItemHeader_index(ItemHeaderRef ref, uint index) {
|
||||
return ItemHeaderRef(ref.offset + index * ItemHeader_size);
|
||||
}
|
||||
|
||||
struct Segment {
|
||||
vec2 start;
|
||||
vec2 end;
|
||||
};
|
||||
|
||||
#define Segment_size 16
|
||||
|
||||
SegmentRef Segment_index(SegmentRef ref, uint index) {
|
||||
return SegmentRef(ref.offset + index * Segment_size);
|
||||
}
|
||||
|
||||
struct SegChunk {
|
||||
uint n;
|
||||
SegChunkRef next;
|
||||
};
|
||||
|
||||
#define SegChunk_size 8
|
||||
|
||||
SegChunkRef SegChunk_index(SegChunkRef ref, uint index) {
|
||||
return SegChunkRef(ref.offset + index * SegChunk_size);
|
||||
}
|
||||
|
||||
TileHeader TileHeader_read(TileHeaderRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = segment[ix + 0];
|
||||
uint raw1 = segment[ix + 1];
|
||||
TileHeader s;
|
||||
s.n = raw0;
|
||||
s.items = ItemHeaderRef(raw1);
|
||||
return s;
|
||||
}
|
||||
|
||||
void TileHeader_write(TileHeaderRef ref, TileHeader s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
segment[ix + 0] = s.n;
|
||||
segment[ix + 1] = s.items.offset;
|
||||
}
|
||||
|
||||
ItemHeader ItemHeader_read(ItemHeaderRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = segment[ix + 0];
|
||||
ItemHeader s;
|
||||
s.segments = SegChunkRef(raw0);
|
||||
return s;
|
||||
}
|
||||
|
||||
void ItemHeader_write(ItemHeaderRef ref, ItemHeader s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
segment[ix + 0] = s.segments.offset;
|
||||
}
|
||||
|
||||
Segment Segment_read(SegmentRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = segment[ix + 0];
|
||||
uint raw1 = segment[ix + 1];
|
||||
uint raw2 = segment[ix + 2];
|
||||
uint raw3 = segment[ix + 3];
|
||||
Segment s;
|
||||
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||
return s;
|
||||
}
|
||||
|
||||
void Segment_write(SegmentRef ref, Segment s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
segment[ix + 0] = floatBitsToUint(s.start.x);
|
||||
segment[ix + 1] = floatBitsToUint(s.start.y);
|
||||
segment[ix + 2] = floatBitsToUint(s.end.x);
|
||||
segment[ix + 3] = floatBitsToUint(s.end.y);
|
||||
}
|
||||
|
||||
SegChunk SegChunk_read(SegChunkRef ref) {
|
||||
uint ix = ref.offset >> 2;
|
||||
uint raw0 = segment[ix + 0];
|
||||
uint raw1 = segment[ix + 1];
|
||||
SegChunk s;
|
||||
s.n = raw0;
|
||||
s.next = SegChunkRef(raw1);
|
||||
return s;
|
||||
}
|
||||
|
||||
// Stores `s` into the `segment` array starting at index `ref.offset >> 2`:
// `n` goes to word 0 and the offset of `next` to word 1.
void SegChunk_write(SegChunkRef ref, SegChunk s) {
    uint base = ref.offset >> 2;
    uint next_offset = s.next.offset;
    segment[base + 0] = s.n;
    segment[base + 1] = next_offset;
}
|
||||
|
|
@ -39,4 +39,26 @@
|
|||
|
||||
// Maximum number of segments in a SegChunk
|
||||
#define SEG_CHUNK_N 32
|
||||
#define SEG_CHUNK_ALLOC 512
|
||||
#define SEG_CHUNK_ALLOC 512
|
||||
|
||||
// Stuff for new algorithm follows; some of the above should get
|
||||
// deleted.
|
||||
|
||||
// These should probably be renamed and/or reworked. In the binning
|
||||
// kernel, they represent the number of bins. Also, the workgroup size
|
||||
// of that kernel is equal to the number of bins, but should probably
|
||||
// be more flexible (it's 512 in the K&L paper).
|
||||
#define N_TILE_X 16
|
||||
#define N_TILE_Y 16
|
||||
#define N_TILE (N_TILE_X * N_TILE_Y)
|
||||
#define LG_N_TILE 8
|
||||
#define N_SLICE (N_TILE / 32)
|
||||
// Number of workgroups for binning kernel
|
||||
#define N_WG 16
|
||||
|
||||
// This is the ratio of the number of elements in a binning workgroup
|
||||
// over the number of elements in a partition workgroup.
|
||||
#define ELEMENT_BINNING_RATIO 2
|
||||
|
||||
#define BIN_INITIAL_ALLOC 64
|
||||
#define BIN_ALLOC 256
|
||||
|
|
59
piet-gpu/shader/state.h
Normal file
59
piet-gpu/shader/state.h
Normal file
|
@ -0,0 +1,59 @@
|
|||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// Typed handle to a State record. State_read and State_write turn `offset`
// into a word index with `offset >> 2`, and State_index advances it in
// steps of State_size.
struct StateRef {
|
||||
// Position of the record; divided by four before indexing `state`.
uint offset;
|
||||
};
|
||||
|
||||
// Unpacked form of one record of the `state` array. State_read and
// State_write map the fields onto twelve consecutive words in the order
// declared here (12 words * 4 = State_size).
struct State {
|
||||
// Words 0-3. NOTE(review): presumably the 2x2 linear part of an affine
// transform -- confirm against the shader that fills it in.
vec4 mat;
|
||||
// Words 4-5. NOTE(review): presumably the translation part of the same
// transform -- confirm.
vec2 translate;
|
||||
// Words 6-9. NOTE(review): component order (min/max corners) is not
// visible from this file -- confirm.
vec4 bbox;
|
||||
// Word 10, stored as a raw float bit pattern.
float linewidth;
|
||||
// Word 11, stored as-is; the meaning of individual bits is defined elsewhere.
uint flags;
|
||||
};
|
||||
|
||||
#define State_size 48
|
||||
|
||||
// Returns a reference to the element `index` positions after `ref`,
// advancing the offset by State_size for each position.
StateRef State_index(StateRef ref, uint index) {
    uint stepped = ref.offset + index * State_size;
    return StateRef(stepped);
}
|
||||
|
||||
// Loads a State from twelve consecutive words of the `state` array (declared
// outside this block), starting at index `ref.offset >> 2`.
// Layout: words 0-3 mat, 4-5 translate, 6-9 bbox, 10 linewidth, 11 flags.
State State_read(StateRef ref) {
    uint base = ref.offset >> 2;
    uvec4 mat_bits = uvec4(state[base + 0], state[base + 1], state[base + 2], state[base + 3]);
    uvec2 translate_bits = uvec2(state[base + 4], state[base + 5]);
    uvec4 bbox_bits = uvec4(state[base + 6], state[base + 7], state[base + 8], state[base + 9]);
    uint linewidth_bits = state[base + 10];
    uint flags = state[base + 11];

    State result;
    // Float fields are bit-reinterpreted, not numerically converted.
    result.mat = uintBitsToFloat(mat_bits);
    result.translate = uintBitsToFloat(translate_bits);
    result.bbox = uintBitsToFloat(bbox_bits);
    result.linewidth = uintBitsToFloat(linewidth_bits);
    result.flags = flags;
    return result;
}
|
||||
|
||||
// Stores `s` into twelve consecutive words of the `state` array, starting at
// index `ref.offset >> 2`.
// Layout: words 0-3 mat, 4-5 translate, 6-9 bbox, 10 linewidth, 11 flags.
void State_write(StateRef ref, State s) {
    uint base = ref.offset >> 2;
    // Float fields are written as their raw bit patterns.
    uvec4 mat_bits = floatBitsToUint(s.mat);
    uvec2 translate_bits = floatBitsToUint(s.translate);
    uvec4 bbox_bits = floatBitsToUint(s.bbox);

    state[base + 0] = mat_bits.x;
    state[base + 1] = mat_bits.y;
    state[base + 2] = mat_bits.z;
    state[base + 3] = mat_bits.w;
    state[base + 4] = translate_bits.x;
    state[base + 5] = translate_bits.y;
    state[base + 6] = bbox_bits.x;
    state[base + 7] = bbox_bits.y;
    state[base + 8] = bbox_bits.z;
    state[base + 9] = bbox_bits.w;
    state[base + 10] = floatBitsToUint(s.linewidth);
    state[base + 11] = s.flags;
}
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
mod render_ctx;
|
||||
mod pico_svg;
|
||||
mod render_ctx;
|
||||
|
||||
pub use render_ctx::PietGpuRenderContext;
|
||||
|
||||
|
@ -8,6 +8,8 @@ use rand::{Rng, RngCore};
|
|||
use piet::kurbo::{BezPath, Circle, Line, Point, Vec2};
|
||||
use piet::{Color, RenderContext};
|
||||
|
||||
use piet_gpu_types::encoder::Encode;
|
||||
|
||||
use piet_gpu_hal::{CmdBuf, Device, Error, ImageLayout, MemFlags};
|
||||
|
||||
use pico_svg::PicoSvg;
|
||||
|
@ -28,7 +30,20 @@ const PTCL_INITIAL_ALLOC: usize = 1024;
|
|||
|
||||
const K2_PER_TILE_SIZE: usize = 8;
|
||||
|
||||
const N_CIRCLES: usize = 1;
|
||||
const N_CIRCLES: usize = 0;
|
||||
|
||||
const N_WG: u32 = 16;
|
||||
|
||||
pub fn render_svg(rc: &mut impl RenderContext, filename: &str, scale: f64) {
|
||||
let xml_str = std::fs::read_to_string(filename).unwrap();
|
||||
let start = std::time::Instant::now();
|
||||
let svg = PicoSvg::load(&xml_str, scale).unwrap();
|
||||
println!("parsing time: {:?}", start.elapsed());
|
||||
|
||||
let start = std::time::Instant::now();
|
||||
svg.render(rc);
|
||||
println!("flattening and encoding time: {:?}", start.elapsed());
|
||||
}
|
||||
|
||||
pub fn render_scene(rc: &mut impl RenderContext) {
|
||||
let mut rng = rand::thread_rng();
|
||||
|
@ -42,12 +57,14 @@ pub fn render_scene(rc: &mut impl RenderContext) {
|
|||
let circle = Circle::new(center, radius);
|
||||
rc.fill(circle, &color);
|
||||
}
|
||||
/*
|
||||
let mut path = BezPath::new();
|
||||
path.move_to((100.0, 1150.0));
|
||||
path.line_to((200.0, 1200.0));
|
||||
path.line_to((150.0, 1250.0));
|
||||
path.close_path();
|
||||
rc.fill(path, &Color::rgb8(128, 0, 128));
|
||||
*/
|
||||
rc.stroke(
|
||||
Line::new((100.0, 100.0), (200.0, 150.0)),
|
||||
&Color::WHITE,
|
||||
|
@ -59,7 +76,7 @@ pub fn render_scene(rc: &mut impl RenderContext) {
|
|||
|
||||
#[allow(unused)]
|
||||
fn render_cardioid(rc: &mut impl RenderContext) {
|
||||
let n = 91;
|
||||
let n = 601;
|
||||
let dth = std::f64::consts::PI * 2.0 / (n as f64);
|
||||
let center = Point::new(1024.0, 768.0);
|
||||
let r = 750.0;
|
||||
|
@ -67,7 +84,7 @@ fn render_cardioid(rc: &mut impl RenderContext) {
|
|||
for i in 1..n {
|
||||
let p0 = center + Vec2::from_angle(i as f64 * dth) * r;
|
||||
let p1 = center + Vec2::from_angle(((i * 2) % n) as f64 * dth) * r;
|
||||
rc.fill(&Circle::new(p0, 8.0), &Color::WHITE);
|
||||
//rc.fill(&Circle::new(p0, 8.0), &Color::WHITE);
|
||||
path.move_to(p0);
|
||||
path.line_to(p1);
|
||||
//rc.stroke(Line::new(p0, p1), &Color::BLACK, 2.0);
|
||||
|
@ -96,10 +113,10 @@ fn dump_scene(buf: &[u8]) {
|
|||
}
|
||||
|
||||
#[allow(unused)]
|
||||
fn dump_k1_data(k1_buf: &[u32]) {
|
||||
pub fn dump_k1_data(k1_buf: &[u32]) {
|
||||
for i in 0..k1_buf.len() {
|
||||
if k1_buf[i] != 0 {
|
||||
println!("{:4x}: {:8x}", i, k1_buf[i]);
|
||||
println!("{:4x}: {:8x}", i * 4, k1_buf[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -110,27 +127,30 @@ pub struct Renderer<D: Device> {
|
|||
scene_buf: D::Buffer,
|
||||
scene_dev: D::Buffer,
|
||||
|
||||
k1_alloc_buf_host: D::Buffer,
|
||||
k1_alloc_buf_dev: D::Buffer,
|
||||
k2s_alloc_buf_host: D::Buffer,
|
||||
k2s_alloc_buf_dev: D::Buffer,
|
||||
k2f_alloc_buf_host: D::Buffer,
|
||||
k2f_alloc_buf_dev: D::Buffer,
|
||||
k3_alloc_buf_host: D::Buffer,
|
||||
k3_alloc_buf_dev: D::Buffer,
|
||||
tilegroup_buf: D::Buffer,
|
||||
ptcl_buf: D::Buffer,
|
||||
pub state_buf: D::Buffer,
|
||||
pub anno_buf: D::Buffer,
|
||||
pub bin_buf: D::Buffer,
|
||||
pub ptcl_buf: D::Buffer,
|
||||
|
||||
el_pipeline: D::Pipeline,
|
||||
el_ds: D::DescriptorSet,
|
||||
|
||||
bin_pipeline: D::Pipeline,
|
||||
bin_ds: D::DescriptorSet,
|
||||
|
||||
bin_alloc_buf_host: D::Buffer,
|
||||
bin_alloc_buf_dev: D::Buffer,
|
||||
|
||||
coarse_pipeline: D::Pipeline,
|
||||
coarse_ds: D::DescriptorSet,
|
||||
|
||||
coarse_alloc_buf_host: D::Buffer,
|
||||
coarse_alloc_buf_dev: D::Buffer,
|
||||
|
||||
k1_pipeline: D::Pipeline,
|
||||
k1_ds: D::DescriptorSet,
|
||||
k2s_pipeline: D::Pipeline,
|
||||
k2s_ds: D::DescriptorSet,
|
||||
k2f_pipeline: D::Pipeline,
|
||||
k2f_ds: D::DescriptorSet,
|
||||
k3_pipeline: D::Pipeline,
|
||||
k3_ds: D::DescriptorSet,
|
||||
k4_pipeline: D::Pipeline,
|
||||
k4_ds: D::DescriptorSet,
|
||||
|
||||
n_elements: usize,
|
||||
}
|
||||
|
||||
impl<D: Device> Renderer<D> {
|
||||
|
@ -138,6 +158,9 @@ impl<D: Device> Renderer<D> {
|
|||
let host = MemFlags::host_coherent();
|
||||
let dev = MemFlags::device_local();
|
||||
|
||||
let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
|
||||
println!("scene: {} elements", n_elements);
|
||||
|
||||
let scene_buf = device
|
||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, host)
|
||||
.unwrap();
|
||||
|
@ -146,174 +169,121 @@ impl<D: Device> Renderer<D> {
|
|||
.unwrap();
|
||||
device.write_buffer(&scene_buf, &scene)?;
|
||||
|
||||
let tilegroup_buf = device.create_buffer(4 * 1024 * 1024, dev)?;
|
||||
let state_buf = device.create_buffer(1 * 1024 * 1024, dev)?;
|
||||
let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||
let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?;
|
||||
let segment_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||
let fill_seg_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
|
||||
|
||||
let k1_alloc_buf_host = device.create_buffer(4, host)?;
|
||||
let k1_alloc_buf_dev = device.create_buffer(4, dev)?;
|
||||
let k1_alloc_start = WIDTH_IN_TILEGROUPS * HEIGHT_IN_TILEGROUPS * TILEGROUP_STRIDE;
|
||||
device.write_buffer(&k1_alloc_buf_host, &[k1_alloc_start as u32])?;
|
||||
let k1_code = include_bytes!("../shader/kernel1.spv");
|
||||
let k1_pipeline = device
|
||||
.create_simple_compute_pipeline(k1_code, 3, 0)?;
|
||||
let k1_ds = device
|
||||
.create_descriptor_set(
|
||||
&k1_pipeline,
|
||||
&[&scene_dev, &tilegroup_buf, &k1_alloc_buf_dev],
|
||||
&[],
|
||||
)?;
|
||||
let el_code = include_bytes!("../shader/elements.spv");
|
||||
let el_pipeline = device.create_simple_compute_pipeline(el_code, 3, 0)?;
|
||||
let el_ds = device.create_descriptor_set(
|
||||
&el_pipeline,
|
||||
&[&scene_dev, &state_buf, &anno_buf],
|
||||
&[],
|
||||
)?;
|
||||
|
||||
let k2s_alloc_buf_host = device.create_buffer(4, host)?;
|
||||
let k2s_alloc_buf_dev = device.create_buffer(4, dev)?;
|
||||
let k2s_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE;
|
||||
device
|
||||
.write_buffer(&k2s_alloc_buf_host, &[k2s_alloc_start as u32])
|
||||
?;
|
||||
let k2s_code = include_bytes!("../shader/kernel2s.spv");
|
||||
let k2s_pipeline = device
|
||||
.create_simple_compute_pipeline(k2s_code, 4, 0)
|
||||
?;
|
||||
let k2s_ds = device
|
||||
.create_descriptor_set(
|
||||
&k2s_pipeline,
|
||||
&[&scene_dev, &tilegroup_buf, &segment_buf, &k2s_alloc_buf_dev],
|
||||
&[],
|
||||
)
|
||||
?;
|
||||
let bin_alloc_buf_host = device.create_buffer(12, host)?;
|
||||
let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
|
||||
|
||||
let k2f_alloc_buf_host = device.create_buffer(4, host)?;
|
||||
let k2f_alloc_buf_dev = device.create_buffer(4, dev)?;
|
||||
let k2f_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * K2_PER_TILE_SIZE;
|
||||
device
|
||||
.write_buffer(&k2f_alloc_buf_host, &[k2f_alloc_start as u32])
|
||||
?;
|
||||
let k2f_code = include_bytes!("../shader/kernel2f.spv");
|
||||
let k2f_pipeline = device.create_simple_compute_pipeline(k2f_code, 4, 0)?;
|
||||
let k2f_ds = device
|
||||
.create_descriptor_set(
|
||||
&k2f_pipeline,
|
||||
&[
|
||||
&scene_dev,
|
||||
&tilegroup_buf,
|
||||
&fill_seg_buf,
|
||||
&k2f_alloc_buf_dev,
|
||||
],
|
||||
&[],
|
||||
)
|
||||
?;
|
||||
// TODO: constants
|
||||
let bin_alloc_start = ((n_elements + 255) & !255) * 8;
|
||||
device.write_buffer(
|
||||
&bin_alloc_buf_host,
|
||||
&[n_elements as u32, 0, bin_alloc_start as u32],
|
||||
)?;
|
||||
let bin_code = include_bytes!("../shader/binning.spv");
|
||||
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 4, 0)?;
|
||||
let bin_ds = device.create_descriptor_set(
|
||||
&bin_pipeline,
|
||||
&[&anno_buf, &state_buf, &bin_alloc_buf_dev, &bin_buf],
|
||||
&[],
|
||||
)?;
|
||||
|
||||
let k3_alloc_buf_host = device.create_buffer(4, host)?;
|
||||
let k3_alloc_buf_dev = device.create_buffer(4, dev)?;
|
||||
let k3_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
|
||||
device
|
||||
.write_buffer(&k3_alloc_buf_host, &[k3_alloc_start as u32])
|
||||
?;
|
||||
let k3_code = include_bytes!("../shader/kernel3.spv");
|
||||
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 6, 0)?;
|
||||
let k3_ds = device
|
||||
.create_descriptor_set(
|
||||
&k3_pipeline,
|
||||
&[
|
||||
&scene_dev,
|
||||
&tilegroup_buf,
|
||||
&segment_buf,
|
||||
&fill_seg_buf,
|
||||
&ptcl_buf,
|
||||
&k3_alloc_buf_dev,
|
||||
],
|
||||
&[],
|
||||
)
|
||||
?;
|
||||
let coarse_alloc_buf_host = device.create_buffer(8, host)?;
|
||||
let coarse_alloc_buf_dev = device.create_buffer(8, dev)?;
|
||||
|
||||
let coarse_alloc_start = WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
|
||||
device.write_buffer(
|
||||
&coarse_alloc_buf_host,
|
||||
&[n_elements as u32, coarse_alloc_start as u32],
|
||||
)?;
|
||||
let coarse_code = include_bytes!("../shader/coarse.spv");
|
||||
let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 4, 0)?;
|
||||
let coarse_ds = device.create_descriptor_set(
|
||||
&coarse_pipeline,
|
||||
&[&anno_buf, &bin_buf, &coarse_alloc_buf_dev, &ptcl_buf],
|
||||
&[],
|
||||
)?;
|
||||
|
||||
let k4_code = include_bytes!("../shader/kernel4.spv");
|
||||
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 3, 1)?;
|
||||
let k4_ds = device
|
||||
.create_descriptor_set(&k4_pipeline, &[&ptcl_buf, &segment_buf, &fill_seg_buf], &[&image_dev])
|
||||
?;
|
||||
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 1, 1)?;
|
||||
let k4_ds = device.create_descriptor_set(&k4_pipeline, &[&ptcl_buf], &[&image_dev])?;
|
||||
|
||||
Ok(Renderer {
|
||||
scene_buf,
|
||||
scene_dev,
|
||||
image_dev,
|
||||
k1_alloc_buf_host,
|
||||
k1_alloc_buf_dev,
|
||||
k2s_alloc_buf_host,
|
||||
k2s_alloc_buf_dev,
|
||||
k2f_alloc_buf_host,
|
||||
k2f_alloc_buf_dev,
|
||||
k3_alloc_buf_host,
|
||||
k3_alloc_buf_dev,
|
||||
tilegroup_buf,
|
||||
ptcl_buf,
|
||||
k1_pipeline,
|
||||
k1_ds,
|
||||
k2s_pipeline,
|
||||
k2s_ds,
|
||||
k2f_pipeline,
|
||||
k2f_ds,
|
||||
k3_pipeline,
|
||||
k3_ds,
|
||||
el_pipeline,
|
||||
el_ds,
|
||||
bin_pipeline,
|
||||
bin_ds,
|
||||
coarse_pipeline,
|
||||
coarse_ds,
|
||||
k4_pipeline,
|
||||
k4_ds,
|
||||
state_buf,
|
||||
anno_buf,
|
||||
bin_buf,
|
||||
ptcl_buf,
|
||||
bin_alloc_buf_host,
|
||||
bin_alloc_buf_dev,
|
||||
coarse_alloc_buf_host,
|
||||
coarse_alloc_buf_dev,
|
||||
n_elements,
|
||||
})
|
||||
}
|
||||
|
||||
pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf<D>, query_pool: &D::QueryPool) {
|
||||
cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev);
|
||||
// Note: we could use one alloc buf and reuse it. But we'll stick with
|
||||
// multiple ones for clarity.
|
||||
cmd_buf.copy_buffer(&self.k1_alloc_buf_host, &self.k1_alloc_buf_dev);
|
||||
cmd_buf.copy_buffer(&self.k2s_alloc_buf_host, &self.k2s_alloc_buf_dev);
|
||||
cmd_buf.copy_buffer(&self.k2f_alloc_buf_host, &self.k2f_alloc_buf_dev);
|
||||
cmd_buf.copy_buffer(&self.k3_alloc_buf_host, &self.k3_alloc_buf_dev);
|
||||
// Note: these clears aren't necessary, and are here to make inspection
|
||||
// of the buffers cleaner. Can likely be removed.
|
||||
cmd_buf.clear_buffer(&self.tilegroup_buf);
|
||||
cmd_buf.clear_buffer(&self.ptcl_buf);
|
||||
cmd_buf.copy_buffer(&self.bin_alloc_buf_host, &self.bin_alloc_buf_dev);
|
||||
cmd_buf.copy_buffer(&self.coarse_alloc_buf_host, &self.coarse_alloc_buf_dev);
|
||||
cmd_buf.clear_buffer(&self.state_buf);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.image_barrier(&self.image_dev, ImageLayout::Undefined, ImageLayout::General);
|
||||
cmd_buf.image_barrier(
|
||||
&self.image_dev,
|
||||
ImageLayout::Undefined,
|
||||
ImageLayout::General,
|
||||
);
|
||||
cmd_buf.reset_query_pool(&query_pool);
|
||||
cmd_buf.write_timestamp(&query_pool, 0);
|
||||
cmd_buf.dispatch(
|
||||
&self.k1_pipeline,
|
||||
&self.k1_ds,
|
||||
((WIDTH / 512) as u32, (HEIGHT / 512) as u32, 1),
|
||||
&self.el_pipeline,
|
||||
&self.el_ds,
|
||||
(((self.n_elements + 127) / 128) as u32, 1, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 1);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&self.k2s_pipeline,
|
||||
&self.k2s_ds,
|
||||
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
|
||||
&self.bin_pipeline,
|
||||
&self.bin_ds,
|
||||
(((self.n_elements + 255) / 256) as u32, 1, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 2);
|
||||
// Note: this barrier is not necessary (k2f does not depend on
|
||||
// k2s output), but I'm keeping it here to increase transparency
|
||||
// of performance.
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&self.k2f_pipeline,
|
||||
&self.k2f_ds,
|
||||
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 2),
|
||||
&self.coarse_pipeline,
|
||||
&self.coarse_ds,
|
||||
(WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 3);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&self.k3_pipeline,
|
||||
&self.k3_ds,
|
||||
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 3),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 4);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&self.k4_pipeline,
|
||||
&self.k4_ds,
|
||||
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 5);
|
||||
cmd_buf.write_timestamp(&query_pool, 4);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
|
||||
}
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
use std::str::FromStr;
|
||||
|
||||
use roxmltree::Document;
|
||||
use roxmltree::{Document, Node};
|
||||
|
||||
use piet::kurbo::{Affine, BezPath};
|
||||
|
||||
|
@ -28,27 +28,19 @@ pub struct FillItem {
|
|||
path: BezPath,
|
||||
}
|
||||
|
||||
/// Working state for the recursive SVG walk performed by `Parser::rec_parse`.
struct Parser<'a> {
|
||||
/// Scale factor used by `rec_parse` to build the path transform; its
/// absolute value also multiplies stroke widths.
scale: f64,
|
||||
/// Output list borrowed from the caller; `rec_parse` appends one entry per
/// fill or stroke it finds.
items: &'a mut Vec<Item>,
|
||||
}
|
||||
|
||||
impl PicoSvg {
|
||||
pub fn load(xml_string: &str, scale: f64) -> Result<PicoSvg, Box<dyn std::error::Error>> {
|
||||
let doc = Document::parse(xml_string)?;
|
||||
let root = doc.root_element();
|
||||
let g = root.first_element_child().ok_or("no root element")?;
|
||||
let mut items = Vec::new();
|
||||
for el in g.children() {
|
||||
if el.is_element() {
|
||||
let d = el.attribute("d").ok_or("missing 'd' attribute")?;
|
||||
let bp = BezPath::from_svg(d)?;
|
||||
let path = Affine::scale(scale) * bp;
|
||||
if let Some(fill_color) = el.attribute("fill") {
|
||||
let color = parse_color(fill_color);
|
||||
items.push(Item::Fill(FillItem { color, path: path.clone() }));
|
||||
}
|
||||
if let Some(stroke_color) = el.attribute("stroke") {
|
||||
let width = f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?;
|
||||
let color = parse_color(stroke_color);
|
||||
items.push(Item::Stroke(StrokeItem { width, color, path }));
|
||||
}
|
||||
}
|
||||
let mut parser = Parser::new(&mut items, scale);
|
||||
for node in root.children() {
|
||||
parser.rec_parse(node)?;
|
||||
}
|
||||
Ok(PicoSvg { items })
|
||||
}
|
||||
|
@ -58,6 +50,7 @@ impl PicoSvg {
|
|||
match item {
|
||||
Item::Fill(fill_item) => {
|
||||
rc.fill(&fill_item.path, &fill_item.color);
|
||||
//rc.stroke(&fill_item.path, &fill_item.color, 1.0);
|
||||
}
|
||||
Item::Stroke(stroke_item) => {
|
||||
rc.stroke(&stroke_item.path, &stroke_item.color, stroke_item.width);
|
||||
|
@ -67,6 +60,59 @@ impl PicoSvg {
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
fn new(items: &'a mut Vec<Item>, scale: f64) -> Parser<'a> {
|
||||
Parser { scale, items }
|
||||
}
|
||||
|
||||
fn rec_parse(&mut self, node: Node) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let transform = if self.scale >= 0.0 {
|
||||
Affine::scale(self.scale)
|
||||
} else {
|
||||
Affine::new([-self.scale, 0.0, 0.0, self.scale, 0.0, 1536.0])
|
||||
};
|
||||
if node.is_element() {
|
||||
match node.tag_name().name() {
|
||||
"g" => {
|
||||
for child in node.children() {
|
||||
self.rec_parse(child)?;
|
||||
}
|
||||
}
|
||||
"path" => {
|
||||
let d = node.attribute("d").ok_or("missing 'd' attribute")?;
|
||||
let bp = BezPath::from_svg(d)?;
|
||||
let path = transform * bp;
|
||||
// TODO: default fill color is black, but this is overridden in tiger to this logic.
|
||||
if let Some(fill_color) = node.attribute("fill") {
|
||||
if fill_color != "none" {
|
||||
let color = parse_color(fill_color);
|
||||
let color = modify_opacity(color, "fill-opacity", node);
|
||||
self.items.push(Item::Fill(FillItem {
|
||||
color,
|
||||
path: path.clone(),
|
||||
}));
|
||||
}
|
||||
}
|
||||
if let Some(stroke_color) = node.attribute("stroke") {
|
||||
if stroke_color != "none" {
|
||||
let width = self.scale.abs()
|
||||
* f64::from_str(
|
||||
node.attribute("stroke-width").ok_or("missing width")?,
|
||||
)?;
|
||||
let color = parse_color(stroke_color);
|
||||
let color = modify_opacity(color, "stroke-opacity", node);
|
||||
self.items
|
||||
.push(Item::Stroke(StrokeItem { width, color, path }));
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_color(color: &str) -> Color {
|
||||
if color.as_bytes()[0] == b'#' {
|
||||
let mut hex = u32::from_str_radix(&color[1..], 16).unwrap();
|
||||
|
@ -74,7 +120,27 @@ fn parse_color(color: &str) -> Color {
|
|||
hex = (hex >> 8) * 0x110000 + ((hex >> 4) & 0xf) * 0x1100 + (hex & 0xf) * 0x11;
|
||||
}
|
||||
Color::from_rgba32_u32((hex << 8) + 0xff)
|
||||
} else if color.starts_with("rgb(") {
|
||||
let mut iter = color[4..color.len() - 1].split(',');
|
||||
let r = u8::from_str(iter.next().unwrap()).unwrap();
|
||||
let g = u8::from_str(iter.next().unwrap()).unwrap();
|
||||
let b = u8::from_str(iter.next().unwrap()).unwrap();
|
||||
Color::rgb8(r, g, b)
|
||||
} else {
|
||||
Color::from_rgba32_u32(0xff00ff80)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `color` with its alpha taken from the `attr_name` attribute of
/// `node`, or `color` unchanged when the attribute is absent.
///
/// A value ending in `%` is read as a percentage and multiplied by 0.01;
/// any other value is parsed as a plain number. A value that fails to parse
/// falls back to 100 (percent form) or 1.0 (plain form).
fn modify_opacity(color: Color, attr_name: &str, node: Node) -> Color {
    let opacity = match node.attribute(attr_name) {
        Some(text) => text,
        None => return color,
    };
    let alpha = match opacity.strip_suffix('%') {
        Some(percent) => percent.parse().unwrap_or(100.0) * 0.01,
        None => opacity.parse().unwrap_or(1.0),
    };
    color.with_alpha(alpha)
}
|
||||
|
|
|
@ -2,7 +2,11 @@ use std::borrow::Cow;
|
|||
|
||||
use piet_gpu_types::encoder::{Encode, Encoder, Ref};
|
||||
use piet_gpu_types::scene;
|
||||
use piet_gpu_types::scene::{Bbox, PietCircle, PietFill, PietItem, PietStrokePolyLine, SimpleGroup};
|
||||
use piet_gpu_types::scene::{
|
||||
Bbox, PietCircle, PietFill, PietItem, PietStrokePolyLine, SimpleGroup,
|
||||
};
|
||||
|
||||
use piet_gpu_types::scene::{CubicSeg, Element, Fill, LineSeg, QuadSeg, SetLineWidth, Stroke};
|
||||
|
||||
use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
|
||||
|
||||
|
@ -27,10 +31,10 @@ pub struct PietGpuText;
|
|||
|
||||
pub struct PietGpuRenderContext {
|
||||
encoder: Encoder,
|
||||
bboxes: Vec<Bbox>,
|
||||
items: Vec<PietItem>,
|
||||
elements: Vec<Element>,
|
||||
// Will probably need direct access to hal Device to create images etc.
|
||||
inner_text: PietGpuText,
|
||||
stroke_width: f32,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
|
@ -43,47 +47,22 @@ const TOLERANCE: f64 = 0.25;
|
|||
|
||||
impl PietGpuRenderContext {
|
||||
pub fn new() -> PietGpuRenderContext {
|
||||
let mut encoder = Encoder::new();
|
||||
let _reserve_root = encoder.alloc_chunk(PietItem::fixed_size() as u32);
|
||||
let bboxes = Vec::new();
|
||||
let items = Vec::new();
|
||||
let encoder = Encoder::new();
|
||||
let elements = Vec::new();
|
||||
let inner_text = PietGpuText;
|
||||
let stroke_width = 0.0;
|
||||
PietGpuRenderContext {
|
||||
encoder,
|
||||
bboxes,
|
||||
items,
|
||||
elements,
|
||||
inner_text,
|
||||
stroke_width,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_scene_buf(&mut self) -> &[u8] {
|
||||
let n_items = self.bboxes.len() as u32;
|
||||
let bboxes = self.bboxes.encode(&mut self.encoder).transmute();
|
||||
let items = self.items.encode(&mut self.encoder).transmute();
|
||||
let offset = scene::Point { xy: [0.0, 0.0] };
|
||||
let simple_group = SimpleGroup {
|
||||
n_items,
|
||||
bboxes,
|
||||
items,
|
||||
offset,
|
||||
};
|
||||
let root_item = PietItem::Group(simple_group);
|
||||
root_item.encode_to(&mut self.encoder.buf_mut()[0..PietItem::fixed_size()]);
|
||||
self.elements.encode(&mut self.encoder);
|
||||
self.encoder.buf()
|
||||
}
|
||||
|
||||
fn push_item(&mut self, item: PietItem, bbox: Rect) {
|
||||
let scene_bbox = Bbox {
|
||||
bbox: [
|
||||
bbox.x0.floor() as i16,
|
||||
bbox.y0.floor() as i16,
|
||||
bbox.x1.ceil() as i16,
|
||||
bbox.y1.ceil() as i16,
|
||||
],
|
||||
};
|
||||
self.items.push(item);
|
||||
self.bboxes.push(scene_bbox);
|
||||
}
|
||||
}
|
||||
|
||||
impl RenderContext for PietGpuRenderContext {
|
||||
|
@ -107,20 +86,19 @@ impl RenderContext for PietGpuRenderContext {
|
|||
fn clear(&mut self, _color: Color) {}
|
||||
|
||||
fn stroke(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>, width: f64) {
|
||||
let bbox = shape.bounding_box();
|
||||
let brush = brush.make_brush(self, || bbox).into_owned();
|
||||
let width = width as f32;
|
||||
if self.stroke_width != width {
|
||||
self.elements
|
||||
.push(Element::SetLineWidth(SetLineWidth { width }));
|
||||
self.stroke_width = width;
|
||||
}
|
||||
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
|
||||
let path = shape.to_bez_path(TOLERANCE);
|
||||
let (n_points, points) = flatten_shape(&mut self.encoder, path);
|
||||
self.encode_path(path, false);
|
||||
match brush {
|
||||
PietGpuBrush::Solid(rgba_color) => {
|
||||
let poly_line = PietStrokePolyLine {
|
||||
rgba_color,
|
||||
width: width as f32,
|
||||
n_points,
|
||||
points,
|
||||
};
|
||||
let bbox = bbox.inset(-0.5 * width);
|
||||
self.push_item(PietItem::Poly(poly_line), bbox);
|
||||
let stroke = Stroke { rgba_color };
|
||||
self.elements.push(Element::Stroke(stroke));
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
@ -136,35 +114,13 @@ impl RenderContext for PietGpuRenderContext {
|
|||
}
|
||||
|
||||
fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
|
||||
let bbox = shape.bounding_box();
|
||||
let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
|
||||
|
||||
if let Some(circle) = shape.as_circle() {
|
||||
match brush {
|
||||
PietGpuBrush::Solid(rgba_color) => {
|
||||
let piet_circle = PietCircle {
|
||||
rgba_color,
|
||||
center: to_scene_point(circle.center),
|
||||
radius: circle.radius as f32,
|
||||
};
|
||||
let bbox = circle.bounding_box();
|
||||
self.push_item(PietItem::Circle(piet_circle), bbox);
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
return;
|
||||
}
|
||||
let path = shape.to_bez_path(TOLERANCE);
|
||||
let (n_points, points) = flatten_shape(&mut self.encoder, path);
|
||||
self.encode_path(path, true);
|
||||
match brush {
|
||||
PietGpuBrush::Solid(rgba_color) => {
|
||||
let fill = PietFill {
|
||||
flags: 0,
|
||||
rgba_color,
|
||||
n_points,
|
||||
points,
|
||||
};
|
||||
self.push_item(PietItem::Fill(fill), bbox);
|
||||
let fill = Fill { rgba_color };
|
||||
self.elements.push(Element::Fill(fill));
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
@ -241,45 +197,110 @@ impl RenderContext for PietGpuRenderContext {
|
|||
}
|
||||
}
|
||||
|
||||
fn flatten_shape(
|
||||
encoder: &mut Encoder,
|
||||
path: impl Iterator<Item = PathEl>,
|
||||
) -> (u32, Ref<scene::Point>) {
|
||||
let mut points = Vec::new();
|
||||
let mut start_pt = None;
|
||||
let mut last_pt = None;
|
||||
piet::kurbo::flatten(path, TOLERANCE, |el| {
|
||||
match el {
|
||||
PathEl::MoveTo(p) => {
|
||||
let scene_pt = to_scene_point(p);
|
||||
start_pt = Some(clone_scene_pt(&scene_pt));
|
||||
if !points.is_empty() {
|
||||
points.push(scene::Point {
|
||||
xy: [std::f32::NAN, std::f32::NAN],
|
||||
});
|
||||
impl PietGpuRenderContext {
|
||||
fn encode_line_seg(&mut self, seg: LineSeg, is_fill: bool) {
|
||||
if is_fill {
|
||||
self.elements.push(Element::FillLine(seg));
|
||||
} else {
|
||||
self.elements.push(Element::StrokeLine(seg));
|
||||
}
|
||||
}
|
||||
|
||||
fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {
|
||||
let flatten = true;
|
||||
if flatten {
|
||||
let mut start_pt = None;
|
||||
let mut last_pt = None;
|
||||
piet::kurbo::flatten(path, TOLERANCE, |el| {
|
||||
match el {
|
||||
PathEl::MoveTo(p) => {
|
||||
let scene_pt = to_f32_2(p);
|
||||
start_pt = Some(scene_pt);
|
||||
last_pt = Some(scene_pt);
|
||||
}
|
||||
PathEl::LineTo(p) => {
|
||||
let scene_pt = to_f32_2(p);
|
||||
let seg = LineSeg {
|
||||
p0: last_pt.unwrap(),
|
||||
p1: scene_pt,
|
||||
};
|
||||
self.encode_line_seg(seg, is_fill);
|
||||
last_pt = Some(scene_pt);
|
||||
}
|
||||
PathEl::ClosePath => {
|
||||
if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) {
|
||||
if last != start {
|
||||
let seg = LineSeg {
|
||||
p0: last,
|
||||
p1: start,
|
||||
};
|
||||
self.encode_line_seg(seg, is_fill);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
last_pt = Some(clone_scene_pt(&scene_pt));
|
||||
points.push(scene_pt);
|
||||
}
|
||||
PathEl::LineTo(p) => {
|
||||
let scene_pt = to_scene_point(p);
|
||||
last_pt = Some(clone_scene_pt(&scene_pt));
|
||||
points.push(scene_pt);
|
||||
}
|
||||
PathEl::ClosePath => {
|
||||
if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) {
|
||||
if start.xy != last.xy {
|
||||
points.push(start);
|
||||
//println!("{:?}", el);
|
||||
});
|
||||
} else {
|
||||
let mut start_pt = None;
|
||||
let mut last_pt = None;
|
||||
for el in path {
|
||||
match el {
|
||||
PathEl::MoveTo(p) => {
|
||||
let scene_pt = to_f32_2(p);
|
||||
start_pt = Some(scene_pt);
|
||||
last_pt = Some(scene_pt);
|
||||
}
|
||||
PathEl::LineTo(p) => {
|
||||
let scene_pt = to_f32_2(p);
|
||||
let seg = LineSeg {
|
||||
p0: last_pt.unwrap(),
|
||||
p1: scene_pt,
|
||||
};
|
||||
self.encode_line_seg(seg, is_fill);
|
||||
last_pt = Some(scene_pt);
|
||||
}
|
||||
PathEl::QuadTo(p1, p2) => {
|
||||
let scene_p1 = to_f32_2(p1);
|
||||
let scene_p2 = to_f32_2(p2);
|
||||
let seg = QuadSeg {
|
||||
p0: last_pt.unwrap(),
|
||||
p1: scene_p1,
|
||||
p2: scene_p2,
|
||||
};
|
||||
self.elements.push(Element::Quad(seg));
|
||||
last_pt = Some(scene_p2);
|
||||
}
|
||||
PathEl::CurveTo(p1, p2, p3) => {
|
||||
let scene_p1 = to_f32_2(p1);
|
||||
let scene_p2 = to_f32_2(p2);
|
||||
let scene_p3 = to_f32_2(p3);
|
||||
let seg = CubicSeg {
|
||||
p0: last_pt.unwrap(),
|
||||
p1: scene_p1,
|
||||
p2: scene_p2,
|
||||
p3: scene_p3,
|
||||
};
|
||||
self.elements.push(Element::Cubic(seg));
|
||||
last_pt = Some(scene_p3);
|
||||
}
|
||||
PathEl::ClosePath => {
|
||||
if let (Some(start), Some(last)) = (start_pt.take(), last_pt.take()) {
|
||||
if last != start {
|
||||
let seg = LineSeg {
|
||||
p0: last,
|
||||
p1: start,
|
||||
};
|
||||
self.encode_line_seg(seg, is_fill);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
//println!("{:?}", el);
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
//println!("{:?}", el);
|
||||
});
|
||||
let n_points = points.len() as u32;
|
||||
let points_ref = points.encode(encoder).transmute();
|
||||
(n_points, points_ref)
|
||||
}
|
||||
}
|
||||
|
||||
impl Text for PietGpuText {
|
||||
|
@ -360,13 +381,6 @@ impl IntoBrush<PietGpuRenderContext> for PietGpuBrush {
|
|||
}
|
||||
}
|
||||
|
||||
fn to_scene_point(point: Point) -> scene::Point {
|
||||
scene::Point {
|
||||
xy: [point.x as f32, point.y as f32],
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: allow #[derive(Clone)] in piet-gpu-derive.
|
||||
fn clone_scene_pt(p: &scene::Point) -> scene::Point {
|
||||
scene::Point { xy: p.xy }
|
||||
fn to_f32_2(point: Point) -> [f32; 2] {
|
||||
[point.x as f32, point.y as f32]
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue