Experiment with new sorting scheme

Path segments are unsorted, but other elements are using the same
sort-middle approach as before.

This is a checkpoint. At this point, there are unoptimized versions
of tile init and coarse path raster, but it isn't wired up into a
working pipeline. Also observing about a 3x performance regression in
element processing, which needs to be investigated.
This commit is contained in:
Raph Levien 2020-06-02 17:10:20 -07:00
parent f3cb904f86
commit 294f6fd1db
24 changed files with 657 additions and 33 deletions

View file

@ -3,9 +3,11 @@ use piet_gpu_derive::piet_gpu;
piet_gpu! { piet_gpu! {
#[gpu_write] #[gpu_write]
mod annotated { mod annotated {
// Note: path segments have moved to pathseg, delete these.
struct AnnoFillLineSeg { struct AnnoFillLineSeg {
p0: [f32; 2], p0: [f32; 2],
p1: [f32; 2], p1: [f32; 2],
path_ix: u32,
// A note: the layout of this struct is shared with // A note: the layout of this struct is shared with
// AnnoStrokeLineSeg. In that case, we actually write // AnnoStrokeLineSeg. In that case, we actually write
// [0.0, 0.0] as the stroke field, to minimize divergence. // [0.0, 0.0] as the stroke field, to minimize divergence.
@ -13,6 +15,7 @@ piet_gpu! {
struct AnnoStrokeLineSeg { struct AnnoStrokeLineSeg {
p0: [f32; 2], p0: [f32; 2],
p1: [f32; 2], p1: [f32; 2],
path_ix: u32,
// halfwidth in both x and y for binning // halfwidth in both x and y for binning
stroke: [f32; 2], stroke: [f32; 2],
} }

View file

@ -3,8 +3,10 @@
pub mod annotated; pub mod annotated;
pub mod bins; pub mod bins;
pub mod encoder; pub mod encoder;
pub mod pathseg;
pub mod ptcl; pub mod ptcl;
pub mod scene; pub mod scene;
pub mod state; pub mod state;
pub mod test; pub mod test;
pub mod tile;
pub mod tilegroup; pub mod tilegroup;

View file

@ -7,7 +7,9 @@ fn main() {
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
"state" => print!("{}", piet_gpu_types::state::gen_gpu_state()), "state" => print!("{}", piet_gpu_types::state::gen_gpu_state()),
"annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()), "annotated" => print!("{}", piet_gpu_types::annotated::gen_gpu_annotated()),
"pathseg" => print!("{}", piet_gpu_types::pathseg::gen_gpu_pathseg()),
"bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()), "bins" => print!("{}", piet_gpu_types::bins::gen_gpu_bins()),
"tile" => print!("{}", piet_gpu_types::tile::gen_gpu_tile()),
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()), "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()), "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()), "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),

View file

@ -0,0 +1,46 @@
use piet_gpu_derive::piet_gpu;
// GPU-side layout of path segments. The shader header pathseg.h is generated
// from this module. Plain `//` comments are used throughout so that nothing
// extra is fed into the `piet_gpu!` macro input.
piet_gpu! {
#[gpu_write]
mod pathseg {
// A line segment belonging to a filled path.
struct PathFillLine {
// Endpoints of the segment.
p0: [f32; 2],
p1: [f32; 2],
// Index of the path this segment belongs to; the coarse path raster
// uses it to look up the path's record in the tile buffer.
path_ix: u32,
// A note: the layout of this struct is shared with
// PathStrokeLine. In that case, we actually write
// [0.0, 0.0] as the stroke field, to minimize divergence.
}
// A line segment belonging to a stroked path. Same leading fields as
// PathFillLine, followed by the stroke half-width.
struct PathStrokeLine {
p0: [f32; 2],
p1: [f32; 2],
path_ix: u32,
// halfwidth in both x and y for binning
stroke: [f32; 2],
}
// Curve segments are not enabled yet.
// NOTE(review): unlike the line structs, these have no path_ix field;
// presumably one is needed before they can be switched on -- confirm.
/*
struct PathQuad {
p0: [f32; 2],
p1: [f32; 2],
p2: [f32; 2],
stroke: [f32; 2],
}
struct PathCubic {
p0: [f32; 2],
p1: [f32; 2],
p2: [f32; 2],
p3: [f32; 2],
stroke: [f32; 2],
}
*/
// Tagged union of all path segment kinds.
enum PathSeg {
Nop,
FillLine(PathFillLine),
StrokeLine(PathStrokeLine),
// NOTE(review): the disabled variants below still name the annotated
// module's types (AnnoQuadSeg / AnnoCubicSeg); they presumably should
// refer to PathQuad / PathCubic when enabled -- confirm.
/*
Quad(AnnoQuadSeg),
Cubic(AnnoCubicSeg),
*/
}
}
}

View file

@ -9,6 +9,8 @@ piet_gpu! {
bbox: [f32; 4], bbox: [f32; 4],
linewidth: f32, linewidth: f32,
flags: u32, flags: u32,
path_count: u32,
pathseg_count: u32,
} }
} }
} }

View file

@ -0,0 +1,21 @@
use piet_gpu_derive::piet_gpu;
// GPU-side layout of per-path tile data. The shader header tile.h is
// generated from this module. Plain `//` comments are used throughout so that
// nothing extra is fed into the `piet_gpu!` macro input.
piet_gpu! {
#[gpu_write]
mod tile {
// Per-path record written by the tile allocation shader.
struct Path {
// Bounding box in tile coordinates, stored as (x0, y0, x1, y1).
bbox: [u16; 4],
// Start of this path's (x1 - x0) * (y1 - y0) Tile records.
tiles: Ref<Tile>,
}
// One tile of one path.
struct Tile {
// Head of the tile's segment list; tiles are initialized with offset 0.
tile: Ref<TileSeg>,
// Initialized to 0 by tile allocation; the coarse path raster does not
// fill it in yet (it carries a "TODO for fills: backdrop").
backdrop: i32,
}
// Segments within a tile are represented as a linked list.
struct TileSeg {
// Endpoints of the segment (copied from the path segment's p0 / p1).
start: [f32; 2],
end: [f32; 2],
// Previous head of the tile's list at the time this node was pushed.
next: Ref<TileSeg>,
}
}
}

View file

@ -185,10 +185,12 @@ fn main() -> Result<(), Error> {
} else { } else {
render_scene(&mut ctx); render_scene(&mut ctx);
} }
let n_paths = ctx.path_count();
let n_pathseg = ctx.pathseg_count();
let scene = ctx.get_scene_buf(); let scene = ctx.get_scene_buf();
//dump_scene(&scene); //dump_scene(&scene);
let renderer = Renderer::new(&device, scene)?; let renderer = Renderer::new(&device, scene, n_paths, n_pathseg)?;
let image_buf = let image_buf =
device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?; device.create_buffer((WIDTH * HEIGHT * 4) as u64, MemFlags::host_coherent())?;
@ -200,16 +202,16 @@ fn main() -> Result<(), Error> {
device.wait_and_reset(&[fence])?; device.wait_and_reset(&[fence])?;
let ts = device.reap_query_pool(&query_pool).unwrap(); let ts = device.reap_query_pool(&query_pool).unwrap();
println!("Element kernel time: {:.3}ms", ts[0] * 1e3); println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
println!("Binning kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3); println!("Tile allocation kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
println!("Coarse kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3); println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
println!("Render kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
/* /*
println!("Render kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
*/
let mut data: Vec<u32> = Default::default(); let mut data: Vec<u32> = Default::default();
device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap(); device.read_buffer(&renderer.tile_buf, &mut data).unwrap();
piet_gpu::dump_k1_data(&data); piet_gpu::dump_k1_data(&data);
//trace_ptcl(&data); //trace_ptcl(&data);
*/
let mut img_data: Vec<u8> = Default::default(); let mut img_data: Vec<u8> = Default::default();
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy // Note: because png can use a `&[u8]` slice, we could avoid an extra copy

View file

@ -42,9 +42,11 @@ fn main() -> Result<(), Error> {
let mut ctx = PietGpuRenderContext::new(); let mut ctx = PietGpuRenderContext::new();
render_scene(&mut ctx); render_scene(&mut ctx);
let n_paths = ctx.path_count();
let n_pathseg = ctx.pathseg_count();
let scene = ctx.get_scene_buf(); let scene = ctx.get_scene_buf();
let renderer = Renderer::new(&device, scene)?; let renderer = Renderer::new(&device, scene, n_paths, n_pathseg)?;
event_loop.run(move |event, _, control_flow| { event_loop.run(move |event, _, control_flow| {
*control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event *control_flow = ControlFlow::Poll; // `ControlFlow::Wait` if only re-render on event

View file

@ -31,9 +31,10 @@ struct AnnotatedRef {
struct AnnoFillLineSeg { struct AnnoFillLineSeg {
vec2 p0; vec2 p0;
vec2 p1; vec2 p1;
uint path_ix;
}; };
#define AnnoFillLineSeg_size 16 #define AnnoFillLineSeg_size 20
AnnoFillLineSegRef AnnoFillLineSeg_index(AnnoFillLineSegRef ref, uint index) { AnnoFillLineSegRef AnnoFillLineSeg_index(AnnoFillLineSegRef ref, uint index) {
return AnnoFillLineSegRef(ref.offset + index * AnnoFillLineSeg_size); return AnnoFillLineSegRef(ref.offset + index * AnnoFillLineSeg_size);
@ -42,10 +43,11 @@ AnnoFillLineSegRef AnnoFillLineSeg_index(AnnoFillLineSegRef ref, uint index) {
struct AnnoStrokeLineSeg { struct AnnoStrokeLineSeg {
vec2 p0; vec2 p0;
vec2 p1; vec2 p1;
uint path_ix;
vec2 stroke; vec2 stroke;
}; };
#define AnnoStrokeLineSeg_size 24 #define AnnoStrokeLineSeg_size 28
AnnoStrokeLineSegRef AnnoStrokeLineSeg_index(AnnoStrokeLineSegRef ref, uint index) { AnnoStrokeLineSegRef AnnoStrokeLineSeg_index(AnnoStrokeLineSegRef ref, uint index) {
return AnnoStrokeLineSegRef(ref.offset + index * AnnoStrokeLineSeg_size); return AnnoStrokeLineSegRef(ref.offset + index * AnnoStrokeLineSeg_size);
@ -120,9 +122,11 @@ AnnoFillLineSeg AnnoFillLineSeg_read(AnnoFillLineSegRef ref) {
uint raw1 = annotated[ix + 1]; uint raw1 = annotated[ix + 1];
uint raw2 = annotated[ix + 2]; uint raw2 = annotated[ix + 2];
uint raw3 = annotated[ix + 3]; uint raw3 = annotated[ix + 3];
uint raw4 = annotated[ix + 4];
AnnoFillLineSeg s; AnnoFillLineSeg s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.path_ix = raw4;
return s; return s;
} }
@ -132,6 +136,7 @@ void AnnoFillLineSeg_write(AnnoFillLineSegRef ref, AnnoFillLineSeg s) {
annotated[ix + 1] = floatBitsToUint(s.p0.y); annotated[ix + 1] = floatBitsToUint(s.p0.y);
annotated[ix + 2] = floatBitsToUint(s.p1.x); annotated[ix + 2] = floatBitsToUint(s.p1.x);
annotated[ix + 3] = floatBitsToUint(s.p1.y); annotated[ix + 3] = floatBitsToUint(s.p1.y);
annotated[ix + 4] = s.path_ix;
} }
AnnoStrokeLineSeg AnnoStrokeLineSeg_read(AnnoStrokeLineSegRef ref) { AnnoStrokeLineSeg AnnoStrokeLineSeg_read(AnnoStrokeLineSegRef ref) {
@ -142,10 +147,12 @@ AnnoStrokeLineSeg AnnoStrokeLineSeg_read(AnnoStrokeLineSegRef ref) {
uint raw3 = annotated[ix + 3]; uint raw3 = annotated[ix + 3];
uint raw4 = annotated[ix + 4]; uint raw4 = annotated[ix + 4];
uint raw5 = annotated[ix + 5]; uint raw5 = annotated[ix + 5];
uint raw6 = annotated[ix + 6];
AnnoStrokeLineSeg s; AnnoStrokeLineSeg s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1)); s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.stroke = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); s.path_ix = raw4;
s.stroke = vec2(uintBitsToFloat(raw5), uintBitsToFloat(raw6));
return s; return s;
} }
@ -155,8 +162,9 @@ void AnnoStrokeLineSeg_write(AnnoStrokeLineSegRef ref, AnnoStrokeLineSeg s) {
annotated[ix + 1] = floatBitsToUint(s.p0.y); annotated[ix + 1] = floatBitsToUint(s.p0.y);
annotated[ix + 2] = floatBitsToUint(s.p1.x); annotated[ix + 2] = floatBitsToUint(s.p1.x);
annotated[ix + 3] = floatBitsToUint(s.p1.y); annotated[ix + 3] = floatBitsToUint(s.p1.y);
annotated[ix + 4] = floatBitsToUint(s.stroke.x); annotated[ix + 4] = s.path_ix;
annotated[ix + 5] = floatBitsToUint(s.stroke.y); annotated[ix + 5] = floatBitsToUint(s.stroke.x);
annotated[ix + 6] = floatBitsToUint(s.stroke.y);
} }
AnnoQuadSeg AnnoQuadSeg_read(AnnoQuadSegRef ref) { AnnoQuadSeg AnnoQuadSeg_read(AnnoQuadSegRef ref) {

Binary file not shown.

View file

@ -14,6 +14,10 @@ build elements.spv: glsl elements.comp | scene.h state.h annotated.h
build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h build binning.spv: glsl binning.comp | annotated.h state.h bins.h setup.h
build tile_alloc.spv: glsl tile_alloc.comp | annotated.h tile.h setup.h
build path_coarse.spv: glsl path_coarse.comp | annotated.h tile.h setup.h
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h

Binary file not shown.

View file

@ -30,9 +30,15 @@ layout(set = 0, binding = 2) buffer AnnotatedBuf {
uint[] annotated; uint[] annotated;
}; };
// Path segments are stored here.
layout(set = 0, binding = 3) buffer PathSegBuf {
uint[] pathseg;
};
#include "scene.h" #include "scene.h"
#include "state.h" #include "state.h"
#include "annotated.h" #include "annotated.h"
#include "pathseg.h"
#define StateBuf_stride (8 + 2 * State_size) #define StateBuf_stride (8 + 2 * State_size)
@ -83,6 +89,8 @@ State combine_state(State a, State b) {
c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth; c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth;
c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX)) | b.flags; c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX)) | b.flags;
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1; c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
c.path_count = a.path_count + b.path_count;
c.pathseg_count = a.pathseg_count + b.pathseg_count;
return c; return c;
} }
@ -96,6 +104,8 @@ State map_element(ElementRef ref, inout bool is_fill) {
c.translate = vec2(0.0, 0.0); c.translate = vec2(0.0, 0.0);
c.linewidth = 1.0; // TODO should be 0.0 c.linewidth = 1.0; // TODO should be 0.0
c.flags = 0; c.flags = 0;
c.path_count = 0;
c.pathseg_count = 0;
is_fill = false; is_fill = false;
switch (tag) { switch (tag) {
case Element_FillLine: case Element_FillLine:
@ -103,22 +113,26 @@ State map_element(ElementRef ref, inout bool is_fill) {
LineSeg line = Element_FillLine_read(ref); LineSeg line = Element_FillLine_read(ref);
c.bbox.xy = min(line.p0, line.p1); c.bbox.xy = min(line.p0, line.p1);
c.bbox.zw = max(line.p0, line.p1); c.bbox.zw = max(line.p0, line.p1);
c.pathseg_count = 1;
break; break;
case Element_Quad: case Element_Quad:
QuadSeg quad = Element_Quad_read(ref); QuadSeg quad = Element_Quad_read(ref);
c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2); c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2);
c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2); c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2);
c.pathseg_count = 1;
break; break;
case Element_Cubic: case Element_Cubic:
CubicSeg cubic = Element_Cubic_read(ref); CubicSeg cubic = Element_Cubic_read(ref);
c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3)); c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));
c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3)); c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
c.pathseg_count = 1;
break; break;
case Element_Fill: case Element_Fill:
is_fill = true; is_fill = true;
// fall-through // fall-through
case Element_Stroke: case Element_Stroke:
c.flags = FLAG_RESET_BBOX; c.flags = FLAG_RESET_BBOX;
c.path_count = 1;
break; break;
case Element_SetLineWidth: case Element_SetLineWidth:
SetLineWidth lw = Element_SetLineWidth_read(ref); SetLineWidth lw = Element_SetLineWidth_read(ref);
@ -148,6 +162,8 @@ shared vec2 sh_translate[WG_SIZE];
shared vec4 sh_bbox[WG_SIZE]; shared vec4 sh_bbox[WG_SIZE];
shared float sh_width[WG_SIZE]; shared float sh_width[WG_SIZE];
shared uint sh_flags[WG_SIZE]; shared uint sh_flags[WG_SIZE];
shared uint sh_path_count[WG_SIZE];
shared uint sh_pathseg_count[WG_SIZE];
shared uint sh_min_fill; shared uint sh_min_fill;
@ -187,6 +203,8 @@ void main() {
sh_bbox[gl_LocalInvocationID.x] = agg.bbox; sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
sh_width[gl_LocalInvocationID.x] = agg.linewidth; sh_width[gl_LocalInvocationID.x] = agg.linewidth;
sh_flags[gl_LocalInvocationID.x] = agg.flags; sh_flags[gl_LocalInvocationID.x] = agg.flags;
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
for (uint i = 0; i < LG_WG_SIZE; i++) { for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier(); barrier();
if (gl_LocalInvocationID.x >= (1 << i)) { if (gl_LocalInvocationID.x >= (1 << i)) {
@ -197,6 +215,8 @@ void main() {
other.bbox = sh_bbox[ix]; other.bbox = sh_bbox[ix];
other.linewidth = sh_width[ix]; other.linewidth = sh_width[ix];
other.flags = sh_flags[ix]; other.flags = sh_flags[ix];
other.path_count = sh_path_count[ix];
other.pathseg_count = sh_pathseg_count[ix];
agg = combine_state(other, agg); agg = combine_state(other, agg);
} }
barrier(); barrier();
@ -205,6 +225,8 @@ void main() {
sh_bbox[gl_LocalInvocationID.x] = agg.bbox; sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
sh_width[gl_LocalInvocationID.x] = agg.linewidth; sh_width[gl_LocalInvocationID.x] = agg.linewidth;
sh_flags[gl_LocalInvocationID.x] = agg.flags; sh_flags[gl_LocalInvocationID.x] = agg.flags;
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
} }
State exclusive; State exclusive;
@ -213,6 +235,8 @@ void main() {
exclusive.translate = vec2(0.0, 0.0); exclusive.translate = vec2(0.0, 0.0);
exclusive.linewidth = 1.0; //TODO should be 0.0 exclusive.linewidth = 1.0; //TODO should be 0.0
exclusive.flags = 0; exclusive.flags = 0;
exclusive.path_count = 0;
exclusive.pathseg_count = 0;
// Publish aggregate for this partition // Publish aggregate for this partition
if (gl_LocalInvocationID.x == WG_SIZE - 1) { if (gl_LocalInvocationID.x == WG_SIZE - 1) {
@ -266,6 +290,8 @@ void main() {
other.bbox = sh_bbox[ix]; other.bbox = sh_bbox[ix];
other.linewidth = sh_width[ix]; other.linewidth = sh_width[ix];
other.flags = sh_flags[ix]; other.flags = sh_flags[ix];
other.path_count = sh_path_count[ix];
other.pathseg_count = sh_pathseg_count[ix];
row = combine_state(row, other); row = combine_state(row, other);
} }
if (my_min_fill == ~0 && gl_LocalInvocationID.x == 0) { if (my_min_fill == ~0 && gl_LocalInvocationID.x == 0) {
@ -284,25 +310,26 @@ void main() {
// gains to be had from stashing in shared memory or possibly // gains to be had from stashing in shared memory or possibly
// registers (though register pressure is an issue). // registers (though register pressure is an issue).
ElementRef this_ref = Element_index(ref, i); ElementRef this_ref = Element_index(ref, i);
AnnotatedRef out_ref = AnnotatedRef((ix + i) * Annotated_size);
uint tag = Element_tag(this_ref); uint tag = Element_tag(this_ref);
switch (tag) { switch (tag) {
case Element_FillLine: case Element_FillLine:
case Element_StrokeLine: case Element_StrokeLine:
LineSeg line = Element_StrokeLine_read(this_ref); LineSeg line = Element_StrokeLine_read(this_ref);
AnnoStrokeLineSeg anno_line; PathStrokeLine path_line;
anno_line.p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate; path_line.p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
anno_line.p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate; path_line.p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
path_line.path_ix = st.path_count;
if (tag == Element_StrokeLine) { if (tag == Element_StrokeLine) {
anno_line.stroke = get_linewidth(st); path_line.stroke = get_linewidth(st);
} else { } else {
anno_line.stroke = vec2(0.0); path_line.stroke = vec2(0.0);
} }
// We do encoding a bit by hand to minimize divergence. Another approach // We do encoding a bit by hand to minimize divergence. Another approach
// would be to have a fill/stroke bool. // would be to have a fill/stroke bool.
uint out_tag = tag == Element_FillLine ? Annotated_FillLine : Annotated_StrokeLine; PathSegRef path_out_ref = PathSegRef((st.pathseg_count - 1) * PathSeg_size);
annotated[out_ref.offset >> 2] = out_tag; uint out_tag = tag == Element_FillLine ? PathSeg_FillLine : PathSeg_StrokeLine;
AnnoStrokeLineSeg_write(AnnoStrokeLineSegRef(out_ref.offset + 4), anno_line); pathseg[path_out_ref.offset >> 2] = out_tag;
PathStrokeLine_write(PathStrokeLineRef(path_out_ref.offset + 4), path_line);
break; break;
case Element_Stroke: case Element_Stroke:
Stroke stroke = Element_Stroke_read(this_ref); Stroke stroke = Element_Stroke_read(this_ref);
@ -311,6 +338,7 @@ void main() {
vec2 lw = get_linewidth(st); vec2 lw = get_linewidth(st);
anno_stroke.bbox = st.bbox + vec4(-lw, lw); anno_stroke.bbox = st.bbox + vec4(-lw, lw);
anno_stroke.linewidth = st.linewidth * sqrt(st.mat.x * st.mat.w - st.mat.y * st.mat.z); anno_stroke.linewidth = st.linewidth * sqrt(st.mat.x * st.mat.w - st.mat.y * st.mat.z);
AnnotatedRef out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
Annotated_Stroke_write(out_ref, anno_stroke); Annotated_Stroke_write(out_ref, anno_stroke);
break; break;
case Element_Fill: case Element_Fill:
@ -318,11 +346,9 @@ void main() {
AnnoFill anno_fill; AnnoFill anno_fill;
anno_fill.rgba_color = fill.rgba_color; anno_fill.rgba_color = fill.rgba_color;
anno_fill.bbox = st.bbox; anno_fill.bbox = st.bbox;
out_ref = AnnotatedRef((st.path_count - 1) * Annotated_size);
Annotated_Fill_write(out_ref, anno_fill); Annotated_Fill_write(out_ref, anno_fill);
break; break;
default:
Annotated_Nop_write(out_ref);
break;
} }
} }
} }

Binary file not shown.

View file

@ -0,0 +1,107 @@
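// Coarse rasterization of path segments: each segment is pushed onto the
// segment list of every tile of its path that it may touch.
// (Allocation and initialization of the tiles themselves is done by
// tile_alloc.comp.)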
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#define TILE_ALLOC_WG 32
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer PathSegBuf {
uint[] pathseg;
};
layout(set = 0, binding = 1) buffer AllocBuf {
uint n_paths;
uint n_pathseg;
uint alloc;
};
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
};
#include "pathseg.h"
#include "tile.h"
// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))
// Coarse rasterization of a single path segment.
//
// Each invocation handles the segment at index gl_GlobalInvocationID.x. It
// computes the rectangle of tiles the segment may touch (clamped to the tile
// bounding box of the segment's path), reserves space for one TileSeg per
// tile in that rectangle, and pushes a copy of the segment onto the linked
// list of every tile it covers.
void main() {
    uint element_ix = gl_GlobalInvocationID.x;
    PathSegRef ref = PathSegRef(element_ix * PathSeg_size);

    uint tag = PathSeg_Nop;
    if (element_ix < n_pathseg) {
        tag = PathSeg_tag(ref);
    }
    // Only line segments produce output. Invocations past the end of the
    // buffer and Nop segments must stop here: falling through would use an
    // uninitialized bounding box and path index, read an arbitrary Path
    // record and bump the allocator. This shader has no barriers, so an
    // early return is safe.
    if (tag != PathSeg_FillLine && tag != PathSeg_StrokeLine) {
        return;
    }

    // Fill and stroke lines share a layout; elements.comp writes a zero
    // stroke for fills, so both can be read as a stroke line.
    PathStrokeLine line = PathSeg_StrokeLine_read(ref);

    // Bounding box of the segment in pixel coordinates, grown by the stroke
    // half-width.
    float xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
    float xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
    float ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
    float ymax = max(line.p0.y, line.p1.y) + line.stroke.y;

    // Set up the per-tile-row coverage formula: for tile row y, the columns
    // considered are floor(t - c) .. ceil(t + c), with t = a + b * y.
    float dx = line.p1.x - line.p0.x;
    float dy = line.p1.y - line.p0.y;
    // Guard against division by (near) zero for horizontal segments.
    float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
    float c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
    float b = invslope; // Note: assumes square tiles, otherwise scale.
    float a = (line.p0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;

    // Tile rectangle touched by the segment's bounding box.
    int x0 = int(floor(xmin * SX));
    int x1 = int(ceil(xmax * SX));
    int y0 = int(floor(ymin * SY));
    int y1 = int(ceil(ymax * SY));

    // Clamp to the tiles that were allocated for this segment's path.
    Path path = Path_read(PathRef(line.path_ix * Path_size));
    ivec4 bbox = ivec4(path.bbox);
    x0 = clamp(x0, bbox.x, bbox.z);
    y0 = clamp(y0, bbox.y, bbox.w);
    x1 = clamp(x1, bbox.x, bbox.z);
    y1 = clamp(y1, bbox.y, bbox.w);

    float t = a + b * float(y0);
    // Tiles of a path are laid out row-major over its bounding box; `base + x`
    // is the index of tile (x, y) within the path's tile array.
    int stride = bbox.z - bbox.x;
    int base = (y0 - bbox.y) * stride - bbox.x;

    // Upper bound on the number of TileSegs written below: the loops visit at
    // most (y1 - y0) rows of at most (x1 - x0) tiles each.
    // TODO: can be tighter, use c to bound width
    uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
    if (n_tile_alloc == 0) {
        // Nothing to write; skip the atomic on the shared allocator.
        return;
    }
    // Consider using subgroups to aggregate atomic add.
    uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);

    TileSeg tile_seg;
    tile_seg.start = line.p0;
    tile_seg.end = line.p1;
    for (int y = y0; y < y1; y++) {
        int xx0 = clamp(int(floor(t - c)), x0, x1);
        int xx1 = clamp(int(ceil(t + c)), x0, x1);
        for (int x = xx0; x < xx1; x++) {
            TileRef tile_ref = Tile_index(path.tiles, uint(base + x));
            // The first word of a Tile is the head of its segment list.
            uint tile_el = tile_ref.offset >> 2;
            // Push onto the list: atomically install the new node as head and
            // link the previous head behind it.
            tile_seg.next.offset = atomicExchange(tile[tile_el], tile_offset);
            TileSeg_write(TileSegRef(tile_offset), tile_seg);
            tile_offset += TileSeg_size;
        }
        // TODO for fills: backdrop
        t += b;
        base += stride;
    }
}

Binary file not shown.

125
piet-gpu/shader/pathseg.h Normal file
View file

@ -0,0 +1,125 @@
// Code auto-generated by piet-gpu-derive
// Typed references into the `pathseg` buffer. `offset` is a byte offset (the
// accessors below shift it right by 2 to index the uint array).
struct PathFillLineRef {
uint offset;
};
struct PathStrokeLineRef {
uint offset;
};
struct PathSegRef {
uint offset;
};
// Line segment of a filled path.
struct PathFillLine {
vec2 p0;
vec2 p1;
uint path_ix;
};
// Size in bytes: five 32-bit words.
#define PathFillLine_size 20
// Reference to the element `index` places after `ref` in an array of PathFillLine.
PathFillLineRef PathFillLine_index(PathFillLineRef ref, uint index) {
return PathFillLineRef(ref.offset + index * PathFillLine_size);
}
// Line segment of a stroked path; same leading fields as PathFillLine plus
// the stroke half-width.
struct PathStrokeLine {
vec2 p0;
vec2 p1;
uint path_ix;
vec2 stroke;
};
// Size in bytes: seven 32-bit words.
#define PathStrokeLine_size 28
// Reference to the element `index` places after `ref` in an array of PathStrokeLine.
PathStrokeLineRef PathStrokeLine_index(PathStrokeLineRef ref, uint index) {
return PathStrokeLineRef(ref.offset + index * PathStrokeLine_size);
}
// Tag values stored in the first word of a PathSeg.
#define PathSeg_Nop 0
#define PathSeg_FillLine 1
#define PathSeg_StrokeLine 2
// Size in bytes of one tagged PathSeg: 4-byte tag followed by the largest
// payload (PathStrokeLine, 28 bytes).
#define PathSeg_size 32
// Reference to the element `index` places after `ref` in an array of PathSeg.
PathSegRef PathSeg_index(PathSegRef ref, uint index) {
return PathSegRef(ref.offset + index * PathSeg_size);
}
// Load a PathFillLine from the `pathseg` buffer at byte offset `ref.offset`.
PathFillLine PathFillLine_read(PathFillLineRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = pathseg[ix + 0];
uint raw1 = pathseg[ix + 1];
uint raw2 = pathseg[ix + 2];
uint raw3 = pathseg[ix + 3];
uint raw4 = pathseg[ix + 4];
PathFillLine s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.path_ix = raw4;
return s;
}
// Store a PathFillLine into the `pathseg` buffer at byte offset `ref.offset`.
void PathFillLine_write(PathFillLineRef ref, PathFillLine s) {
uint ix = ref.offset >> 2;
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
pathseg[ix + 4] = s.path_ix;
}
// Load a PathStrokeLine from the `pathseg` buffer at byte offset `ref.offset`.
// Words 0-4 have the same meaning as in PathFillLine; words 5-6 hold the stroke.
PathStrokeLine PathStrokeLine_read(PathStrokeLineRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = pathseg[ix + 0];
uint raw1 = pathseg[ix + 1];
uint raw2 = pathseg[ix + 2];
uint raw3 = pathseg[ix + 3];
uint raw4 = pathseg[ix + 4];
uint raw5 = pathseg[ix + 5];
uint raw6 = pathseg[ix + 6];
PathStrokeLine s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.path_ix = raw4;
s.stroke = vec2(uintBitsToFloat(raw5), uintBitsToFloat(raw6));
return s;
}
// Store a PathStrokeLine into the `pathseg` buffer at byte offset `ref.offset`.
void PathStrokeLine_write(PathStrokeLineRef ref, PathStrokeLine s) {
uint ix = ref.offset >> 2;
pathseg[ix + 0] = floatBitsToUint(s.p0.x);
pathseg[ix + 1] = floatBitsToUint(s.p0.y);
pathseg[ix + 2] = floatBitsToUint(s.p1.x);
pathseg[ix + 3] = floatBitsToUint(s.p1.y);
pathseg[ix + 4] = s.path_ix;
pathseg[ix + 5] = floatBitsToUint(s.stroke.x);
pathseg[ix + 6] = floatBitsToUint(s.stroke.y);
}
// Read the tag word (one of the PathSeg_* constants) of the PathSeg at `ref`.
uint PathSeg_tag(PathSegRef ref) {
return pathseg[ref.offset >> 2];
}
// Read the payload of a PathSeg as a PathFillLine; the payload starts 4 bytes
// in, right after the tag. The tag itself is not checked here.
PathFillLine PathSeg_FillLine_read(PathSegRef ref) {
return PathFillLine_read(PathFillLineRef(ref.offset + 4));
}
// Read the payload of a PathSeg as a PathStrokeLine. The tag is not checked.
PathStrokeLine PathSeg_StrokeLine_read(PathSegRef ref) {
return PathStrokeLine_read(PathStrokeLineRef(ref.offset + 4));
}
// Write a Nop PathSeg: only the tag word is stored, the payload is left as is.
void PathSeg_Nop_write(PathSegRef ref) {
pathseg[ref.offset >> 2] = PathSeg_Nop;
}
// Write a FillLine PathSeg: tag word followed by the payload.
void PathSeg_FillLine_write(PathSegRef ref, PathFillLine s) {
pathseg[ref.offset >> 2] = PathSeg_FillLine;
PathFillLine_write(PathFillLineRef(ref.offset + 4), s);
}
// Write a StrokeLine PathSeg: tag word followed by the payload.
void PathSeg_StrokeLine_write(PathSegRef ref, PathStrokeLine s) {
pathseg[ref.offset >> 2] = PathSeg_StrokeLine;
PathStrokeLine_write(PathStrokeLineRef(ref.offset + 4), s);
}

View file

@ -31,6 +31,7 @@
// TODO: compute all these // TODO: compute all these
#define WIDTH_IN_TILES 128 #define WIDTH_IN_TILES 128
#define HEIGHT_IN_TILES 96
#define TILEGROUP_WIDTH_TILES 32 #define TILEGROUP_WIDTH_TILES 32
#define TILE_WIDTH_PX 16 #define TILE_WIDTH_PX 16
#define TILE_HEIGHT_PX 16 #define TILE_HEIGHT_PX 16

View file

@ -10,9 +10,11 @@ struct State {
vec4 bbox; vec4 bbox;
float linewidth; float linewidth;
uint flags; uint flags;
uint path_count;
uint pathseg_count;
}; };
#define State_size 48 #define State_size 56
StateRef State_index(StateRef ref, uint index) { StateRef State_index(StateRef ref, uint index) {
return StateRef(ref.offset + index * State_size); return StateRef(ref.offset + index * State_size);
@ -32,12 +34,16 @@ State State_read(StateRef ref) {
uint raw9 = state[ix + 9]; uint raw9 = state[ix + 9];
uint raw10 = state[ix + 10]; uint raw10 = state[ix + 10];
uint raw11 = state[ix + 11]; uint raw11 = state[ix + 11];
uint raw12 = state[ix + 12];
uint raw13 = state[ix + 13];
State s; State s;
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3)); s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5)); s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9)); s.bbox = vec4(uintBitsToFloat(raw6), uintBitsToFloat(raw7), uintBitsToFloat(raw8), uintBitsToFloat(raw9));
s.linewidth = uintBitsToFloat(raw10); s.linewidth = uintBitsToFloat(raw10);
s.flags = raw11; s.flags = raw11;
s.path_count = raw12;
s.pathseg_count = raw13;
return s; return s;
} }
@ -55,5 +61,7 @@ void State_write(StateRef ref, State s) {
state[ix + 9] = floatBitsToUint(s.bbox.w); state[ix + 9] = floatBitsToUint(s.bbox.w);
state[ix + 10] = floatBitsToUint(s.linewidth); state[ix + 10] = floatBitsToUint(s.linewidth);
state[ix + 11] = s.flags; state[ix + 11] = s.flags;
state[ix + 12] = s.path_count;
state[ix + 13] = s.pathseg_count;
} }

105
piet-gpu/shader/tile.h Normal file
View file

@ -0,0 +1,105 @@
// Code auto-generated by piet-gpu-derive
// Typed references into the `tile` buffer. `offset` is a byte offset (the
// accessors below shift it right by 2 to index the uint array).
struct PathRef {
uint offset;
};
struct TileRef {
uint offset;
};
struct TileSegRef {
uint offset;
};
// Per-path record: bounding box plus a reference to the path's tiles.
// In the buffer the four bbox components are packed as 16-bit values,
// two per word (see Path_read / Path_write).
struct Path {
uvec4 bbox;
TileRef tiles;
};
// Size in bytes: two words of packed bbox plus one word for the reference.
#define Path_size 12
// Reference to the element `index` places after `ref` in an array of Path.
PathRef Path_index(PathRef ref, uint index) {
return PathRef(ref.offset + index * Path_size);
}
// One tile of a path: head of its segment list and a backdrop value.
struct Tile {
TileSegRef tile;
int backdrop;
};
// Size in bytes: two 32-bit words.
#define Tile_size 8
// Reference to the element `index` places after `ref` in an array of Tile.
TileRef Tile_index(TileRef ref, uint index) {
return TileRef(ref.offset + index * Tile_size);
}
// Node of a tile's segment list: segment endpoints and a link to the next node.
struct TileSeg {
vec2 start;
vec2 end;
TileSegRef next;
};
// Size in bytes: five 32-bit words.
#define TileSeg_size 20
// Reference to the element `index` places after `ref` in an array of TileSeg.
TileSegRef TileSeg_index(TileSegRef ref, uint index) {
return TileSegRef(ref.offset + index * TileSeg_size);
}
// Load a Path from the `tile` buffer at byte offset `ref.offset`.
// Each of the first two words holds two 16-bit bbox components: the low half
// is x (resp. z) and the high half is y (resp. w).
Path Path_read(PathRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = tile[ix + 0];
uint raw1 = tile[ix + 1];
uint raw2 = tile[ix + 2];
Path s;
s.bbox = uvec4(raw0 & 0xffff, raw0 >> 16, raw1 & 0xffff, raw1 >> 16);
s.tiles = TileRef(raw2);
return s;
}
// Store a Path into the `tile` buffer at byte offset `ref.offset`.
// The bbox components are OR-ed together without masking, so each must
// already fit in 16 bits for the packing to round-trip.
void Path_write(PathRef ref, Path s) {
uint ix = ref.offset >> 2;
tile[ix + 0] = s.bbox.x | (s.bbox.y << 16);
tile[ix + 1] = s.bbox.z | (s.bbox.w << 16);
tile[ix + 2] = s.tiles.offset;
}
// Load a Tile from the `tile` buffer at byte offset `ref.offset`.
Tile Tile_read(TileRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = tile[ix + 0];
uint raw1 = tile[ix + 1];
Tile s;
s.tile = TileSegRef(raw0);
s.backdrop = int(raw1);
return s;
}
// Store a Tile into the `tile` buffer at byte offset `ref.offset`.
void Tile_write(TileRef ref, Tile s) {
uint ix = ref.offset >> 2;
tile[ix + 0] = s.tile.offset;
tile[ix + 1] = uint(s.backdrop);
}
// Load a TileSeg from the `tile` buffer at byte offset `ref.offset`.
TileSeg TileSeg_read(TileSegRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = tile[ix + 0];
uint raw1 = tile[ix + 1];
uint raw2 = tile[ix + 2];
uint raw3 = tile[ix + 3];
uint raw4 = tile[ix + 4];
TileSeg s;
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.next = TileSegRef(raw4);
return s;
}
// Store a TileSeg into the `tile` buffer at byte offset `ref.offset`.
void TileSeg_write(TileSegRef ref, TileSeg s) {
uint ix = ref.offset >> 2;
tile[ix + 0] = floatBitsToUint(s.start.x);
tile[ix + 1] = floatBitsToUint(s.start.y);
tile[ix + 2] = floatBitsToUint(s.end.x);
tile[ix + 3] = floatBitsToUint(s.end.y);
tile[ix + 4] = s.next.offset;
}

View file

@ -0,0 +1,73 @@
// Allocation and initialization of tiles for paths.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "setup.h"
#define TILE_ALLOC_WG 32
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
layout(set = 0, binding = 0) buffer AnnotatedBuf {
uint[] annotated;
};
layout(set = 0, binding = 1) buffer AllocBuf {
uint n_elements;
uint n_pathseg;
uint alloc;
};
layout(set = 0, binding = 2) buffer TileBuf {
uint[] tile;
};
#include "annotated.h"
#include "tile.h"
// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))
// Allocation and initialization of the tiles of one path.
//
// Each invocation handles the annotated element at index
// gl_GlobalInvocationID.x. For a fill or stroke it converts the element's
// bounding box to tile coordinates (clamped to the tile grid), reserves one
// Tile per covered tile from the shared allocator, clears those tiles, and
// records the bounding box and tile reference in the element's Path record.
// Any other element gets an empty Path record.
void main() {
    uint element_ix = gl_GlobalInvocationID.x;
    // Invocations past the last element have nothing to describe. Writing a
    // Path record for them would store past the n_elements path records
    // (NOTE(review): that region is presumably where tile data is allocated
    // -- confirm against the host-side buffer layout). This shader has no
    // barriers, so an early return is safe.
    if (element_ix >= n_elements) {
        return;
    }
    PathRef path_ref = PathRef(element_ix * Path_size);
    AnnotatedRef ref = AnnotatedRef(element_ix * Annotated_size);

    // Bounding box in tile coordinates; stays empty for non-path elements.
    int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    switch (Annotated_tag(ref)) {
    case Annotated_Fill:
    case Annotated_Stroke:
        // Note: we take advantage of the fact that fills and strokes
        // have compatible layout.
        AnnoFill fill = Annotated_Fill_read(ref);
        x0 = int(floor(fill.bbox.x * SX));
        y0 = int(floor(fill.bbox.y * SY));
        x1 = int(ceil(fill.bbox.z * SX));
        y1 = int(ceil(fill.bbox.w * SY));
        break;
    }
    // Restrict to the tile grid; this also keeps the values within the
    // 16-bit range used by the packed Path bbox.
    x0 = clamp(x0, 0, WIDTH_IN_TILES);
    y0 = clamp(y0, 0, HEIGHT_IN_TILES);
    x1 = clamp(x1, 0, WIDTH_IN_TILES);
    y1 = clamp(y1, 0, HEIGHT_IN_TILES);

    Path path;
    path.bbox = uvec4(x0, y0, x1, y1);
    path.tiles = TileRef(0);

    uint n_tiles = uint((x1 - x0) * (y1 - y0));
    if (n_tiles > 0) {
        path.tiles.offset = atomicAdd(alloc, n_tiles * Tile_size);
        // Empty segment list, zero backdrop.
        Tile init_tile = Tile(TileSegRef(0), 0);
        // TODO: improve load balancing
        for (uint i = 0; i < n_tiles; i++) {
            Tile_write(Tile_index(path.tiles, i), init_tile);
        }
    }
    Path_write(path_ref, path);
}

Binary file not shown.

View file

@ -129,12 +129,23 @@ pub struct Renderer<D: Device> {
pub state_buf: D::Buffer, pub state_buf: D::Buffer,
pub anno_buf: D::Buffer, pub anno_buf: D::Buffer,
pub pathseg_buf: D::Buffer,
pub tile_buf: D::Buffer,
pub bin_buf: D::Buffer, pub bin_buf: D::Buffer,
pub ptcl_buf: D::Buffer, pub ptcl_buf: D::Buffer,
el_pipeline: D::Pipeline, el_pipeline: D::Pipeline,
el_ds: D::DescriptorSet, el_ds: D::DescriptorSet,
tile_pipeline: D::Pipeline,
tile_ds: D::DescriptorSet,
path_pipeline: D::Pipeline,
path_ds: D::DescriptorSet,
tile_alloc_buf_host: D::Buffer,
tile_alloc_buf_dev: D::Buffer,
bin_pipeline: D::Pipeline, bin_pipeline: D::Pipeline,
bin_ds: D::DescriptorSet, bin_ds: D::DescriptorSet,
@ -151,10 +162,12 @@ pub struct Renderer<D: Device> {
k4_ds: D::DescriptorSet, k4_ds: D::DescriptorSet,
n_elements: usize, n_elements: usize,
n_paths: usize,
n_pathseg: usize,
} }
impl<D: Device> Renderer<D> { impl<D: Device> Renderer<D> {
pub unsafe fn new(device: &D, scene: &[u8]) -> Result<Self, Error> { pub unsafe fn new(device: &D, scene: &[u8], n_paths: usize, n_pathseg: usize) -> Result<Self, Error> {
let host = MemFlags::host_coherent(); let host = MemFlags::host_coherent();
let dev = MemFlags::device_local(); let dev = MemFlags::device_local();
@ -170,16 +183,44 @@ impl<D: Device> Renderer<D> {
device.write_buffer(&scene_buf, &scene)?; device.write_buffer(&scene_buf, &scene)?;
let state_buf = device.create_buffer(1 * 1024 * 1024, dev)?; let state_buf = device.create_buffer(1 * 1024 * 1024, dev)?;
let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let anno_buf = device.create_buffer(64 * 1024 * 1024, host)?;
let pathseg_buf = device.create_buffer(64 * 1024 * 1024, host)?;
let tile_buf = device.create_buffer(64 * 1024 * 1024, host)?;
let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?; let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?; let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?;
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?; let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
let el_code = include_bytes!("../shader/elements.spv"); let el_code = include_bytes!("../shader/elements.spv");
let el_pipeline = device.create_simple_compute_pipeline(el_code, 3, 0)?; let el_pipeline = device.create_simple_compute_pipeline(el_code, 4, 0)?;
let el_ds = device.create_descriptor_set( let el_ds = device.create_descriptor_set(
&el_pipeline, &el_pipeline,
&[&scene_dev, &state_buf, &anno_buf], &[&scene_dev, &state_buf, &anno_buf, &pathseg_buf],
&[],
)?;
let tile_alloc_buf_host = device.create_buffer(12, host)?;
let tile_alloc_buf_dev = device.create_buffer(12, dev)?;
// TODO: constants
const PATH_SIZE: usize = 12;
let tile_alloc_start = ((n_paths + 31) & !31) * PATH_SIZE;
device.write_buffer(
&tile_alloc_buf_host,
&[n_paths as u32, n_pathseg as u32, tile_alloc_start as u32],
)?;
let tile_alloc_code = include_bytes!("../shader/tile_alloc.spv");
let tile_pipeline = device.create_simple_compute_pipeline(tile_alloc_code, 3, 0)?;
let tile_ds = device.create_descriptor_set(
&tile_pipeline,
&[&anno_buf, &tile_alloc_buf_dev, &tile_buf],
&[],
)?;
let path_alloc_code = include_bytes!("../shader/path_coarse.spv");
let path_pipeline = device.create_simple_compute_pipeline(path_alloc_code, 3, 0)?;
let path_ds = device.create_descriptor_set(
&path_pipeline,
&[&pathseg_buf, &tile_alloc_buf_dev, &tile_buf],
&[], &[],
)?; )?;
@ -226,6 +267,10 @@ impl<D: Device> Renderer<D> {
image_dev, image_dev,
el_pipeline, el_pipeline,
el_ds, el_ds,
tile_pipeline,
tile_ds,
path_pipeline,
path_ds,
bin_pipeline, bin_pipeline,
bin_ds, bin_ds,
coarse_pipeline, coarse_pipeline,
@ -234,18 +279,25 @@ impl<D: Device> Renderer<D> {
k4_ds, k4_ds,
state_buf, state_buf,
anno_buf, anno_buf,
pathseg_buf,
tile_buf,
bin_buf, bin_buf,
ptcl_buf, ptcl_buf,
tile_alloc_buf_host,
tile_alloc_buf_dev,
bin_alloc_buf_host, bin_alloc_buf_host,
bin_alloc_buf_dev, bin_alloc_buf_dev,
coarse_alloc_buf_host, coarse_alloc_buf_host,
coarse_alloc_buf_dev, coarse_alloc_buf_dev,
n_elements, n_elements,
n_paths,
n_pathseg,
}) })
} }
pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf<D>, query_pool: &D::QueryPool) { pub unsafe fn record(&self, cmd_buf: &mut impl CmdBuf<D>, query_pool: &D::QueryPool) {
cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev); cmd_buf.copy_buffer(&self.scene_buf, &self.scene_dev);
cmd_buf.copy_buffer(&self.tile_alloc_buf_host, &self.tile_alloc_buf_dev);
cmd_buf.copy_buffer(&self.bin_alloc_buf_host, &self.bin_alloc_buf_dev); cmd_buf.copy_buffer(&self.bin_alloc_buf_host, &self.bin_alloc_buf_dev);
cmd_buf.copy_buffer(&self.coarse_alloc_buf_host, &self.coarse_alloc_buf_dev); cmd_buf.copy_buffer(&self.coarse_alloc_buf_host, &self.coarse_alloc_buf_dev);
cmd_buf.clear_buffer(&self.state_buf); cmd_buf.clear_buffer(&self.state_buf);
@ -264,26 +316,44 @@ impl<D: Device> Renderer<D> {
); );
cmd_buf.write_timestamp(&query_pool, 1); cmd_buf.write_timestamp(&query_pool, 1);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.tile_pipeline,
&self.tile_ds,
(((self.n_paths + 31) / 32) as u32, 1, 1),
);
cmd_buf.write_timestamp(&query_pool, 2);
cmd_buf.memory_barrier();
cmd_buf.dispatch(
&self.path_pipeline,
&self.path_ds,
(((self.n_pathseg + 31) / 32) as u32, 1, 1),
);
/*
cmd_buf.dispatch( cmd_buf.dispatch(
&self.bin_pipeline, &self.bin_pipeline,
&self.bin_ds, &self.bin_ds,
(((self.n_elements + 255) / 256) as u32, 1, 1), (((self.n_elements + 255) / 256) as u32, 1, 1),
); );
cmd_buf.write_timestamp(&query_pool, 2); */
cmd_buf.write_timestamp(&query_pool, 3);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
/*
cmd_buf.dispatch( cmd_buf.dispatch(
&self.coarse_pipeline, &self.coarse_pipeline,
&self.coarse_ds, &self.coarse_ds,
(WIDTH as u32 / 256, HEIGHT as u32 / 256, 1), (WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
); );
cmd_buf.write_timestamp(&query_pool, 3); */
cmd_buf.write_timestamp(&query_pool, 4);
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
/*
cmd_buf.dispatch( cmd_buf.dispatch(
&self.k4_pipeline, &self.k4_pipeline,
&self.k4_ds, &self.k4_ds,
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1), ((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
); );
cmd_buf.write_timestamp(&query_pool, 4); cmd_buf.write_timestamp(&query_pool, 5);
*/
cmd_buf.memory_barrier(); cmd_buf.memory_barrier();
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc); cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
} }

View file

@ -35,6 +35,10 @@ pub struct PietGpuRenderContext {
// Will probably need direct accesss to hal Device to create images etc. // Will probably need direct accesss to hal Device to create images etc.
inner_text: PietGpuText, inner_text: PietGpuText,
stroke_width: f32, stroke_width: f32,
// We're tallying these cpu-side for expedience, but will probably
// move this to some kind of readback from element processing.
path_count: usize,
pathseg_count: usize,
} }
#[derive(Clone)] #[derive(Clone)]
@ -56,6 +60,8 @@ impl PietGpuRenderContext {
elements, elements,
inner_text, inner_text,
stroke_width, stroke_width,
path_count: 0,
pathseg_count: 0,
} }
} }
@ -63,6 +69,14 @@ impl PietGpuRenderContext {
self.elements.encode(&mut self.encoder); self.elements.encode(&mut self.encoder);
self.encoder.buf() self.encoder.buf()
} }
/// Returns the number of paths tallied so far.
///
/// The counter starts at zero and is incremented each time a `Fill` or
/// `Stroke` element with a solid brush is pushed.
pub fn path_count(&self) -> usize {
self.path_count
}
/// Returns the number of path segments tallied so far.
///
/// The counter starts at zero and is incremented once for every line
/// segment element that is encoded.
pub fn pathseg_count(&self) -> usize {
self.pathseg_count
}
} }
impl RenderContext for PietGpuRenderContext { impl RenderContext for PietGpuRenderContext {
@ -99,6 +113,7 @@ impl RenderContext for PietGpuRenderContext {
PietGpuBrush::Solid(rgba_color) => { PietGpuBrush::Solid(rgba_color) => {
let stroke = Stroke { rgba_color }; let stroke = Stroke { rgba_color };
self.elements.push(Element::Stroke(stroke)); self.elements.push(Element::Stroke(stroke));
self.path_count += 1;
} }
_ => (), _ => (),
} }
@ -121,6 +136,7 @@ impl RenderContext for PietGpuRenderContext {
PietGpuBrush::Solid(rgba_color) => { PietGpuBrush::Solid(rgba_color) => {
let fill = Fill { rgba_color }; let fill = Fill { rgba_color };
self.elements.push(Element::Fill(fill)); self.elements.push(Element::Fill(fill));
self.path_count += 1;
} }
_ => (), _ => (),
} }
@ -204,6 +220,7 @@ impl PietGpuRenderContext {
} else { } else {
self.elements.push(Element::StrokeLine(seg)); self.elements.push(Element::StrokeLine(seg));
} }
self.pathseg_count += 1;
} }
fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) { fn encode_path(&mut self, path: impl Iterator<Item = PathEl>, is_fill: bool) {