Mirror of https://github.com/italicsjenga/vello.git, synced 2025-01-09 20:31:29 +11:00.
Continue building out pipeline
Plumbs the new tiling scheme through to k4 (kernel4). This works for strokes only, and still has some performance issues.
This commit is contained in:
Parent: 294f6fd1db
Commit: 70a9c17e23
|
@ -13,8 +13,9 @@ piet_gpu! {
|
|||
end: [f32; 2],
|
||||
}
|
||||
struct CmdStroke {
|
||||
// Consider a specialization to one segment.
|
||||
seg_ref: Ref<SegChunk>,
|
||||
// This is really a Ref<Tile>, but we don't have cross-module
|
||||
// references.
|
||||
tile_ref: u32,
|
||||
half_width: f32,
|
||||
rgba_color: u32,
|
||||
}
|
||||
|
|
|
@ -171,7 +171,7 @@ fn main() -> Result<(), Error> {
|
|||
|
||||
let fence = device.create_fence(false)?;
|
||||
let mut cmd_buf = device.create_cmd_buf()?;
|
||||
let query_pool = device.create_query_pool(5)?;
|
||||
let query_pool = device.create_query_pool(7)?;
|
||||
|
||||
let mut ctx = PietGpuRenderContext::new();
|
||||
if let Some(input) = matches.value_of("INPUT") {
|
||||
|
@ -204,14 +204,16 @@ fn main() -> Result<(), Error> {
|
|||
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
|
||||
println!("Tile allocation kernel time: {:.3}ms", (ts[1] - ts[0]) * 1e3);
|
||||
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
|
||||
/*
|
||||
println!("Render kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
|
||||
*/
|
||||
println!("Binning kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
|
||||
println!("Coarse raster kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
|
||||
println!("Render kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
|
||||
|
||||
/*
|
||||
let mut data: Vec<u32> = Default::default();
|
||||
device.read_buffer(&renderer.tile_buf, &mut data).unwrap();
|
||||
device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap();
|
||||
piet_gpu::dump_k1_data(&data);
|
||||
//trace_ptcl(&data);
|
||||
*/
|
||||
|
||||
let mut img_data: Vec<u8> = Default::default();
|
||||
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
|
||||
|
|
|
@ -15,17 +15,22 @@ layout(set = 0, binding = 1) buffer BinsBuf {
|
|||
uint[] bins;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 2) buffer AllocBuf {
|
||||
layout(set = 0, binding = 2) buffer TileBuf {
|
||||
uint[] tile;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 3) buffer AllocBuf {
|
||||
uint n_elements;
|
||||
uint alloc;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 3) buffer PtclBuf {
|
||||
layout(set = 0, binding = 4) buffer PtclBuf {
|
||||
uint[] ptcl;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
#include "bins.h"
|
||||
#include "tile.h"
|
||||
#include "ptcl.h"
|
||||
|
||||
#define LG_N_PART_READ 8
|
||||
|
@ -197,37 +202,11 @@ void main() {
|
|||
tag = Annotated_tag(ref);
|
||||
}
|
||||
|
||||
// Setup for coverage algorithm.
|
||||
float a, b, c;
|
||||
// Bounding box of element in pixel coordinates.
|
||||
float xmin, xmax, ymin, ymax;
|
||||
uint my_slice = th_ix / 32;
|
||||
uint my_mask = 1 << (th_ix & 31);
|
||||
switch (tag) {
|
||||
case Annotated_FillLine:
|
||||
case Annotated_StrokeLine:
|
||||
AnnoStrokeLineSeg line = Annotated_StrokeLine_read(ref);
|
||||
xmin = min(line.p0.x, line.p1.x) - line.stroke.x;
|
||||
xmax = max(line.p0.x, line.p1.x) + line.stroke.x;
|
||||
ymin = min(line.p0.y, line.p1.y) - line.stroke.y;
|
||||
ymax = max(line.p0.y, line.p1.y) + line.stroke.y;
|
||||
float dx = line.p1.x - line.p0.x;
|
||||
float dy = line.p1.y - line.p0.y;
|
||||
if (tag == Annotated_FillLine) {
|
||||
// Set bit for backdrop sign calculation, 1 is +1, 0 is -1.
|
||||
if (dy < 0) {
|
||||
atomicOr(sh_bd_sign[my_slice], my_mask);
|
||||
} else {
|
||||
atomicAnd(sh_bd_sign[my_slice], ~my_mask);
|
||||
}
|
||||
}
|
||||
atomicOr(sh_is_segment[my_slice], my_mask);
|
||||
// Set up for per-scanline coverage formula, below.
|
||||
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
|
||||
c = (line.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + line.stroke.y)) * SX;
|
||||
b = invslope; // Note: assumes square tiles, otherwise scale.
|
||||
a = (line.p0.x - xy0.x - (line.p0.y - 0.5 * float(TILE_HEIGHT_PX) - xy0.y) * b) * SX;
|
||||
break;
|
||||
case Annotated_Fill:
|
||||
case Annotated_Stroke:
|
||||
// Note: we take advantage of the fact that fills and strokes
|
||||
|
@ -237,10 +216,6 @@ void main() {
|
|||
xmax = fill.bbox.z;
|
||||
ymin = fill.bbox.y;
|
||||
ymax = fill.bbox.w;
|
||||
// Just let the clamping to xmin and xmax determine the bounds.
|
||||
a = 0.0;
|
||||
b = 0.0;
|
||||
c = 1e9;
|
||||
break;
|
||||
default:
|
||||
ymin = 0;
|
||||
|
@ -254,37 +229,23 @@ void main() {
|
|||
// Compute bounding box in tiles and clip to this bin.
|
||||
int x0 = int(floor((xmin - xy0.x) * SX));
|
||||
int x1 = int(ceil((xmax - xy0.x) * SX));
|
||||
int xr = int(ceil((right_edge - xy0.x) * SX));
|
||||
int y0 = int(floor((ymin - xy0.y) * SY));
|
||||
int y1 = int(ceil((ymax - xy0.y) * SY));
|
||||
x0 = clamp(x0, 0, N_TILE_X);
|
||||
x1 = clamp(x1, x0, N_TILE_X);
|
||||
xr = clamp(xr, 0, N_TILE_X);
|
||||
y0 = clamp(y0, 0, N_TILE_Y);
|
||||
y1 = clamp(y1, y0, N_TILE_Y);
|
||||
float t = a + b * float(y0);
|
||||
for (uint y = y0; y < y1; y++) {
|
||||
uint xx0 = clamp(int(floor(t - c)), x0, x1);
|
||||
uint xx1 = clamp(int(ceil(t + c)), x0, x1);
|
||||
for (uint x = xx0; x < xx1; x++) {
|
||||
for (uint x = x0; x < x1; x++) {
|
||||
atomicOr(sh_bitmaps[my_slice][y * N_TILE_X + x], my_mask);
|
||||
}
|
||||
if (tag == Annotated_FillLine && ymin <= xy0.y + float(y * TILE_HEIGHT_PX)) {
|
||||
// Assign backdrop to all tiles to the right of the ray crossing the
|
||||
// top edge of this tile, up to the right edge of the fill bbox.
|
||||
float xray = t - 0.5 * b;
|
||||
xx0 = max(int(ceil(xray)), 0);
|
||||
for (uint x = xx0; x < xr; x++) {
|
||||
atomicOr(sh_backdrop[my_slice][y * N_TILE_X + x], my_mask);
|
||||
}
|
||||
}
|
||||
t += b;
|
||||
}
|
||||
barrier();
|
||||
|
||||
// We've computed coverage and other info for each element in the input, now for
|
||||
// the output stage. We'll do segments first using a more parallel algorithm.
|
||||
|
||||
/*
|
||||
uint seg_count = 0;
|
||||
for (uint i = 0; i < N_SLICE; i++) {
|
||||
seg_count += bitCount(sh_bitmaps[i][th_ix] & sh_is_segment[i]);
|
||||
|
@ -372,45 +333,29 @@ void main() {
|
|||
Segment seg = Segment(line.p0, line.p1, y_edge);
|
||||
Segment_write(SegmentRef(seg_alloc + Segment_size * ix), seg);
|
||||
}
|
||||
*/
|
||||
|
||||
// Output non-segment elements for this tile. The thread does a sequential walk
|
||||
// through the non-segment elements, and for segments, count and backdrop are
|
||||
// aggregated using bit counting.
|
||||
uint slice_ix = 0;
|
||||
uint bitmap = sh_bitmaps[0][th_ix];
|
||||
uint bd_bitmap = sh_backdrop[0][th_ix];
|
||||
uint bd_sign = sh_bd_sign[0];
|
||||
uint is_segment = sh_is_segment[0];
|
||||
uint seg_start = th_ix == 0 ? 0 : sh_seg_count[th_ix - 1];
|
||||
seg_count = 0;
|
||||
while (true) {
|
||||
uint nonseg_bitmap = bitmap & ~is_segment;
|
||||
if (nonseg_bitmap == 0) {
|
||||
backdrop += count_backdrop(bd_bitmap, bd_sign);
|
||||
seg_count += bitCount(bitmap & is_segment);
|
||||
if (bitmap == 0) {
|
||||
slice_ix++;
|
||||
if (slice_ix == N_SLICE) {
|
||||
break;
|
||||
}
|
||||
bitmap = sh_bitmaps[slice_ix][th_ix];
|
||||
bd_bitmap = sh_backdrop[slice_ix][th_ix];
|
||||
bd_sign = sh_bd_sign[slice_ix];
|
||||
is_segment = sh_is_segment[slice_ix];
|
||||
nonseg_bitmap = bitmap & ~is_segment;
|
||||
if (nonseg_bitmap == 0) {
|
||||
if (bitmap == 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
uint element_ref_ix = slice_ix * 32 + findLSB(nonseg_bitmap);
|
||||
uint element_ref_ix = slice_ix * 32 + findLSB(bitmap);
|
||||
uint element_ix = sh_elements[element_ref_ix];
|
||||
|
||||
// Bits up to and including the lsb
|
||||
uint bd_mask = (nonseg_bitmap - 1) ^ nonseg_bitmap;
|
||||
backdrop += count_backdrop(bd_bitmap & bd_mask, bd_sign);
|
||||
seg_count += bitCount(bitmap & bd_mask & is_segment);
|
||||
// Clear bits that have been consumed.
|
||||
bd_bitmap &= ~bd_mask;
|
||||
bitmap &= ~bd_mask;
|
||||
// Clear LSB
|
||||
bitmap &= bitmap - 1;
|
||||
|
||||
// At this point, we read the element again from global memory.
|
||||
// If that turns out to be expensive, maybe we can pack it into
|
||||
|
@ -419,6 +364,7 @@ void main() {
|
|||
tag = Annotated_tag(ref);
|
||||
|
||||
switch (tag) {
|
||||
/*
|
||||
case Annotated_Fill:
|
||||
if (last_chunk_n > 0 || seg_count > 0) {
|
||||
SegChunkRef chunk_ref = SegChunkRef(0);
|
||||
|
@ -460,63 +406,34 @@ void main() {
|
|||
seg_count = 0;
|
||||
backdrop = 0;
|
||||
break;
|
||||
*/
|
||||
case Annotated_Stroke:
|
||||
// TODO: reduce divergence & code duplication? Much of the
|
||||
// fill and stroke processing is in common.
|
||||
if (last_chunk_n > 0 || seg_count > 0) {
|
||||
SegChunkRef chunk_ref = SegChunkRef(0);
|
||||
if (seg_count > 0) {
|
||||
chunk_ref = alloc_seg_chunk();
|
||||
SegChunk chunk;
|
||||
chunk.n = seg_count;
|
||||
chunk.next = SegChunkRef(0);
|
||||
uint seg_offset = seg_alloc + seg_start * Segment_size;
|
||||
chunk.segs = SegmentRef(seg_offset);
|
||||
SegChunk_write(chunk_ref, chunk);
|
||||
// Because the only elements we're processing right now are
|
||||
// paths, we can just use the element index as the path index.
|
||||
// In future, when we're doing a bunch of stuff, the path index
|
||||
// should probably be stored in the annotated element.
|
||||
uint path_ix = element_ix;
|
||||
Path path = Path_read(PathRef(path_ix * Path_size));
|
||||
// It may be we have a strong guarantee this will always be `true`, but
|
||||
// I prefer not to take chances.
|
||||
if (tile_x >= path.bbox.x && tile_x < path.bbox.z && tile_y >= path.bbox.y && tile_y < path.bbox.w) {
|
||||
uint stride = path.bbox.z - path.bbox.x;
|
||||
uint tile_subix = (tile_y - path.bbox.y) * stride + tile_x - path.bbox.x;
|
||||
Tile tile = Tile_read(Tile_index(path.tiles, tile_subix));
|
||||
if (tile.tile.offset != 0) {
|
||||
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
||||
CmdStroke cmd_stroke;
|
||||
cmd_stroke.tile_ref = tile.tile.offset;
|
||||
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
||||
cmd_stroke.rgba_color = stroke.rgba_color;
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
}
|
||||
if (last_chunk_n > 0) {
|
||||
SegChunk chunk;
|
||||
chunk.n = last_chunk_n;
|
||||
chunk.next = chunk_ref;
|
||||
chunk.segs = last_chunk_segs;
|
||||
SegChunk_write(last_chunk_ref, chunk);
|
||||
} else {
|
||||
first_seg_chunk = chunk_ref;
|
||||
}
|
||||
|
||||
AnnoStroke stroke = Annotated_Stroke_read(ref);
|
||||
CmdStroke cmd_stroke;
|
||||
cmd_stroke.seg_ref = first_seg_chunk;
|
||||
cmd_stroke.half_width = 0.5 * stroke.linewidth;
|
||||
cmd_stroke.rgba_color = stroke.rgba_color;
|
||||
alloc_cmd(cmd_ref, cmd_limit);
|
||||
Cmd_Stroke_write(cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += Cmd_size;
|
||||
last_chunk_n = 0;
|
||||
}
|
||||
seg_start += seg_count;
|
||||
seg_count = 0;
|
||||
break;
|
||||
default:
|
||||
// This shouldn't happen, but just in case.
|
||||
seg_start++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (seg_count > 0) {
|
||||
SegChunkRef chunk_ref = alloc_seg_chunk();
|
||||
if (last_chunk_n > 0) {
|
||||
SegChunk_write(last_chunk_ref, SegChunk(last_chunk_n, chunk_ref, last_chunk_segs));
|
||||
} else {
|
||||
first_seg_chunk = chunk_ref;
|
||||
}
|
||||
// TODO: free two registers by writing count and segments ref now,
|
||||
// as opposed to deferring SegChunk write until all fields are known.
|
||||
last_chunk_ref = chunk_ref;
|
||||
last_chunk_n = seg_count;
|
||||
uint seg_offset = seg_alloc + seg_start * Segment_size;
|
||||
last_chunk_segs = SegmentRef(seg_offset);
|
||||
}
|
||||
barrier();
|
||||
|
||||
rd_ix += N_TILE;
|
||||
|
|
Binary file not shown.
|
@ -17,9 +17,14 @@ layout(set = 0, binding = 0) buffer PtclBuf {
|
|||
uint[] ptcl;
|
||||
};
|
||||
|
||||
layout(rgba8, set = 0, binding = 1) uniform writeonly image2D image;
|
||||
layout(set = 0, binding = 1) buffer TileBuf {
|
||||
uint[] tile;
|
||||
};
|
||||
|
||||
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
|
||||
|
||||
#include "ptcl.h"
|
||||
#include "tile.h"
|
||||
|
||||
#include "setup.h"
|
||||
|
||||
|
@ -57,28 +62,25 @@ void main() {
|
|||
CmdStroke stroke = Cmd_Stroke_read(cmd_ref);
|
||||
float df[CHUNK];
|
||||
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
||||
SegChunkRef seg_chunk_ref = stroke.seg_ref;
|
||||
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
||||
do {
|
||||
SegChunk seg_chunk = SegChunk_read(seg_chunk_ref);
|
||||
SegmentRef segs = seg_chunk.segs;
|
||||
for (int i = 0; i < seg_chunk.n; i++) {
|
||||
Segment seg = Segment_read(Segment_index(segs, i));
|
||||
vec2 line_vec = seg.end - seg.start;
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||
dpos.y += float(k * CHUNK_DY);
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
df[k] = min(df[k], length(line_vec * t - dpos));
|
||||
}
|
||||
TileSeg seg = TileSeg_read(tile_seg_ref);
|
||||
vec2 line_vec = seg.end - seg.start;
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.start;
|
||||
dpos.y += float(k * CHUNK_DY);
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
df[k] = min(df[k], length(line_vec * t - dpos));
|
||||
}
|
||||
seg_chunk_ref = seg_chunk.next;
|
||||
} while (seg_chunk_ref.offset != 0);
|
||||
tile_seg_ref = seg.next;
|
||||
} while (tile_seg_ref.offset != 0);
|
||||
fg_rgba = unpackUnorm4x8(stroke.rgba_color).wzyx;
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
float alpha = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
|
||||
rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
}
|
||||
break;
|
||||
/*
|
||||
case Cmd_Fill:
|
||||
CmdFill fill = Cmd_Fill_read(cmd_ref);
|
||||
// Probably better to store as float, but conversion is no doubt cheap.
|
||||
|
@ -117,6 +119,7 @@ void main() {
|
|||
rgb[k] = mix(rgb[k], fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
}
|
||||
break;
|
||||
*/
|
||||
case Cmd_Solid:
|
||||
CmdSolid solid = Cmd_Solid_read(cmd_ref);
|
||||
fg_rgba = unpackUnorm4x8(solid.rgba_color).wzyx;
|
||||
|
|
Binary file not shown.
|
@ -78,7 +78,7 @@ void main() {
|
|||
int stride = bbox.z - bbox.x;
|
||||
int base = (y0 - bbox.y) * stride - bbox.x;
|
||||
// TODO: can be tighter, use c to bound width
|
||||
uint n_tile_alloc = uint(stride * (bbox.w - bbox.y));
|
||||
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
||||
// Consider using subgroups to aggregate atomic add.
|
||||
uint tile_offset = atomicAdd(alloc, n_tile_alloc * TileSeg_size);
|
||||
TileSeg tile_seg;
|
||||
|
|
Binary file not shown.
|
@ -68,7 +68,7 @@ CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
|
|||
}
|
||||
|
||||
struct CmdStroke {
|
||||
SegChunkRef seg_ref;
|
||||
uint tile_ref;
|
||||
float half_width;
|
||||
uint rgba_color;
|
||||
};
|
||||
|
@ -220,7 +220,7 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
|||
uint raw1 = ptcl[ix + 1];
|
||||
uint raw2 = ptcl[ix + 2];
|
||||
CmdStroke s;
|
||||
s.seg_ref = SegChunkRef(raw0);
|
||||
s.tile_ref = raw0;
|
||||
s.half_width = uintBitsToFloat(raw1);
|
||||
s.rgba_color = raw2;
|
||||
return s;
|
||||
|
@ -228,7 +228,7 @@ CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
|||
|
||||
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
|
||||
uint ix = ref.offset >> 2;
|
||||
ptcl[ix + 0] = s.seg_ref.offset;
|
||||
ptcl[ix + 0] = s.tile_ref;
|
||||
ptcl[ix + 1] = floatBitsToUint(s.half_width);
|
||||
ptcl[ix + 2] = s.rgba_color;
|
||||
}
|
||||
|
|
|
@ -183,9 +183,9 @@ impl<D: Device> Renderer<D> {
|
|||
device.write_buffer(&scene_buf, &scene)?;
|
||||
|
||||
let state_buf = device.create_buffer(1 * 1024 * 1024, dev)?;
|
||||
let anno_buf = device.create_buffer(64 * 1024 * 1024, host)?;
|
||||
let pathseg_buf = device.create_buffer(64 * 1024 * 1024, host)?;
|
||||
let tile_buf = device.create_buffer(64 * 1024 * 1024, host)?;
|
||||
let anno_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||
let pathseg_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||
let tile_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||
let bin_buf = device.create_buffer(64 * 1024 * 1024, dev)?;
|
||||
let ptcl_buf = device.create_buffer(48 * 1024 * 1024, dev)?;
|
||||
let image_dev = device.create_image2d(WIDTH as u32, HEIGHT as u32, dev)?;
|
||||
|
@ -228,10 +228,10 @@ impl<D: Device> Renderer<D> {
|
|||
let bin_alloc_buf_dev = device.create_buffer(12, dev)?;
|
||||
|
||||
// TODO: constants
|
||||
let bin_alloc_start = ((n_elements + 255) & !255) * 8;
|
||||
let bin_alloc_start = ((n_paths + 255) & !255) * 8;
|
||||
device.write_buffer(
|
||||
&bin_alloc_buf_host,
|
||||
&[n_elements as u32, 0, bin_alloc_start as u32],
|
||||
&[n_paths as u32, 0, bin_alloc_start as u32],
|
||||
)?;
|
||||
let bin_code = include_bytes!("../shader/binning.spv");
|
||||
let bin_pipeline = device.create_simple_compute_pipeline(bin_code, 4, 0)?;
|
||||
|
@ -250,16 +250,20 @@ impl<D: Device> Renderer<D> {
|
|||
&[n_elements as u32, coarse_alloc_start as u32],
|
||||
)?;
|
||||
let coarse_code = include_bytes!("../shader/coarse.spv");
|
||||
let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 4, 0)?;
|
||||
let coarse_pipeline = device.create_simple_compute_pipeline(coarse_code, 5, 0)?;
|
||||
let coarse_ds = device.create_descriptor_set(
|
||||
&coarse_pipeline,
|
||||
&[&anno_buf, &bin_buf, &coarse_alloc_buf_dev, &ptcl_buf],
|
||||
&[&anno_buf, &bin_buf, &tile_buf, &coarse_alloc_buf_dev, &ptcl_buf],
|
||||
&[],
|
||||
)?;
|
||||
|
||||
let k4_code = include_bytes!("../shader/kernel4.spv");
|
||||
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 1, 1)?;
|
||||
let k4_ds = device.create_descriptor_set(&k4_pipeline, &[&ptcl_buf], &[&image_dev])?;
|
||||
let k4_pipeline = device.create_simple_compute_pipeline(k4_code, 2, 1)?;
|
||||
let k4_ds = device.create_descriptor_set(
|
||||
&k4_pipeline,
|
||||
&[&ptcl_buf, &tile_buf],
|
||||
&[&image_dev]
|
||||
)?;
|
||||
|
||||
Ok(Renderer {
|
||||
scene_buf,
|
||||
|
@ -328,32 +332,31 @@ impl<D: Device> Renderer<D> {
|
|||
&self.path_ds,
|
||||
(((self.n_pathseg + 31) / 32) as u32, 1, 1),
|
||||
);
|
||||
/*
|
||||
cmd_buf.write_timestamp(&query_pool, 3);
|
||||
// Note: this barrier is not needed as an actual dependency between
|
||||
// pipeline stages, but I am keeping it in so that timer queries are
|
||||
// easier to interpret.
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&self.bin_pipeline,
|
||||
&self.bin_ds,
|
||||
(((self.n_elements + 255) / 256) as u32, 1, 1),
|
||||
(((self.n_paths + 255) / 256) as u32, 1, 1),
|
||||
);
|
||||
*/
|
||||
cmd_buf.write_timestamp(&query_pool, 3);
|
||||
cmd_buf.write_timestamp(&query_pool, 4);
|
||||
cmd_buf.memory_barrier();
|
||||
/*
|
||||
cmd_buf.dispatch(
|
||||
&self.coarse_pipeline,
|
||||
&self.coarse_ds,
|
||||
(WIDTH as u32 / 256, HEIGHT as u32 / 256, 1),
|
||||
);
|
||||
*/
|
||||
cmd_buf.write_timestamp(&query_pool, 4);
|
||||
cmd_buf.write_timestamp(&query_pool, 5);
|
||||
cmd_buf.memory_barrier();
|
||||
/*
|
||||
cmd_buf.dispatch(
|
||||
&self.k4_pipeline,
|
||||
&self.k4_ds,
|
||||
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 5);
|
||||
*/
|
||||
cmd_buf.write_timestamp(&query_pool, 6);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
|
||||
}
|
||||
|
|
|
@ -49,8 +49,8 @@ impl PicoSvg {
|
|||
for item in &self.items {
|
||||
match item {
|
||||
Item::Fill(fill_item) => {
|
||||
rc.fill(&fill_item.path, &fill_item.color);
|
||||
//rc.stroke(&fill_item.path, &fill_item.color, 1.0);
|
||||
//rc.fill(&fill_item.path, &fill_item.color);
|
||||
rc.stroke(&fill_item.path, &fill_item.color, 1.0);
|
||||
}
|
||||
Item::Stroke(stroke_item) => {
|
||||
rc.stroke(&stroke_item.path, &stroke_item.color, stroke_item.width);
|
||||
|
|
Loading…
Reference in a new issue