mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Add first draft of kernel 3
A fairly simple approach, but it adds the translation (not tested yet in scene encoding) and does bounding box culling.
This commit is contained in:
parent
2ed89dd65e
commit
6976f877e0
|
@ -4,8 +4,9 @@ piet_gpu! {
|
|||
#[gpu_write]
|
||||
mod ptcl {
|
||||
struct CmdCircle {
|
||||
// In existing code, this is packed; we might need an annotation for this.
|
||||
bbox: [u16; 4],
|
||||
center: [f32; 2],
|
||||
radius: f32,
|
||||
rgba_color: u32,
|
||||
}
|
||||
struct CmdLine {
|
||||
start: [f32; 2],
|
||||
|
|
|
@ -10,3 +10,5 @@ rule glsl
|
|||
build image.spv: glsl image.comp | scene.h
|
||||
|
||||
build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h
|
||||
|
||||
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h ptcl.h
|
||||
|
|
|
@ -40,7 +40,7 @@ void main() {
|
|||
if (tag == PietItem_Circle) {
|
||||
PietCircle circle = PietItem_Circle_read(item_ref);
|
||||
float r = length(xy + vec2(0.5, 0.5) - circle.center.xy);
|
||||
float alpha = clamp(circle.radius - r, 0.0, 1.0);
|
||||
float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0);
|
||||
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color);
|
||||
// TODO: sRGB
|
||||
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
|
||||
|
|
Binary file not shown.
|
@ -1,3 +1,16 @@
|
|||
// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
|
||||
// and outputs "instances" (references to item + translation) for each item
|
||||
// that intersects the tilegroup.
|
||||
//
|
||||
// This implementation is simplistic and leaves a lot of performance on the
|
||||
// table. A fancier implementation would use threadgroup shared memory or
|
||||
// subgroups (or possibly both) to parallelize the reading of the input and
|
||||
// the computation of tilegroup intersection.
|
||||
//
|
||||
// In addition, there are some features currently missing. One is the use of
|
||||
// a bump allocator to extend the current fixed allocation. Another is support
|
||||
// for clipping.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
|
@ -18,10 +31,10 @@ layout(set = 0, binding = 1) buffer TilegroupBuf {
|
|||
// TODO: compute this
|
||||
#define WIDTH_IN_TILEGROUPS 4
|
||||
|
||||
#define TILEGROUP_WIDTH 512
|
||||
#define TILEGROUP_HEIGHT 16
|
||||
#define TILEGROUP_WIDTH_PX 512
|
||||
#define TILEGROUP_HEIGHT_PX 16
|
||||
|
||||
#define INITIAL_ALLOC 1024
|
||||
#define TILEGROUP_INITIAL_ALLOC 1024
|
||||
|
||||
#define MAX_STACK 8
|
||||
|
||||
|
@ -35,8 +48,8 @@ void main() {
|
|||
StackElement stack[MAX_STACK];
|
||||
uint stack_ix = 0;
|
||||
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
|
||||
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * INITIAL_ALLOC);
|
||||
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH, TILEGROUP_HEIGHT);
|
||||
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
|
||||
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
|
||||
PietItemRef root = PietItemRef(0);
|
||||
SimpleGroup group = PietItem_Group_read(root);
|
||||
StackElement tos = StackElement(root, 0, group.offset.xy);
|
||||
|
@ -45,8 +58,8 @@ void main() {
|
|||
if (tos.index < group.n_items) {
|
||||
Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
|
||||
vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
|
||||
bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH))
|
||||
&& max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT));
|
||||
bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
|
||||
&& max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
|
||||
bool is_group = false;
|
||||
if (hit) {
|
||||
PietItemRef item_ref = PietItem_index(group.items, tos.index);
|
||||
|
|
72
piet-gpu/shader/kernel3.comp
Normal file
72
piet-gpu/shader/kernel3.comp
Normal file
|
@ -0,0 +1,72 @@
|
|||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
// Workgroup shape: 32 invocations along x, 1 along y.
layout(local_size_x = 32, local_size_y = 1) in;

// Encoded scene, exposed as a read-only array of 32-bit words.
layout(set = 0, binding = 0) readonly buffer SceneBuf {
    uint scene[];
};
|
||||
|
||||
// TODO: this should have a `readonly` qualifier, but then inclusion
|
||||
// of tilegroup.h would fail because of the writers.
|
||||
// Per-tilegroup instance lists, as an array of 32-bit words. This kernel
// walks them with TileGroup_tag / TileGroup_Instance_read.
layout(set = 0, binding = 1) buffer TilegroupBuf {
    uint tilegroup[];
};
|
||||
|
||||
// Per-tile command lists, as an array of 32-bit words. Written by this
// kernel through the Cmd_*_write helpers of ptcl.h.
layout(set = 0, binding = 2) buffer PtclBuf {
    uint ptcl[];
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tilegroup.h"
|
||||
#include "ptcl.h"
|
||||
|
||||
// TODO: compute all these

// Tile dimensions in pixels.
#define TILE_WIDTH_PX 16
#define TILE_HEIGHT_PX 16

// Horizontal layout: tilegroups per row, tiles per tilegroup, and the
// resulting number of tiles per row (4 * 32 = 128).
#define WIDTH_IN_TILEGROUPS 4
#define TILEGROUP_WIDTH_TILES 32
#define WIDTH_IN_TILES (WIDTH_IN_TILEGROUPS * TILEGROUP_WIDTH_TILES)

// Bytes reserved per tilegroup in the tilegroup buffer.
// Must be the same as kernel1. Might be a good idea to move these particular
// constants to their own .h file.
#define TILEGROUP_INITIAL_ALLOC 1024

// Bytes reserved per tile in the ptcl buffer.
#define PTCL_INITIAL_ALLOC 4096
|
||||
|
||||
// Kernel 3 entry point; one invocation per tile.
//
// Walks the instance list of the tilegroup containing this tile and, for
// every circle whose bounding box overlaps the tile, appends a CmdCircle
// (with the instance translation already applied to the center) to the
// tile's command list. The list is terminated with Cmd_End.
//
// Each tile owns a fixed PTCL_INITIAL_ALLOC-byte slice of `ptcl`. Commands
// that would not fit in that slice (keeping 4 bytes for the End tag) are
// dropped rather than written into the neighbouring tile's slice.
void main() {
    uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
    uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
        + (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
    // Top-left corner of this tile in pixels.
    vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
    TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
    CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
    // A command may only be written if it ends at or before this offset,
    // so that the 4-byte End tag always fits inside this tile's slice.
    uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - 4;

    while (true) {
        uint tg_tag = TileGroup_tag(tg_ref);
        if (tg_tag == TileGroup_End) {
            break;
        }
        // Assume tg_tag is `Instance`, though there will be more cases.
        Instance ins = TileGroup_Instance_read(tg_ref);
        PietItemRef item_ref = PietItemRef(ins.item_ref);
        uint item_tag = PietItem_tag(item_ref);
        switch (item_tag) {
        case PietItem_Circle: {
            PietCircle circle = PietItem_Circle_read(item_ref);
            vec2 center = ins.offset + circle.center.xy;
            float r = circle.radius;
            // Bounding-box test of the circle against this tile.
            bool hit = max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX))
                && max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX));
            if (hit && cmd_ref.offset + Cmd_size <= cmd_limit) {
                CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
                Cmd_Circle_write(cmd_ref, cmd);
                cmd_ref.offset += Cmd_size;
            }
            break;
        }
        }
        tg_ref.offset += TileGroup_size;
    }
    Cmd_End_write(cmd_ref);
}
|
BIN
piet-gpu/shader/kernel3.spv
Normal file
BIN
piet-gpu/shader/kernel3.spv
Normal file
Binary file not shown.
323
piet-gpu/shader/ptcl.h
Normal file
323
piet-gpu/shader/ptcl.h
Normal file
|
@ -0,0 +1,323 @@
|
|||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// Typed references into the `ptcl` buffer. Each one wraps a byte offset;
// the accessors in this file turn it into a word index with `offset >> 2`.

struct CmdCircleRef {
    uint offset;
};

struct CmdLineRef {
    uint offset;
};

struct CmdStrokeRef {
    uint offset;
};

struct CmdFillRef {
    uint offset;
};

struct CmdFillEdgeRef {
    uint offset;
};

struct CmdDrawFillRef {
    uint offset;
};

struct CmdSolidRef {
    uint offset;
};

// Reference to a tagged command (tag word followed by a payload).
struct CmdRef {
    uint offset;
};
|
||||
|
||||
// Circle command payload, stored as four 32-bit words.
struct CmdCircle {
    vec2 center;
    float radius;
    uint rgba_color;
};

// Encoded size of a CmdCircle in bytes.
#define CmdCircle_size 16

// Reference to the element `index` places after `ref` in a packed array
// of CmdCircle.
CmdCircleRef CmdCircle_index(CmdCircleRef ref, uint index) {
    uint skip_bytes = index * CmdCircle_size;
    return CmdCircleRef(ref.offset + skip_bytes);
}
|
||||
|
||||
// Line command payload, stored as four 32-bit words.
struct CmdLine {
    vec2 start;
    vec2 end;
};

// Encoded size of a CmdLine in bytes.
#define CmdLine_size 16

// Reference to the element `index` places after `ref` in a packed array
// of CmdLine.
CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
    uint skip_bytes = index * CmdLine_size;
    return CmdLineRef(ref.offset + skip_bytes);
}
|
||||
|
||||
// Stroke command payload, stored as two 32-bit words.
struct CmdStroke {
    float halfWidth;
    uint rgba_color;
};

// Encoded size of a CmdStroke in bytes.
#define CmdStroke_size 8

// Reference to the element `index` places after `ref` in a packed array
// of CmdStroke.
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
    uint skip_bytes = index * CmdStroke_size;
    return CmdStrokeRef(ref.offset + skip_bytes);
}
|
||||
|
||||
// Fill command payload, stored as four 32-bit words.
struct CmdFill {
    vec2 start;
    vec2 end;
};

// Encoded size of a CmdFill in bytes.
#define CmdFill_size 16

// Reference to the element `index` places after `ref` in a packed array
// of CmdFill.
CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
    uint skip_bytes = index * CmdFill_size;
    return CmdFillRef(ref.offset + skip_bytes);
}
|
||||
|
||||
// Fill-edge command payload, stored as two 32-bit words.
struct CmdFillEdge {
    int sign;
    float y;
};

// Encoded size of a CmdFillEdge in bytes.
#define CmdFillEdge_size 8

// Reference to the element `index` places after `ref` in a packed array
// of CmdFillEdge.
CmdFillEdgeRef CmdFillEdge_index(CmdFillEdgeRef ref, uint index) {
    uint skip_bytes = index * CmdFillEdge_size;
    return CmdFillEdgeRef(ref.offset + skip_bytes);
}
|
||||
|
||||
// Draw-fill command payload, stored as two 32-bit words.
struct CmdDrawFill {
    int backdrop;
    uint rgba_color;
};

// Encoded size of a CmdDrawFill in bytes.
#define CmdDrawFill_size 8

// Reference to the element `index` places after `ref` in a packed array
// of CmdDrawFill.
CmdDrawFillRef CmdDrawFill_index(CmdDrawFillRef ref, uint index) {
    uint skip_bytes = index * CmdDrawFill_size;
    return CmdDrawFillRef(ref.offset + skip_bytes);
}
|
||||
|
||||
// Solid command payload, stored as a single 32-bit word.
struct CmdSolid {
    uint rgba_color;
};

// Encoded size of a CmdSolid in bytes.
#define CmdSolid_size 4

// Reference to the element `index` places after `ref` in a packed array
// of CmdSolid.
CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
    uint skip_bytes = index * CmdSolid_size;
    return CmdSolidRef(ref.offset + skip_bytes);
}
|
||||
|
||||
// Tag values stored in the first word of every command.
#define Cmd_End 0
#define Cmd_Circle 1
#define Cmd_Line 2
#define Cmd_Fill 3
#define Cmd_Stroke 4
#define Cmd_FillEdge 5
#define Cmd_DrawFill 6
#define Cmd_Solid 7
#define Cmd_Bail 8

// Size of one command slot in bytes: a 4-byte tag followed by room for the
// largest payload (16 bytes).
#define Cmd_size 20

// Reference to the command slot `index` places after `ref`.
CmdRef Cmd_index(CmdRef ref, uint index) {
    uint skip_bytes = index * Cmd_size;
    return CmdRef(ref.offset + skip_bytes);
}
|
||||
|
||||
// Decodes a CmdCircle from the four words of `ptcl` starting at byte
// offset `ref.offset`.
CmdCircle CmdCircle_read(CmdCircleRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = ptcl[base];
    uint w1 = ptcl[base + 1];
    uint w2 = ptcl[base + 2];
    uint w3 = ptcl[base + 3];
    CmdCircle result;
    result.center = vec2(uintBitsToFloat(w0), uintBitsToFloat(w1));
    result.radius = uintBitsToFloat(w2);
    result.rgba_color = w3;
    return result;
}
|
||||
|
||||
// Encodes `s` into the four words of `ptcl` starting at byte offset
// `ref.offset`; floats are stored by bit pattern.
void CmdCircle_write(CmdCircleRef ref, CmdCircle s) {
    uint base = ref.offset >> 2;
    ptcl[base] = floatBitsToUint(s.center.x);
    ptcl[base + 1] = floatBitsToUint(s.center.y);
    ptcl[base + 2] = floatBitsToUint(s.radius);
    ptcl[base + 3] = s.rgba_color;
}
|
||||
|
||||
// Decodes a CmdLine from the four words of `ptcl` starting at byte offset
// `ref.offset`.
CmdLine CmdLine_read(CmdLineRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = ptcl[base];
    uint w1 = ptcl[base + 1];
    uint w2 = ptcl[base + 2];
    uint w3 = ptcl[base + 3];
    CmdLine result;
    result.start = vec2(uintBitsToFloat(w0), uintBitsToFloat(w1));
    result.end = vec2(uintBitsToFloat(w2), uintBitsToFloat(w3));
    return result;
}
|
||||
|
||||
// Encodes `s` into the four words of `ptcl` starting at byte offset
// `ref.offset`; floats are stored by bit pattern.
void CmdLine_write(CmdLineRef ref, CmdLine s) {
    uint base = ref.offset >> 2;
    ptcl[base] = floatBitsToUint(s.start.x);
    ptcl[base + 1] = floatBitsToUint(s.start.y);
    ptcl[base + 2] = floatBitsToUint(s.end.x);
    ptcl[base + 3] = floatBitsToUint(s.end.y);
}
|
||||
|
||||
// Decodes a CmdStroke from the two words of `ptcl` starting at byte offset
// `ref.offset`.
CmdStroke CmdStroke_read(CmdStrokeRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = ptcl[base];
    uint w1 = ptcl[base + 1];
    CmdStroke result;
    result.halfWidth = uintBitsToFloat(w0);
    result.rgba_color = w1;
    return result;
}
|
||||
|
||||
// Encodes `s` into the two words of `ptcl` starting at byte offset
// `ref.offset`.
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
    uint base = ref.offset >> 2;
    ptcl[base] = floatBitsToUint(s.halfWidth);
    ptcl[base + 1] = s.rgba_color;
}
|
||||
|
||||
// Decodes a CmdFill from the four words of `ptcl` starting at byte offset
// `ref.offset`.
CmdFill CmdFill_read(CmdFillRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = ptcl[base];
    uint w1 = ptcl[base + 1];
    uint w2 = ptcl[base + 2];
    uint w3 = ptcl[base + 3];
    CmdFill result;
    result.start = vec2(uintBitsToFloat(w0), uintBitsToFloat(w1));
    result.end = vec2(uintBitsToFloat(w2), uintBitsToFloat(w3));
    return result;
}
|
||||
|
||||
// Encodes `s` into the four words of `ptcl` starting at byte offset
// `ref.offset`; floats are stored by bit pattern.
void CmdFill_write(CmdFillRef ref, CmdFill s) {
    uint base = ref.offset >> 2;
    ptcl[base] = floatBitsToUint(s.start.x);
    ptcl[base + 1] = floatBitsToUint(s.start.y);
    ptcl[base + 2] = floatBitsToUint(s.end.x);
    ptcl[base + 3] = floatBitsToUint(s.end.y);
}
|
||||
|
||||
// Decodes a CmdFillEdge from the two words of `ptcl` starting at byte
// offset `ref.offset`.
CmdFillEdge CmdFillEdge_read(CmdFillEdgeRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = ptcl[base];
    uint w1 = ptcl[base + 1];
    CmdFillEdge result;
    result.sign = int(w0);
    result.y = uintBitsToFloat(w1);
    return result;
}
|
||||
|
||||
// Encodes `s` into the two words of `ptcl` starting at byte offset
// `ref.offset`.
void CmdFillEdge_write(CmdFillEdgeRef ref, CmdFillEdge s) {
    uint base = ref.offset >> 2;
    ptcl[base] = uint(s.sign);
    ptcl[base + 1] = floatBitsToUint(s.y);
}
|
||||
|
||||
// Decodes a CmdDrawFill from the two words of `ptcl` starting at byte
// offset `ref.offset`.
CmdDrawFill CmdDrawFill_read(CmdDrawFillRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = ptcl[base];
    uint w1 = ptcl[base + 1];
    CmdDrawFill result;
    result.backdrop = int(w0);
    result.rgba_color = w1;
    return result;
}
|
||||
|
||||
// Encodes `s` into the two words of `ptcl` starting at byte offset
// `ref.offset`.
void CmdDrawFill_write(CmdDrawFillRef ref, CmdDrawFill s) {
    uint base = ref.offset >> 2;
    ptcl[base] = uint(s.backdrop);
    ptcl[base + 1] = s.rgba_color;
}
|
||||
|
||||
// Decodes a CmdSolid from the word of `ptcl` at byte offset `ref.offset`.
CmdSolid CmdSolid_read(CmdSolidRef ref) {
    uint base = ref.offset >> 2;
    CmdSolid result;
    result.rgba_color = ptcl[base];
    return result;
}
|
||||
|
||||
// Encodes `s` into the word of `ptcl` at byte offset `ref.offset`.
void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
    uint base = ref.offset >> 2;
    ptcl[base] = s.rgba_color;
}
|
||||
|
||||
// Returns the tag word (one of the Cmd_* values) of the command at `ref`.
uint Cmd_tag(CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    return ptcl[tag_ix];
}
|
||||
|
||||
// Reads the CmdCircle payload that follows the 4-byte tag at `ref`.
CmdCircle Cmd_Circle_read(CmdRef ref) {
    CmdCircleRef payload = CmdCircleRef(ref.offset + 4);
    return CmdCircle_read(payload);
}
|
||||
|
||||
// Reads the CmdLine payload that follows the 4-byte tag at `ref`.
CmdLine Cmd_Line_read(CmdRef ref) {
    CmdLineRef payload = CmdLineRef(ref.offset + 4);
    return CmdLine_read(payload);
}
|
||||
|
||||
// Reads the CmdFill payload that follows the 4-byte tag at `ref`.
CmdFill Cmd_Fill_read(CmdRef ref) {
    CmdFillRef payload = CmdFillRef(ref.offset + 4);
    return CmdFill_read(payload);
}
|
||||
|
||||
// Reads the CmdStroke payload that follows the 4-byte tag at `ref`.
CmdStroke Cmd_Stroke_read(CmdRef ref) {
    CmdStrokeRef payload = CmdStrokeRef(ref.offset + 4);
    return CmdStroke_read(payload);
}
|
||||
|
||||
// Reads the CmdFillEdge payload that follows the 4-byte tag at `ref`.
CmdFillEdge Cmd_FillEdge_read(CmdRef ref) {
    CmdFillEdgeRef payload = CmdFillEdgeRef(ref.offset + 4);
    return CmdFillEdge_read(payload);
}
|
||||
|
||||
// Reads the CmdDrawFill payload that follows the 4-byte tag at `ref`.
CmdDrawFill Cmd_DrawFill_read(CmdRef ref) {
    CmdDrawFillRef payload = CmdDrawFillRef(ref.offset + 4);
    return CmdDrawFill_read(payload);
}
|
||||
|
||||
// Reads the CmdSolid payload that follows the 4-byte tag at `ref`.
CmdSolid Cmd_Solid_read(CmdRef ref) {
    CmdSolidRef payload = CmdSolidRef(ref.offset + 4);
    return CmdSolid_read(payload);
}
|
||||
|
||||
// Writes the Cmd_End tag at `ref`; no payload follows.
void Cmd_End_write(CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_End;
}
|
||||
|
||||
// Writes the Cmd_Circle tag at `ref`, then the payload `s` 4 bytes later.
void Cmd_Circle_write(CmdRef ref, CmdCircle s) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_Circle;
    CmdCircleRef payload = CmdCircleRef(ref.offset + 4);
    CmdCircle_write(payload, s);
}
|
||||
|
||||
// Writes the Cmd_Line tag at `ref`, then the payload `s` 4 bytes later.
void Cmd_Line_write(CmdRef ref, CmdLine s) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_Line;
    CmdLineRef payload = CmdLineRef(ref.offset + 4);
    CmdLine_write(payload, s);
}
|
||||
|
||||
// Writes the Cmd_Fill tag at `ref`, then the payload `s` 4 bytes later.
void Cmd_Fill_write(CmdRef ref, CmdFill s) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_Fill;
    CmdFillRef payload = CmdFillRef(ref.offset + 4);
    CmdFill_write(payload, s);
}
|
||||
|
||||
// Writes the Cmd_Stroke tag at `ref`, then the payload `s` 4 bytes later.
void Cmd_Stroke_write(CmdRef ref, CmdStroke s) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_Stroke;
    CmdStrokeRef payload = CmdStrokeRef(ref.offset + 4);
    CmdStroke_write(payload, s);
}
|
||||
|
||||
// Writes the Cmd_FillEdge tag at `ref`, then the payload `s` 4 bytes later.
void Cmd_FillEdge_write(CmdRef ref, CmdFillEdge s) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_FillEdge;
    CmdFillEdgeRef payload = CmdFillEdgeRef(ref.offset + 4);
    CmdFillEdge_write(payload, s);
}
|
||||
|
||||
// Writes the Cmd_DrawFill tag at `ref`, then the payload `s` 4 bytes later.
void Cmd_DrawFill_write(CmdRef ref, CmdDrawFill s) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_DrawFill;
    CmdDrawFillRef payload = CmdDrawFillRef(ref.offset + 4);
    CmdDrawFill_write(payload, s);
}
|
||||
|
||||
// Writes the Cmd_Solid tag at `ref`, then the payload `s` 4 bytes later.
void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_Solid;
    CmdSolidRef payload = CmdSolidRef(ref.offset + 4);
    CmdSolid_write(payload, s);
}
|
||||
|
||||
// Writes the Cmd_Bail tag at `ref`; no payload follows.
void Cmd_Bail_write(CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    ptcl[tag_ix] = Cmd_Bail;
}
|
||||
|
|
@ -73,6 +73,7 @@ fn dump_scene(buf: &[u8]) {
|
|||
}
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
fn dump_k1_data(k1_buf: &[u32]) {
|
||||
for i in 0..k1_buf.len() {
|
||||
if k1_buf[i] != 0 {
|
||||
|
@ -96,7 +97,9 @@ fn main() {
|
|||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
|
||||
.unwrap();
|
||||
device.write_buffer(&scene_buf, &scene).unwrap();
|
||||
// These should only be on the host if we're going to examine them from Rust.
|
||||
let tilegroup_buf = device.create_buffer(384 * 1024, host).unwrap();
|
||||
let ptcl_buf = device.create_buffer(12 * 1024 * 4096, host).unwrap();
|
||||
let image_buf = device
|
||||
.create_buffer((WIDTH * HEIGHT * 4) as u64, host)
|
||||
.unwrap();
|
||||
|
@ -110,16 +113,23 @@ fn main() {
|
|||
.create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf])
|
||||
.unwrap();
|
||||
|
||||
let k3_code = include_bytes!("../shader/kernel3.spv");
|
||||
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 3).unwrap();
|
||||
let k3_ds = device
|
||||
.create_descriptor_set(&k3_pipeline, &[&scene_dev, &tilegroup_buf, &ptcl_buf])
|
||||
.unwrap();
|
||||
|
||||
let code = include_bytes!("../shader/image.spv");
|
||||
let pipeline = device.create_simple_compute_pipeline(code, 2).unwrap();
|
||||
let descriptor_set = device
|
||||
.create_descriptor_set(&pipeline, &[&scene_dev, &image_dev])
|
||||
.unwrap();
|
||||
let query_pool = device.create_query_pool(3).unwrap();
|
||||
let query_pool = device.create_query_pool(4).unwrap();
|
||||
let mut cmd_buf = device.create_cmd_buf().unwrap();
|
||||
cmd_buf.begin();
|
||||
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
|
||||
cmd_buf.clear_buffer(&tilegroup_buf);
|
||||
cmd_buf.clear_buffer(&ptcl_buf);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.write_timestamp(&query_pool, 0);
|
||||
cmd_buf.dispatch(
|
||||
|
@ -129,22 +139,36 @@ fn main() {
|
|||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 1);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&k3_pipeline,
|
||||
&k3_ds,
|
||||
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 2);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&pipeline,
|
||||
&descriptor_set,
|
||||
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
|
||||
);
|
||||
cmd_buf.write_timestamp(&query_pool, 2);
|
||||
cmd_buf.write_timestamp(&query_pool, 3);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.copy_buffer(&image_dev, &image_buf);
|
||||
cmd_buf.finish();
|
||||
device.run_cmd_buf(&cmd_buf).unwrap();
|
||||
let timestamps = device.reap_query_pool(query_pool).unwrap();
|
||||
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
|
||||
println!("Render time: {:.3}ms", (timestamps[1] - timestamps[0]) * 1e3);
|
||||
println!(
|
||||
"Kernel 3 time: {:.3}ms",
|
||||
(timestamps[1] - timestamps[0]) * 1e3
|
||||
);
|
||||
println!(
|
||||
"Render time: {:.3}ms",
|
||||
(timestamps[2] - timestamps[1]) * 1e3
|
||||
);
|
||||
|
||||
let mut k1_data: Vec<u32> = Default::default();
|
||||
device.read_buffer(&tilegroup_buf, &mut k1_data).unwrap();
|
||||
device.read_buffer(&ptcl_buf, &mut k1_data).unwrap();
|
||||
dump_k1_data(&k1_data);
|
||||
|
||||
let mut img_data: Vec<u8> = Default::default();
|
||||
|
|
Loading…
Reference in a new issue