mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Add first draft of kernel 3
A fairly simple approach, but it adds the translation (not tested yet in scene encoding) and does bounding box culling.
This commit is contained in:
parent
2ed89dd65e
commit
6976f877e0
|
@ -4,8 +4,9 @@ piet_gpu! {
|
||||||
#[gpu_write]
|
#[gpu_write]
|
||||||
mod ptcl {
|
mod ptcl {
|
||||||
struct CmdCircle {
|
struct CmdCircle {
|
||||||
// In existing code, this is packed; we might need an annotation for this.
|
center: [f32; 2],
|
||||||
bbox: [u16; 4],
|
radius: f32,
|
||||||
|
rgba_color: u32,
|
||||||
}
|
}
|
||||||
struct CmdLine {
|
struct CmdLine {
|
||||||
start: [f32; 2],
|
start: [f32; 2],
|
||||||
|
|
|
@ -10,3 +10,5 @@ rule glsl
|
||||||
build image.spv: glsl image.comp | scene.h
|
build image.spv: glsl image.comp | scene.h
|
||||||
|
|
||||||
build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h
|
build kernel1.spv: glsl kernel1.comp | scene.h tilegroup.h
|
||||||
|
|
||||||
|
build kernel3.spv: glsl kernel3.comp | scene.h tilegroup.h ptcl.h
|
||||||
|
|
|
@ -40,7 +40,7 @@ void main() {
|
||||||
if (tag == PietItem_Circle) {
|
if (tag == PietItem_Circle) {
|
||||||
PietCircle circle = PietItem_Circle_read(item_ref);
|
PietCircle circle = PietItem_Circle_read(item_ref);
|
||||||
float r = length(xy + vec2(0.5, 0.5) - circle.center.xy);
|
float r = length(xy + vec2(0.5, 0.5) - circle.center.xy);
|
||||||
float alpha = clamp(circle.radius - r, 0.0, 1.0);
|
float alpha = clamp(0.5 + circle.radius - r, 0.0, 1.0);
|
||||||
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color);
|
vec4 fg_rgba = unpackUnorm4x8(circle.rgba_color);
|
||||||
// TODO: sRGB
|
// TODO: sRGB
|
||||||
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
|
rgb = mix(rgb, fg_rgba.rgb, alpha * fg_rgba.a);
|
||||||
|
|
Binary file not shown.
|
@ -1,3 +1,16 @@
|
||||||
|
// This is "kernel 1" in a 4-kernel pipeline. It traverses the scene graph
|
||||||
|
// and outputs "instances" (references to item + translation) for each item
|
||||||
|
// that intersects the tilegroup.
|
||||||
|
//
|
||||||
|
// This implementation is simplistic and leaves a lot of performance on the
|
||||||
|
// table. A fancier implementation would use threadgroup shared memory or
|
||||||
|
// subgroups (or possibly both) to parallelize the reading of the input and
|
||||||
|
// the computation of tilegroup intersection.
|
||||||
|
//
|
||||||
|
// In addition, there are some features currently missing. One is the use of
|
||||||
|
// a bump allocator to extend the current fixed allocation. Another is support
|
||||||
|
// for clipping.
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
@ -18,10 +31,10 @@ layout(set = 0, binding = 1) buffer TilegroupBuf {
|
||||||
// TODO: compute this
|
// TODO: compute this
|
||||||
#define WIDTH_IN_TILEGROUPS 4
|
#define WIDTH_IN_TILEGROUPS 4
|
||||||
|
|
||||||
#define TILEGROUP_WIDTH 512
|
#define TILEGROUP_WIDTH_PX 512
|
||||||
#define TILEGROUP_HEIGHT 16
|
#define TILEGROUP_HEIGHT_PX 16
|
||||||
|
|
||||||
#define INITIAL_ALLOC 1024
|
#define TILEGROUP_INITIAL_ALLOC 1024
|
||||||
|
|
||||||
#define MAX_STACK 8
|
#define MAX_STACK 8
|
||||||
|
|
||||||
|
@ -35,8 +48,8 @@ void main() {
|
||||||
StackElement stack[MAX_STACK];
|
StackElement stack[MAX_STACK];
|
||||||
uint stack_ix = 0;
|
uint stack_ix = 0;
|
||||||
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
|
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS + gl_GlobalInvocationID.x;
|
||||||
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * INITIAL_ALLOC);
|
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
|
||||||
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH, TILEGROUP_HEIGHT);
|
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILEGROUP_WIDTH_PX, TILEGROUP_HEIGHT_PX);
|
||||||
PietItemRef root = PietItemRef(0);
|
PietItemRef root = PietItemRef(0);
|
||||||
SimpleGroup group = PietItem_Group_read(root);
|
SimpleGroup group = PietItem_Group_read(root);
|
||||||
StackElement tos = StackElement(root, 0, group.offset.xy);
|
StackElement tos = StackElement(root, 0, group.offset.xy);
|
||||||
|
@ -45,8 +58,8 @@ void main() {
|
||||||
if (tos.index < group.n_items) {
|
if (tos.index < group.n_items) {
|
||||||
Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
|
Bbox bbox = Bbox_read(Bbox_index(group.bboxes, tos.index));
|
||||||
vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
|
vec4 bb = vec4(bbox.bbox) + tos.offset.xyxy;
|
||||||
bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH))
|
bool hit = max(bb.x, xy0.x) < min(bb.z, xy0.x + float(TILEGROUP_WIDTH_PX))
|
||||||
&& max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT));
|
&& max(bb.y, xy0.y) < min(bb.w, xy0.y + float(TILEGROUP_HEIGHT_PX));
|
||||||
bool is_group = false;
|
bool is_group = false;
|
||||||
if (hit) {
|
if (hit) {
|
||||||
PietItemRef item_ref = PietItem_index(group.items, tos.index);
|
PietItemRef item_ref = PietItem_index(group.items, tos.index);
|
||||||
|
|
72
piet-gpu/shader/kernel3.comp
Normal file
72
piet-gpu/shader/kernel3.comp
Normal file
|
@ -0,0 +1,72 @@
|
||||||
|
#version 450
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
layout(local_size_x = 32, local_size_y = 1) in;
|
||||||
|
|
||||||
|
layout(set = 0, binding = 0) readonly buffer SceneBuf {
|
||||||
|
uint[] scene;
|
||||||
|
};
|
||||||
|
|
||||||
|
// TODO: this should have a `readonly` qualifier, but then inclusion
|
||||||
|
// of tilegroup.h would fail because of the writers.
|
||||||
|
layout(set = 0, binding = 1) buffer TilegroupBuf {
|
||||||
|
uint[] tilegroup;
|
||||||
|
};
|
||||||
|
|
||||||
|
layout(set = 0, binding = 2) buffer PtclBuf {
|
||||||
|
uint[] ptcl;
|
||||||
|
};
|
||||||
|
|
||||||
|
#include "scene.h"
|
||||||
|
#include "tilegroup.h"
|
||||||
|
#include "ptcl.h"
|
||||||
|
|
||||||
|
// TODO: compute all these
|
||||||
|
|
||||||
|
#define WIDTH_IN_TILEGROUPS 4
|
||||||
|
#define WIDTH_IN_TILES 128
|
||||||
|
#define TILEGROUP_WIDTH_TILES 32
|
||||||
|
#define TILE_WIDTH_PX 16
|
||||||
|
#define TILE_HEIGHT_PX 16
|
||||||
|
|
||||||
|
// Must be the same as kernel1. Might be a good idea to move these particular
|
||||||
|
// constants to their own .h file.
|
||||||
|
#define TILEGROUP_INITIAL_ALLOC 1024
|
||||||
|
|
||||||
|
#define PTCL_INITIAL_ALLOC 4096
|
||||||
|
|
||||||
|
void main() {
|
||||||
|
uint tile_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILES + gl_GlobalInvocationID.x;
|
||||||
|
uint tilegroup_ix = gl_GlobalInvocationID.y * WIDTH_IN_TILEGROUPS
|
||||||
|
+ (gl_GlobalInvocationID.x / TILEGROUP_WIDTH_TILES);
|
||||||
|
vec2 xy0 = vec2(gl_GlobalInvocationID.xy) * vec2(TILE_WIDTH_PX, TILE_HEIGHT_PX);
|
||||||
|
TileGroupRef tg_ref = TileGroupRef(tilegroup_ix * TILEGROUP_INITIAL_ALLOC);
|
||||||
|
CmdRef cmd_ref = CmdRef(tile_ix * PTCL_INITIAL_ALLOC);
|
||||||
|
|
||||||
|
while (true) {
|
||||||
|
uint tg_tag = TileGroup_tag(tg_ref);
|
||||||
|
if (tg_tag == TileGroup_End) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Assume tg_tag is `Instance`, though there will be more cases.
|
||||||
|
Instance ins = TileGroup_Instance_read(tg_ref);
|
||||||
|
PietItemRef item_ref = PietItemRef(ins.item_ref);
|
||||||
|
uint item_tag = PietItem_tag(item_ref);
|
||||||
|
switch (item_tag) {
|
||||||
|
case PietItem_Circle:
|
||||||
|
PietCircle circle = PietItem_Circle_read(item_ref);
|
||||||
|
vec2 center = ins.offset + circle.center.xy;
|
||||||
|
float r = circle.radius;
|
||||||
|
if (max(center.x - r, xy0.x) < min(center.x + r, xy0.x + float(TILE_WIDTH_PX))
|
||||||
|
&& max(center.y - r, xy0.y) < min(center.y + r, xy0.y + float(TILE_HEIGHT_PX)))
|
||||||
|
{
|
||||||
|
CmdCircle cmd = CmdCircle(center, r, circle.rgba_color);
|
||||||
|
Cmd_Circle_write(cmd_ref, cmd);
|
||||||
|
cmd_ref.offset += Cmd_size;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
tg_ref.offset += TileGroup_size;
|
||||||
|
}
|
||||||
|
Cmd_End_write(cmd_ref);
|
||||||
|
}
|
BIN
piet-gpu/shader/kernel3.spv
Normal file
BIN
piet-gpu/shader/kernel3.spv
Normal file
Binary file not shown.
323
piet-gpu/shader/ptcl.h
Normal file
323
piet-gpu/shader/ptcl.h
Normal file
|
@ -0,0 +1,323 @@
|
||||||
|
// Code auto-generated by piet-gpu-derive
|
||||||
|
|
||||||
|
struct CmdCircleRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CmdLineRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CmdStrokeRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CmdFillRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CmdFillEdgeRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CmdDrawFillRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CmdSolidRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CmdRef {
|
||||||
|
uint offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct CmdCircle {
|
||||||
|
vec2 center;
|
||||||
|
float radius;
|
||||||
|
uint rgba_color;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CmdCircle_size 16
|
||||||
|
|
||||||
|
CmdCircleRef CmdCircle_index(CmdCircleRef ref, uint index) {
|
||||||
|
return CmdCircleRef(ref.offset + index * CmdCircle_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdLine {
|
||||||
|
vec2 start;
|
||||||
|
vec2 end;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CmdLine_size 16
|
||||||
|
|
||||||
|
CmdLineRef CmdLine_index(CmdLineRef ref, uint index) {
|
||||||
|
return CmdLineRef(ref.offset + index * CmdLine_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdStroke {
|
||||||
|
float halfWidth;
|
||||||
|
uint rgba_color;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CmdStroke_size 8
|
||||||
|
|
||||||
|
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
|
||||||
|
return CmdStrokeRef(ref.offset + index * CmdStroke_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdFill {
|
||||||
|
vec2 start;
|
||||||
|
vec2 end;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CmdFill_size 16
|
||||||
|
|
||||||
|
CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
|
||||||
|
return CmdFillRef(ref.offset + index * CmdFill_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdFillEdge {
|
||||||
|
int sign;
|
||||||
|
float y;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CmdFillEdge_size 8
|
||||||
|
|
||||||
|
CmdFillEdgeRef CmdFillEdge_index(CmdFillEdgeRef ref, uint index) {
|
||||||
|
return CmdFillEdgeRef(ref.offset + index * CmdFillEdge_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdDrawFill {
|
||||||
|
int backdrop;
|
||||||
|
uint rgba_color;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CmdDrawFill_size 8
|
||||||
|
|
||||||
|
CmdDrawFillRef CmdDrawFill_index(CmdDrawFillRef ref, uint index) {
|
||||||
|
return CmdDrawFillRef(ref.offset + index * CmdDrawFill_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CmdSolid {
|
||||||
|
uint rgba_color;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CmdSolid_size 4
|
||||||
|
|
||||||
|
CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
|
||||||
|
return CmdSolidRef(ref.offset + index * CmdSolid_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define Cmd_End 0
|
||||||
|
#define Cmd_Circle 1
|
||||||
|
#define Cmd_Line 2
|
||||||
|
#define Cmd_Fill 3
|
||||||
|
#define Cmd_Stroke 4
|
||||||
|
#define Cmd_FillEdge 5
|
||||||
|
#define Cmd_DrawFill 6
|
||||||
|
#define Cmd_Solid 7
|
||||||
|
#define Cmd_Bail 8
|
||||||
|
#define Cmd_size 20
|
||||||
|
|
||||||
|
CmdRef Cmd_index(CmdRef ref, uint index) {
|
||||||
|
return CmdRef(ref.offset + index * Cmd_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdCircle CmdCircle_read(CmdCircleRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
uint raw1 = ptcl[ix + 1];
|
||||||
|
uint raw2 = ptcl[ix + 2];
|
||||||
|
uint raw3 = ptcl[ix + 3];
|
||||||
|
CmdCircle s;
|
||||||
|
s.center = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.radius = uintBitsToFloat(raw2);
|
||||||
|
s.rgba_color = raw3;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CmdCircle_write(CmdCircleRef ref, CmdCircle s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = floatBitsToUint(s.center.x);
|
||||||
|
ptcl[ix + 1] = floatBitsToUint(s.center.y);
|
||||||
|
ptcl[ix + 2] = floatBitsToUint(s.radius);
|
||||||
|
ptcl[ix + 3] = s.rgba_color;
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdLine CmdLine_read(CmdLineRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
uint raw1 = ptcl[ix + 1];
|
||||||
|
uint raw2 = ptcl[ix + 2];
|
||||||
|
uint raw3 = ptcl[ix + 3];
|
||||||
|
CmdLine s;
|
||||||
|
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CmdLine_write(CmdLineRef ref, CmdLine s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = floatBitsToUint(s.start.x);
|
||||||
|
ptcl[ix + 1] = floatBitsToUint(s.start.y);
|
||||||
|
ptcl[ix + 2] = floatBitsToUint(s.end.x);
|
||||||
|
ptcl[ix + 3] = floatBitsToUint(s.end.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdStroke CmdStroke_read(CmdStrokeRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
uint raw1 = ptcl[ix + 1];
|
||||||
|
CmdStroke s;
|
||||||
|
s.halfWidth = uintBitsToFloat(raw0);
|
||||||
|
s.rgba_color = raw1;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CmdStroke_write(CmdStrokeRef ref, CmdStroke s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = floatBitsToUint(s.halfWidth);
|
||||||
|
ptcl[ix + 1] = s.rgba_color;
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdFill CmdFill_read(CmdFillRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
uint raw1 = ptcl[ix + 1];
|
||||||
|
uint raw2 = ptcl[ix + 2];
|
||||||
|
uint raw3 = ptcl[ix + 3];
|
||||||
|
CmdFill s;
|
||||||
|
s.start = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
|
||||||
|
s.end = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CmdFill_write(CmdFillRef ref, CmdFill s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = floatBitsToUint(s.start.x);
|
||||||
|
ptcl[ix + 1] = floatBitsToUint(s.start.y);
|
||||||
|
ptcl[ix + 2] = floatBitsToUint(s.end.x);
|
||||||
|
ptcl[ix + 3] = floatBitsToUint(s.end.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdFillEdge CmdFillEdge_read(CmdFillEdgeRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
uint raw1 = ptcl[ix + 1];
|
||||||
|
CmdFillEdge s;
|
||||||
|
s.sign = int(raw0);
|
||||||
|
s.y = uintBitsToFloat(raw1);
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CmdFillEdge_write(CmdFillEdgeRef ref, CmdFillEdge s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = uint(s.sign);
|
||||||
|
ptcl[ix + 1] = floatBitsToUint(s.y);
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdDrawFill CmdDrawFill_read(CmdDrawFillRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
uint raw1 = ptcl[ix + 1];
|
||||||
|
CmdDrawFill s;
|
||||||
|
s.backdrop = int(raw0);
|
||||||
|
s.rgba_color = raw1;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CmdDrawFill_write(CmdDrawFillRef ref, CmdDrawFill s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = uint(s.backdrop);
|
||||||
|
ptcl[ix + 1] = s.rgba_color;
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdSolid CmdSolid_read(CmdSolidRef ref) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
uint raw0 = ptcl[ix + 0];
|
||||||
|
CmdSolid s;
|
||||||
|
s.rgba_color = raw0;
|
||||||
|
return s;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CmdSolid_write(CmdSolidRef ref, CmdSolid s) {
|
||||||
|
uint ix = ref.offset >> 2;
|
||||||
|
ptcl[ix + 0] = s.rgba_color;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint Cmd_tag(CmdRef ref) {
|
||||||
|
return ptcl[ref.offset >> 2];
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdCircle Cmd_Circle_read(CmdRef ref) {
|
||||||
|
return CmdCircle_read(CmdCircleRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdLine Cmd_Line_read(CmdRef ref) {
|
||||||
|
return CmdLine_read(CmdLineRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdFill Cmd_Fill_read(CmdRef ref) {
|
||||||
|
return CmdFill_read(CmdFillRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdStroke Cmd_Stroke_read(CmdRef ref) {
|
||||||
|
return CmdStroke_read(CmdStrokeRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdFillEdge Cmd_FillEdge_read(CmdRef ref) {
|
||||||
|
return CmdFillEdge_read(CmdFillEdgeRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdDrawFill Cmd_DrawFill_read(CmdRef ref) {
|
||||||
|
return CmdDrawFill_read(CmdDrawFillRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
CmdSolid Cmd_Solid_read(CmdRef ref) {
|
||||||
|
return CmdSolid_read(CmdSolidRef(ref.offset + 4));
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_End_write(CmdRef ref) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_End;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_Circle_write(CmdRef ref, CmdCircle s) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_Circle;
|
||||||
|
CmdCircle_write(CmdCircleRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_Line_write(CmdRef ref, CmdLine s) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_Line;
|
||||||
|
CmdLine_write(CmdLineRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_Fill_write(CmdRef ref, CmdFill s) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_Fill;
|
||||||
|
CmdFill_write(CmdFillRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_Stroke_write(CmdRef ref, CmdStroke s) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_Stroke;
|
||||||
|
CmdStroke_write(CmdStrokeRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_FillEdge_write(CmdRef ref, CmdFillEdge s) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_FillEdge;
|
||||||
|
CmdFillEdge_write(CmdFillEdgeRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_DrawFill_write(CmdRef ref, CmdDrawFill s) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_DrawFill;
|
||||||
|
CmdDrawFill_write(CmdDrawFillRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_Solid_write(CmdRef ref, CmdSolid s) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_Solid;
|
||||||
|
CmdSolid_write(CmdSolidRef(ref.offset + 4), s);
|
||||||
|
}
|
||||||
|
|
||||||
|
void Cmd_Bail_write(CmdRef ref) {
|
||||||
|
ptcl[ref.offset >> 2] = Cmd_Bail;
|
||||||
|
}
|
||||||
|
|
|
@ -73,6 +73,7 @@ fn dump_scene(buf: &[u8]) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unused)]
|
||||||
fn dump_k1_data(k1_buf: &[u32]) {
|
fn dump_k1_data(k1_buf: &[u32]) {
|
||||||
for i in 0..k1_buf.len() {
|
for i in 0..k1_buf.len() {
|
||||||
if k1_buf[i] != 0 {
|
if k1_buf[i] != 0 {
|
||||||
|
@ -96,7 +97,9 @@ fn main() {
|
||||||
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
|
.create_buffer(std::mem::size_of_val(&scene[..]) as u64, dev)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
device.write_buffer(&scene_buf, &scene).unwrap();
|
device.write_buffer(&scene_buf, &scene).unwrap();
|
||||||
|
// These should only be on the host if we're going to examine them from Rust.
|
||||||
let tilegroup_buf = device.create_buffer(384 * 1024, host).unwrap();
|
let tilegroup_buf = device.create_buffer(384 * 1024, host).unwrap();
|
||||||
|
let ptcl_buf = device.create_buffer(12 * 1024 * 4096, host).unwrap();
|
||||||
let image_buf = device
|
let image_buf = device
|
||||||
.create_buffer((WIDTH * HEIGHT * 4) as u64, host)
|
.create_buffer((WIDTH * HEIGHT * 4) as u64, host)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
@ -110,16 +113,23 @@ fn main() {
|
||||||
.create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf])
|
.create_descriptor_set(&k1_pipeline, &[&scene_dev, &tilegroup_buf])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
let k3_code = include_bytes!("../shader/kernel3.spv");
|
||||||
|
let k3_pipeline = device.create_simple_compute_pipeline(k3_code, 3).unwrap();
|
||||||
|
let k3_ds = device
|
||||||
|
.create_descriptor_set(&k3_pipeline, &[&scene_dev, &tilegroup_buf, &ptcl_buf])
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let code = include_bytes!("../shader/image.spv");
|
let code = include_bytes!("../shader/image.spv");
|
||||||
let pipeline = device.create_simple_compute_pipeline(code, 2).unwrap();
|
let pipeline = device.create_simple_compute_pipeline(code, 2).unwrap();
|
||||||
let descriptor_set = device
|
let descriptor_set = device
|
||||||
.create_descriptor_set(&pipeline, &[&scene_dev, &image_dev])
|
.create_descriptor_set(&pipeline, &[&scene_dev, &image_dev])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let query_pool = device.create_query_pool(3).unwrap();
|
let query_pool = device.create_query_pool(4).unwrap();
|
||||||
let mut cmd_buf = device.create_cmd_buf().unwrap();
|
let mut cmd_buf = device.create_cmd_buf().unwrap();
|
||||||
cmd_buf.begin();
|
cmd_buf.begin();
|
||||||
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
|
cmd_buf.copy_buffer(&scene_buf, &scene_dev);
|
||||||
cmd_buf.clear_buffer(&tilegroup_buf);
|
cmd_buf.clear_buffer(&tilegroup_buf);
|
||||||
|
cmd_buf.clear_buffer(&ptcl_buf);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.write_timestamp(&query_pool, 0);
|
cmd_buf.write_timestamp(&query_pool, 0);
|
||||||
cmd_buf.dispatch(
|
cmd_buf.dispatch(
|
||||||
|
@ -129,22 +139,36 @@ fn main() {
|
||||||
);
|
);
|
||||||
cmd_buf.write_timestamp(&query_pool, 1);
|
cmd_buf.write_timestamp(&query_pool, 1);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
|
cmd_buf.dispatch(
|
||||||
|
&k3_pipeline,
|
||||||
|
&k3_ds,
|
||||||
|
((WIDTH / 512) as u32, (HEIGHT / 16) as u32, 1),
|
||||||
|
);
|
||||||
|
cmd_buf.write_timestamp(&query_pool, 2);
|
||||||
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.dispatch(
|
cmd_buf.dispatch(
|
||||||
&pipeline,
|
&pipeline,
|
||||||
&descriptor_set,
|
&descriptor_set,
|
||||||
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
|
((WIDTH / TILE_W) as u32, (HEIGHT / TILE_H) as u32, 1),
|
||||||
);
|
);
|
||||||
cmd_buf.write_timestamp(&query_pool, 2);
|
cmd_buf.write_timestamp(&query_pool, 3);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
cmd_buf.copy_buffer(&image_dev, &image_buf);
|
cmd_buf.copy_buffer(&image_dev, &image_buf);
|
||||||
cmd_buf.finish();
|
cmd_buf.finish();
|
||||||
device.run_cmd_buf(&cmd_buf).unwrap();
|
device.run_cmd_buf(&cmd_buf).unwrap();
|
||||||
let timestamps = device.reap_query_pool(query_pool).unwrap();
|
let timestamps = device.reap_query_pool(query_pool).unwrap();
|
||||||
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
|
println!("Kernel 1 time: {:.3}ms", timestamps[0] * 1e3);
|
||||||
println!("Render time: {:.3}ms", (timestamps[1] - timestamps[0]) * 1e3);
|
println!(
|
||||||
|
"Kernel 3 time: {:.3}ms",
|
||||||
|
(timestamps[1] - timestamps[0]) * 1e3
|
||||||
|
);
|
||||||
|
println!(
|
||||||
|
"Render time: {:.3}ms",
|
||||||
|
(timestamps[2] - timestamps[1]) * 1e3
|
||||||
|
);
|
||||||
|
|
||||||
let mut k1_data: Vec<u32> = Default::default();
|
let mut k1_data: Vec<u32> = Default::default();
|
||||||
device.read_buffer(&tilegroup_buf, &mut k1_data).unwrap();
|
device.read_buffer(&ptcl_buf, &mut k1_data).unwrap();
|
||||||
dump_k1_data(&k1_data);
|
dump_k1_data(&k1_data);
|
||||||
|
|
||||||
let mut img_data: Vec<u8> = Default::default();
|
let mut img_data: Vec<u8> = Default::default();
|
||||||
|
|
Loading…
Reference in a new issue