mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Start work on new element pipeline
There's a bit of reorganizing as well. Shader stages are made available from piet-gpu to the test rig, config is now a proper structure (marshaled with bytemuck). This commit just has the transform stage, which is a simple monoid scan of affine transforms. Progress toward #119
This commit is contained in:
parent
a8103a4c20
commit
47f8812e2f
18
Cargo.lock
generated
18
Cargo.lock
generated
|
@ -96,6 +96,20 @@ name = "bytemuck"
|
|||
version = "1.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72957246c41db82b8ef88a5486143830adeb8227ef9837740bdec67724cf2c5b"
|
||||
dependencies = [
|
||||
"bytemuck_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytemuck_derive"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8e215f8c2f9f79cb53c8335e687ffd07d5bfcb6fe5fc80723762d0be46e7cc54"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"syn",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
|
@ -872,6 +886,7 @@ dependencies = [
|
|||
name = "piet-gpu"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"bytemuck",
|
||||
"clap",
|
||||
"ndk",
|
||||
"ndk-glue",
|
||||
|
@ -920,7 +935,10 @@ version = "0.1.0"
|
|||
dependencies = [
|
||||
"bytemuck",
|
||||
"clap",
|
||||
"kurbo",
|
||||
"piet-gpu",
|
||||
"piet-gpu-hal",
|
||||
"rand",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
@ -33,6 +33,7 @@ roxmltree = "0.13"
|
|||
winit = "0.25"
|
||||
clap = "2.33"
|
||||
swash = "0.1.4"
|
||||
bytemuck = { version = "1.7.2", features = ["derive"] }
|
||||
|
||||
[target.'cfg(target_os = "android")'.dependencies]
|
||||
ndk = "0.3"
|
||||
|
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -1,12 +1,27 @@
|
|||
# Build file for shaders.
|
||||
|
||||
# You must have glslangValidator in your path, or patch here.
|
||||
# You must have Vulkan tools in your path, or patch here.
|
||||
|
||||
glslang_validator = glslangValidator
|
||||
spirv_cross = spirv-cross
|
||||
dxc = dxc
|
||||
|
||||
# See https://github.com/KhronosGroup/SPIRV-Cross/issues/1248 for
|
||||
# why we set this.
|
||||
msl_flags = --msl-decoration-binding
|
||||
|
||||
rule glsl
|
||||
command = $glslang_validator $flags -V -o $out $in
|
||||
|
||||
rule hlsl
|
||||
command = $spirv_cross --hlsl $in --output $out
|
||||
|
||||
rule dxil
|
||||
command = $dxc -T cs_6_0 $in -Fo $out
|
||||
|
||||
rule msl
|
||||
command = $spirv_cross --msl $in --output $out $msl_flags
|
||||
|
||||
|
||||
build elements.spv: glsl elements.comp | scene.h state.h annotated.h
|
||||
|
||||
|
@ -24,3 +39,21 @@ build backdrop_lg.spv: glsl backdrop.comp | annotated.h tile.h setup.h
|
|||
build coarse.spv: glsl coarse.comp | annotated.h bins.h ptcl.h setup.h
|
||||
|
||||
build kernel4.spv: glsl kernel4.comp | ptcl.h setup.h
|
||||
|
||||
# New element pipeline follows
|
||||
|
||||
build gen/transform_reduce.spv: glsl transform_reduce.comp | scene.h setup.h mem.h
|
||||
build gen/transform_reduce.hlsl: hlsl gen/transform_reduce.spv
|
||||
build gen/transform_reduce.dxil: dxil gen/transform_reduce.hlsl
|
||||
build gen/transform_reduce.msl: msl gen/transform_reduce.spv
|
||||
|
||||
build gen/transform_root.spv: glsl transform_scan.comp
|
||||
flags = -DROOT
|
||||
build gen/transform_root.hlsl: hlsl gen/transform_root.spv
|
||||
build gen/transform_root.dxil: dxil gen/transform_root.hlsl
|
||||
build gen/transform_root.msl: msl gen/transform_root.spv
|
||||
|
||||
build gen/transform_leaf.spv: glsl transform_leaf.comp | scene.h tile.h setup.h mem.h
|
||||
build gen/transform_leaf.hlsl: hlsl gen/transform_leaf.spv
|
||||
build gen/transform_leaf.dxil: dxil gen/transform_leaf.hlsl
|
||||
build gen/transform_leaf.msl: msl gen/transform_leaf.spv
|
||||
|
|
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_leaf.dxil
Normal file
BIN
piet-gpu/shader/gen/transform_leaf.dxil
Normal file
Binary file not shown.
219
piet-gpu/shader/gen/transform_leaf.hlsl
Normal file
219
piet-gpu/shader/gen/transform_leaf.hlsl
Normal file
|
@ -0,0 +1,219 @@
|
|||
struct Alloc
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct TransformRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Transform
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
};
|
||||
|
||||
struct TransformSegRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct TransformSeg
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
};
|
||||
|
||||
struct Config
|
||||
{
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
uint height_in_tiles;
|
||||
Alloc tile_alloc;
|
||||
Alloc bin_alloc;
|
||||
Alloc ptcl_alloc;
|
||||
Alloc pathseg_alloc;
|
||||
Alloc anno_alloc;
|
||||
Alloc trans_alloc;
|
||||
uint n_trans;
|
||||
uint trans_offset;
|
||||
};
|
||||
|
||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
||||
|
||||
static const Transform _224 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
|
||||
|
||||
RWByteAddressBuffer _71 : register(u0);
|
||||
ByteAddressBuffer _96 : register(t2);
|
||||
ByteAddressBuffer _278 : register(t1);
|
||||
ByteAddressBuffer _377 : register(t3);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
uint3 gl_WorkGroupID : SV_GroupID;
|
||||
uint3 gl_LocalInvocationID : SV_GroupThreadID;
|
||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||
};
|
||||
|
||||
groupshared Transform sh_scratch[512];
|
||||
|
||||
Transform Transform_read(TransformRef ref)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = _96.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _96.Load((ix + 1u) * 4 + 0);
|
||||
uint raw2 = _96.Load((ix + 2u) * 4 + 0);
|
||||
uint raw3 = _96.Load((ix + 3u) * 4 + 0);
|
||||
uint raw4 = _96.Load((ix + 4u) * 4 + 0);
|
||||
uint raw5 = _96.Load((ix + 5u) * 4 + 0);
|
||||
Transform s;
|
||||
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
||||
s.translate = float2(asfloat(raw4), asfloat(raw5));
|
||||
return s;
|
||||
}
|
||||
|
||||
TransformRef Transform_index(TransformRef ref, uint index)
|
||||
{
|
||||
TransformRef _85 = { ref.offset + (index * 24u) };
|
||||
return _85;
|
||||
}
|
||||
|
||||
Transform combine_monoid(Transform a, Transform b)
|
||||
{
|
||||
Transform c;
|
||||
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
|
||||
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
Transform monoid_identity()
|
||||
{
|
||||
return _224;
|
||||
}
|
||||
|
||||
bool touch_mem(Alloc alloc, uint offset)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
void write_mem(Alloc alloc, uint offset, uint val)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
if (!touch_mem(param, param_1))
|
||||
{
|
||||
return;
|
||||
}
|
||||
_71.Store(offset * 4 + 8, val);
|
||||
}
|
||||
|
||||
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = asuint(s.mat.x);
|
||||
write_mem(param, param_1, param_2);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = asuint(s.mat.y);
|
||||
write_mem(param_3, param_4, param_5);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = asuint(s.mat.z);
|
||||
write_mem(param_6, param_7, param_8);
|
||||
Alloc param_9 = a;
|
||||
uint param_10 = ix + 3u;
|
||||
uint param_11 = asuint(s.mat.w);
|
||||
write_mem(param_9, param_10, param_11);
|
||||
Alloc param_12 = a;
|
||||
uint param_13 = ix + 4u;
|
||||
uint param_14 = asuint(s.translate.x);
|
||||
write_mem(param_12, param_13, param_14);
|
||||
Alloc param_15 = a;
|
||||
uint param_16 = ix + 5u;
|
||||
uint param_17 = asuint(s.translate.y);
|
||||
write_mem(param_15, param_16, param_17);
|
||||
}
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef _285 = { _278.Load(44) + (ix * 24u) };
|
||||
TransformRef ref = _285;
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param);
|
||||
Transform local[8];
|
||||
local[0] = agg;
|
||||
for (uint i = 1u; i < 8u; i++)
|
||||
{
|
||||
TransformRef param_1 = ref;
|
||||
uint param_2 = i;
|
||||
TransformRef param_3 = Transform_index(param_1, param_2);
|
||||
Transform param_4 = agg;
|
||||
Transform param_5 = Transform_read(param_3);
|
||||
agg = combine_monoid(param_4, param_5);
|
||||
local[i] = agg;
|
||||
}
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||
{
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||
{
|
||||
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||
Transform param_6 = other;
|
||||
Transform param_7 = agg;
|
||||
agg = combine_monoid(param_6, param_7);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
Transform row = monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0u)
|
||||
{
|
||||
Transform _383;
|
||||
_383.mat = asfloat(_377.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
|
||||
_383.translate = asfloat(_377.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
|
||||
row.mat = _383.mat;
|
||||
row.translate = _383.translate;
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
Transform param_8 = row;
|
||||
Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u];
|
||||
row = combine_monoid(param_8, param_9);
|
||||
}
|
||||
Alloc param_12;
|
||||
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||
{
|
||||
Transform param_10 = row;
|
||||
Transform param_11 = local[i_2];
|
||||
Transform m = combine_monoid(param_10, param_11);
|
||||
TransformSeg _423 = { m.mat, m.translate };
|
||||
TransformSeg transform = _423;
|
||||
TransformSegRef _433 = { _278.Load(36) + ((ix + i_2) * 24u) };
|
||||
TransformSegRef trans_ref = _433;
|
||||
Alloc _437;
|
||||
_437.offset = _278.Load(36);
|
||||
param_12.offset = _437.offset;
|
||||
TransformSegRef param_13 = trans_ref;
|
||||
TransformSeg param_14 = transform;
|
||||
TransformSeg_write(param_12, param_13, param_14);
|
||||
}
|
||||
}
|
||||
|
||||
[numthreads(512, 1, 1)]
|
||||
void main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
||||
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||
comp_main();
|
||||
}
|
272
piet-gpu/shader/gen/transform_leaf.msl
Normal file
272
piet-gpu/shader/gen/transform_leaf.msl
Normal file
|
@ -0,0 +1,272 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct Alloc
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct TransformRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Transform
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
};
|
||||
|
||||
struct TransformSegRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct TransformSeg
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
};
|
||||
|
||||
struct Memory
|
||||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
struct SceneBuf
|
||||
{
|
||||
uint scene[1];
|
||||
};
|
||||
|
||||
struct Alloc_1
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Config
|
||||
{
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
uint height_in_tiles;
|
||||
Alloc_1 tile_alloc;
|
||||
Alloc_1 bin_alloc;
|
||||
Alloc_1 ptcl_alloc;
|
||||
Alloc_1 pathseg_alloc;
|
||||
Alloc_1 anno_alloc;
|
||||
Alloc_1 trans_alloc;
|
||||
uint n_trans;
|
||||
uint trans_offset;
|
||||
};
|
||||
|
||||
struct ConfigBuf
|
||||
{
|
||||
Config conf;
|
||||
};
|
||||
|
||||
struct Transform_1
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
char _m0_final_padding[8];
|
||||
};
|
||||
|
||||
struct ParentBuf
|
||||
{
|
||||
Transform_1 parent[1];
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_96)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = v_96.scene[ix + 0u];
|
||||
uint raw1 = v_96.scene[ix + 1u];
|
||||
uint raw2 = v_96.scene[ix + 2u];
|
||||
uint raw3 = v_96.scene[ix + 3u];
|
||||
uint raw4 = v_96.scene[ix + 4u];
|
||||
uint raw5 = v_96.scene[ix + 5u];
|
||||
Transform s;
|
||||
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
|
||||
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
|
||||
return s;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index)
|
||||
{
|
||||
return TransformRef{ ref.offset + (index * 24u) };
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
|
||||
{
|
||||
Transform c;
|
||||
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
|
||||
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Transform monoid_identity()
|
||||
{
|
||||
return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) };
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_71)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
if (!touch_mem(param, param_1))
|
||||
{
|
||||
return;
|
||||
}
|
||||
v_71.memory[offset] = val;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref, thread const TransformSeg& s, device Memory& v_71)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = as_type<uint>(s.mat.x);
|
||||
write_mem(param, param_1, param_2, v_71);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = as_type<uint>(s.mat.y);
|
||||
write_mem(param_3, param_4, param_5, v_71);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = as_type<uint>(s.mat.z);
|
||||
write_mem(param_6, param_7, param_8, v_71);
|
||||
Alloc param_9 = a;
|
||||
uint param_10 = ix + 3u;
|
||||
uint param_11 = as_type<uint>(s.mat.w);
|
||||
write_mem(param_9, param_10, param_11, v_71);
|
||||
Alloc param_12 = a;
|
||||
uint param_13 = ix + 4u;
|
||||
uint param_14 = as_type<uint>(s.translate.x);
|
||||
write_mem(param_12, param_13, param_14, v_71);
|
||||
Alloc param_15 = a;
|
||||
uint param_16 = ix + 5u;
|
||||
uint param_17 = as_type<uint>(s.translate.y);
|
||||
write_mem(param_15, param_16, param_17, v_71);
|
||||
}
|
||||
|
||||
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _278 [[buffer(1)]], const device SceneBuf& v_96 [[buffer(2)]], const device ParentBuf& _377 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup Transform sh_scratch[512];
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef ref = TransformRef{ _278.conf.trans_offset + (ix * 24u) };
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param, v_96);
|
||||
spvUnsafeArray<Transform, 8> local;
|
||||
local[0] = agg;
|
||||
for (uint i = 1u; i < 8u; i++)
|
||||
{
|
||||
TransformRef param_1 = ref;
|
||||
uint param_2 = i;
|
||||
TransformRef param_3 = Transform_index(param_1, param_2);
|
||||
Transform param_4 = agg;
|
||||
Transform param_5 = Transform_read(param_3, v_96);
|
||||
agg = combine_monoid(param_4, param_5);
|
||||
local[i] = agg;
|
||||
}
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||
{
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||
{
|
||||
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||
Transform param_6 = other;
|
||||
Transform param_7 = agg;
|
||||
agg = combine_monoid(param_6, param_7);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
Transform row = monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0u)
|
||||
{
|
||||
uint _380 = gl_WorkGroupID.x - 1u;
|
||||
row.mat = _377.parent[_380].mat;
|
||||
row.translate = _377.parent[_380].translate;
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
Transform param_8 = row;
|
||||
Transform param_9 = sh_scratch[gl_LocalInvocationID.x - 1u];
|
||||
row = combine_monoid(param_8, param_9);
|
||||
}
|
||||
Alloc param_12;
|
||||
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||
{
|
||||
Transform param_10 = row;
|
||||
Transform param_11 = local[i_2];
|
||||
Transform m = combine_monoid(param_10, param_11);
|
||||
TransformSeg transform = TransformSeg{ m.mat, m.translate };
|
||||
TransformSegRef trans_ref = TransformSegRef{ _278.conf.trans_alloc.offset + ((ix + i_2) * 24u) };
|
||||
param_12.offset = _278.conf.trans_alloc.offset;
|
||||
TransformSegRef param_13 = trans_ref;
|
||||
TransformSeg param_14 = transform;
|
||||
TransformSeg_write(param_12, param_13, param_14, v_71);
|
||||
}
|
||||
}
|
||||
|
BIN
piet-gpu/shader/gen/transform_leaf.spv
Normal file
BIN
piet-gpu/shader/gen/transform_leaf.spv
Normal file
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_reduce.dxil
Normal file
BIN
piet-gpu/shader/gen/transform_reduce.dxil
Normal file
Binary file not shown.
125
piet-gpu/shader/gen/transform_reduce.hlsl
Normal file
125
piet-gpu/shader/gen/transform_reduce.hlsl
Normal file
|
@ -0,0 +1,125 @@
|
|||
struct TransformRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Transform
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
};
|
||||
|
||||
struct Alloc
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Config
|
||||
{
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
uint height_in_tiles;
|
||||
Alloc tile_alloc;
|
||||
Alloc bin_alloc;
|
||||
Alloc ptcl_alloc;
|
||||
Alloc pathseg_alloc;
|
||||
Alloc anno_alloc;
|
||||
Alloc trans_alloc;
|
||||
uint n_trans;
|
||||
uint trans_offset;
|
||||
};
|
||||
|
||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
||||
|
||||
ByteAddressBuffer _49 : register(t2);
|
||||
ByteAddressBuffer _161 : register(t1);
|
||||
RWByteAddressBuffer _251 : register(u3);
|
||||
RWByteAddressBuffer _267 : register(u0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
uint3 gl_WorkGroupID : SV_GroupID;
|
||||
uint3 gl_LocalInvocationID : SV_GroupThreadID;
|
||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||
};
|
||||
|
||||
groupshared Transform sh_scratch[512];
|
||||
|
||||
Transform Transform_read(TransformRef ref)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = _49.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _49.Load((ix + 1u) * 4 + 0);
|
||||
uint raw2 = _49.Load((ix + 2u) * 4 + 0);
|
||||
uint raw3 = _49.Load((ix + 3u) * 4 + 0);
|
||||
uint raw4 = _49.Load((ix + 4u) * 4 + 0);
|
||||
uint raw5 = _49.Load((ix + 5u) * 4 + 0);
|
||||
Transform s;
|
||||
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
||||
s.translate = float2(asfloat(raw4), asfloat(raw5));
|
||||
return s;
|
||||
}
|
||||
|
||||
TransformRef Transform_index(TransformRef ref, uint index)
|
||||
{
|
||||
TransformRef _37 = { ref.offset + (index * 24u) };
|
||||
return _37;
|
||||
}
|
||||
|
||||
Transform combine_monoid(Transform a, Transform b)
|
||||
{
|
||||
Transform c;
|
||||
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
|
||||
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef _168 = { _161.Load(44) + (ix * 24u) };
|
||||
TransformRef ref = _168;
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param);
|
||||
for (uint i = 1u; i < 8u; i++)
|
||||
{
|
||||
TransformRef param_1 = ref;
|
||||
uint param_2 = i;
|
||||
TransformRef param_3 = Transform_index(param_1, param_2);
|
||||
Transform param_4 = agg;
|
||||
Transform param_5 = Transform_read(param_3);
|
||||
agg = combine_monoid(param_4, param_5);
|
||||
}
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||
{
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
||||
{
|
||||
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
||||
Transform param_6 = agg;
|
||||
Transform param_7 = other;
|
||||
agg = combine_monoid(param_6, param_7);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
if (gl_LocalInvocationID.x == 0u)
|
||||
{
|
||||
_251.Store4(gl_WorkGroupID.x * 32 + 0, asuint(agg.mat));
|
||||
_251.Store2(gl_WorkGroupID.x * 32 + 16, asuint(agg.translate));
|
||||
}
|
||||
}
|
||||
|
||||
[numthreads(512, 1, 1)]
|
||||
void main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
||||
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||
comp_main();
|
||||
}
|
138
piet-gpu/shader/gen/transform_reduce.msl
Normal file
138
piet-gpu/shader/gen/transform_reduce.msl
Normal file
|
@ -0,0 +1,138 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct TransformRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Transform
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
};
|
||||
|
||||
struct SceneBuf
|
||||
{
|
||||
uint scene[1];
|
||||
};
|
||||
|
||||
struct Alloc
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct Config
|
||||
{
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
uint height_in_tiles;
|
||||
Alloc tile_alloc;
|
||||
Alloc bin_alloc;
|
||||
Alloc ptcl_alloc;
|
||||
Alloc pathseg_alloc;
|
||||
Alloc anno_alloc;
|
||||
Alloc trans_alloc;
|
||||
uint n_trans;
|
||||
uint trans_offset;
|
||||
};
|
||||
|
||||
struct ConfigBuf
|
||||
{
|
||||
Config conf;
|
||||
};
|
||||
|
||||
struct Transform_1
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
char _m0_final_padding[8];
|
||||
};
|
||||
|
||||
struct OutBuf
|
||||
{
|
||||
Transform_1 outbuf[1];
|
||||
};
|
||||
|
||||
struct Memory
|
||||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_49)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = v_49.scene[ix + 0u];
|
||||
uint raw1 = v_49.scene[ix + 1u];
|
||||
uint raw2 = v_49.scene[ix + 2u];
|
||||
uint raw3 = v_49.scene[ix + 3u];
|
||||
uint raw4 = v_49.scene[ix + 4u];
|
||||
uint raw5 = v_49.scene[ix + 5u];
|
||||
Transform s;
|
||||
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
|
||||
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
|
||||
return s;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
TransformRef Transform_index(thread const TransformRef& ref, thread const uint& index)
|
||||
{
|
||||
return TransformRef{ ref.offset + (index * 24u) };
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
|
||||
{
|
||||
Transform c;
|
||||
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
|
||||
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], device OutBuf& _251 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup Transform sh_scratch[512];
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef ref = TransformRef{ _161.conf.trans_offset + (ix * 24u) };
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param, v_49);
|
||||
for (uint i = 1u; i < 8u; i++)
|
||||
{
|
||||
TransformRef param_1 = ref;
|
||||
uint param_2 = i;
|
||||
TransformRef param_3 = Transform_index(param_1, param_2);
|
||||
Transform param_4 = agg;
|
||||
Transform param_5 = Transform_read(param_3, v_49);
|
||||
agg = combine_monoid(param_4, param_5);
|
||||
}
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||
{
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
||||
{
|
||||
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
||||
Transform param_6 = agg;
|
||||
Transform param_7 = other;
|
||||
agg = combine_monoid(param_6, param_7);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
if (gl_LocalInvocationID.x == 0u)
|
||||
{
|
||||
_251.outbuf[gl_WorkGroupID.x].mat = agg.mat;
|
||||
_251.outbuf[gl_WorkGroupID.x].translate = agg.translate;
|
||||
}
|
||||
}
|
||||
|
BIN
piet-gpu/shader/gen/transform_reduce.spv
Normal file
BIN
piet-gpu/shader/gen/transform_reduce.spv
Normal file
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_root.dxil
Normal file
BIN
piet-gpu/shader/gen/transform_root.dxil
Normal file
Binary file not shown.
94
piet-gpu/shader/gen/transform_root.hlsl
Normal file
94
piet-gpu/shader/gen/transform_root.hlsl
Normal file
|
@ -0,0 +1,94 @@
|
|||
struct Transform
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
};
|
||||
|
||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
||||
|
||||
static const Transform _23 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
|
||||
|
||||
RWByteAddressBuffer _89 : register(u0);
|
||||
|
||||
static uint3 gl_LocalInvocationID;
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
uint3 gl_LocalInvocationID : SV_GroupThreadID;
|
||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||
};
|
||||
|
||||
groupshared Transform sh_scratch[512];
|
||||
|
||||
Transform combine_monoid(Transform a, Transform b)
|
||||
{
|
||||
Transform c;
|
||||
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
|
||||
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
Transform monoid_identity()
|
||||
{
|
||||
return _23;
|
||||
}
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
Transform _93;
|
||||
_93.mat = asfloat(_89.Load4(ix * 32 + 0));
|
||||
_93.translate = asfloat(_89.Load2(ix * 32 + 16));
|
||||
Transform local[8];
|
||||
local[0].mat = _93.mat;
|
||||
local[0].translate = _93.translate;
|
||||
Transform param_1;
|
||||
for (uint i = 1u; i < 8u; i++)
|
||||
{
|
||||
Transform param = local[i - 1u];
|
||||
Transform _119;
|
||||
_119.mat = asfloat(_89.Load4((ix + i) * 32 + 0));
|
||||
_119.translate = asfloat(_89.Load2((ix + i) * 32 + 16));
|
||||
param_1.mat = _119.mat;
|
||||
param_1.translate = _119.translate;
|
||||
local[i] = combine_monoid(param, param_1);
|
||||
}
|
||||
Transform agg = local[7];
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||
{
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||
{
|
||||
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||
Transform param_2 = other;
|
||||
Transform param_3 = agg;
|
||||
agg = combine_monoid(param_2, param_3);
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
Transform row = monoid_identity();
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
row = sh_scratch[gl_LocalInvocationID.x - 1u];
|
||||
}
|
||||
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||
{
|
||||
Transform param_4 = row;
|
||||
Transform param_5 = local[i_2];
|
||||
Transform m = combine_monoid(param_4, param_5);
|
||||
uint _209 = ix + i_2;
|
||||
_89.Store4(_209 * 32 + 0, asuint(m.mat));
|
||||
_89.Store2(_209 * 32 + 16, asuint(m.translate));
|
||||
}
|
||||
}
|
||||
|
||||
[numthreads(512, 1, 1)]
|
||||
void main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
||||
gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
|
||||
comp_main();
|
||||
}
|
129
piet-gpu/shader/gen/transform_root.msl
Normal file
129
piet-gpu/shader/gen/transform_root.msl
Normal file
|
@ -0,0 +1,129 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wmissing-braces"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
template<typename T, size_t Num>
|
||||
struct spvUnsafeArray
|
||||
{
|
||||
T elements[Num ? Num : 1];
|
||||
|
||||
thread T& operator [] (size_t pos) thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const thread T& operator [] (size_t pos) const thread
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
device T& operator [] (size_t pos) device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const device T& operator [] (size_t pos) const device
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
constexpr const constant T& operator [] (size_t pos) const constant
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
|
||||
threadgroup T& operator [] (size_t pos) threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
|
||||
{
|
||||
return elements[pos];
|
||||
}
|
||||
};
|
||||
|
||||
struct Transform
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
};
|
||||
|
||||
struct Transform_1
|
||||
{
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
char _m0_final_padding[8];
|
||||
};
|
||||
|
||||
struct DataBuf
|
||||
{
|
||||
Transform_1 data[1];
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
|
||||
{
|
||||
Transform c;
|
||||
c.mat = (a.mat.xyxy * b.mat.xxzz) + (a.mat.zwzw * b.mat.yyww);
|
||||
c.translate = ((a.mat.xy * b.translate.x) + (a.mat.zw * b.translate.y)) + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Transform monoid_identity()
|
||||
{
|
||||
return Transform{ float4(1.0, 0.0, 0.0, 1.0), float2(0.0) };
|
||||
}
|
||||
|
||||
kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
{
|
||||
threadgroup Transform sh_scratch[512];
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
spvUnsafeArray<Transform, 8> local;
|
||||
local[0].mat = _89.data[ix].mat;
|
||||
local[0].translate = _89.data[ix].translate;
|
||||
Transform param_1;
|
||||
for (uint i = 1u; i < 8u; i++)
|
||||
{
|
||||
uint _113 = ix + i;
|
||||
Transform param = local[i - 1u];
|
||||
param_1.mat = _89.data[_113].mat;
|
||||
param_1.translate = _89.data[_113].translate;
|
||||
local[i] = combine_monoid(param, param_1);
|
||||
}
|
||||
Transform agg = local[7];
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
||||
{
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||
{
|
||||
Transform other = sh_scratch[gl_LocalInvocationID.x - (1u << i_1)];
|
||||
Transform param_2 = other;
|
||||
Transform param_3 = agg;
|
||||
agg = combine_monoid(param_2, param_3);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
Transform row = monoid_identity();
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
row = sh_scratch[gl_LocalInvocationID.x - 1u];
|
||||
}
|
||||
for (uint i_2 = 0u; i_2 < 8u; i_2++)
|
||||
{
|
||||
Transform param_4 = row;
|
||||
Transform param_5 = local[i_2];
|
||||
Transform m = combine_monoid(param_4, param_5);
|
||||
uint _209 = ix + i_2;
|
||||
_89.data[_209].mat = m.mat;
|
||||
_89.data[_209].translate = m.translate;
|
||||
}
|
||||
}
|
||||
|
BIN
piet-gpu/shader/gen/transform_root.spv
Normal file
BIN
piet-gpu/shader/gen/transform_root.spv
Normal file
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -38,6 +38,12 @@ struct Config {
|
|||
Alloc pathseg_alloc;
|
||||
Alloc anno_alloc;
|
||||
Alloc trans_alloc;
|
||||
// new element pipeline stuff follows
|
||||
|
||||
// Number of transforms in scene
|
||||
uint n_trans;
|
||||
// Offset (in bytes) of transform stream in scene buffer
|
||||
uint trans_offset;
|
||||
};
|
||||
|
||||
// Fill modes.
|
||||
|
|
Binary file not shown.
86
piet-gpu/shader/transform_leaf.comp
Normal file
86
piet-gpu/shader/transform_leaf.comp
Normal file
|
@ -0,0 +1,86 @@
|
|||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// A scan for a tree reduction prefix scan that outputs the final result.
|
||||
// Output is written into memory at trans_alloc.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
#define N_ROWS 8
|
||||
#define LG_WG_SIZE 9
|
||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||
|
||||
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
||||
|
||||
layout(binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
layout(binding = 2) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "tile.h"
|
||||
|
||||
#define Monoid Transform
|
||||
|
||||
layout(set = 0, binding = 3) readonly buffer ParentBuf {
|
||||
Monoid[] parent;
|
||||
};
|
||||
|
||||
Monoid monoid_identity() {
|
||||
return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0));
|
||||
}
|
||||
|
||||
Monoid combine_monoid(Monoid a, Monoid b) {
|
||||
Monoid c;
|
||||
c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww;
|
||||
c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
shared Monoid sh_scratch[WG_SIZE];
|
||||
|
||||
void main() {
|
||||
Monoid local[N_ROWS];
|
||||
|
||||
uint ix = gl_GlobalInvocationID.x * N_ROWS;
|
||||
TransformRef ref = TransformRef(conf.trans_offset + ix * Transform_size);
|
||||
|
||||
Monoid agg = Transform_read(ref);
|
||||
local[0] = agg;
|
||||
for (uint i = 1; i < N_ROWS; i++) {
|
||||
agg = combine_monoid(agg, Transform_read(Transform_index(ref, i)));
|
||||
local[i] = agg;
|
||||
}
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
barrier();
|
||||
if (gl_LocalInvocationID.x >= (1u << i)) {
|
||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
|
||||
agg = combine_monoid(other, agg);
|
||||
}
|
||||
barrier();
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
|
||||
barrier();
|
||||
Monoid row = monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0) {
|
||||
row = parent[gl_WorkGroupID.x - 1];
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0) {
|
||||
row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
|
||||
}
|
||||
for (uint i = 0; i < N_ROWS; i++) {
|
||||
Monoid m = combine_monoid(row, local[i]);
|
||||
TransformSeg transform = TransformSeg(m.mat, m.translate);
|
||||
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (ix + i) * TransformSeg_size);
|
||||
TransformSeg_write(conf.trans_alloc, trans_ref, transform);
|
||||
}
|
||||
}
|
69
piet-gpu/shader/transform_reduce.comp
Normal file
69
piet-gpu/shader/transform_reduce.comp
Normal file
|
@ -0,0 +1,69 @@
|
|||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// The reduction phase for transform scan implemented as a tree reduction.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
#define N_ROWS 8
|
||||
#define LG_WG_SIZE 9
|
||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||
|
||||
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
||||
|
||||
layout(binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
layout(binding = 2) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
|
||||
#define Monoid Transform
|
||||
|
||||
layout(set = 0, binding = 3) buffer OutBuf {
|
||||
Monoid[] outbuf;
|
||||
};
|
||||
|
||||
Monoid monoid_identity() {
|
||||
return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0));
|
||||
}
|
||||
|
||||
Monoid combine_monoid(Monoid a, Monoid b) {
|
||||
Monoid c;
|
||||
c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww;
|
||||
c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
shared Monoid sh_scratch[WG_SIZE];
|
||||
|
||||
void main() {
|
||||
uint ix = gl_GlobalInvocationID.x * N_ROWS;
|
||||
TransformRef ref = TransformRef(conf.trans_offset + ix * Transform_size);
|
||||
|
||||
Monoid agg = Transform_read(ref);
|
||||
for (uint i = 1; i < N_ROWS; i++) {
|
||||
agg = combine_monoid(agg, Transform_read(Transform_index(ref, i)));
|
||||
}
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
barrier();
|
||||
// We could make this predicate tighter, but would it help?
|
||||
if (gl_LocalInvocationID.x + (1u << i) < WG_SIZE) {
|
||||
Monoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i)];
|
||||
agg = combine_monoid(agg, other);
|
||||
}
|
||||
barrier();
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
if (gl_LocalInvocationID.x == 0) {
|
||||
outbuf[gl_WorkGroupID.x] = agg;
|
||||
}
|
||||
}
|
89
piet-gpu/shader/transform_scan.comp
Normal file
89
piet-gpu/shader/transform_scan.comp
Normal file
|
@ -0,0 +1,89 @@
|
|||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// A scan for a tree reduction prefix scan (either root or not, by ifdef).
|
||||
|
||||
#version 450
|
||||
|
||||
#define N_ROWS 8
|
||||
#define LG_WG_SIZE 9
|
||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||
|
||||
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
||||
|
||||
// This is copy-pasted from scene.h. It might be better for DRY
|
||||
// to include it, but that pulls in more stuff we don't need.
|
||||
struct Transform {
|
||||
vec4 mat;
|
||||
vec2 translate;
|
||||
};
|
||||
|
||||
#define Monoid Transform
|
||||
|
||||
layout(binding = 0) buffer DataBuf {
|
||||
Monoid[] data;
|
||||
};
|
||||
|
||||
#ifndef ROOT
|
||||
layout(binding = 1) readonly buffer ParentBuf {
|
||||
Monoid[] parent;
|
||||
};
|
||||
#endif
|
||||
|
||||
Monoid monoid_identity() {
|
||||
return Monoid(vec4(1.0, 0.0, 0.0, 1.0), vec2(0.0, 0.0));
|
||||
}
|
||||
|
||||
Monoid combine_monoid(Monoid a, Monoid b) {
|
||||
Monoid c;
|
||||
c.mat = a.mat.xyxy * b.mat.xxzz + a.mat.zwzw * b.mat.yyww;
|
||||
c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate;
|
||||
return c;
|
||||
}
|
||||
|
||||
shared Monoid sh_scratch[WG_SIZE];
|
||||
|
||||
void main() {
|
||||
Monoid local[N_ROWS];
|
||||
|
||||
uint ix = gl_GlobalInvocationID.x * N_ROWS;
|
||||
|
||||
// TODO: gate buffer read
|
||||
local[0] = data[ix];
|
||||
for (uint i = 1; i < N_ROWS; i++) {
|
||||
local[i] = combine_monoid(local[i - 1], data[ix + i]);
|
||||
}
|
||||
Monoid agg = local[N_ROWS - 1];
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
for (uint i = 0; i < LG_WG_SIZE; i++) {
|
||||
barrier();
|
||||
if (gl_LocalInvocationID.x >= (1u << i)) {
|
||||
Monoid other = sh_scratch[gl_LocalInvocationID.x - (1u << i)];
|
||||
agg = combine_monoid(other, agg);
|
||||
}
|
||||
barrier();
|
||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||
}
|
||||
|
||||
barrier();
|
||||
// This could be a semigroup instead of a monoid if we reworked the
|
||||
// conditional logic, but that might impact performance.
|
||||
Monoid row = monoid_identity();
|
||||
#ifdef ROOT
|
||||
if (gl_LocalInvocationID.x > 0) {
|
||||
row = sh_scratch[gl_LocalInvocationID.x - 1];
|
||||
}
|
||||
#else
|
||||
if (gl_WorkGroupID.x > 0) {
|
||||
row = parent[gl_WorkGroupID.x - 1];
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0) {
|
||||
row = combine_monoid(row, sh_scratch[gl_LocalInvocationID.x - 1]);
|
||||
}
|
||||
#endif
|
||||
for (uint i = 0; i < N_ROWS; i++) {
|
||||
Monoid m = combine_monoid(row, local[i]);
|
||||
// TODO: gate buffer write
|
||||
data[ix + i] = m;
|
||||
}
|
||||
}
|
|
@ -1,6 +1,7 @@
|
|||
mod gradient;
|
||||
mod pico_svg;
|
||||
mod render_ctx;
|
||||
pub mod stages;
|
||||
pub mod test_scenes;
|
||||
mod text;
|
||||
|
||||
|
@ -20,6 +21,8 @@ use piet_gpu_hal::{
|
|||
|
||||
use pico_svg::PicoSvg;
|
||||
|
||||
use crate::stages::Config;
|
||||
|
||||
const TILE_W: usize = 16;
|
||||
const TILE_H: usize = 16;
|
||||
|
||||
|
@ -123,7 +126,7 @@ impl Renderer {
|
|||
let image_dev = session.create_image2d(width as u32, height as u32)?;
|
||||
|
||||
// Note: this must be updated when the config struct size changes.
|
||||
const CONFIG_BUFFER_SIZE: u64 = 40;
|
||||
const CONFIG_BUFFER_SIZE: u64 = std::mem::size_of::<Config>() as u64;
|
||||
let config_buf = session.create_buffer(CONFIG_BUFFER_SIZE, dev).unwrap();
|
||||
// TODO: separate staging buffer (if needed)
|
||||
let config_bufs = (0..n_bufs)
|
||||
|
@ -295,25 +298,28 @@ impl Renderer {
|
|||
alloc += (n_paths * ANNO_SIZE + 3) & !3;
|
||||
let trans_base = alloc;
|
||||
alloc += (n_trans * TRANS_SIZE + 3) & !3;
|
||||
let config = &[
|
||||
n_paths as u32,
|
||||
n_pathseg as u32,
|
||||
width_in_tiles as u32,
|
||||
height_in_tiles as u32,
|
||||
tile_base as u32,
|
||||
bin_base as u32,
|
||||
ptcl_base as u32,
|
||||
pathseg_base as u32,
|
||||
anno_base as u32,
|
||||
trans_base as u32,
|
||||
];
|
||||
let trans_offset = 0; // For new element pipeline, not yet used
|
||||
let config = Config {
|
||||
n_elements: n_paths as u32,
|
||||
n_pathseg: n_pathseg as u32,
|
||||
width_in_tiles: width_in_tiles as u32,
|
||||
height_in_tiles: height_in_tiles as u32,
|
||||
tile_alloc: tile_base as u32,
|
||||
bin_alloc: bin_base as u32,
|
||||
ptcl_alloc: ptcl_base as u32,
|
||||
pathseg_alloc: pathseg_base as u32,
|
||||
anno_alloc: anno_base as u32,
|
||||
trans_alloc: trans_base as u32,
|
||||
n_trans: n_trans as u32,
|
||||
trans_offset: trans_offset as u32,
|
||||
};
|
||||
unsafe {
|
||||
let scene = render_ctx.get_scene_buf();
|
||||
self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
|
||||
// TODO: reallocate scene buffer if size is inadequate
|
||||
assert!(self.scene_bufs[buf_ix].size() as usize >= scene.len());
|
||||
self.scene_bufs[buf_ix].write(scene)?;
|
||||
self.config_bufs[buf_ix].write(config)?;
|
||||
self.config_bufs[buf_ix].write(&[config])?;
|
||||
self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
|
||||
|
||||
// Upload gradient data.
|
||||
|
|
209
piet-gpu/src/stages.rs
Normal file
209
piet-gpu/src/stages.rs
Normal file
|
@ -0,0 +1,209 @@
|
|||
// Copyright 2021 The piet-gpu authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
//! Stages for new element pipeline, exposed for testing.
|
||||
|
||||
use bytemuck::{Pod, Zeroable};
|
||||
|
||||
use piet::kurbo::Affine;
|
||||
use piet_gpu_hal::{
|
||||
include_shader, BindType, Buffer, BufferUsage, CmdBuf, DescriptorSet, Pipeline, Session,
|
||||
};
|
||||
|
||||
/// The configuration block passed to piet-gpu shaders.
|
||||
///
|
||||
/// Note: this should be kept in sync with the version in setup.h.
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Default, Zeroable, Pod)]
|
||||
pub struct Config {
|
||||
pub n_elements: u32, // paths
|
||||
pub n_pathseg: u32,
|
||||
pub width_in_tiles: u32,
|
||||
pub height_in_tiles: u32,
|
||||
pub tile_alloc: u32,
|
||||
pub bin_alloc: u32,
|
||||
pub ptcl_alloc: u32,
|
||||
pub pathseg_alloc: u32,
|
||||
pub anno_alloc: u32,
|
||||
pub trans_alloc: u32,
|
||||
pub n_trans: u32,
|
||||
pub trans_offset: u32,
|
||||
}
|
||||
|
||||
// The individual stages will probably be separate files but for now, all in one.
|
||||
|
||||
// This is equivalent to the version in piet-gpu-types, but the bytemuck
|
||||
// representation will likely be faster.
|
||||
#[repr(C)]
|
||||
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
|
||||
pub struct Transform {
|
||||
pub mat: [f32; 4],
|
||||
pub translate: [f32; 2],
|
||||
}
|
||||
|
||||
const TRANSFORM_WG: u64 = 512;
|
||||
const TRANSFORM_N_ROWS: u64 = 8;
|
||||
const TRANSFORM_PART_SIZE: u64 = TRANSFORM_WG * TRANSFORM_N_ROWS;
|
||||
|
||||
pub struct TransformCode {
|
||||
reduce_pipeline: Pipeline,
|
||||
root_pipeline: Pipeline,
|
||||
leaf_pipeline: Pipeline,
|
||||
}
|
||||
|
||||
pub struct TransformStage {
|
||||
// Right now we're limited to partition^2 (~16M) elements. This can be
|
||||
// expanded but is tedious.
|
||||
root_buf: Buffer,
|
||||
root_ds: DescriptorSet,
|
||||
}
|
||||
|
||||
pub struct TransformBinding {
|
||||
reduce_ds: DescriptorSet,
|
||||
leaf_ds: DescriptorSet,
|
||||
}
|
||||
|
||||
impl TransformCode {
|
||||
pub unsafe fn new(session: &Session) -> TransformCode {
|
||||
let reduce_code = include_shader!(session, "../shader/gen/transform_reduce");
|
||||
let reduce_pipeline = session
|
||||
.create_compute_pipeline(
|
||||
reduce_code,
|
||||
&[
|
||||
BindType::Buffer,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
let root_code = include_shader!(session, "../shader/gen/transform_root");
|
||||
let root_pipeline = session
|
||||
.create_compute_pipeline(root_code, &[BindType::Buffer])
|
||||
.unwrap();
|
||||
let leaf_code = include_shader!(session, "../shader/gen/transform_leaf");
|
||||
let leaf_pipeline = session
|
||||
.create_compute_pipeline(
|
||||
leaf_code,
|
||||
&[
|
||||
BindType::Buffer,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
],
|
||||
)
|
||||
.unwrap();
|
||||
TransformCode {
|
||||
reduce_pipeline,
|
||||
root_pipeline,
|
||||
leaf_pipeline,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl TransformStage {
|
||||
pub unsafe fn new(session: &Session, code: &TransformCode) -> TransformStage {
|
||||
// We're limited to TRANSFORM_PART_SIZE^2
|
||||
// Also note: size here allows padding
|
||||
let root_buf_size = TRANSFORM_PART_SIZE * 32;
|
||||
let root_buf = session
|
||||
.create_buffer(root_buf_size, BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
let root_ds = session
|
||||
.create_simple_descriptor_set(&code.root_pipeline, &[&root_buf])
|
||||
.unwrap();
|
||||
TransformStage { root_buf, root_ds }
|
||||
}
|
||||
|
||||
pub unsafe fn bind(
|
||||
&self,
|
||||
session: &Session,
|
||||
code: &TransformCode,
|
||||
config_buf: &Buffer,
|
||||
scene_buf: &Buffer,
|
||||
memory_buf: &Buffer,
|
||||
) -> TransformBinding {
|
||||
let reduce_ds = session
|
||||
.create_simple_descriptor_set(
|
||||
&code.reduce_pipeline,
|
||||
&[memory_buf, config_buf, scene_buf, &self.root_buf],
|
||||
)
|
||||
.unwrap();
|
||||
let leaf_ds = session
|
||||
.create_simple_descriptor_set(
|
||||
&code.leaf_pipeline,
|
||||
&[memory_buf, config_buf, scene_buf, &self.root_buf],
|
||||
)
|
||||
.unwrap();
|
||||
TransformBinding { reduce_ds, leaf_ds }
|
||||
}
|
||||
|
||||
pub unsafe fn record(
|
||||
&self,
|
||||
cmd_buf: &mut CmdBuf,
|
||||
code: &TransformCode,
|
||||
binding: &TransformBinding,
|
||||
size: u64,
|
||||
) {
|
||||
if size > TRANSFORM_PART_SIZE.pow(2) {
|
||||
panic!("very large scan not yet implemented");
|
||||
}
|
||||
let n_workgroups = (size + TRANSFORM_PART_SIZE - 1) / TRANSFORM_PART_SIZE;
|
||||
if n_workgroups > 1 {
|
||||
cmd_buf.dispatch(
|
||||
&code.reduce_pipeline,
|
||||
&binding.reduce_ds,
|
||||
(n_workgroups as u32, 1, 1),
|
||||
(TRANSFORM_WG as u32, 1, 1),
|
||||
);
|
||||
cmd_buf.memory_barrier();
|
||||
cmd_buf.dispatch(
|
||||
&code.root_pipeline,
|
||||
&self.root_ds,
|
||||
(1, 1, 1),
|
||||
(TRANSFORM_WG as u32, 1, 1),
|
||||
);
|
||||
cmd_buf.memory_barrier();
|
||||
}
|
||||
cmd_buf.dispatch(
|
||||
&code.leaf_pipeline,
|
||||
&binding.leaf_ds,
|
||||
(n_workgroups as u32, 1, 1),
|
||||
(TRANSFORM_WG as u32, 1, 1),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
impl Transform {
|
||||
pub fn from_kurbo(a: Affine) -> Transform {
|
||||
let c = a.as_coeffs();
|
||||
Transform {
|
||||
mat: [c[0] as f32, c[1] as f32, c[2] as f32, c[3] as f32],
|
||||
translate: [c[4] as f32, c[5] as f32],
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_kurbo(self) -> Affine {
|
||||
Affine::new([
|
||||
self.mat[0] as f64,
|
||||
self.mat[1] as f64,
|
||||
self.mat[2] as f64,
|
||||
self.mat[3] as f64,
|
||||
self.translate[0] as f64,
|
||||
self.translate[1] as f64,
|
||||
])
|
||||
}
|
||||
}
|
|
@ -6,9 +6,18 @@ description = "Tests for piet-gpu shaders and generic GPU capabilities."
|
|||
license = "MIT/Apache-2.0"
|
||||
edition = "2021"
|
||||
|
||||
[features]
|
||||
default = ["piet-gpu"]
|
||||
|
||||
[dependencies]
|
||||
clap = "2.33"
|
||||
bytemuck = "1.7.2"
|
||||
kurbo = "0.7.1"
|
||||
rand = "0.7.3"
|
||||
|
||||
[dependencies.piet-gpu-hal]
|
||||
path = "../piet-gpu-hal"
|
||||
|
||||
[dependencies.piet-gpu]
|
||||
path = "../piet-gpu"
|
||||
optional = true
|
||||
|
|
|
@ -55,12 +55,12 @@ pub unsafe fn run_clear_test(runner: &mut Runner, config: &Config) -> TestResult
|
|||
commands.write_timestamp(0);
|
||||
stage.record(&mut commands, &code, &binding);
|
||||
commands.write_timestamp(1);
|
||||
if i == 0 {
|
||||
if i == 0 || config.verify_all {
|
||||
commands.cmd_buf.memory_barrier();
|
||||
commands.download(&out_buf);
|
||||
}
|
||||
total_elapsed += runner.submit(commands);
|
||||
if i == 0 {
|
||||
if i == 0 || config.verify_all {
|
||||
let mut dst: Vec<u32> = Default::default();
|
||||
out_buf.read(&mut dst);
|
||||
if let Some(failure) = verify(&dst) {
|
||||
|
|
|
@ -22,6 +22,7 @@ pub struct Config {
|
|||
pub groups: Groups,
|
||||
pub size: Size,
|
||||
pub n_iter: u64,
|
||||
pub verify_all: bool,
|
||||
}
|
||||
|
||||
pub struct Groups(String);
|
||||
|
@ -40,10 +41,12 @@ impl Config {
|
|||
.value_of("n_iter")
|
||||
.and_then(|s| s.parse().ok())
|
||||
.unwrap_or(1000);
|
||||
let verify_all = matches.is_present("verify_all");
|
||||
Config {
|
||||
groups,
|
||||
size,
|
||||
n_iter,
|
||||
verify_all,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -48,12 +48,12 @@ pub unsafe fn run_linkedlist_test(runner: &mut Runner, config: &Config) -> TestR
|
|||
commands.write_timestamp(0);
|
||||
stage.record(&mut commands, &code, &binding, &mem_buf.dev_buf);
|
||||
commands.write_timestamp(1);
|
||||
if i == 0 {
|
||||
if i == 0 || config.verify_all {
|
||||
commands.cmd_buf.memory_barrier();
|
||||
commands.download(&mem_buf);
|
||||
}
|
||||
total_elapsed += runner.submit(commands);
|
||||
if i == 0 {
|
||||
if i == 0 || config.verify_all {
|
||||
let mut dst: Vec<u32> = Default::default();
|
||||
mem_buf.read(&mut dst);
|
||||
if !verify(&dst) {
|
||||
|
|
|
@ -25,6 +25,9 @@ mod prefix_tree;
|
|||
mod runner;
|
||||
mod test_result;
|
||||
|
||||
#[cfg(feature = "piet-gpu")]
|
||||
mod transform;
|
||||
|
||||
use clap::{App, Arg};
|
||||
use piet_gpu_hal::InstanceFlags;
|
||||
|
||||
|
@ -62,6 +65,11 @@ fn main() {
|
|||
.help("Number of iterations")
|
||||
.takes_value(true),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("verify_all")
|
||||
.long("verify_all")
|
||||
.help("Verify all iterations"),
|
||||
)
|
||||
.arg(
|
||||
Arg::with_name("dx12")
|
||||
.long("dx12")
|
||||
|
@ -123,5 +131,9 @@ fn main() {
|
|||
}
|
||||
report(&linkedlist::run_linkedlist_test(&mut runner, &config));
|
||||
}
|
||||
#[cfg(feature = "piet-gpu")]
|
||||
if config.groups.matches("piet") {
|
||||
report(&transform::transform_test(&mut runner, &config));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -85,12 +85,12 @@ pub unsafe fn run_prefix_test(
|
|||
commands.write_timestamp(0);
|
||||
stage.record(&mut commands, &code, &binding);
|
||||
commands.write_timestamp(1);
|
||||
if i == 0 {
|
||||
if i == 0 || config.verify_all {
|
||||
commands.cmd_buf.memory_barrier();
|
||||
commands.download(&out_buf);
|
||||
}
|
||||
total_elapsed += runner.submit(commands);
|
||||
if i == 0 {
|
||||
if i == 0 || config.verify_all {
|
||||
let mut dst: Vec<u32> = Default::default();
|
||||
out_buf.read(&mut dst);
|
||||
if let Some(failure) = verify(&dst) {
|
||||
|
|
|
@ -66,12 +66,12 @@ pub unsafe fn run_prefix_test(runner: &mut Runner, config: &Config) -> TestResul
|
|||
commands.write_timestamp(0);
|
||||
stage.record(&mut commands, &code, &binding);
|
||||
commands.write_timestamp(1);
|
||||
if i == 0 {
|
||||
if i == 0 || config.verify_all {
|
||||
commands.cmd_buf.memory_barrier();
|
||||
commands.download(&out_buf);
|
||||
}
|
||||
total_elapsed += runner.submit(commands);
|
||||
if i == 0 {
|
||||
if i == 0 || config.verify_all {
|
||||
let mut dst: Vec<u32> = Default::default();
|
||||
out_buf.read(&mut dst);
|
||||
if let Some(failure) = verify(&dst) {
|
||||
|
|
133
tests/src/transform.rs
Normal file
133
tests/src/transform.rs
Normal file
|
@ -0,0 +1,133 @@
|
|||
// Copyright 2021 The piet-gpu authors.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// https://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
// Also licensed under MIT license, at your choice.
|
||||
|
||||
//! Tests for piet-gpu shaders.
|
||||
|
||||
use crate::{Config, Runner, TestResult};
|
||||
|
||||
use kurbo::Affine;
|
||||
use piet_gpu::stages::{self, Transform, TransformCode, TransformStage};
|
||||
use piet_gpu_hal::BufferUsage;
|
||||
use rand::Rng;
|
||||
|
||||
struct AffineTestData {
|
||||
input_data: Vec<Transform>,
|
||||
expected: Vec<Affine>,
|
||||
}
|
||||
|
||||
pub unsafe fn transform_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||
let mut result = TestResult::new("transform");
|
||||
let n_elements: u64 = config.size.choose(1 << 12, 1 << 18, 1 << 24);
|
||||
// TODO: would be nice to validate with real transform.
|
||||
let data = AffineTestData::new(n_elements as usize);
|
||||
let data_buf = runner
|
||||
.session
|
||||
.create_buffer_init(&data.input_data, BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
let memory = runner.buf_down(data_buf.size() + 24, BufferUsage::empty());
|
||||
let stage_config = stages::Config {
|
||||
n_trans: n_elements as u32,
|
||||
// This is a hack to get elements aligned.
|
||||
trans_alloc: 16,
|
||||
..Default::default()
|
||||
};
|
||||
let config_buf = runner
|
||||
.session
|
||||
.create_buffer_init(std::slice::from_ref(&stage_config), BufferUsage::STORAGE)
|
||||
.unwrap();
|
||||
|
||||
let code = TransformCode::new(&runner.session);
|
||||
let stage = TransformStage::new(&runner.session, &code);
|
||||
let binding = stage.bind(
|
||||
&runner.session,
|
||||
&code,
|
||||
&config_buf,
|
||||
&data_buf,
|
||||
&memory.dev_buf,
|
||||
);
|
||||
let mut total_elapsed = 0.0;
|
||||
let n_iter = config.n_iter;
|
||||
for i in 0..n_iter {
|
||||
let mut commands = runner.commands();
|
||||
commands.write_timestamp(0);
|
||||
stage.record(&mut commands.cmd_buf, &code, &binding, n_elements);
|
||||
commands.write_timestamp(1);
|
||||
if i == 0 || config.verify_all {
|
||||
commands.cmd_buf.memory_barrier();
|
||||
commands.download(&memory);
|
||||
}
|
||||
total_elapsed += runner.submit(commands);
|
||||
if i == 0 || config.verify_all {
|
||||
let mut dst: Vec<Transform> = Default::default();
|
||||
memory.read(&mut dst);
|
||||
if let Some(failure) = data.verify(&dst[1..]) {
|
||||
result.fail(failure);
|
||||
}
|
||||
}
|
||||
}
|
||||
result.timing(total_elapsed, n_elements * n_iter);
|
||||
result
|
||||
}
|
||||
|
||||
impl AffineTestData {
|
||||
fn new(n: usize) -> AffineTestData {
|
||||
let mut rng = rand::thread_rng();
|
||||
let mut a = Affine::default();
|
||||
let mut b;
|
||||
let mut input_data = Vec::with_capacity(n);
|
||||
let mut expected = Vec::with_capacity(n);
|
||||
for _ in 0..n {
|
||||
loop {
|
||||
b = Affine::new([
|
||||
rng.gen_range(-10.0, 10.0),
|
||||
rng.gen_range(-10.0, 10.0),
|
||||
rng.gen_range(-10.0, 10.0),
|
||||
rng.gen_range(-10.0, 10.0),
|
||||
rng.gen_range(-10.0, 10.0),
|
||||
rng.gen_range(-10.0, 10.0),
|
||||
]);
|
||||
if b.determinant() >= 1.0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
expected.push(b);
|
||||
let c = a.inverse() * b;
|
||||
input_data.push(Transform::from_kurbo(c));
|
||||
a = b;
|
||||
}
|
||||
AffineTestData {
|
||||
input_data,
|
||||
expected,
|
||||
}
|
||||
}
|
||||
|
||||
fn verify(&self, actual: &[Transform]) -> Option<String> {
|
||||
for (i, (actual, expected)) in actual.iter().zip(&self.expected).enumerate() {
|
||||
let error: f64 = actual
|
||||
.to_kurbo()
|
||||
.as_coeffs()
|
||||
.iter()
|
||||
.zip(expected.as_coeffs())
|
||||
.map(|(actual, expected)| (actual - expected).powi(2))
|
||||
.sum();
|
||||
let tolerance = 1e-6 * (i + 1) as f64;
|
||||
if error > tolerance {
|
||||
return Some(format!("{}: {} {}", i, error, tolerance));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue