mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
commit compiled shaders
This commit is contained in:
parent
d529d3b0e8
commit
bbdd4432f5
BIN
piet-gpu/shader/gen/backdrop.dxil
generated
BIN
piet-gpu/shader/gen/backdrop.dxil
generated
Binary file not shown.
107
piet-gpu/shader/gen/backdrop.hlsl
generated
107
piet-gpu/shader/gen/backdrop.hlsl
generated
|
@ -21,6 +21,7 @@ struct Path
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -52,8 +53,8 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
RWByteAddressBuffer _67 : register(u0, space0);
|
||||
ByteAddressBuffer _166 : register(t1, space0);
|
||||
RWByteAddressBuffer _59 : register(u0, space0);
|
||||
ByteAddressBuffer _181 : register(t1, space0);
|
||||
|
||||
static uint3 gl_LocalInvocationID;
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
|
@ -69,6 +70,13 @@ groupshared uint sh_row_width[256];
|
|||
groupshared Alloc sh_row_alloc[256];
|
||||
groupshared uint sh_row_count[256];
|
||||
|
||||
bool check_deps(uint dep_stage)
|
||||
{
|
||||
uint _65;
|
||||
_59.InterlockedOr(4, 0u, _65);
|
||||
return (_65 & dep_stage) == 0u;
|
||||
}
|
||||
|
||||
bool touch_mem(Alloc alloc, uint offset)
|
||||
{
|
||||
return true;
|
||||
|
@ -82,7 +90,7 @@ uint read_mem(Alloc alloc, uint offset)
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = _67.Load(offset * 4 + 8);
|
||||
uint v = _59.Load(offset * 4 + 12);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -100,8 +108,8 @@ Path Path_read(Alloc a, PathRef ref)
|
|||
uint raw2 = read_mem(param_4, param_5);
|
||||
Path s;
|
||||
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
||||
TileRef _134 = { raw2 };
|
||||
s.tiles = _134;
|
||||
TileRef _146 = { raw2 };
|
||||
s.tiles = _146;
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -120,47 +128,52 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_67.Store(offset * 4 + 8, val);
|
||||
_59.Store(offset * 4 + 12, val);
|
||||
}
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint param = 7u;
|
||||
bool _154 = check_deps(param);
|
||||
if (!_154)
|
||||
{
|
||||
return;
|
||||
}
|
||||
uint th_ix = gl_LocalInvocationIndex;
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
uint row_count = 0u;
|
||||
bool mem_ok = _67.Load(4) == 0u;
|
||||
if (gl_LocalInvocationID.y == 0u)
|
||||
{
|
||||
if (element_ix < _166.Load(0))
|
||||
if (element_ix < _181.Load(4))
|
||||
{
|
||||
PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
|
||||
PathRef path_ref = _180;
|
||||
Alloc _185;
|
||||
_185.offset = _166.Load(16);
|
||||
Alloc param;
|
||||
param.offset = _185.offset;
|
||||
PathRef param_1 = path_ref;
|
||||
Path path = Path_read(param, param_1);
|
||||
PathRef _195 = { _181.Load(20) + (element_ix * 12u) };
|
||||
PathRef path_ref = _195;
|
||||
Alloc _200;
|
||||
_200.offset = _181.Load(20);
|
||||
Alloc param_1;
|
||||
param_1.offset = _200.offset;
|
||||
PathRef param_2 = path_ref;
|
||||
Path path = Path_read(param_1, param_2);
|
||||
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
|
||||
row_count = path.bbox.w - path.bbox.y;
|
||||
bool _210 = row_count == 1u;
|
||||
bool _216;
|
||||
if (_210)
|
||||
bool _225 = row_count == 1u;
|
||||
bool _231;
|
||||
if (_225)
|
||||
{
|
||||
_216 = path.bbox.y > 0u;
|
||||
_231 = path.bbox.y > 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_216 = _210;
|
||||
_231 = _225;
|
||||
}
|
||||
if (_216)
|
||||
if (_231)
|
||||
{
|
||||
row_count = 0u;
|
||||
}
|
||||
uint param_2 = path.tiles.offset;
|
||||
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_4 = mem_ok;
|
||||
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
|
||||
uint param_3 = path.tiles.offset;
|
||||
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_5 = true;
|
||||
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
|
||||
sh_row_alloc[th_ix] = path_alloc;
|
||||
}
|
||||
sh_row_count[th_ix] = row_count;
|
||||
|
@ -168,17 +181,17 @@ void comp_main()
|
|||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
bool _262 = gl_LocalInvocationID.y == 0u;
|
||||
bool _269;
|
||||
if (_262)
|
||||
bool _276 = gl_LocalInvocationID.y == 0u;
|
||||
bool _283;
|
||||
if (_276)
|
||||
{
|
||||
_269 = th_ix >= (1u << i);
|
||||
_283 = th_ix >= (1u << i);
|
||||
}
|
||||
else
|
||||
{
|
||||
_269 = _262;
|
||||
_283 = _276;
|
||||
}
|
||||
if (_269)
|
||||
if (_283)
|
||||
{
|
||||
row_count += sh_row_count[th_ix - (1u << i)];
|
||||
}
|
||||
|
@ -190,7 +203,7 @@ void comp_main()
|
|||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint total_rows = sh_row_count[255];
|
||||
uint _348;
|
||||
uint _360;
|
||||
for (uint row = th_ix; row < total_rows; row += 256u)
|
||||
{
|
||||
uint el_ix = 0u;
|
||||
|
@ -203,32 +216,32 @@ void comp_main()
|
|||
}
|
||||
}
|
||||
uint width = sh_row_width[el_ix];
|
||||
if ((width > 0u) && mem_ok)
|
||||
if (width > 0u)
|
||||
{
|
||||
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||
if (el_ix > 0u)
|
||||
{
|
||||
_348 = sh_row_count[el_ix - 1u];
|
||||
_360 = sh_row_count[el_ix - 1u];
|
||||
}
|
||||
else
|
||||
{
|
||||
_348 = 0u;
|
||||
_360 = 0u;
|
||||
}
|
||||
uint seq_ix = row - _348;
|
||||
uint seq_ix = row - _360;
|
||||
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
|
||||
Alloc param_5 = tiles_alloc;
|
||||
uint param_6 = tile_el_ix;
|
||||
uint sum = read_mem(param_5, param_6);
|
||||
Alloc param_6 = tiles_alloc;
|
||||
uint param_7 = tile_el_ix;
|
||||
uint sum = read_mem(param_6, param_7);
|
||||
for (uint x = 1u; x < width; x++)
|
||||
{
|
||||
tile_el_ix += 2u;
|
||||
Alloc param_7 = tiles_alloc;
|
||||
uint param_8 = tile_el_ix;
|
||||
sum += read_mem(param_7, param_8);
|
||||
Alloc param_9 = tiles_alloc;
|
||||
uint param_10 = tile_el_ix;
|
||||
uint param_11 = sum;
|
||||
write_mem(param_9, param_10, param_11);
|
||||
Alloc param_8 = tiles_alloc;
|
||||
uint param_9 = tile_el_ix;
|
||||
sum += read_mem(param_8, param_9);
|
||||
Alloc param_10 = tiles_alloc;
|
||||
uint param_11 = tile_el_ix;
|
||||
uint param_12 = sum;
|
||||
write_mem(param_10, param_11, param_12);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
110
piet-gpu/shader/gen/backdrop.msl
generated
110
piet-gpu/shader/gen/backdrop.msl
generated
|
@ -1,7 +1,9 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
|
@ -30,6 +32,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -40,6 +43,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -76,6 +80,13 @@ struct ConfigBuf
|
|||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool check_deps(thread const uint& dep_stage, device Memory& v_59)
|
||||
{
|
||||
uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed);
|
||||
return (_65 & dep_stage) == 0u;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
||||
{
|
||||
|
@ -83,7 +94,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
|
||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -91,23 +102,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = v_67.memory[offset];
|
||||
uint v = v_59.memory[offset];
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
|
||||
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint raw0 = read_mem(param, param_1, v_67);
|
||||
uint raw0 = read_mem(param, param_1, v_59);
|
||||
Alloc param_2 = a;
|
||||
uint param_3 = ix + 1u;
|
||||
uint raw1 = read_mem(param_2, param_3, v_67);
|
||||
uint raw1 = read_mem(param_2, param_3, v_59);
|
||||
Alloc param_4 = a;
|
||||
uint param_5 = ix + 2u;
|
||||
uint raw2 = read_mem(param_4, param_5, v_67);
|
||||
uint raw2 = read_mem(param_4, param_5, v_59);
|
||||
Path s;
|
||||
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
||||
s.tiles = TileRef{ raw2 };
|
||||
|
@ -123,7 +134,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -131,47 +142,52 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
|||
{
|
||||
return;
|
||||
}
|
||||
v_67.memory[offset] = val;
|
||||
v_59.memory[offset] = val;
|
||||
}
|
||||
|
||||
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
{
|
||||
threadgroup uint sh_row_width[256];
|
||||
threadgroup Alloc sh_row_alloc[256];
|
||||
threadgroup uint sh_row_count[256];
|
||||
uint param = 7u;
|
||||
bool _154 = check_deps(param, v_59);
|
||||
if (!_154)
|
||||
{
|
||||
return;
|
||||
}
|
||||
uint th_ix = gl_LocalInvocationIndex;
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
uint row_count = 0u;
|
||||
bool mem_ok = v_67.mem_error == 0u;
|
||||
if (gl_LocalInvocationID.y == 0u)
|
||||
{
|
||||
if (element_ix < _166.conf.n_elements)
|
||||
if (element_ix < _181.conf.n_elements)
|
||||
{
|
||||
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
|
||||
Alloc param;
|
||||
param.offset = _166.conf.tile_alloc.offset;
|
||||
PathRef param_1 = path_ref;
|
||||
Path path = Path_read(param, param_1, v_67);
|
||||
PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) };
|
||||
Alloc param_1;
|
||||
param_1.offset = _181.conf.tile_alloc.offset;
|
||||
PathRef param_2 = path_ref;
|
||||
Path path = Path_read(param_1, param_2, v_59);
|
||||
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
|
||||
row_count = path.bbox.w - path.bbox.y;
|
||||
bool _210 = row_count == 1u;
|
||||
bool _216;
|
||||
if (_210)
|
||||
bool _225 = row_count == 1u;
|
||||
bool _231;
|
||||
if (_225)
|
||||
{
|
||||
_216 = path.bbox.y > 0u;
|
||||
_231 = path.bbox.y > 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_216 = _210;
|
||||
_231 = _225;
|
||||
}
|
||||
if (_216)
|
||||
if (_231)
|
||||
{
|
||||
row_count = 0u;
|
||||
}
|
||||
uint param_2 = path.tiles.offset;
|
||||
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_4 = mem_ok;
|
||||
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
|
||||
uint param_3 = path.tiles.offset;
|
||||
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_5 = true;
|
||||
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
|
||||
sh_row_alloc[th_ix] = path_alloc;
|
||||
}
|
||||
sh_row_count[th_ix] = row_count;
|
||||
|
@ -179,17 +195,17 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
|
|||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
bool _262 = gl_LocalInvocationID.y == 0u;
|
||||
bool _269;
|
||||
if (_262)
|
||||
bool _276 = gl_LocalInvocationID.y == 0u;
|
||||
bool _283;
|
||||
if (_276)
|
||||
{
|
||||
_269 = th_ix >= (1u << i);
|
||||
_283 = th_ix >= (1u << i);
|
||||
}
|
||||
else
|
||||
{
|
||||
_269 = _262;
|
||||
_283 = _276;
|
||||
}
|
||||
if (_269)
|
||||
if (_283)
|
||||
{
|
||||
row_count += sh_row_count[th_ix - (1u << i)];
|
||||
}
|
||||
|
@ -201,7 +217,7 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
|
|||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
uint total_rows = sh_row_count[255];
|
||||
uint _348;
|
||||
uint _360;
|
||||
for (uint row = th_ix; row < total_rows; row += 256u)
|
||||
{
|
||||
uint el_ix = 0u;
|
||||
|
@ -214,32 +230,32 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
|
|||
}
|
||||
}
|
||||
uint width = sh_row_width[el_ix];
|
||||
if ((width > 0u) && mem_ok)
|
||||
if (width > 0u)
|
||||
{
|
||||
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||
if (el_ix > 0u)
|
||||
{
|
||||
_348 = sh_row_count[el_ix - 1u];
|
||||
_360 = sh_row_count[el_ix - 1u];
|
||||
}
|
||||
else
|
||||
{
|
||||
_348 = 0u;
|
||||
_360 = 0u;
|
||||
}
|
||||
uint seq_ix = row - _348;
|
||||
uint seq_ix = row - _360;
|
||||
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
|
||||
Alloc param_5 = tiles_alloc;
|
||||
uint param_6 = tile_el_ix;
|
||||
uint sum = read_mem(param_5, param_6, v_67);
|
||||
Alloc param_6 = tiles_alloc;
|
||||
uint param_7 = tile_el_ix;
|
||||
uint sum = read_mem(param_6, param_7, v_59);
|
||||
for (uint x = 1u; x < width; x++)
|
||||
{
|
||||
tile_el_ix += 2u;
|
||||
Alloc param_7 = tiles_alloc;
|
||||
uint param_8 = tile_el_ix;
|
||||
sum += read_mem(param_7, param_8, v_67);
|
||||
Alloc param_9 = tiles_alloc;
|
||||
uint param_10 = tile_el_ix;
|
||||
uint param_11 = sum;
|
||||
write_mem(param_9, param_10, param_11, v_67);
|
||||
Alloc param_8 = tiles_alloc;
|
||||
uint param_9 = tile_el_ix;
|
||||
sum += read_mem(param_8, param_9, v_59);
|
||||
Alloc param_10 = tiles_alloc;
|
||||
uint param_11 = tile_el_ix;
|
||||
uint param_12 = sum;
|
||||
write_mem(param_10, param_11, param_12, v_59);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
BIN
piet-gpu/shader/gen/backdrop.spv
generated
BIN
piet-gpu/shader/gen/backdrop.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/backdrop_lg.dxil
generated
BIN
piet-gpu/shader/gen/backdrop_lg.dxil
generated
Binary file not shown.
107
piet-gpu/shader/gen/backdrop_lg.hlsl
generated
107
piet-gpu/shader/gen/backdrop_lg.hlsl
generated
|
@ -21,6 +21,7 @@ struct Path
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -52,8 +53,8 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 4u, 1u);
|
||||
|
||||
RWByteAddressBuffer _67 : register(u0, space0);
|
||||
ByteAddressBuffer _166 : register(t1, space0);
|
||||
RWByteAddressBuffer _59 : register(u0, space0);
|
||||
ByteAddressBuffer _181 : register(t1, space0);
|
||||
|
||||
static uint3 gl_LocalInvocationID;
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
|
@ -69,6 +70,13 @@ groupshared uint sh_row_width[256];
|
|||
groupshared Alloc sh_row_alloc[256];
|
||||
groupshared uint sh_row_count[256];
|
||||
|
||||
bool check_deps(uint dep_stage)
|
||||
{
|
||||
uint _65;
|
||||
_59.InterlockedOr(4, 0u, _65);
|
||||
return (_65 & dep_stage) == 0u;
|
||||
}
|
||||
|
||||
bool touch_mem(Alloc alloc, uint offset)
|
||||
{
|
||||
return true;
|
||||
|
@ -82,7 +90,7 @@ uint read_mem(Alloc alloc, uint offset)
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = _67.Load(offset * 4 + 8);
|
||||
uint v = _59.Load(offset * 4 + 12);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -100,8 +108,8 @@ Path Path_read(Alloc a, PathRef ref)
|
|||
uint raw2 = read_mem(param_4, param_5);
|
||||
Path s;
|
||||
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
||||
TileRef _134 = { raw2 };
|
||||
s.tiles = _134;
|
||||
TileRef _146 = { raw2 };
|
||||
s.tiles = _146;
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -120,47 +128,52 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_67.Store(offset * 4 + 8, val);
|
||||
_59.Store(offset * 4 + 12, val);
|
||||
}
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint param = 7u;
|
||||
bool _154 = check_deps(param);
|
||||
if (!_154)
|
||||
{
|
||||
return;
|
||||
}
|
||||
uint th_ix = gl_LocalInvocationIndex;
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
uint row_count = 0u;
|
||||
bool mem_ok = _67.Load(4) == 0u;
|
||||
if (gl_LocalInvocationID.y == 0u)
|
||||
{
|
||||
if (element_ix < _166.Load(0))
|
||||
if (element_ix < _181.Load(4))
|
||||
{
|
||||
PathRef _180 = { _166.Load(16) + (element_ix * 12u) };
|
||||
PathRef path_ref = _180;
|
||||
Alloc _185;
|
||||
_185.offset = _166.Load(16);
|
||||
Alloc param;
|
||||
param.offset = _185.offset;
|
||||
PathRef param_1 = path_ref;
|
||||
Path path = Path_read(param, param_1);
|
||||
PathRef _195 = { _181.Load(20) + (element_ix * 12u) };
|
||||
PathRef path_ref = _195;
|
||||
Alloc _200;
|
||||
_200.offset = _181.Load(20);
|
||||
Alloc param_1;
|
||||
param_1.offset = _200.offset;
|
||||
PathRef param_2 = path_ref;
|
||||
Path path = Path_read(param_1, param_2);
|
||||
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
|
||||
row_count = path.bbox.w - path.bbox.y;
|
||||
bool _210 = row_count == 1u;
|
||||
bool _216;
|
||||
if (_210)
|
||||
bool _225 = row_count == 1u;
|
||||
bool _231;
|
||||
if (_225)
|
||||
{
|
||||
_216 = path.bbox.y > 0u;
|
||||
_231 = path.bbox.y > 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_216 = _210;
|
||||
_231 = _225;
|
||||
}
|
||||
if (_216)
|
||||
if (_231)
|
||||
{
|
||||
row_count = 0u;
|
||||
}
|
||||
uint param_2 = path.tiles.offset;
|
||||
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_4 = mem_ok;
|
||||
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
|
||||
uint param_3 = path.tiles.offset;
|
||||
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_5 = true;
|
||||
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
|
||||
sh_row_alloc[th_ix] = path_alloc;
|
||||
}
|
||||
sh_row_count[th_ix] = row_count;
|
||||
|
@ -168,17 +181,17 @@ void comp_main()
|
|||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
bool _262 = gl_LocalInvocationID.y == 0u;
|
||||
bool _269;
|
||||
if (_262)
|
||||
bool _276 = gl_LocalInvocationID.y == 0u;
|
||||
bool _283;
|
||||
if (_276)
|
||||
{
|
||||
_269 = th_ix >= (1u << i);
|
||||
_283 = th_ix >= (1u << i);
|
||||
}
|
||||
else
|
||||
{
|
||||
_269 = _262;
|
||||
_283 = _276;
|
||||
}
|
||||
if (_269)
|
||||
if (_283)
|
||||
{
|
||||
row_count += sh_row_count[th_ix - (1u << i)];
|
||||
}
|
||||
|
@ -190,7 +203,7 @@ void comp_main()
|
|||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint total_rows = sh_row_count[255];
|
||||
uint _348;
|
||||
uint _360;
|
||||
for (uint row = th_ix; row < total_rows; row += 1024u)
|
||||
{
|
||||
uint el_ix = 0u;
|
||||
|
@ -203,32 +216,32 @@ void comp_main()
|
|||
}
|
||||
}
|
||||
uint width = sh_row_width[el_ix];
|
||||
if ((width > 0u) && mem_ok)
|
||||
if (width > 0u)
|
||||
{
|
||||
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||
if (el_ix > 0u)
|
||||
{
|
||||
_348 = sh_row_count[el_ix - 1u];
|
||||
_360 = sh_row_count[el_ix - 1u];
|
||||
}
|
||||
else
|
||||
{
|
||||
_348 = 0u;
|
||||
_360 = 0u;
|
||||
}
|
||||
uint seq_ix = row - _348;
|
||||
uint seq_ix = row - _360;
|
||||
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
|
||||
Alloc param_5 = tiles_alloc;
|
||||
uint param_6 = tile_el_ix;
|
||||
uint sum = read_mem(param_5, param_6);
|
||||
Alloc param_6 = tiles_alloc;
|
||||
uint param_7 = tile_el_ix;
|
||||
uint sum = read_mem(param_6, param_7);
|
||||
for (uint x = 1u; x < width; x++)
|
||||
{
|
||||
tile_el_ix += 2u;
|
||||
Alloc param_7 = tiles_alloc;
|
||||
uint param_8 = tile_el_ix;
|
||||
sum += read_mem(param_7, param_8);
|
||||
Alloc param_9 = tiles_alloc;
|
||||
uint param_10 = tile_el_ix;
|
||||
uint param_11 = sum;
|
||||
write_mem(param_9, param_10, param_11);
|
||||
Alloc param_8 = tiles_alloc;
|
||||
uint param_9 = tile_el_ix;
|
||||
sum += read_mem(param_8, param_9);
|
||||
Alloc param_10 = tiles_alloc;
|
||||
uint param_11 = tile_el_ix;
|
||||
uint param_12 = sum;
|
||||
write_mem(param_10, param_11, param_12);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
110
piet-gpu/shader/gen/backdrop_lg.msl
generated
110
piet-gpu/shader/gen/backdrop_lg.msl
generated
|
@ -1,7 +1,9 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
|
@ -30,6 +32,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -40,6 +43,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -76,6 +80,13 @@ struct ConfigBuf
|
|||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 4u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool check_deps(thread const uint& dep_stage, device Memory& v_59)
|
||||
{
|
||||
uint _65 = atomic_fetch_or_explicit((device atomic_uint*)&v_59.mem_error, 0u, memory_order_relaxed);
|
||||
return (_65 & dep_stage) == 0u;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
||||
{
|
||||
|
@ -83,7 +94,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_67)
|
||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_59)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -91,23 +102,23 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = v_67.memory[offset];
|
||||
uint v = v_59.memory[offset];
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_67)
|
||||
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_59)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint raw0 = read_mem(param, param_1, v_67);
|
||||
uint raw0 = read_mem(param, param_1, v_59);
|
||||
Alloc param_2 = a;
|
||||
uint param_3 = ix + 1u;
|
||||
uint raw1 = read_mem(param_2, param_3, v_67);
|
||||
uint raw1 = read_mem(param_2, param_3, v_59);
|
||||
Alloc param_4 = a;
|
||||
uint param_5 = ix + 2u;
|
||||
uint raw2 = read_mem(param_4, param_5, v_67);
|
||||
uint raw2 = read_mem(param_4, param_5, v_59);
|
||||
Path s;
|
||||
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
||||
s.tiles = TileRef{ raw2 };
|
||||
|
@ -123,7 +134,7 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_67)
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_59)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -131,47 +142,52 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
|||
{
|
||||
return;
|
||||
}
|
||||
v_67.memory[offset] = val;
|
||||
v_59.memory[offset] = val;
|
||||
}
|
||||
|
||||
kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _166 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
kernel void main0(device Memory& v_59 [[buffer(0)]], const device ConfigBuf& _181 [[buffer(1)]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
{
|
||||
threadgroup uint sh_row_width[256];
|
||||
threadgroup Alloc sh_row_alloc[256];
|
||||
threadgroup uint sh_row_count[256];
|
||||
uint param = 7u;
|
||||
bool _154 = check_deps(param, v_59);
|
||||
if (!_154)
|
||||
{
|
||||
return;
|
||||
}
|
||||
uint th_ix = gl_LocalInvocationIndex;
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
uint row_count = 0u;
|
||||
bool mem_ok = v_67.mem_error == 0u;
|
||||
if (gl_LocalInvocationID.y == 0u)
|
||||
{
|
||||
if (element_ix < _166.conf.n_elements)
|
||||
if (element_ix < _181.conf.n_elements)
|
||||
{
|
||||
PathRef path_ref = PathRef{ _166.conf.tile_alloc.offset + (element_ix * 12u) };
|
||||
Alloc param;
|
||||
param.offset = _166.conf.tile_alloc.offset;
|
||||
PathRef param_1 = path_ref;
|
||||
Path path = Path_read(param, param_1, v_67);
|
||||
PathRef path_ref = PathRef{ _181.conf.tile_alloc.offset + (element_ix * 12u) };
|
||||
Alloc param_1;
|
||||
param_1.offset = _181.conf.tile_alloc.offset;
|
||||
PathRef param_2 = path_ref;
|
||||
Path path = Path_read(param_1, param_2, v_59);
|
||||
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
|
||||
row_count = path.bbox.w - path.bbox.y;
|
||||
bool _210 = row_count == 1u;
|
||||
bool _216;
|
||||
if (_210)
|
||||
bool _225 = row_count == 1u;
|
||||
bool _231;
|
||||
if (_225)
|
||||
{
|
||||
_216 = path.bbox.y > 0u;
|
||||
_231 = path.bbox.y > 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_216 = _210;
|
||||
_231 = _225;
|
||||
}
|
||||
if (_216)
|
||||
if (_231)
|
||||
{
|
||||
row_count = 0u;
|
||||
}
|
||||
uint param_2 = path.tiles.offset;
|
||||
uint param_3 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_4 = mem_ok;
|
||||
Alloc path_alloc = new_alloc(param_2, param_3, param_4);
|
||||
uint param_3 = path.tiles.offset;
|
||||
uint param_4 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_5 = true;
|
||||
Alloc path_alloc = new_alloc(param_3, param_4, param_5);
|
||||
sh_row_alloc[th_ix] = path_alloc;
|
||||
}
|
||||
sh_row_count[th_ix] = row_count;
|
||||
|
@ -179,17 +195,17 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
|
|||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
bool _262 = gl_LocalInvocationID.y == 0u;
|
||||
bool _269;
|
||||
if (_262)
|
||||
bool _276 = gl_LocalInvocationID.y == 0u;
|
||||
bool _283;
|
||||
if (_276)
|
||||
{
|
||||
_269 = th_ix >= (1u << i);
|
||||
_283 = th_ix >= (1u << i);
|
||||
}
|
||||
else
|
||||
{
|
||||
_269 = _262;
|
||||
_283 = _276;
|
||||
}
|
||||
if (_269)
|
||||
if (_283)
|
||||
{
|
||||
row_count += sh_row_count[th_ix - (1u << i)];
|
||||
}
|
||||
|
@ -201,7 +217,7 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
|
|||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
uint total_rows = sh_row_count[255];
|
||||
uint _348;
|
||||
uint _360;
|
||||
for (uint row = th_ix; row < total_rows; row += 1024u)
|
||||
{
|
||||
uint el_ix = 0u;
|
||||
|
@ -214,32 +230,32 @@ kernel void main0(device Memory& v_67 [[buffer(0)]], const device ConfigBuf& _16
|
|||
}
|
||||
}
|
||||
uint width = sh_row_width[el_ix];
|
||||
if ((width > 0u) && mem_ok)
|
||||
if (width > 0u)
|
||||
{
|
||||
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||
if (el_ix > 0u)
|
||||
{
|
||||
_348 = sh_row_count[el_ix - 1u];
|
||||
_360 = sh_row_count[el_ix - 1u];
|
||||
}
|
||||
else
|
||||
{
|
||||
_348 = 0u;
|
||||
_360 = 0u;
|
||||
}
|
||||
uint seq_ix = row - _348;
|
||||
uint seq_ix = row - _360;
|
||||
uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);
|
||||
Alloc param_5 = tiles_alloc;
|
||||
uint param_6 = tile_el_ix;
|
||||
uint sum = read_mem(param_5, param_6, v_67);
|
||||
Alloc param_6 = tiles_alloc;
|
||||
uint param_7 = tile_el_ix;
|
||||
uint sum = read_mem(param_6, param_7, v_59);
|
||||
for (uint x = 1u; x < width; x++)
|
||||
{
|
||||
tile_el_ix += 2u;
|
||||
Alloc param_7 = tiles_alloc;
|
||||
uint param_8 = tile_el_ix;
|
||||
sum += read_mem(param_7, param_8, v_67);
|
||||
Alloc param_9 = tiles_alloc;
|
||||
uint param_10 = tile_el_ix;
|
||||
uint param_11 = sum;
|
||||
write_mem(param_9, param_10, param_11, v_67);
|
||||
Alloc param_8 = tiles_alloc;
|
||||
uint param_9 = tile_el_ix;
|
||||
sum += read_mem(param_8, param_9, v_59);
|
||||
Alloc param_10 = tiles_alloc;
|
||||
uint param_11 = tile_el_ix;
|
||||
uint param_12 = sum;
|
||||
write_mem(param_10, param_11, param_12, v_59);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
BIN
piet-gpu/shader/gen/backdrop_lg.spv
generated
BIN
piet-gpu/shader/gen/backdrop_lg.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/bbox_clear.dxil
generated
BIN
piet-gpu/shader/gen/bbox_clear.dxil
generated
Binary file not shown.
13
piet-gpu/shader/gen/bbox_clear.hlsl
generated
13
piet-gpu/shader/gen/bbox_clear.hlsl
generated
|
@ -5,6 +5,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -48,13 +49,13 @@ struct SPIRV_Cross_Input
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x;
|
||||
if (ix < _21.Load(76))
|
||||
if (ix < _21.Load(80))
|
||||
{
|
||||
uint out_ix = (_21.Load(40) >> uint(2)) + (6u * ix);
|
||||
_45.Store(out_ix * 4 + 8, 65535u);
|
||||
_45.Store((out_ix + 1u) * 4 + 8, 65535u);
|
||||
_45.Store((out_ix + 2u) * 4 + 8, 0u);
|
||||
_45.Store((out_ix + 3u) * 4 + 8, 0u);
|
||||
uint out_ix = (_21.Load(44) >> uint(2)) + (6u * ix);
|
||||
_45.Store(out_ix * 4 + 12, 65535u);
|
||||
_45.Store((out_ix + 1u) * 4 + 12, 65535u);
|
||||
_45.Store((out_ix + 2u) * 4 + 12, 0u);
|
||||
_45.Store((out_ix + 3u) * 4 + 12, 0u);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
2
piet-gpu/shader/gen/bbox_clear.msl
generated
2
piet-gpu/shader/gen/bbox_clear.msl
generated
|
@ -10,6 +10,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -48,6 +49,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
|
BIN
piet-gpu/shader/gen/bbox_clear.spv
generated
BIN
piet-gpu/shader/gen/bbox_clear.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/binning.dxil
generated
BIN
piet-gpu/shader/gen/binning.dxil
generated
Binary file not shown.
214
piet-gpu/shader/gen/binning.hlsl
generated
214
piet-gpu/shader/gen/binning.hlsl
generated
|
@ -3,22 +3,6 @@ struct Alloc
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct MallocResult
|
||||
{
|
||||
Alloc alloc;
|
||||
bool failed;
|
||||
};
|
||||
|
||||
struct BinInstanceRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct BinInstance
|
||||
{
|
||||
uint element_ix;
|
||||
};
|
||||
|
||||
struct DrawMonoid
|
||||
{
|
||||
uint path_ix;
|
||||
|
@ -29,6 +13,7 @@ struct DrawMonoid
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -60,8 +45,8 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
RWByteAddressBuffer _81 : register(u0, space0);
|
||||
ByteAddressBuffer _156 : register(t1, space0);
|
||||
RWByteAddressBuffer _57 : register(u0, space0);
|
||||
ByteAddressBuffer _101 : register(t1, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -72,39 +57,38 @@ struct SPIRV_Cross_Input
|
|||
};
|
||||
|
||||
groupshared uint bitmaps[8][256];
|
||||
groupshared bool sh_alloc_failed;
|
||||
groupshared uint count[8][256];
|
||||
groupshared Alloc sh_chunk_alloc[256];
|
||||
groupshared uint sh_chunk_offset[256];
|
||||
|
||||
DrawMonoid load_draw_monoid(uint element_ix)
|
||||
{
|
||||
uint base = (_156.Load(44) >> uint(2)) + (4u * element_ix);
|
||||
uint path_ix = _81.Load(base * 4 + 8);
|
||||
uint clip_ix = _81.Load((base + 1u) * 4 + 8);
|
||||
uint scene_offset = _81.Load((base + 2u) * 4 + 8);
|
||||
uint info_offset = _81.Load((base + 3u) * 4 + 8);
|
||||
DrawMonoid _190 = { path_ix, clip_ix, scene_offset, info_offset };
|
||||
return _190;
|
||||
uint base = (_101.Load(48) >> uint(2)) + (4u * element_ix);
|
||||
uint path_ix = _57.Load(base * 4 + 12);
|
||||
uint clip_ix = _57.Load((base + 1u) * 4 + 12);
|
||||
uint scene_offset = _57.Load((base + 2u) * 4 + 12);
|
||||
uint info_offset = _57.Load((base + 3u) * 4 + 12);
|
||||
DrawMonoid _136 = { path_ix, clip_ix, scene_offset, info_offset };
|
||||
return _136;
|
||||
}
|
||||
|
||||
float4 load_clip_bbox(uint clip_ix)
|
||||
{
|
||||
uint base = (_156.Load(60) >> uint(2)) + (4u * clip_ix);
|
||||
float x0 = asfloat(_81.Load(base * 4 + 8));
|
||||
float y0 = asfloat(_81.Load((base + 1u) * 4 + 8));
|
||||
float x1 = asfloat(_81.Load((base + 2u) * 4 + 8));
|
||||
float y1 = asfloat(_81.Load((base + 3u) * 4 + 8));
|
||||
uint base = (_101.Load(64) >> uint(2)) + (4u * clip_ix);
|
||||
float x0 = asfloat(_57.Load(base * 4 + 12));
|
||||
float y0 = asfloat(_57.Load((base + 1u) * 4 + 12));
|
||||
float x1 = asfloat(_57.Load((base + 2u) * 4 + 12));
|
||||
float y1 = asfloat(_57.Load((base + 3u) * 4 + 12));
|
||||
float4 bbox = float4(x0, y0, x1, y1);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
float4 load_path_bbox(uint path_ix)
|
||||
{
|
||||
uint base = (_156.Load(40) >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(_81.Load(base * 4 + 8)) - 32768.0f;
|
||||
float bbox_t = float(_81.Load((base + 1u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_r = float(_81.Load((base + 2u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_b = float(_81.Load((base + 3u) * 4 + 8)) - 32768.0f;
|
||||
uint base = (_101.Load(44) >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(_57.Load(base * 4 + 12)) - 32768.0f;
|
||||
float bbox_t = float(_57.Load((base + 1u) * 4 + 12)) - 32768.0f;
|
||||
float bbox_r = float(_57.Load((base + 2u) * 4 + 12)) - 32768.0f;
|
||||
float bbox_b = float(_57.Load((base + 3u) * 4 + 12)) - 32768.0f;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
return bbox;
|
||||
}
|
||||
|
@ -116,41 +100,25 @@ float4 bbox_intersect(float4 a, float4 b)
|
|||
|
||||
void store_draw_bbox(uint draw_ix, float4 bbox)
|
||||
{
|
||||
uint base = (_156.Load(64) >> uint(2)) + (4u * draw_ix);
|
||||
_81.Store(base * 4 + 8, asuint(bbox.x));
|
||||
_81.Store((base + 1u) * 4 + 8, asuint(bbox.y));
|
||||
_81.Store((base + 2u) * 4 + 8, asuint(bbox.z));
|
||||
_81.Store((base + 3u) * 4 + 8, asuint(bbox.w));
|
||||
uint base = (_101.Load(68) >> uint(2)) + (4u * draw_ix);
|
||||
_57.Store(base * 4 + 12, asuint(bbox.x));
|
||||
_57.Store((base + 1u) * 4 + 12, asuint(bbox.y));
|
||||
_57.Store((base + 2u) * 4 + 12, asuint(bbox.z));
|
||||
_57.Store((base + 3u) * 4 + 12, asuint(bbox.w));
|
||||
}
|
||||
|
||||
Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
||||
uint malloc_stage(uint size, uint mem_size, uint stage)
|
||||
{
|
||||
Alloc a;
|
||||
a.offset = offset;
|
||||
return a;
|
||||
}
|
||||
|
||||
MallocResult malloc(uint size)
|
||||
{
|
||||
uint _87;
|
||||
_81.InterlockedAdd(0, size, _87);
|
||||
uint offset = _87;
|
||||
uint _94;
|
||||
_81.GetDimensions(_94);
|
||||
_94 = (_94 - 8) / 4;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int(_94) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
uint _65;
|
||||
_57.InterlockedAdd(0, size, _65);
|
||||
uint offset = _65;
|
||||
if ((offset + size) > mem_size)
|
||||
{
|
||||
uint _116;
|
||||
_81.InterlockedMax(4, 1u, _116);
|
||||
return r;
|
||||
uint _76;
|
||||
_57.InterlockedOr(4, stage, _76);
|
||||
offset = 0u;
|
||||
}
|
||||
return r;
|
||||
return offset;
|
||||
}
|
||||
|
||||
bool touch_mem(Alloc alloc, uint offset)
|
||||
|
@ -166,16 +134,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_81.Store(offset * 4 + 8, val);
|
||||
}
|
||||
|
||||
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = s.element_ix;
|
||||
write_mem(param, param_1, param_2);
|
||||
_57.Store(offset * 4 + 12, val);
|
||||
}
|
||||
|
||||
void comp_main()
|
||||
|
@ -185,17 +144,12 @@ void comp_main()
|
|||
{
|
||||
bitmaps[i][gl_LocalInvocationID.x] = 0u;
|
||||
}
|
||||
if (gl_LocalInvocationID.x == 0u)
|
||||
{
|
||||
sh_alloc_failed = false;
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
|
||||
int x0 = 0;
|
||||
int y0 = 0;
|
||||
int x1 = 0;
|
||||
int y1 = 0;
|
||||
if (element_ix < _156.Load(0))
|
||||
if (element_ix < _101.Load(4))
|
||||
{
|
||||
uint param = element_ix;
|
||||
DrawMonoid draw_monoid = load_draw_monoid(param);
|
||||
|
@ -212,11 +166,11 @@ void comp_main()
|
|||
float4 param_3 = path_bbox;
|
||||
float4 param_4 = clip_bbox;
|
||||
float4 bbox = bbox_intersect(param_3, param_4);
|
||||
float4 _417 = bbox;
|
||||
float4 _419 = bbox;
|
||||
float2 _421 = max(_417.xy, _419.zw);
|
||||
bbox.z = _421.x;
|
||||
bbox.w = _421.y;
|
||||
float4 _354 = bbox;
|
||||
float4 _356 = bbox;
|
||||
float2 _358 = max(_354.xy, _356.zw);
|
||||
bbox.z = _358.x;
|
||||
bbox.w = _358.y;
|
||||
uint param_5 = element_ix;
|
||||
float4 param_6 = bbox;
|
||||
store_draw_bbox(param_5, param_6);
|
||||
|
@ -225,8 +179,8 @@ void comp_main()
|
|||
x1 = int(ceil(bbox.z * 0.00390625f));
|
||||
y1 = int(ceil(bbox.w * 0.00390625f));
|
||||
}
|
||||
uint width_in_bins = ((_156.Load(8) + 16u) - 1u) / 16u;
|
||||
uint height_in_bins = ((_156.Load(12) + 16u) - 1u) / 16u;
|
||||
uint width_in_bins = ((_101.Load(12) + 16u) - 1u) / 16u;
|
||||
uint height_in_bins = ((_101.Load(16) + 16u) - 1u) / 16u;
|
||||
x0 = clamp(x0, 0, int(width_in_bins));
|
||||
x1 = clamp(x1, x0, int(width_in_bins));
|
||||
y0 = clamp(y0, 0, int(height_in_bins));
|
||||
|
@ -241,8 +195,8 @@ void comp_main()
|
|||
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
||||
while (y < y1)
|
||||
{
|
||||
uint _523;
|
||||
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _523);
|
||||
uint _460;
|
||||
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _460);
|
||||
x++;
|
||||
if (x == x1)
|
||||
{
|
||||
|
@ -257,51 +211,32 @@ void comp_main()
|
|||
element_count += uint(int(countbits(bitmaps[i_1][gl_LocalInvocationID.x])));
|
||||
count[i_1][gl_LocalInvocationID.x] = element_count;
|
||||
}
|
||||
uint param_7 = 0u;
|
||||
uint param_8 = 0u;
|
||||
bool param_9 = true;
|
||||
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
|
||||
uint chunk_offset = 0u;
|
||||
if (element_count != 0u)
|
||||
{
|
||||
uint param_10 = element_count * 4u;
|
||||
MallocResult _573 = malloc(param_10);
|
||||
MallocResult chunk = _573;
|
||||
chunk_alloc = chunk.alloc;
|
||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||
if (chunk.failed)
|
||||
{
|
||||
sh_alloc_failed = true;
|
||||
}
|
||||
uint param_7 = element_count * 4u;
|
||||
uint param_8 = _101.Load(0);
|
||||
uint param_9 = 1u;
|
||||
uint _510 = malloc_stage(param_7, param_8, param_9);
|
||||
chunk_offset = _510;
|
||||
sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
|
||||
}
|
||||
uint out_ix = (_156.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||
Alloc _603;
|
||||
_603.offset = _156.Load(20);
|
||||
Alloc param_11;
|
||||
param_11.offset = _603.offset;
|
||||
uint param_12 = out_ix;
|
||||
uint param_13 = element_count;
|
||||
write_mem(param_11, param_12, param_13);
|
||||
Alloc _615;
|
||||
_615.offset = _156.Load(20);
|
||||
Alloc param_14;
|
||||
param_14.offset = _615.offset;
|
||||
uint param_15 = out_ix + 1u;
|
||||
uint param_16 = chunk_alloc.offset;
|
||||
write_mem(param_14, param_15, param_16);
|
||||
uint out_ix = (_101.Load(24) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||
Alloc _532;
|
||||
_532.offset = _101.Load(24);
|
||||
Alloc param_10;
|
||||
param_10.offset = _532.offset;
|
||||
uint param_11 = out_ix;
|
||||
uint param_12 = element_count;
|
||||
write_mem(param_10, param_11, param_12);
|
||||
Alloc _544;
|
||||
_544.offset = _101.Load(24);
|
||||
Alloc param_13;
|
||||
param_13.offset = _544.offset;
|
||||
uint param_14 = out_ix + 1u;
|
||||
uint param_15 = chunk_offset;
|
||||
write_mem(param_13, param_14, param_15);
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
bool _630;
|
||||
if (!sh_alloc_failed)
|
||||
{
|
||||
_630 = _81.Load(4) != 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_630 = sh_alloc_failed;
|
||||
}
|
||||
if (_630)
|
||||
{
|
||||
return;
|
||||
}
|
||||
x = x0;
|
||||
y = y0;
|
||||
while (y < y1)
|
||||
|
@ -315,14 +250,11 @@ void comp_main()
|
|||
{
|
||||
idx += count[my_slice - 1u][bin_ix];
|
||||
}
|
||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||
uint out_offset = out_alloc.offset + (idx * 4u);
|
||||
BinInstanceRef _692 = { out_offset };
|
||||
BinInstance _694 = { element_ix };
|
||||
Alloc param_17 = out_alloc;
|
||||
BinInstanceRef param_18 = _692;
|
||||
BinInstance param_19 = _694;
|
||||
BinInstance_write(param_17, param_18, param_19);
|
||||
uint chunk_offset_1 = sh_chunk_offset[bin_ix];
|
||||
if (chunk_offset_1 != 0u)
|
||||
{
|
||||
_57.Store(((chunk_offset_1 >> uint(2)) + idx) * 4 + 12, element_ix);
|
||||
}
|
||||
}
|
||||
x++;
|
||||
if (x == x1)
|
||||
|
|
211
piet-gpu/shader/gen/binning.msl
generated
211
piet-gpu/shader/gen/binning.msl
generated
|
@ -12,22 +12,6 @@ struct Alloc
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct MallocResult
|
||||
{
|
||||
Alloc alloc;
|
||||
bool failed;
|
||||
};
|
||||
|
||||
struct BinInstanceRef
|
||||
{
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct BinInstance
|
||||
{
|
||||
uint element_ix;
|
||||
};
|
||||
|
||||
struct DrawMonoid
|
||||
{
|
||||
uint path_ix;
|
||||
|
@ -40,6 +24,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -50,6 +35,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -87,36 +73,36 @@ struct ConfigBuf
|
|||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
|
||||
DrawMonoid load_draw_monoid(thread const uint& element_ix, device Memory& v_57, const device ConfigBuf& v_101)
|
||||
{
|
||||
uint base = (v_156.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
|
||||
uint path_ix = v_81.memory[base];
|
||||
uint clip_ix = v_81.memory[base + 1u];
|
||||
uint scene_offset = v_81.memory[base + 2u];
|
||||
uint info_offset = v_81.memory[base + 3u];
|
||||
uint base = (v_101.conf.drawmonoid_alloc.offset >> uint(2)) + (4u * element_ix);
|
||||
uint path_ix = v_57.memory[base];
|
||||
uint clip_ix = v_57.memory[base + 1u];
|
||||
uint scene_offset = v_57.memory[base + 2u];
|
||||
uint info_offset = v_57.memory[base + 3u];
|
||||
return DrawMonoid{ path_ix, clip_ix, scene_offset, info_offset };
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
|
||||
float4 load_clip_bbox(thread const uint& clip_ix, device Memory& v_57, const device ConfigBuf& v_101)
|
||||
{
|
||||
uint base = (v_156.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
|
||||
float x0 = as_type<float>(v_81.memory[base]);
|
||||
float y0 = as_type<float>(v_81.memory[base + 1u]);
|
||||
float x1 = as_type<float>(v_81.memory[base + 2u]);
|
||||
float y1 = as_type<float>(v_81.memory[base + 3u]);
|
||||
uint base = (v_101.conf.clip_bbox_alloc.offset >> uint(2)) + (4u * clip_ix);
|
||||
float x0 = as_type<float>(v_57.memory[base]);
|
||||
float y0 = as_type<float>(v_57.memory[base + 1u]);
|
||||
float x1 = as_type<float>(v_57.memory[base + 2u]);
|
||||
float y1 = as_type<float>(v_57.memory[base + 3u]);
|
||||
float4 bbox = float4(x0, y0, x1, y1);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
|
||||
float4 load_path_bbox(thread const uint& path_ix, device Memory& v_57, const device ConfigBuf& v_101)
|
||||
{
|
||||
uint base = (v_156.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(v_81.memory[base]) - 32768.0;
|
||||
float bbox_t = float(v_81.memory[base + 1u]) - 32768.0;
|
||||
float bbox_r = float(v_81.memory[base + 2u]) - 32768.0;
|
||||
float bbox_b = float(v_81.memory[base + 3u]) - 32768.0;
|
||||
uint base = (v_101.conf.path_bbox_alloc.offset >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(v_57.memory[base]) - 32768.0;
|
||||
float bbox_t = float(v_57.memory[base + 1u]) - 32768.0;
|
||||
float bbox_r = float(v_57.memory[base + 2u]) - 32768.0;
|
||||
float bbox_b = float(v_57.memory[base + 3u]) - 32768.0;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
return bbox;
|
||||
}
|
||||
|
@ -128,40 +114,26 @@ float4 bbox_intersect(thread const float4& a, thread const float4& b)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_81, constant uint& v_81BufferSize, const device ConfigBuf& v_156)
|
||||
void store_draw_bbox(thread const uint& draw_ix, thread const float4& bbox, device Memory& v_57, const device ConfigBuf& v_101)
|
||||
{
|
||||
uint base = (v_156.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
|
||||
v_81.memory[base] = as_type<uint>(bbox.x);
|
||||
v_81.memory[base + 1u] = as_type<uint>(bbox.y);
|
||||
v_81.memory[base + 2u] = as_type<uint>(bbox.z);
|
||||
v_81.memory[base + 3u] = as_type<uint>(bbox.w);
|
||||
uint base = (v_101.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
|
||||
v_57.memory[base] = as_type<uint>(bbox.x);
|
||||
v_57.memory[base + 1u] = as_type<uint>(bbox.y);
|
||||
v_57.memory[base + 2u] = as_type<uint>(bbox.z);
|
||||
v_57.memory[base + 3u] = as_type<uint>(bbox.w);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
|
||||
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_57)
|
||||
{
|
||||
Alloc a;
|
||||
a.offset = offset;
|
||||
return a;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
MallocResult malloc(thread const uint& size, device Memory& v_81, constant uint& v_81BufferSize)
|
||||
{
|
||||
uint _87 = atomic_fetch_add_explicit((device atomic_uint*)&v_81.mem_offset, size, memory_order_relaxed);
|
||||
uint offset = _87;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int((v_81BufferSize - 8) / 4) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
uint _65 = atomic_fetch_add_explicit((device atomic_uint*)&v_57.mem_offset, size, memory_order_relaxed);
|
||||
uint offset = _65;
|
||||
if ((offset + size) > mem_size)
|
||||
{
|
||||
uint _116 = atomic_fetch_max_explicit((device atomic_uint*)&v_81.mem_error, 1u, memory_order_relaxed);
|
||||
return r;
|
||||
uint _76 = atomic_fetch_or_explicit((device atomic_uint*)&v_57.mem_error, stage, memory_order_relaxed);
|
||||
offset = 0u;
|
||||
}
|
||||
return r;
|
||||
return offset;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
|
@ -171,7 +143,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_81, constant uint& v_81BufferSize)
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_57)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -179,73 +151,56 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
|||
{
|
||||
return;
|
||||
}
|
||||
v_81.memory[offset] = val;
|
||||
v_57.memory[offset] = val;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void BinInstance_write(thread const Alloc& a, thread const BinInstanceRef& ref, thread const BinInstance& s, device Memory& v_81, constant uint& v_81BufferSize)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = s.element_ix;
|
||||
write_mem(param, param_1, param_2, v_81, v_81BufferSize);
|
||||
}
|
||||
|
||||
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_81 [[buffer(0)]], const device ConfigBuf& v_156 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
kernel void main0(device Memory& v_57 [[buffer(0)]], const device ConfigBuf& v_101 [[buffer(1)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
{
|
||||
threadgroup uint bitmaps[8][256];
|
||||
threadgroup short sh_alloc_failed;
|
||||
threadgroup uint count[8][256];
|
||||
threadgroup Alloc sh_chunk_alloc[256];
|
||||
constant uint& v_81BufferSize = spvBufferSizeConstants[0];
|
||||
threadgroup uint sh_chunk_offset[256];
|
||||
uint my_partition = gl_WorkGroupID.x;
|
||||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
bitmaps[i][gl_LocalInvocationID.x] = 0u;
|
||||
}
|
||||
if (gl_LocalInvocationID.x == 0u)
|
||||
{
|
||||
sh_alloc_failed = short(false);
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
uint element_ix = (my_partition * 256u) + gl_LocalInvocationID.x;
|
||||
int x0 = 0;
|
||||
int y0 = 0;
|
||||
int x1 = 0;
|
||||
int y1 = 0;
|
||||
if (element_ix < v_156.conf.n_elements)
|
||||
if (element_ix < v_101.conf.n_elements)
|
||||
{
|
||||
uint param = element_ix;
|
||||
DrawMonoid draw_monoid = load_draw_monoid(param, v_81, v_81BufferSize, v_156);
|
||||
DrawMonoid draw_monoid = load_draw_monoid(param, v_57, v_101);
|
||||
uint path_ix = draw_monoid.path_ix;
|
||||
float4 clip_bbox = float4(-1000000000.0, -1000000000.0, 1000000000.0, 1000000000.0);
|
||||
uint clip_ix = draw_monoid.clip_ix;
|
||||
if (clip_ix > 0u)
|
||||
{
|
||||
uint param_1 = clip_ix - 1u;
|
||||
clip_bbox = load_clip_bbox(param_1, v_81, v_81BufferSize, v_156);
|
||||
clip_bbox = load_clip_bbox(param_1, v_57, v_101);
|
||||
}
|
||||
uint param_2 = path_ix;
|
||||
float4 path_bbox = load_path_bbox(param_2, v_81, v_81BufferSize, v_156);
|
||||
float4 path_bbox = load_path_bbox(param_2, v_57, v_101);
|
||||
float4 param_3 = path_bbox;
|
||||
float4 param_4 = clip_bbox;
|
||||
float4 bbox = bbox_intersect(param_3, param_4);
|
||||
float4 _417 = bbox;
|
||||
float4 _419 = bbox;
|
||||
float2 _421 = fast::max(_417.xy, _419.zw);
|
||||
bbox.z = _421.x;
|
||||
bbox.w = _421.y;
|
||||
float4 _354 = bbox;
|
||||
float4 _356 = bbox;
|
||||
float2 _358 = fast::max(_354.xy, _356.zw);
|
||||
bbox.z = _358.x;
|
||||
bbox.w = _358.y;
|
||||
uint param_5 = element_ix;
|
||||
float4 param_6 = bbox;
|
||||
store_draw_bbox(param_5, param_6, v_81, v_81BufferSize, v_156);
|
||||
store_draw_bbox(param_5, param_6, v_57, v_101);
|
||||
x0 = int(floor(bbox.x * 0.00390625));
|
||||
y0 = int(floor(bbox.y * 0.00390625));
|
||||
x1 = int(ceil(bbox.z * 0.00390625));
|
||||
y1 = int(ceil(bbox.w * 0.00390625));
|
||||
}
|
||||
uint width_in_bins = ((v_156.conf.width_in_tiles + 16u) - 1u) / 16u;
|
||||
uint height_in_bins = ((v_156.conf.height_in_tiles + 16u) - 1u) / 16u;
|
||||
uint width_in_bins = ((v_101.conf.width_in_tiles + 16u) - 1u) / 16u;
|
||||
uint height_in_bins = ((v_101.conf.height_in_tiles + 16u) - 1u) / 16u;
|
||||
x0 = clamp(x0, 0, int(width_in_bins));
|
||||
x1 = clamp(x1, x0, int(width_in_bins));
|
||||
y0 = clamp(y0, 0, int(height_in_bins));
|
||||
|
@ -260,7 +215,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
||||
while (y < y1)
|
||||
{
|
||||
uint _523 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
|
||||
uint _460 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
|
||||
x++;
|
||||
if (x == x1)
|
||||
{
|
||||
|
@ -275,47 +230,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
element_count += uint(int(popcount(bitmaps[i_1][gl_LocalInvocationID.x])));
|
||||
count[i_1][gl_LocalInvocationID.x] = element_count;
|
||||
}
|
||||
uint param_7 = 0u;
|
||||
uint param_8 = 0u;
|
||||
bool param_9 = true;
|
||||
Alloc chunk_alloc = new_alloc(param_7, param_8, param_9);
|
||||
uint chunk_offset = 0u;
|
||||
if (element_count != 0u)
|
||||
{
|
||||
uint param_10 = element_count * 4u;
|
||||
MallocResult _573 = malloc(param_10, v_81, v_81BufferSize);
|
||||
MallocResult chunk = _573;
|
||||
chunk_alloc = chunk.alloc;
|
||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||
if (chunk.failed)
|
||||
{
|
||||
sh_alloc_failed = short(true);
|
||||
}
|
||||
uint param_7 = element_count * 4u;
|
||||
uint param_8 = v_101.conf.mem_size;
|
||||
uint param_9 = 1u;
|
||||
uint _510 = malloc_stage(param_7, param_8, param_9, v_57);
|
||||
chunk_offset = _510;
|
||||
sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
|
||||
}
|
||||
uint out_ix = (v_156.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||
Alloc param_11;
|
||||
param_11.offset = v_156.conf.bin_alloc.offset;
|
||||
uint param_12 = out_ix;
|
||||
uint param_13 = element_count;
|
||||
write_mem(param_11, param_12, param_13, v_81, v_81BufferSize);
|
||||
Alloc param_14;
|
||||
param_14.offset = v_156.conf.bin_alloc.offset;
|
||||
uint param_15 = out_ix + 1u;
|
||||
uint param_16 = chunk_alloc.offset;
|
||||
write_mem(param_14, param_15, param_16, v_81, v_81BufferSize);
|
||||
uint out_ix = (v_101.conf.bin_alloc.offset >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||
Alloc param_10;
|
||||
param_10.offset = v_101.conf.bin_alloc.offset;
|
||||
uint param_11 = out_ix;
|
||||
uint param_12 = element_count;
|
||||
write_mem(param_10, param_11, param_12, v_57);
|
||||
Alloc param_13;
|
||||
param_13.offset = v_101.conf.bin_alloc.offset;
|
||||
uint param_14 = out_ix + 1u;
|
||||
uint param_15 = chunk_offset;
|
||||
write_mem(param_13, param_14, param_15, v_57);
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
bool _630;
|
||||
if (!bool(sh_alloc_failed))
|
||||
{
|
||||
_630 = v_81.mem_error != 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_630 = bool(sh_alloc_failed);
|
||||
}
|
||||
if (_630)
|
||||
{
|
||||
return;
|
||||
}
|
||||
x = x0;
|
||||
y = y0;
|
||||
while (y < y1)
|
||||
|
@ -329,12 +265,11 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
{
|
||||
idx += count[my_slice - 1u][bin_ix];
|
||||
}
|
||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||
uint out_offset = out_alloc.offset + (idx * 4u);
|
||||
Alloc param_17 = out_alloc;
|
||||
BinInstanceRef param_18 = BinInstanceRef{ out_offset };
|
||||
BinInstance param_19 = BinInstance{ element_ix };
|
||||
BinInstance_write(param_17, param_18, param_19, v_81, v_81BufferSize);
|
||||
uint chunk_offset_1 = sh_chunk_offset[bin_ix];
|
||||
if (chunk_offset_1 != 0u)
|
||||
{
|
||||
v_57.memory[(chunk_offset_1 >> uint(2)) + idx] = element_ix;
|
||||
}
|
||||
}
|
||||
x++;
|
||||
if (x == x1)
|
||||
|
|
BIN
piet-gpu/shader/gen/binning.spv
generated
BIN
piet-gpu/shader/gen/binning.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/clip_leaf.dxil
generated
BIN
piet-gpu/shader/gen/clip_leaf.dxil
generated
Binary file not shown.
77
piet-gpu/shader/gen/clip_leaf.hlsl
generated
77
piet-gpu/shader/gen/clip_leaf.hlsl
generated
|
@ -17,6 +17,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -48,7 +49,7 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
static const Bic _393 = { 0u, 0u };
|
||||
static const Bic _394 = { 0u, 0u };
|
||||
|
||||
ByteAddressBuffer _80 : register(t1, space0);
|
||||
RWByteAddressBuffer _96 : register(u0, space0);
|
||||
|
@ -71,9 +72,9 @@ groupshared float4 sh_bbox[256];
|
|||
|
||||
Bic load_bic(uint ix)
|
||||
{
|
||||
uint base = (_80.Load(52) >> uint(2)) + (2u * ix);
|
||||
Bic _286 = { _96.Load(base * 4 + 8), _96.Load((base + 1u) * 4 + 8) };
|
||||
return _286;
|
||||
uint base = (_80.Load(56) >> uint(2)) + (2u * ix);
|
||||
Bic _287 = { _96.Load(base * 4 + 12), _96.Load((base + 1u) * 4 + 12) };
|
||||
return _287;
|
||||
}
|
||||
|
||||
Bic bic_combine(Bic x, Bic y)
|
||||
|
@ -85,15 +86,15 @@ Bic bic_combine(Bic x, Bic y)
|
|||
|
||||
ClipEl load_clip_el(uint ix)
|
||||
{
|
||||
uint base = (_80.Load(56) >> uint(2)) + (5u * ix);
|
||||
uint parent_ix = _96.Load(base * 4 + 8);
|
||||
float x0 = asfloat(_96.Load((base + 1u) * 4 + 8));
|
||||
float y0 = asfloat(_96.Load((base + 2u) * 4 + 8));
|
||||
float x1 = asfloat(_96.Load((base + 3u) * 4 + 8));
|
||||
float y1 = asfloat(_96.Load((base + 4u) * 4 + 8));
|
||||
uint base = (_80.Load(60) >> uint(2)) + (5u * ix);
|
||||
uint parent_ix = _96.Load(base * 4 + 12);
|
||||
float x0 = asfloat(_96.Load((base + 1u) * 4 + 12));
|
||||
float y0 = asfloat(_96.Load((base + 2u) * 4 + 12));
|
||||
float x1 = asfloat(_96.Load((base + 3u) * 4 + 12));
|
||||
float y1 = asfloat(_96.Load((base + 4u) * 4 + 12));
|
||||
float4 bbox = float4(x0, y0, x1, y1);
|
||||
ClipEl _335 = { parent_ix, bbox };
|
||||
return _335;
|
||||
ClipEl _336 = { parent_ix, bbox };
|
||||
return _336;
|
||||
}
|
||||
|
||||
float4 bbox_intersect(float4 a, float4 b)
|
||||
|
@ -103,9 +104,9 @@ float4 bbox_intersect(float4 a, float4 b)
|
|||
|
||||
uint load_path_ix(uint ix)
|
||||
{
|
||||
if (ix < _80.Load(80))
|
||||
if (ix < _80.Load(84))
|
||||
{
|
||||
return _96.Load(((_80.Load(48) >> uint(2)) + ix) * 4 + 8);
|
||||
return _96.Load(((_80.Load(52) >> uint(2)) + ix) * 4 + 12);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -115,11 +116,11 @@ uint load_path_ix(uint ix)
|
|||
|
||||
float4 load_path_bbox(uint path_ix)
|
||||
{
|
||||
uint base = (_80.Load(40) >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(_96.Load(base * 4 + 8)) - 32768.0f;
|
||||
float bbox_t = float(_96.Load((base + 1u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_r = float(_96.Load((base + 2u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_b = float(_96.Load((base + 3u) * 4 + 8)) - 32768.0f;
|
||||
uint base = (_80.Load(44) >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(_96.Load(base * 4 + 12)) - 32768.0f;
|
||||
float bbox_t = float(_96.Load((base + 1u) * 4 + 12)) - 32768.0f;
|
||||
float bbox_r = float(_96.Load((base + 2u) * 4 + 12)) - 32768.0f;
|
||||
float bbox_b = float(_96.Load((base + 3u) * 4 + 12)) - 32768.0f;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
return bbox;
|
||||
}
|
||||
|
@ -173,17 +174,17 @@ uint search_link(inout Bic bic)
|
|||
|
||||
void store_clip_bbox(uint ix, float4 bbox)
|
||||
{
|
||||
uint base = (_80.Load(60) >> uint(2)) + (4u * ix);
|
||||
_96.Store(base * 4 + 8, asuint(bbox.x));
|
||||
_96.Store((base + 1u) * 4 + 8, asuint(bbox.y));
|
||||
_96.Store((base + 2u) * 4 + 8, asuint(bbox.z));
|
||||
_96.Store((base + 3u) * 4 + 8, asuint(bbox.w));
|
||||
uint base = (_80.Load(64) >> uint(2)) + (4u * ix);
|
||||
_96.Store(base * 4 + 12, asuint(bbox.x));
|
||||
_96.Store((base + 1u) * 4 + 12, asuint(bbox.y));
|
||||
_96.Store((base + 2u) * 4 + 12, asuint(bbox.z));
|
||||
_96.Store((base + 3u) * 4 + 12, asuint(bbox.w));
|
||||
}
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint th = gl_LocalInvocationID.x;
|
||||
Bic bic = _393;
|
||||
Bic bic = _394;
|
||||
if (th < gl_WorkGroupID.x)
|
||||
{
|
||||
uint param = th;
|
||||
|
@ -240,8 +241,8 @@ void comp_main()
|
|||
uint param_6 = gl_GlobalInvocationID.x;
|
||||
uint inp = load_path_ix(param_6);
|
||||
bool is_push = int(inp) >= 0;
|
||||
Bic _559 = { 1u - uint(is_push), uint(is_push) };
|
||||
bic = _559;
|
||||
Bic _560 = { 1u - uint(is_push), uint(is_push) };
|
||||
bic = _560;
|
||||
sh_bic[th] = bic;
|
||||
if (is_push)
|
||||
{
|
||||
|
@ -266,11 +267,11 @@ void comp_main()
|
|||
inbase = outbase;
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
bic = _393;
|
||||
bic = _394;
|
||||
Bic param_10 = bic;
|
||||
uint _618 = search_link(param_10);
|
||||
uint _619 = search_link(param_10);
|
||||
bic = param_10;
|
||||
uint link = _618;
|
||||
uint link = _619;
|
||||
sh_link[th] = link;
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint grandparent;
|
||||
|
@ -324,22 +325,22 @@ void comp_main()
|
|||
sh_bbox[th] = bbox;
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint path_ix = inp;
|
||||
bool _717 = !is_push;
|
||||
bool _725;
|
||||
if (_717)
|
||||
bool _718 = !is_push;
|
||||
bool _726;
|
||||
if (_718)
|
||||
{
|
||||
_725 = gl_GlobalInvocationID.x < _80.Load(80);
|
||||
_726 = gl_GlobalInvocationID.x < _80.Load(84);
|
||||
}
|
||||
else
|
||||
{
|
||||
_725 = _717;
|
||||
_726 = _718;
|
||||
}
|
||||
if (_725)
|
||||
if (_726)
|
||||
{
|
||||
uint param_15 = parent;
|
||||
path_ix = load_path_ix(param_15);
|
||||
uint drawmonoid_out_base = (_80.Load(44) >> uint(2)) + (4u * (~inp));
|
||||
_96.Store(drawmonoid_out_base * 4 + 8, path_ix);
|
||||
uint drawmonoid_out_base = (_80.Load(48) >> uint(2)) + (4u * (~inp));
|
||||
_96.Store(drawmonoid_out_base * 4 + 12, path_ix);
|
||||
if (int(grandparent) >= 0)
|
||||
{
|
||||
bbox = sh_bbox[grandparent];
|
||||
|
|
18
piet-gpu/shader/gen/clip_leaf.msl
generated
18
piet-gpu/shader/gen/clip_leaf.msl
generated
|
@ -24,6 +24,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -62,6 +63,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -275,9 +277,9 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8
|
|||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
bic = Bic{ 0u, 0u };
|
||||
Bic param_10 = bic;
|
||||
uint _618 = search_link(param_10, gl_LocalInvocationID, sh_bic);
|
||||
uint _619 = search_link(param_10, gl_LocalInvocationID, sh_bic);
|
||||
bic = param_10;
|
||||
uint link = _618;
|
||||
uint link = _619;
|
||||
sh_link[th] = link;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
uint grandparent;
|
||||
|
@ -331,17 +333,17 @@ kernel void main0(device Memory& v_96 [[buffer(0)]], const device ConfigBuf& v_8
|
|||
sh_bbox[th] = bbox;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
uint path_ix = inp;
|
||||
bool _717 = !is_push;
|
||||
bool _725;
|
||||
if (_717)
|
||||
bool _718 = !is_push;
|
||||
bool _726;
|
||||
if (_718)
|
||||
{
|
||||
_725 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
|
||||
_726 = gl_GlobalInvocationID.x < v_80.conf.n_clip;
|
||||
}
|
||||
else
|
||||
{
|
||||
_725 = _717;
|
||||
_726 = _718;
|
||||
}
|
||||
if (_725)
|
||||
if (_726)
|
||||
{
|
||||
uint param_15 = parent;
|
||||
path_ix = load_path_ix(param_15, v_80, v_96);
|
||||
|
|
BIN
piet-gpu/shader/gen/clip_leaf.spv
generated
BIN
piet-gpu/shader/gen/clip_leaf.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/clip_reduce.dxil
generated
BIN
piet-gpu/shader/gen/clip_reduce.dxil
generated
Binary file not shown.
51
piet-gpu/shader/gen/clip_reduce.hlsl
generated
51
piet-gpu/shader/gen/clip_reduce.hlsl
generated
|
@ -17,6 +17,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -48,7 +49,7 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
static const Bic _267 = { 0u, 0u };
|
||||
static const Bic _268 = { 0u, 0u };
|
||||
|
||||
ByteAddressBuffer _64 : register(t1, space0);
|
||||
RWByteAddressBuffer _80 : register(u0, space0);
|
||||
|
@ -77,39 +78,39 @@ Bic bic_combine(Bic x, Bic y)
|
|||
|
||||
void store_bic(uint ix, Bic bic)
|
||||
{
|
||||
uint base = (_64.Load(52) >> uint(2)) + (2u * ix);
|
||||
_80.Store(base * 4 + 8, bic.a);
|
||||
_80.Store((base + 1u) * 4 + 8, bic.b);
|
||||
uint base = (_64.Load(56) >> uint(2)) + (2u * ix);
|
||||
_80.Store(base * 4 + 12, bic.a);
|
||||
_80.Store((base + 1u) * 4 + 12, bic.b);
|
||||
}
|
||||
|
||||
float4 load_path_bbox(uint path_ix)
|
||||
{
|
||||
uint base = (_64.Load(40) >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(_80.Load(base * 4 + 8)) - 32768.0f;
|
||||
float bbox_t = float(_80.Load((base + 1u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_r = float(_80.Load((base + 2u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_b = float(_80.Load((base + 3u) * 4 + 8)) - 32768.0f;
|
||||
uint base = (_64.Load(44) >> uint(2)) + (6u * path_ix);
|
||||
float bbox_l = float(_80.Load(base * 4 + 12)) - 32768.0f;
|
||||
float bbox_t = float(_80.Load((base + 1u) * 4 + 12)) - 32768.0f;
|
||||
float bbox_r = float(_80.Load((base + 2u) * 4 + 12)) - 32768.0f;
|
||||
float bbox_b = float(_80.Load((base + 3u) * 4 + 12)) - 32768.0f;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
void store_clip_el(uint ix, ClipEl el)
|
||||
{
|
||||
uint base = (_64.Load(56) >> uint(2)) + (5u * ix);
|
||||
_80.Store(base * 4 + 8, el.parent_ix);
|
||||
_80.Store((base + 1u) * 4 + 8, asuint(el.bbox.x));
|
||||
_80.Store((base + 2u) * 4 + 8, asuint(el.bbox.y));
|
||||
_80.Store((base + 3u) * 4 + 8, asuint(el.bbox.z));
|
||||
_80.Store((base + 4u) * 4 + 8, asuint(el.bbox.w));
|
||||
uint base = (_64.Load(60) >> uint(2)) + (5u * ix);
|
||||
_80.Store(base * 4 + 12, el.parent_ix);
|
||||
_80.Store((base + 1u) * 4 + 12, asuint(el.bbox.x));
|
||||
_80.Store((base + 2u) * 4 + 12, asuint(el.bbox.y));
|
||||
_80.Store((base + 3u) * 4 + 12, asuint(el.bbox.z));
|
||||
_80.Store((base + 4u) * 4 + 12, asuint(el.bbox.w));
|
||||
}
|
||||
|
||||
void comp_main()
|
||||
{
|
||||
uint th = gl_LocalInvocationID.x;
|
||||
uint inp = _80.Load(((_64.Load(48) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 8);
|
||||
uint inp = _80.Load(((_64.Load(52) >> uint(2)) + gl_GlobalInvocationID.x) * 4 + 12);
|
||||
bool is_push = int(inp) >= 0;
|
||||
Bic _207 = { 1u - uint(is_push), uint(is_push) };
|
||||
Bic bic = _207;
|
||||
Bic _208 = { 1u - uint(is_push), uint(is_push) };
|
||||
Bic bic = _208;
|
||||
sh_bic[gl_LocalInvocationID.x] = bic;
|
||||
for (uint i = 0u; i < 8u; i++)
|
||||
{
|
||||
|
@ -132,21 +133,21 @@ void comp_main()
|
|||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint size = sh_bic[0].b;
|
||||
bic = _267;
|
||||
bic = _268;
|
||||
if ((th + 1u) < 256u)
|
||||
{
|
||||
bic = sh_bic[th + 1u];
|
||||
}
|
||||
bool _283;
|
||||
bool _284;
|
||||
if (is_push)
|
||||
{
|
||||
_283 = bic.a == 0u;
|
||||
_284 = bic.a == 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_283 = is_push;
|
||||
_284 = is_push;
|
||||
}
|
||||
if (_283)
|
||||
if (_284)
|
||||
{
|
||||
uint local_ix = (size - bic.b) - 1u;
|
||||
sh_parent[local_ix] = th;
|
||||
|
@ -163,8 +164,8 @@ void comp_main()
|
|||
if (th < size)
|
||||
{
|
||||
uint parent_ix = sh_parent[th] + (gl_WorkGroupID.x * 256u);
|
||||
ClipEl _331 = { parent_ix, bbox };
|
||||
ClipEl el = _331;
|
||||
ClipEl _332 = { parent_ix, bbox };
|
||||
ClipEl el = _332;
|
||||
uint param_5 = gl_GlobalInvocationID.x;
|
||||
ClipEl param_6 = el;
|
||||
store_clip_el(param_5, param_6);
|
||||
|
|
10
piet-gpu/shader/gen/clip_reduce.msl
generated
10
piet-gpu/shader/gen/clip_reduce.msl
generated
|
@ -24,6 +24,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -62,6 +63,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -142,16 +144,16 @@ kernel void main0(device Memory& v_80 [[buffer(0)]], const device ConfigBuf& v_6
|
|||
{
|
||||
bic = sh_bic[th + 1u];
|
||||
}
|
||||
bool _283;
|
||||
bool _284;
|
||||
if (is_push)
|
||||
{
|
||||
_283 = bic.a == 0u;
|
||||
_284 = bic.a == 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_283 = is_push;
|
||||
_284 = is_push;
|
||||
}
|
||||
if (_283)
|
||||
if (_284)
|
||||
{
|
||||
uint local_ix = (size - bic.b) - 1u;
|
||||
sh_parent[local_ix] = th;
|
||||
|
|
BIN
piet-gpu/shader/gen/clip_reduce.spv
generated
BIN
piet-gpu/shader/gen/clip_reduce.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/coarse.dxil
generated
BIN
piet-gpu/shader/gen/coarse.dxil
generated
Binary file not shown.
742
piet-gpu/shader/gen/coarse.hlsl
generated
742
piet-gpu/shader/gen/coarse.hlsl
generated
File diff suppressed because it is too large
Load diff
775
piet-gpu/shader/gen/coarse.msl
generated
775
piet-gpu/shader/gen/coarse.msl
generated
File diff suppressed because it is too large
Load diff
BIN
piet-gpu/shader/gen/coarse.spv
generated
BIN
piet-gpu/shader/gen/coarse.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/draw_leaf.dxil
generated
BIN
piet-gpu/shader/gen/draw_leaf.dxil
generated
Binary file not shown.
73
piet-gpu/shader/gen/draw_leaf.hlsl
generated
73
piet-gpu/shader/gen/draw_leaf.hlsl
generated
|
@ -13,6 +13,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -88,7 +89,7 @@ DrawMonoid draw_monoid_identity()
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
uint drawtag_base = _93.Load(100) >> uint(2);
|
||||
uint drawtag_base = _93.Load(104) >> uint(2);
|
||||
uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0);
|
||||
uint param = tag_word;
|
||||
DrawMonoid agg = map_tag(param);
|
||||
|
@ -137,11 +138,11 @@ void comp_main()
|
|||
DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u];
|
||||
row = combine_draw_monoid(param_6, param_7);
|
||||
}
|
||||
uint drawdata_base = _93.Load(104) >> uint(2);
|
||||
uint drawinfo_base = _93.Load(68) >> uint(2);
|
||||
uint drawdata_base = _93.Load(108) >> uint(2);
|
||||
uint drawinfo_base = _93.Load(72) >> uint(2);
|
||||
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
||||
uint out_base = (_93.Load(44) >> uint(2)) + (out_ix * 4u);
|
||||
uint clip_out_base = _93.Load(48) >> uint(2);
|
||||
uint out_base = (_93.Load(48) >> uint(2)) + (out_ix * 4u);
|
||||
uint clip_out_base = _93.Load(52) >> uint(2);
|
||||
float4 mat;
|
||||
float2 translate;
|
||||
float2 p0;
|
||||
|
@ -155,31 +156,31 @@ void comp_main()
|
|||
DrawMonoid param_9 = local[i_2 - 1u];
|
||||
m = combine_draw_monoid(param_8, param_9);
|
||||
}
|
||||
_285.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix);
|
||||
_285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix);
|
||||
_285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset);
|
||||
_285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset);
|
||||
_285.Store((out_base + (i_2 * 4u)) * 4 + 12, m.path_ix);
|
||||
_285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 12, m.clip_ix);
|
||||
_285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 12, m.scene_offset);
|
||||
_285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 12, m.info_offset);
|
||||
uint dd = drawdata_base + (m.scene_offset >> uint(2));
|
||||
uint di = drawinfo_base + (m.info_offset >> uint(2));
|
||||
tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0);
|
||||
if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u))
|
||||
{
|
||||
uint bbox_offset = (_93.Load(40) >> uint(2)) + (6u * m.path_ix);
|
||||
float bbox_l = float(_285.Load(bbox_offset * 4 + 8)) - 32768.0f;
|
||||
float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
|
||||
float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f;
|
||||
uint bbox_offset = (_93.Load(44) >> uint(2)) + (6u * m.path_ix);
|
||||
float bbox_l = float(_285.Load(bbox_offset * 4 + 12)) - 32768.0f;
|
||||
float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 12)) - 32768.0f;
|
||||
float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 12)) - 32768.0f;
|
||||
float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 12)) - 32768.0f;
|
||||
float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b);
|
||||
float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 8));
|
||||
float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 12));
|
||||
uint fill_mode = uint(linewidth >= 0.0f);
|
||||
if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u))
|
||||
{
|
||||
uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 8);
|
||||
uint t = (_93.Load(36) >> uint(2)) + (6u * trans_ix);
|
||||
mat = asfloat(uint4(_285.Load(t * 4 + 8), _285.Load((t + 1u) * 4 + 8), _285.Load((t + 2u) * 4 + 8), _285.Load((t + 3u) * 4 + 8)));
|
||||
uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 12);
|
||||
uint t = (_93.Load(40) >> uint(2)) + (6u * trans_ix);
|
||||
mat = asfloat(uint4(_285.Load(t * 4 + 12), _285.Load((t + 1u) * 4 + 12), _285.Load((t + 2u) * 4 + 12), _285.Load((t + 3u) * 4 + 12)));
|
||||
if ((tag_word == 276u) || (tag_word == 732u))
|
||||
{
|
||||
translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 8), _285.Load((t + 5u) * 4 + 8)));
|
||||
translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 12), _285.Load((t + 5u) * 4 + 12)));
|
||||
}
|
||||
}
|
||||
if (linewidth >= 0.0f)
|
||||
|
@ -191,12 +192,12 @@ void comp_main()
|
|||
case 68u:
|
||||
case 72u:
|
||||
{
|
||||
_285.Store(di * 4 + 8, asuint(linewidth));
|
||||
_285.Store(di * 4 + 12, asuint(linewidth));
|
||||
break;
|
||||
}
|
||||
case 276u:
|
||||
{
|
||||
_285.Store(di * 4 + 8, asuint(linewidth));
|
||||
_285.Store(di * 4 + 12, asuint(linewidth));
|
||||
p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0)));
|
||||
p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0)));
|
||||
p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate;
|
||||
|
@ -206,9 +207,9 @@ void comp_main()
|
|||
float line_x = dxy.x * scale;
|
||||
float line_y = dxy.y * scale;
|
||||
float line_c = -((p0.x * line_x) + (p0.y * line_y));
|
||||
_285.Store((di + 1u) * 4 + 8, asuint(line_x));
|
||||
_285.Store((di + 2u) * 4 + 8, asuint(line_y));
|
||||
_285.Store((di + 3u) * 4 + 8, asuint(line_c));
|
||||
_285.Store((di + 1u) * 4 + 12, asuint(line_x));
|
||||
_285.Store((di + 2u) * 4 + 12, asuint(line_y));
|
||||
_285.Store((di + 3u) * 4 + 12, asuint(line_c));
|
||||
break;
|
||||
}
|
||||
case 732u:
|
||||
|
@ -227,17 +228,17 @@ void comp_main()
|
|||
float2 c1 = center1 * rainv;
|
||||
float ra = rr * rainv;
|
||||
float roff = rr - 1.0f;
|
||||
_285.Store(di * 4 + 8, asuint(linewidth));
|
||||
_285.Store((di + 1u) * 4 + 8, asuint(inv_mat.x));
|
||||
_285.Store((di + 2u) * 4 + 8, asuint(inv_mat.y));
|
||||
_285.Store((di + 3u) * 4 + 8, asuint(inv_mat.z));
|
||||
_285.Store((di + 4u) * 4 + 8, asuint(inv_mat.w));
|
||||
_285.Store((di + 5u) * 4 + 8, asuint(inv_tr.x));
|
||||
_285.Store((di + 6u) * 4 + 8, asuint(inv_tr.y));
|
||||
_285.Store((di + 7u) * 4 + 8, asuint(c1.x));
|
||||
_285.Store((di + 8u) * 4 + 8, asuint(c1.y));
|
||||
_285.Store((di + 9u) * 4 + 8, asuint(ra));
|
||||
_285.Store((di + 10u) * 4 + 8, asuint(roff));
|
||||
_285.Store(di * 4 + 12, asuint(linewidth));
|
||||
_285.Store((di + 1u) * 4 + 12, asuint(inv_mat.x));
|
||||
_285.Store((di + 2u) * 4 + 12, asuint(inv_mat.y));
|
||||
_285.Store((di + 3u) * 4 + 12, asuint(inv_mat.z));
|
||||
_285.Store((di + 4u) * 4 + 12, asuint(inv_mat.w));
|
||||
_285.Store((di + 5u) * 4 + 12, asuint(inv_tr.x));
|
||||
_285.Store((di + 6u) * 4 + 12, asuint(inv_tr.y));
|
||||
_285.Store((di + 7u) * 4 + 12, asuint(c1.x));
|
||||
_285.Store((di + 8u) * 4 + 12, asuint(c1.y));
|
||||
_285.Store((di + 9u) * 4 + 12, asuint(ra));
|
||||
_285.Store((di + 10u) * 4 + 12, asuint(roff));
|
||||
break;
|
||||
}
|
||||
case 5u:
|
||||
|
@ -253,7 +254,7 @@ void comp_main()
|
|||
{
|
||||
path_ix = m.path_ix;
|
||||
}
|
||||
_285.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix);
|
||||
_285.Store((clip_out_base + m.clip_ix) * 4 + 12, path_ix);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
2
piet-gpu/shader/gen/draw_leaf.msl
generated
2
piet-gpu/shader/gen/draw_leaf.msl
generated
|
@ -59,6 +59,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -115,6 +116,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
|
BIN
piet-gpu/shader/gen/draw_leaf.spv
generated
BIN
piet-gpu/shader/gen/draw_leaf.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/draw_reduce.dxil
generated
BIN
piet-gpu/shader/gen/draw_reduce.dxil
generated
Binary file not shown.
3
piet-gpu/shader/gen/draw_reduce.hlsl
generated
3
piet-gpu/shader/gen/draw_reduce.hlsl
generated
|
@ -13,6 +13,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -81,7 +82,7 @@ DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
uint drawtag_base = _87.Load(100) >> uint(2);
|
||||
uint drawtag_base = _87.Load(104) >> uint(2);
|
||||
uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0);
|
||||
uint param = tag_word;
|
||||
DrawMonoid agg = map_tag(param);
|
||||
|
|
2
piet-gpu/shader/gen/draw_reduce.msl
generated
2
piet-gpu/shader/gen/draw_reduce.msl
generated
|
@ -20,6 +20,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -76,6 +77,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
|
BIN
piet-gpu/shader/gen/draw_reduce.spv
generated
BIN
piet-gpu/shader/gen/draw_reduce.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/kernel4.dxil
generated
BIN
piet-gpu/shader/gen/kernel4.dxil
generated
Binary file not shown.
67
piet-gpu/shader/gen/kernel4.hlsl
generated
67
piet-gpu/shader/gen/kernel4.hlsl
generated
|
@ -130,6 +130,7 @@ struct TileSeg
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -163,9 +164,10 @@ static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
|
|||
|
||||
RWByteAddressBuffer _297 : register(u0, space0);
|
||||
ByteAddressBuffer _1681 : register(t1, space0);
|
||||
RWTexture2D<unorm float4> image_atlas : register(u3, space0);
|
||||
RWTexture2D<unorm float4> gradients : register(u4, space0);
|
||||
RWTexture2D<unorm float4> image : register(u2, space0);
|
||||
RWByteAddressBuffer _2506 : register(u2, space0);
|
||||
RWTexture2D<unorm float4> image_atlas : register(u4, space0);
|
||||
RWTexture2D<unorm float4> gradients : register(u5, space0);
|
||||
RWTexture2D<unorm float4> image : register(u3, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -206,7 +208,7 @@ uint read_mem(Alloc alloc, uint offset)
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = _297.Load(offset * 4 + 8);
|
||||
uint v = _297.Load(offset * 4 + 12);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -989,9 +991,9 @@ CmdJump Cmd_Jump_read(Alloc a, CmdRef ref)
|
|||
|
||||
void comp_main()
|
||||
{
|
||||
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x;
|
||||
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x;
|
||||
Alloc _1696;
|
||||
_1696.offset = _1681.Load(24);
|
||||
_1696.offset = _1681.Load(28);
|
||||
Alloc param;
|
||||
param.offset = _1696.offset;
|
||||
uint param_1 = tile_ix * 1024u;
|
||||
|
@ -999,7 +1001,7 @@ void comp_main()
|
|||
Alloc cmd_alloc = slice_mem(param, param_1, param_2);
|
||||
CmdRef _1705 = { cmd_alloc.offset };
|
||||
CmdRef cmd_ref = _1705;
|
||||
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8);
|
||||
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12);
|
||||
cmd_ref.offset += 4u;
|
||||
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
|
||||
float2 xy = float2(xy_uint);
|
||||
|
@ -1009,14 +1011,13 @@ void comp_main()
|
|||
rgba[i] = 0.0f.xxxx;
|
||||
}
|
||||
uint clip_depth = 0u;
|
||||
bool mem_ok = _297.Load(4) == 0u;
|
||||
float df[8];
|
||||
TileSegRef tile_seg_ref;
|
||||
float area[8];
|
||||
uint blend_stack[4][8];
|
||||
uint base_ix_1;
|
||||
uint bg_rgba;
|
||||
while (mem_ok)
|
||||
while (true)
|
||||
{
|
||||
Alloc param_3 = cmd_alloc;
|
||||
CmdRef param_4 = cmd_ref;
|
||||
|
@ -1036,13 +1037,13 @@ void comp_main()
|
|||
{
|
||||
df[k] = 1000000000.0f;
|
||||
}
|
||||
TileSegRef _1810 = { stroke.tile_ref };
|
||||
tile_seg_ref = _1810;
|
||||
TileSegRef _1805 = { stroke.tile_ref };
|
||||
tile_seg_ref = _1805;
|
||||
do
|
||||
{
|
||||
uint param_7 = tile_seg_ref.offset;
|
||||
uint param_8 = 24u;
|
||||
bool param_9 = mem_ok;
|
||||
bool param_9 = true;
|
||||
Alloc param_10 = new_alloc(param_7, param_8, param_9);
|
||||
TileSegRef param_11 = tile_seg_ref;
|
||||
TileSeg seg = TileSeg_read(param_10, param_11);
|
||||
|
@ -1073,13 +1074,13 @@ void comp_main()
|
|||
{
|
||||
area[k_3] = float(fill.backdrop);
|
||||
}
|
||||
TileSegRef _1930 = { fill.tile_ref };
|
||||
tile_seg_ref = _1930;
|
||||
TileSegRef _1924 = { fill.tile_ref };
|
||||
tile_seg_ref = _1924;
|
||||
do
|
||||
{
|
||||
uint param_15 = tile_seg_ref.offset;
|
||||
uint param_16 = 24u;
|
||||
bool param_17 = mem_ok;
|
||||
bool param_17 = true;
|
||||
Alloc param_18 = new_alloc(param_15, param_16, param_17);
|
||||
TileSegRef param_19 = tile_seg_ref;
|
||||
TileSeg seg_1 = TileSeg_read(param_18, param_19);
|
||||
|
@ -1163,10 +1164,10 @@ void comp_main()
|
|||
int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
|
||||
float4 fg_rgba = gradients[int2(x, int(lin.index))];
|
||||
float3 param_29 = fg_rgba.xyz;
|
||||
float3 _2264 = fromsRGB(param_29);
|
||||
fg_rgba.x = _2264.x;
|
||||
fg_rgba.y = _2264.y;
|
||||
fg_rgba.z = _2264.z;
|
||||
float3 _2257 = fromsRGB(param_29);
|
||||
fg_rgba.x = _2257.x;
|
||||
fg_rgba.y = _2257.y;
|
||||
fg_rgba.z = _2257.z;
|
||||
float4 fg_k_1 = fg_rgba * area[k_9];
|
||||
rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
|
||||
}
|
||||
|
@ -1189,10 +1190,10 @@ void comp_main()
|
|||
int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
|
||||
float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
|
||||
float3 param_33 = fg_rgba_1.xyz;
|
||||
float3 _2374 = fromsRGB(param_33);
|
||||
fg_rgba_1.x = _2374.x;
|
||||
fg_rgba_1.y = _2374.y;
|
||||
fg_rgba_1.z = _2374.z;
|
||||
float3 _2367 = fromsRGB(param_33);
|
||||
fg_rgba_1.x = _2367.x;
|
||||
fg_rgba_1.y = _2367.y;
|
||||
fg_rgba_1.z = _2367.z;
|
||||
float4 fg_k_2 = fg_rgba_1 * area[k_10];
|
||||
rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
|
||||
}
|
||||
|
@ -1206,9 +1207,9 @@ void comp_main()
|
|||
CmdImage fill_img = Cmd_Image_read(param_34, param_35);
|
||||
uint2 param_36 = xy_uint;
|
||||
CmdImage param_37 = fill_img;
|
||||
float4 _2417[8];
|
||||
fillImage(_2417, param_36, param_37);
|
||||
float4 img[8] = _2417;
|
||||
float4 _2410[8];
|
||||
fillImage(_2410, param_36, param_37);
|
||||
float4 img[8] = _2410;
|
||||
for (uint k_11 = 0u; k_11 < 8u; k_11++)
|
||||
{
|
||||
float4 fg_k_3 = img[k_11] * area[k_11];
|
||||
|
@ -1224,8 +1225,8 @@ void comp_main()
|
|||
for (uint k_12 = 0u; k_12 < 8u; k_12++)
|
||||
{
|
||||
float4 param_38 = float4(rgba[k_12]);
|
||||
uint _2479 = packsRGB(param_38);
|
||||
blend_stack[clip_depth][k_12] = _2479;
|
||||
uint _2472 = packsRGB(param_38);
|
||||
blend_stack[clip_depth][k_12] = _2472;
|
||||
rgba[k_12] = 0.0f.xxxx;
|
||||
}
|
||||
}
|
||||
|
@ -1235,8 +1236,8 @@ void comp_main()
|
|||
for (uint k_13 = 0u; k_13 < 8u; k_13++)
|
||||
{
|
||||
float4 param_39 = float4(rgba[k_13]);
|
||||
uint _2522 = packsRGB(param_39);
|
||||
_297.Store((base_ix + k_13) * 4 + 8, _2522);
|
||||
uint _2519 = packsRGB(param_39);
|
||||
_2506.Store((base_ix + k_13) * 4 + 0, _2519);
|
||||
rgba[k_13] = 0.0f.xxxx;
|
||||
}
|
||||
}
|
||||
|
@ -1262,7 +1263,7 @@ void comp_main()
|
|||
}
|
||||
else
|
||||
{
|
||||
bg_rgba = _297.Load((base_ix_1 + k_14) * 4 + 8);
|
||||
bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0);
|
||||
}
|
||||
uint param_42 = bg_rgba;
|
||||
float4 bg = unpacksRGB(param_42);
|
||||
|
@ -1279,8 +1280,8 @@ void comp_main()
|
|||
{
|
||||
Alloc param_46 = cmd_alloc;
|
||||
CmdRef param_47 = cmd_ref;
|
||||
CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref };
|
||||
cmd_ref = _2621;
|
||||
CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref };
|
||||
cmd_ref = _2618;
|
||||
cmd_alloc.offset = cmd_ref.offset;
|
||||
break;
|
||||
}
|
||||
|
|
42
piet-gpu/shader/gen/kernel4.msl
generated
42
piet-gpu/shader/gen/kernel4.msl
generated
|
@ -178,6 +178,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -188,6 +189,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -222,6 +224,11 @@ struct ConfigBuf
|
|||
Config conf;
|
||||
};
|
||||
|
||||
struct BlendBuf
|
||||
{
|
||||
uint blend_mem[1];
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
|
@ -1047,7 +1054,7 @@ CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Me
|
|||
return CmdJump_read(param, param_1, v_297);
|
||||
}
|
||||
|
||||
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d<float, access::write> image [[texture(3)]], texture2d<float> image_atlas [[texture(4)]], texture2d<float> gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
{
|
||||
uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x;
|
||||
Alloc param;
|
||||
|
@ -1066,14 +1073,13 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
rgba[i] = float4(0.0);
|
||||
}
|
||||
uint clip_depth = 0u;
|
||||
bool mem_ok = v_297.mem_error == 0u;
|
||||
spvUnsafeArray<float, 8> df;
|
||||
TileSegRef tile_seg_ref;
|
||||
spvUnsafeArray<float, 8> area;
|
||||
spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack;
|
||||
uint base_ix_1;
|
||||
uint bg_rgba;
|
||||
while (mem_ok)
|
||||
while (true)
|
||||
{
|
||||
Alloc param_3 = cmd_alloc;
|
||||
CmdRef param_4 = cmd_ref;
|
||||
|
@ -1098,7 +1104,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
{
|
||||
uint param_7 = tile_seg_ref.offset;
|
||||
uint param_8 = 24u;
|
||||
bool param_9 = mem_ok;
|
||||
bool param_9 = true;
|
||||
Alloc param_10 = new_alloc(param_7, param_8, param_9);
|
||||
TileSegRef param_11 = tile_seg_ref;
|
||||
TileSeg seg = TileSeg_read(param_10, param_11, v_297);
|
||||
|
@ -1134,7 +1140,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
{
|
||||
uint param_15 = tile_seg_ref.offset;
|
||||
uint param_16 = 24u;
|
||||
bool param_17 = mem_ok;
|
||||
bool param_17 = true;
|
||||
Alloc param_18 = new_alloc(param_15, param_16, param_17);
|
||||
TileSegRef param_19 = tile_seg_ref;
|
||||
TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297);
|
||||
|
@ -1218,10 +1224,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
|
||||
float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
|
||||
float3 param_29 = fg_rgba.xyz;
|
||||
float3 _2264 = fromsRGB(param_29);
|
||||
fg_rgba.x = _2264.x;
|
||||
fg_rgba.y = _2264.y;
|
||||
fg_rgba.z = _2264.z;
|
||||
float3 _2257 = fromsRGB(param_29);
|
||||
fg_rgba.x = _2257.x;
|
||||
fg_rgba.y = _2257.y;
|
||||
fg_rgba.z = _2257.z;
|
||||
float4 fg_k_1 = fg_rgba * area[k_9];
|
||||
rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
|
||||
}
|
||||
|
@ -1244,10 +1250,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
|
||||
float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
|
||||
float3 param_33 = fg_rgba_1.xyz;
|
||||
float3 _2374 = fromsRGB(param_33);
|
||||
fg_rgba_1.x = _2374.x;
|
||||
fg_rgba_1.y = _2374.y;
|
||||
fg_rgba_1.z = _2374.z;
|
||||
float3 _2367 = fromsRGB(param_33);
|
||||
fg_rgba_1.x = _2367.x;
|
||||
fg_rgba_1.y = _2367.y;
|
||||
fg_rgba_1.z = _2367.z;
|
||||
float4 fg_k_2 = fg_rgba_1 * area[k_10];
|
||||
rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
|
||||
}
|
||||
|
@ -1278,8 +1284,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
for (uint k_12 = 0u; k_12 < 8u; k_12++)
|
||||
{
|
||||
float4 param_38 = float4(rgba[k_12]);
|
||||
uint _2479 = packsRGB(param_38);
|
||||
blend_stack[clip_depth][k_12] = _2479;
|
||||
uint _2472 = packsRGB(param_38);
|
||||
blend_stack[clip_depth][k_12] = _2472;
|
||||
rgba[k_12] = float4(0.0);
|
||||
}
|
||||
}
|
||||
|
@ -1289,8 +1295,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
for (uint k_13 = 0u; k_13 < 8u; k_13++)
|
||||
{
|
||||
float4 param_39 = float4(rgba[k_13]);
|
||||
uint _2522 = packsRGB(param_39);
|
||||
v_297.memory[base_ix + k_13] = _2522;
|
||||
uint _2519 = packsRGB(param_39);
|
||||
_2506.blend_mem[base_ix + k_13] = _2519;
|
||||
rgba[k_13] = float4(0.0);
|
||||
}
|
||||
}
|
||||
|
@ -1316,7 +1322,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
}
|
||||
else
|
||||
{
|
||||
bg_rgba = v_297.memory[base_ix_1 + k_14];
|
||||
bg_rgba = _2506.blend_mem[base_ix_1 + k_14];
|
||||
}
|
||||
uint param_42 = bg_rgba;
|
||||
float4 bg = unpacksRGB(param_42);
|
||||
|
|
BIN
piet-gpu/shader/gen/kernel4.spv
generated
BIN
piet-gpu/shader/gen/kernel4.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/kernel4_gray.dxil
generated
BIN
piet-gpu/shader/gen/kernel4_gray.dxil
generated
Binary file not shown.
67
piet-gpu/shader/gen/kernel4_gray.hlsl
generated
67
piet-gpu/shader/gen/kernel4_gray.hlsl
generated
|
@ -130,6 +130,7 @@ struct TileSeg
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -163,9 +164,10 @@ static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u);
|
|||
|
||||
RWByteAddressBuffer _297 : register(u0, space0);
|
||||
ByteAddressBuffer _1681 : register(t1, space0);
|
||||
RWTexture2D<unorm float4> image_atlas : register(u3, space0);
|
||||
RWTexture2D<unorm float4> gradients : register(u4, space0);
|
||||
RWTexture2D<unorm float> image : register(u2, space0);
|
||||
RWByteAddressBuffer _2506 : register(u2, space0);
|
||||
RWTexture2D<unorm float4> image_atlas : register(u4, space0);
|
||||
RWTexture2D<unorm float4> gradients : register(u5, space0);
|
||||
RWTexture2D<unorm float> image : register(u3, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -206,7 +208,7 @@ uint read_mem(Alloc alloc, uint offset)
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = _297.Load(offset * 4 + 8);
|
||||
uint v = _297.Load(offset * 4 + 12);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -989,9 +991,9 @@ CmdJump Cmd_Jump_read(Alloc a, CmdRef ref)
|
|||
|
||||
void comp_main()
|
||||
{
|
||||
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(8)) + gl_WorkGroupID.x;
|
||||
uint tile_ix = (gl_WorkGroupID.y * _1681.Load(12)) + gl_WorkGroupID.x;
|
||||
Alloc _1696;
|
||||
_1696.offset = _1681.Load(24);
|
||||
_1696.offset = _1681.Load(28);
|
||||
Alloc param;
|
||||
param.offset = _1696.offset;
|
||||
uint param_1 = tile_ix * 1024u;
|
||||
|
@ -999,7 +1001,7 @@ void comp_main()
|
|||
Alloc cmd_alloc = slice_mem(param, param_1, param_2);
|
||||
CmdRef _1705 = { cmd_alloc.offset };
|
||||
CmdRef cmd_ref = _1705;
|
||||
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 8);
|
||||
uint blend_offset = _297.Load((cmd_ref.offset >> uint(2)) * 4 + 12);
|
||||
cmd_ref.offset += 4u;
|
||||
uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y));
|
||||
float2 xy = float2(xy_uint);
|
||||
|
@ -1009,14 +1011,13 @@ void comp_main()
|
|||
rgba[i] = 0.0f.xxxx;
|
||||
}
|
||||
uint clip_depth = 0u;
|
||||
bool mem_ok = _297.Load(4) == 0u;
|
||||
float df[8];
|
||||
TileSegRef tile_seg_ref;
|
||||
float area[8];
|
||||
uint blend_stack[4][8];
|
||||
uint base_ix_1;
|
||||
uint bg_rgba;
|
||||
while (mem_ok)
|
||||
while (true)
|
||||
{
|
||||
Alloc param_3 = cmd_alloc;
|
||||
CmdRef param_4 = cmd_ref;
|
||||
|
@ -1036,13 +1037,13 @@ void comp_main()
|
|||
{
|
||||
df[k] = 1000000000.0f;
|
||||
}
|
||||
TileSegRef _1810 = { stroke.tile_ref };
|
||||
tile_seg_ref = _1810;
|
||||
TileSegRef _1805 = { stroke.tile_ref };
|
||||
tile_seg_ref = _1805;
|
||||
do
|
||||
{
|
||||
uint param_7 = tile_seg_ref.offset;
|
||||
uint param_8 = 24u;
|
||||
bool param_9 = mem_ok;
|
||||
bool param_9 = true;
|
||||
Alloc param_10 = new_alloc(param_7, param_8, param_9);
|
||||
TileSegRef param_11 = tile_seg_ref;
|
||||
TileSeg seg = TileSeg_read(param_10, param_11);
|
||||
|
@ -1073,13 +1074,13 @@ void comp_main()
|
|||
{
|
||||
area[k_3] = float(fill.backdrop);
|
||||
}
|
||||
TileSegRef _1930 = { fill.tile_ref };
|
||||
tile_seg_ref = _1930;
|
||||
TileSegRef _1924 = { fill.tile_ref };
|
||||
tile_seg_ref = _1924;
|
||||
do
|
||||
{
|
||||
uint param_15 = tile_seg_ref.offset;
|
||||
uint param_16 = 24u;
|
||||
bool param_17 = mem_ok;
|
||||
bool param_17 = true;
|
||||
Alloc param_18 = new_alloc(param_15, param_16, param_17);
|
||||
TileSegRef param_19 = tile_seg_ref;
|
||||
TileSeg seg_1 = TileSeg_read(param_18, param_19);
|
||||
|
@ -1163,10 +1164,10 @@ void comp_main()
|
|||
int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f));
|
||||
float4 fg_rgba = gradients[int2(x, int(lin.index))];
|
||||
float3 param_29 = fg_rgba.xyz;
|
||||
float3 _2264 = fromsRGB(param_29);
|
||||
fg_rgba.x = _2264.x;
|
||||
fg_rgba.y = _2264.y;
|
||||
fg_rgba.z = _2264.z;
|
||||
float3 _2257 = fromsRGB(param_29);
|
||||
fg_rgba.x = _2257.x;
|
||||
fg_rgba.y = _2257.y;
|
||||
fg_rgba.z = _2257.z;
|
||||
float4 fg_k_1 = fg_rgba * area[k_9];
|
||||
rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1;
|
||||
}
|
||||
|
@ -1189,10 +1190,10 @@ void comp_main()
|
|||
int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f));
|
||||
float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))];
|
||||
float3 param_33 = fg_rgba_1.xyz;
|
||||
float3 _2374 = fromsRGB(param_33);
|
||||
fg_rgba_1.x = _2374.x;
|
||||
fg_rgba_1.y = _2374.y;
|
||||
fg_rgba_1.z = _2374.z;
|
||||
float3 _2367 = fromsRGB(param_33);
|
||||
fg_rgba_1.x = _2367.x;
|
||||
fg_rgba_1.y = _2367.y;
|
||||
fg_rgba_1.z = _2367.z;
|
||||
float4 fg_k_2 = fg_rgba_1 * area[k_10];
|
||||
rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2;
|
||||
}
|
||||
|
@ -1206,9 +1207,9 @@ void comp_main()
|
|||
CmdImage fill_img = Cmd_Image_read(param_34, param_35);
|
||||
uint2 param_36 = xy_uint;
|
||||
CmdImage param_37 = fill_img;
|
||||
float4 _2417[8];
|
||||
fillImage(_2417, param_36, param_37);
|
||||
float4 img[8] = _2417;
|
||||
float4 _2410[8];
|
||||
fillImage(_2410, param_36, param_37);
|
||||
float4 img[8] = _2410;
|
||||
for (uint k_11 = 0u; k_11 < 8u; k_11++)
|
||||
{
|
||||
float4 fg_k_3 = img[k_11] * area[k_11];
|
||||
|
@ -1224,8 +1225,8 @@ void comp_main()
|
|||
for (uint k_12 = 0u; k_12 < 8u; k_12++)
|
||||
{
|
||||
float4 param_38 = float4(rgba[k_12]);
|
||||
uint _2479 = packsRGB(param_38);
|
||||
blend_stack[clip_depth][k_12] = _2479;
|
||||
uint _2472 = packsRGB(param_38);
|
||||
blend_stack[clip_depth][k_12] = _2472;
|
||||
rgba[k_12] = 0.0f.xxxx;
|
||||
}
|
||||
}
|
||||
|
@ -1235,8 +1236,8 @@ void comp_main()
|
|||
for (uint k_13 = 0u; k_13 < 8u; k_13++)
|
||||
{
|
||||
float4 param_39 = float4(rgba[k_13]);
|
||||
uint _2522 = packsRGB(param_39);
|
||||
_297.Store((base_ix + k_13) * 4 + 8, _2522);
|
||||
uint _2519 = packsRGB(param_39);
|
||||
_2506.Store((base_ix + k_13) * 4 + 0, _2519);
|
||||
rgba[k_13] = 0.0f.xxxx;
|
||||
}
|
||||
}
|
||||
|
@ -1262,7 +1263,7 @@ void comp_main()
|
|||
}
|
||||
else
|
||||
{
|
||||
bg_rgba = _297.Load((base_ix_1 + k_14) * 4 + 8);
|
||||
bg_rgba = _2506.Load((base_ix_1 + k_14) * 4 + 0);
|
||||
}
|
||||
uint param_42 = bg_rgba;
|
||||
float4 bg = unpacksRGB(param_42);
|
||||
|
@ -1279,8 +1280,8 @@ void comp_main()
|
|||
{
|
||||
Alloc param_46 = cmd_alloc;
|
||||
CmdRef param_47 = cmd_ref;
|
||||
CmdRef _2621 = { Cmd_Jump_read(param_46, param_47).new_ref };
|
||||
cmd_ref = _2621;
|
||||
CmdRef _2618 = { Cmd_Jump_read(param_46, param_47).new_ref };
|
||||
cmd_ref = _2618;
|
||||
cmd_alloc.offset = cmd_ref.offset;
|
||||
break;
|
||||
}
|
||||
|
|
42
piet-gpu/shader/gen/kernel4_gray.msl
generated
42
piet-gpu/shader/gen/kernel4_gray.msl
generated
|
@ -178,6 +178,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -188,6 +189,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -222,6 +224,11 @@ struct ConfigBuf
|
|||
Config conf;
|
||||
};
|
||||
|
||||
struct BlendBuf
|
||||
{
|
||||
uint blend_mem[1];
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 4u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
|
@ -1047,7 +1054,7 @@ CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Me
|
|||
return CmdJump_read(param, param_1, v_297);
|
||||
}
|
||||
|
||||
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], texture2d<float, access::write> image [[texture(2)]], texture2d<float> image_atlas [[texture(3)]], texture2d<float> gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1681 [[buffer(1)]], device BlendBuf& _2506 [[buffer(2)]], texture2d<float, access::write> image [[texture(3)]], texture2d<float> image_atlas [[texture(4)]], texture2d<float> gradients [[texture(5)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||
{
|
||||
uint tile_ix = (gl_WorkGroupID.y * _1681.conf.width_in_tiles) + gl_WorkGroupID.x;
|
||||
Alloc param;
|
||||
|
@ -1066,14 +1073,13 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
rgba[i] = float4(0.0);
|
||||
}
|
||||
uint clip_depth = 0u;
|
||||
bool mem_ok = v_297.mem_error == 0u;
|
||||
spvUnsafeArray<float, 8> df;
|
||||
TileSegRef tile_seg_ref;
|
||||
spvUnsafeArray<float, 8> area;
|
||||
spvUnsafeArray<spvUnsafeArray<uint, 8>, 4> blend_stack;
|
||||
uint base_ix_1;
|
||||
uint bg_rgba;
|
||||
while (mem_ok)
|
||||
while (true)
|
||||
{
|
||||
Alloc param_3 = cmd_alloc;
|
||||
CmdRef param_4 = cmd_ref;
|
||||
|
@ -1098,7 +1104,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
{
|
||||
uint param_7 = tile_seg_ref.offset;
|
||||
uint param_8 = 24u;
|
||||
bool param_9 = mem_ok;
|
||||
bool param_9 = true;
|
||||
Alloc param_10 = new_alloc(param_7, param_8, param_9);
|
||||
TileSegRef param_11 = tile_seg_ref;
|
||||
TileSeg seg = TileSeg_read(param_10, param_11, v_297);
|
||||
|
@ -1134,7 +1140,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
{
|
||||
uint param_15 = tile_seg_ref.offset;
|
||||
uint param_16 = 24u;
|
||||
bool param_17 = mem_ok;
|
||||
bool param_17 = true;
|
||||
Alloc param_18 = new_alloc(param_15, param_16, param_17);
|
||||
TileSegRef param_19 = tile_seg_ref;
|
||||
TileSeg seg_1 = TileSeg_read(param_18, param_19, v_297);
|
||||
|
@ -1218,10 +1224,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0));
|
||||
float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index))));
|
||||
float3 param_29 = fg_rgba.xyz;
|
||||
float3 _2264 = fromsRGB(param_29);
|
||||
fg_rgba.x = _2264.x;
|
||||
fg_rgba.y = _2264.y;
|
||||
fg_rgba.z = _2264.z;
|
||||
float3 _2257 = fromsRGB(param_29);
|
||||
fg_rgba.x = _2257.x;
|
||||
fg_rgba.y = _2257.y;
|
||||
fg_rgba.z = _2257.z;
|
||||
float4 fg_k_1 = fg_rgba * area[k_9];
|
||||
rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;
|
||||
}
|
||||
|
@ -1244,10 +1250,10 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0));
|
||||
float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index))));
|
||||
float3 param_33 = fg_rgba_1.xyz;
|
||||
float3 _2374 = fromsRGB(param_33);
|
||||
fg_rgba_1.x = _2374.x;
|
||||
fg_rgba_1.y = _2374.y;
|
||||
fg_rgba_1.z = _2374.z;
|
||||
float3 _2367 = fromsRGB(param_33);
|
||||
fg_rgba_1.x = _2367.x;
|
||||
fg_rgba_1.y = _2367.y;
|
||||
fg_rgba_1.z = _2367.z;
|
||||
float4 fg_k_2 = fg_rgba_1 * area[k_10];
|
||||
rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2;
|
||||
}
|
||||
|
@ -1278,8 +1284,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
for (uint k_12 = 0u; k_12 < 8u; k_12++)
|
||||
{
|
||||
float4 param_38 = float4(rgba[k_12]);
|
||||
uint _2479 = packsRGB(param_38);
|
||||
blend_stack[clip_depth][k_12] = _2479;
|
||||
uint _2472 = packsRGB(param_38);
|
||||
blend_stack[clip_depth][k_12] = _2472;
|
||||
rgba[k_12] = float4(0.0);
|
||||
}
|
||||
}
|
||||
|
@ -1289,8 +1295,8 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
for (uint k_13 = 0u; k_13 < 8u; k_13++)
|
||||
{
|
||||
float4 param_39 = float4(rgba[k_13]);
|
||||
uint _2522 = packsRGB(param_39);
|
||||
v_297.memory[base_ix + k_13] = _2522;
|
||||
uint _2519 = packsRGB(param_39);
|
||||
_2506.blend_mem[base_ix + k_13] = _2519;
|
||||
rgba[k_13] = float4(0.0);
|
||||
}
|
||||
}
|
||||
|
@ -1316,7 +1322,7 @@ kernel void main0(device Memory& v_297 [[buffer(0)]], const device ConfigBuf& _1
|
|||
}
|
||||
else
|
||||
{
|
||||
bg_rgba = v_297.memory[base_ix_1 + k_14];
|
||||
bg_rgba = _2506.blend_mem[base_ix_1 + k_14];
|
||||
}
|
||||
uint param_42 = bg_rgba;
|
||||
float4 bg = unpacksRGB(param_42);
|
||||
|
|
BIN
piet-gpu/shader/gen/kernel4_gray.spv
generated
BIN
piet-gpu/shader/gen/kernel4_gray.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/path_coarse.dxil
generated
BIN
piet-gpu/shader/gen/path_coarse.dxil
generated
Binary file not shown.
328
piet-gpu/shader/gen/path_coarse.hlsl
generated
328
piet-gpu/shader/gen/path_coarse.hlsl
generated
|
@ -3,12 +3,6 @@ struct Alloc
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct MallocResult
|
||||
{
|
||||
Alloc alloc;
|
||||
bool failed;
|
||||
};
|
||||
|
||||
struct PathCubicRef
|
||||
{
|
||||
uint offset;
|
||||
|
@ -74,6 +68,7 @@ struct SubdivResult
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -105,10 +100,10 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(32u, 1u, 1u);
|
||||
|
||||
static const PathSegTag _721 = { 0u, 0u };
|
||||
static const PathSegTag _722 = { 0u, 0u };
|
||||
|
||||
RWByteAddressBuffer _136 : register(u0, space0);
|
||||
ByteAddressBuffer _710 : register(t1, space0);
|
||||
RWByteAddressBuffer _143 : register(u0, space0);
|
||||
ByteAddressBuffer _711 : register(t1, space0);
|
||||
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
struct SPIRV_Cross_Input
|
||||
|
@ -116,6 +111,15 @@ struct SPIRV_Cross_Input
|
|||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||
};
|
||||
|
||||
static bool mem_ok;
|
||||
|
||||
bool check_deps(uint dep_stage)
|
||||
{
|
||||
uint _149;
|
||||
_143.InterlockedOr(4, 0u, _149);
|
||||
return (_149 & dep_stage) == 0u;
|
||||
}
|
||||
|
||||
bool touch_mem(Alloc alloc, uint offset)
|
||||
{
|
||||
return true;
|
||||
|
@ -129,7 +133,7 @@ uint read_mem(Alloc alloc, uint offset)
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = _136.Load(offset * 4 + 8);
|
||||
uint v = _143.Load(offset * 4 + 12);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -138,8 +142,8 @@ PathSegTag PathSeg_tag(Alloc a, PathSegRef ref)
|
|||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint tag_and_flags = read_mem(param, param_1);
|
||||
PathSegTag _367 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
return _367;
|
||||
PathSegTag _362 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
return _362;
|
||||
}
|
||||
|
||||
PathCubic PathCubic_read(Alloc a, PathCubicRef ref)
|
||||
|
@ -194,9 +198,9 @@ PathCubic PathCubic_read(Alloc a, PathCubicRef ref)
|
|||
|
||||
PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref)
|
||||
{
|
||||
PathCubicRef _373 = { ref.offset + 4u };
|
||||
PathCubicRef _368 = { ref.offset + 4u };
|
||||
Alloc param = a;
|
||||
PathCubicRef param_1 = _373;
|
||||
PathCubicRef param_1 = _368;
|
||||
return PathCubic_read(param, param_1);
|
||||
}
|
||||
|
||||
|
@ -240,8 +244,8 @@ SubdivResult estimate_subdiv(float2 p0, float2 p1, float2 p2, float sqrt_tol)
|
|||
val = (sqrt_tol * da) / approx_parabola_integral(param_2);
|
||||
}
|
||||
}
|
||||
SubdivResult _695 = { val, a0, a2 };
|
||||
return _695;
|
||||
SubdivResult _690 = { val, a0, a2 };
|
||||
return _690;
|
||||
}
|
||||
|
||||
uint fill_mode_from_flags(uint flags)
|
||||
|
@ -263,12 +267,12 @@ Path Path_read(Alloc a, PathRef ref)
|
|||
uint raw2 = read_mem(param_4, param_5);
|
||||
Path s;
|
||||
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
||||
TileRef _427 = { raw2 };
|
||||
s.tiles = _427;
|
||||
TileRef _422 = { raw2 };
|
||||
s.tiles = _422;
|
||||
return s;
|
||||
}
|
||||
|
||||
Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
||||
Alloc new_alloc(uint offset, uint size, bool mem_ok_1)
|
||||
{
|
||||
Alloc a;
|
||||
a.offset = offset;
|
||||
|
@ -286,33 +290,24 @@ float2 eval_quad(float2 p0, float2 p1, float2 p2, float t)
|
|||
return (p0 * (mt * mt)) + (((p1 * (mt * 2.0f)) + (p2 * t)) * t);
|
||||
}
|
||||
|
||||
MallocResult malloc(uint size)
|
||||
uint malloc_stage(uint size, uint mem_size, uint stage)
|
||||
{
|
||||
uint _142;
|
||||
_136.InterlockedAdd(0, size, _142);
|
||||
uint offset = _142;
|
||||
uint _149;
|
||||
_136.GetDimensions(_149);
|
||||
_149 = (_149 - 8) / 4;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int(_149) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
uint _158;
|
||||
_143.InterlockedAdd(0, size, _158);
|
||||
uint offset = _158;
|
||||
if ((offset + size) > mem_size)
|
||||
{
|
||||
uint _171;
|
||||
_136.InterlockedMax(4, 1u, _171);
|
||||
return r;
|
||||
uint _168;
|
||||
_143.InterlockedOr(4, stage, _168);
|
||||
offset = 0u;
|
||||
}
|
||||
return r;
|
||||
return offset;
|
||||
}
|
||||
|
||||
TileRef Tile_index(TileRef ref, uint index)
|
||||
{
|
||||
TileRef _385 = { ref.offset + (index * 8u) };
|
||||
return _385;
|
||||
TileRef _380 = { ref.offset + (index * 8u) };
|
||||
return _380;
|
||||
}
|
||||
|
||||
void write_mem(Alloc alloc, uint offset, uint val)
|
||||
|
@ -323,7 +318,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_136.Store(offset * 4 + 8, val);
|
||||
_143.Store(offset * 4 + 12, val);
|
||||
}
|
||||
|
||||
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)
|
||||
|
@ -357,30 +352,36 @@ void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s)
|
|||
|
||||
void comp_main()
|
||||
{
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
PathSegRef _718 = { _710.Load(28) + (element_ix * 52u) };
|
||||
PathSegRef ref = _718;
|
||||
PathSegTag tag = _721;
|
||||
if (element_ix < _710.Load(4))
|
||||
mem_ok = true;
|
||||
uint param = 7u;
|
||||
bool _694 = check_deps(param);
|
||||
if (!_694)
|
||||
{
|
||||
Alloc _731;
|
||||
_731.offset = _710.Load(28);
|
||||
Alloc param;
|
||||
param.offset = _731.offset;
|
||||
PathSegRef param_1 = ref;
|
||||
tag = PathSeg_tag(param, param_1);
|
||||
return;
|
||||
}
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
PathSegRef _719 = { _711.Load(32) + (element_ix * 52u) };
|
||||
PathSegRef ref = _719;
|
||||
PathSegTag tag = _722;
|
||||
if (element_ix < _711.Load(8))
|
||||
{
|
||||
Alloc _732;
|
||||
_732.offset = _711.Load(32);
|
||||
Alloc param_1;
|
||||
param_1.offset = _732.offset;
|
||||
PathSegRef param_2 = ref;
|
||||
tag = PathSeg_tag(param_1, param_2);
|
||||
}
|
||||
bool mem_ok = _136.Load(4) == 0u;
|
||||
switch (tag.tag)
|
||||
{
|
||||
case 1u:
|
||||
{
|
||||
Alloc _748;
|
||||
_748.offset = _710.Load(28);
|
||||
Alloc param_2;
|
||||
param_2.offset = _748.offset;
|
||||
PathSegRef param_3 = ref;
|
||||
PathCubic cubic = PathSeg_Cubic_read(param_2, param_3);
|
||||
Alloc _745;
|
||||
_745.offset = _711.Load(32);
|
||||
Alloc param_3;
|
||||
param_3.offset = _745.offset;
|
||||
PathSegRef param_4 = ref;
|
||||
PathCubic cubic = PathSeg_Cubic_read(param_3, param_4);
|
||||
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0f) + cubic.p0) - cubic.p3;
|
||||
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
|
||||
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875f, 0.16666667163372039794921875f))), 1u);
|
||||
|
@ -392,43 +393,43 @@ void comp_main()
|
|||
for (uint i = 0u; i < n_quads; i++)
|
||||
{
|
||||
float t = float(i + 1u) * _step;
|
||||
float2 param_4 = cubic.p0;
|
||||
float2 param_5 = cubic.p1;
|
||||
float2 param_6 = cubic.p2;
|
||||
float2 param_7 = cubic.p3;
|
||||
float param_8 = t;
|
||||
float2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8);
|
||||
float2 param_9 = cubic.p0;
|
||||
float2 param_10 = cubic.p1;
|
||||
float2 param_11 = cubic.p2;
|
||||
float2 param_12 = cubic.p3;
|
||||
float param_13 = t - (0.5f * _step);
|
||||
float2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13);
|
||||
float2 param_5 = cubic.p0;
|
||||
float2 param_6 = cubic.p1;
|
||||
float2 param_7 = cubic.p2;
|
||||
float2 param_8 = cubic.p3;
|
||||
float param_9 = t;
|
||||
float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9);
|
||||
float2 param_10 = cubic.p0;
|
||||
float2 param_11 = cubic.p1;
|
||||
float2 param_12 = cubic.p2;
|
||||
float2 param_13 = cubic.p3;
|
||||
float param_14 = t - (0.5f * _step);
|
||||
float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14);
|
||||
qp1 = (qp1 * 2.0f) - ((qp0 + qp2) * 0.5f);
|
||||
float2 param_14 = qp0;
|
||||
float2 param_15 = qp1;
|
||||
float2 param_16 = qp2;
|
||||
float param_17 = 0.4743416607379913330078125f;
|
||||
SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17);
|
||||
float2 param_15 = qp0;
|
||||
float2 param_16 = qp1;
|
||||
float2 param_17 = qp2;
|
||||
float param_18 = 0.4743416607379913330078125f;
|
||||
SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18);
|
||||
keep_params[i] = params;
|
||||
val += params.val;
|
||||
qp0 = qp2;
|
||||
}
|
||||
uint n = max(uint(ceil((val * 0.5f) / 0.4743416607379913330078125f)), 1u);
|
||||
uint param_18 = tag.flags;
|
||||
bool is_stroke = fill_mode_from_flags(param_18) == 1u;
|
||||
uint param_19 = tag.flags;
|
||||
bool is_stroke = fill_mode_from_flags(param_19) == 1u;
|
||||
uint path_ix = cubic.path_ix;
|
||||
PathRef _904 = { _710.Load(16) + (path_ix * 12u) };
|
||||
Alloc _907;
|
||||
_907.offset = _710.Load(16);
|
||||
Alloc param_19;
|
||||
param_19.offset = _907.offset;
|
||||
PathRef param_20 = _904;
|
||||
Path path = Path_read(param_19, param_20);
|
||||
uint param_21 = path.tiles.offset;
|
||||
uint param_22 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_23 = mem_ok;
|
||||
Alloc path_alloc = new_alloc(param_21, param_22, param_23);
|
||||
PathRef _901 = { _711.Load(20) + (path_ix * 12u) };
|
||||
Alloc _904;
|
||||
_904.offset = _711.Load(20);
|
||||
Alloc param_20;
|
||||
param_20.offset = _904.offset;
|
||||
PathRef param_21 = _901;
|
||||
Path path = Path_read(param_20, param_21);
|
||||
uint param_22 = path.tiles.offset;
|
||||
uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_24 = true;
|
||||
Alloc path_alloc = new_alloc(param_22, param_23, param_24);
|
||||
int4 bbox = int4(path.bbox);
|
||||
float2 p0 = cubic.p0;
|
||||
qp0 = cubic.p0;
|
||||
|
@ -436,44 +437,44 @@ void comp_main()
|
|||
int n_out = 1;
|
||||
float val_sum = 0.0f;
|
||||
float2 p1;
|
||||
float _1147;
|
||||
float _1143;
|
||||
TileSeg tile_seg;
|
||||
for (uint i_1 = 0u; i_1 < n_quads; i_1++)
|
||||
{
|
||||
float t_1 = float(i_1 + 1u) * _step;
|
||||
float2 param_24 = cubic.p0;
|
||||
float2 param_25 = cubic.p1;
|
||||
float2 param_26 = cubic.p2;
|
||||
float2 param_27 = cubic.p3;
|
||||
float param_28 = t_1;
|
||||
float2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28);
|
||||
float2 param_29 = cubic.p0;
|
||||
float2 param_30 = cubic.p1;
|
||||
float2 param_31 = cubic.p2;
|
||||
float2 param_32 = cubic.p3;
|
||||
float param_33 = t_1 - (0.5f * _step);
|
||||
float2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33);
|
||||
float2 param_25 = cubic.p0;
|
||||
float2 param_26 = cubic.p1;
|
||||
float2 param_27 = cubic.p2;
|
||||
float2 param_28 = cubic.p3;
|
||||
float param_29 = t_1;
|
||||
float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29);
|
||||
float2 param_30 = cubic.p0;
|
||||
float2 param_31 = cubic.p1;
|
||||
float2 param_32 = cubic.p2;
|
||||
float2 param_33 = cubic.p3;
|
||||
float param_34 = t_1 - (0.5f * _step);
|
||||
float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34);
|
||||
qp1_1 = (qp1_1 * 2.0f) - ((qp0 + qp2_1) * 0.5f);
|
||||
SubdivResult params_1 = keep_params[i_1];
|
||||
float param_34 = params_1.a0;
|
||||
float u0 = approx_parabola_inv_integral(param_34);
|
||||
float param_35 = params_1.a2;
|
||||
float u2 = approx_parabola_inv_integral(param_35);
|
||||
float param_35 = params_1.a0;
|
||||
float u0 = approx_parabola_inv_integral(param_35);
|
||||
float param_36 = params_1.a2;
|
||||
float u2 = approx_parabola_inv_integral(param_36);
|
||||
float uscale = 1.0f / (u2 - u0);
|
||||
float target = float(n_out) * v_step;
|
||||
for (;;)
|
||||
{
|
||||
bool _1040 = uint(n_out) == n;
|
||||
bool _1050;
|
||||
if (!_1040)
|
||||
bool _1036 = uint(n_out) == n;
|
||||
bool _1046;
|
||||
if (!_1036)
|
||||
{
|
||||
_1050 = target < (val_sum + params_1.val);
|
||||
_1046 = target < (val_sum + params_1.val);
|
||||
}
|
||||
else
|
||||
{
|
||||
_1050 = _1040;
|
||||
_1046 = _1036;
|
||||
}
|
||||
if (_1050)
|
||||
if (_1046)
|
||||
{
|
||||
if (uint(n_out) == n)
|
||||
{
|
||||
|
@ -483,14 +484,14 @@ void comp_main()
|
|||
{
|
||||
float u = (target - val_sum) / params_1.val;
|
||||
float a = lerp(params_1.a0, params_1.a2, u);
|
||||
float param_36 = a;
|
||||
float au = approx_parabola_inv_integral(param_36);
|
||||
float param_37 = a;
|
||||
float au = approx_parabola_inv_integral(param_37);
|
||||
float t_2 = (au - u0) * uscale;
|
||||
float2 param_37 = qp0;
|
||||
float2 param_38 = qp1_1;
|
||||
float2 param_39 = qp2_1;
|
||||
float param_40 = t_2;
|
||||
p1 = eval_quad(param_37, param_38, param_39, param_40);
|
||||
float2 param_38 = qp0;
|
||||
float2 param_39 = qp1_1;
|
||||
float2 param_40 = qp2_1;
|
||||
float param_41 = t_2;
|
||||
p1 = eval_quad(param_38, param_39, param_40, param_41);
|
||||
}
|
||||
float xmin = min(p0.x, p1.x) - cubic.stroke.x;
|
||||
float xmax = max(p0.x, p1.x) + cubic.stroke.x;
|
||||
|
@ -500,13 +501,13 @@ void comp_main()
|
|||
float dy = p1.y - p0.y;
|
||||
if (abs(dy) < 9.999999717180685365747194737196e-10f)
|
||||
{
|
||||
_1147 = 1000000000.0f;
|
||||
_1143 = 1000000000.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1147 = dx / dy;
|
||||
_1143 = dx / dy;
|
||||
}
|
||||
float invslope = _1147;
|
||||
float invslope = _1143;
|
||||
float c = (cubic.stroke.x + (abs(invslope) * (8.0f + cubic.stroke.y))) * 0.0625f;
|
||||
float b = invslope;
|
||||
float a_1 = (p0.x - ((p0.y - 8.0f) * b)) * 0.0625f;
|
||||
|
@ -522,14 +523,20 @@ void comp_main()
|
|||
int stride = bbox.z - bbox.x;
|
||||
int base = ((y0 - bbox.y) * stride) - bbox.x;
|
||||
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
||||
uint param_41 = n_tile_alloc * 24u;
|
||||
MallocResult _1263 = malloc(param_41);
|
||||
MallocResult tile_alloc = _1263;
|
||||
if (tile_alloc.failed || (!mem_ok))
|
||||
uint malloc_size = n_tile_alloc * 24u;
|
||||
uint param_42 = malloc_size;
|
||||
uint param_43 = _711.Load(0);
|
||||
uint param_44 = 4u;
|
||||
uint _1265 = malloc_stage(param_42, param_43, param_44);
|
||||
uint tile_offset = _1265;
|
||||
if (tile_offset == 0u)
|
||||
{
|
||||
return;
|
||||
mem_ok = false;
|
||||
}
|
||||
uint tile_offset = tile_alloc.alloc.offset;
|
||||
uint param_45 = tile_offset;
|
||||
uint param_46 = malloc_size;
|
||||
bool param_47 = true;
|
||||
Alloc tile_alloc = new_alloc(param_45, param_46, param_47);
|
||||
int xray = int(floor(p0.x * 0.0625f));
|
||||
int last_xray = int(floor(p1.x * 0.0625f));
|
||||
if (p0.y > p1.y)
|
||||
|
@ -542,39 +549,34 @@ void comp_main()
|
|||
{
|
||||
float tile_y0 = float(y * 16);
|
||||
int xbackdrop = max((xray + 1), bbox.x);
|
||||
bool _1319 = !is_stroke;
|
||||
bool _1329;
|
||||
if (_1319)
|
||||
bool _1322 = !is_stroke;
|
||||
bool _1332;
|
||||
if (_1322)
|
||||
{
|
||||
_1329 = min(p0.y, p1.y) < tile_y0;
|
||||
_1332 = min(p0.y, p1.y) < tile_y0;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1329 = _1319;
|
||||
_1332 = _1322;
|
||||
}
|
||||
bool _1336;
|
||||
if (_1329)
|
||||
bool _1339;
|
||||
if (_1332)
|
||||
{
|
||||
_1336 = xbackdrop < bbox.z;
|
||||
_1339 = xbackdrop < bbox.z;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1336 = _1329;
|
||||
_1339 = _1332;
|
||||
}
|
||||
if (_1336)
|
||||
if (_1339)
|
||||
{
|
||||
int backdrop = (p1.y < p0.y) ? 1 : (-1);
|
||||
TileRef param_42 = path.tiles;
|
||||
uint param_43 = uint(base + xbackdrop);
|
||||
TileRef tile_ref = Tile_index(param_42, param_43);
|
||||
TileRef param_48 = path.tiles;
|
||||
uint param_49 = uint(base + xbackdrop);
|
||||
TileRef tile_ref = Tile_index(param_48, param_49);
|
||||
uint tile_el = tile_ref.offset >> uint(2);
|
||||
Alloc param_44 = path_alloc;
|
||||
uint param_45 = tile_el + 1u;
|
||||
if (touch_mem(param_44, param_45))
|
||||
{
|
||||
uint _1374;
|
||||
_136.InterlockedAdd((tile_el + 1u) * 4 + 8, uint(backdrop), _1374);
|
||||
}
|
||||
uint _1369;
|
||||
_143.InterlockedAdd((tile_el + 1u) * 4 + 12, uint(backdrop), _1369);
|
||||
}
|
||||
int next_xray = last_xray;
|
||||
if (y < (y1 - 1))
|
||||
|
@ -592,20 +594,15 @@ void comp_main()
|
|||
for (int x = xx0; x < xx1; x++)
|
||||
{
|
||||
float tile_x0 = float(x * 16);
|
||||
TileRef _1454 = { path.tiles.offset };
|
||||
TileRef param_46 = _1454;
|
||||
uint param_47 = uint(base + x);
|
||||
TileRef tile_ref_1 = Tile_index(param_46, param_47);
|
||||
TileRef _1449 = { path.tiles.offset };
|
||||
TileRef param_50 = _1449;
|
||||
uint param_51 = uint(base + x);
|
||||
TileRef tile_ref_1 = Tile_index(param_50, param_51);
|
||||
uint tile_el_1 = tile_ref_1.offset >> uint(2);
|
||||
uint old = 0u;
|
||||
Alloc param_48 = path_alloc;
|
||||
uint param_49 = tile_el_1;
|
||||
if (touch_mem(param_48, param_49))
|
||||
{
|
||||
uint _1477;
|
||||
_136.InterlockedExchange(tile_el_1 * 4 + 8, tile_offset, _1477);
|
||||
old = _1477;
|
||||
}
|
||||
uint _1465;
|
||||
_143.InterlockedExchange(tile_el_1 * 4 + 12, tile_offset, _1465);
|
||||
old = _1465;
|
||||
tile_seg.origin = p0;
|
||||
tile_seg._vector = p1 - p0;
|
||||
float y_edge = 0.0f;
|
||||
|
@ -636,11 +633,14 @@ void comp_main()
|
|||
}
|
||||
tile_seg.y_edge = y_edge;
|
||||
tile_seg.next.offset = old;
|
||||
TileSegRef _1559 = { tile_offset };
|
||||
Alloc param_50 = tile_alloc.alloc;
|
||||
TileSegRef param_51 = _1559;
|
||||
TileSeg param_52 = tile_seg;
|
||||
TileSeg_write(param_50, param_51, param_52);
|
||||
if (mem_ok)
|
||||
{
|
||||
TileSegRef _1550 = { tile_offset };
|
||||
Alloc param_52 = tile_alloc;
|
||||
TileSegRef param_53 = _1550;
|
||||
TileSeg param_54 = tile_seg;
|
||||
TileSeg_write(param_52, param_53, param_54);
|
||||
}
|
||||
tile_offset += 24u;
|
||||
}
|
||||
xc += b;
|
||||
|
|
331
piet-gpu/shader/gen/path_coarse.msl
generated
331
piet-gpu/shader/gen/path_coarse.msl
generated
|
@ -51,12 +51,6 @@ struct Alloc
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct MallocResult
|
||||
{
|
||||
Alloc alloc;
|
||||
bool failed;
|
||||
};
|
||||
|
||||
struct PathCubicRef
|
||||
{
|
||||
uint offset;
|
||||
|
@ -124,6 +118,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -134,6 +129,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -170,6 +166,13 @@ struct ConfigBuf
|
|||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(32u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool check_deps(thread const uint& dep_stage, device Memory& v_143)
|
||||
{
|
||||
uint _149 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, 0u, memory_order_relaxed);
|
||||
return (_149 & dep_stage) == 0u;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
||||
{
|
||||
|
@ -177,7 +180,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_136, constant uint& v_136BufferSize)
|
||||
uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_143)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -185,59 +188,59 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = v_136.memory[offset];
|
||||
uint v = v_143.memory[offset];
|
||||
return v;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
|
||||
PathSegTag PathSeg_tag(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143)
|
||||
{
|
||||
Alloc param = a;
|
||||
uint param_1 = ref.offset >> uint(2);
|
||||
uint tag_and_flags = read_mem(param, param_1, v_136, v_136BufferSize);
|
||||
uint tag_and_flags = read_mem(param, param_1, v_143);
|
||||
return PathSegTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) };
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
|
||||
PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref, device Memory& v_143)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint raw0 = read_mem(param, param_1, v_136, v_136BufferSize);
|
||||
uint raw0 = read_mem(param, param_1, v_143);
|
||||
Alloc param_2 = a;
|
||||
uint param_3 = ix + 1u;
|
||||
uint raw1 = read_mem(param_2, param_3, v_136, v_136BufferSize);
|
||||
uint raw1 = read_mem(param_2, param_3, v_143);
|
||||
Alloc param_4 = a;
|
||||
uint param_5 = ix + 2u;
|
||||
uint raw2 = read_mem(param_4, param_5, v_136, v_136BufferSize);
|
||||
uint raw2 = read_mem(param_4, param_5, v_143);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 3u;
|
||||
uint raw3 = read_mem(param_6, param_7, v_136, v_136BufferSize);
|
||||
uint raw3 = read_mem(param_6, param_7, v_143);
|
||||
Alloc param_8 = a;
|
||||
uint param_9 = ix + 4u;
|
||||
uint raw4 = read_mem(param_8, param_9, v_136, v_136BufferSize);
|
||||
uint raw4 = read_mem(param_8, param_9, v_143);
|
||||
Alloc param_10 = a;
|
||||
uint param_11 = ix + 5u;
|
||||
uint raw5 = read_mem(param_10, param_11, v_136, v_136BufferSize);
|
||||
uint raw5 = read_mem(param_10, param_11, v_143);
|
||||
Alloc param_12 = a;
|
||||
uint param_13 = ix + 6u;
|
||||
uint raw6 = read_mem(param_12, param_13, v_136, v_136BufferSize);
|
||||
uint raw6 = read_mem(param_12, param_13, v_143);
|
||||
Alloc param_14 = a;
|
||||
uint param_15 = ix + 7u;
|
||||
uint raw7 = read_mem(param_14, param_15, v_136, v_136BufferSize);
|
||||
uint raw7 = read_mem(param_14, param_15, v_143);
|
||||
Alloc param_16 = a;
|
||||
uint param_17 = ix + 8u;
|
||||
uint raw8 = read_mem(param_16, param_17, v_136, v_136BufferSize);
|
||||
uint raw8 = read_mem(param_16, param_17, v_143);
|
||||
Alloc param_18 = a;
|
||||
uint param_19 = ix + 9u;
|
||||
uint raw9 = read_mem(param_18, param_19, v_136, v_136BufferSize);
|
||||
uint raw9 = read_mem(param_18, param_19, v_143);
|
||||
Alloc param_20 = a;
|
||||
uint param_21 = ix + 10u;
|
||||
uint raw10 = read_mem(param_20, param_21, v_136, v_136BufferSize);
|
||||
uint raw10 = read_mem(param_20, param_21, v_143);
|
||||
Alloc param_22 = a;
|
||||
uint param_23 = ix + 11u;
|
||||
uint raw11 = read_mem(param_22, param_23, v_136, v_136BufferSize);
|
||||
uint raw11 = read_mem(param_22, param_23, v_143);
|
||||
PathCubic s;
|
||||
s.p0 = float2(as_type<float>(raw0), as_type<float>(raw1));
|
||||
s.p1 = float2(as_type<float>(raw2), as_type<float>(raw3));
|
||||
|
@ -250,11 +253,11 @@ PathCubic PathCubic_read(thread const Alloc& a, thread const PathCubicRef& ref,
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
|
||||
PathCubic PathSeg_Cubic_read(thread const Alloc& a, thread const PathSegRef& ref, device Memory& v_143)
|
||||
{
|
||||
Alloc param = a;
|
||||
PathCubicRef param_1 = PathCubicRef{ ref.offset + 4u };
|
||||
return PathCubic_read(param, param_1, v_136, v_136BufferSize);
|
||||
return PathCubic_read(param, param_1, v_143);
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
|
@ -310,18 +313,18 @@ uint fill_mode_from_flags(thread const uint& flags)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_136, constant uint& v_136BufferSize)
|
||||
Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_143)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint raw0 = read_mem(param, param_1, v_136, v_136BufferSize);
|
||||
uint raw0 = read_mem(param, param_1, v_143);
|
||||
Alloc param_2 = a;
|
||||
uint param_3 = ix + 1u;
|
||||
uint raw1 = read_mem(param_2, param_3, v_136, v_136BufferSize);
|
||||
uint raw1 = read_mem(param_2, param_3, v_143);
|
||||
Alloc param_4 = a;
|
||||
uint param_5 = ix + 2u;
|
||||
uint raw2 = read_mem(param_4, param_5, v_136, v_136BufferSize);
|
||||
uint raw2 = read_mem(param_4, param_5, v_143);
|
||||
Path s;
|
||||
s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));
|
||||
s.tiles = TileRef{ raw2 };
|
||||
|
@ -350,22 +353,16 @@ float2 eval_quad(thread const float2& p0, thread const float2& p1, thread const
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
MallocResult malloc(thread const uint& size, device Memory& v_136, constant uint& v_136BufferSize)
|
||||
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_143)
|
||||
{
|
||||
uint _142 = atomic_fetch_add_explicit((device atomic_uint*)&v_136.mem_offset, size, memory_order_relaxed);
|
||||
uint offset = _142;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int((v_136BufferSize - 8) / 4) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
uint _158 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.mem_offset, size, memory_order_relaxed);
|
||||
uint offset = _158;
|
||||
if ((offset + size) > mem_size)
|
||||
{
|
||||
uint _171 = atomic_fetch_max_explicit((device atomic_uint*)&v_136.mem_error, 1u, memory_order_relaxed);
|
||||
return r;
|
||||
uint _168 = atomic_fetch_or_explicit((device atomic_uint*)&v_143.mem_error, stage, memory_order_relaxed);
|
||||
offset = 0u;
|
||||
}
|
||||
return r;
|
||||
return offset;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
|
@ -375,7 +372,7 @@ TileRef Tile_index(thread const TileRef& ref, thread const uint& index)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_136, constant uint& v_136BufferSize)
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_143)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -383,61 +380,66 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
|||
{
|
||||
return;
|
||||
}
|
||||
v_136.memory[offset] = val;
|
||||
v_143.memory[offset] = val;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_136, constant uint& v_136BufferSize)
|
||||
void TileSeg_write(thread const Alloc& a, thread const TileSegRef& ref, thread const TileSeg& s, device Memory& v_143)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = as_type<uint>(s.origin.x);
|
||||
write_mem(param, param_1, param_2, v_136, v_136BufferSize);
|
||||
write_mem(param, param_1, param_2, v_143);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = as_type<uint>(s.origin.y);
|
||||
write_mem(param_3, param_4, param_5, v_136, v_136BufferSize);
|
||||
write_mem(param_3, param_4, param_5, v_143);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = as_type<uint>(s.vector.x);
|
||||
write_mem(param_6, param_7, param_8, v_136, v_136BufferSize);
|
||||
write_mem(param_6, param_7, param_8, v_143);
|
||||
Alloc param_9 = a;
|
||||
uint param_10 = ix + 3u;
|
||||
uint param_11 = as_type<uint>(s.vector.y);
|
||||
write_mem(param_9, param_10, param_11, v_136, v_136BufferSize);
|
||||
write_mem(param_9, param_10, param_11, v_143);
|
||||
Alloc param_12 = a;
|
||||
uint param_13 = ix + 4u;
|
||||
uint param_14 = as_type<uint>(s.y_edge);
|
||||
write_mem(param_12, param_13, param_14, v_136, v_136BufferSize);
|
||||
write_mem(param_12, param_13, param_14, v_143);
|
||||
Alloc param_15 = a;
|
||||
uint param_16 = ix + 5u;
|
||||
uint param_17 = s.next.offset;
|
||||
write_mem(param_15, param_16, param_17, v_136, v_136BufferSize);
|
||||
write_mem(param_15, param_16, param_17, v_143);
|
||||
}
|
||||
|
||||
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_136 [[buffer(0)]], const device ConfigBuf& _710 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
kernel void main0(device Memory& v_143 [[buffer(0)]], const device ConfigBuf& _711 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
constant uint& v_136BufferSize = spvBufferSizeConstants[0];
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
PathSegRef ref = PathSegRef{ _710.conf.pathseg_alloc.offset + (element_ix * 52u) };
|
||||
PathSegTag tag = PathSegTag{ 0u, 0u };
|
||||
if (element_ix < _710.conf.n_pathseg)
|
||||
bool mem_ok = true;
|
||||
uint param = 7u;
|
||||
bool _694 = check_deps(param, v_143);
|
||||
if (!_694)
|
||||
{
|
||||
Alloc param;
|
||||
param.offset = _710.conf.pathseg_alloc.offset;
|
||||
PathSegRef param_1 = ref;
|
||||
tag = PathSeg_tag(param, param_1, v_136, v_136BufferSize);
|
||||
return;
|
||||
}
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
PathSegRef ref = PathSegRef{ _711.conf.pathseg_alloc.offset + (element_ix * 52u) };
|
||||
PathSegTag tag = PathSegTag{ 0u, 0u };
|
||||
if (element_ix < _711.conf.n_pathseg)
|
||||
{
|
||||
Alloc param_1;
|
||||
param_1.offset = _711.conf.pathseg_alloc.offset;
|
||||
PathSegRef param_2 = ref;
|
||||
tag = PathSeg_tag(param_1, param_2, v_143);
|
||||
}
|
||||
bool mem_ok = v_136.mem_error == 0u;
|
||||
switch (tag.tag)
|
||||
{
|
||||
case 1u:
|
||||
{
|
||||
Alloc param_2;
|
||||
param_2.offset = _710.conf.pathseg_alloc.offset;
|
||||
PathSegRef param_3 = ref;
|
||||
PathCubic cubic = PathSeg_Cubic_read(param_2, param_3, v_136, v_136BufferSize);
|
||||
Alloc param_3;
|
||||
param_3.offset = _711.conf.pathseg_alloc.offset;
|
||||
PathSegRef param_4 = ref;
|
||||
PathCubic cubic = PathSeg_Cubic_read(param_3, param_4, v_143);
|
||||
float2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3;
|
||||
float err = (err_v.x * err_v.x) + (err_v.y * err_v.y);
|
||||
uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u);
|
||||
|
@ -449,40 +451,40 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
for (uint i = 0u; i < n_quads; i++)
|
||||
{
|
||||
float t = float(i + 1u) * _step;
|
||||
float2 param_4 = cubic.p0;
|
||||
float2 param_5 = cubic.p1;
|
||||
float2 param_6 = cubic.p2;
|
||||
float2 param_7 = cubic.p3;
|
||||
float param_8 = t;
|
||||
float2 qp2 = eval_cubic(param_4, param_5, param_6, param_7, param_8);
|
||||
float2 param_9 = cubic.p0;
|
||||
float2 param_10 = cubic.p1;
|
||||
float2 param_11 = cubic.p2;
|
||||
float2 param_12 = cubic.p3;
|
||||
float param_13 = t - (0.5 * _step);
|
||||
float2 qp1 = eval_cubic(param_9, param_10, param_11, param_12, param_13);
|
||||
float2 param_5 = cubic.p0;
|
||||
float2 param_6 = cubic.p1;
|
||||
float2 param_7 = cubic.p2;
|
||||
float2 param_8 = cubic.p3;
|
||||
float param_9 = t;
|
||||
float2 qp2 = eval_cubic(param_5, param_6, param_7, param_8, param_9);
|
||||
float2 param_10 = cubic.p0;
|
||||
float2 param_11 = cubic.p1;
|
||||
float2 param_12 = cubic.p2;
|
||||
float2 param_13 = cubic.p3;
|
||||
float param_14 = t - (0.5 * _step);
|
||||
float2 qp1 = eval_cubic(param_10, param_11, param_12, param_13, param_14);
|
||||
qp1 = (qp1 * 2.0) - ((qp0 + qp2) * 0.5);
|
||||
float2 param_14 = qp0;
|
||||
float2 param_15 = qp1;
|
||||
float2 param_16 = qp2;
|
||||
float param_17 = 0.4743416607379913330078125;
|
||||
SubdivResult params = estimate_subdiv(param_14, param_15, param_16, param_17);
|
||||
float2 param_15 = qp0;
|
||||
float2 param_16 = qp1;
|
||||
float2 param_17 = qp2;
|
||||
float param_18 = 0.4743416607379913330078125;
|
||||
SubdivResult params = estimate_subdiv(param_15, param_16, param_17, param_18);
|
||||
keep_params[i] = params;
|
||||
val += params.val;
|
||||
qp0 = qp2;
|
||||
}
|
||||
uint n = max(uint(ceil((val * 0.5) / 0.4743416607379913330078125)), 1u);
|
||||
uint param_18 = tag.flags;
|
||||
bool is_stroke = fill_mode_from_flags(param_18) == 1u;
|
||||
uint param_19 = tag.flags;
|
||||
bool is_stroke = fill_mode_from_flags(param_19) == 1u;
|
||||
uint path_ix = cubic.path_ix;
|
||||
Alloc param_19;
|
||||
param_19.offset = _710.conf.tile_alloc.offset;
|
||||
PathRef param_20 = PathRef{ _710.conf.tile_alloc.offset + (path_ix * 12u) };
|
||||
Path path = Path_read(param_19, param_20, v_136, v_136BufferSize);
|
||||
uint param_21 = path.tiles.offset;
|
||||
uint param_22 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_23 = mem_ok;
|
||||
Alloc path_alloc = new_alloc(param_21, param_22, param_23);
|
||||
Alloc param_20;
|
||||
param_20.offset = _711.conf.tile_alloc.offset;
|
||||
PathRef param_21 = PathRef{ _711.conf.tile_alloc.offset + (path_ix * 12u) };
|
||||
Path path = Path_read(param_20, param_21, v_143);
|
||||
uint param_22 = path.tiles.offset;
|
||||
uint param_23 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;
|
||||
bool param_24 = true;
|
||||
Alloc path_alloc = new_alloc(param_22, param_23, param_24);
|
||||
int4 bbox = int4(path.bbox);
|
||||
float2 p0 = cubic.p0;
|
||||
qp0 = cubic.p0;
|
||||
|
@ -490,44 +492,44 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
int n_out = 1;
|
||||
float val_sum = 0.0;
|
||||
float2 p1;
|
||||
float _1147;
|
||||
float _1143;
|
||||
TileSeg tile_seg;
|
||||
for (uint i_1 = 0u; i_1 < n_quads; i_1++)
|
||||
{
|
||||
float t_1 = float(i_1 + 1u) * _step;
|
||||
float2 param_24 = cubic.p0;
|
||||
float2 param_25 = cubic.p1;
|
||||
float2 param_26 = cubic.p2;
|
||||
float2 param_27 = cubic.p3;
|
||||
float param_28 = t_1;
|
||||
float2 qp2_1 = eval_cubic(param_24, param_25, param_26, param_27, param_28);
|
||||
float2 param_29 = cubic.p0;
|
||||
float2 param_30 = cubic.p1;
|
||||
float2 param_31 = cubic.p2;
|
||||
float2 param_32 = cubic.p3;
|
||||
float param_33 = t_1 - (0.5 * _step);
|
||||
float2 qp1_1 = eval_cubic(param_29, param_30, param_31, param_32, param_33);
|
||||
float2 param_25 = cubic.p0;
|
||||
float2 param_26 = cubic.p1;
|
||||
float2 param_27 = cubic.p2;
|
||||
float2 param_28 = cubic.p3;
|
||||
float param_29 = t_1;
|
||||
float2 qp2_1 = eval_cubic(param_25, param_26, param_27, param_28, param_29);
|
||||
float2 param_30 = cubic.p0;
|
||||
float2 param_31 = cubic.p1;
|
||||
float2 param_32 = cubic.p2;
|
||||
float2 param_33 = cubic.p3;
|
||||
float param_34 = t_1 - (0.5 * _step);
|
||||
float2 qp1_1 = eval_cubic(param_30, param_31, param_32, param_33, param_34);
|
||||
qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5);
|
||||
SubdivResult params_1 = keep_params[i_1];
|
||||
float param_34 = params_1.a0;
|
||||
float u0 = approx_parabola_inv_integral(param_34);
|
||||
float param_35 = params_1.a2;
|
||||
float u2 = approx_parabola_inv_integral(param_35);
|
||||
float param_35 = params_1.a0;
|
||||
float u0 = approx_parabola_inv_integral(param_35);
|
||||
float param_36 = params_1.a2;
|
||||
float u2 = approx_parabola_inv_integral(param_36);
|
||||
float uscale = 1.0 / (u2 - u0);
|
||||
float target = float(n_out) * v_step;
|
||||
for (;;)
|
||||
{
|
||||
bool _1040 = uint(n_out) == n;
|
||||
bool _1050;
|
||||
if (!_1040)
|
||||
bool _1036 = uint(n_out) == n;
|
||||
bool _1046;
|
||||
if (!_1036)
|
||||
{
|
||||
_1050 = target < (val_sum + params_1.val);
|
||||
_1046 = target < (val_sum + params_1.val);
|
||||
}
|
||||
else
|
||||
{
|
||||
_1050 = _1040;
|
||||
_1046 = _1036;
|
||||
}
|
||||
if (_1050)
|
||||
if (_1046)
|
||||
{
|
||||
if (uint(n_out) == n)
|
||||
{
|
||||
|
@ -537,14 +539,14 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
{
|
||||
float u = (target - val_sum) / params_1.val;
|
||||
float a = mix(params_1.a0, params_1.a2, u);
|
||||
float param_36 = a;
|
||||
float au = approx_parabola_inv_integral(param_36);
|
||||
float param_37 = a;
|
||||
float au = approx_parabola_inv_integral(param_37);
|
||||
float t_2 = (au - u0) * uscale;
|
||||
float2 param_37 = qp0;
|
||||
float2 param_38 = qp1_1;
|
||||
float2 param_39 = qp2_1;
|
||||
float param_40 = t_2;
|
||||
p1 = eval_quad(param_37, param_38, param_39, param_40);
|
||||
float2 param_38 = qp0;
|
||||
float2 param_39 = qp1_1;
|
||||
float2 param_40 = qp2_1;
|
||||
float param_41 = t_2;
|
||||
p1 = eval_quad(param_38, param_39, param_40, param_41);
|
||||
}
|
||||
float xmin = fast::min(p0.x, p1.x) - cubic.stroke.x;
|
||||
float xmax = fast::max(p0.x, p1.x) + cubic.stroke.x;
|
||||
|
@ -554,13 +556,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
float dy = p1.y - p0.y;
|
||||
if (abs(dy) < 9.999999717180685365747194737196e-10)
|
||||
{
|
||||
_1147 = 1000000000.0;
|
||||
_1143 = 1000000000.0;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1147 = dx / dy;
|
||||
_1143 = dx / dy;
|
||||
}
|
||||
float invslope = _1147;
|
||||
float invslope = _1143;
|
||||
float c = (cubic.stroke.x + (abs(invslope) * (8.0 + cubic.stroke.y))) * 0.0625;
|
||||
float b = invslope;
|
||||
float a_1 = (p0.x - ((p0.y - 8.0) * b)) * 0.0625;
|
||||
|
@ -576,14 +578,20 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
int stride = bbox.z - bbox.x;
|
||||
int base = ((y0 - bbox.y) * stride) - bbox.x;
|
||||
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
||||
uint param_41 = n_tile_alloc * 24u;
|
||||
MallocResult _1263 = malloc(param_41, v_136, v_136BufferSize);
|
||||
MallocResult tile_alloc = _1263;
|
||||
if (tile_alloc.failed || (!mem_ok))
|
||||
uint malloc_size = n_tile_alloc * 24u;
|
||||
uint param_42 = malloc_size;
|
||||
uint param_43 = _711.conf.mem_size;
|
||||
uint param_44 = 4u;
|
||||
uint _1265 = malloc_stage(param_42, param_43, param_44, v_143);
|
||||
uint tile_offset = _1265;
|
||||
if (tile_offset == 0u)
|
||||
{
|
||||
return;
|
||||
mem_ok = false;
|
||||
}
|
||||
uint tile_offset = tile_alloc.alloc.offset;
|
||||
uint param_45 = tile_offset;
|
||||
uint param_46 = malloc_size;
|
||||
bool param_47 = true;
|
||||
Alloc tile_alloc = new_alloc(param_45, param_46, param_47);
|
||||
int xray = int(floor(p0.x * 0.0625));
|
||||
int last_xray = int(floor(p1.x * 0.0625));
|
||||
if (p0.y > p1.y)
|
||||
|
@ -596,38 +604,33 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
{
|
||||
float tile_y0 = float(y * 16);
|
||||
int xbackdrop = max((xray + 1), bbox.x);
|
||||
bool _1319 = !is_stroke;
|
||||
bool _1329;
|
||||
if (_1319)
|
||||
bool _1322 = !is_stroke;
|
||||
bool _1332;
|
||||
if (_1322)
|
||||
{
|
||||
_1329 = fast::min(p0.y, p1.y) < tile_y0;
|
||||
_1332 = fast::min(p0.y, p1.y) < tile_y0;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1329 = _1319;
|
||||
_1332 = _1322;
|
||||
}
|
||||
bool _1336;
|
||||
if (_1329)
|
||||
bool _1339;
|
||||
if (_1332)
|
||||
{
|
||||
_1336 = xbackdrop < bbox.z;
|
||||
_1339 = xbackdrop < bbox.z;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1336 = _1329;
|
||||
_1339 = _1332;
|
||||
}
|
||||
if (_1336)
|
||||
if (_1339)
|
||||
{
|
||||
int backdrop = (p1.y < p0.y) ? 1 : (-1);
|
||||
TileRef param_42 = path.tiles;
|
||||
uint param_43 = uint(base + xbackdrop);
|
||||
TileRef tile_ref = Tile_index(param_42, param_43);
|
||||
TileRef param_48 = path.tiles;
|
||||
uint param_49 = uint(base + xbackdrop);
|
||||
TileRef tile_ref = Tile_index(param_48, param_49);
|
||||
uint tile_el = tile_ref.offset >> uint(2);
|
||||
Alloc param_44 = path_alloc;
|
||||
uint param_45 = tile_el + 1u;
|
||||
if (touch_mem(param_44, param_45))
|
||||
{
|
||||
uint _1374 = atomic_fetch_add_explicit((device atomic_uint*)&v_136.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed);
|
||||
}
|
||||
uint _1369 = atomic_fetch_add_explicit((device atomic_uint*)&v_143.memory[tile_el + 1u], uint(backdrop), memory_order_relaxed);
|
||||
}
|
||||
int next_xray = last_xray;
|
||||
if (y < (y1 - 1))
|
||||
|
@ -645,18 +648,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
for (int x = xx0; x < xx1; x++)
|
||||
{
|
||||
float tile_x0 = float(x * 16);
|
||||
TileRef param_46 = TileRef{ path.tiles.offset };
|
||||
uint param_47 = uint(base + x);
|
||||
TileRef tile_ref_1 = Tile_index(param_46, param_47);
|
||||
TileRef param_50 = TileRef{ path.tiles.offset };
|
||||
uint param_51 = uint(base + x);
|
||||
TileRef tile_ref_1 = Tile_index(param_50, param_51);
|
||||
uint tile_el_1 = tile_ref_1.offset >> uint(2);
|
||||
uint old = 0u;
|
||||
Alloc param_48 = path_alloc;
|
||||
uint param_49 = tile_el_1;
|
||||
if (touch_mem(param_48, param_49))
|
||||
{
|
||||
uint _1477 = atomic_exchange_explicit((device atomic_uint*)&v_136.memory[tile_el_1], tile_offset, memory_order_relaxed);
|
||||
old = _1477;
|
||||
}
|
||||
uint _1465 = atomic_exchange_explicit((device atomic_uint*)&v_143.memory[tile_el_1], tile_offset, memory_order_relaxed);
|
||||
old = _1465;
|
||||
tile_seg.origin = p0;
|
||||
tile_seg.vector = p1 - p0;
|
||||
float y_edge = 0.0;
|
||||
|
@ -687,10 +685,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
}
|
||||
tile_seg.y_edge = y_edge;
|
||||
tile_seg.next.offset = old;
|
||||
Alloc param_50 = tile_alloc.alloc;
|
||||
TileSegRef param_51 = TileSegRef{ tile_offset };
|
||||
TileSeg param_52 = tile_seg;
|
||||
TileSeg_write(param_50, param_51, param_52, v_136, v_136BufferSize);
|
||||
if (mem_ok)
|
||||
{
|
||||
Alloc param_52 = tile_alloc;
|
||||
TileSegRef param_53 = TileSegRef{ tile_offset };
|
||||
TileSeg param_54 = tile_seg;
|
||||
TileSeg_write(param_52, param_53, param_54, v_143);
|
||||
}
|
||||
tile_offset += 24u;
|
||||
}
|
||||
xc += b;
|
||||
|
|
BIN
piet-gpu/shader/gen/path_coarse.spv
generated
BIN
piet-gpu/shader/gen/path_coarse.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/pathseg.dxil
generated
BIN
piet-gpu/shader/gen/pathseg.dxil
generated
Binary file not shown.
115
piet-gpu/shader/gen/pathseg.hlsl
generated
115
piet-gpu/shader/gen/pathseg.hlsl
generated
|
@ -52,6 +52,7 @@ struct Monoid
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -164,7 +165,7 @@ uint read_mem(Alloc alloc, uint offset)
|
|||
{
|
||||
return 0u;
|
||||
}
|
||||
uint v = _111.Load(offset * 4 + 8);
|
||||
uint v = _111.Load(offset * 4 + 12);
|
||||
return v;
|
||||
}
|
||||
|
||||
|
@ -203,7 +204,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_111.Store(offset * 4 + 8, val);
|
||||
_111.Store(offset * 4 + 12, val);
|
||||
}
|
||||
|
||||
void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s)
|
||||
|
@ -365,7 +366,7 @@ uint round_up(float x)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 4u;
|
||||
uint tag_word = _574.Load(((_639.Load(92) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
|
||||
uint tag_word = _574.Load(((_639.Load(96) >> uint(2)) + (ix >> uint(2))) * 4 + 0);
|
||||
uint param = tag_word;
|
||||
TagMonoid local_tm = reduce_tag(param);
|
||||
sh_tag[gl_LocalInvocationID.x] = local_tm;
|
||||
|
@ -404,14 +405,14 @@ void comp_main()
|
|||
TagMonoid param_4 = sh_tag[gl_LocalInvocationID.x - 1u];
|
||||
tm = combine_tag_monoid(param_3, param_4);
|
||||
}
|
||||
uint ps_ix = (_639.Load(96) >> uint(2)) + tm.pathseg_offset;
|
||||
uint lw_ix = (_639.Load(88) >> uint(2)) + tm.linewidth_ix;
|
||||
uint ps_ix = (_639.Load(100) >> uint(2)) + tm.pathseg_offset;
|
||||
uint lw_ix = (_639.Load(92) >> uint(2)) + tm.linewidth_ix;
|
||||
uint save_path_ix = tm.path_ix;
|
||||
uint trans_ix = tm.trans_ix;
|
||||
TransformSegRef _771 = { _639.Load(36) + (trans_ix * 24u) };
|
||||
TransformSegRef _771 = { _639.Load(40) + (trans_ix * 24u) };
|
||||
TransformSegRef trans_ref = _771;
|
||||
PathSegRef _781 = { _639.Load(28) + (tm.pathseg_ix * 52u) };
|
||||
PathSegRef ps_ref = _781;
|
||||
PathSegRef _780 = { _639.Load(32) + (tm.pathseg_ix * 52u) };
|
||||
PathSegRef ps_ref = _780;
|
||||
float linewidth[4];
|
||||
uint save_trans_ix[4];
|
||||
float2 p0;
|
||||
|
@ -464,9 +465,9 @@ void comp_main()
|
|||
}
|
||||
}
|
||||
}
|
||||
Alloc _877;
|
||||
_877.offset = _639.Load(36);
|
||||
param_13.offset = _877.offset;
|
||||
Alloc _876;
|
||||
_876.offset = _639.Load(40);
|
||||
param_13.offset = _876.offset;
|
||||
TransformSegRef param_14 = trans_ref;
|
||||
TransformSeg transform = TransformSeg_read(param_13, param_14);
|
||||
p0 = ((transform.mat.xy * p0.x) + (transform.mat.zw * p0.y)) + transform.translate;
|
||||
|
@ -475,25 +476,25 @@ void comp_main()
|
|||
if (seg_type >= 2u)
|
||||
{
|
||||
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
|
||||
float4 _947 = bbox;
|
||||
float2 _950 = min(_947.xy, p2);
|
||||
bbox.x = _950.x;
|
||||
bbox.y = _950.y;
|
||||
float4 _955 = bbox;
|
||||
float2 _958 = max(_955.zw, p2);
|
||||
bbox.z = _958.x;
|
||||
bbox.w = _958.y;
|
||||
float4 _946 = bbox;
|
||||
float2 _949 = min(_946.xy, p2);
|
||||
bbox.x = _949.x;
|
||||
bbox.y = _949.y;
|
||||
float4 _954 = bbox;
|
||||
float2 _957 = max(_954.zw, p2);
|
||||
bbox.z = _957.x;
|
||||
bbox.w = _957.y;
|
||||
if (seg_type == 3u)
|
||||
{
|
||||
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
|
||||
float4 _983 = bbox;
|
||||
float2 _986 = min(_983.xy, p3);
|
||||
bbox.x = _986.x;
|
||||
bbox.y = _986.y;
|
||||
float4 _991 = bbox;
|
||||
float2 _994 = max(_991.zw, p3);
|
||||
bbox.z = _994.x;
|
||||
bbox.w = _994.y;
|
||||
float4 _982 = bbox;
|
||||
float2 _985 = min(_982.xy, p3);
|
||||
bbox.x = _985.x;
|
||||
bbox.y = _985.y;
|
||||
float4 _990 = bbox;
|
||||
float2 _993 = max(_990.zw, p3);
|
||||
bbox.z = _993.x;
|
||||
bbox.w = _993.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -524,9 +525,9 @@ void comp_main()
|
|||
cubic.trans_ix = (gl_GlobalInvocationID.x * 4u) + i_1;
|
||||
cubic.stroke = stroke;
|
||||
uint fill_mode = uint(linewidth[i_1] >= 0.0f);
|
||||
Alloc _1089;
|
||||
_1089.offset = _639.Load(28);
|
||||
param_15.offset = _1089.offset;
|
||||
Alloc _1088;
|
||||
_1088.offset = _639.Load(32);
|
||||
param_15.offset = _1088.offset;
|
||||
PathSegRef param_16 = ps_ref;
|
||||
uint param_17 = fill_mode;
|
||||
PathCubic param_18 = cubic;
|
||||
|
@ -571,7 +572,7 @@ void comp_main()
|
|||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
uint path_ix = save_path_ix;
|
||||
uint bbox_out_ix = (_639.Load(40) >> uint(2)) + (path_ix * 6u);
|
||||
uint bbox_out_ix = (_639.Load(44) >> uint(2)) + (path_ix * 6u);
|
||||
Monoid row = monoid_identity();
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
|
@ -583,24 +584,24 @@ void comp_main()
|
|||
Monoid param_24 = local[i_4];
|
||||
Monoid m = combine_monoid(param_23, param_24);
|
||||
bool do_atomic = false;
|
||||
bool _1264 = i_4 == 3u;
|
||||
bool _1270;
|
||||
if (_1264)
|
||||
bool _1263 = i_4 == 3u;
|
||||
bool _1269;
|
||||
if (_1263)
|
||||
{
|
||||
_1270 = gl_LocalInvocationID.x == 255u;
|
||||
_1269 = gl_LocalInvocationID.x == 255u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1270 = _1264;
|
||||
_1269 = _1263;
|
||||
}
|
||||
if (_1270)
|
||||
if (_1269)
|
||||
{
|
||||
do_atomic = true;
|
||||
}
|
||||
if ((m.flags & 1u) != 0u)
|
||||
{
|
||||
_111.Store((bbox_out_ix + 4u) * 4 + 8, asuint(linewidth[i_4]));
|
||||
_111.Store((bbox_out_ix + 5u) * 4 + 8, save_trans_ix[i_4]);
|
||||
_111.Store((bbox_out_ix + 4u) * 4 + 12, asuint(linewidth[i_4]));
|
||||
_111.Store((bbox_out_ix + 5u) * 4 + 12, save_trans_ix[i_4]);
|
||||
if ((m.flags & 2u) == 0u)
|
||||
{
|
||||
do_atomic = true;
|
||||
|
@ -608,43 +609,43 @@ void comp_main()
|
|||
else
|
||||
{
|
||||
float param_25 = m.bbox.x;
|
||||
_111.Store(bbox_out_ix * 4 + 8, round_down(param_25));
|
||||
_111.Store(bbox_out_ix * 4 + 12, round_down(param_25));
|
||||
float param_26 = m.bbox.y;
|
||||
_111.Store((bbox_out_ix + 1u) * 4 + 8, round_down(param_26));
|
||||
_111.Store((bbox_out_ix + 1u) * 4 + 12, round_down(param_26));
|
||||
float param_27 = m.bbox.z;
|
||||
_111.Store((bbox_out_ix + 2u) * 4 + 8, round_up(param_27));
|
||||
_111.Store((bbox_out_ix + 2u) * 4 + 12, round_up(param_27));
|
||||
float param_28 = m.bbox.w;
|
||||
_111.Store((bbox_out_ix + 3u) * 4 + 8, round_up(param_28));
|
||||
_111.Store((bbox_out_ix + 3u) * 4 + 12, round_up(param_28));
|
||||
bbox_out_ix += 6u;
|
||||
do_atomic = false;
|
||||
}
|
||||
}
|
||||
if (do_atomic)
|
||||
{
|
||||
bool _1335 = m.bbox.z > m.bbox.x;
|
||||
bool _1344;
|
||||
if (!_1335)
|
||||
bool _1334 = m.bbox.z > m.bbox.x;
|
||||
bool _1343;
|
||||
if (!_1334)
|
||||
{
|
||||
_1344 = m.bbox.w > m.bbox.y;
|
||||
_1343 = m.bbox.w > m.bbox.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1344 = _1335;
|
||||
_1343 = _1334;
|
||||
}
|
||||
if (_1344)
|
||||
if (_1343)
|
||||
{
|
||||
float param_29 = m.bbox.x;
|
||||
uint _1353;
|
||||
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353);
|
||||
uint _1352;
|
||||
_111.InterlockedMin(bbox_out_ix * 4 + 12, round_down(param_29), _1352);
|
||||
float param_30 = m.bbox.y;
|
||||
uint _1361;
|
||||
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361);
|
||||
uint _1360;
|
||||
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 12, round_down(param_30), _1360);
|
||||
float param_31 = m.bbox.z;
|
||||
uint _1369;
|
||||
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369);
|
||||
uint _1368;
|
||||
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 12, round_up(param_31), _1368);
|
||||
float param_32 = m.bbox.w;
|
||||
uint _1377;
|
||||
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377);
|
||||
uint _1376;
|
||||
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 12, round_up(param_32), _1376);
|
||||
}
|
||||
bbox_out_ix += 6u;
|
||||
}
|
||||
|
|
66
piet-gpu/shader/gen/pathseg.msl
generated
66
piet-gpu/shader/gen/pathseg.msl
generated
|
@ -102,6 +102,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -117,6 +118,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -545,25 +547,25 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
|||
if (seg_type >= 2u)
|
||||
{
|
||||
p2 = ((transform.mat.xy * p2.x) + (transform.mat.zw * p2.y)) + transform.translate;
|
||||
float4 _947 = bbox;
|
||||
float2 _950 = fast::min(_947.xy, p2);
|
||||
bbox.x = _950.x;
|
||||
bbox.y = _950.y;
|
||||
float4 _955 = bbox;
|
||||
float2 _958 = fast::max(_955.zw, p2);
|
||||
bbox.z = _958.x;
|
||||
bbox.w = _958.y;
|
||||
float4 _946 = bbox;
|
||||
float2 _949 = fast::min(_946.xy, p2);
|
||||
bbox.x = _949.x;
|
||||
bbox.y = _949.y;
|
||||
float4 _954 = bbox;
|
||||
float2 _957 = fast::max(_954.zw, p2);
|
||||
bbox.z = _957.x;
|
||||
bbox.w = _957.y;
|
||||
if (seg_type == 3u)
|
||||
{
|
||||
p3 = ((transform.mat.xy * p3.x) + (transform.mat.zw * p3.y)) + transform.translate;
|
||||
float4 _983 = bbox;
|
||||
float2 _986 = fast::min(_983.xy, p3);
|
||||
bbox.x = _986.x;
|
||||
bbox.y = _986.y;
|
||||
float4 _991 = bbox;
|
||||
float2 _994 = fast::max(_991.zw, p3);
|
||||
bbox.z = _994.x;
|
||||
bbox.w = _994.y;
|
||||
float4 _982 = bbox;
|
||||
float2 _985 = fast::min(_982.xy, p3);
|
||||
bbox.x = _985.x;
|
||||
bbox.y = _985.y;
|
||||
float4 _990 = bbox;
|
||||
float2 _993 = fast::max(_990.zw, p3);
|
||||
bbox.z = _993.x;
|
||||
bbox.w = _993.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -651,17 +653,17 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
|||
Monoid param_24 = local[i_4];
|
||||
Monoid m = combine_monoid(param_23, param_24);
|
||||
bool do_atomic = false;
|
||||
bool _1264 = i_4 == 3u;
|
||||
bool _1270;
|
||||
if (_1264)
|
||||
bool _1263 = i_4 == 3u;
|
||||
bool _1269;
|
||||
if (_1263)
|
||||
{
|
||||
_1270 = gl_LocalInvocationID.x == 255u;
|
||||
_1269 = gl_LocalInvocationID.x == 255u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1270 = _1264;
|
||||
_1269 = _1263;
|
||||
}
|
||||
if (_1270)
|
||||
if (_1269)
|
||||
{
|
||||
do_atomic = true;
|
||||
}
|
||||
|
@ -689,26 +691,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
|||
}
|
||||
if (do_atomic)
|
||||
{
|
||||
bool _1335 = m.bbox.z > m.bbox.x;
|
||||
bool _1344;
|
||||
if (!_1335)
|
||||
bool _1334 = m.bbox.z > m.bbox.x;
|
||||
bool _1343;
|
||||
if (!_1334)
|
||||
{
|
||||
_1344 = m.bbox.w > m.bbox.y;
|
||||
_1343 = m.bbox.w > m.bbox.y;
|
||||
}
|
||||
else
|
||||
{
|
||||
_1344 = _1335;
|
||||
_1343 = _1334;
|
||||
}
|
||||
if (_1344)
|
||||
if (_1343)
|
||||
{
|
||||
float param_29 = m.bbox.x;
|
||||
uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
|
||||
uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
|
||||
float param_30 = m.bbox.y;
|
||||
uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
|
||||
uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
|
||||
float param_31 = m.bbox.z;
|
||||
uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
|
||||
uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
|
||||
float param_32 = m.bbox.w;
|
||||
uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
|
||||
uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
|
||||
}
|
||||
bbox_out_ix += 6u;
|
||||
}
|
||||
|
|
BIN
piet-gpu/shader/gen/pathseg.spv
generated
BIN
piet-gpu/shader/gen/pathseg.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/pathtag_reduce.dxil
generated
BIN
piet-gpu/shader/gen/pathtag_reduce.dxil
generated
Binary file not shown.
3
piet-gpu/shader/gen/pathtag_reduce.hlsl
generated
3
piet-gpu/shader/gen/pathtag_reduce.hlsl
generated
|
@ -14,6 +14,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -92,7 +93,7 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 2u;
|
||||
uint scene_ix = (_139.Load(92) >> uint(2)) + ix;
|
||||
uint scene_ix = (_139.Load(96) >> uint(2)) + ix;
|
||||
uint tag_word = _151.Load(scene_ix * 4 + 0);
|
||||
uint param = tag_word;
|
||||
TagMonoid agg = reduce_tag(param);
|
||||
|
|
2
piet-gpu/shader/gen/pathtag_reduce.msl
generated
2
piet-gpu/shader/gen/pathtag_reduce.msl
generated
|
@ -21,6 +21,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -78,6 +79,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
|
BIN
piet-gpu/shader/gen/pathtag_reduce.spv
generated
BIN
piet-gpu/shader/gen/pathtag_reduce.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/tile_alloc.dxil
generated
BIN
piet-gpu/shader/gen/tile_alloc.dxil
generated
Binary file not shown.
154
piet-gpu/shader/gen/tile_alloc.hlsl
generated
154
piet-gpu/shader/gen/tile_alloc.hlsl
generated
|
@ -3,12 +3,6 @@ struct Alloc
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct MallocResult
|
||||
{
|
||||
Alloc alloc;
|
||||
bool failed;
|
||||
};
|
||||
|
||||
struct PathRef
|
||||
{
|
||||
uint offset;
|
||||
|
@ -27,6 +21,7 @@ struct Path
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -58,9 +53,9 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
RWByteAddressBuffer _70 : register(u0, space0);
|
||||
ByteAddressBuffer _181 : register(t1, space0);
|
||||
ByteAddressBuffer _257 : register(t2, space0);
|
||||
RWByteAddressBuffer _53 : register(u0, space0);
|
||||
ByteAddressBuffer _148 : register(t1, space0);
|
||||
ByteAddressBuffer _232 : register(t2, space0);
|
||||
|
||||
static uint3 gl_LocalInvocationID;
|
||||
static uint3 gl_GlobalInvocationID;
|
||||
|
@ -71,53 +66,38 @@ struct SPIRV_Cross_Input
|
|||
};
|
||||
|
||||
groupshared uint sh_tile_count[256];
|
||||
groupshared MallocResult sh_tile_alloc;
|
||||
groupshared uint sh_tile_offset;
|
||||
|
||||
bool check_deps(uint dep_stage)
|
||||
{
|
||||
uint _60;
|
||||
_53.InterlockedOr(4, 0u, _60);
|
||||
return (_60 & dep_stage) == 0u;
|
||||
}
|
||||
|
||||
float4 load_draw_bbox(uint draw_ix)
|
||||
{
|
||||
uint base = (_181.Load(64) >> uint(2)) + (4u * draw_ix);
|
||||
float x0 = asfloat(_70.Load(base * 4 + 8));
|
||||
float y0 = asfloat(_70.Load((base + 1u) * 4 + 8));
|
||||
float x1 = asfloat(_70.Load((base + 2u) * 4 + 8));
|
||||
float y1 = asfloat(_70.Load((base + 3u) * 4 + 8));
|
||||
uint base = (_148.Load(68) >> uint(2)) + (4u * draw_ix);
|
||||
float x0 = asfloat(_53.Load(base * 4 + 12));
|
||||
float y0 = asfloat(_53.Load((base + 1u) * 4 + 12));
|
||||
float x1 = asfloat(_53.Load((base + 2u) * 4 + 12));
|
||||
float y1 = asfloat(_53.Load((base + 3u) * 4 + 12));
|
||||
float4 bbox = float4(x0, y0, x1, y1);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
Alloc new_alloc(uint offset, uint size, bool mem_ok)
|
||||
uint malloc_stage(uint size, uint mem_size, uint stage)
|
||||
{
|
||||
Alloc a;
|
||||
a.offset = offset;
|
||||
return a;
|
||||
}
|
||||
|
||||
MallocResult malloc(uint size)
|
||||
{
|
||||
uint _76;
|
||||
_70.InterlockedAdd(0, size, _76);
|
||||
uint offset = _76;
|
||||
uint _83;
|
||||
_70.GetDimensions(_83);
|
||||
_83 = (_83 - 8) / 4;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int(_83) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
uint _70;
|
||||
_53.InterlockedAdd(0, size, _70);
|
||||
uint offset = _70;
|
||||
if ((offset + size) > mem_size)
|
||||
{
|
||||
uint _105;
|
||||
_70.InterlockedMax(4, 1u, _105);
|
||||
return r;
|
||||
uint _80;
|
||||
_53.InterlockedOr(4, stage, _80);
|
||||
offset = 0u;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
Alloc slice_mem(Alloc a, uint offset, uint size)
|
||||
{
|
||||
Alloc _131 = { a.offset + offset };
|
||||
return _131;
|
||||
return offset;
|
||||
}
|
||||
|
||||
bool touch_mem(Alloc alloc, uint offset)
|
||||
|
@ -133,7 +113,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_70.Store(offset * 4 + 8, val);
|
||||
_53.Store(offset * 4 + 12, val);
|
||||
}
|
||||
|
||||
void Path_write(Alloc a, PathRef ref, Path s)
|
||||
|
@ -155,15 +135,21 @@ void Path_write(Alloc a, PathRef ref, Path s)
|
|||
|
||||
void comp_main()
|
||||
{
|
||||
uint param = 1u;
|
||||
bool _192 = check_deps(param);
|
||||
if (!_192)
|
||||
{
|
||||
return;
|
||||
}
|
||||
uint th_ix = gl_LocalInvocationID.x;
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
PathRef _241 = { _181.Load(16) + (element_ix * 12u) };
|
||||
PathRef path_ref = _241;
|
||||
uint drawtag_base = _181.Load(100) >> uint(2);
|
||||
PathRef _216 = { _148.Load(20) + (element_ix * 12u) };
|
||||
PathRef path_ref = _216;
|
||||
uint drawtag_base = _148.Load(104) >> uint(2);
|
||||
uint drawtag = 0u;
|
||||
if (element_ix < _181.Load(0))
|
||||
if (element_ix < _148.Load(4))
|
||||
{
|
||||
drawtag = _257.Load((drawtag_base + element_ix) * 4 + 0);
|
||||
drawtag = _232.Load((drawtag_base + element_ix) * 4 + 0);
|
||||
}
|
||||
int x0 = 0;
|
||||
int y0 = 0;
|
||||
|
@ -171,17 +157,17 @@ void comp_main()
|
|||
int y1 = 0;
|
||||
if ((drawtag != 0u) && (drawtag != 37u))
|
||||
{
|
||||
uint param = element_ix;
|
||||
float4 bbox = load_draw_bbox(param);
|
||||
uint param_1 = element_ix;
|
||||
float4 bbox = load_draw_bbox(param_1);
|
||||
x0 = int(floor(bbox.x * 0.0625f));
|
||||
y0 = int(floor(bbox.y * 0.0625f));
|
||||
x1 = int(ceil(bbox.z * 0.0625f));
|
||||
y1 = int(ceil(bbox.w * 0.0625f));
|
||||
}
|
||||
x0 = clamp(x0, 0, int(_181.Load(8)));
|
||||
y0 = clamp(y0, 0, int(_181.Load(12)));
|
||||
x1 = clamp(x1, 0, int(_181.Load(8)));
|
||||
y1 = clamp(y1, 0, int(_181.Load(12)));
|
||||
x0 = clamp(x0, 0, int(_148.Load(12)));
|
||||
y0 = clamp(y0, 0, int(_148.Load(16)));
|
||||
x1 = clamp(x1, 0, int(_148.Load(12)));
|
||||
y1 = clamp(y1, 0, int(_148.Load(16)));
|
||||
Path path;
|
||||
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
|
||||
uint tile_count = uint((x1 - x0) * (y1 - y0));
|
||||
|
@ -199,59 +185,45 @@ void comp_main()
|
|||
}
|
||||
if (th_ix == 255u)
|
||||
{
|
||||
uint param_1 = total_tile_count * 8u;
|
||||
MallocResult _392 = malloc(param_1);
|
||||
sh_tile_alloc = _392;
|
||||
uint param_2 = total_tile_count * 8u;
|
||||
uint param_3 = _148.Load(0);
|
||||
uint param_4 = 2u;
|
||||
uint _370 = malloc_stage(param_2, param_3, param_4);
|
||||
sh_tile_offset = _370;
|
||||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
MallocResult alloc_start = sh_tile_alloc;
|
||||
bool _403;
|
||||
if (!alloc_start.failed)
|
||||
{
|
||||
_403 = _70.Load(4) != 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_403 = alloc_start.failed;
|
||||
}
|
||||
if (_403)
|
||||
uint offset_start = sh_tile_offset;
|
||||
if (offset_start == 0u)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (element_ix < _181.Load(0))
|
||||
if (element_ix < _148.Load(4))
|
||||
{
|
||||
uint _416;
|
||||
uint _387;
|
||||
if (th_ix > 0u)
|
||||
{
|
||||
_416 = sh_tile_count[th_ix - 1u];
|
||||
_387 = sh_tile_count[th_ix - 1u];
|
||||
}
|
||||
else
|
||||
{
|
||||
_416 = 0u;
|
||||
_387 = 0u;
|
||||
}
|
||||
uint tile_subix = _416;
|
||||
Alloc param_2 = alloc_start.alloc;
|
||||
uint param_3 = 8u * tile_subix;
|
||||
uint param_4 = 8u * tile_count;
|
||||
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
|
||||
TileRef _438 = { tiles_alloc.offset };
|
||||
path.tiles = _438;
|
||||
Alloc _444;
|
||||
_444.offset = _181.Load(16);
|
||||
uint tile_subix = _387;
|
||||
TileRef _400 = { offset_start + (8u * tile_subix) };
|
||||
path.tiles = _400;
|
||||
Alloc _406;
|
||||
_406.offset = _148.Load(20);
|
||||
Alloc param_5;
|
||||
param_5.offset = _444.offset;
|
||||
param_5.offset = _406.offset;
|
||||
PathRef param_6 = path_ref;
|
||||
Path param_7 = path;
|
||||
Path_write(param_5, param_6, param_7);
|
||||
}
|
||||
uint total_count = sh_tile_count[255] * 2u;
|
||||
uint start_ix = alloc_start.alloc.offset >> uint(2);
|
||||
uint start_ix = offset_start >> uint(2);
|
||||
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
|
||||
{
|
||||
Alloc param_8 = alloc_start.alloc;
|
||||
uint param_9 = start_ix + i_1;
|
||||
uint param_10 = 0u;
|
||||
write_mem(param_8, param_9, param_10);
|
||||
_53.Store((start_ix + i_1) * 4 + 12, 0u);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
152
piet-gpu/shader/gen/tile_alloc.msl
generated
152
piet-gpu/shader/gen/tile_alloc.msl
generated
|
@ -12,12 +12,6 @@ struct Alloc
|
|||
uint offset;
|
||||
};
|
||||
|
||||
struct MallocResult
|
||||
{
|
||||
Alloc alloc;
|
||||
bool failed;
|
||||
};
|
||||
|
||||
struct PathRef
|
||||
{
|
||||
uint offset;
|
||||
|
@ -38,6 +32,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -48,6 +43,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -90,48 +86,35 @@ struct SceneBuf
|
|||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_70, constant uint& v_70BufferSize, const device ConfigBuf& v_181)
|
||||
bool check_deps(thread const uint& dep_stage, device Memory& v_53)
|
||||
{
|
||||
uint base = (v_181.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
|
||||
float x0 = as_type<float>(v_70.memory[base]);
|
||||
float y0 = as_type<float>(v_70.memory[base + 1u]);
|
||||
float x1 = as_type<float>(v_70.memory[base + 2u]);
|
||||
float y1 = as_type<float>(v_70.memory[base + 3u]);
|
||||
uint _60 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, 0u, memory_order_relaxed);
|
||||
return (_60 & dep_stage) == 0u;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
float4 load_draw_bbox(thread const uint& draw_ix, device Memory& v_53, const device ConfigBuf& v_148)
|
||||
{
|
||||
uint base = (v_148.conf.draw_bbox_alloc.offset >> uint(2)) + (4u * draw_ix);
|
||||
float x0 = as_type<float>(v_53.memory[base]);
|
||||
float y0 = as_type<float>(v_53.memory[base + 1u]);
|
||||
float x1 = as_type<float>(v_53.memory[base + 2u]);
|
||||
float y1 = as_type<float>(v_53.memory[base + 3u]);
|
||||
float4 bbox = float4(x0, y0, x1, y1);
|
||||
return bbox;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const bool& mem_ok)
|
||||
uint malloc_stage(thread const uint& size, thread const uint& mem_size, thread const uint& stage, device Memory& v_53)
|
||||
{
|
||||
Alloc a;
|
||||
a.offset = offset;
|
||||
return a;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
MallocResult malloc(thread const uint& size, device Memory& v_70, constant uint& v_70BufferSize)
|
||||
{
|
||||
uint _76 = atomic_fetch_add_explicit((device atomic_uint*)&v_70.mem_offset, size, memory_order_relaxed);
|
||||
uint offset = _76;
|
||||
MallocResult r;
|
||||
r.failed = (offset + size) > uint(int((v_70BufferSize - 8) / 4) * 4);
|
||||
uint param = offset;
|
||||
uint param_1 = size;
|
||||
bool param_2 = !r.failed;
|
||||
r.alloc = new_alloc(param, param_1, param_2);
|
||||
if (r.failed)
|
||||
uint _70 = atomic_fetch_add_explicit((device atomic_uint*)&v_53.mem_offset, size, memory_order_relaxed);
|
||||
uint offset = _70;
|
||||
if ((offset + size) > mem_size)
|
||||
{
|
||||
uint _105 = atomic_fetch_max_explicit((device atomic_uint*)&v_70.mem_error, 1u, memory_order_relaxed);
|
||||
return r;
|
||||
uint _80 = atomic_fetch_or_explicit((device atomic_uint*)&v_53.mem_error, stage, memory_order_relaxed);
|
||||
offset = 0u;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Alloc slice_mem(thread const Alloc& a, thread const uint& offset, thread const uint& size)
|
||||
{
|
||||
return Alloc{ a.offset + offset };
|
||||
return offset;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
|
@ -141,7 +124,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset)
|
|||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_70, constant uint& v_70BufferSize)
|
||||
void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_53)
|
||||
{
|
||||
Alloc param = alloc;
|
||||
uint param_1 = offset;
|
||||
|
@ -149,40 +132,45 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons
|
|||
{
|
||||
return;
|
||||
}
|
||||
v_70.memory[offset] = val;
|
||||
v_53.memory[offset] = val;
|
||||
}
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_70, constant uint& v_70BufferSize)
|
||||
void Path_write(thread const Alloc& a, thread const PathRef& ref, thread const Path& s, device Memory& v_53)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
Alloc param = a;
|
||||
uint param_1 = ix + 0u;
|
||||
uint param_2 = s.bbox.x | (s.bbox.y << uint(16));
|
||||
write_mem(param, param_1, param_2, v_70, v_70BufferSize);
|
||||
write_mem(param, param_1, param_2, v_53);
|
||||
Alloc param_3 = a;
|
||||
uint param_4 = ix + 1u;
|
||||
uint param_5 = s.bbox.z | (s.bbox.w << uint(16));
|
||||
write_mem(param_3, param_4, param_5, v_70, v_70BufferSize);
|
||||
write_mem(param_3, param_4, param_5, v_53);
|
||||
Alloc param_6 = a;
|
||||
uint param_7 = ix + 2u;
|
||||
uint param_8 = s.tiles.offset;
|
||||
write_mem(param_6, param_7, param_8, v_70, v_70BufferSize);
|
||||
write_mem(param_6, param_7, param_8, v_53);
|
||||
}
|
||||
|
||||
kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_70 [[buffer(0)]], const device ConfigBuf& v_181 [[buffer(1)]], const device SceneBuf& _257 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
kernel void main0(device Memory& v_53 [[buffer(0)]], const device ConfigBuf& v_148 [[buffer(1)]], const device SceneBuf& _232 [[buffer(2)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
threadgroup uint sh_tile_count[256];
|
||||
threadgroup MallocResult sh_tile_alloc;
|
||||
constant uint& v_70BufferSize = spvBufferSizeConstants[0];
|
||||
threadgroup uint sh_tile_offset;
|
||||
uint param = 1u;
|
||||
bool _192 = check_deps(param, v_53);
|
||||
if (!_192)
|
||||
{
|
||||
return;
|
||||
}
|
||||
uint th_ix = gl_LocalInvocationID.x;
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
PathRef path_ref = PathRef{ v_181.conf.tile_alloc.offset + (element_ix * 12u) };
|
||||
uint drawtag_base = v_181.conf.drawtag_offset >> uint(2);
|
||||
PathRef path_ref = PathRef{ v_148.conf.tile_alloc.offset + (element_ix * 12u) };
|
||||
uint drawtag_base = v_148.conf.drawtag_offset >> uint(2);
|
||||
uint drawtag = 0u;
|
||||
if (element_ix < v_181.conf.n_elements)
|
||||
if (element_ix < v_148.conf.n_elements)
|
||||
{
|
||||
drawtag = _257.scene[drawtag_base + element_ix];
|
||||
drawtag = _232.scene[drawtag_base + element_ix];
|
||||
}
|
||||
int x0 = 0;
|
||||
int y0 = 0;
|
||||
|
@ -190,17 +178,17 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
int y1 = 0;
|
||||
if ((drawtag != 0u) && (drawtag != 37u))
|
||||
{
|
||||
uint param = element_ix;
|
||||
float4 bbox = load_draw_bbox(param, v_70, v_70BufferSize, v_181);
|
||||
uint param_1 = element_ix;
|
||||
float4 bbox = load_draw_bbox(param_1, v_53, v_148);
|
||||
x0 = int(floor(bbox.x * 0.0625));
|
||||
y0 = int(floor(bbox.y * 0.0625));
|
||||
x1 = int(ceil(bbox.z * 0.0625));
|
||||
y1 = int(ceil(bbox.w * 0.0625));
|
||||
}
|
||||
x0 = clamp(x0, 0, int(v_181.conf.width_in_tiles));
|
||||
y0 = clamp(y0, 0, int(v_181.conf.height_in_tiles));
|
||||
x1 = clamp(x1, 0, int(v_181.conf.width_in_tiles));
|
||||
y1 = clamp(y1, 0, int(v_181.conf.height_in_tiles));
|
||||
x0 = clamp(x0, 0, int(v_148.conf.width_in_tiles));
|
||||
y0 = clamp(y0, 0, int(v_148.conf.height_in_tiles));
|
||||
x1 = clamp(x1, 0, int(v_148.conf.width_in_tiles));
|
||||
y1 = clamp(y1, 0, int(v_148.conf.height_in_tiles));
|
||||
Path path;
|
||||
path.bbox = uint4(uint(x0), uint(y0), uint(x1), uint(y1));
|
||||
uint tile_count = uint((x1 - x0) * (y1 - y0));
|
||||
|
@ -218,56 +206,42 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
|||
}
|
||||
if (th_ix == 255u)
|
||||
{
|
||||
uint param_1 = total_tile_count * 8u;
|
||||
MallocResult _392 = malloc(param_1, v_70, v_70BufferSize);
|
||||
sh_tile_alloc = _392;
|
||||
uint param_2 = total_tile_count * 8u;
|
||||
uint param_3 = v_148.conf.mem_size;
|
||||
uint param_4 = 2u;
|
||||
uint _370 = malloc_stage(param_2, param_3, param_4, v_53);
|
||||
sh_tile_offset = _370;
|
||||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
MallocResult alloc_start = sh_tile_alloc;
|
||||
bool _403;
|
||||
if (!alloc_start.failed)
|
||||
{
|
||||
_403 = v_70.mem_error != 0u;
|
||||
}
|
||||
else
|
||||
{
|
||||
_403 = alloc_start.failed;
|
||||
}
|
||||
if (_403)
|
||||
uint offset_start = sh_tile_offset;
|
||||
if (offset_start == 0u)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if (element_ix < v_181.conf.n_elements)
|
||||
if (element_ix < v_148.conf.n_elements)
|
||||
{
|
||||
uint _416;
|
||||
uint _387;
|
||||
if (th_ix > 0u)
|
||||
{
|
||||
_416 = sh_tile_count[th_ix - 1u];
|
||||
_387 = sh_tile_count[th_ix - 1u];
|
||||
}
|
||||
else
|
||||
{
|
||||
_416 = 0u;
|
||||
_387 = 0u;
|
||||
}
|
||||
uint tile_subix = _416;
|
||||
Alloc param_2 = alloc_start.alloc;
|
||||
uint param_3 = 8u * tile_subix;
|
||||
uint param_4 = 8u * tile_count;
|
||||
Alloc tiles_alloc = slice_mem(param_2, param_3, param_4);
|
||||
path.tiles = TileRef{ tiles_alloc.offset };
|
||||
uint tile_subix = _387;
|
||||
path.tiles = TileRef{ offset_start + (8u * tile_subix) };
|
||||
Alloc param_5;
|
||||
param_5.offset = v_181.conf.tile_alloc.offset;
|
||||
param_5.offset = v_148.conf.tile_alloc.offset;
|
||||
PathRef param_6 = path_ref;
|
||||
Path param_7 = path;
|
||||
Path_write(param_5, param_6, param_7, v_70, v_70BufferSize);
|
||||
Path_write(param_5, param_6, param_7, v_53);
|
||||
}
|
||||
uint total_count = sh_tile_count[255] * 2u;
|
||||
uint start_ix = alloc_start.alloc.offset >> uint(2);
|
||||
uint start_ix = offset_start >> uint(2);
|
||||
for (uint i_1 = th_ix; i_1 < total_count; i_1 += 256u)
|
||||
{
|
||||
Alloc param_8 = alloc_start.alloc;
|
||||
uint param_9 = start_ix + i_1;
|
||||
uint param_10 = 0u;
|
||||
write_mem(param_8, param_9, param_10, v_70, v_70BufferSize);
|
||||
v_53.memory[start_ix + i_1] = 0u;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
BIN
piet-gpu/shader/gen/tile_alloc.spv
generated
BIN
piet-gpu/shader/gen/tile_alloc.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_leaf.dxil
generated
BIN
piet-gpu/shader/gen/transform_leaf.dxil
generated
Binary file not shown.
53
piet-gpu/shader/gen/transform_leaf.hlsl
generated
53
piet-gpu/shader/gen/transform_leaf.hlsl
generated
|
@ -27,6 +27,7 @@ struct TransformSeg
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -58,12 +59,12 @@ struct Config
|
|||
|
||||
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||
|
||||
static const Transform _224 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
|
||||
static const Transform _225 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
|
||||
|
||||
RWByteAddressBuffer _71 : register(u0, space0);
|
||||
ByteAddressBuffer _96 : register(t2, space0);
|
||||
ByteAddressBuffer _278 : register(t1, space0);
|
||||
ByteAddressBuffer _376 : register(t3, space0);
|
||||
ByteAddressBuffer _97 : register(t2, space0);
|
||||
ByteAddressBuffer _279 : register(t1, space0);
|
||||
ByteAddressBuffer _377 : register(t3, space0);
|
||||
|
||||
static uint3 gl_WorkGroupID;
|
||||
static uint3 gl_LocalInvocationID;
|
||||
|
@ -80,12 +81,12 @@ groupshared Transform sh_scratch[256];
|
|||
Transform Transform_read(TransformRef ref)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = _96.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _96.Load((ix + 1u) * 4 + 0);
|
||||
uint raw2 = _96.Load((ix + 2u) * 4 + 0);
|
||||
uint raw3 = _96.Load((ix + 3u) * 4 + 0);
|
||||
uint raw4 = _96.Load((ix + 4u) * 4 + 0);
|
||||
uint raw5 = _96.Load((ix + 5u) * 4 + 0);
|
||||
uint raw0 = _97.Load((ix + 0u) * 4 + 0);
|
||||
uint raw1 = _97.Load((ix + 1u) * 4 + 0);
|
||||
uint raw2 = _97.Load((ix + 2u) * 4 + 0);
|
||||
uint raw3 = _97.Load((ix + 3u) * 4 + 0);
|
||||
uint raw4 = _97.Load((ix + 4u) * 4 + 0);
|
||||
uint raw5 = _97.Load((ix + 5u) * 4 + 0);
|
||||
Transform s;
|
||||
s.mat = float4(asfloat(raw0), asfloat(raw1), asfloat(raw2), asfloat(raw3));
|
||||
s.translate = float2(asfloat(raw4), asfloat(raw5));
|
||||
|
@ -108,7 +109,7 @@ Transform combine_monoid(Transform a, Transform b)
|
|||
|
||||
Transform monoid_identity()
|
||||
{
|
||||
return _224;
|
||||
return _225;
|
||||
}
|
||||
|
||||
bool touch_mem(Alloc alloc, uint offset)
|
||||
|
@ -124,7 +125,7 @@ void write_mem(Alloc alloc, uint offset, uint val)
|
|||
{
|
||||
return;
|
||||
}
|
||||
_71.Store(offset * 4 + 8, val);
|
||||
_71.Store(offset * 4 + 12, val);
|
||||
}
|
||||
|
||||
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
|
||||
|
@ -159,8 +160,8 @@ void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef _285 = { _278.Load(84) + (ix * 24u) };
|
||||
TransformRef ref = _285;
|
||||
TransformRef _286 = { _279.Load(88) + (ix * 24u) };
|
||||
TransformRef ref = _286;
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param);
|
||||
Transform local[8];
|
||||
|
@ -193,11 +194,11 @@ void comp_main()
|
|||
Transform row = monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0u)
|
||||
{
|
||||
Transform _382;
|
||||
_382.mat = asfloat(_376.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
|
||||
_382.translate = asfloat(_376.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
|
||||
row.mat = _382.mat;
|
||||
row.translate = _382.translate;
|
||||
Transform _383;
|
||||
_383.mat = asfloat(_377.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
|
||||
_383.translate = asfloat(_377.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
|
||||
row.mat = _383.mat;
|
||||
row.translate = _383.translate;
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
|
@ -211,13 +212,13 @@ void comp_main()
|
|||
Transform param_10 = row;
|
||||
Transform param_11 = local[i_2];
|
||||
Transform m = combine_monoid(param_10, param_11);
|
||||
TransformSeg _422 = { m.mat, m.translate };
|
||||
TransformSeg transform = _422;
|
||||
TransformSegRef _432 = { _278.Load(36) + ((ix + i_2) * 24u) };
|
||||
TransformSegRef trans_ref = _432;
|
||||
Alloc _436;
|
||||
_436.offset = _278.Load(36);
|
||||
param_12.offset = _436.offset;
|
||||
TransformSeg _423 = { m.mat, m.translate };
|
||||
TransformSeg transform = _423;
|
||||
TransformSegRef _433 = { _279.Load(40) + ((ix + i_2) * 24u) };
|
||||
TransformSegRef trans_ref = _433;
|
||||
Alloc _437;
|
||||
_437.offset = _279.Load(40);
|
||||
param_12.offset = _437.offset;
|
||||
TransformSegRef param_13 = trans_ref;
|
||||
TransformSeg param_14 = transform;
|
||||
TransformSeg_write(param_12, param_13, param_14);
|
||||
|
|
34
piet-gpu/shader/gen/transform_leaf.msl
generated
34
piet-gpu/shader/gen/transform_leaf.msl
generated
|
@ -75,6 +75,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
@ -90,6 +91,7 @@ struct Alloc_1
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -139,15 +141,15 @@ struct ParentBuf
|
|||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||
|
||||
static inline __attribute__((always_inline))
|
||||
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_96)
|
||||
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_97)
|
||||
{
|
||||
uint ix = ref.offset >> uint(2);
|
||||
uint raw0 = v_96.scene[ix + 0u];
|
||||
uint raw1 = v_96.scene[ix + 1u];
|
||||
uint raw2 = v_96.scene[ix + 2u];
|
||||
uint raw3 = v_96.scene[ix + 3u];
|
||||
uint raw4 = v_96.scene[ix + 4u];
|
||||
uint raw5 = v_96.scene[ix + 5u];
|
||||
uint raw0 = v_97.scene[ix + 0u];
|
||||
uint raw1 = v_97.scene[ix + 1u];
|
||||
uint raw2 = v_97.scene[ix + 2u];
|
||||
uint raw3 = v_97.scene[ix + 3u];
|
||||
uint raw4 = v_97.scene[ix + 4u];
|
||||
uint raw5 = v_97.scene[ix + 5u];
|
||||
Transform s;
|
||||
s.mat = float4(as_type<float>(raw0), as_type<float>(raw1), as_type<float>(raw2), as_type<float>(raw3));
|
||||
s.translate = float2(as_type<float>(raw4), as_type<float>(raw5));
|
||||
|
@ -223,13 +225,13 @@ void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref
|
|||
write_mem(param_15, param_16, param_17, v_71);
|
||||
}
|
||||
|
||||
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _278 [[buffer(1)]], const device SceneBuf& v_96 [[buffer(2)]], const device ParentBuf& _376 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _279 [[buffer(1)]], const device SceneBuf& v_97 [[buffer(2)]], const device ParentBuf& _377 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||
{
|
||||
threadgroup Transform sh_scratch[256];
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef ref = TransformRef{ _278.conf.trans_offset + (ix * 24u) };
|
||||
TransformRef ref = TransformRef{ _279.conf.trans_offset + (ix * 24u) };
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param, v_96);
|
||||
Transform agg = Transform_read(param, v_97);
|
||||
spvUnsafeArray<Transform, 8> local;
|
||||
local[0] = agg;
|
||||
for (uint i = 1u; i < 8u; i++)
|
||||
|
@ -238,7 +240,7 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
|
|||
uint param_2 = i;
|
||||
TransformRef param_3 = Transform_index(param_1, param_2);
|
||||
Transform param_4 = agg;
|
||||
Transform param_5 = Transform_read(param_3, v_96);
|
||||
Transform param_5 = Transform_read(param_3, v_97);
|
||||
agg = combine_monoid(param_4, param_5);
|
||||
local[i] = agg;
|
||||
}
|
||||
|
@ -260,9 +262,9 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
|
|||
Transform row = monoid_identity();
|
||||
if (gl_WorkGroupID.x > 0u)
|
||||
{
|
||||
uint _379 = gl_WorkGroupID.x - 1u;
|
||||
row.mat = _376.parent[_379].mat;
|
||||
row.translate = _376.parent[_379].translate;
|
||||
uint _380 = gl_WorkGroupID.x - 1u;
|
||||
row.mat = _377.parent[_380].mat;
|
||||
row.translate = _377.parent[_380].translate;
|
||||
}
|
||||
if (gl_LocalInvocationID.x > 0u)
|
||||
{
|
||||
|
@ -277,8 +279,8 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
|
|||
Transform param_11 = local[i_2];
|
||||
Transform m = combine_monoid(param_10, param_11);
|
||||
TransformSeg transform = TransformSeg{ m.mat, m.translate };
|
||||
TransformSegRef trans_ref = TransformSegRef{ _278.conf.trans_alloc.offset + ((ix + i_2) * 24u) };
|
||||
param_12.offset = _278.conf.trans_alloc.offset;
|
||||
TransformSegRef trans_ref = TransformSegRef{ _279.conf.trans_alloc.offset + ((ix + i_2) * 24u) };
|
||||
param_12.offset = _279.conf.trans_alloc.offset;
|
||||
TransformSegRef param_13 = trans_ref;
|
||||
TransformSeg param_14 = transform;
|
||||
TransformSeg_write(param_12, param_13, param_14, v_71);
|
||||
|
|
BIN
piet-gpu/shader/gen/transform_leaf.spv
generated
BIN
piet-gpu/shader/gen/transform_leaf.spv
generated
Binary file not shown.
BIN
piet-gpu/shader/gen/transform_reduce.dxil
generated
BIN
piet-gpu/shader/gen/transform_reduce.dxil
generated
Binary file not shown.
3
piet-gpu/shader/gen/transform_reduce.hlsl
generated
3
piet-gpu/shader/gen/transform_reduce.hlsl
generated
|
@ -16,6 +16,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -96,7 +97,7 @@ Transform combine_monoid(Transform a, Transform b)
|
|||
void comp_main()
|
||||
{
|
||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||
TransformRef _168 = { _161.Load(84) + (ix * 24u) };
|
||||
TransformRef _168 = { _161.Load(88) + (ix * 24u) };
|
||||
TransformRef ref = _168;
|
||||
TransformRef param = ref;
|
||||
Transform agg = Transform_read(param);
|
||||
|
|
2
piet-gpu/shader/gen/transform_reduce.msl
generated
2
piet-gpu/shader/gen/transform_reduce.msl
generated
|
@ -28,6 +28,7 @@ struct Alloc
|
|||
|
||||
struct Config
|
||||
{
|
||||
uint mem_size;
|
||||
uint n_elements;
|
||||
uint n_pathseg;
|
||||
uint width_in_tiles;
|
||||
|
@ -78,6 +79,7 @@ struct Memory
|
|||
{
|
||||
uint mem_offset;
|
||||
uint mem_error;
|
||||
uint blend_offset;
|
||||
uint memory[1];
|
||||
};
|
||||
|
||||
|
|
BIN
piet-gpu/shader/gen/transform_reduce.spv
generated
BIN
piet-gpu/shader/gen/transform_reduce.spv
generated
Binary file not shown.
Loading…
Reference in a new issue