mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Adjust workgroup sizes
Make max workgroup size 256 and respect LG_WG_FACTOR. Because the monoid scans only support a height of 2, this will reduce the maximum scene complexity we can render. But it also increases compatibility. Supporting larger scans is a TODO.
This commit is contained in:
parent
75496f5e67
commit
d948126c16
|
@ -84,7 +84,7 @@ void main() {
|
||||||
if (x0 == x1) y1 = y0;
|
if (x0 == x1) y1 = y0;
|
||||||
int x = x0, y = y0;
|
int x = x0, y = y0;
|
||||||
uint my_slice = gl_LocalInvocationID.x / 32;
|
uint my_slice = gl_LocalInvocationID.x / 32;
|
||||||
uint my_mask = 1 << (gl_LocalInvocationID.x & 31);
|
uint my_mask = 1u << (gl_LocalInvocationID.x & 31);
|
||||||
while (y < y1) {
|
while (y < y1) {
|
||||||
atomicOr(bitmaps[my_slice][y * width_in_bins + x], my_mask);
|
atomicOr(bitmaps[my_slice][y * width_in_bins + x], my_mask);
|
||||||
x++;
|
x++;
|
||||||
|
|
|
@ -65,7 +65,7 @@ build gen/transform_reduce.hlsl: hlsl gen/transform_reduce.spv
|
||||||
build gen/transform_reduce.dxil: dxil gen/transform_reduce.hlsl
|
build gen/transform_reduce.dxil: dxil gen/transform_reduce.hlsl
|
||||||
build gen/transform_reduce.msl: msl gen/transform_reduce.spv
|
build gen/transform_reduce.msl: msl gen/transform_reduce.spv
|
||||||
|
|
||||||
build gen/transform_root.spv: glsl transform_scan.comp
|
build gen/transform_root.spv: glsl transform_scan.comp | setup.h
|
||||||
flags = -DROOT
|
flags = -DROOT
|
||||||
build gen/transform_root.hlsl: hlsl gen/transform_root.spv
|
build gen/transform_root.hlsl: hlsl gen/transform_root.spv
|
||||||
build gen/transform_root.dxil: dxil gen/transform_root.hlsl
|
build gen/transform_root.dxil: dxil gen/transform_root.hlsl
|
||||||
|
@ -81,7 +81,7 @@ build gen/pathtag_reduce.hlsl: hlsl gen/pathtag_reduce.spv
|
||||||
build gen/pathtag_reduce.dxil: dxil gen/pathtag_reduce.hlsl
|
build gen/pathtag_reduce.dxil: dxil gen/pathtag_reduce.hlsl
|
||||||
build gen/pathtag_reduce.msl: msl gen/pathtag_reduce.spv
|
build gen/pathtag_reduce.msl: msl gen/pathtag_reduce.spv
|
||||||
|
|
||||||
build gen/pathtag_root.spv: glsl pathtag_scan.comp | pathtag.h
|
build gen/pathtag_root.spv: glsl pathtag_scan.comp | pathtag.h setup.h
|
||||||
flags = -DROOT
|
flags = -DROOT
|
||||||
build gen/pathtag_root.hlsl: hlsl gen/pathtag_root.spv
|
build gen/pathtag_root.hlsl: hlsl gen/pathtag_root.spv
|
||||||
build gen/pathtag_root.dxil: dxil gen/pathtag_root.hlsl
|
build gen/pathtag_root.dxil: dxil gen/pathtag_root.hlsl
|
||||||
|
@ -102,7 +102,7 @@ build gen/draw_reduce.hlsl: hlsl gen/draw_reduce.spv
|
||||||
build gen/draw_reduce.dxil: dxil gen/draw_reduce.hlsl
|
build gen/draw_reduce.dxil: dxil gen/draw_reduce.hlsl
|
||||||
build gen/draw_reduce.msl: msl gen/draw_reduce.spv
|
build gen/draw_reduce.msl: msl gen/draw_reduce.spv
|
||||||
|
|
||||||
build gen/draw_root.spv: glsl draw_scan.comp | drawtag.h
|
build gen/draw_root.spv: glsl draw_scan.comp | drawtag.h setup.h
|
||||||
flags = -DROOT
|
flags = -DROOT
|
||||||
build gen/draw_root.hlsl: hlsl gen/draw_root.spv
|
build gen/draw_root.hlsl: hlsl gen/draw_root.spv
|
||||||
build gen/draw_root.dxil: dxil gen/draw_root.hlsl
|
build gen/draw_root.dxil: dxil gen/draw_root.hlsl
|
||||||
|
|
|
@ -11,7 +11,7 @@
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define N_ROWS 8
|
#define N_ROWS 8
|
||||||
#define LG_WG_SIZE 9
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define N_ROWS 8
|
#define N_ROWS 8
|
||||||
#define LG_WG_SIZE 9
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,11 @@
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "setup.h"
|
||||||
#include "drawtag.h"
|
#include "drawtag.h"
|
||||||
|
|
||||||
#define N_ROWS 8
|
#define N_ROWS 8
|
||||||
#define LG_WG_SIZE 9
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
|
|
@ -248,11 +248,11 @@ void comp_main()
|
||||||
int x = x0;
|
int x = x0;
|
||||||
int y = y0;
|
int y = y0;
|
||||||
uint my_slice = gl_LocalInvocationID.x / 32u;
|
uint my_slice = gl_LocalInvocationID.x / 32u;
|
||||||
uint my_mask = uint(1 << int(gl_LocalInvocationID.x & 31u));
|
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
||||||
while (y < y1)
|
while (y < y1)
|
||||||
{
|
{
|
||||||
uint _438;
|
uint _437;
|
||||||
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _438);
|
InterlockedOr(bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, _437);
|
||||||
x++;
|
x++;
|
||||||
if (x == x1)
|
if (x == x1)
|
||||||
{
|
{
|
||||||
|
@ -274,8 +274,8 @@ void comp_main()
|
||||||
if (element_count != 0u)
|
if (element_count != 0u)
|
||||||
{
|
{
|
||||||
uint param_7 = element_count * 4u;
|
uint param_7 = element_count * 4u;
|
||||||
MallocResult _488 = malloc(param_7);
|
MallocResult _487 = malloc(param_7);
|
||||||
MallocResult chunk = _488;
|
MallocResult chunk = _487;
|
||||||
chunk_alloc = chunk.alloc;
|
chunk_alloc = chunk.alloc;
|
||||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||||
if (chunk.failed)
|
if (chunk.failed)
|
||||||
|
@ -284,31 +284,31 @@ void comp_main()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint out_ix = (_253.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
uint out_ix = (_253.Load(20) >> uint(2)) + (((my_partition * 256u) + gl_LocalInvocationID.x) * 2u);
|
||||||
Alloc _517;
|
Alloc _516;
|
||||||
_517.offset = _253.Load(20);
|
_516.offset = _253.Load(20);
|
||||||
Alloc param_8;
|
Alloc param_8;
|
||||||
param_8.offset = _517.offset;
|
param_8.offset = _516.offset;
|
||||||
uint param_9 = out_ix;
|
uint param_9 = out_ix;
|
||||||
uint param_10 = element_count;
|
uint param_10 = element_count;
|
||||||
write_mem(param_8, param_9, param_10);
|
write_mem(param_8, param_9, param_10);
|
||||||
Alloc _529;
|
Alloc _528;
|
||||||
_529.offset = _253.Load(20);
|
_528.offset = _253.Load(20);
|
||||||
Alloc param_11;
|
Alloc param_11;
|
||||||
param_11.offset = _529.offset;
|
param_11.offset = _528.offset;
|
||||||
uint param_12 = out_ix + 1u;
|
uint param_12 = out_ix + 1u;
|
||||||
uint param_13 = chunk_alloc.offset;
|
uint param_13 = chunk_alloc.offset;
|
||||||
write_mem(param_11, param_12, param_13);
|
write_mem(param_11, param_12, param_13);
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
bool _544;
|
bool _543;
|
||||||
if (!sh_alloc_failed)
|
if (!sh_alloc_failed)
|
||||||
{
|
{
|
||||||
_544 = _84.Load(4) != 0u;
|
_543 = _84.Load(4) != 0u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_544 = sh_alloc_failed;
|
_543 = sh_alloc_failed;
|
||||||
}
|
}
|
||||||
if (_544)
|
if (_543)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -327,11 +327,11 @@ void comp_main()
|
||||||
}
|
}
|
||||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||||
uint out_offset = out_alloc.offset + (idx * 4u);
|
uint out_offset = out_alloc.offset + (idx * 4u);
|
||||||
BinInstanceRef _606 = { out_offset };
|
BinInstanceRef _605 = { out_offset };
|
||||||
BinInstance _608 = { element_ix };
|
BinInstance _607 = { element_ix };
|
||||||
Alloc param_14 = out_alloc;
|
Alloc param_14 = out_alloc;
|
||||||
BinInstanceRef param_15 = _606;
|
BinInstanceRef param_15 = _605;
|
||||||
BinInstance param_16 = _608;
|
BinInstance param_16 = _607;
|
||||||
BinInstance_write(param_14, param_15, param_16);
|
BinInstance_write(param_14, param_15, param_16);
|
||||||
}
|
}
|
||||||
x++;
|
x++;
|
||||||
|
|
|
@ -260,10 +260,10 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
int x = x0;
|
int x = x0;
|
||||||
int y = y0;
|
int y = y0;
|
||||||
uint my_slice = gl_LocalInvocationID.x / 32u;
|
uint my_slice = gl_LocalInvocationID.x / 32u;
|
||||||
uint my_mask = uint(1 << int(gl_LocalInvocationID.x & 31u));
|
uint my_mask = 1u << (gl_LocalInvocationID.x & 31u);
|
||||||
while (y < y1)
|
while (y < y1)
|
||||||
{
|
{
|
||||||
uint _438 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
|
uint _437 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&bitmaps[my_slice][(uint(y) * width_in_bins) + uint(x)], my_mask, memory_order_relaxed);
|
||||||
x++;
|
x++;
|
||||||
if (x == x1)
|
if (x == x1)
|
||||||
{
|
{
|
||||||
|
@ -285,8 +285,8 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
if (element_count != 0u)
|
if (element_count != 0u)
|
||||||
{
|
{
|
||||||
uint param_7 = element_count * 4u;
|
uint param_7 = element_count * 4u;
|
||||||
MallocResult _488 = malloc(param_7, v_84, v_84BufferSize);
|
MallocResult _487 = malloc(param_7, v_84, v_84BufferSize);
|
||||||
MallocResult chunk = _488;
|
MallocResult chunk = _487;
|
||||||
chunk_alloc = chunk.alloc;
|
chunk_alloc = chunk.alloc;
|
||||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||||
if (chunk.failed)
|
if (chunk.failed)
|
||||||
|
@ -306,16 +306,16 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
uint param_13 = chunk_alloc.offset;
|
uint param_13 = chunk_alloc.offset;
|
||||||
write_mem(param_11, param_12, param_13, v_84, v_84BufferSize);
|
write_mem(param_11, param_12, param_13, v_84, v_84BufferSize);
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
bool _544;
|
bool _543;
|
||||||
if (!bool(sh_alloc_failed))
|
if (!bool(sh_alloc_failed))
|
||||||
{
|
{
|
||||||
_544 = v_84.mem_error != 0u;
|
_543 = v_84.mem_error != 0u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_544 = bool(sh_alloc_failed);
|
_543 = bool(sh_alloc_failed);
|
||||||
}
|
}
|
||||||
if (_544)
|
if (_543)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -151,7 +151,7 @@ struct Config
|
||||||
uint pathseg_offset;
|
uint pathseg_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static const DrawMonoid _418 = { 0u, 0u };
|
static const DrawMonoid _418 = { 0u, 0u };
|
||||||
static const DrawMonoid _442 = { 1u, 0u };
|
static const DrawMonoid _442 = { 1u, 0u };
|
||||||
|
@ -159,8 +159,8 @@ static const DrawMonoid _444 = { 1u, 1u };
|
||||||
|
|
||||||
RWByteAddressBuffer _201 : register(u0, space0);
|
RWByteAddressBuffer _201 : register(u0, space0);
|
||||||
ByteAddressBuffer _225 : register(t2, space0);
|
ByteAddressBuffer _225 : register(t2, space0);
|
||||||
ByteAddressBuffer _1005 : register(t3, space0);
|
ByteAddressBuffer _1004 : register(t3, space0);
|
||||||
ByteAddressBuffer _1039 : register(t1, space0);
|
ByteAddressBuffer _1038 : register(t1, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -172,7 +172,7 @@ struct SPIRV_Cross_Input
|
||||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
};
|
};
|
||||||
|
|
||||||
groupshared DrawMonoid sh_scratch[512];
|
groupshared DrawMonoid sh_scratch[256];
|
||||||
|
|
||||||
ElementTag Element_tag(ElementRef ref)
|
ElementTag Element_tag(ElementRef ref)
|
||||||
{
|
{
|
||||||
|
@ -558,7 +558,7 @@ void comp_main()
|
||||||
local[i] = agg;
|
local[i] = agg;
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -575,11 +575,11 @@ void comp_main()
|
||||||
DrawMonoid row = tag_monoid_identity();
|
DrawMonoid row = tag_monoid_identity();
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
DrawMonoid _1011;
|
DrawMonoid _1010;
|
||||||
_1011.path_ix = _1005.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
|
_1010.path_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 0);
|
||||||
_1011.clip_ix = _1005.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
|
_1010.clip_ix = _1004.Load((gl_WorkGroupID.x - 1u) * 8 + 4);
|
||||||
row.path_ix = _1011.path_ix;
|
row.path_ix = _1010.path_ix;
|
||||||
row.clip_ix = _1011.clip_ix;
|
row.clip_ix = _1010.clip_ix;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
@ -588,9 +588,9 @@ void comp_main()
|
||||||
row = combine_tag_monoid(param_10, param_11);
|
row = combine_tag_monoid(param_10, param_11);
|
||||||
}
|
}
|
||||||
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
||||||
uint out_base = (_1039.Load(44) >> uint(2)) + (out_ix * 2u);
|
uint out_base = (_1038.Load(44) >> uint(2)) + (out_ix * 2u);
|
||||||
AnnotatedRef _1055 = { _1039.Load(32) + (out_ix * 40u) };
|
AnnotatedRef _1054 = { _1038.Load(32) + (out_ix * 40u) };
|
||||||
AnnotatedRef out_ref = _1055;
|
AnnotatedRef out_ref = _1054;
|
||||||
float4 mat;
|
float4 mat;
|
||||||
float2 translate;
|
float2 translate;
|
||||||
AnnoColor anno_fill;
|
AnnoColor anno_fill;
|
||||||
|
@ -617,7 +617,7 @@ void comp_main()
|
||||||
tag_word = Element_tag(param_16).tag;
|
tag_word = Element_tag(param_16).tag;
|
||||||
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
|
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
|
||||||
{
|
{
|
||||||
uint bbox_offset = (_1039.Load(40) >> uint(2)) + (6u * (m.path_ix - 1u));
|
uint bbox_offset = (_1038.Load(40) >> uint(2)) + (6u * (m.path_ix - 1u));
|
||||||
float bbox_l = float(_201.Load(bbox_offset * 4 + 8)) - 32768.0f;
|
float bbox_l = float(_201.Load(bbox_offset * 4 + 8)) - 32768.0f;
|
||||||
float bbox_t = float(_201.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
|
float bbox_t = float(_201.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f;
|
||||||
float bbox_r = float(_201.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
|
float bbox_r = float(_201.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f;
|
||||||
|
@ -628,7 +628,7 @@ void comp_main()
|
||||||
if ((linewidth >= 0.0f) || (tag_word == 5u))
|
if ((linewidth >= 0.0f) || (tag_word == 5u))
|
||||||
{
|
{
|
||||||
uint trans_ix = _201.Load((bbox_offset + 5u) * 4 + 8);
|
uint trans_ix = _201.Load((bbox_offset + 5u) * 4 + 8);
|
||||||
uint t = (_1039.Load(36) >> uint(2)) + (6u * trans_ix);
|
uint t = (_1038.Load(36) >> uint(2)) + (6u * trans_ix);
|
||||||
mat = asfloat(uint4(_201.Load(t * 4 + 8), _201.Load((t + 1u) * 4 + 8), _201.Load((t + 2u) * 4 + 8), _201.Load((t + 3u) * 4 + 8)));
|
mat = asfloat(uint4(_201.Load(t * 4 + 8), _201.Load((t + 1u) * 4 + 8), _201.Load((t + 2u) * 4 + 8), _201.Load((t + 3u) * 4 + 8)));
|
||||||
if (tag_word == 5u)
|
if (tag_word == 5u)
|
||||||
{
|
{
|
||||||
|
@ -649,9 +649,9 @@ void comp_main()
|
||||||
anno_fill.bbox = bbox;
|
anno_fill.bbox = bbox;
|
||||||
anno_fill.linewidth = linewidth;
|
anno_fill.linewidth = linewidth;
|
||||||
anno_fill.rgba_color = fill.rgba_color;
|
anno_fill.rgba_color = fill.rgba_color;
|
||||||
Alloc _1258;
|
Alloc _1257;
|
||||||
_1258.offset = _1039.Load(32);
|
_1257.offset = _1038.Load(32);
|
||||||
param_18.offset = _1258.offset;
|
param_18.offset = _1257.offset;
|
||||||
AnnotatedRef param_19 = out_ref;
|
AnnotatedRef param_19 = out_ref;
|
||||||
uint param_20 = fill_mode;
|
uint param_20 = fill_mode;
|
||||||
AnnoColor param_21 = anno_fill;
|
AnnoColor param_21 = anno_fill;
|
||||||
|
@ -674,9 +674,9 @@ void comp_main()
|
||||||
anno_lin.line_x = line_x;
|
anno_lin.line_x = line_x;
|
||||||
anno_lin.line_y = line_y;
|
anno_lin.line_y = line_y;
|
||||||
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
||||||
Alloc _1354;
|
Alloc _1353;
|
||||||
_1354.offset = _1039.Load(32);
|
_1353.offset = _1038.Load(32);
|
||||||
param_23.offset = _1354.offset;
|
param_23.offset = _1353.offset;
|
||||||
AnnotatedRef param_24 = out_ref;
|
AnnotatedRef param_24 = out_ref;
|
||||||
uint param_25 = fill_mode;
|
uint param_25 = fill_mode;
|
||||||
AnnoLinGradient param_26 = anno_lin;
|
AnnoLinGradient param_26 = anno_lin;
|
||||||
|
@ -691,9 +691,9 @@ void comp_main()
|
||||||
anno_img.linewidth = linewidth;
|
anno_img.linewidth = linewidth;
|
||||||
anno_img.index = fill_img.index;
|
anno_img.index = fill_img.index;
|
||||||
anno_img.offset = fill_img.offset;
|
anno_img.offset = fill_img.offset;
|
||||||
Alloc _1382;
|
Alloc _1381;
|
||||||
_1382.offset = _1039.Load(32);
|
_1381.offset = _1038.Load(32);
|
||||||
param_28.offset = _1382.offset;
|
param_28.offset = _1381.offset;
|
||||||
AnnotatedRef param_29 = out_ref;
|
AnnotatedRef param_29 = out_ref;
|
||||||
uint param_30 = fill_mode;
|
uint param_30 = fill_mode;
|
||||||
AnnoImage param_31 = anno_img;
|
AnnoImage param_31 = anno_img;
|
||||||
|
@ -711,7 +711,7 @@ void comp_main()
|
||||||
anno_begin_clip.bbox = begin_clip.bbox;
|
anno_begin_clip.bbox = begin_clip.bbox;
|
||||||
anno_begin_clip.linewidth = 0.0f;
|
anno_begin_clip.linewidth = 0.0f;
|
||||||
Alloc _1410;
|
Alloc _1410;
|
||||||
_1410.offset = _1039.Load(32);
|
_1410.offset = _1038.Load(32);
|
||||||
param_33.offset = _1410.offset;
|
param_33.offset = _1410.offset;
|
||||||
AnnotatedRef param_34 = out_ref;
|
AnnotatedRef param_34 = out_ref;
|
||||||
uint param_35 = 0u;
|
uint param_35 = 0u;
|
||||||
|
@ -726,7 +726,7 @@ void comp_main()
|
||||||
Clip end_clip = Element_EndClip_read(param_37);
|
Clip end_clip = Element_EndClip_read(param_37);
|
||||||
anno_end_clip.bbox = end_clip.bbox;
|
anno_end_clip.bbox = end_clip.bbox;
|
||||||
Alloc _1435;
|
Alloc _1435;
|
||||||
_1435.offset = _1039.Load(32);
|
_1435.offset = _1038.Load(32);
|
||||||
param_38.offset = _1435.offset;
|
param_38.offset = _1435.offset;
|
||||||
AnnotatedRef param_39 = out_ref;
|
AnnotatedRef param_39 = out_ref;
|
||||||
AnnoEndClip param_40 = anno_end_clip;
|
AnnoEndClip param_40 = anno_end_clip;
|
||||||
|
@ -738,7 +738,7 @@ void comp_main()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[numthreads(512, 1, 1)]
|
[numthreads(256, 1, 1)]
|
||||||
void main(SPIRV_Cross_Input stage_input)
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
{
|
{
|
||||||
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||||
|
|
|
@ -230,7 +230,7 @@ struct ConfigBuf
|
||||||
Config conf;
|
Config conf;
|
||||||
};
|
};
|
||||||
|
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_225)
|
ElementTag Element_tag(thread const ElementRef& ref, const device SceneBuf& v_225)
|
||||||
|
@ -606,9 +606,9 @@ void Annotated_EndClip_write(thread const Alloc& a, thread const AnnotatedRef& r
|
||||||
AnnoEndClip_write(param_3, param_4, param_5, v_201);
|
AnnoEndClip_write(param_3, param_4, param_5, v_201);
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1039 [[buffer(1)]], const device SceneBuf& v_225 [[buffer(2)]], const device ParentBuf& _1005 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1038 [[buffer(1)]], const device SceneBuf& v_225 [[buffer(2)]], const device ParentBuf& _1004 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup DrawMonoid sh_scratch[512];
|
threadgroup DrawMonoid sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
ElementRef ref = ElementRef{ ix * 36u };
|
ElementRef ref = ElementRef{ ix * 36u };
|
||||||
ElementRef param = ref;
|
ElementRef param = ref;
|
||||||
|
@ -630,7 +630,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
local[i] = agg;
|
local[i] = agg;
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -647,9 +647,9 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
DrawMonoid row = tag_monoid_identity();
|
DrawMonoid row = tag_monoid_identity();
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
uint _1008 = gl_WorkGroupID.x - 1u;
|
uint _1007 = gl_WorkGroupID.x - 1u;
|
||||||
row.path_ix = _1005.parent[_1008].path_ix;
|
row.path_ix = _1004.parent[_1007].path_ix;
|
||||||
row.clip_ix = _1005.parent[_1008].clip_ix;
|
row.clip_ix = _1004.parent[_1007].clip_ix;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
@ -658,8 +658,8 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
row = combine_tag_monoid(param_10, param_11);
|
row = combine_tag_monoid(param_10, param_11);
|
||||||
}
|
}
|
||||||
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
uint out_ix = gl_GlobalInvocationID.x * 8u;
|
||||||
uint out_base = (_1039.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
|
uint out_base = (_1038.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 2u);
|
||||||
AnnotatedRef out_ref = AnnotatedRef{ _1039.conf.anno_alloc.offset + (out_ix * 40u) };
|
AnnotatedRef out_ref = AnnotatedRef{ _1038.conf.anno_alloc.offset + (out_ix * 40u) };
|
||||||
float4 mat;
|
float4 mat;
|
||||||
float2 translate;
|
float2 translate;
|
||||||
AnnoColor anno_fill;
|
AnnoColor anno_fill;
|
||||||
|
@ -686,7 +686,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
tag_word = Element_tag(param_16, v_225).tag;
|
tag_word = Element_tag(param_16, v_225).tag;
|
||||||
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
|
if (((tag_word == 4u) || (tag_word == 5u)) || (tag_word == 6u))
|
||||||
{
|
{
|
||||||
uint bbox_offset = (_1039.conf.bbox_alloc.offset >> uint(2)) + (6u * (m.path_ix - 1u));
|
uint bbox_offset = (_1038.conf.bbox_alloc.offset >> uint(2)) + (6u * (m.path_ix - 1u));
|
||||||
float bbox_l = float(v_201.memory[bbox_offset]) - 32768.0;
|
float bbox_l = float(v_201.memory[bbox_offset]) - 32768.0;
|
||||||
float bbox_t = float(v_201.memory[bbox_offset + 1u]) - 32768.0;
|
float bbox_t = float(v_201.memory[bbox_offset + 1u]) - 32768.0;
|
||||||
float bbox_r = float(v_201.memory[bbox_offset + 2u]) - 32768.0;
|
float bbox_r = float(v_201.memory[bbox_offset + 2u]) - 32768.0;
|
||||||
|
@ -697,7 +697,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
if ((linewidth >= 0.0) || (tag_word == 5u))
|
if ((linewidth >= 0.0) || (tag_word == 5u))
|
||||||
{
|
{
|
||||||
uint trans_ix = v_201.memory[bbox_offset + 5u];
|
uint trans_ix = v_201.memory[bbox_offset + 5u];
|
||||||
uint t = (_1039.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
|
uint t = (_1038.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix);
|
||||||
mat = as_type<float4>(uint4(v_201.memory[t], v_201.memory[t + 1u], v_201.memory[t + 2u], v_201.memory[t + 3u]));
|
mat = as_type<float4>(uint4(v_201.memory[t], v_201.memory[t + 1u], v_201.memory[t + 2u], v_201.memory[t + 3u]));
|
||||||
if (tag_word == 5u)
|
if (tag_word == 5u)
|
||||||
{
|
{
|
||||||
|
@ -718,7 +718,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
anno_fill.bbox = bbox;
|
anno_fill.bbox = bbox;
|
||||||
anno_fill.linewidth = linewidth;
|
anno_fill.linewidth = linewidth;
|
||||||
anno_fill.rgba_color = fill.rgba_color;
|
anno_fill.rgba_color = fill.rgba_color;
|
||||||
param_18.offset = _1039.conf.anno_alloc.offset;
|
param_18.offset = _1038.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_19 = out_ref;
|
AnnotatedRef param_19 = out_ref;
|
||||||
uint param_20 = fill_mode;
|
uint param_20 = fill_mode;
|
||||||
AnnoColor param_21 = anno_fill;
|
AnnoColor param_21 = anno_fill;
|
||||||
|
@ -741,7 +741,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
anno_lin.line_x = line_x;
|
anno_lin.line_x = line_x;
|
||||||
anno_lin.line_y = line_y;
|
anno_lin.line_y = line_y;
|
||||||
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
anno_lin.line_c = -((p0.x * line_x) + (p0.y * line_y));
|
||||||
param_23.offset = _1039.conf.anno_alloc.offset;
|
param_23.offset = _1038.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_24 = out_ref;
|
AnnotatedRef param_24 = out_ref;
|
||||||
uint param_25 = fill_mode;
|
uint param_25 = fill_mode;
|
||||||
AnnoLinGradient param_26 = anno_lin;
|
AnnoLinGradient param_26 = anno_lin;
|
||||||
|
@ -756,7 +756,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
anno_img.linewidth = linewidth;
|
anno_img.linewidth = linewidth;
|
||||||
anno_img.index = fill_img.index;
|
anno_img.index = fill_img.index;
|
||||||
anno_img.offset = fill_img.offset;
|
anno_img.offset = fill_img.offset;
|
||||||
param_28.offset = _1039.conf.anno_alloc.offset;
|
param_28.offset = _1038.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_29 = out_ref;
|
AnnotatedRef param_29 = out_ref;
|
||||||
uint param_30 = fill_mode;
|
uint param_30 = fill_mode;
|
||||||
AnnoImage param_31 = anno_img;
|
AnnoImage param_31 = anno_img;
|
||||||
|
@ -773,7 +773,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
Clip begin_clip = Element_BeginClip_read(param_32, v_225);
|
Clip begin_clip = Element_BeginClip_read(param_32, v_225);
|
||||||
anno_begin_clip.bbox = begin_clip.bbox;
|
anno_begin_clip.bbox = begin_clip.bbox;
|
||||||
anno_begin_clip.linewidth = 0.0;
|
anno_begin_clip.linewidth = 0.0;
|
||||||
param_33.offset = _1039.conf.anno_alloc.offset;
|
param_33.offset = _1038.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_34 = out_ref;
|
AnnotatedRef param_34 = out_ref;
|
||||||
uint param_35 = 0u;
|
uint param_35 = 0u;
|
||||||
AnnoBeginClip param_36 = anno_begin_clip;
|
AnnoBeginClip param_36 = anno_begin_clip;
|
||||||
|
@ -786,7 +786,7 @@ kernel void main0(device Memory& v_201 [[buffer(0)]], const device ConfigBuf& _1
|
||||||
ElementRef param_37 = this_ref;
|
ElementRef param_37 = this_ref;
|
||||||
Clip end_clip = Element_EndClip_read(param_37, v_225);
|
Clip end_clip = Element_EndClip_read(param_37, v_225);
|
||||||
anno_end_clip.bbox = end_clip.bbox;
|
anno_end_clip.bbox = end_clip.bbox;
|
||||||
param_38.offset = _1039.conf.anno_alloc.offset;
|
param_38.offset = _1038.conf.anno_alloc.offset;
|
||||||
AnnotatedRef param_39 = out_ref;
|
AnnotatedRef param_39 = out_ref;
|
||||||
AnnoEndClip param_40 = anno_end_clip;
|
AnnoEndClip param_40 = anno_end_clip;
|
||||||
Annotated_EndClip_write(param_38, param_39, param_40, v_201);
|
Annotated_EndClip_write(param_38, param_39, param_40, v_201);
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -15,7 +15,7 @@ struct DrawMonoid
|
||||||
uint clip_ix;
|
uint clip_ix;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
struct Alloc
|
struct Alloc
|
||||||
{
|
{
|
||||||
|
@ -49,9 +49,9 @@ static const DrawMonoid _89 = { 1u, 1u };
|
||||||
static const DrawMonoid _91 = { 0u, 0u };
|
static const DrawMonoid _91 = { 0u, 0u };
|
||||||
|
|
||||||
ByteAddressBuffer _46 : register(t2, space0);
|
ByteAddressBuffer _46 : register(t2, space0);
|
||||||
RWByteAddressBuffer _200 : register(u3, space0);
|
RWByteAddressBuffer _199 : register(u3, space0);
|
||||||
RWByteAddressBuffer _214 : register(u0, space0);
|
RWByteAddressBuffer _213 : register(u0, space0);
|
||||||
ByteAddressBuffer _220 : register(t1, space0);
|
ByteAddressBuffer _219 : register(t1, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -63,7 +63,7 @@ struct SPIRV_Cross_Input
|
||||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
};
|
};
|
||||||
|
|
||||||
groupshared DrawMonoid sh_scratch[512];
|
groupshared DrawMonoid sh_scratch[256];
|
||||||
|
|
||||||
ElementTag Element_tag(ElementRef ref)
|
ElementTag Element_tag(ElementRef ref)
|
||||||
{
|
{
|
||||||
|
@ -129,10 +129,10 @@ void comp_main()
|
||||||
agg = combine_tag_monoid(param_6, param_7);
|
agg = combine_tag_monoid(param_6, param_7);
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
|
||||||
{
|
{
|
||||||
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
||||||
DrawMonoid param_8 = agg;
|
DrawMonoid param_8 = agg;
|
||||||
|
@ -144,12 +144,12 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_200.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix);
|
_199.Store(gl_WorkGroupID.x * 8 + 0, agg.path_ix);
|
||||||
_200.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix);
|
_199.Store(gl_WorkGroupID.x * 8 + 4, agg.clip_ix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[numthreads(512, 1, 1)]
|
[numthreads(256, 1, 1)]
|
||||||
void main(SPIRV_Cross_Input stage_input)
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
{
|
{
|
||||||
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||||
|
|
|
@ -45,7 +45,7 @@ struct Memory
|
||||||
uint memory[1];
|
uint memory[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
struct Alloc
|
struct Alloc
|
||||||
{
|
{
|
||||||
|
@ -124,9 +124,9 @@ DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoi
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _200 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _199 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup DrawMonoid sh_scratch[512];
|
threadgroup DrawMonoid sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
ElementRef ref = ElementRef{ ix * 36u };
|
ElementRef ref = ElementRef{ ix * 36u };
|
||||||
ElementRef param = ref;
|
ElementRef param = ref;
|
||||||
|
@ -145,10 +145,10 @@ kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _200
|
||||||
agg = combine_tag_monoid(param_6, param_7);
|
agg = combine_tag_monoid(param_6, param_7);
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
|
||||||
{
|
{
|
||||||
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
DrawMonoid other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
||||||
DrawMonoid param_8 = agg;
|
DrawMonoid param_8 = agg;
|
||||||
|
@ -160,8 +160,8 @@ kernel void main0(const device SceneBuf& v_46 [[buffer(2)]], device OutBuf& _200
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_200.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
|
_199.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
|
||||||
_200.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
|
_199.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -4,7 +4,7 @@ struct DrawMonoid
|
||||||
uint clip_ix;
|
uint clip_ix;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static const DrawMonoid _18 = { 0u, 0u };
|
static const DrawMonoid _18 = { 0u, 0u };
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ struct SPIRV_Cross_Input
|
||||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
};
|
};
|
||||||
|
|
||||||
groupshared DrawMonoid sh_scratch[512];
|
groupshared DrawMonoid sh_scratch[256];
|
||||||
|
|
||||||
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
|
DrawMonoid combine_tag_monoid(DrawMonoid a, DrawMonoid b)
|
||||||
{
|
{
|
||||||
|
@ -55,7 +55,7 @@ void comp_main()
|
||||||
}
|
}
|
||||||
DrawMonoid agg = local[7];
|
DrawMonoid agg = local[7];
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -79,13 +79,13 @@ void comp_main()
|
||||||
DrawMonoid param_4 = row;
|
DrawMonoid param_4 = row;
|
||||||
DrawMonoid param_5 = local[i_2];
|
DrawMonoid param_5 = local[i_2];
|
||||||
DrawMonoid m = combine_tag_monoid(param_4, param_5);
|
DrawMonoid m = combine_tag_monoid(param_4, param_5);
|
||||||
uint _178 = ix + i_2;
|
uint _177 = ix + i_2;
|
||||||
_57.Store(_178 * 8 + 0, m.path_ix);
|
_57.Store(_177 * 8 + 0, m.path_ix);
|
||||||
_57.Store(_178 * 8 + 4, m.clip_ix);
|
_57.Store(_177 * 8 + 4, m.clip_ix);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[numthreads(512, 1, 1)]
|
[numthreads(256, 1, 1)]
|
||||||
void main(SPIRV_Cross_Input stage_input)
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
{
|
{
|
||||||
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
||||||
|
|
|
@ -61,7 +61,7 @@ struct DataBuf
|
||||||
DrawMonoid_1 data[1];
|
DrawMonoid_1 data[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
|
DrawMonoid combine_tag_monoid(thread const DrawMonoid& a, thread const DrawMonoid& b)
|
||||||
|
@ -80,7 +80,7 @@ DrawMonoid tag_monoid_identity()
|
||||||
|
|
||||||
kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||||
{
|
{
|
||||||
threadgroup DrawMonoid sh_scratch[512];
|
threadgroup DrawMonoid sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
spvUnsafeArray<DrawMonoid, 8> local;
|
spvUnsafeArray<DrawMonoid, 8> local;
|
||||||
local[0].path_ix = _57.data[ix].path_ix;
|
local[0].path_ix = _57.data[ix].path_ix;
|
||||||
|
@ -96,7 +96,7 @@ kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID
|
||||||
}
|
}
|
||||||
DrawMonoid agg = local[7];
|
DrawMonoid agg = local[7];
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -120,9 +120,9 @@ kernel void main0(device DataBuf& _57 [[buffer(0)]], uint3 gl_GlobalInvocationID
|
||||||
DrawMonoid param_4 = row;
|
DrawMonoid param_4 = row;
|
||||||
DrawMonoid param_5 = local[i_2];
|
DrawMonoid param_5 = local[i_2];
|
||||||
DrawMonoid m = combine_tag_monoid(param_4, param_5);
|
DrawMonoid m = combine_tag_monoid(param_4, param_5);
|
||||||
uint _178 = ix + i_2;
|
uint _177 = ix + i_2;
|
||||||
_57.data[_178].path_ix = m.path_ix;
|
_57.data[_177].path_ix = m.path_ix;
|
||||||
_57.data[_178].clip_ix = m.clip_ix;
|
_57.data[_177].clip_ix = m.clip_ix;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -72,7 +72,7 @@ struct Config
|
||||||
uint pathseg_offset;
|
uint pathseg_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static const TagMonoid _135 = { 0u, 0u, 0u, 0u, 0u };
|
static const TagMonoid _135 = { 0u, 0u, 0u, 0u, 0u };
|
||||||
static const Monoid _567 = { 0.0f.xxxx, 0u };
|
static const Monoid _567 = { 0.0f.xxxx, 0u };
|
||||||
|
@ -92,8 +92,8 @@ struct SPIRV_Cross_Input
|
||||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
};
|
};
|
||||||
|
|
||||||
groupshared TagMonoid sh_tag[512];
|
groupshared TagMonoid sh_tag[256];
|
||||||
groupshared Monoid sh_scratch[512];
|
groupshared Monoid sh_scratch[256];
|
||||||
|
|
||||||
TagMonoid reduce_tag(uint tag_word)
|
TagMonoid reduce_tag(uint tag_word)
|
||||||
{
|
{
|
||||||
|
@ -360,7 +360,7 @@ void comp_main()
|
||||||
uint param = tag_word;
|
uint param = tag_word;
|
||||||
TagMonoid local_tm = reduce_tag(param);
|
TagMonoid local_tm = reduce_tag(param);
|
||||||
sh_tag[gl_LocalInvocationID.x] = local_tm;
|
sh_tag[gl_LocalInvocationID.x] = local_tm;
|
||||||
for (uint i = 0u; i < 9u; i++)
|
for (uint i = 0u; i < 8u; i++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= (1u << i))
|
if (gl_LocalInvocationID.x >= (1u << i))
|
||||||
|
@ -547,7 +547,7 @@ void comp_main()
|
||||||
local[i_2] = agg;
|
local[i_2] = agg;
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_3 = 0u; i_3 < 9u; i_3++)
|
for (uint i_3 = 0u; i_3 < 8u; i_3++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_3))
|
if (gl_LocalInvocationID.x >= (1u << i_3))
|
||||||
|
@ -575,16 +575,16 @@ void comp_main()
|
||||||
Monoid m = combine_monoid(param_23, param_24);
|
Monoid m = combine_monoid(param_23, param_24);
|
||||||
bool do_atomic = false;
|
bool do_atomic = false;
|
||||||
bool _1263 = i_4 == 3u;
|
bool _1263 = i_4 == 3u;
|
||||||
bool _1270;
|
bool _1269;
|
||||||
if (_1263)
|
if (_1263)
|
||||||
{
|
{
|
||||||
_1270 = gl_LocalInvocationID.x == 511u;
|
_1269 = gl_LocalInvocationID.x == 255u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1270 = _1263;
|
_1269 = _1263;
|
||||||
}
|
}
|
||||||
if (_1270)
|
if (_1269)
|
||||||
{
|
{
|
||||||
do_atomic = true;
|
do_atomic = true;
|
||||||
}
|
}
|
||||||
|
@ -612,37 +612,37 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (do_atomic)
|
if (do_atomic)
|
||||||
{
|
{
|
||||||
bool _1335 = m.bbox.z > m.bbox.x;
|
bool _1334 = m.bbox.z > m.bbox.x;
|
||||||
bool _1344;
|
bool _1343;
|
||||||
if (!_1335)
|
if (!_1334)
|
||||||
{
|
{
|
||||||
_1344 = m.bbox.w > m.bbox.y;
|
_1343 = m.bbox.w > m.bbox.y;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1344 = _1335;
|
_1343 = _1334;
|
||||||
}
|
}
|
||||||
if (_1344)
|
if (_1343)
|
||||||
{
|
{
|
||||||
float param_29 = m.bbox.x;
|
float param_29 = m.bbox.x;
|
||||||
uint _1353;
|
uint _1352;
|
||||||
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1353);
|
_111.InterlockedMin(bbox_out_ix * 4 + 8, round_down(param_29), _1352);
|
||||||
float param_30 = m.bbox.y;
|
float param_30 = m.bbox.y;
|
||||||
uint _1361;
|
uint _1360;
|
||||||
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1361);
|
_111.InterlockedMin((bbox_out_ix + 1u) * 4 + 8, round_down(param_30), _1360);
|
||||||
float param_31 = m.bbox.z;
|
float param_31 = m.bbox.z;
|
||||||
uint _1369;
|
uint _1368;
|
||||||
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1369);
|
_111.InterlockedMax((bbox_out_ix + 2u) * 4 + 8, round_up(param_31), _1368);
|
||||||
float param_32 = m.bbox.w;
|
float param_32 = m.bbox.w;
|
||||||
uint _1377;
|
uint _1376;
|
||||||
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1377);
|
_111.InterlockedMax((bbox_out_ix + 3u) * 4 + 8, round_up(param_32), _1376);
|
||||||
}
|
}
|
||||||
bbox_out_ix += 6u;
|
bbox_out_ix += 6u;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[numthreads(512, 1, 1)]
|
[numthreads(256, 1, 1)]
|
||||||
void main(SPIRV_Cross_Input stage_input)
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
{
|
{
|
||||||
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||||
|
|
|
@ -156,7 +156,7 @@ struct ParentBuf
|
||||||
TagMonoid_1 parent[1];
|
TagMonoid_1 parent[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
TagMonoid reduce_tag(thread const uint& tag_word)
|
TagMonoid reduce_tag(thread const uint& tag_word)
|
||||||
|
@ -432,14 +432,14 @@ uint round_up(thread const float& x)
|
||||||
|
|
||||||
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _709 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _639 [[buffer(1)]], const device SceneBuf& v_574 [[buffer(2)]], const device ParentBuf& _709 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup TagMonoid sh_tag[512];
|
threadgroup TagMonoid sh_tag[256];
|
||||||
threadgroup Monoid sh_scratch[512];
|
threadgroup Monoid sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 4u;
|
uint ix = gl_GlobalInvocationID.x * 4u;
|
||||||
uint tag_word = v_574.scene[(_639.conf.pathtag_offset >> uint(2)) + (ix >> uint(2))];
|
uint tag_word = v_574.scene[(_639.conf.pathtag_offset >> uint(2)) + (ix >> uint(2))];
|
||||||
uint param = tag_word;
|
uint param = tag_word;
|
||||||
TagMonoid local_tm = reduce_tag(param);
|
TagMonoid local_tm = reduce_tag(param);
|
||||||
sh_tag[gl_LocalInvocationID.x] = local_tm;
|
sh_tag[gl_LocalInvocationID.x] = local_tm;
|
||||||
for (uint i = 0u; i < 9u; i++)
|
for (uint i = 0u; i < 8u; i++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= (1u << i))
|
if (gl_LocalInvocationID.x >= (1u << i))
|
||||||
|
@ -615,7 +615,7 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
||||||
local[i_2] = agg;
|
local[i_2] = agg;
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_3 = 0u; i_3 < 9u; i_3++)
|
for (uint i_3 = 0u; i_3 < 8u; i_3++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_3))
|
if (gl_LocalInvocationID.x >= (1u << i_3))
|
||||||
|
@ -643,16 +643,16 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
||||||
Monoid m = combine_monoid(param_23, param_24);
|
Monoid m = combine_monoid(param_23, param_24);
|
||||||
bool do_atomic = false;
|
bool do_atomic = false;
|
||||||
bool _1263 = i_4 == 3u;
|
bool _1263 = i_4 == 3u;
|
||||||
bool _1270;
|
bool _1269;
|
||||||
if (_1263)
|
if (_1263)
|
||||||
{
|
{
|
||||||
_1270 = gl_LocalInvocationID.x == 511u;
|
_1269 = gl_LocalInvocationID.x == 255u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1270 = _1263;
|
_1269 = _1263;
|
||||||
}
|
}
|
||||||
if (_1270)
|
if (_1269)
|
||||||
{
|
{
|
||||||
do_atomic = true;
|
do_atomic = true;
|
||||||
}
|
}
|
||||||
|
@ -680,26 +680,26 @@ kernel void main0(device Memory& v_111 [[buffer(0)]], const device ConfigBuf& _6
|
||||||
}
|
}
|
||||||
if (do_atomic)
|
if (do_atomic)
|
||||||
{
|
{
|
||||||
bool _1335 = m.bbox.z > m.bbox.x;
|
bool _1334 = m.bbox.z > m.bbox.x;
|
||||||
bool _1344;
|
bool _1343;
|
||||||
if (!_1335)
|
if (!_1334)
|
||||||
{
|
{
|
||||||
_1344 = m.bbox.w > m.bbox.y;
|
_1343 = m.bbox.w > m.bbox.y;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_1344 = _1335;
|
_1343 = _1334;
|
||||||
}
|
}
|
||||||
if (_1344)
|
if (_1343)
|
||||||
{
|
{
|
||||||
float param_29 = m.bbox.x;
|
float param_29 = m.bbox.x;
|
||||||
uint _1353 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
|
uint _1352 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix], round_down(param_29), memory_order_relaxed);
|
||||||
float param_30 = m.bbox.y;
|
float param_30 = m.bbox.y;
|
||||||
uint _1361 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
|
uint _1360 = atomic_fetch_min_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 1u], round_down(param_30), memory_order_relaxed);
|
||||||
float param_31 = m.bbox.z;
|
float param_31 = m.bbox.z;
|
||||||
uint _1369 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
|
uint _1368 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 2u], round_up(param_31), memory_order_relaxed);
|
||||||
float param_32 = m.bbox.w;
|
float param_32 = m.bbox.w;
|
||||||
uint _1377 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
|
uint _1376 = atomic_fetch_max_explicit((device atomic_uint*)&v_111.memory[bbox_out_ix + 3u], round_up(param_32), memory_order_relaxed);
|
||||||
}
|
}
|
||||||
bbox_out_ix += 6u;
|
bbox_out_ix += 6u;
|
||||||
}
|
}
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -38,8 +38,8 @@ static const uint3 gl_WorkGroupSize = uint3(128u, 1u, 1u);
|
||||||
|
|
||||||
ByteAddressBuffer _139 : register(t1, space0);
|
ByteAddressBuffer _139 : register(t1, space0);
|
||||||
ByteAddressBuffer _150 : register(t2, space0);
|
ByteAddressBuffer _150 : register(t2, space0);
|
||||||
RWByteAddressBuffer _238 : register(u3, space0);
|
RWByteAddressBuffer _237 : register(u3, space0);
|
||||||
RWByteAddressBuffer _258 : register(u0, space0);
|
RWByteAddressBuffer _257 : register(u0, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -82,12 +82,12 @@ TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
|
||||||
|
|
||||||
void comp_main()
|
void comp_main()
|
||||||
{
|
{
|
||||||
uint ix = gl_GlobalInvocationID.x * 4u;
|
uint ix = gl_GlobalInvocationID.x * 2u;
|
||||||
uint scene_ix = (_139.Load(64) >> uint(2)) + ix;
|
uint scene_ix = (_139.Load(64) >> uint(2)) + ix;
|
||||||
uint tag_word = _150.Load(scene_ix * 4 + 0);
|
uint tag_word = _150.Load(scene_ix * 4 + 0);
|
||||||
uint param = tag_word;
|
uint param = tag_word;
|
||||||
TagMonoid agg = reduce_tag(param);
|
TagMonoid agg = reduce_tag(param);
|
||||||
for (uint i = 1u; i < 4u; i++)
|
for (uint i = 1u; i < 2u; i++)
|
||||||
{
|
{
|
||||||
tag_word = _150.Load((scene_ix + i) * 4 + 0);
|
tag_word = _150.Load((scene_ix + i) * 4 + 0);
|
||||||
uint param_1 = tag_word;
|
uint param_1 = tag_word;
|
||||||
|
@ -111,11 +111,11 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_238.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
|
_237.Store(gl_WorkGroupID.x * 20 + 0, agg.trans_ix);
|
||||||
_238.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
|
_237.Store(gl_WorkGroupID.x * 20 + 4, agg.linewidth_ix);
|
||||||
_238.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
|
_237.Store(gl_WorkGroupID.x * 20 + 8, agg.pathseg_ix);
|
||||||
_238.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
|
_237.Store(gl_WorkGroupID.x * 20 + 12, agg.path_ix);
|
||||||
_238.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
|
_237.Store(gl_WorkGroupID.x * 20 + 16, agg.pathseg_offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -103,15 +103,15 @@ TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid&
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _238 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device SceneBuf& _150 [[buffer(2)]], device OutBuf& _237 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup TagMonoid sh_scratch[128];
|
threadgroup TagMonoid sh_scratch[128];
|
||||||
uint ix = gl_GlobalInvocationID.x * 4u;
|
uint ix = gl_GlobalInvocationID.x * 2u;
|
||||||
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
|
uint scene_ix = (_139.conf.pathtag_offset >> uint(2)) + ix;
|
||||||
uint tag_word = _150.scene[scene_ix];
|
uint tag_word = _150.scene[scene_ix];
|
||||||
uint param = tag_word;
|
uint param = tag_word;
|
||||||
TagMonoid agg = reduce_tag(param);
|
TagMonoid agg = reduce_tag(param);
|
||||||
for (uint i = 1u; i < 4u; i++)
|
for (uint i = 1u; i < 2u; i++)
|
||||||
{
|
{
|
||||||
tag_word = _150.scene[scene_ix + i];
|
tag_word = _150.scene[scene_ix + i];
|
||||||
uint param_1 = tag_word;
|
uint param_1 = tag_word;
|
||||||
|
@ -135,11 +135,11 @@ kernel void main0(const device ConfigBuf& _139 [[buffer(1)]], const device Scene
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_238.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
|
_237.outbuf[gl_WorkGroupID.x].trans_ix = agg.trans_ix;
|
||||||
_238.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
|
_237.outbuf[gl_WorkGroupID.x].linewidth_ix = agg.linewidth_ix;
|
||||||
_238.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
|
_237.outbuf[gl_WorkGroupID.x].pathseg_ix = agg.pathseg_ix;
|
||||||
_238.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
|
_237.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix;
|
||||||
_238.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
|
_237.outbuf[gl_WorkGroupID.x].pathseg_offset = agg.pathseg_offset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -7,7 +7,7 @@ struct TagMonoid
|
||||||
uint pathseg_offset;
|
uint pathseg_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static const TagMonoid _18 = { 0u, 0u, 0u, 0u, 0u };
|
static const TagMonoid _18 = { 0u, 0u, 0u, 0u, 0u };
|
||||||
|
|
||||||
|
@ -21,7 +21,7 @@ struct SPIRV_Cross_Input
|
||||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
};
|
};
|
||||||
|
|
||||||
groupshared TagMonoid sh_scratch[512];
|
groupshared TagMonoid sh_scratch[256];
|
||||||
|
|
||||||
TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
|
TagMonoid combine_tag_monoid(TagMonoid a, TagMonoid b)
|
||||||
{
|
{
|
||||||
|
@ -73,7 +73,7 @@ void comp_main()
|
||||||
}
|
}
|
||||||
TagMonoid agg = local[7];
|
TagMonoid agg = local[7];
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -97,16 +97,16 @@ void comp_main()
|
||||||
TagMonoid param_4 = row;
|
TagMonoid param_4 = row;
|
||||||
TagMonoid param_5 = local[i_2];
|
TagMonoid param_5 = local[i_2];
|
||||||
TagMonoid m = combine_tag_monoid(param_4, param_5);
|
TagMonoid m = combine_tag_monoid(param_4, param_5);
|
||||||
uint _211 = ix + i_2;
|
uint _210 = ix + i_2;
|
||||||
_78.Store(_211 * 20 + 0, m.trans_ix);
|
_78.Store(_210 * 20 + 0, m.trans_ix);
|
||||||
_78.Store(_211 * 20 + 4, m.linewidth_ix);
|
_78.Store(_210 * 20 + 4, m.linewidth_ix);
|
||||||
_78.Store(_211 * 20 + 8, m.pathseg_ix);
|
_78.Store(_210 * 20 + 8, m.pathseg_ix);
|
||||||
_78.Store(_211 * 20 + 12, m.path_ix);
|
_78.Store(_210 * 20 + 12, m.path_ix);
|
||||||
_78.Store(_211 * 20 + 16, m.pathseg_offset);
|
_78.Store(_210 * 20 + 16, m.pathseg_offset);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[numthreads(512, 1, 1)]
|
[numthreads(256, 1, 1)]
|
||||||
void main(SPIRV_Cross_Input stage_input)
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
{
|
{
|
||||||
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
||||||
|
|
|
@ -67,7 +67,7 @@ struct DataBuf
|
||||||
TagMonoid_1 data[1];
|
TagMonoid_1 data[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
|
TagMonoid combine_tag_monoid(thread const TagMonoid& a, thread const TagMonoid& b)
|
||||||
|
@ -89,7 +89,7 @@ TagMonoid tag_monoid_identity()
|
||||||
|
|
||||||
kernel void main0(device DataBuf& _78 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
kernel void main0(device DataBuf& _78 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||||
{
|
{
|
||||||
threadgroup TagMonoid sh_scratch[512];
|
threadgroup TagMonoid sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
spvUnsafeArray<TagMonoid, 8> local;
|
spvUnsafeArray<TagMonoid, 8> local;
|
||||||
local[0].trans_ix = _78.data[ix].trans_ix;
|
local[0].trans_ix = _78.data[ix].trans_ix;
|
||||||
|
@ -111,7 +111,7 @@ kernel void main0(device DataBuf& _78 [[buffer(0)]], uint3 gl_GlobalInvocationID
|
||||||
}
|
}
|
||||||
TagMonoid agg = local[7];
|
TagMonoid agg = local[7];
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -135,12 +135,12 @@ kernel void main0(device DataBuf& _78 [[buffer(0)]], uint3 gl_GlobalInvocationID
|
||||||
TagMonoid param_4 = row;
|
TagMonoid param_4 = row;
|
||||||
TagMonoid param_5 = local[i_2];
|
TagMonoid param_5 = local[i_2];
|
||||||
TagMonoid m = combine_tag_monoid(param_4, param_5);
|
TagMonoid m = combine_tag_monoid(param_4, param_5);
|
||||||
uint _211 = ix + i_2;
|
uint _210 = ix + i_2;
|
||||||
_78.data[_211].trans_ix = m.trans_ix;
|
_78.data[_210].trans_ix = m.trans_ix;
|
||||||
_78.data[_211].linewidth_ix = m.linewidth_ix;
|
_78.data[_210].linewidth_ix = m.linewidth_ix;
|
||||||
_78.data[_211].pathseg_ix = m.pathseg_ix;
|
_78.data[_210].pathseg_ix = m.pathseg_ix;
|
||||||
_78.data[_211].path_ix = m.path_ix;
|
_78.data[_210].path_ix = m.path_ix;
|
||||||
_78.data[_211].pathseg_offset = m.pathseg_offset;
|
_78.data[_210].pathseg_offset = m.pathseg_offset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -261,7 +261,7 @@ void comp_main()
|
||||||
for (uint i = 0u; i < 8u; i++)
|
for (uint i = 0u; i < 8u; i++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (th_ix >= uint(1 << int(i)))
|
if (th_ix >= (1u << i))
|
||||||
{
|
{
|
||||||
total_tile_count += sh_tile_count[th_ix - (1u << i)];
|
total_tile_count += sh_tile_count[th_ix - (1u << i)];
|
||||||
}
|
}
|
||||||
|
@ -271,46 +271,46 @@ void comp_main()
|
||||||
if (th_ix == 255u)
|
if (th_ix == 255u)
|
||||||
{
|
{
|
||||||
uint param_4 = total_tile_count * 8u;
|
uint param_4 = total_tile_count * 8u;
|
||||||
MallocResult _477 = malloc(param_4);
|
MallocResult _476 = malloc(param_4);
|
||||||
sh_tile_alloc = _477;
|
sh_tile_alloc = _476;
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
MallocResult alloc_start = sh_tile_alloc;
|
MallocResult alloc_start = sh_tile_alloc;
|
||||||
bool _488;
|
bool _487;
|
||||||
if (!alloc_start.failed)
|
if (!alloc_start.failed)
|
||||||
{
|
{
|
||||||
_488 = _92.Load(4) != 0u;
|
_487 = _92.Load(4) != 0u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_488 = alloc_start.failed;
|
_487 = alloc_start.failed;
|
||||||
}
|
}
|
||||||
if (_488)
|
if (_487)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (element_ix < _305.Load(0))
|
if (element_ix < _305.Load(0))
|
||||||
{
|
{
|
||||||
uint _501;
|
uint _500;
|
||||||
if (th_ix > 0u)
|
if (th_ix > 0u)
|
||||||
{
|
{
|
||||||
_501 = sh_tile_count[th_ix - 1u];
|
_500 = sh_tile_count[th_ix - 1u];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_501 = 0u;
|
_500 = 0u;
|
||||||
}
|
}
|
||||||
uint tile_subix = _501;
|
uint tile_subix = _500;
|
||||||
Alloc param_5 = alloc_start.alloc;
|
Alloc param_5 = alloc_start.alloc;
|
||||||
uint param_6 = 8u * tile_subix;
|
uint param_6 = 8u * tile_subix;
|
||||||
uint param_7 = 8u * tile_count;
|
uint param_7 = 8u * tile_count;
|
||||||
Alloc tiles_alloc = slice_mem(param_5, param_6, param_7);
|
Alloc tiles_alloc = slice_mem(param_5, param_6, param_7);
|
||||||
TileRef _523 = { tiles_alloc.offset };
|
TileRef _522 = { tiles_alloc.offset };
|
||||||
path.tiles = _523;
|
path.tiles = _522;
|
||||||
Alloc _528;
|
Alloc _527;
|
||||||
_528.offset = _305.Load(16);
|
_527.offset = _305.Load(16);
|
||||||
Alloc param_8;
|
Alloc param_8;
|
||||||
param_8.offset = _528.offset;
|
param_8.offset = _527.offset;
|
||||||
PathRef param_9 = path_ref;
|
PathRef param_9 = path_ref;
|
||||||
Path param_10 = path;
|
Path param_10 = path;
|
||||||
Path_write(param_8, param_9, param_10);
|
Path_write(param_8, param_9, param_10);
|
||||||
|
|
|
@ -272,7 +272,7 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
for (uint i = 0u; i < 8u; i++)
|
for (uint i = 0u; i < 8u; i++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (th_ix >= uint(1 << int(i)))
|
if (th_ix >= (1u << i))
|
||||||
{
|
{
|
||||||
total_tile_count += sh_tile_count[th_ix - (1u << i)];
|
total_tile_count += sh_tile_count[th_ix - (1u << i)];
|
||||||
}
|
}
|
||||||
|
@ -282,36 +282,36 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M
|
||||||
if (th_ix == 255u)
|
if (th_ix == 255u)
|
||||||
{
|
{
|
||||||
uint param_4 = total_tile_count * 8u;
|
uint param_4 = total_tile_count * 8u;
|
||||||
MallocResult _477 = malloc(param_4, v_92, v_92BufferSize);
|
MallocResult _476 = malloc(param_4, v_92, v_92BufferSize);
|
||||||
sh_tile_alloc = _477;
|
sh_tile_alloc = _476;
|
||||||
}
|
}
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
MallocResult alloc_start = sh_tile_alloc;
|
MallocResult alloc_start = sh_tile_alloc;
|
||||||
bool _488;
|
bool _487;
|
||||||
if (!alloc_start.failed)
|
if (!alloc_start.failed)
|
||||||
{
|
{
|
||||||
_488 = v_92.mem_error != 0u;
|
_487 = v_92.mem_error != 0u;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_488 = alloc_start.failed;
|
_487 = alloc_start.failed;
|
||||||
}
|
}
|
||||||
if (_488)
|
if (_487)
|
||||||
{
|
{
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (element_ix < _305.conf.n_elements)
|
if (element_ix < _305.conf.n_elements)
|
||||||
{
|
{
|
||||||
uint _501;
|
uint _500;
|
||||||
if (th_ix > 0u)
|
if (th_ix > 0u)
|
||||||
{
|
{
|
||||||
_501 = sh_tile_count[th_ix - 1u];
|
_500 = sh_tile_count[th_ix - 1u];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
_501 = 0u;
|
_500 = 0u;
|
||||||
}
|
}
|
||||||
uint tile_subix = _501;
|
uint tile_subix = _500;
|
||||||
Alloc param_5 = alloc_start.alloc;
|
Alloc param_5 = alloc_start.alloc;
|
||||||
uint param_6 = 8u * tile_subix;
|
uint param_6 = 8u * tile_subix;
|
||||||
uint param_7 = 8u * tile_count;
|
uint param_7 = 8u * tile_count;
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -47,14 +47,14 @@ struct Config
|
||||||
uint pathseg_offset;
|
uint pathseg_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static const Transform _224 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
|
static const Transform _224 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
|
||||||
|
|
||||||
RWByteAddressBuffer _71 : register(u0, space0);
|
RWByteAddressBuffer _71 : register(u0, space0);
|
||||||
ByteAddressBuffer _96 : register(t2, space0);
|
ByteAddressBuffer _96 : register(t2, space0);
|
||||||
ByteAddressBuffer _278 : register(t1, space0);
|
ByteAddressBuffer _278 : register(t1, space0);
|
||||||
ByteAddressBuffer _377 : register(t3, space0);
|
ByteAddressBuffer _376 : register(t3, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -66,7 +66,7 @@ struct SPIRV_Cross_Input
|
||||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
};
|
};
|
||||||
|
|
||||||
groupshared Transform sh_scratch[512];
|
groupshared Transform sh_scratch[256];
|
||||||
|
|
||||||
Transform Transform_read(TransformRef ref)
|
Transform Transform_read(TransformRef ref)
|
||||||
{
|
{
|
||||||
|
@ -167,7 +167,7 @@ void comp_main()
|
||||||
local[i] = agg;
|
local[i] = agg;
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -184,11 +184,11 @@ void comp_main()
|
||||||
Transform row = monoid_identity();
|
Transform row = monoid_identity();
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
Transform _383;
|
Transform _382;
|
||||||
_383.mat = asfloat(_377.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
|
_382.mat = asfloat(_376.Load4((gl_WorkGroupID.x - 1u) * 32 + 0));
|
||||||
_383.translate = asfloat(_377.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
|
_382.translate = asfloat(_376.Load2((gl_WorkGroupID.x - 1u) * 32 + 16));
|
||||||
row.mat = _383.mat;
|
row.mat = _382.mat;
|
||||||
row.translate = _383.translate;
|
row.translate = _382.translate;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
@ -202,20 +202,20 @@ void comp_main()
|
||||||
Transform param_10 = row;
|
Transform param_10 = row;
|
||||||
Transform param_11 = local[i_2];
|
Transform param_11 = local[i_2];
|
||||||
Transform m = combine_monoid(param_10, param_11);
|
Transform m = combine_monoid(param_10, param_11);
|
||||||
TransformSeg _423 = { m.mat, m.translate };
|
TransformSeg _422 = { m.mat, m.translate };
|
||||||
TransformSeg transform = _423;
|
TransformSeg transform = _422;
|
||||||
TransformSegRef _433 = { _278.Load(36) + ((ix + i_2) * 24u) };
|
TransformSegRef _432 = { _278.Load(36) + ((ix + i_2) * 24u) };
|
||||||
TransformSegRef trans_ref = _433;
|
TransformSegRef trans_ref = _432;
|
||||||
Alloc _437;
|
Alloc _436;
|
||||||
_437.offset = _278.Load(36);
|
_436.offset = _278.Load(36);
|
||||||
param_12.offset = _437.offset;
|
param_12.offset = _436.offset;
|
||||||
TransformSegRef param_13 = trans_ref;
|
TransformSegRef param_13 = trans_ref;
|
||||||
TransformSeg param_14 = transform;
|
TransformSeg param_14 = transform;
|
||||||
TransformSeg_write(param_12, param_13, param_14);
|
TransformSeg_write(param_12, param_13, param_14);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[numthreads(512, 1, 1)]
|
[numthreads(256, 1, 1)]
|
||||||
void main(SPIRV_Cross_Input stage_input)
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
{
|
{
|
||||||
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||||
|
|
|
@ -127,7 +127,7 @@ struct ParentBuf
|
||||||
Transform_1 parent[1];
|
Transform_1 parent[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_96)
|
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_96)
|
||||||
|
@ -214,9 +214,9 @@ void TransformSeg_write(thread const Alloc& a, thread const TransformSegRef& ref
|
||||||
write_mem(param_15, param_16, param_17, v_71);
|
write_mem(param_15, param_16, param_17, v_71);
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _278 [[buffer(1)]], const device SceneBuf& v_96 [[buffer(2)]], const device ParentBuf& _377 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _278 [[buffer(1)]], const device SceneBuf& v_96 [[buffer(2)]], const device ParentBuf& _376 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup Transform sh_scratch[512];
|
threadgroup Transform sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
TransformRef ref = TransformRef{ _278.conf.trans_offset + (ix * 24u) };
|
TransformRef ref = TransformRef{ _278.conf.trans_offset + (ix * 24u) };
|
||||||
TransformRef param = ref;
|
TransformRef param = ref;
|
||||||
|
@ -234,7 +234,7 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
|
||||||
local[i] = agg;
|
local[i] = agg;
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -251,9 +251,9 @@ kernel void main0(device Memory& v_71 [[buffer(0)]], const device ConfigBuf& _27
|
||||||
Transform row = monoid_identity();
|
Transform row = monoid_identity();
|
||||||
if (gl_WorkGroupID.x > 0u)
|
if (gl_WorkGroupID.x > 0u)
|
||||||
{
|
{
|
||||||
uint _380 = gl_WorkGroupID.x - 1u;
|
uint _379 = gl_WorkGroupID.x - 1u;
|
||||||
row.mat = _377.parent[_380].mat;
|
row.mat = _376.parent[_379].mat;
|
||||||
row.translate = _377.parent[_380].translate;
|
row.translate = _376.parent[_379].translate;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x > 0u)
|
if (gl_LocalInvocationID.x > 0u)
|
||||||
{
|
{
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -36,12 +36,12 @@ struct Config
|
||||||
uint pathseg_offset;
|
uint pathseg_offset;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
ByteAddressBuffer _49 : register(t2, space0);
|
ByteAddressBuffer _49 : register(t2, space0);
|
||||||
ByteAddressBuffer _161 : register(t1, space0);
|
ByteAddressBuffer _161 : register(t1, space0);
|
||||||
RWByteAddressBuffer _251 : register(u3, space0);
|
RWByteAddressBuffer _250 : register(u3, space0);
|
||||||
RWByteAddressBuffer _267 : register(u0, space0);
|
RWByteAddressBuffer _266 : register(u0, space0);
|
||||||
|
|
||||||
static uint3 gl_WorkGroupID;
|
static uint3 gl_WorkGroupID;
|
||||||
static uint3 gl_LocalInvocationID;
|
static uint3 gl_LocalInvocationID;
|
||||||
|
@ -53,7 +53,7 @@ struct SPIRV_Cross_Input
|
||||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
};
|
};
|
||||||
|
|
||||||
groupshared Transform sh_scratch[512];
|
groupshared Transform sh_scratch[256];
|
||||||
|
|
||||||
Transform Transform_read(TransformRef ref)
|
Transform Transform_read(TransformRef ref)
|
||||||
{
|
{
|
||||||
|
@ -101,10 +101,10 @@ void comp_main()
|
||||||
agg = combine_monoid(param_4, param_5);
|
agg = combine_monoid(param_4, param_5);
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
|
||||||
{
|
{
|
||||||
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
||||||
Transform param_6 = agg;
|
Transform param_6 = agg;
|
||||||
|
@ -116,12 +116,12 @@ void comp_main()
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_251.Store4(gl_WorkGroupID.x * 32 + 0, asuint(agg.mat));
|
_250.Store4(gl_WorkGroupID.x * 32 + 0, asuint(agg.mat));
|
||||||
_251.Store2(gl_WorkGroupID.x * 32 + 16, asuint(agg.translate));
|
_250.Store2(gl_WorkGroupID.x * 32 + 16, asuint(agg.translate));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[numthreads(512, 1, 1)]
|
[numthreads(256, 1, 1)]
|
||||||
void main(SPIRV_Cross_Input stage_input)
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
{
|
{
|
||||||
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
gl_WorkGroupID = stage_input.gl_WorkGroupID;
|
||||||
|
|
|
@ -72,7 +72,7 @@ struct Memory
|
||||||
uint memory[1];
|
uint memory[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_49)
|
Transform Transform_read(thread const TransformRef& ref, const device SceneBuf& v_49)
|
||||||
|
@ -105,9 +105,9 @@ Transform combine_monoid(thread const Transform& a, thread const Transform& b)
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
|
||||||
kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], device OutBuf& _251 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device SceneBuf& v_49 [[buffer(2)]], device OutBuf& _250 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
|
||||||
{
|
{
|
||||||
threadgroup Transform sh_scratch[512];
|
threadgroup Transform sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
TransformRef ref = TransformRef{ _161.conf.trans_offset + (ix * 24u) };
|
TransformRef ref = TransformRef{ _161.conf.trans_offset + (ix * 24u) };
|
||||||
TransformRef param = ref;
|
TransformRef param = ref;
|
||||||
|
@ -122,10 +122,10 @@ kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device Scene
|
||||||
agg = combine_monoid(param_4, param_5);
|
agg = combine_monoid(param_4, param_5);
|
||||||
}
|
}
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if ((gl_LocalInvocationID.x + (1u << i_1)) < 512u)
|
if ((gl_LocalInvocationID.x + (1u << i_1)) < 256u)
|
||||||
{
|
{
|
||||||
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
Transform other = sh_scratch[gl_LocalInvocationID.x + (1u << i_1)];
|
||||||
Transform param_6 = agg;
|
Transform param_6 = agg;
|
||||||
|
@ -137,8 +137,8 @@ kernel void main0(const device ConfigBuf& _161 [[buffer(1)]], const device Scene
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0u)
|
if (gl_LocalInvocationID.x == 0u)
|
||||||
{
|
{
|
||||||
_251.outbuf[gl_WorkGroupID.x].mat = agg.mat;
|
_250.outbuf[gl_WorkGroupID.x].mat = agg.mat;
|
||||||
_251.outbuf[gl_WorkGroupID.x].translate = agg.translate;
|
_250.outbuf[gl_WorkGroupID.x].translate = agg.translate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -4,7 +4,7 @@ struct Transform
|
||||||
float2 translate;
|
float2 translate;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const uint3 gl_WorkGroupSize = uint3(512u, 1u, 1u);
|
static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static const Transform _23 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
|
static const Transform _23 = { float4(1.0f, 0.0f, 0.0f, 1.0f), 0.0f.xx };
|
||||||
|
|
||||||
|
@ -18,7 +18,7 @@ struct SPIRV_Cross_Input
|
||||||
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
|
||||||
};
|
};
|
||||||
|
|
||||||
groupshared Transform sh_scratch[512];
|
groupshared Transform sh_scratch[256];
|
||||||
|
|
||||||
Transform combine_monoid(Transform a, Transform b)
|
Transform combine_monoid(Transform a, Transform b)
|
||||||
{
|
{
|
||||||
|
@ -55,7 +55,7 @@ void comp_main()
|
||||||
}
|
}
|
||||||
Transform agg = local[7];
|
Transform agg = local[7];
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -79,13 +79,13 @@ void comp_main()
|
||||||
Transform param_4 = row;
|
Transform param_4 = row;
|
||||||
Transform param_5 = local[i_2];
|
Transform param_5 = local[i_2];
|
||||||
Transform m = combine_monoid(param_4, param_5);
|
Transform m = combine_monoid(param_4, param_5);
|
||||||
uint _209 = ix + i_2;
|
uint _208 = ix + i_2;
|
||||||
_89.Store4(_209 * 32 + 0, asuint(m.mat));
|
_89.Store4(_208 * 32 + 0, asuint(m.mat));
|
||||||
_89.Store2(_209 * 32 + 16, asuint(m.translate));
|
_89.Store2(_208 * 32 + 16, asuint(m.translate));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
[numthreads(512, 1, 1)]
|
[numthreads(256, 1, 1)]
|
||||||
void main(SPIRV_Cross_Input stage_input)
|
void main(SPIRV_Cross_Input stage_input)
|
||||||
{
|
{
|
||||||
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
gl_LocalInvocationID = stage_input.gl_LocalInvocationID;
|
||||||
|
|
|
@ -62,7 +62,7 @@ struct DataBuf
|
||||||
Transform_1 data[1];
|
Transform_1 data[1];
|
||||||
};
|
};
|
||||||
|
|
||||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(512u, 1u, 1u);
|
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(256u, 1u, 1u);
|
||||||
|
|
||||||
static inline __attribute__((always_inline))
|
static inline __attribute__((always_inline))
|
||||||
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
|
Transform combine_monoid(thread const Transform& a, thread const Transform& b)
|
||||||
|
@ -81,7 +81,7 @@ Transform monoid_identity()
|
||||||
|
|
||||||
kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]])
|
||||||
{
|
{
|
||||||
threadgroup Transform sh_scratch[512];
|
threadgroup Transform sh_scratch[256];
|
||||||
uint ix = gl_GlobalInvocationID.x * 8u;
|
uint ix = gl_GlobalInvocationID.x * 8u;
|
||||||
spvUnsafeArray<Transform, 8> local;
|
spvUnsafeArray<Transform, 8> local;
|
||||||
local[0].mat = _89.data[ix].mat;
|
local[0].mat = _89.data[ix].mat;
|
||||||
|
@ -97,7 +97,7 @@ kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID
|
||||||
}
|
}
|
||||||
Transform agg = local[7];
|
Transform agg = local[7];
|
||||||
sh_scratch[gl_LocalInvocationID.x] = agg;
|
sh_scratch[gl_LocalInvocationID.x] = agg;
|
||||||
for (uint i_1 = 0u; i_1 < 9u; i_1++)
|
for (uint i_1 = 0u; i_1 < 8u; i_1++)
|
||||||
{
|
{
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (gl_LocalInvocationID.x >= (1u << i_1))
|
if (gl_LocalInvocationID.x >= (1u << i_1))
|
||||||
|
@ -121,9 +121,9 @@ kernel void main0(device DataBuf& _89 [[buffer(0)]], uint3 gl_GlobalInvocationID
|
||||||
Transform param_4 = row;
|
Transform param_4 = row;
|
||||||
Transform param_5 = local[i_2];
|
Transform param_5 = local[i_2];
|
||||||
Transform m = combine_monoid(param_4, param_5);
|
Transform m = combine_monoid(param_4, param_5);
|
||||||
uint _209 = ix + i_2;
|
uint _208 = ix + i_2;
|
||||||
_89.data[_209].mat = m.mat;
|
_89.data[_208].mat = m.mat;
|
||||||
_89.data[_209].translate = m.translate;
|
_89.data[_208].translate = m.translate;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Binary file not shown.
|
@ -10,7 +10,7 @@
|
||||||
#include "pathtag.h"
|
#include "pathtag.h"
|
||||||
|
|
||||||
#define N_SEQ 4
|
#define N_SEQ 4
|
||||||
#define LG_WG_SIZE 9
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_SEQ)
|
#define PARTITION_SIZE (WG_SIZE * N_SEQ)
|
||||||
|
|
||||||
|
|
|
@ -11,8 +11,8 @@
|
||||||
|
|
||||||
// Note: the partition size is smaller than pathseg by a factor
|
// Note: the partition size is smaller than pathseg by a factor
|
||||||
// of 4, as there are 4 tag bytes to a tag word.
|
// of 4, as there are 4 tag bytes to a tag word.
|
||||||
#define N_ROWS 4
|
#define N_ROWS 2
|
||||||
#define LG_WG_SIZE 7
|
#define LG_WG_SIZE (6 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,11 @@
|
||||||
#version 450
|
#version 450
|
||||||
#extension GL_GOOGLE_include_directive : enable
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "setup.h"
|
||||||
#include "pathtag.h"
|
#include "pathtag.h"
|
||||||
|
|
||||||
#define N_ROWS 8
|
#define N_ROWS 8
|
||||||
#define LG_WG_SIZE 9
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,7 @@
|
||||||
|
|
||||||
#define GRADIENT_WIDTH 512
|
#define GRADIENT_WIDTH 512
|
||||||
|
|
||||||
|
#ifdef ERR_MALLOC_FAILED
|
||||||
struct Config {
|
struct Config {
|
||||||
uint n_elements; // paths
|
uint n_elements; // paths
|
||||||
uint n_pathseg;
|
uint n_pathseg;
|
||||||
|
@ -59,6 +60,7 @@ struct Config {
|
||||||
// Offset (in bytes) of path segment stream in scene
|
// Offset (in bytes) of path segment stream in scene
|
||||||
uint pathseg_offset;
|
uint pathseg_offset;
|
||||||
};
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
// Fill modes.
|
// Fill modes.
|
||||||
#define MODE_NONZERO 0
|
#define MODE_NONZERO 0
|
||||||
|
|
|
@ -72,7 +72,7 @@ void main() {
|
||||||
// Prefix sum of sh_tile_count
|
// Prefix sum of sh_tile_count
|
||||||
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
|
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
|
||||||
barrier();
|
barrier();
|
||||||
if (th_ix >= (1 << i)) {
|
if (th_ix >= (1u << i)) {
|
||||||
total_tile_count += sh_tile_count[th_ix - (1u << i)];
|
total_tile_count += sh_tile_count[th_ix - (1u << i)];
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
|
|
|
@ -10,7 +10,7 @@
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define N_ROWS 8
|
#define N_ROWS 8
|
||||||
#define LG_WG_SIZE 9
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
#define N_ROWS 8
|
#define N_ROWS 8
|
||||||
#define LG_WG_SIZE 9
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
|
|
@ -3,9 +3,12 @@
|
||||||
// A scan for a tree reduction prefix scan (either root or not, by ifdef).
|
// A scan for a tree reduction prefix scan (either root or not, by ifdef).
|
||||||
|
|
||||||
#version 450
|
#version 450
|
||||||
|
#extension GL_GOOGLE_include_directive : enable
|
||||||
|
|
||||||
|
#include "setup.h"
|
||||||
|
|
||||||
#define N_ROWS 8
|
#define N_ROWS 8
|
||||||
#define LG_WG_SIZE 9
|
#define LG_WG_SIZE (7 + LG_WG_FACTOR)
|
||||||
#define WG_SIZE (1 << LG_WG_SIZE)
|
#define WG_SIZE (1 << LG_WG_SIZE)
|
||||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||||
|
|
||||||
|
|
|
@ -19,7 +19,9 @@
|
||||||
use bytemuck::{Pod, Zeroable};
|
use bytemuck::{Pod, Zeroable};
|
||||||
use piet_gpu_hal::BufWrite;
|
use piet_gpu_hal::BufWrite;
|
||||||
|
|
||||||
use crate::stages::{self, Config, PathEncoder, Transform};
|
use crate::stages::{
|
||||||
|
self, Config, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
pub struct Encoder {
|
pub struct Encoder {
|
||||||
transform_stream: Vec<stages::Transform>,
|
transform_stream: Vec<stages::Transform>,
|
||||||
|
@ -52,12 +54,6 @@ const BBOX_SIZE: usize = 24;
|
||||||
const DRAWMONOID_SIZE: usize = 8;
|
const DRAWMONOID_SIZE: usize = 8;
|
||||||
const ANNOTATED_SIZE: usize = 40;
|
const ANNOTATED_SIZE: usize = 40;
|
||||||
|
|
||||||
// Maybe pull these from the relevant stages? In any case, they may depend
|
|
||||||
// on runtime query of GPU (supported workgroup size).
|
|
||||||
const TRANSFORM_PART_SIZE: usize = 4096;
|
|
||||||
const PATHSEG_PART_SIZE: usize = 2048;
|
|
||||||
const DRAWOBJ_PART_SIZE: usize = 4096;
|
|
||||||
|
|
||||||
// These are bytemuck versions of elements currently defined in the
|
// These are bytemuck versions of elements currently defined in the
|
||||||
// Element struct in piet-gpu-types; that's pretty much going away.
|
// Element struct in piet-gpu-types; that's pretty much going away.
|
||||||
|
|
||||||
|
@ -183,15 +179,15 @@ impl Encoder {
|
||||||
pub fn stage_config(&self) -> (Config, usize) {
|
pub fn stage_config(&self) -> (Config, usize) {
|
||||||
// Layout of scene buffer
|
// Layout of scene buffer
|
||||||
let n_drawobj = self.n_drawobj();
|
let n_drawobj = self.n_drawobj();
|
||||||
let n_drawobj_padded = align_up(n_drawobj, DRAWOBJ_PART_SIZE);
|
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
|
||||||
let trans_offset = n_drawobj_padded * DRAWOBJ_SIZE;
|
let trans_offset = n_drawobj_padded * DRAWOBJ_SIZE;
|
||||||
let n_trans = self.transform_stream.len();
|
let n_trans = self.transform_stream.len();
|
||||||
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE);
|
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
|
||||||
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
|
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
|
||||||
let n_linewidth = self.linewidth_stream.len();
|
let n_linewidth = self.linewidth_stream.len();
|
||||||
let pathtag_offset = linewidth_offset + n_linewidth * LINEWIDTH_SIZE;
|
let pathtag_offset = linewidth_offset + n_linewidth * LINEWIDTH_SIZE;
|
||||||
let n_pathtag = self.tag_stream.len();
|
let n_pathtag = self.tag_stream.len();
|
||||||
let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE);
|
let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
|
||||||
let pathseg_offset = pathtag_offset + n_pathtag_padded;
|
let pathseg_offset = pathtag_offset + n_pathtag_padded;
|
||||||
|
|
||||||
// Layout of memory
|
// Layout of memory
|
||||||
|
@ -230,14 +226,14 @@ impl Encoder {
|
||||||
pub fn write_scene(&self, buf: &mut BufWrite) {
|
pub fn write_scene(&self, buf: &mut BufWrite) {
|
||||||
buf.extend_slice(&self.drawobj_stream);
|
buf.extend_slice(&self.drawobj_stream);
|
||||||
let n_drawobj = self.drawobj_stream.len() / DRAWOBJ_SIZE;
|
let n_drawobj = self.drawobj_stream.len() / DRAWOBJ_SIZE;
|
||||||
buf.fill_zero(padding(n_drawobj, DRAWOBJ_PART_SIZE) * DRAWOBJ_SIZE);
|
buf.fill_zero(padding(n_drawobj, DRAW_PART_SIZE as usize) * DRAWOBJ_SIZE);
|
||||||
buf.extend_slice(&self.transform_stream);
|
buf.extend_slice(&self.transform_stream);
|
||||||
let n_trans = self.transform_stream.len();
|
let n_trans = self.transform_stream.len();
|
||||||
buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE) * TRANSFORM_SIZE);
|
buf.fill_zero(padding(n_trans, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE);
|
||||||
buf.extend_slice(&self.linewidth_stream);
|
buf.extend_slice(&self.linewidth_stream);
|
||||||
buf.extend_slice(&self.tag_stream);
|
buf.extend_slice(&self.tag_stream);
|
||||||
let n_pathtag = self.tag_stream.len();
|
let n_pathtag = self.tag_stream.len();
|
||||||
buf.fill_zero(padding(n_pathtag, PATHSEG_PART_SIZE));
|
buf.fill_zero(padding(n_pathtag, PATHSEG_PART_SIZE as usize));
|
||||||
buf.extend_slice(&self.pathseg_stream);
|
buf.extend_slice(&self.pathseg_stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -11,7 +11,7 @@ use piet::{
|
||||||
|
|
||||||
use piet_gpu_hal::BufWrite;
|
use piet_gpu_hal::BufWrite;
|
||||||
use piet_gpu_types::encoder::{Encode, Encoder};
|
use piet_gpu_types::encoder::{Encode, Encoder};
|
||||||
use piet_gpu_types::scene::{Element, SetFillMode};
|
use piet_gpu_types::scene::Element;
|
||||||
|
|
||||||
use crate::gradient::{LinearGradient, RampCache};
|
use crate::gradient::{LinearGradient, RampCache};
|
||||||
use crate::text::Font;
|
use crate::text::Font;
|
||||||
|
@ -25,7 +25,6 @@ pub struct PietGpuRenderContext {
|
||||||
// Will probably need direct accesss to hal Device to create images etc.
|
// Will probably need direct accesss to hal Device to create images etc.
|
||||||
inner_text: PietGpuText,
|
inner_text: PietGpuText,
|
||||||
stroke_width: f32,
|
stroke_width: f32,
|
||||||
fill_mode: FillMode,
|
|
||||||
// We're tallying these cpu-side for expedience, but will probably
|
// We're tallying these cpu-side for expedience, but will probably
|
||||||
// move this to some kind of readback from element processing.
|
// move this to some kind of readback from element processing.
|
||||||
/// The count of elements that make it through to coarse rasterization.
|
/// The count of elements that make it through to coarse rasterization.
|
||||||
|
@ -69,14 +68,6 @@ struct ClipElement {
|
||||||
bbox: Option<Rect>,
|
bbox: Option<Rect>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, PartialEq)]
|
|
||||||
pub(crate) enum FillMode {
|
|
||||||
// Fill path according to the non-zero winding rule.
|
|
||||||
Nonzero = 0,
|
|
||||||
// Fill stroked path.
|
|
||||||
Stroke = 1,
|
|
||||||
}
|
|
||||||
|
|
||||||
const TOLERANCE: f64 = 0.25;
|
const TOLERANCE: f64 = 0.25;
|
||||||
|
|
||||||
impl PietGpuRenderContext {
|
impl PietGpuRenderContext {
|
||||||
|
@ -91,7 +82,6 @@ impl PietGpuRenderContext {
|
||||||
elements,
|
elements,
|
||||||
inner_text,
|
inner_text,
|
||||||
stroke_width,
|
stroke_width,
|
||||||
fill_mode: FillMode::Nonzero,
|
|
||||||
path_count: 0,
|
path_count: 0,
|
||||||
pathseg_count: 0,
|
pathseg_count: 0,
|
||||||
trans_count: 0,
|
trans_count: 0,
|
||||||
|
@ -160,15 +150,6 @@ impl PietGpuRenderContext {
|
||||||
pub fn get_ramp_data(&self) -> Vec<u32> {
|
pub fn get_ramp_data(&self) -> Vec<u32> {
|
||||||
self.ramp_cache.get_ramp_data()
|
self.ramp_cache.get_ramp_data()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn set_fill_mode(&mut self, fill_mode: FillMode) {
|
|
||||||
if self.fill_mode != fill_mode {
|
|
||||||
self.elements.push(Element::SetFillMode(SetFillMode {
|
|
||||||
fill_mode: fill_mode as u32,
|
|
||||||
}));
|
|
||||||
self.fill_mode = fill_mode;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RenderContext for PietGpuRenderContext {
|
impl RenderContext for PietGpuRenderContext {
|
||||||
|
|
|
@ -22,10 +22,12 @@ mod transform;
|
||||||
|
|
||||||
use bytemuck::{Pod, Zeroable};
|
use bytemuck::{Pod, Zeroable};
|
||||||
|
|
||||||
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage};
|
pub use draw::{DrawBinding, DrawCode, DrawMonoid, DrawStage, DRAW_PART_SIZE};
|
||||||
pub use path::{PathBinding, PathCode, PathEncoder, PathStage};
|
pub use path::{PathBinding, PathCode, PathEncoder, PathStage, PATHSEG_PART_SIZE};
|
||||||
use piet_gpu_hal::{Buffer, CmdBuf, Session};
|
use piet_gpu_hal::{Buffer, CmdBuf, Session};
|
||||||
pub use transform::{Transform, TransformBinding, TransformCode, TransformStage};
|
pub use transform::{
|
||||||
|
Transform, TransformBinding, TransformCode, TransformStage, TRANSFORM_PART_SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
/// The configuration block passed to piet-gpu shaders.
|
/// The configuration block passed to piet-gpu shaders.
|
||||||
///
|
///
|
||||||
|
|
|
@ -30,9 +30,9 @@ pub struct DrawMonoid {
|
||||||
pub clip_ix: u32,
|
pub clip_ix: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
const DRAW_WG: u64 = 512;
|
const DRAW_WG: u64 = 256;
|
||||||
const DRAW_N_ROWS: u64 = 8;
|
const DRAW_N_ROWS: u64 = 8;
|
||||||
const DRAW_PART_SIZE: u64 = DRAW_WG * DRAW_N_ROWS;
|
pub const DRAW_PART_SIZE: u64 = DRAW_WG * DRAW_N_ROWS;
|
||||||
|
|
||||||
pub struct DrawCode {
|
pub struct DrawCode {
|
||||||
reduce_pipeline: Pipeline,
|
reduce_pipeline: Pipeline,
|
||||||
|
|
|
@ -39,22 +39,23 @@ pub struct PathBinding {
|
||||||
}
|
}
|
||||||
|
|
||||||
const REDUCE_WG: u32 = 128;
|
const REDUCE_WG: u32 = 128;
|
||||||
const REDUCE_N_ROWS: u32 = 4;
|
const REDUCE_N_ROWS: u32 = 2;
|
||||||
const REDUCE_PART_SIZE: u32 = REDUCE_WG * REDUCE_N_ROWS;
|
const REDUCE_PART_SIZE: u32 = REDUCE_WG * REDUCE_N_ROWS;
|
||||||
|
|
||||||
const ROOT_WG: u32 = 512;
|
const ROOT_WG: u32 = 256;
|
||||||
const ROOT_N_ROWS: u32 = 8;
|
const ROOT_N_ROWS: u32 = 8;
|
||||||
const ROOT_PART_SIZE: u32 = ROOT_WG * ROOT_N_ROWS;
|
const ROOT_PART_SIZE: u32 = ROOT_WG * ROOT_N_ROWS;
|
||||||
|
|
||||||
const SCAN_WG: u32 = 512;
|
const SCAN_WG: u32 = 256;
|
||||||
const SCAN_N_ROWS: u32 = 4;
|
const SCAN_N_ROWS: u32 = 4;
|
||||||
const SCAN_PART_SIZE: u32 = SCAN_WG * SCAN_N_ROWS;
|
const SCAN_PART_SIZE: u32 = SCAN_WG * SCAN_N_ROWS;
|
||||||
|
|
||||||
const CLEAR_WG: u32 = 512;
|
pub const PATHSEG_PART_SIZE: u32 = SCAN_PART_SIZE;
|
||||||
|
|
||||||
|
const CLEAR_WG: u32 = 256;
|
||||||
|
|
||||||
impl PathCode {
|
impl PathCode {
|
||||||
pub unsafe fn new(session: &Session) -> PathCode {
|
pub unsafe fn new(session: &Session) -> PathCode {
|
||||||
// TODO: add cross-compilation
|
|
||||||
let reduce_code = include_shader!(session, "../../shader/gen/pathtag_reduce");
|
let reduce_code = include_shader!(session, "../../shader/gen/pathtag_reduce");
|
||||||
let reduce_pipeline = session
|
let reduce_pipeline = session
|
||||||
.create_compute_pipeline(
|
.create_compute_pipeline(
|
||||||
|
|
|
@ -33,9 +33,9 @@ pub struct Transform {
|
||||||
pub translate: [f32; 2],
|
pub translate: [f32; 2],
|
||||||
}
|
}
|
||||||
|
|
||||||
const TRANSFORM_WG: u64 = 512;
|
const TRANSFORM_WG: u64 = 256;
|
||||||
const TRANSFORM_N_ROWS: u64 = 8;
|
const TRANSFORM_N_ROWS: u64 = 8;
|
||||||
const TRANSFORM_PART_SIZE: u64 = TRANSFORM_WG * TRANSFORM_N_ROWS;
|
pub const TRANSFORM_PART_SIZE: u64 = TRANSFORM_WG * TRANSFORM_N_ROWS;
|
||||||
|
|
||||||
pub struct TransformCode {
|
pub struct TransformCode {
|
||||||
reduce_pipeline: Pipeline,
|
reduce_pipeline: Pipeline,
|
||||||
|
|
|
@ -11,7 +11,7 @@ use piet::{
|
||||||
};
|
};
|
||||||
|
|
||||||
use crate::encoder::GlyphEncoder;
|
use crate::encoder::GlyphEncoder;
|
||||||
use crate::render_ctx::{self, FillMode};
|
use crate::render_ctx;
|
||||||
use crate::stages::Transform;
|
use crate::stages::Transform;
|
||||||
use crate::PietGpuRenderContext;
|
use crate::PietGpuRenderContext;
|
||||||
|
|
||||||
|
@ -172,7 +172,6 @@ impl PietGpuTextLayout {
|
||||||
let mut inv_transform = None;
|
let mut inv_transform = None;
|
||||||
// TODO: handle y offsets also
|
// TODO: handle y offsets also
|
||||||
let mut last_x = 0.0;
|
let mut last_x = 0.0;
|
||||||
ctx.set_fill_mode(FillMode::Nonzero);
|
|
||||||
for glyph in &self.glyphs {
|
for glyph in &self.glyphs {
|
||||||
let transform = match &mut inv_transform {
|
let transform = match &mut inv_transform {
|
||||||
None => {
|
None => {
|
||||||
|
|
|
@ -38,7 +38,8 @@ struct DrawTestData {
|
||||||
|
|
||||||
pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult {
|
pub unsafe fn draw_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||||
let mut result = TestResult::new("draw");
|
let mut result = TestResult::new("draw");
|
||||||
let n_tag: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 24);
|
// TODO: implement large scan and set large to 1 << 24
|
||||||
|
let n_tag: u64 = config.size.choose(1 << 12, 1 << 20, 1 << 22);
|
||||||
let data = DrawTestData::new(n_tag);
|
let data = DrawTestData::new(n_tag);
|
||||||
let stage_config = data.get_config();
|
let stage_config = data.get_config();
|
||||||
|
|
||||||
|
|
|
@ -62,7 +62,8 @@ struct Bbox {
|
||||||
pub unsafe fn path_test(runner: &mut Runner, config: &Config) -> TestResult {
|
pub unsafe fn path_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||||
let mut result = TestResult::new("path");
|
let mut result = TestResult::new("path");
|
||||||
|
|
||||||
let n_path: u64 = config.size.choose(1 << 12, 1 << 16, 1 << 18);
|
// TODO: implement large scans and raise limit
|
||||||
|
let n_path: u64 = config.size.choose(1 << 12, 1 << 16, 209_000);
|
||||||
let path_data = PathData::new(n_path as u32);
|
let path_data = PathData::new(n_path as u32);
|
||||||
let stage_config = path_data.get_config();
|
let stage_config = path_data.get_config();
|
||||||
let config_buf = runner
|
let config_buf = runner
|
||||||
|
|
|
@ -30,7 +30,8 @@ struct AffineTestData {
|
||||||
|
|
||||||
pub unsafe fn transform_test(runner: &mut Runner, config: &Config) -> TestResult {
|
pub unsafe fn transform_test(runner: &mut Runner, config: &Config) -> TestResult {
|
||||||
let mut result = TestResult::new("transform");
|
let mut result = TestResult::new("transform");
|
||||||
let n_elements: u64 = config.size.choose(1 << 12, 1 << 18, 1 << 24);
|
// TODO: implement large scan and set large to 1 << 24
|
||||||
|
let n_elements: u64 = config.size.choose(1 << 12, 1 << 18, 1 << 22);
|
||||||
// Validate with real transform data.
|
// Validate with real transform data.
|
||||||
let data = AffineTestData::new(n_elements as usize);
|
let data = AffineTestData::new(n_elements as usize);
|
||||||
let data_buf = runner
|
let data_buf = runner
|
||||||
|
|
Loading…
Reference in a new issue