mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 20:51:29 +11:00
33d7b25a92
This adds a prefix sum test. This patch is also trying to get a little more serious about structuring both the test runner (toward the goal of collecting proper statistics) and pipeline stages for the tests. Still WIP but giving good results.
47 lines
874 B
Plaintext
Generated
47 lines
874 B
Plaintext
Generated
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
|
|
|
#include <metal_stdlib>
|
|
#include <simd/simd.h>
|
|
|
|
using namespace metal;
|
|
|
|
// Layout of the writable output buffer. The kernel `main0` receives it as
// `device OutBuf&` at [[buffer(0)]] and stores one uint result per thread.
struct OutBuf
|
|
{
|
|
// Declared with one element, but `main0` indexes it with the thread's grid
// x position, so the declared length does not bound the accesses.
// NOTE(review): presumably stands in for a runtime-sized array in the shader
// this file was generated from - confirm against that source.
uint out_buf[1];
|
|
};
|
|
|
|
// Layout of the read-only input buffer. The kernel `main0` receives it as
// `const device InBuf&` at [[buffer(1)]] and loads one uint per thread.
struct InBuf
|
|
{
|
|
// Declared with one element, but `main0` indexes it with the thread's grid
// x position, so the declared length does not bound the accesses.
// NOTE(review): presumably stands in for a runtime-sized array in the shader
// this file was generated from - confirm against that source.
uint in_buf[1];
|
|
};
|
|
|
|
// Workgroup-size constant, initialised to uint3(1u) (every component is 1).
// Tagged [[maybe_unused]]; nothing in the visible code reads it.
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(1u);
|
|
|
|
// Counts the Collatz steps taken to bring `n` down to 1.
//
// One step halves an even value, or replaces an odd value with 3n + 1.
//
// Parameters:
//   n - starting value, passed by reference in the thread address space.
//       It is modified in place; on return it holds the value the loop
//       stopped on (1 for ordinary inputs, 0 in the cases described below).
//
// Returns the number of steps performed. Inputs 0 and 1 both yield 0.
//
// The loop runs while `n > 1u` rather than `n != 1u`. Zero is even and
// 0 / 2 == 0, so with the `!= 1u` test a zero value could never leave the
// loop. Zero is reachable from a non-zero input as well: 3n + 1 is computed
// in 32-bit unsigned arithmetic, and for n == 0x55555555 it wraps to exactly 0.
// Inputs that already terminated under the old condition return the same
// count as before.
//
// NOTE: no overflow detection is performed. For inputs whose sequence exceeds
// 32 bits the returned count reflects the wrapped arithmetic, not the true
// Collatz sequence.
static inline __attribute__((always_inline))
uint collatz_iterations(thread uint& n)
{
    uint i = 0u;
    while (n > 1u)
    {
        if ((n % 2u) == 0u)
        {
            n /= 2u;
        }
        else
        {
            n = (3u * n) + 1u;
        }
        i++;
    }
    return i;
}
|
|
|
|
// Compute entry point. Each thread handles the single element selected by the
// x component of its position in the grid: it loads that element from the
// input buffer (_59, [[buffer(1)]]), runs collatz_iterations on it, and stores
// the returned step count at the same index of the output buffer
// (_53, [[buffer(0)]]).
//
// No bounds check is made here; the index comes straight from
// gl_GlobalInvocationID.x.
kernel void main0(device OutBuf& _53 [[buffer(0)]], const device InBuf& _59 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
    const uint slot = gl_GlobalInvocationID.x;

    // collatz_iterations takes its argument by mutable thread reference, so
    // hand it a thread-local copy rather than the read-only buffer element.
    uint scratch = _59.in_buf[slot];

    _53.out_buf[slot] = collatz_iterations(scratch);
}
|
|
|