mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Fix write-after-read in prefix test
Thanks to Jeff Bolz for spotting the write-after-read hazard on the sh_flag accesses. This fixes observed failures on Nvidia Turing and Ampere on DX12.
This commit is contained in:
parent
76a6f1fec8
commit
69b6632085
Binary file not shown.
|
@ -109,6 +109,7 @@ void comp_main()
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
DeviceMemoryBarrier();
|
DeviceMemoryBarrier();
|
||||||
uint flag_1 = sh_flag;
|
uint flag_1 = sh_flag;
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (flag_1 == 2u)
|
if (flag_1 == 2u)
|
||||||
{
|
{
|
||||||
if (gl_LocalInvocationID.x == 511u)
|
if (gl_LocalInvocationID.x == 511u)
|
||||||
|
@ -174,6 +175,7 @@ void comp_main()
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
flag_1 = sh_flag;
|
flag_1 = sh_flag;
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (flag_1 == 2u)
|
if (flag_1 == 2u)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -160,6 +160,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
threadgroup_barrier(mem_flags::mem_device);
|
threadgroup_barrier(mem_flags::mem_device);
|
||||||
uint flag_1 = sh_flag;
|
uint flag_1 = sh_flag;
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (flag_1 == 2u)
|
if (flag_1 == 2u)
|
||||||
{
|
{
|
||||||
if (gl_LocalInvocationID.x == 511u)
|
if (gl_LocalInvocationID.x == 511u)
|
||||||
|
@ -219,6 +220,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
|
||||||
}
|
}
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
flag_1 = sh_flag;
|
flag_1 = sh_flag;
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (flag_1 == 2u)
|
if (flag_1 == 2u)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -112,6 +112,7 @@ void comp_main()
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
DeviceMemoryBarrier();
|
DeviceMemoryBarrier();
|
||||||
uint flag_1 = sh_flag;
|
uint flag_1 = sh_flag;
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (flag_1 == 2u)
|
if (flag_1 == 2u)
|
||||||
{
|
{
|
||||||
if (gl_LocalInvocationID.x == 511u)
|
if (gl_LocalInvocationID.x == 511u)
|
||||||
|
@ -177,6 +178,7 @@ void comp_main()
|
||||||
}
|
}
|
||||||
GroupMemoryBarrierWithGroupSync();
|
GroupMemoryBarrierWithGroupSync();
|
||||||
flag_1 = sh_flag;
|
flag_1 = sh_flag;
|
||||||
|
GroupMemoryBarrierWithGroupSync();
|
||||||
if (flag_1 == 2u)
|
if (flag_1 == 2u)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -161,6 +161,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
threadgroup_barrier(mem_flags::mem_device);
|
threadgroup_barrier(mem_flags::mem_device);
|
||||||
uint flag_1 = sh_flag;
|
uint flag_1 = sh_flag;
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (flag_1 == 2u)
|
if (flag_1 == 2u)
|
||||||
{
|
{
|
||||||
if (gl_LocalInvocationID.x == 511u)
|
if (gl_LocalInvocationID.x == 511u)
|
||||||
|
@ -220,6 +221,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
|
||||||
}
|
}
|
||||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
flag_1 = sh_flag;
|
flag_1 = sh_flag;
|
||||||
|
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||||
if (flag_1 == 2u)
|
if (flag_1 == 2u)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -144,6 +144,7 @@ void main() {
|
||||||
memoryBarrierBuffer();
|
memoryBarrierBuffer();
|
||||||
#endif
|
#endif
|
||||||
uint flag = sh_flag;
|
uint flag = sh_flag;
|
||||||
|
barrier();
|
||||||
|
|
||||||
if (flag == FLAG_PREFIX_READY) {
|
if (flag == FLAG_PREFIX_READY) {
|
||||||
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
|
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
|
||||||
|
@ -185,6 +186,7 @@ void main() {
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
flag = sh_flag;
|
flag = sh_flag;
|
||||||
|
barrier();
|
||||||
if (flag == FLAG_PREFIX_READY) {
|
if (flag == FLAG_PREFIX_READY) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue