mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Fix write-after-read in prefix test
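Thanks to Jeff Bolz for spotting the write-after-read hazard on the sh_flag accesses: without a barrier after each read of sh_flag, a later write to the flag could land before every thread in the workgroup had finished reading it. This adds a workgroup barrier after each read, which fixes observed failures on Nvidia Turing and Ampere on DX12.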
This commit is contained in:
parent
76a6f1fec8
commit
69b6632085
Binary file not shown.
|
@ -109,6 +109,7 @@ void comp_main()
|
|||
GroupMemoryBarrierWithGroupSync();
|
||||
DeviceMemoryBarrier();
|
||||
uint flag_1 = sh_flag;
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
if (flag_1 == 2u)
|
||||
{
|
||||
if (gl_LocalInvocationID.x == 511u)
|
||||
|
@ -174,6 +175,7 @@ void comp_main()
|
|||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
flag_1 = sh_flag;
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
if (flag_1 == 2u)
|
||||
{
|
||||
break;
|
||||
|
|
|
@ -160,6 +160,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
|
|||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
threadgroup_barrier(mem_flags::mem_device);
|
||||
uint flag_1 = sh_flag;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (flag_1 == 2u)
|
||||
{
|
||||
if (gl_LocalInvocationID.x == 511u)
|
||||
|
@ -219,6 +220,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
|
|||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
flag_1 = sh_flag;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (flag_1 == 2u)
|
||||
{
|
||||
break;
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -112,6 +112,7 @@ void comp_main()
|
|||
GroupMemoryBarrierWithGroupSync();
|
||||
DeviceMemoryBarrier();
|
||||
uint flag_1 = sh_flag;
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
if (flag_1 == 2u)
|
||||
{
|
||||
if (gl_LocalInvocationID.x == 511u)
|
||||
|
@ -177,6 +178,7 @@ void comp_main()
|
|||
}
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
flag_1 = sh_flag;
|
||||
GroupMemoryBarrierWithGroupSync();
|
||||
if (flag_1 == 2u)
|
||||
{
|
||||
break;
|
||||
|
|
|
@ -161,6 +161,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
|
|||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
threadgroup_barrier(mem_flags::mem_device);
|
||||
uint flag_1 = sh_flag;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (flag_1 == 2u)
|
||||
{
|
||||
if (gl_LocalInvocationID.x == 511u)
|
||||
|
@ -220,6 +221,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
|
|||
}
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
flag_1 = sh_flag;
|
||||
threadgroup_barrier(mem_flags::mem_threadgroup);
|
||||
if (flag_1 == 2u)
|
||||
{
|
||||
break;
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -144,6 +144,7 @@ void main() {
|
|||
memoryBarrierBuffer();
|
||||
#endif
|
||||
uint flag = sh_flag;
|
||||
barrier();
|
||||
|
||||
if (flag == FLAG_PREFIX_READY) {
|
||||
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
|
||||
|
@ -185,6 +186,7 @@ void main() {
|
|||
}
|
||||
barrier();
|
||||
flag = sh_flag;
|
||||
barrier();
|
||||
if (flag == FLAG_PREFIX_READY) {
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue