Fix write-after-read in prefix test

Thanks to Jeff Bolz for spotting the write-after-read hazard on the
sh_flag accesses. This fixes observed failures on Nvidia Turing and
Ampere on DX12.
This commit is contained in:
Raph Levien 2021-11-13 22:31:26 -08:00
parent 76a6f1fec8
commit 69b6632085
10 changed files with 10 additions and 0 deletions

Binary file not shown.

View file

@ -109,6 +109,7 @@ void comp_main()
GroupMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
uint flag_1 = sh_flag;
GroupMemoryBarrierWithGroupSync();
if (flag_1 == 2u)
{
if (gl_LocalInvocationID.x == 511u)
@ -174,6 +175,7 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
flag_1 = sh_flag;
GroupMemoryBarrierWithGroupSync();
if (flag_1 == 2u)
{
break;

View file

@ -160,6 +160,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device);
uint flag_1 = sh_flag;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (flag_1 == 2u)
{
if (gl_LocalInvocationID.x == 511u)
@ -219,6 +220,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
}
threadgroup_barrier(mem_flags::mem_threadgroup);
flag_1 = sh_flag;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (flag_1 == 2u)
{
break;

Binary file not shown.

Binary file not shown.

View file

@ -112,6 +112,7 @@ void comp_main()
GroupMemoryBarrierWithGroupSync();
DeviceMemoryBarrier();
uint flag_1 = sh_flag;
GroupMemoryBarrierWithGroupSync();
if (flag_1 == 2u)
{
if (gl_LocalInvocationID.x == 511u)
@ -177,6 +178,7 @@ void comp_main()
}
GroupMemoryBarrierWithGroupSync();
flag_1 = sh_flag;
GroupMemoryBarrierWithGroupSync();
if (flag_1 == 2u)
{
break;

View file

@ -161,6 +161,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
threadgroup_barrier(mem_flags::mem_threadgroup);
threadgroup_barrier(mem_flags::mem_device);
uint flag_1 = sh_flag;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (flag_1 == 2u)
{
if (gl_LocalInvocationID.x == 511u)
@ -220,6 +221,7 @@ kernel void main0(const device InBuf& _67 [[buffer(0)]], device OutBuf& _372 [[b
}
threadgroup_barrier(mem_flags::mem_threadgroup);
flag_1 = sh_flag;
threadgroup_barrier(mem_flags::mem_threadgroup);
if (flag_1 == 2u)
{
break;

Binary file not shown.

Binary file not shown.

View file

@ -144,6 +144,7 @@ void main() {
memoryBarrierBuffer();
#endif
uint flag = sh_flag;
barrier();
if (flag == FLAG_PREFIX_READY) {
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
@ -185,6 +186,7 @@ void main() {
}
barrier();
flag = sh_flag;
barrier();
if (flag == FLAG_PREFIX_READY) {
break;
}