mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
piet-gpu/shader: ensure forward progress in decoupled lookback
The Vulkan and OpenGL specifications offer only weak forward-progress guarantees, and in practice several mobile devices fail to complete the decoupled lookback spin loop without mitigation. This patch implements Raph's suggestion from the "Forward Progress" section of the article at https://raphlinus.github.io/gpu/2020/04/30/prefix-sum.html (instead of only spinning, the waiting workgroup computes part of the predecessor's aggregate itself). Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
parent
bc01180519
commit
b942e4035b
|
@ -243,6 +243,9 @@ void main() {
|
||||||
if (part_ix != 0) {
|
if (part_ix != 0) {
|
||||||
// step 4 of paper: decoupled lookback
|
// step 4 of paper: decoupled lookback
|
||||||
uint look_back_ix = part_ix - 1;
|
uint look_back_ix = part_ix - 1;
|
||||||
|
|
||||||
|
State their_agg;
|
||||||
|
uint their_ix = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
flag = state[state_flag_index(look_back_ix)];
|
flag = state[state_flag_index(look_back_ix)];
|
||||||
if (flag == FLAG_PREFIX_READY) {
|
if (flag == FLAG_PREFIX_READY) {
|
||||||
|
@ -250,11 +253,33 @@ void main() {
|
||||||
exclusive = combine_state(their_prefix, exclusive);
|
exclusive = combine_state(their_prefix, exclusive);
|
||||||
break;
|
break;
|
||||||
} else if (flag == FLAG_AGGREGATE_READY) {
|
} else if (flag == FLAG_AGGREGATE_READY) {
|
||||||
State their_agg = State_read(state_aggregate_ref(look_back_ix));
|
their_agg = State_read(state_aggregate_ref(look_back_ix));
|
||||||
exclusive = combine_state(their_agg, exclusive);
|
exclusive = combine_state(their_agg, exclusive);
|
||||||
look_back_ix--;
|
look_back_ix--;
|
||||||
|
their_ix = 0;
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
// else spin
|
// else spin
|
||||||
|
|
||||||
|
// Unfortunately there's no guarantee of forward progress of other
|
||||||
|
// workgroups, so compute a bit of the aggregate before trying again.
|
||||||
|
// In the worst case, spinning stops when the aggregate is complete.
|
||||||
|
ElementRef ref = ElementRef((look_back_ix * PARTITION_SIZE + their_ix) * Element_size);
|
||||||
|
State s = map_element(ref);
|
||||||
|
if (their_ix == 0) {
|
||||||
|
their_agg = s;
|
||||||
|
} else {
|
||||||
|
their_agg = combine_state(their_agg, s);
|
||||||
|
}
|
||||||
|
their_ix++;
|
||||||
|
if (their_ix == PARTITION_SIZE) {
|
||||||
|
exclusive = combine_state(their_agg, exclusive);
|
||||||
|
if (look_back_ix == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
look_back_ix--;
|
||||||
|
their_ix = 0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// step 5 of paper: compute inclusive prefix
|
// step 5 of paper: compute inclusive prefix
|
||||||
|
|
Binary file not shown.
Loading…
Reference in a new issue