Merge pull request #35 from eliasnaur/stop-spin
Limit spinning in elements.comp
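In outline: step 4 of the scan (the decoupled look-back) previously busy-waited on the neighboring partition's flag. Because other workgroups have no forward-progress guarantee, the loop now does useful work while it waits: each iteration re-maps one element of the partition being waited on and folds it into a locally built aggregate, and once PARTITION_SIZE elements have been folded, that local aggregate stands in for the published one and the look-back moves to the previous partition. The inout is_fill parameter of map_element is dropped along the way, which lets the spin path call map_element(ref) with a single argument. The sketch below condenses the loop as it reads after this change; the State and ElementRef types, the FLAG_* values, and helpers such as map_element, combine_state, State_read, state_flag_index, state_prefix_ref, and state_aggregate_ref come from the surrounding shader and are assumed here rather than defined by the sketch.

    // Sketch (GLSL): step-4 look-back with bounded, productive spinning.
    State their_agg;               // partial aggregate of the partition being waited on
    uint their_ix = 0;             // how many of its elements have been folded in
    while (true) {
        flag = state[state_flag_index(look_back_ix)];
        if (flag == FLAG_PREFIX_READY) {
            // The neighbor's inclusive prefix is published: combine and stop.
            State their_prefix = State_read(state_prefix_ref(look_back_ix));
            exclusive = combine_state(their_prefix, exclusive);
            break;
        } else if (flag == FLAG_AGGREGATE_READY) {
            // Only its aggregate is published: fold it in and walk one partition back.
            their_agg = State_read(state_aggregate_ref(look_back_ix));
            exclusive = combine_state(their_agg, exclusive);
            look_back_ix--;
            their_ix = 0;          // restart the local rebuild for the new partition
            continue;
        }
        // Neither flag is set. Rather than spin idly, rebuild one element's worth
        // of the neighbor's aggregate; in the worst case the wait ends when the
        // whole aggregate has been rebuilt locally.
        ElementRef ref = ElementRef((look_back_ix * PARTITION_SIZE + their_ix) * Element_size);
        State s = map_element(ref);
        if (their_ix == 0) {
            their_agg = s;
        } else {
            their_agg = combine_state(their_agg, s);
        }
        their_ix++;
        if (their_ix == PARTITION_SIZE) {
            // Full aggregate rebuilt locally: use it instead of the published one.
            exclusive = combine_state(their_agg, exclusive);
            if (look_back_ix == 0) {
                break;
            }
            look_back_ix--;
            their_ix = 0;
        }
    }

The trade-off is some redundant work, since elements of earlier partitions may be mapped both by their own workgroup and by a waiting one, in exchange for a wait whose termination no longer depends on other workgroups being scheduled.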
commit d3fe8630be
elements.comp
@@ -94,7 +94,7 @@ State combine_state(State a, State b) {
     return c;
 }
 
-State map_element(ElementRef ref, inout bool is_fill) {
+State map_element(ElementRef ref) {
     // TODO: it would *probably* be more efficient to make the memory read patterns less
     // divergent, though it would be more wasted memory.
     uint tag = Element_tag(ref);
@@ -106,7 +106,6 @@ State map_element(ElementRef ref, inout bool is_fill) {
     c.flags = 0;
     c.path_count = 0;
     c.pathseg_count = 0;
-    is_fill = false;
     switch (tag) {
     case Element_FillLine:
     case Element_StrokeLine:
@@ -132,8 +131,6 @@ State map_element(ElementRef ref, inout bool is_fill) {
     case Element_Fill:
     case Element_FillMask:
     case Element_FillMaskInv:
-        is_fill = true;
-        // fall-through
     case Element_Stroke:
         c.flags = FLAG_RESET_BBOX;
         c.path_count = 1;
@@ -185,12 +182,11 @@ void main() {
     uint ix = part_ix * PARTITION_SIZE + gl_LocalInvocationID.x * N_ROWS;
     ElementRef ref = ElementRef(ix * Element_size);
 
-    bool is_fill;
-    th_state[0] = map_element(ref, is_fill);
+    th_state[0] = map_element(ref);
     for (uint i = 1; i < N_ROWS; i++) {
         // discussion question: would it be faster to load using more coherent patterns
         // into thread memory? This is kinda strided.
-        th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i), is_fill));
+        th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
     }
     State agg = th_state[N_ROWS - 1];
     sh_mat[gl_LocalInvocationID.x] = agg.mat;
@@ -247,6 +243,9 @@ void main() {
     if (part_ix != 0) {
         // step 4 of paper: decoupled lookback
         uint look_back_ix = part_ix - 1;
+
+        State their_agg;
+        uint their_ix = 0;
         while (true) {
             flag = state[state_flag_index(look_back_ix)];
             if (flag == FLAG_PREFIX_READY) {
@@ -254,11 +253,33 @@ void main() {
                 exclusive = combine_state(their_prefix, exclusive);
                 break;
             } else if (flag == FLAG_AGGREGATE_READY) {
-                State their_agg = State_read(state_aggregate_ref(look_back_ix));
+                their_agg = State_read(state_aggregate_ref(look_back_ix));
                 exclusive = combine_state(their_agg, exclusive);
                 look_back_ix--;
+                their_ix = 0;
+                continue;
             }
             // else spin
+
+            // Unfortunately there's no guarantee of forward progress of other
+            // workgroups, so compute a bit of the aggregate before trying again.
+            // In the worst case, spinning stops when the aggregate is complete.
+            ElementRef ref = ElementRef((look_back_ix * PARTITION_SIZE + their_ix) * Element_size);
+            State s = map_element(ref);
+            if (their_ix == 0) {
+                their_agg = s;
+            } else {
+                their_agg = combine_state(their_agg, s);
+            }
+            their_ix++;
+            if (their_ix == PARTITION_SIZE) {
+                exclusive = combine_state(their_agg, exclusive);
+                if (look_back_ix == 0) {
+                    break;
+                }
+                look_back_ix--;
+                their_ix = 0;
+            }
         }
 
         // step 5 of paper: compute inclusive prefix