mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-22 09:26:33 +11:00
Support for larger pathtags
Previously there was a limit of 256k pathtags in a scene, because the multi-dispatch prefix sum for the pathtag monoid used only one level of reduction (one dispatch to reduce, then one to scan). This patch increases the limit to 64M, which ought to be enough for most applications. It works by using 4 dispatches for the pathtag prefix sum: 2 to reduce, then 2 to scan.
This commit is contained in:
parent
57d79bdf1f
commit
d94257a7c5
5 changed files with 181 additions and 7 deletions
40
shader/pathtag_reduce2.wgsl
Normal file
40
shader/pathtag_reduce2.wgsl
Normal file
|
@ -0,0 +1,40 @@
|
|||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// This shader is the second stage of reduction for the pathtag
|
||||
// monoid scan, needed when the number of tags is large.
|
||||
|
||||
#import config
|
||||
#import pathtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> reduced_in: array<TagMonoid>;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage, read_write> reduced: array<TagMonoid>;
|
||||
|
||||
let LG_WG_SIZE = 8u;
|
||||
let WG_SIZE = 256u;
|
||||
|
||||
var<workgroup> sh_scratch: array<TagMonoid, WG_SIZE>;
|
||||
|
||||
// Second-level reduction: each workgroup folds WG_SIZE consecutive entries of
// `reduced_in` into a single TagMonoid and stores it in `reduced` at the
// index of the workgroup.
//
// Only the x components of the builtins are read, so a 1-D dispatch is
// expected.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
) {
    let ix = global_id.x;
    // Every invocation starts from its own element and publishes it to
    // workgroup memory so that its neighbours can read it.
    var agg = reduced_in[ix];
    sh_scratch[local_id.x] = agg;
    // Log-step reduction: after round i, sh_scratch[k] holds the combination
    // of up to 2^(i+1) consecutive elements starting at k (fewer near the end
    // of the workgroup). LG_WG_SIZE rounds therefore leave the combination of
    // the whole workgroup in invocation 0.
    for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
        // The previous round's writes must be visible before anyone reads.
        workgroupBarrier();
        if local_id.x + (1u << i) < WG_SIZE {
            let other = sh_scratch[local_id.x + (1u << i)];
            // Order matters: `agg` covers the elements that come first.
            agg = combine_tag_monoid(agg, other);
        }
        // All reads of this round must finish before any slot is overwritten.
        workgroupBarrier();
        sh_scratch[local_id.x] = agg;
    }
    // Invocation 0 uses its own register copy of the total, so no further
    // barrier is needed. With a workgroup size of 1 << LG_WG_SIZE,
    // `ix >> LG_WG_SIZE` is the index of this workgroup.
    if local_id.x == 0u {
        reduced[ix >> LG_WG_SIZE] = agg;
    }
}
|
|
@ -18,7 +18,9 @@ var<storage, read_write> tag_monoids: array<TagMonoid>;
|
|||
let LG_WG_SIZE = 8u;
|
||||
let WG_SIZE = 256u;
|
||||
|
||||
#ifdef small
|
||||
var<workgroup> sh_parent: array<TagMonoid, WG_SIZE>;
|
||||
#endif
|
||||
// Could sh_parent and sh_monoid be combined into a single array?
|
||||
var<workgroup> sh_monoid: array<TagMonoid, WG_SIZE>;
|
||||
|
||||
|
@ -28,6 +30,7 @@ fn main(
|
|||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||
) {
|
||||
#ifdef small
|
||||
var agg = tag_monoid_identity();
|
||||
if local_id.x < wg_id.x {
|
||||
agg = reduced[local_id.x];
|
||||
|
@ -42,22 +45,27 @@ fn main(
|
|||
workgroupBarrier();
|
||||
sh_parent[local_id.x] = agg;
|
||||
}
|
||||
#endif
|
||||
|
||||
let ix = global_id.x;
|
||||
let tag_word = scene[config.pathtag_base + ix];
|
||||
agg = reduce_tag(tag_word);
|
||||
sh_monoid[local_id.x] = agg;
|
||||
var agg_part = reduce_tag(tag_word);
|
||||
sh_monoid[local_id.x] = agg_part;
|
||||
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
||||
workgroupBarrier();
|
||||
if local_id.x >= 1u << i {
|
||||
let other = sh_monoid[local_id.x - (1u << i)];
|
||||
agg = combine_tag_monoid(other, agg);
|
||||
agg_part = combine_tag_monoid(other, agg_part);
|
||||
}
|
||||
workgroupBarrier();
|
||||
sh_monoid[local_id.x] = agg;
|
||||
sh_monoid[local_id.x] = agg_part;
|
||||
}
|
||||
// prefix up to this workgroup
|
||||
#ifdef small
|
||||
var tm = sh_parent[0];
|
||||
#else
|
||||
var tm = reduced[wg_id.x];
|
||||
#endif
|
||||
if local_id.x > 0u {
|
||||
tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]);
|
||||
}
|
||||
|
|
65
shader/pathtag_scan1.wgsl
Normal file
65
shader/pathtag_scan1.wgsl
Normal file
|
@ -0,0 +1,65 @@
|
|||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// This shader computes the scan of reduced tag monoids given
|
||||
// two levels of reduction.
|
||||
|
||||
#import config
|
||||
#import pathtag
|
||||
|
||||
@group(0) @binding(0)
|
||||
var<storage> reduced: array<TagMonoid>;
|
||||
|
||||
@group(0) @binding(1)
|
||||
var<storage> reduced2: array<TagMonoid>;
|
||||
|
||||
@group(0) @binding(2)
|
||||
var<storage, read_write> tag_monoids: array<TagMonoid>;
|
||||
|
||||
let LG_WG_SIZE = 8u;
|
||||
let WG_SIZE = 256u;
|
||||
|
||||
var<workgroup> sh_parent: array<TagMonoid, WG_SIZE>;
|
||||
// Could sh_parent and sh_monoid be combined into a single array?
|
||||
var<workgroup> sh_monoid: array<TagMonoid, WG_SIZE>;
|
||||
|
||||
// Scans the first-level reductions: for every index `ix`, writes to
// `tag_monoids[ix]` the combination of all entries of `reduced` that precede
// `ix` (an exclusive prefix).
//
// `reduced2[w]` is expected to hold the combination of the w-th run of
// WG_SIZE entries of `reduced`. Each invocation loads at most one entry of
// `reduced2`, so a workgroup can account for at most WG_SIZE preceding
// workgroups.
//
// Only the x components of the builtins are read, so a 1-D dispatch is
// expected.
@compute @workgroup_size(256)
fn main(
    @builtin(global_invocation_id) global_id: vec3<u32>,
    @builtin(local_invocation_id) local_id: vec3<u32>,
    @builtin(workgroup_id) wg_id: vec3<u32>,
) {
    // Step 1: combine the second-level reductions of all workgroups before
    // this one. Invocations at or past wg_id.x contribute the identity.
    var agg = tag_monoid_identity();
    if local_id.x < wg_id.x {
        agg = reduced2[local_id.x];
    }
    sh_parent[local_id.x] = agg;
    // Log-step reduction towards index 0: afterwards sh_parent[0] holds the
    // combination of reduced2[0 .. wg_id.x).
    for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
        workgroupBarrier();
        if local_id.x + (1u << i) < WG_SIZE {
            let other = sh_parent[local_id.x + (1u << i)];
            agg = combine_tag_monoid(agg, other);
        }
        workgroupBarrier();
        sh_parent[local_id.x] = agg;
    }

    // Step 2: inclusive scan of this workgroup's slice of `reduced`.
    // After round i, sh_monoid[k] covers up to 2^(i+1) elements ending at k.
    let ix = global_id.x;
    agg = reduced[ix];
    sh_monoid[local_id.x] = agg;
    for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
        workgroupBarrier();
        if local_id.x >= 1u << i {
            let other = sh_monoid[local_id.x - (1u << i)];
            agg = combine_tag_monoid(other, agg);
        }
        workgroupBarrier();
        sh_monoid[local_id.x] = agg;
    }
    // The final round's writes must be visible before a neighbour's slot is
    // read below; without this barrier the read of sh_monoid[local_id.x - 1u]
    // races with that invocation's last write. The barriers above already
    // order the final write of sh_parent[0] before its read.
    workgroupBarrier();
    // prefix up to this workgroup
    var tm = sh_parent[0];
    if local_id.x > 0u {
        // Using the left neighbour's inclusive result makes the prefix
        // exclusive.
        tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]);
    }
    // exclusive prefix sum, granularity of 4 tag bytes * workgroup size
    tag_monoids[ix] = tm;
}
|
|
@ -236,21 +236,49 @@ pub fn render_full(
|
|||
let config_buf = ResourceProxy::Buf(recording.upload_uniform(bytemuck::bytes_of(&config)));
|
||||
|
||||
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
||||
let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE);
|
||||
let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize;
|
||||
let reduced_size = if pathtag_large {
|
||||
align_up(pathtag_wgs, shaders::PATHTAG_REDUCE_WG)
|
||||
} else {
|
||||
pathtag_wgs
|
||||
};
|
||||
let reduced_buf = ResourceProxy::new_buf(reduced_size as u64 * TAG_MONOID_FULL_SIZE);
|
||||
// TODO: really only need pathtag_wgs - 1
|
||||
recording.dispatch(
|
||||
shaders.pathtag_reduce,
|
||||
(pathtag_wgs as u32, 1, 1),
|
||||
[config_buf, scene_buf, reduced_buf],
|
||||
);
|
||||
let mut pathtag_parent = reduced_buf;
|
||||
if pathtag_large {
|
||||
let reduced2_size = shaders::PATHTAG_REDUCE_WG as usize;
|
||||
let reduced2_buf = ResourceProxy::new_buf(reduced2_size as u64 * TAG_MONOID_FULL_SIZE);
|
||||
recording.dispatch(
|
||||
shaders.pathtag_reduce2,
|
||||
(reduced2_size as u32, 1, 1),
|
||||
[reduced_buf, reduced2_buf],
|
||||
);
|
||||
let reduced_scan_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE);
|
||||
recording.dispatch(
|
||||
shaders.pathtag_scan1,
|
||||
(reduced_size as u32 / shaders::PATHTAG_REDUCE_WG, 1, 1),
|
||||
[reduced_buf, reduced2_buf, reduced_scan_buf],
|
||||
);
|
||||
pathtag_parent = reduced_scan_buf;
|
||||
}
|
||||
|
||||
let tagmonoid_buf = ResourceProxy::new_buf(
|
||||
pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE,
|
||||
);
|
||||
let pathtag_scan = if pathtag_large {
|
||||
shaders.pathtag_scan_large
|
||||
} else {
|
||||
shaders.pathtag_scan
|
||||
};
|
||||
recording.dispatch(
|
||||
shaders.pathtag_scan,
|
||||
pathtag_scan,
|
||||
(pathtag_wgs as u32, 1, 1),
|
||||
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
|
||||
[config_buf, scene_buf, pathtag_parent, tagmonoid_buf],
|
||||
);
|
||||
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
|
||||
let path_bbox_buf = ResourceProxy::new_buf(n_path as u64 * PATH_BBOX_SIZE);
|
||||
|
|
|
@ -47,7 +47,10 @@ pub struct Shaders {
|
|||
// Shaders for the full pipeline
|
||||
pub struct FullShaders {
|
||||
pub pathtag_reduce: ShaderId,
|
||||
pub pathtag_reduce2: ShaderId,
|
||||
pub pathtag_scan1: ShaderId,
|
||||
pub pathtag_scan: ShaderId,
|
||||
pub pathtag_scan_large: ShaderId,
|
||||
pub bbox_clear: ShaderId,
|
||||
pub pathseg: ShaderId,
|
||||
pub draw_reduce: ShaderId,
|
||||
|
@ -129,12 +132,39 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
let empty = HashSet::new();
|
||||
let mut full_config = HashSet::new();
|
||||
full_config.insert("full".into());
|
||||
let mut small_config = HashSet::new();
|
||||
small_config.insert("full".into());
|
||||
small_config.insert("small".into());
|
||||
let pathtag_reduce = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_reduce"), &full_config, &imports).into(),
|
||||
&[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer],
|
||||
)?;
|
||||
let pathtag_reduce2 = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_reduce2"), &full_config, &imports).into(),
|
||||
&[BindType::BufReadOnly, BindType::Buffer],
|
||||
)?;
|
||||
let pathtag_scan1 = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_scan1"), &full_config, &imports).into(),
|
||||
&[
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
],
|
||||
)?;
|
||||
let pathtag_scan = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_scan"), &small_config, &imports).into(),
|
||||
&[
|
||||
BindType::Uniform,
|
||||
BindType::BufReadOnly,
|
||||
BindType::BufReadOnly,
|
||||
BindType::Buffer,
|
||||
],
|
||||
)?;
|
||||
let pathtag_scan_large = engine.add_shader(
|
||||
device,
|
||||
preprocess::preprocess(shader!("pathtag_scan"), &full_config, &imports).into(),
|
||||
&[
|
||||
|
@ -278,7 +308,10 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
|||
)?;
|
||||
Ok(FullShaders {
|
||||
pathtag_reduce,
|
||||
pathtag_reduce2,
|
||||
pathtag_scan,
|
||||
pathtag_scan1,
|
||||
pathtag_scan_large,
|
||||
bbox_clear,
|
||||
pathseg,
|
||||
draw_reduce,
|
||||
|
|
Loading…
Add table
Reference in a new issue