mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-22 09:26:33 +11:00
Support for larger pathtags
Previously there was a limit of 256k pathtags in a scene, due to the need for multi-dispatch prefix sum for the pathtag monoid. This patch increases the limit to 64M, which ought to be enough for most applications. It works by having 4 dispatches for the pathtag prefix sum: 2 to reduce, then 2 to scan.
This commit is contained in:
parent
57d79bdf1f
commit
d94257a7c5
5 changed files with 181 additions and 7 deletions
40
shader/pathtag_reduce2.wgsl
Normal file
40
shader/pathtag_reduce2.wgsl
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// This shader is the second stage of reduction for the pathtag
|
||||||
|
// monoid scan, needed when the number of tags is large.
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import pathtag
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> reduced_in: array<TagMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage, read_write> reduced: array<TagMonoid>;
|
||||||
|
|
||||||
|
let LG_WG_SIZE = 8u;
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
|
||||||
|
var<workgroup> sh_scratch: array<TagMonoid, WG_SIZE>;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
let ix = global_id.x;
|
||||||
|
var agg = reduced_in[ix];
|
||||||
|
sh_scratch[local_id.x] = agg;
|
||||||
|
for (var i = 0u; i < firstTrailingBit(WG_SIZE); i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x + (1u << i) < WG_SIZE {
|
||||||
|
let other = sh_scratch[local_id.x + (1u << i)];
|
||||||
|
agg = combine_tag_monoid(agg, other);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_scratch[local_id.x] = agg;
|
||||||
|
}
|
||||||
|
if local_id.x == 0u {
|
||||||
|
reduced[ix >> LG_WG_SIZE] = agg;
|
||||||
|
}
|
||||||
|
}
|
|
@ -18,7 +18,9 @@ var<storage, read_write> tag_monoids: array<TagMonoid>;
|
||||||
let LG_WG_SIZE = 8u;
|
let LG_WG_SIZE = 8u;
|
||||||
let WG_SIZE = 256u;
|
let WG_SIZE = 256u;
|
||||||
|
|
||||||
|
#ifdef small
|
||||||
var<workgroup> sh_parent: array<TagMonoid, WG_SIZE>;
|
var<workgroup> sh_parent: array<TagMonoid, WG_SIZE>;
|
||||||
|
#endif
|
||||||
// These could be combined?
|
// These could be combined?
|
||||||
var<workgroup> sh_monoid: array<TagMonoid, WG_SIZE>;
|
var<workgroup> sh_monoid: array<TagMonoid, WG_SIZE>;
|
||||||
|
|
||||||
|
@ -28,6 +30,7 @@ fn main(
|
||||||
@builtin(local_invocation_id) local_id: vec3<u32>,
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
@builtin(workgroup_id) wg_id: vec3<u32>,
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
) {
|
) {
|
||||||
|
#ifdef small
|
||||||
var agg = tag_monoid_identity();
|
var agg = tag_monoid_identity();
|
||||||
if local_id.x < wg_id.x {
|
if local_id.x < wg_id.x {
|
||||||
agg = reduced[local_id.x];
|
agg = reduced[local_id.x];
|
||||||
|
@ -42,22 +45,27 @@ fn main(
|
||||||
workgroupBarrier();
|
workgroupBarrier();
|
||||||
sh_parent[local_id.x] = agg;
|
sh_parent[local_id.x] = agg;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
let ix = global_id.x;
|
let ix = global_id.x;
|
||||||
let tag_word = scene[config.pathtag_base + ix];
|
let tag_word = scene[config.pathtag_base + ix];
|
||||||
agg = reduce_tag(tag_word);
|
var agg_part = reduce_tag(tag_word);
|
||||||
sh_monoid[local_id.x] = agg;
|
sh_monoid[local_id.x] = agg_part;
|
||||||
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
||||||
workgroupBarrier();
|
workgroupBarrier();
|
||||||
if local_id.x >= 1u << i {
|
if local_id.x >= 1u << i {
|
||||||
let other = sh_monoid[local_id.x - (1u << i)];
|
let other = sh_monoid[local_id.x - (1u << i)];
|
||||||
agg = combine_tag_monoid(other, agg);
|
agg_part = combine_tag_monoid(other, agg_part);
|
||||||
}
|
}
|
||||||
workgroupBarrier();
|
workgroupBarrier();
|
||||||
sh_monoid[local_id.x] = agg;
|
sh_monoid[local_id.x] = agg_part;
|
||||||
}
|
}
|
||||||
// prefix up to this workgroup
|
// prefix up to this workgroup
|
||||||
|
#ifdef small
|
||||||
var tm = sh_parent[0];
|
var tm = sh_parent[0];
|
||||||
|
#else
|
||||||
|
var tm = reduced[wg_id.x];
|
||||||
|
#endif
|
||||||
if local_id.x > 0u {
|
if local_id.x > 0u {
|
||||||
tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]);
|
tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]);
|
||||||
}
|
}
|
||||||
|
|
65
shader/pathtag_scan1.wgsl
Normal file
65
shader/pathtag_scan1.wgsl
Normal file
|
@ -0,0 +1,65 @@
|
||||||
|
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||||
|
|
||||||
|
// This shader computes the scan of reduced tag monoids given
|
||||||
|
// two levels of reduction.
|
||||||
|
|
||||||
|
#import config
|
||||||
|
#import pathtag
|
||||||
|
|
||||||
|
@group(0) @binding(0)
|
||||||
|
var<storage> reduced: array<TagMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(1)
|
||||||
|
var<storage> reduced2: array<TagMonoid>;
|
||||||
|
|
||||||
|
@group(0) @binding(2)
|
||||||
|
var<storage, read_write> tag_monoids: array<TagMonoid>;
|
||||||
|
|
||||||
|
let LG_WG_SIZE = 8u;
|
||||||
|
let WG_SIZE = 256u;
|
||||||
|
|
||||||
|
var<workgroup> sh_parent: array<TagMonoid, WG_SIZE>;
|
||||||
|
// These could be combined?
|
||||||
|
var<workgroup> sh_monoid: array<TagMonoid, WG_SIZE>;
|
||||||
|
|
||||||
|
@compute @workgroup_size(256)
|
||||||
|
fn main(
|
||||||
|
@builtin(global_invocation_id) global_id: vec3<u32>,
|
||||||
|
@builtin(local_invocation_id) local_id: vec3<u32>,
|
||||||
|
@builtin(workgroup_id) wg_id: vec3<u32>,
|
||||||
|
) {
|
||||||
|
var agg = tag_monoid_identity();
|
||||||
|
if local_id.x < wg_id.x {
|
||||||
|
agg = reduced2[local_id.x];
|
||||||
|
}
|
||||||
|
sh_parent[local_id.x] = agg;
|
||||||
|
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x + (1u << i) < WG_SIZE {
|
||||||
|
let other = sh_parent[local_id.x + (1u << i)];
|
||||||
|
agg = combine_tag_monoid(agg, other);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_parent[local_id.x] = agg;
|
||||||
|
}
|
||||||
|
|
||||||
|
let ix = global_id.x;
|
||||||
|
agg = reduced[ix];
|
||||||
|
sh_monoid[local_id.x] = agg;
|
||||||
|
for (var i = 0u; i < LG_WG_SIZE; i += 1u) {
|
||||||
|
workgroupBarrier();
|
||||||
|
if local_id.x >= 1u << i {
|
||||||
|
let other = sh_monoid[local_id.x - (1u << i)];
|
||||||
|
agg = combine_tag_monoid(other, agg);
|
||||||
|
}
|
||||||
|
workgroupBarrier();
|
||||||
|
sh_monoid[local_id.x] = agg;
|
||||||
|
}
|
||||||
|
// prefix up to this workgroup
|
||||||
|
var tm = sh_parent[0];
|
||||||
|
if local_id.x > 0u {
|
||||||
|
tm = combine_tag_monoid(tm, sh_monoid[local_id.x - 1u]);
|
||||||
|
}
|
||||||
|
// exclusive prefix sum, granularity of 4 tag bytes * workgroup size
|
||||||
|
tag_monoids[ix] = tm;
|
||||||
|
}
|
|
@ -236,21 +236,49 @@ pub fn render_full(
|
||||||
let config_buf = ResourceProxy::Buf(recording.upload_uniform(bytemuck::bytes_of(&config)));
|
let config_buf = ResourceProxy::Buf(recording.upload_uniform(bytemuck::bytes_of(&config)));
|
||||||
|
|
||||||
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
|
||||||
let reduced_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE);
|
let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize;
|
||||||
|
let reduced_size = if pathtag_large {
|
||||||
|
align_up(pathtag_wgs, shaders::PATHTAG_REDUCE_WG)
|
||||||
|
} else {
|
||||||
|
pathtag_wgs
|
||||||
|
};
|
||||||
|
let reduced_buf = ResourceProxy::new_buf(reduced_size as u64 * TAG_MONOID_FULL_SIZE);
|
||||||
// TODO: really only need pathtag_wgs - 1
|
// TODO: really only need pathtag_wgs - 1
|
||||||
recording.dispatch(
|
recording.dispatch(
|
||||||
shaders.pathtag_reduce,
|
shaders.pathtag_reduce,
|
||||||
(pathtag_wgs as u32, 1, 1),
|
(pathtag_wgs as u32, 1, 1),
|
||||||
[config_buf, scene_buf, reduced_buf],
|
[config_buf, scene_buf, reduced_buf],
|
||||||
);
|
);
|
||||||
|
let mut pathtag_parent = reduced_buf;
|
||||||
|
if pathtag_large {
|
||||||
|
let reduced2_size = shaders::PATHTAG_REDUCE_WG as usize;
|
||||||
|
let reduced2_buf = ResourceProxy::new_buf(reduced2_size as u64 * TAG_MONOID_FULL_SIZE);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.pathtag_reduce2,
|
||||||
|
(reduced2_size as u32, 1, 1),
|
||||||
|
[reduced_buf, reduced2_buf],
|
||||||
|
);
|
||||||
|
let reduced_scan_buf = ResourceProxy::new_buf(pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE);
|
||||||
|
recording.dispatch(
|
||||||
|
shaders.pathtag_scan1,
|
||||||
|
(reduced_size as u32 / shaders::PATHTAG_REDUCE_WG, 1, 1),
|
||||||
|
[reduced_buf, reduced2_buf, reduced_scan_buf],
|
||||||
|
);
|
||||||
|
pathtag_parent = reduced_scan_buf;
|
||||||
|
}
|
||||||
|
|
||||||
let tagmonoid_buf = ResourceProxy::new_buf(
|
let tagmonoid_buf = ResourceProxy::new_buf(
|
||||||
pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE,
|
pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE,
|
||||||
);
|
);
|
||||||
|
let pathtag_scan = if pathtag_large {
|
||||||
|
shaders.pathtag_scan_large
|
||||||
|
} else {
|
||||||
|
shaders.pathtag_scan
|
||||||
|
};
|
||||||
recording.dispatch(
|
recording.dispatch(
|
||||||
shaders.pathtag_scan,
|
pathtag_scan,
|
||||||
(pathtag_wgs as u32, 1, 1),
|
(pathtag_wgs as u32, 1, 1),
|
||||||
[config_buf, scene_buf, reduced_buf, tagmonoid_buf],
|
[config_buf, scene_buf, pathtag_parent, tagmonoid_buf],
|
||||||
);
|
);
|
||||||
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
|
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
|
||||||
let path_bbox_buf = ResourceProxy::new_buf(n_path as u64 * PATH_BBOX_SIZE);
|
let path_bbox_buf = ResourceProxy::new_buf(n_path as u64 * PATH_BBOX_SIZE);
|
||||||
|
|
|
@ -47,7 +47,10 @@ pub struct Shaders {
|
||||||
// Shaders for the full pipeline
|
// Shaders for the full pipeline
|
||||||
pub struct FullShaders {
|
pub struct FullShaders {
|
||||||
pub pathtag_reduce: ShaderId,
|
pub pathtag_reduce: ShaderId,
|
||||||
|
pub pathtag_reduce2: ShaderId,
|
||||||
|
pub pathtag_scan1: ShaderId,
|
||||||
pub pathtag_scan: ShaderId,
|
pub pathtag_scan: ShaderId,
|
||||||
|
pub pathtag_scan_large: ShaderId,
|
||||||
pub bbox_clear: ShaderId,
|
pub bbox_clear: ShaderId,
|
||||||
pub pathseg: ShaderId,
|
pub pathseg: ShaderId,
|
||||||
pub draw_reduce: ShaderId,
|
pub draw_reduce: ShaderId,
|
||||||
|
@ -129,12 +132,39 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
||||||
let empty = HashSet::new();
|
let empty = HashSet::new();
|
||||||
let mut full_config = HashSet::new();
|
let mut full_config = HashSet::new();
|
||||||
full_config.insert("full".into());
|
full_config.insert("full".into());
|
||||||
|
let mut small_config = HashSet::new();
|
||||||
|
small_config.insert("full".into());
|
||||||
|
small_config.insert("small".into());
|
||||||
let pathtag_reduce = engine.add_shader(
|
let pathtag_reduce = engine.add_shader(
|
||||||
device,
|
device,
|
||||||
preprocess::preprocess(shader!("pathtag_reduce"), &full_config, &imports).into(),
|
preprocess::preprocess(shader!("pathtag_reduce"), &full_config, &imports).into(),
|
||||||
&[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer],
|
&[BindType::Uniform, BindType::BufReadOnly, BindType::Buffer],
|
||||||
)?;
|
)?;
|
||||||
|
let pathtag_reduce2 = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(shader!("pathtag_reduce2"), &full_config, &imports).into(),
|
||||||
|
&[BindType::BufReadOnly, BindType::Buffer],
|
||||||
|
)?;
|
||||||
|
let pathtag_scan1 = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(shader!("pathtag_scan1"), &full_config, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
let pathtag_scan = engine.add_shader(
|
let pathtag_scan = engine.add_shader(
|
||||||
|
device,
|
||||||
|
preprocess::preprocess(shader!("pathtag_scan"), &small_config, &imports).into(),
|
||||||
|
&[
|
||||||
|
BindType::Uniform,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
|
],
|
||||||
|
)?;
|
||||||
|
let pathtag_scan_large = engine.add_shader(
|
||||||
device,
|
device,
|
||||||
preprocess::preprocess(shader!("pathtag_scan"), &full_config, &imports).into(),
|
preprocess::preprocess(shader!("pathtag_scan"), &full_config, &imports).into(),
|
||||||
&[
|
&[
|
||||||
|
@ -278,7 +308,10 @@ pub fn full_shaders(device: &Device, engine: &mut Engine) -> Result<FullShaders,
|
||||||
)?;
|
)?;
|
||||||
Ok(FullShaders {
|
Ok(FullShaders {
|
||||||
pathtag_reduce,
|
pathtag_reduce,
|
||||||
|
pathtag_reduce2,
|
||||||
pathtag_scan,
|
pathtag_scan,
|
||||||
|
pathtag_scan1,
|
||||||
|
pathtag_scan_large,
|
||||||
bbox_clear,
|
bbox_clear,
|
||||||
pathseg,
|
pathseg,
|
||||||
draw_reduce,
|
draw_reduce,
|
||||||
|
|
Loading…
Add table
Reference in a new issue