Merge pull request #264 from linebender/async

Experimental async wiring
Raph Levien 2023-01-27 12:43:48 -08:00 committed by GitHub
commit 6a184244e6
5 changed files with 650 additions and 364 deletions


@ -22,6 +22,7 @@ use std::{borrow::Cow, path::PathBuf, time::Instant};
use clap::Parser;
use vello::{
block_on_wgpu,
kurbo::{Affine, Vec2},
util::RenderContext,
Renderer, Scene, SceneBuilder,
@ -187,6 +188,24 @@ async fn run(event_loop: EventLoop<UserEvent>, window: Window, args: Args) {
.surface
.get_current_texture()
.expect("failed to get surface texture");
#[cfg(not(target_arch = "wasm32"))]
{
block_on_wgpu(
&device_handle.device,
renderer.render_to_surface_async(
&device_handle.device,
&device_handle.queue,
&scene,
&surface_texture,
width,
height,
),
)
.expect("failed to render to surface");
}
// Note: in the wasm case, we're currently not running the robust
// pipeline, as it requires more async wiring for the readback.
#[cfg(target_arch = "wasm32")]
renderer
.render_to_surface(
&device_handle.device,
@ -198,7 +217,7 @@ async fn run(event_loop: EventLoop<UserEvent>, window: Window, args: Args) {
)
.expect("failed to render to surface");
surface_texture.present();
device_handle.device.poll(wgpu::Maintain::Wait);
device_handle.device.poll(wgpu::Maintain::Poll);
}
Event::UserEvent(event) => match event {
#[cfg(not(target_arch = "wasm32"))]


@ -16,17 +16,14 @@
use std::{
borrow::Cow,
collections::{hash_map::Entry, HashMap},
collections::{hash_map::Entry, HashMap, HashSet},
num::{NonZeroU32, NonZeroU64},
sync::atomic::{AtomicU64, Ordering},
};
use futures_intrusive::channel::shared::GenericOneshotReceiver;
use parking_lot::RawMutex;
use wgpu::{
util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice,
BufferUsages, BufferView, ComputePipeline, Device, Queue, Texture, TextureAspect,
TextureFormat, TextureUsages, TextureView, TextureViewDimension,
util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferUsages, ComputePipeline, Device,
Queue, Texture, TextureAspect, TextureFormat, TextureUsages, TextureView, TextureViewDimension,
};
pub type Error = Box<dyn std::error::Error>;
@ -42,6 +39,8 @@ static ID_COUNTER: AtomicU64 = AtomicU64::new(0);
pub struct Engine {
shaders: Vec<Shader>,
pool: ResourcePool,
bind_map: BindMap,
downloads: HashMap<Id, Buffer>,
}
struct Shader {
@ -96,11 +95,8 @@ pub enum Command {
Dispatch(ShaderId, (u32, u32, u32), Vec<ResourceProxy>),
Download(BufProxy),
Clear(BufProxy, u64, Option<NonZeroU64>),
}
#[derive(Default)]
pub struct Downloads {
buf_map: HashMap<Id, Buffer>,
FreeBuf(BufProxy),
FreeImage(ImageProxy),
}
/// The type of resource that will be bound to a slot in a shader.
@ -149,6 +145,8 @@ impl Engine {
Engine {
shaders: vec![],
pool: Default::default(),
bind_map: Default::default(),
downloads: Default::default(),
}
}
@ -249,9 +247,9 @@ impl Engine {
queue: &Queue,
recording: &Recording,
external_resources: &[ExternalResource],
) -> Result<Downloads, Error> {
let mut bind_map = BindMap::default();
let mut downloads = Downloads::default();
) -> Result<(), Error> {
let mut free_bufs: HashSet<Id> = Default::default();
let mut free_images: HashSet<Id> = Default::default();
let mut encoder = device.create_command_encoder(&Default::default());
for command in &recording.commands {
@ -259,18 +257,22 @@ impl Engine {
Command::Upload(buf_proxy, bytes) => {
let usage =
BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
let buf = self.pool.get_buf(buf_proxy, usage, device);
let buf = self
.pool
.get_buf(buf_proxy.size, buf_proxy.name, usage, device);
// TODO: if buffer is newly created, might be better to make it mapped at creation
// and copy. However, we expect reuse will be most common.
queue.write_buffer(&buf, 0, bytes);
bind_map.insert_buf(buf_proxy, buf);
self.bind_map.insert_buf(buf_proxy, buf);
}
Command::UploadUniform(buf_proxy, bytes) => {
let usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
// Same consideration as above
let buf = self.pool.get_buf(buf_proxy, usage, device);
let buf = self
.pool
.get_buf(buf_proxy.size, buf_proxy.name, usage, device);
queue.write_buffer(&buf, 0, bytes);
bind_map.insert_buf(buf_proxy, buf);
self.bind_map.insert_buf(buf_proxy, buf);
}
Command::UploadImage(image_proxy, bytes) => {
let buf = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
@ -322,12 +324,13 @@ impl Engine {
depth_or_array_layers: 1,
},
);
bind_map.insert_image(image_proxy.id, texture, texture_view)
self.bind_map
.insert_image(image_proxy.id, texture, texture_view)
}
Command::Dispatch(shader_id, wg_size, bindings) => {
// println!("dispatching {:?} with {} bindings", wg_size, bindings.len());
let shader = &self.shaders[shader_id.0];
let bind_group = bind_map.create_bind_group(
let bind_group = self.bind_map.create_bind_group(
device,
&shader.bind_group_layout,
bindings,
@ -340,18 +343,20 @@ impl Engine {
cpass.dispatch_workgroups(wg_size.0, wg_size.1, wg_size.2);
}
Command::Download(proxy) => {
let src_buf = bind_map.buf_map.get(&proxy.id).ok_or("buffer not in map")?;
let buf = device.create_buffer(&wgpu::BufferDescriptor {
label: Some(proxy.name),
size: proxy.size,
usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
mapped_at_creation: false,
});
let src_buf = self
.bind_map
.buf_map
.get(&proxy.id)
.ok_or("buffer not in map")?;
let usage = BufferUsages::MAP_READ | BufferUsages::COPY_DST;
let buf = self.pool.get_buf(proxy.size, "download", usage, device);
encoder.copy_buffer_to_buffer(&src_buf.buffer, 0, &buf, 0, proxy.size);
downloads.buf_map.insert(proxy.id, buf);
self.downloads.insert(proxy.id, buf);
}
Command::Clear(proxy, offset, size) => {
let buffer = bind_map.get_or_create(*proxy, device, &mut self.pool)?;
let buffer = self
.bind_map
.get_or_create(*proxy, device, &mut self.pool)?;
#[cfg(not(target_arch = "wasm32"))]
encoder.clear_buffer(buffer, *offset, *size);
#[cfg(target_arch = "wasm32")]
@ -366,11 +371,42 @@ impl Engine {
queue.write_buffer(buffer, *offset, &zeros);
}
}
Command::FreeBuf(proxy) => {
free_bufs.insert(proxy.id);
}
Command::FreeImage(proxy) => {
free_images.insert(proxy.id);
}
}
}
queue.submit(Some(encoder.finish()));
self.pool.reap_bindmap(bind_map);
Ok(downloads)
for id in free_bufs {
if let Some(buf) = self.bind_map.buf_map.remove(&id) {
let props = BufferProperties {
size: buf.buffer.size(),
usages: buf.buffer.usage(),
#[cfg(feature = "buffer_labels")]
name: buf.label,
};
self.pool.bufs.entry(props).or_default().push(buf.buffer);
}
}
for id in free_images {
if let Some((texture, view)) = self.bind_map.image_map.remove(&id) {
// TODO: have a pool to avoid needless re-allocation
drop(texture);
drop(view);
}
}
Ok(())
}
pub fn get_download(&self, buf: BufProxy) -> Option<&Buffer> {
self.downloads.get(&buf.id)
}
pub fn free_download(&mut self, buf: BufProxy) {
self.downloads.remove(&buf.id);
}
}
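
Buffers scheduled with Recording::download end up in the engine-owned downloads map once run_recording completes; the caller retrieves them with get_download, maps them asynchronously, and releases them with free_download. A minimal sketch of that flow, assuming this module's imports are in scope; it mirrors the readback done in render_to_texture_async below:

async fn read_back(
    engine: &mut Engine,
    device: &Device,
    queue: &Queue,
    recording: &Recording,
    proxy: BufProxy,
) -> Result<(), Error> {
    engine.run_recording(device, queue, recording, &[])?;
    if let Some(buf) = engine.get_download(proxy) {
        let slice = buf.slice(..);
        let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
        slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
        // The map completes once the GPU work finishes; on native this future
        // can be driven by block_on_wgpu, which polls the device while pending.
        if let Some(recv_result) = receiver.receive().await {
            recv_result?;
        } else {
            return Err("channel was closed".into());
        }
        let _mapped = slice.get_mapped_range();
        // Inspect the mapped bytes here (e.g. via bytemuck::cast_slice::<_, u32>).
    }
    engine.free_download(proxy);
    Ok(())
}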
@ -418,6 +454,10 @@ impl Recording {
));
}
/// Prepare a buffer for downloading.
///
/// Currently this copies to a download buffer. The original buffer can be freed
/// immediately after.
pub fn download(&mut self, buf: BufProxy) {
self.push(Command::Download(buf));
}
@ -425,6 +465,21 @@ impl Recording {
pub fn clear_all(&mut self, buf: BufProxy) {
self.push(Command::Clear(buf, 0, None));
}
pub fn free_buf(&mut self, buf: BufProxy) {
self.push(Command::FreeBuf(buf));
}
pub fn free_image(&mut self, image: ImageProxy) {
self.push(Command::FreeImage(image));
}
pub fn free_resource(&mut self, resource: ResourceProxy) {
match resource {
ResourceProxy::Buf(buf) => self.free_buf(buf),
ResourceProxy::Image(image) => self.free_image(image),
}
}
}
impl BufProxy {
@ -565,7 +620,7 @@ impl BindMap {
if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
let usage =
BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
let buf = pool.get_buf(&proxy, usage, device);
let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
v.insert(BindMapBuffer {
buffer: buf,
label: proxy.name,
@ -647,7 +702,7 @@ impl BindMap {
Entry::Occupied(occupied) => Ok(&occupied.into_mut().buffer),
Entry::Vacant(vacant) => {
let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
let buf = pool.get_buf(&proxy, usage, device);
let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
Ok(&vacant
.insert(BindMapBuffer {
buffer: buf,
@ -659,53 +714,23 @@ impl BindMap {
}
}
pub struct DownloadsMapped<'a>(
HashMap<
Id,
(
BufferSlice<'a>,
GenericOneshotReceiver<RawMutex, Result<(), BufferAsyncError>>,
),
>,
);
impl Downloads {
// Discussion: should API change so we get one buffer, rather than mapping all?
pub fn map(&self) -> DownloadsMapped {
let mut map = HashMap::new();
for (id, buf) in &self.buf_map {
let buf_slice = buf.slice(..);
let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
buf_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
map.insert(*id, (buf_slice, receiver));
}
DownloadsMapped(map)
}
}
impl<'a> DownloadsMapped<'a> {
pub async fn get_mapped(&self, proxy: BufProxy) -> Result<BufferView, Error> {
let (slice, recv) = self.0.get(&proxy.id).ok_or("buffer not in map")?;
if let Some(recv_result) = recv.receive().await {
recv_result?;
} else {
return Err("channel was closed".into());
}
Ok(slice.get_mapped_range())
}
}
const SIZE_CLASS_BITS: u32 = 1;
impl ResourcePool {
/// Get a buffer from the pool or create one.
fn get_buf(&mut self, proxy: &BufProxy, usage: BufferUsages, device: &Device) -> Buffer {
let rounded_size = Self::size_class(proxy.size, SIZE_CLASS_BITS);
fn get_buf(
&mut self,
size: u64,
name: &'static str,
usage: BufferUsages,
device: &Device,
) -> Buffer {
let rounded_size = Self::size_class(size, SIZE_CLASS_BITS);
let props = BufferProperties {
size: rounded_size,
usages: usage,
#[cfg(feature = "buffer_labels")]
name: proxy.name,
name: name,
};
if let Some(buf_vec) = self.bufs.get_mut(&props) {
if let Some(buf) = buf_vec.pop() {
@ -714,7 +739,7 @@ impl ResourcePool {
}
device.create_buffer(&wgpu::BufferDescriptor {
#[cfg(feature = "buffer_labels")]
label: Some(proxy.name),
label: Some(name),
#[cfg(not(feature = "buffer_labels"))]
label: None,
size: rounded_size,
@ -723,19 +748,6 @@ impl ResourcePool {
})
}
fn reap_bindmap(&mut self, bind_map: BindMap) {
for (_id, buf) in bind_map.buf_map {
let size = buf.buffer.size();
let props = BufferProperties {
size,
usages: buf.buffer.usage(),
#[cfg(feature = "buffer_labels")]
name: buf.label,
};
self.bufs.entry(props).or_default().push(buf.buffer);
}
}
/// Quantize a size up to the nearest size class.
fn size_class(x: u64, bits: u32) -> u64 {
if x > 1 << bits {


@ -29,9 +29,11 @@ pub mod encoding;
pub mod glyph;
pub mod util;
use render::Render;
pub use scene::{Scene, SceneBuilder, SceneFragment};
pub use util::block_on_wgpu;
use engine::{Engine, ExternalResource};
use engine::{Engine, ExternalResource, Recording};
use shaders::FullShaders;
use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView};
@ -83,8 +85,7 @@ impl Renderer {
*target.as_image().unwrap(),
texture,
)];
let _ = self
.engine
self.engine
.run_recording(device, queue, &recording, &external_resources)?;
Ok(())
}
@ -164,6 +165,105 @@ impl Renderer {
self.shaders = shaders;
Ok(())
}
/// Renders a scene to the target texture.
///
/// The texture is assumed to be of the specified dimensions and have been created with
/// the [wgpu::TextureFormat::Rgba8Unorm] format and the [wgpu::TextureUsages::STORAGE_BINDING]
/// flag set.
pub async fn render_to_texture_async(
&mut self,
device: &Device,
queue: &Queue,
scene: &Scene,
texture: &TextureView,
width: u32,
height: u32,
) -> Result<()> {
let mut render = Render::new();
let encoding = scene.data();
let recording = render.render_encoding_coarse(encoding, &self.shaders, width, height, true);
let target = render.out_image();
let bump_buf = render.bump_buf();
self.engine.run_recording(device, queue, &recording, &[])?;
if let Some(bump_buf) = self.engine.get_download(bump_buf) {
let buf_slice = bump_buf.slice(..);
let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
buf_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
if let Some(recv_result) = receiver.receive().await {
recv_result?;
} else {
return Err("channel was closed".into());
}
let mapped = buf_slice.get_mapped_range();
println!("{:?}", bytemuck::cast_slice::<_, u32>(&mapped));
}
// TODO: apply logic to determine whether we need to rerun coarse, and also
// allocate the blend stack as needed.
self.engine.free_download(bump_buf);
// Maybe clear to reuse allocation?
let mut recording = Recording::default();
render.record_fine(&self.shaders, &mut recording);
let external_resources = [ExternalResource::Image(target, texture)];
self.engine
.run_recording(device, queue, &recording, &external_resources)?;
Ok(())
}
pub async fn render_to_surface_async(
&mut self,
device: &Device,
queue: &Queue,
scene: &Scene,
surface: &SurfaceTexture,
width: u32,
height: u32,
) -> Result<()> {
let mut target = self
.target
.take()
.unwrap_or_else(|| TargetTexture::new(device, width, height));
// TODO: implement clever resizing semantics here to avoid thrashing the memory allocator
// during resize, specifically on metal.
if target.width != width || target.height != height {
target = TargetTexture::new(device, width, height);
}
self.render_to_texture_async(device, queue, scene, &target.view, width, height)
.await?;
let mut encoder =
device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
{
let surface_view = surface
.texture
.create_view(&wgpu::TextureViewDescriptor::default());
let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
label: None,
layout: &self.blit.bind_layout,
entries: &[wgpu::BindGroupEntry {
binding: 0,
resource: wgpu::BindingResource::TextureView(&target.view),
}],
});
let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
label: None,
color_attachments: &[Some(wgpu::RenderPassColorAttachment {
view: &surface_view,
resolve_target: None,
ops: wgpu::Operations {
load: wgpu::LoadOp::Clear(wgpu::Color::default()),
store: true,
},
})],
depth_stencil_attachment: None,
});
render_pass.set_pipeline(&self.blit.pipeline);
render_pass.set_bind_group(0, &bind_group, &[]);
render_pass.draw(0..6, 0..1);
}
queue.submit(Some(encoder.finish()));
self.target = Some(target);
Ok(())
}
}
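
render_to_texture_async documents its target requirements above: Rgba8Unorm format with the STORAGE_BINDING usage flag. A sketch of creating a compatible texture view (hypothetical variable names; the descriptor fields follow the wgpu version in use here, while newer wgpu releases also require a view_formats field):

let texture = device.create_texture(&wgpu::TextureDescriptor {
    label: None,
    size: wgpu::Extent3d {
        width,
        height,
        depth_or_array_layers: 1,
    },
    mip_level_count: 1,
    sample_count: 1,
    dimension: wgpu::TextureDimension::D2,
    format: wgpu::TextureFormat::Rgba8Unorm,
    // STORAGE_BINDING lets fine rasterization write to the image;
    // TEXTURE_BINDING additionally allows sampling it, e.g. for a blit to a surface.
    usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
});
let view = texture.create_view(&wgpu::TextureViewDescriptor::default());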
struct TargetTexture {


@ -9,6 +9,34 @@ use crate::{
Scene,
};
/// State for a render in progress.
pub struct Render {
/// Size of binning and info combined buffer in u32 units
binning_info_size: u32,
/// Size of tiles buf in tiles
tiles_size: u32,
/// Size of segments buf in segments
segments_size: u32,
/// Size of per-tile command list in u32 units
ptcl_size: u32,
width_in_tiles: u32,
height_in_tiles: u32,
fine: Option<FineResources>,
}
/// Resources produced by pipeline, needed for fine rasterization.
struct FineResources {
config_buf: ResourceProxy,
bump_buf: ResourceProxy,
tile_buf: ResourceProxy,
segments_buf: ResourceProxy,
ptcl_buf: ResourceProxy,
gradient_image: ResourceProxy,
info_bin_data_buf: ResourceProxy,
out_image: ImageProxy,
}
const TAG_MONOID_SIZE: u64 = 12;
const TAG_MONOID_FULL_SIZE: u64 = 20;
const PATH_BBOX_SIZE: u64 = 24;
@ -157,288 +185,389 @@ pub fn render_full(
render_encoding_full(scene.data(), shaders, width, height)
}
/// Create a single recording with both coarse and fine render stages.
///
/// This function is not recommended for complex scenes, as it does not
/// implement robust dynamic memory.
pub fn render_encoding_full(
encoding: &Encoding,
shaders: &FullShaders,
width: u32,
height: u32,
) -> (Recording, ResourceProxy) {
use crate::encoding::{resource::ResourceCache, PackedEncoding};
let mut recording = Recording::default();
let mut resources = ResourceCache::new();
let mut packed = PackedEncoding::default();
packed.pack(encoding, &mut resources);
let (ramp_data, ramps_width, ramps_height) = resources.ramps(packed.resources).unwrap();
let gradient_image = if encoding.patches.is_empty() {
ResourceProxy::new_image(1, 1, ImageFormat::Rgba8)
} else {
let data: &[u8] = bytemuck::cast_slice(ramp_data);
ResourceProxy::Image(recording.upload_image(
ramps_width,
ramps_height,
ImageFormat::Rgba8,
data,
))
};
// TODO: calculate for real when we do rectangles
let n_pathtag = encoding.path_tags.len();
let pathtag_padded = align_up(encoding.path_tags.len(), 4 * shaders::PATHTAG_REDUCE_WG);
let n_paths = encoding.n_paths;
let n_drawobj = n_paths;
let n_clip = encoding.n_clips;
let new_width = next_multiple_of(width, 16);
let new_height = next_multiple_of(height, 16);
let info_size = packed.layout.bin_data_start;
let config = crate::encoding::Config {
width_in_tiles: new_width / 16,
height_in_tiles: new_height / 16,
target_width: width,
target_height: height,
binning_size: ((1 << 20) / 4) - info_size,
tiles_size: (1 << 24) / TILE_SIZE as u32,
segments_size: (1 << 26) / SEGMENT_SIZE as u32,
ptcl_size: (1 << 25) / 4,
layout: packed.layout,
};
// println!("{:?}", config);
let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed.data));
let config_buf =
ResourceProxy::Buf(recording.upload_uniform("config", bytemuck::bytes_of(&config)));
let info_bin_data_buf = ResourceProxy::new_buf(
(info_size + config.binning_size) as u64 * 4,
"info_bin_data_buf",
);
let tile_buf = ResourceProxy::new_buf(config.tiles_size as u64 * TILE_SIZE, "tile_buf");
let segments_buf =
ResourceProxy::new_buf(config.segments_size as u64 * SEGMENT_SIZE, "segments_buf");
let ptcl_buf = ResourceProxy::new_buf(config.ptcl_size as u64 * 4, "ptcl_buf");
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize;
let reduced_size = if pathtag_large {
align_up(pathtag_wgs, shaders::PATHTAG_REDUCE_WG)
} else {
pathtag_wgs
};
let reduced_buf =
ResourceProxy::new_buf(reduced_size as u64 * TAG_MONOID_FULL_SIZE, "reduced_buf");
// TODO: really only need pathtag_wgs - 1
recording.dispatch(
shaders.pathtag_reduce,
(pathtag_wgs as u32, 1, 1),
[config_buf, scene_buf, reduced_buf],
);
let mut pathtag_parent = reduced_buf;
if pathtag_large {
let reduced2_size = shaders::PATHTAG_REDUCE_WG as usize;
let reduced2_buf =
ResourceProxy::new_buf(reduced2_size as u64 * TAG_MONOID_FULL_SIZE, "reduced2_buf");
recording.dispatch(
shaders.pathtag_reduce2,
(reduced2_size as u32, 1, 1),
[reduced_buf, reduced2_buf],
);
let reduced_scan_buf = ResourceProxy::new_buf(
pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE,
"reduced_scan_buf",
);
recording.dispatch(
shaders.pathtag_scan1,
(reduced_size as u32 / shaders::PATHTAG_REDUCE_WG, 1, 1),
[reduced_buf, reduced2_buf, reduced_scan_buf],
);
pathtag_parent = reduced_scan_buf;
}
let tagmonoid_buf = ResourceProxy::new_buf(
pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE,
"tagmonoid_buf",
);
let pathtag_scan = if pathtag_large {
shaders.pathtag_scan_large
} else {
shaders.pathtag_scan
};
recording.dispatch(
pathtag_scan,
(pathtag_wgs as u32, 1, 1),
[config_buf, scene_buf, pathtag_parent, tagmonoid_buf],
);
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
let path_bbox_buf = ResourceProxy::new_buf(n_paths as u64 * PATH_BBOX_SIZE, "path_bbox_buf");
recording.dispatch(
shaders.bbox_clear,
(drawobj_wgs, 1, 1),
[config_buf, path_bbox_buf],
);
let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE, "cubic_buf");
let path_coarse_wgs =
(n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
recording.dispatch(
shaders.pathseg,
(path_coarse_wgs, 1, 1),
[
config_buf,
scene_buf,
tagmonoid_buf,
path_bbox_buf,
cubic_buf,
],
);
let draw_reduced_buf =
ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE, "draw_reduced_buf");
recording.dispatch(
shaders.draw_reduce,
(drawobj_wgs, 1, 1),
[config_buf, scene_buf, draw_reduced_buf],
);
let draw_monoid_buf =
ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE, "draw_monoid_buf");
let clip_inp_buf =
ResourceProxy::new_buf(encoding.n_clips as u64 * CLIP_INP_SIZE, "clip_inp_buf");
recording.dispatch(
shaders.draw_leaf,
(drawobj_wgs, 1, 1),
[
config_buf,
scene_buf,
draw_reduced_buf,
path_bbox_buf,
draw_monoid_buf,
info_bin_data_buf,
clip_inp_buf,
],
);
let clip_el_buf = ResourceProxy::new_buf(encoding.n_clips as u64 * CLIP_EL_SIZE, "clip_el_buf");
let clip_bic_buf = ResourceProxy::new_buf(
(n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE,
"clip_bic_buf",
);
let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG;
if clip_wg_reduce > 0 {
recording.dispatch(
shaders.clip_reduce,
(clip_wg_reduce, 1, 1),
[
config_buf,
clip_inp_buf,
path_bbox_buf,
clip_bic_buf,
clip_el_buf,
],
);
}
let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG;
let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE, "clip_bbox_buf");
if clip_wg > 0 {
recording.dispatch(
shaders.clip_leaf,
(clip_wg, 1, 1),
[
config_buf,
clip_inp_buf,
path_bbox_buf,
clip_bic_buf,
clip_el_buf,
draw_monoid_buf,
clip_bbox_buf,
],
);
}
let draw_bbox_buf = ResourceProxy::new_buf(n_paths as u64 * DRAW_BBOX_SIZE, "draw_bbox_buf");
let bump_buf = BufProxy::new(BUMP_SIZE, "bump_buf");
let width_in_bins = (config.width_in_tiles + 15) / 16;
let height_in_bins = (config.height_in_tiles + 15) / 16;
let bin_header_buf = ResourceProxy::new_buf(
(256 * drawobj_wgs) as u64 * BIN_HEADER_SIZE,
"bin_header_buf",
);
recording.clear_all(bump_buf);
let bump_buf = ResourceProxy::Buf(bump_buf);
recording.dispatch(
shaders.binning,
(drawobj_wgs, 1, 1),
[
config_buf,
draw_monoid_buf,
path_bbox_buf,
clip_bbox_buf,
draw_bbox_buf,
bump_buf,
info_bin_data_buf,
bin_header_buf,
],
);
// Note: this only needs to be rounded up because of the workaround to store the tile_offset
// in storage rather than workgroup memory.
let n_path_aligned = align_up(n_paths as usize, 256);
let path_buf = ResourceProxy::new_buf(n_path_aligned as u64 * PATH_SIZE, "path_buf");
let path_wgs = (n_paths + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
recording.dispatch(
shaders.tile_alloc,
(path_wgs, 1, 1),
[
config_buf,
scene_buf,
draw_bbox_buf,
bump_buf,
path_buf,
tile_buf,
],
);
recording.dispatch(
shaders.path_coarse,
(path_coarse_wgs, 1, 1),
[
config_buf,
scene_buf,
tagmonoid_buf,
cubic_buf,
path_buf,
bump_buf,
tile_buf,
segments_buf,
],
);
recording.dispatch(
shaders.backdrop,
(path_wgs, 1, 1),
[config_buf, path_buf, tile_buf],
);
recording.dispatch(
shaders.coarse,
(width_in_bins, height_in_bins, 1),
[
config_buf,
scene_buf,
draw_monoid_buf,
bin_header_buf,
info_bin_data_buf,
path_buf,
tile_buf,
bump_buf,
ptcl_buf,
],
);
let out_image = ImageProxy::new(width, height, ImageFormat::Rgba8);
recording.dispatch(
shaders.fine,
(config.width_in_tiles, config.height_in_tiles, 1),
[
config_buf,
tile_buf,
segments_buf,
ResourceProxy::Image(out_image),
ptcl_buf,
gradient_image,
info_bin_data_buf,
],
);
(recording, ResourceProxy::Image(out_image))
let mut render = Render::new();
let mut recording = render.render_encoding_coarse(encoding, shaders, width, height, false);
let out_image = render.out_image();
render.record_fine(shaders, &mut recording);
(recording, out_image.into())
}
pub fn align_up(len: usize, alignment: u32) -> usize {
len + (len.wrapping_neg() & (alignment as usize - 1))
}
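// Worked example (illustrative only): for a power-of-two alignment,
// len.wrapping_neg() & (alignment - 1) is the padding to the next multiple, so
//     align_up(10, 4) == 12   // 10 + ((-10) & 3) = 10 + 2
//     align_up(16, 4) == 16   // already aligned, padding is 0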
impl Render {
pub fn new() -> Self {
// These sizes are adequate for paris-30k but should probably be dialed down.
Render {
binning_info_size: (1 << 20) / 4,
tiles_size: (1 << 24) / TILE_SIZE as u32,
segments_size: (1 << 26) / SEGMENT_SIZE as u32,
ptcl_size: (1 << 25) / 4 as u32,
width_in_tiles: 0,
height_in_tiles: 0,
fine: None,
}
}
/// Prepare a recording for the coarse rasterization phase.
///
/// The `robust` parameter controls whether the recording prepares for readback
/// of the atomic bump buffer, which is needed for robust dynamic memory.
pub fn render_encoding_coarse(
&mut self,
encoding: &Encoding,
shaders: &FullShaders,
width: u32,
height: u32,
robust: bool,
) -> Recording {
use crate::encoding::{resource::ResourceCache, PackedEncoding};
let mut recording = Recording::default();
let mut resources = ResourceCache::new();
let mut packed = PackedEncoding::default();
packed.pack(encoding, &mut resources);
let (ramp_data, ramps_width, ramps_height) = resources.ramps(packed.resources).unwrap();
let gradient_image = if encoding.patches.is_empty() {
ResourceProxy::new_image(1, 1, ImageFormat::Rgba8)
} else {
let data: &[u8] = bytemuck::cast_slice(ramp_data);
ResourceProxy::Image(recording.upload_image(
ramps_width,
ramps_height,
ImageFormat::Rgba8,
data,
))
};
// TODO: calculate for real when we do rectangles
let n_pathtag = encoding.path_tags.len();
let pathtag_padded = align_up(encoding.path_tags.len(), 4 * shaders::PATHTAG_REDUCE_WG);
let n_paths = encoding.n_paths;
let n_drawobj = n_paths;
let n_clip = encoding.n_clips;
let new_width = next_multiple_of(width, 16);
let new_height = next_multiple_of(height, 16);
let info_size = packed.layout.bin_data_start;
let config = crate::encoding::Config {
width_in_tiles: new_width / 16,
height_in_tiles: new_height / 16,
target_width: width,
target_height: height,
binning_size: self.binning_info_size - info_size,
tiles_size: self.tiles_size,
segments_size: self.segments_size,
ptcl_size: self.ptcl_size,
layout: packed.layout,
};
// println!("{:?}", config);
let scene_buf = ResourceProxy::Buf(recording.upload("scene", packed.data));
let config_buf =
ResourceProxy::Buf(recording.upload_uniform("config", bytemuck::bytes_of(&config)));
let info_bin_data_buf = ResourceProxy::new_buf(
(info_size + config.binning_size) as u64 * 4,
"info_bin_data_buf",
);
let tile_buf = ResourceProxy::new_buf(config.tiles_size as u64 * TILE_SIZE, "tile_buf");
let segments_buf =
ResourceProxy::new_buf(config.segments_size as u64 * SEGMENT_SIZE, "segments_buf");
let ptcl_buf = ResourceProxy::new_buf(config.ptcl_size as u64 * 4, "ptcl_buf");
let pathtag_wgs = pathtag_padded / (4 * shaders::PATHTAG_REDUCE_WG as usize);
let pathtag_large = pathtag_wgs > shaders::PATHTAG_REDUCE_WG as usize;
let reduced_size = if pathtag_large {
align_up(pathtag_wgs, shaders::PATHTAG_REDUCE_WG)
} else {
pathtag_wgs
};
let reduced_buf =
ResourceProxy::new_buf(reduced_size as u64 * TAG_MONOID_FULL_SIZE, "reduced_buf");
// TODO: really only need pathtag_wgs - 1
recording.dispatch(
shaders.pathtag_reduce,
(pathtag_wgs as u32, 1, 1),
[config_buf, scene_buf, reduced_buf],
);
let mut pathtag_parent = reduced_buf;
let mut large_pathtag_bufs = None;
if pathtag_large {
let reduced2_size = shaders::PATHTAG_REDUCE_WG as usize;
let reduced2_buf =
ResourceProxy::new_buf(reduced2_size as u64 * TAG_MONOID_FULL_SIZE, "reduced2_buf");
recording.dispatch(
shaders.pathtag_reduce2,
(reduced2_size as u32, 1, 1),
[reduced_buf, reduced2_buf],
);
let reduced_scan_buf = ResourceProxy::new_buf(
pathtag_wgs as u64 * TAG_MONOID_FULL_SIZE,
"reduced_scan_buf",
);
recording.dispatch(
shaders.pathtag_scan1,
(reduced_size as u32 / shaders::PATHTAG_REDUCE_WG, 1, 1),
[reduced_buf, reduced2_buf, reduced_scan_buf],
);
pathtag_parent = reduced_scan_buf;
large_pathtag_bufs = Some((reduced2_buf, reduced_scan_buf));
}
let tagmonoid_buf = ResourceProxy::new_buf(
pathtag_wgs as u64 * shaders::PATHTAG_REDUCE_WG as u64 * TAG_MONOID_FULL_SIZE,
"tagmonoid_buf",
);
let pathtag_scan = if pathtag_large {
shaders.pathtag_scan_large
} else {
shaders.pathtag_scan
};
recording.dispatch(
pathtag_scan,
(pathtag_wgs as u32, 1, 1),
[config_buf, scene_buf, pathtag_parent, tagmonoid_buf],
);
recording.free_resource(reduced_buf);
if let Some((reduced2, reduced_scan)) = large_pathtag_bufs {
recording.free_resource(reduced2);
recording.free_resource(reduced_scan);
}
let drawobj_wgs = (n_drawobj + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
let path_bbox_buf =
ResourceProxy::new_buf(n_paths as u64 * PATH_BBOX_SIZE, "path_bbox_buf");
recording.dispatch(
shaders.bbox_clear,
(drawobj_wgs, 1, 1),
[config_buf, path_bbox_buf],
);
let cubic_buf = ResourceProxy::new_buf(n_pathtag as u64 * CUBIC_SIZE, "cubic_buf");
let path_coarse_wgs =
(n_pathtag as u32 + shaders::PATH_COARSE_WG - 1) / shaders::PATH_COARSE_WG;
recording.dispatch(
shaders.pathseg,
(path_coarse_wgs, 1, 1),
[
config_buf,
scene_buf,
tagmonoid_buf,
path_bbox_buf,
cubic_buf,
],
);
let draw_reduced_buf =
ResourceProxy::new_buf(drawobj_wgs as u64 * DRAWMONOID_SIZE, "draw_reduced_buf");
recording.dispatch(
shaders.draw_reduce,
(drawobj_wgs, 1, 1),
[config_buf, scene_buf, draw_reduced_buf],
);
let draw_monoid_buf =
ResourceProxy::new_buf(n_drawobj as u64 * DRAWMONOID_SIZE, "draw_monoid_buf");
let clip_inp_buf =
ResourceProxy::new_buf(encoding.n_clips as u64 * CLIP_INP_SIZE, "clip_inp_buf");
recording.dispatch(
shaders.draw_leaf,
(drawobj_wgs, 1, 1),
[
config_buf,
scene_buf,
draw_reduced_buf,
path_bbox_buf,
draw_monoid_buf,
info_bin_data_buf,
clip_inp_buf,
],
);
recording.free_resource(draw_reduced_buf);
let clip_el_buf =
ResourceProxy::new_buf(encoding.n_clips as u64 * CLIP_EL_SIZE, "clip_el_buf");
let clip_bic_buf = ResourceProxy::new_buf(
(n_clip / shaders::CLIP_REDUCE_WG) as u64 * CLIP_BIC_SIZE,
"clip_bic_buf",
);
let clip_wg_reduce = n_clip.saturating_sub(1) / shaders::CLIP_REDUCE_WG;
if clip_wg_reduce > 0 {
recording.dispatch(
shaders.clip_reduce,
(clip_wg_reduce, 1, 1),
[
config_buf,
clip_inp_buf,
path_bbox_buf,
clip_bic_buf,
clip_el_buf,
],
);
}
let clip_wg = (n_clip + shaders::CLIP_REDUCE_WG - 1) / shaders::CLIP_REDUCE_WG;
let clip_bbox_buf = ResourceProxy::new_buf(n_clip as u64 * CLIP_BBOX_SIZE, "clip_bbox_buf");
if clip_wg > 0 {
recording.dispatch(
shaders.clip_leaf,
(clip_wg, 1, 1),
[
config_buf,
clip_inp_buf,
path_bbox_buf,
clip_bic_buf,
clip_el_buf,
draw_monoid_buf,
clip_bbox_buf,
],
);
}
recording.free_resource(clip_inp_buf);
recording.free_resource(clip_bic_buf);
recording.free_resource(clip_el_buf);
let draw_bbox_buf =
ResourceProxy::new_buf(n_paths as u64 * DRAW_BBOX_SIZE, "draw_bbox_buf");
let bump_buf = BufProxy::new(BUMP_SIZE, "bump_buf");
let width_in_bins = (config.width_in_tiles + 15) / 16;
let height_in_bins = (config.height_in_tiles + 15) / 16;
let bin_header_buf = ResourceProxy::new_buf(
(256 * drawobj_wgs) as u64 * BIN_HEADER_SIZE,
"bin_header_buf",
);
recording.clear_all(bump_buf);
let bump_buf = ResourceProxy::Buf(bump_buf);
recording.dispatch(
shaders.binning,
(drawobj_wgs, 1, 1),
[
config_buf,
draw_monoid_buf,
path_bbox_buf,
clip_bbox_buf,
draw_bbox_buf,
bump_buf,
info_bin_data_buf,
bin_header_buf,
],
);
recording.free_resource(draw_monoid_buf);
recording.free_resource(path_bbox_buf);
recording.free_resource(clip_bbox_buf);
// Note: this only needs to be rounded up because of the workaround to store the tile_offset
// in storage rather than workgroup memory.
let n_path_aligned = align_up(n_paths as usize, 256);
let path_buf = ResourceProxy::new_buf(n_path_aligned as u64 * PATH_SIZE, "path_buf");
let path_wgs = (n_paths + shaders::PATH_BBOX_WG - 1) / shaders::PATH_BBOX_WG;
recording.dispatch(
shaders.tile_alloc,
(path_wgs, 1, 1),
[
config_buf,
scene_buf,
draw_bbox_buf,
bump_buf,
path_buf,
tile_buf,
],
);
recording.free_resource(draw_bbox_buf);
recording.dispatch(
shaders.path_coarse,
(path_coarse_wgs, 1, 1),
[
config_buf,
scene_buf,
tagmonoid_buf,
cubic_buf,
path_buf,
bump_buf,
tile_buf,
segments_buf,
],
);
recording.free_resource(tagmonoid_buf);
recording.free_resource(cubic_buf);
recording.dispatch(
shaders.backdrop,
(path_wgs, 1, 1),
[config_buf, path_buf, tile_buf],
);
recording.dispatch(
shaders.coarse,
(width_in_bins, height_in_bins, 1),
[
config_buf,
scene_buf,
draw_monoid_buf,
bin_header_buf,
info_bin_data_buf,
path_buf,
tile_buf,
bump_buf,
ptcl_buf,
],
);
recording.free_resource(scene_buf);
recording.free_resource(draw_monoid_buf);
recording.free_resource(bin_header_buf);
recording.free_resource(path_buf);
let out_image = ImageProxy::new(width, height, ImageFormat::Rgba8);
self.width_in_tiles = config.width_in_tiles;
self.height_in_tiles = config.height_in_tiles;
self.fine = Some(FineResources {
config_buf,
bump_buf,
tile_buf,
segments_buf,
ptcl_buf,
gradient_image,
info_bin_data_buf,
out_image,
});
if robust {
recording.download(*bump_buf.as_buf().unwrap());
}
recording.free_resource(bump_buf);
recording
}
/// Run fine rasterization assuming the coarse phase succeeded.
pub fn record_fine(&mut self, shaders: &FullShaders, recording: &mut Recording) {
let fine = self.fine.take().unwrap();
recording.dispatch(
shaders.fine,
(self.width_in_tiles, self.height_in_tiles, 1),
[
fine.config_buf,
fine.tile_buf,
fine.segments_buf,
ResourceProxy::Image(fine.out_image),
fine.ptcl_buf,
fine.gradient_image,
fine.info_bin_data_buf,
],
);
recording.free_resource(fine.config_buf);
recording.free_resource(fine.tile_buf);
recording.free_resource(fine.segments_buf);
recording.free_resource(fine.ptcl_buf);
recording.free_resource(fine.gradient_image);
recording.free_resource(fine.info_bin_data_buf);
}
/// Get the output image.
///
/// This is going away, as the caller will add the output image to the bind
/// map.
pub fn out_image(&self) -> ImageProxy {
self.fine.as_ref().unwrap().out_image
}
pub fn bump_buf(&self) -> BufProxy {
*self.fine.as_ref().unwrap().bump_buf.as_buf().unwrap()
}
}
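
Splitting the pipeline into render_encoding_coarse and record_fine lets a caller inspect the bump buffer between the two phases. A minimal sketch of driving the split directly, mirroring render_to_texture_async; encoding, shaders, width, and height are assumed to be in scope:

let mut render = Render::new();
// Coarse phase; robust = true also schedules a download of the bump buffer so
// the caller can check for memory overflow before committing to fine.
let coarse = render.render_encoding_coarse(encoding, shaders, width, height, true);
let bump_buf = render.bump_buf();
let out_image = render.out_image();
// ... run the coarse recording on the engine, read back bump_buf as in
// render_to_texture_async, and decide whether buffers need to be reallocated ...
// Fine phase, writing into out_image.
let mut fine = Recording::default();
render.record_fine(shaders, &mut fine);
// ... run the fine recording with out_image bound as an external resource ...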


@ -16,6 +16,8 @@
//! Simple helpers for managing wgpu state and surfaces.
use std::future::Future;
use super::Result;
use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle};
@ -132,3 +134,27 @@ pub struct RenderSurface {
pub config: SurfaceConfiguration,
pub dev_id: usize,
}
struct NullWake;
impl std::task::Wake for NullWake {
fn wake(self: std::sync::Arc<Self>) {}
}
/// Block on a future, polling the device as needed.
///
/// This will deadlock if the future is awaiting anything other than GPU progress.
pub fn block_on_wgpu<F: Future>(device: &Device, mut fut: F) -> F::Output {
let waker = std::task::Waker::from(std::sync::Arc::new(NullWake));
let mut context = std::task::Context::from_waker(&waker);
// Same logic as `pin_mut!` macro from `pin_utils`.
let mut fut = unsafe { std::pin::Pin::new_unchecked(&mut fut) };
loop {
match fut.as_mut().poll(&mut context) {
std::task::Poll::Pending => {
device.poll(wgpu::Maintain::Wait);
}
std::task::Poll::Ready(item) => break item,
}
}
}
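
A usage sketch under assumed names (renderer, scene, texture_view): on native targets the async renderer entry points can be driven to completion without an async executor, since the only awaits are on GPU readbacks.

// Headless render without an async runtime; block_on_wgpu keeps polling the
// device with Maintain::Wait whenever the future reports Pending.
block_on_wgpu(
    &device,
    renderer.render_to_texture_async(&device, &queue, &scene, &texture_view, width, height),
)
.expect("failed to render to texture");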