diff --git a/examples/with_winit/src/main.rs b/examples/with_winit/src/main.rs index 4046538..f12f8c3 100644 --- a/examples/with_winit/src/main.rs +++ b/examples/with_winit/src/main.rs @@ -22,6 +22,7 @@ use std::{borrow::Cow, path::PathBuf, time::Instant}; use clap::Parser; use vello::{ + block_on_wgpu, kurbo::{Affine, Vec2}, util::RenderContext, Renderer, Scene, SceneBuilder, @@ -187,18 +188,25 @@ async fn run(event_loop: EventLoop, window: Window, args: Args) { .surface .get_current_texture() .expect("failed to get surface texture"); - renderer - .render_to_surface( - &device_handle.device, - &device_handle.queue, - &scene, - &surface_texture, - width, - height, - ) - .expect("failed to render to surface"); - surface_texture.present(); - device_handle.device.poll(wgpu::Maintain::Wait); + let fut = async { + renderer + .render_to_surface_async( + &device_handle.device, + &device_handle.queue, + &scene, + &surface_texture, + width, + height, + ) + .await + .expect("failed to render to surface"); + surface_texture.present(); + }; + #[cfg(not(target_arch = "wasm32"))] + block_on_wgpu(&device_handle.device, fut); + #[cfg(target_arch = "wasm32")] + wasm_bindgen_futures::spawn_local(fut); + device_handle.device.poll(wgpu::Maintain::Poll); } Event::UserEvent(event) => match event { #[cfg(not(target_arch = "wasm32"))] diff --git a/src/engine.rs b/src/engine.rs index d4d9c91..76d7524 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -21,12 +21,9 @@ use std::{ sync::atomic::{AtomicU64, Ordering}, }; -use futures_intrusive::channel::shared::GenericOneshotReceiver; -use parking_lot::RawMutex; use wgpu::{ - util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice, - BufferUsages, BufferView, ComputePipeline, Device, Queue, Texture, TextureAspect, - TextureFormat, TextureUsages, TextureView, TextureViewDimension, + util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferUsages, ComputePipeline, Device, + Queue, Texture, TextureAspect, TextureFormat, TextureUsages, TextureView, TextureViewDimension, }; pub type Error = Box; @@ -43,6 +40,7 @@ pub struct Engine { shaders: Vec, pool: ResourcePool, bind_map: BindMap, + downloads: HashMap, } struct Shader { @@ -101,11 +99,6 @@ pub enum Command { FreeImage(ImageProxy), } -#[derive(Default)] -pub struct Downloads { - buf_map: HashMap, -} - /// The type of resource that will be bound to a slot in a shader. #[derive(Clone, Copy, PartialEq, Eq)] pub enum BindType { @@ -153,6 +146,7 @@ impl Engine { shaders: vec![], pool: Default::default(), bind_map: Default::default(), + downloads: Default::default(), } } @@ -253,8 +247,7 @@ impl Engine { queue: &Queue, recording: &Recording, external_resources: &[ExternalResource], - ) -> Result { - let mut downloads = Downloads::default(); + ) -> Result<(), Error> { let mut free_bufs: HashSet = Default::default(); let mut free_images: HashSet = Default::default(); @@ -264,7 +257,9 @@ impl Engine { Command::Upload(buf_proxy, bytes) => { let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE; - let buf = self.pool.get_buf(buf_proxy, usage, device); + let buf = self + .pool + .get_buf(buf_proxy.size, buf_proxy.name, usage, device); // TODO: if buffer is newly created, might be better to make it mapped at creation // and copy. However, we expect reuse will be most common. queue.write_buffer(&buf, 0, bytes); @@ -273,7 +268,9 @@ impl Engine { Command::UploadUniform(buf_proxy, bytes) => { let usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST; // Same consideration as above - let buf = self.pool.get_buf(buf_proxy, usage, device); + let buf = self + .pool + .get_buf(buf_proxy.size, buf_proxy.name, usage, device); queue.write_buffer(&buf, 0, bytes); self.bind_map.insert_buf(buf_proxy, buf); } @@ -351,14 +348,10 @@ impl Engine { .buf_map .get(&proxy.id) .ok_or("buffer not in map")?; - let buf = device.create_buffer(&wgpu::BufferDescriptor { - label: Some(proxy.name), - size: proxy.size, - usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, - mapped_at_creation: false, - }); + let usage = BufferUsages::MAP_READ | BufferUsages::COPY_DST; + let buf = self.pool.get_buf(proxy.size, "download", usage, device); encoder.copy_buffer_to_buffer(&src_buf.buffer, 0, &buf, 0, proxy.size); - downloads.buf_map.insert(proxy.id, buf); + self.downloads.insert(proxy.id, buf); } Command::Clear(proxy, offset, size) => { let buffer = self @@ -393,7 +386,15 @@ impl Engine { drop(view); } } - Ok(downloads) + Ok(()) + } + + pub fn get_download(&self, buf: BufProxy) -> Option<&Buffer> { + self.downloads.get(&buf.id) + } + + pub fn free_download(&mut self, buf: BufProxy) { + self.downloads.remove(&buf.id); } } @@ -441,6 +442,10 @@ impl Recording { )); } + /// Prepare a buffer for downloading. + /// + /// Currently this copies to a download buffer. The original buffer can be freed + /// immediately after. pub fn download(&mut self, buf: BufProxy) { self.push(Command::Download(buf)); } @@ -603,7 +608,7 @@ impl BindMap { if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) { let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE; - let buf = pool.get_buf(&proxy, usage, device); + let buf = pool.get_buf(proxy.size, proxy.name, usage, device); v.insert(BindMapBuffer { buffer: buf, label: proxy.name, @@ -685,7 +690,7 @@ impl BindMap { Entry::Occupied(occupied) => Ok(&occupied.into_mut().buffer), Entry::Vacant(vacant) => { let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE; - let buf = pool.get_buf(&proxy, usage, device); + let buf = pool.get_buf(proxy.size, proxy.name, usage, device); Ok(&vacant .insert(BindMapBuffer { buffer: buf, @@ -697,52 +702,22 @@ impl BindMap { } } -pub struct DownloadsMapped<'a>( - HashMap< - Id, - ( - BufferSlice<'a>, - GenericOneshotReceiver>, - ), - >, -); - -impl Downloads { - // Discussion: should API change so we get one buffer, rather than mapping all? - pub fn map(&self) -> DownloadsMapped { - let mut map = HashMap::new(); - for (id, buf) in &self.buf_map { - let buf_slice = buf.slice(..); - let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); - buf_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap()); - map.insert(*id, (buf_slice, receiver)); - } - DownloadsMapped(map) - } -} - -impl<'a> DownloadsMapped<'a> { - pub async fn get_mapped(&self, proxy: BufProxy) -> Result { - let (slice, recv) = self.0.get(&proxy.id).ok_or("buffer not in map")?; - if let Some(recv_result) = recv.receive().await { - recv_result?; - } else { - return Err("channel was closed".into()); - } - Ok(slice.get_mapped_range()) - } -} - const SIZE_CLASS_BITS: u32 = 1; impl ResourcePool { /// Get a buffer from the pool or create one. - fn get_buf(&mut self, proxy: &BufProxy, usage: BufferUsages, device: &Device) -> Buffer { - let rounded_size = Self::size_class(proxy.size, SIZE_CLASS_BITS); + fn get_buf( + &mut self, + size: u64, + name: &'static str, + usage: BufferUsages, + device: &Device, + ) -> Buffer { + let rounded_size = Self::size_class(size, SIZE_CLASS_BITS); let props = BufferProperties { size: rounded_size, usages: usage, - name: proxy.name, + name: name, }; if let Some(buf_vec) = self.bufs.get_mut(&props) { if let Some(buf) = buf_vec.pop() { @@ -751,7 +726,7 @@ impl ResourcePool { } device.create_buffer(&wgpu::BufferDescriptor { #[cfg(feature = "buffer_labels")] - label: Some(proxy.name), + label: Some(name), #[cfg(not(feature = "buffer_labels"))] label: None, size: rounded_size, diff --git a/src/lib.rs b/src/lib.rs index 6dc7bb2..db2968f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,9 +29,11 @@ pub mod encoding; pub mod glyph; pub mod util; +use render::Render; pub use scene::{Scene, SceneBuilder, SceneFragment}; +pub use util::block_on_wgpu; -use engine::{Engine, ExternalResource}; +use engine::{Engine, ExternalResource, Recording}; use shaders::FullShaders; use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView}; @@ -83,8 +85,7 @@ impl Renderer { *target.as_image().unwrap(), texture, )]; - let _ = self - .engine + self.engine .run_recording(device, queue, &recording, &external_resources)?; Ok(()) } @@ -164,6 +165,105 @@ impl Renderer { self.shaders = shaders; Ok(()) } + + /// Renders a scene to the target texture. + /// + /// The texture is assumed to be of the specified dimensions and have been created with + /// the [wgpu::TextureFormat::Rgba8Unorm] format and the [wgpu::TextureUsages::STORAGE_BINDING] + /// flag set. + pub async fn render_to_texture_async( + &mut self, + device: &Device, + queue: &Queue, + scene: &Scene, + texture: &TextureView, + width: u32, + height: u32, + ) -> Result<()> { + let mut render = Render::new(); + let encoding = scene.data(); + let recording = render.render_encoding_coarse(encoding, &self.shaders, width, height); + let target = render.out_image(); + let bump_buf = render.bump_buf(); + self.engine.run_recording(device, queue, &recording, &[])?; + if let Some(bump_buf) = self.engine.get_download(bump_buf) { + let buf_slice = bump_buf.slice(..); + let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel(); + buf_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap()); + if let Some(recv_result) = receiver.receive().await { + recv_result?; + } else { + return Err("channel was closed".into()); + } + let mapped = buf_slice.get_mapped_range(); + println!("{:?}", bytemuck::cast_slice::<_, u32>(&mapped)); + } + // TODO: apply logic to determine whether we need to rerun coarse, and also + // allocate the blend stack as needed. + self.engine.free_download(bump_buf); + // Maybe clear to reuse allocation? + let mut recording = Recording::default(); + render.record_fine(&self.shaders, &mut recording); + let external_resources = [ExternalResource::Image(target, texture)]; + self.engine + .run_recording(device, queue, &recording, &external_resources)?; + Ok(()) + } + + pub async fn render_to_surface_async( + &mut self, + device: &Device, + queue: &Queue, + scene: &Scene, + surface: &SurfaceTexture, + width: u32, + height: u32, + ) -> Result<()> { + let mut target = self + .target + .take() + .unwrap_or_else(|| TargetTexture::new(device, width, height)); + // TODO: implement clever resizing semantics here to avoid thrashing the memory allocator + // during resize, specifically on metal. + if target.width != width || target.height != height { + target = TargetTexture::new(device, width, height); + } + self.render_to_texture_async(device, queue, scene, &target.view, width, height) + .await?; + let mut encoder = + device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None }); + { + let surface_view = surface + .texture + .create_view(&wgpu::TextureViewDescriptor::default()); + let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &self.blit.bind_layout, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: wgpu::BindingResource::TextureView(&target.view), + }], + }); + let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + view: &surface_view, + resolve_target: None, + ops: wgpu::Operations { + load: wgpu::LoadOp::Clear(wgpu::Color::default()), + store: true, + }, + })], + depth_stencil_attachment: None, + }); + render_pass.set_pipeline(&self.blit.pipeline); + render_pass.set_bind_group(0, &bind_group, &[]); + render_pass.draw(0..6, 0..1); + } + queue.submit(Some(encoder.finish())); + self.target = Some(target); + Ok(()) + } } struct TargetTexture { diff --git a/src/render.rs b/src/render.rs index fa3d97b..710c77b 100644 --- a/src/render.rs +++ b/src/render.rs @@ -196,6 +196,8 @@ pub fn render_encoding_full( height: u32, ) -> (Recording, ResourceProxy) { let mut render = Render::new(); + // TODO: leaks the download of the bump buf; a good way to fix would be to conditionalize + // that download. let mut recording = render.render_encoding_coarse(encoding, shaders, width, height); let out_image = render.out_image(); render.record_fine(shaders, &mut recording); @@ -524,13 +526,14 @@ impl Render { info_bin_data_buf, out_image, }); + recording.download(*bump_buf.as_buf().unwrap()); + recording.free_resource(bump_buf); recording } /// Run fine rasterization assuming the coarse phase succeeded. pub fn record_fine(&mut self, shaders: &FullShaders, recording: &mut Recording) { let fine = self.fine.take().unwrap(); - recording.free_resource(fine.bump_buf); recording.dispatch( shaders.fine, (self.width_in_tiles, self.height_in_tiles, 1), @@ -559,4 +562,8 @@ impl Render { pub fn out_image(&self) -> ImageProxy { self.fine.as_ref().unwrap().out_image } + + pub fn bump_buf(&self) -> BufProxy { + *self.fine.as_ref().unwrap().bump_buf.as_buf().unwrap() + } } diff --git a/src/util.rs b/src/util.rs index 0245d11..a75f804 100644 --- a/src/util.rs +++ b/src/util.rs @@ -16,6 +16,8 @@ //! Simple helpers for managing wgpu state and surfaces. +use std::future::Future; + use super::Result; use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle}; @@ -132,3 +134,27 @@ pub struct RenderSurface { pub config: SurfaceConfiguration, pub dev_id: usize, } + +struct NullWake; + +impl std::task::Wake for NullWake { + fn wake(self: std::sync::Arc) {} +} + +/// Block on a future, polling the device as needed. +/// +/// This will deadlock if the future is awaiting anything other than GPU progress. +pub fn block_on_wgpu(device: &Device, mut fut: F) -> F::Output { + let waker = std::task::Waker::from(std::sync::Arc::new(NullWake)); + let mut context = std::task::Context::from_waker(&waker); + // Same logic as `pin_mut!` macro from `pin_utils`. + let mut fut = unsafe { std::pin::Pin::new_unchecked(&mut fut) }; + loop { + match fut.as_mut().poll(&mut context) { + std::task::Poll::Pending => { + device.poll(wgpu::Maintain::Wait); + } + std::task::Poll::Ready(item) => break item, + } + } +}