Start async wiring

Make async versions of the main library entry points, and invoke those from the with_winit example. Right now this only prints the contents of the bump buffer before running the fine dispatch, but it could later apply conditional logic based on those contents.
2025-01-09 20:31:29 +11:00 · 2023-01-24 20:25:44 -08:00 · 2023-01-24 20:25:44 -08:00 · 418256ffd1
parent 17907893af
commit 418256ffd1
5 changed files with 196 additions and 80 deletions
--- a/examples/with_winit/src/main.rs
+++ b/examples/with_winit/src/main.rs
@ -22,6 +22,7 @@ use std::{borrow::Cow, path::PathBuf, time::Instant};

 use clap::Parser;
 use vello::{
+    block_on_wgpu,
    kurbo::{Affine, Vec2},
    util::RenderContext,
    Renderer, Scene, SceneBuilder,
@ -187,18 +188,25 @@ async fn run(event_loop: EventLoop<UserEvent>, window: Window, args: Args) {
                .surface
                .get_current_texture()
                .expect("failed to get surface texture");
-            renderer
-                .render_to_surface(
-                    &device_handle.device,
-                    &device_handle.queue,
-                    &scene,
-                    &surface_texture,
-                    width,
-                    height,
-                )
-                .expect("failed to render to surface");
-            surface_texture.present();
-            device_handle.device.poll(wgpu::Maintain::Wait);
+            let fut = async {
+                renderer
+                    .render_to_surface_async(
+                        &device_handle.device,
+                        &device_handle.queue,
+                        &scene,
+                        &surface_texture,
+                        width,
+                        height,
+                    )
+                    .await
+                    .expect("failed to render to surface");
+                surface_texture.present();
+            };
+            #[cfg(not(target_arch = "wasm32"))]
+            block_on_wgpu(&device_handle.device, fut);
+            #[cfg(target_arch = "wasm32")]
+            wasm_bindgen_futures::spawn_local(fut);
+            device_handle.device.poll(wgpu::Maintain::Poll);
        }
        Event::UserEvent(event) => match event {
            #[cfg(not(target_arch = "wasm32"))]
--- a/src/engine.rs
+++ b/src/engine.rs
@ -21,12 +21,9 @@ use std::{
    sync::atomic::{AtomicU64, Ordering},
 };

-use futures_intrusive::channel::shared::GenericOneshotReceiver;
-use parking_lot::RawMutex;
 use wgpu::{
-    util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferAsyncError, BufferSlice,
-    BufferUsages, BufferView, ComputePipeline, Device, Queue, Texture, TextureAspect,
-    TextureFormat, TextureUsages, TextureView, TextureViewDimension,
+    util::DeviceExt, BindGroup, BindGroupLayout, Buffer, BufferUsages, ComputePipeline, Device,
+    Queue, Texture, TextureAspect, TextureFormat, TextureUsages, TextureView, TextureViewDimension,
 };

 pub type Error = Box<dyn std::error::Error>;
@ -43,6 +40,7 @@ pub struct Engine {
    shaders: Vec<Shader>,
    pool: ResourcePool,
    bind_map: BindMap,
+    downloads: HashMap<Id, Buffer>,
 }

 struct Shader {
@ -101,11 +99,6 @@ pub enum Command {
    FreeImage(ImageProxy),
 }

-#[derive(Default)]
-pub struct Downloads {
-    buf_map: HashMap<Id, Buffer>,
-}
-
 /// The type of resource that will be bound to a slot in a shader.
 #[derive(Clone, Copy, PartialEq, Eq)]
 pub enum BindType {
@ -153,6 +146,7 @@ impl Engine {
            shaders: vec![],
            pool: Default::default(),
            bind_map: Default::default(),
+            downloads: Default::default(),
        }
    }

@ -253,8 +247,7 @@ impl Engine {
        queue: &Queue,
        recording: &Recording,
        external_resources: &[ExternalResource],
-    ) -> Result<Downloads, Error> {
-        let mut downloads = Downloads::default();
+    ) -> Result<(), Error> {
        let mut free_bufs: HashSet<Id> = Default::default();
        let mut free_images: HashSet<Id> = Default::default();

@ -264,7 +257,9 @@ impl Engine {
                Command::Upload(buf_proxy, bytes) => {
                    let usage =
                        BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
-                    let buf = self.pool.get_buf(buf_proxy, usage, device);
+                    let buf = self
+                        .pool
+                        .get_buf(buf_proxy.size, buf_proxy.name, usage, device);
                    // TODO: if buffer is newly created, might be better to make it mapped at creation
                    // and copy. However, we expect reuse will be most common.
                    queue.write_buffer(&buf, 0, bytes);
@ -273,7 +268,9 @@ impl Engine {
                Command::UploadUniform(buf_proxy, bytes) => {
                    let usage = BufferUsages::UNIFORM | BufferUsages::COPY_DST;
                    // Same consideration as above
-                    let buf = self.pool.get_buf(buf_proxy, usage, device);
+                    let buf = self
+                        .pool
+                        .get_buf(buf_proxy.size, buf_proxy.name, usage, device);
                    queue.write_buffer(&buf, 0, bytes);
                    self.bind_map.insert_buf(buf_proxy, buf);
                }
@ -351,14 +348,10 @@ impl Engine {
                        .buf_map
                        .get(&proxy.id)
                        .ok_or("buffer not in map")?;
-                    let buf = device.create_buffer(&wgpu::BufferDescriptor {
-                        label: Some(proxy.name),
-                        size: proxy.size,
-                        usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
-                        mapped_at_creation: false,
-                    });
+                    let usage = BufferUsages::MAP_READ | BufferUsages::COPY_DST;
+                    let buf = self.pool.get_buf(proxy.size, "download", usage, device);
                    encoder.copy_buffer_to_buffer(&src_buf.buffer, 0, &buf, 0, proxy.size);
-                    downloads.buf_map.insert(proxy.id, buf);
+                    self.downloads.insert(proxy.id, buf);
                }
                Command::Clear(proxy, offset, size) => {
                    let buffer = self
@ -393,7 +386,15 @@ impl Engine {
                drop(view);
            }
        }
-        Ok(downloads)
+        Ok(())
+    }
+
+    pub fn get_download(&self, buf: BufProxy) -> Option<&Buffer> {
+        self.downloads.get(&buf.id)
+    }
+
+    pub fn free_download(&mut self, buf: BufProxy) {
+        self.downloads.remove(&buf.id);
    }
 }

@ -441,6 +442,10 @@ impl Recording {
        ));
    }

+    /// Prepare a buffer for downloading.
+    ///
+    /// Currently this copies to a download buffer. The original buffer can be freed
+    /// immediately after.
    pub fn download(&mut self, buf: BufProxy) {
        self.push(Command::Download(buf));
    }
@ -603,7 +608,7 @@ impl BindMap {
                    if let Entry::Vacant(v) = self.buf_map.entry(proxy.id) {
                        let usage =
                            BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
-                        let buf = pool.get_buf(&proxy, usage, device);
+                        let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
                        v.insert(BindMapBuffer {
                            buffer: buf,
                            label: proxy.name,
@ -685,7 +690,7 @@ impl BindMap {
            Entry::Occupied(occupied) => Ok(&occupied.into_mut().buffer),
            Entry::Vacant(vacant) => {
                let usage = BufferUsages::COPY_SRC | BufferUsages::COPY_DST | BufferUsages::STORAGE;
-                let buf = pool.get_buf(&proxy, usage, device);
+                let buf = pool.get_buf(proxy.size, proxy.name, usage, device);
                Ok(&vacant
                    .insert(BindMapBuffer {
                        buffer: buf,
@ -697,52 +702,22 @@ impl BindMap {
    }
 }

-pub struct DownloadsMapped<'a>(
-    HashMap<
-        Id,
-        (
-            BufferSlice<'a>,
-            GenericOneshotReceiver<RawMutex, Result<(), BufferAsyncError>>,
-        ),
-    >,
-);
-
-impl Downloads {
-    // Discussion: should API change so we get one buffer, rather than mapping all?
-    pub fn map(&self) -> DownloadsMapped {
-        let mut map = HashMap::new();
-        for (id, buf) in &self.buf_map {
-            let buf_slice = buf.slice(..);
-            let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
-            buf_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
-            map.insert(*id, (buf_slice, receiver));
-        }
-        DownloadsMapped(map)
-    }
-}
-
-impl<'a> DownloadsMapped<'a> {
-    pub async fn get_mapped(&self, proxy: BufProxy) -> Result<BufferView, Error> {
-        let (slice, recv) = self.0.get(&proxy.id).ok_or("buffer not in map")?;
-        if let Some(recv_result) = recv.receive().await {
-            recv_result?;
-        } else {
-            return Err("channel was closed".into());
-        }
-        Ok(slice.get_mapped_range())
-    }
-}
-
 const SIZE_CLASS_BITS: u32 = 1;

 impl ResourcePool {
    /// Get a buffer from the pool or create one.
-    fn get_buf(&mut self, proxy: &BufProxy, usage: BufferUsages, device: &Device) -> Buffer {
-        let rounded_size = Self::size_class(proxy.size, SIZE_CLASS_BITS);
+    fn get_buf(
+        &mut self,
+        size: u64,
+        name: &'static str,
+        usage: BufferUsages,
+        device: &Device,
+    ) -> Buffer {
+        let rounded_size = Self::size_class(size, SIZE_CLASS_BITS);
        let props = BufferProperties {
            size: rounded_size,
            usages: usage,
-            name: proxy.name,
+            name: name,
        };
        if let Some(buf_vec) = self.bufs.get_mut(&props) {
            if let Some(buf) = buf_vec.pop() {
@ -751,7 +726,7 @@ impl ResourcePool {
        }
        device.create_buffer(&wgpu::BufferDescriptor {
            #[cfg(feature = "buffer_labels")]
-            label: Some(proxy.name),
+            label: Some(name),
            #[cfg(not(feature = "buffer_labels"))]
            label: None,
            size: rounded_size,
--- a/src/lib.rs
+++ b/src/lib.rs
@ -29,9 +29,11 @@ pub mod encoding;
 pub mod glyph;
 pub mod util;

+use render::Render;
 pub use scene::{Scene, SceneBuilder, SceneFragment};
+pub use util::block_on_wgpu;

-use engine::{Engine, ExternalResource};
+use engine::{Engine, ExternalResource, Recording};
 use shaders::FullShaders;

 use wgpu::{Device, Queue, SurfaceTexture, TextureFormat, TextureView};
@ -83,8 +85,7 @@ impl Renderer {
            *target.as_image().unwrap(),
            texture,
        )];
-        let _ = self
-            .engine
+        self.engine
            .run_recording(device, queue, &recording, &external_resources)?;
        Ok(())
    }
@ -164,6 +165,105 @@ impl Renderer {
        self.shaders = shaders;
        Ok(())
    }
+
+    /// Renders a scene to the target texture.
+    ///
+    /// The texture is assumed to be of the specified dimensions and have been created with
+    /// the [wgpu::TextureFormat::Rgba8Unorm] format and the [wgpu::TextureUsages::STORAGE_BINDING]
+    /// flag set.
+    pub async fn render_to_texture_async(
+        &mut self,
+        device: &Device,
+        queue: &Queue,
+        scene: &Scene,
+        texture: &TextureView,
+        width: u32,
+        height: u32,
+    ) -> Result<()> {
+        let mut render = Render::new();
+        let encoding = scene.data();
+        let recording = render.render_encoding_coarse(encoding, &self.shaders, width, height);
+        let target = render.out_image();
+        let bump_buf = render.bump_buf();
+        self.engine.run_recording(device, queue, &recording, &[])?;
+        if let Some(bump_buf) = self.engine.get_download(bump_buf) {
+            let buf_slice = bump_buf.slice(..);
+            let (sender, receiver) = futures_intrusive::channel::shared::oneshot_channel();
+            buf_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap());
+            if let Some(recv_result) = receiver.receive().await {
+                recv_result?;
+            } else {
+                return Err("channel was closed".into());
+            }
+            let mapped = buf_slice.get_mapped_range();
+            println!("{:?}", bytemuck::cast_slice::<_, u32>(&mapped));
+        }
+        // TODO: apply logic to determine whether we need to rerun coarse, and also
+        // allocate the blend stack as needed.
+        self.engine.free_download(bump_buf);
+        // Maybe clear to reuse allocation?
+        let mut recording = Recording::default();
+        render.record_fine(&self.shaders, &mut recording);
+        let external_resources = [ExternalResource::Image(target, texture)];
+        self.engine
+            .run_recording(device, queue, &recording, &external_resources)?;
+        Ok(())
+    }
+
+    pub async fn render_to_surface_async(
+        &mut self,
+        device: &Device,
+        queue: &Queue,
+        scene: &Scene,
+        surface: &SurfaceTexture,
+        width: u32,
+        height: u32,
+    ) -> Result<()> {
+        let mut target = self
+            .target
+            .take()
+            .unwrap_or_else(|| TargetTexture::new(device, width, height));
+        // TODO: implement clever resizing semantics here to avoid thrashing the memory allocator
+        // during resize, specifically on metal.
+        if target.width != width || target.height != height {
+            target = TargetTexture::new(device, width, height);
+        }
+        self.render_to_texture_async(device, queue, scene, &target.view, width, height)
+            .await?;
+        let mut encoder =
+            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
+        {
+            let surface_view = surface
+                .texture
+                .create_view(&wgpu::TextureViewDescriptor::default());
+            let bind_group = device.create_bind_group(&wgpu::BindGroupDescriptor {
+                label: None,
+                layout: &self.blit.bind_layout,
+                entries: &[wgpu::BindGroupEntry {
+                    binding: 0,
+                    resource: wgpu::BindingResource::TextureView(&target.view),
+                }],
+            });
+            let mut render_pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
+                label: None,
+                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
+                    view: &surface_view,
+                    resolve_target: None,
+                    ops: wgpu::Operations {
+                        load: wgpu::LoadOp::Clear(wgpu::Color::default()),
+                        store: true,
+                    },
+                })],
+                depth_stencil_attachment: None,
+            });
+            render_pass.set_pipeline(&self.blit.pipeline);
+            render_pass.set_bind_group(0, &bind_group, &[]);
+            render_pass.draw(0..6, 0..1);
+        }
+        queue.submit(Some(encoder.finish()));
+        self.target = Some(target);
+        Ok(())
+    }
 }

 struct TargetTexture {
--- a/src/render.rs
+++ b/src/render.rs
@ -196,6 +196,8 @@ pub fn render_encoding_full(
    height: u32,
 ) -> (Recording, ResourceProxy) {
    let mut render = Render::new();
+    // TODO: this leaks the download of the bump buf; a good fix would be to make that download
+    // conditional.
    let mut recording = render.render_encoding_coarse(encoding, shaders, width, height);
    let out_image = render.out_image();
    render.record_fine(shaders, &mut recording);
@ -524,13 +526,14 @@ impl Render {
            info_bin_data_buf,
            out_image,
        });
+        recording.download(*bump_buf.as_buf().unwrap());
+        recording.free_resource(bump_buf);
        recording
    }

    /// Run fine rasterization assuming the coarse phase succeeded.
    pub fn record_fine(&mut self, shaders: &FullShaders, recording: &mut Recording) {
        let fine = self.fine.take().unwrap();
-        recording.free_resource(fine.bump_buf);
        recording.dispatch(
            shaders.fine,
            (self.width_in_tiles, self.height_in_tiles, 1),
@ -559,4 +562,8 @@ impl Render {
    pub fn out_image(&self) -> ImageProxy {
        self.fine.as_ref().unwrap().out_image
    }
+
+    pub fn bump_buf(&self) -> BufProxy {
+        *self.fine.as_ref().unwrap().bump_buf.as_buf().unwrap()
+    }
 }
--- a/src/util.rs
+++ b/src/util.rs
@ -16,6 +16,8 @@

 //! Simple helpers for managing wgpu state and surfaces.

+use std::future::Future;
+
 use super::Result;

 use raw_window_handle::{HasRawDisplayHandle, HasRawWindowHandle};
@ -132,3 +134,27 @@ pub struct RenderSurface {
    pub config: SurfaceConfiguration,
    pub dev_id: usize,
 }
+
+struct NullWake;
+
+impl std::task::Wake for NullWake {
+    fn wake(self: std::sync::Arc<Self>) {}
+}
+
+/// Block on a future, polling the device as needed.
+///
+/// This will deadlock if the future is awaiting anything other than GPU progress.
+pub fn block_on_wgpu<F: Future>(device: &Device, mut fut: F) -> F::Output {
+    let waker = std::task::Waker::from(std::sync::Arc::new(NullWake));
+    let mut context = std::task::Context::from_waker(&waker);
+    // SAFETY: `fut` is shadowed and never moved again; same logic as `pin_mut!` from `pin_utils`.
+    let mut fut = unsafe { std::pin::Pin::new_unchecked(&mut fut) };
+    loop {
+        match fut.as_mut().poll(&mut context) {
+            std::task::Poll::Pending => {
+                device.poll(wgpu::Maintain::Wait);
+            }
+            std::task::Poll::Ready(item) => break item,
+        }
+    }
+}