Basically get gradients working

Separate render context upload from renderer creation. Upload gradient ramps to a GPU buffer. Encode gradients into the scene description. Fix a number of bugs in uploading and processing. This renders gradients in a test image, but has some shortcomings. For one, staging buffers still need to be added for a couple of things (the affected buffers are just host-mapped for now). Also, the interaction between sRGB and premultiplied alpha isn't quite right. The size of the gradient ramp buffer is fixed and should be dynamic. And of course there's always more optimization to be done, including making the upload of gradient ramps more incremental, and probably hashing the stops instead of the processed ramps.
2025-01-10 12:41:30 +11:00 · 2021-08-03 09:04:19 -07:00 · 2021-08-03 09:04:19 -07:00 · 05e81acebc
parent 6f707c4c62
commit 05e81acebc
14 changed files with 262 additions and 117 deletions
--- a/piet-gpu-hal/src/hub.rs
+++ b/piet-gpu-hal/src/hub.rs
@ -673,6 +673,13 @@ impl Buffer {
        // else session lost error?
        Ok(())
    }
+
+    /// The size of the buffer.
+    ///
+    /// This is at least as large as the value provided on creation.
+    pub fn size(&self) -> u64 {
+        self.0.buffer.size()
+    }
 }

 impl PipelineBuilder {
--- a/piet-gpu-types/src/scene.rs
+++ b/piet-gpu-types/src/scene.rs
@ -1,7 +1,7 @@
 use piet_gpu_derive::piet_gpu;

 pub use self::scene::{
-    Clip, CubicSeg, Element, FillColor, LineSeg, QuadSeg, SetFillMode, SetLineWidth, Transform,
+    Clip, CubicSeg, Element, FillColor, FillLinGradient, LineSeg, QuadSeg, SetFillMode, SetLineWidth, Transform,
 };

 piet_gpu! {
--- a/piet-gpu/bin/cli.rs
+++ b/piet-gpu/bin/cli.rs
@ -244,13 +244,9 @@ fn main() -> Result<(), Error> {
        } else {
            render_scene(&mut ctx);
        }
-        let n_paths = ctx.path_count();
-        let n_pathseg = ctx.pathseg_count();
-        let n_trans = ctx.trans_count();
-        let scene = ctx.get_scene_buf();
-        //dump_scene(&scene);

-        let renderer = Renderer::new(&session, scene, n_paths, n_pathseg, n_trans)?;
+        let mut renderer = Renderer::new(&session)?;
+        renderer.upload_render_ctx(&mut ctx)?;
        let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
        let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;

--- a/piet-gpu/shader/backdrop.comp
+++ b/piet-gpu/shader/backdrop.comp
@ -58,6 +58,7 @@ void main() {
            AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
            switch (tag.tag) {
                case Annotated_Image:
+                case Annotated_LinGradient:
                case Annotated_BeginClip:
                case Annotated_Color:
                if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
--- a/piet-gpu/shader/backdrop.spv
+++ b/piet-gpu/shader/backdrop.spv
--- a/piet-gpu/shader/backdrop_lg.spv
+++ b/piet-gpu/shader/backdrop_lg.spv
--- a/piet-gpu/shader/coarse.comp
+++ b/piet-gpu/shader/coarse.comp
@ -346,7 +346,6 @@ void main() {
                    cmd_ref.offset += 4 + CmdColor_size;
                    break;
                case Annotated_LinGradient:
-                    // TODO: process and write linear gradient
                    tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
                        + (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
                    AnnoLinGradient lin = Annotated_LinGradient_read(conf.anno_alloc, ref);
--- a/piet-gpu/shader/elements.comp
+++ b/piet-gpu/shader/elements.comp
@ -137,6 +137,7 @@ State map_element(ElementRef ref) {
        c.pathseg_count = 1;
        break;
    case Element_FillColor:
+    case Element_FillLinGradient:
    case Element_FillImage:
    case Element_BeginClip:
        c.flags = FLAG_RESET_BBOX;
@ -370,7 +371,7 @@ void main() {
            vec2 p0 = st.mat.xy * lin.p0.x + st.mat.zw * lin.p0.y + st.translate;
            vec2 p1 = st.mat.xy * lin.p1.x + st.mat.zw * lin.p1.y + st.translate;
            vec2 dxy = p1 - p0;
-            float scale = inversesqrt(dxy.x * dxy.x + dxy.y * dxy.y);
+            float scale = 1.0 / (dxy.x * dxy.x + dxy.y * dxy.y);
            float line_x = dxy.x * scale;
            float line_y = dxy.y * scale;
            anno_lin.line_x = line_x;
--- a/piet-gpu/shader/elements.spv
+++ b/piet-gpu/shader/elements.spv
--- a/piet-gpu/shader/tile_alloc.comp
+++ b/piet-gpu/shader/tile_alloc.comp
@ -40,6 +40,7 @@ void main() {
    int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    switch (tag) {
    case Annotated_Color:
+    case Annotated_LinGradient:
    case Annotated_Image:
    case Annotated_BeginClip:
    case Annotated_EndClip:
--- a/piet-gpu/shader/tile_alloc.spv
+++ b/piet-gpu/shader/tile_alloc.spv
--- a/piet-gpu/src/gradient.rs
+++ b/piet-gpu/src/gradient.rs
@ -25,11 +25,13 @@ pub struct BakedGradient {
    ramp: Vec<u32>,
 }

+/// This is basically the same type as scene::FillLinGradient, so could
+/// potentially use that directly.
 #[derive(Clone)]
 pub struct LinearGradient {
-    start: [f32; 2],
-    end: [f32; 2],
-    ramp_id: u32,
+    pub(crate) start: [f32; 2],
+    pub(crate) end: [f32; 2],
+    pub(crate) ramp_id: u32,
 }

 #[derive(Default)]
@ -58,7 +60,11 @@ impl PremulRgba {

    fn to_u32(&self) -> u32 {
        let z = self.0;
-        Color::rgba(z[0], z[1], z[2], z[3]).as_rgba_u32()
+        let r = (z[0].max(0.0).min(1.0) * 255.0).round() as u32;
+        let g = (z[1].max(0.0).min(1.0) * 255.0).round() as u32;
+        let b = (z[2].max(0.0).min(1.0) * 255.0).round() as u32;
+        let a = (z[3].max(0.0).min(1.0) * 255.0).round() as u32;
+        r | (g << 8) | (b << 16) | (a << 24)
    }

    fn lerp(&self, other: PremulRgba, t: f64) -> PremulRgba {
@ -67,7 +73,12 @@ impl PremulRgba {
        }
        let a = self.0;
        let b = other.0;
-        PremulRgba([l(a[0], b[0], t), l(a[1], b[1], t), l(a[2], b[2], t), l(a[2], b[3], t)])
+        PremulRgba([
+            l(a[0], b[0], t),
+            l(a[1], b[1], t),
+            l(a[2], b[2], t),
+            l(a[3], b[3], t),
+        ])
    }
 }

@ -78,8 +89,9 @@ impl GradientRamp {
        let mut this_u = last_u;
        let mut this_c = last_c;
        let mut j = 0;
-        let v = (0..N_SAMPLES).map(|i| {
-            let u = (i as f64) / 255.0;
+        let v = (0..N_SAMPLES)
+            .map(|i| {
+                let u = (i as f64) / (N_SAMPLES - 1) as f64;
                while u > this_u {
                    last_u = this_u;
                    last_c = this_c;
@ -98,9 +110,17 @@ impl GradientRamp {
                    last_c.lerp(this_c, (u - last_u) / du)
                };
                c.to_u32()
-        }).collect();
+            })
+            .collect();
        GradientRamp(v)
    }
+
+    /// For debugging/development.
+    pub(crate) fn dump(&self) {
+        for val in &self.0 {
+            println!("{:x}", val);
+        }
+    }
 }

 impl RampCache {
@ -133,13 +153,52 @@ impl RampCache {
            end: crate::render_ctx::to_f32_2(lin.end),
        }
    }
+
+    /// Dump the contents of a gradient. This is for debugging.
+    #[allow(unused)]
+    pub(crate) fn dump_gradient(&self, lin: &LinearGradient) {
+        println!("id = {}", lin.ramp_id);
+        self.ramps[lin.ramp_id as usize].dump();
    }

+    /// Get the ramp data.
+    ///
+    /// This concatenates all the ramps; we'll want a more sophisticated approach to
+    /// incremental update.
+    pub fn get_ramp_data(&self) -> Vec<u32> {
+        let mut result = Vec::with_capacity(N_SAMPLES * self.ramps.len());
+        for ramp in &self.ramps {
+            result.extend(&ramp.0);
+        }
+        result
+    }
+}

 #[cfg(test)]
 mod test {
+    use super::RampCache;
+    use piet::kurbo::Point;
+    use piet::{Color, FixedLinearGradient, GradientStop};
+
    #[test]
-    fn it_works() {
-        println!("it works!");
+    fn simple_ramp() {
+        let stops = vec![
+            GradientStop {
+                color: Color::WHITE,
+                pos: 0.0,
+            },
+            GradientStop {
+                color: Color::BLACK,
+                pos: 1.0,
+            },
+        ];
+        let mut cache = RampCache::default();
+        let lin = FixedLinearGradient {
+            start: Point::new(0.0, 0.0),
+            end: Point::new(0.0, 1.0),
+            stops,
+        };
+        let our_lin = cache.add_linear_gradient(&lin);
+        cache.dump_gradient(&our_lin);
    }
 }
--- a/piet-gpu/src/lib.rs
+++ b/piet-gpu/src/lib.rs
@ -9,8 +9,11 @@ pub use render_ctx::PietGpuRenderContext;

 use rand::{Rng, RngCore};

-use piet::kurbo::{Affine, BezPath, Circle, Point, Shape, Vec2};
-use piet::{Color, ImageFormat, RenderContext, Text, TextAttribute, TextLayoutBuilder};
+use piet::kurbo::{BezPath, Circle, Point, Rect, Shape, Vec2};
+use piet::{
+    Color, FixedGradient, FixedLinearGradient, GradientStop, ImageFormat, RenderContext, Text,
+    TextAttribute, TextLayoutBuilder,
+};

 use piet_gpu_types::encoder::Encode;

@ -79,6 +82,7 @@ pub fn render_scene(rc: &mut impl RenderContext) {
    //render_cardioid(rc);
    render_clip_test(rc);
    render_alpha_test(rc);
+    render_gradient_test(rc);
    render_text_test(rc);
    //render_tiger(rc);
 }
@ -150,6 +154,28 @@ fn render_alpha_test(rc: &mut impl RenderContext) {
    rc.restore();
 }

+#[allow(unused)]
+fn render_gradient_test(rc: &mut impl RenderContext) {
+    let stops = vec![
+        GradientStop {
+            color: Color::rgb8(0, 255, 0),
+            pos: 0.0,
+        },
+        GradientStop {
+            color: Color::BLACK,
+            pos: 1.0,
+        },
+    ];
+    let lin = FixedLinearGradient {
+        start: Point::new(0.0, 100.0),
+        end: Point::new(0.0, 300.0),
+        stops,
+    };
+    let brush = FixedGradient::Linear(lin);
+    //let brush = Color::rgb8(0, 128, 0);
+    rc.fill(Rect::new(100.0, 100.0, 300.0, 300.0), &brush);
+}
+
 fn diamond(origin: Point) -> impl Shape {
    let mut path = BezPath::new();
    const SIZE: f64 = 50.0;
@ -250,69 +276,34 @@ pub struct Renderer {
    // Keep a reference to the image so that it is not destroyed.
    _bg_image: Image,

+    gradient_buf: Buffer,
    gradients: Image,
 }

 impl Renderer {
-    pub unsafe fn new(
-        session: &Session,
-        scene: &[u8],
-        n_paths: usize,
-        n_pathseg: usize,
-        n_trans: usize,
-    ) -> Result<Self, Error> {
+    /// Create a new renderer.
+    pub unsafe fn new(session: &Session) -> Result<Self, Error> {
        let dev = BufferUsage::STORAGE | BufferUsage::COPY_DST;
        let host_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC;

-        let n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
-        println!(
-            "scene: {} elements, {} paths, {} path_segments, {} transforms",
-            n_elements, n_paths, n_pathseg, n_trans
-        );
-
-        let scene_buf = session.create_buffer_init(&scene[..], dev).unwrap();
+        // This may be inadequate for very complex scenes (paris etc)
+        // TODO: separate staging buffer (if needed)
+        let scene_buf = session.create_buffer(1 * 1024 * 1024, host_upload).unwrap();

        let state_buf = session.create_buffer(1 * 1024 * 1024, dev)?;
        let image_dev = session.create_image2d(WIDTH as u32, HEIGHT as u32)?;

-        // TODO: constants
-        const PATH_SIZE: usize = 12;
-        const BIN_SIZE: usize = 8;
-        const PATHSEG_SIZE: usize = 52;
-        const ANNO_SIZE: usize = 32;
-        const TRANS_SIZE: usize = 24;
-        let mut alloc = 0;
-        let tile_base = alloc;
-        alloc += ((n_paths + 3) & !3) * PATH_SIZE;
-        let bin_base = alloc;
-        alloc += ((n_paths + 255) & !255) * BIN_SIZE;
-        let ptcl_base = alloc;
-        alloc += WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
-        let pathseg_base = alloc;
-        alloc += (n_pathseg * PATHSEG_SIZE + 3) & !3;
-        let anno_base = alloc;
-        alloc += (n_paths * ANNO_SIZE + 3) & !3;
-        let trans_base = alloc;
-        alloc += (n_trans * TRANS_SIZE + 3) & !3;
-        let config = &[
-            n_paths as u32,
-            n_pathseg as u32,
-            WIDTH_IN_TILES as u32,
-            HEIGHT_IN_TILES as u32,
-            tile_base as u32,
-            bin_base as u32,
-            ptcl_base as u32,
-            pathseg_base as u32,
-            anno_base as u32,
-            trans_base as u32,
-        ];
-        let config_buf = session.create_buffer_init(&config[..], dev).unwrap();
+        // Note: this must be updated when the config struct size changes.
+        const CONFIG_BUFFER_SIZE: u64 = 40;
+        // TODO: separate staging buffer (if needed)
+        let config_buf = session
+            .create_buffer(CONFIG_BUFFER_SIZE, host_upload)
+            .unwrap();

        // Perhaps we could avoid the explicit staging buffer by having buffer creation method
        // that takes both initial contents and a size.
-        let mut memory_buf_host = session.create_buffer(2 * 4, host_upload)?;
+        let memory_buf_host = session.create_buffer(2 * 4, host_upload)?;
        let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
-        memory_buf_host.write(&[alloc as u32, 0 /* Overflow flag */])?;

        let el_code = ShaderCode::Spv(include_bytes!("../shader/elements.spv"));
        let el_pipeline = session.create_simple_compute_pipeline(el_code, 4)?;
@ -354,6 +345,9 @@ impl Renderer {

        let bg_image = Self::make_test_bg_image(&session);

+        const GRADIENT_BUF_SIZE: usize =
+            crate::gradient::N_GRADIENTS * crate::gradient::N_SAMPLES * 4;
+        let gradient_buf = session.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)?;
        let gradients = Self::make_gradient_image(&session);

        let k4_code = ShaderCode::Spv(include_bytes!("../shader/kernel4.spv"));
@ -396,14 +390,82 @@ impl Renderer {
            coarse_ds,
            k4_pipeline,
            k4_ds,
-            n_elements,
-            n_paths,
-            n_pathseg,
+            n_elements: 0,
+            n_paths: 0,
+            n_pathseg: 0,
            _bg_image: bg_image,
-            gradients: gradients,
+            gradient_buf,
+            gradients,
        })
    }

+    /// Convert the scene in the render context to GPU resources.
+    ///
+    /// At present, this requires that any command buffer submission has completed.
+    /// A future evolution will handle staging of the next frame's scene while the
+    /// rendering of the current frame is in flight.
+    pub fn upload_render_ctx(
+        &mut self,
+        render_ctx: &mut PietGpuRenderContext,
+    ) -> Result<(), Error> {
+        let n_paths = render_ctx.path_count();
+        let n_pathseg = render_ctx.pathseg_count();
+        let n_trans = render_ctx.trans_count();
+        self.n_paths = n_paths;
+        self.n_pathseg = n_pathseg;
+
+        // These constants depend on encoding and may need to be updated.
+        // Perhaps we can plumb these from piet-gpu-derive?
+        const PATH_SIZE: usize = 12;
+        const BIN_SIZE: usize = 8;
+        const PATHSEG_SIZE: usize = 52;
+        const ANNO_SIZE: usize = 40;
+        const TRANS_SIZE: usize = 24;
+        let mut alloc = 0;
+        let tile_base = alloc;
+        alloc += ((n_paths + 3) & !3) * PATH_SIZE;
+        let bin_base = alloc;
+        alloc += ((n_paths + 255) & !255) * BIN_SIZE;
+        let ptcl_base = alloc;
+        alloc += WIDTH_IN_TILES * HEIGHT_IN_TILES * PTCL_INITIAL_ALLOC;
+        let pathseg_base = alloc;
+        alloc += (n_pathseg * PATHSEG_SIZE + 3) & !3;
+        let anno_base = alloc;
+        alloc += (n_paths * ANNO_SIZE + 3) & !3;
+        let trans_base = alloc;
+        alloc += (n_trans * TRANS_SIZE + 3) & !3;
+        let config = &[
+            n_paths as u32,
+            n_pathseg as u32,
+            WIDTH_IN_TILES as u32,
+            HEIGHT_IN_TILES as u32,
+            tile_base as u32,
+            bin_base as u32,
+            ptcl_base as u32,
+            pathseg_base as u32,
+            anno_base as u32,
+            trans_base as u32,
+        ];
+        unsafe {
+            let scene = render_ctx.get_scene_buf();
+            self.n_elements = scene.len() / piet_gpu_types::scene::Element::fixed_size();
+            // TODO: reallocate scene buffer if size is inadequate
+            assert!(self.scene_buf.size() as usize >= scene.len());
+            self.scene_buf.write(scene)?;
+            self.config_buf.write(config)?;
+            self.memory_buf_host
+                .write(&[alloc as u32, 0 /* Overflow flag */])?;
+
+            // Upload gradient data.
+            let ramp_data = render_ctx.get_ramp_data();
+            if !ramp_data.is_empty() {
+                assert!(self.gradient_buf.size() as usize >= std::mem::size_of_val(&*ramp_data));
+                self.gradient_buf.write(&ramp_data)?;
+            }
+        }
+        Ok(())
+    }
+
    pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool) {
        cmd_buf.copy_buffer(&self.memory_buf_host, &self.memory_buf_dev);
        cmd_buf.clear_buffer(&self.state_buf, None);
@ -417,8 +479,10 @@ impl Renderer {
        cmd_buf.image_barrier(
            &self.gradients,
            ImageLayout::Undefined,
-            ImageLayout::General,
+            ImageLayout::BlitDst,
        );
+        cmd_buf.copy_buffer_to_image(&self.gradient_buf, &self.gradients);
+        cmd_buf.image_barrier(&self.gradients, ImageLayout::BlitDst, ImageLayout::General);
        cmd_buf.reset_query_pool(&query_pool);
        cmd_buf.write_timestamp(&query_pool, 0);
        cmd_buf.dispatch(
@ -531,7 +595,9 @@ impl Renderer {

    fn make_gradient_image(session: &Session) -> Image {
        unsafe {
-            session.create_image2d(gradient::N_SAMPLES as u32, gradient::N_GRADIENTS as u32).unwrap()
+            session
+                .create_image2d(gradient::N_SAMPLES as u32, gradient::N_GRADIENTS as u32)
+                .unwrap()
        }
    }
 }
--- a/piet-gpu/src/render_ctx.rs
+++ b/piet-gpu/src/render_ctx.rs
@ -11,7 +11,8 @@ use piet::{

 use piet_gpu_types::encoder::{Encode, Encoder};
 use piet_gpu_types::scene::{
-    Clip, CubicSeg, Element, FillColor, LineSeg, QuadSeg, SetFillMode, SetLineWidth, Transform,
+    Clip, CubicSeg, Element, FillColor, FillLinGradient, LineSeg, QuadSeg, SetFillMode,
+    SetLineWidth, Transform,
 };

 use crate::gradient::{LinearGradient, RampCache};
@ -116,6 +117,10 @@ impl PietGpuRenderContext {
        self.trans_count
    }

+    pub fn get_ramp_data(&self) -> Vec<u32> {
+        self.ramp_cache.get_ramp_data()
+    }
+
    pub(crate) fn set_fill_mode(&mut self, fill_mode: FillMode) {
        if self.fill_mode != fill_mode {
            self.elements.push(Element::SetFillMode(SetFillMode {
@ -173,18 +178,11 @@ impl RenderContext for PietGpuRenderContext {
        }
        self.set_fill_mode(FillMode::Stroke);
        let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
-        match brush {
-            PietGpuBrush::Solid(rgba_color) => {
        // Note: the bbox contribution of stroke becomes more complicated with miter joins.
        self.accumulate_bbox(|| shape.bounding_box() + Insets::uniform(width * 0.5));
        let path = shape.path_elements(TOLERANCE);
        self.encode_path(path, false);
-                let stroke = FillColor { rgba_color };
-                self.elements.push(Element::FillColor(stroke));
-                self.path_count += 1;
-            }
-            _ => (),
-        }
+        self.encode_brush(&brush);
    }

    fn stroke_styled(
@ -198,17 +196,13 @@ impl RenderContext for PietGpuRenderContext {

    fn fill(&mut self, shape: impl Shape, brush: &impl IntoBrush<Self>) {
        let brush = brush.make_brush(self, || shape.bounding_box()).into_owned();
-        if let PietGpuBrush::Solid(rgba_color) = brush {
        // Note: we might get a good speedup from using an approximate bounding box.
        // Perhaps that should be added to kurbo.
        self.accumulate_bbox(|| shape.bounding_box());
        let path = shape.path_elements(TOLERANCE);
        self.set_fill_mode(FillMode::Nonzero);
        self.encode_path(path, true);
-            let fill = FillColor { rgba_color };
-            self.elements.push(Element::FillColor(fill));
-            self.path_count += 1;
-        }
+        self.encode_brush(&brush);
    }

    fn fill_even_odd(&mut self, _shape: impl Shape, _brush: &impl IntoBrush<Self>) {}
@ -507,6 +501,27 @@ impl PietGpuRenderContext {
        self.elements.push(Element::Transform(transform));
        self.trans_count += 1;
    }
+
+    fn encode_brush(&mut self, brush: &PietGpuBrush) {
+        match brush {
+            PietGpuBrush::Solid(rgba_color) => {
+                let fill = FillColor {
+                    rgba_color: *rgba_color,
+                };
+                self.elements.push(Element::FillColor(fill));
+                self.path_count += 1;
+            }
+            PietGpuBrush::LinGradient(lin) => {
+                let fill_lin = FillLinGradient {
+                    index: lin.ramp_id,
+                    p0: lin.start,
+                    p1: lin.end,
+                };
+                self.elements.push(Element::FillLinGradient(fill_lin));
+                self.path_count += 1;
+            }
+        }
+    }
 }

 impl IntoBrush<PietGpuRenderContext> for PietGpuBrush {