// Patch summary. This text sits ahead of the first `diff --git` header and belongs to no hunk.
//
// piet-gpu/bin/cli.rs
//   * Adds `trace_merge(buf: &[u32])`, a diagnostic that prints a merge of the binning output.
//     For each of 256 bins it tracks 16 chunk lists whose initial byte offsets are
//     `(bin * 16 + i) * 64`. For a chunk at byte offset `st`, the code reads `buf[st / 4]` as
//     the element count, `buf[st / 4 + 1]` as the byte offset of the next chunk (0 ends the
//     list) and `buf[st / 4 + 2 ..]` as the elements. On every round it picks the first list
//     whose current chunk is non-empty and has the smallest first element, prints that chunk,
//     then advances or retires the list. A list whose current chunk has count 0 is treated as
//     exhausted (`!0` sentinel), and the bin is done once every list reports `!0`.
//   * Inside the commented-out debug block in `main`, the `ptcl_buf` / `anno_buf` dumps are
//     replaced by a read of `renderer.bin_buf` (with the dump/trace calls left commented out).
//
// piet-gpu/shader/coarse.comp
//   * The former `do { ... break; } while (wr_ix > rd_ix)` loop, which always left after one
//     pass because of the unconditional `break`, becomes `while (true)` with an explicit exit
//     `if (rd_ix >= wr_ix || bin_ix == ~0) break;` (second disjunct: Nvidia workaround, see the
//     comment in the hunk).
//   * `sh_min_buf` is initialised to `~0` instead of `~1`; `my_min` is loaded only when
//     `th_ix < N_WG`; a chunk is selected only when `my_min != ~0`.
//   * Two `barrier()` calls are added: one after the merge loop, one before `rd_ix += N_TILE`.
//   * `uint probe` is replaced by `uint count`.
//
// piet-gpu/shader/coarse.spv: binary regenerated.
// piet-gpu/src/lib.rs: `render_cardioid` uses n = 601 instead of 91; the per-point circle fill
//   is commented out.
// piet-gpu/src/pico_svg.rs: the parsed `stroke-width` is multiplied by `scale`.
//
// NOTE(review): this copy of the patch had lost generic arguments that look like markup tags.
// Restored here: `collect::<Vec<Option<usize>>>()` (implied by how `starts` is used) and
// `data: Vec<u32>` (it is passed to `trace_merge`, which takes `&[u32]`). The `img_data: Vec`
// context line is left as found because its element type cannot be determined from this patch.
// TODO: confirm all three against the original commit.
diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index f37f0cd..c5c0b6b 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -34,6 +34,47 @@ fn dump_state(buf: &[u8]) { } +/// Interpret the output of the binning stage, for diagnostic purposes. +#[allow(unused)] +fn trace_merge(buf: &[u32]) { + for bin in 0..256 { + println!("bin {}:", bin); + let mut starts = (0..16).map(|i| Some((bin * 16 + i) * 64)).collect::<Vec<Option<usize>>>(); + loop { + let min_start = starts.iter().map(|st| + st.map(|st| + if buf[st / 4] == 0 { + !0 + } else { + buf[st / 4 + 2] + }).unwrap_or(!0)).min().unwrap(); + if min_start == !0 { + break; + } + let mut selected = !0; + for i in 0..16 { + if let Some(st) = starts[i] { + if buf[st/4] != 0 && buf[st/4 + 2] == min_start { + selected = i; + break; + } + } + } + let st = starts[selected].unwrap(); + println!("selected {}, start {:x}", selected, st); + for j in 0..buf[st/4] { + println!("{:x}", buf[st/4 + 2 + j as usize]) + } + if buf[st/4 + 1] == 0 { + starts[selected] = None; + } else { + starts[selected] = Some(buf[st/4 + 1] as usize); + } + } + + } +} + fn main() -> Result<(), Error> { let (instance, _) = VkInstance::new(None)?; unsafe { @@ -66,12 +107,9 @@ fn main() -> Result<(), Error> { /* let mut data: Vec<u32> = Default::default(); - device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap(); - piet_gpu::dump_k1_data(&data); - - let mut data: Vec<u32> = Default::default(); - device.read_buffer(&renderer.anno_buf, &mut data).unwrap(); - piet_gpu::dump_k1_data(&data); + device.read_buffer(&renderer.bin_buf, &mut data).unwrap(); + //piet_gpu::dump_k1_data(&data); + //trace_merge(&data); */ let mut img_data: Vec = Default::default(); diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index f94dc6b..2ca0cff 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -109,8 +109,8 @@ void main() { sh_first_el[th_ix] = chunk.n > 0 ? 
BinInstance_read(BinInstanceRef(start_chunk + BinChunk_size)).element_ix : ~0; } - uint probe = 0; // for debugging - do { + uint count = 0; + while (true) { for (uint i = 0; i < N_SLICE; i++) { sh_bitmaps[i][th_ix] = 0; } @@ -121,18 +121,18 @@ void main() { if (th_ix < N_WG) { if (th_ix == 0) { sh_selected_n = 0; - sh_min_buf = ~1; + sh_min_buf = ~0; } } barrier(); // Tempting to do this with subgroups, but atomic should be good enough. - my_min = sh_first_el[th_ix]; if (th_ix < N_WG) { + my_min = sh_first_el[th_ix]; atomicMin(sh_min_buf, my_min); } barrier(); if (th_ix < N_WG) { - if (sh_first_el[th_ix] == sh_min_buf) { + if (my_min == sh_min_buf && my_min != ~0) { sh_elements_ref = sh_chunk[th_ix] + BinChunk_size; uint selected_n = sh_chunk_n[th_ix]; sh_selected_n = selected_n; @@ -162,6 +162,7 @@ void main() { } wr_ix += chunk_n; } + barrier(); // We've done the merge and filled the buffer. @@ -272,8 +273,11 @@ void main() { // clear LSB bitmap &= bitmap - 1; } + barrier(); rd_ix += N_TILE; - break; - } while (wr_ix > rd_ix); + // The second disjunct is there as a strange workaround on Nvidia. If it is + // removed, then the kernel fails with ERROR_DEVICE_LOST. 
+ if (rd_ix >= wr_ix || bin_ix == ~0) break; + } } diff --git a/piet-gpu/shader/coarse.spv b/piet-gpu/shader/coarse.spv index 92ecbae..b0bec3f 100644 Binary files a/piet-gpu/shader/coarse.spv and b/piet-gpu/shader/coarse.spv differ diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index bf7a7c7..2dca39d 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -65,7 +65,7 @@ pub fn render_scene(rc: &mut impl RenderContext) { #[allow(unused)] fn render_cardioid(rc: &mut impl RenderContext) { - let n = 91; + let n = 601; let dth = std::f64::consts::PI * 2.0 / (n as f64); let center = Point::new(1024.0, 768.0); let r = 750.0; @@ -73,7 +73,7 @@ fn render_cardioid(rc: &mut impl RenderContext) { for i in 1..n { let p0 = center + Vec2::from_angle(i as f64 * dth) * r; let p1 = center + Vec2::from_angle(((i * 2) % n) as f64 * dth) * r; - rc.fill(&Circle::new(p0, 8.0), &Color::WHITE); + //rc.fill(&Circle::new(p0, 8.0), &Color::WHITE); path.move_to(p0); path.line_to(p1); //rc.stroke(Line::new(p0, p1), &Color::BLACK, 2.0); diff --git a/piet-gpu/src/pico_svg.rs b/piet-gpu/src/pico_svg.rs index a630f70..9cf5cc3 100644 --- a/piet-gpu/src/pico_svg.rs +++ b/piet-gpu/src/pico_svg.rs @@ -48,7 +48,7 @@ impl PicoSvg { } if let Some(stroke_color) = el.attribute("stroke") { let width = - f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?; + scale * f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?; let color = parse_color(stroke_color); items.push(Item::Stroke(StrokeItem { width, color, path })); }