Correct rendering (on Intel)

Handle multiple passes in the coarse rasterizer. This does not work on Nvidia
hardware yet; finding out why is still a work in progress.
This commit is contained in:
Raph Levien 2020-05-15 20:57:07 -07:00
parent 93044b469b
commit 9bb06ec340
5 changed files with 58 additions and 16 deletions

View file

@ -34,6 +34,47 @@ fn dump_state(buf: &[u8]) {
} }
/// Print a trace of the binning-stage output, for diagnostic purposes.
///
/// For each of 256 bins, 16 chunk chains are followed. Chain `i` of bin `bin`
/// initially starts at offset `(bin * 16 + i) * 64`; every offset is divided by
/// four before it is used to index `buf` (presumably because offsets are in
/// bytes while `buf` holds 32-bit words; confirm against the shader).
///
/// A chunk is read as: word 0 = count, word 1 = offset of the next chunk
/// (`0` ends the chain), word 2 = the value used to order the chunks. On each
/// step the non-empty chunk with the smallest word 2 is selected (lowest chain
/// index wins ties), `count` words starting at word 2 are printed in hex, and
/// that chain advances to its next chunk.
///
/// # Panics
///
/// Panics if any offset that is followed indexes past the end of `buf`.
#[allow(unused)]
fn trace_merge(buf: &[u32]) {
    // Stands for "nothing left in this chain" when searching for the minimum.
    const EXHAUSTED: u32 = !0;

    for bin in 0..256 {
        println!("bin {}:", bin);
        let mut starts: Vec<Option<usize>> =
            (0..16).map(|i| Some((bin * 16 + i) * 64)).collect();
        loop {
            // Smallest leading value among all chains that still have data.
            let min_start = starts
                .iter()
                .map(|slot| match *slot {
                    Some(off) if buf[off / 4] != 0 => buf[off / 4 + 2],
                    _ => EXHAUSTED,
                })
                .min()
                .unwrap();
            if min_start == EXHAUSTED {
                break;
            }

            // First chain whose current chunk carries that smallest value.
            let selected = starts
                .iter()
                .position(|slot| match *slot {
                    Some(off) => buf[off / 4] != 0 && buf[off / 4 + 2] == min_start,
                    None => false,
                })
                .unwrap_or(!0);

            let st = starts[selected].unwrap();
            println!("selected {}, start {:x}", selected, st);

            let base = st / 4;
            let count = buf[base];
            for k in 0..count {
                println!("{:x}", buf[base + 2 + k as usize]);
            }

            // Follow the link to the next chunk; a zero link ends the chain.
            let next = buf[base + 1];
            starts[selected] = if next == 0 {
                None
            } else {
                Some(next as usize)
            };
        }
    }
}
fn main() -> Result<(), Error> { fn main() -> Result<(), Error> {
let (instance, _) = VkInstance::new(None)?; let (instance, _) = VkInstance::new(None)?;
unsafe { unsafe {
@ -66,12 +107,9 @@ fn main() -> Result<(), Error> {
/* /*
let mut data: Vec<u32> = Default::default(); let mut data: Vec<u32> = Default::default();
device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap(); device.read_buffer(&renderer.bin_buf, &mut data).unwrap();
piet_gpu::dump_k1_data(&data); //piet_gpu::dump_k1_data(&data);
//trace_merge(&data);
let mut data: Vec<u32> = Default::default();
device.read_buffer(&renderer.anno_buf, &mut data).unwrap();
piet_gpu::dump_k1_data(&data);
*/ */
let mut img_data: Vec<u8> = Default::default(); let mut img_data: Vec<u8> = Default::default();

View file

@ -109,8 +109,8 @@ void main() {
sh_first_el[th_ix] = chunk.n > 0 ? sh_first_el[th_ix] = chunk.n > 0 ?
BinInstance_read(BinInstanceRef(start_chunk + BinChunk_size)).element_ix : ~0; BinInstance_read(BinInstanceRef(start_chunk + BinChunk_size)).element_ix : ~0;
} }
uint probe = 0; // for debugging uint count = 0;
do { while (true) {
for (uint i = 0; i < N_SLICE; i++) { for (uint i = 0; i < N_SLICE; i++) {
sh_bitmaps[i][th_ix] = 0; sh_bitmaps[i][th_ix] = 0;
} }
@ -121,18 +121,18 @@ void main() {
if (th_ix < N_WG) { if (th_ix < N_WG) {
if (th_ix == 0) { if (th_ix == 0) {
sh_selected_n = 0; sh_selected_n = 0;
sh_min_buf = ~1; sh_min_buf = ~0;
} }
} }
barrier(); barrier();
// Tempting to do this with subgroups, but atomic should be good enough. // Tempting to do this with subgroups, but atomic should be good enough.
my_min = sh_first_el[th_ix];
if (th_ix < N_WG) { if (th_ix < N_WG) {
my_min = sh_first_el[th_ix];
atomicMin(sh_min_buf, my_min); atomicMin(sh_min_buf, my_min);
} }
barrier(); barrier();
if (th_ix < N_WG) { if (th_ix < N_WG) {
if (sh_first_el[th_ix] == sh_min_buf) { if (my_min == sh_min_buf && my_min != ~0) {
sh_elements_ref = sh_chunk[th_ix] + BinChunk_size; sh_elements_ref = sh_chunk[th_ix] + BinChunk_size;
uint selected_n = sh_chunk_n[th_ix]; uint selected_n = sh_chunk_n[th_ix];
sh_selected_n = selected_n; sh_selected_n = selected_n;
@ -162,6 +162,7 @@ void main() {
} }
wr_ix += chunk_n; wr_ix += chunk_n;
} }
barrier();
// We've done the merge and filled the buffer. // We've done the merge and filled the buffer.
@ -272,8 +273,11 @@ void main() {
// clear LSB // clear LSB
bitmap &= bitmap - 1; bitmap &= bitmap - 1;
} }
barrier();
rd_ix += N_TILE; rd_ix += N_TILE;
break; // The second disjunct is there as a strange workaround on Nvidia. If it is
} while (wr_ix > rd_ix); // removed, then the kernel fails with ERROR_DEVICE_LOST.
if (rd_ix >= wr_ix || bin_ix == ~0) break;
}
} }

Binary file not shown.

View file

@ -65,7 +65,7 @@ pub fn render_scene(rc: &mut impl RenderContext) {
#[allow(unused)] #[allow(unused)]
fn render_cardioid(rc: &mut impl RenderContext) { fn render_cardioid(rc: &mut impl RenderContext) {
let n = 91; let n = 601;
let dth = std::f64::consts::PI * 2.0 / (n as f64); let dth = std::f64::consts::PI * 2.0 / (n as f64);
let center = Point::new(1024.0, 768.0); let center = Point::new(1024.0, 768.0);
let r = 750.0; let r = 750.0;
@ -73,7 +73,7 @@ fn render_cardioid(rc: &mut impl RenderContext) {
for i in 1..n { for i in 1..n {
let p0 = center + Vec2::from_angle(i as f64 * dth) * r; let p0 = center + Vec2::from_angle(i as f64 * dth) * r;
let p1 = center + Vec2::from_angle(((i * 2) % n) as f64 * dth) * r; let p1 = center + Vec2::from_angle(((i * 2) % n) as f64 * dth) * r;
rc.fill(&Circle::new(p0, 8.0), &Color::WHITE); //rc.fill(&Circle::new(p0, 8.0), &Color::WHITE);
path.move_to(p0); path.move_to(p0);
path.line_to(p1); path.line_to(p1);
//rc.stroke(Line::new(p0, p1), &Color::BLACK, 2.0); //rc.stroke(Line::new(p0, p1), &Color::BLACK, 2.0);

View file

@ -48,7 +48,7 @@ impl PicoSvg {
} }
if let Some(stroke_color) = el.attribute("stroke") { if let Some(stroke_color) = el.attribute("stroke") {
let width = let width =
f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?; scale * f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?;
let color = parse_color(stroke_color); let color = parse_color(stroke_color);
items.push(Item::Stroke(StrokeItem { width, color, path })); items.push(Item::Stroke(StrokeItem { width, color, path }));
} }