mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-10 12:41:30 +11:00
Correct rendering (on Intel)
Handle multiple passes in coarse raster. Doesn't work on NV, WIP to find out why.
This commit is contained in:
parent
93044b469b
commit
9bb06ec340
|
@ -34,6 +34,47 @@ fn dump_state(buf: &[u8]) {
|
|||
|
||||
}
|
||||
|
||||
/// Interpret the output of the binning stage, for diagnostic purposes.
///
/// `buf` is the binning output read back as 32-bit words. The trace walks 256 bins
/// with 16 chunk chains each; the initial chunk of chain `i` in bin `b` is taken to
/// sit at byte offset `(b * 16 + i) * 64`. A chunk is read as:
///
/// * word 0: number of elements in the chunk,
/// * word 1: byte offset of the next chunk in the chain (0 ends the chain),
/// * words 2..: the elements themselves.
///
/// For every bin, the chain whose current chunk has the smallest first element is
/// selected (ties go to the lowest chain index), its elements are printed in hex,
/// and that chain advances to its next chunk. The bin is finished once no chain has
/// a non-empty chunk left, or once every remaining first element is `!0`.
///
/// Everything is written to stdout. Because this is meant for inspecting possibly
/// broken GPU output, malformed input never panics or hangs: chunks lying outside
/// `buf` are treated as empty, an element list running off the end of `buf` is cut
/// short with a note, and a chain that keeps going for more steps than `buf` has
/// words (i.e. a cycle) is abandoned with a note.
#[allow(unused)]
fn trace_merge(buf: &[u32]) {
    const N_BINS: usize = 256;
    const CHAINS_PER_BIN: usize = 16;
    // Distance in bytes between the initial chunks of consecutive chains.
    const INITIAL_CHUNK_STRIDE: usize = 64;

    // First element of the chunk at byte offset `st`, or `None` when the chunk is
    // empty or does not fit inside `buf`.
    let first_element = |st: usize| -> Option<u32> {
        let base = st / 4;
        match buf.get(base) {
            Some(&n) if n != 0 => buf.get(base + 2).copied(),
            _ => None,
        }
    };

    for bin in 0..N_BINS {
        println!("bin {}:", bin);
        let mut starts: Vec<Option<usize>> = (0..CHAINS_PER_BIN)
            .map(|i| Some((bin * CHAINS_PER_BIN + i) * INITIAL_CHUNK_STRIDE))
            .collect();
        let mut steps = 0usize;
        loop {
            // Ordering the candidates as (first element, chain index, offset) makes
            // `min` pick the smallest first element and break ties by chain index.
            // A first element of `!0` acts as "nothing to merge" and is never picked.
            let candidate = starts
                .iter()
                .enumerate()
                .filter_map(|(i, st)| {
                    let st = (*st)?;
                    let first = first_element(st)?;
                    Some((first, i, st))
                })
                .filter(|&(first, _, _)| first != !0)
                .min();
            let (_, selected, st) = match candidate {
                Some(found) => found,
                None => break,
            };

            // Distinct chunks start at distinct words, so a well-formed bin can never
            // need more selections than there are words in the buffer.
            steps += 1;
            if steps > buf.len() {
                println!("chunk chain does not terminate, giving up on bin {}", bin);
                break;
            }

            println!("selected {}, start {:x}", selected, st);
            // `first_element` succeeded, so words `base..=base + 2` are in range.
            let base = st / 4;
            let n = buf[base] as usize;
            for j in 0..n {
                match buf.get(base + 2 + j) {
                    Some(el) => println!("{:x}", el),
                    None => {
                        println!("(chunk runs past end of buffer)");
                        break;
                    }
                }
            }
            starts[selected] = match buf[base + 1] {
                0 => None,
                next => Some(next as usize),
            };
        }
    }
}
|
||||
|
||||
fn main() -> Result<(), Error> {
|
||||
let (instance, _) = VkInstance::new(None)?;
|
||||
unsafe {
|
||||
|
@ -66,12 +107,9 @@ fn main() -> Result<(), Error> {
|
|||
|
||||
/*
|
||||
let mut data: Vec<u32> = Default::default();
|
||||
device.read_buffer(&renderer.ptcl_buf, &mut data).unwrap();
|
||||
piet_gpu::dump_k1_data(&data);
|
||||
|
||||
let mut data: Vec<u32> = Default::default();
|
||||
device.read_buffer(&renderer.anno_buf, &mut data).unwrap();
|
||||
piet_gpu::dump_k1_data(&data);
|
||||
device.read_buffer(&renderer.bin_buf, &mut data).unwrap();
|
||||
//piet_gpu::dump_k1_data(&data);
|
||||
//trace_merge(&data);
|
||||
*/
|
||||
|
||||
let mut img_data: Vec<u8> = Default::default();
|
||||
|
|
|
@ -109,8 +109,8 @@ void main() {
|
|||
sh_first_el[th_ix] = chunk.n > 0 ?
|
||||
BinInstance_read(BinInstanceRef(start_chunk + BinChunk_size)).element_ix : ~0;
|
||||
}
|
||||
uint probe = 0; // for debugging
|
||||
do {
|
||||
uint count = 0;
|
||||
while (true) {
|
||||
for (uint i = 0; i < N_SLICE; i++) {
|
||||
sh_bitmaps[i][th_ix] = 0;
|
||||
}
|
||||
|
@ -121,18 +121,18 @@ void main() {
|
|||
if (th_ix < N_WG) {
|
||||
if (th_ix == 0) {
|
||||
sh_selected_n = 0;
|
||||
sh_min_buf = ~1;
|
||||
sh_min_buf = ~0;
|
||||
}
|
||||
}
|
||||
barrier();
|
||||
// Tempting to do this with subgroups, but atomic should be good enough.
|
||||
my_min = sh_first_el[th_ix];
|
||||
if (th_ix < N_WG) {
|
||||
my_min = sh_first_el[th_ix];
|
||||
atomicMin(sh_min_buf, my_min);
|
||||
}
|
||||
barrier();
|
||||
if (th_ix < N_WG) {
|
||||
if (sh_first_el[th_ix] == sh_min_buf) {
|
||||
if (my_min == sh_min_buf && my_min != ~0) {
|
||||
sh_elements_ref = sh_chunk[th_ix] + BinChunk_size;
|
||||
uint selected_n = sh_chunk_n[th_ix];
|
||||
sh_selected_n = selected_n;
|
||||
|
@ -162,6 +162,7 @@ void main() {
|
|||
}
|
||||
wr_ix += chunk_n;
|
||||
}
|
||||
barrier();
|
||||
|
||||
// We've done the merge and filled the buffer.
|
||||
|
||||
|
@ -272,8 +273,11 @@ void main() {
|
|||
// clear LSB
|
||||
bitmap &= bitmap - 1;
|
||||
}
|
||||
barrier();
|
||||
|
||||
rd_ix += N_TILE;
|
||||
break;
|
||||
} while (wr_ix > rd_ix);
|
||||
// The second disjunct is there as a strange workaround on Nvidia. If it is
|
||||
// removed, then the kernel fails with ERROR_DEVICE_LOST.
|
||||
if (rd_ix >= wr_ix || bin_ix == ~0) break;
|
||||
}
|
||||
}
|
||||
|
|
Binary file not shown.
|
@ -65,7 +65,7 @@ pub fn render_scene(rc: &mut impl RenderContext) {
|
|||
|
||||
#[allow(unused)]
|
||||
fn render_cardioid(rc: &mut impl RenderContext) {
|
||||
let n = 91;
|
||||
let n = 601;
|
||||
let dth = std::f64::consts::PI * 2.0 / (n as f64);
|
||||
let center = Point::new(1024.0, 768.0);
|
||||
let r = 750.0;
|
||||
|
@ -73,7 +73,7 @@ fn render_cardioid(rc: &mut impl RenderContext) {
|
|||
for i in 1..n {
|
||||
let p0 = center + Vec2::from_angle(i as f64 * dth) * r;
|
||||
let p1 = center + Vec2::from_angle(((i * 2) % n) as f64 * dth) * r;
|
||||
rc.fill(&Circle::new(p0, 8.0), &Color::WHITE);
|
||||
//rc.fill(&Circle::new(p0, 8.0), &Color::WHITE);
|
||||
path.move_to(p0);
|
||||
path.line_to(p1);
|
||||
//rc.stroke(Line::new(p0, p1), &Color::BLACK, 2.0);
|
||||
|
|
|
@ -48,7 +48,7 @@ impl PicoSvg {
|
|||
}
|
||||
if let Some(stroke_color) = el.attribute("stroke") {
|
||||
let width =
|
||||
f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?;
|
||||
scale * f64::from_str(el.attribute("stroke-width").ok_or("missing width")?)?;
|
||||
let color = parse_color(stroke_color);
|
||||
items.push(Item::Stroke(StrokeItem { width, color, path }));
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue