mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-25 18:56:35 +11:00
Merge pull request #181 from linebender/mem2
Implement robust dynamic memory
This commit is contained in:
commit
bfa4abf642
22 changed files with 994 additions and 634 deletions
2
.github/workflows/push-shader.yml
vendored
2
.github/workflows/push-shader.yml
vendored
|
@ -20,7 +20,7 @@ jobs:
|
||||||
git switch main
|
git switch main
|
||||||
git config user.name "Commit by GitHub Action"
|
git config user.name "Commit by GitHub Action"
|
||||||
git config user.email "nobody@example.com"
|
git config user.email "nobody@example.com"
|
||||||
git merge dev -m "merge from dev branch"
|
git merge dev -m "merge from dev branch - ${{ github.ref_name }}"
|
||||||
sed -i '' '/shader\/gen/d' .gitignore
|
sed -i '' '/shader\/gen/d' .gitignore
|
||||||
git add .gitignore
|
git add .gitignore
|
||||||
git rm -r --ignore-unmatch piet-gpu/shader/gen
|
git rm -r --ignore-unmatch piet-gpu/shader/gen
|
||||||
|
|
|
@ -20,7 +20,7 @@ use piet_gpu_hal::{
|
||||||
use piet::kurbo::Point;
|
use piet::kurbo::Point;
|
||||||
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
|
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
|
||||||
|
|
||||||
use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer};
|
use piet_gpu::{test_scenes, PietGpuRenderContext, RenderDriver, Renderer};
|
||||||
|
|
||||||
#[cfg_attr(target_os = "android", ndk_glue::main(backtrace = "on"))]
|
#[cfg_attr(target_os = "android", ndk_glue::main(backtrace = "on"))]
|
||||||
fn main() {
|
fn main() {
|
||||||
|
@ -34,12 +34,9 @@ struct MyHandle {
|
||||||
// State required to render and present the contents
|
// State required to render and present the contents
|
||||||
struct GfxState {
|
struct GfxState {
|
||||||
session: Session,
|
session: Session,
|
||||||
renderer: Renderer,
|
render_driver: RenderDriver,
|
||||||
swapchain: Swapchain,
|
swapchain: Swapchain,
|
||||||
current_frame: usize,
|
current_frame: usize,
|
||||||
submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES],
|
|
||||||
cmd_bufs: [Option<CmdBuf>; NUM_FRAMES],
|
|
||||||
query_pools: Vec<QueryPool>,
|
|
||||||
present_semaphores: Vec<Semaphore>,
|
present_semaphores: Vec<Semaphore>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -110,22 +107,15 @@ impl GfxState {
|
||||||
let present_semaphores = (0..NUM_FRAMES)
|
let present_semaphores = (0..NUM_FRAMES)
|
||||||
.map(|_| session.create_semaphore())
|
.map(|_| session.create_semaphore())
|
||||||
.collect::<Result<Vec<_>, Error>>()?;
|
.collect::<Result<Vec<_>, Error>>()?;
|
||||||
let query_pools = (0..NUM_FRAMES)
|
|
||||||
.map(|_| session.create_query_pool(Renderer::QUERY_POOL_SIZE))
|
|
||||||
.collect::<Result<Vec<_>, Error>>()?;
|
|
||||||
let submitted = Default::default();
|
|
||||||
let cmd_bufs = Default::default();
|
|
||||||
|
|
||||||
let renderer = Renderer::new(&session, width, height, NUM_FRAMES)?;
|
let renderer = Renderer::new(&session, width, height, NUM_FRAMES)?;
|
||||||
|
let render_driver = RenderDriver::new(&session, NUM_FRAMES, renderer);
|
||||||
|
|
||||||
Ok(GfxState {
|
Ok(GfxState {
|
||||||
session,
|
session,
|
||||||
renderer,
|
render_driver,
|
||||||
swapchain,
|
swapchain,
|
||||||
current_frame,
|
current_frame,
|
||||||
submitted,
|
|
||||||
cmd_bufs,
|
|
||||||
query_pools,
|
|
||||||
present_semaphores,
|
present_semaphores,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -137,51 +127,47 @@ impl GfxState {
|
||||||
let frame_idx = self.current_frame % NUM_FRAMES;
|
let frame_idx = self.current_frame % NUM_FRAMES;
|
||||||
let mut info_string = String::new();
|
let mut info_string = String::new();
|
||||||
|
|
||||||
if let Some(submitted) = self.submitted[frame_idx].take() {
|
if self.current_frame >= NUM_FRAMES {
|
||||||
self.cmd_bufs[frame_idx] = submitted.wait().unwrap();
|
let stats = self
|
||||||
let ts = self
|
.render_driver
|
||||||
.session
|
.get_timing_stats(&self.session, frame_idx);
|
||||||
.fetch_query_pool(&self.query_pools[frame_idx])
|
info_string = stats.short_summary();
|
||||||
.unwrap();
|
println!("{}", info_string);
|
||||||
info_string = format!("{:.1}ms", ts.last().unwrap() * 1e3);
|
|
||||||
println!("render time: {:?}", ts);
|
|
||||||
}
|
}
|
||||||
let mut ctx = PietGpuRenderContext::new();
|
let mut ctx = PietGpuRenderContext::new();
|
||||||
test_scenes::render_anim_frame(&mut ctx, self.current_frame);
|
test_scenes::render_anim_frame(&mut ctx, self.current_frame);
|
||||||
//test_scenes::render_tiger(&mut ctx);
|
//test_scenes::render_tiger(&mut ctx);
|
||||||
render_info_string(&mut ctx, &info_string);
|
render_info_string(&mut ctx, &info_string);
|
||||||
if let Err(e) = self.renderer.upload_render_ctx(&mut ctx, frame_idx) {
|
if let Err(e) = self
|
||||||
|
.render_driver
|
||||||
|
.upload_render_ctx(&self.session, &mut ctx)
|
||||||
|
{
|
||||||
println!("error in uploading: {}", e);
|
println!("error in uploading: {}", e);
|
||||||
}
|
}
|
||||||
let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap();
|
let (image_idx, acquisition_semaphore) = self.swapchain.next().unwrap();
|
||||||
let swap_image = self.swapchain.image(image_idx);
|
let swap_image = self.swapchain.image(image_idx);
|
||||||
let query_pool = &self.query_pools[frame_idx];
|
self.render_driver.run_coarse(&self.session).unwrap();
|
||||||
let mut cmd_buf = self.cmd_bufs[frame_idx]
|
let target = self.render_driver.record_fine(&self.session).unwrap();
|
||||||
.take()
|
let cmd_buf = target.cmd_buf;
|
||||||
.unwrap_or_else(|| self.session.cmd_buf().unwrap());
|
|
||||||
cmd_buf.begin();
|
|
||||||
self.renderer.record(&mut cmd_buf, &query_pool, frame_idx);
|
|
||||||
|
|
||||||
// Image -> Swapchain
|
// Image -> Swapchain
|
||||||
cmd_buf.image_barrier(&swap_image, ImageLayout::Undefined, ImageLayout::BlitDst);
|
cmd_buf.image_barrier(&swap_image, ImageLayout::Undefined, ImageLayout::BlitDst);
|
||||||
cmd_buf.blit_image(&self.renderer.image_dev, &swap_image);
|
cmd_buf.blit_image(target.image, &swap_image);
|
||||||
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
|
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
|
||||||
cmd_buf.finish();
|
|
||||||
|
|
||||||
self.submitted[frame_idx] = Some(
|
self.render_driver
|
||||||
self.session
|
.submit(
|
||||||
.run_cmd_buf(
|
&self.session,
|
||||||
cmd_buf,
|
&[&acquisition_semaphore],
|
||||||
&[&acquisition_semaphore],
|
&[&self.present_semaphores[frame_idx]],
|
||||||
&[&self.present_semaphores[frame_idx]],
|
)
|
||||||
)
|
.unwrap();
|
||||||
.unwrap(),
|
|
||||||
);
|
|
||||||
|
|
||||||
self.swapchain
|
self.swapchain
|
||||||
.present(image_idx, &[&self.present_semaphores[frame_idx]])
|
.present(image_idx, &[&self.present_semaphores[frame_idx]])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
self.render_driver.next_buffer();
|
||||||
self.current_frame += 1;
|
self.current_frame += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,7 @@ use clap::{App, Arg};
|
||||||
|
|
||||||
use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session};
|
use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session};
|
||||||
|
|
||||||
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer};
|
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, RenderDriver, Renderer};
|
||||||
|
|
||||||
const WIDTH: usize = 2048;
|
const WIDTH: usize = 2048;
|
||||||
const HEIGHT: usize = 1536;
|
const HEIGHT: usize = 1536;
|
||||||
|
@ -231,9 +231,6 @@ fn main() -> Result<(), Error> {
|
||||||
let device = instance.device(None)?;
|
let device = instance.device(None)?;
|
||||||
let session = Session::new(device);
|
let session = Session::new(device);
|
||||||
|
|
||||||
let mut cmd_buf = session.cmd_buf()?;
|
|
||||||
let query_pool = session.create_query_pool(Renderer::QUERY_POOL_SIZE)?;
|
|
||||||
|
|
||||||
let mut ctx = PietGpuRenderContext::new();
|
let mut ctx = PietGpuRenderContext::new();
|
||||||
if let Some(input) = matches.value_of("INPUT") {
|
if let Some(input) = matches.value_of("INPUT") {
|
||||||
let mut scale = matches
|
let mut scale = matches
|
||||||
|
@ -253,40 +250,22 @@ fn main() -> Result<(), Error> {
|
||||||
test_scenes::render_blend_grid(&mut ctx);
|
test_scenes::render_blend_grid(&mut ctx);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
|
let renderer = Renderer::new(&session, WIDTH, HEIGHT, 1)?;
|
||||||
renderer.upload_render_ctx(&mut ctx, 0)?;
|
let mut render_driver = RenderDriver::new(&session, 1, renderer);
|
||||||
|
let start = std::time::Instant::now();
|
||||||
|
render_driver.upload_render_ctx(&session, &mut ctx)?;
|
||||||
let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
|
let image_usage = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
|
||||||
let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;
|
let image_buf = session.create_buffer((WIDTH * HEIGHT * 4) as u64, image_usage)?;
|
||||||
|
|
||||||
cmd_buf.begin();
|
render_driver.run_coarse(&session)?;
|
||||||
renderer.record(&mut cmd_buf, &query_pool, 0);
|
let target = render_driver.record_fine(&session)?;
|
||||||
cmd_buf.copy_image_to_buffer(&renderer.image_dev, &image_buf);
|
target
|
||||||
cmd_buf.finish_timestamps(&query_pool);
|
.cmd_buf
|
||||||
cmd_buf.host_barrier();
|
.copy_image_to_buffer(target.image, &image_buf);
|
||||||
cmd_buf.finish();
|
render_driver.submit(&session, &[], &[])?;
|
||||||
let start = std::time::Instant::now();
|
render_driver.wait(&session);
|
||||||
let submitted = session.run_cmd_buf(cmd_buf, &[], &[])?;
|
|
||||||
submitted.wait()?;
|
|
||||||
println!("elapsed = {:?}", start.elapsed());
|
println!("elapsed = {:?}", start.elapsed());
|
||||||
let ts = session.fetch_query_pool(&query_pool).unwrap();
|
render_driver.get_timing_stats(&session, 0).print_summary();
|
||||||
if !ts.is_empty() {
|
|
||||||
println!("Element kernel time: {:.3}ms", ts[0] * 1e3);
|
|
||||||
println!(
|
|
||||||
"Tile allocation kernel time: {:.3}ms",
|
|
||||||
(ts[1] - ts[0]) * 1e3
|
|
||||||
);
|
|
||||||
println!("Coarse path kernel time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
|
|
||||||
println!("Backdrop kernel time: {:.3}ms", (ts[3] - ts[2]) * 1e3);
|
|
||||||
println!("Binning kernel time: {:.3}ms", (ts[4] - ts[3]) * 1e3);
|
|
||||||
println!("Coarse raster kernel time: {:.3}ms", (ts[5] - ts[4]) * 1e3);
|
|
||||||
println!("Render kernel time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
let mut data: Vec<u32> = Default::default();
|
|
||||||
renderer.memory_buf_dev.read(&mut data).unwrap();
|
|
||||||
piet_gpu::dump_k1_data(&data[2..]);
|
|
||||||
*/
|
|
||||||
|
|
||||||
let mut img_data: Vec<u8> = Default::default();
|
let mut img_data: Vec<u8> = Default::default();
|
||||||
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
|
// Note: because png can use a `&[u8]` slice, we could avoid an extra copy
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
use piet::kurbo::Point;
|
use piet::kurbo::Point;
|
||||||
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
|
use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder};
|
||||||
use piet_gpu_hal::{CmdBuf, Error, ImageLayout, Instance, Session, SubmittedCmdBuf};
|
use piet_gpu_hal::{Error, ImageLayout, Instance, Session};
|
||||||
|
|
||||||
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer};
|
use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, RenderDriver, Renderer};
|
||||||
|
|
||||||
use clap::{App, Arg};
|
use clap::{App, Arg};
|
||||||
|
|
||||||
|
@ -69,13 +69,9 @@ fn main() -> Result<(), Error> {
|
||||||
let present_semaphores = (0..NUM_FRAMES)
|
let present_semaphores = (0..NUM_FRAMES)
|
||||||
.map(|_| session.create_semaphore())
|
.map(|_| session.create_semaphore())
|
||||||
.collect::<Result<Vec<_>, Error>>()?;
|
.collect::<Result<Vec<_>, Error>>()?;
|
||||||
let query_pools = (0..NUM_FRAMES)
|
|
||||||
.map(|_| session.create_query_pool(Renderer::QUERY_POOL_SIZE))
|
|
||||||
.collect::<Result<Vec<_>, Error>>()?;
|
|
||||||
let mut cmd_bufs: [Option<CmdBuf>; NUM_FRAMES] = Default::default();
|
|
||||||
let mut submitted: [Option<SubmittedCmdBuf>; NUM_FRAMES] = Default::default();
|
|
||||||
|
|
||||||
let mut renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
|
let renderer = Renderer::new(&session, WIDTH, HEIGHT, NUM_FRAMES)?;
|
||||||
|
let mut render_driver = RenderDriver::new(&session, NUM_FRAMES, renderer);
|
||||||
let mut mode = 0usize;
|
let mut mode = 0usize;
|
||||||
|
|
||||||
event_loop.run(move |event, _, control_flow| {
|
event_loop.run(move |event, _, control_flow| {
|
||||||
|
@ -106,26 +102,13 @@ fn main() -> Result<(), Error> {
|
||||||
Event::RedrawRequested(window_id) if window_id == window.id() => {
|
Event::RedrawRequested(window_id) if window_id == window.id() => {
|
||||||
let frame_idx = current_frame % NUM_FRAMES;
|
let frame_idx = current_frame % NUM_FRAMES;
|
||||||
|
|
||||||
if let Some(submitted) = submitted[frame_idx].take() {
|
if current_frame >= NUM_FRAMES {
|
||||||
cmd_bufs[frame_idx] = submitted.wait().unwrap();
|
let stats = render_driver.get_timing_stats(&session, frame_idx);
|
||||||
let ts = session.fetch_query_pool(&query_pools[frame_idx]).unwrap();
|
info_string = stats.short_summary();
|
||||||
if !ts.is_empty() {
|
|
||||||
info_string = format!(
|
|
||||||
"{:.3}ms :: e:{:.3}ms|alloc:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|bin:{:.3}ms|cr:{:.3}ms|r:{:.3}ms",
|
|
||||||
ts[10] * 1e3,
|
|
||||||
ts[0] * 1e3,
|
|
||||||
(ts[1] - ts[0]) * 1e3,
|
|
||||||
(ts[2] - ts[1]) * 1e3,
|
|
||||||
(ts[4] - ts[3]) * 1e3,
|
|
||||||
(ts[6] - ts[5]) * 1e3,
|
|
||||||
(ts[8] - ts[7]) * 1e3,
|
|
||||||
(ts[10] - ts[9]) * 1e3,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut ctx = PietGpuRenderContext::new();
|
let mut ctx = PietGpuRenderContext::new();
|
||||||
let test_blend = true;
|
let test_blend = false;
|
||||||
if let Some(svg) = &svg {
|
if let Some(svg) = &svg {
|
||||||
test_scenes::render_svg(&mut ctx, svg);
|
test_scenes::render_svg(&mut ctx, svg);
|
||||||
} else if test_blend {
|
} else if test_blend {
|
||||||
|
@ -168,16 +151,15 @@ fn main() -> Result<(), Error> {
|
||||||
test_scenes::render_anim_frame(&mut ctx, current_frame);
|
test_scenes::render_anim_frame(&mut ctx, current_frame);
|
||||||
}
|
}
|
||||||
render_info_string(&mut ctx, &info_string);
|
render_info_string(&mut ctx, &info_string);
|
||||||
if let Err(e) = renderer.upload_render_ctx(&mut ctx, frame_idx) {
|
if let Err(e) = render_driver.upload_render_ctx(&session, &mut ctx) {
|
||||||
println!("error in uploading: {}", e);
|
println!("error in uploading: {}", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
|
let (image_idx, acquisition_semaphore) = swapchain.next().unwrap();
|
||||||
let swap_image = swapchain.image(image_idx);
|
let swap_image = swapchain.image(image_idx);
|
||||||
let query_pool = &query_pools[frame_idx];
|
render_driver.run_coarse(&session).unwrap();
|
||||||
let mut cmd_buf = cmd_bufs[frame_idx].take().unwrap_or_else(|| session.cmd_buf().unwrap());
|
let target = render_driver.record_fine(&session).unwrap();
|
||||||
cmd_buf.begin();
|
let cmd_buf = target.cmd_buf;
|
||||||
renderer.record(&mut cmd_buf, &query_pool, frame_idx);
|
|
||||||
|
|
||||||
// Image -> Swapchain
|
// Image -> Swapchain
|
||||||
cmd_buf.image_barrier(
|
cmd_buf.image_barrier(
|
||||||
|
@ -185,32 +167,25 @@ fn main() -> Result<(), Error> {
|
||||||
ImageLayout::Undefined,
|
ImageLayout::Undefined,
|
||||||
ImageLayout::BlitDst,
|
ImageLayout::BlitDst,
|
||||||
);
|
);
|
||||||
cmd_buf.blit_image(&renderer.image_dev, &swap_image);
|
cmd_buf.blit_image(target.image, &swap_image);
|
||||||
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
|
cmd_buf.image_barrier(&swap_image, ImageLayout::BlitDst, ImageLayout::Present);
|
||||||
cmd_buf.finish();
|
render_driver
|
||||||
|
.submit(
|
||||||
submitted[frame_idx] = Some(session
|
&session,
|
||||||
.run_cmd_buf(
|
|
||||||
cmd_buf,
|
|
||||||
&[&acquisition_semaphore],
|
&[&acquisition_semaphore],
|
||||||
&[&present_semaphores[frame_idx]],
|
&[&present_semaphores[frame_idx]],
|
||||||
)
|
)
|
||||||
.unwrap());
|
.unwrap();
|
||||||
|
|
||||||
swapchain
|
swapchain
|
||||||
.present(image_idx, &[&present_semaphores[frame_idx]])
|
.present(image_idx, &[&present_semaphores[frame_idx]])
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
render_driver.next_buffer();
|
||||||
current_frame += 1;
|
current_frame += 1;
|
||||||
}
|
}
|
||||||
Event::LoopDestroyed => {
|
Event::LoopDestroyed => {
|
||||||
for cmd_buf in &mut submitted {
|
render_driver.wait_all(&session);
|
||||||
// Wait for command list submission, otherwise dropping of renderer may
|
|
||||||
// cause validation errors (and possibly crashes).
|
|
||||||
if let Some(cmd_buf) = cmd_buf.take() {
|
|
||||||
cmd_buf.wait().unwrap();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,12 +45,15 @@ shared Alloc sh_row_alloc[BACKDROP_WG];
|
||||||
shared uint sh_row_width[BACKDROP_WG];
|
shared uint sh_row_width[BACKDROP_WG];
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
|
if (!check_deps(STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
uint th_ix = gl_LocalInvocationIndex;
|
uint th_ix = gl_LocalInvocationIndex;
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
|
|
||||||
// Work assignment: 1 thread : 1 path element
|
// Work assignment: 1 thread : 1 path element
|
||||||
uint row_count = 0;
|
uint row_count = 0;
|
||||||
bool mem_ok = mem_error == NO_ERROR;
|
|
||||||
if (gl_LocalInvocationID.y == 0) {
|
if (gl_LocalInvocationID.y == 0) {
|
||||||
if (element_ix < conf.n_elements) {
|
if (element_ix < conf.n_elements) {
|
||||||
// Possible TODO: it's not necessary to process backdrops of stroked paths.
|
// Possible TODO: it's not necessary to process backdrops of stroked paths.
|
||||||
|
@ -68,7 +71,7 @@ void main() {
|
||||||
row_count = 0;
|
row_count = 0;
|
||||||
}
|
}
|
||||||
Alloc path_alloc = new_alloc(
|
Alloc path_alloc = new_alloc(
|
||||||
path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, true);
|
||||||
sh_row_alloc[th_ix] = path_alloc;
|
sh_row_alloc[th_ix] = path_alloc;
|
||||||
}
|
}
|
||||||
sh_row_count[th_ix] = row_count;
|
sh_row_count[th_ix] = row_count;
|
||||||
|
@ -98,7 +101,7 @@ void main() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
uint width = sh_row_width[el_ix];
|
uint width = sh_row_width[el_ix];
|
||||||
if (width > 0 && mem_ok) {
|
if (width > 0) {
|
||||||
// Process one row sequentially
|
// Process one row sequentially
|
||||||
// Read backdrop value per tile and prefix sum it
|
// Read backdrop value per tile and prefix sum it
|
||||||
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||||
|
|
|
@ -32,8 +32,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||||
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
|
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
|
||||||
shared uint bitmaps[N_SLICE][N_TILE];
|
shared uint bitmaps[N_SLICE][N_TILE];
|
||||||
shared uint count[N_SLICE][N_TILE];
|
shared uint count[N_SLICE][N_TILE];
|
||||||
shared Alloc sh_chunk_alloc[N_TILE];
|
shared uint sh_chunk_offset[N_TILE];
|
||||||
shared bool sh_alloc_failed;
|
|
||||||
|
|
||||||
DrawMonoid load_draw_monoid(uint element_ix) {
|
DrawMonoid load_draw_monoid(uint element_ix) {
|
||||||
uint base = (conf.drawmonoid_alloc.offset >> 2) + 4 * element_ix;
|
uint base = (conf.drawmonoid_alloc.offset >> 2) + 4 * element_ix;
|
||||||
|
@ -84,10 +83,6 @@ void main() {
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
bitmaps[i][gl_LocalInvocationID.x] = 0;
|
bitmaps[i][gl_LocalInvocationID.x] = 0;
|
||||||
}
|
}
|
||||||
if (gl_LocalInvocationID.x == 0) {
|
|
||||||
sh_alloc_failed = false;
|
|
||||||
}
|
|
||||||
barrier();
|
|
||||||
|
|
||||||
// Read inputs and determine coverage of bins
|
// Read inputs and determine coverage of bins
|
||||||
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
|
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
|
||||||
|
@ -148,26 +143,18 @@ void main() {
|
||||||
count[i][gl_LocalInvocationID.x] = element_count;
|
count[i][gl_LocalInvocationID.x] = element_count;
|
||||||
}
|
}
|
||||||
// element_count is number of elements covering bin for this invocation.
|
// element_count is number of elements covering bin for this invocation.
|
||||||
Alloc chunk_alloc = new_alloc(0, 0, true);
|
uint chunk_offset = 0;
|
||||||
if (element_count != 0) {
|
if (element_count != 0) {
|
||||||
// TODO: aggregate atomic adds (subgroup is probably fastest)
|
chunk_offset = malloc_stage(element_count * BinInstance_size, conf.mem_size, STAGE_BINNING);
|
||||||
MallocResult chunk = malloc(element_count * BinInstance_size);
|
sh_chunk_offset[gl_LocalInvocationID.x] = chunk_offset;
|
||||||
chunk_alloc = chunk.alloc;
|
|
||||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
|
||||||
if (chunk.failed) {
|
|
||||||
sh_alloc_failed = true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Note: it might be more efficient for reading to do this in the
|
// Note: it might be more efficient for reading to do this in the
|
||||||
// other order (each bin is a contiguous sequence of partitions)
|
// other order (each bin is a contiguous sequence of partitions)
|
||||||
uint out_ix = (conf.bin_alloc.offset >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
|
uint out_ix = (conf.bin_alloc.offset >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
|
||||||
write_mem(conf.bin_alloc, out_ix, element_count);
|
write_mem(conf.bin_alloc, out_ix, element_count);
|
||||||
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
|
write_mem(conf.bin_alloc, out_ix + 1, chunk_offset);
|
||||||
|
|
||||||
barrier();
|
barrier();
|
||||||
if (sh_alloc_failed || mem_error != NO_ERROR) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Use similar strategy as Laine & Karras paper; loop over bbox of bins
|
// Use similar strategy as Laine & Karras paper; loop over bbox of bins
|
||||||
// touched by this element
|
// touched by this element
|
||||||
|
@ -181,9 +168,10 @@ void main() {
|
||||||
if (my_slice > 0) {
|
if (my_slice > 0) {
|
||||||
idx += count[my_slice - 1][bin_ix];
|
idx += count[my_slice - 1][bin_ix];
|
||||||
}
|
}
|
||||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
uint chunk_offset = sh_chunk_offset[bin_ix];
|
||||||
uint out_offset = out_alloc.offset + idx * BinInstance_size;
|
if (chunk_offset != MALLOC_FAILED) {
|
||||||
BinInstance_write(out_alloc, BinInstanceRef(out_offset), BinInstance(element_ix));
|
memory[(chunk_offset >> 2) + idx] = element_ix;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
x++;
|
x++;
|
||||||
if (x == x1) {
|
if (x == x1) {
|
||||||
|
|
|
@ -72,49 +72,62 @@ void write_tile_alloc(uint el_ix, Alloc a) {
|
||||||
|
|
||||||
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||||
// All memory.
|
// All memory.
|
||||||
return new_alloc(0, memory.length() * 4, mem_ok);
|
return new_alloc(0, conf.mem_size, mem_ok);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// The maximum number of commands per annotated element.
|
// The maximum number of commands per annotated element.
|
||||||
#define ANNO_COMMANDS 2
|
#define ANNO_COMMANDS 2
|
||||||
|
|
||||||
// Perhaps cmd_alloc should be a global? This is a style question.
|
// All writes to the output must be gated by mem_ok.
|
||||||
bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
|
bool mem_ok = true;
|
||||||
|
|
||||||
|
// Perhaps cmd allocations should be a global? This is a style question.
|
||||||
|
void alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
|
||||||
if (cmd_ref.offset < cmd_limit) {
|
if (cmd_ref.offset < cmd_limit) {
|
||||||
return true;
|
return;
|
||||||
}
|
}
|
||||||
MallocResult new_cmd = malloc(PTCL_INITIAL_ALLOC);
|
uint new_cmd = malloc_stage(PTCL_INITIAL_ALLOC, conf.mem_size, STAGE_COARSE);
|
||||||
if (new_cmd.failed) {
|
if (new_cmd == MALLOC_FAILED) {
|
||||||
return false;
|
mem_ok = false;
|
||||||
}
|
}
|
||||||
CmdJump jump = CmdJump(new_cmd.alloc.offset);
|
if (mem_ok) {
|
||||||
Cmd_Jump_write(cmd_alloc, cmd_ref, jump);
|
CmdJump jump = CmdJump(new_cmd);
|
||||||
cmd_alloc = new_cmd.alloc;
|
Cmd_Jump_write(cmd_alloc, cmd_ref, jump);
|
||||||
cmd_ref = CmdRef(cmd_alloc.offset);
|
}
|
||||||
|
cmd_alloc = new_alloc(new_cmd, PTCL_INITIAL_ALLOC, true);
|
||||||
|
cmd_ref = CmdRef(new_cmd);
|
||||||
// Reserve space for the maximum number of commands and a potential jump.
|
// Reserve space for the maximum number of commands and a potential jump.
|
||||||
cmd_limit = cmd_alloc.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
|
cmd_limit = new_cmd + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) {
|
void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) {
|
||||||
if (linewidth < 0.0) {
|
if (linewidth < 0.0) {
|
||||||
if (tile.tile.offset != 0) {
|
if (tile.tile.offset != 0) {
|
||||||
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
|
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
|
||||||
Cmd_Fill_write(alloc, cmd_ref, cmd_fill);
|
if (mem_ok) {
|
||||||
|
Cmd_Fill_write(alloc, cmd_ref, cmd_fill);
|
||||||
|
}
|
||||||
cmd_ref.offset += 4 + CmdFill_size;
|
cmd_ref.offset += 4 + CmdFill_size;
|
||||||
} else {
|
} else {
|
||||||
Cmd_Solid_write(alloc, cmd_ref);
|
if (mem_ok) {
|
||||||
|
Cmd_Solid_write(alloc, cmd_ref);
|
||||||
|
}
|
||||||
cmd_ref.offset += 4;
|
cmd_ref.offset += 4;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * linewidth);
|
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * linewidth);
|
||||||
Cmd_Stroke_write(alloc, cmd_ref, cmd_stroke);
|
if (mem_ok) {
|
||||||
|
Cmd_Stroke_write(alloc, cmd_ref, cmd_stroke);
|
||||||
|
}
|
||||||
cmd_ref.offset += 4 + CmdStroke_size;
|
cmd_ref.offset += 4 + CmdStroke_size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
|
if (!check_deps(STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
// Could use either linear or 2d layouts for both dispatch and
|
// Could use either linear or 2d layouts for both dispatch and
|
||||||
// invocations within the workgroup. We'll use variables to abstract.
|
// invocations within the workgroup. We'll use variables to abstract.
|
||||||
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1) / N_TILE_X;
|
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1) / N_TILE_X;
|
||||||
|
@ -161,7 +174,6 @@ void main() {
|
||||||
uint drawtag_start = conf.drawtag_offset >> 2;
|
uint drawtag_start = conf.drawtag_offset >> 2;
|
||||||
uint drawdata_start = conf.drawdata_offset >> 2;
|
uint drawdata_start = conf.drawdata_offset >> 2;
|
||||||
uint drawinfo_start = conf.drawinfo_alloc.offset >> 2;
|
uint drawinfo_start = conf.drawinfo_alloc.offset >> 2;
|
||||||
bool mem_ok = mem_error == NO_ERROR;
|
|
||||||
while (true) {
|
while (true) {
|
||||||
for (uint i = 0; i < N_SLICE; i++) {
|
for (uint i = 0; i < N_SLICE; i++) {
|
||||||
sh_bitmaps[i][th_ix] = 0;
|
sh_bitmaps[i][th_ix] = 0;
|
||||||
|
@ -176,7 +188,7 @@ void main() {
|
||||||
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
||||||
count = read_mem(conf.bin_alloc, in_ix);
|
count = read_mem(conf.bin_alloc, in_ix);
|
||||||
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
|
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
|
||||||
sh_part_elements[th_ix] = new_alloc(offset, count * BinInstance_size, mem_ok);
|
sh_part_elements[th_ix] = new_alloc(offset, count * BinInstance_size, true);
|
||||||
}
|
}
|
||||||
// prefix sum of counts
|
// prefix sum of counts
|
||||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||||
|
@ -200,7 +212,7 @@ void main() {
|
||||||
}
|
}
|
||||||
// use binary search to find element to read
|
// use binary search to find element to read
|
||||||
uint ix = rd_ix + th_ix;
|
uint ix = rd_ix + th_ix;
|
||||||
if (ix >= wr_ix && ix < ready_ix && mem_ok) {
|
if (ix >= wr_ix && ix < ready_ix) {
|
||||||
uint part_ix = 0;
|
uint part_ix = 0;
|
||||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||||
uint probe = part_ix + (uint(N_PART_READ / 2) >> i);
|
uint probe = part_ix + (uint(N_PART_READ / 2) >> i);
|
||||||
|
@ -257,7 +269,7 @@ void main() {
|
||||||
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
||||||
sh_tile_base[th_ix] = base;
|
sh_tile_base[th_ix] = base;
|
||||||
Alloc path_alloc = new_alloc(path.tiles.offset,
|
Alloc path_alloc = new_alloc(path.tiles.offset,
|
||||||
(path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
(path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, true);
|
||||||
write_tile_alloc(th_ix, path_alloc);
|
write_tile_alloc(th_ix, path_alloc);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
@ -293,27 +305,25 @@ void main() {
|
||||||
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||||
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||||
bool include_tile = false;
|
bool include_tile = false;
|
||||||
if (mem_ok) {
|
Tile tile = Tile_read(read_tile_alloc(el_ix, true),
|
||||||
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok),
|
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||||
TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
bool is_clip = (tag & 1) != 0;
|
||||||
bool is_clip = (tag & 1) != 0;
|
// Always include the tile if it contains a path segment.
|
||||||
// Always include the tile if it contains a path segment.
|
// For draws, include the tile if it is solid.
|
||||||
// For draws, include the tile if it is solid.
|
// For clips, include the tile if it is empty - this way, logic
|
||||||
// For clips, include the tile if it is empty - this way, logic
|
// below will suppress the drawing of inner elements.
|
||||||
// below will suppress the drawing of inner elements.
|
// For blends, include the tile if
|
||||||
// For blends, include the tile if
|
// (blend_mode, composition_mode) != (Normal, SrcOver)
|
||||||
// (blend_mode, composition_mode) != (Normal, SrcOver)
|
bool is_blend = false;
|
||||||
bool is_blend = false;
|
if (is_clip) {
|
||||||
if (is_clip) {
|
uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
|
||||||
uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
|
uint scene_offset = memory[drawmonoid_base + 2];
|
||||||
uint scene_offset = memory[drawmonoid_base + 2];
|
uint dd = drawdata_start + (scene_offset >> 2);
|
||||||
uint dd = drawdata_start + (scene_offset >> 2);
|
uint blend = scene[dd];
|
||||||
uint blend = scene[dd];
|
is_blend = (blend != BlendComp_clip);
|
||||||
is_blend = (blend != BlendComp_clip);
|
|
||||||
}
|
|
||||||
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip
|
|
||||||
|| is_blend;
|
|
||||||
}
|
}
|
||||||
|
include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip
|
||||||
|
|| is_blend;
|
||||||
if (include_tile) {
|
if (include_tile) {
|
||||||
uint el_slice = el_ix / 32;
|
uint el_slice = el_ix / 32;
|
||||||
uint el_mask = 1u << (el_ix & 31);
|
uint el_mask = 1u << (el_ix & 31);
|
||||||
|
@ -327,7 +337,7 @@ void main() {
|
||||||
// through the draw objects.
|
// through the draw objects.
|
||||||
uint slice_ix = 0;
|
uint slice_ix = 0;
|
||||||
uint bitmap = sh_bitmaps[0][th_ix];
|
uint bitmap = sh_bitmaps[0][th_ix];
|
||||||
while (mem_ok) {
|
while (true) {
|
||||||
if (bitmap == 0) {
|
if (bitmap == 0) {
|
||||||
slice_ix++;
|
slice_ix++;
|
||||||
if (slice_ix == N_SLICE) {
|
if (slice_ix == N_SLICE) {
|
||||||
|
@ -347,7 +357,7 @@ void main() {
|
||||||
uint drawtag = scene[drawtag_start + element_ix];
|
uint drawtag = scene[drawtag_start + element_ix];
|
||||||
|
|
||||||
if (clip_zero_depth == 0) {
|
if (clip_zero_depth == 0) {
|
||||||
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok),
|
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, true),
|
||||||
TileRef(sh_tile_base[element_ref_ix] +
|
TileRef(sh_tile_base[element_ref_ix] +
|
||||||
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
(sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||||
uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
|
uint drawmonoid_base = drawmonoid_start + 4 * element_ix;
|
||||||
|
@ -358,18 +368,16 @@ void main() {
|
||||||
switch (drawtag) {
|
switch (drawtag) {
|
||||||
case Drawtag_FillColor:
|
case Drawtag_FillColor:
|
||||||
float linewidth = uintBitsToFloat(memory[di]);
|
float linewidth = uintBitsToFloat(memory[di]);
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
|
||||||
break;
|
|
||||||
}
|
|
||||||
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
|
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
|
||||||
uint rgba = scene[dd];
|
uint rgba = scene[dd];
|
||||||
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(rgba));
|
if (mem_ok) {
|
||||||
|
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(rgba));
|
||||||
|
}
|
||||||
cmd_ref.offset += 4 + CmdColor_size;
|
cmd_ref.offset += 4 + CmdColor_size;
|
||||||
break;
|
break;
|
||||||
case Drawtag_FillLinGradient:
|
case Drawtag_FillLinGradient:
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
|
||||||
break;
|
|
||||||
}
|
|
||||||
linewidth = uintBitsToFloat(memory[di]);
|
linewidth = uintBitsToFloat(memory[di]);
|
||||||
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
|
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
|
||||||
CmdLinGrad cmd_lin;
|
CmdLinGrad cmd_lin;
|
||||||
|
@ -377,13 +385,13 @@ void main() {
|
||||||
cmd_lin.line_x = uintBitsToFloat(memory[di + 1]);
|
cmd_lin.line_x = uintBitsToFloat(memory[di + 1]);
|
||||||
cmd_lin.line_y = uintBitsToFloat(memory[di + 2]);
|
cmd_lin.line_y = uintBitsToFloat(memory[di + 2]);
|
||||||
cmd_lin.line_c = uintBitsToFloat(memory[di + 3]);
|
cmd_lin.line_c = uintBitsToFloat(memory[di + 3]);
|
||||||
Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin);
|
if (mem_ok) {
|
||||||
|
Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin);
|
||||||
|
}
|
||||||
cmd_ref.offset += 4 + CmdLinGrad_size;
|
cmd_ref.offset += 4 + CmdLinGrad_size;
|
||||||
break;
|
break;
|
||||||
case Drawtag_FillRadGradient:
|
case Drawtag_FillRadGradient:
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
|
||||||
break;
|
|
||||||
}
|
|
||||||
linewidth = uintBitsToFloat(memory[di]);
|
linewidth = uintBitsToFloat(memory[di]);
|
||||||
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
|
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
|
||||||
CmdRadGrad cmd_rad;
|
CmdRadGrad cmd_rad;
|
||||||
|
@ -396,29 +404,31 @@ void main() {
|
||||||
cmd_rad.c1 = uintBitsToFloat(uvec2(memory[di + 7], memory[di + 8]));
|
cmd_rad.c1 = uintBitsToFloat(uvec2(memory[di + 7], memory[di + 8]));
|
||||||
cmd_rad.ra = uintBitsToFloat(memory[di + 9]);
|
cmd_rad.ra = uintBitsToFloat(memory[di + 9]);
|
||||||
cmd_rad.roff = uintBitsToFloat(memory[di + 10]);
|
cmd_rad.roff = uintBitsToFloat(memory[di + 10]);
|
||||||
Cmd_RadGrad_write(cmd_alloc, cmd_ref, cmd_rad);
|
if (mem_ok) {
|
||||||
|
Cmd_RadGrad_write(cmd_alloc, cmd_ref, cmd_rad);
|
||||||
|
}
|
||||||
cmd_ref.offset += 4 + CmdRadGrad_size;
|
cmd_ref.offset += 4 + CmdRadGrad_size;
|
||||||
break;
|
break;
|
||||||
case Drawtag_FillImage:
|
case Drawtag_FillImage:
|
||||||
|
alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
|
||||||
linewidth = uintBitsToFloat(memory[di]);
|
linewidth = uintBitsToFloat(memory[di]);
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
|
write_fill(cmd_alloc, cmd_ref, tile, linewidth);
|
||||||
uint index = scene[dd];
|
uint index = scene[dd];
|
||||||
uint raw1 = scene[dd + 1];
|
uint raw1 = scene[dd + 1];
|
||||||
ivec2 offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
|
ivec2 offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
|
||||||
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(index, offset));
|
if (mem_ok) {
|
||||||
|
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(index, offset));
|
||||||
|
}
|
||||||
cmd_ref.offset += 4 + CmdImage_size;
|
cmd_ref.offset += 4 + CmdImage_size;
|
||||||
break;
|
break;
|
||||||
case Drawtag_BeginClip:
|
case Drawtag_BeginClip:
|
||||||
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
||||||
clip_zero_depth = clip_depth + 1;
|
clip_zero_depth = clip_depth + 1;
|
||||||
} else {
|
} else {
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
alloc_cmd(cmd_alloc, cmd_ref, cmd_limit);
|
||||||
break;
|
if (mem_ok) {
|
||||||
|
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
|
||||||
}
|
}
|
||||||
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
|
|
||||||
cmd_ref.offset += 4;
|
cmd_ref.offset += 4;
|
||||||
render_blend_depth++;
|
render_blend_depth++;
|
||||||
max_blend_depth = max(max_blend_depth, render_blend_depth);
|
max_blend_depth = max(max_blend_depth, render_blend_depth);
|
||||||
|
@ -427,12 +437,11 @@ void main() {
|
||||||
break;
|
break;
|
||||||
case Drawtag_EndClip:
|
case Drawtag_EndClip:
|
||||||
clip_depth--;
|
clip_depth--;
|
||||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
write_fill(cmd_alloc, cmd_ref, tile, -1.0);
|
write_fill(cmd_alloc, cmd_ref, tile, -1.0);
|
||||||
uint blend = scene[dd];
|
uint blend = scene[dd];
|
||||||
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend));
|
if (mem_ok) {
|
||||||
|
Cmd_EndClip_write(cmd_alloc, cmd_ref, CmdEndClip(blend));
|
||||||
|
}
|
||||||
cmd_ref.offset += 4 + CmdEndClip_size;
|
cmd_ref.offset += 4 + CmdEndClip_size;
|
||||||
render_blend_depth--;
|
render_blend_depth--;
|
||||||
break;
|
break;
|
||||||
|
@ -459,11 +468,13 @@ void main() {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
|
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
|
||||||
Cmd_End_write(cmd_alloc, cmd_ref);
|
if (mem_ok) {
|
||||||
|
Cmd_End_write(cmd_alloc, cmd_ref);
|
||||||
|
}
|
||||||
if (max_blend_depth > BLEND_STACK_SPLIT) {
|
if (max_blend_depth > BLEND_STACK_SPLIT) {
|
||||||
uint scratch_size = max_blend_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
|
uint scratch_size = max_blend_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
|
||||||
MallocResult scratch = malloc(scratch_size);
|
uint scratch = atomicAdd(blend_offset, scratch_size);
|
||||||
alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc);
|
write_mem(scratch_alloc, scratch_alloc.offset >> 2, scratch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
BIN
piet-gpu/shader/image.png
Normal file
BIN
piet-gpu/shader/image.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 337 KiB |
|
@ -14,6 +14,7 @@
|
||||||
// higher quality antialiasing among other things).
|
// higher quality antialiasing among other things).
|
||||||
#define DO_SRGB_CONVERSION 0
|
#define DO_SRGB_CONVERSION 0
|
||||||
|
|
||||||
|
// TODO: the binding of the main buffer can be readonly
|
||||||
#include "mem.h"
|
#include "mem.h"
|
||||||
#include "setup.h"
|
#include "setup.h"
|
||||||
|
|
||||||
|
@ -24,19 +25,23 @@
|
||||||
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
|
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
|
||||||
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
|
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
|
||||||
|
|
||||||
layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf {
|
layout(binding = 1) restrict readonly buffer ConfigBuf {
|
||||||
Config conf;
|
Config conf;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
layout(binding = 2) buffer BlendBuf {
|
||||||
|
uint blend_mem[];
|
||||||
|
};
|
||||||
|
|
||||||
#ifdef GRAY
|
#ifdef GRAY
|
||||||
layout(r8, set = 0, binding = 2) uniform restrict writeonly image2D image;
|
layout(r8, binding = 3) uniform restrict writeonly image2D image;
|
||||||
#else
|
#else
|
||||||
layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image;
|
layout(rgba8, binding = 3) uniform restrict writeonly image2D image;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D image_atlas;
|
layout(rgba8, binding = 4) uniform restrict readonly image2D image_atlas;
|
||||||
|
|
||||||
layout(rgba8, set = 0, binding = 4) uniform restrict readonly image2D gradients;
|
layout(rgba8, binding = 5) uniform restrict readonly image2D gradients;
|
||||||
|
|
||||||
#include "ptcl.h"
|
#include "ptcl.h"
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
|
@ -114,8 +119,9 @@ void main() {
|
||||||
|
|
||||||
mediump float area[CHUNK];
|
mediump float area[CHUNK];
|
||||||
uint clip_depth = 0;
|
uint clip_depth = 0;
|
||||||
bool mem_ok = mem_error == NO_ERROR;
|
// Previously we would early-out if there was a memory failure, so we wouldn't try to read corrupt
|
||||||
while (mem_ok) {
|
// tiles. But now we assume this is checked CPU-side before launching fine rasterization.
|
||||||
|
while (true) {
|
||||||
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
||||||
if (tag == Cmd_End) {
|
if (tag == Cmd_End) {
|
||||||
break;
|
break;
|
||||||
|
@ -129,7 +135,7 @@ void main() {
|
||||||
df[k] = 1e9;
|
df[k] = 1e9;
|
||||||
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
||||||
do {
|
do {
|
||||||
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, true), tile_seg_ref);
|
||||||
vec2 line_vec = seg.vector;
|
vec2 line_vec = seg.vector;
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
||||||
|
@ -151,7 +157,7 @@ void main() {
|
||||||
tile_seg_ref = TileSegRef(fill.tile_ref);
|
tile_seg_ref = TileSegRef(fill.tile_ref);
|
||||||
// Calculate coverage based on backdrop + coverage of each line segment
|
// Calculate coverage based on backdrop + coverage of each line segment
|
||||||
do {
|
do {
|
||||||
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, true), tile_seg_ref);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
vec2 my_xy = xy + vec2(chunk_offset(k));
|
vec2 my_xy = xy + vec2(chunk_offset(k));
|
||||||
vec2 start = seg.origin - my_xy;
|
vec2 start = seg.origin - my_xy;
|
||||||
|
@ -248,7 +254,7 @@ void main() {
|
||||||
uint base_ix = (blend_offset >> 2) + (clip_depth - BLEND_STACK_SPLIT) * TILE_HEIGHT_PX * TILE_WIDTH_PX +
|
uint base_ix = (blend_offset >> 2) + (clip_depth - BLEND_STACK_SPLIT) * TILE_HEIGHT_PX * TILE_WIDTH_PX +
|
||||||
CHUNK * (gl_LocalInvocationID.x + CHUNK_DX * gl_LocalInvocationID.y);
|
CHUNK * (gl_LocalInvocationID.x + CHUNK_DX * gl_LocalInvocationID.y);
|
||||||
for (uint k = 0; k < CHUNK; k++) {
|
for (uint k = 0; k < CHUNK; k++) {
|
||||||
memory[base_ix + k] = packsRGB(vec4(rgba[k]));
|
blend_mem[base_ix + k] = packsRGB(vec4(rgba[k]));
|
||||||
rgba[k] = vec4(0.0);
|
rgba[k] = vec4(0.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -268,7 +274,7 @@ void main() {
|
||||||
if (clip_depth < BLEND_STACK_SPLIT) {
|
if (clip_depth < BLEND_STACK_SPLIT) {
|
||||||
bg_rgba = blend_stack[clip_depth][k];
|
bg_rgba = blend_stack[clip_depth][k];
|
||||||
} else {
|
} else {
|
||||||
bg_rgba = memory[base_ix + k];
|
bg_rgba = blend_mem[base_ix + k];
|
||||||
}
|
}
|
||||||
mediump vec4 bg = unpacksRGB(bg_rgba);
|
mediump vec4 bg = unpacksRGB(bg_rgba);
|
||||||
mediump vec4 fg = rgba[k] * area[k];
|
mediump vec4 fg = rgba[k] * area[k];
|
||||||
|
|
|
@ -3,27 +3,23 @@
|
||||||
layout(set = 0, binding = 0) buffer Memory {
|
layout(set = 0, binding = 0) buffer Memory {
|
||||||
// offset into memory of the next allocation, initialized by the user.
|
// offset into memory of the next allocation, initialized by the user.
|
||||||
uint mem_offset;
|
uint mem_offset;
|
||||||
// mem_error tracks the status of memory accesses, initialized to NO_ERROR
|
// mem_error is a bitmask of stages that have failed allocation.
|
||||||
// by the user. ERR_MALLOC_FAILED is reported for insufficient memory.
|
|
||||||
// If MEM_DEBUG is defined the following errors are reported:
|
|
||||||
// - ERR_OUT_OF_BOUNDS is reported for out of bounds writes.
|
|
||||||
// - ERR_UNALIGNED_ACCESS for memory access not aligned to 32-bit words.
|
|
||||||
uint mem_error;
|
uint mem_error;
|
||||||
|
// offset into blend memory of allocations for blend stack.
|
||||||
|
uint blend_offset;
|
||||||
uint[] memory;
|
uint[] memory;
|
||||||
};
|
};
|
||||||
|
|
||||||
// Uncomment this line to add the size field to Alloc and enable memory checks.
|
// Uncomment this line to add the size field to Alloc and enable memory checks.
|
||||||
// Note that the Config struct in setup.h grows size fields as well.
|
// Note that the Config struct in setup.h grows size fields as well.
|
||||||
//#define MEM_DEBUG
|
|
||||||
|
|
||||||
#define NO_ERROR 0
|
// This setting is not working and the mechanism will be removed.
|
||||||
#define ERR_MALLOC_FAILED 1
|
//#define MEM_DEBUG
|
||||||
#define ERR_OUT_OF_BOUNDS 2
|
|
||||||
#define ERR_UNALIGNED_ACCESS 3
|
|
||||||
|
|
||||||
#ifdef MEM_DEBUG
|
#ifdef MEM_DEBUG
|
||||||
#define Alloc_size 16
|
#define Alloc_size 16
|
||||||
#else
|
#else
|
||||||
|
// TODO: this seems wrong
|
||||||
#define Alloc_size 8
|
#define Alloc_size 8
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -37,12 +33,6 @@ struct Alloc {
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
struct MallocResult {
|
|
||||||
Alloc alloc;
|
|
||||||
// failed is true if the allocation overflowed memory.
|
|
||||||
bool failed;
|
|
||||||
};
|
|
||||||
|
|
||||||
// new_alloc synthesizes an Alloc from an offset and size.
|
// new_alloc synthesizes an Alloc from an offset and size.
|
||||||
Alloc new_alloc(uint offset, uint size, bool mem_ok) {
|
Alloc new_alloc(uint offset, uint size, bool mem_ok) {
|
||||||
Alloc a;
|
Alloc a;
|
||||||
|
@ -57,24 +47,32 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) {
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
// malloc allocates size bytes of memory.
|
#define STAGE_BINNING (1u << 0)
|
||||||
MallocResult malloc(uint size) {
|
#define STAGE_TILE_ALLOC (1u << 1)
|
||||||
MallocResult r;
|
#define STAGE_PATH_COARSE (1u << 2)
|
||||||
|
#define STAGE_COARSE (1u << 3)
|
||||||
|
|
||||||
|
// Allocations in main memory will never be 0, and this might be slightly
|
||||||
|
// faster to test against than some other value.
|
||||||
|
#define MALLOC_FAILED 0
|
||||||
|
|
||||||
|
// Check that previous dependent stages have succeeded.
|
||||||
|
bool check_deps(uint dep_stage) {
|
||||||
|
// TODO: this should be an atomic relaxed load, but that involves
|
||||||
|
// bringing in "memory scope semantics"
|
||||||
|
return (atomicOr(mem_error, 0) & dep_stage) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Allocate size bytes of memory, offset in bytes.
|
||||||
|
// Note: with a bit of rearrangement of header files, we could make the
|
||||||
|
// mem_size argument go away (it comes from the config binding).
|
||||||
|
uint malloc_stage(uint size, uint mem_size, uint stage) {
|
||||||
uint offset = atomicAdd(mem_offset, size);
|
uint offset = atomicAdd(mem_offset, size);
|
||||||
r.failed = offset + size > memory.length() * 4;
|
if (offset + size > mem_size) {
|
||||||
r.alloc = new_alloc(offset, size, !r.failed);
|
atomicOr(mem_error, stage);
|
||||||
if (r.failed) {
|
offset = MALLOC_FAILED;
|
||||||
atomicMax(mem_error, ERR_MALLOC_FAILED);
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
#ifdef MEM_DEBUG
|
return offset;
|
||||||
if ((size & 3) != 0) {
|
|
||||||
r.failed = true;
|
|
||||||
atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// touch_mem checks whether access to the memory word at offset is valid.
|
// touch_mem checks whether access to the memory word at offset is valid.
|
||||||
|
|
|
@ -87,7 +87,13 @@ SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
|
||||||
return SubdivResult(val, a0, a2);
|
return SubdivResult(val, a0, a2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// All writes to the output must be gated by mem_ok.
|
||||||
|
bool mem_ok = true;
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
|
if (!check_deps(STAGE_BINNING | STAGE_TILE_ALLOC | STAGE_PATH_COARSE)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
|
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
|
||||||
|
|
||||||
|
@ -95,24 +101,10 @@ void main() {
|
||||||
if (element_ix < conf.n_pathseg) {
|
if (element_ix < conf.n_pathseg) {
|
||||||
tag = PathSeg_tag(conf.pathseg_alloc, ref);
|
tag = PathSeg_tag(conf.pathseg_alloc, ref);
|
||||||
}
|
}
|
||||||
bool mem_ok = mem_error == NO_ERROR;
|
|
||||||
switch (tag.tag) {
|
switch (tag.tag) {
|
||||||
case PathSeg_Cubic:
|
case PathSeg_Cubic:
|
||||||
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
|
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
|
||||||
|
|
||||||
// Affine transform is now applied in pathseg
|
|
||||||
/*
|
|
||||||
uint trans_ix = cubic.trans_ix;
|
|
||||||
if (trans_ix > 0) {
|
|
||||||
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
|
|
||||||
TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref);
|
|
||||||
cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate;
|
|
||||||
cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate;
|
|
||||||
cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate;
|
|
||||||
cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate;
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
|
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
|
||||||
float err = err_v.x * err_v.x + err_v.y * err_v.y;
|
float err = err_v.x * err_v.x + err_v.y * err_v.y;
|
||||||
// The number of quadratics.
|
// The number of quadratics.
|
||||||
|
@ -140,7 +132,7 @@ void main() {
|
||||||
uint path_ix = cubic.path_ix;
|
uint path_ix = cubic.path_ix;
|
||||||
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
||||||
Alloc path_alloc =
|
Alloc path_alloc =
|
||||||
new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, true);
|
||||||
ivec4 bbox = ivec4(path.bbox);
|
ivec4 bbox = ivec4(path.bbox);
|
||||||
vec2 p0 = cubic.p0;
|
vec2 p0 = cubic.p0;
|
||||||
qp0 = cubic.p0;
|
qp0 = cubic.p0;
|
||||||
|
@ -199,11 +191,12 @@ void main() {
|
||||||
// TODO: can be tighter, use c to bound width
|
// TODO: can be tighter, use c to bound width
|
||||||
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
|
||||||
// Consider using subgroups to aggregate atomic add.
|
// Consider using subgroups to aggregate atomic add.
|
||||||
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
|
uint malloc_size = n_tile_alloc * TileSeg_size;
|
||||||
if (tile_alloc.failed || !mem_ok) {
|
uint tile_offset = malloc_stage(malloc_size, conf.mem_size, STAGE_PATH_COARSE);
|
||||||
return;
|
if (tile_offset == MALLOC_FAILED) {
|
||||||
|
mem_ok = false;
|
||||||
}
|
}
|
||||||
uint tile_offset = tile_alloc.alloc.offset;
|
Alloc tile_alloc = new_alloc(tile_offset, malloc_size, true);
|
||||||
|
|
||||||
TileSeg tile_seg;
|
TileSeg tile_seg;
|
||||||
|
|
||||||
|
@ -221,9 +214,7 @@ void main() {
|
||||||
int backdrop = p1.y < p0.y ? 1 : -1;
|
int backdrop = p1.y < p0.y ? 1 : -1;
|
||||||
TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
|
TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
|
||||||
uint tile_el = tile_ref.offset >> 2;
|
uint tile_el = tile_ref.offset >> 2;
|
||||||
if (touch_mem(path_alloc, tile_el + 1)) {
|
atomicAdd(memory[tile_el + 1], backdrop);
|
||||||
atomicAdd(memory[tile_el + 1], backdrop);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// next_xray is the xray for the next scanline; the line segment intersects
|
// next_xray is the xray for the next scanline; the line segment intersects
|
||||||
|
@ -247,9 +238,7 @@ void main() {
|
||||||
TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
|
TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
|
||||||
uint tile_el = tile_ref.offset >> 2;
|
uint tile_el = tile_ref.offset >> 2;
|
||||||
uint old = 0;
|
uint old = 0;
|
||||||
if (touch_mem(path_alloc, tile_el)) {
|
old = atomicExchange(memory[tile_el], tile_offset);
|
||||||
old = atomicExchange(memory[tile_el], tile_offset);
|
|
||||||
}
|
|
||||||
tile_seg.origin = p0;
|
tile_seg.origin = p0;
|
||||||
tile_seg.vector = p1 - p0;
|
tile_seg.vector = p1 - p0;
|
||||||
float y_edge = 0.0;
|
float y_edge = 0.0;
|
||||||
|
@ -276,7 +265,9 @@ void main() {
|
||||||
}
|
}
|
||||||
tile_seg.y_edge = y_edge;
|
tile_seg.y_edge = y_edge;
|
||||||
tile_seg.next.offset = old;
|
tile_seg.next.offset = old;
|
||||||
TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg);
|
if (mem_ok) {
|
||||||
|
TileSeg_write(tile_alloc, TileSegRef(tile_offset), tile_seg);
|
||||||
|
}
|
||||||
tile_offset += TileSeg_size;
|
tile_offset += TileSeg_size;
|
||||||
}
|
}
|
||||||
xc += b;
|
xc += b;
|
||||||
|
|
|
@ -31,8 +31,9 @@
|
||||||
// to memory for the overflow.
|
// to memory for the overflow.
|
||||||
#define BLEND_STACK_SPLIT 4
|
#define BLEND_STACK_SPLIT 4
|
||||||
|
|
||||||
#ifdef ERR_MALLOC_FAILED
|
#ifdef MALLOC_FAILED
|
||||||
struct Config {
|
struct Config {
|
||||||
|
uint mem_size; // in bytes
|
||||||
uint n_elements; // paths
|
uint n_elements; // paths
|
||||||
uint n_pathseg;
|
uint n_pathseg;
|
||||||
uint width_in_tiles;
|
uint width_in_tiles;
|
||||||
|
|
|
@ -29,7 +29,7 @@ layout(binding = 2) readonly buffer SceneBuf {
|
||||||
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||||
|
|
||||||
shared uint sh_tile_count[TILE_ALLOC_WG];
|
shared uint sh_tile_count[TILE_ALLOC_WG];
|
||||||
shared MallocResult sh_tile_alloc;
|
shared uint sh_tile_offset;
|
||||||
|
|
||||||
vec4 load_draw_bbox(uint draw_ix) {
|
vec4 load_draw_bbox(uint draw_ix) {
|
||||||
uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix;
|
uint base = (conf.draw_bbox_alloc.offset >> 2) + 4 * draw_ix;
|
||||||
|
@ -42,6 +42,9 @@ vec4 load_draw_bbox(uint draw_ix) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void main() {
|
void main() {
|
||||||
|
if (!check_deps(STAGE_BINNING)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
uint th_ix = gl_LocalInvocationID.x;
|
uint th_ix = gl_LocalInvocationID.x;
|
||||||
uint element_ix = gl_GlobalInvocationID.x;
|
uint element_ix = gl_GlobalInvocationID.x;
|
||||||
// At the moment, element_ix == path_ix. The clip-intersected bounding boxes
|
// At the moment, element_ix == path_ix. The clip-intersected bounding boxes
|
||||||
|
@ -86,27 +89,24 @@ void main() {
|
||||||
sh_tile_count[th_ix] = total_tile_count;
|
sh_tile_count[th_ix] = total_tile_count;
|
||||||
}
|
}
|
||||||
if (th_ix == TILE_ALLOC_WG - 1) {
|
if (th_ix == TILE_ALLOC_WG - 1) {
|
||||||
sh_tile_alloc = malloc(total_tile_count * Tile_size);
|
sh_tile_offset = malloc_stage(total_tile_count * Tile_size, conf.mem_size, STAGE_TILE_ALLOC);
|
||||||
}
|
}
|
||||||
barrier();
|
barrier();
|
||||||
MallocResult alloc_start = sh_tile_alloc;
|
uint offset_start = sh_tile_offset;
|
||||||
if (alloc_start.failed || mem_error != NO_ERROR) {
|
if (offset_start == MALLOC_FAILED) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (element_ix < conf.n_elements) {
|
if (element_ix < conf.n_elements) {
|
||||||
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
|
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
|
||||||
Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
|
path.tiles = TileRef(offset_start + Tile_size * tile_subix);
|
||||||
path.tiles = TileRef(tiles_alloc.offset);
|
|
||||||
Path_write(conf.tile_alloc, path_ref, path);
|
Path_write(conf.tile_alloc, path_ref, path);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Zero out allocated tiles efficiently
|
// Zero out allocated tiles efficiently
|
||||||
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
|
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
|
||||||
uint start_ix = alloc_start.alloc.offset >> 2;
|
uint start_ix = offset_start >> 2;
|
||||||
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
|
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
|
||||||
// Note: this interleaving is faster than using Tile_write
|
memory[start_ix + i] = 0;
|
||||||
// by a significant amount.
|
|
||||||
write_mem(alloc_start.alloc, start_ix + i, 0);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,13 +16,12 @@
|
||||||
|
|
||||||
//! Low-level scene encoding.
|
//! Low-level scene encoding.
|
||||||
|
|
||||||
use crate::Blend;
|
use crate::{Blend, SceneStats, DRAWTAG_SIZE, TRANSFORM_SIZE};
|
||||||
use bytemuck::{Pod, Zeroable};
|
use bytemuck::{Pod, Zeroable};
|
||||||
use piet_gpu_hal::BufWrite;
|
use piet_gpu_hal::BufWrite;
|
||||||
|
|
||||||
use crate::stages::{
|
use crate::stages::{
|
||||||
self, Config, PathEncoder, Transform, CLIP_PART_SIZE, DRAW_PART_SIZE, PATHSEG_PART_SIZE,
|
self, PathEncoder, Transform, DRAW_PART_SIZE, PATHSEG_PART_SIZE, TRANSFORM_PART_SIZE,
|
||||||
TRANSFORM_PART_SIZE,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct Encoder {
|
pub struct Encoder {
|
||||||
|
@ -52,86 +51,19 @@ pub struct EncodedSceneRef<'a, T: Copy + Pod> {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> {
|
impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> {
|
||||||
/// Return a config for the element processing pipeline.
|
pub(crate) fn stats(&self) -> SceneStats {
|
||||||
///
|
SceneStats {
|
||||||
/// This does not include further pipeline processing. Also returns the
|
n_drawobj: self.drawtag_stream.len(),
|
||||||
/// beginning of free memory.
|
drawdata_len: self.drawdata_stream.len(),
|
||||||
pub fn stage_config(&self) -> (Config, usize) {
|
n_transform: self.transform_stream.len(),
|
||||||
// Layout of scene buffer
|
linewidth_len: std::mem::size_of_val(self.linewidth_stream),
|
||||||
let drawtag_offset = 0;
|
pathseg_len: self.pathseg_stream.len(),
|
||||||
let n_drawobj = self.n_drawobj();
|
n_pathtag: self.tag_stream.len(),
|
||||||
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
|
|
||||||
let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
|
|
||||||
let trans_offset = drawdata_offset + self.drawdata_stream.len();
|
|
||||||
let n_trans = self.transform_stream.len();
|
|
||||||
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
|
|
||||||
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
|
|
||||||
let n_linewidth = self.linewidth_stream.len();
|
|
||||||
let pathtag_offset = linewidth_offset + n_linewidth * LINEWIDTH_SIZE;
|
|
||||||
let n_pathtag = self.tag_stream.len();
|
|
||||||
let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
|
|
||||||
let pathseg_offset = pathtag_offset + n_pathtag_padded;
|
|
||||||
|
|
||||||
// Layout of memory
|
|
||||||
let mut alloc = 0;
|
|
||||||
let trans_alloc = alloc;
|
|
||||||
alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE;
|
|
||||||
let pathseg_alloc = alloc;
|
|
||||||
alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE;
|
|
||||||
let path_bbox_alloc = alloc;
|
|
||||||
let n_path = self.n_path as usize;
|
|
||||||
alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE;
|
|
||||||
let drawmonoid_alloc = alloc;
|
|
||||||
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
|
|
||||||
let anno_alloc = alloc;
|
|
||||||
alloc += n_drawobj * ANNOTATED_SIZE;
|
|
||||||
let clip_alloc = alloc;
|
|
||||||
let n_clip = self.n_clip as usize;
|
|
||||||
const CLIP_SIZE: usize = 4;
|
|
||||||
alloc += n_clip * CLIP_SIZE;
|
|
||||||
let clip_bic_alloc = alloc;
|
|
||||||
const CLIP_BIC_SIZE: usize = 8;
|
|
||||||
// This can round down, as we only reduce the prefix
|
|
||||||
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
|
|
||||||
let clip_stack_alloc = alloc;
|
|
||||||
const CLIP_EL_SIZE: usize = 20;
|
|
||||||
alloc += n_clip * CLIP_EL_SIZE;
|
|
||||||
let clip_bbox_alloc = alloc;
|
|
||||||
const CLIP_BBOX_SIZE: usize = 16;
|
|
||||||
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
|
|
||||||
let draw_bbox_alloc = alloc;
|
|
||||||
alloc += n_drawobj * DRAW_BBOX_SIZE;
|
|
||||||
let drawinfo_alloc = alloc;
|
|
||||||
// TODO: not optimized; it can be accumulated during encoding or summed from drawtags
|
|
||||||
const MAX_DRAWINFO_SIZE: usize = 44;
|
|
||||||
alloc += n_drawobj * MAX_DRAWINFO_SIZE;
|
|
||||||
|
|
||||||
let config = Config {
|
|
||||||
n_elements: n_drawobj as u32,
|
|
||||||
n_pathseg: self.n_pathseg,
|
|
||||||
pathseg_alloc: pathseg_alloc as u32,
|
|
||||||
anno_alloc: anno_alloc as u32,
|
|
||||||
trans_alloc: trans_alloc as u32,
|
|
||||||
path_bbox_alloc: path_bbox_alloc as u32,
|
|
||||||
drawmonoid_alloc: drawmonoid_alloc as u32,
|
|
||||||
clip_alloc: clip_alloc as u32,
|
|
||||||
clip_bic_alloc: clip_bic_alloc as u32,
|
|
||||||
clip_stack_alloc: clip_stack_alloc as u32,
|
|
||||||
clip_bbox_alloc: clip_bbox_alloc as u32,
|
|
||||||
draw_bbox_alloc: draw_bbox_alloc as u32,
|
|
||||||
drawinfo_alloc: drawinfo_alloc as u32,
|
|
||||||
n_trans: n_trans as u32,
|
|
||||||
n_path: self.n_path,
|
n_path: self.n_path,
|
||||||
|
n_pathseg: self.n_pathseg,
|
||||||
n_clip: self.n_clip,
|
n_clip: self.n_clip,
|
||||||
trans_offset: trans_offset as u32,
|
}
|
||||||
linewidth_offset: linewidth_offset as u32,
|
|
||||||
pathtag_offset: pathtag_offset as u32,
|
|
||||||
pathseg_offset: pathseg_offset as u32,
|
|
||||||
drawtag_offset: drawtag_offset as u32,
|
|
||||||
drawdata_offset: drawdata_offset as u32,
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
(config, alloc)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write_scene(&self, buf: &mut BufWrite) {
|
pub fn write_scene(&self, buf: &mut BufWrite) {
|
||||||
|
@ -148,34 +80,6 @@ impl<'a, T: Copy + Pod> EncodedSceneRef<'a, T> {
|
||||||
buf.fill_zero(padding(n_pathtag, PATHSEG_PART_SIZE as usize));
|
buf.fill_zero(padding(n_pathtag, PATHSEG_PART_SIZE as usize));
|
||||||
buf.extend_slice(&self.pathseg_stream);
|
buf.extend_slice(&self.pathseg_stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The number of draw objects in the draw object stream.
|
|
||||||
pub(crate) fn n_drawobj(&self) -> usize {
|
|
||||||
self.drawtag_stream.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The number of paths.
|
|
||||||
pub(crate) fn n_path(&self) -> u32 {
|
|
||||||
self.n_path
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The number of path segments.
|
|
||||||
pub(crate) fn n_pathseg(&self) -> u32 {
|
|
||||||
self.n_pathseg
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn n_transform(&self) -> usize {
|
|
||||||
self.transform_stream.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The number of tags in the path stream.
|
|
||||||
pub(crate) fn n_pathtag(&self) -> usize {
|
|
||||||
self.tag_stream.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn n_clip(&self) -> u32 {
|
|
||||||
self.n_clip
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A scene fragment encoding a glyph.
|
/// A scene fragment encoding a glyph.
|
||||||
|
@ -191,15 +95,6 @@ pub struct GlyphEncoder {
|
||||||
n_pathseg: u32,
|
n_pathseg: u32,
|
||||||
}
|
}
|
||||||
|
|
||||||
const TRANSFORM_SIZE: usize = 24;
|
|
||||||
const LINEWIDTH_SIZE: usize = 4;
|
|
||||||
const PATHSEG_SIZE: usize = 52;
|
|
||||||
const PATH_BBOX_SIZE: usize = 24;
|
|
||||||
const DRAWMONOID_SIZE: usize = 16;
|
|
||||||
const DRAW_BBOX_SIZE: usize = 16;
|
|
||||||
const DRAWTAG_SIZE: usize = 4;
|
|
||||||
const ANNOTATED_SIZE: usize = 40;
|
|
||||||
|
|
||||||
// Tags for draw objects. See shader/drawtag.h for the authoritative source.
|
// Tags for draw objects. See shader/drawtag.h for the authoritative source.
|
||||||
const DRAWTAG_FILLCOLOR: u32 = 0x44;
|
const DRAWTAG_FILLCOLOR: u32 = 0x44;
|
||||||
const DRAWTAG_FILLLINGRADIENT: u32 = 0x114;
|
const DRAWTAG_FILLLINGRADIENT: u32 = 0x114;
|
||||||
|
@ -343,88 +238,6 @@ impl Encoder {
|
||||||
self.n_clip += 1;
|
self.n_clip += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a config for the element processing pipeline.
|
|
||||||
///
|
|
||||||
/// This does not include further pipeline processing. Also returns the
|
|
||||||
/// beginning of free memory.
|
|
||||||
pub fn stage_config(&self) -> (Config, usize) {
|
|
||||||
// Layout of scene buffer
|
|
||||||
let drawtag_offset = 0;
|
|
||||||
let n_drawobj = self.n_drawobj();
|
|
||||||
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
|
|
||||||
let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
|
|
||||||
let trans_offset = drawdata_offset + self.drawdata_stream.len();
|
|
||||||
let n_trans = self.transform_stream.len();
|
|
||||||
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
|
|
||||||
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
|
|
||||||
let n_linewidth = self.linewidth_stream.len();
|
|
||||||
let pathtag_offset = linewidth_offset + n_linewidth * LINEWIDTH_SIZE;
|
|
||||||
let n_pathtag = self.tag_stream.len();
|
|
||||||
let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
|
|
||||||
let pathseg_offset = pathtag_offset + n_pathtag_padded;
|
|
||||||
|
|
||||||
// Layout of memory
|
|
||||||
let mut alloc = 0;
|
|
||||||
let trans_alloc = alloc;
|
|
||||||
alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE;
|
|
||||||
let pathseg_alloc = alloc;
|
|
||||||
alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE;
|
|
||||||
let path_bbox_alloc = alloc;
|
|
||||||
let n_path = self.n_path as usize;
|
|
||||||
alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE;
|
|
||||||
let drawmonoid_alloc = alloc;
|
|
||||||
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
|
|
||||||
let anno_alloc = alloc;
|
|
||||||
alloc += n_drawobj * ANNOTATED_SIZE;
|
|
||||||
let clip_alloc = alloc;
|
|
||||||
let n_clip = self.n_clip as usize;
|
|
||||||
const CLIP_SIZE: usize = 4;
|
|
||||||
alloc += n_clip * CLIP_SIZE;
|
|
||||||
let clip_bic_alloc = alloc;
|
|
||||||
const CLIP_BIC_SIZE: usize = 8;
|
|
||||||
// This can round down, as we only reduce the prefix
|
|
||||||
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
|
|
||||||
let clip_stack_alloc = alloc;
|
|
||||||
const CLIP_EL_SIZE: usize = 20;
|
|
||||||
alloc += n_clip * CLIP_EL_SIZE;
|
|
||||||
let clip_bbox_alloc = alloc;
|
|
||||||
const CLIP_BBOX_SIZE: usize = 16;
|
|
||||||
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
|
|
||||||
let draw_bbox_alloc = alloc;
|
|
||||||
alloc += n_drawobj * DRAW_BBOX_SIZE;
|
|
||||||
let drawinfo_alloc = alloc;
|
|
||||||
// TODO: not optimized; it can be accumulated during encoding or summed from drawtags
|
|
||||||
const MAX_DRAWINFO_SIZE: usize = 44;
|
|
||||||
alloc += n_drawobj * MAX_DRAWINFO_SIZE;
|
|
||||||
|
|
||||||
let config = Config {
|
|
||||||
n_elements: n_drawobj as u32,
|
|
||||||
n_pathseg: self.n_pathseg,
|
|
||||||
pathseg_alloc: pathseg_alloc as u32,
|
|
||||||
anno_alloc: anno_alloc as u32,
|
|
||||||
trans_alloc: trans_alloc as u32,
|
|
||||||
path_bbox_alloc: path_bbox_alloc as u32,
|
|
||||||
drawmonoid_alloc: drawmonoid_alloc as u32,
|
|
||||||
clip_alloc: clip_alloc as u32,
|
|
||||||
clip_bic_alloc: clip_bic_alloc as u32,
|
|
||||||
clip_stack_alloc: clip_stack_alloc as u32,
|
|
||||||
clip_bbox_alloc: clip_bbox_alloc as u32,
|
|
||||||
draw_bbox_alloc: draw_bbox_alloc as u32,
|
|
||||||
drawinfo_alloc: drawinfo_alloc as u32,
|
|
||||||
n_trans: n_trans as u32,
|
|
||||||
n_path: self.n_path,
|
|
||||||
n_clip: self.n_clip,
|
|
||||||
trans_offset: trans_offset as u32,
|
|
||||||
linewidth_offset: linewidth_offset as u32,
|
|
||||||
pathtag_offset: pathtag_offset as u32,
|
|
||||||
pathseg_offset: pathseg_offset as u32,
|
|
||||||
drawtag_offset: drawtag_offset as u32,
|
|
||||||
drawdata_offset: drawdata_offset as u32,
|
|
||||||
..Default::default()
|
|
||||||
};
|
|
||||||
(config, alloc)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn write_scene(&self, buf: &mut BufWrite) {
|
pub fn write_scene(&self, buf: &mut BufWrite) {
|
||||||
buf.extend_slice(&self.drawtag_stream);
|
buf.extend_slice(&self.drawtag_stream);
|
||||||
let n_drawobj = self.drawtag_stream.len();
|
let n_drawobj = self.drawtag_stream.len();
|
||||||
|
@ -440,32 +253,19 @@ impl Encoder {
|
||||||
buf.extend_slice(&self.pathseg_stream);
|
buf.extend_slice(&self.pathseg_stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The number of draw objects in the draw object stream.
|
pub(crate) fn stats(&self) -> SceneStats {
|
||||||
pub(crate) fn n_drawobj(&self) -> usize {
|
SceneStats {
|
||||||
self.drawtag_stream.len()
|
n_drawobj: self.drawtag_stream.len(),
|
||||||
}
|
drawdata_len: self.drawdata_stream.len(),
|
||||||
|
n_transform: self.transform_stream.len(),
|
||||||
|
linewidth_len: std::mem::size_of_val(&*self.linewidth_stream),
|
||||||
|
n_pathtag: self.tag_stream.len(),
|
||||||
|
pathseg_len: self.pathseg_stream.len(),
|
||||||
|
|
||||||
/// The number of paths.
|
n_path: self.n_path,
|
||||||
pub(crate) fn n_path(&self) -> u32 {
|
n_pathseg: self.n_pathseg,
|
||||||
self.n_path
|
n_clip: self.n_clip,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The number of path segments.
|
|
||||||
pub(crate) fn n_pathseg(&self) -> u32 {
|
|
||||||
self.n_pathseg
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn n_transform(&self) -> usize {
|
|
||||||
self.transform_stream.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// The number of tags in the path stream.
|
|
||||||
pub(crate) fn n_pathtag(&self) -> usize {
|
|
||||||
self.tag_stream.len()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn n_clip(&self) -> u32 {
|
|
||||||
self.n_clip
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
|
pub(crate) fn encode_glyph(&mut self, glyph: &GlyphEncoder) {
|
||||||
|
@ -478,11 +278,6 @@ impl Encoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Round `x` up to the nearest multiple of `align`.
///
/// `align` must be a power of two; this is only checked in debug builds.
fn align_up(x: usize, align: usize) -> usize {
    debug_assert!(align.is_power_of_two());
    // For a power-of-two `align`, `!(align - 1)` masks off the low bits.
    (x + align - 1) & !(align - 1)
}
|
|
||||||
|
|
||||||
/// Return how much must be added to `x` to reach the next multiple of `align`.
///
/// The result is in `0..align`. `align` is expected to be a power of two,
/// since the computation masks with `align - 1`.
fn padding(x: usize, align: usize) -> usize {
    // `-x mod align`, computed without overflow via two's-complement negation.
    x.wrapping_neg() & (align - 1)
}
|
||||||
|
|
|
@ -4,17 +4,19 @@ pub mod glyph_render;
|
||||||
mod gradient;
|
mod gradient;
|
||||||
mod pico_svg;
|
mod pico_svg;
|
||||||
mod render_ctx;
|
mod render_ctx;
|
||||||
|
mod render_driver;
|
||||||
pub mod stages;
|
pub mod stages;
|
||||||
pub mod test_scenes;
|
pub mod test_scenes;
|
||||||
mod text;
|
mod text;
|
||||||
|
|
||||||
use bytemuck::Pod;
|
use bytemuck::{Pod, Zeroable};
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
|
||||||
pub use blend::{Blend, BlendMode, CompositionMode};
|
pub use blend::{Blend, BlendMode, CompositionMode};
|
||||||
pub use encoder::EncodedSceneRef;
|
pub use encoder::EncodedSceneRef;
|
||||||
pub use gradient::Colrv1RadialGradient;
|
pub use gradient::Colrv1RadialGradient;
|
||||||
pub use render_ctx::PietGpuRenderContext;
|
pub use render_ctx::PietGpuRenderContext;
|
||||||
|
pub use render_driver::RenderDriver;
|
||||||
|
|
||||||
use piet::kurbo::Vec2;
|
use piet::kurbo::Vec2;
|
||||||
use piet::{ImageFormat, RenderContext};
|
use piet::{ImageFormat, RenderContext};
|
||||||
|
@ -25,9 +27,12 @@ use piet_gpu_hal::{
|
||||||
};
|
};
|
||||||
|
|
||||||
pub use pico_svg::PicoSvg;
|
pub use pico_svg::PicoSvg;
|
||||||
use stages::{ClipBinding, ElementBinding, ElementCode};
|
use stages::{
|
||||||
|
ClipBinding, ElementBinding, ElementCode, DRAW_PART_SIZE, PATHSEG_PART_SIZE,
|
||||||
|
TRANSFORM_PART_SIZE,
|
||||||
|
};
|
||||||
|
|
||||||
use crate::stages::{ClipCode, Config, ElementStage};
|
use crate::stages::{ClipCode, Config, ElementStage, CLIP_PART_SIZE};
|
||||||
|
|
||||||
const TILE_W: usize = 16;
|
const TILE_W: usize = 16;
|
||||||
const TILE_H: usize = 16;
|
const TILE_H: usize = 16;
|
||||||
|
@ -64,6 +69,31 @@ pub enum PixelFormat {
|
||||||
Rgba8,
|
Rgba8,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[repr(C)]
|
||||||
|
#[derive(Clone, Copy, Debug, Zeroable, Pod)]
|
||||||
|
pub(crate) struct MemoryHeader {
|
||||||
|
mem_offset: u32,
|
||||||
|
mem_error: u32,
|
||||||
|
blend_offset: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The sizes of various objects in the encoded scene, needed for memory layout.
#[derive(Default)]
pub(crate) struct SceneStats {
    // Slices of scene encoding, in order
    /// Number of entries in the draw tag stream.
    pub n_drawobj: usize,
    /// Length of the draw data stream.
    pub drawdata_len: usize,
    /// Number of entries in the transform stream.
    pub n_transform: usize,
    /// Size of the linewidth stream.
    pub linewidth_len: usize,
    /// Length of the path segment stream.
    pub pathseg_len: usize,
    /// Number of entries in the path tag stream.
    pub n_pathtag: usize,

    // Additional stats needed for memory layout & dispatch
    /// Number of paths.
    pub n_path: u32,
    /// Number of path segments.
    pub n_pathseg: u32,
    /// Number of clip operations.
    pub n_clip: u32,
}
|
||||||
|
|
||||||
pub struct Renderer {
|
pub struct Renderer {
|
||||||
// These sizes are aligned to tile boundaries, though at some point
|
// These sizes are aligned to tile boundaries, though at some point
|
||||||
// we'll want to have a good strategy for dealing with odd sizes.
|
// we'll want to have a good strategy for dealing with odd sizes.
|
||||||
|
@ -72,18 +102,23 @@ pub struct Renderer {
|
||||||
|
|
||||||
pub image_dev: Image, // resulting image
|
pub image_dev: Image, // resulting image
|
||||||
|
|
||||||
// The reference is held by the pipelines. We will be changing
|
// TODO: two changes needed here. First, if we're fencing on the coarse
|
||||||
// this to make the scene upload dynamic.
|
// pipeline, then we only need one copy (this changes if we also bind the
|
||||||
|
// scene buffer in fine rasterization, which might be a good idea to reduce
|
||||||
|
// copying). Second, there should be a staging buffer for discrete cards.
|
||||||
scene_bufs: Vec<Buffer>,
|
scene_bufs: Vec<Buffer>,
|
||||||
|
|
||||||
memory_buf_host: Vec<Buffer>,
|
memory_buf_host: Vec<Buffer>,
|
||||||
memory_buf_dev: Buffer,
|
memory_buf_dev: Buffer,
|
||||||
|
memory_buf_readback: Buffer,
|
||||||
|
|
||||||
// Staging buffers
|
// Staging buffers
|
||||||
config_bufs: Vec<Buffer>,
|
config_bufs: Vec<Buffer>,
|
||||||
// Device config buf
|
// Device config buf
|
||||||
config_buf: Buffer,
|
config_buf: Buffer,
|
||||||
|
|
||||||
|
blend_buf: Buffer,
|
||||||
|
|
||||||
// New element pipeline
|
// New element pipeline
|
||||||
element_code: ElementCode,
|
element_code: ElementCode,
|
||||||
element_stage: ElementStage,
|
element_stage: ElementStage,
|
||||||
|
@ -111,6 +146,8 @@ pub struct Renderer {
|
||||||
k4_pipeline: Pipeline,
|
k4_pipeline: Pipeline,
|
||||||
k4_ds: DescriptorSet,
|
k4_ds: DescriptorSet,
|
||||||
|
|
||||||
|
scene_stats: SceneStats,
|
||||||
|
// TODO: the following stats are now redundant and can be removed.
|
||||||
n_transform: usize,
|
n_transform: usize,
|
||||||
n_drawobj: usize,
|
n_drawobj: usize,
|
||||||
n_paths: usize,
|
n_paths: usize,
|
||||||
|
@ -142,7 +179,13 @@ impl RenderConfig {
|
||||||
|
|
||||||
impl Renderer {
|
impl Renderer {
|
||||||
/// The number of query pool entries needed to run the renderer.
|
/// The number of query pool entries needed to run the renderer.
|
||||||
pub const QUERY_POOL_SIZE: u32 = 12;
|
pub const QUERY_POOL_SIZE: u32 = Self::COARSE_QUERY_POOL_SIZE + Self::FINE_QUERY_POOL_SIZE;
|
||||||
|
|
||||||
|
/// The number of query pool entries needed to run the coarse pipeline.
|
||||||
|
pub const COARSE_QUERY_POOL_SIZE: u32 = 10;
|
||||||
|
|
||||||
|
/// The number of query pool entries needed to run the fine pipeline.
|
||||||
|
pub const FINE_QUERY_POOL_SIZE: u32 = 2;
|
||||||
|
|
||||||
pub unsafe fn new(
|
pub unsafe fn new(
|
||||||
session: &Session,
|
session: &Session,
|
||||||
|
@ -166,12 +209,18 @@ impl Renderer {
|
||||||
let width = width + (width.wrapping_neg() & (TILE_W - 1));
|
let width = width + (width.wrapping_neg() & (TILE_W - 1));
|
||||||
let height = height + (height.wrapping_neg() & (TILE_W - 1));
|
let height = height + (height.wrapping_neg() & (TILE_W - 1));
|
||||||
let dev = BufferUsage::STORAGE | BufferUsage::COPY_DST;
|
let dev = BufferUsage::STORAGE | BufferUsage::COPY_DST;
|
||||||
let host_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC;
|
let usage_mem_dev = BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::COPY_SRC;
|
||||||
|
let usage_blend = BufferUsage::STORAGE;
|
||||||
|
let usage_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC;
|
||||||
|
let usage_readback = BufferUsage::MAP_READ | BufferUsage::COPY_DST;
|
||||||
|
|
||||||
// This may be inadequate for very complex scenes (paris etc)
|
|
||||||
// TODO: separate staging buffer (if needed)
|
// TODO: separate staging buffer (if needed)
|
||||||
let scene_bufs = (0..n_bufs)
|
let scene_bufs = (0..n_bufs)
|
||||||
.map(|_| session.create_buffer(8 * 1024 * 1024, host_upload).unwrap())
|
.map(|_| {
|
||||||
|
session
|
||||||
|
.create_buffer(8 * 1024 * 1024, usage_upload)
|
||||||
|
.unwrap()
|
||||||
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let image_format = match config.format {
|
let image_format = match config.format {
|
||||||
|
@ -185,15 +234,22 @@ impl Renderer {
|
||||||
let config_bufs = (0..n_bufs)
|
let config_bufs = (0..n_bufs)
|
||||||
.map(|_| {
|
.map(|_| {
|
||||||
session
|
session
|
||||||
.create_buffer(CONFIG_BUFFER_SIZE, host_upload)
|
.create_buffer(CONFIG_BUFFER_SIZE, usage_upload)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let memory_buf_host = (0..n_bufs)
|
let memory_buf_host = (0..n_bufs)
|
||||||
.map(|_| session.create_buffer(2 * 4, host_upload).unwrap())
|
.map(|_| {
|
||||||
|
session
|
||||||
|
.create_buffer(std::mem::size_of::<MemoryHeader>() as u64, usage_upload)
|
||||||
|
.unwrap()
|
||||||
|
})
|
||||||
.collect();
|
.collect();
|
||||||
let memory_buf_dev = session.create_buffer(128 * 1024 * 1024, dev)?;
|
let memory_buf_dev = session.create_buffer(16 * 1024 * 1024, usage_mem_dev)?;
|
||||||
|
let memory_buf_readback =
|
||||||
|
session.create_buffer(std::mem::size_of::<MemoryHeader>() as u64, usage_readback)?;
|
||||||
|
let blend_buf = session.create_buffer(16 * 1024 * 1024, usage_blend)?;
|
||||||
|
|
||||||
let element_code = ElementCode::new(session);
|
let element_code = ElementCode::new(session);
|
||||||
let element_stage = ElementStage::new(session, &element_code);
|
let element_stage = ElementStage::new(session, &element_code);
|
||||||
|
@ -282,7 +338,7 @@ impl Renderer {
|
||||||
let gradient_bufs = (0..n_bufs)
|
let gradient_bufs = (0..n_bufs)
|
||||||
.map(|_| {
|
.map(|_| {
|
||||||
session
|
session
|
||||||
.create_buffer(GRADIENT_BUF_SIZE as u64, host_upload)
|
.create_buffer(GRADIENT_BUF_SIZE as u64, usage_upload)
|
||||||
.unwrap()
|
.unwrap()
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -297,6 +353,7 @@ impl Renderer {
|
||||||
&[
|
&[
|
||||||
BindType::Buffer,
|
BindType::Buffer,
|
||||||
BindType::BufReadOnly,
|
BindType::BufReadOnly,
|
||||||
|
BindType::Buffer,
|
||||||
BindType::Image,
|
BindType::Image,
|
||||||
BindType::ImageRead,
|
BindType::ImageRead,
|
||||||
BindType::ImageRead,
|
BindType::ImageRead,
|
||||||
|
@ -304,19 +361,22 @@ impl Renderer {
|
||||||
)?;
|
)?;
|
||||||
let k4_ds = session
|
let k4_ds = session
|
||||||
.descriptor_set_builder()
|
.descriptor_set_builder()
|
||||||
.add_buffers(&[&memory_buf_dev, &config_buf])
|
.add_buffers(&[&memory_buf_dev, &config_buf, &blend_buf])
|
||||||
.add_images(&[&image_dev])
|
.add_images(&[&image_dev])
|
||||||
.add_textures(&[&bg_image, &gradients])
|
.add_textures(&[&bg_image, &gradients])
|
||||||
.build(&session, &k4_pipeline)?;
|
.build(&session, &k4_pipeline)?;
|
||||||
|
|
||||||
|
let scene_stats = Default::default();
|
||||||
Ok(Renderer {
|
Ok(Renderer {
|
||||||
width,
|
width,
|
||||||
height,
|
height,
|
||||||
scene_bufs,
|
scene_bufs,
|
||||||
memory_buf_host,
|
memory_buf_host,
|
||||||
memory_buf_dev,
|
memory_buf_dev,
|
||||||
|
memory_buf_readback,
|
||||||
config_buf,
|
config_buf,
|
||||||
config_bufs,
|
config_bufs,
|
||||||
|
blend_buf,
|
||||||
image_dev,
|
image_dev,
|
||||||
element_code,
|
element_code,
|
||||||
element_stage,
|
element_stage,
|
||||||
|
@ -336,6 +396,7 @@ impl Renderer {
|
||||||
coarse_ds,
|
coarse_ds,
|
||||||
k4_pipeline,
|
k4_pipeline,
|
||||||
k4_ds,
|
k4_ds,
|
||||||
|
scene_stats,
|
||||||
n_transform: 0,
|
n_transform: 0,
|
||||||
n_drawobj: 0,
|
n_drawobj: 0,
|
||||||
n_paths: 0,
|
n_paths: 0,
|
||||||
|
@ -358,43 +419,14 @@ impl Renderer {
|
||||||
render_ctx: &mut PietGpuRenderContext,
|
render_ctx: &mut PietGpuRenderContext,
|
||||||
buf_ix: usize,
|
buf_ix: usize,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let (mut config, mut alloc) = render_ctx.stage_config();
|
self.scene_stats = render_ctx.stats();
|
||||||
let n_drawobj = render_ctx.n_drawobj();
|
|
||||||
// TODO: be more consistent in size types
|
|
||||||
let n_path = render_ctx.n_path() as usize;
|
|
||||||
self.n_paths = n_path;
|
|
||||||
self.n_transform = render_ctx.n_transform();
|
|
||||||
self.n_drawobj = render_ctx.n_drawobj();
|
|
||||||
self.n_pathseg = render_ctx.n_pathseg() as usize;
|
|
||||||
self.n_pathtag = render_ctx.n_pathtag();
|
|
||||||
self.n_clip = render_ctx.n_clip();
|
|
||||||
|
|
||||||
// These constants depend on encoding and may need to be updated.
|
|
||||||
// Perhaps we can plumb these from piet-gpu-derive?
|
|
||||||
const PATH_SIZE: usize = 12;
|
|
||||||
const BIN_SIZE: usize = 8;
|
|
||||||
let width_in_tiles = self.width / TILE_W;
|
|
||||||
let height_in_tiles = self.height / TILE_H;
|
|
||||||
let tile_base = alloc;
|
|
||||||
alloc += ((n_path + 3) & !3) * PATH_SIZE;
|
|
||||||
let bin_base = alloc;
|
|
||||||
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
|
|
||||||
let ptcl_base = alloc;
|
|
||||||
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
|
|
||||||
|
|
||||||
config.width_in_tiles = width_in_tiles as u32;
|
|
||||||
config.height_in_tiles = height_in_tiles as u32;
|
|
||||||
config.tile_alloc = tile_base as u32;
|
|
||||||
config.bin_alloc = bin_base as u32;
|
|
||||||
config.ptcl_alloc = ptcl_base as u32;
|
|
||||||
unsafe {
|
unsafe {
|
||||||
// TODO: reallocate scene buffer if size is inadequate
|
self.upload_config(buf_ix)?;
|
||||||
{
|
{
|
||||||
let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?;
|
let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?;
|
||||||
render_ctx.write_scene(&mut mapped_scene);
|
render_ctx.write_scene(&mut mapped_scene);
|
||||||
}
|
}
|
||||||
self.config_bufs[buf_ix].write(&[config])?;
|
|
||||||
self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
|
|
||||||
|
|
||||||
// Upload gradient data.
|
// Upload gradient data.
|
||||||
let ramp_data = render_ctx.get_ramp_data();
|
let ramp_data = render_ctx.get_ramp_data();
|
||||||
|
@ -414,43 +446,14 @@ impl Renderer {
|
||||||
scene: &EncodedSceneRef<T>,
|
scene: &EncodedSceneRef<T>,
|
||||||
buf_ix: usize,
|
buf_ix: usize,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let (mut config, mut alloc) = scene.stage_config();
|
self.scene_stats = scene.stats();
|
||||||
let n_drawobj = scene.n_drawobj();
|
|
||||||
// TODO: be more consistent in size types
|
|
||||||
let n_path = scene.n_path() as usize;
|
|
||||||
self.n_paths = n_path;
|
|
||||||
self.n_transform = scene.n_transform();
|
|
||||||
self.n_drawobj = scene.n_drawobj();
|
|
||||||
self.n_pathseg = scene.n_pathseg() as usize;
|
|
||||||
self.n_pathtag = scene.n_pathtag();
|
|
||||||
self.n_clip = scene.n_clip();
|
|
||||||
|
|
||||||
// These constants depend on encoding and may need to be updated.
|
|
||||||
// Perhaps we can plumb these from piet-gpu-derive?
|
|
||||||
const PATH_SIZE: usize = 12;
|
|
||||||
const BIN_SIZE: usize = 8;
|
|
||||||
let width_in_tiles = self.width / TILE_W;
|
|
||||||
let height_in_tiles = self.height / TILE_H;
|
|
||||||
let tile_base = alloc;
|
|
||||||
alloc += ((n_path + 3) & !3) * PATH_SIZE;
|
|
||||||
let bin_base = alloc;
|
|
||||||
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
|
|
||||||
let ptcl_base = alloc;
|
|
||||||
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
|
|
||||||
|
|
||||||
config.width_in_tiles = width_in_tiles as u32;
|
|
||||||
config.height_in_tiles = height_in_tiles as u32;
|
|
||||||
config.tile_alloc = tile_base as u32;
|
|
||||||
config.bin_alloc = bin_base as u32;
|
|
||||||
config.ptcl_alloc = ptcl_base as u32;
|
|
||||||
unsafe {
|
unsafe {
|
||||||
// TODO: reallocate scene buffer if size is inadequate
|
self.upload_config(buf_ix)?;
|
||||||
{
|
{
|
||||||
let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?;
|
let mut mapped_scene = self.scene_bufs[buf_ix].map_write(..)?;
|
||||||
scene.write_scene(&mut mapped_scene);
|
scene.write_scene(&mut mapped_scene);
|
||||||
}
|
}
|
||||||
self.config_bufs[buf_ix].write(&[config])?;
|
|
||||||
self.memory_buf_host[buf_ix].write(&[alloc as u32, 0 /* Overflow flag */])?;
|
|
||||||
|
|
||||||
// Upload gradient data.
|
// Upload gradient data.
|
||||||
if !scene.ramp_data.is_empty() {
|
if !scene.ramp_data.is_empty() {
|
||||||
|
@ -464,7 +467,41 @@ impl Renderer {
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) {
|
// Note: configuration has to be re-uploaded when memory buffer is resized
|
||||||
|
pub(crate) unsafe fn upload_config(&mut self, buf_ix: usize) -> Result<(), Error> {
|
||||||
|
let stats = &self.scene_stats;
|
||||||
|
let n_path = stats.n_path as usize;
|
||||||
|
self.n_paths = n_path;
|
||||||
|
self.n_transform = stats.n_transform;
|
||||||
|
self.n_drawobj = stats.n_drawobj;
|
||||||
|
self.n_pathseg = stats.n_pathseg as usize;
|
||||||
|
self.n_pathtag = stats.n_pathtag;
|
||||||
|
self.n_clip = stats.n_clip;
|
||||||
|
let (mut config, alloc) = stats.config(self.width, self.height);
|
||||||
|
config.mem_size = self.memory_buf_size() as u32;
|
||||||
|
self.config_bufs[buf_ix].write(&[config])?;
|
||||||
|
let mem_header = MemoryHeader {
|
||||||
|
mem_offset: alloc as u32,
|
||||||
|
mem_error: 0,
|
||||||
|
blend_offset: 0,
|
||||||
|
};
|
||||||
|
// Note: we could skip doing this on realloc, but probably not worth the bother
|
||||||
|
self.memory_buf_host[buf_ix].write(&[mem_header])?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the size of memory for the allocations known in advance.
|
||||||
|
pub(crate) fn memory_size(&self, stats: &SceneStats) -> usize {
|
||||||
|
stats.config(self.width, self.height).1
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record the coarse part of a render pipeline.
|
||||||
|
pub unsafe fn record_coarse(
|
||||||
|
&self,
|
||||||
|
cmd_buf: &mut CmdBuf,
|
||||||
|
query_pool: &QueryPool,
|
||||||
|
buf_ix: usize,
|
||||||
|
) {
|
||||||
cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf);
|
cmd_buf.copy_buffer(&self.config_bufs[buf_ix], &self.config_buf);
|
||||||
cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev);
|
cmd_buf.copy_buffer(&self.memory_buf_host[buf_ix], &self.memory_buf_dev);
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
|
@ -558,9 +595,21 @@ impl Renderer {
|
||||||
pass.end();
|
pass.end();
|
||||||
cmd_buf.end_debug_label();
|
cmd_buf.end_debug_label();
|
||||||
cmd_buf.memory_barrier();
|
cmd_buf.memory_barrier();
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn record_fine(
|
||||||
|
&self,
|
||||||
|
cmd_buf: &mut CmdBuf,
|
||||||
|
query_pool: &QueryPool,
|
||||||
|
query_start: u32,
|
||||||
|
) {
|
||||||
|
cmd_buf.reset_query_pool(&query_pool);
|
||||||
cmd_buf.begin_debug_label("Fine raster");
|
cmd_buf.begin_debug_label("Fine raster");
|
||||||
let mut pass =
|
let mut pass = cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(
|
||||||
cmd_buf.begin_compute_pass(&ComputePassDescriptor::timer(&query_pool, 10, 11));
|
&query_pool,
|
||||||
|
query_start,
|
||||||
|
query_start + 1,
|
||||||
|
));
|
||||||
pass.dispatch(
|
pass.dispatch(
|
||||||
&self.k4_pipeline,
|
&self.k4_pipeline,
|
||||||
&self.k4_ds,
|
&self.k4_ds,
|
||||||
|
@ -577,6 +626,19 @@ impl Renderer {
|
||||||
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
|
cmd_buf.image_barrier(&self.image_dev, ImageLayout::General, ImageLayout::BlitSrc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Record a copy from the device memory buffer into the host-readable
/// readback buffer, followed by a memory barrier.
///
/// The readback buffer is created with the size of a `MemoryHeader`.
/// NOTE(review): presumably only the header is copied back so the host can
/// inspect it after the GPU work completes; confirm `copy_buffer` semantics
/// when source and destination sizes differ.
pub unsafe fn record_readback(&self, cmd_buf: &mut CmdBuf) {
    cmd_buf.copy_buffer(&self.memory_buf_dev, &self.memory_buf_readback);
    cmd_buf.memory_barrier();
}
|
||||||
|
|
||||||
|
/// Record a render pipeline.
|
||||||
|
///
|
||||||
|
/// This *assumes* the buffers are adequately sized.
|
||||||
|
pub unsafe fn record(&self, cmd_buf: &mut CmdBuf, query_pool: &QueryPool, buf_ix: usize) {
|
||||||
|
self.record_coarse(cmd_buf, query_pool, buf_ix);
|
||||||
|
self.record_fine(cmd_buf, query_pool, 10);
|
||||||
|
}
|
||||||
|
|
||||||
pub fn make_image(
|
pub fn make_image(
|
||||||
session: &Session,
|
session: &Session,
|
||||||
width: usize,
|
width: usize,
|
||||||
|
@ -636,4 +698,210 @@ impl Renderer {
|
||||||
.unwrap()
|
.unwrap()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn realloc_scene_if_needed(
|
||||||
|
&mut self,
|
||||||
|
session: &Session,
|
||||||
|
new_size: u64,
|
||||||
|
buf_ix: usize,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
if new_size <= self.scene_bufs[buf_ix].size() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
const ALIGN: u64 = 0x10000;
|
||||||
|
let new_size = (new_size + ALIGN - 1) & ALIGN.wrapping_neg();
|
||||||
|
println!(
|
||||||
|
"reallocating scene buf[{}] {} -> {}",
|
||||||
|
buf_ix,
|
||||||
|
self.scene_bufs[buf_ix].size(),
|
||||||
|
new_size
|
||||||
|
);
|
||||||
|
let usage_upload = BufferUsage::MAP_WRITE | BufferUsage::COPY_SRC;
|
||||||
|
let scene_buf = session.create_buffer(new_size, usage_upload)?;
|
||||||
|
self.element_bindings[buf_ix].rebind_scene(session, &scene_buf);
|
||||||
|
session.update_buffer_descriptor(&mut self.tile_ds[buf_ix], 2, &scene_buf);
|
||||||
|
session.update_buffer_descriptor(&mut self.coarse_ds[buf_ix], 2, &scene_buf);
|
||||||
|
self.scene_bufs[buf_ix] = scene_buf;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Get the size of the memory buffer.
|
||||||
|
///
|
||||||
|
/// This is the usable size (not including the header).
|
||||||
|
pub(crate) fn memory_buf_size(&self) -> u64 {
|
||||||
|
self.memory_buf_dev.size() - std::mem::size_of::<MemoryHeader>() as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn realloc_memory(
|
||||||
|
&mut self,
|
||||||
|
session: &Session,
|
||||||
|
new_size: u64,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
println!(
|
||||||
|
"reallocating memory buf {} -> {}",
|
||||||
|
self.memory_buf_dev.size(),
|
||||||
|
new_size
|
||||||
|
);
|
||||||
|
let usage_mem_dev = BufferUsage::STORAGE | BufferUsage::COPY_DST | BufferUsage::COPY_SRC;
|
||||||
|
let memory_buf_dev = session.create_buffer(new_size, usage_mem_dev)?;
|
||||||
|
for element_binding in &mut self.element_bindings {
|
||||||
|
element_binding.rebind_memory(session, &memory_buf_dev);
|
||||||
|
}
|
||||||
|
self.clip_binding.rebind_memory(session, &memory_buf_dev);
|
||||||
|
for tile_ds in &mut self.tile_ds {
|
||||||
|
session.update_buffer_descriptor(tile_ds, 0, &memory_buf_dev);
|
||||||
|
}
|
||||||
|
session.update_buffer_descriptor(&mut self.path_ds, 0, &memory_buf_dev);
|
||||||
|
session.update_buffer_descriptor(&mut self.backdrop_ds, 0, &memory_buf_dev);
|
||||||
|
session.update_buffer_descriptor(&mut self.bin_ds, 0, &memory_buf_dev);
|
||||||
|
for coarse_ds in &mut self.coarse_ds {
|
||||||
|
session.update_buffer_descriptor(coarse_ds, 0, &memory_buf_dev);
|
||||||
|
}
|
||||||
|
session.update_buffer_descriptor(&mut self.k4_ds, 0, &memory_buf_dev);
|
||||||
|
self.memory_buf_dev = memory_buf_dev;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn blend_size(&self) -> u64 {
|
||||||
|
self.blend_buf.size()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) unsafe fn realloc_blend(
|
||||||
|
&mut self,
|
||||||
|
session: &Session,
|
||||||
|
new_size: u64,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
println!(
|
||||||
|
"reallocating blend buf {} -> {}",
|
||||||
|
self.blend_size(),
|
||||||
|
new_size
|
||||||
|
);
|
||||||
|
let usage_blend = BufferUsage::STORAGE;
|
||||||
|
let blend_buf = session.create_buffer(new_size, usage_blend)?;
|
||||||
|
session.update_buffer_descriptor(&mut self.k4_ds, 2, &blend_buf);
|
||||||
|
self.blend_buf = blend_buf;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-item sizes used by `SceneStats` when laying out the scene and memory
// buffers (presumably in bytes; they must match the shader-side encoding —
// TODO confirm).

// One transform; multiplied by the padded transform count.
const TRANSFORM_SIZE: usize = 24;
|
||||||
|
// One path segment; multiplied by `n_pathseg`.
const PATHSEG_SIZE: usize = 52;
|
||||||
|
// One path bounding box; multiplied by `n_path`.
const PATH_BBOX_SIZE: usize = 24;
|
||||||
|
// One draw monoid; multiplied by the padded draw object count.
const DRAWMONOID_SIZE: usize = 16;
|
||||||
|
// One draw object bounding box; multiplied by `n_drawobj`.
const DRAW_BBOX_SIZE: usize = 16;
|
||||||
|
// One draw tag in the scene buffer; multiplied by the padded draw object count.
const DRAWTAG_SIZE: usize = 4;
|
||||||
|
// One annotated element; multiplied by `n_drawobj`.
const ANNOTATED_SIZE: usize = 40;
|
||||||
|
|
||||||
|
impl SceneStats {
|
||||||
|
pub(crate) fn scene_size(&self) -> usize {
|
||||||
|
align_up(self.n_drawobj, DRAW_PART_SIZE as usize) * DRAWTAG_SIZE
|
||||||
|
+ self.drawdata_len
|
||||||
|
+ align_up(self.n_transform, TRANSFORM_PART_SIZE as usize) * TRANSFORM_SIZE
|
||||||
|
+ self.linewidth_len
|
||||||
|
+ align_up(self.n_pathtag, PATHSEG_PART_SIZE as usize)
|
||||||
|
+ self.pathseg_len
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return a config for a scene with these stats.
|
||||||
|
///
|
||||||
|
/// Also returns the beginning of free (dynamic) memory.
|
||||||
|
fn config(&self, width: usize, height: usize) -> (Config, usize) {
|
||||||
|
// Layout of scene buffer
|
||||||
|
let drawtag_offset = 0;
|
||||||
|
let n_drawobj = self.n_drawobj;
|
||||||
|
let n_drawobj_padded = align_up(n_drawobj, DRAW_PART_SIZE as usize);
|
||||||
|
let drawdata_offset = drawtag_offset + n_drawobj_padded * DRAWTAG_SIZE;
|
||||||
|
let trans_offset = drawdata_offset + self.drawdata_len;
|
||||||
|
let n_trans = self.n_transform;
|
||||||
|
let n_trans_padded = align_up(n_trans, TRANSFORM_PART_SIZE as usize);
|
||||||
|
let linewidth_offset = trans_offset + n_trans_padded * TRANSFORM_SIZE;
|
||||||
|
let pathtag_offset = linewidth_offset + self.linewidth_len;
|
||||||
|
let n_pathtag = self.n_pathtag;
|
||||||
|
let n_pathtag_padded = align_up(n_pathtag, PATHSEG_PART_SIZE as usize);
|
||||||
|
let pathseg_offset = pathtag_offset + n_pathtag_padded;
|
||||||
|
|
||||||
|
// Layout of memory
|
||||||
|
let mut alloc = 0;
|
||||||
|
let trans_alloc = alloc;
|
||||||
|
alloc += trans_alloc + n_trans_padded * TRANSFORM_SIZE;
|
||||||
|
let pathseg_alloc = alloc;
|
||||||
|
alloc += pathseg_alloc + self.n_pathseg as usize * PATHSEG_SIZE;
|
||||||
|
let path_bbox_alloc = alloc;
|
||||||
|
let n_path = self.n_path as usize;
|
||||||
|
alloc += path_bbox_alloc + n_path * PATH_BBOX_SIZE;
|
||||||
|
let drawmonoid_alloc = alloc;
|
||||||
|
alloc += n_drawobj_padded * DRAWMONOID_SIZE;
|
||||||
|
let anno_alloc = alloc;
|
||||||
|
alloc += n_drawobj * ANNOTATED_SIZE;
|
||||||
|
let clip_alloc = alloc;
|
||||||
|
let n_clip = self.n_clip as usize;
|
||||||
|
const CLIP_SIZE: usize = 4;
|
||||||
|
alloc += n_clip * CLIP_SIZE;
|
||||||
|
let clip_bic_alloc = alloc;
|
||||||
|
const CLIP_BIC_SIZE: usize = 8;
|
||||||
|
// This can round down, as we only reduce the prefix
|
||||||
|
alloc += (n_clip / CLIP_PART_SIZE as usize) * CLIP_BIC_SIZE;
|
||||||
|
let clip_stack_alloc = alloc;
|
||||||
|
const CLIP_EL_SIZE: usize = 20;
|
||||||
|
alloc += n_clip * CLIP_EL_SIZE;
|
||||||
|
let clip_bbox_alloc = alloc;
|
||||||
|
const CLIP_BBOX_SIZE: usize = 16;
|
||||||
|
alloc += align_up(n_clip as usize, CLIP_PART_SIZE as usize) * CLIP_BBOX_SIZE;
|
||||||
|
let draw_bbox_alloc = alloc;
|
||||||
|
alloc += n_drawobj * DRAW_BBOX_SIZE;
|
||||||
|
let drawinfo_alloc = alloc;
|
||||||
|
// TODO: not optimized; it can be accumulated during encoding or summed from drawtags
|
||||||
|
const MAX_DRAWINFO_SIZE: usize = 44;
|
||||||
|
alloc += n_drawobj * MAX_DRAWINFO_SIZE;
|
||||||
|
|
||||||
|
// These constants depend on encoding and may need to be updated.
|
||||||
|
const PATH_SIZE: usize = 12;
|
||||||
|
const BIN_SIZE: usize = 8;
|
||||||
|
let width_in_tiles = width / TILE_W;
|
||||||
|
let height_in_tiles = height / TILE_H;
|
||||||
|
let tile_base = alloc;
|
||||||
|
alloc += ((n_path + 3) & !3) * PATH_SIZE;
|
||||||
|
let bin_base = alloc;
|
||||||
|
alloc += ((n_drawobj + 255) & !255) * BIN_SIZE;
|
||||||
|
let ptcl_base = alloc;
|
||||||
|
alloc += width_in_tiles * height_in_tiles * PTCL_INITIAL_ALLOC;
|
||||||
|
|
||||||
|
let config = Config {
|
||||||
|
mem_size: 0, // to be filled in later
|
||||||
|
n_elements: n_drawobj as u32,
|
||||||
|
n_pathseg: self.n_pathseg,
|
||||||
|
pathseg_alloc: pathseg_alloc as u32,
|
||||||
|
anno_alloc: anno_alloc as u32,
|
||||||
|
trans_alloc: trans_alloc as u32,
|
||||||
|
path_bbox_alloc: path_bbox_alloc as u32,
|
||||||
|
drawmonoid_alloc: drawmonoid_alloc as u32,
|
||||||
|
clip_alloc: clip_alloc as u32,
|
||||||
|
clip_bic_alloc: clip_bic_alloc as u32,
|
||||||
|
clip_stack_alloc: clip_stack_alloc as u32,
|
||||||
|
clip_bbox_alloc: clip_bbox_alloc as u32,
|
||||||
|
draw_bbox_alloc: draw_bbox_alloc as u32,
|
||||||
|
drawinfo_alloc: drawinfo_alloc as u32,
|
||||||
|
n_trans: n_trans as u32,
|
||||||
|
n_path: self.n_path,
|
||||||
|
n_clip: self.n_clip,
|
||||||
|
trans_offset: trans_offset as u32,
|
||||||
|
linewidth_offset: linewidth_offset as u32,
|
||||||
|
pathtag_offset: pathtag_offset as u32,
|
||||||
|
pathseg_offset: pathseg_offset as u32,
|
||||||
|
drawtag_offset: drawtag_offset as u32,
|
||||||
|
drawdata_offset: drawdata_offset as u32,
|
||||||
|
width_in_tiles: width_in_tiles as u32,
|
||||||
|
height_in_tiles: height_in_tiles as u32,
|
||||||
|
tile_alloc: tile_base as u32,
|
||||||
|
bin_alloc: bin_base as u32,
|
||||||
|
ptcl_alloc: ptcl_base as u32,
|
||||||
|
};
|
||||||
|
|
||||||
|
(config, alloc)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Round `x` up to the nearest multiple of `align`.
///
/// `align` must be a power of two; this is only checked in debug builds.
fn align_up(x: usize, align: usize) -> usize {
    debug_assert!(align.is_power_of_two());
    let bumped = x + align - 1;
    // Drop the remainder modulo `align` (a power of two) to land on a multiple.
    bumped - (bumped & (align - 1))
}
|
||||||
|
|
|
@ -4,7 +4,7 @@ const DO_SRGB_CONVERSION: bool = false;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use crate::encoder::GlyphEncoder;
|
use crate::encoder::GlyphEncoder;
|
||||||
use crate::stages::{Config, Transform};
|
use crate::stages::Transform;
|
||||||
use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
|
use piet::kurbo::{Affine, PathEl, Point, Rect, Shape};
|
||||||
use piet::{
|
use piet::{
|
||||||
Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext,
|
Color, Error, FixedGradient, ImageFormat, InterpolationMode, IntoBrush, RenderContext,
|
||||||
|
@ -18,7 +18,7 @@ use piet_gpu_types::scene::Element;
|
||||||
use crate::gradient::{Colrv1RadialGradient, LinearGradient, RadialGradient, RampCache};
|
use crate::gradient::{Colrv1RadialGradient, LinearGradient, RadialGradient, RampCache};
|
||||||
use crate::text::Font;
|
use crate::text::Font;
|
||||||
pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder};
|
pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder};
|
||||||
use crate::Blend;
|
use crate::{Blend, SceneStats};
|
||||||
|
|
||||||
pub struct PietGpuImage;
|
pub struct PietGpuImage;
|
||||||
|
|
||||||
|
@ -95,44 +95,15 @@ impl PietGpuRenderContext {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn stage_config(&self) -> (Config, usize) {
|
pub(crate) fn stats(&self) -> SceneStats {
|
||||||
self.new_encoder.stage_config()
|
self.new_encoder.stats()
|
||||||
}
|
|
||||||
|
|
||||||
/// Number of draw objects.
|
|
||||||
///
|
|
||||||
/// This is for the new element processing pipeline. It's not necessarily the
|
|
||||||
/// same as the number of paths (as in the old pipeline), but it might take a
|
|
||||||
/// while to sort that out.
|
|
||||||
pub fn n_drawobj(&self) -> usize {
|
|
||||||
self.new_encoder.n_drawobj()
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Number of paths.
|
|
||||||
pub fn n_path(&self) -> u32 {
|
|
||||||
self.new_encoder.n_path()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn n_pathseg(&self) -> u32 {
|
|
||||||
self.new_encoder.n_pathseg()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn n_pathtag(&self) -> usize {
|
|
||||||
self.new_encoder.n_pathtag()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn n_transform(&self) -> usize {
|
|
||||||
self.new_encoder.n_transform()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn n_clip(&self) -> u32 {
|
|
||||||
self.new_encoder.n_clip()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn write_scene(&self, buf: &mut BufWrite) {
|
pub fn write_scene(&self, buf: &mut BufWrite) {
|
||||||
self.new_encoder.write_scene(buf);
|
self.new_encoder.write_scene(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: delete
|
||||||
pub fn get_scene_buf(&mut self) -> &[u8] {
|
pub fn get_scene_buf(&mut self) -> &[u8] {
|
||||||
const ALIGN: usize = 128;
|
const ALIGN: usize = 128;
|
||||||
let padded_size = (self.elements.len() + (ALIGN - 1)) & ALIGN.wrapping_neg();
|
let padded_size = (self.elements.len() + (ALIGN - 1)) & ALIGN.wrapping_neg();
|
||||||
|
@ -194,7 +165,6 @@ impl RenderContext for PietGpuRenderContext {
|
||||||
let rad = self.ramp_cache.add_radial_gradient(&rad);
|
let rad = self.ramp_cache.add_radial_gradient(&rad);
|
||||||
Ok(PietGpuBrush::RadGradient(rad))
|
Ok(PietGpuBrush::RadGradient(rad))
|
||||||
}
|
}
|
||||||
_ => todo!("don't do radial gradients yet"),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
332
piet-gpu/src/render_driver.rs
Normal file
332
piet-gpu/src/render_driver.rs
Normal file
|
@ -0,0 +1,332 @@
|
||||||
|
// Copyright 2022 The piet-gpu authors.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
// you may not use this file except in compliance with the License.
|
||||||
|
// You may obtain a copy of the License at
|
||||||
|
//
|
||||||
|
// https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
//
|
||||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
// See the License for the specific language governing permissions and
|
||||||
|
// limitations under the License.
|
||||||
|
//
|
||||||
|
// Also licensed under MIT license, at your choice.
|
||||||
|
|
||||||
|
use bytemuck::Pod;
|
||||||
|
use piet_gpu_hal::{CmdBuf, Error, Image, QueryPool, Semaphore, Session, SubmittedCmdBuf};
|
||||||
|
|
||||||
|
use crate::{EncodedSceneRef, MemoryHeader, PietGpuRenderContext, Renderer, SceneStats};
|
||||||
|
|
||||||
|
/// Additional logic for sequencing rendering operations, specifically
|
||||||
|
/// for handling failure and reallocation.
|
||||||
|
///
|
||||||
|
/// It may be this shouldn't be a separate object from Renderer.
|
||||||
|
pub struct RenderDriver {
|
||||||
|
/// Per-frame state (command buffer, query pools, timings); indexed by `buf_ix`.
frames: Vec<RenderFrame>,
|
||||||
|
/// The renderer whose buffers and descriptor sets this driver manages.
renderer: Renderer,
|
||||||
|
/// Index of the frame currently being recorded; advanced by `next_buffer`.
buf_ix: usize,
|
||||||
|
/// The index of a pending fine rasterization submission.
|
||||||
|
pending: Option<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result of `RenderDriver::record_fine`: the still-open command buffer
/// together with the renderer's output image.
pub struct TargetState<'a> {
|
||||||
|
/// Command buffer that has been begun and had fine rasterization recorded
/// into it, but has not been finished.
pub cmd_buf: &'a mut CmdBuf,
|
||||||
|
/// The renderer's `image_dev`.
pub image: &'a Image,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Query results for one frame. Both vectors are empty until the
/// corresponding query pool has been fetched.
#[derive(Default, Debug)]
|
||||||
|
pub struct TimingStats {
|
||||||
|
/// Values fetched from the frame's coarse query pool.
coarse: Vec<f64>,
|
||||||
|
/// Values fetched from the frame's fine query pool.
fine: Vec<f64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// State owned by a single in-flight frame of the `RenderDriver`.
struct RenderFrame {
|
||||||
|
/// Command buffer for this frame, tracked through its lifecycle.
cmd_buf: CmdBufState,
|
||||||
|
/// Query pool passed to the coarse pipeline recording.
coarse_query_pool: QueryPool,
|
||||||
|
/// Query pool passed to the fine rasterizer recording.
fine_query_pool: QueryPool,
|
||||||
|
/// Most recently fetched query results for this frame.
timing_stats: TimingStats,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Lifecycle of a frame's command buffer.
enum CmdBufState {
|
||||||
|
/// No command buffer is held; one is requested from the session on demand.
Start,
|
||||||
|
/// The command buffer has been submitted and must be waited on before reuse.
Submitted(SubmittedCmdBuf),
|
||||||
|
/// A command buffer is available for recording.
Ready(CmdBuf),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl RenderDriver {
|
||||||
|
/// Create new render driver.
|
||||||
|
///
|
||||||
|
/// Should probably be fallible.
|
||||||
|
///
|
||||||
|
/// We can get n from the renderer as well.
|
||||||
|
pub fn new(session: &Session, n: usize, renderer: Renderer) -> RenderDriver {
|
||||||
|
let frames = (0..n)
|
||||||
|
.map(|_| {
|
||||||
|
// Maybe should allocate here so it doesn't happen on first frame?
|
||||||
|
let cmd_buf = CmdBufState::default();
|
||||||
|
let coarse_query_pool =
|
||||||
|
session.create_query_pool(Renderer::COARSE_QUERY_POOL_SIZE)?;
|
||||||
|
let fine_query_pool = session.create_query_pool(Renderer::FINE_QUERY_POOL_SIZE)?;
|
||||||
|
Ok(RenderFrame {
|
||||||
|
cmd_buf,
|
||||||
|
coarse_query_pool,
|
||||||
|
fine_query_pool,
|
||||||
|
timing_stats: TimingStats::default(),
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect::<Result<_, Error>>()
|
||||||
|
.unwrap();
|
||||||
|
RenderDriver {
|
||||||
|
frames,
|
||||||
|
renderer,
|
||||||
|
buf_ix: 0,
|
||||||
|
pending: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn upload_render_ctx(
|
||||||
|
&mut self,
|
||||||
|
session: &Session,
|
||||||
|
render_ctx: &mut PietGpuRenderContext,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let stats = render_ctx.stats();
|
||||||
|
self.ensure_scene_buffers(session, &stats)?;
|
||||||
|
self.renderer.upload_render_ctx(render_ctx, self.buf_ix)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn upload_scene<T: Copy + Pod>(
|
||||||
|
&mut self,
|
||||||
|
session: &Session,
|
||||||
|
scene: &EncodedSceneRef<T>,
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let stats = scene.stats();
|
||||||
|
self.ensure_scene_buffers(session, &stats)?;
|
||||||
|
self.renderer.upload_scene(scene, self.buf_ix)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ensure_scene_buffers(&mut self, session: &Session, stats: &SceneStats) -> Result<(), Error> {
|
||||||
|
let scene_size = stats.scene_size();
|
||||||
|
unsafe {
|
||||||
|
self.renderer
|
||||||
|
.realloc_scene_if_needed(session, scene_size as u64, self.buf_ix)?;
|
||||||
|
}
|
||||||
|
let memory_size = self.renderer.memory_size(&stats);
|
||||||
|
// TODO: better estimate of additional memory needed
|
||||||
|
// Note: if we were to cover the worst-case binning output, we could make the
|
||||||
|
// binning stage infallible and cut checking logic. It also may not be a bad
|
||||||
|
// estimate for the rest.
|
||||||
|
let estimated_needed = memory_size as u64 + (1 << 20);
|
||||||
|
if estimated_needed > self.renderer.memory_buf_size() {
|
||||||
|
if let Some(pending) = self.pending.take() {
|
||||||
|
// There might be a fine rasterization task that binds the memory buffer
|
||||||
|
// still in flight.
|
||||||
|
self.frames[pending].cmd_buf.wait();
|
||||||
|
}
|
||||||
|
unsafe {
|
||||||
|
self.renderer.realloc_memory(session, estimated_needed)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run one try of the coarse rendering pipeline.
|
||||||
|
pub(crate) fn try_run_coarse(&mut self, session: &Session) -> Result<MemoryHeader, Error> {
|
||||||
|
let frame = &mut self.frames[self.buf_ix];
|
||||||
|
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
|
||||||
|
unsafe {
|
||||||
|
cmd_buf.begin();
|
||||||
|
// TODO: probably want to return query results as well
|
||||||
|
self.renderer
|
||||||
|
.record_coarse(cmd_buf, &frame.coarse_query_pool, self.buf_ix);
|
||||||
|
self.renderer.record_readback(cmd_buf);
|
||||||
|
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
|
||||||
|
cmd_buf.finish_timestamps(&frame.coarse_query_pool);
|
||||||
|
cmd_buf.host_barrier();
|
||||||
|
cmd_buf.finish();
|
||||||
|
frame.cmd_buf.submit(session, &[], &[])?;
|
||||||
|
frame.cmd_buf.wait();
|
||||||
|
frame.timing_stats.coarse = session.fetch_query_pool(&frame.coarse_query_pool)?;
|
||||||
|
let mut result = Vec::new();
|
||||||
|
// TODO: consider read method for single POD value
|
||||||
|
self.renderer.memory_buf_readback.read(&mut result)?;
|
||||||
|
Ok(result[0])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Run the coarse render pipeline, ensuring enough memory for intermediate buffers.
|
||||||
|
pub fn run_coarse(&mut self, session: &Session) -> Result<(), Error> {
|
||||||
|
loop {
|
||||||
|
let mem_header = self.try_run_coarse(session)?;
|
||||||
|
//println!("{:?}", mem_header);
|
||||||
|
if mem_header.mem_error == 0 {
|
||||||
|
let blend_needed = mem_header.blend_offset as u64;
|
||||||
|
if blend_needed > self.renderer.blend_size() {
|
||||||
|
unsafe {
|
||||||
|
self.renderer.realloc_blend(session, blend_needed)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
// Not enough memory, reallocate and retry.
|
||||||
|
// TODO: be smarter (multiplier for early stages)
|
||||||
|
let mem_size = mem_header.mem_offset + 4096;
|
||||||
|
// Safety rationalization: no command buffers containing the buffer are
|
||||||
|
// in flight.
|
||||||
|
unsafe {
|
||||||
|
self.renderer.realloc_memory(session, mem_size.into())?;
|
||||||
|
self.renderer.upload_config(self.buf_ix)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Record the fine rasterizer, leaving the command buffer open.
|
||||||
|
pub fn record_fine(&mut self, session: &Session) -> Result<TargetState, Error> {
|
||||||
|
let frame = &mut self.frames[self.buf_ix];
|
||||||
|
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
|
||||||
|
unsafe {
|
||||||
|
cmd_buf.begin();
|
||||||
|
self.renderer
|
||||||
|
.record_fine(cmd_buf, &frame.fine_query_pool, 0);
|
||||||
|
}
|
||||||
|
let image = &self.renderer.image_dev;
|
||||||
|
Ok(TargetState { cmd_buf, image })
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Submit the current command buffer.
|
||||||
|
pub fn submit(
|
||||||
|
&mut self,
|
||||||
|
session: &Session,
|
||||||
|
wait_semaphores: &[&Semaphore],
|
||||||
|
signal_semaphores: &[&Semaphore],
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
let frame = &mut self.frames[self.buf_ix];
|
||||||
|
let cmd_buf = frame.cmd_buf.cmd_buf(session)?;
|
||||||
|
unsafe {
|
||||||
|
cmd_buf.finish_timestamps(&frame.fine_query_pool);
|
||||||
|
cmd_buf.host_barrier();
|
||||||
|
cmd_buf.finish();
|
||||||
|
frame
|
||||||
|
.cmd_buf
|
||||||
|
.submit(session, wait_semaphores, signal_semaphores)?
|
||||||
|
}
|
||||||
|
self.pending = Some(self.buf_ix);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn wait_frame(&mut self, session: &Session, buf_ix: usize) {
|
||||||
|
let frame = &mut self.frames[buf_ix];
|
||||||
|
frame.cmd_buf.wait();
|
||||||
|
if let Ok(stats) = session.fetch_query_pool(&frame.fine_query_pool) {
|
||||||
|
frame.timing_stats.fine = stats;
|
||||||
|
}
|
||||||
|
if self.pending == Some(buf_ix) {
|
||||||
|
self.pending = None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn wait(&mut self, session: &Session) {
|
||||||
|
self.wait_frame(session, self.buf_ix);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Move to the next buffer.
|
||||||
|
pub fn next_buffer(&mut self) {
|
||||||
|
self.buf_ix = (self.buf_ix + 1) % self.frames.len()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub unsafe fn get_timing_stats(&mut self, session: &Session, buf_ix: usize) -> &TimingStats {
|
||||||
|
self.wait_frame(session, buf_ix);
|
||||||
|
&self.frames[buf_ix].timing_stats
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn wait_all(&mut self, session: &Session) {
|
||||||
|
for buf_ix in 0..self.frames.len() {
|
||||||
|
unsafe {
|
||||||
|
self.wait_frame(session, buf_ix);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for CmdBufState {
|
||||||
|
fn default() -> Self {
|
||||||
|
CmdBufState::Start
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CmdBufState {
|
||||||
|
/// Get a command buffer suitable for recording.
|
||||||
|
///
|
||||||
|
/// If the command buffer is submitted, wait.
|
||||||
|
fn cmd_buf(&mut self, session: &Session) -> Result<&mut CmdBuf, Error> {
|
||||||
|
if let CmdBufState::Ready(cmd_buf) = self {
|
||||||
|
return Ok(cmd_buf);
|
||||||
|
}
|
||||||
|
if let CmdBufState::Submitted(submitted) = std::mem::take(self) {
|
||||||
|
if let Ok(Some(cmd_buf)) = submitted.wait() {
|
||||||
|
*self = CmdBufState::Ready(cmd_buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if matches!(self, CmdBufState::Start) {
|
||||||
|
*self = CmdBufState::Ready(session.cmd_buf()?);
|
||||||
|
}
|
||||||
|
if let CmdBufState::Ready(cmd_buf) = self {
|
||||||
|
Ok(cmd_buf)
|
||||||
|
} else {
|
||||||
|
unreachable!()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn submit(
|
||||||
|
&mut self,
|
||||||
|
session: &Session,
|
||||||
|
wait_semaphores: &[&Semaphore],
|
||||||
|
signal_semaphores: &[&Semaphore],
|
||||||
|
) -> Result<(), Error> {
|
||||||
|
if let CmdBufState::Ready(cmd_buf) = std::mem::take(self) {
|
||||||
|
let submitted = session.run_cmd_buf(cmd_buf, wait_semaphores, signal_semaphores)?;
|
||||||
|
*self = CmdBufState::Submitted(submitted);
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err("Tried to submit CmdBufState not in ready state".into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn wait(&mut self) {
|
||||||
|
if matches!(self, CmdBufState::Submitted(_)) {
|
||||||
|
if let CmdBufState::Submitted(submitted) = std::mem::take(self) {
|
||||||
|
if let Ok(Some(cmd_buf)) = submitted.wait() {
|
||||||
|
*self = CmdBufState::Ready(cmd_buf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TimingStats {
|
||||||
|
pub fn print_summary(&self) {
|
||||||
|
let ts = &self.coarse;
|
||||||
|
println!("Element time: {:.3}ms", ts[0] * 1e3);
|
||||||
|
println!("Clip + bin + tile time: {:.3}ms", (ts[2] - ts[1]) * 1e3);
|
||||||
|
println!("Coarse path time: {:.3}ms", (ts[4] - ts[2]) * 1e3);
|
||||||
|
println!("Backdrop time: {:.3}ms", (ts[6] - ts[5]) * 1e3);
|
||||||
|
println!("Coarse raster kernel time: {:.3}ms", (ts[8] - ts[7]) * 1e3);
|
||||||
|
println!("Fine kernel time: {:.3}ms", self.fine[0] * 1e3);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn short_summary(&self) -> String {
|
||||||
|
let ts = &self.coarse;
|
||||||
|
let el = ts[0] * 1e3;
|
||||||
|
let cl = (ts[2] - ts[1]) * 1e3;
|
||||||
|
let cp = (ts[4] - ts[3]) * 1e3;
|
||||||
|
let bd = (ts[6] - ts[5]) * 1e3;
|
||||||
|
let cr = (ts[8] - ts[7]) * 1e3;
|
||||||
|
let fr = self.fine[0] * 1e3;
|
||||||
|
let total = el + cl + cp + bd + cr + fr;
|
||||||
|
format!(
|
||||||
|
"{:.3}ms :: el:{:.3}ms|cl:{:.3}ms|cp:{:.3}ms|bd:{:.3}ms|cr:{:.3}ms|fr:{:.3}ms",
|
||||||
|
total, el, cl, cp, bd, cr, fr
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
|
@ -37,6 +37,7 @@ pub use transform::{
|
||||||
#[repr(C)]
|
#[repr(C)]
|
||||||
#[derive(Clone, Copy, Default, Debug, Zeroable, Pod)]
|
#[derive(Clone, Copy, Default, Debug, Zeroable, Pod)]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
|
pub mem_size: u32,
|
||||||
pub n_elements: u32, // paths
|
pub n_elements: u32, // paths
|
||||||
pub n_pathseg: u32,
|
pub n_pathseg: u32,
|
||||||
pub width_in_tiles: u32,
|
pub width_in_tiles: u32,
|
||||||
|
@ -167,3 +168,17 @@ impl ElementStage {
|
||||||
.record(pass, &code.draw_code, &binding.draw_binding, n_drawobj);
|
.record(pass, &code.draw_code, &binding.draw_binding, n_drawobj);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Rebinding helpers that forward to the transform, path and draw bindings.
impl ElementBinding {
|
||||||
|
/// Point the transform, path and draw bindings at a new memory buffer.
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
|
||||||
|
self.transform_binding.rebind_memory(session, memory);
|
||||||
|
self.path_binding.rebind_memory(session, memory);
|
||||||
|
self.draw_binding.rebind_memory(session, memory);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Point the transform, path and draw bindings at a new scene buffer.
pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
|
||||||
|
self.transform_binding.rebind_scene(session, scene);
|
||||||
|
self.path_binding.rebind_scene(session, scene);
|
||||||
|
self.draw_binding.rebind_scene(session, scene);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -93,4 +93,9 @@ impl ClipBinding {
|
||||||
pass.memory_barrier();
|
pass.memory_barrier();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Update binding 0 of the reduce and leaf descriptor sets to `memory`.
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
|
||||||
|
session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
|
||||||
|
session.update_buffer_descriptor(&mut self.leaf_ds, 0, memory);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -163,3 +163,15 @@ impl DrawStage {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Rebinding helpers for the draw stage's reduce and leaf descriptor sets.
impl DrawBinding {
|
||||||
|
/// Update binding 0 of both descriptor sets to `memory`.
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
|
||||||
|
session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
|
||||||
|
session.update_buffer_descriptor(&mut self.leaf_ds, 0, memory);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update binding 2 of both descriptor sets to `scene`.
pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
|
||||||
|
session.update_buffer_descriptor(&mut self.reduce_ds, 2, scene);
|
||||||
|
session.update_buffer_descriptor(&mut self.leaf_ds, 2, scene);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -200,6 +200,19 @@ impl PathStage {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Rebinding helpers for the path stage's descriptor sets.
impl PathBinding {
|
||||||
|
/// Update binding 0 of the reduce, clear and path descriptor sets to `memory`.
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
|
||||||
|
session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
|
||||||
|
session.update_buffer_descriptor(&mut self.clear_ds, 0, memory);
|
||||||
|
session.update_buffer_descriptor(&mut self.path_ds, 0, memory);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update binding 2 of the reduce and path descriptor sets to `scene`.
/// The clear descriptor set is not touched here.
pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
|
||||||
|
session.update_buffer_descriptor(&mut self.reduce_ds, 2, scene);
|
||||||
|
session.update_buffer_descriptor(&mut self.path_ds, 2, scene);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct PathEncoder<'a> {
|
pub struct PathEncoder<'a> {
|
||||||
tag_stream: &'a mut Vec<u8>,
|
tag_stream: &'a mut Vec<u8>,
|
||||||
// If we're never going to use the i16 encoding, it might be
|
// If we're never going to use the i16 encoding, it might be
|
||||||
|
|
|
@ -166,6 +166,18 @@ impl TransformStage {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Rebinding helpers for the transform stage's reduce and leaf descriptor sets.
impl TransformBinding {
|
||||||
|
/// Update binding 0 of both descriptor sets to `memory`.
pub unsafe fn rebind_memory(&mut self, session: &Session, memory: &Buffer) {
|
||||||
|
session.update_buffer_descriptor(&mut self.reduce_ds, 0, memory);
|
||||||
|
session.update_buffer_descriptor(&mut self.leaf_ds, 0, memory);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Update binding 2 of both descriptor sets to `scene`.
pub unsafe fn rebind_scene(&mut self, session: &Session, scene: &Buffer) {
|
||||||
|
session.update_buffer_descriptor(&mut self.reduce_ds, 2, scene);
|
||||||
|
session.update_buffer_descriptor(&mut self.leaf_ds, 2, scene);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl Transform {
|
impl Transform {
|
||||||
pub const IDENTITY: Transform = Transform {
|
pub const IDENTITY: Transform = Transform {
|
||||||
mat: [1.0, 0.0, 0.0, 1.0],
|
mat: [1.0, 0.0, 0.0, 1.0],
|
||||||
|
|
Loading…
Add table
Reference in a new issue