diff --git a/Cargo.lock b/Cargo.lock index 737c033..78b6326 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -949,6 +949,14 @@ dependencies = [ "piet-gpu-derive", ] +[[package]] +name = "piet-scene" +version = "0.1.0" +dependencies = [ + "bytemuck", + "smallvec", +] + [[package]] name = "pkg-config" version = "0.3.22" @@ -1139,9 +1147,9 @@ checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" [[package]] name = "smallvec" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" +checksum = "f2dd574626839106c320a323308629dcb1acfc96e32a8cba364ddc61ac23ee83" [[package]] name = "smithay-client-toolkit" diff --git a/Cargo.toml b/Cargo.toml index bfa0030..b94b82b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,5 +5,6 @@ members = [ "piet-gpu-derive", "piet-gpu-hal", "piet-gpu-types", + "piet-scene", "tests" ] diff --git a/piet-gpu-types/src/ptcl.rs b/piet-gpu-types/src/ptcl.rs index e8c29c3..14831ca 100644 --- a/piet-gpu-types/src/ptcl.rs +++ b/piet-gpu-types/src/ptcl.rs @@ -24,6 +24,14 @@ piet_gpu! { line_y: f32, line_c: f32, } + struct CmdRadGrad { + index: u32, + mat: [f32; 4], + xlat: [f32; 2], + c1: [f32; 2], + ra: f32, + roff: f32, + } struct CmdImage { index: u32, offset: [i16; 2], @@ -31,6 +39,9 @@ piet_gpu! { struct CmdAlpha { alpha: f32, } + struct CmdEndClip { + blend: u32, + } struct CmdJump { new_ref: u32, } @@ -42,9 +53,10 @@ piet_gpu! 
{ Alpha(CmdAlpha), Color(CmdColor), LinGrad(CmdLinGrad), + RadGrad(CmdRadGrad), Image(CmdImage), BeginClip, - EndClip, + EndClip(CmdEndClip), Jump(CmdJump), } } diff --git a/piet-gpu/bin/cli.rs b/piet-gpu/bin/cli.rs index abe6ae1..70023af 100644 --- a/piet-gpu/bin/cli.rs +++ b/piet-gpu/bin/cli.rs @@ -6,7 +6,7 @@ use clap::{App, Arg}; use piet_gpu_hal::{BufferUsage, Error, Instance, InstanceFlags, Session}; -use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer}; +use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; const WIDTH: usize = 2048; const HEIGHT: usize = 1536; @@ -243,11 +243,7 @@ fn main() -> Result<(), Error> { if matches.is_present("flip") { scale = -scale; } - let xml_str = std::fs::read_to_string(input).unwrap(); - let start = std::time::Instant::now(); - let svg = PicoSvg::load(&xml_str, scale).unwrap(); - println!("parsing time: {:?}", start.elapsed()); - test_scenes::render_svg(&mut ctx, &svg); + test_scenes::render_svg(&mut ctx, input, scale); } else { test_scenes::render_scene(&mut ctx); } diff --git a/piet-gpu/bin/winit.rs b/piet-gpu/bin/winit.rs index b1db5e0..3ca0742 100644 --- a/piet-gpu/bin/winit.rs +++ b/piet-gpu/bin/winit.rs @@ -2,7 +2,7 @@ use piet::kurbo::Point; use piet::{RenderContext, Text, TextAttribute, TextLayoutBuilder}; use piet_gpu_hal::{CmdBuf, Error, ImageLayout, Instance, Session, SubmittedCmdBuf}; -use piet_gpu::{test_scenes, PicoSvg, PietGpuRenderContext, Renderer}; +use piet_gpu::{test_scenes, PietGpuRenderContext, Renderer}; use clap::{App, Arg}; @@ -29,25 +29,6 @@ fn main() -> Result<(), Error> { ) .get_matches(); - // Collect SVG if input - let svg = match matches.value_of("INPUT") { - Some(file) => { - let mut scale = matches - .value_of("scale") - .map(|scale| scale.parse().unwrap()) - .unwrap_or(8.0); - if matches.is_present("flip") { - scale = -scale; - } - let xml_str = std::fs::read_to_string(file).unwrap(); - let start = std::time::Instant::now(); - let svg = PicoSvg::load(&xml_str, 
scale).unwrap(); - println!("parsing time: {:?}", start.elapsed()); - Some(svg) - } - None => None, - }; - let event_loop = EventLoop::new(); let window = WindowBuilder::new() .with_inner_size(winit::dpi::LogicalSize { @@ -125,8 +106,15 @@ fn main() -> Result<(), Error> { } let mut ctx = PietGpuRenderContext::new(); - if let Some(svg) = &svg { - test_scenes::render_svg(&mut ctx, svg); + if let Some(input) = matches.value_of("INPUT") { + let mut scale = matches + .value_of("scale") + .map(|scale| scale.parse().unwrap()) + .unwrap_or(8.0); + if matches.is_present("flip") { + scale = -scale; + } + test_scenes::render_svg(&mut ctx, input, scale); } else { use piet_gpu::{Blend, BlendMode::*, CompositionMode::*}; let blends = [ diff --git a/piet-gpu/shader/coarse.comp b/piet-gpu/shader/coarse.comp index 454371c..adbedfd 100644 --- a/piet-gpu/shader/coarse.comp +++ b/piet-gpu/shader/coarse.comp @@ -229,6 +229,7 @@ void main() { case Drawtag_FillColor: case Drawtag_FillImage: case Drawtag_FillLinGradient: + case Drawtag_FillRadGradient: case Drawtag_BeginClip: case Drawtag_EndClip: uint drawmonoid_base = drawmonoid_start + 4 * element_ix; @@ -305,7 +306,7 @@ void main() { is_blend = (blend != BlendComp_default); } include_tile = tile.tile.offset != 0 || (tile.backdrop == 0) == is_clip - || is_blend; + || (is_clip && is_blend); } if (include_tile) { uint el_slice = el_ix / 32; @@ -373,6 +374,25 @@ void main() { Cmd_LinGrad_write(cmd_alloc, cmd_ref, cmd_lin); cmd_ref.offset += 4 + CmdLinGrad_size; break; + case Drawtag_FillRadGradient: + if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { + break; + } + linewidth = uintBitsToFloat(memory[di]); + write_fill(cmd_alloc, cmd_ref, tile, linewidth); + CmdRadGrad cmd_rad; + cmd_rad.index = scene[dd]; + // Given that this is basically a memcpy, we might consider + // letting the fine raster read the info itself. 
+ cmd_rad.mat = uintBitsToFloat(uvec4(memory[di + 1], memory[di + 2], + memory[di + 3], memory[di + 4])); + cmd_rad.xlat = uintBitsToFloat(uvec2(memory[di + 5], memory[di + 6])); + cmd_rad.c1 = uintBitsToFloat(uvec2(memory[di + 7], memory[di + 8])); + cmd_rad.ra = uintBitsToFloat(memory[di + 9]); + cmd_rad.roff = uintBitsToFloat(memory[di + 10]); + Cmd_RadGrad_write(cmd_alloc, cmd_ref, cmd_rad); + cmd_ref.offset += 4 + CmdRadGrad_size; + break; case Drawtag_FillImage: linewidth = uintBitsToFloat(memory[di]); if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { diff --git a/piet-gpu/shader/draw_leaf.comp b/piet-gpu/shader/draw_leaf.comp index 1cee0ef..ef369c9 100644 --- a/piet-gpu/shader/draw_leaf.comp +++ b/piet-gpu/shader/draw_leaf.comp @@ -94,8 +94,8 @@ void main() { // pipeline. However, going forward we'll get rid of that, and have // later stages read scene + bbox etc. tag_word = scene[drawtag_base + ix + i]; - if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillImage || - tag_word == Drawtag_BeginClip) { + if (tag_word == Drawtag_FillColor || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient || + tag_word == Drawtag_FillImage || tag_word == Drawtag_BeginClip) { uint bbox_offset = (conf.path_bbox_alloc.offset >> 2) + 6 * m.path_ix; float bbox_l = float(memory[bbox_offset]) - 32768.0; float bbox_t = float(memory[bbox_offset + 1]) - 32768.0; @@ -106,11 +106,11 @@ void main() { uint fill_mode = uint(linewidth >= 0.0); vec4 mat; vec2 translate; - if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient) { + if (linewidth >= 0.0 || tag_word == Drawtag_FillLinGradient || tag_word == Drawtag_FillRadGradient) { uint trans_ix = memory[bbox_offset + 5]; uint t = (conf.trans_alloc.offset >> 2) + 6 * trans_ix; mat = uintBitsToFloat(uvec4(memory[t], memory[t + 1], memory[t + 2], memory[t + 3])); - if (tag_word == Drawtag_FillLinGradient) { + if (tag_word == Drawtag_FillLinGradient || tag_word == 
Drawtag_FillRadGradient) { translate = uintBitsToFloat(uvec2(memory[t + 4], memory[t + 5])); } } @@ -125,7 +125,6 @@ void main() { break; case Drawtag_FillLinGradient: memory[di] = floatBitsToUint(linewidth); - uint index = scene[dd]; vec2 p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2])); vec2 p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4])); p0 = mat.xy * p0.x + mat.zw * p0.y + translate; @@ -139,6 +138,33 @@ void main() { memory[di + 2] = floatBitsToUint(line_y); memory[di + 3] = floatBitsToUint(line_c); break; + case Drawtag_FillRadGradient: + p0 = uintBitsToFloat(uvec2(scene[dd + 1], scene[dd + 2])); + p1 = uintBitsToFloat(uvec2(scene[dd + 3], scene[dd + 4])); + float r0 = uintBitsToFloat(scene[dd + 5]); + float r1 = uintBitsToFloat(scene[dd + 6]); + float inv_det = 1.0 / (mat.x * mat.w - mat.y * mat.z); + vec4 inv_mat = inv_det * vec4(mat.w, -mat.y, -mat.z, mat.x); + vec2 inv_tr = inv_mat.xz * translate.x + inv_mat.yw * translate.y; + inv_tr += p0; + vec2 center1 = p1 - p0; + float rr = r1 / (r1 - r0); + float rainv = rr / (r1 * r1 - dot(center1, center1)); + vec2 c1 = center1 * rainv; + float ra = rr * rainv; + float roff = rr - 1.0; + memory[di] = floatBitsToUint(linewidth); + memory[di + 1] = floatBitsToUint(inv_mat.x); + memory[di + 2] = floatBitsToUint(inv_mat.y); + memory[di + 3] = floatBitsToUint(inv_mat.z); + memory[di + 4] = floatBitsToUint(inv_mat.w); + memory[di + 5] = floatBitsToUint(inv_tr.x); + memory[di + 6] = floatBitsToUint(inv_tr.y); + memory[di + 7] = floatBitsToUint(c1.x); + memory[di + 8] = floatBitsToUint(c1.y); + memory[di + 9] = floatBitsToUint(ra); + memory[di + 10] = floatBitsToUint(roff); + break; case Drawtag_BeginClip: break; } diff --git a/piet-gpu/shader/drawtag.h b/piet-gpu/shader/drawtag.h index 7f73546..1e35318 100644 --- a/piet-gpu/shader/drawtag.h +++ b/piet-gpu/shader/drawtag.h @@ -4,11 +4,12 @@ // Design of draw tag: & 0x1c gives scene size in bytes // & 1 gives clip -// (tag >> 4) & 0x1c is info size in 
bytes +// (tag >> 4) & 0x3c is info size in bytes #define Drawtag_Nop 0 #define Drawtag_FillColor 0x44 #define Drawtag_FillLinGradient 0x114 +#define Drawtag_FillRadGradient 0x2dc #define Drawtag_FillImage 0x48 #define Drawtag_BeginClip 0x05 #define Drawtag_EndClip 0x25 @@ -36,5 +37,5 @@ DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) { DrawMonoid map_tag(uint tag_word) { // TODO: at some point, EndClip should not generate a path uint has_path = uint(tag_word != Drawtag_Nop); - return DrawMonoid(has_path, tag_word & 1, tag_word & 0x1c, (tag_word >> 4) & 0x1c); + return DrawMonoid(has_path, tag_word & 1, tag_word & 0x1c, (tag_word >> 4) & 0x3c); } diff --git a/piet-gpu/shader/gen/binning.dxil b/piet-gpu/shader/gen/binning.dxil index 3050aa8..4a4f073 100644 Binary files a/piet-gpu/shader/gen/binning.dxil and b/piet-gpu/shader/gen/binning.dxil differ diff --git a/piet-gpu/shader/gen/coarse.dxil b/piet-gpu/shader/gen/coarse.dxil index 12e88dd..879b7c8 100644 Binary files a/piet-gpu/shader/gen/coarse.dxil and b/piet-gpu/shader/gen/coarse.dxil differ diff --git a/piet-gpu/shader/gen/coarse.hlsl b/piet-gpu/shader/gen/coarse.hlsl index a702df5..1e610ec 100644 --- a/piet-gpu/shader/gen/coarse.hlsl +++ b/piet-gpu/shader/gen/coarse.hlsl @@ -91,6 +91,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -160,9 +175,9 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -RWByteAddressBuffer _242 : register(u0, space0); -ByteAddressBuffer _854 : register(t1, space0); -ByteAddressBuffer _1222 : register(t2, space0); +RWByteAddressBuffer _260 : register(u0, space0); +ByteAddressBuffer _1005 : register(t1, space0); +ByteAddressBuffer _1372 : register(t2, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -185,8 +200,8 @@ groupshared uint 
sh_tile_count[256]; Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _319 = { a.offset + offset }; - return _319; + Alloc _337 = { a.offset + offset }; + return _337; } bool touch_mem(Alloc alloc, uint offset) @@ -202,7 +217,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _242.Load(offset * 4 + 8); + uint v = _260.Load(offset * 4 + 8); return v; } @@ -215,8 +230,8 @@ Alloc new_alloc(uint offset, uint size, bool mem_ok) BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) { - BinInstanceRef _328 = { ref.offset + (index * 4u) }; - return _328; + BinInstanceRef _346 = { ref.offset + (index * 4u) }; + return _346; } BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) @@ -244,8 +259,8 @@ Path Path_read(Alloc a, PathRef ref) uint raw2 = read_mem(param_4, param_5); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); - TileRef _391 = { raw2 }; - s.tiles = _391; + TileRef _409 = { raw2 }; + s.tiles = _409; return s; } @@ -255,11 +270,11 @@ void write_tile_alloc(uint el_ix, Alloc a) Alloc read_tile_alloc(uint el_ix, bool mem_ok) { - uint _741; - _242.GetDimensions(_741); - _741 = (_741 - 8) / 4; + uint _892; + _260.GetDimensions(_892); + _892 = (_892 - 8) / 4; uint param = 0u; - uint param_1 = uint(int(_741) * 4); + uint param_1 = uint(int(_892) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } @@ -273,31 +288,31 @@ Tile Tile_read(Alloc a, TileRef ref) Alloc param_2 = a; uint param_3 = ix + 1u; uint raw1 = read_mem(param_2, param_3); - TileSegRef _416 = { raw0 }; + TileSegRef _434 = { raw0 }; Tile s; - s.tile = _416; + s.tile = _434; s.backdrop = int(raw1); return s; } MallocResult malloc(uint size) { - uint _248; - _242.InterlockedAdd(0, size, _248); - uint offset = _248; - uint _255; - _242.GetDimensions(_255); - _255 = (_255 - 8) / 4; + uint _266; + _260.InterlockedAdd(0, size, _266); + uint offset = _266; + uint _273; + _260.GetDimensions(_273); + _273 = (_273 
- 8) / 4; MallocResult r; - r.failed = (offset + size) > uint(int(_255) * 4); + r.failed = (offset + size) > uint(int(_273) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _277; - _242.InterlockedMax(4, 1u, _277); + uint _295; + _260.InterlockedMax(4, 1u, _295); return r; } return r; @@ -311,7 +326,7 @@ void write_mem(Alloc alloc, uint offset, uint val) { return; } - _242.Store(offset * 4 + 8, val); + _260.Store(offset * 4 + 8, val); } void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) @@ -327,11 +342,11 @@ void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 10u; + uint param_2 = 11u; write_mem(param, param_1, param_2); - CmdJumpRef _734 = { ref.offset + 4u }; + CmdJumpRef _885 = { ref.offset + 4u }; Alloc param_3 = a; - CmdJumpRef param_4 = _734; + CmdJumpRef param_4 = _885; CmdJump param_5 = s; CmdJump_write(param_3, param_4, param_5); } @@ -343,22 +358,22 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit return true; } uint param = 1024u; - MallocResult _762 = malloc(param); - MallocResult new_cmd = _762; + MallocResult _913 = malloc(param); + MallocResult new_cmd = _913; if (new_cmd.failed) { return false; } - CmdJump _772 = { new_cmd.alloc.offset }; - CmdJump jump = _772; + CmdJump _923 = { new_cmd.alloc.offset }; + CmdJump jump = _923; Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; Cmd_Jump_write(param_1, param_2, param_3); cmd_alloc = new_cmd.alloc; - CmdRef _784 = { cmd_alloc.offset }; - cmd_ref = _784; - cmd_limit = (cmd_alloc.offset + 1024u) - 60u; + CmdRef _935 = { cmd_alloc.offset }; + cmd_ref = _935; + cmd_limit = (cmd_alloc.offset + 1024u) - 144u; return true; } @@ -381,9 +396,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; write_mem(param, param_1, 
param_2); - CmdFillRef _604 = { ref.offset + 4u }; + CmdFillRef _742 = { ref.offset + 4u }; Alloc param_3 = a; - CmdFillRef param_4 = _604; + CmdFillRef param_4 = _742; CmdFill param_5 = s; CmdFill_write(param_3, param_4, param_5); } @@ -415,9 +430,9 @@ void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; write_mem(param, param_1, param_2); - CmdStrokeRef _622 = { ref.offset + 4u }; + CmdStrokeRef _760 = { ref.offset + 4u }; Alloc param_3 = a; - CmdStrokeRef param_4 = _622; + CmdStrokeRef param_4 = _760; CmdStroke param_5 = s; CmdStroke_write(param_3, param_4, param_5); } @@ -428,8 +443,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) { if (tile.tile.offset != 0u) { - CmdFill _807 = { tile.tile.offset, tile.backdrop }; - CmdFill cmd_fill = _807; + CmdFill _958 = { tile.tile.offset, tile.backdrop }; + CmdFill cmd_fill = _958; Alloc param = alloc; CmdRef param_1 = cmd_ref; CmdFill param_2 = cmd_fill; @@ -446,8 +461,8 @@ void write_fill(Alloc alloc, inout CmdRef cmd_ref, Tile tile, float linewidth) } else { - CmdStroke _837 = { tile.tile.offset, 0.5f * linewidth }; - CmdStroke cmd_stroke = _837; + CmdStroke _988 = { tile.tile.offset, 0.5f * linewidth }; + CmdStroke cmd_stroke = _988; Alloc param_5 = alloc; CmdRef param_6 = cmd_ref; CmdStroke param_7 = cmd_stroke; @@ -471,9 +486,9 @@ void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; write_mem(param, param_1, param_2); - CmdColorRef _649 = { ref.offset + 4u }; + CmdColorRef _786 = { ref.offset + 4u }; Alloc param_3 = a; - CmdColorRef param_4 = _649; + CmdColorRef param_4 = _786; CmdColor param_5 = s; CmdColor_write(param_3, param_4, param_5); } @@ -505,13 +520,75 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; write_mem(param, param_1, param_2); - CmdLinGradRef _668 = { ref.offset + 4u }; + 
CmdLinGradRef _804 = { ref.offset + 4u }; Alloc param_3 = a; - CmdLinGradRef param_4 = _668; + CmdLinGradRef param_4 = _804; CmdLinGrad param_5 = s; CmdLinGrad_write(param_3, param_4, param_5); } +void CmdRadGrad_write(Alloc a, CmdRadGradRef ref, CmdRadGrad s) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = s.index; + write_mem(param, param_1, param_2); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = asuint(s.mat.x); + write_mem(param_3, param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = asuint(s.mat.y); + write_mem(param_6, param_7, param_8); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = asuint(s.mat.z); + write_mem(param_9, param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = asuint(s.mat.w); + write_mem(param_12, param_13, param_14); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = asuint(s.xlat.x); + write_mem(param_15, param_16, param_17); + Alloc param_18 = a; + uint param_19 = ix + 6u; + uint param_20 = asuint(s.xlat.y); + write_mem(param_18, param_19, param_20); + Alloc param_21 = a; + uint param_22 = ix + 7u; + uint param_23 = asuint(s.c1.x); + write_mem(param_21, param_22, param_23); + Alloc param_24 = a; + uint param_25 = ix + 8u; + uint param_26 = asuint(s.c1.y); + write_mem(param_24, param_25, param_26); + Alloc param_27 = a; + uint param_28 = ix + 9u; + uint param_29 = asuint(s.ra); + write_mem(param_27, param_28, param_29); + Alloc param_30 = a; + uint param_31 = ix + 10u; + uint param_32 = asuint(s.roff); + write_mem(param_30, param_31, param_32); +} + +void Cmd_RadGrad_write(Alloc a, CmdRef ref, CmdRadGrad s) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = 7u; + write_mem(param, param_1, param_2); + CmdRadGradRef _822 = { ref.offset + 4u }; + Alloc param_3 = a; + CmdRadGradRef param_4 = _822; + CmdRadGrad param_5 = s; + CmdRadGrad_write(param_3, 
param_4, param_5); +} + void CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s) { uint ix = ref.offset >> uint(2); @@ -529,11 +606,11 @@ void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 7u; + uint param_2 = 8u; write_mem(param, param_1, param_2); - CmdImageRef _687 = { ref.offset + 4u }; + CmdImageRef _840 = { ref.offset + 4u }; Alloc param_3 = a; - CmdImageRef param_4 = _687; + CmdImageRef param_4 = _840; CmdImage param_5 = s; CmdImage_write(param_3, param_4, param_5); } @@ -542,7 +619,7 @@ void Cmd_BeginClip_write(Alloc a, CmdRef ref) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 8u; + uint param_2 = 9u; write_mem(param, param_1, param_2); } @@ -559,11 +636,11 @@ void Cmd_EndClip_write(Alloc a, CmdRef ref, CmdEndClip s) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 9u; + uint param_2 = 10u; write_mem(param, param_1, param_2); - CmdEndClipRef _715 = { ref.offset + 4u }; + CmdEndClipRef _866 = { ref.offset + 4u }; Alloc param_3 = a; - CmdEndClipRef param_4 = _715; + CmdEndClipRef param_4 = _866; CmdEndClip param_5 = s; CmdEndClip_write(param_3, param_4, param_5); } @@ -578,80 +655,81 @@ void Cmd_End_write(Alloc a, CmdRef ref) void comp_main() { - uint width_in_bins = ((_854.Load(8) + 16u) - 1u) / 16u; + uint width_in_bins = ((_1005.Load(8) + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_854.Load(0) + 256u) - 1u) / 256u; + uint n_partitions = ((_1005.Load(0) + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _854.Load(8)) + bin_tile_x) + tile_x; - Alloc _919; - _919.offset = _854.Load(24); + uint this_tile_ix = 
(((bin_tile_y + tile_y) * _1005.Load(8)) + bin_tile_x) + tile_x; + Alloc _1070; + _1070.offset = _1005.Load(24); Alloc param; - param.offset = _919.offset; + param.offset = _1070.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _928 = { cmd_alloc.offset }; - CmdRef cmd_ref = _928; - uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; + CmdRef _1079 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1079; + uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; uint clip_depth = 0u; uint clip_zero_depth = 0u; uint rd_ix = 0u; uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - uint drawmonoid_start = _854.Load(44) >> uint(2); - uint drawtag_start = _854.Load(100) >> uint(2); - uint drawdata_start = _854.Load(104) >> uint(2); - uint drawinfo_start = _854.Load(68) >> uint(2); - bool mem_ok = _242.Load(4) == 0u; + uint drawmonoid_start = _1005.Load(44) >> uint(2); + uint drawtag_start = _1005.Load(100) >> uint(2); + uint drawdata_start = _1005.Load(104) >> uint(2); + uint drawinfo_start = _1005.Load(68) >> uint(2); + bool mem_ok = _260.Load(4) == 0u; Alloc param_3; Alloc param_5; - uint _1154; + uint _1304; uint element_ix; Alloc param_14; uint tile_count; - uint _1455; + uint _1605; float linewidth; CmdLinGrad cmd_lin; + CmdRadGrad cmd_rad; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1206; + bool _1356; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1003 = th_ix < 256u; - bool _1011; - if (_1003) + bool _1154 = th_ix < 256u; + bool _1162; + if (_1154) { - _1011 = (partition_ix + th_ix) < n_partitions; + _1162 = (partition_ix + th_ix) < n_partitions; } else { - _1011 = _1003; + _1162 = _1154; } - if (_1011) + if (_1162) { - uint in_ix = (_854.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - Alloc _1029; - _1029.offset = _854.Load(20); - param_3.offset 
= _1029.offset; + uint in_ix = (_1005.Load(20) >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + Alloc _1179; + _1179.offset = _1005.Load(20); + param_3.offset = _1179.offset; uint param_4 = in_ix; count = read_mem(param_3, param_4); - Alloc _1040; - _1040.offset = _854.Load(20); - param_5.offset = _1040.offset; + Alloc _1190; + _1190.offset = _1005.Load(20); + param_5.offset = _1190.offset; uint param_6 = in_ix + 1u; uint offset = read_mem(param_5, param_6); uint param_7 = offset; @@ -697,16 +775,16 @@ void comp_main() } if (part_ix > 0u) { - _1154 = sh_part_count[part_ix - 1u]; + _1304 = sh_part_count[part_ix - 1u]; } else { - _1154 = part_start_ix; + _1304 = part_start_ix; } - ix -= _1154; + ix -= _1304; Alloc bin_alloc = sh_part_elements[part_ix]; - BinInstanceRef _1173 = { bin_alloc.offset }; - BinInstanceRef inst_ref = _1173; + BinInstanceRef _1323 = { bin_alloc.offset }; + BinInstanceRef inst_ref = _1323; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; @@ -716,16 +794,16 @@ void comp_main() } GroupMemoryBarrierWithGroupSync(); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1196 = (wr_ix - rd_ix) < 256u; - if (_1196) + bool _1346 = (wr_ix - rd_ix) < 256u; + if (_1346) { - _1206 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1356 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1206 = _1196; + _1356 = _1346; } - if (_1206) + if (_1356) { continue; } @@ -738,23 +816,24 @@ void comp_main() if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - tag = _1222.Load((drawtag_start + element_ix) * 4 + 0); + tag = _1372.Load((drawtag_start + element_ix) * 4 + 0); } switch (tag) { case 68u: case 72u: case 276u: + case 732u: case 5u: case 37u: { uint drawmonoid_base = drawmonoid_start + (4u * element_ix); - uint path_ix = _242.Load(drawmonoid_base * 4 + 8); - PathRef _1247 = { _854.Load(16) + (path_ix * 12u) }; - Alloc _1250; - _1250.offset = _854.Load(16); - param_14.offset = 
_1250.offset; - PathRef param_15 = _1247; + uint path_ix = _260.Load(drawmonoid_base * 4 + 8); + PathRef _1397 = { _1005.Load(16) + (path_ix * 12u) }; + Alloc _1400; + _1400.offset = _1005.Load(16); + param_14.offset = _1400.offset; + PathRef param_15 = _1397; Path path = Path_read(param_14, param_15); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; @@ -810,16 +889,16 @@ void comp_main() } } uint element_ix_1 = sh_elements[el_ix]; - uint tag_1 = _1222.Load((drawtag_start + element_ix_1) * 4 + 0); + uint tag_1 = _1372.Load((drawtag_start + element_ix_1) * 4 + 0); if (el_ix > 0u) { - _1455 = sh_tile_count[el_ix - 1u]; + _1605 = sh_tile_count[el_ix - 1u]; } else { - _1455 = 0u; + _1605 = 0u; } - uint seq_ix = ix_1 - _1455; + uint seq_ix = ix_1 - _1605; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); @@ -828,38 +907,47 @@ void comp_main() { uint param_21 = el_ix; bool param_22 = mem_ok; - TileRef _1507 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; + TileRef _1657 = { sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; Alloc param_23 = read_tile_alloc(param_21, param_22); - TileRef param_24 = _1507; + TileRef param_24 = _1657; Tile tile = Tile_read(param_23, param_24); bool is_clip = (tag_1 & 1u) != 0u; bool is_blend = false; if (is_clip) { uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); - uint scene_offset = _242.Load((drawmonoid_base_1 + 2u) * 4 + 8); + uint scene_offset = _260.Load((drawmonoid_base_1 + 2u) * 4 + 8); uint dd = drawdata_start + (scene_offset >> uint(2)); - uint blend = _1222.Load(dd * 4 + 0); + uint blend = _1372.Load(dd * 4 + 0); is_blend = blend != 3u; } - bool _1542 = tile.tile.offset != 0u; - bool _1551; - if (!_1542) + bool _1692 = tile.tile.offset != 0u; + bool _1701; + if (!_1692) { - _1551 = (tile.backdrop == 0) == is_clip; + _1701 = (tile.backdrop == 0) == is_clip; } else { - _1551 = 
_1542; + _1701 = _1692; } - include_tile = _1551 || is_blend; + bool _1708; + if (!_1701) + { + _1708 = is_clip && is_blend; + } + else + { + _1708 = _1701; + } + include_tile = _1708; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1573; - InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1573); + uint _1728; + InterlockedOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask, _1728); } } GroupMemoryBarrierWithGroupSync(); @@ -883,33 +971,33 @@ void comp_main() uint element_ref_ix = (slice_ix * 32u) + uint(int(firstbitlow(bitmap))); uint element_ix_2 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - uint drawtag = _1222.Load((drawtag_start + element_ix_2) * 4 + 0); + uint drawtag = _1372.Load((drawtag_start + element_ix_2) * 4 + 0); if (clip_zero_depth == 0u) { uint param_25 = element_ref_ix; bool param_26 = mem_ok; - TileRef _1650 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; + TileRef _1805 = { sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; Alloc param_27 = read_tile_alloc(param_25, param_26); - TileRef param_28 = _1650; + TileRef param_28 = _1805; Tile tile_1 = Tile_read(param_27, param_28); uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); - uint scene_offset_1 = _242.Load((drawmonoid_base_2 + 2u) * 4 + 8); - uint info_offset = _242.Load((drawmonoid_base_2 + 3u) * 4 + 8); + uint scene_offset_1 = _260.Load((drawmonoid_base_2 + 2u) * 4 + 8); + uint info_offset = _260.Load((drawmonoid_base_2 + 3u) * 4 + 8); uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); uint di = drawinfo_start + (info_offset >> uint(2)); switch (drawtag) { case 68u: { - linewidth = asfloat(_242.Load(di * 4 + 8)); + linewidth = asfloat(_260.Load(di * 4 + 8)); Alloc param_29 = cmd_alloc; CmdRef param_30 = cmd_ref; uint param_31 = cmd_limit; - bool _1697 = alloc_cmd(param_29, param_30, param_31); + bool _1853 = 
alloc_cmd(param_29, param_30, param_31); cmd_alloc = param_29; cmd_ref = param_30; cmd_limit = param_31; - if (!_1697) + if (!_1853) { break; } @@ -919,11 +1007,11 @@ void comp_main() float param_35 = linewidth; write_fill(param_32, param_33, param_34, param_35); cmd_ref = param_33; - uint rgba = _1222.Load(dd_1 * 4 + 0); - CmdColor _1720 = { rgba }; + uint rgba = _1372.Load(dd_1 * 4 + 0); + CmdColor _1876 = { rgba }; Alloc param_36 = cmd_alloc; CmdRef param_37 = cmd_ref; - CmdColor param_38 = _1720; + CmdColor param_38 = _1876; Cmd_Color_write(param_36, param_37, param_38); cmd_ref.offset += 8u; break; @@ -933,25 +1021,25 @@ void comp_main() Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; uint param_41 = cmd_limit; - bool _1738 = alloc_cmd(param_39, param_40, param_41); + bool _1894 = alloc_cmd(param_39, param_40, param_41); cmd_alloc = param_39; cmd_ref = param_40; cmd_limit = param_41; - if (!_1738) + if (!_1894) { break; } - linewidth = asfloat(_242.Load(di * 4 + 8)); + linewidth = asfloat(_260.Load(di * 4 + 8)); Alloc param_42 = cmd_alloc; CmdRef param_43 = cmd_ref; Tile param_44 = tile_1; float param_45 = linewidth; write_fill(param_42, param_43, param_44, param_45); cmd_ref = param_43; - cmd_lin.index = _1222.Load(dd_1 * 4 + 0); - cmd_lin.line_x = asfloat(_242.Load((di + 1u) * 4 + 8)); - cmd_lin.line_y = asfloat(_242.Load((di + 2u) * 4 + 8)); - cmd_lin.line_c = asfloat(_242.Load((di + 3u) * 4 + 8)); + cmd_lin.index = _1372.Load(dd_1 * 4 + 0); + cmd_lin.line_x = asfloat(_260.Load((di + 1u) * 4 + 8)); + cmd_lin.line_y = asfloat(_260.Load((di + 2u) * 4 + 8)); + cmd_lin.line_c = asfloat(_260.Load((di + 3u) * 4 + 8)); Alloc param_46 = cmd_alloc; CmdRef param_47 = cmd_ref; CmdLinGrad param_48 = cmd_lin; @@ -959,69 +1047,102 @@ void comp_main() cmd_ref.offset += 20u; break; } - case 72u: + case 732u: { - linewidth = asfloat(_242.Load(di * 4 + 8)); Alloc param_49 = cmd_alloc; CmdRef param_50 = cmd_ref; uint param_51 = cmd_limit; - bool _1806 = 
alloc_cmd(param_49, param_50, param_51); + bool _1958 = alloc_cmd(param_49, param_50, param_51); cmd_alloc = param_49; cmd_ref = param_50; cmd_limit = param_51; - if (!_1806) + if (!_1958) { break; } + linewidth = asfloat(_260.Load(di * 4 + 8)); Alloc param_52 = cmd_alloc; CmdRef param_53 = cmd_ref; Tile param_54 = tile_1; float param_55 = linewidth; write_fill(param_52, param_53, param_54, param_55); cmd_ref = param_53; - uint index = _1222.Load(dd_1 * 4 + 0); - uint raw1 = _1222.Load((dd_1 + 1u) * 4 + 0); - int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); - CmdImage _1845 = { index, offset_1 }; + cmd_rad.index = _1372.Load(dd_1 * 4 + 0); + cmd_rad.mat = asfloat(uint4(_260.Load((di + 1u) * 4 + 8), _260.Load((di + 2u) * 4 + 8), _260.Load((di + 3u) * 4 + 8), _260.Load((di + 4u) * 4 + 8))); + cmd_rad.xlat = asfloat(uint2(_260.Load((di + 5u) * 4 + 8), _260.Load((di + 6u) * 4 + 8))); + cmd_rad.c1 = asfloat(uint2(_260.Load((di + 7u) * 4 + 8), _260.Load((di + 8u) * 4 + 8))); + cmd_rad.ra = asfloat(_260.Load((di + 9u) * 4 + 8)); + cmd_rad.roff = asfloat(_260.Load((di + 10u) * 4 + 8)); Alloc param_56 = cmd_alloc; CmdRef param_57 = cmd_ref; - CmdImage param_58 = _1845; - Cmd_Image_write(param_56, param_57, param_58); + CmdRadGrad param_58 = cmd_rad; + Cmd_RadGrad_write(param_56, param_57, param_58); + cmd_ref.offset += 48u; + break; + } + case 72u: + { + linewidth = asfloat(_260.Load(di * 4 + 8)); + Alloc param_59 = cmd_alloc; + CmdRef param_60 = cmd_ref; + uint param_61 = cmd_limit; + bool _2064 = alloc_cmd(param_59, param_60, param_61); + cmd_alloc = param_59; + cmd_ref = param_60; + cmd_limit = param_61; + if (!_2064) + { + break; + } + Alloc param_62 = cmd_alloc; + CmdRef param_63 = cmd_ref; + Tile param_64 = tile_1; + float param_65 = linewidth; + write_fill(param_62, param_63, param_64, param_65); + cmd_ref = param_63; + uint index = _1372.Load(dd_1 * 4 + 0); + uint raw1 = _1372.Load((dd_1 + 1u) * 4 + 0); + int2 offset_1 = int2(int(raw1 << uint(16)) 
>> 16, int(raw1) >> 16); + CmdImage _2103 = { index, offset_1 }; + Alloc param_66 = cmd_alloc; + CmdRef param_67 = cmd_ref; + CmdImage param_68 = _2103; + Cmd_Image_write(param_66, param_67, param_68); cmd_ref.offset += 12u; break; } case 5u: { - bool _1859 = tile_1.tile.offset == 0u; - bool _1865; - if (_1859) + bool _2117 = tile_1.tile.offset == 0u; + bool _2123; + if (_2117) { - _1865 = tile_1.backdrop == 0; + _2123 = tile_1.backdrop == 0; } else { - _1865 = _1859; + _2123 = _2117; } - if (_1865) + if (_2123) { clip_zero_depth = clip_depth + 1u; } else { - Alloc param_59 = cmd_alloc; - CmdRef param_60 = cmd_ref; - uint param_61 = cmd_limit; - bool _1877 = alloc_cmd(param_59, param_60, param_61); - cmd_alloc = param_59; - cmd_ref = param_60; - cmd_limit = param_61; - if (!_1877) + Alloc param_69 = cmd_alloc; + CmdRef param_70 = cmd_ref; + uint param_71 = cmd_limit; + bool _2135 = alloc_cmd(param_69, param_70, param_71); + cmd_alloc = param_69; + cmd_ref = param_70; + cmd_limit = param_71; + if (!_2135) { break; } - Alloc param_62 = cmd_alloc; - CmdRef param_63 = cmd_ref; - Cmd_BeginClip_write(param_62, param_63); + Alloc param_72 = cmd_alloc; + CmdRef param_73 = cmd_ref; + Cmd_BeginClip_write(param_72, param_73); cmd_ref.offset += 4u; } clip_depth++; @@ -1030,29 +1151,29 @@ void comp_main() case 37u: { clip_depth--; - Alloc param_64 = cmd_alloc; - CmdRef param_65 = cmd_ref; - uint param_66 = cmd_limit; - bool _1905 = alloc_cmd(param_64, param_65, param_66); - cmd_alloc = param_64; - cmd_ref = param_65; - cmd_limit = param_66; - if (!_1905) + Alloc param_74 = cmd_alloc; + CmdRef param_75 = cmd_ref; + uint param_76 = cmd_limit; + bool _2163 = alloc_cmd(param_74, param_75, param_76); + cmd_alloc = param_74; + cmd_ref = param_75; + cmd_limit = param_76; + if (!_2163) { break; } - Alloc param_67 = cmd_alloc; - CmdRef param_68 = cmd_ref; - Tile param_69 = tile_1; - float param_70 = -1.0f; - write_fill(param_67, param_68, param_69, param_70); - cmd_ref = param_68; - 
uint blend_1 = _1222.Load(dd_1 * 4 + 0); - CmdEndClip _1928 = { blend_1 }; - Alloc param_71 = cmd_alloc; - CmdRef param_72 = cmd_ref; - CmdEndClip param_73 = _1928; - Cmd_EndClip_write(param_71, param_72, param_73); + Alloc param_77 = cmd_alloc; + CmdRef param_78 = cmd_ref; + Tile param_79 = tile_1; + float param_80 = -1.0f; + write_fill(param_77, param_78, param_79, param_80); + cmd_ref = param_78; + uint blend_1 = _1372.Load(dd_1 * 4 + 0); + CmdEndClip _2186 = { blend_1 }; + Alloc param_81 = cmd_alloc; + CmdRef param_82 = cmd_ref; + CmdEndClip param_83 = _2186; + Cmd_EndClip_write(param_81, param_82, param_83); cmd_ref.offset += 8u; break; } @@ -1086,21 +1207,21 @@ void comp_main() break; } } - bool _1975 = (bin_tile_x + tile_x) < _854.Load(8); - bool _1984; - if (_1975) + bool _2233 = (bin_tile_x + tile_x) < _1005.Load(8); + bool _2242; + if (_2233) { - _1984 = (bin_tile_y + tile_y) < _854.Load(12); + _2242 = (bin_tile_y + tile_y) < _1005.Load(12); } else { - _1984 = _1975; + _2242 = _2233; } - if (_1984) + if (_2242) { - Alloc param_74 = cmd_alloc; - CmdRef param_75 = cmd_ref; - Cmd_End_write(param_74, param_75); + Alloc param_84 = cmd_alloc; + CmdRef param_85 = cmd_ref; + Cmd_End_write(param_84, param_85); } } diff --git a/piet-gpu/shader/gen/coarse.msl b/piet-gpu/shader/gen/coarse.msl index 4226352..abd636b 100644 --- a/piet-gpu/shader/gen/coarse.msl +++ b/piet-gpu/shader/gen/coarse.msl @@ -107,6 +107,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -211,7 +226,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_242, constant uint& v_242BufferSize) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device 
Memory& v_260, constant uint& v_260BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -219,7 +234,7 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_242.memory[offset]; + uint v = v_260.memory[offset]; return v; } @@ -238,30 +253,30 @@ BinInstanceRef BinInstance_index(thread const BinInstanceRef& ref, thread const } static inline __attribute__((always_inline)) -BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +BinInstance BinInstance_read(thread const Alloc& a, thread const BinInstanceRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); + uint raw0 = read_mem(param, param_1, v_260, v_260BufferSize); BinInstance s; s.element_ix = raw0; return s; } static inline __attribute__((always_inline)) -Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +Path Path_read(thread const Alloc& a, thread const PathRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); + uint raw0 = read_mem(param, param_1, v_260, v_260BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_242, v_242BufferSize); + uint raw1 = read_mem(param_2, param_3, v_260, v_260BufferSize); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_242, v_242BufferSize); + uint raw2 = read_mem(param_4, param_5, v_260, v_260BufferSize); Path s; s.bbox = uint4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16)); s.tiles = TileRef{ raw2 }; @@ -274,24 +289,24 @@ void write_tile_alloc(thread const uint& el_ix, 
thread const Alloc& a) } static inline __attribute__((always_inline)) -Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_242, constant uint& v_242BufferSize) +Alloc read_tile_alloc(thread const uint& el_ix, thread const bool& mem_ok, device Memory& v_260, constant uint& v_260BufferSize) { uint param = 0u; - uint param_1 = uint(int((v_242BufferSize - 8) / 4) * 4); + uint param_1 = uint(int((v_260BufferSize - 8) / 4) * 4); bool param_2 = mem_ok; return new_alloc(param, param_1, param_2); } static inline __attribute__((always_inline)) -Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_242, v_242BufferSize); + uint raw0 = read_mem(param, param_1, v_260, v_260BufferSize); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_242, v_242BufferSize); + uint raw1 = read_mem(param_2, param_3, v_260, v_260BufferSize); Tile s; s.tile = TileSegRef{ raw0 }; s.backdrop = int(raw1); @@ -299,26 +314,26 @@ Tile Tile_read(thread const Alloc& a, thread const TileRef& ref, device Memory& } static inline __attribute__((always_inline)) -MallocResult malloc(thread const uint& size, device Memory& v_242, constant uint& v_242BufferSize) +MallocResult malloc(thread const uint& size, device Memory& v_260, constant uint& v_260BufferSize) { - uint _248 = atomic_fetch_add_explicit((device atomic_uint*)&v_242.mem_offset, size, memory_order_relaxed); - uint offset = _248; + uint _266 = atomic_fetch_add_explicit((device atomic_uint*)&v_260.mem_offset, size, memory_order_relaxed); + uint offset = _266; MallocResult r; - r.failed = (offset + size) > uint(int((v_242BufferSize - 8) / 4) * 4); + r.failed = (offset + size) > 
uint(int((v_260BufferSize - 8) / 4) * 4); uint param = offset; uint param_1 = size; bool param_2 = !r.failed; r.alloc = new_alloc(param, param_1, param_2); if (r.failed) { - uint _277 = atomic_fetch_max_explicit((device atomic_uint*)&v_242.mem_error, 1u, memory_order_relaxed); + uint _295 = atomic_fetch_max_explicit((device atomic_uint*)&v_260.mem_error, 1u, memory_order_relaxed); return r; } return r; } static inline __attribute__((always_inline)) -void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_242, constant uint& v_242BufferSize) +void write_mem(thread const Alloc& alloc, thread const uint& offset, thread const uint& val, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = alloc; uint param_1 = offset; @@ -326,42 +341,42 @@ void write_mem(thread const Alloc& alloc, thread const uint& offset, thread cons { return; } - v_242.memory[offset] = val; + v_260.memory[offset] = val; } static inline __attribute__((always_inline)) -void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdJump_write(thread const Alloc& a, thread const CmdJumpRef& ref, thread const CmdJump& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.new_ref; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Jump_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdJump& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 10u; - write_mem(param, param_1, 
param_2, v_242, v_242BufferSize); + uint param_2 = 11u; + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdJumpRef param_4 = CmdJumpRef{ ref.offset + 4u }; CmdJump param_5 = s; - CmdJump_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdJump_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_242, constant uint& v_242BufferSize) +bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd_limit, device Memory& v_260, constant uint& v_260BufferSize) { if (cmd_ref.offset < cmd_limit) { return true; } uint param = 1024u; - MallocResult _762 = malloc(param, v_242, v_242BufferSize); - MallocResult new_cmd = _762; + MallocResult _913 = malloc(param, v_260, v_260BufferSize); + MallocResult new_cmd = _913; if (new_cmd.failed) { return false; @@ -370,78 +385,78 @@ bool alloc_cmd(thread Alloc& cmd_alloc, thread CmdRef& cmd_ref, thread uint& cmd Alloc param_1 = cmd_alloc; CmdRef param_2 = cmd_ref; CmdJump param_3 = jump; - Cmd_Jump_write(param_1, param_2, param_3, v_242, v_242BufferSize); + Cmd_Jump_write(param_1, param_2, param_3, v_260, v_260BufferSize); cmd_alloc = new_cmd.alloc; cmd_ref = CmdRef{ cmd_alloc.offset }; - cmd_limit = (cmd_alloc.offset + 1024u) - 60u; + cmd_limit = (cmd_alloc.offset + 1024u) - 144u; return true; } static inline __attribute__((always_inline)) -void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdFill_write(thread const Alloc& a, thread const CmdFillRef& ref, thread const CmdFill& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, 
param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = uint(s.backdrop); - write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Fill_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdFill& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 1u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdFillRef param_4 = CmdFillRef{ ref.offset + 4u }; CmdFill param_5 = s; - CmdFill_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdFill_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Solid_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 3u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdStroke_write(thread const Alloc& a, thread const CmdStrokeRef& ref, thread const CmdStroke& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.tile_ref; - 
write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.half_width); - write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Stroke_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdStroke& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 2u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdStrokeRef param_4 = CmdStrokeRef{ ref.offset + 4u }; CmdStroke param_5 = s; - CmdStroke_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdStroke_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, device Memory& v_242, constant uint& v_242BufferSize) +void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Tile& tile, thread const float& linewidth, device Memory& v_260, constant uint& v_260BufferSize) { if (linewidth < 0.0) { @@ -451,14 +466,14 @@ void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Alloc param = alloc; CmdRef param_1 = cmd_ref; CmdFill param_2 = cmd_fill; - Cmd_Fill_write(param, param_1, param_2, v_242, v_242BufferSize); + Cmd_Fill_write(param, param_1, param_2, v_260, v_260BufferSize); cmd_ref.offset += 12u; } else { Alloc param_3 = alloc; CmdRef param_4 = cmd_ref; - Cmd_Solid_write(param_3, param_4, v_242, 
v_242BufferSize); + Cmd_Solid_write(param_3, param_4, v_260, v_260BufferSize); cmd_ref.offset += 4u; } } @@ -468,138 +483,201 @@ void write_fill(thread const Alloc& alloc, thread CmdRef& cmd_ref, thread const Alloc param_5 = alloc; CmdRef param_6 = cmd_ref; CmdStroke param_7 = cmd_stroke; - Cmd_Stroke_write(param_5, param_6, param_7, v_242, v_242BufferSize); + Cmd_Stroke_write(param_5, param_6, param_7, v_260, v_260BufferSize); cmd_ref.offset += 12u; } } static inline __attribute__((always_inline)) -void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_242, constant uint& v_242BufferSize) +void CmdColor_write(thread const Alloc& a, thread const CmdColorRef& ref, thread const CmdColor& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.rgba_color; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_Color_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdColor& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 5u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdColorRef param_4 = CmdColorRef{ ref.offset + 4u }; CmdColor param_5 = s; - CmdColor_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdColor_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& 
v_242, constant uint& v_242BufferSize) +void CmdLinGrad_write(thread const Alloc& a, thread const CmdLinGradRef& ref, thread const CmdLinGrad& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; uint param_5 = as_type(s.line_x); - write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); Alloc param_6 = a; uint param_7 = ix + 2u; uint param_8 = as_type(s.line_y); - write_mem(param_6, param_7, param_8, v_242, v_242BufferSize); + write_mem(param_6, param_7, param_8, v_260, v_260BufferSize); Alloc param_9 = a; uint param_10 = ix + 3u; uint param_11 = as_type(s.line_c); - write_mem(param_9, param_10, param_11, v_242, v_242BufferSize); + write_mem(param_9, param_10, param_11, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_LinGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdLinGrad& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 6u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdLinGradRef param_4 = CmdLinGradRef{ ref.offset + 4u }; CmdLinGrad param_5 = s; - CmdLinGrad_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdLinGrad_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device 
Memory& v_242, constant uint& v_242BufferSize) +void CmdRadGrad_write(thread const Alloc& a, thread const CmdRadGradRef& ref, thread const CmdRadGrad& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.index; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; uint param_4 = ix + 1u; - uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); - write_mem(param_3, param_4, param_5, v_242, v_242BufferSize); + uint param_5 = as_type(s.mat.x); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); + Alloc param_6 = a; + uint param_7 = ix + 2u; + uint param_8 = as_type(s.mat.y); + write_mem(param_6, param_7, param_8, v_260, v_260BufferSize); + Alloc param_9 = a; + uint param_10 = ix + 3u; + uint param_11 = as_type(s.mat.z); + write_mem(param_9, param_10, param_11, v_260, v_260BufferSize); + Alloc param_12 = a; + uint param_13 = ix + 4u; + uint param_14 = as_type(s.mat.w); + write_mem(param_12, param_13, param_14, v_260, v_260BufferSize); + Alloc param_15 = a; + uint param_16 = ix + 5u; + uint param_17 = as_type(s.xlat.x); + write_mem(param_15, param_16, param_17, v_260, v_260BufferSize); + Alloc param_18 = a; + uint param_19 = ix + 6u; + uint param_20 = as_type(s.xlat.y); + write_mem(param_18, param_19, param_20, v_260, v_260BufferSize); + Alloc param_21 = a; + uint param_22 = ix + 7u; + uint param_23 = as_type(s.c1.x); + write_mem(param_21, param_22, param_23, v_260, v_260BufferSize); + Alloc param_24 = a; + uint param_25 = ix + 8u; + uint param_26 = as_type(s.c1.y); + write_mem(param_24, param_25, param_26, v_260, v_260BufferSize); + Alloc param_27 = a; + uint param_28 = ix + 9u; + uint param_29 = as_type(s.ra); + write_mem(param_27, param_28, param_29, v_260, v_260BufferSize); + Alloc param_30 = a; + uint param_31 = ix + 10u; + uint param_32 = 
as_type(s.roff); + write_mem(param_30, param_31, param_32, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_RadGrad_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdRadGrad& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 7u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; - CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; - CmdImage param_5 = s; - CmdImage_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdRadGradRef param_4 = CmdRadGradRef{ ref.offset + 4u }; + CmdRadGrad param_5 = s; + CmdRadGrad_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +void CmdImage_write(thread const Alloc& a, thread const CmdImageRef& ref, thread const CmdImage& s, device Memory& v_260, constant uint& v_260BufferSize) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint param_2 = s.index; + write_mem(param, param_1, param_2, v_260, v_260BufferSize); + Alloc param_3 = a; + uint param_4 = ix + 1u; + uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16)); + write_mem(param_3, param_4, param_5, v_260, v_260BufferSize); +} + +static inline __attribute__((always_inline)) +void Cmd_Image_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdImage& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 8u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + 
write_mem(param, param_1, param_2, v_260, v_260BufferSize); + Alloc param_3 = a; + CmdImageRef param_4 = CmdImageRef{ ref.offset + 4u }; + CmdImage param_5 = s; + CmdImage_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_BeginClip_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) +{ + Alloc param = a; + uint param_1 = ref.offset >> uint(2); + uint param_2 = 9u; + write_mem(param, param_1, param_2, v_260, v_260BufferSize); +} + +static inline __attribute__((always_inline)) +void CmdEndClip_write(thread const Alloc& a, thread const CmdEndClipRef& ref, thread const CmdEndClip& s, device Memory& v_260, constant uint& v_260BufferSize) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; uint param_2 = s.blend; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } static inline __attribute__((always_inline)) -void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_EndClip_write(thread const Alloc& a, thread const CmdRef& ref, thread const CmdEndClip& s, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint param_2 = 9u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + uint param_2 = 10u; + write_mem(param, param_1, param_2, v_260, v_260BufferSize); Alloc param_3 = a; CmdEndClipRef param_4 = CmdEndClipRef{ ref.offset + 4u }; CmdEndClip param_5 = s; - CmdEndClip_write(param_3, param_4, param_5, v_242, v_242BufferSize); + CmdEndClip_write(param_3, param_4, param_5, v_260, v_260BufferSize); } static inline 
__attribute__((always_inline)) -void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_242, constant uint& v_242BufferSize) +void Cmd_End_write(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_260, constant uint& v_260BufferSize) { Alloc param = a; uint param_1 = ref.offset >> uint(2); uint param_2 = 0u; - write_mem(param, param_1, param_2, v_242, v_242BufferSize); + write_mem(param, param_1, param_2, v_260, v_260BufferSize); } -kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_242 [[buffer(0)]], const device ConfigBuf& _854 [[buffer(1)]], const device SceneBuf& _1222 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device Memory& v_260 [[buffer(0)]], const device ConfigBuf& _1005 [[buffer(1)]], const device SceneBuf& _1372 [[buffer(2)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { threadgroup uint sh_bitmaps[8][256]; threadgroup Alloc sh_part_elements[256]; @@ -611,76 +689,77 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M threadgroup uint sh_tile_y0[256]; threadgroup uint sh_tile_base[256]; threadgroup uint sh_tile_count[256]; - constant uint& v_242BufferSize = spvBufferSizeConstants[0]; - uint width_in_bins = ((_854.conf.width_in_tiles + 16u) - 1u) / 16u; + constant uint& v_260BufferSize = spvBufferSizeConstants[0]; + uint width_in_bins = ((_1005.conf.width_in_tiles + 16u) - 1u) / 16u; uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x; uint partition_ix = 0u; - uint n_partitions = ((_854.conf.n_elements + 256u) - 1u) / 256u; + uint n_partitions = ((_1005.conf.n_elements + 256u) - 1u) / 256u; uint th_ix = gl_LocalInvocationID.x; uint bin_tile_x = 16u * gl_WorkGroupID.x; uint bin_tile_y = 16u * 
gl_WorkGroupID.y; uint tile_x = gl_LocalInvocationID.x % 16u; uint tile_y = gl_LocalInvocationID.x / 16u; - uint this_tile_ix = (((bin_tile_y + tile_y) * _854.conf.width_in_tiles) + bin_tile_x) + tile_x; + uint this_tile_ix = (((bin_tile_y + tile_y) * _1005.conf.width_in_tiles) + bin_tile_x) + tile_x; Alloc param; - param.offset = _854.conf.ptcl_alloc.offset; + param.offset = _1005.conf.ptcl_alloc.offset; uint param_1 = this_tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); CmdRef cmd_ref = CmdRef{ cmd_alloc.offset }; - uint cmd_limit = (cmd_ref.offset + 1024u) - 60u; + uint cmd_limit = (cmd_ref.offset + 1024u) - 144u; uint clip_depth = 0u; uint clip_zero_depth = 0u; uint rd_ix = 0u; uint wr_ix = 0u; uint part_start_ix = 0u; uint ready_ix = 0u; - uint drawmonoid_start = _854.conf.drawmonoid_alloc.offset >> uint(2); - uint drawtag_start = _854.conf.drawtag_offset >> uint(2); - uint drawdata_start = _854.conf.drawdata_offset >> uint(2); - uint drawinfo_start = _854.conf.drawinfo_alloc.offset >> uint(2); - bool mem_ok = v_242.mem_error == 0u; + uint drawmonoid_start = _1005.conf.drawmonoid_alloc.offset >> uint(2); + uint drawtag_start = _1005.conf.drawtag_offset >> uint(2); + uint drawdata_start = _1005.conf.drawdata_offset >> uint(2); + uint drawinfo_start = _1005.conf.drawinfo_alloc.offset >> uint(2); + bool mem_ok = v_260.mem_error == 0u; Alloc param_3; Alloc param_5; - uint _1154; + uint _1304; uint element_ix; Alloc param_14; uint tile_count; - uint _1455; + uint _1605; float linewidth; CmdLinGrad cmd_lin; + CmdRadGrad cmd_rad; while (true) { for (uint i = 0u; i < 8u; i++) { sh_bitmaps[i][th_ix] = 0u; } - bool _1206; + bool _1356; for (;;) { if ((ready_ix == wr_ix) && (partition_ix < n_partitions)) { part_start_ix = ready_ix; uint count = 0u; - bool _1003 = th_ix < 256u; - bool _1011; - if (_1003) + bool _1154 = th_ix < 256u; + bool _1162; + if (_1154) { - _1011 = (partition_ix + th_ix) < n_partitions; + _1162 = 
(partition_ix + th_ix) < n_partitions; } else { - _1011 = _1003; + _1162 = _1154; } - if (_1011) + if (_1162) { - uint in_ix = (_854.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); - param_3.offset = _854.conf.bin_alloc.offset; + uint in_ix = (_1005.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 256u) + bin_ix) * 2u); + param_3.offset = _1005.conf.bin_alloc.offset; uint param_4 = in_ix; - count = read_mem(param_3, param_4, v_242, v_242BufferSize); - param_5.offset = _854.conf.bin_alloc.offset; + count = read_mem(param_3, param_4, v_260, v_260BufferSize); + param_5.offset = _1005.conf.bin_alloc.offset; uint param_6 = in_ix + 1u; - uint offset = read_mem(param_5, param_6, v_242, v_242BufferSize); + uint offset = read_mem(param_5, param_6, v_260, v_260BufferSize); uint param_7 = offset; uint param_8 = count * 4u; bool param_9 = mem_ok; @@ -724,34 +803,34 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } if (part_ix > 0u) { - _1154 = sh_part_count[part_ix - 1u]; + _1304 = sh_part_count[part_ix - 1u]; } else { - _1154 = part_start_ix; + _1304 = part_start_ix; } - ix -= _1154; + ix -= _1304; Alloc bin_alloc = sh_part_elements[part_ix]; BinInstanceRef inst_ref = BinInstanceRef{ bin_alloc.offset }; BinInstanceRef param_10 = inst_ref; uint param_11 = ix; Alloc param_12 = bin_alloc; BinInstanceRef param_13 = BinInstance_index(param_10, param_11); - BinInstance inst = BinInstance_read(param_12, param_13, v_242, v_242BufferSize); + BinInstance inst = BinInstance_read(param_12, param_13, v_260, v_260BufferSize); sh_elements[th_ix] = inst.element_ix; } threadgroup_barrier(mem_flags::mem_threadgroup); wr_ix = min((rd_ix + 256u), ready_ix); - bool _1196 = (wr_ix - rd_ix) < 256u; - if (_1196) + bool _1346 = (wr_ix - rd_ix) < 256u; + if (_1346) { - _1206 = (wr_ix < ready_ix) || (partition_ix < n_partitions); + _1356 = (wr_ix < ready_ix) || (partition_ix < n_partitions); } else { - _1206 = 
_1196; + _1356 = _1346; } - if (_1206) + if (_1356) { continue; } @@ -764,21 +843,22 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M if ((th_ix + rd_ix) < wr_ix) { element_ix = sh_elements[th_ix]; - tag = _1222.scene[drawtag_start + element_ix]; + tag = _1372.scene[drawtag_start + element_ix]; } switch (tag) { case 68u: case 72u: case 276u: + case 732u: case 5u: case 37u: { uint drawmonoid_base = drawmonoid_start + (4u * element_ix); - uint path_ix = v_242.memory[drawmonoid_base]; - param_14.offset = _854.conf.tile_alloc.offset; - PathRef param_15 = PathRef{ _854.conf.tile_alloc.offset + (path_ix * 12u) }; - Path path = Path_read(param_14, param_15, v_242, v_242BufferSize); + uint path_ix = v_260.memory[drawmonoid_base]; + param_14.offset = _1005.conf.tile_alloc.offset; + PathRef param_15 = PathRef{ _1005.conf.tile_alloc.offset + (path_ix * 12u) }; + Path path = Path_read(param_14, param_15, v_260, v_260BufferSize); uint stride = path.bbox.z - path.bbox.x; sh_tile_stride[th_ix] = stride; int dx = int(path.bbox.x) - int(bin_tile_x); @@ -833,16 +913,16 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M } } uint element_ix_1 = sh_elements[el_ix]; - uint tag_1 = _1222.scene[drawtag_start + element_ix_1]; + uint tag_1 = _1372.scene[drawtag_start + element_ix_1]; if (el_ix > 0u) { - _1455 = sh_tile_count[el_ix - 1u]; + _1605 = sh_tile_count[el_ix - 1u]; } else { - _1455 = 0u; + _1605 = 0u; } - uint seq_ix = ix_1 - _1455; + uint seq_ix = ix_1 - _1605; uint width = sh_tile_width[el_ix]; uint x = sh_tile_x0[el_ix] + (seq_ix % width); uint y = sh_tile_y0[el_ix] + (seq_ix / width); @@ -851,36 +931,45 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M { uint param_21 = el_ix; bool param_22 = mem_ok; - Alloc param_23 = read_tile_alloc(param_21, param_22, v_242, v_242BufferSize); + Alloc param_23 = read_tile_alloc(param_21, param_22, v_260, v_260BufferSize); TileRef param_24 = 
TileRef{ sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u) }; - Tile tile = Tile_read(param_23, param_24, v_242, v_242BufferSize); + Tile tile = Tile_read(param_23, param_24, v_260, v_260BufferSize); bool is_clip = (tag_1 & 1u) != 0u; bool is_blend = false; if (is_clip) { uint drawmonoid_base_1 = drawmonoid_start + (4u * element_ix_1); - uint scene_offset = v_242.memory[drawmonoid_base_1 + 2u]; + uint scene_offset = v_260.memory[drawmonoid_base_1 + 2u]; uint dd = drawdata_start + (scene_offset >> uint(2)); - uint blend = _1222.scene[dd]; + uint blend = _1372.scene[dd]; is_blend = blend != 3u; } - bool _1542 = tile.tile.offset != 0u; - bool _1551; - if (!_1542) + bool _1692 = tile.tile.offset != 0u; + bool _1701; + if (!_1692) { - _1551 = (tile.backdrop == 0) == is_clip; + _1701 = (tile.backdrop == 0) == is_clip; } else { - _1551 = _1542; + _1701 = _1692; } - include_tile = _1551 || is_blend; + bool _1708; + if (!_1701) + { + _1708 = is_clip && is_blend; + } + else + { + _1708 = _1701; + } + include_tile = _1708; } if (include_tile) { uint el_slice = el_ix / 32u; uint el_mask = 1u << (el_ix & 31u); - uint _1573 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); + uint _1728 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&sh_bitmaps[el_slice][(y * 16u) + x], el_mask, memory_order_relaxed); } } threadgroup_barrier(mem_flags::mem_threadgroup); @@ -904,32 +993,32 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M uint element_ref_ix = (slice_ix * 32u) + uint(int(spvFindLSB(bitmap))); uint element_ix_2 = sh_elements[element_ref_ix]; bitmap &= (bitmap - 1u); - uint drawtag = _1222.scene[drawtag_start + element_ix_2]; + uint drawtag = _1372.scene[drawtag_start + element_ix_2]; if (clip_zero_depth == 0u) { uint param_25 = element_ref_ix; bool param_26 = mem_ok; - Alloc param_27 = read_tile_alloc(param_25, param_26, v_242, v_242BufferSize); + Alloc 
param_27 = read_tile_alloc(param_25, param_26, v_260, v_260BufferSize); TileRef param_28 = TileRef{ sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u) }; - Tile tile_1 = Tile_read(param_27, param_28, v_242, v_242BufferSize); + Tile tile_1 = Tile_read(param_27, param_28, v_260, v_260BufferSize); uint drawmonoid_base_2 = drawmonoid_start + (4u * element_ix_2); - uint scene_offset_1 = v_242.memory[drawmonoid_base_2 + 2u]; - uint info_offset = v_242.memory[drawmonoid_base_2 + 3u]; + uint scene_offset_1 = v_260.memory[drawmonoid_base_2 + 2u]; + uint info_offset = v_260.memory[drawmonoid_base_2 + 3u]; uint dd_1 = drawdata_start + (scene_offset_1 >> uint(2)); uint di = drawinfo_start + (info_offset >> uint(2)); switch (drawtag) { case 68u: { - linewidth = as_type(v_242.memory[di]); + linewidth = as_type(v_260.memory[di]); Alloc param_29 = cmd_alloc; CmdRef param_30 = cmd_ref; uint param_31 = cmd_limit; - bool _1697 = alloc_cmd(param_29, param_30, param_31, v_242, v_242BufferSize); + bool _1853 = alloc_cmd(param_29, param_30, param_31, v_260, v_260BufferSize); cmd_alloc = param_29; cmd_ref = param_30; cmd_limit = param_31; - if (!_1697) + if (!_1853) { break; } @@ -937,13 +1026,13 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M CmdRef param_33 = cmd_ref; Tile param_34 = tile_1; float param_35 = linewidth; - write_fill(param_32, param_33, param_34, param_35, v_242, v_242BufferSize); + write_fill(param_32, param_33, param_34, param_35, v_260, v_260BufferSize); cmd_ref = param_33; - uint rgba = _1222.scene[dd_1]; + uint rgba = _1372.scene[dd_1]; Alloc param_36 = cmd_alloc; CmdRef param_37 = cmd_ref; CmdColor param_38 = CmdColor{ rgba }; - Cmd_Color_write(param_36, param_37, param_38, v_242, v_242BufferSize); + Cmd_Color_write(param_36, param_37, param_38, v_260, v_260BufferSize); cmd_ref.offset += 8u; break; } @@ -952,94 +1041,127 @@ kernel void main0(constant uint* spvBufferSizeConstants 
[[buffer(25)]], device M Alloc param_39 = cmd_alloc; CmdRef param_40 = cmd_ref; uint param_41 = cmd_limit; - bool _1738 = alloc_cmd(param_39, param_40, param_41, v_242, v_242BufferSize); + bool _1894 = alloc_cmd(param_39, param_40, param_41, v_260, v_260BufferSize); cmd_alloc = param_39; cmd_ref = param_40; cmd_limit = param_41; - if (!_1738) + if (!_1894) { break; } - linewidth = as_type(v_242.memory[di]); + linewidth = as_type(v_260.memory[di]); Alloc param_42 = cmd_alloc; CmdRef param_43 = cmd_ref; Tile param_44 = tile_1; float param_45 = linewidth; - write_fill(param_42, param_43, param_44, param_45, v_242, v_242BufferSize); + write_fill(param_42, param_43, param_44, param_45, v_260, v_260BufferSize); cmd_ref = param_43; - cmd_lin.index = _1222.scene[dd_1]; - cmd_lin.line_x = as_type(v_242.memory[di + 1u]); - cmd_lin.line_y = as_type(v_242.memory[di + 2u]); - cmd_lin.line_c = as_type(v_242.memory[di + 3u]); + cmd_lin.index = _1372.scene[dd_1]; + cmd_lin.line_x = as_type(v_260.memory[di + 1u]); + cmd_lin.line_y = as_type(v_260.memory[di + 2u]); + cmd_lin.line_c = as_type(v_260.memory[di + 3u]); Alloc param_46 = cmd_alloc; CmdRef param_47 = cmd_ref; CmdLinGrad param_48 = cmd_lin; - Cmd_LinGrad_write(param_46, param_47, param_48, v_242, v_242BufferSize); + Cmd_LinGrad_write(param_46, param_47, param_48, v_260, v_260BufferSize); cmd_ref.offset += 20u; break; } - case 72u: + case 732u: { - linewidth = as_type(v_242.memory[di]); Alloc param_49 = cmd_alloc; CmdRef param_50 = cmd_ref; uint param_51 = cmd_limit; - bool _1806 = alloc_cmd(param_49, param_50, param_51, v_242, v_242BufferSize); + bool _1958 = alloc_cmd(param_49, param_50, param_51, v_260, v_260BufferSize); cmd_alloc = param_49; cmd_ref = param_50; cmd_limit = param_51; - if (!_1806) + if (!_1958) { break; } + linewidth = as_type(v_260.memory[di]); Alloc param_52 = cmd_alloc; CmdRef param_53 = cmd_ref; Tile param_54 = tile_1; float param_55 = linewidth; - write_fill(param_52, param_53, param_54, param_55, 
v_242, v_242BufferSize); + write_fill(param_52, param_53, param_54, param_55, v_260, v_260BufferSize); cmd_ref = param_53; - uint index = _1222.scene[dd_1]; - uint raw1 = _1222.scene[dd_1 + 1u]; - int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + cmd_rad.index = _1372.scene[dd_1]; + cmd_rad.mat = as_type(uint4(v_260.memory[di + 1u], v_260.memory[di + 2u], v_260.memory[di + 3u], v_260.memory[di + 4u])); + cmd_rad.xlat = as_type(uint2(v_260.memory[di + 5u], v_260.memory[di + 6u])); + cmd_rad.c1 = as_type(uint2(v_260.memory[di + 7u], v_260.memory[di + 8u])); + cmd_rad.ra = as_type(v_260.memory[di + 9u]); + cmd_rad.roff = as_type(v_260.memory[di + 10u]); Alloc param_56 = cmd_alloc; CmdRef param_57 = cmd_ref; - CmdImage param_58 = CmdImage{ index, offset_1 }; - Cmd_Image_write(param_56, param_57, param_58, v_242, v_242BufferSize); + CmdRadGrad param_58 = cmd_rad; + Cmd_RadGrad_write(param_56, param_57, param_58, v_260, v_260BufferSize); + cmd_ref.offset += 48u; + break; + } + case 72u: + { + linewidth = as_type(v_260.memory[di]); + Alloc param_59 = cmd_alloc; + CmdRef param_60 = cmd_ref; + uint param_61 = cmd_limit; + bool _2064 = alloc_cmd(param_59, param_60, param_61, v_260, v_260BufferSize); + cmd_alloc = param_59; + cmd_ref = param_60; + cmd_limit = param_61; + if (!_2064) + { + break; + } + Alloc param_62 = cmd_alloc; + CmdRef param_63 = cmd_ref; + Tile param_64 = tile_1; + float param_65 = linewidth; + write_fill(param_62, param_63, param_64, param_65, v_260, v_260BufferSize); + cmd_ref = param_63; + uint index = _1372.scene[dd_1]; + uint raw1 = _1372.scene[dd_1 + 1u]; + int2 offset_1 = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); + Alloc param_66 = cmd_alloc; + CmdRef param_67 = cmd_ref; + CmdImage param_68 = CmdImage{ index, offset_1 }; + Cmd_Image_write(param_66, param_67, param_68, v_260, v_260BufferSize); cmd_ref.offset += 12u; break; } case 5u: { - bool _1859 = tile_1.tile.offset == 0u; - bool _1865; - if (_1859) + bool _2117 = 
tile_1.tile.offset == 0u; + bool _2123; + if (_2117) { - _1865 = tile_1.backdrop == 0; + _2123 = tile_1.backdrop == 0; } else { - _1865 = _1859; + _2123 = _2117; } - if (_1865) + if (_2123) { clip_zero_depth = clip_depth + 1u; } else { - Alloc param_59 = cmd_alloc; - CmdRef param_60 = cmd_ref; - uint param_61 = cmd_limit; - bool _1877 = alloc_cmd(param_59, param_60, param_61, v_242, v_242BufferSize); - cmd_alloc = param_59; - cmd_ref = param_60; - cmd_limit = param_61; - if (!_1877) + Alloc param_69 = cmd_alloc; + CmdRef param_70 = cmd_ref; + uint param_71 = cmd_limit; + bool _2135 = alloc_cmd(param_69, param_70, param_71, v_260, v_260BufferSize); + cmd_alloc = param_69; + cmd_ref = param_70; + cmd_limit = param_71; + if (!_2135) { break; } - Alloc param_62 = cmd_alloc; - CmdRef param_63 = cmd_ref; - Cmd_BeginClip_write(param_62, param_63, v_242, v_242BufferSize); + Alloc param_72 = cmd_alloc; + CmdRef param_73 = cmd_ref; + Cmd_BeginClip_write(param_72, param_73, v_260, v_260BufferSize); cmd_ref.offset += 4u; } clip_depth++; @@ -1048,28 +1170,28 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M case 37u: { clip_depth--; - Alloc param_64 = cmd_alloc; - CmdRef param_65 = cmd_ref; - uint param_66 = cmd_limit; - bool _1905 = alloc_cmd(param_64, param_65, param_66, v_242, v_242BufferSize); - cmd_alloc = param_64; - cmd_ref = param_65; - cmd_limit = param_66; - if (!_1905) + Alloc param_74 = cmd_alloc; + CmdRef param_75 = cmd_ref; + uint param_76 = cmd_limit; + bool _2163 = alloc_cmd(param_74, param_75, param_76, v_260, v_260BufferSize); + cmd_alloc = param_74; + cmd_ref = param_75; + cmd_limit = param_76; + if (!_2163) { break; } - Alloc param_67 = cmd_alloc; - CmdRef param_68 = cmd_ref; - Tile param_69 = tile_1; - float param_70 = -1.0; - write_fill(param_67, param_68, param_69, param_70, v_242, v_242BufferSize); - cmd_ref = param_68; - uint blend_1 = _1222.scene[dd_1]; - Alloc param_71 = cmd_alloc; - CmdRef param_72 = cmd_ref; - 
CmdEndClip param_73 = CmdEndClip{ blend_1 }; - Cmd_EndClip_write(param_71, param_72, param_73, v_242, v_242BufferSize); + Alloc param_77 = cmd_alloc; + CmdRef param_78 = cmd_ref; + Tile param_79 = tile_1; + float param_80 = -1.0; + write_fill(param_77, param_78, param_79, param_80, v_260, v_260BufferSize); + cmd_ref = param_78; + uint blend_1 = _1372.scene[dd_1]; + Alloc param_81 = cmd_alloc; + CmdRef param_82 = cmd_ref; + CmdEndClip param_83 = CmdEndClip{ blend_1 }; + Cmd_EndClip_write(param_81, param_82, param_83, v_260, v_260BufferSize); cmd_ref.offset += 8u; break; } @@ -1103,21 +1225,21 @@ kernel void main0(constant uint* spvBufferSizeConstants [[buffer(25)]], device M break; } } - bool _1975 = (bin_tile_x + tile_x) < _854.conf.width_in_tiles; - bool _1984; - if (_1975) + bool _2233 = (bin_tile_x + tile_x) < _1005.conf.width_in_tiles; + bool _2242; + if (_2233) { - _1984 = (bin_tile_y + tile_y) < _854.conf.height_in_tiles; + _2242 = (bin_tile_y + tile_y) < _1005.conf.height_in_tiles; } else { - _1984 = _1975; + _2242 = _2233; } - if (_1984) + if (_2242) { - Alloc param_74 = cmd_alloc; - CmdRef param_75 = cmd_ref; - Cmd_End_write(param_74, param_75, v_242, v_242BufferSize); + Alloc param_84 = cmd_alloc; + CmdRef param_85 = cmd_ref; + Cmd_End_write(param_84, param_85, v_260, v_260BufferSize); } } diff --git a/piet-gpu/shader/gen/coarse.spv b/piet-gpu/shader/gen/coarse.spv index b85fd8c..fdc10a0 100644 Binary files a/piet-gpu/shader/gen/coarse.spv and b/piet-gpu/shader/gen/coarse.spv differ diff --git a/piet-gpu/shader/gen/draw_leaf.dxil b/piet-gpu/shader/gen/draw_leaf.dxil index 77396c1..6353f19 100644 Binary files a/piet-gpu/shader/gen/draw_leaf.dxil and b/piet-gpu/shader/gen/draw_leaf.dxil differ diff --git a/piet-gpu/shader/gen/draw_leaf.hlsl b/piet-gpu/shader/gen/draw_leaf.hlsl index f812f52..734d21e 100644 --- a/piet-gpu/shader/gen/draw_leaf.hlsl +++ b/piet-gpu/shader/gen/draw_leaf.hlsl @@ -46,10 +46,10 @@ static const uint3 gl_WorkGroupSize = uint3(256u, 
1u, 1u); static const DrawMonoid _23 = { 0u, 0u, 0u, 0u }; -ByteAddressBuffer _92 : register(t1, space0); -ByteAddressBuffer _102 : register(t2, space0); -ByteAddressBuffer _202 : register(t3, space0); -RWByteAddressBuffer _284 : register(u0, space0); +ByteAddressBuffer _93 : register(t1, space0); +ByteAddressBuffer _103 : register(t2, space0); +ByteAddressBuffer _203 : register(t3, space0); +RWByteAddressBuffer _285 : register(u0, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -66,8 +66,8 @@ groupshared DrawMonoid sh_scratch[256]; DrawMonoid map_tag(uint tag_word) { uint has_path = uint(tag_word != 0u); - DrawMonoid _75 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; - return _75; + DrawMonoid _76 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; + return _76; } DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) @@ -88,15 +88,15 @@ DrawMonoid draw_monoid_identity() void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _92.Load(100) >> uint(2); - uint tag_word = _102.Load((drawtag_base + ix) * 4 + 0); + uint drawtag_base = _93.Load(100) >> uint(2); + uint tag_word = _103.Load((drawtag_base + ix) * 4 + 0); uint param = tag_word; DrawMonoid agg = map_tag(param); DrawMonoid local[8]; local[0] = agg; for (uint i = 1u; i < 8u; i++) { - tag_word = _102.Load(((drawtag_base + ix) + i) * 4 + 0); + tag_word = _103.Load(((drawtag_base + ix) + i) * 4 + 0); uint param_1 = tag_word; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); @@ -121,15 +121,15 @@ void comp_main() DrawMonoid row = draw_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - DrawMonoid _208; - _208.path_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 0); - _208.clip_ix = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 4); - _208.scene_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 8); - _208.info_offset = _202.Load((gl_WorkGroupID.x - 1u) * 16 + 12); - row.path_ix = _208.path_ix; - 
row.clip_ix = _208.clip_ix; - row.scene_offset = _208.scene_offset; - row.info_offset = _208.info_offset; + DrawMonoid _209; + _209.path_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 0); + _209.clip_ix = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 4); + _209.scene_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 8); + _209.info_offset = _203.Load((gl_WorkGroupID.x - 1u) * 16 + 12); + row.path_ix = _209.path_ix; + row.clip_ix = _209.clip_ix; + row.scene_offset = _209.scene_offset; + row.info_offset = _209.info_offset; } if (gl_LocalInvocationID.x > 0u) { @@ -137,13 +137,15 @@ void comp_main() DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; row = combine_draw_monoid(param_6, param_7); } - uint drawdata_base = _92.Load(104) >> uint(2); - uint drawinfo_base = _92.Load(68) >> uint(2); + uint drawdata_base = _93.Load(104) >> uint(2); + uint drawinfo_base = _93.Load(68) >> uint(2); uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_92.Load(44) >> uint(2)) + (out_ix * 4u); - uint clip_out_base = _92.Load(48) >> uint(2); + uint out_base = (_93.Load(44) >> uint(2)) + (out_ix * 4u); + uint clip_out_base = _93.Load(48) >> uint(2); float4 mat; float2 translate; + float2 p0; + float2 p1; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; @@ -153,31 +155,31 @@ void comp_main() DrawMonoid param_9 = local[i_2 - 1u]; m = combine_draw_monoid(param_8, param_9); } - _284.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix); - _284.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix); - _284.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset); - _284.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset); + _285.Store((out_base + (i_2 * 4u)) * 4 + 8, m.path_ix); + _285.Store(((out_base + (i_2 * 4u)) + 1u) * 4 + 8, m.clip_ix); + _285.Store(((out_base + (i_2 * 4u)) + 2u) * 4 + 8, m.scene_offset); + _285.Store(((out_base + (i_2 * 4u)) + 3u) * 4 + 8, m.info_offset); uint dd = drawdata_base + (m.scene_offset >> uint(2)); uint di = 
drawinfo_base + (m.info_offset >> uint(2)); - tag_word = _102.Load(((drawtag_base + ix) + i_2) * 4 + 0); - if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u)) + tag_word = _103.Load(((drawtag_base + ix) + i_2) * 4 + 0); + if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) { - uint bbox_offset = (_92.Load(40) >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_284.Load(bbox_offset * 4 + 8)) - 32768.0f; - float bbox_t = float(_284.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; - float bbox_r = float(_284.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; - float bbox_b = float(_284.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; + uint bbox_offset = (_93.Load(40) >> uint(2)) + (6u * m.path_ix); + float bbox_l = float(_285.Load(bbox_offset * 4 + 8)) - 32768.0f; + float bbox_t = float(_285.Load((bbox_offset + 1u) * 4 + 8)) - 32768.0f; + float bbox_r = float(_285.Load((bbox_offset + 2u) * 4 + 8)) - 32768.0f; + float bbox_b = float(_285.Load((bbox_offset + 3u) * 4 + 8)) - 32768.0f; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = asfloat(_284.Load((bbox_offset + 4u) * 4 + 8)); + float linewidth = asfloat(_285.Load((bbox_offset + 4u) * 4 + 8)); uint fill_mode = uint(linewidth >= 0.0f); - if ((linewidth >= 0.0f) || (tag_word == 276u)) + if (((linewidth >= 0.0f) || (tag_word == 276u)) || (tag_word == 732u)) { - uint trans_ix = _284.Load((bbox_offset + 5u) * 4 + 8); - uint t = (_92.Load(36) >> uint(2)) + (6u * trans_ix); - mat = asfloat(uint4(_284.Load(t * 4 + 8), _284.Load((t + 1u) * 4 + 8), _284.Load((t + 2u) * 4 + 8), _284.Load((t + 3u) * 4 + 8))); - if (tag_word == 276u) + uint trans_ix = _285.Load((bbox_offset + 5u) * 4 + 8); + uint t = (_93.Load(36) >> uint(2)) + (6u * trans_ix); + mat = asfloat(uint4(_285.Load(t * 4 + 8), _285.Load((t + 1u) * 4 + 8), _285.Load((t + 2u) * 4 + 8), _285.Load((t + 3u) * 4 + 8))); + if ((tag_word == 276u) || (tag_word 
== 732u)) { - translate = asfloat(uint2(_284.Load((t + 4u) * 4 + 8), _284.Load((t + 5u) * 4 + 8))); + translate = asfloat(uint2(_285.Load((t + 4u) * 4 + 8), _285.Load((t + 5u) * 4 + 8))); } } if (linewidth >= 0.0f) @@ -189,15 +191,14 @@ void comp_main() case 68u: case 72u: { - _284.Store(di * 4 + 8, asuint(linewidth)); + _285.Store(di * 4 + 8, asuint(linewidth)); break; } case 276u: { - _284.Store(di * 4 + 8, asuint(linewidth)); - uint index = _102.Load(dd * 4 + 0); - float2 p0 = asfloat(uint2(_102.Load((dd + 1u) * 4 + 0), _102.Load((dd + 2u) * 4 + 0))); - float2 p1 = asfloat(uint2(_102.Load((dd + 3u) * 4 + 0), _102.Load((dd + 4u) * 4 + 0))); + _285.Store(di * 4 + 8, asuint(linewidth)); + p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); + p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; float2 dxy = p1 - p0; @@ -205,9 +206,38 @@ void comp_main() float line_x = dxy.x * scale; float line_y = dxy.y * scale; float line_c = -((p0.x * line_x) + (p0.y * line_y)); - _284.Store((di + 1u) * 4 + 8, asuint(line_x)); - _284.Store((di + 2u) * 4 + 8, asuint(line_y)); - _284.Store((di + 3u) * 4 + 8, asuint(line_c)); + _285.Store((di + 1u) * 4 + 8, asuint(line_x)); + _285.Store((di + 2u) * 4 + 8, asuint(line_y)); + _285.Store((di + 3u) * 4 + 8, asuint(line_c)); + break; + } + case 732u: + { + p0 = asfloat(uint2(_103.Load((dd + 1u) * 4 + 0), _103.Load((dd + 2u) * 4 + 0))); + p1 = asfloat(uint2(_103.Load((dd + 3u) * 4 + 0), _103.Load((dd + 4u) * 4 + 0))); + float r0 = asfloat(_103.Load((dd + 5u) * 4 + 0)); + float r1 = asfloat(_103.Load((dd + 6u) * 4 + 0)); + float inv_det = 1.0f / ((mat.x * mat.w) - (mat.y * mat.z)); + float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; + float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); + inv_tr += p0; + float2 center1 = p1 - p0; + float rr = 
r1 / (r1 - r0); + float rainv = rr / ((r1 * r1) - dot(center1, center1)); + float2 c1 = center1 * rainv; + float ra = rr * rainv; + float roff = rr - 1.0f; + _285.Store(di * 4 + 8, asuint(linewidth)); + _285.Store((di + 1u) * 4 + 8, asuint(inv_mat.x)); + _285.Store((di + 2u) * 4 + 8, asuint(inv_mat.y)); + _285.Store((di + 3u) * 4 + 8, asuint(inv_mat.z)); + _285.Store((di + 4u) * 4 + 8, asuint(inv_mat.w)); + _285.Store((di + 5u) * 4 + 8, asuint(inv_tr.x)); + _285.Store((di + 6u) * 4 + 8, asuint(inv_tr.y)); + _285.Store((di + 7u) * 4 + 8, asuint(c1.x)); + _285.Store((di + 8u) * 4 + 8, asuint(c1.y)); + _285.Store((di + 9u) * 4 + 8, asuint(ra)); + _285.Store((di + 10u) * 4 + 8, asuint(roff)); break; } case 5u: @@ -223,7 +253,7 @@ void comp_main() { path_ix = m.path_ix; } - _284.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); + _285.Store((clip_out_base + m.clip_ix) * 4 + 8, path_ix); } } } diff --git a/piet-gpu/shader/gen/draw_leaf.msl b/piet-gpu/shader/gen/draw_leaf.msl index a8516ae..c11e21b 100644 --- a/piet-gpu/shader/gen/draw_leaf.msl +++ b/piet-gpu/shader/gen/draw_leaf.msl @@ -124,7 +124,7 @@ static inline __attribute__((always_inline)) DrawMonoid map_tag(thread const uint& tag_word) { uint has_path = uint(tag_word != 0u); - return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; + return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; } static inline __attribute__((always_inline)) @@ -144,19 +144,19 @@ DrawMonoid draw_monoid_identity() return DrawMonoid{ 0u, 0u, 0u, 0u }; } -kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 [[buffer(1)]], const device SceneBuf& _102 [[buffer(2)]], const device ParentBuf& _202 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(device Memory& _285 [[buffer(0)]], const 
device ConfigBuf& _93 [[buffer(1)]], const device SceneBuf& _103 [[buffer(2)]], const device ParentBuf& _203 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _92.conf.drawtag_offset >> uint(2); - uint tag_word = _102.scene[drawtag_base + ix]; + uint drawtag_base = _93.conf.drawtag_offset >> uint(2); + uint tag_word = _103.scene[drawtag_base + ix]; uint param = tag_word; DrawMonoid agg = map_tag(param); spvUnsafeArray local; local[0] = agg; for (uint i = 1u; i < 8u; i++) { - tag_word = _102.scene[(drawtag_base + ix) + i]; + tag_word = _103.scene[(drawtag_base + ix) + i]; uint param_1 = tag_word; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); @@ -181,11 +181,11 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 DrawMonoid row = draw_monoid_identity(); if (gl_WorkGroupID.x > 0u) { - uint _205 = gl_WorkGroupID.x - 1u; - row.path_ix = _202.parent[_205].path_ix; - row.clip_ix = _202.parent[_205].clip_ix; - row.scene_offset = _202.parent[_205].scene_offset; - row.info_offset = _202.parent[_205].info_offset; + uint _206 = gl_WorkGroupID.x - 1u; + row.path_ix = _203.parent[_206].path_ix; + row.clip_ix = _203.parent[_206].clip_ix; + row.scene_offset = _203.parent[_206].scene_offset; + row.info_offset = _203.parent[_206].info_offset; } if (gl_LocalInvocationID.x > 0u) { @@ -193,13 +193,15 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 DrawMonoid param_7 = sh_scratch[gl_LocalInvocationID.x - 1u]; row = combine_draw_monoid(param_6, param_7); } - uint drawdata_base = _92.conf.drawdata_offset >> uint(2); - uint drawinfo_base = _92.conf.drawinfo_alloc.offset >> uint(2); + uint drawdata_base = _93.conf.drawdata_offset >> uint(2); + uint drawinfo_base = 
_93.conf.drawinfo_alloc.offset >> uint(2); uint out_ix = gl_GlobalInvocationID.x * 8u; - uint out_base = (_92.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u); - uint clip_out_base = _92.conf.clip_alloc.offset >> uint(2); + uint out_base = (_93.conf.drawmonoid_alloc.offset >> uint(2)) + (out_ix * 4u); + uint clip_out_base = _93.conf.clip_alloc.offset >> uint(2); float4 mat; float2 translate; + float2 p0; + float2 p1; for (uint i_2 = 0u; i_2 < 8u; i_2++) { DrawMonoid m = row; @@ -209,31 +211,31 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 DrawMonoid param_9 = local[i_2 - 1u]; m = combine_draw_monoid(param_8, param_9); } - _284.memory[out_base + (i_2 * 4u)] = m.path_ix; - _284.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix; - _284.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset; - _284.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset; + _285.memory[out_base + (i_2 * 4u)] = m.path_ix; + _285.memory[(out_base + (i_2 * 4u)) + 1u] = m.clip_ix; + _285.memory[(out_base + (i_2 * 4u)) + 2u] = m.scene_offset; + _285.memory[(out_base + (i_2 * 4u)) + 3u] = m.info_offset; uint dd = drawdata_base + (m.scene_offset >> uint(2)); uint di = drawinfo_base + (m.info_offset >> uint(2)); - tag_word = _102.scene[(drawtag_base + ix) + i_2]; - if ((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 72u)) || (tag_word == 5u)) + tag_word = _103.scene[(drawtag_base + ix) + i_2]; + if (((((tag_word == 68u) || (tag_word == 276u)) || (tag_word == 732u)) || (tag_word == 72u)) || (tag_word == 5u)) { - uint bbox_offset = (_92.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); - float bbox_l = float(_284.memory[bbox_offset]) - 32768.0; - float bbox_t = float(_284.memory[bbox_offset + 1u]) - 32768.0; - float bbox_r = float(_284.memory[bbox_offset + 2u]) - 32768.0; - float bbox_b = float(_284.memory[bbox_offset + 3u]) - 32768.0; + uint bbox_offset = (_93.conf.path_bbox_alloc.offset >> uint(2)) + (6u * m.path_ix); + float bbox_l 
= float(_285.memory[bbox_offset]) - 32768.0; + float bbox_t = float(_285.memory[bbox_offset + 1u]) - 32768.0; + float bbox_r = float(_285.memory[bbox_offset + 2u]) - 32768.0; + float bbox_b = float(_285.memory[bbox_offset + 3u]) - 32768.0; float4 bbox = float4(bbox_l, bbox_t, bbox_r, bbox_b); - float linewidth = as_type(_284.memory[bbox_offset + 4u]); + float linewidth = as_type(_285.memory[bbox_offset + 4u]); uint fill_mode = uint(linewidth >= 0.0); - if ((linewidth >= 0.0) || (tag_word == 276u)) + if (((linewidth >= 0.0) || (tag_word == 276u)) || (tag_word == 732u)) { - uint trans_ix = _284.memory[bbox_offset + 5u]; - uint t = (_92.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); - mat = as_type(uint4(_284.memory[t], _284.memory[t + 1u], _284.memory[t + 2u], _284.memory[t + 3u])); - if (tag_word == 276u) + uint trans_ix = _285.memory[bbox_offset + 5u]; + uint t = (_93.conf.trans_alloc.offset >> uint(2)) + (6u * trans_ix); + mat = as_type(uint4(_285.memory[t], _285.memory[t + 1u], _285.memory[t + 2u], _285.memory[t + 3u])); + if ((tag_word == 276u) || (tag_word == 732u)) { - translate = as_type(uint2(_284.memory[t + 4u], _284.memory[t + 5u])); + translate = as_type(uint2(_285.memory[t + 4u], _285.memory[t + 5u])); } } if (linewidth >= 0.0) @@ -245,15 +247,14 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 case 68u: case 72u: { - _284.memory[di] = as_type(linewidth); + _285.memory[di] = as_type(linewidth); break; } case 276u: { - _284.memory[di] = as_type(linewidth); - uint index = _102.scene[dd]; - float2 p0 = as_type(uint2(_102.scene[dd + 1u], _102.scene[dd + 2u])); - float2 p1 = as_type(uint2(_102.scene[dd + 3u], _102.scene[dd + 4u])); + _285.memory[di] = as_type(linewidth); + p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); + p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); p0 = ((mat.xy * p0.x) + (mat.zw * p0.y)) + translate; p1 = ((mat.xy * p1.x) + (mat.zw * p1.y)) + translate; float2 dxy = 
p1 - p0; @@ -261,9 +262,38 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 float line_x = dxy.x * scale; float line_y = dxy.y * scale; float line_c = -((p0.x * line_x) + (p0.y * line_y)); - _284.memory[di + 1u] = as_type(line_x); - _284.memory[di + 2u] = as_type(line_y); - _284.memory[di + 3u] = as_type(line_c); + _285.memory[di + 1u] = as_type(line_x); + _285.memory[di + 2u] = as_type(line_y); + _285.memory[di + 3u] = as_type(line_c); + break; + } + case 732u: + { + p0 = as_type(uint2(_103.scene[dd + 1u], _103.scene[dd + 2u])); + p1 = as_type(uint2(_103.scene[dd + 3u], _103.scene[dd + 4u])); + float r0 = as_type(_103.scene[dd + 5u]); + float r1 = as_type(_103.scene[dd + 6u]); + float inv_det = 1.0 / ((mat.x * mat.w) - (mat.y * mat.z)); + float4 inv_mat = float4(mat.w, -mat.y, -mat.z, mat.x) * inv_det; + float2 inv_tr = (inv_mat.xz * translate.x) + (inv_mat.yw * translate.y); + inv_tr += p0; + float2 center1 = p1 - p0; + float rr = r1 / (r1 - r0); + float rainv = rr / ((r1 * r1) - dot(center1, center1)); + float2 c1 = center1 * rainv; + float ra = rr * rainv; + float roff = rr - 1.0; + _285.memory[di] = as_type(linewidth); + _285.memory[di + 1u] = as_type(inv_mat.x); + _285.memory[di + 2u] = as_type(inv_mat.y); + _285.memory[di + 3u] = as_type(inv_mat.z); + _285.memory[di + 4u] = as_type(inv_mat.w); + _285.memory[di + 5u] = as_type(inv_tr.x); + _285.memory[di + 6u] = as_type(inv_tr.y); + _285.memory[di + 7u] = as_type(c1.x); + _285.memory[di + 8u] = as_type(c1.y); + _285.memory[di + 9u] = as_type(ra); + _285.memory[di + 10u] = as_type(roff); break; } case 5u: @@ -279,7 +309,7 @@ kernel void main0(device Memory& _284 [[buffer(0)]], const device ConfigBuf& _92 { path_ix = m.path_ix; } - _284.memory[clip_out_base + m.clip_ix] = path_ix; + _285.memory[clip_out_base + m.clip_ix] = path_ix; } } } diff --git a/piet-gpu/shader/gen/draw_leaf.spv b/piet-gpu/shader/gen/draw_leaf.spv index d18b287..58dde43 100644 Binary files 
a/piet-gpu/shader/gen/draw_leaf.spv and b/piet-gpu/shader/gen/draw_leaf.spv differ diff --git a/piet-gpu/shader/gen/draw_reduce.dxil b/piet-gpu/shader/gen/draw_reduce.dxil index 4df0ec5..c101fc8 100644 Binary files a/piet-gpu/shader/gen/draw_reduce.dxil and b/piet-gpu/shader/gen/draw_reduce.dxil differ diff --git a/piet-gpu/shader/gen/draw_reduce.hlsl b/piet-gpu/shader/gen/draw_reduce.hlsl index 7220b7e..8311155 100644 --- a/piet-gpu/shader/gen/draw_reduce.hlsl +++ b/piet-gpu/shader/gen/draw_reduce.hlsl @@ -44,10 +44,10 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(256u, 1u, 1u); -ByteAddressBuffer _86 : register(t1, space0); -ByteAddressBuffer _96 : register(t2, space0); -RWByteAddressBuffer _187 : register(u3, space0); -RWByteAddressBuffer _205 : register(u0, space0); +ByteAddressBuffer _87 : register(t1, space0); +ByteAddressBuffer _97 : register(t2, space0); +RWByteAddressBuffer _188 : register(u3, space0); +RWByteAddressBuffer _206 : register(u0, space0); static uint3 gl_WorkGroupID; static uint3 gl_LocalInvocationID; @@ -64,8 +64,8 @@ groupshared DrawMonoid sh_scratch[256]; DrawMonoid map_tag(uint tag_word) { uint has_path = uint(tag_word != 0u); - DrawMonoid _69 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; - return _69; + DrawMonoid _70 = { has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; + return _70; } DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) @@ -81,13 +81,13 @@ DrawMonoid combine_draw_monoid(DrawMonoid a, DrawMonoid b) void comp_main() { uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _86.Load(100) >> uint(2); - uint tag_word = _96.Load((drawtag_base + ix) * 4 + 0); + uint drawtag_base = _87.Load(100) >> uint(2); + uint tag_word = _97.Load((drawtag_base + ix) * 4 + 0); uint param = tag_word; DrawMonoid agg = map_tag(param); for (uint i = 1u; i < 8u; i++) { - uint tag_word_1 = _96.Load(((drawtag_base + ix) + i) * 4 + 0); + uint tag_word_1 = 
_97.Load(((drawtag_base + ix) + i) * 4 + 0); uint param_1 = tag_word_1; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); @@ -109,10 +109,10 @@ void comp_main() } if (gl_LocalInvocationID.x == 0u) { - _187.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix); - _187.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix); - _187.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset); - _187.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset); + _188.Store(gl_WorkGroupID.x * 16 + 0, agg.path_ix); + _188.Store(gl_WorkGroupID.x * 16 + 4, agg.clip_ix); + _188.Store(gl_WorkGroupID.x * 16 + 8, agg.scene_offset); + _188.Store(gl_WorkGroupID.x * 16 + 12, agg.info_offset); } } diff --git a/piet-gpu/shader/gen/draw_reduce.msl b/piet-gpu/shader/gen/draw_reduce.msl index 8e409a8..759267c 100644 --- a/piet-gpu/shader/gen/draw_reduce.msl +++ b/piet-gpu/shader/gen/draw_reduce.msl @@ -85,7 +85,7 @@ static inline __attribute__((always_inline)) DrawMonoid map_tag(thread const uint& tag_word) { uint has_path = uint(tag_word != 0u); - return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 28u }; + return DrawMonoid{ has_path, tag_word & 1u, tag_word & 28u, (tag_word >> uint(4)) & 60u }; } static inline __attribute__((always_inline)) @@ -99,17 +99,17 @@ DrawMonoid combine_draw_monoid(thread const DrawMonoid& a, thread const DrawMono return c; } -kernel void main0(const device ConfigBuf& _86 [[buffer(1)]], const device SceneBuf& _96 [[buffer(2)]], device OutBuf& _187 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +kernel void main0(const device ConfigBuf& _87 [[buffer(1)]], const device SceneBuf& _97 [[buffer(2)]], device OutBuf& _188 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) { 
threadgroup DrawMonoid sh_scratch[256]; uint ix = gl_GlobalInvocationID.x * 8u; - uint drawtag_base = _86.conf.drawtag_offset >> uint(2); - uint tag_word = _96.scene[drawtag_base + ix]; + uint drawtag_base = _87.conf.drawtag_offset >> uint(2); + uint tag_word = _97.scene[drawtag_base + ix]; uint param = tag_word; DrawMonoid agg = map_tag(param); for (uint i = 1u; i < 8u; i++) { - uint tag_word_1 = _96.scene[(drawtag_base + ix) + i]; + uint tag_word_1 = _97.scene[(drawtag_base + ix) + i]; uint param_1 = tag_word_1; DrawMonoid param_2 = agg; DrawMonoid param_3 = map_tag(param_1); @@ -131,10 +131,10 @@ kernel void main0(const device ConfigBuf& _86 [[buffer(1)]], const device SceneB } if (gl_LocalInvocationID.x == 0u) { - _187.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; - _187.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; - _187.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset; - _187.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset; + _188.outbuf[gl_WorkGroupID.x].path_ix = agg.path_ix; + _188.outbuf[gl_WorkGroupID.x].clip_ix = agg.clip_ix; + _188.outbuf[gl_WorkGroupID.x].scene_offset = agg.scene_offset; + _188.outbuf[gl_WorkGroupID.x].info_offset = agg.info_offset; } } diff --git a/piet-gpu/shader/gen/draw_reduce.spv b/piet-gpu/shader/gen/draw_reduce.spv index 4daf43a..d6c6fb7 100644 Binary files a/piet-gpu/shader/gen/draw_reduce.spv and b/piet-gpu/shader/gen/draw_reduce.spv differ diff --git a/piet-gpu/shader/gen/draw_root.dxil b/piet-gpu/shader/gen/draw_root.dxil index 4ea23f7..873fa29 100644 Binary files a/piet-gpu/shader/gen/draw_root.dxil and b/piet-gpu/shader/gen/draw_root.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.dxil b/piet-gpu/shader/gen/kernel4.dxil index c0c27c9..9f8080b 100644 Binary files a/piet-gpu/shader/gen/kernel4.dxil and b/piet-gpu/shader/gen/kernel4.dxil differ diff --git a/piet-gpu/shader/gen/kernel4.hlsl b/piet-gpu/shader/gen/kernel4.hlsl index f17b240..92fe05b 100644 --- a/piet-gpu/shader/gen/kernel4.hlsl +++ 
b/piet-gpu/shader/gen/kernel4.hlsl @@ -48,6 +48,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -146,8 +161,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _278 : register(u0, space0); -ByteAddressBuffer _1521 : register(t1, space0); +RWByteAddressBuffer _291 : register(u0, space0); +ByteAddressBuffer _1666 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -174,8 +189,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _291 = { a.offset + offset }; - return _291; + Alloc _304 = { a.offset + offset }; + return _304; } bool touch_mem(Alloc alloc, uint offset) @@ -191,7 +206,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _278.Load(offset * 4 + 8); + uint v = _291.Load(offset * 4 + 8); return v; } @@ -200,8 +215,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _525 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _525; + CmdTag _663 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _663; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -221,9 +236,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _542 = { ref.offset + 4u }; + CmdStrokeRef _679 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _542; + CmdStrokeRef param_1 = _679; return CmdStroke_read(param, param_1); } @@ -259,8 +274,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); 
s.y_edge = asfloat(raw4); - TileSegRef _675 = { raw5 }; - s.next = _675; + TileSegRef _820 = { raw5 }; + s.next = _820; return s; } @@ -286,9 +301,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _532 = { ref.offset + 4u }; + CmdFillRef _669 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _532; + CmdFillRef param_1 = _669; return CmdFill_read(param, param_1); } @@ -305,9 +320,9 @@ CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _552 = { ref.offset + 4u }; + CmdAlphaRef _689 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _552; + CmdAlphaRef param_1 = _689; return CmdAlpha_read(param, param_1); } @@ -324,9 +339,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _562 = { ref.offset + 4u }; + CmdColorRef _699 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _562; + CmdColorRef param_1 = _699; return CmdColor_read(param, param_1); } @@ -370,12 +385,66 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _572 = { ref.offset + 4u }; + CmdLinGradRef _709 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _572; + CmdLinGradRef param_1 = _709; return CmdLinGrad_read(param, param_1); } +CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); + Alloc param_10 = a; + uint param_11 = ix + 5u; + 
uint raw5 = read_mem(param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 6u; + uint raw6 = read_mem(param_12, param_13); + Alloc param_14 = a; + uint param_15 = ix + 7u; + uint raw7 = read_mem(param_14, param_15); + Alloc param_16 = a; + uint param_17 = ix + 8u; + uint raw8 = read_mem(param_16, param_17); + Alloc param_18 = a; + uint param_19 = ix + 9u; + uint raw9 = read_mem(param_18, param_19); + Alloc param_20 = a; + uint param_21 = ix + 10u; + uint raw10 = read_mem(param_20, param_21); + CmdRadGrad s; + s.index = raw0; + s.mat = float4(asfloat(raw1), asfloat(raw2), asfloat(raw3), asfloat(raw4)); + s.xlat = float2(asfloat(raw5), asfloat(raw6)); + s.c1 = float2(asfloat(raw7), asfloat(raw8)); + s.ra = asfloat(raw9); + s.roff = asfloat(raw10); + return s; +} + +CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) +{ + CmdRadGradRef _719 = { ref.offset + 4u }; + Alloc param = a; + CmdRadGradRef param_1 = _719; + return CmdRadGrad_read(param, param_1); +} + CmdImage CmdImage_read(Alloc a, CmdImageRef ref) { uint ix = ref.offset >> uint(2); @@ -393,9 +462,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _582 = { ref.offset + 4u }; + CmdImageRef _729 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _582; + CmdImageRef param_1 = _729; return CmdImage_read(param, param_1); } @@ -408,10 +477,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _1493 = fromsRGB(param_1); - fg_rgba.x = _1493.x; - fg_rgba.y = _1493.y; - fg_rgba.z = _1493.z; + float3 _1638 = fromsRGB(param_1); + fg_rgba.x = _1638.x; + fg_rgba.y = _1638.y; + fg_rgba.z = _1638.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -445,9 +514,9 @@ CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { - CmdEndClipRef _592 
= { ref.offset + 4u }; + CmdEndClipRef _739 = { ref.offset + 4u }; Alloc param = a; - CmdEndClipRef param_1 = _592; + CmdEndClipRef param_1 = _739; return CmdEndClip_read(param, param_1); } @@ -637,8 +706,8 @@ float3 set_lum(float3 c, float l) { float3 param = c; float3 param_1 = c + (l - lum(param)).xxx; - float3 _901 = clip_color(param_1); - return _901; + float3 _1046 = clip_color(param_1); + return _1046; } float3 mix_blend(float3 cb, float3 cs, uint mode) @@ -726,9 +795,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1192 = set_sat(param_21, param_22); + float3 _1337 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1192; + float3 param_24 = _1337; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -738,9 +807,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1206 = set_sat(param_27, param_28); + float3 _1351 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1206; + float3 param_30 = _1351; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -877,24 +946,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _602 = { ref.offset + 4u }; + CmdJumpRef _749 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _602; + CmdJumpRef param_1 = _749; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _1521.Load(8)) + gl_WorkGroupID.x; - Alloc _1536; - _1536.offset = _1521.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1666.Load(8)) + gl_WorkGroupID.x; + Alloc _1681; + _1681.offset = _1666.Load(24); Alloc param; - param.offset = _1536.offset; + param.offset = _1681.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef 
_1545 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1545; + CmdRef _1690 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1690; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -903,7 +972,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _278.Load(4) == 0u; + bool mem_ok = _291.Load(4) == 0u; float df[8]; TileSegRef tile_seg_ref; float area[8]; @@ -928,8 +997,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _1638 = { stroke.tile_ref }; - tile_seg_ref = _1638; + TileSegRef _1784 = { stroke.tile_ref }; + tile_seg_ref = _1784; do { uint param_7 = tile_seg_ref.offset; @@ -965,8 +1034,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _1758 = { fill.tile_ref }; - tile_seg_ref = _1758; + TileSegRef _1904 = { fill.tile_ref }; + tile_seg_ref = _1904; do { uint param_15 = tile_seg_ref.offset; @@ -1055,11 +1124,12 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _2092 = fromsRGB(param_29); - fg_rgba.x = _2092.x; - fg_rgba.y = _2092.y; - fg_rgba.z = _2092.z; - rgba[k_9] = fg_rgba; + float3 _2238 = fromsRGB(param_29); + fg_rgba.x = _2238.x; + fg_rgba.y = _2238.y; + fg_rgba.z = _2238.z; + float4 fg_k_1 = fg_rgba * area[k_9]; + rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; } cmd_ref.offset += 20u; break; @@ -1068,74 +1138,100 @@ void comp_main() { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31); - uint2 param_32 = xy_uint; - CmdImage param_33 = fill_img; - float4 _2121[8]; - fillImage(_2121, param_32, param_33); - float4 img[8] = _2121; + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31); for (uint k_10 = 0u; k_10 < 8u; k_10++) { - float4 fg_k_1 = img[k_10] * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0f 
- fg_k_1.w)) + fg_k_1; + uint param_32 = k_10; + float2 my_xy_1 = xy + float2(chunk_offset(param_32)); + my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; + float ba = dot(my_xy_1, rad.c1); + float ca = rad.ra * dot(my_xy_1, my_xy_1); + float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; + int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); + float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; + float3 param_33 = fg_rgba_1.xyz; + float3 _2348 = fromsRGB(param_33); + fg_rgba_1.x = _2348.x; + fg_rgba_1.y = _2348.y; + fg_rgba_1.z = _2348.z; + float4 fg_k_2 = fg_rgba_1 * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; } - cmd_ref.offset += 12u; + cmd_ref.offset += 48u; break; } case 8u: { + Alloc param_34 = cmd_alloc; + CmdRef param_35 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_34, param_35); + uint2 param_36 = xy_uint; + CmdImage param_37 = fill_img; + float4 _2391[8]; + fillImage(_2391, param_36, param_37); + float4 img[8] = _2391; for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + float4 fg_k_3 = img[k_11] * area[k_11]; + rgba[k_11] = (rgba[k_11] * (1.0f - fg_k_3.w)) + fg_k_3; + } + cmd_ref.offset += 12u; + break; + } + case 9u: + { + for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_2 = min(clip_depth, 127u); - float4 param_34 = float4(rgba[k_11]); - uint _2184 = packsRGB(param_34); - blend_stack[d_2][k_11] = _2184; - rgba[k_11] = 0.0f.xxxx; + float4 param_38 = float4(rgba[k_12]); + uint _2454 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2454; + rgba[k_12] = 0.0f.xxxx; } clip_depth++; cmd_ref.offset += 4u; break; } - case 9u: + case 10u: { - Alloc param_35 = cmd_alloc; - CmdRef param_36 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36); + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; - for (uint k_12 = 0u; 
k_12 < 8u; k_12++) + for (uint k_13 = 0u; k_13 < 8u; k_13++) { uint d_3 = min(clip_depth, 127u); - uint param_37 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_37); - float4 fg_1 = rgba[k_12] * area[k_12]; - float3 param_38 = bg.xyz; - float3 param_39 = fg_1.xyz; - uint param_40 = blend_mode; - float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2251 = fg_1; - float _2255 = fg_1.w; - float3 _2262 = lerp(_2251.xyz, blend, float((_2255 * bg.w) > 0.0f).xxx); - fg_1.x = _2262.x; - fg_1.y = _2262.y; - fg_1.z = _2262.z; - float3 param_41 = bg.xyz; - float3 param_42 = fg_1.xyz; - float param_43 = bg.w; - float param_44 = fg_1.w; - uint param_45 = comp_mode; - rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); + uint param_41 = blend_stack[d_3][k_13]; + float4 bg = unpacksRGB(param_41); + float4 fg_1 = rgba[k_13] * area[k_13]; + float3 param_42 = bg.xyz; + float3 param_43 = fg_1.xyz; + uint param_44 = blend_mode; + float3 blend = mix_blend(param_42, param_43, param_44); + float4 _2521 = fg_1; + float _2525 = fg_1.w; + float3 _2532 = lerp(_2521.xyz, blend, float((_2525 * bg.w) > 0.0f).xxx); + fg_1.x = _2532.x; + fg_1.y = _2532.y; + fg_1.z = _2532.z; + float3 param_45 = bg.xyz; + float3 param_46 = fg_1.xyz; + float param_47 = bg.w; + float param_48 = fg_1.w; + uint param_49 = comp_mode; + rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); } cmd_ref.offset += 8u; break; } - case 10u: + case 11u: { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - CmdRef _2299 = { Cmd_Jump_read(param_46, param_47).new_ref }; - cmd_ref = _2299; + Alloc param_50 = cmd_alloc; + CmdRef param_51 = cmd_ref; + CmdRef _2569 = { Cmd_Jump_read(param_50, param_51).new_ref }; + cmd_ref = _2569; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1143,9 +1239,9 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_48 = i_1; - float3 param_49 = rgba[i_1].xyz; - image[int2(xy_uint + chunk_offset(param_48))] = 
float4(tosRGB(param_49), rgba[i_1].w); + uint param_52 = i_1; + float3 param_53 = rgba[i_1].xyz; + image[int2(xy_uint + chunk_offset(param_52))] = float4(tosRGB(param_53), rgba[i_1].w); } } diff --git a/piet-gpu/shader/gen/kernel4.msl b/piet-gpu/shader/gen/kernel4.msl index c1f41af..6489563 100644 --- a/piet-gpu/shader/gen/kernel4.msl +++ b/piet-gpu/shader/gen/kernel4.msl @@ -94,6 +94,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -222,7 +237,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_278) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_291) { Alloc param = alloc; uint param_1 = offset; @@ -230,29 +245,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_278.memory[offset]; + uint v = v_291.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_278); + uint tag_and_flags = read_mem(param, param_1, v_291); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_278) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, 
param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -260,11 +275,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_278); + return CmdStroke_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -276,27 +291,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_278) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_278); + uint raw2 = read_mem(param_4, param_5, v_291); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_278); + uint raw3 = read_mem(param_6, param_7, v_291); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_278); + uint raw4 = read_mem(param_8, param_9, v_291); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, 
v_278); + uint raw5 = read_mem(param_10, param_11, v_291); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -312,15 +327,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_278) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -328,51 +343,51 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_278); + return CmdFill_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_278) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread 
const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_278); + return CmdAlpha_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_278) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_278); + return CmdColor_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -393,21 +408,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_278) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_278); + uint raw2 = read_mem(param_4, param_5, v_291); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_278); + uint 
raw3 = read_mem(param_6, param_7, v_291); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -417,23 +432,78 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return CmdLinGrad_read(param, param_1, v_278); + return CmdLinGrad_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_278) +CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5, v_291); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7, v_291); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9, v_291); + Alloc param_10 = a; + uint param_11 = ix + 5u; + uint raw5 = read_mem(param_10, param_11, v_291); + Alloc param_12 = a; + uint param_13 = ix + 6u; + uint raw6 = read_mem(param_12, param_13, v_291); + Alloc param_14 = a; + uint param_15 = ix + 7u; + uint raw7 = read_mem(param_14, param_15, v_291); + Alloc param_16 = a; + uint param_17 = ix + 8u; + uint raw8 = read_mem(param_16, param_17, v_291); + Alloc param_18 = a; + uint param_19 = ix + 9u; + uint raw9 = read_mem(param_18, param_19, v_291); + Alloc param_20 = a; + 
uint param_21 = ix + 10u; + uint raw10 = read_mem(param_20, param_21, v_291); + CmdRadGrad s; + s.index = raw0; + s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); + s.xlat = float2(as_type(raw5), as_type(raw6)); + s.c1 = float2(as_type(raw7), as_type(raw8)); + s.ra = as_type(raw9); + s.roff = as_type(raw10); + return s; +} + +static inline __attribute__((always_inline)) +CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +{ + Alloc param = a; + CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; + return CmdRadGrad_read(param, param_1, v_291); +} + +static inline __attribute__((always_inline)) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_291) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1, v_291); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3, v_291); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -441,11 +511,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_278); + return CmdImage_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -458,10 +528,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1493 = fromsRGB(param_1); - fg_rgba.x = _1493.x; - fg_rgba.y = _1493.y; - fg_rgba.z = _1493.z; + float3 
_1638 = fromsRGB(param_1); + fg_rgba.x = _1638.x; + fg_rgba.y = _1638.y; + fg_rgba.z = _1638.z; rgba[i] = fg_rgba; } return rgba; @@ -485,23 +555,23 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_278) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdEndClip s; s.blend = raw0; return s; } static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_278); + return CmdEndClip_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -701,8 +771,8 @@ float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; float3 param_1 = c + float3(l - lum(param)); - float3 _901 = clip_color(param_1); - return _901; + float3 _1046 = clip_color(param_1); + return _1046; } static inline __attribute__((always_inline)) @@ -791,9 +861,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1192 = set_sat(param_21, param_22); + float3 _1337 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1192; + float3 param_24 = _1337; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -803,9 +873,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 
= sat(param_26); - float3 _1206 = set_sat(param_27, param_28); + float3 _1351 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1206; + float3 param_30 = _1351; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -931,30 +1001,30 @@ float4 mix_compose(thread const float3& cb, thread const float3& cs, thread cons } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_278) +CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_278); + return CmdJump_read(param, param_1, v_291); } -kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1521 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1666 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1521.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * 
_1666.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1521.conf.ptcl_alloc.offset; + param.offset = _1666.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -967,7 +1037,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_278.mem_error == 0u; + bool mem_ok = v_291.mem_error == 0u; spvUnsafeArray df; TileSegRef tile_seg_ref; spvUnsafeArray area; @@ -976,7 +1046,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_278).tag; + uint tag = Cmd_tag(param_3, param_4, v_291).tag; if (tag == 0u) { break; @@ -987,7 +1057,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_278); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_291); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -1000,7 +1070,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_278); + TileSeg seg = TileSeg_read(param_10, param_11, v_291); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -1023,7 +1093,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_278); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_291); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -1036,7 +1106,7 @@ kernel void main0(device Memory& v_278 
[[buffer(0)]], const device ConfigBuf& _1 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_278); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_291); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -1080,7 +1150,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_278); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_291); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -1092,7 +1162,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_278); + CmdColor color = Cmd_Color_read(param_23, param_24, v_291); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -1107,7 +1177,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_278); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_291); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -1117,11 +1187,12 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2092 = fromsRGB(param_29); - fg_rgba.x = _2092.x; - fg_rgba.y = _2092.y; - fg_rgba.z = _2092.z; - rgba[k_9] = fg_rgba; + float3 _2238 = fromsRGB(param_29); + fg_rgba.x = _2238.x; + fg_rgba.y = _2238.y; + fg_rgba.z = _2238.z; + float4 fg_k_1 = fg_rgba * 
area[k_9]; + rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; } cmd_ref.offset += 20u; break; @@ -1130,72 +1201,98 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_278); - uint2 param_32 = xy_uint; - CmdImage param_33 = fill_img; - spvUnsafeArray img; - img = fillImage(param_32, param_33, image_atlas); + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_291); for (uint k_10 = 0u; k_10 < 8u; k_10++) { - float4 fg_k_1 = img[k_10] * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_1.w)) + fg_k_1; + uint param_32 = k_10; + float2 my_xy_1 = xy + float2(chunk_offset(param_32)); + my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; + float ba = dot(my_xy_1, rad.c1); + float ca = rad.ra * dot(my_xy_1, my_xy_1); + float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; + int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); + float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); + float3 param_33 = fg_rgba_1.xyz; + float3 _2348 = fromsRGB(param_33); + fg_rgba_1.x = _2348.x; + fg_rgba_1.y = _2348.y; + fg_rgba_1.z = _2348.z; + float4 fg_k_2 = fg_rgba_1 * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; } - cmd_ref.offset += 12u; + cmd_ref.offset += 48u; break; } case 8u: { + Alloc param_34 = cmd_alloc; + CmdRef param_35 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_291); + uint2 param_36 = xy_uint; + CmdImage param_37 = fill_img; + spvUnsafeArray img; + img = fillImage(param_36, param_37, image_atlas); for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + float4 fg_k_3 = img[k_11] * area[k_11]; + rgba[k_11] = (rgba[k_11] * (1.0 - fg_k_3.w)) + fg_k_3; + } + cmd_ref.offset += 12u; + break; + } + case 9u: + { + for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_2 = min(clip_depth, 127u); - float4 param_34 = float4(rgba[k_11]); - uint _2184 = 
packsRGB(param_34); - blend_stack[d_2][k_11] = _2184; - rgba[k_11] = float4(0.0); + float4 param_38 = float4(rgba[k_12]); + uint _2454 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2454; + rgba[k_12] = float4(0.0); } clip_depth++; cmd_ref.offset += 4u; break; } - case 9u: + case 10u: { - Alloc param_35 = cmd_alloc; - CmdRef param_36 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36, v_278); + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40, v_291); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; - for (uint k_12 = 0u; k_12 < 8u; k_12++) + for (uint k_13 = 0u; k_13 < 8u; k_13++) { uint d_3 = min(clip_depth, 127u); - uint param_37 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_37); - float4 fg_1 = rgba[k_12] * area[k_12]; - float3 param_38 = bg.xyz; - float3 param_39 = fg_1.xyz; - uint param_40 = blend_mode; - float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2251 = fg_1; - float _2255 = fg_1.w; - float3 _2262 = mix(_2251.xyz, blend, float3(float((_2255 * bg.w) > 0.0))); - fg_1.x = _2262.x; - fg_1.y = _2262.y; - fg_1.z = _2262.z; - float3 param_41 = bg.xyz; - float3 param_42 = fg_1.xyz; - float param_43 = bg.w; - float param_44 = fg_1.w; - uint param_45 = comp_mode; - rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); + uint param_41 = blend_stack[d_3][k_13]; + float4 bg = unpacksRGB(param_41); + float4 fg_1 = rgba[k_13] * area[k_13]; + float3 param_42 = bg.xyz; + float3 param_43 = fg_1.xyz; + uint param_44 = blend_mode; + float3 blend = mix_blend(param_42, param_43, param_44); + float4 _2521 = fg_1; + float _2525 = fg_1.w; + float3 _2532 = mix(_2521.xyz, blend, float3(float((_2525 * bg.w) > 0.0))); + fg_1.x = _2532.x; + fg_1.y = _2532.y; + fg_1.z = _2532.z; + float3 param_45 = bg.xyz; + float3 param_46 = fg_1.xyz; + float param_47 = bg.w; + float param_48 = fg_1.w; + uint 
param_49 = comp_mode; + rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); } cmd_ref.offset += 8u; break; } - case 10u: + case 11u: { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_278).new_ref }; + Alloc param_50 = cmd_alloc; + CmdRef param_51 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_50, param_51, v_291).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1203,9 +1300,9 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_48 = i_1; - float3 param_49 = rgba[i_1].xyz; - image.write(float4(tosRGB(param_49), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); + uint param_52 = i_1; + float3 param_53 = rgba[i_1].xyz; + image.write(float4(tosRGB(param_53), rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_52)))); } } diff --git a/piet-gpu/shader/gen/kernel4.spv b/piet-gpu/shader/gen/kernel4.spv index 91272da..7061263 100644 Binary files a/piet-gpu/shader/gen/kernel4.spv and b/piet-gpu/shader/gen/kernel4.spv differ diff --git a/piet-gpu/shader/gen/kernel4_gray.dxil b/piet-gpu/shader/gen/kernel4_gray.dxil index 18c4b7e..a594d50 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.dxil and b/piet-gpu/shader/gen/kernel4_gray.dxil differ diff --git a/piet-gpu/shader/gen/kernel4_gray.hlsl b/piet-gpu/shader/gen/kernel4_gray.hlsl index de95771..019a73c 100644 --- a/piet-gpu/shader/gen/kernel4_gray.hlsl +++ b/piet-gpu/shader/gen/kernel4_gray.hlsl @@ -48,6 +48,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -146,8 +161,8 @@ struct Config static const uint3 gl_WorkGroupSize = uint3(8u, 4u, 1u); -RWByteAddressBuffer _278 : register(u0, space0); -ByteAddressBuffer _1521 : register(t1, space0); 
+RWByteAddressBuffer _291 : register(u0, space0); +ByteAddressBuffer _1666 : register(t1, space0); RWTexture2D image_atlas : register(u3, space0); RWTexture2D gradients : register(u4, space0); RWTexture2D image : register(u2, space0); @@ -174,8 +189,8 @@ float4 spvUnpackUnorm4x8(uint value) Alloc slice_mem(Alloc a, uint offset, uint size) { - Alloc _291 = { a.offset + offset }; - return _291; + Alloc _304 = { a.offset + offset }; + return _304; } bool touch_mem(Alloc alloc, uint offset) @@ -191,7 +206,7 @@ uint read_mem(Alloc alloc, uint offset) { return 0u; } - uint v = _278.Load(offset * 4 + 8); + uint v = _291.Load(offset * 4 + 8); return v; } @@ -200,8 +215,8 @@ CmdTag Cmd_tag(Alloc a, CmdRef ref) Alloc param = a; uint param_1 = ref.offset >> uint(2); uint tag_and_flags = read_mem(param, param_1); - CmdTag _525 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; - return _525; + CmdTag _663 = { tag_and_flags & 65535u, tag_and_flags >> uint(16) }; + return _663; } CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) @@ -221,9 +236,9 @@ CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) { - CmdStrokeRef _542 = { ref.offset + 4u }; + CmdStrokeRef _679 = { ref.offset + 4u }; Alloc param = a; - CmdStrokeRef param_1 = _542; + CmdStrokeRef param_1 = _679; return CmdStroke_read(param, param_1); } @@ -259,8 +274,8 @@ TileSeg TileSeg_read(Alloc a, TileSegRef ref) s.origin = float2(asfloat(raw0), asfloat(raw1)); s._vector = float2(asfloat(raw2), asfloat(raw3)); s.y_edge = asfloat(raw4); - TileSegRef _675 = { raw5 }; - s.next = _675; + TileSegRef _820 = { raw5 }; + s.next = _820; return s; } @@ -286,9 +301,9 @@ CmdFill CmdFill_read(Alloc a, CmdFillRef ref) CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) { - CmdFillRef _532 = { ref.offset + 4u }; + CmdFillRef _669 = { ref.offset + 4u }; Alloc param = a; - CmdFillRef param_1 = _532; + CmdFillRef param_1 = _669; return CmdFill_read(param, param_1); } @@ -305,9 +320,9 @@ CmdAlpha 
CmdAlpha_read(Alloc a, CmdAlphaRef ref) CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) { - CmdAlphaRef _552 = { ref.offset + 4u }; + CmdAlphaRef _689 = { ref.offset + 4u }; Alloc param = a; - CmdAlphaRef param_1 = _552; + CmdAlphaRef param_1 = _689; return CmdAlpha_read(param, param_1); } @@ -324,9 +339,9 @@ CmdColor CmdColor_read(Alloc a, CmdColorRef ref) CmdColor Cmd_Color_read(Alloc a, CmdRef ref) { - CmdColorRef _562 = { ref.offset + 4u }; + CmdColorRef _699 = { ref.offset + 4u }; Alloc param = a; - CmdColorRef param_1 = _562; + CmdColorRef param_1 = _699; return CmdColor_read(param, param_1); } @@ -370,12 +385,66 @@ CmdLinGrad CmdLinGrad_read(Alloc a, CmdLinGradRef ref) CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { - CmdLinGradRef _572 = { ref.offset + 4u }; + CmdLinGradRef _709 = { ref.offset + 4u }; Alloc param = a; - CmdLinGradRef param_1 = _572; + CmdLinGradRef param_1 = _709; return CmdLinGrad_read(param, param_1); } +CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9); + Alloc param_10 = a; + uint param_11 = ix + 5u; + uint raw5 = read_mem(param_10, param_11); + Alloc param_12 = a; + uint param_13 = ix + 6u; + uint raw6 = read_mem(param_12, param_13); + Alloc param_14 = a; + uint param_15 = ix + 7u; + uint raw7 = read_mem(param_14, param_15); + Alloc param_16 = a; + uint param_17 = ix + 8u; + uint raw8 = read_mem(param_16, param_17); + Alloc param_18 = a; + uint param_19 = ix + 9u; + uint raw9 = read_mem(param_18, param_19); + Alloc param_20 = a; + uint param_21 = ix + 10u; + 
uint raw10 = read_mem(param_20, param_21); + CmdRadGrad s; + s.index = raw0; + s.mat = float4(asfloat(raw1), asfloat(raw2), asfloat(raw3), asfloat(raw4)); + s.xlat = float2(asfloat(raw5), asfloat(raw6)); + s.c1 = float2(asfloat(raw7), asfloat(raw8)); + s.ra = asfloat(raw9); + s.roff = asfloat(raw10); + return s; +} + +CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) +{ + CmdRadGradRef _719 = { ref.offset + 4u }; + Alloc param = a; + CmdRadGradRef param_1 = _719; + return CmdRadGrad_read(param, param_1); +} + CmdImage CmdImage_read(Alloc a, CmdImageRef ref) { uint ix = ref.offset >> uint(2); @@ -393,9 +462,9 @@ CmdImage CmdImage_read(Alloc a, CmdImageRef ref) CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { - CmdImageRef _582 = { ref.offset + 4u }; + CmdImageRef _729 = { ref.offset + 4u }; Alloc param = a; - CmdImageRef param_1 = _582; + CmdImageRef param_1 = _729; return CmdImage_read(param, param_1); } @@ -408,10 +477,10 @@ void fillImage(out float4 spvReturnValue[8], uint2 xy, CmdImage cmd_img) int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas[uv]; float3 param_1 = fg_rgba.xyz; - float3 _1493 = fromsRGB(param_1); - fg_rgba.x = _1493.x; - fg_rgba.y = _1493.y; - fg_rgba.z = _1493.z; + float3 _1638 = fromsRGB(param_1); + fg_rgba.x = _1638.x; + fg_rgba.y = _1638.y; + fg_rgba.z = _1638.z; rgba[i] = fg_rgba; } spvReturnValue = rgba; @@ -445,9 +514,9 @@ CmdEndClip CmdEndClip_read(Alloc a, CmdEndClipRef ref) CmdEndClip Cmd_EndClip_read(Alloc a, CmdRef ref) { - CmdEndClipRef _592 = { ref.offset + 4u }; + CmdEndClipRef _739 = { ref.offset + 4u }; Alloc param = a; - CmdEndClipRef param_1 = _592; + CmdEndClipRef param_1 = _739; return CmdEndClip_read(param, param_1); } @@ -637,8 +706,8 @@ float3 set_lum(float3 c, float l) { float3 param = c; float3 param_1 = c + (l - lum(param)).xxx; - float3 _901 = clip_color(param_1); - return _901; + float3 _1046 = clip_color(param_1); + return _1046; } float3 mix_blend(float3 cb, float3 cs, uint mode) 
@@ -726,9 +795,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1192 = set_sat(param_21, param_22); + float3 _1337 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1192; + float3 param_24 = _1337; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -738,9 +807,9 @@ float3 mix_blend(float3 cb, float3 cs, uint mode) float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1206 = set_sat(param_27, param_28); + float3 _1351 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1206; + float3 param_30 = _1351; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -877,24 +946,24 @@ CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) { - CmdJumpRef _602 = { ref.offset + 4u }; + CmdJumpRef _749 = { ref.offset + 4u }; Alloc param = a; - CmdJumpRef param_1 = _602; + CmdJumpRef param_1 = _749; return CmdJump_read(param, param_1); } void comp_main() { - uint tile_ix = (gl_WorkGroupID.y * _1521.Load(8)) + gl_WorkGroupID.x; - Alloc _1536; - _1536.offset = _1521.Load(24); + uint tile_ix = (gl_WorkGroupID.y * _1666.Load(8)) + gl_WorkGroupID.x; + Alloc _1681; + _1681.offset = _1666.Load(24); Alloc param; - param.offset = _1536.offset; + param.offset = _1681.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); - CmdRef _1545 = { cmd_alloc.offset }; - CmdRef cmd_ref = _1545; + CmdRef _1690 = { cmd_alloc.offset }; + CmdRef cmd_ref = _1690; uint2 xy_uint = uint2(gl_LocalInvocationID.x + (16u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (16u * gl_WorkGroupID.y)); float2 xy = float2(xy_uint); float4 rgba[8]; @@ -903,7 +972,7 @@ void comp_main() rgba[i] = 0.0f.xxxx; } uint clip_depth = 0u; - bool mem_ok = _278.Load(4) == 0u; + bool mem_ok = _291.Load(4) == 0u; float df[8]; 
TileSegRef tile_seg_ref; float area[8]; @@ -928,8 +997,8 @@ void comp_main() { df[k] = 1000000000.0f; } - TileSegRef _1638 = { stroke.tile_ref }; - tile_seg_ref = _1638; + TileSegRef _1784 = { stroke.tile_ref }; + tile_seg_ref = _1784; do { uint param_7 = tile_seg_ref.offset; @@ -965,8 +1034,8 @@ void comp_main() { area[k_3] = float(fill.backdrop); } - TileSegRef _1758 = { fill.tile_ref }; - tile_seg_ref = _1758; + TileSegRef _1904 = { fill.tile_ref }; + tile_seg_ref = _1904; do { uint param_15 = tile_seg_ref.offset; @@ -1055,11 +1124,12 @@ void comp_main() int x = int(round(clamp(my_d, 0.0f, 1.0f) * 511.0f)); float4 fg_rgba = gradients[int2(x, int(lin.index))]; float3 param_29 = fg_rgba.xyz; - float3 _2092 = fromsRGB(param_29); - fg_rgba.x = _2092.x; - fg_rgba.y = _2092.y; - fg_rgba.z = _2092.z; - rgba[k_9] = fg_rgba; + float3 _2238 = fromsRGB(param_29); + fg_rgba.x = _2238.x; + fg_rgba.y = _2238.y; + fg_rgba.z = _2238.z; + float4 fg_k_1 = fg_rgba * area[k_9]; + rgba[k_9] = (rgba[k_9] * (1.0f - fg_k_1.w)) + fg_k_1; } cmd_ref.offset += 20u; break; @@ -1068,74 +1138,100 @@ void comp_main() { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31); - uint2 param_32 = xy_uint; - CmdImage param_33 = fill_img; - float4 _2121[8]; - fillImage(_2121, param_32, param_33); - float4 img[8] = _2121; + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31); for (uint k_10 = 0u; k_10 < 8u; k_10++) { - float4 fg_k_1 = img[k_10] * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_1.w)) + fg_k_1; + uint param_32 = k_10; + float2 my_xy_1 = xy + float2(chunk_offset(param_32)); + my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; + float ba = dot(my_xy_1, rad.c1); + float ca = rad.ra * dot(my_xy_1, my_xy_1); + float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; + int x_1 = int(round(clamp(t_2, 0.0f, 1.0f) * 511.0f)); + float4 fg_rgba_1 = gradients[int2(x_1, int(rad.index))]; + float3 param_33 = 
fg_rgba_1.xyz; + float3 _2348 = fromsRGB(param_33); + fg_rgba_1.x = _2348.x; + fg_rgba_1.y = _2348.y; + fg_rgba_1.z = _2348.z; + float4 fg_k_2 = fg_rgba_1 * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0f - fg_k_2.w)) + fg_k_2; } - cmd_ref.offset += 12u; + cmd_ref.offset += 48u; break; } case 8u: { + Alloc param_34 = cmd_alloc; + CmdRef param_35 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_34, param_35); + uint2 param_36 = xy_uint; + CmdImage param_37 = fill_img; + float4 _2391[8]; + fillImage(_2391, param_36, param_37); + float4 img[8] = _2391; for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + float4 fg_k_3 = img[k_11] * area[k_11]; + rgba[k_11] = (rgba[k_11] * (1.0f - fg_k_3.w)) + fg_k_3; + } + cmd_ref.offset += 12u; + break; + } + case 9u: + { + for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_2 = min(clip_depth, 127u); - float4 param_34 = float4(rgba[k_11]); - uint _2184 = packsRGB(param_34); - blend_stack[d_2][k_11] = _2184; - rgba[k_11] = 0.0f.xxxx; + float4 param_38 = float4(rgba[k_12]); + uint _2454 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2454; + rgba[k_12] = 0.0f.xxxx; } clip_depth++; cmd_ref.offset += 4u; break; } - case 9u: + case 10u: { - Alloc param_35 = cmd_alloc; - CmdRef param_36 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36); + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + CmdEndClip end_clip = Cmd_EndClip_read(param_39, param_40); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; - for (uint k_12 = 0u; k_12 < 8u; k_12++) + for (uint k_13 = 0u; k_13 < 8u; k_13++) { uint d_3 = min(clip_depth, 127u); - uint param_37 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_37); - float4 fg_1 = rgba[k_12] * area[k_12]; - float3 param_38 = bg.xyz; - float3 param_39 = fg_1.xyz; - uint param_40 = blend_mode; - float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2251 = fg_1; - float _2255 = fg_1.w; - float3 _2262 = lerp(_2251.xyz, blend, 
float((_2255 * bg.w) > 0.0f).xxx); - fg_1.x = _2262.x; - fg_1.y = _2262.y; - fg_1.z = _2262.z; - float3 param_41 = bg.xyz; - float3 param_42 = fg_1.xyz; - float param_43 = bg.w; - float param_44 = fg_1.w; - uint param_45 = comp_mode; - rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); + uint param_41 = blend_stack[d_3][k_13]; + float4 bg = unpacksRGB(param_41); + float4 fg_1 = rgba[k_13] * area[k_13]; + float3 param_42 = bg.xyz; + float3 param_43 = fg_1.xyz; + uint param_44 = blend_mode; + float3 blend = mix_blend(param_42, param_43, param_44); + float4 _2521 = fg_1; + float _2525 = fg_1.w; + float3 _2532 = lerp(_2521.xyz, blend, float((_2525 * bg.w) > 0.0f).xxx); + fg_1.x = _2532.x; + fg_1.y = _2532.y; + fg_1.z = _2532.z; + float3 param_45 = bg.xyz; + float3 param_46 = fg_1.xyz; + float param_47 = bg.w; + float param_48 = fg_1.w; + uint param_49 = comp_mode; + rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); } cmd_ref.offset += 8u; break; } - case 10u: + case 11u: { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - CmdRef _2299 = { Cmd_Jump_read(param_46, param_47).new_ref }; - cmd_ref = _2299; + Alloc param_50 = cmd_alloc; + CmdRef param_51 = cmd_ref; + CmdRef _2569 = { Cmd_Jump_read(param_50, param_51).new_ref }; + cmd_ref = _2569; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1143,8 +1239,8 @@ void comp_main() } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_48 = i_1; - image[int2(xy_uint + chunk_offset(param_48))] = rgba[i_1].w.x; + uint param_52 = i_1; + image[int2(xy_uint + chunk_offset(param_52))] = rgba[i_1].w.x; } } diff --git a/piet-gpu/shader/gen/kernel4_gray.msl b/piet-gpu/shader/gen/kernel4_gray.msl index 5128e99..6402c6f 100644 --- a/piet-gpu/shader/gen/kernel4_gray.msl +++ b/piet-gpu/shader/gen/kernel4_gray.msl @@ -94,6 +94,21 @@ struct CmdLinGrad float line_c; }; +struct CmdRadGradRef +{ + uint offset; +}; + +struct CmdRadGrad +{ + uint index; + float4 mat; + float2 xlat; + 
float2 c1; + float ra; + float roff; +}; + struct CmdImageRef { uint offset; @@ -222,7 +237,7 @@ bool touch_mem(thread const Alloc& alloc, thread const uint& offset) } static inline __attribute__((always_inline)) -uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_278) +uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memory& v_291) { Alloc param = alloc; uint param_1 = offset; @@ -230,29 +245,29 @@ uint read_mem(thread const Alloc& alloc, thread const uint& offset, device Memor { return 0u; } - uint v = v_278.memory[offset]; + uint v = v_291.memory[offset]; return v; } static inline __attribute__((always_inline)) -CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdTag Cmd_tag(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; uint param_1 = ref.offset >> uint(2); - uint tag_and_flags = read_mem(param, param_1, v_278); + uint tag_and_flags = read_mem(param, param_1, v_291); return CmdTag{ tag_and_flags & 65535u, tag_and_flags >> uint(16) }; } static inline __attribute__((always_inline)) -CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_278) +CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); CmdStroke s; s.tile_ref = raw0; s.half_width = as_type(raw1); @@ -260,11 +275,11 @@ CmdStroke CmdStroke_read(thread const Alloc& a, thread const CmdStrokeRef& ref, } static inline __attribute__((always_inline)) -CmdStroke Cmd_Stroke_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdStroke Cmd_Stroke_read(thread 
const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdStrokeRef param_1 = CmdStrokeRef{ ref.offset + 4u }; - return CmdStroke_read(param, param_1, v_278); + return CmdStroke_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -276,27 +291,27 @@ Alloc new_alloc(thread const uint& offset, thread const uint& size, thread const } static inline __attribute__((always_inline)) -TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_278) +TileSeg TileSeg_read(thread const Alloc& a, thread const TileSegRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_278); + uint raw2 = read_mem(param_4, param_5, v_291); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_278); + uint raw3 = read_mem(param_6, param_7, v_291); Alloc param_8 = a; uint param_9 = ix + 4u; - uint raw4 = read_mem(param_8, param_9, v_278); + uint raw4 = read_mem(param_8, param_9, v_291); Alloc param_10 = a; uint param_11 = ix + 5u; - uint raw5 = read_mem(param_10, param_11, v_278); + uint raw5 = read_mem(param_10, param_11, v_291); TileSeg s; s.origin = float2(as_type(raw0), as_type(raw1)); s.vector = float2(as_type(raw2), as_type(raw3)); @@ -312,15 +327,15 @@ uint2 chunk_offset(thread const uint& i) } static inline __attribute__((always_inline)) -CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_278) +CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - 
uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); CmdFill s; s.tile_ref = raw0; s.backdrop = int(raw1); @@ -328,51 +343,51 @@ CmdFill CmdFill_read(thread const Alloc& a, thread const CmdFillRef& ref, device } static inline __attribute__((always_inline)) -CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdFill Cmd_Fill_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdFillRef param_1 = CmdFillRef{ ref.offset + 4u }; - return CmdFill_read(param, param_1, v_278); + return CmdFill_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_278) +CmdAlpha CmdAlpha_read(thread const Alloc& a, thread const CmdAlphaRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdAlpha s; s.alpha = as_type(raw0); return s; } static inline __attribute__((always_inline)) -CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdAlpha Cmd_Alpha_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdAlphaRef param_1 = CmdAlphaRef{ ref.offset + 4u }; - return CmdAlpha_read(param, param_1, v_278); + return CmdAlpha_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_278) +CmdColor CmdColor_read(thread const Alloc& a, thread const CmdColorRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = 
read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdColor s; s.rgba_color = raw0; return s; } static inline __attribute__((always_inline)) -CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdColor Cmd_Color_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdColorRef param_1 = CmdColorRef{ ref.offset + 4u }; - return CmdColor_read(param, param_1, v_278); + return CmdColor_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -393,21 +408,21 @@ float4 unpacksRGB(thread const uint& srgba) } static inline __attribute__((always_inline)) -CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_278) +CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); Alloc param_4 = a; uint param_5 = ix + 2u; - uint raw2 = read_mem(param_4, param_5, v_278); + uint raw2 = read_mem(param_4, param_5, v_291); Alloc param_6 = a; uint param_7 = ix + 3u; - uint raw3 = read_mem(param_6, param_7, v_278); + uint raw3 = read_mem(param_6, param_7, v_291); CmdLinGrad s; s.index = raw0; s.line_x = as_type(raw1); @@ -417,23 +432,78 @@ CmdLinGrad CmdLinGrad_read(thread const Alloc& a, thread const CmdLinGradRef& re } static inline __attribute__((always_inline)) -CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdLinGrad Cmd_LinGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdLinGradRef param_1 = CmdLinGradRef{ ref.offset + 4u }; - return 
CmdLinGrad_read(param, param_1, v_278); + return CmdLinGrad_read(param, param_1, v_291); } static inline __attribute__((always_inline)) -CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_278) +CmdRadGrad CmdRadGrad_read(thread const Alloc& a, thread const CmdRadGradRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); Alloc param_2 = a; uint param_3 = ix + 1u; - uint raw1 = read_mem(param_2, param_3, v_278); + uint raw1 = read_mem(param_2, param_3, v_291); + Alloc param_4 = a; + uint param_5 = ix + 2u; + uint raw2 = read_mem(param_4, param_5, v_291); + Alloc param_6 = a; + uint param_7 = ix + 3u; + uint raw3 = read_mem(param_6, param_7, v_291); + Alloc param_8 = a; + uint param_9 = ix + 4u; + uint raw4 = read_mem(param_8, param_9, v_291); + Alloc param_10 = a; + uint param_11 = ix + 5u; + uint raw5 = read_mem(param_10, param_11, v_291); + Alloc param_12 = a; + uint param_13 = ix + 6u; + uint raw6 = read_mem(param_12, param_13, v_291); + Alloc param_14 = a; + uint param_15 = ix + 7u; + uint raw7 = read_mem(param_14, param_15, v_291); + Alloc param_16 = a; + uint param_17 = ix + 8u; + uint raw8 = read_mem(param_16, param_17, v_291); + Alloc param_18 = a; + uint param_19 = ix + 9u; + uint raw9 = read_mem(param_18, param_19, v_291); + Alloc param_20 = a; + uint param_21 = ix + 10u; + uint raw10 = read_mem(param_20, param_21, v_291); + CmdRadGrad s; + s.index = raw0; + s.mat = float4(as_type(raw1), as_type(raw2), as_type(raw3), as_type(raw4)); + s.xlat = float2(as_type(raw5), as_type(raw6)); + s.c1 = float2(as_type(raw7), as_type(raw8)); + s.ra = as_type(raw9); + s.roff = as_type(raw10); + return s; +} + +static inline __attribute__((always_inline)) +CmdRadGrad Cmd_RadGrad_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) +{ + Alloc param = a; + 
CmdRadGradRef param_1 = CmdRadGradRef{ ref.offset + 4u }; + return CmdRadGrad_read(param, param_1, v_291); +} + +static inline __attribute__((always_inline)) +CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, device Memory& v_291) +{ + uint ix = ref.offset >> uint(2); + Alloc param = a; + uint param_1 = ix + 0u; + uint raw0 = read_mem(param, param_1, v_291); + Alloc param_2 = a; + uint param_3 = ix + 1u; + uint raw1 = read_mem(param_2, param_3, v_291); CmdImage s; s.index = raw0; s.offset = int2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16); @@ -441,11 +511,11 @@ CmdImage CmdImage_read(thread const Alloc& a, thread const CmdImageRef& ref, dev } static inline __attribute__((always_inline)) -CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdImage Cmd_Image_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdImageRef param_1 = CmdImageRef{ ref.offset + 4u }; - return CmdImage_read(param, param_1, v_278); + return CmdImage_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -458,10 +528,10 @@ spvUnsafeArray fillImage(thread const uint2& xy, thread const CmdImag int2 uv = int2(xy + chunk_offset(param)) + cmd_img.offset; float4 fg_rgba = image_atlas.read(uint2(uv)); float3 param_1 = fg_rgba.xyz; - float3 _1493 = fromsRGB(param_1); - fg_rgba.x = _1493.x; - fg_rgba.y = _1493.y; - fg_rgba.z = _1493.z; + float3 _1638 = fromsRGB(param_1); + fg_rgba.x = _1638.x; + fg_rgba.y = _1638.y; + fg_rgba.z = _1638.z; rgba[i] = fg_rgba; } return rgba; @@ -485,23 +555,23 @@ uint packsRGB(thread float4& rgba) } static inline __attribute__((always_inline)) -CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_278) +CmdEndClip CmdEndClip_read(thread const Alloc& a, thread const CmdEndClipRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - 
uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdEndClip s; s.blend = raw0; return s; } static inline __attribute__((always_inline)) -CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdEndClip Cmd_EndClip_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdEndClipRef param_1 = CmdEndClipRef{ ref.offset + 4u }; - return CmdEndClip_read(param, param_1, v_278); + return CmdEndClip_read(param, param_1, v_291); } static inline __attribute__((always_inline)) @@ -701,8 +771,8 @@ float3 set_lum(thread const float3& c, thread const float& l) { float3 param = c; float3 param_1 = c + float3(l - lum(param)); - float3 _901 = clip_color(param_1); - return _901; + float3 _1046 = clip_color(param_1); + return _1046; } static inline __attribute__((always_inline)) @@ -791,9 +861,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_20 = cb; float3 param_21 = cs; float param_22 = sat(param_20); - float3 _1192 = set_sat(param_21, param_22); + float3 _1337 = set_sat(param_21, param_22); float3 param_23 = cb; - float3 param_24 = _1192; + float3 param_24 = _1337; float param_25 = lum(param_23); b = set_lum(param_24, param_25); break; @@ -803,9 +873,9 @@ float3 mix_blend(thread const float3& cb, thread const float3& cs, thread const float3 param_26 = cs; float3 param_27 = cb; float param_28 = sat(param_26); - float3 _1206 = set_sat(param_27, param_28); + float3 _1351 = set_sat(param_27, param_28); float3 param_29 = cb; - float3 param_30 = _1206; + float3 param_30 = _1351; float param_31 = lum(param_29); b = set_lum(param_30, param_31); break; @@ -931,30 +1001,30 @@ float4 mix_compose(thread const float3& cb, thread const float3& cs, thread cons } static inline __attribute__((always_inline)) -CmdJump CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_278) +CmdJump 
CmdJump_read(thread const Alloc& a, thread const CmdJumpRef& ref, device Memory& v_291) { uint ix = ref.offset >> uint(2); Alloc param = a; uint param_1 = ix + 0u; - uint raw0 = read_mem(param, param_1, v_278); + uint raw0 = read_mem(param, param_1, v_291); CmdJump s; s.new_ref = raw0; return s; } static inline __attribute__((always_inline)) -CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_278) +CmdJump Cmd_Jump_read(thread const Alloc& a, thread const CmdRef& ref, device Memory& v_291) { Alloc param = a; CmdJumpRef param_1 = CmdJumpRef{ ref.offset + 4u }; - return CmdJump_read(param, param_1, v_278); + return CmdJump_read(param, param_1, v_291); } -kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1521 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +kernel void main0(device Memory& v_291 [[buffer(0)]], const device ConfigBuf& _1666 [[buffer(1)]], texture2d image [[texture(2)]], texture2d image_atlas [[texture(3)]], texture2d gradients [[texture(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { - uint tile_ix = (gl_WorkGroupID.y * _1521.conf.width_in_tiles) + gl_WorkGroupID.x; + uint tile_ix = (gl_WorkGroupID.y * _1666.conf.width_in_tiles) + gl_WorkGroupID.x; Alloc param; - param.offset = _1521.conf.ptcl_alloc.offset; + param.offset = _1666.conf.ptcl_alloc.offset; uint param_1 = tile_ix * 1024u; uint param_2 = 1024u; Alloc cmd_alloc = slice_mem(param, param_1, param_2); @@ -967,7 +1037,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 rgba[i] = float4(0.0); } uint clip_depth = 0u; - bool mem_ok = v_278.mem_error == 0u; + bool mem_ok = v_291.mem_error == 0u; spvUnsafeArray df; TileSegRef 
tile_seg_ref; spvUnsafeArray area; @@ -976,7 +1046,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_3 = cmd_alloc; CmdRef param_4 = cmd_ref; - uint tag = Cmd_tag(param_3, param_4, v_278).tag; + uint tag = Cmd_tag(param_3, param_4, v_291).tag; if (tag == 0u) { break; @@ -987,7 +1057,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_5 = cmd_alloc; CmdRef param_6 = cmd_ref; - CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_278); + CmdStroke stroke = Cmd_Stroke_read(param_5, param_6, v_291); for (uint k = 0u; k < 8u; k++) { df[k] = 1000000000.0; @@ -1000,7 +1070,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 bool param_9 = mem_ok; Alloc param_10 = new_alloc(param_7, param_8, param_9); TileSegRef param_11 = tile_seg_ref; - TileSeg seg = TileSeg_read(param_10, param_11, v_278); + TileSeg seg = TileSeg_read(param_10, param_11, v_291); float2 line_vec = seg.vector; for (uint k_1 = 0u; k_1 < 8u; k_1++) { @@ -1023,7 +1093,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_13 = cmd_alloc; CmdRef param_14 = cmd_ref; - CmdFill fill = Cmd_Fill_read(param_13, param_14, v_278); + CmdFill fill = Cmd_Fill_read(param_13, param_14, v_291); for (uint k_3 = 0u; k_3 < 8u; k_3++) { area[k_3] = float(fill.backdrop); @@ -1036,7 +1106,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 bool param_17 = mem_ok; Alloc param_18 = new_alloc(param_15, param_16, param_17); TileSegRef param_19 = tile_seg_ref; - TileSeg seg_1 = TileSeg_read(param_18, param_19, v_278); + TileSeg seg_1 = TileSeg_read(param_18, param_19, v_291); for (uint k_4 = 0u; k_4 < 8u; k_4++) { uint param_20 = k_4; @@ -1080,7 +1150,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_21 = cmd_alloc; CmdRef param_22 = cmd_ref; - CmdAlpha alpha = 
Cmd_Alpha_read(param_21, param_22, v_278); + CmdAlpha alpha = Cmd_Alpha_read(param_21, param_22, v_291); for (uint k_7 = 0u; k_7 < 8u; k_7++) { area[k_7] = alpha.alpha; @@ -1092,7 +1162,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_23 = cmd_alloc; CmdRef param_24 = cmd_ref; - CmdColor color = Cmd_Color_read(param_23, param_24, v_278); + CmdColor color = Cmd_Color_read(param_23, param_24, v_291); uint param_25 = color.rgba_color; float4 fg = unpacksRGB(param_25); for (uint k_8 = 0u; k_8 < 8u; k_8++) @@ -1107,7 +1177,7 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_26 = cmd_alloc; CmdRef param_27 = cmd_ref; - CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_278); + CmdLinGrad lin = Cmd_LinGrad_read(param_26, param_27, v_291); float d_1 = ((lin.line_x * xy.x) + (lin.line_y * xy.y)) + lin.line_c; for (uint k_9 = 0u; k_9 < 8u; k_9++) { @@ -1117,11 +1187,12 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 int x = int(round(fast::clamp(my_d, 0.0, 1.0) * 511.0)); float4 fg_rgba = gradients.read(uint2(int2(x, int(lin.index)))); float3 param_29 = fg_rgba.xyz; - float3 _2092 = fromsRGB(param_29); - fg_rgba.x = _2092.x; - fg_rgba.y = _2092.y; - fg_rgba.z = _2092.z; - rgba[k_9] = fg_rgba; + float3 _2238 = fromsRGB(param_29); + fg_rgba.x = _2238.x; + fg_rgba.y = _2238.y; + fg_rgba.z = _2238.z; + float4 fg_k_1 = fg_rgba * area[k_9]; + rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1; } cmd_ref.offset += 20u; break; @@ -1130,72 +1201,98 @@ kernel void main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 { Alloc param_30 = cmd_alloc; CmdRef param_31 = cmd_ref; - CmdImage fill_img = Cmd_Image_read(param_30, param_31, v_278); - uint2 param_32 = xy_uint; - CmdImage param_33 = fill_img; - spvUnsafeArray img; - img = fillImage(param_32, param_33, image_atlas); + CmdRadGrad rad = Cmd_RadGrad_read(param_30, param_31, v_291); 
for (uint k_10 = 0u; k_10 < 8u; k_10++) { - float4 fg_k_1 = img[k_10] * area[k_10]; - rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_1.w)) + fg_k_1; + uint param_32 = k_10; + float2 my_xy_1 = xy + float2(chunk_offset(param_32)); + my_xy_1 = ((rad.mat.xz * my_xy_1.x) + (rad.mat.yw * my_xy_1.y)) - rad.xlat; + float ba = dot(my_xy_1, rad.c1); + float ca = rad.ra * dot(my_xy_1, my_xy_1); + float t_2 = (sqrt((ba * ba) + ca) - ba) - rad.roff; + int x_1 = int(round(fast::clamp(t_2, 0.0, 1.0) * 511.0)); + float4 fg_rgba_1 = gradients.read(uint2(int2(x_1, int(rad.index)))); + float3 param_33 = fg_rgba_1.xyz; + float3 _2348 = fromsRGB(param_33); + fg_rgba_1.x = _2348.x; + fg_rgba_1.y = _2348.y; + fg_rgba_1.z = _2348.z; + float4 fg_k_2 = fg_rgba_1 * area[k_10]; + rgba[k_10] = (rgba[k_10] * (1.0 - fg_k_2.w)) + fg_k_2; } - cmd_ref.offset += 12u; + cmd_ref.offset += 48u; break; } case 8u: { + Alloc param_34 = cmd_alloc; + CmdRef param_35 = cmd_ref; + CmdImage fill_img = Cmd_Image_read(param_34, param_35, v_291); + uint2 param_36 = xy_uint; + CmdImage param_37 = fill_img; + spvUnsafeArray img; + img = fillImage(param_36, param_37, image_atlas); for (uint k_11 = 0u; k_11 < 8u; k_11++) + { + float4 fg_k_3 = img[k_11] * area[k_11]; + rgba[k_11] = (rgba[k_11] * (1.0 - fg_k_3.w)) + fg_k_3; + } + cmd_ref.offset += 12u; + break; + } + case 9u: + { + for (uint k_12 = 0u; k_12 < 8u; k_12++) { uint d_2 = min(clip_depth, 127u); - float4 param_34 = float4(rgba[k_11]); - uint _2184 = packsRGB(param_34); - blend_stack[d_2][k_11] = _2184; - rgba[k_11] = float4(0.0); + float4 param_38 = float4(rgba[k_12]); + uint _2454 = packsRGB(param_38); + blend_stack[d_2][k_12] = _2454; + rgba[k_12] = float4(0.0); } clip_depth++; cmd_ref.offset += 4u; break; } - case 9u: + case 10u: { - Alloc param_35 = cmd_alloc; - CmdRef param_36 = cmd_ref; - CmdEndClip end_clip = Cmd_EndClip_read(param_35, param_36, v_278); + Alloc param_39 = cmd_alloc; + CmdRef param_40 = cmd_ref; + CmdEndClip end_clip = 
Cmd_EndClip_read(param_39, param_40, v_291); uint blend_mode = end_clip.blend >> uint(8); uint comp_mode = end_clip.blend & 255u; clip_depth--; - for (uint k_12 = 0u; k_12 < 8u; k_12++) + for (uint k_13 = 0u; k_13 < 8u; k_13++) { uint d_3 = min(clip_depth, 127u); - uint param_37 = blend_stack[d_3][k_12]; - float4 bg = unpacksRGB(param_37); - float4 fg_1 = rgba[k_12] * area[k_12]; - float3 param_38 = bg.xyz; - float3 param_39 = fg_1.xyz; - uint param_40 = blend_mode; - float3 blend = mix_blend(param_38, param_39, param_40); - float4 _2251 = fg_1; - float _2255 = fg_1.w; - float3 _2262 = mix(_2251.xyz, blend, float3(float((_2255 * bg.w) > 0.0))); - fg_1.x = _2262.x; - fg_1.y = _2262.y; - fg_1.z = _2262.z; - float3 param_41 = bg.xyz; - float3 param_42 = fg_1.xyz; - float param_43 = bg.w; - float param_44 = fg_1.w; - uint param_45 = comp_mode; - rgba[k_12] = mix_compose(param_41, param_42, param_43, param_44, param_45); + uint param_41 = blend_stack[d_3][k_13]; + float4 bg = unpacksRGB(param_41); + float4 fg_1 = rgba[k_13] * area[k_13]; + float3 param_42 = bg.xyz; + float3 param_43 = fg_1.xyz; + uint param_44 = blend_mode; + float3 blend = mix_blend(param_42, param_43, param_44); + float4 _2521 = fg_1; + float _2525 = fg_1.w; + float3 _2532 = mix(_2521.xyz, blend, float3(float((_2525 * bg.w) > 0.0))); + fg_1.x = _2532.x; + fg_1.y = _2532.y; + fg_1.z = _2532.z; + float3 param_45 = bg.xyz; + float3 param_46 = fg_1.xyz; + float param_47 = bg.w; + float param_48 = fg_1.w; + uint param_49 = comp_mode; + rgba[k_13] = mix_compose(param_45, param_46, param_47, param_48, param_49); } cmd_ref.offset += 8u; break; } - case 10u: + case 11u: { - Alloc param_46 = cmd_alloc; - CmdRef param_47 = cmd_ref; - cmd_ref = CmdRef{ Cmd_Jump_read(param_46, param_47, v_278).new_ref }; + Alloc param_50 = cmd_alloc; + CmdRef param_51 = cmd_ref; + cmd_ref = CmdRef{ Cmd_Jump_read(param_50, param_51, v_291).new_ref }; cmd_alloc.offset = cmd_ref.offset; break; } @@ -1203,8 +1300,8 @@ kernel void 
main0(device Memory& v_278 [[buffer(0)]], const device ConfigBuf& _1 } for (uint i_1 = 0u; i_1 < 8u; i_1++) { - uint param_48 = i_1; - image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_48)))); + uint param_52 = i_1; + image.write(float4(rgba[i_1].w), uint2(int2(xy_uint + chunk_offset(param_52)))); } } diff --git a/piet-gpu/shader/gen/kernel4_gray.spv b/piet-gpu/shader/gen/kernel4_gray.spv index 791b76c..4633401 100644 Binary files a/piet-gpu/shader/gen/kernel4_gray.spv and b/piet-gpu/shader/gen/kernel4_gray.spv differ diff --git a/piet-gpu/shader/gen/tile_alloc.dxil b/piet-gpu/shader/gen/tile_alloc.dxil index 7759910..7b130e0 100644 Binary files a/piet-gpu/shader/gen/tile_alloc.dxil and b/piet-gpu/shader/gen/tile_alloc.dxil differ diff --git a/piet-gpu/shader/kernel4.comp b/piet-gpu/shader/kernel4.comp index a97715a..c49e2fa 100644 --- a/piet-gpu/shader/kernel4.comp +++ b/piet-gpu/shader/kernel4.comp @@ -192,10 +192,27 @@ void main() { int x = int(round(clamp(my_d, 0.0, 1.0) * float(GRADIENT_WIDTH - 1))); mediump vec4 fg_rgba = imageLoad(gradients, ivec2(x, int(lin.index))); fg_rgba.rgb = fromsRGB(fg_rgba.rgb); - rgba[k] = fg_rgba; + mediump vec4 fg_k = fg_rgba * area[k]; + rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k; } cmd_ref.offset += 4 + CmdLinGrad_size; break; + case Cmd_RadGrad: + CmdRadGrad rad = Cmd_RadGrad_read(cmd_alloc, cmd_ref); + for (uint k = 0; k < CHUNK; k++) { + vec2 my_xy = xy + vec2(chunk_offset(k)); + my_xy = rad.mat.xz * my_xy.x + rad.mat.yw * my_xy.y - rad.xlat; + float ba = dot(my_xy, rad.c1); + float ca = rad.ra * dot(my_xy, my_xy); + float t = sqrt(ba * ba + ca) - ba - rad.roff; + int x = int(round(clamp(t, 0.0, 1.0) * float(GRADIENT_WIDTH - 1))); + mediump vec4 fg_rgba = imageLoad(gradients, ivec2(x, int(rad.index))); + fg_rgba.rgb = fromsRGB(fg_rgba.rgb); + mediump vec4 fg_k = fg_rgba * area[k]; + rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k; + } + cmd_ref.offset += 4 + CmdRadGrad_size; + break; case Cmd_Image: CmdImage 
fill_img = Cmd_Image_read(cmd_alloc, cmd_ref); mediump vec4 img[CHUNK] = fillImage(xy_uint, fill_img); diff --git a/piet-gpu/shader/ptcl.h b/piet-gpu/shader/ptcl.h index 9b9b341..54dcc9e 100644 --- a/piet-gpu/shader/ptcl.h +++ b/piet-gpu/shader/ptcl.h @@ -18,6 +18,10 @@ struct CmdLinGradRef { uint offset; }; +struct CmdRadGradRef { + uint offset; +}; + struct CmdImageRef { uint offset; }; @@ -83,6 +87,21 @@ CmdLinGradRef CmdLinGrad_index(CmdLinGradRef ref, uint index) { return CmdLinGradRef(ref.offset + index * CmdLinGrad_size); } +struct CmdRadGrad { + uint index; + vec4 mat; + vec2 xlat; + vec2 c1; + float ra; + float roff; +}; + +#define CmdRadGrad_size 44 + +CmdRadGradRef CmdRadGrad_index(CmdRadGradRef ref, uint index) { + return CmdRadGradRef(ref.offset + index * CmdRadGrad_size); +} + struct CmdImage { uint index; ivec2 offset; @@ -131,11 +150,12 @@ CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) { #define Cmd_Alpha 4 #define Cmd_Color 5 #define Cmd_LinGrad 6 -#define Cmd_Image 7 -#define Cmd_BeginClip 8 -#define Cmd_EndClip 9 -#define Cmd_Jump 10 -#define Cmd_size 20 +#define Cmd_RadGrad 7 +#define Cmd_Image 8 +#define Cmd_BeginClip 9 +#define Cmd_EndClip 10 +#define Cmd_Jump 11 +#define Cmd_size 48 CmdRef Cmd_index(CmdRef ref, uint index) { return CmdRef(ref.offset + index * Cmd_size); @@ -213,6 +233,44 @@ void CmdLinGrad_write(Alloc a, CmdLinGradRef ref, CmdLinGrad s) { write_mem(a, ix + 3, floatBitsToUint(s.line_c)); } +CmdRadGrad CmdRadGrad_read(Alloc a, CmdRadGradRef ref) { + uint ix = ref.offset >> 2; + uint raw0 = read_mem(a, ix + 0); + uint raw1 = read_mem(a, ix + 1); + uint raw2 = read_mem(a, ix + 2); + uint raw3 = read_mem(a, ix + 3); + uint raw4 = read_mem(a, ix + 4); + uint raw5 = read_mem(a, ix + 5); + uint raw6 = read_mem(a, ix + 6); + uint raw7 = read_mem(a, ix + 7); + uint raw8 = read_mem(a, ix + 8); + uint raw9 = read_mem(a, ix + 9); + uint raw10 = read_mem(a, ix + 10); + CmdRadGrad s; + s.index = raw0; + s.mat = 
vec4(uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4)); + s.xlat = vec2(uintBitsToFloat(raw5), uintBitsToFloat(raw6)); + s.c1 = vec2(uintBitsToFloat(raw7), uintBitsToFloat(raw8)); + s.ra = uintBitsToFloat(raw9); + s.roff = uintBitsToFloat(raw10); + return s; +} + +void CmdRadGrad_write(Alloc a, CmdRadGradRef ref, CmdRadGrad s) { + uint ix = ref.offset >> 2; + write_mem(a, ix + 0, s.index); + write_mem(a, ix + 1, floatBitsToUint(s.mat.x)); + write_mem(a, ix + 2, floatBitsToUint(s.mat.y)); + write_mem(a, ix + 3, floatBitsToUint(s.mat.z)); + write_mem(a, ix + 4, floatBitsToUint(s.mat.w)); + write_mem(a, ix + 5, floatBitsToUint(s.xlat.x)); + write_mem(a, ix + 6, floatBitsToUint(s.xlat.y)); + write_mem(a, ix + 7, floatBitsToUint(s.c1.x)); + write_mem(a, ix + 8, floatBitsToUint(s.c1.y)); + write_mem(a, ix + 9, floatBitsToUint(s.ra)); + write_mem(a, ix + 10, floatBitsToUint(s.roff)); +} + CmdImage CmdImage_read(Alloc a, CmdImageRef ref) { uint ix = ref.offset >> 2; uint raw0 = read_mem(a, ix + 0); @@ -293,6 +351,10 @@ CmdLinGrad Cmd_LinGrad_read(Alloc a, CmdRef ref) { return CmdLinGrad_read(a, CmdLinGradRef(ref.offset + 4)); } +CmdRadGrad Cmd_RadGrad_read(Alloc a, CmdRef ref) { + return CmdRadGrad_read(a, CmdRadGradRef(ref.offset + 4)); +} + CmdImage Cmd_Image_read(Alloc a, CmdRef ref) { return CmdImage_read(a, CmdImageRef(ref.offset + 4)); } @@ -338,6 +400,11 @@ void Cmd_LinGrad_write(Alloc a, CmdRef ref, CmdLinGrad s) { CmdLinGrad_write(a, CmdLinGradRef(ref.offset + 4), s); } +void Cmd_RadGrad_write(Alloc a, CmdRef ref, CmdRadGrad s) { + write_mem(a, ref.offset >> 2, Cmd_RadGrad); + CmdRadGrad_write(a, CmdRadGradRef(ref.offset + 4), s); +} + void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) { write_mem(a, ref.offset >> 2, Cmd_Image); CmdImage_write(a, CmdImageRef(ref.offset + 4), s); diff --git a/piet-gpu/src/encoder.rs b/piet-gpu/src/encoder.rs index 62c59c4..2f4b85e 100644 --- a/piet-gpu/src/encoder.rs +++ 
b/piet-gpu/src/encoder.rs @@ -62,6 +62,7 @@ const ANNOTATED_SIZE: usize = 40; // Tags for draw objects. See shader/drawtag.h for the authoritative source. const DRAWTAG_FILLCOLOR: u32 = 0x44; const DRAWTAG_FILLLINGRADIENT: u32 = 0x114; +const DRAWTAG_FILLRADGRADIENT: u32 = 0x2dc; const DRAWTAG_BEGINCLIP: u32 = 0x05; const DRAWTAG_ENDCLIP: u32 = 0x25; @@ -79,6 +80,16 @@ pub struct FillLinGradient { p1: [f32; 2], } +#[repr(C)] +#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] +pub struct FillRadGradient { + index: u32, + p0: [f32; 2], + p1: [f32; 2], + r0: f32, + r1: f32, +} + #[allow(unused)] #[repr(C)] #[derive(Clone, Copy, Debug, Default, Zeroable, Pod)] @@ -123,6 +134,13 @@ impl Encoder { self.transform_stream.push(transform); } + // Swap the last two tags in the tag stream; used for transformed + // gradients. + pub fn swap_last_tags(&mut self) { + let len = self.tag_stream.len(); + self.tag_stream.swap(len - 1, len - 2); + } + // -1.0 means "fill" pub fn linewidth(&mut self, linewidth: f32) { self.tag_stream.push(0x40); @@ -147,6 +165,16 @@ impl Encoder { self.drawdata_stream.extend(bytemuck::bytes_of(&element)); } + + /// Encode a fill radial gradient draw object. + /// + /// This should be encoded after a path. + pub fn fill_rad_gradient(&mut self, index: u32, p0: [f32; 2], p1: [f32; 2], r0: f32, r1: f32) { + self.drawtag_stream.push(DRAWTAG_FILLRADGRADIENT); + let element = FillRadGradient { index, p0, p1, r0, r1 }; + self.drawdata_stream.extend(bytemuck::bytes_of(&element)); + } + /// Start a clip. 
pub fn begin_clip(&mut self, blend: Option) { self.drawtag_stream.push(DRAWTAG_BEGINCLIP); @@ -220,7 +248,7 @@ impl Encoder { alloc += n_drawobj * DRAW_BBOX_SIZE; let drawinfo_alloc = alloc; // TODO: not optimized; it can be accumulated during encoding or summed from drawtags - const MAX_DRAWINFO_SIZE: usize = 16; + const MAX_DRAWINFO_SIZE: usize = 44; alloc += n_drawobj * MAX_DRAWINFO_SIZE; let config = Config { diff --git a/piet-gpu/src/gradient.rs b/piet-gpu/src/gradient.rs index 20982e9..e655908 100644 --- a/piet-gpu/src/gradient.rs +++ b/piet-gpu/src/gradient.rs @@ -18,15 +18,29 @@ use std::collections::hash_map::{Entry, HashMap}; -use piet::{Color, FixedLinearGradient, GradientStop}; +use piet::kurbo::Point; +use piet::{Color, FixedLinearGradient, GradientStop, FixedRadialGradient}; + +/// Radial gradient compatible with COLRv1 spec +#[derive(Debug, Clone)] +pub struct Colrv1RadialGradient { + /// The center of the inner circle. + pub center0: Point, + /// The center of the outer circle. + pub center1: Point, + /// The radius of the inner circle. + pub radius0: f64, + /// The radius of the outer circle. + pub radius1: f64, + /// The stops. + pub stops: Vec, +} #[derive(Clone)] pub struct BakedGradient { ramp: Vec, } -/// This is basically the same type as scene::FillLinGradient, so could -/// potentially use that directly. 
#[derive(Clone)] pub struct LinearGradient { pub(crate) start: [f32; 2], @@ -34,6 +48,15 @@ pub struct LinearGradient { pub(crate) ramp_id: u32, } +#[derive(Clone)] +pub struct RadialGradient { + pub(crate) start: [f32; 2], + pub(crate) end: [f32; 2], + pub(crate) r0: f32, + pub(crate) r1: f32, + pub(crate) ramp_id: u32, +} + #[derive(Default)] pub struct RampCache { ramps: Vec, @@ -154,6 +177,28 @@ impl RampCache { } } + pub fn add_radial_gradient(&mut self, rad: &FixedRadialGradient) -> RadialGradient { + let ramp_id = self.add_ramp(&rad.stops); + RadialGradient { + ramp_id: ramp_id as u32, + start: crate::render_ctx::to_f32_2(rad.center + rad.origin_offset), + end: crate::render_ctx::to_f32_2(rad.center), + r0: 0.0, + r1: rad.radius as f32, + } + } + + pub fn add_radial_gradient_colrv1(&mut self, rad: &Colrv1RadialGradient) -> RadialGradient { + let ramp_id = self.add_ramp(&rad.stops); + RadialGradient { + ramp_id: ramp_id as u32, + start: crate::render_ctx::to_f32_2(rad.center0), + end: crate::render_ctx::to_f32_2(rad.center1), + r0: rad.radius0 as f32, + r1: rad.radius1 as f32, + } + } + /// Dump the contents of a gradient. This is for debugging. 
#[allow(unused)] pub(crate) fn dump_gradient(&self, lin: &LinearGradient) { diff --git a/piet-gpu/src/lib.rs b/piet-gpu/src/lib.rs index 249735a..475d723 100644 --- a/piet-gpu/src/lib.rs +++ b/piet-gpu/src/lib.rs @@ -12,6 +12,7 @@ use std::convert::TryInto; pub use blend::{Blend, BlendMode, CompositionMode}; pub use render_ctx::PietGpuRenderContext; +pub use gradient::Colrv1RadialGradient; use piet::kurbo::Vec2; use piet::{ImageFormat, RenderContext}; @@ -21,7 +22,7 @@ use piet_gpu_hal::{ ImageLayout, Pipeline, QueryPool, Session, }; -pub use pico_svg::PicoSvg; +use pico_svg::PicoSvg; use stages::{ClipBinding, ElementBinding, ElementCode}; use crate::stages::{ClipCode, Config, ElementStage}; diff --git a/piet-gpu/src/render_ctx.rs b/piet-gpu/src/render_ctx.rs index 024dd2b..dca03eb 100644 --- a/piet-gpu/src/render_ctx.rs +++ b/piet-gpu/src/render_ctx.rs @@ -13,7 +13,7 @@ use piet_gpu_hal::BufWrite; use piet_gpu_types::encoder::{Encode, Encoder}; use piet_gpu_types::scene::Element; -use crate::gradient::{LinearGradient, RampCache}; +use crate::gradient::{LinearGradient, RadialGradient, RampCache, Colrv1RadialGradient}; use crate::text::Font; pub use crate::text::{PietGpuText, PietGpuTextLayout, PietGpuTextLayoutBuilder}; use crate::Blend; @@ -50,6 +50,7 @@ pub struct PietGpuRenderContext { pub enum PietGpuBrush { Solid(u32), LinGradient(LinearGradient), + RadGradient(RadialGradient), } #[derive(Default)] @@ -187,6 +188,10 @@ impl RenderContext for PietGpuRenderContext { let lin = self.ramp_cache.add_linear_gradient(&lin); Ok(PietGpuBrush::LinGradient(lin)) } + FixedGradient::Radial(rad) => { + let rad = self.ramp_cache.add_radial_gradient(&rad); + Ok(PietGpuBrush::RadGradient(rad)) + } _ => todo!("don't do radial gradients yet"), } } @@ -338,6 +343,20 @@ impl PietGpuRenderContext { } } + pub fn radial_gradient_colrv1(&mut self, rad: &Colrv1RadialGradient) -> PietGpuBrush { + PietGpuBrush::RadGradient(self.ramp_cache.add_radial_gradient_colrv1(rad)) + } + + pub fn 
fill_transform(&mut self, shape: impl Shape, brush: &PietGpuBrush, transform: Affine) { + let path = shape.path_elements(TOLERANCE); + self.encode_linewidth(-1.0); + self.encode_path(path, true); + self.encode_transform(Transform::from_kurbo(transform)); + self.new_encoder.swap_last_tags(); + self.encode_brush(&brush); + self.encode_transform(Transform::from_kurbo(transform.inverse())); + } + fn encode_path(&mut self, path: impl Iterator, is_fill: bool) { if is_fill { self.encode_path_inner( @@ -420,6 +439,10 @@ impl PietGpuRenderContext { self.new_encoder .fill_lin_gradient(lin.ramp_id, lin.start, lin.end); } + PietGpuBrush::RadGradient(rad) => { + self.new_encoder + .fill_rad_gradient(rad.ramp_id, rad.start, rad.end, rad.r0, rad.r1); + } } } } diff --git a/piet-gpu/src/test_scenes.rs b/piet-gpu/src/test_scenes.rs index 350b9dd..cf5a50d 100644 --- a/piet-gpu/src/test_scenes.rs +++ b/piet-gpu/src/test_scenes.rs @@ -2,10 +2,10 @@ use rand::{Rng, RngCore}; -use crate::{Blend, BlendMode, CompositionMode, PietGpuRenderContext}; +use crate::{Blend, BlendMode, CompositionMode, PietGpuRenderContext, Colrv1RadialGradient}; use piet::kurbo::{Affine, BezPath, Circle, Line, Point, Rect, Shape}; use piet::{ - Color, FixedGradient, FixedLinearGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder, + Color, FixedGradient, FixedRadialGradient, GradientStop, Text, TextAttribute, TextLayoutBuilder, }; use crate::{PicoSvg, RenderContext, Vec2}; @@ -21,13 +21,18 @@ pub fn render_blend_test(rc: &mut PietGpuRenderContext, i: usize, blend: Blend) rc.restore().unwrap(); } -pub fn render_svg(rc: &mut impl RenderContext, svg: &PicoSvg) { +pub fn render_svg(rc: &mut impl RenderContext, filename: &str, scale: f64) { + let xml_str = std::fs::read_to_string(filename).unwrap(); + let start = std::time::Instant::now(); + let svg = PicoSvg::load(&xml_str, scale).unwrap(); + println!("parsing time: {:?}", start.elapsed()); + let start = std::time::Instant::now(); svg.render(rc); 
println!("flattening and encoding time: {:?}", start.elapsed()); } -pub fn render_scene(rc: &mut impl RenderContext) { +pub fn render_scene(rc: &mut PietGpuRenderContext) { const WIDTH: usize = 2048; const HEIGHT: usize = 1536; let mut rng = rand::thread_rng(); @@ -137,7 +142,7 @@ fn render_alpha_test(rc: &mut impl RenderContext) { } #[allow(unused)] -fn render_gradient_test(rc: &mut impl RenderContext) { +fn render_gradient_test(rc: &mut PietGpuRenderContext) { let stops = vec![ GradientStop { color: Color::rgb8(0, 255, 0), @@ -148,14 +153,18 @@ fn render_gradient_test(rc: &mut impl RenderContext) { pos: 1.0, }, ]; - let lin = FixedLinearGradient { - start: Point::new(0.0, 100.0), - end: Point::new(0.0, 300.0), + let rad = Colrv1RadialGradient { + center0: Point::new(200.0, 200.0), + center1: Point::new(250.0, 200.0), + radius0: 50.0, + radius1: 100.0, stops, }; - let brush = FixedGradient::Linear(lin); + let brush = rc.radial_gradient_colrv1(&rad); + //let brush = FixedGradient::Radial(rad); //let brush = Color::rgb8(0, 128, 0); - rc.fill(Rect::new(100.0, 100.0, 300.0, 300.0), &brush); + let transform = Affine::new([1.0, 0.0, 0.0, 0.5, 0.0, 100.0]); + rc.fill_transform(Rect::new(100.0, 100.0, 300.0, 300.0), &brush, transform); } fn diamond(origin: Point) -> impl Shape { diff --git a/piet-scene/Cargo.toml b/piet-scene/Cargo.toml new file mode 100644 index 0000000..8706119 --- /dev/null +++ b/piet-scene/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "piet-scene" +version = "0.1.0" +license = "MIT/Apache-2.0" +edition = "2021" + +[dependencies] +bytemuck = { version = "1.7.2", features = ["derive"] } +smallvec = "1.8.0" diff --git a/piet-scene/src/brush/color.rs b/piet-scene/src/brush/color.rs new file mode 100644 index 0000000..d079210 --- /dev/null +++ b/piet-scene/src/brush/color.rs @@ -0,0 +1,62 @@ +// Copyright 2022 The piet-gpu authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)] +pub struct Color { + pub r: u8, + pub g: u8, + pub b: u8, + pub a: u8, +} + +impl Color { + pub const fn rgb8(r: u8, g: u8, b: u8) -> Self { + Self { r, g, b, a: 255 } + } + + pub const fn rgba8(r: u8, g: u8, b: u8, a: u8) -> Self { + Self { r, g, b, a } + } + + pub fn rgb>(r: C, g: C, b: C) -> Self { + Self::rgb8( + (r.into() * 255.0) as u8, + (g.into() * 255.0) as u8, + (b.into() * 255.0) as u8, + ) + } + + pub fn rgba>(r: C, g: C, b: C, a: C) -> Self { + Self::rgba8( + (r.into() * 255.0) as u8, + (g.into() * 255.0) as u8, + (b.into() * 255.0) as u8, + (a.into() * 255.0) as u8, + ) + } + + pub fn pack(self) -> u32 { + (self.b as u32) << 24 | (self.g as u32) << 16 | (self.r as u32) << 8 | self.a as u32 + } + + pub fn to_premul_u32(self) -> u32 { + let a = self.a as f64 / 255.0; + let r = (self.r as f64 * a) as u32; + let g = (self.g as f64 * a) as u32; + let b = (self.b as f64 * a) as u32; + r | (g << 8) | (b << 16) | ((self.a as u32) << 24) + } +} diff --git a/piet-scene/src/brush/gradient.rs b/piet-scene/src/brush/gradient.rs new file mode 100644 index 0000000..51d558e --- /dev/null +++ b/piet-scene/src/brush/gradient.rs @@ -0,0 +1,87 @@ +// Copyright 2022 The piet-gpu authors. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +use super::color::Color; +use crate::geometry::Point; +use smallvec::SmallVec; +use std::hash::{Hash, Hasher}; + +#[derive(Copy, Clone, PartialOrd, Default, Debug)] +pub struct Stop { + pub offset: f32, + pub color: Color, +} + +impl Hash for Stop { + fn hash(&self, state: &mut H) { + self.offset.to_bits().hash(state); + self.color.hash(state); + } +} + +// Override PartialEq to use to_bits for the offset to match with the Hash impl +impl std::cmp::PartialEq for Stop { + fn eq(&self, other: &Self) -> bool { + self.offset.to_bits() == other.offset.to_bits() && self.color == other.color + } +} + +impl std::cmp::Eq for Stop {} + +pub type StopVec = SmallVec<[Stop; 4]>; + +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum Extend { + Pad, + Repeat, + Reflect, +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum Space { + Object, + Global, +} + +#[derive(Clone, Debug)] +pub struct LinearGradient { + pub space: Space, + pub start: Point, + pub end: Point, + pub stops: StopVec, + pub extend: Extend, +} + +#[derive(Clone, Debug)] +pub struct RadialGradient { + pub space: Space, + pub center0: Point, + pub radius0: f32, + pub center1: Point, + pub radius1: f32, + pub stops: StopVec, + pub extend: Extend, +} + +#[derive(Clone, Debug)] +pub struct SweepGradient { + pub space: Space, + pub center: Point, + pub start_angle: f32, + pub end_angle: f32, + pub 
/// Pixel format of image data.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Format {
    /// One byte per pixel.
    A8,
    /// Four bytes per pixel.
    Rgba8,
}

impl Format {
    /// Returns the number of bytes needed for a `width` x `height` image in
    /// this format, or `None` if that size does not fit in a `usize`.
    pub fn data_size(self, width: u32, height: u32) -> Option<usize> {
        let bytes_per_pixel = match self {
            Self::A8 => 1,
            Self::Rgba8 => 4,
        };
        (width as usize)
            .checked_mul(height as usize)?
            .checked_mul(bytes_per_pixel)
    }
}

/// Shared, immutable image. Cloning is cheap: clones share the same pixel
/// data and report the same [`Image::id`].
#[derive(Clone, Debug)]
pub struct Image(Arc<Inner>);

#[derive(Clone, Debug)]
struct Inner {
    id: u64,
    format: Format,
    width: u32,
    height: u32,
    data: Vec<u8>,
}

impl Image {
    /// Creates a new image from the given pixel data.
    ///
    /// `data` may be longer than required; any excess bytes are dropped.
    ///
    /// # Errors
    ///
    /// Returns [`DataSizeError`] when the required size overflows `usize` or
    /// when `data` holds fewer bytes than `format`, `width` and `height`
    /// require.
    pub fn new(
        format: Format,
        width: u32,
        height: u32,
        mut data: Vec<u8>,
    ) -> Result<Self, DataSizeError> {
        let data_size = format.data_size(width, height).ok_or(DataSizeError)?;
        if data.len() < data_size {
            return Err(DataSizeError);
        }
        data.truncate(data_size);
        // Process-wide counter: every call to `new` gets a fresh identifier.
        static ID: AtomicU64 = AtomicU64::new(1);
        Ok(Self(Arc::new(Inner {
            id: ID.fetch_add(1, Ordering::Relaxed),
            format,
            width,
            height,
            data,
        })))
    }

    /// Returns the identifier assigned when the image was created.
    pub fn id(&self) -> u64 {
        self.0.id
    }

    /// Returns the pixel format.
    pub fn format(&self) -> Format {
        self.0.format
    }

    /// Returns the width in pixels.
    pub fn width(&self) -> u32 {
        self.0.width
    }

    /// Returns the height in pixels.
    pub fn height(&self) -> u32 {
        self.0.height
    }

    /// Returns the pixel data, exactly `format.data_size(width, height)` bytes.
    pub fn data(&self) -> &[u8] {
        &self.0.data
    }
}

/// Error returned by [`Image::new`] when the supplied data does not match
/// the requested dimensions.
#[derive(Clone, PartialEq, Eq, Debug)]
pub struct DataSizeError;

impl std::fmt::Display for DataSizeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("image data is too small for the requested format and dimensions")
    }
}

impl std::error::Error for DataSizeError {}
/// Two dimensional point.
#[derive(Copy, Clone, PartialEq, PartialOrd, Default, Debug)]
#[repr(C)]
pub struct Point {
    pub x: f32,
    pub y: f32,
}

impl Hash for Point {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.x.to_bits().hash(state);
        self.y.to_bits().hash(state);
    }
}

impl Point {
    /// Creates a point from its coordinates.
    pub const fn new(x: f32, y: f32) -> Self {
        Self { x, y }
    }

    /// Returns this point transformed by `affine`.
    ///
    /// Equivalent to [`Affine::transform_point`]; both share one
    /// implementation so they cannot drift apart.
    pub fn transform(&self, affine: &Affine) -> Self {
        affine.transform_point(*self)
    }
}

impl From<[f32; 2]> for Point {
    fn from(value: [f32; 2]) -> Self {
        Self::new(value[0], value[1])
    }
}

impl From<(f32, f32)> for Point {
    fn from(value: (f32, f32)) -> Self {
        Self::new(value.0, value.1)
    }
}

/// Affine transformation matrix.
///
/// A point `(x, y)` is mapped to
/// `(xx * x + xy * y + dx, yx * x + yy * y + dy)`.
#[derive(Copy, Clone, Debug)]
#[repr(C)]
pub struct Affine {
    pub xx: f32,
    pub yx: f32,
    pub xy: f32,
    pub yy: f32,
    pub dx: f32,
    pub dy: f32,
}

impl Affine {
    /// Creates a transform from its elements in the order
    /// `[xx, yx, xy, yy, dx, dy]`.
    pub const fn new(elements: &[f32; 6]) -> Self {
        Self {
            xx: elements[0],
            yx: elements[1],
            xy: elements[2],
            yy: elements[3],
            dx: elements[4],
            dy: elements[5],
        }
    }

    /// Creates a new affine transform representing the specified scale along the
    /// x and y axes.
    pub fn scale(x: f32, y: f32) -> Self {
        Self::new(&[x, 0., 0., y, 0., 0.])
    }

    /// Creates a new affine transform representing the specified translation.
    pub fn translate(x: f32, y: f32) -> Self {
        Self::new(&[1., 0., 0., 1., x, y])
    }

    /// Creates a new affine transform representing a counter-clockwise
    /// rotation for the specified angle in radians.
    pub fn rotate(th: f32) -> Self {
        let (s, c) = th.sin_cos();
        Self::new(&[c, s, -s, c, 0., 0.])
    }

    /// Creates a new skew transform
    pub fn skew(x: f32, y: f32) -> Self {
        Self::new(&[1., x.tan(), y.tan(), 1., 0., 0.])
    }

    /// Returns this transform applied around the point `(x, y)` instead of
    /// the origin: translate `(x, y)` to the origin, apply `self`, translate
    /// back.
    pub fn around_center(&self, x: f32, y: f32) -> Self {
        Self::translate(x, y) * *self * Self::translate(-x, -y)
    }

    /// Transforms the specified point.
    ///
    /// The formula matches the composition implemented by `Mul`, so
    /// `(a * b).transform_point(p)` equals
    /// `a.transform_point(b.transform_point(p))`. The y coordinate depends on
    /// both `point.x` (through `yx`) and `point.y` (through `yy`).
    pub fn transform_point(&self, point: Point) -> Point {
        Point {
            x: point.x * self.xx + point.y * self.xy + self.dx,
            y: point.x * self.yx + point.y * self.yy + self.dy,
        }
    }
}

impl std::ops::Mul for Affine {
    type Output = Self;

    /// Composes two transforms; the result applies `other` first, then `self`.
    fn mul(self, other: Self) -> Self {
        Self::new(&[
            self.xx * other.xx + self.xy * other.yx,
            self.yx * other.xx + self.yy * other.yx,
            self.xx * other.xy + self.xy * other.yy,
            self.yx * other.xy + self.yy * other.yy,
            self.xx * other.dx + self.xy * other.dy + self.dx,
            self.yx * other.dx + self.yy * other.dy + self.dy,
        ])
    }
}

/// Axis-aligned rectangle represented as minimum and maximum points.
#[derive(Copy, Clone, Default, Debug)]
#[repr(C)]
pub struct Rect {
    pub min: Point,
    pub max: Point,
}

impl Rect {
    /// Creates a new rectangle that encloses the specified collection of
    /// points.
    ///
    /// An empty collection yields the default (all-zero) rectangle.
    pub fn from_points<I>(points: I) -> Self
    where
        I: IntoIterator,
        I::Item: Borrow<Point>,
    {
        // Start from an inverted rectangle so the first point sets both corners.
        let mut rect = Self {
            min: Point::new(f32::MAX, f32::MAX),
            max: Point::new(f32::MIN, f32::MIN),
        };
        let mut count = 0;
        for point in points {
            rect.add(*point.borrow());
            count += 1;
        }
        if count != 0 {
            rect
        } else {
            Self::default()
        }
    }

    /// Returns the width of the rectangle.
    pub fn width(&self) -> f32 {
        self.max.x - self.min.x
    }

    /// Returns the height of the rectangle.
    pub fn height(&self) -> f32 {
        self.max.y - self.min.y
    }

    /// Extends the rectangle to include the specified point.
    pub fn add(&mut self, point: Point) {
        self.min.x = self.min.x.min(point.x);
        self.min.y = self.min.y.min(point.y);
        self.max.x = self.max.x.max(point.x);
        self.max.y = self.max.y.max(point.y);
    }

    /// Returns a new rectangle that encloses the minimum and maximum points
    /// of this rectangle after applying the specified transform to each.
    // NOTE(review): only two corners are transformed, so for rotations or
    // skews the result need not enclose the whole transformed rectangle.
    pub fn transform(&self, affine: &Affine) -> Self {
        Self::from_points([self.min.transform(affine), self.max.transform(affine)])
    }
}
+ +pub mod brush; +pub mod geometry; +pub mod path; +pub mod resource; +pub mod scene; diff --git a/piet-scene/src/main.rs b/piet-scene/src/main.rs new file mode 100644 index 0000000..8fd361a --- /dev/null +++ b/piet-scene/src/main.rs @@ -0,0 +1,30 @@ +use piet_scene::geometry::*; +use piet_scene::path::*; +use piet_scene::scene::*; +use piet_scene::{geometry::*, path::*, resource::ResourceContext, scene::*}; + +fn main() { + let mut scene = Scene::default(); + let mut rcx = ResourceContext::new(); + let mut sb = build_scene(&mut scene, &mut rcx); + + sb.push_layer(Blend::default(), Rect::default().elements()); + + let mut path = Path::new(); + let mut b = PathBuilder::new(&mut path); + b.move_to(100., 100.); + b.line_to(200., 200.); + b.close_path(); + b.move_to(50., 50.); + b.line_to(600., 150.); + b.move_to(4., 2.); + b.quad_to(8., 8., 9., 9.); + b.close_path(); + println!("{:?}", path); + for el in path.elements() { + println!("{:?}", el); + } + //sb.push_layer(path.elements(), BlendMode::default()); + + sb.push_layer(Blend::default(), [Element::MoveTo((0., 0.).into())]); +} diff --git a/piet-scene/src/path.rs b/piet-scene/src/path.rs new file mode 100644 index 0000000..ffbce3e --- /dev/null +++ b/piet-scene/src/path.rs @@ -0,0 +1,311 @@ +// Copyright 2022 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Also licensed under MIT license, at your choice. + +use super::geometry::{Point, Rect}; + +/// Action of a path element. 
+#[derive(Copy, Clone, PartialEq, Eq, Debug)] +pub enum Verb { + MoveTo, + LineTo, + QuadTo, + CurveTo, + Close, +} + +/// Element of a path represented by a verb and its associated points. +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum Element { + MoveTo(Point), + LineTo(Point), + QuadTo(Point, Point), + CurveTo(Point, Point, Point), + Close, +} + +impl Element { + /// Returns the verb that describes the action of the path element. + pub fn verb(&self) -> Verb { + match self { + Self::MoveTo(..) => Verb::MoveTo, + Self::LineTo(..) => Verb::LineTo, + Self::QuadTo(..) => Verb::QuadTo, + Self::CurveTo(..) => Verb::CurveTo, + Self::Close => Verb::Close, + } + } +} + +/// Encoded collection of path elements. +#[derive(Clone, Default, Debug)] +pub struct Path { + tag_stream: Vec, + pathseg_stream: Vec, + n_path: u32, + n_pathseg: u32, +} + +impl Path { + pub fn new() -> Self { + Self::default() + } + + pub fn elements(&self) -> Elements { + Elements::new(&self) + } +} + +#[derive(Clone)] +pub struct Elements<'a> { + tag_stream: &'a [u8], + points: &'a [[f32; 2]], + tag_ix: usize, + point_ix: usize, + next_element: Option, + close: bool, +} + +impl<'a> Elements<'a> { + fn new(path: &'a Path) -> Self { + let points: &'a [[f32; 2]] = bytemuck::cast_slice(&path.pathseg_stream); + let (point_ix, next_element) = match points.get(0) { + Some(&point) => (1, Some(Element::MoveTo(point.into()))), + None => (0, None), + }; + Self { + tag_stream: &path.tag_stream, + points, + tag_ix: 0, + point_ix, + next_element, + close: false, + } + } +} + +impl<'a> Iterator for Elements<'a> { + type Item = Element; + + fn next(&mut self) -> Option { + // println!("n_points: {}", self.points.len()); + // println!("tag_ix: {}, point_ix: {}, el: {:?}, close: {}", self.tag_ix, self.point_ix, self.next_element, self.close); + if self.close { + self.close = false; + return Some(Element::Close); + } + if let Some(next_el) = self.next_element.take() { + return Some(next_el); + } + let tag = 
*self.tag_stream.get(self.tag_ix)?; + self.tag_ix += 1; + let end = tag & 4 != 0; + let el = match tag & 3 { + 1 => { + let p0 = *self.points.get(self.point_ix)?; + self.point_ix += 1; + Element::LineTo(p0.into()) + } + 2 => { + let p0 = *self.points.get(self.point_ix)?; + let p1 = *self.points.get(self.point_ix + 1)?; + self.point_ix += 2; + Element::QuadTo(p0.into(), p1.into()) + } + 3 => { + let p0 = *self.points.get(self.point_ix)?; + let p1 = *self.points.get(self.point_ix + 1)?; + let p2 = *self.points.get(self.point_ix + 2)?; + self.point_ix += 3; + Element::CurveTo(p0.into(), p1.into(), p2.into()) + } + _ => return None, + }; + if end { + // println!("END!"); + if let Some(&p0) = self.points.get(self.point_ix) { + self.point_ix += 1; + self.next_element = Some(Element::MoveTo(p0.into())); + } + self.close = tag & 0x80 != 0; + } + Some(el) + } +} + +pub struct PathBuilder<'a> { + tag_stream: &'a mut Vec, + // If we're never going to use the i16 encoding, it might be + // slightly faster to store this as Vec, we'd get aligned + // stores on ARM etc. 
+ pathseg_stream: &'a mut Vec, + first_pt: [f32; 2], + state: State, + n_pathseg: u32, +} + +#[derive(PartialEq)] +enum State { + Start, + MoveTo, + NonemptySubpath, +} + +impl<'a> PathBuilder<'a> { + pub fn new(path: &'a mut Path) -> Self { + Self { + tag_stream: &mut path.tag_stream, + pathseg_stream: &mut path.pathseg_stream, + first_pt: [0.0, 0.0], + state: State::Start, + n_pathseg: 0, + } + } + + fn new_inner(tags: &'a mut Vec, pathsegs: &'a mut Vec) -> PathBuilder<'a> { + PathBuilder { + tag_stream: tags, + pathseg_stream: pathsegs, + first_pt: [0.0, 0.0], + state: State::Start, + n_pathseg: 0, + } + } + + pub fn move_to(&mut self, x: f32, y: f32) { + let buf = [x, y]; + let bytes = bytemuck::bytes_of(&buf); + self.first_pt = buf; + if self.state == State::MoveTo { + let new_len = self.pathseg_stream.len() - 8; + self.pathseg_stream.truncate(new_len); + } + if self.state == State::NonemptySubpath { + if let Some(tag) = self.tag_stream.last_mut() { + *tag |= 4; + } + } + self.pathseg_stream.extend_from_slice(bytes); + self.state = State::MoveTo; + } + + pub fn line_to(&mut self, x: f32, y: f32) { + if self.state == State::Start { + // should warn or error + return; + } + let buf = [x, y]; + let bytes = bytemuck::bytes_of(&buf); + self.pathseg_stream.extend_from_slice(bytes); + self.tag_stream.push(9); + self.state = State::NonemptySubpath; + self.n_pathseg += 1; + } + + pub fn quad_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32) { + if self.state == State::Start { + return; + } + let buf = [x1, y1, x2, y2]; + let bytes = bytemuck::bytes_of(&buf); + self.pathseg_stream.extend_from_slice(bytes); + self.tag_stream.push(10); + self.state = State::NonemptySubpath; + self.n_pathseg += 1; + } + + pub fn cubic_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32, x3: f32, y3: f32) { + if self.state == State::Start { + return; + } + let buf = [x1, y1, x2, y2, x3, y3]; + let bytes = bytemuck::bytes_of(&buf); + self.pathseg_stream.extend_from_slice(bytes); + 
self.tag_stream.push(11); + self.state = State::NonemptySubpath; + self.n_pathseg += 1; + } + + pub fn close_path(&mut self) { + match self.state { + State::Start => return, + State::MoveTo => { + let new_len = self.pathseg_stream.len() - 8; + self.pathseg_stream.truncate(new_len); + self.state = State::Start; + return; + } + State::NonemptySubpath => (), + } + let len = self.pathseg_stream.len(); + if len < 8 { + // can't happen + return; + } + let first_bytes = bytemuck::bytes_of(&self.first_pt); + if &self.pathseg_stream[len - 8..len] != first_bytes { + self.pathseg_stream.extend_from_slice(first_bytes); + self.tag_stream.push(0x80 | 13); + self.n_pathseg += 1; + } else { + if let Some(tag) = self.tag_stream.last_mut() { + *tag |= 0x80 | 4; + } + } + self.state = State::Start; + } + + fn finish(&mut self) { + if self.state == State::MoveTo { + let new_len = self.pathseg_stream.len() - 8; + self.pathseg_stream.truncate(new_len); + } + if let Some(tag) = self.tag_stream.last_mut() { + *tag |= 4; + } + } + + /// Finish encoding a path. + /// + /// Encode this after encoding path segments. + pub fn path(&mut self) { + self.finish(); + // maybe don't encode if path is empty? might throw off sync though + self.tag_stream.push(0x10); + } + + /// Get the number of path segments. + /// + /// This is the number of path segments that will be written by the + /// path stage; use this for allocating the output buffer. + /// + /// Also note: it takes `self` for lifetime reasons. 
+ pub fn n_pathseg(self) -> u32 { + self.n_pathseg + } +} + +impl Rect { + pub fn elements(&self) -> impl Iterator + Clone { + let elements = [ + Element::MoveTo((self.min.x, self.min.y).into()), + Element::LineTo((self.max.x, self.min.y).into()), + Element::LineTo((self.max.x, self.max.y).into()), + Element::LineTo((self.min.x, self.max.y).into()), + Element::Close, + ]; + (0..5).map(move |i| elements[i]) + } +} diff --git a/piet-scene/src/resource/mod.rs b/piet-scene/src/resource/mod.rs new file mode 100644 index 0000000..36e6419 --- /dev/null +++ b/piet-scene/src/resource/mod.rs @@ -0,0 +1,37 @@ +mod ramp_cache; + +use crate::brush::{Brush, Stop}; +use ramp_cache::RampCache; + +/// Context for caching resources across rendering operations. +#[derive(Default)] +pub struct ResourceContext { + ramp_cache: RampCache, +} + +impl ResourceContext { + pub fn new() -> Self { + Self::default() + } + + pub fn advance(&mut self) { + self.ramp_cache.advance(); + } + + pub fn add_ramp(&mut self, stops: &[Stop]) -> u32 { + self.ramp_cache.add(stops) + } + + pub fn create_brush(&mut self, brush: &Brush) -> PersistentBrush { + PersistentBrush { kind: 0, id: 0 } + } + + pub fn destroy_brush(&mut self, brush: PersistentBrush) {} +} + +/// Handle for a brush that is managed by the resource context. 
+#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug)] +pub struct PersistentBrush { + kind: u8, + id: u64, +} diff --git a/piet-scene/src/resource/ramp_cache.rs b/piet-scene/src/resource/ramp_cache.rs new file mode 100644 index 0000000..0c509e4 --- /dev/null +++ b/piet-scene/src/resource/ramp_cache.rs @@ -0,0 +1,138 @@ +use crate::brush::{Color, Stop, StopVec}; +use std::collections::HashMap; + +const N_SAMPLES: usize = 512; +const RETAINED_COUNT: usize = 64; + +#[derive(Default)] +pub struct RampCache { + epoch: u64, + map: HashMap, + data: Vec, +} + +impl RampCache { + pub fn new() -> Self { + Self::default() + } + + pub fn advance(&mut self) { + self.epoch += 1; + if self.map.len() > RETAINED_COUNT { + self.map + .retain(|_key, value| value.0 < RETAINED_COUNT as u32); + self.data.truncate(RETAINED_COUNT * N_SAMPLES); + } + } + + pub fn clear(&mut self) { + self.epoch = 0; + self.map.clear(); + self.data.clear(); + } + + pub fn add(&mut self, stops: &[Stop]) -> u32 { + if let Some(entry) = self.map.get_mut(stops) { + entry.1 = self.epoch; + entry.0 + } else if self.map.len() < RETAINED_COUNT { + let id = (self.data.len() / N_SAMPLES) as u32; + self.data.extend(make_ramp(stops)); + self.map.insert(stops.into(), (id, self.epoch)); + id + } else { + let mut reuse = None; + for (stops, (id, epoch)) in &self.map { + if *epoch + 2 < self.epoch { + reuse = Some((stops.to_owned(), *id)); + break; + } + } + if let Some((old_stops, id)) = reuse { + self.map.remove(&old_stops); + let start = id as usize * N_SAMPLES; + for (dst, src) in self.data[start..start + N_SAMPLES] + .iter_mut() + .zip(make_ramp(stops)) + { + *dst = src; + } + self.map.insert(stops.into(), (id, self.epoch)); + id + } else { + let id = (self.data.len() / N_SAMPLES) as u32; + self.data.extend(make_ramp(stops)); + self.map.insert(stops.into(), (id, self.epoch)); + id + } + } + } + + pub fn data(&self) -> &[u32] { + &self.data + } +} + +fn make_ramp<'a>(stops: &'a [Stop]) -> impl Iterator + 'a { + let mut 
last_u = 0.0; + let mut last_c = ColorF64::from_color(stops[0].color); + let mut this_u = last_u; + let mut this_c = last_c; + let mut j = 0; + (0..N_SAMPLES).map(move |i| { + let u = (i as f64) / (N_SAMPLES - 1) as f64; + while u > this_u { + last_u = this_u; + last_c = this_c; + if let Some(s) = stops.get(j + 1) { + this_u = s.offset as f64; + this_c = ColorF64::from_color(s.color); + j += 1; + } else { + break; + } + } + let du = this_u - last_u; + let c = if du < 1e-9 { + this_c + } else { + last_c.lerp(&this_c, (u - last_u) / du) + }; + c.to_premul_u32() + }) +} + +#[derive(Copy, Clone)] +struct ColorF64([f64; 4]); + +impl ColorF64 { + fn from_color(color: Color) -> Self { + Self([ + color.r as f64 / 255.0, + color.g as f64 / 255.0, + color.b as f64 / 255.0, + color.a as f64 / 255.0, + ]) + } + + fn lerp(&self, other: &Self, a: f64) -> Self { + fn l(x: f64, y: f64, a: f64) -> f64 { + x + (y - x) * a + } + Self([ + l(self.0[0], other.0[0], a), + l(self.0[1], other.0[1], a), + l(self.0[2], other.0[2], a), + l(self.0[3], other.0[3], a), + ]) + } + + fn to_premul_u32(&self) -> u32 { + let a = self.0[3].min(1.0).max(0.0); + let r = ((self.0[0] * a).min(1.0).max(0.0) / 255.0) as u32; + let g = ((self.0[1] * a).min(1.0).max(0.0) / 255.0) as u32; + let b = ((self.0[2] * a).min(1.0).max(0.0) / 255.0) as u32; + let a = (a / 255.0) as u32; + r | (g << 8) | (b << 16) | (a << 24) + } +} diff --git a/piet-scene/src/scene/blend.rs b/piet-scene/src/scene/blend.rs new file mode 100644 index 0000000..7edc6cd --- /dev/null +++ b/piet-scene/src/scene/blend.rs @@ -0,0 +1,102 @@ +// Copyright 2022 The piet-gpu authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
/// Defines the color mixing function for a blend operation.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub enum Mix {
    Normal = 0,
    Multiply = 1,
    Screen = 2,
    Overlay = 3,
    Darken = 4,
    Lighten = 5,
    ColorDodge = 6,
    ColorBurn = 7,
    HardLight = 8,
    SoftLight = 9,
    Difference = 10,
    Exclusion = 11,
    Hue = 12,
    Saturation = 13,
    Color = 14,
    Luminosity = 15,
}

/// Defines the layer composition function for a blend operation.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
#[repr(C)]
pub enum Compose {
    Clear = 0,
    Copy = 1,
    Dest = 2,
    SrcOver = 3,
    DestOver = 4,
    SrcIn = 5,
    DestIn = 6,
    SrcOut = 7,
    DestOut = 8,
    SrcAtop = 9,
    DestAtop = 10,
    Xor = 11,
    Plus = 12,
    PlusDarker = 13,
    PlusLighter = 14,
}

/// Blend mode consisting of mixing and composition functions.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub struct Blend {
    pub mix: Mix,
    pub compose: Compose,
}

impl Blend {
    /// Creates a blend mode from its mixing and composition functions.
    pub fn new(mix: Mix, compose: Compose) -> Self {
        Self { mix, compose }
    }

    /// Packs the blend mode into a single word: the mix discriminant shifted
    /// left by eight bits, with the compose discriminant in the low byte.
    pub fn pack(&self) -> u32 {
        ((self.mix as u32) << 8) | (self.compose as u32)
    }
}

impl Default for Blend {
    /// Normal mixing with source-over composition.
    fn default() -> Self {
        Self {
            mix: Mix::Normal,
            compose: Compose::SrcOver,
        }
    }
}

impl From<Mix> for Blend {
    /// Uses the given mix function with source-over composition.
    fn from(mix: Mix) -> Self {
        Self {
            mix,
            compose: Compose::SrcOver,
        }
    }
}

impl From<Compose> for Blend {
    /// Uses the given composition function with normal mixing.
    fn from(compose: Compose) -> Self {
        Self {
            mix: Mix::Normal,
            compose,
        }
    }
}
+pub fn build_scene<'a>(scene: &'a mut Scene, resources: &'a mut ResourceContext) -> Builder<'a> { + Builder::new(&mut scene.data, ResourceData::Scene(resources)) +} + +/// Creates a new builder for construction a scene fragment. +pub fn build_fragment<'a>(fragment: &'a mut Fragment) -> Builder<'a> { + Builder::new( + &mut fragment.data, + ResourceData::Fragment(&mut fragment.resources), + ) +} + +/// Builder for constructing a scene or scene fragment. +pub struct Builder<'a> { + scene: &'a mut SceneData, + resources: ResourceData<'a>, +} + +impl<'a> Builder<'a> { + /// Creates a new builder for constructing a scene. + fn new(scene: &'a mut SceneData, resources: ResourceData<'a>) -> Self { + Self { scene, resources } + } + + /// Pushes a transform matrix onto the stack. + pub fn push_transform(&mut self, transform: &Affine) {} + + /// Pops the current transform matrix. + pub fn pop_transform(&mut self) {} + + /// Pushes a new layer bound by the specifed shape and composed with + /// previous layers using the specified blend mode. + pub fn push_layer<'s, E>(&mut self, blend: Blend, elements: E) + where + E: IntoIterator, + E::IntoIter: Clone, + E::Item: Borrow, + { + let elements = elements.into_iter(); + self.encode_path(elements, true); + } + + /// Pops the current layer. + pub fn pop_layer(&mut self) {} + + /// Fills a shape using the specified style and brush. + pub fn fill<'s, E>( + &mut self, + style: Fill, + brush: &Brush, + brush_transform: Option, + elements: E, + ) where + E: IntoIterator, + E::IntoIter: Clone, + E::Item: Borrow, + { + let elements = elements.into_iter(); + self.encode_path(elements, true); + } + + /// Strokes a shape using the specified style and brush. 
+ pub fn stroke<'s, D, E>( + &mut self, + style: &Stroke, + brush: &Brush, + brush_transform: Option, + elements: E, + ) where + D: Borrow<[f32]>, + E: IntoIterator, + E::IntoIter: Clone, + E::Item: Borrow, + { + let elements = elements.into_iter(); + self.encode_path(elements, false); + } + + /// Appends a fragment to the scene. + pub fn append(&mut self, fragment: &Fragment) { + let drawdata_base = self.scene.drawdata_stream.len(); + self.scene.append(&fragment.data); + match &mut self.resources { + ResourceData::Scene(res) => { + for patch in &fragment.resources.patches { + match patch { + ResourcePatch::Ramp { + drawdata_offset, + stops, + } => { + let stops = &fragment.resources.stops[stops.clone()]; + let ramp_id = res.add_ramp(stops); + let patch_base = *drawdata_offset + drawdata_base; + (&mut self.scene.drawdata_stream[patch_base..patch_base + 4]) + .copy_from_slice(bytemuck::bytes_of(&ramp_id)); + } + } + } + } + ResourceData::Fragment(res) => { + let stops_base = res.stops.len(); + res.stops.extend_from_slice(&fragment.resources.stops); + res.patches.extend(fragment.resources.patches.iter().map( + |pending| match pending { + ResourcePatch::Ramp { + drawdata_offset, + stops, + } => ResourcePatch::Ramp { + drawdata_offset: drawdata_offset + drawdata_base, + stops: stops.start + stops_base..stops.end + stops_base, + }, + }, + )); + } + } + } + + /// Completes construction and finalizes the underlying scene. + pub fn finish(self) {} + + fn encode_path(&mut self, elements: E, is_fill: bool) + where + E: Iterator, + E::Item: Borrow, + { + if is_fill { + self.encode_path_inner( + elements + .map(|el| *el.borrow()) + .flat_map(|el| { + match el { + Element::MoveTo(..) 
=> Some(Element::Close), + _ => None, + } + .into_iter() + .chain(Some(el)) + }) + .chain(Some(Element::Close)), + ) + } else { + self.encode_path_inner(elements.map(|el| *el.borrow())) + } + } + + fn encode_path_inner(&mut self, elements: impl Iterator) { + let mut b = PathBuilder::new(&mut self.scene.tag_stream, &mut self.scene.pathseg_stream); + for el in elements { + match el { + Element::MoveTo(p0) => b.move_to(p0.x, p0.y), + Element::LineTo(p0) => b.line_to(p0.x, p0.y), + Element::QuadTo(p0, p1) => b.quad_to(p0.x, p0.y, p1.x, p1.y), + Element::CurveTo(p0, p1, p2) => b.cubic_to(p0.x, p0.y, p1.x, p1.y, p2.x, p2.y), + Element::Close => b.close_path(), + } + } + b.path(); + let n_pathseg = b.n_pathseg(); + self.scene.n_path += 1; + self.scene.n_pathseg += n_pathseg; + } + + fn encode_brush(&mut self, brush: &Brush) { + match brush { + Brush::Solid(color) => { + self.scene.drawtag_stream.push(DRAWTAG_FILLCOLOR); + let rgba_color = color.to_premul_u32(); + self.scene + .drawdata_stream + .extend(bytemuck::bytes_of(&FillColor { rgba_color })); + } + Brush::LinearGradient(gradient) => { + let index = self.add_ramp(&gradient.stops); + self.scene.drawtag_stream.push(DRAWTAG_FILLLINGRADIENT); + self.scene + .drawdata_stream + .extend(bytemuck::bytes_of(&FillLinGradient { + index, + p0: [gradient.start.x, gradient.start.y], + p1: [gradient.end.x, gradient.end.y], + })); + } + Brush::RadialGradient(gradient) => { + let index = self.add_ramp(&gradient.stops); + self.scene.drawtag_stream.push(DRAWTAG_FILLRADGRADIENT); + self.scene + .drawdata_stream + .extend(bytemuck::bytes_of(&FillRadGradient { + index, + p0: [gradient.center0.x, gradient.center0.y], + p1: [gradient.center1.x, gradient.center1.y], + r0: gradient.radius0, + r1: gradient.radius1, + })); + } + Brush::SweepGradient(_gradient) => todo!("sweep gradients aren't done yet!"), + Brush::Image(_image) => todo!("images aren't done yet!"), + Brush::Persistent(_) => todo!("persistent brushes aren't done yet!"), + } + } 
+
+    /// Registers the gradient stops `stops` and returns a `u32` identifying them.
+    ///
+    /// When building a scene the call is forwarded to the `ResourceContext`
+    /// and its result is returned unchanged. When building a fragment the
+    /// stops are copied to the end of the fragment's own stop list and a
+    /// `ResourcePatch::Ramp` is recorded; the returned value is the index of
+    /// that patch in the fragment's patch list.
+    fn add_ramp(&mut self, stops: &[Stop]) -> u32 {
+        match &mut self.resources {
+            ResourceData::Scene(res) => res.add_ramp(stops),
+            ResourceData::Fragment(res) => {
+                let stops_start = res.stops.len();
+                res.stops.extend_from_slice(stops);
+                let id = res.patches.len() as u32;
+                res.patches.push(ResourcePatch::Ramp {
+                    // Length of the draw data stream at the time of this call.
+                    drawdata_offset: self.scene.drawdata_stream.len(),
+                    stops: stops_start..stops_start + stops.len(),
+                });
+                id
+            }
+        }
+    }
+}
+
+/// Destination for resources registered while building: either the
+/// resources owned by a fragment or the resource context of a scene.
+enum ResourceData<'a> {
+    Fragment(&'a mut FragmentResources),
+    Scene(&'a mut ResourceContext),
+}
+
+// Tags for draw objects. See shader/drawtag.h for the authoritative source.
+const DRAWTAG_FILLCOLOR: u32 = 0x44;
+const DRAWTAG_FILLLINGRADIENT: u32 = 0x114;
+const DRAWTAG_FILLRADGRADIENT: u32 = 0x2dc;
+const DRAWTAG_BEGINCLIP: u32 = 0x05;
+const DRAWTAG_ENDCLIP: u32 = 0x25;
+
+// The records below are `#[repr(C)]` plain-old-data (`Pod`) structs.
+// NOTE(review): presumably each is the draw data payload written next to the
+// draw tag of the same name -- confirm against the encoder.
+
+/// Solid color fill record.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
+pub struct FillColor {
+    rgba_color: u32,
+}
+
+/// Linear gradient fill record: a `u32` index plus two points.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
+pub struct FillLinGradient {
+    index: u32,
+    p0: [f32; 2],
+    p1: [f32; 2],
+}
+
+/// Radial gradient fill record: a `u32` index, two points and two radii.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
+pub struct FillRadGradient {
+    index: u32,
+    p0: [f32; 2],
+    p1: [f32; 2],
+    r0: f32,
+    r1: f32,
+}
+
+/// Image fill record.
+// NOTE(review): `allow(unused)` suggests nothing constructs this yet -- confirm.
+#[allow(unused)]
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
+pub struct FillImage {
+    index: u32,
+    // [i16; 2]
+    offset: u32,
+}
+
+/// Clip record carrying a `u32` blend value.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
+pub struct Clip {
+    blend: u32,
+}
+
+/// Incremental encoder that appends one path to a tag stream and a path
+/// segment byte stream.
+///
+/// NOTE(review): the element types of both vectors were lost in the text this
+/// was reviewed from. `u8` is required for `pathseg_stream` (raw bytes from
+/// `bytemuck::bytes_of` are appended to it); `u8` for `tag_stream` is an
+/// assumption -- confirm.
+struct PathBuilder<'a> {
+    tag_stream: &'a mut Vec<u8>,
+    // If we're never going to use the i16 encoding, it might be
+    // slightly faster to store this as Vec<u32>, we'd get aligned
+    // stores on ARM etc.
+    pathseg_stream: &'a mut Vec<u8>,
+    /// First point of the current subpath, as given to the last `move_to`.
+    first_pt: [f32; 2],
+    state: PathState,
+    /// Number of segments emitted by this builder so far.
+    n_pathseg: u32,
+}
+
+/// Progress of the subpath currently being encoded.
+#[derive(Clone, Copy, Debug, PartialEq, Eq)]
+enum PathState {
+    /// No subpath is open.
+    Start,
+    /// A start point has been written but no segment follows it yet.
+    MoveTo,
+    /// At least one segment has been written for the current subpath.
+    NonemptySubpath,
+}
+
+impl<'a> PathBuilder<'a> {
+    /// Tag pushed for a line segment.
+    const LINE_TO: u8 = 9;
+    /// Tag pushed for a quadratic segment.
+    const QUAD_TO: u8 = 10;
+    /// Tag pushed for a cubic segment.
+    const CUBIC_TO: u8 = 11;
+    /// Bit or-ed into the tag of the last segment of a subpath.
+    const SUBPATH_END_BIT: u8 = 4;
+    /// Tag pushed once the whole path has been encoded.
+    const PATH: u8 = 0x10;
+    /// Encoded size in bytes of one point (`[f32; 2]`).
+    const POINT_SIZE: usize = core::mem::size_of::<[f32; 2]>();
+
+    /// Creates a builder that appends to the given tag and segment streams.
+    pub fn new(tags: &'a mut Vec<u8>, pathsegs: &'a mut Vec<u8>) -> PathBuilder<'a> {
+        PathBuilder {
+            tag_stream: tags,
+            pathseg_stream: pathsegs,
+            first_pt: [0.0, 0.0],
+            state: PathState::Start,
+            n_pathseg: 0,
+        }
+    }
+
+    /// Starts a new subpath at `(x, y)`.
+    ///
+    /// A preceding `move_to` that was not followed by any segment is
+    /// replaced; a preceding non-empty subpath gets its last segment marked
+    /// as the end of that subpath.
+    pub fn move_to(&mut self, x: f32, y: f32) {
+        let buf = [x, y];
+        self.first_pt = buf;
+        match self.state {
+            PathState::MoveTo => self.drop_pending_move(),
+            PathState::NonemptySubpath => self.mark_subpath_end(),
+            PathState::Start => (),
+        }
+        self.pathseg_stream
+            .extend_from_slice(bytemuck::bytes_of(&buf));
+        self.state = PathState::MoveTo;
+    }
+
+    /// Appends a line to `(x, y)`. Ignored when no subpath has been started.
+    pub fn line_to(&mut self, x: f32, y: f32) {
+        self.push_segment(Self::LINE_TO, bytemuck::bytes_of(&[x, y]));
+    }
+
+    /// Appends a quadratic segment with control point `(x1, y1)` ending at
+    /// `(x2, y2)`. Ignored when no subpath has been started.
+    pub fn quad_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32) {
+        self.push_segment(Self::QUAD_TO, bytemuck::bytes_of(&[x1, y1, x2, y2]));
+    }
+
+    /// Appends a cubic segment with control points `(x1, y1)` and `(x2, y2)`
+    /// ending at `(x3, y3)`. Ignored when no subpath has been started.
+    pub fn cubic_to(&mut self, x1: f32, y1: f32, x2: f32, y2: f32, x3: f32, y3: f32) {
+        self.push_segment(
+            Self::CUBIC_TO,
+            bytemuck::bytes_of(&[x1, y1, x2, y2, x3, y3]),
+        );
+    }
+
+    /// Closes the current subpath.
+    ///
+    /// Does nothing when no subpath is open. A subpath consisting only of a
+    /// start point is discarded. Otherwise, if the last encoded point is not
+    /// byte-identical to the start point, a closing line back to the start
+    /// point is emitted; in both cases the last segment ends up carrying the
+    /// subpath-end bit.
+    pub fn close_path(&mut self) {
+        match self.state {
+            PathState::Start => return,
+            PathState::MoveTo => {
+                self.drop_pending_move();
+                self.state = PathState::Start;
+                return;
+            }
+            PathState::NonemptySubpath => (),
+        }
+        let first_pt = self.first_pt;
+        let first_bytes = bytemuck::bytes_of(&first_pt);
+        // A non-empty subpath always holds its start point plus at least one
+        // segment, so the stream is never shorter than one point here.
+        if self.pathseg_stream.ends_with(first_bytes) {
+            self.mark_subpath_end();
+        } else {
+            self.pathseg_stream.extend_from_slice(first_bytes);
+            self.tag_stream
+                .push(Self::LINE_TO | Self::SUBPATH_END_BIT);
+            self.n_pathseg += 1;
+        }
+        self.state = PathState::Start;
+    }
+
+    /// Terminates the last subpath without closing it.
+    fn finish(&mut self) {
+        if self.state == PathState::MoveTo {
+            self.drop_pending_move();
+        }
+        // Only touch the last tag if this builder pushed it. With no segment
+        // emitted, the last tag in the shared stream belongs to whatever was
+        // encoded before this path and must not be modified.
+        if self.n_pathseg != 0 {
+            self.mark_subpath_end();
+        }
+    }
+
+    /// Finish encoding a path.
+    ///
+    /// Encode this after encoding path segments.
+    pub fn path(&mut self) {
+        self.finish();
+        // maybe don't encode if path is empty? might throw off sync though
+        self.tag_stream.push(Self::PATH);
+    }
+
+    /// Get the number of path segments.
+    ///
+    /// This is the number of path segments that will be written by the
+    /// path stage; use this for allocating the output buffer.
+    ///
+    /// Also note: it takes `self` for lifetime reasons.
+    pub fn n_pathseg(self) -> u32 {
+        self.n_pathseg
+    }
+
+    /// Appends one segment: its coordinate bytes and its tag.
+    fn push_segment(&mut self, tag: u8, coords: &[u8]) {
+        if self.state == PathState::Start {
+            // should warn or error
+            return;
+        }
+        self.pathseg_stream.extend_from_slice(coords);
+        self.tag_stream.push(tag);
+        self.state = PathState::NonemptySubpath;
+        self.n_pathseg += 1;
+    }
+
+    /// Removes the start point written by a `move_to` that no segment followed.
+    fn drop_pending_move(&mut self) {
+        let new_len = self.pathseg_stream.len() - Self::POINT_SIZE;
+        self.pathseg_stream.truncate(new_len);
+    }
+
+    /// Sets the subpath-end bit on the most recently pushed tag, if any.
+    fn mark_subpath_end(&mut self) {
+        if let Some(tag) = self.tag_stream.last_mut() {
+            *tag |= Self::SUBPATH_END_BIT;
+        }
+    }
+}
diff --git a/piet-scene/src/scene/mod.rs b/piet-scene/src/scene/mod.rs
new file mode 100644
index 0000000..df9db90
--- /dev/null
+++ b/piet-scene/src/scene/mod.rs
@@ -0,0 +1,97 @@
+// Copyright 2022 The piet-gpu authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Also licensed under MIT license, at your choice.
+
+mod blend;
+mod builder;
+mod style;
+
+pub use blend::{Blend, Compose, Mix};
+pub use builder::{build_fragment, build_scene, Builder};
+pub use style::*;
+
+use super::brush::*;
+use super::geometry::{Affine, Rect};
+use super::path::Element;
+use core::ops::Range;
+
+/// Encoded streams and element counts shared by scenes and fragments.
+///
+/// NOTE(review): the element types of the vectors were lost in the text this
+/// was reviewed from and have been restored by inference (the path builder
+/// appends raw bytes and tags, draw tags are `u32` constants, draw data is
+/// built from `Pod` records, `Affine` is imported here, stroke widths are
+/// `f32`) -- confirm each against the builder.
+#[derive(Default)]
+struct SceneData {
+    transform_stream: Vec<Affine>,
+    tag_stream: Vec<u8>,
+    pathseg_stream: Vec<u8>,
+    linewidth_stream: Vec<f32>,
+    drawtag_stream: Vec<u32>,
+    drawdata_stream: Vec<u8>,
+    n_path: u32,
+    n_pathseg: u32,
+    n_clip: u32,
+}
+
+impl SceneData {
+    /// Empties every stream and resets all counters to zero.
+    ///
+    /// The vectors are cleared in place, so their allocations are kept for
+    /// reuse.
+    fn clear(&mut self) {
+        self.transform_stream.clear();
+        self.tag_stream.clear();
+        self.pathseg_stream.clear();
+        self.linewidth_stream.clear();
+        self.drawtag_stream.clear();
+        self.drawdata_stream.clear();
+        self.n_path = 0;
+        self.n_pathseg = 0;
+        self.n_clip = 0;
+    }
+
+    /// Appends the contents of `other` to `self`.
+    ///
+    /// Each stream of `other` is copied verbatim onto the end of the
+    /// matching stream and the counters are added. Nothing inside the copied
+    /// data is adjusted here.
+    fn append(&mut self, other: &SceneData) {
+        self.transform_stream
+            .extend_from_slice(&other.transform_stream);
+        self.tag_stream.extend_from_slice(&other.tag_stream);
+        self.pathseg_stream.extend_from_slice(&other.pathseg_stream);
+        self.linewidth_stream
+            .extend_from_slice(&other.linewidth_stream);
+        self.drawtag_stream.extend_from_slice(&other.drawtag_stream);
+        self.drawdata_stream
+            .extend_from_slice(&other.drawdata_stream);
+        self.n_path += other.n_path;
+        self.n_pathseg += other.n_pathseg;
+        self.n_clip += other.n_clip;
+    }
+}
+
+/// Encoded definition of a scene that is ready for rendering when paired with
+/// an associated resource context.
+#[derive(Default)]
+pub struct Scene {
+    data: SceneData,
+}
+
+/// Encoded definition of a scene fragment and associated resources.
+#[derive(Default)]
+pub struct Fragment {
+    data: SceneData,
+    resources: FragmentResources,
+}
+
+/// Resources owned by a fragment: the recorded patches and the gradient
+/// stops those patches refer to.
+#[derive(Default)]
+struct FragmentResources {
+    patches: Vec<ResourcePatch>,
+    stops: Vec<Stop>,
+}
+
+/// A resource reference recorded while building a fragment.
+enum ResourcePatch {
+    /// A gradient ramp.
+    Ramp {
+        /// Length of the fragment's draw data stream when the ramp was added.
+        drawdata_offset: usize,
+        /// Range of the ramp's stops within `FragmentResources::stops`.
+        stops: Range<usize>,
+    },
+}
diff --git a/piet-scene/src/scene/style.rs b/piet-scene/src/scene/style.rs
new file mode 100644
index 0000000..0aded61
--- /dev/null
+++ b/piet-scene/src/scene/style.rs
@@ -0,0 +1,73 @@
+// Copyright 2022 The piet-gpu authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// Also licensed under MIT license, at your choice.
+
+use core::borrow::Borrow;
+
+/// Describes the winding rule that determines the interior portion of a path.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Fill {
+    /// Non-zero winding rule.
+    NonZero,
+    /// Even-odd winding rule.
+    EvenOdd,
+}
+
+/// Defines the connection between two segments of a stroke.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Join {
+    /// A straight line connecting the segments.
+    Bevel,
+    /// The segments are extended to their natural intersection point.
+    Miter,
+    /// An arc between the segments.
+    Round,
+}
+
+/// Defines the shape to be drawn at the ends of a stroke.
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Cap {
+    /// Flat cap.
+    Butt,
+    /// Square cap with dimensions equal to half the stroke width.
+    Square,
+    /// Rounded cap with radius equal to half the stroke width.
+    Round,
+}
+
+/// Describes the visual style of a stroke.
+#[derive(Copy, Clone, Debug)]
+pub struct Stroke<D>
+where
+    D: Borrow<[f32]>,
+{
+    /// Width of the stroke.
+    pub width: f32,
+    /// Style for connecting segments of the stroke.
+    pub join: Join,
+    /// Limit for miter joins.
+    pub miter_limit: f32,
+    /// Style for capping the beginning of an open subpath.
+    pub start_cap: Cap,
+    /// Style for capping the end of an open subpath.
+    pub end_cap: Cap,
+    /// Lengths of dashes in alternating on/off order.
+    pub dash_pattern: D,
+    /// Offset of the first dash.
+    pub dash_offset: f32,
+    /// True if the stroke width should be affected by the scale of a
+    /// transform.
+    pub scale: bool,
+}