diff --git a/Cargo.lock b/Cargo.lock index 1bec058..3ba133e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,6 +69,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "half" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f36b5f248235f45773d4944f555f83ea61fe07b18b561ccf99d7483d7381e54d" + [[package]] name = "inflate" version = "0.4.5" @@ -124,6 +130,7 @@ dependencies = [ name = "piet-gpu-types" version = "0.0.0" dependencies = [ + "half", "piet-gpu-derive", ] diff --git a/piet-gpu-derive/src/derive.rs b/piet-gpu-derive/src/derive.rs index bc84bfb..3b4c478 100644 --- a/piet-gpu-derive/src/derive.rs +++ b/piet-gpu-derive/src/derive.rs @@ -14,6 +14,16 @@ pub fn gen_derive(module: &LayoutModule) -> proc_macro2::TokenStream { } quote! { mod #module_name { + pub trait HalfToLeBytes { + fn to_le_bytes(&self) -> [u8; 2]; + } + + impl HalfToLeBytes for half::f16 { + fn to_le_bytes(&self) -> [u8; 2] { + self.to_bits().to_le_bytes() + } + } + #ts } } @@ -121,6 +131,7 @@ fn gen_derive_ty(ty: &GpuType) -> proc_macro2::TokenStream { fn gen_derive_scalar_ty(ty: &GpuScalar) -> proc_macro2::TokenStream { match ty { + GpuScalar::F16 => quote!(half::f16), GpuScalar::F32 => quote!(f32), GpuScalar::I8 => quote!(i8), GpuScalar::I16 => quote!(i16), diff --git a/piet-gpu-derive/src/glsl.rs b/piet-gpu-derive/src/glsl.rs index 5164179..77d5ac8 100644 --- a/piet-gpu-derive/src/glsl.rs +++ b/piet-gpu-derive/src/glsl.rs @@ -14,6 +14,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String { for name in &module.def_names { gen_refdef(&mut r, &name); } + for name in &module.def_names { match module.defs.get(name).unwrap() { (size, LayoutTypeDef::Struct(fields)) => { @@ -26,6 +27,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String { } } } + for name in &module.def_names { let def = module.defs.get(name).unwrap(); match def { @@ -43,6 +45,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String { } } } + r } @@ -98,9 +101,21 @@ fn gen_struct_read( } } writeln!(r, " {} s;", name).unwrap(); + + let mut preload: bool = false; for (name, offset, ty) in fields { - writeln!(r, " s.{} = {};", name, gen_extract(*offset, &ty.ty)).unwrap(); + let (setup, extract) = gen_extract(*offset, &ty.ty, preload); + writeln!(r, "{} s.{} = {};", setup, name, extract).unwrap(); + + if let GpuType::Scalar(GpuScalar::F16) = &ty.ty { + if offset % 4 == 0 { + preload = true; + continue; + } + } + preload = false; } + writeln!(r, " return s;").unwrap(); writeln!(r, "}}\n").unwrap(); } @@ -136,34 +151,67 @@ fn gen_enum_read( } } -fn gen_extract(offset: usize, ty: &GpuType) -> String { +fn gen_extract(offset: usize, ty: &GpuType, preload: bool) -> (String, String) { match ty { - GpuType::Scalar(scalar) => gen_extract_scalar(offset, scalar), + GpuType::Scalar(scalar) => { + let setup = match scalar { + GpuScalar::F16 => { + if preload { + String::new() + } else { + let ix = offset / 4; + format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix) + } + } + _ => String::new(), + }; + + (setup, gen_extract_scalar(offset, scalar)) + } GpuType::Vector(scalar, size) => { - let mut r = glsl_type(ty); - r.push_str("("); + let is_f16 = match scalar { + GpuScalar::F16 => true, + _ => false, + }; + + let mut setup = String::new(); + let mut extract = glsl_type(ty); + &extract.push_str("("); for i in 0..*size { if i != 0 { - r.push_str(", "); + &extract.push_str(", "); } + + if is_f16 && i % 2 == 0 { + let ix = (offset + i * scalar.size()) / 4; + let s = format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix); + setup.push_str(&s); + }; + let el_offset = offset + i * scalar.size(); - r.push_str(&gen_extract_scalar(el_offset, scalar)); + &extract.push_str(&gen_extract_scalar(el_offset, scalar)); } - r.push_str(")"); - r + &extract.push_str(")"); + (setup, extract) } - GpuType::InlineStruct(name) => format!( - "{}_read({}Ref({}))", - name, - name, - simplified_add("ref.offset", offset) + GpuType::InlineStruct(name) => ( + String::new(), + format!( + "{}_read({}Ref({}))", + name, + name, + simplified_add("ref.offset", offset) + ), ), GpuType::Ref(inner) => { if let GpuType::InlineStruct(name) = inner.deref() { - format!( - "{}Ref({})", - name, - gen_extract_scalar(offset, &GpuScalar::U32) + ( + String::new(), + format!( + "{}Ref({})", + name, + gen_extract_scalar(offset, &GpuScalar::U32) + ), ) } else { panic!("only know how to deal with Ref of struct") @@ -174,7 +222,7 @@ fn gen_extract(offset: usize, ty: &GpuType) -> String { fn gen_extract_scalar(offset: usize, ty: &GpuScalar) -> String { match ty { - GpuScalar::F32 => format!("uintBitsToFloat(raw{})", offset / 4), + GpuScalar::F16 | GpuScalar::F32 => extract_fbits(offset, ty.size()), GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, ty.size()), GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()), } @@ -210,8 +258,41 @@ fn extract_ibits(offset: usize, nbytes: usize) -> String { } } +fn extract_fbits(offset: usize, nbytes: usize) -> String { + match nbytes { + 4 => format!("uintBitsToFloat(raw{})", offset / 4), + 2 => match offset % 4 { + 0 => { + let ix = offset / 4; + format!("halves{}.x", ix) + } + 2 => format!("halves{}.y", offset / 4), + _ => panic!("unexpected packing of f16 at offset {}", offset % 4), + }, + _ => { + panic!("unexpected extraction of float with nbytes = {}", nbytes); + } + } +} + // Writing +fn is_f16(ty: &GpuType) -> bool { + match ty { + GpuType::Scalar(GpuScalar::F16) => true, + GpuType::Vector(GpuScalar::F16, _) => true, + _ => false, + } +} + +fn is_f16_pair(field_ixs: &[usize], fields: &[(String, usize, LayoutType)]) -> bool { + if field_ixs.len() == 2 { + fields.iter().all(|(_, _, t)| is_f16(&t.ty)) + } else { + false + } +} + fn gen_struct_write( r: &mut String, bufname: &str, @@ -220,39 +301,78 @@ fn gen_struct_write( ) { writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap(); let coverage = crate::layout::struct_coverage(fields, true); + for (i, field_ixs) in coverage.iter().enumerate() { let mut pieces = Vec::new(); - for field_ix in field_ixs { - let (name, offset, ty) = &fields[*field_ix]; - match &ty.ty { - GpuType::Scalar(scalar) => { - let inner = format!("s.{}", name); - pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner)); - } - GpuType::Vector(scalar, len) => { - let size = scalar.size(); - let ix_lo = (i * 4 - offset) / size; - let ix_hi = ((4 + i * 4 - offset) / size).min(*len); - for ix in ix_lo..ix_hi { - let scalar_offset = offset + ix * size; - let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]); - pieces.push(gen_pack_bits_scalar(scalar, scalar_offset, &inner)); + + if is_f16_pair(field_ixs, fields) { + let (ix0, ix1) = (field_ixs[0], field_ixs[1]); + let inner0 = format!("s.{}", fields[ix0].0); + let inner1 = format!("s.{}", fields[ix1].0); + pieces.push(format!("packHalf2x16(vec2({}, {}))", &inner0, &inner1)); + } else { + for field_ix in field_ixs { + let (name, offset, ty) = &fields[*field_ix]; + match &ty.ty { + GpuType::Scalar(scalar) => { + let inner = format!("s.{}", name); + pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner)); } + GpuType::Vector(scalar, len) => { + let size = scalar.size(); + let ix_lo = (i * 4 - offset) / size; + let ix_hi = ((4 + i * 4 - offset) / size).min(*len); + match scalar { + GpuScalar::F16 => { + if ix_hi - ix_lo == 2 { + let inner0 = + format!("s.{}.{}", name, &"xyzw"[ix_lo..ix_lo + 1]); + let inner1 = + format!("s.{}.{}", name, &"xyzw"[ix_lo + 1..ix_hi]); + pieces.push(format!( + "packHalf2x16(vec2({}, {}))", + &inner0, &inner1 + )); + } else { + let ix = ix_lo; + let scalar_offset = offset + ix * size; + let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]); + pieces.push(gen_pack_bits_scalar( + scalar, + scalar_offset, + &inner, + )); + } + } + _ => { + for ix in ix_lo..ix_hi { + let scalar_offset = offset + ix * size; + let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]); + pieces.push(gen_pack_bits_scalar( + scalar, + scalar_offset, + &inner, + )); + } + } + } + } + GpuType::InlineStruct(structname) => { + writeln!( + r, + " {}_write({}Ref({}), s.{});", + structname, + structname, + simplified_add("ref.offset", *offset), + name + ) + .unwrap(); + } + GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)), } - GpuType::InlineStruct(structname) => { - writeln!( - r, - " {}_write({}Ref({}), s.{});", - structname, - structname, - simplified_add("ref.offset", *offset), - name - ) - .unwrap(); - } - GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)), } } + if !pieces.is_empty() { write!(r, " {}[{}] = ", bufname, i).unwrap(); for (j, piece) in pieces.iter().enumerate() { @@ -270,6 +390,7 @@ fn gen_struct_write( fn gen_pack_bits_scalar(ty: &GpuScalar, offset: usize, inner: &str) -> String { let shift = (offset % 4) * 8; let bits = match ty { + GpuScalar::F16 => format!("packHalf2x16(vec2({}, 0.0)) & 0xffff", inner), GpuScalar::F32 => format!("floatBitsToUint({})", inner), // Note: this doesn't mask small unsigned int types; the caller is // responsible for making sure they don't overflow. @@ -366,7 +487,7 @@ fn glsl_type(ty: &GpuType) -> String { // GLSL type that can contain the scalar value. fn glsl_scalar(s: &GpuScalar) -> &'static str { match s { - GpuScalar::F32 => "float", + GpuScalar::F16 | GpuScalar::F32 => "float", GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int", GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uint", } @@ -374,7 +495,7 @@ fn glsl_scalar(s: &GpuScalar) -> &'static str { fn glsl_vecname(s: &GpuScalar) -> &'static str { match s { - GpuScalar::F32 => "vec", + GpuScalar::F16 | GpuScalar::F32 => "vec", GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec", GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uvec", } diff --git a/piet-gpu-derive/src/parse.rs b/piet-gpu-derive/src/parse.rs index 8e51bab..9461338 100644 --- a/piet-gpu-derive/src/parse.rs +++ b/piet-gpu-derive/src/parse.rs @@ -12,14 +12,14 @@ use syn::{ /// A scalar that can be represented in a packed data structure. #[derive(Clone, Copy, PartialEq)] pub enum GpuScalar { + F16, + F32, I8, I16, I32, - F32, U8, U16, U32, - // TODO: Add F16 } /// An algebraic datatype. @@ -52,6 +52,7 @@ impl GpuScalar { fn from_syn(ty: &syn::Type) -> Option { ty_as_single_ident(ty).and_then(|ident| match ident.as_str() { "f32" => Some(GpuScalar::F32), + "f16" => Some(GpuScalar::F16), "i8" => Some(GpuScalar::I8), "i16" => Some(GpuScalar::I16), "i32" => Some(GpuScalar::I32), @@ -70,7 +71,7 @@ impl GpuScalar { match self { GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4, GpuScalar::I8 | GpuScalar::U8 => 1, - GpuScalar::I16 | GpuScalar::U16 => 2, + GpuScalar::F16 | GpuScalar::I16 | GpuScalar::U16 => 2, } } } diff --git a/piet-gpu-types/Cargo.toml b/piet-gpu-types/Cargo.toml index 6de92a5..629cd62 100644 --- a/piet-gpu-types/Cargo.toml +++ b/piet-gpu-types/Cargo.toml @@ -9,3 +9,4 @@ keywords = ["graphics", "2d"] [dependencies] piet-gpu-derive = { path = "../piet-gpu-derive" } +half = "1.5.0" diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs index 60c11ab..2f802ca 100644 --- a/piet-gpu-types/src/lib.rs +++ b/piet-gpu-types/src/lib.rs @@ -1,3 +1,4 @@ pub mod encoder; pub mod ptcl; pub mod scene; +pub mod test; diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs index 00a5d0b..c3d537e 100644 --- a/piet-gpu-types/src/main.rs +++ b/piet-gpu-types/src/main.rs @@ -1,8 +1,12 @@ fn main() { - let mod_name = std::env::args().skip(1).next().expect("provide a module name"); + let mod_name = std::env::args() + .skip(1) + .next() + .expect("provide a module name"); match mod_name.as_str() { "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()), + "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()), _ => println!("Oops, unknown module name"), } } diff --git a/piet-gpu-types/src/test.rs b/piet-gpu-types/src/test.rs new file mode 100644 index 0000000..e92aaca --- /dev/null +++ b/piet-gpu-types/src/test.rs @@ -0,0 +1,33 @@ +use piet_gpu_derive::piet_gpu; + +piet_gpu! { + #[rust_encode] + #[gpu_write] + mod test { + struct StructA { + a: f16, + b: f16, + } + + struct StructB { + a: f16, + b: u16, + c: f16, + } + + struct StructC { + a: f16, + b: u16, + c: u16, + d: f16, + } + + struct StructD { + a: [f16; 2], + } + + struct StructE { + a: [f16; 3], + } + } +} diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs index 3d97b64..271c133 100644 --- a/piet-gpu/src/main.rs +++ b/piet-gpu/src/main.rs @@ -1,6 +1,6 @@ -use std::path::Path; use std::fs::File; use std::io::BufWriter; +use std::path::Path; use rand::{Rng, RngCore}; @@ -29,7 +29,10 @@ fn make_scene() -> Vec { let circle = PietCircle { rgba_color: rng.next_u32(), center: Point { - xy: [rng.gen_range(0.0, WIDTH as f32), rng.gen_range(0.0, HEIGHT as f32)], + xy: [ + rng.gen_range(0.0, WIDTH as f32), + rng.gen_range(0.0, HEIGHT as f32), + ], }, radius: rng.gen_range(0.0, 50.0), }; @@ -58,7 +61,7 @@ fn make_scene() -> Vec { fn dump_scene(buf: &[u8]) { for i in 0..(buf.len() / 4) { let mut buf_u32 = [0u8; 4]; - buf_u32.copy_from_slice(&buf[i * 4 .. i * 4 + 4]); + buf_u32.copy_from_slice(&buf[i * 4..i * 4 + 4]); println!("{:4x}: {:8x}", i * 4, u32::from_le_bytes(buf_u32)); } } @@ -105,12 +108,12 @@ fn main() { let path = Path::new("image.png"); let file = File::create(path).unwrap(); let ref mut w = BufWriter::new(file); - + let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32); encoder.set_color(png::ColorType::RGBA); encoder.set_depth(png::BitDepth::Eight); let mut writer = encoder.write_header().unwrap(); - + writer.write_image_data(&img_data).unwrap(); } }