From 3270ee64c294caea88a712d08ab9e91b69843f3f Mon Sep 17 00:00:00 2001 From: Brian Merchant Date: Sat, 18 Apr 2020 17:04:19 -0700 Subject: [PATCH] Add f16 support. Handling f16 requires special work, compared to other scalars, as the minimum conversion operation for u32->f16 in GLSL (unpackHalf2x16) loads two f16s from one u32. This means that in order to minimize unnecessary calls to unpackHalf2x16, we should check whether the current f16 has already been extracted in the process of dealing with the previous f16. Similar considerations exist for write operations, where we want to pack, when possible, two f16s in one go (using packHalf2x16). --- Cargo.lock | 7 ++ piet-gpu-derive/src/derive.rs | 11 ++ piet-gpu-derive/src/glsl.rs | 217 ++++++++++++++++++++++++++-------- piet-gpu-derive/src/parse.rs | 7 +- piet-gpu-types/Cargo.toml | 1 + piet-gpu-types/src/lib.rs | 1 + piet-gpu-types/src/main.rs | 6 +- piet-gpu-types/src/test.rs | 33 ++++++ piet-gpu/src/main.rs | 13 +- 9 files changed, 239 insertions(+), 57 deletions(-) create mode 100644 piet-gpu-types/src/test.rs diff --git a/Cargo.lock b/Cargo.lock index 1bec058..3ba133e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -69,6 +69,12 @@ dependencies = [ "wasi", ] +[[package]] +name = "half" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f36b5f248235f45773d4944f555f83ea61fe07b18b561ccf99d7483d7381e54d" + [[package]] name = "inflate" version = "0.4.5" @@ -124,6 +130,7 @@ dependencies = [ name = "piet-gpu-types" version = "0.0.0" dependencies = [ + "half", "piet-gpu-derive", ] diff --git a/piet-gpu-derive/src/derive.rs b/piet-gpu-derive/src/derive.rs index bc84bfb..3b4c478 100644 --- a/piet-gpu-derive/src/derive.rs +++ b/piet-gpu-derive/src/derive.rs @@ -14,6 +14,16 @@ pub fn gen_derive(module: &LayoutModule) -> proc_macro2::TokenStream { } quote! 
{ mod #module_name { + pub trait HalfToLeBytes { + fn to_le_bytes(&self) -> [u8; 2]; + } + + impl HalfToLeBytes for half::f16 { + fn to_le_bytes(&self) -> [u8; 2] { + self.to_bits().to_le_bytes() + } + } + #ts } } @@ -121,6 +131,7 @@ fn gen_derive_ty(ty: &GpuType) -> proc_macro2::TokenStream { fn gen_derive_scalar_ty(ty: &GpuScalar) -> proc_macro2::TokenStream { match ty { + GpuScalar::F16 => quote!(half::f16), GpuScalar::F32 => quote!(f32), GpuScalar::I8 => quote!(i8), GpuScalar::I16 => quote!(i16), diff --git a/piet-gpu-derive/src/glsl.rs b/piet-gpu-derive/src/glsl.rs index 5164179..77d5ac8 100644 --- a/piet-gpu-derive/src/glsl.rs +++ b/piet-gpu-derive/src/glsl.rs @@ -14,6 +14,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String { for name in &module.def_names { gen_refdef(&mut r, &name); } + for name in &module.def_names { match module.defs.get(name).unwrap() { (size, LayoutTypeDef::Struct(fields)) => { @@ -26,6 +27,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String { } } } + for name in &module.def_names { let def = module.defs.get(name).unwrap(); match def { @@ -43,6 +45,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String { } } } + r } @@ -98,9 +101,21 @@ fn gen_struct_read( } } writeln!(r, " {} s;", name).unwrap(); + + let mut preload: bool = false; for (name, offset, ty) in fields { - writeln!(r, " s.{} = {};", name, gen_extract(*offset, &ty.ty)).unwrap(); + let (setup, extract) = gen_extract(*offset, &ty.ty, preload); + writeln!(r, "{} s.{} = {};", setup, name, extract).unwrap(); + + if let GpuType::Scalar(GpuScalar::F16) = &ty.ty { + if offset % 4 == 0 { + preload = true; + continue; + } + } + preload = false; } + writeln!(r, " return s;").unwrap(); writeln!(r, "}}\n").unwrap(); } @@ -136,34 +151,67 @@ fn gen_enum_read( } } -fn gen_extract(offset: usize, ty: &GpuType) -> String { +fn gen_extract(offset: usize, ty: &GpuType, preload: bool) -> (String, String) { match ty { - GpuType::Scalar(scalar) => gen_extract_scalar(offset, scalar), + 
GpuType::Scalar(scalar) => { + let setup = match scalar { + GpuScalar::F16 => { + if preload { + String::new() + } else { + let ix = offset / 4; + format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix) + } + } + _ => String::new(), + }; + + (setup, gen_extract_scalar(offset, scalar)) + } GpuType::Vector(scalar, size) => { - let mut r = glsl_type(ty); - r.push_str("("); + let is_f16 = match scalar { + GpuScalar::F16 => true, + _ => false, + }; + + let mut setup = String::new(); + let mut extract = glsl_type(ty); + &extract.push_str("("); for i in 0..*size { if i != 0 { - r.push_str(", "); + &extract.push_str(", "); } + + if is_f16 && i % 2 == 0 { + let ix = (offset + i * scalar.size()) / 4; + let s = format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix); + setup.push_str(&s); + }; + let el_offset = offset + i * scalar.size(); - r.push_str(&gen_extract_scalar(el_offset, scalar)); + &extract.push_str(&gen_extract_scalar(el_offset, scalar)); } - r.push_str(")"); - r + &extract.push_str(")"); + (setup, extract) } - GpuType::InlineStruct(name) => format!( - "{}_read({}Ref({}))", - name, - name, - simplified_add("ref.offset", offset) + GpuType::InlineStruct(name) => ( + String::new(), + format!( + "{}_read({}Ref({}))", + name, + name, + simplified_add("ref.offset", offset) + ), ), GpuType::Ref(inner) => { if let GpuType::InlineStruct(name) = inner.deref() { - format!( - "{}Ref({})", - name, - gen_extract_scalar(offset, &GpuScalar::U32) + ( + String::new(), + format!( + "{}Ref({})", + name, + gen_extract_scalar(offset, &GpuScalar::U32) + ), ) } else { panic!("only know how to deal with Ref of struct") @@ -174,7 +222,7 @@ fn gen_extract(offset: usize, ty: &GpuType) -> String { fn gen_extract_scalar(offset: usize, ty: &GpuScalar) -> String { match ty { - GpuScalar::F32 => format!("uintBitsToFloat(raw{})", offset / 4), + GpuScalar::F16 | GpuScalar::F32 => extract_fbits(offset, ty.size()), GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, 
ty.size()), GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()), } @@ -210,8 +258,41 @@ fn extract_ibits(offset: usize, nbytes: usize) -> String { } } +fn extract_fbits(offset: usize, nbytes: usize) -> String { + match nbytes { + 4 => format!("uintBitsToFloat(raw{})", offset / 4), + 2 => match offset % 4 { + 0 => { + let ix = offset / 4; + format!("halves{}.x", ix) + } + 2 => format!("halves{}.y", offset / 4), + _ => panic!("unexpected packing of f16 at offset {}", offset % 4), + }, + _ => { + panic!("unexpected extraction of float with nbytes = {}", nbytes); + } + } +} + // Writing +fn is_f16(ty: &GpuType) -> bool { + match ty { + GpuType::Scalar(GpuScalar::F16) => true, + GpuType::Vector(GpuScalar::F16, _) => true, + _ => false, + } +} + +fn is_f16_pair(field_ixs: &[usize], fields: &[(String, usize, LayoutType)]) -> bool { + if field_ixs.len() == 2 { + fields.iter().all(|(_, _, t)| is_f16(&t.ty)) + } else { + false + } +} + fn gen_struct_write( r: &mut String, bufname: &str, @@ -220,39 +301,78 @@ fn gen_struct_write( ) { writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap(); let coverage = crate::layout::struct_coverage(fields, true); + for (i, field_ixs) in coverage.iter().enumerate() { let mut pieces = Vec::new(); - for field_ix in field_ixs { - let (name, offset, ty) = &fields[*field_ix]; - match &ty.ty { - GpuType::Scalar(scalar) => { - let inner = format!("s.{}", name); - pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner)); - } - GpuType::Vector(scalar, len) => { - let size = scalar.size(); - let ix_lo = (i * 4 - offset) / size; - let ix_hi = ((4 + i * 4 - offset) / size).min(*len); - for ix in ix_lo..ix_hi { - let scalar_offset = offset + ix * size; - let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]); - pieces.push(gen_pack_bits_scalar(scalar, scalar_offset, &inner)); + + if is_f16_pair(field_ixs, fields) { + let (ix0, ix1) = (field_ixs[0], field_ixs[1]); + let inner0 = format!("s.{}", 
fields[ix0].0); + let inner1 = format!("s.{}", fields[ix1].0); + pieces.push(format!("packHalf2x16(vec2({}, {}))", &inner0, &inner1)); + } else { + for field_ix in field_ixs { + let (name, offset, ty) = &fields[*field_ix]; + match &ty.ty { + GpuType::Scalar(scalar) => { + let inner = format!("s.{}", name); + pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner)); } + GpuType::Vector(scalar, len) => { + let size = scalar.size(); + let ix_lo = (i * 4 - offset) / size; + let ix_hi = ((4 + i * 4 - offset) / size).min(*len); + match scalar { + GpuScalar::F16 => { + if ix_hi - ix_lo == 2 { + let inner0 = + format!("s.{}.{}", name, &"xyzw"[ix_lo..ix_lo + 1]); + let inner1 = + format!("s.{}.{}", name, &"xyzw"[ix_lo + 1..ix_hi]); + pieces.push(format!( + "packHalf2x16(vec2({}, {}))", + &inner0, &inner1 + )); + } else { + let ix = ix_lo; + let scalar_offset = offset + ix * size; + let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]); + pieces.push(gen_pack_bits_scalar( + scalar, + scalar_offset, + &inner, + )); + } + } + _ => { + for ix in ix_lo..ix_hi { + let scalar_offset = offset + ix * size; + let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]); + pieces.push(gen_pack_bits_scalar( + scalar, + scalar_offset, + &inner, + )); + } + } + } + } + GpuType::InlineStruct(structname) => { + writeln!( + r, + " {}_write({}Ref({}), s.{});", + structname, + structname, + simplified_add("ref.offset", *offset), + name + ) + .unwrap(); + } + GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)), } - GpuType::InlineStruct(structname) => { - writeln!( - r, - " {}_write({}Ref({}), s.{});", - structname, - structname, - simplified_add("ref.offset", *offset), - name - ) - .unwrap(); - } - GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)), } } + if !pieces.is_empty() { write!(r, " {}[{}] = ", bufname, i).unwrap(); for (j, piece) in pieces.iter().enumerate() { @@ -270,6 +390,7 @@ fn gen_struct_write( fn gen_pack_bits_scalar(ty: &GpuScalar, offset: usize, inner: 
&str) -> String { let shift = (offset % 4) * 8; let bits = match ty { + GpuScalar::F16 => format!("packHalf2x16(vec2({}, 0.0)) & 0xffff", inner), GpuScalar::F32 => format!("floatBitsToUint({})", inner), // Note: this doesn't mask small unsigned int types; the caller is // responsible for making sure they don't overflow. @@ -366,7 +487,7 @@ fn glsl_type(ty: &GpuType) -> String { // GLSL type that can contain the scalar value. fn glsl_scalar(s: &GpuScalar) -> &'static str { match s { - GpuScalar::F32 => "float", + GpuScalar::F16 | GpuScalar::F32 => "float", GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int", GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uint", } @@ -374,7 +495,7 @@ fn glsl_scalar(s: &GpuScalar) -> &'static str { fn glsl_vecname(s: &GpuScalar) -> &'static str { match s { - GpuScalar::F32 => "vec", + GpuScalar::F16 | GpuScalar::F32 => "vec", GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec", GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uvec", } diff --git a/piet-gpu-derive/src/parse.rs b/piet-gpu-derive/src/parse.rs index 8e51bab..9461338 100644 --- a/piet-gpu-derive/src/parse.rs +++ b/piet-gpu-derive/src/parse.rs @@ -12,14 +12,14 @@ use syn::{ /// A scalar that can be represented in a packed data structure. #[derive(Clone, Copy, PartialEq)] pub enum GpuScalar { + F16, + F32, I8, I16, I32, - F32, U8, U16, U32, - // TODO: Add F16 } /// An algebraic datatype. 
@@ -52,6 +52,7 @@ impl GpuScalar { fn from_syn(ty: &syn::Type) -> Option { ty_as_single_ident(ty).and_then(|ident| match ident.as_str() { "f32" => Some(GpuScalar::F32), + "f16" => Some(GpuScalar::F16), "i8" => Some(GpuScalar::I8), "i16" => Some(GpuScalar::I16), "i32" => Some(GpuScalar::I32), @@ -70,7 +71,7 @@ impl GpuScalar { match self { GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4, GpuScalar::I8 | GpuScalar::U8 => 1, - GpuScalar::I16 | GpuScalar::U16 => 2, + GpuScalar::F16 | GpuScalar::I16 | GpuScalar::U16 => 2, } } } diff --git a/piet-gpu-types/Cargo.toml b/piet-gpu-types/Cargo.toml index 6de92a5..629cd62 100644 --- a/piet-gpu-types/Cargo.toml +++ b/piet-gpu-types/Cargo.toml @@ -9,3 +9,4 @@ keywords = ["graphics", "2d"] [dependencies] piet-gpu-derive = { path = "../piet-gpu-derive" } +half = "1.5.0" diff --git a/piet-gpu-types/src/lib.rs b/piet-gpu-types/src/lib.rs index 60c11ab..2f802ca 100644 --- a/piet-gpu-types/src/lib.rs +++ b/piet-gpu-types/src/lib.rs @@ -1,3 +1,4 @@ pub mod encoder; pub mod ptcl; pub mod scene; +pub mod test; diff --git a/piet-gpu-types/src/main.rs b/piet-gpu-types/src/main.rs index 00a5d0b..c3d537e 100644 --- a/piet-gpu-types/src/main.rs +++ b/piet-gpu-types/src/main.rs @@ -1,8 +1,12 @@ fn main() { - let mod_name = std::env::args().skip(1).next().expect("provide a module name"); + let mod_name = std::env::args() + .skip(1) + .next() + .expect("provide a module name"); match mod_name.as_str() { "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()), + "test" => print!("{}", piet_gpu_types::test::gen_gpu_test()), _ => println!("Oops, unknown module name"), } } diff --git a/piet-gpu-types/src/test.rs b/piet-gpu-types/src/test.rs new file mode 100644 index 0000000..e92aaca --- /dev/null +++ b/piet-gpu-types/src/test.rs @@ -0,0 +1,33 @@ +use piet_gpu_derive::piet_gpu; + +piet_gpu! 
{ + #[rust_encode] + #[gpu_write] + mod test { + struct StructA { + a: f16, + b: f16, + } + + struct StructB { + a: f16, + b: u16, + c: f16, + } + + struct StructC { + a: f16, + b: u16, + c: u16, + d: f16, + } + + struct StructD { + a: [f16; 2], + } + + struct StructE { + a: [f16; 3], + } + } +} diff --git a/piet-gpu/src/main.rs b/piet-gpu/src/main.rs index 3d97b64..271c133 100644 --- a/piet-gpu/src/main.rs +++ b/piet-gpu/src/main.rs @@ -1,6 +1,6 @@ -use std::path::Path; use std::fs::File; use std::io::BufWriter; +use std::path::Path; use rand::{Rng, RngCore}; @@ -29,7 +29,10 @@ fn make_scene() -> Vec { let circle = PietCircle { rgba_color: rng.next_u32(), center: Point { - xy: [rng.gen_range(0.0, WIDTH as f32), rng.gen_range(0.0, HEIGHT as f32)], + xy: [ + rng.gen_range(0.0, WIDTH as f32), + rng.gen_range(0.0, HEIGHT as f32), + ], }, radius: rng.gen_range(0.0, 50.0), }; @@ -58,7 +61,7 @@ fn make_scene() -> Vec { fn dump_scene(buf: &[u8]) { for i in 0..(buf.len() / 4) { let mut buf_u32 = [0u8; 4]; - buf_u32.copy_from_slice(&buf[i * 4 .. i * 4 + 4]); + buf_u32.copy_from_slice(&buf[i * 4..i * 4 + 4]); println!("{:4x}: {:8x}", i * 4, u32::from_le_bytes(buf_u32)); } } @@ -105,12 +108,12 @@ fn main() { let path = Path::new("image.png"); let file = File::create(path).unwrap(); let ref mut w = BufWriter::new(file); - + let mut encoder = png::Encoder::new(w, WIDTH as u32, HEIGHT as u32); encoder.set_color(png::ColorType::RGBA); encoder.set_depth(png::BitDepth::Eight); let mut writer = encoder.write_header().unwrap(); - + writer.write_image_data(&img_data).unwrap(); } }