Add f16 support.

This commit is contained in:
Brian Merchant 2020-04-21 23:45:24 -07:00 committed by GitHub
commit 4aaa6f1f29
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 227 additions and 51 deletions

7
Cargo.lock generated
View file

@ -69,6 +69,12 @@ dependencies = [
"wasi", "wasi",
] ]
[[package]]
name = "half"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36b5f248235f45773d4944f555f83ea61fe07b18b561ccf99d7483d7381e54d"
[[package]] [[package]]
name = "inflate" name = "inflate"
version = "0.4.5" version = "0.4.5"
@ -124,6 +130,7 @@ dependencies = [
name = "piet-gpu-types" name = "piet-gpu-types"
version = "0.0.0" version = "0.0.0"
dependencies = [ dependencies = [
"half",
"piet-gpu-derive", "piet-gpu-derive",
] ]

View file

@ -14,6 +14,16 @@ pub fn gen_derive(module: &LayoutModule) -> proc_macro2::TokenStream {
} }
quote! { quote! {
mod #module_name { mod #module_name {
pub trait HalfToLeBytes {
fn to_le_bytes(&self) -> [u8; 2];
}
impl HalfToLeBytes for half::f16 {
fn to_le_bytes(&self) -> [u8; 2] {
self.to_bits().to_le_bytes()
}
}
#ts #ts
} }
} }
@ -121,6 +131,7 @@ fn gen_derive_ty(ty: &GpuType) -> proc_macro2::TokenStream {
fn gen_derive_scalar_ty(ty: &GpuScalar) -> proc_macro2::TokenStream { fn gen_derive_scalar_ty(ty: &GpuScalar) -> proc_macro2::TokenStream {
match ty { match ty {
GpuScalar::F16 => quote!(half::f16),
GpuScalar::F32 => quote!(f32), GpuScalar::F32 => quote!(f32),
GpuScalar::I8 => quote!(i8), GpuScalar::I8 => quote!(i8),
GpuScalar::I16 => quote!(i16), GpuScalar::I16 => quote!(i16),

View file

@ -14,6 +14,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
for name in &module.def_names { for name in &module.def_names {
gen_refdef(&mut r, &name); gen_refdef(&mut r, &name);
} }
for name in &module.def_names { for name in &module.def_names {
match module.defs.get(name).unwrap() { match module.defs.get(name).unwrap() {
(size, LayoutTypeDef::Struct(fields)) => { (size, LayoutTypeDef::Struct(fields)) => {
@ -26,6 +27,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
} }
} }
} }
for name in &module.def_names { for name in &module.def_names {
let def = module.defs.get(name).unwrap(); let def = module.defs.get(name).unwrap();
match def { match def {
@ -43,6 +45,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
} }
} }
} }
r r
} }
@ -98,9 +101,21 @@ fn gen_struct_read(
} }
} }
writeln!(r, " {} s;", name).unwrap(); writeln!(r, " {} s;", name).unwrap();
let mut preload: bool = false;
for (name, offset, ty) in fields { for (name, offset, ty) in fields {
writeln!(r, " s.{} = {};", name, gen_extract(*offset, &ty.ty)).unwrap(); let (setup, extract) = gen_extract(*offset, &ty.ty, preload);
writeln!(r, "{} s.{} = {};", setup, name, extract).unwrap();
if let GpuType::Scalar(GpuScalar::F16) = &ty.ty {
if offset % 4 == 0 {
preload = true;
continue;
}
}
preload = false;
} }
writeln!(r, " return s;").unwrap(); writeln!(r, " return s;").unwrap();
writeln!(r, "}}\n").unwrap(); writeln!(r, "}}\n").unwrap();
} }
@ -136,34 +151,67 @@ fn gen_enum_read(
} }
} }
fn gen_extract(offset: usize, ty: &GpuType) -> String { fn gen_extract(offset: usize, ty: &GpuType, preload: bool) -> (String, String) {
match ty { match ty {
GpuType::Scalar(scalar) => gen_extract_scalar(offset, scalar), GpuType::Scalar(scalar) => {
let setup = match scalar {
GpuScalar::F16 => {
if preload {
String::new()
} else {
let ix = offset / 4;
format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix)
}
}
_ => String::new(),
};
(setup, gen_extract_scalar(offset, scalar))
}
GpuType::Vector(scalar, size) => { GpuType::Vector(scalar, size) => {
let mut r = glsl_type(ty); let is_f16 = match scalar {
r.push_str("("); GpuScalar::F16 => true,
_ => false,
};
let mut setup = String::new();
let mut extract = glsl_type(ty);
&extract.push_str("(");
for i in 0..*size { for i in 0..*size {
if i != 0 { if i != 0 {
r.push_str(", "); &extract.push_str(", ");
} }
if is_f16 && i % 2 == 0 {
let ix = (offset + i * scalar.size()) / 4;
let s = format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix);
setup.push_str(&s);
};
let el_offset = offset + i * scalar.size(); let el_offset = offset + i * scalar.size();
r.push_str(&gen_extract_scalar(el_offset, scalar)); &extract.push_str(&gen_extract_scalar(el_offset, scalar));
} }
r.push_str(")"); &extract.push_str(")");
r (setup, extract)
} }
GpuType::InlineStruct(name) => format!( GpuType::InlineStruct(name) => (
"{}_read({}Ref({}))", String::new(),
name, format!(
name, "{}_read({}Ref({}))",
simplified_add("ref.offset", offset) name,
name,
simplified_add("ref.offset", offset)
),
), ),
GpuType::Ref(inner) => { GpuType::Ref(inner) => {
if let GpuType::InlineStruct(name) = inner.deref() { if let GpuType::InlineStruct(name) = inner.deref() {
format!( (
"{}Ref({})", String::new(),
name, format!(
gen_extract_scalar(offset, &GpuScalar::U32) "{}Ref({})",
name,
gen_extract_scalar(offset, &GpuScalar::U32)
),
) )
} else { } else {
panic!("only know how to deal with Ref of struct") panic!("only know how to deal with Ref of struct")
@ -174,7 +222,7 @@ fn gen_extract(offset: usize, ty: &GpuType) -> String {
fn gen_extract_scalar(offset: usize, ty: &GpuScalar) -> String { fn gen_extract_scalar(offset: usize, ty: &GpuScalar) -> String {
match ty { match ty {
GpuScalar::F32 => format!("uintBitsToFloat(raw{})", offset / 4), GpuScalar::F16 | GpuScalar::F32 => extract_fbits(offset, ty.size()),
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, ty.size()), GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, ty.size()),
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()), GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()),
} }
@ -210,8 +258,41 @@ fn extract_ibits(offset: usize, nbytes: usize) -> String {
} }
} }
/// Builds the GLSL expression that reads a float stored at byte `offset`.
///
/// * `nbytes == 4`: reinterprets the whole word, `uintBitsToFloat(raw<N>)`.
/// * `nbytes == 2`: selects one lane of a `halves<N>` variable, `.x` for the
///   half at the start of the word and `.y` for the half two bytes in.
///
/// `<N>` is the index of the 32-bit word containing `offset` (`offset / 4`).
///
/// # Panics
///
/// Panics if `nbytes` is neither 2 nor 4, or if a 2-byte float does not start
/// on a 2-byte boundary within its word.
fn extract_fbits(offset: usize, nbytes: usize) -> String {
    let word = offset / 4;
    if nbytes == 4 {
        return format!("uintBitsToFloat(raw{})", word);
    }
    if nbytes != 2 {
        panic!("unexpected extraction of float with nbytes = {}", nbytes);
    }
    // A half occupies either the low or the high 16 bits of its word.
    let lane = match offset % 4 {
        0 => "x",
        2 => "y",
        _ => panic!("unexpected packing of f16 at offset {}", offset % 4),
    };
    format!("halves{}.{}", word, lane)
}
// Writing // Writing
/// Returns `true` when `ty` is an f16 scalar or a vector whose elements are
/// f16; every other type (including inline structs and refs) yields `false`.
fn is_f16(ty: &GpuType) -> bool {
    // Pull out the element scalar for the two shapes that have one.
    let element = match ty {
        GpuType::Scalar(scalar) => scalar,
        GpuType::Vector(scalar, _) => scalar,
        _ => return false,
    };
    *element == GpuScalar::F16
}
fn is_f16_pair(field_ixs: &[usize], fields: &[(String, usize, LayoutType)]) -> bool {
if field_ixs.len() == 2 {
fields.iter().all(|(_, _, t)| is_f16(&t.ty))
} else {
false
}
}
fn gen_struct_write( fn gen_struct_write(
r: &mut String, r: &mut String,
bufname: &str, bufname: &str,
@ -221,39 +302,78 @@ fn gen_struct_write(
writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap(); writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap();
writeln!(r, " uint ix = ref.offset >> 2;").unwrap(); writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
let coverage = crate::layout::struct_coverage(fields, true); let coverage = crate::layout::struct_coverage(fields, true);
for (i, field_ixs) in coverage.iter().enumerate() { for (i, field_ixs) in coverage.iter().enumerate() {
let mut pieces = Vec::new(); let mut pieces = Vec::new();
for field_ix in field_ixs {
let (name, offset, ty) = &fields[*field_ix]; if is_f16_pair(field_ixs, fields) {
match &ty.ty { let (ix0, ix1) = (field_ixs[0], field_ixs[1]);
GpuType::Scalar(scalar) => { let inner0 = format!("s.{}", fields[ix0].0);
let inner = format!("s.{}", name); let inner1 = format!("s.{}", fields[ix1].0);
pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner)); pieces.push(format!("packHalf2x16(vec2({}, {}))", &inner0, &inner1));
} } else {
GpuType::Vector(scalar, len) => { for field_ix in field_ixs {
let size = scalar.size(); let (name, offset, ty) = &fields[*field_ix];
let ix_lo = (i * 4 - offset) / size; match &ty.ty {
let ix_hi = ((4 + i * 4 - offset) / size).min(*len); GpuType::Scalar(scalar) => {
for ix in ix_lo..ix_hi { let inner = format!("s.{}", name);
let scalar_offset = offset + ix * size; pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner));
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
pieces.push(gen_pack_bits_scalar(scalar, scalar_offset, &inner));
} }
GpuType::Vector(scalar, len) => {
let size = scalar.size();
let ix_lo = (i * 4 - offset) / size;
let ix_hi = ((4 + i * 4 - offset) / size).min(*len);
match scalar {
GpuScalar::F16 => {
if ix_hi - ix_lo == 2 {
let inner0 =
format!("s.{}.{}", name, &"xyzw"[ix_lo..ix_lo + 1]);
let inner1 =
format!("s.{}.{}", name, &"xyzw"[ix_lo + 1..ix_hi]);
pieces.push(format!(
"packHalf2x16(vec2({}, {}))",
&inner0, &inner1
));
} else {
let ix = ix_lo;
let scalar_offset = offset + ix * size;
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
pieces.push(gen_pack_bits_scalar(
scalar,
scalar_offset,
&inner,
));
}
}
_ => {
for ix in ix_lo..ix_hi {
let scalar_offset = offset + ix * size;
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
pieces.push(gen_pack_bits_scalar(
scalar,
scalar_offset,
&inner,
));
}
}
}
}
GpuType::InlineStruct(structname) => {
writeln!(
r,
" {}_write({}Ref({}), s.{});",
structname,
structname,
simplified_add("ref.offset", *offset),
name
)
.unwrap();
}
GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)),
} }
GpuType::InlineStruct(structname) => {
writeln!(
r,
" {}_write({}Ref({}), s.{});",
structname,
structname,
simplified_add("ref.offset", *offset),
name
)
.unwrap();
}
GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)),
} }
} }
if !pieces.is_empty() { if !pieces.is_empty() {
write!(r, " {}[ix + {}] = ", bufname, i).unwrap(); write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
for (j, piece) in pieces.iter().enumerate() { for (j, piece) in pieces.iter().enumerate() {
@ -271,6 +391,7 @@ fn gen_struct_write(
fn gen_pack_bits_scalar(ty: &GpuScalar, offset: usize, inner: &str) -> String { fn gen_pack_bits_scalar(ty: &GpuScalar, offset: usize, inner: &str) -> String {
let shift = (offset % 4) * 8; let shift = (offset % 4) * 8;
let bits = match ty { let bits = match ty {
GpuScalar::F16 => format!("packHalf2x16(vec2({}, 0.0)) & 0xffff", inner),
GpuScalar::F32 => format!("floatBitsToUint({})", inner), GpuScalar::F32 => format!("floatBitsToUint({})", inner),
// Note: this doesn't mask small unsigned int types; the caller is // Note: this doesn't mask small unsigned int types; the caller is
// responsible for making sure they don't overflow. // responsible for making sure they don't overflow.
@ -367,7 +488,7 @@ fn glsl_type(ty: &GpuType) -> String {
// GLSL type that can contain the scalar value. // GLSL type that can contain the scalar value.
fn glsl_scalar(s: &GpuScalar) -> &'static str { fn glsl_scalar(s: &GpuScalar) -> &'static str {
match s { match s {
GpuScalar::F32 => "float", GpuScalar::F16 | GpuScalar::F32 => "float",
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int", GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int",
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uint", GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uint",
} }
@ -375,7 +496,7 @@ fn glsl_scalar(s: &GpuScalar) -> &'static str {
fn glsl_vecname(s: &GpuScalar) -> &'static str { fn glsl_vecname(s: &GpuScalar) -> &'static str {
match s { match s {
GpuScalar::F32 => "vec", GpuScalar::F16 | GpuScalar::F32 => "vec",
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec", GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec",
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uvec", GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uvec",
} }

View file

@ -12,14 +12,14 @@ use syn::{
/// A scalar that can be represented in a packed data structure. /// A scalar that can be represented in a packed data structure.
#[derive(Clone, Copy, PartialEq)] #[derive(Clone, Copy, PartialEq)]
pub enum GpuScalar { pub enum GpuScalar {
F16,
F32,
I8, I8,
I16, I16,
I32, I32,
F32,
U8, U8,
U16, U16,
U32, U32,
// TODO: Add F16
} }
/// An algebraic datatype. /// An algebraic datatype.
@ -52,6 +52,7 @@ impl GpuScalar {
fn from_syn(ty: &syn::Type) -> Option<Self> { fn from_syn(ty: &syn::Type) -> Option<Self> {
ty_as_single_ident(ty).and_then(|ident| match ident.as_str() { ty_as_single_ident(ty).and_then(|ident| match ident.as_str() {
"f32" => Some(GpuScalar::F32), "f32" => Some(GpuScalar::F32),
"f16" => Some(GpuScalar::F16),
"i8" => Some(GpuScalar::I8), "i8" => Some(GpuScalar::I8),
"i16" => Some(GpuScalar::I16), "i16" => Some(GpuScalar::I16),
"i32" => Some(GpuScalar::I32), "i32" => Some(GpuScalar::I32),
@ -70,7 +71,7 @@ impl GpuScalar {
match self { match self {
GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4, GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4,
GpuScalar::I8 | GpuScalar::U8 => 1, GpuScalar::I8 | GpuScalar::U8 => 1,
GpuScalar::I16 | GpuScalar::U16 => 2, GpuScalar::F16 | GpuScalar::I16 | GpuScalar::U16 => 2,
} }
} }
} }

View file

@ -9,3 +9,4 @@ keywords = ["graphics", "2d"]
[dependencies] [dependencies]
piet-gpu-derive = { path = "../piet-gpu-derive" } piet-gpu-derive = { path = "../piet-gpu-derive" }
half = "1.5.0"

View file

@ -1,4 +1,5 @@
pub mod encoder; pub mod encoder;
pub mod ptcl; pub mod ptcl;
pub mod scene; pub mod scene;
pub mod test;
pub mod tilegroup; pub mod tilegroup;

View file

@ -7,6 +7,7 @@ fn main() {
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()), "scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()), "tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()), "ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
_ => println!("Oops, unknown module name"), _ => println!("Oops, unknown module name"),
} }
} }

View file

@ -0,0 +1,33 @@
use piet_gpu_derive::piet_gpu;
// Layout module whose structs place f16 fields in several different
// arrangements next to each other and next to u16 fields.
// NOTE(review): presumably exists to exercise the f16 code generation paths;
// confirm against the generated output of `gen_gpu_test()`.
piet_gpu! {
#[rust_encode]
#[gpu_write]
mod test {
// Two adjacent f16 scalars.
struct StructA {
a: f16,
b: f16,
}
// An f16, then a u16, then another f16.
struct StructB {
a: f16,
b: u16,
c: f16,
}
// f16 fields at both ends with two u16 fields between them.
struct StructC {
a: f16,
b: u16,
c: u16,
d: f16,
}
// A two-element f16 array.
// NOTE(review): presumably handled as an f16 vector by the generator; confirm.
struct StructD {
a: [f16; 2],
}
// A three-element f16 array (odd element count).
struct StructE {
a: [f16; 3],
}
}
}