Add f16 support.

This commit is contained in:
Brian Merchant 2020-04-21 23:45:24 -07:00 committed by GitHub
commit 4aaa6f1f29
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 227 additions and 51 deletions

7
Cargo.lock generated
View file

@ -69,6 +69,12 @@ dependencies = [
"wasi",
]
[[package]]
name = "half"
version = "1.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f36b5f248235f45773d4944f555f83ea61fe07b18b561ccf99d7483d7381e54d"
[[package]]
name = "inflate"
version = "0.4.5"
@ -124,6 +130,7 @@ dependencies = [
name = "piet-gpu-types"
version = "0.0.0"
dependencies = [
"half",
"piet-gpu-derive",
]

View file

@ -14,6 +14,16 @@ pub fn gen_derive(module: &LayoutModule) -> proc_macro2::TokenStream {
}
quote! {
mod #module_name {
pub trait HalfToLeBytes {
fn to_le_bytes(&self) -> [u8; 2];
}
impl HalfToLeBytes for half::f16 {
fn to_le_bytes(&self) -> [u8; 2] {
self.to_bits().to_le_bytes()
}
}
#ts
}
}
@ -121,6 +131,7 @@ fn gen_derive_ty(ty: &GpuType) -> proc_macro2::TokenStream {
fn gen_derive_scalar_ty(ty: &GpuScalar) -> proc_macro2::TokenStream {
match ty {
GpuScalar::F16 => quote!(half::f16),
GpuScalar::F32 => quote!(f32),
GpuScalar::I8 => quote!(i8),
GpuScalar::I16 => quote!(i16),

View file

@ -14,6 +14,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
for name in &module.def_names {
gen_refdef(&mut r, &name);
}
for name in &module.def_names {
match module.defs.get(name).unwrap() {
(size, LayoutTypeDef::Struct(fields)) => {
@ -26,6 +27,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
}
}
}
for name in &module.def_names {
let def = module.defs.get(name).unwrap();
match def {
@ -43,6 +45,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
}
}
}
r
}
@ -98,9 +101,21 @@ fn gen_struct_read(
}
}
writeln!(r, " {} s;", name).unwrap();
let mut preload: bool = false;
for (name, offset, ty) in fields {
writeln!(r, " s.{} = {};", name, gen_extract(*offset, &ty.ty)).unwrap();
let (setup, extract) = gen_extract(*offset, &ty.ty, preload);
writeln!(r, "{} s.{} = {};", setup, name, extract).unwrap();
if let GpuType::Scalar(GpuScalar::F16) = &ty.ty {
if offset % 4 == 0 {
preload = true;
continue;
}
}
preload = false;
}
writeln!(r, " return s;").unwrap();
writeln!(r, "}}\n").unwrap();
}
@ -136,34 +151,67 @@ fn gen_enum_read(
}
}
fn gen_extract(offset: usize, ty: &GpuType) -> String {
fn gen_extract(offset: usize, ty: &GpuType, preload: bool) -> (String, String) {
match ty {
GpuType::Scalar(scalar) => gen_extract_scalar(offset, scalar),
GpuType::Scalar(scalar) => {
let setup = match scalar {
GpuScalar::F16 => {
if preload {
String::new()
} else {
let ix = offset / 4;
format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix)
}
}
_ => String::new(),
};
(setup, gen_extract_scalar(offset, scalar))
}
GpuType::Vector(scalar, size) => {
let mut r = glsl_type(ty);
r.push_str("(");
let is_f16 = match scalar {
GpuScalar::F16 => true,
_ => false,
};
let mut setup = String::new();
let mut extract = glsl_type(ty);
&extract.push_str("(");
for i in 0..*size {
if i != 0 {
r.push_str(", ");
&extract.push_str(", ");
}
if is_f16 && i % 2 == 0 {
let ix = (offset + i * scalar.size()) / 4;
let s = format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix);
setup.push_str(&s);
};
let el_offset = offset + i * scalar.size();
r.push_str(&gen_extract_scalar(el_offset, scalar));
&extract.push_str(&gen_extract_scalar(el_offset, scalar));
}
r.push_str(")");
r
&extract.push_str(")");
(setup, extract)
}
GpuType::InlineStruct(name) => format!(
"{}_read({}Ref({}))",
name,
name,
simplified_add("ref.offset", offset)
GpuType::InlineStruct(name) => (
String::new(),
format!(
"{}_read({}Ref({}))",
name,
name,
simplified_add("ref.offset", offset)
),
),
GpuType::Ref(inner) => {
if let GpuType::InlineStruct(name) = inner.deref() {
format!(
"{}Ref({})",
name,
gen_extract_scalar(offset, &GpuScalar::U32)
(
String::new(),
format!(
"{}Ref({})",
name,
gen_extract_scalar(offset, &GpuScalar::U32)
),
)
} else {
panic!("only know how to deal with Ref of struct")
@ -174,7 +222,7 @@ fn gen_extract(offset: usize, ty: &GpuType) -> String {
fn gen_extract_scalar(offset: usize, ty: &GpuScalar) -> String {
match ty {
GpuScalar::F32 => format!("uintBitsToFloat(raw{})", offset / 4),
GpuScalar::F16 | GpuScalar::F32 => extract_fbits(offset, ty.size()),
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, ty.size()),
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()),
}
@ -210,8 +258,41 @@ fn extract_ibits(offset: usize, nbytes: usize) -> String {
}
}
/// Builds the GLSL expression that reads a float of `nbytes` bytes located at
/// byte `offset` within the struct's raw words.
///
/// * 4-byte floats are reinterpreted from the containing `raw<N>` word.
/// * 2-byte floats select the `.x` (low half) or `.y` (high half) component of
///   a `halves<N>` variable; the caller is expected to have emitted the
///   matching `vec2 halves<N> = unpackHalf2x16(raw<N>);` setup line.
///
/// # Panics
///
/// Panics if a 2-byte float is not 2-byte aligned within its word, or if
/// `nbytes` is neither 2 nor 4.
fn extract_fbits(offset: usize, nbytes: usize) -> String {
    // Index of the 32-bit word that holds the value.
    let word = offset / 4;
    match (nbytes, offset % 4) {
        (4, _) => format!("uintBitsToFloat(raw{})", word),
        (2, 0) => format!("halves{}.x", word),
        (2, 2) => format!("halves{}.y", word),
        (2, misaligned) => panic!("unexpected packing of f16 at offset {}", misaligned),
        _ => panic!("unexpected extraction of float with nbytes = {}", nbytes),
    }
}
// Writing
/// Returns `true` when `ty` is an f16 scalar or a vector whose elements are f16.
fn is_f16(ty: &GpuType) -> bool {
    match ty {
        GpuType::Scalar(GpuScalar::F16) => true,
        GpuType::Vector(GpuScalar::F16, _) => true,
        _ => false,
    }
}

/// Returns `true` when the fields covering one 32-bit word are exactly two
/// scalar f16 fields, so they can be written with a single `packHalf2x16`.
///
/// `field_ixs` holds indices into `fields` for the fields that overlap the
/// word being written; `fields` is the full `(name, offset, type)` list of the
/// struct.
///
/// Only the fields named by `field_ixs` are inspected. Looking at every field
/// of the struct would reject a pair of f16 values merely because some
/// unrelated field elsewhere in the struct has a different type. Vector fields
/// are excluded because the paired write path refers to each operand as a
/// plain `s.<name>` scalar; partially covered f16 vectors are handled by the
/// per-field path instead. An out-of-range index yields `false`.
fn is_f16_pair(field_ixs: &[usize], fields: &[(String, usize, LayoutType)]) -> bool {
    field_ixs.len() == 2
        && field_ixs.iter().all(|&ix| match fields.get(ix) {
            Some((_, _, t)) => match &t.ty {
                GpuType::Scalar(_) => is_f16(&t.ty),
                _ => false,
            },
            None => false,
        })
}
fn gen_struct_write(
r: &mut String,
bufname: &str,
@ -221,39 +302,78 @@ fn gen_struct_write(
writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap();
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
let coverage = crate::layout::struct_coverage(fields, true);
for (i, field_ixs) in coverage.iter().enumerate() {
let mut pieces = Vec::new();
for field_ix in field_ixs {
let (name, offset, ty) = &fields[*field_ix];
match &ty.ty {
GpuType::Scalar(scalar) => {
let inner = format!("s.{}", name);
pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner));
}
GpuType::Vector(scalar, len) => {
let size = scalar.size();
let ix_lo = (i * 4 - offset) / size;
let ix_hi = ((4 + i * 4 - offset) / size).min(*len);
for ix in ix_lo..ix_hi {
let scalar_offset = offset + ix * size;
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
pieces.push(gen_pack_bits_scalar(scalar, scalar_offset, &inner));
if is_f16_pair(field_ixs, fields) {
let (ix0, ix1) = (field_ixs[0], field_ixs[1]);
let inner0 = format!("s.{}", fields[ix0].0);
let inner1 = format!("s.{}", fields[ix1].0);
pieces.push(format!("packHalf2x16(vec2({}, {}))", &inner0, &inner1));
} else {
for field_ix in field_ixs {
let (name, offset, ty) = &fields[*field_ix];
match &ty.ty {
GpuType::Scalar(scalar) => {
let inner = format!("s.{}", name);
pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner));
}
GpuType::Vector(scalar, len) => {
let size = scalar.size();
let ix_lo = (i * 4 - offset) / size;
let ix_hi = ((4 + i * 4 - offset) / size).min(*len);
match scalar {
GpuScalar::F16 => {
if ix_hi - ix_lo == 2 {
let inner0 =
format!("s.{}.{}", name, &"xyzw"[ix_lo..ix_lo + 1]);
let inner1 =
format!("s.{}.{}", name, &"xyzw"[ix_lo + 1..ix_hi]);
pieces.push(format!(
"packHalf2x16(vec2({}, {}))",
&inner0, &inner1
));
} else {
let ix = ix_lo;
let scalar_offset = offset + ix * size;
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
pieces.push(gen_pack_bits_scalar(
scalar,
scalar_offset,
&inner,
));
}
}
_ => {
for ix in ix_lo..ix_hi {
let scalar_offset = offset + ix * size;
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
pieces.push(gen_pack_bits_scalar(
scalar,
scalar_offset,
&inner,
));
}
}
}
}
GpuType::InlineStruct(structname) => {
writeln!(
r,
" {}_write({}Ref({}), s.{});",
structname,
structname,
simplified_add("ref.offset", *offset),
name
)
.unwrap();
}
GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)),
}
GpuType::InlineStruct(structname) => {
writeln!(
r,
" {}_write({}Ref({}), s.{});",
structname,
structname,
simplified_add("ref.offset", *offset),
name
)
.unwrap();
}
GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)),
}
}
if !pieces.is_empty() {
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
for (j, piece) in pieces.iter().enumerate() {
@ -271,6 +391,7 @@ fn gen_struct_write(
fn gen_pack_bits_scalar(ty: &GpuScalar, offset: usize, inner: &str) -> String {
let shift = (offset % 4) * 8;
let bits = match ty {
GpuScalar::F16 => format!("packHalf2x16(vec2({}, 0.0)) & 0xffff", inner),
GpuScalar::F32 => format!("floatBitsToUint({})", inner),
// Note: this doesn't mask small unsigned int types; the caller is
// responsible for making sure they don't overflow.
@ -367,7 +488,7 @@ fn glsl_type(ty: &GpuType) -> String {
// GLSL type that can contain the scalar value.
fn glsl_scalar(s: &GpuScalar) -> &'static str {
match s {
GpuScalar::F32 => "float",
GpuScalar::F16 | GpuScalar::F32 => "float",
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int",
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uint",
}
@ -375,7 +496,7 @@ fn glsl_scalar(s: &GpuScalar) -> &'static str {
fn glsl_vecname(s: &GpuScalar) -> &'static str {
match s {
GpuScalar::F32 => "vec",
GpuScalar::F16 | GpuScalar::F32 => "vec",
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec",
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uvec",
}

View file

@ -12,14 +12,14 @@ use syn::{
/// A scalar that can be represented in a packed data structure.
#[derive(Clone, Copy, PartialEq)]
pub enum GpuScalar {
F16,
F32,
I8,
I16,
I32,
F32,
U8,
U16,
U32,
// TODO: Add F16
}
/// An algebraic datatype.
@ -52,6 +52,7 @@ impl GpuScalar {
fn from_syn(ty: &syn::Type) -> Option<Self> {
ty_as_single_ident(ty).and_then(|ident| match ident.as_str() {
"f32" => Some(GpuScalar::F32),
"f16" => Some(GpuScalar::F16),
"i8" => Some(GpuScalar::I8),
"i16" => Some(GpuScalar::I16),
"i32" => Some(GpuScalar::I32),
@ -70,7 +71,7 @@ impl GpuScalar {
match self {
GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4,
GpuScalar::I8 | GpuScalar::U8 => 1,
GpuScalar::I16 | GpuScalar::U16 => 2,
GpuScalar::F16 | GpuScalar::I16 | GpuScalar::U16 => 2,
}
}
}

View file

@ -9,3 +9,4 @@ keywords = ["graphics", "2d"]
[dependencies]
piet-gpu-derive = { path = "../piet-gpu-derive" }
half = "1.5.0"

View file

@ -1,4 +1,5 @@
pub mod encoder;
pub mod ptcl;
pub mod scene;
pub mod test;
pub mod tilegroup;

View file

@ -7,6 +7,7 @@ fn main() {
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
_ => println!("Oops, unknown module name"),
}
}

View file

@ -0,0 +1,33 @@
use piet_gpu_derive::piet_gpu;
// Layout definitions exercising f16 fields in several packing arrangements.
// NOTE(review): the `piet_gpu!` macro presumably generates the Rust encoders
// and the GLSL source returned by `gen_gpu_test()` from this module — confirm
// against piet-gpu-derive.
piet_gpu! {
#[rust_encode]
#[gpu_write]
mod test {
// Two adjacent f16 scalars.
struct StructA {
a: f16,
b: f16,
}
// An f16 followed by a u16, then a trailing f16.
struct StructB {
a: f16,
b: u16,
c: f16,
}
// f16 scalars at both ends with two u16 fields between them.
struct StructC {
a: f16,
b: u16,
c: u16,
d: f16,
}
// An f16 vector with an even number of elements.
struct StructD {
a: [f16; 2],
}
// An f16 vector with an odd number of elements.
struct StructE {
a: [f16; 3],
}
}
}