mirror of
https://github.com/italicsjenga/vello.git
synced 2025-01-09 20:31:29 +11:00
Add f16 support.
This commit is contained in:
commit
4aaa6f1f29
7
Cargo.lock
generated
7
Cargo.lock
generated
|
@ -69,6 +69,12 @@ dependencies = [
|
|||
"wasi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "half"
|
||||
version = "1.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f36b5f248235f45773d4944f555f83ea61fe07b18b561ccf99d7483d7381e54d"
|
||||
|
||||
[[package]]
|
||||
name = "inflate"
|
||||
version = "0.4.5"
|
||||
|
@ -124,6 +130,7 @@ dependencies = [
|
|||
name = "piet-gpu-types"
|
||||
version = "0.0.0"
|
||||
dependencies = [
|
||||
"half",
|
||||
"piet-gpu-derive",
|
||||
]
|
||||
|
||||
|
|
|
@ -14,6 +14,16 @@ pub fn gen_derive(module: &LayoutModule) -> proc_macro2::TokenStream {
|
|||
}
|
||||
quote! {
|
||||
mod #module_name {
|
||||
pub trait HalfToLeBytes {
|
||||
fn to_le_bytes(&self) -> [u8; 2];
|
||||
}
|
||||
|
||||
impl HalfToLeBytes for half::f16 {
|
||||
fn to_le_bytes(&self) -> [u8; 2] {
|
||||
self.to_bits().to_le_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
#ts
|
||||
}
|
||||
}
|
||||
|
@ -121,6 +131,7 @@ fn gen_derive_ty(ty: &GpuType) -> proc_macro2::TokenStream {
|
|||
|
||||
fn gen_derive_scalar_ty(ty: &GpuScalar) -> proc_macro2::TokenStream {
|
||||
match ty {
|
||||
GpuScalar::F16 => quote!(half::f16),
|
||||
GpuScalar::F32 => quote!(f32),
|
||||
GpuScalar::I8 => quote!(i8),
|
||||
GpuScalar::I16 => quote!(i16),
|
||||
|
|
|
@ -14,6 +14,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
|
|||
for name in &module.def_names {
|
||||
gen_refdef(&mut r, &name);
|
||||
}
|
||||
|
||||
for name in &module.def_names {
|
||||
match module.defs.get(name).unwrap() {
|
||||
(size, LayoutTypeDef::Struct(fields)) => {
|
||||
|
@ -26,6 +27,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
for name in &module.def_names {
|
||||
let def = module.defs.get(name).unwrap();
|
||||
match def {
|
||||
|
@ -43,6 +45,7 @@ pub fn gen_glsl(module: &LayoutModule) -> String {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
r
|
||||
}
|
||||
|
||||
|
@ -98,9 +101,21 @@ fn gen_struct_read(
|
|||
}
|
||||
}
|
||||
writeln!(r, " {} s;", name).unwrap();
|
||||
|
||||
let mut preload: bool = false;
|
||||
for (name, offset, ty) in fields {
|
||||
writeln!(r, " s.{} = {};", name, gen_extract(*offset, &ty.ty)).unwrap();
|
||||
let (setup, extract) = gen_extract(*offset, &ty.ty, preload);
|
||||
writeln!(r, "{} s.{} = {};", setup, name, extract).unwrap();
|
||||
|
||||
if let GpuType::Scalar(GpuScalar::F16) = &ty.ty {
|
||||
if offset % 4 == 0 {
|
||||
preload = true;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
preload = false;
|
||||
}
|
||||
|
||||
writeln!(r, " return s;").unwrap();
|
||||
writeln!(r, "}}\n").unwrap();
|
||||
}
|
||||
|
@ -136,34 +151,67 @@ fn gen_enum_read(
|
|||
}
|
||||
}
|
||||
|
||||
fn gen_extract(offset: usize, ty: &GpuType) -> String {
|
||||
fn gen_extract(offset: usize, ty: &GpuType, preload: bool) -> (String, String) {
|
||||
match ty {
|
||||
GpuType::Scalar(scalar) => gen_extract_scalar(offset, scalar),
|
||||
GpuType::Scalar(scalar) => {
|
||||
let setup = match scalar {
|
||||
GpuScalar::F16 => {
|
||||
if preload {
|
||||
String::new()
|
||||
} else {
|
||||
let ix = offset / 4;
|
||||
format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix)
|
||||
}
|
||||
}
|
||||
_ => String::new(),
|
||||
};
|
||||
|
||||
(setup, gen_extract_scalar(offset, scalar))
|
||||
}
|
||||
GpuType::Vector(scalar, size) => {
|
||||
let mut r = glsl_type(ty);
|
||||
r.push_str("(");
|
||||
let is_f16 = match scalar {
|
||||
GpuScalar::F16 => true,
|
||||
_ => false,
|
||||
};
|
||||
|
||||
let mut setup = String::new();
|
||||
let mut extract = glsl_type(ty);
|
||||
&extract.push_str("(");
|
||||
for i in 0..*size {
|
||||
if i != 0 {
|
||||
r.push_str(", ");
|
||||
&extract.push_str(", ");
|
||||
}
|
||||
|
||||
if is_f16 && i % 2 == 0 {
|
||||
let ix = (offset + i * scalar.size()) / 4;
|
||||
let s = format!(" vec2 halves{} = unpackHalf2x16(raw{});\n", ix, ix);
|
||||
setup.push_str(&s);
|
||||
};
|
||||
|
||||
let el_offset = offset + i * scalar.size();
|
||||
r.push_str(&gen_extract_scalar(el_offset, scalar));
|
||||
&extract.push_str(&gen_extract_scalar(el_offset, scalar));
|
||||
}
|
||||
r.push_str(")");
|
||||
r
|
||||
&extract.push_str(")");
|
||||
(setup, extract)
|
||||
}
|
||||
GpuType::InlineStruct(name) => format!(
|
||||
"{}_read({}Ref({}))",
|
||||
name,
|
||||
name,
|
||||
simplified_add("ref.offset", offset)
|
||||
GpuType::InlineStruct(name) => (
|
||||
String::new(),
|
||||
format!(
|
||||
"{}_read({}Ref({}))",
|
||||
name,
|
||||
name,
|
||||
simplified_add("ref.offset", offset)
|
||||
),
|
||||
),
|
||||
GpuType::Ref(inner) => {
|
||||
if let GpuType::InlineStruct(name) = inner.deref() {
|
||||
format!(
|
||||
"{}Ref({})",
|
||||
name,
|
||||
gen_extract_scalar(offset, &GpuScalar::U32)
|
||||
(
|
||||
String::new(),
|
||||
format!(
|
||||
"{}Ref({})",
|
||||
name,
|
||||
gen_extract_scalar(offset, &GpuScalar::U32)
|
||||
),
|
||||
)
|
||||
} else {
|
||||
panic!("only know how to deal with Ref of struct")
|
||||
|
@ -174,7 +222,7 @@ fn gen_extract(offset: usize, ty: &GpuType) -> String {
|
|||
|
||||
fn gen_extract_scalar(offset: usize, ty: &GpuScalar) -> String {
|
||||
match ty {
|
||||
GpuScalar::F32 => format!("uintBitsToFloat(raw{})", offset / 4),
|
||||
GpuScalar::F16 | GpuScalar::F32 => extract_fbits(offset, ty.size()),
|
||||
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => extract_ubits(offset, ty.size()),
|
||||
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => extract_ibits(offset, ty.size()),
|
||||
}
|
||||
|
@ -210,8 +258,41 @@ fn extract_ibits(offset: usize, nbytes: usize) -> String {
|
|||
}
|
||||
}
|
||||
|
||||
/// Builds the GLSL expression that reads a floating-point scalar of `nbytes`
/// bytes stored at byte `offset` of the struct being read.
///
/// * `nbytes == 4`: the 32-bit word `raw{offset / 4}` reinterpreted with
///   `uintBitsToFloat`.
/// * `nbytes == 2`: one component of `halves{offset / 4}` -- `.x` when the
///   value sits in the first half of the word (`offset % 4 == 0`), `.y` when it
///   sits in the second half (`offset % 4 == 2`). The `halves{N}` variable is
///   not declared here; the callers emit the matching
///   `vec2 halves{N} = unpackHalf2x16(raw{N});` setup line.
///
/// # Panics
///
/// Panics if a 2-byte float is not 2-byte aligned, or if `nbytes` is neither
/// 2 nor 4.
fn extract_fbits(offset: usize, nbytes: usize) -> String {
    // Index of the 32-bit word that contains the value.
    let word = offset / 4;
    match (nbytes, offset % 4) {
        (4, _) => format!("uintBitsToFloat(raw{})", word),
        (2, 0) => format!("halves{}.x", word),
        (2, 2) => format!("halves{}.y", word),
        // Report the real byte offset as well as the remainder, so the
        // offending field can be located in the struct layout.
        (2, rem) => panic!(
            "unexpected packing of f16 at offset {} (offset % 4 = {})",
            offset, rem
        ),
        _ => panic!("unexpected extraction of float with nbytes = {}", nbytes),
    }
}
|
||||
|
||||
// Writing
|
||||
|
||||
fn is_f16(ty: &GpuType) -> bool {
|
||||
match ty {
|
||||
GpuType::Scalar(GpuScalar::F16) => true,
|
||||
GpuType::Vector(GpuScalar::F16, _) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn is_f16_pair(field_ixs: &[usize], fields: &[(String, usize, LayoutType)]) -> bool {
|
||||
if field_ixs.len() == 2 {
|
||||
fields.iter().all(|(_, _, t)| is_f16(&t.ty))
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
fn gen_struct_write(
|
||||
r: &mut String,
|
||||
bufname: &str,
|
||||
|
@ -221,39 +302,78 @@ fn gen_struct_write(
|
|||
writeln!(r, "void {}_write({}Ref ref, {} s) {{", name, name, name).unwrap();
|
||||
writeln!(r, " uint ix = ref.offset >> 2;").unwrap();
|
||||
let coverage = crate::layout::struct_coverage(fields, true);
|
||||
|
||||
for (i, field_ixs) in coverage.iter().enumerate() {
|
||||
let mut pieces = Vec::new();
|
||||
for field_ix in field_ixs {
|
||||
let (name, offset, ty) = &fields[*field_ix];
|
||||
match &ty.ty {
|
||||
GpuType::Scalar(scalar) => {
|
||||
let inner = format!("s.{}", name);
|
||||
pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner));
|
||||
}
|
||||
GpuType::Vector(scalar, len) => {
|
||||
let size = scalar.size();
|
||||
let ix_lo = (i * 4 - offset) / size;
|
||||
let ix_hi = ((4 + i * 4 - offset) / size).min(*len);
|
||||
for ix in ix_lo..ix_hi {
|
||||
let scalar_offset = offset + ix * size;
|
||||
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
|
||||
pieces.push(gen_pack_bits_scalar(scalar, scalar_offset, &inner));
|
||||
|
||||
if is_f16_pair(field_ixs, fields) {
|
||||
let (ix0, ix1) = (field_ixs[0], field_ixs[1]);
|
||||
let inner0 = format!("s.{}", fields[ix0].0);
|
||||
let inner1 = format!("s.{}", fields[ix1].0);
|
||||
pieces.push(format!("packHalf2x16(vec2({}, {}))", &inner0, &inner1));
|
||||
} else {
|
||||
for field_ix in field_ixs {
|
||||
let (name, offset, ty) = &fields[*field_ix];
|
||||
match &ty.ty {
|
||||
GpuType::Scalar(scalar) => {
|
||||
let inner = format!("s.{}", name);
|
||||
pieces.push(gen_pack_bits_scalar(scalar, *offset, &inner));
|
||||
}
|
||||
GpuType::Vector(scalar, len) => {
|
||||
let size = scalar.size();
|
||||
let ix_lo = (i * 4 - offset) / size;
|
||||
let ix_hi = ((4 + i * 4 - offset) / size).min(*len);
|
||||
match scalar {
|
||||
GpuScalar::F16 => {
|
||||
if ix_hi - ix_lo == 2 {
|
||||
let inner0 =
|
||||
format!("s.{}.{}", name, &"xyzw"[ix_lo..ix_lo + 1]);
|
||||
let inner1 =
|
||||
format!("s.{}.{}", name, &"xyzw"[ix_lo + 1..ix_hi]);
|
||||
pieces.push(format!(
|
||||
"packHalf2x16(vec2({}, {}))",
|
||||
&inner0, &inner1
|
||||
));
|
||||
} else {
|
||||
let ix = ix_lo;
|
||||
let scalar_offset = offset + ix * size;
|
||||
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
|
||||
pieces.push(gen_pack_bits_scalar(
|
||||
scalar,
|
||||
scalar_offset,
|
||||
&inner,
|
||||
));
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
for ix in ix_lo..ix_hi {
|
||||
let scalar_offset = offset + ix * size;
|
||||
let inner = format!("s.{}.{}", name, &"xyzw"[ix..ix + 1]);
|
||||
pieces.push(gen_pack_bits_scalar(
|
||||
scalar,
|
||||
scalar_offset,
|
||||
&inner,
|
||||
));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
GpuType::InlineStruct(structname) => {
|
||||
writeln!(
|
||||
r,
|
||||
" {}_write({}Ref({}), s.{});",
|
||||
structname,
|
||||
structname,
|
||||
simplified_add("ref.offset", *offset),
|
||||
name
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)),
|
||||
}
|
||||
GpuType::InlineStruct(structname) => {
|
||||
writeln!(
|
||||
r,
|
||||
" {}_write({}Ref({}), s.{});",
|
||||
structname,
|
||||
structname,
|
||||
simplified_add("ref.offset", *offset),
|
||||
name
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
GpuType::Ref(_) => pieces.push(format!("s.{}.offset", name)),
|
||||
}
|
||||
}
|
||||
|
||||
if !pieces.is_empty() {
|
||||
write!(r, " {}[ix + {}] = ", bufname, i).unwrap();
|
||||
for (j, piece) in pieces.iter().enumerate() {
|
||||
|
@ -271,6 +391,7 @@ fn gen_struct_write(
|
|||
fn gen_pack_bits_scalar(ty: &GpuScalar, offset: usize, inner: &str) -> String {
|
||||
let shift = (offset % 4) * 8;
|
||||
let bits = match ty {
|
||||
GpuScalar::F16 => format!("packHalf2x16(vec2({}, 0.0)) & 0xffff", inner),
|
||||
GpuScalar::F32 => format!("floatBitsToUint({})", inner),
|
||||
// Note: this doesn't mask small unsigned int types; the caller is
|
||||
// responsible for making sure they don't overflow.
|
||||
|
@ -367,7 +488,7 @@ fn glsl_type(ty: &GpuType) -> String {
|
|||
// GLSL type that can contain the scalar value.
|
||||
fn glsl_scalar(s: &GpuScalar) -> &'static str {
|
||||
match s {
|
||||
GpuScalar::F32 => "float",
|
||||
GpuScalar::F16 | GpuScalar::F32 => "float",
|
||||
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "int",
|
||||
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uint",
|
||||
}
|
||||
|
@ -375,7 +496,7 @@ fn glsl_scalar(s: &GpuScalar) -> &'static str {
|
|||
|
||||
fn glsl_vecname(s: &GpuScalar) -> &'static str {
|
||||
match s {
|
||||
GpuScalar::F32 => "vec",
|
||||
GpuScalar::F16 | GpuScalar::F32 => "vec",
|
||||
GpuScalar::I8 | GpuScalar::I16 | GpuScalar::I32 => "ivec",
|
||||
GpuScalar::U8 | GpuScalar::U16 | GpuScalar::U32 => "uvec",
|
||||
}
|
||||
|
|
|
@ -12,14 +12,14 @@ use syn::{
|
|||
/// A scalar that can be represented in a packed data structure.
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
pub enum GpuScalar {
|
||||
F16,
|
||||
F32,
|
||||
I8,
|
||||
I16,
|
||||
I32,
|
||||
F32,
|
||||
U8,
|
||||
U16,
|
||||
U32,
|
||||
// TODO: Add F16
|
||||
}
|
||||
|
||||
/// An algebraic datatype.
|
||||
|
@ -52,6 +52,7 @@ impl GpuScalar {
|
|||
fn from_syn(ty: &syn::Type) -> Option<Self> {
|
||||
ty_as_single_ident(ty).and_then(|ident| match ident.as_str() {
|
||||
"f32" => Some(GpuScalar::F32),
|
||||
"f16" => Some(GpuScalar::F16),
|
||||
"i8" => Some(GpuScalar::I8),
|
||||
"i16" => Some(GpuScalar::I16),
|
||||
"i32" => Some(GpuScalar::I32),
|
||||
|
@ -70,7 +71,7 @@ impl GpuScalar {
|
|||
match self {
|
||||
GpuScalar::F32 | GpuScalar::I32 | GpuScalar::U32 => 4,
|
||||
GpuScalar::I8 | GpuScalar::U8 => 1,
|
||||
GpuScalar::I16 | GpuScalar::U16 => 2,
|
||||
GpuScalar::F16 | GpuScalar::I16 | GpuScalar::U16 => 2,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,3 +9,4 @@ keywords = ["graphics", "2d"]
|
|||
|
||||
[dependencies]
|
||||
piet-gpu-derive = { path = "../piet-gpu-derive" }
|
||||
half = "1.5.0"
|
||||
|
|
|
@ -1,4 +1,5 @@
|
|||
pub mod encoder;
|
||||
pub mod ptcl;
|
||||
pub mod scene;
|
||||
pub mod test;
|
||||
pub mod tilegroup;
|
||||
|
|
|
@ -7,6 +7,7 @@ fn main() {
|
|||
"scene" => print!("{}", piet_gpu_types::scene::gen_gpu_scene()),
|
||||
"tilegroup" => print!("{}", piet_gpu_types::tilegroup::gen_gpu_tilegroup()),
|
||||
"ptcl" => print!("{}", piet_gpu_types::ptcl::gen_gpu_ptcl()),
|
||||
"test" => print!("{}", piet_gpu_types::test::gen_gpu_test()),
|
||||
_ => println!("Oops, unknown module name"),
|
||||
}
|
||||
}
|
||||
|
|
33
piet-gpu-types/src/test.rs
Normal file
33
piet-gpu-types/src/test.rs
Normal file
|
@ -0,0 +1,33 @@
|
|||
use piet_gpu_derive::piet_gpu;
|
||||
|
||||
// Layout module whose structs all contain `f16` fields, in several
// arrangements: a pair of f16 scalars, f16 scalars mixed with u16 scalars, and
// f16 arrays of even and odd length.
// NOTE(review): presumably exists to exercise the f16 paths of the generated
// read/write code -- confirm against the code generator.
piet_gpu! {
    #[rust_encode]
    #[gpu_write]
    mod test {
        // Two consecutive f16 scalars.
        struct StructA {
            a: f16,
            b: f16,
        }

        // f16 followed by u16, then a trailing f16.
        struct StructB {
            a: f16,
            b: u16,
            c: f16,
        }

        // f16 scalars at both ends with two u16 scalars in between.
        struct StructC {
            a: f16,
            b: u16,
            c: u16,
            d: f16,
        }

        // f16 array with an even number of elements.
        struct StructD {
            a: [f16; 2],
        }

        // f16 array with an odd number of elements.
        struct StructE {
            a: [f16; 3],
        }
    }
}
|
Loading…
Reference in a new issue