From f59e4ad32228a354fe77e4098d1e2b26e96dd5aa Mon Sep 17 00:00:00 2001 From: Corwin Date: Fri, 7 Apr 2023 01:40:27 +0100 Subject: [PATCH] various micro optimisations --- .../display/object/unmanaged/attributes.rs | 33 ++++---- agb/src/display/object/unmanaged/object.rs | 82 +++++++++++-------- 2 files changed, 64 insertions(+), 51 deletions(-) diff --git a/agb/src/display/object/unmanaged/attributes.rs b/agb/src/display/object/unmanaged/attributes.rs index 2ae52059..2ae0d948 100644 --- a/agb/src/display/object/unmanaged/attributes.rs +++ b/agb/src/display/object/unmanaged/attributes.rs @@ -35,23 +35,26 @@ pub enum AffineMode { } impl Attributes { - pub fn bytes(self) -> [u8; 6] { + pub fn write(self, ptr: *mut u16) { let mode = self.a0.object_mode(); - let attrs = match mode { - ObjectMode::Normal => [ - self.a0.into_bytes(), - self.a1s.into_bytes(), - self.a2.into_bytes(), - ], - _ => [ - self.a0.into_bytes(), - self.a1a.into_bytes(), - self.a2.into_bytes(), - ], - }; + unsafe { + let attrs = core::mem::transmute::<_, [u16; 3]>(match mode { + ObjectMode::Normal => [ + self.a0.into_bytes(), + self.a1s.into_bytes(), + self.a2.into_bytes(), + ], + _ => [ + self.a0.into_bytes(), + self.a1a.into_bytes(), + self.a2.into_bytes(), + ], + }); - // Safety: length and alignment are the same, and every possible value is valid - unsafe { core::mem::transmute(attrs) } + ptr.add(0).write_volatile(attrs[0]); + ptr.add(1).write_volatile(attrs[1]); + ptr.add(2).write_volatile(attrs[2]); + } } pub fn is_visible(self) -> bool { diff --git a/agb/src/display/object/unmanaged/object.rs b/agb/src/display/object/unmanaged/object.rs index f0d4e1d2..fb1c42de 100644 --- a/agb/src/display/object/unmanaged/object.rs +++ b/agb/src/display/object/unmanaged/object.rs @@ -13,11 +13,12 @@ use crate::display::{ use super::attributes::{AffineMode, Attributes}; -#[derive(Default, Debug)] +#[derive(Debug)] struct OamFrameModifyables { this_frame_sprites: Vec, frame: u32, affine_matrix_count: u32, + previous_index: usize, } pub struct OamUnmanaged<'gba> { @@ -47,60 +48,62 @@ impl Drop for OamSlot<'_> { impl OamSlot<'_> { /// Set the slot in OAM to contain the sprite given. - pub fn set(mut self, object: &ObjectUnmanaged) { - let mut attributes = object.attributes; - // SAFETY: This function is not reentrant and we currently hold a mutable borrow of the [UnmanagedOAM]. - let frame_data = unsafe { &mut *self.frame_data.get() }; - - Self::handle_affine(&mut attributes, frame_data, object); - self.set_bytes(attributes.bytes()); - - frame_data.this_frame_sprites.push(object.sprite.clone()); + #[inline(always)] + pub fn set(self, object: &ObjectUnmanaged) { + self.set_inner(object); // don't call the drop implementation. // okay as none of the fields we have have drop implementations. core::mem::forget(self); } + /// By writing these as two separate functions, one inlined and one not, the + /// compiler doesn't have to copy around the slot structure while still + /// keeping move semantics. This is slightly faster in benchmarks. + #[inline(never)] + fn set_inner(&self, object: &ObjectUnmanaged) { + let mut attributes = object.attributes; + // SAFETY: This function is not reentrant and we currently hold a mutable borrow of the [UnmanagedOAM]. + let frame_data = unsafe { &mut *self.frame_data.get() }; + + if let Some(affine_matrix) = &object.affine_matrix { + Self::handle_affine(&mut attributes, frame_data, affine_matrix); + } + attributes.write(unsafe { (OBJECT_ATTRIBUTE_MEMORY as *mut u16).add(self.slot * 4) }); + + frame_data.this_frame_sprites.push(object.sprite.clone()); + } + fn handle_affine( attributes: &mut Attributes, frame_data: &mut OamFrameModifyables, - object: &ObjectUnmanaged, + affine_matrix: &AffineMatrixVram, ) { - if let Some(affine_matrix) = &object.affine_matrix { - if affine_matrix.frame_count() != frame_data.frame { - affine_matrix.set_frame_count(frame_data.frame); - assert!( - frame_data.affine_matrix_count <= 32, - "too many affine matricies in one frame" - ); - affine_matrix.set_location(frame_data.affine_matrix_count); - frame_data.affine_matrix_count += 1; - affine_matrix.write_to_location(OBJECT_ATTRIBUTE_MEMORY); - } - - attributes.set_affine_matrix(affine_matrix.location() as u16); + if affine_matrix.frame_count() != frame_data.frame { + affine_matrix.set_frame_count(frame_data.frame); + assert!( + frame_data.affine_matrix_count <= 32, + "too many affine matricies in one frame" + ); + affine_matrix.set_location(frame_data.affine_matrix_count); + frame_data.affine_matrix_count += 1; + affine_matrix.write_to_location(OBJECT_ATTRIBUTE_MEMORY); } - } - fn set_bytes(&mut self, bytes: [u8; 6]) { - unsafe { - let address = (OBJECT_ATTRIBUTE_MEMORY as *mut u8).add(self.slot * 8); - address.copy_from_nonoverlapping(bytes.as_ptr(), bytes.len()); - } + attributes.set_affine_matrix(affine_matrix.location() as u16); } } impl<'oam> Iterator for OamIterator<'oam> { type Item = OamSlot<'oam>; + #[inline(always)] fn next(&mut self) -> Option { let idx = self.index; - self.index += 1; - - if idx >= 128 { + if idx == 128 { None } else { + self.index += 1; Some(OamSlot { slot: idx, frame_data: self.frame_data, @@ -112,13 +115,15 @@ impl<'oam> Iterator for OamIterator<'oam> { impl Drop for OamIterator<'_> { fn drop(&mut self) { let number_writen = self.index; + let last_frame_written = unsafe { &mut (*self.frame_data.get()).previous_index }; - for idx in number_writen..128 { + for idx in number_writen..*last_frame_written { unsafe { let ptr = (OBJECT_ATTRIBUTE_MEMORY as *mut u16).add(idx * 4); ptr.write_volatile(0b10 << 8); } } + *last_frame_written = number_writen; } } @@ -130,7 +135,7 @@ impl OamUnmanaged<'_> { // We drain the previous frame sprites here to reuse the Vecs allocation and remove the now unused sprites. // Any sprites currently being shown will now be put in the new Vec. - self.previous_frame_sprites.drain(..); + self.previous_frame_sprites.clear(); core::mem::swap( &mut frame_data.this_frame_sprites, &mut self.previous_frame_sprites, @@ -144,7 +149,12 @@ impl OamUnmanaged<'_> { pub(crate) fn new() -> Self { Self { - frame_data: Default::default(), + frame_data: UnsafeCell::new(OamFrameModifyables { + this_frame_sprites: Vec::new(), + frame: 0, + affine_matrix_count: 0, + previous_index: 0, + }), phantom: PhantomData, previous_frame_sprites: Default::default(), }