various micro optimisations

This commit is contained in:
Corwin 2023-04-07 01:40:27 +01:00
parent 4f19d6c240
commit f59e4ad322
No known key found for this signature in database
2 changed files with 64 additions and 51 deletions

View file

@ -35,9 +35,10 @@ pub enum AffineMode {
} }
impl Attributes { impl Attributes {
pub fn bytes(self) -> [u8; 6] { pub fn write(self, ptr: *mut u16) {
let mode = self.a0.object_mode(); let mode = self.a0.object_mode();
let attrs = match mode { unsafe {
let attrs = core::mem::transmute::<_, [u16; 3]>(match mode {
ObjectMode::Normal => [ ObjectMode::Normal => [
self.a0.into_bytes(), self.a0.into_bytes(),
self.a1s.into_bytes(), self.a1s.into_bytes(),
@ -48,10 +49,12 @@ impl Attributes {
self.a1a.into_bytes(), self.a1a.into_bytes(),
self.a2.into_bytes(), self.a2.into_bytes(),
], ],
}; });
// Safety: length and alignment are the same, and every possible value is valid ptr.add(0).write_volatile(attrs[0]);
unsafe { core::mem::transmute(attrs) } ptr.add(1).write_volatile(attrs[1]);
ptr.add(2).write_volatile(attrs[2]);
}
} }
pub fn is_visible(self) -> bool { pub fn is_visible(self) -> bool {

View file

@ -13,11 +13,12 @@ use crate::display::{
use super::attributes::{AffineMode, Attributes}; use super::attributes::{AffineMode, Attributes};
#[derive(Default, Debug)] #[derive(Debug)]
struct OamFrameModifyables { struct OamFrameModifyables {
this_frame_sprites: Vec<SpriteVram>, this_frame_sprites: Vec<SpriteVram>,
frame: u32, frame: u32,
affine_matrix_count: u32, affine_matrix_count: u32,
previous_index: usize,
} }
pub struct OamUnmanaged<'gba> { pub struct OamUnmanaged<'gba> {
@ -47,27 +48,37 @@ impl Drop for OamSlot<'_> {
impl OamSlot<'_> { impl OamSlot<'_> {
/// Set the slot in OAM to contain the sprite given. /// Set the slot in OAM to contain the sprite given.
pub fn set(mut self, object: &ObjectUnmanaged) { #[inline(always)]
let mut attributes = object.attributes; pub fn set(self, object: &ObjectUnmanaged) {
// SAFETY: This function is not reentrant and we currently hold a mutable borrow of the [UnmanagedOAM]. self.set_inner(object);
let frame_data = unsafe { &mut *self.frame_data.get() };
Self::handle_affine(&mut attributes, frame_data, object);
self.set_bytes(attributes.bytes());
frame_data.this_frame_sprites.push(object.sprite.clone());
// Don't call the drop implementation. // Don't call the drop implementation.
// This is okay because none of our fields have drop implementations. // This is okay because none of our fields have drop implementations.
core::mem::forget(self); core::mem::forget(self);
} }
/// By writing these as two separate functions, one inlined and one not, the
/// compiler doesn't have to copy around the slot structure while still
/// keeping move semantics. This is slightly faster in benchmarks.
#[inline(never)]
fn set_inner(&self, object: &ObjectUnmanaged) {
let mut attributes = object.attributes;
// SAFETY: This function is not reentrant and we currently hold a mutable borrow of the [OamUnmanaged].
let frame_data = unsafe { &mut *self.frame_data.get() };
if let Some(affine_matrix) = &object.affine_matrix {
Self::handle_affine(&mut attributes, frame_data, affine_matrix);
}
attributes.write(unsafe { (OBJECT_ATTRIBUTE_MEMORY as *mut u16).add(self.slot * 4) });
frame_data.this_frame_sprites.push(object.sprite.clone());
}
fn handle_affine( fn handle_affine(
attributes: &mut Attributes, attributes: &mut Attributes,
frame_data: &mut OamFrameModifyables, frame_data: &mut OamFrameModifyables,
object: &ObjectUnmanaged, affine_matrix: &AffineMatrixVram,
) { ) {
if let Some(affine_matrix) = &object.affine_matrix {
if affine_matrix.frame_count() != frame_data.frame { if affine_matrix.frame_count() != frame_data.frame {
affine_matrix.set_frame_count(frame_data.frame); affine_matrix.set_frame_count(frame_data.frame);
assert!( assert!(
@ -81,26 +92,18 @@ impl OamSlot<'_> {
attributes.set_affine_matrix(affine_matrix.location() as u16); attributes.set_affine_matrix(affine_matrix.location() as u16);
} }
}
fn set_bytes(&mut self, bytes: [u8; 6]) {
unsafe {
let address = (OBJECT_ATTRIBUTE_MEMORY as *mut u8).add(self.slot * 8);
address.copy_from_nonoverlapping(bytes.as_ptr(), bytes.len());
}
}
} }
impl<'oam> Iterator for OamIterator<'oam> { impl<'oam> Iterator for OamIterator<'oam> {
type Item = OamSlot<'oam>; type Item = OamSlot<'oam>;
#[inline(always)]
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
let idx = self.index; let idx = self.index;
self.index += 1; if idx == 128 {
if idx >= 128 {
None None
} else { } else {
self.index += 1;
Some(OamSlot { Some(OamSlot {
slot: idx, slot: idx,
frame_data: self.frame_data, frame_data: self.frame_data,
@ -112,13 +115,15 @@ impl<'oam> Iterator for OamIterator<'oam> {
impl Drop for OamIterator<'_> { impl Drop for OamIterator<'_> {
fn drop(&mut self) { fn drop(&mut self) {
let number_writen = self.index; let number_writen = self.index;
let last_frame_written = unsafe { &mut (*self.frame_data.get()).previous_index };
for idx in number_writen..128 { for idx in number_writen..*last_frame_written {
unsafe { unsafe {
let ptr = (OBJECT_ATTRIBUTE_MEMORY as *mut u16).add(idx * 4); let ptr = (OBJECT_ATTRIBUTE_MEMORY as *mut u16).add(idx * 4);
ptr.write_volatile(0b10 << 8); ptr.write_volatile(0b10 << 8);
} }
} }
*last_frame_written = number_writen;
} }
} }
@ -130,7 +135,7 @@ impl OamUnmanaged<'_> {
// We drain the previous frame sprites here to reuse the Vec's allocation and remove the now-unused sprites. // We drain the previous frame sprites here to reuse the Vec's allocation and remove the now-unused sprites.
// Any sprites currently being shown will now be put in the new Vec. // Any sprites currently being shown will now be put in the new Vec.
self.previous_frame_sprites.drain(..); self.previous_frame_sprites.clear();
core::mem::swap( core::mem::swap(
&mut frame_data.this_frame_sprites, &mut frame_data.this_frame_sprites,
&mut self.previous_frame_sprites, &mut self.previous_frame_sprites,
@ -144,7 +149,12 @@ impl OamUnmanaged<'_> {
pub(crate) fn new() -> Self { pub(crate) fn new() -> Self {
Self { Self {
frame_data: Default::default(), frame_data: UnsafeCell::new(OamFrameModifyables {
this_frame_sprites: Vec::new(),
frame: 0,
affine_matrix_count: 0,
previous_index: 0,
}),
phantom: PhantomData, phantom: PhantomData,
previous_frame_sprites: Default::default(), previous_frame_sprites: Default::default(),
} }