From 51dbb749ef711b68bb0a0f6d8e8e5ce079de73e1 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 24 Dec 2018 15:43:36 -0700 Subject: [PATCH 1/5] DMA first draft --- .travis.yml | 2 + examples/light_cycle.rs | 2 +- src/io.rs | 1 + src/io/dma.rs | 209 ++++++++++++++++++++++++++++++++++++++++ src/video.rs | 13 ++- 5 files changed, 224 insertions(+), 3 deletions(-) create mode 100644 src/io/dma.rs diff --git a/.travis.yml b/.travis.yml index ec6794f..9afa017 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,8 @@ before_script: - cargo install-update -a script: + # Travis seems to cache for some dumb reason, but we don't want that at all. + - rm -fr target # Obtain the devkitPro tools, using `target/` as a temp directory - mkdir -p target - cd target diff --git a/examples/light_cycle.rs b/examples/light_cycle.rs index ee367f9..6b47d57 100644 --- a/examples/light_cycle.rs +++ b/examples/light_cycle.rs @@ -46,7 +46,7 @@ fn main(_argc: isize, _argv: *const *const u8) -> isize { let color_here = Mode3::read_pixel(px, py); if color_here != Some(BLACK) { // crashed into our own line, reset the screen - Mode3::clear_to(BLACK); + Mode3::dma_clear_to(BLACK); color = color.rotate_left(5); } else { // draw the new part of the line diff --git a/src/io.rs b/src/io.rs index c0f5424..4248d94 100644 --- a/src/io.rs +++ b/src/io.rs @@ -11,4 +11,5 @@ use super::*; use gba_proc_macro::register_bit; pub mod display; +pub mod dma; pub mod keypad; diff --git a/src/io/dma.rs b/src/io/dma.rs new file mode 100644 index 0000000..0fb2df2 --- /dev/null +++ b/src/io/dma.rs @@ -0,0 +1,209 @@ +//! Module for using the four Direct Memory Access (DMA) units. +//! +//! The GBA has four DMA units, numbered 0 through 3. If you ever try to have +//! more than one active at once the lowest numbered DMA will take priority and +//! complete first. Any use of DMA halts the CPU's operation. DMA can also be +//! configured to activate automatically at certain times, and when configured +//! 
like that the CPU runs in between the automatic DMA activations. (This is +//! actually the intended method for doing sound.) Each DMA unit has an intended +//! use: +//! +//! * DMA0: highest priority, but can only read from internal memory. +//! * DMA1/DMA2: Intended for sound transfers. +//! * DMA3: Can be used to write into Game Pak ROM / FlashROM (not SRAM). +//! +//! ## DMA Anatomy +//! +//! Each DMA is utilized via a combination of four IO registers: +//! +//! * **Source Address:** (`*const u32`) Where to read from. DMA0 can only read +//! from internal memory, the other units can read from any non-SRAM memory. +//! * **Destination Address:** (`*mut u32`) Where to write to. DMA0/1/2 can only +//! write to internal memory, DMA3 can write to any non-SRAM memory. +//! * **Word Count:** (`u16`) How many units to transfer. Despite being called +//! "word count" you can also use DMA to transfer half-words. DMA0/1/2 are +//! limited to a 14-bit counter value, DMA3 allows the full 16-bit range to +//! be used for the counter. Note that even when transferring half-words you +//! MUST have both Source and Destination be 32-bit aligned. +//! * **Control:** (`DMAControlSetting`) This is one of those fiddly bit-flag +//! registers with all sorts of settings. See the type for more info. +//! +//! Note that Source, Destination, and Count are all write-only, while the +//! Control is read/write. When a DMA unit is _Enabled_ it copies the relevant +//! Source, Destination, and Count values into its own internal registers (so a +//! second Enable will reuse the old values). If the DMA _Repeats_ it re-copies +//! the Count, and also the Destination if +//! `DMADestAddressControl::IncrementReload` is configured in the Control, but +//! not the Source. +//! +//! When the DMA completes the Enable bit will be cleared from the Control, +//! unless the Repeat bit is set in the Control, in which case the Enable bit is +//! 
left active and the DMA will automatically activate again at the right time +//! (depending on the Start Timing setting). You have to manually turn off the +//! correct bit to stop the DMA unit. +//! +//! ## Safety +//! +//! As you might have noticed by now, utilizing DMA can be very fiddly. It moves +//! around bytes with no concern for the type system, including the `Clone` and +//! `Copy` traits that Rust relies on. Use of DMA can be made _somewhat safe_ +//! via wrapper methods (such as those we've provided), but it's fundamentally +//! an unsafe thing to use. +//! +//! ## DMA Can Cause Subtle Bugs +//! +//! Since the CPU is halted while DMA is active you can miss out on interrupts +//! that should have fired. This can cause any number of unintended effects. DMA +//! is primarily intended for loading large amounts of graphical data from ROM, +//! or loading sound data at regulated intervals to avoid pops and crackles. It +//! _can_ be used for general purpose bulk transfers but you are advised to use +//! restraint. + +use super::*; + +newtype! { + /// Allows you to configure a DMA unit. 
+ #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] + DMAControlSetting, u16 +} +#[allow(missing_docs)] +impl DMAControlSetting { + pub const DEST_ADDR_CONTROL_MASK: u16 = 0b11 << 5; + pub fn dest_address_control(self) -> DMADestAddressControl { + // TODO: constify + match self.0 & Self::DEST_ADDR_CONTROL_MASK { + 0 => DMADestAddressControl::Increment, + 1 => DMADestAddressControl::Decrement, + 2 => DMADestAddressControl::Fixed, + 3 => DMADestAddressControl::IncrementReload, + _ => unsafe { core::hint::unreachable_unchecked() }, + } + } + pub const fn with_dest_address_control(self, new_control: DMADestAddressControl) -> Self { + Self((self.0 & !Self::DEST_ADDR_CONTROL_MASK) | ((new_control as u16) << 5)) + } + + pub const SRC_ADDR_CONTROL_MASK: u16 = 0b11 << 7; + pub fn src_address_control(self) -> DMASrcAddressControl { + // TODO: constify + match self.0 & Self::SRC_ADDR_CONTROL_MASK { + 0 => DMASrcAddressControl::Increment, + 1 => DMASrcAddressControl::Decrement, + 2 => DMASrcAddressControl::Fixed, + _ => unreachable!(), + } + } + pub const fn with_src_address_control(self, new_control: DMASrcAddressControl) -> Self { + Self((self.0 & !Self::SRC_ADDR_CONTROL_MASK) | ((new_control as u16) << 7)) + } + + register_bit!(REPEAT, u16, 1 << 9, repeat); + register_bit!(TRANSFER_U32, u16, 1 << 10, transfer_u32); + // TODO: Game Pak DRQ? 
(bit 11) DMA3 only, and requires specific hardware + + pub const START_TIMING_MASK: u16 = 0b11 << 12; + pub fn start_timing(self) -> DMAStartTiming { + // TODO: constify + match self.0 & Self::DEST_ADDR_CONTROL_MASK { + 0 => DMAStartTiming::Immediate, + 1 => DMAStartTiming::VBlank, + 2 => DMAStartTiming::HBlank, + 3 => DMAStartTiming::Special, + _ => unsafe { core::hint::unreachable_unchecked() }, + } + } + pub const fn with_start_timing(self, new_control: DMAStartTiming) -> Self { + Self((self.0 & !Self::START_TIMING_MASK) | ((new_control as u16) << 12)) + } + + register_bit!(IRQ_AT_END, u16, 1 << 14, irq_at_end); + register_bit!(ENABLE, u16, 1 << 15, enable); +} + +/// Sets how the destination address should be adjusted per data transfer. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u16)] +pub enum DMADestAddressControl { + /// Offset +1 + Increment = 0, + /// Offset -1 + Decrement = 1, + /// No change + Fixed = 2, + /// Offset +1 per transfer and auto-reset to base when the DMA repeats. + IncrementReload = 3, +} + +/// Sets how the source address should be adjusted per data transfer. +/// +/// Note that only 0,1,2 are allowed, 3 is prohibited. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u16)] +pub enum DMASrcAddressControl { + /// Offset +1 + Increment = 0, + /// Offset -1 + Decrement = 1, + /// No change + Fixed = 2, +} + +/// Sets when the DMA should activate. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u16)] +pub enum DMAStartTiming { + /// Causes the DMA to start as soon as possible (2 wait cycles after enabled) + Immediate = 0, + /// Start at VBlank + VBlank = 1, + /// Start at HBlank + HBlank = 2, + /// The special timing depends on the DMA it's used with: + /// * 0: Prohibited + /// * 1/2: Sound FIFO, + /// * 3: Video Capture, for transferring from memory/camera into VRAM + Special = 3, +} + +/// This is the "general purpose" DMA unit, with the fewest limits. 
+pub struct DMA3; +impl DMA3 { + /// DMA 3 Source Address, write only. + const DMA3SAD: VolAddress<*const u32> = unsafe { VolAddress::new_unchecked(0x400_00D4) }; + /// DMA 3 Destination Address, write only. + const DMA3DAD: VolAddress<*mut u32> = unsafe { VolAddress::new_unchecked(0x400_00D8) }; + /// DMA 3 Word Count, write only. + const DMA3CNT_L: VolAddress = unsafe { VolAddress::new_unchecked(0x400_00DC) }; + /// DMA 3 Control, read/write. + const DMA3CNT_H: VolAddress = unsafe { VolAddress::new_unchecked(0x400_00DE) }; + + /// Fills `count` slots (starting at `dest`) with the value at `src`. + /// + /// # Safety + /// + /// Both pointers must be aligned, and all positions specified for writing + /// must be valid for writing. + pub unsafe fn fill32(src: *const u32, dest: *mut u32, count: u16) { + const FILL_CONTROL: DMAControlSetting = DMAControlSetting::new() + .with_src_address_control(DMASrcAddressControl::Fixed) + .with_transfer_u32(true) + .with_enable(true); + // TODO: destination checking against SRAM + Self::DMA3SAD.write(src); + Self::DMA3DAD.write(dest); + Self::DMA3CNT_L.write(count); + Self::DMA3CNT_H.write(FILL_CONTROL); + // Note(Lokathor): Once DMA is set to activate it takes 2 cycles for it to + // kick in. You can do any non-DMA thing you like before that, but since + // it's only two cycles we just insert two NOP instructions to ensure that + // successive calls to `fill32` or other DMA methods don't interfere with + // each other. 
+ asm!(/* ASM */ "NOP + NOP" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} diff --git a/src/video.rs b/src/video.rs index d6bb6c7..8e2586f 100644 --- a/src/video.rs +++ b/src/video.rs @@ -48,9 +48,11 @@ impl Mode3 { pub const VRAM: VolAddressBlock = unsafe { VolAddressBlock::new_unchecked(VolAddress::new_unchecked(VRAM_BASE_USIZE), Self::SCREEN_WIDTH * Self::SCREEN_HEIGHT) }; + const MODE3_U32_COUNT: u16 = (Self::SCREEN_WIDTH * Self::SCREEN_HEIGHT / 2) as u16; + /// private iterator over the pixels, two at a time const BULK_ITER: VolAddressIter = - unsafe { VolAddressBlock::new_unchecked(VolAddress::new_unchecked(VRAM_BASE_USIZE), Self::SCREEN_WIDTH * Self::SCREEN_HEIGHT / 2).iter() }; + unsafe { VolAddressBlock::new_unchecked(VolAddress::new_unchecked(VRAM_BASE_USIZE), Self::MODE3_U32_COUNT as usize).iter() }; /// Reads the pixel at the given (col,row). /// @@ -79,5 +81,12 @@ impl Mode3 { } } - // TODO: dma_clear_to? + /// Clears the whole screen to the desired color using DMA3. + pub fn dma_clear_to(color: Color) { + use crate::io::dma::DMA3; + + let color32 = color.0 as u32; + let bulk_color = color32 << 16 | color32; + unsafe { DMA3::fill32(&bulk_color, VRAM_BASE_USIZE as *mut u32, Self::MODE3_U32_COUNT) }; + } } From 1dcecb72cb80b695229fa945c73ba8b92244cc81 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 24 Dec 2018 16:02:47 -0700 Subject: [PATCH 2/5] added the downshifts. 
--- src/io/dma.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/io/dma.rs b/src/io/dma.rs index 0fb2df2..22b118e 100644 --- a/src/io/dma.rs +++ b/src/io/dma.rs @@ -71,7 +71,7 @@ impl DMAControlSetting { pub const DEST_ADDR_CONTROL_MASK: u16 = 0b11 << 5; pub fn dest_address_control(self) -> DMADestAddressControl { // TODO: constify - match self.0 & Self::DEST_ADDR_CONTROL_MASK { + match (self.0 & Self::DEST_ADDR_CONTROL_MASK) >> 5 { 0 => DMADestAddressControl::Increment, 1 => DMADestAddressControl::Decrement, 2 => DMADestAddressControl::Fixed, @@ -86,7 +86,7 @@ impl DMAControlSetting { pub const SRC_ADDR_CONTROL_MASK: u16 = 0b11 << 7; pub fn src_address_control(self) -> DMASrcAddressControl { // TODO: constify - match self.0 & Self::SRC_ADDR_CONTROL_MASK { + match (self.0 & Self::SRC_ADDR_CONTROL_MASK) >> 7 { 0 => DMASrcAddressControl::Increment, 1 => DMASrcAddressControl::Decrement, 2 => DMASrcAddressControl::Fixed, @@ -104,7 +104,7 @@ impl DMAControlSetting { pub const START_TIMING_MASK: u16 = 0b11 << 12; pub fn start_timing(self) -> DMAStartTiming { // TODO: constify - match self.0 & Self::DEST_ADDR_CONTROL_MASK { + match (self.0 & Self::DEST_ADDR_CONTROL_MASK) >> 12 { 0 => DMAStartTiming::Immediate, 1 => DMAStartTiming::VBlank, 2 => DMAStartTiming::HBlank, From 6a07f95973329743f2163fe4790acfa6d4b26953 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 24 Dec 2018 16:05:11 -0700 Subject: [PATCH 3/5] TODO for later --- src/io/dma.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/io/dma.rs b/src/io/dma.rs index 22b118e..ddfcc62 100644 --- a/src/io/dma.rs +++ b/src/io/dma.rs @@ -90,7 +90,7 @@ impl DMAControlSetting { 0 => DMASrcAddressControl::Increment, 1 => DMASrcAddressControl::Decrement, 2 => DMASrcAddressControl::Fixed, - _ => unreachable!(), + _ => unreachable!(), // TODO: custom error message? 
} } pub const fn with_src_address_control(self, new_control: DMASrcAddressControl) -> Self { From 745dea5da57957d6ca7216178e319ea00510fd22 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 24 Dec 2018 17:28:06 -0700 Subject: [PATCH 4/5] DMA description into the book --- book/src/04-non-video/03-dma.md | 132 ++++++++++++++++++++++++++++++++ 1 file changed, 132 insertions(+) diff --git a/book/src/04-non-video/03-dma.md b/book/src/04-non-video/03-dma.md index 08754f5..ef9c846 100644 --- a/book/src/04-non-video/03-dma.md +++ b/book/src/04-non-video/03-dma.md @@ -1 +1,133 @@ # Direct Memory Access + +The GBA has four Direct Memory Access (DMA) units that can be utilized. They're +mostly the same in terms of overall operation, but each unit has special rules +that make it better suited to a particular task. + +**Please Note:** TONC and GBATEK have slightly different concepts of how a DMA +unit's registers should be viewed. I've chosen to go by what GBATEK uses. + +## General DMA + +A single DMA unit is controlled through four different IO Registers. + +* **Source:** (`DMAxSAD`, write only) A `*const` pointer that the DMA reads from. +* **Destination:** (`DMAxDAD`, write only) A `*mut` pointer that the DMA writes + to. +* **Count:** (`DMAxCNT_L`, write only) How many transfers to perform. +* **Control:** (`DMAxCNT_H`, read/write) A register full of bit-flags that + controls all sorts of details. + +Here, the `x` is replaced with 0 through 3 when utilizing whichever particular +DMA unit. + +### Source Address + +This is either a `u32` or `u16` address depending on the unit's assigned +transfer mode (see Control). The address MUST be aligned. + +With DMA0 the source must be internal memory. With other DMA units the source +can be any non-`SRAM` location. + +### Destination Address + +As with the Source, this is either a `u32` or `u16` address depending on the +unit's assigned transfer mode (see Control). The address MUST be aligned. 
+ +With DMA0/1/2 the destination must be internal memory. With DMA3 the destination +can be any non-`SRAM` memory (allowing writes into Game Pak ROM / FlashROM, +assuming that your Game Pak hardware supports that). + +### Count + +This is a `u16` that says how many transfers (`u16` or `u32`) to make. + +DMA0/1/2 will only actually accept a 14-bit value, while DMA3 will accept a full +16-bit value. A value of 0 instead acts as if you'd used the _maximum_ value for +the DMA in question. Put another way, DMA0/1/2 transfer `1` through `0x4000` +words, with `0` as the `0x4000` value, and DMA3 transfers `1` through `0x1_0000` +words, with `0` as the `0x1_0000` value. + +The maximum value isn't a very harsh limit. Even in just `u16` mode, `0x4000` +transfers is 32k, which would for example be all 32k of `IWRAM` (including your +own user stack). If you for some reason do need to transfer more than a single +DMA use can move around at once then you can just setup the DMA a second time +and keep going. + +### Control + +This `u16` bit-flag field is where things get wild. + +* Bits 0-4 do nothing +* Bit 5-6 control how the destination address changes per transfer: + * 0: Offset +1 + * 1: Offset -1 + * 2: No Change + * 3: Offset +1 and reload when a Repeat starts (below) +* Bit 7-8 similarly control how the source address changes per transfer: + * 0: Offset +1 + * 1: Offset -1 + * 2: No Change + * 3: Prohibited +* Bit 9: enables Repeat mode. +* Bit 10: Transfer `u16` (false) or `u32` (true) data. +* Bit 11: "Game Pak DRQ" flag. GBATEK says that this is only allowed for DMA3, + and also your Game Pak hardware must be equipped to use DRQ mode. I don't even + know what DRQ mode is all about, and GBATEK doesn't say much either. If DRQ is + set then you _must not_ set the Repeat bit as well. The `gba` crate simply + doesn't bother to expose this flag to users. +* Bit 12-13: DMA Start: + * 0: "Immediate", which is 2 cycles after requested. 
+ * 1: VBlank + * 2: HBlank + * 3: Special, depending on what DMA unit is involved: + * DMA0: Prohibited. + * DMA1/2: Sound FIFO (see the [Sound](04-sound.md) section) + * DMA3: Video Capture, intended for use with the Repeat flag, performs a + transfer per scanline (similar to HBlank) starting at `VCOUNT` 2 and + stopping at `VCOUNT` 162. Intended for copying things from ROM or camera + into VRAM. +* Bit 14: Interrupt upon DMA complete. +* Bit 15: Enable this DMA unit. + +## DMA Life Cycle + +The general technique for using a DMA unit involves first setting the relevant +source, destination, and count registers, then setting the appropriate control +register value with the Enable bit set. + +Once the Enable flag is set the appropriate DMA unit will trigger at the +assigned time (Bit 12-13). The CPU's operation is halted while any DMA unit is +active, until the DMA completes its task. If more than one DMA unit is supposed +to be active at once, then the DMA unit with the lower number will activate and +complete before any others. + +When the DMA triggers via _Enable_, the `Source`, `Destination`, and `Count` +values are copied from the GBA's registers into the DMA unit's internal +registers. Changes to the DMA unit's internal copy of the data don't affect the +values in the GBA registers. Another _Enable_ will read the same values as +before. + +If DMA is triggered via having _Repeat_ active then _only_ the Count is copied +into the DMA unit registers. The `Source` and `Destination` are unaffected +during a Repeat. The exception to this is if the destination address control +value (Bits 5-6) is set to 3 (`0b11`), in which case a _Repeat_ will also +re-copy the `Destination` as well as the `Count`. + +Once a DMA operation completes, the Enable flag of its Control register will +automatically be disabled, _unless_ the Repeat flag is on, in which case the +Enable flag is left active. 
You will have to manually disable it if you don't +want the DMA to kick in again over and over at the specified starting time. + +## DMA Limitations + +The DMA units cannot access `SRAM` at all. + +If you're using HBlank to access any part of the memory that the display +controller utilizes (`OAM`, `PALRAM`, `VRAM`), you need to have enabled the +"HBlank Interval Free" bit in the Display Control Register (`DISPCNT`). + +Whenever DMA is active the CPU is _not_ active, which means that +[Interrupts](05-interrupts.md) will not fire while DMA is happening. This can +cause any number of hard to track down bugs. Try to limit your use of the DMA +units if you can. From b183e9b6b482b353e51a9160ad6dbbd653d00fc5 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 24 Dec 2018 17:56:46 -0700 Subject: [PATCH 5/5] Allow limited unsafe access to DMA3 Possibly make a "DMA" trait to ensure that things stay uniform once we also have DMA0/1/2? --- src/io/dma.rs | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/src/io/dma.rs b/src/io/dma.rs index ddfcc62..f796f8c 100644 --- a/src/io/dma.rs +++ b/src/io/dma.rs @@ -177,6 +177,55 @@ impl DMA3 { /// DMA 3 Control, read/write. const DMA3CNT_H: VolAddress = unsafe { VolAddress::new_unchecked(0x400_00DE) }; + /// Assigns the source register. + /// + /// This register is write only, so it is not exposed directly. + /// + /// # Safety + /// + /// The source pointer must be aligned and valid to read from. + pub unsafe fn set_source(src: *const u32) { + Self::DMA3SAD.write(src) + } + + /// Assigns the destination register. + /// + /// This register is write only, so it is not exposed directly. + /// + /// # Safety + /// + /// The destination pointer must be aligned and valid to write to. + pub unsafe fn set_dest(dest: *mut u32) { + Self::DMA3DAD.write(dest) + } + + /// Assigns the count register. + /// + /// This register is write only, so it is not exposed directly. 
+ /// + /// # Safety + /// + /// The count given must specify a valid number of units to write, starting at + /// the assigned destination address. + pub unsafe fn set_count(count: u16) { + Self::DMA3CNT_L.write(count) + } + + /// Reads the current control setting. + pub fn control() -> DMAControlSetting { + Self::DMA3CNT_H.read() + } + + /// Writes the control setting given. + /// + /// # Safety + /// + /// You must ensure that the Source, Destination, and Count values are set + /// correctly **before** you activate the Enable bit. + pub unsafe fn set_control(setting: DMAControlSetting) { + Self::DMA3CNT_H.write(setting) + } + /// Fills `count` slots (starting at `dest`) with the value at `src`. /// /// # Safety