Merge pull request #36 from rust-console/lokathor

Much improved Quirks chapter, some Concepts work
2025-01-23 07:56:33 +11:00 · 2018-12-20 17:20:36 -07:00 · 2018-12-20 17:20:36 -07:00 · c666cc114d
commit c666cc114d
parent c7027e1db4 a1b85fa98f
43 changed files with 2906 additions and 1195 deletions
--- a/.travis.yml
+++ b/.travis.yml
@ -9,6 +9,7 @@ rust:

 before_script:
  - rustup component add rust-src
+  - rustup component add clippy
  - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update)
  - (test -x $HOME/.cargo/bin/cargo-xbuild || cargo install cargo-xbuild)
  - (test -x $HOME/.cargo/bin/cargo-make || cargo install cargo-make)
@ -27,9 +28,15 @@ script:
  - export PATH="$PATH:/opt/devkitpro/devkitARM/bin"
  - export PATH="$PATH:/opt/devkitpro/tools/bin"
  - cd ..
-  # Test the lib and then compile all examples with `cargo make`
-  - cargo test --lib && cargo test --lib --release
-  - cargo make
+  # Run all verifications, both debug and release
+  - cargo clippy
+  - cargo clippy --release
+  - cargo test --no-fail-fast --lib
+  - cargo test --no-fail-fast --lib --release
+  - cargo test --no-fail-fast --tests
+  - cargo test --no-fail-fast --tests --release
+  # Let cargo make take over the rest
+  - cargo make build-all
  # Test build the book so that a failed book build kills this run
  - cd book && mdbook build

--- a/Cargo.toml
+++ b/Cargo.toml
@ -12,8 +12,13 @@ license = "Apache-2.0"
 publish = false

 [dependencies]
+typenum = "1.10"
 gba-proc-macro = "0.2.1"

+#[dev-dependencies]
+#quickcheck="0.7"
+# TODO: F
+
 [profile.release]
 lto = true
 panic = "abort"
--- a/Makefile.toml
+++ b/Makefile.toml
@ -55,12 +55,15 @@ fn main() -> std::io::Result<()> {
 '''
 ]

-[tasks.build]
-dependencies = ["build-examples-debug", "build-examples-release", "pack-roms"]
-
 [tasks.test]
 command = "cargo"
 args = ["test", "--lib"]

+[tasks.justrelease]
+dependencies = ["build-examples-release", "pack-roms"]
+
+[tasks.build-all]
+dependencies = ["build-examples-debug", "build-examples-release", "pack-roms"]
+
 [tasks.default]
-alias = "build"
+alias = "build-all"
--- a/book/src-bak/ch01/hello1.md
+++ b/book/src-bak/ch01/hello1.md
@ -1,115 +0,0 @@
-
-
-## A basic hello1 explanation
-
-So, what just happened? Even if you're used to Rust that might look pretty
-strange. We'll go over most of the little parts right here, and then bigger
-parts will get their own sections.
-
-```rust
-#![feature(start)]
-```
-
-This enables the [start
-feature](https://doc.rust-lang.org/beta/unstable-book/language-features/start.html),
-which you would normally be able to read about in the unstable book, except that
-the book tells you nothing at all except to look at the [tracking
-issue](https://github.com/rust-lang/rust/issues/29633).
-
-Basically, a GBA game is even more low-level than the _normal_ amount of
-low-level that you get from Rust, so we have to tell the compiler to account for
-that by specifying a `#[start]`, and we need this feature on to do that.
-
-```rust
-#![no_std]
-```
-
-There's no standard library available on the GBA, so we'll have to live a core
-only life.
-
-```rust
-#[panic_handler]
-fn panic(_info: &core::panic::PanicInfo) -> ! {
-  loop {}
-}
-```
-
-This sets our [panic
-handler](https://doc.rust-lang.org/nightly/nomicon/panic-handler.html).
-Basically, if we somehow trigger a panic, this is where the program goes.
-However, right now we don't know how to get any sort of message out to the user
-so... we do nothing at all. We _can't even return_ from here, so we just sit in
-an infinite loop. The player will have to reset the universe from the outside.
-
-```rust
-#[start]
-fn main(_argc: isize, _argv: *const *const u8) -> isize {
-```
-
-This is our `#[start]`. We call it `main`, but it's not like a `main` that you'd
-see in a Rust program. It's _more like_ the sort of `main` that you'd see in a C
-program, but it's still **not** that either. If you compile a `#[start]` program
-for a target with an OS such as `arm-none-eabi-nm` you can open up the debug
-info and see that your result will have the symbol for the C `main` along side
-the symbol for the start `main` that we write here. Our start `main` is just its
-own unique thing, and the inputs and outputs have to be like that because that's
-how `#[start]` is specified to work in Rust.
-
-If you think about it for a moment you'll probably realize that, those inputs
-and outputs are totally useless to us on a GBA. There's no OS on the GBA to call
-our program, and there's no place for our program to "return to" when it's done.
-
-Side note: if you want to learn more about stuff "before main gets called" you
-can watch a great [CppCon talk](https://www.youtube.com/watch?v=dOfucXtyEsU) by
-Matt Godbolt (yes, that Godbolt) where he delves into quite a bit of it. The
-talk doesn't really apply to the GBA, but it's pretty good.
-
-```rust
-  unsafe {
-```
-
-I hope you're all set for some `unsafe`, because there's a lot of it to be had.
-
-```rust
-    (0x04000000 as *mut u16).write_volatile(0x0403);
-```
-
-Sure!
-
-```rust
-    (0x06000000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F);
-    (0x06000000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0);
-    (0x06000000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00);
-```
-
-Ah, of course.
-
-```rust
-    loop {}
-  }
-}
-```
-
-And, as mentioned above, there's no place for a GBA program to "return to", so
-we can't ever let `main` try to return there. Instead, we go into an infinite
-`loop` that does nothing. The fact that this doesn't ever return an `isize`
-value doesn't seem to bother Rust, because I guess we're at least not returning
-any other type of thing instead.
-
-Fun fact: unlike in C++, an infinite loop with no side effects isn't Undefined
-Behavior for us rustaceans... _semantically_. In truth LLVM has a [known
-bug](https://github.com/rust-lang/rust/issues/28728) in this area, so we won't
-actually be relying on empty loops in any future programs.
-
-## All Those Magic Numbers
-
-Alright, I cheated quite a bit in the middle there. The program works, but I
-didn't really tell you why because I didn't really tell you what any of those
-magic numbers mean or do.
-
-* `0x04000000` is the address of an IO Register called the Display Control.
-* `0x06000000` is the start of Video RAM.
-
-So we write some magic to the display control register once, then we write some
-other magic to three magic locations in the Video RAM. Somehow that shows three
-dots. Gotta read on to find out why!
--- a/book/src-bak/ch01/hello2.md
+++ b/book/src-bak/ch01/hello2.md
@ -1,132 +0,0 @@
-# hello2
-
-Okay so let's have a look again:
-
-`hello1`
-
-```rust
-#![feature(start)]
-#![no_std]
-
-#[panic_handler]
-fn panic(_info: &core::panic::PanicInfo) -> ! {
-  loop {}
-}
-
-#[start]
-fn main(_argc: isize, _argv: *const *const u8) -> isize {
-  unsafe {
-    (0x04000000 as *mut u16).write_volatile(0x0403);
-    (0x06000000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F);
-    (0x06000000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0);
-    (0x06000000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00);
-    loop {}
-  }
-}
-```
-
-Now let's clean this up so that it's clearer what's going on.
-
-First we'll label that display control stuff, including using the `VolatilePtr`
-type from the volatile explanation:
-
-```rust
-pub const DISPCNT: VolatilePtr<u16> = VolatilePtr(0x04000000 as *mut u16);
-pub const MODE3: u16 = 3;
-pub const BG2: u16 = 0b100_0000_0000;
-```
-
-Next we make some const values for the actual pixel drawing
-
-```rust
-pub const VRAM: usize = 0x06000000;
-pub const SCREEN_WIDTH: isize = 240;
-```
-
-Note that VRAM has to be interpreted in different ways depending on mode, so we
-just leave it as `usize` and we'll cast it into the right form closer to the
-actual use.
-
-Next we want a small helper function for putting together a color value.
-Happily, this one can even be declared as a `const` function. At the time of
-writing, we've got the "minimal const fn" support in nightly. It really is quite
-limited, but I'm happy to let rustc and LLVM pre-compute as much as they can
-when it comes to the GBA's tiny CPU.
-
-```rust
-pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 {
-  blue << 10 | green << 5 | red
-}
-```
-
-Finally, we'll make a function for drawing a pixel in Mode 3. Even though it's
-just a one-liner, having the "important parts" be labeled as function arguments
-usually helps you think about it a lot better.
-
-```rust
-pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) {
-  VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color);
-}
-```
-
-So now we've got this:
-
-`hello2`
-
-```rust
-#![feature(start)]
-#![no_std]
-
-#[panic_handler]
-fn panic(_info: &core::panic::PanicInfo) -> ! {
-  loop {}
-}
-
-#[start]
-fn main(_argc: isize, _argv: *const *const u8) -> isize {
-  unsafe {
-    DISPCNT.write(MODE3 | BG2);
-    mode3_pixel(120, 80, rgb16(31, 0, 0));
-    mode3_pixel(136, 80, rgb16(0, 31, 0));
-    mode3_pixel(120, 96, rgb16(0, 0, 31));
-    loop {}
-  }
-}
-
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
-#[repr(transparent)]
-pub struct VolatilePtr<T>(pub *mut T);
-impl<T> VolatilePtr<T> {
-  pub unsafe fn read(&self) -> T {
-    core::ptr::read_volatile(self.0)
-  }
-  pub unsafe fn write(&self, data: T) {
-    core::ptr::write_volatile(self.0, data);
-  }
-  pub unsafe fn offset(self, count: isize) -> Self {
-    VolatilePtr(self.0.wrapping_offset(count))
-  }
-}
-
-pub const DISPCNT: VolatilePtr<u16> = VolatilePtr(0x04000000 as *mut u16);
-pub const MODE3: u16 = 3;
-pub const BG2: u16 = 0b100_0000_0000;
-
-pub const VRAM: usize = 0x06000000;
-pub const SCREEN_WIDTH: isize = 240;
-
-pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 {
-  blue << 10 | green << 5 | red
-}
-
-pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) {
-  VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color);
-}
-```
-
-Exact same program that we started with, but much easier to read.
-
-Of course, in the full `gba` crate that this book is a part of we have these and
-other elements all labeled and sorted out for you (not identically, but
-similarly). Still, for educational purposes it's often best to do it yourself at
-least once.
--- a/book/src-bak/ch01/index.md
+++ b/book/src-bak/ch01/index.md
@ -1,10 +0,0 @@
-# Ch 1: Hello GBA
-
-Traditionally a person writes a "hello, world" program so that they can test
-that their development environment is setup properly and to just get a feel for
-using the tools involved. To get an idea of what a small part of a source file
-will look like. All that stuff.
-
-Normally, you write a program that prints "hello, world" to the terminal. The
-GBA has no terminal, but it does have a screen, so instead we're going to draw
-three dots to the screen.
--- a/book/src-bak/ch01/volatile.md
+++ b/book/src-bak/ch01/volatile.md
@ -1,70 +0,0 @@
-# Volatile
-
-Before we focus on what the numbers mean, first let's ask ourselves: Why are we
-doing _volatile_ writes? You've probably never used that keywords before at all.
-What _is_ volatile anyway?
-
-Well, the optimizer is pretty aggressive, and so it'll skip reads and writes
-when it thinks can. Like if you write to a pointer once, and then again a moment
-later, and it didn't see any other reads in between, it'll think that it can
-just skip doing that first write since it'll get overwritten anyway. Sometimes
-that's correct, but sometimes it's not.
-
-Marking a read or write as _volatile_ tells the compiler that it really must do
-that action, and in the exact order that we wrote it out. It says that there
-might even be special hardware side effects going on that the compiler isn't
-aware of. In this case, the write to the display control register sets a video
-mode, and the writes to the Video RAM set pixels that will show up on the
-screen.
-
-Similar to "atomic" operations you might have heard about, all volatile
-operations are enforced to happen in the exact order that you specify them, but
-only relative to other volatile operations. So something like
-
-```rust
-c.write_volatile(5);
-a += b;
-d.write_volatile(7);
-```
-
-might end up changing `a` either before or after the change to `c` (since the
-value of `a` doesn't affect the write to `c`), but the write to `d` will
-_always_ happen after the write to `c`, even though the compiler doesn't see any
-direct data dependency there.
-
-If you ever go on to use volatile stuff on other platforms it's important to
-note that volatile doesn't make things thread-safe, you still need atomic for
-that. However, the GBA doesn't have threads, so we don't have to worry about
-those sorts of thread safety concerns (there's interrupts, but that's another
-matter).
-
-## Volatile by default
-
-Of course, writing out `volatile_write` every time is more than we wanna do.
-There's clarity and then there's excessive. This is a chance to write our first
-[newtype](https://doc.rust-lang.org/1.0.0/style/features/types/newtype.html).
-Basically a type that's got the exact same binary representation as some other
-type, but new methods and trait implementations.
-
-We want a `*mut T` that's volatile by default, and also when we offset it...
-well the verdict is slightly unclear on how `offset` vs `wrapping_offset` work
-when you're using pointers that you made up out of nowhere. I've asked the
-experts and they genuinely weren't sure, so we'll make an `offset` method that
-does a `wrapping_offset` just to be careful.
-
-```rust
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
-#[repr(transparent)]
-pub struct VolatilePtr<T>(pub *mut T);
-impl<T> VolatilePtr<T> {
-  pub unsafe fn read(&self) -> T {
-    core::ptr::read_volatile(self.0)
-  }
-  pub unsafe fn write(&self, data: T) {
-    core::ptr::write_volatile(self.0, data);
-  }
-  pub unsafe fn offset(self, count: isize) -> Self {
-    VolatilePtr(self.0.wrapping_offset(count))
-  }
-}
-```
--- a/book/src-bak/ch02/index.md
+++ b/book/src-bak/ch02/index.md
@ -1,22 +0,0 @@
-# Ch 2: User Input
-
-It's all well and good to draw three pixels, but they don't do anything yet. We
-want them to do something, and for that we need to get some input from the user.
-
-The GBA, as I'm sure you know, has an arrow pad, A and B, L and R, Start and
-Select. That's a little more than the NES/GB/CGB had, and a little less than the
-SNES had. As you can guess, we get key state info from an IO register.
-
-Also, we will need a way to keep the program from running "too fast". On a
-modern computer or console you do this with vsync info from the GPU and Monitor,
-and on the GBA we'll be using vsync info from an IO register that tracks what
-the display hardware is doing.
-
-As a way to apply our knowledge We'll make a simple "light cycle" game where
-your dot leaves a trail behind them and you die if you go off the screen or if
-you touch your own trail. We just make a copy of `hello2.rs` named
-`light_cycle.rs` and then fill it in as we go through the chapter. Normally you
-might not place the entire program into a single source file, particularly as it
-grows over time, but since these are small examples it's much better to have
-them be completely self contained than it is to have them be "properly
-organized" for the long term.
--- a/book/src-bak/ch03/gba_memory_mapping.md
+++ b/book/src-bak/ch03/gba_memory_mapping.md
@ -1,256 +0,0 @@
-# GBA Memory Mapping
-
-The [GBA Memory Map](http://problemkaputt.de/gbatek.htm#gbamemorymap) has
-several memory portions to it, each with their own little differences. Most of
-the memory has pre-determined use according to the hardware, but there is also
-space for games to use as a scratch pad in whatever way the game sees fit.
-
-The memory ranges listed here are _inclusive_, so they end with a lot of F's
-and E's.
-
-We've talked about volatile memory before, but just as a reminder I'll say that
-all of the memory we'll talk about here should be accessed using volatile with
-two exceptions:
-
-1) Work RAM (both internal and external) can be used normally, and if the
-   compiler is able to totally elide some reads and writes that's okay.
-2) However, if you set aside any space in Work RAM where an interrupt will
-   communicate with the main program then that specific location will have to
-   keep using volatile access, since the compiler never knows when an interrupt
-   will actually happen.
-
-## BIOS / System ROM
-
-* `0x0` to `0x3FFF` (16k)
-
-This is special memory for the BIOS. It is "read-only", but even then it's only
-accessible when the program counter is pointing into the BIOS region. At all
-other times you get a [garbage
-value](http://problemkaputt.de/gbatek.htm#gbaunpredictablethings) back when you
-try to read out of the BIOS.
-
-## External Work RAM / EWRAM
-
-* `0x2000000` to `0x203FFFF` (256k)
-
-This is a big pile of space, the use of which is up to each game. However, the
-external work ram has only a 16-bit bus (if you read/write a 32-bit value it
-silently breaks it up into two 16-bit operations) and also 2 wait cycles (extra
-CPU cycles that you have to expend _per 16-bit bus use_).
-
-It's most helpful to think of EWRAM as slower, distant memory, similar to the
-"heap" in a normal application. You can take the time to go store something
-within EWRAM, or to load it out of EWRAM, but if you've got several operations
-to do in a row and you're worried about time you should pull that value into
-local memory, work on your local copy, and then push it back out to EWRAM.
-
-## Internal Work RAM / IWRAM
-
-* `0x3000000` to `0x3007FFF` (32k)
-
-This is a smaller pile of space, but it has a 32-bit bus and no wait.
-
-By default, `0x3007F00` to `0x3007FFF` is reserved for interrupt and BIOS use.
-The rest of it is totally up to you. The user's stack space starts at
-`0x3007F00` and proceeds _down_ from there. For best results you should probably
-start at `0x3000000` and then go upwards. Under normal use it's unlikely that
-the two memory regions will crash into each other.
-
-## IO Registers
-
-* `0x4000000` to `0x40003FE`
-
-We've touched upon a few of these so far, and we'll get to more later. At the
-moment it is enough to say that, as you might have guessed, all of them live in
-this region. Each individual register is a `u16` or `u32` and they control all
-sorts of things. We'll actually be talking about some more of them in this very
-chapter, because that's how we'll control some of the background and object
-stuff.
-
-## Palette RAM / PALRAM
-
-* `0x5000000` to `0x50003FF` (1k)
-
-Palette RAM has a 16-bit bus, which isn't really a problem because it
-conceptually just holds `u16` values. There's no automatic wait state, but if
-you try to access the same location that the display controller is accessing you
-get bumped by 1 cycle. Since the display controller can use the palette ram any
-number of times per scanline it's basically impossible to predict if you'll have
-to do a wait or not during VDraw. During VBlank you won't have any wait of
-course.
-
-PALRAM is among the memory where there's weirdness if you try to write just one
-byte: if you try to write just 1 byte, it writes that byte into _both_ parts of
-the larger 16-bit location. This doesn't really affect us much with PALRAM,
-because palette values are all supposed to be `u16` anyway.
-
-The palette memory actually contains not one, but _two_ sets of palettes. First
-there's 256 entries for the background palette data (starting at `0x5000000`),
-and then there's 256 entries for object palette data (starting at `0x5000200`).
-
-The GBA also has two modes for palette access: 8-bits-per-pixel (8bpp) and
-4-bits-per-pixel (4bpp).
-
-* In 8bpp mode an 8-bit palette index value within a background or sprite
-  simply indexes directly into the 256 slots for that type of thing.
-* In 4bpp mode a 4-bit palette index value within a background or sprite
-  specifies an index within a particular "palbank" (16 palette entries each),
-  and then a _separate_ setting outside of the graphical data determines which
-  palbank is to be used for that background or object (the screen entry data for
-  backgrounds, and the object attributes for objects).
-
-### Transparency
-
-When a pixel within a background or object specifies index 0 as its palette
-entry it is treated as a transparent pixel. This means that in 8bpp mode there's
-only 255 actual color options (0 being transparent), and in 4bpp mode there's
-only 15 actual color options available within each palbank (the 0th entry of
-_each_ palbank is transparent).
-
-Individual backgrounds, and individual objects, each determine if they're 4bpp
-or 8bpp separately, so a given overall palette slot might map to a used color in
-8bpp and an unused/transparent color in 4bpp. If you're a palette wizard.
-
-Palette slot 0 of the overall background palette is used to determine the
-"backdrop" color. That's the color you see if no background or object ends up
-being rendered within a given pixel.
-
-Since display mode 3 and display mode 5 don't use the palette, they cannot
-benefit from transparency.
-
-## Video RAM / VRAM
-
-* `0x6000000` to `0x6017FFF` (96k)
-
-We've used this before! VRAM has a 16-bit bus and no wait. However, the same as
-with PALRAM, the "you might have to wait if the display controller is looking at
-it" rule applies here.
-
-Unfortunately there's not much more exact detail that can be given about VRAM.
-The use of the memory depends on the video mode that you're using.
-
-One general detail of note is that you can't write individual bytes to any part
-of VRAM. Depending on mode and location, you'll either get your bytes doubled
-into both the upper and lower parts of the 16-bit location targeted, or you
-won't even affect the memory. This usually isn't a big deal, except in two
-situations:
-
-* In Mode 4, if you want to change just 1 pixel, you'll have to be very careful
-  to read the old `u16`, overwrite just the byte you wanted to change, and then
-  write that back.
-* In any display mode, avoid using `memcopy` to place things into VRAM.
-  It's written to be byte oriented, and only does 32-bit transfers under select
-  conditions. The rest of the time it'll copy one byte at a time and you'll get
-  either garbage or nothing at all.
-
-## Object Attribute Memory / OAM
-
-* `0x7000000` to `0x70003FF` (1k)
-
-The Object Attribute Memory has a 32-bit bus and no default wait, but suffers
-from the "you might have to wait if the display controller is looking at it"
-rule. You cannot write individual bytes to OAM at all, but that's not really a
-problem because all the fields of the data types within OAM are either `i16` or
-`u16` anyway.
-
-Object attribute memory is the wildest yet: it conceptually contains two types
-of things, but they're _interlaced_ with each other all the way through.
-
-Now, [GBATEK](http://problemkaputt.de/gbatek.htm#lcdobjoamattributes) and
-[CowByte](https://www.cs.rit.edu/~tjh8300/CowBite/CowBiteSpec.htm#OAM%20(sprites))
-doesn't quite give names to the two data types here.
-[TONC](https://www.coranac.com/tonc/text/regobj.htm#sec-oam) calls them
-`OBJ_ATTR` and `OBJ_AFFINE`, but we'll be giving them names fitting with the
-Rust naming convention. Just know that if you try to talk about it with others
-they might not be using the same names. In Rust terms their layout would look
-like this:
-
-```rust
-#[repr(C)]
-pub struct ObjectAttributes {
-  attr0: u16,
-  attr1: u16,
-  attr2: u16,
-  filler: i16,
-}
-
-#[repr(C)]
-pub struct AffineMatrix {
-  filler0: [u16; 3],
-  pa: i16,
-  filler1: [u16; 3],
-  pb: i16,
-  filler2: [u16; 3],
-  pc: i16,
-  filler3: [u16; 3],
-  pd: i16,
-}
-```
-
-(Note: the `#[repr(C)]` part just means that Rust must lay out the data exactly
-in the order we specify, which otherwise it is not required to do).
-
-So, we've got 1024 bytes in OAM and each `ObjectAttributes` value is 8 bytes, so
-naturally we can support up to 128 objects.
-
-_At the same time_, we've got 1024 bytes in OAM and each `AffineMatrix` is 32
-bytes, so we can have 32 of them.
-
-But, as I said, these things are all _interlaced_ with each other. See how
-there's "filler" fields in each struct? If we imagine the OAM as being just an
-array of one type or the other, indexes 0/1/2/3 of the `ObjectAttributes` array
-would line up with index 0 of the `AffineMatrix` array. It's kinda weird, but
-that's just how it works. When we setup functions to read and write these values
-we'll have to be careful with how we do it. We probably _won't_ want to use
-those representations above, at least not with the `AffineMatrix` type, because
-they're quite wasteful if you want to store just object attributes or just
-affine matrices.
-
-## Game Pak ROM / Flash ROM
-
-* `0x8000000` to `0x9FFFFFF` (wait 0)
-* `0xA000000` to `0xBFFFFFF` (wait 1)
-* `0xC000000` to `0xDFFFFFF` (wait 2)
-* Max of 32Mb
-
-These portions of the memory are less fixed, because they depend on the precise
-details of the game pak you've inserted into the GBA. In general, they connect
-to the game pak ROM and/or Flash memory, using a 16-bit bus. The ROM is
-read-only, but the Flash memory (if any) allows writes.
-
-The game pak ROM is listed as being in three sections, but it's actually the
-same memory being effectively mirrored into three different locations. The
-mirror that you choose to access the game pak through affects which wait state
-setting it uses (configured via IO register of course). Unfortunately, the
-details come down more to the game pak hardware that you load your game onto
-than anything else, so there's not much I can say right here. We'll eventually
-talk about it more later when I'm forced to do the boring thing and just cover
-all the IO registers that aren't covered anywhere else.
-
-One thing of note is the way that the 16-bit bus affects us: the instructions to
-execute are coming through the same bus as the rest of the game data, so we want
-them to be as compact as possible. The ARM chip in the GBA supports two
-different instruction sets, "thumb" and "non-thumb". The thumb mode instructions
-are 16-bit, so they can each be loaded one at a time, and the non-thumb
-instructions are 32-bit, so we're at a penalty if we execute them directly out
-of the game pak. However, some things will demand that we use non-thumb code, so
-we'll have to deal with that eventually. It's possible to switch between modes,
-but it's a pain to keep track of what mode you're in because there's not
-currently support for it in Rust itself (perhaps some day). So we'll stick with
-thumb code as much as we possibly can, that's why our target profile for our
-builds starts with `thumbv4`.
-
-## Game Pak SRAM
-
-* `0xE000000` to `0xE00FFFF` (64k)
-
-The game pak SRAM has an 8-bit bus. Why did Pokémon always take so long to save?
-Saving the whole game one byte at a time is why. The SRAM also has some amount
-of wait, but as with the ROM, the details depend on your game pak hardware (and
-also as with ROM, you can adjust the settings with an IO register, should you
-need to).
-
-One thing to note about the SRAM is that the GBA has a Direct Memory Access
-(DMA) feature that can be used for bulk memory movements in some cases, but the
-DMA _cannot_ access the SRAM region. You really are stuck reading and writing
-one byte at a time when you're using the SRAM.
--- a/book/src/00-introduction/05-help_and_resources.md
+++ b/book/src/00-introduction/05-help_and_resources.md
@ -26,9 +26,19 @@ available while you're debugging problems.

 ## Information Resources

-Ketsuban and I didn't magically learn this all from nowhere, we read various
-technical manuals and guides ourselves and then distilled the knowledge (usually
-oriented towards C and C++) into this book for Rust.
+First, if I fail to describe something related to Rust, you can always try
+checking in [The Rust
+Reference](https://doc.rust-lang.org/nightly/reference/introduction.html) to see
+if they cover it. You can mostly ignore that big scary red banner at the top,
+things are a lot better documented than they make it sound.
+
+If you need help trying to fiddle your math down as hard as you can, there are
+resources such as the [Bit Twiddling
+Hacks](https://graphics.stanford.edu/~seander/bithacks.html) page.
+
+As to GBA-related lore, Ketsuban and I didn't magically learn this all from
+nowhere; we read various technical manuals and guides ourselves and then
+distilled those works oriented around C and C++ into a book for Rust.

 We have personally used some or all of the following:

--- a/book/src/01-quirks/01-no_std.md
+++ b/book/src/01-quirks/01-no_std.md
@ -89,10 +89,6 @@ the standard library types to be used "for free" once it was set up, or just a
 custom allocator that's GBA specific if Rust's global allocator style isn't a
 good fit for the GBA (I honestly haven't looked into it).

-## LLVM Intrinsics
-
-TODO: explain that we'll occasionally have to provide some intrinsics.
-
 ## Bare Metal Panic

 TODO: expand this
@ -114,3 +110,10 @@ TODO: expand this
 * Sending the message also automatically zeroes the output buffer.
 * View the output within  the "Tools" menu, "View Logs...". Note that the Fatal
  message, if any doesn't get logged.
+
+TODO: this will probably fail without a `__clzsi2` implementation, which is a
+good segue into the next section
+
+## LLVM Intrinsics
+
+TODO: explain that we'll occasionally have to provide some intrinsics.
--- a/book/src/01-quirks/02-fixed_only.md
+++ b/book/src/01-quirks/02-fixed_only.md
@ -1,13 +1,548 @@
 # Fixed Only

-In addition to not having the standard library available, we don't even have a
-floating point unit available! We can't do floating point math in hardware! We
-could still do floating point math as software computations if we wanted, but
-that's a slow, slow thing to do.
+In addition to not having much of the standard library available, we don't even
+have a floating point unit available! We can't do floating point math in
+hardware! We _could_ still do floating point math as pure software computations
+if we wanted, but that's a slow, slow thing to do.

-Instead let's learn about another way to have fractional values called "Fixed
-Point"
+Are there faster ways? It's the same answer as always: "Yes, but not without a
+tradeoff."

-## Fixed Point
+The faster way is to represent fractional values using a system called a [Fixed
+Point Representation](https://en.wikipedia.org/wiki/Fixed-point_arithmetic).
+What do we trade away? Numeric range.

-TODO: describe fixed point, make some types, do the impls, all that.
+* Floating point math stores bits for base value and for exponent all according
+  to a single [well defined](https://en.wikipedia.org/wiki/IEEE_754) standard
+  for how such a complicated thing works.
+* Fixed point math takes a normal integer (either signed or unsigned) and then
+  just "mentally associates" it (so to speak) with a fractional value for its
+  "units". If you have 3 and it's in units of 1/2, then you have 3/2, or 1.5
+  using decimal notation. If your number is 256 and it's in units of 1/256th
+  then the value is 1.0 in decimal notation.
+
+Floating point math requires dedicated hardware to perform quickly, but it can
+"trade" precision when it needs to represent extremely large or small values.
+
+Fixed point math is just integral math, which our GBA is reasonably good at, but
+because your number is associated with a fixed fraction your results can get out
+of range very easily.
+
+## Representing A Fixed Point Value
+
+So we want to associate our numbers with a mental note of what units they're in:
+
+* [PhantomData](https://doc.rust-lang.org/core/marker/struct.PhantomData.html)
+  is a type that tells the compiler "please remember this extra type info" when
+  you add it as a field to a struct. It goes away at compile time, so it's
+  perfect for us to use as space for a note to ourselves without causing runtime
+  overhead.
+* The [typenum](https://crates.io/crates/typenum) crate is the best way to
+  represent a number within a type in Rust. Since our values on the GBA are
+  always specified as a number of fractional bits to count the number as, we can
+  put `typenum` types such as `U8` or `U14` into our `PhantomData` to keep track
+  of what's going on.
+
+Now, those of you who know me, or perhaps just know my reputation, will of
+course _immediately_ question what happened to the real Lokathor. I do not care
+for most crates, and I particularly don't care for using a crate in teaching
+situations. However, `typenum` has a number of factors on its side that let me
+suggest it in this situation:
+
+* It's version 1.10 with a total of 21 versions and nearly 700k downloads, so we
+  can expect that the major troubles have been shaken out and that it will remain
+  fairly stable for quite some time to come.
+* It has no further dependencies that it's going to drag into the compilation.
+* It happens all at compile time, so it's not clogging up our actual game with
+  any nonsense.
+* The (interesting) subject of "how do you do math inside Rust's trait system?" is
+  totally separate from the concern that we're trying to focus on here.
+
+Therefore, we will consider it acceptable to use this crate.
+
+Now the `typenum` crate defines a whole lot, but we'll focus down to just a
+single type at the moment:
+[UInt](https://docs.rs/typenum/1.10.0/typenum/uint/struct.UInt.html) is a
+type-level unsigned value. It's like `u8` or `u16`, but while they're types that
+then have values, each `UInt` construction statically equates to a specific
+value. Like how the `()` type only has one value, which is also called `()`. In
+this case, you wrap up `UInt` around smaller `UInt` values and a `B1` or `B0`
+value to build up the binary number that you want at the type level.
+
+In other words, instead of writing
+
+```rust
+let six = 0b110;
+```
+
+We write
+
+```rust
+type U6 = UInt<UInt<UInt<UTerm, B1>, B1>, B0>;
+```
+
+Wild, I know. If you look into the `typenum` crate you can do math and stuff
+with these type level numbers, and we will a little bit below, but to start off
+we _just_ need to store one in some `PhantomData`.
+
+### A struct For Fixed Point
+
+Our actual type for a fixed point value looks like this:
+
+```rust
+use core::marker::PhantomData;
+use typenum::marker_traits::Unsigned;
+
+/// Fixed point `T` value with `F` fractional bits.
+#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct Fx<T, F: Unsigned> {
+  num: T,
+  phantom: PhantomData<F>,
+}
+```
+
+This says that `Fx<T,F>` is a generic type that holds some base number type `T`
+and an `F` type that's marking off how many fractional bits we're using. We only
+want people giving unsigned type-level values for the `PhantomData` type, so we
+use the trait bound `F: Unsigned`.
+
+We use
+[repr(transparent)](https://github.com/rust-lang/rfcs/blob/master/text/1758-repr-transparent.md)
+here to ensure that `Fx` will always be treated just like the base type in the
+final program (in terms of bit pattern and ABI).
+
+If you go and check, this is _basically_ how the existing general purpose crates
+for fixed point math represent their numbers. They're a little fancier about it
+because they have to cover every case, and we only have to cover our GBA case.
+
+That's quite a bit to type though. We probably want to make a few type aliases
+for things to be easier to look at. Unfortunately there's [no standard
+notation](https://en.wikipedia.org/wiki/Fixed-point_arithmetic#Notation) for how
+you write a fixed point type. We also have to limit ourselves to what's valid
+for use in a Rust type too. I like the `fx` thing, so we'll use that for signed
+and then `fxu` if we need an unsigned value.
+
+```rust
+/// Alias for an `i16` fixed point value with 8 fractional bits.
+pub type fx8_8 = Fx<i16,U8>;
+```
+
+Rust will complain about having `non_camel_case_types`, and you can shut that
+warning up by putting an `#[allow(non_camel_case_types)]` attribute on the type
+alias directly, or you can use `#![allow(non_camel_case_types)]` at the very top
+of the module to shut up that warning for the whole module (which is what I
+did).
+
+## Constructing A Fixed Point Value
+
+So how do we actually _make_ one of these values? Well, we can always just wrap or unwrap any value in our `Fx` type:
+
+```rust
+impl<T, F: Unsigned> Fx<T, F> {
+  /// Uses the provided value directly.
+  pub fn from_raw(r: T) -> Self {
+    Fx {
+      num: r,
+      phantom: PhantomData,
+    }
+  }
+  /// Unwraps the inner value.
+  pub fn into_raw(self) -> T {
+    self.num
+  }
+}
+```
+
+I'd like to use the `From` trait of course, but it was giving me some trouble, I
+think because of the orphan rule. Oh well.
+
+If we want to be particular to the fact that these are supposed to be
+_numbers_... that gets tricky. Rust is actually quite bad at being generic about
+number types. You can use the [num](https://crates.io/crates/num) crate, or you
+can just use a macro and invoke it once per type. Guess what we're gonna do.
+
+```rust
+macro_rules! fixed_point_methods {
+  ($t:ident) => {
+    impl<F: Unsigned> Fx<$t, F> {
+      /// Gives the smallest positive non-zero value.
+      pub fn precision() -> Self {
+        Fx {
+          num: 1,
+          phantom: PhantomData,
+        }
+      }
+
+      /// Makes a value with the integer part shifted into place.
+      pub fn from_int_part(i: $t) -> Self {
+        Fx {
+          num: i << F::U8,
+          phantom: PhantomData,
+        }
+      }
+    }
+  };
+}
+
+fixed_point_methods! {u8}
+fixed_point_methods! {i8}
+fixed_point_methods! {i16}
+fixed_point_methods! {u16}
+fixed_point_methods! {i32}
+fixed_point_methods! {u32}
+```
+
+Now _you'd think_ that those can be `const`, but at the moment you can't have a
+`const` function with a bound on any trait other than `Sized`, so they have to
+be normal functions.
+
+Also, we're doing something a little interesting there with `from_int_part`. We
+can take our `F` type and get its constant value. There's other associated
+constants if we want it in other types, and also non-const methods if you wanted
+that for some reason (maybe passing it as a closure function? dunno).
+
+## Casting Base Values
+
+Next, once we have a value in one base type we will need to be able to move it
+into another base type. Unfortunately this means we gotta use the `as` operator,
+which requires a concrete source type and a concrete destination type. There's
+no easy way for us to make it generic here.
+
+We could let the user use `into_raw`, cast, and then do `from_raw`, but that's
+error prone because they might change the fractional bit count accidentally.
+This means that we have to write a function that does the casting while
+perfectly preserving the fractional bit quantity. If we wrote one function for
+each conversion it'd be like 30 different possible casts (6 base types that we
+support, and then 5 possible target types). Instead, we'll write it just once in
+a way that takes a closure, and let the user pass a closure that does the cast.
+The compiler should merge it all together quite nicely for us once optimizations
+kick in.
+
+This code goes outside the macro. I want to avoid too much code in the macro if
+we can, it's a little easier to cope with I think.
+
+```rust
+  /// Casts the base type, keeping the fractional bit quantity the same.
+  pub fn cast_inner<Z, C: Fn(T) -> Z>(self, op: C) -> Fx<Z, F> {
+    Fx {
+      num: op(self.num),
+      phantom: PhantomData,
+    }
+  }
+```
+
+It's horrible and ugly, but Rust is just bad at numbers sometimes.
+
+## Adjusting Fractional Part
+
+In addition to the base value we might want to change our fractional bit
+quantity. This is actually easier than it sounds, but it also requires us to be
+tricky with the generics. We can actually use some typenum type level operators
+here.
+
+This code goes inside the macro: we need to be able to use the left shift and
+right shift, which is easiest when we just use the macro's `$t` as our type. We
+could alternately put a similar function outside the macro and be generic on `T`
+having the left and right shift operators by using a `where` clause. As much as
+I'd like to avoid too much code being generated by macro, I'd _even more_ like
+to avoid generic code with huge and complicated trait bounds. It comes down to
+style, and you gotta decide for yourself.
+
+```rust
+      /// Changes the fractional bit quantity, keeping the base type the same.
+      pub fn adjust_fractional_bits<Y: Unsigned + IsEqual<F, Output = False>>(self) -> Fx<$t, Y> {
+        let leftward_movement: i32 = Y::to_i32() - F::to_i32();
+        Fx {
+          num: if leftward_movement > 0 {
+            self.num << leftward_movement
+          } else {
+            self.num >> (-leftward_movement)
+          },
+          phantom: PhantomData,
+        }
+      }
+```
+
+There's a few things at work. First, we introduce `Y` as the target number of
+fractional bits, and we _also_ limit it that the target bits quantity can't be
+the same as we already have using a type-level operator. If it's the same as we
+started with, why are you doing the cast at all?
+
+Now, once we're sure that the current bits and target bits aren't the same, we
+compute `target - start`, and call this our "leftward movement". Example: if
+we're targeting 8 bits and we're at 4 bits, we do 8-4 and get +4 as our leftward
+movement. If the leftward_movement is positive we naturally shift our current
+value to the left. If it's not positive then it _must_ be negative because we
+eliminated 0 as a possibility using the type-level operator, so we shift to the
+right by the negative value.
+
+## Addition, Subtraction, Shifting, Negative, Comparisons
+
+From here on we're getting help from [this blog
+post](https://spin.atomicobject.com/2012/03/15/simple-fixed-point-math/) by [Job
+Vranish](https://spin.atomicobject.com/author/vranish/), so thank them if you
+learn something.
+
+I might have given away the game a bit with those `derive` traits on our fixed
+point type. For a fair number of operations you can use the normal form of the
+op on the inner bits as long as the fractional parts have the same quantity.
+This includes equality and ordering (which we derived) as well as addition,
+subtraction, and bit shifting (which we need to do ourselves).
+
+This code can go outside the macro, with sufficient trait bounds.
+
+```rust
+impl<T: Add<Output = T>, F: Unsigned> Add for Fx<T, F> {
+  type Output = Self;
+  fn add(self, rhs: Fx<T, F>) -> Self::Output {
+    Fx {
+      num: self.num + rhs.num,
+      phantom: PhantomData,
+    }
+  }
+}
+```
+
+The bound on `T` makes it so that `Fx<T, F>` can be added any time that `T` can
+be added to its own type with itself as the output. We can use the exact same
+pattern for `Sub`, `Shl`, `Shr`, and `Neg`. With enough trait bounds, we can do
+anything!
+
+```rust
+impl<T: Sub<Output = T>, F: Unsigned> Sub for Fx<T, F> {
+  type Output = Self;
+  fn sub(self, rhs: Fx<T, F>) -> Self::Output {
+    Fx {
+      num: self.num - rhs.num,
+      phantom: PhantomData,
+    }
+  }
+}
+
+impl<T: Shl<u32, Output = T>, F: Unsigned> Shl<u32> for Fx<T, F> {
+  type Output = Self;
+  fn shl(self, rhs: u32) -> Self::Output {
+    Fx {
+      num: self.num << rhs,
+      phantom: PhantomData,
+    }
+  }
+}
+
+impl<T: Shr<u32, Output = T>, F: Unsigned> Shr<u32> for Fx<T, F> {
+  type Output = Self;
+  fn shr(self, rhs: u32) -> Self::Output {
+    Fx {
+      num: self.num >> rhs,
+      phantom: PhantomData,
+    }
+  }
+}
+
+impl<T: Neg<Output = T>, F: Unsigned> Neg for Fx<T, F> {
+  type Output = Self;
+  fn neg(self) -> Self::Output {
+    Fx {
+      num: -self.num,
+      phantom: PhantomData,
+    }
+  }
+}
+```
+
+Unfortunately, for `Shl` and `Shr` to have as much coverage on our type as it
+does on the base type (allowing just about any right hand side) we'd have to do
+another macro, but I think just `u32` is fine. We can always add more later if
+we need.
+
+We could also implement `BitAnd`, `BitOr`, `BitXor`, and `Not`, but they don't
+seem relevant to our fixed point math use, and this section is getting long
+already. Just use the same general patterns if you want to add it in your own
+programs. Shockingly, `Rem` also works directly if you want it, though I don't
+foresee us needing fixed point remainder. Also, the GBA can't do hardware
+division or remainder, and we'll have to work around that below when we
+implement `Div` (which maybe we don't need, but it's complex enough I should
+show it instead of letting people guess).
+
+**Note:** In addition to the various `Op` traits, there's also `OpAssign`
+variants. Each `OpAssign` is the same as `Op`, but takes `&mut self` instead of
+`self` and then modifies in place instead of producing a fresh value. In other
+words, if you want both `+` and `+=` you'll need to do the `AddAssign` trait
+too. It's not the worst thing to just write `a = a+b`, so I won't bother with
+showing all that here. It's pretty easy to figure out for yourself if you want.
+
+## Multiplication
+
+This is where things get more interesting. When we have two numbers `A` and `B`
+they really stand for `(a*f)` and `(b*f)`. If we write `A*B` then we're really
+writing `(a*f)*(b*f)`, which can be rewritten as `(a*b)*f*f`, and now it's
+obvious that we have one more `f` than we wanted to have. We have to do the
+multiply of the inner value and then divide out the `f`. We divide by `1 <<
+bit_count`, so if we have 8 fractional bits we'll divide by 256.
+
+The catch is that, when we do the multiply we're _extremely_ likely to overflow
+our base type with that multiplication step. Then we do that divide, and now our
+result is basically nonsense. We can avoid this to some extent by casting up to
+a higher bit type, doing the multiplication and division at higher precision,
+and then casting back down. We want as much precision as possible without being
+too inefficient, so we'll always cast up to 32-bit (on a 64-bit machine you'd
+cast up to 64-bit instead).
+
+Naturally, any signed value has to be cast up to `i32` and any unsigned value
+has to be cast up to `u32`, so we'll have to handle those separately.
+
+Also, instead of doing an _actual_ divide we can right-shift by the correct
+number of bits to achieve the same effect. _Except_ when we have a signed value
+that's negative, because actual division truncates towards zero and
+right-shifting truncates towards negative infinity. We can get around _this_ by
+flipping the sign, doing the shift, and flipping the sign again (which sounds
+silly but it's so much faster than doing an actual division).
+
+Also, again signed values can be annoying, because if the value _just happens_
+to be `i32::MIN` then when you negate it you'll have... _still_ a negative
+value. I'm not 100% on this, but I think the correct thing to do at that point
+is to give `$t::MIN` as our output num value.
+
+Did you get all that? Good. Because this involves casting, we will need to
+implement it three times, which calls for another macro.
+
+```rust
+macro_rules! fixed_point_signed_multiply {
+  ($t:ident) => {
+    impl<F: Unsigned> Mul for Fx<$t, F> {
+      type Output = Self;
+      fn mul(self, rhs: Fx<$t, F>) -> Self::Output {
+        let pre_shift = (self.num as i32).wrapping_mul(rhs.num as i32);
+        if pre_shift < 0 {
+          if pre_shift == core::i32::MIN {
+            Fx {
+              num: core::$t::MIN,
+              phantom: PhantomData,
+            }
+          } else {
+            Fx {
+              num: (-((-pre_shift) >> F::U8)) as $t,
+              phantom: PhantomData,
+            }
+          }
+        } else {
+          Fx {
+            num: (pre_shift >> F::U8) as $t,
+            phantom: PhantomData,
+          }
+        }
+      }
+    }
+  };
+}
+
+fixed_point_signed_multiply! {i8}
+fixed_point_signed_multiply! {i16}
+fixed_point_signed_multiply! {i32}
+
+macro_rules! fixed_point_unsigned_multiply {
+  ($t:ident) => {
+    impl<F: Unsigned> Mul for Fx<$t, F> {
+      type Output = Self;
+      fn mul(self, rhs: Fx<$t, F>) -> Self::Output {
+        Fx {
+          num: ((self.num as u32).wrapping_mul(rhs.num as u32) >> F::U8) as $t,
+          phantom: PhantomData,
+        }
+      }
+    }
+  };
+}
+
+fixed_point_unsigned_multiply! {u8}
+fixed_point_unsigned_multiply! {u16}
+fixed_point_unsigned_multiply! {u32}
+```
+
+## Division
+
+Division is similar to multiplication, but reversed. Which makes sense. This
+time `A/B` gives `(a*f)/(b*f)` which is `a/b`, one _less_ `f` than we were
+after.
+
+As with the multiplication version of things, we have to up-cast our inner value
+as much as we can before doing the math, to allow for the most precision
+possible.
+
+The snag here is that the GBA has no division or remainder. Instead, the GBA has
+a BIOS function you can call to do `i32/i32` division.
+
+This is a potential problem for us though. If we have some unsigned value, we
+need it to fit within the positive space of an `i32` _after the multiply_ so
+that we can cast it to `i32`, call the BIOS function that only works on `i32`
+values, and cast it back to its actual type.
+
+* If you have a u8 you're always okay, even with 8 fractional bits.
+* If you have a u16 you're okay even with a maximum value up to 15 fractional
+  bits, but having a maximum value and 16 fractional bits makes it break.
+* If you have a u32 you're probably going to be in trouble all the time.
+
+So... ugh, there's not much we can do about this. For now we'll just have to
+suffer some.
+
+// TODO: find a numerics book that tells us how to do `u32/u32` divisions.
+
+```rust
+macro_rules! fixed_point_signed_division {
+  ($t:ident) => {
+    impl<F: Unsigned> Div for Fx<$t, F> {
+      type Output = Self;
+      fn div(self, rhs: Fx<$t, F>) -> Self::Output {
+        let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8);
+        let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32);
+        Fx {
+          num: divide_result as $t,
+          phantom: PhantomData,
+        }
+      }
+    }
+  };
+}
+
+fixed_point_signed_division! {i8}
+fixed_point_signed_division! {i16}
+fixed_point_signed_division! {i32}
+
+macro_rules! fixed_point_unsigned_division {
+  ($t:ident) => {
+    impl<F: Unsigned> Div for Fx<$t, F> {
+      type Output = Self;
+      fn div(self, rhs: Fx<$t, F>) -> Self::Output {
+        let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8);
+        let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32);
+        Fx {
+          num: divide_result as $t,
+          phantom: PhantomData,
+        }
+      }
+    }
+  };
+}
+
+fixed_point_unsigned_division! {u8}
+fixed_point_unsigned_division! {u16}
+fixed_point_unsigned_division! {u32}
+```
+
+## Trigonometry
+
+TODO: look up tables! arcbits!
+
+## Just Using A Crate
+
+If, after seeing all that, and seeing that I still didn't even cover every
+possible trait impl that you might want for all the possible types... if after
+all that you feel too intimidated, then I'll cave a bit on your behalf and
+suggest to you that the [fixed](https://crates.io/crates/fixed) crate seems to
+be the best crate available for fixed point math.
+
+_I have not tested its use on the GBA myself_.
+
+It's just my recommendation from looking at the docs of the various options
+available, if you really wanted to just have a crate for it.
--- a/book/src/01-quirks/04-newtype.md
+++ b/book/src/01-quirks/04-newtype.md
@ -1,5 +1,8 @@
 # Newtype

+TODO: we've already used newtype twice by now (fixed point values and volatile
+addresses), so we need to adjust how we start this section.
+
 There's a great Zero Cost abstraction that we'll be using a lot that you might
 not already be familiar with: we're talking about the "Newtype Pattern"!

@ -27,32 +30,19 @@ cost at compile time.
 pub struct PixelColor(u16);
 ```

+TODO: we've already talked about repr(transparent) by now
+
 Ah, except that, as I'm sure you remember from [The
 Rustonomicon](https://doc.rust-lang.org/nomicon/other-reprs.html#reprtransparent)
-(and from [the
-RFC](https://github.com/rust-lang/rfcs/blob/master/text/1758-repr-transparent.md)
-too, of course), if we have a single field struct that's sometimes different
-from having just the bare value, so we should be using `#[repr(transparent)]`
-with our newtypes.
+(and from the RFC too, of course), if we have a single field struct that's
+sometimes different from having just the bare value, so we should be using
+`#[repr(transparent)]` with our newtypes.

 ```rust
 #[repr(transparent)]
 pub struct PixelColor(u16);
 ```

-Ah, and of course we'll need to make it so you can unwrap the value:
-
-```rust
-#[repr(transparent)]
-pub struct PixelColor(u16);
-
-impl From<PixelColor> for u16 {
-  fn from(color: PixelColor) -> u16 {
-    color.0
-  }
-}
-```
-
 And then we'll need to do that same thing for _every other newtype we want_.

 Except there's only two tiny parts that actually differ between newtype
@ -62,7 +52,12 @@ a job for a macro to me!

 ## Making It A Macro

-The most basic version of the macro we want goes like this:
+If you're going to do much with macros you should definitely read through [The
+Little Book of Rust
+Macros](https://danielkeep.github.io/tlborm/book/index.html), but we won't be
+doing too much so you can just follow along here a bit if you like.
+
+The most basic version of a newtype macro starts like this:

 ```rust
 #[macro_export]
@ -74,8 +69,39 @@ macro_rules! newtype {
 }
 ```

-Except we also want to be able to add attributes (which includes doc comments),
-so we upgrade our macro a bit:
+The `#[macro_export]` makes it exported by the current module (like `pub`
+kinda), and then we have one expansion option that takes an identifier, a `,`,
+and then a second identifier. The new name is the outer type we'll be using, and
+the old name is the inner type that's being wrapped. You'd use our new macro
+something like this:
+
+```rust
+newtype! {PixelColorCurly, u16}
+
+newtype!(PixelColorParens, u16);
+
+newtype![PixelColorBrackets, u16];
+```
+
+Note that you can invoke the macro with the outermost grouping as any of `()`,
+`[]`, or `{}`.  It makes no particular difference to the macro. Also, that space
+in the first version is kinda to show off that you can put white space in
+between the macro name and the grouping if you want. The difference is mostly
+style, but there are some rules and considerations here:
+
+* If you use curly braces then you _must not_ put a `;` after the invocation.
+* If you use parentheses or brackets then you _must_ put the `;` at the end.
+* Rustfmt cares which you use and formats accordingly:
+  * Curly brace macro use mostly gets treated like a code block.
+  * Parentheses macro use mostly gets treated like a function call.
+  * Bracket macro use mostly gets treated like an array declaration.
+
+## Upgrade That Macro!
+
+We also want to be able to add `derive` stuff and doc comments to our newtype.
+Within the context of `macro_rules!` definitions these are called "meta". Since
+we can have any number of them we wrap it all up in a "zero or more" matcher.
+Then our macro looks like this:

 ```rust
 #[macro_export]
@ -88,52 +114,44 @@ macro_rules! newtype {
 }
 ```

-And we want to automatically add the ability to turn the wrapper type back into
-the wrapped type.
+So now we can write

 ```rust
-#[macro_export]
-macro_rules! newtype {
-  ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => {
-    $(#[$attr])*
-    #[repr(transparent)]
-    pub struct $new_name($old_name);
-    
-    impl From<$new_name> for $old_name {
-      fn from(x: $new_name) -> $old_name {
-        x.0
-      }
-    }
-  };
+newtype! {
+  /// Color on the GBA gives 5 bits for each channel, the highest bit is ignored.
+  #[derive(Debug, Clone, Copy)]
+  PixelColor, u16
 }
 ```

-That seems like enough for all of our examples, so we'll stop there. We could
-add more things:
-
-* Making the `From` impl being optional. We'd have to make the newtype
-  invocation be more complicated somehow, the user puts ", no-unwrap" after the
-  inner type declaration or something, or something like that.
-* Allowing for more precise visibility controls on the wrapping type and on the
-  inner field. This would add a lot of line noise, so we'll just always have our
-  newtypes be `pub`.
-* Allowing for generic newtypes, which might sound silly but that we'll actually
-  see an example of soon enough. To do this you might _think_ that we can change
-  the `:ident` declarations to `:ty`, but since we're declaring a fresh type not
-  using an existing type we have to accept it as an `:ident`. The way you get
-  around this is with a proc-macro, which is a lot more powerful but which also
-  requires that you write the proc-macro in an entirely other crate that gets
-  compiled first. We don't need that much power, so for our examples we'll go
-  with the macro_rules version and just do it by hand in the few cases where we
-  need a generic newtype.
-* Allowing for `Deref` and `DerefMut`, which usually defeats the point of doing
-  the newtype, but maybe sometimes it's the right thing, so if you were going
-  for the full industrial strength version with a proc-macro and all you might
-  want to make that part of your optional add-ons as well the same way you might
-  want optional `From`. You'd probably want `From` to be "on by default" and
-  `Deref`/`DerefMut` to be "off by default", but whatever.
+And that's about all we'll need for the examples.

 **As a reminder:** remember that `macro_rules` macros have to appear _before_
 they're invoked in your source, so the `newtype` macro will always have to be at
 the very top of your file, or if you put it in a module within your project
 you'll need to declare the module before anything that uses it.
+
+## Potential Homework
+
+If you wanted to keep going and get really fancy with it, you could potentially
+add a lot more:
+
+* Make a `pub const fn new() -> Self` method that outputs the base value in a
+  const way. Combine this with builder style "setter" methods that are also
+  const and you can get the compiler to do quite a bit of the value building
+  work at compile time.
+* Making the macro optionally emit a `From` impl to unwrap it back into the base
+  type.
+* Allow for visibility modifiers to be applied to the inner field and the newly
+  generated type.
+* Allowing for generic newtypes. You already saw the need for this once in the
+  volatile section. Unfortunately, this particular part gets really tricky if
+  you're using `macro_rules!`, so you might need to move up to a full
+  `proc_macro`. Having a `proc_macro` isn't bad except that they have to be
+  defined in a crate of their own and they're compiled before use. You can't
+  ever use them in the crate that defines them, so we won't be using them in any
+  of our single file examples.
+* Allowing for optional `Deref` and `DerefMut` of the inner value. This takes
+  away most all the safety aspect of doing the newtype, but there may be times
+  for it. As an example, you could make a newtype with a different form of
+  Display impl that you want to otherwise treat as the base type in all places.
--- a/book/src/01-quirks/05-const_asserts.md
+++ b/book/src/01-quirks/05-const_asserts.md
@ -0,0 +1,130 @@
+# Constant Assertions
+
+Have you ever wanted to assert things _even before runtime_? We all have, of
+course. Particularly when the runtime machine is a poor little GBA, we'd like to
+have the machine doing the compile handle as much checking as possible.
+
+Enter the [static assertions](https://docs.rs/static_assertions/) crate, which
+provides a way to let you assert on a `const` expression.
+
+This is an amazing crate that you should definitely use when you can.
+
+It's written by [Nikolai Vazquez](https://github.com/nvzqz), and they kindly
+wrote up a [blog
+post](https://nikolaivazquez.com/posts/programming/rust-static-assertions/) that
+explains the thinking behind it.
+
+However, I promised that each example would be single file, and I also promised
+to explain what's going on as we go, so we'll briefly touch upon giving an
+explanation here.
+
+## How We Const Assert
+
+Alright, as it stands (2018-12-15), we can't use `if` in a `const` context.
+
+Since we can't use `if`, we can't use a normal `assert!`. Some day it will be
+possible, and a failed assert at compile time will be a compile error and a
+failed assert at run time will be a panic and we'll have a nice unified
+programming experience. Until then, we can add compile-time-only assertions by
+being a little tricky with the compiler.
+
+If we write
+
+```rust
+const ASSERT: usize = 0 - 1;
+```
+
+that gives a warning, since the math would underflow. We can upgrade that
+warning to a hard error:
+
+```rust
+#[deny(const_err)]
+const ASSERT: usize = 0 - 1;
+```
+
+And to make our construction reusable we can enable the
+[underscore_const_names](https://github.com/rust-lang/rust/issues/54912) feature
+in our program (or library) and then give each such const an underscore for a
+name.
+
+```rust
+#![feature(underscore_const_names)]
+
+#[deny(const_err)]
+const _: usize = 0 - 1;
+```
+
+Now we wrap this in a macro where we give a `bool` expression as input. We
+negate the bool then cast it to a `usize`, meaning that `true` negates into
+`false`, which becomes `0usize`, and then there's no underflow error. Or if the
+input was `false`, it negates into `true`, then becomes `1usize`, and then the
+underflow error fires.
+
+```rust
+macro_rules! const_assert {
+  ($condition:expr) => {
+    #[deny(const_err)]
+    #[allow(dead_code)]
+    const _: usize = 0 - !$condition as usize;
+  }
+}
+```
+
+Technically, written like this, the expression can be anything with a
+`core::ops::Not` implementation that can also be `as` cast into `usize`. That's
+`bool`, but also basically all the other number types. Since we want to ensure
+that we get proper looking type errors when things go wrong, we can use
+`($condition && true)` to enforce that we get a `bool` (thanks to `Talchas` for
+that particular suggestion).
+
+```rust
+macro_rules! const_assert {
+  ($condition:expr) => {
+    #[deny(const_err)]
+    #[allow(dead_code)]
+    const _: usize = 0 - !($condition && true) as usize;
+  }
+}
+```
+
+## Asserting Something
+
+As an example of how we might use a `const_assert`, we'll do a demo with colors.
+There's a red, blue, and green channel. We store colors in a `u16` with 5 bits
+for each channel.
+
+```rust
+newtype! {
+  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+  Color, u16
+}
+```
+
+And when we're building a color, we're passing in `u16` values, but they could
+be using more than just 5 bits of space. We want to make sure that each channel
+is 31 or less, so we can make a color builder that does a `const_assert!` on the
+value of each channel.
+
+```rust
+macro_rules! rgb {
+  ($r:expr, $g:expr, $b:expr) => {
+    {
+      const_assert!($r <= 31);
+      const_assert!($g <= 31);
+      const_assert!($b <= 31);
+      Color($b << 10 | $g << 5 | $r)
+    }
+  }
+}
+```
+
+And then we can declare some colors
+
+```rust
+const RED: Color = rgb!(31, 0, 0);
+
+const BLUE: Color = rgb!(31, 500, 0);
+```
+
+The second one is clearly out of bounds and it fires an error just like we
+wanted.
--- a/book/src/02-concepts/00-index.md
+++ b/book/src/02-concepts/00-index.md
@ -1 +1,38 @@
 # Broad Concepts
+
+The GameBoy Advance sits in a middle place between the chthonic game consoles of
+the ancient past and the "small PC in a funny case" consoles of the modern age.
+
+On the one hand, yeah, you're gonna find a few strange conventions as you learn
+all the ropes.
+
+On the other, at least we're writing in Rust at all, and not having to do all
+the assembly by hand.
+
+This chapter for "concepts" has a section for each part of the GBA's hardware
+memory map, going by increasing order of base address value. The sections try to
+explain as much as possible while sticking to just the concerns you might have
+regarding that part of the memory map.
+
+For an assessment of how to wrangle all three parts of the video system (PALRAM,
+VRAM, and OAM), along with the correct IO registers, into something that shows a
+picture, you'll want the Video chapter.
+
+Similarly, the "IO Registers" part of the GBA actually controls how you interact
+with every single bit of hardware connected to the GBA. A full description of
+everything is obviously too much for just one section of the book. Instead you
+get an overview of general IO register rules and advice. Each particular
+register is described in the appropriate sections of either the Video or
+Non-Video chapters.
+
+## Bus Size
+
+TODO: describe this
+
+## Minimum Write Size
+
+TODO: talk about parts where you can't write one byte at a time
+
+## Volatile or Not?
+
+TODO: discuss what memory should be used volatile style and what can be used normal style.
--- a/book/src/02-concepts/02-bios.md
+++ b/book/src/02-concepts/02-bios.md
@ -1 +1,241 @@
 # BIOS
+
+* **Address Span:** `0x0` to `0x3FFF` (16k)
+
+The [BIOS](https://en.wikipedia.org/wiki/BIOS) of the GBA is a small read-only
+portion of memory at the very base of the address space. However, it is also
+hardware protected against reading, so if you try to read from BIOS memory when
+the program counter isn't pointed into the BIOS (eg: any time code _you_ write
+is executing) then you get [basically garbage
+data](https://problemkaputt.de/gbatek.htm#gbaunpredictablethings) back.
+
+So we're not going to spend time here talking about what bits to read or write
+within BIOS memory like we do with the other sections. Instead we're going to
+spend time talking about [inline
+assembly](https://doc.rust-lang.org/unstable-book/language-features/asm.html)
+([tracking issue](https://github.com/rust-lang/rust/issues/29722)) and then use
+it to call the [GBA BIOS
+Functions](https://problemkaputt.de/gbatek.htm#biosfunctions).
+
+Note that BIOS calls have _more overhead than normal function calls_, so don't
+go using them all over the place if you don't have to. They're also usually
+written more to be compact in terms of code than for raw speed, so you actually
+can out speed them in some cases. Between the increased overhead and not being
+as speed optimized, you can sometimes do a faster job without calling the BIOS
+at all. (TODO: investigate more about what parts of the BIOS we could
+potentially offer faster alternatives for.)
+
+I'd like to take a moment to thank [Marc Brinkmann](https://github.com/mbr)
+(with contributions from [Oliver Schneider](https://github.com/oli-obk) and
+[Philipp Oppermann](https://github.com/phil-opp)) for writing [this blog
+post](http://embed.rs/articles/2016/arm-inline-assembly-rust/). It's at least
+ten times the tutorial quality of the `asm` entry in the Unstable Book. In
+fairness to the Unstable Book, the actual spec of how inline ASM works in rust
+is "basically what clang does", and that's specified as "basically what GCC
+does", and that's basically/shockingly not specified much at all despite GCC
+being like 30 years old.
+
+So let's be slow and pedantic about this process.
+
+## Inline ASM
+
+**Fair Warning:** Inline asm is one of the least stable parts of Rust overall,
+and if you write bad things you can trigger internal compiler errors and panics
+and crashes and make LLVM choke and die without explanation. If you write some
+inline asm and then your program suddenly stops compiling without
+explanation, try commenting out that whole inline asm use and see if it's
+causing the problem. Double check that you've written every single part of the
+asm call absolutely correctly, etc, etc.
+
+**Bonus Warning:** The general information that follows regarding the asm macro
+is consistent from system to system, but specific information about register
+names, register quantities, asm instruction argument ordering, and so on is
+specific to ARM on the GBA. If you're programming for any other device you'll
+need to carefully investigate that before you begin.
+
+Now then, with those out of the way, the inline asm docs describe an asm call as
+looking like this:
+
+```rust
+asm!(assembly template
+   : output operands
+   : input operands
+   : clobbers
+   : options
+   );
+```
+
+And once you stick a lot of stuff in there it can _absolutely_ be hard to
+remember the ordering of the elements. So we'll start with a code block that
+has some comments thrown in on each line:
+
+```rust
+asm!(/* ASM */ TODO
+    :/* OUT */ TODO
+    :/* INP */ TODO
+    :/* CLO */ TODO
+    :/* OPT */
+);
+```
+
+Now we have to decide what we're gonna write. Obviously we're going to do some
+instructions, but those instructions use registers, and how are we gonna talk
+about them? We've got two choices.
+
+1) We can pick each and every register used by specifying exact register names.
+   In THUMB mode we have 8 registers available, named `r0` through `r7`. If you
+   switch into 32-bit mode there's additional registers that are also available.
+
+2) We can specify slots for registers we need and let LLVM decide. In this style
+   you name your slots `$0`, `$1` and so on. Slot numbers are assigned first to
+   all specified outputs, then to all specified inputs, in the order that you
+   list them.
+
+In the case of the GBA BIOS, each BIOS function has pre-designated input and
+output registers, so we will use the first style. If you use inline ASM in other
+parts of your code you're free to use the second style.
+
+### Assembly
+
+This is just one big string literal. You write out one instruction per line, and
+excess whitespace is ignored. You can also do comments within your assembly
+using `;` to start a comment that goes until the end of the line.
+
+Assembly convention doesn't consider it unreasonable to comment potentially as
+much as _every single line_ of asm that you write when you're getting used to
+things. Or even if you are used to things. This is cryptic stuff, there's a
+reason we avoid writing in it as much as possible.
+
+Remember that our Rust code is in 16-bit mode. You _can_ switch to 32-bit mode
+within your asm as long as you switch back by the time the block ends. Otherwise
+you'll have a bad time.
+
+### Outputs
+
+A comma separated list. Each entry looks like
+
+* `"constraint" (binding)`
+
+An output constraint starts with a symbol:
+
+* `=` for write only
+* `+` for reads and writes
+* `&` for "early clobber", meaning that you'll write to this at some point
+  before all input values have been read. It prevents this register from being
+  assigned to an input register.
+
+Followed by _either_ the letter `r` (if you want LLVM to pick the register to
+use) or curly braces around a specific register (if you want to pick).
+
+* The binding can be any single 32-bit or smaller value.
+* If your binding has bit pattern requirements ("must be non-zero", etc) you are
+  responsible for upholding that.
+* If your binding type will try to `Drop` later then you are responsible for it
+  being in a fit state to do that.
+* The binding must be either a mutable binding or a binding that was
+  pre-declared but not yet assigned.
+
+Anything else is UB.
+
+### Inputs
+
+This is a similar comma separated list.
+
+* `"constraint" (binding)`
+
+An input constraint doesn't have the symbol prefix, you just pick either `r` or
+a named register with curly braces around it.
+
+* An input binding must be a single 32-bit or smaller value.
+* An input binding _should_ be a type that is `Copy` but this is not an absolute
+  requirement. Having the input be read is semantically similar to using
+  `core::ptr::read(&binding)` and forgetting the value when you're done.
+
+### Clobbers
+
+Sometimes your asm will touch registers other than the ones declared for input
+and output. 
+
+Clobbers are declared as a comma separated list of string literals naming
+specific registers. You don't use curly braces with clobbers.
+
+LLVM _needs_ to know this information. It can move things around to keep your
+data safe, but only if you tell it what's about to happen.
+
+Failure to define all of your clobbers can cause UB.
+
+### Options
+
+There's only one option we'd care to specify. That option is "volatile".
+
+Just like with a function call, LLVM will skip a block of asm if it doesn't see
+that any outputs from the asm were used later on. Nearly every single BIOS call
+(other than the math operations) will need to be marked as "volatile".
+
+### BIOS ASM
+
+* Inputs are always `r0`, `r1`, `r2`, and/or `r3`, depending on function.
+* Outputs are always zero or more of `r0`, `r1`, and `r3`.
+* Any of the output registers that aren't actually used should be marked as
+  clobbered.
+* All other registers are unaffected.
+
+All of the GBA BIOS calls are performed using the
+[swi](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/BABFCEEG.html)
+instruction, combined with a value depending on what BIOS function you're trying
+to invoke. If you're in 16-bit code you use the value directly, and if you're in
+32-bit mode you shift the value up by 16 bits first.
+
+### Example BIOS Function: Division
+
+For our example we'll use the division function, because GBATEK gives very clear
+instructions on how each register is used with that one:
+
+```txt
+Signed Division, r0/r1.
+  r0  signed 32bit Number
+  r1  signed 32bit Denom
+Return:
+  r0  Number DIV Denom ;signed
+  r1  Number MOD Denom ;signed
+  r3  ABS (Number DIV Denom) ;unsigned
+For example, incoming -1234, 10 should return -123, -4, +123.
+The function usually gets caught in an endless loop upon division by zero.
+```
+
+The math folks tell me that the `r1` value should be properly called the
+"remainder" not the "modulus". We'll go with that for our function, doesn't hurt
+to use the correct names. Our Rust function has an assert against dividing by
+`0`, then we name some bindings _without_ giving them a value, we make the asm
+call, and then return what we got.
+
+```rust
+pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) {
+  assert!(denominator != 0);
+  let div_out: i32;
+  let rem_out: i32;
+  unsafe {
+    asm!(/* ASM */ "swi 0x06"
+        :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out)
+        :/* INP */ "{r0}"(numerator), "{r1}"(denominator)
+        :/* CLO */ "r3"
+        :/* OPT */
+    );
+  }
+  (div_out, rem_out)
+}
+```
+
+I _hope_ this all makes sense by now.
+
+## Specific BIOS Functions
+
+For a full list of all the specific BIOS functions and their use you should
+check the `gba::bios` module within the `gba` crate. There's just so many of
+them that enumerating them all here wouldn't serve much purpose.
+
+Which is not to say that we'll never cover any BIOS functions in this book!
+Instead, we'll simply mention them whenever they're relevant to the task at
+hand (such as controlling sound or waiting for vblank).
+
+//TODO: list/name all BIOS functions as well as what they relate to elsewhere.
--- a/book/src/02-concepts/03-wram.md
+++ b/book/src/02-concepts/03-wram.md
@ -1 +1,28 @@
 # Work RAM
+
+## External Work RAM (EWRAM)
+
+* **Address Span:** `0x2000000` to `0x203FFFF` (256k)
+
+This is a big pile of space, the use of which is up to each game. However, the
+external work ram has only a 16-bit bus (if you read/write a 32-bit value it
+silently breaks it up into two 16-bit operations) and also 2 wait cycles (extra
+CPU cycles that you have to expend _per 16-bit bus use_).
+
+It's most helpful to think of EWRAM as slower, distant memory, similar to the
+"heap" in a normal application. You can take the time to go store something
+within EWRAM, or to load it out of EWRAM, but if you've got several operations
+to do in a row and you're worried about time you should pull that value into
+local memory, work on your local copy, and then push it back out to EWRAM.
+
+## Internal Work RAM (IWRAM)
+
+* **Address Span:** `0x3000000` to `0x3007FFF` (32k)
+
+This is a smaller pile of space, but it has a 32-bit bus and no wait.
+
+By default, `0x3007F00` to `0x3007FFF` is reserved for interrupt and BIOS use.
+The rest of it is mostly up to you. The user's stack space starts at `0x3007F00`
+and proceeds _down_ from there. For best results you should probably start at
+`0x3000000` and then go upwards. Under normal use it's unlikely that the two
+memory regions will crash into each other.
--- a/book/src/02-concepts/04-io-registers.md
+++ b/book/src/02-concepts/04-io-registers.md
@ -1 +1,3 @@
 # IO Registers
+
+* **Address Span:** `0x400_0000` to `0x400_03FE`
--- a/book/src/02-concepts/05-palram.md
+++ b/book/src/02-concepts/05-palram.md
@ -1 +1,50 @@
-# Palette RAM
+# Palette RAM (PALRAM)
+
+* **Address Span:** `0x500_0000` to `0x500_03FF` (1k)
+
+Palette RAM has a 16-bit bus, which isn't really a problem because it
+conceptually just holds `u16` values. There's no automatic wait state, but if
+you try to access the same location that the display controller is accessing you
+get bumped by 1 cycle. Since the display controller can use the palette ram any
+number of times per scanline it's basically impossible to predict if you'll have
+to do a wait or not during VDraw. During VBlank you won't have any wait of
+course.
+
+PALRAM is among the memory where there's weirdness if you try to write just one
+byte: if you try to write just 1 byte, it writes that byte into _both_ parts of
+the larger 16-bit location. This doesn't really affect us much with PALRAM,
+because palette values are all supposed to be `u16` anyway.
+
+The palette memory actually contains not one, but _two_ sets of palettes. First
+there's 256 entries for the background palette data (starting at `0x5000000`),
+and then there's 256 entries for object palette data (starting at `0x5000200`).
+
+The GBA also has two modes for palette access: 8-bits-per-pixel (8bpp) and
+4-bits-per-pixel (4bpp).
+
+* In 8bpp mode an 8-bit palette index value within a background or sprite
+  simply indexes directly into the 256 slots for that type of thing.
+* In 4bpp mode a 4-bit palette index value within a background or sprite
+  specifies an index within a particular "palbank" (16 palette entries each),
+  and then a _separate_ setting outside of the graphical data determines which
+  palbank is to be used for that background or object (the screen entry data for
+  backgrounds, and the object attributes for objects).
+
+### Transparency
+
+When a pixel within a background or object specifies index 0 as its palette
+entry it is treated as a transparent pixel. This means that in 8bpp mode there's
+only 255 actual color options (0 being transparent), and in 4bpp mode there's
+only 15 actual color options available within each palbank (the 0th entry of
+_each_ palbank is transparent).
+
+Individual backgrounds, and individual objects, each determine if they're 4bpp
+or 8bpp separately, so a given overall palette slot might map to a used color in
+8bpp and an unused/transparent color in 4bpp. If you're a palette wizard.
+
+Palette slot 0 of the overall background palette is used to determine the
+"backdrop" color. That's the color you see if no background or object ends up
+being rendered within a given pixel.
+
+Since display mode 3 and display mode 5 don't use the palette, they cannot
+benefit from transparency.
--- a/book/src/02-concepts/06-vram.md
+++ b/book/src/02-concepts/06-vram.md
@ -1 +1,24 @@
-# Video RAM
+# Video RAM (VRAM)
+
+* **Address Span:** `0x600_0000` to `0x601_7FFF` (96k)
+
+We've used this before! VRAM has a 16-bit bus and no wait. However, the same as
+with PALRAM, the "you might have to wait if the display controller is looking at
+it" rule applies here.
+
+Unfortunately there's not much more exact detail that can be given about VRAM.
+The use of the memory depends on the video mode that you're using.
+
+One general detail of note is that you can't write individual bytes to any part
+of VRAM. Depending on mode and location, you'll either get your bytes doubled
+into both the upper and lower parts of the 16-bit location targeted, or you
+won't even affect the memory. This usually isn't a big deal, except in two
+situations:
+
+* In Mode 4, if you want to change just 1 pixel, you'll have to be very careful
+  to read the old `u16`, overwrite just the byte you wanted to change, and then
+  write that back.
+* In any display mode, avoid using `memcpy` to place things into VRAM.
+  It's written to be byte oriented, and only does 32-bit transfers under select
+  conditions. The rest of the time it'll copy one byte at a time and you'll get
+  either garbage or nothing at all.
--- a/book/src/02-concepts/07-oam.md
+++ b/book/src/02-concepts/07-oam.md
@ -1 +1,62 @@
-# Object Attribute Memory
+# Object Attribute Memory (OAM)
+
+* **Address Span:** `0x700_0000` to `0x700_03FF` (1k)
+
+The Object Attribute Memory has a 32-bit bus and no default wait, but suffers
+from the "you might have to wait if the display controller is looking at it"
+rule. You cannot write individual bytes to OAM at all, but that's not really a
+problem because all the fields of the data types within OAM are either `i16` or
+`u16` anyway.
+
+Object attribute memory is the wildest yet: it conceptually contains two types
+of things, but they're _interlaced_ with each other all the way through.
+
+Now, [GBATEK](http://problemkaputt.de/gbatek.htm#lcdobjoamattributes) and
+[CowBite](https://www.cs.rit.edu/~tjh8300/CowBite/CowBiteSpec.htm#OAM%20(sprites))
+don't quite give names to the two data types here.
+[TONC](https://www.coranac.com/tonc/text/regobj.htm#sec-oam) calls them
+`OBJ_ATTR` and `OBJ_AFFINE`, but we'll be giving them names fitting with the
+Rust naming convention. Just know that if you try to talk about it with others
+they might not be using the same names. In Rust terms their layout would look
+like this:
+
+```rust
+#[repr(C)]
+pub struct ObjectAttributes {
+  attr0: u16,
+  attr1: u16,
+  attr2: u16,
+  filler: i16,
+}
+
+#[repr(C)]
+pub struct AffineMatrix {
+  filler0: [u16; 3],
+  pa: i16,
+  filler1: [u16; 3],
+  pb: i16,
+  filler2: [u16; 3],
+  pc: i16,
+  filler3: [u16; 3],
+  pd: i16,
+}
+```
+
+(Note: the `#[repr(C)]` part just means that Rust must lay out the data exactly
+in the order we specify, which otherwise it is not required to do).
+
+So, we've got 1024 bytes in OAM and each `ObjectAttributes` value is 8 bytes, so
+naturally we can support up to 128 objects.
+
+_At the same time_, we've got 1024 bytes in OAM and each `AffineMatrix` is 32
+bytes, so we can have 32 of them.
+
+But, as I said, these things are all _interlaced_ with each other. See how
+there's "filler" fields in each struct? If we imagine the OAM as being just an
+array of one type or the other, indexes 0/1/2/3 of the `ObjectAttributes` array
+would line up with index 0 of the `AffineMatrix` array. It's kinda weird, but
+that's just how it works. When we setup functions to read and write these values
+we'll have to be careful with how we do it. We probably _won't_ want to use
+those representations above, at least not with the `AffineMatrix` type, because
+they're quite wasteful if you want to store just object attributes or just
+affine matrices.
--- a/book/src/02-concepts/08-rom.md
+++ b/book/src/02-concepts/08-rom.md
@ -1 +1,14 @@
-# Game Pak ROM / Flash ROM
+# Game Pak ROM / Flash ROM (ROM)
+
+* **Address Span (Wait State 0):** `0x800_0000` to `0x9FF_FFFF`
+* **Address Span (Wait State 1):** `0xA00_0000` to `0xBFF_FFFF`
+* **Address Span (Wait State 2):** `0xC00_0000` to `0xDFF_FFFF`
+
+The game's ROM data is a single set of data that's up to 32 megabytes in size.
+However, that data is mirrored to three different locations in the address
+space. Depending on which part of the address space you use, it can affect the
+memory timings involved.
+
+TODO: describe `WAITCNT` here, we won't get a better chance at it.
+
+TODO: discuss THUMB vs ARM code and why THUMB is so much faster (because ROM is a 16-bit bus)
--- a/book/src/02-concepts/09-sram.md
+++ b/book/src/02-concepts/09-sram.md
@ -1 +1,16 @@
-# Save RAM
+# Save RAM (SRAM)
+
+* **Address Span:** `0xE00_0000` to `0xE00_FFFF` (64k)
+
+The actual amount of SRAM available depends on your game pak, and the 64k figure
+is simply the maximum possible. A particular game pak might have less, and an
+emulator will likely let you have all 64k if you want.
+
+As with other portions of the address space, SRAM has some number of wait cycles
+per use. As with ROM, you can change the wait cycle settings via the `WAITCNT`
+register if the defaults don't work well for your game pak. See the ROM section
+for full details of how the `WAITCNT` register works.
+
+The game pak SRAM also has only an 8-bit bus, so have fun with that.
+
+The GBA Direct Memory Access (DMA) unit cannot access SRAM.
--- a/book/src/03-video/00-index.md
+++ b/book/src/03-video/00-index.md
@ -1 +1,9 @@
 # Video
+
+GBA Video starts with an IO register called the "Display Control Register", and
+then spirals out from there. You generally have to use Palette RAM (PALRAM),
+Video RAM (VRAM), Object Attribute Memory (OAM), as well as any number of other
+IO registers.
+
+They all have to work together just right, and there's a lot going on when you
+first try doing it, so try to take it very slowly as you're learning each step.
--- a/book/src/04-non-video/00-index.md
+++ b/book/src/04-non-video/00-index.md
@ -1 +1,21 @@
 # Non-Video
+
+Besides video effects the GBA still has an okay amount of stuff going on.
+
+Obviously you'll want to know how to read the user's button inputs. That can
+almost go without saying, except that I said it.
+
+Each other part can be handled in about any order you like.
+
+Using interrupts is perhaps one of the hardest things for us as Rust programmers
+due to quirks in our compilation process. Our code all gets compiled to 16-bit
+THUMB instructions, and we don't have a way to mark a function to be compiled
+using 32-bit ASM instructions instead. However, an interrupt handler _must_ be
+written in 32-bit ASM instructions for it to work. That means that we have to
+write our interrupt handler in 32-bit ASM by hand. We'll do it, but I don't
+think we'll be too happy about it.
+
+The Link Cable related stuff is also probably a little harder to test than
+anything else. Just because link cable emulation isn't always the best, and/or
+you need two GBAs with two flash carts and the cable for hardware testing.
+Still, we'll try to go over it eventually.
--- a/book/src/04-non-video/01-buttons.md
+++ b/book/src/04-non-video/01-buttons.md
@ -1 +1,5 @@
 # Buttons
+
+It's all well and good to just show a picture, even to show an animation, but if
+we want a game we have to let the user interact with something.
+
--- a/book/src/04-non-video/06-link_cable.md
+++ b/book/src/04-non-video/06-link_cable.md
@ -0,0 +1 @@
+# Link Cable
--- a/book/src/04-non-video/06-network.md
+++ b/book/src/04-non-video/06-network.md
@ -1 +0,0 @@
-# Network
--- a/book/src/SUMMARY.md
+++ b/book/src/SUMMARY.md
@ -12,6 +12,7 @@
  * [Fixed Only](01-quirks/02-fixed_only.md)
  * [Volatile Destination](01-quirks/03-volatile_destination.md)
  * [Newtype](01-quirks/04-newtype.md)
+  * [Const Asserts](01-quirks/05-const_asserts.md)
 * [Concepts](02-concepts/00-index.md)
  * [CPU](02-concepts/01-cpu.md)
  * [BIOS](02-concepts/02-bios.md)
@ -31,7 +32,7 @@
  * [Direct Memory Access](04-non-video/03-dma.md)
  * [Sound](04-non-video/04-sound.md)
  * [Interrupts](04-non-video/05-interrupts.md)
-  * [Network](04-non-video/06-network.md)
+  * [Link Cable](04-non-video/06-link_cable.md)
  * [Game Pak](04-non-video/07-game_pak.md)
 * [Examples](05-examples/00-index.md)
  * [hello_magic](05-examples/01-hello_magic.md)
--- a/examples/bg_demo.rs
+++ b/examples/bg_demo.rs
@ -1,5 +1,5 @@
-#![feature(start)]
 #![no_std]
+#![feature(start)]

 #[panic_handler]
 fn panic(_info: &core::panic::PanicInfo) -> ! {
--- a/examples/hello_world.rs
+++ b/examples/hello_world.rs
@ -1,22 +1,37 @@
-#![feature(start)]
 #![no_std]
+#![feature(start)]
+#![feature(underscore_const_names)]

-#[panic_handler]
-fn panic(_info: &core::panic::PanicInfo) -> ! {
-  loop {}
+#[macro_export]
+macro_rules! newtype {
+  ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => {
+    $(#[$attr])*
+    #[repr(transparent)]
+    pub struct $new_name($old_name);
+  };
 }

-#[start]
-fn main(_argc: isize, _argv: *const *const u8) -> isize {
-  unsafe {
-    DISPCNT.write(MODE3 | BG2);
-    mode3_pixel(120, 80, rgb16(31, 0, 0));
-    mode3_pixel(136, 80, rgb16(0, 31, 0));
-    mode3_pixel(120, 96, rgb16(0, 0, 31));
-    loop {}
-  }
+#[macro_export]
+macro_rules! const_assert {
+  ($condition:expr) => {
+    #[deny(const_err)]
+    #[allow(dead_code)]
+    const _: usize = 0 - !$condition as usize;
+  };
 }

+/// Constructs an RGB value with a `const_assert!` that the input is in range.
+#[macro_export]
+macro_rules! const_rgb {
+  ($r:expr, $g:expr, $b:expr) => {{
+    const_assert!($r <= 31);
+    const_assert!($g <= 31);
+    const_assert!($b <= 31);
+    Color::new($r, $g, $b)
+  }};
+}
+
+// TODO: kill this
 #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
 #[repr(transparent)]
 pub struct VolatilePtr<T>(pub *mut T);
@ -32,17 +47,50 @@ impl<T> VolatilePtr<T> {
  }
 }

-pub const DISPCNT: VolatilePtr<u16> = VolatilePtr(0x04000000 as *mut u16);
-pub const MODE3: u16 = 3;
-pub const BG2: u16 = 0b100_0000_0000;
-
-pub const VRAM: usize = 0x06000000;
-pub const SCREEN_WIDTH: isize = 240;
-
-pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 {
-  blue << 10 | green << 5 | red
+newtype! {
+  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+  Color, u16
 }

-pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) {
-  VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color);
+impl Color {
+  /// Combines the red, green, and blue values provided into a single color value.
+  pub const fn new(red: u16, green: u16, blue: u16) -> Color {
+    Color(blue << 10 | green << 5 | red)
+  }
+}
+
+newtype! {
+  #[derive(Debug, Clone, Copy, PartialEq, Eq)]
+  DisplayControlSetting, u16
+}
+
+pub const DISPLAY_CONTROL: VolatilePtr<DisplayControlSetting> = VolatilePtr(0x0400_0000 as *mut DisplayControlSetting);
+pub const JUST_MODE3: DisplayControlSetting = DisplayControlSetting(3);
+pub const JUST_BG2: DisplayControlSetting = DisplayControlSetting(0b100_0000_0000);
+pub const JUST_MODE3_AND_BG2: DisplayControlSetting = DisplayControlSetting(JUST_MODE3.0 | JUST_BG2.0);
+
+pub struct Mode3;
+impl Mode3 {
+  const SCREEN_WIDTH: isize = 240;
+  const PIXELS: VolatilePtr<Color> = VolatilePtr(0x600_0000 as *mut Color);
+
+  pub unsafe fn draw_pixel_unchecked(col: isize, row: isize, color: Color) {
+    Self::PIXELS.offset(col + row * Self::SCREEN_WIDTH).write(color);
+  }
+}
+
+#[panic_handler]
+fn panic(_info: &core::panic::PanicInfo) -> ! {
+  loop {}
+}
+
+#[start]
+fn main(_argc: isize, _argv: *const *const u8) -> isize {
+  unsafe {
+    DISPLAY_CONTROL.write(JUST_MODE3_AND_BG2);
+    Mode3::draw_pixel_unchecked(120, 80, const_rgb!(31, 0, 0));
+    Mode3::draw_pixel_unchecked(136, 80, const_rgb!(0, 31, 0));
+    Mode3::draw_pixel_unchecked(120, 96, const_rgb!(0, 0, 31));
+    loop {}
+  }
 }
--- a/examples/light_cycle.rs
+++ b/examples/light_cycle.rs
@ -1,5 +1,5 @@
-#![feature(start)]
 #![no_std]
+#![feature(start)]

 #[panic_handler]
 fn panic(_info: &core::panic::PanicInfo) -> ! {
--- a/src/bios.rs
+++ b/src/bios.rs
@ -0,0 +1,517 @@
+//! This module contains wrappers for all GBA BIOS function calls.
+//!
+//! A GBA BIOS call has significantly more overhead than a normal function call,
+//! so think carefully before using them too much.
+//!
+//! The actual content of each function here is generally a single inline asm
+//! instruction to invoke the correct BIOS function (`swi x`, with `x` being
+//! whatever value is necessary for that function). Some functions also perform
+//! necessary checks to save you from yourself, such as not dividing by zero.
+
+//TODO: ALL functions in this module should have `if cfg!(test)` blocks. The
+//functions that never return must panic, the functions that return nothing
+//should just do so, and the math functions should just return the correct math
+//I guess.
+
+/// (`swi 0x00`) SoftReset the device.
+///
+/// This function does not ever return.
+///
+/// Instead, it clears the top `0x200` bytes of IWRAM (containing stacks, and
+/// BIOS IRQ vector/flags), re-initializes the system, supervisor, and irq stack
+/// pointers (new values listed below), sets `r0` through `r12`, `LR_svc`,
+/// `SPSR_svc`, `LR_irq`, and `SPSR_irq` to zero, and enters system mode. The
+/// return address is loaded into `r14` and then the function jumps there with
+/// `bx r14`.
+///
+/// * sp_svc: `0x300_7FE0`
+/// * sp_irq: `0x300_7FA0`
+/// * sp_sys: `0x300_7F00`
+/// * Zero-filled Area: `0x300_7E00` to `0x300_7FFF`
+/// * Return Address: Depends on the 8-bit flag value at `0x300_7FFA`. In either
+///   case execution proceeds in ARM mode.
+///   * zero flag: `0x800_0000` (ROM), which for our builds means that the
+///     `crt0` program executes (just like with a fresh boot), and then
+///     control passes into `main` and so on.
+///   * non-zero flag: `0x200_0000` (RAM), This is where a multiboot image would
+///     go if you were doing a multiboot thing. However, this project doesn't
+///     support multiboot at the moment. You'd need an entirely different build
+///     pipeline because there's differences in header format and things like
+///     that. Perhaps someday, but probably not even then. Submit the PR for it
+///     if you like!
+///
+/// ## Safety
+///
+/// This function isn't ever unsafe to the current iteration of the program.
+/// However, because not all memory is fully cleared you theoretically could
+/// threaten the _next_ iteration of the program that runs. I'm _fairly_
+/// convinced that you can't actually use this to force purely safe code to
+/// perform UB, but such a scenario might exist.
+#[inline(always)]
+pub unsafe fn soft_reset() -> ! {
+  asm!(/* ASM */ "swi 0x00"
+      :/* OUT */ // none
+      :/* INP */ // none
+      :/* CLO */ // none
+      :/* OPT */ "volatile"
+  );
+  // The BIOS call above never comes back (see the docs), which is the
+  // invariant that `unreachable_unchecked` relies on to give the `!` type.
+  core::hint::unreachable_unchecked()
+}
+
+/// (`swi 0x01`) RegisterRamReset.
+///
+/// Clears the portions of memory given by the `flags` value, sets the Display
+/// Control Register to `0x80` (forced blank and nothing else), then returns.
+///
+/// * Flag bits:
+///   0) Clears the 256k of EWRAM (don't use if this is where your function call
+///      will return to!)
+///   1) Clears the 32k of IWRAM _excluding_ the last `0x200` bytes (see also:
+///      the `soft_reset` function)
+///   2) Clears all Palette data
+///   3) Clears all VRAM
+///   4) Clears all OAM (reminder: a zeroed object isn't disabled!)
+///   5) Reset SIO registers (resets them to general purpose mode)
+///   6) Reset Sound registers
+///   7) Reset all IO registers _other than_ SIO and Sound
+///
+/// **Bug:** The LSB of `SIODATA32` is always zeroed, even if bit 5 was not
+/// enabled. This is sadly a bug in the design of the GBA itself.
+///
+/// ## Safety
+///
+/// It is generally a safe operation to suddenly clear any part of the GBA's
+/// memory, except in the case that you were executing out of EWRAM and clear
+/// that. If you do then you return to nothing and have a bad time.
+#[inline(always)]
+pub unsafe fn register_ram_reset(flags: u8) {
+  // `flags` is handed to the BIOS in `r0`; nothing is read back.
+  asm!(/* ASM */ "swi 0x01"
+      :/* OUT */ // none
+      :/* INP */ "{r0}"(flags)
+      :/* CLO */ // none
+      :/* OPT */ "volatile"
+  );
+}
+//TODO(lokathor): newtype this flag business.
+
+/// (`swi 0x02`) Halts the CPU until an interrupt occurs.
+///
+/// Components _other than_ the CPU continue to function. Halt mode ends when
+/// any enabled interrupt triggers.
+#[inline(always)]
+pub fn halt() {
+  unsafe {
+    // No inputs or outputs: the call exists only for its side effect, which
+    // is why it is marked "volatile".
+    asm!(/* ASM */ "swi 0x02"
+        :/* OUT */ // none
+        :/* INP */ // none
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+
+/// (`swi 0x03`) Stops the CPU as well as most other components.
+///
+/// Stop mode must be stopped by an interrupt, but can _only_ be stopped by a
+/// Keypad, Game Pak, or General-Purpose-SIO interrupt.
+///
+/// Before going into stop mode you should manually disable video and sound (or
+/// they will continue to consume power), and you should also disable any other
+/// optional externals such as rumble and infra-red.
+#[inline(always)]
+pub fn stop() {
+  unsafe {
+    // No inputs or outputs: the call exists only for its side effect, which
+    // is why it is marked "volatile".
+    asm!(/* ASM */ "swi 0x03"
+        :/* OUT */ // none
+        :/* INP */ // none
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+
+/// (`swi 0x04`) "IntrWait", similar to halt but with more options.
+///
+/// * The first argument controls if you want to ignore all current flags and
+///   wait until a new flag is set.
+/// * The second argument is what flags you're waiting on (same format as the
+///   IE/IF registers).
+///
+/// If you're trying to handle more than one interrupt at once this has less
+/// overhead than calling `halt` over and over.
+///
+/// When using this routine your interrupt handler MUST update the BIOS
+/// Interrupt Flags `0x300_7FF8` in addition to the usual interrupt
+/// acknowledgement.
+#[inline(always)]
+pub fn interrupt_wait(ignore_current_flags: bool, target_flags: u16) {
+  unsafe {
+    // `ignore_current_flags` goes in `r0` and `target_flags` in `r1`.
+    asm!(/* ASM */ "swi 0x04"
+        :/* OUT */ // none
+        :/* INP */ "{r0}"(ignore_current_flags), "{r1}"(target_flags)
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+//TODO(lokathor): newtype this flag business.
+
+/// (`swi 0x05`) "VBlankIntrWait", VBlank Interrupt Wait.
+///
+/// This is as per `interrupt_wait(true, 1)` (aka "wait for a new vblank"). You
+/// must follow the same guidelines that `interrupt_wait` outlines.
+#[inline(always)]
+pub fn vblank_interrupt_wait() {
+  unsafe {
+    // This must be `swi 0x05`: no inputs are passed here, and `swi 0x04`
+    // (`interrupt_wait`) would read whatever happens to be in `r0`/`r1`.
+    asm!(/* ASM */ "swi 0x05"
+        :/* OUT */ // none
+        :/* INP */ // none
+        :/* CLO */ "r0", "r1" // both set to 1 by the routine
+        :/* OPT */ "volatile"
+    );
+  }
+}
+
+/// (`swi 0x06`) Software Division and Remainder.
+///
+/// Returns the pair `(quotient, remainder)`.
+///
+/// ## Panics
+///
+/// If the denominator is 0.
+#[inline(always)]
+pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) {
+  assert!(denominator != 0);
+  if cfg!(test) {
+    // Test builds use the ordinary operators instead of the BIOS call.
+    (numerator / denominator, numerator % denominator)
+  } else {
+    let div_out: i32;
+    let rem_out: i32;
+    unsafe {
+      // In: numerator in `r0`, denominator in `r1`.
+      // Out: quotient in `r0`, remainder in `r1`. `r3` is declared clobbered.
+      asm!(/* ASM */ "swi 0x06"
+          :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out)
+          :/* INP */ "{r0}"(numerator), "{r1}"(denominator)
+          :/* CLO */ "r3"
+          :/* OPT */
+      );
+    }
+    (div_out, rem_out)
+  }
+}
+
+/// As `div_rem`, keeping only the `div` output.
+///
+/// ## Panics
+///
+/// If the denominator is 0 (the check happens inside `div_rem`).
+#[inline(always)]
+pub fn div(numerator: i32, denominator: i32) -> i32 {
+  div_rem(numerator, denominator).0
+}
+
+/// As `div_rem`, keeping only the `rem` output.
+///
+/// ## Panics
+///
+/// If the denominator is 0 (the check happens inside `div_rem`).
+#[inline(always)]
+pub fn rem(numerator: i32, denominator: i32) -> i32 {
+  div_rem(numerator, denominator).1
+}
+
+// (`swi 0x07`): We deliberately don't implement this one. It's the same as DIV
+// but with reversed arguments, so it just runs 3 cycles slower as it does the
+// swap.
+
+/// (`swi 0x08`) Integer square root.
+///
+/// If you want more fractional precision, you can shift your input to the left
+/// by `2n` bits to get `n` more bits of fractional precision in your output.
+#[inline(always)]
+pub fn sqrt(val: u32) -> u16 {
+  let out: u16;
+  unsafe {
+    // `val` goes in through `r0` and the result comes back in `r0`; `r1` and
+    // `r3` are declared clobbered.
+    asm!(/* ASM */ "swi 0x08"
+        :/* OUT */ "={r0}"(out)
+        :/* INP */ "{r0}"(val)
+        :/* CLO */ "r1", "r3"
+        :/* OPT */
+    );
+  }
+  out
+}
+
+/// (`swi 0x09`) Gives the arctangent of `theta`.
+///
+/// The input format is 1 bit for sign, 1 bit for integral part, 14 bits for
+/// fractional part.
+///
+/// Accuracy suffers if `theta` is less than `-pi/4` or greater than `pi/4`.
+#[inline(always)]
+pub fn atan(theta: i16) -> i16 {
+  let out: i16;
+  unsafe {
+    // `theta` goes in through `r0` and the result comes back in `r0`; `r1`
+    // and `r3` are declared clobbered.
+    asm!(/* ASM */ "swi 0x09"
+        :/* OUT */ "={r0}"(out)
+        :/* INP */ "{r0}"(theta)
+        :/* CLO */ "r1", "r3"
+        :/* OPT */
+    );
+  }
+  out
+}
+
+/// (`swi 0x0A`) Gives the atan2 of `y` over `x`.
+///
+/// The output `theta` value maps into the range `[0, 2pi)`, or `0 .. 2pi` if
+/// you prefer Rust's range notation.
+///
+/// `y` and `x` use the same format as with `atan`: 1 bit for sign, 1 bit for
+/// integral, 14 bits for fractional.
+#[inline(always)]
+pub fn atan2(y: i16, x: i16) -> u16 {
+  let out: u16;
+  unsafe {
+    // Note the register order is the reverse of the parameter order: `x` goes
+    // in `r0` and `y` goes in `r1`. The result comes back in `r0`.
+    asm!(/* ASM */ "swi 0x0A"
+        :/* OUT */ "={r0}"(out)
+        :/* INP */ "{r0}"(x), "{r1}"(y)
+        :/* CLO */ "r3"
+        :/* OPT */
+    );
+  }
+  out
+}
+
+/// (`swi 0x0B`) "CpuSet", `u16` memory copy/fill.
+///
+/// * `count` is the number of `u16` values to copy (20 bits or less)
+/// * `fixed_source` argument, if true, turns this copying routine into a
+///   filling routine.
+///
+/// ## Safety
+///
+/// * Both pointers must be aligned
+#[inline(always)]
+pub unsafe fn cpu_set16(src: *const u16, dest: *mut u16, count: u32, fixed_source: bool) {
+  // Control word: `count` in the low bits, `fixed_source` as bit 24. `count`
+  // is not masked, so an oversized value would run into the flag bits.
+  let control = count + ((fixed_source as u32) << 24);
+  asm!(/* ASM */ "swi 0x0B"
+      :/* OUT */ // none
+      :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control)
+      :/* CLO */ // none
+      :/* OPT */ "volatile"
+  );
+}
+
+/// (`swi 0x0B`) "CpuSet", `u32` memory copy/fill.
+///
+/// * `count` is the number of `u32` values to copy (20 bits or less)
+/// * `fixed_source` argument, if true, turns this copying routine into a
+///   filling routine.
+///
+/// ## Safety
+///
+/// * Both pointers must be aligned
+#[inline(always)]
+pub unsafe fn cpu_set32(src: *const u32, dest: *mut u32, count: u32, fixed_source: bool) {
+  // Control word: `count` in the low bits, `fixed_source` as bit 24, and bit
+  // 26 always set (this is what differs from `cpu_set16`).
+  let control = count + ((fixed_source as u32) << 24) + (1 << 26);
+  asm!(/* ASM */ "swi 0x0B"
+      :/* OUT */ // none
+      :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control)
+      :/* CLO */ // none
+      :/* OPT */ "volatile"
+  );
+}
+
+/// (`swi 0x0C`) "CpuFastSet", copies memory in 32 byte chunks.
+///
+/// * The `count` value is the number of `u32` values to transfer (20 bits or
+///   less), and it's rounded up to the nearest multiple of 8 words.
+/// * The `fixed_source` argument, if true, turns this copying routine into a
+///   filling routine.
+///
+/// ## Safety
+///
+/// * Both pointers must be aligned
+#[inline(always)]
+pub unsafe fn cpu_fast_set(src: *const u32, dest: *mut u32, count: u32, fixed_source: bool) {
+  // Control word: `count` in the low bits, `fixed_source` as bit 24.
+  let control = count + ((fixed_source as u32) << 24);
+  asm!(/* ASM */ "swi 0x0C"
+      :/* OUT */ // none
+      :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control)
+      :/* CLO */ // none
+      :/* OPT */ "volatile"
+  );
+}
+
+/// (`swi 0x0D`) "GetBiosChecksum" (Undocumented)
+///
+/// Though we usually don't cover undocumented functionality, this one can make
+/// it into the crate.
+///
+/// The function computes the checksum of the BIOS data. You should get either
+/// `0xBAAE_187F` (GBA / GBA SP) or `0xBAAE_1880` (DS in GBA mode). If you get
+/// some other value I guess you're probably running on an emulator that just
+/// broke the fourth wall.
+pub fn get_bios_checksum() -> u32 {
+  let out: u32;
+  unsafe {
+    // No inputs; the checksum is read back from `r0`.
+    asm!(/* ASM */ "swi 0x0D"
+        :/* OUT */ "={r0}"(out)
+        :/* INP */ // none
+        :/* CLO */ // none
+        :/* OPT */ // none
+    );
+  }
+  out
+}
+
+// TODO: these things will require that we build special structs
+
+//BgAffineSet
+//ObjAffineSet
+//BitUnPack
+//LZ77UnCompReadNormalWrite8bit
+//LZ77UnCompReadNormalWrite16bit
+//HuffUnCompReadNormal
+//RLUnCompReadNormalWrite8bit
+//Diff8bitUnFilterWrite8bit
+//Diff8bitUnFilterWrite16bit
+//Diff16bitUnFilter
+
+/// (`swi 0x19`) "SoundBias", adjusts the volume level to a new level.
+///
+/// This increases or decreases the current level of the `SOUNDBIAS` register
+/// (with short delays) until at the new target level. The upper bits of the
+/// register are unaffected.
+///
+/// The final sound level setting will be `level` * `0x200`.
+pub fn sound_bias(level: u32) {
+  unsafe {
+    // `level` is handed to the BIOS in `r0`; nothing is read back.
+    asm!(/* ASM */ "swi 0x19"
+        :/* OUT */ // none
+        :/* INP */ "{r0}"(level)
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+
+//SoundDriverInit
+
+/// (`swi 0x1B`) "SoundDriverMode", sets the sound driver operation mode.
+///
+/// The `mode` input uses the following flags and bits:
+///
+/// * Bits 0-6: Reverb value
+/// * Bit 7: Reverb Enable
+/// * Bits 8-11: Simultaneously-produced channel count (default=8)
+/// * Bits 12-15: Master Volume (1-15, default=15)
+/// * Bits 16-19: Playback Frequency Index (see below, default=4)
+/// * Bits 20-23: "Final number of D/A converter bits (8-11 = 9-6bits, def. 9=8bits)" TODO: what the hek?
+/// * Bits 24 and up: Not used
+///
+/// The frequency index selects a frequency from the following array:
+/// * 0: 5734
+/// * 1: 7884
+/// * 2: 10512
+/// * 3: 13379
+/// * 4: 15768
+/// * 5: 18157
+/// * 6: 21024
+/// * 7: 26758
+/// * 8: 31536
+/// * 9: 36314
+/// * 10: 40137
+/// * 11: 42048
+pub fn sound_driver_mode(mode: u32) {
+  unsafe {
+    // `mode` is handed to the BIOS in `r0` as-is; no bits are validated here.
+    asm!(/* ASM */ "swi 0x1B"
+        :/* OUT */ // none
+        :/* INP */ "{r0}"(mode)
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+//TODO(lokathor): newtype this mode business.
+
+/// (`swi 0x1C`) "SoundDriverMain", main of the sound driver
+///
+/// You should call `SoundDriverVSync` immediately after the vblank interrupt
+/// fires.
+///
+/// "After that, this routine is called after BG and OBJ processing is
+/// executed." --what?
+#[inline(always)]
+pub fn sound_driver_main() {
+  unsafe {
+    // No inputs or outputs: the call exists only for its side effect.
+    asm!(/* ASM */ "swi 0x1C"
+        :/* OUT */ // none
+        :/* INP */ // none
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+
+/// (`swi 0x1D`) "SoundDriverVSync", resets the sound DMA.
+///
+/// The timing is critical, so you should call this _immediately_ after the
+/// vblank interrupt (every 1/60th of a second).
+#[inline(always)]
+pub fn sound_driver_vsync() {
+  unsafe {
+    // No inputs or outputs: the call exists only for its side effect.
+    asm!(/* ASM */ "swi 0x1D"
+        :/* OUT */ // none
+        :/* INP */ // none
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+
+/// (`swi 0x1E`) "SoundChannelClear", clears the direct sound channels and stops
+/// the sound.
+///
+/// "This function may not operate properly when the library which expands the
+/// sound driver feature is combined afterwards. In this case, do not use it."
+/// --what?
+#[inline(always)]
+pub fn sound_channel_clear() {
+  unsafe {
+    // No inputs or outputs: the call exists only for its side effect.
+    asm!(/* ASM */ "swi 0x1E"
+        :/* OUT */ // none
+        :/* INP */ // none
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+
+//MidiKey2Freq
+//MultiBoot
+
+/// (`swi 0x28`) "SoundDriverVSyncOff", disables sound
+///
+/// If you can't use vblank interrupts to ensure that `sound_driver_vsync` is
+/// called every 1/60th of a second for any reason you must use this function to
+/// stop sound DMA. Otherwise the DMA will overrun its buffer and cause random
+/// noise.
+#[inline(always)]
+pub fn sound_driver_vsync_off() {
+  unsafe {
+    // No inputs or outputs: the call exists only for its side effect.
+    asm!(/* ASM */ "swi 0x28"
+        :/* OUT */ // none
+        :/* INP */ // none
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
+
+/// (`swi 0x29`) "SoundDriverVSyncOn", enables sound that was stopped by
+/// `sound_driver_vsync_off`.
+///
+/// Restarts sound DMA system. After restarting the sound you must have a vblank
+/// interrupt followed by a `sound_driver_vsync` within 2/60th of a second.
+#[inline(always)]
+pub fn sound_driver_vsync_on() {
+  unsafe {
+    // No inputs or outputs: the call exists only for its side effect.
+    asm!(/* ASM */ "swi 0x29"
+        :/* OUT */ // none
+        :/* INP */ // none
+        :/* CLO */ // none
+        :/* OPT */ "volatile"
+    );
+  }
+}
--- a/src/builtins.rs
+++ b/src/builtins.rs
@ -0,0 +1,77 @@
+#![allow(missing_docs)]
+
+//! The module to provide "builtin" functions that LLVM expects.
+//!
+//! You shouldn't need to call anything in here yourself, it just has to be in
+//! the translation unit and LLVM will find it.
+
+/// Counts the leading zero bits of `x` with a binary search over the bit width.
+///
+/// `n` starts at the pointer width. Each step checks whether the upper half of
+/// the bits still under consideration is non-zero; if it is, that half's width
+/// is subtracted from `n` and `x` is replaced by the upper half.
+#[no_mangle]
+#[cfg(any(target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64"))]
+pub extern "C" fn __clzsi2(mut x: usize) -> usize {
+  // TODO: const this? Requires const if
+  let mut y: usize;
+  // Exactly one of these cfg blocks survives, giving the width of `usize`.
+  let mut n: usize = {
+    #[cfg(target_pointer_width = "64")]
+    {
+      64
+    }
+    #[cfg(target_pointer_width = "32")]
+    {
+      32
+    }
+    #[cfg(target_pointer_width = "16")]
+    {
+      16
+    }
+  };
+  // The 32-bit and 16-bit halving steps only exist on targets wide enough.
+  #[cfg(target_pointer_width = "64")]
+  {
+    y = x >> 32;
+    if y != 0 {
+      n -= 32;
+      x = y;
+    }
+  }
+  #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))]
+  {
+    y = x >> 16;
+    if y != 0 {
+      n -= 16;
+      x = y;
+    }
+  }
+  y = x >> 8;
+  if y != 0 {
+    n -= 8;
+    x = y;
+  }
+  y = x >> 4;
+  if y != 0 {
+    n -= 4;
+    x = y;
+  }
+  y = x >> 2;
+  if y != 0 {
+    n -= 2;
+    x = y;
+  }
+  // `x` is now at most two bits wide. If bit 1 is set both remaining bits are
+  // accounted for (`n - 2`); otherwise `x` is 0 or 1 and `n - x` is the answer.
+  y = x >> 1;
+  if y != 0 {
+    n - 2
+  } else {
+    n - x
+  }
+}
+
+#[test]
+fn __clzsi2_test() {
+  // Start from the top bit of `usize`, whatever its width, instead of a
+  // hard-coded `1 << 63` that only fits a 64-bit `usize`.
+  let mut i: usize = 1 << (core::mem::size_of::<usize>() * 8 - 1);
+  while i > 0 {
+    assert_eq!(__clzsi2(i), i.leading_zeros() as usize);
+    i >>= 1;
+  }
+  // Zero has no set bit at all, so every bit counts as a leading zero.
+  assert_eq!(__clzsi2(0), 0usize.leading_zeros() as usize);
+}
+
+// TODO: add some shims
+// #[no_mangle] extern "aapcs" fn __aeabi_uidiv(num: u32, denom: u32) -> u32
+// #[no_mangle] extern "aapcs" fn __aeabi_idiv(num: i32, denom: i32) -> i32
--- a/src/core_extras.rs
+++ b/src/core_extras.rs
@ -1,39 +1,301 @@
 //! Things that I wish were in core, but aren't.

-/// A simple wrapper for any `*mut T` to adjust the basic operations.
+//TODO(Lokathor): reorganize as gba::core_extras::fixed_point and gba::core_extras::volatile ?
+
+use core::{cmp::Ordering, iter::FusedIterator, marker::PhantomData, num::NonZeroUsize};
+
+/// Abstracts the use of a volatile hardware address.
 ///
-/// Read and Write are made to be volatile. Offset is made to be
-/// wrapping_offset. This makes it much easier to correctly work with IO
-/// Registers and all display related memory on the GBA.
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)]
+/// If you're trying to do anything other than abstract a volatile hardware
+/// device then you _do not want to use this type_. Use one of the many other
+/// smart pointer types.
+///
+/// A volatile address doesn't store a value in the normal way: It maps to some
+/// real hardware _other than_ RAM, and that hardware might have any sort of
+/// strange rules. The specifics of reading and writing depend on the hardware
+/// being mapped. For example, a particular address might be read only (ignoring
+/// writes), write only (returning some arbitrary value if you read it),
+/// "normal" read write (where you read back what you wrote), or some complex
+/// read-write situation where writes have an effect but you _don't_ read back
+/// what you wrote.
+///
+/// As you imagine it can be very unsafe. The design of this type is set up so
+/// that _creation_ is unsafe, and _use_ is safe. This gives an optimal
+/// experience, since you'll use memory locations a lot more often than you try
+/// to name them, on average.
+///
+/// `VolAddress` is _not_ a thread safe type. If your device is multi-threaded
+/// then you must arrange for synchronization in some other way. A `VolAddress`
+/// _can_ be used to share data between an interrupt running on a core and a
+/// thread running on that core as long as all access of that location is
+/// volatile (if you're using the `asm!` macro add the "volatile" option, if
+/// you're linking in ASM with the linker that's effectively volatile since the
+/// compiler doesn't get a chance to mess with it).
+///
+/// # Safety
+///
+/// In order for values of this type to operate correctly they must follow quite
+/// a few safety limits:
+///
+/// * The declared address must be non-null (it's stored as a `NonZeroUsize` to
+///   get the niche optimization, for better iteration results). This shouldn't
+///   be a big problem, since hardware can't really live at the null address.
+/// * The declared address must be aligned for the declared type of `T`.
+/// * The declared address must _always_ read as something that's a valid bit
+///   pattern for `T`. Don't pick any enums or things like that if your hardware
+///   doesn't back it up. If there's _any_ doubt at all, you must instead read
+///   or write an unsigned int of the correct bit size and then parse the bits
+///   by hand.
+/// * The declared address must be a part of the address space that Rust's
+///   allocator and/or stack frames will never use. If you're not sure, please
+///   re-read the hardware specs of your device and its memory map until you
+///   know.
+///
+/// The exact points of UB are if the address is ever 0, or if you ever `read`
+/// or `write` with the invalid pointer. For example, if you offset to some
+/// crazy (non-zero) value and then never use it that won't be an immediate
+/// trigger of UB.
+#[derive(Debug)]
 #[repr(transparent)]
-pub struct VolatilePtr<T>(pub *mut T);
-
-impl<T> core::fmt::Pointer for VolatilePtr<T> {
-  /// Formats exactly like the inner `*mut T`.
-  fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
-    write!(f, "{:p}", self.0)
+pub struct VolAddress<T> {
+  // The raw address value; `NonZeroUsize` means 0 can never be stored here.
+  address: NonZeroUsize,
+  // Zero-sized marker that records the target type `T` without holding one.
+  marker: PhantomData<*mut T>,
+}
+// Note(Lokathor): We have to hand implement all these traits because if we use
+// `derive` then they only get derived if the inner `T` has the trait. However,
+// since we're acting like a pointer to `T`, the capability we offer isn't
+// affected by whatever type `T` ends up being.
+// Cloning is a plain bit copy: the `Copy` impl just below has no `T` bound.
+impl<T> Clone for VolAddress<T> {
+  fn clone(&self) -> Self {
+    *self
+  }
+}
+impl<T> Copy for VolAddress<T> {}
+// Equality compares only the address value; `T` plays no part.
+impl<T> PartialEq for VolAddress<T> {
+  fn eq(&self, other: &Self) -> bool {
+    self.address == other.address
+  }
+}
+impl<T> Eq for VolAddress<T> {}
+// Ordering is total (always `Some`) and follows the numeric address value.
+impl<T> PartialOrd for VolAddress<T> {
+  fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+    Some(self.address.cmp(&other.address))
+  }
+}
+// Orders addresses by their numeric value.
+impl<T> Ord for VolAddress<T> {
+  fn cmp(&self, other: &Self) -> Ordering {
+    self.address.cmp(&other.address)
   }
 }

-impl<T> VolatilePtr<T> {
-  /// Performs a `read_volatile`.
-  pub unsafe fn read(&self) -> T {
-    self.0.read_volatile()
+impl<T> VolAddress<T> {
+  /// Constructs a new address.
+  ///
+  /// # Safety
+  ///
+  /// You must follow the standard safety rules as outlined in the type docs.
+  pub const unsafe fn new_unchecked(address: usize) -> Self {
+    VolAddress {
+      // No zero check happens here; passing 0 is the caller's error.
+      address: NonZeroUsize::new_unchecked(address),
+      marker: PhantomData,
+    }
   }

-  /// Performs a `write_volatile`.
-  pub unsafe fn write(&self, data: T) {
-    self.0.write_volatile(data);
+  /// Casts the type of `T` into type `Z`.
+  ///
+  /// The address value itself is carried over unchanged; only the target type
+  /// changes.
+  ///
+  /// # Safety
+  ///
+  /// You must follow the standard safety rules as outlined in the type docs.
+  pub const unsafe fn cast<Z>(self) -> VolAddress<Z> {
+    VolAddress {
+      address: self.address,
+      marker: PhantomData,
+    }
   }

-  /// Performs a `wrapping_offset`.
-  pub fn offset(self, count: isize) -> Self {
-    VolatilePtr(self.0.wrapping_offset(count))
+  /// Offsets the address by `offset` slots (like `pointer::wrapping_offset`).
+  ///
+  /// The byte distance is computed with wrapping arithmetic, so a negative
+  /// `offset` moves the address backwards instead of overflowing.
+  ///
+  /// # Safety
+  ///
+  /// You must follow the standard safety rules as outlined in the type docs.
+  pub unsafe fn offset(self, offset: isize) -> Self {
+    // TODO: const this
+    // `offset as usize` turns a negative offset into a huge value, so the
+    // scaling has to wrap too or it panics when overflow checks are enabled.
+    let byte_offset = (offset as usize).wrapping_mul(core::mem::size_of::<T>());
+    VolAddress {
+      address: NonZeroUsize::new_unchecked(self.address.get().wrapping_add(byte_offset)),
+      marker: PhantomData,
+    }
   }

-  /// Performs a cast into some new pointer type.
-  pub fn cast<Z>(self) -> VolatilePtr<Z> {
-    VolatilePtr(self.0 as *mut Z)
+  /// Checks that the current target type of this address is aligned at this
+  /// address value.
+  ///
+  /// Technically it's a safety violation to even make a `VolAddress` that isn't
+  /// aligned. However, I know you're gonna try doing the bad thing, and it's
+  /// better to give you a chance to call `is_aligned` and potentially back off
+  /// from the operation or throw a `debug_assert!` or something instead of
+  /// triggering UB. Eventually this will be `const fn`, which will potentially
+  /// let you spot errors without even having to run your program.
+  pub fn is_aligned(self) -> bool {
+    // TODO: const this
+    // Aligned means the address is an exact multiple of `align_of::<T>()`.
+    self.address.get() % core::mem::align_of::<T>() == 0
+  }
+
+  /// Makes an iterator starting here across the given number of slots.
+  ///
+  /// The iterator yields this address first and then each following slot,
+  /// `slots` addresses in total.
+  ///
+  /// # Safety
+  ///
+  /// The normal safety rules must be correct for each address iterated over.
+  pub const unsafe fn iter_slots(self, slots: usize) -> VolAddressIter<T> {
+    VolAddressIter { vol_address: self, slots }
+  }
+
+  // non-const and never can be.
+
+  /// Reads a `Copy` value out of the address.
+  ///
+  /// The `Copy` bound is actually supposed to be `!Drop`, but rust doesn't
+  /// allow negative trait bounds. If your type isn't `Copy` you can use the
+  /// `read_non_copy` fallback to do an unsafe read.
+  ///
+  /// That said, I don't think that you legitimately have hardware that maps to
+  /// a Rust type with a `Drop` impl. If you do please tell me, I'm interested
+  /// to hear about it.
+  pub fn read(self) -> T
+  where
+    T: Copy,
+  {
+    // Safe to expose because creating the `VolAddress` was the unsafe step
+    // (see the type docs); the access itself is a volatile read.
+    unsafe { (self.address.get() as *mut T).read_volatile() }
+  }
+
+  /// Reads a value out of the address with no trait bound.
+  ///
+  /// This performs the same volatile read as `read`, just without requiring
+  /// `T: Copy`.
+  ///
+  /// # Safety
+  ///
+  /// This is _not_ a move, it forms a bit duplicate of the current address
+  /// value. If `T` has a `Drop` trait that does anything it is up to you to
+  /// ensure that repeated drops do not cause UB (such as a double free).
+  pub unsafe fn read_non_copy(self) -> T {
+    (self.address.get() as *mut T).read_volatile()
+  }
+
+  /// Writes a value to the address.
+  ///
+  /// Semantically, the value is moved into the `VolAddress` and then forgotten,
+  /// so if `T` has a `Drop` impl then that will never get executed. This is
+  /// "safe" under Rust's safety rules, but could cause something unintended
+  /// (eg: a memory leak).
+  pub fn write(self, val: T) {
+    // Safe to expose because creating the `VolAddress` was the unsafe step
+    // (see the type docs); the access itself is a volatile write.
+    unsafe { (self.address.get() as *mut T).write_volatile(val) }
+  }
+}
+
+/// An iterator that produces a series of `VolAddress` values.
+#[derive(Debug)]
+pub struct VolAddressIter<T> {
+  // The address that the next call to `next` will hand out.
+  vol_address: VolAddress<T>,
+  // How many addresses are still left to hand out.
+  slots: usize,
+}
+// Hand-written so that cloning doesn't require `T: Clone`.
+impl<T> Clone for VolAddressIter<T> {
+  fn clone(&self) -> Self {
+    VolAddressIter {
+      vol_address: self.vol_address,
+      slots: self.slots,
+    }
+  }
+}
+// Two iterators are equal when they sit at the same address with the same
+// number of slots remaining.
+impl<T> PartialEq for VolAddressIter<T> {
+  fn eq(&self, other: &Self) -> bool {
+    self.vol_address == other.vol_address && self.slots == other.slots
+  }
+}
+impl<T> Eq for VolAddressIter<T> {}
+impl<T> Iterator for VolAddressIter<T> {
+  type Item = VolAddress<T>;
+
+  fn next(&mut self) -> Option<Self::Item> {
+    if self.slots > 0 {
+      // Hand out the current address, then step forward one slot and count
+      // it off the remaining total.
+      let out = self.vol_address;
+      unsafe {
+        self.slots -= 1;
+        self.vol_address = self.vol_address.offset(1);
+      }
+      Some(out)
+    } else {
+      None
+    }
+  }
+}
+// Once `slots` reaches 0 it stays there, so `next` keeps returning `None`.
+impl<T> FusedIterator for VolAddressIter<T> {}
+
+/// This type is like `VolAddress`, but for when you have a block of values all
+/// in a row.
+///
+/// This is similar to the idea of an array or a slice, but called a "block"
+/// because you could _also_ construct a `[VolAddress]`, and we want to avoid
+/// any accidental confusion.
+#[derive(Debug)]
+pub struct VolAddressBlock<T> {
+  // Address of slot 0 of the block.
+  vol_address: VolAddress<T>,
+  // Number of slots in the block; the checked accessors compare against this.
+  slots: usize,
+}
+// Hand-written so that cloning doesn't require `T: Clone`.
+impl<T> Clone for VolAddressBlock<T> {
+  fn clone(&self) -> Self {
+    VolAddressBlock {
+      vol_address: self.vol_address,
+      slots: self.slots,
+    }
+  }
+}
+// Two blocks are equal when they start at the same address and have the same
+// number of slots.
+impl<T> PartialEq for VolAddressBlock<T> {
+  fn eq(&self, other: &Self) -> bool {
+    self.vol_address == other.vol_address && self.slots == other.slots
+  }
+}
+impl<T> Eq for VolAddressBlock<T> {}
+
+impl<T> VolAddressBlock<T> {
+  /// Constructs a new `VolAddressBlock`.
+  ///
+  /// # Safety
+  ///
+  /// The given `VolAddress` must be valid when offset by each of `0 .. slots`
+  pub const unsafe fn new_unchecked(vol_address: VolAddress<T>, slots: usize) -> Self {
+    VolAddressBlock { vol_address, slots }
+  }
+
+  /// Gives an iterator over this block's slots.
+  pub const fn iter(self) -> VolAddressIter<T> {
+    VolAddressIter {
+      vol_address: self.vol_address,
+      slots: self.slots,
+    }
+  }
+
+  /// Unchecked indexing into the block.
+  ///
+  /// # Safety
+  ///
+  /// The slot given must be in bounds.
+  pub unsafe fn index_unchecked(self, slot: usize) -> VolAddress<T> {
+    // TODO: const this
+    self.vol_address.offset(slot as isize)
+  }
+
+  /// Checked "indexing" style access of the block, giving either a `VolAddress` or a panic.
+  ///
+  /// # Panics
+  ///
+  /// If `slot` is not less than the block's slot count.
+  pub fn index(self, slot: usize) -> VolAddress<T> {
+    if slot < self.slots {
+      // In bounds, so the offset address is covered by the constructor's contract.
+      unsafe { self.vol_address.offset(slot as isize) }
+    } else {
+      panic!("Index Requested: {} >= Bound: {}", slot, self.slots)
+    }
+  }
+
+  /// Checked "getting" style access of the block, giving an Option value.
+  ///
+  /// Returns `None` when `slot` is not less than the block's slot count.
+  pub fn get(self, slot: usize) -> Option<VolAddress<T>> {
+    if slot < self.slots {
+      unsafe { Some(self.vol_address.offset(slot as isize)) }
+    } else {
+      None
+    }
   }
 }
--- a/src/fixed.rs
+++ b/src/fixed.rs
@ -0,0 +1,295 @@
+#![allow(non_camel_case_types)]
+
+//! Module for fixed point math types and operations.
+
+use core::{
+  marker::PhantomData,
+  ops::{Add, Div, Mul, Neg, Shl, Shr, Sub},
+};
+use typenum::{consts::False, marker_traits::Unsigned, type_operators::IsEqual, U8};
+
+/// Fixed point `T` value with `F` fractional bits.
+///
+/// `F` is a type-level unsigned integer (`typenum::Unsigned`), so the number
+/// of fractional bits is part of the type. The struct is `repr(transparent)`
+/// over the raw `T` value.
+#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)]
+#[repr(transparent)]
+pub struct Fx<T, F: Unsigned> {
+  // The raw stored value; `from_int_part` places the integer part above the
+  // low `F` bits.
+  num: T,
+  // Zero-sized marker tying the fractional bit count `F` to the type.
+  phantom: PhantomData<F>,
+}
+
+impl<T, F: Unsigned> Fx<T, F> {
+  /// Uses the provided value directly.
+  ///
+  /// No shifting is performed: `r` becomes the raw inner value as-is.
+  pub fn from_raw(r: T) -> Self {
+    Fx {
+      num: r,
+      phantom: PhantomData,
+    }
+  }
+
+  /// Unwraps the inner value.
+  ///
+  /// This is the raw stored value, returned without any shifting.
+  pub fn into_raw(self) -> T {
+    self.num
+  }
+
+  /// Casts the base type, keeping the fractional bit quantity the same.
+  ///
+  /// `op` is called exactly once with the raw inner value, so it only needs
+  /// to be `FnOnce`. Every `Fn` closure or function still qualifies, and
+  /// closures that consume captured state are accepted as well.
+  pub fn cast_inner<Z, C: FnOnce(T) -> Z>(self, op: C) -> Fx<Z, F> {
+    Fx {
+      num: op(self.num),
+      phantom: PhantomData,
+    }
+  }
+}
+
+impl<T: Add<Output = T>, F: Unsigned> Add for Fx<T, F> {
+  type Output = Self;
+  /// Adds the raw values. Both operands have the same number of fractional
+  /// bits, so no re-scaling is involved.
+  fn add(self, rhs: Self) -> Self::Output {
+    Self::from_raw(self.num + rhs.num)
+  }
+}
+
+impl<T: Sub<Output = T>, F: Unsigned> Sub for Fx<T, F> {
+  type Output = Self;
+  /// Subtracts the raw values. Both operands have the same number of
+  /// fractional bits, so no re-scaling is involved.
+  fn sub(self, rhs: Self) -> Self::Output {
+    Self::from_raw(self.num - rhs.num)
+  }
+}
+
+impl<T: Shl<u32, Output = T>, F: Unsigned> Shl<u32> for Fx<T, F> {
+  type Output = Self;
+  /// Shifts the raw value left by `rhs` bits; the fractional bit count of the
+  /// type does not change.
+  fn shl(self, rhs: u32) -> Self::Output {
+    let shifted = self.num << rhs;
+    Self::from_raw(shifted)
+  }
+}
+
+impl<T: Shr<u32, Output = T>, F: Unsigned> Shr<u32> for Fx<T, F> {
+  type Output = Self;
+  /// Shifts the raw value right by `rhs` bits; the fractional bit count of the
+  /// type does not change.
+  fn shr(self, rhs: u32) -> Self::Output {
+    let shifted = self.num >> rhs;
+    Self::from_raw(shifted)
+  }
+}
+
+impl<T: Neg<Output = T>, F: Unsigned> Neg for Fx<T, F> {
+  type Output = Self;
+  /// Negates the raw value.
+  fn neg(self) -> Self::Output {
+    Self::from_raw(-self.num)
+  }
+}
+
+// Generates the inherent methods that need a concrete integer base type
+// (integer literals and shifts) for `Fx<$t, F>`.
+macro_rules! fixed_point_methods {
+  ($t:ident) => {
+    impl<F: Unsigned> Fx<$t, F> {
+      /// Gives the smallest positive non-zero value (a raw value of `1`).
+      pub fn precision() -> Self {
+        Self::from_raw(1)
+      }
+
+      /// Makes a value with the integer part shifted into place, leaving the
+      /// fractional bits zero.
+      pub fn from_int_part(i: $t) -> Self {
+        Self::from_raw(i << F::U8)
+      }
+
+      /// Changes the fractional bit quantity, keeping the base type the same.
+      ///
+      /// The raw value is shifted left when `Y` has more fractional bits than
+      /// `F` and right when it has fewer. The `IsEqual<F, Output = False>`
+      /// bound rejects `Y == F` at compile time.
+      pub fn adjust_fractional_bits<Y: Unsigned + IsEqual<F, Output = False>>(self) -> Fx<$t, Y> {
+        let leftward_movement: i32 = Y::to_i32() - F::to_i32();
+        let num = if leftward_movement <= 0 {
+          self.num >> (-leftward_movement)
+        } else {
+          self.num << leftward_movement
+        };
+        Fx::from_raw(num)
+      }
+    }
+  };
+}
+
+fixed_point_methods! {u8}
+fixed_point_methods! {i8}
+fixed_point_methods! {i16}
+fixed_point_methods! {u16}
+fixed_point_methods! {i32}
+fixed_point_methods! {u32}
+
+// Implements `Mul` for the signed `Fx<$t, F>` types.
+//
+// The raw values are multiplied as `i32` (wrapping on overflow) and the
+// product is shifted right by `F` bits to restore the scale. A negative
+// product is negated before the shift and negated back afterwards, so the
+// discarded bits truncate toward zero. `i32::MIN` cannot be negated, so that
+// single product maps straight to `$t::MIN`.
+macro_rules! fixed_point_signed_multiply {
+  ($t:ident) => {
+    impl<F: Unsigned> Mul for Fx<$t, F> {
+      type Output = Self;
+      #[allow(clippy::suspicious_arithmetic_impl)]
+      fn mul(self, rhs: Fx<$t, F>) -> Self::Output {
+        let product = (self.num as i32).wrapping_mul(rhs.num as i32);
+        let num = if product >= 0 {
+          (product >> F::U8) as $t
+        } else if product == core::i32::MIN {
+          core::$t::MIN
+        } else {
+          (-((-product) >> F::U8)) as $t
+        };
+        Fx {
+          num,
+          phantom: PhantomData,
+        }
+      }
+    }
+  };
+}
+
+fixed_point_signed_multiply! {i8}
+fixed_point_signed_multiply! {i16}
+fixed_point_signed_multiply! {i32}
+
+// Implements `Mul` for the unsigned `Fx<$t, F>` types: the raw values are
+// multiplied as `u32` (wrapping on overflow), then the product is shifted
+// right by `F` bits to restore the scale.
+macro_rules! fixed_point_unsigned_multiply {
+  ($t:ident) => {
+    impl<F: Unsigned> Mul for Fx<$t, F> {
+      type Output = Self;
+      #[allow(clippy::suspicious_arithmetic_impl)]
+      fn mul(self, rhs: Fx<$t, F>) -> Self::Output {
+        let product = (self.num as u32).wrapping_mul(rhs.num as u32);
+        let rescaled = product >> F::U8;
+        Fx {
+          num: rescaled as $t,
+          phantom: PhantomData,
+        }
+      }
+    }
+  };
+}
+
+fixed_point_unsigned_multiply! {u8}
+fixed_point_unsigned_multiply! {u16}
+fixed_point_unsigned_multiply! {u32}
+
+// Implements `Div` for the signed `Fx<$t, F>` types.
+macro_rules! fixed_point_signed_division {
+  ($t:ident) => {
+    impl<F: Unsigned> Div for Fx<$t, F> {
+      type Output = Self;
+      #[allow(clippy::suspicious_arithmetic_impl)]
+      fn div(self, rhs: Fx<$t, F>) -> Self::Output {
+        // Pre-scale the numerator by `2^F` so that the quotient keeps `F`
+        // fractional bits. `wrapping_mul` silently discards any overflow.
+        let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8);
+        // The division itself is delegated to `crate::bios::div` on `i32`s.
+        // NOTE(review): behavior for a zero divisor is whatever
+        // `crate::bios::div` does with it — confirm.
+        let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32);
+        Fx {
+          num: divide_result as $t,
+          phantom: PhantomData,
+        }
+      }
+    }
+  };
+}
+
+fixed_point_signed_division! {i8}
+fixed_point_signed_division! {i16}
+fixed_point_signed_division! {i32}
+
+// Implements `Div` for the unsigned `Fx<$t, F>` types.
+//
+// NOTE(review): the body is the same as the signed version, so both operands
+// go through `as i32` and an `i32` division. For `u32`, any raw value with the
+// top bit set is reinterpreted as a negative number before dividing — confirm
+// whether that is intended.
+macro_rules! fixed_point_unsigned_division {
+  ($t:ident) => {
+    impl<F: Unsigned> Div for Fx<$t, F> {
+      type Output = Self;
+      #[allow(clippy::suspicious_arithmetic_impl)]
+      fn div(self, rhs: Fx<$t, F>) -> Self::Output {
+        // Pre-scale the numerator by `2^F` so that the quotient keeps `F`
+        // fractional bits. `wrapping_mul` silently discards any overflow.
+        let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8);
+        // The division itself is delegated to `crate::bios::div` on `i32`s.
+        let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32);
+        Fx {
+          num: divide_result as $t,
+          phantom: PhantomData,
+        }
+      }
+    }
+  };
+}
+
+fixed_point_unsigned_division! {u8}
+fixed_point_unsigned_division! {u16}
+fixed_point_unsigned_division! {u32}
+
+/// Alias for an `i16` fixed point value with 8 fractional bits.
+///
+/// Of the 16 bits in the base type, that leaves 8 for the signed integer part.
+pub type fx8_8 = Fx<i16, U8>;
+
+#[cfg(test)]
+mod fixed_tests {
+  use super::*;
+  use typenum::U4;
+
+  // Shorthand for the two formats exercised below: 4 fractional bits over
+  // `u16` (unsigned tests) and over `i16` (signed tests).
+  type UFx = Fx<u16, U4>;
+  type IFx = Fx<i16, U4>;
+
+  // `assert_eq!` is used throughout so that a failure prints both values.
+
+  #[test]
+  fn test_add() {
+    let one = UFx::from_int_part(1);
+    let two = UFx::from_int_part(2);
+    assert_eq!(one + one, two);
+  }
+
+  #[test]
+  fn test_sub() {
+    let one = UFx::from_int_part(1);
+    let two = UFx::from_int_part(2);
+    assert_eq!(two - one, one);
+  }
+
+  #[test]
+  fn test_shl() {
+    let one = UFx::from_int_part(1);
+    let two = UFx::from_int_part(2);
+    assert_eq!(one << 1, two);
+  }
+
+  #[test]
+  fn test_shr() {
+    let one = UFx::from_int_part(1);
+    let two = UFx::from_int_part(2);
+    assert_eq!(two >> 1, one);
+  }
+
+  #[test]
+  fn test_neg() {
+    let one = IFx::from_int_part(1);
+    let neg_one = IFx::from_int_part(-1);
+    assert_eq!(-one, neg_one);
+    assert_eq!(-(-one), one);
+  }
+
+  #[test]
+  fn test_mul() {
+    let half = UFx::from_int_part(1) >> 1;
+    let two = UFx::from_int_part(2);
+    let three = UFx::from_int_part(3);
+    let twelve = UFx::from_int_part(12);
+    assert_eq!(two * three, twelve * half);
+  }
+
+  // Covers the negative-product branch of the signed multiply.
+  #[test]
+  fn test_mul_signed() {
+    let neg_two = IFx::from_int_part(-2);
+    let three = IFx::from_int_part(3);
+    let four = IFx::from_int_part(4);
+    let neg_six = IFx::from_int_part(-6);
+    assert_eq!(neg_two * three, neg_six);
+    assert_eq!(neg_two * neg_two, four);
+  }
+
+  #[test]
+  fn test_div() {
+    let two = UFx::from_int_part(2);
+    let six = UFx::from_int_part(6);
+    let twelve = UFx::from_int_part(12);
+    assert_eq!(twelve / two, six);
+  }
+}
--- a/src/io.rs
+++ b/src/io.rs
@ -0,0 +1,13 @@
+//! This module contains definitions and types for the IO Registers.
+//!
+//! ## Naming
+//!
+//! In the interest of making things easy to search for, all io register
+//! constants are given the names used in the
+//! [GBATEK](https://problemkaputt.de/gbatek.htm) technical description.
+
+use super::*;
+
+use gba_proc_macro::register_bit;
+
+pub mod keypad;
--- a/src/io/keypad.rs
+++ b/src/io/keypad.rs
@ -0,0 +1,121 @@
+//! Allows access to the keypad.
+
+use super::*;
+
+/// The Key Input Register.
+///
+/// This register follows the "low-active" convention. If you want your code to
+/// follow the "high-active" convention (hint: you probably do, it's far easier
+/// to work with) then call `read_key_input()` rather than reading this register
+/// directly. It will perform the necessary bit flip operation for you.
+// SAFETY: NOTE(review): relies on `0x400_0130` being the address of the
+// KEYINPUT register on the GBA — confirm against GBATEK.
+pub const KEYINPUT: VolAddress<u16> = unsafe { VolAddress::new_unchecked(0x400_0130) };
+
+/// A "tribool" value helps us interpret the arrow pad.
+///
+/// Produced by `KeyInput::column_direction` and `KeyInput::row_direction`.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+#[repr(i32)]
+#[allow(missing_docs)]
+pub enum TriBool {
+  // Left (columns) or up (rows).
+  Minus = -1,
+  // Neither direction of the axis is pressed.
+  Neutral = 0,
+  // Right (columns) or down (rows).
+  Plus = 1,
+}
+
+newtype! {
+  /// Records a particular key press combination.
+  ///
+  /// Methods here follow the "high-active" convention, where a bit is enabled
+  /// when it's part of the set.
+  #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
+  KeyInput, u16
+}
+
+#[allow(missing_docs)]
+impl KeyInput {
+  register_bit!(A_BIT, u16, 1, a_pressed);
+  register_bit!(B_BIT, u16, 1 << 1, b_pressed);
+  register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed);
+  register_bit!(START_BIT, u16, 1 << 3, start_pressed);
+  register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed);
+  register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed);
+  register_bit!(UP_BIT, u16, 1 << 6, up_pressed);
+  register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed);
+  register_bit!(R_BIT, u16, 1 << 8, r_pressed);
+  register_bit!(L_BIT, u16, 1 << 9, l_pressed);
+
+  /// Takes the symmetric difference (XOR) between these keys and another set
+  /// of keys.
+  ///
+  /// The result holds exactly the keys that are in one of the two sets but not
+  /// in both. Note that this is _not_ the plain set difference (`self` minus
+  /// `other`): keys that are only in `other` are part of the result too.
+  pub fn difference(self, other: Self) -> Self {
+    KeyInput(self.0 ^ other.0)
+  }
+
+  /// Gives the arrow pad value as a tribool, with Plus being increased column
+  /// value (right).
+  ///
+  /// Right is checked first, so it wins if both right and left are set.
+  pub fn column_direction(self) -> TriBool {
+    if self.right_pressed() {
+      TriBool::Plus
+    } else if self.left_pressed() {
+      TriBool::Minus
+    } else {
+      TriBool::Neutral
+    }
+  }
+
+  /// Gives the arrow pad value as a tribool, with Plus being increased row
+  /// value (down).
+  ///
+  /// Down is checked first, so it wins if both down and up are set.
+  pub fn row_direction(self) -> TriBool {
+    if self.down_pressed() {
+      TriBool::Plus
+    } else if self.up_pressed() {
+      TriBool::Minus
+    } else {
+      TriBool::Neutral
+    }
+  }
+}
+
+/// Reads `KEYINPUT` and gives the current key state in the "high-active"
+/// convention used by `KeyInput`.
+pub fn read_key_input() -> KeyInput {
+  // Note(Lokathor): The 10 used bits are "low when pressed" style, but the 6
+  // unused bits are always low, so we XOR with this mask to get a result where
+  // the only active bits are currently pressed keys.
+  const USED_BITS_MASK: u16 = 0b0000_0011_1111_1111;
+  let low_active = KEYINPUT.read();
+  KeyInput(low_active ^ USED_BITS_MASK)
+}
+
+newtype! {
+  /// Allows configuration of when a keypad interrupt fires.
+  ///
+  /// * The most important bit here is the `irq_enabled` bit, which determines
+  ///   if an interrupt happens at all.
+  /// * The second most important bit is the `irq_logical_and` bit. If this bit
+  ///   is set, _all_ the selected buttons are required to be set for the
+  ///   interrupt to be fired (logical AND). If it's not set then _any_ of the
+  ///   buttons selected can be pressed to fire the interrupt (logical OR).
+  /// * All other bits select a particular button to be required or not as part
+  ///   of the interrupt firing.
+  ///
+  /// NOTE: This _only_ configures the operation of when keypad interrupts can
+  /// fire. You must still set the `IME` to have interrupts at all, and you must
+  /// further set `IE` for keypad interrupts to be possible.
+  #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
+  KeyInterruptSetting, u16
+}
+#[allow(missing_docs)]
+impl KeyInterruptSetting {
+  // Button selection bits, at the same bit positions as in `KeyInput`.
+  register_bit!(A_BIT, u16, 1, a_pressed);
+  register_bit!(B_BIT, u16, 1 << 1, b_pressed);
+  register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed);
+  register_bit!(START_BIT, u16, 1 << 3, start_pressed);
+  register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed);
+  register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed);
+  register_bit!(UP_BIT, u16, 1 << 6, up_pressed);
+  register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed);
+  register_bit!(R_BIT, u16, 1 << 8, r_pressed);
+  register_bit!(L_BIT, u16, 1 << 9, l_pressed);
+  // Interrupt control bits (see the type level docs for their meaning).
+  register_bit!(IRQ_ENABLE_BIT, u16, 1 << 14, irq_enabled);
+  register_bit!(IRQ_AND_BIT, u16, 1 << 15, irq_logical_and);
+}
+
+/// Use this to configure when a keypad interrupt happens.
+///
+/// See the `KeyInterruptSetting` type for more.
+// SAFETY: NOTE(review): relies on `0x400_0132` being the address of the KEYCNT
+// register on the GBA — confirm against GBATEK.
+pub const KEYCNT: VolAddress<KeyInterruptSetting> = unsafe { VolAddress::new_unchecked(0x400_0132) };
--- a/src/io_registers.rs
+++ b/src/io_registers.rs
@ -15,19 +15,20 @@

 // TODO(lokathor): IO Register newtypes.

-use gba_proc_macro::{newtype, register_bit};
+use gba_proc_macro::register_bit;

 use super::*;

 /// LCD Control. Read/Write.
 ///
 /// * [gbatek entry](http://problemkaputt.de/gbatek.htm#lcdiodisplaycontrol)
-pub const DISPCNT: VolatilePtr<u16> = VolatilePtr(0x400_0000 as *mut u16);
+pub const DISPCNT: VolAddress<DisplayControlSetting> = unsafe { VolAddress::new_unchecked(0x400_0000) };

 newtype!(
+  /// A newtype over the various display control options that you have on a GBA.
+  #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
  DisplayControlSetting,
-  u16,
-  "A newtype over the various display control options that you have on a GBA."
+  u16
 );

 #[allow(missing_docs)]
@ -97,24 +98,19 @@ pub enum DisplayControlMode {

 /// Assigns the given display control setting.
 pub fn set_display_control(setting: DisplayControlSetting) {
-  unsafe {
-    DISPCNT.write(setting.0);
-  }
+  DISPCNT.write(setting);
 }
 /// Obtains the current display control setting.
 pub fn display_control() -> DisplayControlSetting {
-  unsafe { DisplayControlSetting(DISPCNT.read()) }
+  DISPCNT.read()
 }

-/// General LCD Status (STAT,LYC)
-pub const DISPSTAT: VolatilePtr<u16> = VolatilePtr(0x400_0004 as *mut u16);
-
 /// Vertical Counter (LY)
-pub const VCOUNT: VolatilePtr<u16> = VolatilePtr(0x400_0006 as *mut u16);
+pub const VCOUNT: VolAddress<u16> = unsafe { VolAddress::new_unchecked(0x400_0006) };

 /// Obtains the current VCount value.
 pub fn vcount() -> u16 {
-  unsafe { VCOUNT.read() }
+  VCOUNT.read()
 }

 /// Performs a busy loop until VBlank starts.
@ -128,369 +124,3 @@ pub fn wait_until_vdraw() {
  // TODO: make this the better version with BIOS and interrupts and such.
  while vcount() >= SCREEN_HEIGHT as u16 {}
 }
-
-/// BG0 Control
-pub const BG0CNT: VolatilePtr<u16> = VolatilePtr(0x400_0008 as *mut u16);
-
-/// BG1 Control
-pub const BG1CNT: VolatilePtr<u16> = VolatilePtr(0x400_000A as *mut u16);
-
-/// BG2 Control
-pub const BG2CNT: VolatilePtr<u16> = VolatilePtr(0x400_000C as *mut u16);
-
-/// BG3 Control
-pub const BG3CNT: VolatilePtr<u16> = VolatilePtr(0x400_000E as *mut u16);
-
-/// BG0 X-Offset
-pub const BG0HOFS: VolatilePtr<u16> = VolatilePtr(0x400_0010 as *mut u16);
-
-/// BG0 Y-Offset
-pub const BG0VOFS: VolatilePtr<u16> = VolatilePtr(0x400_0012 as *mut u16);
-
-/// BG1 X-Offset
-pub const BG1HOFS: VolatilePtr<u16> = VolatilePtr(0x400_0014 as *mut u16);
-
-/// BG1 Y-Offset
-pub const BG1VOFS: VolatilePtr<u16> = VolatilePtr(0x400_0016 as *mut u16);
-
-/// BG2 X-Offset
-pub const BG2HOFS: VolatilePtr<u16> = VolatilePtr(0x400_0018 as *mut u16);
-
-/// BG2 Y-Offset
-pub const BG2VOFS: VolatilePtr<u16> = VolatilePtr(0x400_001A as *mut u16);
-
-/// BG3 X-Offset
-pub const BG3HOFS: VolatilePtr<u16> = VolatilePtr(0x400_001C as *mut u16);
-
-/// BG3 Y-Offset
-pub const BG3VOFS: VolatilePtr<u16> = VolatilePtr(0x400_001E as *mut u16);
-
-/// BG2 Rotation/Scaling Parameter A (dx)
-pub const BG2PA: VolatilePtr<u16> = VolatilePtr(0x400_0020 as *mut u16);
-
-/// BG2 Rotation/Scaling Parameter B (dmx)
-pub const BG2PB: VolatilePtr<u16> = VolatilePtr(0x400_0022 as *mut u16);
-
-/// BG2 Rotation/Scaling Parameter C (dy)
-pub const BG2PC: VolatilePtr<u16> = VolatilePtr(0x400_0024 as *mut u16);
-
-/// BG2 Rotation/Scaling Parameter D (dmy)
-pub const BG2PD: VolatilePtr<u16> = VolatilePtr(0x400_0026 as *mut u16);
-
-/// BG2 Reference Point X-Coordinate
-pub const BG2X: VolatilePtr<u32> = VolatilePtr(0x400_0028 as *mut u32);
-
-/// BG2 Reference Point Y-Coordinate
-pub const BG2Y: VolatilePtr<u32> = VolatilePtr(0x400_002C as *mut u32);
-
-/// BG3 Rotation/Scaling Parameter A (dx)
-pub const BG3PA: VolatilePtr<u16> = VolatilePtr(0x400_0030 as *mut u16);
-
-/// BG3 Rotation/Scaling Parameter B (dmx)
-pub const BG3PB: VolatilePtr<u16> = VolatilePtr(0x400_0032 as *mut u16);
-
-/// BG3 Rotation/Scaling Parameter C (dy)
-pub const BG3PC: VolatilePtr<u16> = VolatilePtr(0x400_0034 as *mut u16);
-
-/// BG3 Rotation/Scaling Parameter D (dmy)
-pub const BG3PD: VolatilePtr<u16> = VolatilePtr(0x400_0036 as *mut u16);
-
-/// BG3 Reference Point X-Coordinate
-pub const BG3X: VolatilePtr<u32> = VolatilePtr(0x400_0038 as *mut u32);
-
-/// BG3 Reference Point Y-Coordinate
-pub const BG3Y: VolatilePtr<u32> = VolatilePtr(0x400_003C as *mut u32);
-
-/// Window 0 Horizontal Dimensions
-pub const WIN0H: VolatilePtr<u16> = VolatilePtr(0x400_0040 as *mut u16);
-
-/// Window 1 Horizontal Dimensions
-pub const WIN1H: VolatilePtr<u16> = VolatilePtr(0x400_0042 as *mut u16);
-
-/// Window 0 Vertical Dimensions
-pub const WIN0V: VolatilePtr<u16> = VolatilePtr(0x400_0044 as *mut u16);
-
-/// Window 1 Vertical Dimensions
-pub const WIN1V: VolatilePtr<u16> = VolatilePtr(0x400_0046 as *mut u16);
-
-/// Inside of Window 0 and 1
-pub const WININ: VolatilePtr<u16> = VolatilePtr(0x400_0048 as *mut u16);
-
-/// Inside of OBJ Window & Outside of Windows
-pub const WINOUT: VolatilePtr<u16> = VolatilePtr(0x400_004A as *mut u16);
-
-/// Mosaic Size
-pub const MOSAIC: VolatilePtr<u16> = VolatilePtr(0x400_004C as *mut u16);
-
-/// Color Special Effects Selection
-pub const BLDCNT: VolatilePtr<u16> = VolatilePtr(0x400_0050 as *mut u16);
-
-/// Alpha Blending Coefficients
-pub const BLDALPHA: VolatilePtr<u16> = VolatilePtr(0x400_0052 as *mut u16);
-
-/// Brightness (Fade-In/Out) Coefficient
-pub const BLDY: VolatilePtr<u16> = VolatilePtr(0x400_0054 as *mut u16);
-
-/// Channel 1 Sweep register       (NR10)
-pub const UND1CNT_L: VolatilePtr<u16> = VolatilePtr(0x400_0060 as *mut u16);
-
-/// Channel 1 Duty/Length/Envelope (NR11, NR12)
-pub const UND1CNT_H: VolatilePtr<u16> = VolatilePtr(0x400_0062 as *mut u16);
-
-/// Channel 1 Frequency/Control    (NR13, NR14)
-pub const UND1CNT_X: VolatilePtr<u16> = VolatilePtr(0x400_0064 as *mut u16);
-
-/// Channel 2 Duty/Length/Envelope (NR21, NR22)
-pub const UND2CNT_L: VolatilePtr<u16> = VolatilePtr(0x400_0068 as *mut u16);
-
-/// Channel 2 Frequency/Control    (NR23, NR24)
-pub const UND2CNT_H: VolatilePtr<u16> = VolatilePtr(0x400_006C as *mut u16);
-
-/// Channel 3 Stop/Wave RAM select (NR30)
-pub const UND3CNT_L: VolatilePtr<u16> = VolatilePtr(0x400_0070 as *mut u16);
-
-/// Channel 3 Length/Volume        (NR31, NR32)
-pub const UND3CNT_H: VolatilePtr<u16> = VolatilePtr(0x400_0072 as *mut u16);
-
-/// Channel 3 Frequency/Control    (NR33, NR34)
-pub const UND3CNT_X: VolatilePtr<u16> = VolatilePtr(0x400_0074 as *mut u16);
-
-/// Channel 4 Length/Envelope      (NR41, NR42)
-pub const UND4CNT_L: VolatilePtr<u16> = VolatilePtr(0x400_0078 as *mut u16);
-
-/// Channel 4 Frequency/Control    (NR43, NR44)
-pub const UND4CNT_H: VolatilePtr<u16> = VolatilePtr(0x400_007C as *mut u16);
-
-/// Control Stereo/Volume/Enable   (NR50, NR51)
-pub const UNDCNT_L: VolatilePtr<u16> = VolatilePtr(0x400_0080 as *mut u16);
-
-/// Control Mixing/DMA Control
-pub const UNDCNT_H: VolatilePtr<u16> = VolatilePtr(0x400_0082 as *mut u16);
-
-/// Control Sound on/off           (NR52)
-pub const UNDCNT_X: VolatilePtr<u16> = VolatilePtr(0x400_0084 as *mut u16);
-
-/// Sound PWM Control
-pub const UNDBIAS: VolatilePtr<u16> = VolatilePtr(0x400_0088 as *mut u16);
-
-/// Channel 3 Wave Pattern RAM (W/R)
-pub const WAVE_RAM0_L: VolatilePtr<u16> = VolatilePtr(0x400_0090 as *mut u16);
-
-/// Channel 3 Wave Pattern RAM (W/R)
-pub const WAVE_RAM0_H: VolatilePtr<u16> = VolatilePtr(0x400_0092 as *mut u16);
-
-/// Channel 3 Wave Pattern RAM (W/R)
-pub const WAVE_RAM1_L: VolatilePtr<u16> = VolatilePtr(0x400_0094 as *mut u16);
-
-/// Channel 3 Wave Pattern RAM (W/R)
-pub const WAVE_RAM1_H: VolatilePtr<u16> = VolatilePtr(0x400_0096 as *mut u16);
-
-/// Channel 3 Wave Pattern RAM (W/R)
-pub const WAVE_RAM2_L: VolatilePtr<u16> = VolatilePtr(0x400_0098 as *mut u16);
-
-/// Channel 3 Wave Pattern RAM (W/R)
-pub const WAVE_RAM2_H: VolatilePtr<u16> = VolatilePtr(0x400_009A as *mut u16);
-
-/// Channel 3 Wave Pattern RAM (W/R)
-pub const WAVE_RAM3_L: VolatilePtr<u16> = VolatilePtr(0x400_009C as *mut u16);
-
-/// Channel 3 Wave Pattern RAM (W/R)
-pub const WAVE_RAM3_H: VolatilePtr<u16> = VolatilePtr(0x400_009E as *mut u16);
-
-/// Channel A FIFO, Data 0-3
-pub const FIFO_A: VolatilePtr<u32> = VolatilePtr(0x400_00A0 as *mut u32);
-
-/// Channel B FIFO, Data 0-3
-pub const FIFO_B: VolatilePtr<u32> = VolatilePtr(0x400_00A4 as *mut u32);
-
-/// DMA 0 Source Address
-pub const DMA0SAD: VolatilePtr<u32> = VolatilePtr(0x400_00B0 as *mut u32);
-
-/// DMA 0 Destination Address
-pub const DMA0DAD: VolatilePtr<u32> = VolatilePtr(0x400_00B4 as *mut u32);
-
-/// DMA 0 Word Count
-pub const DMA0CNT_L: VolatilePtr<u16> = VolatilePtr(0x400_00B8 as *mut u16);
-
-/// DMA 0 Control
-pub const DMA0CNT_H: VolatilePtr<u16> = VolatilePtr(0x400_00BA as *mut u16);
-
-/// DMA 1 Source Address
-pub const DMA1SAD: VolatilePtr<u32> = VolatilePtr(0x400_00BC as *mut u32);
-
-/// DMA 1 Destination Address
-pub const DMA1DAD: VolatilePtr<u32> = VolatilePtr(0x400_00C0 as *mut u32);
-
-/// DMA 1 Word Count
-pub const DMA1CNT_L: VolatilePtr<u16> = VolatilePtr(0x400_00C4 as *mut u16);
-
-/// DMA 1 Control
-pub const DMA1CNT_H: VolatilePtr<u16> = VolatilePtr(0x400_00C6 as *mut u16);
-
-/// DMA 2 Source Address
-pub const DMA2SAD: VolatilePtr<u32> = VolatilePtr(0x400_00C8 as *mut u32);
-
-/// DMA 2 Destination Address
-pub const DMA2DAD: VolatilePtr<u32> = VolatilePtr(0x400_00CC as *mut u32);
-
-/// DMA 2 Word Count
-pub const DMA2CNT_L: VolatilePtr<u16> = VolatilePtr(0x400_00D0 as *mut u16);
-
-/// DMA 2 Control
-pub const DMA2CNT_H: VolatilePtr<u16> = VolatilePtr(0x400_00D2 as *mut u16);
-
-/// DMA 3 Source Address
-pub const DMA3SAD: VolatilePtr<u32> = VolatilePtr(0x400_00D4 as *mut u32);
-
-/// DMA 3 Destination Address
-pub const DMA3DAD: VolatilePtr<u32> = VolatilePtr(0x400_00D8 as *mut u32);
-
-/// DMA 3 Word Count
-pub const DMA3CNT_L: VolatilePtr<u16> = VolatilePtr(0x400_00DC as *mut u16);
-
-/// DMA 3 Control
-pub const DMA3CNT_H: VolatilePtr<u16> = VolatilePtr(0x400_00DE as *mut u16);
-
-/// Timer 0 Counter/Reload
-pub const TM0D: VolatilePtr<u16> = VolatilePtr(0x400_0100 as *mut u16);
-
-/// Timer 0 Control
-pub const TM0CNT: VolatilePtr<u16> = VolatilePtr(0x400_0102 as *mut u16);
-
-/// Timer 1 Counter/Reload
-pub const TM1D: VolatilePtr<u16> = VolatilePtr(0x400_0104 as *mut u16);
-
-/// Timer 1 Control
-pub const TM1CNT: VolatilePtr<u16> = VolatilePtr(0x400_0106 as *mut u16);
-
-/// Timer 2 Counter/Reload
-pub const TM2D: VolatilePtr<u16> = VolatilePtr(0x400_0108 as *mut u16);
-
-/// Timer 2 Control
-pub const TM2CNT: VolatilePtr<u16> = VolatilePtr(0x400_010A as *mut u16);
-
-/// Timer 3 Counter/Reload
-pub const TM3D: VolatilePtr<u16> = VolatilePtr(0x400_010C as *mut u16);
-
-/// Timer 3 Control
-pub const TM3CNT: VolatilePtr<u16> = VolatilePtr(0x400_010E as *mut u16);
-
-/// SIO Data (Normal-32bit Mode; shared with below)
-pub const SIODATA32: VolatilePtr<u32> = VolatilePtr(0x400_0120 as *mut u32);
-
-/// SIO Data 0 (Parent)    (Multi-Player Mode)
-pub const SIOMULTI0: VolatilePtr<u16> = VolatilePtr(0x400_0120 as *mut u16);
-
-/// SIO Data 1 (1st Child) (Multi-Player Mode)
-pub const SIOMULTI1: VolatilePtr<u16> = VolatilePtr(0x400_0122 as *mut u16);
-
-/// SIO Data 2 (2nd Child) (Multi-Player Mode)
-pub const SIOMULTI2: VolatilePtr<u16> = VolatilePtr(0x400_0124 as *mut u16);
-
-/// SIO Data 3 (3rd Child) (Multi-Player Mode)
-pub const SIOMULTI3: VolatilePtr<u16> = VolatilePtr(0x400_0126 as *mut u16);
-
-/// SIO Control Register
-pub const SIOCNT: VolatilePtr<u16> = VolatilePtr(0x400_0128 as *mut u16);
-
-/// D SIO Data (Local of MultiPlayer; shared below)
-pub const SIOMLT_SEN: VolatilePtr<u16> = VolatilePtr(0x400_012A as *mut u16);
-
-/// SIO Data (Normal-8bit and UART Mode)
-pub const SIODATA8: VolatilePtr<u16> = VolatilePtr(0x400_012A as *mut u16);
-
-/// Key Status
-pub const KEYINPUT: VolatilePtr<u16> = VolatilePtr(0x400_0130 as *mut u16);
-
-/// A "tribool" value helps us interpret the arrow pad.
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-#[repr(i32)]
-#[allow(missing_docs)]
-pub enum TriBool {
-  Minus = -1,
-  Neutral = 0,
-  Plus = 1,
-}
-
-newtype!(KeyInputSetting, u16, "A newtype over the key input state of the GBA");
-
-#[allow(missing_docs)]
-impl KeyInputSetting {
-  register_bit!(A_BIT, u16, 1, a_pressed);
-  register_bit!(B_BIT, u16, 1 << 1, b_pressed);
-  register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed);
-  register_bit!(START_BIT, u16, 1 << 3, start_pressed);
-  register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed);
-  register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed);
-  register_bit!(UP_BIT, u16, 1 << 6, up_pressed);
-  register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed);
-  register_bit!(R_BIT, u16, 1 << 8, r_pressed);
-  register_bit!(L_BIT, u16, 1 << 9, l_pressed);
-
-  /// Takes the difference between these keys and another set of keys.
-  pub fn difference(self, other: KeyInputSetting) -> KeyInputSetting {
-    KeyInputSetting(self.0 ^ other.0)
-  }
-
-  /// Gives the arrow pad value as a tribool, with Plus being increased column
-  /// value (right).
-  pub fn column_direction(self) -> TriBool {
-    if self.right_pressed() {
-      TriBool::Plus
-    } else if self.left_pressed() {
-      TriBool::Minus
-    } else {
-      TriBool::Neutral
-    }
-  }
-
-  /// Gives the arrow pad value as a tribool, with Plus being increased row
-  /// value (down).
-  pub fn row_direction(self) -> TriBool {
-    if self.down_pressed() {
-      TriBool::Plus
-    } else if self.up_pressed() {
-      TriBool::Minus
-    } else {
-      TriBool::Neutral
-    }
-  }
-}
-
-/// Gets the current state of the keys
-pub fn key_input() -> KeyInputSetting {
-  // Note(Lokathor): The 10 used bits are "low when pressed" style, but the 6
-  // unused bits are always low, so we XOR with this mask to get a result where
-  // the only active bits are currently pressed keys.
-  unsafe { KeyInputSetting(KEYINPUT.read() ^ 0b0000_0011_1111_1111) }
-}
-
-/// Key Interrupt Control
-pub const KEYCNT: VolatilePtr<u16> = VolatilePtr(0x400_0132 as *mut u16);
-
-/// SIO Mode Select/General Purpose Data
-pub const RCNT: VolatilePtr<u16> = VolatilePtr(0x400_0134 as *mut u16);
-
-/// SIO JOY Bus Control
-pub const JOYCNT: VolatilePtr<u16> = VolatilePtr(0x400_0140 as *mut u16);
-
-/// SIO JOY Bus Receive Data
-pub const JOY_RECV: VolatilePtr<u32> = VolatilePtr(0x400_0150 as *mut u32);
-
-/// SIO JOY Bus Transmit Data
-pub const JOY_TRANS: VolatilePtr<u32> = VolatilePtr(0x400_0154 as *mut u32);
-
-/// SIO JOY Bus Receive Status
-pub const JOYSTAT: VolatilePtr<u16> = VolatilePtr(0x400_0158 as *mut u16);
-
-/// Interrupt Enable Register
-pub const IE: VolatilePtr<u16> = VolatilePtr(0x400_0200 as *mut u16);
-
-/// Interrupt Request Flags / IRQ Acknowledge
-pub const IF: VolatilePtr<u16> = VolatilePtr(0x400_0202 as *mut u16);
-
-/// Game Pak Waitstate Control
-pub const WAITCNT: VolatilePtr<u16> = VolatilePtr(0x400_0204 as *mut u16);
-
-/// Interrupt Master Enable Register
-pub const IME: VolatilePtr<u16> = VolatilePtr(0x400_0208 as *mut u16);
--- a/src/lib.rs
+++ b/src/lib.rs
@ -1,12 +1,14 @@
 #![cfg_attr(not(test), no_std)]
-#![cfg_attr(not(test), feature(asm))]
+#![feature(asm)]
+#![feature(const_int_wrapping)]
+#![feature(min_const_unsafe_fn)]
 #![warn(missing_docs)]
 #![allow(clippy::cast_lossless)]
 #![deny(clippy::float_arithmetic)]

 //! This crate helps you write GBA ROMs.
 //!
-//! # SAFETY POLICY
+//! ## SAFETY POLICY
 //!
 //! Some parts of this crate are safe wrappers around unsafe operations. This is
 //! good, and what you'd expect from a Rust crate.
@ -16,78 +18,211 @@
 //!
 //! **Do not** use this crate in programs that aren't running on the GBA. If you
 //! do, it's a giant bag of Undefined Behavior.
-//!
-//! # TESTING POLICY
-//!
-//! It is the intent of the crate authors that as much of the crate as possible
-//! be written so that you can use `cargo test` for at least some parts of your
-//! code without everything exploding instantly. To that end, where possible we
-//! attempt to use `cfg` flags to make things safe for `cargo test`. Hopefully
-//! we got it all.
+
+/// Assists in defining a newtype wrapper over some base type.
+///
+/// Note that rustdoc and derives are all the "meta" stuff, so you can write all
+/// of your docs and derives in front of your newtype in the same way you would
+/// for a normal struct. After those come the new name and the type it wraps.
+///
+/// The macro _assumes_ that you'll be using it to wrap zero-safe numeric types,
+/// so it automatically provides a `const fn` method for `new` that just wraps
+/// `0`. If this is not desired you can add `, no frills` to the invocation.
+///
+/// Example:
+/// ```
+/// newtype! {
+///   /// Records a particular key press combination.
+///   #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
+///   KeyInput, u16
+/// }
+/// ```
+#[macro_export]
+macro_rules! newtype {
+  ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => {
+    $(#[$attr])*
+    #[repr(transparent)]
+    pub struct $new_name($old_name);
+    impl $new_name {
+      /// A `const` "zero value" constructor
+      pub const fn new() -> Self {
+        $new_name(0)
+      }
+    }
+  };
+  ($(#[$attr:meta])* $new_name:ident, $old_name:ident, no frills) => {
+    $(#[$attr])*
+    #[repr(transparent)]
+    pub struct $new_name($old_name);
+  };
+}
+
+pub mod builtins;
+
+pub mod fixed;
+
+pub mod bios;

 pub mod core_extras;
 pub(crate) use crate::core_extras::*;

-pub mod io_registers;
+pub mod io;

 pub mod video_ram;
-pub(crate) use crate::video_ram::*;

-/// Combines the Red, Blue, and Green provided into a single color value.
-pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 {
-  blue << 10 | green << 5 | red
+/// Performs unsigned divide and remainder, gives None if dividing by 0.
+pub fn divrem_u32(numer: u32, denom: u32) -> Option<(u32, u32)> {
+  // TODO: const this? Requires const if
+  if denom == 0 {
+    None
+  } else {
+    Some(unsafe { divrem_u32_unchecked(numer, denom) })
+  }
 }

-/// BIOS Call: Div (GBA SWI 0x06).
+/// Performs divide and remainder, no check for 0 division.
 ///
-/// Gives just the DIV output of `numerator / denominator`.
+/// # Safety
 ///
-/// # Panics
-///
-/// If `denominator` is 0.
-#[inline]
-pub fn div(numerator: i32, denominator: i32) -> i32 {
-  div_modulus(numerator, denominator).0
+/// If you call this with a denominator of 0 the result is implementation
+/// defined (not literal UB) including but not limited to: an infinite loop,
+/// panic on overflow, or incorrect output.
+pub unsafe fn divrem_u32_unchecked(numer: u32, denom: u32) -> (u32, u32) {
+  // TODO: const this? Requires const if
+  if (numer >> 5) < denom {
+    divrem_u32_simple(numer, denom)
+  } else {
+    divrem_u32_non_restoring(numer, denom)
+  }
 }

-/// BIOS Call: Div (GBA SWI 0x06).
-///
-/// Gives just the MOD output of `numerator / denominator`.
-///
-/// # Panics
-///
-/// If `denominator` is 0.
-#[inline]
-pub fn modulus(numerator: i32, denominator: i32) -> i32 {
-  div_modulus(numerator, denominator).1
+/// The simplest form of division. If N is too much larger than D this will be
+/// extremely slow. If N is close enough to D then it will likely be faster than
+/// the non_restoring form.
+fn divrem_u32_simple(mut numer: u32, denom: u32) -> (u32, u32) {
+  // TODO: const this? Requires const if
+  let mut quot = 0;
+  while numer >= denom {
+    numer -= denom;
+    quot += 1;
+  }
+  (quot, numer)
 }

-/// BIOS Call: Div (GBA SWI 0x06).
-///
-/// Gives both the DIV and MOD output of `numerator / denominator`.
-///
-/// # Panics
-///
-/// If `denominator` is 0.
-#[inline]
-pub fn div_modulus(numerator: i32, denominator: i32) -> (i32, i32) {
-  assert!(denominator != 0);
-  #[cfg(not(test))]
-  {
-    let div_out: i32;
-    let mod_out: i32;
-    unsafe {
-      asm!(/* assembly template */ "swi 0x06"
-          :/* output operands */ "={r0}"(div_out), "={r1}"(mod_out)
-          :/* input operands */ "{r0}"(numerator), "{r1}"(denominator)
-          :/* clobbers */ "r3"
-          :/* options */
-      );
+/// Takes a fixed quantity of time based on the bit width of the number (in this
+/// case 32).
+fn divrem_u32_non_restoring(numer: u32, denom: u32) -> (u32, u32) {
+  // TODO: const this? Requires const if
+  let mut r: i64 = numer as i64;
+  let d: i64 = (denom as i64) << 32;
+  let mut q: u32 = 0;
+  let mut i = 1 << 31;
+  while i > 0 {
+    if r >= 0 {
+      q |= i;
+      r = 2 * r - d;
+    } else {
+      r = 2 * r + d;
    }
-    (div_out, mod_out)
+    i >>= 1;
  }
-  #[cfg(test)]
-  {
-    (numerator / denominator, numerator % denominator)
+  q -= !q;
+  if r < 0 {
+    q -= 1;
+    r += d;
+  }
+  r >>= 32;
+  // TODO: remove this once we've done more checks here.
+  debug_assert!(r >= 0);
+  debug_assert!(r <= core::u32::MAX as i64);
+  (q, r as u32)
+}
+
+/// Performs signed divide and remainder, gives None if dividing by 0 or
+/// computing `MIN/-1`
+pub fn divrem_i32(numer: i32, denom: i32) -> Option<(i32, i32)> {
+  if denom == 0 || (numer == core::i32::MIN && denom == -1) {
+    None
+  } else {
+    Some(unsafe { divrem_i32_unchecked(numer, denom) })
  }
 }
+
+/// Performs signed divide and remainder, no check for 0 division or `MIN/-1`.
+///
+/// # Safety
+///
+/// * If you call this with a denominator of 0 the result is implementation
+///   defined (not literal UB) including but not limited to: an infinite loop,
+///   panic on overflow, or incorrect output.
+/// * If you call this with `MIN/-1` you'll get a panic in debug or just `MIN`
+///   in release (which is incorrect), because of how two's complement works.
+pub unsafe fn divrem_i32_unchecked(numer: i32, denom: i32) -> (i32, i32) {
+  // TODO: const this? Requires const if
+  let unsigned_numer = numer.abs() as u32;
+  let unsigned_denom = denom.abs() as u32;
+  let opposite_sign = (numer ^ denom) < 0;
+  let (udiv, urem) = if (numer >> 5) < denom {
+    divrem_u32_simple(unsigned_numer, unsigned_denom)
+  } else {
+    divrem_u32_non_restoring(unsigned_numer, unsigned_denom)
+  };
+  match (opposite_sign, numer < 0) {
+    (true, true) => (-(udiv as i32), -(urem as i32)),
+    (true, false) => (-(udiv as i32), urem as i32),
+    (false, true) => (udiv as i32, -(urem as i32)),
+    (false, false) => (udiv as i32, urem as i32),
+  }
+}
+
+/*
+#[cfg(test)]
+mod tests {
+  use super::*;
+  use quickcheck::quickcheck;
+
+  // We have an explicit property on the non_restoring division
+  quickcheck! {
+    fn divrem_u32_non_restoring_prop(num: u32, denom: u32) -> bool {
+      if denom > 0 {
+        divrem_u32_non_restoring(num, denom) == (num / denom, num % denom)
+      } else {
+        true
+      }
+    }
+  }
+
+  // We have an explicit property on the simple division
+  quickcheck! {
+    fn divrem_u32_simple_prop(num: u32, denom: u32) -> bool {
+      if denom > 0 {
+        divrem_u32_simple(num, denom) == (num / denom, num % denom)
+      } else {
+        true
+      }
+    }
+  }
+
+  // Test the u32 wrapper
+  quickcheck! {
+    fn divrem_u32_prop(num: u32, denom: u32) -> bool {
+      if denom > 0 {
+        divrem_u32(num, denom).unwrap() == (num / denom, num % denom)
+      } else {
+        divrem_u32(num, denom).is_none()
+      }
+    }
+  }
+
+  // Test the i32 wrapper
+  quickcheck! {
+    fn divrem_i32_prop(num: i32, denom: i32) -> bool {
+      if denom == 0 || num == core::i32::MIN && denom == -1 {
+        divrem_i32(num, denom).is_none()
+      } else {
+        divrem_i32(num, denom).unwrap() == (num / denom, num % denom)
+      }
+    }
+  }
+}
+*/
--- a/src/macros.rs
+++ b/src/macros.rs
@ -1,8 +0,0 @@
-//! Module for all macros.
-//!
-//! Macros are the only thing in Rust where declaration order matters, so we
-//! place all of them here regardless of what they do so that the macros module
-//! can appear at the "top" of the library and all other modules can see them
-//! properly.
-
-// no macros yet!
--- a/src/video_ram.rs
+++ b/src/video_ram.rs
@ -15,6 +15,8 @@

 pub use super::*;

+// TODO: kill all this too
+
 /// The physical width in pixels of the GBA screen.
 pub const SCREEN_WIDTH: isize = 240;

@ -28,6 +30,8 @@ pub const SCREEN_HEIGHT: isize = 160;
 /// value as just being a `usize`.
 pub const VRAM_BASE_ADDRESS: usize = 0x0600_0000;

+const MODE3_VRAM: VolAddress<u16> = unsafe { VolAddress::new_unchecked(VRAM_BASE_ADDRESS) };
+
 /// Draws a pixel to the screen while in Display Mode 3, with bounds checks.
 ///
 /// # Panics
@ -51,7 +55,7 @@ pub fn mode3_draw_pixel(col: isize, row: isize, color: u16) {
 /// * `col` must be in `0..SCREEN_WIDTH`
 /// * `row` must be in `0..SCREEN_HEIGHT`
 pub unsafe fn mode3_draw_pixel_unchecked(col: isize, row: isize, color: u16) {
-  VolatilePtr(VRAM_BASE_ADDRESS as *mut u16).offset(col + row * SCREEN_WIDTH).write(color);
+  MODE3_VRAM.offset(col + row * SCREEN_WIDTH).write(color);
 }

 /// Reads the given pixel of video memory according to Mode 3 placement.
@ -61,7 +65,7 @@ pub unsafe fn mode3_draw_pixel_unchecked(col: isize, row: isize, color: u16) {
 /// If the location is out of bounds you get `None`.
 pub fn mode3_read_pixel(col: isize, row: isize) -> Option<u16> {
  if col >= 0 && col < SCREEN_WIDTH && row >= 0 && row < SCREEN_HEIGHT {
-    unsafe { Some(VolatilePtr(VRAM_BASE_ADDRESS as *mut u16).offset(col + row * SCREEN_WIDTH).read()) }
+    unsafe { Some(MODE3_VRAM.offset(col + row * SCREEN_WIDTH).read()) }
  } else {
    None
  }
@ -72,9 +76,8 @@ pub unsafe fn mode3_clear_screen(color: u16) {
  // TODO: use DMA?
  let color = color as u32;
  let bulk_color = color << 16 | color;
-  let mut ptr = VolatilePtr(VRAM_BASE_ADDRESS as *mut u32);
-  for _ in 0..(SCREEN_HEIGHT * SCREEN_WIDTH / 2) {
-    ptr.write(bulk_color);
-    ptr = ptr.offset(1);
+  let block: VolAddressBlock<u32> = VolAddressBlock::new_unchecked(MODE3_VRAM.cast::<u32>(), (SCREEN_HEIGHT * SCREEN_WIDTH / 2) as usize);
+  for b in block.iter() {
+    b.write(bulk_color);
  }
 }
--- a/todo_check.bat
+++ b/todo_check.bat
@ -0,0 +1,12 @@
+@echo off
+
+echo -------
+echo -------
+
+set Wildcard=*.rs
+
+echo TODOS FOUND:
+findstr -s -n -i -l "TODO" %Wildcard%
+
+echo -------
+echo -------