diff --git a/.travis.yml b/.travis.yml index 45e0d04..2f5bf99 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ rust: before_script: - rustup component add rust-src + - rustup component add clippy - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) - (test -x $HOME/.cargo/bin/cargo-xbuild || cargo install cargo-xbuild) - (test -x $HOME/.cargo/bin/cargo-make || cargo install cargo-make) @@ -27,9 +28,15 @@ script: - export PATH="$PATH:/opt/devkitpro/devkitARM/bin" - export PATH="$PATH:/opt/devkitpro/tools/bin" - cd .. - # Test the lib and then compile all examples with `cargo make` - - cargo test --lib && cargo test --lib --release - - cargo make + # Run all verifications, both debug and release + - cargo clippy + - cargo clippy --release + - cargo test --no-fail-fast --lib + - cargo test --no-fail-fast --lib --release + - cargo test --no-fail-fast --tests + - cargo test --no-fail-fast --tests --release + # Let cargo make take over the rest + - cargo make build-all # Test build the book so that a failed book build kills this run - cd book && mdbook build diff --git a/Cargo.toml b/Cargo.toml index 3c39e21..400c908 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,8 +12,13 @@ license = "Apache-2.0" publish = false [dependencies] +typenum = "1.10" gba-proc-macro = "0.2.1" +#[dev-dependencies] +#quickcheck="0.7" +# TODO: F + [profile.release] lto = true panic = "abort" diff --git a/Makefile.toml b/Makefile.toml index e01af30..7c937ec 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -55,12 +55,15 @@ fn main() -> std::io::Result<()> { ''' ] -[tasks.build] -dependencies = ["build-examples-debug", "build-examples-release", "pack-roms"] - [tasks.test] command = "cargo" args = ["test", "--lib"] +[tasks.justrelease] +dependencies = ["build-examples-release", "pack-roms"] + +[tasks.build-all] +dependencies = ["build-examples-debug", "build-examples-release", "pack-roms"] + [tasks.default] -alias = "build" +alias = "build-all" diff --git 
a/book/src-bak/ch01/hello1.md b/book/src-bak/ch01/hello1.md deleted file mode 100644 index 561f77e..0000000 --- a/book/src-bak/ch01/hello1.md +++ /dev/null @@ -1,115 +0,0 @@ - - -## A basic hello1 explanation - -So, what just happened? Even if you're used to Rust that might look pretty -strange. We'll go over most of the little parts right here, and then bigger -parts will get their own sections. - -```rust -#![feature(start)] -``` - -This enables the [start -feature](https://doc.rust-lang.org/beta/unstable-book/language-features/start.html), -which you would normally be able to read about in the unstable book, except that -the book tells you nothing at all except to look at the [tracking -issue](https://github.com/rust-lang/rust/issues/29633). - -Basically, a GBA game is even more low-level than the _normal_ amount of -low-level that you get from Rust, so we have to tell the compiler to account for -that by specifying a `#[start]`, and we need this feature on to do that. - -```rust -#![no_std] -``` - -There's no standard library available on the GBA, so we'll have to live a core -only life. - -```rust -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! { - loop {} -} -``` - -This sets our [panic -handler](https://doc.rust-lang.org/nightly/nomicon/panic-handler.html). -Basically, if we somehow trigger a panic, this is where the program goes. -However, right now we don't know how to get any sort of message out to the user -so... we do nothing at all. We _can't even return_ from here, so we just sit in -an infinite loop. The player will have to reset the universe from the outside. - -```rust -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { -``` - -This is our `#[start]`. We call it `main`, but it's not like a `main` that you'd -see in a Rust program. It's _more like_ the sort of `main` that you'd see in a C -program, but it's still **not** that either. 
If you compile a `#[start]` program -for a target with an OS such as `arm-none-eabi-nm` you can open up the debug -info and see that your result will have the symbol for the C `main` along side -the symbol for the start `main` that we write here. Our start `main` is just its -own unique thing, and the inputs and outputs have to be like that because that's -how `#[start]` is specified to work in Rust. - -If you think about it for a moment you'll probably realize that, those inputs -and outputs are totally useless to us on a GBA. There's no OS on the GBA to call -our program, and there's no place for our program to "return to" when it's done. - -Side note: if you want to learn more about stuff "before main gets called" you -can watch a great [CppCon talk](https://www.youtube.com/watch?v=dOfucXtyEsU) by -Matt Godbolt (yes, that Godbolt) where he delves into quite a bit of it. The -talk doesn't really apply to the GBA, but it's pretty good. - -```rust - unsafe { -``` - -I hope you're all set for some `unsafe`, because there's a lot of it to be had. - -```rust - (0x04000000 as *mut u16).write_volatile(0x0403); -``` - -Sure! - -```rust - (0x06000000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F); - (0x06000000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0); - (0x06000000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00); -``` - -Ah, of course. - -```rust - loop {} - } -} -``` - -And, as mentioned above, there's no place for a GBA program to "return to", so -we can't ever let `main` try to return there. Instead, we go into an infinite -`loop` that does nothing. The fact that this doesn't ever return an `isize` -value doesn't seem to bother Rust, because I guess we're at least not returning -any other type of thing instead. - -Fun fact: unlike in C++, an infinite loop with no side effects isn't Undefined -Behavior for us rustaceans... _semantically_. 
In truth LLVM has a [known -bug](https://github.com/rust-lang/rust/issues/28728) in this area, so we won't -actually be relying on empty loops in any future programs. - -## All Those Magic Numbers - -Alright, I cheated quite a bit in the middle there. The program works, but I -didn't really tell you why because I didn't really tell you what any of those -magic numbers mean or do. - -* `0x04000000` is the address of an IO Register called the Display Control. -* `0x06000000` is the start of Video RAM. - -So we write some magic to the display control register once, then we write some -other magic to three magic locations in the Video RAM. Somehow that shows three -dots. Gotta read on to find out why! diff --git a/book/src-bak/ch01/hello2.md b/book/src-bak/ch01/hello2.md deleted file mode 100644 index 7c991d4..0000000 --- a/book/src-bak/ch01/hello2.md +++ /dev/null @@ -1,132 +0,0 @@ -# hello2 - -Okay so let's have a look again: - -`hello1` - -```rust -#![feature(start)] -#![no_std] - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! { - loop {} -} - -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - unsafe { - (0x04000000 as *mut u16).write_volatile(0x0403); - (0x06000000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F); - (0x06000000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0); - (0x06000000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00); - loop {} - } -} -``` - -Now let's clean this up so that it's clearer what's going on. 
- -First we'll label that display control stuff, including using the `VolatilePtr` -type from the volatile explanation: - -```rust -pub const DISPCNT: VolatilePtr = VolatilePtr(0x04000000 as *mut u16); -pub const MODE3: u16 = 3; -pub const BG2: u16 = 0b100_0000_0000; -``` - -Next we make some const values for the actual pixel drawing - -```rust -pub const VRAM: usize = 0x06000000; -pub const SCREEN_WIDTH: isize = 240; -``` - -Note that VRAM has to be interpreted in different ways depending on mode, so we -just leave it as `usize` and we'll cast it into the right form closer to the -actual use. - -Next we want a small helper function for putting together a color value. -Happily, this one can even be declared as a `const` function. At the time of -writing, we've got the "minimal const fn" support in nightly. It really is quite -limited, but I'm happy to let rustc and LLVM pre-compute as much as they can -when it comes to the GBA's tiny CPU. - -```rust -pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red -} -``` - -Finally, we'll make a function for drawing a pixel in Mode 3. Even though it's -just a one-liner, having the "important parts" be labeled as function arguments -usually helps you think about it a lot better. - -```rust -pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); -} -``` - -So now we've got this: - -`hello2` - -```rust -#![feature(start)] -#![no_std] - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! 
{ - loop {} -} - -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - unsafe { - DISPCNT.write(MODE3 | BG2); - mode3_pixel(120, 80, rgb16(31, 0, 0)); - mode3_pixel(136, 80, rgb16(0, 31, 0)); - mode3_pixel(120, 96, rgb16(0, 0, 31)); - loop {} - } -} - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -#[repr(transparent)] -pub struct VolatilePtr(pub *mut T); -impl VolatilePtr { - pub unsafe fn read(&self) -> T { - core::ptr::read_volatile(self.0) - } - pub unsafe fn write(&self, data: T) { - core::ptr::write_volatile(self.0, data); - } - pub unsafe fn offset(self, count: isize) -> Self { - VolatilePtr(self.0.wrapping_offset(count)) - } -} - -pub const DISPCNT: VolatilePtr = VolatilePtr(0x04000000 as *mut u16); -pub const MODE3: u16 = 3; -pub const BG2: u16 = 0b100_0000_0000; - -pub const VRAM: usize = 0x06000000; -pub const SCREEN_WIDTH: isize = 240; - -pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red -} - -pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); -} -``` - -Exact same program that we started with, but much easier to read. - -Of course, in the full `gba` crate that this book is a part of we have these and -other elements all labeled and sorted out for you (not identically, but -similarly). Still, for educational purposes it's often best to do it yourself at -least once. diff --git a/book/src-bak/ch01/index.md b/book/src-bak/ch01/index.md deleted file mode 100644 index 7c21c79..0000000 --- a/book/src-bak/ch01/index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Ch 1: Hello GBA - -Traditionally a person writes a "hello, world" program so that they can test -that their development environment is setup properly and to just get a feel for -using the tools involved. To get an idea of what a small part of a source file -will look like. All that stuff. 
- -Normally, you write a program that prints "hello, world" to the terminal. The -GBA has no terminal, but it does have a screen, so instead we're going to draw -three dots to the screen. diff --git a/book/src-bak/ch01/volatile.md b/book/src-bak/ch01/volatile.md deleted file mode 100644 index 940e05b..0000000 --- a/book/src-bak/ch01/volatile.md +++ /dev/null @@ -1,70 +0,0 @@ -# Volatile - -Before we focus on what the numbers mean, first let's ask ourselves: Why are we -doing _volatile_ writes? You've probably never used that keywords before at all. -What _is_ volatile anyway? - -Well, the optimizer is pretty aggressive, and so it'll skip reads and writes -when it thinks can. Like if you write to a pointer once, and then again a moment -later, and it didn't see any other reads in between, it'll think that it can -just skip doing that first write since it'll get overwritten anyway. Sometimes -that's correct, but sometimes it's not. - -Marking a read or write as _volatile_ tells the compiler that it really must do -that action, and in the exact order that we wrote it out. It says that there -might even be special hardware side effects going on that the compiler isn't -aware of. In this case, the write to the display control register sets a video -mode, and the writes to the Video RAM set pixels that will show up on the -screen. - -Similar to "atomic" operations you might have heard about, all volatile -operations are enforced to happen in the exact order that you specify them, but -only relative to other volatile operations. So something like - -```rust -c.write_volatile(5); -a += b; -d.write_volatile(7); -``` - -might end up changing `a` either before or after the change to `c` (since the -value of `a` doesn't affect the write to `c`), but the write to `d` will -_always_ happen after the write to `c`, even though the compiler doesn't see any -direct data dependency there. 
- -If you ever go on to use volatile stuff on other platforms it's important to -note that volatile doesn't make things thread-safe, you still need atomic for -that. However, the GBA doesn't have threads, so we don't have to worry about -those sorts of thread safety concerns (there's interrupts, but that's another -matter). - -## Volatile by default - -Of course, writing out `volatile_write` every time is more than we wanna do. -There's clarity and then there's excessive. This is a chance to write our first -[newtype](https://doc.rust-lang.org/1.0.0/style/features/types/newtype.html). -Basically a type that's got the exact same binary representation as some other -type, but new methods and trait implementations. - -We want a `*mut T` that's volatile by default, and also when we offset it... -well the verdict is slightly unclear on how `offset` vs `wrapping_offset` work -when you're using pointers that you made up out of nowhere. I've asked the -experts and they genuinely weren't sure, so we'll make an `offset` method that -does a `wrapping_offset` just to be careful. - -```rust -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -#[repr(transparent)] -pub struct VolatilePtr(pub *mut T); -impl VolatilePtr { - pub unsafe fn read(&self) -> T { - core::ptr::read_volatile(self.0) - } - pub unsafe fn write(&self, data: T) { - core::ptr::write_volatile(self.0, data); - } - pub unsafe fn offset(self, count: isize) -> Self { - VolatilePtr(self.0.wrapping_offset(count)) - } -} -``` diff --git a/book/src-bak/ch02/index.md b/book/src-bak/ch02/index.md deleted file mode 100644 index 1263ced..0000000 --- a/book/src-bak/ch02/index.md +++ /dev/null @@ -1,22 +0,0 @@ -# Ch 2: User Input - -It's all well and good to draw three pixels, but they don't do anything yet. We -want them to do something, and for that we need to get some input from the user. - -The GBA, as I'm sure you know, has an arrow pad, A and B, L and R, Start and -Select. 
That's a little more than the NES/GB/CGB had, and a little less than the -SNES had. As you can guess, we get key state info from an IO register. - -Also, we will need a way to keep the program from running "too fast". On a -modern computer or console you do this with vsync info from the GPU and Monitor, -and on the GBA we'll be using vsync info from an IO register that tracks what -the display hardware is doing. - -As a way to apply our knowledge We'll make a simple "light cycle" game where -your dot leaves a trail behind them and you die if you go off the screen or if -you touch your own trail. We just make a copy of `hello2.rs` named -`light_cycle.rs` and then fill it in as we go through the chapter. Normally you -might not place the entire program into a single source file, particularly as it -grows over time, but since these are small examples it's much better to have -them be completely self contained than it is to have them be "properly -organized" for the long term. diff --git a/book/src-bak/ch03/gba_memory_mapping.md b/book/src-bak/ch03/gba_memory_mapping.md deleted file mode 100644 index f3c4e8a..0000000 --- a/book/src-bak/ch03/gba_memory_mapping.md +++ /dev/null @@ -1,256 +0,0 @@ -# GBA Memory Mapping - -The [GBA Memory Map](http://problemkaputt.de/gbatek.htm#gbamemorymap) has -several memory portions to it, each with their own little differences. Most of -the memory has pre-determined use according to the hardware, but there is also -space for games to use as a scratch pad in whatever way the game sees fit. - -The memory ranges listed here are _inclusive_, so they end with a lot of F's -and E's. - -We've talked about volatile memory before, but just as a reminder I'll say that -all of the memory we'll talk about here should be accessed using volatile with -two exceptions: - -1) Work RAM (both internal and external) can be used normally, and if the - compiler is able to totally elide some reads and writes that's okay. 
-2) However, if you set aside any space in Work RAM where an interrupt will - communicate with the main program then that specific location will have to - keep using volatile access, since the compiler never knows when an interrupt - will actually happen. - -## BIOS / System ROM - -* `0x0` to `0x3FFF` (16k) - -This is special memory for the BIOS. It is "read-only", but even then it's only -accessible when the program counter is pointing into the BIOS region. At all -other times you get a [garbage -value](http://problemkaputt.de/gbatek.htm#gbaunpredictablethings) back when you -try to read out of the BIOS. - -## External Work RAM / EWRAM - -* `0x2000000` to `0x203FFFF` (256k) - -This is a big pile of space, the use of which is up to each game. However, the -external work ram has only a 16-bit bus (if you read/write a 32-bit value it -silently breaks it up into two 16-bit operations) and also 2 wait cycles (extra -CPU cycles that you have to expend _per 16-bit bus use_). - -It's most helpful to think of EWRAM as slower, distant memory, similar to the -"heap" in a normal application. You can take the time to go store something -within EWRAM, or to load it out of EWRAM, but if you've got several operations -to do in a row and you're worried about time you should pull that value into -local memory, work on your local copy, and then push it back out to EWRAM. - -## Internal Work RAM / IWRAM - -* `0x3000000` to `0x3007FFF` (32k) - -This is a smaller pile of space, but it has a 32-bit bus and no wait. - -By default, `0x3007F00` to `0x3007FFF` is reserved for interrupt and BIOS use. -The rest of it is totally up to you. The user's stack space starts at -`0x3007F00` and proceeds _down_ from there. For best results you should probably -start at `0x3000000` and then go upwards. Under normal use it's unlikely that -the two memory regions will crash into each other. 
- -## IO Registers - -* `0x4000000` to `0x40003FE` - -We've touched upon a few of these so far, and we'll get to more later. At the -moment it is enough to say that, as you might have guessed, all of them live in -this region. Each individual register is a `u16` or `u32` and they control all -sorts of things. We'll actually be talking about some more of them in this very -chapter, because that's how we'll control some of the background and object -stuff. - -## Palette RAM / PALRAM - -* `0x5000000` to `0x50003FF` (1k) - -Palette RAM has a 16-bit bus, which isn't really a problem because it -conceptually just holds `u16` values. There's no automatic wait state, but if -you try to access the same location that the display controller is accessing you -get bumped by 1 cycle. Since the display controller can use the palette ram any -number of times per scanline it's basically impossible to predict if you'll have -to do a wait or not during VDraw. During VBlank you won't have any wait of -course. - -PALRAM is among the memory where there's weirdness if you try to write just one -byte: if you try to write just 1 byte, it writes that byte into _both_ parts of -the larger 16-bit location. This doesn't really affect us much with PALRAM, -because palette values are all supposed to be `u16` anyway. - -The palette memory actually contains not one, but _two_ sets of palettes. First -there's 256 entries for the background palette data (starting at `0x5000000`), -and then there's 256 entries for object palette data (starting at `0x5000200`). - -The GBA also has two modes for palette access: 8-bits-per-pixel (8bpp) and -4-bits-per-pixel (4bpp). - -* In 8bpp mode an 8-bit palette index value within a background or sprite - simply indexes directly into the 256 slots for that type of thing. 
-* In 4bpp mode a 4-bit palette index value within a background or sprite - specifies an index within a particular "palbank" (16 palette entries each), - and then a _separate_ setting outside of the graphical data determines which - palbank is to be used for that background or object (the screen entry data for - backgrounds, and the object attributes for objects). - -### Transparency - -When a pixel within a background or object specifies index 0 as its palette -entry it is treated as a transparent pixel. This means that in 8bpp mode there's -only 255 actual color options (0 being transparent), and in 4bpp mode there's -only 15 actual color options available within each palbank (the 0th entry of -_each_ palbank is transparent). - -Individual backgrounds, and individual objects, each determine if they're 4bpp -or 8bpp separately, so a given overall palette slot might map to a used color in -8bpp and an unused/transparent color in 4bpp. If you're a palette wizard. - -Palette slot 0 of the overall background palette is used to determine the -"backdrop" color. That's the color you see if no background or object ends up -being rendered within a given pixel. - -Since display mode 3 and display mode 5 don't use the palette, they cannot -benefit from transparency. - -## Video RAM / VRAM - -* `0x6000000` to `0x6017FFF` (96k) - -We've used this before! VRAM has a 16-bit bus and no wait. However, the same as -with PALRAM, the "you might have to wait if the display controller is looking at -it" rule applies here. - -Unfortunately there's not much more exact detail that can be given about VRAM. -The use of the memory depends on the video mode that you're using. - -One general detail of note is that you can't write individual bytes to any part -of VRAM. Depending on mode and location, you'll either get your bytes doubled -into both the upper and lower parts of the 16-bit location targeted, or you -won't even affect the memory. 
This usually isn't a big deal, except in two -situations: - -* In Mode 4, if you want to change just 1 pixel, you'll have to be very careful - to read the old `u16`, overwrite just the byte you wanted to change, and then - write that back. -* In any display mode, avoid using `memcopy` to place things into VRAM. - It's written to be byte oriented, and only does 32-bit transfers under select - conditions. The rest of the time it'll copy one byte at a time and you'll get - either garbage or nothing at all. - -## Object Attribute Memory / OAM - -* `0x7000000` to `0x70003FF` (1k) - -The Object Attribute Memory has a 32-bit bus and no default wait, but suffers -from the "you might have to wait if the display controller is looking at it" -rule. You cannot write individual bytes to OAM at all, but that's not really a -problem because all the fields of the data types within OAM are either `i16` or -`u16` anyway. - -Object attribute memory is the wildest yet: it conceptually contains two types -of things, but they're _interlaced_ with each other all the way through. - -Now, [GBATEK](http://problemkaputt.de/gbatek.htm#lcdobjoamattributes) and -[CowByte](https://www.cs.rit.edu/~tjh8300/CowBite/CowBiteSpec.htm#OAM%20(sprites)) -doesn't quite give names to the two data types here. -[TONC](https://www.coranac.com/tonc/text/regobj.htm#sec-oam) calls them -`OBJ_ATTR` and `OBJ_AFFINE`, but we'll be giving them names fitting with the -Rust naming convention. Just know that if you try to talk about it with others -they might not be using the same names. 
In Rust terms their layout would look -like this: - -```rust -#[repr(C)] -pub struct ObjectAttributes { - attr0: u16, - attr1: u16, - attr2: u16, - filler: i16, -} - -#[repr(C)] -pub struct AffineMatrix { - filler0: [u16; 3], - pa: i16, - filler1: [u16; 3], - pb: i16, - filler2: [u16; 3], - pc: i16, - filler3: [u16; 3], - pd: i16, -} -``` - -(Note: the `#[repr(C)]` part just means that Rust must lay out the data exactly -in the order we specify, which otherwise it is not required to do). - -So, we've got 1024 bytes in OAM and each `ObjectAttributes` value is 8 bytes, so -naturally we can support up to 128 objects. - -_At the same time_, we've got 1024 bytes in OAM and each `AffineMatrix` is 32 -bytes, so we can have 32 of them. - -But, as I said, these things are all _interlaced_ with each other. See how -there's "filler" fields in each struct? If we imagine the OAM as being just an -array of one type or the other, indexes 0/1/2/3 of the `ObjectAttributes` array -would line up with index 0 of the `AffineMatrix` array. It's kinda weird, but -that's just how it works. When we setup functions to read and write these values -we'll have to be careful with how we do it. We probably _won't_ want to use -those representations above, at least not with the `AffineMatrix` type, because -they're quite wasteful if you want to store just object attributes or just -affine matrices. - -## Game Pak ROM / Flash ROM - -* `0x8000000` to `0x9FFFFFF` (wait 0) -* `0xA000000` to `0xBFFFFFF` (wait 1) -* `0xC000000` to `0xDFFFFFF` (wait 2) -* Max of 32Mb - -These portions of the memory are less fixed, because they depend on the precise -details of the game pak you've inserted into the GBA. In general, they connect -to the game pak ROM and/or Flash memory, using a 16-bit bus. The ROM is -read-only, but the Flash memory (if any) allows writes. - -The game pak ROM is listed as being in three sections, but it's actually the -same memory being effectively mirrored into three different locations. 
The -mirror that you choose to access the game pak through affects which wait state -setting it uses (configured via IO register of course). Unfortunately, the -details come down more to the game pak hardware that you load your game onto -than anything else, so there's not much I can say right here. We'll eventually -talk about it more later when I'm forced to do the boring thing and just cover -all the IO registers that aren't covered anywhere else. - -One thing of note is the way that the 16-bit bus affects us: the instructions to -execute are coming through the same bus as the rest of the game data, so we want -them to be as compact as possible. The ARM chip in the GBA supports two -different instruction sets, "thumb" and "non-thumb". The thumb mode instructions -are 16-bit, so they can each be loaded one at a time, and the non-thumb -instructions are 32-bit, so we're at a penalty if we execute them directly out -of the game pak. However, some things will demand that we use non-thumb code, so -we'll have to deal with that eventually. It's possible to switch between modes, -but it's a pain to keep track of what mode you're in because there's not -currently support for it in Rust itself (perhaps some day). So we'll stick with -thumb code as much as we possibly can, that's why our target profile for our -builds starts with `thumbv4`. - -## Game Pak SRAM - -* `0xE000000` to `0xE00FFFF` (64k) - -The game pak SRAM has an 8-bit bus. Why did Pokémon always take so long to save? -Saving the whole game one byte at a time is why. The SRAM also has some amount -of wait, but as with the ROM, the details depend on your game pak hardware (and -also as with ROM, you can adjust the settings with an IO register, should you -need to). - -One thing to note about the SRAM is that the GBA has a Direct Memory Access -(DMA) feature that can be used for bulk memory movements in some cases, but the -DMA _cannot_ access the SRAM region. 
You really are stuck reading and writing -one byte at a time when you're using the SRAM. diff --git a/book/src/00-introduction/05-help_and_resources.md b/book/src/00-introduction/05-help_and_resources.md index 0d9fc8e..59a51f5 100644 --- a/book/src/00-introduction/05-help_and_resources.md +++ b/book/src/00-introduction/05-help_and_resources.md @@ -26,9 +26,19 @@ available while you're debugging problems. ## Information Resources -Ketsuban and I didn't magically learn this all from nowhere, we read various -technical manuals and guides ourselves and then distilled the knowledge (usually -oriented towards C and C++) into this book for Rust. +First, if I fail to describe something related to Rust, you can always try +checking in [The Rust +Reference](https://doc.rust-lang.org/nightly/reference/introduction.html) to see +if they cover it. You can mostly ignore that big scary red banner at the top, +things are a lot better documented than they make it sound. + +If you need help trying to fiddle your math down as hard as you can, there are +resources such as the [Bit Twiddling +Hacks](https://graphics.stanford.edu/~seander/bithacks.html) page. + +As to GBA related lore, Ketsuban and I didn't magically learn this all from +nowhere, we read various technical manuals and guides ourselves and then +distilled those works oriented around C and C++ into a book for Rust. We have personally used some or all of the following: diff --git a/book/src/01-quirks/01-no_std.md b/book/src/01-quirks/01-no_std.md index 13faa72..44fa757 100644 --- a/book/src/01-quirks/01-no_std.md +++ b/book/src/01-quirks/01-no_std.md @@ -89,10 +89,6 @@ the standard library types to be used "for free" once it was set up, or just a custom allocator that's GBA specific if Rust's global allocator style isn't a good fit for the GBA (I honestly haven't looked into it). -## LLVM Intrinsics - -TODO: explain that we'll occasionally have to provide some intrinsics. 
- ## Bare Metal Panic TODO: expand this @@ -114,3 +110,10 @@ TODO: expand this * Sending the message also automatically zeroes the output buffer. * View the output within the "Tools" menu, "View Logs...". Note that the Fatal message, if any doesn't get logged. + +TODO: this will probably fail without a `__clzsi2` implementation, which is a +good segue for the next section + +## LLVM Intrinsics + +TODO: explain that we'll occasionally have to provide some intrinsics. diff --git a/book/src/01-quirks/02-fixed_only.md b/book/src/01-quirks/02-fixed_only.md index 49e507a..bb0e401 100644 --- a/book/src/01-quirks/02-fixed_only.md +++ b/book/src/01-quirks/02-fixed_only.md @@ -1,13 +1,548 @@ # Fixed Only -In addition to not having the standard library available, we don't even have a -floating point unit available! We can't do floating point math in hardware! We -could still do floating point math as software computations if we wanted, but -that's a slow, slow thing to do. +In addition to not having much of the standard library available, we don't even +have a floating point unit available! We can't do floating point math in +hardware! We _could_ still do floating point math as pure software computations +if we wanted, but that's a slow, slow thing to do. -Instead let's learn about another way to have fractional values called "Fixed -Point" +Are there faster ways? It's the same answer as always: "Yes, but not without a +tradeoff." -## Fixed Point +The faster way is to represent fractional values using a system called a [Fixed +Point Representation](https://en.wikipedia.org/wiki/Fixed-point_arithmetic). +What do we trade away? Numeric range. -TODO: describe fixed point, make some types, do the impls, all that. +* Floating point math stores bits for base value and for exponent all according + to a single [well defined](https://en.wikipedia.org/wiki/IEEE_754) standard + for how such a complicated thing works. 
+* Fixed point math takes a normal integer (either signed or unsigned) and then + just "mentally associates" it (so to speak) with a fractional value for its + "units". If you have 3 and it's in units of 1/2, then you have 3/2, or 1.5 + using decimal notation. If your number is 256 and it's in units of 1/256th + then the value is 1.0 in decimal notation. + +Floating point math requires dedicated hardware to perform quickly, but it can +"trade" precision when it needs to represent extremely large or small values. + +Fixed point math is just integral math, which our GBA is reasonably good at, but +because your number is associated with a fixed fraction your results can get out +of range very easily. + +## Representing A Fixed Point Value + +So we want to associate our numbers with a mental note of what units they're in: + +* [PhantomData](https://doc.rust-lang.org/core/marker/struct.PhantomData.html) + is a type that tells the compiler "please remember this extra type info" when + you add it as a field to a struct. It goes away at compile time, so it's + perfect for us to use as space for a note to ourselves without causing runtime + overhead. +* The [typenum](https://crates.io/crates/typenum) crate is the best way to + represent a number within a type in Rust. Since our values on the GBA are + always specified as a number of fractional bits to count the number as, we can + put `typenum` types such as `U8` or `U14` into our `PhantomData` to keep track + of what's going on. + +Now, those of you who know me, or perhaps just know my reputation, will of +course _immediately_ question what happened to the real Lokathor. I do not care +for most crates, and I particularly don't care for using a crate in teaching +situations. 
However, `typenum` has a number of factors on its side that let me +suggest it in this situation: + +* It's version 1.10 with a total of 21 versions and nearly 700k downloads, so we + can expect that the major troubles have been shaken out and that it will remain + fairly stable for quite some time to come. +* It has no further dependencies that it's going to drag into the compilation. +* It happens all at compile time, so it's not clogging up our actual game with + any nonsense. +* The (interesting) subject of "how do you do math inside Rust's trait system?" is + totally separate from the concern that we're trying to focus on here. + +Therefore, we will consider it acceptable to use this crate. + +Now the `typenum` crate defines a whole lot, but we'll focus down to just a +single type at the moment: +[UInt](https://docs.rs/typenum/1.10.0/typenum/uint/struct.UInt.html) is a +type-level unsigned value. It's like `u8` or `u16`, but while they're types that +then have values, each `UInt` construction statically equates to a specific +value. Like how the `()` type only has one value, which is also called `()`. In +this case, you wrap up `UInt` around smaller `UInt` values and a `B1` or `B0` +value to build up the binary number that you want at the type level. + +In other words, instead of writing + +```rust +let six = 0b110; +``` + +We write + +```rust +type U6 = UInt<UInt<UInt<UTerm, B1>, B1>, B0>; +``` + +Wild, I know. If you look into the `typenum` crate you can do math and stuff +with these type level numbers, and we will a little bit below, but to start off +we _just_ need to store one in some `PhantomData`. + +### A struct For Fixed Point + +Our actual type for a fixed point value looks like this: + +```rust +use core::marker::PhantomData; +use typenum::marker_traits::Unsigned; + +/// Fixed point `T` value with `F` fractional bits.
+#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Fx<T, F: Unsigned> { + num: T, + phantom: PhantomData<F>, +} +``` + +This says that `Fx<T, F>` is a generic type that holds some base number type `T` +and a `F` type that's marking off how many fractional bits we're using. We only +want people giving unsigned type-level values for the `PhantomData` type, so we +use the trait bound `F: Unsigned`. + +We use +[repr(transparent)](https://github.com/rust-lang/rfcs/blob/master/text/1758-repr-transparent.md) +here to ensure that `Fx` will always be treated just like the base type in the +final program (in terms of bit pattern and ABI). + +If you go and check, this is _basically_ how the existing general purpose crates +for fixed point math represent their numbers. They're a little fancier about it +because they have to cover every case, and we only have to cover our GBA case. + +That's quite a bit to type though. We probably want to make a few type aliases +for things to be easier to look at. Unfortunately there's [no standard +notation](https://en.wikipedia.org/wiki/Fixed-point_arithmetic#Notation) for how +you write a fixed point type. We also have to limit ourselves to what's valid +for use in a Rust type too. I like the `fx` thing, so we'll use that for signed +and then `fxu` if we need an unsigned value. + +```rust +/// Alias for an `i16` fixed point value with 8 fractional bits. +pub type fx8_8 = Fx<i16, U8>; +``` + +Rust will complain about having `non_camel_case_types`, and you can shut that +warning up by putting an `#[allow(non_camel_case_types)]` attribute on the type +alias directly, or you can use `#![allow(non_camel_case_types)]` at the very top +of the module to shut up that warning for the whole module (which is what I +did). + +## Constructing A Fixed Point Value + +So how do we actually _make_ one of these values?
Well, we can always just wrap or unwrap any value in our `Fx` type: + +```rust +impl<T, F: Unsigned> Fx<T, F> { + /// Uses the provided value directly. + pub fn from_raw(r: T) -> Self { + Fx { + num: r, + phantom: PhantomData, + } + } + /// Unwraps the inner value. + pub fn into_raw(self) -> T { + self.num + } +} +``` + +I'd like to use the `From` trait of course, but it was giving me some trouble, I +think because of the orphan rule. Oh well. + +If we want to be particular to the fact that these are supposed to be +_numbers_... that gets tricky. Rust is actually quite bad at being generic about +number types. You can use the [num](https://crates.io/crates/num) crate, or you +can just use a macro and invoke it once per type. Guess what we're gonna do. + +```rust +macro_rules! fixed_point_methods { + ($t:ident) => { + impl<F: Unsigned> Fx<$t, F> { + /// Gives the smallest positive non-zero value. + pub fn precision() -> Self { + Fx { + num: 1, + phantom: PhantomData, + } + } + + /// Makes a value with the integer part shifted into place. + pub fn from_int_part(i: $t) -> Self { + Fx { + num: i << F::U8, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_methods! {u8} +fixed_point_methods! {i8} +fixed_point_methods! {i16} +fixed_point_methods! {u16} +fixed_point_methods! {i32} +fixed_point_methods! {u32} +``` + +Now _you'd think_ that those can be `const`, but at the moment you can't have a +`const` function with a bound on any trait other than `Sized`, so they have to +be normal functions. + +Also, we're doing something a little interesting there with `from_int_part`. We +can take our `F` type and get its constant value. There's other associated +constants if we want it in other types, and also non-const methods if you wanted +that for some reason (maybe passing it as a closure function? dunno). + +## Casting Base Values + +Next, once we have a value in one base type we will need to be able to move it +into another base type.
Unfortunately this means we gotta use the `as` operator, +which requires a concrete source type and a concrete destination type. There's +no easy way for us to make it generic here. + +We could let the user use `into_raw`, cast, and then do `from_raw`, but that's +error prone because they might change the fractional bit count accidentally. +This means that we have to write a function that does the casting while +perfectly preserving the fractional bit quantity. If we wrote one function for +each conversion it'd be like 30 different possible casts (6 base types that we +support, and then 5 possible target types). Instead, we'll write it just once in +a way that takes a closure, and let the user pass a closure that does the cast. +The compiler should merge it all together quite nicely for us once optimizations +kick in. + +This code goes outside the macro. I want to avoid too much code in the macro if +we can, it's a little easier to cope with I think. + +```rust + /// Casts the base type, keeping the fractional bit quantity the same. + pub fn cast_inner<Z, C: Fn(T) -> Z>(self, op: C) -> Fx<Z, F> { + Fx { + num: op(self.num), + phantom: PhantomData, + } + } +``` + +It's horrible and ugly, but Rust is just bad at numbers sometimes. + +## Adjusting Fractional Part + +In addition to the base value we might want to change our fractional bit +quantity. This is actually easier than it sounds, but it also requires us to be +tricky with the generics. We can actually use some typenum type level operators +here. + +This code goes inside the macro: we need to be able to use the left shift and +right shift, which is easiest when we just use the macro's `$t` as our type. We +could alternately put a similar function outside the macro and be generic on `T` +having the left and right shift operators by using a `where` clause. As much as +I'd like to avoid too much code being generated by macro, I'd _even more_ like +to avoid generic code with huge and complicated trait bounds.
It comes down to +style, and you gotta decide for yourself. + +```rust + /// Changes the fractional bit quantity, keeping the base type the same. + pub fn adjust_fractional_bits<Y: Unsigned + IsEqual<F, Output = False>>(self) -> Fx<$t, Y> { + let leftward_movement: i32 = Y::to_i32() - F::to_i32(); + Fx { + num: if leftward_movement > 0 { + self.num << leftward_movement + } else { + self.num >> (-leftward_movement) + }, + phantom: PhantomData, + } + } +``` + +There's a few things at work. First, we introduce `Y` as the target number of +fractional bits, and we _also_ limit it that the target bits quantity can't be +the same as we already have using a type-level operator. If it's the same as we +started with, why are you doing the cast at all? + +Now, once we're sure that the current bits and target bits aren't the same, we +compute `target - start`, and call this our "leftward movement". Example: if +we're targeting 8 bits and we're at 4 bits, we do 8-4 and get +4 as our leftward +movement. If the leftward_movement is positive we naturally shift our current +value to the left. If it's not positive then it _must_ be negative because we +eliminated 0 as a possibility using the type-level operator, so we shift to the +right by the negative value. + +## Addition, Subtraction, Shifting, Negative, Comparisons + +From here on we're getting help from [this blog +post](https://spin.atomicobject.com/2012/03/15/simple-fixed-point-math/) by [Job +Vranish](https://spin.atomicobject.com/author/vranish/), so thank them if you +learn something. + +I might have given away the game a bit with those `derive` traits on our fixed +point type. For a fair number of operations you can use the normal form of the +op on the inner bits as long as the fractional parts have the same quantity. +This includes equality and ordering (which we derived) as well as addition, +subtraction, and bit shifting (which we need to do ourselves). + +This code can go outside the macro, with sufficient trait bounds.
+ +```rust +impl<T: Add<Output = T>, F: Unsigned> Add for Fx<T, F> { + type Output = Self; + fn add(self, rhs: Fx<T, F>) -> Self::Output { + Fx { + num: self.num + rhs.num, + phantom: PhantomData, + } + } +} +``` + +The bound on `T` makes it so that `Fx<T, F>` can be added any time that `T` can +be added to its own type with itself as the output. We can use the exact same +pattern for `Sub`, `Shl`, `Shr`, and `Neg`. With enough trait bounds, we can do +anything! + +```rust +impl<T: Sub<Output = T>, F: Unsigned> Sub for Fx<T, F> { + type Output = Self; + fn sub(self, rhs: Fx<T, F>) -> Self::Output { + Fx { + num: self.num - rhs.num, + phantom: PhantomData, + } + } +} + +impl<T: Shl<u32, Output = T>, F: Unsigned> Shl<u32> for Fx<T, F> { + type Output = Self; + fn shl(self, rhs: u32) -> Self::Output { + Fx { + num: self.num << rhs, + phantom: PhantomData, + } + } +} + +impl<T: Shr<u32, Output = T>, F: Unsigned> Shr<u32> for Fx<T, F> { + type Output = Self; + fn shr(self, rhs: u32) -> Self::Output { + Fx { + num: self.num >> rhs, + phantom: PhantomData, + } + } +} + +impl<T: Neg<Output = T>, F: Unsigned> Neg for Fx<T, F> { + type Output = Self; + fn neg(self) -> Self::Output { + Fx { + num: -self.num, + phantom: PhantomData, + } + } +} +``` + +Unfortunately, for `Shl` and `Shr` to have as much coverage on our type as it +does on the base type (allowing just about any right hand side) we'd have to do +another macro, but I think just `u32` is fine. We can always add more later if +we need. + +We could also implement `BitAnd`, `BitOr`, `BitXor`, and `Not`, but they don't +seem relevant to our fixed point math use, and this section is getting long +already. Just use the same general patterns if you want to add it in your own +programs. Shockingly, `Rem` also works directly if you want it, though I don't +foresee us needing floating point remainder. Also, the GBA can't do hardware +division or remainder, and we'll have to work around that below when we +implement `Div` (which maybe we don't need, but it's complex enough I should +show it instead of letting people guess).
+ +**Note:** In addition to the various `Op` traits, there's also `OpAssign` +variants. Each `OpAssign` is the same as `Op`, but takes `&mut self` instead of +`self` and then modifies in place instead of producing a fresh value. In other +words, if you want both `+` and `+=` you'll need to do the `AddAssign` trait +too. It's not the worst thing to just write `a = a+b`, so I won't bother with +showing all that here. It's pretty easy to figure out for yourself if you want. + +## Multiplication + +This is where things get more interesting. When we have two numbers `A` and `B` +they really stand for `(a*f)` and `(b*f)`. If we write `A*B` then we're really +writing `(a*f)*(b*f)`, which can be rewritten as `(a*b)*f*f`, and now it's +obvious that we have one more `f` than we wanted to have. We have to do the +multiply of the inner value and then divide out the `f`. We divide by `1 << +bit_count`, so if we have 8 fractional bits we'll divide by 256. + +The catch is that, when we do the multiply we're _extremely_ likely to overflow +our base type with that multiplication step. Then we do that divide, and now our +result is basically nonsense. We can avoid this to some extent by casting up to +a higher bit type, doing the multiplication and division at higher precision, +and then casting back down. We want as much precision as possible without being +too inefficient, so we'll always cast up to 32-bit (on a 64-bit machine you'd +cast up to 64-bit instead). + +Naturally, any signed value has to be cast up to `i32` and any unsigned value +has to be cast up to `u32`, so we'll have to handle those separately. + +Also, instead of doing an _actual_ divide we can right-shift by the correct +number of bits to achieve the same effect. _Except_ when we have a signed value +that's negative, because actual division truncates towards zero and +right-shifting truncates towards negative infinity.
We can get around _this_ by +flipping the sign, doing the shift, and flipping the sign again (which sounds +silly but it's so much faster than doing an actual division). + +Also, again signed values can be annoying, because if the value _just happens_ +to be `i32::MIN` then when you negate it you'll have... _still_ a negative +value. I'm not 100% on this, but I think the correct thing to do at that point +is to give `$t::MIN` as our output num value. + +Did you get all that? Good, because this involves casting, we will need to +implement it three times, which calls for another macro. + +```rust +macro_rules! fixed_point_signed_multiply { + ($t:ident) => { + impl<F: Unsigned> Mul for Fx<$t, F> { + type Output = Self; + fn mul(self, rhs: Fx<$t, F>) -> Self::Output { + let pre_shift = (self.num as i32).wrapping_mul(rhs.num as i32); + if pre_shift < 0 { + if pre_shift == core::i32::MIN { + Fx { + num: core::$t::MIN, + phantom: PhantomData, + } + } else { + Fx { + num: (-((-pre_shift) >> F::U8)) as $t, + phantom: PhantomData, + } + } + } else { + Fx { + num: (pre_shift >> F::U8) as $t, + phantom: PhantomData, + } + } + } + } + }; +} + +fixed_point_signed_multiply! {i8} +fixed_point_signed_multiply! {i16} +fixed_point_signed_multiply! {i32} + +macro_rules! fixed_point_unsigned_multiply { + ($t:ident) => { + impl<F: Unsigned> Mul for Fx<$t, F> { + type Output = Self; + fn mul(self, rhs: Fx<$t, F>) -> Self::Output { + Fx { + num: ((self.num as u32).wrapping_mul(rhs.num as u32) >> F::U8) as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_unsigned_multiply! {u8} +fixed_point_unsigned_multiply! {u16} +fixed_point_unsigned_multiply! {u32} +``` + +## Division + +Division is similar to multiplication, but reversed. Which makes sense. This +time `A/B` gives `(a*f)/(b*f)` which is `a/b`, one _less_ `f` than we were +after. + +As with the multiplication version of things, we have to up-cast our inner value +as much as we can before doing the math, to allow for the most precision +possible.
+ +The snag here is that the GBA has no division or remainder. Instead, the GBA has +a BIOS function you can call to do `i32/i32` division. + +This is a potential problem for us though. If we have some unsigned value, we +need it to fit within the positive space of an `i32` _after the multiply_ so +that we can cast it to `i32`, call the BIOS function that only works on `i32` +values, and cast it back to its actual type. + +* If you have a u8 you're always okay, even with 8 fractional bits. +* If you have a u16 you're okay even with a maximum value up to 15 fractional + bits, but having a maximum value and 16 fractional bits makes it break. +* If you have a u32 you're probably going to be in trouble all the time. + +So... ugh, there's not much we can do about this. For now we'll just have to +suffer some. + +// TODO: find a numerics book that tells us how to do `u32/u32` divisions. + +```rust +macro_rules! fixed_point_signed_division { + ($t:ident) => { + impl<F: Unsigned> Div for Fx<$t, F> { + type Output = Self; + fn div(self, rhs: Fx<$t, F>) -> Self::Output { + let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); + let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); + Fx { + num: divide_result as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_signed_division! {i8} +fixed_point_signed_division! {i16} +fixed_point_signed_division! {i32} + +macro_rules! fixed_point_unsigned_division { + ($t:ident) => { + impl<F: Unsigned> Div for Fx<$t, F> { + type Output = Self; + fn div(self, rhs: Fx<$t, F>) -> Self::Output { + let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); + let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); + Fx { + num: divide_result as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_unsigned_division! {u8} +fixed_point_unsigned_division! {u16} +fixed_point_unsigned_division! {u32} +``` + +## Trigonometry + +TODO: look up tables! arcbits!
+ +## Just Using A Crate + +If, after seeing all that, and seeing that I still didn't even cover every +possible trait impl that you might want for all the possible types... if after +all that you feel too intimidated, then I'll cave a bit on your behalf and +suggest to you that the [fixed](https://crates.io/crates/fixed) crate seems to +be the best crate available for fixed point math. + +_I have not tested its use on the GBA myself_. + +It's just my recommendation from looking at the docs of the various options +available, if you really wanted to just have a crate for it. diff --git a/book/src/01-quirks/04-newtype.md b/book/src/01-quirks/04-newtype.md index 07244c9..f1c4be8 100644 --- a/book/src/01-quirks/04-newtype.md +++ b/book/src/01-quirks/04-newtype.md @@ -1,5 +1,8 @@ # Newtype +TODO: we've already used newtype twice by now (fixed point values and volatile +addresses), so we need to adjust how we start this section. + There's a great Zero Cost abstraction that we'll be using a lot that you might not already be familiar with: we're talking about the "Newtype Pattern"! @@ -27,32 +30,19 @@ cost at compile time. pub struct PixelColor(u16); ``` +TODO: we've already talked about repr(transparent) by now + Ah, except that, as I'm sure you remember from [The Rustonomicon](https://doc.rust-lang.org/nomicon/other-reprs.html#reprtransparent) -(and from [the -RFC](https://github.com/rust-lang/rfcs/blob/master/text/1758-repr-transparent.md) -too, of course), if we have a single field struct that's sometimes different -from having just the bare value, so we should be using `#[repr(transparent)]` -with our newtypes. +(and from the RFC too, of course), if we have a single field struct that's +sometimes different from having just the bare value, so we should be using +`#[repr(transparent)]` with our newtypes. 
```rust #[repr(transparent)] pub struct PixelColor(u16); ``` -Ah, and of course we'll need to make it so you can unwrap the value: - -```rust -#[repr(transparent)] -pub struct PixelColor(u16); - -impl From for u16 { - fn from(color: PixelColor) -> u16 { - color.0 - } -} -``` - And then we'll need to do that same thing for _every other newtype we want_. Except there's only two tiny parts that actually differ between newtype @@ -62,7 +52,12 @@ a job for a macro to me! ## Making It A Macro -The most basic version of the macro we want goes like this: +If you're going to do much with macros you should definitely read through [The +Little Book of Rust +Macros](https://danielkeep.github.io/tlborm/book/index.html), but we won't be +doing too much so you can just follow along here a bit if you like. + +The most basic version of a newtype macro starts like this: ```rust #[macro_export] @@ -74,8 +69,39 @@ macro_rules! newtype { } ``` -Except we also want to be able to add attributes (which includes doc comments), -so we upgrade our macro a bit: +The `#[macro_export]` makes it exported by the current module (like `pub` +kinda), and then we have one expansion option that takes an identifier, a `,`, +and then a second identifier. The new name is the outer type we'll be using, and +the old name is the inner type that's being wrapped. You'd use our new macro +something like this: + +```rust +newtype! {PixelColorCurly, u16} + +newtype!(PixelColorParens, u16); + +newtype![PixelColorBrackets, u16]; +``` + +Note that you can invoke the macro with the outermost grouping as any of `()`, +`[]`, or `{}`. It makes no particular difference to the macro. Also, that space +in the first version is kinda to show off that you can put white space in +between the macro name and the grouping if you want. The difference is mostly +style, but there are some rules and considerations here: + +* If you use curly braces then you _must not_ put a `;` after the invocation. 
+* If you use parentheses or brackets then you _must_ put the `;` at the end. +* Rustfmt cares which you use and formats accordingly: + * Curly brace macro use mostly gets treated like a code block. + * Parentheses macro use mostly gets treated like a function call. + * Bracket macro use mostly gets treated like an array declaration. + +## Upgrade That Macro! + +We also want to be able to add `derive` stuff and doc comments to our newtype. +Within the context of `macro_rules!` definitions these are called "meta". Since +we can have any number of them we wrap it all up in a "zero or more" matcher. +Then our macro looks like this: ```rust #[macro_export] @@ -88,52 +114,44 @@ macro_rules! newtype { } ``` -And we want to automatically add the ability to turn the wrapper type back into -the wrapped type. +So now we can write ```rust -#[macro_export] -macro_rules! newtype { - ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => { - $(#[$attr])* - #[repr(transparent)] - pub struct $new_name($old_name); - - impl From<$new_name> for $old_name { - fn from(x: $new_name) -> $old_name { - x.0 - } - } - }; +newtype! { + /// Color on the GBA gives 5 bits for each channel, the highest bit is ignored. + #[derive(Debug, Clone, Copy)] + PixelColor, u16 } ``` -That seems like enough for all of our examples, so we'll stop there. We could -add more things: - -* Making the `From` impl being optional. We'd have to make the newtype - invocation be more complicated somehow, the user puts ", no-unwrap" after the - inner type declaration or something, or something like that. -* Allowing for more precise visibility controls on the wrapping type and on the - inner field. This would add a lot of line noise, so we'll just always have our - newtypes be `pub`. -* Allowing for generic newtypes, which might sound silly but that we'll actually - see an example of soon enough. 
To do this you might _think_ that we can change - the `:ident` declarations to `:ty`, but since we're declaring a fresh type not - using an existing type we have to accept it as an `:ident`. The way you get - around this is with a proc-macro, which is a lot more powerful but which also - requires that you write the proc-macro in an entirely other crate that gets - compiled first. We don't need that much power, so for our examples we'll go - with the macro_rules version and just do it by hand in the few cases where we - need a generic newtype. -* Allowing for `Deref` and `DerefMut`, which usually defeats the point of doing - the newtype, but maybe sometimes it's the right thing, so if you were going - for the full industrial strength version with a proc-macro and all you might - want to make that part of your optional add-ons as well the same way you might - want optional `From`. You'd probably want `From` to be "on by default" and - `Deref`/`DerefMut` to be "off by default", but whatever. +And that's about all we'll need for the examples. **As a reminder:** remember that `macro_rules` macros have to appear _before_ they're invoked in your source, so the `newtype` macro will always have to be at the very top of your file, or if you put it in a module within your project you'll need to declare the module before anything that uses it. + +## Potential Homework + +If you wanted to keep going and get really fancy with it, you could potentially +add a lot more: + +* Make a `pub const fn new() -> Self` method that outputs the base value in a + const way. Combine this with builder style "setter" methods that are also + const and you can get the compiler to do quite a bit of the value building + work at compile time. +* Making the macro optionally emit a `From` impl to unwrap it back into the base + type. +* Allow for visibility modifiers to be applied to the inner field and the newly + generated type. +* Allowing for generic newtypes. 
You already saw the need for this once in the + volatile section. Unfortunately, this particular part gets really tricky if + you're using `macro_rules!`, so you might need to move up to a full + `proc_macro`. Having a `proc_macro` isn't bad except that they have to be + defined in a crate of their own and they're compiled before use. You can't + ever use them in the crate that defines them, so we won't be using them in any + of our single file examples. +* Allowing for optional `Deref` and `DerefMut` of the inner value. This takes + away most all the safety aspect of doing the newtype, but there may be times + for it. As an example, you could make a newtype with a different form of + Display impl that you want to otherwise treat as the base type in all places. diff --git a/book/src/01-quirks/05-const_asserts.md b/book/src/01-quirks/05-const_asserts.md new file mode 100644 index 0000000..21cb201 --- /dev/null +++ b/book/src/01-quirks/05-const_asserts.md @@ -0,0 +1,130 @@ +# Constant Assertions + +Have you ever wanted to assert things _even before runtime_? We all have, of +course. Particularly when the runtime machine is a poor little GBA, we'd like to +have the machine doing the compile handle as much checking as possible. + +Enter the [static assertions](https://docs.rs/static_assertions/) crate, which +provides a way to let you assert on a `const` expression. + +This is an amazing crate that you should definitely use when you can. + +It's written by [Nikolai Vazquez](https://github.com/nvzqz), and they kindly +wrote up a [blog +post](https://nikolaivazquez.com/posts/programming/rust-static-assertions/) that +explains the thinking behind it. + +However, I promised that each example would be single file, and I also promised +to explain what's going on as we go, so we'll briefly touch upon giving an +explanation here. + +## How We Const Assert + +Alright, as it stands (2018-12-15), we can't use `if` in a `const` context. 
+ +Since we can't use `if`, we can't use a normal `assert!`. Some day it will be +possible, and a failed assert at compile time will be a compile error and a +failed assert at run time will be a panic and we'll have a nice unified +programming experience. Until then, we can add compile-time-only assertions by being a little +tricky with the compiler. + +If we write + +```rust +const ASSERT: usize = 0 - 1; +``` + +that gives a warning, since the math would underflow. We can upgrade that +warning to a hard error: + +```rust +#[deny(const_err)] +const ASSERT: usize = 0 - 1; +``` + +And to make our construction reusable we can enable the +[underscore_const_names](https://github.com/rust-lang/rust/issues/54912) feature +in our program (or library) and then give each such const an underscore for a +name. + +```rust +#![feature(underscore_const_names)] + +#[deny(const_err)] +const _: usize = 0 - 1; +``` + +Now we wrap this in a macro where we give a `bool` expression as input. We +negate the bool then cast it to a `usize`, meaning that `true` negates into +`false`, which becomes `0usize`, and then there's no underflow error. Or if the +input was `false`, it negates into `true`, then becomes `1usize`, and then the +underflow error fires. + +```rust +macro_rules! const_assert { + ($condition:expr) => { + #[deny(const_err)] + #[allow(dead_code)] + const ASSERT: usize = 0 - !$condition as usize; + } +} +``` + +Technically, written like this, the expression can be anything with a +`core::ops::Not` implementation that can also be `as` cast into `usize`. That's +`bool`, but also basically all the other number types. Since we want to ensure +that we get proper looking type errors when things go wrong, we can use +`($condition && true)` to enforce that we get a `bool` (thanks to `Talchas` for +that particular suggestion). + +```rust +macro_rules!
const_assert { + ($condition:expr) => { + #[deny(const_err)] + #[allow(dead_code)] + const _: usize = 0 - !($condition && true) as usize; + } +} +``` + +## Asserting Something + +As an example of how we might use a `const_assert`, we'll do a demo with colors. +There's a red, blue, and green channel. We store colors in a `u16` with 5 bits +for each channel. + +```rust +newtype! { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + Color, u16 +} +``` + +And when we're building a color, we're passing in `u16` values, but they could +be using more than just 5 bits of space. We want to make sure that each channel +is 31 or less, so we can make a color builder that does a `const_assert!` on the +value of each channel. + +```rust +macro_rules! rgb { + ($r:expr, $g:expr, $b:expr) => { + { + const_assert!($r <= 31); + const_assert!($g <= 31); + const_assert!($b <= 31); + Color($b << 10 | $g << 5 | $r) + } + } +} +``` + +And then we can declare some colors + +```rust +const RED: Color = rgb!(31, 0, 0); + +const BLUE: Color = rgb!(31, 500, 0); +``` + +The second one is clearly out of bounds and it fires an error just like we +wanted. diff --git a/book/src/02-concepts/00-index.md b/book/src/02-concepts/00-index.md index 864e1ff..10fe20c 100644 --- a/book/src/02-concepts/00-index.md +++ b/book/src/02-concepts/00-index.md @@ -1 +1,38 @@ # Broad Concepts + +The GameBoy Advance sits in a middle place between the chthonic game consoles of +the ancient past and the "small PC in a funny case" consoles of the modern age. + +On the one hand, yeah, you're gonna find a few strange conventions as you learn +all the ropes. + +On the other, at least we're writing in Rust at all, and not having to do all +the assembly by hand. + +This chapter for "concepts" has a section for each part of the GBA's hardware +memory map, going by increasing order of base address value. 
The sections try to +explain as much as possible while sticking to just the concerns you might have +regarding that part of the memory map. + +For an assessment of how to wrangle all three parts of the video system (PALRAM, +VRAM, and OAM), along with the correct IO registers, into something that shows a +picture, you'll want the Video chapter. + +Similarly, the "IO Registers" part of the GBA actually controls how you interact +with every single bit of hardware connected to the GBA. A full description of +everything is obviously too much for just one section of the book. Instead you +get an overview of general IO register rules and advice. Each particular +register is described in the appropriate sections of either the Video or +Non-Video chapters. + +## Bus Size + +TODO: describe this + +## Minimum Write Size + +TODO: talk about parts where you can't write one byte at a time + +## Volatile or Not? + +TODO: discuss what memory should be used volatile style and what can be used normal style. \ No newline at end of file diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index 435d69f..4ab245d 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -1 +1,241 @@ # BIOS + +* **Address Span:** `0x0` to `0x3FFF` (16k) + +The [BIOS](https://en.wikipedia.org/wiki/BIOS) of the GBA is a small read-only +portion of memory at the very base of the address space. However, it is also +hardware protected against reading, so if you try to read from BIOS memory when +the program counter isn't pointed into the BIOS (eg: any time code _you_ write +is executing) then you get [basically garbage +data](https://problemkaputt.de/gbatek.htm#gbaunpredictablethings) back. + +So we're not going to spend time here talking about what bits to read or write +within BIOS memory like we do with the other sections. 
Instead we're going to +spend time talking about [inline +assembly](https://doc.rust-lang.org/unstable-book/language-features/asm.html) +([tracking issue](https://github.com/rust-lang/rust/issues/29722)) and then use +it to call the [GBA BIOS +Functions](https://problemkaputt.de/gbatek.htm#biosfunctions). + +Note that BIOS calls have _more overhead than normal function calls_, so don't +go using them all over the place if you don't have to. They're also usually +written more to be compact in terms of code than for raw speed, so you actually +can out speed them in some cases. Between the increased overhead and not being +as speed optimized, you can sometimes do a faster job without calling the BIOS +at all. (TODO: investigate more about what parts of the BIOS we could +potentially offer faster alternatives for.) + +I'd like to take a moment to thank [Marc Brinkmann](https://github.com/mbr) +(with contributions from [Oliver Schneider](https://github.com/oli-obk) and +[Philipp Oppermann](https://github.com/phil-opp)) for writing [this blog +post](http://embed.rs/articles/2016/arm-inline-assembly-rust/). It's at least +ten times the tutorial quality that the `asm` entry in the Unstable Book has. In +fairness to the Unstable Book, the actual spec of how inline ASM works in rust +is "basically what clang does", and that's specified as "basically what GCC +does", and that's basically/shockingly not specified much at all despite GCC +being like 30 years old. + +So let's be slow and pedantic about this process. + +## Inline ASM + +**Fair Warning:** Inline asm is one of the least stable parts of Rust overall, +and if you write bad things you can trigger internal compiler errors and panics +and crashes and make LLVM choke and die without explanation. If you write some +inline asm and then your program suddenly stops compiling without +explanation, try commenting out that whole inline asm use and see if it's +causing the problem.
Double check that you've written every single part of the +asm call absolutely correctly, etc, etc. + +**Bonus Warning:** The general information that follows regarding the asm macro +is consistent from system to system, but specific information about register +names, register quantities, asm instruction argument ordering, and so on is +specific to ARM on the GBA. If you're programming for any other device you'll +need to carefully investigate that before you begin. + +Now then, with those out of the way, the inline asm docs describe an asm call as +looking like this: + +```rust +asm!(assembly template + : output operands + : input operands + : clobbers + : options + ); +``` + +And once you stick a lot of stuff in there it can _absolutely_ be hard to +remember the ordering of the elements. So we'll start with a code block that +has some comments thrown in on each line: + +```rust +asm!(/* ASM */ TODO + :/* OUT */ TODO + :/* INP */ TODO + :/* CLO */ TODO + :/* OPT */ +); +``` + +Now we have to decide what we're gonna write. Obviously we're going to do some +instructions, but those instructions use registers, and how are we gonna talk +about them? We've got two choices. + +1) We can pick each and every register used by specifying exact register names. + In THUMB mode we have 8 registers available, named `r0` through `r7`. If you + switch into 32-bit mode there's additional registers that are also available. + +2) We can specify slots for registers we need and let LLVM decide. In this style + you name your slots `$0`, `$1` and so on. Slot numbers are assigned first to + all specified outputs, then to all specified inputs, in the order that you + list them. + +In the case of the GBA BIOS, each BIOS function has pre-designated input and +output registers, so we will use the first style. If you use inline ASM in other +parts of your code you're free to use the second style. + +### Assembly + +This is just one big string literal. 
You write out one instruction per line, and +excess whitespace is ignored. You can also do comments within your assembly +using `;` to start a comment that goes until the end of the line. + +Assembly convention doesn't consider it unreasonable to comment potentially as +much as _every single line_ of asm that you write when you're getting used to +things. Or even if you are used to things. This is cryptic stuff, there's a +reason we avoid writing in it as much as possible. + +Remember that our Rust code is in 16-bit mode. You _can_ switch to 32-bit mode +within your asm as long as you switch back by the time the block ends. Otherwise +you'll have a bad time. + +### Outputs + +A comma separated list. Each entry looks like + +* `"constraint" (binding)` + +An output constraint starts with a symbol: + +* `=` for write only +* `+` for reads and writes +* `&` for "early clobber", meaning that you'll write to this at some point + before all input values have been read. It prevents this register from being + assigned to an input register. + +Followed by _either_ the letter `r` (if you want LLVM to pick the register to +use) or curly braces around a specific register (if you want to pick). + +* The binding can be any single 32-bit or smaller value. +* If your binding has bit pattern requirements ("must be non-zero", etc) you are + responsible for upholding that. +* If your binding type will try to `Drop` later then you are responsible for it + being in a fit state to do that. +* The binding must be either a mutable binding or a binding that was + pre-declared but not yet assigned. + +Anything else is UB. + +### Inputs + +This is a similar comma separated list. + +* `"constraint" (binding)` + +An input constraint doesn't have the symbol prefix, you just pick either `r` or +a named register with curly braces around it. + +* An input binding must be a single 32-bit or smaller value.
+* An input binding _should_ be a type that is `Copy` but this is not an absolute + requirement. Having the input be read is semantically similar to using + `core::ptr::read(&binding)` and forgetting the value when you're done. + +### Clobbers + +Sometimes your asm will touch registers other than the ones declared for input +and output. + +Clobbers are declared as a comma separated list of string literals naming +specific registers. You don't use curly braces with clobbers. + +LLVM _needs_ to know this information. It can move things around to keep your +data safe, but only if you tell it what's about to happen. + +Failure to define all of your clobbers can cause UB. + +### Options + +There's only one option we'd care to specify. That option is "volatile". + +Just like with a function call, LLVM will skip a block of asm if it doesn't see +that any outputs from the asm were used later on. Nearly every single BIOS call +(other than the math operations) will need to be marked as "volatile". + +### BIOS ASM + +* Inputs are always `r0`, `r1`, `r2`, and/or `r3`, depending on function. +* Outputs are always zero or more of `r0`, `r1`, and `r3`. +* Any of the output registers that aren't actually used should be marked as + clobbered. +* All other registers are unaffected. + +All of the GBA BIOS calls are performed using the +[swi](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/BABFCEEG.html) +instruction, combined with a value depending on what BIOS function you're trying +to invoke. If you're in 16-bit code you use the value directly, and if you're in +32-bit mode you shift the value up by 16 bits first. + +### Example BIOS Function: Division + +For our example we'll use the division function, because GBATEK gives very clear +instructions on how each register is used with that one: + +```txt +Signed Division, r0/r1. 
+ r0 signed 32bit Number + r1 signed 32bit Denom +Return: + r0 Number DIV Denom ;signed + r1 Number MOD Denom ;signed + r3 ABS (Number DIV Denom) ;unsigned +For example, incoming -1234, 10 should return -123, -4, +123. +The function usually gets caught in an endless loop upon division by zero. +``` + +The math folks tell me that the `r1` value should be properly called the +"remainder" not the "modulus". We'll go with that for our function, doesn't hurt +to use the correct names. Our Rust function has an assert against dividing by +`0`, then we name some bindings _without_ giving them a value, we make the asm +call, and then return what we got. + +```rust +pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { + assert!(denominator != 0); + let div_out: i32; + let rem_out: i32; + unsafe { + asm!(/* ASM */ "swi 0x06" + :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out) + :/* INP */ "{r0}"(numerator), "{r1}"(denominator) + :/* CLO */ "r3" + :/* OPT */ + ); + } + (div_out, rem_out) +} +``` + +I _hope_ this all makes sense by now. + +## Specific BIOS Functions + +For a full list of all the specific BIOS functions and their use you should +check the `gba::bios` module within the `gba` crate. There's just so many of +them that enumerating them all here wouldn't serve much purpose. + +Which is not to say that we'll never cover any BIOS functions in this book! +Instead, we'll simply mention them whenever they're relevant to the task at +hand (such as controlling sound or waiting for vblank). + +//TODO: list/name all BIOS functions as well as what they relate to elsewhere. diff --git a/book/src/02-concepts/03-wram.md b/book/src/02-concepts/03-wram.md index b4185fc..26d71e8 100644 --- a/book/src/02-concepts/03-wram.md +++ b/book/src/02-concepts/03-wram.md @@ -1 +1,28 @@ # Work RAM + +## External Work RAM (EWRAM) + +* **Address Span:** `0x2000000` to `0x203FFFF` (256k) + +This is a big pile of space, the use of which is up to each game.
However, the +external work ram has only a 16-bit bus (if you read/write a 32-bit value it +silently breaks it up into two 16-bit operations) and also 2 wait cycles (extra +CPU cycles that you have to expend _per 16-bit bus use_). + +It's most helpful to think of EWRAM as slower, distant memory, similar to the +"heap" in a normal application. You can take the time to go store something +within EWRAM, or to load it out of EWRAM, but if you've got several operations +to do in a row and you're worried about time you should pull that value into +local memory, work on your local copy, and then push it back out to EWRAM. + +## Internal Work RAM (IWRAM) + +* **Address Span:** `0x3000000` to `0x3007FFF` (32k) + +This is a smaller pile of space, but it has a 32-bit bus and no wait. + +By default, `0x3007F00` to `0x3007FFF` is reserved for interrupt and BIOS use. +The rest of it is mostly up to you. The user's stack space starts at `0x3007F00` +and proceeds _down_ from there. For best results you should probably start at +`0x3000000` and then go upwards. Under normal use it's unlikely that the two +memory regions will crash into each other. diff --git a/book/src/02-concepts/04-io-registers.md b/book/src/02-concepts/04-io-registers.md index 3a3e53f..99a18b9 100644 --- a/book/src/02-concepts/04-io-registers.md +++ b/book/src/02-concepts/04-io-registers.md @@ -1 +1,3 @@ # IO Registers + +* **Address Span:** `0x400_0000` to `0x400_03FE` diff --git a/book/src/02-concepts/05-palram.md b/book/src/02-concepts/05-palram.md index 5353b1c..95cbdf1 100644 --- a/book/src/02-concepts/05-palram.md +++ b/book/src/02-concepts/05-palram.md @@ -1 +1,50 @@ -# Palette RAM +# Palette RAM (PALRAM) + +* **Address Span:** `0x500_0000` to `0x500_03FF` (1k) + +Palette RAM has a 16-bit bus, which isn't really a problem because it +conceptually just holds `u16` values. 
There's no automatic wait state, but if +you try to access the same location that the display controller is accessing you +get bumped by 1 cycle. Since the display controller can use the palette ram any +number of times per scanline it's basically impossible to predict if you'll have +to do a wait or not during VDraw. During VBlank you won't have any wait of +course. + +PALRAM is among the memory where there's weirdness if you try to write just one +byte: if you try to write just 1 byte, it writes that byte into _both_ parts of +the larger 16-bit location. This doesn't really affect us much with PALRAM, +because palette values are all supposed to be `u16` anyway. + +The palette memory actually contains not one, but _two_ sets of palettes. First +there's 256 entries for the background palette data (starting at `0x5000000`), +and then there's 256 entries for object palette data (starting at `0x5000200`). + +The GBA also has two modes for palette access: 8-bits-per-pixel (8bpp) and +4-bits-per-pixel (4bpp). + +* In 8bpp mode an 8-bit palette index value within a background or sprite + simply indexes directly into the 256 slots for that type of thing. +* In 4bpp mode a 4-bit palette index value within a background or sprite + specifies an index within a particular "palbank" (16 palette entries each), + and then a _separate_ setting outside of the graphical data determines which + palbank is to be used for that background or object (the screen entry data for + backgrounds, and the object attributes for objects). + +### Transparency + +When a pixel within a background or object specifies index 0 as its palette +entry it is treated as a transparent pixel. This means that in 8bpp mode there's +only 255 actual color options (0 being transparent), and in 4bpp mode there's +only 15 actual color options available within each palbank (the 0th entry of +_each_ palbank is transparent). 
+ +Individual backgrounds, and individual objects, each determine if they're 4bpp +or 8bpp separately, so a given overall palette slot might map to a used color in +8bpp and an unused/transparent color in 4bpp. If you're a palette wizard. + +Palette slot 0 of the overall background palette is used to determine the +"backdrop" color. That's the color you see if no background or object ends up +being rendered within a given pixel. + +Since display mode 3 and display mode 5 don't use the palette, they cannot +benefit from transparency. diff --git a/book/src/02-concepts/06-vram.md b/book/src/02-concepts/06-vram.md index e6915fd..24a96c4 100644 --- a/book/src/02-concepts/06-vram.md +++ b/book/src/02-concepts/06-vram.md @@ -1 +1,24 @@ -# Video RAM +# Video RAM (VRAM) + +* **Address Span:** `0x600_0000` to `0x601_7FFF` (96k) + +We've used this before! VRAM has a 16-bit bus and no wait. However, the same as +with PALRAM, the "you might have to wait if the display controller is looking at +it" rule applies here. + +Unfortunately there's not much more exact detail that can be given about VRAM. +The use of the memory depends on the video mode that you're using. + +One general detail of note is that you can't write individual bytes to any part +of VRAM. Depending on mode and location, you'll either get your bytes doubled +into both the upper and lower parts of the 16-bit location targeted, or you +won't even affect the memory. This usually isn't a big deal, except in two +situations: + +* In Mode 4, if you want to change just 1 pixel, you'll have to be very careful + to read the old `u16`, overwrite just the byte you wanted to change, and then + write that back. +* In any display mode, avoid using `memcopy` to place things into VRAM. + It's written to be byte oriented, and only does 32-bit transfers under select + conditions. The rest of the time it'll copy one byte at a time and you'll get + either garbage or nothing at all. 
diff --git a/book/src/02-concepts/07-oam.md b/book/src/02-concepts/07-oam.md index 78d8d02..eeee6f2 100644 --- a/book/src/02-concepts/07-oam.md +++ b/book/src/02-concepts/07-oam.md @@ -1 +1,62 @@ -# Object Attribute Memory +# Object Attribute Memory (OAM) + +* **Address Span:** `0x700_0000` to `0x700_03FF` (1k) + +The Object Attribute Memory has a 32-bit bus and no default wait, but suffers +from the "you might have to wait if the display controller is looking at it" +rule. You cannot write individual bytes to OAM at all, but that's not really a +problem because all the fields of the data types within OAM are either `i16` or +`u16` anyway. + +Object attribute memory is the wildest yet: it conceptually contains two types +of things, but they're _interlaced_ with each other all the way through. + +Now, [GBATEK](http://problemkaputt.de/gbatek.htm#lcdobjoamattributes) and +[CowBite](https://www.cs.rit.edu/~tjh8300/CowBite/CowBiteSpec.htm#OAM%20(sprites)) +don't quite give names to the two data types here. +[TONC](https://www.coranac.com/tonc/text/regobj.htm#sec-oam) calls them +`OBJ_ATTR` and `OBJ_AFFINE`, but we'll be giving them names fitting with the +Rust naming convention. Just know that if you try to talk about it with others +they might not be using the same names. In Rust terms their layout would look +like this: + +```rust +#[repr(C)] +pub struct ObjectAttributes { + attr0: u16, + attr1: u16, + attr2: u16, + filler: i16, +} + +#[repr(C)] +pub struct AffineMatrix { + filler0: [u16; 3], + pa: i16, + filler1: [u16; 3], + pb: i16, + filler2: [u16; 3], + pc: i16, + filler3: [u16; 3], + pd: i16, +} +``` + +(Note: the `#[repr(C)]` part just means that Rust must lay out the data exactly +in the order we specify, which otherwise it is not required to do). + +So, we've got 1024 bytes in OAM and each `ObjectAttributes` value is 8 bytes, so +naturally we can support up to 128 objects.
+ +_At the same time_, we've got 1024 bytes in OAM and each `AffineMatrix` is 32 +bytes, so we can have 32 of them. + +But, as I said, these things are all _interlaced_ with each other. See how +there's "filler" fields in each struct? If we imagine the OAM as being just an +array of one type or the other, indexes 0/1/2/3 of the `ObjectAttributes` array +would line up with index 0 of the `AffineMatrix` array. It's kinda weird, but +that's just how it works. When we setup functions to read and write these values +we'll have to be careful with how we do it. We probably _won't_ want to use +those representations above, at least not with the `AffineMatrix` type, because +they're quite wasteful if you want to store just object attributes or just +affine matrices. diff --git a/book/src/02-concepts/08-rom.md b/book/src/02-concepts/08-rom.md index 753857b..584faac 100644 --- a/book/src/02-concepts/08-rom.md +++ b/book/src/02-concepts/08-rom.md @@ -1 +1,14 @@ -# Game Pak ROM / Flash ROM +# Game Pak ROM / Flash ROM (ROM) + +* **Address Span (Wait State 0):** `0x800_0000` to `0x9FF_FFFF` +* **Address Span (Wait State 1):** `0xA00_0000` to `0xBFF_FFFF` +* **Address Span (Wait State 2):** `0xC00_0000` to `0xDFF_FFFF` + +The game's ROM data is a single set of data that's up to 32 megabytes in size. +However, that data is mirrored to three different locations in the address +space. Depending on which part of the address space you use, it can affect the +memory timings involved. + +TODO: describe `WAITCNT` here, we won't get a better chance at it. 
+ +TODO: discuss THUMB vs ARM code and why THUMB is so much faster (because ROM is a 16-bit bus) diff --git a/book/src/02-concepts/09-sram.md b/book/src/02-concepts/09-sram.md index aa68e68..65ec4d2 100644 --- a/book/src/02-concepts/09-sram.md +++ b/book/src/02-concepts/09-sram.md @@ -1 +1,16 @@ -# Save RAM +# Save RAM (SRAM) + +* **Address Span:** `0xE00_0000` to `0xE00FFFF` (64k) + +The actual amount of SRAM available depends on your game pak, and the 64k figure +is simply the maximum possible. A particular game pak might have less, and an +emulator will likely let you have all 64k if you want. + +As with other portions of the address space, SRAM has some number of wait cycles +per use. As with ROM, you can change the wait cycle settings via the `WAITCNT` +register if the defaults don't work well for your game pak. See the ROM section +for full details of how the `WAITCNT` register works. + +The game pak SRAM also has only an 8-bit bus, so have fun with that. + +The GBA Direct Memory Access (DMA) unit cannot access SRAM. diff --git a/book/src/03-video/00-index.md b/book/src/03-video/00-index.md index f076b5d..288c3ca 100644 --- a/book/src/03-video/00-index.md +++ b/book/src/03-video/00-index.md @@ -1 +1,9 @@ # Video + +GBA Video starts with an IO register called the "Display Control Register", and +then spirals out from there. You generally have to use Palette RAM (PALRAM), +Video RAM (VRAM), Object Attribute Memory (OAM), as well as any number of other +IO registers. + +They all have to work together just right, and there's a lot going on when you +first try doing it, so try to take it very slowly as you're learning each step. diff --git a/book/src/04-non-video/00-index.md b/book/src/04-non-video/00-index.md index d7d1113..aff4a81 100644 --- a/book/src/04-non-video/00-index.md +++ b/book/src/04-non-video/00-index.md @@ -1 +1,21 @@ # Non-Video + +Besides video effects the GBA still has an okay amount of stuff going on. 
+ +Obviously you'll want to know how to read the user's button inputs. That can +almost go without saying, except that I said it. + +Each other part can be handled in about any order you like. + +Using interrupts is perhaps one of the hardest things for us as Rust programmers +due to quirks in our compilation process. Our code all gets compiled to 16-bit +THUMB instructions, and we don't have a way to mark a function to be compiled +using 32-bit ASM instructions instead. However, an interrupt handler _must_ be +written in 32-bit ASM instructions for it to work. That means that we have to +write our interrupt handler in 32-bit ASM by hand. We'll do it, but I don't +think we'll be too happy about it. + +The Link Cable related stuff is also probably a little harder to test than +anything else, just because link cable emulation isn't always the best, and/or +you need two GBAs with two flash carts and the cable for hardware testing. +Still, we'll try to go over it eventually. diff --git a/book/src/04-non-video/01-buttons.md b/book/src/04-non-video/01-buttons.md index 8694b48..8eb4e80 100644 --- a/book/src/04-non-video/01-buttons.md +++ b/book/src/04-non-video/01-buttons.md @@ -1 +1,5 @@ # Buttons + +It's all well and good to just show a picture, even to show an animation, but if +we want a game we have to let the user interact with something.
+ diff --git a/book/src/04-non-video/06-link_cable.md b/book/src/04-non-video/06-link_cable.md new file mode 100644 index 0000000..f8e1989 --- /dev/null +++ b/book/src/04-non-video/06-link_cable.md @@ -0,0 +1 @@ +# Link Cable diff --git a/book/src/04-non-video/06-network.md b/book/src/04-non-video/06-network.md deleted file mode 100644 index 05db335..0000000 --- a/book/src/04-non-video/06-network.md +++ /dev/null @@ -1 +0,0 @@ -# Network diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 6de4af7..704ba7a 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -12,6 +12,7 @@ * [Fixed Only](01-quirks/02-fixed_only.md) * [Volatile Destination](01-quirks/03-volatile_destination.md) * [Newtype](01-quirks/04-newtype.md) + * [Const Asserts](01-quirks/05-const_asserts.md) * [Concepts](02-concepts/00-index.md) * [CPU](02-concepts/01-cpu.md) * [BIOS](02-concepts/02-bios.md) @@ -31,7 +32,7 @@ * [Direct Memory Access](04-non-video/03-dma.md) * [Sound](04-non-video/04-sound.md) * [Interrupts](04-non-video/05-interrupts.md) - * [Network](04-non-video/06-network.md) + * [Link Cable](04-non-video/06-link_cable.md) * [Game Pak](04-non-video/07-game_pak.md) * [Examples](05-examples/00-index.md) * [hello_magic](05-examples/01-hello_magic.md) diff --git a/examples/bg_demo.rs b/examples/bg_demo.rs index c18a00c..3fa219f 100644 --- a/examples/bg_demo.rs +++ b/examples/bg_demo.rs @@ -1,5 +1,5 @@ -#![feature(start)] #![no_std] +#![feature(start)] #[panic_handler] fn panic(_info: &core::panic::PanicInfo) -> ! { diff --git a/examples/hello_world.rs b/examples/hello_world.rs index 8cd4270..549569a 100644 --- a/examples/hello_world.rs +++ b/examples/hello_world.rs @@ -1,22 +1,37 @@ -#![feature(start)] #![no_std] +#![feature(start)] +#![feature(underscore_const_names)] -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! { - loop {} +#[macro_export] +macro_rules! 
newtype { + ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => { + $(#[$attr])* + #[repr(transparent)] + pub struct $new_name($old_name); + }; } -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - unsafe { - DISPCNT.write(MODE3 | BG2); - mode3_pixel(120, 80, rgb16(31, 0, 0)); - mode3_pixel(136, 80, rgb16(0, 31, 0)); - mode3_pixel(120, 96, rgb16(0, 0, 31)); - loop {} - } +#[macro_export] +macro_rules! const_assert { + ($condition:expr) => { + #[deny(const_err)] + #[allow(dead_code)] + const _: usize = 0 - !$condition as usize; + }; } +/// Constructs an RGB value with a `const_assert!` that the input is in range. +#[macro_export] +macro_rules! const_rgb { + ($r:expr, $g:expr, $b:expr) => {{ + const_assert!($r <= 31); + const_assert!($g <= 31); + const_assert!($b <= 31); + Color::new($r, $g, $b) + }}; +} + +// TODO: kill this #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub struct VolatilePtr(pub *mut T); @@ -32,17 +47,50 @@ impl VolatilePtr { } } -pub const DISPCNT: VolatilePtr = VolatilePtr(0x04000000 as *mut u16); -pub const MODE3: u16 = 3; -pub const BG2: u16 = 0b100_0000_0000; - -pub const VRAM: usize = 0x06000000; -pub const SCREEN_WIDTH: isize = 240; - -pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red +newtype! { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + Color, u16 } -pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); +impl Color { + /// Combines the Red, Blue, and Green provided into a single color value. + pub const fn new(red: u16, green: u16, blue: u16) -> Color { + Color(blue << 10 | green << 5 | red) + } +} + +newtype! 
{ + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + DisplayControlSetting, u16 +} + +pub const DISPLAY_CONTROL: VolatilePtr = VolatilePtr(0x0400_0000 as *mut DisplayControlSetting); +pub const JUST_MODE3: DisplayControlSetting = DisplayControlSetting(3); +pub const JUST_BG2: DisplayControlSetting = DisplayControlSetting(0b100_0000_0000); +pub const JUST_MODE3_AND_BG2: DisplayControlSetting = DisplayControlSetting(JUST_MODE3.0 | JUST_BG2.0); + +pub struct Mode3; +impl Mode3 { + const SCREEN_WIDTH: isize = 240; + const PIXELS: VolatilePtr = VolatilePtr(0x600_0000 as *mut Color); + + pub unsafe fn draw_pixel_unchecked(col: isize, row: isize, color: Color) { + Self::PIXELS.offset(col + row * Self::SCREEN_WIDTH).write(color); + } +} + +#[panic_handler] +fn panic(_info: &core::panic::PanicInfo) -> ! { + loop {} +} + +#[start] +fn main(_argc: isize, _argv: *const *const u8) -> isize { + unsafe { + DISPLAY_CONTROL.write(JUST_MODE3_AND_BG2); + Mode3::draw_pixel_unchecked(120, 80, const_rgb!(31, 0, 0)); + Mode3::draw_pixel_unchecked(136, 80, const_rgb!(0, 31, 0)); + Mode3::draw_pixel_unchecked(120, 96, const_rgb!(0, 0, 31)); + loop {} + } } diff --git a/examples/light_cycle.rs b/examples/light_cycle.rs index c200663..c593c19 100644 --- a/examples/light_cycle.rs +++ b/examples/light_cycle.rs @@ -1,5 +1,5 @@ -#![feature(start)] #![no_std] +#![feature(start)] #[panic_handler] fn panic(_info: &core::panic::PanicInfo) -> ! { diff --git a/src/bios.rs b/src/bios.rs new file mode 100644 index 0000000..f24b6f4 --- /dev/null +++ b/src/bios.rs @@ -0,0 +1,517 @@ +//! This module contains wrappers for all GBA BIOS function calls. +//! +//! A GBA BIOS call has significantly more overhead than a normal function call, +//! so think carefully before using them too much. +//! +//! The actual content of each function here is generally a single inline asm +//! instruction to invoke the correct BIOS function (`swi x`, with `x` being +//! whatever value is necessary for that function). 
Some functions also perform +//! necessary checks to save you from yourself, such as not dividing by zero. + +//TODO: ALL functions in this module should have `if cfg!(test)` blocks. The +//functions that never return must panic, the functions that return nothing +//should just do so, and the math functions should just return the correct math +//I guess. + +/// (`swi 0x00`) SoftReset the device. +/// +/// This function does not ever return. +/// +/// Instead, it clears the top `0x200` bytes of IWRAM (containing stacks, and +/// BIOS IRQ vector/flags), re-initializes the system, supervisor, and irq stack +/// pointers (new values listed below), sets `r0` through `r12`, `LR_svc`, +/// `SPSR_svc`, `LR_irq`, and `SPSR_irq` to zero, and enters system mode. The +/// return address is loaded into `r14` and then the function jumps there with +/// `bx r14`. +/// +/// * sp_svc: `0x300_7FE0` +/// * sp_irq: `0x300_7FA0` +/// * sp_sys: `0x300_7F00` +/// * Zero-filled Area: `0x300_7E00` to `0x300_7FFF` +/// * Return Address: Depends on the 8-bit flag value at `0x300_7FFA`. In either +/// case execution proceeds in ARM mode. +/// * zero flag: `0x800_0000` (ROM), which for our builds means that the +/// `crt0` program executes (just like with a fresh boot), and then +/// control passes into `main` and so on. +/// * non-zero flag: `0x200_0000` (RAM). This is where a multiboot image would +/// go if you were doing a multiboot thing. However, this project doesn't +/// support multiboot at the moment. You'd need an entirely different build +/// pipeline because there's differences in header format and things like +/// that. Perhaps someday, but probably not even then. Submit the PR for it +/// if you like! +/// +/// ## Safety +/// +/// This function isn't ever unsafe to the current iteration of the program. +/// However, because not all memory is fully cleared you theoretically could +/// threaten the _next_ iteration of the program that runs.
I'm _fairly_ +/// convinced that you can't actually use this to force purely safe code to +/// perform UB, but such a scenario might exist. +#[inline(always)] +pub unsafe fn soft_reset() -> ! { + asm!(/* ASM */ "swi 0x00" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + core::hint::unreachable_unchecked() +} + +/// (`swi 0x01`) RegisterRamReset. +/// +/// Clears the portions of memory given by the `flags` value, sets the Display +/// Control Register to `0x80` (forced blank and nothing else), then returns. +/// +/// * Flag bits: +/// 0) Clears the 256k of EWRAM (don't use if this is where your function call +/// will return to!) +/// 1) Clears the 32k of IWRAM _excluding_ the last `0x200` bytes (see also: +/// the `soft_reset` function) +/// 2) Clears all Palette data +/// 3) Clears all VRAM +/// 4) Clears all OAM (reminder: a zeroed object isn't disabled!) +/// 5) Reset SIO registers (resets them to general purpose mode) +/// 6) Reset Sound registers +/// 7) Reset all IO registers _other than_ SIO and Sound +/// +/// **Bug:** The LSB of `SIODATA32` is always zeroed, even if bit 5 was not +/// enabled. This is sadly a bug in the design of the GBA itself. +/// +/// ## Safety +/// +/// It is generally a safe operation to suddenly clear any part of the GBA's +/// memory, except in the case that you were executing out of EWRAM and clear +/// that. If you do then you return to nothing and have a bad time. +#[inline(always)] +pub unsafe fn register_ram_reset(flags: u8) { + asm!(/* ASM */ "swi 0x01" + :/* OUT */ // none + :/* INP */ "{r0}"(flags) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} +//TODO(lokathor): newtype this flag business. + +/// (`swi 0x02`) Halts the CPU until an interrupt occurs. +/// +/// Components _other than_ the CPU continue to function. Halt mode ends when +/// any enabled interrupt triggers. 
+#[inline(always)] +pub fn halt() { + unsafe { + asm!(/* ASM */ "swi 0x02" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x03`) Stops the CPU as well as most other components. +/// +/// Stop mode must be ended by an interrupt, but can _only_ be ended by a +/// Keypad, Game Pak, or General-Purpose-SIO interrupt. +/// +/// Before going into stop mode you should manually disable video and sound (or +/// they will continue to consume power), and you should also disable any other +/// optional externals such as rumble and infra-red. +#[inline(always)] +pub fn stop() { + unsafe { + asm!(/* ASM */ "swi 0x03" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x04`) "IntrWait", similar to halt but with more options. +/// +/// * The first argument controls if you want to ignore all current flags and +/// wait until a new flag is set. +/// * The second argument is what flags you're waiting on (same format as the +/// IE/IF registers). +/// +/// If you're trying to handle more than one interrupt at once this has less +/// overhead than calling `halt` over and over. +/// +/// When using this routine your interrupt handler MUST update the BIOS +/// Interrupt Flags at `0x300_7FF8` in addition to the usual interrupt +/// acknowledgement. +#[inline(always)] +pub fn interrupt_wait(ignore_current_flags: bool, target_flags: u16) { + unsafe { + asm!(/* ASM */ "swi 0x04" + :/* OUT */ // none + :/* INP */ "{r0}"(ignore_current_flags), "{r1}"(target_flags) + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} +//TODO(lokathor): newtype this flag business. + +/// (`swi 0x05`) "VBlankIntrWait", VBlank Interrupt Wait. +/// +/// This is as per `interrupt_wait(true, 1)` (aka "wait for a new vblank"). You +/// must follow the same guidelines that `interrupt_wait` outlines.
+#[inline(always)] +pub fn vblank_interrupt_wait() { + unsafe { + // VBlankIntrWait is BIOS call 0x05; 0x04 is IntrWait, which takes its arguments in r0/r1 and none are supplied here. + asm!(/* ASM */ "swi 0x05" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ "r0", "r1" // both set to 1 by the routine + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x06`) Software Division and Remainder. +/// +/// ## Panics +/// +/// If the denominator is 0. +#[inline(always)] +pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { + assert!(denominator != 0); + if cfg!(test) { + (numerator / denominator, numerator % denominator) + } else { + let div_out: i32; + let rem_out: i32; + unsafe { + asm!(/* ASM */ "swi 0x06" + :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out) + :/* INP */ "{r0}"(numerator), "{r1}"(denominator) + :/* CLO */ "r3" + :/* OPT */ + ); + } + (div_out, rem_out) + } +} + +/// As `div_rem`, keeping only the `div` output. +#[inline(always)] +pub fn div(numerator: i32, denominator: i32) -> i32 { + div_rem(numerator, denominator).0 +} + +/// As `div_rem`, keeping only the `rem` output. +#[inline(always)] +pub fn rem(numerator: i32, denominator: i32) -> i32 { + div_rem(numerator, denominator).1 +} + +// (`swi 0x07`): We deliberately don't implement this one. It's the same as DIV +// but with reversed arguments, so it just runs 3 cycles slower as it does the +// swap. + +/// (`swi 0x08`) Integer square root. +/// +/// If you want more fractional precision, you can shift your input to the left +/// by `2n` bits to get `n` more bits of fractional precision in your output. +#[inline(always)] +pub fn sqrt(val: u32) -> u16 { + let out: u16; + unsafe { + asm!(/* ASM */ "swi 0x08" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(val) + :/* CLO */ "r1", "r3" + :/* OPT */ + ); + } + out +} + +/// (`swi 0x09`) Gives the arctangent of `theta`. +/// +/// The input format is 1 bit for sign, 1 bit for integral part, 14 bits for +/// fractional part. +/// +/// Accuracy suffers if `theta` is less than `-pi/4` or greater than `pi/4`.
+#[inline(always)] +pub fn atan(theta: i16) -> i16 { + let out: i16; + unsafe { + asm!(/* ASM */ "swi 0x09" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(theta) + :/* CLO */ "r1", "r3" + :/* OPT */ + ); + } + out +} + +/// (`swi 0x0A`) Gives the atan2 of `y` over `x`. +/// +/// The output `theta` value maps into the range `[0, 2pi)`, or `0 .. 2pi` if +/// you prefer Rust's range notation. +/// +/// `y` and `x` use the same format as with `atan`: 1 bit for sign, 1 bit for +/// integral, 14 bits for fractional. +#[inline(always)] +pub fn atan2(y: i16, x: i16) -> u16 { + let out: u16; + unsafe { + asm!(/* ASM */ "swi 0x0A" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(x), "{r1}"(y) + :/* CLO */ "r3" + :/* OPT */ + ); + } + out +} + +/// (`swi 0x0B`) "CpuSet", `u16` memory copy. +/// +/// * `count` is the number of `u16` values to copy (20 bits or less) +/// * `fixed_source` argument, if true, turns this copying routine into a +/// filling routine. +/// +/// ## Safety +/// +/// * Both pointers must be aligned +#[inline(always)] +pub unsafe fn cpu_set16(src: *const u16, dest: *mut u16, count: u32, fixed_source: bool) { + let control = count + ((fixed_source as u32) << 24); + asm!(/* ASM */ "swi 0x0B" + :/* OUT */ // none + :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} + +/// (`swi 0x0B`) "CpuSet", `u32` memory copy/fill. +/// +/// * `count` is the number of `u32` values to copy (20 bits or less) +/// * `fixed_source` argument, if true, turns this copying routine into a +/// filling routine. 
+/// +/// ## Safety +/// +/// * Both pointers must be aligned +#[inline(always)] +pub unsafe fn cpu_set32(src: *const u32, dest: *mut u32, count: u32, fixed_source: bool) { + let control = count + ((fixed_source as u32) << 24) + (1 << 26); + asm!(/* ASM */ "swi 0x0B" + :/* OUT */ // none + :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} + +/// (`swi 0x0C`) "CpuFastSet", copies memory in 32 byte chunks. +/// +/// * The `count` value is the number of `u32` values to transfer (20 bits or +/// less), and it's rounded up to the nearest multiple of 8 words. +/// * The `fixed_source` argument, if true, turns this copying routine into a +/// filling routine. +/// +/// ## Safety +/// +/// * Both pointers must be aligned +#[inline(always)] +pub unsafe fn cpu_fast_set(src: *const u32, dest: *mut u32, count: u32, fixed_source: bool) { + let control = count + ((fixed_source as u32) << 24); + asm!(/* ASM */ "swi 0x0C" + :/* OUT */ // none + :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} + +/// (`swi 0x0D`) "GetBiosChecksum" (Undocumented) +/// +/// Though we usually don't cover undocumented functionality, this one can make +/// it into the crate. +/// +/// The function computes the checksum of the BIOS data. You should get either +/// `0xBAAE_187F` (GBA / GBA SP) or `0xBAAE_1880` (DS in GBA mode). If you get +/// some other value I guess you're probably running on an emulator that just +/// broke the fourth wall.
+pub fn get_bios_checksum() -> u32 { + let out: u32; + unsafe { + asm!(/* ASM */ "swi 0x0D" + :/* OUT */ "={r0}"(out) + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ // none + ); + } + out +} + +// TODO: these things will require that we build special structs + +//BgAffineSet +//ObjAffineSet +//BitUnPack +//LZ77UnCompReadNormalWrite8bit +//LZ77UnCompReadNormalWrite16bit +//HuffUnCompReadNormal +//RLUnCompReadNormalWrite8bit +//Diff8bitUnFilterWrite8bit +//Diff8bitUnFilterWrite16bit +//Diff16bitUnFilter + +/// (`swi 0x19`) "SoundBias", adjusts the volume level to a new level. +/// +/// This increases or decreases the current level of the `SOUNDBIAS` register +/// (with short delays) until at the new target level. The upper bits of the +/// register are unaffected. +/// +/// The final sound level setting will be `level` * `0x200`. +pub fn sound_bias(level: u32) { + unsafe { + asm!(/* ASM */ "swi 0x19" + :/* OUT */ // none + :/* INP */ "{r0}"(level) + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +//SoundDriverInit + +/// (`swi 0x1B`) "SoundDriverMode", sets the sound driver operation mode. +/// +/// The `mode` input uses the following flags and bits: +/// +/// * Bits 0-6: Reverb value +/// * Bit 7: Reverb Enable +/// * Bits 8-11: Simultaneously-produced channel count (default=8) +/// * Bits 12-15: Master Volume (1-15, default=15) +/// * Bits 16-19: Playback Frequency Index (see below, default=4) +/// * Bits 20-23: "Final number of D/A converter bits (8-11 = 9-6bits, def. 9=8bits)" TODO: what the hek? 
+/// * Bits 24 and up: Not used +/// +/// The frequency index selects a frequency from the following array: +/// * 0: 5734 +/// * 1: 7884 +/// * 2: 10512 +/// * 3: 13379 +/// * 4: 15768 +/// * 5: 18157 +/// * 6: 21024 +/// * 7: 26758 +/// * 8: 31536 +/// * 9: 36314 +/// * 10: 40137 +/// * 11: 42048 +pub fn sound_driver_mode(mode: u32) { + unsafe { + asm!(/* ASM */ "swi 0x1B" + :/* OUT */ // none + :/* INP */ "{r0}"(mode) + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} +//TODO(lokathor): newtype this mode business. + +/// (`swi 0x1C`) "SoundDriverMain", main of the sound driver +/// +/// You should call `SoundDriverVSync` immediately after the vblank interrupt +/// fires. +/// +/// "After that, this routine is called after BG and OBJ processing is +/// executed." --what? +#[inline(always)] +pub fn sound_driver_main() { + unsafe { + asm!(/* ASM */ "swi 0x1C" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x1D`) "SoundDriverVSync", resets the sound DMA. +/// +/// The timing is critical, so you should call this _immediately_ after the +/// vblank interrupt (every 1/60th of a second). +#[inline(always)] +pub fn sound_driver_vsync() { + unsafe { + asm!(/* ASM */ "swi 0x1D" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x1E`) "SoundChannelClear", clears the direct sound channels and stops +/// the sound. +/// +/// "This function may not operate properly when the library which expands the +/// sound driver feature is combined afterwards. In this case, do not use it." +/// --what? 
+#[inline(always)] +pub fn sound_channel_clear() { + unsafe { + asm!(/* ASM */ "swi 0x1E" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +//MidiKey2Freq +//MultiBoot + +/// (`swi 0x28`) "SoundDriverVSyncOff", disables sound +/// +/// If you can't use vblank interrupts to ensure that `sound_driver_vsync` is +/// called every 1/60th of a second for any reason you must use this function to +/// stop sound DMA. Otherwise the DMA will overrun its buffer and cause random +/// noise. +#[inline(always)] +pub fn sound_driver_vsync_off() { + unsafe { + asm!(/* ASM */ "swi 0x28" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x29`) "SoundDriverVSyncOn", enables sound that was stopped by +/// `sound_driver_vsync_off`. +/// +/// Restarts sound DMA system. After restarting the sound you must have a vblank +/// interrupt followed by a `sound_driver_vsync` within 2/60th of a second. +#[inline(always)] +pub fn sound_driver_vsync_on() { + unsafe { + asm!(/* ASM */ "swi 0x29" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} diff --git a/src/builtins.rs b/src/builtins.rs new file mode 100644 index 0000000..db3615e --- /dev/null +++ b/src/builtins.rs @@ -0,0 +1,77 @@ +#![allow(missing_docs)] + +//! The module to provide "builtin" functions that LLVM expects. +//! +//! You shouldn't need to call anything in here yourself, it just has to be in +//! the translation unit and LLVM will find it. + +#[no_mangle] +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64"))] +pub extern "C" fn __clzsi2(mut x: usize) -> usize { + // TODO: const this? 
Requires const if + let mut y: usize; + let mut n: usize = { + #[cfg(target_pointer_width = "64")] + { + 64 + } + #[cfg(target_pointer_width = "32")] + { + 32 + } + #[cfg(target_pointer_width = "16")] + { + 16 + } + }; + #[cfg(target_pointer_width = "64")] + { + y = x >> 32; + if y != 0 { + n -= 32; + x = y; + } + } + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] + { + y = x >> 16; + if y != 0 { + n -= 16; + x = y; + } + } + y = x >> 8; + if y != 0 { + n -= 8; + x = y; + } + y = x >> 4; + if y != 0 { + n -= 4; + x = y; + } + y = x >> 2; + if y != 0 { + n -= 2; + x = y; + } + y = x >> 1; + if y != 0 { + n - 2 + } else { + n - x + } +} + +#[test] +fn __clzsi2_test() { + let mut i = 1 << 63; + while i > 0 { + assert_eq!(__clzsi2(i), i.leading_zeros() as usize); + i >>= 1; + } +} + +// TODO: add some shims +// #[no_mangle] extern "aapcs" fn __aeabi_uidiv(num: u32: denom: u32) -> u32 +// #[no_mangle] extern "aapcs" fn __aeabi_idiv(num: i32: denom: i32) -> u32 diff --git a/src/core_extras.rs b/src/core_extras.rs index fe82440..679b42f 100644 --- a/src/core_extras.rs +++ b/src/core_extras.rs @@ -1,39 +1,301 @@ //! Things that I wish were in core, but aren't. -/// A simple wrapper for any `*mut T` to adjust the basic operations. +//TODO(Lokathor): reorganize as gba::core_extras::fixed_point and gba::core_extras::volatile ? + +use core::{cmp::Ordering, iter::FusedIterator, marker::PhantomData, num::NonZeroUsize}; + +/// Abstracts the use of a volatile hardware address. /// -/// Read and Write are made to be volatile. Offset is made to be -/// wrapping_offset. This makes it much easier to correctly work with IO -/// Registers and all display related memory on the GBA. -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +/// If you're trying to do anything other than abstract a volatile hardware +/// device then you _do not want to use this type_. Use one of the many other +/// smart pointer types. 
+/// +/// A volatile address doesn't store a value in the normal way: It maps to some +/// real hardware _other than_ RAM, and that hardware might have any sort of +/// strange rules. The specifics of reading and writing depend on the hardware +/// being mapped. For example, a particular address might be read only (ignoring +/// writes), write only (returning some arbitrary value if you read it), +/// "normal" read write (where you read back what you wrote), or some complex +/// read-write situation where writes have an effect but you _don't_ read back +/// what you wrote. +/// +/// As you imagine it can be very unsafe. The design of this type is set up so +/// that _creation_ is unsafe, and _use_ is safe. This gives an optimal +/// experience, since you'll use memory locations a lot more often than you try +/// to name them, on average. +/// +/// `VolAddress` is _not_ a thread safe type. If your device is multi-threaded +/// then you must arrange for synchronization in some other way. A `VolAddress` +/// _can_ be used to share data between an interrupt running on a core and a +/// thread running on that core as long as all access of that location is +/// volatile (if you're using the `asm!` macro add the "volatile" option, if +/// you're linking in ASM with the linker that's effectively volatile since the +/// compiler doesn't get a chance to mess with it). +/// +/// # Safety +/// +/// In order for values of this type to operate correctly they must follow quite +/// a few safety limits: +/// +/// * The declared address must be non-null (it uses the `NonNull` optimization +/// for better iteration results). This shouldn't be a big problem, since +/// hardware can't really live at the null address. +/// * The declared address must be aligned for the declared type of `T`. +/// * The declared address must _always_ read as something that's a valid bit +/// pattern for `T`. Don't pick any enums or things like that if your hardware +/// doesn't back it up. 
If there's _any_ doubt at all, you must instead read +/// or write an unsigned int of the correct bit size and then parse the bits +/// by hand. +/// * The declared address must be a part of the address space that Rust's +/// allocator and/or stack frames will never use. If you're not sure, please +/// re-read the hardware specs of your device and its memory map until you +/// know. +/// +/// The exact points of UB are if the address is ever 0, or if you ever `read` +/// or `write` with the invalid pointer. For example, if you offset to some +/// crazy (non-zero) value and then never use it that won't be an immediate +/// trigger of UB. +#[derive(Debug)] #[repr(transparent)] -pub struct VolatilePtr(pub *mut T); - -impl core::fmt::Pointer for VolatilePtr { - /// Formats exactly like the inner `*mut T`. - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - write!(f, "{:p}", self.0) +pub struct VolAddress { + address: NonZeroUsize, + marker: PhantomData<*mut T>, +} +// Note(Lokathor): We have to hand implement all these traits because if we use +// `derive` then they only get derived if the inner `T` has the trait. However, +// since we're acting like a pointer to `T`, the capability we offer isn't +// affected by whatever type `T` ends up being. +impl Clone for VolAddress { + fn clone(&self) -> Self { + *self + } +} +impl Copy for VolAddress {} +impl PartialEq for VolAddress { + fn eq(&self, other: &Self) -> bool { + self.address == other.address + } +} +impl Eq for VolAddress {} +impl PartialOrd for VolAddress { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.address.cmp(&other.address)) + } +} +impl Ord for VolAddress { + fn cmp(&self, other: &Self) -> Ordering { + self.address.cmp(&other.address) } } -impl VolatilePtr { - /// Performs a `read_volatile`. - pub unsafe fn read(&self) -> T { - self.0.read_volatile() +impl VolAddress { + /// Constructs a new address. 
+ /// + /// # Safety + /// + /// You must follow the standard safety rules as outlined in the type docs. + pub const unsafe fn new_unchecked(address: usize) -> Self { + VolAddress { + address: NonZeroUsize::new_unchecked(address), + marker: PhantomData, + } } - /// Performs a `write_volatile`. - pub unsafe fn write(&self, data: T) { - self.0.write_volatile(data); + /// Casts the type of `T` into type `Z`. + /// + /// # Safety + /// + /// You must follow the standard safety rules as outlined in the type docs. + pub const unsafe fn cast(self) -> VolAddress { + VolAddress { + address: self.address, + marker: PhantomData, + } } - /// Performs a `wrapping_offset`. - pub fn offset(self, count: isize) -> Self { - VolatilePtr(self.0.wrapping_offset(count)) + /// Offsets the address by `offset` slots (like `pointer::wrapping_offset`). + /// + /// # Safety + /// + /// You must follow the standard safety rules as outlined in the type docs. + pub unsafe fn offset(self, offset: isize) -> Self { + // TODO: const this + // A negative `offset` casts to a huge `usize`, so the byte distance is computed with `wrapping_mul`; a plain `*` would panic on overflow in overflow-checked builds. + VolAddress { + address: NonZeroUsize::new_unchecked(self.address.get().wrapping_add((offset as usize).wrapping_mul(core::mem::size_of::<T>()))), + marker: PhantomData, + } } - /// Performs a cast into some new pointer type. - pub fn cast(self) -> VolatilePtr { - VolatilePtr(self.0 as *mut Z) + /// Checks that the current target type of this address is aligned at this + /// address value. + /// + /// Technically it's a safety violation to even make a `VolAddress` that isn't + /// aligned. However, I know you're gonna try doing the bad thing, and it's + /// better to give you a chance to call `is_aligned` and potentially back off + /// from the operation or throw a `debug_assert!` or something instead of + /// triggering UB. Eventually this will be `const fn`, which will potentially + /// let you spot errors without even having to run your program.
+ pub fn is_aligned(self) -> bool { + // TODO: const this + self.address.get() % core::mem::align_of::() == 0 + } + + /// Makes an iterator starting here across the given number of slots. + /// + /// # Safety + /// + /// The normal safety rules must be correct for each address iterated over. + pub const unsafe fn iter_slots(self, slots: usize) -> VolAddressIter { + VolAddressIter { vol_address: self, slots } + } + + // non-const and never can be. + + /// Reads a `Copy` value out of the address. + /// + /// The `Copy` bound is actually supposed to be `!Drop`, but rust doesn't + /// allow negative trait bounds. If your type isn't `Copy` you can use the + /// `read_non_copy` fallback to do an unsafe read. + /// + /// That said, I don't think that you legitimately have hardware that maps to + /// a Rust type with a `Drop` impl. If you do please tell me, I'm interested + /// to hear about it. + pub fn read(self) -> T + where + T: Copy, + { + unsafe { (self.address.get() as *mut T).read_volatile() } + } + + /// Reads a value out of the address with no trait bound. + /// + /// # Safety + /// + /// This is _not_ a move, it forms a bit duplicate of the current address + /// value. If `T` has a `Drop` trait that does anything it is up to you to + /// ensure that repeated drops do not cause UB (such as a double free). + pub unsafe fn read_non_copy(self) -> T { + (self.address.get() as *mut T).read_volatile() + } + + /// Writes a value to the address. + /// + /// Semantically, the value is moved into the `VolAddress` and then forgotten, + /// so if `T` has a `Drop` impl then that will never get executed. This is + /// "safe" under Rust's safety rules, but could cause something unintended + /// (eg: a memory leak). + pub fn write(self, val: T) { + unsafe { (self.address.get() as *mut T).write_volatile(val) } + } +} + +/// An iterator that produces a series of `VolAddress` values. 
+#[derive(Debug)] +pub struct VolAddressIter { + vol_address: VolAddress, + slots: usize, +} +impl Clone for VolAddressIter { + fn clone(&self) -> Self { + VolAddressIter { + vol_address: self.vol_address, + slots: self.slots, + } + } +} +impl PartialEq for VolAddressIter { + fn eq(&self, other: &Self) -> bool { + self.vol_address == other.vol_address && self.slots == other.slots + } +} +impl Eq for VolAddressIter {} +impl Iterator for VolAddressIter { + type Item = VolAddress; + + fn next(&mut self) -> Option { + if self.slots > 0 { + let out = self.vol_address; + unsafe { + self.slots -= 1; + self.vol_address = self.vol_address.offset(1); + } + Some(out) + } else { + None + } + } +} +impl FusedIterator for VolAddressIter {} + +/// This type is like `VolAddress`, but for when you have a block of values all +/// in a row. +/// +/// This is similar to the idea of an array or a slice, but called a "block" +/// because you could _also_ construct a `[VolAddress]`, and we want to avoid +/// any accidental confusion. +#[derive(Debug)] +pub struct VolAddressBlock { + vol_address: VolAddress, + slots: usize, +} +impl Clone for VolAddressBlock { + fn clone(&self) -> Self { + VolAddressBlock { + vol_address: self.vol_address, + slots: self.slots, + } + } +} +impl PartialEq for VolAddressBlock { + fn eq(&self, other: &Self) -> bool { + self.vol_address == other.vol_address && self.slots == other.slots + } +} +impl Eq for VolAddressBlock {} + +impl VolAddressBlock { + /// Constructs a new `VolAddressBlock`. + /// + /// # Safety + /// + /// The given `VolAddress` must be valid when offset by each of `0 .. slots` + pub const unsafe fn new_unchecked(vol_address: VolAddress, slots: usize) -> Self { + VolAddressBlock { vol_address, slots } + } + + /// Gives an iterator over this block's slots. + pub const fn iter(self) -> VolAddressIter { + VolAddressIter { + vol_address: self.vol_address, + slots: self.slots, + } + } + + /// Unchecked indexing into the block. 
+ /// + /// # Safety + /// + /// The slot given must be in bounds. + pub unsafe fn index_unchecked(self, slot: usize) -> VolAddress { + // TODO: const this + self.vol_address.offset(slot as isize) + } + + /// Checked "indexing" style access of the block, giving either a `VolAddress` or a panic. + pub fn index(self, slot: usize) -> VolAddress { + if slot < self.slots { + unsafe { self.vol_address.offset(slot as isize) } + } else { + panic!("Index Requested: {} >= Bound: {}", slot, self.slots) + } + } + + /// Checked "getting" style access of the block, giving an Option value. + pub fn get(self, slot: usize) -> Option> { + if slot < self.slots { + unsafe { Some(self.vol_address.offset(slot as isize)) } + } else { + None + } } } diff --git a/src/fixed.rs b/src/fixed.rs new file mode 100644 index 0000000..1e66998 --- /dev/null +++ b/src/fixed.rs @@ -0,0 +1,295 @@ +#![allow(non_camel_case_types)] + +//! Module for fixed point math types and operations. + +use core::{ + marker::PhantomData, + ops::{Add, Div, Mul, Neg, Shl, Shr, Sub}, +}; +use typenum::{consts::False, marker_traits::Unsigned, type_operators::IsEqual, U8}; + +/// Fixed point `T` value with `F` fractional bits. +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Fx { + num: T, + phantom: PhantomData, +} + +impl Fx { + /// Uses the provided value directly. + pub fn from_raw(r: T) -> Self { + Fx { + num: r, + phantom: PhantomData, + } + } + + /// Unwraps the inner value. + pub fn into_raw(self) -> T { + self.num + } + + /// Casts the base type, keeping the fractional bit quantity the same. 
+ pub fn cast_inner Z>(self, op: C) -> Fx { + Fx { + num: op(self.num), + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Add for Fx { + type Output = Self; + fn add(self, rhs: Fx) -> Self::Output { + Fx { + num: self.num + rhs.num, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Sub for Fx { + type Output = Self; + fn sub(self, rhs: Fx) -> Self::Output { + Fx { + num: self.num - rhs.num, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Shl for Fx { + type Output = Self; + fn shl(self, rhs: u32) -> Self::Output { + Fx { + num: self.num << rhs, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Shr for Fx { + type Output = Self; + fn shr(self, rhs: u32) -> Self::Output { + Fx { + num: self.num >> rhs, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Neg for Fx { + type Output = Self; + fn neg(self) -> Self::Output { + Fx { + num: -self.num, + phantom: PhantomData, + } + } +} + +macro_rules! fixed_point_methods { + ($t:ident) => { + impl Fx<$t, F> { + /// Gives the smallest positive non-zero value. + pub fn precision() -> Self { + Fx { + num: 1, + phantom: PhantomData, + } + } + + /// Makes a value with the integer part shifted into place. + pub fn from_int_part(i: $t) -> Self { + Fx { + num: i << F::U8, + phantom: PhantomData, + } + } + + /// Changes the fractional bit quantity, keeping the base type the same. + pub fn adjust_fractional_bits>(self) -> Fx<$t, Y> { + let leftward_movement: i32 = Y::to_i32() - F::to_i32(); + Fx { + num: if leftward_movement > 0 { + self.num << leftward_movement + } else { + self.num >> (-leftward_movement) + }, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_methods! {u8} +fixed_point_methods! {i8} +fixed_point_methods! {i16} +fixed_point_methods! {u16} +fixed_point_methods! {i32} +fixed_point_methods! {u32} + +macro_rules! 
fixed_point_signed_multiply { + ($t:ident) => { + impl Mul for Fx<$t, F> { + type Output = Self; + #[allow(clippy::suspicious_arithmetic_impl)] + fn mul(self, rhs: Fx<$t, F>) -> Self::Output { + let pre_shift = (self.num as i32).wrapping_mul(rhs.num as i32); + if pre_shift < 0 { + if pre_shift == core::i32::MIN { + Fx { + num: core::$t::MIN, + phantom: PhantomData, + } + } else { + Fx { + num: (-((-pre_shift) >> F::U8)) as $t, + phantom: PhantomData, + } + } + } else { + Fx { + num: (pre_shift >> F::U8) as $t, + phantom: PhantomData, + } + } + } + } + }; +} + +fixed_point_signed_multiply! {i8} +fixed_point_signed_multiply! {i16} +fixed_point_signed_multiply! {i32} + +macro_rules! fixed_point_unsigned_multiply { + ($t:ident) => { + impl Mul for Fx<$t, F> { + type Output = Self; + #[allow(clippy::suspicious_arithmetic_impl)] + fn mul(self, rhs: Fx<$t, F>) -> Self::Output { + Fx { + num: ((self.num as u32).wrapping_mul(rhs.num as u32) >> F::U8) as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_unsigned_multiply! {u8} +fixed_point_unsigned_multiply! {u16} +fixed_point_unsigned_multiply! {u32} + +macro_rules! fixed_point_signed_division { + ($t:ident) => { + impl Div for Fx<$t, F> { + type Output = Self; + #[allow(clippy::suspicious_arithmetic_impl)] + fn div(self, rhs: Fx<$t, F>) -> Self::Output { + let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); + let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); + Fx { + num: divide_result as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_signed_division! {i8} +fixed_point_signed_division! {i16} +fixed_point_signed_division! {i32} + +macro_rules! 
fixed_point_unsigned_division { + ($t:ident) => { + impl Div for Fx<$t, F> { + type Output = Self; + #[allow(clippy::suspicious_arithmetic_impl)] + fn div(self, rhs: Fx<$t, F>) -> Self::Output { + let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); + let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); + Fx { + num: divide_result as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_unsigned_division! {u8} +fixed_point_unsigned_division! {u16} +fixed_point_unsigned_division! {u32} + +/// Alias for an `i16` fixed point value with 8 fractional bits. +pub type fx8_8 = Fx; + +#[cfg(test)] +mod fixed_tests { + use super::*; + + #[test] + fn test_add() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let two = Fx::::from_int_part(2); + assert!(one + one == two) + } + + #[test] + fn test_sub() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let two = Fx::::from_int_part(2); + assert!(two - one == one) + } + + #[test] + fn test_shl() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let two = Fx::::from_int_part(2); + assert!(one << 1 == two) + } + + #[test] + fn test_shr() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let two = Fx::::from_int_part(2); + assert!(two >> 1 == one) + } + + #[test] + fn test_neg() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let neg_one = Fx::::from_int_part(-1); + assert!(-one == neg_one); + assert!(-(-one) == one); + } + + #[test] + fn test_mul() { + use typenum::U4; + let half = Fx::::from_int_part(1) >> 1; + let two = Fx::::from_int_part(2); + let three = Fx::::from_int_part(3); + let twelve = Fx::::from_int_part(12); + assert!(two * three == twelve * half); + } + + #[test] + fn test_div() { + use typenum::U4; + let two = Fx::::from_int_part(2); + let six = Fx::::from_int_part(6); + let twelve = Fx::::from_int_part(12); + assert!(twelve / two == six); + } +} diff --git a/src/io.rs b/src/io.rs new file mode 100644 index 0000000..5149b5a --- 
/dev/null +++ b/src/io.rs @@ -0,0 +1,13 @@ +//! This module contains definitions and types for the IO Registers. +//! +//! ## Naming +//! +//! In the interest of making things easy to search for, all io register +//! constants are given the names used in the +//! [GBATEK](https://problemkaputt.de/gbatek.htm) technical description. + +use super::*; + +use gba_proc_macro::register_bit; + +pub mod keypad; diff --git a/src/io/keypad.rs b/src/io/keypad.rs new file mode 100644 index 0000000..d03d242 --- /dev/null +++ b/src/io/keypad.rs @@ -0,0 +1,121 @@ +//! Allows access to the keypad. + +use super::*; + +/// The Key Input Register. +/// +/// This register follows the "low-active" convention. If you want your code to +/// follow the "high-active" convention (hint: you probably do, it's far easier +/// to work with) then call `read_key_input()` rather than reading this register +/// directly. It will perform the necessary bit flip operation for you. +pub const KEYINPUT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0130) }; + +/// A "tribool" value helps us interpret the arrow pad. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(i32)] +#[allow(missing_docs)] +pub enum TriBool { + Minus = -1, + Neutral = 0, + Plus = 1, +} + +newtype! { + /// Records a particular key press combination. + /// + /// Methods here follow the "high-active" convention, where a bit is enabled + /// when it's part of the set. 
+ #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] + KeyInput, u16 +} + +#[allow(missing_docs)] +impl KeyInput { + register_bit!(A_BIT, u16, 1, a_pressed); + register_bit!(B_BIT, u16, 1 << 1, b_pressed); + register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed); + register_bit!(START_BIT, u16, 1 << 3, start_pressed); + register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed); + register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed); + register_bit!(UP_BIT, u16, 1 << 6, up_pressed); + register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed); + register_bit!(R_BIT, u16, 1 << 8, r_pressed); + register_bit!(L_BIT, u16, 1 << 9, l_pressed); + + /// Takes the symmetric difference (XOR) between these keys and another set + /// of keys: the result holds exactly the keys whose state differs. + pub fn difference(self, other: Self) -> Self { + KeyInput(self.0 ^ other.0) + } + + /// Gives the arrow pad value as a tribool, with Plus being increased column + /// value (right). + pub fn column_direction(self) -> TriBool { + if self.right_pressed() { + TriBool::Plus + } else if self.left_pressed() { + TriBool::Minus + } else { + TriBool::Neutral + } + } + + /// Gives the arrow pad value as a tribool, with Plus being increased row + /// value (down). + pub fn row_direction(self) -> TriBool { + if self.down_pressed() { + TriBool::Plus + } else if self.up_pressed() { + TriBool::Minus + } else { + TriBool::Neutral + } + } +} + +/// Gets the current state of the keys +pub fn read_key_input() -> KeyInput { + // Note(Lokathor): The 10 used bits are "low when pressed" style, but the 6 + // unused bits are always low, so we XOR with this mask to get a result where + // the only active bits are currently pressed keys. + KeyInput(KEYINPUT.read() ^ 0b0000_0011_1111_1111) +} + +newtype! { + /// Allows configuration of when a keypad interrupt fires. + /// + /// * The most important bit here is the `irq_enabled` bit, which determines + /// if an interrupt happens at all. + /// * The second most important bit is the `irq_logical_and` bit.
If this bit + /// is set, _all_ the selected buttons are required to be set for the + /// interrupt to be fired (logical AND). If it's not set then _any_ of the + /// buttons selected can be pressed to fire the interrupt (logical OR). + /// * All other bits select a particular button to be required or not as part + /// of the interrupt firing. + /// + /// NOTE: This _only_ configures the operation of when keypad interrupts can + /// fire. You must still set the `IME` to have interrupts at all, and you must + /// further set `IE` for keypad interrupts to be possible. + #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] + KeyInterruptSetting, u16 +} +#[allow(missing_docs)] +impl KeyInterruptSetting { + register_bit!(A_BIT, u16, 1, a_pressed); + register_bit!(B_BIT, u16, 1 << 1, b_pressed); + register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed); + register_bit!(START_BIT, u16, 1 << 3, start_pressed); + register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed); + register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed); + register_bit!(UP_BIT, u16, 1 << 6, up_pressed); + register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed); + register_bit!(R_BIT, u16, 1 << 8, r_pressed); + register_bit!(L_BIT, u16, 1 << 9, l_pressed); + // + register_bit!(IRQ_ENABLE_BIT, u16, 1 << 14, irq_enabled); + register_bit!(IRQ_AND_BIT, u16, 1 << 15, irq_logical_and); +} + +/// Use this to configure when a keypad interrupt happens. +/// +/// See the `KeyInterruptSetting` type for more. +pub const KEYCNT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0132) }; diff --git a/src/io_registers.rs b/src/io_registers.rs index 7ea83d3..4ba89dc 100644 --- a/src/io_registers.rs +++ b/src/io_registers.rs @@ -15,19 +15,20 @@ // TODO(lokathor): IO Register newtypes. -use gba_proc_macro::{newtype, register_bit}; +use gba_proc_macro::register_bit; use super::*; /// LCD Control. Read/Write. 
/// /// * [gbatek entry](http://problemkaputt.de/gbatek.htm#lcdiodisplaycontrol) -pub const DISPCNT: VolatilePtr = VolatilePtr(0x400_0000 as *mut u16); +pub const DISPCNT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0000) }; newtype!( + /// A newtype over the various display control options that you have on a GBA. + #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] DisplayControlSetting, - u16, - "A newtype over the various display control options that you have on a GBA." + u16 ); #[allow(missing_docs)] @@ -97,24 +98,19 @@ pub enum DisplayControlMode { /// Assigns the given display control setting. pub fn set_display_control(setting: DisplayControlSetting) { - unsafe { - DISPCNT.write(setting.0); - } + DISPCNT.write(setting); } /// Obtains the current display control setting. pub fn display_control() -> DisplayControlSetting { - unsafe { DisplayControlSetting(DISPCNT.read()) } + DISPCNT.read() } -/// General LCD Status (STAT,LYC) -pub const DISPSTAT: VolatilePtr = VolatilePtr(0x400_0004 as *mut u16); - /// Vertical Counter (LY) -pub const VCOUNT: VolatilePtr = VolatilePtr(0x400_0006 as *mut u16); +pub const VCOUNT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0006) }; /// Obtains the current VCount value. pub fn vcount() -> u16 { - unsafe { VCOUNT.read() } + VCOUNT.read() } /// Performs a busy loop until VBlank starts. @@ -128,369 +124,3 @@ pub fn wait_until_vdraw() { // TODO: make this the better version with BIOS and interrupts and such. 
while vcount() >= SCREEN_HEIGHT as u16 {} } - -/// BG0 Control -pub const BG0CNT: VolatilePtr = VolatilePtr(0x400_0008 as *mut u16); - -/// BG1 Control -pub const BG1CNT: VolatilePtr = VolatilePtr(0x400_000A as *mut u16); - -/// BG2 Control -pub const BG2CNT: VolatilePtr = VolatilePtr(0x400_000C as *mut u16); - -/// BG3 Control -pub const BG3CNT: VolatilePtr = VolatilePtr(0x400_000E as *mut u16); - -/// BG0 X-Offset -pub const BG0HOFS: VolatilePtr = VolatilePtr(0x400_0010 as *mut u16); - -/// BG0 Y-Offset -pub const BG0VOFS: VolatilePtr = VolatilePtr(0x400_0012 as *mut u16); - -/// BG1 X-Offset -pub const BG1HOFS: VolatilePtr = VolatilePtr(0x400_0014 as *mut u16); - -/// BG1 Y-Offset -pub const BG1VOFS: VolatilePtr = VolatilePtr(0x400_0016 as *mut u16); - -/// BG2 X-Offset -pub const BG2HOFS: VolatilePtr = VolatilePtr(0x400_0018 as *mut u16); - -/// BG2 Y-Offset -pub const BG2VOFS: VolatilePtr = VolatilePtr(0x400_001A as *mut u16); - -/// BG3 X-Offset -pub const BG3HOFS: VolatilePtr = VolatilePtr(0x400_001C as *mut u16); - -/// BG3 Y-Offset -pub const BG3VOFS: VolatilePtr = VolatilePtr(0x400_001E as *mut u16); - -/// BG2 Rotation/Scaling Parameter A (dx) -pub const BG2PA: VolatilePtr = VolatilePtr(0x400_0020 as *mut u16); - -/// BG2 Rotation/Scaling Parameter B (dmx) -pub const BG2PB: VolatilePtr = VolatilePtr(0x400_0022 as *mut u16); - -/// BG2 Rotation/Scaling Parameter C (dy) -pub const BG2PC: VolatilePtr = VolatilePtr(0x400_0024 as *mut u16); - -/// BG2 Rotation/Scaling Parameter D (dmy) -pub const BG2PD: VolatilePtr = VolatilePtr(0x400_0026 as *mut u16); - -/// BG2 Reference Point X-Coordinate -pub const BG2X: VolatilePtr = VolatilePtr(0x400_0028 as *mut u32); - -/// BG2 Reference Point Y-Coordinate -pub const BG2Y: VolatilePtr = VolatilePtr(0x400_002C as *mut u32); - -/// BG3 Rotation/Scaling Parameter A (dx) -pub const BG3PA: VolatilePtr = VolatilePtr(0x400_0030 as *mut u16); - -/// BG3 Rotation/Scaling Parameter B (dmx) -pub const BG3PB: VolatilePtr = 
VolatilePtr(0x400_0032 as *mut u16); - -/// BG3 Rotation/Scaling Parameter C (dy) -pub const BG3PC: VolatilePtr = VolatilePtr(0x400_0034 as *mut u16); - -/// BG3 Rotation/Scaling Parameter D (dmy) -pub const BG3PD: VolatilePtr = VolatilePtr(0x400_0036 as *mut u16); - -/// BG3 Reference Point X-Coordinate -pub const BG3X: VolatilePtr = VolatilePtr(0x400_0038 as *mut u32); - -/// BG3 Reference Point Y-Coordinate -pub const BG3Y: VolatilePtr = VolatilePtr(0x400_003C as *mut u32); - -/// Window 0 Horizontal Dimensions -pub const WIN0H: VolatilePtr = VolatilePtr(0x400_0040 as *mut u16); - -/// Window 1 Horizontal Dimensions -pub const WIN1H: VolatilePtr = VolatilePtr(0x400_0042 as *mut u16); - -/// Window 0 Vertical Dimensions -pub const WIN0V: VolatilePtr = VolatilePtr(0x400_0044 as *mut u16); - -/// Window 1 Vertical Dimensions -pub const WIN1V: VolatilePtr = VolatilePtr(0x400_0046 as *mut u16); - -/// Inside of Window 0 and 1 -pub const WININ: VolatilePtr = VolatilePtr(0x400_0048 as *mut u16); - -/// Inside of OBJ Window & Outside of Windows -pub const WINOUT: VolatilePtr = VolatilePtr(0x400_004A as *mut u16); - -/// Mosaic Size -pub const MOSAIC: VolatilePtr = VolatilePtr(0x400_004C as *mut u16); - -/// Color Special Effects Selection -pub const BLDCNT: VolatilePtr = VolatilePtr(0x400_0050 as *mut u16); - -/// Alpha Blending Coefficients -pub const BLDALPHA: VolatilePtr = VolatilePtr(0x400_0052 as *mut u16); - -/// Brightness (Fade-In/Out) Coefficient -pub const BLDY: VolatilePtr = VolatilePtr(0x400_0054 as *mut u16); - -/// Channel 1 Sweep register (NR10) -pub const UND1CNT_L: VolatilePtr = VolatilePtr(0x400_0060 as *mut u16); - -/// Channel 1 Duty/Length/Envelope (NR11, NR12) -pub const UND1CNT_H: VolatilePtr = VolatilePtr(0x400_0062 as *mut u16); - -/// Channel 1 Frequency/Control (NR13, NR14) -pub const UND1CNT_X: VolatilePtr = VolatilePtr(0x400_0064 as *mut u16); - -/// Channel 2 Duty/Length/Envelope (NR21, NR22) -pub const UND2CNT_L: VolatilePtr = 
VolatilePtr(0x400_0068 as *mut u16); - -/// Channel 2 Frequency/Control (NR23, NR24) -pub const UND2CNT_H: VolatilePtr = VolatilePtr(0x400_006C as *mut u16); - -/// Channel 3 Stop/Wave RAM select (NR30) -pub const UND3CNT_L: VolatilePtr = VolatilePtr(0x400_0070 as *mut u16); - -/// Channel 3 Length/Volume (NR31, NR32) -pub const UND3CNT_H: VolatilePtr = VolatilePtr(0x400_0072 as *mut u16); - -/// Channel 3 Frequency/Control (NR33, NR34) -pub const UND3CNT_X: VolatilePtr = VolatilePtr(0x400_0074 as *mut u16); - -/// Channel 4 Length/Envelope (NR41, NR42) -pub const UND4CNT_L: VolatilePtr = VolatilePtr(0x400_0078 as *mut u16); - -/// Channel 4 Frequency/Control (NR43, NR44) -pub const UND4CNT_H: VolatilePtr = VolatilePtr(0x400_007C as *mut u16); - -/// Control Stereo/Volume/Enable (NR50, NR51) -pub const UNDCNT_L: VolatilePtr = VolatilePtr(0x400_0080 as *mut u16); - -/// Control Mixing/DMA Control -pub const UNDCNT_H: VolatilePtr = VolatilePtr(0x400_0082 as *mut u16); - -/// Control Sound on/off (NR52) -pub const UNDCNT_X: VolatilePtr = VolatilePtr(0x400_0084 as *mut u16); - -/// Sound PWM Control -pub const UNDBIAS: VolatilePtr = VolatilePtr(0x400_0088 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM0_L: VolatilePtr = VolatilePtr(0x400_0090 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM0_H: VolatilePtr = VolatilePtr(0x400_0092 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM1_L: VolatilePtr = VolatilePtr(0x400_0094 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM1_H: VolatilePtr = VolatilePtr(0x400_0096 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM2_L: VolatilePtr = VolatilePtr(0x400_0098 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM2_H: VolatilePtr = VolatilePtr(0x400_009A as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM3_L: VolatilePtr = VolatilePtr(0x400_009C as *mut u16); - -/// 
Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM3_H: VolatilePtr = VolatilePtr(0x400_009E as *mut u16); - -/// Channel A FIFO, Data 0-3 -pub const FIFO_A: VolatilePtr = VolatilePtr(0x400_00A0 as *mut u32); - -/// Channel B FIFO, Data 0-3 -pub const FIFO_B: VolatilePtr = VolatilePtr(0x400_00A4 as *mut u32); - -/// DMA 0 Source Address -pub const DMA0SAD: VolatilePtr = VolatilePtr(0x400_00B0 as *mut u32); - -/// DMA 0 Destination Address -pub const DMA0DAD: VolatilePtr = VolatilePtr(0x400_00B4 as *mut u32); - -/// DMA 0 Word Count -pub const DMA0CNT_L: VolatilePtr = VolatilePtr(0x400_00B8 as *mut u16); - -/// DMA 0 Control -pub const DMA0CNT_H: VolatilePtr = VolatilePtr(0x400_00BA as *mut u16); - -/// DMA 1 Source Address -pub const DMA1SAD: VolatilePtr = VolatilePtr(0x400_00BC as *mut u32); - -/// DMA 1 Destination Address -pub const DMA1DAD: VolatilePtr = VolatilePtr(0x400_00C0 as *mut u32); - -/// DMA 1 Word Count -pub const DMA1CNT_L: VolatilePtr = VolatilePtr(0x400_00C4 as *mut u16); - -/// DMA 1 Control -pub const DMA1CNT_H: VolatilePtr = VolatilePtr(0x400_00C6 as *mut u16); - -/// DMA 2 Source Address -pub const DMA2SAD: VolatilePtr = VolatilePtr(0x400_00C8 as *mut u32); - -/// DMA 2 Destination Address -pub const DMA2DAD: VolatilePtr = VolatilePtr(0x400_00CC as *mut u32); - -/// DMA 2 Word Count -pub const DMA2CNT_L: VolatilePtr = VolatilePtr(0x400_00D0 as *mut u16); - -/// DMA 2 Control -pub const DMA2CNT_H: VolatilePtr = VolatilePtr(0x400_00D2 as *mut u16); - -/// DMA 3 Source Address -pub const DMA3SAD: VolatilePtr = VolatilePtr(0x400_00D4 as *mut u32); - -/// DMA 3 Destination Address -pub const DMA3DAD: VolatilePtr = VolatilePtr(0x400_00D8 as *mut u32); - -/// DMA 3 Word Count -pub const DMA3CNT_L: VolatilePtr = VolatilePtr(0x400_00DC as *mut u16); - -/// DMA 3 Control -pub const DMA3CNT_H: VolatilePtr = VolatilePtr(0x400_00DE as *mut u16); - -/// Timer 0 Counter/Reload -pub const TM0D: VolatilePtr = VolatilePtr(0x400_0100 as *mut u16); - -/// Timer 
0 Control -pub const TM0CNT: VolatilePtr = VolatilePtr(0x400_0102 as *mut u16); - -/// Timer 1 Counter/Reload -pub const TM1D: VolatilePtr = VolatilePtr(0x400_0104 as *mut u16); - -/// Timer 1 Control -pub const TM1CNT: VolatilePtr = VolatilePtr(0x400_0106 as *mut u16); - -/// Timer 2 Counter/Reload -pub const TM2D: VolatilePtr = VolatilePtr(0x400_0108 as *mut u16); - -/// Timer 2 Control -pub const TM2CNT: VolatilePtr = VolatilePtr(0x400_010A as *mut u16); - -/// Timer 3 Counter/Reload -pub const TM3D: VolatilePtr = VolatilePtr(0x400_010C as *mut u16); - -/// Timer 3 Control -pub const TM3CNT: VolatilePtr = VolatilePtr(0x400_010E as *mut u16); - -/// SIO Data (Normal-32bit Mode; shared with below) -pub const SIODATA32: VolatilePtr = VolatilePtr(0x400_0120 as *mut u32); - -/// SIO Data 0 (Parent) (Multi-Player Mode) -pub const SIOMULTI0: VolatilePtr = VolatilePtr(0x400_0120 as *mut u16); - -/// SIO Data 1 (1st Child) (Multi-Player Mode) -pub const SIOMULTI1: VolatilePtr = VolatilePtr(0x400_0122 as *mut u16); - -/// SIO Data 2 (2nd Child) (Multi-Player Mode) -pub const SIOMULTI2: VolatilePtr = VolatilePtr(0x400_0124 as *mut u16); - -/// SIO Data 3 (3rd Child) (Multi-Player Mode) -pub const SIOMULTI3: VolatilePtr = VolatilePtr(0x400_0126 as *mut u16); - -/// SIO Control Register -pub const SIOCNT: VolatilePtr = VolatilePtr(0x400_0128 as *mut u16); - -/// D SIO Data (Local of MultiPlayer; shared below) -pub const SIOMLT_SEN: VolatilePtr = VolatilePtr(0x400_012A as *mut u16); - -/// SIO Data (Normal-8bit and UART Mode) -pub const SIODATA8: VolatilePtr = VolatilePtr(0x400_012A as *mut u16); - -/// Key Status -pub const KEYINPUT: VolatilePtr = VolatilePtr(0x400_0130 as *mut u16); - -/// A "tribool" value helps us interpret the arrow pad. 
-#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(i32)] -#[allow(missing_docs)] -pub enum TriBool { - Minus = -1, - Neutral = 0, - Plus = 1, -} - -newtype!(KeyInputSetting, u16, "A newtype over the key input state of the GBA"); - -#[allow(missing_docs)] -impl KeyInputSetting { - register_bit!(A_BIT, u16, 1, a_pressed); - register_bit!(B_BIT, u16, 1 << 1, b_pressed); - register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed); - register_bit!(START_BIT, u16, 1 << 3, start_pressed); - register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed); - register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed); - register_bit!(UP_BIT, u16, 1 << 6, up_pressed); - register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed); - register_bit!(R_BIT, u16, 1 << 8, r_pressed); - register_bit!(L_BIT, u16, 1 << 9, l_pressed); - - /// Takes the difference between these keys and another set of keys. - pub fn difference(self, other: KeyInputSetting) -> KeyInputSetting { - KeyInputSetting(self.0 ^ other.0) - } - - /// Gives the arrow pad value as a tribool, with Plus being increased column - /// value (right). - pub fn column_direction(self) -> TriBool { - if self.right_pressed() { - TriBool::Plus - } else if self.left_pressed() { - TriBool::Minus - } else { - TriBool::Neutral - } - } - - /// Gives the arrow pad value as a tribool, with Plus being increased row - /// value (down). - pub fn row_direction(self) -> TriBool { - if self.down_pressed() { - TriBool::Plus - } else if self.up_pressed() { - TriBool::Minus - } else { - TriBool::Neutral - } - } -} - -/// Gets the current state of the keys -pub fn key_input() -> KeyInputSetting { - // Note(Lokathor): The 10 used bits are "low when pressed" style, but the 6 - // unused bits are always low, so we XOR with this mask to get a result where - // the only active bits are currently pressed keys. 
- unsafe { KeyInputSetting(KEYINPUT.read() ^ 0b0000_0011_1111_1111) } -} - -/// Key Interrupt Control -pub const KEYCNT: VolatilePtr = VolatilePtr(0x400_0132 as *mut u16); - -/// SIO Mode Select/General Purpose Data -pub const RCNT: VolatilePtr = VolatilePtr(0x400_0134 as *mut u16); - -/// SIO JOY Bus Control -pub const JOYCNT: VolatilePtr = VolatilePtr(0x400_0140 as *mut u16); - -/// SIO JOY Bus Receive Data -pub const JOY_RECV: VolatilePtr = VolatilePtr(0x400_0150 as *mut u32); - -/// SIO JOY Bus Transmit Data -pub const JOY_TRANS: VolatilePtr = VolatilePtr(0x400_0154 as *mut u32); - -/// SIO JOY Bus Receive Status -pub const JOYSTAT: VolatilePtr = VolatilePtr(0x400_0158 as *mut u16); - -/// Interrupt Enable Register -pub const IE: VolatilePtr = VolatilePtr(0x400_0200 as *mut u16); - -/// Interrupt Request Flags / IRQ Acknowledge -pub const IF: VolatilePtr = VolatilePtr(0x400_0202 as *mut u16); - -/// Game Pak Waitstate Control -pub const WAITCNT: VolatilePtr = VolatilePtr(0x400_0204 as *mut u16); - -/// Interrupt Master Enable Register -pub const IME: VolatilePtr = VolatilePtr(0x400_0208 as *mut u16); diff --git a/src/lib.rs b/src/lib.rs index 80f491c..be313d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,14 @@ #![cfg_attr(not(test), no_std)] -#![cfg_attr(not(test), feature(asm))] +#![feature(asm)] +#![feature(const_int_wrapping)] +#![feature(min_const_unsafe_fn)] #![warn(missing_docs)] #![allow(clippy::cast_lossless)] #![deny(clippy::float_arithmetic)] //! This crate helps you write GBA ROMs. //! -//! # SAFETY POLICY +//! ## SAFETY POLICY //! //! Some parts of this crate are safe wrappers around unsafe operations. This is //! good, and what you'd expect from a Rust crate. @@ -16,78 +18,211 @@ //! //! **Do not** use this crate in programs that aren't running on the GBA. If you //! do, it's a giant bag of Undefined Behavior. -//! -//! # TESTING POLICY -//! -//! It is the intent of the crate authors that as much of the crate as possible -//! 
be written so that you can use `cargo test` for at least some parts of your -//! code without everything exploding instantly. To that end, where possible we -//! attempt to use `cfg` flags to make things safe for `cargo test`. Hopefully -//! we got it all. + +/// Assists in defining a newtype wrapper over some base type. +/// +/// Note that rustdoc and derives are all the "meta" stuff, so you can write all +/// of your docs and derives in front of your newtype in the same way you would +/// for a normal struct. Then give the new name, a comma, and the wrapped type. +/// +/// The macro _assumes_ that you'll be using it to wrap zero-safe numeric types, +/// so it automatically provides a `const fn` method for `new` that just wraps +/// `0`. If this is not desired you can add `, no frills` to the invocation. +/// +/// Example: +/// ``` +/// newtype! { +/// /// Records a particular key press combination. +/// #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] +/// KeyInput, u16 +/// } +/// ``` +#[macro_export] +macro_rules! newtype { + ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => { + $(#[$attr])* + #[repr(transparent)] + pub struct $new_name($old_name); + impl $new_name { + /// A `const` "zero value" constructor + pub const fn new() -> Self { + $new_name(0) + } + } + }; + ($(#[$attr:meta])* $new_name:ident, $old_name:ident, no frills) => { + $(#[$attr])* + #[repr(transparent)] + pub struct $new_name($old_name); + }; +} + +pub mod builtins; + +pub mod fixed; + +pub mod bios; pub mod core_extras; pub(crate) use crate::core_extras::*; -pub mod io_registers; +pub mod io; pub mod video_ram; -pub(crate) use crate::video_ram::*; -/// Combines the Red, Blue, and Green provided into a single color value. -pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red +/// Performs unsigned divide and remainder, gives None if dividing by 0. +pub fn divrem_u32(numer: u32, denom: u32) -> Option<(u32, u32)> { + // TODO: const this?
Requires const if + if denom == 0 { + None + } else { + Some(unsafe { divrem_u32_unchecked(numer, denom) }) + } } -/// BIOS Call: Div (GBA SWI 0x06). +/// Performs divide and remainder, no check for 0 division. /// -/// Gives just the DIV output of `numerator / denominator`. +/// # Safety /// -/// # Panics -/// -/// If `denominator` is 0. -#[inline] -pub fn div(numerator: i32, denominator: i32) -> i32 { - div_modulus(numerator, denominator).0 +/// If you call this with a denominator of 0 the result is implementation +/// defined (not literal UB) including but not limited to: an infinite loop, +/// panic on overflow, or incorrect output. +pub unsafe fn divrem_u32_unchecked(numer: u32, denom: u32) -> (u32, u32) { + // TODO: const this? Requires const if + if (numer >> 5) < denom { + divrem_u32_simple(numer, denom) + } else { + divrem_u32_non_restoring(numer, denom) + } } -/// BIOS Call: Div (GBA SWI 0x06). -/// -/// Gives just the MOD output of `numerator / denominator`. -/// -/// # Panics -/// -/// If `denominator` is 0. -#[inline] -pub fn modulus(numerator: i32, denominator: i32) -> i32 { - div_modulus(numerator, denominator).1 +/// The simplest form of division. If N is too much larger than D this will be +/// extremely slow. If N is close enough to D then it will likely be faster than +/// the non_restoring form. +fn divrem_u32_simple(mut numer: u32, denom: u32) -> (u32, u32) { + // TODO: const this? Requires const if + let mut quot = 0; + while numer >= denom { + numer -= denom; + quot += 1; + } + (quot, numer) } -/// BIOS Call: Div (GBA SWI 0x06). -/// -/// Gives both the DIV and MOD output of `numerator / denominator`. -/// -/// # Panics -/// -/// If `denominator` is 0. 
-#[inline] -pub fn div_modulus(numerator: i32, denominator: i32) -> (i32, i32) { - assert!(denominator != 0); - #[cfg(not(test))] - { - let div_out: i32; - let mod_out: i32; - unsafe { - asm!(/* assembly template */ "swi 0x06" - :/* output operands */ "={r0}"(div_out), "={r1}"(mod_out) - :/* input operands */ "{r0}"(numerator), "{r1}"(denominator) - :/* clobbers */ "r3" - :/* options */ - ); +/// Takes a fixed quantity of time based on the bit width of the number (in this +/// case 32). +fn divrem_u32_non_restoring(numer: u32, denom: u32) -> (u32, u32) { + // TODO: const this? Requires const if + let mut r: i64 = numer as i64; + let d: i64 = (denom as i64) << 32; + let mut q: u32 = 0; + let mut i = 1 << 31; + while i > 0 { + if r >= 0 { + q |= i; + r = 2 * r - d; + } else { + r = 2 * r + d; } - (div_out, mod_out) + i >>= 1; } - #[cfg(test)] - { - (numerator / denominator, numerator % denominator) + q -= !q; + if r < 0 { + q -= 1; + r += d; + } + r >>= 32; + // TODO: remove this once we've done more checks here. + debug_assert!(r >= 0); + debug_assert!(r <= core::u32::MAX as i64); + (q, r as u32) +} + +/// Performs signed divide and remainder, gives None if dividing by 0 or +/// computing `MIN/-1` +pub fn divrem_i32(numer: i32, denom: i32) -> Option<(i32, i32)> { + if denom == 0 || (numer == core::i32::MIN && denom == -1) { + None + } else { + Some(unsafe { divrem_i32_unchecked(numer, denom) }) } } + +/// Performs signed divide and remainder, no check for 0 division or `MIN/-1`. +/// +/// # Safety +/// +/// * If you call this with a denominator of 0 the result is implementation +/// defined (not literal UB) including but not limited to: an infinite loop, +/// panic on overflow, or incorrect output. +/// * If you call this with `MIN/-1` the quotient wraps around to `MIN` (which +/// is incorrect), because of how two's complement works. +pub unsafe fn divrem_i32_unchecked(numer: i32, denom: i32) -> (i32, i32) { + // TODO: const this?
Requires const if + let unsigned_numer = numer.wrapping_abs() as u32; // `MIN` becomes 2^31 instead of overflowing + let unsigned_denom = denom.wrapping_abs() as u32; + let opposite_sign = (numer ^ denom) < 0; + let (udiv, urem) = if (unsigned_numer >> 5) < unsigned_denom { // compare magnitudes, same rule as `divrem_u32_unchecked` + divrem_u32_simple(unsigned_numer, unsigned_denom) + } else { + divrem_u32_non_restoring(unsigned_numer, unsigned_denom) + }; + match (opposite_sign, numer < 0) { + (true, true) => ((udiv as i32).wrapping_neg(), -(urem as i32)), // `MIN / 1` has a quotient magnitude of 2^31 + (true, false) => ((udiv as i32).wrapping_neg(), urem as i32), + (false, true) => (udiv as i32, -(urem as i32)), + (false, false) => (udiv as i32, urem as i32), + } +} + +/* +#[cfg(test)] +mod tests { + use super::*; + use quickcheck::quickcheck; + + // We have an explicit property on the non_restoring division + quickcheck! { + fn divrem_u32_non_restoring_prop(num: u32, denom: u32) -> bool { + if denom > 0 { + divrem_u32_non_restoring(num, denom) == (num / denom, num % denom) + } else { + true + } + } + } + + // We have an explicit property on the simple division + quickcheck! { + fn divrem_u32_simple_prop(num: u32, denom: u32) -> bool { + if denom > 0 { + divrem_u32_simple(num, denom) == (num / denom, num % denom) + } else { + true + } + } + } + + // Test the u32 wrapper + quickcheck! { + fn divrem_u32_prop(num: u32, denom: u32) -> bool { + if denom > 0 { + divrem_u32(num, denom).unwrap() == (num / denom, num % denom) + } else { + divrem_u32(num, denom).is_none() + } + } + } + + // test the i32 wrapper + quickcheck! { + fn divrem_i32_prop(num: i32, denom: i32) -> bool { + if denom == 0 || num == core::i32::MIN && denom == -1 { + divrem_i32(num, denom).is_none() + } else { + divrem_i32(num, denom).unwrap() == (num / denom, num % denom) + } + } + } +} +*/ diff --git a/src/macros.rs b/src/macros.rs deleted file mode 100644 index 3a38290..0000000 --- a/src/macros.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! Module for all macros. -//! -//! Macros are the only thing in Rust where declaration order matters, so we -//!
place all of them here regardless of what they do so that the macros module -//! can appear at the "top" of the library and all other modules can see them -//! properly. - -// no macros yet! diff --git a/src/video_ram.rs b/src/video_ram.rs index 6cb223a..232249f 100644 --- a/src/video_ram.rs +++ b/src/video_ram.rs @@ -15,6 +15,8 @@ pub use super::*; +// TODO: kill all this too + /// The physical width in pixels of the GBA screen. pub const SCREEN_WIDTH: isize = 240; @@ -28,6 +30,8 @@ pub const SCREEN_HEIGHT: isize = 160; /// value as just being a `usize`. pub const VRAM_BASE_ADDRESS: usize = 0x0600_0000; +const MODE3_VRAM: VolAddress = unsafe { VolAddress::new_unchecked(VRAM_BASE_ADDRESS) }; + /// Draws a pixel to the screen while in Display Mode 3, with bounds checks. /// /// # Panics @@ -51,7 +55,7 @@ pub fn mode3_draw_pixel(col: isize, row: isize, color: u16) { /// * `col` must be in `0..SCREEN_WIDTH` /// * `row` must be in `0..SCREEN_HEIGHT` pub unsafe fn mode3_draw_pixel_unchecked(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM_BASE_ADDRESS as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); + MODE3_VRAM.offset(col + row * SCREEN_WIDTH).write(color); } /// Reads the given pixel of video memory according to Mode 3 placement. @@ -61,7 +65,7 @@ pub unsafe fn mode3_draw_pixel_unchecked(col: isize, row: isize, color: u16) { /// If the location is out of bounds you get `None`. pub fn mode3_read_pixel(col: isize, row: isize) -> Option { if col >= 0 && col < SCREEN_WIDTH && row >= 0 && row < SCREEN_HEIGHT { - unsafe { Some(VolatilePtr(VRAM_BASE_ADDRESS as *mut u16).offset(col + row * SCREEN_WIDTH).read()) } + unsafe { Some(MODE3_VRAM.offset(col + row * SCREEN_WIDTH).read()) } } else { None } @@ -72,9 +76,8 @@ pub unsafe fn mode3_clear_screen(color: u16) { // TODO: use DMA? 
let color = color as u32; let bulk_color = color << 16 | color; - let mut ptr = VolatilePtr(VRAM_BASE_ADDRESS as *mut u32); - for _ in 0..(SCREEN_HEIGHT * SCREEN_WIDTH / 2) { - ptr.write(bulk_color); - ptr = ptr.offset(1); + let block: VolAddressBlock = VolAddressBlock::new_unchecked(MODE3_VRAM.cast::(), (SCREEN_HEIGHT * SCREEN_WIDTH / 2) as usize); + for b in block.iter() { + b.write(bulk_color); } } diff --git a/todo_check.bat b/todo_check.bat new file mode 100644 index 0000000..6f1e3ea --- /dev/null +++ b/todo_check.bat @@ -0,0 +1,12 @@ +@echo off + +echo ------- +echo ------- + +set Wildcard=*.rs + +echo TODOS FOUND: +findstr -s -n -i -l "TODO" %Wildcard% + +echo ------- +echo -------