From 8efef6ebc5e8d584deb499c990d0e896fa4247e6 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 5 Apr 2021 18:11:42 -0600 Subject: [PATCH] No more old book stuff (#123) * stop with the book, we should focus on the crate. * Update README.md * Update README.md --- Cargo.toml | 2 +- README.md | 59 +- book/book.toml | 7 - book/src-bak/00-concepts-index.md | 38 - book/src-bak/00-introduction-index.md | 21 - book/src-bak/00-non-video-index.md | 21 - book/src-bak/00-quirks-index.md | 9 - book/src-bak/00-video-index.md | 9 - book/src-bak/01-buttons.md | 102 -- book/src-bak/01-cpu.md | 1 - book/src-bak/01-no_std.md | 160 --- book/src-bak/01-requirements.md | 29 - book/src-bak/01-rgb15.md | 1 - book/src-bak/02-bios.md | 239 ---- book/src-bak/02-fixed_only.md | 548 --------- book/src-bak/02-goals_and_style.md | 23 - book/src-bak/02-timers.md | 1 - book/src-bak/03-dma.md | 133 --- book/src-bak/03-volatile_destination.md | 317 ----- book/src-bak/03-wram.md | 28 - book/src-bak/04-io-registers.md | 3 - book/src-bak/04-newtype.md | 206 ---- book/src-bak/04-sound.md | 1 - book/src-bak/05-const_asserts.md | 130 -- book/src-bak/05-help_and_resources.md | 78 -- book/src-bak/05-interrupts.md | 1 - book/src-bak/05-palram.md | 50 - book/src-bak/06-link_cable.md | 1 - book/src-bak/06-vram.md | 24 - book/src-bak/07-game_pak.md | 1 - book/src-bak/07-oam.md | 62 - book/src-bak/08-rom.md | 14 - book/src-bak/09-sram.md | 21 - book/src-bak/gba_prng.md | 1119 ------------------ book/src-bak/index.md | 52 - book/src-bak/io_registers.md | 33 - book/src-bak/light_cycle.md | 135 --- book/src-bak/memory_game.md | 316 ----- book/src-bak/obj_memory_2d1d.jpg | Bin 150277 -> 0 bytes book/src-bak/regular_backgrounds.md | 313 ----- book/src-bak/regular_objects.md | 417 ------- book/src-bak/screenshot_checkers.png | Bin 5546 -> 0 bytes book/src-bak/the_display_control_register.md | 109 -- book/src-bak/the_key_input_register.md | 213 ---- book/src-bak/the_vcount_register.md | 71 -- book/src-bak/tile_data.md 
| 130 -- book/src-bak/video_memory_intro.md | 113 -- book/src/SUMMARY.md | 9 - book/src/bitmap-video.md | 214 ---- book/src/development-setup.md | 189 --- book/src/gba-asm.md | 123 -- book/src/io-registers.md | 237 ---- book/src/the-hardware-memory-map.md | 379 ------ book/src/volatile.md | 48 - 54 files changed, 32 insertions(+), 6528 deletions(-) delete mode 100644 book/book.toml delete mode 100644 book/src-bak/00-concepts-index.md delete mode 100644 book/src-bak/00-introduction-index.md delete mode 100644 book/src-bak/00-non-video-index.md delete mode 100644 book/src-bak/00-quirks-index.md delete mode 100644 book/src-bak/00-video-index.md delete mode 100644 book/src-bak/01-buttons.md delete mode 100644 book/src-bak/01-cpu.md delete mode 100644 book/src-bak/01-no_std.md delete mode 100644 book/src-bak/01-requirements.md delete mode 100644 book/src-bak/01-rgb15.md delete mode 100644 book/src-bak/02-bios.md delete mode 100644 book/src-bak/02-fixed_only.md delete mode 100644 book/src-bak/02-goals_and_style.md delete mode 100644 book/src-bak/02-timers.md delete mode 100644 book/src-bak/03-dma.md delete mode 100644 book/src-bak/03-volatile_destination.md delete mode 100644 book/src-bak/03-wram.md delete mode 100644 book/src-bak/04-io-registers.md delete mode 100644 book/src-bak/04-newtype.md delete mode 100644 book/src-bak/04-sound.md delete mode 100644 book/src-bak/05-const_asserts.md delete mode 100644 book/src-bak/05-help_and_resources.md delete mode 100644 book/src-bak/05-interrupts.md delete mode 100644 book/src-bak/05-palram.md delete mode 100644 book/src-bak/06-link_cable.md delete mode 100644 book/src-bak/06-vram.md delete mode 100644 book/src-bak/07-game_pak.md delete mode 100644 book/src-bak/07-oam.md delete mode 100644 book/src-bak/08-rom.md delete mode 100644 book/src-bak/09-sram.md delete mode 100644 book/src-bak/gba_prng.md delete mode 100644 book/src-bak/index.md delete mode 100644 book/src-bak/io_registers.md delete mode 100644 
book/src-bak/light_cycle.md delete mode 100644 book/src-bak/memory_game.md delete mode 100644 book/src-bak/obj_memory_2d1d.jpg delete mode 100644 book/src-bak/regular_backgrounds.md delete mode 100644 book/src-bak/regular_objects.md delete mode 100644 book/src-bak/screenshot_checkers.png delete mode 100644 book/src-bak/the_display_control_register.md delete mode 100644 book/src-bak/the_key_input_register.md delete mode 100644 book/src-bak/the_vcount_register.md delete mode 100644 book/src-bak/tile_data.md delete mode 100644 book/src-bak/video_memory_intro.md delete mode 100644 book/src/SUMMARY.md delete mode 100644 book/src/bitmap-video.md delete mode 100644 book/src/development-setup.md delete mode 100644 book/src/gba-asm.md delete mode 100644 book/src/io-registers.md delete mode 100644 book/src/the-hardware-memory-map.md delete mode 100644 book/src/volatile.md diff --git a/Cargo.toml b/Cargo.toml index 51d7ac0..419badd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "gba" -description = "A crate (and book) for making GBA games with Rust." +description = "A crate for making GBA games with Rust." version = "0.4.0-pre1" authors = ["Lokathor ", "Thomas Winwood "] repository = "https://github.com/rust-console/gba" diff --git a/README.md b/README.md index 7c109e0..b385ae5 100644 --- a/README.md +++ b/README.md @@ -11,43 +11,45 @@ # gba -_Eventually_ there will be a full [Tutorial -Book](https://rust-console.github.io/gba/) that goes along with this crate. -However, currently the development focus is leaning towards having minimal -coverage of all the parts of the GBA. Until that's done, unfortunately the book -will be in a rather messy state. +A crate to make GBA programming easy. -## What's Missing +Currently we don't have as much documentation as we'd like. +If you check out the [awesome-gbadev](https://github.com/gbdev/awesome-gbadev) repository they have many resources, though most are oriented towards C. 
-The following major GBA features are still missing from the crate: +## First Time Setup -* Affine Graphics -* Interrupt Handling -* Serial Communication +Building for the GBA requires Nightly rust, and also uses the `build-std` feature, so you'll need the rust source available. -## Build Dependencies - -Install required cargo packages ```sh rustup install nightly rustup +nightly component add rust-src +``` + +You'll also need the ARM binutils so that you can have the assembler and linker for the ARMv4T architecture. +The way to get them varies by platform: +* Ubuntu and other debian-like linux distros will usually have them in the package manager. + ```shell + sudo apt-get install binutils-arm-none-eabi + ``` +* With OSX you can get them via homebrew. + ```shell + brew install --cask gcc-arm-embedded + ``` +* On Windows you can get the installer from ARM's website and run that. + * Download the [GNU Arm Embedded Toolchain](https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads) + * When installing the toolchain, make sure to select "Add path to environment variable" during install. + * You'll have to restart any open command prompts after you run the installer so that they see the new PATH value. + +Finally, rustc itself is only able to make ELF format files. These can be run in emulators, but aren't able to be played on actual hardware. +You'll need to convert the ELF file into a GBA rom. There's a `cargo-make` file in this repository to do this, and it relies on a tool called `gbafix` +to assign the right header data to the ROM when packing it.
+ +```sh cargo install cargo-make cargo install gbafix ``` -Install arm build tools -* Ubuntu - ```shell - sudo apt-get install binutils-arm-none-eabi - ``` -* OSX - ```shell - brew install --cask gcc-arm-embedded - ``` -* Windows - * Download the [GNU Arm Embedded Toolchain](https://developer.arm.com/tools-and-software/open-source-software/developer-tools/gnu-toolchain/gnu-rm/downloads) - * Install the toolchain, make sure to select "Add path to environment variable" during install - + # Contribution -This crate is Apache2 licensed and any contributions you submit must also be -Apache2 licensed. +This crate is tri-licensed under Zlib / Apache-2.0 / MIT. +Any contributions you submit must be licensed the same. diff --git a/book/book.toml b/book/book.toml deleted file mode 100644 index 69085ec..0000000 --- a/book/book.toml +++ /dev/null @@ -1,7 +0,0 @@ -[book] -title = "Rust GBA Guide" -authors = ["Lokathor"] - -[build] -build-dir = "../target/book-output" -create-missing = true diff --git a/book/src-bak/00-concepts-index.md b/book/src-bak/00-concepts-index.md deleted file mode 100644 index 10fe20c..0000000 --- a/book/src-bak/00-concepts-index.md +++ /dev/null @@ -1,38 +0,0 @@ -# Broad Concepts - -The GameBoy Advance sits in a middle place between the chthonic game consoles of -the ancient past and the "small PC in a funny case" consoles of the modern age. - -On the one hand, yeah, you're gonna find a few strange conventions as you learn -all the ropes. - -On the other, at least we're writing in Rust at all, and not having to do all -the assembly by hand. - -This chapter for "concepts" has a section for each part of the GBA's hardware -memory map, going by increasing order of base address value. The sections try to -explain as much as possible while sticking to just the concerns you might have -regarding that part of the memory map. 
- -For an assessment of how to wrangle all three parts of the video system (PALRAM, -VRAM, and OAM), along with the correct IO registers, into something that shows a -picture, you'll want the Video chapter. - -Similarly, the "IO Registers" part of the GBA actually controls how you interact -with every single bit of hardware connected to the GBA. A full description of -everything is obviously too much for just one section of the book. Instead you -get an overview of general IO register rules and advice. Each particular -register is described in the appropriate sections of either the Video or -Non-Video chapters. - -## Bus Size - -TODO: describe this - -## Minimum Write Size - -TODO: talk about parts where you can't write one byte at a time - -## Volatile or Not? - -TODO: discuss what memory should be used volatile style and what can be used normal style. \ No newline at end of file diff --git a/book/src-bak/00-introduction-index.md b/book/src-bak/00-introduction-index.md deleted file mode 100644 index 4a75dba..0000000 --- a/book/src-bak/00-introduction-index.md +++ /dev/null @@ -1,21 +0,0 @@ -# Introduction - -This is the book for learning how to write GameBoy Advance (GBA) games in Rust. - -I'm **Lokathor**, the main author of the book. There's also **Ketsuban** who -provides the technical advisement, reviews the PRs, and keeps my crazy in check. - -The book is a work in progress, as you can see if you actually try to open many -of the pages listed in the Table Of Contents. - -## Feedback - -It's very often hard to tell when you've explained something properly. In the -same way that your brain will read over small misspellings and correct things -into the right word, if an explanation for something you already understand -accidentally skips over some small detail then your brain can fill in the gaps -without you realizing it. 
- -**Please**, if things don't make sense then [file an -issue](https://github.com/rust-console/gba/issues) about it so I know where -things need to improve. diff --git a/book/src-bak/00-non-video-index.md b/book/src-bak/00-non-video-index.md deleted file mode 100644 index aff4a81..0000000 --- a/book/src-bak/00-non-video-index.md +++ /dev/null @@ -1,21 +0,0 @@ -# Non-Video - -Besides video effects the GBA still has an okay amount of stuff going on. - -Obviously you'll want to know how to read the user's button inputs. That can -almost go without saying, except that I said it. - -Each other part can be handled in about any order you like. - -Using interrupts is perhaps one of the hardest things for us as Rust programmers -due to quirks in our compilation process. Our code all gets compiled to 16-bit -THUMB instructions, and we don't have a way to mark a function to be compiled -using 32-bit ASM instructions instead. However, an interrupt handler _must_ be -written in 32-bit ASM instructions for it to work. That means that we have to -write our interrupt handler in 32-bit ASM by hand. We'll do it, but I don't -think we'll be too happy about it. - -The Link Cable related stuff is also probably a little harder to test than -anything else. Just because link cable emulation isn't always the best, and or -you need two GBAs with two flash carts and the cable for hardware testing. -Still, we'll try to go over it eventually. diff --git a/book/src-bak/00-quirks-index.md b/book/src-bak/00-quirks-index.md deleted file mode 100644 index 8df76e7..0000000 --- a/book/src-bak/00-quirks-index.md +++ /dev/null @@ -1,9 +0,0 @@ -# Quirks - -The GBA supports a lot of totally normal Rust code exactly like you'd think. - -However, it also is missing a lot of what you might expect, and sometimes we -have to do things in slightly weird ways. - -We start the book by covering the quirks our code will have, just to avoid too -many surprises later. 
diff --git a/book/src-bak/00-video-index.md b/book/src-bak/00-video-index.md deleted file mode 100644 index 288c3ca..0000000 --- a/book/src-bak/00-video-index.md +++ /dev/null @@ -1,9 +0,0 @@ -# Video - -GBA Video starts with an IO register called the "Display Control Register", and -then spirals out from there. You generally have to use Palette RAM (PALRAM), -Video RAM (VRAM), Object Attribute Memory (OAM), as well as any number of other -IO registers. - -They all have to work together just right, and there's a lot going on when you -first try doing it, so try to take it very slowly as you're learning each step. diff --git a/book/src-bak/01-buttons.md b/book/src-bak/01-buttons.md deleted file mode 100644 index 586f6c3..0000000 --- a/book/src-bak/01-buttons.md +++ /dev/null @@ -1,102 +0,0 @@ -# Buttons - -It's all well and good to just show a picture, even to show an animation, but if -we want a game we have to let the user interact with something. - -## Key Input Register - -* KEYINPUT, `0x400_0130`, `u16`, read only - -This little `u16` stores the status of _all_ the buttons on the GBA, all at -once. There's only 10 of them, and we have 16 bits to work with, so that sounds -easy. However, there's a bit of a catch. The register follows a "low-active" -convention, where pressing a button _clears_ that bit until it's released. - -```rust -const NO_BUTTONS_PRESSED: u16 = 0b0000_0011_1111_1111; -``` - -The buttons are, going up in order from the 0th bit: - -* A -* B -* Select -* Start -* Right -* Left -* Up -* Down -* R -* L - -Bits above that are not used. However, since the left and right directions, as -well as the up and down directions, can never be pressed at the same time, the -`KEYINPUT` register should never read as zero. Of course, the register _might_ -read as zero if someone is using an emulator that allows for such inputs, so I -wouldn't go so far as to make it be `NonZeroU16` or anything like that. 
- -When programming, we usually are thinking of what buttons we want to have _be -pressed_ instead of buttons we want to have _not be pressed_. This means that we -need an inversion to happen somewhere along the line. The easiest moment of -inversion is immediately as you read in from the register and wrap the value up -in a newtype. - -```rust -pub fn read_key_input() -> KeyInput { - KeyInput(KEYINPUT.read() ^ 0b0000_0011_1111_1111) -} -``` - -Now the KeyInput you get can be checked for what buttons are pressed by checking -for a set bit like you'd do anywhere else. - -```rust -impl KeyInput { - pub fn a_pressed(self) -> bool { - (self.0 & A_BIT) > 0 - } -} -``` - -Note that the current `KEYINPUT` value changes in real time as the user presses -or releases the buttons. To account for this, it's best to read the value just -once per game frame and then use that single value as if it was the input across -the whole frame. If you've worked with polling input before that should sound -totally normal. If not, just remember to call `read_key_input` once per frame -and then use that `KeyInput` value across the whole frame. - -### Detecting New Presses - -The keypad only tells you what's _currently_ pressed, but if you want to check -what's _newly_ pressed it's not too much harder. - -All that you do is store the last frame's keys and compare them to the current -keys with an `XOR`. In the `gba` crate it's called `KeyInput::difference`. Once -you've got the difference between last frame and this frame, you know what -changes happened. - -* If something is in the difference and _not pressed_ in the last frame, that - means it was newly pressed. -* If something is in the difference and _pressed_ in the last frame that means - it was newly released. -* If something is not in the difference then there's no change between last - frame and this frame. 
- -## Key Interrupt Control - -* KEYCNT, `0x400_0132`, `u16`, read/write - -This lets you control what keys will trigger a keypad interrupt. Of course, for -the actual interrupt to fire you also need to set the `IME` and `IE` registers -properly. See the [Interrupts](05-interrupts.md) section for details there. - -The main thing to know about this register is that the keys are in _the exact -same order_ as the key input order. However, with this register they use a -high-active convention instead (eg: the bit is active when the button should be -pressed as part of the interrupt). - -In addition to simply having the bits for the buttons, bit 14 is a flag for -enabling keypad interrupts (in addition to the flag in the `IE` register), and -bit 15 decides how having more than one button works. If bit 15 is disabled, -it's an OR combination (eg: "press any key to continue"). If bit 15 is enabled -it's an AND combination (eg: "press A+B+Start+Select to reset"). diff --git a/book/src-bak/01-cpu.md b/book/src-bak/01-cpu.md deleted file mode 100644 index 894d34b..0000000 --- a/book/src-bak/01-cpu.md +++ /dev/null @@ -1 +0,0 @@ -# CPU diff --git a/book/src-bak/01-no_std.md b/book/src-bak/01-no_std.md deleted file mode 100644 index 2111572..0000000 --- a/book/src-bak/01-no_std.md +++ /dev/null @@ -1,160 +0,0 @@ -# No Std - -First up, as you already saw in the `hello_magic` code, we have to use the -`#![no_std]` outer attribute on our program when we target the GBA. You can find -some info about `no_std` in two official sources: - -* [unstable - book section](https://doc.rust-lang.org/unstable-book/language-features/lang-items.html#writing-an-executable-without-stdlib) -* [embedded - book section](https://rust-embedded.github.io/book/intro/no-std.html?highlight=no_std#a--no_std--rust-environment) - -The unstable book is borderline useless here because it's describing too many -things in too many words. The embedded book is much better, but still fairly -terse. 
- -## Bare Metal - -The GBA falls under what the Embedded Book calls "Bare Metal Environments". -Basically, the machine powers on and immediately begins executing some ASM code. -Our ASM startup was provided by `Ketsuban` (check the `crt0.s` file). We'll go -over _how_ it works much later on, for now it's enough to know that it does -work, and eventually control passes into Rust code. - -On the rust code side of things, we determine our starting point with the -`#[start]` attribute on our `main` function. The `main` function also has a -specific type signature that's different from the usual `main` that you'd see in -Rust. I'd tell you to read the unstable-book entry on `#[start]` but they -[literally](https://doc.rust-lang.org/unstable-book/language-features/start.html) -just tell you to look at the [tracking issue for -it](https://github.com/rust-lang/rust/issues/29633) instead, and that's not very -helpful either. Basically it just _has_ to be declared the way it is, even -though there's nothing passing in the arguments and there's no place that the -return value will go. The compiler won't accept it any other way. - -## No Standard Library - -The Embedded Book tells us that we can't use the standard library, but we get -access to something called "libcore", which sounds kinda funny. What they're -talking about is just [the core -crate](https://doc.rust-lang.org/core/index.html), which is called `libcore` -within the rust repository for historical reasons. - -The `core` crate is actually still a really big portion of Rust. The standard -library doesn't actually hold too much code (relatively speaking), instead it -just takes code form other crates and then re-exports it in an organized way. So -with just `core` instead of `std`, what are we missing? 
- -In no particular order: - -* Allocation -* Clock -* Network -* File System - -The allocation system and all the types that you can use if you have a global -allocator are neatly packaged up in the -[alloc](https://doc.rust-lang.org/alloc/index.html) crate. The rest isn't as -nicely organized. - -It's _possible_ to implement a fair portion of the entire standard library -within a GBA context and make the rest just panic if you try to use it. However, -do you really need all that? Eh... probably not? - -* We don't need a file system, because all of our data is just sitting there in - the ROM for us to use. When programming we can organize our `const` data into - modules and such to keep it organized, but once the game is compiled it's just - one huge flat address space. TODO: Parasyte says that a FS can be handy even - if it's all just ReadOnly, so we'll eventually talk about how you might set up - such a thing I guess, since we'll already be talking about replacements for - three of the other four things we "lost". Maybe we'll make Parasyte write that - section. -* Networking, well, the GBA has a Link Cable you can use to communicate with - another GBA, but it's not really like a unix socket with TCP, so the standard - Rust networking isn't a very good match. -* Clock is actually two different things at once. One is the ability to store - the time long term, which is a bit of hardware that some gamepaks have in them - (eg: pokemon ruby/sapphire/emerald). The GBA itself can't keep time while - power is off. However, the second part is just tracking time moment to moment, - which the GBA can totally do. We'll see how to access the timers soon enough. - -Which just leaves us with allocation. Do we need an allocator? Depends on your -game. For demos and small games you probably don't need one. For bigger games -you'll maybe want to get an allocator going eventually. It's in some sense a -crutch, but it's a very useful one. 
- -So I promise that at some point we'll cover how to get an allocator going. -Either a Rust Global Allocator (if practical), which would allow for a lot of -the standard library types to be used "for free" once it was set up, or just a -custom allocator that's GBA specific if Rust's global allocator style isn't a -good fit for the GBA (I honestly haven't looked into it). - -## Bare Metal Panic - -If our code panics, we usually want to see that panic message. Unfortunately, -without a way to access something like `stdout` or `stderr` we've gotta do -something a little weirder. - -If our program is running within the `mGBA` emulator, version 0.7 or later, we -can access a special set of addresses that allow us to send out `CString` -values, which then appear within a message log that you can check. - -We can capture this behavior by making an `MGBADebug` type, and then implement -`core::fmt::Write` for that type. Once done, the `write!` macro will let us -target the mGBA debug output channel. - -When used, it looks like this: - -```rust -#[panic_handler] -fn panic(info: &core::panic::PanicInfo) -> ! { - use core::fmt::Write; - use gba::mgba::{MGBADebug, MGBADebugLevel}; - - if let Some(mut mgba) = MGBADebug::new() { - let _ = write!(mgba, "{}", info); - mgba.send(MGBADebugLevel::Fatal); - } - loop {} -} -``` - -If you want to follow the particulars you can check the `MGBADebug` source in -the `gba` crate. Basically, there's one address you can use to try and activate -the debug output, and if it works you write your message into the "array" at -another address, and then finally write a send value to a third address. You'll -need to have read the [volatile](03-volatile_destination.md) section for the -details to make sense. - -## LLVM Intrinsics - -The above code will make your program fail to build in debug mode, saying that -`__clzsi2` can't be found. 
This is a special builtin function that LLVM attempts -to use when there's no hardware version of an operation it wants to do (in this -case, counting the leading zeros). It's not _actually_ necessary in this case, -which is why you only need it in debug mode. The higher optimization level of -release mode makes LLVM pre-compute more and fold more constants or whatever and -then it stops trying to call `__clzsi2`. - -Unfortunately, sometimes a build will fail with a missing intrinsic even in -release mode. - -If LLVM wants _core_ to have that intrinsic then you're in -trouble, you'll have to send a PR to the -[compiler-builtins](https://github.com/rust-lang-nursery/compiler-builtins) -repository and hope to get it into rust itself. - -If LLVM wants _your code_ to have the intrinsic then you're in less trouble. You -can look up the details and then implement it yourself. It can go anywhere in -your program, as long as it has the right ABI and name. In the case of -`__clzsi2` it takes a `usize` and returns a `usize`, so you'd write something -like: - -```rust -#[no_mangle] -pub extern "C" fn __clzsi2(mut x: usize) -> usize { - // -} -``` - -And so on for whatever other missing intrinsic. diff --git a/book/src-bak/01-requirements.md b/book/src-bak/01-requirements.md deleted file mode 100644 index 96bb769..0000000 --- a/book/src-bak/01-requirements.md +++ /dev/null @@ -1,29 +0,0 @@ -# Reader Requirements - -This book naturally assumes that you've already read Rust's core book: - -* [The Rust Programming Language](https://doc.rust-lang.org/book/) - -Now, I _know_ it sounds silly to say "if you wanna program Rust on this old -video game system you should already know how to program Rust", but the more -people I meet and chat with the more they tell me that they jumped into Rust -without reading any or all of the book. You know who you are. - -Please, read the whole book! 
- -In addition to the core book, there's also an expansion book that I will declare -to be required reading for this: - -* [The Rustonomicon](https://doc.rust-lang.org/nomicon/) - -The Rustonomicon is all about trying to demystify `unsafe`. We'll end up using a -fair bit of unsafe code as a natural consequence of doing direct hardware -manipulations. Using unsafe is like [swinging a -sword](https://www.zeldadungeon.net/wp-content/uploads/2013/04/tumblr_mlkpzij6T81qizbpto1_1280.gif), -you should start slowly, practice carefully, and always pay attention no matter -how experienced you think you've become. - -That said, it's sometimes a [necessary -tool](https://www.youtube.com/watch?v=rTo2u13lVcQ) to get the job done, so you -have to break out of the borderline pathological fear of using it that most rust -programmers tend to have. diff --git a/book/src-bak/01-rgb15.md b/book/src-bak/01-rgb15.md deleted file mode 100644 index adf5784..0000000 --- a/book/src-bak/01-rgb15.md +++ /dev/null @@ -1 +0,0 @@ -# RBG15 Color diff --git a/book/src-bak/02-bios.md b/book/src-bak/02-bios.md deleted file mode 100644 index d76af96..0000000 --- a/book/src-bak/02-bios.md +++ /dev/null @@ -1,239 +0,0 @@ -# BIOS - -* **Address Span:** `0x0` to `0x3FFF` (16k) - -The [BIOS](https://en.wikipedia.org/wiki/BIOS) of the GBA is a small read-only -portion of memory at the very base of the address space. However, it is also -hardware protected against reading, so if you try to read from BIOS memory when -the program counter isn't pointed into the BIOS (eg: any time code _you_ write -is executing) then you get [basically garbage -data](https://problemkaputt.de/gbatek.htm#gbaunpredictablethings) back. - -So we're not going to spend time here talking about what bits to read or write -within BIOS memory like we do with the other sections. 
Instead we're going to -spend time talking about [inline -assembly](https://doc.rust-lang.org/unstable-book/language-features/asm.html) -([tracking issue](https://github.com/rust-lang/rust/issues/29722)) and then use -it to call the [GBA BIOS -Functions](https://problemkaputt.de/gbatek.htm#biosfunctions). - -Note that BIOS calls have _more overhead than normal function calls_, so don't -go using them all over the place if you don't have to. They're also usually -written more to be compact in terms of code than for raw speed, so you actually -can out speed them in some cases. Between the increased overhead and not being -as speed optimized, you can sometimes do a faster job without calling the BIOS -at all. (TODO: investigate more about what parts of the BIOS we could -potentially offer faster alternatives for.) - -I'd like to take a moment to thank [Marc Brinkmann](https://github.com/mbr) -(with contributions from [Oliver Scherer](https://github.com/oli-obk) and -[Philipp Oppermann](https://github.com/phil-opp)) for writing [this blog -post](http://embed.rs/articles/2016/arm-inline-assembly-rust/). It's at least -ten times the tutorial quality as the `asm` entry in the Unstable Book has. In -fairness to the Unstable Book, the actual spec of how inline ASM works in rust -is "basically what clang does", and that's specified as "basically what GCC -does", and that's basically/shockingly not specified much at all despite GCC -being like 30 years old. - -So let's be slow and pedantic about this process. - -## Inline ASM - -**Fair Warning:** The general information that follows regarding the asm macro -is consistent from system to system, but specific information about register -names, register quantities, asm instruction argument ordering, and so on is -specific to ARM on the GBA. If you're programming for any other device you'll -need to carefully investigate that before you begin. 
- -Now then, with those out of the way, the inline asm docs describe an asm call as -looking like this: - -```rust -let x = 10u32; -let y = 34u32; -let result: u32; -asm!( - // assembly template - "add {lhs}, {rhs}", - lhs = inout(reg_thumb) x => result, - rhs = in(reg_thumb) y, - options(nostack, nomem), -); -// result == 44 -``` - -The `asm` macro follows the [RFC -2873](https://github.com/Amanieu/rfcs/blob/inline-asm/text/0000-inline-asm.md) -syntax. The following is just a summary of the RFC. - -Now we have to decide what we're gonna write. Obviously we're going to do some -instructions, but those instructions use registers, and how are we gonna talk -about them? We've got two choices. - -1) We can pick each and every register used by specifying exact register names. - In THUMB mode we have 8 registers available, named `r0` through `r7`. To use - those registers you would write `in("r0") x` instead of - `rhs = in(reg_thumb) x`, and directly refer to `r0` in the assembly template. - -2) We can specify slots for registers we need and let LLVM decide. This is what - we do when we write `rhs = in(reg_thumb) y` and use `{rhs}` in the assembly - template. - - The `reg_thumb` stands for the register class we are using. Since we are - in THUMB mode, the set of registers we can use is limited. `reg_thumb` tells - LLVM: "use only registers available in THUMB mode". In 32-bit mode, you have - access to more register and you should use a different register class. - - The register classes [are described in the - RFC](https://github.com/Amanieu/rfcs/blob/inline-asm/text/0000-inline-asm.md#register-operands). - Look for "ARM" register classes. - -In the case of the GBA BIOS, each BIOS function has pre-designated input and -output registers, so we will use the first style. If you use inline ASM in other -parts of your code you're free to use the second style. - -### Assembly - -This is just one big string literal. 
You write out one instruction per line, and -excess whitespace is ignored. You can also do comments within your assembly -using `;` to start a comment that goes until the end of the line. - -Assembly convention doesn't consider it unreasonable to comment potentially as -much as _every single line_ of asm that you write when you're getting used to -things. Or even if you are used to things. This is cryptic stuff, there's a -reason we avoid writing in it as much as possible. - -Remember that our Rust code is in 16-bit mode. You _can_ switch to 32-bit mode -within your asm as long as you switch back by the time the block ends. Otherwise -you'll have a bad time. - -### Register bindings - -After the assembly string literal, you need to define your binding (which -rust variables are getting into your registers and which ones are going to refer -to their value afterward). - -There are many operand types [as per the -RFC](https://github.com/Amanieu/rfcs/blob/inline-asm/text/0000-inline-asm.md#operand-type), -but you will most often use: - -``` -[alias =] in() // input -[alias =] out() // output -[alias =] inout() => // both -out() _ // Clobber -``` - -* The binding can be any single 32-bit or smaller value. -* If your binding has bit pattern requirements ("must be non-zero", etc) you are - responsible for upholding that. -* If your binding type will try to `Drop` later then you are responsible for it - being in a fit state to do that. -* The binding must be either a mutable binding or a binding that was - pre-declared but not yet assigned. -* An input binding must be a single 32-bit or smaller value. -* An input binding _should_ be a type that is `Copy` but this is not an absolute - requirement. Having the input be read is semantically similar to using - `core::ptr::read(&binding)` and forgetting the value when you're done. - -Anything else is UB. - -### Clobbers - -Sometimes your asm will touch registers other than the ones declared for input -and output. 
- -Clobbers are declared as a comma separated list of string literals naming -specific registers. You don't use curly braces with clobbers. - -LLVM _needs_ to know this information. It can move things around to keep your -data safe, but only if you tell it what's about to happen. - -Failure to define all of your clobbers can cause UB. - -### Options - -By default the compiler won't optimize the code you wrote in an `asm` block. You -will need to specify with the `options(..)` parameter that your code can be -optimized. The available options [are specified in the -RFC](https://github.com/Amanieu/rfcs/blob/inline-asm/text/0000-inline-asm.md#options-1). - -An optimization might duplicate or remove your instructions from the final -code. - -Typically when executing a BIOS call (such as `swi 0x01`, which resets the -console), it's important that the instruction is executed, and not optimized -away, even though it has no observable input and output to the compiler. - -However some BIOS calls, such as _some_ math functions, have no observable -effects outside of the registers we specified, in this case, we instruct the -compiler to optimize them. - -### BIOS ASM - -* Inputs are always `r0`, `r1`, `r2`, and/or `r3`, depending on function. -* Outputs are always zero or more of `r0`, `r1`, and `r3`. -* Any of the output registers that aren't actually used should be marked as - clobbered. -* All other registers are unaffected. - -All of the GBA BIOS calls are performed using the -[swi](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/BABFCEEG.html) -instruction, combined with a value depending on what BIOS function you're trying -to invoke. If you're in 16-bit code you use the value directly, and if you're in -32-bit mode you shift the value up by 16 bits first. 
- -### Example BIOS Function: Division - -For our example we'll use the division function, because GBATEK gives very clear -instructions on how each register is used with that one: - -```txt -Signed Division, r0/r1. - r0 signed 32bit Number - r1 signed 32bit Denom -Return: - r0 Number DIV Denom ;signed - r1 Number MOD Denom ;signed - r3 ABS (Number DIV Denom) ;unsigned -For example, incoming -1234, 10 should return -123, -4, +123. -The function usually gets caught in an endless loop upon division by zero. -``` - -The math folks tell me that the `r1` value should be properly called the -"remainder" not the "modulus". We'll go with that for our function, doesn't hurt -to use the correct names. Our Rust function has an assert against dividing by -`0`, then we name some bindings _without_ giving them a value, we make the asm -call, and then return what we got. - -```rust -pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { - assert!(denominator != 0); - let div_out: i32; - let rem_out: i32; - unsafe { - asm!( - "swi 0x06", - inout("r0") numerator => div_out, - inout("r1") denominator => rem_out, - out("r3") _, - options(nostack, nomem), - ); - } - (div_out, rem_out) -} -``` - -I _hope_ this all makes sense by now. - -## Specific BIOS Functions - -For a full list of all the specific BIOS functions and their use you should -check the `gba::bios` module within the `gba` crate. There's just so many of -them that enumerating them all here wouldn't serve much purpose. - -Which is not to say that we'll never cover any BIOS functions in this book! -Instead, we'll simply mention them whenever they're relevant to the task at -hand (such as controlling sound or waiting for vblank). - -//TODO: list/name all BIOS functions as well as what they relate to elsewhere. 
diff --git a/book/src-bak/02-fixed_only.md b/book/src-bak/02-fixed_only.md deleted file mode 100644 index e6ddf0f..0000000 --- a/book/src-bak/02-fixed_only.md +++ /dev/null @@ -1,548 +0,0 @@ -# Fixed Only - -In addition to not having much of the standard library available, we don't even -have a floating point unit available! We can't do floating point math in -hardware! We _could_ still do floating point math as pure software computations -if we wanted, but that's a slow, slow thing to do. - -Are there faster ways? It's the same answer as always: "Yes, but not without a -tradeoff." - -The faster way is to represent fractional values using a system called a [Fixed -Point Representation](https://en.wikipedia.org/wiki/Fixed-point_arithmetic). -What do we trade away? Numeric range. - -* Floating point math stores bits for base value and for exponent all according - to a single [well defined](https://en.wikipedia.org/wiki/IEEE_754) standard - for how such a complicated thing works. -* Fixed point math takes a normal integer (either signed or unsigned) and then - just "mentally associates" it (so to speak) with a fractional value for its - "units". If you have 3 and it's in units of 1/2, then you have 3/2, or 1.5 - using decimal notation. If your number is 256 and it's in units of 1/256th - then the value is 1.0 in decimal notation. - -Floating point math requires dedicated hardware to perform quickly, but it can -"trade" precision when it needs to represent extremely large or small values. - -Fixed point math is just integral math, which our GBA is reasonably good at, but -because your number is associated with a fixed fraction your results can get out -of range very easily. 
- -## Representing A Fixed Point Value - -So we want to associate our numbers with a mental note of what units they're in: - -* [PhantomData](https://doc.rust-lang.org/core/marker/struct.PhantomData.html) - is a type that tells the compiler "please remember this extra type info" when - you add it as a field to a struct. It goes away at compile time, so it's - perfect for us to use as space for a note to ourselves without causing runtime - overhead. -* The [typenum](https://crates.io/crates/typenum) crate is the best way to - represent a number within a type in Rust. Since our values on the GBA are - always specified as a number of fractional bits to count the number as, we can - put `typenum` types such as `U8` or `U14` into our `PhantomData` to keep track - of what's going on. - -Now, those of you who know me, or perhaps just know my reputation, will of -course _immediately_ question what happened to the real Lokathor. I do not care -for most crates, and I particularly don't care for using a crate in teaching -situations. However, `typenum` has a number of factors on its side that let me -suggest it in this situation: - -* It's version 1.10 with a total of 21 versions and nearly 700k downloads, so we - can expect that the major troubles have been shaken out and that it will remain - fairly stable for quite some time to come. -* It has no further dependencies that it's going to drag into the compilation. -* It happens all at compile time, so it's not clogging up our actual game with - any nonsense. -* The (interesting) subject of "how do you do math inside Rust's trait system?" is - totally separate from the concern that we're trying to focus on here. - -Therefore, we will consider it acceptable to use this crate. - -Now the `typenum` crate defines a whole lot, but we'll focus down to just a -single type at the moment: -[UInt](https://docs.rs/typenum/1.10.0/typenum/uint/struct.UInt.html) is a -type-level unsigned value. 
It's like `u8` or `u16`, but while they're types that -then have values, each `UInt` construction statically equates to a specific -value. Like how the `()` type only has one value, which is also called `()`. In -this case, you wrap up `UInt` around smaller `UInt` values and a `B1` or `B0` -value to build up the binary number that you want at the type level. - -In other words, instead of writing - -```rust -let six = 0b110; -``` - -We write - -```rust -type U6 = UInt, B1>, B0>; -``` - -Wild, I know. If you look into the `typenum` crate you can do math and stuff -with these type level numbers, and we will a little bit below, but to start off -we _just_ need to store one in some `PhantomData`. - -### A struct For Fixed Point - -Our actual type for a fixed point value looks like this: - -```rust -use core::marker::PhantomData; -use typenum::marker_traits::Unsigned; - -/// Fixed point `T` value with `F` fractional bits. -#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] -#[repr(transparent)] -pub struct Fx { - bits: T, - _phantom: PhantomData, -} -``` - -This says that `Fx` is a generic type that holds some base number type `T` -and a `F` type that's marking off how many fractional bits we're using. We only -want people giving unsigned type-level values for the `PhantomData` type, so we -use the trait bound `F: Unsigned`. - -We use -[repr(transparent)](https://github.com/rust-lang/rfcs/blob/master/text/1758-repr-transparent.md) -here to ensure that `Fx` will always be treated just like the base type in the -final program (in terms of bit pattern and ABI). - -If you go and check, this is _basically_ how the existing general purpose crates -for fixed point math represent their numbers. They're a little fancier about it -because they have to cover every case, and we only have to cover our GBA case. - -That's quite a bit to type though. We probably want to make a few type aliases -for things to be easier to look at. 
Unfortunately there's [no standard -notation](https://en.wikipedia.org/wiki/Fixed-point_arithmetic#Notation) for how -you write a fixed point type. We also have to limit ourselves to what's valid -for use in a Rust type too. I like the `fx` thing, so we'll use that for signed -and then `fxu` if we need an unsigned value. - -```rust -/// Alias for an `i16` fixed point value with 8 fractional bits. -pub type fx8_8 = Fx; -``` - -Rust will complain about having `non_camel_case_types`, and you can shut that -warning up by putting an `#[allow(non_camel_case_types)]` attribute on the type -alias directly, or you can use `#![allow(non_camel_case_types)]` at the very top -of the module to shut up that warning for the whole module (which is what I -did). - -## Constructing A Fixed Point Value - -So how do we actually _make_ one of these values? Well, we can always just wrap or unwrap any value in our `Fx` type: - -```rust -impl Fx { - /// Uses the provided value directly. - pub fn from_raw(r: T) -> Self { - Fx { - num: r, - phantom: PhantomData, - } - } - /// Unwraps the inner value. - pub fn into_raw(self) -> T { - self.num - } -} -``` - -I'd like to use the `From` trait of course, but it was giving me some trouble, i -think because of the orphan rule. Oh well. - -If we want to be particular to the fact that these are supposed to be -_numbers_... that gets tricky. Rust is actually quite bad at being generic about -number types. You can use the [num](https://crates.io/crates/num) crate, or you -can just use a macro and invoke it once per type. Guess what we're gonna do. - -```rust -macro_rules! fixed_point_methods { - ($t:ident) => { - impl Fx<$t, F> { - /// Gives the smallest positive non-zero value. - pub fn precision() -> Self { - Fx { - num: 1, - phantom: PhantomData, - } - } - - /// Makes a value with the integer part shifted into place. - pub fn from_int_part(i: $t) -> Self { - Fx { - num: i << F::U8, - phantom: PhantomData, - } - } - } - }; -} - -fixed_point_methods! 
{u8} -fixed_point_methods! {i8} -fixed_point_methods! {i16} -fixed_point_methods! {u16} -fixed_point_methods! {i32} -fixed_point_methods! {u32} -``` - -Now _you'd think_ that those can be `const`, but at the moment you can't have a -`const` function with a bound on any trait other than `Sized`, so they have to -be normal functions. - -Also, we're doing something a little interesting there with `from_int_part`. We -can take our `F` type and get its constant value. There's other associated -constants if we want it in other types, and also non-const methods if you wanted -that for some reason (maybe passing it as a closure function? dunno). - -## Casting Base Values - -Next, once we have a value in one base type we will need to be able to move it -into another base type. Unfortunately this means we gotta use the `as` operator, -which requires a concrete source type and a concrete destination type. There's -no easy way for us to make it generic here. - -We could let the user use `into_raw`, cast, and then do `from_raw`, but that's -error prone because they might change the fractional bit count accidentally. -This means that we have to write a function that does the casting while -perfectly preserving the fractional bit quantity. If we wrote one function for -each conversion it'd be like 30 different possible casts (6 base types that we -support, and then 5 possible target types). Instead, we'll write it just once in -a way that takes a closure, and let the user pass a closure that does the cast. -The compiler should merge it all together quite nicely for us once optimizations -kick in. - -This code goes outside the macro. I want to avoid too much code in the macro if -we can, it's a little easier to cope with I think. - -```rust - /// Casts the base type, keeping the fractional bit quantity the same. 
- pub fn cast_inner Z>(self, op: C) -> Fx { - Fx { - num: op(self.num), - phantom: PhantomData, - } - } -``` - -It's horrible and ugly, but Rust is just bad at numbers sometimes. - -## Adjusting Fractional Part - -In addition to the base value we might want to change our fractional bit -quantity. This is actually easier that it sounds, but it also requires us to be -tricky with the generics. We can actually use some typenum type level operators -here. - -This code goes inside the macro: we need to be able to use the left shift and -right shift, which is easiest when we just use the macro's `$t` as our type. We -could alternately put a similar function outside the macro and be generic on `T` -having the left and right shift operators by using a `where` clause. As much as -I'd like to avoid too much code being generated by macro, I'd _even more_ like -to avoid generic code with huge and complicated trait bounds. It comes down to -style, and you gotta decide for yourself. - -```rust - /// Changes the fractional bit quantity, keeping the base type the same. - pub fn adjust_fractional_bits>(self) -> Fx<$t, Y> { - let leftward_movement: i32 = Y::to_i32() - F::to_i32(); - Fx { - num: if leftward_movement > 0 { - self.num << leftward_movement - } else { - self.num >> (-leftward_movement) - }, - phantom: PhantomData, - } - } -``` - -There's a few things at work. First, we introduce `Y` as the target number of -fractional bits, and we _also_ limit it that the target bits quantity can't be -the same as we already have using a type-level operator. If it's the same as we -started with, why are you doing the cast at all? - -Now, once we're sure that the current bits and target bits aren't the same, we -compute `target - start`, and call this our "leftward movement". Example: if -we're targeting 8 bits and we're at 4 bits, we do 8-4 and get +4 as our leftward -movement. If the leftward_movement is positive we naturally shift our current -value to the left. 
If it's not positive then it _must_ be negative because we -eliminated 0 as a possibility using the type-level operator, so we shift to the -right by the negative value. - -## Addition, Subtraction, Shifting, Negative, Comparisons - -From here on we're getting help from [this blog -post](https://spin.atomicobject.com/2012/03/15/simple-fixed-point-math/) by [Job -Vranish](https://spin.atomicobject.com/author/vranish/), so thank them if you -learn something. - -I might have given away the game a bit with those `derive` traits on our fixed -point type. For a fair number of operations you can use the normal form of the -op on the inner bits as long as the fractional parts have the same quantity. -This includes equality and ordering (which we derived) as well as addition, -subtraction, and bit shifting (which we need to do ourselves). - -This code can go outside the macro, with sufficient trait bounds. - -```rust -impl, F: Unsigned> Add for Fx { - type Output = Self; - fn add(self, rhs: Fx) -> Self::Output { - Fx { - num: self.num + rhs.num, - phantom: PhantomData, - } - } -} -``` - -The bound on `T` makes it so that `Fx` can be added any time that `T` can -be added to its own type with itself as the output. We can use the exact same -pattern for `Sub`, `Shl`, `Shr`, and `Neg`. With enough trait bounds, we can do -anything! 
- -```rust -impl, F: Unsigned> Sub for Fx { - type Output = Self; - fn sub(self, rhs: Fx) -> Self::Output { - Fx { - num: self.num - rhs.num, - phantom: PhantomData, - } - } -} - -impl, F: Unsigned> Shl for Fx { - type Output = Self; - fn shl(self, rhs: u32) -> Self::Output { - Fx { - num: self.num << rhs, - phantom: PhantomData, - } - } -} - -impl, F: Unsigned> Shr for Fx { - type Output = Self; - fn shr(self, rhs: u32) -> Self::Output { - Fx { - num: self.num >> rhs, - phantom: PhantomData, - } - } -} - -impl, F: Unsigned> Neg for Fx { - type Output = Self; - fn neg(self) -> Self::Output { - Fx { - num: -self.num, - phantom: PhantomData, - } - } -} -``` - -Unfortunately, for `Shl` and `Shr` to have as much coverage on our type as it -does on the base type (allowing just about any right hand side) we'd have to do -another macro, but I think just `u32` is fine. We can always add more later if -we need. - -We could also implement `BitAnd`, `BitOr`, `BitXor`, and `Not`, but they don't -seem relevant to our fixed point math use, and this section is getting long -already. Just use the same general patterns if you want to add it in your own -programs. Shockingly, `Rem` also works directly if you want it, though I don't -foresee us needing fixed point remainder. Also, the GBA can't do hardware -division or remainder, and we'll have to work around that below when we -implement `Div` (which maybe we don't need, but it's complex enough I should -show it instead of letting people guess). - -**Note:** In addition to the various `Op` traits, there's also `OpAssign` -variants. Each `OpAssign` is the same as `Op`, but takes `&mut self` instead of -`self` and then modifies in place instead of producing a fresh value. In other -words, if you want both `+` and `+=` you'll need to do the `AddAssign` trait -too. It's not the worst thing to just write `a = a+b`, so I won't bother with -showing all that here. It's pretty easy to figure out for yourself if you want. 
- -## Multiplication - -This is where things get more interesting. When we have two numbers `A` and `B` -they really stand for `(a*f)` and `(b*f)`. If we write `A*B` then we're really -writing `(a*f)*(b*f)`, which can be rewritten as `(a*b)*f*f`, and now it's -obvious that we have one more `f` than we wanted to have. We have to do the -multiply of the inner value and then divide out the `f`. We divide by `1 << -bit_count`, so if we have 8 fractional bits we'll divide by 256. - -The catch is that, when we do the multiply we're _extremely_ likely to overflow -our base type with that multiplication step. Then we do that divide, and now our -result is basically nonsense. We can avoid this to some extent by casting up to -a higher bit type, doing the multiplication and division at higher precision, -and then casting back down. We want as much precision as possible without being -too inefficient, so we'll always cast up to 32-bit (on a 64-bit machine you'd -cast up to 64-bit instead). - -Naturally, any signed value has to be cast up to `i32` and any unsigned value -has to be cast up to `u32`, so we'll have to handle those separately. - -Also, instead of doing an _actual_ divide we can right-shift by the correct -number of bits to achieve the same effect. _Except_ when we have a signed value -that's negative, because actual division truncates towards zero and -right-shifting truncates towards negative infinity. We can get around _this_ by -flipping the sign, doing the shift, and flipping the sign again (which sounds -silly but it's so much faster than doing an actual division). - -Also, again signed values can be annoying, because if the value _just happens_ -to be `i32::MIN` then when you negate it you'll have... _still_ a negative -value. I'm not 100% on this, but I think the correct thing to do at that point -is to give `$t::MIN` as the output num value. - -Did you get all that? 
Good, because this involves casting, so we will need to -implement it three times, which calls for another macro. - -```rust -macro_rules! fixed_point_signed_multiply { - ($t:ident) => { - impl Mul for Fx<$t, F> { - type Output = Self; - fn mul(self, rhs: Fx<$t, F>) -> Self::Output { - let pre_shift = (self.num as i32).wrapping_mul(rhs.num as i32); - if pre_shift < 0 { - if pre_shift == core::i32::MIN { - Fx { - num: core::$t::MIN, - phantom: PhantomData, - } - } else { - Fx { - num: (-((-pre_shift) >> F::U8)) as $t, - phantom: PhantomData, - } - } - } else { - Fx { - num: (pre_shift >> F::U8) as $t, - phantom: PhantomData, - } - } - } - } - }; -} - -fixed_point_signed_multiply! {i8} -fixed_point_signed_multiply! {i16} -fixed_point_signed_multiply! {i32} - -macro_rules! fixed_point_unsigned_multiply { - ($t:ident) => { - impl Mul for Fx<$t, F> { - type Output = Self; - fn mul(self, rhs: Fx<$t, F>) -> Self::Output { - Fx { - num: ((self.num as u32).wrapping_mul(rhs.num as u32) >> F::U8) as $t, - phantom: PhantomData, - } - } - } - }; -} - -fixed_point_unsigned_multiply! {u8} -fixed_point_unsigned_multiply! {u16} -fixed_point_unsigned_multiply! {u32} -``` - -## Division - -Division is similar to multiplication, but reversed. Which makes sense. This -time `A/B` gives `(a*f)/(b*f)` which is `a/b`, one _less_ `f` than we were -after. - -As with the multiplication version of things, we have to up-cast our inner value -as much a we can before doing the math, to allow for the most precision -possible. - -The snag here is that the GBA has no division or remainder. Instead, the GBA has -a BIOS function you can call to do `i32/i32` division. - -This is a potential problem for us though. If we have some unsigned value, we -need it to fit within the positive space of an `i32` _after the multiply_ so -that we can cast it to `i32`, call the BIOS function that only works on `i32` -values, and cast it back to its actual type. 
- -* If you have a u8 you're always okay, even with 8 floating bits. -* If you have a u16 you're okay even with a maximum value up to 15 floating - bits, but having a maximum value and 16 floating bits makes it break. -* If you have a u32 you're probably going to be in trouble all the time. - -So... ugh, there's not much we can do about this. For now we'll just have to -suffer some. - -// TODO: find a numerics book that tells us how to do `u32/u32` divisions. - -```rust -macro_rules! fixed_point_signed_division { - ($t:ident) => { - impl Div for Fx<$t, F> { - type Output = Self; - fn div(self, rhs: Fx<$t, F>) -> Self::Output { - let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); - let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); - Fx { - num: divide_result as $t, - phantom: PhantomData, - } - } - } - }; -} - -fixed_point_signed_division! {i8} -fixed_point_signed_division! {i16} -fixed_point_signed_division! {i32} - -macro_rules! fixed_point_unsigned_division { - ($t:ident) => { - impl Div for Fx<$t, F> { - type Output = Self; - fn div(self, rhs: Fx<$t, F>) -> Self::Output { - let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); - let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); - Fx { - num: divide_result as $t, - phantom: PhantomData, - } - } - } - }; -} - -fixed_point_unsigned_division! {u8} -fixed_point_unsigned_division! {u16} -fixed_point_unsigned_division! {u32} -``` - -## Trigonometry - -TODO: look up tables! arcbits! - -## Just Using A Crate - -If, after seeing all that, and seeing that I still didn't even cover every -possible trait impl that you might want for all the possible types... if after -all that you feel too intimidated, then I'll cave a bit on your behalf and -suggest to you that the [fixed](https://crates.io/crates/fixed) crate seems to -be the best crate available for fixed point math. - -_I have not tested its use on the GBA myself_. 
- -It's just my recommendation from looking at the docs of the various options -available, if you really wanted to just have a crate for it. diff --git a/book/src-bak/02-goals_and_style.md b/book/src-bak/02-goals_and_style.md deleted file mode 100644 index eac5366..0000000 --- a/book/src-bak/02-goals_and_style.md +++ /dev/null @@ -1,23 +0,0 @@ -# Book Goals and Style - -So, what's this book actually gonna teach you? - -My goal is certainly not just showing off the crate. Programming for the GBA is -weird enough that I'm trying to teach you all the rest of the stuff you need to -know along the way. If I do my job right then you'd be able to write your own -crate for GBA stuff just how you think it should all go by the end. - -Overall the book is sorted more for easy review once you're trying to program -something. The GBA has a few things that can stand on their own and many other -things are a mass of interconnected concepts, so some parts of the book end up -having to refer you to portions that you haven't read yet. The chapters and -sections are sorted so that _minimal_ future references are required, but it's -unavoidable that it'll happen sometimes. - -The actual "tutorial order" of the book is the -[Examples](../05-examples/00-index.md) chapter. Each section of that chapter -breaks down one of the provided examples in the [examples -directory](https://github.com/rust-console/gba/tree/master/examples) of the -repository. We go over what sections of the book you'll need to have read for -the example code to make sense, and also how we apply the general concepts -described in the book to the specific example cases. 
diff --git a/book/src-bak/02-timers.md b/book/src-bak/02-timers.md deleted file mode 100644 index 2f76034..0000000 --- a/book/src-bak/02-timers.md +++ /dev/null @@ -1 +0,0 @@ -# Timers diff --git a/book/src-bak/03-dma.md b/book/src-bak/03-dma.md deleted file mode 100644 index ef9c846..0000000 --- a/book/src-bak/03-dma.md +++ /dev/null @@ -1,133 +0,0 @@ -# Direct Memory Access - -The GBA has four Direct Memory Access (DMA) units that can be utilized. They're -mostly the same in terms of overall operation, but each unit has special rules -that make it better suited to a particular task. - -**Please Note:** TONC and GBATEK have slightly different concepts of how a DMA -unit's registers should be viewed. I've chosen to go by what GBATEK uses. - -## General DMA - -A single DMA unit is controlled through four different IO Registers. - -* **Source:** (`DMAxSAD`, write only) A `*const` pointer that the DMA reads from. -* **Destination:** (`DMAxDAD`, write only) A `*mut` pointer that the DMA writes - to. -* **Count:** (`DMAxCNT_L`, write only) How many transfers to perform. -* **Control:** (`DMAxCNT_H`, read/write) A register full of bit-flags that - controls all sorts of details. - -Here, the `x` is replaced with 0 through 3 when utilizing whichever particular -DMA unit. - -### Source Address - -This is either a `u32` or `u16` address depending on the unit's assigned -transfer mode (see Control). The address MUST be aligned. - -With DMA0 the source must be internal memory. With other DMA units the source -can be any non-`SRAM` location. - -### Destination Address - -As with the Source, this is either a `u32` or `u16` address depending on the -unit's assigned transfer mode (see Control). The address MUST be aligned. - -With DMA0/1/2 the destination must be internal memory. With DMA3 the destination -can be any non-`SRAM` memory (allowing writes into Game Pak ROM / FlashROM, -assuming that your Game Pak hardware supports that). 
- -### Count - -This is a `u16` that says how many transfers (`u16` or `u32`) to make. - -DMA0/1/2 will only actually accept a 14-bit value, while DMA3 will accept a full -16-bit value. A value of 0 instead acts as if you'd used the _maximum_ value for -the DMA in question. Put another way, DMA0/1/2 transfer `1` through `0x4000` -words, with `0` as the `0x4000` value, and DMA3 transfers `1` through `0x1_0000` -words, with `0` as the `0x1_0000` value. - -The maximum value isn't a very harsh limit. Even in just `u16` mode, `0x4000` -transfers is 32k, which would for example be all 32k of `IWRAM` (including your -own user stack). If you for some reason do need to transfer more than a single -DMA use can move around at once then you can just setup the DMA a second time -and keep going. - -### Control - -This `u16` bit-flag field is where things get wild. - -* Bits 0-4 do nothing -* Bit 5-6 control how the destination address changes per transfer: - * 0: Offset +1 - * 1: Offset -1 - * 2: No Change - * 3: Offset +1 and reload when a Repeat starts (below) -* Bit 7-8 similarly control how the source address changes per transfer: - * 0: Offset +1 - * 1: Offset -1 - * 2: No Change - * 3: Prohibited -* Bit 9: enables Repeat mode. -* Bit 10: Transfer `u16` (false) or `u32` (true) data. -* Bit 11: "Game Pak DRQ" flag. GBATEK says that this is only allowed for DMA3, - and also your Game Pak hardware must be equipped to use DRQ mode. I don't even - know what DRQ mode is all about, and GBATEK doesn't say much either. If DRQ is - set then you _must not_ set the Repeat bit as well. The `gba` crate simply - doesn't bother to expose this flag to users. -* Bit 12-13: DMA Start: - * 0: "Immediate", which is 2 cycles after requested. - * 1: VBlank - * 2: HBlank - * 3: Special, depending on what DMA unit is involved: - * DMA0: Prohibited. 
- * DMA1/2: Sound FIFO (see the [Sound](04-sound.md) section) - * DMA3: Video Capture, intended for use with the Repeat flag, performs a - transfer per scanline (similar to HBlank) starting at `VCOUNT` 2 and - stopping at `VCOUNT` 162. Intended for copying things from ROM or camera - into VRAM. -* Bit 14: Interrupt upon DMA complete. -* Bit 15: Enable this DMA unit. - -## DMA Life Cycle - -The general technique for using a DMA unit involves first setting the relevant -source, destination, and count registers, then setting the appropriate control -register value with the Enable bit set. - -Once the Enable flag is set the appropriate DMA unit will trigger at the -assigned time (Bit 12-13). The CPU's operation is halted while any DMA unit is -active, until the DMA completes its task. If more than one DMA unit is supposed -to be active at once, then the DMA unit with the lower number will activate and -complete before any others. - -When the DMA triggers via _Enable_, the `Source`, `Destination`, and `Count` -values are copied from the GBA's registers into the DMA unit's internal -registers. Changes to the DMA unit's internal copy of the data don't affect the -values in the GBA registers. Another _Enable_ will read the same values as -before. - -If DMA is triggered via having _Repeat_ active then _only_ the Count is copied -in to the DMA unit registers. The `Source` and `Destination` are unaffected -during a Repeat. The exception to this is if the destination address control -value (Bits 5-6) is set to 3 (`0b11`), in which case a _Repeat_ will also -re-copy the `Destination` as well as the `Count`. - -Once a DMA operation completes, the Enable flag of its Control register will -automatically be disabled, _unless_ the Repeat flag is on, in which case the -Enable flag is left active. You will have to manually disable it if you don't -want the DMA to kick in again over and over at the specified starting time. 
- -## DMA Limitations - -The DMA units cannot access `SRAM` at all. - -If you're using HBlank to access any part of the memory that the display -controller utilizes (`OAM`, `PALRAM`, `VRAM`), you need to have enabled the -"HBlank Interval Free" bit in the Display Control Register (`DISPCNT`). - -Whenever DMA is active the CPU is _not_ active, which means that -[Interrupts](05-interrupts.md) will not fire while DMA is happening. This can -cause any number of hard to track down bugs. Try to limit your use of the DMA -units if you can. diff --git a/book/src-bak/03-volatile_destination.md b/book/src-bak/03-volatile_destination.md deleted file mode 100644 index dcc1978..0000000 --- a/book/src-bak/03-volatile_destination.md +++ /dev/null @@ -1,317 +0,0 @@ -# Volatile Destination - -TODO: update this when we can make more stuff `const` - -## Volatile Memory - -The compiler is an eager friend, so when it sees a read or a write that won't -have an effect, it eliminates that read or write. For example, if we write - -```rust -let mut x = 5; -x = 7; -``` - -The compiler won't actually ever put 5 into `x`. It'll skip straight to putting -7 in `x`, because we never read from `x` when it's 5, so that's a safe change to -make. Normally, values are stored in RAM, which has no side effects when you -read and write from it. RAM is purely for keeping notes about values you'll need -later on. - -However, what if we had a bit of hardware where we wanted to do a write and that -did something _other than_ keeping the value for us to look at later? As you saw -in the `hello_magic` example, we have to use a `write_volatile` operation. -Volatile means "just do it anyway". The compiler thinks that it's pointless, but -we know better, so we can force it to really do exactly what we say by using -`write_volatile` instead of `write`. - -This is kinda error prone though, right? Because it's just a raw pointer, so we -might forget to use `write_volatile` at some point. 
- -Instead, we want a type that's always going to use volatile reads and writes. -Also, we want a pointer type that lets our reads and writes be as safe as -possible once we've unsafely constructed the initial value. - -### Constructing The VolAddress Type - -First, we want a type that stores a location within the address space. This can -be a pointer, or a `usize`, and we'll use a `usize` because that's easier to -work with in a `const` context (and we want to have `const` when we can get it). -We'll also have our type use `NonZeroUsize` instead of just `usize` so that -`Option<VolAddress<T>>` stays as a single machine word. This helps quite a bit -when we want to iterate over the addresses of a block of memory (such as -locations within the palette memory). Hardware is never at the null address -anyway. Also, if we had _just_ an address number then we wouldn't be able to -track what type the address is for. We need some -[PhantomData](https://doc.rust-lang.org/core/marker/struct.PhantomData.html), -and specifically we need the phantom data to be for `*mut T`: - -* If we used `*const T` that'd have the wrong - [variance](https://doc.rust-lang.org/nomicon/subtyping.html). -* If we used `&mut T` then that's fusing in the ideas of _lifetime_ and - _exclusive access_ to our type. That's potentially important, but that's also - an abstraction we'll build _on top of_ this `VolAddress` type if we need it. - -One abstraction layer at a time, so we start with just a phantom pointer. This gives us a type that looks like this: - -```rust -#[derive(Debug)] -#[repr(transparent)] -pub struct VolAddress { - address: NonZeroUsize, - marker: PhantomData<*mut T>, -} -``` - -Now, because of how `derive` is specified, it derives traits _if the generic -parameter_ supports those traits. Since our type is like a pointer, the traits -it supports are distinct from whatever traits the target type supports. So we'll -provide those implementations manually.
- -```rust -impl Clone for VolAddress { - fn clone(&self) -> Self { - *self - } -} -impl Copy for VolAddress {} -impl PartialEq for VolAddress { - fn eq(&self, other: &Self) -> bool { - self.address == other.address - } -} -impl Eq for VolAddress {} -impl PartialOrd for VolAddress { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.address.cmp(&other.address)) - } -} -impl Ord for VolAddress { - fn cmp(&self, other: &Self) -> Ordering { - self.address.cmp(&other.address) - } -} -``` - -Boilerplate junk, not interesting. There's a reason that you derive those traits -99% of the time in Rust. - -### Constructing A VolAddress Value - -Okay so here's the next core concept: If we unsafely _construct_ a -`VolAddress`, then we can safely _use_ the value once it's been properly -created. - -```rust -// you'll need these features enabled and a recent nightly -#![feature(const_int_wrapping)] -#![feature(min_const_unsafe_fn)] - -impl VolAddress { - pub const unsafe fn new_unchecked(address: usize) -> Self { - VolAddress { - address: NonZeroUsize::new_unchecked(address), - marker: PhantomData, - } - } - pub const unsafe fn cast(self) -> VolAddress { - VolAddress { - address: self.address, - marker: PhantomData, - } - } - pub unsafe fn offset(self, offset: isize) -> Self { - VolAddress { - address: NonZeroUsize::new_unchecked(self.address.get().wrapping_add(offset as usize * core::mem::size_of::())), - marker: PhantomData, - } - } -} -``` - -So what are the unsafety rules here? - -* Non-null, obviously. -* Must be aligned for `T` -* Must always produce valid bit patterns for `T` -* Must not be part of the address space that Rust's stack or allocator will ever - use.
- -So, again using the `hello_magic` example, we had - -```rust -(0x400_0000 as *mut u16).write_volatile(0x0403); -``` - -And instead we could declare - -```rust -const MAGIC_LOCATION: VolAddress = unsafe { VolAddress::new(0x400_0000) }; -``` - -### Using A VolAddress Value - -Now that we've named the magic location, we want to write to it. - -```rust -impl VolAddress { - pub fn read(self) -> T - where - T: Copy, - { - unsafe { (self.address.get() as *mut T).read_volatile() } - } - pub unsafe fn read_non_copy(self) -> T { - (self.address.get() as *mut T).read_volatile() - } - pub fn write(self, val: T) { - unsafe { (self.address.get() as *mut T).write_volatile(val) } - } -} -``` - -So if the type is `Copy` we can `read` it as much as we want. If, somehow, the -type isn't `Copy`, then it might be `Drop`, and that means if we read out a -value over and over we could cause the `drop` method to trigger UB. Since the -end user might really know what they're doing, we provide an unsafe backup -`read_non_copy`. - -On the other hand, we can `write` to the location as much as we want. Even if -the type isn't `Copy`, _not running `Drop` is safe_, so a `write` is always -safe. - -Now we can write to our magical location. - -```rust -MAGIC_LOCATION.write(0x0403); -``` - -### VolAddress Iteration - -We've already seen that sometimes we want to have a base address of some sort -and then offset from that location to another. What if we wanted to iterate over -_all the locations_. That's not particularly hard. 
- -```rust -impl VolAddress { - pub const unsafe fn iter_slots(self, slots: usize) -> VolAddressIter { - VolAddressIter { vol_address: self, slots } - } -} - -#[derive(Debug)] -pub struct VolAddressIter { - vol_address: VolAddress, - slots: usize, -} -impl Clone for VolAddressIter { - fn clone(&self) -> Self { - VolAddressIter { - vol_address: self.vol_address, - slots: self.slots, - } - } -} -impl PartialEq for VolAddressIter { - fn eq(&self, other: &Self) -> bool { - self.vol_address == other.vol_address && self.slots == other.slots - } -} -impl Eq for VolAddressIter {} -impl Iterator for VolAddressIter { - type Item = VolAddress; - - fn next(&mut self) -> Option { - if self.slots > 0 { - let out = self.vol_address; - unsafe { - self.slots -= 1; - self.vol_address = self.vol_address.offset(1); - } - Some(out) - } else { - None - } - } -} -impl FusedIterator for VolAddressIter {} -``` - -### VolAddressBlock - -Obviously, having a base address and a length exist separately is error prone. -There's a good reason for slices to keep their pointer and their length -together. We want something like that, which we'll call a "block" because -"array" and "slice" are already things in Rust. 
- -```rust -#[derive(Debug)] -pub struct VolAddressBlock { - vol_address: VolAddress, - slots: usize, -} -impl Clone for VolAddressBlock { - fn clone(&self) -> Self { - VolAddressBlock { - vol_address: self.vol_address, - slots: self.slots, - } - } -} -impl PartialEq for VolAddressBlock { - fn eq(&self, other: &Self) -> bool { - self.vol_address == other.vol_address && self.slots == other.slots - } -} -impl Eq for VolAddressBlock {} - -impl VolAddressBlock { - pub const unsafe fn new_unchecked(vol_address: VolAddress, slots: usize) -> Self { - VolAddressBlock { vol_address, slots } - } - pub const fn iter(self) -> VolAddressIter { - VolAddressIter { - vol_address: self.vol_address, - slots: self.slots, - } - } - pub unsafe fn index_unchecked(self, slot: usize) -> VolAddress { - self.vol_address.offset(slot as isize) - } - pub fn index(self, slot: usize) -> VolAddress { - if slot < self.slots { - unsafe { self.vol_address.offset(slot as isize) } - } else { - panic!("Index Requested: {} >= Bound: {}", slot, self.slots) - } - } - pub fn get(self, slot: usize) -> Option> { - if slot < self.slots { - unsafe { Some(self.vol_address.offset(slot as isize)) } - } else { - None - } - } -} -``` - -Now we can have something like: - -```rust -const OTHER_MAGIC: VolAddressBlock = unsafe { - VolAddressBlock::new_unchecked( - VolAddress::new(0x600_0000), - 240 * 160 - ) -}; - -OTHER_MAGIC.index(120 + 80 * 240).write_volatile(0x001F); -OTHER_MAGIC.index(136 + 80 * 240).write_volatile(0x03E0); -OTHER_MAGIC.index(120 + 96 * 240).write_volatile(0x7C00); -``` - -### Docs? - -If you wanna see these types and methods with a full docs write up you should -check the GBA crate's source. 
- diff --git a/book/src-bak/03-wram.md b/book/src-bak/03-wram.md deleted file mode 100644 index 26d71e8..0000000 --- a/book/src-bak/03-wram.md +++ /dev/null @@ -1,28 +0,0 @@ -# Work RAM - -## External Work RAM (EWRAM) - -* **Address Span:** `0x2000000` to `0x203FFFF` (256k) - -This is a big pile of space, the use of which is up to each game. However, the -external work ram has only a 16-bit bus (if you read/write a 32-bit value it -silently breaks it up into two 16-bit operations) and also 2 wait cycles (extra -CPU cycles that you have to expend _per 16-bit bus use_). - -It's most helpful to think of EWRAM as slower, distant memory, similar to the -"heap" in a normal application. You can take the time to go store something -within EWRAM, or to load it out of EWRAM, but if you've got several operations -to do in a row and you're worried about time you should pull that value into -local memory, work on your local copy, and then push it back out to EWRAM. - -## Internal Work RAM (IWRAM) - -* **Address Span:** `0x3000000` to `0x3007FFF` (32k) - -This is a smaller pile of space, but it has a 32-bit bus and no wait. - -By default, `0x3007F00` to `0x3007FFF` is reserved for interrupt and BIOS use. -The rest of it is mostly up to you. The user's stack space starts at `0x3007F00` -and proceeds _down_ from there. For best results you should probably start at -`0x3000000` and then go upwards. Under normal use it's unlikely that the two -memory regions will crash into each other. 
diff --git a/book/src-bak/04-io-registers.md b/book/src-bak/04-io-registers.md deleted file mode 100644 index 99a18b9..0000000 --- a/book/src-bak/04-io-registers.md +++ /dev/null @@ -1,3 +0,0 @@ -# IO Registers - -* **Address Span:** `0x400_0000` to `0x400_03FE` diff --git a/book/src-bak/04-newtype.md b/book/src-bak/04-newtype.md deleted file mode 100644 index 94117ec..0000000 --- a/book/src-bak/04-newtype.md +++ /dev/null @@ -1,206 +0,0 @@ -# Newtype - -TODO: we've already used newtype twice by now (fixed point values and volatile -addresses), so we need to adjust how we start this section. - -There's a great Zero Cost abstraction that we'll be using a lot that you might -not already be familiar with: we're talking about the "Newtype Pattern"! - -Now, I told you to read the Rust Book before you read this book, and I'm sure -you're all good students who wouldn't sneak into this book without doing the -required reading, so I'm sure you all remember exactly what I'm talking about, -because they touch on the newtype concept in the book twice, in two _very_ long -named sections: - -* [Using the Newtype Pattern to Implement External Traits on External - Types](https://doc.rust-lang.org/book/ch19-03-advanced-traits.html#using-the-newtype-pattern-to-implement-external-traits-on-external-types) -* [Using the Newtype Pattern for Type Safety and - Abstraction](https://doc.rust-lang.org/book/ch19-04-advanced-types.html#using-the-newtype-pattern-for-type-safety-and-abstraction) - -...Yeah... The Rust Book doesn't know how to make a short sub-section name to -save its life. Shame. - -## Newtype Basics - -So, we have all these pieces of data, and we want to keep them separated, and we -don't wanna pay the cost for it at runtime. Well, we're in luck, we can pay the -cost at compile time. 
- -```rust -pub struct PixelColor(u16); -``` - -TODO: we've already talked about repr(transparent) by now - -Ah, except that, as I'm sure you remember from [The -Rustonomicon](https://doc.rust-lang.org/nomicon/other-reprs.html#reprtransparent) -(and from the RFC too, of course), if we have a single field struct that's -sometimes different from having just the bare value, so we should be using -`#[repr(transparent)]` with our newtypes. - -```rust -#[repr(transparent)] -pub struct PixelColor(u16); -``` - -And then we'll need to do that same thing for _every other newtype we want_. - -Except there's only two tiny parts that actually differ between newtype -declarations: the new name and the base type. All the rest is just the same rote -code over and over. Generating piles and piles of boilerplate code? Sounds like -a job for a macro to me! - -## Making It A Macro - -If you're going to do much with macros you should definitely read through [The -Little Book of Rust -Macros](https://danielkeep.github.io/tlborm/book/index.html), but we won't be -doing too much so you can just follow along here a bit if you like. - -The most basic version of a newtype macro starts like this: - -```rust -#[macro_export] -macro_rules! newtype { - ($new_name:ident, $old_name:ident) => { - #[repr(transparent)] - pub struct $new_name($old_name); - }; -} -``` - -The `#[macro_export]` makes it exported by the current module (like `pub` -kinda), and then we have one expansion option that takes an identifier, a `,`, -and then a second identifier. The new name is the outer type we'll be using, and -the old name is the inner type that's being wrapped. You'd use our new macro -something like this: - -```rust -newtype! {PixelColorCurly, u16} - -newtype!(PixelColorParens, u16); - -newtype![PixelColorBrackets, u16]; -``` - -Note that you can invoke the macro with the outermost grouping as any of `()`, -`[]`, or `{}`. It makes no particular difference to the macro. 
Also, that space -in the first version is kinda to show off that you can put white space in -between the macro name and the grouping if you want. The difference is mostly -style, but there are some rules and considerations here: - -* If you use curly braces then you _must not_ put a `;` after the invocation. -* If you use parentheses or brackets then you _must_ put the `;` at the end. -* Rustfmt cares which you use and formats accordingly: - * Curly brace macro use mostly gets treated like a code block. - * Parentheses macro use mostly gets treated like a function call. - * Bracket macro use mostly gets treated like an array declaration. - -**As a reminder:** remember that `macro_rules` macros have to appear _before_ -they're invoked in your source, so the `newtype` macro will always have to be at -the very top of your file, or if you put it in a module within your project -you'll need to declare the module before anything that uses it. - -## Upgrade That Macro! - -We also want to be able to add `derive` stuff and doc comments to our newtype. -Within the context of `macro_rules!` definitions these are called "meta". Since -we can have any number of them we wrap it all up in a "zero or more" matcher. -Then our macro looks like this: - -```rust -#[macro_export] -macro_rules! newtype { - ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => { - $(#[$attr])* - #[repr(transparent)] - pub struct $new_name($old_name); - }; -} -``` - -So now we can write - -```rust -newtype! { - /// Color on the GBA gives 5 bits for each channel, the highest bit is ignored. - #[derive(Debug, Clone, Copy)] - PixelColor, u16 -} -``` - -Next, we can allow for the wrapping of types that aren't just a single -identifier by changing `$old_name` from `:ident` to `:ty`. We can't _also_ do -this for the `$new_name` part because declaring a new struct expects a valid -identifier that's _not_ already declared (obviously), and `:ty` is intended for -capturing types that already exist.
- -```rust -#[macro_export] -macro_rules! newtype { - ($(#[$attr:meta])* $new_name:ident, $old_name:ty) => { - $(#[$attr])* - #[repr(transparent)] - pub struct $new_name($old_name); - }; -} -``` - -Next of course we'll want to usually have a `new` method that's const and just -gives a 0 value. We won't always be making a newtype over a number value, but we -often will. It's usually silly to have a `new` method with no arguments since we -might as well just impl `Default`, but `Default::default` isn't `const`, so -having `pub const fn new() -> Self` is justified here. - -Here, the token `0` is given the `{integer}` type, which can be converted into -any of the integer types as needed, but it still can't be converted into an -array type or a pointer or things like that. Accordingly we've added the "no -frills" option which declares the struct and no `new` method. - -```rust -#[macro_export] -macro_rules! newtype { - ($(#[$attr:meta])* $new_name:ident, $old_name:ty) => { - $(#[$attr])* - #[repr(transparent)] - pub struct $new_name($old_name); - impl $new_name { - /// A `const` "zero value" constructor - pub const fn new() -> Self { - $new_name(0) - } - } - }; - ($(#[$attr:meta])* $new_name:ident, $old_name:ty, no frills) => { - $(#[$attr])* - #[repr(transparent)] - pub struct $new_name($old_name); - }; -} -``` - -Finally, we usually want to have the wrapped value be totally private, but there -_are_ occasions where that's not the case. For this, we can allow the wrapped -field to accept a visibility modifier. - -```rust -#[macro_export] -macro_rules! 
newtype { - ($(#[$attr:meta])* $new_name:ident, $v:vis $old_name:ty) => { - $(#[$attr])* - #[repr(transparent)] - pub struct $new_name($v $old_name); - impl $new_name { - /// A `const` "zero value" constructor - pub const fn new() -> Self { - $new_name(0) - } - } - }; - ($(#[$attr:meta])* $new_name:ident, $v:vis $old_name:ty, no frills) => { - $(#[$attr])* - #[repr(transparent)] - pub struct $new_name($v $old_name); - }; -} -``` diff --git a/book/src-bak/04-sound.md b/book/src-bak/04-sound.md deleted file mode 100644 index 26f833d..0000000 --- a/book/src-bak/04-sound.md +++ /dev/null @@ -1 +0,0 @@ -# Sound diff --git a/book/src-bak/05-const_asserts.md b/book/src-bak/05-const_asserts.md deleted file mode 100644 index 21cb201..0000000 --- a/book/src-bak/05-const_asserts.md +++ /dev/null @@ -1,130 +0,0 @@ -# Constant Assertions - -Have you ever wanted to assert things _even before runtime_? We all have, of -course. Particularly when the runtime machine is a poor little GBA, we'd like to -have the machine doing the compile handle as much checking as possible. - -Enter the [static assertions](https://docs.rs/static_assertions/) crate, which -provides a way to let you assert on a `const` expression. - -This is an amazing crate that you should definitely use when you can. - -It's written by [Nikolai Vazquez](https://github.com/nvzqz), and they kindly -wrote up a [blog -post](https://nikolaivazquez.com/posts/programming/rust-static-assertions/) that -explains the thinking behind it. - -However, I promised that each example would be single file, and I also promised -to explain what's going on as we go, so we'll briefly touch upon giving an -explanation here. - -## How We Const Assert - -Alright, as it stands (2018-12-15), we can't use `if` in a `const` context. - -Since we can't use `if`, we can't use a normal `assert!`. 
Some day it will be -possible, and a failed assert at compile time will be a compile error and a -failed assert at run time will be a panic and we'll have a nice unified -programming experience. Until then, we can add compile-time assertions by being a little -tricky with the compiler. - -If we write - -```rust -const ASSERT: usize = 0 - 1; -``` - -that gives a warning, since the math would underflow. We can upgrade that -warning to a hard error: - -```rust -#[deny(const_err)] -const ASSERT: usize = 0 - 1; -``` - -And to make our construction reusable we can enable the -[underscore_const_names](https://github.com/rust-lang/rust/issues/54912) feature -in our program (or library) and then give each such const an underscore for a -name. - -```rust -#![feature(underscore_const_names)] - -#[deny(const_err)] -const _: usize = 0 - 1; -``` - -Now we wrap this in a macro where we give a `bool` expression as input. We -negate the bool then cast it to a `usize`, meaning that `true` negates into -`false`, which becomes `0usize`, and then there's no underflow error. Or if the -input was `false`, it negates into `true`, then becomes `1usize`, and then the -underflow error fires. - -```rust -macro_rules! const_assert { - ($condition:expr) => { - #[deny(const_err)] - #[allow(dead_code)] - const ASSERT: usize = 0 - !$condition as usize; - } -} -``` - -Technically, written like this, the expression can be anything with a -`core::ops::Not` implementation that can also be `as` cast into `usize`. That's -`bool`, but also basically all the other number types. Since we want to ensure -that we get proper looking type errors when things go wrong, we can use -`($condition && true)` to enforce that we get a `bool` (thanks to `Talchas` for -that particular suggestion). - -```rust -macro_rules!
const_assert { - ($condition:expr) => { - #[deny(const_err)] - #[allow(dead_code)] - const _: usize = 0 - !($condition && true) as usize; - } -} -``` - -## Asserting Something - -As an example of how we might use a `const_assert`, we'll do a demo with colors. -There's a red, blue, and green channel. We store colors in a `u16` with 5 bits -for each channel. - -```rust -newtype! { - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - Color, u16 -} -``` - -And when we're building a color, we're passing in `u16` values, but they could -be using more than just 5 bits of space. We want to make sure that each channel -is 31 or less, so we can make a color builder that does a `const_assert!` on the -value of each channel. - -```rust -macro_rules! rgb { - ($r:expr, $g:expr, $b:expr) => { - { - const_assert!($r <= 31); - const_assert!($g <= 31); - const_assert!($b <= 31); - Color($b << 10 | $g << 5 | $r) - } - } -} -``` - -And then we can declare some colors - -```rust -const RED: Color = rgb!(31, 0, 0); - -const BLUE: Color = rgb!(31, 500, 0); -``` - -The second one is clearly out of bounds and it fires an error just like we -wanted. diff --git a/book/src-bak/05-help_and_resources.md b/book/src-bak/05-help_and_resources.md deleted file mode 100644 index 59a51f5..0000000 --- a/book/src-bak/05-help_and_resources.md +++ /dev/null @@ -1,78 +0,0 @@ -# Help and Resources - -## Help - -So you're stuck on a problem and the book doesn't say what to do. Where can you -find out more? - -The first place I would suggest is the [Rust Community -Discord](https://discordapp.com/invite/aVESxV8). If it's a general Rust question -then you can ask anyone in any channel you feel is appropriate. If it's GBA -specific then you can try asking me (`Lokathor`) or `Ketsuban` in the `#gamedev` -channel. 
- -## Emulators - -You certainly might want to eventually write a game that you can put on a flash -cart and play on real hardware, but for most of your development you'll probably -want to be using an emulator for testing, because you don't have to fiddle with -cables and all that. - -In terms of emulators, you want to be using -[mGBA](https://github.com/mgba-emu/mgba), and you want to be using the [0.7 Beta -1](https://github.com/mgba-emu/mgba/releases/tag/0.7-b1) or later. This update -lets you run raw ELF files, which means that you can have full debug symbols -available while you're debugging problems. - -## Information Resources - -First, if I fail to describe something related to Rust, you can always try -checking in [The Rust -Reference](https://doc.rust-lang.org/nightly/reference/introduction.html) to see -if they cover it. You can mostly ignore that big scary red banner at the top, -things are a lot better documented than they make it sound. - -If you need help trying to fiddle your math down as hard as you can, there are -resources such as the [Bit Twiddling -Hacks](https://graphics.stanford.edu/~seander/bithacks.html) page. - -As to GBA related lore, Ketsuban and I didn't magically learn this all from -nowhere, we read various technical manuals and guides ourselves and then -distilled those works oriented around C and C++ into a book for Rust. - -We have personally used some or all of the following: - -* [GBATEK](http://problemkaputt.de/gbatek.htm): This is _the_ resource. It - covers not only the GBA, but also the DS and DSi, and also a run down of ARM - assembly (32-bit and 16-bit opcodes). The link there is to the 2.9b version on - `problemkaputt.de` (the official home of the document), but if you just google - for gbatek the top result is for the 2.5 version on `akkit.org`, so make sure - you're looking at the newest version. 
Sometimes `problemkaputt.de` is a little - sluggish so I've also [mirrored](https://lokathor.com/gbatek.html) the 2.9b - version on my own site as well. GBATEK is rather large, over 2mb of text, so - if you're on a phone or similar you might want to save an offline copy to go - easy on your data usage. -* [TONC](https://www.coranac.com/tonc/text/): While GBATEK is basically just a - huge tech specification, TONC is an actual _guide_ on how to make sense of the - GBA's abilities and organize it into a game. It's written for C of course, but - as a Rust programmer you should always be practicing your ability to read C - code anyway. It's the programming equivalent of learning Latin because all the - old academic books are written in Latin. -* [CowBite](https://www.cs.rit.edu/~tjh8300/CowBite/CowBiteSpec.htm): This is - more like GBATEK, and it's less complete, but it mixes in a little more - friendly explanation of things in between the hardware spec parts. - -And I haven't had time to look at it myself, [The Audio -Advance](http://belogic.com/gba/) seems to be very good. It explains in depth -how you can get audio working on the GBA. Note that the table of contents for -each page goes along the top instead of down the side. - -## Non-Rust GBA Community - -There's also the [GBADev.org](http://www.gbadev.org/) site, which has a forum -and everything. They're coding in C and C++, but you can probably overcome that -difference with a little work on your part. - -I also found a place called -[GBATemp](https://gbatemp.net/categories/nintendo-gba-discussions.32/), which -seems to have a more active forum but less of a focus on actual coding. 
diff --git a/book/src-bak/05-interrupts.md b/book/src-bak/05-interrupts.md deleted file mode 100644 index 81df6c7..0000000 --- a/book/src-bak/05-interrupts.md +++ /dev/null @@ -1 +0,0 @@ -# Interrupts diff --git a/book/src-bak/05-palram.md b/book/src-bak/05-palram.md deleted file mode 100644 index 0bc7e7f..0000000 --- a/book/src-bak/05-palram.md +++ /dev/null @@ -1,50 +0,0 @@ -# Palette RAM (PALRAM) - -* **Address Span:** `0x500_0000` to `0x500_03FF` (1k) - -Palette RAM has a 16-bit bus, which isn't really a problem because it -conceptually just holds `u16` values. There's no automatic wait state, but if -you try to access the same location that the display controller is accessing you -get bumped by 1 cycle. Since the display controller can use the palette ram any -number of times per scanline it's basically impossible to predict if you'll have -to do a wait or not during VDraw. During VBlank you won't have any wait of -course. - -PALRAM is among the memory where there's weirdness if you try to write just one -byte: if you try to write just 1 byte, it writes that byte into _both_ parts of -the larger 16-bit location. This doesn't really affect us much with PALRAM, -because palette values are all supposed to be `u16` anyway. - -The palette memory actually contains not one, but _two_ sets of palettes. First -there's 256 entries for the background palette data (starting at `0x500_0000`), -and then there's 256 entries for object palette data (starting at `0x500_0200`). - -The GBA also has two modes for palette access: 8-bits-per-pixel (8bpp) and -4-bits-per-pixel (4bpp). - -* In 8bpp mode an 8-bit palette index value within a background or sprite - simply indexes directly into the 256 slots for that type of thing. 
-* In 4bpp mode a 4-bit palette index value within a background or sprite - specifies an index within a particular "palbank" (16 palette entries each), - and then a _separate_ setting outside of the graphical data determines which - palbank is to be used for that background or object (the screen entry data for - backgrounds, and the object attributes for objects). - -### Transparency - -When a pixel within a background or object specifies index 0 as its palette -entry it is treated as a transparent pixel. This means that in 8bpp mode there's -only 255 actual color options (0 being transparent), and in 4bpp mode there's -only 15 actual color options available within each palbank (the 0th entry of -_each_ palbank is transparent). - -Individual backgrounds, and individual objects, each determine if they're 4bpp -or 8bpp separately, so a given overall palette slot might map to a used color in -8bpp and an unused/transparent color in 4bpp. If you're a palette wizard. - -Palette slot 0 of the overall background palette is used to determine the -"backdrop" color. That's the color you see if no background or object ends up -being rendered within a given pixel. - -Since display mode 3 and display mode 5 don't use the palette, they cannot -benefit from transparency. diff --git a/book/src-bak/06-link_cable.md b/book/src-bak/06-link_cable.md deleted file mode 100644 index f8e1989..0000000 --- a/book/src-bak/06-link_cable.md +++ /dev/null @@ -1 +0,0 @@ -# Link Cable diff --git a/book/src-bak/06-vram.md b/book/src-bak/06-vram.md deleted file mode 100644 index 24a96c4..0000000 --- a/book/src-bak/06-vram.md +++ /dev/null @@ -1,24 +0,0 @@ -# Video RAM (VRAM) - -* **Address Span:** `0x600_0000` to `0x601_7FFF` (96k) - -We've used this before! VRAM has a 16-bit bus and no wait. However, the same as -with PALRAM, the "you might have to wait if the display controller is looking at -it" rule applies here. - -Unfortunately there's not much more exact detail that can be given about VRAM. 
-The use of the memory depends on the video mode that you're using. - -One general detail of note is that you can't write individual bytes to any part -of VRAM. Depending on mode and location, you'll either get your bytes doubled -into both the upper and lower parts of the 16-bit location targeted, or you -won't even affect the memory. This usually isn't a big deal, except in two -situations: - -* In Mode 4, if you want to change just 1 pixel, you'll have to be very careful - to read the old `u16`, overwrite just the byte you wanted to change, and then - write that back. -* In any display mode, avoid using `memcopy` to place things into VRAM. - It's written to be byte oriented, and only does 32-bit transfers under select - conditions. The rest of the time it'll copy one byte at a time and you'll get - either garbage or nothing at all. diff --git a/book/src-bak/07-game_pak.md b/book/src-bak/07-game_pak.md deleted file mode 100644 index 7e1ac79..0000000 --- a/book/src-bak/07-game_pak.md +++ /dev/null @@ -1 +0,0 @@ -# Game Pak diff --git a/book/src-bak/07-oam.md b/book/src-bak/07-oam.md deleted file mode 100644 index eeee6f2..0000000 --- a/book/src-bak/07-oam.md +++ /dev/null @@ -1,62 +0,0 @@ -# Object Attribute Memory (OAM) - -* **Address Span:** `0x700_0000` to `0x700_03FF` (1k) - -The Object Attribute Memory has a 32-bit bus and no default wait, but suffers -from the "you might have to wait if the display controller is looking at it" -rule. You cannot write individual bytes to OAM at all, but that's not really a -problem because all the fields of the data types within OAM are either `i16` or -`u16` anyway. - -Object attribute memory is the wildest yet: it conceptually contains two types -of things, but they're _interlaced_ with each other all the way through. 
- -Now, [GBATEK](http://problemkaputt.de/gbatek.htm#lcdobjoamattributes) and -[CowBite](https://www.cs.rit.edu/~tjh8300/CowBite/CowBiteSpec.htm#OAM%20(sprites)) -don't quite give names to the two data types here. -[TONC](https://www.coranac.com/tonc/text/regobj.htm#sec-oam) calls them -`OBJ_ATTR` and `OBJ_AFFINE`, but we'll be giving them names fitting with the -Rust naming convention. Just know that if you try to talk about it with others -they might not be using the same names. In Rust terms their layout would look -like this: - -```rust -#[repr(C)] -pub struct ObjectAttributes { - attr0: u16, - attr1: u16, - attr2: u16, - filler: i16, -} - -#[repr(C)] -pub struct AffineMatrix { - filler0: [u16; 3], - pa: i16, - filler1: [u16; 3], - pb: i16, - filler2: [u16; 3], - pc: i16, - filler3: [u16; 3], - pd: i16, -} -``` - -(Note: the `#[repr(C)]` part just means that Rust must lay out the data exactly -in the order we specify, which otherwise it is not required to do). - -So, we've got 1024 bytes in OAM and each `ObjectAttributes` value is 8 bytes, so -naturally we can support up to 128 objects. - -_At the same time_, we've got 1024 bytes in OAM and each `AffineMatrix` is 32 -bytes, so we can have 32 of them. - -But, as I said, these things are all _interlaced_ with each other. See how -there's "filler" fields in each struct? If we imagine the OAM as being just an -array of one type or the other, indexes 0/1/2/3 of the `ObjectAttributes` array -would line up with index 0 of the `AffineMatrix` array. It's kinda weird, but -that's just how it works. When we setup functions to read and write these values -we'll have to be careful with how we do it. We probably _won't_ want to use -those representations above, at least not with the `AffineMatrix` type, because -they're quite wasteful if you want to store just object attributes or just -affine matrices. 
diff --git a/book/src-bak/08-rom.md b/book/src-bak/08-rom.md deleted file mode 100644 index 584faac..0000000 --- a/book/src-bak/08-rom.md +++ /dev/null @@ -1,14 +0,0 @@ -# Game Pak ROM / Flash ROM (ROM) - -* **Address Span (Wait State 0):** `0x800_0000` to `0x9FF_FFFF` -* **Address Span (Wait State 1):** `0xA00_0000` to `0xBFF_FFFF` -* **Address Span (Wait State 2):** `0xC00_0000` to `0xDFF_FFFF` - -The game's ROM data is a single set of data that's up to 32 megabytes in size. -However, that data is mirrored to three different locations in the address -space. Depending on which part of the address space you use, it can affect the -memory timings involved. - -TODO: describe `WAITCNT` here, we won't get a better chance at it. - -TODO: discuss THUMB vs ARM code and why THUMB is so much faster (because ROM is a 16-bit bus) diff --git a/book/src-bak/09-sram.md b/book/src-bak/09-sram.md deleted file mode 100644 index fbb6202..0000000 --- a/book/src-bak/09-sram.md +++ /dev/null @@ -1,21 +0,0 @@ -# Save RAM (SRAM) - -* **Address Span:** `0xE00_0000` to `0xE00FFFF` (64k) - -The actual amount of SRAM available depends on your game pak, and the 64k figure -is simply the maximum possible. A particular game pak might have less, and an -emulator will likely let you have all 64k if you want. - -As with other portions of the address space, SRAM has some number of wait cycles -per use. As with ROM, you can change the wait cycle settings via the `WAITCNT` -register if the defaults don't work well for your game pak. See the ROM section -for full details of how the `WAITCNT` register works. - -The game pak SRAM also has only an 8-bit bus, so have fun with that. - -The GBA Direct Memory Access (DMA) unit cannot access SRAM. - -Also, you [should not write to SRAM with code executing from -ROM](https://problemkaputt.de/gbatek.htm#gbacartbackupsramfram). Instead, you -should move the code to WRAM and execute the save code from there. We'll cover -how to handle that eventually. 
diff --git a/book/src-bak/gba_prng.md b/book/src-bak/gba_prng.md deleted file mode 100644 index 1ce9581..0000000 --- a/book/src-bak/gba_prng.md +++ /dev/null @@ -1,1119 +0,0 @@ -# GBA PRNG - -You often hear of the "Random Number Generator" in video games. First of all, -usually a game doesn't have access to any source of "true randomness". On a PC -you can send out a web request to [random.org](https://www.random.org/) which -uses atmospheric data, or even just [point a camera at some lava -lamps](https://blog.cloudflare.com/randomness-101-lavarand-in-production/). Even -then, the rate at which you'll want random numbers far exceeds the rate at which -those services can offer them up. So instead you'll get a pseudo-random number -generator and "seed" it with the true random data and then use that. - -However, we don't even have that! On the GBA, we can't ask any external anything -what we should do for our initial seed. So we will not only need to come up with -a few PRNG options, but we'll also need to come up with some seed source -options. More than with other options within the book, I think this is an area -where you can tailor what you do to your specific game. - -## What is a Pseudo-random Number Generator? - -For those of you who somehow read The Rust Book, plus possibly The Rustonomicon, -and then found this book, but somehow _still_ don't know what a PRNG is... Well, -I don't think there are many such people. Still, we'll define it anyway I -suppose. - -> A PRNG is any mathematical process that takes an initial input (of some fixed -> size) and then produces a series of outputs (of a possibly different size). - -So, if you seed your PRNG with a 32-bit value you might get 32-bit values out or -you might get 16-bit values out, or something like that. 
- -We measure the quality of a PRNG based upon: - -1) **Is the output range easy to work with?** Most PRNG techniques that you'll - find these days are already hip to the idea that we'll have the fastest - operations with numbers that match our register width and all that, so - they're usually designed around power of two inputs and power of two outputs. - Still, every once in a while you might find some old page intended for - compatibility with the `rand()` function in the C standard library that'll - talk about something _crazy_ like having 15-bit PRNG outputs. Stupid as it - sounds, that's real. Avoid those. Whenever possible we want generators that - give us uniformly distributed `u8`, `u16`, `u32`, or whatever size value - we're producing. From there we can mold our random bits into whatever else we - need (eg: turning a `u8` into a "1d6" roll). -2) **How long does each generation cycle take?** This can be tricky for us. A - lot of the top quality PRNGs you'll find these days are oriented towards - 64-bit machines so they do a bunch of 64-bit operations. You _can_ do that on - a 32-bit machine if you have to, and the compiler will automatically "lower" - the 64-bit operation into a series of 32-bit operations. What we'd really - like to pick is something that sticks to just 32-bit operations though, since - those will be our best candidates for fast results. We can use [Compiler - Explorer](https://rust.godbolt.org/z/JyX7z-) and tell it to build for the - `thumbv6m-none-eabi` target to get a basic idea of what the ASM for a - generator looks like. That's not our exact target, but it's the closest - target that's shipped with the standard rust distribution. -3) **What is the statistical quality of the output?** This involves heavy - amounts of math. Since computers are quite good at large amounts of repeated - math you might wonder if there's programs for this already, and there are. - Many in fact. 
They take a generator and then run it over and over and perform - the necessary tests and report the results. I won't be explaining how to hook - our generators up to those tools, they each have their own user manuals. - However, if someone says that a generator "passes BigCrush" (the biggest - suite in TestU01) or "fails PractRand" or anything similar it's useful to - know what they're referring to. Example test suites include: - * [TestU01](https://en.wikipedia.org/wiki/TestU01) - * [PractRand](http://pracrand.sourceforge.net/) - * [Dieharder](https://webhome.phy.duke.edu/~rgb/General/dieharder.php) - * [NIST Statistical Test - Suite](https://csrc.nist.gov/projects/random-bit-generation/documentation-and-software) - -Note that if a generator is called upon to produce enough output relative to its -state size it will basically always end up failing statistical tests. This means -that any generator with 32-bit state will always fail in any of those test sets. -The theoretical _minimum_ state size for any generator at all to pass the -standard suites is 36 bits, but most generators need many more than that. - -### Generator Size - -I've mostly chosen to discuss generators that are towards the smaller end of the -state size scale. In fact we'll be going over many generators that are below the -36-bit theoretical minimum to pass all those fancy statistical tests. Why so? -Well, we don't always need the highest possible quality generators. - -"But Lokathor!", I can already hear you shouting. "I want the highest quality -randomness at all times! The game depends on it!", you cry out. - -Well... does it? Like, _really_? - -The [GBA -Pokemon](https://bulbapedia.bulbagarden.net/wiki/Pseudorandom_number_generation_in_Pok%C3%A9mon) -games use a _dead simple_ 32-bit LCG (we'll see it below). Then starting with -the DS they moved to also using Mersenne Twister, which also fails several -statistical tests and is one of the most predictable PRNGs around. 
[Metroid -Fusion](http://wiki.metroidconstruction.com/doku.php?id=fusion:technical:rng) -has a 100% goofy PRNG system for enemies that would definitely never pass any -sort of statistics tests at all. But like, those games were still awesome. Since -we're never going to be keeping secrets safe with our PRNG, it's okay if we -trade in some quality for something else in return (we obviously don't want to -trade quality for nothing). - -And you have to ask yourself: Where's the space used for the Metroid Fusion -PRNG? No where at all. They were already using everything involved for other -things too, so they're paying no extra cost to have the randomization they do. -How much does it cost Pokemon to throw in a 32-bit LCG? Just 4 bytes, might as -well. How much does it cost to add in a Mersenne Twister? ~2,500 bytes ya say? -I'm sorry _what on Earth_? Yeah, that sounds crazy, we're probably not doing -that one. - -### k-Dimensional Equidistribution - -So, wait, why did the Pokemon developers add in the Mersenne Twister generator? -They're smart people, surely they had a reason. Obviously we can't know for -sure, but Mersenne Twister is terrible in a lot of ways, so what's its single -best feature? Well, that gets us to a funky thing called **k-dimensional -equidistribution**. Basically, if you take a generator's output and chop it down -to get some value you want, with uniform generator output you can always get a -smaller ranged uniform result (though sometimes you will have to reject a result -and run the generator again). Imagine you have a `u32` output from your -generator. If you want a `u16` value from that you can just pick either half. If -you want a `[bool; 4]` from that you can just pick four bits. However you wanna -do it, as long as the final form of random thing we're getting needs a number of -bits _equal to or less than_ the number of bits that come out of a single -generator use, we're totally fine. 
- -What happens if the thing you want to make requires _more_ bits than a single -generator's output? You obviously have to run the generator more than once and -then stick two or more outputs together, duh. Except, that doesn't always work. -What I mean is that obviously you can always put two `u8` side by side to get a -`u16`, but if you start with a uniform `u8` generator and then you run it twice -and stick the results together you _don't_ always get a uniform `u16` generator. -Imagine a byte generator that just does `state+=1` and then outputs the state. -It's not good by almost any standard, but it _does give uniform output_. Then we -run it twice in a row, put the two bytes together, and suddenly a whole ton of -potential `u16` values can never be generated. That's what k-dimensional -equidistribution is all about. Every uniform output generator is 1-dimensional -equidistributed, but if you need to combine outputs and still have uniform -results then you need a higher `k` value. So why does Pokemon have Mersenne -Twister in it? Because it's got 623-dimensional equidistribution. That means -when you're combining PRNG calls for all those little IVs and Pokemon Abilities -and other things you're sure to have every potential pokemon actually be a -pokemon that the game can generate. Do you need that for most situations? -Absolutely not. Do you need it for pokemon? No, not even then, but a lot of the -hot new PRNGs have come out just within the past 10 years, so we can't fault -them too much for it. - -TLDR: 1-dimensional equidistribution just means "a normal uniform generator", -and higher k values mean "you can actually combine up to k output chains and -maintain uniformity". Generators that aren't uniform to begin with effectively -have a k value of 0. - -### Other Tricks - -Finally, some generators have other features that aren't strictly quantifiable. 
-Two tricks of note are "jump ahead" or "multiple streams": - -* Jump ahead lets you advance the generator's state by some enormous number of - outputs in a relatively small number of operations. -* Multi-stream generators have more than one output sequence, and then some part - of their total state space picks a "stream" rather than being part of the - actual seed, with each possible stream causing the potential output sequence - to be in a different order. - -They're normally used as a way to do multi-threaded stuff (we don't care about -that on GBA), but another interesting potential is to take one world seed and -then split off a generator for each "type" of thing you'd use PRNG for (combat, -world events, etc). This can become quite useful, where you can do things like -procedurally generate a world region, and then when they leave the region you -only need to store a single generator seed and a small amount of "delta" -information for what the player changed there that you want to save, and then -when they come back you can regenerate the region without having stored much at -all. This is the basis for how old games with limited memory like -[Starflight](https://en.wikipedia.org/wiki/Starflight) did their whole thing -(800 planets to explore on just two 5.25" floppy disks!). - -## How To Seed - -Oh I bet you thought we could somehow get through a section without learning -about yet another IO register. Ha, wishful thinking. - -There's actually not much involved. Starting at `0x400_0100` there's an array of -registers that go "data", "control", "data", "control", etc. 
TONC and GBATEK use -different names here, and we'll go by the TONC names because they're much -clearer: - -```rust -pub const TM0D: VolatilePtr = VolatilePtr(0x400_0100 as *mut u16); -pub const TM0CNT: VolatilePtr = VolatilePtr(0x400_0102 as *mut u16); - -pub const TM1D: VolatilePtr = VolatilePtr(0x400_0104 as *mut u16); -pub const TM1CNT: VolatilePtr = VolatilePtr(0x400_0106 as *mut u16); - -pub const TM2D: VolatilePtr = VolatilePtr(0x400_0108 as *mut u16); -pub const TM2CNT: VolatilePtr = VolatilePtr(0x400_010A as *mut u16); - -pub const TM3D: VolatilePtr = VolatilePtr(0x400_010C as *mut u16); -pub const TM3CNT: VolatilePtr = VolatilePtr(0x400_010E as *mut u16); -``` - -Basically there's 4 timers, numbered 0 to 3. Each one has a Data register and a -Control register. They're all `u16` and you can definitely _read_ from all of -them normally, but then it gets a little weird. You can also _write_ to the -Control portions normally, when you write to the Data portion of a timer that -writes the value that the timer resets to, _without changing_ its current Data -value. So if `TM0D` is paused on some value other than `5` and you write `5` to -it, when you read it back you won't get a `5`. When the next timer run starts -it'll begin counting at `5` instead of whatever value it currently reads as. - -The Data registers are just a `u16` number, no special bits to know about. - -The Control registers are also pretty simple compared to most IO registers: - -* 2 bits for the **Frequency:** 1, 64, 256, 1024. While active, the timer's - value will tick up once every `frequency` CPU cycles. On the GBA, 1 CPU cycle - is about 59.59ns (2^(-24) seconds). One display controller cycle is 280,896 - CPU cycles. -* 1 bit for **Cascade Mode:** If this is on the timer doesn't count on its own, - instead it ticks up whenever the _preceding_ timer overflows its counter (eg: - if t0 overflows, t1 will tick up if it's in cascade mode). 
You still have to - also enable this timer for it to do that (below). This naturally doesn't have - an effect when used with timer 0. -* 3 bits that do nothing -* 1 bit for **Interrupt:** Whenever this timer overflows it will signal an - interrupt. We still haven't gotten into interrupts yet (since you have to hand - write some ASM for that, it's annoying), but when we cover them this is how - you do them with timers. -* 1 bit to **Enable** the timer. When you disable a timer it retains the current - value, but when you enable it again the value jumps to whatever its currently - assigned default value is. - -```rust -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] -#[repr(transparent)] -pub struct TimerControl(u16); - -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum TimerFrequency { - One = 0, - SixFour = 1, - TwoFiveSix = 2, - OneZeroTwoFour = 3, -} - -impl TimerControl { - pub fn frequency(self) -> TimerFrequency { - match self.0 & 0b11 { - 0 => TimerFrequency::One, - 1 => TimerFrequency::SixFour, - 2 => TimerFrequency::TwoFiveSix, - 3 => TimerFrequency::OneZeroTwoFour, - _ => unreachable!(), - } - } - pub fn cascade_mode(self) -> bool { - self.0 & 0b100 > 0 - } - pub fn interrupt(self) -> bool { - self.0 & 0b100_0000 > 0 - } - pub fn enabled(self) -> bool { - self.0 & 0b1000_0000 > 0 - } - // - pub fn set_frequency(&mut self, frequency: TimerFrequency) { - self.0 &= !0b11; - self.0 |= frequency as u16; - } - pub fn set_cascade_mode(&mut self, bit: bool) { - if bit { - self.0 |= 0b100; - } else { - self.0 &= !0b100; - } - } - pub fn set_interrupt(&mut self, bit: bool) { - if bit { - self.0 |= 0b100_0000; - } else { - self.0 &= !0b100_0000; - } - } - pub fn set_enabled(&mut self, bit: bool) { - if bit { - self.0 |= 0b1000_0000; - } else { - self.0 &= !0b1000_0000; - } - } -} -``` - -### A Timer Based Seed - -Okay so how do we turn some timers into a PRNG seed? Well, usually our seed is -a `u32`. 
So we'll take two timers, string them together with that cascade deal, -and then set them off. Then we wait until the user presses any key. We probably -do this as our first thing at startup, but we might show the title and like a -"press any key to continue" message, or something. - -```rust -/// Mucks with the settings of Timers 0 and 1. -unsafe fn u32_from_user_wait() -> u32 { - let mut t = TimerControl::default(); - t.set_enabled(true); - t.set_cascading(true); - TM1CNT.write(t.0); - t.set_cascading(false); - TM0CNT.write(t.0); - while key_input().0 == 0 {} - t.set_enabled(false); - TM0CNT.write(t.0); - TM1CNT.write(t.0); - let low = TM0D.read() as u32; - let high = TM1D.read() as u32; - (high << 32) | low -} -``` - -## Various Generators - -### SM64 (16-bit state, 16-bit output, non-uniform, bonkers) - -Our first PRNG to mention isn't one that's at all good, but it sure might be -cute to use. It's the PRNG that Super Mario 64 had ([video explanation, -long](https://www.youtube.com/watch?v=MiuLeTE2MeQ)). - -With a PRNG this simple the output of one call is _also_ the seed to the next -call, so we don't need to make a struct for it or anything. You're also assumed -to just seed with a plain 0 value at startup. The generator has a painfully -small period, and you're assumed to be looping through the state space -constantly while the RNG goes. 
- -```rust -pub fn sm64(mut input: u16) -> u16 { - if input == 0x560A { - input = 0; - } - let mut s0 = input << 8; - s0 ^= input; - input = s0.rotate_left(8); - s0 = ((s0 as u8) << 1) as u16 ^ input; - let s1 = (s0 >> 1) ^ 0xFF80; - if (s0 & 1) == 0 { - if s1 == 0xAA55 { - input = 0; - } else { - input = s1 ^ 0x1FF4; - } - } else { - input = s1 ^ 0x8180; - } - input -} -``` - -[Compiler Explorer](https://rust.godbolt.org/z/1F6P8L) - -If you watch the video explanation about this generator you'll note that the -first `if` checking for `0x560A` prevents you from being locked into a 2-step -cycle, but it's only important if you want to feed bad seeds to the generator. A -bad seed is unhelpfully defined as "any value that the generator can't -output". The second `if` that checks for `0xAA55` doesn't seem to be important -at all from a mathematical perspective. It cuts the generator's period shorter -by an arbitrary amount for no known reason. It's left in there only for -authenticity. - -### LCG32 (32-bit state, 32-bit output, uniform) - -The [Linear Congruential -Generator](https://en.wikipedia.org/wiki/Linear_congruential_generator) is a -well known PRNG family. You pick a multiplier and an additive and you're done. -Right? Well, not exactly, because (as the wikipedia article explains) the values -that you pick can easily make your LCG better or worse all on its own. You want -a good multiplier, and you want your additive to be odd. In our example here -we've got the values that -[Bulbapedia](https://bulbapedia.bulbagarden.net/wiki/Pseudorandom_number_generation_in_Pok%C3%A9mon) -says were used in the actual GBA Pokemon games, though Bulbapedia also lists -values for a few other games as well. 
- -I don't actually know if _any_ of the constants used in the official games are -particularly good from a statistical viewpoint, though with only 32 bits an LCG -isn't gonna be passing any of the major statistical tests anyway (you need way -more bits in your LCG for that to happen). In my mind the main reason to use a -plain LCG like this is just for the fun of using the same PRNG that an official -Pokemon game did. - -You should _not_ use this as your default generator if you care about quality. - -It is _very_ fast though... if you want to set everything else on fire for -speed. If you do, please _at least_ remember that the highest bits are the best -ones, so if you're after less than 32 bits you should shift the high ones down -and keep those, or if you want to turn it into a `bool` cast to `i32` and then -check if it's negative, etc. - -```rust -pub fn lcg32(seed: u32) -> u32 { - seed.wrapping_mul(0x41C6_4E6D).wrapping_add(0x6073) -} -``` - -[Compiler Explorer](https://rust.godbolt.org/z/k5n_jJ) - -#### Multi-stream Generators - -Note that you don't have to add a compile time constant, you could add a runtime -value instead. Doing so allows the generator to be "multi-stream", with each -different additive value being its own unique output stream. This is true of LCGs -as well as all the PCGs below (since they're LCG based). The examples here just -use a fixed stream for simplicity and to save space, but if you want streams you -can add that in for only a small amount of extra space used: - -```rust -pub fn lcg_streaming(seed: u32, stream: u32) -> u32 { - seed.wrapping_mul(0x41C6_4E6D).wrapping_add(stream) -} -``` - -With a streaming LCG you should pass the same stream value every single time. If -you don't, then your generator will jump between streams in some crazy way and -you lose your nice uniformity properties. 
- -There is the possibility of intentionally changing the stream value exactly when -the seed lands on a pre-determined value (after the multiply and add). This -_basically_ makes the stream selection value's bit size (minus one bit, because -it must be odd) count into the LCG's state bit size for calculating the overall -period of the generator. So an LCG32 with a 32-bit stream selection would have a -period of 2^32 * 2^31 = 2^63. - -```rust -let next_seed = lcg_streaming(seed, stream); -// It's cheapest to test for 0, so we pick 0 -if seed == 0 { - stream = stream.wrapping_add(2) -} -``` - -However, this isn't a particularly effective way to extend the generator's -period, and we'll see a much better extension technique below. - -### PCG16 XSH-RS (32-bit state, 16-bit output, uniform) - -The [Permuted Congruential -Generator](https://en.wikipedia.org/wiki/Permuted_congruential_generator) family -is the next step in LCG technology. We start with LCG output, which is good but -not great, and then we apply one of several possible permutations to bump up the -quality. There's basically a bunch of permutation components that are each -defined in terms of the bit width that you're working with. - -The "default" variant of PCG, PCG32, has 64 bits of state and 32 bits of output, -and it uses the "XSH-RR" permutation. Here we'll put together a 32 bit version -with 16-bit output, and using the "XSH-RS" permutation (but we'll show the other -one too for comparison). - -Of course, since PCG is based on a LCG, we have to start with a good LCG base. -As I said above, a better or worse set of LCG constants can make your generator -better or worse. The Wikipedia example for PCG has a good 64-bit constant, but -not a 32-bit constant. So we gotta [ask an -expert](http://www.ams.org/journals/mcom/1999-68-225/S0025-5718-99-00996-5/S0025-5718-99-00996-5.pdf) -about what a good 32-bit constant would be. 
I'm definitely not the best at -reading math papers, but it seems that the general idea is that we want `m % 8 -== 5` and `is_even(a)` to both hold for the values we pick. There are three -suggested LCG multipliers in a chart on page 10. A chart that's quite hard to -understand. Truth be told I asked several folks that are good at math papers and -even they couldn't make sense of the chart. Eventually `timutable` read the -whole paper in depth and concluded the same as I did: that we probably want to -pick the `32310901` option. - -For an additive value, we can pick any odd value, so we might as well pick -something small so that we can do an immediate add. _Immediate_ add? That sounds -new. An immediate instruction is when one side of an operation is small enough -that you can encode the value directly into the space that'd normally be for the -register you want to use. It basically means one less load you have to do, if -you're working with small enough numbers. To see what I mean compare [loading -the add value](https://rust.godbolt.org/z/LKCFUS) and [immediate add -value](https://rust.godbolt.org/z/SnZW9a). It's something you might have seen -frequently in `x86` or `x86_64` ASM output, but because a thumb instruction is -only 16 bits total, we can only get immediate instructions if the target value -is 8 bits or less, so we haven't used them too much ourselves yet. - -I guess we'll pick 5, because I happen to personally like the number. - -```rust -// Demo only. 
The "default" PCG permutation, for use when rotate is cheaper -pub fn pcg16_xsh_rr(seed: &mut u32) -> u16 { - *seed = seed.wrapping_mul(32310901).wrapping_add(5); - const INPUT_SIZE: u32 = 32; - const OUTPUT_SIZE: u32 = 16; - const ROTATE_BITS: u32 = 4; - let mut out32 = *seed; - let rot = out32 >> (INPUT_SIZE - ROTATE_BITS); - out32 ^= out32 >> ((OUTPUT_SIZE + ROTATE_BITS) / 2); - ((out32 >> (OUTPUT_SIZE - ROTATE_BITS)) as u16).rotate_right(rot) -} - -// This has slightly worse statistics but runs much better on the GBA -pub fn pcg16_xsh_rs(seed: &mut u32) -> u16 { - *seed = seed.wrapping_mul(32310901).wrapping_add(5); - const INPUT_SIZE: u32 = 32; - const OUTPUT_SIZE: u32 = 16; - const SHIFT_BITS: u32 = 2; - const NEXT_MOST_BITS: u32 = 19; - let mut out32 = *seed; - let shift = out32 >> (INPUT_SIZE - SHIFT_BITS); - out32 ^= out32 >> ((OUTPUT_SIZE + SHIFT_BITS) / 2); - (out32 >> (NEXT_MOST_BITS + shift)) as u16 -} -``` - -[Compiler Explorer](https://rust.godbolt.org/z/NtJAwS) - -### PCG32 RXS-M-XS (32-bit state, 32-bit output, uniform) - -Having the output be smaller than the input is great because you can keep just -the best quality bits that the LCG stage puts out, and you basically get 1 point -of dimensional equidistribution for each bit you discard as the size goes down -(so 32->16 gives 16). However, if your output size _has_ to be the same as your -input size, the PCG family is still up to the task. - -```rust -pub fn pcg32_rxs_m_xs(seed: &mut u32) -> u32 { - *seed = seed.wrapping_mul(32310901).wrapping_add(5); - let mut out32 = *seed; - let rxs = out32 >> 28; - out32 ^= out32 >> (4 + rxs); - const PURE_MAGIC: u32 = 277803737; - out32 *= PURE_MAGIC; - out32^ (out32 >> 22) -} -``` - -[Compiler Explorer](https://rust.godbolt.org/z/j3KPId) - -This permutation is the slowest but gives the strongest statistical benefits. If -you're going to be keeping 100% of the output bits you want the added strength -obviously. 
However, the period isn't actually any longer, so each output will be -given only once within the full period (1-dimensional equidistribution). - -### PCG Extension Array - -As a general improvement to any PCG you can hook on an "extension array" to give -yourself a longer period. It's all described in the [PCG -Paper](http://www.pcg-random.org/paper.html), but here's the bullet points: - -* In addition to your generator's state (and possible stream) you keep an array - of "extension" values. The array _type_ is the same as your output type, and - the array _count_ must be a power of two value that's less than the maximum - value of your state size. -* When you run the generator, use the _lowest_ bits to select from your - extension array according to the array's power of two. Eg: if the size is 2 - then use the single lowest bit, if it's 4 then use the lowest 2 bits, etc. -* Every time you run the generator, XOR the output with the selected value from - the array. -* Every time the generator state lands on 0, cycle the array. We want to be - careful with what we mean here by "cycle". We want the _entire_ pattern of - possible array bits to occur eventually. However, we obviously can't do - arbitrary adds for as many bits as we like, so we'll have to "carry the 1" - between the portions of the array by hand. - -Here's an example using an 8 slot array and `pcg16_xsh_rs`: - -```rust -// uses pcg16_xsh_rs from above - -pub struct PCG16Ext8 { - state: u32, - ext: [u16; 8], -} - -impl PCG16Ext8 { - pub fn next_u16(&mut self) -> u16 { - // PCG as normal. 
- let mut out = pcg16_xsh_rs(&mut self.state); - // XOR with a selected extension array value - out ^= unsafe { self.ext.get_unchecked((self.state & !0b111) as usize) }; - // if state == 0 we cycle the array with a series of overflowing adds - if self.state == 0 { - let mut carry = true; - let mut index = 0; - while carry && index < self.ext.len() { - let (add_output, next_carry) = self.ext[index].overflowing_add(1); - self.ext[index] = add_output; - carry = next_carry; - index += 1; - } - } - out - } -} -``` - -[Compiler Explorer](https://rust.godbolt.org/z/HTxoHY) - -The period gained from using an extension array is quite impressive. For a b-bit -generator giving r-bit outputs, and k array slots, the period goes from 2^b to -2^(k*r+b). So our 2^32 period generator has been extended to 2^160. - -Of course, we might care to seed the array itself so that it's not all 0 bits -all the way through, but that's not strictly necessary. All 0s is a legitimate -part of the extension cycle, so we have to pass through it at some point. - -### Xoshiro128** (128-bit state, 32-bit output, non-uniform) - -The [Xoshiro128**](http://xoshiro.di.unimi.it/xoshiro128starstar.c) generator is -an advancement of the [Xorshift family](https://en.wikipedia.org/wiki/Xorshift). -It was specifically requested, and I'm not aware of Xorshift specifically being -used in any of my favorite games, so instead of going over Xorshift and then -leading up to this, we'll just jump straight to this. Take care not to confuse -this generator with the very similarly named -[Xoroshiro128**](http://xoshiro.di.unimi.it/xoroshiro128starstar.c) generator, -which is the 64 bit variant. Note the extra "ro" hiding in the 64-bit version's -name near the start. - -Anyway, weird names aside, it's fairly zippy. The biggest downside is that you -can't have a seed state that's all 0s, and as a result 0 will be produced one -less time than all other outputs within a full cycle, making it non-uniform by -just a little bit. 
You also can't do a simple stream selection like with the LCG -based generators, instead it has a fixed jump function that advances a seed as -if you'd done 2^64 normal generator advancements. - -Note that `Xoshiro256**` is known to fail statistical tests, so the 128 version -is unlikely to pass them, though I admit that I didn't check myself. - -```rust -pub fn xoshiro128_starstar(seed: &mut [u32; 4]) -> u32 { - let output = seed[0].wrapping_mul(5).rotate_left(7).wrapping_mul(9); - let t = seed[1] << 9; - - seed[2] ^= seed[0]; - seed[3] ^= seed[1]; - seed[1] ^= seed[2]; - seed[0] ^= seed[3]; - - seed[2] ^= t; - - seed[3] = seed[3].rotate_left(11); - - output -} - -pub fn xoshiro128_starstar_jump(seed: &mut [u32; 4]) { - const JUMP: [u32; 4] = [0x8764000b, 0xf542d2d3, 0x6fa035c3, 0x77f2db5b]; - let mut s0 = 0; - let mut s1 = 0; - let mut s2 = 0; - let mut s3 = 0; - for j in JUMP.iter() { - for b in 0 .. 32 { - if *j & (1 << b) > 0 { - s0 ^= seed[0]; - s1 ^= seed[1]; - s2 ^= seed[2]; - s3 ^= seed[3]; - } - xoshiro128_starstar(seed); - } - } - seed[0] = s0; - seed[1] = s1; - seed[2] = s2; - seed[3] = s3; -} -``` - -[Compiler Explorer](https://rust.godbolt.org/z/PGvwZw) - -### jsf32 (128-bit state, 32-bit output, non-uniform) - -This is Bob Jenkins's [Small/Fast PRNG](small noncryptographic PRNG). It's a -little faster than `Xoshiro128**` (no multiplication involved), and can pass any -statistical test that's been thrown at it. - -Interestingly the generator's period is _not_ fixed based on the generator -overall. It's actually set by the exact internal generator state. There's even -six possible internal generator states where the generator becomes a fixed -point. Because of this, we should use the verified seeding method provided. -Using the provided seeding, the minimum period is expected to be 2^94, the -average is about 2^126, and no seed given to the generator is likely to overlap -with another seed's output for at least 2^64 uses. 
- -```rust -pub struct JSF32 { - a: u32, - b: u32, - c: u32, - d: u32, -} - -impl JSF32 { - pub fn new(seed: u32) -> Self { - let mut output = JSF32 { - a: 0xf1ea5eed, - b: seed, - c: seed, - d: seed - }; - for _ in 0 .. 20 { - output.next(); - } - output - } - - pub fn next(&mut self) -> u32 { - let e = self.a - self.b.rotate_left(27); - self.a = self.b ^ self.c.rotate_left(17); - self.b = self.c + self.d; - self.c = self.d + e; - self.d = e + self.a; - self.d - } -} -``` - -[Compiler Explorer](https://rust.godbolt.org/z/qO3obQ) - -Here it's presented with (27,17), but you can also use any of the following if -you want alternative generator flavors that use this same core technique: - -* (9,16), (9,24), (10,16), (10,24), (11,16), (11,24), (25,8), (25,16), (26,8), - (26,16), (26,17), or (27,16). - -Note that these alternate flavors haven't had as much testing as the (27,17) -version, though they are likely to be just as good. - -### Other Generators? - -* [Mersenne Twister](https://en.wikipedia.org/wiki/Mersenne_Twister): Gosh, 2.5k - is just way too many for me to ever want to use this thing. If you'd really - like to use it, there is [a - crate](https://docs.rs/mersenne_twister/1.1.1/mersenne_twister/) for it that - already has it. Small catch, they use a ton of stuff from `std` that they - could be importing from `core`, so you'll have to fork it and patch it - yourself to get it working on the GBA. They also stupidly depend on an old - version of `rand`, so you'll have to cut out that nonsense. - -## Placing a Value In Range - -I said earlier that you can always take a uniform output and then throw out some -bits, and possibly the whole result, to reduce it down into a smaller range. How -exactly does one do that? Well it turns out that it's [very -tricky](http://www.pcg-random.org/posts/bounded-rands.html) to get right, and we -could be losing as much as 60% of our execution time if we don't do it carefully. 
- -The _best_ possible case is if you can cleanly take a specific number of bits -out of your result without even doing any branching. The rest can be discarded -or kept for another step as you choose. I know that I keep referencing Pokemon, -but it's a very good example for the use of randomization. Each pokemon has, -among many values, a thing called an "IV" for each of 6 stats. The IVs range -from 0 to 31, which is total nonsense to anyone not familiar with decimal/binary -conversions, but to us programmers that's clearly a 5 bit range. Rather than -making math that's better for people using decimal (such as a 1-20 range or -something like that) they went with what's easiest for the computer. - -The _next_ best case is if you can have a designated range that you want to -generate within that's known at compile time. This at least gives us a chance to -write some bit of extremely specialized code that can take random bits and get -them into range. Hopefully your range can be "close enough" to a binary range -that you can get things into place. Example: if you want a "1d6" result then you -can generate a `u16`, look at just 3 bits (`0..8`), and if they're in the range -you're after you're good. If not you can discard those and look at the next 3 -bits. We started with 16 of them, so you get five chances before you have to run -the generator again entirely. - -The goal here is to avoid having to do one of the worst things possible in -computing: _divmod_. It's terribly expensive, even on a modern computer it's -about 10x as expensive as any other arithmetic, and on a GBA it's even worse for -us. We have to call into the BIOS to have it do a software division. Calling -into the BIOS at all is about a 60 cycle overhead (for comparison, a normal -function call is more like 30 cycles of overhead), _plus_ the time it takes to -do the math itself. Remember earlier how we were happy to have a savings of 5 -instructions here or there? 
Compared to this, all our previous efforts are -basically useless if we can't evade having to do a divmod. You can do quite a -bit of `if` checking and potential additional generator calls before it exceeds -the cost of having to do even a single divmod. - -### Calling The BIOS - -How do we do the actual divmod when we're forced to? Easy: [inline -assembly](https://doc.rust-lang.org/unstable-book/language-features/asm.html) of -course (There's also an [ARM -oriented](http://embed.rs/articles/2016/arm-inline-assembly-rust/) blog post -about it that I found most helpful). The GBA has many [BIOS -Functions](http://problemkaputt.de/gbatek.htm#biosfunctions), each of which has -a designated number. We use the -[swi](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/BABFCEEG.html) -op (short for "SoftWare Interrupt") combined with the BIOS function number that -we want performed. Our code halts, some setup happens (hence that 60 cycles of -overhead I mentioned), the BIOS does its thing, and then eventually control -returns to us. - -The precise details of what the BIOS call does depends on the function number -that we call. The numerator goes into register 0, and the denominator goes into -register 1, the divmod happens, and then the division output is left in register -0 and the modulus output is left in register 1. I keep calling it "divmod" -because div and modulus are two sides of the same coin. 
There's no way to do one -of them faster by not doing the other or anything like that, so we'll first -define it as a unified function that returns a tuple: - -```rust -#![feature(asm)] -// put the above at the top of any program and/or library that uses inline asm - -pub fn div_modulus(numerator: i32, denominator: i32) -> (i32, i32) { - assert!(denominator != 0); - { - let div_out: i32; - let mod_out: i32; - unsafe { - asm!( - // Assembly template - "swi 0x06", - // in+output registers - inout("r0") numerator => div_out, - inout("r0") denominator => mod_out, - // Clobber (not part of in/output but used by the operation) - out("r3") _, - // Additional compiler optimization options. See for details: - // https://github.com/Amanieu/rfcs/blob/inline-asm/text/0000-inline-asm.md#options-1 - options(nostack, nomem), - ); - } - (div_out, mod_out) - } -} -``` - -And next, since most of the time we really do want just the `div` or `modulus` -without having to explicitly throw out the other half, we also define -intermediary functions to unpack the correct values. - -```rust -pub fn div(numerator: i32, denominator: i32) -> i32 { - div_modulus(numerator, denominator).0 -} - -pub fn modulus(numerator: i32, denominator: i32) -> i32 { - div_modulus(numerator, denominator).1 -} -``` - -We can generally trust the compiler to inline single line functions correctly -even without an `#[inline]` directive when it's not going cross-crate or when -LTO is on. I'd point you to some exact output from the Compiler Explorer, but at -the time of writing their nightly compiler is broken, and you can only use -inline asm with a nightly compiler. Unfortunate. Hopefully they'll fix it soon -and I can come back to this section with some links. - -### Finally Those Random Ranges We Mentioned - -Of course, now that we can do divmod if we need to, let's get back to random -numbers in ranges that aren't exact powers of two. 
- -yada yada yada, if you just use `x % n` to place `x` into the range `0..n` then -you'll turn an unbiased value into a biased value (or you'll turn a biased value -into an arbitrarily _more_ biased value). You should never do this, etc etc. - -So what's a good way to get unbiased outputs? We're going to be adapting some -CPP code from that blog post that I first hinted at way up above. It's specifically all -about the various ways you can go about getting unbiased random results for -various bounds. There's actually many different methods offered, and for -specific situations there's sometimes different winners for speed. The best -overall performer looks like this: - -```cpp -uint32_t bounded_rand(rng_t& rng, uint32_t range) { - uint32_t x = rng(); - uint64_t m = uint64_t(x) * uint64_t(range); - uint32_t l = uint32_t(m); - if (l < range) { - uint32_t t = -range; - if (t >= range) { - t -= range; - if (t >= range) - t %= range; - } - while (l < t) { - x = rng(); - m = uint64_t(x) * uint64_t(range); - l = uint32_t(m); - } - } - return m >> 32; -} -``` - -And, wow, I sure don't know what a lot of that means (well, I do, but let's -pretend I don't for dramatic effect, don't tell anyone). Let's try to pick it -apart some. - -First, all the `uint32_t` and `uint64_t` are C nonsense names for what we just -call `u32` and `u64`. You probably guessed that on your own. - -Next, `rng_t& rng` is more properly written as `rng: &rng_t`. Though, here -there's a catch: as you can see we're calling `rng` within the function, so in -rust we'd need to declare it as `rng: &mut rng_t`, because C++ doesn't track -mutability the same as we do (barbaric, I know). - -Finally, what's `rng_t` actually defined as? Well, I sure don't know, but in our -context it's taking nothing and then spitting out a `u32`. We'll also presume -that it's a different `u32` each time (not a huge leap in this context). To us -rust programmers that means we'd want something like `impl FnMut() -> u32`. 
- -```rust -pub fn bounded_rand(rng: &mut impl FnMut() -> u32, range: u32) -> u32 { - let mut x: u32 = rng(); - let mut m: u64 = x as u64 * range as u64; - let mut l: u32 = m as u32; - if l < range { - let mut t: u32 = range.wrapping_neg(); - if t >= range { - t -= range; - if t >= range { - t = modulus(t, range); - } - } - while l < t { - x = rng(); - m = x as u64 * range as u64; - l = m as u32; - } - } - (m >> 32) as u32 -} -``` - -So, now we can read it. Can we compile it? No, actually. Turns out we can't. -Remember how our `modulus` function is `(i32, i32) -> i32`? Here we're doing -`(u32, u32) -> u32`. You can't just cast, modulus, and cast back. You'll get -totally wrong results most of the time because of sign-bit stuff. Since it's -fairly probable that `range` fits in a positive `i32`, its negation must -necessarily be a negative value, which triggers exactly the bad situation where -casting around gives us the wrong results. - -Well, that's not the worst thing in the world either, since we also didn't -really wanna be doing those 64-bit multiplies. Let's try again with everything -scaled down one stage: - -```rust -pub fn bounded_rand16(rng: &mut impl FnMut() -> u16, range: u16) -> u16 { - let mut x: u16 = rng(); - let mut m: u32 = x as u32 * range as u32; - let mut l: u16 = m as u16; - if l < range { - let mut t: u16 = range.wrapping_neg(); - if t >= range { - t -= range; - if t >= range { - t = modulus(t as i32, range as i32) as u16; - } - } - while l < t { - x = rng(); - m = x as u32 * range as u32; - l = m as u16; - } - } - (m >> 16) as u16 -} -``` - -Okay, so the code compiles, _and_ it plays nicely with the known limits of the -various number types involved. We know that if we cast a `u16` up into `i32` -it's assured to fit properly and also be positive, and the output is assured to -be smaller than the input so it'll fit when we cast it back down to `u16`. -What's even happening though? 
Well, this is a variation on [Lemire's -method](https://arxiv.org/abs/1805.10941). One of the biggest attempts at a -speedup here is that when you have - -```rust -a %= b; -``` - -You can translate that into - -```rust -if a >= b { - a -= b; - if a >= b { - a %= b; - } -} -``` - -Now... if we're being real with ourselves, let's just think about this for a -moment. How often will this help us? I genuinely don't know. But I do know how -to find out: we write a program to just [enumerate all possible -cases](https://play.rust-lang.org/?version=stable&mode=release&edition=2015&gist=48b36f8c9f6a3284c0bc65366a4fab47) -and run the code. You can't always do this, but there's not many possible `u16` -values. The output is this: - -``` -skip_all:32767 -sub_worked:10923 -had_to_modulus:21846 -Some skips: -32769 -32770 -32771 -32772 -32773 -Some subs: -21846 -21847 -21848 -21849 -21850 -Some mods: -0 -1 -2 -3 -4 -``` - -So, about half the time, we're able to skip all our work, and about a sixth of -the time we're able to solve it with just the subtract, with the other third of -the time we have to do the mod. However, what I personally care about the most -is smaller ranges, and we can see that we'll have to do the mod if our target -range size is in `0..21846`, and just the subtract if our target range size is -in `21846..32769`, and we can only skip all work if our range size is `32769` -and above. So that's not cool. - -But what _is_ cool is that we're doing the modulus only once, and the rest of -the time we've just got the cheap operations. Sounds like we can maybe try to -cache that work and reuse a range of some particular size. We can also get that -going pretty easily. 
- -```rust -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub struct RandRangeU16 { - range: u16, - threshold: u16, -} - -impl RandRangeU16 { - pub fn new(mut range: u16) -> Self { - let mut threshold = range.wrapping_neg(); - if threshold >= range { - threshold -= range; - if threshold >= range { - threshold = modulus(threshold as i32, range as i32) as u16; - } - } - RandRangeU16 { range, threshold } - } - - pub fn roll_random(&self, rng: &mut impl FnMut() -> u16) -> u16 { - let mut x: u16 = rng(); - let mut m: u32 = x as u32 * self.range as u32; - let mut l: u16 = m as u16; - if l < self.range { - while l < self.threshold { - x = rng(); - m = x as u32 * self.range as u32; - l = m as u16; - } - } - (m >> 16) as u16 - } -} -``` - -What if you really want to use ranges bigger than `u16`? Well, that's possible, -but we'd want a whole new technique. Preferably one that didn't do divmod at -all, to avoid any nastiness with sign bit nonsense. Thankfully there is one such -method listed in the blog post, "Bitmask with Rejection (Unbiased)" - -```cpp -uint32_t bounded_rand(rng_t& rng, uint32_t range) { - uint32_t mask = ~uint32_t(0); - --range; - mask >>= __builtin_clz(range|1); - uint32_t x; - do { - x = rng() & mask; - } while (x > range); - return x; -} -``` - -And in Rust - -```rust -pub fn bounded_rand32(rng: &mut impl FnMut() -> u32, mut range: u32) -> u32 { - let mut mask: u32 = !0; - range -= 1; - mask >>= (range | 1).leading_zeros(); - let mut x = rng() & mask; - while x > range { - x = rng() & mask; - } - x -} -``` - -Wow, that's so much less code. What the heck? Less code is _supposed_ to be the -faster version, why is this rated slower? Basically, because of how the math -works out on how often you have to run the PRNG again and stuff, Lemire's method -_usually_ works better with smaller ranges and the masking method _usually_ works -better with larger ranges. If your target range fits in a `u8`, probably use -Lemire's. 
If it's bigger than `u8`, or if you need to do it just once and can't -benefit from the cached modulus, you might want to start moving toward the -masking version at some point in there. Obviously if your target range is more -than a `u16` then you have to use the masking method. The fact that they're each -oriented towards different size generator outputs only makes things more -complicated. - -Life just be that way, I guess. - -## Summary Table - -That was a whole lot. Let's put them in a table: - -| Generator | Bytes | Output | Period | k-Dim | -|:---------------|:-----:|:------:|:------:|:-----:| -| sm64 | 2 | u16 | 65,114 | 0 | -| lcg32 | 4 | u16 | 2^32 | 1 | -| pcg16_xsh_rs | 4 | u16 | 2^32 | 1 | -| pcg32_rxs_m_xs | 4 | u32 | 2^32 | 1 | -| PCG16Ext8 | 20 | u16 | 2^160 | 8 | -| xoshiro128** | 16 | u32 | 2^128-1| 0 | -| jsf32 | 16 | u32 | ~2^126 | 0 | diff --git a/book/src-bak/index.md b/book/src-bak/index.md deleted file mode 100644 index a1840b0..0000000 --- a/book/src-bak/index.md +++ /dev/null @@ -1,52 +0,0 @@ -# Ch 3: Memory and Objects - -Alright so we can do some basic "movement", but we left a big trail in the video -memory of everywhere we went. Most of the time that's not what we want at all. -If we want more hardware support we're going to have to use a new video mode. So -far we've only used Mode 3, but modes 4 and 5 are basically the same. Instead, -we'll switch focus to using a tiled graphical mode. - -First we will go over the complete GBA memory mapping. Part of this is the -memory for tiled graphics, but also things like all those IO registers, where -our RAM is for scratch space, all that stuff. Even if we can't put all of them -to use at once, it's helpful to have an idea of what will be available in the -long run. - -Tiled modes bring us three big new concepts that each have their own complexity: -tiles, backgrounds, and objects. 
Backgrounds and objects both use tiles, but the -background is for creating a very large static space that you can scroll around -the view within, and the objects are about having a few moving bits that appear -over the background. Careful use of backgrounds and objects is key to having the -best looking GBA game, so we won't even be able to cover it all in a single -chapter. - -And, of course, since most games are pretty boring if they're totally static -we'll touch on the kinds of RNG implementations you might want to have on a GBA. -Most general purpose RNGs that you find are rather big compared to the amount of -memory we want to give them, and they often use a lot of `u64` operations, so -they end up much slower on a 32-bit machine like the GBA (you can lower 64-bit -ops to combinations of 32-bit ops, but that's quite a bit more work). We'll -cover a few RNG options that size down the RNG to a good size and a good speed -without trading away too much in terms of quality. - -To top it all off, we'll make a simple "memory game" sort of thing. There's some -face down cards in a grid, you pick one to check, then you pick the other to -check, and then if they match the pair disappears. - -## Drawing Priority - -Both backgrounds and objects can have "priority" values associated with them. -TONC and GBATEK have _opposite_ ideas of what it means to have the "highest" -priority. TONC goes by highest numerical value, and GBATEK goes by what's on the -z-layer closest to the user. Let's list out the rules as clearly as we can: - -* Priority is always two bits, so 0 through 3. -* Priority conceptually proceeds in drawing passes that count _down_, so any - priority 3 things can get covered up by priority 2 things. In truth there's - probably depth testing and buffering stuff going on so it's all one single - pass, but conceptually we will imagine it happening as all of the 3 elements, - then all of 2, and so on. 
-* Objects always draw over top of backgrounds of equal priority. -* Within things of the same type and priority, the lower numbered element "wins" - and gets its pixel drawn (bg0 is favored over bg1, obj0 is favored over obj1, - etc). diff --git a/book/src-bak/io_registers.md b/book/src-bak/io_registers.md deleted file mode 100644 index 890e7a7..0000000 --- a/book/src-bak/io_registers.md +++ /dev/null @@ -1,33 +0,0 @@ -# IO Registers - -The GBA has a large number of **IO Registers** (not to be confused with CPU -registers). These are special memory locations from `0x04000000` to -`0x040003FE`. GBATEK has a [full -list](http://problemkaputt.de/gbatek.htm#gbaiomap), but we only need to learn -about a few of them at a time as we go, so don't be worried. - -The important facts to know about IO Registers are these: - -* Each has their own specific size. Most are `u16`, but some are `u32`. -* All of them must be accessed in a `volatile` style. -* Each register is specifically readable or writable or both. Actually, with - some registers there are even individual bits that are read-only or - write-only. - * If you write to a read-only position, those writes are simply ignored. This - mostly matters if a writable register contains a read-only bit (such as the - Display Control, next section). - * If you read from a write-only position, you get back values that are - [basically - nonsense](http://problemkaputt.de/gbatek.htm#gbaunpredictablethings). There - aren't really any registers that mix writable bits with read only bits, so - you're basically safe here. The only (mild) concern is that when you write a - value into a write-only register you need to keep track of what you wrote - somewhere else if you want to know what you wrote (such to adjust an offset - value by +1, or whatever). - * You can always check GBATEK to be sure, but if I don't mention it then a bit - is probably both read and write. -* Some registers have invalid bit patterns. 
For example, the lowest three bits - of the Display Control register can't legally be set to the values 6 or 7. - -When talking about bit positions, the numbers are _zero indexed_ just like an -array index is. diff --git a/book/src-bak/light_cycle.md b/book/src-bak/light_cycle.md deleted file mode 100644 index 3044b71..0000000 --- a/book/src-bak/light_cycle.md +++ /dev/null @@ -1,135 +0,0 @@ -# light_cycle - -Now let's make a game of "light_cycle" with our new knowledge. - -## Gameplay - -`light_cycle` is pretty simple, and very obvious if you've ever seen Tron. The -player moves around the screen with a trail left behind them. They die if they -go off the screen or if they touch their own trail. - -## Operations - -We need some better drawing operations this time around. - -```rust -pub unsafe fn mode3_clear_screen(color: u16) { - let color = color as u32; - let bulk_color = color << 16 | color; - let mut ptr = VolatilePtr(VRAM as *mut u32); - for _ in 0..SCREEN_HEIGHT { - for _ in 0..(SCREEN_WIDTH / 2) { - ptr.write(bulk_color); - ptr = ptr.offset(1); - } - } -} - -pub unsafe fn mode3_draw_pixel(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); -} - -pub unsafe fn mode3_read_pixel(col: isize, row: isize) -> u16 { - VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).read() -} -``` - -The draw pixel and read pixel are both pretty obvious. What's new is the clear -screen operation. It changes the `u16` color into a `u32` and then packs the -value in twice. Then we write out `u32` values the whole way through screen -memory. This means we have to do less write operations overall, and so the -screen clear is twice as fast. 
- -Now we just have to fill in the main function: - -```rust -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - unsafe { - DISPCNT.write(MODE3 | BG2); - } - - let mut px = SCREEN_WIDTH / 2; - let mut py = SCREEN_HEIGHT / 2; - let mut color = rgb16(31, 0, 0); - - loop { - // read the input for this frame - let this_frame_keys = key_input(); - - // adjust game state and wait for vblank - px += 2 * this_frame_keys.column_direction() as isize; - py += 2 * this_frame_keys.row_direction() as isize; - wait_until_vblank(); - - // draw the new game and wait until the next frame starts. - unsafe { - if px < 0 || py < 0 || px == SCREEN_WIDTH || py == SCREEN_HEIGHT { - // out of bounds, reset the screen and position. - mode3_clear_screen(0); - color = color.rotate_left(5); - px = SCREEN_WIDTH / 2; - py = SCREEN_HEIGHT / 2; - } else { - let color_here = mode3_read_pixel(px, py); - if color_here != 0 { - // crashed into our own line, reset the screen - mode3_clear_screen(0); - color = color.rotate_left(5); - } else { - // draw the new part of the line - mode3_draw_pixel(px, py, color); - mode3_draw_pixel(px, py + 1, color); - mode3_draw_pixel(px + 1, py, color); - mode3_draw_pixel(px + 1, py + 1, color); - } - } - } - wait_until_vdraw(); - } -} -``` - -Oh that's a lot more than before! - -First we set Mode 3 and Background 2, we know about that. - -Then we're going to store the player's x and y, along with a color value for -their light cycle. Then we enter the core loop. - -We read the keys for input, and then do as much as we can without touching video -memory. Since we're using video memory as the place to store the player's light -trail, we can't do much, we just update their position and wait for VBlank to -start. The player will be a 2x2 square, so the arrows will move you 2 pixels per -frame. - -Once we're in VBlank we check to see what kind of drawing we're doing. 
If the -player has gone out of bounds, we clear the screen, rotate their color, and then -reset their position. Why rotate the color? Just because it's fun to have -different colors. - -Next, if the player is in bounds we read the video memory for their position. If -it's not black that means we've been here before and the player has crashed into -their own line. In this case, we reset the game without moving them to a new -location. - -Finally, if the player is in bounds and they haven't crashed, we write their -color into memory at this position. - -Regardless of how it worked out, we hold here until vdraw starts before going to -the next loop. That's all there is to it. - -## The gba crate doesn't quite work like this - -Once again, as with the `hello1` and `hello2` examples, the `gba` crate covers -much of this same ground as our example here, but in slightly different ways. - -Better organization and abstractions are usually only realized once you've used -more of the whole thing you're trying to work with. If we want to have a crate -where the whole thing is well integrated with itself, then the examples would -also end up having to explain about things we haven't really touched on much -yet. It becomes a lot harder to teach. - -So, going forward, we will continue to teach concepts and build examples that -don't directly depend on the `gba` crate. This allows the crate to freely grow -without all the past examples becoming a great inertia upon it. \ No newline at end of file diff --git a/book/src-bak/memory_game.md b/book/src-bak/memory_game.md deleted file mode 100644 index 3e65170..0000000 --- a/book/src-bak/memory_game.md +++ /dev/null @@ -1,316 +0,0 @@ -# Making A Memory Game - -For this example to show off our new skills we'll make a "memory" game. The idea -is that there's some face down cards and you pick one, it flips, you pick a -second, if they match they both go away, if they don't match they both turn back -face down. 
The player keeps going until all the cards are gone, then we'll deal -the cards again. - -There are many steps to do to get such a simple seeming game going. In fact I -stumbled a bit myself when trying to get things set up and going despite having -written and explained all the parts so far. Accordingly, we'll take each part -very slowly, and review things as we build up our game. - -We'll start back with a nearly blank file, calling it `memory_game.rs`: - -```rust -#![feature(start)] -#![no_std] - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! { - loop {} -} - -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - loop { - // TODO the whole thing - } -} -``` - -## Displaying A Background - -First let's try to get a background going. We'll display a simple checker -pattern just so that we know that we did something. - -Remember, backgrounds have the following essential components: - -* Background Palette -* Background Tiles -* Screenblock -* IO Registers - -### Background Palette - -To write to the background palette memory we'll want to name a `VolatilePtr` for -it. We'll probably also want to be able to cast between different types either -right away or later in this program, so we'll add a method for that. - -```rust -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(transparent)] -pub struct VolatilePtr(pub *mut T); -impl VolatilePtr { - pub unsafe fn read(&self) -> T { - core::ptr::read_volatile(self.0) - } - pub unsafe fn write(&self, data: T) { - core::ptr::write_volatile(self.0, data); - } - pub fn offset(self, count: isize) -> Self { - VolatilePtr(self.0.wrapping_offset(count)) - } - pub fn cast(self) -> VolatilePtr { - VolatilePtr(self.0 as *mut Z) - } -} -``` - -Now we give ourselves an easy way to write a color into a palbank slot. 
- -```rust -pub const BACKGROUND_PALETTE: VolatilePtr = VolatilePtr(0x500_0000 as *mut u16); - -pub fn set_bg_palette_4bpp(palbank: usize, slot: usize, color: u16) { - assert!(palbank < 16); - assert!(slot > 0 && slot < 16); - unsafe { - BACKGROUND_PALETTE - .cast::<[u16; 16]>() - .offset(palbank as isize) - .cast::() - .offset(slot as isize) - .write(color); - } -} -``` - -And of course we need to bring back in our ability to build color values, as -well as a few named colors to start us off: - -```rust -pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red -} - -pub const WHITE: u16 = rgb16(31, 31, 31); -pub const LIGHT_GRAY: u16 = rgb16(25, 25, 25); -pub const DARK_GRAY: u16 = rgb16(15, 15, 15); -``` - -Which _finally_ allows us to set our palette colors in `main`: - -```rust -fn main(_argc: isize, _argv: *const *const u8) -> isize { - set_bg_palette_4bpp(0, 1, WHITE); - set_bg_palette_4bpp(0, 2, LIGHT_GRAY); - set_bg_palette_4bpp(0, 3, DARK_GRAY); -``` - -### Background Tiles - -So we'll want some light gray tiles and some dark gray tiles. We could use a -single tile and then swap it between palbanks to do the color selection, but for -now we'll just use two different tiles, since we've got tons of tile space to -spare. 
- -```rust -#[derive(Debug, Clone, Copy, Default)] -#[repr(transparent)] -pub struct Tile4bpp { - pub data: [u32; 8], -} - -pub const ALL_TWOS: Tile4bpp = Tile4bpp { - data: [ - 0x22222222, 0x22222222, 0x22222222, 0x22222222, 0x22222222, 0x22222222, 0x22222222, 0x22222222, - ], -}; - -pub const ALL_THREES: Tile4bpp = Tile4bpp { - data: [ - 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, 0x33333333, - ], -}; -``` - -And then we have to have a way to put the tiles into video memory: - -```rust -#[derive(Clone, Copy)] -#[repr(transparent)] -pub struct Charblock4bpp { - pub data: [Tile4bpp; 512], -} - -pub const VRAM: VolatilePtr = VolatilePtr(0x0600_0000 as *mut Charblock4bpp); - -pub fn set_bg_tile_4bpp(charblock: usize, index: usize, tile: Tile4bpp) { - assert!(charblock < 4); - assert!(index < 512); - unsafe { VRAM.offset(charblock as isize).cast::().offset(index as isize).write(tile) } -} -``` - -And finally, we can call that within `main`: - -```rust -fn main(_argc: isize, _argv: *const *const u8) -> isize { - // bg palette - set_bg_palette_4bpp(0, 1, WHITE); - set_bg_palette_4bpp(0, 2, LIGHT_GRAY); - set_bg_palette_4bpp(0, 3, DARK_GRAY); - // bg tiles - set_bg_tile_4bpp(0, 0, ALL_TWOS); - set_bg_tile_4bpp(0, 1, ALL_THREES); -``` - -### Setup A Screenblock - -Screenblocks are a little weird because they take the same space as the -charblocks (8 screenblocks per charblock). The GBA will let you mix and match -and it's up to you to keep it all straight. We're using tiles at the base of -charblock 0, so we'll place our screenblock at the base of charblock 1. 
- -First, we have to be able to make one single screenblock entry at a time: - -```rust -#[derive(Debug, Clone, Copy, Default)] -#[repr(transparent)] -pub struct RegularScreenblockEntry(u16); - -impl RegularScreenblockEntry { - pub const SCREENBLOCK_ENTRY_TILE_ID_MASK: u16 = 0b11_1111_1111; - pub const fn from_tile_id(id: u16) -> Self { - RegularScreenblockEntry(id & Self::SCREENBLOCK_ENTRY_TILE_ID_MASK) - } -} -``` - -And then with 32x32 of these things we'll have a whole screenblock. Now, we -probably won't actually make values of the screenblock type itself, but we at -least need it to have the type declared with the correct size so that we can -move our pointers around by the right amount. - -```rust -#[derive(Clone, Copy)] -#[repr(transparent)] -pub struct RegularScreenblock { - pub data: [RegularScreenblockEntry; 32 * 32], -} -``` - -Alright, so, as I said those things are kinda big, we don't really want to be -building them up on the stack if we can avoid it, so we'll write one straight -into memory at the correct location. - -```rust -pub fn checker_screenblock(slot: usize, a_entry: RegularScreenblockEntry, b_entry: RegularScreenblockEntry) { - let mut p = VRAM.cast::().offset(slot as isize).cast::(); - let mut checker = true; - for _row in 0..32 { - for _col in 0..32 { - unsafe { p.write(if checker { a_entry } else { b_entry }) }; - p = p.offset(1); - checker = !checker; - } - checker = !checker; - } -} -``` - -And then we add this into `main` - -```rust - // screenblock - let light_entry = RegularScreenblockEntry::from_tile_id(0); - let dark_entry = RegularScreenblockEntry::from_tile_id(1); - checker_screenblock(8, light_entry, dark_entry); -``` - -### Background IO Registers - -Our most important step is of course the IO register step. There's four -different background layers, but each of them has the same format for their -control register. For the moment, all that we care about is being able to set -the "screen base block" value. 
- -```rust -#[derive(Clone, Copy, Default, PartialEq, Eq)] -#[repr(transparent)] -pub struct BackgroundControlSetting(u16); - -impl BackgroundControlSetting { - pub const SCREEN_BASE_BLOCK_MASK: u16 = 0b1_1111; - pub const fn from_base_block(sbb: u16) -> Self { - BackgroundControlSetting((sbb & Self::SCREEN_BASE_BLOCK_MASK) << 8) - } -} - -pub const BG0CNT: VolatilePtr = VolatilePtr(0x400_0008 as *mut BackgroundControlSetting); -``` - -And... that's all it takes for us to be able to add a line into `main` - -```rust - // bg0 control - unsafe { BG0CNT.write(BackgroundControlSetting::from_base_block(8)) }; -``` - -### Set The Display Control Register - -We're finally ready to set the display control register and get things going. - -We've slightly glossed over it so far, but when the GBA is first booted most -everything within the address space will be all zeroed. However, the display -control register has the "Force VBlank" bit enabled by the BIOS, giving you a -moment to put the memory in place that you'll need for the first frame. - -So, now that we have got all of our memory set, we'll overwrite the initial -display control register value with what we'll call "just enable bg0". 
- -```rust -#[derive(Clone, Copy, Default, PartialEq, Eq)] -#[repr(transparent)] -pub struct DisplayControlSetting(u16); - -impl DisplayControlSetting { - pub const JUST_ENABLE_BG0: DisplayControlSetting = DisplayControlSetting(1 << 8); -} - -pub const DISPCNT: VolatilePtr = VolatilePtr(0x0400_0000 as *mut DisplayControlSetting); -``` - -And so finally we have a complete `main` - -```rust -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - // bg palette - set_bg_palette_4bpp(0, 1, WHITE); - set_bg_palette_4bpp(0, 2, LIGHT_GRAY); - set_bg_palette_4bpp(0, 3, DARK_GRAY); - // bg tiles - set_bg_tile_4bpp(0, 0, ALL_TWOS); - set_bg_tile_4bpp(0, 1, ALL_THREES); - // screenblock - let light_entry = RegularScreenblockEntry::from_tile_id(0); - let dark_entry = RegularScreenblockEntry::from_tile_id(1); - checker_screenblock(8, light_entry, dark_entry); - // bg0 control - unsafe { BG0CNT.write(BackgroundControlSetting::from_base_block(8)) }; - // Display Control - unsafe { DISPCNT.write(DisplayControlSetting::JUST_ENABLE_BG0) }; - loop { - // TODO the whole thing - } -} -``` - -And _It works, Marty! It works!_ - -![screenshot_checkers](screenshot_checkers.png) - -We've got more to go, but we're well on our way. 
diff --git a/book/src-bak/obj_memory_2d1d.jpg b/book/src-bak/obj_memory_2d1d.jpg deleted file mode 100644 index 4cec80c8944dd9784c45834b612c551065b5405b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 150277 zcmeFa2|Sd2-#0vxealW_&O(%}>_sNYmQ$iEQ6?cg+u4>$Ip+BP*8gw${(o%@Z%jhA9WydD zf-o^LLC%4{AR7e8al;^w3lNC8Ib<&c0%3=2WoCk~fZs8Jm!9Re%|F4nLm-y?kgeZ- z&f8`w@bi1{XW+ZTA3-2xaT}u$1IQL8CdSwI7c&bpDllD;pc*V&~xEU}xuK zXJg~!;pF7v248F(ynH;|yo~P|AHukuaToZ-&CbToc*pY~^PW z*tuVyRnXFjZI`c*>dpAa?9xZdnuV?25@poRUk>5m6cH5@-!8jbPJYi`bq!4|?E?o5 zjv5*no0uLudCJpd+M3U_+AaiD|0430x>&LSVY!sjAQf0cRi#-xi&_oNU(+s4gI!z!6h#*a8<4FO zQWz!ztBkbjMJkm}99DMjb1rM_U+$HDSZ*qut=f2OSX-5aCp3Bp2EN&x_N39A;*DxP z2y3$t_L?9jdT#G%C^1Cj2S+Nv4!)MOnw7Ae>D+P7`=<1ceCOzDyTs#q8cZTU)CzgDnbOFc&nSB)8Z}37}e}H>0^witco2 z(y9HGMY&zh*K@o5^Tq0JoNX1wYZg{O6;MJV+K!$zDSO{#xOR2F`I>Ow!3&+CFdH{2 zP3MxJXwaLaG^>a`?U=|kSUA!!knq$CWEKAYry54|$&;^b@lqdE;D* z?15GrG>@06qsR7v;KzbI8dzwQaH__4T0a=(~%L_{QP`EAc>7^_>~}CuNjMabxu_GcTRL+ zk2q9CV-`j%^Qa>oWFqyyCrCt7Sk zwvPaAQ=RSR&=&g6#@om1T{N}Qa^CHaPr7=(BHSj-Zb!jgd8g++_RB~!3W^FPrYsV2 zsOK6;%Xj2jB|WEl@jY!e;bvWXWa{f2?DXBOU&bA3yJ*KG&eK>j*jBK+B=VY$+Awqj zGGuF%LcXar2;6I3V&KOpPtj6ehk1)l%yba& z-hjv}y%BWO0l6kctHcTeqvT@1i^w`een4_-hDD{hMSV)2^Y`X!{TkClm3jM$Eh*3Z z(%qB&z_#|?#>=x-(}J40YD6XD%c zE4S@GNr@BR1y-kBpXlZ;?-UxcdHUww`vlx)8vEr^If+o zFKRz=xG(m&XWqCh|7c6(UoX}=?I|qyF(Eh5&F8UP=w@vnxEZPXaj93vK6+QMx2_T) zM(Oh*heh^F>sHD(5lMjt@z^;d1`qgK4YdnkcU|WsXDQpJA;%JMdIfBJZ6k z9kJz|WkZ#KhJ^paBQN8-w?BW}B*V%SeprTiys?P9NGrz?BI#0qmg5HGRv=vf4wxZ* zGXbsV6!b9m1ug6HZ+nJ3ou~k@rvoVEziA?){m$f zPp?ZnN>6KgR>@!WGH&H)Y%h1TE33YEY3K&T2LbFJTFj$J2Dyn8`O&KKA3J{$OQvt+x7u>{w#_^nnSABDV>@Vw*qQo*H{0`fuJa{aN$ zx(BWfTKo37i{bjGoCNdGPq_{c^J`(Pa8-1(hY@-*5m%}D;2gkaw}ZR^Ihs&Wti=*Q z^E{}xYOD89;DP1r1B@rth*kuf%z$yD&sz*z5LA|g380aTZ$QX-4gFLVKX6MDga?%` zFIba;2gbUerXMpqDHktWSCi(t>Kt`DlPNU?wGs^EP_zI!Iwz>CEdD?{5ktP!IPkfL zm8wJ>zj=KU!=(HyAjfXrERgHP)uM8Imr#m;vGz9BcSoH=^jwQ&CCuV|(&3lu_NUb6 
zyv^Q#NI>&KE^a_RfApx1OOK>>fx6@Su^P=iNvqJ~q#gq5f}M$8gjR>Sac+{*aw!?W zN{yYFyDTy4Ug~-`R5H3nj@^NO|GCa9Q%Ga79W9|z7~Cs|Xt^lDGNbmJKm1xQNlzdVt`C(Fj%k`j=bne69M|o}6BeQn)Ire7Wx>mFSY5FWx zsPwF_W~`rPM602ClcJh16}eRfNH!uSywRJqm*d=aRD7u;E#Y<0tHRyqDbo_ks%^}> zwisKXPR^l3p7kp??#b&ea%lD_eBmWH-prlha_Q9xgrN_Y{KXrWR9U^&8tA+SXtz*8 zR3}kLC`}qFAC{ zj6-PEFt^c(2(+L-6fmXkEDFxJUJP5R(RB1{-PI+&ru076u&*IQLjC=x&C)6m;NX>p-Yl5AISFpy% zk4DFgcgS3gPng^?TJE<5CHnR(LWy}(pmPkKLPU@6B$kp>+Dap;MKse`pMuPZ#emF7 zW-v$mCv&{)-&(GL`=k!~_fyL&YgA>_9l&P&1eS=!Xci+7ZlEkAKv{&FJih-(byp3` zJ;T8SI?o2NvnBO5!iHGG|o*0kWFc~5DQ`@Pn(g960?pz_0%LG(#;}I6T+vK zMA>G;Ib`qo-gWNr1Z>Fzpvnd!S-{_~p`ri>cCx9~C6`?Y2>Tj-+PUFZO%@e5sS06Z!dU0pfqr*|h zb_TP=Oko8q=$zmKScXYGVRY$vDf#rKG<1t3=LX~y@pVekJC9_PbPC1zM666>kfcmd z=7jWK1?4jZa-65uF+}5)o7Y4`K5anoJ>;3% zm;48%*(bks{{~OLye#K?-af9PWJwcyyxmX1PpKqjyd>C^=-0Uc*-?eV4^3T(n})Lh z7RG>nPUi(Af61`tZg1n$7xc+DeMQzPyj-|DrS|P}VY@_b`m#~q8jzDs|eD<=lnA=v+UkO-rOmM;`?_+q-2y?GV$GB|z zb690wIPwa*q(uG_liPm2KYjFIpdxwumq+3+WxX;(%`}GrC#hZ7s$umjPL^Wtxr6w5@&| zx!+KzL#$oM@-Re`$|l>>WwYyZgZ+y{BHXpjw-N5=S-q#(fcsfhH-I7Us9qO$#zMWbdZJk zA-qv77w~e3G9~Igcy;0I-odMj)3K+=!WW`maDFMdc|yjM8Py=xy5r4G$fg1$o^1OvD)}3rB<6E>}qP#@(BOjo3Y@WPA=Sa^sE$~=#|9TIrNYWXzJ4VPZ zzqgRmKsc{0>#)~katb@YY7>9X^7z9np^Q1%RU7KrDoL|0pLOa6gy-mdI8G8Jif%T+ zT>x2AN);X=w>1LKiX4K_Qz8weABpa^`-3j5pX^sr+<=rVag7=%RSH~n${D$L;qcPa z2b0d^ebK&-P>LG814Dd`sZ{BNN}*c}X=NJ_F3j1oL2N6T4@^CrqrBLkP)pTbA#?(h za94)O%N@L0!eUh?)AJPI*WMk-@oxM9y|>;P?XN5~7anAraXZC7KXAaB?X$nplLJB$0f+LV zt*$uiPFu+|bEK$r)~Uv+dN21`(8s-T8Kp#+40`hML{(mM&xjxxbdUy%v#2Oi=i(44 zuIaR`l)(;)!P}lNhQ% zN(*c|BQxcj+kh}%bh@EmXlM6Pi-fmZd3N2)!>P6^8*r1z1! 
zkJALIkO6>=VNr*NnSz-x> zb|=4+R*+Yft@?!SHF`UfHdfhru_SKQ+2>g3J9P8!8xRjPH@yi%w2r$86UWN!ZpfnU zjcZ2zMtrt%$6Gz>;gKFQY;D?WJ88_~$K{OkZG=nN3A{|4?P%k17T<1_gzUZC}$Fe$}d6Z?4rPs<^YWpK^Tu9F)z{g1EnM{I7u$-B5Oo}A%l=7s!zt9 z9d1v(bohPL)dig>aoGrM)A%^!lh~<@PyM#K*Ka4C6KLT;XZdKRm)ejM+s%8BX7H+z;MU3$Cah z)g#3W;D|T^wt&R7h*AemlCHO+ej^=Er-c>3$Y-O)Gfp=S)a*B_-jn5Brc*X?gJUb- zz-*XZoF`m9SbhfiqNcJAyWlaGn|1G`&2{ZnYSjirZOwwt#Y{O&)uQ3RkUU0HPnD>g zpP>Q-POFFmmpdf#D(cD|KK8X;sqT9m=^tw?(R=jET{m&@t=pGilUZm!e<%bsJn=Lb z8=5(HP4S>g+=_qk0Yz_tv5VdkM;lo2zumZad-3;7ke26@wi8k(HXsXcoK7BDx6Y!z zoA`RHh5#gX1#du-I#;l`xJeW{eFR3dL~lbk>!Bx~V5*?hU34`-gSey{?3<}y*iLe2 zLG2pIZ8WLOkM~)0?NyS6SJd2}fBH^mkNX3HmMW$X49*6IS|}C(n7rQn1Pg-+RF+1j ziAK=hB7%$U@Kqr+tjGB3luEg35uU{>v7F6%k9b`4`Np+&4ABG{4OUV>+cF_VEN(z3 z4qh16Pq@hX1t1;%wh9_i507G==Pi6f?D79d>I`FYZsy#_)KvNyH;KVLJswA4LGCB9 zCL*Tb+k!^hP?|%_y4F)@ey=Sub%Po@VTQ?;JC2pLN=ct6%ino;{5Mzb(LrS zpWX$Oe0Wtg^l74hJ)3fG{^+DtpO~eg5ah>YrFG2y;2Y*^#mG*I2fBF&%#9C_N3=bJ zbwI^Xl2B0~%CzsKNmT=Vq8|*1_71otSW0y{+`mg*eI6*RwC1c0;mgzXJKny#WIN^% z{UwZOW^n^oCfBke8YD+3Z5=%%*n-mC8Wd{Q*sr2-J=ofQcUXe{RrLvJqgOesLLrxV zy>0;v|07IG&|^LGtTPP7{jvV+^nCTT-fvWU&o{@X{QVh0E?;wipRJsDain6qKTb$KRfS`>J@lp--aC&$Ca z5oxml@vKc7>s4Qqb&XMN<8FZH#5;bD^!Pn)t=-Kk?ZH)7S^)cX90$pGkr`Z)GA$kL z>_&`>`2-8C*GnWBSo9X7hVmyoZLc*ZYmHld={-^-*v+kaHN6Aaa+ia1)chJ|sT92N z2>{;CB3$=&Sx*(I-G_9#`Y6lL`6{`qSZ`Hq_j>>_m5t z$lg5cGUZ*GSJG*z)5LVLEMNiFP%ozwpkcZ!gfS?geWsEeXwT`up;S7Zs%idVwxtE?KJyK88-v@^`fS)s}t zERI#m3iOZbs;Z0B+irVcw08M@hQeFe`H5jD0Y(+UJ zJ?a6|Tm#bHKKszN{qutdOCCFm_wP)-V3u4R;$MB0yHGOfD6->4@W_D*%6W9NFzy0+ zbbYE#?;ub|9HI+M2#wB>JS@jC+vck!yfgn;v+)51aLspF_3}oKi3(MfTjVwmeu$vI zz!8tbLpC5!!ARE)Smwo@{zIo$D>k{gA^{PD#`Zuy*!sG4)Vz0j=c9+Tyun z)69q$%1&p?k1f@mIPjWl8)V1pJPIpv&mdKacVeC5l(sbT4qp>dVcnXEH+&Iv5s=2(i84$x{0>%%6c>)--GE#G;#%sH zuw^-8V19AB#GLF7AcSmszr??GjXXpyyPx8@)n@OE>DHkcM+~SK z+Q6OZ1#dZhqVeK^Z2Bv#ZuxafzIHOO0r3MX4=$C8ArAL1D!D569gC#spI}0nEWqDL zjqlMdvplYwFzX1v@nJ>b8eP_VH{MJ^?6+jw3=`*59&m}A-_DhF`VBt0xpqRjInVl; 
zgG+~|>2F}7Apf7|2cd^qgdWV^yKsY3{?P+YFp&tk`VTd|Z#6zIO?eyHe5U@Bfm1hL znL`dn`dpTO=X;np^JZ@(r{H} z>0*>WU`*%Kr(8qAXbHM7igD01p>%O#dIJ)=J}y~Jx{#9v zh^*Q5QBOMMbZ!p7Y$WF7sc0TPpL-2Z^ik@shm%E@VkuVP1Y1{BwQv2&i^}pM^!H=7}3|$;O z3iB~BzwbSv%-uTs1x027E2SajsCQ_v?BIeK!7ms9@uW~2KrWcH_vN}NaRYM4zgvWI zvhiuHUB}#%gPqIXw_k2ICEWTjbIyn$nDqEH*6~7o%*@N4g7`b0K?{y%6kd-F$OsNh zjaos6$w4k79XX(@PWW!W-&>iq{6 zjeg4ZND^2GNdY+rqlh4(6{PjH9_U9sF=9rxc40onLz zHg1?g1{jP1{y8AB)Ok0O@eNHkka?|V`PB|gNU4DR5 zpN$8eg=OlW9Yl&|kDqy(Fg5jhk1Ud@E^^Cp2*hvr+FMEM!HHRcb$g3Ibj!r7x8?8g zspro(1YYSmMr~}M72zhcE$Jg>L>nF0a2#ELqj4{Nzgcc*JJcD}qUsC>SSqe(ylH8b zy!2u$V~>$(w*lyQz&KF$(lx{GMci4weouuD7WghYXAO~$%r_+Smac*{>O)FAG04Tw z=MI{2bwy-w-N_Bn408&5mHh&tX{mMaiR=BngxI*nbx@rT0D=@7Y5+;D6&`^W@K+H5 zdRnfS<$GQc*`ua-W$}c#bn0D$eKzsJjd_=&2@W1p4}xcc7UtedDBu>p)~%ef?X1U8 zMf=DGZ?SqRtqO6mQsPqkybpU^P_KNJXkqqn^oqvUmWx9?s|go4`L>2Sc2VFc(BE^Q z&y_~_0!33FV*SxuKh_9*5h?YTNt+@c=de>E*rr{8gdBArHJ00wL>o|!%MX~DzqTa2 zid*yrmz+mS+khZeBG-bT1ZZ0v*bcDt&g&{PbKQWj(~F#{@mevG)UG1;#PB56H>0sZ zY^?e7XyRGKh>9UC(}8eysFRPwU|M^H7bByX0y`Huso5t8|AKb5<>pLM+aeJf+HR{* zZ;|hhDEnOHw*e8t?)I2qqjCjLDcO&2E4n}9r$oH?LB>9E4@If(tOu@4MmRl1{&zA3Hh`Y^GpL$^Uk{z6>} z_he0qg!kE3!`2trwfcdDHXQM6+^q?2oTr6AkVVIklsZ`J@Ygm51Oh)tTb)yb-?%vnT&#jYHdQpNZcUmU5xP_0p5yeMDQ)u6+2=7Bm}O zF=&)7fB=rEC-+itFu3CC`?TuMj+s3>j?1jzf>X&in2ZVxw3^ta% z@2Y3YQyNEYs}k`m%dm6LYgKNMQuUysk=9m=v*Ge zi4I(4X>{WuZ@xCL6vgI_5`Xhqf~yF6Yg`F)!i!Q=?5OknL(E0-1mVcuwzh5oR<~~q zIshhx^=J;N5|~hBwa{g1NhMJP2PoG^Fy_+eX_*jiE;uv(j=|t3cdKGaTa3NVIivSj zrD8fJQGekdWjG8g? 
zEWJ^~??hgby=`@@7K)jnUTseb3ePfA}XIicYr# zK`yGabm&BV{lIiX?3DHs$*eG^%Ch}xM{kZSZ)pq@KR--dp$fOQZqHjXn^s|e7vwy( zPL)0=7v?YN*!&n!n${fHzPh@0I{c#rdtX#eZS2ceZA+XAb@iO9_s5(72Rgeo0THyZ7#77_OKA3IRBVmM9;uIgWG1ZB^1i3morRzPQqmXtW~ z40IWTcL-=2m`_*^vIIbWhBnU@MMj3+0|i zS7}8zO2fiYD(Duq%phC+fyM365UiTFb*YMq#(ke##2D=tk|00cwEX19bR(lHu$3rT zLlugcs(y;+>zJ*?0$!61+EW&xdi%U`f&wf=bW)W%?q7&iRTgxKjZ6@}C#_>BZ_tD$ zp0HpV1q+=h1uvdQR39(1m2fOqMXU^(6w83soCz7rv1FuJmKJ8 z6Ppv!OG1zNn84@i>#~83WtN>l@e|EZQeCkMSP)fyt(`d86TN-TQSxMa?5vq@u6R~- z)U6Te%0dVd%W3`c(%&}mU)$2JefU@4@hj5$e>1cM(;<#C zU^=t``JNBKErL*taM-A^v0?E1X7a%BI}$KiypXM*zwpfd`u5Lj4{ol&*}>eElWkH5 z4(#D;k8x^*fuSve5!$|#@0;xW8QKQ)alh?2O1hQA$ZZN4(LCi@9W(A31c{@d!SL8r z9XG-^+1VcTP)(miemsp}1{_WdRCb`lQL_EI2bA7l;q&v^-S1tj*GUeh+a*RhV)xh+R2YvS* z?PDK^G85zMRK^5>vAk^@e!T_7M~g(emoksd!3DDiB?yCcp2`>L5xYG}hnI)LNIfy--WDD@^UsnScEN!IFnv z{515|`yDNxmIs?`m|&;xpm(FUH5yD7^@w{)?R$gNDDXVUQ&LK7Xt((q+)dRie}Rx} zi*pvpR@(dMiib0knZQRTry7bC$WfO@9%2RgJP0Ui`k1$XmC? zESMPAZln2Dq%ZG8Z;#%CcmpvxC7Ia#LXHberlp-~r2@4L94C2pD4Xw%%yBfW>RvuV zS|86qbQwh5(RPz-x=b?@T~c9F#)z(dyq2=01eQj&Ao+>lF$NOz8KP$0l=;byG?nnr zV9kVYWFEuQXu%UR{#dv&&Re*(>fW)tl`R?!&e$b<)H{djogiM)YTAG>p}KL~G**}w z#&-?qdSn7#mi@Xgk3y;zNmoO&(c63>?8qw zQ;LoDfQl{&5^cLiFPQd*wqA=_k9hLgB0@?oykpHWa?rRsGSEvt$;l&0cqAv9MJ7dm zjjx?A4z>lg+a1k}bPLW$LV?lTIdYotr**BA3EqG%2M3d64)L7i(@Hkl)7C7hVSKp#HRVUv}V{EDWC?rynCD4?gR!sl$k76>V`UK+lJ^NE zoMn3jdkFR*&}HzB6*i<6iM6bV$_ET&&mFW$_q}>O`1+)hBOd=!C?>LUA7iRuqWs?HxgND$jwekcpM=cd3lE zKE?CuFdOvllp6ObS*V*?s+Jozs*ZfMhVy-`hoKXxaXn@nuh%&bUor-Z4aGnJu?BPj zQBw$ZJkqG1o6@?|2t|;1SK2cqi5)owAD?=`IFJtBx!u7$Ps$x~UX?m=8Z#xG&`~c7 zJJMDUu>(E&Rn*&hpHd1B)vXc^x`QgO(k%<|9Hm$RAh% zCE$KgB#j_(Zc>EJEBJVKlOn{QC=%GDsIsi^@%gimcl*#D7(iQP65YBoQZvPhn~d>l z!>7ck!+1QkzVeh56T8~?d8{7<-G-^RWLwNncWf&A2H84`9<7-!E-9k$QC+IxTdAH^ zT2ico(}HlM9LezE6N$<%;d5?j&F91SKd4MNn0do&+oK_{5ngpAW-1@&T6)Lamsm4> z1+xV>d1}q4lr9Q{zwR9|e9(}c>~S$!JIgYe;e^F6{Ie1vH6u7PZK7%=IwLu@K%AIda(ZD z&{MprZ2h|~*<0YoVEuDr=w$3lSR5-#k`8*$M*Yd+lkI5{(UmGZp7!CPUPaXXBy%jr 
z4=oH@NT;v#a2seL`I2dNO;)D<(0yN0$g8wRil5QLUekxt0J-UaqO(DQ4!116?GI0v z`#vzuWOyIi^_>qTv!L(9`q4^ehBhFCdwTL$CwNerb`D@`U9JdGhXu|~>FAibQg+lO zoX#n?n~FiLneMbc!2#ZdJKlB4?iwkvSkK|2b9CFOJo%0Xh``eLuV{)Dy7?k~2UUrd zswW+UY0<>-mw@#5aC=_MU~G(04Eh-xqYPJo!tl}fCf8HEONyTPb}5Isc;fI4bWQ>R za|>EN(N-!N1aF0jW|OqSsCx=MzaH_zu{64Qm_5zj*81o!LF%Rt&gj1zS<}puLD!(*&xTRZ|JdSo#l8MgGlD!Wx z`D#YzblNPmGH_3m=@XSAQEp(#GbZ4up3D3LsKp8>$N}08)+SMmi?8VMU201w|fQB~)S3 z!`$TcWO70mdqbM}L7~WZ&kJUCy0;hlfIgv#2KMPDOZ3D6eTowG7|AUR6!$d?0RY}o zpIe-*+(5MLxRT~D?tnK_+_mhX{?SUoZ&%*^6?M;oT9A3XI&@C32Ll$2ztOP)sT7HG zdxS)UFMK0J<{Ke)?-R{x4LyA1`UJ$BS#El`1{@W?;CjLYHkk{;w=I}}9y$k(qVG<= z#?}G&R3XB#(&Zs$D`3I7tSZ)2z2TNk@>KBto#FfJM3QY!=_g;X-Wt142a3nhIj4U} zP6U)36qKAw7*(Or6Mtj@$ASbU_w3J-Q{8(tcFm|Toe>bh{zr^!jRouesc1$4cpAn) zye+Y0jeLYk5b6iOI63wuhYFS(Xz3U{Ak(|dZ|`vDQ(L%L^Evy|XDwMzr=Akv1NMLz zbipQ+8u&2?`WyR088=B-iDCyr>Fvcw2^6q?tmy3wil6XZ{LvyTIY|TCTvL9Npz$0cc*yalwIxgZkGg5J3b zM&*EMBUdp1N2lTtiL@LK#E0oF?{OPis`+SO?n)whG8gk>KYF&gAMyRzk19gUKxmfZ zzNb~D443|Ujo{oJ?)@S9-}f>|+SkrW+a(3U1b)XnW_UXy4#ZI`q7{0;s5iZ-ej;rB zOFH=r3~!T6N9k6N%^*hZjn!Q9fqdF7pLs(UP3PQFM+?(ay&fce`Vefc!;|_90CQrN zM*y6}M`vG|+<@G`uq;wflj0cXk-7N5Hf@VsCg5P{(pQQxH#2pquT5jAb_fz{(1~!} zrS=^D0QE*)e~2yqqx-5&8QFBLU_o4d?3y2X)zD9)OMXRS^aElQ1_CoCAE6<7GU#TQ z3fg(>!opN_9x5U)_j46egzw|d|*~z5fBOR zkc#ANm;v)nSJ$Rp^j3px7X=wKAyP)79KQ&-&7p1OO7hUOn@%kq=*N> zT421JXPu$9!L#2w`n}YXUbK|!ZnTt37pafZKZL8Ns37*eGHe=oFj=4%03tR8X!lW2 zq#?LI1G8K&DIOwi2Gh=C+%||(M3g=83kp$LdU#~Su&v+@^x{rSQ}5>xyb%j3R_x|y z>c?&Q#h#|eEj*O)4<>m;Vbn>XmeuX#WERk#YzIdI={@3MMUEMMO~fk5p<3-iXZ|Ua~ocn5~l~H z4;N7ksm3JH);8kO=m>Q;I%3J`vSBAVU=%AIJ7ukRnRWEpspwkHOx$_y!(V&Zg|>I> zLnee?U-KFoZwOqe%TDuFudaTm4njF#n3dZYq7`4*K^RqHt_anw$3}$33QmiJPJZfb z3?LMtInSO}xwWD+2Dj|5(n;lavW_PtAGRJhm9l$NSfM#Ft`as34lD3`v^HcBileF7 zf82U3H|(~`vC*`3#8rD5)(F`kTaGKKURV`4z14TWPg7#-05EnPR01+B;Ut}lA3@rK zRYbRoB8a&N&KLRUmi7EXTv!0! 
zyV!hmR6S9xR+Z`({@W_%DJH*jnXdB%L=NUKJHO04N zOktxQCXN@A)+j2D3FBU++vq)x!nBNN8%Lp7iMz{k1GzDdBfExoU#+Q7;LToxUqd(h z!Q3AjkdM)_a4sr$?3MEf2reMB^`O)+o~N&YKU1E3*KFI7 z0zvND3-+{I`);UB3}W?Z^|;CVSS9c?JU~5j15daM*Nr}^*q6xW3 z%S@yX;4qEAOHa)v?Wn0&38@S=YsK3{Nj`j#TwUHQmBVdo%l4`ct~oPsd$)$%j5xs7 zf)i2F<)et|Qw~twY4xRgETf}yZiXbjmWO`ib7muz5iiE|Mz>z^}`FP&)LO2J!3LMURqwrn%$~byn5D-ft2u?O&Uk+27PE8`<&MHwv1i1 zeg4z-Od2}(Pidv?7Ba|erZ3!en=4PH`G;8 z9`2QT={46?-|B&P-C;K8W_FKWO!J)yiE}1_M}Mm*)mmf|7Ss>!oA-xsL3nUgR zVIn>~Ul!H8}Cq?9&QQ!=KkcX(@jBTVis?haQ1I7Nh zJA>o|q$Y9cew-zk?JDVgY_!k2efCaGZCR|7xWJ>+$1Yx$vA(eXxo8S44DD6Q5riNo z0gtcx=24B(Bzm|3AF=kl)+N;HiML~3VNspJ>AIfCe*Ejg5`!?A4g#ulC&^g5OU^zLllBz&TU%}N${{yw!m1+V~1Y=Lc6zN#_Tx{}SzWReoAzvh?Zy+E`3TL`$LC3GEl zhHmKXX}|1$Ru=y*Nbt1R?KeFIkd^KDlvC%$6rM6gUuIFXys;LC4W^zHJ;5VE3Vh&o8(vV|NX&FuB%7p(@gZNmh)nuK_WNBdkQ=yeFhQ58i_D zD=w{2`3ObsHi!tlU1s4y0esuk5;@A(`1h+{cX4HZuWjQj5f;&euKsZI^UA2)6l-zb z2-T!tF_#DrzqsZ_oNlvvm0%oPTJa@b_Kv^RZu63P;pdlfq7gM9c8*`yaaz~LEw*gh)Aqh74NP-@6JYRGw^KkM)3K|;?)?w<=opTk`jRyEknF~>0b%Vy z+Cs~4Z5DCWP<~xeQj{@vNPUId!pqB**xGijsxvvk>zGS|j*fVmkfMpv-M!DP4(CMs zh_1ky(9MS%H{npfG2l=)|A0fuCr%DUOU+3!;7~J5o5(S_Z$K(*qbrdoGTbPf^miFt z#JHtWQY3xt2+%3BJt9PRCt~|)stoDuOiQILeYcyc0q=v}qYnfoB)#)c&%V?n)w)}(#ZuZ_d zCUp>u-kbNikN*Zhn&3O_zYX8hW!tR({2Kv-fBT7tng1?E@ozu!-_O=xME*;(=`TtC zZvpZCZMObW*I(-Tx0UCYcl}qu`oGQAUpD#6CjYkb{GYPPA6bB+7Y~?n#&mlH6Nj3w zSQ=qibr_=+_n&<`GemrwU+BB7}wT;w`pUKq>?hc{@d zvA1aZ5>SBMzCbqfb0va;HqNHaa=K@Fb+JE&(WRP$y0p+q|v z{zD5^kQR>ffNnYK0TYrP1ji}eqms#eM?Xuor6(o zAN)^RzxPfmwS7Hrzmc7`Wayhd1okgz9z7DP}y)Wl)tG;^q=q@;n2ggXv7`#S~+8WAt`{b15G^6w<#z+t=4}gsLJn+ ziKfc(`b=(yil|V9I_aDoAiKcnnVZ}?{Eb_qKe<&PlXkXc&p$NT-)sTd1*#@$Q#C{X zqMAS0<+RBz`M0}Xy#Kpy!vCQ7PpU~}sAi$*?vzXasfym>iQ948_y}VIh*+|A}TYc>VGxuM@rl8W=*}BpDPk{J%2C zf0Qz4jGLv3pT>BIZTgm$eGS+W7}1wqZ^nq(;XD7im~SxCIi11Bvk;*%u*nC|O5Qj( zh7$#S5DxS~Zr|+UpZTCap*U22a_omG|4FkMO*6|MO|w@2g!)Y7pE~lxqBeCTl+^ME z4g##9`~idz>hjgQf45caCl1B~Y}YY-2oumn{{bKAWd8#`0=Db^z>oZNBA|wCIuVFF 
z*uVkZ2+O3!_8)ek|5MN)z7_N$BHz4->m~vxT0|oI2LuCD%~VjvhpBAS7JwWZ1N;G? zr~SXP+d%0XyR$(*@>eza0sh!Dj7{*zH^cbv6V0%UO|QM_3;qkQ{f}z$n<$+Vbk`ta zM=l0EnZ60w@pwf23E26w*H-=RJ$e3v_XI5~@0<5rq{2w`e~9awNi$$8GvB1u3`)!N zJ8J33H_!X0d;e^efAYNlgk#{M|253$+o~COsecBTDQEgK(CDX8j~9a*^OZ|o-(VM6(|83x`(Enpk4rlt#?+`GmLI2@K|J$48H#aKxr|fIUrR0CkSpISE`(@kT)3pCPTpE~rn8SXgYS%Y&4`>YWTdD?zq+qM^M_&9R zJKY-cJy2oN_#D?%inndWRUR0CJyoB8&TT-3>9*joG!Cts0XVLeHLaypbY-y;m3q~? z;pIfdo(7*!W2=Ewp}uda5+i`e{WY_^WQrzwfidAD4RdO;VwM9d5S22^^OO z9@%qOiNPZl$B2BLP=1ZB;3Er$MBba|sY6F3#CpvNiWisO*Cw5}buqe|eNjklZ-UWj z~bBUAq6br!g}1Y2&ZQ&F%=u1{!*B8Z{fJhP1if zcma{j5pJiyflGYCjo>ClWA%0|B!3MXjGBb5dz`uOp{n z?y#7z@WoN0;3i$yDaE2q2U#{Cw?DIA;W{X9zV6#GA_R7B92qn+}FRb<{EKLihXgr$@FCt zx3LNfJdcoIe;dS8F~+K>{~W7w03555;qM#&y#ZYqJvlYnsLT> z8b<%wYjZqJIb%G{!)(T26Zs#5O*Z@S5r_4JfX?Zn2}%09amB`75UA#ZwWD$ohF!u? z!a?`X9x51m=CINnyA^*=COV<8LhLgY$N#;b8iD zoiT4mxZ?Y~oj*_$enO8$fA+FBc}7M28Ts%J^L9*k{g_*$TR(92bX`}$uoN?>8zDiw zqjXLKl7pQ$+q9oO9H~J(`@mNYtQKhWHhx?5^zUlM;-Vbo#)hVxFZscu>CQlc9TgYR33IgUPM#QZql) z?ffw>^^dQ4zUO?t%X7}>-D$?DbJA{yT8|lL1$T!&EMImhcZYr!b(kFX=Iw3O_E(Hf zk4p3<6s|DX1kHiap>E@?I*>wOB!?v7KYwRn(2DSxkWK| zR;aAW`T(@suJa8W_sw(n`%v^vefPWPPu|r3#arCy`~5ZYKFp^@0^EX(?whBNiv*HC z6esZBD|Qtp@cErTt4`>%f#EB6mEFKBcU8YYV1Fnyf?WQ~;;O%pY>=-$6cJGDK782y zwjqF7hFShoZImdi%djqYmX?0aQ3}@OJ`@oQiMbGEsC1W*)pE1-)YX{n*aC+U=SY}h>P+z;(k`@K;iLKb!2Gs1umhSAG%VOVU;Yp3U zHbktNlUED*b)i@F4GTQ#QaOn0N6}q)*3?zV&#MnVB8y`yp@r!ml^Du)yUFPYSCnH1 zX1n8ta_%=Q;@m956VIS9;djf4Y^~Zs;0YO&)MQesm$a?jSqGj&Nv$4a`@(NsK0i!kwEw306M-gVON`9CSL>zBQJ>fJ* z%OImg&UZ!Ng@FTvH)}?Eloef-B}?b}o~@HJTye21`vxJbCK*XcbgoYnr~b}V$RfY; zR$n{Yk?JRy4pG#HN)qw=Qzi? 
zh0p7Z3JYiKyZJ;~^cf|)or0yK0>?(ftP-1lq%5I83|6p0RKgsZF`_mIo0xE8hiIj7<6uidmocjnrMQ7uT z@-ipd;#K!wV%J7h-ec76Wnukaw%)s$Aeo+-#{7d_VmCD0Q)S=nj8f^s!b3|I3>JMp zdyZNAFuYXz9*P9#E@f=IGvddLVnhY{7G0m6O0s7*>`dBx;aqZMbd`LZX!K!|=KNdM zImYMmr=`?K-6BYp>a&f{S!FJdSJqETO45(Enk;s^KAq@F63Sj_w*z}+P1u+UOI7wL z)cQts?H$V0l|H0jd~qh*o@ZnvC!Cu#d}#(3e2C$F_^2S+yttUXkn8rL!TcMy4>jkb z2TYe0g>G;v+M99l+4^x;Fj5hpH)6GR-2rw1c{&ROI)OZg8hY(5{dPxMPhG&&p1Ody zpKxvXMd@G!VCnEXWCY?+JEE0@%v;UHz7A6 zH+z@W2)X%>I}(tf|DFU1QZOP2Ix4G0gt!fZP&wjaENB0vxui?9Moqd+vUVLl^8roL zmo@s@WzB5&r zsULkv%jmy`Y?x`7>0XDyKzR45nm|rNPD4&B_X5=lgbakNx5;Us-am|b3!`-shd)5- z+Bz9^SRSLUSj>7SeXP>T!a>-PE*>$aXcFwo{??_ zXOp7z$y=RTS6O)0#s^Lv9HxVsW^{j|IgR?7EooB&5ftm#fnmv_Z7!tO7OXk!+2xip zc;klz4Jxx5M~S*MqO(XOvW{kT$n)6rNcBLcts)_ME84@QH+a0EHm~D$`xcg z-9xv?P~zoM`L{X8stWY4pHAB8tmrC{q~IoVKTVmqnw01&xpy)(lH<{&x$s!BV7&?o5e)e;B4;ZP>;#w2nlI8TEQjbNJ#?jwE|rnVG)D&G$7=W1pskBSPO}or4)LSgD8Et{U*{)ZVg6q7 zl&RD0xEQp|<=D8wo6^&TWezj1UZ5WFma8+mnz=RP(kn6TG5fcijSqji#bZ`cIV<6! zX_SurjR}mUO(XOlrqP_y<|Cn(>eahi)e&8W-J|REgU_Ix6$F>~(h(P&9)H2+y7v%= z93S`bTTCxoMWzm(vt&q4&}|LrO}O*CA`z&!ov|fu?|hTNSOcOLYB7VO9}FS7&>{|B+|Tr5EI)MKoiM2U&Fd8X zDcUZQw@#Hjn*T!2i#ytY*dSp!;Z3-6>cYH|mBg-+mG042qkQzv(VVtyj_xU0IePl! 
z!icj;h((}}ites+p?R*O_iVyf#Qt8QyRNduxS@REe!r%&rm1tobW&1vbWVg}cpu%P z$eCtsTVy)jsYzyCIYlF@?Dr0g;ob0=_fyh~8fc^Pr(B<`v-;XW2|D?pvZwQ%M~hdh z%6+}P%1xpAzj)(!ql+Kkby=$RVmX%u^3293aqB<9FFK7h+&hhYxkeiJ9wXnqMt4=* zmW}yMRZdG+hYJze^*L|jgWrjDr{Bp3-o}*Y^&gpmd|qMmEUmN4uB*bvgKNRjSz*I9 z4>lO)7Rj?80iSH}$>uwF`pYNVZa_7W3(G-mc2HQ3O%E*RGs_q&uE+HZ)3gO$?%2Xi zV7&aL48TkRg+K(Yz~(1GU-8t5A>k5Q zZjV@~5iBzHV9&)E7ao!j)XcjWI~LRFo$tx@&i@Msd(C70HL?Z<9r@6QE5r^==N+Ot z?+&q0%#b31OVDsoQ9mHC?S!sWWhT9Gmpc>_l`^hgse~Zf}q}@-k{$9ty314%0}YB&A1HE)@@Sx zNEg2kMPj1-zU3$rlS>Yi;gJKUyQAP!6gr4G{Jwy`v{9Eqt5SJMdZgqXg}%#vP1HqM zsv+?=Gif{9-o>}R!&WVc;}E7~)I9aBJWxa4=JjqeXNY^}RlRxyT8R=*OOuaU(?h@G zGE2?s#l<^0ay;H3j|68gmXbsmL@1@GwjVD(*B+FJbYSc_1ubZ$0b-pco{Zp#hqAr* zx1Vu>d23e_M}>tY=4 z#D(hakQH?s_G{s7=NT%hRK?Tn8QJtGw8$dMm8F1RUcyo36k~-MNYDj;-CwRS^WrQe zO7*<!^oFNvDTMRi}qY)!ec9inDO!Cf^ z8c6dGUy|ND=o#tEJHmR$F*^e;;w<0E8$YeA{K@Rj)ir~%u2!#mvpV;J5!&YT>7DOK z01uJOrJc}|kI62_xy0fBqvLjZmltq>EK083QKL&mi7=0U^BR9|(gYq6#-4ls^%wOV z*OCoUIX3v$<~Iz#{gVQW*$+d)*R=e{Hc^<(-tqtt;K@SJLeL7P?x3{2rFxK?keiU3 zU$DUh2?7a%6UrAJ#86gdHgkb`xDwT$h|(TAT^$end?qKVhxXWssN7R8rhDSl&Q*)n ztUWE(w)M$S&L7OK3i~di=svbXxiYl_)5+UJ{dZ36wUqGBxft8Y+jJv4bF1_16s(=m z>ilshC(qKcyM9l@ukDyr1eSh@lwPqDKb9rQDYX>NEG1|cCEGG9aF?Cp4MS^3-6#p! 
z|NQL%*K^dB<)e(B8N8EVjG~Gunp@Tq1w+a|@3)aL=q(qHcgvOCdCMijyXCqZY}15N zK2y^pt;b&vSrS(lU_3I^$U#1C>7wazs}~FRUpuIPBg2=5nWkt$ctcKukO?!pUN(F` zc0Zurpx&U~up@(QD7UK%)En;|`mNjj{FqCYU0<7RQ0Aj$27T%%a#iU5+CRA%@Z1dk z?kBh$oU1F?H2P{8xEy3$s*=hFD+lna0G9)>a`-Yw7qD{ZX@h+19=aBca$pGo%Ofz# z`TzK3fx3daf>DmqzZuGb%R!&}D}c)Z0_|ncUiRN=FN2m*$Z5!F$Z2pc0hfcXG1~%b zhrV4mV4d#U=MBy!ed=8DBf1>?+4117{(aG}j+A%Qf*}&*oi*!%BNF`9u8f^+u0L!B zrNWSe4_9b;{rdRfXUj~3YnOSZ`V)jC0e5n`9; z%(5O*D0B40fsu49qN5$ym`y*0l}iu3Pp~jM-A2U?s_bf(cmB{U&oztO5U<5Gi~O7R z{Ow(h7Ds%vb4UtPOS2}O30J)nS3mFV$%ycfN=;cpNNSSBE{=s+m&e*HB@{%tra~cx}!gE>fkV) zV*|hRbBz{8l-*1VQ5!^QmQVJm6TgeKYhgc1vmL@(MhR%qb1k}ZIn?9OhGOxpJ{b=m z-`%;yrDW;kX1lui@ys1J$+e>%CgZ1XJ@bNtiGCN|5XwL&Zy$R} zx$<=6arr@-B!0vi6>H|2e96rzL%VzA(JPc$x$C33S12K<(_!b`ix_>fH}~$v(mKTp?Wf4jxd~UXeP;NytgaNmwWO?H=blc0hRkTf!s6R83(I-&L1? zsa7357LvKg!d1iFnXWy@!ps+3-y8qjxzEpy{ADp0xE4^mRVe!gzOy1+J+P68R)ou& zg)%!Z`Oms)$#qWJ?NIC4;#bTwoufD`Uv??Cmi#PQ**)yd+uN$`&xPux5IyZiUj}Xn zfl5q^3NlC|?P+tWFE~Q&*mA;XgM;`WKkA!jX^zLFV%;EP1 z^relu3|f`SOVT4H@2KB9Ft3Ta_c|~k@i#MRJKNsHx4y$xEs5h0rexGS^{zZnL*C|v z`CaMHbozh$H_+>WUfth77BsuEi=JF8x!j#RsmV z61QpPs%^}qe(bRkpt%{C%HLemCd}r)wW>qVLeK&T{0kPb&*}wxe+`4&gxrMOgxnO= zzzcRkU$hGXdOdzDOOjJ+DV$kK&@M{0Wme!WJH;D@){eSS60-mK+XJrWs4L4y89y_4 zC&3s+6;m{~tR)K8H2q_86$R*i>%YD`0Y`?J2IwFiq$tkz0CGP0X<( z=z2&3l^3YIQu*JkT?T}!>9^Id%6Dr%%p}Yt-@(Io=P}@*O-7uD!Bo{XjVh z^m?GzD>E`5_BwePu%Unr?e!oNsFPSA?+XZT2yY1QzvJ9XI9n7Z$d!W|?z5c6PB|~@?K0w7r(2_7Z5cIj z_M{```YScnRyOS`men(g-tXGj(1D>R;&`TjU)hiatW;p70xR_!Y%LYQ0RnOwavE|P za{Bw{9}5ZQ9VCE;3g~4cz{sH9px&U~px)oVdIP=wGpaq%>w#YXmCfTo0R3+fK!y46 zSLpk$IKD@OaD{N?J9zrOZ4|;4!WF``S9*^5eyos_kdu4|4=f>;Js|k}UqA7%;`_07 zFXAcHj0o|ab&@n#jTJpFhDYhUBesT9ewtu)DR;JQ z6qzJYy9fYc0T2s-SOCNVAojZi#D1rJhOlIPZHFVQ)38p%It}YItkbYg4?0=-T)j^W z$OK!bfnFatqa0<8$(G&IYV24qG6XLUQpt~K^_HYrM5^@r9vR8V@`F^ZFP)-899_aPF8`tKK0u>ji zxV@2C0<~oc)G`h^4LJ=t4LJ>@>w`Tn#yH#;V?iHYj2(+%(eFj*dAfdc-Rhv;px&U~ z|1I?fdOgtVX?QcH^AR8I9FoG+(yU2m!c~D@5A^ye4sYDZN-l>d+`eiXGl@st1PU)u 
zc!9#(yFGvW_N_y1LT*BCLT>WbYN-Jo7;Y`B=hr(1fdu`dbp|LcDD8Kuv_P*1dOhM% zH(5uEf|%Be_Xfri0I>jw1wd?XWR`&K;sP|lK~6(XLrz0ZfB(>DAfNW08<{dS<)bswSkAM4rn$Hh=EjfZ77keDPz1c9&!OLuLdDcQkyEgUUhAF!} zHOXaOW|Ma-4%czfw7jrEC2rh|Svwafq@G~QmToqTN*IlVOXeS|stC)%o?P&O(~-yJ2WgV{ z5o=VenQQVTH>V8k?#q`b$)A8OXaj-0n+{ z*GVS(U!cCX+hsuCDqS_lJMr;4KWP(#n%vm|OXw5zri;nTS*^UOu8#2e2kzqgnTWLx z4rkg}-=|pmAFR`}P}oUrpHXyGZKip2^$W7n(L9N8@`h;A;;Tb#q+#A*-XP!X zA>TkJK__8-gMGHCT)h-;;U_dH zAZ~JidBei1LG|{>^CI3S-`uz?SXOV`zFRC6H+zv=G^T@H)vH{}@b*yD98P_pvpaU! z)AB^bBc)RR?m?=z%<|{C*1vBpG=9KH^^j3`I|3iNA=)J*B3|7ai{3(-phvP`f2n*T z)3yUsB|hiMeM<|f?*+VBv+4m^9D4NJxi@K}_36i>(#XSJ?2#}IzL|D8^Mu3|le1a@ zL97{g+0txxY+%9!#P{N|b@G|oQCV0b7(1w!gGhJ00A%f3DXJL`Gqn9*$HC%(b@@O3u`BAg`?kAe)wCDG{P=y?g?t^2XZGbCNw>6g8)Hu}F z?zN;v!FysWHnyPylWR+N_ReLo@3QcuMqL{sR?W$)h5WkEtNMn;TEcyj@Mcn#9q&N3 zH*r^eCK^>9eJtd^gBb*+=Nx9-g1izlYV7%wQiX)d;4!o@rZ`vJ3t+lzEm{tr5w B5Z3?z diff --git a/book/src-bak/regular_backgrounds.md b/book/src-bak/regular_backgrounds.md deleted file mode 100644 index 29675a8..0000000 --- a/book/src-bak/regular_backgrounds.md +++ /dev/null @@ -1,313 +0,0 @@ -# Regular Backgrounds - -So, backgrounds, they're cool. Why do we call the ones here "regular" -backgrounds? Because there's also "affine" backgrounds. However, affine math -stuff adds a complication, so for now we'll just work with regular backgrounds. -The non-affine backgrounds are sometimes called "text mode" backgrounds by other -guides. - -To get your background image working you generally need to perform all of the -following steps, though I suppose the exact ordering is up to you. - -## Tiled Video Modes - -When you want regular tiled display, you must use video mode 0 or 1. - -* Mode 0 allows for using all four BG layers (0 through 3) as regular - backgrounds. -* Mode 1 allows for using BG0 and BG1 as regular backgrounds, BG2 as an affine - background, and BG3 not at all. 
-* Mode 2 allows for BG2 and BG3 to be used as affine backgrounds, while BG0 and - BG1 cannot be used at all. - -We will not cover affine backgrounds in this chapter, so we will naturally be -using video mode 0. - -Also, note that you have to enable each background layer that you want to use -within the display control register. - -## Get Your Palette Ready - -Background palette starts at `0x5000000` and is 256 `u16` values long. It'd -potentially be possible to declare a static array starting at a fixed address and -use a linker script to make sure that it ends up at the right spot in the final -program, but since we have to use volatile reads and writes with PALRAM anyway, -we'll just reuse our `VolatilePtr` type. Something like this: - -```rust -pub const PALRAM_BG_BASE: VolatilePtr = VolatilePtr(0x500_0000 as *mut u16); - -pub fn bg_palette(slot: usize) -> u16 { - assert!(slot < 256); - unsafe { PALRAM_BG_BASE.offset(slot as isize).read() } -} - -pub fn set_bg_palette(slot: usize, color: u16) { - assert!(slot < 256); - unsafe { PALRAM_BG_BASE.offset(slot as isize).write(color) } -} -``` - -As we discussed with the tile color depths, the palette can be utilized as a -single block of palette values (`[u16; 256]`) or as 16 palbanks of 16 palette -values each (`[[u16;16]; 16]`). This setting is assigned per background layer -via IO register. - -## Get Your Tiles Ready - -Tile data is placed into charblocks. A charblock is always 16kb, so depending on -color depth it will have either 256 or 512 tiles within that charblock. -Charblocks 0, 1, 2, and 3 are all for background tiles. That's a maximum of 2048 -tiles for backgrounds, but as you'll see in a moment a particular tilemap entry -can't even index that high. Instead, each background layer is assigned a -"character base block", and then tilemap entries index relative to the character -base block of that background layer. 
- -Now, if you want to move in a lot of tile data you'll probably want to use a DMA -routine, or at least write a function like memcopy32 for fast `u32` copying from -ROM into VRAM. However, for now, and because we're being very explicit since -this is our first time doing it, we'll write it as functions for individual tile -reads and writes. - -The math works like indexing a pointer, except that we have two sizes we need to -go by. First you take the base address for VRAM (`0x600_0000`), then add the -size of a charblock (16kb) times the charblock you want to place the tile -within, and then you add the index of the tile slot you're placing it into times -the size of that type of tile. Like this: - -```rust -pub fn bg_tile_4bpp(base_block: usize, tile_index: usize) -> Tile4bpp { - assert!(base_block < 4); - assert!(tile_index < 512); - let address = VRAM + size_of::() * base_block + size_of::() * tile_index; - unsafe { VolatilePtr(address as *mut Tile4bpp).read() } -} - -pub fn set_bg_tile_4bpp(base_block: usize, tile_index: usize, tile: Tile4bpp) { - assert!(base_block < 4); - assert!(tile_index < 512); - let address = VRAM + size_of::() * base_block + size_of::() * tile_index; - unsafe { VolatilePtr(address as *mut Tile4bpp).write(tile) } -} - -pub fn bg_tile_8bpp(base_block: usize, tile_index: usize) -> Tile8bpp { - assert!(base_block < 4); - assert!(tile_index < 256); - let address = VRAM + size_of::() * base_block + size_of::() * tile_index; - unsafe { VolatilePtr(address as *mut Tile8bpp).read() } -} - -pub fn set_bg_tile_8bpp(base_block: usize, tile_index: usize, tile: Tile8bpp) { - assert!(base_block < 4); - assert!(tile_index < 256); - let address = VRAM + size_of::() * base_block + size_of::() * tile_index; - unsafe { VolatilePtr(address as *mut Tile8bpp).write(tile) } -} -``` - -For bulk operations, you'd do the exact same math to get your base destination -pointer, and then you'd get the base source pointer for the tile you're copying -out of ROM, and 
then you'd do the bulk copy for the correct number of `u32` -values that you're trying to move (8 per tile moved for 4bpp, or 16 per tile -moved for 8bpp). - -**GBA Limitation Note:** on a modern PC (eg: `x86` or `x86_64`) you're probably -used to index based loops and iterator based loops being the same speed. The CPU -has the ability to do a "fused multiply add", so the base address of the array -plus desired index * size per element is a single CPU operation to compute. It's -slightly more complicated if there's arrays within arrays like there are here, -but with normal arrays it's basically the same speed to index per loop cycle as -it is to take a base address and then add +1 offset per loop cycle. However, the -GBA's CPU _can't do any of that_. On the GBA, there's a genuine speed difference -between looping over indexes and then indexing each loop (slow) compared to -using an iterator that just stores an internal pointer and does +1 offset per -loop until it reaches the end (fast). The repeated indexing can by itself -be an expensive step. If it's like a 3 element array it's no big deal, but if -you've got a big slice of data to process, be sure to go over it with `.iter()` -and `.iter_mut()` if you can, instead of looping by index. This is Rust and all, -so probably you were gonna do that anyway, but just a heads up. - -## Get your Tilemap ready - -I believe that at one point I alluded to a tilemap existing. Well, just as the -tiles are arranged into charblocks, the data describing what tile to show in -what location is arranged into a thing called a **screenblock**. - -A screenblock is placed into VRAM the same as the tile data charblocks. Starting -at the base of VRAM (`0x600_0000`) there are 32 slots for the screenblock array. -Each screenblock is 2048 bytes (`0x800`). Naturally, if our tiles are using up -charblock space within VRAM and our tilemaps are using up screenblock space -within the same VRAM... 
well it would just be a _disaster_ if they ran into -each other. Once again, it's up to you as the programmer to determine how much -space you want to devote to each thing. Each complete charblock uses up 8 -screenblocks worth of space, but you don't have to fill a complete charblock -with tiles, so you can be very fiddly with how you split the memory. - -Each screenblock is composed of a series of _screenblock entry_ values, which -describe what tile index to use and if the tile should be flipped and what -palbank it should use (if any). Because both regular backgrounds and affine -backgrounds are composed of screenblocks with entries, and because the affine -background has a smaller format for screenblock entries, we'll name our types -appropriately. - -```rust -#[derive(Clone, Copy)] -#[repr(transparent)] -pub struct RegularScreenblock { - pub data: [RegularScreenblockEntry; 32 * 32], -} - -#[derive(Debug, Clone, Copy, Default)] -#[repr(transparent)] -pub struct RegularScreenblockEntry(u16); -``` - -So, with one entry per tile, a single screenblock allows for 32x32 tiles worth of -background. 
- -The format of a regular screenblock entry is quite simple compared to some of -the IO register stuff: - -* 10 bits for tile index (base off of the character base block of the background) -* 1 bit for horizontal flip -* 1 bit for vertical flip -* 4 bits for picking which palbank to use (if 4bpp, otherwise it's ignored) - -```rust -impl RegularScreenblockEntry { - pub fn tile_id(self) -> u16 { - self.0 & 0b11_1111_1111 - } - pub fn set_tile_id(&mut self, id: u16) { - self.0 &= !0b11_1111_1111; - self.0 |= id; - } - pub fn horizontal_flip(self) -> bool { - (self.0 & (1 << 0xA)) > 0 - } - pub fn set_horizontal_flip(&mut self, bit: bool) { - if bit { - self.0 |= 1 << 0xA; - } else { - self.0 &= !(1 << 0xA); - } - } - pub fn vertical_flip(self) -> bool { - (self.0 & (1 << 0xB)) > 0 - } - pub fn set_vertical_flip(&mut self, bit: bool) { - if bit { - self.0 |= 1 << 0xB; - } else { - self.0 &= !(1 << 0xB); - } - } - pub fn palbank_index(self) -> u16 { - self.0 >> 12 - } - pub fn set_palbank_index(&mut self, palbank_index: u16) { - self.0 &= 0b1111_1111_1111; - self.0 |= palbank_index << 12; - } -} -``` - -Now, at either 256 or 512 tiles per charblock, you might be thinking that with a -10 bit index you can index past the end of one charblock and into the next. -You'd be right, mostly. - -As long as you stay within the background memory region for charblocks (that is, -0 through 3), then it all works out. However, if you try to get the background -rendering to reach outside of the background charblocks you'll get an -implementation defined result. It's not the dreaded "undefined behavior" we're -often worried about in programming, but the results _are_ determined by what -you're running the game on. With GBA hardware you get a bizarre result -(basically another way to put garbage on the screen). With a DS it acts as if -the tiles were all 0s. If you use an emulator it might or might not allow for -you to do this, it's up to the emulator writers. 
- -## Set Your IO Registers - -Instead of being just a single IO register to learn about this time, there's two -separate groups of related registers. - -### Background Control - -* BG0CNT (`0x400_0008`): BG0 Control -* BG1CNT (`0x400_000A`): BG1 Control -* BG2CNT (`0x400_000C`): BG2 Control -* BG3CNT (`0x400_000E`): BG3 Control - -Each of these are a read/write `u16` location. This is where we get to all of -the important details that we've been putting off. - -* 2 bits for the priority. -* 2 bits for "character base block", the charblock that all of the tile indexes - for this background are offset from. -* 1 bit for mosaic effect being enabled (we'll get to that below). -* 1 bit to enable 8bpp, otherwise 4bpp is used. -* 5 bits to pick the "screen base block", the screen block that serves as the - _base_ value for this background. -* 1 bit that is _not_ used in regular mode, but in affine mode it can be enabled - to cause the affine background to wrap around at the edges. -* 2 bits for the background size. - -The size works a little funny. When size is 0 only the base screen block is -used. If size is 1 or 2 then the base screenblock and the following screenblock -are placed next to each other (horizontally for 1, vertically for 2). If the -size is 3 then the base screenblock and the following three screenblocks are -arranged into a 2x2 grid of screenblocks. - -### Background Offset - -* BG0HOFS (`0x400_0010`): BG0 X-Offset -* BG0VOFS (`0x400_0012`): BG0 Y-Offset -* BG1HOFS (`0x400_0014`): BG1 X-Offset -* BG1VOFS (`0x400_0016`): BG1 Y-Offset -* BG2HOFS (`0x400_0018`): BG2 X-Offset -* BG2VOFS (`0x400_001A`): BG2 Y-Offset -* BG3HOFS (`0x400_001C`): BG3 X-Offset -* BG3VOFS (`0x400_001E`): BG3 Y-Offset - -Each of these are a _write only_ `u16` location. Bits 0 through 8 are used, so -the offsets can be 0 through 511. They also only apply in regular backgrounds. 
-If a background is in an affine state then you'll use different IO registers to -control it (discussed in a later chapter). - -The offset that you assign determines the pixel offset of the display area -relative to the start of the background scene, as if the screen was a camera -looking at the scene. In other words, as a BG X offset value increases, you can -think of it as the camera moving to the right, or as that background moving to -the left. Like when mario walks toward the goal. Similarly, when a BG Y offset -increases the camera is moving down, or the background is moving up, like when -mario falls down from a high platform. - -Depending on how much the background is scrolled and the size of the background, -it will loop. - -## Mosaic - -As a special effect, you can apply mosaic to backgrounds and objects. It's just -a single flag for each background, so all backgrounds will use the same mosaic -settings when they have it enabled. What it actually does is split the normal -image into "blocks" and then each block gets the color of the top left pixel of -that block. This is the effect you see when link hits an electric foe with his -sword and the whole screen "buzzes" at you. - -The mosaic control is a _write only_ `u16` IO register at `0x400_004C`. - -There's 4 bits each for: - -* Horizontal BG stretch -* Vertical BG stretch -* Horizontal object stretch -* Vertical object stretch - -The inputs should be 1 _less_ than the desired block size. So if you set a -stretch value of 5 then pixels 0-5 would be part of the first block (6 pixels), -then 6-11 is the next block (another 6 pixels) and so on. - -If you need to make a pixel other than the top left part of each block the one -that determines the mosaic color you can carefully offset the background or -image by a tiny bit, but of course that makes every mosaic block change its -target pixel. You can't change the target pixel on a block by block basis. 
diff --git a/book/src-bak/regular_objects.md b/book/src-bak/regular_objects.md deleted file mode 100644 index f32c86f..0000000 --- a/book/src-bak/regular_objects.md +++ /dev/null @@ -1,417 +0,0 @@ -# Regular Objects - -As with backgrounds, objects can be used in both an affine and non-affine way. -For this section we'll focus on the non-affine elements, and then we'll do all -the affine stuff in a later chapter. - -## Objects vs Sprites - -As [TONC](https://www.coranac.com/tonc/text/regobj.htm) helpfully reminds us -(and then proceeds to not follow its own advice), we should always try to think -in terms of _objects_, not _sprites_. A sprite is a logical / software concern, -perhaps a player concern, whereas an object is a hardware concern. - -What's more, a given sprite that the player sees might need more than one object -to display. Objects must be either square or rectangular (so sprite bits that -stick out probably call for a second object), and can only be from 8x8 to 64x64 -(so anything bigger has to be two objects lined up to appear as one). - -## General Object Info - -Unlike with backgrounds, you can enable the object layer in any video mode. -There's space for 128 object definitions in OAM. - -The display gets a number of cycles per scanline to process objects: 1210 by -default, but only 954 if you enable the "HBlank interval free" setting in the -display control register. The [cycle cost per -object](http://problemkaputt.de/gbatek.htm#lcdobjoverview) depends on the -object's size and if it's using affine or regular mode, so enabling the HBlank -interval free setting doesn't cut the number of objects displayable by an exact -number of objects. The objects are processed in order of their definitions and -if you run out of cycles then the rest just don't get shown. If there's a -concern that you might run out of cycles you can place important objects (such -as the player) at the start of the list and then less important animation -objects later on. 
- -## Ready the Palette - -Objects use the palette the same as the background does. The only difference is -that the palette data for objects starts at `0x500_0200`. - -```rust -pub const PALRAM_OBJECT_BASE: VolatilePtr = VolatilePtr(0x500_0200 as *mut u16); - -pub fn object_palette(slot: usize) -> u16 { - assert!(slot < 256); - unsafe { PALRAM_OBJECT_BASE.offset(slot as isize).read() } -} - -pub fn set_object_palette(slot: usize, color: u16) { - assert!(slot < 256); - unsafe { PALRAM_OBJECT_BASE.offset(slot as isize).write(color) } -} -``` - -## Ready the Tiles - -Objects, as with backgrounds, are composed of 8x8 tiles, and if you want -something bigger than 8x8 you have to use more than one tile put together. -Object tiles go into the final two charblocks of VRAM (indexes 4 and 5). Because -there's only two of them, they are sometimes called the lower block -(`0x601_0000`) and the higher/upper block (`0x601_4000`). - -Tile indexes for sprites always offset from the base of the lower block, and -they always go 32 bytes at a time, regardless of if the object is set for 4bpp -or 8bpp. From this we can determine that there's 512 tile slots in each of the -two object charblocks. However, in video modes 3, 4, and 5 the space for the -background cuts into the lower charblock, so you can only safely use the upper -charblock. 
- -```rust -pub fn obj_tile_4bpp(tile_index: usize) -> Tile4bpp { - assert!(tile_index < 512); - let address = VRAM + size_of::() * 4 + 32 * tile_index; - unsafe { VolatilePtr(address as *mut Tile4bpp).read() } -} - -pub fn set_obj_tile_4bpp(tile_index: usize, tile: Tile4bpp) { - assert!(tile_index < 512); - let address = VRAM + size_of::() * 4 + 32 * tile_index; - unsafe { VolatilePtr(address as *mut Tile4bpp).write(tile) } -} - -pub fn obj_tile_8bpp(tile_index: usize) -> Tile8bpp { - assert!(tile_index < 512); - let address = VRAM + size_of::() * 4 + 32 * tile_index; - unsafe { VolatilePtr(address as *mut Tile8bpp).read() } -} - -pub fn set_obj_tile_8bpp(tile_index: usize, tile: Tile8bpp) { - assert!(tile_index < 512); - let address = VRAM + size_of::() * 4 + 32 * tile_index; - unsafe { VolatilePtr(address as *mut Tile8bpp).write(tile) } -} -``` - -With backgrounds you picked every single tile individually with a bunch of -screen entry values. Objects don't do that at all. Instead you pick a base tile, -size, and shape, then it figures out the rest from there. However, you may -recall back with the display control register something about an "object memory -1d" bit. This is where that comes into play. - -* If object memory is set to be 2d (the default) then each charblock is treated - as 32 tiles by 32 tiles square. Each object has a base tile and dimensions, - and that just extracts directly from the charblock picture as if you were - selecting an area. This mode probably makes for the easiest image editing. -* If object memory is set to be 1d then the tiles are loaded sequentially from - the starting point, enough to fill in the object's dimensions. This most - probably makes it the easiest to program with about things, since programming - languages are pretty good at 1d things. - -I'm not sure I explained that well, here's a picture: - -![2d1d-diagram](obj_memory_2d1d.jpg) - -In 2d mode, a new row of tiles starts every 32 tile indexes. 
- -Of course, the mode that you actually end up using is not particularly -important, since it should be the job of your image conversion routine to get -everything all lined up and into place anyway. - -## Set the Object Attributes - -The final step is to assign the correct attributes to an object. Each object has -three `u16` values that make up its overall attributes. - -Before we go into the details, I want to bring up that the hardware will attempt -to process every single object every single frame if the object layer is -enabled, and also that all of the GBA's object memory is cleared to 0 at -startup. Why do these two things matter right now? As you'll see in a second an -"all zero" set of object attributes causes an 8x8 object to appear at 0,0 using -object tile index 0. This is usually _not_ what you want your unused objects to -do. When your game first starts you should take a moment to mark any objects you -won't be using as objects to not render. - -### ObjectAttributes.attr0 - -* 8 bits for row coordinate (marks the top of the sprite) -* 2 bits for object rendering: 0 = Normal, 1 = Affine, 2 = Disabled, 3 = Affine with double rendering area -* 2 bits for object mode: 0 = Normal, 1 = Alpha Blending, 2 = Object Window, 3 = Forbidden -* 1 bit for mosaic enabled -* 1 bit for 8bpp color enabled -* 2 bits for shape: 0 = Square, 1 = Horizontal, 2 = Vertical, 3 = Forbidden - -If an object is 128 pixels big at Y > 128 you'll get a strange looking result -where it acts like Y > -128 and then displays partly off screen to the top. - -### ObjectAttributes.attr1 - -* 9 bits for column coordinate (marks the left of the sprite) -* Either: - * 3 empty bits, 1 bit for horizontal flip, 1 bit for vertical flip (non-affine) - * 5 bits for affine index (affine) -* 2 bits for size.
- -| Size | Square | Horizontal | Vertical| -|:----:|:------:|:----------:|:-------:| -| 0 | 8x8 | 16x8 | 8x16 | -| 1 | 16x16 | 32x8 | 8x32 | -| 2 | 32x32 | 32x16 | 16x32 | -| 3 | 64x64 | 64x32 | 32x64 | - -### ObjectAttributes.attr2 - -* 10 bits for the base tile index -* 2 bits for priority -* 4 bits for the palbank index (4bpp mode only, ignored in 8bpp) - -### ObjectAttributes summary - -So I said in the GBA memory mapping section that C people would tell you that -the object attributes should look like this: - -```rust -#[repr(C)] -pub struct ObjectAttributes { - attr0: u16, - attr1: u16, - attr2: u16, - filler: i16, -} -``` - -Except that: - -1) It's wasteful when we store object attributes on their own outside of OAM - (which we definitely might want to do). -2) In Rust we can't access just one field through a volatile pointer (our - pointers aren't actually volatile to begin with, just the ops we do with them - are). We have to read or write the whole pointer's value at a time. - Similarly, we can't do things like `|=` and `&=` with volatile in Rust. So in - rust we can't have a volatile pointer to an ObjectAttributes and then write - to just the three "real" values and not touch the filler field. Having the - filler value in there just means we have to dance around it more, not less. -3) We want to newtype this whole thing to prevent accidental invalid states from - being written into memory. - -So we will not be using that representation. At the same time we want to have no -overhead, so we will stick to three `u16` values. We could newtype each -individual field to be its own type (`ObjectAttributesAttr0` or something silly -like that), since there aren't actual dependencies between two different fields -such that a change in one can throw another into a forbidden state. The worst -that can happen is if we disable or enable affine mode (`attr0`) it can change -the meaning of `attr1`. 
The changed meaning isn't actually an invalid state -though, so we _could_ make each field its own type if we wanted. - -However, when you think about it, I can't imagine a common situation where we do -something like make an `attr0` value that we then want to save on its own and -apply to several different `ObjectAttributes` that we make during a game. That -just doesn't sound likely to me. So, we'll go the route where `ObjectAttributes` -is just a big black box to the outside world and we don't need to think about -the three fields internally as being separate. - -First we make it so that we can get and set object attributes from memory: - -```rust -pub const OAM: usize = 0x700_0000; - -pub fn object_attributes(slot: usize) -> ObjectAttributes { - assert!(slot < 128); - let ptr = VolatilePtr((OAM + slot * (size_of::<u16>() * 4)) as *mut u16); - unsafe { - ObjectAttributes { - attr0: ptr.read(), - attr1: ptr.offset(1).read(), - attr2: ptr.offset(2).read(), - } - } -} - -pub fn set_object_attributes(slot: usize, obj: ObjectAttributes) { - assert!(slot < 128); - let ptr = VolatilePtr((OAM + slot * (size_of::<u16>() * 4)) as *mut u16); - unsafe { - ptr.write(obj.attr0); - ptr.offset(1).write(obj.attr1); - ptr.offset(2).write(obj.attr2); - } -} - -#[derive(Debug, Clone, Copy, Default)] -pub struct ObjectAttributes { - attr0: u16, - attr1: u16, - attr2: u16, -} -``` - -Then we add a billion methods to the `ObjectAttributes` type so that we can -actually set all the different values that we want to set. - -This code block is the last thing on this page so if you don't wanna scroll past -the whole thing you can just go to the next page.
- -```rust -#[derive(Debug, Clone, Copy)] -pub enum ObjectRenderMode { - Normal, - Affine, - Disabled, - DoubleAreaAffine, -} - -#[derive(Debug, Clone, Copy)] -pub enum ObjectMode { - Normal, - AlphaBlending, - ObjectWindow, -} - -#[derive(Debug, Clone, Copy)] -pub enum ObjectShape { - Square, - Horizontal, - Vertical, -} - -#[derive(Debug, Clone, Copy)] -pub enum ObjectOrientation { - Normal, - HFlip, - VFlip, - BothFlip, - Affine(u8), -} - -impl ObjectAttributes { - pub fn row(&self) -> u16 { - self.attr0 & 0b1111_1111 - } - pub fn column(&self) -> u16 { - self.attr1 & 0b1_1111_1111 - } - pub fn rendering(&self) -> ObjectRenderMode { - match (self.attr0 >> 8) & 0b11 { - 0 => ObjectRenderMode::Normal, - 1 => ObjectRenderMode::Affine, - 2 => ObjectRenderMode::Disabled, - 3 => ObjectRenderMode::DoubleAreaAffine, - _ => unimplemented!(), - } - } - pub fn mode(&self) -> ObjectMode { - match (self.attr0 >> 0xA) & 0b11 { - 0 => ObjectMode::Normal, - 1 => ObjectMode::AlphaBlending, - 2 => ObjectMode::ObjectWindow, - _ => unimplemented!(), - } - } - pub fn mosaic(&self) -> bool { - ((self.attr0 << 3) as i16) < 0 - } - pub fn two_fifty_six_colors(&self) -> bool { - ((self.attr0 << 2) as i16) < 0 - } - pub fn shape(&self) -> ObjectShape { - match (self.attr0 >> 0xE) & 0b11 { - 0 => ObjectShape::Square, - 1 => ObjectShape::Horizontal, - 2 => ObjectShape::Vertical, - _ => unimplemented!(), - } - } - pub fn orientation(&self) -> ObjectOrientation { - if (self.attr0 >> 8) & 1 > 0 { - ObjectOrientation::Affine((self.attr1 >> 9) as u8 & 0b1_1111) - } else { - match (self.attr1 >> 0xC) & 0b11 { - 0 => ObjectOrientation::Normal, - 1 => ObjectOrientation::HFlip, - 2 => ObjectOrientation::VFlip, - 3 => ObjectOrientation::BothFlip, - _ => unimplemented!(), - } - } - } - pub fn size(&self) -> u16 { - self.attr1 >> 0xE - } - pub fn tile_index(&self) -> u16 { - self.attr2 & 0b11_1111_1111 - } - pub fn priority(&self) -> u16 { - self.attr2 >> 0xA - } - pub fn palbank(&self) -> u16 { - 
self.attr2 >> 0xC - } - // - pub fn set_row(&mut self, row: u16) { - self.attr0 &= !0b1111_1111; - self.attr0 |= row & 0b1111_1111; - } - pub fn set_column(&mut self, col: u16) { - self.attr1 &= !0b1_1111_1111; - self.attr2 |= col & 0b1_1111_1111; - } - pub fn set_rendering(&mut self, rendering: ObjectRenderMode) { - const RENDERING_MASK: u16 = 0b11 << 8; - self.attr0 &= !RENDERING_MASK; - self.attr0 |= (rendering as u16) << 8; - } - pub fn set_mode(&mut self, mode: ObjectMode) { - const MODE_MASK: u16 = 0b11 << 0xA; - self.attr0 &= MODE_MASK; - self.attr0 |= (mode as u16) << 0xA; - } - pub fn set_mosaic(&mut self, bit: bool) { - const MOSAIC_BIT: u16 = 1 << 0xC; - if bit { - self.attr0 |= MOSAIC_BIT - } else { - self.attr0 &= !MOSAIC_BIT - } - } - pub fn set_two_fifty_six_colors(&mut self, bit: bool) { - const COLOR_MODE_BIT: u16 = 1 << 0xD; - if bit { - self.attr0 |= COLOR_MODE_BIT - } else { - self.attr0 &= !COLOR_MODE_BIT - } - } - pub fn set_shape(&mut self, shape: ObjectShape) { - self.attr0 &= 0b0011_1111_1111_1111; - self.attr0 |= (shape as u16) << 0xE; - } - pub fn set_orientation(&mut self, orientation: ObjectOrientation) { - const AFFINE_INDEX_MASK: u16 = 0b1_1111 << 9; - self.attr1 &= !AFFINE_INDEX_MASK; - let bits = match orientation { - ObjectOrientation::Affine(index) => (index as u16) << 9, - ObjectOrientation::Normal => 0, - ObjectOrientation::HFlip => 1 << 0xC, - ObjectOrientation::VFlip => 1 << 0xD, - ObjectOrientation::BothFlip => 0b11 << 0xC, - }; - self.attr1 |= bits; - } - pub fn set_size(&mut self, size: u16) { - self.attr1 &= 0b0011_1111_1111_1111; - self.attr1 |= size << 14; - } - pub fn set_tile_index(&mut self, index: u16) { - self.attr2 &= !0b11_1111_1111; - self.attr2 |= 0b11_1111_1111 & index; - } - pub fn set_priority(&mut self, priority: u16) { - self.attr2 &= !0b0000_1100_0000_0000; - self.attr2 |= (priority & 0b11) << 0xA; - } - pub fn set_palbank(&mut self, palbank: u16) { - self.attr2 &= !0b1111_0000_0000_0000; - self.attr2 |= 
(palbank & 0b1111) << 0xC; - } -} -``` diff --git a/book/src-bak/screenshot_checkers.png b/book/src-bak/screenshot_checkers.png deleted file mode 100644 index dc6d71af64090dccd403339157b2887b1a052993..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5546 zcmZ`-2{@Er-+so}BFxA#Atp;ycA`R5V@s(1itO2U$!<(k3WZ@*_BFdwh_Mqw*|SX6 zkzHdSgJH&e)BC;u_kF+b`o8D7p65K*^*q-(=Xd|^`~IDCqHh`LaLG2crEo2S51P_*D}IPOqNnQvkvVa zLN2rkz+r~UPo18Bvh_5zP0Y+q$gaPe*73pBVBGWE+RCLj!!L`wu?P24S4j@ zOa?M581L+qxpCOAf!siTR8~}Mh1tif)RnoeX*Lx5tVcT^T2n89;>|r6f-a76f+4Dkii+yOXoqV!e|?ODsG@s#(ekK=%-A#o zBuv?G$Z#{<^rStaydw1I2(ChO%|qb;ST}Bja~psT+oZtEm#@%9s?-gU!v)*fk|xr%CZ*rAP}JP_A%;OdMOG zZb((+HHY~pC2{nA>h+JA^zjWzK)F;eVdhV-EI#FW?XfhlyzsMOC&8jNzgjfSd+(H2 zP)Swf%Tj_W-#$99=@zzB%+}h{syX3lU@W#eU3GtgI@rm1>n)qeBaCloRzb z9eP_={cp~rACbZMqvTf{u7_X-*696mrWw*ePwn~__UZ?t7kXvUMO3RyPTcae^1=B? zOkiisVOe^w6=uWs_X1HwXm8oneoGnq$iOqFGAFLn#>d~E^ETIQxRmdC?C?GQ!2LN5 z>?7@a&XOOBde)*O9DL7kv+8%?i&RIwDc8fu)D8y9GF z7Q6lBWw+e$$(+e@che$$;|<4iJ>N**M6pHj>GfQL&$##&s&`}50>~4!qc1BsH*|K4 z8F~a}1XW>F9+kn3ZlDqAy47~&6?MuFN4nvI4w-fxdO_>?vAU9FS?h4H)7%*JYqrXL zrEq#3w!hhyGdMlw`rPz$W{zgwSbs&#S4{2;o?CR-bN85>e|5a{$nE3*&AG6DS*>YN z16w3tPmC=;RQ#&Vl;OP(9^r4%n9nogf$-3nlA?bTwAVm}vONMdTg&K(gXOlJS4QSp z*RAD()#sb01y_|CQQkI{{mCi=6S$qB0?OB~?0l1Tc~b7vo?$pt)8W9h(vXwhJJYjM zIeU9F39CvWh_mVY>=$))nX|tSVoCQS1%Lkz;-XE@j#`~U4C5vY5Ko?jLu1Bxt4O>E zM26fjS%RB&CQ;!mTvGCITYpZ5>3#!?iKP|IcHvvQp6zzb6()qIwXSCBbskbr?|X|f zCz(1$$`_gW#+824E3oy8A>WIe0^%zw7m+Cw?;PiOg5@%{^(xCgfedjML?zzGG_}=R zAft-r9F{uXTMMI3Nvf%qb$yc%&0Q6mUfFO(m6TQ0)IHXoiS2-8KQ@Tde6TOLnVD!* zLVDGuYb!r>as*aOjJ?;EAxayte|j~_!cLg<0v$c4?ZP6#X4aR1GWC7IJ}GFGWT7dv z9xv{C@Oj6bzmGdu$wj}@U4w2z8bfbRzN=fW;%-T1+}jZQneMlHcYYp1;=x-aWEeIz zXdw{VUx%TrJDH<}zs)-jk6rz7OoU_<&+IVX&*xl`=Ivw?94sW+THwryudP^GM_wUC zGvON~8ZC(qmM7Qi6)F60oGjNjC@%ZC1xWv31jg>7T&<`*cCdpHD*i9@= z1_Enx`m%pLw7EKHwsE`=iq$ z-7n-eCTJ-n!rQm6+bumJh=BXpM1T3105K$`^TY=rN)L0Pv*Ajoy1D#AT+7#rC&ClU 
z14V`@S66NhP1UWR8mTMn*F%39{1aVBDS<#x<%ZtI|6-;E;b7o>V~$CSfhBc^8@w8l z8K|l+muBj!QeJSLsuy@vy?O5DyW>83o1tOiH5S4$@WF8|V~uVn z_f#L5fv&EuCe)#w8F%p7yOrxy@q|9*v9;Q_mzKtNQg(la8cNl^u@KIiN+#;N(hI=7 zGD}zNgr|yyxp``@(_cK=w|OohV5)a?58jvF$J#WKcN5;Ll}**9sC2}#gETCp$8)mP27%9-HB0nqvw2NAl_r}70^@Yf%eHoz7^M}W=BI4 z_|z5ks!M@(5x(l}1cicagL&l>T>xC-*RKy!FUtt2Gy$dy4jJlrqq29vO~zk7vig`3 z*VF`1jUpU~$G`^rH#iZ9s|@nFHkD0JIl_x$q0=vO8SAvHKPHj+zq+S((9Ym)17{e7 zrz@n9jn?{@DC1|3K%27hopW0ujn*Gk$I9r1t-)HZSxy=K*b2w1eH$&T;dY!eXa}h- zoT{Ak9DwjwdrmC+T_&#$_6F*i;yze(Dut8Hvm23nXC@v1vp2Q1pHF!i8yK+H^05Lw zPk`_NEkMf!4#1TaShMpxcCz!gb@*!qy8kqq?W$hs+2(HMUU}(KmTaLB#|rbE8nQZR z&|JTYe4P^4W7MBA7w#AuL~zgUm)_s!ZoTN9s)fH;?igP2LaFK1%DBqX?de68(dtWb zeVHllW4C$tTF)+bXB43!FG_vteTU90?V3FvE=u>i;1#Ii@u9>VyV6yx)G_^*YM3^9 zS7P1%pm_XCSj)3CO0(0oQZz&GO#6!`Cj<|SL)X7g;Ek%4iqV7?;qg!Y1m8-hosO=8l&UYdNjGmD83f75G})oZ z>QbetQ=cB5jgNCTk1}`58!`0DQ^5zVV7Y?4YNqdqy-`&eZGYX~Tf@1U!{NTYJ(|o5 z`Qq6uxDdN>rzZb-ViOx%aNg2~F-eqCdcCc$=q$*ot;au1Ew}KW9b>Om@xhqq2q1_Q`5dA+r4mJkZQn>%z`f6XBi&B z71aayAN1+mS`5W$kFGU4l7BUu=e_iTblY{lFI~{5m0#S8bPU@e%-v@WXq>JkSQSE> zO4E8W2Dfa7?M&{Be)9DdhTi=!fp8QYolo#iqYdENk{Iqqe*G(I>B!~V1jQ^{%xd&g zm0Re@^Q1L&SXp?v-+Tye<)gee(LRk%4aer8y)$8_keoGLq_Rp6{l`2Dku*1GbF5%n z#m{oWpWN`MQd1J5{X{dOVkN@`47o9oC>?#S1v$ z94!$I*%|??8tP#7A=76@2Y#l+Ra2#|iOG(iMj20QWVo}HWigVyGjywJ$R_E?4ZXqdv3(j&)veO8%YhQ95xgh$MF12GEPDLPx3 zY)rP8ciaki9tiv8lj;{QCQD}lda!evTrIq|UNkFNLP}?nb;!3zH0mCn3;EO7{P43f zVeHhH-|2xGSr!I3?=58gxSpiW`wgmQyioUvSRlEs=u*C1<;+B`_=E(&%`(ae&iPNb za?1n%<@lI?_Y=CKskV0Pk)|R55nu&aIT(QmW(MFI7zF(5f6+{qqA}xWWcRvz!E2WAlMH$#Hf76A^+(?QO~E5H*bvh|aJh<*5dh=qsRj z`C*(jGFG9$)zi3)%nOB{QX3CYn-yVY=%D*&Yd=`}5D`qhbT0Gnf#%G(KJB=p%~BcS zLtrOu$zmU+i9zaKs!$n;%Snho@W8Jegbo^7Lw2T3UaWruWTf;~pya-|}ilO98;0mPY0yb)x0&S0FIr#3#jy&FsjZy7nSlWWc+LyqI%6@@S_}^S&@M z+|jJ|k)yu8zP-J@qvOKLP$Aif+^dS0K7Zb#&d>Ex2G};2ydKz$oNYfA**1v7b?zPK zqx1wPUM!$Hy()iF_{Wmah!8NGWw>6iY}*#Refk%mB?+jqu62hV?O;2vhW zQDOjy$LWfoKIi2**xI&CGPRCmYw?UvOsoYUO-*rxkIk8hiHRvGm 
z4bM;ubaHksEG#4@bpcvDHw+A-rOuxx%zSwkP@l080fb9!%0g@)_N(o1*ncz13Jzy8 z6el-hXW$OP&mY_JW**cRJ}oC{Mcuv>P-*H=8cGCNGrDTnG4k<1^--Q^<6o-MtCM| zj~gmeL>}b_tG$vj1nFtWGXvMeX!8*0AX4oyrQ4yNL2EW4E374f9z2Kn;a7qnINq%b z3U`J0d19c06IA-%*1Rh_%+5)9)l^m?DDM4t+sqxUz-Cr z4v+S1R##VVrm6uqVa-h~G#WKReD=PXsgn0=R{)w`f)h!$egctc`s5N>Bj)GgP zS&?+l$Q>FnL?D{O=EC*~u+NmN{S1U||4S6=}ar<*w4LLzMIB-@$3n#Tm}@w^IMZ z$P!M)nd4nNE=~zD&f+4!gc)g zgqq`G-~+BhmsPPcmWS$_l)=EhL8PaKczBla|1B1qp?_qHUIGKQpns;o{xr~GHcJKI59B*y;9R^wu1Maw z9`qKb27v5nnK_7J#Ltd-0r5iD`oS=}f0dfv-d>DIQ>3@q@uvw8EjRnOPkOLD^?;tf z_=ygKR|>#u;12g$Skqs^b1yY!2+^1#JK59i;JTt=|gq@PU9ZMi)5vr_elX9l$KJux8;7uN2{>0@%GQVY($WB}!f&&Aq%^~2I0G03Z2 z%eBFXk5GiYI@`=K;#RoeN31IhIkZH9pB~jdW5(tnSPo3_-3djks{{C1i*&qncU>8= z`7{yn)+)>e&U9;JYf3PE!aY@GTJ?o|Xx$4aO}Cx$tGkQeFvj6+XFS0UJ>_@bM=# = VolatilePtr(0x400_0130 as *mut u16); - -/// A newtype over the key input state of the GBA. -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] -#[repr(transparent)] -pub struct KeyInputSetting(u16); - -pub fn key_input() -> KeyInputSetting { - unsafe { KeyInputSetting(KEYINPUT.read()) } -} -``` - -Now we want a way to check if a key is _being pressed_, since that's normally -how we think of things as a game designer and even as a player. That is, usually -you'd say "if you press A, then X happens" instead of "if you don't press A, -then X does not happen". - -Normally we'd pick a constant for the bit we want, `&` it with our value, and -then check for `val != 0`. Since the bit we're looking for is `0` in the "true" -state we still pick the same constant and we still do the `&`, but we test with -`== 0`. Practically the same, right? Well, since I'm asking a rhetorical -question like that you can probably already guess that it's not the same. I was -shocked to learn this too. - -All we have to do is ask our good friend -[Godbolt](https://rust.godbolt.org/z/d-8oCe) what's gonna happen when the code -compiles. 
The link there has the page set for the `stable` 1.30 compiler just so -that the link results stay consistent if you read this book in a year or -something. Also, we've set the target to `thumbv6m-none-eabi`, which is a -slightly later version of ARM than the actual GBA, but it's close enough for -just checking. Of course, in a full program small functions like these will -probably get inlined into the calling code and disappear entirely as they're -folded and refolded by the compiler, but we can just check. - -It turns out that the `!=0` test is 4 instructions and the `==0` test is 6 -instructions. Since we want to get savings where we can, and we'll probably -check the keys of an input often enough, we'll just always use a `!=0` test and -then adjust how we initially read the register to compensate. By using xor with -a mask for only the 10 used bits we can flip the "low when pressed" values so -that the entire result has active bits in all positions where a key is pressed. - -```rust -pub fn key_input() -> KeyInputSetting { - unsafe { KeyInputSetting(KEYINPUT.read_volatile() ^ 0b0000_0011_1111_1111) } -} -``` - -Now we add a method for seeing if a key is pressed. In the full library there's -a more advanced version of this that's built up via macro, but for this example -we'll just name a bunch of `const` values and then have a method that takes a -value and says if that bit is on. - -```rust -pub const KEY_A: u16 = 1 << 0; -pub const KEY_B: u16 = 1 << 1; -pub const KEY_SELECT: u16 = 1 << 2; -pub const KEY_START: u16 = 1 << 3; -pub const KEY_RIGHT: u16 = 1 << 4; -pub const KEY_LEFT: u16 = 1 << 5; -pub const KEY_UP: u16 = 1 << 6; -pub const KEY_DOWN: u16 = 1 << 7; -pub const KEY_R: u16 = 1 << 8; -pub const KEY_L: u16 = 1 << 9; - -impl KeyInputSetting { - pub fn contains(&self, key: u16) -> bool { - (self.0 & key) != 0 - } -} -``` - -Because each key is a unique bit you can even check for more than one key at -once by just adding two key values together. 
- -```rust -let input_contains_a_and_l = input.contains(KEY_A + KEY_L); -``` - -And we wanted to save the state of an old frame and compare it to the current -frame to see what was different: - -```rust - pub fn difference(&self, other: KeyInputSetting) -> KeyInputSetting { - KeyInputSetting(self.0 ^ other.0) - } -``` - -Anything that's "in" the difference output is a key that _changed_, and then if -the key reads as pressed this frame that means it was just pressed. The exact -mechanics of all the ways you might care to do something based on new key -presses is obviously quite varied, but it might be something like this: - -```rust -let this_frame_diff = this_frame_input.difference(last_frame_input); - -if this_frame_diff.contains(KEY_B) && this_frame_input.contains(KEY_B) { - // the user just pressed B, react in some way -} -``` - -And for the arrow pad, we'll make an enum that easily casts into `i32`. Whenever -we're working with stuff we can try to use `i32` / `isize` as often as possible -just because it's easier on the GBA's CPU if we stick to its native number size. -Having it be an enum lets us use `match` and be sure that we've covered all our -cases. - -```rust -/// A "tribool" value helps us interpret the arrow pad. -#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] -#[repr(i32)] -pub enum TriBool { - Minus = -1, - Neutral = 0, - Plus = +1, -} -``` - -Now, how do we determine _which way_ is plus or minus? Well... I don't know. -Really. I'm not sure what the best one is because the GBA really wants the -origin at 0,0 with higher rows going down and higher cols going right. On the -other hand, all the normal math you and I learned in school is oriented with -increasing Y being upward on the page. So, at least for this demo, we're going -to go with what the GBA wants us to do and give it a try. If we don't end up -confusing ourselves then we can stick with that. Maybe we can cover it over -somehow later on. 
- -```rust - pub fn column_direction(&self) -> TriBool { - if self.contains(KEY_RIGHT) { - TriBool::Plus - } else if self.contains(KEY_LEFT) { - TriBool::Minus - } else { - TriBool::Neutral - } - } - - pub fn row_direction(&self) -> TriBool { - if self.contains(KEY_DOWN) { - TriBool::Plus - } else if self.contains(KEY_UP) { - TriBool::Minus - } else { - TriBool::Neutral - } - } -``` - -So then in our game, every frame we can check for `column_direction` and -`row_direction` and then apply those to the player's current position to make -them move around the screen. - -With that settled I think we're all done with user input for now. There's some -other things to eventually know about like key interrupts that you can set and -stuff, but we'll cover that later on because it's not necessary right now. diff --git a/book/src-bak/the_vcount_register.md b/book/src-bak/the_vcount_register.md deleted file mode 100644 index 20da70b..0000000 --- a/book/src-bak/the_vcount_register.md +++ /dev/null @@ -1,71 +0,0 @@ -# The VCount Register - -There's an IO register called -[VCOUNT](http://problemkaputt.de/gbatek.htm#lcdiointerruptsandstatus) that shows -you, what else, the Vertical (row) COUNT(er). It's a `u16` at address -`0x0400_0006`, and it's how we'll be doing our very poor quality vertical sync -code to start. - -* **What makes it poor?** Well, we're just going to read from the vcount value as - often as possible every time we need to wait for a specific value to come up, - and then proceed once it hits the point we're looking for. -* **Why is this bad?** Because we're making the CPU do a lot of useless work, - which uses a lot more power than necessary. Even if you're not on an actual - GBA you might be running inside an emulator on a phone or other handheld. You - wanna try to save battery if all you're doing with that power use is waiting - instead of making a game actually do something. -* **Can we do better?** We can, but not yet.
The better way to do things is to - use a BIOS call to put the CPU into low power mode until a VBlank interrupt - happens. However, we don't know about interrupts yet, and we don't know about - BIOS calls yet, so we'll do the basic thing for now and then upgrade later. - -So the way that display hardware actually displays each frame is that it moves a -tiny pointer left to right across each pixel row one pixel at a time. When it's -within the actual screen width (240px) it's drawing out those pixels. Then it -goes _past_ the edge of the screen for 68px during a period known as the -"horizontal blank" (HBlank). Then it starts on the next row and does that loop -over again. This happens for the whole screen height (160px) and then once again -it goes past the last row for another 68px into a "vertical blank" (VBlank) -period. - -* One pixel is 4 CPU cycles -* HDraw is 240 pixels, HBlank is 68 pixels (1,232 cycles per full scanline) -* VDraw is 160 scanlines, VBlank is 68 scanlines (280,896 cycles per full refresh) - -Now you may remember some stuff from the display control register section where -it was mentioned that some parts of memory are best accessed during VBlank, and -also during hblank with a setting applied. These blanking periods are what was -being talked about. At other times if you attempt to access video or object -memory you (the CPU) might try touching the same memory that the display device -is trying to use, in which case you get bumped back a cycle so that the display -can finish what it's doing. Also, if you really insist on doing video memory -changes while the screen is being drawn then you might get some visual glitches. -If you can, just prepare all your changes ahead of time and then assign them all -quickly during the blank period.
- -So first we want a way to check the vcount value at all: - -```rust -pub const VCOUNT: VolatilePtr = VolatilePtr(0x0400_0006 as *mut u16); - -pub fn vcount() -> u16 { - unsafe { VCOUNT.read() } -} -``` - -Then we want two little helper functions to wait until VBlank and vdraw. - -```rust -pub const SCREEN_HEIGHT: isize = 160; - -pub fn wait_until_vblank() { - while vcount() < SCREEN_HEIGHT as u16 {} -} - -pub fn wait_until_vdraw() { - while vcount() >= SCREEN_HEIGHT as u16 {} -} -``` - -And... that's it. No special types to be made this time around, it's just a -number we read out of memory. diff --git a/book/src-bak/tile_data.md b/book/src-bak/tile_data.md deleted file mode 100644 index 973c61c..0000000 --- a/book/src-bak/tile_data.md +++ /dev/null @@ -1,130 +0,0 @@ -# Tile Data - -When using the GBA's hardware graphics, if you want to let the hardware do most -of the work you have to use Modes 0, 1 or 2. However, to do that we first have -to learn about how tile data works inside of the GBA. - -## Tiles - -Fundamentally, a tile is an 8x8 image. If you want anything bigger than 8x8 you -need to arrange several tiles so that it looks like whatever you're trying to -draw. - -As was already mentioned, the GBA supports two different color modes: 4 bits per -pixel and 8 bits per pixel. This means that we have two types of tile that we -need to model. The pixel bits always represent an index into the PALRAM. - -* With 4 bits per pixel, the PALRAM is imagined to be 16 **palbank** sections of - 16 palette entries each. The image data selects the index within the palbank, - and an external configuration selects which palbank is used. -* With 8 bits per pixel, the PALRAM is imagined to be a single 256 entry array - and the index just directly picks which of the 256 colors is used. 
- -Knowing this, we can write the following definitions: - -```rust -#[derive(Debug, Clone, Copy, Default)] -#[repr(transparent)] -pub struct Tile4bpp { - pub data: [u32; 8] -} - -#[derive(Debug, Clone, Copy, Default)] -#[repr(transparent)] -pub struct Tile8bpp { - pub data: [u32; 16] -} -``` - -I hope this makes sense so far. At 4bpp, we have 4 bits per pixel, times 8 -pixels per line, times 8 lines: 256 bits required. Similarly, at 8 bits per -pixel we'll need 512 bits. Why are we defining them as arrays of `u32` values? -Because when it comes time to do bulk copies the fastest way to it will be to go -one whole machine word at a time. If we make the data inside the type be an -array of `u32` then it'll already be aligned for fast `u32` bulk copies. - -Keeping track of the current color depth is naturally the _programmer's_ -problem. If you get it wrong you'll see a whole ton of garbage pixels all over -the screen, and you'll probably be able to guess why. You know, unless you did -one of the other things that can make a bunch of garbage pixels show up all over -the screen. Graphics programming is fun like that. - -## Charblocks - -Tiles don't just sit on their own, they get grouped into **charblocks**. Long -ago in the distant past, video games were built with hardware that was also used -to make text terminals. So tile image data was called "character data". In fact -some guides will even call the regular mode for the background layers "text -mode", despite the fact that you obviously don't have to show text at all. - -A charblock is 16kb long (`0x4000` bytes), which means that the number of tiles -that fit into a charblock depends on your color depth. With 4bpp you get 512 -tiles, and with 8bpp there's 256 tiles. 
So they'd be something like this: - -```rust -#[derive(Clone, Copy)] -#[repr(transparent)] -pub struct Charblock4bpp { - pub data: [Tile4bpp; 512], -} - -#[derive(Clone, Copy)] -#[repr(transparent)] -pub struct Charblock8bpp { - pub data: [Tile8bpp; 256], -} -``` - -You'll note that we can't even derive `Debug` or `Default` any more because the -arrays are so big. Rust supports Clone and Copy for arrays of any size, but the -rest is still size 32 or less. We won't generally be making up an entire -Charblock on the fly though, so it's not a big deal. If we _absolutely_ had to, -we could call `core::mem::zeroed()`, but we really don't want to be trying to -build a whole charblock at runtime. We'll usually want to define our tile data -as `const` charblock values (or even parts of charblock values) that we then -load out of the game pak ROM at runtime. - -Anyway, with 16k per charblock and only 96k total in VRAM, it's easy math to see -that there's 6 different charblocks in VRAM when in a tiled mode. The first four -of these are for backgrounds, and the other two are for objects. There's rules -for how a tile ID on a background or object selects a tile within a charblock, -but since they're different between backgrounds and objects we'll cover that on -their own pages. - -## Image Editing - -It's very important to note that if you use a normal image editor you'll get -very bad results if you translate that directly into GBA memory. - -Imagine you have part of an image that's 16 by 16 pixels, aka 2 tiles by 2 -tiles. The data for that bitmap is the 1st row of the 1st tile, then the 1st row -of the 2nd tile. However, when we translate that into the GBA, the first 8 -pixels will indeed be the first 8 tile pixels, but then the next 8 pixels in -memory will be used as the _2nd row of the first tile_, not the 1st row of the -2nd tile. - -So, how do we fix this? 
- -Well, the simple but annoying way is to edit your tile image as being an 8 pixel -wide image and then have the image get super tall as you add more and more -tiles. It can work, but it's really impractical if you have any multi-tile -things that you're trying to do. - -Instead, there are some image conversion tools that devkitpro provides in their -gba-dev section. They let you take normal images and then repackage them and -export it in various formats that you can then compile into your project. - -Ketsuban uses the [grit](http://www.coranac.com/projects/grit/) tool, with the -following suggestions: - -1) Include an actual resource file and a file describing it somewhere in your - project (see [the grit - manual](http://www.coranac.com/man/grit/html/index.htm) for all details - involved here). -2) In a `build.rs` you run `grit` on each resource+description pair, such as in - this [old gist - example](https://gist.github.com/ketsuban/526fa55fbef0a3ccd4c7cd6204f29f94) -3) Then within your rust code you use the - [include_bytes!](https://doc.rust-lang.org/core/macro.include_bytes.html) - macro to have the formatted resource be available as a const value you can - load at runtime. diff --git a/book/src-bak/video_memory_intro.md b/book/src-bak/video_memory_intro.md deleted file mode 100644 index fb133cb..0000000 --- a/book/src-bak/video_memory_intro.md +++ /dev/null @@ -1,113 +0,0 @@ -# Video Memory Intro - -The GBA's Video RAM is 96k stretching from `0x0600_0000` to `0x0601_7FFF`. - -The Video RAM can only be accessed totally freely during a Vertical Blank (aka -"VBlank", though sometimes I forget and don't capitalize it properly). At other -times, if the CPU tries to touch the same part of video memory as the display -controller is accessing then the CPU gets bumped by a cycle to avoid a clash. - -Annoyingly, VRAM can only be properly written to in 16 and 32 bit segments (same -with PALRAM and OAM). 
If you try to write just an 8 bit segment, then both parts -of the 16 bit segment get the same value written to them. In other words, if you -write the byte `5` to `0x0600_0000`, then both `0x0600_0000` and ALSO -`0x0600_0001` will have the byte `5` in them. We have to be extra careful when -trying to set an individual byte, and we also have to be careful if we use -`memcpy` or `memset` as well, because they're byte oriented by default and -don't know to follow the special rules. - -## RGB15 - -As I said before, RGB15 stores a color within a `u16` value using 5 bits for -each color channel. - -```rust -pub const RED: u16 = 0b0_00000_00000_11111; -pub const GREEN: u16 = 0b0_00000_11111_00000; -pub const BLUE: u16 = 0b0_11111_00000_00000; -``` - -In Mode 3 and Mode 5 we write direct color values into VRAM, and in Mode 4 we -write palette index values, and then the color values go into the PALRAM. - -## Mode 3 - -Mode 3 is pretty easy. We have a full resolution grid of rgb15 pixels. There's -160 rows of 240 pixels each, with the base address being the top left corner. A -particular pixel uses normal "2d indexing" math: - -```rust -let row_five_col_seven = 7 + (5 * SCREEN_WIDTH); -``` - -To draw a pixel, we just write a value at the address for the row and col that -we want to draw to. - -## Mode 4 - -Mode 4 introduces page flipping. Instead of one giant page at `0x0600_0000`, -there's Page 0 at `0x0600_0000` and then Page 1 at `0x0600_A000`. The resolution -for each page is the same as above, but instead of writing `u16` values, the -memory is treated as `u8` indexes into PALRAM. The PALRAM starts at -`0x0500_0000`, and there's enough space for 256 palette entries (each a `u16`). - -To set the color of a palette entry we just do a normal `u16` write_volatile. - -```rust -(0x0500_0000 as *mut u16).offset(target_index).write_volatile(new_color) -``` - -To draw a pixel we set the palette entry that we want the pixel to use.
However, -we must remember the "minimum size" write limitation that applies to VRAM. So, -if we want to change just a single pixel at a time we must - -1) Read the full `u16` it's a part of. -2) Clear the half of the `u16` we're going to replace -3) Write the half of the `u16` we're going to replace with the new value -4) Write that result back to the address. - -So, the math for finding a byte offset is the same as Mode 3 (since they're both -a 2d grid). The GBA is little-endian, so if the byte offset is EVEN it'll be the -low bits of the `u16` at half the byte offset rounded down. If the offset is ODD -it'll be the high bits of the `u16` at half the byte offset rounded down. - -Does that make sense? - -* If we want to write pixel (0,0) the byte offset is 0, so we change the low - bits of `u16` offset 0. Then we want to write to (1,0), so the byte offset is - 1, so we change the high bits of `u16` offset 0. The pixels are next to each - other, and the target bytes are next to each other, good so far. -* If we want to write to (5,6) that'd be byte `5 + 6 * 240 = 1445`, so we'd - target the high bits of `u16` offset `floor(1445/2) = 722`. - -As you can see, trying to write individual pixels in Mode 4 is mostly a bad -time. Fret not! We don't _have_ to write individual bytes. If our data is -arranged correctly ahead of time we can just write `u16` or `u32` values -directly. The video hardware doesn't care, it'll get along just fine. - -## Mode 5 - -Mode 5 is also a two page mode, but instead of compressing the size of a pixel's -data to fit in two pages, we compress the resolution. - -Mode 5 is full `u16` color, but only 160w x 128h per page. - -## In Conclusion... - -So what got written into VRAM in `hello1`?
- -```rust - (0x06000000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F); - (0x06000000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0); - (0x06000000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00); -``` - -So at pixels `(120,80)`, `(136,80)`, and `(120,96)` we write three values. Once -again we probably need to [convert them](https://www.wolframalpha.com/) into -binary to make sense of it. - -* 0x001F: 0b0_00000_00000_11111 -* 0x03E0: 0b0_00000_11111_00000 -* 0x7C00: 0b0_11111_00000_00000 - -Ah, of course, a red pixel, a green pixel, and a blue pixel. diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md deleted file mode 100644 index 327fe00..0000000 --- a/book/src/SUMMARY.md +++ /dev/null @@ -1,9 +0,0 @@ - -# Rust GBA Guide - -* [Development Setup](development-setup.md) -* [Volatile](volatile.md) -* [The Hardware Memory Map](the-hardware-memory-map.md) -* [IO Registers](io-registers.md) -* [Bitmap Video](bitmap-video.md) -* [GBA Assembly](gba-asm.md) diff --git a/book/src/bitmap-video.md b/book/src/bitmap-video.md deleted file mode 100644 index 0b10fa3..0000000 --- a/book/src/bitmap-video.md +++ /dev/null @@ -1,214 +0,0 @@ -# Bitmap Video - -Our first video modes to talk about are the bitmap video modes. - -It's not because they're the best and fastest, it's because they're the -_simplest_. You can get going and practice with them really quickly. Usually -after that you end up wanting to move on to the other video modes because they -have better hardware support, so you can draw more complex things with the small -number of cycles that the GBA allows. - -## The Three Bitmap Modes - -As I said in the Hardware Memory Map section, the Video RAM lives in the address -space at `0x600_0000`. Depending on our video mode the display controller will -consider this memory to be in one of a few totally different formats. - -### Mode 3 - -The screen is 160 rows, each 240 pixels long, of `u16` color values. 
- -This is "full" resolution, and "full" color. It adds up to 76,800 bytes. VRAM is -only 98,304 bytes total though. There's enough space left over after the bitmap -for some object tile data if you want to use objects, but basically Mode3 is -using all of VRAM as one huge canvas. - -### Mode 4 - -The screen is 160 rows, each 240 pixels long, of `u8` palette values. - -This has half as much space per pixel. What's a palette value? That's an index -into the background PALRAM which says what the color of that pixel should be. We -still have the full color space available, but we can only use 256 colors at the -same time. - -What did we get in exchange for this? Well, now there's a second "page". The -second page starts `0xA000` bytes into VRAM (in both Mode 4 and Mode 5). It's an -entire second set of pixel data. You determine if Page 0 or Page 1 is shown -using bit 4 of DISPCNT. When you swap which page is being displayed it's called -page flipping or flipping the page, or something like that. - -Having two pages is cool, but Mode 4 has a big drawback: it's part of VRAM so -that "can't write 1 byte at a time" rule applies. This means that to set a -single byte we need to read a `u16`, adjust just one side of it, and then write -that `u16` back. We can hide the complication behind a method call, but it -simply takes longer to do all that, so editing pixels ends up being -unfortunately slow compared to the other bitmap modes. - -### Mode 5 - -The screen is 128 rows, each 160 pixels long, of `u16` color values. - -Mode 5 has two pages like Mode 4 does, but instead of keeping full resolution we -keep full color. The pixels are displayed in the top left and it's just black on -the right and bottom edges. You can use the background control registers to -shift it around, maybe center it, but there's no way to get around the fact that -not having full resolution is kinda awkward. - -## Using Mode 3 - -Let's have a look at how this comes together.
We'll call this one -`hello_world.rs`, since it's our first real program. - -### Module Attributes and Imports - -At the top of our file we're still `no_std` and we're still using -`feature(start)`, but now we're using the `gba` crate so we're 100% safe code! -Often enough we'll need a little `unsafe`, but for just bitmap drawing we don't -need it. - -```rust -#![no_std] -#![feature(start)] -#![forbid(unsafe_code)] - -use gba::{ - fatal, - io::{ - display::{DisplayControlSetting, DisplayMode, DISPCNT, VBLANK_SCANLINE, VCOUNT}, - keypad::read_key_input, - }, - vram::bitmap::Mode3, - Color, -}; -``` - -### Panic Handler - -Before we had a panic handler that just looped forever. Now that we're using the -`gba` crate we can rely on the debug output channel from `mGBA` to get a message -into the real world. There's macros setup for each message severity, and they -all accept a format string and arguments, like how `println` works. The catch is -that a given message is capped at a length of 255 bytes, and it should probably -be ASCII only. - -In the case of the `fatal` message level, it also halts the emulator. - -Of course, if the program is run on real hardware then the `fatal` message won't -stop the program, so we still need the infinite loop there too. - -(not that this program _can_ panic, but `rustc` doesn't know that so it demands -we have a `panic_handler`) - -```rust -#[panic_handler] -fn panic(info: &core::panic::PanicInfo) -> ! { - // This kills the emulation with a message if we're running within mGBA. - fatal!("{}", info); - // If we're _not_ running within mGBA then we still need to not return, so - // loop forever doing nothing. - loop {} -} -``` - -### Waiting Around - -Like I talked about before, sometimes we need to wait around a bit for the right -moment to start doing work. However, we don't know how to do the good version of -waiting for VBlank and VDraw to start, so we'll use the really bad version of it -for now. 
- -```rust -/// Performs a busy loop until VBlank starts. -/// -/// This is very inefficient, and please keep following the lessons until we -/// cover how interrupts work! -pub fn spin_until_vblank() { - while VCOUNT.read() < VBLANK_SCANLINE {} -} - -/// Performs a busy loop until VDraw starts. -/// -/// This is very inefficient, and please keep following the lessons until we -/// cover how interrupts work! -pub fn spin_until_vdraw() { - while VCOUNT.read() >= VBLANK_SCANLINE {} -} -``` - -### Setup in `main` - -In main we set the display control value we want and declare a few variables -we're going to use in our primary loop. - -```rust -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - const SETTING: DisplayControlSetting = - DisplayControlSetting::new().with_mode(DisplayMode::Mode3).with_bg2(true); - DISPCNT.write(SETTING); - - let mut px = Mode3::WIDTH / 2; - let mut py = Mode3::HEIGHT / 2; - let mut color = Color::from_rgb(31, 0, 0); -``` - -### Stuff During VDraw - -When a frame starts we want to read the keys, then adjust as much of the game -state as we can without touching VRAM. - -Once we're ready, we do our spin loop until VBlank starts. - -In this case, we're going to adjust `px` and `py` depending on the arrow pad -input, and also we'll cycle around the color depending on L and R being pressed. - -```rust - loop { - // read our keys for this frame - let this_frame_keys = read_key_input(); - - // adjust game state and wait for vblank - px = px.wrapping_add(2 * this_frame_keys.x_tribool() as usize); - py = py.wrapping_add(2 * this_frame_keys.y_tribool() as usize); - if this_frame_keys.l() { - color = Color(color.0.rotate_left(5)); - } - if this_frame_keys.r() { - color = Color(color.0.rotate_right(5)); - } - - // now we wait - spin_until_vblank(); -``` - -### Stuff During VBlank - -When VBlank starts we want want to update video memory to display the new -frame's situation. 
- -In our case, we're going to paint a little square of the current color, but also -if you go off the map it resets the screen. - -At the end, we spin until VDraw starts so we can do the whole thing again. - -```rust - // draw the new game and wait until the next frame starts. - if px >= Mode3::WIDTH || py >= Mode3::HEIGHT { - // out of bounds, reset the screen and position. - Mode3::dma_clear_to(Color::from_rgb(0, 0, 0)); - px = Mode3::WIDTH / 2; - py = Mode3::HEIGHT / 2; - } else { - // draw the new part of the line - Mode3::write(px, py, color); - Mode3::write(px, py + 1, color); - Mode3::write(px + 1, py, color); - Mode3::write(px + 1, py + 1, color); - } - - // now we wait again - spin_until_vdraw(); - } -} -``` diff --git a/book/src/development-setup.md b/book/src/development-setup.md deleted file mode 100644 index d6d3231..0000000 --- a/book/src/development-setup.md +++ /dev/null @@ -1,189 +0,0 @@ -# Development Setup - -Before you can build a GBA game you'll have to follow some special steps to -setup the development environment. - -Once again, extra special thanks to **Ketsuban**, who first dove into how to -make this all work with rust and then shared it with the world. - -## Per System Setup - -Obviously you need your computer to have a [working rust -installation](https://rustup.rs/). However, you'll also need to ensure that -you're using a nightly toolchain (we will need it for inline assembly, among -other potential useful features). You can run `rustup default nightly` to set -nightly as the system wide default toolchain, or you can use a [toolchain -file](https://github.com/rust-lang-nursery/rustup.rs#the-toolchain-file) to use -nightly just on a specific project, but either way we'll be assuming the use of -nightly from now on. You'll also need the `rust-src` component so that -`cargo-xbuild` will be able to compile the core crate for us in a bit, so run -`rustup component add rust-src`. 
- -Next, you need [devkitpro](https://devkitpro.org/wiki/Getting_Started). They've -got a graphical installer for Windows that runs nicely, and I guess `pacman` -support on Linux (I'm on Windows so I haven't tried the Linux install myself). -We'll be using a few of their general binutils for the `arm-none-eabi` target, -and we'll also be using some of their tools that are specific to GBA -development, so _even if_ you already have the right binutils for whatever -reason, you'll still want devkitpro for the `gbafix` utility. - -* On Windows you'll want something like `C:\devkitpro\devkitARM\bin` and - `C:\devkitpro\tools\bin` to be [added to your - PATH](https://stackoverflow.com/q/44272416/455232), depending on where you - installed it to and such. -* On Linux you can use pacman to get it, and the default install puts the stuff - in `/opt/devkitpro/devkitARM/bin` and `/opt/devkitpro/tools/bin`. If you need - help you can look in our repository's - [.travis.yml](https://github.com/rust-console/gba/blob/master/.travis.yml) - file to see exactly what our CI does. - -Finally, you'll need `cargo-xbuild`. Just run `cargo install cargo-xbuild` and -cargo will figure it all out for you. - -## Per Project Setup - -Once the system wide tools are ready, you'll need some particular files each -time you want to start a new project. You can find them in the root of the -[rust-console/gba repo](https://github.com/rust-console/gba). - -* `thumbv4-none-agb.json` describes the overall GBA to cargo-xbuild (and LLVM) - so it knows what to do. Technically the GBA is `thumbv4-none-eabi`, but we - change the `eabi` to `agb` so that we can distinguish it from other `eabi` - devices when using `cfg` flags. -* `crt0.s` describes some ASM startup stuff. If you have more ASM to place here - later on this is where you can put it. You also need to build it into a - `crt0.o` file before it can actually be used, but we'll cover that below. 
-* `linker.ld` tells the linker all the critical info about the layout - expectations that the GBA has about our program, and that it should also - include the `crt0.o` file with our compiled rust code. - -## Compiling - -Once all the tools are in place, there's particular steps that you need to -compile the project. For these to work you'll need some source code to compile. -Unlike with other things, an empty main file and/or an empty lib file will cause -a total build failure, because we'll need a -[no_std](https://rust-embedded.github.io/book/intro/no-std.html) build, and rust -defaults to builds that use the standard library. The next section has a minimal -example file you can use (along with explanation), but we'll describe the build -steps here. - -* `arm-none-eabi-as crt0.s -o target/crt0.o` - * This builds your text format `crt0.s` file into object format `crt0.o` - that's placed in the `target/` directory. Note that if the `target/` - directory doesn't exist yet it will fail, so you have to make the directory - if it's not there. You don't need to rebuild `crt0.s` every single time, - only when it changes, but you might as well throw a line to do it every time - into your build script so that you never forget because it's a practically - instant operation anyway. - -* `cargo xbuild --target thumbv4-none-agb.json` - * This builds your Rust source. It accepts _most of_ the normal options, such - as `--release`, and options, such as `--bin foo` or `--examples`, that you'd - expect `cargo` to accept. - * You **can not** build and run tests this way, because they require `std`, - which the GBA doesn't have. If you want you can still run some of your - project's tests with `cargo test --lib` or similar, but that builds for your - local machine, so anything specific to the GBA (such as reading and writing - registers) won't be testable that way. 
If you want to isolate and try out - some piece code running on the GBA you'll unfortunately have to make a demo - for it in your `examples/` directory and then run the demo in an emulator - and see if it does what you expect. - * The file extension is important! It will work if you forget it, but `cargo - xbuild` takes the inclusion of the extension as a flag to also compile - dependencies with the same sysroot, so you can include other crates in your - build. Well, crates that work in the GBA's limited environment, but you get - the idea. - -At this point you have an ELF binary that some emulators can execute directly -(more on that later). However, if you want a "real" ROM that works in all -emulators and that you could transfer to a flash cart to play on real hardware -there's a little more to do. - -* `arm-none-eabi-objcopy -O binary target/thumbv4-none-agb/MODE/BIN_NAME target/ROM_NAME.gba` - * This will perform an [objcopy](https://linux.die.net/man/1/objcopy) on our - program. Here I've named the program `arm-none-eabi-objcopy`, which is what - devkitpro calls their version of `objcopy` that's specific to the GBA in the - Windows install. If the program isn't found under that name, have a look in - your installation directory to see if it's under a slightly different name - or something. - * As you can see from reading the man page, the `-O binary` option takes our - lovely ELF file with symbols and all that and strips it down to basically a - bare memory dump of the program. - * The next argument is the input file. You might not be familiar with how - `cargo` arranges stuff in the `target/` directory, and between RLS and - `cargo doc` and stuff it gets kinda crowded, so it goes like this: - * Since our program was built for a non-local target, first we've got a - directory named for that target, `thumbv4-none-agb/` - * Next, the "MODE" is either `debug/` or `release/`, depending on if we had - the `--release` flag included. 
You'll probably only be packing release - mode programs all the way into GBA roms, but it works with either mode. - * Finally, the name of the program. If your program is something out of the - project's `src/bin/` then it'll be that file's name, or whatever name you - configured for the bin in the `Cargo.toml` file. If your program is - something out of the project's `examples/` directory there will be a - similar `examples/` sub-directory first, and then the example's name. - * The final argument is the output of the `objcopy`, which I suggest putting - at just the top level of the `target/` directory. Really it could go - anywhere, but if you're using git then it's likely that your `.gitignore` - file is already setup to exclude everything in `target/`, so this makes sure - that your intermediate game builds don't get checked into your git. - -* `gbafix target/ROM_NAME.gba` - * The `gbafix` tool also comes from devkitpro. The GBA is very picky about a - ROM's format, and `gbafix` patches the ROM's header and such so that it'll - work right. Unlike `objcopy`, this tool is custom built for GBA development, - so it works just perfectly without any arguments beyond the file name. The - ROM is patched in place, so we don't even need to specify a new destination. - -And you're _finally_ done! - -Of course, you probably want to make a script for all that, but it's up to you. -On our own project we have it mostly set up within a `Makefile.toml` which runs -using the [cargo-make](https://github.com/sagiegurari/cargo-make) plugin. - -## Checking Your Setup - -As I said, you need some source code to compile just to check that your -compilation pipeline is working. Here's a sample file that just puts three dots -on the screen without depending on any crates or anything at all. - -`hello_magic.rs`: - -```rust -#![no_std] -#![feature(start)] - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! 
{ - loop {} -} - -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - unsafe { - (0x400_0000 as *mut u16).write_volatile(0x0403); - (0x600_0000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F); - (0x600_0000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0); - (0x600_0000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00); - loop {} - } -} - -#[no_mangle] -static __IRQ_HANDLER: extern "C" fn() = irq_handler; - -extern "C" fn irq_handler() {} -``` - -Throw that into your project skeleton, build the program, and give it a run in -an emulator. I suggest [mgba](https://mgba.io/2019/01/26/mgba-0.7.0/), it has -some developer tools we'll use later on. You should see a red, green, and blue -dot close-ish to the middle of the screen. If you don't, something _already_ -went wrong. Double check things, phone a friend, write your senators, try asking -`Lokathor` or `Ketsuban` on the [Rust Community -Discord](https://discordapp.com/invite/aVESxV8), until you're eventually able to -get your three dots going. - -Of course, I'm sure you want to know why those particular numbers are the -numbers to use. Well that's what the whole rest of the book is about! diff --git a/book/src/gba-asm.md b/book/src/gba-asm.md deleted file mode 100644 index 082aa4e..0000000 --- a/book/src/gba-asm.md +++ /dev/null @@ -1,123 +0,0 @@ -# GBA Assembly - -On the GBA sometimes you just end up using assembly. Not a whole lot, but -sometimes. Accordingly, you should know how assembly works on the GBA. - -* The [ARM Infocenter: - ARM7TDMI](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0210c/index.html) - is the basic authority for reference information. The GBA has a CPU with the - `ARMv4` ISA, the `ARMv4T` variant, and specifically the `ARM7TDMI` - microarchitecture. Someone at ARM decided that having both `ARM#` and `ARMv#` - was a good way to [version things](https://en.wikichip.org/wiki/arm/versions), - even when the numbers don't match. 
The rest of us have been sad ever since. - The link there will take you to the correct book specific to the GBA's - microarchitecture. There's a whole big pile of ARM books available within the - ARM Infocenter, so if you just google it or whatever make sure you end up - looking at the correct one. Note that there is also a [PDF - Version](http://infocenter.arm.com/help/topic/com.arm.doc.ddi0210c/DDI0210B.pdf) - of the documentation available, if you'd like that. - -* In addition to the `ARM7TDMI` book, which is specific to the GBA's CPU, you'll - need to find a copy of the ARM Architecture Reference Manual if you want - general ARM knowledge. The ARM Infocenter has the - [ARMv5](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0100i/index.html) - version of said manual hosted on their site. Unfortunately, they don't seem to - host the `ARMv4T` version of the manual any more. - -* The [GBATek: ARM CPU - Overview](https://problemkaputt.de/gbatek.htm#armcpuoverview) also has quite a - bit of info. Some of it is a duplication of what you'd find in the ARM - Infocenter reference manuals. Some of it is information that's specific to the - GBA's layout and how the CPU interacts with other parts (such as how its - timings and the display adapter's timings line up). Some of it is specific to - the ARM chips _within the DS and DSi_, so be careful to make sure that you - don't wander into the wrong section. GBATEK is always a bit of a jumbled mess, - and the explanations are often "sparse" (to put it nicely), so I'd advise that - you also look at the official ARM manuals. - -* The [Compiler Explorer](https://rust.godbolt.org/z/ndCnk3) can be used to - quickly look at assembly versions of your Rust code. That link there will load - up an essentially blank `no_std` file with `opt-level=3` set and targeting - `thumbv6m-none-eabi`. 
That's _not_ the same target as the GBA (it's two ISA - revisions later, `ARMv6` instead of `ARMv4`), but it's the closest CPU target - that is bundled with `rustc`, so it's the closest you can get with the - compiler explorer website. If you're very dedicated I suppose you could setup - a [local - instance](https://github.com/mattgodbolt/compiler-explorer#running-a-local-instance) - of compiler explorer and then add the extra target definition and so on, but - that's _probably_ overkill. - -## ARM and Thumb - -The "T" part in `ARMv4T` and `ARM7TDMI` means "Thumb". An ARM chip that supports -Thumb has two different instruction sets instead of just one. The chip can run -in ARM state with 32-bit instructions, or it can run in Thumb state with 16-bit -instructions. Note that the CPU _state_ (ARM or Thumb) is distinct from the -_mode_ (User, FIQ, IRQ, etc). Apparently these states are sometimes called -`a32` and `t32` in a more modern context, but I will stick with ARM and Thumb -because that's what the official ARM7TDMI manual and GBATEK both use. - -On the GBA, the memory bus that physically transfers data from the cartridge into -the device is a 16-bit memory bus. This means that if you need to transfer more -than 16 bits at a time you have to do more than one transfer. Since we'd like -our instructions to get to the CPU as fast as possible, we compile the majority -of our program with the Thumb instruction set. The ARM reference says that with -Thumb instructions on a 16-bit memory bus system you get about 160% performance -compared to using ARM instructions. That's absolutely something we want to take -advantage of. Also, your Thumb compiled code is about 65% of the same code -compiled with ARM. Since a game ROM can only be 32MB total, and we're trying to -fit in images and sound too, we want to get space savings where we can. 
- -You may wonder, why is the Thumb code 65% as large if the instructions -themselves are 50% as large, and why have ARM state at all if there's such a -benefit to be had with Thumb? Well, Thumb state doesn't support as many different -instructions as ARM state does. Some lines of source code that can compile to a -single ARM instruction might need to compile into more than one Thumb -instruction. Thumb still has most of the really good instructions available, so -it all averages out to about 65%. - -That said, some parts of a GBA program _must_ be written for ARM state. Also, -ARM state does allow that increased instruction flexibility. So we _need_ to use -ARM some of the time, and we might just _want_ to use ARM even when we don't -need to at other times. It is possible to switch states on the fly, there's -extremely minimal overhead, even less than doing some function calls. The only -problem is the 16-bit memory bus of the cartridge giving us a needless speed -penalty with our ARM code. The CPU _executes_ the ARM instructions at full -speed, but then it has to wait while more instructions get sent in. What do we -do? Well, code is ultimately just a different kind of data. We can copy parts of -our code off the cartridge ROM and place it into a part of the RAM that has a -32-bit memory bus. Then the CPU can execute the code from there, going at full -speed. Of course, there's only a very small amount of RAM compared to the size -of a cartridge, so we'll only do this with a few select functions. Exactly which -functions will probably depend on your game. - -There's two problems that we face as Rust programmers: - -1) Rust offers no way to specify individual functions as being ARM or Thumb. The - whole program is compiled for one state or the other. Obviously this is no - good, so it's on the [2019 embedded - wishlist](https://github.com/rust-embedded/wg/issues/256#issuecomment-439677804), - and perhaps a fix will come. 
- -2) Rust offers no way to get a pointer to a function as well as the length of - the compiled function, so we can't copy a function from the ROM to some other - location because we can't even express statements about the function's data. - I also put this [on the - wishlist](https://github.com/rust-embedded/wg/issues/256#issuecomment-450539836), - but honestly I have much less hope that this becomes a part of rust. - -What this ultimately means is that some parts of our program have to be written -in external assembly files and then added to the program with the linker. We -were already going to write some assembly, and we already use more than one file -in our project all the time, those parts aren't a big problem. The big problem -is that using custom linker scripts to get assembly code into our final program -isn't transitive between crates. - -What I mean is that once we have a file full of custom assembly that we're -linking in by hand, that's not "part of" the crate any more. At least not as -`cargo` sees it. So we can't just upload it to `crates.io` and then depend on it -in other projects and have `cargo` download the right version and include it -all automatically. We're back to fully manually copying files from the old -project into the new one, adding more lines to the linker script each time we -split up a new assembly file, all that stuff. Like the stone age. Sometimes ya -gotta suffer for your art. diff --git a/book/src/io-registers.md b/book/src/io-registers.md deleted file mode 100644 index de3aa7c..0000000 --- a/book/src/io-registers.md +++ /dev/null @@ -1,237 +0,0 @@ -# IO Registers - -As I said before, the IO registers are how you tell the GBA to do all the things -you want it to do. If you want a hint at what's available, they're all listed -out in the [GBA I/O Map](https://problemkaputt.de/gbatek.htm#gbaiomap) section -of GBATEK. Go have a quick look.
- -Each individual IO register has a particular address just like we talked about -in the Hardware Memory Map section. They also have a size (listed in bytes), and -a note on if they're read only, write only, or read-write. Finally, each -register has a name and a one line summary. Unfortunately for us, the names are -all C style names with heavy shorthand. I'm not normally a fan of shorthand -names, but the `gba` crate uses the register names from GBATEK as much as -possible, since they're the most commonly used set of names among GBA -programmers. That way, if you're reading other guides and they say to set the -`BG2CNT` register, then you know exactly what register to look for within the -`gba` docs. - -## Register Bits - -There's only about 100 registers, but there's a lot more than 100 details we -want to have control over on the GBA. How does that work? Well, let's use a -particular register to talk about it. The first one on the list is `DISPCNT`, -the "Display Control" register. It's one of the most important IO registers, so -this is a "two birds with one stone" situation. - -Naturally there's a whole lot of things involved in the LCD that we want to -control, and it's all "one" value, but that value is actually many "fields" -packed into one value. When learning about an IO register, you have to look at -its bit pattern breakdown. For `DISPCNT` the GBATEK entry looks like this: - -```txt -4000000h - DISPCNT - LCD Control (Read/Write) - Bit Expl. 
- 0-2 BG Mode (0-5=Video Mode 0-5, 6-7=Prohibited) - 3 Reserved / CGB Mode (0=GBA, 1=CGB; can be set only by BIOS opcodes) - 4 Display Frame Select (0-1=Frame 0-1) (for BG Modes 4,5 only) - 5 H-Blank Interval Free (1=Allow access to OAM during H-Blank) - 6 OBJ Character VRAM Mapping (0=Two dimensional, 1=One dimensional) - 7 Forced Blank (1=Allow FAST access to VRAM,Palette,OAM) - 8 Screen Display BG0 (0=Off, 1=On) - 9 Screen Display BG1 (0=Off, 1=On) - 10 Screen Display BG2 (0=Off, 1=On) - 11 Screen Display BG3 (0=Off, 1=On) - 12 Screen Display OBJ (0=Off, 1=On) - 13 Window 0 Display Flag (0=Off, 1=On) - 14 Window 1 Display Flag (0=Off, 1=On) - 15 OBJ Window Display Flag (0=Off, 1=On) -``` - -So what we're supposed to understand here is that we've got a `u16`, and then we -set the individual bits for the things that we want. In the `hello_magic` -example you might recall that we set this register to the value `0x0403`. That -was a bit of a trick on my part because hex numbers usually look far more -mysterious than decimal or binary numbers. If we converted it to binary it'd -look like this: - -```rust -0b100_0000_0011 -``` - -And then you can just go down the list of settings to see what bits are what: - -* Bits 0-2 (BG Mode) are `0b011`, so that's Video Mode 3 -* Bit 10 (Display BG2) is enabled -* Everything else is disabled - -Naturally, trying to remember exactly what bit does what can be difficult. In -the `gba` crate we attempt as much as possible to make types that wrap over a -`u16` or `u32` and then have getters and setters _as if_ all the inner bits were -different fields. - -* If it's a single bit then the getter/setter will use `bool`. -* If it's more than one bit and each pattern has some non-numeric meaning then - it'll use an `enum`. -* If it's more than one bit and numeric in nature then it'll just use the - wrapped integer type. 
Note that you generally won't get the full range of the - inner number type, and any excess gets truncated down to fit in the bits - available. - -All the getters and setters are defined as `const` functions, so you can make -constant declarations for the exact setting combinations that you want. - -## Some Important IO Registers - -It's not easy to automatically see what registers will be important for getting -started and what registers can be saved to learn about later. - -We'll go over three IO registers here that will help us the most to get started, -then next lesson we'll cover how that Video Mode 3 bitmap drawing works, and -then by the end of the next lesson we'll be able to put it all together into -something interactive. - -### DISPCNT: Display Control - -The [DISPCNT](https://problemkaputt.de/gbatek.htm#lcdiodisplaycontrol) register -lets us affect the major details of our video output. There's a lot of other -registers involved too, but it all starts here. - -```rust -pub const DISPCNT: VolAddress = unsafe { VolAddress::new(0x400_0000) }; -``` - -As you can see, the display control register is, like most registers, -complicated enough that we make it a dedicated type with getters and setters for -the "phantom" fields. In this case it's mostly a bunch of `bool` values we can -set, and also the video mode is an `enum`. - -We already looked at the bit listing above, let's go over what's important right -now and skip the other bits: - -* BG Mode sets how the whole screen is going to work and even how the display - adapter is going to interpret the bit layout of video memory for pixel - processing. We'll start with Mode 3, which is the simplest to learn. -* The "Forced Blank" bit is one of the very few bits that starts _on_ at the - start of the main program. When it's enabled it prevents the display adapter - from displaying anything at all. 
You use this bit when you need to do a very - long change to video memory and you don't want the user to see the - intermediate states being partly drawn. -* The "Screen Display" bits let us enable different display layers. We care - about BG2 right now because the bitmap modes (3, 4, and 5) are all treated as - if they were drawing into BG2 (even though it's the only BG layer available in - those modes). - -There's a bunch of other stuff, but we'll get to those things later. They're not -relevant right now, and there's enough to learn already. Already we can see that -when the `hello_magic` demo says - -```rust - (0x400_0000 as *mut u16).write_volatile(0x0403); -``` - -We could re-write that more sensibly like this - -```rust - const SETTING: DisplayControlSetting = - DisplayControlSetting::new().with_mode(DisplayMode::Mode3).with_bg2(true); - DISPCNT.write(SETTING); -``` - -### VCOUNT: Vertical Display Counter - -The [VCOUNT](https://problemkaputt.de/gbatek.htm#lcdiointerruptsandstatus) -register lets us find out what row of pixels (called a **scanline**) is -currently being processed. - -```rust -pub const VCOUNT: ROVolAddress = unsafe { ROVolAddress::new(0x400_0006) }; -``` - -You see, the display adapter is constantly running its own loop, along side the -CPU. It starts at the very first pixel of the very first scanline, takes 4 -cycles to determine what color that pixel is, and then processes the next -pixel. Each scanline is 240 pixels long, followed by 68 "virtual" pixels so that -you have just a moment to setup for the next scanline to be drawn if you need -it. 272 cycles (68*4) is not a lot of time, but it's enough that you could -change some palette colors or move some objects around if you need to.
- -* Horizontal pixel value `0..240`: "HDraw" -* Horizontal pixel value `240..308`: "HBlank" - -There's no way to check the current horizontal counter, but there is a way to -have the CPU interrupt the normal code when the HBlank period starts, which -we'll learn about later. - -Once a complete scanline has been processed (including the blank period), the -display adapter keeps going with the next scanline. Similar to how the -horizontal processing works, there's 160 scanlines in the real display, and then -it's followed by 68 "virtual" scanlines to give you time for adjusting video -memory between the frames of the game. - -* Vertical Count `0..160`: "VDraw" -* Vertical Count `160..228`: "VBlank" - -Once every scanline has been processed (including the vblank period), the -display adapter starts the whole loop over again with scanline 0. A total of -280,896 cycles per display loop (4 * 308 * 228), and about 59.59ns per CPU -cycle, gives us a full speed display rate of 59.73fps. That's close enough to -60fps that I think we can just round up a bit whenever we're not counting it -down to the exact cycle timings. - -However, there's a bit of a snag. If we change video memory during the middle of -a scanline the display will _immediately_ start processing using the new state -of video memory. The picture before the change and after the change won't look -like a single, clean picture. Instead you'll get what's called "[screen -tearing](https://en.wikipedia.org/wiki/Screen_tearing)", which is usually -considered to be the mark of a badly programmed game. - -To avoid this we just need to only adjust video memory during one of the blank -periods. If you're really cool you can adjust things during HBlank, but we're -not that cool yet. Starting out our general program flow will be: - -1) Gather input for the frame (next part of this lesson) and update the game - state, getting everything ready for when VBlank actually starts. 
-2) Once VBlank starts we update all of the video memory as fast as we can. -3) Once we're done drawing we again wait for the VDraw period to begin and then - do it all again. - -Now, it's not the most efficient way, but to get our timings right we can just -read from `VCOUNT` over and over in a "busy loop". Once we read a value of 160 -we know that we've entered VBlank. Once it goes back to 0 we know that we're -back in VDraw. - -Doing a busy loop like this actually drains the batteries way more than -necessary. It keeps the CPU active constantly, which is what uses a fair amount -of the power. Normally you're supposed to put the CPU to sleep if you're just -waiting around for something to happen. However, that also requires learning -about some more concepts to get right. So to keep things easier starting out -we'll do the bad/lazy version and then upgrade our technique later. - -### KEYINPUT: Key Input Reading - -The [KEYINPUT](https://problemkaputt.de/gbatek.htm#gbakeypadinput) register is -the last one we've got to learn about this lesson. It lets you check the status -of all 10 buttons on the GBA. - -```rust -pub const KEYINPUT: ROVolAddress = unsafe { ROVolAddress::new(0x400_0130) }; -``` - -There's little to say here. It's a read only register, and the data just -contains one bit per button. The only thing that's a little weird about it is -that the bits follow a "low active" convention, so if the button is pressed then -the bit is 0, and if the button is released the bit is 1. - -You _could_ work with that directly, but I think it's a lot easier to think -about having `true` for pressed and `false` for not pressed. So the `gba` crate -flips the bits when you read the keys: - -```rust -/// Gets the current state of the keys -pub fn read_key_input() -> KeyInput { - KeyInput(KEYINPUT.read() ^ 0b0000_0011_1111_1111) -} -``` - -Now we can treat the KeyInput values like a totally normal bitset. 
diff --git a/book/src/the-hardware-memory-map.md b/book/src/the-hardware-memory-map.md deleted file mode 100644 index 5ef79a5..0000000 --- a/book/src/the-hardware-memory-map.md +++ /dev/null @@ -1,379 +0,0 @@ -# The Hardware Memory Map - -So we saw `hello_magic.rs` and then we learned what `volatile` was all about, -but we've still got a few things that are a bit mysterious. You can't just cast -a number into a pointer and start writing to it! That's totally crazy! That's -writing to un-allocated memory! Against the rules! - -Well, _kinda_. It's true that you're not allowed to write _anywhere at all_, but -those locations were carefully selected locations. - -You see, on a modern computer if you need to check if a key is pressed you ask -the Operating System (OS) to please go check for you. If you need to play a -sound, you ask the OS to please play the sound on a default sound output. If you -need to show a picture you ask the OS to give you access to the video driver so -that you can ask the video driver to please put some pixels on the screen. -That's mostly fine, except how does the OS actually do it? It doesn't have an OS -to go ask, it has to stop somewhere. - -Ultimately, every piece of hardware is mapped into somewhere in the address -space of the CPU. You can't actually tell that this is the case as a normal user -because your program runs inside a virtualized address space. That way you can't -go writing into another program's memory and crash what they're doing or steal -their data (well, hopefully, it's obviously not perfect). Outside of the -virtualization layer the OS is running directly in the "true" address space, and -it can access the hardware on behalf of a program whenever it's asked to. - -How does directly accessing the hardware work, _precisely_? It's just the same -as accessing the RAM. Each address holds some bits, and the CPU picks an address -and loads in the bits. Then the program gets the bits and has to decide what -they mean. 
The "driver" of a hardware device is just the layer that translates -between raw bits in the outside world and more meaningful values inside of the -program. - -Of course, memory mapped hardware can change its bits at any time. The user can -press and release a key and you can't stop them. This is where `volatile` comes -in. Whenever there's memory mapped hardware you want to access it with -`volatile` operations so that you can be sure that you're sending the data every -time, and that you're getting fresh data every time. - -## GBA Specifics - -That's enough about the general concept of memory mapped hardware, let's get to -some GBA specifics. The GBA has the following sections in its memory map. - -* BIOS -* External Work RAM (EWRAM) -* Internal Work RAM (IWRAM) -* IO Registers -* Palette RAM (PALRAM) -* Video RAM (VRAM) -* Object Attribute Memory (OAM) -* Game Pak ROM (ROM) -* Save RAM (SRAM) - -Each of these has a few key points of interest: - -* **Bus Width:** Also just called "bus", this is how many little wires are - _physically_ connecting a part of the address space to the CPU. If you need to - transfer more data than fits in the bus you have to do repeated transfers - until it all gets through. -* **Read/Write Modes:** Most parts of the address space can be read from in 8, - 16, or 32 bits at a time (there's a few exceptions we'll see). However, a - significant portion of the address space can't accept 8 bit writes. Usually - this isn't a big deal, but standard `memcopy` routine switches to doing a - byte-by-byte copy in some situations, so we'll have to be careful about using - it in combination with those regions of the memory. -* **Access Speed:** On top of the bus width issue, not all memory can be - accessed at the same speed. The "fast" parts of memory can do a read or write - in 1 cycle, but the slower parts of memory can take a few cycles per access. - These are called "wait cycles". 
The exact timings depend on what you configure - the system to use, which is also limited by what your cartridge physically - supports. You'll often see timings broken down into `N` cycles (non-sequential - memory access) and `S` cycles (sequential memory access, often faster). There - are also `I` cycles (internal cycles) which happen whenever the CPU does an - internal operation that's more than one cycle to complete (like a multiply). - Don't worry, you don't have to count exact cycle timings unless you're on the - razor's edge of the GBA's abilities. For more normal games you just have to be - mindful of what you're doing and it'll be fine. - -Let's briefly go over the major talking points of each memory region. All of -this information is also available in GBATEK, mostly in their [memory -map](http://www.akkit.org/info/gbatek.htm#gbamemorymap) section (though somewhat -spread through the rest of the document too). - -Though I'm going to list the location range of each memory space below, most of -the hardware locations are actually mirrored at several points throughout the -address space. - -### BIOS - -* **Location:** `0x0` to `0x3FFF` -* **Bus:** 32-bit -* **Access:** Memory protected read-only (see text). -* **Wait Cycles:** None - -The "basic input output system". This contains a grab bag of utilities that do -various tasks. The code is optimized for small size rather than great speed, so -you can sometimes write faster versions of these routines. Also, calling a bios -function has more overhead than a normal function call. You can think of bios -calls as being similar to system calls to the OS on a desktop computer. Useful, -but costly. - -As a side note, not only is BIOS memory read only, but it's memory protected so -that you can't even read from bios memory unless the system is currently -executing a function that's in bios memory. If you try then the system just -gives back a nonsensical value that's not really what you asked for. 
If you -really want to know what's inside, there's actually a bug in one bios call -(`MidiKey2Freq`) that lets you read the bios section one byte at a time. - -Also, there's not just one bios! Of course there's the official bios from -Nintendo that's used on actual hardware, but since that's code instead of -hardware it's protected by copyright. Since a bios is needed to run a GBA -emulator properly, people have come up with their own open source versions or -they simply make the emulator special case the bios and act _as if_ the function -call had done the right thing. - -* The [TempGBA](https://github.com/Nebuleon/TempGBA) repository has an easy to - look at version written in assembly. Its API and effects are close enough to - the Nintendo version that most games will run just fine. -* You can also check out the [mGBA - bios](https://github.com/mgba-emu/mgba/blob/master/src/gba/bios.c) if you want - to see the C version of what various bios functions are doing. - -### External Work RAM (EWRAM) - -* **Location:** `0x200_0000` to `0x203_FFFF` (256k) -* **Bus:** 16-bit -* **Access:** Read-write, any size. -* **Wait Cycles:** 2 - -The external work ram is a sizable amount of space, but the 2 wait cycles per -access and 16-bit bus mean that you should probably think of it as being a -"heap" to avoid putting things in if you don't have to. - -The GBA itself doesn't use this for anything, so any use is totally up to you. - -At the moment, the linker script and `crt0.s` files provided with the `gba` -crate also have no defined use for the EWRAM, so it's 100% on you to decide how -you wanna use it. - -(Note: There is an undocumented control register that lets you adjust the wait -cycles on EWRAM. Using it, you can turn EWRAM from the default 2 wait cycles -down to 1. However, not all GBA-like things support it. The GBA and GBA SP do, -the GBA Micro and DS do not. Emulators might or might not depending on the -particular emulator.
See the [GBATEK system -control](https://problemkaputt.de/gbatek.htm#gbasystemcontrol) page for a full -description of that register, though probably only once you've read more of this -tutorial book and know how to make sense of IO registers and such.) - -### Internal Work RAM (IWRAM) - -* **Location:** `0x300_0000` to `0x300_7FFF` (32k) -* **Bus:** 32-bit -* **Access:** Read-write, any size. -* **Wait Cycles:** 0 - -This is where the "fast" memory for general purposes lives. By default the -system uses the 256 _bytes_ starting at `0x300_7F00` _and up_ for system and -interrupt purposes, while Rust's program stack starts at that same address _and -goes down_ from there. - -Even though your stack exists in this space, it's totally reasonable to use the -bottom parts of this memory space for whatever quick scratch purposes, same as -EWRAM. 32k is fairly huge, and the stack going down from the top and the scratch -data going up from the bottom are unlikely to hit each other. If they do you -were probably well on your way to a stack overflow anyway. - -The linker script and `crt0.s` file provided with the `gba` crate use the bottom -of IWRAM to store the `.data` and `.bss` [data -segments](https://en.wikipedia.org/wiki/Data_segment). That's where your global -variables get placed (both `static` and `static mut`). The `.data` segment holds -any variable that's initialized to non-zero, and the `.bss` section is for any -variable initialized to zero. When the GBA is powered on, some code in the -`crt0.s` file runs and copies the initial `.data` values into place within IWRAM -(all of `.bss` starts at 0, so there's no copy for those variables). - -If you have no global variables at all, then you don't need to worry about those -details, but if you do have some global variables then you can use the _address -of_ the `__bss_end` symbol defined in the top of the `gba` crate as a marker for -where it's safe for you to start using IWRAM without overwriting your globals. 
- -### IO Registers - -* **Location:** `0x400_0000` to `0x400_03FE` -* **Bus:** 32-bit -* **Access:** different for each IO register -* **Wait Cycles:** 0 - -The IO Registers are where most of the magic happens, and it's where most of the -variety happens too. Each IO register is a specific width, usually 16-bit but -sometimes 32-bit. Most of them are fully read/write, but some of them are read -only or write only. Some of them have individual bits that are read only even -when the rest of the register is writable. Some of them can be written to, but -the write doesn't change the value you read back, it sets something else. -Really. - -The IO registers are how you control every bit of hardware besides the CPU -itself. Reading the buttons, setting display modes, enabling timers, all of that -goes through different IO registers. Actually, even a few parts of the CPU's -operation can be controlled via IO register. - -We'll go over IO registers more in the next section, including a few specific -registers, and then we'll constantly encounter more IO registers as we explore -each new topic through the rest of the book. - -### Palette RAM (PALRAM) - -* **Location:** `0x500_0000` to `0x500_03FF` (1k) -* **Bus:** 16-bit -* **Access:** Read any, single bytes mirrored (see text). -* **Wait Cycles:** Video Memory Wait (see text) - -This is where the GBA stores color palette data. There's 256 slots for -Background color, and then 256 slots for Object color. - -GBA colors are 15 bits each, with five bits per channel and the highest bit -being totally ignored, so we store them as `u16` values: - -* `X_BBBBB_GGGGG_RRRRR` - -Of note is the fact that the 256 palette slots can be viewed in two different -ways. There's two different formats for images in video memory: "8 bit per -pixel" (8bpp) and "4 bit per pixel mode" (4bpp). - -* **8bpp:** Each pixel in the image is 8 bits and indexes directly into the full - 256 entry palette array. 
An index of 0 means that pixel should be transparent, - so there's 255 possible colors. -* **4bpp:** Each pixel in the image is 4 bits and indexes into a "palbank" of 16 - colors within the palette data. Some exterior control selects the palbank to - be used. An index of 0 still means that the pixel should be transparent, so - there's 15 possible colors. - -Different images can use different modes all at once, as long as you can fit all -the colors you want to use into your palette layout. - -PALRAM can't be written to in individual bytes. This isn't normally a problem at -all, because you wouldn't really want to write half of a color entry anyway. If -you do try to write a single byte then it gets "mirrored" into both halves of -the `u16` that would be associated with that address. For example, if you tried -to write `0x01u8` to either `0x500_0000` or `0x500_0001` then you'd actually -_effectively_ be writing `0x0101u16` to `0x500_0000`. - -PALRAM follows what we'll call the "Video Memory Wait" rule: If you try to access -the memory during a vertical blank or horizontal blank period there's 0 wait -cycles, and if you try to access the memory while the display controller is -drawing there is a 1 cycle wait inserted _if_ the display controller was using -that memory at that moment. - -### Video RAM (VRAM) - -* **Location:** `0x600_0000` to `0x601_7FFF` (96k or 64k+32k depending on mode) -* **Bus:** 16-bit -* **Access:** Read any, single bytes _sometimes_ mirrored (see text). -* **Wait Cycles:** Video Memory Wait (see text) - -Video RAM is the memory for what you want the display controller to be -displaying. The GBA actually has 6 different display modes (numbered 0 through -5), and depending on the mode you're using the layout that you should imagine -VRAM having changes. Because there's so much involved here, I'll leave more -precise details to the following sections which talk about how to use VRAM in -each mode. - -VRAM can't be written to in individual bytes.
If you try to write a single byte -to background VRAM the byte gets mirrored like with PALRAM, and if you try with -object VRAM the write gets ignored entirely. Exactly what address ranges those -memory types are depends on video mode, but just don't bother with individual -byte writes to VRAM. If you want to change a single byte of data (and you might) -then the correct style is to read the full `u16`, mask out the old data, mask in -your new value, and then write the whole `u16`. - -VRAM follows the same "Video Memory Wait" rule that PALRAM has. - -### Object Attribute Memory (OAM) - -* **Location:** `0x700_0000` to `0x700_03FF` (1k) -* **Bus:** 32-bit -* **Access:** Read any, single bytes no effect (see text). -* **Wait Cycles:** Video Memory Wait (see text) - -This part of memory controls the "Objects" (OBJ) on the screen. An object is -_similar to_ the concept of a "sprite". However, because of an object's size -limitations, a single sprite might require more than one object to be drawn -properly. In general, if you want to think in terms of sprites at all, you -should think of sprites as being a logical / programming concept, and objects as -being a hardware concept. - -While VRAM has the _image_ data for each object, this part of memory has the -_control_ data for each object. An object's "attributes" describe what part of -the VRAM to use, where to place it on the screen, any special graphical effects -to use, all that stuff. Each object has 6 bytes of attribute data (arranged as -three `u16` values), and there's a total of 128 objects (indexed 0 through 127). - -But 6 bytes each times 128 entries out of 1024 bytes leaves us with 256 bytes -left over. What's the other space used for? Well, it's a little weird, but after -every three `u16` object attribute fields there's one `i16` "affine parameter" -field mixed in. 
It takes four such fields to make a complete set of affine -parameters (a 2x2 matrix), so we get a total of 32 affine parameter entries -across all of OAM. "Affine" might sound fancy but it just means a transformation -where anything that started parallel stays parallel after the transform. The -affine parameters can be used to scale, rotate, and/or skew a background or -object as it's being displayed on the screen. It takes more computing power than -the non-affine display, so you can't display as many different things at once -when using the affine modes. - -OAM can't ever be written to with individual bytes. The write just has no effect -at all. - -OAM follows the same "Video Memory Wait" rule that PALRAM has, **and** you can -also only freely access OAM during a horizontal blank if you set a special -"HBlank Interval Free" bit in one of the IO registers (the "Display Control" -register, which we'll talk about next lesson). The reason that you might _not_ -want to set that bit is because when it's enabled you can't draw as many objects -at once. You don't lose the use of an exact number of objects, you actually lose -the use of a number of display adapter drawing cycles. Since not all objects -take the same number of cycles to render, it depends on what you're drawing. -GBATEK [has the details](https://problemkaputt.de/gbatek.htm#lcdobjoverview) if -you want to know precisely. - -### Game Pak ROM (ROM) - -* **Location:** Special (max of 32MB) -* **Bus:** 16-bit -* **Access:** Special -* **Wait Cycles:** Special - -This is where your actual game is located! As you might guess, since each -cartridge is different, the details here depend quite a bit on the cartridge -that you use for your game. Even a simple statement like "you can't write to the -ROM region" isn't true for some carts if they have FlashROM. - -The _most important_ thing to concern yourself with when considering the ROM -portion of memory is the 32MB limit. 
That's compiled code, images, sound, -everything put together. The total has to stay under 32MB. - -The next most important thing to consider is that 16-bit bus. It means that we -compile our programs using "Thumb state" code instead of "ARM state" code. -Details about this can be found in the GBA Assembly section of the book, but -just be aware that there's two different types of assembly on the GBA. You can -switch between them, but the default for us is always Thumb state. - -Another detail which you actually _don't_ have to think about much, but that you -might care if you're doing precise optimization, is that the ROM address space -is actually mirrored across three different locations: - -* `0x800_0000` to `0x9FF_FFFF`: Wait State 0 -* `0xA00_0000` to `0xBFF_FFFF`: Wait State 1 -* `0xC00_0000` to `0xDFF_FFFF`: Wait State 2 - -These _don't_ mean 0, 1, and 2 wait cycles, they mean the wait cycles associated -with ROM mirrors 0, 1, and 2. On some carts the game will store different parts -of the data into different chips that are wired to be accessible through -different parts of the mirroring. The actual wait cycles used are even -configurable via an IO register called the -[WAITCNT](https://problemkaputt.de/gbatek.htm#gbasystemcontrol) ("Wait Control", -I don't know why C programmers have to give everything the worst names it's not -1980 any more). - -### Save RAM (SRAM) - -* **Location:** Special (max of 64k) -* **Bus:** 8-bit -* **Access:** Special -* **Wait Cycles:** Special - -The Save RAM is also part of the cart that you've got your game on, so it also -depends on your hardware. - -SRAM _starts_ at `0xE00_0000` and you can save up to however much the hardware -supports, to a maximum of 64k. However, you can only read and write SRAM one -_byte_ at a time. 
What's worse, while you can _write_ to SRAM using code -executing anywhere, you can only _read_ with code that's executing out of either -Internal or External Work RAM, not with code that's executing out of ROM. -This means that you need to copy the code for doing the read into some scratch -space (either at startup or on the fly, doesn't matter) and call that function -you've carefully placed. It's a bit annoying, but soon enough a routine for it -all will be provided in the `gba` crate and we won't have to worry too much -about it. - -(TODO: Provide the routine that I just claimed we would provide.) diff --git a/book/src/volatile.md b/book/src/volatile.md deleted file mode 100644 index 57c8257..0000000 --- a/book/src/volatile.md +++ /dev/null @@ -1,48 +0,0 @@ -# Volatile - -I know that you just got your first program running and you're probably excited -to learn more about GBA stuff, but first we have to cover a subject that's not -quite GBA specific. - -In the `hello_magic.rs` file we had these lines - -```rust - (0x600_0000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F); - (0x600_0000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0); - (0x600_0000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00); -``` - -You've probably seen or heard of the -[write](https://doc.rust-lang.org/core/ptr/fn.write.html) function before, but -you'd be excused if you've never heard of its cousin function, -[write_volatile](https://doc.rust-lang.org/core/ptr/fn.write_volatile.html). - -What's the difference? Well, when the compiler sees normal reads and writes, it -assumes that those go into plain old memory locations. CPU registers, RAM, -wherever it is that the value's being placed. The compiler assumes that it's -safe to optimize away some of the reads and writes, or maybe issue the reads and -writes in a different order from what you wrote. 
Normally this is okay, and it's -exactly what we want the compiler to be doing, quietly making things faster for us. - -However, some of the time we access values from parts of memory where it's -important that each access happen, and in the exact order that we say. In our -`hello_magic.rs` example, we're writing directly into the video memory of the -display. The compiler sees that the rest of the Rust program never reads out of -those locations, so it might think "oh, we can skip those writes, they're -pointless". It doesn't know that we're having a side effect besides just storing -some value at an address. - -By declaring a particular read or write to be `volatile` we can force the -compiler to issue that access. Further, we're guaranteed that all `volatile` -access will happen in exactly the order it appears in the program relative to -other `volatile` access. However, non-volatile access can still be re-ordered -relative to a volatile access. In other words, for parts of the memory that are -volatile, we must _always_ use a volatile read or write for our program to -perform properly. - -For exactly this reason, we've got the [voladdress](https://docs.rs/voladdress/) -crate. It used to be part of the GBA crate, but it became big enough to break -out into a stand alone crate. It doesn't even do too much, it just makes it a -lot less likely that we accidentally forget to use volatile with our memory -mapped addresses. We just call `read` and `write` on any `VolAddress` that we -happen to see and the right thing will happen.