From 51d3915dea6e8eae1b09576c859597ce7342d246 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 20:35:57 -0700 Subject: [PATCH 01/33] static asserts --- book/src/01-quirks/04-newtype.md | 125 +++++++++++++----------- book/src/01-quirks/05-static_asserts.md | 114 +++++++++++++++++++++ book/src/SUMMARY.md | 1 + examples/bg_demo.rs | 2 +- examples/hello_world.rs | 72 ++++++++++---- examples/light_cycle.rs | 2 +- 6 files changed, 239 insertions(+), 77 deletions(-) create mode 100644 book/src/01-quirks/05-static_asserts.md diff --git a/book/src/01-quirks/04-newtype.md b/book/src/01-quirks/04-newtype.md index 07244c9..86b2916 100644 --- a/book/src/01-quirks/04-newtype.md +++ b/book/src/01-quirks/04-newtype.md @@ -40,19 +40,6 @@ with our newtypes. pub struct PixelColor(u16); ``` -Ah, and of course we'll need to make it so you can unwrap the value: - -```rust -#[repr(transparent)] -pub struct PixelColor(u16); - -impl From for u16 { - fn from(color: PixelColor) -> u16 { - color.0 - } -} -``` - And then we'll need to do that same thing for _every other newtype we want_. Except there's only two tiny parts that actually differ between newtype @@ -62,7 +49,12 @@ a job for a macro to me! ## Making It A Macro -The most basic version of the macro we want goes like this: +If you're going to do much with macros you should definitely read through [The +Little Book of Rust +Macros](https://danielkeep.github.io/tlborm/book/index.html), but we won't be +doing too much so you can just follow along here a bit if you like. + +The most basic version of a newtype macro starts like this: ```rust #[macro_export] @@ -74,8 +66,39 @@ macro_rules! newtype { } ``` -Except we also want to be able to add attributes (which includes doc comments), -so we upgrade our macro a bit: +The `#[macro_export]` makes it exported by the current module (like `pub` +kinda), and then we have one expansion option that takes an identifier, a `,`, +and then a second identifier. 
The new name is the outer type we'll be using, and +the old name is the inner type that's being wrapped. You'd use our new macro +something like this: + +```rust +newtype! {PixelColorCurly, u16} + +newtype!(PixelColorParens, u16); + +newtype![PixelColorBrackets, u16]; +``` + +Note that you can invoke the macro with the outermost grouping as any of `()`, +`[]`, or `{}`. It makes no particular difference to the macro. Also, that space +in the first version is kinda to show off that you can put white space in +between the macro name and the grouping if you want. The difference is mostly +style, but there are some rules and considerations here: + +* If you use curly braces then you _must not_ put a `;` after the invocation. +* If you use parentheses or brackets then you _must_ put the `;` at the end. +* Rustfmt cares which you use and formats accordingly: + * Curly brace macro use mostly gets treated like a code block. + * Parentheses macro use mostly gets treated like a function call. + * Bracket macro use mostly gets treated like an array declaration. + +## Upgrade That Macro! + +We also want to be able to add `derive` stuff and doc comments to our newtype. +Within the context of `macro_rules!` definitions these are called "meta". Since +we can have any number of them we wrap it all up in a "zero or more" matcher. +Then our macro looks like this: ```rust #[macro_export] @@ -88,52 +111,44 @@ macro_rules! newtype { } ``` -And we want to automatically add the ability to turn the wrapper type back into -the wrapped type. +So now we can write ```rust -#[macro_export] -macro_rules! newtype { - ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => { - $(#[$attr])* - #[repr(transparent)] - pub struct $new_name($old_name); - - impl From<$new_name> for $old_name { - fn from(x: $new_name) -> $old_name { - x.0 - } - } - }; +newtype! { + /// Color on the GBA gives 5 bits for each channel, the highest bit is ignored. 
+ #[derive(Debug, Clone, Copy)] + PixelColor, u16 } ``` -That seems like enough for all of our examples, so we'll stop there. We could -add more things: - -* Making the `From` impl being optional. We'd have to make the newtype - invocation be more complicated somehow, the user puts ", no-unwrap" after the - inner type declaration or something, or something like that. -* Allowing for more precise visibility controls on the wrapping type and on the - inner field. This would add a lot of line noise, so we'll just always have our - newtypes be `pub`. -* Allowing for generic newtypes, which might sound silly but that we'll actually - see an example of soon enough. To do this you might _think_ that we can change - the `:ident` declarations to `:ty`, but since we're declaring a fresh type not - using an existing type we have to accept it as an `:ident`. The way you get - around this is with a proc-macro, which is a lot more powerful but which also - requires that you write the proc-macro in an entirely other crate that gets - compiled first. We don't need that much power, so for our examples we'll go - with the macro_rules version and just do it by hand in the few cases where we - need a generic newtype. -* Allowing for `Deref` and `DerefMut`, which usually defeats the point of doing - the newtype, but maybe sometimes it's the right thing, so if you were going - for the full industrial strength version with a proc-macro and all you might - want to make that part of your optional add-ons as well the same way you might - want optional `From`. You'd probably want `From` to be "on by default" and - `Deref`/`DerefMut` to be "off by default", but whatever. +And that's about all we'll need for the examples. 
**As a reminder:** remember that `macro_rules` macros have to appear _before_ they're invoked in your source, so the `newtype` macro will always have to be at the very top of your file, or if you put it in a module within your project you'll need to declare the module before anything that uses it. + +## Potential Homework + +If you wanted to keep going and get really fancy with it, you could potentially +add a lot more: + +* Make a `pub const fn new() -> Self` method that outputs the base value in a + const way. Combine this with builder style "setter" methods that are also + const and you can get the compiler to do quite a bit of the value building + work at compile time. +* Making the macro optionally emit a `From` impl to unwrap it back into the base + type. +* Allow for visibility modifiers to be applied to the inner field and the newly + generated type. +* Allowing for generic newtypes. You already saw the need for this once in the + volatile section. Unfortunately, this particular part gets really tricky if + you're using `macro_rules!`, so you might need to move up to a full + `proc_macro`. Having a `proc_macro` isn't bad except that they have to be + defined in a crate of their own and they're compiled before use. You can't + ever use them in the crate that defines them, so we won't be using them in any + of our single file examples. +* Allowing for optional `Deref` and `DerefMut` of the inner value. This takes + away most all the safety aspect of doing the newtype, but there may be times + for it. As an example, you could make a newtype with a different form of + Display impl that you want to otherwise treat as the base type in all places. diff --git a/book/src/01-quirks/05-static_asserts.md b/book/src/01-quirks/05-static_asserts.md new file mode 100644 index 0000000..845ede1 --- /dev/null +++ b/book/src/01-quirks/05-static_asserts.md @@ -0,0 +1,114 @@ +# Static Asserts + +Have you ever wanted to assert things _even before runtime_? 
We all have, of +course. Particularly when the runtime machine is a poor little GBA, we'd like to +have the machine doing the compile handle as much checking as possible. + +Enter [static assertions](https://docs.rs/static_assertions/). + +This is an amazing crate that you should definitely use when you can. + +It's written by [nvzqz](https://github.com/nvzqz), and they kindly wrote up a +[blog +post](https://nikolaivazquez.com/posts/programming/rust-static-assertions/) that +explains the thinking behind it. + +However, I promised that each example would be single file, and I also promised +to explain what's going on as we go, so we'll briefly touch upon giving an +explanation here. + +## How We Const Assert + +Alright, as it stands (2018-12-15), we can't use `if` in a `const` context. + +Since we can't use `if`, we can't use a normal `assert!`. Some day it will be +possible, and a failed assert at compile time will be a compile error and a +failed assert at run time will be a panic and we'll have a nice unified +programming experience. Until then, we can add compile-time-only assertions by being a little +tricky with the compiler. + +If we write + +```rust +const ASSERT: usize = 0 - 1; +``` + +that gives a warning, since the math would underflow. We can upgrade that +warning to a hard error: + +```rust +#[deny(const_err)] +const ASSERT: usize = 0 - 1; +``` + +And to make our construction reusable we can enable the `underscore_const_names` +feature in our program or library and give each such const an underscore for a +name. + +```rust +#![feature(underscore_const_names)] + +#[deny(const_err)] +const _: usize = 0 - 1; +``` + +Now we wrap this in a macro where we give an expression for a bool. We negate +the bool then cast it to a `usize`, meaning that `true` negates into `false`, +which becomes `0usize`, and then there's no underflow error. Or if the input was +`false`, it negates into `true`, then becomes `1usize`, and then the underflow +error fires. + +```rust +macro_rules! 
const_assert { + ($condition:expr) => { + #[deny(const_err)] + #[allow(dead_code)] + const _: usize = 0 - !$condition as usize; + } +} +``` + +This allows anything which supports `core::ops::Not` and can then can cast into +`usize`, which technically isn't just `bool` values, but close enough. + +## Asserting Something + +As an example of how we might use a `const_assert`, we'll do a demo with colors. +There's a red, blue, and green channel. We store colors in a `u16` with 5 bits +for each channel. + +```rust +newtype! { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + Color, u16 +} +``` + +And when we're building a color, we're passing in `u16` values, but they could +be using more than just 5 bits of space. We want to make sure that each channel +is 31 or less, so we can make a color builder that does a `const_assert!` on the +value of each channel. + +```rust +macro_rules! rgb { + ($r:expr, $g:expr, $b:expr) => { + { + const_assert!($r <= 31); + const_assert!($g <= 31); + const_assert!($b <= 31); + Color($b << 10 | $g << 5 | $r) + } + } +} +``` + +And then we can declare some colors + +```rust +const RED: Color = rgb!(31, 0, 0); + +const BLUE: Color = rgb!(31, 500, 0); +``` + +The second one is clearly out of bounds and it fires an error just like we +wanted. 
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 6de4af7..08bf812 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -12,6 +12,7 @@ * [Fixed Only](01-quirks/02-fixed_only.md) * [Volatile Destination](01-quirks/03-volatile_destination.md) * [Newtype](01-quirks/04-newtype.md) + * [Static Asserts](01-quirks/05-static_asserts.md) * [Concepts](02-concepts/00-index.md) * [CPU](02-concepts/01-cpu.md) * [BIOS](02-concepts/02-bios.md) diff --git a/examples/bg_demo.rs b/examples/bg_demo.rs index c18a00c..3fa219f 100644 --- a/examples/bg_demo.rs +++ b/examples/bg_demo.rs @@ -1,5 +1,5 @@ -#![feature(start)] #![no_std] +#![feature(start)] #[panic_handler] fn panic(_info: &core::panic::PanicInfo) -> ! { diff --git a/examples/hello_world.rs b/examples/hello_world.rs index 8cd4270..87e6a1c 100644 --- a/examples/hello_world.rs +++ b/examples/hello_world.rs @@ -1,18 +1,65 @@ -#![feature(start)] #![no_std] +#![feature(start)] +#![feature(underscore_const_names)] + +#[macro_export] +macro_rules! newtype { + ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => { + $(#[$attr])* + #[repr(transparent)] + pub struct $new_name($old_name); + }; +} + +#[macro_export] +macro_rules! const_assert { + ($condition:expr) => { + #[deny(const_err)] + #[allow(dead_code)] + const _: usize = 0 - !$condition as usize; + }; +} #[panic_handler] fn panic(_info: &core::panic::PanicInfo) -> ! { loop {} } +newtype! { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + Color, u16 +} + +pub const fn rgb(red: u16, green: u16, blue: u16) -> Color { + Color(blue << 10 | green << 5 | red) +} + +newtype! 
{ + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + DisplayControlSetting, u16 +} + +pub const DISPLAY_CONTROL: VolatilePtr<DisplayControlSetting> = VolatilePtr(0x04000000 as *mut DisplayControlSetting); +pub const JUST_MODE3_AND_BG2: DisplayControlSetting = DisplayControlSetting(3 + 0b100_0000_0000); + +pub struct Mode3; + +impl Mode3 { + const SCREEN_WIDTH: isize = 240; + const PIXELS: VolatilePtr<Color> = VolatilePtr(0x600_0000 as *mut Color); + + pub unsafe fn draw_pixel_unchecked(col: isize, row: isize, color: Color) { + Self::PIXELS.offset(col + row * Self::SCREEN_WIDTH).write(color); + } +} + #[start] fn main(_argc: isize, _argv: *const *const u8) -> isize { unsafe { - DISPCNT.write(MODE3 | BG2); - mode3_pixel(120, 80, rgb16(31, 0, 0)); - mode3_pixel(136, 80, rgb16(0, 31, 0)); - mode3_pixel(120, 96, rgb16(0, 0, 31)); + DISPLAY_CONTROL.write(JUST_MODE3_AND_BG2); + Mode3::draw_pixel_unchecked(120, 80, rgb(31, 0, 0)); + Mode3::draw_pixel_unchecked(136, 80, rgb(0, 31, 0)); + Mode3::draw_pixel_unchecked(120, 96, rgb(0, 0, 31)); loop {} } } @@ -31,18 +78,3 @@ impl<T> VolatilePtr<T> { VolatilePtr(self.0.wrapping_offset(count)) } } - -pub const DISPCNT: VolatilePtr<u16> = VolatilePtr(0x04000000 as *mut u16); -pub const MODE3: u16 = 3; -pub const BG2: u16 = 0b100_0000_0000; - -pub const VRAM: usize = 0x06000000; -pub const SCREEN_WIDTH: isize = 240; - -pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red -} - -pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); -} diff --git a/examples/light_cycle.rs b/examples/light_cycle.rs index c200663..c593c19 100644 --- a/examples/light_cycle.rs +++ b/examples/light_cycle.rs @@ -1,5 +1,5 @@ -#![feature(start)] #![no_std] +#![feature(start)] #[panic_handler] fn panic(_info: &core::panic::PanicInfo) -> ! 
{ From 09c16ef1b0ab96c1091bfa6cca582fe9350ab9c6 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 20:43:01 -0700 Subject: [PATCH 02/33] clarification to const_assert --- book/src/01-quirks/05-static_asserts.md | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/book/src/01-quirks/05-static_asserts.md b/book/src/01-quirks/05-static_asserts.md index 845ede1..5c74184 100644 --- a/book/src/01-quirks/05-static_asserts.md +++ b/book/src/01-quirks/05-static_asserts.md @@ -52,11 +52,11 @@ name. const _: usize = 0 - 1; ``` -Now we wrap this in a macro where we give an expression for a bool. We negate -the bool then cast it to a `usize`, meaning that `true` negates into `false`, -which becomes `0usize`, and then there's no underflow error. Or if the input was -`false`, it negates into `true`, then becomes `1usize`, and then the underflow -error fires. +Now we wrap this in a macro where we give a `bool` expression as input. We +negate the bool then cast it to a `usize`, meaning that `true` negates into +`false`, which becomes `0usize`, and then there's no underflow error. Or if the +input was `false`, it negates into `true`, then becomes `1usize`, and then the +underflow error fires. ```rust macro_rules! const_assert { @@ -68,8 +68,14 @@ macro_rules! const_assert { } ``` -This allows anything which supports `core::ops::Not` and can then can cast into -`usize`, which technically isn't just `bool` values, but close enough. +Technically, written like this, the expression can be anything with a +`core::ops::Not` implementation that can also be `as` cast into `usize`. That's +`bool`, but also basically all the other number types. + +It doesn't really hurt if you want to `const_assert!` a number I guess. I mean, +any number other than the `MAX` value of an unsigned type or the `-1` value of +an unsigned type will fail such an assertion, but I bet you'll notice that you +did something wrong pretty quick. 
## Asserting Something From e9a8b80a4e11fcce0550b4eb94be85c92f27ef9e Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 20:45:25 -0700 Subject: [PATCH 03/33] static_assert -> const_assert --- .../01-quirks/{05-static_asserts.md => 05-const_asserts.md} | 5 +++-- book/src/SUMMARY.md | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) rename book/src/01-quirks/{05-static_asserts.md => 05-const_asserts.md} (95%) diff --git a/book/src/01-quirks/05-static_asserts.md b/book/src/01-quirks/05-const_asserts.md similarity index 95% rename from book/src/01-quirks/05-static_asserts.md rename to book/src/01-quirks/05-const_asserts.md index 5c74184..0827a85 100644 --- a/book/src/01-quirks/05-static_asserts.md +++ b/book/src/01-quirks/05-const_asserts.md @@ -1,10 +1,11 @@ -# Static Asserts +# Constant Assertions Have you ever wanted to assert things _even before runtime_? We all have, of course. Particularly when the runtime machine is a poor little GBA, we'd like to have the machine doing the compile handle as much checking as possible. -Enter [static assertions](https://docs.rs/static_assertions/). +Enter the [static assertions](https://docs.rs/static_assertions/) crate, which +provides a way to let you assert on a `const` expression. This is an amazing crate that you should definitely use when you can. 
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 08bf812..46168b3 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -12,7 +12,7 @@ * [Fixed Only](01-quirks/02-fixed_only.md) * [Volatile Destination](01-quirks/03-volatile_destination.md) * [Newtype](01-quirks/04-newtype.md) - * [Static Asserts](01-quirks/05-static_asserts.md) + * [Static Asserts](01-quirks/05-const_asserts.md) * [Concepts](02-concepts/00-index.md) * [CPU](02-concepts/01-cpu.md) * [BIOS](02-concepts/02-bios.md) From 2aafe1a4cbde5379bd149af4b64662e26531e74b Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 20:47:00 -0700 Subject: [PATCH 04/33] Using their full name upon request. --- book/src/01-quirks/05-const_asserts.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/book/src/01-quirks/05-const_asserts.md b/book/src/01-quirks/05-const_asserts.md index 0827a85..42b99ed 100644 --- a/book/src/01-quirks/05-const_asserts.md +++ b/book/src/01-quirks/05-const_asserts.md @@ -9,8 +9,8 @@ provides a way to let you assert on a `const` expression. This is an amazing crate that you should definitely use when you can. -It's written by [nvzqz](https://github.com/nvzqz), and they kindly wrote up a -[blog +It's written by [Nikolai Vazquez](https://github.com/nvzqz), and they kindly +wrote up a [blog post](https://nikolaivazquez.com/posts/programming/rust-static-assertions/) that explains the thinking behind it. 
From d78e6033c4a5d455c19ce79ff60ad93b2585ffdb Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 20:50:43 -0700 Subject: [PATCH 05/33] TOC fix --- book/src/SUMMARY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index 46168b3..b6a49ea 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -12,7 +12,7 @@ * [Fixed Only](01-quirks/02-fixed_only.md) * [Volatile Destination](01-quirks/03-volatile_destination.md) * [Newtype](01-quirks/04-newtype.md) - * [Static Asserts](01-quirks/05-const_asserts.md) + * [Const Asserts](01-quirks/05-const_asserts.md) * [Concepts](02-concepts/00-index.md) * [CPU](02-concepts/01-cpu.md) * [BIOS](02-concepts/02-bios.md) From 3d2dbbf214596ad7aca794553f9c6575c3c9840a Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 20:58:53 -0700 Subject: [PATCH 06/33] issue tracker links --- book/src/01-quirks/05-const_asserts.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/book/src/01-quirks/05-const_asserts.md b/book/src/01-quirks/05-const_asserts.md index 42b99ed..6dbec14 100644 --- a/book/src/01-quirks/05-const_asserts.md +++ b/book/src/01-quirks/05-const_asserts.md @@ -42,8 +42,9 @@ warning to a hard error: const ASSERT: usize = 0 - 1; ``` -And to make our construction reusable we can enable the `underscore_const_names` -feature in our program or library and give each such const an underscore for a +And to make our construction reusable we can enable the +[underscore_const_names](https://github.com/rust-lang/rust/issues/54912) feature +in our program (or library) and then give each such const an underscore for a name. ```rust @@ -76,7 +77,10 @@ Technically, written like this, the expression can be anything with a It doesn't really hurt if you want to `const_assert!` a number I guess. 
I mean, any number other than the `MAX` value of an unsigned type or the `-1` value of an unsigned type will fail such an assertion, but I bet you'll notice that you -did something wrong pretty quick. +did something wrong pretty quick. We could use the +[type_ascription](https://github.com/rust-lang/rust/issues/23416) feature to +really force a `bool`, but it's not that critical, so we'll avoid using a +feature that we don't need until it's stable. ## Asserting Something From 9e3b3e15f12c589436db5dee02970ae60a0f32d7 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 21:11:13 -0700 Subject: [PATCH 07/33] assert upgrades! --- book/src/01-quirks/05-const_asserts.md | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/book/src/01-quirks/05-const_asserts.md b/book/src/01-quirks/05-const_asserts.md index 6dbec14..21cb201 100644 --- a/book/src/01-quirks/05-const_asserts.md +++ b/book/src/01-quirks/05-const_asserts.md @@ -72,15 +72,20 @@ macro_rules! const_assert { Technically, written like this, the expression can be anything with a `core::ops::Not` implementation that can also be `as` cast into `usize`. That's -`bool`, but also basically all the other number types. +`bool`, but also basically all the other number types. Since we want to ensure +that we get proper looking type errors when things go wrong, we can use +`($condition && true)` to enforce that we get a `bool` (thanks to `Talchas` for +that particular suggestion). -It doesn't really hurt if you want to `const_assert!` a number I guess. I mean, -any number other than the `MAX` value of an unsigned type or the `-1` value of -an unsigned type will fail such an assertion, but I bet you'll notice that you -did something wrong pretty quick. We could use the -[type_ascription](https://github.com/rust-lang/rust/issues/23416) feature to -really force a `bool`, but it's not that critical, so we'll avoid using a -feature that we don't need until it's stable. 
+```rust +macro_rules! const_assert { + ($condition:expr) => { + #[deny(const_err)] + #[allow(dead_code)] + const _: usize = 0 - !($condition && true) as usize; + } +} +``` ## Asserting Something From e08a8d617e6588e231d5da4ef4cd6b363a92a5ce Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 21:46:44 -0700 Subject: [PATCH 08/33] rename "Network" to "Link Cable" --- book/src/04-non-video/06-link_cable.md | 1 + book/src/04-non-video/06-network.md | 1 - book/src/SUMMARY.md | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 book/src/04-non-video/06-link_cable.md delete mode 100644 book/src/04-non-video/06-network.md diff --git a/book/src/04-non-video/06-link_cable.md b/book/src/04-non-video/06-link_cable.md new file mode 100644 index 0000000..f8e1989 --- /dev/null +++ b/book/src/04-non-video/06-link_cable.md @@ -0,0 +1 @@ +# Link Cable diff --git a/book/src/04-non-video/06-network.md b/book/src/04-non-video/06-network.md deleted file mode 100644 index 05db335..0000000 --- a/book/src/04-non-video/06-network.md +++ /dev/null @@ -1 +0,0 @@ -# Network diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md index b6a49ea..704ba7a 100644 --- a/book/src/SUMMARY.md +++ b/book/src/SUMMARY.md @@ -32,7 +32,7 @@ * [Direct Memory Access](04-non-video/03-dma.md) * [Sound](04-non-video/04-sound.md) * [Interrupts](04-non-video/05-interrupts.md) - * [Network](04-non-video/06-network.md) + * [Link Cable](04-non-video/06-link_cable.md) * [Game Pak](04-non-video/07-game_pak.md) * [Examples](05-examples/00-index.md) * [hello_magic](05-examples/01-hello_magic.md) From bfc7e96c79b08ae7f7775c130cf6a0fe9bf2f9ed Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 21:58:56 -0700 Subject: [PATCH 09/33] Give some chapter outlines --- book/src/02-concepts/00-index.md | 25 +++++++++++++++++++++++++ book/src/03-video/00-index.md | 8 ++++++++ book/src/04-non-video/00-index.md | 20 ++++++++++++++++++++ 3 files changed, 53 insertions(+) diff --git 
a/book/src/02-concepts/00-index.md b/book/src/02-concepts/00-index.md index 864e1ff..fb6b902 100644 --- a/book/src/02-concepts/00-index.md +++ b/book/src/02-concepts/00-index.md @@ -1 +1,26 @@ # Broad Concepts + +The GameBoy Advance sits in a middle place between the chthonic game consoles of +the ancient past and the "small PC in a funny case" consoles of the modern age. + +On the one hand, yeah, you're gonna find a few strange conventions as you learn +all the ropes. + +On the other, at least we're writing in Rust at all, and not having to do all +the assembly by hand. + +This chapter for "concepts" has a section for each part of the GBA's hardware +memory map, going by increasing order of base address value. The sections try to +explain as much as possible while sticking to just the concerns you might have +regarding that part of the memory map. + +For an assessment of how to wrangle all three parts of the video system (PALRAM, +VRAM, and OAM), along with the correct IO registers, into something that shows a +picture, you'll want the Video chapter. + +Similarly, the "IO Registers" part of the GBA actually controls how you interact +with every single bit of hardware connected to the GBA. A full description of +everything is obviously too much for just one section of the book. Instead you +get an overview of general IO register rules and advice. Each particular +register is described in the appropriate sections of either the Video or +Non-Video chapters. diff --git a/book/src/03-video/00-index.md b/book/src/03-video/00-index.md index f076b5d..288c3ca 100644 --- a/book/src/03-video/00-index.md +++ b/book/src/03-video/00-index.md @@ -1 +1,9 @@ # Video + +GBA Video starts with an IO register called the "Display Control Register", and +then spirals out from there. You generally have to use Palette RAM (PALRAM), +Video RAM (VRAM), Object Attribute Memory (OAM), as well as any number of other +IO registers. 
+ +They all have to work together just right, and there's a lot going on when you +first try doing it, so try to take it very slowly as you're learning each step. diff --git a/book/src/04-non-video/00-index.md b/book/src/04-non-video/00-index.md index d7d1113..aff4a81 100644 --- a/book/src/04-non-video/00-index.md +++ b/book/src/04-non-video/00-index.md @@ -1 +1,21 @@ # Non-Video + +Besides video effects the GBA still has an okay amount of stuff going on. + +Obviously you'll want to know how to read the user's button inputs. That can +almost go without saying, except that I said it. + +Each other part can be handled in about any order you like. + +Using interrupts is perhaps one of the hardest things for us as Rust programmers +due to quirks in our compilation process. Our code all gets compiled to 16-bit +THUMB instructions, and we don't have a way to mark a function to be compiled +using 32-bit ARM instructions instead. However, an interrupt handler _must_ be +written in 32-bit ARM instructions for it to work. That means that we have to +write our interrupt handler in 32-bit ARM assembly by hand. We'll do it, but I don't +think we'll be too happy about it. + +The Link Cable related stuff is also probably a little harder to test than +anything else, simply because link cable emulation isn't always the best, and/or +you need two GBAs with two flash carts and the cable for hardware testing. +Still, we'll try to go over it eventually. 
From b9d965410ef0e003e27cacf19654f629fb00a2f9 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 23:05:03 -0700 Subject: [PATCH 10/33] improve the Resources page --- book/src/00-introduction/05-help_and_resources.md | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/book/src/00-introduction/05-help_and_resources.md b/book/src/00-introduction/05-help_and_resources.md index 0d9fc8e..63a646f 100644 --- a/book/src/00-introduction/05-help_and_resources.md +++ b/book/src/00-introduction/05-help_and_resources.md @@ -26,9 +26,15 @@ available while you're debugging problems. ## Information Resources -Ketsuban and I didn't magically learn this all from nowhere, we read various -technical manuals and guides ourselves and then distilled the knowledge (usually -oriented towards C and C++) into this book for Rust. +First, if I fail to describe something related to Rust, you can always try +checking in [The Rust +Reference](https://doc.rust-lang.org/nightly/reference/introduction.html) to see +if they cover it. You can mostly ignore that big scary red banner at the top, +things are a lot better documented than they make it sound. + +As to GBA related lore, Ketsuban and I didn't magically learn this all from +nowhere, we read various technical manuals and guides ourselves and then +distilled those works oriented around C and C++ into a book for Rust. 
We have personally used some or all of the following: From 1fcfbb00e8e67c94677647e908fd6f8b919d07ee Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sat, 15 Dec 2018 23:05:22 -0700 Subject: [PATCH 11/33] Clear some cruft --- book/src-bak/ch01/hello1.md | 115 ----------------------------- book/src-bak/ch01/hello2.md | 132 ---------------------------------- book/src-bak/ch01/index.md | 10 --- book/src-bak/ch01/volatile.md | 70 ------------------ book/src-bak/ch02/index.md | 22 ------ 5 files changed, 349 deletions(-) delete mode 100644 book/src-bak/ch01/hello1.md delete mode 100644 book/src-bak/ch01/hello2.md delete mode 100644 book/src-bak/ch01/index.md delete mode 100644 book/src-bak/ch01/volatile.md delete mode 100644 book/src-bak/ch02/index.md diff --git a/book/src-bak/ch01/hello1.md b/book/src-bak/ch01/hello1.md deleted file mode 100644 index 561f77e..0000000 --- a/book/src-bak/ch01/hello1.md +++ /dev/null @@ -1,115 +0,0 @@ - - -## A basic hello1 explanation - -So, what just happened? Even if you're used to Rust that might look pretty -strange. We'll go over most of the little parts right here, and then bigger -parts will get their own sections. - -```rust -#![feature(start)] -``` - -This enables the [start -feature](https://doc.rust-lang.org/beta/unstable-book/language-features/start.html), -which you would normally be able to read about in the unstable book, except that -the book tells you nothing at all except to look at the [tracking -issue](https://github.com/rust-lang/rust/issues/29633). - -Basically, a GBA game is even more low-level than the _normal_ amount of -low-level that you get from Rust, so we have to tell the compiler to account for -that by specifying a `#[start]`, and we need this feature on to do that. - -```rust -#![no_std] -``` - -There's no standard library available on the GBA, so we'll have to live a core -only life. - -```rust -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! 
{ - loop {} -} -``` - -This sets our [panic -handler](https://doc.rust-lang.org/nightly/nomicon/panic-handler.html). -Basically, if we somehow trigger a panic, this is where the program goes. -However, right now we don't know how to get any sort of message out to the user -so... we do nothing at all. We _can't even return_ from here, so we just sit in -an infinite loop. The player will have to reset the universe from the outside. - -```rust -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { -``` - -This is our `#[start]`. We call it `main`, but it's not like a `main` that you'd -see in a Rust program. It's _more like_ the sort of `main` that you'd see in a C -program, but it's still **not** that either. If you compile a `#[start]` program -for a target with an OS such as `arm-none-eabi-nm` you can open up the debug -info and see that your result will have the symbol for the C `main` along side -the symbol for the start `main` that we write here. Our start `main` is just its -own unique thing, and the inputs and outputs have to be like that because that's -how `#[start]` is specified to work in Rust. - -If you think about it for a moment you'll probably realize that, those inputs -and outputs are totally useless to us on a GBA. There's no OS on the GBA to call -our program, and there's no place for our program to "return to" when it's done. - -Side note: if you want to learn more about stuff "before main gets called" you -can watch a great [CppCon talk](https://www.youtube.com/watch?v=dOfucXtyEsU) by -Matt Godbolt (yes, that Godbolt) where he delves into quite a bit of it. The -talk doesn't really apply to the GBA, but it's pretty good. - -```rust - unsafe { -``` - -I hope you're all set for some `unsafe`, because there's a lot of it to be had. - -```rust - (0x04000000 as *mut u16).write_volatile(0x0403); -``` - -Sure! 
- -```rust - (0x06000000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F); - (0x06000000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0); - (0x06000000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00); -``` - -Ah, of course. - -```rust - loop {} - } -} -``` - -And, as mentioned above, there's no place for a GBA program to "return to", so -we can't ever let `main` try to return there. Instead, we go into an infinite -`loop` that does nothing. The fact that this doesn't ever return an `isize` -value doesn't seem to bother Rust, because I guess we're at least not returning -any other type of thing instead. - -Fun fact: unlike in C++, an infinite loop with no side effects isn't Undefined -Behavior for us rustaceans... _semantically_. In truth LLVM has a [known -bug](https://github.com/rust-lang/rust/issues/28728) in this area, so we won't -actually be relying on empty loops in any future programs. - -## All Those Magic Numbers - -Alright, I cheated quite a bit in the middle there. The program works, but I -didn't really tell you why because I didn't really tell you what any of those -magic numbers mean or do. - -* `0x04000000` is the address of an IO Register called the Display Control. -* `0x06000000` is the start of Video RAM. - -So we write some magic to the display control register once, then we write some -other magic to three magic locations in the Video RAM. Somehow that shows three -dots. Gotta read on to find out why! diff --git a/book/src-bak/ch01/hello2.md b/book/src-bak/ch01/hello2.md deleted file mode 100644 index 7c991d4..0000000 --- a/book/src-bak/ch01/hello2.md +++ /dev/null @@ -1,132 +0,0 @@ -# hello2 - -Okay so let's have a look again: - -`hello1` - -```rust -#![feature(start)] -#![no_std] - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! 
{ - loop {} -} - -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - unsafe { - (0x04000000 as *mut u16).write_volatile(0x0403); - (0x06000000 as *mut u16).offset(120 + 80 * 240).write_volatile(0x001F); - (0x06000000 as *mut u16).offset(136 + 80 * 240).write_volatile(0x03E0); - (0x06000000 as *mut u16).offset(120 + 96 * 240).write_volatile(0x7C00); - loop {} - } -} -``` - -Now let's clean this up so that it's clearer what's going on. - -First we'll label that display control stuff, including using the `VolatilePtr` -type from the volatile explanation: - -```rust -pub const DISPCNT: VolatilePtr = VolatilePtr(0x04000000 as *mut u16); -pub const MODE3: u16 = 3; -pub const BG2: u16 = 0b100_0000_0000; -``` - -Next we make some const values for the actual pixel drawing - -```rust -pub const VRAM: usize = 0x06000000; -pub const SCREEN_WIDTH: isize = 240; -``` - -Note that VRAM has to be interpreted in different ways depending on mode, so we -just leave it as `usize` and we'll cast it into the right form closer to the -actual use. - -Next we want a small helper function for putting together a color value. -Happily, this one can even be declared as a `const` function. At the time of -writing, we've got the "minimal const fn" support in nightly. It really is quite -limited, but I'm happy to let rustc and LLVM pre-compute as much as they can -when it comes to the GBA's tiny CPU. - -```rust -pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red -} -``` - -Finally, we'll make a function for drawing a pixel in Mode 3. Even though it's -just a one-liner, having the "important parts" be labeled as function arguments -usually helps you think about it a lot better. 
- -```rust -pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); -} -``` - -So now we've got this: - -`hello2` - -```rust -#![feature(start)] -#![no_std] - -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! { - loop {} -} - -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - unsafe { - DISPCNT.write(MODE3 | BG2); - mode3_pixel(120, 80, rgb16(31, 0, 0)); - mode3_pixel(136, 80, rgb16(0, 31, 0)); - mode3_pixel(120, 96, rgb16(0, 0, 31)); - loop {} - } -} - -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -#[repr(transparent)] -pub struct VolatilePtr(pub *mut T); -impl VolatilePtr { - pub unsafe fn read(&self) -> T { - core::ptr::read_volatile(self.0) - } - pub unsafe fn write(&self, data: T) { - core::ptr::write_volatile(self.0, data); - } - pub unsafe fn offset(self, count: isize) -> Self { - VolatilePtr(self.0.wrapping_offset(count)) - } -} - -pub const DISPCNT: VolatilePtr = VolatilePtr(0x04000000 as *mut u16); -pub const MODE3: u16 = 3; -pub const BG2: u16 = 0b100_0000_0000; - -pub const VRAM: usize = 0x06000000; -pub const SCREEN_WIDTH: isize = 240; - -pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red -} - -pub unsafe fn mode3_pixel(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); -} -``` - -Exact same program that we started with, but much easier to read. - -Of course, in the full `gba` crate that this book is a part of we have these and -other elements all labeled and sorted out for you (not identically, but -similarly). Still, for educational purposes it's often best to do it yourself at -least once. 
diff --git a/book/src-bak/ch01/index.md b/book/src-bak/ch01/index.md deleted file mode 100644 index 7c21c79..0000000 --- a/book/src-bak/ch01/index.md +++ /dev/null @@ -1,10 +0,0 @@ -# Ch 1: Hello GBA - -Traditionally a person writes a "hello, world" program so that they can test -that their development environment is setup properly and to just get a feel for -using the tools involved. To get an idea of what a small part of a source file -will look like. All that stuff. - -Normally, you write a program that prints "hello, world" to the terminal. The -GBA has no terminal, but it does have a screen, so instead we're going to draw -three dots to the screen. diff --git a/book/src-bak/ch01/volatile.md b/book/src-bak/ch01/volatile.md deleted file mode 100644 index 940e05b..0000000 --- a/book/src-bak/ch01/volatile.md +++ /dev/null @@ -1,70 +0,0 @@ -# Volatile - -Before we focus on what the numbers mean, first let's ask ourselves: Why are we -doing _volatile_ writes? You've probably never used that keywords before at all. -What _is_ volatile anyway? - -Well, the optimizer is pretty aggressive, and so it'll skip reads and writes -when it thinks can. Like if you write to a pointer once, and then again a moment -later, and it didn't see any other reads in between, it'll think that it can -just skip doing that first write since it'll get overwritten anyway. Sometimes -that's correct, but sometimes it's not. - -Marking a read or write as _volatile_ tells the compiler that it really must do -that action, and in the exact order that we wrote it out. It says that there -might even be special hardware side effects going on that the compiler isn't -aware of. In this case, the write to the display control register sets a video -mode, and the writes to the Video RAM set pixels that will show up on the -screen. 
- -Similar to "atomic" operations you might have heard about, all volatile -operations are enforced to happen in the exact order that you specify them, but -only relative to other volatile operations. So something like - -```rust -c.write_volatile(5); -a += b; -d.write_volatile(7); -``` - -might end up changing `a` either before or after the change to `c` (since the -value of `a` doesn't affect the write to `c`), but the write to `d` will -_always_ happen after the write to `c`, even though the compiler doesn't see any -direct data dependency there. - -If you ever go on to use volatile stuff on other platforms it's important to -note that volatile doesn't make things thread-safe, you still need atomic for -that. However, the GBA doesn't have threads, so we don't have to worry about -those sorts of thread safety concerns (there's interrupts, but that's another -matter). - -## Volatile by default - -Of course, writing out `volatile_write` every time is more than we wanna do. -There's clarity and then there's excessive. This is a chance to write our first -[newtype](https://doc.rust-lang.org/1.0.0/style/features/types/newtype.html). -Basically a type that's got the exact same binary representation as some other -type, but new methods and trait implementations. - -We want a `*mut T` that's volatile by default, and also when we offset it... -well the verdict is slightly unclear on how `offset` vs `wrapping_offset` work -when you're using pointers that you made up out of nowhere. I've asked the -experts and they genuinely weren't sure, so we'll make an `offset` method that -does a `wrapping_offset` just to be careful. 
- -```rust -#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] -#[repr(transparent)] -pub struct VolatilePtr(pub *mut T); -impl VolatilePtr { - pub unsafe fn read(&self) -> T { - core::ptr::read_volatile(self.0) - } - pub unsafe fn write(&self, data: T) { - core::ptr::write_volatile(self.0, data); - } - pub unsafe fn offset(self, count: isize) -> Self { - VolatilePtr(self.0.wrapping_offset(count)) - } -} -``` diff --git a/book/src-bak/ch02/index.md b/book/src-bak/ch02/index.md deleted file mode 100644 index 1263ced..0000000 --- a/book/src-bak/ch02/index.md +++ /dev/null @@ -1,22 +0,0 @@ -# Ch 2: User Input - -It's all well and good to draw three pixels, but they don't do anything yet. We -want them to do something, and for that we need to get some input from the user. - -The GBA, as I'm sure you know, has an arrow pad, A and B, L and R, Start and -Select. That's a little more than the NES/GB/CGB had, and a little less than the -SNES had. As you can guess, we get key state info from an IO register. - -Also, we will need a way to keep the program from running "too fast". On a -modern computer or console you do this with vsync info from the GPU and Monitor, -and on the GBA we'll be using vsync info from an IO register that tracks what -the display hardware is doing. - -As a way to apply our knowledge We'll make a simple "light cycle" game where -your dot leaves a trail behind them and you die if you go off the screen or if -you touch your own trail. We just make a copy of `hello2.rs` named -`light_cycle.rs` and then fill it in as we go through the chapter. Normally you -might not place the entire program into a single source file, particularly as it -grows over time, but since these are small examples it's much better to have -them be completely self contained than it is to have them be "properly -organized" for the long term. 
From fd681b182e6ad7876b95f3570966bd7eeb0049e9 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 00:59:36 -0700 Subject: [PATCH 12/33] fix our math terms (thanks ubsan!) --- src/lib.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 80f491c..781603b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -47,7 +47,7 @@ pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { /// If `denominator` is 0. #[inline] pub fn div(numerator: i32, denominator: i32) -> i32 { - div_modulus(numerator, denominator).0 + div_rem(numerator, denominator).0 } /// BIOS Call: Div (GBA SWI 0x06). @@ -58,19 +58,19 @@ pub fn div(numerator: i32, denominator: i32) -> i32 { /// /// If `denominator` is 0. #[inline] -pub fn modulus(numerator: i32, denominator: i32) -> i32 { - div_modulus(numerator, denominator).1 +pub fn rem(numerator: i32, denominator: i32) -> i32 { + div_rem(numerator, denominator).1 } /// BIOS Call: Div (GBA SWI 0x06). /// -/// Gives both the DIV and MOD output of `numerator / denominator`. +/// Gives both the DIV and REM output of `numerator / denominator`. /// /// # Panics /// /// If `denominator` is 0. 
#[inline] -pub fn div_modulus(numerator: i32, denominator: i32) -> (i32, i32) { +pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { assert!(denominator != 0); #[cfg(not(test))] { From 1eb9b06d1ac3e0024f6be1cd81bf91b60ca1ea30 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 01:21:43 -0700 Subject: [PATCH 13/33] Talkin about the BIOS --- book/src/02-concepts/02-bios.md | 171 ++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index 435d69f..4f4a90f 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -1 +1,172 @@ # BIOS + +* **Address Span:** `0x0` to `0x3FFF` (16k) + +The [BIOS](https://en.wikipedia.org/wiki/BIOS) of the GBA is a small read-only +portion of memory at the very base of the address space. However, it is also +hardware protected against reading, so if you try to read from BIOS memory when +the program counter isn't pointed into the BIOS (eg: any time code _you_ write +is executing) then you get [basically garbage +data](https://problemkaputt.de/gbatek.htm#gbaunpredictablethings) back. + +So we're not going to spend time here talking about what bits to read or write +within BIOS memory like we do with the other sections. Instead we're going to +spend time talking about [inline +assembly](https://doc.rust-lang.org/unstable-book/language-features/asm.html) +([tracking issue](https://github.com/rust-lang/rust/issues/29722)) and then use +it to call the [GBA BIOS +Functions](https://problemkaputt.de/gbatek.htm#biosfunctions). + +Note that BIOS calls have _more overhead than normal function calls_, so don't +go using them all over the place if you don't have to. 
+ +I'd like to take a moment to thank [Marc Brinkmann](https://github.com/mbr) +(with contributions from [Oliver Schneider](https://github.com/oli-obk) and +[Philipp Oppermann](https://github.com/phil-opp)) for writing [this blog +post](http://embed.rs/articles/2016/arm-inline-assembly-rust/). It's at least +ten times the tutorial quality as the `asm` entry in the Unstable Book has. In +their defense, the actual spec of how inline ASM works in rust is "basically +what clang does", and that's specified as "basically what GCC does", and that's +basically not specified at all despite GCC being like 30 years old. + +So we're in for a very slow, careful, and pedantic ride on this one. + +## Inline ASM + +The inline asm docs describe an asm call as looking like this: + +```rust +asm!(assembly template + : output operands + : input operands + : clobbers + : options + ); +``` + +And once you stick a lot of stuff in there it can _absolutely_ be hard to +remember the ordering of the elements. So we'll start with a code block that +has some commends throw in on each line: + +```rust +asm!(/* ASM */ TODO + :/* OUT */ TODO + :/* INP */ TODO + :/* CLO */ TODO + :/* OPT */ +); +``` + +Note: it's possible to use an inline ASM style where you allow LLVM to determine +the exact register placement. We will _not_ do that in this section because each +BIOS call has specific input and output slots that we must follow. However, if +you want to use inline asm for other purposes elsewhere in your code you can use +it then. + +* **ASM:** The actual asm instructions to use. + * When writing inline asm, remember that we're writing for 16-bit THUMB mode + because that's what all of our Rust code is compiled to. You can switch to + 32-bit ARM mode on the fly, but be sure to switch back before the inline ASM + block ends or things will go _bad_. 
+ * You can write code for specific registers (`r0` through `r7` are available + in THUMB mode) or you can write code for _register slots_ and let LLVM pick + what actual registers to assign to what slots. In this case, you'd instead + write `$0`, `$1` and so on (however many you need). Outputs take up one slot + each, followed by inputs taking up one slot each. +* **OUT:** The output variables, if any. Comma separated list. + * Output is specified as `"constraint" (binding)` + * A constraint is either `=` (write), `+` (read and write), or `&` (early + clobber) followed by either the name of a specific register in curly braces, + such as `{r0}`, or simply `r` if you want to let LLVM assign it. + * If you're writing to `r0` you'd use `={r0}`, if you're read writing from + `r3` you'd use `+{r3}` and so on. + * Bindings named in the outputs must be mutable bindings or bindings that + are declared but not yet assigned to. + * GBA registers are 32-bit, and you must always use an appropriately sized + type for the binding. + * LLVM assumes when selecting registers for you that no output is written to + until all inputs are read. If this is not the case you need to use the `&` + designation on your output to give LLVM the heads up so that LLVM doesn't + assign it as an input register. +* **INP:** The inputs, if any. Comma separated list. + * Similar to outputs, the input format is `"constraint" (binding)` + * Inputs don't have a symbol prefix, you simply name the specific register in + curly braces or use `r` to let LLVM pick. + * Inputs should always be 32-bit types. (TODO: can you use smaller types and + have it 'just work'?) +* **CLO:** This is possibly _the most important part to get right_. The + "clobbers" part describes what registers are affected by this use of asm. The + compiler will use this to make sure that you don't accidentally destroy any of + your data. 
+ * The clobbers list is a comma separated series of string literals that each + name one of the registers clobbered. + * Example: "r0", "r1", "r3" +* **OPT:** This lets us specify any options. At the moment the only option we + care about is that some asm calls will need to be "volatile". As with reads + and writes, the compiler will attempt to eliminate asm that it thinks isn't + necessary, so if there's no output from an asm block we'll need to mark it + volatile to make sure that it gets done. + +That seems like a whole lot, but since we're only handling BIOS calls in this +section we can tone it down quite a bit: + +* Inputs are always `r0`, `r1`, `r2`, and/or `r3`, depending on function. +* Outputs are always zero or more of `r0`, `r1`, and `r3`. +* Any of the output registers that aren't actually used should be marked as + clobbered. +* All other registers are unaffected. + +All of the GBA BIOS calls are performed using the +[swi](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0068b/BABFCEEG.html) +instruction, combined with a value depending on what BIOS function you're trying +to invoke. If you're in 16-bit code you use the value directly, and if you're in +32-bit mode you shift the value up by 16 bits first. + +### Example BIOS Function: Division + +The GBA doesn't have hardware division. You have to do it in software. + +You can implement that yourself (we might get around to trying that, i was even +sent [a link to a +paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2008/08/tr-2008-141.pdf) +that I promptly did not read), or you can call the BIOS to do it for you and +trust that it's being as efficient as possible. + +GBATEK gives a very clear explanation of it: + +```txt +Signed Division, r0/r1. + r0 signed 32bit Number + r1 signed 32bit Denom +Return: + r0 Number DIV Denom ;signed + r1 Number MOD Denom ;signed + r3 ABS (Number DIV Denom) ;unsigned +For example, incoming -1234, 10 should return -123, -4, +123. 
+The function usually gets caught in an endless loop upon division by zero. +``` + +Of course, the math folks tell me that the `r1` value should be properly called +the "remainder" not the "modulus". We'll go with that for our function, doesn't +hurt to use the correct names. The function itself is a single assert, then we +name some bindings without giving them a value, make the asm call, and then +return what we got. + +```rust +pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { + assert!(denominator != 0); + let div_out: i32; + let rem_out: i32; + unsafe { + asm!(/* ASM */ "swi 0x06" + :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out) + :/* INP */ "{r0}"(numerator), "{r1}"(denominator) + :/* CLO */ "r3" + :/* OPT */ + ); + } + (div_out, rem_out) +} +``` + +I _hope_ this makes sense by now. From 1d3ddafd5021281f2397abf713e92c5835418a0a Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 02:02:16 -0700 Subject: [PATCH 14/33] draft2 --- book/src/02-concepts/02-bios.md | 172 +++++++++++++++++++++----------- 1 file changed, 116 insertions(+), 56 deletions(-) diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index 4f4a90f..1baf3b4 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -25,15 +25,30 @@ I'd like to take a moment to thank [Marc Brinkmann](https://github.com/mbr) [Philipp Oppermann](https://github.com/phil-opp)) for writing [this blog post](http://embed.rs/articles/2016/arm-inline-assembly-rust/). It's at least ten times the tutorial quality as the `asm` entry in the Unstable Book has. In -their defense, the actual spec of how inline ASM works in rust is "basically -what clang does", and that's specified as "basically what GCC does", and that's -basically not specified at all despite GCC being like 30 years old. 
+fairness to the Unstable Book, the actual spec of how inline ASM works in rust +is "basically what clang does", and that's specified as "basically what GCC +does", and that's basically/shockingly not specified much at all despite GCC +being like 30 years old. -So we're in for a very slow, careful, and pedantic ride on this one. +So let's be slow and pedantic about this process. ## Inline ASM -The inline asm docs describe an asm call as looking like this: +**Fair Warning:** Inline asm is one of the least stable parts of Rust overall, +and if you write bad things you can trigger internal compiler errors and panics +and crashes and make LLVM choke and die without explanation. If you write some +inline asm and then your program suddenly stops compiling without +explanation, try commenting out that whole inline asm use and see if it's +causing the problem. Double check that you've written every single part of the +asm call absolutely correctly, etc, etc. + +**Bonus Warning:** The following formatting information is specific to the ARM +flavor of assembly. If you're using `x86` / `x86_64` or `MIPS` or whatever else +then you're unfortunately on your own, I have know knowledge of how to correctly +call the `asm!` macro on those platforms. + +Now then, with those out of the way, the inline asm docs describe an asm call as +looking like this: ```rust asm!(assembly template @@ -46,7 +61,7 @@ asm!(assembly template And once you stick a lot of stuff in there it can _absolutely_ be hard to remember the ordering of the elements. So we'll start with a code block that -has some commends throw in on each line: +has some comments thrown in on each line: ```rust asm!(/* ASM */ TODO @@ -57,58 +72,103 @@ asm!(/* ASM */ TODO ); ``` -Note: it's possible to use an inline ASM style where you allow LLVM to determine -the exact register placement. We will _not_ do that in this section because each -BIOS call has specific input and output slots that we must follow. 
However, if -you want to use inline asm for other purposes elsewhere in your code you can use -it then. +Now we have to decide what we're gonna write. Obviously we're going to do some +instructions, but those instructions use registers, and how are we gonna talk +about them? We've got two choices. -* **ASM:** The actual asm instructions to use. - * When writing inline asm, remember that we're writing for 16-bit THUMB mode - because that's what all of our Rust code is compiled to. You can switch to - 32-bit ARM mode on the fly, but be sure to switch back before the inline ASM - block ends or things will go _bad_. - * You can write code for specific registers (`r0` through `r7` are available - in THUMB mode) or you can write code for _register slots_ and let LLVM pick - what actual registers to assign to what slots. In this case, you'd instead - write `$0`, `$1` and so on (however many you need). Outputs take up one slot - each, followed by inputs taking up one slot each. -* **OUT:** The output variables, if any. Comma separated list. - * Output is specified as `"constraint" (binding)` - * A constraint is either `=` (write), `+` (read and write), or `&` (early - clobber) followed by either the name of a specific register in curly braces, - such as `{r0}`, or simply `r` if you want to let LLVM assign it. - * If you're writing to `r0` you'd use `={r0}`, if you're read writing from - `r3` you'd use `+{r3}` and so on. - * Bindings named in the outputs must be mutable bindings or bindings that - are declared but not yet assigned to. - * GBA registers are 32-bit, and you must always use an appropriately sized - type for the binding. - * LLVM assumes when selecting registers for you that no output is written to - until all inputs are read. If this is not the case you need to use the `&` - designation on your output to give LLVM the heads up so that LLVM doesn't - assign it as an input register. -* **INP:** The inputs, if any. Comma separated list. 
- * Similar to outputs, the input format is `"constraint" (binding)` - * Inputs don't have a symbol prefix, you simply name the specific register in - curly braces or use `r` to let LLVM pick. - * Inputs should always be 32-bit types. (TODO: can you use smaller types and - have it 'just work'?) -* **CLO:** This is possibly _the most important part to get right_. The - "clobbers" part describes what registers are affected by this use of asm. The - compiler will use this to make sure that you don't accidentally destroy any of - your data. - * The clobbers list is a comma separated series of string literals that each - name one of the registers clobbered. - * Example: "r0", "r1", "r3" -* **OPT:** This lets us specify any options. At the moment the only option we - care about is that some asm calls will need to be "volatile". As with reads - and writes, the compiler will attempt to eliminate asm that it thinks isn't - necessary, so if there's no output from an asm block we'll need to mark it - volatile to make sure that it gets done. +1) We can pick each and every register used by specifying exact register names. + In THUMB mode we have 8 registers available, named `r0` through `r7`. If you + switch into 32-bit mode there's additional registers that are also available. -That seems like a whole lot, but since we're only handling BIOS calls in this -section we can tone it down quite a bit: +2) We can specify slots for registers we need and let LLVM decide. In this style + you name your slots `$0`, `$1` and so on. Slot numbers are assigned first to + all specified outputs, then to all specified inputs, in the order that you + list them. + +In the case of the GBA BIOS, each BIOS function has pre-designated input and +output registers, so we will use the first style. If you use inline ASM in other +parts of your code you're free to use the second style. + +### Assembly + +This is just one big string literal. 
You write out one instruction per line, and +excess whitespace is ignored. You can also do comments within your assembly +using `;` to start a comment that goes until the end of the line. + +Assembly convention doesn't consider it unreasonable to comment potentially as +much as _every single line_ of asm that you write when you're getting used to +things. Or even if you are used to things. This is cryptic stuff, there's a +reason we avoid writing in it as much as possible. + +Remember that our Rust code is in 16-bit mode. You _can_ switch to 32-bit mode +within your asm as long as you switch back by the time the block ends. Otherwise +you'll have a bad time. + +### Outputs + +A comma separated list. Each entry looks like + +* `"constraint" (binding)` + +An output constraint starts with a symbol: + +* `=` for write only +* `+` for reads and writes +* `&` for "early clobber", meaning that you'll write to this at some point + before all input values have been read. It prevents this register from being + assigned to an input register. + +Followed by _either_ the letter `r` (if you want LLVM to pick the register to +use) or curly braces around a specific register (if you want to pick). + +* The binding can be any 32-bit sized binding in scope (`i32`, `u32`, `isize`, + `usize`, etc). +* If your binding has bit pattern requirements ("must be non-zero", etc) you are + responsible for upholding that. +* If your binding type will try to `Drop` later then you are responsible for it + being in a fit state to do that. +* The binding must be either a mutable binding or a binding that was + pre-declared but not yet assigned. + +Anything else is UB. + +### Inputs + +This is a similar comma separated list. + +* `"constraint" (binding)` + +An input constraint doesn't have the symbol prefix, you just pick either `r` or +a named register with curly braces around it. + +* An input binding must be 32-bit sized. 
+* An input binding _should_ be a type that is `Copy` but this is not an absolute + requirement. Having the input be read is semantically similar to using + `core::ptr::read(&binding)` and forgetting the value when you're done. + +### Clobbers + +Sometimes your asm will touch registers other than the ones declared for input +and output. + +Clobbers are declared as a comma separated list of string literals naming +specific registers. You don't use curly braces with clobbers. + +LLVM _needs_ to know this information. It can move things around to keep your +data safe, but only if you tell it what's about to happen. + +Failure to define all of your clobbers can cause UB. + +### Options + +There's only one option we'd care to specify, and we don't even always need it. +That option is "volatile". + +Just like with a function call, LLVM will skip a block of asm if it doesn't see +that any outputs from the asm were used later on. A lot of our BIOS calls will +need to be declared "volatile" because to LLVM they don't seem to do anything. + +### BIOS ASM * Inputs are always `r0`, `r1`, `r2`, and/or `r3`, depending on function. * Outputs are always zero or more of `r0`, `r1`, and `r3`. From cf18b0a2cb57ff46a7fbbabbd195290a048bcb2d Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 02:12:34 -0700 Subject: [PATCH 15/33] important todo --- book/src/02-concepts/02-bios.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index 1baf3b4..5ecb626 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -18,7 +18,12 @@ it to call the [GBA BIOS Functions](https://problemkaputt.de/gbatek.htm#biosfunctions). Note that BIOS calls have _more overhead than normal function calls_, so don't -go using them all over the place if you don't have to. +go using them all over the place if you don't have to. 
They're also usually +written more to be compact in terms of code than for raw speed, so you actually +can out speed them in some cases. Between the increased overhead and not being +as speed optimized, you can sometimes do a faster job without calling the BIOS +at all. (TODO: investigate more about what parts of the BIOS we could +potentially offer faster alternatives for.) I'd like to take a moment to thank [Marc Brinkmann](https://github.com/mbr) (with contributions from [Oliver Schneider](https://github.com/oli-obk) and From 2542662a2cec10324f7a4aa74cb66d223a8d302c Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 02:17:45 -0700 Subject: [PATCH 16/33] whoops spelling --- book/src/02-concepts/02-bios.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index 5ecb626..5b8538a 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -49,8 +49,8 @@ asm call absolutely correctly, etc, etc. **Bonus Warning:** The following formatting information is specific to the ARM flavor of assembly. If you're using `x86` / `x86_64` or `MIPS` or whatever else -then you're unfortunately on your own, I have know knowledge of how to correctly -call the `asm!` macro on those platforms. +then you're unfortunately on your own, I have no first hand knowledge of how to +correctly call the `asm!` macro on those platforms. Now then, with those out of the way, the inline asm docs describe an asm call as looking like this: From e6547d78ba5949d9affbc21d919273ffeae518fc Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 02:22:07 -0700 Subject: [PATCH 17/33] I should go to bed. 
--- book/src/02-concepts/02-bios.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index 5b8538a..5bcd216 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -191,13 +191,13 @@ to invoke. If you're in 16-bit code you use the value directly, and if you're in The GBA doesn't have hardware division. You have to do it in software. -You can implement that yourself (we might get around to trying that, i was even -sent [a link to a +We could potentially implement this in Rust (we might get around to trying that, +I was even sent [a link to a paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2008/08/tr-2008-141.pdf) -that I promptly did not read), or you can call the BIOS to do it for you and -trust that it's being as efficient as possible. +that I promptly did not actually read right away), or you can call the BIOS to +do it for you and trust that big N did a good enough job. -GBATEK gives a very clear explanation of it: +GBATEK gives a fairly clear explanation of our inputs and outputs: ```txt Signed Division, r0/r1. @@ -211,11 +211,11 @@ For example, incoming -1234, 10 should return -123, -4, +123. The function usually gets caught in an endless loop upon division by zero. ``` -Of course, the math folks tell me that the `r1` value should be properly called -the "remainder" not the "modulus". We'll go with that for our function, doesn't -hurt to use the correct names. The function itself is a single assert, then we -name some bindings without giving them a value, make the asm call, and then -return what we got. +The math folks tell me that the `r1` value should be properly called the +"remainder" not the "modulus". We'll go with that for our function, doesn't hurt +to use the correct names. 
The function itself is an assert against dividing by +`0`, then we name some bindings _without_ giving them a value, we make the asm +call, and then return what we got. ```rust pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { From f6ad545905d79a40f8caf5685ec7094ffec84e55 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 11:04:56 -0700 Subject: [PATCH 18/33] warning update --- book/src/02-concepts/02-bios.md | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index 5bcd216..44e5391 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -47,10 +47,11 @@ explanation, try commenting out that whole inline asm use and see if it's causing the problem. Double check that you've written every single part of the asm call absolutely correctly, etc, etc. -**Bonus Warning:** The following formatting information is specific to the ARM -flavor of assembly. If you're using `x86` / `x86_64` or `MIPS` or whatever else -then you're unfortunately on your own, I have no first hand knowledge of how to -correctly call the `asm!` macro on those platforms. +**Bonus Warning:** The general information that follows regarding the asm macro +is consistent from system to system, but specific information about register +names, register quantities, asm instruction argument ordering, and so on is +specific to ARM on the GBA. If you're programming for any other device you'll +need to carefully investigate that before you begin. 
Now then, with those out of the way, the inline asm docs describe an asm call as looking like this: From 09b936262f0fcffacb1fc76fb086d80239e6790a Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 15:17:30 -0700 Subject: [PATCH 19/33] more BIOS --- book/src/02-concepts/02-bios.md | 305 ++++++++++++++++++++++++++++++-- src/bios.rs | 182 +++++++++++++++++++ src/lib.rs | 59 +----- 3 files changed, 473 insertions(+), 73 deletions(-) create mode 100644 src/bios.rs diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index 44e5391..e949d5e 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -127,8 +127,7 @@ An output constraint starts with a symbol: Followed by _either_ the letter `r` (if you want LLVM to pick the register to use) or curly braces around a specific register (if you want to pick). -* The binding can be any 32-bit sized binding in scope (`i32`, `u32`, `isize`, - `usize`, etc). +* The binding can be any single 32-bit or smaller value. * If your binding has bit pattern requirements ("must be non-zero", etc) you are responsible for upholding that. * If your binding type will try to `Drop` later then you are responsible for it @@ -147,7 +146,7 @@ This is a similar comma separated list. An input constraint doesn't have the symbol prefix, you just pick either `r` or a named register with curly braces around it. -* An input binding must be 32-bit sized. +* An input binding must be a single 32-bit or smaller value. * An input binding _should_ be a type that is `Copy` but this is not an absolute requirement. Having the input be read is semantically similar to using `core::ptr::read(&binding)` and forgetting the value when you're done. @@ -167,12 +166,11 @@ Failure to define all of your clobbers can cause UB. ### Options -There's only one option we'd care to specify, and we don't even always need it. -That option is "volatile". +There's only one option we'd care to specify. 
That option is "volatile". Just like with a function call, LLVM will skip a block of asm if it doesn't see -that any outputs from the asm were used later on. A lot of our BIOS calls will -need to be declared "volatile" because to LLVM they don't seem to do anything. +that any outputs from the asm were used later on. Nearly every single BIOS call +(other than the math operations) will need to be marked as "volatile". ### BIOS ASM @@ -190,15 +188,8 @@ to invoke. If you're in 16-bit code you use the value directly, and if you're in ### Example BIOS Function: Division -The GBA doesn't have hardware division. You have to do it in software. - -We could potentially implement this in Rust (we might get around to trying that, -I was even sent [a link to a -paper](https://www.microsoft.com/en-us/research/wp-content/uploads/2008/08/tr-2008-141.pdf) -that I promptly did not actually read right away), or you can call the BIOS to -do it for you and trust that big N did a good enough job. - -GBATEK gives a fairly clear explanation of our inputs and outputs: +For our example we'll use the division function, because GBATEK gives very clear +instructions on how each register is used with that one: ```txt Signed Division, r0/r1. @@ -214,7 +205,7 @@ The function usually gets caught in an endless loop upon division by zero. The math folks tell me that the `r1` value should be properly called the "remainder" not the "modulus". We'll go with that for our function, doesn't hurt -to use the correct names. The function itself is an assert against dividing by +to use the correct names. Our Rust function has an assert against dividing by `0`, then we name some bindings _without_ giving them a value, we make the asm call, and then return what we got. @@ -235,4 +226,282 @@ pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { } ``` -I _hope_ this makes sense by now. +I _hope_ this all makes sense by now. 
+ +# BIOS Function Definitions + +What follows is one entry for every BIOS call function, sorted by `swi` value +(which also _kinda_ sorts them into themed groups too). + +All functions here are marked with `#[inline(always)]`, which I wouldn't +normally bother with, but the compiler can't see that the ASM we use is +immediately a second function call, so we want to be very sure that it gets +inlined as much as possible. You should probably be using Link Time Optimization +in your release mode GBA games just to get that extra boost, but +`#[inline(always)]` will help keep debug builds going at a good speed too. + +The entries here in the book are basically just copy pasting the source for each +function from the `gba::bios` module of the crate. The actual asm invocation +itself is uninteresting, but I've attempted to make the documentation for each +function clear and complete. + +## CPU Control / Reset + +### Soft Reset (0x00) + +```rust +/// (`swi 0x00`) SoftReset the device. +/// +/// This function does not ever return. +/// +/// Instead, it clears the top `0x200` bytes of IWRAM (containing stacks, and +/// BIOS IRQ vector/flags), re-initializes the system, supervisor, and irq stack +/// pointers (new values listed below), sets `r0` through `r12`, `LR_svc`, +/// `SPSR_svc`, `LR_irq`, and `SPSR_irq` to zero, and enters system mode. The +/// return address is loaded into `r14` and then the function jumps there with +/// `bx r14`. +/// +/// * sp_svc: `0x300_7FE0` +/// * sp_irq: `0x300_7FA0` +/// * sp_sys: `0x300_7F00` +/// * Zero-filled Area: `0x300_7E00` to `0x300_7FFF` +/// * Return Address: Depends on the 8-bit flag value at `0x300_7FFA`. In either +/// case execution proceeds in ARM mode. +/// * zero flag: `0x800_0000` (ROM), which for our builds means that the +/// `crt0` program to execute (just like with a fresh boot), and then +/// control passes into `main` and so on. 
+/// * non-zero flag: `0x200_0000` (RAM), This is where a multiboot image would +/// go if you were doing a multiboot thing. However, this project doesn't +/// support multiboot at the moment. You'd need an entirely different build +/// pipeline because there's differences in header format and things like +/// that. Perhaps someday, but probably not even then. Submit the PR for it +/// if you like! +/// +/// ## Safety +/// +/// This functions isn't ever unsafe to the current iteration of the program. +/// However, because not all memory is fully cleared you theoretically could +/// threaten the _next_ iteration of the program that runs. I'm _fairly_ +/// convinced that you can't actually use this to force purely safe code to +/// perform UB, but such a scenario might exist. +#[inline(always)] +pub unsafe fn soft_reset() -> ! { + asm!(/* ASM */ "swi 0x00" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + core::hint::unreachable_unchecked() +} +``` + +### Register / RAM Reset (0x01) + +```rust +/// (`swi 0x01`) RegisterRamReset. +/// +/// Clears the portions of memory given by the `flags` value, sets the Display +/// Control Register to `0x80` (forced blank and nothing else), then returns. +/// +/// * Flag bits: +/// 0) Clears the 256k of EWRAM (don't use if this is where your function call +/// will return to!) +/// 1) Clears the 32k of IWRAM _excluding_ the last `0x200` bytes (see also: +/// the `soft_reset` function). +/// 2) Clears all Palette data. +/// 3) Clears all VRAM. +/// 4) Clears all OAM (reminder: a zeroed obj isn't disabled!) +/// 5) Reset SIO registers (resets them to general purpose mode) +/// 6) Reset Sound registers +/// 7) Reset all IO registers _other than_ SIO and Sound +/// +/// **Bug:** The least significant byte of `SIODATA32` is always zeroed, even if +/// bit 5 was not enabled. This is sadly a bug in the design of the GBA itself. 
+/// +/// ## Safety +/// +/// It is generally a safe operation to suddenly clear any part of the GBA's +/// memory, except in the case that you were executing out of IWRAM and clear +/// that. If you do that you return to nothing and have a bad time. +#[inline(always)] +pub unsafe fn register_ram_reset(flags: u8) { + asm!(/* ASM */ "swi 0x01" + :/* OUT */ // none + :/* INP */ "{r0}"(flags) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} +//TODO(lokathor): newtype this flag business. +``` + +### Halt (0x02) +### Stop / Sleep (0x03) +### Interrupt Wait (0x04) +### VBlank Interrupt Wait (0x05) + +## Math + +For the math functions to make sense you'll want to be familiar with the fixed +point math concepts from the [Fixed Only](../01-quirks/02-fixed_only.md) section +of the Quirks chapter. + +### Div (0x06) + +```rust +/// (`swi 0x06`) Software Division and Remainder. +/// +/// ## Panics +/// +/// If the denominator is 0. +#[inline(always)] +pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { + assert!(denominator != 0); + let div_out: i32; + let rem_out: i32; + unsafe { + asm!(/* ASM */ "swi 0x06" + :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out) + :/* INP */ "{r0}"(numerator), "{r1}"(denominator) + :/* CLO */ "r3" + :/* OPT */ + ); + } + (div_out, rem_out) +} + +/// As `div_rem`, but keeping only the `div` part. +#[inline(always)] +pub fn div(numerator: i32, denominator: i32) -> i32 { + div_rem(numerator, denominator).0 +} + +/// As `div_rem`, but keeping only the `rem` part. +#[inline(always)] +pub fn rem(numerator: i32, denominator: i32) -> i32 { + div_rem(numerator, denominator).1 +} +``` + +### DivArm (0x07) + +This is exactly like Div, but with the input arguments swapped. It ends up being +exactly 3 cycles slower than normal Div because it swaps the input arguments to +the positions that Div is expecting ("move r0 -> r3, mov r1 -> r0, mov r3 -> +r1") and then goes to the normal Div function. + +You can basically forget about this function. 
It's for compatibility with other +ARM software conventions, which we don't need. Just use normal Div. + +### Sqrt (0x08) + +```rust +/// (`swi 0x08`) Integer square root. +/// +/// If you want more fractional precision, you can shift your input to the left +/// by `2n` bits to get `n` more bits of fractional precision in your output. +#[inline(always)] +pub fn sqrt(val: u32) -> u16 { + let out: u16; + unsafe { + asm!(/* ASM */ "swi 0x08" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(val) + :/* CLO */ "r1", "r3" + :/* OPT */ + ); + } + out +} +``` + +### ArcTan (0x09) + +```rust +/// (`swi 0x09`) Gives the arctangent of `theta`. +/// +/// The input format is 1 bit for sign, 1 bit for integral part, 14 bits for +/// fractional part. +/// +/// Accuracy suffers if `theta` is less than `-pi/4` or greater than `pi/4`. +#[inline(always)] +pub fn atan(theta: i16) -> i16 { + let out: i16; + unsafe { + asm!(/* ASM */ "swi 0x09" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(theta) + :/* CLO */ "r1", "r3" + :/* OPT */ + ); + } + out +} +``` + +### ArcTan2 (0x0A) + +```rust +/// (`swi 0x0A`) Gives the atan2 of `y` over `x`. +/// +/// The output `theta` value maps into the range `[0, 2pi)`, or `0 .. 2pi` if +/// you prefer Rust's range notation. +/// +/// `y` and `x` use the same format as with `atan`: 1 bit for sign, 1 bit for +/// integral, 14 bits for fractional. 
+#[inline(always)] +pub fn atan2(y: i16, x: i16) -> u16 { + let out: u16; + unsafe { + asm!(/* ASM */ "swi 0x0A" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(x), "{r1}"(y) + :/* CLO */ "r3" + :/* OPT */ + ); + } + out +} +``` + +## Memory Modification + +### CPU Set (0x08) +### CPU Fast Set (0x0C) +### Get BIOS Checksum (0x0D) +### BG Affine Set (0x0E) +### Obj Affine Set (0x0F) + +## Decompression + +### BitUnPack (0x10) +### LZ77UnCompReadNormalWrite8bit (0x11) +### LZ77UnCompReadNormalWrite16bit (0x12) +### HuffUnCompReadNormal (0x13) +### RLUnCompReadNormalWrite8bit (0x14) +### RLUnCompReadNormalWrite16bit (0x15) +### Diff8bitUnFilterWrite8bit (0x16) +### Diff8bitUnFilterWrite16bit (0x17) +### Diff16bitUnFilter (0x18) + +## Sound + +### SoundBias (0x19) +### SoundDriverInit (0x1A) +### SoundDriverMode (0x1B) +### SoundDriverMain (0x1C) +### SoundDriverVSync (0x1D) +### SoundChannelClear (0x1E) +### MidiKey2Freq (0x1F) +### SoundWhatever0 (0x20) +### SoundWhatever1 (0x21) +### SoundWhatever2 (0x22) +### SoundWhatever3 (0x23) +### SoundWhatever4 (0x24) +### MultiBoot (0x25) +### HardReset (0x26) +### CustomHalt (0x27) +### SoundDriverVSyncOff (0x28) +### SoundDriverVSyncOn (0x29) +### SoundGetJumpList (0x2A) diff --git a/src/bios.rs b/src/bios.rs new file mode 100644 index 0000000..64d202c --- /dev/null +++ b/src/bios.rs @@ -0,0 +1,182 @@ +//! This module contains wrappers for all GBA BIOS function calls. +//! +//! A GBA BIOS call has significantly more overhead than a normal function call, +//! so think carefully before using them too much. +//! +//! The actual content of each function here is generally a single inline asm +//! instruction to invoke the correct BIOS function (`swi x`, with `x` being +//! whatever value is necessary for that function). Some functions also perform +//! necessary checks to save you from yourself, such as not dividing by zero. + +/// (`swi 0x00`) SoftReset the device. +/// +/// This function does not ever return. 
+/// +/// Instead, it clears the top `0x200` bytes of IWRAM (containing stacks, and +/// BIOS IRQ vector/flags), re-initializes the system, supervisor, and irq stack +/// pointers (new values listed below), sets `r0` through `r12`, `LR_svc`, +/// `SPSR_svc`, `LR_irq`, and `SPSR_irq` to zero, and enters system mode. The +/// return address is loaded into `r14` and then the function jumps there with +/// `bx r14`. +/// +/// * sp_svc: `0x300_7FE0` +/// * sp_irq: `0x300_7FA0` +/// * sp_sys: `0x300_7F00` +/// * Zero-filled Area: `0x300_7E00` to `0x300_7FFF` +/// * Return Address: Depends on the 8-bit flag value at `0x300_7FFA`. In either +/// case execution proceeds in ARM mode. +/// * zero flag: `0x800_0000` (ROM), which for our builds means that the +/// `crt0` program to execute (just like with a fresh boot), and then +/// control passes into `main` and so on. +/// * non-zero flag: `0x200_0000` (RAM), This is where a multiboot image would +/// go if you were doing a multiboot thing. However, this project doesn't +/// support multiboot at the moment. You'd need an entirely different build +/// pipeline because there's differences in header format and things like +/// that. Perhaps someday, but probably not even then. Submit the PR for it +/// if you like! +/// +/// ## Safety +/// +/// This functions isn't ever unsafe to the current iteration of the program. +/// However, because not all memory is fully cleared you theoretically could +/// threaten the _next_ iteration of the program that runs. I'm _fairly_ +/// convinced that you can't actually use this to force purely safe code to +/// perform UB, but such a scenario might exist. +#[inline(always)] +pub unsafe fn soft_reset() -> ! { + asm!(/* ASM */ "swi 0x00" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + core::hint::unreachable_unchecked() +} + +/// (`swi 0x01`) RegisterRamReset. 
+/// +/// Clears the portions of memory given by the `flags` value, sets the Display +/// Control Register to `0x80` (forced blank and nothing else), then returns. +/// +/// * Flag bits: +/// 0) Clears the 256k of EWRAM (don't use if this is where your function call +/// will return to!) +/// 1) Clears the 32k of IWRAM _excluding_ the last `0x200` bytes (see also: +/// the `soft_reset` function). +/// 2) Clears all Palette data. +/// 3) Clears all VRAM. +/// 4) Clears all OAM (reminder: a zeroed obj isn't disabled!) +/// 5) Reset SIO registers (resets them to general purpose mode) +/// 6) Reset Sound registers +/// 7) Reset all IO registers _other than_ SIO and Sound +/// +/// **Bug:** The least significant byte of `SIODATA32` is always zeroed, even if +/// bit 5 was not enabled. This is sadly a bug in the design of the GBA itself. +/// +/// ## Safety +/// +/// It is generally a safe operation to suddenly clear any part of the GBA's +/// memory, except in the case that you were executing out of IWRAM and clear +/// that. If you do that you return to nothing and have a bad time. +#[inline(always)] +pub unsafe fn register_ram_reset(flags: u8) { + asm!(/* ASM */ "swi 0x01" + :/* OUT */ // none + :/* INP */ "{r0}"(flags) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} +//TODO(lokathor): newtype this flag business. + +/// (`swi 0x06`) Software Division and Remainder. +/// +/// ## Panics +/// +/// If the denominator is 0. +#[inline(always)] +pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { + assert!(denominator != 0); + let div_out: i32; + let rem_out: i32; + unsafe { + asm!(/* ASM */ "swi 0x06" + :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out) + :/* INP */ "{r0}"(numerator), "{r1}"(denominator) + :/* CLO */ "r3" + :/* OPT */ + ); + } + (div_out, rem_out) +} + +/// As `div_rem`, but keeping only the `div` part. 
+#[inline(always)] +pub fn div(numerator: i32, denominator: i32) -> i32 { + div_rem(numerator, denominator).0 +} + +/// As `div_rem`, but keeping only the `rem` part. +#[inline(always)] +pub fn rem(numerator: i32, denominator: i32) -> i32 { + div_rem(numerator, denominator).1 +} + +/// (`swi 0x08`) Integer square root. +/// +/// If you want more fractional precision, you can shift your input to the left +/// by `2n` bits to get `n` more bits of fractional precision in your output. +#[inline(always)] +pub fn sqrt(val: u32) -> u16 { + let out: u16; + unsafe { + asm!(/* ASM */ "swi 0x08" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(val) + :/* CLO */ "r1", "r3" + :/* OPT */ + ); + } + out +} + +/// (`swi 0x09`) Gives the arctangent of `theta`. +/// +/// The input format is 1 bit for sign, 1 bit for integral part, 14 bits for +/// fractional part. +/// +/// Accuracy suffers if `theta` is less than `-pi/4` or greater than `pi/4`. +#[inline(always)] +pub fn atan(theta: i16) -> i16 { + let out: i16; + unsafe { + asm!(/* ASM */ "swi 0x09" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(theta) + :/* CLO */ "r1", "r3" + :/* OPT */ + ); + } + out +} + +/// (`swi 0x0A`) Gives the atan2 of `y` over `x`. +/// +/// The output `theta` value maps into the range `[0, 2pi)`, or `0 .. 2pi` if +/// you prefer Rust's range notation. +/// +/// `y` and `x` use the same format as with `atan`: 1 bit for sign, 1 bit for +/// integral, 14 bits for fractional. +#[inline(always)] +pub fn atan2(y: i16, x: i16) -> u16 { + let out: u16; + unsafe { + asm!(/* ASM */ "swi 0x0A" + :/* OUT */ "={r0}"(out) + :/* INP */ "{r0}"(x), "{r1}"(y) + :/* CLO */ "r3" + :/* OPT */ + ); + } + out +} diff --git a/src/lib.rs b/src/lib.rs index 781603b..7b53307 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(not(test), feature(asm))] #![warn(missing_docs)] -#![allow(clippy::cast_lossless)] +//#![allow(clippy::cast_lossless)] #![deny(clippy::float_arithmetic)] //! 
This crate helps you write GBA ROMs. @@ -28,6 +28,9 @@ pub mod core_extras; pub(crate) use crate::core_extras::*; +#[cfg(not(test))] +pub mod bios; + pub mod io_registers; pub mod video_ram; @@ -37,57 +40,3 @@ pub(crate) use crate::video_ram::*; pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { blue << 10 | green << 5 | red } - -/// BIOS Call: Div (GBA SWI 0x06). -/// -/// Gives just the DIV output of `numerator / denominator`. -/// -/// # Panics -/// -/// If `denominator` is 0. -#[inline] -pub fn div(numerator: i32, denominator: i32) -> i32 { - div_rem(numerator, denominator).0 -} - -/// BIOS Call: Div (GBA SWI 0x06). -/// -/// Gives just the MOD output of `numerator / denominator`. -/// -/// # Panics -/// -/// If `denominator` is 0. -#[inline] -pub fn rem(numerator: i32, denominator: i32) -> i32 { - div_rem(numerator, denominator).1 -} - -/// BIOS Call: Div (GBA SWI 0x06). -/// -/// Gives both the DIV and REM output of `numerator / denominator`. -/// -/// # Panics -/// -/// If `denominator` is 0. -#[inline] -pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { - assert!(denominator != 0); - #[cfg(not(test))] - { - let div_out: i32; - let mod_out: i32; - unsafe { - asm!(/* assembly template */ "swi 0x06" - :/* output operands */ "={r0}"(div_out), "={r1}"(mod_out) - :/* input operands */ "{r0}"(numerator), "{r1}"(denominator) - :/* clobbers */ "r3" - :/* options */ - ); - } - (div_out, mod_out) - } - #[cfg(test)] - { - (numerator / denominator, numerator % denominator) - } -} From 74d50b9218a2586f58a2042efce6e7e425e627d1 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 15:20:07 -0700 Subject: [PATCH 20/33] typos --- src/bios.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bios.rs b/src/bios.rs index 64d202c..6c1e1a7 100644 --- a/src/bios.rs +++ b/src/bios.rs @@ -65,7 +65,7 @@ pub unsafe fn soft_reset() -> ! { /// the `soft_reset` function). /// 2) Clears all Palette data. /// 3) Clears all VRAM. 
-/// 4) Clears all OAM (reminder: a zeroed obj isn't disabled!) +/// 4) Clears all OAM (reminder: a zeroed object isn't disabled!) /// 5) Reset SIO registers (resets them to general purpose mode) /// 6) Reset Sound registers /// 7) Reset all IO registers _other than_ SIO and Sound @@ -76,8 +76,8 @@ pub unsafe fn soft_reset() -> ! { /// ## Safety /// /// It is generally a safe operation to suddenly clear any part of the GBA's -/// memory, except in the case that you were executing out of IWRAM and clear -/// that. If you do that you return to nothing and have a bad time. +/// memory, except in the case that you were executing out of EWRAM and clear +/// that. If you do then you return to nothing and have a bad time. #[inline(always)] pub unsafe fn register_ram_reset(flags: u8) { asm!(/* ASM */ "swi 0x01" From 5408a11f54d1067e7b8df538187e25363ed70b89 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 15:22:27 -0700 Subject: [PATCH 21/33] bit/byte mixup --- src/bios.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/bios.rs b/src/bios.rs index 6c1e1a7..196d0b0 100644 --- a/src/bios.rs +++ b/src/bios.rs @@ -62,16 +62,16 @@ pub unsafe fn soft_reset() -> ! { /// 0) Clears the 256k of EWRAM (don't use if this is where your function call /// will return to!) /// 1) Clears the 32k of IWRAM _excluding_ the last `0x200` bytes (see also: -/// the `soft_reset` function). -/// 2) Clears all Palette data. -/// 3) Clears all VRAM. +/// the `soft_reset` function) +/// 2) Clears all Palette data +/// 3) Clears all VRAM /// 4) Clears all OAM (reminder: a zeroed object isn't disabled!) /// 5) Reset SIO registers (resets them to general purpose mode) /// 6) Reset Sound registers /// 7) Reset all IO registers _other than_ SIO and Sound /// -/// **Bug:** The least significant byte of `SIODATA32` is always zeroed, even if -/// bit 5 was not enabled. This is sadly a bug in the design of the GBA itself. 
+/// **Bug:** The LSB of `SIODATA32` is always zeroed, even if bit 5 was not +/// enabled. This is sadly a bug in the design of the GBA itself. /// /// ## Safety /// From c892ac96815fbfa0be76c0aff0eadc596923e633 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 20:54:50 -0700 Subject: [PATCH 22/33] Once we use one proc-macro, we might use them all --- src/macros.rs | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 src/macros.rs diff --git a/src/macros.rs b/src/macros.rs deleted file mode 100644 index 3a38290..0000000 --- a/src/macros.rs +++ /dev/null @@ -1,8 +0,0 @@ -//! Module for all macros. -//! -//! Macros are the only thing in Rust where declaration order matters, so we -//! place all of them here regardless of what they do so that the macros module -//! can appear at the "top" of the library and all other modules can see them -//! properly. - -// no macros yet! From 71a2de023fc647ce1fdada6178cf326d56091708 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 20:55:02 -0700 Subject: [PATCH 23/33] all sorts of new bios --- book/src/02-concepts/02-bios.md | 282 +-------------------------- src/bios.rs | 330 +++++++++++++++++++++++++++++++- 2 files changed, 335 insertions(+), 277 deletions(-) diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index e949d5e..d3f942f 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -228,280 +228,12 @@ pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { I _hope_ this all makes sense by now. -# BIOS Function Definitions +## All The BIOS Functions -What follows is one entry for every BIOS call function, sorted by `swi` value -(which also _kinda_ sorts them into themed groups too). +As for a full list of all the specific BIOS functions and their use, you should +check the `gba::bios` module within the `gba` crate. There's just so many of +them that enumerating them all here wouldn't serve much purpose. 
-All functions here are marked with `#[inline(always)]`, which I wouldn't -normally bother with, but the compiler can't see that the ASM we use is -immediately a second function call, so we want to be very sure that it gets -inlined as much as possible. You should probably be using Link Time Optimization -in your release mode GBA games just to get that extra boost, but -`#[inline(always)]` will help keep debug builds going at a good speed too. - -The entries here in the book are basically just copy pasting the source for each -function from the `gba::bios` module of the crate. The actual asm invocation -itself is uninteresting, but I've attempted to make the documentation for each -function clear and complete. - -## CPU Control / Reset - -### Soft Reset (0x00) - -```rust -/// (`swi 0x00`) SoftReset the device. -/// -/// This function does not ever return. -/// -/// Instead, it clears the top `0x200` bytes of IWRAM (containing stacks, and -/// BIOS IRQ vector/flags), re-initializes the system, supervisor, and irq stack -/// pointers (new values listed below), sets `r0` through `r12`, `LR_svc`, -/// `SPSR_svc`, `LR_irq`, and `SPSR_irq` to zero, and enters system mode. The -/// return address is loaded into `r14` and then the function jumps there with -/// `bx r14`. -/// -/// * sp_svc: `0x300_7FE0` -/// * sp_irq: `0x300_7FA0` -/// * sp_sys: `0x300_7F00` -/// * Zero-filled Area: `0x300_7E00` to `0x300_7FFF` -/// * Return Address: Depends on the 8-bit flag value at `0x300_7FFA`. In either -/// case execution proceeds in ARM mode. -/// * zero flag: `0x800_0000` (ROM), which for our builds means that the -/// `crt0` program to execute (just like with a fresh boot), and then -/// control passes into `main` and so on. -/// * non-zero flag: `0x200_0000` (RAM), This is where a multiboot image would -/// go if you were doing a multiboot thing. However, this project doesn't -/// support multiboot at the moment. 
You'd need an entirely different build -/// pipeline because there's differences in header format and things like -/// that. Perhaps someday, but probably not even then. Submit the PR for it -/// if you like! -/// -/// ## Safety -/// -/// This functions isn't ever unsafe to the current iteration of the program. -/// However, because not all memory is fully cleared you theoretically could -/// threaten the _next_ iteration of the program that runs. I'm _fairly_ -/// convinced that you can't actually use this to force purely safe code to -/// perform UB, but such a scenario might exist. -#[inline(always)] -pub unsafe fn soft_reset() -> ! { - asm!(/* ASM */ "swi 0x00" - :/* OUT */ // none - :/* INP */ // none - :/* CLO */ // none - :/* OPT */ "volatile" - ); - core::hint::unreachable_unchecked() -} -``` - -### Register / RAM Reset (0x01) - -```rust -/// (`swi 0x01`) RegisterRamReset. -/// -/// Clears the portions of memory given by the `flags` value, sets the Display -/// Control Register to `0x80` (forced blank and nothing else), then returns. -/// -/// * Flag bits: -/// 0) Clears the 256k of EWRAM (don't use if this is where your function call -/// will return to!) -/// 1) Clears the 32k of IWRAM _excluding_ the last `0x200` bytes (see also: -/// the `soft_reset` function). -/// 2) Clears all Palette data. -/// 3) Clears all VRAM. -/// 4) Clears all OAM (reminder: a zeroed obj isn't disabled!) -/// 5) Reset SIO registers (resets them to general purpose mode) -/// 6) Reset Sound registers -/// 7) Reset all IO registers _other than_ SIO and Sound -/// -/// **Bug:** The least significant byte of `SIODATA32` is always zeroed, even if -/// bit 5 was not enabled. This is sadly a bug in the design of the GBA itself. -/// -/// ## Safety -/// -/// It is generally a safe operation to suddenly clear any part of the GBA's -/// memory, except in the case that you were executing out of IWRAM and clear -/// that. If you do that you return to nothing and have a bad time. 
-#[inline(always)] -pub unsafe fn register_ram_reset(flags: u8) { - asm!(/* ASM */ "swi 0x01" - :/* OUT */ // none - :/* INP */ "{r0}"(flags) - :/* CLO */ // none - :/* OPT */ "volatile" - ); -} -//TODO(lokathor): newtype this flag business. -``` - -### Halt (0x02) -### Stop / Sleep (0x03) -### Interrupt Wait (0x04) -### VBlank Interrupt Wait (0x05) - -## Math - -For the math functions to make sense you'll want to be familiar with the fixed -point math concepts from the [Fixed Only](../01-quirks/02-fixed_only.md) section -of the Quirks chapter. - -### Div (0x06) - -```rust -/// (`swi 0x06`) Software Division and Remainder. -/// -/// ## Panics -/// -/// If the denominator is 0. -#[inline(always)] -pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { - assert!(denominator != 0); - let div_out: i32; - let rem_out: i32; - unsafe { - asm!(/* ASM */ "swi 0x06" - :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out) - :/* INP */ "{r0}"(numerator), "{r1}"(denominator) - :/* CLO */ "r3" - :/* OPT */ - ); - } - (div_out, rem_out) -} - -/// As `div_rem`, but keeping only the `div` part. -#[inline(always)] -pub fn div(numerator: i32, denominator: i32) -> i32 { - div_rem(numerator, denominator).0 -} - -/// As `div_rem`, but keeping only the `rem` part. -#[inline(always)] -pub fn rem(numerator: i32, denominator: i32) -> i32 { - div_rem(numerator, denominator).1 -} -``` - -### DivArm (0x07) - -This is exactly like Div, but with the input arguments swapped. It ends up being -exactly 3 cycles slower than normal Div because it swaps the input arguments to -the positions that Div is expecting ("move r0 -> r3, mov r1 -> r0, mov r3 -> -r1") and then goes to the normal Div function. - -You can basically forget about this function. It's for compatibility with other -ARM software conventions, which we don't need. Just use normal Div. - -### Sqrt (0x08) - -```rust -/// (`swi 0x08`) Integer square root. 
-/// -/// If you want more fractional precision, you can shift your input to the left -/// by `2n` bits to get `n` more bits of fractional precision in your output. -#[inline(always)] -pub fn sqrt(val: u32) -> u16 { - let out: u16; - unsafe { - asm!(/* ASM */ "swi 0x08" - :/* OUT */ "={r0}"(out) - :/* INP */ "{r0}"(val) - :/* CLO */ "r1", "r3" - :/* OPT */ - ); - } - out -} -``` - -### ArcTan (0x09) - -```rust -/// (`swi 0x09`) Gives the arctangent of `theta`. -/// -/// The input format is 1 bit for sign, 1 bit for integral part, 14 bits for -/// fractional part. -/// -/// Accuracy suffers if `theta` is less than `-pi/4` or greater than `pi/4`. -#[inline(always)] -pub fn atan(theta: i16) -> i16 { - let out: i16; - unsafe { - asm!(/* ASM */ "swi 0x09" - :/* OUT */ "={r0}"(out) - :/* INP */ "{r0}"(theta) - :/* CLO */ "r1", "r3" - :/* OPT */ - ); - } - out -} -``` - -### ArcTan2 (0x0A) - -```rust -/// (`swi 0x0A`) Gives the atan2 of `y` over `x`. -/// -/// The output `theta` value maps into the range `[0, 2pi)`, or `0 .. 2pi` if -/// you prefer Rust's range notation. -/// -/// `y` and `x` use the same format as with `atan`: 1 bit for sign, 1 bit for -/// integral, 14 bits for fractional. 
-#[inline(always)] -pub fn atan2(y: i16, x: i16) -> u16 { - let out: u16; - unsafe { - asm!(/* ASM */ "swi 0x0A" - :/* OUT */ "={r0}"(out) - :/* INP */ "{r0}"(x), "{r1}"(y) - :/* CLO */ "r3" - :/* OPT */ - ); - } - out -} -``` - -## Memory Modification - -### CPU Set (0x08) -### CPU Fast Set (0x0C) -### Get BIOS Checksum (0x0D) -### BG Affine Set (0x0E) -### Obj Affine Set (0x0F) - -## Decompression - -### BitUnPack (0x10) -### LZ77UnCompReadNormalWrite8bit (0x11) -### LZ77UnCompReadNormalWrite16bit (0x12) -### HuffUnCompReadNormal (0x13) -### RLUnCompReadNormalWrite8bit (0x14) -### RLUnCompReadNormalWrite16bit (0x15) -### Diff8bitUnFilterWrite8bit (0x16) -### Diff8bitUnFilterWrite16bit (0x17) -### Diff16bitUnFilter (0x18) - -## Sound - -### SoundBias (0x19) -### SoundDriverInit (0x1A) -### SoundDriverMode (0x1B) -### SoundDriverMain (0x1C) -### SoundDriverVSync (0x1D) -### SoundChannelClear (0x1E) -### MidiKey2Freq (0x1F) -### SoundWhatever0 (0x20) -### SoundWhatever1 (0x21) -### SoundWhatever2 (0x22) -### SoundWhatever3 (0x23) -### SoundWhatever4 (0x24) -### MultiBoot (0x25) -### HardReset (0x26) -### CustomHalt (0x27) -### SoundDriverVSyncOff (0x28) -### SoundDriverVSyncOn (0x29) -### SoundGetJumpList (0x2A) +Which is not to say that we'll never cover any BIOS functions in this book! +Instead, we'll simply mention them when whenever they're relevent to the task at +hand (such as sound or waiting for vblank). diff --git a/src/bios.rs b/src/bios.rs index 196d0b0..c2ac39c 100644 --- a/src/bios.rs +++ b/src/bios.rs @@ -89,6 +89,84 @@ pub unsafe fn register_ram_reset(flags: u8) { } //TODO(lokathor): newtype this flag business. +/// (`swi 0x02`) Halts the CPU until an interrupt occurs. +/// +/// Components _other than_ the CPU continue to function. Halt mode ends when +/// any enabled interrupt triggers. 
+#[inline(always)] +pub fn halt() { + unsafe { + asm!(/* ASM */ "swi 0x02" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x03`) Stops the CPU as well as most other components. +/// +/// Stop mode must be stopped by an interrupt, but can _only_ be stopped by a +/// Keypad, Game Pak, or General-Purpose-SIO interrupt. +/// +/// Before going into stop mode you should manually disable video and sound (or +/// they will continue to consume power), and you should also disable any other +/// optional externals such as rumble and infra-red. +#[inline(always)] +pub fn stop() { + unsafe { + asm!(/* ASM */ "swi 0x03" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x04`) "IntrWait", similar to halt but with more options. +/// +/// * The first argument controls if you want to ignore all current flags and +/// wait until a new flag is set. +/// * The second argument is what flags you're waiting on (same format as the +/// IE/IF registers). +/// +/// If you're trying to handle more than one interrupt at once this has less +/// overhead than calling `halt` over and over. +/// +/// When using this routine your interrupt handler MUST update the BIOS +/// Interrupt Flags `0x300_7FF8` in addition to the usual interrupt +/// acknowledgement. +#[inline(always)] +pub fn interrupt_wait(ignore_current_flags: bool, target_flags: u16) { + unsafe { + asm!(/* ASM */ "swi 0x04" + :/* OUT */ // none + :/* INP */ "{r0}"(ignore_current_flags), "{r1}"(target_flags) + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} +//TODO(lokathor): newtype this flag business. + +/// (`swi 0x05`) "VBlankIntrWait", VBlank Interrupt Wait. +/// +/// This is as per `interrupt_wait(true, 1)` (aka "wait for a new vblank"). You +/// must follow the same guidelines that `interrupt_wait` outlines. 
+#[inline(always)] +pub fn vblank_interrupt_wait() { + unsafe { + asm!(/* ASM */ "swi 0x05" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ "r0", "r1" // both set to 1 by the routine + :/* OPT */ "volatile" + ); + } +} + /// (`swi 0x06`) Software Division and Remainder. /// /// ## Panics @@ -110,18 +188,22 @@ pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { (div_out, rem_out) } -/// As `div_rem`, but keeping only the `div` part. +/// As `div_rem`, keeping only the `div` output. #[inline(always)] pub fn div(numerator: i32, denominator: i32) -> i32 { div_rem(numerator, denominator).0 } -/// As `div_rem`, but keeping only the `rem` part. +/// As `div_rem`, keeping only the `rem` output. #[inline(always)] pub fn rem(numerator: i32, denominator: i32) -> i32 { div_rem(numerator, denominator).1 } +// (`swi 0x07`): We deliberately don't implement this one. It's the same as DIV +// but with reversed arguments, so it just runs 3 cycles slower as it does the +// swap. + /// (`swi 0x08`) Integer square root. /// /// If you want more fractional precision, you can shift your input to the left @@ -180,3 +262,247 @@ pub fn atan2(y: i16, x: i16) -> u16 { } out } + +/// (`swi 0x0B`) "CpuSet", `u16` memory copy. +/// +/// * `count` is the number of `u16` values to copy (20 bits or less) +/// * `fixed_source` argument, if true, turns this copying routine into a +/// filling routine. +/// +/// ## Safety +/// +/// * Both pointers must be aligned +#[inline(always)] +pub unsafe fn cpu_set16(src: *const u16, dest: *mut u16, count: u32, fixed_source: bool) { + let control = count + ((fixed_source as u32) << 24); + asm!(/* ASM */ "swi 0x0B" + :/* OUT */ // none + :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} + +/// (`swi 0x0B`) "CpuSet", `u32` memory copy/fill. 
+/// +/// * `count` is the number of `u32` values to copy (20 bits or less) +/// * `fixed_source` argument, if true, turns this copying routine into a +/// filling routine. +/// +/// ## Safety +/// +/// * Both pointers must be aligned +#[inline(always)] +pub unsafe fn cpu_set32(src: *const u32, dest: *mut u32, count: u32, fixed_source: bool) { + let control = count + ((fixed_source as u32) << 24) + (1 << 26); + asm!(/* ASM */ "swi 0x0B" + :/* OUT */ // none + :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} + +/// (`swi 0x0C`) "CpuFastSet", copies memory in 32 byte chunks. +/// +/// * The `count` value is the number of `u32` values to transfer (20 bits or +/// less), and it's rounded up to the nearest multiple of 8 words. +/// * The `fixed_source` argument, if true, turns this copying routine into a +/// filling routine. +/// +/// ## Safety +/// +/// * Both pointers must be aligned +#[inline(always)] +pub unsafe fn cpu_fast_set(src: *const u32, dest: *mut u32, count: u32, fixed_source: bool) { + let control = count + ((fixed_source as u32) << 24); + asm!(/* ASM */ "swi 0x0C" + :/* OUT */ // none + :/* INP */ "{r0}"(src), "{r1}"(dest), "{r2}"(control) + :/* CLO */ // none + :/* OPT */ "volatile" + ); +} + +/// (`swi 0x0D`) "GetBiosChecksum" (Undocumented) +/// +/// Though we usually don't cover undocumented functionality, this one can make +/// it into the crate. +/// +/// The function computes the checksum of the BIOS data. You should get either +/// `0xBAAE_187F` (GBA / GBA SP) or `0xBAAE_1880` (DS in GBA mode). If you get +/// some other value I guess you're probably running on an emulator that just +/// broke the fourth wall. 
+pub fn get_bios_checksum() -> u32 { + let out: u32; + unsafe { + asm!(/* ASM */ "swi 0x0D" + :/* OUT */ "={r0}"(out) + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ // none + ); + } + out +} + +// TODO: these things will require that we build special structs + +//BgAffineSet +//ObjAffineSet +//BitUnPack +//LZ77UnCompReadNormalWrite8bit +//LZ77UnCompReadNormalWrite16bit +//HuffUnCompReadNormal +//RLUnCompReadNormalWrite8bit +//Diff8bitUnFilterWrite8bit +//Diff8bitUnFilterWrite16bit +//Diff16bitUnFilter + +/// (`swi 0x19`) "SoundBias", adjusts the volume level to a new level. +/// +/// This increases or decreases the current level of the `SOUNDBIAS` register +/// (with short delays) until at the new target level. The upper bits of the +/// register are unaffected. +/// +/// The final sound level setting will be `level` * `0x200`. +pub fn sound_bias(level: u32) { + unsafe { + asm!(/* ASM */ "swi 0x19" + :/* OUT */ // none + :/* INP */ "{r0}"(level) + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +//SoundDriverInit + +/// (`swi 0x1B`) "SoundDriverMode", sets the sound driver operation mode. +/// +/// The `mode` input uses the following flags and bits: +/// +/// * Bits 0-6: Reverb value +/// * Bit 7: Reverb Enable +/// * Bits 8-11: Simultaneously-produced channel count (default=8) +/// * Bits 12-15: Master Volume (1-15, default=15) +/// * Bits 16-19: Playback Frequency Index (see below, default=4) +/// * Bits 20-23: "Final number of D/A converter bits (8-11 = 9-6bits, def. 9=8bits)" TODO: what the hek? 
+/// * Bits 24 and up: Not used +/// +/// The frequency index selects a frequency from the following array: +/// * 0: 5734 +/// * 1: 7884 +/// * 2: 10512 +/// * 3: 13379 +/// * 4: 15768 +/// * 5: 18157 +/// * 6: 21024 +/// * 7: 26758 +/// * 8: 31536 +/// * 9: 36314 +/// * 10: 40137 +/// * 11: 42048 +pub fn sound_driver_mode(mode: u32) { + unsafe { + asm!(/* ASM */ "swi 0x1B" + :/* OUT */ // none + :/* INP */ "{r0}"(mode) + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} +//TODO(lokathor): newtype this mode business. + +/// (`swi 0x1C`) "SoundDriverMain", main of the sound driver +/// +/// You should call `SoundDriverVSync` immediately after the vblank interrupt +/// fires. +/// +/// "After that, this routine is called after BG and OBJ processing is +/// executed." --what? +#[inline(always)] +pub fn sound_driver_main() { + unsafe { + asm!(/* ASM */ "swi 0x1C" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x1D`) "SoundDriverVSync", resets the sound DMA. +/// +/// The timing is critical, so you should call this _immediately_ after the +/// vblank interrupt (every 1/60th of a second). +#[inline(always)] +pub fn sound_driver_vsync() { + unsafe { + asm!(/* ASM */ "swi 0x1D" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x1E`) "SoundChannelClear", clears the direct sound channels and stops +/// the sound. +/// +/// "This function may not operate properly when the library which expands the +/// sound driver feature is combined afterwards. In this case, do not use it." +/// --what? 
+#[inline(always)] +pub fn sound_channel_clear() { + unsafe { + asm!(/* ASM */ "swi 0x1E" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +//MidiKey2Freq +//MultiBoot + +/// (`swi 0x28`) "SoundDriverVSyncOff", disables sound +/// +/// If you can't use vblank interrupts to ensure that `sound_driver_vsync` is +/// called every 1/60th of a second for any reason you must use this function to +/// stop sound DMA. Otherwise the DMA will overrun its buffer and cause random +/// noise. +#[inline(always)] +pub fn sound_driver_vsync_off() { + unsafe { + asm!(/* ASM */ "swi 0x28" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} + +/// (`swi 0x29`) "SoundDriverVSyncOn", enables sound that was stopped by +/// `sound_driver_vsync_off`. +/// +/// Restarts sound DMA system. After restarting the sound you must have a vblank +/// interrupt followed by a `sound_driver_vsync` within 2/60th of a second. +#[inline(always)] +pub fn sound_driver_vsync_on() { + unsafe { + asm!(/* ASM */ "swi 0x29" + :/* OUT */ // none + :/* INP */ // none + :/* CLO */ // none + :/* OPT */ "volatile" + ); + } +} From 779770a187d1d88bedf4171210ae04741f447b00 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 20:55:53 -0700 Subject: [PATCH 24/33] cleanup and cleanup designations --- examples/hello_world.rs | 99 ++++++++++++++++++++++++----------------- src/core_extras.rs | 3 ++ src/lib.rs | 5 --- src/video_ram.rs | 2 + 4 files changed, 62 insertions(+), 47 deletions(-) diff --git a/examples/hello_world.rs b/examples/hello_world.rs index 87e6a1c..3866d46 100644 --- a/examples/hello_world.rs +++ b/examples/hello_world.rs @@ -20,50 +20,17 @@ macro_rules! const_assert { }; } -#[panic_handler] -fn panic(_info: &core::panic::PanicInfo) -> ! { - loop {} -} - -newtype! 
{ - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - Color, u16 -} - -pub const fn rgb(red: u16, green: u16, blue: u16) -> Color { - Color(blue << 10 | green << 5 | red) -} - -newtype! { - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - DisplayControlSetting, u16 -} - -pub const DISPLAY_CONTROL: VolatilePtr = VolatilePtr(0x04000000 as *mut DisplayControlSetting); -pub const JUST_MODE3_AND_BG2: DisplayControlSetting = DisplayControlSetting(3 + 0b100_0000_0000); - -pub struct Mode3; - -impl Mode3 { - const SCREEN_WIDTH: isize = 240; - const PIXELS: VolatilePtr = VolatilePtr(0x600_0000 as *mut Color); - - pub unsafe fn draw_pixel_unchecked(col: isize, row: isize, color: Color) { - Self::PIXELS.offset(col + row * Self::SCREEN_WIDTH).write(color); - } -} - -#[start] -fn main(_argc: isize, _argv: *const *const u8) -> isize { - unsafe { - DISPLAY_CONTROL.write(JUST_MODE3_AND_BG2); - Mode3::draw_pixel_unchecked(120, 80, rgb(31, 0, 0)); - Mode3::draw_pixel_unchecked(136, 80, rgb(0, 31, 0)); - Mode3::draw_pixel_unchecked(120, 96, rgb(0, 0, 31)); - loop {} - } +#[macro_export] +macro_rules! const_rgb { + ($r:expr, $g:expr, $b:expr) => {{ + const_assert!($r); + const_assert!($g); + const_assert!($b); + Color::new($r, $g, $b) + }}; } +// TODO: kill this #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] #[repr(transparent)] pub struct VolatilePtr(pub *mut T); @@ -78,3 +45,51 @@ impl VolatilePtr { VolatilePtr(self.0.wrapping_offset(count)) } } + +newtype! { + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + Color, u16 +} + +impl Color { + /// Combines the Red, Blue, and Green provided into a single color value. + pub const fn new(red: u16, green: u16, blue: u16) -> Color { + Color(blue << 10 | green << 5 | red) + } +} + +newtype! 
{ + #[derive(Debug, Clone, Copy, PartialEq, Eq)] + DisplayControlSetting, u16 +} + +pub const DISPLAY_CONTROL: VolatilePtr = VolatilePtr(0x0400_0000 as *mut DisplayControlSetting); +pub const JUST_MODE3: DisplayControlSetting = DisplayControlSetting(3); +pub const JUST_BG2: DisplayControlSetting = DisplayControlSetting(0b100_0000_0000); +pub const JUST_MODE3_AND_BG2: DisplayControlSetting = DisplayControlSetting(JUST_MODE3.0 | JUST_BG2.0); + +pub struct Mode3; +impl Mode3 { + const SCREEN_WIDTH: isize = 240; + const PIXELS: VolatilePtr = VolatilePtr(0x600_0000 as *mut Color); + + pub unsafe fn draw_pixel_unchecked(col: isize, row: isize, color: Color) { + Self::PIXELS.offset(col + row * Self::SCREEN_WIDTH).write(color); + } +} + +#[panic_handler] +fn panic(_info: &core::panic::PanicInfo) -> ! { + loop {} +} + +#[start] +fn main(_argc: isize, _argv: *const *const u8) -> isize { + unsafe { + DISPLAY_CONTROL.write(JUST_MODE3_AND_BG2); + Mode3::draw_pixel_unchecked(120, 80, const_rgb!(31, 0, 0)); + Mode3::draw_pixel_unchecked(136, 80, const_rgb!(0, 31, 0)); + Mode3::draw_pixel_unchecked(120, 96, const_rgb!(0, 0, 31)); + loop {} + } +} diff --git a/src/core_extras.rs b/src/core_extras.rs index fe82440..1eb96dd 100644 --- a/src/core_extras.rs +++ b/src/core_extras.rs @@ -37,3 +37,6 @@ impl VolatilePtr { VolatilePtr(self.0 as *mut Z) } } + +// TODO: kill all this with fire + diff --git a/src/lib.rs b/src/lib.rs index 7b53307..f00981f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -35,8 +35,3 @@ pub mod io_registers; pub mod video_ram; pub(crate) use crate::video_ram::*; - -/// Combines the Red, Blue, and Green provided into a single color value. 
-pub const fn rgb16(red: u16, green: u16, blue: u16) -> u16 { - blue << 10 | green << 5 | red -} diff --git a/src/video_ram.rs b/src/video_ram.rs index 6cb223a..fd47355 100644 --- a/src/video_ram.rs +++ b/src/video_ram.rs @@ -15,6 +15,8 @@ pub use super::*; +// TODO: kill all this too + /// The physical width in pixels of the GBA screen. pub const SCREEN_WIDTH: isize = 240; From 046e80851f871b62c591316a3ea51c9078019248 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 16 Dec 2018 22:01:23 -0700 Subject: [PATCH 25/33] moving old text to new locations, notes of where to go --- book/src-bak/ch03/gba_memory_mapping.md | 256 ------------------------ book/src/02-concepts/00-index.md | 12 ++ book/src/02-concepts/02-bios.md | 6 +- book/src/02-concepts/03-wram.md | 27 +++ book/src/02-concepts/04-io-registers.md | 2 + book/src/02-concepts/05-palram.md | 51 ++++- book/src/02-concepts/06-vram.md | 25 ++- book/src/02-concepts/07-oam.md | 63 +++++- book/src/02-concepts/08-rom.md | 15 +- book/src/02-concepts/09-sram.md | 17 +- 10 files changed, 210 insertions(+), 264 deletions(-) delete mode 100644 book/src-bak/ch03/gba_memory_mapping.md diff --git a/book/src-bak/ch03/gba_memory_mapping.md b/book/src-bak/ch03/gba_memory_mapping.md deleted file mode 100644 index f3c4e8a..0000000 --- a/book/src-bak/ch03/gba_memory_mapping.md +++ /dev/null @@ -1,256 +0,0 @@ -# GBA Memory Mapping - -The [GBA Memory Map](http://problemkaputt.de/gbatek.htm#gbamemorymap) has -several memory portions to it, each with their own little differences. Most of -the memory has pre-determined use according to the hardware, but there is also -space for games to use as a scratch pad in whatever way the game sees fit. - -The memory ranges listed here are _inclusive_, so they end with a lot of F's -and E's. 
- -We've talked about volatile memory before, but just as a reminder I'll say that -all of the memory we'll talk about here should be accessed using volatile with -two exceptions: - -1) Work RAM (both internal and external) can be used normally, and if the - compiler is able to totally elide some reads and writes that's okay. -2) However, if you set aside any space in Work RAM where an interrupt will - communicate with the main program then that specific location will have to - keep using volatile access, since the compiler never knows when an interrupt - will actually happen. - -## BIOS / System ROM - -* `0x0` to `0x3FFF` (16k) - -This is special memory for the BIOS. It is "read-only", but even then it's only -accessible when the program counter is pointing into the BIOS region. At all -other times you get a [garbage -value](http://problemkaputt.de/gbatek.htm#gbaunpredictablethings) back when you -try to read out of the BIOS. - -## External Work RAM / EWRAM - -* `0x2000000` to `0x203FFFF` (256k) - -This is a big pile of space, the use of which is up to each game. However, the -external work ram has only a 16-bit bus (if you read/write a 32-bit value it -silently breaks it up into two 16-bit operations) and also 2 wait cycles (extra -CPU cycles that you have to expend _per 16-bit bus use_). - -It's most helpful to think of EWRAM as slower, distant memory, similar to the -"heap" in a normal application. You can take the time to go store something -within EWRAM, or to load it out of EWRAM, but if you've got several operations -to do in a row and you're worried about time you should pull that value into -local memory, work on your local copy, and then push it back out to EWRAM. - -## Internal Work RAM / IWRAM - -* `0x3000000` to `0x3007FFF` (32k) - -This is a smaller pile of space, but it has a 32-bit bus and no wait. - -By default, `0x3007F00` to `0x3007FFF` is reserved for interrupt and BIOS use. -The rest of it is totally up to you. 
The user's stack space starts at -`0x3007F00` and proceeds _down_ from there. For best results you should probably -start at `0x3000000` and then go upwards. Under normal use it's unlikely that -the two memory regions will crash into each other. - -## IO Registers - -* `0x4000000` to `0x40003FE` - -We've touched upon a few of these so far, and we'll get to more later. At the -moment it is enough to say that, as you might have guessed, all of them live in -this region. Each individual register is a `u16` or `u32` and they control all -sorts of things. We'll actually be talking about some more of them in this very -chapter, because that's how we'll control some of the background and object -stuff. - -## Palette RAM / PALRAM - -* `0x5000000` to `0x50003FF` (1k) - -Palette RAM has a 16-bit bus, which isn't really a problem because it -conceptually just holds `u16` values. There's no automatic wait state, but if -you try to access the same location that the display controller is accessing you -get bumped by 1 cycle. Since the display controller can use the palette ram any -number of times per scanline it's basically impossible to predict if you'll have -to do a wait or not during VDraw. During VBlank you won't have any wait of -course. - -PALRAM is among the memory where there's weirdness if you try to write just one -byte: if you try to write just 1 byte, it writes that byte into _both_ parts of -the larger 16-bit location. This doesn't really affect us much with PALRAM, -because palette values are all supposed to be `u16` anyway. - -The palette memory actually contains not one, but _two_ sets of palettes. First -there's 256 entries for the background palette data (starting at `0x5000000`), -and then there's 256 entries for object palette data (starting at `0x5000200`). - -The GBA also has two modes for palette access: 8-bits-per-pixel (8bpp) and -4-bits-per-pixel (4bpp). 
- -* In 8bpp mode an 8-bit palette index value within a background or sprite - simply indexes directly into the 256 slots for that type of thing. -* In 4bpp mode a 4-bit palette index value within a background or sprite - specifies an index within a particular "palbank" (16 palette entries each), - and then a _separate_ setting outside of the graphical data determines which - palbank is to be used for that background or object (the screen entry data for - backgrounds, and the object attributes for objects). - -### Transparency - -When a pixel within a background or object specifies index 0 as its palette -entry it is treated as a transparent pixel. This means that in 8bpp mode there's -only 255 actual color options (0 being transparent), and in 4bpp mode there's -only 15 actual color options available within each palbank (the 0th entry of -_each_ palbank is transparent). - -Individual backgrounds, and individual objects, each determine if they're 4bpp -or 8bpp separately, so a given overall palette slot might map to a used color in -8bpp and an unused/transparent color in 4bpp. If you're a palette wizard. - -Palette slot 0 of the overall background palette is used to determine the -"backdrop" color. That's the color you see if no background or object ends up -being rendered within a given pixel. - -Since display mode 3 and display mode 5 don't use the palette, they cannot -benefit from transparency. - -## Video RAM / VRAM - -* `0x6000000` to `0x6017FFF` (96k) - -We've used this before! VRAM has a 16-bit bus and no wait. However, the same as -with PALRAM, the "you might have to wait if the display controller is looking at -it" rule applies here. - -Unfortunately there's not much more exact detail that can be given about VRAM. -The use of the memory depends on the video mode that you're using. - -One general detail of note is that you can't write individual bytes to any part -of VRAM. 
Depending on mode and location, you'll either get your bytes doubled -into both the upper and lower parts of the 16-bit location targeted, or you -won't even affect the memory. This usually isn't a big deal, except in two -situations: - -* In Mode 4, if you want to change just 1 pixel, you'll have to be very careful - to read the old `u16`, overwrite just the byte you wanted to change, and then - write that back. -* In any display mode, avoid using `memcopy` to place things into VRAM. - It's written to be byte oriented, and only does 32-bit transfers under select - conditions. The rest of the time it'll copy one byte at a time and you'll get - either garbage or nothing at all. - -## Object Attribute Memory / OAM - -* `0x7000000` to `0x70003FF` (1k) - -The Object Attribute Memory has a 32-bit bus and no default wait, but suffers -from the "you might have to wait if the display controller is looking at it" -rule. You cannot write individual bytes to OAM at all, but that's not really a -problem because all the fields of the data types within OAM are either `i16` or -`u16` anyway. - -Object attribute memory is the wildest yet: it conceptually contains two types -of things, but they're _interlaced_ with each other all the way through. - -Now, [GBATEK](http://problemkaputt.de/gbatek.htm#lcdobjoamattributes) and -[CowByte](https://www.cs.rit.edu/~tjh8300/CowBite/CowBiteSpec.htm#OAM%20(sprites)) -doesn't quite give names to the two data types here. -[TONC](https://www.coranac.com/tonc/text/regobj.htm#sec-oam) calls them -`OBJ_ATTR` and `OBJ_AFFINE`, but we'll be giving them names fitting with the -Rust naming convention. Just know that if you try to talk about it with others -they might not be using the same names. 
In Rust terms their layout would look -like this: - -```rust -#[repr(C)] -pub struct ObjectAttributes { - attr0: u16, - attr1: u16, - attr2: u16, - filler: i16, -} - -#[repr(C)] -pub struct AffineMatrix { - filler0: [u16; 3], - pa: i16, - filler1: [u16; 3], - pb: i16, - filler2: [u16; 3], - pc: i16, - filler3: [u16; 3], - pd: i16, -} -``` - -(Note: the `#[repr(C)]` part just means that Rust must lay out the data exactly -in the order we specify, which otherwise it is not required to do). - -So, we've got 1024 bytes in OAM and each `ObjectAttributes` value is 8 bytes, so -naturally we can support up to 128 objects. - -_At the same time_, we've got 1024 bytes in OAM and each `AffineMatrix` is 32 -bytes, so we can have 32 of them. - -But, as I said, these things are all _interlaced_ with each other. See how -there's "filler" fields in each struct? If we imagine the OAM as being just an -array of one type or the other, indexes 0/1/2/3 of the `ObjectAttributes` array -would line up with index 0 of the `AffineMatrix` array. It's kinda weird, but -that's just how it works. When we setup functions to read and write these values -we'll have to be careful with how we do it. We probably _won't_ want to use -those representations above, at least not with the `AffineMatrix` type, because -they're quite wasteful if you want to store just object attributes or just -affine matrices. - -## Game Pak ROM / Flash ROM - -* `0x8000000` to `0x9FFFFFF` (wait 0) -* `0xA000000` to `0xBFFFFFF` (wait 1) -* `0xC000000` to `0xDFFFFFF` (wait 2) -* Max of 32Mb - -These portions of the memory are less fixed, because they depend on the precise -details of the game pak you've inserted into the GBA. In general, they connect -to the game pak ROM and/or Flash memory, using a 16-bit bus. The ROM is -read-only, but the Flash memory (if any) allows writes. - -The game pak ROM is listed as being in three sections, but it's actually the -same memory being effectively mirrored into three different locations. 
The -mirror that you choose to access the game pak through affects which wait state -setting it uses (configured via IO register of course). Unfortunately, the -details come down more to the game pak hardware that you load your game onto -than anything else, so there's not much I can say right here. We'll eventually -talk about it more later when I'm forced to do the boring thing and just cover -all the IO registers that aren't covered anywhere else. - -One thing of note is the way that the 16-bit bus affects us: the instructions to -execute are coming through the same bus as the rest of the game data, so we want -them to be as compact as possible. The ARM chip in the GBA supports two -different instruction sets, "thumb" and "non-thumb". The thumb mode instructions -are 16-bit, so they can each be loaded one at a time, and the non-thumb -instructions are 32-bit, so we're at a penalty if we execute them directly out -of the game pak. However, some things will demand that we use non-thumb code, so -we'll have to deal with that eventually. It's possible to switch between modes, -but it's a pain to keep track of what mode you're in because there's not -currently support for it in Rust itself (perhaps some day). So we'll stick with -thumb code as much as we possibly can, that's why our target profile for our -builds starts with `thumbv4`. - -## Game Pak SRAM - -* `0xE000000` to `0xE00FFFF` (64k) - -The game pak SRAM has an 8-bit bus. Why did Pokémon always take so long to save? -Saving the whole game one byte at a time is why. The SRAM also has some amount -of wait, but as with the ROM, the details depend on your game pak hardware (and -also as with ROM, you can adjust the settings with an IO register, should you -need to). - -One thing to note about the SRAM is that the GBA has a Direct Memory Access -(DMA) feature that can be used for bulk memory movements in some cases, but the -DMA _cannot_ access the SRAM region. 
You really are stuck reading and writing -one byte at a time when you're using the SRAM. diff --git a/book/src/02-concepts/00-index.md b/book/src/02-concepts/00-index.md index fb6b902..10fe20c 100644 --- a/book/src/02-concepts/00-index.md +++ b/book/src/02-concepts/00-index.md @@ -24,3 +24,15 @@ everything is obviously too much for just one section of the book. Instead you get an overview of general IO register rules and advice. Each particular register is described in the appropriate sections of either the Video or Non-Video chapters. + +## Bus Size + +TODO: describe this + +## Minimum Write Size + +TODO: talk about parts where you can't write one byte at a time + +## Volatile or Not? + +TODO: discuss what memory should be used volatile style and what can be used normal style. \ No newline at end of file diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index d3f942f..dadcf2b 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -228,12 +228,12 @@ pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { I _hope_ this all makes sense by now. -## All The BIOS Functions +## Specific BIOS Functions -As for a full list of all the specific BIOS functions and their use, you should +For a full list of all the specific BIOS functions and their use you should check the `gba::bios` module within the `gba` crate. There's just so many of them that enumerating them all here wouldn't serve much purpose. Which is not to say that we'll never cover any BIOS functions in this book! Instead, we'll simply mention them when whenever they're relevent to the task at -hand (such as sound or waiting for vblank). +hand (such as controlling sound or waiting for vblank). 
diff --git a/book/src/02-concepts/03-wram.md b/book/src/02-concepts/03-wram.md index b4185fc..26d71e8 100644 --- a/book/src/02-concepts/03-wram.md +++ b/book/src/02-concepts/03-wram.md @@ -1 +1,28 @@ # Work RAM + +## External Work RAM (EWRAM) + +* **Address Span:** `0x2000000` to `0x203FFFF` (256k) + +This is a big pile of space, the use of which is up to each game. However, the +external work ram has only a 16-bit bus (if you read/write a 32-bit value it +silently breaks it up into two 16-bit operations) and also 2 wait cycles (extra +CPU cycles that you have to expend _per 16-bit bus use_). + +It's most helpful to think of EWRAM as slower, distant memory, similar to the +"heap" in a normal application. You can take the time to go store something +within EWRAM, or to load it out of EWRAM, but if you've got several operations +to do in a row and you're worried about time you should pull that value into +local memory, work on your local copy, and then push it back out to EWRAM. + +## Internal Work RAM (IWRAM) + +* **Address Span:** `0x3000000` to `0x3007FFF` (32k) + +This is a smaller pile of space, but it has a 32-bit bus and no wait. + +By default, `0x3007F00` to `0x3007FFF` is reserved for interrupt and BIOS use. +The rest of it is mostly up to you. The user's stack space starts at `0x3007F00` +and proceeds _down_ from there. For best results you should probably start at +`0x3000000` and then go upwards. Under normal use it's unlikely that the two +memory regions will crash into each other. 
diff --git a/book/src/02-concepts/04-io-registers.md b/book/src/02-concepts/04-io-registers.md index 3a3e53f..99a18b9 100644 --- a/book/src/02-concepts/04-io-registers.md +++ b/book/src/02-concepts/04-io-registers.md @@ -1 +1,3 @@ # IO Registers + +* **Address Span:** `0x400_0000` to `0x400_03FE` diff --git a/book/src/02-concepts/05-palram.md b/book/src/02-concepts/05-palram.md index 5353b1c..95cbdf1 100644 --- a/book/src/02-concepts/05-palram.md +++ b/book/src/02-concepts/05-palram.md @@ -1 +1,50 @@ -# Palette RAM +# Palette RAM (PALRAM) + +* **Address Span:** `0x500_0000` to `0x500_03FF` (1k) + +Palette RAM has a 16-bit bus, which isn't really a problem because it +conceptually just holds `u16` values. There's no automatic wait state, but if +you try to access the same location that the display controller is accessing you +get bumped by 1 cycle. Since the display controller can use the palette ram any +number of times per scanline it's basically impossible to predict if you'll have +to do a wait or not during VDraw. During VBlank you won't have any wait of +course. + +PALRAM is among the memory where there's weirdness if you try to write just one +byte: if you try to write just 1 byte, it writes that byte into _both_ parts of +the larger 16-bit location. This doesn't really affect us much with PALRAM, +because palette values are all supposed to be `u16` anyway. + +The palette memory actually contains not one, but _two_ sets of palettes. First +there's 256 entries for the background palette data (starting at `0x5000000`), +and then there's 256 entries for object palette data (starting at `0x5000200`). + +The GBA also has two modes for palette access: 8-bits-per-pixel (8bpp) and +4-bits-per-pixel (4bpp). + +* In 8bpp mode an 8-bit palette index value within a background or sprite + simply indexes directly into the 256 slots for that type of thing. 
+* In 4bpp mode a 4-bit palette index value within a background or sprite + specifies an index within a particular "palbank" (16 palette entries each), + and then a _separate_ setting outside of the graphical data determines which + palbank is to be used for that background or object (the screen entry data for + backgrounds, and the object attributes for objects). + +### Transparency + +When a pixel within a background or object specifies index 0 as its palette +entry it is treated as a transparent pixel. This means that in 8bpp mode there's +only 255 actual color options (0 being transparent), and in 4bpp mode there's +only 15 actual color options available within each palbank (the 0th entry of +_each_ palbank is transparent). + +Individual backgrounds, and individual objects, each determine if they're 4bpp +or 8bpp separately, so a given overall palette slot might map to a used color in +8bpp and an unused/transparent color in 4bpp. If you're a palette wizard. + +Palette slot 0 of the overall background palette is used to determine the +"backdrop" color. That's the color you see if no background or object ends up +being rendered within a given pixel. + +Since display mode 3 and display mode 5 don't use the palette, they cannot +benefit from transparency. diff --git a/book/src/02-concepts/06-vram.md b/book/src/02-concepts/06-vram.md index e6915fd..24a96c4 100644 --- a/book/src/02-concepts/06-vram.md +++ b/book/src/02-concepts/06-vram.md @@ -1 +1,24 @@ -# Video RAM +# Video RAM (VRAM) + +* **Address Span:** `0x600_0000` to `0x601_7FFF` (96k) + +We've used this before! VRAM has a 16-bit bus and no wait. However, the same as +with PALRAM, the "you might have to wait if the display controller is looking at +it" rule applies here. + +Unfortunately there's not much more exact detail that can be given about VRAM. +The use of the memory depends on the video mode that you're using. + +One general detail of note is that you can't write individual bytes to any part +of VRAM. 
Depending on mode and location, you'll either get your bytes doubled +into both the upper and lower parts of the 16-bit location targeted, or you +won't even affect the memory. This usually isn't a big deal, except in two +situations: + +* In Mode 4, if you want to change just 1 pixel, you'll have to be very careful + to read the old `u16`, overwrite just the byte you wanted to change, and then + write that back. +* In any display mode, avoid using `memcpy` to place things into VRAM. + It's written to be byte oriented, and only does 32-bit transfers under select + conditions. The rest of the time it'll copy one byte at a time and you'll get + either garbage or nothing at all. diff --git a/book/src/02-concepts/07-oam.md b/book/src/02-concepts/07-oam.md index 78d8d02..eeee6f2 100644 --- a/book/src/02-concepts/07-oam.md +++ b/book/src/02-concepts/07-oam.md @@ -1 +1,62 @@ -# Object Attribute Memory +# Object Attribute Memory (OAM) + +* **Address Span:** `0x700_0000` to `0x700_03FF` (1k) + +The Object Attribute Memory has a 32-bit bus and no default wait, but suffers +from the "you might have to wait if the display controller is looking at it" +rule. You cannot write individual bytes to OAM at all, but that's not really a +problem because all the fields of the data types within OAM are either `i16` or +`u16` anyway. + +Object attribute memory is the wildest yet: it conceptually contains two types +of things, but they're _interlaced_ with each other all the way through. + +Now, [GBATEK](http://problemkaputt.de/gbatek.htm#lcdobjoamattributes) and +[CowBite](https://www.cs.rit.edu/~tjh8300/CowBite/CowBiteSpec.htm#OAM%20(sprites)) +don't quite give names to the two data types here. +[TONC](https://www.coranac.com/tonc/text/regobj.htm#sec-oam) calls them +`OBJ_ATTR` and `OBJ_AFFINE`, but we'll be giving them names fitting with the +Rust naming convention. Just know that if you try to talk about it with others +they might not be using the same names.
In Rust terms their layout would look +like this: + +```rust +#[repr(C)] +pub struct ObjectAttributes { + attr0: u16, + attr1: u16, + attr2: u16, + filler: i16, +} + +#[repr(C)] +pub struct AffineMatrix { + filler0: [u16; 3], + pa: i16, + filler1: [u16; 3], + pb: i16, + filler2: [u16; 3], + pc: i16, + filler3: [u16; 3], + pd: i16, +} +``` + +(Note: the `#[repr(C)]` part just means that Rust must lay out the data exactly +in the order we specify, which otherwise it is not required to do). + +So, we've got 1024 bytes in OAM and each `ObjectAttributes` value is 8 bytes, so +naturally we can support up to 128 objects. + +_At the same time_, we've got 1024 bytes in OAM and each `AffineMatrix` is 32 +bytes, so we can have 32 of them. + +But, as I said, these things are all _interlaced_ with each other. See how +there's "filler" fields in each struct? If we imagine the OAM as being just an +array of one type or the other, indexes 0/1/2/3 of the `ObjectAttributes` array +would line up with index 0 of the `AffineMatrix` array. It's kinda weird, but +that's just how it works. When we setup functions to read and write these values +we'll have to be careful with how we do it. We probably _won't_ want to use +those representations above, at least not with the `AffineMatrix` type, because +they're quite wasteful if you want to store just object attributes or just +affine matrices. diff --git a/book/src/02-concepts/08-rom.md b/book/src/02-concepts/08-rom.md index 753857b..584faac 100644 --- a/book/src/02-concepts/08-rom.md +++ b/book/src/02-concepts/08-rom.md @@ -1 +1,14 @@ -# Game Pak ROM / Flash ROM +# Game Pak ROM / Flash ROM (ROM) + +* **Address Span (Wait State 0):** `0x800_0000` to `0x9FF_FFFF` +* **Address Span (Wait State 1):** `0xA00_0000` to `0xBFF_FFFF` +* **Address Span (Wait State 2):** `0xC00_0000` to `0xDFF_FFFF` + +The game's ROM data is a single set of data that's up to 32 megabytes in size. 
+However, that data is mirrored to three different locations in the address +space. Depending on which part of the address space you use, it can affect the +memory timings involved. + +TODO: describe `WAITCNT` here, we won't get a better chance at it. + +TODO: discuss THUMB vs ARM code and why THUMB is so much faster (because ROM is a 16-bit bus) diff --git a/book/src/02-concepts/09-sram.md b/book/src/02-concepts/09-sram.md index aa68e68..65ec4d2 100644 --- a/book/src/02-concepts/09-sram.md +++ b/book/src/02-concepts/09-sram.md @@ -1 +1,16 @@ -# Save RAM +# Save RAM (SRAM) + +* **Address Span:** `0xE00_0000` to `0xE00_FFFF` (64k) + +The actual amount of SRAM available depends on your game pak, and the 64k figure +is simply the maximum possible. A particular game pak might have less, and an +emulator will likely let you have all 64k if you want. + +As with other portions of the address space, SRAM has some number of wait cycles +per use. As with ROM, you can change the wait cycle settings via the `WAITCNT` +register if the defaults don't work well for your game pak. See the ROM section +for full details of how the `WAITCNT` register works. + +The game pak SRAM also has only an 8-bit bus, so have fun with that. + +The GBA Direct Memory Access (DMA) unit cannot access SRAM.
From 58d739dd9e1826a5e0ec61799d6e456777f75934 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Mon, 17 Dec 2018 17:00:22 -0700 Subject: [PATCH 26/33] fixed point and stuff --- Cargo.toml | 1 + book/src/01-quirks/01-no_std.md | 11 +- book/src/01-quirks/02-fixed_only.md | 261 +++++++++++++++++++++++++++- book/src/01-quirks/04-newtype.md | 13 +- examples/hello_world.rs | 7 +- src/builtins.rs | 38 ++++ src/core_extras.rs | 1 - src/fixed.rs | 86 +++++++++ src/io_registers.rs | 23 ++- src/lib.rs | 58 +++++-- 10 files changed, 457 insertions(+), 42 deletions(-) create mode 100644 src/builtins.rs create mode 100644 src/fixed.rs diff --git a/Cargo.toml b/Cargo.toml index 3c39e21..401f558 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,6 +12,7 @@ license = "Apache-2.0" publish = false [dependencies] +typenum = "1.10" gba-proc-macro = "0.2.1" [profile.release] diff --git a/book/src/01-quirks/01-no_std.md b/book/src/01-quirks/01-no_std.md index 13faa72..44fa757 100644 --- a/book/src/01-quirks/01-no_std.md +++ b/book/src/01-quirks/01-no_std.md @@ -89,10 +89,6 @@ the standard library types to be used "for free" once it was set up, or just a custom allocator that's GBA specific if Rust's global allocator style isn't a good fit for the GBA (I honestly haven't looked into it). -## LLVM Intrinsics - -TODO: explain that we'll occasionally have to provide some intrinsics. - ## Bare Metal Panic TODO: expand this @@ -114,3 +110,10 @@ TODO: expand this * Sending the message also automatically zeroes the output buffer. * View the output within the "Tools" menu, "View Logs...". Note that the Fatal message, if any doesn't get logged. + +TODO: this will probably fail without a `__clzsi2` implementation, which is a +good seg for the next section + +## LLVM Intrinsics + +TODO: explain that we'll occasionally have to provide some intrinsics. 
diff --git a/book/src/01-quirks/02-fixed_only.md b/book/src/01-quirks/02-fixed_only.md index 49e507a..c2c5c95 100644 --- a/book/src/01-quirks/02-fixed_only.md +++ b/book/src/01-quirks/02-fixed_only.md @@ -1,13 +1,258 @@ # Fixed Only -In addition to not having the standard library available, we don't even have a -floating point unit available! We can't do floating point math in hardware! We -could still do floating point math as software computations if we wanted, but -that's a slow, slow thing to do. +In addition to not having much of the standard library available, we don't even +have a floating point unit available! We can't do floating point math in +hardware! We _could_ still do floating point math as pure software computations +if we wanted, but that's a slow, slow thing to do. -Instead let's learn about another way to have fractional values called "Fixed -Point" +Are there faster ways? It's the same answer as always: "Yes, but not without a +tradeoff." -## Fixed Point +The faster way is to represent fractional values using a system called a [Fixed +Point Representation](https://en.wikipedia.org/wiki/Fixed-point_arithmetic). +What do we trade away? Numeric range. -TODO: describe fixed point, make some types, do the impls, all that. +* Floating point math stores bits for base value and for exponent all according + to a single [well defined](https://en.wikipedia.org/wiki/IEEE_754) standard + for how such a complicated thing works. +* Fixed point math takes a normal integer (either signed or unsigned) and then + just "mentally associates" it (so to speak) with a fractional value for its + "units". If you have 3 and it's in units of 1/2, then you have 3/2, or 1.5 + using decimal notation. If your number is 256 and it's in units of 1/256th + then the value is 1.0 in decimal notation. + +Floating point math requires dedicated hardware to perform quickly, but it can +"trade" precision when it needs to represent extremely large or small values. 
+ +Fixed point math is just integral math, which our GBA is reasonably good at, but +because your number is associated with a fixed fraction your results can get out +of range very easily. + +## Representing A Fixed Point Value + +So we want to associate our numbers with a mental note of what units they're in: + +* [PhantomData](https://doc.rust-lang.org/core/marker/struct.PhantomData.html) + is a type that tells the compiler "please remember this extra type info" when + you add it as a field to a struct. It goes away at compile time, so it's + perfect for us to use as space for a note to ourselves without causing runtime + overhead. +* The [typenum](https://crates.io/crates/typenum) crate is the best way to + represent a number within a type in Rust. Since our values on the GBA are + always specified as a number of fractional bits to count the number as, we can + put `typenum` types such as `U8` or `U14` into our `PhantomData` to keep track + of what's going on. + +Now, those of you who know me, or perhaps just know my reputation, will of +course _immediately_ question what happened to the real Lokathor. I do not care +for most crates, and I particularly don't care for using a crate in teaching +situations. However, `typenum` has a number of factors on its side that let me +suggest it in this situation: + +* It's version 1.10 with a total of 21 versions and nearly 700k downloads, so we + can expect that the major troubles have been shaken out and that it will remain + fairly stable for quite some time to come. +* It has no further dependencies that it's going to drag into the compilation. +* It happens all at compile time, so it's not clogging up our actual game with + any nonsense. +* The (interesting) subject of "how do you do math inside Rust's trait system?" is + totally separate from the concern that we're trying to focus on here. + +Therefore, we will consider it acceptable to use this crate. 
+ +Now the `typenum` crate defines a whole lot, but we'll focus down to just a +single type at the moment: +[UInt](https://docs.rs/typenum/1.10.0/typenum/uint/struct.UInt.html) is a +type-level unsigned value. It's like `u8` or `u16`, but while they're types that +then have values, each `UInt` construction statically equates to a specific +value. Like how the `()` type only has one value, which is also called `()`. In +this case, you wrap up `UInt` around smaller `UInt` values and a `B1` or `B0` +value to build up the binary number that you want at the type level. + +In other words, instead of writing + +```rust +let six = 0b110; +``` + +We write + +```rust +type U6 = UInt<UInt<UInt<UTerm, B1>, B1>, B0>; +``` + +Wild, I know. If you look into the `typenum` crate you can do math and stuff +with these type level numbers, and we will a little bit below, but to start off +we _just_ need to store one in some `PhantomData`. + +### A struct For Fixed Point + +Our actual type for a fixed point value looks like this: + +```rust +use core::marker::PhantomData; +use typenum::marker_traits::Unsigned; + +/// Fixed point `T` value with `F` fractional bits. +#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Fx<T, F: Unsigned> { + num: T, + phantom: PhantomData<F>, +} +``` + +This says that `Fx<T, F>` is a generic type that holds some base number type `T` +and an `F` type that's marking off how many fractional bits we're using. We only +want people giving unsigned type-level values for the `PhantomData` type, so we +use the trait bound `F: Unsigned`. + +We use +[repr(transparent)](https://github.com/rust-lang/rfcs/blob/master/text/1758-repr-transparent.md) +here to ensure that `Fx` will always be treated just like the base type in the +final program (in terms of bit pattern and ABI). + +If you go and check, this is _basically_ how the existing general purpose crates +for fixed point math represent their numbers.
They're a little fancier about it +because they have to cover every case, and we only have to cover our GBA case. + +That's quite a bit to type though. We probably want to make a few type aliases +for things to be easier to look at. Unfortunately there's [no standard +notation](https://en.wikipedia.org/wiki/Fixed-point_arithmetic#Notation) for how +you write a fixed point type. We also have to limit ourselves to what's valid +for use in a Rust type too. I like the `fx` thing, so we'll use that for signed +and then `fxu` if we need an unsigned value. + +```rust +/// Alias for an `i16` fixed point value with 8 fractional bits. +pub type fx8_8 = Fx<i16, U8>; +``` + +Rust will complain about having `non_camel_case_types`, and you can shut that +warning up by putting an `#[allow(non_camel_case_types)]` attribute on the type +alias directly, or you can use `#![allow(non_camel_case_types)]` at the very top +of the module to shut up that warning for the whole module (which is what I +did). + +## Constructing A Fixed Point Value + +So how do we actually _make_ one of these values? Well, we can always just wrap or unwrap any value in our `Fx` type: + +```rust +impl<T, F: Unsigned> Fx<T, F> { + /// Uses the provided value directly. + pub fn from_raw(r: T) -> Self { + Fx { + num: r, + phantom: PhantomData, + } + } + /// Unwraps the inner value. + pub fn into_raw(self) -> T { + self.num + } +} +``` + +I'd like to use the `From` trait of course, but it was giving me some trouble, I +think because of the orphan rule. Oh well. + +If we want to be particular to the fact that these are supposed to be +_numbers_... that gets tricky. Rust is actually quite bad at being generic about +number types. You can use the [num](https://crates.io/crates/num) crate, or you +can just use a macro and invoke it once per type. Guess what we're gonna do. + +```rust +macro_rules! fixed_point_methods { + ($t:ident) => { + impl<F: Unsigned> Fx<$t, F> { + /// Gives 0 for this type.
+ pub fn zero() -> Self { + Fx { + num: 0, + phantom: PhantomData, + } + } + + /// Gives the smallest positive non-zero value. + pub fn precision() -> Self { + Fx { + num: 1, + phantom: PhantomData, + } + } + + /// Makes a value with the integer part shifted into place. + pub fn from_int_part(i: $t) -> Self { + Fx { + num: i << F::to_u8(), + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_methods! {u8} +fixed_point_methods! {i8} +fixed_point_methods! {i16} +fixed_point_methods! {u16} +fixed_point_methods! {i32} +fixed_point_methods! {u32} +``` + +Now _you'd think_ that those can all be `const`, but at the moment you can't +have a `const` function with a bound on any trait other than `Sized`, so they +have to be normal functions. + +Also, we're doing something a little interesting there with `from_int_part`. We +can take our `F` type and get it as a value instead of a type using `to_u8`. + +## Casting Values + +Next, once we have a value in one type, we need to be able to move it into +another type. A particular `Fx` type is a base number type and a fractional +count, so there's two ways we might want to move it. + +For casting the base type it's a little weird. Because there's so many number +types, and we can't be generic about them when using `as`, we'd have to make +like 30 functions (6 base number types we're using, times 5 target number types +you could cast to). Instead, we'll write it just once, and let the user pass a +closure that does the cast. + +We can put this as part of the basic impl block that `from_raw` and `into_raw` +are part of. If we can avoid having code inside a macro we'll do it just because +macros are messy. + +```rust + /// Casts the base type, keeping the fractional bit quantity the same. + pub fn cast_inner<Z, C: Fn(T) -> Z>(self, op: C) -> Fx<Z, F> { + Fx { + num: op(self.num), + phantom: PhantomData, + } + } +``` + +It's... not the best to have to pass in the casting operation like that. +Hopefully we won't have to use it much.
+ +Also we might want to change the amount of fractional bits in a number. Oh, +gosh, this one is kinda complicated. + +## Addition / Subtraction + +## Multiplication / Division + +## Trigonometry + +## Just Using A Crate + +If you feel too intimidated by all of this then I'll suggest to you that the +[fixed](https://crates.io/crates/fixed) crate seems to be the best crate +available for fixed point math. + +_I have not tested its use on the GBA myself_. + +It's just my recommendation from looking at the docs of the various options +available. diff --git a/book/src/01-quirks/04-newtype.md b/book/src/01-quirks/04-newtype.md index 86b2916..f1c4be8 100644 --- a/book/src/01-quirks/04-newtype.md +++ b/book/src/01-quirks/04-newtype.md @@ -1,5 +1,8 @@ # Newtype +TODO: we've already used newtype twice by now (fixed point values and volatile +addresses), so we need to adjust how we start this section. + There's a great Zero Cost abstraction that we'll be using a lot that you might not already be familiar with: we're talking about the "Newtype Pattern"! @@ -27,13 +30,13 @@ cost at compile time. pub struct PixelColor(u16); ``` +TODO: we've already talked about repr(transparent) by now + Ah, except that, as I'm sure you remember from [The Rustonomicon](https://doc.rust-lang.org/nomicon/other-reprs.html#reprtransparent) -(and from [the -RFC](https://github.com/rust-lang/rfcs/blob/master/text/1758-repr-transparent.md) -too, of course), if we have a single field struct that's sometimes different -from having just the bare value, so we should be using `#[repr(transparent)]` -with our newtypes. +(and from the RFC too, of course), if we have a single field struct that's +sometimes different from having just the bare value, so we should be using +`#[repr(transparent)]` with our newtypes. 
```rust #[repr(transparent)] diff --git a/examples/hello_world.rs b/examples/hello_world.rs index 3866d46..549569a 100644 --- a/examples/hello_world.rs +++ b/examples/hello_world.rs @@ -20,12 +20,13 @@ macro_rules! const_assert { }; } +/// Constructs an RGB value with a `const_assert!` that the input is in range. #[macro_export] macro_rules! const_rgb { ($r:expr, $g:expr, $b:expr) => {{ - const_assert!($r); - const_assert!($g); - const_assert!($b); + const_assert!($r <= 31); + const_assert!($g <= 31); + const_assert!($b <= 31); Color::new($r, $g, $b) }}; } diff --git a/src/builtins.rs b/src/builtins.rs new file mode 100644 index 0000000..048b5bf --- /dev/null +++ b/src/builtins.rs @@ -0,0 +1,38 @@ +#![allow(missing_docs)] + +//! The module to provide "builtin" functions that LLVM expects. +//! +//! You shouldn't need to call anything in here yourself, it just has to be in +//! the translation unit and LLVM will find it. + +#[no_mangle] +pub unsafe extern "C" fn __clzsi2(mut x: usize) -> usize { + let mut y: usize; + let mut n: usize = 32; + y = x >> 16; + if y != 0 { + n = n - 16; + x = y; + } + y = x >> 8; + if y != 0 { + n = n - 8; + x = y; + } + y = x >> 4; + if y != 0 { + n = n - 4; + x = y; + } + y = x >> 2; + if y != 0 { + n = n - 2; + x = y; + } + y = x >> 1; + if y != 0 { + n - 2 + } else { + n - x + } +} diff --git a/src/core_extras.rs b/src/core_extras.rs index 1eb96dd..38eaba4 100644 --- a/src/core_extras.rs +++ b/src/core_extras.rs @@ -39,4 +39,3 @@ impl VolatilePtr { } // TODO: kill all this with fire - diff --git a/src/fixed.rs b/src/fixed.rs new file mode 100644 index 0000000..98879c1 --- /dev/null +++ b/src/fixed.rs @@ -0,0 +1,86 @@ +#![allow(non_camel_case_types)] + +//! Module for fixed point math types and operations. + +use core::{convert::From, marker::PhantomData}; +use typenum::{marker_traits::Unsigned, U8}; + +/// Fixed point `T` value with `F` fractional bits. 
+#[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct Fx { + num: T, + phantom: PhantomData, +} + +impl Fx { + /// Uses the provided value directly. + pub fn from_raw(r: T) -> Self { + Fx { + num: r, + phantom: PhantomData, + } + } + /// Unwraps the inner value. + pub fn into_raw(self) -> T { + self.num + } + + /// Casts the base type, keeping the fractional bit quantity the same. + pub fn cast_inner Z>(self, op: C) -> Fx { + Fx { + num: op(self.num), + phantom: PhantomData, + } + } +} + +macro_rules! fixed_point_methods { + ($t:ident) => { + impl Fx<$t, F> { + /// Gives 0 for this type. + pub fn zero() -> Self { + Fx { + num: 0, + phantom: PhantomData, + } + } + + /// Gives the smallest positive non-zero value. + pub fn precision() -> Self { + Fx { + num: 1, + phantom: PhantomData, + } + } + + /// Makes a value with the integer part shifted into place. + pub fn from_int_part(i: $t) -> Self { + Fx { + num: i << F::to_u8(), + phantom: PhantomData, + } + } + + /// Gives the raw inner value. + pub fn into_inner(&self) -> $t { + self.num + } + + /// Changes the fractional bit quantity, keeping the base type the same. + pub fn change_bit_quantity(&self) -> Fx<$t, N> { + unimplemented!() + } + } + }; +} + +fixed_point_methods! {u8} +fixed_point_methods! {i8} +fixed_point_methods! {i16} +fixed_point_methods! {u16} +fixed_point_methods! {i32} +fixed_point_methods! {u32} + +/// Alias for an `i16` fixed point value with 8 fractional bits. +pub type fx8_8 = Fx; diff --git a/src/io_registers.rs b/src/io_registers.rs index 7ea83d3..3439b87 100644 --- a/src/io_registers.rs +++ b/src/io_registers.rs @@ -15,7 +15,7 @@ // TODO(lokathor): IO Register newtypes. 
-use gba_proc_macro::{newtype, register_bit}; +use gba_proc_macro::register_bit; use super::*; @@ -25,9 +25,10 @@ use super::*; pub const DISPCNT: VolatilePtr = VolatilePtr(0x400_0000 as *mut u16); newtype!( + /// A newtype over the various display control options that you have on a GBA. + #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] DisplayControlSetting, - u16, - "A newtype over the various display control options that you have on a GBA." + u16 ); #[allow(missing_docs)] @@ -412,10 +413,14 @@ pub enum TriBool { Plus = 1, } -newtype!(KeyInputSetting, u16, "A newtype over the key input state of the GBA"); +newtype! { + /// Records a particular key press combination. + #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] + KeyInput, u16 +} #[allow(missing_docs)] -impl KeyInputSetting { +impl KeyInput { register_bit!(A_BIT, u16, 1, a_pressed); register_bit!(B_BIT, u16, 1 << 1, b_pressed); register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed); @@ -428,8 +433,8 @@ impl KeyInputSetting { register_bit!(L_BIT, u16, 1 << 9, l_pressed); /// Takes the difference between these keys and another set of keys. - pub fn difference(self, other: KeyInputSetting) -> KeyInputSetting { - KeyInputSetting(self.0 ^ other.0) + pub fn difference(self, other: Self) -> Self { + KeyInput(self.0 ^ other.0) } /// Gives the arrow pad value as a tribool, with Plus being increased column @@ -458,11 +463,11 @@ impl KeyInputSetting { } /// Gets the current state of the keys -pub fn key_input() -> KeyInputSetting { +pub fn key_input() -> KeyInput { // Note(Lokathor): The 10 used bits are "low when pressed" style, but the 6 // unused bits are always low, so we XOR with this mask to get a result where // the only active bits are currently pressed keys. 
- unsafe { KeyInputSetting(KEYINPUT.read() ^ 0b0000_0011_1111_1111) } + unsafe { KeyInput(KEYINPUT.read() ^ 0b0000_0011_1111_1111) } } /// Key Interrupt Control diff --git a/src/lib.rs b/src/lib.rs index f00981f..c82a89b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,12 +1,12 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(not(test), feature(asm))] #![warn(missing_docs)] -//#![allow(clippy::cast_lossless)] +#![allow(clippy::cast_lossless)] #![deny(clippy::float_arithmetic)] //! This crate helps you write GBA ROMs. //! -//! # SAFETY POLICY +//! ## SAFETY POLICY //! //! Some parts of this crate are safe wrappers around unsafe operations. This is //! good, and what you'd expect from a Rust crate. @@ -16,21 +16,55 @@ //! //! **Do not** use this crate in programs that aren't running on the GBA. If you //! do, it's a giant bag of Undefined Behavior. -//! -//! # TESTING POLICY -//! -//! It is the intent of the crate authors that as much of the crate as possible -//! be written so that you can use `cargo test` for at least some parts of your -//! code without everything exploding instantly. To that end, where possible we -//! attempt to use `cfg` flags to make things safe for `cargo test`. Hopefully -//! we got it all. -pub mod core_extras; -pub(crate) use crate::core_extras::*; +/// Assists in defining a newtype wrapper over some base type. +/// +/// Note that rustdoc and derives are all the "meta" stuff, so you can write all +/// of your docs and derives in front of your newtype in the same way you would +/// for a normal struct. Then the inner type to be wrapped it name. +/// +/// The macro _assumes_ that you'll be using it to wrap zero safe numeric types, +/// so it automatically provides a `const fn` method for `new` that just wraps +/// `0`. If this is not desired you can add `, no frills` to the invocation. +/// +/// Example: +/// ``` +/// newtype! { +/// /// Records a particular key press combination. 
+/// #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] +/// KeyInput, u16 +/// } +/// ``` +#[macro_export] +macro_rules! newtype { + ($(#[$attr:meta])* $new_name:ident, $old_name:ident) => { + $(#[$attr])* + #[repr(transparent)] + pub struct $new_name($old_name); + impl $new_name { + /// A `const` "zero value" constructor + pub const fn new() -> Self { + $new_name(0) + } + } + }; + ($(#[$attr:meta])* $new_name:ident, $old_name:ident, no frills) => { + $(#[$attr])* + #[repr(transparent)] + pub struct $new_name($old_name); + }; +} + +pub mod builtins; + +pub mod fixed; #[cfg(not(test))] pub mod bios; +pub mod core_extras; +pub(crate) use crate::core_extras::*; + pub mod io_registers; pub mod video_ram; From 4d270051507a7a4f5ada25deca3edbba435c0753 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Tue, 18 Dec 2018 02:05:59 -0700 Subject: [PATCH 27/33] fixed point stuff --- .../00-introduction/05-help_and_resources.md | 4 + book/src/01-quirks/02-fixed_only.md | 360 ++++++++++++++++-- src/bios.rs | 29 +- src/fixed.rs | 176 ++++++++- src/lib.rs | 5 +- todo_check.bat | 12 + 6 files changed, 520 insertions(+), 66 deletions(-) create mode 100644 todo_check.bat diff --git a/book/src/00-introduction/05-help_and_resources.md b/book/src/00-introduction/05-help_and_resources.md index 63a646f..59a51f5 100644 --- a/book/src/00-introduction/05-help_and_resources.md +++ b/book/src/00-introduction/05-help_and_resources.md @@ -32,6 +32,10 @@ Reference](https://doc.rust-lang.org/nightly/reference/introduction.html) to see if they cover it. You can mostly ignore that big scary red banner at the top, things are a lot better documented than they make it sound. +If you need help trying to fiddle your math down as hard as you can, there are +resources such as the [Bit Twiddling +Hacks](https://graphics.stanford.edu/~seander/bithacks.html) page. 
+ As to GBA related lore, Ketsuban and I didn't magically learn this all from nowhere, we read various technical manuals and guides ourselves and then distilled those works oriented around C and C++ into a book for Rust. diff --git a/book/src/01-quirks/02-fixed_only.md b/book/src/01-quirks/02-fixed_only.md index c2c5c95..bb0e401 100644 --- a/book/src/01-quirks/02-fixed_only.md +++ b/book/src/01-quirks/02-fixed_only.md @@ -166,14 +166,6 @@ can just use a macro and invoke it once per type. Guess what we're gonna do. macro_rules! fixed_point_methods { ($t:ident) => { impl Fx<$t, F> { - /// Gives 0 for this type. - pub fn zero() -> Self { - Fx { - num: 0, - phantom: PhantomData, - } - } - /// Gives the smallest positive non-zero value. pub fn precision() -> Self { Fx { @@ -185,7 +177,7 @@ macro_rules! fixed_point_methods { /// Makes a value with the integer part shifted into place. pub fn from_int_part(i: $t) -> Self { Fx { - num: i << F::to_u8(), + num: i << F::U8, phantom: PhantomData, } } @@ -201,28 +193,34 @@ fixed_point_methods! {i32} fixed_point_methods! {u32} ``` -Now _you'd think_ that those can all be `const`, but at the moment you can't -have a `const` function with a bound on any trait other than `Sized`, so they -have to be normal functions. +Now _you'd think_ that those can be `const`, but at the moment you can't have a +`const` function with a bound on any trait other than `Sized`, so they have to +be normal functions. Also, we're doing something a little interesting there with `from_int_part`. We -can take our `F` type and get it as a value instead of a type using `to_u8`. +can take our `F` type and get its constant value. There's other associated +constants if we want it in other types, and also non-const methods if you wanted +that for some reason (maybe passing it as a closure function? dunno). -## Casting Values +## Casting Base Values -Next, once we have a value in one type, we need to be able to move it into -another type. 
A particular `Fx` type is a base number type and a fractional -count, so there's two ways we might want to move it. +Next, once we have a value in one base type we will need to be able to move it +into another base type. Unfortunately this means we gotta use the `as` operator, +which requires a concrete source type and a concrete destination type. There's +no easy way for us to make it generic here. -For casting the base type it's a little weird. Because there's so many number -types, and we can't be generic about them when using `as`, we'd have to make -like 30 functions (6 base number types we're using, times 5 target number types -you could cast to). Instead, we'll write it just once, and let the user pass a -closure that does the cast. +We could let the user use `into_raw`, cast, and then do `from_raw`, but that's +error prone because they might change the fractional bit count accidentally. +This means that we have to write a function that does the casting while +perfectly preserving the fractional bit quantity. If we wrote one function for +each conversion it'd be like 30 different possible casts (6 base types that we +support, and then 5 possible target types). Instead, we'll write it just once in +a way that takes a closure, and let the user pass a closure that does the cast. +The compiler should merge it all together quite nicely for us once optimizations +kick in. -We can put this as part of the basic impl block that `from_raw` and `into_raw` -are part of. If can avoid having code inside a macro we'll do it just because -macros are messy. +This code goes outside the macro. I want to avoid too much code in the macro if +we can, it's a little easier to cope with I think. ```rust /// Casts the base type, keeping the fractional bit quantity the same. @@ -234,25 +232,317 @@ macros are messy. } ``` -It's... not the best to have to pass in the casting operation like that. -Hopefully we won't have to use it much. 
+It's horrible and ugly, but Rust is just bad at numbers sometimes. -Also we might want to change the amount of fractional bits in a number. Oh, -gosh, this one is kinda complicated. +## Adjusting Fractional Part -## Addition / Subtraction +In addition to the base value we might want to change our fractional bit +quantity. This is actually easier than it sounds, but it also requires us to be +tricky with the generics. We can actually use some typenum type level operators +here. -## Multiplication / Division +This code goes inside the macro: we need to be able to use the left shift and +right shift, which is easiest when we just use the macro's `$t` as our type. We +could alternately put a similar function outside the macro and be generic on `T` +having the left and right shift operators by using a `where` clause. As much as +I'd like to avoid too much code being generated by macro, I'd _even more_ like +to avoid generic code with huge and complicated trait bounds. It comes down to +style, and you gotta decide for yourself. + +```rust + /// Changes the fractional bit quantity, keeping the base type the same. + pub fn adjust_fractional_bits>(self) -> Fx<$t, Y> { + let leftward_movement: i32 = Y::to_i32() - F::to_i32(); + Fx { + num: if leftward_movement > 0 { + self.num << leftward_movement + } else { + self.num >> (-leftward_movement) + }, + phantom: PhantomData, + } + } +``` + +There's a few things at work. First, we introduce `Y` as the target number of +fractional bits, and we _also_ limit it that the target bits quantity can't be +the same as we already have using a type-level operator. If it's the same as we +started with, why are you doing the cast at all? + +Now, once we're sure that the current bits and target bits aren't the same, we +compute `target - start`, and call this our "leftward movement". Example: if +we're targeting 8 bits and we're at 4 bits, we do 8-4 and get +4 as our leftward +movement.
If the leftward_movement is positive we naturally shift our current +value to the left. If it's not positive then it _must_ be negative because we +eliminated 0 as a possibility using the type-level operator, so we shift to the +right by the negative value. + +## Addition, Subtraction, Shifting, Negative, Comparisons + +From here on we're getting help from [this blog +post](https://spin.atomicobject.com/2012/03/15/simple-fixed-point-math/) by [Job +Vranish](https://spin.atomicobject.com/author/vranish/), so thank them if you +learn something. + +I might have given away the game a bit with those `derive` traits on our fixed +point type. For a fair number of operations you can use the normal form of the +op on the inner bits as long as the fractional parts have the same quantity. +This includes equality and ordering (which we derived) as well as addition, +subtraction, and bit shifting (which we need to do ourselves). + +This code can go outside the macro, with sufficient trait bounds. + +```rust +impl, F: Unsigned> Add for Fx { + type Output = Self; + fn add(self, rhs: Fx) -> Self::Output { + Fx { + num: self.num + rhs.num, + phantom: PhantomData, + } + } +} +``` + +The bound on `T` makes it so that `Fx` can be added any time that `T` can +be added to its own type with itself as the output. We can use the exact same +pattern for `Sub`, `Shl`, `Shr`, and `Neg`. With enough trait bounds, we can do +anything! 
+ +```rust +impl, F: Unsigned> Sub for Fx { + type Output = Self; + fn sub(self, rhs: Fx) -> Self::Output { + Fx { + num: self.num - rhs.num, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Shl for Fx { + type Output = Self; + fn shl(self, rhs: u32) -> Self::Output { + Fx { + num: self.num << rhs, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Shr for Fx { + type Output = Self; + fn shr(self, rhs: u32) -> Self::Output { + Fx { + num: self.num >> rhs, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Neg for Fx { + type Output = Self; + fn neg(self) -> Self::Output { + Fx { + num: -self.num, + phantom: PhantomData, + } + } +} +``` + +Unfortunately, for `Shl` and `Shr` to have as much coverage on our type as it +does on the base type (allowing just about any right hand side) we'd have to do +another macro, but I think just `u32` is fine. We can always add more later if +we need. + +We could also implement `BitAnd`, `BitOr`, `BitXor`, and `Not`, but they don't +seem relevant to our fixed point math use, and this section is getting long +already. Just use the same general patterns if you want to add it in your own +programs. Shockingly, `Rem` also works directly if you want it, though I don't +foresee us needing floating point remainder. Also, the GBA can't do hardware +division or remainder, and we'll have to work around that below when we +implement `Div` (which maybe we don't need, but it's complex enough I should +show it instead of letting people guess). + +**Note:** In addition to the various `Op` traits, there's also `OpAssign` +variants. Each `OpAssign` is the same as `Op`, but takes `&mut self` instead of +`self` and then modifies in place instead of producing a fresh value. In other +words, if you want both `+` and `+=` you'll need to do the `AddAssign` trait +too. It's not the worst thing to just write `a = a+b`, so I won't bother with +showing all that here. It's pretty easy to figure out for yourself if you want.
+ +## Multiplication + +This is where things get more interesting. When we have two numbers `A` and `B` +they really stand for `(a*f)` and `(b*f)`. If we write `A*B` then we're really +writing `(a*f)*(b*f)`, which can be rewritten as `(a*b)*f*f`, and now it's +obvious that we have one more `f` than we wanted to have. We have to do the +multiply of the inner value and then divide out the `f`. We divide by `1 << +bit_count`, so if we have 8 fractional bits we'll divide by 256. + +The catch is that, when we do the multiply we're _extremely_ likely to overflow +our base type with that multiplication step. Then we do that divide, and now our +result is basically nonsense. We can avoid this to some extent by casting up to +a higher bit type, doing the multiplication and division at higher precision, +and then casting back down. We want as much precision as possible without being +too inefficient, so we'll always cast up to 32-bit (on a 64-bit machine you'd +cast up to 64-bit instead). + +Naturally, any signed value has to be cast up to `i32` and any unsigned value +has to be cast up to `u32`, so we'll have to handle those separately. + +Also, instead of doing an _actual_ divide we can right-shift by the correct +number of bits to achieve the same effect. _Except_ when we have a signed value +that's negative, because actual division truncates towards zero and +right-shifting truncates towards negative infinity. We can get around _this_ by +flipping the sign, doing the shift, and flipping the sign again (which sounds +silly but it's so much faster than doing an actual division). + +Also, again signed values can be annoying, because if the value _just happens_ +to be `i32::MIN` then when you negate it you'll have... _still_ a negative +value. I'm not 100% on this, but I think the correct thing to do at that point +is to give `$t::MIN` as our output num value. + +Did you get all that?
Good, because this involves casting, we will need to +implement it three times, which calls for another macro. + +```rust +macro_rules! fixed_point_signed_multiply { + ($t:ident) => { + impl Mul for Fx<$t, F> { + type Output = Self; + fn mul(self, rhs: Fx<$t, F>) -> Self::Output { + let pre_shift = (self.num as i32).wrapping_mul(rhs.num as i32); + if pre_shift < 0 { + if pre_shift == core::i32::MIN { + Fx { + num: core::$t::MIN, + phantom: PhantomData, + } + } else { + Fx { + num: (-((-pre_shift) >> F::U8)) as $t, + phantom: PhantomData, + } + } + } else { + Fx { + num: (pre_shift >> F::U8) as $t, + phantom: PhantomData, + } + } + } + } + }; +} + +fixed_point_signed_multiply! {i8} +fixed_point_signed_multiply! {i16} +fixed_point_signed_multiply! {i32} + +macro_rules! fixed_point_unsigned_multiply { + ($t:ident) => { + impl Mul for Fx<$t, F> { + type Output = Self; + fn mul(self, rhs: Fx<$t, F>) -> Self::Output { + Fx { + num: ((self.num as u32).wrapping_mul(rhs.num as u32) >> F::U8) as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_unsigned_multiply! {u8} +fixed_point_unsigned_multiply! {u16} +fixed_point_unsigned_multiply! {u32} +``` + +## Division + +Division is similar to multiplication, but reversed. Which makes sense. This +time `A/B` gives `(a*f)/(b*f)` which is `a/b`, one _less_ `f` than we were +after. + +As with the multiplication version of things, we have to up-cast our inner value +as much as we can before doing the math, to allow for the most precision +possible. + +The snag here is that the GBA has no division or remainder. Instead, the GBA has +a BIOS function you can call to do `i32/i32` division. + +This is a potential problem for us though. If we have some unsigned value, we +need it to fit within the positive space of an `i32` _after the multiply_ so +that we can cast it to `i32`, call the BIOS function that only works on `i32` +values, and cast it back to its actual type.
+ +* If you have a u8 you're always okay, even with 8 floating bits. +* If you have a u16 you're okay even with a maximum value up to 15 floating + bits, but having a maximum value and 16 floating bits makes it break. +* If you have a u32 you're probably going to be in trouble all the time. + +So... ugh, there's not much we can do about this. For now we'll just have to +suffer some. + +// TODO: find a numerics book that tells us how to do `u32/u32` divisions. + +```rust +macro_rules! fixed_point_signed_division { + ($t:ident) => { + impl Div for Fx<$t, F> { + type Output = Self; + fn div(self, rhs: Fx<$t, F>) -> Self::Output { + let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); + let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); + Fx { + num: divide_result as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_signed_division! {i8} +fixed_point_signed_division! {i16} +fixed_point_signed_division! {i32} + +macro_rules! fixed_point_unsigned_division { + ($t:ident) => { + impl Div for Fx<$t, F> { + type Output = Self; + fn div(self, rhs: Fx<$t, F>) -> Self::Output { + let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); + let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); + Fx { + num: divide_result as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_unsigned_division! {u8} +fixed_point_unsigned_division! {u16} +fixed_point_unsigned_division! {u32} +``` ## Trigonometry +TODO: look up tables! arcbits! + ## Just Using A Crate -If you feel too intimidated by all of this then I'll suggest to you that the -[fixed](https://crates.io/crates/fixed) crate seems to be the best crate -available for fixed point math. +If, after seeing all that, and seeing that I still didn't even cover every +possible trait impl that you might want for all the possible types... 
if after +all that you feel too intimidated, then I'll cave a bit on your behalf and +suggest to you that the [fixed](https://crates.io/crates/fixed) crate seems to +be the best crate available for fixed point math. _I have not tested its use on the GBA myself_. It's just my recommendation from looking at the docs of the various options -available. +available, if you really wanted to just have a crate for it. diff --git a/src/bios.rs b/src/bios.rs index c2ac39c..f24b6f4 100644 --- a/src/bios.rs +++ b/src/bios.rs @@ -8,6 +8,11 @@ //! whatever value is necessary for that function). Some functions also perform //! necessary checks to save you from yourself, such as not dividing by zero. +//TODO: ALL functions in this module should have `if cfg!(test)` blocks. The +//functions that never return must panic, the functions that return nothing +//should just do so, and the math functions should just return the correct math +//I guess. + /// (`swi 0x00`) SoftReset the device. /// /// This function does not ever return. @@ -175,17 +180,21 @@ pub fn vblank_interrupt_wait() { #[inline(always)] pub fn div_rem(numerator: i32, denominator: i32) -> (i32, i32) { assert!(denominator != 0); - let div_out: i32; - let rem_out: i32; - unsafe { - asm!(/* ASM */ "swi 0x06" - :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out) - :/* INP */ "{r0}"(numerator), "{r1}"(denominator) - :/* CLO */ "r3" - :/* OPT */ - ); + if cfg!(test) { + (numerator / denominator, numerator % denominator) + } else { + let div_out: i32; + let rem_out: i32; + unsafe { + asm!(/* ASM */ "swi 0x06" + :/* OUT */ "={r0}"(div_out), "={r1}"(rem_out) + :/* INP */ "{r0}"(numerator), "{r1}"(denominator) + :/* CLO */ "r3" + :/* OPT */ + ); + } + (div_out, rem_out) } - (div_out, rem_out) } /// As `div_rem`, keeping only the `div` output. diff --git a/src/fixed.rs b/src/fixed.rs index 98879c1..da8b596 100644 --- a/src/fixed.rs +++ b/src/fixed.rs @@ -2,8 +2,11 @@ //! Module for fixed point math types and operations. 
-use core::{convert::From, marker::PhantomData}; -use typenum::{marker_traits::Unsigned, U8}; +use core::{ + marker::PhantomData, + ops::{Add, Div, Mul, Neg, Shl, Shr, Sub}, +}; +use typenum::{consts::False, marker_traits::Unsigned, type_operators::IsEqual, U8}; /// Fixed point `T` value with `F` fractional bits. #[derive(Debug, Copy, Clone, Default, PartialEq, Eq, PartialOrd, Ord)] @@ -21,6 +24,7 @@ impl Fx { phantom: PhantomData, } } + /// Unwraps the inner value. pub fn into_raw(self) -> T { self.num @@ -35,17 +39,59 @@ impl Fx { } } +impl, F: Unsigned> Add for Fx { + type Output = Self; + fn add(self, rhs: Fx) -> Self::Output { + Fx { + num: self.num + rhs.num, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Sub for Fx { + type Output = Self; + fn sub(self, rhs: Fx) -> Self::Output { + Fx { + num: self.num - rhs.num, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Shl for Fx { + type Output = Self; + fn shl(self, rhs: u32) -> Self::Output { + Fx { + num: self.num << rhs, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Shr for Fx { + type Output = Self; + fn shr(self, rhs: u32) -> Self::Output { + Fx { + num: self.num >> rhs, + phantom: PhantomData, + } + } +} + +impl, F: Unsigned> Neg for Fx { + type Output = Self; + fn neg(self) -> Self::Output { + Fx { + num: -self.num, + phantom: PhantomData, + } + } +} + macro_rules! fixed_point_methods { ($t:ident) => { impl Fx<$t, F> { - /// Gives 0 for this type. - pub fn zero() -> Self { - Fx { - num: 0, - phantom: PhantomData, - } - } - /// Gives the smallest positive non-zero value. pub fn precision() -> Self { Fx { @@ -57,19 +103,22 @@ macro_rules! fixed_point_methods { /// Makes a value with the integer part shifted into place. pub fn from_int_part(i: $t) -> Self { Fx { - num: i << F::to_u8(), + num: i << F::U8, phantom: PhantomData, } } - /// Gives the raw inner value. 
- pub fn into_inner(&self) -> $t { - self.num - } - /// Changes the fractional bit quantity, keeping the base type the same. - pub fn change_bit_quantity(&self) -> Fx<$t, N> { - unimplemented!() + pub fn adjust_fractional_bits>(self) -> Fx<$t, Y> { + let leftward_movement: i32 = Y::to_i32() - F::to_i32(); + Fx { + num: if leftward_movement > 0 { + self.num << leftward_movement + } else { + self.num >> (-leftward_movement) + }, + phantom: PhantomData, + } } } }; @@ -82,5 +131,96 @@ fixed_point_methods! {u16} fixed_point_methods! {i32} fixed_point_methods! {u32} +macro_rules! fixed_point_signed_multiply { + ($t:ident) => { + impl Mul for Fx<$t, F> { + type Output = Self; + fn mul(self, rhs: Fx<$t, F>) -> Self::Output { + let pre_shift = (self.num as i32).wrapping_mul(rhs.num as i32); + if pre_shift < 0 { + if pre_shift == core::i32::MIN { + Fx { + num: core::$t::MIN, + phantom: PhantomData, + } + } else { + Fx { + num: (-((-pre_shift) >> F::U8)) as $t, + phantom: PhantomData, + } + } + } else { + Fx { + num: (pre_shift >> F::U8) as $t, + phantom: PhantomData, + } + } + } + } + }; +} + +fixed_point_signed_multiply! {i8} +fixed_point_signed_multiply! {i16} +fixed_point_signed_multiply! {i32} + +macro_rules! fixed_point_unsigned_multiply { + ($t:ident) => { + impl Mul for Fx<$t, F> { + type Output = Self; + fn mul(self, rhs: Fx<$t, F>) -> Self::Output { + Fx { + num: ((self.num as u32).wrapping_mul(rhs.num as u32) >> F::U8) as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_unsigned_multiply! {u8} +fixed_point_unsigned_multiply! {u16} +fixed_point_unsigned_multiply! {u32} + +macro_rules! 
fixed_point_signed_division { + ($t:ident) => { + impl Div for Fx<$t, F> { + type Output = Self; + fn div(self, rhs: Fx<$t, F>) -> Self::Output { + let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); + let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); + Fx { + num: divide_result as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_signed_division! {i8} +fixed_point_signed_division! {i16} +fixed_point_signed_division! {i32} + +macro_rules! fixed_point_unsigned_division { + ($t:ident) => { + impl Div for Fx<$t, F> { + type Output = Self; + fn div(self, rhs: Fx<$t, F>) -> Self::Output { + let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); + let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); + Fx { + num: divide_result as $t, + phantom: PhantomData, + } + } + } + }; +} + +fixed_point_unsigned_division! {u8} +fixed_point_unsigned_division! {u16} +fixed_point_unsigned_division! {u32} + /// Alias for an `i16` fixed point value with 8 fractional bits. 
pub type fx8_8 = Fx; diff --git a/src/lib.rs b/src/lib.rs index c82a89b..c172301 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,5 @@ -#![cfg_attr(not(test), no_std)] -#![cfg_attr(not(test), feature(asm))] +#![no_std] +#![feature(asm)] #![warn(missing_docs)] #![allow(clippy::cast_lossless)] #![deny(clippy::float_arithmetic)] @@ -59,7 +59,6 @@ pub mod builtins; pub mod fixed; -#[cfg(not(test))] pub mod bios; pub mod core_extras; diff --git a/todo_check.bat b/todo_check.bat new file mode 100644 index 0000000..6f1e3ea --- /dev/null +++ b/todo_check.bat @@ -0,0 +1,12 @@ +@echo off + +echo ------- +echo ------- + +set Wildcard=*.rs + +echo TODOS FOUND: +findstr -s -n -i -l "TODO" %Wildcard% + +echo ------- +echo ------- From f372923bad5d9a3bc506f1b51787ab7aabfeb069 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Tue, 18 Dec 2018 12:09:04 -0700 Subject: [PATCH 28/33] added some tests and they even pass --- src/fixed.rs | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) diff --git a/src/fixed.rs b/src/fixed.rs index da8b596..9aac415 100644 --- a/src/fixed.rs +++ b/src/fixed.rs @@ -224,3 +224,68 @@ fixed_point_unsigned_division! {u32} /// Alias for an `i16` fixed point value with 8 fractional bits. 
pub type fx8_8 = Fx; + +#[cfg(test)] +mod fixed_tests { + use super::*; + + #[test] + fn test_add() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let two = Fx::::from_int_part(2); + assert!(one + one == two) + } + + #[test] + fn test_sub() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let two = Fx::::from_int_part(2); + assert!(two - one == one) + } + + #[test] + fn test_shl() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let two = Fx::::from_int_part(2); + assert!(one << 1 == two) + } + + #[test] + fn test_shr() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let two = Fx::::from_int_part(2); + assert!(two >> 1 == one) + } + + #[test] + fn test_neg() { + use typenum::U4; + let one = Fx::::from_int_part(1); + let neg_one = Fx::::from_int_part(-1); + assert!(-one == neg_one); + assert!(-(-one) == one); + } + + #[test] + fn test_mul() { + use typenum::U4; + let half = Fx::::from_int_part(1) >> 1; + let two = Fx::::from_int_part(2); + let three = Fx::::from_int_part(3); + let twelve = Fx::::from_int_part(12); + assert!(two * three == twelve * half); + } + + #[test] + fn test_div() { + use typenum::U4; + let two = Fx::::from_int_part(2); + let six = Fx::::from_int_part(6); + let twelve = Fx::::from_int_part(12); + assert!(twelve / two == six); + } +} From c3f62b1ab54f7ec00134fd407ddb8a24707c141f Mon Sep 17 00:00:00 2001 From: Lokathor Date: Wed, 19 Dec 2018 20:58:41 -0700 Subject: [PATCH 29/33] trying a commit but i think nightly is broked --- .travis.yml | 8 +- Cargo.toml | 3 + Makefile.toml | 3 + book/src/02-concepts/02-bios.md | 2 + src/builtins.rs | 51 ++++- src/core_extras.rs | 310 ++++++++++++++++++++++++++++--- src/io_registers.rs | 319 +------------------------------- src/lib.rs | 160 +++++++++++++++- src/video_ram.rs | 13 +- 9 files changed, 517 insertions(+), 352 deletions(-) diff --git a/.travis.yml b/.travis.yml index 45e0d04..e22f99c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,8 +27,12 @@ script: - 
export PATH="$PATH:/opt/devkitpro/devkitARM/bin" - export PATH="$PATH:/opt/devkitpro/tools/bin" - cd .. - # Test the lib and then compile all examples with `cargo make` - - cargo test --lib && cargo test --lib --release + # Run all tests, both modes + - cargo test --no-fail-fast --lib + - cargo test --no-fail-fast --lib --release + - cargo test --no-fail-fast --tests + - cargo test --no-fail-fast --tests --release + # cargo make defaults to both debug and release builds of all examples - cargo make # Test build the book so that a failed book build kills this run - cd book && mdbook build diff --git a/Cargo.toml b/Cargo.toml index 401f558..2e65f0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,9 @@ publish = false typenum = "1.10" gba-proc-macro = "0.2.1" +[dev-dependencies] +quickcheck="0.7" + [profile.release] lto = true panic = "abort" diff --git a/Makefile.toml b/Makefile.toml index e01af30..5371b95 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -58,6 +58,9 @@ fn main() -> std::io::Result<()> { [tasks.build] dependencies = ["build-examples-debug", "build-examples-release", "pack-roms"] +[tasks.justrelease] +dependencies = ["build-examples-release", "pack-roms"] + [tasks.test] command = "cargo" args = ["test", "--lib"] diff --git a/book/src/02-concepts/02-bios.md b/book/src/02-concepts/02-bios.md index dadcf2b..4ab245d 100644 --- a/book/src/02-concepts/02-bios.md +++ b/book/src/02-concepts/02-bios.md @@ -237,3 +237,5 @@ them that enumerating them all here wouldn't serve much purpose. Which is not to say that we'll never cover any BIOS functions in this book! Instead, we'll simply mention them whenever they're relevant to the task at hand (such as controlling sound or waiting for vblank). + +//TODO: list/name all BIOS functions as well as what they relate to elsewhere. diff --git a/src/builtins.rs b/src/builtins.rs index 048b5bf..bcda21f 100644 --- a/src/builtins.rs +++ b/src/builtins.rs @@ -5,14 +5,40 @@ //!
You shouldn't need to call anything in here yourself, it just has to be in //! the translation unit and LLVM will find it. +//TODO: make 64 bit too #[no_mangle] -pub unsafe extern "C" fn __clzsi2(mut x: usize) -> usize { +#[cfg(any(target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64"))] +pub extern "C" fn __clzsi2(mut x: usize) -> usize { let mut y: usize; - let mut n: usize = 32; - y = x >> 16; - if y != 0 { - n = n - 16; - x = y; + let mut n: usize = { + #[cfg(target_pointer_width = "64")] + { + 64 + } + #[cfg(target_pointer_width = "32")] + { + 32 + } + #[cfg(target_pointer_width = "16")] + { + 16 + } + }; + #[cfg(target_pointer_width = "64")] + { + y = x >> 32; + if y != 0 { + n = n - 32; + x = y; + } + } + #[cfg(target_pointer_width = "32")] + { + y = x >> 16; + if y != 0 { + n = n - 16; + x = y; + } } y = x >> 8; if y != 0 { @@ -36,3 +62,16 @@ pub unsafe extern "C" fn __clzsi2(mut x: usize) -> usize { n - x } } + +#[test] +fn __clzsi2_test() { + let mut i = 1 << 63; + while i > 0 { + assert_eq!(__clzsi2(i), i.leading_zeros() as usize); + i >>= 1; + } +} + +// TODO: add some shims +// #[no_mangle] extern "aapcs" fn __aeabi_uidiv(num: u32, denom: u32) -> u32 +// #[no_mangle] extern "aapcs" fn __aeabi_idiv(num: i32, denom: i32) -> i32 diff --git a/src/core_extras.rs b/src/core_extras.rs index 38eaba4..1e8bd96 100644 --- a/src/core_extras.rs +++ b/src/core_extras.rs @@ -1,41 +1,301 @@ //! Things that I wish were in core, but aren't. -/// A simple wrapper for any `*mut T` to adjust the basic operations. +//TODO(Lokathor): reorganize as gba::core::fixed and gba::core::volatile ? + +use core::{cmp::Ordering, iter::FusedIterator, marker::PhantomData, num::NonZeroUsize}; + +/// Abstracts the use of a volatile hardware address. /// -/// Read and Write are made to be volatile. Offset is made to be -/// wrapping_offset. This makes it much easier to correctly work with IO -/// Registers and all display related memory on the GBA.
-#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, PartialOrd, Ord)] +/// If you're trying to do anything other than abstract a volatile hardware +/// device then you _do not want to use this type_. Use one of the many other +/// smart pointer types. +/// +/// A volatile address doesn't store a value in the normal way: It maps to some +/// real hardware _other than_ RAM, and that hardware might have any sort of +/// strange rules. The specifics of reading and writing depend on the hardware +/// being mapped. For example, a particular address might be read only (ignoring +/// writes), write only (returning some arbitrary value if you read it), +/// "normal" read write (where you read back what you wrote), or some complex +/// read-write situation where writes have an effect but you _don't_ read back +/// what you wrote. +/// +/// As you imagine it can be very unsafe. The design of this type is set up so +/// that _creation_ is unsafe, and _use_ is safe. This gives an optimal +/// experience, since you'll use memory locations a lot more often than you try +/// to name them, on average. +/// +/// `VolAddress` is _not_ a thread safe type. If your device is multi-threaded +/// then you must arrange for synchronization in some other way. A `VolAddress` +/// _can_ be used to share data between an interrupt running on a core and a +/// thread running on that core as long as all access of that location is +/// volatile (if you're using the `asm!` macro add the "volatile" option, if +/// you're linking in ASM with the linker that's effectively volatile since the +/// compiler doesn't get a chance to mess with it). +/// +/// # Safety +/// +/// In order for values of this type to operate correctly they must follow quite +/// a few safety limits: +/// +/// * The declared address must be non-null (it uses the `NonNull` optimization +/// for better iteration results). This shouldn't be a big problem, since +/// hardware can't really live at the null address. 
+/// * The declared address must be aligned for the declared type of `T`. +/// * The declared address must _always_ read as something that's a valid bit +/// pattern for `T`. Don't pick any enums or things like that if your hardware +/// doesn't back it up. If there's _any_ doubt at all, you must instead read +/// or write an unsigned int of the correct bit size and then parse the bits +/// by hand. +/// * The declared address must be a part of the address space that Rust's +/// allocator and/or stack frames will never use. If you're not sure, please +/// re-read the hardware specs of your device and its memory map until you +/// know. +/// +/// The exact points of UB are if the address is ever 0, or if you ever `read` +/// or `write` with the invalid pointer. For example, if you offset to some +/// crazy (non-zero) value and then never use it that won't be an immediate +/// trigger of UB. +#[derive(Debug)] #[repr(transparent)] -pub struct VolatilePtr(pub *mut T); - -impl core::fmt::Pointer for VolatilePtr { - /// Formats exactly like the inner `*mut T`. - fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { - write!(f, "{:p}", self.0) +pub struct VolAddress { + address: NonZeroUsize, + marker: PhantomData<*mut T>, +} +// Note(Lokathor): We have to hand implement all these traits because if we use +// `derive` then they only get derived if the inner `T` has the trait. However, +// since we're acting like a pointer to `T`, the capability we offer isn't +// affected by whatever type `T` ends up being. 
+impl Clone for VolAddress { + fn clone(&self) -> Self { + *self + } +} +impl Copy for VolAddress {} +impl PartialEq for VolAddress { + fn eq(&self, other: &Self) -> bool { + self.address == other.address + } +} +impl Eq for VolAddress {} +impl PartialOrd for VolAddress { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.address.cmp(&other.address)) + } +} +impl Ord for VolAddress { + fn cmp(&self, other: &Self) -> Ordering { + self.address.cmp(&other.address) } } -impl VolatilePtr { - /// Performs a `read_volatile`. - pub unsafe fn read(&self) -> T { - self.0.read_volatile() +impl VolAddress { + /// Constructs a new address. + /// + /// # Safety + /// + /// You must follow the standard safety rules as outlined in the type docs. + pub const unsafe fn new_unchecked(address: usize) -> Self { + VolAddress { + address: NonZeroUsize::new_unchecked(address), + marker: PhantomData, + } } - /// Performs a `write_volatile`. - pub unsafe fn write(&self, data: T) { - self.0.write_volatile(data); + /// Casts the type of `T` into type `Z`. + /// + /// # Safety + /// + /// You must follow the standard safety rules as outlined in the type docs. + pub const unsafe fn cast(self) -> VolAddress { + VolAddress { + address: self.address, + marker: PhantomData, + } } - /// Performs a `wrapping_offset`. - pub fn offset(self, count: isize) -> Self { - VolatilePtr(self.0.wrapping_offset(count)) + /// Offsets the address by `offset` slots (like `pointer::wrapping_offset`). + /// + /// # Safety + /// + /// You must follow the standard safety rules as outlined in the type docs. + pub unsafe fn offset(self, offset: isize) -> Self { + // TODO: const this + VolAddress { + address: NonZeroUsize::new_unchecked(self.address.get().wrapping_add(offset as usize * core::mem::size_of::())), + marker: PhantomData, + } } - /// Performs a cast into some new pointer type. 
- pub fn cast(self) -> VolatilePtr { - VolatilePtr(self.0 as *mut Z) + /// Checks that the current target type of this address is aligned at this + /// address value. + /// + /// Technically it's a safety violation to even make a `VolAddress` that isn't + /// aligned. However, I know you're gonna try doing the bad thing, and it's + /// better to give you a chance to call `is_aligned` and potentially back off + /// from the operation or throw a `debug_assert!` or something instead of + /// triggering UB. Eventually this will be `const fn`, which will potentially + /// let you spot errors without even having to run your program. + pub fn is_aligned(self) -> bool { + // TODO: const this + self.address.get() % core::mem::align_of::() == 0 + } + + /// Makes an iterator starting here across the given number of slots. + /// + /// # Safety + /// + /// The normal safety rules must be correct for each address iterated over. + pub const unsafe fn iter_slots(self, slots: usize) -> VolAddressIter { + VolAddressIter { vol_address: self, slots } + } + + // non-const and never can be. + + /// Reads a `Copy` value out of the address. + /// + /// The `Copy` bound is actually supposed to be `!Drop`, but rust doesn't + /// allow negative trait bounds. If your type isn't `Copy` you can use the + /// `read_non_copy` fallback to do an unsafe read. + /// + /// That said, I don't think that you legitimately have hardware that maps to + /// a Rust type with a `Drop` impl. If you do please tell me, I'm interested + /// to hear about it. + pub fn read(self) -> T + where + T: Copy, + { + unsafe { (self.address.get() as *mut T).read_volatile() } + } + + /// Reads a value out of the address with no trait bound. + /// + /// # Safety + /// + /// This is _not_ a move, it forms a bit duplicate of the current address + /// value. If `T` has a `Drop` trait that does anything it is up to you to + /// ensure that repeated drops do not cause UB (such as a double free). 
+ pub unsafe fn read_non_copy(self) -> T { + (self.address.get() as *mut T).read_volatile() + } + + /// Writes a value to the address. + /// + /// Semantically, the value is moved into the `VolAddress` and then forgotten, + /// so if `T` has a `Drop` impl then that will never get executed. This is + /// "safe" under Rust's safety rules, but could cause something unintended + /// (eg: a memory leak). + pub fn write(self, val: T) { + unsafe { (self.address.get() as *mut T).write_volatile(val) } } } -// TODO: kill all this with fire +/// An iterator that produces a series of `VolAddress` values. +#[derive(Debug)] +pub struct VolAddressIter { + vol_address: VolAddress, + slots: usize, +} +impl Clone for VolAddressIter { + fn clone(&self) -> Self { + VolAddressIter { + vol_address: self.vol_address, + slots: self.slots, + } + } +} +impl PartialEq for VolAddressIter { + fn eq(&self, other: &Self) -> bool { + self.vol_address == other.vol_address && self.slots == other.slots + } +} +impl Eq for VolAddressIter {} +impl Iterator for VolAddressIter { + type Item = VolAddress; + + fn next(&mut self) -> Option { + if self.slots > 0 { + let out = self.vol_address; + unsafe { + self.slots -= 1; + self.vol_address = self.vol_address.offset(1); + } + Some(out) + } else { + None + } + } +} +impl FusedIterator for VolAddressIter {} + +/// This type is like `VolAddress`, but for when you have a block of values all +/// in a row. +/// +/// This is similar to the idea of an array or a slice, but called a "block" +/// because you could _also_ construct a `[VolAddress]`, and we want to avoid +/// any accidental confusion. 
+#[derive(Debug)] +pub struct VolAddressBlock { + vol_address: VolAddress, + slots: usize, +} +impl Clone for VolAddressBlock { + fn clone(&self) -> Self { + VolAddressBlock { + vol_address: self.vol_address, + slots: self.slots, + } + } +} +impl PartialEq for VolAddressBlock { + fn eq(&self, other: &Self) -> bool { + self.vol_address == other.vol_address && self.slots == other.slots + } +} +impl Eq for VolAddressBlock {} + +impl VolAddressBlock { + /// Constructs a new `VolAddressBlock`. + /// + /// # Safety + /// + /// The given `VolAddress` must be valid when offset by each of `0 .. slots` + pub const unsafe fn new_unchecked(vol_address: VolAddress, slots: usize) -> Self { + VolAddressBlock { vol_address, slots } + } + + /// Checked "indexing" style access of the block, giving either a `VolAddress` or a panic. + pub fn index(self, slot: usize) -> VolAddress { + if slot < self.slots { + unsafe { self.vol_address.offset(slot as isize) } + } else { + panic!("Index Requested: {} >= Bound: {}", slot, self.slots) + } + } + + /// Unchecked indexing into the block. + /// + /// # Safety + /// + /// The slot given must be in bounds. + pub unsafe fn index_unchecked(self, slot: usize) -> VolAddress { + // TODO: const this + self.vol_address.offset(slot as isize) + } + + /// Checked "getting" style access of the block, giving an Option value. + pub fn get(self, slot: usize) -> Option> { + if slot < self.slots { + unsafe { Some(self.vol_address.offset(slot as isize)) } + } else { + None + } + } + + /// Gives an iterator over the block's slots. + pub const fn iter(self) -> VolAddressIter { + VolAddressIter { + vol_address: self.vol_address, + slots: self.slots, + } + } +} diff --git a/src/io_registers.rs b/src/io_registers.rs index 3439b87..9bdc7fb 100644 --- a/src/io_registers.rs +++ b/src/io_registers.rs @@ -22,7 +22,7 @@ use super::*; /// LCD Control. Read/Write. 
/// /// * [gbatek entry](http://problemkaputt.de/gbatek.htm#lcdiodisplaycontrol) -pub const DISPCNT: VolatilePtr = VolatilePtr(0x400_0000 as *mut u16); +pub const DISPCNT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0000) }; newtype!( /// A newtype over the various display control options that you have on a GBA. @@ -98,24 +98,19 @@ pub enum DisplayControlMode { /// Assigns the given display control setting. pub fn set_display_control(setting: DisplayControlSetting) { - unsafe { - DISPCNT.write(setting.0); - } + DISPCNT.write(setting); } /// Obtains the current display control setting. pub fn display_control() -> DisplayControlSetting { - unsafe { DisplayControlSetting(DISPCNT.read()) } + DISPCNT.read() } -/// General LCD Status (STAT,LYC) -pub const DISPSTAT: VolatilePtr = VolatilePtr(0x400_0004 as *mut u16); - /// Vertical Counter (LY) -pub const VCOUNT: VolatilePtr = VolatilePtr(0x400_0006 as *mut u16); +pub const VCOUNT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0006) }; /// Obtains the current VCount value. pub fn vcount() -> u16 { - unsafe { VCOUNT.read() } + VCOUNT.read() } /// Performs a busy loop until VBlank starts. 
@@ -130,278 +125,8 @@ pub fn wait_until_vdraw() { while vcount() >= SCREEN_HEIGHT as u16 {} } -/// BG0 Control -pub const BG0CNT: VolatilePtr = VolatilePtr(0x400_0008 as *mut u16); - -/// BG1 Control -pub const BG1CNT: VolatilePtr = VolatilePtr(0x400_000A as *mut u16); - -/// BG2 Control -pub const BG2CNT: VolatilePtr = VolatilePtr(0x400_000C as *mut u16); - -/// BG3 Control -pub const BG3CNT: VolatilePtr = VolatilePtr(0x400_000E as *mut u16); - -/// BG0 X-Offset -pub const BG0HOFS: VolatilePtr = VolatilePtr(0x400_0010 as *mut u16); - -/// BG0 Y-Offset -pub const BG0VOFS: VolatilePtr = VolatilePtr(0x400_0012 as *mut u16); - -/// BG1 X-Offset -pub const BG1HOFS: VolatilePtr = VolatilePtr(0x400_0014 as *mut u16); - -/// BG1 Y-Offset -pub const BG1VOFS: VolatilePtr = VolatilePtr(0x400_0016 as *mut u16); - -/// BG2 X-Offset -pub const BG2HOFS: VolatilePtr = VolatilePtr(0x400_0018 as *mut u16); - -/// BG2 Y-Offset -pub const BG2VOFS: VolatilePtr = VolatilePtr(0x400_001A as *mut u16); - -/// BG3 X-Offset -pub const BG3HOFS: VolatilePtr = VolatilePtr(0x400_001C as *mut u16); - -/// BG3 Y-Offset -pub const BG3VOFS: VolatilePtr = VolatilePtr(0x400_001E as *mut u16); - -/// BG2 Rotation/Scaling Parameter A (dx) -pub const BG2PA: VolatilePtr = VolatilePtr(0x400_0020 as *mut u16); - -/// BG2 Rotation/Scaling Parameter B (dmx) -pub const BG2PB: VolatilePtr = VolatilePtr(0x400_0022 as *mut u16); - -/// BG2 Rotation/Scaling Parameter C (dy) -pub const BG2PC: VolatilePtr = VolatilePtr(0x400_0024 as *mut u16); - -/// BG2 Rotation/Scaling Parameter D (dmy) -pub const BG2PD: VolatilePtr = VolatilePtr(0x400_0026 as *mut u16); - -/// BG2 Reference Point X-Coordinate -pub const BG2X: VolatilePtr = VolatilePtr(0x400_0028 as *mut u32); - -/// BG2 Reference Point Y-Coordinate -pub const BG2Y: VolatilePtr = VolatilePtr(0x400_002C as *mut u32); - -/// BG3 Rotation/Scaling Parameter A (dx) -pub const BG3PA: VolatilePtr = VolatilePtr(0x400_0030 as *mut u16); - -/// BG3 Rotation/Scaling 
Parameter B (dmx) -pub const BG3PB: VolatilePtr = VolatilePtr(0x400_0032 as *mut u16); - -/// BG3 Rotation/Scaling Parameter C (dy) -pub const BG3PC: VolatilePtr = VolatilePtr(0x400_0034 as *mut u16); - -/// BG3 Rotation/Scaling Parameter D (dmy) -pub const BG3PD: VolatilePtr = VolatilePtr(0x400_0036 as *mut u16); - -/// BG3 Reference Point X-Coordinate -pub const BG3X: VolatilePtr = VolatilePtr(0x400_0038 as *mut u32); - -/// BG3 Reference Point Y-Coordinate -pub const BG3Y: VolatilePtr = VolatilePtr(0x400_003C as *mut u32); - -/// Window 0 Horizontal Dimensions -pub const WIN0H: VolatilePtr = VolatilePtr(0x400_0040 as *mut u16); - -/// Window 1 Horizontal Dimensions -pub const WIN1H: VolatilePtr = VolatilePtr(0x400_0042 as *mut u16); - -/// Window 0 Vertical Dimensions -pub const WIN0V: VolatilePtr = VolatilePtr(0x400_0044 as *mut u16); - -/// Window 1 Vertical Dimensions -pub const WIN1V: VolatilePtr = VolatilePtr(0x400_0046 as *mut u16); - -/// Inside of Window 0 and 1 -pub const WININ: VolatilePtr = VolatilePtr(0x400_0048 as *mut u16); - -/// Inside of OBJ Window & Outside of Windows -pub const WINOUT: VolatilePtr = VolatilePtr(0x400_004A as *mut u16); - -/// Mosaic Size -pub const MOSAIC: VolatilePtr = VolatilePtr(0x400_004C as *mut u16); - -/// Color Special Effects Selection -pub const BLDCNT: VolatilePtr = VolatilePtr(0x400_0050 as *mut u16); - -/// Alpha Blending Coefficients -pub const BLDALPHA: VolatilePtr = VolatilePtr(0x400_0052 as *mut u16); - -/// Brightness (Fade-In/Out) Coefficient -pub const BLDY: VolatilePtr = VolatilePtr(0x400_0054 as *mut u16); - -/// Channel 1 Sweep register (NR10) -pub const UND1CNT_L: VolatilePtr = VolatilePtr(0x400_0060 as *mut u16); - -/// Channel 1 Duty/Length/Envelope (NR11, NR12) -pub const UND1CNT_H: VolatilePtr = VolatilePtr(0x400_0062 as *mut u16); - -/// Channel 1 Frequency/Control (NR13, NR14) -pub const UND1CNT_X: VolatilePtr = VolatilePtr(0x400_0064 as *mut u16); - -/// Channel 2 Duty/Length/Envelope (NR21, 
NR22) -pub const UND2CNT_L: VolatilePtr = VolatilePtr(0x400_0068 as *mut u16); - -/// Channel 2 Frequency/Control (NR23, NR24) -pub const UND2CNT_H: VolatilePtr = VolatilePtr(0x400_006C as *mut u16); - -/// Channel 3 Stop/Wave RAM select (NR30) -pub const UND3CNT_L: VolatilePtr = VolatilePtr(0x400_0070 as *mut u16); - -/// Channel 3 Length/Volume (NR31, NR32) -pub const UND3CNT_H: VolatilePtr = VolatilePtr(0x400_0072 as *mut u16); - -/// Channel 3 Frequency/Control (NR33, NR34) -pub const UND3CNT_X: VolatilePtr = VolatilePtr(0x400_0074 as *mut u16); - -/// Channel 4 Length/Envelope (NR41, NR42) -pub const UND4CNT_L: VolatilePtr = VolatilePtr(0x400_0078 as *mut u16); - -/// Channel 4 Frequency/Control (NR43, NR44) -pub const UND4CNT_H: VolatilePtr = VolatilePtr(0x400_007C as *mut u16); - -/// Control Stereo/Volume/Enable (NR50, NR51) -pub const UNDCNT_L: VolatilePtr = VolatilePtr(0x400_0080 as *mut u16); - -/// Control Mixing/DMA Control -pub const UNDCNT_H: VolatilePtr = VolatilePtr(0x400_0082 as *mut u16); - -/// Control Sound on/off (NR52) -pub const UNDCNT_X: VolatilePtr = VolatilePtr(0x400_0084 as *mut u16); - -/// Sound PWM Control -pub const UNDBIAS: VolatilePtr = VolatilePtr(0x400_0088 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM0_L: VolatilePtr = VolatilePtr(0x400_0090 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM0_H: VolatilePtr = VolatilePtr(0x400_0092 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM1_L: VolatilePtr = VolatilePtr(0x400_0094 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM1_H: VolatilePtr = VolatilePtr(0x400_0096 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM2_L: VolatilePtr = VolatilePtr(0x400_0098 as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM2_H: VolatilePtr = VolatilePtr(0x400_009A as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM3_L: VolatilePtr = 
VolatilePtr(0x400_009C as *mut u16); - -/// Channel 3 Wave Pattern RAM (W/R) -pub const WAVE_RAM3_H: VolatilePtr = VolatilePtr(0x400_009E as *mut u16); - -/// Channel A FIFO, Data 0-3 -pub const FIFO_A: VolatilePtr = VolatilePtr(0x400_00A0 as *mut u32); - -/// Channel B FIFO, Data 0-3 -pub const FIFO_B: VolatilePtr = VolatilePtr(0x400_00A4 as *mut u32); - -/// DMA 0 Source Address -pub const DMA0SAD: VolatilePtr = VolatilePtr(0x400_00B0 as *mut u32); - -/// DMA 0 Destination Address -pub const DMA0DAD: VolatilePtr = VolatilePtr(0x400_00B4 as *mut u32); - -/// DMA 0 Word Count -pub const DMA0CNT_L: VolatilePtr = VolatilePtr(0x400_00B8 as *mut u16); - -/// DMA 0 Control -pub const DMA0CNT_H: VolatilePtr = VolatilePtr(0x400_00BA as *mut u16); - -/// DMA 1 Source Address -pub const DMA1SAD: VolatilePtr = VolatilePtr(0x400_00BC as *mut u32); - -/// DMA 1 Destination Address -pub const DMA1DAD: VolatilePtr = VolatilePtr(0x400_00C0 as *mut u32); - -/// DMA 1 Word Count -pub const DMA1CNT_L: VolatilePtr = VolatilePtr(0x400_00C4 as *mut u16); - -/// DMA 1 Control -pub const DMA1CNT_H: VolatilePtr = VolatilePtr(0x400_00C6 as *mut u16); - -/// DMA 2 Source Address -pub const DMA2SAD: VolatilePtr = VolatilePtr(0x400_00C8 as *mut u32); - -/// DMA 2 Destination Address -pub const DMA2DAD: VolatilePtr = VolatilePtr(0x400_00CC as *mut u32); - -/// DMA 2 Word Count -pub const DMA2CNT_L: VolatilePtr = VolatilePtr(0x400_00D0 as *mut u16); - -/// DMA 2 Control -pub const DMA2CNT_H: VolatilePtr = VolatilePtr(0x400_00D2 as *mut u16); - -/// DMA 3 Source Address -pub const DMA3SAD: VolatilePtr = VolatilePtr(0x400_00D4 as *mut u32); - -/// DMA 3 Destination Address -pub const DMA3DAD: VolatilePtr = VolatilePtr(0x400_00D8 as *mut u32); - -/// DMA 3 Word Count -pub const DMA3CNT_L: VolatilePtr = VolatilePtr(0x400_00DC as *mut u16); - -/// DMA 3 Control -pub const DMA3CNT_H: VolatilePtr = VolatilePtr(0x400_00DE as *mut u16); - -/// Timer 0 Counter/Reload -pub const TM0D: VolatilePtr = 
VolatilePtr(0x400_0100 as *mut u16); - -/// Timer 0 Control -pub const TM0CNT: VolatilePtr = VolatilePtr(0x400_0102 as *mut u16); - -/// Timer 1 Counter/Reload -pub const TM1D: VolatilePtr = VolatilePtr(0x400_0104 as *mut u16); - -/// Timer 1 Control -pub const TM1CNT: VolatilePtr = VolatilePtr(0x400_0106 as *mut u16); - -/// Timer 2 Counter/Reload -pub const TM2D: VolatilePtr = VolatilePtr(0x400_0108 as *mut u16); - -/// Timer 2 Control -pub const TM2CNT: VolatilePtr = VolatilePtr(0x400_010A as *mut u16); - -/// Timer 3 Counter/Reload -pub const TM3D: VolatilePtr = VolatilePtr(0x400_010C as *mut u16); - -/// Timer 3 Control -pub const TM3CNT: VolatilePtr = VolatilePtr(0x400_010E as *mut u16); - -/// SIO Data (Normal-32bit Mode; shared with below) -pub const SIODATA32: VolatilePtr = VolatilePtr(0x400_0120 as *mut u32); - -/// SIO Data 0 (Parent) (Multi-Player Mode) -pub const SIOMULTI0: VolatilePtr = VolatilePtr(0x400_0120 as *mut u16); - -/// SIO Data 1 (1st Child) (Multi-Player Mode) -pub const SIOMULTI1: VolatilePtr = VolatilePtr(0x400_0122 as *mut u16); - -/// SIO Data 2 (2nd Child) (Multi-Player Mode) -pub const SIOMULTI2: VolatilePtr = VolatilePtr(0x400_0124 as *mut u16); - -/// SIO Data 3 (3rd Child) (Multi-Player Mode) -pub const SIOMULTI3: VolatilePtr = VolatilePtr(0x400_0126 as *mut u16); - -/// SIO Control Register -pub const SIOCNT: VolatilePtr = VolatilePtr(0x400_0128 as *mut u16); - -/// D SIO Data (Local of MultiPlayer; shared below) -pub const SIOMLT_SEN: VolatilePtr = VolatilePtr(0x400_012A as *mut u16); - -/// SIO Data (Normal-8bit and UART Mode) -pub const SIODATA8: VolatilePtr = VolatilePtr(0x400_012A as *mut u16); - /// Key Status -pub const KEYINPUT: VolatilePtr = VolatilePtr(0x400_0130 as *mut u16); +const KEYINPUT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0130) }; /// A "tribool" value helps us interpret the arrow pad. 
#[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -467,35 +192,5 @@ pub fn key_input() -> KeyInput { // Note(Lokathor): The 10 used bits are "low when pressed" style, but the 6 // unused bits are always low, so we XOR with this mask to get a result where // the only active bits are currently pressed keys. - unsafe { KeyInput(KEYINPUT.read() ^ 0b0000_0011_1111_1111) } + KeyInput(KEYINPUT.read() ^ 0b0000_0011_1111_1111) } - -/// Key Interrupt Control -pub const KEYCNT: VolatilePtr = VolatilePtr(0x400_0132 as *mut u16); - -/// SIO Mode Select/General Purpose Data -pub const RCNT: VolatilePtr = VolatilePtr(0x400_0134 as *mut u16); - -/// SIO JOY Bus Control -pub const JOYCNT: VolatilePtr = VolatilePtr(0x400_0140 as *mut u16); - -/// SIO JOY Bus Receive Data -pub const JOY_RECV: VolatilePtr = VolatilePtr(0x400_0150 as *mut u32); - -/// SIO JOY Bus Transmit Data -pub const JOY_TRANS: VolatilePtr = VolatilePtr(0x400_0154 as *mut u32); - -/// SIO JOY Bus Receive Status -pub const JOYSTAT: VolatilePtr = VolatilePtr(0x400_0158 as *mut u16); - -/// Interrupt Enable Register -pub const IE: VolatilePtr = VolatilePtr(0x400_0200 as *mut u16); - -/// Interrupt Request Flags / IRQ Acknowledge -pub const IF: VolatilePtr = VolatilePtr(0x400_0202 as *mut u16); - -/// Game Pak Waitstate Control -pub const WAITCNT: VolatilePtr = VolatilePtr(0x400_0204 as *mut u16); - -/// Interrupt Master Enable Register -pub const IME: VolatilePtr = VolatilePtr(0x400_0208 as *mut u16); diff --git a/src/lib.rs b/src/lib.rs index c172301..1ddf7d0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,7 @@ -#![no_std] +#![cfg_attr(not(test), no_std)] #![feature(asm)] +#![feature(const_int_wrapping)] +#![feature(min_const_unsafe_fn)] #![warn(missing_docs)] #![allow(clippy::cast_lossless)] #![deny(clippy::float_arithmetic)] @@ -68,3 +70,159 @@ pub mod io_registers; pub mod video_ram; pub(crate) use crate::video_ram::*; + +/// Performs unsigned divide and remainder, gives None if dividing by 0. 
+pub fn divrem_u32(numer: u32, denom: u32) -> Option<(u32, u32)> { + if denom == 0 { + None + } else { + Some(unsafe { divrem_u32_unchecked(numer, denom) }) + } +} + +/// Performs divide and remainder, no check for 0 division. +/// +/// # Safety +/// +/// If you call this with a denominator of 0 the result is implementation +/// defined (not literal UB) including but not limited to: an infinite loop, +/// panic on overflow, or incorrect output. +pub unsafe fn divrem_u32_unchecked(numer: u32, denom: u32) -> (u32, u32) { + if (numer >> 5) < denom { + divrem_u32_simple(numer, denom) + } else { + divrem_u32_non_restoring(numer, denom) + } +} + +/// The simplest form of division. If N is too much larger than D this will be +/// extremely slow. If N is close enough to D then it will likely be faster than +/// the non_restoring form. +fn divrem_u32_simple(mut numer: u32, denom: u32) -> (u32, u32) { + let mut quot = 0; + while numer >= denom { + numer -= denom; + quot += 1; + } + (quot, numer) +} + +/// Takes a fixed quantity of time based on the bit width of the number (in this +/// case 32). +fn divrem_u32_non_restoring(numer: u32, denom: u32) -> (u32, u32) { + let mut r: i64 = numer as i64; + let d: i64 = (denom as i64) << 32; + let mut q: u32 = 0; + let mut i = 1 << 31; + while i > 0 { + if r >= 0 { + q |= i; + r = 2 * r - d; + } else { + r = 2 * r + d; + } + i >>= 1; + } + q = q - !q; + if r < 0 { + q = q - 1; + r = r + d; + } + r = r >> 32; + debug_assert!(r >= 0); + debug_assert!(r <= core::u32::MAX as i64); + (q, r as u32) +} + +/// Performs signed divide and remainder, gives None if dividing by 0 or +/// computing `MIN/-1` +pub fn divrem_i32(numer: i32, denom: i32) -> Option<(i32, i32)> { + if denom == 0 || (numer == core::i32::MIN && denom == -1) { + None + } else { + Some(unsafe { divrem_i32_unchecked(numer, denom) }) + } +} + +/// Performs signed divide and remainder, no check for 0 division or `MIN/-1`. 
+/// +/// # Safety +/// +/// * If you call this with a denominator of 0 the result is implementation +/// defined (not literal UB) including but not limited to: an infinite loop, +/// panic on overflow, or incorrect output. +/// * If you call this with `MIN/-1` you'll get a panic in debug or just `MIN` +/// in release (which is incorrect), because of how two's complement works. +pub unsafe fn divrem_i32_unchecked(numer: i32, denom: i32) -> (i32, i32) { + let unsigned_numer = numer.abs() as u32; + let unsigned_denom = denom.abs() as u32; + let opposite_sign = (numer ^ denom) < 0; + let (udiv, urem) = if (numer >> 5) < denom { + divrem_u32_simple(unsigned_numer, unsigned_denom) + } else { + divrem_u32_non_restoring(unsigned_numer, unsigned_denom) + }; + if opposite_sign { + if numer < 0 { + (-(udiv as i32), -(urem as i32)) + } else { + (-(udiv as i32), urem as i32) + } + } else { + if numer < 0 { + (udiv as i32, -(urem as i32)) + } else { + (udiv as i32, urem as i32) + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use quickcheck::quickcheck; + + // We have an explicit property on the non_restoring division + quickcheck! { + fn divrem_u32_non_restoring_prop(num: u32, denom: u32) -> bool { + if denom > 0 { + divrem_u32_non_restoring(num, denom) == (num / denom, num % denom) + } else { + true + } + } + } + + // We have an explicit property on the simple division + quickcheck! { + fn divrem_u32_simple_prop(num: u32, denom: u32) -> bool { + if denom > 0 { + divrem_u32_simple(num, denom) == (num / denom, num % denom) + } else { + true + } + } + } + + // Test the u32 wrapper + quickcheck! { + fn divrem_u32_prop(num: u32, denom: u32) -> bool { + if denom > 0 { + divrem_u32(num, denom).unwrap() == (num / denom, num % denom) + } else { + divrem_u32(num, denom).is_none() + } + } + } + + // test the i32 wrapper + quickcheck!
{ + fn divrem_i32_prop(num: i32, denom: i32) -> bool { + if denom == 0 || num == core::i32::MIN && denom == -1 { + divrem_i32(num, denom).is_none() + } else { + divrem_i32(num, denom).unwrap() == (num / denom, num % denom) + } + } + } +} diff --git a/src/video_ram.rs b/src/video_ram.rs index fd47355..232249f 100644 --- a/src/video_ram.rs +++ b/src/video_ram.rs @@ -30,6 +30,8 @@ pub const SCREEN_HEIGHT: isize = 160; /// value as just being a `usize`. pub const VRAM_BASE_ADDRESS: usize = 0x0600_0000; +const MODE3_VRAM: VolAddress = unsafe { VolAddress::new_unchecked(VRAM_BASE_ADDRESS) }; + /// Draws a pixel to the screen while in Display Mode 3, with bounds checks. /// /// # Panics @@ -53,7 +55,7 @@ pub fn mode3_draw_pixel(col: isize, row: isize, color: u16) { /// * `col` must be in `0..SCREEN_WIDTH` /// * `row` must be in `0..SCREEN_HEIGHT` pub unsafe fn mode3_draw_pixel_unchecked(col: isize, row: isize, color: u16) { - VolatilePtr(VRAM_BASE_ADDRESS as *mut u16).offset(col + row * SCREEN_WIDTH).write(color); + MODE3_VRAM.offset(col + row * SCREEN_WIDTH).write(color); } /// Reads the given pixel of video memory according to Mode 3 placement. @@ -63,7 +65,7 @@ pub unsafe fn mode3_draw_pixel_unchecked(col: isize, row: isize, color: u16) { /// If the location is out of bounds you get `None`. pub fn mode3_read_pixel(col: isize, row: isize) -> Option { if col >= 0 && col < SCREEN_WIDTH && row >= 0 && row < SCREEN_HEIGHT { - unsafe { Some(VolatilePtr(VRAM_BASE_ADDRESS as *mut u16).offset(col + row * SCREEN_WIDTH).read()) } + unsafe { Some(MODE3_VRAM.offset(col + row * SCREEN_WIDTH).read()) } } else { None } @@ -74,9 +76,8 @@ pub unsafe fn mode3_clear_screen(color: u16) { // TODO: use DMA? 
let color = color as u32; let bulk_color = color << 16 | color; - let mut ptr = VolatilePtr(VRAM_BASE_ADDRESS as *mut u32); - for _ in 0..(SCREEN_HEIGHT * SCREEN_WIDTH / 2) { - ptr.write(bulk_color); - ptr = ptr.offset(1); + let block: VolAddressBlock = VolAddressBlock::new_unchecked(MODE3_VRAM.cast::(), (SCREEN_HEIGHT * SCREEN_WIDTH / 2) as usize); + for b in block.iter() { + b.write(bulk_color); } } From 1ff25ca2b79cbbfde53ff1a49572728f8230c8dc Mon Sep 17 00:00:00 2001 From: Lokathor Date: Wed, 19 Dec 2018 21:04:28 -0700 Subject: [PATCH 30/33] whoops --- src/builtins.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/builtins.rs b/src/builtins.rs index bcda21f..1ced79f 100644 --- a/src/builtins.rs +++ b/src/builtins.rs @@ -5,7 +5,6 @@ //! You shouldn't need to call anything in here yourself, it just has to be in //! the translation unit and LLVM will find it. -//TODO: make 64 bit too #[no_mangle] #[cfg(any(target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64"))] pub extern "C" fn __clzsi2(mut x: usize) -> usize { @@ -32,7 +31,7 @@ pub extern "C" fn __clzsi2(mut x: usize) -> usize { x = y; } } - #[cfg(target_pointer_width = "32")] + #[cfg(any(target_pointer_width = "32", target_pointer_width = "64"))] { y = x >> 16; if y != 0 { From fdf0eebb69759c27a2c83e0c7fe554c6a162b47c Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 20 Dec 2018 15:30:08 -0700 Subject: [PATCH 31/33] Drop the quickcheck usage Some day we can split the software math into its own crate and do quickcheck there, until then the test code must be fully no_std because otherwise the examples won't build (sadly, dev-dependencies applies to both tests and examples) --- Cargo.toml | 5 +++-- src/builtins.rs | 1 + src/core_extras.rs | 30 +++++++++++++++--------------- src/lib.rs | 8 ++++++++ 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2e65f0c..400c908 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,8 
+15,9 @@ publish = false typenum = "1.10" gba-proc-macro = "0.2.1" -[dev-dependencies] -quickcheck="0.7" +#[dev-dependencies] +#quickcheck="0.7" +# TODO: F [profile.release] lto = true diff --git a/src/builtins.rs b/src/builtins.rs index 1ced79f..931ed96 100644 --- a/src/builtins.rs +++ b/src/builtins.rs @@ -8,6 +8,7 @@ #[no_mangle] #[cfg(any(target_pointer_width = "16", target_pointer_width = "32", target_pointer_width = "64"))] pub extern "C" fn __clzsi2(mut x: usize) -> usize { + // TODO: const this? Requires const if let mut y: usize; let mut n: usize = { #[cfg(target_pointer_width = "64")] diff --git a/src/core_extras.rs b/src/core_extras.rs index 1e8bd96..679b42f 100644 --- a/src/core_extras.rs +++ b/src/core_extras.rs @@ -1,6 +1,6 @@ //! Things that I wish were in core, but aren't. -//TODO(Lokathor): reorganize as gba::core::fixed and gba::core::volatile ? +//TODO(Lokathor): reorganize as gba::core_extras::fixed_point and gba::core_extras::volatile ? use core::{cmp::Ordering, iter::FusedIterator, marker::PhantomData, num::NonZeroUsize}; @@ -263,12 +263,11 @@ impl VolAddressBlock { VolAddressBlock { vol_address, slots } } - /// Checked "indexing" style access of the block, giving either a `VolAddress` or a panic. - pub fn index(self, slot: usize) -> VolAddress { - if slot < self.slots { - unsafe { self.vol_address.offset(slot as isize) } - } else { - panic!("Index Requested: {} >= Bound: {}", slot, self.slots) + /// Gives an iterator over this block's slots. + pub const fn iter(self) -> VolAddressIter { + VolAddressIter { + vol_address: self.vol_address, + slots: self.slots, } } @@ -282,6 +281,15 @@ impl VolAddressBlock { self.vol_address.offset(slot as isize) } + /// Checked "indexing" style access of the block, giving either a `VolAddress` or a panic. 
+ pub fn index(self, slot: usize) -> VolAddress { + if slot < self.slots { + unsafe { self.vol_address.offset(slot as isize) } + } else { + panic!("Index Requested: {} >= Bound: {}", slot, self.slots) + } + } + /// Checked "getting" style access of the block, giving an Option value. pub fn get(self, slot: usize) -> Option> { if slot < self.slots { @@ -290,12 +298,4 @@ impl VolAddressBlock { None } } - - /// Gives an iterator over the block's slots. - pub const fn iter(self) -> VolAddressIter { - VolAddressIter { - vol_address: self.vol_address, - slots: self.slots, - } - } } diff --git a/src/lib.rs b/src/lib.rs index 1ddf7d0..677c574 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -73,6 +73,7 @@ pub(crate) use crate::video_ram::*; /// Performs unsigned divide and remainder, gives None if dividing by 0. pub fn divrem_u32(numer: u32, denom: u32) -> Option<(u32, u32)> { + // TODO: const this? Requires const if if denom == 0 { None } else { @@ -88,6 +89,7 @@ pub fn divrem_u32(numer: u32, denom: u32) -> Option<(u32, u32)> { /// defined (not literal UB) including but not limited to: an infinite loop, /// panic on overflow, or incorrect output. pub unsafe fn divrem_u32_unchecked(numer: u32, denom: u32) -> (u32, u32) { + // TODO: const this? Requires const if if (numer >> 5) < denom { divrem_u32_simple(numer, denom) } else { @@ -99,6 +101,7 @@ pub unsafe fn divrem_u32_unchecked(numer: u32, denom: u32) -> (u32, u32) { /// extremely slow. If N is close enough to D then it will likely be faster than /// the non_restoring form. fn divrem_u32_simple(mut numer: u32, denom: u32) -> (u32, u32) { + // TODO: const this? Requires const if let mut quot = 0; while numer >= denom { numer -= denom; @@ -110,6 +113,7 @@ fn divrem_u32_simple(mut numer: u32, denom: u32) -> (u32, u32) { /// Takes a fixed quantity of time based on the bit width of the number (in this /// case 32). fn divrem_u32_non_restoring(numer: u32, denom: u32) -> (u32, u32) { + // TODO: const this? 
Requires const if let mut r: i64 = numer as i64; let d: i64 = (denom as i64) << 32; let mut q: u32 = 0; @@ -129,6 +133,7 @@ fn divrem_u32_non_restoring(numer: u32, denom: u32) -> (u32, u32) { r = r + d; } r = r >> 32; + // TODO: remove this once we've done more checks here. debug_assert!(r >= 0); debug_assert!(r <= core::u32::MAX as i64); (q, r as u32) @@ -154,6 +159,7 @@ pub fn divrem_i32(numer: i32, denom: i32) -> Option<(i32, i32)> { /// * If you call this with `MIN/-1` you'll get a panic in debug or just `MIN` /// in release (which is incorrect), because of how twos-compliment works. pub unsafe fn divrem_i32_unchecked(numer: i32, denom: i32) -> (i32, i32) { + // TODO: const this? Requires const if let unsigned_numer = numer.abs() as u32; let unsigned_denom = denom.abs() as u32; let opposite_sign = (numer ^ denom) < 0; @@ -177,6 +183,7 @@ pub unsafe fn divrem_i32_unchecked(numer: i32, denom: i32) -> (i32, i32) { } } +/* #[cfg(test)] mod tests { use super::*; @@ -226,3 +233,4 @@ mod tests { } } } +*/ From 8ede9f524dd01f2903bd3fe32216067f2bc0daa6 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 20 Dec 2018 16:15:23 -0700 Subject: [PATCH 32/33] begin IO Register classification, start with KEYINPUT --- .travis.yml | 8 +- Makefile.toml | 14 ++-- book/src/04-non-video/01-buttons.md | 4 + src/builtins.rs | 10 +-- src/fixed.rs | 4 + src/io.rs | 13 +++ src/io/keypad.rs | 121 ++++++++++++++++++++++++++++ src/io_registers.rs | 70 ---------------- src/lib.rs | 28 +++---- 9 files changed, 169 insertions(+), 103 deletions(-) create mode 100644 src/io.rs create mode 100644 src/io/keypad.rs diff --git a/.travis.yml b/.travis.yml index e22f99c..73b5c91 100644 --- a/.travis.yml +++ b/.travis.yml @@ -27,13 +27,15 @@ script: - export PATH="$PATH:/opt/devkitpro/devkitARM/bin" - export PATH="$PATH:/opt/devkitpro/tools/bin" - cd .. 
- # Run all tests, both modes + # Run all verifications, both debug and release + - cargo clippy + - cargo clippy --release - cargo test --no-fail-fast --lib - cargo test --no-fail-fast --lib --release - cargo test --no-fail-fast --tests - cargo test --no-fail-fast --tests --release - # cargo make defaults to both debug and release builds of all examples - - cargo make + # Let cargo make take over the rest + - cargo make build-all # Test build the book so that a failed book build kills this run - cd book && mdbook build diff --git a/Makefile.toml b/Makefile.toml index 5371b95..7c937ec 100644 --- a/Makefile.toml +++ b/Makefile.toml @@ -55,15 +55,15 @@ fn main() -> std::io::Result<()> { ''' ] -[tasks.build] -dependencies = ["build-examples-debug", "build-examples-release", "pack-roms"] - -[tasks.justrelease] -dependencies = ["build-examples-release", "pack-roms"] - [tasks.test] command = "cargo" args = ["test", "--lib"] +[tasks.justrelease] +dependencies = ["build-examples-release", "pack-roms"] + +[tasks.build-all] +dependencies = ["build-examples-debug", "build-examples-release", "pack-roms"] + [tasks.default] -alias = "build" +alias = "build-all" diff --git a/book/src/04-non-video/01-buttons.md b/book/src/04-non-video/01-buttons.md index 8694b48..8eb4e80 100644 --- a/book/src/04-non-video/01-buttons.md +++ b/book/src/04-non-video/01-buttons.md @@ -1 +1,5 @@ # Buttons + +It's all well and good to just show a picture, even to show an animation, but if +we want a game we have to let the user interact with something. 
+ diff --git a/src/builtins.rs b/src/builtins.rs index 931ed96..db3615e 100644 --- a/src/builtins.rs +++ b/src/builtins.rs @@ -28,7 +28,7 @@ pub extern "C" fn __clzsi2(mut x: usize) -> usize { { y = x >> 32; if y != 0 { - n = n - 32; + n -= 32; x = y; } } @@ -36,23 +36,23 @@ pub extern "C" fn __clzsi2(mut x: usize) -> usize { { y = x >> 16; if y != 0 { - n = n - 16; + n -= 16; x = y; } } y = x >> 8; if y != 0 { - n = n - 8; + n -= 8; x = y; } y = x >> 4; if y != 0 { - n = n - 4; + n -= 4; x = y; } y = x >> 2; if y != 0 { - n = n - 2; + n -= 2; x = y; } y = x >> 1; diff --git a/src/fixed.rs b/src/fixed.rs index 9aac415..1e66998 100644 --- a/src/fixed.rs +++ b/src/fixed.rs @@ -135,6 +135,7 @@ macro_rules! fixed_point_signed_multiply { ($t:ident) => { impl Mul for Fx<$t, F> { type Output = Self; + #[allow(clippy::suspicious_arithmetic_impl)] fn mul(self, rhs: Fx<$t, F>) -> Self::Output { let pre_shift = (self.num as i32).wrapping_mul(rhs.num as i32); if pre_shift < 0 { @@ -168,6 +169,7 @@ macro_rules! fixed_point_unsigned_multiply { ($t:ident) => { impl Mul for Fx<$t, F> { type Output = Self; + #[allow(clippy::suspicious_arithmetic_impl)] fn mul(self, rhs: Fx<$t, F>) -> Self::Output { Fx { num: ((self.num as u32).wrapping_mul(rhs.num as u32) >> F::U8) as $t, @@ -186,6 +188,7 @@ macro_rules! fixed_point_signed_division { ($t:ident) => { impl Div for Fx<$t, F> { type Output = Self; + #[allow(clippy::suspicious_arithmetic_impl)] fn div(self, rhs: Fx<$t, F>) -> Self::Output { let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); @@ -206,6 +209,7 @@ macro_rules! 
fixed_point_unsigned_division { ($t:ident) => { impl Div for Fx<$t, F> { type Output = Self; + #[allow(clippy::suspicious_arithmetic_impl)] fn div(self, rhs: Fx<$t, F>) -> Self::Output { let mul_output: i32 = (self.num as i32).wrapping_mul(1 << F::U8); let divide_result: i32 = crate::bios::div(mul_output, rhs.num as i32); diff --git a/src/io.rs b/src/io.rs new file mode 100644 index 0000000..5149b5a --- /dev/null +++ b/src/io.rs @@ -0,0 +1,13 @@ +//! This module contains definitions and types for the IO Registers. +//! +//! ## Naming +//! +//! In the interest of making things easy to search for, all io register +//! constants are given the names used in the +//! [GBATEK](https://problemkaputt.de/gbatek.htm) technical description. + +use super::*; + +use gba_proc_macro::register_bit; + +pub mod keypad; diff --git a/src/io/keypad.rs b/src/io/keypad.rs new file mode 100644 index 0000000..d03d242 --- /dev/null +++ b/src/io/keypad.rs @@ -0,0 +1,121 @@ +//! Allows access to the keypad. + +use super::*; + +/// The Key Input Register. +/// +/// This register follows the "low-active" convention. If you want your code to +/// follow the "high-active" convention (hint: you probably do, it's far easier +/// to work with) then call `read_key_input()` rather than reading this register +/// directly. It will perform the necessary bit flip operation for you. +pub const KEYINPUT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0130) }; + +/// A "tribool" value helps us interpret the arrow pad. +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(i32)] +#[allow(missing_docs)] +pub enum TriBool { + Minus = -1, + Neutral = 0, + Plus = 1, +} + +newtype! { + /// Records a particular key press combination. + /// + /// Methods here follow the "high-active" convention, where a bit is enabled + /// when it's part of the set. 
+ #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] + KeyInput, u16 +} + +#[allow(missing_docs)] +impl KeyInput { + register_bit!(A_BIT, u16, 1, a_pressed); + register_bit!(B_BIT, u16, 1 << 1, b_pressed); + register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed); + register_bit!(START_BIT, u16, 1 << 3, start_pressed); + register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed); + register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed); + register_bit!(UP_BIT, u16, 1 << 6, up_pressed); + register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed); + register_bit!(R_BIT, u16, 1 << 8, r_pressed); + register_bit!(L_BIT, u16, 1 << 9, l_pressed); + + /// Takes the set difference between these keys and another set of keys. + pub fn difference(self, other: Self) -> Self { + KeyInput(self.0 ^ other.0) + } + + /// Gives the arrow pad value as a tribool, with Plus being increased column + /// value (right). + pub fn column_direction(self) -> TriBool { + if self.right_pressed() { + TriBool::Plus + } else if self.left_pressed() { + TriBool::Minus + } else { + TriBool::Neutral + } + } + + /// Gives the arrow pad value as a tribool, with Plus being increased row + /// value (down). + pub fn row_direction(self) -> TriBool { + if self.down_pressed() { + TriBool::Plus + } else if self.up_pressed() { + TriBool::Minus + } else { + TriBool::Neutral + } + } +} + +/// Gets the current state of the keys +pub fn read_key_input() -> KeyInput { + // Note(Lokathor): The 10 used bits are "low when pressed" style, but the 6 + // unused bits are always low, so we XOR with this mask to get a result where + // the only active bits are currently pressed keys. + KeyInput(KEYINPUT.read() ^ 0b0000_0011_1111_1111) +} + +newtype! { + /// Allows configuration of when a keypad interrupt fires. + /// + /// * The most important bit here is the `irq_enabled` bit, which determines + /// if an interrupt happens at all. + /// * The second most important bit is the `irq_logical_and` bit. 
If this bit + /// is set, _all_ the selected buttons are required to be set for the + /// interrupt to be fired (logical AND). If it's not set then _any_ of the + /// buttons selected can be pressed to fire the interrupt (logical OR). + /// * All other bits select a particular button to be required or not as part + /// of the interrupt firing. + /// + /// NOTE: This _only_ configures the operation of when keypad interrupts can + /// fire. You must still set the `IME` to have interrupts at all, and you must + /// further set `IE` for keypad interrupts to be possible. + #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] + KeyInterruptSetting, u16 +} +#[allow(missing_docs)] +impl KeyInterruptSetting { + register_bit!(A_BIT, u16, 1, a_pressed); + register_bit!(B_BIT, u16, 1 << 1, b_pressed); + register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed); + register_bit!(START_BIT, u16, 1 << 3, start_pressed); + register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed); + register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed); + register_bit!(UP_BIT, u16, 1 << 6, up_pressed); + register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed); + register_bit!(R_BIT, u16, 1 << 8, r_pressed); + register_bit!(L_BIT, u16, 1 << 9, l_pressed); + // + register_bit!(IRQ_ENABLE_BIT, u16, 1 << 14, irq_enabled); + register_bit!(IRQ_AND_BIT, u16, 1 << 15, irq_logical_and); +} + +/// Use this to configure when a keypad interrupt happens. +/// +/// See the `KeyInterruptSetting` type for more. +pub const KEYCNT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0132) }; diff --git a/src/io_registers.rs b/src/io_registers.rs index 9bdc7fb..4ba89dc 100644 --- a/src/io_registers.rs +++ b/src/io_registers.rs @@ -124,73 +124,3 @@ pub fn wait_until_vdraw() { // TODO: make this the better version with BIOS and interrupts and such. 
while vcount() >= SCREEN_HEIGHT as u16 {} } - -/// Key Status -const KEYINPUT: VolAddress = unsafe { VolAddress::new_unchecked(0x400_0130) }; - -/// A "tribool" value helps us interpret the arrow pad. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -#[repr(i32)] -#[allow(missing_docs)] -pub enum TriBool { - Minus = -1, - Neutral = 0, - Plus = 1, -} - -newtype! { - /// Records a particular key press combination. - #[derive(Debug, Copy, Clone, Default, PartialEq, Eq)] - KeyInput, u16 -} - -#[allow(missing_docs)] -impl KeyInput { - register_bit!(A_BIT, u16, 1, a_pressed); - register_bit!(B_BIT, u16, 1 << 1, b_pressed); - register_bit!(SELECT_BIT, u16, 1 << 2, select_pressed); - register_bit!(START_BIT, u16, 1 << 3, start_pressed); - register_bit!(RIGHT_BIT, u16, 1 << 4, right_pressed); - register_bit!(LEFT_BIT, u16, 1 << 5, left_pressed); - register_bit!(UP_BIT, u16, 1 << 6, up_pressed); - register_bit!(DOWN_BIT, u16, 1 << 7, down_pressed); - register_bit!(R_BIT, u16, 1 << 8, r_pressed); - register_bit!(L_BIT, u16, 1 << 9, l_pressed); - - /// Takes the difference between these keys and another set of keys. - pub fn difference(self, other: Self) -> Self { - KeyInput(self.0 ^ other.0) - } - - /// Gives the arrow pad value as a tribool, with Plus being increased column - /// value (right). - pub fn column_direction(self) -> TriBool { - if self.right_pressed() { - TriBool::Plus - } else if self.left_pressed() { - TriBool::Minus - } else { - TriBool::Neutral - } - } - - /// Gives the arrow pad value as a tribool, with Plus being increased row - /// value (down). 
- pub fn row_direction(self) -> TriBool { - if self.down_pressed() { - TriBool::Plus - } else if self.up_pressed() { - TriBool::Minus - } else { - TriBool::Neutral - } - } -} - -/// Gets the current state of the keys -pub fn key_input() -> KeyInput { - // Note(Lokathor): The 10 used bits are "low when pressed" style, but the 6 - // unused bits are always low, so we XOR with this mask to get a result where - // the only active bits are currently pressed keys. - KeyInput(KEYINPUT.read() ^ 0b0000_0011_1111_1111) -} diff --git a/src/lib.rs b/src/lib.rs index 677c574..be313d4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -66,10 +66,9 @@ pub mod bios; pub mod core_extras; pub(crate) use crate::core_extras::*; -pub mod io_registers; +pub mod io; pub mod video_ram; -pub(crate) use crate::video_ram::*; /// Performs unsigned divide and remainder, gives None if dividing by 0. pub fn divrem_u32(numer: u32, denom: u32) -> Option<(u32, u32)> { @@ -127,12 +126,12 @@ fn divrem_u32_non_restoring(numer: u32, denom: u32) -> (u32, u32) { } i >>= 1; } - q = q - !q; + q -= !q; if r < 0 { - q = q - 1; - r = r + d; + q -= 1; + r += d; } - r = r >> 32; + r >>= 32; // TODO: remove this once we've done more checks here. 
debug_assert!(r >= 0); debug_assert!(r <= core::u32::MAX as i64); @@ -168,18 +167,11 @@ pub unsafe fn divrem_i32_unchecked(numer: i32, denom: i32) -> (i32, i32) { } else { divrem_u32_non_restoring(unsigned_numer, unsigned_denom) }; - if opposite_sign { - if numer < 0 { - (-(udiv as i32), -(urem as i32)) - } else { - (-(udiv as i32), urem as i32) - } - } else { - if numer < 0 { - (udiv as i32, -(urem as i32)) - } else { - (udiv as i32, urem as i32) - } + match (opposite_sign, numer < 0) { + (true, true) => (-(udiv as i32), -(urem as i32)), + (true, false) => (-(udiv as i32), urem as i32), + (false, true) => (udiv as i32, -(urem as i32)), + (false, false) => (udiv as i32, urem as i32), } } From a1b85fa98f2b184073732ab0b3473b7a5d7838bd Mon Sep 17 00:00:00 2001 From: Lokathor Date: Thu, 20 Dec 2018 16:20:14 -0700 Subject: [PATCH 33/33] clip those clips --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 73b5c91..2f5bf99 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,6 +9,7 @@ rust: before_script: - rustup component add rust-src + - rustup component add clippy - (test -x $HOME/.cargo/bin/cargo-install-update || cargo install cargo-update) - (test -x $HOME/.cargo/bin/cargo-xbuild || cargo install cargo-xbuild) - (test -x $HOME/.cargo/bin/cargo-make || cargo install cargo-make)