Refactor and slightly improve hashmap (#429)

Started as an attempt to make it easier to develop the hashmap, ended up
with me benchmarking everything :).

- [x]  no changelog update needed
This commit is contained in:
Gwilym Inzani 2023-05-16 22:25:11 +01:00 committed by GitHub
commit 458280b68f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 1028 additions and 414 deletions

View file

@ -11,6 +11,7 @@ rustc-hash = { version = "1", default-features = false }
[dev-dependencies] [dev-dependencies]
rand = { version = "0.8", default-features = false, features = ["small_rng"] } rand = { version = "0.8", default-features = false, features = ["small_rng"] }
lazy_static = "1.4"
[profile.dev] [profile.dev]
opt-level = 3 opt-level = 3

View file

@ -0,0 +1,318 @@
// These benchmarks were taken from hashbrown. They are impossible to run
// on the target GBA hardware, but hopefully running these on something like a
// raspberry pi zero will give something comparable.
// This benchmark suite contains some benchmarks along a set of dimensions:
// Int key distribution: low bit heavy, top bit heavy, and random.
// Task: basic functionality: insert, insert_erase, lookup, lookup_fail, iter
#![feature(test)]
extern crate test;
use test::{black_box, Bencher};
use agb_hashmap::HashMap;
use std::sync::atomic::{self, AtomicUsize};
const SIZE: usize = 1000;
type StdHashMap<K, V> = std::collections::hash_map::HashMap<K, V>;
/// An endless, deterministic key generator used as the "random" key distribution.
///
/// The generator is `Copy`, so a copy taken at any point replays the same keys
/// from that point onwards.
#[derive(Clone, Copy)]
struct RandomKeys {
    state: usize,
}

impl RandomKeys {
    /// Creates a generator with a zero state, so every fresh generator yields
    /// the same sequence.
    fn new() -> Self {
        Self { state: 0 }
    }
}

impl Iterator for RandomKeys {
    type Item = usize;

    /// Advances the state and yields the new state; this never returns `None`.
    fn next(&mut self) -> Option<Self::Item> {
        // Add 1 then multiply by some 32 bit prime.
        let advanced = self.state.wrapping_add(1).wrapping_mul(3_787_392_781);
        self.state = advanced;
        Some(advanced)
    }
}
// Just an arbitrary side effect to make the maps not shortcircuit to the non-dropping path
// when dropping maps/entries (most real world usages likely have drop in the key or value)
//
// `AtomicUsize::new` is a `const fn`, so a plain `static` is sufficient here; no lazy
// initialisation is required.
static SIDE_EFFECT: AtomicUsize = AtomicUsize::new(0);

/// Value type with a non-trivial destructor: dropping it adds the wrapped number
/// to `SIDE_EFFECT`.
#[derive(Clone)]
struct DropType(usize);

impl Drop for DropType {
    fn drop(&mut self) {
        SIDE_EFFECT.fetch_add(self.0, atomic::Ordering::SeqCst);
    }
}
/// Expands one benchmark-defining macro into six benchmarks.
///
/// `$bench_macro` is invoked as `$bench_macro!(name, MapType, key_iterator)` for each
/// combination of map type (`HashMap`, `StdHashMap`) and key distribution:
///
/// * serial: `0..`
/// * highbits: `(0..).map(usize::swap_bytes)`
/// * random: `RandomKeys::new()`
///
/// The six remaining arguments are the names of the generated benchmarks, given in
/// the order agb/std serial, agb/std highbits, agb/std random.
macro_rules! bench_suite {
    (
        $bench_macro:ident,
        $agb_serial:ident, $std_serial:ident,
        $agb_highbits:ident, $std_highbits:ident,
        $agb_random:ident, $std_random:ident
    ) => {
        // Serial keys.
        $bench_macro!($agb_serial, HashMap, 0..);
        $bench_macro!($std_serial, StdHashMap, 0..);

        // Byte-swapped serial keys.
        $bench_macro!($agb_highbits, HashMap, (0..).map(usize::swap_bytes));
        $bench_macro!($std_highbits, StdHashMap, (0..).map(usize::swap_bytes));

        // Keys from the `RandomKeys` generator.
        $bench_macro!($agb_random, HashMap, RandomKeys::new());
        $bench_macro!($std_random, StdHashMap, RandomKeys::new());
    };
}
/// Defines a benchmark that repeatedly refills one map created with
/// `with_capacity(SIZE)`.
///
/// Each timed iteration clears the map and inserts `SIZE` keys taken from
/// `$keydist`; every value is a `(DropType, [usize; 20])` pair. The current
/// `SIDE_EFFECT` total is printed to stderr once the benchmark has run.
macro_rules! bench_insert {
    ($name:ident, $maptype:ident, $keydist:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            // Created once, outside the timed closure.
            let mut map = $maptype::with_capacity(SIZE);

            bencher.iter(|| {
                map.clear();
                for key in ($keydist).take(SIZE) {
                    map.insert(key, (DropType(key), [key; 20]));
                }
                black_box(&mut map);
            });

            eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst));
        }
    };
}

bench_suite!(
    bench_insert,
    agb_hashmap_insert_serial,
    std_hashmap_insert_serial,
    agb_hashmap_insert_highbits,
    std_hashmap_insert_highbits,
    agb_hashmap_insert_random,
    std_hashmap_insert_random
);
/// Defines a benchmark that builds a map from scratch on every iteration.
///
/// Each timed iteration creates a map with `default()` and inserts `SIZE` keys
/// from `$keydist` with `DropType` values, so creating and filling the map are
/// both inside the timed closure.
macro_rules! bench_grow_insert {
    ($name:ident, $maptype:ident, $keydist:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            bencher.iter(|| {
                let mut map = $maptype::default();
                for key in ($keydist).take(SIZE) {
                    map.insert(key, DropType(key));
                }
                black_box(&mut map);
            })
        }
    };
}

bench_suite!(
    bench_grow_insert,
    agb_hashmap_grow_insert_serial,
    std_hashmap_grow_insert_serial,
    agb_hashmap_grow_insert_highbits,
    std_hashmap_grow_insert_highbits,
    agb_hashmap_grow_insert_random,
    std_hashmap_grow_insert_random
);
/// Defines a benchmark that alternates inserts and removals.
///
/// Setup (untimed): a map is filled with the first `SIZE` keys of `$keydist`, and an
/// iterator positioned after those keys is prepared.
///
/// Each timed iteration clones that map, then `SIZE` times inserts the next unseen
/// key and removes one of the keys from the start of `$keydist`. The current
/// `SIDE_EFFECT` total is printed to stderr once the benchmark has run.
macro_rules! bench_insert_erase {
    ($name:ident, $maptype:ident, $keydist:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            let mut seed_map = $maptype::default();
            for key in ($keydist).take(SIZE) {
                seed_map.insert(key, DropType(key));
            }
            let later_keys = $keydist.skip(SIZE);

            bencher.iter(|| {
                let mut map = seed_map.clone();
                let mut incoming = later_keys.clone();
                let mut outgoing = $keydist;
                // While keeping the size constant,
                // replace the first keydist with the second.
                for (add, remove) in (&mut incoming).zip(&mut outgoing).take(SIZE) {
                    map.insert(add, DropType(add));
                    black_box(map.remove(&remove));
                }
                black_box(map);
            });

            eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst));
        }
    };
}

bench_suite!(
    bench_insert_erase,
    agb_hashmap_insert_erase_serial,
    std_hashmap_insert_erase_serial,
    agb_hashmap_insert_erase_highbits,
    std_hashmap_insert_erase_highbits,
    agb_hashmap_insert_erase_random,
    std_hashmap_insert_erase_random
);
/// Defines a benchmark that looks up keys which were inserted beforehand.
///
/// Setup (untimed): the map is filled with the first `SIZE` keys of `$keydist`.
/// Each timed iteration calls `get` for those same `SIZE` keys. The current
/// `SIDE_EFFECT` total is printed to stderr once the benchmark has run.
macro_rules! bench_lookup {
    ($name:ident, $maptype:ident, $keydist:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            let mut map = $maptype::default();
            for key in $keydist.take(SIZE) {
                map.insert(key, DropType(key));
            }

            bencher.iter(|| {
                for key in $keydist.take(SIZE) {
                    black_box(map.get(&key));
                }
            });

            eprintln!("{}", SIDE_EFFECT.load(atomic::Ordering::SeqCst));
        }
    };
}

bench_suite!(
    bench_lookup,
    agb_hashmap_lookup_serial,
    std_hashmap_lookup_serial,
    agb_hashmap_lookup_highbits,
    std_hashmap_lookup_highbits,
    agb_hashmap_lookup_random,
    std_hashmap_lookup_random
);
/// Defines a benchmark that looks up keys drawn after the inserted ones.
///
/// Setup (untimed): the first `SIZE` keys of a single `$keydist` iterator are inserted.
/// Each timed iteration pulls the next `SIZE` keys from that same iterator and calls
/// `get` on them, so every iteration uses keys that come later in the sequence than
/// anything inserted (the lookups are intended to miss).
macro_rules! bench_lookup_fail {
    ($name:ident, $maptype:ident, $keydist:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            let mut map = $maptype::default();
            let mut keys = $keydist;
            for key in (&mut keys).take(SIZE) {
                map.insert(key, DropType(key));
            }

            bencher.iter(|| {
                // Continues the iterator where the previous iteration stopped.
                for key in (&mut keys).take(SIZE) {
                    black_box(map.get(&key));
                }
            })
        }
    };
}

bench_suite!(
    bench_lookup_fail,
    agb_hashmap_lookup_fail_serial,
    std_hashmap_lookup_fail_serial,
    agb_hashmap_lookup_fail_highbits,
    std_hashmap_lookup_fail_highbits,
    agb_hashmap_lookup_fail_random,
    std_hashmap_lookup_fail_random
);
/// Defines a benchmark that iterates over a filled map by reference.
///
/// Setup (untimed): the map is filled with the first `SIZE` keys of `$keydist`.
/// Each timed iteration walks `&map` and passes every yielded item to `black_box`.
macro_rules! bench_iter {
    ($name:ident, $maptype:ident, $keydist:expr) => {
        #[bench]
        fn $name(bencher: &mut Bencher) {
            let mut map = $maptype::default();
            for key in ($keydist).take(SIZE) {
                map.insert(key, DropType(key));
            }

            bencher.iter(|| {
                for entry in &map {
                    black_box(entry);
                }
            })
        }
    };
}

bench_suite!(
    bench_iter,
    agb_hashmap_iter_serial,
    std_hashmap_iter_serial,
    agb_hashmap_iter_highbits,
    std_hashmap_iter_highbits,
    agb_hashmap_iter_random,
    std_hashmap_iter_random
);
/// Defines four clone benchmarks for `$maptype`, which must be in scope at the
/// expansion site.
///
/// * `clone_small` / `clone_large`: `clone()` a map holding 10 / 1000 entries.
/// * `clone_from_small` / `clone_from_large`: `clone_from` such a map into a second,
///   reused map.
///
/// The macro brings `DropType`, `black_box` and `Bencher` into scope itself, so it is
/// meant to be expanded inside a child module of the file root.
macro_rules! clone_bench {
    ($maptype:ident) => {
        use super::DropType;
        use test::{black_box, Bencher};

        #[bench]
        fn clone_small(bencher: &mut Bencher) {
            let mut source = $maptype::new();
            for key in 0..10 {
                source.insert(key, DropType(key));
            }

            bencher.iter(|| {
                black_box(source.clone());
            })
        }

        #[bench]
        fn clone_from_small(bencher: &mut Bencher) {
            let mut source = $maptype::new();
            let mut target = $maptype::new();
            for key in 0..10 {
                source.insert(key, DropType(key));
            }

            bencher.iter(|| {
                target.clone_from(&source);
                black_box(&mut target);
            })
        }

        #[bench]
        fn clone_large(bencher: &mut Bencher) {
            let mut source = $maptype::new();
            for key in 0..1000 {
                source.insert(key, DropType(key));
            }

            bencher.iter(|| {
                black_box(source.clone());
            })
        }

        #[bench]
        fn clone_from_large(bencher: &mut Bencher) {
            let mut source = $maptype::new();
            let mut target = $maptype::new();
            for key in 0..1000 {
                source.insert(key, DropType(key));
            }

            bencher.iter(|| {
                target.clone_from(&source);
                black_box(&mut target);
            })
        }
    };
}

/// Clone benchmarks for the `agb_hashmap` map.
mod agb_hashmap_clone_benches {
    use agb_hashmap::HashMap;

    clone_bench!(HashMap);
}

/// The same clone benchmarks for the standard library map, for comparison.
mod std_hashmap_clone_benches {
    use std::collections::hash_map::HashMap;

    clone_bench!(HashMap);
}

View file

@ -14,6 +14,15 @@
#![deny(rustdoc::broken_intra_doc_links)] #![deny(rustdoc::broken_intra_doc_links)]
#![deny(rustdoc::private_intra_doc_links)] #![deny(rustdoc::private_intra_doc_links)]
#![deny(rustdoc::invalid_html_tags)] #![deny(rustdoc::invalid_html_tags)]
#![deny(unreachable_pub)]
#![deny(clippy::missing_safety_doc)]
#![deny(clippy::undocumented_unsafe_blocks)]
#![deny(clippy::manual_assert)]
#![deny(clippy::default_trait_access)]
#![deny(clippy::missing_panics_doc)]
#![deny(clippy::doc_markdown)]
#![deny(clippy::return_self_not_must_use)]
#![deny(clippy::cast_possible_truncation)]
extern crate alloc; extern crate alloc;
@ -21,16 +30,20 @@ use alloc::{alloc::Global, vec::Vec};
use core::{ use core::{
alloc::Allocator, alloc::Allocator,
borrow::Borrow, borrow::Borrow,
fmt::Debug,
hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}, hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
iter::FromIterator, iter::FromIterator,
mem::{self, MaybeUninit}, num::Wrapping,
ops::Index, ops::Index,
ptr,
}; };
use rustc_hash::FxHasher; use rustc_hash::FxHasher;
type HashType = u32; mod node;
mod node_storage;
use node::Node;
use node_storage::NodeStorage;
// # Robin Hood Hash Tables // # Robin Hood Hash Tables
// //
@ -96,7 +109,7 @@ type HashType = u32;
/// ///
/// The API surface provided is incredibly similar to the /// The API surface provided is incredibly similar to the
/// [`std::collections::HashMap`](https://doc.rust-lang.org/std/collections/struct.HashMap.html) /// [`std::collections::HashMap`](https://doc.rust-lang.org/std/collections/struct.HashMap.html)
/// implementation with fewer guarantees, and better optimised for the GameBoy Advance. /// implementation with fewer guarantees, and better optimised for the `GameBoy Advance`.
/// ///
/// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html /// [`Eq`]: https://doc.rust-lang.org/core/cmp/trait.Eq.html
/// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html /// [`Hash`]: https://doc.rust-lang.org/core/hash/trait.Hash.html
@ -132,6 +145,7 @@ type HashType = u32;
/// println!("{game}: \"{review}\""); /// println!("{game}: \"{review}\"");
/// } /// }
/// ``` /// ```
#[derive(Clone)]
pub struct HashMap<K, V, ALLOCATOR: Allocator = Global> { pub struct HashMap<K, V, ALLOCATOR: Allocator = Global> {
nodes: NodeStorage<K, V, ALLOCATOR>, nodes: NodeStorage<K, V, ALLOCATOR>,
@ -161,6 +175,12 @@ impl<K, V> HashMap<K, V> {
pub fn with_capacity(capacity: usize) -> Self { pub fn with_capacity(capacity: usize) -> Self {
Self::with_capacity_in(capacity, Global) Self::with_capacity_in(capacity, Global)
} }
#[doc(hidden)]
#[must_use]
pub fn distance_histogram(&self) -> (Vec<usize>, usize) {
self.nodes.distance_histogram()
}
} }
impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> { impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> {
@ -170,7 +190,7 @@ impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> {
pub fn with_size_in(size: usize, alloc: ALLOCATOR) -> Self { pub fn with_size_in(size: usize, alloc: ALLOCATOR) -> Self {
Self { Self {
nodes: NodeStorage::with_size_in(size, alloc), nodes: NodeStorage::with_size_in(size, alloc),
hasher: Default::default(), hasher: BuildHasherDefault::default(),
} }
} }
@ -187,6 +207,10 @@ impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> {
/// Creates an empty `HashMap` which can hold at least `capacity` elements before resizing. The actual /// Creates an empty `HashMap` which can hold at least `capacity` elements before resizing. The actual
/// internal size may be larger as it must be a power of 2 /// internal size may be larger as it must be a power of 2
///
/// # Panics
///
/// Panics if capacity is larger than 2^32 * .60
#[must_use] #[must_use]
pub fn with_capacity_in(capacity: usize, alloc: ALLOCATOR) -> Self { pub fn with_capacity_in(capacity: usize, alloc: ALLOCATOR) -> Self {
for i in 0..32 { for i in 0..32 {
@ -231,8 +255,7 @@ impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> {
/// Removes all elements from the map /// Removes all elements from the map
pub fn clear(&mut self) { pub fn clear(&mut self) {
self.nodes = self.nodes.clear();
NodeStorage::with_size_in(self.nodes.backing_vec_size(), self.allocator().clone());
} }
/// An iterator visiting all key-value pairs in an arbitrary order /// An iterator visiting all key-value pairs in an arbitrary order
@ -246,7 +269,7 @@ impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> {
/// An iterator visiting all key-value pairs in an arbitrary order, with mutable references to the values /// An iterator visiting all key-value pairs in an arbitrary order, with mutable references to the values
pub fn iter_mut(&mut self) -> impl Iterator<Item = (&'_ K, &'_ mut V)> { pub fn iter_mut(&mut self) -> impl Iterator<Item = (&'_ K, &'_ mut V)> {
self.nodes.nodes.iter_mut().filter_map(Node::key_value_mut) self.nodes.iter_mut().filter_map(Node::key_value_mut)
} }
/// Retains only the elements specified by the predicate `f`. /// Retains only the elements specified by the predicate `f`.
@ -282,11 +305,6 @@ impl<K, V> Default for HashMap<K, V> {
} }
} }
const fn fast_mod(len: usize, hash: HashType) -> usize {
debug_assert!(len.is_power_of_two(), "Length must be a power of 2");
(hash as usize) & (len - 1)
}
impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR>
where where
K: Eq + Hash, K: Eq + Hash,
@ -302,7 +320,13 @@ where
let hash = self.hash(&key); let hash = self.hash(&key);
if let Some(location) = self.nodes.location(&key, hash) { if let Some(location) = self.nodes.location(&key, hash) {
Some(self.nodes.replace_at_location(location, key, value)) Some(
// SAFETY: location is valid due to the above
unsafe {
self.nodes
.replace_at_location_unchecked(location, key, value)
},
)
} else { } else {
if self.nodes.capacity() <= self.len() { if self.nodes.capacity() <= self.len() {
self.resize(self.nodes.backing_vec_size() * 2); self.resize(self.nodes.backing_vec_size() * 2);
@ -318,7 +342,11 @@ where
let hash = self.hash(&key); let hash = self.hash(&key);
let location = if let Some(location) = self.nodes.location(&key, hash) { let location = if let Some(location) = self.nodes.location(&key, hash) {
self.nodes.replace_at_location(location, key, value); // SAFETY: location is valid due to the above
unsafe {
self.nodes
.replace_at_location_unchecked(location, key, value);
}
location location
} else { } else {
if self.nodes.capacity() <= self.len() { if self.nodes.capacity() <= self.len() {
@ -328,7 +356,12 @@ where
self.nodes.insert_new(key, value, hash) self.nodes.insert_new(key, value, hash)
}; };
self.nodes.nodes[location].value_mut().unwrap() // SAFETY: location is always valid
unsafe {
self.nodes
.node_at_unchecked_mut(location)
.value_mut_unchecked()
}
} }
/// Returns `true` if the map contains a value for the specified key. /// Returns `true` if the map contains a value for the specified key.
@ -349,9 +382,15 @@ where
{ {
let hash = self.hash(key); let hash = self.hash(key);
self.nodes let location = self.nodes.location(key, hash)?;
.location(key, hash) Some(
.and_then(|location| self.nodes.nodes[location].key_value_ref()) // SAFETY: we know that a node exists and has a value from the location call above
unsafe {
self.nodes
.node_at_unchecked(location)
.key_value_ref_unchecked()
},
)
} }
/// Returns a reference to the value corresponding to the key. Returns [`None`] if there is /// Returns a reference to the value corresponding to the key. Returns [`None`] if there is
@ -397,11 +436,15 @@ where
{ {
let hash = self.hash(key); let hash = self.hash(key);
if let Some(location) = self.nodes.location(key, hash) { let location = self.nodes.location(key, hash)?;
self.nodes.nodes[location].value_mut() Some(
} else { // SAFETY: we know that a node exists and has a value from the location call above
None unsafe {
} self.nodes
.node_at_unchecked_mut(location)
.value_mut_unchecked()
},
)
} }
/// Removes the given key from the map. Returns the current value if it existed, or [`None`] /// Removes the given key from the map. Returns the current value if it existed, or [`None`]
@ -440,7 +483,12 @@ where
{ {
let mut hasher = self.hasher.build_hasher(); let mut hasher = self.hasher.build_hasher();
key.hash(&mut hasher); key.hash(&mut hasher);
hasher.finish() as HashType let result = hasher.finish();
// we want to allow truncation here since we're reducing 64 bits to 32
#[allow(clippy::cast_possible_truncation)]
let reduced = (result as u32) ^ ((result >> 32) as u32);
HashType::bit_mix(reduced)
} }
} }
@ -463,12 +511,12 @@ impl<'a, K, V, ALLOCATOR: ClonableAllocator> Iterator for Iter<'a, K, V, ALLOCAT
return None; return None;
} }
let node = &self.map.nodes.nodes[self.at]; let node = &self.map.nodes.node_at(self.at);
self.at += 1; self.at += 1;
if node.has_value() { if let Some(key_value) = node.key_value_ref() {
self.num_found += 1; self.num_found += 1;
return Some((node.key_ref().unwrap(), node.value_ref().unwrap())); return Some(key_value);
} }
} }
} }
@ -497,7 +545,7 @@ impl<'a, K, V, ALLOCATOR: ClonableAllocator> IntoIterator for &'a HashMap<K, V,
/// An iterator over entries of a [`HashMap`] /// An iterator over entries of a [`HashMap`]
/// ///
/// This struct is created using the `into_iter()` method on [`HashMap`] as part of its implementation /// This struct is created using the `into_iter()` method on [`HashMap`] as part of its implementation
/// of the IntoIterator trait. /// of the `IntoIterator` trait.
pub struct IterOwned<K, V, ALLOCATOR: Allocator = Global> { pub struct IterOwned<K, V, ALLOCATOR: Allocator = Global> {
map: HashMap<K, V, ALLOCATOR>, map: HashMap<K, V, ALLOCATOR>,
at: usize, at: usize,
@ -513,7 +561,7 @@ impl<K, V, ALLOCATOR: ClonableAllocator> Iterator for IterOwned<K, V, ALLOCATOR>
return None; return None;
} }
let maybe_kv = self.map.nodes.nodes[self.at].take_key_value(); let maybe_kv = self.map.nodes.node_at_mut(self.at).take_key_value();
self.at += 1; self.at += 1;
if let Some((k, v, _)) = maybe_kv { if let Some((k, v, _)) = maybe_kv {
@ -534,7 +582,7 @@ impl<K, V, ALLOCATOR: ClonableAllocator> Iterator for IterOwned<K, V, ALLOCATOR>
/// An iterator over entries of a [`HashMap`] /// An iterator over entries of a [`HashMap`]
/// ///
/// This struct is created using the `into_iter()` method on [`HashMap`] as part of its implementation /// This struct is created using the `into_iter()` method on [`HashMap`] as part of its implementation
/// of the IntoIterator trait. /// of the `IntoIterator` trait.
impl<K, V, ALLOCATOR: ClonableAllocator> IntoIterator for HashMap<K, V, ALLOCATOR> { impl<K, V, ALLOCATOR: ClonableAllocator> IntoIterator for HashMap<K, V, ALLOCATOR> {
type Item = (K, V); type Item = (K, V);
type IntoIter = IterOwned<K, V, ALLOCATOR>; type IntoIter = IterOwned<K, V, ALLOCATOR>;
@ -548,86 +596,133 @@ impl<K, V, ALLOCATOR: ClonableAllocator> IntoIterator for HashMap<K, V, ALLOCATO
} }
} }
/// A view into an occupied entry in a `HashMap`. This is part of the [`Entry`] enum. mod entries {
pub struct OccupiedEntry<'a, K: 'a, V: 'a, ALLOCATOR: Allocator> { use core::{alloc::Allocator, hash::Hash};
key: K,
map: &'a mut HashMap<K, V, ALLOCATOR>,
location: usize,
}
impl<'a, K: 'a, V: 'a, ALLOCATOR: ClonableAllocator> OccupiedEntry<'a, K, V, ALLOCATOR> { use super::{ClonableAllocator, HashMap};
/// Gets a reference to the key in the entry.
pub fn key(&self) -> &K { /// A view into an occupied entry in a `HashMap`. This is part of the [`crate::Entry`] enum.
&self.key pub struct OccupiedEntry<'a, K: 'a, V: 'a, ALLOCATOR: Allocator> {
key: K,
map: &'a mut HashMap<K, V, ALLOCATOR>,
location: usize,
} }
/// Take the ownership of the key and value from the map. impl<'a, K: 'a, V: 'a, ALLOCATOR: ClonableAllocator> OccupiedEntry<'a, K, V, ALLOCATOR> {
pub fn remove_entry(self) -> (K, V) { /// # Safety
let old_value = self.map.nodes.remove_from_location(self.location); ///
(self.key, old_value) /// You must call this with a valid location (one where the entry is defined)
pub(crate) unsafe fn new(
key: K,
map: &'a mut HashMap<K, V, ALLOCATOR>,
location: usize,
) -> Self {
Self { key, map, location }
}
/// Gets a reference to the key in the entry.
pub fn key(&self) -> &K {
&self.key
}
/// Take the ownership of the key and value from the map.
pub fn remove_entry(self) -> (K, V) {
let old_value = self.map.nodes.remove_from_location(self.location);
(self.key, old_value)
}
/// Gets a reference to the value in the entry.
pub fn get(&self) -> &V {
// SAFETY: This can only be constructed with valid locations
unsafe {
self.map
.nodes
.node_at_unchecked(self.location)
.value_ref_unchecked()
}
}
/// Gets a mutable reference to the value in the entry.
///
/// If you need a reference to the `OccupiedEntry` which may outlive the destruction
/// of the `Entry` value, see [`into_mut`].
///
/// [`into_mut`]: Self::into_mut
pub fn get_mut(&mut self) -> &mut V {
// SAFETY: This can only be constructed with valid locations
unsafe {
self.map
.nodes
.node_at_unchecked_mut(self.location)
.value_mut_unchecked()
}
}
/// Converts the `OccupiedEntry` into a mutable reference to the value in the entry with
/// a lifetime bound to the map itself.
///
/// If you need multiple references to the `OccupiedEntry`, see [`get_mut`].
///
/// [`get_mut`]: Self::get_mut
pub fn into_mut(self) -> &'a mut V {
// SAFETY: This can only be constructed with valid locations
unsafe {
self.map
.nodes
.node_at_unchecked_mut(self.location)
.value_mut_unchecked()
}
}
/// Sets the value of the entry and returns the entry's old value.
pub fn insert(&mut self, value: V) -> V {
// SAFETY: This can only be constructed with valid locations
unsafe {
self.map
.nodes
.node_at_unchecked_mut(self.location)
.replace_value_unchecked(value)
}
}
/// Takes the value out of the entry and returns it.
pub fn remove(self) -> V {
self.map.nodes.remove_from_location(self.location)
}
} }
/// Gets a reference to the value in the entry. /// A view into a vacant entry in a `HashMap`. It is part of the [`crate::Entry`] enum.
pub fn get(&self) -> &V { pub struct VacantEntry<'a, K: 'a, V: 'a, ALLOCATOR: Allocator> {
self.map.nodes.nodes[self.location].value_ref().unwrap() key: K,
map: &'a mut HashMap<K, V, ALLOCATOR>,
} }
/// Gets a mutable reference to the value in the entry. impl<'a, K: 'a, V: 'a, ALLOCATOR: ClonableAllocator> VacantEntry<'a, K, V, ALLOCATOR> {
/// pub(crate) fn new(key: K, map: &'a mut HashMap<K, V, ALLOCATOR>) -> Self {
/// If you need a reference to the `OccupiedEntry` which may outlive the destruction Self { key, map }
/// of the `Entry` value, see [`into_mut`]. }
///
/// [`into_mut`]: Self::into_mut
pub fn get_mut(&mut self) -> &mut V {
self.map.nodes.nodes[self.location].value_mut().unwrap()
}
/// Converts the `OccupiedEntry` into a mutable reference to the value in the entry with /// Gets a reference to the key that would be used when inserting a value through `VacantEntry`
/// a lifetime bound to the map itself. pub fn key(&self) -> &K {
/// &self.key
/// If you need multiple references to the `OccupiedEntry`, see [`get_mut`]. }
///
/// [`get_mut`]: Self::get_mut
pub fn into_mut(self) -> &'a mut V {
self.map.nodes.nodes[self.location].value_mut().unwrap()
}
/// Sets the value of the entry and returns the entry's old value. /// Take ownership of the key
pub fn insert(&mut self, value: V) -> V { pub fn into_key(self) -> K {
self.map.nodes.nodes[self.location].replace_value(value) self.key
} }
/// Takes the value out of the entry and returns it. /// Sets the value of the entry with the `VacantEntry`'s key and returns a mutable reference to it.
pub fn remove(self) -> V { pub fn insert(self, value: V) -> &'a mut V
self.map.nodes.remove_from_location(self.location) where
K: Hash + Eq,
{
self.map.insert_and_get(self.key, value)
}
} }
} }
/// A view into a vacant entry in a `HashMap`. It is part of the [`Entry`] enum. pub use entries::{OccupiedEntry, VacantEntry};
pub struct VacantEntry<'a, K: 'a, V: 'a, ALLOCATOR: Allocator> {
key: K,
map: &'a mut HashMap<K, V, ALLOCATOR>,
}
impl<'a, K: 'a, V: 'a, ALLOCATOR: ClonableAllocator> VacantEntry<'a, K, V, ALLOCATOR> {
/// Gets a reference to the key that would be used when inserting a value through `VacantEntry`
pub fn key(&self) -> &K {
&self.key
}
/// Take ownership of the key
pub fn into_key(self) -> K {
self.key
}
/// Sets the value of the entry with the `VacantEntry`'s key and returns a mutable reference to it.
pub fn insert(self, value: V) -> &'a mut V
where
K: Hash + Eq,
{
self.map.insert_and_get(self.key, value)
}
}
/// A view into a single entry in a map, which may be vacant or occupied. /// A view into a single entry in a map, which may be vacant or occupied.
/// ///
@ -679,7 +774,7 @@ where
match self { match self {
Entry::Occupied(e) => e.into_mut(), Entry::Occupied(e) => e.into_mut(),
Entry::Vacant(e) => { Entry::Vacant(e) => {
let value = f(&e.key); let value = f(e.key());
e.insert(value) e.insert(value)
} }
} }
@ -687,6 +782,7 @@ where
/// Provides in-place mutable access to an occupied entry before any potential inserts /// Provides in-place mutable access to an occupied entry before any potential inserts
/// into the map. /// into the map.
#[must_use]
pub fn and_modify<F>(self, f: F) -> Self pub fn and_modify<F>(self, f: F) -> Self
where where
F: FnOnce(&mut V), F: FnOnce(&mut V),
@ -715,8 +811,8 @@ where
/// Returns a reference to this entry's key. /// Returns a reference to this entry's key.
pub fn key(&self) -> &K { pub fn key(&self) -> &K {
match self { match self {
Entry::Occupied(e) => &e.key, Entry::Occupied(e) => e.key(),
Entry::Vacant(e) => &e.key, Entry::Vacant(e) => e.key(),
} }
} }
} }
@ -731,13 +827,12 @@ where
let location = self.nodes.location(&key, hash); let location = self.nodes.location(&key, hash);
if let Some(location) = location { if let Some(location) = location {
Entry::Occupied(OccupiedEntry { Entry::Occupied(
key, // SAFETY: location is valid by the call to location above
location, unsafe { OccupiedEntry::new(key, self, location) },
map: self, )
})
} else { } else {
Entry::Vacant(VacantEntry { key, map: self }) Entry::Vacant(VacantEntry::new(key, self))
} }
} }
} }
@ -776,313 +871,81 @@ where
} }
} }
impl<K, V, ALLOCATOR: ClonableAllocator> PartialEq for HashMap<K, V, ALLOCATOR>
where
K: Eq + Hash,
V: PartialEq,
{
fn eq(&self, other: &HashMap<K, V, ALLOCATOR>) -> bool {
if self.len() != other.len() {
return false;
}
self.iter()
.all(|(key, value)| other.get(key).map_or(false, |v| *value == *v))
}
}
impl<K, V, ALLOCATOR: ClonableAllocator> Eq for HashMap<K, V, ALLOCATOR>
where
K: Eq + Hash,
V: PartialEq,
{
}
impl<K, V, ALLOCATOR: ClonableAllocator> Debug for HashMap<K, V, ALLOCATOR>
where
K: Debug,
V: Debug,
{
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
f.debug_map().entries(self.iter()).finish()
}
}
const fn number_before_resize(capacity: usize) -> usize { const fn number_before_resize(capacity: usize) -> usize {
capacity * 85 / 100 capacity * 60 / 100
} }
struct NodeStorage<K, V, ALLOCATOR: Allocator = Global> { #[derive(Clone, Copy, PartialEq, Eq)]
nodes: Vec<Node<K, V>, ALLOCATOR>, pub(crate) struct HashType(u32);
max_distance_to_initial_bucket: i32,
number_of_items: usize, impl From<usize> for HashType {
max_number_before_resize: usize, fn from(value: usize) -> Self {
} // we explicitly want to allow truncation
#[allow(clippy::cast_possible_truncation)]
impl<K, V, ALLOCATOR: ClonableAllocator> NodeStorage<K, V, ALLOCATOR> { Self(value as u32)
fn with_size_in(capacity: usize, alloc: ALLOCATOR) -> Self {
assert!(capacity.is_power_of_two(), "Capacity must be a power of 2");
let mut nodes = Vec::with_capacity_in(capacity, alloc);
for _ in 0..capacity {
nodes.push(Default::default());
}
Self {
nodes,
max_distance_to_initial_bucket: 0,
number_of_items: 0,
max_number_before_resize: number_before_resize(capacity),
}
}
fn allocator(&self) -> &ALLOCATOR {
self.nodes.allocator()
}
fn capacity(&self) -> usize {
self.max_number_before_resize
}
fn backing_vec_size(&self) -> usize {
self.nodes.len()
}
fn len(&self) -> usize {
self.number_of_items
}
fn insert_new(&mut self, key: K, value: V, hash: HashType) -> usize {
debug_assert!(
self.capacity() > self.len(),
"Do not have space to insert into len {} with {}",
self.backing_vec_size(),
self.len()
);
let mut new_node = Node::new_with(key, value, hash);
let mut inserted_location = usize::MAX;
loop {
let location = fast_mod(
self.backing_vec_size(),
new_node.hash + new_node.distance() as HashType,
);
let current_node = &mut self.nodes[location];
if current_node.has_value() {
if current_node.distance() <= new_node.distance() {
mem::swap(&mut new_node, current_node);
if inserted_location == usize::MAX {
inserted_location = location;
}
}
} else {
self.nodes[location] = new_node;
if inserted_location == usize::MAX {
inserted_location = location;
}
break;
}
new_node.increment_distance();
self.max_distance_to_initial_bucket =
new_node.distance().max(self.max_distance_to_initial_bucket);
}
self.number_of_items += 1;
inserted_location
}
fn retain<F>(&mut self, mut f: F)
where
F: FnMut(&K, &mut V) -> bool,
{
let num_nodes = self.nodes.len();
let mut i = 0;
while i < num_nodes {
let node = &mut self.nodes[i];
if let Some((k, v)) = node.key_value_mut() {
if !f(k, v) {
self.remove_from_location(i);
// Need to continue before adding 1 to i because remove from location could
// put the element which was next into the ith location in the nodes array,
// so we need to check if that one needs removing too.
continue;
}
}
i += 1;
}
}
fn remove_from_location(&mut self, location: usize) -> V {
let mut current_location = location;
self.number_of_items -= 1;
loop {
let next_location =
fast_mod(self.backing_vec_size(), (current_location + 1) as HashType);
// if the next node is empty, or the next location has 0 distance to initial bucket then
// we can clear the current node
if !self.nodes[next_location].has_value() || self.nodes[next_location].distance() == 0 {
return self.nodes[current_location].take_key_value().unwrap().1;
}
self.nodes.swap(current_location, next_location);
self.nodes[current_location].decrement_distance();
current_location = next_location;
}
}
fn location<Q>(&self, key: &Q, hash: HashType) -> Option<usize>
where
K: Borrow<Q>,
Q: Eq + ?Sized,
{
for distance_to_initial_bucket in 0..(self.max_distance_to_initial_bucket + 1) {
let location = fast_mod(
self.nodes.len(),
hash + distance_to_initial_bucket as HashType,
);
let node = &self.nodes[location];
if let Some(node_key_ref) = node.key_ref() {
if node_key_ref.borrow() == key {
return Some(location);
}
} else {
return None;
}
}
None
}
fn resized_to(&mut self, new_size: usize) -> Self {
let mut new_node_storage = Self::with_size_in(new_size, self.allocator().clone());
for mut node in self.nodes.drain(..) {
if let Some((key, value, hash)) = node.take_key_value() {
new_node_storage.insert_new(key, value, hash);
}
}
new_node_storage
}
fn replace_at_location(&mut self, location: usize, key: K, value: V) -> V {
self.nodes[location].replace(key, value).1
} }
} }
struct Node<K, V> { impl HashType {
hash: HashType, pub(crate) const fn new() -> Self {
Self(0)
// distance_to_initial_bucket = -1 => key and value are uninit.
// distance_to_initial_bucket >= 0 => key and value are init
distance_to_initial_bucket: i32,
key: MaybeUninit<K>,
value: MaybeUninit<V>,
}
impl<K, V> Node<K, V> {
fn new() -> Self {
Self {
hash: 0,
distance_to_initial_bucket: -1,
key: MaybeUninit::uninit(),
value: MaybeUninit::uninit(),
}
} }
fn new_with(key: K, value: V, hash: HashType) -> Self { // 32 bit mix function from here: https://github.com/skeeto/hash-prospector
Self { fn bit_mix(key: u32) -> Self {
hash, let mut key = Wrapping(key);
distance_to_initial_bucket: 0, key ^= key >> 16;
key: MaybeUninit::new(key), key *= 0x7feb352d;
value: MaybeUninit::new(value), key ^= key >> 15;
} key *= 0x846ca68b;
key ^= key >> 16;
Self(key.0)
} }
fn value_ref(&self) -> Option<&V> { pub(crate) fn fast_mod(self, len: usize) -> usize {
if self.has_value() { debug_assert!(len.is_power_of_two(), "Length must be a power of 2");
Some(unsafe { self.value.assume_init_ref() }) (self.0 as usize) & (len - 1)
} else {
None
}
}
fn value_mut(&mut self) -> Option<&mut V> {
if self.has_value() {
Some(unsafe { self.value.assume_init_mut() })
} else {
None
}
}
fn key_ref(&self) -> Option<&K> {
if self.distance_to_initial_bucket >= 0 {
Some(unsafe { self.key.assume_init_ref() })
} else {
None
}
}
fn key_value_ref(&self) -> Option<(&K, &V)> {
if self.has_value() {
Some(unsafe { (self.key.assume_init_ref(), self.value.assume_init_ref()) })
} else {
None
}
}
fn key_value_mut(&mut self) -> Option<(&K, &mut V)> {
if self.has_value() {
Some(unsafe { (self.key.assume_init_ref(), self.value.assume_init_mut()) })
} else {
None
}
}
fn has_value(&self) -> bool {
self.distance_to_initial_bucket >= 0
}
fn take_key_value(&mut self) -> Option<(K, V, HashType)> {
if self.has_value() {
let key = mem::replace(&mut self.key, MaybeUninit::uninit());
let value = mem::replace(&mut self.value, MaybeUninit::uninit());
self.distance_to_initial_bucket = -1;
Some(unsafe { (key.assume_init(), value.assume_init(), self.hash) })
} else {
None
}
}
fn replace_value(&mut self, value: V) -> V {
if self.has_value() {
let old_value = mem::replace(&mut self.value, MaybeUninit::new(value));
unsafe { old_value.assume_init() }
} else {
panic!("Cannot replace an uninitialised node");
}
}
fn replace(&mut self, key: K, value: V) -> (K, V) {
if self.has_value() {
let old_key = mem::replace(&mut self.key, MaybeUninit::new(key));
let old_value = mem::replace(&mut self.value, MaybeUninit::new(value));
unsafe { (old_key.assume_init(), old_value.assume_init()) }
} else {
panic!("Cannot replace an uninitialised node");
}
}
fn increment_distance(&mut self) {
self.distance_to_initial_bucket += 1;
}
fn decrement_distance(&mut self) {
self.distance_to_initial_bucket -= 1;
if self.distance_to_initial_bucket < 0 {
panic!("Cannot decrement distance to below 0");
}
}
fn distance(&self) -> i32 {
self.distance_to_initial_bucket
} }
} }
impl<K, V> Drop for Node<K, V> { impl core::ops::Add<i32> for HashType {
fn drop(&mut self) { type Output = HashType;
if self.has_value() {
unsafe { ptr::drop_in_place(self.key.as_mut_ptr()) };
unsafe { ptr::drop_in_place(self.value.as_mut_ptr()) };
}
}
}
impl<K, V> Default for Node<K, V> { fn add(self, rhs: i32) -> Self::Output {
fn default() -> Self { Self(self.0.wrapping_add_signed(rhs))
Self::new()
} }
} }
@ -1090,6 +953,8 @@ impl<K, V> Default for Node<K, V> {
mod test { mod test {
use core::cell::RefCell; use core::cell::RefCell;
use alloc::vec::Vec;
use super::*; use super::*;
#[test] #[test]
@ -1155,7 +1020,7 @@ mod test {
let mut max_found = -1; let mut max_found = -1;
let mut num_found = 0; let mut num_found = 0;
for (_, value) in map.into_iter() { for (_, value) in map {
max_found = max_found.max(value); max_found = max_found.max(value);
num_found += 1; num_found += 1;
} }
@ -1205,9 +1070,7 @@ mod test {
impl Drop for NoisyDrop { impl Drop for NoisyDrop {
fn drop(&mut self) { fn drop(&mut self) {
if self.dropped { assert!(!self.dropped, "NoisyDropped dropped twice");
panic!("NoisyDropped dropped twice");
}
self.dropped = true; self.dropped = true;
} }
@ -1234,11 +1097,11 @@ mod test {
let mut map = HashMap::new(); let mut map = HashMap::new();
let mut rng = rand::rngs::SmallRng::seed_from_u64(20); let mut rng = rand::rngs::SmallRng::seed_from_u64(20);
let mut answers: [Option<i32>; 128] = [None; 128]; let mut answers: [Option<i32>; 512] = [None; 512];
for _ in 0..5_000 { for _ in 0..15_000 {
let command = rng.next_i32().rem_euclid(2); let command = rng.next_i32().rem_euclid(2);
let key = rng.next_i32().rem_euclid(answers.len() as i32); let key = rng.next_i32().rem_euclid(answers.len().try_into().unwrap());
let value = rng.next_i32(); let value = rng.next_i32();
match command { match command {
@ -1257,7 +1120,8 @@ mod test {
for (i, answer) in answers.iter().enumerate() { for (i, answer) in answers.iter().enumerate() {
assert_eq!( assert_eq!(
map.get(&NoisyDrop::new(i as i32)).map(|nd| &nd.i), map.get(&NoisyDrop::new(i.try_into().unwrap()))
.map(|nd| &nd.i),
answer.as_ref() answer.as_ref()
); );
} }
@ -1295,13 +1159,13 @@ mod test {
} }
impl DropRegistry { impl DropRegistry {
pub fn new() -> Self { fn new() -> Self {
Self { Self {
are_dropped: Default::default(), are_dropped: RefCell::default(),
} }
} }
pub fn new_droppable(&self) -> Droppable<'_> { fn new_droppable(&self) -> Droppable<'_> {
self.are_dropped.borrow_mut().push(0); self.are_dropped.borrow_mut().push(0);
Droppable { Droppable {
id: self.are_dropped.borrow().len() - 1, id: self.are_dropped.borrow().len() - 1,
@ -1309,19 +1173,19 @@ mod test {
} }
} }
pub fn dropped(&self, id: usize) { fn dropped(&self, id: usize) {
self.are_dropped.borrow_mut()[id] += 1; self.are_dropped.borrow_mut()[id] += 1;
} }
pub fn assert_dropped_once(&self, id: usize) { fn assert_dropped_once(&self, id: usize) {
assert_eq!(self.are_dropped.borrow()[id], 1); assert_eq!(self.are_dropped.borrow()[id], 1);
} }
pub fn assert_not_dropped(&self, id: usize) { fn assert_not_dropped(&self, id: usize) {
assert_eq!(self.are_dropped.borrow()[id], 0); assert_eq!(self.are_dropped.borrow()[id], 0);
} }
pub fn assert_dropped_n_times(&self, id: usize, num_drops: i32) { fn assert_dropped_n_times(&self, id: usize, num_drops: i32) {
assert_eq!(self.are_dropped.borrow()[id], num_drops); assert_eq!(self.are_dropped.borrow()[id], num_drops);
} }
} }
@ -1450,6 +1314,8 @@ mod test {
// Following test cases copied from the rust source // Following test cases copied from the rust source
// https://github.com/rust-lang/rust/blob/master/library/std/src/collections/hash/map/tests.rs // https://github.com/rust-lang/rust/blob/master/library/std/src/collections/hash/map/tests.rs
mod rust_std_tests { mod rust_std_tests {
use alloc::format;
use crate::{Entry::*, HashMap}; use crate::{Entry::*, HashMap};
#[test] #[test]
@ -1548,5 +1414,37 @@ mod test {
assert_eq!(map[&2], 1); assert_eq!(map[&2], 1);
} }
#[test]
fn test_eq() {
    // Maps compare equal only once they hold the same key/value pairs.
    let mut m1 = HashMap::new();
    for (key, value) in [(1, 2), (2, 3), (3, 4)] {
        m1.insert(key, value);
    }

    let mut m2 = HashMap::new();
    for (key, value) in [(1, 2), (2, 3)] {
        m2.insert(key, value);
    }

    // `m2` does not yet contain the `3 => 4` entry.
    assert!(m1 != m2);

    m2.insert(3, 4);

    assert_eq!(m1, m2);
}
#[test]
fn test_show() {
    let empty: HashMap<i32, i32> = HashMap::new();

    let mut map = HashMap::new();
    map.insert(1, 2);
    map.insert(3, 4);

    // Either ordering of the two entries is accepted in the debug output.
    let rendered = format!("{map:?}");
    let accepted = ["{1: 2, 3: 4}", "{3: 4, 1: 2}"];
    assert!(accepted.contains(&rendered.as_str()));

    // A map without entries renders as a bare pair of braces.
    assert_eq!(format!("{empty:?}"), "{}");
}
} }
} }

170
agb-hashmap/src/node.rs Normal file
View file

@ -0,0 +1,170 @@
use core::{
mem::{self, MaybeUninit},
ptr,
};
use crate::HashType;
/// One slot of the hash map's backing storage.
///
/// A slot is either empty or holds a key, a value and the hash recorded for that key.
/// Whether `key` and `value` are initialised is encoded in `distance_to_initial_bucket`
/// (see the comment on that field), so every read of them must check it first.
pub(crate) struct Node<K, V> {
// Hash recorded for the stored key; `HashType::new()` for nodes created empty.
hash: HashType,
// distance_to_initial_bucket = -1 => key and value are uninit.
// distance_to_initial_bucket >= 0 => key and value are init
distance_to_initial_bucket: i32,
// Only initialised while `distance_to_initial_bucket >= 0`.
key: MaybeUninit<K>,
// Only initialised while `distance_to_initial_bucket >= 0`.
value: MaybeUninit<V>,
}
impl<K, V> Node<K, V> {
    /// Creates an empty node: key and value are left uninitialised and the distance is
    /// set to the `-1` "empty" marker.
    pub(crate) const fn new() -> Self {
        Self {
            key: MaybeUninit::uninit(),
            value: MaybeUninit::uninit(),
            distance_to_initial_bucket: -1,
            hash: HashType::new(),
        }
    }

    /// Creates an occupied node holding `key` and `value`, at distance 0.
    pub(crate) fn new_with(key: K, value: V, hash: HashType) -> Self {
        Self {
            key: MaybeUninit::new(key),
            value: MaybeUninit::new(value),
            distance_to_initial_bucket: 0,
            hash,
        }
    }

    /// Returns a reference to the stored value without checking that one exists.
    ///
    /// # Safety
    ///
    /// The node must currently hold a value (`has_value()` returns `true`).
    pub(crate) unsafe fn value_ref_unchecked(&self) -> &V {
        // SAFETY: the caller guarantees that the value is initialised.
        unsafe { self.value.assume_init_ref() }
    }

    /// Returns a mutable reference to the stored value without checking that one exists.
    ///
    /// # Safety
    ///
    /// The node must currently hold a value (`has_value()` returns `true`).
    pub(crate) unsafe fn value_mut_unchecked(&mut self) -> &mut V {
        // SAFETY: the caller guarantees that the value is initialised.
        unsafe { self.value.assume_init_mut() }
    }

    /// Returns the stored key, or `None` for an empty node.
    pub(crate) fn key_ref(&self) -> Option<&K> {
        if !self.has_value() {
            return None;
        }

        // SAFETY: an occupied node always has an initialised key.
        Some(unsafe { self.key.assume_init_ref() })
    }

    /// Returns the stored key and value, or `None` for an empty node.
    pub(crate) fn key_value_ref(&self) -> Option<(&K, &V)> {
        self.has_value().then(|| {
            // SAFETY: only reached when the node is occupied.
            unsafe { self.key_value_ref_unchecked() }
        })
    }

    /// Returns the stored key and value without checking that they exist.
    ///
    /// # Safety
    ///
    /// The node must currently hold a value (`has_value()` returns `true`).
    pub(crate) unsafe fn key_value_ref_unchecked(&self) -> (&K, &V) {
        // SAFETY: the caller guarantees that key and value are initialised.
        let key = unsafe { self.key.assume_init_ref() };
        let value = unsafe { self.value.assume_init_ref() };
        (key, value)
    }

    /// Returns the stored key together with a mutable reference to the value, or `None`
    /// for an empty node.
    pub(crate) fn key_value_mut(&mut self) -> Option<(&K, &mut V)> {
        if !self.has_value() {
            return None;
        }

        // SAFETY: an occupied node always has an initialised key and value.
        let key = unsafe { self.key.assume_init_ref() };
        let value = unsafe { self.value.assume_init_mut() };
        Some((key, value))
    }

    /// Whether this node currently stores a key/value pair.
    pub(crate) fn has_value(&self) -> bool {
        !self.distance_to_initial_bucket.is_negative()
    }

    /// Moves the key, value and hash out of the node, leaving it empty.
    ///
    /// Returns `None` (and changes nothing) when the node was already empty.
    pub(crate) fn take_key_value(&mut self) -> Option<(K, V, HashType)> {
        if !self.has_value() {
            return None;
        }

        // Mark the node empty so the moved-out contents are never read or dropped again.
        self.distance_to_initial_bucket = -1;
        let key = mem::replace(&mut self.key, MaybeUninit::uninit());
        let value = mem::replace(&mut self.value, MaybeUninit::uninit());

        // SAFETY: the node was occupied on entry, so both were initialised.
        Some(unsafe { (key.assume_init(), value.assume_init(), self.hash) })
    }

    /// Stores `value` in the node and returns the value that was there before.
    ///
    /// # Safety
    ///
    /// The node must currently hold a value (`has_value()` returns `true`).
    pub(crate) unsafe fn replace_value_unchecked(&mut self, value: V) -> V {
        // SAFETY: the caller guarantees that the previous value is initialised.
        unsafe { mem::replace(&mut self.value, MaybeUninit::new(value)).assume_init() }
    }

    /// Stores `key` and `value` in the node and returns the pair that was there before.
    ///
    /// The recorded hash and distance are left untouched.
    ///
    /// # Safety
    ///
    /// The node must currently hold a value (`has_value()` returns `true`).
    pub(crate) unsafe fn replace_unchecked(&mut self, key: K, value: V) -> (K, V) {
        let previous_key = mem::replace(&mut self.key, MaybeUninit::new(key));
        let previous_value = mem::replace(&mut self.value, MaybeUninit::new(value));

        // SAFETY: the caller guarantees that the previous key and value are initialised.
        unsafe { (previous_key.assume_init(), previous_value.assume_init()) }
    }

    /// Moves the node one step further from its initial bucket.
    pub(crate) fn increment_distance(&mut self) {
        self.distance_to_initial_bucket += 1;
    }

    /// Moves the node one step closer to its initial bucket.
    ///
    /// # Panics
    ///
    /// Panics if the distance would drop below 0.
    pub(crate) fn decrement_distance(&mut self) {
        let decremented = self.distance_to_initial_bucket - 1;
        self.distance_to_initial_bucket = decremented;
        assert!(decremented >= 0, "Cannot decrement distance below 0");
    }

    /// Current distance from the initial bucket; `-1` for an empty node.
    pub(crate) fn distance(&self) -> i32 {
        self.distance_to_initial_bucket
    }

    /// The hash recorded in this node.
    pub(crate) fn hash(&self) -> HashType {
        self.hash
    }
}
impl<K, V> Drop for Node<K, V> {
    /// Drops the stored key and value, if the node holds any.
    fn drop(&mut self) {
        // An empty node owns nothing: its key and value are uninitialised.
        if !self.has_value() {
            return;
        }

        let key_ptr = self.key.as_mut_ptr();
        let value_ptr = self.value.as_mut_ptr();

        // SAFETY: the node is occupied, so both pointers refer to initialised data,
        // and nothing reads them again after this point.
        unsafe {
            ptr::drop_in_place(key_ptr);
            ptr::drop_in_place(value_ptr);
        }
    }
}
impl<K, V> Clone for Node<K, V>
where
    K: Clone,
    V: Clone,
{
    /// Clones the node: the hash and distance are copied, and the key and value are
    /// cloned only when the node actually holds them.
    fn clone(&self) -> Self {
        let (key, value) = match self.key_value_ref() {
            Some((key, value)) => (
                MaybeUninit::new(key.clone()),
                MaybeUninit::new(value.clone()),
            ),
            // An empty node has nothing to clone; the copy stays uninitialised too.
            None => (MaybeUninit::uninit(), MaybeUninit::uninit()),
        };

        Self {
            hash: self.hash,
            distance_to_initial_bucket: self.distance_to_initial_bucket,
            key,
            value,
        }
    }
}

View file

@ -0,0 +1,227 @@
use core::{alloc::Allocator, borrow::Borrow, mem};
use alloc::{alloc::Global, vec::Vec};
use crate::{node::Node, number_before_resize, ClonableAllocator, HashType};
/// The backing storage of the hash map: a vector of `Node` slots plus the bookkeeping
/// needed to probe it and to report how full it is.
#[derive(Clone)]
pub(crate) struct NodeStorage<K, V, ALLOCATOR: Allocator = Global> {
// The slots. `with_size_in` creates a power-of-two number of them, which is what
// lets probing wrap around using `HashType::fast_mod`.
nodes: Vec<Node<K, V>, ALLOCATOR>,
// Largest distance reached by any node while inserting; `location` uses it as the
// upper bound on how far to probe. It is only reset by `clear`.
max_distance_to_initial_bucket: i32,
// Number of key/value pairs currently stored (reported by `len`).
number_of_items: usize,
// `number_before_resize(capacity)` as computed at construction (reported by `capacity`).
max_number_before_resize: usize,
}
impl<K, V, ALLOCATOR: ClonableAllocator> NodeStorage<K, V, ALLOCATOR> {
    /// Creates a storage with `capacity` empty slots allocated in `alloc`.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is not a power of two.
    pub(crate) fn with_size_in(capacity: usize, alloc: ALLOCATOR) -> Self {
        assert!(capacity.is_power_of_two(), "Capacity must be a power of 2");

        let mut nodes = Vec::with_capacity_in(capacity, alloc);
        nodes.resize_with(capacity, Node::new);

        Self {
            max_number_before_resize: number_before_resize(capacity),
            number_of_items: 0,
            max_distance_to_initial_bucket: 0,
            nodes,
        }
    }

    /// The allocator used by the backing vector.
    pub(crate) fn allocator(&self) -> &ALLOCATOR {
        self.nodes.allocator()
    }

    /// The item limit computed by `number_before_resize` when the storage was created.
    pub(crate) fn capacity(&self) -> usize {
        self.max_number_before_resize
    }

    /// Total number of slots (occupied or not) in the backing vector.
    pub(crate) fn backing_vec_size(&self) -> usize {
        self.nodes.len()
    }

    /// Number of key/value pairs currently stored.
    pub(crate) fn len(&self) -> usize {
        self.number_of_items
    }

    /// Inserts a key that is not yet present and returns the slot index where the new
    /// key/value pair was placed.
    ///
    /// Starting at the slot selected by `hash`, the node being placed walks forward one
    /// slot at a time. Whenever the resident node is no further from its own initial
    /// bucket than the node being placed, the two are swapped and the displaced resident
    /// continues the walk; the walk ends at the first empty slot.
    pub(crate) fn insert_new(&mut self, key: K, value: V, hash: HashType) -> usize {
        debug_assert!(
            self.capacity() > self.len(),
            "Do not have space to insert into len {} with {}",
            self.backing_vec_size(),
            self.len()
        );

        // The node currently looking for a home: the new one at first, afterwards
        // whichever resident was displaced most recently.
        let mut carried = Node::new_with(key, value, hash);
        // Slot where the *new* node was put down, if that happened through a swap.
        let mut first_placement: Option<usize> = None;

        let final_location = loop {
            let location = (carried.hash() + carried.distance()).fast_mod(self.backing_vec_size());
            let slot = &mut self.nodes[location];

            if !slot.has_value() {
                *slot = carried;
                break location;
            }

            if slot.distance() <= carried.distance() {
                mem::swap(&mut carried, slot);
                if first_placement.is_none() {
                    first_placement = Some(location);
                }
            }

            carried.increment_distance();
            self.max_distance_to_initial_bucket = self
                .max_distance_to_initial_bucket
                .max(carried.distance());
        };

        self.number_of_items += 1;

        // Without any swap, the new node itself ended up in the final (empty) slot.
        first_placement.unwrap_or(final_location)
    }

    /// Keeps only the entries for which `f` returns `true`.
    pub(crate) fn retain<F>(&mut self, mut f: F)
    where
        F: FnMut(&K, &mut V) -> bool,
    {
        let slot_count = self.nodes.len();
        let mut index = 0;

        while index < slot_count {
            let keep = match self.nodes[index].key_value_mut() {
                Some((key, value)) => f(key, value),
                None => true,
            };

            if keep {
                index += 1;
            } else {
                // Deliberately stay on the same index: removing can shift the following
                // element into this slot, and that one has to be checked as well.
                self.remove_from_location(index);
            }
        }
    }

    /// Removes the entry stored at `location` and returns its value.
    ///
    /// Following nodes that are displaced from their initial bucket are shifted back by
    /// one slot to close the gap.
    ///
    /// # Panics
    ///
    /// Panics if the slot at `location` is empty.
    pub(crate) fn remove_from_location(&mut self, location: usize) -> V {
        self.number_of_items -= 1;

        // Index of the slot currently holding the entry being removed.
        let mut hole = location;

        loop {
            let successor = HashType::from(hole + 1).fast_mod(self.backing_vec_size());
            let successor_node = &self.nodes[successor];

            // An empty successor, or one already sitting in its initial bucket, must not
            // move, so the shifting stops here and the entry can be taken out.
            let successor_stays = !successor_node.has_value() || successor_node.distance() == 0;
            if successor_stays {
                let (_key, value, _hash) = self.nodes[hole].take_key_value().unwrap();
                return value;
            }

            self.nodes.swap(hole, successor);
            self.nodes[hole].decrement_distance();
            hole = successor;
        }
    }

    /// Finds the slot index holding `key`, given the key's `hash`.
    ///
    /// Returns `None` when the key is not stored.
    pub(crate) fn location<Q>(&self, key: &Q, hash: HashType) -> Option<usize>
    where
        K: Borrow<Q>,
        Q: Eq + ?Sized,
    {
        let probe_limit = self.max_distance_to_initial_bucket + 1;

        for probe_distance in 0..probe_limit {
            let location = (hash + probe_distance).fast_mod(self.backing_vec_size());
            let node = &self.nodes[location];

            // A resident closer to its initial bucket than our key would be at this slot
            // means the key cannot exist: inserting it would have claimed this slot.
            if node.distance() < probe_distance {
                return None;
            }

            match node.key_ref() {
                Some(stored_key) if stored_key.borrow() == key => return Some(location),
                Some(_) => {}
                None => return None,
            }
        }

        None
    }

    /// Builds a new storage of `new_size` slots (using a clone of this storage's
    /// allocator) and moves every stored key/value pair into it.
    ///
    /// All nodes are drained out of `self.nodes` in the process.
    pub(crate) fn resized_to(&mut self, new_size: usize) -> Self {
        let alloc = self.allocator().clone();
        let mut resized = Self::with_size_in(new_size, alloc);

        self.nodes
            .drain(..)
            // Empty slots yield `None` and are simply discarded.
            .filter_map(|mut node| node.take_key_value())
            .for_each(|(key, value, hash)| {
                resized.insert_new(key, value, hash);
            });

        resized
    }

    /// Replaces the key and value stored at `location` and returns the old value.
    ///
    /// # Safety
    ///
    /// `location` must be in bounds of the backing vector and the node there must
    /// currently hold a value.
    pub(crate) unsafe fn replace_at_location_unchecked(
        &mut self,
        location: usize,
        key: K,
        value: V,
    ) -> V {
        // SAFETY: the caller guarantees that `location` is in bounds and occupied.
        let (_old_key, old_value) = unsafe {
            self.node_at_unchecked_mut(location)
                .replace_unchecked(key, value)
        };
        old_value
    }

    /// Iterates mutably over every slot, occupied or not.
    pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = &mut Node<K, V>> {
        self.nodes.iter_mut()
    }

    /// The node at slot `at`.
    ///
    /// # Panics
    ///
    /// Panics if `at` is out of bounds.
    pub(crate) fn node_at(&self, at: usize) -> &Node<K, V> {
        &self.nodes[at]
    }

    /// The node at slot `at`, mutably.
    ///
    /// # Panics
    ///
    /// Panics if `at` is out of bounds.
    pub(crate) fn node_at_mut(&mut self, at: usize) -> &mut Node<K, V> {
        &mut self.nodes[at]
    }

    /// The node at slot `at`, without bounds checking.
    ///
    /// # Safety
    ///
    /// `at` must be less than `backing_vec_size()`.
    pub(crate) unsafe fn node_at_unchecked(&self, at: usize) -> &Node<K, V> {
        // SAFETY: the caller guarantees that `at` is in bounds.
        unsafe { self.nodes.get_unchecked(at) }
    }

    /// The node at slot `at`, mutably and without bounds checking.
    ///
    /// # Safety
    ///
    /// `at` must be less than `backing_vec_size()`.
    pub(crate) unsafe fn node_at_unchecked_mut(&mut self, at: usize) -> &mut Node<K, V> {
        // SAFETY: the caller guarantees that `at` is in bounds.
        unsafe { self.nodes.get_unchecked_mut(at) }
    }

    /// Counts how many occupied nodes sit at each distance from their initial bucket.
    ///
    /// Returns the histogram (index = distance) together with the recorded maximum
    /// distance.
    pub(crate) fn distance_histogram(&self) -> (Vec<usize>, usize) {
        let mut histogram: Vec<usize> = Vec::new();

        // Empty nodes report a negative distance and are left out.
        let occupied_distances = self
            .nodes
            .iter()
            .map(Node::distance)
            .filter(|&distance| distance >= 0);

        for distance in occupied_distances {
            let bucket = distance as usize;
            if bucket >= histogram.len() {
                histogram.resize(bucket + 1, 0);
            }
            histogram[bucket] += 1;
        }

        (histogram, self.max_distance_to_initial_bucket as usize)
    }

    /// Removes every entry while keeping the same number of slots.
    pub(crate) fn clear(&mut self) {
        self.max_distance_to_initial_bucket = 0;
        self.number_of_items = 0;
        // Overwriting each slot drops whatever key/value it held.
        self.nodes.fill_with(Node::new);
    }
}