Extract node_storage as well

This commit is contained in:
Gwilym Inzani 2023-05-09 20:57:33 +01:00
parent 5e04e8c03f
commit 1cdf23683a
2 changed files with 218 additions and 187 deletions

View file

@ -17,21 +17,22 @@
extern crate alloc; extern crate alloc;
use alloc::{alloc::Global, vec::Vec}; use alloc::alloc::Global;
use core::{ use core::{
alloc::Allocator, alloc::Allocator,
borrow::Borrow, borrow::Borrow,
hash::{BuildHasher, BuildHasherDefault, Hash, Hasher}, hash::{BuildHasher, BuildHasherDefault, Hash, Hasher},
iter::FromIterator, iter::FromIterator,
mem,
ops::Index, ops::Index,
}; };
use rustc_hash::FxHasher; use rustc_hash::FxHasher;
mod node; mod node;
mod node_storage;
use node::Node; use node::Node;
use node_storage::NodeStorage;
type HashType = u32; type HashType = u32;
@ -249,7 +250,7 @@ impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> {
/// An iterator visiting all key-value pairs in an arbitrary order, with mutable references to the values /// An iterator visiting all key-value pairs in an arbitrary order, with mutable references to the values
pub fn iter_mut(&mut self) -> impl Iterator<Item = (&'_ K, &'_ mut V)> { pub fn iter_mut(&mut self) -> impl Iterator<Item = (&'_ K, &'_ mut V)> {
self.nodes.nodes.iter_mut().filter_map(Node::key_value_mut) self.nodes.iter_mut().filter_map(Node::key_value_mut)
} }
/// Retains only the elements specified by the predicate `f`. /// Retains only the elements specified by the predicate `f`.
@ -285,11 +286,6 @@ impl<K, V> Default for HashMap<K, V> {
} }
} }
const fn fast_mod(len: usize, hash: HashType) -> usize {
debug_assert!(len.is_power_of_two(), "Length must be a power of 2");
(hash as usize) & (len - 1)
}
impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR> impl<K, V, ALLOCATOR: ClonableAllocator> HashMap<K, V, ALLOCATOR>
where where
K: Eq + Hash, K: Eq + Hash,
@ -331,7 +327,7 @@ where
self.nodes.insert_new(key, value, hash) self.nodes.insert_new(key, value, hash)
}; };
self.nodes.nodes[location].value_mut().unwrap() self.nodes.node_at_mut(location).value_mut().unwrap()
} }
/// Returns `true` if the map contains a value for the specified key. /// Returns `true` if the map contains a value for the specified key.
@ -354,7 +350,7 @@ where
self.nodes self.nodes
.location(key, hash) .location(key, hash)
.and_then(|location| self.nodes.nodes[location].key_value_ref()) .and_then(|location| self.nodes.node_at(location).key_value_ref())
} }
/// Returns a reference to the value corresponding to the key. Returns [`None`] if there is /// Returns a reference to the value corresponding to the key. Returns [`None`] if there is
@ -401,7 +397,7 @@ where
let hash = self.hash(key); let hash = self.hash(key);
if let Some(location) = self.nodes.location(key, hash) { if let Some(location) = self.nodes.location(key, hash) {
self.nodes.nodes[location].value_mut() self.nodes.node_at_mut(location).value_mut()
} else { } else {
None None
} }
@ -466,7 +462,7 @@ impl<'a, K, V, ALLOCATOR: ClonableAllocator> Iterator for Iter<'a, K, V, ALLOCAT
return None; return None;
} }
let node = &self.map.nodes.nodes[self.at]; let node = &self.map.nodes.node_at(self.at);
self.at += 1; self.at += 1;
if node.has_value() { if node.has_value() {
@ -516,7 +512,7 @@ impl<K, V, ALLOCATOR: ClonableAllocator> Iterator for IterOwned<K, V, ALLOCATOR>
return None; return None;
} }
let maybe_kv = self.map.nodes.nodes[self.at].take_key_value(); let maybe_kv = self.map.nodes.node_at_mut(self.at).take_key_value();
self.at += 1; self.at += 1;
if let Some((k, v, _)) = maybe_kv { if let Some((k, v, _)) = maybe_kv {
@ -572,7 +568,7 @@ impl<'a, K: 'a, V: 'a, ALLOCATOR: ClonableAllocator> OccupiedEntry<'a, K, V, ALL
/// Gets a reference to the value in the entry. /// Gets a reference to the value in the entry.
pub fn get(&self) -> &V { pub fn get(&self) -> &V {
self.map.nodes.nodes[self.location].value_ref().unwrap() self.map.nodes.node_at(self.location).value_ref().unwrap()
} }
/// Gets a mutable reference to the value in the entry. /// Gets a mutable reference to the value in the entry.
@ -582,7 +578,11 @@ impl<'a, K: 'a, V: 'a, ALLOCATOR: ClonableAllocator> OccupiedEntry<'a, K, V, ALL
/// ///
/// [`into_mut`]: Self::into_mut /// [`into_mut`]: Self::into_mut
pub fn get_mut(&mut self) -> &mut V { pub fn get_mut(&mut self) -> &mut V {
self.map.nodes.nodes[self.location].value_mut().unwrap() self.map
.nodes
.node_at_mut(self.location)
.value_mut()
.unwrap()
} }
/// Converts the `OccupiedEntry` into a mutable reference to the value in the entry with /// Converts the `OccupiedEntry` into a mutable reference to the value in the entry with
@ -592,12 +592,19 @@ impl<'a, K: 'a, V: 'a, ALLOCATOR: ClonableAllocator> OccupiedEntry<'a, K, V, ALL
/// ///
/// [`get_mut`]: Self::get_mut /// [`get_mut`]: Self::get_mut
pub fn into_mut(self) -> &'a mut V { pub fn into_mut(self) -> &'a mut V {
self.map.nodes.nodes[self.location].value_mut().unwrap() self.map
.nodes
.node_at_mut(self.location)
.value_mut()
.unwrap()
} }
/// Sets the value of the entry and returns the entry's old value. /// Sets the value of the entry and returns the entry's old value.
pub fn insert(&mut self, value: V) -> V { pub fn insert(&mut self, value: V) -> V {
self.map.nodes.nodes[self.location].replace_value(value) self.map
.nodes
.node_at_mut(self.location)
.replace_value(value)
} }
/// Takes the value out of the entry and returns it. /// Takes the value out of the entry and returns it.
@ -783,180 +790,12 @@ const fn number_before_resize(capacity: usize) -> usize {
capacity * 85 / 100 capacity * 85 / 100
} }
struct NodeStorage<K, V, ALLOCATOR: Allocator = Global> {
nodes: Vec<Node<K, V>, ALLOCATOR>,
max_distance_to_initial_bucket: i32,
number_of_items: usize,
max_number_before_resize: usize,
}
impl<K, V, ALLOCATOR: ClonableAllocator> NodeStorage<K, V, ALLOCATOR> {
fn with_size_in(capacity: usize, alloc: ALLOCATOR) -> Self {
assert!(capacity.is_power_of_two(), "Capacity must be a power of 2");
let mut nodes = Vec::with_capacity_in(capacity, alloc);
for _ in 0..capacity {
nodes.push(Default::default());
}
Self {
nodes,
max_distance_to_initial_bucket: 0,
number_of_items: 0,
max_number_before_resize: number_before_resize(capacity),
}
}
fn allocator(&self) -> &ALLOCATOR {
self.nodes.allocator()
}
fn capacity(&self) -> usize {
self.max_number_before_resize
}
fn backing_vec_size(&self) -> usize {
self.nodes.len()
}
fn len(&self) -> usize {
self.number_of_items
}
fn insert_new(&mut self, key: K, value: V, hash: HashType) -> usize {
debug_assert!(
self.capacity() > self.len(),
"Do not have space to insert into len {} with {}",
self.backing_vec_size(),
self.len()
);
let mut new_node = Node::new_with(key, value, hash);
let mut inserted_location = usize::MAX;
loop {
let location = fast_mod(
self.backing_vec_size(),
new_node.hash() + new_node.distance() as HashType,
);
let current_node = &mut self.nodes[location];
if current_node.has_value() {
if current_node.distance() <= new_node.distance() {
mem::swap(&mut new_node, current_node);
if inserted_location == usize::MAX {
inserted_location = location;
}
}
} else {
self.nodes[location] = new_node;
if inserted_location == usize::MAX {
inserted_location = location;
}
break;
}
new_node.increment_distance();
self.max_distance_to_initial_bucket =
new_node.distance().max(self.max_distance_to_initial_bucket);
}
self.number_of_items += 1;
inserted_location
}
fn retain<F>(&mut self, mut f: F)
where
F: FnMut(&K, &mut V) -> bool,
{
let num_nodes = self.nodes.len();
let mut i = 0;
while i < num_nodes {
let node = &mut self.nodes[i];
if let Some((k, v)) = node.key_value_mut() {
if !f(k, v) {
self.remove_from_location(i);
// Need to continue before adding 1 to i because remove from location could
// put the element which was next into the ith location in the nodes array,
// so we need to check if that one needs removing too.
continue;
}
}
i += 1;
}
}
fn remove_from_location(&mut self, location: usize) -> V {
let mut current_location = location;
self.number_of_items -= 1;
loop {
let next_location =
fast_mod(self.backing_vec_size(), (current_location + 1) as HashType);
// if the next node is empty, or the next location has 0 distance to initial bucket then
// we can clear the current node
if !self.nodes[next_location].has_value() || self.nodes[next_location].distance() == 0 {
return self.nodes[current_location].take_key_value().unwrap().1;
}
self.nodes.swap(current_location, next_location);
self.nodes[current_location].decrement_distance();
current_location = next_location;
}
}
fn location<Q>(&self, key: &Q, hash: HashType) -> Option<usize>
where
K: Borrow<Q>,
Q: Eq + ?Sized,
{
for distance_to_initial_bucket in 0..(self.max_distance_to_initial_bucket + 1) {
let location = fast_mod(
self.nodes.len(),
hash + distance_to_initial_bucket as HashType,
);
let node = &self.nodes[location];
if let Some(node_key_ref) = node.key_ref() {
if node_key_ref.borrow() == key {
return Some(location);
}
} else {
return None;
}
}
None
}
fn resized_to(&mut self, new_size: usize) -> Self {
let mut new_node_storage = Self::with_size_in(new_size, self.allocator().clone());
for mut node in self.nodes.drain(..) {
if let Some((key, value, hash)) = node.take_key_value() {
new_node_storage.insert_new(key, value, hash);
}
}
new_node_storage
}
fn replace_at_location(&mut self, location: usize, key: K, value: V) -> V {
self.nodes[location].replace(key, value).1
}
}
#[cfg(test)] #[cfg(test)]
mod test { mod test {
use core::cell::RefCell; use core::cell::RefCell;
use alloc::vec::Vec;
use super::*; use super::*;
#[test] #[test]

View file

@ -0,0 +1,192 @@
use core::{alloc::Allocator, borrow::Borrow, mem};
use alloc::{alloc::Global, vec::Vec};
use crate::{node::Node, number_before_resize, ClonableAllocator, HashType};
/// The table of nodes behind the hash map, together with the bookkeeping needed to probe it
/// and to decide when it has to grow.
pub(crate) struct NodeStorage<K, V, ALLOCATOR: Allocator = Global> {
    /// The slots of the table. `with_size_in` fills this with default nodes up to the
    /// requested size, so its length is the table size used when masking hashes.
    nodes: Vec<Node<K, V>, ALLOCATOR>,
    /// The largest node distance recorded by `insert_new`. `location` never probes further
    /// than this from a key's initial bucket. It is only ever raised, never lowered on removal.
    max_distance_to_initial_bucket: i32,

    /// How many items are currently stored: incremented by `insert_new`, decremented by
    /// `remove_from_location`.
    number_of_items: usize,
    /// The value of `number_before_resize` for the table size, computed once in
    /// `with_size_in`. This is what `capacity` reports.
    max_number_before_resize: usize,
}
impl<K, V, ALLOCATOR: ClonableAllocator> NodeStorage<K, V, ALLOCATOR> {
    /// Creates storage backed by `capacity` default (empty) nodes allocated in `alloc`.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is not a power of two. Slots are found by masking the hash with
    /// `capacity - 1`, which only behaves like a modulo for powers of two.
    pub(crate) fn with_size_in(capacity: usize, alloc: ALLOCATOR) -> Self {
        assert!(capacity.is_power_of_two(), "Capacity must be a power of 2");

        let mut nodes: Vec<Node<K, V>, ALLOCATOR> = Vec::with_capacity_in(capacity, alloc);
        nodes.resize_with(capacity, Default::default);

        Self {
            nodes,
            max_distance_to_initial_bucket: 0,
            number_of_items: 0,
            max_number_before_resize: number_before_resize(capacity),
        }
    }

    /// Returns the allocator used by the backing vector.
    pub(crate) fn allocator(&self) -> &ALLOCATOR {
        self.nodes.allocator()
    }

    /// Returns the number of items this storage is meant to hold before it should be resized.
    ///
    /// This is `number_before_resize` of the table size, not the table size itself; see
    /// [`Self::backing_vec_size`] for the latter.
    pub(crate) fn capacity(&self) -> usize {
        self.max_number_before_resize
    }

    /// Returns the number of slots in the table, occupied or not.
    pub(crate) fn backing_vec_size(&self) -> usize {
        self.nodes.len()
    }

    /// Returns the number of items currently stored.
    pub(crate) fn len(&self) -> usize {
        self.number_of_items
    }

    /// Inserts a key/value pair that is not yet in the table and returns the slot index at
    /// which the new pair ended up.
    ///
    /// Probing starts at the slot for `hash` and moves forward one slot at a time. Whenever
    /// the occupant of a slot is no further from its own initial bucket than the node being
    /// carried, the two are swapped and the displaced occupant is carried onwards instead.
    /// Probing stops at the first empty slot.
    ///
    /// The caller must make sure there is room (`len() < capacity()`) and that `key` is not
    /// already present; only the former is checked, and only in debug builds.
    pub(crate) fn insert_new(&mut self, key: K, value: V, hash: HashType) -> usize {
        debug_assert!(
            self.capacity() > self.len(),
            "Do not have space to insert into len {} with {}",
            self.backing_vec_size(),
            self.len()
        );

        let mut new_node = Node::new_with(key, value, hash);
        // Slot where the caller's pair was placed. Only the first swap can place it; every
        // later swap moves a node that was displaced earlier.
        let mut inserted_location = None;

        loop {
            // Wrapping add: the sum is only used under a power-of-two mask, so wrapping gives
            // the right slot, while a plain `+` would panic in overflow-checked builds for
            // hashes close to the maximum value.
            let location = fast_mod(
                self.backing_vec_size(),
                new_node
                    .hash()
                    .wrapping_add(new_node.distance() as HashType),
            );
            let current_node = &mut self.nodes[location];

            if !current_node.has_value() {
                *current_node = new_node;
                self.number_of_items += 1;
                // If no swap happened, the caller's pair is the node that was just stored.
                return inserted_location.unwrap_or(location);
            }

            if current_node.distance() <= new_node.distance() {
                mem::swap(&mut new_node, current_node);
                inserted_location = inserted_location.or(Some(location));
            }

            new_node.increment_distance();
            self.max_distance_to_initial_bucket =
                new_node.distance().max(self.max_distance_to_initial_bucket);
        }
    }

    /// Keeps only the items for which `f` returns `true`, removing all others.
    pub(crate) fn retain<F>(&mut self, mut f: F)
    where
        F: FnMut(&K, &mut V) -> bool,
    {
        let num_nodes = self.nodes.len();
        let mut i = 0;

        while i < num_nodes {
            // Slots without a key/value pair are skipped as if they had been kept.
            let keep = self.nodes[i]
                .key_value_mut()
                .map_or(true, |(k, v)| f(k, v));

            if keep {
                i += 1;
            } else {
                // Deliberately do not advance `i`: removing from a location can move the
                // element which was next into the ith location in the nodes array, so that
                // one needs checking too.
                self.remove_from_location(i);
            }
        }
    }

    /// Removes the item stored at `location` and returns its value.
    ///
    /// The following nodes are shifted back by one slot (and their distance decremented)
    /// until an empty slot or a node sitting in its initial bucket is reached, so that no
    /// gap is left in the middle of a probe sequence.
    ///
    /// # Panics
    ///
    /// Panics if the node at `location` yields no key/value pair.
    pub(crate) fn remove_from_location(&mut self, location: usize) -> V {
        let mut current_location = location;
        self.number_of_items -= 1;

        loop {
            let next_location =
                fast_mod(self.backing_vec_size(), (current_location + 1) as HashType);

            // if the next node is empty, or the next location has 0 distance to initial bucket
            // then we can clear the current node
            let next_node = &self.nodes[next_location];
            if !next_node.has_value() || next_node.distance() == 0 {
                return self.nodes[current_location].take_key_value().unwrap().1;
            }

            self.nodes.swap(current_location, next_location);
            self.nodes[current_location].decrement_distance();
            current_location = next_location;
        }
    }

    /// Returns the slot index holding `key`, or `None` if it is not in the table.
    ///
    /// `hash` must be the hash of `key`. At most `max_distance_to_initial_bucket + 1` slots
    /// are inspected, and the search stops early at the first slot without a key.
    pub(crate) fn location<Q>(&self, key: &Q, hash: HashType) -> Option<usize>
    where
        K: Borrow<Q>,
        Q: Eq + ?Sized,
    {
        for distance_to_initial_bucket in 0..=self.max_distance_to_initial_bucket {
            // Wrapping add for the same reason as in `insert_new`: only the masked low bits
            // matter, and a plain `+` could overflow for large hashes.
            let location = fast_mod(
                self.nodes.len(),
                hash.wrapping_add(distance_to_initial_bucket as HashType),
            );

            // A slot without a key ends the probe sequence: the key cannot be further along.
            let node_key_ref = self.nodes[location].key_ref()?;
            if node_key_ref.borrow() == key {
                return Some(location);
            }
        }

        None
    }

    /// Builds a new storage with `new_size` slots and moves every item of `self` into it.
    ///
    /// The new storage uses a clone of this storage's allocator. `self` is left with its
    /// backing vector drained, so the caller is expected to replace it with the returned
    /// storage.
    ///
    /// # Panics
    ///
    /// Panics if `new_size` is not a power of two.
    pub(crate) fn resized_to(&mut self, new_size: usize) -> Self {
        let mut new_node_storage = Self::with_size_in(new_size, self.allocator().clone());

        for mut node in self.nodes.drain(..) {
            if let Some((key, value, hash)) = node.take_key_value() {
                new_node_storage.insert_new(key, value, hash);
            }
        }

        new_node_storage
    }

    /// Replaces the contents of the node at `location` with `key` and `value`, returning the
    /// second element of what `Node::replace` hands back (presumably the previous value).
    pub(crate) fn replace_at_location(&mut self, location: usize, key: K, value: V) -> V {
        self.nodes[location].replace(key, value).1
    }

    /// Iterates mutably over every slot of the table, including empty ones.
    pub(crate) fn iter_mut(&mut self) -> impl Iterator<Item = &mut Node<K, V>> {
        self.nodes.iter_mut()
    }

    /// Returns the node in slot `at`.
    ///
    /// # Panics
    ///
    /// Panics if `at` is out of bounds.
    pub(crate) fn node_at(&self, at: usize) -> &Node<K, V> {
        &self.nodes[at]
    }

    /// Returns the node in slot `at` mutably.
    ///
    /// # Panics
    ///
    /// Panics if `at` is out of bounds.
    pub(crate) fn node_at_mut(&mut self, at: usize) -> &mut Node<K, V> {
        &mut self.nodes[at]
    }
}
/// Reduces `hash` to an index in `0..len` by keeping only its low bits.
///
/// `len` must be a power of two, which is what makes the bit mask equivalent to a modulo.
/// This is checked in debug builds only.
const fn fast_mod(len: usize, hash: HashType) -> usize {
    debug_assert!(len.is_power_of_two(), "Length must be a power of 2");

    let mask = len - 1;
    mask & hash as usize
}