Merge pull request #201 from gwilymk/speed-up-hashmap-massively

Remove the division on every insert operation
This commit is contained in:
Gwilym Kuiper 2022-03-24 19:01:17 +00:00 committed by GitHub
commit c623d8b708
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -106,7 +106,7 @@ impl<K, V> HashMap<K, V> {
pub fn with_capacity(capacity: usize) -> Self { pub fn with_capacity(capacity: usize) -> Self {
for i in 0..32 { for i in 0..32 {
let attempted_size = 1usize << i; let attempted_size = 1usize << i;
if attempted_size * 85 / 100 > capacity { if number_before_resize(attempted_size) > capacity {
return Self::with_size(attempted_size); return Self::with_size(attempted_size);
} }
} }
@ -124,7 +124,7 @@ impl<K, V> HashMap<K, V> {
/// Returns the number of elements the map can hold /// Returns the number of elements the map can hold
pub fn capacity(&self) -> usize { pub fn capacity(&self) -> usize {
self.nodes.capacity() * 85 / 100 self.nodes.capacity()
} }
/// An iterator visiting all keys in an arbitrary order /// An iterator visiting all keys in an arbitrary order
@ -144,7 +144,7 @@ impl<K, V> HashMap<K, V> {
/// Removes all elements from the map /// Removes all elements from the map
pub fn clear(&mut self) { pub fn clear(&mut self) {
self.nodes = NodeStorage::with_size(self.capacity()); self.nodes = NodeStorage::with_size(self.nodes.backing_vec_size());
} }
/// An iterator visiting all key-value pairs in an arbitrary order /// An iterator visiting all key-value pairs in an arbitrary order
@ -170,10 +170,10 @@ impl<K, V> HashMap<K, V> {
fn resize(&mut self, new_size: usize) { fn resize(&mut self, new_size: usize) {
assert!( assert!(
new_size >= self.nodes.capacity(), new_size >= self.nodes.backing_vec_size(),
"Can only increase the size of a hash map" "Can only increase the size of a hash map"
); );
if new_size == self.nodes.capacity() { if new_size == self.nodes.backing_vec_size() {
return; return;
} }
@ -209,8 +209,8 @@ where
if let Some(location) = self.nodes.location(&key, hash) { if let Some(location) = self.nodes.location(&key, hash) {
Some(self.nodes.replace_at_location(location, key, value)) Some(self.nodes.replace_at_location(location, key, value))
} else { } else {
if self.nodes.capacity() * 85 / 100 <= self.len() { if self.nodes.capacity() <= self.len() {
self.resize(self.nodes.capacity() * 2); self.resize(self.nodes.backing_vec_size() * 2);
} }
self.nodes.insert_new(key, value, hash); self.nodes.insert_new(key, value, hash);
@ -226,8 +226,8 @@ where
self.nodes.replace_at_location(location, key, value); self.nodes.replace_at_location(location, key, value);
location location
} else { } else {
if self.nodes.capacity() * 85 / 100 <= self.len() { if self.nodes.capacity() <= self.len() {
self.resize(self.nodes.capacity() * 2); self.resize(self.nodes.backing_vec_size() * 2);
} }
self.nodes.insert_new(key, value, hash) self.nodes.insert_new(key, value, hash)
@ -301,7 +301,7 @@ impl<'a, K, V> Iterator for Iter<'a, K, V> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
loop { loop {
if self.at >= self.map.nodes.capacity() { if self.at >= self.map.nodes.backing_vec_size() {
return None; return None;
} }
@ -562,11 +562,16 @@ where
} }
} }
const fn number_before_resize(capacity: usize) -> usize {
capacity * 85 / 100
}
struct NodeStorage<K, V> { struct NodeStorage<K, V> {
nodes: Vec<Node<K, V>>, nodes: Vec<Node<K, V>>,
max_distance_to_initial_bucket: i32, max_distance_to_initial_bucket: i32,
number_of_items: usize, number_of_items: usize,
max_number_before_resize: usize,
} }
impl<K, V> NodeStorage<K, V> { impl<K, V> NodeStorage<K, V> {
@ -577,10 +582,15 @@ impl<K, V> NodeStorage<K, V> {
nodes: iter::repeat_with(Default::default).take(capacity).collect(), nodes: iter::repeat_with(Default::default).take(capacity).collect(),
max_distance_to_initial_bucket: 0, max_distance_to_initial_bucket: 0,
number_of_items: 0, number_of_items: 0,
max_number_before_resize: number_before_resize(capacity),
} }
} }
fn capacity(&self) -> usize { fn capacity(&self) -> usize {
self.max_number_before_resize
}
fn backing_vec_size(&self) -> usize {
self.nodes.len() self.nodes.len()
} }
@ -590,9 +600,9 @@ impl<K, V> NodeStorage<K, V> {
fn insert_new(&mut self, key: K, value: V, hash: HashType) -> usize { fn insert_new(&mut self, key: K, value: V, hash: HashType) -> usize {
debug_assert!( debug_assert!(
self.capacity() * 85 / 100 > self.len(), self.capacity() > self.len(),
"Do not have space to insert into len {} with {}", "Do not have space to insert into len {} with {}",
self.capacity(), self.backing_vec_size(),
self.len() self.len()
); );
@ -601,7 +611,7 @@ impl<K, V> NodeStorage<K, V> {
loop { loop {
let location = fast_mod( let location = fast_mod(
self.capacity(), self.backing_vec_size(),
new_node.hash + new_node.distance() as HashType, new_node.hash + new_node.distance() as HashType,
); );
let current_node = &mut self.nodes[location]; let current_node = &mut self.nodes[location];
@ -636,7 +646,8 @@ impl<K, V> NodeStorage<K, V> {
self.number_of_items -= 1; self.number_of_items -= 1;
loop { loop {
let next_location = fast_mod(self.capacity(), (current_location + 1) as HashType); let next_location =
fast_mod(self.backing_vec_size(), (current_location + 1) as HashType);
// if the next node is empty, or the next location has 0 distance to initial bucket then // if the next node is empty, or the next location has 0 distance to initial bucket then
// we can clear the current node // we can clear the current node