mirror of
https://github.com/italicsjenga/valence.git
synced 2025-01-11 07:11:30 +11:00
VarInt/VarLong Encode and Decode Optimizations (#227)
Aims to improve the VarInt/VarLong encode and decode procedures (#208).

# Baselines

## VarInt

![VarInt::decode](https://user-images.githubusercontent.com/60993440/218295740-c7507993-1c1f-4d71-a42b-4ba9f1437bd7.png)
![VarInt::encode](https://user-images.githubusercontent.com/60993440/218295804-3cda044a-e2e4-4109-83ce-6e0a135d467b.png)

## VarLong

![VarLong::encode](https://user-images.githubusercontent.com/60993440/218295958-e445c2ea-ec2e-422f-92a4-d53bf41c8ec4.png)
![VarLong::decode](https://user-images.githubusercontent.com/60993440/218296047-802cec4a-b69b-435b-a140-0d9bf5df49a8.png)
This commit is contained in:
parent
e1a3e2dc00
commit
c494b83a56
|
@ -10,14 +10,14 @@ use valence_protocol::packets::s2c::play::{
|
|||
use valence_protocol::text::Color;
|
||||
use valence_protocol::{
|
||||
encode_packet, encode_packet_compressed, ByteAngle, Decode, Encode, ItemKind,
|
||||
LengthPrefixedArray, PacketDecoder, PacketEncoder, TextFormat, VarInt,
|
||||
LengthPrefixedArray, PacketDecoder, PacketEncoder, TextFormat, VarInt, VarLong,
|
||||
};
|
||||
|
||||
// Declares the Criterion benchmark group `benches` and the harness entry point.
// NOTE(review): this is a rendered diff without +/- markers; the two `targets`
// lines below appear to be the old and the new version of the same line (the
// second one adds `var_long`) — confirm against the repository.
criterion_group! {
|
||||
name = benches;
|
||||
// Each benchmark is measured for 5 seconds at a 0.99 confidence level.
config = Criterion::default()
|
||||
.measurement_time(Duration::from_secs(5)).confidence_level(0.99);
|
||||
targets = blocks, packets, var_int, decode_array
|
||||
targets = blocks, packets, var_int, var_long, decode_array
|
||||
}
|
||||
// Generates `main` so that it runs the `benches` group.
criterion_main!(benches);
|
||||
|
||||
|
@ -358,6 +358,36 @@ fn var_int(c: &mut Criterion) {
|
|||
});
|
||||
}
|
||||
|
||||
/// Registers two Criterion benchmarks, "VarLong::encode" and "VarLong::decode",
/// each fed with a freshly generated random value per iteration.
fn var_long(c: &mut Criterion) {
|
||||
// One thread-local RNG is shared by the setup closures of both benchmarks.
let mut rng = rand::thread_rng();
|
||||
|
||||
c.bench_function("VarLong::encode", |b| {
|
||||
// `iter_with_setup` takes a setup closure that builds the input and a
// routine closure that consumes it (see the Criterion docs for what is timed).
b.iter_with_setup(
|
||||
// Setup: a random value; its type is fixed to `i64` by the annotation below.
|| rng.gen(),
|
||||
|i| {
|
||||
// `black_box` hides the value from the optimizer.
let i: i64 = black_box(i);
|
||||
|
||||
// Stack buffer of `VarLong::MAX_SIZE` bytes used as the write target.
let mut buf = [0; VarLong::MAX_SIZE];
|
||||
// The encode result is deliberately discarded; only the call itself matters here.
let _ = black_box(VarLong(i).encode(buf.as_mut_slice()));
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
c.bench_function("VarLong::decode", |b| {
|
||||
b.iter_with_setup(
|
||||
// Setup: encode a random value into a fresh buffer and hand the buffer to the routine.
|| {
|
||||
let mut buf = [0; VarLong::MAX_SIZE];
|
||||
// Panics if encoding fails, which would abort the benchmark run.
VarLong(rng.gen()).encode(buf.as_mut_slice()).unwrap();
|
||||
buf
|
||||
},
|
||||
|buf| {
|
||||
// Decode reads from a mutable reference to a byte slice over the prepared buffer.
let mut r = black_box(buf.as_slice());
|
||||
// The decoded value (or error) is discarded after being passed through `black_box`.
let _ = black_box(VarLong::decode(&mut r));
|
||||
},
|
||||
)
|
||||
});
|
||||
}
|
||||
|
||||
fn decode_array(c: &mut Criterion) {
|
||||
let floats = [123.0, 456.0, 789.0];
|
||||
let mut buf = [0u8; 24];
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use std::io::{Read, Write};
|
||||
|
||||
use anyhow::bail;
|
||||
use byteorder::{ReadBytesExt, WriteBytesExt};
|
||||
use byteorder::ReadBytesExt;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::{Decode, Encode};
|
||||
|
@ -48,16 +48,31 @@ pub enum VarIntDecodeError {
|
|||
}
|
||||
|
||||
// Variable-length encoding of `VarInt`: 7 payload bits per byte, low groups
// first, with bit 7 set on every byte except the last one written.
// NOTE(review): this is a rendered diff without +/- markers. The byte-at-a-time
// loop below appears to be the removed implementation and the branch-free code
// after it the replacement — confirm against the repository.
impl Encode for VarInt {
|
||||
// Adapted from VarInt-Simd encode
|
||||
// https://github.com/as-com/varint-simd/blob/0f468783da8e181929b01b9c6e9f741c1fe09825/src/encode/mod.rs#L71
|
||||
fn encode(&self, mut w: impl Write) -> anyhow::Result<()> {
|
||||
// Loop form: treat the value as unsigned and emit 7 bits per iteration.
let mut val = self.0 as u32;
|
||||
loop {
|
||||
// Nothing left above the low 7 bits: write the final byte (bit 7 clear) and stop.
if val & 0b11111111111111111111111110000000 == 0 {
|
||||
w.write_u8(val as u8)?;
|
||||
return Ok(());
|
||||
}
|
||||
// More groups follow: write the low 7 bits with the continuation bit set.
w.write_u8(val as u8 & 0b01111111 | 0b10000000)?;
|
||||
val >>= 7;
|
||||
}
|
||||
// Branch-free form: build all output bytes inside one u64.
// The masks below only read the low 32 bits of `x`, so whatever the cast
// places in the upper 32 bits is ignored.
let x = self.0 as u64;
|
||||
// Spread each 7-bit group into its own byte: group k is shifted left by k so
// that bit 7 of every byte stays clear. The last group holds only 4 bits.
let stage1 = (x & 0x000000000000007f)
|
||||
| ((x & 0x0000000000003f80) << 1)
|
||||
| ((x & 0x00000000001fc000) << 2)
|
||||
| ((x & 0x000000000fe00000) << 3)
|
||||
| ((x & 0x00000000f0000000) << 4);
|
||||
|
||||
let leading = stage1.leading_zeros();
|
||||
|
||||
// `stage1` occupies at most bits 0..=35, so `leading` is at least 28 and the
// subtraction cannot underflow. For `stage1 == 0`, `leading` is 64, which
// yields 7 unused bytes and therefore 1 byte written.
let unused_bytes = (leading - 1) >> 3;
|
||||
let bytes_needed = 8 - unused_bytes;
|
||||
|
||||
// set all but the last MSBs
|
||||
let msbs = 0x8080808080808080;
|
||||
// Keeps bit 7 of the lowest `bytes_needed - 1` bytes only, so the final
// output byte is left without a continuation bit.
let msbmask = 0xffffffffffffffff >> (((8 - bytes_needed + 1) << 3) - 1);
|
||||
|
||||
let merged = stage1 | (msbs & msbmask);
|
||||
// Little-endian order puts the lowest 7-bit group first in the byte array.
let bytes = merged.to_le_bytes();
|
||||
|
||||
// SAFETY: `bytes_needed` is in 1..=5 (see the bound on `leading` above), which
// is within the 8-byte array, so the unchecked range is in bounds.
w.write_all(unsafe { bytes.get_unchecked(..bytes_needed as usize) })?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
use std::io::Write;
|
||||
|
||||
use anyhow::bail;
|
||||
use byteorder::{ReadBytesExt, WriteBytesExt};
|
||||
use byteorder::ReadBytesExt;
|
||||
|
||||
use crate::{Decode, Encode, Result};
|
||||
|
||||
|
@ -26,7 +26,64 @@ impl VarLong {
|
|||
}
|
||||
|
||||
impl Encode for VarLong {
|
||||
// Adapted from VarInt-Simd encode
|
||||
// https://github.com/as-com/varint-simd/blob/0f468783da8e181929b01b9c6e9f741c1fe09825/src/encode/mod.rs#L71
|
||||
// SIMD encode path: compiled for x86 / x86_64 targets other than macOS.
// Produces 7 payload bits per output byte, low groups first, with bit 7 set on
// every byte except the last one written.
// NOTE(review): `_pdep_u64` is a BMI2 intrinsic, but the cfg below only checks
// architecture and OS, not `target_feature = "bmi2"` — confirm BMI2 is
// guaranteed elsewhere (build flags or runtime detection). Also verify that
// `_pdep_u64` is actually exported by `std::arch::x86` for 32-bit builds.
#[cfg(all(
|
||||
any(target_arch = "x86", target_arch = "x86_64"),
|
||||
not(target_os = "macos")
|
||||
))]
|
||||
fn encode(&self, mut w: impl Write) -> Result<()> {
|
||||
#[cfg(target_arch = "x86")]
|
||||
use std::arch::x86::*;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::*;
|
||||
|
||||
// Break the number into 7-bit parts and spread them out into a vector
|
||||
let mut res = [0u64; 2];
|
||||
{
|
||||
let x = self.0 as u64;
|
||||
|
||||
// Low 56 bits: deposited 7 bits per byte into the first 8 bytes.
res[0] = unsafe { _pdep_u64(x, 0x7f7f7f7f7f7f7f7f) };
|
||||
// Top 8 bits: 7 bits go into byte 8 and the final bit into byte 9.
res[1] = unsafe { _pdep_u64(x >> 56, 0x000000000000017f) };
|
||||
}
|
||||
// Reinterpret the two u64 words as one 128-bit vector; `res[0]` supplies bytes 0..=7.
let stage1: __m128i = unsafe { std::mem::transmute(res) };
|
||||
|
||||
// Create a mask for where there exist values
|
||||
// This signed comparison works because all MSBs should be cleared at this point
|
||||
// Also handle the special case when num == 0
|
||||
// `_mm_set_epi8` lists bytes from highest to lowest, so the trailing 0xff is
// byte 0: it forces the lowest byte to count as used even for a zero value.
let minimum =
|
||||
unsafe { _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffu8 as i8) };
|
||||
let exists = unsafe { _mm_or_si128(_mm_cmpgt_epi8(stage1, _mm_setzero_si128()), minimum) };
|
||||
// One bit per byte lane: bit i is set when byte i is non-zero (or i == 0).
let bits = unsafe { _mm_movemask_epi8(exists) };
|
||||
|
||||
// Count the number of bytes used
|
||||
// Index of the highest set bit plus one; zero bytes below it are still emitted.
let bytes_needed = 32 - bits.leading_zeros() as u8; // lzcnt on supported CPUs
|
||||
|
||||
// Fill that many bytes into a vector
|
||||
// `mask` is 0xff in lanes 0..bytes_needed and 0 elsewhere.
let ascend = unsafe { _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) };
|
||||
let mask = unsafe { _mm_cmplt_epi8(ascend, _mm_set1_epi8(bytes_needed as i8)) };
|
||||
|
||||
// Shift the mask down by one byte so it covers every used byte except the
|
||||
// last one, then keep only bit 7 of each lane: these are the continuation bits
|
||||
let shift = unsafe { _mm_bsrli_si128(mask, 1) };
|
||||
let msbmask = unsafe { _mm_and_si128(shift, _mm_set1_epi8(128u8 as i8)) };
|
||||
|
||||
// Merge the MSB bits into the vector
|
||||
let merged = unsafe { _mm_or_si128(stage1, msbmask) };
|
||||
let bytes = unsafe { std::mem::transmute::<__m128i, [u8; 16]>(merged) };
|
||||
|
||||
// SAFETY: only bytes 0..=9 of `stage1` can be non-zero (see the deposit masks),
// so `bytes_needed` is in 1..=10, within the 16-byte array.
w.write_all(unsafe { bytes.get_unchecked(..bytes_needed as usize) })?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(any(
|
||||
not(any(target_arch = "x86", target_arch = "x86_64")),
|
||||
target_os = "macos"
|
||||
))]
|
||||
fn encode(&self, mut w: impl Write) -> Result<()> {
|
||||
use byteorder::WriteBytesExt;
|
||||
|
||||
let mut val = self.0 as u64;
|
||||
loop {
|
||||
if val & 0b1111111111111111111111111111111111111111111111111111111110000000 == 0 {
|
||||
|
|
Loading…
Reference in a new issue