mirror of
https://github.com/italicsjenga/valence.git
synced 2025-01-26 05:26:34 +11:00
VarInt/VarLong Encode and Decode Optimizations (#227)
Aims to improve upon VarInt/VarLong Encode and Decode procedures (#208).

# Baselines

## VarInt

![VarInt::decode](https://user-images.githubusercontent.com/60993440/218295740-c7507993-1c1f-4d71-a42b-4ba9f1437bd7.png)
![VarInt::encode](https://user-images.githubusercontent.com/60993440/218295804-3cda044a-e2e4-4109-83ce-6e0a135d467b.png)

## VarLong

![VarLong::encode](https://user-images.githubusercontent.com/60993440/218295958-e445c2ea-ec2e-422f-92a4-d53bf41c8ec4.png)
![VarLong::decode](https://user-images.githubusercontent.com/60993440/218296047-802cec4a-b69b-435b-a140-0d9bf5df49a8.png)
This commit is contained in:
parent
e1a3e2dc00
commit
c494b83a56
3 changed files with 115 additions and 13 deletions
|
@ -10,14 +10,14 @@ use valence_protocol::packets::s2c::play::{
|
||||||
use valence_protocol::text::Color;
|
use valence_protocol::text::Color;
|
||||||
use valence_protocol::{
|
use valence_protocol::{
|
||||||
encode_packet, encode_packet_compressed, ByteAngle, Decode, Encode, ItemKind,
|
encode_packet, encode_packet_compressed, ByteAngle, Decode, Encode, ItemKind,
|
||||||
LengthPrefixedArray, PacketDecoder, PacketEncoder, TextFormat, VarInt,
|
LengthPrefixedArray, PacketDecoder, PacketEncoder, TextFormat, VarInt, VarLong,
|
||||||
};
|
};
|
||||||
|
|
||||||
criterion_group! {
|
criterion_group! {
|
||||||
name = benches;
|
name = benches;
|
||||||
config = Criterion::default()
|
config = Criterion::default()
|
||||||
.measurement_time(Duration::from_secs(5)).confidence_level(0.99);
|
.measurement_time(Duration::from_secs(5)).confidence_level(0.99);
|
||||||
targets = blocks, packets, var_int, decode_array
|
targets = blocks, packets, var_int, var_long, decode_array
|
||||||
}
|
}
|
||||||
criterion_main!(benches);
|
criterion_main!(benches);
|
||||||
|
|
||||||
|
@ -358,6 +358,36 @@ fn var_int(c: &mut Criterion) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn var_long(c: &mut Criterion) {
|
||||||
|
let mut rng = rand::thread_rng();
|
||||||
|
|
||||||
|
c.bench_function("VarLong::encode", |b| {
|
||||||
|
b.iter_with_setup(
|
||||||
|
|| rng.gen(),
|
||||||
|
|i| {
|
||||||
|
let i: i64 = black_box(i);
|
||||||
|
|
||||||
|
let mut buf = [0; VarLong::MAX_SIZE];
|
||||||
|
let _ = black_box(VarLong(i).encode(buf.as_mut_slice()));
|
||||||
|
},
|
||||||
|
);
|
||||||
|
});
|
||||||
|
|
||||||
|
c.bench_function("VarLong::decode", |b| {
|
||||||
|
b.iter_with_setup(
|
||||||
|
|| {
|
||||||
|
let mut buf = [0; VarLong::MAX_SIZE];
|
||||||
|
VarLong(rng.gen()).encode(buf.as_mut_slice()).unwrap();
|
||||||
|
buf
|
||||||
|
},
|
||||||
|
|buf| {
|
||||||
|
let mut r = black_box(buf.as_slice());
|
||||||
|
let _ = black_box(VarLong::decode(&mut r));
|
||||||
|
},
|
||||||
|
)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
fn decode_array(c: &mut Criterion) {
|
fn decode_array(c: &mut Criterion) {
|
||||||
let floats = [123.0, 456.0, 789.0];
|
let floats = [123.0, 456.0, 789.0];
|
||||||
let mut buf = [0u8; 24];
|
let mut buf = [0u8; 24];
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use std::io::{Read, Write};
|
use std::io::{Read, Write};
|
||||||
|
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
use byteorder::{ReadBytesExt, WriteBytesExt};
|
use byteorder::ReadBytesExt;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
|
|
||||||
use crate::{Decode, Encode};
|
use crate::{Decode, Encode};
|
||||||
|
@ -48,16 +48,31 @@ pub enum VarIntDecodeError {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Encode for VarInt {
|
impl Encode for VarInt {
|
||||||
|
// Adapted from the scalar encode routine of the `varint-simd` crate:
|
||||||
|
// https://github.com/as-com/varint-simd/blob/0f468783da8e181929b01b9c6e9f741c1fe09825/src/encode/mod.rs#L71
|
||||||
fn encode(&self, mut w: impl Write) -> anyhow::Result<()> {
|
fn encode(&self, mut w: impl Write) -> anyhow::Result<()> {
|
||||||
let mut val = self.0 as u32;
|
let x = self.0 as u64;
|
||||||
loop {
|
let stage1 = (x & 0x000000000000007f)
|
||||||
if val & 0b11111111111111111111111110000000 == 0 {
|
| ((x & 0x0000000000003f80) << 1)
|
||||||
w.write_u8(val as u8)?;
|
| ((x & 0x00000000001fc000) << 2)
|
||||||
return Ok(());
|
| ((x & 0x000000000fe00000) << 3)
|
||||||
}
|
| ((x & 0x00000000f0000000) << 4);
|
||||||
w.write_u8(val as u8 & 0b01111111 | 0b10000000)?;
|
|
||||||
val >>= 7;
|
let leading = stage1.leading_zeros();
|
||||||
}
|
|
||||||
|
let unused_bytes = (leading - 1) >> 3;
|
||||||
|
let bytes_needed = 8 - unused_bytes;
|
||||||
|
|
||||||
|
// set all but the last MSBs
|
||||||
|
let msbs = 0x8080808080808080;
|
||||||
|
let msbmask = 0xffffffffffffffff >> (((8 - bytes_needed + 1) << 3) - 1);
|
||||||
|
|
||||||
|
let merged = stage1 | (msbs & msbmask);
|
||||||
|
let bytes = merged.to_le_bytes();
|
||||||
|
|
||||||
|
w.write_all(unsafe { bytes.get_unchecked(..bytes_needed as usize) })?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
|
||||||
use anyhow::bail;
|
use anyhow::bail;
|
||||||
use byteorder::{ReadBytesExt, WriteBytesExt};
|
use byteorder::ReadBytesExt;
|
||||||
|
|
||||||
use crate::{Decode, Encode, Result};
|
use crate::{Decode, Encode, Result};
|
||||||
|
|
||||||
|
@ -26,7 +26,64 @@ impl VarLong {
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Encode for VarLong {
|
impl Encode for VarLong {
|
||||||
|
// Adapted from the SIMD encode routine of the `varint-simd` crate:
|
||||||
|
// https://github.com/as-com/varint-simd/blob/0f468783da8e181929b01b9c6e9f741c1fe09825/src/encode/mod.rs#L71
|
||||||
|
#[cfg(all(
|
||||||
|
any(target_arch = "x86", target_arch = "x86_64"),
|
||||||
|
not(target_os = "macos")
|
||||||
|
))]
|
||||||
fn encode(&self, mut w: impl Write) -> Result<()> {
|
fn encode(&self, mut w: impl Write) -> Result<()> {
|
||||||
|
#[cfg(target_arch = "x86")]
|
||||||
|
use std::arch::x86::*;
|
||||||
|
#[cfg(target_arch = "x86_64")]
|
||||||
|
use std::arch::x86_64::*;
|
||||||
|
|
||||||
|
// Break the number into 7-bit parts and spread them out into a vector
|
||||||
|
let mut res = [0u64; 2];
|
||||||
|
{
|
||||||
|
let x = self.0 as u64;
|
||||||
|
|
||||||
|
res[0] = unsafe { _pdep_u64(x, 0x7f7f7f7f7f7f7f7f) };
|
||||||
|
res[1] = unsafe { _pdep_u64(x >> 56, 0x000000000000017f) };
|
||||||
|
}
|
||||||
|
let stage1: __m128i = unsafe { std::mem::transmute(res) };
|
||||||
|
|
||||||
|
// Create a mask for where there exist values
|
||||||
|
// This signed comparison works because all MSBs should be cleared at this point
|
||||||
|
// Also handle the special case when num == 0: byte 0 is always marked as present, so zero still encodes to one byte
|
||||||
|
let minimum =
|
||||||
|
unsafe { _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffu8 as i8) };
|
||||||
|
let exists = unsafe { _mm_or_si128(_mm_cmpgt_epi8(stage1, _mm_setzero_si128()), minimum) };
|
||||||
|
let bits = unsafe { _mm_movemask_epi8(exists) };
|
||||||
|
|
||||||
|
// Count the number of bytes used
|
||||||
|
let bytes_needed = 32 - bits.leading_zeros() as u8; // lzcnt on supported CPUs
|
||||||
|
|
||||||
|
// Fill that many bytes into a vector
|
||||||
|
let ascend = unsafe { _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15) };
|
||||||
|
let mask = unsafe { _mm_cmplt_epi8(ascend, _mm_set1_epi8(bytes_needed as i8)) };
|
||||||
|
|
||||||
|
// Shift the mask down 1 byte so it covers every used byte except the last one, then
|
||||||
|
// keep only each byte's MSB: the continuation bit is set on all bytes but the final one
|
||||||
|
let shift = unsafe { _mm_bsrli_si128(mask, 1) };
|
||||||
|
let msbmask = unsafe { _mm_and_si128(shift, _mm_set1_epi8(128u8 as i8)) };
|
||||||
|
|
||||||
|
// Merge the MSB bits into the vector
|
||||||
|
let merged = unsafe { _mm_or_si128(stage1, msbmask) };
|
||||||
|
let bytes = unsafe { std::mem::transmute::<__m128i, [u8; 16]>(merged) };
|
||||||
|
|
||||||
|
w.write_all(unsafe { bytes.get_unchecked(..bytes_needed as usize) })?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(any(
|
||||||
|
not(any(target_arch = "x86", target_arch = "x86_64")),
|
||||||
|
target_os = "macos"
|
||||||
|
))]
|
||||||
|
fn encode(&self, mut w: impl Write) -> Result<()> {
|
||||||
|
use byteorder::WriteBytesExt;
|
||||||
|
|
||||||
let mut val = self.0 as u64;
|
let mut val = self.0 as u64;
|
||||||
loop {
|
loop {
|
||||||
if val & 0b1111111111111111111111111111111111111111111111111111111110000000 == 0 {
|
if val & 0b1111111111111111111111111111111111111111111111111111111110000000 == 0 {
|
||||||
|
|
Loading…
Add table
Reference in a new issue