1
0
Fork 0
nih-plug/src/util/stft.rs

293 lines
14 KiB
Rust
Raw Normal View History

//! Utilities for buffering audio, likely used as part of a short-time Fourier transform.
use crate::buffer::Buffer;
/// Buffers incoming audio into equally sized blocks, hands every block to a callback for
/// transformation, and writes the results of earlier blocks back to the host buffer. The output is
/// therefore delayed by one block size.
///
/// The `NUM_SIDECHAIN_INPUTS` constant controls how many additional, read-only inputs are
/// buffered alongside the main input. Sidechain inputs are never written to, so they can only be
/// used for analysis. Every sidechain input has the same number of channels as the main input.
///
/// TODO: Better name?
/// TODO: We may need something like this purely for analysis, e.g. for showing spectrums in a GUI.
///       Figure out the cleanest way to adapt this for the non-processing use case.
pub struct StftHelper<const NUM_SIDECHAIN_INPUTS: usize = 0> {
    /// The write/read position shared by all ring buffers below. It is always smaller than the
    /// current block size and wraps back to 0 after a full block's worth of samples.
    current_pos: usize,

    /// Single channel scratch space. A ring buffer's contents are copied here before being passed
    /// to the plugin's callback, which is needed because overlapping blocks wrap around the end of
    /// the ring buffer.
    scratch_buffer: Vec<f32>,

    // One ring buffer per channel. The input ring buffers hold the most recent input samples, the
    // output ring buffers hold the already processed output that has been accumulated by adding
    // overlapping windows. Output samples are written to the host's buffer (and then cleared)
    // while new input samples are being stored.
    main_input_ring_buffers: Vec<Vec<f32>>,
    main_output_ring_buffers: Vec<Vec<f32>>,

    /// Input-only ring buffers for the sidechain inputs, indexed by
    /// `[sidechain_input_idx][channel_idx]`.
    sidechain_ring_buffers: [Vec<Vec<f32>>; NUM_SIDECHAIN_INPUTS],
}
impl<const NUM_SIDECHAIN_INPUTS: usize> StftHelper<NUM_SIDECHAIN_INPUTS> {
/// Create a [`StftHelper`] for [`Buffer`]s with `num_channels` channels, allocating every internal
/// buffer for blocks of `max_block_size` samples. The block size initially equals
/// `max_block_size`; use [`set_block_size()`][`Self::set_block_size()`] afterwards if you do not
/// need the full capacity upfront.
///
/// # Panics
///
/// Panics if `num_channels == 0 || max_block_size == 0`.
pub fn new(num_channels: usize, max_block_size: usize) -> Self {
    assert_ne!(num_channels, 0);
    assert_ne!(max_block_size, 0);

    // Every set of ring buffers has the same shape: one zeroed buffer per channel
    let zeroed_channels = || -> Vec<Vec<f32>> { vec![vec![0.0; max_block_size]; num_channels] };

    Self {
        main_input_ring_buffers: zeroed_channels(),
        main_output_ring_buffers: zeroed_channels(),
        // Mapping over an array of units is a workaround for initializing an array of non-`Copy`
        // values
        sidechain_ring_buffers: [(); NUM_SIDECHAIN_INPUTS].map(|()| zeroed_channels()),
        scratch_buffer: vec![0.0; max_block_size],
        current_pos: 0,
    }
}
/// Change the current block size. This resizes and zeroes all ring buffers and the scratch buffer
/// and resets the ring buffer position, causing the next block to output silence.
///
/// # Panics
///
/// Will panic if `block_size == 0` (consistent with [`new()`][`Self::new()`], and because a zero
/// sized block would later cause a division by zero while processing), or if `block_size` exceeds
/// the capacity that was allocated for the ring buffers, i.e. if `block_size > max_block_size`.
pub fn set_block_size(&mut self, block_size: usize) {
    assert_ne!(block_size, 0);
    assert!(block_size <= self.main_input_ring_buffers[0].capacity());

    // All buffers are treated identically: set them to the new length and zero every sample. The
    // capacity check above is what keeps the `resize()` from having to allocate.
    let all_buffers = self
        .main_input_ring_buffers
        .iter_mut()
        .chain(self.main_output_ring_buffers.iter_mut())
        .chain(self.sidechain_ring_buffers.iter_mut().flatten())
        .chain(std::iter::once(&mut self.scratch_buffer));
    for buffer in all_buffers {
        buffer.clear();
        buffer.resize(block_size, 0.0);
    }

    self.current_pos = 0;
}
/// The amount of latency, in samples, introduced when processing audio through this
/// [`StftHelper`]. This is equal to the current block size.
pub fn latency_samples(&self) -> u32 {
    // All ring buffers share the same length, so the first channel is representative
    let block_size = self.main_input_ring_buffers[0].len();

    block_size as u32
}
/// Process the audio in `main_buffer` and in any sidechain buffers in small overlapping blocks
/// with a window function applied, adding up the results for the main buffer so they can be
/// written back to the host. The window overlap amount is compensated automatically when adding
/// up these samples. Whenever a new block is available, `process_cb()` gets called with a new
/// audio block of the specified size with the windowing function already applied. The summed
/// reults will then be written back to `main_buffer` exactly one block later, which means that
/// this function will introduce one block of latency. This can be compensated by calling
/// [`ProcessContext::set_latency()`][`crate::prelude::ProcessContext::set_latency()`] in your
/// plugin's initialization function.
///
/// For efficiency's sake this function will reuse the same vector for all calls to
/// `process_cb`. This means you can only access a single channel's worth of windowed data at a
/// time. The arguments to that function are `process_cb(channel_idx, sidechain_buffer_idx,
/// data)`, where `sidechain_buffer_idx` will be `None` for the main buffer. If there are any
/// sidechain buffers, then they will be processed before the main buffer.
///
/// # Panics
///
/// Panics if `main_buffer` or the buffers in `sidechain_buffers` do not have the same number of
/// channels as this [`StftHelper`], if the sidechain buffers do not contain the same number of
/// samples as the main buffer, or if the window function does not match the block size.
2022-03-06 12:15:14 +11:00
///
/// TODO: Maybe introduce a trait here so this can be used with things that aren't whole buffers
/// TODO: And also introduce that aforementioned read-only process function (`analyze()?`)
pub fn process_overlap_add<F>(
&mut self,
main_buffer: &mut Buffer,
sidechain_buffers: [&Buffer; NUM_SIDECHAIN_INPUTS],
window_function: &[f32],
overlap_times: usize,
overlap_gain_compensation: f32,
mut process_cb: F,
) where
F: FnMut(usize, Option<usize>, &mut [f32]),
{
assert_eq!(main_buffer.channels(), self.main_input_ring_buffers.len());
assert_eq!(window_function.len(), self.main_input_ring_buffers[0].len());
assert!(overlap_times > 0);
// We'll copy samples from `*_buffer` into `*_ring_buffers` while simultaneously copying
// already processed samples from `main_ring_buffers` in into `main_buffer`
let main_buffer_len = main_buffer.len();
let num_channels = main_buffer.channels();
let block_size = self.main_input_ring_buffers[0].len();
let window_interval = (block_size / overlap_times) as i32;
let mut already_processed_samples = 0;
while already_processed_samples < main_buffer_len {
let remaining_samples = main_buffer_len - already_processed_samples;
let samples_until_next_window = ((window_interval - self.current_pos as i32 - 1)
.rem_euclid(window_interval)
+ 1) as usize;
let samples_to_process = samples_until_next_window.min(remaining_samples);
// Copy the input from `main_buffer` to the ring buffer while copying last block's
// result from the buffer to `main_buffer`
// TODO: This might be able to be sped up a bit with SIMD
{
// For the main buffer
let main_buffer = main_buffer.as_slice();
for sample_offset in 0..samples_to_process {
for channel_idx in 0..num_channels {
let sample = unsafe {
main_buffer
.get_unchecked_mut(channel_idx)
.get_unchecked_mut(already_processed_samples + sample_offset)
};
let input_ring_buffer_sample = unsafe {
self.main_input_ring_buffers
.get_unchecked_mut(channel_idx)
.get_unchecked_mut(self.current_pos + sample_offset)
};
let output_ring_buffer_sample = unsafe {
self.main_output_ring_buffers
.get_unchecked_mut(channel_idx)
.get_unchecked_mut(self.current_pos + sample_offset)
};
*input_ring_buffer_sample = *sample;
*sample = *output_ring_buffer_sample;
// Very important, or else we'll overlap-add ourselves into a feedback hell
*output_ring_buffer_sample = 0.0;
}
}
// And for the sidechain buffers we only need to copy the inputs
for (sidechain_buffer, sidechain_ring_buffers) in sidechain_buffers
.iter()
.zip(self.sidechain_ring_buffers.iter_mut())
{
let sidechain_buffer = sidechain_buffer.as_slice_immutable();
for sample_offset in 0..samples_to_process {
for channel_idx in 0..num_channels {
let sample = unsafe {
sidechain_buffer
.get_unchecked(channel_idx)
.get_unchecked(already_processed_samples + sample_offset)
};
let ring_buffer_sample = unsafe {
sidechain_ring_buffers
.get_unchecked_mut(channel_idx)
.get_unchecked_mut(self.current_pos + sample_offset)
};
*ring_buffer_sample = *sample;
}
}
}
}
// At this point we either have `already_processed_samples == main_buffer_len`, or
// `self.current_pos % window_interval == 0`. If it's the latter, then we can process a
// new block.
if samples_to_process == samples_until_next_window {
// Because we're processing in smaller windows, the input ring buffers sadly does
// not always contain the full contiguous range we're interested in because they map
// wrap around. Because premade FFT algorithms typically can't handle this, we'll
// start with copying
// TODO: Sdiechain
for (channel_idx, (input_ring_buffer, output_ring_buffer)) in self
.main_input_ring_buffers
.iter()
.zip(self.main_output_ring_buffers.iter_mut())
.enumerate()
{
copy_ring_to_scratch_buffer(
&mut self.scratch_buffer,
self.current_pos,
input_ring_buffer,
);
multiply_scratch_buffer(&mut self.scratch_buffer, window_function);
process_cb(channel_idx, None, &mut self.scratch_buffer);
// The actual overlap-add part of the equation
add_scratch_to_ring_buffer(
&self.scratch_buffer,
self.current_pos,
output_ring_buffer,
overlap_gain_compensation,
);
}
}
// Do this after handling the block or else we'll copy the wrong samples.
already_processed_samples += samples_to_process;
self.current_pos = (self.current_pos + samples_to_process) % block_size;
}
}
}
/// Unwrap a ring buffer (borrowed from `self`) into the scratch buffer, starting at `current_pos`.
/// The scratch buffer first receives the samples from `current_pos` up to the block size, followed
/// by the samples from the start of the ring buffer up to `current_pos`. The block size is the
/// length of the scratch buffer. This is a free function because you cannot pass an immutable
/// reference to a field from `&self` to a `&mut self` method.
#[inline]
fn copy_ring_to_scratch_buffer(
    scratch_buffer: &mut [f32],
    current_pos: usize,
    ring_buffer: &[f32],
) {
    let block_size = scratch_buffer.len();

    // `wrapped` is the part that comes before `current_pos` in the ring buffer, `leading` is
    // everything from `current_pos` onwards
    let (wrapped, leading) = ring_buffer[..block_size].split_at(current_pos);
    let (scratch_front, scratch_back) = scratch_buffer.split_at_mut(leading.len());

    scratch_front.copy_from_slice(leading);
    scratch_back.copy_from_slice(wrapped);
}
/// Apply a window function to the scratch buffer by multiplying the two element-wise, in place.
/// If the lengths differ, only the overlapping prefix is modified. This is also a free function
/// because you can't do split borrows with methods.
#[inline]
fn multiply_scratch_buffer(scratch_buffer: &mut [f32], window_function: &[f32]) {
    scratch_buffer
        .iter_mut()
        .zip(window_function.iter().copied())
        .for_each(|(sample, window_gain)| *sample *= window_gain);
}
/// Sum the scratch buffer into a ring buffer, wrapping around at the block size (the length of the
/// scratch buffer). The first scratch sample lands on `current_pos`, and every sample is scaled by
/// `gain_compensation` before being added. Since this accumulates, samples must be cleared from the
/// ring buffer when they are written back to the host's outputs or the result is infinite
/// feedback.
#[inline]
fn add_scratch_to_ring_buffer(
    scratch_buffer: &[f32],
    current_pos: usize,
    ring_buffer: &mut [f32],
    gain_compensation: f32,
) {
    // TODO: This could also use some SIMD
    // TODO: Moving this gain compensation to the window is more efficient, but that makes the
    //       interface less nice to work with
    let accumulate = |targets: &mut [f32], sources: &[f32]| {
        for (target, source) in targets.iter_mut().zip(sources) {
            *target += *source * gain_compensation;
        }
    };

    let block_size = scratch_buffer.len();
    let (ring_wrapped, ring_leading) = ring_buffer[..block_size].split_at_mut(current_pos);
    let (scratch_front, scratch_back) = scratch_buffer.split_at(ring_leading.len());

    // The part up to the end of the ring buffer comes first, then the part that wrapped around
    accumulate(ring_leading, scratch_front);
    accumulate(ring_wrapped, scratch_back);
}