//! Utilities for buffering audio, likely used as part of a short-term Fourier transform. use crate::buffer::Buffer; /// Process the input buffer in equal sized blocks, running a callback on each block to transform /// the block and then writing back the results from the previous block to the buffer. This /// introduces latency equal to the size of the block. /// /// Additional inputs can be processed by setting the `NUM_SIDECHAIN_INPUTS` constant. These buffers /// will not be written to, so they are purely used for analysis. These sidechain inputs will have /// the same number of channels as the main input. /// /// TODO: Better name? /// TODO: We may need something like this purely for analysis, e.g. for showing spectrums in a GUI. /// Figure out the cleanest way to adapt this for the non-processing use case. pub struct StftHelper { // These ring buffers store the input samples and the already processed output produced by // adding overlapping windows. Whenever we reach a new overlapping window, we'll write the // already calculated outputs to the main buffer passed to the process function and then process // a new block. main_input_ring_buffers: Vec>, main_output_ring_buffers: Vec>, sidechain_ring_buffers: [Vec>; NUM_SIDECHAIN_INPUTS], /// Results from the ring buffers are copied to this scratch buffer before being passed to the /// plugin. Needed to handle overlap. scratch_buffer: Vec, /// The current position in our ring buffers. Whenever this wraps around to 0, we'll process /// a block. current_pos: usize, } impl StftHelper { /// Initialize the [`StftHelper`] for [`Buffer`]s with the specified number of channels and the /// given maximum block size. Call [`set_block_size()`][`Self::set_block_size()`] afterwards if /// you do not need the full capacity upfront. /// /// # Panics /// /// Panics if `num_channels == 0 || max_block_size == 0`. pub fn new(num_channels: usize, max_block_size: usize) -> Self { assert_ne!(num_channels, 0); assert_ne!(max_block_size, 0); Self { main_input_ring_buffers: vec![vec![0.0; max_block_size]; num_channels], main_output_ring_buffers: vec![vec![0.0; max_block_size]; num_channels], // Kinda hacky way to initialize an array of non-copy types sidechain_ring_buffers: [(); NUM_SIDECHAIN_INPUTS] .map(|_| vec![vec![0.0; max_block_size]; num_channels]), scratch_buffer: vec![0.0; max_block_size], current_pos: 0, } } /// Change the current block size. This will clear the buffers, causing the next block to output /// silence. /// /// # Panics /// /// WIll panic if `block_size > max_block_size`. pub fn set_block_size(&mut self, block_size: usize) { assert!(block_size <= self.main_input_ring_buffers[0].capacity()); for main_ring_buffer in &mut self.main_input_ring_buffers { main_ring_buffer.resize(block_size, 0.0); main_ring_buffer.fill(0.0); } for main_ring_buffer in &mut self.main_output_ring_buffers { main_ring_buffer.resize(block_size, 0.0); main_ring_buffer.fill(0.0); } self.scratch_buffer.resize(block_size, 0.0); self.scratch_buffer.fill(0.0); for sidechain_ring_buffers in &mut self.sidechain_ring_buffers { for sidechain_ring_buffer in sidechain_ring_buffers { sidechain_ring_buffer.resize(block_size, 0.0); sidechain_ring_buffer.fill(0.0); } } self.current_pos = 0; } /// The amount of latency introduced when processing audio throug hthis [`StftHelper`]. pub fn latency_samples(&self) -> u32 { self.main_input_ring_buffers[0].len() as u32 } /// Process the audio in `main_buffer` and in any sidechain buffers in small overlapping blocks /// with a window function applied, adding up the results for the main buffer so they can be /// written back to the host. The window overlap amount is compensated automatically when adding /// up these samples. Whenever a new block is available, `process_cb()` gets called with a new /// audio block of the specified size with the windowing function already applied. The summed /// reults will then be written back to `main_buffer` exactly one block later, which means that /// this function will introduce one block of latency. This can be compensated by calling /// [`ProcessContext::set_latency()`][`crate::prelude::ProcessContext::set_latency()`] in your /// plugin's initialization function. /// /// For efficiency's sake this function will reuse the same vector for all calls to /// `process_cb`. This means you can only access a single channel's worth of windowed data at a /// time. The arguments to that function are `process_cb(channel_idx, sidechain_buffer_idx, /// data)`, where `sidechain_buffer_idx` will be `None` for the main buffer. If there are any /// sidechain buffers, then they will be processed before the main buffer. /// /// # Panics /// /// Panics if `main_buffer` or the buffers in `sidechain_buffers` do not have the same number of /// channels as this [`StftHelper`], if the sidechain buffers do not contain the same number of /// samples as the main buffer, or if the window function does not match the block size. /// /// TODO: Maybe introduce a trait here so this can be used with things that aren't whole buffers /// TODO: And also introduce that aforementioned read-only process function (`analyze()?`) pub fn process_overlap_add( &mut self, main_buffer: &mut Buffer, sidechain_buffers: [&Buffer; NUM_SIDECHAIN_INPUTS], window_function: &[f32], overlap_times: usize, overlap_gain_compensation: f32, mut process_cb: F, ) where F: FnMut(usize, Option, &mut [f32]), { assert_eq!(main_buffer.channels(), self.main_input_ring_buffers.len()); assert_eq!(window_function.len(), self.main_input_ring_buffers[0].len()); assert!(overlap_times > 0); // We'll copy samples from `*_buffer` into `*_ring_buffers` while simultaneously copying // already processed samples from `main_ring_buffers` in into `main_buffer` let main_buffer_len = main_buffer.len(); let num_channels = main_buffer.channels(); let block_size = self.main_input_ring_buffers[0].len(); let window_interval = (block_size / overlap_times) as i32; let mut already_processed_samples = 0; while already_processed_samples < main_buffer_len { let remaining_samples = main_buffer_len - already_processed_samples; let samples_until_next_window = ((window_interval - self.current_pos as i32 - 1) .rem_euclid(window_interval) + 1) as usize; let samples_to_process = samples_until_next_window.min(remaining_samples); // Copy the input from `main_buffer` to the ring buffer while copying last block's // result from the buffer to `main_buffer` // TODO: This might be able to be sped up a bit with SIMD { // For the main buffer let main_buffer = main_buffer.as_slice(); for sample_offset in 0..samples_to_process { for channel_idx in 0..num_channels { let sample = unsafe { main_buffer .get_unchecked_mut(channel_idx) .get_unchecked_mut(already_processed_samples + sample_offset) }; let input_ring_buffer_sample = unsafe { self.main_input_ring_buffers .get_unchecked_mut(channel_idx) .get_unchecked_mut(self.current_pos + sample_offset) }; let output_ring_buffer_sample = unsafe { self.main_output_ring_buffers .get_unchecked_mut(channel_idx) .get_unchecked_mut(self.current_pos + sample_offset) }; *input_ring_buffer_sample = *sample; *sample = *output_ring_buffer_sample; // Very important, or else we'll overlap-add ourselves into a feedback hell *output_ring_buffer_sample = 0.0; } } // And for the sidechain buffers we only need to copy the inputs for (sidechain_buffer, sidechain_ring_buffers) in sidechain_buffers .iter() .zip(self.sidechain_ring_buffers.iter_mut()) { let sidechain_buffer = sidechain_buffer.as_slice_immutable(); for sample_offset in 0..samples_to_process { for channel_idx in 0..num_channels { let sample = unsafe { sidechain_buffer .get_unchecked(channel_idx) .get_unchecked(already_processed_samples + sample_offset) }; let ring_buffer_sample = unsafe { sidechain_ring_buffers .get_unchecked_mut(channel_idx) .get_unchecked_mut(self.current_pos + sample_offset) }; *ring_buffer_sample = *sample; } } } } // At this point we either have `already_processed_samples == main_buffer_len`, or // `self.current_pos % window_interval == 0`. If it's the latter, then we can process a // new block. if samples_to_process == samples_until_next_window { // Because we're processing in smaller windows, the input ring buffers sadly does // not always contain the full contiguous range we're interested in because they map // wrap around. Because premade FFT algorithms typically can't handle this, we'll // start with copying // TODO: Sdiechain for (channel_idx, (input_ring_buffer, output_ring_buffer)) in self .main_input_ring_buffers .iter() .zip(self.main_output_ring_buffers.iter_mut()) .enumerate() { copy_ring_to_scratch_buffer( &mut self.scratch_buffer, self.current_pos, input_ring_buffer, ); multiply_scratch_buffer(&mut self.scratch_buffer, window_function); process_cb(channel_idx, None, &mut self.scratch_buffer); // The actual overlap-add part of the equation add_scratch_to_ring_buffer( &self.scratch_buffer, self.current_pos, output_ring_buffer, overlap_gain_compensation, ); } } // Do this after handling the block or else we'll copy the wrong samples. already_processed_samples += samples_to_process; self.current_pos = (self.current_pos + samples_to_process) % block_size; } } } /// Copy data from the the specified ring buffer (borrowed from `self`) to the scratch buffers at /// the current position. This is a free function because you cannot pass an immutable reference to /// a field from `&self` to a `&mut self` method. #[inline] fn copy_ring_to_scratch_buffer( scratch_buffer: &mut [f32], current_pos: usize, ring_buffer: &[f32], ) { let block_size = scratch_buffer.len(); let num_copy_before_wrap = block_size - current_pos; scratch_buffer[0..num_copy_before_wrap].copy_from_slice(&ring_buffer[current_pos..block_size]); scratch_buffer[num_copy_before_wrap..block_size].copy_from_slice(&ring_buffer[0..current_pos]); } /// Multiply the scratch buffer by some window function. Also free function because you can't do /// split borrows with methods. #[inline] fn multiply_scratch_buffer(scratch_buffer: &mut [f32], window_function: &[f32]) { for (sample, window_sample) in scratch_buffer.iter_mut().zip(window_function) { *sample *= window_sample; } } /// Add data from the scratch buffer to the specified ring buffer. When writing samples from this /// ring buffer back to the host's outputs they must be cleared to prevent infinite feedback. #[inline] fn add_scratch_to_ring_buffer( scratch_buffer: &[f32], current_pos: usize, ring_buffer: &mut [f32], gain_compensation: f32, ) { // TODO: This could also use some SIMD let block_size = scratch_buffer.len(); let num_copy_before_wrap = block_size - current_pos; for (scratch_sample, ring_sample) in scratch_buffer[0..num_copy_before_wrap] .iter() .zip(&mut ring_buffer[current_pos..block_size]) { // TODO: Moving this gain compensation to the window is more efficient, but that makes the // interface less nice to work with *ring_sample += *scratch_sample * gain_compensation; } for (scratch_sample, ring_sample) in scratch_buffer[num_copy_before_wrap..block_size] .iter() .zip(&mut ring_buffer[0..current_pos]) { *ring_sample += *scratch_sample * gain_compensation; } }