Rework FIR crossover to use FFT convolution
This commit is contained in:
parent
2c48ceb392
commit
5a51dce00d
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -711,6 +711,7 @@ name = "crossover"
|
|||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"nih_plug",
|
||||
"realfft",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
|
|
@ -16,3 +16,4 @@ simd = ["nih_plug/simd"]
|
|||
|
||||
[dependencies]
|
||||
nih_plug = { path = "../../", features = ["assert_process_allocs"] }
|
||||
realfft = "3.0.0"
|
||||
|
|
|
@ -14,36 +14,60 @@
|
|||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
use nih_plug::buffer::ChannelSamples;
|
||||
use nih_plug::debug::*;
|
||||
use realfft::num_complex::Complex32;
|
||||
use realfft::{ComplexToReal, RealFftPlanner, RealToComplex};
|
||||
use std::f32;
|
||||
use std::simd::f32x2;
|
||||
use std::sync::Arc;
|
||||
|
||||
use self::filter::{FirCoefficients, FirFilter};
|
||||
use self::filter::{FftFirFilter, FirCoefficients, FFT_INPUT_SIZE, FFT_SIZE};
|
||||
use crate::crossover::fir::filter::FILTER_SIZE;
|
||||
use crate::crossover::iir::biquad::{BiquadCoefficients, NEUTRAL_Q};
|
||||
use crate::NUM_BANDS;
|
||||
use crate::{NUM_BANDS, NUM_CHANNELS};
|
||||
|
||||
pub mod filter;
|
||||
|
||||
// TODO: Move this to FFT convolution so we can increase the filter size and improve low latency performance
|
||||
|
||||
/// The size of the FIR filter window, or the number of taps. The low frequency performance is
|
||||
/// greatly limited by this.
|
||||
const FILTER_SIZE: usize = 121;
|
||||
/// The size of the FIR filter's ring buffer. This is `FILTER_SIZE` rounded up to the next power of
|
||||
/// two.
|
||||
const RING_BUFFER_SIZE: usize = FILTER_SIZE.next_power_of_two();
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FirCrossover {
|
||||
/// The kind of crossover to use. `.update_filters()` must be called after changing this.
|
||||
mode: FirCrossoverType,
|
||||
|
||||
/// Filters for each of the bands. Depending on the number of bands argument passed to
|
||||
/// `.process()` two to five of these may be used. The first one always contains a low-pass
|
||||
/// `.process()`, two to five of these may be used. The first one always contains a low-pass
|
||||
/// filter, the last one always contains a high-pass filter, while the other bands will contain
|
||||
/// band-pass filters.
|
||||
band_filters: [FirFilter; NUM_BANDS],
|
||||
///
|
||||
/// These filters will be fed the FFT from the main input to produce output samples for the next
|
||||
/// period. Everything could be a bit nicer to read if the filter did the entire STFT process,
|
||||
/// but that would mean duplicating the input ring buffer and forward DFT up to five times.
|
||||
band_filters: [FftFirFilter; NUM_BANDS],
|
||||
|
||||
/// A ring buffer that is used to store inputs for the next FFT. Until it is time to take the
|
||||
/// next FFT, samples are copied from the inputs to this buffer, while simultaneously copying
|
||||
/// the already processed output samples from the output buffers to the output. Once
|
||||
/// `io_buffers_next_indices` wraps back around to 0, the next buffer should be produced.
|
||||
input_buffers: [[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
|
||||
/// A ring that contains the next period's outputs for each of the five bands. This is written
|
||||
/// to and read from in lockstep with `input_buffers`.
|
||||
band_output_buffers: [[[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize]; NUM_BANDS],
|
||||
/// The index in the inner `io_buffer` the next sample should be read from. After a sample is
|
||||
/// written to the band's output then this is incremented by one. Once
|
||||
/// `self.io_buffers_next_indices[channel_idx] == FFT_INPUT_SIZE` then the next block
|
||||
/// should be processed.
|
||||
///
|
||||
/// This is stored as an array since each channel is processed individually. While this should
|
||||
/// of course stay in sync, this makes it much simpler to process both channels in sequence.
|
||||
io_buffers_next_indices: [usize; NUM_CHANNELS as usize],
|
||||
|
||||
/// The algorithm for the FFT operation.
|
||||
r2c_plan: Arc<dyn RealToComplex<f32>>,
|
||||
/// The algorithm for the IFFT operation.
|
||||
c2r_plan: Arc<dyn ComplexToReal<f32>>,
|
||||
|
||||
/// A real buffer that may be written to in place during the FFT and IFFT operations.
|
||||
real_scratch_buffer: [f32; FFT_SIZE],
|
||||
/// A complex buffer corresponding to `real_scratch_buffer` that may be written to in place
|
||||
/// during the FFT and IFFT operations.
|
||||
complex_scratch_buffer: [Complex32; FFT_SIZE / 2 + 1],
|
||||
}
|
||||
|
||||
/// The type of FIR crossover to use.
|
||||
|
@ -63,9 +87,19 @@ impl FirCrossover {
|
|||
/// Make sure to add the latency reported by [`latency()`][Self::latency()] to the plugin's
|
||||
/// reported latency.
|
||||
pub fn new(mode: FirCrossoverType) -> Self {
|
||||
let mut fft_planner = RealFftPlanner::new();
|
||||
|
||||
Self {
|
||||
mode,
|
||||
band_filters: Default::default(),
|
||||
|
||||
input_buffers: [[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
|
||||
band_output_buffers: [[[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize]; NUM_BANDS],
|
||||
io_buffers_next_indices: [0; NUM_CHANNELS as usize],
|
||||
r2c_plan: fft_planner.plan_fft_forward(FFT_SIZE),
|
||||
c2r_plan: fft_planner.plan_fft_inverse(FFT_SIZE),
|
||||
real_scratch_buffer: [0.0; FFT_SIZE],
|
||||
complex_scratch_buffer: [Complex32::default(); FFT_SIZE / 2 + 1],
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -74,43 +108,96 @@ impl FirCrossover {
|
|||
// Actually, that's a lie, since we currently only do linear-phase filters with a constant
|
||||
// size
|
||||
match self.mode {
|
||||
FirCrossoverType::LinkwitzRiley24LinearPhase => (FILTER_SIZE / 2) as u32,
|
||||
FirCrossoverType::LinkwitzRiley24LinearPhase => FFT_INPUT_SIZE as u32,
|
||||
}
|
||||
}
|
||||
|
||||
/// Split the signal into bands using the crossovers previously configured through `.update()`.
|
||||
/// The split bands will be written to `band_outputs`. `main_io` is not written to, and should
|
||||
/// be cleared separately.
|
||||
/// The split bands will be written to `band_outputs`. The main output should be cleared
|
||||
/// separately. For efficiency's sake this processes an entire channel at once to minimize the
|
||||
/// number of FFT operations needed. Since this process delays the signal by `FFT_INPUT_SIZE`
|
||||
/// samples, the latency should be reported to the host.
|
||||
pub fn process(
|
||||
&mut self,
|
||||
num_bands: usize,
|
||||
main_io: &ChannelSamples,
|
||||
band_outputs: [ChannelSamples; NUM_BANDS],
|
||||
main_input: &[f32],
|
||||
mut band_outputs: [&mut &mut [f32]; NUM_BANDS],
|
||||
channel_idx: usize,
|
||||
) {
|
||||
nih_debug_assert!(num_bands >= 2);
|
||||
nih_debug_assert!(num_bands <= NUM_BANDS);
|
||||
// Required for the SIMD, so we'll just do a hard assert or the unchecked conversions will
|
||||
// be unsound
|
||||
assert!(main_io.len() == 2);
|
||||
nih_debug_assert!(main_input.len() == band_outputs[0].len());
|
||||
nih_debug_assert!(channel_idx < NUM_CHANNELS as usize);
|
||||
|
||||
let samples: f32x2 = unsafe { main_io.to_simd_unchecked() };
|
||||
match self.mode {
|
||||
FirCrossoverType::LinkwitzRiley24LinearPhase => {
|
||||
// TODO: Everything is structured to be fast to compute for the IIR filters. Instead
|
||||
// of doing two channels at the same time, it would probably be faster to use
|
||||
// SIMD for the actual convolution so we can do 4 or 8 multiply-adds at the
|
||||
// same time. Or perhaps a better way to spend the time, use FFT convolution
|
||||
// for this.
|
||||
for (filter, mut output) in self
|
||||
.band_filters
|
||||
// We'll copy already processed output to `band_outputs` while storing input for the next
|
||||
// FFT operation. This is a modified version of what's going on in `StftHelper`.
|
||||
let mut current_sample_idx = 0;
|
||||
while current_sample_idx < main_input.len() {
|
||||
{
|
||||
// When `self.io_buffers_next_indices[channel_idx] == FFT_INPUT_SIZE`, the next block should be processed
|
||||
let io_buffers_next_indices = self.io_buffers_next_indices[channel_idx];
|
||||
let process_num_samples = (FFT_INPUT_SIZE - io_buffers_next_indices)
|
||||
.min(main_input.len() - current_sample_idx);
|
||||
|
||||
// Since we can't do this in-place (without unnecessarily duplicating a ton of data),
|
||||
// copying data from and to the ring buffers can be done with simple memcpys
|
||||
self.input_buffers[channel_idx]
|
||||
[io_buffers_next_indices..io_buffers_next_indices + process_num_samples]
|
||||
.copy_from_slice(
|
||||
&main_input[current_sample_idx..current_sample_idx + process_num_samples],
|
||||
);
|
||||
for (band_output, band_output_buffers) in band_outputs
|
||||
.iter_mut()
|
||||
.zip(band_outputs)
|
||||
.zip(self.band_output_buffers.iter())
|
||||
.take(num_bands)
|
||||
{
|
||||
let filtered_samples = filter.process(samples);
|
||||
|
||||
unsafe { output.from_simd_unchecked(filtered_samples) };
|
||||
band_output[current_sample_idx..current_sample_idx + process_num_samples]
|
||||
.copy_from_slice(
|
||||
&band_output_buffers[channel_idx][io_buffers_next_indices
|
||||
..io_buffers_next_indices + process_num_samples],
|
||||
);
|
||||
}
|
||||
|
||||
// This is tracked per-channel because both channels are processed individually
|
||||
self.io_buffers_next_indices[channel_idx] += process_num_samples;
|
||||
current_sample_idx += process_num_samples;
|
||||
}
|
||||
|
||||
// At this point we either reached the end of the buffer (`current_sample_idx ==
|
||||
// main_input.len()`), or we filled up the `io_buffer` and we can process the next block
|
||||
if self.io_buffers_next_indices[channel_idx] == FFT_INPUT_SIZE {
|
||||
// Zero pad the input for the FFT
|
||||
self.real_scratch_buffer[..FFT_INPUT_SIZE]
|
||||
.copy_from_slice(&self.input_buffers[channel_idx]);
|
||||
self.real_scratch_buffer[FFT_INPUT_SIZE..].fill(0.0);
|
||||
|
||||
self.r2c_plan
|
||||
.process_with_scratch(
|
||||
&mut self.real_scratch_buffer,
|
||||
&mut self.complex_scratch_buffer,
|
||||
&mut [],
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// The input can then be used to produce each band's output. Since realfft expects
|
||||
// to be able to modify the input, we need to make a copy of this first:
|
||||
let input_fft = self.complex_scratch_buffer;
|
||||
|
||||
for (band_output_buffers, band_filter) in self
|
||||
.band_output_buffers
|
||||
.iter_mut()
|
||||
.zip(self.band_filters.iter_mut())
|
||||
.take(num_bands)
|
||||
{
|
||||
band_filter.process(
|
||||
&input_fft,
|
||||
&mut band_output_buffers[channel_idx],
|
||||
channel_idx,
|
||||
&*self.c2r_plan,
|
||||
&mut self.real_scratch_buffer,
|
||||
&mut self.complex_scratch_buffer,
|
||||
)
|
||||
}
|
||||
|
||||
self.io_buffers_next_indices[channel_idx] = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -150,11 +237,16 @@ impl FirCrossover {
|
|||
FirCoefficients::design_fourth_order_linear_phase_low_pass_from_biquad(
|
||||
iir_coefs,
|
||||
);
|
||||
self.band_filters[0].coefficients = lp_fir_coefs;
|
||||
self.band_filters[0].recompute_coefficients(
|
||||
lp_fir_coefs.clone(),
|
||||
&*self.r2c_plan,
|
||||
&mut self.real_scratch_buffer,
|
||||
&mut self.complex_scratch_buffer,
|
||||
);
|
||||
|
||||
// For the band-pass filters and the final high-pass filter, we need to keep track
|
||||
// of the accumulated impulse response
|
||||
let mut accumulated_ir = self.band_filters[0].coefficients.clone();
|
||||
let mut accumulated_ir = lp_fir_coefs;
|
||||
for (split_frequency, band_filter) in frequencies
|
||||
.iter()
|
||||
.zip(self.band_filters.iter_mut())
|
||||
|
@ -191,7 +283,12 @@ impl FirCrossover {
|
|||
*accumulated_coef += *bp_coef;
|
||||
}
|
||||
|
||||
band_filter.coefficients = fir_bp_coefs;
|
||||
band_filter.recompute_coefficients(
|
||||
fir_bp_coefs,
|
||||
&*self.r2c_plan,
|
||||
&mut self.real_scratch_buffer,
|
||||
&mut self.complex_scratch_buffer,
|
||||
);
|
||||
}
|
||||
|
||||
// And finally we can do a spectral inversion of the accumulated IR to the last
|
||||
|
@ -202,7 +299,12 @@ impl FirCrossover {
|
|||
}
|
||||
fir_hp_coefs.0[FILTER_SIZE / 2] += 1.0;
|
||||
|
||||
self.band_filters[num_bands - 1].coefficients = fir_hp_coefs;
|
||||
self.band_filters[num_bands - 1].recompute_coefficients(
|
||||
fir_hp_coefs,
|
||||
&*self.r2c_plan,
|
||||
&mut self.real_scratch_buffer,
|
||||
&mut self.complex_scratch_buffer,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -212,5 +314,16 @@ impl FirCrossover {
|
|||
for filter in &mut self.band_filters {
|
||||
filter.reset();
|
||||
}
|
||||
|
||||
// The inputs don't need to be reset as they'll be overwritten immediately
|
||||
for band_buffers in &mut self.band_output_buffers {
|
||||
for buffer in band_buffers {
|
||||
buffer.fill(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
// This being 0 means that the very first period will simply output the silence from above
|
||||
// and gather input for the next FFT
|
||||
self.io_buffers_next_indices.fill(0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,27 +14,50 @@
|
|||
// You should have received a copy of the GNU General Public License
|
||||
// along with this program. If not, see <https://www.gnu.org/licenses/>.
|
||||
|
||||
use realfft::num_complex::Complex32;
|
||||
use realfft::{ComplexToReal, RealToComplex};
|
||||
use std::f32;
|
||||
use std::simd::{f32x2, StdFloat};
|
||||
|
||||
use super::{FILTER_SIZE, RING_BUFFER_SIZE};
|
||||
use crate::crossover::iir::biquad::{Biquad, BiquadCoefficients};
|
||||
use crate::NUM_CHANNELS;
|
||||
|
||||
/// We're doing FFT convolution here since otherwise there's no way to get decent low-frequency
|
||||
/// accuracy while still having acceptable performance. The input going into the STFT will be
|
||||
/// smaller since it will be padded with zeroes to compensate for the otherwise overlapping tail
|
||||
/// caused by the convolution.
|
||||
pub const FFT_SIZE: usize = 4096;
|
||||
/// The input chunk size the FFT convolution is processing. This is also the latency. By having this
|
||||
/// be exactly half of FFT_SIZE, we can make the overlap-add part of the FFT convolution a lot
|
||||
/// simpler for ourselves. (check the `StftHelper` struct in NIH-plug itself for an example that
|
||||
/// can handle arbitrary padding)
|
||||
pub const FFT_INPUT_SIZE: usize = FFT_SIZE / 2;
|
||||
/// The size of the FIR filter window, or the number of taps. Convolving `FFT_INPUT_SIZE` samples
|
||||
/// with this filter should fit exactly in `FFT_SIZE`, and it should be an odd number.
|
||||
pub const FILTER_SIZE: usize = FFT_SIZE - FFT_INPUT_SIZE + 1;
|
||||
|
||||
/// A single FIR filter that may be configured in any way. In this plugin this will be a
|
||||
/// linear-phase low-pass, band-pass, or high-pass filter.
|
||||
/// linear-phase low-pass, band-pass, or high-pass filter. Implemented using FFT convolution. `git
|
||||
/// blame` this for a version that uses direct convolution.
|
||||
///
|
||||
/// `N_INPUT` is the size of the input that will be processed. The size of the FFT window becomes
|
||||
/// `N_INPUT * 2`. That makes handling the overlap easy, as each IDFT after multiplying the padded
|
||||
/// input and the padded impulse response FFTs will result in one `N_INPUT` period of output that can
|
||||
/// be taken as is, followed by one `N_INPUT` period of samples that need to be added to the next
|
||||
/// period's outputs as part of the overlap-add process.
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct FirFilter {
|
||||
/// The coefficients for this filter. The filters for both channels should be equivalent, this
|
||||
/// just avoids broadcasts in the filter process.
|
||||
pub coefficients: FirCoefficients<FILTER_SIZE>,
|
||||
pub struct FftFirFilter {
|
||||
/// An `N_INPUT + 1` sized IR. Padded, run through the DFT, and then normalized by dividing by
|
||||
/// `FFT_SIZE`.
|
||||
padded_ir_fft: [Complex32; FFT_SIZE / 2 + 1],
|
||||
|
||||
/// A ring buffer storing the last `FILTER_SIZE - 1` samples. The capacity is `FILTER_SIZE`
|
||||
/// rounded up to the next power of two.
|
||||
delay_buffer: [f32x2; RING_BUFFER_SIZE],
|
||||
/// The index in `delay_buffer` to write the next sample to. Wrapping negative indices back to
|
||||
/// the end, the previous sample can be found at `delay_buffer[delay_buffer_next_idx - 1]`, the
|
||||
/// one before that at `delay_buffer[delay_buffer_next_idx - 2]`, and so on.
|
||||
delay_buffer_next_idx: usize,
|
||||
/// The padding from the previous IDFT operation that needs to be added to the next output
|
||||
/// buffer. After the IDFT process there will be an `FFT_SIZE` real scratch buffer containing
|
||||
/// the output. At that point the first `FFT_INPUT_SIZE` samples of those will be copied to
|
||||
/// `output_buffers` in the FIR crossover, `unapplied_padding_buffer` will be added to that
|
||||
/// output buffer, and then finally the last `FFT_INPUT_SIZE` samples of the scratch buffer are
|
||||
/// copied to `unapplied_padding_buffer`. This thus makes sure the tail gets delayed by another
|
||||
/// period so that everything matches up.
|
||||
unapplied_padding_buffers: [[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
|
||||
}
|
||||
|
||||
/// Coefficients for a (linear-phase) FIR filter. This struct includes ways to design the filter.
|
||||
|
@ -43,12 +66,14 @@ pub struct FirFilter {
|
|||
#[derive(Debug, Clone)]
|
||||
pub struct FirCoefficients<const N: usize>(pub [f32; N]);
|
||||
|
||||
impl Default for FirFilter {
|
||||
impl Default for FftFirFilter {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
coefficients: FirCoefficients::default(),
|
||||
delay_buffer: [f32x2::default(); RING_BUFFER_SIZE],
|
||||
delay_buffer_next_idx: 0,
|
||||
// Would be nicer to initialize this to an impulse response that actually had the
|
||||
// correct position wrt the usual linear-phase latency, but this is fine since it should
|
||||
// never be used anyways
|
||||
padded_ir_fft: [Complex32::new(1.0 / FFT_SIZE as f32, 0.0); FFT_SIZE / 2 + 1],
|
||||
unapplied_padding_buffers: [[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -64,53 +89,87 @@ impl<const N: usize> Default for FirCoefficients<N> {
|
|||
}
|
||||
}
|
||||
|
||||
impl FirFilter {
|
||||
/// Process left and right audio samples through the filter.
|
||||
pub fn process(&mut self, samples: f32x2) -> f32x2 {
|
||||
// TODO: Replace direct convolution with FFT convolution, would make the implementation much
|
||||
// more complex though because of the multi output part
|
||||
let coefficients = &self.coefficients.0;
|
||||
let mut result = f32x2::splat(coefficients[0]) * samples;
|
||||
impl FftFirFilter {
|
||||
/// Filter `FFT_INPUT_SIZE` samples padded to `FFT_SIZE` through this filter, and write the
|
||||
/// outputs to `output_samples` (belonging to channel `channel_idx`), at an `FFT_INPUT_SIZE`
|
||||
/// delay. This is a bit weird and probably difficult to follow because as an optimization the
|
||||
/// DFT is taken only once, and then the IDFT is taken once for every filtered band. This
|
||||
/// function is thus called inside of the overlap-add loop to avoid duplicate work.
|
||||
pub fn process(
|
||||
&mut self,
|
||||
input_fft: &[Complex32; FFT_SIZE / 2 + 1],
|
||||
output_samples: &mut [f32; FFT_INPUT_SIZE],
|
||||
output_channel_idx: usize,
|
||||
c2r_plan: &dyn ComplexToReal<f32>,
|
||||
real_scratch_buffer: &mut [f32; FFT_SIZE],
|
||||
complex_scratch_buffer: &mut [Complex32; FFT_SIZE / 2 + 1],
|
||||
) {
|
||||
// The padded input FFT has already been taken, so we only need to copy it to the scratch
|
||||
// buffer (the input cannot change as the next band might need it as well).
|
||||
complex_scratch_buffer.copy_from_slice(input_fft);
|
||||
|
||||
// Now multiply `self.coefficients[1..]` with the delay buffer starting at
|
||||
// `self.delay_buffer_next_idx - 1`, wrapping around to the end when that is reached
|
||||
// The end index is exclusive, and we already did the multiply+add for the first coefficient.
|
||||
let before_wraparound_start_idx = self
|
||||
.delay_buffer_next_idx
|
||||
.saturating_sub(coefficients.len() - 1);
|
||||
let before_wraparound_end_idx = self.delay_buffer_next_idx;
|
||||
let num_before_wraparound = before_wraparound_end_idx - before_wraparound_start_idx;
|
||||
for (coefficient, delayed_sample) in coefficients[1..1 + num_before_wraparound].iter().zip(
|
||||
self.delay_buffer[before_wraparound_start_idx..before_wraparound_end_idx]
|
||||
.iter()
|
||||
.rev(),
|
||||
) {
|
||||
// `result += coefficient * sample`, but with explicit FMA
|
||||
result = f32x2::splat(*coefficient).mul_add(*delayed_sample, result);
|
||||
// The FFT of the impulse response has already been normalized, so we just need to
|
||||
// multiply the two buffers
|
||||
for (output_bin, ir_bin) in complex_scratch_buffer
|
||||
.iter_mut()
|
||||
.zip(self.padded_ir_fft.iter())
|
||||
{
|
||||
*output_bin *= ir_bin;
|
||||
}
|
||||
c2r_plan
|
||||
.process_with_scratch(complex_scratch_buffer, real_scratch_buffer, &mut [])
|
||||
.unwrap();
|
||||
|
||||
let after_wraparound_begin_idx =
|
||||
self.delay_buffer.len() - (coefficients.len() - num_before_wraparound);
|
||||
let after_wraparound_end_idx = self.delay_buffer.len();
|
||||
for (coefficient, delayed_sample) in coefficients[1 + num_before_wraparound..].iter().zip(
|
||||
self.delay_buffer[after_wraparound_begin_idx..after_wraparound_end_idx]
|
||||
.iter()
|
||||
.rev(),
|
||||
) {
|
||||
result = f32x2::splat(*coefficient).mul_add(*delayed_sample, result);
|
||||
// At this point the first `FFT_INPUT_SIZE` elements in `real_scratch_buffer`
|
||||
// contain the output for the next period, while the last `FFT_INPUT_SIZE` elements
|
||||
// contain output that needs to be added to the period after that. Since previous
|
||||
// period also produced similar delayed output, we'll need to copy that to the
|
||||
// results as well.
|
||||
output_samples.copy_from_slice(&real_scratch_buffer[..FFT_INPUT_SIZE]);
|
||||
for (output_sample, padding_sample) in output_samples
|
||||
.iter_mut()
|
||||
.zip(self.unapplied_padding_buffers[output_channel_idx].iter())
|
||||
{
|
||||
*output_sample += *padding_sample;
|
||||
}
|
||||
self.unapplied_padding_buffers[output_channel_idx]
|
||||
.copy_from_slice(&real_scratch_buffer[FFT_INPUT_SIZE..]);
|
||||
}
|
||||
|
||||
// And finally write the samples to the delay buffer for the enxt sample
|
||||
self.delay_buffer[self.delay_buffer_next_idx] = samples;
|
||||
self.delay_buffer_next_idx = (self.delay_buffer_next_idx + 1) % self.delay_buffer.len();
|
||||
/// Set the filter's coefficients based on raw FIR filter coefficients. These will be padded,
|
||||
/// run through the DFT, and normalized.
|
||||
pub fn recompute_coefficients(
|
||||
&mut self,
|
||||
coefficients: FirCoefficients<FILTER_SIZE>,
|
||||
r2c_plan: &dyn RealToComplex<f32>,
|
||||
real_scratch_buffer: &mut [f32; FFT_SIZE],
|
||||
complex_scratch_buffer: &mut [Complex32; FFT_SIZE / 2 + 1],
|
||||
) {
|
||||
// This needs to be padded with zeroes
|
||||
real_scratch_buffer[..FILTER_SIZE].copy_from_slice(&coefficients.0);
|
||||
real_scratch_buffer[FILTER_SIZE..].fill(0.0);
|
||||
|
||||
result
|
||||
r2c_plan
|
||||
.process_with_scratch(real_scratch_buffer, complex_scratch_buffer, &mut [])
|
||||
.unwrap();
|
||||
|
||||
// The resulting buffer needs to be normalized and written to `self.padded_ir_fft`. That way
|
||||
// we don't need to do anything but multiplying and writing the results back when
|
||||
// processing.
|
||||
let normalization_factor = 1.0 / FFT_SIZE as f32;
|
||||
for (filter_bin, target_bin) in complex_scratch_buffer
|
||||
.iter()
|
||||
.zip(self.padded_ir_fft.iter_mut())
|
||||
{
|
||||
*target_bin = *filter_bin * normalization_factor;
|
||||
}
|
||||
}
|
||||
|
||||
/// Reset the internal filter state.
|
||||
pub fn reset(&mut self) {
|
||||
self.delay_buffer.fill(f32x2::default());
|
||||
self.delay_buffer_next_idx = 0;
|
||||
for buffer in &mut self.unapplied_padding_buffers {
|
||||
buffer.fill(0.0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -27,6 +27,9 @@ use std::sync::Arc;
|
|||
|
||||
mod crossover;
|
||||
|
||||
/// The number of channels this plugin supports. Hard capped at 2 for SIMD reasons.
|
||||
pub const NUM_CHANNELS: u32 = 2;
|
||||
|
||||
/// The number of bands. Not used directly here, but this avoids hardcoding some constants in the
|
||||
/// crossover implementations.
|
||||
pub const NUM_BANDS: usize = 5;
|
||||
|
@ -163,13 +166,13 @@ impl Plugin for Crossover {
|
|||
|
||||
const VERSION: &'static str = "0.1.0";
|
||||
|
||||
const DEFAULT_NUM_INPUTS: u32 = 2;
|
||||
const DEFAULT_NUM_OUTPUTS: u32 = 2;
|
||||
const DEFAULT_NUM_INPUTS: u32 = NUM_CHANNELS;
|
||||
const DEFAULT_NUM_OUTPUTS: u32 = NUM_CHANNELS;
|
||||
|
||||
const DEFAULT_AUX_OUTPUTS: Option<AuxiliaryIOConfig> = Some(AuxiliaryIOConfig {
|
||||
// Two to five of these busses will be used at a time
|
||||
num_busses: 5,
|
||||
num_channels: 2,
|
||||
num_channels: NUM_CHANNELS,
|
||||
});
|
||||
|
||||
const PORT_NAMES: PortNames = PortNames {
|
||||
|
@ -186,9 +189,9 @@ impl Plugin for Crossover {
|
|||
|
||||
fn accepts_bus_config(&self, config: &BusConfig) -> bool {
|
||||
// Only do stereo
|
||||
config.num_input_channels == 2
|
||||
&& config.num_output_channels == 2
|
||||
&& config.aux_output_busses.num_channels == 2
|
||||
config.num_input_channels == NUM_CHANNELS
|
||||
&& config.num_output_channels == NUM_CHANNELS
|
||||
&& config.aux_output_busses.num_channels == NUM_CHANNELS
|
||||
}
|
||||
|
||||
fn initialize(
|
||||
|
@ -232,18 +235,7 @@ impl Plugin for Crossover {
|
|||
CrossoverType::LinkwitzRiley24LinearPhase => {
|
||||
context.set_latency_samples(self.fir_crossover.latency());
|
||||
|
||||
todo!();
|
||||
// Self::do_process(buffer, aux, |main_channel_samples, bands| {
|
||||
// if self.should_update_filters() {
|
||||
// self.update_filters(buffer.len() as u32);
|
||||
// }
|
||||
|
||||
// self.fir_crossover.process(
|
||||
// self.params.num_bands.value as usize,
|
||||
// main_channel_samples,
|
||||
// bands,
|
||||
// );
|
||||
// })
|
||||
self.process_fir(buffer, aux);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -253,7 +245,7 @@ impl Plugin for Crossover {
|
|||
|
||||
impl Crossover {
|
||||
/// Takes care of all of the boilerplate in zipping the outputs together to get a nice iterator
|
||||
/// friendly and SIMD-able interface for the processing function. Prevents havign to branch per
|
||||
/// friendly and SIMD-able interface for the processing function. Prevents having to branch per
|
||||
/// sample. The closure receives an input sample and it should write the output samples for each
|
||||
/// band to the array.
|
||||
fn process_iir(&mut self, buffer: &mut Buffer, aux: &mut AuxiliaryBuffers) {
|
||||
|
@ -310,6 +302,45 @@ impl Crossover {
|
|||
}
|
||||
}
|
||||
|
||||
/// `process_iir()`, but for the linear-phase FIR crossovers. This processes an entire channel
|
||||
/// at once instead of processing per-sample since we use FFT convolution.
|
||||
fn process_fir(&mut self, buffer: &mut Buffer, aux: &mut AuxiliaryBuffers) {
|
||||
// In theory we could do smoothing in between processed blocks, but this should be fine
|
||||
if self.should_update_filters() {
|
||||
self.update_filters(buffer.len() as u32);
|
||||
}
|
||||
|
||||
let aux_outputs = &mut aux.outputs;
|
||||
let (band_1_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
|
||||
let (band_2_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
|
||||
let (band_3_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
|
||||
let (band_4_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
|
||||
let (band_5_buffer, _) = aux_outputs.split_first_mut().unwrap();
|
||||
|
||||
// We can avoid a lot of hardcoding and conditionals by restoring the original array structure
|
||||
for channel_idx in 0..buffer.channels() {
|
||||
let main_io = &mut buffer.as_slice()[channel_idx];
|
||||
let band_outputs = [
|
||||
&mut band_1_buffer.as_slice()[channel_idx],
|
||||
&mut band_2_buffer.as_slice()[channel_idx],
|
||||
&mut band_3_buffer.as_slice()[channel_idx],
|
||||
&mut band_4_buffer.as_slice()[channel_idx],
|
||||
&mut band_5_buffer.as_slice()[channel_idx],
|
||||
];
|
||||
|
||||
self.fir_crossover.process(
|
||||
self.params.num_bands.value as usize,
|
||||
main_io,
|
||||
band_outputs,
|
||||
channel_idx,
|
||||
);
|
||||
|
||||
// The main output should be silent as the signal is already evenly split over the other
|
||||
// bands
|
||||
main_io.fill(0.0);
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether the filters should be updated. There are different updating functions for
|
||||
/// the IIR and FIR crossovers.
|
||||
fn should_update_filters(&mut self) -> bool {
|
||||
|
|
Loading…
Reference in a new issue