diff --git a/Cargo.lock b/Cargo.lock
index f5a6476b..a12a0b89 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -711,6 +711,7 @@ name = "crossover"
version = "0.1.0"
dependencies = [
"nih_plug",
+ "realfft",
]
[[package]]
diff --git a/plugins/crossover/Cargo.toml b/plugins/crossover/Cargo.toml
index b7efa382..77c46114 100644
--- a/plugins/crossover/Cargo.toml
+++ b/plugins/crossover/Cargo.toml
@@ -16,3 +16,4 @@ simd = ["nih_plug/simd"]
[dependencies]
nih_plug = { path = "../../", features = ["assert_process_allocs"] }
+realfft = "3.0.0"
diff --git a/plugins/crossover/src/crossover/fir.rs b/plugins/crossover/src/crossover/fir.rs
index c55bcd10..4d903eaa 100644
--- a/plugins/crossover/src/crossover/fir.rs
+++ b/plugins/crossover/src/crossover/fir.rs
@@ -14,36 +14,60 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
-use nih_plug::buffer::ChannelSamples;
use nih_plug::debug::*;
+use realfft::num_complex::Complex32;
+use realfft::{ComplexToReal, RealFftPlanner, RealToComplex};
use std::f32;
-use std::simd::f32x2;
+use std::sync::Arc;
-use self::filter::{FirCoefficients, FirFilter};
+use self::filter::{FftFirFilter, FirCoefficients, FFT_INPUT_SIZE, FFT_SIZE};
+use crate::crossover::fir::filter::FILTER_SIZE;
use crate::crossover::iir::biquad::{BiquadCoefficients, NEUTRAL_Q};
-use crate::NUM_BANDS;
+use crate::{NUM_BANDS, NUM_CHANNELS};
pub mod filter;
-// TODO: Move this to FFT convolution so we can increase the filter size and improve low latency performance
-
-/// The size of the FIR filter window, or the number of taps. The low frequency performance is
-/// greatly limited by this.
-const FILTER_SIZE: usize = 121;
-/// The size of the FIR filter's ring buffer. This is `FILTER_SIZE` rounded up to the next power of
-/// two.
-const RING_BUFFER_SIZE: usize = FILTER_SIZE.next_power_of_two();
-
-#[derive(Debug)]
pub struct FirCrossover {
/// The kind of crossover to use. `.update_filters()` must be called after changing this.
mode: FirCrossoverType,
/// Filters for each of the bands. Depending on the number of bands argument passed to
- /// `.process()` two to five of these may be used. The first one always contains a low-pass
+ /// `.process()`, two to five of these may be used. The first one always contains a low-pass
/// filter, the last one always contains a high-pass filter, while the other bands will contain
/// band-pass filters.
- band_filters: [FirFilter; NUM_BANDS],
+ ///
+ /// These filters will be fed the FFT from the main input to produce output samples for the next
+ /// period. Everything could be a bit nicer to read if the filter did the entire STFT process,
+ /// but that would mean duplicating the input ring buffer and forward DFT up to five times.
+ band_filters: [FftFirFilter; NUM_BANDS],
+
+ /// A ring buffer that is used to store inputs for the next FFT. Until it is time to take the
+ /// next FFT, samples are copied from the inputs to this buffer, while simultaneously copying
+ /// the already processed output samples from the output buffers to the output. Once
+ /// `io_buffers_next_indices` wrap back around to 0, the next buffer should be produced.
+ input_buffers: [[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
+ /// A ring that contains the next period's outputs for each of the five bands. This is written
+ /// to and read from in lockstep with `input_buffers`.
+ band_output_buffers: [[[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize]; NUM_BANDS],
+ /// The index in the inner `input_buffers` and `band_output_buffers` arrays the next sample
+ /// should be written to and read from. After a sample is written to the band's output, this is
+ /// incremented by one. Once `self.io_buffers_next_indices[channel_idx] == FFT_INPUT_SIZE`, the
+ /// next block should be processed.
+ ///
+ /// This is stored as an array since each channel is processed individually. While this should
+ /// of course stay in sync, this makes it much simpler to process both channels in sequence.
+ io_buffers_next_indices: [usize; NUM_CHANNELS as usize],
+
+ /// The algorithm for the FFT operation.
+ r2c_plan: Arc>,
+ /// The algorithm for the IFFT operation.
+ c2r_plan: Arc>,
+
+ /// A real buffer that may be written to in place during the FFT and IFFT operations.
+ real_scratch_buffer: [f32; FFT_SIZE],
+ /// A complex buffer corresponding to `real_scratch_buffer` that may be written to in place
+ /// during the FFT and IFFT operations.
+ complex_scratch_buffer: [Complex32; FFT_SIZE / 2 + 1],
}
/// The type of FIR crossover to use.
@@ -63,9 +87,19 @@ impl FirCrossover {
/// Make sure to add the latency reported by [`latency()`][Self::latency()] to the plugin's
/// reported latency.
pub fn new(mode: FirCrossoverType) -> Self {
+ let mut fft_planner = RealFftPlanner::new();
+
Self {
mode,
band_filters: Default::default(),
+
+ input_buffers: [[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
+ band_output_buffers: [[[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize]; NUM_BANDS],
+ io_buffers_next_indices: [0; NUM_CHANNELS as usize],
+ r2c_plan: fft_planner.plan_fft_forward(FFT_SIZE),
+ c2r_plan: fft_planner.plan_fft_inverse(FFT_SIZE),
+ real_scratch_buffer: [0.0; FFT_SIZE],
+ complex_scratch_buffer: [Complex32::default(); FFT_SIZE / 2 + 1],
}
}
@@ -74,43 +108,96 @@ impl FirCrossover {
// Actually, that's a lie, since we currently only do linear-phase filters with a constant
// size
match self.mode {
- FirCrossoverType::LinkwitzRiley24LinearPhase => (FILTER_SIZE / 2) as u32,
+ FirCrossoverType::LinkwitzRiley24LinearPhase => FFT_INPUT_SIZE as u32,
}
}
/// Split the signal into bands using the crossovers previously configured through `.update()`.
- /// The split bands will be written to `band_outputs`. `main_io` is not written to, and should
- /// be cleared separately.
+ /// The split bands will be written to `band_outputs`. The main output should be cleared
+ /// separately. For efficiency's sake this processes an entire channel at once to minimize the
+ /// number of FFT operations needed. Since this process delays the signal by `FFT_INPUT_SIZE`
+ /// samples, the latency should be reported to the host.
pub fn process(
&mut self,
num_bands: usize,
- main_io: &ChannelSamples,
- band_outputs: [ChannelSamples; NUM_BANDS],
+ main_input: &[f32],
+ mut band_outputs: [&mut &mut [f32]; NUM_BANDS],
+ channel_idx: usize,
) {
- nih_debug_assert!(num_bands >= 2);
- nih_debug_assert!(num_bands <= NUM_BANDS);
- // Required for the SIMD, so we'll just do a hard assert or the unchecked conversions will
- // be unsound
- assert!(main_io.len() == 2);
+ nih_debug_assert!(main_input.len() == band_outputs[0].len());
+ nih_debug_assert!(channel_idx < NUM_CHANNELS as usize);
- let samples: f32x2 = unsafe { main_io.to_simd_unchecked() };
- match self.mode {
- FirCrossoverType::LinkwitzRiley24LinearPhase => {
- // TODO: Everything is structured to be fast to compute for the IIR filters. Instead
- // of doing two channels at the same time, it would probably be faster to use
- // SIMD for the actual convolution so we can do 4 or 8 multiply-adds at the
- // same time. Or perhaps a better way to spend the time, use FFT convolution
- // for this.
- for (filter, mut output) in self
- .band_filters
+ // We'll copy already processed output to `band_outputs` while storing input for the next
+ // FFT operation. This is a modified version of what's going on in `StftHelper`.
+ let mut current_sample_idx = 0;
+ while current_sample_idx < main_input.len() {
+ {
+ // When this index reaches `FFT_INPUT_SIZE`, the next block should be processed
+ let io_buffers_next_indices = self.io_buffers_next_indices[channel_idx];
+ let process_num_samples = (FFT_INPUT_SIZE - io_buffers_next_indices)
+ .min(main_input.len() - current_sample_idx);
+
+ // Since we can't do this in-place (without unnecessarily duplicating a ton of data),
+ // copying data from and to the ring buffers can be done with simple memcpys
+ self.input_buffers[channel_idx]
+ [io_buffers_next_indices..io_buffers_next_indices + process_num_samples]
+ .copy_from_slice(
+ &main_input[current_sample_idx..current_sample_idx + process_num_samples],
+ );
+ for (band_output, band_output_buffers) in band_outputs
.iter_mut()
- .zip(band_outputs)
+ .zip(self.band_output_buffers.iter())
.take(num_bands)
{
- let filtered_samples = filter.process(samples);
-
- unsafe { output.from_simd_unchecked(filtered_samples) };
+ band_output[current_sample_idx..current_sample_idx + process_num_samples]
+ .copy_from_slice(
+ &band_output_buffers[channel_idx][io_buffers_next_indices
+ ..io_buffers_next_indices + process_num_samples],
+ );
}
+
+ // This is tracked per-channel because both channels are processed individually
+ self.io_buffers_next_indices[channel_idx] += process_num_samples;
+ current_sample_idx += process_num_samples;
+ }
+
+ // At this point we either reached the end of the buffer (`current_sample_idx ==
+ // main_input.len()`), or we filled up the input buffer and we can process the next block
+ if self.io_buffers_next_indices[channel_idx] == FFT_INPUT_SIZE {
+ // Zero pad the input for the FFT
+ self.real_scratch_buffer[..FFT_INPUT_SIZE]
+ .copy_from_slice(&self.input_buffers[channel_idx]);
+ self.real_scratch_buffer[FFT_INPUT_SIZE..].fill(0.0);
+
+ self.r2c_plan
+ .process_with_scratch(
+ &mut self.real_scratch_buffer,
+ &mut self.complex_scratch_buffer,
+ &mut [],
+ )
+ .unwrap();
+
+ // The input can then be used to produce each band's output. Since realfft expects
+ // to be able to modify the input, we need to make a copy of this first:
+ let input_fft = self.complex_scratch_buffer;
+
+ for (band_output_buffers, band_filter) in self
+ .band_output_buffers
+ .iter_mut()
+ .zip(self.band_filters.iter_mut())
+ .take(num_bands)
+ {
+ band_filter.process(
+ &input_fft,
+ &mut band_output_buffers[channel_idx],
+ channel_idx,
+ &*self.c2r_plan,
+ &mut self.real_scratch_buffer,
+ &mut self.complex_scratch_buffer,
+ )
+ }
+
+ self.io_buffers_next_indices[channel_idx] = 0;
}
}
}
@@ -150,11 +237,16 @@ impl FirCrossover {
FirCoefficients::design_fourth_order_linear_phase_low_pass_from_biquad(
iir_coefs,
);
- self.band_filters[0].coefficients = lp_fir_coefs;
+ self.band_filters[0].recompute_coefficients(
+ lp_fir_coefs.clone(),
+ &*self.r2c_plan,
+ &mut self.real_scratch_buffer,
+ &mut self.complex_scratch_buffer,
+ );
// For the band-pass filters and the final high-pass filter, we need to keep track
// of the accumulated impulse response
- let mut accumulated_ir = self.band_filters[0].coefficients.clone();
+ let mut accumulated_ir = lp_fir_coefs;
for (split_frequency, band_filter) in frequencies
.iter()
.zip(self.band_filters.iter_mut())
@@ -191,7 +283,12 @@ impl FirCrossover {
*accumulated_coef += *bp_coef;
}
- band_filter.coefficients = fir_bp_coefs;
+ band_filter.recompute_coefficients(
+ fir_bp_coefs,
+ &*self.r2c_plan,
+ &mut self.real_scratch_buffer,
+ &mut self.complex_scratch_buffer,
+ );
}
// And finally we can do a spectral inversion of the accumulated IR to the the last
@@ -202,7 +299,12 @@ impl FirCrossover {
}
fir_hp_coefs.0[FILTER_SIZE / 2] += 1.0;
- self.band_filters[num_bands - 1].coefficients = fir_hp_coefs;
+ self.band_filters[num_bands - 1].recompute_coefficients(
+ fir_hp_coefs,
+ &*self.r2c_plan,
+ &mut self.real_scratch_buffer,
+ &mut self.complex_scratch_buffer,
+ );
}
}
}
@@ -212,5 +314,16 @@ impl FirCrossover {
for filter in &mut self.band_filters {
filter.reset();
}
+
+ // The inputs don't need to be reset as they'll be overwritten immediately
+ for band_buffers in &mut self.band_output_buffers {
+ for buffer in band_buffers {
+ buffer.fill(0.0);
+ }
+ }
+
+ // This being 0 means that the very first period will simply output the silence from above
+ // and gather input for the next FFT
+ self.io_buffers_next_indices.fill(0);
}
}
diff --git a/plugins/crossover/src/crossover/fir/filter.rs b/plugins/crossover/src/crossover/fir/filter.rs
index ae99c04d..d1e0626f 100644
--- a/plugins/crossover/src/crossover/fir/filter.rs
+++ b/plugins/crossover/src/crossover/fir/filter.rs
@@ -14,27 +14,50 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
+use realfft::num_complex::Complex32;
+use realfft::{ComplexToReal, RealToComplex};
use std::f32;
-use std::simd::{f32x2, StdFloat};
-use super::{FILTER_SIZE, RING_BUFFER_SIZE};
use crate::crossover::iir::biquad::{Biquad, BiquadCoefficients};
+use crate::NUM_CHANNELS;
+
+/// We're doing FFT convolution here since otherwise there's no way to get decent low-frequency
+/// accuracy while still having acceptable performance. The input going into the STFT will be
+/// smaller since it will be padded with zeroes to compensate for the otherwise overlapping tail
+/// caused by the convolution.
+pub const FFT_SIZE: usize = 4096;
+/// The input chunk size the FFT convolution is processing. This is also the latency. By having this
+/// be exactly half of FFT_SIZE, we can make the overlap-add part of the FFT convolution a lot
+/// simpler for ourselves. (check the `StftHelper` struct in NIH-plug itself for an example that
+/// can handle arbitrary padding)
+pub const FFT_INPUT_SIZE: usize = FFT_SIZE / 2;
+/// The size of the FIR filter window, or the number of taps. Convolving `FFT_INPUT_SIZE` samples
+/// with this filter should fit exactly in `FFT_SIZE`, and it should be an odd number.
+pub const FILTER_SIZE: usize = FFT_SIZE - FFT_INPUT_SIZE + 1;
/// A single FIR filter that may be configured in any way. In this plugin this will be a
-/// linear-phase low-pass, band-pass, or high-pass filter.
+/// linear-phase low-pass, band-pass, or high-pass filter. Implemented using FFT convolution. `git
+/// blame` this for a version that uses direct convolution.
+///
+/// `FFT_INPUT_SIZE` is the size of the input that will be processed. The size of the FFT window is
+/// `FFT_INPUT_SIZE * 2`. That makes handling the overlap easy, as each IDFT after multiplying the
+/// padded input and the padded impulse response FFTs will result in one `FFT_INPUT_SIZE` period
+/// of output that can be taken as is, followed by one `FFT_INPUT_SIZE` period of samples that
+/// need to be added to the next period's outputs as part of the overlap-add process.
#[derive(Debug, Clone)]
-pub struct FirFilter {
- /// The coefficients for this filter. The filters for both channels should be equivalent, this
- /// just avoids broadcasts in the filter process.
- pub coefficients: FirCoefficients,
+pub struct FftFirFilter {
+ /// The `FILTER_SIZE` sized impulse response, zero padded to `FFT_SIZE`, run through the DFT,
+ /// and then normalized by dividing by `FFT_SIZE`.
+ padded_ir_fft: [Complex32; FFT_SIZE / 2 + 1],
- /// A ring buffer storing the last `FILTER_SIZE - 1` samples. The capacity is `FILTER_SIZE`
- /// rounded up to the next power of two.
- delay_buffer: [f32x2; RING_BUFFER_SIZE],
- /// The index in `delay_buffer` to write the next sample to. Wrapping negative indices back to
- /// the end, the previous sample can be found at `delay_buffer[delay_buffer_next_idx - 1]`, the
- /// one before that at `delay_buffer[delay_buffer_next_idx - 2]`, and so on.
- delay_buffer_next_idx: usize,
+ /// The padding from the previous IDFT operation that needs to be added to the next output
+ /// buffer. After the IDFT process there will be an `FFT_SIZE` real scratch buffer containing
+ /// the output. At that point the first `FFT_INPUT_SIZE` samples of those will be copied to
+ /// `output_buffers` in the FIR crossover, `unapplied_padding_buffer` will be added to that
+ /// output buffer, and then finally the last `FFT_INPUT_SIZE` samples of the scratch buffer are
+ /// copied to `unapplied_padding_buffer`. This thus makes sure the tail gets delayed by another
+ /// period so that everything matches up.
+ unapplied_padding_buffers: [[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
}
/// Coefficients for a (linear-phase) FIR filter. This struct includes ways to design the filter.
@@ -43,12 +66,14 @@ pub struct FirFilter {
#[derive(Debug, Clone)]
pub struct FirCoefficients(pub [f32; N]);
-impl Default for FirFilter {
+impl Default for FftFirFilter {
fn default() -> Self {
Self {
- coefficients: FirCoefficients::default(),
- delay_buffer: [f32x2::default(); RING_BUFFER_SIZE],
- delay_buffer_next_idx: 0,
+ // Would be nicer to initialize this to an impulse response that actually had the
+ // correct position wrt the usual linear-phase latency, but this is fine since it should
+ // never be used anyways
+ padded_ir_fft: [Complex32::new(1.0 / FFT_SIZE as f32, 0.0); FFT_SIZE / 2 + 1],
+ unapplied_padding_buffers: [[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
}
}
}
@@ -64,53 +89,87 @@ impl Default for FirCoefficients {
}
}
-impl FirFilter {
- /// Process left and right audio samples through the filter.
- pub fn process(&mut self, samples: f32x2) -> f32x2 {
- // TODO: Replace direct convolution with FFT convolution, would make the implementation much
- // more complex though because of the multi output part
- let coefficients = &self.coefficients.0;
- let mut result = f32x2::splat(coefficients[0]) * samples;
+impl FftFirFilter {
+ /// Filter `FFT_INPUT_SIZE` samples padded to `FFT_SIZE` through this filter, and write the
+ /// outputs to `output_samples` (belonging to channel `channel_idx`), at an `FFT_INPUT_SIZE`
+ /// delay. This is a bit weird and probably difficult to follow because as an optimization the
+ /// DFT is taken only once, and then the IDFT is taken once for every filtered band. This
+ /// function is thus called inside of the overlap-add loop to avoid duplicate work.
+ pub fn process(
+ &mut self,
+ input_fft: &[Complex32; FFT_SIZE / 2 + 1],
+ output_samples: &mut [f32; FFT_INPUT_SIZE],
+ output_channel_idx: usize,
+ c2r_plan: &dyn ComplexToReal,
+ real_scratch_buffer: &mut [f32; FFT_SIZE],
+ complex_scratch_buffer: &mut [Complex32; FFT_SIZE / 2 + 1],
+ ) {
+ // The padded input FFT has already been taken, so we only need to copy it to the scratch
+ // buffer (the input cannot change as the next band might need it as well).
+ complex_scratch_buffer.copy_from_slice(input_fft);
- // Now multiply `self.coefficients[1..]` with the delay buffer starting at
- // `self.delay_buffer_next_idx - 1`, wrapping around to the end when that is reached
- // The end index is exclusive, and we already did the multiply+add for the first coefficient.
- let before_wraparound_start_idx = self
- .delay_buffer_next_idx
- .saturating_sub(coefficients.len() - 1);
- let before_wraparound_end_idx = self.delay_buffer_next_idx;
- let num_before_wraparound = before_wraparound_end_idx - before_wraparound_start_idx;
- for (coefficient, delayed_sample) in coefficients[1..1 + num_before_wraparound].iter().zip(
- self.delay_buffer[before_wraparound_start_idx..before_wraparound_end_idx]
- .iter()
- .rev(),
- ) {
- // `result += coefficient * sample`, but with explicit FMA
- result = f32x2::splat(*coefficient).mul_add(*delayed_sample, result);
+ // The FFT of the impulse response has already been normalized, so we just need to
+ // multiply the two buffers
+ for (output_bin, ir_bin) in complex_scratch_buffer
+ .iter_mut()
+ .zip(self.padded_ir_fft.iter())
+ {
+ *output_bin *= ir_bin;
}
+ c2r_plan
+ .process_with_scratch(complex_scratch_buffer, real_scratch_buffer, &mut [])
+ .unwrap();
- let after_wraparound_begin_idx =
- self.delay_buffer.len() - (coefficients.len() - num_before_wraparound);
- let after_wraparound_end_idx = self.delay_buffer.len();
- for (coefficient, delayed_sample) in coefficients[1 + num_before_wraparound..].iter().zip(
- self.delay_buffer[after_wraparound_begin_idx..after_wraparound_end_idx]
- .iter()
- .rev(),
- ) {
- result = f32x2::splat(*coefficient).mul_add(*delayed_sample, result);
+ // At this point the first `FFT_INPUT_SIZE` elements in `real_scratch_buffer`
+ // contain the output for the next period, while the last `FFT_INPUT_SIZE` elements
+ // contain output that needs to be added to the period after that. Since previous
+ // period also produced similar delayed output, we'll need to copy that to the
+ // results as well.
+ output_samples.copy_from_slice(&real_scratch_buffer[..FFT_INPUT_SIZE]);
+ for (output_sample, padding_sample) in output_samples
+ .iter_mut()
+ .zip(self.unapplied_padding_buffers[output_channel_idx].iter())
+ {
+ *output_sample += *padding_sample;
}
+ self.unapplied_padding_buffers[output_channel_idx]
+ .copy_from_slice(&real_scratch_buffer[FFT_INPUT_SIZE..]);
+ }
- // And finally write the samples to the delay buffer for the enxt sample
- self.delay_buffer[self.delay_buffer_next_idx] = samples;
- self.delay_buffer_next_idx = (self.delay_buffer_next_idx + 1) % self.delay_buffer.len();
+ /// Set the filter's coefficients based on raw FIR filter coefficients. These will be padded,
+ /// run through the DFT, and normalized.
+ pub fn recompute_coefficients(
+ &mut self,
+ coefficients: FirCoefficients,
+ r2c_plan: &dyn RealToComplex,
+ real_scratch_buffer: &mut [f32; FFT_SIZE],
+ complex_scratch_buffer: &mut [Complex32; FFT_SIZE / 2 + 1],
+ ) {
+ // This needs to be padded with zeroes
+ real_scratch_buffer[..FILTER_SIZE].copy_from_slice(&coefficients.0);
+ real_scratch_buffer[FILTER_SIZE..].fill(0.0);
- result
+ r2c_plan
+ .process_with_scratch(real_scratch_buffer, complex_scratch_buffer, &mut [])
+ .unwrap();
+
+ // The resulting buffer needs to be normalized and written to `self.padded_ir_fft`. That way
+ // we don't need to do anything but multiplying and writing the results back when
+ // processing.
+ let normalization_factor = 1.0 / FFT_SIZE as f32;
+ for (filter_bin, target_bin) in complex_scratch_buffer
+ .iter()
+ .zip(self.padded_ir_fft.iter_mut())
+ {
+ *target_bin = *filter_bin * normalization_factor;
+ }
}
/// Reset the internal filter state.
pub fn reset(&mut self) {
- self.delay_buffer.fill(f32x2::default());
- self.delay_buffer_next_idx = 0;
+ for buffer in &mut self.unapplied_padding_buffers {
+ buffer.fill(0.0);
+ }
}
}
diff --git a/plugins/crossover/src/lib.rs b/plugins/crossover/src/lib.rs
index d18a22fb..9174a683 100644
--- a/plugins/crossover/src/lib.rs
+++ b/plugins/crossover/src/lib.rs
@@ -27,6 +27,9 @@ use std::sync::Arc;
mod crossover;
+/// The number of channels this plugin supports. Hard capped at 2 for SIMD reasons.
+pub const NUM_CHANNELS: u32 = 2;
+
/// The number of bands. Not used directly here, but this avoids hardcoding some constants in the
/// crossover implementations.
pub const NUM_BANDS: usize = 5;
@@ -163,13 +166,13 @@ impl Plugin for Crossover {
const VERSION: &'static str = "0.1.0";
- const DEFAULT_NUM_INPUTS: u32 = 2;
- const DEFAULT_NUM_OUTPUTS: u32 = 2;
+ const DEFAULT_NUM_INPUTS: u32 = NUM_CHANNELS;
+ const DEFAULT_NUM_OUTPUTS: u32 = NUM_CHANNELS;
const DEFAULT_AUX_OUTPUTS: Option = Some(AuxiliaryIOConfig {
// Two to five of these busses will be used at a time
num_busses: 5,
- num_channels: 2,
+ num_channels: NUM_CHANNELS,
});
const PORT_NAMES: PortNames = PortNames {
@@ -186,9 +189,9 @@ impl Plugin for Crossover {
fn accepts_bus_config(&self, config: &BusConfig) -> bool {
// Only do stereo
- config.num_input_channels == 2
- && config.num_output_channels == 2
- && config.aux_output_busses.num_channels == 2
+ config.num_input_channels == NUM_CHANNELS
+ && config.num_output_channels == NUM_CHANNELS
+ && config.aux_output_busses.num_channels == NUM_CHANNELS
}
fn initialize(
@@ -232,18 +235,7 @@ impl Plugin for Crossover {
CrossoverType::LinkwitzRiley24LinearPhase => {
context.set_latency_samples(self.fir_crossover.latency());
- todo!();
- // Self::do_process(buffer, aux, |main_channel_samples, bands| {
- // if self.should_update_filters() {
- // self.update_filters(buffer.len() as u32);
- // }
-
- // self.fir_crossover.process(
- // self.params.num_bands.value as usize,
- // main_channel_samples,
- // bands,
- // );
- // })
+ self.process_fir(buffer, aux);
}
}
@@ -253,7 +245,7 @@ impl Plugin for Crossover {
impl Crossover {
/// Takes care of all of the boilerplate in zipping the outputs together to get a nice iterator
- /// friendly and SIMD-able interface for the processing function. Prevents havign to branch per
+ /// friendly and SIMD-able interface for the processing function. Prevents having to branch per
/// sample. The closure receives an input sample and it should write the output samples for each
/// band to the array.
fn process_iir(&mut self, buffer: &mut Buffer, aux: &mut AuxiliaryBuffers) {
@@ -310,6 +302,45 @@ impl Crossover {
}
}
+ /// `process_iir()`, but for the linear-phase FIR crossovers. This processes an entire channel
+ /// at once instead of processing per-sample since we use FFT convolution.
+ fn process_fir(&mut self, buffer: &mut Buffer, aux: &mut AuxiliaryBuffers) {
+ // In theory we could do smoothing in between processed blocks, but this should be fine
+ if self.should_update_filters() {
+ self.update_filters(buffer.len() as u32);
+ }
+
+ let aux_outputs = &mut aux.outputs;
+ let (band_1_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
+ let (band_2_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
+ let (band_3_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
+ let (band_4_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
+ let (band_5_buffer, _) = aux_outputs.split_first_mut().unwrap();
+
+ // We can avoid a lot of hardcoding and conditionals by restoring the original array structure
+ for channel_idx in 0..buffer.channels() {
+ let main_io = &mut buffer.as_slice()[channel_idx];
+ let band_outputs = [
+ &mut band_1_buffer.as_slice()[channel_idx],
+ &mut band_2_buffer.as_slice()[channel_idx],
+ &mut band_3_buffer.as_slice()[channel_idx],
+ &mut band_4_buffer.as_slice()[channel_idx],
+ &mut band_5_buffer.as_slice()[channel_idx],
+ ];
+
+ self.fir_crossover.process(
+ self.params.num_bands.value as usize,
+ main_io,
+ band_outputs,
+ channel_idx,
+ );
+
+ // The main output should be silent as the signal is already evenly split over the other
+ // bands
+ main_io.fill(0.0);
+ }
+ }
+
/// Returns whether the filters should be updated. There are different updating functions for
/// the IIR and FIR crossovers.
fn should_update_filters(&mut self) -> bool {