1
0
Fork 0

Rework FIR crossover to use FFT convolution

This commit is contained in:
Robbert van der Helm 2022-06-07 20:32:27 +02:00
parent 2c48ceb392
commit 5a51dce00d
5 changed files with 323 additions and 118 deletions

1
Cargo.lock generated
View file

@ -711,6 +711,7 @@ name = "crossover"
version = "0.1.0"
dependencies = [
"nih_plug",
"realfft",
]
[[package]]

View file

@ -16,3 +16,4 @@ simd = ["nih_plug/simd"]
[dependencies]
nih_plug = { path = "../../", features = ["assert_process_allocs"] }
realfft = "3.0.0"

View file

@ -14,36 +14,60 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use nih_plug::buffer::ChannelSamples;
use nih_plug::debug::*;
use realfft::num_complex::Complex32;
use realfft::{ComplexToReal, RealFftPlanner, RealToComplex};
use std::f32;
use std::simd::f32x2;
use std::sync::Arc;
use self::filter::{FirCoefficients, FirFilter};
use self::filter::{FftFirFilter, FirCoefficients, FFT_INPUT_SIZE, FFT_SIZE};
use crate::crossover::fir::filter::FILTER_SIZE;
use crate::crossover::iir::biquad::{BiquadCoefficients, NEUTRAL_Q};
use crate::NUM_BANDS;
use crate::{NUM_BANDS, NUM_CHANNELS};
pub mod filter;
// TODO: Move this to FFT convolution so we can increase the filter size and improve low latency performance
/// The size of the FIR filter window, or the number of taps. The low frequency performance is
/// greatly limited by this.
const FILTER_SIZE: usize = 121;
/// The size of the FIR filter's ring buffer. This is `FILTER_SIZE` rounded up to the next power of
/// two.
const RING_BUFFER_SIZE: usize = FILTER_SIZE.next_power_of_two();
#[derive(Debug)]
pub struct FirCrossover {
/// The kind of crossover to use. `.update_filters()` must be called after changing this.
mode: FirCrossoverType,
/// Filters for each of the bands. Depending on the number of bands argument passed to
/// `.process()` two to five of these may be used. The first one always contains a low-pass
/// `.process()`, two to five of these may be used. The first one always contains a low-pass
/// filter, the last one always contains a high-pass filter, while the other bands will contain
/// band-pass filters.
band_filters: [FirFilter; NUM_BANDS],
///
/// These filters will be fed the FFT from the main input to produce output samples for the next
/// period. Everything could be a bit nicer to read if the filter did the entire STFT process,
/// but that would mean duplicating the input ring buffer and forward DFT up to five times.
band_filters: [FftFirFilter; NUM_BANDS],
/// A ring buffer that is used to store inputs for the next FFT. Until it is time to take the
/// next FFT, samples are copied from the inputs to this buffer, while simultaneously copying
/// the already processed output samples from the output buffers to the output. Once
/// `io_buffers_next_indices` wraps back around to 0, the next buffer should be produced.
input_buffers: [[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
/// A ring buffer that contains the next period's outputs for each of the five bands. This is written
/// to and read from in lockstep with `input_buffers`.
band_output_buffers: [[[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize]; NUM_BANDS],
/// The index in `input_buffers` and `band_output_buffers` the next sample should be written to
/// and read from. After a sample is written to the band's output this is incremented by one.
/// Once `self.io_buffers_next_indices[channel_idx] == FFT_INPUT_SIZE` the next block should be
/// processed.
///
/// This is stored as an array since each channel is processed individually. While this should
/// of course stay in sync, this makes it much simpler to process both channels in sequence.
io_buffers_next_indices: [usize; NUM_CHANNELS as usize],
/// The algorithm for the FFT operation.
r2c_plan: Arc<dyn RealToComplex<f32>>,
/// The algorithm for the IFFT operation.
c2r_plan: Arc<dyn ComplexToReal<f32>>,
/// A real buffer that may be written to in place during the FFT and IFFT operations.
real_scratch_buffer: [f32; FFT_SIZE],
/// A complex buffer corresponding to `real_scratch_buffer` that may be written to in place
/// during the FFT and IFFT operations.
complex_scratch_buffer: [Complex32; FFT_SIZE / 2 + 1],
}
/// The type of FIR crossover to use.
@ -63,9 +87,19 @@ impl FirCrossover {
/// Make sure to add the latency reported by [`latency()`][Self::latency()] to the plugin's
/// reported latency.
pub fn new(mode: FirCrossoverType) -> Self {
    // Both the forward and the inverse plan operate on `FFT_SIZE` real samples, so they can be
    // created from the same planner up front
    let mut planner = RealFftPlanner::<f32>::new();
    let r2c_plan = planner.plan_fft_forward(FFT_SIZE);
    let c2r_plan = planner.plan_fft_inverse(FFT_SIZE);

    Self {
        mode,
        band_filters: Default::default(),

        // All ring buffers start out silent, and reading/writing starts at the beginning of the
        // buffers for every channel
        input_buffers: [[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
        band_output_buffers: [[[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize]; NUM_BANDS],
        io_buffers_next_indices: [0; NUM_CHANNELS as usize],

        r2c_plan,
        c2r_plan,

        // Scratch space shared by the forward and inverse transforms
        real_scratch_buffer: [0.0; FFT_SIZE],
        complex_scratch_buffer: [Complex32::default(); FFT_SIZE / 2 + 1],
    }
}
@ -74,43 +108,96 @@ impl FirCrossover {
// Actually, that's a lie, since we currently only do linear-phase filters with a constant
// size
match self.mode {
FirCrossoverType::LinkwitzRiley24LinearPhase => (FILTER_SIZE / 2) as u32,
FirCrossoverType::LinkwitzRiley24LinearPhase => FFT_INPUT_SIZE as u32,
}
}
/// Split the signal into bands using the crossovers previously configured through `.update()`.
/// The split bands will be written to `band_outputs`. `main_io` is not written to, and should
/// be cleared separately.
/// The split bands will be written to `band_outputs`. The main output should be cleared
/// separately. For efficiency's sake this processes an entire channel at once to minimize the
/// number of FFT operations needed. Since this process delays the signal by `FFT_INPUT_SIZE`
/// samples, the latency should be reported to the host.
pub fn process(
&mut self,
num_bands: usize,
main_io: &ChannelSamples,
band_outputs: [ChannelSamples; NUM_BANDS],
main_input: &[f32],
mut band_outputs: [&mut &mut [f32]; NUM_BANDS],
channel_idx: usize,
) {
nih_debug_assert!(num_bands >= 2);
nih_debug_assert!(num_bands <= NUM_BANDS);
// Required for the SIMD, so we'll just do a hard assert or the unchecked conversions will
// be unsound
assert!(main_io.len() == 2);
nih_debug_assert!(main_input.len() == band_outputs[0].len());
nih_debug_assert!(channel_idx < NUM_CHANNELS as usize);
let samples: f32x2 = unsafe { main_io.to_simd_unchecked() };
match self.mode {
FirCrossoverType::LinkwitzRiley24LinearPhase => {
// TODO: Everything is structured to be fast to compute for the IIR filters. Instead
// of doing two channels at the same time, it would probably be faster to use
// SIMD for the actual convolution so we can do 4 or 8 multiply-adds at the
// same time. Or perhaps a better way to spend the time, use FFT convolution
// for this.
for (filter, mut output) in self
.band_filters
// We'll copy already processed output to `band_outputs` while storing input for the next
// FFT operation. This is a modified version of what's going on in `StftHelper`.
let mut current_sample_idx = 0;
while current_sample_idx < main_input.len() {
{
// When `self.io_buffers_next_indices[channel_idx] == FFT_INPUT_SIZE`, the next block should be processed
let io_buffers_next_indices = self.io_buffers_next_indices[channel_idx];
let process_num_samples = (FFT_INPUT_SIZE - io_buffers_next_indices)
.min(main_input.len() - current_sample_idx);
// Since we can't do this in-place (without unnecessarily duplicating a ton of data),
// copying data from and to the ring buffers can be done with simple memcpys
self.input_buffers[channel_idx]
[io_buffers_next_indices..io_buffers_next_indices + process_num_samples]
.copy_from_slice(
&main_input[current_sample_idx..current_sample_idx + process_num_samples],
);
for (band_output, band_output_buffers) in band_outputs
.iter_mut()
.zip(band_outputs)
.zip(self.band_output_buffers.iter())
.take(num_bands)
{
let filtered_samples = filter.process(samples);
unsafe { output.from_simd_unchecked(filtered_samples) };
band_output[current_sample_idx..current_sample_idx + process_num_samples]
.copy_from_slice(
&band_output_buffers[channel_idx][io_buffers_next_indices
..io_buffers_next_indices + process_num_samples],
);
}
// This is tracked per-channel because both channels are processed individually
self.io_buffers_next_indices[channel_idx] += process_num_samples;
current_sample_idx += process_num_samples;
}
// At this point we either reached the end of the buffer (`current_sample_idx ==
// main_input.len()`), or we filled up the `io_buffer` and we can process the next block
if self.io_buffers_next_indices[channel_idx] == FFT_INPUT_SIZE {
// Zero pad the input for the FFT
self.real_scratch_buffer[..FFT_INPUT_SIZE]
.copy_from_slice(&self.input_buffers[channel_idx]);
self.real_scratch_buffer[FFT_INPUT_SIZE..].fill(0.0);
self.r2c_plan
.process_with_scratch(
&mut self.real_scratch_buffer,
&mut self.complex_scratch_buffer,
&mut [],
)
.unwrap();
// The input can then be used to produce each band's output. Since realfft expects
// to be able to modify the input, we need to make a copy of this first:
let input_fft = self.complex_scratch_buffer;
for (band_output_buffers, band_filter) in self
.band_output_buffers
.iter_mut()
.zip(self.band_filters.iter_mut())
.take(num_bands)
{
band_filter.process(
&input_fft,
&mut band_output_buffers[channel_idx],
channel_idx,
&*self.c2r_plan,
&mut self.real_scratch_buffer,
&mut self.complex_scratch_buffer,
)
}
self.io_buffers_next_indices[channel_idx] = 0;
}
}
}
@ -150,11 +237,16 @@ impl FirCrossover {
FirCoefficients::design_fourth_order_linear_phase_low_pass_from_biquad(
iir_coefs,
);
self.band_filters[0].coefficients = lp_fir_coefs;
self.band_filters[0].recompute_coefficients(
lp_fir_coefs.clone(),
&*self.r2c_plan,
&mut self.real_scratch_buffer,
&mut self.complex_scratch_buffer,
);
// For the band-pass filters and the final high-pass filter, we need to keep track
// of the accumulated impulse response
let mut accumulated_ir = self.band_filters[0].coefficients.clone();
let mut accumulated_ir = lp_fir_coefs;
for (split_frequency, band_filter) in frequencies
.iter()
.zip(self.band_filters.iter_mut())
@ -191,7 +283,12 @@ impl FirCrossover {
*accumulated_coef += *bp_coef;
}
band_filter.coefficients = fir_bp_coefs;
band_filter.recompute_coefficients(
fir_bp_coefs,
&*self.r2c_plan,
&mut self.real_scratch_buffer,
&mut self.complex_scratch_buffer,
);
}
// And finally we can do a spectral inversion of the accumulated IR to get the last
@ -202,7 +299,12 @@ impl FirCrossover {
}
fir_hp_coefs.0[FILTER_SIZE / 2] += 1.0;
self.band_filters[num_bands - 1].coefficients = fir_hp_coefs;
self.band_filters[num_bands - 1].recompute_coefficients(
fir_hp_coefs,
&*self.r2c_plan,
&mut self.real_scratch_buffer,
&mut self.complex_scratch_buffer,
);
}
}
}
@ -212,5 +314,16 @@ impl FirCrossover {
for filter in &mut self.band_filters {
filter.reset();
}
// The inputs don't need to be reset as they'll be overwritten immediately
for band_buffers in &mut self.band_output_buffers {
for buffer in band_buffers {
buffer.fill(0.0);
}
}
// This being 0 means that the very first period will simply output the silence from above
// and gather input for the next FFT
self.io_buffers_next_indices.fill(0);
}
}

View file

@ -14,27 +14,50 @@
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
use realfft::num_complex::Complex32;
use realfft::{ComplexToReal, RealToComplex};
use std::f32;
use std::simd::{f32x2, StdFloat};
use super::{FILTER_SIZE, RING_BUFFER_SIZE};
use crate::crossover::iir::biquad::{Biquad, BiquadCoefficients};
use crate::NUM_CHANNELS;
/// We're doing FFT convolution here since otherwise there's no way to get decent low-frequency
/// accuracy while still having acceptable performance. The input going into the STFT will be
/// smaller since it will be padded with zeroes to compensate for the otherwise overlapping tail
/// caused by the convolution.
pub const FFT_SIZE: usize = 4096;
/// The input chunk size the FFT convolution is processing. This is also the latency. By having this
/// be exactly half of FFT_SIZE, we can make the overlap-add part of the FFT convolution a lot
/// simpler for ourselves. (check the `StftHelper` struct in NIH-plug itself for an example that
/// can handle arbitrary padding)
pub const FFT_INPUT_SIZE: usize = FFT_SIZE / 2;
/// The size of the FIR filter window, or the number of taps. Convolving `FFT_INPUT_SIZE` samples
/// with this filter should fit exactly in `FFT_SIZE`, and it should be an odd number.
pub const FILTER_SIZE: usize = FFT_SIZE - FFT_INPUT_SIZE + 1;
/// A single FIR filter that may be configured in any way. In this plugin this will be a
/// linear-phase low-pass, band-pass, or high-pass filter.
/// linear-phase low-pass, band-pass, or high-pass filter. Implemented using FFT convolution. `git
/// blame` this for a version that uses direct convolution.
///
/// `N_INPUT` is the size of the input that will be processed. The size of the FFT window becomes
/// `N_INPUT * 2`. That makes handling the overlap easy, as each IDFT after multiplying the padded
/// input and the padded impulse response FFTs will result in one `N_INPUT` period of output that can
/// be taken as is, followed by one `N_INPUT` period of samples that need to be added to the next
/// period's outputs as part of the overlap-add process.
#[derive(Debug, Clone)]
pub struct FirFilter {
/// The coefficients for this filter. The filters for both channels should be equivalent, this
/// just avoids broadcasts in the filter process.
pub coefficients: FirCoefficients<FILTER_SIZE>,
pub struct FftFirFilter {
/// An `N_INPUT + 1` sized IR. Padded, run through the DFT, and then normalized by dividing by
/// `FFT_SIZE`.
padded_ir_fft: [Complex32; FFT_SIZE / 2 + 1],
/// A ring buffer storing the last `FILTER_SIZE - 1` samples. The capacity is `FILTER_SIZE`
/// rounded up to the next power of two.
delay_buffer: [f32x2; RING_BUFFER_SIZE],
/// The index in `delay_buffer` to write the next sample to. Wrapping negative indices back to
/// the end, the previous sample can be found at `delay_buffer[delay_buffer_next_idx - 1]`, the
/// one before that at `delay_buffer[delay_buffer_next_idx - 2]`, and so on.
delay_buffer_next_idx: usize,
/// The padding from the previous IDFT operation that needs to be added to the next output
/// buffer. After the IDFT process there will be an `FFT_SIZE` real scratch buffer containing
/// the output. At that point the first `FFT_INPUT_SIZE` samples of those will be copied to
/// `output_buffers` in the FIR crossover, `unapplied_padding_buffer` will be added to that
/// output buffer, and then finally the last `FFT_INPUT_SIZE` samples of the scratch buffer are
/// copied to `unapplied_padding_buffer`. This thus makes sure the tail gets delayed by another
/// period so that everything matches up.
unapplied_padding_buffers: [[f32; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
}
/// Coefficients for a (linear-phase) FIR filter. This struct includes ways to design the filter.
@ -43,12 +66,14 @@ pub struct FirFilter {
#[derive(Debug, Clone)]
pub struct FirCoefficients<const N: usize>(pub [f32; N]);
impl Default for FirFilter {
impl Default for FftFirFilter {
fn default() -> Self {
Self {
coefficients: FirCoefficients::default(),
delay_buffer: [f32x2::default(); RING_BUFFER_SIZE],
delay_buffer_next_idx: 0,
// Would be nicer to initialize this to an impulse response that actually had the
// correct position wrt the usual linear-phase latency, but this is fine since it should
// never be used anyways
padded_ir_fft: [Complex32::new(1.0 / FFT_SIZE as f32, 0.0); FFT_SIZE / 2 + 1],
unapplied_padding_buffers: [[0.0; FFT_INPUT_SIZE]; NUM_CHANNELS as usize],
}
}
}
@ -64,53 +89,87 @@ impl<const N: usize> Default for FirCoefficients<N> {
}
}
impl FirFilter {
/// Process left and right audio samples through the filter.
pub fn process(&mut self, samples: f32x2) -> f32x2 {
// TODO: Replace direct convolution with FFT convolution, would make the implementation much
// more complex though because of the multi output part
let coefficients = &self.coefficients.0;
let mut result = f32x2::splat(coefficients[0]) * samples;
impl FftFirFilter {
/// Filter `FFT_INPUT_SIZE` samples padded to `FFT_SIZE` through this filter, and write the
/// outputs to `output_samples` (belonging to channel `channel_idx`), at an `FFT_INPUT_SIZE`
/// delay. This is a bit weird and probably difficult to follow because as an optimization the
/// DFT is taken only once, and then the IDFT is taken once for every filtered band. This
/// function is thus called inside of the overlap-add loop to avoid duplicate work.
pub fn process(
&mut self,
input_fft: &[Complex32; FFT_SIZE / 2 + 1],
output_samples: &mut [f32; FFT_INPUT_SIZE],
output_channel_idx: usize,
c2r_plan: &dyn ComplexToReal<f32>,
real_scratch_buffer: &mut [f32; FFT_SIZE],
complex_scratch_buffer: &mut [Complex32; FFT_SIZE / 2 + 1],
) {
// The padded input FFT has already been taken, so we only need to copy it to the scratch
// buffer (the input cannot change as the next band might need it as well).
complex_scratch_buffer.copy_from_slice(input_fft);
// Now multiply `self.coefficients[1..]` with the delay buffer starting at
// `self.delay_buffer_next_idx - 1`, wrapping around to the end when that is reached
// The end index is exclusive, and we already did the multiply+add for the first coefficient.
let before_wraparound_start_idx = self
.delay_buffer_next_idx
.saturating_sub(coefficients.len() - 1);
let before_wraparound_end_idx = self.delay_buffer_next_idx;
let num_before_wraparound = before_wraparound_end_idx - before_wraparound_start_idx;
for (coefficient, delayed_sample) in coefficients[1..1 + num_before_wraparound].iter().zip(
self.delay_buffer[before_wraparound_start_idx..before_wraparound_end_idx]
.iter()
.rev(),
) {
// `result += coefficient * sample`, but with explicit FMA
result = f32x2::splat(*coefficient).mul_add(*delayed_sample, result);
// The FFT of the impulse response has already been normalized, so we just need to
// multiply the two buffers
for (output_bin, ir_bin) in complex_scratch_buffer
.iter_mut()
.zip(self.padded_ir_fft.iter())
{
*output_bin *= ir_bin;
}
c2r_plan
.process_with_scratch(complex_scratch_buffer, real_scratch_buffer, &mut [])
.unwrap();
let after_wraparound_begin_idx =
self.delay_buffer.len() - (coefficients.len() - num_before_wraparound);
let after_wraparound_end_idx = self.delay_buffer.len();
for (coefficient, delayed_sample) in coefficients[1 + num_before_wraparound..].iter().zip(
self.delay_buffer[after_wraparound_begin_idx..after_wraparound_end_idx]
.iter()
.rev(),
) {
result = f32x2::splat(*coefficient).mul_add(*delayed_sample, result);
// At this point the first `FFT_INPUT_SIZE` elements in `real_scratch_buffer`
// contain the output for the next period, while the last `FFT_INPUT_SIZE` elements
// contain output that needs to be added to the period after that. Since the previous
// period also produced similar delayed output, we'll need to add that to the
// results as well.
output_samples.copy_from_slice(&real_scratch_buffer[..FFT_INPUT_SIZE]);
for (output_sample, padding_sample) in output_samples
.iter_mut()
.zip(self.unapplied_padding_buffers[output_channel_idx].iter())
{
*output_sample += *padding_sample;
}
self.unapplied_padding_buffers[output_channel_idx]
.copy_from_slice(&real_scratch_buffer[FFT_INPUT_SIZE..]);
}
// And finally write the samples to the delay buffer for the next sample
self.delay_buffer[self.delay_buffer_next_idx] = samples;
self.delay_buffer_next_idx = (self.delay_buffer_next_idx + 1) % self.delay_buffer.len();
/// Set the filter's coefficients based on raw FIR filter coefficients. These will be padded,
/// run through the DFT, and normalized.
pub fn recompute_coefficients(
&mut self,
coefficients: FirCoefficients<FILTER_SIZE>,
r2c_plan: &dyn RealToComplex<f32>,
real_scratch_buffer: &mut [f32; FFT_SIZE],
complex_scratch_buffer: &mut [Complex32; FFT_SIZE / 2 + 1],
) {
// This needs to be padded with zeroes
real_scratch_buffer[..FILTER_SIZE].copy_from_slice(&coefficients.0);
real_scratch_buffer[FILTER_SIZE..].fill(0.0);
result
r2c_plan
.process_with_scratch(real_scratch_buffer, complex_scratch_buffer, &mut [])
.unwrap();
// The resulting buffer needs to be normalized and written to `self.padded_ir_fft`. That way
// we don't need to do anything but multiplying and writing the results back when
// processing.
let normalization_factor = 1.0 / FFT_SIZE as f32;
for (filter_bin, target_bin) in complex_scratch_buffer
.iter()
.zip(self.padded_ir_fft.iter_mut())
{
*target_bin = *filter_bin * normalization_factor;
}
}
/// Reset the internal filter state.
pub fn reset(&mut self) {
self.delay_buffer.fill(f32x2::default());
self.delay_buffer_next_idx = 0;
for buffer in &mut self.unapplied_padding_buffers {
buffer.fill(0.0);
}
}
}

View file

@ -27,6 +27,9 @@ use std::sync::Arc;
mod crossover;
/// The number of channels this plugin supports. Hard capped at 2 for SIMD reasons.
pub const NUM_CHANNELS: u32 = 2;
/// The number of bands. Not used directly here, but this avoids hardcoding some constants in the
/// crossover implementations.
pub const NUM_BANDS: usize = 5;
@ -163,13 +166,13 @@ impl Plugin for Crossover {
const VERSION: &'static str = "0.1.0";
const DEFAULT_NUM_INPUTS: u32 = 2;
const DEFAULT_NUM_OUTPUTS: u32 = 2;
const DEFAULT_NUM_INPUTS: u32 = NUM_CHANNELS;
const DEFAULT_NUM_OUTPUTS: u32 = NUM_CHANNELS;
const DEFAULT_AUX_OUTPUTS: Option<AuxiliaryIOConfig> = Some(AuxiliaryIOConfig {
// Two to five of these busses will be used at a time
num_busses: 5,
num_channels: 2,
num_channels: NUM_CHANNELS,
});
const PORT_NAMES: PortNames = PortNames {
@ -186,9 +189,9 @@ impl Plugin for Crossover {
fn accepts_bus_config(&self, config: &BusConfig) -> bool {
// Only do stereo
config.num_input_channels == 2
&& config.num_output_channels == 2
&& config.aux_output_busses.num_channels == 2
config.num_input_channels == NUM_CHANNELS
&& config.num_output_channels == NUM_CHANNELS
&& config.aux_output_busses.num_channels == NUM_CHANNELS
}
fn initialize(
@ -232,18 +235,7 @@ impl Plugin for Crossover {
CrossoverType::LinkwitzRiley24LinearPhase => {
context.set_latency_samples(self.fir_crossover.latency());
todo!();
// Self::do_process(buffer, aux, |main_channel_samples, bands| {
// if self.should_update_filters() {
// self.update_filters(buffer.len() as u32);
// }
// self.fir_crossover.process(
// self.params.num_bands.value as usize,
// main_channel_samples,
// bands,
// );
// })
self.process_fir(buffer, aux);
}
}
@ -253,7 +245,7 @@ impl Plugin for Crossover {
impl Crossover {
/// Takes care of all of the boilerplate in zipping the outputs together to get a nice iterator
/// friendly and SIMD-able interface for the processing function. Prevents havign to branch per
/// friendly and SIMD-able interface for the processing function. Prevents having to branch per
/// sample. The closure receives an input sample and it should write the output samples for each
/// band to the array.
fn process_iir(&mut self, buffer: &mut Buffer, aux: &mut AuxiliaryBuffers) {
@ -310,6 +302,45 @@ impl Crossover {
}
}
/// `process_iir()`, but for the linear-phase FIR crossovers. This processes an entire channel
/// at once instead of processing per-sample since we use FFT convolution.
///
/// The band outputs are written to the first five auxiliary output buffers, and the main
/// output is filled with zeroes afterwards.
///
/// # Panics
///
/// Panics if `aux.outputs` contains fewer than five buffers.
fn process_fir(&mut self, buffer: &mut Buffer, aux: &mut AuxiliaryBuffers) {
// In theory we could do smoothing in between processed blocks, but this should be fine
if self.should_update_filters() {
self.update_filters(buffer.len() as u32);
}
// Peel the five band buffers off the front of the auxiliary outputs one at a time so each of
// them can be borrowed mutably on its own
let aux_outputs = &mut aux.outputs;
let (band_1_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
let (band_2_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
let (band_3_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
let (band_4_buffer, aux_outputs) = aux_outputs.split_first_mut().unwrap();
let (band_5_buffer, _) = aux_outputs.split_first_mut().unwrap();
// We can avoid a lot of hardcoding and conditionals by restoring the original array structure
for channel_idx in 0..buffer.channels() {
let main_io = &mut buffer.as_slice()[channel_idx];
let band_outputs = [
&mut band_1_buffer.as_slice()[channel_idx],
&mut band_2_buffer.as_slice()[channel_idx],
&mut band_3_buffer.as_slice()[channel_idx],
&mut band_4_buffer.as_slice()[channel_idx],
&mut band_5_buffer.as_slice()[channel_idx],
];
// `main_io` is only read from here, the filtered bands end up in `band_outputs`
self.fir_crossover.process(
self.params.num_bands.value as usize,
main_io,
band_outputs,
channel_idx,
);
// The main output should be silent as the signal is already evenly split over the other
// bands
main_io.fill(0.0);
}
}
/// Returns whether the filters should be updated. There are different updating functions for
/// the IIR and FIR crossovers.
fn should_update_filters(&mut self) -> bool {