From de13f8c42a7b9be01ca22100ef68d14fcdca1368 Mon Sep 17 00:00:00 2001 From: Robbert van der Helm Date: Mon, 6 Jun 2022 02:07:26 +0200 Subject: [PATCH] Add part of an FIR crossover This includes an algorithm that efficiently converts biquad coefficients to a linear-phase FIR filter kernel. --- plugins/crossover/src/crossover.rs | 1 + plugins/crossover/src/crossover/fir.rs | 317 +++++++++++++++++++++++++ plugins/crossover/src/crossover/iir.rs | 6 +- 3 files changed, 320 insertions(+), 4 deletions(-) create mode 100644 plugins/crossover/src/crossover/fir.rs diff --git a/plugins/crossover/src/crossover.rs b/plugins/crossover/src/crossover.rs index fc1d1b41..f627a8a2 100644 --- a/plugins/crossover/src/crossover.rs +++ b/plugins/crossover/src/crossover.rs @@ -14,4 +14,5 @@ // You should have received a copy of the GNU General Public License // along with this program. If not, see . +pub mod fir; pub mod iir; diff --git a/plugins/crossover/src/crossover/fir.rs b/plugins/crossover/src/crossover/fir.rs new file mode 100644 index 00000000..83c3aa00 --- /dev/null +++ b/plugins/crossover/src/crossover/fir.rs @@ -0,0 +1,317 @@ +// Crossover: clean crossovers as a multi-out plugin +// Copyright (C) 2022 Robbert van der Helm +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use nih_plug::buffer::ChannelSamples; +use nih_plug::debug::*; +use std::f32; +use std::simd::{f32x2, StdFloat}; + +use crate::biquad::{Biquad, BiquadCoefficients}; +use crate::NUM_BANDS; + +// TODO: These filters would be more efficient when processing four samples at a time instead of +// processing two channels at a time. But this keeps the interface nicer. + +/// The size of the FIR filter window, or the number of taps. +const FILTER_SIZE: usize = 121; +/// The size of the FIR filter's ring buffer. This is `FILTER_SIZE` rounded up to the next power of +/// two. +const RING_BUFFER_SIZE: usize = FILTER_SIZE.next_power_of_two(); + +#[derive(Debug)] +pub struct FirCrossover { + /// The kind of crossover to use. `.update_filters()` must be called after changing this. + mode: FirCrossoverType, + + /// Filters for each of the bands. Depending on the number of bands argument passed to + /// `.process()` two to five of these may be used. The first one always contains a low-pass + /// filter, the last one always contains a high-pass filter, while the other bands will contain + /// band-pass filters. + band_filters: [FirFilter; NUM_BANDS], +} + +/// The type of FIR crossover to use. +#[derive(Debug, Clone, Copy)] +pub enum FirCrossoverType { + /// Emulates the filter slope of [`super::iir::IirCrossoverType`], but with linear-phase FIR + /// filters instead of minimum-phase IIR filters. The exact same filters are used to design the + /// FIR filters. + LinkwitzRiley24LinearPhase, +} + +/// A single FIR filter that may be configured in any way. In this plugin this will be a +/// linear-phase low-pass, band-pass, or high-pass filter. +#[derive(Debug, Clone)] +struct FirFilter { + /// The coefficients for this filter. The filters for both channels should be equivalent, this + /// just avoids broadcasts in the filter process. + /// + /// TODO: Profile to see if storing this as f32x2 rather than f32s plus splatting makes any + /// difference in performance at all + coefficients: FirCoefficients, + + /// A ring buffer storing the last `FILTER_SIZE - 1` samples. The capacity is `FILTER_SIZE` + /// rounded up to the next power of two. + delay_buffer: [f32x2; RING_BUFFER_SIZE], + /// The index in `delay_buffer` to write the next sample to. Wrapping negative indices back to + /// the end, the previous sample can be found at `delay_buffer[delay_buffer_next_idx - 1]`, the + /// one before that at `delay_buffer[delay_buffer_next_idx - 2]`, and so on. + delay_buffer_next_idx: usize, +} + +/// Coefficients for an FIR filter. This struct includes ways to design the filter. Parameterized +/// over `f32x2` only for the time being since that's what we need here. +#[repr(transparent)] +#[derive(Debug, Clone)] +struct FirCoefficients([f32x2; FILTER_SIZE]); + +impl Default for FirFilter { + fn default() -> Self { + Self { + coefficients: FirCoefficients::default(), + delay_buffer: [f32x2::default(); RING_BUFFER_SIZE], + delay_buffer_next_idx: 0, + } + } +} + +impl Default for FirCoefficients { + fn default() -> Self { + // Initialize this to a delay with the same amount of latency as we'd introduce with our + // linear-phase filters + let mut coefficients = [f32x2::default(); FILTER_SIZE]; + coefficients[FILTER_SIZE / 2] = f32x2::splat(1.0); + + Self(coefficients) + } +} + +impl FirCrossover { + /// Create a new multiband crossover processor. All filters will be configured to pass audio + /// through as is, albeit with a delay. `.update()` needs to be called first to set up the + /// filters, and `.reset()` can be called whenever the filter state must be cleared. + /// + /// Make sure to add the latency reported by [`latency()`][Self::latency()] to the plugin's + /// reported latency. + pub fn new(mode: FirCrossoverType) -> Self { + Self { + mode, + band_filters: Default::default(), + } + } + + /// Get the current latency in samples. This depends on the selected mode. + pub fn latency(&self) -> usize { + // Actually, that's a lie, since we currently only do linear-phase filters with a constant + // size + match self.mode { + FirCrossoverType::LinkwitzRiley24LinearPhase => FILTER_SIZE / 2, + } + } + + /// Split the signal into bands using the crossovers previously configured through `.update()`. + /// The split bands will be written to `band_outputs`. `main_io` is not written to, and should + /// be cleared separately. + pub fn process( + &mut self, + num_bands: usize, + main_io: &ChannelSamples, + mut band_outputs: [ChannelSamples; NUM_BANDS], + ) { + nih_debug_assert!(num_bands >= 2); + nih_debug_assert!(num_bands <= NUM_BANDS); + // Required for the SIMD, so we'll just do a hard assert or the unchecked conversions will + // be unsound + assert!(main_io.len() == 2); + + let mut samples: f32x2 = unsafe { main_io.to_simd_unchecked() }; + match self.mode { + FirCrossoverType::LinkwitzRiley24LinearPhase => { + todo!(); + } + } + } + + /// Update the crossover frequencies for all filters. + pub fn update( + &mut self, + sample_rate: f32, + num_bands: usize, + frequencies: [f32; NUM_BANDS - 1], + ) { + match self.mode { + FirCrossoverType::LinkwitzRiley24LinearPhase => todo!(), + } + } + + /// Reset the internal filter state for all crossovers. + pub fn reset(&mut self) { + for filter in &mut self.band_filters { + filter.reset(); + } + } +} + +impl FirFilter { + /// Process left and right audio samples through the filter. + pub fn process(&mut self, samples: f32x2) -> f32x2 { + // TODO: Replace direct convolution with FFT convolution, would make the implementation much + // more complex though because of the multi output part + let coefficients = &self.coefficients.0; + let mut result = coefficients[0] * samples; + + // Now multiply `self.coefficients[1..]` with the delay buffer starting at + // `self.delay_buffer_next_idx - 1`, wrapping around to the end when that is reached + // The end index is exclusive, and we already did the multiply+add for the first coefficient. + let before_wraparound_start_idx = self + .delay_buffer_next_idx + .saturating_sub(coefficients.len() - 1); + let before_wraparound_end_idx = self.delay_buffer_next_idx; + let num_before_wraparound = before_wraparound_end_idx - before_wraparound_start_idx; + for (coefficient, delayed_sample) in coefficients[1..1 + num_before_wraparound].iter().zip( + self.delay_buffer[before_wraparound_start_idx..before_wraparound_end_idx] + .iter() + .rev(), + ) { + // `result += coefficient * sample`, but with explicit FMA + result = coefficient.mul_add(*delayed_sample, result); + } + + let after_wraparound_begin_idx = + self.delay_buffer.len() - (coefficients.len() - num_before_wraparound); + let after_wraparound_end_idx = self.delay_buffer.len(); + for (coefficient, delayed_sample) in coefficients[1 + num_before_wraparound..].iter().zip( + self.delay_buffer[after_wraparound_begin_idx..after_wraparound_end_idx] + .iter() + .rev(), + ) { + result = coefficient.mul_add(*delayed_sample, result); + } + + // And finally write the samples to the delay buffer for the enxt sample + self.delay_buffer[self.delay_buffer_next_idx] = samples; + self.delay_buffer_next_idx = (self.delay_buffer_next_idx + 1) % self.delay_buffer.len(); + + result + } + + /// Update the coefficients for all filters in the crossover. + pub fn update_coefficients(&mut self, coefs: FirCoefficients) { + self.coefficients = coefs; + } + + /// Reset the internal filter state. + pub fn reset(&mut self) { + self.delay_buffer.fill(f32x2::default()); + self.delay_buffer_next_idx = 0; + } +} + +impl FirCoefficients { + /// A somewhat crude but very functional and relatively fast way create a linear phase FIR + /// **low-pass** filter that matches the frequency response of a biquad filter. This normalizes + /// the result, so biquad coefficients for high- and band-pass filters will not work correctly. + /// The algorithm works as follows: + /// + /// - An impulse function (so all zeroes except for the first element) of length `FILTER_LEN / 2 + /// + 1` is filtered with the biquad. + /// - The biquad's state is reset, and the impulse response is filtered in the opposite + /// direction. + /// - At this point the bidirectionally filtered impulse response contains the **right** half of + /// a truncated linear phase FIR kernel. + /// + /// Since the FIR filter will be a symmetrical version of this impulse response, we can optimize + /// the post-processing work slightly by windowing and normalizing this bidirectionally filtered + /// impulse response instead. + /// + /// - A half Blackman window is applied to the impulse response. Since this is the right half, + /// this starts at unity gain for the first sample and then tapers off towards the right. + /// - The impulse response is then normalized such that the final linear-phase FIR kernel has a + /// sum of 1.0. Since it will be symmetrical around the IRs first sample, the would-be final + /// sum can be computed as `ir.sum() * 2 - ir[0]`> + /// + /// Lastly the linear phase FIR filter simply needs to be constructed from this right half: + /// + /// - This bidirectionally filtered impulse response is then reversed, and placed at the start + /// of the `FILTER_LEN` size FIR coefficient array. + /// - The non-reversed bidirectionally filtered impulse response is copied to the second half of + /// the coefficients. (one of the copies doesn't need to include the centermost coefficient) + /// + /// The corresponding high-pass filter can be computed through spectral inversion. + pub fn design_linear_phase_low_pass_from_biquad( + biquad_coefs: BiquadCoefficients, + ) -> Self { + // We'll start with an impulse... + let mut impulse_response = [f32x2::default(); FILTER_SIZE / 2 + 1]; + impulse_response[0] = f32x2::splat(1.0); + + // ...and filter that in both directions + let mut biquad = Biquad::default(); + biquad.coefficients = biquad_coefs; + for sample in impulse_response.iter_mut() { + *sample = biquad.process(*sample); + } + + biquad.reset(); + for sample in impulse_response.iter_mut().rev() { + *sample = biquad.process(*sample); + } + + // Now `impulse_response` contains a truncated right half of the linear-phase FIR filter. We + // can apply the window function here, and then normalize it so that the the final FIR + // filter kernel sums to 1. + + // Adopted from `nih_plug::util::window` + let blackman_scale_1 = (2.0 * f32::consts::PI) / (impulse_response.len() - 1) as f32; + let blackman_scale_2 = blackman_scale_1 * 2.0; + // We only apply the right half of the window, starting at the top of the window + let blackman_offset = impulse_response.len() / 2; + for (sample_idx, sample) in impulse_response.iter_mut().enumerate() { + let i = sample_idx + blackman_offset; + let cos_1 = (blackman_scale_1 * i as f32).cos(); + let cos_2 = (blackman_scale_2 * i as f32).cos(); + *sample *= f32x2::splat(0.42 - (0.5 * cos_1) + (0.08 * cos_2)); + } + + // Since this final filter will be symmetrical around + // `impulse_response[0]`, we can simply normalized based on that fact: + let would_be_coefficients_sum = + impulse_response.iter().sum::() * f32x2::splat(2.0) - impulse_response[0]; + let would_be_coefficients_recip = would_be_coefficients_sum.recip(); + for sample in &mut impulse_response { + *sample *= would_be_coefficients_recip; + } + + // And finally we can simply build the filter from the processed impulse response (which, + // again, corresponds to the right half of the final linear-phase filter kernel with the + // first sample in the IR being the middlemost element in the kernel) + let mut coefficients = [f32x2::default(); FILTER_SIZE]; + for (coefficient, ir_sample) in coefficients + .iter_mut() + .take(impulse_response.len() / 2 - 1) + // We won't copy the very first sample of the IR here, that will be part of the second + // (non-reversed) half + .zip(impulse_response.iter().skip(1).rev()) + { + *coefficient = *ir_sample; + } + + // And the second half can be a simple memcpy + coefficients[impulse_response.len() / 2..].copy_from_slice(&impulse_response); + + Self(coefficients) + } +} diff --git a/plugins/crossover/src/crossover/iir.rs b/plugins/crossover/src/crossover/iir.rs index c8784cad..f668f17e 100644 --- a/plugins/crossover/src/crossover/iir.rs +++ b/plugins/crossover/src/crossover/iir.rs @@ -75,7 +75,7 @@ struct AllPassCascade { impl IirCrossover { /// Create a new multiband crossover processor. All filters will be configured to pass audio - /// through as it. `.update()` needs to be called first to set up the filters, and `.reset()` + /// through as is. `.update()` needs to be called first to set up the filters, and `.reset()` /// can be called whenever the filter state must be cleared. pub fn new(mode: IirCrossoverType) -> Self { Self { @@ -126,9 +126,7 @@ impl IirCrossover { } } - /// Update the crossover frequencies for all filters. If the frequencies are not monotonic then - /// this function will ensure that they are. The active number of bands is used to make sure - /// unused bands are not part of the normalization. + /// Update the crossover frequencies for all filters. pub fn update( &mut self, sample_rate: f32,