diff --git a/plugins/crossover/src/crossover/fir.rs b/plugins/crossover/src/crossover/fir.rs index 174321a3..c715c723 100644 --- a/plugins/crossover/src/crossover/fir.rs +++ b/plugins/crossover/src/crossover/fir.rs @@ -17,11 +17,14 @@ use nih_plug::buffer::ChannelSamples; use nih_plug::debug::*; use std::f32; -use std::simd::{f32x2, StdFloat}; +use std::simd::f32x2; -use crate::crossover::iir::biquad::{Biquad, BiquadCoefficients, NEUTRAL_Q}; +use self::filter::{FirCoefficients, FirFilter}; +use crate::crossover::iir::biquad::{BiquadCoefficients, NEUTRAL_Q}; use crate::NUM_BANDS; +pub mod filter; + // TODO: Move this to FFT convolution so we can increase the filter size and improve low latency performance /// The size of the FIR filter window, or the number of taps. The low frequency performance is @@ -52,53 +55,6 @@ pub enum FirCrossoverType { LinkwitzRiley24LinearPhase, } -/// A single FIR filter that may be configured in any way. In this plugin this will be a -/// linear-phase low-pass, band-pass, or high-pass filter. -#[derive(Debug, Clone)] -struct FirFilter { - /// The coefficients for this filter. The filters for both channels should be equivalent, this - /// just avoids broadcasts in the filter process. - /// - /// TODO: Profile to see if storing this as f32x2 rather than f32s plus splatting makes any - /// difference in performance at all - pub coefficients: FirCoefficients, - - /// A ring buffer storing the last `FILTER_SIZE - 1` samples. The capacity is `FILTER_SIZE` - /// rounded up to the next power of two. - delay_buffer: [f32x2; RING_BUFFER_SIZE], - /// The index in `delay_buffer` to write the next sample to. Wrapping negative indices back to - /// the end, the previous sample can be found at `delay_buffer[delay_buffer_next_idx - 1]`, the - /// one before that at `delay_buffer[delay_buffer_next_idx - 2]`, and so on. - delay_buffer_next_idx: usize, -} - -/// Coefficients for an FIR filter. This struct includes ways to design the filter. Parameterized -/// over `f32x2` only for the time being since that's what we need here. -#[repr(transparent)] -#[derive(Debug, Clone)] -struct FirCoefficients([f32x2; FILTER_SIZE]); - -impl Default for FirFilter { - fn default() -> Self { - Self { - coefficients: FirCoefficients::default(), - delay_buffer: [f32x2::default(); RING_BUFFER_SIZE], - delay_buffer_next_idx: 0, - } - } -} - -impl Default for FirCoefficients { - fn default() -> Self { - // Initialize this to a delay with the same amount of latency as we'd introduce with our - // linear-phase filters - let mut coefficients = [f32x2::default(); FILTER_SIZE]; - coefficients[FILTER_SIZE / 2] = f32x2::splat(1.0); - - Self(coefficients) - } -} - impl FirCrossover { /// Create a new multiband crossover processor. All filters will be configured to pass audio /// through as is, albeit with a delay. `.update()` needs to be called first to set up the @@ -258,141 +214,3 @@ impl FirCrossover { } } } - -impl FirFilter { - /// Process left and right audio samples through the filter. - pub fn process(&mut self, samples: f32x2) -> f32x2 { - // TODO: Replace direct convolution with FFT convolution, would make the implementation much - // more complex though because of the multi output part - let coefficients = &self.coefficients.0; - let mut result = coefficients[0] * samples; - - // Now multiply `self.coefficients[1..]` with the delay buffer starting at - // `self.delay_buffer_next_idx - 1`, wrapping around to the end when that is reached - // The end index is exclusive, and we already did the multiply+add for the first coefficient. - let before_wraparound_start_idx = self - .delay_buffer_next_idx - .saturating_sub(coefficients.len() - 1); - let before_wraparound_end_idx = self.delay_buffer_next_idx; - let num_before_wraparound = before_wraparound_end_idx - before_wraparound_start_idx; - for (coefficient, delayed_sample) in coefficients[1..1 + num_before_wraparound].iter().zip( - self.delay_buffer[before_wraparound_start_idx..before_wraparound_end_idx] - .iter() - .rev(), - ) { - // `result += coefficient * sample`, but with explicit FMA - result = coefficient.mul_add(*delayed_sample, result); - } - - let after_wraparound_begin_idx = - self.delay_buffer.len() - (coefficients.len() - num_before_wraparound); - let after_wraparound_end_idx = self.delay_buffer.len(); - for (coefficient, delayed_sample) in coefficients[1 + num_before_wraparound..].iter().zip( - self.delay_buffer[after_wraparound_begin_idx..after_wraparound_end_idx] - .iter() - .rev(), - ) { - result = coefficient.mul_add(*delayed_sample, result); - } - - // And finally write the samples to the delay buffer for the enxt sample - self.delay_buffer[self.delay_buffer_next_idx] = samples; - self.delay_buffer_next_idx = (self.delay_buffer_next_idx + 1) % self.delay_buffer.len(); - - result - } - - /// Reset the internal filter state. - pub fn reset(&mut self) { - self.delay_buffer.fill(f32x2::default()); - self.delay_buffer_next_idx = 0; - } -} - -impl FirCoefficients { - /// A somewhat crude but very functional and relatively fast way create linear phase FIR - /// **low-pass** filter that matches the frequency response of a fourth order biquad low-pass - /// filter. As in, this matches the frequency response magnitudes of applying those biquads to a - /// signal twice. This only works for low-pass filters, as the function normalizes the result to - /// hae unity gain at the DC bin. The algorithm works as follows: - /// - /// - An impulse function (so all zeroes except for the first element) of length `FILTER_LEN / 2 - /// + 1` is filtered with the biquad. - /// - The biquad's state is reset, and the impulse response is filtered in the opposite - /// direction. - /// - At this point the bidirectionally filtered impulse response contains the **right** half of - /// a truncated linear phase FIR kernel. - /// - /// Since the FIR filter will be a symmetrical version of this impulse response, we can optimize - /// the post-processing work slightly by windowing and normalizing this bidirectionally filtered - /// impulse response instead. - /// - /// - A half Blackman window is applied to the impulse response. Since this is the right half, - /// this starts at unity gain for the first sample and then tapers off towards the right. - /// - The impulse response is then normalized such that the final linear-phase FIR kernel has a - /// sum of 1.0. Since it will be symmetrical around the IRs first sample, the would-be final - /// sum can be computed as `ir.sum() * 2 - ir[0]`. - /// - /// Lastly the linear phase FIR filter simply needs to be constructed from this right half: - /// - /// - This bidirectionally filtered impulse response is then reversed, and placed at the start - /// of the `FILTER_LEN` size FIR coefficient array. - /// - The non-reversed bidirectionally filtered impulse response is copied to the second half of - /// the coefficients. (one of the copies doesn't need to include the centermost coefficient) - /// - /// The corresponding high-pass filter can be computed through spectral inversion. - pub fn design_fourth_order_linear_phase_low_pass_from_biquad( - biquad_coefs: BiquadCoefficients, - ) -> Self { - const CENTER_IDX: usize = FILTER_SIZE / 2; - - // We'll start with an impulse (at exactly half of this odd sized buffer)... - let mut impulse_response = [f32x2::default(); FILTER_SIZE]; - impulse_response[CENTER_IDX] = f32x2::splat(1.0); - - // ...and filter that in both directions - let mut biquad = Biquad::default(); - biquad.coefficients = biquad_coefs; - for sample in impulse_response.iter_mut().skip(CENTER_IDX - 1) { - *sample = biquad.process(*sample); - } - - biquad.reset(); - for sample in impulse_response.iter_mut().skip(CENTER_IDX - 1).rev() { - *sample = biquad.process(*sample); - } - - // Now the right half of `impulse_response` contains a truncated right half of the - // linear-phase FIR filter. We can apply the window function here, and then fianlly - // normalize it so that the the final FIR filter kernel sums to 1. - - // Adopted from `nih_plug::util::window`. We only end up applying the right half of the - // window, starting at the top of the window. - let blackman_scale_1 = (2.0 * f32::consts::PI) / (impulse_response.len() - 1) as f32; - let blackman_scale_2 = blackman_scale_1 * 2.0; - for (sample_idx, sample) in impulse_response.iter_mut().enumerate().skip(CENTER_IDX - 1) { - let cos_1 = (blackman_scale_1 * sample_idx as f32).cos(); - let cos_2 = (blackman_scale_2 * sample_idx as f32).cos(); - *sample *= f32x2::splat(0.42 - (0.5 * cos_1) + (0.08 * cos_2)); - } - - // Since this final filter will be symmetrical around `impulse_response[CENTER_IDX]`, we - // can simply normalize based on that fact: - let would_be_impulse_response_sum = - (impulse_response.iter().skip(CENTER_IDX).sum::() * f32x2::splat(2.0)) - - impulse_response[CENTER_IDX]; - let would_be_impulse_response_recip = would_be_impulse_response_sum.recip(); - for sample in &mut impulse_response { - *sample *= would_be_impulse_response_recip; - } - - // And finally we can simply copy the right half of the filter kernel to the left half - // around the `CENTER_IDX`. - for source_idx in CENTER_IDX + 1..impulse_response.len() { - let target_idx = CENTER_IDX - (source_idx - CENTER_IDX); - impulse_response[target_idx] = impulse_response[source_idx]; - } - - Self(impulse_response) - } -} diff --git a/plugins/crossover/src/crossover/fir/filter.rs b/plugins/crossover/src/crossover/fir/filter.rs new file mode 100644 index 00000000..957b3028 --- /dev/null +++ b/plugins/crossover/src/crossover/fir/filter.rs @@ -0,0 +1,206 @@ +// Crossover: clean crossovers as a multi-out plugin +// Copyright (C) 2022 Robbert van der Helm +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +use std::f32; +use std::simd::{f32x2, StdFloat}; + +use super::{FILTER_SIZE, RING_BUFFER_SIZE}; +use crate::crossover::iir::biquad::{Biquad, BiquadCoefficients}; + +/// A single FIR filter that may be configured in any way. In this plugin this will be a +/// linear-phase low-pass, band-pass, or high-pass filter. +#[derive(Debug, Clone)] +pub struct FirFilter { + /// The coefficients for this filter. The filters for both channels should be equivalent, this + /// just avoids broadcasts in the filter process. + /// + /// TODO: Profile to see if storing this as f32x2 rather than f32s plus splatting makes any + /// difference in performance at all + pub coefficients: FirCoefficients, + + /// A ring buffer storing the last `FILTER_SIZE - 1` samples. The capacity is `FILTER_SIZE` + /// rounded up to the next power of two. + delay_buffer: [f32x2; RING_BUFFER_SIZE], + /// The index in `delay_buffer` to write the next sample to. Wrapping negative indices back to + /// the end, the previous sample can be found at `delay_buffer[delay_buffer_next_idx - 1]`, the + /// one before that at `delay_buffer[delay_buffer_next_idx - 2]`, and so on. + delay_buffer_next_idx: usize, +} + +/// Coefficients for an FIR filter. This struct includes ways to design the filter. Parameterized +/// over `f32x2` only for the time being since that's what we need here. +#[repr(transparent)] +#[derive(Debug, Clone)] +pub struct FirCoefficients(pub [f32x2; FILTER_SIZE]); + +impl Default for FirFilter { + fn default() -> Self { + Self { + coefficients: FirCoefficients::default(), + delay_buffer: [f32x2::default(); RING_BUFFER_SIZE], + delay_buffer_next_idx: 0, + } + } +} + +impl Default for FirCoefficients { + fn default() -> Self { + // Initialize this to a delay with the same amount of latency as we'd introduce with our + // linear-phase filters + let mut coefficients = [f32x2::default(); FILTER_SIZE]; + coefficients[FILTER_SIZE / 2] = f32x2::splat(1.0); + + Self(coefficients) + } +} + +impl FirFilter { + /// Process left and right audio samples through the filter. + pub fn process(&mut self, samples: f32x2) -> f32x2 { + // TODO: Replace direct convolution with FFT convolution, would make the implementation much + // more complex though because of the multi output part + let coefficients = &self.coefficients.0; + let mut result = coefficients[0] * samples; + + // Now multiply `self.coefficients[1..]` with the delay buffer starting at + // `self.delay_buffer_next_idx - 1`, wrapping around to the end when that is reached + // The end index is exclusive, and we already did the multiply+add for the first coefficient. + let before_wraparound_start_idx = self + .delay_buffer_next_idx + .saturating_sub(coefficients.len() - 1); + let before_wraparound_end_idx = self.delay_buffer_next_idx; + let num_before_wraparound = before_wraparound_end_idx - before_wraparound_start_idx; + for (coefficient, delayed_sample) in coefficients[1..1 + num_before_wraparound].iter().zip( + self.delay_buffer[before_wraparound_start_idx..before_wraparound_end_idx] + .iter() + .rev(), + ) { + // `result += coefficient * sample`, but with explicit FMA + result = coefficient.mul_add(*delayed_sample, result); + } + + let after_wraparound_begin_idx = + self.delay_buffer.len() - (coefficients.len() - num_before_wraparound); + let after_wraparound_end_idx = self.delay_buffer.len(); + for (coefficient, delayed_sample) in coefficients[1 + num_before_wraparound..].iter().zip( + self.delay_buffer[after_wraparound_begin_idx..after_wraparound_end_idx] + .iter() + .rev(), + ) { + result = coefficient.mul_add(*delayed_sample, result); + } + + // And finally write the samples to the delay buffer for the enxt sample + self.delay_buffer[self.delay_buffer_next_idx] = samples; + self.delay_buffer_next_idx = (self.delay_buffer_next_idx + 1) % self.delay_buffer.len(); + + result + } + + /// Reset the internal filter state. + pub fn reset(&mut self) { + self.delay_buffer.fill(f32x2::default()); + self.delay_buffer_next_idx = 0; + } +} + +impl FirCoefficients { + /// A somewhat crude but very functional and relatively fast way create linear phase FIR + /// **low-pass** filter that matches the frequency response of a fourth order biquad low-pass + /// filter. As in, this matches the frequency response magnitudes of applying those biquads to a + /// signal twice. This only works for low-pass filters, as the function normalizes the result to + /// hae unity gain at the DC bin. The algorithm works as follows: + /// + /// - An impulse function (so all zeroes except for the first element) of length `FILTER_LEN / 2 + /// + 1` is filtered with the biquad. + /// - The biquad's state is reset, and the impulse response is filtered in the opposite + /// direction. + /// - At this point the bidirectionally filtered impulse response contains the **right** half of + /// a truncated linear phase FIR kernel. + /// + /// Since the FIR filter will be a symmetrical version of this impulse response, we can optimize + /// the post-processing work slightly by windowing and normalizing this bidirectionally filtered + /// impulse response instead. + /// + /// - A half Blackman window is applied to the impulse response. Since this is the right half, + /// this starts at unity gain for the first sample and then tapers off towards the right. + /// - The impulse response is then normalized such that the final linear-phase FIR kernel has a + /// sum of 1.0. Since it will be symmetrical around the IRs first sample, the would-be final + /// sum can be computed as `ir.sum() * 2 - ir[0]`. + /// + /// Lastly the linear phase FIR filter simply needs to be constructed from this right half: + /// + /// - This bidirectionally filtered impulse response is then reversed, and placed at the start + /// of the `FILTER_LEN` size FIR coefficient array. + /// - The non-reversed bidirectionally filtered impulse response is copied to the second half of + /// the coefficients. (one of the copies doesn't need to include the centermost coefficient) + /// + /// The corresponding high-pass filter can be computed through spectral inversion. + pub fn design_fourth_order_linear_phase_low_pass_from_biquad( + biquad_coefs: BiquadCoefficients, + ) -> Self { + const CENTER_IDX: usize = FILTER_SIZE / 2; + + // We'll start with an impulse (at exactly half of this odd sized buffer)... + let mut impulse_response = [f32x2::default(); FILTER_SIZE]; + impulse_response[CENTER_IDX] = f32x2::splat(1.0); + + // ...and filter that in both directions + let mut biquad = Biquad::default(); + biquad.coefficients = biquad_coefs; + for sample in impulse_response.iter_mut().skip(CENTER_IDX - 1) { + *sample = biquad.process(*sample); + } + + biquad.reset(); + for sample in impulse_response.iter_mut().skip(CENTER_IDX - 1).rev() { + *sample = biquad.process(*sample); + } + + // Now the right half of `impulse_response` contains a truncated right half of the + // linear-phase FIR filter. We can apply the window function here, and then fianlly + // normalize it so that the the final FIR filter kernel sums to 1. + + // Adopted from `nih_plug::util::window`. We only end up applying the right half of the + // window, starting at the top of the window. + let blackman_scale_1 = (2.0 * f32::consts::PI) / (impulse_response.len() - 1) as f32; + let blackman_scale_2 = blackman_scale_1 * 2.0; + for (sample_idx, sample) in impulse_response.iter_mut().enumerate().skip(CENTER_IDX - 1) { + let cos_1 = (blackman_scale_1 * sample_idx as f32).cos(); + let cos_2 = (blackman_scale_2 * sample_idx as f32).cos(); + *sample *= f32x2::splat(0.42 - (0.5 * cos_1) + (0.08 * cos_2)); + } + + // Since this final filter will be symmetrical around `impulse_response[CENTER_IDX]`, we + // can simply normalize based on that fact: + let would_be_impulse_response_sum = + (impulse_response.iter().skip(CENTER_IDX).sum::() * f32x2::splat(2.0)) + - impulse_response[CENTER_IDX]; + let would_be_impulse_response_recip = would_be_impulse_response_sum.recip(); + for sample in &mut impulse_response { + *sample *= would_be_impulse_response_recip; + } + + // And finally we can simply copy the right half of the filter kernel to the left half + // around the `CENTER_IDX`. + for source_idx in CENTER_IDX + 1..impulse_response.len() { + let target_idx = CENTER_IDX - (source_idx - CENTER_IDX); + impulse_response[target_idx] = impulse_response[source_idx]; + } + + Self(impulse_response) + } +} diff --git a/src/event_loop/linux.rs b/src/event_loop/linux.rs index 83ea9726..2f496ad0 100644 --- a/src/event_loop/linux.rs +++ b/src/event_loop/linux.rs @@ -7,7 +7,6 @@ use std::sync::Weak; use std::thread::{self, JoinHandle, ThreadId}; use super::{EventLoop, MainThreadExecutor}; -use crate::nih_log; use crate::util::permit_alloc; /// See [`EventLoop`][super::EventLoop].