From d0f1a792799fa32e620bb9dcb1623163112b8314 Mon Sep 17 00:00:00 2001 From: Robbert van der Helm Date: Tue, 15 Feb 2022 18:04:26 +0100 Subject: [PATCH] Use SIMD for Diopser It's pretty damn fast now, especially compared to the JUCE version. --- plugins/diopser/README.md | 3 ++- plugins/diopser/src/lib.rs | 54 ++++++++++++++++++++++---------------- 2 files changed, 34 insertions(+), 23 deletions(-) diff --git a/plugins/diopser/README.md b/plugins/diopser/README.md index 5a72e9bd..fa9280c4 100644 --- a/plugins/diopser/README.md +++ b/plugins/diopser/README.md @@ -11,7 +11,8 @@ like a cartoon laser beam, or a psytrance kickdrum. If you are experimenting with those kinds of settings, then you may want to consider temporarily placing a peak limiter after the plugin in case loud resonances start building up. -This is a port from https://github.com/robbert-vdh/diopser. +This is a port from https://github.com/robbert-vdh/diopser with more features +and much better performance. *Disperser is a trademark of Kilohearts AB. Diopser is in no way related to diff --git a/plugins/diopser/src/lib.rs b/plugins/diopser/src/lib.rs index 58f1c6ac..9cf52af4 100644 --- a/plugins/diopser/src/lib.rs +++ b/plugins/diopser/src/lib.rs @@ -22,6 +22,7 @@ use nih_plug::{ }; use nih_plug::{BoolParam, FloatParam, IntParam, Params, Range, SmoothingStyle}; use nih_plug::{Enum, EnumParam}; +use packed_simd::f32x2; use std::pin::Pin; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::Arc; @@ -49,9 +50,12 @@ struct Diopser { /// Needed for computing the filter coefficients. sample_rate: f32, - /// All of the all-pass filters, with one array of serial filters per channelq. - /// [DiopserParams::num_stages] controls how many filters are actually active. - filters: Vec<[filter::Biquad; MAX_NUM_FILTERS]>, + /// All of the all-pass filters, with vectorized coefficients so they can be calculated for + /// multiple channels at once. [DiopserParams::num_stages] controls how many filters are + /// actually active. + // FIXME: This was the scalar version, maybe add this back at some point. + // filters: Vec<[filter::Biquad; MAX_NUM_FILTERS]>, + filters: [filter::Biquad; MAX_NUM_FILTERS], /// If this is set at the start of the processing cycle, then the filter coefficients should be /// updated. For the regular filter parameters we can look at the smoothers, but this is needed /// when changing the number of active filters. @@ -111,7 +115,7 @@ impl Default for Diopser { sample_rate: 1.0, - filters: Vec::new(), + filters: [filter::Biquad::default(); MAX_NUM_FILTERS], should_update_filters, next_filter_smoothing_in: 1, } @@ -216,19 +220,17 @@ impl Plugin for Diopser { } fn accepts_bus_config(&self, config: &BusConfig) -> bool { - // This works with any symmetrical IO layout - config.num_input_channels == config.num_output_channels && config.num_input_channels > 0 + // FIXME: The scalar version would work for any IO layout, but this SIMD version can only do + // stereo + config.num_input_channels == config.num_output_channels && config.num_input_channels == 2 } fn initialize( &mut self, - bus_config: &BusConfig, + _bus_config: &BusConfig, buffer_config: &BufferConfig, _context: &mut impl ProcessContext, ) -> bool { - self.filters = - vec![[Default::default(); MAX_NUM_FILTERS]; bus_config.num_input_channels as usize]; - // Initialize the filters on the first process call self.sample_rate = buffer_config.sample_rate; self.should_update_filters.store(true, Ordering::Release); @@ -250,16 +252,25 @@ impl Plugin for Diopser { for mut channel_samples in buffer.iter_mut() { self.maybe_update_filters(smoothing_interval); - // We get better cache locality by iterating over the filters and then over the channels - for filter_idx in 0..self.params.filter_stages.value as usize { - for (channel_idx, filters) in self.filters.iter_mut().enumerate() { - // We can also use `channel_samples.iter_mut()`, but the compiler isn't able to - // optmize that iterator away and it would add a ton of overhead over indexing - // the buffer directly - let sample = unsafe { channel_samples.get_unchecked_mut(channel_idx) }; - *sample = filters[filter_idx].process(*sample); - } + // We can compute the filters for both channels at once. This version thus now only + // supports stero audio. + let mut samples = + f32x2::new(*unsafe { channel_samples.get_unchecked_mut(0) }, *unsafe { + channel_samples.get_unchecked_mut(1) + }); + + // Iterating over the filters and then over the channels would get us better cache + // locality even in the scalar version + for filter in self + .filters + .iter_mut() + .take(self.params.filter_stages.value as usize) + { + samples = filter.process(samples); } + + *unsafe { channel_samples.get_unchecked_mut(0) } = samples.extract(0); + *unsafe { channel_samples.get_unchecked_mut(1) } = samples.extract(1); } ProcessStatus::Normal @@ -325,11 +336,10 @@ impl Diopser { } .clamp(MIN_FREQUENCY, max_frequency); + // In the scalar version we'd update every channel's filter's coefficients gere let coefficients = filter::BiquadCoefficients::allpass(self.sample_rate, filter_frequency, resonance); - for channel in self.filters.iter_mut() { - channel[filter_idx].coefficients = coefficients; - } + self.filters[filter_idx].coefficients = coefficients; } } }