bbx_core/
simd.rs

1//! Portable SIMD utilities for audio DSP.
2//!
3//! This module provides SIMD-accelerated operations for common DSP tasks.
4//! Requires the `simd` feature and nightly Rust.
5
6use std::simd::{StdFloat, f32x4, f64x4};
7
8use crate::sample::{SIMD_LANES, Sample};
9
/// Number of `f32` elements processed per SIMD step by the `*_f32` functions
/// in this module. Those functions pair this constant with `f32x4`, so the
/// value matches that vector's lane count.
10pub const F32_LANES: usize = 4;
/// Number of `f64` elements processed per SIMD step by the `*_f64` functions
/// in this module. Those functions pair this constant with `f64x4`, so the
/// value matches that vector's lane count.
11pub const F64_LANES: usize = 4;
12
13#[inline]
14pub fn fill_f32(slice: &mut [f32], value: f32) {
15    let vec = f32x4::splat(value);
16    let (chunks, remainder) = slice.as_chunks_mut::<F32_LANES>();
17
18    for chunk in chunks {
19        *chunk = vec.to_array();
20    }
21    remainder.fill(value);
22}
23
24#[inline]
25pub fn fill_f64(slice: &mut [f64], value: f64) {
26    let vec = f64x4::splat(value);
27    let (chunks, remainder) = slice.as_chunks_mut::<F64_LANES>();
28
29    for chunk in chunks {
30        *chunk = vec.to_array();
31    }
32    remainder.fill(value);
33}
34
35#[inline]
36pub fn apply_gain_f32(input: &[f32], output: &mut [f32], gain: f32) {
37    debug_assert!(input.len() <= output.len());
38
39    let gain_vec = f32x4::splat(gain);
40    let len = input.len();
41    let chunks = len / F32_LANES;
42    let remainder_start = chunks * F32_LANES;
43
44    for i in 0..chunks {
45        let offset = i * F32_LANES;
46        let in_chunk = f32x4::from_slice(&input[offset..]);
47        let result = in_chunk * gain_vec;
48        output[offset..offset + F32_LANES].copy_from_slice(&result.to_array());
49    }
50
51    for i in remainder_start..len {
52        output[i] = input[i] * gain;
53    }
54}
55
56#[inline]
57pub fn apply_gain_f64(input: &[f64], output: &mut [f64], gain: f64) {
58    debug_assert!(input.len() <= output.len());
59
60    let gain_vec = f64x4::splat(gain);
61    let len = input.len();
62    let chunks = len / F64_LANES;
63    let remainder_start = chunks * F64_LANES;
64
65    for i in 0..chunks {
66        let offset = i * F64_LANES;
67        let in_chunk = f64x4::from_slice(&input[offset..]);
68        let result = in_chunk * gain_vec;
69        output[offset..offset + F64_LANES].copy_from_slice(&result.to_array());
70    }
71
72    for i in remainder_start..len {
73        output[i] = input[i] * gain;
74    }
75}
76
77#[inline]
78pub fn multiply_add_f32(a: &[f32], b: &[f32], output: &mut [f32]) {
79    debug_assert!(a.len() == b.len());
80    debug_assert!(a.len() <= output.len());
81
82    let len = a.len();
83    let chunks = len / F32_LANES;
84    let remainder_start = chunks * F32_LANES;
85
86    for i in 0..chunks {
87        let offset = i * F32_LANES;
88        let a_chunk = f32x4::from_slice(&a[offset..]);
89        let b_chunk = f32x4::from_slice(&b[offset..]);
90        let result = a_chunk * b_chunk;
91        output[offset..offset + F32_LANES].copy_from_slice(&result.to_array());
92    }
93
94    for i in remainder_start..len {
95        output[i] = a[i] * b[i];
96    }
97}
98
99#[inline]
100pub fn multiply_add_f64(a: &[f64], b: &[f64], output: &mut [f64]) {
101    debug_assert!(a.len() == b.len());
102    debug_assert!(a.len() <= output.len());
103
104    let len = a.len();
105    let chunks = len / F64_LANES;
106    let remainder_start = chunks * F64_LANES;
107
108    for i in 0..chunks {
109        let offset = i * F64_LANES;
110        let a_chunk = f64x4::from_slice(&a[offset..]);
111        let b_chunk = f64x4::from_slice(&b[offset..]);
112        let result = a_chunk * b_chunk;
113        output[offset..offset + F64_LANES].copy_from_slice(&result.to_array());
114    }
115
116    for i in remainder_start..len {
117        output[i] = a[i] * b[i];
118    }
119}
120
121pub fn sin_f32(input: &[f32], output: &mut [f32]) {
122    debug_assert!(input.len() <= output.len());
123
124    let len = input.len();
125    let chunks = len / F32_LANES;
126    let remainder_start = chunks * F32_LANES;
127
128    for i in 0..chunks {
129        let offset = i * F32_LANES;
130        let in_chunk = f32x4::from_slice(&input[offset..]);
131        let result = in_chunk.sin();
132        output[offset..offset + F32_LANES].copy_from_slice(&result.to_array());
133    }
134
135    for i in remainder_start..len {
136        output[i] = input[i].sin();
137    }
138}
139
140pub fn sin_f64(input: &[f64], output: &mut [f64]) {
141    debug_assert!(input.len() <= output.len());
142
143    let len = input.len();
144    let chunks = len / F64_LANES;
145    let remainder_start = chunks * F64_LANES;
146
147    for i in 0..chunks {
148        let offset = i * F64_LANES;
149        let in_chunk = f64x4::from_slice(&input[offset..]);
150        let result = in_chunk.sin();
151        output[offset..offset + F64_LANES].copy_from_slice(&result.to_array());
152    }
153
154    for i in remainder_start..len {
155        output[i] = input[i].sin();
156    }
157}
158
159// =============================================================================
160// Generic SIMD operations using Sample trait
161// =============================================================================
162
163/// Fill a slice with a constant value using SIMD.
164#[inline]
165pub fn fill<S: Sample>(slice: &mut [S], value: S) {
166    let vec = S::simd_splat(value);
167    let chunks = slice.len() / SIMD_LANES;
168    let remainder_start = chunks * SIMD_LANES;
169
170    for i in 0..chunks {
171        let offset = i * SIMD_LANES;
172        slice[offset..offset + SIMD_LANES].copy_from_slice(&S::simd_to_array(vec));
173    }
174    slice[remainder_start..].fill(value);
175}
176
177/// Apply a gain value to an input slice and write to output using SIMD.
178#[inline]
179pub fn apply_gain<S: Sample>(input: &[S], output: &mut [S], gain: S)
180where
181    S::Simd: std::ops::Mul<Output = S::Simd>,
182{
183    debug_assert!(input.len() <= output.len());
184
185    let gain_vec = S::simd_splat(gain);
186    let len = input.len();
187    let chunks = len / SIMD_LANES;
188    let remainder_start = chunks * SIMD_LANES;
189
190    for i in 0..chunks {
191        let offset = i * SIMD_LANES;
192        let in_chunk = S::simd_from_slice(&input[offset..]);
193        let result = in_chunk * gain_vec;
194        output[offset..offset + SIMD_LANES].copy_from_slice(&S::simd_to_array(result));
195    }
196
197    for i in remainder_start..len {
198        output[i] = input[i] * gain;
199    }
200}
201
202/// Element-wise multiply two slices and write to output using SIMD.
203#[inline]
204pub fn multiply_add<S: Sample>(a: &[S], b: &[S], output: &mut [S])
205where
206    S::Simd: std::ops::Mul<Output = S::Simd>,
207{
208    debug_assert!(a.len() == b.len());
209    debug_assert!(a.len() <= output.len());
210
211    let len = a.len();
212    let chunks = len / SIMD_LANES;
213    let remainder_start = chunks * SIMD_LANES;
214
215    for i in 0..chunks {
216        let offset = i * SIMD_LANES;
217        let a_chunk = S::simd_from_slice(&a[offset..]);
218        let b_chunk = S::simd_from_slice(&b[offset..]);
219        let result = a_chunk * b_chunk;
220        output[offset..offset + SIMD_LANES].copy_from_slice(&S::simd_to_array(result));
221    }
222
223    for i in remainder_start..len {
224        output[i] = a[i] * b[i];
225    }
226}
227
228/// Compute sine of each element using SIMD.
229pub fn sin<S: Sample>(input: &[S], output: &mut [S]) {
230    debug_assert!(input.len() <= output.len());
231
232    let len = input.len();
233    let chunks = len / SIMD_LANES;
234    let remainder_start = chunks * SIMD_LANES;
235
236    for i in 0..chunks {
237        let offset = i * SIMD_LANES;
238        let in_chunk = S::simd_from_slice(&input[offset..]);
239        let result = in_chunk.sin();
240        output[offset..offset + SIMD_LANES].copy_from_slice(&S::simd_to_array(result));
241    }
242
243    for i in remainder_start..len {
244        output[i] = S::from_f64(input[i].to_f64().sin());
245    }
246}
247
// Unit tests for the concrete `f32`/`f64` helpers. Buffers of ten elements are
// used throughout so that both the vector path (two whole groups of four) and
// the scalar tail (two leftover elements) are exercised.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_fill_f32() {
        let mut buffer = [0.0f32; 10];
        fill_f32(&mut buffer, 1.5);
        assert!(buffer.iter().all(|&x| x == 1.5));
    }

    #[test]
    fn test_fill_f64() {
        let mut buffer = [0.0f64; 10];
        fill_f64(&mut buffer, 2.5);
        assert!(buffer.iter().all(|&x| x == 2.5));
    }

    #[test]
    fn test_apply_gain_f32() {
        let input: Vec<f32> = (0..10).map(|i| i as f32).collect();
        let mut output = vec![0.0f32; 10];
        apply_gain_f32(&input, &mut output, 0.5);

        for (i, &val) in output.iter().enumerate() {
            assert!((val - (i as f32) * 0.5).abs() < 1e-6);
        }
    }

    #[test]
    fn test_apply_gain_f64() {
        let input: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let mut output = vec![0.0f64; 10];
        apply_gain_f64(&input, &mut output, 0.5);

        for (i, &val) in output.iter().enumerate() {
            assert!((val - (i as f64) * 0.5).abs() < 1e-10);
        }
    }

    #[test]
    fn test_apply_gain_f32_leaves_extra_output_untouched() {
        // `output` is longer than `input`; only the first five slots may change.
        let input = [1.0f32, 2.0, 3.0, 4.0, 5.0];
        let mut output = [-1.0f32; 8];
        apply_gain_f32(&input, &mut output, 2.0);

        assert_eq!(&output[..5], &[2.0f32, 4.0, 6.0, 8.0, 10.0]);
        assert_eq!(&output[5..], &[-1.0f32; 3]);
    }

    #[test]
    fn test_multiply_add_f32() {
        let a: Vec<f32> = (0..10).map(|i| i as f32).collect();
        let b: Vec<f32> = (0..10).map(|i| i as f32 * 0.5).collect();
        let mut output = vec![0.0f32; 10];
        multiply_add_f32(&a, &b, &mut output);

        for (i, &val) in output.iter().enumerate() {
            let expected = (i as f32) * (i as f32 * 0.5);
            assert!((val - expected).abs() < 1e-6);
        }
    }

    #[test]
    fn test_multiply_add_f64() {
        let a: Vec<f64> = (0..10).map(|i| i as f64).collect();
        let b: Vec<f64> = (0..10).map(|i| i as f64 * 0.5).collect();
        let mut output = vec![0.0f64; 10];
        multiply_add_f64(&a, &b, &mut output);

        for (i, &val) in output.iter().enumerate() {
            let expected = (i as f64) * (i as f64 * 0.5);
            assert!((val - expected).abs() < 1e-10);
        }
    }

    #[test]
    fn test_sin_f32() {
        let input: Vec<f32> = (0..10).map(|i| i as f32 * 0.1).collect();
        let mut output = vec![0.0f32; 10];
        sin_f32(&input, &mut output);

        for (i, &val) in output.iter().enumerate() {
            let expected = (i as f32 * 0.1).sin();
            assert!((val - expected).abs() < 1e-6);
        }
    }

    #[test]
    fn test_sin_f64() {
        let input: Vec<f64> = (0..10).map(|i| i as f64 * 0.1).collect();
        let mut output = vec![0.0f64; 10];
        sin_f64(&input, &mut output);

        for (i, &val) in output.iter().enumerate() {
            let expected = (i as f64 * 0.1).sin();
            assert!((val - expected).abs() < 1e-10);
        }
    }

    #[test]
    fn test_empty_input_writes_nothing() {
        let mut output = [7.0f32; 3];
        apply_gain_f32(&[], &mut output, 2.0);
        multiply_add_f32(&[], &[], &mut output);
        sin_f32(&[], &mut output);
        assert_eq!(output, [7.0f32; 3]);

        let mut empty: [f32; 0] = [];
        fill_f32(&mut empty, 1.0);
    }
}