bbx_core/
denormal.rs

1//! Denormal (subnormal) float handling utilities.
2//!
3//! Denormalized floating-point numbers are very small values that can cause
4//! significant CPU slowdowns (10-100x) on x86 processors due to microcode
5//! fallback handling. This module provides utilities to flush these values
6//! to zero, preventing performance degradation in audio processing.
7//!
8//! When the `ftz-daz` feature is enabled, this module also provides CPU-level
9//! FTZ (Flush-To-Zero) and DAZ (Denormals-Are-Zero) mode configuration for
10//! x86/x86_64 and AArch64 processors.
11
/// Magnitude below which an `f64` value is treated as denormal and flushed to zero.
///
/// This cutoff sits far above the true subnormal range (the smallest normal `f64`,
/// `f64::MIN_POSITIVE`, is roughly 2.2e-308). The margin is intentional: it catches
/// values that are still normal now but will become denormal after further processing.
pub const DENORMAL_THRESHOLD_F64: f64 = 1e-15;

/// Magnitude below which an `f32` value is treated as denormal and flushed to zero.
///
/// Uses the same numeric cutoff as [`DENORMAL_THRESHOLD_F64`]. It is likewise well above
/// the true subnormal range of `f32` (`f32::MIN_POSITIVE` is roughly 1.2e-38), so values
/// are flushed before further processing can push them into that range.
pub const DENORMAL_THRESHOLD_F32: f32 = 1e-15;
17
18/// Flush a denormal f64 value to zero.
19///
20/// Values with absolute value below the threshold are replaced with zero
21/// to prevent CPU slowdowns from denormalized float handling.
22#[inline]
23pub fn flush_denormal_f64(x: f64) -> f64 {
24    if x.abs() < DENORMAL_THRESHOLD_F64 { 0.0 } else { x }
25}
26
/// Flush every near-zero value in an `f64` buffer to zero, four lanes at a time.
///
/// The buffer is split into whole groups of four samples, each handled with one SIMD
/// compare-and-select against [`DENORMAL_THRESHOLD_F64`]. The trailing samples (fewer than
/// four) are passed through [`flush_denormal_f64`] individually.
#[cfg(feature = "simd")]
#[inline]
pub fn flush_denormals_f64_batch(buffer: &mut [f64]) {
    use std::simd::{cmp::SimdPartialOrd, f64x4, num::SimdFloat};

    const LANES: usize = 4;

    let limit = f64x4::splat(DENORMAL_THRESHOLD_F64);
    let zeros = f64x4::splat(0.0);

    let (groups, tail) = buffer.as_chunks_mut::<LANES>();

    groups.iter_mut().for_each(|group| {
        let values = f64x4::from_array(*group);
        // Lanes whose magnitude is below the limit take the zero lane; the rest keep their value.
        let too_small = values.abs().simd_lt(limit);
        *group = too_small.select(zeros, values).to_array();
    });

    tail.iter_mut()
        .for_each(|sample| *sample = flush_denormal_f64(*sample));
}
46
47/// Batch flush denormals in a slice of f64 values (scalar fallback).
48#[cfg(not(feature = "simd"))]
49#[inline]
50pub fn flush_denormals_f64_batch(buffer: &mut [f64]) {
51    for sample in buffer {
52        *sample = flush_denormal_f64(*sample);
53    }
54}
55
56/// Flush a denormal f32 value to zero.
57#[inline]
58pub fn flush_denormal_f32(x: f32) -> f32 {
59    if x.abs() < DENORMAL_THRESHOLD_F32 { 0.0 } else { x }
60}
61
/// Flush every near-zero value in an `f32` buffer to zero, four lanes at a time.
///
/// The buffer is split into whole groups of four samples, each handled with one SIMD
/// compare-and-select against [`DENORMAL_THRESHOLD_F32`]. The trailing samples (fewer than
/// four) are passed through [`flush_denormal_f32`] individually.
#[cfg(feature = "simd")]
#[inline]
pub fn flush_denormals_f32_batch(buffer: &mut [f32]) {
    use std::simd::{cmp::SimdPartialOrd, f32x4, num::SimdFloat};

    const LANES: usize = 4;

    let limit = f32x4::splat(DENORMAL_THRESHOLD_F32);
    let zeros = f32x4::splat(0.0);

    let (groups, tail) = buffer.as_chunks_mut::<LANES>();

    groups.iter_mut().for_each(|group| {
        let values = f32x4::from_array(*group);
        // Lanes whose magnitude is below the limit take the zero lane; the rest keep their value.
        let too_small = values.abs().simd_lt(limit);
        *group = too_small.select(zeros, values).to_array();
    });

    tail.iter_mut()
        .for_each(|sample| *sample = flush_denormal_f32(*sample));
}
81
82/// Batch flush denormals in a slice of f32 values (scalar fallback).
83#[cfg(not(feature = "simd"))]
84#[inline]
85pub fn flush_denormals_f32_batch(buffer: &mut [f32]) {
86    for sample in buffer {
87        *sample = flush_denormal_f32(*sample);
88    }
89}
90
/// Enable FTZ (Flush-To-Zero) and DAZ (Denormals-Are-Zero) modes on x86/x86_64.
///
/// Reads the SSE control/status register `MXCSR`, sets its FTZ (bit 15) and DAZ (bit 6)
/// bits, and writes the value back. With these bits set, denormalized floating-point
/// numbers are flushed to zero by the CPU, avoiding the significant performance penalty
/// (10-100x slowdown) that denormals can cause.
///
/// This function is only available when the `ftz-daz` feature is enabled and compiling
/// for x86/x86_64 targets. If the target is built without the `sse` target feature (as
/// some 32-bit x86 targets are), the `MXCSR` instructions are unavailable and this
/// function compiles to a no-op.
///
/// # Threading
///
/// It may be called from any thread, but the flags are per-thread on most systems. Call
/// this at the start of any audio processing thread.
///
/// # Caveats
///
/// NOTE(review): the `std::arch` documentation for `_mm_setcsr` warns that the compiler
/// assumes the denormal-handling bits of `MXCSR` are in their default state. Confirm this
/// trade-off is acceptable for every caller.
#[cfg(all(feature = "ftz-daz", any(target_arch = "x86", target_arch = "x86_64")))]
pub fn enable_ftz_daz() {
    // `stmxcsr` / `ldmxcsr` are SSE instructions; without SSE there is no MXCSR to configure.
    #[cfg(target_feature = "sse")]
    {
        use std::arch::asm;

        const FTZ_BIT: u32 = 1 << 15;
        const DAZ_BIT: u32 = 1 << 6;

        let mut mxcsr: u32 = 0;

        // SAFETY: `stmxcsr` stores the 32-bit MXCSR value to the address held in the operand,
        // which points at a live, writable `u32` local. The instruction does not use the stack
        // or modify EFLAGS, matching the declared options.
        unsafe {
            asm!(
                "stmxcsr [{}]",
                in(reg) &mut mxcsr,
                options(nostack, preserves_flags)
            );
        }

        mxcsr |= FTZ_BIT | DAZ_BIT;

        // SAFETY: `ldmxcsr` loads MXCSR from the address held in the operand, which points at
        // a live `u32` local. The loaded value is the one just read with only the FTZ and DAZ
        // bits added, so the exception flags and masks are written back unchanged.
        unsafe {
            asm!(
                "ldmxcsr [{}]",
                in(reg) &mxcsr,
                options(nostack, preserves_flags)
            );
        }
    }
}
127
/// Enable FTZ (Flush-To-Zero) mode on AArch64.
///
/// Reads the floating-point control register `FPCR`, sets its FZ bit (bit 24), and writes
/// the result back so that denormal outputs are flushed to zero.
///
/// # ARM vs x86 Differences
///
/// This module treats ARM `FPCR.FZ` as affecting outputs only, with no universal DAZ
/// equivalent (TODO: confirm against the Arm Architecture Reference Manual for the
/// targeted cores). Use `flush_denormal_f64/f32` in feedback paths for full coverage.
#[cfg(all(feature = "ftz-daz", target_arch = "aarch64"))]
pub fn enable_ftz_daz() {
    use std::arch::asm;

    const FZ_BIT: u64 = 1 << 24;

    let current: u64;
    // SAFETY: `mrs` copies FPCR into a general-purpose register; it touches no memory,
    // stack, or condition flags, matching the declared options.
    unsafe {
        asm!(
            "mrs {}, fpcr",
            out(reg) current,
            options(nomem, nostack, preserves_flags)
        );
    }

    let updated = current | FZ_BIT;

    // SAFETY: `msr` writes a general-purpose register to FPCR. The value is the one just
    // read with only the FZ bit added, so all other control bits are preserved.
    unsafe {
        asm!(
            "msr fpcr, {}",
            in(reg) updated,
            options(nomem, nostack, preserves_flags)
        );
    }
}
157
/// Fallback `enable_ftz_daz` for architectures without a dedicated implementation.
///
/// Compiled when the `ftz-daz` feature is enabled and the target is none of x86, x86_64,
/// or AArch64. It intentionally does nothing, so callers can invoke `enable_ftz_daz()`
/// unconditionally.
#[cfg(all(
    feature = "ftz-daz",
    not(target_arch = "x86"),
    not(target_arch = "x86_64"),
    not(target_arch = "aarch64")
))]
pub fn enable_ftz_daz() {}
166
/// Unit tests for the scalar and batch flush helpers and, when the `ftz-daz` feature is
/// enabled, for the CPU control-register configuration.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_normal_values_unchanged() {
        assert_eq!(flush_denormal_f64(1.0), 1.0);
        assert_eq!(flush_denormal_f64(-0.5), -0.5);
        assert_eq!(flush_denormal_f64(1e-10), 1e-10);
    }

    #[test]
    fn test_denormal_flushed_to_zero() {
        assert_eq!(flush_denormal_f64(1e-16), 0.0);
        assert_eq!(flush_denormal_f64(-1e-16), 0.0);
        assert_eq!(flush_denormal_f64(1e-300), 0.0);
        // Smallest positive subnormal: a genuinely denormalized input.
        assert_eq!(flush_denormal_f64(f64::from_bits(1)), 0.0);
    }

    #[test]
    fn test_zero_unchanged() {
        assert_eq!(flush_denormal_f64(0.0), 0.0);
        assert_eq!(flush_denormal_f64(-0.0), 0.0);
        // `-0.0 == 0.0` compares equal, so check the sign bit explicitly: flushing yields +0.0.
        assert!(flush_denormal_f64(-0.0).is_sign_positive());
        assert!(flush_denormal_f64(-1e-16).is_sign_positive());
    }

    #[test]
    fn test_threshold_boundary_is_kept() {
        // The comparison is strict, so a value exactly at the threshold is not flushed.
        assert_eq!(
            flush_denormal_f64(DENORMAL_THRESHOLD_F64),
            DENORMAL_THRESHOLD_F64
        );
        assert_eq!(
            flush_denormal_f32(DENORMAL_THRESHOLD_F32),
            DENORMAL_THRESHOLD_F32
        );
    }

    #[test]
    fn test_non_finite_values_unchanged() {
        assert!(flush_denormal_f64(f64::NAN).is_nan());
        assert_eq!(flush_denormal_f64(f64::INFINITY), f64::INFINITY);
        assert_eq!(flush_denormal_f64(f64::NEG_INFINITY), f64::NEG_INFINITY);
        assert!(flush_denormal_f32(f32::NAN).is_nan());
        assert_eq!(flush_denormal_f32(f32::INFINITY), f32::INFINITY);
    }

    #[test]
    fn test_f32_denormal_handling() {
        assert_eq!(flush_denormal_f32(1.0), 1.0);
        assert_eq!(flush_denormal_f32(1e-16), 0.0);
        assert_eq!(flush_denormal_f32(f32::from_bits(1)), 0.0);
    }

    #[test]
    fn test_f64_batch_matches_scalar() {
        // Seven elements: one full group of four plus a three-element remainder.
        let input = [1.0, 1e-16, -0.5, -1e-20, 1e-10, 0.0, 3.0];
        let mut buffer = input;
        flush_denormals_f64_batch(&mut buffer);

        assert_eq!(buffer, input.map(flush_denormal_f64));
        assert_eq!(buffer, [1.0, 0.0, -0.5, 0.0, 1e-10, 0.0, 3.0]);

        let mut empty: [f64; 0] = [];
        flush_denormals_f64_batch(&mut empty);
    }

    #[test]
    fn test_f32_batch_matches_scalar() {
        // Seven elements: one full group of four plus a three-element remainder.
        let input = [1.0_f32, 1e-16, -0.5, -1e-20, 1e-10, 0.0, 3.0];
        let mut buffer = input;
        flush_denormals_f32_batch(&mut buffer);

        assert_eq!(buffer, input.map(flush_denormal_f32));
        assert_eq!(buffer, [1.0_f32, 0.0, -0.5, 0.0, 1e-10, 0.0, 3.0]);

        let mut empty: [f32; 0] = [];
        flush_denormals_f32_batch(&mut empty);
    }

    #[cfg(all(
        feature = "ftz-daz",
        any(target_arch = "x86", target_arch = "x86_64"),
        target_feature = "sse"
    ))]
    #[test]
    fn test_enable_ftz_daz_sets_mxcsr_bits() {
        use std::arch::asm;
        const FTZ_BIT: u32 = 1 << 15;
        const DAZ_BIT: u32 = 1 << 6;

        enable_ftz_daz();

        let mut mxcsr: u32 = 0;
        // SAFETY: `stmxcsr` stores MXCSR to the address of a live, writable `u32` local.
        unsafe {
            asm!(
                "stmxcsr [{}]",
                in(reg) &mut mxcsr,
                options(nostack, preserves_flags)
            );
        }
        assert_eq!(
            mxcsr & (FTZ_BIT | DAZ_BIT),
            FTZ_BIT | DAZ_BIT,
            "FTZ and DAZ bits should be set after enable_ftz_daz()"
        );
    }

    #[cfg(all(feature = "ftz-daz", target_arch = "aarch64"))]
    #[test]
    fn test_enable_ftz_daz_sets_fz_bit() {
        use std::arch::asm;
        const FZ_BIT: u64 = 1 << 24;

        enable_ftz_daz();

        let fpcr: u64;
        // SAFETY: `mrs` only copies FPCR into a general-purpose register.
        unsafe {
            asm!(
                "mrs {}, fpcr",
                out(reg) fpcr,
                options(nomem, nostack, preserves_flags)
            );
        }
        assert_ne!(fpcr & FZ_BIT, 0, "FZ bit should be set after enable_ftz_daz()");
    }
}
bbx_core/denormal.rs

bbx_core/
denormal.rs