/// Magnitude cutoff for `f64` samples: [`flush_denormal_f64`] replaces any
/// value whose absolute value is strictly below this with `0.0`.
///
/// Note that this is far larger than the smallest normal `f64`
/// (`f64::MIN_POSITIVE`), so ordinary-but-tiny values are flushed as well.
pub const DENORMAL_THRESHOLD_F64: f64 = 1.0e-15;

/// Magnitude cutoff for `f32` samples: [`flush_denormal_f32`] replaces any
/// value whose absolute value is strictly below this with `0.0`.
pub const DENORMAL_THRESHOLD_F32: f32 = 1.0e-15;
17
18#[inline]
23pub fn flush_denormal_f64(x: f64) -> f64 {
24 if x.abs() < DENORMAL_THRESHOLD_F64 { 0.0 } else { x }
25}
26
/// Flushes every sample in `buffer` whose magnitude is strictly below
/// [`DENORMAL_THRESHOLD_F64`] to `0.0`, in place.
///
/// SIMD variant: the slice is split into groups of four lanes that are
/// processed with `f64x4`; any trailing elements (fewer than four) are
/// handled one at a time by [`flush_denormal_f64`].
#[cfg(feature = "simd")]
#[inline]
pub fn flush_denormals_f64_batch(buffer: &mut [f64]) {
    use std::simd::{cmp::SimdPartialOrd, f64x4, num::SimdFloat};

    let limit = f64x4::splat(DENORMAL_THRESHOLD_F64);
    let zeros = f64x4::splat(0.0);

    let (lanes, tail) = buffer.as_chunks_mut::<4>();

    // Vector path: build a per-lane "below threshold" mask and pick zero for
    // the lanes where it is set, keeping the original value elsewhere.
    lanes.iter_mut().for_each(|lane| {
        let values = f64x4::from_array(*lane);
        let is_tiny = values.abs().simd_lt(limit);
        *lane = is_tiny.select(zeros, values).to_array();
    });

    // Scalar path for the leftover elements that do not fill a full vector.
    tail.iter_mut()
        .for_each(|sample| *sample = flush_denormal_f64(*sample));
}
46
47#[cfg(not(feature = "simd"))]
49#[inline]
50pub fn flush_denormals_f64_batch(buffer: &mut [f64]) {
51 for sample in buffer {
52 *sample = flush_denormal_f64(*sample);
53 }
54}
55
56#[inline]
58pub fn flush_denormal_f32(x: f32) -> f32 {
59 if x.abs() < DENORMAL_THRESHOLD_F32 { 0.0 } else { x }
60}
61
/// Flushes every sample in `buffer` whose magnitude is strictly below
/// [`DENORMAL_THRESHOLD_F32`] to `0.0`, in place.
///
/// SIMD variant: the slice is split into groups of four lanes that are
/// processed with `f32x4`; any trailing elements (fewer than four) are
/// handled one at a time by [`flush_denormal_f32`].
#[cfg(feature = "simd")]
#[inline]
pub fn flush_denormals_f32_batch(buffer: &mut [f32]) {
    use std::simd::{cmp::SimdPartialOrd, f32x4, num::SimdFloat};

    let limit = f32x4::splat(DENORMAL_THRESHOLD_F32);
    let zeros = f32x4::splat(0.0);

    let (lanes, tail) = buffer.as_chunks_mut::<4>();

    // Vector path: build a per-lane "below threshold" mask and pick zero for
    // the lanes where it is set, keeping the original value elsewhere.
    lanes.iter_mut().for_each(|lane| {
        let values = f32x4::from_array(*lane);
        let is_tiny = values.abs().simd_lt(limit);
        *lane = is_tiny.select(zeros, values).to_array();
    });

    // Scalar path for the leftover elements that do not fill a full vector.
    tail.iter_mut()
        .for_each(|sample| *sample = flush_denormal_f32(*sample));
}
81
82#[cfg(not(feature = "simd"))]
84#[inline]
85pub fn flush_denormals_f32_batch(buffer: &mut [f32]) {
86 for sample in buffer {
87 *sample = flush_denormal_f32(*sample);
88 }
89}
90
/// Sets the FTZ (bit 15) and DAZ (bit 6) bits in the MXCSR register.
///
/// The current MXCSR value is stored to a local, the two bits are OR-ed in
/// (all other bits are left as they were), and the result is loaded back.
///
/// NOTE(review): MXCSR is presumably per-thread state, so this likely needs
/// to be called on every thread that should run with FTZ/DAZ — confirm
/// against the callers.
#[cfg(all(feature = "ftz-daz", any(target_arch = "x86", target_arch = "x86_64")))]
pub fn enable_ftz_daz() {
    use std::arch::asm;

    const FTZ_BIT: u32 = 1 << 15;
    const DAZ_BIT: u32 = 1 << 6;

    let mut csr: u32 = 0;

    // SAFETY: `stmxcsr` stores the 32-bit MXCSR value to the given address,
    // which points at the live local `csr`. The asm does not use the stack.
    unsafe {
        asm!(
            "stmxcsr [{addr}]",
            addr = in(reg) &mut csr as *mut u32,
            options(nostack, preserves_flags)
        );
    }

    csr |= FTZ_BIT | DAZ_BIT;

    // SAFETY: `ldmxcsr` loads 32 bits from the given address, which points at
    // the live local `csr`; only the FTZ and DAZ bits differ from the value
    // that was just read out of the register.
    unsafe {
        asm!(
            "ldmxcsr [{addr}]",
            addr = in(reg) &csr as *const u32,
            options(nostack, preserves_flags)
        );
    }
}
127
/// Sets the FZ bit (bit 24) in the AArch64 FPCR register.
///
/// The current FPCR value is read, the FZ bit is OR-ed in (all other bits
/// are left as they were), and the result is written back.
///
/// NOTE(review): FPCR is presumably per-thread state, so this likely needs
/// to be called on every thread that should run with flush-to-zero — confirm
/// against the callers.
#[cfg(all(feature = "ftz-daz", target_arch = "aarch64"))]
pub fn enable_ftz_daz() {
    use std::arch::asm;

    const FZ_BIT: u64 = 1 << 24;

    let current: u64;
    // SAFETY: `mrs` only copies FPCR into a general-purpose register; it
    // touches neither memory nor the stack.
    unsafe {
        asm!(
            "mrs {value}, fpcr",
            value = out(reg) current,
            options(nomem, nostack, preserves_flags)
        );
    }

    let updated = current | FZ_BIT;

    // SAFETY: `msr` writes back the value just read with only the FZ bit
    // additionally set; it touches neither memory nor the stack.
    unsafe {
        asm!(
            "msr fpcr, {value}",
            value = in(reg) updated,
            options(nomem, nostack, preserves_flags)
        );
    }
}
157
/// Fallback for architectures other than x86, x86_64 and aarch64.
///
/// Intentionally does nothing, so callers can invoke `enable_ftz_daz()`
/// unconditionally whenever the `ftz-daz` feature is enabled.
#[cfg(all(
    feature = "ftz-daz",
    not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64"))
))]
pub fn enable_ftz_daz() {}
166
#[cfg(test)]
mod tests {
    use super::*;

    /// Values at or above the threshold must come back untouched.
    #[test]
    fn test_normal_values_unchanged() {
        for value in [1.0_f64, -0.5, 1e-10] {
            assert_eq!(flush_denormal_f64(value), value);
        }
    }

    /// Values whose magnitude is below the threshold collapse to zero,
    /// regardless of sign.
    #[test]
    fn test_denormal_flushed_to_zero() {
        for tiny in [1e-16_f64, -1e-16, 1e-300] {
            assert_eq!(flush_denormal_f64(tiny), 0.0);
        }
    }

    /// Both signed zeros compare equal to zero after flushing.
    #[test]
    fn test_zero_unchanged() {
        for zero in [0.0_f64, -0.0] {
            assert_eq!(flush_denormal_f64(zero), 0.0);
        }
    }

    /// Smoke test for the `f32` variant: one kept value, one flushed value.
    #[test]
    fn test_f32_denormal_handling() {
        let cases: [(f32, f32); 2] = [(1.0, 1.0), (1e-16, 0.0)];
        for (input, expected) in cases {
            assert_eq!(flush_denormal_f32(input), expected);
        }
    }

    /// After `enable_ftz_daz()` the FZ bit (bit 24) must be readable in FPCR.
    #[cfg(all(feature = "ftz-daz", target_arch = "aarch64"))]
    #[test]
    fn test_enable_ftz_daz_sets_fz_bit() {
        use std::arch::asm;
        const FZ_BIT: u64 = 1 << 24;

        enable_ftz_daz();

        let observed: u64;
        // SAFETY: `mrs` only copies FPCR into a general-purpose register; it
        // touches neither memory nor the stack.
        unsafe {
            asm!(
                "mrs {value}, fpcr",
                value = out(reg) observed,
                options(nomem, nostack, preserves_flags)
            );
        }
        assert_ne!(observed & FZ_BIT, 0, "FZ bit should be set after enable_ftz_daz()");
    }
}