denormal.h

Go to the documentation of this file.
00001 /***************************************************************************/
00027 #ifndef AURESERVOIR_DENORMAL_H__
00028 #define AURESERVOIR_DENORMAL_H__
00029 
00030 #ifdef __SSE__
00031 #include <xmmintrin.h>
00032 #endif
00033 
00034 #include <flens/flens.h>
00035 #include "auexcept.h"
00036 
00037 namespace aureservoir
00038 {
00039 
/// DC offset that denormals_add_dc() adds to every element of a
/// single-precision (float) buffer.
const float SINGLE_DENORMAL_DC = 1.0e-25;

/// DC offset that denormals_add_dc() adds to every element of a
/// double-precision (double) buffer.
const double DOUBLE_DENORMAL_DC = 1.0e-30;
00047 
00063 inline void set_denormal_flags()
00064   throw(AUExcept)
00065 {
00066 #ifdef __SSE__
00067 
00068   unsigned long cpuflags = 0;
00069 
00070 #ifndef USE_X86_64_ASM
00071 
00072   asm volatile (
00073     "mov $1, %%eax\n"
00074     "pushl %%ebx\n"
00075     "cpuid\n"
00076     "movl %%edx, %0\n"
00077     "popl %%ebx\n"
00078     : "=r" (cpuflags)
00079     :
00080     : "%eax", "%ecx", "%edx", "memory"
00081   );
00082 
00083 #else
00084 
00085   asm volatile (
00086     "pushq %%rbx\n"
00087     "movq $1, %%rax\n"
00088     "cpuid\n"
00089     "movq %%rdx, %0\n"
00090     "popq %%rbx\n"
00091     : "=r" (cpuflags)
00092     : 
00093     : "%rax", "%rcx", "%rdx", "memory"
00094   );
00095 
00096 #endif // USE_X86_64_ASM
00097 
00098   if (! (cpuflags & 1<<25) )
00099     throw AUExcept("set_denormal_flag: your processor doesn't have SSE support, DAZ and FZ denormal handling not activated !");
00100 
00101   // do we need SSE2 ?
00102 //   if (! (cpuflags & 1<<26) )
00103 //     throw AUExcept("set_denormal_flag: your processor doesn't have SSE2 support, DAZ and FZ denormal handling not activated !");
00104 
00105   // set DAZ and FZ bits
00106   int oldMXCSR = _mm_getcsr(); //read the old MXCSR setting
00107   int newMXCSR = oldMXCSR | 0x8040; // set DAZ and FZ bits
00108   _mm_setcsr( newMXCSR ); //write the new MXCSR setting to the MXCSR
00109 
00110 #else
00111 
00112   throw AUExcept("set_denormal_flag: you did not compile with SSE support (-mfpmath=sse -msse), DAZ and FZ denormal handling not activated !");
00113 
00114 #endif // __SSE__
00115 }
00116 
00123 inline void denormals_add_dc(float *data, int size)
00124 {
00125   for(int i=0; i<size; ++i)
00126     data[i] += SINGLE_DENORMAL_DC;
00127 }
00128 
00135 inline void denormals_add_dc(double *data, int size)
00136 {
00137   for(int i=0; i<size; ++i)
00138     data[i] += DOUBLE_DENORMAL_DC;
00139 }
00140 
00141 } // end of namespace aureservoir
00142 
00143 #endif // AURESERVOIR_DENORMAL_H__

Generated on Wed Mar 12 21:16:05 2008 for aureservoir by  doxygen 1.5.3