00001 #ifndef __SIMDIA_H__
00002 #define __SIMDIA_H__
00003 
00004 
00005 #if defined(__SSE2__) && !defined(_CRAYC)
00006   #include "emmintrin.h"
00007 #endif
00008 
00009 #if CMK_CELL_SPE != 0
00010   #include "spu_intrinsics.h"
00011 #else
00012   #include "math.h"
00013 #endif
00014 
00015 #if defined(__VEC__)
00016   #include "altivec.h"
00017   #ifdef pixel
00018   #undef pixel
00019   #endif
00020   #ifdef bool
00021   #undef bool
00022   #endif
00023 #endif
00024 
00025 
00026 
00027 #if !CMK_HAS_SQRTF
00028   #define sqrtf(a) ((float)(sqrt((double)(a))))
00029 #endif
00030 
00031 
00032 
00033 
/* Per-architecture kill switches.  A non-zero value disables the matching
 * hardware-specific implementation.  SIMDIA_FORCE_NO_SSE is tested by the
 * SSE2 section's #if guard; the AltiVec and Cell SPE switches are presumably
 * tested the same way by sections outside this view (TODO confirm).
 * Every switch defaults to 0 and may be pre-defined before this header is
 * included (e.g. -DSIMDIA_FORCE_NO_SSE=1) without causing a redefinition. */
#ifndef SIMDIA_FORCE_NO_SSE
  #define SIMDIA_FORCE_NO_SSE       (0)
#endif
#ifndef SIMDIA_FORCE_NO_ALTIVEC
  #define SIMDIA_FORCE_NO_ALTIVEC   (0)
#endif
#ifndef SIMDIA_FORCE_NO_SPE_SIMD
  #define SIMDIA_FORCE_NO_SPE_SIMD  (0)
#endif
00037 
00038 
00039 
/* Double-precision mathematical constants.
 * Each literal carries more digits than a double can hold so that it rounds
 * to the nearest representable value.  (The 16-digit form of sqrt(2),
 * 1.414213562373095, rounds to the double one ulp below the correct one.) */
#define SIMDIA_CONSTANT_PI      (3.14159265358979323846)
#define SIMDIA_CONSTANT_E       (2.71828182845904523536)
#define SIMDIA_CONSTANT_SQRT_2  (1.41421356237309504880)
00043 
00044 
00045 
00046 
00047 
00048 
00049 
00050 
00051 
00052 
00053 
00054 
00055 
00056 
00057 
00058 
00059 
00060 
00061 
00064 
00065 
00066 
00067 
00068  
00069 
00070 
/* Plain-struct vector types used by the generic (scalar) implementation.
 * Lanes are named v0..vN in order. */

/* Four int lanes. */
typedef struct __simdia_vec_i {
  int v0;
  int v1;
  int v2;
  int v3;
} __simdia_veci;

/* Four float lanes. */
typedef struct __simdia_vec_f {
  float v0;
  float v1;
  float v2;
  float v3;
} __simdia_vecf;

/* Two double lanes. */
typedef struct __simdia_vec_lf {
  double v0;
  double v1;
} __simdia_veclf;
00074 
00075 
00076 
00077 inline  __simdia_veci  __simdia_vinserti( __simdia_veci v, const    int s, const int i) {  __simdia_veci r = v;    int* rPtr = (   int*)(&r); rPtr[i] = s; return r; }
00078 inline  __simdia_vecf  __simdia_vinsertf( __simdia_vecf v, const  float s, const int i) {  __simdia_vecf r = v;  float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00079 inline __simdia_veclf __simdia_vinsertlf(__simdia_veclf v, const double s, const int i) { __simdia_veclf r = v; double* rPtr = (double*)(&r); rPtr[i] = s; return r; }
00080 
00081 
00082 inline    int  __simdia_vextracti( __simdia_veci v, const int i) {    int* vPtr = (   int*)(&v); return vPtr[i]; }
00083 inline  float  __simdia_vextractf( __simdia_vecf v, const int i) {  float* vPtr = ( float*)(&v); return vPtr[i]; }
00084 inline double __simdia_vextractlf(__simdia_veclf v, const int i) { double* vPtr = (double*)(&v); return vPtr[i]; }
00085 
00086 
00087 inline  __simdia_veci  __simdia_vseti(const    int a) {  __simdia_veci r; r.v0 = r.v1 = r.v2 = r.v3 = a; return r; }
00088 inline  __simdia_vecf  __simdia_vsetf(const  float a) {  __simdia_vecf r; r.v0 = r.v1 = r.v2 = r.v3 = a; return r; }
00089 inline __simdia_veclf __simdia_vsetlf(const double a) { __simdia_veclf r; r.v0 = r.v1 =               a; return r; }
00090 
00091 
00092 
00093 
00094 const  __simdia_veci  __simdia_const_vzeroi = {   0 ,   0 ,   0 ,   0  };
00095 const  __simdia_vecf  __simdia_const_vzerof = { 0.0f, 0.0f, 0.0f, 0.0f };
00096 const __simdia_veclf __simdia_const_vzerolf = { 0.0 , 0.0              };
00097 
00098 
00099 const  __simdia_veci  __simdia_const_vonei = {   1 ,   1 ,   1 ,   1  };
00100 const  __simdia_vecf  __simdia_const_vonef = { 1.0f, 1.0f, 1.0f, 1.0f };
00101 const __simdia_veclf __simdia_const_vonelf = { 1.0 , 1.0              };
00102 
00103 
00104 const  __simdia_veci  __simdia_const_vtwoi = {   2 ,   2 ,   2 ,   2  };
00105 const  __simdia_vecf  __simdia_const_vtwof = { 2.0f, 2.0f, 2.0f, 2.0f };
00106 const __simdia_veclf __simdia_const_vtwolf = { 2.0 , 2.0              };
00107 
00108 
00109 const  __simdia_veci  __simdia_const_vnegonei = {   -1 ,   -1 ,   -1 ,   -1  };
00110 const  __simdia_vecf  __simdia_const_vnegonef = { -1.0f, -1.0f, -1.0f, -1.0f };
00111 const __simdia_veclf __simdia_const_vnegonelf = { -1.0 , -1.0                };
00112 
00113 
00114 
00115 
00116 
00117 
00118 inline  __simdia_veci  __simdia_vrothi(const  __simdia_veci a, int s) {  __simdia_veci b;    int* a_ptr = (   int*)(&a);    int* b_ptr = (   int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00119 inline  __simdia_vecf  __simdia_vrothf(const  __simdia_vecf a, int s) {  __simdia_vecf b;  float* a_ptr = ( float*)(&a);  float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00120 inline __simdia_veclf __simdia_vrothlf(const __simdia_veclf a, int s) { __simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0-s)&0x1]; b_ptr[1] = a_ptr[(1-s)&0x1]; return b; }
00121 inline  __simdia_veci  __simdia_vrotli(const  __simdia_veci a, int s) {  __simdia_veci b;    int* a_ptr = (   int*)(&a);    int* b_ptr = (   int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00122 inline  __simdia_vecf  __simdia_vrotlf(const  __simdia_vecf a, int s) {  __simdia_vecf b;  float* a_ptr = ( float*)(&a);  float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00123 inline __simdia_veclf __simdia_vrotllf(const __simdia_veclf a, int s) { __simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0+s)&0x1]; b_ptr[1] = a_ptr[(1+s)&0x1]; return b; }
00124 
00125 
00126 inline  __simdia_veci  __simdia_vaddi(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1; r.v2 = a.v2 + b.v2; r.v3 = a.v3 + b.v3; return r; }
00127 inline  __simdia_vecf  __simdia_vaddf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1; r.v2 = a.v2 + b.v2; r.v3 = a.v3 + b.v3; return r; }
00128 inline __simdia_veclf __simdia_vaddlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1;                                         return r; }
00129 
00130 
00131 inline  __simdia_veci  __simdia_vsubi(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1; r.v2 = a.v2 - b.v2; r.v3 = a.v3 - b.v3; return r; }
00132 inline  __simdia_vecf  __simdia_vsubf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1; r.v2 = a.v2 - b.v2; r.v3 = a.v3 - b.v3; return r; }
00133 inline __simdia_veclf __simdia_vsublf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1;                                         return r; }
00134 
00135 
00136 inline  __simdia_veci  __simdia_vmuli(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1; r.v2 = a.v2 * b.v2; r.v3 = a.v3 * b.v3; return r; }
00137 inline  __simdia_vecf  __simdia_vmulf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1; r.v2 = a.v2 * b.v2; r.v3 = a.v3 * b.v3; return r; }
00138 inline __simdia_veclf __simdia_vmullf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1;                                         return r; }
00139 
00140 
00141 inline  __simdia_veci  __simdia_vdivi(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1; r.v2 = a.v2 / b.v2; r.v3 = a.v3 / b.v3; return r; }
00142 inline  __simdia_vecf  __simdia_vdivf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1; r.v2 = a.v2 / b.v2; r.v3 = a.v3 / b.v3; return r; }
00143 inline __simdia_veclf __simdia_vdivlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1;                                         return r; }
00144 
00145 
00146 inline  __simdia_veci  __simdia_vmaddi(const  __simdia_veci a, const  __simdia_veci b, const  __simdia_veci c) {  __simdia_veci r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1; r.v2 = a.v2 * b.v2 + c.v2; r.v3 = a.v3 * b.v3 + c.v3; return r; }
00147 inline  __simdia_vecf  __simdia_vmaddf(const  __simdia_vecf a, const  __simdia_vecf b, const  __simdia_vecf c) {  __simdia_vecf r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1; r.v2 = a.v2 * b.v2 + c.v2; r.v3 = a.v3 * b.v3 + c.v3; return r; }
00148 inline __simdia_veclf __simdia_vmaddlf(const __simdia_veclf a, const __simdia_veclf b, const __simdia_veclf c) { __simdia_veclf r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1;                                                       return r; }
00149 
00150 
00151 
00152 inline  __simdia_vecf  __simdia_vrecipf(const  __simdia_vecf a) {  __simdia_vecf r; r.v0 = 1.0f / a.v0; r.v1 = 1.0f / a.v1; r.v2 = 1.0f / a.v2; r.v3 = 1.0f / a.v3; return r; }
00153 inline __simdia_veclf __simdia_vreciplf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = 1.0f / a.v0; r.v1 = 1.0f / a.v1; return r; }
00154 
00155 
00156 inline  __simdia_vecf  __simdia_vsqrtf(const  __simdia_vecf a) {  __simdia_vecf r; r.v0 = sqrtf(a.v0); r.v1 = sqrtf(a.v1); r.v2 = sqrtf(a.v2); r.v3 = sqrtf(a.v3); return r; }
00157 inline __simdia_veclf __simdia_vsqrtlf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = sqrt(a.v0); r.v1 = sqrt(a.v1); return r; }
00158 
00159 
00160 inline  __simdia_vecf  __simdia_vrsqrtf(const  __simdia_vecf a) {  __simdia_vecf r; r.v0 = 1.0f / sqrtf(a.v0); r.v1 = 1.0f / sqrtf(a.v1); r.v2 = 1.0f / sqrtf(a.v2); r.v3 = 1.0f / sqrtf(a.v3); return r; }
00161 inline __simdia_veclf __simdia_vrsqrtlf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = 1.0 / sqrt(a.v0); r.v1 = 1.0 / sqrt(a.v1); return r; }
00162 
00163 
00164 inline  __simdia_veci  __simdia_vnoti(const  __simdia_veci a) {  __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00165 inline  __simdia_vecf  __simdia_vnotf(const  __simdia_vecf a) {  __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00166 inline __simdia_veclf __simdia_vnotlf(const __simdia_veclf a) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00167 
00168 
00169 inline  __simdia_veci  __simdia_vori(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00170 inline  __simdia_vecf  __simdia_vorf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00171 inline __simdia_veclf __simdia_vorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00172 
00173 
00174 inline  __simdia_veci  __simdia_vnori(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00175 inline  __simdia_vecf  __simdia_vnorf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00176 inline __simdia_veclf __simdia_vnorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00177 
00178 
00179 inline  __simdia_veci  __simdia_vandi(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00180 inline  __simdia_vecf  __simdia_vandf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00181 inline __simdia_veclf __simdia_vandlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00182 
00183 
00184 inline  __simdia_veci  __simdia_vnandi(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00185 inline  __simdia_vecf  __simdia_vnandf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00186 inline __simdia_veclf __simdia_vnandlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00187 
00188 
00189 inline  __simdia_veci  __simdia_vxori(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00190 inline  __simdia_vecf  __simdia_vxorf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00191 inline __simdia_veclf __simdia_vxorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00192 
00193 
00194 inline  __simdia_veci  __simdia_vnxori(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00195 inline  __simdia_vecf  __simdia_vnxorf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00196 inline __simdia_veclf __simdia_vnxorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00197 
00198 
00199 
00200 
00201 inline __simdia_veci  __simdia_vcmpeqi(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 == b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 == b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00202 inline __simdia_veci  __simdia_vcmpeqf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 == b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 == b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00203 inline __simdia_veci __simdia_vcmpeqlf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = r.v1 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00204 
00205 
00206 inline __simdia_veci  __simdia_vcmpgti(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 > b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 > b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00207 inline __simdia_veci  __simdia_vcmpgtf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 > b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 > b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00208 inline __simdia_veci __simdia_vcmpgtlf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = r.v1 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00209 
00210 
00211 inline __simdia_veci  __simdia_vcmpgei(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 >= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 >= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00212 inline __simdia_veci  __simdia_vcmpgef(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 >= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 >= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00213 inline __simdia_veci __simdia_vcmpgelf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = r.v1 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00214 
00215 
00216 inline __simdia_veci  __simdia_vcmplti(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 < b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 < b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00217 inline __simdia_veci  __simdia_vcmpltf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 < b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 < b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00218 inline __simdia_veci __simdia_vcmpltlf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = r.v1 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00219 
00220 
00221 inline __simdia_veci  __simdia_vcmplei(const  __simdia_veci a, const  __simdia_veci b) {  __simdia_veci r; r.v0 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 <= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 <= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00222 inline __simdia_veci  __simdia_vcmplef(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 <= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 <= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00223 inline __simdia_veci __simdia_vcmplelf(const  __simdia_vecf a, const  __simdia_vecf b) {  __simdia_veci r; r.v0 = r.v1 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00224 
00225 
00226 
00227 
00228 
00229 #if defined(__cplusplus)
00230 
00231   
00232   inline  __simdia_veci operator+(const  __simdia_veci &a, const  __simdia_veci &b) { return  __simdia_vaddi(a, b); }
00233   inline  __simdia_vecf operator+(const  __simdia_vecf &a, const  __simdia_vecf &b) { return  __simdia_vaddf(a, b); }
00234   inline __simdia_veclf operator+(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vaddlf(a, b); }
00235   inline  __simdia_veci operator+=( __simdia_veci &a, const  __simdia_veci &b) { a =  __simdia_vaddi(a, b); return a; }
00236   inline  __simdia_vecf operator+=( __simdia_vecf &a, const  __simdia_vecf &b) { a =  __simdia_vaddf(a, b); return a; }
00237   inline __simdia_veclf operator+=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vaddlf(a, b); return a; }
00238 
00239   inline  __simdia_veci operator+(const  __simdia_veci &a, const    int &b) { return  __simdia_vaddi(a,  __simdia_vseti(b)); }
00240   inline  __simdia_vecf operator+(const  __simdia_vecf &a, const  float &b) { return  __simdia_vaddf(a,  __simdia_vsetf(b)); }
00241   inline __simdia_veclf operator+(const __simdia_veclf &a, const double &b) { return __simdia_vaddlf(a, __simdia_vsetlf(b)); }
00242   inline  __simdia_veci operator+=( __simdia_veci &a, const    int &b) { a =  __simdia_vaddi(a,  __simdia_vseti(b)); return a; }
00243   inline  __simdia_vecf operator+=( __simdia_vecf &a, const  float &b) { a =  __simdia_vaddf(a,  __simdia_vsetf(b)); return a; }
00244   inline __simdia_veclf operator+=(__simdia_veclf &a, const double &b) { a = __simdia_vaddlf(a, __simdia_vsetlf(b)); return a; }
00245 
00246   
00247   inline  __simdia_veci operator-(const  __simdia_veci &a, const  __simdia_veci &b) { return  __simdia_vsubi(a, b); }
00248   inline  __simdia_vecf operator-(const  __simdia_vecf &a, const  __simdia_vecf &b) { return  __simdia_vsubf(a, b); }
00249   inline __simdia_veclf operator-(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vsublf(a, b); }
00250   inline  __simdia_veci operator-=( __simdia_veci &a, const  __simdia_veci &b) { a =  __simdia_vsubi(a, b); return a; }
00251   inline  __simdia_vecf operator-=( __simdia_vecf &a, const  __simdia_vecf &b) { a =  __simdia_vsubf(a, b); return a; }
00252   inline __simdia_veclf operator-=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vsublf(a, b); return a; }
00253 
00254   inline  __simdia_veci operator-(const  __simdia_veci &a, const    int &b) { return  __simdia_vsubi(a,  __simdia_vseti(b)); }
00255   inline  __simdia_vecf operator-(const  __simdia_vecf &a, const  float &b) { return  __simdia_vsubf(a,  __simdia_vsetf(b)); }
00256   inline __simdia_veclf operator-(const __simdia_veclf &a, const double &b) { return __simdia_vsublf(a, __simdia_vsetlf(b)); }
00257   inline  __simdia_veci operator-=( __simdia_veci &a, const    int &b) { a =  __simdia_vsubi(a,  __simdia_vseti(b)); return a; }
00258   inline  __simdia_vecf operator-=( __simdia_vecf &a, const  float &b) { a =  __simdia_vsubf(a,  __simdia_vsetf(b)); return a; }
00259   inline __simdia_veclf operator-=(__simdia_veclf &a, const double &b) { a = __simdia_vsublf(a, __simdia_vsetlf(b)); return a; }
00260 
00261   
00262   inline  __simdia_vecf operator*(const  __simdia_vecf &a, const  __simdia_vecf &b) { return  __simdia_vmulf(a, b); }
00263   inline __simdia_veclf operator*(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vmullf(a, b); }
00264   inline  __simdia_vecf operator*=( __simdia_vecf &a, const  __simdia_vecf &b) { a =  __simdia_vmulf(a, b); return a; }
00265   inline __simdia_veclf operator*=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vmullf(a, b); return a; }
00266 
00267   inline  __simdia_vecf operator*(const  __simdia_vecf &a, const  float &b) { return  __simdia_vmulf(a,  __simdia_vsetf(b)); }
00268   inline __simdia_veclf operator*(const __simdia_veclf &a, const double &b) { return __simdia_vmullf(a, __simdia_vsetlf(b)); }
00269   inline  __simdia_vecf operator*=( __simdia_vecf &a, const  float &b) { a =  __simdia_vmulf(a,  __simdia_vsetf(b)); return a; }
00270   inline __simdia_veclf operator*=(__simdia_veclf &a, const double &b) { a = __simdia_vmullf(a, __simdia_vsetlf(b)); return a; }
00271 
00272   
00273   inline  __simdia_vecf operator/(const  __simdia_vecf &a, const  __simdia_vecf &b) { return  __simdia_vdivf(a, b); }
00274   inline __simdia_veclf operator/(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vdivlf(a, b); }
00275   inline  __simdia_vecf operator/=( __simdia_vecf &a, const  __simdia_vecf &b) { a =  __simdia_vdivf(a, b); return a; }
00276   inline __simdia_veclf operator/=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vdivlf(a, b); return a; }
00277 
00278   inline  __simdia_vecf operator/(const  __simdia_vecf &a, const  float &b) { return  __simdia_vdivf(a,  __simdia_vsetf(b)); }
00279   inline __simdia_veclf operator/(const __simdia_veclf &a, const double &b) { return __simdia_vdivlf(a, __simdia_vsetlf(b)); }
00280   inline  __simdia_vecf operator/=( __simdia_vecf &a, const  float &b) { a =  __simdia_vdivf(a,  __simdia_vsetf(b)); return a; }
00281   inline __simdia_veclf operator/=(__simdia_veclf &a, const double &b) { a = __simdia_vdivlf(a, __simdia_vsetlf(b)); return a; }
00282 
00283   
00284   inline  __simdia_veci operator|(const  __simdia_veci &a, const  __simdia_veci &b) { return  __simdia_vori(a, b); }
00285   inline  __simdia_vecf operator|(const  __simdia_vecf &a, const  __simdia_vecf &b) { return  __simdia_vorf(a, b); }
00286   inline __simdia_veclf operator|(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vorlf(a, b); }
00287   inline  __simdia_veci operator|=( __simdia_veci &a, const  __simdia_veci &b) { a =  __simdia_vori(a, b); return a; }
00288   inline  __simdia_vecf operator|=( __simdia_vecf &a, const  __simdia_vecf &b) { a =  __simdia_vorf(a, b); return a; }
00289   inline __simdia_veclf operator|=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vorlf(a, b); return a; }
00290 
00291   inline  __simdia_veci operator|(const  __simdia_veci &a, const    int &b) { return  __simdia_vori(a,  __simdia_vseti(b)); }
00292   inline  __simdia_vecf operator|(const  __simdia_vecf &a, const  float &b) { return  __simdia_vorf(a,  __simdia_vsetf(b)); }
00293   inline __simdia_veclf operator|(const __simdia_veclf &a, const double &b) { return __simdia_vorlf(a, __simdia_vsetlf(b)); }
00294   inline  __simdia_veci operator|=( __simdia_veci &a, const    int &b) { a =  __simdia_vori(a,  __simdia_vseti(b)); return a; }
00295   inline  __simdia_vecf operator|=( __simdia_vecf &a, const  float &b) { a =  __simdia_vorf(a,  __simdia_vsetf(b)); return a; }
00296   inline __simdia_veclf operator|=(__simdia_veclf &a, const double &b) { a = __simdia_vorlf(a, __simdia_vsetlf(b)); return a; }
00297 
00298   
00299   inline  __simdia_veci operator&(const  __simdia_veci &a, const  __simdia_veci &b) { return  __simdia_vandi(a, b); }
00300   inline  __simdia_vecf operator&(const  __simdia_vecf &a, const  __simdia_vecf &b) { return  __simdia_vandf(a, b); }
00301   inline __simdia_veclf operator&(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vandlf(a, b); }
00302   inline  __simdia_veci operator&=( __simdia_veci &a, const  __simdia_veci &b) { a =  __simdia_vandi(a, b); return a; }
00303   inline  __simdia_vecf operator&=( __simdia_vecf &a, const  __simdia_vecf &b) { a =  __simdia_vandf(a, b); return a; }
00304   inline __simdia_veclf operator&=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vandlf(a, b); return a; }
00305 
00306   inline  __simdia_veci operator&(const  __simdia_veci &a, const    int &b) { return  __simdia_vandi(a,  __simdia_vseti(b)); }
00307   inline  __simdia_vecf operator&(const  __simdia_vecf &a, const  float &b) { return  __simdia_vandf(a,  __simdia_vsetf(b)); }
00308   inline __simdia_veclf operator&(const __simdia_veclf &a, const double &b) { return __simdia_vandlf(a, __simdia_vsetlf(b)); }
00309   inline  __simdia_veci operator&=( __simdia_veci &a, const    int &b) { a =  __simdia_vandi(a,  __simdia_vseti(b)); return a; }
00310   inline  __simdia_vecf operator&=( __simdia_vecf &a, const  float &b) { a =  __simdia_vandf(a,  __simdia_vsetf(b)); return a; }
00311   inline __simdia_veclf operator&=(__simdia_veclf &a, const double &b) { a = __simdia_vandlf(a, __simdia_vsetlf(b)); return a; }
00312 
00313   
00314   inline  __simdia_veci operator^(const  __simdia_veci &a, const  __simdia_veci &b) { return  __simdia_vxori(a, b); }
00315   inline  __simdia_vecf operator^(const  __simdia_vecf &a, const  __simdia_vecf &b) { return  __simdia_vxorf(a, b); }
00316   inline __simdia_veclf operator^(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vxorlf(a, b); }
00317   inline  __simdia_veci operator^=( __simdia_veci &a, const  __simdia_veci &b) { a =  __simdia_vxori(a, b); return a; }
00318   inline  __simdia_vecf operator^=( __simdia_vecf &a, const  __simdia_vecf &b) { a =  __simdia_vxorf(a, b); return a; }
00319   inline __simdia_veclf operator^=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vxorlf(a, b); return a; }
00320 
00321   inline  __simdia_veci operator^(const  __simdia_veci &a, const    int &b) { return  __simdia_vxori(a,  __simdia_vseti(b)); }
00322   inline  __simdia_vecf operator^(const  __simdia_vecf &a, const  float &b) { return  __simdia_vxorf(a,  __simdia_vsetf(b)); }
00323   inline __simdia_veclf operator^(const __simdia_veclf &a, const double &b) { return __simdia_vxorlf(a, __simdia_vsetlf(b)); }
00324   inline  __simdia_veci operator^=( __simdia_veci &a, const    int &b) { a =  __simdia_vxori(a,  __simdia_vseti(b)); return a; }
00325   inline  __simdia_vecf operator^=( __simdia_vecf &a, const  float &b) { a =  __simdia_vxorf(a,  __simdia_vsetf(b)); return a; }
00326   inline __simdia_veclf operator^=(__simdia_veclf &a, const double &b) { a = __simdia_vxorlf(a, __simdia_vsetlf(b)); return a; }
00327 
00328 #endif 
00329 
00333 
00334 
00335 
00336 
00337 
00338 #if defined(__SSE2__) && (!(SIMDIA_FORCE_NO_SSE)) && !defined(_CRAYC)
00339 
00340   
00341 
00342 
00343 
00344   
/* SSE2 vector types: the public simdia_vec* names map directly onto the
 * compiler's 128-bit intrinsic types.  The helpers in this section index
 * them as four ints, four floats and two doubles respectively. */
00345   typedef __m128i  simdia_veci;
00346   typedef  __m128  simdia_vecf;
00347   typedef __m128d simdia_veclf;
00348 
00349   
00350   
00351   inline  simdia_veci  simdia_vinserti( simdia_veci v, const    int s, const int i) {  simdia_veci r = v;    int* rPtr = (   int*)(&r); rPtr[i] = s; return r; }
00352   inline  simdia_vecf  simdia_vinsertf( simdia_vecf v, const  float s, const int i) {  simdia_vecf r = v;  float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00353   inline simdia_veclf simdia_vinsertlf(simdia_veclf v, const double s, const int i) { simdia_veclf r = v; double* rPtr = (double*)(&r); rPtr[i] = s; return r; }
00354 
00355   
00356   
00357   inline    int  vextracti( simdia_veci v, const int i) { return ((   int*)(&v))[i]; }
00358   inline  float  vextractf( simdia_vecf v, const int i) { return (( float*)(&v))[i]; }
00359   inline double vextractlf(simdia_veclf v, const int i) { return ((double*)(&v))[i]; }
00360 
00361   /* Set: build a vector with every lane equal to the scalar a (a is cast to the lane type first). */
00362   #define  simdia_vseti(a)  (_mm_set1_epi32((int)(a)))
00363   #define  simdia_vsetf(a)  (_mm_set1_ps((float)(a)))
00364   #define simdia_vsetlf(a)  (_mm_set1_pd((double)(a)))
00365 
00366   /* Constant zero: all lanes 0. These "constants" are expressions that are re-evaluated at every use. */
00367   #define  simdia_const_vzeroi  (_mm_setzero_si128())
00368   #define  simdia_const_vzerof  (_mm_setzero_ps())
00369   #define simdia_const_vzerolf  (_mm_setzero_pd())
00370 
00371   /* Constant one: all lanes 1. */
00372   #define  simdia_const_vonei  (simdia_vseti(1))
00373   #define  simdia_const_vonef  (simdia_vsetf(1.0f))
00374   #define simdia_const_vonelf  (simdia_vsetlf(1.0))
00375 
00376   /* Constant two: all lanes 2. */
00377   #define  simdia_const_vtwoi  (simdia_vseti(2))
00378   #define  simdia_const_vtwof  (simdia_vsetf(2.0f))
00379   #define simdia_const_vtwolf  (simdia_vsetlf(2.0))
00380 
00381   /* Constant negative one: all lanes -1 (for the integer vector this is also the all-bits-set mask used by simdia_vnot*). */
00382   #define  simdia_const_vnegonei  (simdia_vseti(-1))
00383   #define  simdia_const_vnegonef  (simdia_vsetf(-1.0f))
00384   #define simdia_const_vnegonelf  (simdia_vsetlf(-1.0))
00385 
00386   /* Rotate by whole elements. s is reduced modulo the lane count (4 for int/float, 2 for double) before use. */
00387   /* vroth*: result[k] = a[(k - s) mod N], i.e. elements move toward higher indices; vrotl*: result[k] = a[(k + s) mod N], i.e. elements move toward lower indices. */
00388   inline  simdia_veci  simdia_vrothi(const  simdia_veci &a, int s) {  simdia_veci b;    int* a_ptr = (   int*)(&a);    int* b_ptr = (   int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00389   inline  simdia_vecf  simdia_vrothf(const  simdia_vecf &a, int s) {  simdia_vecf b;  float* a_ptr = ( float*)(&a);  float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00390   inline simdia_veclf simdia_vrothlf(const simdia_veclf &a, int s) { simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0-s)&0x1]; b_ptr[1] = a_ptr[(1-s)&0x1]; return b; }
00391   inline  simdia_veci  simdia_vrotli(const  simdia_veci &a, int s) {  simdia_veci b;    int* a_ptr = (   int*)(&a);    int* b_ptr = (   int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00392   inline  simdia_vecf  simdia_vrotlf(const  simdia_vecf &a, int s) {  simdia_vecf b;  float* a_ptr = ( float*)(&a);  float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00393   inline simdia_veclf simdia_vrotllf(const simdia_veclf &a, int s) { simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0+s)&0x1]; b_ptr[1] = a_ptr[(1+s)&0x1]; return b; }
00394 
00395   /* Addition: lane-wise a + b. */
00396   #define  simdia_vaddi(a, b)  (_mm_add_epi32((a), (b)))
00397   #define  simdia_vaddf(a, b)  (_mm_add_ps((a), (b)))
00398   #define simdia_vaddlf(a, b)  (_mm_add_pd((a), (b)))
00399 
00400   /* Subtraction: lane-wise a - b. */
00401   #define  simdia_vsubi(a, b)  (_mm_sub_epi32((a), (b)))
00402   #define  simdia_vsubf(a, b)  (_mm_sub_ps((a), (b)))
00403   #define simdia_vsublf(a, b)  (_mm_sub_pd((a), (b)))
00404 
00405   /* Multiplication: lane-wise a * b (only float and double variants are defined here). */
00406   #define    simdia_vmulf(a, b)  (_mm_mul_ps((a), (b)))
00407   #define   simdia_vmullf(a, b)  (_mm_mul_pd((a), (b)))
00408 
00409   /* Division: lane-wise a / b (only float and double variants are defined here). */
00410   #define   simdia_vdivf(a, b)  (_mm_div_ps((a), (b)))
00411   #define  simdia_vdivlf(a, b)  (_mm_div_pd((a), (b)))
00412 
00413   /* Multiply-add: lane-wise (a * b) + c, composed from the simdia_vmul* and simdia_vadd* macros of this backend (SSE2 has no fused form). */
00414   #define  simdia_vmaddf(a, b, c)  ( simdia_vaddf( simdia_vmulf((a), (b)), (c)))
00415   #define simdia_vmaddlf(a, b, c)  (simdia_vaddlf(simdia_vmullf((a), (b)), (c)))
00416 
00417   /* Reciprocal: lane-wise 1 / a. The float form uses _mm_rcp_ps, which is a hardware approximation; the double form performs a real division per lane. */
00418   #define  simdia_vrecipf(a)  (_mm_rcp_ps(a))
00419   inline simdia_veclf simdia_vreciplf(const simdia_veclf a) { simdia_veclf r; double* a_ptr =  (double*)(&a); double* r_ptr = (double*)(&r); r_ptr[0] = 1.0f /  a_ptr[0]; r_ptr[1] = 1.0f / a_ptr[1]; return r; }
00420 
00421   /* Square root: lane-wise sqrt(a). */
00422   #define  simdia_vsqrtf(a)  (_mm_sqrt_ps(a))
00423   #define simdia_vsqrtlf(a)  (_mm_sqrt_pd(a))
00424 
00425   /* Reciprocal square root: lane-wise 1 / sqrt(a). The float form uses the _mm_rsqrt_ps approximation; the double form composes simdia_vreciplf with simdia_vsqrtlf. */
00426   #define  simdia_vrsqrtf(a)  (_mm_rsqrt_ps(a))
00427   #define simdia_vrsqrtlf(a)  (simdia_vreciplf(simdia_vsqrtlf(a)))
00428 
00429   /* Bitwise NOT: XOR with an all-bits-set mask. The mask is an integer vector, so it is reinterpreted (no conversion) to the float/double register type before being handed to _mm_xor_ps / _mm_xor_pd. */
00430   #define  simdia_vnoti(a)  (_mm_xor_si128((a), simdia_const_vnegonei))
00431   #define  simdia_vnotf(a)  (_mm_xor_ps((a), _mm_castsi128_ps(simdia_const_vnegonei)))
00432   #define simdia_vnotlf(a)  (_mm_xor_pd((a), _mm_castsi128_pd(simdia_const_vnegonei)))
00433 
00434   /* Bitwise OR. */
00435   #define  simdia_vori(a, b)  (_mm_or_si128((a), (b)))
00436   #define  simdia_vorf(a, b)  (_mm_or_ps((a), (b)))
00437   #define simdia_vorlf(a, b)  (_mm_or_pd((a), (b)))
00438 
00439   /* Bitwise NOR: NOT applied to the OR of a and b. */
00440   #define  simdia_vnori(a, b)  ( simdia_vnoti( simdia_vori((a), (b))))
00441   #define  simdia_vnorf(a, b)  ( simdia_vnotf( simdia_vorf((a), (b))))
00442   #define simdia_vnorlf(a, b)  (simdia_vnotlf(simdia_vorlf((a), (b))))
00443 
00444   /* Bitwise AND. */
00445   #define  simdia_vandi(a, b)  (_mm_and_si128((a), (b)))
00446   #define  simdia_vandf(a, b)  (_mm_and_ps((a), (b)))
00447   #define simdia_vandlf(a, b)  (_mm_and_pd((a), (b)))
00448 
00449   /* Bitwise NAND: NOT applied to the AND of a and b. */
00450   #define  simdia_vnandi(a, b)  ( simdia_vnoti( simdia_vandi((a), (b))))
00451   #define  simdia_vnandf(a, b)  ( simdia_vnotf( simdia_vandf((a), (b))))
00452   #define simdia_vnandlf(a, b)  (simdia_vnotlf(simdia_vandlf((a), (b))))
00453 
00454   /* Bitwise XOR. */
00455   #define  simdia_vxori(a, b)  (_mm_xor_si128((a), (b)))
00456   #define  simdia_vxorf(a, b)  (_mm_xor_ps((a), (b)))
00457   #define simdia_vxorlf(a, b)  (_mm_xor_pd((a), (b)))
00458 
00459   /* Bitwise NXOR: NOT applied to the XOR of a and b. */
00460   #define  simdia_vnxori(a, b)  ( simdia_vnoti( simdia_vxori((a), (b))))
00461   #define  simdia_vnxorf(a, b)  ( simdia_vnotf( simdia_vxorf((a), (b))))
00462   #define simdia_vnxorlf(a, b)  (simdia_vnotlf(simdia_vxorlf((a), (b))))
00463 
00464   /* Comparisons. Every variant yields a simdia_veci mask: all bits set in lanes where the relation holds, zero elsewhere. Float/double results are the intrinsic's mask reinterpreted as an integer vector. Equal: */
00465   #define  simdia_vcmpeqi(a, b)  ((simdia_veci)(_mm_cmpeq_epi32((a), (b))))
00466   #define  simdia_vcmpeqf(a, b)  ((simdia_veci)(_mm_cmpeq_ps((a), (b))))
00467   #define simdia_vcmpeqlf(a, b)  ((simdia_veci)(_mm_cmpeq_pd((a), (b))))
00468 
00469   /* Greater than. */
00470   #define  simdia_vcmpgti(a, b)  ((simdia_veci)(_mm_cmpgt_epi32((a), (b))))
00471   #define  simdia_vcmpgtf(a, b)  ((simdia_veci)(_mm_cmpgt_ps((a), (b))))
00472   #define simdia_vcmpgtlf(a, b)  ((simdia_veci)(_mm_cmpgt_pd((a), (b))))
00473 
00474   /* Greater than or equal. SSE2 provides no integer "ge" compare, so the integer form is the complement of "less than" (exact for integers; arguments are evaluated once). */
00475   #define  simdia_vcmpgei(a, b)  ((simdia_veci)(_mm_xor_si128(_mm_cmplt_epi32((a), (b)), _mm_set1_epi32(-1))))
00476   #define  simdia_vcmpgef(a, b)  ((simdia_veci)(_mm_cmpge_ps((a), (b))))
00477   #define simdia_vcmpgelf(a, b)  ((simdia_veci)(_mm_cmpge_pd((a), (b))))
00478 
00479   /* Less than. */
00480   #define  simdia_vcmplti(a, b)  ((simdia_veci)(_mm_cmplt_epi32((a), (b))))
00481   #define  simdia_vcmpltf(a, b)  ((simdia_veci)(_mm_cmplt_ps((a), (b))))
00482   #define simdia_vcmpltlf(a, b)  ((simdia_veci)(_mm_cmplt_pd((a), (b))))
00483 
00484   /* Less than or equal. SSE2 provides no integer "le" compare, so the integer form is the complement of "greater than". */
00485   #define  simdia_vcmplei(a, b)  ((simdia_veci)(_mm_xor_si128(_mm_cmpgt_epi32((a), (b)), _mm_set1_epi32(-1))))
00486   #define  simdia_vcmplef(a, b)  ((simdia_veci)(_mm_cmple_ps((a), (b))))
00487   #define simdia_vcmplelf(a, b)  ((simdia_veci)(_mm_cmple_pd((a), (b))))
00488 
00489 
00490 
00491 
00492 
00493 
00494 
00495 
00496 #elif (CMK_CELL_SPE != 0) && (!(SIMDIA_FORCE_NO_SPE_SIMD))
00497 
00498   /* Cell SPE backend: the simdia vector types are the SPU vector types. */
00499   typedef vector signed int  simdia_veci;
00500   typedef vector float       simdia_vecf;
00501   typedef vector double     simdia_veclf;
00502 
00503   /* Insert: value of v with element i replaced by s. Note the argument order swap: spu_insert takes (scalar, vector, index). */
00504   #define  simdia_vinserti(v, s, i)  (spu_insert((s), (v), (i)))
00505   #define  simdia_vinsertf(v, s, i)  (spu_insert((s), (v), (i)))
00506   #define simdia_vinsertlf(v, s, i)  (spu_insert((s), (v), (i)))
00507 
00508   /* Extract: element i of v. */
00509   #define  simdia_vextracti(v, i)  (spu_extract((v), (i)))
00510   #define  simdia_vextractf(v, i)  (spu_extract((v), (i)))
00511   #define simdia_vextractlf(v, i)  (spu_extract((v), (i)))
00512 
00513   /* Set: vector with every lane equal to a; the cast selects which spu_splats overload (and therefore which vector type) is produced. */
00514   #define  simdia_vseti(a)  (spu_splats((int)(a)))
00515   #define  simdia_vsetf(a)  (spu_splats((float)(a)))
00516   #define simdia_vsetlf(a)  (spu_splats((double)(a)))
00517 
00518   /* Constant zero: all lanes 0. All constants in this group are built with this backend's simdia_vset* macros and are re-evaluated at every use. */
00519   #define  simdia_const_vzeroi  (simdia_vseti(0))
00520   #define  simdia_const_vzerof  (simdia_vsetf(0.0f))
00521   #define simdia_const_vzerolf  (simdia_vsetlf(0.0))
00522 
00523   /* Constant one: all lanes 1. */
00524   #define  simdia_const_vonei  (simdia_vseti(1))
00525   #define  simdia_const_vonef  (simdia_vsetf(1.0f))
00526   #define simdia_const_vonelf  (simdia_vsetlf(1.0))
00527 
00528   /* Constant two: all lanes 2. */
00529   #define  simdia_const_vtwoi  (simdia_vseti(2))
00530   #define  simdia_const_vtwof  (simdia_vsetf(2.0f))
00531   #define simdia_const_vtwolf  (simdia_vsetlf(2.0))
00532 
00533   /* Constant negative one: all lanes -1. */
00534   #define  simdia_const_vnegonei  (simdia_vseti(-1))
00535   #define  simdia_const_vnegonef  (simdia_vsetf(-1.0f))
00536   #define simdia_const_vnegonelf  (simdia_vsetlf(-1.0))
00537 
00538   /* Rotate by whole elements, implemented as a quadword byte rotate (spu_rlqwbyte). vrotl*: rotate left by 4*(s&3) bytes (8*(s&1) for doubles). vroth*: rotate left by 16 - 4*(s&3) bytes, i.e. the opposite direction; for doubles both directions use the same 8*(s&1) byte count. NOTE(review): for s&3 == 0 the vroth count is 16, which presumably acts as no rotation - confirm against the spu_rlqwbyte specification. */
00539   #define   simdia_vrothi(a, s) (spu_rlqwbyte((a), (0x10-(((s)&0x3)<<2)) ))
00540   #define   simdia_vrothf(a, s) (spu_rlqwbyte((a), (0x10-(((s)&0x3)<<2)) ))
00541   #define  simdia_vrothlf(a, s) (spu_rlqwbyte((a),       (((s)&0x1)<<3)  ))
00542   #define   simdia_vrotli(a, s) (spu_rlqwbyte((a), ((s)&0x3)<<2))
00543   #define   simdia_vrotlf(a, s) (spu_rlqwbyte((a), ((s)&0x3)<<2))
00544   #define  simdia_vrotllf(a, s) (spu_rlqwbyte((a), ((s)&0x1)<<3))
00545 
00546   /* Addition: lane-wise a + b (spu_add is overloaded on the vector type). */
00547   #define  simdia_vaddi(a, b)  (spu_add((a), (b)))
00548   #define  simdia_vaddf(a, b)  (spu_add((a), (b)))
00549   #define simdia_vaddlf(a, b)  (spu_add((a), (b)))
00550 
00551   /* Subtraction: lane-wise a - b. */
00552   #define  simdia_vsubi(a, b)  (spu_sub((a), (b)))
00553   #define  simdia_vsubf(a, b)  (spu_sub((a), (b)))
00554   #define simdia_vsublf(a, b)  (spu_sub((a), (b)))
00555 
00556   /* Multiplication: lane-wise a * b (only float and double variants are defined here). */
00557   #define   simdia_vmulf(a, b)  (spu_mul((a), (b)))
00558   #define  simdia_vmullf(a, b)  (spu_mul((a), (b)))
00559 
00560   /* Division. The float form multiplies a by the spu_re() reciprocal estimate of b, so the result is approximate. The double form divides lane by lane; spu_insert returns the updated vector rather than modifying its argument, so each result must be assigned back to r. */
00561   #define simdia_vdivf(a, b)  (spu_mul((a), spu_re(b)))
00562   inline simdia_veclf simdia_vdivlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; r = spu_insert((spu_extract(a, 0) / spu_extract(b, 0)), r, 0); r = spu_insert((spu_extract(a, 1) / spu_extract(b, 1)), r, 1); return r; }
00563 
00564   /* Multiply-add: lane-wise (a * b) + c via spu_madd. */
00565   #define  simdia_vmaddf(a, b, c)  (spu_madd((a), (b), (c)))
00566   #define simdia_vmaddlf(a, b, c)  (spu_madd((a), (b), (c)))
00567 
00568   /* Reciprocal: lane-wise 1 / a. The float form is the spu_re() estimate; the double form divides per lane and assigns each spu_insert result back to r (spu_insert returns the updated vector). The second parameter b is unused and kept only so existing two-argument calls still compile. NOTE(review): the other backends define simdia_vreciplf/vsqrtlf/vrsqrtlf with a single argument. */
00569   #define  simdia_vrecipf(a)  (spu_re(a))
00570   inline simdia_veclf simdia_vreciplf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; r = spu_insert((1.0f / spu_extract(a, 0)), r, 0); r = spu_insert((1.0f / spu_extract(a, 1)), r, 1); return r; }
00571 
00572   /* Square root: lane-wise sqrt(a). The float form is the reciprocal estimate of the reciprocal-square-root estimate (approximate); the double form calls sqrt() per lane. b is unused. */
00573   #define simdia_vsqrtf(a) (spu_re(spu_rsqrte(a)))
00574   inline simdia_veclf simdia_vsqrtlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; r = spu_insert(sqrt(spu_extract(a, 0)), r, 0); r = spu_insert(sqrt(spu_extract(a, 1)), r, 1); return r; }
00575 
00576   /* Reciprocal square root: lane-wise 1 / sqrt(a). The float form is the spu_rsqrte() estimate; the double form computes 1 / sqrt() per lane. b is unused. */
00577   #define simdia_vrsqrtf(a) (spu_rsqrte(a))
00578   inline simdia_veclf simdia_vrsqrtlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; r = spu_insert((1.0f / sqrt(spu_extract(a, 0))), r, 0); r = spu_insert((1.0f / sqrt(spu_extract(a, 1))), r, 1); return r; }
00579 
00580   /* Bitwise NOT, written as NOR of a with itself; the macro argument is therefore expanded twice. */
00581   #define  simdia_vnoti(a)  (spu_nor((a), (a)))
00582   #define  simdia_vnotf(a)  (spu_nor((a), (a)))
00583   #define simdia_vnotlf(a)  (spu_nor((a), (a)))
00584 
00585   /* Bitwise OR. */
00586   #define  simdia_vori(a, b)  (spu_or((a), (b)))
00587   #define  simdia_vorf(a, b)  (spu_or((a), (b)))
00588   #define simdia_vorlf(a, b)  (spu_or((a), (b)))
00589 
00590   /* Bitwise NOR. */
00591   #define  simdia_vnori(a, b)  (spu_nor((a), (b)))
00592   #define  simdia_vnorf(a, b)  (spu_nor((a), (b)))
00593   #define simdia_vnorlf(a, b)  (spu_nor((a), (b)))
00594 
00595   /* Bitwise AND. */
00596   #define  simdia_vandi(a, b)  (spu_and((a), (b)))
00597   #define  simdia_vandf(a, b)  (spu_and((a), (b)))
00598   #define simdia_vandlf(a, b)  (spu_and((a), (b)))
00599 
00600   /* Bitwise NAND. */
00601   #define  simdia_vnandi(a, b)  (spu_nand((a), (b)))
00602   #define  simdia_vnandf(a, b)  (spu_nand((a), (b)))
00603   #define simdia_vnandlf(a, b)  (spu_nand((a), (b)))
00604 
00605   /* Bitwise XOR. */
00606   #define  simdia_vxori(a, b)  (spu_xor((a), (b)))
00607   #define  simdia_vxorf(a, b)  (spu_xor((a), (b)))
00608   #define simdia_vxorlf(a, b)  (spu_xor((a), (b)))
00609 
00610   /* Bitwise NXOR: NOT applied to the XOR of a and b. */
00611   #define  simdia_vnxori(a, b)  ( simdia_vnoti( simdia_vxori((a), (b))))
00612   #define  simdia_vnxorf(a, b)  ( simdia_vnotf( simdia_vxorf((a), (b))))
00613   #define simdia_vnxorlf(a, b)  (simdia_vnotlf(simdia_vxorlf((a), (b))))
00614 
00615   /* Comparisons: each result is cast to simdia_veci. Equal: */
00616   #define  simdia_vcmpeqi(a, b)  ((simdia_veci)(spu_cmpeq((a), (b))))
00617   #define  simdia_vcmpeqf(a, b)  ((simdia_veci)(spu_cmpeq((a), (b))))
00618   #define simdia_vcmpeqlf(a, b)  ((simdia_veci)(spu_cmpeq((a), (b))))
00619 
00620   /* Greater than. */
00621   #define  simdia_vcmpgti(a, b)  ((simdia_veci)(spu_cmpgt((a), (b))))
00622   #define  simdia_vcmpgtf(a, b)  ((simdia_veci)(spu_cmpgt((a), (b))))
00623   #define simdia_vcmpgtlf(a, b)  ((simdia_veci)(spu_cmpgt((a), (b))))
00624 
00625   
00626 
00627   /* Greater than or equal: OR of the "equal" and "greater than" masks (a and b are each expanded twice). */
00628   #define  simdia_vcmpgei(a, b)  (spu_or( simdia_vcmpeqi((a), (b)),  simdia_vcmpgti((a), (b))))
00629   #define  simdia_vcmpgef(a, b)  (spu_or( simdia_vcmpeqf((a), (b)),  simdia_vcmpgtf((a), (b))))
00630   #define simdia_vcmpgelf(a, b)  (spu_or(simdia_vcmpeqlf((a), (b)), simdia_vcmpgtlf((a), (b))))
00631 
00632   /* Less than: NOR of the "greater than" and "equal" masks, i.e. true where neither holds (a and b are each expanded twice). */
00633   #define  simdia_vcmplti(a, b)  (spu_nor( simdia_vcmpgti((a), (b)),  simdia_vcmpeqi((a), (b))))
00634   #define  simdia_vcmpltf(a, b)  (spu_nor( simdia_vcmpgtf((a), (b)),  simdia_vcmpeqf((a), (b))))
00635   #define simdia_vcmpltlf(a, b)  (spu_nor(simdia_vcmpgtlf((a), (b)), simdia_vcmpeqlf((a), (b))))
00636 
00637   /* Less than or equal: complement of the "greater than" mask, computed as NOR with zero. The "greater than" masks are simdia_veci values for all three element types, so the zero operand is always an integer vector (built directly with spu_splats to keep both NOR operands the same type). */
00638   #define  simdia_vcmplei(a, b)  (spu_nor( simdia_vcmpgti((a), (b)), spu_splats((int)0)))
00639   #define  simdia_vcmplef(a, b)  (spu_nor( simdia_vcmpgtf((a), (b)), spu_splats((int)0)))
00640   #define simdia_vcmplelf(a, b)  (spu_nor(simdia_vcmpgtlf((a), (b)), spu_splats((int)0)))
00641 
00642 
00643 
00644 
00645 
00646 
00647 
00648 #elif defined(__VEC__) && (!(SIMDIA_FORCE_NO_ALTIVEC))
00649 
00650   /* AltiVec backend: int and float vectors use the native vector types. A native double vector is used only when _ARCH_PWR7 is defined; otherwise simdia_veclf is the scalar struct __simdia_veclf. */
00651   typedef vector signed int simdia_veci;
00652   typedef vector float      simdia_vecf;
00653   #ifdef _ARCH_PWR7
00654 
00662     typedef vector double  simdia_veclf;
00663   #else
00664     typedef __simdia_veclf simdia_veclf;
00665   #endif
00666 
00667   /* Insert: value of the vector with one element replaced by a scalar. */
00668   /* With _ARCH_PWR7 the macros take (a = vector, b = scalar, c = index) and forward to vec_insert(scalar, vector, index) as a single three-argument call. Without it, the element is written through a scalar pointer into a copy of v (index not range-checked), and the double form is the scalar fallback. */
00669 
00670   #ifdef _ARCH_PWR7 
00671     
00672     #define  simdia_vinserti(a, b, c)  (vec_insert((b), (a), (c)))
00673     #define  simdia_vinsertf(a, b, c)  (vec_insert((b), (a), (c)))
00674     #define  simdia_vinsertlf(a, b, c)  (vec_insert((b), (a), (c)))
00675   #else
00676     inline  simdia_veci  simdia_vinserti( simdia_veci v, const    int s, const int i) {  simdia_veci r = v;    int* rPtr = (   int*)(&r); rPtr[i] = s; return r; }
00677     inline  simdia_vecf  simdia_vinsertf( simdia_vecf v, const  float s, const int i) {  simdia_vecf r = v;  float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00678     #define simdia_vinsertlf __simdia_vinsertlf
00679   #endif
00680 
00681   /* Extract: one element of a vector. With _ARCH_PWR7 this is vec_extract; otherwise the element is read through a scalar pointer (index not range-checked) and the double form is the scalar fallback. */
00682   #ifdef _ARCH_PWR7 
00683     #define  simdia_vextracti(a, b)  (vec_extract((a), (b)))
00684     #define  simdia_vextractf(a, b)  (vec_extract((a), (b)))
00685     #define  simdia_vextractlf(a, b)  (vec_extract((a), (b)))
00686   #else
00687     
00688     inline    int  simdia_vextracti( simdia_veci v, const int i) {    int* vPtr = (   int*)(&v); return vPtr[i]; }
00689     inline  float  simdia_vextractf( simdia_vecf v, const int i) {  float* vPtr = ( float*)(&v); return vPtr[i]; }
00690     #define simdia_vextractlf __simdia_vextractlf
00691   #endif
00692 
00693   /* Set: vector with every lane equal to the scalar a. With _ARCH_PWR7 this uses vec_splats (vec_promote would define only one lane); the cast selects the element type. */
00694   #ifdef _ARCH_PWR7 
00695     #define  simdia_vseti(a)  (vec_splats((int)(a)))
00696     #define  simdia_vsetf(a)  (vec_splats((float)(a)))
00697     #define  simdia_vsetlf(a)  (vec_splats((double)(a)))
00698   #else
00699     /* Fallback: write a into lane 0 of a vector-typed local (which has vector alignment, unlike the scalar struct) and splat lane 0 across the register - the same pattern as simdia_vset16uc. */
00700 
00701 
00702 
00703 
00704 
00705 
00706 
00707 
00708     inline simdia_veci simdia_vseti(const   int a) { simdia_veci r; ((  int*)(&r))[0] = a; return vec_splat(r, 0); }
00709     inline simdia_vecf simdia_vsetf(const float a) { simdia_vecf r; ((float*)(&r))[0] = a; return vec_splat(r, 0); }
00710     #define simdia_vsetlf __simdia_vsetlf
00711   #endif
00712   /* Helper: vector of unsigned chars with every byte equal to c. c is stored into byte 0 of a 16-byte-aligned local, then byte 0 is splatted. Used by the byte-rotate macros to build shift-count vectors. */
00713   inline vector unsigned char simdia_vset16uc(const unsigned char c) { vector unsigned char r __attribute__((aligned(16))); ((unsigned char*)(&r))[0] = c; return vec_splat(r, 0); }
00714 
00715   /* Constant zero: all lanes 0. Float constants are produced by converting the integer splat with vec_ctf. With _ARCH_PWR7 the double constants pass a double to vec_splats, because the result vector type follows the scalar argument type; otherwise they alias the scalar fallback constants. */
00716   #define  simdia_const_vzeroi  (vec_splat_s32(0))
00717   #define  simdia_const_vzerof  (vec_ctf(vec_splat_s32(0), 0))
00718   #ifdef _ARCH_PWR7 
00719     #define simdia_const_vzerolf  (vec_splats((double)0.0))
00720   #else
00721     #define simdia_const_vzerolf  (__simdia_const_vzerolf)
00722   #endif
00723 
00724   /* Constant one: all lanes 1. */
00725   #define  simdia_const_vonei  (vec_splat_s32(1))
00726   #define  simdia_const_vonef  (vec_ctf(vec_splat_s32(1), 0))
00727   #ifdef _ARCH_PWR7 
00728     #define simdia_const_vonelf  (vec_splats((double)1.0))
00729   #else
00730     #define simdia_const_vonelf  (__simdia_const_vonelf)
00731   #endif
00732 
00733   /* Constant two: all lanes 2. */
00734   #define  simdia_const_vtwoi  (vec_splat_s32(2))
00735   #define  simdia_const_vtwof  (vec_ctf(vec_splat_s32(2), 0))
00736   #ifdef _ARCH_PWR7 
00737     #define simdia_const_vtwolf  (vec_splats((double)2.0))
00738   #else
00739     #define simdia_const_vtwolf  (__simdia_const_vtwolf)
00740   #endif
00741 
00742   /* Constant negative one: all lanes -1. The fallback uses the same __simdia_const_vnegonelf name that the scalar backend aliases. */
00743   #define  simdia_const_vnegonei  (vec_splat_s32(-1))
00744   #define  simdia_const_vnegonef  (vec_ctf(vec_splat_s32(-1), 0))
00745   #ifdef _ARCH_PWR7 
00746     #define simdia_const_vnegonelf  (vec_splats((double)-1.0))
00747   #else
00748     #define simdia_const_vnegonelf  (__simdia_const_vnegonelf)
00749   #endif
00750 
00751   /* Rotate. The byte-rotate helpers OR together the vector shifted by s bytes in one direction (vec_slo / vec_sro) and by 16 - s bytes in the other; shift counts are passed as bit counts (bytes << 3) splatted by simdia_vset16uc. s is masked to 0..15 and the argument a is expanded twice. The element rotates convert an element count to bytes (<< 2 for int/float, << 3 for double). */
00752   #define __simdia_vrotlbytes(a, s)  (vec_or(vec_slo((a), simdia_vset16uc(((s) & 0xf) << 3)), vec_sro((a), simdia_vset16uc((16 - ((s) & 0xf)) << 3))))
00753   #define __simdia_vrotrbytes(a, s)  (vec_or(vec_sro((a), simdia_vset16uc(((s) & 0xf) << 3)), vec_slo((a), simdia_vset16uc((16 - ((s) & 0xf)) << 3))))
00754   #define  simdia_vrotli(a, s)  __simdia_vrotlbytes((a), ((s) << 2))
00755   #define  simdia_vrotlf(a, s)  __simdia_vrotlbytes((a), ((s) << 2))
00756   #define simdia_vrotllf(a, s)  __simdia_vrotlbytes((a), ((s) << 3))
00757   #define  simdia_vrothi(a, s)  __simdia_vrotrbytes((a), ((s) << 2))
00758   #define  simdia_vrothf(a, s)  __simdia_vrotrbytes((a), ((s) << 2))
00759   #define simdia_vrothlf(a, s)  __simdia_vrotrbytes((a), ((s) << 3))
00760 
00761   /* Addition: lane-wise a + b. The double form is native only with _ARCH_PWR7; otherwise it is the scalar fallback. */
00762   #define  simdia_vaddi(a, b)  (vec_add((a), (b)))
00763   #define  simdia_vaddf(a, b)  (vec_add((a), (b)))
00764   #ifdef _ARCH_PWR7 
00765     #define  simdia_vaddlf(a, b)  (vec_add((a), (b)))
00766   #else
00767     #define simdia_vaddlf __simdia_vaddlf
00768   #endif
00769 
00770   /* Subtraction: lane-wise a - b. */
00771   #define  simdia_vsubi(a, b)  (vec_sub((a), (b)))
00772   #define  simdia_vsubf(a, b)  (vec_sub((a), (b)))
00773   #ifdef _ARCH_PWR7 
00774     #define  simdia_vsublf(a, b)  (vec_sub((a), (b)))
00775   #else
00776     #define simdia_vsublf __simdia_vsublf
00777   #endif
00778 
00779   /* Multiplication: lane-wise a * b. */
00780   /* Without _ARCH_PWR7 the float product is a multiply-add with a zero addend, where the zero is formed as a XOR a (so a is expanded three times). */
00781   #ifdef _ARCH_PWR7 
00782     #define  simdia_vmulf(a, b)  (vec_mul((a), (b)))
00783     #define  simdia_vmullf(a, b)  (vec_mul((a), (b)))
00784   #else
00785     #define  simdia_vmulf(a, b)  (vec_madd((a), (b), vec_xor((a), (a))))
00786     #define  simdia_vmullf __simdia_vmullf
00787   #endif
00788 
00789   /* Division: lane-wise a / b. With _ARCH_PWR7 this is a single two-argument vec_div call. Otherwise the float form multiplies a by the vec_re() reciprocal estimate of b (approximate result) and the double form is the scalar fallback. */
00790   #ifdef _ARCH_PWR7 
00791     #define simdia_vdivf(a, b)  (vec_div((a), (b)))
00792     #define simdia_vdivlf(a, b)  (vec_div((a), (b)))
00793   #else
00794     #define simdia_vdivf(a, b)  (simdia_vmulf((a), vec_re(b)))
00795     #define simdia_vdivlf __simdia_vdivlf
00796   #endif
00797 
00798   /* Multiply-add: lane-wise (a * b) + c via vec_madd. The double form is native only with _ARCH_PWR7. */
00799   #define simdia_vmaddf(a, b, c)  (vec_madd((a), (b), (c)))
00800   #ifdef _ARCH_PWR7 
00801     #define simdia_vmaddlf(a, b, c)  (vec_madd((a), (b), (c)))
00802   #else
00803     #define simdia_vmaddlf __simdia_vmaddlf
00804   #endif
00805 
00806   /* Reciprocal: vec_re() estimate of 1 / a (approximate). */
00807   #define simdia_vrecipf(a)  (vec_re(a))
00808   #ifdef _ARCH_PWR7 
00809     #define simdia_vreciplf(a)  (vec_re(a))
00810   #else
00811     #define simdia_vreciplf __simdia_vreciplf
00812   #endif
00813 
00814   /* Square root. The float form is the reciprocal estimate of the reciprocal-square-root estimate (approximate); with _ARCH_PWR7 the double form uses vec_sqrt. */
00815   #define simdia_vsqrtf(a)  (vec_re(vec_rsqrte(a)))
00816   #ifdef _ARCH_PWR7 
00817     #define simdia_vsqrtlf(a)  (vec_sqrt(a))
00818   #else
00819     #define simdia_vsqrtlf __simdia_vsqrtlf
00820   #endif
00821 
00822   /* Reciprocal square root: vec_rsqrte() estimate of 1 / sqrt(a) (approximate). */
00823   #define simdia_vrsqrtf(a)  (vec_rsqrte(a))
00824   #ifdef _ARCH_PWR7 
00825     #define simdia_vrsqrtlf(a)  (vec_rsqrte(a))
00826   #else
00827     #define simdia_vrsqrtlf __simdia_vrsqrtlf
00828   #endif
00829 
00830   /* Bitwise NOT, written as NOR of a with itself (vec_neg would negate arithmetically instead of complementing bits, and XOR with the integer -1 constant would mix a float vector with an int vector). The macro argument is expanded twice. The non-PWR7 double form is the scalar fallback. */
00831   #ifdef _ARCH_PWR7 
00832     #define simdia_vnoti(a)  (vec_nor((a), (a)))
00833     #define simdia_vnotf(a)  (vec_nor((a), (a)))
00834     #define simdia_vnotlf(a)  (vec_nor((a), (a)))
00835   #else
00836     #define simdia_vnoti(a)  (vec_nor((a), (a)))
00837     #define simdia_vnotf(a)  (vec_nor((a), (a)))
00838     #define simdia_vnotlf __simdia_vnotlf
00839   #endif
00840 
00841   /* Bitwise OR. The double form is native only with _ARCH_PWR7; otherwise it is the scalar fallback. */
00842   #define simdia_vori(a, b)  (vec_or((a), (b)))
00843   #define simdia_vorf(a, b)  (vec_or((a), (b)))
00844   #ifdef _ARCH_PWR7 
00845     #define simdia_vorlf(a, b)  (vec_or((a), (b)))
00846   #else
00847     #define simdia_vorlf __simdia_vorlf
00848   #endif
00849 
00850   /* Bitwise NOR. */
00851   #define simdia_vnori(a, b)  (vec_nor((a), (b)))
00852   #define simdia_vnorf(a, b)  (vec_nor((a), (b)))
00853   #ifdef _ARCH_PWR7 
00854     #define simdia_vnorlf(a, b)  (vec_nor((a), (b)))
00855   #else
00856     #define simdia_vnorlf __simdia_vnorlf
00857   #endif
00858 
00859   /* Bitwise AND. */
00860   #define simdia_vandi(a, b)  (vec_and((a), (b)))
00861   #define simdia_vandf(a, b)  (vec_and((a), (b)))
00862   #ifdef _ARCH_PWR7 
00863     #define simdia_vandlf(a, b)  (vec_and((a), (b)))
00864   #else
00865     #define simdia_vandlf __simdia_vandlf
00866   #endif
00867 
00868   /* Bitwise NAND: NOT applied to the AND of a and b, each variant built from the NOT/AND macros of its own element type. The non-PWR7 double form is the scalar fallback. */
00869   #define simdia_vnandi(a, b)  (simdia_vnoti(simdia_vandi((a), (b))))
00870   #define simdia_vnandf(a, b)  (simdia_vnotf(simdia_vandf((a), (b))))
00871   #ifdef _ARCH_PWR7 
00872     #define simdia_vnandlf(a, b)  (simdia_vnotlf(simdia_vandlf((a), (b))))
00873   #else
00874     #define simdia_vnandlf __simdia_vnandlf
00875   #endif
00876 
00877   /* Bitwise XOR. The double form is native only with _ARCH_PWR7; otherwise it is the scalar fallback. */
00878   #define simdia_vxori(a, b)  (vec_xor((a), (b)))
00879   #define simdia_vxorf(a, b)  (vec_xor((a), (b)))
00880   #ifdef _ARCH_PWR7 
00881     #define simdia_vxorlf(a, b)  (vec_xor((a), (b)))
00882   #else
00883     #define simdia_vxorlf __simdia_vxorlf
00884   #endif
00885 
00886   /* Bitwise NXOR: NOT applied to the XOR of a and b, each variant built from the NOT/XOR macros of its own element type. The non-PWR7 double form is the scalar fallback. */
00887   #define simdia_vnxori(a, b)  (simdia_vnoti(simdia_vxori((a), (b))))
00888   #define simdia_vnxorf(a, b)  (simdia_vnotf(simdia_vxorf((a), (b))))
00889   #ifdef _ARCH_PWR7 
00890     #define simdia_vnxorlf(a, b)  (simdia_vnotlf(simdia_vxorlf((a), (b))))
00891   #else
00892     #define simdia_vnxorlf __simdia_vnxorlf
00893   #endif
00894 
00895   /* Comparisons: the result of each vec_cmp* call is cast to simdia_veci. Double forms are native only with _ARCH_PWR7; otherwise they alias the scalar fallback. Equal: */
00896   #define  simdia_vcmpeqi(a, b)  ((simdia_veci)(vec_cmpeq((a), (b))))
00897   #define  simdia_vcmpeqf(a, b)  ((simdia_veci)(vec_cmpeq((a), (b))))
00898   #ifdef _ARCH_PWR7 
00899     #define  simdia_vcmpeqlf(a, b)  ((simdia_veci)(vec_cmpeq((a), (b))))
00900   #else
00901     #define simdia_vcmpeqlf __simdia_vcmpeqlf
00902   #endif
00903 
00904   /* Greater than. */
00905   #define  simdia_vcmpgti(a, b)  ((simdia_veci)(vec_cmpgt((a), (b))))
00906   #define  simdia_vcmpgtf(a, b)  ((simdia_veci)(vec_cmpgt((a), (b))))
00907   #ifdef _ARCH_PWR7 
00908     #define  simdia_vcmpgtlf(a, b)  ((simdia_veci)(vec_cmpgt((a), (b))))
00909   #else
00910     #define simdia_vcmpgtlf __simdia_vcmpgtlf
00911   #endif
00912 
00913   /* Greater than or equal. NOTE(review): confirm that the target compiler's vec_cmpge accepts integer vectors. */
00914   #define  simdia_vcmpgei(a, b)  ((simdia_veci)(vec_cmpge((a), (b))))
00915   #define  simdia_vcmpgef(a, b)  ((simdia_veci)(vec_cmpge((a), (b))))
00916   #ifdef _ARCH_PWR7 
00917     #define  simdia_vcmpgelf(a, b)  ((simdia_veci)(vec_cmpge((a), (b))))
00918   #else
00919     #define simdia_vcmpgelf __simdia_vcmpgelf
00920   #endif
00921 
00922   /* Less than. */
00923   #define  simdia_vcmplti(a, b)  ((simdia_veci)(vec_cmplt((a), (b))))
00924   #define  simdia_vcmpltf(a, b)  ((simdia_veci)(vec_cmplt((a), (b))))
00925   #ifdef _ARCH_PWR7 
00926     #define  simdia_vcmpltlf(a, b)  ((simdia_veci)(vec_cmplt((a), (b))))
00927   #else
00928     #define simdia_vcmpltlf __simdia_vcmpltlf
00929   #endif
00930 
00931   /* Less than or equal. NOTE(review): confirm that the target compiler's vec_cmple accepts integer vectors. */
00932   #define  simdia_vcmplei(a, b)  ((simdia_veci)(vec_cmple((a), (b))))
00933   #define  simdia_vcmplef(a, b)  ((simdia_veci)(vec_cmple((a), (b))))
00934   #ifdef _ARCH_PWR7 
00935     #define  simdia_vcmplelf(a, b)  ((simdia_veci)(vec_cmple((a), (b))))
00936     
00937     
00938   #else
00939     #define simdia_vcmplelf __simdia_vcmplelf
00940   #endif
00941 
00942 
00943 
00944 
00945 
00946 
00947 #else
00948 
00949   /* Scalar fallback backend (none of the SIMD branches above was selected): every simdia_* name is a plain alias of the __simdia_* struct-based implementation with the same suffix (e.g. __simdia_vinserti near the top of this header). Types: */
00950   typedef   __simdia_veci   simdia_veci;
00951   typedef   __simdia_vecf   simdia_vecf;
00952   typedef  __simdia_veclf  simdia_veclf;
00953 
00954   /* Insert. */
00955   #define  simdia_vinserti  __simdia_vinserti
00956   #define  simdia_vinsertf  __simdia_vinsertf
00957   #define simdia_vinsertlf __simdia_vinsertlf
00958 
00959   /* Extract. */
00960   #define  simdia_vextracti  __simdia_vextracti
00961   #define  simdia_vextractf  __simdia_vextractf
00962   #define simdia_vextractlf __simdia_vextractlf
00963 
00964   /* Set. */
00965   #define  simdia_vseti  __simdia_vseti
00966   #define  simdia_vsetf  __simdia_vsetf
00967   #define simdia_vsetlf __simdia_vsetlf
00968 
00969   /* Constant zero. */
00970   #define  simdia_const_vzeroi  __simdia_const_vzeroi
00971   #define  simdia_const_vzerof  __simdia_const_vzerof
00972   #define simdia_const_vzerolf __simdia_const_vzerolf
00973 
00974   /* Constant one. */
00975   #define  simdia_const_vonei  __simdia_const_vonei
00976   #define  simdia_const_vonef  __simdia_const_vonef
00977   #define simdia_const_vonelf __simdia_const_vonelf
00978 
00979   /* Constant two. */
00980   #define  simdia_const_vtwoi  __simdia_const_vtwoi
00981   #define  simdia_const_vtwof  __simdia_const_vtwof
00982   #define simdia_const_vtwolf __simdia_const_vtwolf
00983 
00984   /* Constant negative one. */
00985   #define  simdia_const_vnegonei  __simdia_const_vnegonei
00986   #define  simdia_const_vnegonef  __simdia_const_vnegonef
00987   #define simdia_const_vnegonelf __simdia_const_vnegonelf
00988 
00989   /* Scalar fallback aliases, continued: rotates and arithmetic map one-to-one onto the __simdia_* implementations of the same suffix. Rotate: */
00990   #define  simdia_vrothi  __simdia_vrothi
00991   #define  simdia_vrothf  __simdia_vrothf
00992   #define simdia_vrothlf __simdia_vrothlf
00993   #define  simdia_vrotli  __simdia_vrotli
00994   #define  simdia_vrotlf  __simdia_vrotlf
00995   #define simdia_vrotllf __simdia_vrotllf
00996   
00997   /* Addition. */
00998   #define  simdia_vaddi  __simdia_vaddi
00999   #define  simdia_vaddf  __simdia_vaddf
01000   #define simdia_vaddlf __simdia_vaddlf
01001 
01002   /* Subtraction. */
01003   #define  simdia_vsubi  __simdia_vsubi
01004   #define  simdia_vsubf  __simdia_vsubf
01005   #define simdia_vsublf __simdia_vsublf
01006 
01007   /* Multiplication. */
01008   #define  simdia_vmulf   __simdia_vmulf
01009   #define simdia_vmullf  __simdia_vmullf
01010 
01011   /* Division. */
01012   #define  simdia_vdivf   __simdia_vdivf
01013   #define simdia_vdivlf  __simdia_vdivlf
01014 
01015   /* Multiply-add. */
01016   #define  simdia_vmaddf  __simdia_vmaddf
01017   #define simdia_vmaddlf __simdia_vmaddlf
01018 
01019   /* Reciprocal. */
01020   #define  simdia_vrecipf  __simdia_vrecipf
01021   #define simdia_vreciplf __simdia_vreciplf
01022 
01023   /* Square root. */
01024   #define  simdia_vsqrtf  __simdia_vsqrtf
01025   #define simdia_vsqrtlf __simdia_vsqrtlf
01026 
01027   /* Reciprocal square root. */
01028   #define  simdia_vrsqrtf  __simdia_vrsqrtf
01029   #define simdia_vrsqrtlf __simdia_vrsqrtlf
01030 
01031   /* Scalar fallback aliases, continued: bitwise logic maps one-to-one onto the __simdia_* implementations of the same suffix. NOT: */
01032   #define  simdia_vnoti  __simdia_vnoti
01033   #define  simdia_vnotf  __simdia_vnotf
01034   #define simdia_vnotlf __simdia_vnotlf
01035 
01036   /* OR. */
01037   #define  simdia_vori  __simdia_vori
01038   #define  simdia_vorf  __simdia_vorf
01039   #define simdia_vorlf __simdia_vorlf
01040 
01041   /* NOR. */
01042   #define  simdia_vnori  __simdia_vnori
01043   #define  simdia_vnorf  __simdia_vnorf
01044   #define simdia_vnorlf __simdia_vnorlf
01045 
01046   /* AND. */
01047   #define  simdia_vandi  __simdia_vandi
01048   #define  simdia_vandf  __simdia_vandf
01049   #define simdia_vandlf __simdia_vandlf
01050 
01051   /* NAND. */
01052   #define  simdia_vnandi  __simdia_vnandi
01053   #define  simdia_vnandf  __simdia_vnandf
01054   #define simdia_vnandlf __simdia_vnandlf
01055 
01056   /* XOR. */
01057   #define  simdia_vxori  __simdia_vxori
01058   #define  simdia_vxorf  __simdia_vxorf
01059   #define simdia_vxorlf __simdia_vxorlf
01060 
01061   /* NXOR. */
01062   #define  simdia_vnxori  __simdia_vnxori
01063   #define  simdia_vnxorf  __simdia_vnxorf
01064   #define simdia_vnxorlf __simdia_vnxorlf
01065 
01066   /* Scalar fallback aliases, continued: comparisons map one-to-one onto the __simdia_* implementations of the same suffix. Equal: */
01067   #define  simdia_vcmpeqi  __simdia_vcmpeqi
01068   #define  simdia_vcmpeqf  __simdia_vcmpeqf
01069   #define simdia_vcmpeqlf __simdia_vcmpeqlf
01070 
01071   /* Greater than. */
01072   #define  simdia_vcmpgti  __simdia_vcmpgti
01073   #define  simdia_vcmpgtf  __simdia_vcmpgtf
01074   #define simdia_vcmpgtlf __simdia_vcmpgtlf
01075 
01076   /* Greater than or equal. */
01077   #define  simdia_vcmpgei  __simdia_vcmpgei
01078   #define  simdia_vcmpgef  __simdia_vcmpgef
01079   #define simdia_vcmpgelf __simdia_vcmpgelf
01080 
01081   /* Less than. */
01082   #define  simdia_vcmplti  __simdia_vcmplti
01083   #define  simdia_vcmpltf  __simdia_vcmpltf
01084   #define simdia_vcmpltlf __simdia_vcmpltlf
01085 
01086   /* Less than or equal. */
01087   #define  simdia_vcmplei  __simdia_vcmplei
01088   #define  simdia_vcmplef  __simdia_vcmplef
01089   #define simdia_vcmplelf __simdia_vcmplelf
01090 
01091 
01092 #endif
01093 
01094 
01095 
01096 
01097 
01098 
01099 
01100 
01101 
01102 
01103 
01104 
01105 
01106 
01107 /* Backend-independent helpers. Number of scalar elements per vector, computed from the sizes of the selected vector type and its scalar element type. */
01108 #define  simdia_veci_numElems  (sizeof( simdia_veci)/sizeof(   int))
01109 #define  simdia_vecf_numElems  (sizeof( simdia_vecf)/sizeof( float))
01110 #define simdia_veclf_numElems  (sizeof(simdia_veclf)/sizeof(double))
01111 
01112 /* Spread: alternative spelling of simdia_vset* (same expansion). */
01113 #define  simdia_vspreadi(a)  ( simdia_vseti(a))
01114 #define  simdia_vspreadf(a)  ( simdia_vsetf(a))
01115 #define simdia_vspreadlf(a)  (simdia_vsetlf(a))
01116 /* Finite test: true only if isfinite() holds for every element (indices 0..3 for float, 0..1 for double). Relies on the backend's simdia_vextract* and expands the argument once per element. */
01117 #define  simdia_visfinitef(a) (isfinite(simdia_vextractf((a),0)) && isfinite(simdia_vextractf((a),1)) && isfinite(simdia_vextractf((a),2)) && isfinite(simdia_vextractf((a),3)))
01118 #define simdia_visfinitelf(a) (isfinite(simdia_vextractlf((a),0)) && isfinite(simdia_vextractlf((a),1)))
01119 
01120 /* Scalar-operand variants: the trailing "s" forms convert their scalar argument(s) with simdia_vset* and then call the vector operation. Add vector + scalar: */
01121 #define   simdia_vaddis(a, b)  ( simdia_vaddi((a),  simdia_vseti(b)))
01122 #define   simdia_vaddfs(a, b)  ( simdia_vaddf((a),  simdia_vsetf(b)))
01123 #define  simdia_vaddlfs(a, b)  (simdia_vaddlf((a), simdia_vsetlf(b)))
01124 
01125 /* Subtract: vector - scalar. */
01126 #define   simdia_vsubis(a, b)  ( simdia_vsubi((a),  simdia_vseti(b)))
01127 #define   simdia_vsubfs(a, b)  ( simdia_vsubf((a),  simdia_vsetf(b)))
01128 #define  simdia_vsublfs(a, b)  (simdia_vsublf((a), simdia_vsetlf(b)))
01129 
01130 /* Multiply: vector * scalar. */
01131 #define   simdia_vmulfs(a, b)  ( simdia_vmulf((a),  simdia_vsetf(b)))
01132 #define  simdia_vmullfs(a, b)  (simdia_vmullf((a), simdia_vsetlf(b)))
01133 
01134 /* Divide: vector / scalar. */
01135 #define  simdia_vdivfs(a, b)  ( simdia_vdivf((a),  simdia_vsetf(b)))
01136 #define simdia_vdivlfs(a, b)  (simdia_vdivlf((a), simdia_vsetlf(b)))
01137 
01138 /* Multiply-add with a scalar addend: (a * b) + c, where a and b are vectors and c is a scalar. */
01139 #define  simdia_vmaddfs(a, b, c)  ( simdia_vmaddf((a), (b),  simdia_vsetf(c)))
01140 #define simdia_vmaddlfs(a, b, c)  (simdia_vmaddlf((a), (b), simdia_vsetlf(c)))
01141 
01142 /* Multiply-add with scalar multiplier and scalar addend: (a * b) + c, where a is a vector and b, c are scalars. */
01143 #define  simdia_vmaddfss(a, b, c)  ( simdia_vmaddf((a),  simdia_vsetf(b),  simdia_vsetf(c)))
01144 #define simdia_vmaddlfss(a, b, c)  (simdia_vmaddlf((a), simdia_vsetlf(b), simdia_vsetlf(c)))
01145 /* On AltiVec builds, remove the `vector` macro if one is defined at this point. Presumably altivec.h defines it and it would otherwise leak into code that includes this header (the top of the file does the same for `pixel` and `bool`) - TODO confirm. */
01146 #if defined(__VEC__)
01147   #ifdef vector
01148   #undef vector
01149   #endif
01150 #endif
01151 
01152 #endif //__SIMDIA_H__