00001 #ifndef __SIMDIA_H__
00002 #define __SIMDIA_H__
00003
00004
00005 #if defined(__SSE2__) && !defined(_CRAYC)
00006 #include "emmintrin.h"
00007 #endif
00008
00009 #if CMK_CELL_SPE != 0
00010 #include "spu_intrinsics.h"
00011 #else
00012 #include "math.h"
00013 #endif
00014
00015 #if defined(__VEC__)
00016 #include "altivec.h"
00017 #endif
00018
00019
00020
/* Fallback for toolchains without a single-precision sqrtf(): when
 * CMK_HAS_SQRTF evaluates to zero (an undefined macro also evaluates to zero
 * inside #if), sqrtf() is routed through the double-precision sqrt() and the
 * result is narrowed back to float. */
#if !CMK_HAS_SQRTF
#  define sqrtf(a) ((float)sqrt((double)(a)))
#endif
00024
00025
00026
00027
/* Build-time switches for forcing the generic (scalar) implementation.
 * A non-zero SIMDIA_FORCE_NO_SSE disables the SSE2 branch later in this
 * header.  The ALTIVEC and SPE switches are presumably consumed the same way
 * by their respective branches (not visible here -- TODO confirm).
 * Each switch defaults to 0 and may be pre-defined by the build
 * (e.g. -DSIMDIA_FORCE_NO_SSE=1) without triggering a redefinition. */
#ifndef SIMDIA_FORCE_NO_SSE
#define SIMDIA_FORCE_NO_SSE (0)
#endif
#ifndef SIMDIA_FORCE_NO_ALTIVEC
#define SIMDIA_FORCE_NO_ALTIVEC (0)
#endif
#ifndef SIMDIA_FORCE_NO_SPE_SIMD
#define SIMDIA_FORCE_NO_SPE_SIMD (0)
#endif
00031
00032
00033
/* Mathematical constants as double-precision literals.
 * The literals carry more digits than a double can hold so that each one
 * rounds to the nearest representable double.  (A 16-digit sqrt(2) literal
 * rounds to the double one ulp below the correctly rounded value.) */
#define SIMDIA_CONSTANT_PI (3.14159265358979323846)
#define SIMDIA_CONSTANT_E (2.71828182845904523536)
#define SIMDIA_CONSTANT_SQRT_2 (1.41421356237309504880)
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00058
00059
00060
00061
00062
00063
00064
/* Generic (non-SIMD) vector types: plain structs whose members are the
 * individual lanes, named v0 upward. */

/* Four int lanes. */
typedef struct __simdia_vec_i {
  int v0;
  int v1;
  int v2;
  int v3;
} __simdia_veci;

/* Four float lanes. */
typedef struct __simdia_vec_f {
  float v0;
  float v1;
  float v2;
  float v3;
} __simdia_vecf;

/* Two double lanes. */
typedef struct __simdia_vec_lf {
  double v0;
  double v1;
} __simdia_veclf;
00068
00069
00070
00071 inline __simdia_veci __simdia_vinserti( __simdia_veci v, const int s, const int i) { __simdia_veci r = v; int* rPtr = ( int*)(&r); rPtr[i] = s; return r; }
00072 inline __simdia_vecf __simdia_vinsertf( __simdia_vecf v, const float s, const int i) { __simdia_vecf r = v; float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00073 inline __simdia_veclf __simdia_vinsertlf(__simdia_veclf v, const double s, const int i) { __simdia_veclf r = v; double* rPtr = (double*)(&r); rPtr[i] = s; return r; }
00074
00075
00076 inline int __simdia_vextracti( __simdia_veci v, const int i) { int* vPtr = ( int*)(&v); return vPtr[i]; }
00077 inline float __simdia_vextractf( __simdia_vecf v, const int i) { float* vPtr = ( float*)(&v); return vPtr[i]; }
00078 inline double __simdia_vextractlf(__simdia_veclf v, const int i) { double* vPtr = (double*)(&v); return vPtr[i]; }
00079
00080
00081 inline __simdia_veci __simdia_vseti(const int a) { __simdia_veci r; r.v0 = r.v1 = r.v2 = r.v3 = a; return r; }
00082 inline __simdia_vecf __simdia_vsetf(const float a) { __simdia_vecf r; r.v0 = r.v1 = r.v2 = r.v3 = a; return r; }
00083 inline __simdia_veclf __simdia_vsetlf(const double a) { __simdia_veclf r; r.v0 = r.v1 = a; return r; }
00084
00085
00086
00087
00088 const __simdia_veci __simdia_const_vzeroi = { 0 , 0 , 0 , 0 };
00089 const __simdia_vecf __simdia_const_vzerof = { 0.0f, 0.0f, 0.0f, 0.0f };
00090 const __simdia_veclf __simdia_const_vzerolf = { 0.0 , 0.0 };
00091
00092
00093 const __simdia_veci __simdia_const_vonei = { 1 , 1 , 1 , 1 };
00094 const __simdia_vecf __simdia_const_vonef = { 1.0f, 1.0f, 1.0f, 1.0f };
00095 const __simdia_veclf __simdia_const_vonelf = { 1.0 , 1.0 };
00096
00097
00098 const __simdia_veci __simdia_const_vtwoi = { 2 , 2 , 2 , 2 };
00099 const __simdia_vecf __simdia_const_vtwof = { 2.0f, 2.0f, 2.0f, 2.0f };
00100 const __simdia_veclf __simdia_const_vtwolf = { 2.0 , 2.0 };
00101
00102
00103 const __simdia_veci __simdia_const_vnegonei = { -1 , -1 , -1 , -1 };
00104 const __simdia_vecf __simdia_const_vnegonef = { -1.0f, -1.0f, -1.0f, -1.0f };
00105 const __simdia_veclf __simdia_const_vnegonelf = { -1.0 , -1.0 };
00106
00107
00108
00109
00110
00111
00112 inline __simdia_veci __simdia_vrothi(const __simdia_veci a, int s) { __simdia_veci b; int* a_ptr = ( int*)(&a); int* b_ptr = ( int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00113 inline __simdia_vecf __simdia_vrothf(const __simdia_vecf a, int s) { __simdia_vecf b; float* a_ptr = ( float*)(&a); float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00114 inline __simdia_veclf __simdia_vrothlf(const __simdia_veclf a, int s) { __simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0-s)&0x1]; b_ptr[1] = a_ptr[(1-s)&0x1]; return b; }
00115 inline __simdia_veci __simdia_vrotli(const __simdia_veci a, int s) { __simdia_veci b; int* a_ptr = ( int*)(&a); int* b_ptr = ( int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00116 inline __simdia_vecf __simdia_vrotlf(const __simdia_vecf a, int s) { __simdia_vecf b; float* a_ptr = ( float*)(&a); float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00117 inline __simdia_veclf __simdia_vrotllf(const __simdia_veclf a, int s) { __simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0+s)&0x1]; b_ptr[1] = a_ptr[(1+s)&0x1]; return b; }
00118
00119
00120 inline __simdia_veci __simdia_vaddi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1; r.v2 = a.v2 + b.v2; r.v3 = a.v3 + b.v3; return r; }
00121 inline __simdia_vecf __simdia_vaddf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1; r.v2 = a.v2 + b.v2; r.v3 = a.v3 + b.v3; return r; }
00122 inline __simdia_veclf __simdia_vaddlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1; return r; }
00123
00124
00125 inline __simdia_veci __simdia_vsubi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1; r.v2 = a.v2 - b.v2; r.v3 = a.v3 - b.v3; return r; }
00126 inline __simdia_vecf __simdia_vsubf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1; r.v2 = a.v2 - b.v2; r.v3 = a.v3 - b.v3; return r; }
00127 inline __simdia_veclf __simdia_vsublf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1; return r; }
00128
00129
00130 inline __simdia_veci __simdia_vmuli(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1; r.v2 = a.v2 * b.v2; r.v3 = a.v3 * b.v3; return r; }
00131 inline __simdia_vecf __simdia_vmulf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1; r.v2 = a.v2 * b.v2; r.v3 = a.v3 * b.v3; return r; }
00132 inline __simdia_veclf __simdia_vmullf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1; return r; }
00133
00134
00135 inline __simdia_veci __simdia_vdivi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1; r.v2 = a.v2 / b.v2; r.v3 = a.v3 / b.v3; return r; }
00136 inline __simdia_vecf __simdia_vdivf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1; r.v2 = a.v2 / b.v2; r.v3 = a.v3 / b.v3; return r; }
00137 inline __simdia_veclf __simdia_vdivlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1; return r; }
00138
00139
00140 inline __simdia_veci __simdia_vmaddi(const __simdia_veci a, const __simdia_veci b, const __simdia_veci c) { __simdia_veci r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1; r.v2 = a.v2 * b.v2 + c.v2; r.v3 = a.v3 * b.v3 + c.v3; return r; }
00141 inline __simdia_vecf __simdia_vmaddf(const __simdia_vecf a, const __simdia_vecf b, const __simdia_vecf c) { __simdia_vecf r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1; r.v2 = a.v2 * b.v2 + c.v2; r.v3 = a.v3 * b.v3 + c.v3; return r; }
00142 inline __simdia_veclf __simdia_vmaddlf(const __simdia_veclf a, const __simdia_veclf b, const __simdia_veclf c) { __simdia_veclf r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1; return r; }
00143
00144
00145
00146 inline __simdia_vecf __simdia_vrecipf(const __simdia_vecf a) { __simdia_vecf r; r.v0 = 1.0f / a.v0; r.v1 = 1.0f / a.v1; r.v2 = 1.0f / a.v2; r.v3 = 1.0f / a.v3; return r; }
00147 inline __simdia_veclf __simdia_vreciplf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = 1.0f / a.v0; r.v1 = 1.0f / a.v1; return r; }
00148
00149
00150 inline __simdia_vecf __simdia_vsqrtf(const __simdia_vecf a) { __simdia_vecf r; r.v0 = sqrtf(a.v0); r.v1 = sqrtf(a.v1); r.v2 = sqrtf(a.v2); r.v3 = sqrtf(a.v3); return r; }
00151 inline __simdia_veclf __simdia_vsqrtlf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = sqrt(a.v0); r.v1 = sqrt(a.v1); return r; }
00152
00153
00154 inline __simdia_vecf __simdia_vrsqrtf(const __simdia_vecf a) { __simdia_vecf r; r.v0 = 1.0f / sqrtf(a.v0); r.v1 = 1.0f / sqrtf(a.v1); r.v2 = 1.0f / sqrtf(a.v2); r.v3 = 1.0f / sqrtf(a.v3); return r; }
00155 inline __simdia_veclf __simdia_vrsqrtlf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = 1.0 / sqrt(a.v0); r.v1 = 1.0 / sqrt(a.v1); return r; }
00156
00157
00158 inline __simdia_veci __simdia_vnoti(const __simdia_veci a) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00159 inline __simdia_vecf __simdia_vnotf(const __simdia_vecf a) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00160 inline __simdia_veclf __simdia_vnotlf(const __simdia_veclf a) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00161
00162
00163 inline __simdia_veci __simdia_vori(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00164 inline __simdia_vecf __simdia_vorf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00165 inline __simdia_veclf __simdia_vorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00166
00167
00168 inline __simdia_veci __simdia_vnori(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00169 inline __simdia_vecf __simdia_vnorf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00170 inline __simdia_veclf __simdia_vnorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00171
00172
00173 inline __simdia_veci __simdia_vandi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00174 inline __simdia_vecf __simdia_vandf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00175 inline __simdia_veclf __simdia_vandlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00176
00177
00178 inline __simdia_veci __simdia_vnandi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00179 inline __simdia_vecf __simdia_vnandf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00180 inline __simdia_veclf __simdia_vnandlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00181
00182
00183 inline __simdia_veci __simdia_vxori(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00184 inline __simdia_vecf __simdia_vxorf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00185 inline __simdia_veclf __simdia_vxorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00186
00187
00188 inline __simdia_veci __simdia_vnxori(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00189 inline __simdia_vecf __simdia_vnxorf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00190 inline __simdia_veclf __simdia_vnxorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00191
00192
00193
00194
00195 inline __simdia_veci __simdia_vcmpeqi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 == b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 == b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00196 inline __simdia_veci __simdia_vcmpeqf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 == b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 == b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00197 inline __simdia_veci __simdia_vcmpeqlf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00198
00199
00200 inline __simdia_veci __simdia_vcmpgti(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 > b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 > b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00201 inline __simdia_veci __simdia_vcmpgtf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 > b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 > b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00202 inline __simdia_veci __simdia_vcmpgtlf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00203
00204
00205 inline __simdia_veci __simdia_vcmpgei(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 >= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 >= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00206 inline __simdia_veci __simdia_vcmpgef(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 >= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 >= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00207 inline __simdia_veci __simdia_vcmpgelf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00208
00209
00210 inline __simdia_veci __simdia_vcmplti(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 < b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 < b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00211 inline __simdia_veci __simdia_vcmpltf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 < b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 < b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00212 inline __simdia_veci __simdia_vcmpltlf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00213
00214
00215 inline __simdia_veci __simdia_vcmplei(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 <= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 <= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00216 inline __simdia_veci __simdia_vcmplef(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 <= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 <= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00217 inline __simdia_veci __simdia_vcmplelf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00218
00219
00220
00221
00222
00223 #if defined(__cplusplus)
00224
00225
00226 inline __simdia_veci operator+(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vaddi(a, b); }
00227 inline __simdia_vecf operator+(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vaddf(a, b); }
00228 inline __simdia_veclf operator+(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vaddlf(a, b); }
00229 inline __simdia_veci operator+=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vaddi(a, b); return a; }
00230 inline __simdia_vecf operator+=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vaddf(a, b); return a; }
00231 inline __simdia_veclf operator+=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vaddlf(a, b); return a; }
00232
00233 inline __simdia_veci operator+(const __simdia_veci &a, const int &b) { return __simdia_vaddi(a, __simdia_vseti(b)); }
00234 inline __simdia_vecf operator+(const __simdia_vecf &a, const float &b) { return __simdia_vaddf(a, __simdia_vsetf(b)); }
00235 inline __simdia_veclf operator+(const __simdia_veclf &a, const double &b) { return __simdia_vaddlf(a, __simdia_vsetlf(b)); }
00236 inline __simdia_veci operator+=( __simdia_veci &a, const int &b) { a = __simdia_vaddi(a, __simdia_vseti(b)); return a; }
00237 inline __simdia_vecf operator+=( __simdia_vecf &a, const float &b) { a = __simdia_vaddf(a, __simdia_vsetf(b)); return a; }
00238 inline __simdia_veclf operator+=(__simdia_veclf &a, const double &b) { a = __simdia_vaddlf(a, __simdia_vsetlf(b)); return a; }
00239
00240
00241 inline __simdia_veci operator-(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vsubi(a, b); }
00242 inline __simdia_vecf operator-(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vsubf(a, b); }
00243 inline __simdia_veclf operator-(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vsublf(a, b); }
00244 inline __simdia_veci operator-=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vsubi(a, b); return a; }
00245 inline __simdia_vecf operator-=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vsubf(a, b); return a; }
00246 inline __simdia_veclf operator-=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vsublf(a, b); return a; }
00247
00248 inline __simdia_veci operator-(const __simdia_veci &a, const int &b) { return __simdia_vsubi(a, __simdia_vseti(b)); }
00249 inline __simdia_vecf operator-(const __simdia_vecf &a, const float &b) { return __simdia_vsubf(a, __simdia_vsetf(b)); }
00250 inline __simdia_veclf operator-(const __simdia_veclf &a, const double &b) { return __simdia_vsublf(a, __simdia_vsetlf(b)); }
00251 inline __simdia_veci operator-=( __simdia_veci &a, const int &b) { a = __simdia_vsubi(a, __simdia_vseti(b)); return a; }
00252 inline __simdia_vecf operator-=( __simdia_vecf &a, const float &b) { a = __simdia_vsubf(a, __simdia_vsetf(b)); return a; }
00253 inline __simdia_veclf operator-=(__simdia_veclf &a, const double &b) { a = __simdia_vsublf(a, __simdia_vsetlf(b)); return a; }
00254
00255
00256 inline __simdia_vecf operator*(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vmulf(a, b); }
00257 inline __simdia_veclf operator*(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vmullf(a, b); }
00258 inline __simdia_vecf operator*=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vmulf(a, b); return a; }
00259 inline __simdia_veclf operator*=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vmullf(a, b); return a; }
00260
00261 inline __simdia_vecf operator*(const __simdia_vecf &a, const float &b) { return __simdia_vmulf(a, __simdia_vsetf(b)); }
00262 inline __simdia_veclf operator*(const __simdia_veclf &a, const double &b) { return __simdia_vmullf(a, __simdia_vsetlf(b)); }
00263 inline __simdia_vecf operator*=( __simdia_vecf &a, const float &b) { a = __simdia_vmulf(a, __simdia_vsetf(b)); return a; }
00264 inline __simdia_veclf operator*=(__simdia_veclf &a, const double &b) { a = __simdia_vmullf(a, __simdia_vsetlf(b)); return a; }
00265
00266
00267 inline __simdia_vecf operator/(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vdivf(a, b); }
00268 inline __simdia_veclf operator/(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vdivlf(a, b); }
00269 inline __simdia_vecf operator/=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vdivf(a, b); return a; }
00270 inline __simdia_veclf operator/=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vdivlf(a, b); return a; }
00271
00272 inline __simdia_vecf operator/(const __simdia_vecf &a, const float &b) { return __simdia_vdivf(a, __simdia_vsetf(b)); }
00273 inline __simdia_veclf operator/(const __simdia_veclf &a, const double &b) { return __simdia_vdivlf(a, __simdia_vsetlf(b)); }
00274 inline __simdia_vecf operator/=( __simdia_vecf &a, const float &b) { a = __simdia_vdivf(a, __simdia_vsetf(b)); return a; }
00275 inline __simdia_veclf operator/=(__simdia_veclf &a, const double &b) { a = __simdia_vdivlf(a, __simdia_vsetlf(b)); return a; }
00276
00277
00278 inline __simdia_veci operator|(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vori(a, b); }
00279 inline __simdia_vecf operator|(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vorf(a, b); }
00280 inline __simdia_veclf operator|(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vorlf(a, b); }
00281 inline __simdia_veci operator|=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vori(a, b); return a; }
00282 inline __simdia_vecf operator|=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vorf(a, b); return a; }
00283 inline __simdia_veclf operator|=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vorlf(a, b); return a; }
00284
00285 inline __simdia_veci operator|(const __simdia_veci &a, const int &b) { return __simdia_vori(a, __simdia_vseti(b)); }
00286 inline __simdia_vecf operator|(const __simdia_vecf &a, const float &b) { return __simdia_vorf(a, __simdia_vsetf(b)); }
00287 inline __simdia_veclf operator|(const __simdia_veclf &a, const double &b) { return __simdia_vorlf(a, __simdia_vsetlf(b)); }
00288 inline __simdia_veci operator|=( __simdia_veci &a, const int &b) { a = __simdia_vori(a, __simdia_vseti(b)); return a; }
00289 inline __simdia_vecf operator|=( __simdia_vecf &a, const float &b) { a = __simdia_vorf(a, __simdia_vsetf(b)); return a; }
00290 inline __simdia_veclf operator|=(__simdia_veclf &a, const double &b) { a = __simdia_vorlf(a, __simdia_vsetlf(b)); return a; }
00291
00292
00293 inline __simdia_veci operator&(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vandi(a, b); }
00294 inline __simdia_vecf operator&(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vandf(a, b); }
00295 inline __simdia_veclf operator&(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vandlf(a, b); }
00296 inline __simdia_veci operator&=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vandi(a, b); return a; }
00297 inline __simdia_vecf operator&=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vandf(a, b); return a; }
00298 inline __simdia_veclf operator&=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vandlf(a, b); return a; }
00299
00300 inline __simdia_veci operator&(const __simdia_veci &a, const int &b) { return __simdia_vandi(a, __simdia_vseti(b)); }
00301 inline __simdia_vecf operator&(const __simdia_vecf &a, const float &b) { return __simdia_vandf(a, __simdia_vsetf(b)); }
00302 inline __simdia_veclf operator&(const __simdia_veclf &a, const double &b) { return __simdia_vandlf(a, __simdia_vsetlf(b)); }
00303 inline __simdia_veci operator&=( __simdia_veci &a, const int &b) { a = __simdia_vandi(a, __simdia_vseti(b)); return a; }
00304 inline __simdia_vecf operator&=( __simdia_vecf &a, const float &b) { a = __simdia_vandf(a, __simdia_vsetf(b)); return a; }
00305 inline __simdia_veclf operator&=(__simdia_veclf &a, const double &b) { a = __simdia_vandlf(a, __simdia_vsetlf(b)); return a; }
00306
00307
00308 inline __simdia_veci operator^(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vxori(a, b); }
00309 inline __simdia_vecf operator^(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vxorf(a, b); }
00310 inline __simdia_veclf operator^(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vxorlf(a, b); }
00311 inline __simdia_veci operator^=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vxori(a, b); return a; }
00312 inline __simdia_vecf operator^=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vxorf(a, b); return a; }
00313 inline __simdia_veclf operator^=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vxorlf(a, b); return a; }
00314
00315 inline __simdia_veci operator^(const __simdia_veci &a, const int &b) { return __simdia_vxori(a, __simdia_vseti(b)); }
00316 inline __simdia_vecf operator^(const __simdia_vecf &a, const float &b) { return __simdia_vxorf(a, __simdia_vsetf(b)); }
00317 inline __simdia_veclf operator^(const __simdia_veclf &a, const double &b) { return __simdia_vxorlf(a, __simdia_vsetlf(b)); }
00318 inline __simdia_veci operator^=( __simdia_veci &a, const int &b) { a = __simdia_vxori(a, __simdia_vseti(b)); return a; }
00319 inline __simdia_vecf operator^=( __simdia_vecf &a, const float &b) { a = __simdia_vxorf(a, __simdia_vsetf(b)); return a; }
00320 inline __simdia_veclf operator^=(__simdia_veclf &a, const double &b) { a = __simdia_vxorlf(a, __simdia_vsetlf(b)); return a; }
00321
00322 #endif
00323
00327
00328
00329
00330
00331
00332 #if defined(__SSE2__) && (!(SIMDIA_FORCE_NO_SSE)) && !defined(_CRAYC)
00333
00334
00335
00336
00337
00338
/* Vector types for the SSE2 branch: aliases for the native 128-bit SSE
 * register types.  The helpers in this branch treat them as four int lanes,
 * four float lanes and two double lanes respectively. */
typedef __m128i simdia_veci;
typedef __m128 simdia_vecf;
typedef __m128d simdia_veclf;
00342
00343
00344
00345 inline simdia_veci simdia_vinserti( simdia_veci v, const int s, const int i) { simdia_veci r = v; int* rPtr = ( int*)(&r); rPtr[i] = s; return r; }
00346 inline simdia_vecf simdia_vinsertf( simdia_vecf v, const float s, const int i) { simdia_vecf r = v; float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00347 inline simdia_veclf simdia_vinsertlf(simdia_veclf v, const double s, const int i) { simdia_veclf r = v; double* rPtr = (double*)(&r); rPtr[i] = s; return r; }
00348
00349
00350
00351 inline int vextracti( simdia_veci v, const int i) { return (( int*)(&v))[i]; }
00352 inline float vextractf( simdia_vecf v, const int i) { return (( float*)(&v))[i]; }
00353 inline double vextractlf(simdia_veclf v, const int i) { return ((double*)(&v))[i]; }
00354
00355
/* Splat (SSE2 branch): build a vector with every lane set to the scalar `a`.
 * The argument is cast to the lane type (int, float or double) before being
 * handed to the matching _mm_set1_* intrinsic. */
#define simdia_vseti(a) (_mm_set1_epi32((int)(a)))
#define simdia_vsetf(a) (_mm_set1_ps((float)(a)))
#define simdia_vsetlf(a) (_mm_set1_pd((double)(a)))
00359
00360
/* Constant vectors for the SSE2 branch.  Each macro is an expression that
 * builds the vector at the point of use; every lane holds the named value. */

/* All lanes 0 */
#define simdia_const_vzeroi  (_mm_setzero_si128())
#define simdia_const_vzerof  (_mm_setzero_ps())
#define simdia_const_vzerolf (_mm_setzero_pd())

/* All lanes 1 */
#define simdia_const_vonei  (_mm_set1_epi32(1))
#define simdia_const_vonef  (_mm_set1_ps(1.0f))
#define simdia_const_vonelf (_mm_set1_pd(1.0))

/* All lanes 2 */
#define simdia_const_vtwoi  (_mm_set1_epi32(2))
#define simdia_const_vtwof  (_mm_set1_ps(2.0f))
#define simdia_const_vtwolf (_mm_set1_pd(2.0))

/* All lanes -1 */
#define simdia_const_vnegonei  (_mm_set1_epi32(-1))
#define simdia_const_vnegonef  (_mm_set1_ps(-1.0f))
#define simdia_const_vnegonelf (_mm_set1_pd(-1.0))
00379
00380
00381
00382 inline simdia_veci simdia_vrothi(const simdia_veci &a, int s) { simdia_veci b; int* a_ptr = ( int*)(&a); int* b_ptr = ( int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00383 inline simdia_vecf simdia_vrothf(const simdia_vecf &a, int s) { simdia_vecf b; float* a_ptr = ( float*)(&a); float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00384 inline simdia_veclf simdia_vrothlf(const simdia_veclf &a, int s) { simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0-s)&0x1]; b_ptr[1] = a_ptr[(1-s)&0x1]; return b; }
00385 inline simdia_veci simdia_vrotli(const simdia_veci &a, int s) { simdia_veci b; int* a_ptr = ( int*)(&a); int* b_ptr = ( int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00386 inline simdia_vecf simdia_vrotlf(const simdia_vecf &a, int s) { simdia_vecf b; float* a_ptr = ( float*)(&a); float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00387 inline simdia_veclf simdia_vrotllf(const simdia_veclf &a, int s) { simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0+s)&0x1]; b_ptr[1] = a_ptr[(1+s)&0x1]; return b; }
00388
00389
/* Lane-wise arithmetic (SSE): direct mappings onto the SSE/SSE2 intrinsics. */

/* Addition: lhs + rhs */
#define simdia_vaddi(lhs, rhs) (_mm_add_epi32((lhs), (rhs)))
#define simdia_vaddf(lhs, rhs) (_mm_add_ps((lhs), (rhs)))
#define simdia_vaddlf(lhs, rhs) (_mm_add_pd((lhs), (rhs)))

/* Subtraction: lhs - rhs */
#define simdia_vsubi(lhs, rhs) (_mm_sub_epi32((lhs), (rhs)))
#define simdia_vsubf(lhs, rhs) (_mm_sub_ps((lhs), (rhs)))
#define simdia_vsublf(lhs, rhs) (_mm_sub_pd((lhs), (rhs)))

/* Multiplication: lhs * rhs (float and double only) */
#define simdia_vmulf(lhs, rhs) (_mm_mul_ps((lhs), (rhs)))
#define simdia_vmullf(lhs, rhs) (_mm_mul_pd((lhs), (rhs)))

/* Division: num / den (float and double only) */
#define simdia_vdivf(num, den) (_mm_div_ps((num), (den)))
#define simdia_vdivlf(num, den) (_mm_div_pd((num), (den)))
00406
00407
/*
 * Multiply-add (SSE): (a * b) + c, composed from this section's multiply and
 * add macros. The helpers must be referenced by their simdia_-prefixed names;
 * no unprefixed vaddf / vmulf / vaddlf / vmullf are defined by this header
 * section, so the previous spelling failed to compile when expanded.
 */
#define simdia_vmaddf(a, b, c) (simdia_vaddf(simdia_vmulf((a), (b)), (c)))
#define simdia_vmaddlf(a, b, c) (simdia_vaddlf(simdia_vmullf((a), (b)), (c)))
00410
00411
00412 #define simdia_vrecipf(a) (_mm_rcp_ps(a))
00413 inline simdia_veclf simdia_vreciplf(const simdia_veclf a) { simdia_veclf r; double* a_ptr = (double*)(&a); double* r_ptr = (double*)(&r); r_ptr[0] = 1.0f / a_ptr[0]; r_ptr[1] = 1.0f / a_ptr[1]; return r; }
00414
00415
/* Square root (SSE): direct intrinsic mappings. */
#define simdia_vsqrtf(a) (_mm_sqrt_ps(a))
#define simdia_vsqrtlf(a) (_mm_sqrt_pd(a))

/*
 * Reciprocal square root (SSE).
 * Float: forwards to _mm_rsqrt_ps.
 * Double: composed as reciprocal(sqrt(a)) from this section's own helpers.
 * They must be referenced by their simdia_-prefixed names; the unprefixed
 * vreciplf / vsqrtlf used previously are not defined in this section.
 */
#define simdia_vrsqrtf(a) (_mm_rsqrt_ps(a))
#define simdia_vrsqrtlf(a) (simdia_vreciplf(simdia_vsqrtlf(a)))
00422
00423
/*
 * Bitwise NOT (SSE): XOR with an all-ones mask.
 *
 * simdia_const_vnegonei is an integer vector (__m128i, every lane -1, i.e.
 * all bits set). _mm_xor_ps and _mm_xor_pd take __m128 / __m128d operands,
 * so the mask is reinterpreted with the SSE2 cast intrinsics (which change
 * only the type, not the bits) before being used with float/double vectors.
 */
#define simdia_vnoti(a) (_mm_xor_si128((a), simdia_const_vnegonei))
#define simdia_vnotf(a) (_mm_xor_ps((a), _mm_castsi128_ps(simdia_const_vnegonei)))
#define simdia_vnotlf(a) (_mm_xor_pd((a), _mm_castsi128_pd(simdia_const_vnegonei)))
00427
00428
/* Bitwise logic (SSE). The negated forms (nor / nand / nxor) are built by
 * applying this section's simdia_vnot* to the plain operation. */

/* OR */
#define simdia_vori(lhs, rhs) (_mm_or_si128((lhs), (rhs)))
#define simdia_vorf(lhs, rhs) (_mm_or_ps((lhs), (rhs)))
#define simdia_vorlf(lhs, rhs) (_mm_or_pd((lhs), (rhs)))

/* NOR: ~(lhs | rhs) */
#define simdia_vnori(lhs, rhs) (simdia_vnoti(simdia_vori((lhs), (rhs))))
#define simdia_vnorf(lhs, rhs) (simdia_vnotf(simdia_vorf((lhs), (rhs))))
#define simdia_vnorlf(lhs, rhs) (simdia_vnotlf(simdia_vorlf((lhs), (rhs))))

/* AND */
#define simdia_vandi(lhs, rhs) (_mm_and_si128((lhs), (rhs)))
#define simdia_vandf(lhs, rhs) (_mm_and_ps((lhs), (rhs)))
#define simdia_vandlf(lhs, rhs) (_mm_and_pd((lhs), (rhs)))

/* NAND: ~(lhs & rhs) */
#define simdia_vnandi(lhs, rhs) (simdia_vnoti(simdia_vandi((lhs), (rhs))))
#define simdia_vnandf(lhs, rhs) (simdia_vnotf(simdia_vandf((lhs), (rhs))))
#define simdia_vnandlf(lhs, rhs) (simdia_vnotlf(simdia_vandlf((lhs), (rhs))))

/* XOR */
#define simdia_vxori(lhs, rhs) (_mm_xor_si128((lhs), (rhs)))
#define simdia_vxorf(lhs, rhs) (_mm_xor_ps((lhs), (rhs)))
#define simdia_vxorlf(lhs, rhs) (_mm_xor_pd((lhs), (rhs)))

/* NXOR: ~(lhs ^ rhs) */
#define simdia_vnxori(lhs, rhs) (simdia_vnoti(simdia_vxori((lhs), (rhs))))
#define simdia_vnxorf(lhs, rhs) (simdia_vnotf(simdia_vxorf((lhs), (rhs))))
#define simdia_vnxorlf(lhs, rhs) (simdia_vnotlf(simdia_vxorlf((lhs), (rhs))))
00457
00458
/*
 * Lane-wise comparisons (SSE). Every macro yields an integer vector
 * (__m128i) whose lanes are all-ones where the relation holds and all-zeros
 * where it does not.
 *
 * - Float/double results are converted with _mm_castps_si128 /
 *   _mm_castpd_si128 (bit-preserving) instead of a C-style cast between
 *   vector types, which is a compiler extension.
 * - SSE2 only provides eq / gt / lt for 32-bit integers, so the integer
 *   ">=" and "<=" are formed as NOT(lt) and NOT(gt): _mm_andnot_si128(m, ones)
 *   computes (~m) & ones and evaluates each macro argument exactly once.
 */

/* Equal */
#define simdia_vcmpeqi(a, b) (_mm_cmpeq_epi32((a), (b)))
#define simdia_vcmpeqf(a, b) (_mm_castps_si128(_mm_cmpeq_ps((a), (b))))
#define simdia_vcmpeqlf(a, b) (_mm_castpd_si128(_mm_cmpeq_pd((a), (b))))

/* Greater than */
#define simdia_vcmpgti(a, b) (_mm_cmpgt_epi32((a), (b)))
#define simdia_vcmpgtf(a, b) (_mm_castps_si128(_mm_cmpgt_ps((a), (b))))
#define simdia_vcmpgtlf(a, b) (_mm_castpd_si128(_mm_cmpgt_pd((a), (b))))

/* Greater than or equal */
#define simdia_vcmpgei(a, b) (_mm_andnot_si128(_mm_cmplt_epi32((a), (b)), _mm_set1_epi32(-1)))
#define simdia_vcmpgef(a, b) (_mm_castps_si128(_mm_cmpge_ps((a), (b))))
#define simdia_vcmpgelf(a, b) (_mm_castpd_si128(_mm_cmpge_pd((a), (b))))

/* Less than */
#define simdia_vcmplti(a, b) (_mm_cmplt_epi32((a), (b)))
#define simdia_vcmpltf(a, b) (_mm_castps_si128(_mm_cmplt_ps((a), (b))))
#define simdia_vcmpltlf(a, b) (_mm_castpd_si128(_mm_cmplt_pd((a), (b))))

/* Less than or equal */
#define simdia_vcmplei(a, b) (_mm_andnot_si128(_mm_cmpgt_epi32((a), (b)), _mm_set1_epi32(-1)))
#define simdia_vcmplef(a, b) (_mm_castps_si128(_mm_cmple_ps((a), (b))))
#define simdia_vcmplelf(a, b) (_mm_castpd_si128(_mm_cmple_pd((a), (b))))
00482
00483
00484
00485
00486
00487
00488
00489
00490 #elif (CMK_CELL_SPE != 0) && (!(SIMDIA_FORCE_NO_SPE_SIMD))
00491
00492
00493 typedef vector signed int simdia_veci;
00494 typedef vector float simdia_vecf;
00495 typedef vector double simdia_veclf;
00496
00497
00498 #define simdia_vinserti(v, s, i) (spu_insert((s), (v), (i)))
00499 #define simdia_vinsertf(v, s, i) (spu_insert((s), (v), (i)))
00500 #define simdia_vinsertlf(v, s, i) (spu_insert((s), (v), (i)))
00501
00502
00503 #define simdia_vextracti(v, i) (spu_extract((v), (i)))
00504 #define simdia_vextractf(v, i) (spu_extract((v), (i)))
00505 #define simdia_vextractlf(v, i) (spu_extract((v), (i)))
00506
00507
/* Set (SPE): broadcast one scalar to every lane. The cast selects which
 * vector type spu_splats produces. */
#define simdia_vseti(a) (spu_splats((int)(a)))
#define simdia_vsetf(a) (spu_splats((float)(a)))
#define simdia_vsetlf(a) (spu_splats((double)(a)))

/*
 * Constant vectors (SPE), built with the set macros above. They must be
 * referenced by their simdia_-prefixed names: no unprefixed vseti / vsetf /
 * vsetlf exist in this section, so the previous definitions failed to
 * compile whenever a constant was expanded.
 */

/* All lanes 0 */
#define simdia_const_vzeroi (simdia_vseti(0))
#define simdia_const_vzerof (simdia_vsetf(0.0f))
#define simdia_const_vzerolf (simdia_vsetlf(0.0))

/* All lanes 1 */
#define simdia_const_vonei (simdia_vseti(1))
#define simdia_const_vonef (simdia_vsetf(1.0f))
#define simdia_const_vonelf (simdia_vsetlf(1.0))

/* All lanes 2 */
#define simdia_const_vtwoi (simdia_vseti(2))
#define simdia_const_vtwof (simdia_vsetf(2.0f))
#define simdia_const_vtwolf (simdia_vsetlf(2.0))

/* All lanes -1 */
#define simdia_const_vnegonei (simdia_vseti(-1))
#define simdia_const_vnegonef (simdia_vsetf(-1.0f))
#define simdia_const_vnegonelf (simdia_vsetlf(-1.0))
00531
00532
/*
 * Lane rotations (SPE), all expressed through spu_rlqwbyte with a byte count.
 *   roth, 4 lanes: 16 - 4 * (count mod 4) bytes
 *   rotl, 4 lanes:      4 * (count mod 4) bytes
 *   2 lanes (double):   8 * (count mod 2) bytes for both directions
 * The lane count is reduced by masking before it is scaled to bytes.
 */
#define simdia_vrothi(vec, count) (spu_rlqwbyte((vec), (0x10 - (((count) & 0x3) << 2))))
#define simdia_vrothf(vec, count) (spu_rlqwbyte((vec), (0x10 - (((count) & 0x3) << 2))))
#define simdia_vrothlf(vec, count) (spu_rlqwbyte((vec), (((count) & 0x1) << 3)))
#define simdia_vrotli(vec, count) (spu_rlqwbyte((vec), (((count) & 0x3) << 2)))
#define simdia_vrotlf(vec, count) (spu_rlqwbyte((vec), (((count) & 0x3) << 2)))
#define simdia_vrotllf(vec, count) (spu_rlqwbyte((vec), (((count) & 0x1) << 3)))
00539
00540
/* Lane-wise arithmetic (SPE): the generic spu_* intrinsics pick the
 * operation from the operand vector type. */

/* Addition: lhs + rhs */
#define simdia_vaddi(lhs, rhs) (spu_add((lhs), (rhs)))
#define simdia_vaddf(lhs, rhs) (spu_add((lhs), (rhs)))
#define simdia_vaddlf(lhs, rhs) (spu_add((lhs), (rhs)))

/* Subtraction: lhs - rhs */
#define simdia_vsubi(lhs, rhs) (spu_sub((lhs), (rhs)))
#define simdia_vsubf(lhs, rhs) (spu_sub((lhs), (rhs)))
#define simdia_vsublf(lhs, rhs) (spu_sub((lhs), (rhs)))

/* Multiplication: lhs * rhs (float and double only) */
#define simdia_vmulf(lhs, rhs) (spu_mul((lhs), (rhs)))
#define simdia_vmullf(lhs, rhs) (spu_mul((lhs), (rhs)))
00554
00555 #define simdia_vdivf(a, b) (spu_mul((a), spu_re(b)))
00556 inline simdia_veclf simdia_vdivlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; spu_insert((spu_extract(a, 0) / spu_extract(b, 0)), r, 0); spu_insert((spu_extract(a, 1) / spu_extract(b, 1)), r, 1); return r; }
00557
00558
00559 #define simdia_vmaddf(a, b, c) (spu_madd((a), (b), (c)))
00560 #define simdia_vmaddlf(a, b, c) (spu_madd((a), (b), (c)))
00561
00562
00563 #define simdia_vrecipf(a) (spu_re(a))
00564 inline simdia_veclf simdia_vreciplf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; spu_insert((1.0f / spu_extract(a, 0)), r, 0); spu_insert((1.0f / spu_extract(a, 1)), r, 1); return r; }
00565
00566
00567 #define simdia_vsqrtf(a) (spu_re(spu_rsqrte(a)))
00568 inline simdia_veclf simdia_vsqrtlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; spu_insert(sqrt(spu_extract(a, 0)), r, 0); spu_insert(sqrt(spu_extract(a, 1)), r, 1); return r; }
00569
00570
00571 #define simdia_vrsqrtf(a) (spu_rsqrte(a))
00572 inline simdia_veclf simdia_vrsqrtlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; spu_insert((1.0f / sqrt(spu_extract(a, 0))), r, 0); spu_insert((1.0f / sqrt(spu_extract(a, 1))), r, 1); return r; }
00573
00574
/* Bitwise logic (SPE): the generic spu_* intrinsics are used for all three
 * vector types. */

/* NOT: expressed as NOR of the operand with itself (the argument expression
 * therefore appears twice in the expansion). */
#define simdia_vnoti(x) (spu_nor((x), (x)))
#define simdia_vnotf(x) (spu_nor((x), (x)))
#define simdia_vnotlf(x) (spu_nor((x), (x)))

/* OR */
#define simdia_vori(lhs, rhs) (spu_or((lhs), (rhs)))
#define simdia_vorf(lhs, rhs) (spu_or((lhs), (rhs)))
#define simdia_vorlf(lhs, rhs) (spu_or((lhs), (rhs)))

/* NOR: ~(lhs | rhs) */
#define simdia_vnori(lhs, rhs) (spu_nor((lhs), (rhs)))
#define simdia_vnorf(lhs, rhs) (spu_nor((lhs), (rhs)))
#define simdia_vnorlf(lhs, rhs) (spu_nor((lhs), (rhs)))

/* AND */
#define simdia_vandi(lhs, rhs) (spu_and((lhs), (rhs)))
#define simdia_vandf(lhs, rhs) (spu_and((lhs), (rhs)))
#define simdia_vandlf(lhs, rhs) (spu_and((lhs), (rhs)))

/* NAND: ~(lhs & rhs) */
#define simdia_vnandi(lhs, rhs) (spu_nand((lhs), (rhs)))
#define simdia_vnandf(lhs, rhs) (spu_nand((lhs), (rhs)))
#define simdia_vnandlf(lhs, rhs) (spu_nand((lhs), (rhs)))

/* XOR */
#define simdia_vxori(lhs, rhs) (spu_xor((lhs), (rhs)))
#define simdia_vxorf(lhs, rhs) (spu_xor((lhs), (rhs)))
#define simdia_vxorlf(lhs, rhs) (spu_xor((lhs), (rhs)))

/* NXOR: ~(lhs ^ rhs), built from this section's NOT and XOR */
#define simdia_vnxori(lhs, rhs) (simdia_vnoti(simdia_vxori((lhs), (rhs))))
#define simdia_vnxorf(lhs, rhs) (simdia_vnotf(simdia_vxorf((lhs), (rhs))))
#define simdia_vnxorlf(lhs, rhs) (simdia_vnotlf(simdia_vxorlf((lhs), (rhs))))
00608
00609
/* Lane-wise comparisons (SPE). The eq / gt results are cast to simdia_veci;
 * ge and lt are derived from those two, so their argument expressions appear
 * twice in the expansion. */

/* Equal */
#define simdia_vcmpeqi(lhs, rhs) ((simdia_veci)(spu_cmpeq((lhs), (rhs))))
#define simdia_vcmpeqf(lhs, rhs) ((simdia_veci)(spu_cmpeq((lhs), (rhs))))
#define simdia_vcmpeqlf(lhs, rhs) ((simdia_veci)(spu_cmpeq((lhs), (rhs))))

/* Greater than */
#define simdia_vcmpgti(lhs, rhs) ((simdia_veci)(spu_cmpgt((lhs), (rhs))))
#define simdia_vcmpgtf(lhs, rhs) ((simdia_veci)(spu_cmpgt((lhs), (rhs))))
#define simdia_vcmpgtlf(lhs, rhs) ((simdia_veci)(spu_cmpgt((lhs), (rhs))))

/* Greater than or equal: (lhs == rhs) | (lhs > rhs) */
#define simdia_vcmpgei(lhs, rhs) (spu_or(simdia_vcmpeqi((lhs), (rhs)), simdia_vcmpgti((lhs), (rhs))))
#define simdia_vcmpgef(lhs, rhs) (spu_or(simdia_vcmpeqf((lhs), (rhs)), simdia_vcmpgtf((lhs), (rhs))))
#define simdia_vcmpgelf(lhs, rhs) (spu_or(simdia_vcmpeqlf((lhs), (rhs)), simdia_vcmpgtlf((lhs), (rhs))))

/* Less than: ~((lhs > rhs) | (lhs == rhs)) */
#define simdia_vcmplti(lhs, rhs) (spu_nor(simdia_vcmpgti((lhs), (rhs)), simdia_vcmpeqi((lhs), (rhs))))
#define simdia_vcmpltf(lhs, rhs) (spu_nor(simdia_vcmpgtf((lhs), (rhs)), simdia_vcmpeqf((lhs), (rhs))))
#define simdia_vcmpltlf(lhs, rhs) (spu_nor(simdia_vcmpgtlf((lhs), (rhs)), simdia_vcmpeqlf((lhs), (rhs))))
00630
00631
/*
 * Less than or equal (SPE): the complement of "greater than", formed as
 * NOR(gt, 0).
 *
 * simdia_vcmpgt* all yield simdia_veci (integer) masks, so the zero operand
 * handed to spu_nor must be an integer vector as well; pairing the mask with
 * the float or double zero constant mixes operand types. A literal integer
 * splat is used for all three variants.
 */
#define simdia_vcmplei(a, b) (spu_nor(simdia_vcmpgti((a), (b)), spu_splats((int)0)))
#define simdia_vcmplef(a, b) (spu_nor(simdia_vcmpgtf((a), (b)), spu_splats((int)0)))
#define simdia_vcmplelf(a, b) (spu_nor(simdia_vcmpgtlf((a), (b)), spu_splats((int)0)))
00635
00636
00637
00638
00639
00640
00641
00642 #elif defined(__VEC__) && (!(SIMDIA_FORCE_NO_ALTIVEC))
00643
00644
00645 typedef vector signed int simdia_veci;
00646 typedef vector float simdia_vecf;
00647 #ifdef _ARCH_PWR7
00648
00656 typedef vector double simdia_veclf;
00657 #else
00658 typedef __simdia_veclf simdia_veclf;
00659 #endif
00660
00661
00662
00663
00664 #ifdef _ARCH_PWR7
00665
00666 #define simdia_vinserti(a, b, c) (vec_insert((b)), ((a)), ((c)))
00667 #define simdia_vinsertf(a, b, c) (vec_insert((b)), ((a)), ((c)))
00668 #define simdia_vinsertlf(a, b, c) (vec_insert((b)), ((a)), ((c)))
00669 #else
00670 inline simdia_veci simdia_vinserti( simdia_veci v, const int s, const int i) { simdia_veci r = v; int* rPtr = ( int*)(&r); rPtr[i] = s; return r; }
00671 inline simdia_vecf simdia_vinsertf( simdia_vecf v, const float s, const int i) { simdia_vecf r = v; float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00672 #define simdia_vinsertlf __simdia_vinsertlf
00673 #endif
00674
00675
00676 #ifdef _ARCH_PWR7
00677 #define simdia_vextracti(a, b) (vec_extract((a), (b)))
00678 #define simdia_vextractf(a, b) (vec_extract((a), (b)))
00679 #define simdia_vextractlf(a, b) (vec_extract((a), (b)))
00680 #else
00681
00682 inline int simdia_vextracti( simdia_veci v, const int i) { int* vPtr = ( int*)(&v); return vPtr[i]; }
00683 inline float simdia_vextractf( simdia_vecf v, const int i) { float* vPtr = ( float*)(&v); return vPtr[i]; }
00684 #define simdia_vextractlf __simdia_vextractlf
00685 #endif
00686
00687
00688 #ifdef _ARCH_PWR7
00689 #define simdia_vseti(a) (vec_promote((a), 0))
00690 #define simdia_vsetf(a) (vec_promote((a), 0))
00691 #define simdia_vsetlf(a) (vec_promote((a), 0))
00692 #else
00693
00694
00695
00696
00697
00698
00699
00700
00701
00702 inline simdia_veci simdia_vseti(const int a) { __simdia_veci r; r.v0 = a; return vec_splat(*((simdia_veci*)(&r)), 0); }
00703 inline simdia_vecf simdia_vsetf(const float a) { __simdia_vecf r; r.v0 = a; return vec_splat(*((simdia_vecf*)(&r)), 0); }
00704 #define simdia_vsetlf __simdia_vsetlf
00705 #endif
00706
00707 inline vector unsigned char simdia_vset16uc(const unsigned char c) { vector unsigned char r __attribute__((aligned(16))); ((unsigned char*)(&r))[0] = c; return vec_splat(r, 0); }
00708
00709
/*
 * Constant vectors (AltiVec).
 *
 * int:    vec_splat_s32(n)
 * float:  vec_ctf(vec_splat_s32(n), 0), i.e. the integer splat converted to
 *         float with a scale argument of 0
 * double: with _ARCH_PWR7, vec_splats of a double literal. The literal must
 *         be a double: vec_splats picks its result type from the argument,
 *         so an int literal would yield an integer vector. Without
 *         _ARCH_PWR7 the generic __simdia_const_* constants are used.
 */

/* All lanes 0 */
#define simdia_const_vzeroi (vec_splat_s32(0))
#define simdia_const_vzerof (vec_ctf(vec_splat_s32(0), 0))
#ifdef _ARCH_PWR7
#define simdia_const_vzerolf (vec_splats(0.0))
#else
#define simdia_const_vzerolf (__simdia_const_vzerolf)
#endif

/* All lanes 1 */
#define simdia_const_vonei (vec_splat_s32(1))
#define simdia_const_vonef (vec_ctf(vec_splat_s32(1), 0))
#ifdef _ARCH_PWR7
#define simdia_const_vonelf (vec_splats(1.0))
#else
#define simdia_const_vonelf (__simdia_const_vonelf)
#endif

/* All lanes 2 */
#define simdia_const_vtwoi (vec_splat_s32(2))
#define simdia_const_vtwof (vec_ctf(vec_splat_s32(2), 0))
#ifdef _ARCH_PWR7
#define simdia_const_vtwolf (vec_splats(2.0))
#else
#define simdia_const_vtwolf (__simdia_const_vtwolf)
#endif

/* All lanes -1. The non-PWR7 double constant is __simdia_const_vnegonelf,
 * the same name the scalar fallback section of this header aliases. */
#define simdia_const_vnegonei (vec_splat_s32(-1))
#define simdia_const_vnegonef (vec_ctf(vec_splat_s32(-1), 0))
#ifdef _ARCH_PWR7
#define simdia_const_vnegonelf (vec_splats(-1.0))
#else
#define simdia_const_vnegonelf (__simdia_const_vnegonelf)
#endif
00744
00745
/*
 * Lane rotations (AltiVec).
 *
 * __simdia_vrotlbytes / __simdia_vrotrbytes rotate a 16-byte vector by
 * (s & 0xf) bytes: a shift-by-octet in one direction is OR'd with a
 * shift-by-octet of the complementary amount (16 - n) in the other. The byte
 * count is shifted left by 3 before being splatted because vec_slo / vec_sro
 * read their shift amount from a bit field of the control vector. The control
 * vector is built by simdia_vset16uc, defined earlier in this section.
 *
 * The public macros convert a lane count to bytes (<< 2 for 4-byte lanes,
 * << 3 for 8-byte lanes); the helper's "& 0xf" then wraps the count.
 *
 * Without _ARCH_PWR7, simdia_veclf is the generic __simdia_veclf struct and
 * not a native vector, so the double rotates fall back to the generic
 * __simdia_vrothlf / __simdia_vrotllf, as every other double operation in
 * this section does.
 */
#define __simdia_vrotlbytes(a, s) (vec_or(vec_slo((a), simdia_vset16uc(((s) & 0xf) << 3)), vec_sro((a), simdia_vset16uc((16 - ((s) & 0xf)) << 3))))
#define __simdia_vrotrbytes(a, s) (vec_or(vec_sro((a), simdia_vset16uc(((s) & 0xf) << 3)), vec_slo((a), simdia_vset16uc((16 - ((s) & 0xf)) << 3))))
#define simdia_vrotli(a, s) __simdia_vrotlbytes((a), ((s) << 2))
#define simdia_vrotlf(a, s) __simdia_vrotlbytes((a), ((s) << 2))
#define simdia_vrothi(a, s) __simdia_vrotrbytes((a), ((s) << 2))
#define simdia_vrothf(a, s) __simdia_vrotrbytes((a), ((s) << 2))
#ifdef _ARCH_PWR7
#define simdia_vrotllf(a, s) __simdia_vrotlbytes((a), ((s) << 3))
#define simdia_vrothlf(a, s) __simdia_vrotrbytes((a), ((s) << 3))
#else
#define simdia_vrotllf __simdia_vrotllf
#define simdia_vrothlf __simdia_vrothlf
#endif
00754
00755
/* Lane-wise arithmetic (AltiVec). Double operations use the native
 * intrinsics only when _ARCH_PWR7 is defined and otherwise alias the generic
 * __simdia_* implementations. */

/* Addition: lhs + rhs */
#define simdia_vaddi(lhs, rhs) (vec_add((lhs), (rhs)))
#define simdia_vaddf(lhs, rhs) (vec_add((lhs), (rhs)))
#ifdef _ARCH_PWR7
#define simdia_vaddlf(lhs, rhs) (vec_add((lhs), (rhs)))
#else
#define simdia_vaddlf __simdia_vaddlf
#endif

/* Subtraction: lhs - rhs */
#define simdia_vsubi(lhs, rhs) (vec_sub((lhs), (rhs)))
#define simdia_vsubf(lhs, rhs) (vec_sub((lhs), (rhs)))
#ifdef _ARCH_PWR7
#define simdia_vsublf(lhs, rhs) (vec_sub((lhs), (rhs)))
#else
#define simdia_vsublf __simdia_vsublf
#endif

/* Multiplication: lhs * rhs. Without _ARCH_PWR7 the float form is a
 * multiply-add whose addend vec_xor(lhs, lhs) has all bits zero; lhs
 * therefore appears three times in that expansion. */
#ifdef _ARCH_PWR7
#define simdia_vmulf(lhs, rhs) (vec_mul((lhs), (rhs)))
#define simdia_vmullf(lhs, rhs) (vec_mul((lhs), (rhs)))
#else
#define simdia_vmulf(lhs, rhs) (vec_madd((lhs), (rhs), vec_xor((lhs), (lhs))))
#define simdia_vmullf __simdia_vmullf
#endif
00782
00783
/*
 * Division (AltiVec): a / b.
 *
 * _ARCH_PWR7: vec_div(a, b). Both operands belong inside the call; with the
 * parentheses closed after the first argument the macro expanded to a
 * one-argument call followed by a comma-operator operand.
 *
 * Otherwise: the float form multiplies a by vec_re(b) (the AltiVec
 * reciprocal-estimate intrinsic), and the double form is the generic
 * __simdia_vdivlf.
 */
#ifdef _ARCH_PWR7
#define simdia_vdivf(a, b) (vec_div((a), (b)))
#define simdia_vdivlf(a, b) (vec_div((a), (b)))
#else
#define simdia_vdivf(a, b) (simdia_vmulf((a), vec_re(b)))
#define simdia_vdivlf __simdia_vdivlf
#endif
00791
00792
/* Multiply-add, reciprocal, square root and reciprocal square root
 * (AltiVec). In each group the double form uses a native intrinsic only when
 * _ARCH_PWR7 is defined and otherwise aliases the generic __simdia_*
 * implementation. */

/* Fused multiply-add: (x * y) + z */
#define simdia_vmaddf(x, y, z) (vec_madd((x), (y), (z)))
#ifdef _ARCH_PWR7
#define simdia_vmaddlf(x, y, z) (vec_madd((x), (y), (z)))
#else
#define simdia_vmaddlf __simdia_vmaddlf
#endif

/* Reciprocal, via vec_re */
#define simdia_vrecipf(x) (vec_re(x))
#ifdef _ARCH_PWR7
#define simdia_vreciplf(x) (vec_re(x))
#else
#define simdia_vreciplf __simdia_vreciplf
#endif

/* Square root. The float form is the reciprocal (vec_re) of the reciprocal
 * square root (vec_rsqrte). */
#define simdia_vsqrtf(x) (vec_re(vec_rsqrte(x)))
#ifdef _ARCH_PWR7
#define simdia_vsqrtlf(x) (vec_sqrt(x))
#else
#define simdia_vsqrtlf __simdia_vsqrtlf
#endif

/* Reciprocal square root, via vec_rsqrte */
#define simdia_vrsqrtf(x) (vec_rsqrte(x))
#ifdef _ARCH_PWR7
#define simdia_vrsqrtlf(x) (vec_rsqrte(x))
#else
#define simdia_vrsqrtlf __simdia_vrsqrtlf
#endif
00823
00824
/*
 * Bitwise NOT (AltiVec): XOR with an all-ones mask.
 *
 * simdia_const_vnegonei (every int lane -1) has all bits set. For the float
 * and double forms the mask is reinterpreted to the operand's vector type
 * with a vector cast, the same bit-preserving cast this section uses on its
 * comparison results, so that both vec_xor operands have the same type.
 *
 * vec_neg is not used: it is arithmetic negation, not a bitwise complement,
 * which would make the nand / nxor macros built on these wrong.
 *
 * Without _ARCH_PWR7 the double form is the generic __simdia_vnotlf.
 */
#define simdia_vnoti(a) (vec_xor((a), simdia_const_vnegonei))
#define simdia_vnotf(a) (vec_xor((a), (simdia_vecf)(simdia_const_vnegonei)))
#ifdef _ARCH_PWR7
#define simdia_vnotlf(a) (vec_xor((a), (simdia_veclf)(simdia_const_vnegonei)))
#else
#define simdia_vnotlf __simdia_vnotlf
#endif
00834
00835
/* Bitwise OR / NOR / AND (AltiVec). Double forms use the native intrinsic
 * only when _ARCH_PWR7 is defined and otherwise alias the generic
 * __simdia_* implementations. */

/* OR */
#define simdia_vori(lhs, rhs) (vec_or((lhs), (rhs)))
#define simdia_vorf(lhs, rhs) (vec_or((lhs), (rhs)))
#ifdef _ARCH_PWR7
#define simdia_vorlf(lhs, rhs) (vec_or((lhs), (rhs)))
#else
#define simdia_vorlf __simdia_vorlf
#endif

/* NOR: ~(lhs | rhs) */
#define simdia_vnori(lhs, rhs) (vec_nor((lhs), (rhs)))
#define simdia_vnorf(lhs, rhs) (vec_nor((lhs), (rhs)))
#ifdef _ARCH_PWR7
#define simdia_vnorlf(lhs, rhs) (vec_nor((lhs), (rhs)))
#else
#define simdia_vnorlf __simdia_vnorlf
#endif

/* AND */
#define simdia_vandi(lhs, rhs) (vec_and((lhs), (rhs)))
#define simdia_vandf(lhs, rhs) (vec_and((lhs), (rhs)))
#ifdef _ARCH_PWR7
#define simdia_vandlf(lhs, rhs) (vec_and((lhs), (rhs)))
#else
#define simdia_vandlf __simdia_vandlf
#endif
00861
00862
/*
 * NAND (AltiVec): ~(a & b), composed from this section's NOT and AND macros.
 * Each variant uses the helpers of its own element type; the _ARCH_PWR7
 * double form is built from simdia_vnotlf / simdia_vandlf rather than the
 * float helpers. Without _ARCH_PWR7 the double form is the generic
 * __simdia_vnandlf.
 */
#define simdia_vnandi(a, b) (simdia_vnoti(simdia_vandi((a), (b))))
#define simdia_vnandf(a, b) (simdia_vnotf(simdia_vandf((a), (b))))
#ifdef _ARCH_PWR7
#define simdia_vnandlf(a, b) (simdia_vnotlf(simdia_vandlf((a), (b))))
#else
#define simdia_vnandlf __simdia_vnandlf
#endif
00870
00871
/* XOR (AltiVec). The double form uses vec_xor only when _ARCH_PWR7 is
 * defined and otherwise aliases the generic __simdia_vxorlf. */
#define simdia_vxori(lhs, rhs) (vec_xor((lhs), (rhs)))
#define simdia_vxorf(lhs, rhs) (vec_xor((lhs), (rhs)))
#ifdef _ARCH_PWR7
#define simdia_vxorlf(lhs, rhs) (vec_xor((lhs), (rhs)))
#else
#define simdia_vxorlf __simdia_vxorlf
#endif
00879
00880
/*
 * NXOR (AltiVec): ~(a ^ b), composed from this section's NOT and XOR macros.
 * Each variant uses the helpers of its own element type; the _ARCH_PWR7
 * double form pairs simdia_vnotlf with simdia_vxorlf (not the float XOR).
 * Without _ARCH_PWR7 the double form is the generic __simdia_vnxorlf.
 */
#define simdia_vnxori(a, b) (simdia_vnoti(simdia_vxori((a), (b))))
#define simdia_vnxorf(a, b) (simdia_vnotf(simdia_vxorf((a), (b))))
#ifdef _ARCH_PWR7
#define simdia_vnxorlf(a, b) (simdia_vnotlf(simdia_vxorlf((a), (b))))
#else
#define simdia_vnxorlf __simdia_vnxorlf
#endif
00888
00889
/* Lane-wise comparisons (AltiVec). The result of each vec_cmp* intrinsic is
 * cast to simdia_veci. Double forms use the native intrinsic only when
 * _ARCH_PWR7 is defined and otherwise alias the generic __simdia_vcmp*
 * implementations. */

/* Equal */
#define simdia_vcmpeqi(lhs, rhs) ((simdia_veci)(vec_cmpeq((lhs), (rhs))))
#define simdia_vcmpeqf(lhs, rhs) ((simdia_veci)(vec_cmpeq((lhs), (rhs))))
#ifdef _ARCH_PWR7
#define simdia_vcmpeqlf(lhs, rhs) ((simdia_veci)(vec_cmpeq((lhs), (rhs))))
#else
#define simdia_vcmpeqlf __simdia_vcmpeqlf
#endif

/* Greater than */
#define simdia_vcmpgti(lhs, rhs) ((simdia_veci)(vec_cmpgt((lhs), (rhs))))
#define simdia_vcmpgtf(lhs, rhs) ((simdia_veci)(vec_cmpgt((lhs), (rhs))))
#ifdef _ARCH_PWR7
#define simdia_vcmpgtlf(lhs, rhs) ((simdia_veci)(vec_cmpgt((lhs), (rhs))))
#else
#define simdia_vcmpgtlf __simdia_vcmpgtlf
#endif

/* Greater than or equal */
#define simdia_vcmpgei(lhs, rhs) ((simdia_veci)(vec_cmpge((lhs), (rhs))))
#define simdia_vcmpgef(lhs, rhs) ((simdia_veci)(vec_cmpge((lhs), (rhs))))
#ifdef _ARCH_PWR7
#define simdia_vcmpgelf(lhs, rhs) ((simdia_veci)(vec_cmpge((lhs), (rhs))))
#else
#define simdia_vcmpgelf __simdia_vcmpgelf
#endif

/* Less than */
#define simdia_vcmplti(lhs, rhs) ((simdia_veci)(vec_cmplt((lhs), (rhs))))
#define simdia_vcmpltf(lhs, rhs) ((simdia_veci)(vec_cmplt((lhs), (rhs))))
#ifdef _ARCH_PWR7
#define simdia_vcmpltlf(lhs, rhs) ((simdia_veci)(vec_cmplt((lhs), (rhs))))
#else
#define simdia_vcmpltlf __simdia_vcmpltlf
#endif

/* Less than or equal */
#define simdia_vcmplei(lhs, rhs) ((simdia_veci)(vec_cmple((lhs), (rhs))))
#define simdia_vcmplef(lhs, rhs) ((simdia_veci)(vec_cmple((lhs), (rhs))))
#ifdef _ARCH_PWR7
#define simdia_vcmplelf(lhs, rhs) ((simdia_veci)(vec_cmple((lhs), (rhs))))
#else
#define simdia_vcmplelf __simdia_vcmplelf
#endif
00935
00936
00937
00938
00939
00940
00941
00942 #else
00943
00944
/*
 * Scalar fallback: this is the final #else of the architecture selection, so
 * it is used when none of the SIMD-specific sections above was chosen. Every
 * public simdia_* name is a plain alias for the matching __simdia_* name.
 *
 * The vector types are the plain structs declared at the top of this header
 * (four ints, four floats, two doubles).
 */
00945 typedef __simdia_veci simdia_veci;
00946 typedef __simdia_vecf simdia_vecf;
00947 typedef __simdia_veclf simdia_veclf;
00948
00949
/* Insert: aliases of the generic inline functions defined at the top of this
 * header, which return a copy of the vector with one element replaced. */
00950 #define simdia_vinserti __simdia_vinserti
00951 #define simdia_vinsertf __simdia_vinsertf
00952 #define simdia_vinsertlf __simdia_vinsertlf
00953
00954
/* Extract: aliases of the generic inline functions defined at the top of
 * this header, which return one element of the vector. */
00955 #define simdia_vextracti __simdia_vextracti
00956 #define simdia_vextractf __simdia_vextractf
00957 #define simdia_vextractlf __simdia_vextractlf
00958
00959
/* Set. The __simdia_vset* targets are not visible in this part of the file;
 * presumably they are defined in the generic section earlier in the header. */
00960 #define simdia_vseti __simdia_vseti
00961 #define simdia_vsetf __simdia_vsetf
00962 #define simdia_vsetlf __simdia_vsetlf
00963
00964
/* Constant vectors (zero, one, two, negative one), aliased to the generic
 * __simdia_const_* definitions. */
00965 #define simdia_const_vzeroi __simdia_const_vzeroi
00966 #define simdia_const_vzerof __simdia_const_vzerof
00967 #define simdia_const_vzerolf __simdia_const_vzerolf
00968
00969
00970 #define simdia_const_vonei __simdia_const_vonei
00971 #define simdia_const_vonef __simdia_const_vonef
00972 #define simdia_const_vonelf __simdia_const_vonelf
00973
00974
00975 #define simdia_const_vtwoi __simdia_const_vtwoi
00976 #define simdia_const_vtwof __simdia_const_vtwof
00977 #define simdia_const_vtwolf __simdia_const_vtwolf
00978
00979
00980 #define simdia_const_vnegonei __simdia_const_vnegonei
00981 #define simdia_const_vnegonef __simdia_const_vnegonef
00982 #define simdia_const_vnegonelf __simdia_const_vnegonelf
00983
00984
/* Lane rotations, aliased to the generic __simdia_vroth* / __simdia_vrotl*
 * implementations. */
00985 #define simdia_vrothi __simdia_vrothi
00986 #define simdia_vrothf __simdia_vrothf
00987 #define simdia_vrothlf __simdia_vrothlf
00988 #define simdia_vrotli __simdia_vrotli
00989 #define simdia_vrotlf __simdia_vrotlf
00990 #define simdia_vrotllf __simdia_vrotllf
00991
00992
/*
 * Scalar fallback, arithmetic: each public name is an alias for the generic
 * __simdia_* implementation of the same operation. The targets are not
 * visible in this part of the file; presumably they are defined in the
 * generic section earlier in the header.
 */

/* Addition */
00993 #define simdia_vaddi __simdia_vaddi
00994 #define simdia_vaddf __simdia_vaddf
00995 #define simdia_vaddlf __simdia_vaddlf
00996
00997
/* Subtraction */
00998 #define simdia_vsubi __simdia_vsubi
00999 #define simdia_vsubf __simdia_vsubf
01000 #define simdia_vsublf __simdia_vsublf
01001
01002
/* Multiplication (float and double only) */
01003 #define simdia_vmulf __simdia_vmulf
01004 #define simdia_vmullf __simdia_vmullf
01005
01006
/* Division (float and double only) */
01007 #define simdia_vdivf __simdia_vdivf
01008 #define simdia_vdivlf __simdia_vdivlf
01009
01010
/* Multiply-add */
01011 #define simdia_vmaddf __simdia_vmaddf
01012 #define simdia_vmaddlf __simdia_vmaddlf
01013
01014
/* Reciprocal */
01015 #define simdia_vrecipf __simdia_vrecipf
01016 #define simdia_vreciplf __simdia_vreciplf
01017
01018
/* Square root */
01019 #define simdia_vsqrtf __simdia_vsqrtf
01020 #define simdia_vsqrtlf __simdia_vsqrtlf
01021
01022
/* Reciprocal square root */
01023 #define simdia_vrsqrtf __simdia_vrsqrtf
01024 #define simdia_vrsqrtlf __simdia_vrsqrtlf
01025
01026
/*
 * Scalar fallback, bitwise logic: each public name is an alias for the
 * generic __simdia_* implementation of the same operation. The targets are
 * not visible in this part of the file; presumably they are defined in the
 * generic section earlier in the header.
 */

/* NOT */
01027 #define simdia_vnoti __simdia_vnoti
01028 #define simdia_vnotf __simdia_vnotf
01029 #define simdia_vnotlf __simdia_vnotlf
01030
01031
/* OR */
01032 #define simdia_vori __simdia_vori
01033 #define simdia_vorf __simdia_vorf
01034 #define simdia_vorlf __simdia_vorlf
01035
01036
/* NOR */
01037 #define simdia_vnori __simdia_vnori
01038 #define simdia_vnorf __simdia_vnorf
01039 #define simdia_vnorlf __simdia_vnorlf
01040
01041
/* AND */
01042 #define simdia_vandi __simdia_vandi
01043 #define simdia_vandf __simdia_vandf
01044 #define simdia_vandlf __simdia_vandlf
01045
01046
/* NAND */
01047 #define simdia_vnandi __simdia_vnandi
01048 #define simdia_vnandf __simdia_vnandf
01049 #define simdia_vnandlf __simdia_vnandlf
01050
01051
/* XOR */
01052 #define simdia_vxori __simdia_vxori
01053 #define simdia_vxorf __simdia_vxorf
01054 #define simdia_vxorlf __simdia_vxorlf
01055
01056
/* NXOR */
01057 #define simdia_vnxori __simdia_vnxori
01058 #define simdia_vnxorf __simdia_vnxorf
01059 #define simdia_vnxorlf __simdia_vnxorlf
01060
01061
/*
 * Scalar fallback, comparisons: each public name is an alias for the generic
 * __simdia_vcmp* implementation of the same relation. The targets are not
 * visible in this part of the file; presumably they are defined in the
 * generic section earlier in the header.
 */

/* Equal */
01062 #define simdia_vcmpeqi __simdia_vcmpeqi
01063 #define simdia_vcmpeqf __simdia_vcmpeqf
01064 #define simdia_vcmpeqlf __simdia_vcmpeqlf
01065
01066
/* Greater than */
01067 #define simdia_vcmpgti __simdia_vcmpgti
01068 #define simdia_vcmpgtf __simdia_vcmpgtf
01069 #define simdia_vcmpgtlf __simdia_vcmpgtlf
01070
01071
/* Greater than or equal */
01072 #define simdia_vcmpgei __simdia_vcmpgei
01073 #define simdia_vcmpgef __simdia_vcmpgef
01074 #define simdia_vcmpgelf __simdia_vcmpgelf
01075
01076
/* Less than */
01077 #define simdia_vcmplti __simdia_vcmplti
01078 #define simdia_vcmpltf __simdia_vcmpltf
01079 #define simdia_vcmpltlf __simdia_vcmpltlf
01080
01081
/* Less than or equal */
01082 #define simdia_vcmplei __simdia_vcmplei
01083 #define simdia_vcmplef __simdia_vcmplef
01084 #define simdia_vcmplelf __simdia_vcmplelf
01085
01086
01087 #endif
01088
01089
01090
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
/* Shared helpers, defined once on top of whichever section was selected
 * above. */

/* Number of elements per vector: vector size divided by element size. */
#define simdia_veci_numElems (sizeof(simdia_veci) / sizeof(int))
#define simdia_vecf_numElems (sizeof(simdia_vecf) / sizeof(float))
#define simdia_veclf_numElems (sizeof(simdia_veclf) / sizeof(double))

/* Spread: alternative spelling of the set (broadcast) operation. */
#define simdia_vspreadi(x) (simdia_vseti(x))
#define simdia_vspreadf(x) (simdia_vsetf(x))
#define simdia_vspreadlf(x) (simdia_vsetlf(x))

/* True only if every element is finite. Elements 0..3 (float) or 0..1
 * (double) are extracted in order and tested with isfinite(); the &&
 * chain stops at the first non-finite element. The vector argument
 * expression is expanded once per element tested. */
#define simdia_visfinitef(vec) (isfinite(simdia_vextractf((vec),0)) && isfinite(simdia_vextractf((vec),1)) && isfinite(simdia_vextractf((vec),2)) && isfinite(simdia_vextractf((vec),3)))
#define simdia_visfinitelf(vec) (isfinite(simdia_vextractlf((vec),0)) && isfinite(simdia_vextractlf((vec),1)))
01114
01115
/* Vector-with-scalar helpers: each trailing scalar operand is first
 * broadcast with simdia_vset* and the plain vector operation is then
 * applied. */

/* vec + scalar */
#define simdia_vaddis(vec, scalar) (simdia_vaddi((vec), simdia_vseti(scalar)))
#define simdia_vaddfs(vec, scalar) (simdia_vaddf((vec), simdia_vsetf(scalar)))
#define simdia_vaddlfs(vec, scalar) (simdia_vaddlf((vec), simdia_vsetlf(scalar)))

/* vec - scalar */
#define simdia_vsubis(vec, scalar) (simdia_vsubi((vec), simdia_vseti(scalar)))
#define simdia_vsubfs(vec, scalar) (simdia_vsubf((vec), simdia_vsetf(scalar)))
#define simdia_vsublfs(vec, scalar) (simdia_vsublf((vec), simdia_vsetlf(scalar)))

/* vec * scalar */
#define simdia_vmulfs(vec, scalar) (simdia_vmulf((vec), simdia_vsetf(scalar)))
#define simdia_vmullfs(vec, scalar) (simdia_vmullf((vec), simdia_vsetlf(scalar)))

/* vec / scalar */
#define simdia_vdivfs(vec, scalar) (simdia_vdivf((vec), simdia_vsetf(scalar)))
#define simdia_vdivlfs(vec, scalar) (simdia_vdivlf((vec), simdia_vsetlf(scalar)))

/* (x * y) + scalar: only the addend is a scalar */
#define simdia_vmaddfs(x, y, scalar) (simdia_vmaddf((x), (y), simdia_vsetf(scalar)))
#define simdia_vmaddlfs(x, y, scalar) (simdia_vmaddlf((x), (y), simdia_vsetlf(scalar)))

/* (x * factor) + addend: both the multiplier and the addend are scalars */
#define simdia_vmaddfss(x, factor, addend) (simdia_vmaddf((x), simdia_vsetf(factor), simdia_vsetf(addend)))
#define simdia_vmaddlfss(x, factor, addend) (simdia_vmaddlf((x), simdia_vsetlf(factor), simdia_vsetlf(addend)))
01140
01141
01142 #endif //__SIMDIA_H__