00001 #ifndef __SIMDIA_H__
00002 #define __SIMDIA_H__
00003
00004
00005 #if defined(__SSE2__) && !defined(_CRAYC)
00006 #include "emmintrin.h"
00007 #endif
00008
00009 #if CMK_CELL_SPE != 0
00010 #include "spu_intrinsics.h"
00011 #else
00012 #include "math.h"
00013 #endif
00014
00015 #if defined(__VEC__)
00016 #include "altivec.h"
00017 #ifdef pixel
00018 #undef pixel
00019 #endif
00020 #ifdef bool
00021 #undef bool
00022 #endif
00023 #endif
00024
00025
00026
/* Single-precision square-root fallback.  When CMK_HAS_SQRTF is zero (or not
 * defined at all), sqrtf() is provided as a macro that widens its argument to
 * double, calls sqrt(), and narrows the result back to float. */
#if CMK_HAS_SQRTF == 0
#define sqrtf(a) ((float)sqrt((double)(a)))
#endif
00030
00031
00032
00033
/* Build-time switches for turning individual SIMD back ends off.
 * A non-zero SIMDIA_FORCE_NO_SSE keeps the SSE2 section of this header from
 * being selected.  The AltiVec and SPE flags are presumably tested the same
 * way by their own sections (not visible here -- confirm).
 * Each flag defaults to 0 only when the build has not already defined it, so
 * it can be overridden from the command line (e.g. -DSIMDIA_FORCE_NO_SSE=1)
 * without editing this file or provoking a macro-redefinition diagnostic. */
#ifndef SIMDIA_FORCE_NO_SSE
#define SIMDIA_FORCE_NO_SSE (0)
#endif
#ifndef SIMDIA_FORCE_NO_ALTIVEC
#define SIMDIA_FORCE_NO_ALTIVEC (0)
#endif
#ifndef SIMDIA_FORCE_NO_SPE_SIMD
#define SIMDIA_FORCE_NO_SPE_SIMD (0)
#endif
00037
00038
00039
/* Mathematical constants as double-precision literals.
 * Each literal carries enough digits to round to the double nearest the true
 * value.  The 16-digit form 1.414213562373095 used previously for sqrt(2)
 * rounded to the neighbouring double one ulp too low. */
#define SIMDIA_CONSTANT_PI     (3.14159265358979323846)
#define SIMDIA_CONSTANT_E      (2.71828182845904523536)
#define SIMDIA_CONSTANT_SQRT_2 (1.41421356237309504880)
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00064
00065
00066
00067
00068
00069
00070
/* Plain-struct vector types used by the scalar (non-intrinsic) routines in
 * this header.  Lanes are stored as consecutive members v0, v1, ... */

/* Four int lanes. */
typedef struct __simdia_vec_i {
  int v0;
  int v1;
  int v2;
  int v3;
} __simdia_veci;

/* Four float lanes. */
typedef struct __simdia_vec_f {
  float v0;
  float v1;
  float v2;
  float v3;
} __simdia_vecf;

/* Two double lanes. */
typedef struct __simdia_vec_lf {
  double v0;
  double v1;
} __simdia_veclf;
00074
00075
00076
00077 inline __simdia_veci __simdia_vinserti( __simdia_veci v, const int s, const int i) { __simdia_veci r = v; int* rPtr = ( int*)(&r); rPtr[i] = s; return r; }
00078 inline __simdia_vecf __simdia_vinsertf( __simdia_vecf v, const float s, const int i) { __simdia_vecf r = v; float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00079 inline __simdia_veclf __simdia_vinsertlf(__simdia_veclf v, const double s, const int i) { __simdia_veclf r = v; double* rPtr = (double*)(&r); rPtr[i] = s; return r; }
00080
00081
00082 inline int __simdia_vextracti( __simdia_veci v, const int i) { int* vPtr = ( int*)(&v); return vPtr[i]; }
00083 inline float __simdia_vextractf( __simdia_vecf v, const int i) { float* vPtr = ( float*)(&v); return vPtr[i]; }
00084 inline double __simdia_vextractlf(__simdia_veclf v, const int i) { double* vPtr = (double*)(&v); return vPtr[i]; }
00085
00086
00087 inline __simdia_veci __simdia_vseti(const int a) { __simdia_veci r; r.v0 = r.v1 = r.v2 = r.v3 = a; return r; }
00088 inline __simdia_vecf __simdia_vsetf(const float a) { __simdia_vecf r; r.v0 = r.v1 = r.v2 = r.v3 = a; return r; }
00089 inline __simdia_veclf __simdia_vsetlf(const double a) { __simdia_veclf r; r.v0 = r.v1 = a; return r; }
00090
00091
00092
00093
00094 const __simdia_veci __simdia_const_vzeroi = { 0 , 0 , 0 , 0 };
00095 const __simdia_vecf __simdia_const_vzerof = { 0.0f, 0.0f, 0.0f, 0.0f };
00096 const __simdia_veclf __simdia_const_vzerolf = { 0.0 , 0.0 };
00097
00098
00099 const __simdia_veci __simdia_const_vonei = { 1 , 1 , 1 , 1 };
00100 const __simdia_vecf __simdia_const_vonef = { 1.0f, 1.0f, 1.0f, 1.0f };
00101 const __simdia_veclf __simdia_const_vonelf = { 1.0 , 1.0 };
00102
00103
00104 const __simdia_veci __simdia_const_vtwoi = { 2 , 2 , 2 , 2 };
00105 const __simdia_vecf __simdia_const_vtwof = { 2.0f, 2.0f, 2.0f, 2.0f };
00106 const __simdia_veclf __simdia_const_vtwolf = { 2.0 , 2.0 };
00107
00108
00109 const __simdia_veci __simdia_const_vnegonei = { -1 , -1 , -1 , -1 };
00110 const __simdia_vecf __simdia_const_vnegonef = { -1.0f, -1.0f, -1.0f, -1.0f };
00111 const __simdia_veclf __simdia_const_vnegonelf = { -1.0 , -1.0 };
00112
00113
00114
00115
00116
00117
00118 inline __simdia_veci __simdia_vrothi(const __simdia_veci a, int s) { __simdia_veci b; int* a_ptr = ( int*)(&a); int* b_ptr = ( int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00119 inline __simdia_vecf __simdia_vrothf(const __simdia_vecf a, int s) { __simdia_vecf b; float* a_ptr = ( float*)(&a); float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00120 inline __simdia_veclf __simdia_vrothlf(const __simdia_veclf a, int s) { __simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0-s)&0x1]; b_ptr[1] = a_ptr[(1-s)&0x1]; return b; }
00121 inline __simdia_veci __simdia_vrotli(const __simdia_veci a, int s) { __simdia_veci b; int* a_ptr = ( int*)(&a); int* b_ptr = ( int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00122 inline __simdia_vecf __simdia_vrotlf(const __simdia_vecf a, int s) { __simdia_vecf b; float* a_ptr = ( float*)(&a); float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00123 inline __simdia_veclf __simdia_vrotllf(const __simdia_veclf a, int s) { __simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0+s)&0x1]; b_ptr[1] = a_ptr[(1+s)&0x1]; return b; }
00124
00125
00126 inline __simdia_veci __simdia_vaddi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1; r.v2 = a.v2 + b.v2; r.v3 = a.v3 + b.v3; return r; }
00127 inline __simdia_vecf __simdia_vaddf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1; r.v2 = a.v2 + b.v2; r.v3 = a.v3 + b.v3; return r; }
00128 inline __simdia_veclf __simdia_vaddlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 + b.v0; r.v1 = a.v1 + b.v1; return r; }
00129
00130
00131 inline __simdia_veci __simdia_vsubi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1; r.v2 = a.v2 - b.v2; r.v3 = a.v3 - b.v3; return r; }
00132 inline __simdia_vecf __simdia_vsubf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1; r.v2 = a.v2 - b.v2; r.v3 = a.v3 - b.v3; return r; }
00133 inline __simdia_veclf __simdia_vsublf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 - b.v0; r.v1 = a.v1 - b.v1; return r; }
00134
00135
00136 inline __simdia_veci __simdia_vmuli(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1; r.v2 = a.v2 * b.v2; r.v3 = a.v3 * b.v3; return r; }
00137 inline __simdia_vecf __simdia_vmulf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1; r.v2 = a.v2 * b.v2; r.v3 = a.v3 * b.v3; return r; }
00138 inline __simdia_veclf __simdia_vmullf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 * b.v0; r.v1 = a.v1 * b.v1; return r; }
00139
00140
00141 inline __simdia_veci __simdia_vdivi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1; r.v2 = a.v2 / b.v2; r.v3 = a.v3 / b.v3; return r; }
00142 inline __simdia_vecf __simdia_vdivf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1; r.v2 = a.v2 / b.v2; r.v3 = a.v3 / b.v3; return r; }
00143 inline __simdia_veclf __simdia_vdivlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; r.v0 = a.v0 / b.v0; r.v1 = a.v1 / b.v1; return r; }
00144
00145
00146 inline __simdia_veci __simdia_vmaddi(const __simdia_veci a, const __simdia_veci b, const __simdia_veci c) { __simdia_veci r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1; r.v2 = a.v2 * b.v2 + c.v2; r.v3 = a.v3 * b.v3 + c.v3; return r; }
00147 inline __simdia_vecf __simdia_vmaddf(const __simdia_vecf a, const __simdia_vecf b, const __simdia_vecf c) { __simdia_vecf r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1; r.v2 = a.v2 * b.v2 + c.v2; r.v3 = a.v3 * b.v3 + c.v3; return r; }
00148 inline __simdia_veclf __simdia_vmaddlf(const __simdia_veclf a, const __simdia_veclf b, const __simdia_veclf c) { __simdia_veclf r; r.v0 = a.v0 * b.v0 + c.v0; r.v1 = a.v1 * b.v1 + c.v1; return r; }
00149
00150
00151
00152 inline __simdia_vecf __simdia_vrecipf(const __simdia_vecf a) { __simdia_vecf r; r.v0 = 1.0f / a.v0; r.v1 = 1.0f / a.v1; r.v2 = 1.0f / a.v2; r.v3 = 1.0f / a.v3; return r; }
00153 inline __simdia_veclf __simdia_vreciplf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = 1.0f / a.v0; r.v1 = 1.0f / a.v1; return r; }
00154
00155
00156 inline __simdia_vecf __simdia_vsqrtf(const __simdia_vecf a) { __simdia_vecf r; r.v0 = sqrtf(a.v0); r.v1 = sqrtf(a.v1); r.v2 = sqrtf(a.v2); r.v3 = sqrtf(a.v3); return r; }
00157 inline __simdia_veclf __simdia_vsqrtlf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = sqrt(a.v0); r.v1 = sqrt(a.v1); return r; }
00158
00159
00160 inline __simdia_vecf __simdia_vrsqrtf(const __simdia_vecf a) { __simdia_vecf r; r.v0 = 1.0f / sqrtf(a.v0); r.v1 = 1.0f / sqrtf(a.v1); r.v2 = 1.0f / sqrtf(a.v2); r.v3 = 1.0f / sqrtf(a.v3); return r; }
00161 inline __simdia_veclf __simdia_vrsqrtlf(const __simdia_veclf a) { __simdia_veclf r; r.v0 = 1.0 / sqrt(a.v0); r.v1 = 1.0 / sqrt(a.v1); return r; }
00162
00163
00164 inline __simdia_veci __simdia_vnoti(const __simdia_veci a) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00165 inline __simdia_vecf __simdia_vnotf(const __simdia_vecf a) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00166 inline __simdia_veclf __simdia_vnotlf(const __simdia_veclf a) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); rPtr[0] = aPtr[0] ^ -1; rPtr[1] = aPtr[1] ^ -1; rPtr[2] = aPtr[2] ^ -1; rPtr[3] = aPtr[3] ^ -1; return r; }
00167
00168
00169 inline __simdia_veci __simdia_vori(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00170 inline __simdia_vecf __simdia_vorf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00171 inline __simdia_veclf __simdia_vorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] | bPtr[0]; rPtr[1] = aPtr[1] | bPtr[1]; rPtr[2] = aPtr[2] | bPtr[2]; rPtr[3] = aPtr[3] | bPtr[3]; return r; }
00172
00173
00174 inline __simdia_veci __simdia_vnori(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00175 inline __simdia_vecf __simdia_vnorf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00176 inline __simdia_veclf __simdia_vnorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] | bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] | bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] | bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] | bPtr[3]) ^ -1; return r; }
00177
00178
00179 inline __simdia_veci __simdia_vandi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00180 inline __simdia_vecf __simdia_vandf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00181 inline __simdia_veclf __simdia_vandlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] & bPtr[0]; rPtr[1] = aPtr[1] & bPtr[1]; rPtr[2] = aPtr[2] & bPtr[2]; rPtr[3] = aPtr[3] & bPtr[3]; return r; }
00182
00183
00184 inline __simdia_veci __simdia_vnandi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00185 inline __simdia_vecf __simdia_vnandf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00186 inline __simdia_veclf __simdia_vnandlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] & bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] & bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] & bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] & bPtr[3]) ^ -1; return r; }
00187
00188
00189 inline __simdia_veci __simdia_vxori(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00190 inline __simdia_vecf __simdia_vxorf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00191 inline __simdia_veclf __simdia_vxorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = aPtr[0] ^ bPtr[0]; rPtr[1] = aPtr[1] ^ bPtr[1]; rPtr[2] = aPtr[2] ^ bPtr[2]; rPtr[3] = aPtr[3] ^ bPtr[3]; return r; }
00192
00193
00194 inline __simdia_veci __simdia_vnxori(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00195 inline __simdia_vecf __simdia_vnxorf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_vecf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00196 inline __simdia_veclf __simdia_vnxorlf(const __simdia_veclf a, const __simdia_veclf b) { __simdia_veclf r; int* rPtr = (int*)(&r); int* aPtr = (int*)(&a); int* bPtr = (int*)(&b); rPtr[0] = (aPtr[0] ^ bPtr[0]) ^ -1; rPtr[1] = (aPtr[1] ^ bPtr[1]) ^ -1; rPtr[2] = (aPtr[2] ^ bPtr[2]) ^ -1; rPtr[3] = (aPtr[3] ^ bPtr[3]) ^ -1; return r; }
00197
00198
00199
00200
00201 inline __simdia_veci __simdia_vcmpeqi(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 == b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 == b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00202 inline __simdia_veci __simdia_vcmpeqf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 == b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 == b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00203 inline __simdia_veci __simdia_vcmpeqlf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 == b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 == b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00204
00205
00206 inline __simdia_veci __simdia_vcmpgti(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 > b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 > b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00207 inline __simdia_veci __simdia_vcmpgtf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 > b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 > b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00208 inline __simdia_veci __simdia_vcmpgtlf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 > b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 > b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00209
00210
00211 inline __simdia_veci __simdia_vcmpgei(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 >= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 >= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00212 inline __simdia_veci __simdia_vcmpgef(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 >= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 >= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00213 inline __simdia_veci __simdia_vcmpgelf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 >= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 >= b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00214
00215
00216 inline __simdia_veci __simdia_vcmplti(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 < b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 < b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00217 inline __simdia_veci __simdia_vcmpltf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 < b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 < b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00218 inline __simdia_veci __simdia_vcmpltlf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 < b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 < b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00219
00220
00221 inline __simdia_veci __simdia_vcmplei(const __simdia_veci a, const __simdia_veci b) { __simdia_veci r; r.v0 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 <= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 <= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00222 inline __simdia_veci __simdia_vcmplef(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v1 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); r.v2 = ((a.v2 <= b.v2) ? (0xFFFFFFFF) : (0x0)); r.v3 = ((a.v3 <= b.v3) ? (0xFFFFFFFF) : (0x0)); return r; }
00223 inline __simdia_veci __simdia_vcmplelf(const __simdia_vecf a, const __simdia_vecf b) { __simdia_veci r; r.v0 = r.v1 = ((a.v0 <= b.v0) ? (0xFFFFFFFF) : (0x0)); r.v2 = r.v3 = ((a.v1 <= b.v1) ? (0xFFFFFFFF) : (0x0)); return r; }
00224
00225
00226
00227
00228
00229 #if defined(__cplusplus)
00230
00231
00232 inline __simdia_veci operator+(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vaddi(a, b); }
00233 inline __simdia_vecf operator+(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vaddf(a, b); }
00234 inline __simdia_veclf operator+(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vaddlf(a, b); }
00235 inline __simdia_veci operator+=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vaddi(a, b); return a; }
00236 inline __simdia_vecf operator+=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vaddf(a, b); return a; }
00237 inline __simdia_veclf operator+=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vaddlf(a, b); return a; }
00238
00239 inline __simdia_veci operator+(const __simdia_veci &a, const int &b) { return __simdia_vaddi(a, __simdia_vseti(b)); }
00240 inline __simdia_vecf operator+(const __simdia_vecf &a, const float &b) { return __simdia_vaddf(a, __simdia_vsetf(b)); }
00241 inline __simdia_veclf operator+(const __simdia_veclf &a, const double &b) { return __simdia_vaddlf(a, __simdia_vsetlf(b)); }
00242 inline __simdia_veci operator+=( __simdia_veci &a, const int &b) { a = __simdia_vaddi(a, __simdia_vseti(b)); return a; }
00243 inline __simdia_vecf operator+=( __simdia_vecf &a, const float &b) { a = __simdia_vaddf(a, __simdia_vsetf(b)); return a; }
00244 inline __simdia_veclf operator+=(__simdia_veclf &a, const double &b) { a = __simdia_vaddlf(a, __simdia_vsetlf(b)); return a; }
00245
00246
00247 inline __simdia_veci operator-(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vsubi(a, b); }
00248 inline __simdia_vecf operator-(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vsubf(a, b); }
00249 inline __simdia_veclf operator-(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vsublf(a, b); }
00250 inline __simdia_veci operator-=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vsubi(a, b); return a; }
00251 inline __simdia_vecf operator-=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vsubf(a, b); return a; }
00252 inline __simdia_veclf operator-=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vsublf(a, b); return a; }
00253
00254 inline __simdia_veci operator-(const __simdia_veci &a, const int &b) { return __simdia_vsubi(a, __simdia_vseti(b)); }
00255 inline __simdia_vecf operator-(const __simdia_vecf &a, const float &b) { return __simdia_vsubf(a, __simdia_vsetf(b)); }
00256 inline __simdia_veclf operator-(const __simdia_veclf &a, const double &b) { return __simdia_vsublf(a, __simdia_vsetlf(b)); }
00257 inline __simdia_veci operator-=( __simdia_veci &a, const int &b) { a = __simdia_vsubi(a, __simdia_vseti(b)); return a; }
00258 inline __simdia_vecf operator-=( __simdia_vecf &a, const float &b) { a = __simdia_vsubf(a, __simdia_vsetf(b)); return a; }
00259 inline __simdia_veclf operator-=(__simdia_veclf &a, const double &b) { a = __simdia_vsublf(a, __simdia_vsetlf(b)); return a; }
00260
00261
00262 inline __simdia_vecf operator*(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vmulf(a, b); }
00263 inline __simdia_veclf operator*(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vmullf(a, b); }
00264 inline __simdia_vecf operator*=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vmulf(a, b); return a; }
00265 inline __simdia_veclf operator*=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vmullf(a, b); return a; }
00266
00267 inline __simdia_vecf operator*(const __simdia_vecf &a, const float &b) { return __simdia_vmulf(a, __simdia_vsetf(b)); }
00268 inline __simdia_veclf operator*(const __simdia_veclf &a, const double &b) { return __simdia_vmullf(a, __simdia_vsetlf(b)); }
00269 inline __simdia_vecf operator*=( __simdia_vecf &a, const float &b) { a = __simdia_vmulf(a, __simdia_vsetf(b)); return a; }
00270 inline __simdia_veclf operator*=(__simdia_veclf &a, const double &b) { a = __simdia_vmullf(a, __simdia_vsetlf(b)); return a; }
00271
00272
00273 inline __simdia_vecf operator/(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vdivf(a, b); }
00274 inline __simdia_veclf operator/(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vdivlf(a, b); }
00275 inline __simdia_vecf operator/=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vdivf(a, b); return a; }
00276 inline __simdia_veclf operator/=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vdivlf(a, b); return a; }
00277
00278 inline __simdia_vecf operator/(const __simdia_vecf &a, const float &b) { return __simdia_vdivf(a, __simdia_vsetf(b)); }
00279 inline __simdia_veclf operator/(const __simdia_veclf &a, const double &b) { return __simdia_vdivlf(a, __simdia_vsetlf(b)); }
00280 inline __simdia_vecf operator/=( __simdia_vecf &a, const float &b) { a = __simdia_vdivf(a, __simdia_vsetf(b)); return a; }
00281 inline __simdia_veclf operator/=(__simdia_veclf &a, const double &b) { a = __simdia_vdivlf(a, __simdia_vsetlf(b)); return a; }
00282
00283
00284 inline __simdia_veci operator|(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vori(a, b); }
00285 inline __simdia_vecf operator|(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vorf(a, b); }
00286 inline __simdia_veclf operator|(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vorlf(a, b); }
00287 inline __simdia_veci operator|=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vori(a, b); return a; }
00288 inline __simdia_vecf operator|=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vorf(a, b); return a; }
00289 inline __simdia_veclf operator|=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vorlf(a, b); return a; }
00290
00291 inline __simdia_veci operator|(const __simdia_veci &a, const int &b) { return __simdia_vori(a, __simdia_vseti(b)); }
00292 inline __simdia_vecf operator|(const __simdia_vecf &a, const float &b) { return __simdia_vorf(a, __simdia_vsetf(b)); }
00293 inline __simdia_veclf operator|(const __simdia_veclf &a, const double &b) { return __simdia_vorlf(a, __simdia_vsetlf(b)); }
00294 inline __simdia_veci operator|=( __simdia_veci &a, const int &b) { a = __simdia_vori(a, __simdia_vseti(b)); return a; }
00295 inline __simdia_vecf operator|=( __simdia_vecf &a, const float &b) { a = __simdia_vorf(a, __simdia_vsetf(b)); return a; }
00296 inline __simdia_veclf operator|=(__simdia_veclf &a, const double &b) { a = __simdia_vorlf(a, __simdia_vsetlf(b)); return a; }
00297
00298
00299 inline __simdia_veci operator&(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vandi(a, b); }
00300 inline __simdia_vecf operator&(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vandf(a, b); }
00301 inline __simdia_veclf operator&(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vandlf(a, b); }
00302 inline __simdia_veci operator&=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vandi(a, b); return a; }
00303 inline __simdia_vecf operator&=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vandf(a, b); return a; }
00304 inline __simdia_veclf operator&=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vandlf(a, b); return a; }
00305
00306 inline __simdia_veci operator&(const __simdia_veci &a, const int &b) { return __simdia_vandi(a, __simdia_vseti(b)); }
00307 inline __simdia_vecf operator&(const __simdia_vecf &a, const float &b) { return __simdia_vandf(a, __simdia_vsetf(b)); }
00308 inline __simdia_veclf operator&(const __simdia_veclf &a, const double &b) { return __simdia_vandlf(a, __simdia_vsetlf(b)); }
00309 inline __simdia_veci operator&=( __simdia_veci &a, const int &b) { a = __simdia_vandi(a, __simdia_vseti(b)); return a; }
00310 inline __simdia_vecf operator&=( __simdia_vecf &a, const float &b) { a = __simdia_vandf(a, __simdia_vsetf(b)); return a; }
00311 inline __simdia_veclf operator&=(__simdia_veclf &a, const double &b) { a = __simdia_vandlf(a, __simdia_vsetlf(b)); return a; }
00312
00313
00314 inline __simdia_veci operator^(const __simdia_veci &a, const __simdia_veci &b) { return __simdia_vxori(a, b); }
00315 inline __simdia_vecf operator^(const __simdia_vecf &a, const __simdia_vecf &b) { return __simdia_vxorf(a, b); }
00316 inline __simdia_veclf operator^(const __simdia_veclf &a, const __simdia_veclf &b) { return __simdia_vxorlf(a, b); }
00317 inline __simdia_veci operator^=( __simdia_veci &a, const __simdia_veci &b) { a = __simdia_vxori(a, b); return a; }
00318 inline __simdia_vecf operator^=( __simdia_vecf &a, const __simdia_vecf &b) { a = __simdia_vxorf(a, b); return a; }
00319 inline __simdia_veclf operator^=(__simdia_veclf &a, const __simdia_veclf &b) { a = __simdia_vxorlf(a, b); return a; }
00320
00321 inline __simdia_veci operator^(const __simdia_veci &a, const int &b) { return __simdia_vxori(a, __simdia_vseti(b)); }
00322 inline __simdia_vecf operator^(const __simdia_vecf &a, const float &b) { return __simdia_vxorf(a, __simdia_vsetf(b)); }
00323 inline __simdia_veclf operator^(const __simdia_veclf &a, const double &b) { return __simdia_vxorlf(a, __simdia_vsetlf(b)); }
00324 inline __simdia_veci operator^=( __simdia_veci &a, const int &b) { a = __simdia_vxori(a, __simdia_vseti(b)); return a; }
00325 inline __simdia_vecf operator^=( __simdia_vecf &a, const float &b) { a = __simdia_vxorf(a, __simdia_vsetf(b)); return a; }
00326 inline __simdia_veclf operator^=(__simdia_veclf &a, const double &b) { a = __simdia_vxorlf(a, __simdia_vsetlf(b)); return a; }
00327
00328 #endif
00329
00333
00334
00335
00336
00337
00338 #if defined(__SSE2__) && (!(SIMDIA_FORCE_NO_SSE)) && !defined(_CRAYC)
00339
00340
00341
00342
00343
00344
00345 typedef __m128i simdia_veci;
00346 typedef __m128 simdia_vecf;
00347 typedef __m128d simdia_veclf;
00348
00349
00350
00351 inline simdia_veci simdia_vinserti( simdia_veci v, const int s, const int i) { simdia_veci r = v; int* rPtr = ( int*)(&r); rPtr[i] = s; return r; }
00352 inline simdia_vecf simdia_vinsertf( simdia_vecf v, const float s, const int i) { simdia_vecf r = v; float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00353 inline simdia_veclf simdia_vinsertlf(simdia_veclf v, const double s, const int i) { simdia_veclf r = v; double* rPtr = (double*)(&r); rPtr[i] = s; return r; }
00354
00355
00356
00357 inline int vextracti( simdia_veci v, const int i) { return (( int*)(&v))[i]; }
00358 inline float vextractf( simdia_vecf v, const int i) { return (( float*)(&v))[i]; }
00359 inline double vextractlf(simdia_veclf v, const int i) { return ((double*)(&v))[i]; }
00360
00361
/* Broadcast (SSE2 build): build a vector with the scalar `s` in every lane.
 * The argument is cast to the lane type before being handed to the
 * corresponding _mm_set1_* intrinsic. */
#define simdia_vseti(s)   (_mm_set1_epi32((int)(s)))
#define simdia_vsetf(s)   (_mm_set1_ps((float)(s)))
#define simdia_vsetlf(s)  (_mm_set1_pd((double)(s)))
00365
00366
/* Constant vectors (SSE2 build): 0, 1, 2 and -1 in every lane for each of the
 * three vector types.  Each macro expands to an intrinsic call that builds
 * the value where it is used. */

/* All lanes zero. */
#define simdia_const_vzeroi     (_mm_setzero_si128())
#define simdia_const_vzerof     (_mm_setzero_ps())
#define simdia_const_vzerolf    (_mm_setzero_pd())

/* All lanes one. */
#define simdia_const_vonei      ((_mm_set1_epi32((int)(1))))
#define simdia_const_vonef      ((_mm_set1_ps((float)(1.0f))))
#define simdia_const_vonelf     ((_mm_set1_pd((double)(1.0))))

/* All lanes two. */
#define simdia_const_vtwoi      ((_mm_set1_epi32((int)(2))))
#define simdia_const_vtwof      ((_mm_set1_ps((float)(2.0f))))
#define simdia_const_vtwolf     ((_mm_set1_pd((double)(2.0))))

/* All lanes negative one. */
#define simdia_const_vnegonei   ((_mm_set1_epi32((int)(-1))))
#define simdia_const_vnegonef   ((_mm_set1_ps((float)(-1.0f))))
#define simdia_const_vnegonelf  ((_mm_set1_pd((double)(-1.0))))
00385
00386
00387
00388 inline simdia_veci simdia_vrothi(const simdia_veci &a, int s) { simdia_veci b; int* a_ptr = ( int*)(&a); int* b_ptr = ( int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00389 inline simdia_vecf simdia_vrothf(const simdia_vecf &a, int s) { simdia_vecf b; float* a_ptr = ( float*)(&a); float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0-s)&0x3]; b_ptr[1] = a_ptr[(1-s)&0x3]; b_ptr[2] = a_ptr[(2-s)&0x3]; b_ptr[3] = a_ptr[(3-s)&0x3]; return b; }
00390 inline simdia_veclf simdia_vrothlf(const simdia_veclf &a, int s) { simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0-s)&0x1]; b_ptr[1] = a_ptr[(1-s)&0x1]; return b; }
00391 inline simdia_veci simdia_vrotli(const simdia_veci &a, int s) { simdia_veci b; int* a_ptr = ( int*)(&a); int* b_ptr = ( int*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00392 inline simdia_vecf simdia_vrotlf(const simdia_vecf &a, int s) { simdia_vecf b; float* a_ptr = ( float*)(&a); float* b_ptr = ( float*)(&b); s &= 0x3; b_ptr[0] = a_ptr[(0+s)&0x3]; b_ptr[1] = a_ptr[(1+s)&0x3]; b_ptr[2] = a_ptr[(2+s)&0x3]; b_ptr[3] = a_ptr[(3+s)&0x3]; return b; }
00393 inline simdia_veclf simdia_vrotllf(const simdia_veclf &a, int s) { simdia_veclf b; double* a_ptr = (double*)(&a); double* b_ptr = (double*)(&b); s &= 0x1; b_ptr[0] = a_ptr[(0+s)&0x1]; b_ptr[1] = a_ptr[(1+s)&0x1]; return b; }
00394
00395
/* Element-wise arithmetic (SSE). */

/* Addition */
#define simdia_vaddi(x, y)   (_mm_add_epi32((x), (y)))
#define simdia_vaddf(x, y)   (_mm_add_ps((x), (y)))
#define simdia_vaddlf(x, y)  (_mm_add_pd((x), (y)))

/* Subtraction */
#define simdia_vsubi(x, y)   (_mm_sub_epi32((x), (y)))
#define simdia_vsubf(x, y)   (_mm_sub_ps((x), (y)))
#define simdia_vsublf(x, y)  (_mm_sub_pd((x), (y)))

/* Multiplication (floating-point only) */
#define simdia_vmulf(x, y)   (_mm_mul_ps((x), (y)))
#define simdia_vmullf(x, y)  (_mm_mul_pd((x), (y)))

/* Division (floating-point only) */
#define simdia_vdivf(x, y)   (_mm_div_ps((x), (y)))
#define simdia_vdivlf(x, y)  (_mm_div_pd((x), (y)))
00412
00413
/* Fused multiply-add (SSE): (a * b) + c, composed from the multiply and add
 * macros of this backend.  The helpers must be referenced by their simdia_
 * prefixed names; the unprefixed vaddf/vmulf spellings are not defined in
 * this header. */
#define simdia_vmaddf(a, b, c)   (simdia_vaddf(simdia_vmulf((a), (b)), (c)))
#define simdia_vmaddlf(a, b, c)  (simdia_vaddlf(simdia_vmullf((a), (b)), (c)))
00416
00417
00418 #define simdia_vrecipf(a) (_mm_rcp_ps(a))
00419 inline simdia_veclf simdia_vreciplf(const simdia_veclf a) { simdia_veclf r; double* a_ptr = (double*)(&a); double* r_ptr = (double*)(&r); r_ptr[0] = 1.0f / a_ptr[0]; r_ptr[1] = 1.0f / a_ptr[1]; return r; }
00420
00421
00422 #define simdia_vsqrtf(a) (_mm_sqrt_ps(a))
00423 #define simdia_vsqrtlf(a) (_mm_sqrt_pd(a))
00424
00425
/* Reciprocal square root (SSE).  The float form maps to _mm_rsqrt_ps; the
 * double form is the reciprocal of the square root, built from this
 * backend's simdia_vreciplf and simdia_vsqrtlf (the unprefixed
 * vreciplf/vsqrtlf spellings are not defined in this header). */
#define simdia_vrsqrtf(a)   (_mm_rsqrt_ps(a))
#define simdia_vrsqrtlf(a)  (simdia_vreciplf(simdia_vsqrtlf(a)))
00428
00429
/* Bitwise NOT (SSE): xor every bit with 1.  simdia_const_vnegonei is an
 * integer vector with all bits set; _mm_xor_ps/_mm_xor_pd require operands
 * of their own vector type, so the mask is reinterpreted (no conversion)
 * with the SSE2 cast intrinsics for the float and double variants. */
#define simdia_vnoti(a)   (_mm_xor_si128((a), simdia_const_vnegonei))
#define simdia_vnotf(a)   (_mm_xor_ps((a), _mm_castsi128_ps(simdia_const_vnegonei)))
#define simdia_vnotlf(a)  (_mm_xor_pd((a), _mm_castsi128_pd(simdia_const_vnegonei)))
00433
00434
/* Bitwise logic (SSE).  The negated forms wrap the plain form in the
 * matching simdia_vnot* macro. */

/* OR */
#define simdia_vori(x, y)     (_mm_or_si128((x), (y)))
#define simdia_vorf(x, y)     (_mm_or_ps((x), (y)))
#define simdia_vorlf(x, y)    (_mm_or_pd((x), (y)))

/* NOR */
#define simdia_vnori(x, y)    (simdia_vnoti(simdia_vori((x), (y))))
#define simdia_vnorf(x, y)    (simdia_vnotf(simdia_vorf((x), (y))))
#define simdia_vnorlf(x, y)   (simdia_vnotlf(simdia_vorlf((x), (y))))

/* AND */
#define simdia_vandi(x, y)    (_mm_and_si128((x), (y)))
#define simdia_vandf(x, y)    (_mm_and_ps((x), (y)))
#define simdia_vandlf(x, y)   (_mm_and_pd((x), (y)))

/* NAND */
#define simdia_vnandi(x, y)   (simdia_vnoti(simdia_vandi((x), (y))))
#define simdia_vnandf(x, y)   (simdia_vnotf(simdia_vandf((x), (y))))
#define simdia_vnandlf(x, y)  (simdia_vnotlf(simdia_vandlf((x), (y))))

/* XOR */
#define simdia_vxori(x, y)    (_mm_xor_si128((x), (y)))
#define simdia_vxorf(x, y)    (_mm_xor_ps((x), (y)))
#define simdia_vxorlf(x, y)   (_mm_xor_pd((x), (y)))

/* NXOR */
#define simdia_vnxori(x, y)   (simdia_vnoti(simdia_vxori((x), (y))))
#define simdia_vnxorf(x, y)   (simdia_vnotf(simdia_vxorf((x), (y))))
#define simdia_vnxorlf(x, y)  (simdia_vnotlf(simdia_vxorlf((x), (y))))
00463
00464
/* Comparisons (SSE).  Every macro yields a simdia_veci mask whose bits are
 * all ones where the comparison holds and all zeros elsewhere.
 *
 * - Float/double compare results are reinterpreted as integer vectors with
 *   _mm_castps_si128/_mm_castpd_si128 (bit-preserving) rather than a raw
 *   vector cast.
 * - SSE2 provides only eq/gt/lt for 32-bit integers; there is no
 *   _mm_cmpge_epi32 or _mm_cmple_epi32.  ">=" is therefore computed as
 *   NOT(a < b) and "<=" as NOT(a > b); the NOT is done by comparing the mask
 *   with zero so each macro argument is still evaluated exactly once. */

/* Equal */
#define simdia_vcmpeqi(a, b)   ((simdia_veci)(_mm_cmpeq_epi32((a), (b))))
#define simdia_vcmpeqf(a, b)   ((simdia_veci)(_mm_castps_si128(_mm_cmpeq_ps((a), (b)))))
#define simdia_vcmpeqlf(a, b)  ((simdia_veci)(_mm_castpd_si128(_mm_cmpeq_pd((a), (b)))))

/* Greater than */
#define simdia_vcmpgti(a, b)   ((simdia_veci)(_mm_cmpgt_epi32((a), (b))))
#define simdia_vcmpgtf(a, b)   ((simdia_veci)(_mm_castps_si128(_mm_cmpgt_ps((a), (b)))))
#define simdia_vcmpgtlf(a, b)  ((simdia_veci)(_mm_castpd_si128(_mm_cmpgt_pd((a), (b)))))

/* Greater than or equal */
#define simdia_vcmpgei(a, b)   ((simdia_veci)(_mm_cmpeq_epi32(_mm_cmplt_epi32((a), (b)), _mm_setzero_si128())))
#define simdia_vcmpgef(a, b)   ((simdia_veci)(_mm_castps_si128(_mm_cmpge_ps((a), (b)))))
#define simdia_vcmpgelf(a, b)  ((simdia_veci)(_mm_castpd_si128(_mm_cmpge_pd((a), (b)))))

/* Less than */
#define simdia_vcmplti(a, b)   ((simdia_veci)(_mm_cmplt_epi32((a), (b))))
#define simdia_vcmpltf(a, b)   ((simdia_veci)(_mm_castps_si128(_mm_cmplt_ps((a), (b)))))
#define simdia_vcmpltlf(a, b)  ((simdia_veci)(_mm_castpd_si128(_mm_cmplt_pd((a), (b)))))

/* Less than or equal */
#define simdia_vcmplei(a, b)   ((simdia_veci)(_mm_cmpeq_epi32(_mm_cmpgt_epi32((a), (b)), _mm_setzero_si128())))
#define simdia_vcmplef(a, b)   ((simdia_veci)(_mm_castps_si128(_mm_cmple_ps((a), (b)))))
#define simdia_vcmplelf(a, b)  ((simdia_veci)(_mm_castpd_si128(_mm_cmple_pd((a), (b)))))
00488
00489
00490
00491
00492
00493
00494
00495
00496 #elif (CMK_CELL_SPE != 0) && (!(SIMDIA_FORCE_NO_SPE_SIMD))
00497
00498
00499 typedef vector signed int simdia_veci;
00500 typedef vector float simdia_vecf;
00501 typedef vector double simdia_veclf;
00502
00503
/* Insert (SPE): vector `vec` with element `idx` replaced by `val`.
 * Note that spu_insert takes the scalar first and the vector second. */
#define simdia_vinserti(vec, val, idx)   (spu_insert((val), (vec), (idx)))
#define simdia_vinsertf(vec, val, idx)   (spu_insert((val), (vec), (idx)))
#define simdia_vinsertlf(vec, val, idx)  (spu_insert((val), (vec), (idx)))


/* Extract (SPE): element `idx` of vector `vec`. */
#define simdia_vextracti(vec, idx)   (spu_extract((vec), (idx)))
#define simdia_vextractf(vec, idx)   (spu_extract((vec), (idx)))
#define simdia_vextractlf(vec, idx)  (spu_extract((vec), (idx)))


/* Set (SPE): splat the scalar `x`, converted to the lane type, across a vector. */
#define simdia_vseti(x)   (spu_splats((int)(x)))
#define simdia_vsetf(x)   (spu_splats((float)(x)))
#define simdia_vsetlf(x)  (spu_splats((double)(x)))
00517
00518
/* Constant vectors (SPE), built with this backend's simdia_vset* macros.
 * The helpers must be referenced by their simdia_ prefixed names; the
 * unprefixed vseti/vsetf/vsetlf spellings are not defined in this header. */

/* All lanes 0. */
#define simdia_const_vzeroi     (simdia_vseti(0))
#define simdia_const_vzerof     (simdia_vsetf(0.0f))
#define simdia_const_vzerolf    (simdia_vsetlf(0.0))

/* All lanes 1. */
#define simdia_const_vonei      (simdia_vseti(1))
#define simdia_const_vonef      (simdia_vsetf(1.0f))
#define simdia_const_vonelf     (simdia_vsetlf(1.0))

/* All lanes 2. */
#define simdia_const_vtwoi      (simdia_vseti(2))
#define simdia_const_vtwof      (simdia_vsetf(2.0f))
#define simdia_const_vtwolf     (simdia_vsetlf(2.0))

/* All lanes -1. */
#define simdia_const_vnegonei   (simdia_vseti(-1))
#define simdia_const_vnegonef   (simdia_vsetf(-1.0f))
#define simdia_const_vnegonelf  (simdia_vsetlf(-1.0))
00537
00538
/* Rotate (SPE): whole-quadword byte rotation via spu_rlqwbyte.  The element
 * count `n` is masked to the number of lanes and scaled to bytes (4 per
 * int/float lane, 8 per double lane).  The "h" forms for 4-lane vectors pass
 * 16 minus that byte count; for the 2-lane double vector both directions use
 * the same byte count. */
#define simdia_vrothi(v, n)   (spu_rlqwbyte((v), (0x10-(((n)&0x3)<<2)) ))
#define simdia_vrothf(v, n)   (spu_rlqwbyte((v), (0x10-(((n)&0x3)<<2)) ))
#define simdia_vrothlf(v, n)  (spu_rlqwbyte((v), (((n)&0x1)<<3) ))
#define simdia_vrotli(v, n)   (spu_rlqwbyte((v), ((n)&0x3)<<2))
#define simdia_vrotlf(v, n)   (spu_rlqwbyte((v), ((n)&0x3)<<2))
#define simdia_vrotllf(v, n)  (spu_rlqwbyte((v), ((n)&0x1)<<3))


/* Addition */
#define simdia_vaddi(x, y)   (spu_add((x), (y)))
#define simdia_vaddf(x, y)   (spu_add((x), (y)))
#define simdia_vaddlf(x, y)  (spu_add((x), (y)))


/* Subtraction */
#define simdia_vsubi(x, y)   (spu_sub((x), (y)))
#define simdia_vsubf(x, y)   (spu_sub((x), (y)))
#define simdia_vsublf(x, y)  (spu_sub((x), (y)))


/* Multiplication (floating-point only) */
#define simdia_vmulf(x, y)   (spu_mul((x), (y)))
#define simdia_vmullf(x, y)  (spu_mul((x), (y)))
00559
00560
00561 #define simdia_vdivf(a, b) (spu_mul((a), spu_re(b)))
00562 inline simdia_veclf simdia_vdivlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; spu_insert((spu_extract(a, 0) / spu_extract(b, 0)), r, 0); spu_insert((spu_extract(a, 1) / spu_extract(b, 1)), r, 1); return r; }
00563
00564
00565 #define simdia_vmaddf(a, b, c) (spu_madd((a), (b), (c)))
00566 #define simdia_vmaddlf(a, b, c) (spu_madd((a), (b), (c)))
00567
00568
00569 #define simdia_vrecipf(a) (spu_re(a))
00570 inline simdia_veclf simdia_vreciplf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; spu_insert((1.0f / spu_extract(a, 0)), r, 0); spu_insert((1.0f / spu_extract(a, 1)), r, 1); return r; }
00571
00572
00573 #define simdia_vsqrtf(a) (spu_re(spu_rsqrte(a)))
00574 inline simdia_veclf simdia_vsqrtlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; spu_insert(sqrt(spu_extract(a, 0)), r, 0); spu_insert(sqrt(spu_extract(a, 1)), r, 1); return r; }
00575
00576
00577 #define simdia_vrsqrtf(a) (spu_rsqrte(a))
00578 inline simdia_veclf simdia_vrsqrtlf(const simdia_veclf a, const simdia_veclf b) { simdia_veclf r = { 0.0, 0.0 }; spu_insert((1.0f / sqrt(spu_extract(a, 0))), r, 0); spu_insert((1.0f / sqrt(spu_extract(a, 1))), r, 1); return r; }
00579
00580
/* Bitwise logic (SPE). */

/* NOT: NOR of the operand with itself (the argument is expanded twice). */
#define simdia_vnoti(x)   (spu_nor((x), (x)))
#define simdia_vnotf(x)   (spu_nor((x), (x)))
#define simdia_vnotlf(x)  (spu_nor((x), (x)))


/* OR */
#define simdia_vori(x, y)   (spu_or((x), (y)))
#define simdia_vorf(x, y)   (spu_or((x), (y)))
#define simdia_vorlf(x, y)  (spu_or((x), (y)))


/* NOR */
#define simdia_vnori(x, y)   (spu_nor((x), (y)))
#define simdia_vnorf(x, y)   (spu_nor((x), (y)))
#define simdia_vnorlf(x, y)  (spu_nor((x), (y)))


/* AND */
#define simdia_vandi(x, y)   (spu_and((x), (y)))
#define simdia_vandf(x, y)   (spu_and((x), (y)))
#define simdia_vandlf(x, y)  (spu_and((x), (y)))


/* NAND */
#define simdia_vnandi(x, y)   (spu_nand((x), (y)))
#define simdia_vnandf(x, y)   (spu_nand((x), (y)))
#define simdia_vnandlf(x, y)  (spu_nand((x), (y)))


/* XOR */
#define simdia_vxori(x, y)   (spu_xor((x), (y)))
#define simdia_vxorf(x, y)   (spu_xor((x), (y)))
#define simdia_vxorlf(x, y)  (spu_xor((x), (y)))


/* NXOR: NOT applied to the XOR. */
#define simdia_vnxori(x, y)   (simdia_vnoti(simdia_vxori((x), (y))))
#define simdia_vnxorf(x, y)   (simdia_vnotf(simdia_vxorf((x), (y))))
#define simdia_vnxorlf(x, y)  (simdia_vnotlf(simdia_vxorlf((x), (y))))
00614
00615
/* Comparisons (SPE).  Each macro yields a simdia_veci mask.  Only "equal"
 * and "greater than" map directly to SPU intrinsics; the others are
 * composed from them, so their arguments are expanded more than once. */

/* Equal */
#define simdia_vcmpeqi(a, b)   ((simdia_veci)(spu_cmpeq((a), (b))))
#define simdia_vcmpeqf(a, b)   ((simdia_veci)(spu_cmpeq((a), (b))))
#define simdia_vcmpeqlf(a, b)  ((simdia_veci)(spu_cmpeq((a), (b))))


/* Greater than */
#define simdia_vcmpgti(a, b)   ((simdia_veci)(spu_cmpgt((a), (b))))
#define simdia_vcmpgtf(a, b)   ((simdia_veci)(spu_cmpgt((a), (b))))
#define simdia_vcmpgtlf(a, b)  ((simdia_veci)(spu_cmpgt((a), (b))))


/* Greater than or equal: (a == b) OR (a > b). */
#define simdia_vcmpgei(a, b)   (spu_or( simdia_vcmpeqi((a), (b)),  simdia_vcmpgti((a), (b))))
#define simdia_vcmpgef(a, b)   (spu_or( simdia_vcmpeqf((a), (b)),  simdia_vcmpgtf((a), (b))))
#define simdia_vcmpgelf(a, b)  (spu_or(simdia_vcmpeqlf((a), (b)), simdia_vcmpgtlf((a), (b))))


/* Less than: NOT((a > b) OR (a == b)). */
#define simdia_vcmplti(a, b)   (spu_nor( simdia_vcmpgti((a), (b)),  simdia_vcmpeqi((a), (b))))
#define simdia_vcmpltf(a, b)   (spu_nor( simdia_vcmpgtf((a), (b)),  simdia_vcmpeqf((a), (b))))
#define simdia_vcmpltlf(a, b)  (spu_nor(simdia_vcmpgtlf((a), (b)), simdia_vcmpeqlf((a), (b))))


/* Less than or equal: NOT(a > b), written as NOR with an all-zero vector.
 * The "greater than" mask is a simdia_veci for every element type, so the
 * zero operand must be an integer vector as well (not a float or double
 * zero vector). */
#define simdia_vcmplei(a, b)   (spu_nor( simdia_vcmpgti((a), (b)), spu_splats((int)0)))
#define simdia_vcmplef(a, b)   (spu_nor( simdia_vcmpgtf((a), (b)), spu_splats((int)0)))
#define simdia_vcmplelf(a, b)  (spu_nor(simdia_vcmpgtlf((a), (b)), spu_splats((int)0)))
00641
00642
00643
00644
00645
00646
00647
00648 #elif defined(__VEC__) && (!(SIMDIA_FORCE_NO_ALTIVEC))
00649
00650
00651 typedef vector signed int simdia_veci;
00652 typedef vector float simdia_vecf;
00653 #ifdef _ARCH_PWR7
00654
00662 typedef vector double simdia_veclf;
00663 #else
00664 typedef __simdia_veclf simdia_veclf;
00665 #endif
00666
00667
00668
00669
00670 #ifdef _ARCH_PWR7
00671
00672 #define simdia_vinserti(a, b, c) (vec_insert((b)), ((a)), ((c)))
00673 #define simdia_vinsertf(a, b, c) (vec_insert((b)), ((a)), ((c)))
00674 #define simdia_vinsertlf(a, b, c) (vec_insert((b)), ((a)), ((c)))
00675 #else
00676 inline simdia_veci simdia_vinserti( simdia_veci v, const int s, const int i) { simdia_veci r = v; int* rPtr = ( int*)(&r); rPtr[i] = s; return r; }
00677 inline simdia_vecf simdia_vinsertf( simdia_vecf v, const float s, const int i) { simdia_vecf r = v; float* rPtr = ( float*)(&r); rPtr[i] = s; return r; }
00678 #define simdia_vinsertlf __simdia_vinsertlf
00679 #endif
00680
00681
00682 #ifdef _ARCH_PWR7
00683 #define simdia_vextracti(a, b) (vec_extract((a), (b)))
00684 #define simdia_vextractf(a, b) (vec_extract((a), (b)))
00685 #define simdia_vextractlf(a, b) (vec_extract((a), (b)))
00686 #else
00687
00688 inline int simdia_vextracti( simdia_veci v, const int i) { int* vPtr = ( int*)(&v); return vPtr[i]; }
00689 inline float simdia_vextractf( simdia_vecf v, const int i) { float* vPtr = ( float*)(&v); return vPtr[i]; }
00690 #define simdia_vextractlf __simdia_vextractlf
00691 #endif
00692
00693
00694 #ifdef _ARCH_PWR7
00695 #define simdia_vseti(a) (vec_promote((a), 0))
00696 #define simdia_vsetf(a) (vec_promote((a), 0))
00697 #define simdia_vsetlf(a) (vec_promote((a), 0))
00698 #else
00699
00700
00701
00702
00703
00704
00705
00706
00707
00708 inline simdia_veci simdia_vseti(const int a) { __simdia_veci r; r.v0 = a; return vec_splat(*((simdia_veci*)(&r)), 0); }
00709 inline simdia_vecf simdia_vsetf(const float a) { __simdia_vecf r; r.v0 = a; return vec_splat(*((simdia_vecf*)(&r)), 0); }
00710 #define simdia_vsetlf __simdia_vsetlf
00711 #endif
00712
00713 inline vector unsigned char simdia_vset16uc(const unsigned char c) { vector unsigned char r __attribute__((aligned(16))); ((unsigned char*)(&r))[0] = c; return vec_splat(r, 0); }
00714
00715
/* Constant vectors (AltiVec).
 *
 * Int constants come from vec_splat_s32(); float constants convert that int
 * vector with vec_ctf(..., 0).  For the double constants:
 *   - on _ARCH_PWR7 the argument to vec_splats() must be a double literal,
 *     because an int literal selects the integer overload and produces an
 *     int vector rather than a simdia_veclf;
 *   - otherwise the generic __simdia_const_v*lf values are used. */

/* All lanes 0. */
#define simdia_const_vzeroi  (vec_splat_s32(0))
#define simdia_const_vzerof  (vec_ctf(vec_splat_s32(0), 0))
#ifdef _ARCH_PWR7
#define simdia_const_vzerolf (vec_splats(0.0))
#else
#define simdia_const_vzerolf (__simdia_const_vzerolf)
#endif


/* All lanes 1. */
#define simdia_const_vonei  (vec_splat_s32(1))
#define simdia_const_vonef  (vec_ctf(vec_splat_s32(1), 0))
#ifdef _ARCH_PWR7
#define simdia_const_vonelf (vec_splats(1.0))
#else
#define simdia_const_vonelf (__simdia_const_vonelf)
#endif


/* All lanes 2. */
#define simdia_const_vtwoi  (vec_splat_s32(2))
#define simdia_const_vtwof  (vec_ctf(vec_splat_s32(2), 0))
#ifdef _ARCH_PWR7
#define simdia_const_vtwolf (vec_splats(2.0))
#else
#define simdia_const_vtwolf (__simdia_const_vtwolf)
#endif


/* All lanes -1.  The generic fallback is __simdia_const_vnegonelf, matching
 * the naming of the zero/one/two fallbacks above. */
#define simdia_const_vnegonei  (vec_splat_s32(-1))
#define simdia_const_vnegonef  (vec_ctf(vec_splat_s32(-1), 0))
#ifdef _ARCH_PWR7
#define simdia_const_vnegonelf (vec_splats(-1.0))
#else
#define simdia_const_vnegonelf (__simdia_const_vnegonelf)
#endif
00750
00751
/* Rotate (AltiVec).
 *
 * __simdia_vrotlbytes/__simdia_vrotrbytes rotate the whole 16-byte vector by
 * `s` bytes (masked to 0..15) by OR-ing two opposite octet shifts
 * (vec_slo/vec_sro).  The shift operand is a byte vector built with
 * simdia_vset16uc holding the shift amount in bits (bytes << 3).
 * The element-level macros scale the element count to bytes: << 2 for
 * 4-byte int/float lanes, << 3 for 8-byte double lanes.
 * Macro arguments are expanded more than once. */
#define __simdia_vrotlbytes(a, s) (vec_or(vec_slo((a), simdia_vset16uc(((s) & 0xf) << 3)), vec_sro((a), simdia_vset16uc((16 - ((s) & 0xf)) << 3))))
#define __simdia_vrotrbytes(a, s) (vec_or(vec_sro((a), simdia_vset16uc(((s) & 0xf) << 3)), vec_slo((a), simdia_vset16uc((16 - ((s) & 0xf)) << 3))))
#define simdia_vrotli(a, s)   __simdia_vrotlbytes((a), ((s) << 2))
#define simdia_vrotlf(a, s)   __simdia_vrotlbytes((a), ((s) << 2))
#define simdia_vrotllf(a, s)  __simdia_vrotlbytes((a), ((s) << 3))
#define simdia_vrothi(a, s)   __simdia_vrotrbytes((a), ((s) << 2))
#define simdia_vrothf(a, s)   __simdia_vrotrbytes((a), ((s) << 2))
#define simdia_vrothlf(a, s)  __simdia_vrotrbytes((a), ((s) << 3))
00760
00761
/* Element-wise arithmetic (AltiVec).  Double variants are native only on
 * _ARCH_PWR7 and otherwise alias the generic __simdia_* implementations. */

/* Addition */
#define simdia_vaddi(x, y)  (vec_add((x), (y)))
#define simdia_vaddf(x, y)  (vec_add((x), (y)))
#ifdef _ARCH_PWR7
#define simdia_vaddlf(x, y) (vec_add((x), (y)))
#else
#define simdia_vaddlf __simdia_vaddlf
#endif


/* Subtraction */
#define simdia_vsubi(x, y)  (vec_sub((x), (y)))
#define simdia_vsubf(x, y)  (vec_sub((x), (y)))
#ifdef _ARCH_PWR7
#define simdia_vsublf(x, y) (vec_sub((x), (y)))
#else
#define simdia_vsublf __simdia_vsublf
#endif


/* Multiplication.  Without _ARCH_PWR7 the float product is a multiply-add
 * whose addend is x XOR x (all bits zero); `x` is expanded three times. */
#ifdef _ARCH_PWR7
#define simdia_vmulf(x, y)  (vec_mul((x), (y)))
#define simdia_vmullf(x, y) (vec_mul((x), (y)))
#else
#define simdia_vmulf(x, y)  (vec_madd((x), (y), vec_xor((x), (x))))
#define simdia_vmullf __simdia_vmullf
#endif
00788
00789
/* Division (AltiVec): a / b.
 *
 * On _ARCH_PWR7 this is vec_div(a, b); both operands must sit inside the
 * call's parentheses (closing the call after `a` turns the expansion into a
 * comma expression).  Otherwise the float form multiplies by the vec_re()
 * reciprocal of `b` and the double form uses the generic implementation. */
#ifdef _ARCH_PWR7
#define simdia_vdivf(a, b)   (vec_div((a), (b)))
#define simdia_vdivlf(a, b)  (vec_div((a), (b)))
#else
#define simdia_vdivf(a, b)   (simdia_vmulf((a), vec_re(b)))
#define simdia_vdivlf __simdia_vdivlf
#endif
00797
00798
/* Fused multiply-add (AltiVec): (x * y) + z. */
#define simdia_vmaddf(x, y, z)  (vec_madd((x), (y), (z)))
#ifdef _ARCH_PWR7
#define simdia_vmaddlf(x, y, z) (vec_madd((x), (y), (z)))
#else
#define simdia_vmaddlf __simdia_vmaddlf
#endif


/* Reciprocal */
#define simdia_vrecipf(x)  (vec_re(x))
#ifdef _ARCH_PWR7
#define simdia_vreciplf(x) (vec_re(x))
#else
#define simdia_vreciplf __simdia_vreciplf
#endif


/* Square root.  The float form is the vec_re() reciprocal of vec_rsqrte(). */
#define simdia_vsqrtf(x)  (vec_re(vec_rsqrte(x)))
#ifdef _ARCH_PWR7
#define simdia_vsqrtlf(x) (vec_sqrt(x))
#else
#define simdia_vsqrtlf __simdia_vsqrtlf
#endif


/* Reciprocal square root */
#define simdia_vrsqrtf(x)  (vec_rsqrte(x))
#ifdef _ARCH_PWR7
#define simdia_vrsqrtlf(x) (vec_rsqrte(x))
#else
#define simdia_vrsqrtlf __simdia_vrsqrtlf
#endif
00829
00830
/* Bitwise NOT (AltiVec): NOR of the operand with itself, as the SPE backend
 * does.  vec_neg() is not used because it negates the value arithmetically
 * rather than inverting its bits, and vec_nor() takes two operands of the
 * same vector type, so no integer mask has to be mixed with a float vector.
 * The argument is expanded twice.  The double form is native only on
 * _ARCH_PWR7 and otherwise aliases the generic implementation. */
#define simdia_vnoti(a)  (vec_nor((a), (a)))
#define simdia_vnotf(a)  (vec_nor((a), (a)))
#ifdef _ARCH_PWR7
#define simdia_vnotlf(a) (vec_nor((a), (a)))
#else
#define simdia_vnotlf __simdia_vnotlf
#endif
00840
00841
/* Bitwise logic (AltiVec).  Double variants are native only on _ARCH_PWR7
 * and otherwise alias the generic __simdia_* implementations. */

/* OR */
#define simdia_vori(x, y)  (vec_or((x), (y)))
#define simdia_vorf(x, y)  (vec_or((x), (y)))
#ifdef _ARCH_PWR7
#define simdia_vorlf(x, y) (vec_or((x), (y)))
#else
#define simdia_vorlf __simdia_vorlf
#endif


/* NOR */
#define simdia_vnori(x, y)  (vec_nor((x), (y)))
#define simdia_vnorf(x, y)  (vec_nor((x), (y)))
#ifdef _ARCH_PWR7
#define simdia_vnorlf(x, y) (vec_nor((x), (y)))
#else
#define simdia_vnorlf __simdia_vnorlf
#endif


/* AND */
#define simdia_vandi(x, y)  (vec_and((x), (y)))
#define simdia_vandf(x, y)  (vec_and((x), (y)))
#ifdef _ARCH_PWR7
#define simdia_vandlf(x, y) (vec_and((x), (y)))
#else
#define simdia_vandlf __simdia_vandlf
#endif
00867
00868
/* NAND / XOR / NXOR (AltiVec).  The negated forms apply the matching
 * simdia_vnot* macro to the plain operation.  On _ARCH_PWR7 the double
 * variants are composed from the double-named helpers (simdia_vnotlf,
 * simdia_vandlf, simdia_vxorlf) so each element type stays within its own
 * family of macros; otherwise they alias the generic implementations. */

/* NAND */
#define simdia_vnandi(a, b)  (simdia_vnoti(simdia_vandi((a), (b))))
#define simdia_vnandf(a, b)  (simdia_vnotf(simdia_vandf((a), (b))))
#ifdef _ARCH_PWR7
#define simdia_vnandlf(a, b) (simdia_vnotlf(simdia_vandlf((a), (b))))
#else
#define simdia_vnandlf __simdia_vnandlf
#endif


/* XOR */
#define simdia_vxori(a, b)  (vec_xor((a), (b)))
#define simdia_vxorf(a, b)  (vec_xor((a), (b)))
#ifdef _ARCH_PWR7
#define simdia_vxorlf(a, b) (vec_xor((a), (b)))
#else
#define simdia_vxorlf __simdia_vxorlf
#endif


/* NXOR */
#define simdia_vnxori(a, b)  (simdia_vnoti(simdia_vxori((a), (b))))
#define simdia_vnxorf(a, b)  (simdia_vnotf(simdia_vxorf((a), (b))))
#ifdef _ARCH_PWR7
#define simdia_vnxorlf(a, b) (simdia_vnotlf(simdia_vxorlf((a), (b))))
#else
#define simdia_vnxorlf __simdia_vnxorlf
#endif
00894
00895
/* Comparisons (AltiVec).  Each macro casts the vec_cmp* result to
 * simdia_veci.  Double variants are native only on _ARCH_PWR7 and otherwise
 * alias the generic __simdia_* implementations.
 * NOTE(review): the integer >= and <= forms rely on vec_cmpge/vec_cmple
 * accepting int vectors -- confirm the target compiler's altivec.h provides
 * those overloads. */

/* Equal */
#define simdia_vcmpeqi(x, y)  ((simdia_veci)(vec_cmpeq((x), (y))))
#define simdia_vcmpeqf(x, y)  ((simdia_veci)(vec_cmpeq((x), (y))))
#ifdef _ARCH_PWR7
#define simdia_vcmpeqlf(x, y) ((simdia_veci)(vec_cmpeq((x), (y))))
#else
#define simdia_vcmpeqlf __simdia_vcmpeqlf
#endif


/* Greater than */
#define simdia_vcmpgti(x, y)  ((simdia_veci)(vec_cmpgt((x), (y))))
#define simdia_vcmpgtf(x, y)  ((simdia_veci)(vec_cmpgt((x), (y))))
#ifdef _ARCH_PWR7
#define simdia_vcmpgtlf(x, y) ((simdia_veci)(vec_cmpgt((x), (y))))
#else
#define simdia_vcmpgtlf __simdia_vcmpgtlf
#endif


/* Greater than or equal */
#define simdia_vcmpgei(x, y)  ((simdia_veci)(vec_cmpge((x), (y))))
#define simdia_vcmpgef(x, y)  ((simdia_veci)(vec_cmpge((x), (y))))
#ifdef _ARCH_PWR7
#define simdia_vcmpgelf(x, y) ((simdia_veci)(vec_cmpge((x), (y))))
#else
#define simdia_vcmpgelf __simdia_vcmpgelf
#endif


/* Less than */
#define simdia_vcmplti(x, y)  ((simdia_veci)(vec_cmplt((x), (y))))
#define simdia_vcmpltf(x, y)  ((simdia_veci)(vec_cmplt((x), (y))))
#ifdef _ARCH_PWR7
#define simdia_vcmpltlf(x, y) ((simdia_veci)(vec_cmplt((x), (y))))
#else
#define simdia_vcmpltlf __simdia_vcmpltlf
#endif


/* Less than or equal */
#define simdia_vcmplei(x, y)  ((simdia_veci)(vec_cmple((x), (y))))
#define simdia_vcmplef(x, y)  ((simdia_veci)(vec_cmple((x), (y))))
#ifdef _ARCH_PWR7
#define simdia_vcmplelf(x, y) ((simdia_veci)(vec_cmple((x), (y))))
#else
#define simdia_vcmplelf __simdia_vcmplelf
#endif
00941
00942
00943
00944
00945
00946
00947 #else
00948
00949
00950 typedef __simdia_veci simdia_veci;
00951 typedef __simdia_vecf simdia_vecf;
00952 typedef __simdia_veclf simdia_veclf;
00953
00954
/* Generic fallback: each public name is an alias for the identically named
 * __simdia_* entity. */

/* Insert */
#define simdia_vinserti         __simdia_vinserti
#define simdia_vinsertf         __simdia_vinsertf
#define simdia_vinsertlf        __simdia_vinsertlf

/* Extract */
#define simdia_vextracti        __simdia_vextracti
#define simdia_vextractf        __simdia_vextractf
#define simdia_vextractlf       __simdia_vextractlf

/* Set */
#define simdia_vseti            __simdia_vseti
#define simdia_vsetf            __simdia_vsetf
#define simdia_vsetlf           __simdia_vsetlf

/* Constant: zero */
#define simdia_const_vzeroi     __simdia_const_vzeroi
#define simdia_const_vzerof     __simdia_const_vzerof
#define simdia_const_vzerolf    __simdia_const_vzerolf

/* Constant: one */
#define simdia_const_vonei      __simdia_const_vonei
#define simdia_const_vonef      __simdia_const_vonef
#define simdia_const_vonelf     __simdia_const_vonelf

/* Constant: two */
#define simdia_const_vtwoi      __simdia_const_vtwoi
#define simdia_const_vtwof      __simdia_const_vtwof
#define simdia_const_vtwolf     __simdia_const_vtwolf

/* Constant: negative one */
#define simdia_const_vnegonei   __simdia_const_vnegonei
#define simdia_const_vnegonef   __simdia_const_vnegonef
#define simdia_const_vnegonelf  __simdia_const_vnegonelf
00988
00989
/* Generic fallback: rotation and arithmetic aliases for the __simdia_* names. */

/* Rotate */
#define simdia_vrothi    __simdia_vrothi
#define simdia_vrothf    __simdia_vrothf
#define simdia_vrothlf   __simdia_vrothlf
#define simdia_vrotli    __simdia_vrotli
#define simdia_vrotlf    __simdia_vrotlf
#define simdia_vrotllf   __simdia_vrotllf

/* Addition */
#define simdia_vaddi     __simdia_vaddi
#define simdia_vaddf     __simdia_vaddf
#define simdia_vaddlf    __simdia_vaddlf

/* Subtraction */
#define simdia_vsubi     __simdia_vsubi
#define simdia_vsubf     __simdia_vsubf
#define simdia_vsublf    __simdia_vsublf

/* Multiplication */
#define simdia_vmulf     __simdia_vmulf
#define simdia_vmullf    __simdia_vmullf

/* Division */
#define simdia_vdivf     __simdia_vdivf
#define simdia_vdivlf    __simdia_vdivlf

/* Fused multiply-add */
#define simdia_vmaddf    __simdia_vmaddf
#define simdia_vmaddlf   __simdia_vmaddlf

/* Reciprocal */
#define simdia_vrecipf   __simdia_vrecipf
#define simdia_vreciplf  __simdia_vreciplf

/* Square root */
#define simdia_vsqrtf    __simdia_vsqrtf
#define simdia_vsqrtlf   __simdia_vsqrtlf

/* Reciprocal square root */
#define simdia_vrsqrtf   __simdia_vrsqrtf
#define simdia_vrsqrtlf  __simdia_vrsqrtlf
01030
01031
/* Generic fallback: bitwise-logic aliases for the __simdia_* names. */

/* NOT */
#define simdia_vnoti    __simdia_vnoti
#define simdia_vnotf    __simdia_vnotf
#define simdia_vnotlf   __simdia_vnotlf

/* OR */
#define simdia_vori     __simdia_vori
#define simdia_vorf     __simdia_vorf
#define simdia_vorlf    __simdia_vorlf

/* NOR */
#define simdia_vnori    __simdia_vnori
#define simdia_vnorf    __simdia_vnorf
#define simdia_vnorlf   __simdia_vnorlf

/* AND */
#define simdia_vandi    __simdia_vandi
#define simdia_vandf    __simdia_vandf
#define simdia_vandlf   __simdia_vandlf

/* NAND */
#define simdia_vnandi   __simdia_vnandi
#define simdia_vnandf   __simdia_vnandf
#define simdia_vnandlf  __simdia_vnandlf

/* XOR */
#define simdia_vxori    __simdia_vxori
#define simdia_vxorf    __simdia_vxorf
#define simdia_vxorlf   __simdia_vxorlf

/* NXOR */
#define simdia_vnxori   __simdia_vnxori
#define simdia_vnxorf   __simdia_vnxorf
#define simdia_vnxorlf  __simdia_vnxorlf
01065
01066
/* Generic fallback: comparison aliases for the __simdia_* names. */

/* Equal */
#define simdia_vcmpeqi   __simdia_vcmpeqi
#define simdia_vcmpeqf   __simdia_vcmpeqf
#define simdia_vcmpeqlf  __simdia_vcmpeqlf

/* Greater than */
#define simdia_vcmpgti   __simdia_vcmpgti
#define simdia_vcmpgtf   __simdia_vcmpgtf
#define simdia_vcmpgtlf  __simdia_vcmpgtlf

/* Greater than or equal */
#define simdia_vcmpgei   __simdia_vcmpgei
#define simdia_vcmpgef   __simdia_vcmpgef
#define simdia_vcmpgelf  __simdia_vcmpgelf

/* Less than */
#define simdia_vcmplti   __simdia_vcmplti
#define simdia_vcmpltf   __simdia_vcmpltf
#define simdia_vcmpltlf  __simdia_vcmpltlf

/* Less than or equal */
#define simdia_vcmplei   __simdia_vcmplei
#define simdia_vcmplef   __simdia_vcmplef
#define simdia_vcmplelf  __simdia_vcmplelf
01090
01091
01092 #endif
01093
01094
01095
01096
01097
01098
01099
01100
01101
01102
01103
01104
01105
01106
01107
/* Number of scalar elements held by each vector type, derived from the size
 * of the vector type divided by the size of its element type. */
#define simdia_veci_numElems   (sizeof(simdia_veci)  / sizeof(int))
#define simdia_vecf_numElems   (sizeof(simdia_vecf)  / sizeof(float))
#define simdia_veclf_numElems  (sizeof(simdia_veclf) / sizeof(double))
01111
01112
/* Spread: alternative spelling of the simdia_vset* broadcast macros. */
#define simdia_vspreadi(x)   (simdia_vseti(x))
#define simdia_vspreadf(x)   (simdia_vsetf(x))
#define simdia_vspreadlf(x)  (simdia_vsetlf(x))

/* isfinite: true only when every element of the vector passes isfinite().
 * Elements are extracted one by one (4 for float, 2 for double), so the
 * argument is expanded once per element. */
#define simdia_visfinitef(v)   (isfinite(simdia_vextractf((v),0)) && isfinite(simdia_vextractf((v),1)) && isfinite(simdia_vextractf((v),2)) && isfinite(simdia_vextractf((v),3)))
#define simdia_visfinitelf(v)  (isfinite(simdia_vextractlf((v),0)) && isfinite(simdia_vextractlf((v),1)))
01119
01120
/* Scalar-operand variants: the trailing "s" operand(s) are scalars that are
 * first broadcast with simdia_vset* and then passed to the vector operation. */

/* Add a scalar to every element. */
#define simdia_vaddis(v, s)   (simdia_vaddi((v), simdia_vseti(s)))
#define simdia_vaddfs(v, s)   (simdia_vaddf((v), simdia_vsetf(s)))
#define simdia_vaddlfs(v, s)  (simdia_vaddlf((v), simdia_vsetlf(s)))


/* Subtract a scalar from every element. */
#define simdia_vsubis(v, s)   (simdia_vsubi((v), simdia_vseti(s)))
#define simdia_vsubfs(v, s)   (simdia_vsubf((v), simdia_vsetf(s)))
#define simdia_vsublfs(v, s)  (simdia_vsublf((v), simdia_vsetlf(s)))


/* Multiply every element by a scalar. */
#define simdia_vmulfs(v, s)   (simdia_vmulf((v), simdia_vsetf(s)))
#define simdia_vmullfs(v, s)  (simdia_vmullf((v), simdia_vsetlf(s)))


/* Divide every element by a scalar. */
#define simdia_vdivfs(v, s)   (simdia_vdivf((v), simdia_vsetf(s)))
#define simdia_vdivlfs(v, s)  (simdia_vdivlf((v), simdia_vsetlf(s)))


/* Multiply-add with a scalar addend: (v * w) + s. */
#define simdia_vmaddfs(v, w, s)   (simdia_vmaddf((v), (w), simdia_vsetf(s)))
#define simdia_vmaddlfs(v, w, s)  (simdia_vmaddlf((v), (w), simdia_vsetlf(s)))


/* Multiply-add with scalar multiplier and scalar addend: (v * m) + s. */
#define simdia_vmaddfss(v, m, s)   (simdia_vmaddf((v), simdia_vsetf(m), simdia_vsetf(s)))
#define simdia_vmaddlfss(v, m, s)  (simdia_vmaddlf((v), simdia_vsetlf(m), simdia_vsetlf(s)))
01145
/* On __VEC__ builds, remove any `vector` macro that is defined at this
 * point so it does not leak into code that includes this header. */
#if defined(__VEC__) && defined(vector)
#undef vector
#endif
01151
01152 #endif //__SIMDIA_H__