00001 #ifndef __SSE_DOUBLE_H__
00002 #define __SSE_DOUBLE_H__
00003
00004 #if CMK_USE_AVX && defined(__AVX__)
00005
00006 #include <x86intrin.h>
00007
00008 #include<iostream>
00009
00010
/// Four packed doubles held in one 256-bit AVX register, with lane-wise operators.
///
/// Comparison operators do not return bool: they return an SSEDouble whose lanes
/// are the raw result of _mm256_cmp_pd (a per-lane bit mask). Reduce such a mask
/// with movemask() or combine masks with &, |, ^ and andnot().
class SSEDouble
{
public:
  __m256d val;  ///< underlying register

  /// Default construction leaves val uninitialized.
  SSEDouble() {}

  /// Broadcast d to all four lanes.
  SSEDouble(double d) { val = _mm256_set1_pd(d); }

  /// Set the lanes individually; d0 goes to the lowest lane.
  SSEDouble(double d0, double d1, double d2, double d3) { val = _mm256_setr_pd(d0, d1, d2, d3); }

  // ---- arithmetic: vector (op) vector -------------------------------------

  /// Lane-wise negation, computed as 0.0 - a.
  friend inline SSEDouble operator -(const SSEDouble &a)
  {
    SSEDouble c;
    c.val = _mm256_sub_pd(_mm256_setzero_pd(), a.val);
    return c;
  }

  friend inline SSEDouble operator +(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_add_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator -(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_sub_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator *(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_mul_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator /(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_div_pd(a.val, b.val);
    return c;
  }

  /// Lane-wise square root.
  friend inline SSEDouble sqrt(const SSEDouble &a)
  {
    SSEDouble c;
    c.val = _mm256_sqrt_pd(a.val);
    return c;
  }

  // ---- arithmetic: scalar (op) vector; the scalar is broadcast first ------

  friend inline SSEDouble operator +(double a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_add_pd(_mm256_set1_pd(a), b.val);
    return c;
  }

  friend inline SSEDouble operator -(double a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_sub_pd(_mm256_set1_pd(a), b.val);
    return c;
  }

  friend inline SSEDouble operator *(double a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_mul_pd(_mm256_set1_pd(a), b.val);
    return c;
  }

  friend inline SSEDouble operator /(double a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_div_pd(_mm256_set1_pd(a), b.val);
    return c;
  }

  // ---- compound assignment ------------------------------------------------

  inline SSEDouble& operator +=(const SSEDouble &a) { val = _mm256_add_pd(val, a.val); return *this; }

  inline SSEDouble& operator -=(const SSEDouble &a) { val = _mm256_sub_pd(val, a.val); return *this; }

  inline SSEDouble& operator *=(const SSEDouble &a) { val = _mm256_mul_pd(val, a.val); return *this; }

  inline SSEDouble& operator /=(const SSEDouble &a) { val = _mm256_div_pd(val, a.val); return *this; }

  // ---- bitwise operations on the raw lane bits ----------------------------

  friend inline SSEDouble operator &(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_and_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator |(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_or_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator ^(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_xor_pd(a.val, b.val);
    return c;
  }

  /// Wraps _mm256_andnot_pd(a, b), i.e. (~a) & b: the FIRST operand is the one inverted.
  friend inline SSEDouble andnot(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_andnot_pd(a.val, b.val);
    return c;
  }

  // ---- comparisons: the result is a lane mask, not a bool -----------------

  friend inline SSEDouble operator <(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_cmp_pd(a.val, b.val, _CMP_LT_OS);
    return c;
  }

  friend inline SSEDouble operator >(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_cmp_pd(a.val, b.val, _CMP_GT_OS);
    return c;
  }

  friend inline SSEDouble operator ==(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_cmp_pd(a.val, b.val, _CMP_EQ_OQ);
    return c;
  }

  /// Compare every lane of a against the broadcast scalar b.
  friend inline SSEDouble operator <(const SSEDouble &a, double b)
  {
    SSEDouble c;
    c.val = _mm256_cmp_pd(a.val, _mm256_set1_pd(b), _CMP_LT_OS);
    return c;
  }

  /// Compare every lane of a against the broadcast scalar b.
  friend inline SSEDouble operator >(const SSEDouble &a, double b)
  {
    SSEDouble c;
    c.val = _mm256_cmp_pd(a.val, _mm256_set1_pd(b), _CMP_GT_OS);
    return c;
  }

  /// Lane-wise maximum. Both operands are taken by const reference so that
  /// const objects and temporaries (e.g. max(a, x * y)) are accepted.
  friend inline SSEDouble max(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm256_max_pd(a.val, b.val);
    return c;
  }

  // ---- reductions and stores ----------------------------------------------

  /// Returns _mm256_movemask_pd(a.val): a 4-bit integer built from the lanes' sign bits.
  friend inline int movemask(const SSEDouble &a) { return _mm256_movemask_pd(a.val); }

  /// Store all four lanes to p[0..3]; p does not need to be aligned.
  friend inline void storeu(double *p, const SSEDouble &a) { _mm256_storeu_pd(p, a.val); }
};
00102
00103
00104 #else
00105
00106
00107 #include<emmintrin.h>
00108
00109 #include<iostream>
00110
00111
00112
/// Two packed doubles held in one 128-bit SSE2 register, with lane-wise operators.
///
/// Comparison operators do not return bool: they return an SSEDouble whose lanes
/// are the raw result of the _mm_cmp*_pd intrinsic (a per-lane bit mask). Reduce
/// such a mask with movemask() or combine masks with &, |, ^ and andnot().
class SSEDouble
{
public:
  __m128d val;  ///< underlying register

  /// Default construction leaves val uninitialized.
  SSEDouble() {}

  /// Broadcast d to both lanes.
  SSEDouble(double d) { val = _mm_set1_pd(d); }

  /// Set the lanes individually; d0 goes to the low lane, d1 to the high lane.
  SSEDouble(double d0, double d1) { val = _mm_setr_pd(d0, d1); }

  // ---- arithmetic: vector (op) vector -------------------------------------

  /// Lane-wise negation, computed as 0.0 - a.
  friend inline SSEDouble operator -(const SSEDouble &a)
  {
    SSEDouble c;
    c.val = _mm_sub_pd(_mm_setzero_pd(), a.val);
    return c;
  }

  friend inline SSEDouble operator +(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_add_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator -(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_sub_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator *(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_mul_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator /(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_div_pd(a.val, b.val);
    return c;
  }

  /// Lane-wise square root.
  friend inline SSEDouble sqrt(const SSEDouble &a)
  {
    SSEDouble c;
    c.val = _mm_sqrt_pd(a.val);
    return c;
  }

  // ---- arithmetic: scalar (op) vector; the scalar is broadcast first ------

  friend inline SSEDouble operator +(double a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_add_pd(_mm_set1_pd(a), b.val);
    return c;
  }

  friend inline SSEDouble operator -(double a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_sub_pd(_mm_set1_pd(a), b.val);
    return c;
  }

  friend inline SSEDouble operator *(double a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_mul_pd(_mm_set1_pd(a), b.val);
    return c;
  }

  friend inline SSEDouble operator /(double a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_div_pd(_mm_set1_pd(a), b.val);
    return c;
  }

  // ---- compound assignment ------------------------------------------------

  inline SSEDouble& operator +=(const SSEDouble &a) { val = _mm_add_pd(val, a.val); return *this; }

  inline SSEDouble& operator -=(const SSEDouble &a) { val = _mm_sub_pd(val, a.val); return *this; }

  inline SSEDouble& operator *=(const SSEDouble &a) { val = _mm_mul_pd(val, a.val); return *this; }

  inline SSEDouble& operator /=(const SSEDouble &a) { val = _mm_div_pd(val, a.val); return *this; }

  // ---- bitwise operations on the raw lane bits ----------------------------

  friend inline SSEDouble operator &(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_and_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator |(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_or_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator ^(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_xor_pd(a.val, b.val);
    return c;
  }

  /// Wraps _mm_andnot_pd(a, b), i.e. (~a) & b: the FIRST operand is the one inverted.
  friend inline SSEDouble andnot(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_andnot_pd(a.val, b.val);
    return c;
  }

  // ---- comparisons: the result is a lane mask, not a bool -----------------

  friend inline SSEDouble operator <(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_cmplt_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator >(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_cmpgt_pd(a.val, b.val);
    return c;
  }

  friend inline SSEDouble operator ==(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_cmpeq_pd(a.val, b.val);
    return c;
  }

  /// Compare both lanes of a against the broadcast scalar b.
  friend inline SSEDouble operator <(const SSEDouble &a, double b)
  {
    SSEDouble c;
    c.val = _mm_cmplt_pd(a.val, _mm_set1_pd(b));
    return c;
  }

  /// Compare both lanes of a against the broadcast scalar b.
  friend inline SSEDouble operator >(const SSEDouble &a, double b)
  {
    SSEDouble c;
    c.val = _mm_cmpgt_pd(a.val, _mm_set1_pd(b));
    return c;
  }

  /// Lane-wise maximum. Both operands are taken by const reference so that
  /// const objects and temporaries (e.g. max(a, x * y)) are accepted.
  friend inline SSEDouble max(const SSEDouble &a, const SSEDouble &b)
  {
    SSEDouble c;
    c.val = _mm_max_pd(a.val, b.val);
    return c;
  }

  // ---- reductions and stores ----------------------------------------------

  /// Returns _mm_movemask_pd(a.val): a 2-bit integer built from the lanes' sign bits.
  friend inline int movemask(const SSEDouble &a) { return _mm_movemask_pd(a.val); }

  /// Store only the low lane to *p.
  friend inline void storel(double *p, const SSEDouble &a) { _mm_storel_pd(p, a.val); }

  /// Store only the high lane to *p.
  friend inline void storeh(double *p, const SSEDouble &a) { _mm_storeh_pd(p, a.val); }

  /// Store both lanes to p[0..1]; p does not need to be aligned.
  friend inline void storeu(double *p, const SSEDouble &a) { _mm_storeu_pd(p, a.val); }
};
00202
00203
00204
00205
00206
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225
00226
00227
00228
00229
00230
00231
00232
00233
00234
00235
00236
00237
00238
00239
00240
00241
00242
00243
00244
00245 #endif
00246
00247 #endif //__SSE_DOUBLE_H__