00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018 #include <stdio.h>
00019 #include <stdlib.h>
00020 #include <string.h>
00021 #include <math.h>
00022 struct timeval tv;
00023 #include <sys/time.h>
00024 #include <assert.h>
00025
00026
00027
00028 #if USE_SSE
00029 #include <smmintrin.h>
00030 #endif
00031
/*
 * Current wall-clock time in seconds, as reported by gettimeofday(), with
 * the microsecond field folded in as the fractional part.
 *
 * A failing gettimeofday() is only reported through CmiPrintf; the return
 * value is still computed from the timeval afterwards.
 */
double get_clock()
{
    struct timeval now;
    int status = gettimeofday(&now, NULL);

    if (status < 0) {
        CmiPrintf("gettimeofday error");
    }

    double whole_seconds = now.tv_sec * 1.0;
    double fraction = now.tv_usec * 1.0E-6;
    return (whole_seconds + fraction);
}
00039
/* Build-time switch: the routines in this file test it with "#if !COMPRESS"
 * and fall back to a plain memcpy pass-through when it is 0. */
#define COMPRESS 1

/* Bits per byte.  Left untouched when a header (e.g. <limits.h>) has already
 * provided it, to avoid a macro redefinition. */
#ifndef CHAR_BIT
#define CHAR_BIT 8
#endif
/* Width of a float in bits and in bytes.  Parenthesized so they behave as a
 * single value inside larger expressions such as "n / FLOAT_BIT". */
#define FLOAT_BIT (CHAR_BIT*sizeof(float))
#define FLOAT_BYTE (sizeof(float))

/* Selects which family of floating-point routines and bit macros is built:
 * non-zero -> byte-array flag bitmaps, zero -> bit-packed word streams. */
#define COMPRESS_EXP 1

#if COMPRESS_EXP
/* Flag bitmap stored in a byte array: bit i lives in dest[i / 8] at bit
 * position i % 8.  Every argument and every expansion is parenthesized so
 * that calls like SETBIT(buf, a+b) or TESTBIT(buf, i) == 0 mean what they
 * say. */
#define SETBIT(dest, i) ((dest)[(i)>>3] |= (1 << ((i)&7)))
#define TESTBIT(dest, i) ((((dest)[(i)>>3]) >> ((i)&7)) & 1)
/* Two-bit variants: set / read the pair of bits starting at bit i. */
#define SETBIT11(dest, i) ((dest)[(i)>>3] |= (3 << ((i)&7)))
#define TESTBIT11(dest, i) ((((dest)[(i)>>3]) >> ((i)&7)) & 0x3l)

#else

/* Single-word variants: read bit b of data / OR "bit" into position index. */
#define TESTBIT(data, b) (((data)>>(b)) & 1)
#define SETBIT(data, index, bit) ((data) |= ((bit)<<(index)))
#endif
00059
/*
 * Delta-compress a byte buffer against a reference buffer of the same length.
 *
 *   src          bytes to encode (size bytes)
 *   dst          output buffer
 *   size         number of bytes in src / bData
 *   compressSize receives the number of bytes written to dst
 *   bData        reference ("base") bytes
 *
 * Output layout when COMPRESS is enabled: a bitmap of (size+7)/8 bytes in
 * which bit i is set when src[i] equals bData[i], followed by the bytes that
 * differ, in input order.  With COMPRESS disabled the input is copied as is.
 *
 * NOTE(review): the loop stops as soon as the output cursor reaches `size`,
 * so the output never grows past `size` bytes, but input bytes after that
 * point are not encoded at all.
 */
void compressChar(void *src, void *dst, int size, int *compressSize, void *bData)
{
    char *in = (char *)src;
    char *out = (char *)dst;
#if DEBUG
    double t1 = get_clock();
#endif

#if !COMPRESS
    memcpy(out, in, size*sizeof(char));
    *compressSize = size;
#else
    char *ref = (char *)bData;
    int mapBytes = (size + 7) / 8;
    int cursor = mapBytes;      /* next free byte after the bitmap */
    int pos = 0;

    memset(out, 0, mapBytes);
    while (pos < size && cursor < size) {
        if (in[pos] != ref[pos]) {
            /* changed byte: store it literally, bitmap bit stays 0 */
            out[cursor] = in[pos];
            cursor++;
        } else {
            /* unchanged byte: only flag it in the bitmap */
            out[pos >> 3] |= (1 << (pos & 7));
        }
        ++pos;
    }
    *compressSize = cursor;
#endif
#if DEBUG
    double t = get_clock()-t1;
    printf(" +++++CHAR done compressing(%d===>%d) (reduction:%d) ration=%f time=%d us\n", (int)(size*sizeof(char)), *compressSize, (int)(size*sizeof(char)-*compressSize), (1-(float)*compressSize/(size*sizeof(char)))*100, (int)(t*1000000));
#endif
}
00098
/*
 * Inverse of compressChar: rebuild `size` bytes into dData.
 *
 *   cData        compressed stream (bitmap followed by literal bytes)
 *   dData        output buffer, size bytes
 *   size         number of bytes to reconstruct
 *   compressSize length of the compressed stream (not consulted here)
 *   bData        reference ("base") bytes used when a bitmap bit is set
 *
 * For every position, a set bitmap bit means "take the byte from bData";
 * a clear bit means "take the next literal from the stream".  With COMPRESS
 * disabled the stream is copied through unchanged.
 */
void decompressChar(void *cData, void *dData, int size, int compressSize, void *bData) {
#if DEBUG
    double t1 = get_clock();
#endif
#if !COMPRESS
    memcpy(dData, cData, size*sizeof(char));
#else
    char *packed = (char *)cData;
    char *ref = (char *)bData;
    char *out = (char *)dData;
    int literalAt = (size + 7) / 8;   /* literals start right after the bitmap */
    int pos;

    for (pos = 0; pos < size; pos++) {
        int unchanged = (packed[pos >> 3] >> (pos & 7)) & 1;
        if (unchanged) {
            out[pos] = ref[pos];
        } else {
            out[pos] = packed[literalAt];
            literalAt += 1;
        }
    }
#endif
#if DEBUG
    double t = get_clock()-t1;
    printf("------CHAR done decompressing..... orig size:%d time:%d us \n", (int)size, (int)(t*1000000)) ;
#endif

}
00130
00131 #if COMPRESS_EXP
00132
00133 #if USE_SSE
/*
 * Delta-compress an array of floats against a reference array (SSE build).
 *
 *   src          floats to encode (s bytes)
 *   dst          output buffer
 *   s            size of src / bData in bytes
 *   compressSize receives the number of bytes written to dst
 *   bData        reference ("base") floats; must not be NULL
 *
 * Output layout when COMPRESS is enabled: a bitmap of (n+7)/8 bytes, n being
 * the float count, followed by one record per float.  If the top 8 bits of
 * the float differ from the reference, its bitmap bit is set and all 4 bytes
 * are stored; otherwise only the first 3 bytes in memory are stored (the low
 * 24 bits on a little-endian host).
 *
 * Four elements at a time are compared with SSE; the comparison uses
 * unaligned loads, so src/bData need no particular alignment, and a scalar
 * tail loop handles counts that are not a multiple of four so nothing is read
 * or emitted beyond element n-1.
 */
void compressFloatingPoint(void *src, void *dst, int s, int *compressSize, void *bData)
{
    int size = s/sizeof(float);
#if DEBUG
    double t1 = get_clock();
#endif

#if !COMPRESS
    memcpy(dst, src, size*sizeof(float));
    *compressSize = s;
#else
    const unsigned int *bptr = (const unsigned int*)bData;
    const unsigned int *uptr = (const unsigned int*)src;
    unsigned char *cdst = (unsigned char*)dst;
    int _dataIndex = (size+7)/8;          /* records start after the bitmap */
    int i = 0;
    const __m128i xmm_f = _mm_set1_epi32((int)0xFF000000);

    assert(bData != NULL);
    memset(cdst, 0, (size+7)/8);

    /* Vector part: whole groups of four elements only. */
    for (; i + 4 <= size; i += 4) {
        unsigned int diff[4];
        int lane;
        __m128i xmm_b = _mm_loadu_si128((const __m128i*)(bptr + i));
        __m128i xmm_u = _mm_loadu_si128((const __m128i*)(uptr + i));
        __m128i xmm_d = _mm_and_si128(_mm_xor_si128(xmm_b, xmm_u), xmm_f);
        _mm_storeu_si128((__m128i*)diff, xmm_d);

        for (lane = 0; lane < 4; ++lane) {
            int idx = i + lane;
            int keep = 3;
            if (diff[lane]) {
                cdst[idx>>3] |= (1 << (idx&7));
                keep = 4;
            }
            memcpy(cdst+_dataIndex, &uptr[idx], keep);
            _dataIndex += keep;
        }
    }

    /* Scalar tail: the remaining 0..3 elements. */
    for (; i < size; ++i) {
        int keep = 3;
        if ((bptr[i] ^ uptr[i]) & 0xff000000) {
            cdst[i>>3] |= (1 << (i&7));
            keep = 4;
        }
        memcpy(cdst+_dataIndex, &uptr[i], keep);
        _dataIndex += keep;
    }
    *compressSize = _dataIndex;
#endif
#if DEBUG
    double t = get_clock()-t1;
    printf(" ===>floating compare done compressingcompressed size:(%d===>%d) (reduction:%d) ration=%f time=%d us \n", (int)(size*sizeof(float)), *compressSize, (int)(size*sizeof(float)-*compressSize), (1-(float)*compressSize/(size*sizeof(float)))*100, (int)(t*1000000));
#endif
}
00215
00216 #else
00217
/*
 * Delta-compress an array of floats against a reference array (scalar build).
 *
 *   src          floats to encode (s bytes)
 *   dst          output buffer
 *   s            size of src / bData in bytes
 *   compressSize receives the number of bytes written to dst
 *   bData        reference ("base") floats; asserted non-NULL
 *
 * Output layout when COMPRESS is enabled: a bitmap of (n+7)/8 bytes, n being
 * the float count, followed by one record per float.  A float whose top
 * 8 bits differ from the reference gets its bitmap bit set and is stored in
 * full (4 bytes); otherwise only its first 3 bytes in memory are stored.
 * With COMPRESS disabled the input is copied through unchanged.
 */
void compressFloatingPoint(void *src, void *dst, int s, int *compressSize, void *bData)
{
    int size = s/sizeof(float);
#if DEBUG
    double t1 = get_clock();
#endif

#if !COMPRESS
    memcpy(dst, src, size*sizeof(float));
    *compressSize = s;
#else
    unsigned int *cur = (unsigned int *)src;
    unsigned int *ref = (unsigned int *)bData;
    unsigned char *out = (unsigned char *)dst;
    int flagBytes = (size + 7) / 8;
    int cursor = flagBytes;        /* records start right after the bitmap */
    int k;

    assert(bData != NULL);
    memset(out, 0, flagBytes);

    for (k = 0; k < size; k++) {
        int keep = 3;
        if ((ref[k] ^ cur[k]) & 0xff000000) {
            /* top byte changed: flag it and keep the whole word */
            out[k >> 3] |= (1 << (k & 7));
            keep = 4;
        }
        memcpy(out + cursor, &cur[k], keep);
        cursor += keep;
    }
    *compressSize = cursor;
#endif
#if DEBUG
    double t = get_clock()-t1;
    CmiPrintf(" ===> FLOATING done compressingcompressed size:(%d===>%d) (reduction:%d) ration=%f time=%d us\n", (int)(size*sizeof(float)), *compressSize, (int)(size*sizeof(float)-*compressSize), (1-(float)*compressSize/(size*sizeof(float)))*100, (int)(t*1000000));
#endif
}
00267
00268 #endif
00269
/* COMPRESS is normally defined near the top of this file; default it to
 * "enabled" so the decoder below is selected even when that definition is
 * not visible. */
#ifndef COMPRESS
#define COMPRESS 1
#endif

/*
 * Rebuild an array of floats from the bitmap-plus-records stream.
 *
 *   cData        compressed stream
 *   dData        output buffer, s bytes
 *   s            size of the decoded data in bytes
 *   compressSize length of the compressed stream (not consulted here)
 *   bData        reference ("base") floats
 *
 * The stream starts with a bitmap of (n+7)/8 bytes, n being the float count.
 * A set bit means the next record is a full 4-byte word; a clear bit means
 * the record holds 3 bytes that supply the low 24 bits, with the top 8 bits
 * taken from the reference word.
 *
 * Records are packed at arbitrary byte offsets, so each one is fetched with
 * memcpy of exactly its own length: no unaligned word load, and no read past
 * the end of the stream when the last record is a 3-byte one.  As with the
 * encoder, the 3-byte form assumes a little-endian host.
 */
void decompressFloatingPoint(void *cData, void *dData, int s, int compressSize, void *bData) {
    int size = s/sizeof(float);
#if DEBUG
    double t1 = get_clock();
#endif
#if !COMPRESS
    memcpy(dData, cData, size*sizeof(float));
#else
    const unsigned char *src = (const unsigned char*)cData;
    const unsigned int *bptr = (const unsigned int*)bData;
    unsigned int *decompressData = (unsigned int*)dData;
    int _sdataIndex = (size+7)/8;      /* records start after the bitmap */
    int i;
    for (i = 0; i < size; ++i) {
        unsigned int word = 0;
        if ((src[i>>3] >> (i&7)) & 1) {
            memcpy(&word, src+_sdataIndex, 4);
            _sdataIndex += 4;
        } else {
            memcpy(&word, src+_sdataIndex, 3);
            word = (word & 0x00FFFFFF) | (bptr[i] & 0xff000000);
            _sdataIndex += 3;
        }
        decompressData[i] = word;
    }
#endif
#if DEBUG
    double t = get_clock()-t1;
    (void)t;   /* elapsed time is measured but not reported */
#endif

}
00310
00311 #else
/* COMPRESS is normally defined near the top of this file; default it to
 * "enabled" so the encoder below is selected even when that definition is
 * not visible. */
#ifndef COMPRESS
#define COMPRESS 1
#endif

/*
 * Bit-packed delta compression of floats against a reference array.
 *
 *   src          floats to encode (s bytes)
 *   dst          output buffer, written as 32-bit words
 *   s            size of src / bData in bytes
 *   compressSize receives the number of bytes occupied by the bit stream
 *   bData        reference ("base") floats
 *
 * For every element the XOR with the reference word is formed and its
 * leading zero bits are counted, capped at 15.  The stream then receives the
 * 4-bit count (least significant bit first) followed by the remaining
 * 32 - count bits of the XOR, most significant first.  Bits are appended
 * LSB-first into consecutive 32-bit words of dst.
 *
 * The reported size is rounded UP to whole bytes so a trailing partial byte
 * of the stream is not lost.
 *
 * NOTE(review): only the first s bytes of dst are cleared, while an element
 * can take up to 36 bits; whether dst is large enough / pre-zeroed beyond s
 * depends on the caller.
 */
void compressFloatingPoint(void *src, void *dst, int s, int *compressSize, void *bData)
{
    unsigned int *dest = (unsigned int*)dst;
    unsigned int *bptr = (unsigned int*) bData;
    unsigned int *uptr = (unsigned int *) src;
    int size = s/sizeof(float);
#if DEBUG
    double t1 = get_clock();
#endif

#if !COMPRESS
    memcpy(dest, src, size*sizeof(float));
    *compressSize = s;
#else
    int f_index = 0;     /* next free bit position in the output stream */
    int i;
    int j;
    int b;
    int zers;
    unsigned int xor_data;
    memset(dest, 0, s);
    for (i = 0; i < size; ++i) {
        xor_data = (uptr[i])^(bptr[i]);
        zers = 0;
        b = (int)(8*sizeof(float)) - 1;
        /* count leading zero bits, at most 15 so the count fits in 4 bits */
        while (zers < 15 && !((xor_data >> b) & 1u)) {
            zers++;
            b--;
        }

        for (j = 0; j < 4; j++) {
            dest[f_index>>5] |= ((unsigned int)((zers >> j) & 1)) << (f_index&0x1f);
            f_index++;
        }
        while (b >= 0) {
            dest[f_index>>5] |= ((xor_data >> b) & 1u) << (f_index&0x1f);
            f_index++;
            b--;
        }
    }
    *compressSize = (f_index + 7)/8;

#if DEBUG
    double t = get_clock()-t1;
    CmiPrintf("===>[floating point lzc]done compressing compressed size:(%d===>%d) (reduction:%d) ration=%f Timer:%d us\n\n", (int)(size*sizeof(float)), *compressSize, (int)((size*sizeof(float)-*compressSize)), (1-(float)*compressSize/(size*sizeof(float)))*100, (int)(t*1000000));
#endif

#endif
}
00365
/*
 * Decode the bit-packed float stream back into s bytes of floats.
 *
 *   cData        compressed bit stream, read as 32-bit words
 *   dData        output buffer, s bytes (zeroed first)
 *   s            size of the decoded data in bytes
 *   compressSize length of the compressed stream (not consulted here)
 *   bData        reference ("base") floats
 *
 * Each element starts with a 4-bit header, least significant bit first,
 * giving how many top bits of the XOR word are zero.  The remaining bits
 * follow from the highest non-skipped position down to bit 0.  The rebuilt
 * XOR word is combined with the reference word to give the output.
 * With COMPRESS disabled the stream is copied through unchanged.
 */
void decompressFloatingPoint(void *cData, void *dData, int s, int compressSize, void *bData) {
    int size = s/sizeof(float);
#if DEBUG
    double t1 = get_clock();
    if(CmiMyPe() == 5)
        CmiPrintf("[%d] starting decompressing \n", CmiMyPe());
#endif
#if !COMPRESS
    memcpy(dData, cData, size*sizeof(float));
#else
    unsigned int *packed = (unsigned int *)cData;
    unsigned int *out = (unsigned int *)dData;
    unsigned int *ref = (unsigned int *)bData;
    int bitPos = 0;      /* read cursor into the bit stream */
    int n;

    memset(out, 0, s);
    for (n = 0; n < size; n++) {
        unsigned int delta = 0;
        int top = (int)(8 * sizeof(float)) - 1;
        int f;

        /* header: bit f set means skip another 2^f leading positions */
        for (f = 0; f < 4; f++, bitPos++) {
            if ((packed[bitPos >> 5] >> (bitPos & 0x1f)) & 1) {
                top -= (1 << f);
            }
        }
        /* payload: remaining bits, most significant first */
        for (; top >= 0; top--, bitPos++) {
            delta |= ((packed[bitPos >> 5] >> (bitPos & 0x1f)) & 1) << top;
        }
        out[n] = delta ^ ref[n];
    }

#if DEBUG
    double t = get_clock()-t1;
    if(CmiMyPe() == 5)
        CmiPrintf("[%d] done decompressing..... orig size:%d time:%d us \n", CmiMyPe(), size, (int)(t*1000000));
#endif

#endif
}
00416
00417 #endif
00418
00419
00420
00421
00422
00423
00424
/* Size of a double in bytes and in bits.  Parenthesized so they act as a
 * single value inside larger expressions (e.g. "n / BITS_DOUBLE"). */
#define DOUBLE_BYTE (sizeof(double))
#define BITS_DOUBLE (sizeof(double)*8)
00427
00428 #if COMPRESS_EXP
00429
/*
 * Delta-compress an array of doubles against a reference array.
 *
 *   src          doubles to encode (s bytes)
 *   dst          output buffer
 *   s            size of src / bData in bytes
 *   compressSize receives the number of bytes written to dst
 *   bData        reference ("base") doubles; asserted non-NULL
 *
 * Output layout when COMPRESS is enabled: a bitmap with two flag bits per
 * element ((2n+7)/8 bytes), followed by one record per element:
 *   flags 00 - top 16 bits match the reference, 6 bytes stored
 *   flags 01 - only the top 8 bits match,        7 bytes stored
 *   flags 11 - top 8 bits differ,                8 bytes stored
 * The stored bytes are the first 6/7/8 bytes of the value in memory.
 * With COMPRESS disabled the input is copied through unchanged.
 */
void compressDouble(void *src, void *dst, int s, int *compressSize, void *bData)
{
    int size = s/sizeof(double);
#if DEBUG
    double t1 = get_clock();
#endif

#if !COMPRESS
    memcpy(dst, src, s);
    *compressSize = s;
#else
    unsigned long *cur = (unsigned long *)src;
    unsigned long *ref = (unsigned long *)bData;
    unsigned char *out = (unsigned char *)dst;
    int flagBytes = (2*size + 7) / 8;
    int cursor = flagBytes;        /* records start right after the bitmap */
    int k;

    assert(bData != NULL);
    memset(out, 0, flagBytes);

    for (k = 0; k < size; k++) {
        unsigned long value = cur[k];
        unsigned long delta = ref[k] ^ value;
        int slot = k << 1;         /* position of this element's flag pair */
        int keep;

        if ((delta & 0xffff000000000000) == 0l) {
            keep = 6;
        } else if ((delta & 0xff00000000000000) == 0l) {
            out[slot >> 3] |= (1 << (slot & 7));
            keep = 7;
        } else {
            out[slot >> 3] |= (3 << (slot & 7));
            keep = 8;
        }
        memcpy(out + cursor, &value, keep);
        cursor += keep;
    }
    *compressSize = cursor;
#endif
#if DEBUG
    double t = get_clock()-t1;
    printf(" ===>[double lzc] done compressingcompressed size:(%d===>%d) (reduction:%d) ration=%f time=%d us\n", (int)(size*sizeof(double)), *compressSize, (int)(size*sizeof(double)-*compressSize), (1-(double)*compressSize/(size*sizeof(double)))*100, (int)(t*1000000));
#endif
}
00489
/* COMPRESS is normally defined near the top of this file; default it to
 * "enabled" so the decoder below is selected even when that definition is
 * not visible. */
#ifndef COMPRESS
#define COMPRESS 1
#endif

/*
 * Rebuild an array of doubles from the two-bit-flag stream written by the
 * matching compressDouble.
 *
 *   cData        compressed stream
 *   dData        output buffer, s bytes
 *   s            size of the decoded data in bytes
 *   compressSize length of the compressed stream (not consulted here)
 *   bData        reference ("base") doubles
 *
 * The stream starts with a bitmap holding two flag bits per element
 * ((2n+7)/8 bytes).  Each element's record is then decoded by flag value:
 *   3 (binary 11) - 8 bytes, the complete value
 *   1 (binary 01) - 7 bytes, top 8 bits taken from the reference
 *   otherwise     - 6 bytes, top 16 bits taken from the reference
 *
 * Both flag bits are read together; reading only the first one can never
 * yield 3, which would decode every 8-byte record as a 7-byte one.  Records
 * sit at arbitrary byte offsets, so each is fetched with memcpy of exactly
 * its own length, avoiding unaligned 8-byte loads and reads past the end of
 * the stream.  The short forms assume a little-endian host, as the encoder
 * does.
 */
void decompressDouble(void *cData, void *dData, int s, int compressSize, void *bData) {
    int size = s/sizeof(double);
#if DEBUG
    double t1 = get_clock();
#endif
#if !COMPRESS
    memcpy(dData, cData, s);
#else
    const unsigned char *src = (const unsigned char*)cData;
    const unsigned long long *bptr = (const unsigned long long*)bData;
    unsigned long long *decompressData = (unsigned long long*)dData;
    int _sdataIndex = (2*size+7)/8;     /* records start after the bitmap */
    int i;
    for (i = 0; i < size; ++i) {
        int bit = i<<1;
        int bitss = (src[bit>>3] >> (bit&7)) & 0x3;
        unsigned long long word = 0;
        if (bitss == 3) {
            memcpy(&word, src+_sdataIndex, 8);
            _sdataIndex += 8;
        } else if (bitss == 1) {
            memcpy(&word, src+_sdataIndex, 7);
            word = (word & 0x00ffffffffffffffULL) | (bptr[i] & 0xff00000000000000ULL);
            _sdataIndex += 7;
        } else {
            memcpy(&word, src+_sdataIndex, 6);
            word = (word & 0x0000ffffffffffffULL) | (bptr[i] & 0xffff000000000000ULL);
            _sdataIndex += 6;
        }
        decompressData[i] = word;
    }
#endif
#if DEBUG
    double t = get_clock()-t1;
    printf("done decompressing..... orig size:%d\n time:%d us", (int)size, (int)(t*1000000)) ;
#endif

}
00538
00539
00540 #else
00541
/* COMPRESS is normally defined near the top of this file; default it to
 * "enabled" so the encoder below is selected even when that definition is
 * not visible. */
#ifndef COMPRESS
#define COMPRESS 1
#endif

/*
 * Bit-packed delta compression of doubles against a reference array.
 *
 *   src          doubles to encode (s bytes)
 *   dst          output buffer, written as 64-bit words
 *   s            size of src / bData in bytes
 *   compressSize receives the number of bytes occupied by the bit stream
 *   bData        reference ("base") doubles
 *
 * For every element the XOR with the reference word is formed and its
 * leading zero bits are counted, capped at 15.  The stream then receives the
 * 4-bit count (least significant bit first) followed by the remaining
 * 64 - count bits of the XOR, most significant first.  Bits are appended
 * LSB-first into consecutive words of dst.
 *
 * The reported size is rounded UP to whole bytes so a trailing partial byte
 * of the stream is not lost.
 *
 * NOTE(review): only the first s bytes of dst are cleared, while an element
 * can take up to 68 bits; whether dst is large enough / pre-zeroed beyond s
 * depends on the caller.  The code also relies on unsigned long being
 * 64 bits wide.
 */
void compressDouble(void *src, void *dst, int s, int *compressSize, void *bData)
{
    unsigned long *dest = (unsigned long*)dst;
    unsigned long *bptr = (unsigned long*) bData;
    unsigned long *uptr = (unsigned long*) src;
    int size = s/sizeof(double);
#if DEBUG
    double t1 = get_clock();
#endif

#if !COMPRESS
    memcpy(dest, src, size*sizeof(double));
    *compressSize = s;
#else
    int f_index = 0;     /* next free bit position in the output stream */
    int i;
    int j;
    int b;
    int zers;
    unsigned long xor_data;
    memset(dest, 0, s);
    for (i = 0; i < size; ++i) {
        xor_data = (uptr[i])^(bptr[i]);
        zers = 0;
        b = (int)(sizeof(double)*8) - 1;
        /* count leading zero bits, at most 15 so the count fits in 4 bits */
        while (zers < 15 && !((xor_data >> b) & 1UL)) {
            zers++;
            b--;
        }

        for (j = 0; j < 4; j++) {
            dest[f_index>>6] |= ((unsigned long)((zers >> j) & 1)) << (f_index&0x3f);
            f_index++;
        }
        while (b >= 0) {
            dest[f_index>>6] |= ((xor_data >> b) & 1UL) << (f_index&0x3f);
            f_index++;
            b--;
        }
    }

    *compressSize = (f_index + 7)/8;

#if DEBUG
    double t = get_clock()-t1;
    printf("===>double lzc done compressing compressed size:(%d===>%d) (reduction:%d) ration=%f Timer:%d us\n\n", (int)(size*sizeof(double)), *compressSize, (int)((size*sizeof(double)-*compressSize)), (1-(double)*compressSize/(size*sizeof(double)))*100, (int)(t*1000000));
#endif

#endif
}
00602
/*
 * Decode the bit-packed double stream back into s bytes of doubles.
 *
 *   cData        compressed bit stream, read as unsigned long words
 *   dData        output buffer, s bytes (zeroed first)
 *   s            size of the decoded data in bytes
 *   compressSize length of the compressed stream (not consulted here)
 *   bData        reference ("base") doubles
 *
 * Each element starts with a 4-bit header, least significant bit first,
 * giving how many top bits of the XOR word are zero.  The remaining bits
 * follow from the highest non-skipped position down to bit 0.  The rebuilt
 * XOR word is combined with the reference word to give the output.
 * With COMPRESS disabled the stream is copied through unchanged.
 */
void decompressDouble(void *cData, void *dData, int s, int compressSize, void *bData) {
    int size = s/sizeof(double);
#if DEBUG
    double t1 = get_clock();
#endif
#if !COMPRESS
    memcpy(dData, cData, size*sizeof(double));
#else
    unsigned long *packed = (unsigned long *)cData;
    unsigned long *out = (unsigned long *)dData;
    unsigned long *ref = (unsigned long *)bData;
    int bitPos = 0;      /* read cursor into the bit stream */
    int n;

    memset(out, 0, s);
    for (n = 0; n < size; n++) {
        unsigned long delta = 0;
        int top = (int)(sizeof(double) * 8) - 1;
        int f;

        /* header: bit f set means skip another 2^f leading positions */
        for (f = 0; f < 4; f++, bitPos++) {
            if ((packed[bitPos >> 6] >> (bitPos & 0x3f)) & 1) {
                top -= (1 << f);
            }
        }
        /* payload: remaining bits, most significant first */
        for (; top >= 0; top--, bitPos++) {
            delta |= ((packed[bitPos >> 6] >> (bitPos & 0x3f)) & 1) << top;
        }
        out[n] = delta ^ ref[n];
    }

#if DEBUG
    double t = get_clock()-t1;
    printf("done decompressing..... orig size:%d time:%d us \n", size, (int)(t*1000000));
#endif

#endif
}
00655
00656 #endif
00657
00658