00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 
00031 
00032 
00033 #include "charm.h"
00034 #include <cstdlib>
00035 #include <math.h>
00036 #include <stdio.h>
00037 #include <time.h>
00038 #include <vector>
00039 #define NUM_TREES 20
00040 #define LB_CLASSES 6
00041 #define DEPTH 9
00042 
00043 namespace rfmodel {
// Dense, row-major matrix of doubles with a few MATLAB-style helpers.
//
// Element (row, col) is stored at data[row * num_cols + col]. Most helpers
// write their result into a caller-supplied matrix R and index R.data
// directly; unless a helper says otherwise, R.data must already be large
// enough for the output shape given in that helper's description.
struct DataMatrix {
  std::vector<double> data;
  int num_rows;
  int num_cols;

  // Creates an empty 0 x 0 matrix (dimensions are zeroed, never left
  // indeterminate).
  DataMatrix() : num_rows(0), num_cols(0) {}

  // Creates an nrows x ncols matrix filled with 1 when `ones` is true and
  // with 0 otherwise.
  DataMatrix(int nrows, int ncols, bool ones = false)
      : data(static_cast<size_t>(nrows) * static_cast<size_t>(ncols), ones ? 1.0 : 0.0),
        num_rows(nrows),
        num_cols(ncols) {}

  // Creates an nrows x ncols matrix holding a copy of `d` (row-major).
  // Asserts that d.size() equals nrows * ncols.
  DataMatrix(const std::vector<double>& d, int nrows, int ncols)
      : data(d), num_rows(nrows), num_cols(ncols) {
    CkAssert(d.size() == static_cast<size_t>(nrows) * static_cast<size_t>(ncols));
  }

  // Mutable access to element (row x, column y). No bounds checking.
  inline double& data_at(int x, int y) { return data[x * num_cols + y]; }

  // Tiles this matrix asize times vertically and bsize times horizontally
  // into R. R.data must hold (num_rows * asize) * (num_cols * bsize)
  // elements; R's dimensions are not modified.
  inline void repmat(int asize, int bsize, DataMatrix& R) const {
    const int out_rows = num_rows * asize;
    const int out_cols = num_cols * bsize;
    for (int i = 0; i < out_rows; i++) {
      const int src_row = (i % num_rows) * num_cols;
      for (int j = 0; j < out_cols; j++)
        R.data[i * out_cols + j] = data[src_row + (j % num_cols)];
    }
  }

  // Stores in R the linear indices of all elements strictly less than
  // `scalar`. R becomes a (count x 1) column; R does not need to be
  // pre-sized.
  inline void findIndicesLT(double scalar, DataMatrix& R) const {
    std::vector<double> hits;
    const int total = num_rows * num_cols;
    for (int i = 0; i < total; i++) {
      if (data[i] < scalar) hits.push_back(i);
    }
    // Built in a scratch vector so an undersized R cannot be overrun.
    R.data.swap(hits);
    R.num_rows = static_cast<int>(R.data.size());
    R.num_cols = 1;
  }

  // Copies the rows listed in select_rows (s_rows_size entries) into R, in
  // the given order. R.data must hold s_rows_size * num_cols elements.
  inline void subset_rows(int* select_rows, int s_rows_size, DataMatrix& R) const {
    for (int j = 0; j < s_rows_size; j++) {
      const int src = select_rows[j] * num_cols;
      const int dst = j * num_cols;
      for (int i = 0; i < num_cols; i++)
        R.data[dst + i] = data[src + i];
    }
  }

  // Copies the columns listed in select_cols (s_cols_size entries) into R, in
  // the given order. R.data must hold num_rows * s_cols_size elements and is
  // written row-major with s_cols_size columns per row.
  inline void subset_cols(int* select_cols, int s_cols_size, DataMatrix& R) const {
    for (int i = 0; i < num_rows; i++) {
      // Source rows are num_cols wide; output rows are s_cols_size wide.
      const int src = i * num_cols;
      const int dst = i * s_cols_size;
      for (int j = 0; j < s_cols_size; j++)
        R.data[dst + j] = data[src + select_cols[j]];
    }
  }

  // Writes a 0/1 mask into R: 1 where the element is within 0.0001 of
  // `value`, 0 elsewhere. R.data must hold num_rows * num_cols elements.
  inline void findValue(double value, DataMatrix& R) const {
    for (int i = 0; i < num_rows; i++) {
      const int row = i * num_cols;
      for (int j = 0; j < num_cols; j++)
        R.data[row + j] = (fabs(data[row + j] - value) < 0.0001) ? 1 : 0;
    }
  }

  // Stores in R the linear indices of all elements that differ from `scalar`
  // by more than 0.0001. R becomes a (count x 1) column; R does not need to
  // be pre-sized.
  inline void findIndicesNE(double scalar, DataMatrix& R) const {
    std::vector<double> hits;
    const int total = num_rows * num_cols;
    for (int i = 0; i < total; i++) {
      if (fabs(data[i] - scalar) > 0.0001) hits.push_back(i);
    }
    R.data.swap(hits);
    R.num_rows = static_cast<int>(R.data.size());
    R.num_cols = 1;
  }

  // Stores in R the linear indices of all elements within 0.0001 of
  // `scalar`. R becomes a (count x 1) column; R does not need to be
  // pre-sized.
  inline void findIndicesE(double scalar, DataMatrix& R) const {
    std::vector<double> hits;
    const int total = num_rows * num_cols;
    for (int i = 0; i < total; i++) {
      if (fabs(data[i] - scalar) < 0.0001) hits.push_back(i);
    }
    R.data.swap(hits);
    R.num_rows = static_cast<int>(R.data.size());
    R.num_cols = 1;
  }

  // Returns the linear index of the largest element (the first one on ties),
  // or -1 when the matrix holds no data.
  inline int maxIndex() const {
    if (data.empty()) return -1;
    int best = 0;
    double best_value = data[0];
    const int total = num_rows * num_cols;
    for (int i = 1; i < total; i++) {
      if (best_value < data[i]) {
        best_value = data[i];
        best = i;
      }
    }
    return best;
  }

  // Returns an element chosen with rand(). The matrix must be non-empty
  // (otherwise the modulo below would divide by zero).
  inline double randomValue() const {
    const int total = num_rows * num_cols;
    CkAssert(total > 0);
    return data[rand() % total];
  }

  // Accumulates the product X * Y into this matrix (this += X * Y).
  // This matrix must already be X.num_rows x Y.num_cols and is NOT cleared
  // first, so start from a zero matrix to obtain the plain product.
  inline void matrix_multiply(const DataMatrix& X, const DataMatrix& Y) {
    const int x_rows = X.num_rows;
    const int inner = X.num_cols;
    const int y_cols = Y.num_cols;

    for (int i = 0; i < x_rows; i++) {
      for (int j = 0; j < inner; j++) {
        const double x_ij = X.data[i * inner + j];
        for (int k = 0; k < y_cols; k++)
          data_at(i, k) += x_ij * Y.data[j * y_cols + k];
      }
    }
  }

  // Horizontally concatenates A and B into this matrix ([A B]), using
  // A.num_rows rows. This matrix's data must already hold
  // A.num_rows * (A.num_cols + B.num_cols) elements.
  inline void combine(const DataMatrix& A, const DataMatrix& B) {
    const int rows = A.num_rows;
    const int a_cols = A.num_cols;
    const int b_cols = B.num_cols;
    const int cols = a_cols + b_cols;
    for (int i = 0; i < rows; i++) {
      const int dst = i * cols;
      for (int j = 0; j < a_cols; j++)
        data[dst + j] = A.data[i * a_cols + j];
      for (int j = 0; j < b_cols; j++)
        data[dst + a_cols + j] = B.data[i * b_cols + j];
    }
  }
};
00173 
// One weak classifier: an integer classifier id, two integer parameters, a
// weight vector, and an evaluation routine declared here and defined
// elsewhere. Member meanings are not visible in this header; the notes below
// are assumptions to confirm against the weakTest() implementation.
struct Model {
  int classifierID;  // presumably selects which kind of weak learner this is — confirm
  int r1;            // NOTE(review): looks like a feature/column index — confirm
  int r2;            // NOTE(review): looks like a second feature/column index — confirm
  std::vector<double> w;  // presumably the learner's weights/threshold — confirm
  // Evaluates this classifier on X and returns a double; does not modify the
  // model (const). The meaning of the returned value is defined by the
  // out-of-line implementation.
  double weakTest(const DataMatrix& X) const;
};
00181 
00182 struct TreeModel {
00183   int l_X, l_D;
00184   int* classes;
00185   std::vector<Model> weakModels;
00186   std::vector<double> leafdist;
00187   void treeTest(const DataMatrix& X, std::vector<double>& Ysoft) const;
00188 };
00189 
// A forest of TreeModel instances together with a fixed-size array of
// LB_CLASSES class values. Both member functions are defined out of line.
struct ForestModel {
  int classes[LB_CLASSES];  // one entry per class; presumably the class labels — confirm
  std::vector<TreeModel> treeModels;  // the trees making up the forest
  // Loads the model; `dir` is presumably the directory holding the model
  // files — confirm against the implementation.
  void readModel(const char* dir);
  // Runs the forest on the values in X, described by num_rows and num_cols,
  // and returns an int. NOTE(review): X is taken by non-const reference, so
  // the implementation may modify it; the meaning of the returned int
  // (presumably the predicted class) should be confirmed there.
  int forestTest(std::vector<double>& X, int num_rows, int num_cols);
};
00196 
00197 }