00001
00010 #include <GKlib.h>
00011
00012
00014
/*! Command-line parameters and run-time state shared by the whole program. */
typedef struct {
  ssize_t minlen;     /*!< minimum pattern length (-minlen) */
  ssize_t maxlen;     /*!< maximum pattern length (-maxlen) */
  ssize_t minfreq;    /*!< minimum pattern frequency (-minfreq) */
  ssize_t maxfreq;    /*!< maximum pattern frequency (-maxfreq) */
  char *filename;     /*!< name of the input transaction file */
  int silent;         /*!< non-zero suppresses printing of the itemsets (-silent) */
  ssize_t nitemsets;  /*!< running count of the itemsets reported so far */
  char *clabelfile;   /*!< name of the column-label file (-clabels), or NULL */
  char **clabels;     /*!< one label string per matrix column */
} params_t;
00024
00025
00027
/* Identifiers returned by the option parser for each command-line switch;
   they are matched in the switch statement of parse_cmdline(). */
#define CMD_MINLEN      1   /* -minlen=int */
#define CMD_MAXLEN      2   /* -maxlen=int */
#define CMD_MINFREQ     3   /* -minfreq=int */
#define CMD_MAXFREQ     4   /* -maxfreq=int */
#define CMD_SILENT      5   /* -silent */
#define CMD_CLABELFILE  6   /* -clabels=filename */
#define CMD_HELP        10  /* -help */
00035
00036
00037
00039
00040 static struct gk_option long_options[] = {
00041 {"minlen", 1, 0, CMD_MINLEN},
00042 {"maxlen", 1, 0, CMD_MAXLEN},
00043 {"minfreq", 1, 0, CMD_MINFREQ},
00044 {"maxfreq", 1, 0, CMD_MAXFREQ},
00045 {"silent", 0, 0, CMD_SILENT},
00046 {"clabels", 1, 0, CMD_CLABELFILE},
00047 {"help", 0, 0, CMD_HELP},
00048 {0, 0, 0, 0}
00049 };
00050
00051
00052
00053
00054
/*! Full help text, one output line per row. The empty string in the last
    row is the sentinel that stops the printing loop in parse_cmdline(). */
static char helpstr[][100] = {
  /* usage banner */
  " ",
  "Usage: fis [options] <mat-file>",
  " ",

  /* required parameters */
  " Required parameters",
  " mat-file",
  " The name of the file storing the transactions. The file is in ",
  " Cluto's .mat format.",
  " ",

  /* optional parameters */
  " Optional parameters",
  " -minlen=int",
  " Specifies the minimum length of the patterns. [default: 1]",
  " ",
  " -maxlen=int",
  " Specifies the maximum length of the patterns. [default: none]",
  " ",
  " -minfreq=int",
  " Specifies the minimum frequency of the patterns. [default: 10]",
  " ",
  " -maxfreq=int",
  " Specifies the maximum frequency of the patterns. [default: none]",
  " ",
  " -silent",
  " Does not print the discovered itemsets.",
  " ",
  " -clabels=filename",
  " Specifies the name of the file that stores the column labels.",
  " ",
  " -help",
  " Prints this message.",

  /* sentinel */
  ""
};
00087
/*! Abbreviated usage text printed by parse_cmdline() when the number of
    positional arguments is wrong. The trailing empty string is the sentinel
    that stops the printing loop. */
static char shorthelpstr[][100] = {
  " ",
  " Usage: fis [options] <mat-file>",
  " use 'fis -help' for a summary of the options.",

  /* sentinel */
  ""
};
00094
00095
00096
00097
00099
00100 void print_init_info(params_t *params, gk_csr_t *mat);
00101 void print_final_info(params_t *params);
00102 params_t *parse_cmdline(int argc, char *argv[]);
00103 void print_an_itemset(void *stateptr, int nitems, int *itemind,
00104 int ntrans, int *tranind);
00105
00106
00107
00109
00110 int main(int argc, char *argv[])
00111 {
00112 ssize_t i;
00113 char line[8192];
00114 FILE *fpin;
00115 params_t *params;
00116 gk_csr_t *mat;
00117
00118 params = parse_cmdline(argc, argv);
00119 params->nitemsets = 0;
00120
00121
00122 mat = gk_csr_Read(params->filename, GK_CSR_FMT_CLUTO, 1, 1);
00123 gk_csr_CreateIndex(mat, GK_CSR_COL);
00124
00125
00126 params->clabels = (char **)gk_malloc(mat->ncols*sizeof(char *), "main: clabels");
00127 if (params->clabelfile == NULL) {
00128 for (i=0; i<mat->ncols; i++) {
00129 sprintf(line, "%zd", i);
00130 params->clabels[i] = gk_strdup(line);
00131 }
00132 }
00133 else {
00134 fpin = gk_fopen(params->clabelfile, "r", "main: fpin");
00135 for (i=0; i<mat->ncols; i++) {
00136 if (fgets(line, 8192, fpin) == NULL)
00137 errexit("Failed on fgets.\n");
00138 params->clabels[i] = gk_strdup(gk_strtprune(line, " \n\t"));
00139 }
00140 gk_fclose(fpin);
00141 }
00142
00143
00144 print_init_info(params, mat);
00145
00146 gk_find_frequent_itemsets(mat->nrows, mat->rowptr, mat->rowind,
00147 params->minfreq, params->maxfreq, params->minlen, params->maxlen,
00148 &print_an_itemset, (void *)params);
00149
00150 printf("Total itemsets found: %zd\n", params->nitemsets);
00151
00152 print_final_info(params);
00153 }
00154
00155
00156
00157
00159
00160 void print_init_info(params_t *params, gk_csr_t *mat)
00161 {
00162 printf("*******************************************************************************\n");
00163 printf(" fis\n\n");
00164 printf("Matrix Information ---------------------------------------------------------\n");
00165 printf(" input file=%s, [%d, %d, %zd]\n",
00166 params->filename, mat->nrows, mat->ncols, mat->rowptr[mat->nrows]);
00167
00168 printf("\n");
00169 printf("Options --------------------------------------------------------------------\n");
00170 printf(" minlen=%zd, maxlen=%zd, minfeq=%zd, maxfreq=%zd\n",
00171 params->minlen, params->maxlen, params->minfreq, params->maxfreq);
00172
00173 printf("\n");
00174 printf("Finding patterns... -----------------------------------------------------\n");
00175 }
00176
00177
00178
00180
00181 void print_final_info(params_t *params)
00182 {
00183 printf("\n");
00184 printf("Memory Usage Information -----------------------------------------------------\n");
00185 printf(" Maximum memory used: %10zd bytes\n", (ssize_t) gk_GetMaxMemoryUsed());
00186 printf(" Current memory used: %10zd bytes\n", (ssize_t) gk_GetCurMemoryUsed());
00187 printf("********************************************************************************\n");
00188 }
00189
00190
00191
00193
00194 params_t *parse_cmdline(int argc, char *argv[])
00195 {
00196 int i;
00197 int c, option_index;
00198 params_t *params;
00199
00200 params = (params_t *)gk_malloc(sizeof(params_t), "parse_cmdline: params");
00201
00202
00203 params->minlen = 1;
00204 params->maxlen = -1;
00205 params->minfreq = 10;
00206 params->maxfreq = -1;
00207 params->silent = 0;
00208 params->filename = NULL;
00209 params->clabelfile = NULL;
00210
00211
00212
00213 while ((c = gk_getopt_long_only(argc, argv, "", long_options, &option_index)) != -1) {
00214 switch (c) {
00215 case CMD_MINLEN:
00216 if (gk_optarg) params->minlen = atoi(gk_optarg);
00217 break;
00218 case CMD_MAXLEN:
00219 if (gk_optarg) params->maxlen = atoi(gk_optarg);
00220 break;
00221 case CMD_MINFREQ:
00222 if (gk_optarg) params->minfreq = atoi(gk_optarg);
00223 break;
00224 case CMD_MAXFREQ:
00225 if (gk_optarg) params->maxfreq = atoi(gk_optarg);
00226 break;
00227
00228 case CMD_SILENT:
00229 params->silent = 1;
00230 break;
00231
00232 case CMD_CLABELFILE:
00233 if (gk_optarg) params->clabelfile = gk_strdup(gk_optarg);
00234 break;
00235
00236 case CMD_HELP:
00237 for (i=0; strlen(helpstr[i]) > 0; i++)
00238 printf("%s\n", helpstr[i]);
00239 exit(0);
00240 break;
00241 case '?':
00242 default:
00243 printf("Illegal command-line option(s)\nUse %s -help for a summary of the options.\n", argv[0]);
00244 exit(0);
00245 }
00246 }
00247
00248 if (argc-gk_optind != 1) {
00249 printf("Unrecognized parameters.");
00250 for (i=0; strlen(shorthelpstr[i]) > 0; i++)
00251 printf("%s\n", shorthelpstr[i]);
00252 exit(0);
00253 }
00254
00255 params->filename = gk_strdup(argv[gk_optind++]);
00256
00257 if (!gk_fexists(params->filename))
00258 errexit("input file %s does not exist.\n", params->filename);
00259
00260 return params;
00261 }
00262
00263
00264
00265
00267
00268 void print_an_itemset(void *stateptr, int nitems, int *itemids, int ntrans,
00269 int *transids)
00270 {
00271 ssize_t i;
00272 params_t *params;
00273
00274 params = (params_t *)stateptr;
00275 params->nitemsets++;
00276
00277 if (!params->silent) {
00278 printf("%4zd %4d %4d => ", params->nitemsets, nitems, ntrans);
00279 for (i=0; i<nitems; i++)
00280 printf(" %s", params->clabels[itemids[i]]);
00281 printf("\n");
00282 for (i=0; i<ntrans; i++)
00283 printf(" %d\n", transids[i]);
00284 printf("\n");
00285 }
00286 }