00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #include <GKlib.h>
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027 void gk_seq_init(gk_seq_t *seq)
00028 {
00029
00030 seq->len = 0;
00031 seq->sequence = NULL;
00032
00033 seq->pssm = NULL;
00034 seq->psfm = NULL;
00035
00036 seq->name = NULL;
00037
00038 }
00039
00040
00046
00047
00048 gk_i2cc2i_t *gk_i2cc2i_create_common(char *alphabet)
00049 {
00050
00051
00052 int nsymbols;
00053 gk_idx_t i;
00054 gk_i2cc2i_t *t;
00055
00056 nsymbols = strlen(alphabet);
00057 t = gk_malloc(sizeof(gk_i2cc2i_t),"gk_i2c_create_common");
00058 t->n = nsymbols;
00059 t->i2c = gk_cmalloc(256, "gk_i2c_create_common");
00060 t->c2i = gk_imalloc(256, "gk_i2c_create_common");
00061
00062
00063 gk_cset(256, -1, t->i2c);
00064 gk_iset(256, -1, t->c2i);
00065
00066 for(i=0;i<nsymbols;i++){
00067 t->i2c[i] = alphabet[i];
00068 t->c2i[(int)alphabet[i]] = i;
00069 }
00070
00071 return t;
00072
00073 }
00074
00075
00076
00082
00083 gk_seq_t *gk_seq_ReadGKMODPSSM(char *filename)
00084 {
00085 gk_seq_t *seq;
00086 gk_idx_t i, j, ii;
00087 size_t ntokens, nbytes, len;
00088 FILE *fpin;
00089
00090
00091 gk_Tokens_t tokens;
00092 static char *AAORDER = "ARNDCQEGHILKMFPSTWYVBZX*";
00093 static int PSSMWIDTH = 20;
00094 char *header, line[MAXLINELEN];
00095 gk_i2cc2i_t *converter;
00096
00097 header = gk_cmalloc(PSSMWIDTH, "gk_seq_ReadGKMODPSSM: header");
00098
00099 converter = gk_i2cc2i_create_common(AAORDER);
00100
00101 gk_getfilestats(filename, &len, &ntokens, NULL, &nbytes);
00102 len --;
00103
00104 seq = gk_malloc(sizeof(gk_seq_t),"gk_seq_ReadGKMODPSSM");
00105 gk_seq_init(seq);
00106
00107 seq->len = len;
00108 seq->sequence = gk_imalloc(len, "gk_seq_ReadGKMODPSSM");
00109 seq->pssm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
00110 seq->psfm = gk_iAllocMatrix(len, PSSMWIDTH, 0, "gk_seq_ReadGKMODPSSM");
00111
00112 seq->nsymbols = PSSMWIDTH;
00113 seq->name = gk_getbasename(filename);
00114
00115 fpin = gk_fopen(filename,"r","gk_seq_ReadGKMODPSSM");
00116
00117
00118
00119 if (fgets(line, MAXLINELEN-1, fpin) == NULL)
00120 errexit("Unexpected end of file: %s\n", filename);
00121 gk_strtoupper(line);
00122 gk_strtokenize(line, " \t\n", &tokens);
00123
00124 for (i=0; i<PSSMWIDTH; i++)
00125 header[i] = tokens.list[i][0];
00126
00127 gk_freetokenslist(&tokens);
00128
00129
00130
00131 for (i=0, ii=0; ii<len; ii++) {
00132 if (fgets(line, MAXLINELEN-1, fpin) == NULL)
00133 errexit("Unexpected end of file: %s\n", filename);
00134 gk_strtoupper(line);
00135 gk_strtokenize(line, " \t\n", &tokens);
00136
00137 seq->sequence[i] = converter->c2i[(int)tokens.list[1][0]];
00138
00139 for (j=0; j<PSSMWIDTH; j++) {
00140 seq->pssm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+j]);
00141 seq->psfm[i][converter->c2i[(int)header[j]]] = atoi(tokens.list[2+PSSMWIDTH+j]);
00142 }
00143
00144
00145
00146 gk_freetokenslist(&tokens);
00147 i++;
00148 }
00149
00150 seq->len = i;
00151
00152 gk_free((void **)&header, LTERM);
00153 gk_fclose(fpin);
00154
00155 return seq;
00156 }
00157
00158
00159
00165
00166 void gk_seq_free(gk_seq_t *seq)
00167 {
00168 gk_iFreeMatrix(&seq->pssm, seq->len, seq->nsymbols);
00169 gk_iFreeMatrix(&seq->psfm, seq->len, seq->nsymbols);
00170 gk_free((void **)&seq->name, &seq->sequence, LTERM);
00171
00172 gk_free((void **) &seq, LTERM);
00173
00174 }