00001
00012
00013 #include <GKlib.h>
00014
00015
00023
00024 char gk_threetoone(char *res) {
00025
00026 res[0] = toupper(res[0]);
00027 res[1] = toupper(res[1]);
00028 res[2] = toupper(res[2]);
00029 if(strcmp(res,"ALA") == 0) {
00030 return 'A';
00031 }
00032 else if(strcmp(res,"CYS") == 0) {
00033 return 'C';
00034 }
00035 else if(strcmp(res,"ASP") == 0) {
00036 return 'D';
00037 }
00038 else if(strcmp(res,"GLU") == 0) {
00039 return 'E';
00040 }
00041 else if(strcmp(res,"PHE") == 0) {
00042 return 'F';
00043 }
00044 else if(strcmp(res,"GLY") == 0) {
00045 return 'G';
00046 }
00047 else if(strcmp(res,"HIS") == 0) {
00048 return 'H';
00049 }
00050 else if(strcmp(res,"ILE") == 0) {
00051 return 'I';
00052 }
00053 else if(strcmp(res,"LYS") == 0) {
00054 return 'K';
00055 }
00056 else if(strcmp(res,"LEU") == 0) {
00057 return 'L';
00058 }
00059 else if(strcmp(res,"MET") == 0) {
00060 return 'M';
00061 }
00062 else if(strcmp(res,"ASN") == 0) {
00063 return 'N';
00064 }
00065 else if(strcmp(res,"PRO") == 0) {
00066 return 'P';
00067 }
00068 else if(strcmp(res,"GLN") == 0) {
00069 return 'Q';
00070 }
00071 else if(strcmp(res,"ARG") == 0) {
00072 return 'R';
00073 }
00074 else if(strcmp(res,"SER") == 0) {
00075 return 'S';
00076 }
00077 else if(strcmp(res,"THR") == 0) {
00078 return 'T';
00079 }
00080 else if(strcmp(res,"SCY") == 0) {
00081 return 'U';
00082 }
00083 else if(strcmp(res,"VAL") == 0) {
00084 return 'V';
00085 }
00086 else if(strcmp(res,"TRP") == 0) {
00087 return 'W';
00088 }
00089 else if(strcmp(res,"TYR") == 0) {
00090 return 'Y';
00091 }
00092 else {
00093 return 'X';
00094 }
00095 }
00096
00097
00104
00105 void gk_freepdbf(pdbf *p) {
00106 int i;
00107 if(p != NULL) {
00108 gk_free((void **)&p->resSeq, LTERM);
00109 for(i=0; i<p->natoms; i++) {
00110 gk_free((void **)&p->atoms[i].name, &p->atoms[i].resname, LTERM);
00111 }
00112 for(i=0; i<p->nresidues; i++) {
00113 gk_free((void *)&p->threeresSeq[i], LTERM);
00114 }
00115
00116
00117 gk_free((void **)&p->bbs, &p->cas, &p->atoms, &p->cm, &p->threeresSeq, LTERM);
00118 }
00119 gk_free((void **)&p, LTERM);
00120 }
00121
00122
00131
00132 pdbf *gk_readpdbfile(char *fname) {
00133 int i=0, res=0;
00134 char linetype[6];
00135 int aserial;
00136 char aname[5] = " \0";
00137 char altLoc = ' ';
00138 char rname[4] = " \0";
00139 char chainid = ' ';
00140 char oldchainid = ' ';
00141 int rserial;
00142 int oldRserial = -37;
00143 char icode = ' ';
00144 char element = ' ';
00145 double x;
00146 double y;
00147 double z;
00148 double avgx;
00149 double avgy;
00150 double avgz;
00151 double opcy;
00152 double tmpt;
00153 char line[MAXLINELEN];
00154 int corruption=0;
00155 int nresatoms;
00156
00157 int atoms=0, residues=0, cas=0, bbs=0, firstres=1;
00158 pdbf *toFill = gk_malloc(sizeof(pdbf),"fillme");
00159 FILE *FPIN;
00160
00161 FPIN = gk_fopen(fname,"r",fname);
00162 while(fgets(line, 256, FPIN)) {
00163 sscanf(line,"%s ",linetype);
00164
00165
00166 if(strstr(linetype, "ATOM") != NULL) {
00167 sscanf(line, "%6s%5d%*1c%4c%1c%3c%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf %c\n",
00168 linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,&element);
00169 sscanf(linetype, " %s ",linetype);
00170 sscanf(aname, " %s ",aname);
00171 sscanf(rname, " %s ",rname);
00172 if(altLoc != ' ') {
00173 corruption = corruption|CRP_ALTLOCS;
00174 }
00175
00176 if(firstres == 1) {
00177 oldRserial = rserial;
00178 oldchainid = chainid;
00179 residues++;
00180 firstres = 0;
00181 }
00182 if(oldRserial != rserial) {
00183 residues++;
00184 oldRserial = rserial;
00185 }
00186 if(oldchainid != chainid) {
00187 corruption = corruption|CRP_MULTICHAIN;
00188 }
00189 oldchainid = chainid;
00190 atoms++;
00191 if(strcmp(aname,"CA") == 0) {
00192 cas++;
00193 }
00194 if(strcmp(aname,"N") == 0 || strcmp(aname,"CA") == 0 ||
00195 strcmp(aname,"C") == 0 || strcmp(aname,"O") == 0) {
00196 bbs++;
00197 }
00198 }
00199 else if(strstr(linetype, "ENDMDL") != NULL || strstr(linetype, "END") != NULL || strstr(linetype, "TER") != NULL) {
00200 break;
00201 }
00202 }
00203 fclose(FPIN);
00204
00205
00206 toFill->natoms = atoms;
00207 toFill->ncas = cas;
00208 toFill->nbbs = bbs;
00209 toFill->nresidues = residues;
00210 toFill->resSeq = (char *) gk_malloc (residues*sizeof(char),"residue seq");
00211 toFill->threeresSeq = (char **)gk_malloc (residues*sizeof(char *),"residue seq");
00212 toFill->atoms = (atom *) gk_malloc (atoms*sizeof(atom), "atoms");
00213 toFill->bbs = (atom **)gk_malloc ( bbs*sizeof(atom *),"bbs");
00214 toFill->cas = (atom **)gk_malloc ( cas*sizeof(atom *),"cas");
00215 toFill->cm = (center_of_mass *)gk_malloc(residues*sizeof(center_of_mass),"center of mass");
00216 res=0; firstres=1; cas=0; bbs=0; i=0;
00217 avgx = 0.0; avgy = 0.0; avgz = 0.0;
00218 nresatoms = 0;
00219
00220 FPIN = gk_fopen(fname,"r",fname);
00221 while(fgets(line, 256, FPIN)) {
00222 sscanf(line,"%s ",linetype);
00223
00224
00225 if(strstr(linetype, "ATOM") != NULL ) {
00226
00227
00228 sscanf(line, "%6s%5d%*1c%4c%1c%3c%*1c%1c%4d%1c%*3c%8lf%8lf%8lf%6lf%6lf %c\n",
00229 linetype,&aserial,aname,&altLoc,rname,&chainid,&rserial,&icode,&x,&y,&z,&opcy,&tmpt,&element);
00230 sscanf(aname, "%s",aname);
00231 sscanf(rname, "%s",rname);
00232
00233 if(firstres == 1) {
00234 toFill->resSeq[res] = gk_threetoone(rname);
00235 toFill->threeresSeq[res] = gk_strdup(rname);
00236 oldRserial = rserial;
00237 res++;
00238 firstres = 0;
00239 }
00240 if(oldRserial != rserial) {
00241
00242 toFill->cm[res-1].x = avgx/nresatoms;
00243 toFill->cm[res-1].y = avgy/nresatoms;
00244 toFill->cm[res-1].z = avgz/nresatoms;
00245 avgx = 0.0; avgy = 0.0; avgz = 0.0;
00246 nresatoms = 0;
00247 toFill->cm[res-1].name = toFill->resSeq[res-1];
00248
00249 toFill->threeresSeq[res] = gk_strdup(rname);
00250 toFill->resSeq[res] = gk_threetoone(rname);
00251 res++;
00252 oldRserial = rserial;
00253 }
00254 avgx += x;
00255 avgy += y;
00256 avgz += z;
00257 nresatoms++;
00258
00259 toFill->atoms[i].x = x;
00260 toFill->atoms[i].y = y;
00261 toFill->atoms[i].z = z;
00262 toFill->atoms[i].opcy = opcy;
00263 toFill->atoms[i].tmpt = tmpt;
00264 toFill->atoms[i].element = element;
00265 toFill->atoms[i].serial = aserial;
00266 toFill->atoms[i].chainid = chainid;
00267 toFill->atoms[i].altLoc = altLoc;
00268 toFill->atoms[i].rserial = rserial;
00269 toFill->atoms[i].icode = icode;
00270 toFill->atoms[i].name = gk_strdup(aname);
00271 toFill->atoms[i].resname = gk_strdup(rname);
00272
00273 if(strcmp(aname,"CA") == 0) {
00274 toFill->cas[cas] = &(toFill->atoms[i]);
00275 cas++;
00276 }
00277 if(strcmp(aname,"N") == 0 || strcmp(aname,"CA") == 0 || strcmp(aname,"C") == 0 || strcmp(aname,"O") == 0) {
00278 toFill->bbs[bbs] = &(toFill->atoms[i]);
00279 bbs++;
00280 }
00281 i++;
00282 }
00283 else if(strstr(linetype, "ENDMDL") != NULL || strstr(linetype, "END") != NULL || strstr(linetype, "TER") != NULL) {
00284 break;
00285 }
00286 }
00287
00288 toFill->cm[res-1].x = avgx/nresatoms;
00289 toFill->cm[res-1].y = avgy/nresatoms;
00290 toFill->cm[res-1].z = avgz/nresatoms;
00291
00292 if(cas != residues) {
00293 printf("Number of residues and CA coordinates differs by %d (!)\n",residues-cas);
00294 if(cas < residues) {
00295 corruption = corruption|CRP_MISSINGCA;
00296 }
00297 else if(cas > residues) {
00298 corruption = corruption|CRP_MULTICA;
00299 }
00300 }
00301 if(bbs < residues*4) {
00302 corruption = corruption|CRP_MISSINGBB;
00303 }
00304 else if(bbs > residues*4) {
00305 corruption = corruption|CRP_MULTIBB;
00306 }
00307 fclose(FPIN);
00308 toFill->corruption = corruption;
00309
00310
00311 return(toFill);
00312 }
00313
00314
00325
00326 void gk_writefastafrompdb(pdbf *pb, char *fname) {
00327 int i;
00328 FILE *FPOUT;
00329
00330 FPOUT = gk_fopen(fname,"w",fname);
00331 fprintf(FPOUT,"> %s\n",fname);
00332
00333 for(i=0; i<pb->nresidues; i++)
00334 fprintf(FPOUT,"%c",pb->resSeq[i]);
00335
00336 fprintf(FPOUT,"\n");
00337 fclose(FPOUT);
00338 }
00339
00340
00349
00350 void gk_writecentersofmass(pdbf *p, char *fname) {
00351 int i;
00352 FILE *FPIN;
00353 FPIN = gk_fopen(fname,"w",fname);
00354 for(i=0; i<p->nresidues; i++) {
00355 fprintf(FPIN,"%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
00356 "ATOM ",i,"CA",' ',p->threeresSeq[i],' ',i,' ',p->cm[i].x,p->cm[i].y,p->cm[i].z,1.0,-37.0);
00357 }
00358 fclose(FPIN);
00359 }
00360
00361
00370
00371 void gk_writefullatom(pdbf *p, char *fname) {
00372 int i;
00373 FILE *FPIN;
00374 FPIN = gk_fopen(fname,"w",fname);
00375 for(i=0; i<p->natoms; i++) {
00376 fprintf(FPIN,"%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
00377 "ATOM ",p->atoms[i].serial,p->atoms[i].name,p->atoms[i].altLoc,p->atoms[i].resname,p->atoms[i].chainid,p->atoms[i].rserial,p->atoms[i].icode,p->atoms[i].x,p->atoms[i].y,p->atoms[i].z,p->atoms[i].opcy,p->atoms[i].tmpt);
00378 }
00379 fclose(FPIN);
00380 }
00381
00382
00391
00392 void gk_writebackbone(pdbf *p, char *fname) {
00393 int i;
00394 FILE *FPIN;
00395 FPIN = gk_fopen(fname,"w",fname);
00396 for(i=0; i<p->nbbs; i++) {
00397 fprintf(FPIN,"%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
00398 "ATOM ",p->bbs[i]->serial,p->bbs[i]->name,p->bbs[i]->altLoc,p->bbs[i]->resname,p->bbs[i]->chainid,p->bbs[i]->rserial,p->bbs[i]->icode,p->bbs[i]->x,p->bbs[i]->y,p->bbs[i]->z,p->bbs[i]->opcy,p->bbs[i]->tmpt);
00399 }
00400 fclose(FPIN);
00401 }
00402
00403
00412
00413 void gk_writealphacarbons(pdbf *p, char *fname) {
00414 int i;
00415 FILE *FPIN;
00416 FPIN = gk_fopen(fname,"w",fname);
00417 for(i=0; i<p->ncas; i++) {
00418 fprintf(FPIN,"%-6s%5d %4s%1c%3s %1c%4d%1c %8.3lf%8.3lf%8.3lf%6.2f%6.2f\n",
00419 "ATOM ",p->cas[i]->serial,p->cas[i]->name,p->cas[i]->altLoc,p->cas[i]->resname,p->cas[i]->chainid,p->cas[i]->rserial,p->cas[i]->icode,p->cas[i]->x,p->cas[i]->y,p->cas[i]->z,p->cas[i]->opcy,p->cas[i]->tmpt);
00420 }
00421 fclose(FPIN);
00422 }
00423
00424
00434
00435 void gk_showcorruption(pdbf *p) {
00436 int corruption = p->corruption;
00437 if(corruption&CRP_ALTLOCS)
00438 printf("Multiple coordinate sets for at least one atom\n");
00439 if(corruption&CRP_MISSINGCA)
00440 printf("Missing coordiantes for at least one CA atom\n");
00441 if(corruption&CRP_MISSINGBB)
00442 printf("Missing coordiantes for at least one backbone atom (N,CA,C,O)\n");
00443 if(corruption&CRP_MULTICHAIN)
00444 printf("File contains coordinates for multiple chains\n");
00445 if(corruption&CRP_MULTICA)
00446 printf("Multiple CA atoms found for the same residue (could be alternate locators)\n");
00447 if(corruption&CRP_MULTICA)
00448 printf("Multiple copies of backbone atoms found for the same residue (could be alternate locators)\n");
00449 }
00450
00451
00452
00453
00454
00455
00456
00457
00458
00459
00460