diff options
author | Thomas White <taw@physics.org> | 2010-03-25 11:53:48 +0100 |
---|---|---|
committer | Thomas White <taw@physics.org> | 2010-03-25 11:53:48 +0100 |
commit | b372c5d77caa1bc9bd4c8f69de76d064f8d93ccd (patch) | |
tree | d1571f069f33730bef0d76964f9e8ce95d4bdb95 /src | |
parent | f93bd1a5a69ad5061a1289bf77c701c13275842f (diff) |
Improved PDB parser
Diffstat (limited to 'src')
-rw-r--r-- | src/sfac.c | 66 | ||||
-rw-r--r-- | src/sfac.h | 13 | ||||
-rw-r--r-- | src/utils.c | 88 | ||||
-rw-r--r-- | src/utils.h | 8 |
4 files changed, 153 insertions, 22 deletions
@@ -318,6 +318,8 @@ struct molecule *load_molecule() char line[1024]; char *rval; int i; + int nbits; + char **bits; mol = malloc(sizeof(struct molecule)); if ( mol == NULL ) return NULL; @@ -333,12 +335,11 @@ struct molecule *load_molecule() do { - char el[4]; + char *el; int j, r; int done = 0; float xf, yf, zf, occf, Bf; double x, y, z, occ, B; - char *coords; rval = fgets(line, 1023, fh); @@ -360,23 +361,49 @@ struct molecule *load_molecule() /* Only interested in atoms */ if ( strncmp(line, "HETATM", 6) != 0 ) continue; - /* The following crimes against programming style - * were brought to you by Wizbit Enterprises, Inc. */ - if ( line[76] == ' ' ) { - el[0] = line[77]; - el[1] = '\0'; - } else { - el[0] = line[76]; - el[1] = line[77]; - el[2] = '\0'; - } + chomp(line); + nbits = assplode(line, " ", &bits, ASSPLODE_NONE); + if ( nbits == 0 ) continue; + + if ( nbits == 11 ) { + + /* Normal case- HETATM<space+>number<space+>stuff */ + r = sscanf(bits[5], "%f", &xf); + r += sscanf(bits[6], "%f", &yf); + r += sscanf(bits[7], "%f", &zf); + r += sscanf(bits[8], "%f", &occf); + r += sscanf(bits[9], "%f", &Bf); + if ( r != 5 ) { + STATUS("PDB read failed (%i)\n", r); + for ( i=0; i<nbits; i++ ) free(bits[i]); + continue; + } + el = strdup(bits[10]); + + } else if ( nbits == 10 ) { + + /* Squished case- HETATMnumber<space+>stuff */ + r = sscanf(bits[4], "%f", &xf); + r += sscanf(bits[5], "%f", &yf); + r += sscanf(bits[6], "%f", &zf); + r += sscanf(bits[7], "%f", &occf); + r += sscanf(bits[8], "%f", &Bf); + if ( r != 5 ) { + STATUS("PDB read failed (%i)\n", r); + for ( i=0; i<nbits; i++ ) free(bits[i]); + continue; + } + el = strdup(bits[9]); - coords = line + 29; - r = sscanf(coords, "%f %f %f %f %f", &xf, &yf, &zf, &occf, &Bf); - if ( r != 5 ) { - STATUS("I didn't understand a line in the PDB file.\n"); + } else { + STATUS("Unrecognised line in the PDB file (%i)\n", + nbits); + STATUS("'%s'\n", line); + for ( i=0; i<nbits; i++ ) free(bits[i]); continue; } + for ( i=0; i<nbits; i++ ) free(bits[i]); + /* Promote to double precision */ x = xf; y = yf; z = zf; occ = occf; B = Bf; @@ -397,6 +424,10 @@ struct molecule *load_molecule() spec->occ[n] = occ; spec->B[n] = B*1.0e-20; /* Convert to m^2 */ mol->species[j]->n_atoms++; + if ( mol->species[j]->n_atoms == MAX_ATOMS ) { + ERROR("Too many atoms of type '%s'!\n", el); + exit(1); + } done = 1; @@ -409,7 +440,7 @@ struct molecule *load_molecule() spec = malloc(sizeof(struct mol_species)); - memcpy(spec->species, el, 4); + memcpy(spec->species, el, 1+strlen(el)); spec->x[0] = x*1.0e-10; /* Convert to m */ spec->y[0] = y*1.0e-10; spec->z[0] = z*1.0e-10; @@ -422,6 +453,7 @@ struct molecule *load_molecule() } + free(el); } while ( rval != NULL ); @@ -21,16 +21,19 @@ #include "cell.h" +#define MAX_ATOMS (128*1024) + + struct mol_species { char species[4]; /* Species name */ int n_atoms; /* Number of atoms of this species */ - double x[32*1024]; - double y[32*1024]; - double z[32*1024]; - double occ[32*1024]; - double B[32*1024]; + double x[MAX_ATOMS]; + double y[MAX_ATOMS]; + double z[MAX_ATOMS]; + double occ[MAX_ATOMS]; + double B[MAX_ATOMS]; }; diff --git a/src/utils.c b/src/utils.c index 64423c2f..e3b8b667 100644 --- a/src/utils.c +++ b/src/utils.c @@ -195,3 +195,91 @@ struct rvec quat_rot(struct rvec q, struct quaternion z) return res; } + + +/* Return non-zero if c is in delims */ +static int assplode_isdelim(const char c, const char *delims) +{ + size_t i; + for ( i=0; i<strlen(delims); i++ ) { + if ( c == delims[i] ) return 1; + } + return 0; +} + + +static int assplode_extract(char ***pbits, int n, size_t n_captured, + size_t start, const char *a) +{ + char **bits = *pbits; + bits = realloc(bits, sizeof(char *)*(n+1)); + bits[n] = malloc(n_captured+1); + memcpy(bits[n], a+start, n_captured); + bits[n][n_captured] = '\0'; + n++; + *pbits = bits; + return n; +} + + +/* Split the string 'a' using 'delims' as a zero-terminated list of + * deliminators. + * Store each segment in bits[0...n] where n is the number of segments and is + * the return value. pbits = &bits + * Each segment needs to be freed with free() when finished with. + * The array of bits also needs to be freed with free() when finished with, + * unless n=0 in which case bits==NULL + */ +int assplode(const char *a, const char *delims, char ***pbits, + AssplodeFlag flags) +{ + size_t i, start, n_captured; + int n, last_was_delim; + char **bits; + + n = 0; + i = 0; + n_captured = 0; + start = 0; + last_was_delim = 0; + bits = NULL; + while ( i < strlen(a) ) { + + if ( assplode_isdelim(a[i], delims) ) { + + if ( n_captured > 0 ) { + /* This is a deliminator after a sequence of + * non-deliminator chars */ + n = assplode_extract(&bits, n, n_captured, + start, a); + } + + n_captured = 0; + if ( (flags & ASSPLODE_DUPS) && last_was_delim ) { + n = assplode_extract(&bits, n, 0, start, a); + } + last_was_delim = 1; + + } else { + + if ( n_captured == 0 ) { + /* No characters currently found, so this is the + * start */ + start = i; + } + n_captured++; + last_was_delim = 0; + + } + + i++; + + } + /* Left over characters at the end? */ + if ( n_captured > 0 ) { + n = assplode_extract(&bits, n, n_captured, start, a); + } + + *pbits = bits; + return n; +} diff --git a/src/utils.h b/src/utils.h index 7d8274bd..441fa51d 100644 --- a/src/utils.h +++ b/src/utils.h @@ -67,6 +67,14 @@ extern struct rvec quat_rot(struct rvec q, struct quaternion z); extern size_t skipspace(const char *s); extern void chomp(char *s); + +typedef enum { + ASSPLODE_NONE = 0, + ASSPLODE_DUPS = 1<<0 +} AssplodeFlag; +extern int assplode(const char *a, const char *delims, char ***pbits, + AssplodeFlag flags); + extern void progress_bar(int val, int total, const char *text); extern int poisson_noise(double expected); |