/* * process_hkl.c * * Assemble and process FEL Bragg intensities * * (c) 2006-2010 Thomas White * * Part of CrystFEL - crystallography with a FEL * */ #ifdef HAVE_CONFIG_H #include #endif #include #include #include #include #include #include #include "utils.h" #include "statistics.h" #include "sfac.h" #include "reflections.h" #include "likelihood.h" #include "symmetry.h" /* Number of divisions for R vs |q| graphs */ #define RVQDV (20) static void show_help(const char *s) { printf("Syntax: %s [options]\n\n", s); printf( "Assemble and process FEL Bragg intensities.\n" "\n" " -h, --help Display this help message.\n" " -i, --input= Specify input filename (\"-\" for stdin).\n" " -o, --output= Specify output filename for merged intensities\n" " (don't specify for no output).\n" " -p, --pdb= PDB file to use (default: molecule.pdb).\n" "\n" " --max-only Take the integrated intensity to be equal to the\n" " maximum intensity measured for that reflection.\n" " The default is to use the mean value from all\n" " measurements.\n" " --sum Sum (rather than average) the intensities for the\n" " final output list. This is useful for comparing\n" " results to radially summed powder patterns, but\n" " will break R-factor analysis.\n" " --stop-after= Stop after processing n patterns. Zero means\n" " keep going until the end of the input, and is\n" " the default.\n" " -c, --compare-with= Compare with reflection intensities in this file\n" "\n" " -e, --output-every= Analyse figures of merit after every n patterns\n" " Default: 1000. A value of zero means to do the\n" " analysis only after reading all the patterns.\n" " -r, --rvsq Output lists of R vs |q| (\"Luzzatti plots\")\n" " when analysing figures of merit.\n" " --detwin Correlate each new pattern with the current\n" " model and choose the best fitting out of the\n" " allowable twins.\n" " --scale Scale each pattern for best fit with the current\n" " model.\n" " -y, --symmetry= Merge according to point group .\n" ); } static void write_RvsQ(const char *name, double *ref, double *trueref, unsigned int *counts, double scale, UnitCell *cell) { FILE *fh; double smax, sbracket; signed int h, k, l; fh = fopen(name, "w"); smax = 0.0; for ( h=-INDMAX; h 0) && (s > smax) ) { smax = s; } } } } for ( sbracket=0.0; sbracket=sbracket) && (s0)) { double obs, calc, obsi; obs = lookup_intensity(ref, h, k, l); calc = lookup_intensity(trueref, h, k, l); obsi = obs / (double)c; top += pow(obsi - scale*calc, 2.0); bot += pow(obsi, 2.0); nhits += c; nrefl++; } } } } R = sqrt(top/bot); hits_per_refl = nrefl ? (double)nhits/nrefl : 0; fprintf(fh, "%8.5f %8.5f %5.2f\n", sbracket+smax/(2.0*RVQDV), R, hits_per_refl); } fclose(fh); } static void process_reflections(double *ref, unsigned int *counts, double *trueref, unsigned int *truecounts, unsigned int n_patterns, UnitCell *cell, int do_rvsq) { int j; double mean_counts; int ctot = 0; int nmeas = 0; double R, scale; FILE *fh; for ( j=0; j 0 ) nmeas++; } mean_counts = (double)ctot/nmeas; R = stat_r2(ref, counts, trueref, truecounts, &scale); STATUS("%8u: R=%5.2f%% (sf=%7.4e) mean meas/refl=%5.2f," " %i reflections measured\n", n_patterns, R*100.0, scale, mean_counts, nmeas); if ( do_rvsq ) { /* Record graph of R against q for this N */ char name[64]; snprintf(name, 63, "R_vs_q-%u.dat", n_patterns); write_RvsQ(name, ref, trueref, counts, scale, cell); } fh = fopen("results/convergence.dat", "a"); fprintf(fh, "%u %5.2f %5.2f\n", n_patterns, R*100.0, mean_counts); fclose(fh); } static int resolve_twin(ReflItemList *items, const char *symm, const double *patt, const double *model, const unsigned int *model_counts) { ReflItemList *twins; const char *holo; int n, i; double best_fom = 0.0; int best_op = 0; /* How many possible twinned orientations are there? */ twins = get_twins(items, symm); holo = get_holohedral(symm); n = num_items(twins); for ( i=0; iop; for ( j=0; jh, r->k, r->l, &h, &k, &l, holo, op); get_asymm(h, k, l, &h, &k, &l, symm); set_intensity(trial_ints, h, k, l, lookup_intensity(patt, r->h, r->k, r->l)); set_count(trial_counts, h, k, l, 1); } fom = stat_pearson(trial_ints, trial_counts, model, model_counts); free(trial_ints); free(trial_counts); //printf(" %f", fom); if ( fom > best_fom ) { best_fom = fom; best_op = op; } } //printf("\n"); delete_items(twins); return best_op; } static void merge_pattern(double *model, const double *new, unsigned int *model_counts, ReflItemList *items, int mo, int sum, const char *symm) { int i; const char *holo = get_holohedral(symm); /* Needed to look up later */ int twin; twin = resolve_twin(items, symm, new, model, model_counts); for ( i=0; ih; ks = item->k; ls = item->l; get_general_equiv(hs, ks, ls, &h, &k, &l, holo, twin); get_asymm(h, k, l, &h, &k, &l, symm); intensity = lookup_intensity(new, h, k, l); if ( !mo ) { integrate_intensity(model, h, k, l, intensity); if ( sum ) { set_count(model_counts, h, k, l, 1); } else { integrate_count(model_counts, h, k, l, 1); } } else { if ( intensity > lookup_intensity(model, h, k, l) ) { set_intensity(model, h, k, l, intensity); } set_count(model_counts, h, k, l, 1); } } } int main(int argc, char *argv[]) { int c; char *filename = NULL; char *output = NULL; FILE *fh; unsigned int n_patterns; double *model, *trueref = NULL; unsigned int *model_counts; char *rval; UnitCell *cell; int config_maxonly = 0; int config_every = 1000; int config_rvsq = 0; int config_stopafter = 0; int config_sum = 0; int config_detwin = 0; int config_scale = 0; char *intfile = NULL; double *new_pattern = NULL; unsigned int n_total_patterns; unsigned int *truecounts = NULL; char *sym = NULL; char *pdb = NULL; float f0; int f0_valid; int n_nof0 = 0; ReflItemList *items; /* Long options */ const struct option longopts[] = { {"help", 0, NULL, 'h'}, {"input", 1, NULL, 'i'}, {"output", 1, NULL, 'o'}, {"max-only", 0, &config_maxonly, 1}, {"output-every", 1, NULL, 'e'}, {"rvsq", 0, NULL, 'r'}, {"stop-after", 1, NULL, 's'}, {"compare-with", 0, NULL, 'c'}, {"sum", 0, &config_sum, 1}, {"detwin", 0, &config_detwin, 1}, {"scale", 0, &config_scale, 1}, {"symmetry", 1, NULL, 'y'}, {"pdb", 1, NULL, 'p'}, {0, 0, NULL, 0} }; /* Short options */ while ((c = getopt_long(argc, argv, "hi:e:ro:p:y:", longopts, NULL)) != -1) { switch (c) { case 'h' : show_help(argv[0]); return 0; case 'i' : filename = strdup(optarg); break; case 'o' : output = strdup(optarg); break; case 'r' : config_rvsq = 1; break; case 'e' : config_every = atoi(optarg); break; case 's' : config_stopafter = atoi(optarg); break; case 'c' : intfile = strdup(optarg); break; case 'p' : pdb = strdup(optarg); break; case 'y' : sym = strdup(optarg); break; case 0 : break; default : return 1; } } if ( filename == NULL ) { ERROR("Please specify filename using the -i option\n"); return 1; } if ( intfile != NULL ) { truecounts = new_list_count(); STATUS("Comparing against '%s'\n", intfile); trueref = read_reflections(intfile, truecounts, NULL); free(intfile); } else { trueref = NULL; } if ( pdb == NULL ) { pdb = strdup("molecule.pdb"); } if ( sym == NULL ) { sym = strdup("1"); } model = new_list_intensity(); model_counts = new_list_count(); new_pattern = new_list_intensity(); items = new_items(); cell = load_cell_from_pdb(pdb); free(pdb); if ( strcmp(filename, "-") == 0 ) { fh = stdin; } else { fh = fopen(filename, "r"); } free(filename); if ( fh == NULL ) { ERROR("Failed to open input file\n"); return 1; } /* Count the number of patterns in the file */ n_total_patterns = 0; do { char line[1024]; rval = fgets(line, 1023, fh); if ( (strncmp(line, "Reflections from indexing", 25) == 0) || (strncmp(line, "New pattern", 11) == 0) ) { n_total_patterns++; } } while ( rval != NULL ); rewind(fh); STATUS("There are %i patterns to process\n", n_total_patterns); /* Show useful symmetry information */ const char *holo = get_holohedral(sym); int np = num_general_equivs(holo) / num_general_equivs(sym); STATUS("Resolving from point group %s to %s (%i possibilities)\n", holo, sym, np); n_patterns = 0; f0_valid = 0; do { char line[1024]; signed int h, k, l; float intensity; int r; rval = fgets(line, 1023, fh); if ( (strncmp(line, "Reflections from indexing", 25) == 0) || (strncmp(line, "New pattern", 11) == 0) ) { /* Start of first pattern? */ if ( n_patterns == 0 ) { n_patterns++; continue; } /* Detwin before scaling */ if ( config_detwin ) { detwin_intensities(model, new_pattern, model_counts, items); } /* Assume a default I0 if we don't have one by now */ if ( config_scale && !f0_valid ) { n_nof0++; f0 = 1.0; } /* Scale if requested */ if ( config_scale ) { scale_intensities(model, new_pattern, model_counts, items, f0, f0_valid); } /* Start of second or later pattern */ merge_pattern(model, new_pattern, model_counts, items, config_maxonly, config_sum, sym); if ( (trueref != NULL) && config_every && (n_patterns % config_every == 0) ) { process_reflections(model, model_counts, trueref, truecounts, n_patterns, cell, config_rvsq); } if ( n_patterns == config_stopafter ) break; n_patterns++; clear_items(items); progress_bar(n_patterns, n_total_patterns, "Merging"); f0_valid = 0; } if ( strncmp(line, "f0 = ", 5) == 0 ) { r = sscanf(line, "f0 = %f", &f0); if ( r != 1 ) { f0 = 1.0; f0_valid = 0; continue; } f0_valid = 1; } r = sscanf(line, "%i %i %i %f", &h, &k, &l, &intensity); if ( r != 4 ) continue; if ( (h==0) && (k==0) && (l==0) ) continue; if ( find_item(items, h, k, l) != 0 ) { ERROR("More than one measurement for %i %i %i in" " pattern number %i\n", h, k, l, n_patterns); } set_intensity(new_pattern, h, k, l, intensity); add_item(items, h, k, l); } while ( rval != NULL ); fclose(fh); if ( trueref != NULL ) { process_reflections(model, model_counts, trueref, truecounts, n_patterns, cell, config_rvsq); } if ( output != NULL ) { write_reflections(output, model_counts, model, NULL, 0, cell, 1); } STATUS("There were %u patterns.\n", n_patterns); STATUS("%i had no f0 valid value.\n", n_nof0); delete_items(items); free(sym); free(model); free(model_counts); free(new_pattern); free(output); free(cell); return 0; }