diff options
author | Thomas White <taw@physics.org> | 2018-11-15 10:33:47 +0100 |
---|---|---|
committer | Thomas White <taw@physics.org> | 2018-11-15 10:33:47 +0100 |
commit | 6c18490d12ad6837636b54afe97641513042f95d (patch) | |
tree | 0a8df2d764264d70705de1127208eeb0e5110012 | |
parent | 5184da5ca9ce34ddd3a291cf371a63ad1fa1f77d (diff) |
indexamajig: Name and shame slow processing stages
-rw-r--r-- | libcrystfel/src/index.c | 38 | ||||
-rw-r--r-- | libcrystfel/src/index.h | 3 | ||||
-rw-r--r-- | src/im-sandbox.c | 16 | ||||
-rw-r--r-- | src/im-sandbox.h | 7 | ||||
-rw-r--r-- | src/process_image.c | 20 | ||||
-rw-r--r-- | src/process_image.h | 3 |
6 files changed, 80 insertions, 7 deletions
diff --git a/libcrystfel/src/index.c b/libcrystfel/src/index.c index 1476304d..06d57e52 100644 --- a/libcrystfel/src/index.c +++ b/libcrystfel/src/index.c @@ -81,6 +81,20 @@ static const char *onoff(int a) return "off"; } +/* Definition and function definition duplicated here (from im-sandbox.{c,h}) + * because libcrystfel code cannot depend on core CrystFEL programs. + * + * Must match the value and definition in im-sandbox.h + */ +#define MAX_TASK_LEN (32) + +static void set_last_task(char *lt, const char *task) +{ + if ( lt == NULL ) return; + assert(strlen(task) < MAX_TASK_LEN-1); + strcpy(lt, task); +} + static void show_indexing_flags(IndexingFlags flags) { @@ -556,7 +570,7 @@ static int check_cell(IndexingFlags flags, Crystal *cr, UnitCell *target, /* Return non-zero for "success" */ static int try_indexer(struct image *image, IndexingMethod indm, - IndexingPrivate *ipriv, void *mpriv) + IndexingPrivate *ipriv, void *mpriv, char *last_task) { int i, r; int n_bad = 0; @@ -568,34 +582,42 @@ static int try_indexer(struct image *image, IndexingMethod indm, return 0; case INDEXING_DIRAX : + set_last_task(last_task, "indexing:dirax"); r = run_dirax(image, mpriv); break; case INDEXING_ASDF : + set_last_task(last_task, "indexing:asdf"); r = run_asdf(image, mpriv); break; case INDEXING_MOSFLM : + set_last_task(last_task, "indexing:mosflm"); r = run_mosflm(image, mpriv); break; case INDEXING_XDS : + set_last_task(last_task, "indexing:xds"); r = run_xds(image, mpriv); break; case INDEXING_DEBUG : + set_last_task(last_task, "indexing:debug"); r = debug_index(image); break; case INDEXING_FELIX : + set_last_task(last_task, "indexing:felix"); r = felix_index(image, mpriv); break; case INDEXING_TAKETWO : + set_last_task(last_task, "indexing:taketwo"); r = taketwo_index(image, ipriv->ttopts, mpriv); break; case INDEXING_XGANDALF : + set_last_task(last_task, "indexing:xgandalf"); r = run_xgandalf(image, mpriv); break; @@ -605,6 +627,8 @@ static int try_indexer(struct image *image, IndexingMethod indm, } + set_last_task(last_task, "indexing:finalisation"); + /* Stop a really difficult to debug situation in its tracks */ if ( image->n_crystals - n_before != r ) { ERROR("Whoops, indexer didn't return the right number " @@ -812,12 +836,19 @@ static int finished_retry(IndexingMethod indm, IndexingFlags flags, void index_pattern(struct image *image, IndexingPrivate *ipriv) { - index_pattern_2(image, ipriv, NULL); + index_pattern_3(image, ipriv, NULL, NULL); } void index_pattern_2(struct image *image, IndexingPrivate *ipriv, int *ping) { + index_pattern_3(image, ipriv, ping, NULL); +} + + +void index_pattern_3(struct image *image, IndexingPrivate *ipriv, int *ping, + char *last_task) +{ int n = 0; ImageFeatureList *orig; @@ -841,7 +872,8 @@ void index_pattern_2(struct image *image, IndexingPrivate *ipriv, int *ping) do { r = try_indexer(image, ipriv->methods[n], - ipriv, ipriv->engine_private[n]); + ipriv, ipriv->engine_private[n], + last_task); success += r; ntry++; done = finished_retry(ipriv->methods[n], ipriv->flags, diff --git a/libcrystfel/src/index.h b/libcrystfel/src/index.h index 2099b4d7..eaf6f912 100644 --- a/libcrystfel/src/index.h +++ b/libcrystfel/src/index.h @@ -158,6 +158,9 @@ extern void index_pattern(struct image *image, IndexingPrivate *ipriv); extern void index_pattern_2(struct image *image, IndexingPrivate *ipriv, int *ping); +extern void index_pattern_3(struct image *image, IndexingPrivate *ipriv, + int *ping, char *last_task); + extern void cleanup_indexing(IndexingPrivate *ipriv); #ifdef __cplusplus diff --git a/src/im-sandbox.c b/src/im-sandbox.c index 80ac4166..ad2bc5fc 100644 --- a/src/im-sandbox.c +++ b/src/im-sandbox.c @@ -153,6 +153,8 @@ static void check_hung_workers(struct sandbox *sb) "frame for more than 10 minutes.\n", i); STATUS("Event ID is: %s\n", sb->shared->last_ev[i]); + STATUS("Task ID is: %s\n", + sb->shared->last_task[i]); sb->shared->warned_long_running[i] = 1; } } @@ -321,6 +323,14 @@ static void shuffle_events(struct sb_shm *sb_shared) } +void set_last_task(char *lt, const char *task) +{ + if ( lt == NULL ) return; + assert(strlen(task) < MAX_TASK_LEN-1); + strcpy(lt, task); +} + + static void run_work(const struct index_args *iargs, Stream *st, int cookie, const char *tmpdir, struct sandbox *sb) { @@ -338,12 +348,14 @@ static void run_work(const struct index_args *iargs, Stream *st, /* Wait until an event is ready */ time_accounts_set(taccs, TACC_EVENTWAIT); + set_last_task(sb->shared->last_task[cookie], "wait_event"); if ( sem_wait(sb->queue_sem) != 0 ) { ERROR("Failed to wait on queue semaphore: %s\n", strerror(errno)); } /* Get the event from the queue */ + set_last_task(sb->shared->last_task[cookie], "read_queue"); pthread_mutex_lock(&sb->shared->queue_lock); if ( sb->shared->no_more ) { pthread_mutex_unlock(&sb->shared->queue_lock); @@ -390,7 +402,7 @@ static void run_work(const struct index_args *iargs, Stream *st, sb->shared->time_last_start[cookie] = get_monotonic_seconds(); process_image(iargs, &pargs, st, cookie, tmpdir, ser, - sb->shared, taccs); + sb->shared, taccs, sb->shared->last_task[cookie]); free_filename_plus_event(pargs.filename_p_e); @@ -705,6 +717,8 @@ static void handle_zombie(struct sandbox *sb, int respawn) i, WTERMSIG(status)); STATUS("Event ID was: %s\n", sb->shared->last_ev[i]); + STATUS("Task ID was: %s\n", + sb->shared->last_task[i]); if ( respawn ) start_worker_process(sb, i); } diff --git a/src/im-sandbox.h b/src/im-sandbox.h index 1412d199..e5f3422b 100644 --- a/src/im-sandbox.h +++ b/src/im-sandbox.h @@ -49,6 +49,10 @@ struct sb_shm; /* Maximum length of an event ID including serial number */ #define MAX_EV_LEN (1024) +/* Maximum length of a task ID, e.g. indexing:xgandalf. + * NB If changing this, also update the value in index.c */ +#define MAX_TASK_LEN (32) + /* Maximum number of workers */ #define MAX_NUM_WORKERS (1024) @@ -61,6 +65,7 @@ struct sb_shm char queue[QUEUE_SIZE][MAX_EV_LEN]; int no_more; char last_ev[MAX_NUM_WORKERS][MAX_EV_LEN]; + char last_task[MAX_NUM_WORKERS][MAX_TASK_LEN]; int pings[MAX_NUM_WORKERS]; time_t time_last_start[MAX_NUM_WORKERS]; int warned_long_running[MAX_NUM_WORKERS]; @@ -73,6 +78,8 @@ struct sb_shm extern char *create_tempdir(const char *temp_location); +extern void set_last_task(char *lt, const char *task); + extern void create_sandbox(struct index_args *iargs, int n_proc, char *prefix, int config_basename, FILE *fh, Stream *stream, const char *tempdir, int serial_start); diff --git a/src/process_image.c b/src/process_image.c index 10863fd3..2fbf0e4a 100644 --- a/src/process_image.c +++ b/src/process_image.c @@ -99,7 +99,8 @@ static void restore_image_data(float **dp, struct detector *det, float **bu) void process_image(const struct index_args *iargs, struct pattern_args *pargs, Stream *st, int cookie, const char *tmpdir, - int serial, struct sb_shm *sb_shared, TimeAccounts *taccs) + int serial, struct sb_shm *sb_shared, TimeAccounts *taccs, + char *last_task) { int check; struct imagefile *imfile; @@ -124,6 +125,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, image.indexed_by = INDEXING_NONE; time_accounts_set(taccs, TACC_HDF5OPEN); + set_last_task(last_task, "open file"); sb_shared->pings[cookie]++; imfile = imagefile_open(image.filename); if ( imfile == NULL ) { @@ -132,6 +134,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, } time_accounts_set(taccs, TACC_HDF5READ); + set_last_task(last_task, "read file"); sb_shared->pings[cookie]++; check = imagefile_read(imfile, &image, image.event); if ( check ) { @@ -140,6 +143,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Take snapshot of image before applying horrible noise filters */ time_accounts_set(taccs, TACC_FILTER); + set_last_task(last_task, "image filter"); sb_shared->pings[cookie]++; prefilter = backup_image_data(image.dp, image.det); @@ -152,6 +156,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, } time_accounts_set(taccs, TACC_RESRANGE); + set_last_task(last_task, "resolution range"); sb_shared->pings[cookie]++; mark_resolution_range_as_bad(&image, iargs->highres, +INFINITY); @@ -162,6 +167,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, struct hdfile *hdfile; case PEAK_HDF5: + set_last_task(last_task, "peaksearch:hdf5"); hdfile = imagefile_get_hdfile(imfile); if ( (hdfile == NULL) || (get_peaks_2(&image, hdfile, iargs->hdf5_peak_path, @@ -178,6 +184,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; case PEAK_CXI: + set_last_task(last_task, "peaksearch:cxi"); hdfile = imagefile_get_hdfile(imfile); if ( (hdfile == NULL) || (get_peaks_cxi_2(&image, hdfile, iargs->hdf5_peak_path, @@ -195,6 +202,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; case PEAK_ZAEF: + set_last_task(last_task, "peaksearch:zaef"); search_peaks(&image, iargs->threshold, iargs->min_gradient, iargs->min_snr, iargs->pk_inn, iargs->pk_mid, iargs->pk_out, @@ -202,6 +210,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; case PEAK_PEAKFINDER8: + set_last_task(last_task, "peaksearch:pf8"); if ( search_peaks_peakfinder8(&image, 2048, iargs->threshold, iargs->min_snr, @@ -224,6 +233,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, break; case PEAK_PEAKFINDER9: + set_last_task(last_task, "peaksearch:pf9"); if ( search_peaks_peakfinder9(&image, iargs->min_snr_biggest_pix, iargs->min_snr_peak_pix, @@ -297,7 +307,9 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Index the pattern */ time_accounts_set(taccs, TACC_INDEXING); - index_pattern_2(&image, iargs->ipriv, &sb_shared->pings[cookie]); + set_last_task(last_task, "indexing"); + index_pattern_3(&image, iargs->ipriv, &sb_shared->pings[cookie], + last_task); r = chdir(rn); if ( r ) { @@ -309,6 +321,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Set beam/crystal parameters */ time_accounts_set(taccs, TACC_PREDPARAMS); + set_last_task(last_task, "prediction params"); if ( iargs->fix_profile_r >= 0.0 ) { for ( i=0; i<image.n_crystals; i++ ) { crystal_set_profile_radius(image.crystals[i], @@ -332,6 +345,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, /* Integrate! */ time_accounts_set(taccs, TACC_INTEGRATION); + set_last_task(last_task, "integration"); sb_shared->pings[cookie]++; integrate_all_5(&image, iargs->int_meth, PMODEL_XSPHERE, iargs->push_res, @@ -342,6 +356,7 @@ void process_image(const struct index_args *iargs, struct pattern_args *pargs, streamwrite: time_accounts_set(taccs, TACC_WRITESTREAM); + set_last_task(last_task, "stream write"); sb_shared->pings[cookie]++; ret = write_chunk(st, &image, imfile, iargs->stream_peaks, iargs->stream_refls, @@ -363,6 +378,7 @@ streamwrite: out: /* Count crystals which are still good */ time_accounts_set(taccs, TACC_TOTALS); + set_last_task(last_task, "process_image finalisation"); sb_shared->pings[cookie]++; pthread_mutex_lock(&sb_shared->totals_lock); any_crystals = 0; diff --git a/src/process_image.h b/src/process_image.h index d4dcbf04..d4b1ce59 100644 --- a/src/process_image.h +++ b/src/process_image.h @@ -127,7 +127,8 @@ struct pattern_args extern void process_image(const struct index_args *iargs, struct pattern_args *pargs, Stream *st, int cookie, const char *tmpdir, int serial, - struct sb_shm *sb_shared, TimeAccounts *taccs); + struct sb_shm *sb_shared, TimeAccounts *taccs, + char *last_task); #endif /* PROCESS_IMAGE_H */ |