diff options
author | Thomas White <taw@physics.org> | 2020-08-31 16:42:41 +0200 |
---|---|---|
committer | Thomas White <taw@physics.org> | 2020-08-31 16:42:41 +0200 |
commit | ab5f4e3a29112fdd0a718bb33ddb8a20217cd443 (patch) | |
tree | c5d4c76f956098febcacc57c6e8df63d7c890046 | |
parent | f5d539e4e4aabf2ef5c8c5cb066de123611f3b42 (diff) |
Split up job for submission via SLURM
-rw-r--r-- | src/gui_backend_local.c | 2 |
-rw-r--r-- | src/gui_backend_slurm.c | 219 |
-rw-r--r-- | src/gui_index.c | 6 |
-rw-r--r-- | src/gui_index.h | 2 |
4 files changed, 179 insertions, 50 deletions
diff --git a/src/gui_backend_local.c b/src/gui_backend_local.c index 7411c9a2..a43e3632 100644 --- a/src/gui_backend_local.c +++ b/src/gui_backend_local.c @@ -218,6 +218,8 @@ static void *run_indexing(const char *job_title, snprintf(n_thread_str, 63, "%i", opts->n_processes); args = indexamajig_command_line(geom_filename, n_thread_str, + "files.lst", + "crystfel.stream", peak_search_params, indexing_params); diff --git a/src/gui_backend_slurm.c b/src/gui_backend_slurm.c index 036cf4ef..38ead6b1 100644 --- a/src/gui_backend_slurm.c +++ b/src/gui_backend_slurm.c @@ -52,7 +52,8 @@ struct slurm_indexing_opts struct slurm_job { double frac_complete; - /* FIXME: List of SLURM job numbers to track */ + int n_blocks; + uint32_t *job_ids; }; @@ -73,7 +74,7 @@ static void cancel_task(void *job_priv) } -static char **create_env(uint32_t *psize, char *path_add) +static char **create_env(int *psize, char *path_add) { char **env; const char *base_path = "PATH=/bin:/usr/bin"; @@ -115,6 +116,117 @@ static char **create_env(uint32_t *psize, char *path_add) } +static uint32_t submit_batch_job(const char *geom_filename, + const char *file_list, + const char *stream_filename, + const char *email_address, + const char *partition, + char **env, + int n_env, + const char *job_name, + const char *workdir, + struct peak_params *peak_search_params, + struct index_params *indexing_params) + +{ + job_desc_msg_t job_desc_msg; + submit_response_msg_t *resp; + char **cmdline; + char *cmdline_all; + char *script; + int job_id; + int r; + + cmdline = indexamajig_command_line(geom_filename, + "`nproc`", + file_list, + stream_filename, + peak_search_params, + indexing_params); + + cmdline_all = g_strjoinv(" ", cmdline); + + script = malloc(strlen(cmdline_all)+16); + if ( script == NULL ) return 0; + + strcpy(script, "#!/bin/sh\n"); + strcat(script, cmdline_all); + g_free(cmdline_all); + + slurm_init_job_desc_msg(&job_desc_msg); + job_desc_msg.user_id = getuid(); + job_desc_msg.group_id = getgid(); + 
job_desc_msg.mail_user = strdup(email_address); + job_desc_msg.mail_type = MAIL_JOB_FAIL; + job_desc_msg.comment = "Submitted via CrystFEL GUI"; + job_desc_msg.shared = 0; + job_desc_msg.time_limit = 60; + job_desc_msg.partition = strdup(partition); + job_desc_msg.min_nodes = 1; + job_desc_msg.max_nodes = 1; + job_desc_msg.name = strdup(job_name); + job_desc_msg.std_err = strdup("job.err"); + job_desc_msg.std_out = strdup("job.out"); + job_desc_msg.work_dir = strdup(workdir); + job_desc_msg.script = script; + job_desc_msg.environment = env; + job_desc_msg.env_size = n_env; + + r = slurm_submit_batch_job(&job_desc_msg, &resp); + if ( r ) { + ERROR("Couldn't submit job: %i\n", errno); + return 0; + } + + free(job_desc_msg.mail_user); + free(job_desc_msg.partition); + free(job_desc_msg.name); + free(job_desc_msg.work_dir); + free(job_desc_msg.std_err); + free(job_desc_msg.std_out); + + job_id = resp->job_id; + slurm_free_submit_response_response_msg(resp); + + return job_id; +} + + +static void write_partial_file_list(GFile *workdir, + const char *list_filename, + int j, + int block_size, + char **filenames, + char **events, + int n_frames) +{ + GFile *file; + char *file_path; + FILE *fh; + int i; + + file = g_file_get_child(workdir, list_filename); + file_path = g_file_get_path(file); + + fh = fopen(file_path, "w"); + for ( i=j*block_size; + (i<(j+1)*block_size) && (i<n_frames); + i++ ) + { + fprintf(fh, "%s", filenames[i]); + if ( events[i] != NULL ) { + fprintf(fh, " %s\n", events[i]); + } else { + fprintf(fh, "\n"); + } + } + + fclose(fh); + g_free(file_path); + g_object_unref(file); +} + + static void *run_indexing(const char *job_title, const char *job_notes, char **filenames, @@ -127,19 +239,17 @@ static void *run_indexing(const char *job_title, { struct slurm_indexing_opts *opts = opts_priv; struct slurm_job *job; - job_desc_msg_t job_desc_msg; - submit_response_msg_t *resp; - int r; char *workdir; struct stat s; - char **cmdline; - char *cmdline_all; - char 
*script; - GFile *workdir_file; GFile *cwd_file; GFile *notes_file; + GFile *workdir_file; char *notes_path; FILE *fh; + char **env; + int n_env; + int i; + int fail = 0; workdir = strdup(job_title); if ( workdir == NULL ) return NULL; @@ -168,55 +278,68 @@ static void *run_indexing(const char *job_title, g_free(notes_path); g_object_unref(notes_file); - cmdline = indexamajig_command_line(geom_filename, - "`nproc`", - peak_search_params, - indexing_params); - - cmdline_all = g_strjoinv(" ", cmdline); + workdir = g_file_get_path(workdir_file); - script = malloc(strlen(cmdline_all)+16); - if ( script == NULL ) return NULL; - - strcpy(script, "#!/bin/sh\n"); - strcat(script, cmdline_all); - g_free(cmdline_all); + env = create_env(&n_env, opts->path_add); job = malloc(sizeof(struct slurm_job)); - if ( job == NULL ) return NULL; + if ( job == NULL ) return 0; + + job->n_blocks = n_frames / opts->block_size; + if ( n_frames % opts->block_size ) job->n_blocks++; + STATUS("Splitting job into %i blocks of max %i frames\n", + job->n_blocks, opts->block_size); + + job->job_ids = malloc(job->n_blocks * sizeof(uint32_t)); + if ( job->job_ids == NULL ) return NULL; + + for ( i=0; i<job->n_blocks; i++ ) { + + char job_name[128]; + char file_list[128]; + char stream_filename[128]; + int job_id; + + snprintf(job_name, 127, "%s-%i", job_title, i); + snprintf(file_list, 127, "files-%i.lst", i); + snprintf(stream_filename, 127, + "crystfel-%i.stream", i); + + write_partial_file_list(workdir_file, file_list, + i, opts->block_size, + filenames, events, n_frames); + + job_id = submit_batch_job(geom_filename, + file_list, + stream_filename, + opts->email_address, + opts->partition, + env, + n_env, + job_name, + workdir, + peak_search_params, + indexing_params); + + if ( job_id == 0 ) { + fail = 1; + break; + } - slurm_init_job_desc_msg(&job_desc_msg); - job_desc_msg.user_id = getuid(); - job_desc_msg.group_id = getgid(); - job_desc_msg.mail_user = strdup(opts->email_address); - 
job_desc_msg.mail_type = MAIL_JOB_FAIL; - job_desc_msg.comment = strdup("Submitted via CrystFEL GUI"); - job_desc_msg.shared = 0; - job_desc_msg.time_limit = 60; - job_desc_msg.partition = strdup(opts->partition); - job_desc_msg.min_nodes = 1; - job_desc_msg.max_nodes = 1; - job_desc_msg.name = strdup(job_title); - job_desc_msg.std_err = strdup("job.err"); - job_desc_msg.std_out = strdup("job.out"); - job_desc_msg.work_dir = g_file_get_path(workdir_file); - job_desc_msg.script = script; - job_desc_msg.environment = create_env(&job_desc_msg.env_size, - opts->path_add); + job->job_ids[i] = job_id; + STATUS("Submitted SLURM job ID %i\n", job_id); + } + for ( i=0; i<n_env; i++ ) free(env[i]); + free(env); + free(workdir); g_object_unref(workdir_file); - r = slurm_submit_batch_job(&job_desc_msg, &resp); - - if ( r ) { - ERROR("Couldn't submit job: %i\n", errno); + if ( fail ) { + free(job->job_ids); free(job); return NULL; } - - STATUS("Submitted SLURM job ID %i\n", resp->job_id); - slurm_free_submit_response_response_msg(resp); - return job; } diff --git a/src/gui_index.c b/src/gui_index.c index a0027f82..9874fecf 100644 --- a/src/gui_index.c +++ b/src/gui_index.c @@ -619,6 +619,8 @@ static char *get_indexamajig_exe() char **indexamajig_command_line(const char *geom_filename, const char *n_thread_str, + const char *files_list, + const char *stream_filename, struct peak_params *peak_search_params, struct index_params *indexing_params) { @@ -641,11 +643,11 @@ char **indexamajig_command_line(const char *geom_filename, /* The basics */ add_arg(args, n_args++, indexamajig_path); add_arg(args, n_args++, "-i"); - add_arg(args, n_args++, "files.lst"); + add_arg(args, n_args++, files_list); add_arg(args, n_args++, "-g"); add_arg(args, n_args++, geom_filename); add_arg(args, n_args++, "-o"); - add_arg(args, n_args++, "crystfel.stream"); + add_arg(args, n_args++, stream_filename); add_arg(args, n_args++, "-j"); add_arg(args, n_args++, n_thread_str); diff --git a/src/gui_index.h 
b/src/gui_index.h index f0454271..0812c3a9 100644 --- a/src/gui_index.h +++ b/src/gui_index.h @@ -43,6 +43,8 @@ extern void cell_explorer_sig(struct crystfelproject *proj); extern char **indexamajig_command_line(const char *geom_filename, const char *n_thread_str, + const char *files_list, + const char *stream_filename, struct peak_params *peak_search_params, struct index_params *indexing_params); |