diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Makefile.am | 4 | ||||
-rw-r--r-- | src/diffraction-gpu.c | 286 | ||||
-rw-r--r-- | src/diffraction-gpu.h | 34 | ||||
-rw-r--r-- | src/diffraction.c | 6 | ||||
-rw-r--r-- | src/image.h | 4 | ||||
-rw-r--r-- | src/pattern_sim.c | 16 |
6 files changed, 344 insertions, 6 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index 6a40c9a0..824211f8 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -10,6 +10,10 @@ AM_CPPFLAGS = -DDATADIR=\""$(datadir)"\" pattern_sim_SOURCES = pattern_sim.c diffraction.c utils.c image.c cell.c \ hdf5-file.c detector.c sfac.c intensities.c \ reflections.c +if HAVE_OPENCL +pattern_sim_SOURCES += diffraction-gpu.c +endif + pattern_sim_LDADD = @LIBS@ process_hkl_SOURCES = process_hkl.c sfac.c statistics.c cell.c utils.c \ diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c new file mode 100644 index 00000000..4172da3f --- /dev/null +++ b/src/diffraction-gpu.c @@ -0,0 +1,286 @@ +/* + * diffraction-gpu.c + * + * Calculate diffraction patterns by Fourier methods (GPU version) + * + * (c) 2006-2010 Thomas White <taw@physics.org> + * + * Part of CrystFEL - crystallography with a FEL + * + */ + + +#include <stdlib.h> +#include <math.h> +#include <stdio.h> +#include <string.h> +#include <complex.h> +#include <CL/cl.h> + +#include "image.h" +#include "utils.h" +#include "cell.h" +#include "diffraction.h" +#include "sfac.h" + + +#define SAMPLING (5) +#define BWSAMPLING (10) +#define BANDWIDTH (1.0 / 100.0) + + +static cl_device_id get_first_dev(cl_context ctx) +{ + cl_device_id dev; + cl_int r; + + r = clGetContextInfo(ctx, CL_CONTEXT_DEVICES, sizeof(dev), &dev, NULL); + if ( r != CL_SUCCESS ) { + ERROR("Couldn't get device\n"); + return 0; + } + + return dev; +} + + +static void show_build_log(cl_program prog, cl_device_id dev) +{ + cl_int r; + char log[4096]; + size_t s; + + r = clGetProgramBuildInfo(prog, dev, CL_PROGRAM_BUILD_LOG, 4096, log, + &s); + + STATUS("%s\n", log); +} + + +static cl_program load_program(const char *filename, cl_context ctx, + cl_device_id dev, cl_int *err) +{ + FILE *fh; + cl_program prog; + char *source; + size_t len; + cl_int r; + + fh = fopen(filename, "r"); + if ( fh == NULL ) { + ERROR("Couldn't open '%s'\n", filename); + *err = CL_INVALID_PROGRAM; + return 0; + } + source = malloc(16384); + len = fread(source, 1, 16383, fh); + fclose(fh); + source[len] = '\0'; + + prog = clCreateProgramWithSource(ctx, 1, (const char **)&source, + NULL, err); + if ( *err != CL_SUCCESS ) { + ERROR("Couldn't load source\n"); + return 0; + } + + r = clBuildProgram(prog, 0, NULL, "-Werror", NULL, NULL); + if ( r != CL_SUCCESS ) { + ERROR("Couldn't build program\n"); + show_build_log(prog, dev); + *err = r; + return 0; + } + + *err = CL_SUCCESS; + return prog; +} + + +void get_diffraction_gpu(struct image *image, int na, int nb, int nc, + int no_sfac) +{ + cl_uint nplat; + cl_platform_id platforms[8]; + cl_context_properties prop[3]; + cl_context ctx; + cl_int err; + cl_command_queue cq; + cl_program prog; + cl_device_id dev; + cl_kernel kern; + double ax, ay, az; + double bx, by, bz; + double cx, cy, cz; + double a, b, c, d; + float kc; + const size_t dims[2] = {1024, 1024}; + + cl_mem sfacs; + size_t sfac_size; + float *sfac_ptr; + cl_mem tt; + size_t tt_size; + float *tt_ptr; + int x, y; + cl_float16 cell; + cl_mem diff; + size_t diff_size; + float *diff_ptr; + int i; + + if ( image->molecule == NULL ) return; + + /* Generate structure factors if required */ + if ( !no_sfac ) { + if ( image->molecule->reflections == NULL ) { + get_reflections_cached(image->molecule, + ph_lambda_to_en(image->lambda)); + } + } + + cell_get_cartesian(image->molecule->cell, &ax, &ay, &az, + &bx, &by, &bz, + &cx, &cy, &cz); + cell[0] = ax; cell[1] = ay; cell[2] = az; + cell[3] = bx; cell[4] = by; cell[5] = bz; + cell[6] = cx; cell[7] = cy; cell[8] = cz; + + cell_get_parameters(image->molecule->cell, + &a, &b, &c, &d, &d, &d); + STATUS("Particle size = %i x %i x %i (=%5.2f x %5.2f x %5.2f nm)\n", + na, nb, nc, na*a/1.0e-9, nb*b/1.0e-9, nc*c/1.0e-9); + + err = clGetPlatformIDs(8, platforms, &nplat); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't get platform IDs: %i\n", err); + return; + } + STATUS("%i platforms\n", nplat); + prop[0] = CL_CONTEXT_PLATFORM; + prop[1] = (cl_context_properties)platforms[0]; + prop[2] = 0; + + ctx = clCreateContextFromType(prop, CL_DEVICE_TYPE_GPU, NULL, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't create OpenCL context: %i\n", err); + switch ( err ) { + case CL_INVALID_PLATFORM : + ERROR("Invalid platform\n"); + break; + case CL_OUT_OF_HOST_MEMORY : + ERROR("Out of memory\n"); + break; + } + return; + } + + dev = get_first_dev(ctx); + + cq = clCreateCommandQueue(ctx, dev, 0, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't create OpenCL command queue\n"); + return; + } + + /* Create buffer for the picture */ + diff_size = image->width*image->height*sizeof(cl_float)*2; /* complex */ + diff = clCreateBuffer(ctx, CL_MEM_READ_WRITE, diff_size, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't allocate diffraction memory\n"); + return; + } + + /* Create a single-precision version of the scattering factors */ + sfac_size = IDIM*IDIM*sizeof(cl_float)*2; /* complex */ + sfac_ptr = malloc(IDIM*IDIM*sizeof(cl_float)*2); + for ( i=0; i<IDIM*IDIM; i++ ) { + sfac_ptr[2*i+0] = creal(image->molecule->reflections[i]); + sfac_ptr[2*i+1] = cimag(image->molecule->reflections[i]); + } + sfacs = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, + sfac_size, sfac_ptr, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't allocate sfac memory\n"); + return; + } + + tt_size = image->width*image->height*sizeof(cl_float); + tt = clCreateBuffer(ctx, CL_MEM_READ_WRITE, tt_size, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't allocate twotheta memory\n"); + return; + } + + prog = load_program(DATADIR"/crystfel/diffraction.cl", ctx, dev, &err); + if ( err != CL_SUCCESS ) { + return; + } + + kern = clCreateKernel(prog, "diffraction", &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't create kernel\n"); + return; + } + + /* Calculate wavelength */ + kc = 1.0/image->lambda; /* Centre value */ + + clSetKernelArg(kern, 0, sizeof(cl_mem), &diff); + clSetKernelArg(kern, 1, sizeof(cl_mem), &tt); + clSetKernelArg(kern, 2, sizeof(cl_float), &kc); + clSetKernelArg(kern, 3, sizeof(cl_int), &image->width); + clSetKernelArg(kern, 4, sizeof(cl_float), &image->det.panels[0].cx); + clSetKernelArg(kern, 5, sizeof(cl_float), &image->det.panels[0].cy); + clSetKernelArg(kern, 6, sizeof(cl_float), &image->resolution); + clSetKernelArg(kern, 7, sizeof(cl_float), &image->camera_len); + clSetKernelArg(kern, 8, sizeof(cl_float16), &cell); + clSetKernelArg(kern, 9, sizeof(cl_mem), &sfacs); + + STATUS("Running...\n"); + err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL, + 0, NULL, NULL); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't enqueue kernel\n"); + return; + } + + diff_ptr = clEnqueueMapBuffer(cq, diff, CL_TRUE, CL_MAP_READ, 0, + diff_size, 0, NULL, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't map sfac buffer\n"); + return; + } + tt_ptr = clEnqueueMapBuffer(cq, tt, CL_TRUE, CL_MAP_READ, 0, + tt_size, 0, NULL, NULL, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't map tt buffer\n"); + return; + } + STATUS("Done!\n"); + + image->sfacs = calloc(image->width * image->height, + sizeof(double complex)); + image->twotheta = calloc(image->width * image->height, sizeof(double)); + + for ( x=0; x<image->width; x++ ) { + for ( y=0; y<image->height; y++ ) { + + float re, im, tt; + + re = diff_ptr[2*(x + image->width*y)+0]; + im = diff_ptr[2*(x + image->width*y)+1]; + tt = tt_ptr[x + image->width*y]; + + image->sfacs[x + image->width*y] = re + I*im; + image->twotheta[x + image->width*y] = tt; + + } + } + + clReleaseProgram(prog); + clReleaseMemObject(sfacs); + clReleaseCommandQueue(cq); + clReleaseContext(ctx); + +} diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h new file mode 100644 index 00000000..687fecf3 --- /dev/null +++ b/src/diffraction-gpu.h @@ -0,0 +1,34 @@ +/* + * diffraction-gpu.h + * + * Calculate diffraction patterns by Fourier methods (GPU version) + * + * (c) 2006-2010 Thomas White <taw@physics.org> + * + * Part of CrystFEL - crystallography with a FEL + * + */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#ifndef DIFFRACTION_GPU_H +#define DIFFRACTION_GPU_H + +#include "image.h" +#include "cell.h" + +#if HAVE_OPENCL +extern void get_diffraction_gpu(struct image *image, int na, int nb, int nc, + int nosfac); +#else +static void get_diffraction_gpu(struct image *image, int na, int nb, int nc, + int nosfac) +{ + /* Do nothing */ + ERROR("This copy of CrystFEL was not compiled with OpenCL support.\n"); +} +#endif + +#endif /* DIFFRACTION_GPU_H */ diff --git a/src/diffraction.c b/src/diffraction.c index 88b50d1f..fb993bf6 100644 --- a/src/diffraction.c +++ b/src/diffraction.c @@ -23,9 +23,9 @@ #include "sfac.h" -#define SAMPLING (5) -#define BWSAMPLING (10) -#define BANDWIDTH (1.0 / 100.0) +#define SAMPLING (1) +#define BWSAMPLING (1) +#define BANDWIDTH (0.0 / 100.0) static double lattice_factor(struct rvec q, double ax, double ay, double az, diff --git a/src/image.h b/src/image.h index 144ebe42..78194433 100644 --- a/src/image.h +++ b/src/image.h @@ -88,8 +88,8 @@ struct image { * If FORMULATION_PIXELSIZE, then pixel_size only is needed.*/ FormulationMode fmode; double pixel_size; - double camera_len; - double resolution; /* pixels per metre */ + float camera_len; + float resolution; /* pixels per metre */ /* Wavelength must always be given */ double lambda; /* Wavelength in m */ diff --git a/src/pattern_sim.c b/src/pattern_sim.c index d0aad29b..e3d675d2 100644 --- a/src/pattern_sim.c +++ b/src/pattern_sim.c @@ -21,6 +21,7 @@ #include "image.h" #include "diffraction.h" +#include "diffraction-gpu.h" #include "cell.h" #include "utils.h" #include "hdf5-file.h" @@ -38,6 +39,7 @@ static void show_help(const char *s) "\n" " -h, --help Display this help message.\n" " --simulation-details Show technical details of the simulation.\n" +" --gpu Use the GPU to speed up the calculation.\n" "\n" " --near-bragg Output h,k,l,I near Bragg conditions.\n" " -n, --number=<N> Generate N images. Default 1.\n" @@ -157,6 +159,7 @@ int main(int argc, char *argv[]) int config_nonoise = 0; int config_nobloom = 0; int config_nosfac = 0; + int config_gpu = 0; int ndone = 0; /* Number of simulations done (images or not) */ int number = 1; /* Number used for filename of image */ int n_images = 1; /* Generate one image by default */ @@ -166,6 +169,7 @@ int main(int argc, char *argv[]) const struct option longopts[] = { {"help", 0, NULL, 'h'}, {"simulation-details", 0, &config_simdetails, 1}, + {"gpu", 0, &config_gpu, 1}, {"near-bragg", 0, &config_nearbragg, 1}, {"random-orientation", 0, NULL, 'r'}, {"number", 1, NULL, 'n'}, @@ -274,11 +278,20 @@ int main(int argc, char *argv[]) image.twotheta = NULL; image.hdr = NULL; - get_diffraction(&image, na, nb, nc, config_nosfac); + if ( config_gpu ) { + get_diffraction_gpu(&image, na, nb, nc, config_nosfac); + } else { + get_diffraction(&image, na, nb, nc, config_nosfac); + } if ( image.molecule == NULL ) { ERROR("Couldn't open molecule.pdb\n"); return 1; } + if ( image.sfacs == NULL ) { + ERROR("Diffraction calculation failed.\n"); + goto skip; + } + record_image(&image, !config_nowater, !config_nonoise, !config_nobloom); @@ -305,6 +318,7 @@ int main(int argc, char *argv[]) free(image.sfacs); free(image.twotheta); +skip: ndone++; if ( n_images && (ndone >= n_images) ) done = 1; |