diff options
author | Thomas White <taw@physics.org> | 2011-03-02 18:40:28 +0100 |
---|---|---|
committer | Thomas White <taw@physics.org> | 2012-02-22 15:27:16 +0100 |
commit | c708b2162f76a228235983f183f6250dc68522c4 (patch) | |
tree | 223e0f62003f7529f073ba35001c1ec4712f668c /src | |
parent | 356b10b53e51d1ec695d9b6a09bd9fab6b46a0f3 (diff) |
Fix GPU code for new geometry, and tidy up some detector stuff (needs debugging)
Diffstat (limited to 'src')
-rw-r--r-- | src/cl-utils.c | 10 | ||||
-rw-r--r-- | src/detector.c | 41 | ||||
-rw-r--r-- | src/diffraction-gpu.c | 208 |
3 files changed, 128 insertions, 131 deletions
diff --git a/src/cl-utils.c b/src/cl-utils.c index 0c6dce27..7c1b23a3 100644 --- a/src/cl-utils.c +++ b/src/cl-utils.c @@ -3,17 +3,25 @@ * * OpenCL utility functions * - * (c) 2006-2010 Thomas White <taw@physics.org> + * (c) 2006-2011 Thomas White <taw@physics.org> * * Part of CrystFEL - crystallography with a FEL * */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif #include <stdlib.h> #include <stdio.h> #include <string.h> + +#ifdef HAVE_CL_CL_H +#include <CL/cl.h> +#else #include <cl.h> +#endif #include "utils.h" diff --git a/src/detector.c b/src/detector.c index eb5d1083..e53d5331 100644 --- a/src/detector.c +++ b/src/detector.c @@ -129,6 +129,12 @@ void record_image(struct image *image, int do_poisson) double ph_per_e; double area; double max_tt = 0.0; + int n_inf1 = 0; + int n_neg1 = 0; + int n_nan1 = 0; + int n_inf2 = 0; + int n_neg2 = 0; + int n_nan2 = 0; /* How many photons are scattered per electron? */ area = M_PI*pow(image->beam->beam_radius, 2.0); @@ -152,15 +158,9 @@ void record_image(struct image *image, int do_poisson) struct panel *p; intensity = (double)image->data[x + image->width*y]; - if ( isinf(intensity) ) { - ERROR("Infinity at %i,%i\n", x, y); - } - if ( intensity < 0.0 ) { - ERROR("Negative at %i,%i\n", x, y); - } - if ( isnan(intensity) ) { - ERROR("NaN at %i,%i\n", x, y); - } + if ( isinf(intensity) ) n_inf1++; + if ( intensity < 0.0 ) n_neg1++; + if ( isnan(intensity) ) n_nan1++; p = find_panel(image->det, x, y); @@ -191,15 +191,11 @@ void record_image(struct image *image, int do_poisson) image->data[x + image->width*y] = counts * image->beam->adu_per_photon; - if ( isinf(image->data[x+image->width*y]) ) { - ERROR("Processed infinity at %i,%i\n", x, y); - } - if ( isnan(image->data[x+image->width*y]) ) { - ERROR("Processed NaN at %i,%i\n", x, y); - } - if ( image->data[x+image->width*y] < 0.0 ) { - ERROR("Processed negative at %i,%i %f\n", x, y, counts); - } + + /* Sanity checks */ + if ( isinf(image->data[x+image->width*y]) ) n_inf2++; + if ( isnan(image->data[x+image->width*y]) ) n_nan2++; + if ( image->data[x+image->width*y] < 0.0 ) n_neg2++; if ( image->twotheta[x + image->width*y] > max_tt ) { max_tt = image->twotheta[x + image->width*y]; @@ -221,6 +217,15 @@ void record_image(struct image *image, int do_poisson) rad2deg(tt_side), (image->lambda/(2.0*sin(tt_side/2.0)))/1e-9); STATUS("Halve the d values to get the voxel size for a synthesis.\n"); + + if ( n_neg1 + n_inf1 + n_nan1 + n_neg2 + n_inf2 + n_nan2 ) { + ERROR("WARNING: The raw calculation produced %i negative" + " values, %i infinities and %i NaNs.\n", + n_neg1, n_inf1, n_nan1); + ERROR("WARNING: After processing, there were %i negative" + " values, %i infinities and %i NaNs.\n", + n_neg2, n_inf2, n_nan2); + } } diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index dd382ede..2f4fee77 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -3,19 +3,27 @@ * * Calculate diffraction patterns by Fourier methods (GPU version) * - * (c) 2006-2010 Thomas White <taw@physics.org> + * (c) 2006-2011 Thomas White <taw@physics.org> * * Part of CrystFEL - crystallography with a FEL * */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif #include <stdlib.h> #include <math.h> #include <stdio.h> #include <string.h> #include <complex.h> + +#ifdef HAVE_CL_CL_H +#include <CL/cl.h> +#else #include <cl.h> +#endif #include "image.h" #include "utils.h" @@ -107,6 +115,50 @@ static void check_sinc_lut(struct gpu_context *gctx, int n) } +static int sfloat(struct gpu_context *gctx, int idx, float val) +{ + cl_int err; + err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set kernel argument %i: %s\n", + idx, clError(err)); + return 1; + } + + return 0; +} + + +static int setint(struct gpu_context *gctx, int idx, int val) +{ + cl_int err; + + err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set kernel argument %i: %s\n", + idx, clError(err)); + return 1; + } + + return 0; +} + + +static int setmem(struct gpu_context *gctx, int idx, cl_mem val) +{ + cl_int err; + + err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set kernel argument %i: %s\n", + idx, clError(err)); + return 1; + } + + return 0; +} + + void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, int na, int nb, int nc, UnitCell *ucell) { @@ -124,6 +176,10 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, cl_int4 ncells; const int sampling = SAMPLING; cl_float bwstep; + int n_inf = 0; + int n_neg = 0; + int n_nan = 0; + if ( gctx == NULL ) { ERROR("GPU setup failed.\n"); @@ -150,82 +206,34 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, check_sinc_lut(gctx, nb); check_sinc_lut(gctx, nc); - err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 0: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 1, sizeof(cl_mem), &gctx->tt); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 1: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 2, sizeof(cl_float), &klow); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 2: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 3, sizeof(cl_int), &image->width); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 3: %s\n", clError(err)); - return; - } + if ( setmem(gctx, 0, gctx->diff) ) return; + if ( setmem(gctx, 1, gctx->tt) ) return; + if ( setmem(gctx, 9, gctx->intensities) ) return; + if ( setmem(gctx, 15, gctx->sinc_luts[na-1]) ) return; + if ( setmem(gctx, 16, gctx->sinc_luts[nb-1]) ) return; + if ( setmem(gctx, 17, gctx->sinc_luts[nc-1]) ) return; + if ( setmem(gctx, 18, gctx->flags) ) return; + + /* Unit cell */ clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 8: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 9, sizeof(cl_mem), &gctx->intensities); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 9: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 12, sizeof(cl_int), &sampling); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 12: %s\n", clError(err)); + ERROR("Couldn't set unit cell: %s\n", clError(err)); return; } + /* Local memory for reduction */ clSetKernelArg(gctx->kern, 13, BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 13: %s\n", clError(err)); - return; - } - /* Bandwidth sampling step */ - clSetKernelArg(gctx->kern, 14, sizeof(cl_float), &bwstep); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 14: %s\n", clError(err)); - return; - } - - /* LUT in 'a' direction */ - clSetKernelArg(gctx->kern, 15, sizeof(cl_mem), &gctx->sinc_luts[na-1]); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 15: %s\n", clError(err)); + ERROR("Couldn't set local memory: %s\n", clError(err)); return; } - /* LUT in 'b' direction */ - clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[nb-1]); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 16: %s\n", clError(err)); - return; - } - /* LUT in 'c' direction */ - clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nc-1]); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 17: %s\n", clError(err)); - return; - } - - /* Flag array */ - clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set flag array: %s\n", clError(err)); - return; - } + if ( sfloat(gctx, 2, klow) ) return; + if ( setint(gctx, 3, image->width) ) return; + if ( setint(gctx, 12, sampling) ) return; + if ( sfloat(gctx, 14, bwstep) ) return; /* Iterate over panels */ event = malloc(image->det->n_panels * sizeof(cl_event)); @@ -236,48 +244,24 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, /* In a future version of OpenCL, this could be done * with a global work offset. But not yet... */ - dims[0] = 1+image->det->panels[0].max_x-image->det->panels[0].min_x; - dims[1] = 1+image->det->panels[0].max_y-image->det->panels[0].min_y; + dims[0] = 1+image->det->panels[p].max_fs + -image->det->panels[p].min_fs; + dims[1] = 1+image->det->panels[p].max_ss + -image->det->panels[p].min_ss; dims[0] *= SAMPLING; dims[1] *= SAMPLING; dims[2] = BWSAMPLING; - clSetKernelArg(gctx->kern, 4, sizeof(cl_float), - &image->det->panels[p].cx); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 4: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 5, sizeof(cl_float), - &image->det->panels[p].cy); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 5: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 6, sizeof(cl_float), - &image->det->panels[p].res); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 6: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 7, sizeof(cl_float), - &image->det->panels[p].clen); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 7: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 10, sizeof(cl_int), - &image->det->panels[p].min_x); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 10: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 11, sizeof(cl_int), - &image->det->panels[p].min_y); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 11: %s\n", clError(err)); - return; - } + if ( sfloat(gctx, 4, image->det->panels[p].cnx) ) return; + if ( sfloat(gctx, 5, image->det->panels[p].cny) ) return; + if ( sfloat(gctx, 6, image->det->panels[p].res) ) return; + if ( sfloat(gctx, 7, image->det->panels[p].clen) ) return; + if ( setint(gctx, 10, image->det->panels[p].min_fs) ) return; + if ( setint(gctx, 11, image->det->panels[p].min_ss) ) return; + if ( sfloat(gctx, 19, image->det->panels[p].fsx) ) return; + if ( sfloat(gctx, 19, image->det->panels[p].fsy) ) return; + if ( sfloat(gctx, 20, image->det->panels[p].ssx) ) return; + if ( sfloat(gctx, 21, image->det->panels[p].ssy) ) return; err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL, dims, ldims, 0, NULL, &event[p]); @@ -314,15 +298,9 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, float val, tt; val = diff_ptr[x + image->width*y]; - if ( isinf(val) ) { - ERROR("Extracting infinity at %i,%i\n", x, y); - } - if ( val < 0.0 ) { - ERROR("Extracting negative at %i,%i\n", x, y); - } - if ( isnan(val) ) { - ERROR("Extracting NaN at %i,%i\n", x, y); - } + if ( isinf(val) ) n_inf++; + if ( val < 0.0 ) n_neg++; + if ( isnan(val) ) n_nan++; tt = tt_ptr[x + image->width*y]; image->data[x + image->width*y] = val; @@ -331,6 +309,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, } } + if ( n_neg + n_inf + n_nan ) { + ERROR("WARNING: The GPU calculation produced %i negative" + " values, %i infinities and %i NaNs.\n", + n_neg, n_inf, n_nan); + } + clEnqueueUnmapMemObject(gctx->cq, gctx->diff, diff_ptr, 0, NULL, NULL); clEnqueueUnmapMemObject(gctx->cq, gctx->tt, tt_ptr, 0, NULL, NULL); } |