diff options
author | Thomas White <taw@physics.org> | 2011-03-02 18:40:28 +0100 |
---|---|---|
committer | Thomas White <taw@physics.org> | 2012-02-22 15:27:16 +0100 |
commit | c708b2162f76a228235983f183f6250dc68522c4 (patch) | |
tree | 223e0f62003f7529f073ba35001c1ec4712f668c | |
parent | 356b10b53e51d1ec695d9b6a09bd9fab6b46a0f3 (diff) |
Fix GPU code for new geometry, and tidy up some detector stuff (needs debugging)
-rw-r--r-- | config.h.in | 7 | ||||
-rwxr-xr-x | configure | 57 | ||||
-rw-r--r-- | configure.ac | 37 | ||||
-rw-r--r-- | data/diffraction.cl | 49 | ||||
-rw-r--r-- | src/cl-utils.c | 10 | ||||
-rw-r--r-- | src/detector.c | 41 | ||||
-rw-r--r-- | src/diffraction-gpu.c | 208 |
7 files changed, 234 insertions, 175 deletions
diff --git a/config.h.in b/config.h.in index 80cbca26..48575a27 100644 --- a/config.h.in +++ b/config.h.in @@ -18,6 +18,9 @@ /* Define to 1 if Cairo is available */ #undef HAVE_CAIRO +/* Define to 1 if CL/cl.h should be used */ +#undef HAVE_CL_CL_H + /* Define to 1 if you have the declaration of `strndup', and to 0 if you don't. */ #undef HAVE_DECL_STRNDUP @@ -157,10 +160,6 @@ /* Define if you have the 'wchar_t' type. */ #undef HAVE_WCHAR_T -/* Define to 1 if you have the - </System/Library/Frameworks/OpenCL.framework/Headers/cl.h> header file. */ -#undef HAVE__SYSTEM_LIBRARY_FRAMEWORKS_OPENCL_FRAMEWORK_HEADERS_CL_H - /* Define to a substitute value for mmap()'s MAP_ANONYMOUS flag. */ #undef MAP_ANONYMOUS @@ -809,7 +809,7 @@ enable_silent_rules with_hdf5 with_gsl enable_gsl_fudge -enable_opencl +with_opencl enable_gtk enable_gtktest enable_png @@ -1464,7 +1464,6 @@ Optional Features: --disable-silent-rules verbose build output (undo: `make V=0') --enable-gsl-fudge Use this option if you get lots of linker errors concerning GSL and CBLAS - --enable-opencl Enable the use of OpenCL --disable-gtk Disable GTK+/GLib --disable-gtktest do not try to compile and run a test GTK+ program --disable-png Disable the use of libPNG @@ -1477,6 +1476,7 @@ Optional Packages: --with-hdf5 specify location of HDF5 library --with-gsl specify location of GSL (instead of using pkg-config) + --with-opencl Use OpenCL --with-libtiff specify location of libTIFF library Some influential environment variables: @@ -6442,37 +6442,56 @@ $as_echo "no" >&6; } fi -# Check whether --enable-opencl was given. -if test "${enable_opencl+set}" = set; then : - enableval=$enable_opencl; + + +# Check whether --with-opencl was given. +if test "${with_opencl+set}" = set; then : + withval=$with_opencl; fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use OpenCL" >&5 $as_echo_n "checking whether to use OpenCL... " >&6; } -if test "x$enable_opencl" == "xyes"; then : +if test "x$with_opencl" == "xyes"; then : { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 $as_echo "yes" >&6; } - for ac_header in /System/Library/Frameworks/OpenCL.framework/Headers/cl.h -do : - ac_fn_c_check_header_mongrel "$LINENO" "/System/Library/Frameworks/OpenCL.framework/Headers/cl.h" "ac_cv_header__System_Library_Frameworks_OpenCL_framework_Headers_cl_h" "$ac_includes_default" -if test "x$ac_cv_header__System_Library_Frameworks_OpenCL_framework_Headers_cl_h" = x""yes; then : - cat >>confdefs.h <<_ACEOF -#define HAVE__SYSTEM_LIBRARY_FRAMEWORKS_OPENCL_FRAMEWORK_HEADERS_CL_H 1 -_ACEOF + { $as_echo "$as_me:${as_lineno-$LINENO}: checking cl.h" >&5 +$as_echo_n "checking cl.h... " >&6; } + if test -f /System/Library/Frameworks/OpenCL.framework/Headers/cl.h; then : + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: /System/Library/Frameworks/OpenCL.framework/Headers/cl.h" >&5 +$as_echo "/System/Library/Frameworks/OpenCL.framework/Headers/cl.h" >&6; } OPENCL_CFLAGS="-I/System/Library/Frameworks/OpenCL.framework/Headers" OPENCL_LIBS="-framework OpenCL" -else +fi + if test -f /usr/local/cuda/cuda/include/CL/cl.h; then : - OPENCL_CFLAGS="-I/usr/include/CL" + { $as_echo "$as_me:${as_lineno-$LINENO}: result: /usr/local/cuda/cuda/include/CL/cl.h" >&5 +$as_echo "/usr/local/cuda/cuda/include/CL/cl.h" >&6; } + OPENCL_CFLAGS="-I/usr/local/cuda/cuda/include" OPENCL_LIBS="-lOpenCL" + cl_cl_h=true fi + if test -f /usr/local/cuda/include/CL/cl.h; then : -done + { $as_echo "$as_me:${as_lineno-$LINENO}: result: /usr/local/cuda/include/CL/cl.h" >&5 +$as_echo "/usr/local/cuda/include/CL/cl.h" >&6; } + OPENCL_CFLAGS="-I/usr/local/cuda/include" + OPENCL_LIBS="-lOpenCL" + cl_cl_h=true +else + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found, assuming /usr/include/CL/cl.h" >&5 +$as_echo "not found, assuming /usr/include/CL/cl.h" >&6; } + OPENCL_CFLAGS="" + OPENCL_LIBS="-lOpenCL" + cl_cl_h=true + +fi $as_echo "#define HAVE_OPENCL 1" >>confdefs.h @@ -6492,8 +6511,14 @@ else HAVE_OPENCL_FALSE= fi +if test "x$cl_cl_h" == "xtrue"; then : +$as_echo "#define HAVE_CL_CL_H 1" >>confdefs.h + + +fi + # Check whether --enable-gtk was given. if test "${enable_gtk+set}" = set; then : enableval=$enable_gtk; diff --git a/configure.ac b/configure.ac index f7eac7e6..3674d190 100644 --- a/configure.ac +++ b/configure.ac @@ -53,28 +53,49 @@ AS_IF([test "x$enable_gsl_fudge" == "xyes"], AC_MSG_RESULT([no]) ]) -AC_ARG_ENABLE(opencl, AS_HELP_STRING([--enable-opencl], [Enable the use of OpenCL])) + +AC_ARG_WITH(opencl, AS_HELP_STRING([--with-opencl], [Use OpenCL])) AC_MSG_CHECKING([whether to use OpenCL]) -AS_IF([test "x$enable_opencl" == "xyes"], +AS_IF([test "x$with_opencl" == "xyes"], [ AC_MSG_RESULT([yes]) - AC_CHECK_HEADERS([/System/Library/Frameworks/OpenCL.framework/Headers/cl.h], + + AC_MSG_CHECKING([cl.h]) + AS_IF([test -f /System/Library/Frameworks/OpenCL.framework/Headers/cl.h], [ + AC_MSG_RESULT([/System/Library/Frameworks/OpenCL.framework/Headers/cl.h]) OPENCL_CFLAGS="-I/System/Library/Frameworks/OpenCL.framework/Headers" OPENCL_LIBS="-framework OpenCL" - ], + ]) + AS_IF([test -f /usr/local/cuda/cuda/include/CL/cl.h], [ - OPENCL_CFLAGS="-I/usr/include/CL" + AC_MSG_RESULT([/usr/local/cuda/cuda/include/CL/cl.h]) + OPENCL_CFLAGS="-I/usr/local/cuda/cuda/include" OPENCL_LIBS="-lOpenCL" + cl_cl_h=true + ]) + AS_IF([test -f /usr/local/cuda/include/CL/cl.h], + [ + AC_MSG_RESULT([/usr/local/cuda/include/CL/cl.h]) + OPENCL_CFLAGS="-I/usr/local/cuda/include" + OPENCL_LIBS="-lOpenCL" + cl_cl_h=true + ], [ + AC_MSG_RESULT([not found, assuming /usr/include/CL/cl.h]) + OPENCL_CFLAGS="" + OPENCL_LIBS="-lOpenCL" + cl_cl_h=true ]) AC_DEFINE([HAVE_OPENCL], [1], [Define to 1 if OpenCL is available]) have_opencl=true -], -[ +], [ AC_MSG_RESULT([no]) ]) AM_CONDITIONAL([HAVE_OPENCL], test x$have_opencl = xtrue) - +AS_IF([test "x$cl_cl_h" == "xtrue"], +[ + AC_DEFINE([HAVE_CL_CL_H], [1], [Define to 1 if CL/cl.h should be used]) +]) AC_ARG_ENABLE(gtk, AS_HELP_STRING([--disable-gtk], [Disable GTK+/GLib])) havegtk=false diff --git a/data/diffraction.cl b/data/diffraction.cl index 9eae1b4c..dbbad265 100644 --- a/data/diffraction.cl +++ b/data/diffraction.cl @@ -24,15 +24,20 @@ const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_REPEAT | CLK_FILTER_LINEAR; -float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k, - float *ttp, int sampling) +float4 get_q(int fs, int ss, float res, float clen, float k, + float *ttp, float corner_x, float corner_y, + float fsx, float fsy, float ssx, float ssy) { float rx, ry, r; float az, tt; float4 q; + float xs, ys; - rx = ((float)x - sampling*cx)/(res*sampling); - ry = ((float)y - sampling*cy)/(res*sampling); + xs = fs*fsx + ss*ssx; + ys = fs*fsy + ss*ssy; + + rx = (xs + corner_x) / res; + ry = (ys + corner_y) / res; r = sqrt(pow(rx, 2.0f) + pow(ry, 2.0f)); @@ -185,19 +190,19 @@ float molecule_factor(global float *intensities, global float *flags, kernel void diffraction(global float *diff, global float *tt, float klow, - int w, float cx, float cy, + int w, float corner_x, float corner_y, float res, float clen, float16 cell, global float *intensities, - int xmin, int ymin, int sampling, local float *tmp, + int min_fs, int min_ss, int sampling, local float *tmp, float kstep, read_only image2d_t func_a, read_only image2d_t func_b, read_only image2d_t func_c, - global float *flags) + global float *flags, + float fsx, float fsy, float ssx, float ssy) { float ttv; - const int x = get_global_id(0) + (xmin*sampling); - const int y = get_global_id(1) + (ymin*sampling); + float fs, ss; float f_lattice, I_lattice; float I_molecule; float4 q; @@ -205,12 +210,21 @@ kernel void diffraction(global float *diff, global float *tt, float klow, const int ly = get_local_id(1); const int lb = get_local_id(2); float k = klow + kstep * get_local_id(2); - const int ax = x / sampling; - const int ay = y / sampling; + const int afs = fs / sampling; + const int ass = ss / sampling; /* Array index of target pixel */ float intensity; - /* Calculate value */ - q = get_q(x, y, cx, cy, res, clen, k, &ttv, sampling); + /* Calculate fractional coordinates in fs/ss */ + fs = convert_float(get_global_id(0) + (min_fs*sampling)) + / convert_float(sampling); + ss = convert_float(get_global_id(1) + (min_ss*sampling)) + / convert_float(sampling); + + /* Get the scattering vector */ + q = get_q(fs, ss, res, clen, k, &ttv, + corner_x, corner_y, fsx, fsy, ssx, ssy); + + /* Calculate the diffraction */ f_lattice = lattice_factor(cell, q, func_a, func_b, func_c); I_molecule = molecule_factor(intensities, flags, cell, q); I_lattice = pow(f_lattice, 2.0f); @@ -228,16 +242,19 @@ kernel void diffraction(global float *diff, global float *tt, float klow, int i; float sum = 0.0; float val; + int idx; + + idx = convert_int_rtz(fs)+w*convert_int_rtz(ss); for ( i=0; i<sampling*sampling*get_local_size(2); i++ ) sum += tmp[i]; - val = sum / (float)(sampling*sampling*get_local_size(2)); - diff[ax+w*ay] = val; + val = sum / convert_float(sampling*sampling*get_local_size(2)); + diff[idx] = val; /* Leader thread also records the 2theta value. * This should really be averaged across all pixels, but * I strongly suspect this would be a waste of time. */ - tt[ax+w*ay] = ttv; + tt[idx] = ttv; } } diff --git a/src/cl-utils.c b/src/cl-utils.c index 0c6dce27..7c1b23a3 100644 --- a/src/cl-utils.c +++ b/src/cl-utils.c @@ -3,17 +3,25 @@ * * OpenCL utility functions * - * (c) 2006-2010 Thomas White <taw@physics.org> + * (c) 2006-2011 Thomas White <taw@physics.org> * * Part of CrystFEL - crystallography with a FEL * */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif #include <stdlib.h> #include <stdio.h> #include <string.h> + +#ifdef HAVE_CL_CL_H +#include <CL/cl.h> +#else #include <cl.h> +#endif #include "utils.h" diff --git a/src/detector.c b/src/detector.c index eb5d1083..e53d5331 100644 --- a/src/detector.c +++ b/src/detector.c @@ -129,6 +129,12 @@ void record_image(struct image *image, int do_poisson) double ph_per_e; double area; double max_tt = 0.0; + int n_inf1 = 0; + int n_neg1 = 0; + int n_nan1 = 0; + int n_inf2 = 0; + int n_neg2 = 0; + int n_nan2 = 0; /* How many photons are scattered per electron? */ area = M_PI*pow(image->beam->beam_radius, 2.0); @@ -152,15 +158,9 @@ void record_image(struct image *image, int do_poisson) struct panel *p; intensity = (double)image->data[x + image->width*y]; - if ( isinf(intensity) ) { - ERROR("Infinity at %i,%i\n", x, y); - } - if ( intensity < 0.0 ) { - ERROR("Negative at %i,%i\n", x, y); - } - if ( isnan(intensity) ) { - ERROR("NaN at %i,%i\n", x, y); - } + if ( isinf(intensity) ) n_inf1++; + if ( intensity < 0.0 ) n_neg1++; + if ( isnan(intensity) ) n_nan1++; p = find_panel(image->det, x, y); @@ -191,15 +191,11 @@ void record_image(struct image *image, int do_poisson) image->data[x + image->width*y] = counts * image->beam->adu_per_photon; - if ( isinf(image->data[x+image->width*y]) ) { - ERROR("Processed infinity at %i,%i\n", x, y); - } - if ( isnan(image->data[x+image->width*y]) ) { - ERROR("Processed NaN at %i,%i\n", x, y); - } - if ( image->data[x+image->width*y] < 0.0 ) { - ERROR("Processed negative at %i,%i %f\n", x, y, counts); - } + + /* Sanity checks */ + if ( isinf(image->data[x+image->width*y]) ) n_inf2++; + if ( isnan(image->data[x+image->width*y]) ) n_nan2++; + if ( image->data[x+image->width*y] < 0.0 ) n_neg2++; if ( image->twotheta[x + image->width*y] > max_tt ) { max_tt = image->twotheta[x + image->width*y]; @@ -221,6 +217,15 @@ void record_image(struct image *image, int do_poisson) rad2deg(tt_side), (image->lambda/(2.0*sin(tt_side/2.0)))/1e-9); STATUS("Halve the d values to get the voxel size for a synthesis.\n"); + + if ( n_neg1 + n_inf1 + n_nan1 + n_neg2 + n_inf2 + n_nan2 ) { + ERROR("WARNING: The raw calculation produced %i negative" + " values, %i infinities and %i NaNs.\n", + n_neg1, n_inf1, n_nan1); + ERROR("WARNING: After processing, there were %i negative" + " values, %i infinities and %i NaNs.\n", + n_neg2, n_inf2, n_nan2); + } } diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index dd382ede..2f4fee77 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -3,19 +3,27 @@ * * Calculate diffraction patterns by Fourier methods (GPU version) * - * (c) 2006-2010 Thomas White <taw@physics.org> + * (c) 2006-2011 Thomas White <taw@physics.org> * * Part of CrystFEL - crystallography with a FEL * */ +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif #include <stdlib.h> #include <math.h> #include <stdio.h> #include <string.h> #include <complex.h> + +#ifdef HAVE_CL_CL_H +#include <CL/cl.h> +#else #include <cl.h> +#endif #include "image.h" #include "utils.h" @@ -107,6 +115,50 @@ static void check_sinc_lut(struct gpu_context *gctx, int n) } +static int sfloat(struct gpu_context *gctx, int idx, float val) +{ + cl_int err; + err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set kernel argument %i: %s\n", + idx, clError(err)); + return 1; + } + + return 0; +} + + +static int setint(struct gpu_context *gctx, int idx, int val) +{ + cl_int err; + + err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set kernel argument %i: %s\n", + idx, clError(err)); + return 1; + } + + return 0; +} + + +static int setmem(struct gpu_context *gctx, int idx, cl_mem val) +{ + cl_int err; + + err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set kernel argument %i: %s\n", + idx, clError(err)); + return 1; + } + + return 0; +} + + void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, int na, int nb, int nc, UnitCell *ucell) { @@ -124,6 +176,10 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, cl_int4 ncells; const int sampling = SAMPLING; cl_float bwstep; + int n_inf = 0; + int n_neg = 0; + int n_nan = 0; + if ( gctx == NULL ) { ERROR("GPU setup failed.\n"); @@ -150,82 +206,34 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, check_sinc_lut(gctx, nb); check_sinc_lut(gctx, nc); - err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 0: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 1, sizeof(cl_mem), &gctx->tt); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 1: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 2, sizeof(cl_float), &klow); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 2: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 3, sizeof(cl_int), &image->width); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 3: %s\n", clError(err)); - return; - } + if ( setmem(gctx, 0, gctx->diff) ) return; + if ( setmem(gctx, 1, gctx->tt) ) return; + if ( setmem(gctx, 9, gctx->intensities) ) return; + if ( setmem(gctx, 15, gctx->sinc_luts[na-1]) ) return; + if ( setmem(gctx, 16, gctx->sinc_luts[nb-1]) ) return; + if ( setmem(gctx, 17, gctx->sinc_luts[nc-1]) ) return; + if ( setmem(gctx, 18, gctx->flags) ) return; + + /* Unit cell */ clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 8: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 9, sizeof(cl_mem), &gctx->intensities); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 9: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 12, sizeof(cl_int), &sampling); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 12: %s\n", clError(err)); + ERROR("Couldn't set unit cell: %s\n", clError(err)); return; } + /* Local memory for reduction */ clSetKernelArg(gctx->kern, 13, BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 13: %s\n", clError(err)); - return; - } - /* Bandwidth sampling step */ - clSetKernelArg(gctx->kern, 14, sizeof(cl_float), &bwstep); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 14: %s\n", clError(err)); - return; - } - - /* LUT in 'a' direction */ - clSetKernelArg(gctx->kern, 15, sizeof(cl_mem), &gctx->sinc_luts[na-1]); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 15: %s\n", clError(err)); + ERROR("Couldn't set local memory: %s\n", clError(err)); return; } - /* LUT in 'b' direction */ - clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[nb-1]); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 16: %s\n", clError(err)); - return; - } - /* LUT in 'c' direction */ - clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nc-1]); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 17: %s\n", clError(err)); - return; - } - - /* Flag array */ - clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set flag array: %s\n", clError(err)); - return; - } + if ( sfloat(gctx, 2, klow) ) return; + if ( setint(gctx, 3, image->width) ) return; + if ( setint(gctx, 12, sampling) ) return; + if ( sfloat(gctx, 14, bwstep) ) return; /* Iterate over panels */ event = malloc(image->det->n_panels * sizeof(cl_event)); @@ -236,48 +244,24 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, /* In a future version of OpenCL, this could be done * with a global work offset. But not yet... */ - dims[0] = 1+image->det->panels[0].max_x-image->det->panels[0].min_x; - dims[1] = 1+image->det->panels[0].max_y-image->det->panels[0].min_y; + dims[0] = 1+image->det->panels[p].max_fs + -image->det->panels[p].min_fs; + dims[1] = 1+image->det->panels[p].max_ss + -image->det->panels[p].min_ss; dims[0] *= SAMPLING; dims[1] *= SAMPLING; dims[2] = BWSAMPLING; - clSetKernelArg(gctx->kern, 4, sizeof(cl_float), - &image->det->panels[p].cx); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 4: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 5, sizeof(cl_float), - &image->det->panels[p].cy); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 5: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 6, sizeof(cl_float), - &image->det->panels[p].res); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 6: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 7, sizeof(cl_float), - &image->det->panels[p].clen); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 7: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 10, sizeof(cl_int), - &image->det->panels[p].min_x); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 10: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 11, sizeof(cl_int), - &image->det->panels[p].min_y); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 11: %s\n", clError(err)); - return; - } + if ( sfloat(gctx, 4, image->det->panels[p].cnx) ) return; + if ( sfloat(gctx, 5, image->det->panels[p].cny) ) return; + if ( sfloat(gctx, 6, image->det->panels[p].res) ) return; + if ( sfloat(gctx, 7, image->det->panels[p].clen) ) return; + if ( setint(gctx, 10, image->det->panels[p].min_fs) ) return; + if ( setint(gctx, 11, image->det->panels[p].min_ss) ) return; + if ( sfloat(gctx, 19, image->det->panels[p].fsx) ) return; + if ( sfloat(gctx, 19, image->det->panels[p].fsy) ) return; + if ( sfloat(gctx, 20, image->det->panels[p].ssx) ) return; + if ( sfloat(gctx, 21, image->det->panels[p].ssy) ) return; err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL, dims, ldims, 0, NULL, &event[p]); @@ -314,15 +298,9 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, float val, tt; val = diff_ptr[x + image->width*y]; - if ( isinf(val) ) { - ERROR("Extracting infinity at %i,%i\n", x, y); - } - if ( val < 0.0 ) { - ERROR("Extracting negative at %i,%i\n", x, y); - } - if ( isnan(val) ) { - ERROR("Extracting NaN at %i,%i\n", x, y); - } + if ( isinf(val) ) n_inf++; + if ( val < 0.0 ) n_neg++; + if ( isnan(val) ) n_nan++; tt = tt_ptr[x + image->width*y]; image->data[x + image->width*y] = val; @@ -331,6 +309,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, } } + if ( n_neg + n_inf + n_nan ) { + ERROR("WARNING: The GPU calculation produced %i negative" + " values, %i infinities and %i NaNs.\n", + n_neg, n_inf, n_nan); + } + clEnqueueUnmapMemObject(gctx->cq, gctx->diff, diff_ptr, 0, NULL, NULL); clEnqueueUnmapMemObject(gctx->cq, gctx->tt, tt_ptr, 0, NULL, NULL); } |