Fix GPU code for new geometry, and tidy up some detector stuff (needs debugging)

author: Thomas White <taw@physics.org> 2011-03-02 18:40:28 +0100
committer: Thomas White <taw@physics.org> 2012-02-22 15:27:16 +0100
commit: c708b2162f76a228235983f183f6250dc68522c4 (patch)
tree: 223e0f62003f7529f073ba35001c1ec4712f668c
parent: 356b10b53e51d1ec695d9b6a09bd9fab6b46a0f3 (diff)
7 files changed, 234 insertions, 175 deletions
diff --git a/config.h.in b/config.h.in
index 80cbca26..48575a27 100644
--- a/config.h.in
+++ b/config.h.in
@@ -18,6 +18,9 @@
 /* Define to 1 if Cairo is available */
 #undef HAVE_CAIRO
 
+/* Define to 1 if CL/cl.h should be used */
+#undef HAVE_CL_CL_H
+
 /* Define to 1 if you have the declaration of `strndup', and to 0 if you
    don't. */
 #undef HAVE_DECL_STRNDUP
@@ -157,10 +160,6 @@
 /* Define if you have the 'wchar_t' type. */
 #undef HAVE_WCHAR_T
 
-/* Define to 1 if you have the
-   </System/Library/Frameworks/OpenCL.framework/Headers/cl.h> header file. */
-#undef HAVE__SYSTEM_LIBRARY_FRAMEWORKS_OPENCL_FRAMEWORK_HEADERS_CL_H
-
 /* Define to a substitute value for mmap()'s MAP_ANONYMOUS flag. */
 #undef MAP_ANONYMOUS
 
diff --git a/configure b/configure
index e30aa7c8..c060e2de 100755
--- a/configure
+++ b/configure
@@ -809,7 +809,7 @@ enable_silent_rules
 with_hdf5
 with_gsl
 enable_gsl_fudge
-enable_opencl
+with_opencl
 enable_gtk
 enable_gtktest
 enable_png
@@ -1464,7 +1464,6 @@ Optional Features:
   --disable-silent-rules         verbose build output (undo: `make V=0')
   --enable-gsl-fudge      Use this option if you get lots of linker errors
                           concerning GSL and CBLAS
-  --enable-opencl         Enable the use of OpenCL
   --disable-gtk           Disable GTK+/GLib
   --disable-gtktest       do not try to compile and run a test GTK+ program
   --disable-png           Disable the use of libPNG
@@ -1477,6 +1476,7 @@ Optional Packages:
   --with-hdf5             specify location of HDF5 library
   --with-gsl              specify location of GSL (instead of using
                           pkg-config)
+  --with-opencl           Use OpenCL
   --with-libtiff          specify location of libTIFF library
 
 Some influential environment variables:
@@ -6442,37 +6442,56 @@ $as_echo "no" >&6; }
 
 fi
 
-# Check whether --enable-opencl was given.
-if test "${enable_opencl+set}" = set; then :
-  enableval=$enable_opencl;
+
+
+# Check whether --with-opencl was given.
+if test "${with_opencl+set}" = set; then :
+  withval=$with_opencl;
 fi
 
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to use OpenCL" >&5
 $as_echo_n "checking whether to use OpenCL... " >&6; }
-if test "x$enable_opencl" == "xyes"; then :
+if test "x$with_opencl" == "xyes"; then :
 
    { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
 $as_echo "yes" >&6; }
-   for ac_header in /System/Library/Frameworks/OpenCL.framework/Headers/cl.h
-do :
-  ac_fn_c_check_header_mongrel "$LINENO" "/System/Library/Frameworks/OpenCL.framework/Headers/cl.h" "ac_cv_header__System_Library_Frameworks_OpenCL_framework_Headers_cl_h" "$ac_includes_default"
-if test "x$ac_cv_header__System_Library_Frameworks_OpenCL_framework_Headers_cl_h" = x""yes; then :
-  cat >>confdefs.h <<_ACEOF
-#define HAVE__SYSTEM_LIBRARY_FRAMEWORKS_OPENCL_FRAMEWORK_HEADERS_CL_H 1
-_ACEOF
 
+   { $as_echo "$as_me:${as_lineno-$LINENO}: checking cl.h" >&5
+$as_echo_n "checking cl.h... " >&6; }
+   if test -f /System/Library/Frameworks/OpenCL.framework/Headers/cl.h; then :
+
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: /System/Library/Frameworks/OpenCL.framework/Headers/cl.h" >&5
+$as_echo "/System/Library/Frameworks/OpenCL.framework/Headers/cl.h" >&6; }
       OPENCL_CFLAGS="-I/System/Library/Frameworks/OpenCL.framework/Headers"
       OPENCL_LIBS="-framework OpenCL"
 
-else
+fi
+   if test -f /usr/local/cuda/cuda/include/CL/cl.h; then :
 
-      OPENCL_CFLAGS="-I/usr/include/CL"
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: /usr/local/cuda/cuda/include/CL/cl.h" >&5
+$as_echo "/usr/local/cuda/cuda/include/CL/cl.h" >&6; }
+      OPENCL_CFLAGS="-I/usr/local/cuda/cuda/include"
       OPENCL_LIBS="-lOpenCL"
+      cl_cl_h=true
 
 fi
+   if test -f /usr/local/cuda/include/CL/cl.h; then :
 
-done
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: /usr/local/cuda/include/CL/cl.h" >&5
+$as_echo "/usr/local/cuda/include/CL/cl.h" >&6; }
+      OPENCL_CFLAGS="-I/usr/local/cuda/include"
+      OPENCL_LIBS="-lOpenCL"
+      cl_cl_h=true
 
+else
+
+      { $as_echo "$as_me:${as_lineno-$LINENO}: result: not found, assuming /usr/include/CL/cl.h" >&5
+$as_echo "not found, assuming /usr/include/CL/cl.h" >&6; }
+      OPENCL_CFLAGS=""
+      OPENCL_LIBS="-lOpenCL"
+      cl_cl_h=true
+
+fi
 
 $as_echo "#define HAVE_OPENCL 1" >>confdefs.h
 
@@ -6492,8 +6511,14 @@ else
   HAVE_OPENCL_FALSE=
 fi
 
+if test "x$cl_cl_h" == "xtrue"; then :
 
 
+$as_echo "#define HAVE_CL_CL_H 1" >>confdefs.h
+
+
+fi
+
 # Check whether --enable-gtk was given.
 if test "${enable_gtk+set}" = set; then :
   enableval=$enable_gtk;
diff --git a/configure.ac b/configure.ac
index f7eac7e6..3674d190 100644
--- a/configure.ac
+++ b/configure.ac
@@ -53,28 +53,49 @@ AS_IF([test "x$enable_gsl_fudge" == "xyes"],
    AC_MSG_RESULT([no])
 ])
 
-AC_ARG_ENABLE(opencl, AS_HELP_STRING([--enable-opencl], [Enable the use of OpenCL]))
+
+AC_ARG_WITH(opencl, AS_HELP_STRING([--with-opencl], [Use OpenCL]))
 AC_MSG_CHECKING([whether to use OpenCL])
-AS_IF([test "x$enable_opencl" == "xyes"],
+AS_IF([test "x$with_opencl" == "xyes"],
 [
    AC_MSG_RESULT([yes])
-   AC_CHECK_HEADERS([/System/Library/Frameworks/OpenCL.framework/Headers/cl.h],
+
+   AC_MSG_CHECKING([cl.h])
+   AS_IF([test -f /System/Library/Frameworks/OpenCL.framework/Headers/cl.h],
    [
+      AC_MSG_RESULT([/System/Library/Frameworks/OpenCL.framework/Headers/cl.h])
       OPENCL_CFLAGS="-I/System/Library/Frameworks/OpenCL.framework/Headers"
       OPENCL_LIBS="-framework OpenCL"
-   ],
+   ])
+   AS_IF([test -f /usr/local/cuda/cuda/include/CL/cl.h],
    [
-      OPENCL_CFLAGS="-I/usr/include/CL"
+      AC_MSG_RESULT([/usr/local/cuda/cuda/include/CL/cl.h])
+      OPENCL_CFLAGS="-I/usr/local/cuda/cuda/include"
       OPENCL_LIBS="-lOpenCL"
+      cl_cl_h=true
+   ])
+   AS_IF([test -f /usr/local/cuda/include/CL/cl.h],
+   [
+      AC_MSG_RESULT([/usr/local/cuda/include/CL/cl.h])
+      OPENCL_CFLAGS="-I/usr/local/cuda/include"
+      OPENCL_LIBS="-lOpenCL"
+      cl_cl_h=true
+   ], [
+      AC_MSG_RESULT([not found, assuming /usr/include/CL/cl.h])
+      OPENCL_CFLAGS=""
+      OPENCL_LIBS="-lOpenCL"
+      cl_cl_h=true
    ])
    AC_DEFINE([HAVE_OPENCL], [1], [Define to 1 if OpenCL is available])
    have_opencl=true
-],
-[
+], [
    AC_MSG_RESULT([no])
 ])
 AM_CONDITIONAL([HAVE_OPENCL], test x$have_opencl = xtrue)
-
+AS_IF([test "x$cl_cl_h" == "xtrue"],
+[
+   AC_DEFINE([HAVE_CL_CL_H], [1], [Define to 1 if CL/cl.h should be used])
+])
 
 AC_ARG_ENABLE(gtk, AS_HELP_STRING([--disable-gtk], [Disable GTK+/GLib]))
 havegtk=false
diff --git a/data/diffraction.cl b/data/diffraction.cl
index 9eae1b4c..dbbad265 100644
--- a/data/diffraction.cl
+++ b/data/diffraction.cl
@@ -24,15 +24,20 @@ const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_REPEAT
                              | CLK_FILTER_LINEAR;
 
 
-float4 get_q(int x, int y, float cx, float cy, float res, float clen, float k,
-             float *ttp, int sampling)
+float4 get_q(int fs, int ss, float res, float clen, float k,
+             float *ttp, float corner_x, float corner_y,
+             float fsx, float fsy, float ssx, float ssy)
 {
 	float rx, ry, r;
 	float az, tt;
 	float4 q;
+	float xs, ys;
 
-	rx = ((float)x - sampling*cx)/(res*sampling);
-	ry = ((float)y - sampling*cy)/(res*sampling);
+	xs = fs*fsx + ss*ssx;
+	ys = fs*fsy + ss*ssy;
+
+	rx = (xs + corner_x) / res;
+	ry = (ys + corner_y) / res;
 
 	r = sqrt(pow(rx, 2.0f) + pow(ry, 2.0f));
 
@@ -185,19 +190,19 @@ float molecule_factor(global float *intensities, global float *flags,
 
 
 kernel void diffraction(global float *diff, global float *tt, float klow,
-                       int w, float cx, float cy,
+                       int w, float corner_x, float corner_y,
                        float res, float clen, float16 cell,
                        global float *intensities,
-                       int xmin, int ymin, int sampling, local float *tmp,
+                       int min_fs, int min_ss, int sampling, local float *tmp,
                        float kstep,
                        read_only image2d_t func_a,
                        read_only image2d_t func_b,
                        read_only image2d_t func_c,
-                       global float *flags)
+                       global float *flags,
+                       float fsx, float fsy, float ssx, float ssy)
 {
 	float ttv;
-	const int x = get_global_id(0) + (xmin*sampling);
-	const int y = get_global_id(1) + (ymin*sampling);
+	float fs, ss;
 	float f_lattice, I_lattice;
 	float I_molecule;
 	float4 q;
@@ -205,12 +210,21 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
 	const int ly = get_local_id(1);
 	const int lb = get_local_id(2);
 	float k = klow + kstep * get_local_id(2);
-	const int ax = x / sampling;
-	const int ay = y / sampling;
+	const int afs = fs / sampling;
+	const int ass = ss / sampling; /* Array index of target pixel */
 	float intensity;
 
-	/* Calculate value */
-	q = get_q(x, y, cx, cy, res, clen, k, &ttv, sampling);
+	/* Calculate fractional coordinates in fs/ss */
+	fs = convert_float(get_global_id(0) + (min_fs*sampling))
+	      / convert_float(sampling);
+	ss = convert_float(get_global_id(1) + (min_ss*sampling))
+	      / convert_float(sampling);
+
+	/* Get the scattering vector */
+	q = get_q(fs, ss, res, clen, k, &ttv,
+	          corner_x, corner_y, fsx, fsy, ssx, ssy);
+
+	/* Calculate the diffraction */
 	f_lattice = lattice_factor(cell, q, func_a, func_b, func_c);
 	I_molecule = molecule_factor(intensities, flags, cell, q);
 	I_lattice = pow(f_lattice, 2.0f);
@@ -228,16 +242,19 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
 		int i;
 		float sum = 0.0;
 		float val;
+		int idx;
+
+		idx = convert_int_rtz(fs)+w*convert_int_rtz(ss);
 
 		for ( i=0; i<sampling*sampling*get_local_size(2); i++ )
 			sum += tmp[i];
 
-		val = sum / (float)(sampling*sampling*get_local_size(2));
-		diff[ax+w*ay] = val;
+		val = sum / convert_float(sampling*sampling*get_local_size(2));
+		diff[idx] = val;
 
 		/* Leader thread also records the 2theta value.
 		 * This should really be averaged across all pixels, but
 		 * I strongly suspect this would be a waste of time. */
-		tt[ax+w*ay] = ttv;
+		tt[idx] = ttv;
 	}
 }
diff --git a/src/cl-utils.c b/src/cl-utils.c
index 0c6dce27..7c1b23a3 100644
--- a/src/cl-utils.c
+++ b/src/cl-utils.c
@@ -3,17 +3,25 @@
  *
  * OpenCL utility functions
  *
- * (c) 2006-2010 Thomas White <taw@physics.org>
+ * (c) 2006-2011 Thomas White <taw@physics.org>
  *
  * Part of CrystFEL - crystallography with a FEL
  *
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
 
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+
+#ifdef HAVE_CL_CL_H
+#include <CL/cl.h>
+#else
 #include <cl.h>
+#endif
 
 #include "utils.h"
 
diff --git a/src/detector.c b/src/detector.c
index eb5d1083..e53d5331 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -129,6 +129,12 @@ void record_image(struct image *image, int do_poisson)
 	double ph_per_e;
 	double area;
 	double max_tt = 0.0;
+	int n_inf1 = 0;
+	int n_neg1 = 0;
+	int n_nan1 = 0;
+	int n_inf2 = 0;
+	int n_neg2 = 0;
+	int n_nan2 = 0;
 
 	/* How many photons are scattered per electron? */
 	area = M_PI*pow(image->beam->beam_radius, 2.0);
@@ -152,15 +158,9 @@ void record_image(struct image *image, int do_poisson)
 		struct panel *p;
 
 		intensity = (double)image->data[x + image->width*y];
-		if ( isinf(intensity) ) {
-			ERROR("Infinity at %i,%i\n", x, y);
-		}
-		if ( intensity < 0.0 ) {
-			ERROR("Negative at %i,%i\n", x, y);
-		}
-		if ( isnan(intensity) ) {
-			ERROR("NaN at %i,%i\n", x, y);
-		}
+		if ( isinf(intensity) ) n_inf1++;
+		if ( intensity < 0.0 ) n_neg1++;
+		if ( isnan(intensity) ) n_nan1++;
 
 		p = find_panel(image->det, x, y);
 
@@ -191,15 +191,11 @@ void record_image(struct image *image, int do_poisson)
 
 		image->data[x + image->width*y] = counts
 		                                  * image->beam->adu_per_photon;
-		if ( isinf(image->data[x+image->width*y]) ) {
-			ERROR("Processed infinity at %i,%i\n", x, y);
-		}
-		if ( isnan(image->data[x+image->width*y]) ) {
-			ERROR("Processed NaN at %i,%i\n", x, y);
-		}
-		if ( image->data[x+image->width*y] < 0.0 ) {
-			ERROR("Processed negative at %i,%i %f\n", x, y, counts);
-		}
+
+		/* Sanity checks */
+		if ( isinf(image->data[x+image->width*y]) ) n_inf2++;
+		if ( isnan(image->data[x+image->width*y]) ) n_nan2++;
+		if ( image->data[x+image->width*y] < 0.0 ) n_neg2++;
 
 		if ( image->twotheta[x + image->width*y] > max_tt ) {
 			max_tt = image->twotheta[x + image->width*y];
@@ -221,6 +217,15 @@ void record_image(struct image *image, int do_poisson)
 	        rad2deg(tt_side), (image->lambda/(2.0*sin(tt_side/2.0)))/1e-9);
 
 	STATUS("Halve the d values to get the voxel size for a synthesis.\n");
+
+	if ( n_neg1 + n_inf1 + n_nan1 + n_neg2 + n_inf2 + n_nan2 ) {
+		ERROR("WARNING: The raw calculation produced %i negative"
+		      " values, %i infinities and %i NaNs.\n",
+		      n_neg1, n_inf1, n_nan1);
+		ERROR("WARNING: After processing, there were %i negative"
+		      " values, %i infinities and %i NaNs.\n",
+		      n_neg2, n_inf2, n_nan2);
+	}
 }
 
 
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index dd382ede..2f4fee77 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -3,19 +3,27 @@
  *
  * Calculate diffraction patterns by Fourier methods (GPU version)
  *
- * (c) 2006-2010 Thomas White <taw@physics.org>
+ * (c) 2006-2011 Thomas White <taw@physics.org>
  *
  * Part of CrystFEL - crystallography with a FEL
  *
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
 
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
 #include <string.h>
 #include <complex.h>
+
+#ifdef HAVE_CL_CL_H
+#include <CL/cl.h>
+#else
 #include <cl.h>
+#endif
 
 #include "image.h"
 #include "utils.h"
@@ -107,6 +115,50 @@ static void check_sinc_lut(struct gpu_context *gctx, int n)
 }
 
 
+static int sfloat(struct gpu_context *gctx, int idx, float val)
+{
+	cl_int err;
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
+static int setint(struct gpu_context *gctx, int idx, int val)
+{
+	cl_int err;
+
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
+static int setmem(struct gpu_context *gctx, int idx, cl_mem val)
+{
+	cl_int err;
+
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
 void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
                          int na, int nb, int nc, UnitCell *ucell)
 {
@@ -124,6 +176,10 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	cl_int4 ncells;
 	const int sampling = SAMPLING;
 	cl_float bwstep;
+	int n_inf = 0;
+	int n_neg = 0;
+	int n_nan = 0;
+
 
 	if ( gctx == NULL ) {
 		ERROR("GPU setup failed.\n");
@@ -150,82 +206,34 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	check_sinc_lut(gctx, nb);
 	check_sinc_lut(gctx, nc);
 
-	err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 0: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 1, sizeof(cl_mem), &gctx->tt);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 1: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 2, sizeof(cl_float), &klow);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 2: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 3, sizeof(cl_int), &image->width);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 3: %s\n", clError(err));
-		return;
-	}
+	if ( setmem(gctx, 0, gctx->diff) ) return;
+	if ( setmem(gctx, 1, gctx->tt) ) return;
+	if ( setmem(gctx, 9, gctx->intensities) ) return;
+	if ( setmem(gctx, 15, gctx->sinc_luts[na-1]) ) return;
+	if ( setmem(gctx, 16, gctx->sinc_luts[nb-1]) ) return;
+	if ( setmem(gctx, 17, gctx->sinc_luts[nc-1]) ) return;
+	if ( setmem(gctx, 18, gctx->flags) ) return;
+
+	/* Unit cell */
 	clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell);
 	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 8: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 9, sizeof(cl_mem), &gctx->intensities);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 9: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 12, sizeof(cl_int), &sampling);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 12: %s\n", clError(err));
+		ERROR("Couldn't set unit cell: %s\n", clError(err));
 		return;
 	}
+
 	/* Local memory for reduction */
 	clSetKernelArg(gctx->kern, 13,
 	               BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL);
 	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 13: %s\n", clError(err));
-		return;
-	}
-	/* Bandwidth sampling step */
-	clSetKernelArg(gctx->kern, 14, sizeof(cl_float), &bwstep);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 14: %s\n", clError(err));
-		return;
-	}
-
-	/* LUT in 'a' direction */
-	clSetKernelArg(gctx->kern, 15, sizeof(cl_mem), &gctx->sinc_luts[na-1]);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 15: %s\n", clError(err));
+		ERROR("Couldn't set local memory: %s\n", clError(err));
 		return;
 	}
 
-	/* LUT in 'b' direction */
-	clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[nb-1]);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 16: %s\n", clError(err));
-		return;
-	}
 
-	/* LUT in 'c' direction */
-	clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nc-1]);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 17: %s\n", clError(err));
-		return;
-	}
-
-	/* Flag array */
-	clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set flag array: %s\n", clError(err));
-		return;
-	}
+	if ( sfloat(gctx, 2, klow) ) return;
+	if ( setint(gctx, 3, image->width) ) return;
+	if ( setint(gctx, 12, sampling) ) return;
+	if ( sfloat(gctx, 14, bwstep) ) return;
 
 	/* Iterate over panels */
 	event = malloc(image->det->n_panels * sizeof(cl_event));
@@ -236,48 +244,24 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 
 		/* In a future version of OpenCL, this could be done
 		 * with a global work offset.  But not yet... */
-		dims[0] = 1+image->det->panels[0].max_x-image->det->panels[0].min_x;
-		dims[1] = 1+image->det->panels[0].max_y-image->det->panels[0].min_y;
+		dims[0] = 1+image->det->panels[p].max_fs
+		           -image->det->panels[p].min_fs;
+		dims[1] = 1+image->det->panels[p].max_ss
+		           -image->det->panels[p].min_ss;
 		dims[0] *= SAMPLING;
 		dims[1] *= SAMPLING;
 		dims[2] = BWSAMPLING;
 
-		clSetKernelArg(gctx->kern, 4, sizeof(cl_float),
-		               &image->det->panels[p].cx);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 4: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 5, sizeof(cl_float),
-		               &image->det->panels[p].cy);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 5: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 6, sizeof(cl_float),
-		               &image->det->panels[p].res);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 6: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 7, sizeof(cl_float),
-		               &image->det->panels[p].clen);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 7: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 10, sizeof(cl_int),
-		               &image->det->panels[p].min_x);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 10: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 11, sizeof(cl_int),
-		               &image->det->panels[p].min_y);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 11: %s\n", clError(err));
-			return;
-		}
+		if ( sfloat(gctx, 4, image->det->panels[p].cnx) ) return;
+		if ( sfloat(gctx, 5, image->det->panels[p].cny) ) return;
+		if ( sfloat(gctx, 6, image->det->panels[p].res) ) return;
+		if ( sfloat(gctx, 7, image->det->panels[p].clen) ) return;
+		if ( setint(gctx, 10, image->det->panels[p].min_fs) ) return;
+		if ( setint(gctx, 11, image->det->panels[p].min_ss) ) return;
+		if ( sfloat(gctx, 19, image->det->panels[p].fsx) ) return;
+		if ( sfloat(gctx, 19, image->det->panels[p].fsy) ) return;
+		if ( sfloat(gctx, 20, image->det->panels[p].ssx) ) return;
+		if ( sfloat(gctx, 21, image->det->panels[p].ssy) ) return;
 
 		err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL,
 		                             dims, ldims, 0, NULL, &event[p]);
@@ -314,15 +298,9 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 		float val, tt;
 
 		val = diff_ptr[x + image->width*y];
-		if ( isinf(val) ) {
-			ERROR("Extracting infinity at %i,%i\n", x, y);
-		}
-		if ( val < 0.0 ) {
-			ERROR("Extracting negative at %i,%i\n", x, y);
-		}
-		if ( isnan(val) ) {
-			ERROR("Extracting NaN at %i,%i\n", x, y);
-		}
+		if ( isinf(val) ) n_inf++;
+		if ( val < 0.0 ) n_neg++;
+		if ( isnan(val) ) n_nan++;
 		tt = tt_ptr[x + image->width*y];
 
 		image->data[x + image->width*y] = val;
@@ -331,6 +309,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	}
 	}
 
+	if ( n_neg + n_inf + n_nan ) {
+		ERROR("WARNING: The GPU calculation produced %i negative"
+		      " values, %i infinities and %i NaNs.\n",
+		      n_neg, n_inf, n_nan);
+	}
+
 	clEnqueueUnmapMemObject(gctx->cq, gctx->diff, diff_ptr, 0, NULL, NULL);
 	clEnqueueUnmapMemObject(gctx->cq, gctx->tt, tt_ptr, 0, NULL, NULL);
 }
author	Thomas White <taw@physics.org>	2011-03-02 18:40:28 +0100
committer	Thomas White <taw@physics.org>	2012-02-22 15:27:16 +0100
commit	c708b2162f76a228235983f183f6250dc68522c4 (patch)
tree	223e0f62003f7529f073ba35001c1ec4712f668c
parent	356b10b53e51d1ec695d9b6a09bd9fab6b46a0f3 (diff)