Fix GPU code for new geometry, and tidy up some detector stuff (needs debugging)

author: Thomas White <taw@physics.org> 2011-03-02 18:40:28 +0100
committer: Thomas White <taw@physics.org> 2012-02-22 15:27:16 +0100
commit: c708b2162f76a228235983f183f6250dc68522c4 (patch)
tree: 223e0f62003f7529f073ba35001c1ec4712f668c /src
parent: 356b10b53e51d1ec695d9b6a09bd9fab6b46a0f3 (diff)
3 files changed, 128 insertions, 131 deletions
diff --git a/src/cl-utils.c b/src/cl-utils.c
index 0c6dce27..7c1b23a3 100644
--- a/src/cl-utils.c
+++ b/src/cl-utils.c
@@ -3,17 +3,25 @@
  *
  * OpenCL utility functions
  *
- * (c) 2006-2010 Thomas White <taw@physics.org>
+ * (c) 2006-2011 Thomas White <taw@physics.org>
  *
  * Part of CrystFEL - crystallography with a FEL
  *
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
 
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+
+#ifdef HAVE_CL_CL_H
+#include <CL/cl.h>
+#else
 #include <cl.h>
+#endif
 
 #include "utils.h"
 
diff --git a/src/detector.c b/src/detector.c
index eb5d1083..e53d5331 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -129,6 +129,12 @@ void record_image(struct image *image, int do_poisson)
 	double ph_per_e;
 	double area;
 	double max_tt = 0.0;
+	int n_inf1 = 0;
+	int n_neg1 = 0;
+	int n_nan1 = 0;
+	int n_inf2 = 0;
+	int n_neg2 = 0;
+	int n_nan2 = 0;
 
 	/* How many photons are scattered per electron? */
 	area = M_PI*pow(image->beam->beam_radius, 2.0);
@@ -152,15 +158,9 @@ void record_image(struct image *image, int do_poisson)
 		struct panel *p;
 
 		intensity = (double)image->data[x + image->width*y];
-		if ( isinf(intensity) ) {
-			ERROR("Infinity at %i,%i\n", x, y);
-		}
-		if ( intensity < 0.0 ) {
-			ERROR("Negative at %i,%i\n", x, y);
-		}
-		if ( isnan(intensity) ) {
-			ERROR("NaN at %i,%i\n", x, y);
-		}
+		if ( isinf(intensity) ) n_inf1++;
+		if ( intensity < 0.0 ) n_neg1++;
+		if ( isnan(intensity) ) n_nan1++;
 
 		p = find_panel(image->det, x, y);
 
@@ -191,15 +191,11 @@ void record_image(struct image *image, int do_poisson)
 
 		image->data[x + image->width*y] = counts
 		                                  * image->beam->adu_per_photon;
-		if ( isinf(image->data[x+image->width*y]) ) {
-			ERROR("Processed infinity at %i,%i\n", x, y);
-		}
-		if ( isnan(image->data[x+image->width*y]) ) {
-			ERROR("Processed NaN at %i,%i\n", x, y);
-		}
-		if ( image->data[x+image->width*y] < 0.0 ) {
-			ERROR("Processed negative at %i,%i %f\n", x, y, counts);
-		}
+
+		/* Sanity checks */
+		if ( isinf(image->data[x+image->width*y]) ) n_inf2++;
+		if ( isnan(image->data[x+image->width*y]) ) n_nan2++;
+		if ( image->data[x+image->width*y] < 0.0 ) n_neg2++;
 
 		if ( image->twotheta[x + image->width*y] > max_tt ) {
 			max_tt = image->twotheta[x + image->width*y];
@@ -221,6 +217,15 @@ void record_image(struct image *image, int do_poisson)
 	        rad2deg(tt_side), (image->lambda/(2.0*sin(tt_side/2.0)))/1e-9);
 
 	STATUS("Halve the d values to get the voxel size for a synthesis.\n");
+
+	if ( n_neg1 + n_inf1 + n_nan1 + n_neg2 + n_inf2 + n_nan2 ) {
+		ERROR("WARNING: The raw calculation produced %i negative"
+		      " values, %i infinities and %i NaNs.\n",
+		      n_neg1, n_inf1, n_nan1);
+		ERROR("WARNING: After processing, there were %i negative"
+		      " values, %i infinities and %i NaNs.\n",
+		      n_neg2, n_inf2, n_nan2);
+	}
 }
 
 
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index dd382ede..2f4fee77 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -3,19 +3,27 @@
  *
  * Calculate diffraction patterns by Fourier methods (GPU version)
  *
- * (c) 2006-2010 Thomas White <taw@physics.org>
+ * (c) 2006-2011 Thomas White <taw@physics.org>
  *
  * Part of CrystFEL - crystallography with a FEL
  *
  */
 
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
 
 #include <stdlib.h>
 #include <math.h>
 #include <stdio.h>
 #include <string.h>
 #include <complex.h>
+
+#ifdef HAVE_CL_CL_H
+#include <CL/cl.h>
+#else
 #include <cl.h>
+#endif
 
 #include "image.h"
 #include "utils.h"
@@ -107,6 +115,50 @@ static void check_sinc_lut(struct gpu_context *gctx, int n)
 }
 
 
+static int sfloat(struct gpu_context *gctx, int idx, float val)
+{
+	cl_int err;
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
+static int setint(struct gpu_context *gctx, int idx, int val)
+{
+	cl_int err;
+
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
+static int setmem(struct gpu_context *gctx, int idx, cl_mem val)
+{
+	cl_int err;
+
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
 void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
                          int na, int nb, int nc, UnitCell *ucell)
 {
@@ -124,6 +176,10 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	cl_int4 ncells;
 	const int sampling = SAMPLING;
 	cl_float bwstep;
+	int n_inf = 0;
+	int n_neg = 0;
+	int n_nan = 0;
+
 
 	if ( gctx == NULL ) {
 		ERROR("GPU setup failed.\n");
@@ -150,82 +206,34 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	check_sinc_lut(gctx, nb);
 	check_sinc_lut(gctx, nc);
 
-	err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 0: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 1, sizeof(cl_mem), &gctx->tt);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 1: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 2, sizeof(cl_float), &klow);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 2: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 3, sizeof(cl_int), &image->width);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 3: %s\n", clError(err));
-		return;
-	}
+	if ( setmem(gctx, 0, gctx->diff) ) return;
+	if ( setmem(gctx, 1, gctx->tt) ) return;
+	if ( setmem(gctx, 9, gctx->intensities) ) return;
+	if ( setmem(gctx, 15, gctx->sinc_luts[na-1]) ) return;
+	if ( setmem(gctx, 16, gctx->sinc_luts[nb-1]) ) return;
+	if ( setmem(gctx, 17, gctx->sinc_luts[nc-1]) ) return;
+	if ( setmem(gctx, 18, gctx->flags) ) return;
+
+	/* Unit cell */
 	clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell);
 	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 8: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 9, sizeof(cl_mem), &gctx->intensities);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 9: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(gctx->kern, 12, sizeof(cl_int), &sampling);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 12: %s\n", clError(err));
+		ERROR("Couldn't set unit cell: %s\n", clError(err));
 		return;
 	}
+
 	/* Local memory for reduction */
 	clSetKernelArg(gctx->kern, 13,
 	               BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL);
 	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 13: %s\n", clError(err));
-		return;
-	}
-	/* Bandwidth sampling step */
-	clSetKernelArg(gctx->kern, 14, sizeof(cl_float), &bwstep);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 14: %s\n", clError(err));
-		return;
-	}
-
-	/* LUT in 'a' direction */
-	clSetKernelArg(gctx->kern, 15, sizeof(cl_mem), &gctx->sinc_luts[na-1]);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 15: %s\n", clError(err));
+		ERROR("Couldn't set local memory: %s\n", clError(err));
 		return;
 	}
 
-	/* LUT in 'b' direction */
-	clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[nb-1]);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 16: %s\n", clError(err));
-		return;
-	}
 
-	/* LUT in 'c' direction */
-	clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nc-1]);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 17: %s\n", clError(err));
-		return;
-	}
-
-	/* Flag array */
-	clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set flag array: %s\n", clError(err));
-		return;
-	}
+	if ( sfloat(gctx, 2, klow) ) return;
+	if ( setint(gctx, 3, image->width) ) return;
+	if ( setint(gctx, 12, sampling) ) return;
+	if ( sfloat(gctx, 14, bwstep) ) return;
 
 	/* Iterate over panels */
 	event = malloc(image->det->n_panels * sizeof(cl_event));
@@ -236,48 +244,24 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 
 		/* In a future version of OpenCL, this could be done
 		 * with a global work offset.  But not yet... */
-		dims[0] = 1+image->det->panels[0].max_x-image->det->panels[0].min_x;
-		dims[1] = 1+image->det->panels[0].max_y-image->det->panels[0].min_y;
+		dims[0] = 1+image->det->panels[p].max_fs
+		           -image->det->panels[p].min_fs;
+		dims[1] = 1+image->det->panels[p].max_ss
+		           -image->det->panels[p].min_ss;
 		dims[0] *= SAMPLING;
 		dims[1] *= SAMPLING;
 		dims[2] = BWSAMPLING;
 
-		clSetKernelArg(gctx->kern, 4, sizeof(cl_float),
-		               &image->det->panels[p].cx);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 4: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 5, sizeof(cl_float),
-		               &image->det->panels[p].cy);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 5: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 6, sizeof(cl_float),
-		               &image->det->panels[p].res);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 6: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 7, sizeof(cl_float),
-		               &image->det->panels[p].clen);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 7: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 10, sizeof(cl_int),
-		               &image->det->panels[p].min_x);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 10: %s\n", clError(err));
-			return;
-		}
-		clSetKernelArg(gctx->kern, 11, sizeof(cl_int),
-		               &image->det->panels[p].min_y);
-		if ( err != CL_SUCCESS ) {
-			ERROR("Couldn't set arg 11: %s\n", clError(err));
-			return;
-		}
+		if ( sfloat(gctx, 4, image->det->panels[p].cnx) ) return;
+		if ( sfloat(gctx, 5, image->det->panels[p].cny) ) return;
+		if ( sfloat(gctx, 6, image->det->panels[p].res) ) return;
+		if ( sfloat(gctx, 7, image->det->panels[p].clen) ) return;
+		if ( setint(gctx, 10, image->det->panels[p].min_fs) ) return;
+		if ( setint(gctx, 11, image->det->panels[p].min_ss) ) return;
+		if ( sfloat(gctx, 19, image->det->panels[p].fsx) ) return;
+		if ( sfloat(gctx, 19, image->det->panels[p].fsy) ) return;
+		if ( sfloat(gctx, 20, image->det->panels[p].ssx) ) return;
+		if ( sfloat(gctx, 21, image->det->panels[p].ssy) ) return;
 
 		err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL,
 		                             dims, ldims, 0, NULL, &event[p]);
@@ -314,15 +298,9 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 		float val, tt;
 
 		val = diff_ptr[x + image->width*y];
-		if ( isinf(val) ) {
-			ERROR("Extracting infinity at %i,%i\n", x, y);
-		}
-		if ( val < 0.0 ) {
-			ERROR("Extracting negative at %i,%i\n", x, y);
-		}
-		if ( isnan(val) ) {
-			ERROR("Extracting NaN at %i,%i\n", x, y);
-		}
+		if ( isinf(val) ) n_inf++;
+		if ( val < 0.0 ) n_neg++;
+		if ( isnan(val) ) n_nan++;
 		tt = tt_ptr[x + image->width*y];
 
 		image->data[x + image->width*y] = val;
@@ -331,6 +309,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	}
 	}
 
+	if ( n_neg + n_inf + n_nan ) {
+		ERROR("WARNING: The GPU calculation produced %i negative"
+		      " values, %i infinities and %i NaNs.\n",
+		      n_neg, n_inf, n_nan);
+	}
+
 	clEnqueueUnmapMemObject(gctx->cq, gctx->diff, diff_ptr, 0, NULL, NULL);
 	clEnqueueUnmapMemObject(gctx->cq, gctx->tt, tt_ptr, 0, NULL, NULL);
 }
author	Thomas White <taw@physics.org>	2011-03-02 18:40:28 +0100
committer	Thomas White <taw@physics.org>	2012-02-22 15:27:16 +0100
commit	c708b2162f76a228235983f183f6250dc68522c4 (patch)
tree	223e0f62003f7529f073ba35001c1ec4712f668c /src
parent	356b10b53e51d1ec695d9b6a09bd9fab6b46a0f3 (diff)