Add divergence to calculation

author: Thomas White <taw@physics.org> 2011-06-06 15:47:17 +0200
committer: Thomas White <taw@physics.org> 2012-02-22 15:27:27 +0100
commit: 01fdad29f7d9f126290cdc455c0a587d5aa51007 (patch)
tree: 001d0d7683a82b556638f1f5d388487de5834254
parent: 9244e4169a58c457a6a10be05067e27130cc3004 (diff)
3 files changed, 64 insertions, 21 deletions
diff --git a/data/diffraction.cl b/data/diffraction.cl
index a3986107..ec3d528f 100644
--- a/data/diffraction.cl
+++ b/data/diffraction.cl
@@ -29,12 +29,14 @@ const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE
 
 float4 get_q(float fs, float ss, float res, float clen, float k,
              float *ttp, float corner_x, float corner_y,
-             float fsx, float fsy, float ssx, float ssy)
+             float fsx, float fsy, float ssx, float ssy,
+             float xdiv, float ydiv)
 {
 	float rx, ry, r;
 	float az, tt;
 	float4 q;
 	float xs, ys;
+	float kx, ky, kz;
 
 	xs = fs*fsx + ss*ssx;
 	ys = fs*fsy + ss*ssy;
@@ -49,9 +51,19 @@ float4 get_q(float fs, float ss, float res, float clen, float k,
 
 	az = atan2(ry, rx);
 
-	q = (float4)(k*native_sin(tt)*native_cos(az),
-	             k*native_sin(tt)*native_sin(az),
-	             k*(native_cos(tt)-1.0), 0.0);
+	kx = k*native_sin(tt)*native_cos(az);
+	ky = k*native_sin(tt)*native_sin(az);
+	kz = k*(native_cos(tt)-1.0);
+
+	/* x divergence */
+	kx =  kx*cos(xdiv) +kz*sin(xdiv);
+	kz = -kx*sin(xdiv) +kz*cos(xdiv);
+
+	/* y divergence */
+	ky =  ky*cos(ydiv) +kz*sin(ydiv);
+	kz = -ky*sin(ydiv) +kz*cos(ydiv);
+
+	q = (float4)(kx, ky, kz, 0.0);
 
 	return q;
 }
@@ -202,18 +214,32 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
                         read_only image2d_t func_b,
                         read_only image2d_t func_c,
                         global float *flags,
-                        float fsx, float fsy, float ssx, float ssy)
+                        float fsx, float fsy, float ssx, float ssy,
+                        float divxlow, float divxstep, int divxsamp,
+                        float divylow, float divystep, int divysamp)
 {
 	float ttv;
 	float fs, ss;
 	float f_lattice, I_lattice;
 	float I_molecule;
 	float4 q;
-	const int lx = get_local_id(0);
-	const int ly = get_local_id(1);
-	const int lb = get_local_id(2);
 	float k = klow + kstep * get_local_id(2);
 	float intensity;
+	const int ls0 = get_local_size(0);
+	const int ls1 = get_local_size(1);
+	const int ls2 = get_local_size(2) / (divxsamp*divysamp);
+	const int ls3 = divxsamp;
+	const int ls4 = divysamp;
+	const int li0 = get_local_id(0);
+	const int li1 = get_local_id(1);
+	const int li234 = get_local_id(2);
+	const int li2 = li234 / (ls3*ls4);
+	const int li234leftover = li234 % (ls3*ls4);
+	const int li3 = li234leftover / ls4;
+	const int li4 = li234leftover % ls4;
+	const int ls = ls0 * ls1 * ls2 * ls3 * ls4;
+	float xdiv = divxlow + divxstep*ls4;
+	float ydiv = divylow + divystep*ls3;
 
 	/* Calculate fractional coordinates in fs/ss */
 	fs = convert_float(get_global_id(0)) / convert_float(sampling);
@@ -221,7 +247,7 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
 
 	/* Get the scattering vector */
 	q = get_q(fs, ss, res, clen, k, &ttv,
-	          corner_x, corner_y, fsx, fsy, ssx, ssy);
+	          corner_x, corner_y, fsx, fsy, ssx, ssy, xdiv, ydiv);
 
 	/* Calculate the diffraction */
 	f_lattice = lattice_factor(cell, q, func_a, func_b, func_c);
@@ -230,13 +256,14 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
 
 	/* Write the value to local memory */
 	intensity = I_molecule * I_lattice;
-	tmp[lx+sampling*ly+sampling*sampling*lb] = intensity;
+	tmp[li0 + ls0*li1 + ls0*ls1*li2 + ls0*ls1*ls2*li3
+	        + ls0*ls1*ls2*ls3*li4] = intensity;
 
 	/* Memory fence */
 	barrier(CLK_LOCAL_MEM_FENCE);
 
 	/* Leader thread sums the values */
-	if ( lx + ly + lb == 0 ) {
+	if ( li0 + li1 + li2 + li3 + li4 == 0 ) {
 
 		int i;
 		float sum = 0.0;
@@ -245,11 +272,9 @@ kernel void diffraction(global float *diff, global float *tt, float klow,
 
 		idx = convert_int_rtz(fs) + w*convert_int_rtz(ss);
 
-		for ( i=0; i<sampling*sampling*get_local_size(2); i++ )
-			sum += tmp[i];
+		for ( i=0; i<ls; i++ ) sum += tmp[i];
 
-		val = sum / convert_float(get_local_size(0)*get_local_size(1)
-		                                           *get_local_size(2));
+		val = sum / convert_float(ls);
 		diff[idx] = val;
 
 		/* Leader thread also records the 2theta value */
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index c0814e79..74e2292a 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -34,7 +34,8 @@
 
 
 #define SAMPLING (4)
-#define BWSAMPLING (10)
+#define BWSAMPLING (1)
+#define DIVSAMPLING (4)
 #define SINC_LUT_ELEMENTS (4096)
 
 
@@ -166,7 +167,10 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	cl_float bwstep;
 	int n_inf = 0;
 	int n_neg = 0;
+	cl_float divxlow, divxstep;
+	cl_float divylow, divystep;
 	int n_nan = 0;
+	int sprod;
 
 	if ( gctx == NULL ) {
 		ERROR("GPU setup failed.\n");
@@ -183,6 +187,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	khigh = 1.0/(image->lambda*(1.0 - image->beam->bandwidth/2.0));
 	bwstep = (khigh-klow) / BWSAMPLING;
 
+	/* Calculate divergence stuff */
+	divxlow = -image->beam->divergence/2.0;
+	divylow = -image->beam->divergence/2.0;
+	divxstep = image->beam->divergence / DIVSAMPLING;
+	divystep = image->beam->divergence / DIVSAMPLING;
+
 	ncells.s[0] = na;
 	ncells.s[1] = nb;
 	ncells.s[2] = nc;
@@ -201,6 +211,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	if ( set_arg_mem(gctx, 16, gctx->sinc_luts[nb-1]) ) return;
 	if ( set_arg_mem(gctx, 17, gctx->sinc_luts[nc-1]) ) return;
 	if ( set_arg_mem(gctx, 18, gctx->flags) ) return;
+	if ( set_arg_float(gctx, 23, divxlow) ) return;
+	if ( set_arg_float(gctx, 24, divxstep) ) return;
+	if ( set_arg_int(gctx, 25, DIVSAMPLING) ) return;
+	if ( set_arg_float(gctx, 26, divylow) ) return;
+	if ( set_arg_float(gctx, 27, divystep) ) return;
+	if ( set_arg_int(gctx, 28, DIVSAMPLING) ) return;
 
 	/* Unit cell */
 	err = clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell);
@@ -210,8 +226,8 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	}
 
 	/* Local memory for reduction */
-	err = clSetKernelArg(gctx->kern, 13,
-	               BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL);
+	sprod = BWSAMPLING*SAMPLING*SAMPLING*DIVSAMPLING*DIVSAMPLING;
+	err = clSetKernelArg(gctx->kern, 13, sprod*sizeof(cl_float), NULL);
 	if ( err != CL_SUCCESS ) {
 		ERROR("Couldn't set local memory: %s\n", clError(err));
 		return;
@@ -225,7 +241,8 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	for ( i=0; i<image->det->n_panels; i++ ) {
 
 		size_t dims[3];
-		size_t ldims[3] = {SAMPLING, SAMPLING, BWSAMPLING};
+		size_t ldims[3] = {SAMPLING, SAMPLING,
+		                   BWSAMPLING * DIVSAMPLING * DIVSAMPLING};
 		struct panel *p;
 		cl_mem tt;
 		size_t tt_size;
@@ -273,7 +290,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 
 		dims[0] = pan_width * SAMPLING;
 		dims[1] = pan_height * SAMPLING;
-		dims[2] = BWSAMPLING;
+		dims[2] = BWSAMPLING * DIVSAMPLING * DIVSAMPLING;
 
 		err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL,
 		                             dims, ldims, 0, NULL, NULL);
diff --git a/src/diffraction.c b/src/diffraction.c
index cd372a28..b55d2265 100644
--- a/src/diffraction.c
+++ b/src/diffraction.c
@@ -27,7 +27,8 @@
 
 
 #define SAMPLING (4)
-#define BWSAMPLING (10)
+#define BWSAMPLING (1)
+#define DIVSAMPLING (4)
 #define SINC_LUT_ELEMENTS (4096)
author	Thomas White <taw@physics.org>	2011-06-06 15:47:17 +0200
committer	Thomas White <taw@physics.org>	2012-02-22 15:27:27 +0100
commit	01fdad29f7d9f126290cdc455c0a587d5aa51007 (patch)
tree	001d0d7683a82b556638f1f5d388487de5834254
parent	9244e4169a58c457a6a10be05067e27130cc3004 (diff)