Allow multiple sinc LUTs

author: Thomas White <taw@physics.org> 2010-03-10 12:14:25 +0100
committer: Thomas White <taw@physics.org> 2010-03-10 12:14:25 +0100
commit: bda63e5f6eeed249f0effb4f990e887b3912dad6 (patch)
tree: 42070462d4c14d9e3b2cdc657a12c1f374d8d9c3 /src/diffraction-gpu.c
parent: fd8f9222599951749edf3c6ba27289da04d84b82 (diff)
1 files changed, 99 insertions, 76 deletions
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index 29bfe3e8..f71df74e 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -46,15 +46,65 @@ struct gpu_context
 	cl_mem diff;
 	size_t diff_size;
 
-	cl_mem func_a;
-	cl_float *func_a_ptr;
-	cl_mem func_b;
-	cl_float *func_b_ptr;
-	cl_mem func_c;
-	cl_float *func_c_ptr;
+	/* Array of sinc LUTs */
+	cl_mem *sinc_luts;
+	cl_float **sinc_lut_ptrs;
+	int max_sinc_lut;  /* Number of LUTs, i.e. one greater than the maximum
+	                    * index.  This equals the highest allowable "n". */
 };
 
 
+static void check_sinc_lut(struct gpu_context *gctx, int n)
+{
+	cl_int err;
+	size_t sinc_lut_size;
+	cl_image_format fmt;
+	int i;
+
+	if ( n > gctx->max_sinc_lut ) {
+
+		STATUS("Allocating %i -> %i\n", gctx->max_sinc_lut, n);
+
+		gctx->sinc_luts = realloc(gctx->sinc_luts,
+		                          n*sizeof(*gctx->sinc_luts));
+		gctx->sinc_lut_ptrs = realloc(gctx->sinc_lut_ptrs,
+		                              n*sizeof(*gctx->sinc_lut_ptrs));
+
+		for ( i=gctx->max_sinc_lut; i<n; i++ ) {
+			STATUS("zeroing %i\n", i);
+			gctx->sinc_lut_ptrs[i] = NULL;
+		}
+
+		gctx->max_sinc_lut = n;
+	}
+
+	fmt.image_channel_order = CL_INTENSITY;
+	fmt.image_channel_data_type = CL_FLOAT;
+	sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float);
+
+	/* Create a new sinc LUT */
+	gctx->sinc_lut_ptrs[n-1] = malloc(sinc_lut_size);
+	gctx->sinc_lut_ptrs[n-1][0] = n;
+	if ( n == 1 ) {
+		for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+			gctx->sinc_lut_ptrs[n-1][i] = 1.0;
+		}
+	} else {
+		for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+			double x, val;
+			x = (double)i/SINC_LUT_ELEMENTS;
+			val = fabs(sin(M_PI*n*x)/sin(M_PI*x));
+			gctx->sinc_lut_ptrs[n-1][i] = val;
+		}
+	}
+
+	gctx->sinc_luts[n-1] = clCreateImage2D(gctx->ctx,
+	                                CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+	                                &fmt, SINC_LUT_ELEMENTS, 1, 0,
+	                                gctx->sinc_lut_ptrs[n-1], &err);
+}
+
+
 void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
                          int na, int nb, int nc, int no_sfac)
 {
@@ -102,6 +152,11 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 	ncells[2] = nc;
 	ncells[3] = 0;  /* unused */
 
+	/* Ensure all required LUTs are available */
+	check_sinc_lut(gctx, na);
+	check_sinc_lut(gctx, nb);
+	check_sinc_lut(gctx, nc);
+
 	err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff);
 	if ( err != CL_SUCCESS ) {
 		ERROR("Couldn't set arg 0: %s\n", clError(err));
@@ -156,6 +211,27 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 		return;
 	}
 
+	/* LUT in 'a' direction */
+	clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[na-1]);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set arg 16: %s\n", clError(err));
+		return;
+	}
+
+	/* LUT in 'b' direction */
+	clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nb-1]);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set arg 17: %s\n", clError(err));
+		return;
+	}
+
+	/* LUT in 'c' direction */
+	clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->sinc_luts[nc-1]);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set arg 18: %s\n", clError(err));
+		return;
+	}
+
 	/* Iterate over panels */
 	event = malloc(image->det.n_panels * sizeof(cl_event));
 	for ( p=0; p<image->det.n_panels; p++ ) {
@@ -267,7 +343,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
 
 /* Setup the OpenCL stuff, create buffers, load the structure factor table */
 struct gpu_context *setup_gpu(int no_sfac, struct image *image,
-                              struct molecule *molecule, int na, int nb, int nc)
+                              struct molecule *molecule)
 {
 	struct gpu_context *gctx;
 	cl_uint nplat;
@@ -278,8 +354,6 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
 	size_t sfac_size;
 	float *sfac_ptr;
 	size_t maxwgsize;
-	size_t sinc_lut_size;
-	cl_image_format fmt;
 	int i;
 
 	if ( molecule == NULL ) return NULL;
@@ -382,69 +456,12 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
 		return NULL;
 	}
 
-	fmt.image_channel_order = CL_INTENSITY;
-	fmt.image_channel_data_type = CL_FLOAT;
-	sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float);
-
-	/* Set up sinc LUT for a* direction */
-	gctx->func_a_ptr = malloc(sinc_lut_size);
-	gctx->func_a_ptr[0] = na;
-	for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
-		double x, val;
-		x = (double)i/SINC_LUT_ELEMENTS;
-		val = fabs(sin(M_PI*na*x)/sin(M_PI*x));
-		gctx->func_a_ptr[i] = val;
-	}
-	gctx->func_a = clCreateImage2D(gctx->ctx,
-	                               CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-	                               &fmt, SINC_LUT_ELEMENTS, 1, 0,
-	                               gctx->func_a_ptr, &err);
-	clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->func_a);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 16: %s\n", clError(err));
-		return NULL;
-	}
-
-	/* Set up sinc LUT for b* direction */
-	gctx->func_b_ptr = malloc(sinc_lut_size);
-	gctx->func_b_ptr[0] = nb;
-	for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
-		double x, val;
-		x = (double)i/SINC_LUT_ELEMENTS;
-		val = fabs(sin(M_PI*nb*x)/sin(M_PI*x));
-		gctx->func_b_ptr[i] = val;
-	}
-	gctx->func_b = clCreateImage2D(gctx->ctx,
-	                               CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-	                               &fmt, SINC_LUT_ELEMENTS, 1, 0,
-	                               gctx->func_b_ptr, &err);
-	clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->func_b);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 17: %s\n", clError(err));
-		return NULL;
-	}
-
-	/* Set up sinc LUT for c* direction */
-	gctx->func_c_ptr = malloc(sinc_lut_size);
-	gctx->func_c_ptr[0] = nc;
-	for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
-		double x, val;
-		x = (double)i/SINC_LUT_ELEMENTS;
-		val = fabs(sin(M_PI*nc*x)/sin(M_PI*x));
-		gctx->func_c_ptr[i] = val;
-	}
-	gctx->func_c = clCreateImage2D(gctx->ctx,
-	                               CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
-	                               &fmt, SINC_LUT_ELEMENTS, 1, 0,
-	                               gctx->func_c_ptr, &err);
-	clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->func_c);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 18: %s\n", clError(err));
-		return NULL;
-	}
-
 	STATUS("done\n");
 
+	gctx->max_sinc_lut = 0;
+	gctx->sinc_lut_ptrs = NULL;
+	gctx->sinc_luts = NULL;
+
 	clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE,
 	                sizeof(size_t), &maxwgsize, NULL);
 	STATUS("Maximum work group size = %lli\n", (long long int)maxwgsize);
@@ -455,16 +472,22 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image,
 
 void cleanup_gpu(struct gpu_context *gctx)
 {
+	int i;
+
 	clReleaseProgram(gctx->prog);
 	clReleaseMemObject(gctx->diff);
 	clReleaseMemObject(gctx->tt);
 	clReleaseMemObject(gctx->sfacs);
-	clReleaseMemObject(gctx->func_a);
-	clReleaseMemObject(gctx->func_b);
-	clReleaseMemObject(gctx->func_c);
-	free(gctx->func_a_ptr);
-	free(gctx->func_b_ptr);
-	free(gctx->func_c_ptr);
+
+	/* Release LUTs */
+	for ( i=1; i<=gctx->max_sinc_lut; i++ ) {
+		if ( gctx->sinc_lut_ptrs[i-1] != NULL ) {
+			STATUS("freeing %i\n", i-1);
+			clReleaseMemObject(gctx->sinc_luts[i-1]);
+			free(gctx->sinc_lut_ptrs[i-1]);
+		}
+	}
+
 	clReleaseCommandQueue(gctx->cq);
 	clReleaseContext(gctx->ctx);
 	free(gctx);
author	Thomas White <taw@physics.org>	2010-03-10 12:14:25 +0100
committer	Thomas White <taw@physics.org>	2010-03-10 12:14:25 +0100
commit	bda63e5f6eeed249f0effb4f990e887b3912dad6 (patch)
tree	42070462d4c14d9e3b2cdc657a12c1f374d8d9c3 /src/diffraction-gpu.c
parent	fd8f9222599951749edf3c6ba27289da04d84b82 (diff)