7 files changed, 1312 insertions, 0 deletions
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
new file mode 100644
index 00000000..c365cecb
--- /dev/null
+++ b/src/diffraction-gpu.c
@@ -0,0 +1,530 @@
+/*
+ * diffraction-gpu.c
+ *
+ * Calculate diffraction patterns by Fourier methods (GPU version)
+ *
+ * (c) 2006-2011 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <complex.h>
+
+#ifdef HAVE_CL_CL_H
+#include <CL/cl.h>
+#else
+#include <cl.h>
+#endif
+
+#include "image.h"
+#include "utils.h"
+#include "cell.h"
+#include "diffraction.h"
+#include "cl-utils.h"
+#include "beam-parameters.h"
+#include "pattern_sim.h"
+
+
+#define SAMPLING (4)
+#define BWSAMPLING (10)
+#define DIVSAMPLING (1)
+#define SINC_LUT_ELEMENTS (4096)
+
+
+struct gpu_context
+{
+	cl_context ctx;
+	cl_command_queue cq;
+	cl_program prog;
+	cl_kernel kern;
+	cl_mem intensities;
+	cl_mem flags;
+
+	/* Array of sinc LUTs */
+	cl_mem *sinc_luts;
+	cl_float **sinc_lut_ptrs;
+	int max_sinc_lut;  /* Number of LUTs, i.e. one greater than the maximum
+	                    * index.  This equals the highest allowable "n". */
+};
+
+
+static void check_sinc_lut(struct gpu_context *gctx, int n)
+{
+	cl_int err;
+	cl_image_format fmt;
+	int i;
+
+	if ( n > gctx->max_sinc_lut ) {
+
+		gctx->sinc_luts = realloc(gctx->sinc_luts,
+		                          n*sizeof(*gctx->sinc_luts));
+		gctx->sinc_lut_ptrs = realloc(gctx->sinc_lut_ptrs,
+		                              n*sizeof(*gctx->sinc_lut_ptrs));
+
+		for ( i=gctx->max_sinc_lut; i<n; i++ ) {
+			gctx->sinc_lut_ptrs[i] = NULL;
+		}
+
+		gctx->max_sinc_lut = n;
+	}
+
+	if ( gctx->sinc_lut_ptrs[n-1] != NULL ) return;
+
+	/* Create a new sinc LUT */
+	gctx->sinc_lut_ptrs[n-1] = malloc(SINC_LUT_ELEMENTS*sizeof(cl_float));
+	gctx->sinc_lut_ptrs[n-1][0] = n;
+	if ( n == 1 ) {
+		for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+			gctx->sinc_lut_ptrs[n-1][i] = 1.0;
+		}
+	} else {
+		for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+			double x, val;
+			x = (double)i/SINC_LUT_ELEMENTS;
+			val = fabs(sin(M_PI*n*x)/sin(M_PI*x));
+			gctx->sinc_lut_ptrs[n-1][i] = val;
+		}
+	}
+
+	fmt.image_channel_order = CL_INTENSITY;
+	fmt.image_channel_data_type = CL_FLOAT;
+
+	gctx->sinc_luts[n-1] = clCreateImage2D(gctx->ctx,
+	                                CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+	                                &fmt, SINC_LUT_ELEMENTS, 1, 0,
+	                                gctx->sinc_lut_ptrs[n-1], &err);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't create LUT for %i\n", n);
+		return;
+	}
+}
+
+
+static int set_arg_float(struct gpu_context *gctx, int idx, float val)
+{
+	cl_int err;
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_float), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
+static int set_arg_int(struct gpu_context *gctx, int idx, int val)
+{
+	cl_int err;
+
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_int), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
+static int set_arg_mem(struct gpu_context *gctx, int idx, cl_mem val)
+{
+	cl_int err;
+
+	err = clSetKernelArg(gctx->kern, idx, sizeof(cl_mem), &val);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set kernel argument %i: %s\n",
+		      idx, clError(err));
+		return 1;
+	}
+
+	return 0;
+}
+
+
+void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
+                         int na, int nb, int nc, UnitCell *ucell)
+{
+	cl_int err;
+	double ax, ay, az;
+	double bx, by, bz;
+	double cx, cy, cz;
+	float klow, khigh;
+	int i;
+	cl_float16 cell;
+	cl_int4 ncells;
+	const int sampling = SAMPLING;
+	cl_float bwstep;
+	int n_inf = 0;
+	int n_neg = 0;
+	cl_float divxlow, divxstep;
+	cl_float divylow, divystep;
+	int n_nan = 0;
+	int sprod;
+
+	if ( gctx == NULL ) {
+		ERROR("GPU setup failed.\n");
+		return;
+	}
+
+	cell_get_cartesian(ucell, &ax, &ay, &az, &bx, &by, &bz, &cx, &cy, &cz);
+	cell.s[0] = ax;  cell.s[1] = ay;  cell.s[2] = az;
+	cell.s[3] = bx;  cell.s[4] = by;  cell.s[5] = bz;
+	cell.s[6] = cx;  cell.s[7] = cy;  cell.s[8] = cz;
+
+	/* Calculate wavelength */
+	klow = 1.0/(image->lambda*(1.0 + image->beam->bandwidth/2.0));
+	khigh = 1.0/(image->lambda*(1.0 - image->beam->bandwidth/2.0));
+	bwstep = (khigh-klow) / BWSAMPLING;
+
+	/* Calculate divergence stuff */
+	divxlow = -image->beam->divergence/2.0;
+	divylow = -image->beam->divergence/2.0;
+	divxstep = image->beam->divergence / DIVSAMPLING;
+	divystep = image->beam->divergence / DIVSAMPLING;
+
+	ncells.s[0] = na;
+	ncells.s[1] = nb;
+	ncells.s[2] = nc;
+	ncells.s[3] = 0;  /* unused */
+
+	/* Ensure all required LUTs are available */
+	check_sinc_lut(gctx, na);
+	check_sinc_lut(gctx, nb);
+	check_sinc_lut(gctx, nc);
+
+	if ( set_arg_float(gctx, 2, klow) ) return;
+	if ( set_arg_mem(gctx, 9, gctx->intensities) ) return;
+	if ( set_arg_int(gctx, 12, sampling) ) return;
+	if ( set_arg_float(gctx, 14, bwstep) ) return;
+	if ( set_arg_mem(gctx, 15, gctx->sinc_luts[na-1]) ) return;
+	if ( set_arg_mem(gctx, 16, gctx->sinc_luts[nb-1]) ) return;
+	if ( set_arg_mem(gctx, 17, gctx->sinc_luts[nc-1]) ) return;
+	if ( set_arg_mem(gctx, 18, gctx->flags) ) return;
+	if ( set_arg_float(gctx, 23, divxlow) ) return;
+	if ( set_arg_float(gctx, 24, divxstep) ) return;
+	if ( set_arg_int(gctx, 25, DIVSAMPLING) ) return;
+	if ( set_arg_float(gctx, 26, divylow) ) return;
+	if ( set_arg_float(gctx, 27, divystep) ) return;
+	if ( set_arg_int(gctx, 28, DIVSAMPLING) ) return;
+
+	/* Unit cell */
+	err = clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set unit cell: %s\n", clError(err));
+		return;
+	}
+
+	/* Local memory for reduction */
+	sprod = BWSAMPLING*SAMPLING*SAMPLING*DIVSAMPLING*DIVSAMPLING;
+	err = clSetKernelArg(gctx->kern, 13, sprod*sizeof(cl_float), NULL);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't set local memory: %s\n", clError(err));
+		return;
+	}
+
+	/* Allocate memory for the result */
+	image->data = calloc(image->width * image->height, sizeof(float));
+	image->twotheta = calloc(image->width * image->height, sizeof(double));
+
+	/* Iterate over panels */
+	for ( i=0; i<image->det->n_panels; i++ ) {
+
+		size_t dims[3];
+		size_t ldims[3] = {SAMPLING, SAMPLING,
+		                   BWSAMPLING * DIVSAMPLING * DIVSAMPLING};
+		struct panel *p;
+		cl_mem tt;
+		size_t tt_size;
+		cl_mem diff;
+		size_t diff_size;
+		float *diff_ptr;
+		float *tt_ptr;
+		int pan_width, pan_height;
+		int fs, ss;
+
+		p = &image->det->panels[i];
+
+		pan_width = 1 + p->max_fs - p->min_fs;
+		pan_height = 1 + p->max_ss - p->min_ss;
+
+		/* Buffer for the results of this panel */
+		diff_size = pan_width * pan_height * sizeof(cl_float);
+		diff = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY,
+	                              diff_size, NULL, &err);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't allocate diffraction memory\n");
+			return;
+		}
+		tt_size = pan_width * pan_height * sizeof(cl_float);
+		tt = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY, tt_size,
+			            NULL, &err);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't allocate twotheta memory\n");
+			return;
+		}
+
+		if ( set_arg_mem(gctx, 0, diff) ) return;
+		if ( set_arg_mem(gctx, 1, tt) ) return;
+		if ( set_arg_int(gctx, 3, pan_width) ) return;
+		if ( set_arg_float(gctx, 4, p->cnx) ) return;
+		if ( set_arg_float(gctx, 5, p->cny) ) return;
+		if ( set_arg_float(gctx, 6, p->res) ) return;
+		if ( set_arg_float(gctx, 7, p->clen) ) return;
+		if ( set_arg_int(gctx, 10, p->min_fs) ) return;
+		if ( set_arg_int(gctx, 11, p->min_ss) ) return;
+		if ( set_arg_float(gctx, 19, p->fsx) ) return;
+		if ( set_arg_float(gctx, 20, p->fsy) ) return;
+		if ( set_arg_float(gctx, 21, p->ssx) ) return;
+		if ( set_arg_float(gctx, 22, p->ssy) ) return;
+
+		dims[0] = pan_width * SAMPLING;
+		dims[1] = pan_height * SAMPLING;
+		dims[2] = BWSAMPLING * DIVSAMPLING * DIVSAMPLING;
+
+		err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL,
+		                             dims, ldims, 0, NULL, NULL);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't enqueue diffraction kernel: %s\n",
+			      clError(err));
+			return;
+		}
+
+		clFinish(gctx->cq);
+
+		diff_ptr = clEnqueueMapBuffer(gctx->cq, diff, CL_TRUE,
+		                              CL_MAP_READ, 0, diff_size,
+		                              0, NULL, NULL, &err);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't map diffraction buffer: %s\n",
+			      clError(err));
+			return;
+		}
+		tt_ptr = clEnqueueMapBuffer(gctx->cq, tt, CL_TRUE, CL_MAP_READ,
+		                            0, tt_size, 0, NULL, NULL, &err);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't map tt buffer\n");
+			return;
+		}
+
+		for ( fs=0; fs<pan_width; fs++ ) {
+		for ( ss=0; ss<pan_height; ss++ ) {
+
+			float val, tt;
+			int tfs, tss;
+
+			val = diff_ptr[fs + pan_width*ss];
+			if ( isinf(val) ) n_inf++;
+			if ( val < 0.0 ) n_neg++;
+			if ( isnan(val) ) n_nan++;
+			tt = tt_ptr[fs + pan_width*ss];
+
+			tfs = p->min_fs + fs;
+			tss = p->min_ss + ss;
+			image->data[tfs + image->width*tss] = val;
+			image->twotheta[tfs + image->width*tss] = tt;
+
+		}
+		}
+
+		clEnqueueUnmapMemObject(gctx->cq, diff, diff_ptr,
+		                        0, NULL, NULL);
+		clEnqueueUnmapMemObject(gctx->cq, tt, tt_ptr,
+		                        0, NULL, NULL);
+
+		clReleaseMemObject(diff);
+		clReleaseMemObject(tt);
+
+	}
+
+
+	if ( n_neg + n_inf + n_nan ) {
+		ERROR("WARNING: The GPU calculation produced %i negative"
+		      " values, %i infinities and %i NaNs.\n",
+		      n_neg, n_inf, n_nan);
+	}
+
+}
+
+
+/* Setup the OpenCL stuff, create buffers, load the structure factor table */
+struct gpu_context *setup_gpu(int no_sfac,
+                              const double *intensities, unsigned char *flags,
+                              const char *sym, int dev_num)
+{
+	struct gpu_context *gctx;
+	cl_uint nplat;
+	cl_platform_id platforms[8];
+	cl_context_properties prop[3];
+	cl_int err;
+	cl_device_id dev;
+	size_t intensities_size;
+	float *intensities_ptr;
+	size_t flags_size;
+	float *flags_ptr;
+	size_t maxwgsize;
+	int i;
+	char cflags[512] = "";
+
+	STATUS("Setting up GPU...\n");
+
+	err = clGetPlatformIDs(8, platforms, &nplat);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't get platform IDs: %i\n", err);
+		return NULL;
+	}
+	if ( nplat == 0 ) {
+		ERROR("Couldn't find at least one platform!\n");
+		return NULL;
+	}
+	prop[0] = CL_CONTEXT_PLATFORM;
+	prop[1] = (cl_context_properties)platforms[0];
+	prop[2] = 0;
+
+	gctx = malloc(sizeof(*gctx));
+	gctx->ctx = clCreateContextFromType(prop, CL_DEVICE_TYPE_GPU,
+	                                    NULL, NULL, &err);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't create OpenCL context: %i\n", err);
+		free(gctx);
+		return NULL;
+	}
+
+	dev = get_cl_dev(gctx->ctx, dev_num);
+
+	gctx->cq = clCreateCommandQueue(gctx->ctx, dev, 0, &err);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't create OpenCL command queue\n");
+		free(gctx);
+		return NULL;
+	}
+
+	/* Create a single-precision version of the scattering factors */
+	intensities_size = IDIM*IDIM*IDIM*sizeof(cl_float);
+	intensities_ptr = malloc(intensities_size);
+	if ( intensities != NULL ) {
+		for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
+			intensities_ptr[i] = intensities[i];
+		}
+	} else {
+		for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
+			intensities_ptr[i] = 1e5;
+		}
+		strncat(cflags, "-DFLAT_INTENSITIES ", 511-strlen(cflags));
+	}
+	gctx->intensities = clCreateBuffer(gctx->ctx,
+	                             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+	                             intensities_size, intensities_ptr, &err);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't allocate intensities memory\n");
+		free(gctx);
+		return NULL;
+	}
+	free(intensities_ptr);
+
+	if ( sym != NULL ) {
+		if ( strcmp(sym, "1") == 0 ) {
+			strncat(cflags, "-DPG1 ", 511-strlen(cflags));
+		} else if ( strcmp(sym, "-1") == 0 ) {
+			strncat(cflags, "-DPG1BAR ", 511-strlen(cflags));
+		} else if ( strcmp(sym, "6/mmm") == 0 ) {
+			strncat(cflags, "-DPG6MMM ", 511-strlen(cflags));
+		} else if ( strcmp(sym, "6") == 0 ) {
+			strncat(cflags, "-DPG6 ", 511-strlen(cflags));
+		} else if ( strcmp(sym, "6/m") == 0 ) {
+			strncat(cflags, "-DPG6M ", 511-strlen(cflags));
+		} else {
+			ERROR("Sorry!  Point group '%s' is not currently"
+			      " supported on the GPU."
+			      " I'm using '1' instead.\n", sym);
+			strncat(cflags, "-DPG1 ", 511-strlen(cflags));
+		}
+	} else {
+		if ( intensities != NULL ) {
+			ERROR("You gave me an intensities file but no point"
+			      " group.  I'm assuming '1'.\n");
+			strncat(cflags, "-DPG1 ", 511-strlen(cflags));
+		}
+	}
+
+	/* Create a flag array */
+	flags_size = IDIM*IDIM*IDIM*sizeof(cl_float);
+	flags_ptr = malloc(flags_size);
+	if ( flags != NULL ) {
+		for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
+			flags_ptr[i] = flags[i];
+		}
+	} else {
+		for ( i=0; i<IDIM*IDIM*IDIM; i++ ) {
+			flags_ptr[i] = 1.0;
+		}
+	}
+	gctx->flags = clCreateBuffer(gctx->ctx,
+	                             CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
+	                             flags_size, flags_ptr, &err);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't allocate flag buffer\n");
+		free(gctx);
+		return NULL;
+	}
+	free(flags_ptr);
+
+	gctx->prog = load_program(DATADIR"/crystfel/diffraction.cl", gctx->ctx,
+	                          dev, &err, cflags);
+	if ( err != CL_SUCCESS ) {
+		free(gctx);
+		return NULL;
+	}
+
+	gctx->kern = clCreateKernel(gctx->prog, "diffraction", &err);
+	if ( err != CL_SUCCESS ) {
+		ERROR("Couldn't create kernel\n");
+		free(gctx);
+		return NULL;
+	}
+
+	gctx->max_sinc_lut = 0;
+	gctx->sinc_lut_ptrs = NULL;
+	gctx->sinc_luts = NULL;
+
+	clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE,
+	                sizeof(size_t), &maxwgsize, NULL);
+	STATUS("Maximum work group size = %lli\n", (long long int)maxwgsize);
+
+	return gctx;
+}
+
+
+void cleanup_gpu(struct gpu_context *gctx)
+{
+	int i;
+
+	clReleaseProgram(gctx->prog);
+	clReleaseMemObject(gctx->intensities);
+
+	/* Release LUTs */
+	for ( i=1; i<=gctx->max_sinc_lut; i++ ) {
+		if ( gctx->sinc_lut_ptrs[i-1] != NULL ) {
+			clReleaseMemObject(gctx->sinc_luts[i-1]);
+			free(gctx->sinc_lut_ptrs[i-1]);
+		}
+	}
+
+	free(gctx->sinc_luts);
+	free(gctx->sinc_lut_ptrs);
+
+	clReleaseCommandQueue(gctx->cq);
+	clReleaseContext(gctx->ctx);
+	free(gctx);
+}
diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h
new file mode 100644
index 00000000..a3bde4e1
--- /dev/null
+++ b/src/diffraction-gpu.h
@@ -0,0 +1,57 @@
+/*
+ * diffraction-gpu.h
+ *
+ * Calculate diffraction patterns by Fourier methods (GPU version)
+ *
+ * (c) 2006-2011 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifndef DIFFRACTION_GPU_H
+#define DIFFRACTION_GPU_H
+
+#include "image.h"
+#include "cell.h"
+
+struct gpu_context;
+
+#if HAVE_OPENCL
+
+extern void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
+                                int na, int nb, int nc, UnitCell *ucell);
+extern struct gpu_context *setup_gpu(int no_sfac,
+                                     const double *intensities,
+                                     const unsigned char *flags,
+                                     const char *sym, int dev_num);
+extern void cleanup_gpu(struct gpu_context *gctx);
+
+#else
+
+static void get_diffraction_gpu(struct gpu_context *gctx, struct image *image,
+                                int na, int nb, int nc, UnitCell *ucell)
+{
+	/* Do nothing */
+	ERROR("This copy of CrystFEL was not compiled with OpenCL support.\n");
+}
+
+static struct gpu_context *setup_gpu(int no_sfac,
+                                     const double *intensities,
+                                     const unsigned char *flags,
+                                     const char *sym, int dev_num)
+{
+	return NULL;
+}
+
+static void cleanup_gpu(struct gpu_context *gctx)
+{
+}
+
+#endif
+
+#endif	/* DIFFRACTION_GPU_H */
diff --git a/src/diffraction.c b/src/diffraction.c
new file mode 100644
index 00000000..de994133
--- /dev/null
+++ b/src/diffraction.c
@@ -0,0 +1,464 @@
+/*
+ * diffraction.c
+ *
+ * Calculate diffraction patterns by Fourier methods
+ *
+ * (c) 2006-2011 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+
+#include <stdlib.h>
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+#include <complex.h>
+#include <assert.h>
+#include <fenv.h>
+
+#include "image.h"
+#include "utils.h"
+#include "cell.h"
+#include "diffraction.h"
+#include "beam-parameters.h"
+#include "symmetry.h"
+#include "pattern_sim.h"
+
+
+#define SAMPLING (4)
+#define BWSAMPLING (10)
+#define DIVSAMPLING (1)
+#define SINC_LUT_ELEMENTS (4096)
+
+
+static double *get_sinc_lut(int n)
+{
+	int i;
+	double *lut;
+
+	lut = malloc(SINC_LUT_ELEMENTS*sizeof(double));
+	lut[0] = n;
+	if ( n == 1 ) {
+		for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+			lut[i] = 1.0;
+		}
+	} else {
+		for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) {
+			double x, val;
+			x = (double)i/SINC_LUT_ELEMENTS;
+			val = fabs(sin(M_PI*n*x)/sin(M_PI*x));
+			lut[i] = val;
+		}
+	}
+
+	return lut;
+}
+
+
+static double interpolate_lut(double *lut, double val)
+{
+	double i, pos, f;
+	unsigned int low, high;
+
+	pos = SINC_LUT_ELEMENTS * modf(fabs(val), &i);
+	low = (int)pos;  /* Discard fractional part */
+	high = low + 1;
+	f = modf(pos, &i);  /* Fraction */
+	if ( high == SINC_LUT_ELEMENTS ) high = 0;
+
+	return (1.0-f)*lut[low] + f*lut[high];
+}
+
+
+static double lattice_factor(struct rvec q, double ax, double ay, double az,
+                                            double bx, double by, double bz,
+                                            double cx, double cy, double cz,
+                                            double *lut_a, double *lut_b,
+                                            double *lut_c)
+{
+	struct rvec Udotq;
+	double f1, f2, f3;
+
+	Udotq.u = ax*q.u + ay*q.v + az*q.w;
+	Udotq.v = bx*q.u + by*q.v + bz*q.w;
+	Udotq.w = cx*q.u + cy*q.v + cz*q.w;
+
+	f1 = interpolate_lut(lut_a, Udotq.u);
+	f2 = interpolate_lut(lut_b, Udotq.v);
+	f3 = interpolate_lut(lut_c, Udotq.w);
+
+	return f1 * f2 * f3;
+}
+
+
+static double sym_lookup_intensity(const double *intensities,
+                                   const unsigned char *flags,
+                                   const SymOpList *sym,
+                                   signed int h, signed int k, signed int l)
+{
+	int i;
+	double ret = 0.0;
+
+	for ( i=0; i<num_equivs(sym, NULL); i++ ) {
+
+		signed int he;
+		signed int ke;
+		signed int le;
+		double f, val;
+
+		get_equiv(sym, NULL, i, h, k, l, &he, &ke, &le);
+
+		f = (double)lookup_flag(flags, he, ke, le);
+		val = lookup_intensity(intensities, he, ke, le);
+
+		ret += f*val;
+
+	}
+
+	return ret;
+}
+
+
+static double sym_lookup_phase(const double *phases,
+                               const unsigned char *flags, const SymOpList *sym,
+                               signed int h, signed int k, signed int l)
+{
+	int i;
+	double ret = 0.0;
+
+	for ( i=0; i<num_equivs(sym, NULL); i++ ) {
+
+		signed int he;
+		signed int ke;
+		signed int le;
+		double f, val;
+
+		get_equiv(sym, NULL, i, h, k, l, &he, &ke, &le);
+
+		f = (double)lookup_flag(flags, he, ke, le);
+		val = lookup_phase(phases, he, ke, le);
+
+		ret += f*val;
+
+	}
+
+	return ret;
+}
+
+
+static double interpolate_linear(const double *ref, const unsigned char *flags,
+                                 const SymOpList *sym, float hd,
+                                 signed int k, signed int l)
+{
+	signed int h;
+	double val1, val2;
+	float f;
+
+	h = (signed int)hd;
+	if ( hd < 0.0 ) h -= 1;
+	f = hd - (float)h;
+	assert(f >= 0.0);
+
+	val1 = sym_lookup_intensity(ref, flags, sym, h, k, l);
+	val2 = sym_lookup_intensity(ref, flags, sym, h+1, k, l);
+
+	val1 = val1;
+	val2 = val2;
+
+	return (1.0-f)*val1 + f*val2;
+}
+
+
+static double interpolate_bilinear(const double *ref,
+                                   const unsigned char *flags,
+                                   const SymOpList *sym,
+                                   float hd, float kd, signed int l)
+{
+	signed int k;
+	double val1, val2;
+	float f;
+
+	k = (signed int)kd;
+	if ( kd < 0.0 ) k -= 1;
+	f = kd - (float)k;
+	assert(f >= 0.0);
+
+	val1 = interpolate_linear(ref, flags, sym, hd, k, l);
+	val2 = interpolate_linear(ref, flags, sym, hd, k+1, l);
+
+	return (1.0-f)*val1 + f*val2;
+}
+
+
+static double interpolate_intensity(const double *ref,
+                                    const unsigned char *flags,
+                                    const SymOpList *sym,
+                                    float hd, float kd, float ld)
+{
+	signed int l;
+	double val1, val2;
+	float f;
+
+	l = (signed int)ld;
+	if ( ld < 0.0 ) l -= 1;
+	f = ld - (float)l;
+	assert(f >= 0.0);
+
+	val1 = interpolate_bilinear(ref, flags, sym, hd, kd, l);
+	val2 = interpolate_bilinear(ref, flags, sym, hd, kd, l+1);
+
+	return (1.0-f)*val1 + f*val2;
+}
+
+
+static double complex interpolate_phased_linear(const double *ref,
+                                                const double *phases,
+                                                const unsigned char *flags,
+                                                const SymOpList *sym,
+                                                float hd,
+                                                signed int k, signed int l)
+{
+	signed int h;
+	double val1, val2;
+	float f;
+	double ph1, ph2;
+	double re1, re2, im1, im2;
+	double re, im;
+
+	h = (signed int)hd;
+	if ( hd < 0.0 ) h -= 1;
+	f = hd - (float)h;
+	assert(f >= 0.0);
+
+	val1 = sym_lookup_intensity(ref, flags, sym, h, k, l);
+	val2 = sym_lookup_intensity(ref, flags, sym, h+1, k, l);
+	ph1 = sym_lookup_phase(phases, flags, sym, h, k, l);
+	ph2 = sym_lookup_phase(phases, flags, sym, h+1, k, l);
+
+	val1 = val1;
+	val2 = val2;
+
+	/* Calculate real and imaginary parts */
+	re1 = val1 * cos(ph1);
+	im1 = val1 * sin(ph1);
+	re2 = val2 * cos(ph2);
+	im2 = val2 * sin(ph2);
+
+	re = (1.0-f)*re1 + f*re2;
+	im = (1.0-f)*im1 + f*im2;
+
+	return re + im*I;
+}
+
+
+static double complex interpolate_phased_bilinear(const double *ref,
+                                                  const double *phases,
+                                                  const unsigned char *flags,
+                                                  const SymOpList *sym,
+                                                  float hd, float kd,
+                                                  signed int l)
+{
+	signed int k;
+	double complex val1, val2;
+	float f;
+
+	k = (signed int)kd;
+	if ( kd < 0.0 ) k -= 1;
+	f = kd - (float)k;
+	assert(f >= 0.0);
+
+	val1 = interpolate_phased_linear(ref, phases, flags, sym, hd, k, l);
+	val2 = interpolate_phased_linear(ref, phases, flags, sym, hd, k+1, l);
+
+	return (1.0-f)*val1 + f*val2;
+}
+
+
+static double interpolate_phased_intensity(const double *ref,
+                                           const double *phases,
+                                           const unsigned char *flags,
+                                           const SymOpList *sym,
+                                           float hd, float kd, float ld)
+{
+	signed int l;
+	double complex val1, val2;
+	float f;
+
+	l = (signed int)ld;
+	if ( ld < 0.0 ) l -= 1;
+	f = ld - (float)l;
+	assert(f >= 0.0);
+
+	val1 = interpolate_phased_bilinear(ref, phases, flags, sym,
+	                                   hd, kd, l);
+	val2 = interpolate_phased_bilinear(ref, phases, flags, sym,
+	                                   hd, kd, l+1);
+
+	return cabs((1.0-f)*val1 + f*val2);
+}
+
+
+/* Look up the structure factor for the nearest Bragg condition */
+static double molecule_factor(const double *intensities, const double *phases,
+                              const unsigned char *flags, struct rvec q,
+                              double ax, double ay, double az,
+                              double bx, double by, double bz,
+                              double cx, double cy, double cz,
+                              GradientMethod m, const SymOpList *sym)
+{
+	float hd, kd, ld;
+	signed int h, k, l;
+	double r;
+
+	hd = q.u * ax + q.v * ay + q.w * az;
+	kd = q.u * bx + q.v * by + q.w * bz;
+	ld = q.u * cx + q.v * cy + q.w * cz;
+
+	/* No flags -> flat intensity distribution */
+	if ( flags == NULL ) return 1.0e5;
+
+	switch ( m ) {
+	case GRADIENT_MOSAIC :
+		fesetround(1);  /* Round to nearest */
+		h = (signed int)rint(hd);
+		k = (signed int)rint(kd);
+		l = (signed int)rint(ld);
+		if ( abs(h) > INDMAX ) r = 0.0;
+		else if ( abs(k) > INDMAX ) r = 0.0;
+		else if ( abs(l) > INDMAX ) r = 0.0;
+		else r = sym_lookup_intensity(intensities, flags, sym, h, k, l);
+		break;
+	case GRADIENT_INTERPOLATE :
+		r = interpolate_intensity(intensities, flags, sym, hd, kd, ld);
+		break;
+	case GRADIENT_PHASED :
+		r = interpolate_phased_intensity(intensities, phases, flags,
+		                                 sym, hd, kd, ld);
+		break;
+	default:
+		ERROR("This gradient method not implemented yet.\n");
+		exit(1);
+	}
+
+	return r;
+}
+
+
+void get_diffraction(struct image *image, int na, int nb, int nc,
+                     const double *intensities, const double *phases,
+                     const unsigned char *flags, UnitCell *cell,
+                     GradientMethod m, const SymOpList *sym)
+{
+	unsigned int fs, ss;
+	double ax, ay, az;
+	double bx, by, bz;
+	double cx, cy, cz;
+	float klow, khigh, bwstep;
+	double *lut_a;
+	double *lut_b;
+	double *lut_c;
+	double divxlow, divylow, divxstep, divystep;
+
+	cell_get_cartesian(cell, &ax, &ay, &az, &bx, &by, &bz, &cx, &cy, &cz);
+
+	/* Allocate (and zero) the "diffraction array" */
+	image->data = calloc(image->width * image->height, sizeof(float));
+
+	/* Needed later for Lorentz calculation */
+	image->twotheta = malloc(image->width * image->height * sizeof(double));
+
+	klow = 1.0/(image->lambda*(1.0 + image->beam->bandwidth/2.0));
+	khigh = 1.0/(image->lambda*(1.0 - image->beam->bandwidth/2.0));
+	bwstep = (khigh-klow) / BWSAMPLING;
+
+	divxlow = -image->beam->divergence/2.0;
+	divylow = -image->beam->divergence/2.0;
+	divxstep = image->beam->divergence / DIVSAMPLING;
+	divystep = image->beam->divergence / DIVSAMPLING;
+
+	lut_a = get_sinc_lut(na);
+	lut_b = get_sinc_lut(nb);
+	lut_c = get_sinc_lut(nc);
+
+	for ( fs=0; fs<image->width; fs++ ) {
+	for ( ss=0; ss<image->height; ss++ ) {
+
+		int fs_step, ss_step, kstep;
+		int divxval, divyval;
+		int idx = fs + image->width*ss;
+
+		for ( fs_step=0; fs_step<SAMPLING; fs_step++ ) {
+		for ( ss_step=0; ss_step<SAMPLING; ss_step++ ) {
+		for ( kstep=0; kstep<BWSAMPLING; kstep++ ) {
+		for ( divxval=0; divxval<DIVSAMPLING; divxval++ ) {
+		for ( divyval=0; divyval<DIVSAMPLING; divyval++ ) {
+
+			double k;
+			double intensity;
+			double f_lattice, I_lattice;
+			double I_molecule;
+			struct rvec q, qn;
+			double twotheta;
+			const double dfs = (double)fs
+			                    + ((double)fs_step / SAMPLING);
+			const double dss = (double)ss
+			                    + ((double)ss_step / SAMPLING);
+
+			double xdiv = divxlow + divxstep*(double)divxval;
+			double ydiv = divylow + divystep*(double)divyval;
+
+			/* Calculate k this time round */
+			k = klow + (double)kstep * bwstep;
+
+			qn = get_q(image, dfs, dss, &twotheta, k);
+
+			/* x divergence */
+			q.u =  qn.u*cos(xdiv) +qn.w*sin(xdiv);
+			q.v =  qn.v;
+			q.w = -qn.u*sin(xdiv) +qn.w*cos(xdiv);
+
+			qn = q;
+
+			/* y divergence */
+			q.v =  qn.v*cos(ydiv) +qn.w*sin(ydiv);
+			q.w = -qn.v*sin(ydiv) +qn.w*cos(ydiv);
+
+			f_lattice = lattice_factor(q, ax, ay, az,
+			                              bx, by, bz,
+			                              cx, cy, cz,
+			                              lut_a, lut_b, lut_c);
+
+			I_molecule = molecule_factor(intensities,
+			                             phases, flags, q,
+			                             ax,ay,az,bx,by,bz,cx,cy,cz,
+			                             m, sym);
+
+			I_lattice = pow(f_lattice, 2.0);
+			intensity = I_lattice * I_molecule;
+
+			image->data[idx] += intensity;
+
+			if ( fs_step + ss_step + kstep == 0 ) {
+				image->twotheta[idx] = twotheta;
+			}
+
+		}
+		}
+		}
+		}
+		}
+
+		image->data[idx] /= (SAMPLING*SAMPLING*BWSAMPLING
+		                     *DIVSAMPLING*DIVSAMPLING);
+
+
+	}
+	progress_bar(fs, image->width-1, "Calculating diffraction");
+	}
+
+	free(lut_a);
+	free(lut_b);
+	free(lut_c);
+}
diff --git a/src/diffraction.h b/src/diffraction.h
new file mode 100644
index 00000000..f71d3cce
--- /dev/null
+++ b/src/diffraction.h
@@ -0,0 +1,34 @@
+/*
+ * diffraction.h
+ *
+ * Calculate diffraction patterns by Fourier methods
+ *
+ * (c) 2006-2011 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifndef DIFFRACTION_H
+#define DIFFRACTION_H
+
+#include "image.h"
+#include "cell.h"
+#include "symmetry.h"
+
+typedef enum {
+	GRADIENT_MOSAIC,
+	GRADIENT_INTERPOLATE,
+	GRADIENT_PHASED
+} GradientMethod;
+
+extern void get_diffraction(struct image *image, int na, int nb, int nc,
+                            const double *intensities, const double *phases,
+                            const unsigned char *flags, UnitCell *cell,
+                            GradientMethod m, const SymOpList *sym);
+
+#endif	/* DIFFRACTION_H */
diff --git a/src/list_tmp.h b/src/list_tmp.h
new file mode 100644
index 00000000..a524b2f9
--- /dev/null
+++ b/src/list_tmp.h
@@ -0,0 +1,106 @@
+/*
+ * Template for creating indexed 3D lists of a given type, usually indexed
+ * as signed h,k,l values where -INDMAX<={h,k,l}<=+INDMAX.
+ *
+ * These are used, for example, for:
+ *  - a list of 'double complex' values for storing structure factors,
+ *  - a list of 'double' values for storing reflection intensities,
+ *  - a list of 'unsigned int' values for counts of some sort.
+ *
+ * When LABEL and TYPE are #defined appropriately, including this header
+ * defines functions such as:
+ *  - new_list_<LABEL>(), for creating a new list of the given type,
+ *  - set_<LABEL>(), for setting a value in a list,
+ *  - lookup_<LABEL>(), for retrieving values from a list,
+ * ..and so on.
+ *
+ * See src/utils.h for more information.
+ *
+ */
+
+#include <stdio.h>
+
+#define ERROR_T(...) fprintf(stderr, __VA_ARGS__)
+
+static inline void LABEL(integrate)(TYPE *ref, signed int h,
+                                    signed int k, signed int l,
+                                    TYPE i)
+{
+	int idx;
+
+	if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) {
+		ERROR_T("\nReflection %i %i %i is out of range!\n", h, k, l);
+		ERROR_T("You need to re-configure INDMAX and re-run.\n");
+		exit(1);
+	}
+
+	if ( h < 0 ) h += IDIM;
+	if ( k < 0 ) k += IDIM;
+	if ( l < 0 ) l += IDIM;
+
+	idx = h + (IDIM*k) + (IDIM*IDIM*l);
+	ref[idx] += i;
+}
+
+
+static inline void LABEL(set)(TYPE *ref, signed int h,
+                              signed int k, signed int l,
+                              TYPE i)
+{
+	int idx;
+
+	if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) {
+		ERROR_T("\nReflection %i %i %i is out of range!\n", h, k, l);
+		ERROR_T("You need to re-configure INDMAX and re-run.\n");
+		exit(1);
+	}
+
+	if ( h < 0 ) h += IDIM;
+	if ( k < 0 ) k += IDIM;
+	if ( l < 0 ) l += IDIM;
+
+	idx = h + (IDIM*k) + (IDIM*IDIM*l);
+	ref[idx] = i;
+}
+
+
+static inline TYPE LABEL(lookup)(const TYPE *ref, signed int h,
+                                 signed int k, signed int l)
+{
+	int idx;
+
+	if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) {
+		ERROR_T("\nReflection %i %i %i is out of range!\n", h, k, l);
+		ERROR_T("You need to re-configure INDMAX and re-run.\n");
+		exit(1);
+	}
+
+	if ( h < 0 ) h += IDIM;
+	if ( k < 0 ) k += IDIM;
+	if ( l < 0 ) l += IDIM;
+
+	idx = h + (IDIM*k) + (IDIM*IDIM*l);
+	return ref[idx];
+}
+
+
+static inline TYPE *LABEL(new_list)(void)
+{
+	TYPE *r;
+	size_t r_size;
+	r_size = IDIM*IDIM*IDIM*sizeof(TYPE);
+	r = malloc(r_size);
+	memset(r, 0, r_size);
+	return r;
+}
+
+
+static inline void LABEL(zero_list)(TYPE *ref)
+{
+	memset(ref, 0, IDIM*IDIM*IDIM*sizeof(TYPE));
+}
+
+
+#undef LABEL
+#undef TYPE
+#undef ERROR_T
diff --git a/src/pattern_sim.c b/src/pattern_sim.c
index 758e9c85..88781f15 100644
--- a/src/pattern_sim.c
+++ b/src/pattern_sim.c
@@ -33,6 +33,7 @@
 #include "symmetry.h"
 #include "reflist.h"
 #include "reflist-utils.h"
+#include "pattern_sim.h"
 
 
 static void show_help(const char *s)
@@ -141,6 +142,76 @@ static void show_details()
 }
 
 
+static double *intensities_from_list(RefList *list)
+{
+	Reflection *refl;
+	RefListIterator *iter;
+	double *out = new_list_intensity();
+
+	for ( refl = first_refl(list, &iter);
+	      refl != NULL;
+	      refl = next_refl(refl, iter) ) {
+
+		signed int h, k, l;
+		double intensity = get_intensity(refl);
+
+		get_indices(refl, &h, &k, &l);
+
+		set_intensity(out, h, k, l, intensity);
+
+	}
+
+	return out;
+}
+
+
+static double *phases_from_list(RefList *list)
+{
+	Reflection *refl;
+	RefListIterator *iter;
+	double *out = new_list_phase();
+
+	for ( refl = first_refl(list, &iter);
+	      refl != NULL;
+	      refl = next_refl(refl, iter) ) {
+
+		signed int h, k, l;
+		double phase = get_phase(refl, NULL);
+
+		get_indices(refl, &h, &k, &l);
+
+		set_phase(out, h, k, l, phase);
+
+	}
+
+	return out;
+
+}
+
+
+static unsigned char *flags_from_list(RefList *list)
+{
+	Reflection *refl;
+	RefListIterator *iter;
+	unsigned char *out = new_list_flag();
+
+	for ( refl = first_refl(list, &iter);
+	      refl != NULL;
+	      refl = next_refl(refl, iter) ) {
+
+		signed int h, k, l;
+
+		get_indices(refl, &h, &k, &l);
+
+		set_flag(out, h, k, l, 1);
+
+	}
+
+	return out;
+
+}
+
+
 static struct quaternion read_quaternion()
 {
 	do {
diff --git a/src/pattern_sim.h b/src/pattern_sim.h
new file mode 100644
index 00000000..4476a34a
--- /dev/null
+++ b/src/pattern_sim.h
@@ -0,0 +1,50 @@
+/*
+ * pattern_sim.h
+ *
+ * Simulate diffraction patterns from small crystals
+ *
+ * (c) 2006-2011 Thomas White <taw@physics.org>
+ *
+ * Part of CrystFEL - crystallography with a FEL
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifndef PATTERN_SIM_H
+#define PATTERN_SIM_H
+
+
+/* Maxmimum index to hold values up to (can be increased if necessary)
+ * WARNING: Altering this value constitutes an ABI change, and means you must
+ * update data/diffraction.cl then recompile and reinstall everything. */
+#define INDMAX 140
+
+/* Array size */
+#define IDIM (INDMAX*2 +1)
+#define LIST_SIZE (IDIM*IDIM*IDIM)
+
+/* Create functions for storing reflection intensities indexed as h,k,l */
+#define LABEL(x) x##_intensity
+#define TYPE double
+#include "list_tmp.h"
+
+/* CAs above, but for phase values */
+#define LABEL(x) x##_phase
+#define TYPE double
+#include "list_tmp.h"
+
+/* As above, but for (unsigned) integer counts */
+#define LABEL(x) x##_count
+#define TYPE unsigned int
+#include "list_tmp.h"
+
+/* As above, but for simple flags */
+#define LABEL(x) x##_flag
+#define TYPE unsigned char
+#include "list_tmp.h"
+
+
+#endif	/* PATTERN_SIM_H */