diff options
-rw-r--r-- | data/diffraction.cl | 47 | ||||
-rw-r--r-- | src/diffraction-gpu.c | 102 | ||||
-rw-r--r-- | src/diffraction-gpu.h | 6 | ||||
-rw-r--r-- | src/indexamajig.c | 6 | ||||
-rw-r--r-- | src/pattern_sim.c | 4 |
5 files changed, 115 insertions, 50 deletions
diff --git a/data/diffraction.cl b/data/diffraction.cl index ebacb416..cc3e91bd 100644 --- a/data/diffraction.cl +++ b/data/diffraction.cl @@ -15,6 +15,15 @@ #define M_PI ((float)(3.14159265)) #endif + +const sampler_t sampler_a = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_REPEAT + | CLK_FILTER_LINEAR; +const sampler_t sampler_b = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_REPEAT + | CLK_FILTER_LINEAR; +const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_REPEAT + | CLK_FILTER_LINEAR; + + float4 quat_rot(float4 q, float4 z) { float4 res; @@ -80,36 +89,23 @@ float range(float a) } -float lattice_factor(float16 cell, float4 q, int4 ncells) +float lattice_factor(float16 cell, float4 q, + read_only image2d_t func_a, + read_only image2d_t func_b, + read_only image2d_t func_c) { float f1, f2, f3, v; float4 Udotq; - const int na = ncells.s0; - const int nb = ncells.s1; - const int nc = ncells.s2; Udotq.x = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z; Udotq.y = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z; Udotq.z = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z; - /* At exact Bragg condition, f1 = na */ - v = M_PI*Udotq.x; - f1 = sin(v*(float)na) / sin(v); - f1 = isnan(f1) ? na : f1; - - /* At exact Bragg condition, f2 = nb */ - v = M_PI*Udotq.y; - f2 = sin(v*(float)nb) / sin(v); - f2 = isnan(f2) ? nb : f2; - - /* At exact Bragg condition, f3 = nc */ - v = M_PI*Udotq.z; - f3 = sin(v*(float)nc) / sin(v); - f3 = isnan(f3) ? nc : f3; + /* Look up values from precalculated sinc() table */ + f1 = read_imagef(func_a, sampler_a, (float2)(Udotq.x, 0.0)).s0; + f2 = read_imagef(func_b, sampler_b, (float2)(Udotq.y, 0.0)).s0; + f3 = read_imagef(func_c, sampler_c, (float2)(Udotq.z, 0.0)).s0; - /* At exact Bragg condition, this will multiply the molecular - * part of the structure factor by the number of unit cells, - * as desired (more scattering from bigger crystal!) */ return f1 * f2 * f3; } @@ -148,9 +144,12 @@ float2 get_sfac(global float2 *sfacs, float16 cell, float4 q) kernel void diffraction(global float *diff, global float *tt, float klow, int w, float cx, float cy, float res, float clen, float16 cell, - global float2 *sfacs, float4 z, int4 ncells, + global float2 *sfacs, float4 z, int xmin, int ymin, int sampling, local float *tmp, - float kstep) + float kstep, + read_only image2d_t func_a, + read_only image2d_t func_b, + read_only image2d_t func_c) { float ttv; const int x = get_global_id(0) + (xmin*sampling); @@ -169,7 +168,7 @@ kernel void diffraction(global float *diff, global float *tt, float klow, /* Calculate value */ q = get_q(x, y, cx, cy, res, clen, k, &ttv, z, sampling); - f_lattice = lattice_factor(cell, q, ncells); + f_lattice = lattice_factor(cell, q, func_a, func_b, func_c); f_molecule = get_sfac(sfacs, cell, q); /* Write the value to local memory */ diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index 69d21912..c54b9aea 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -29,6 +29,8 @@ #define BWSAMPLING (10) #define BANDWIDTH (1.0 / 100.0) +#define SINC_LUT_ELEMENTS (4096) + struct gpu_context { @@ -43,6 +45,13 @@ struct gpu_context cl_mem diff; size_t diff_size; + + cl_mem func_a; + cl_float *func_a_ptr; + cl_mem func_b; + cl_float *func_b_ptr; + cl_mem func_c; + cl_float *func_c_ptr; }; @@ -128,27 +137,22 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, ERROR("Couldn't set arg 10: %s\n", clError(err)); return; } - clSetKernelArg(gctx->kern, 11, sizeof(cl_int4), &ncells); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 11: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 14, sizeof(cl_int), &sampling); + clSetKernelArg(gctx->kern, 13, sizeof(cl_int), &sampling); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 14: %s\n", clError(err)); + ERROR("Couldn't set arg 13: %s\n", clError(err)); return; } /* Local memory for reduction */ - clSetKernelArg(gctx->kern, 15, + clSetKernelArg(gctx->kern, 14, BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 15: %s\n", clError(err)); + ERROR("Couldn't set arg 14: %s\n", clError(err)); return; } /* Bandwidth sampling step */ - clSetKernelArg(gctx->kern, 16, sizeof(cl_float), &bwstep); + clSetKernelArg(gctx->kern, 15, sizeof(cl_float), &bwstep); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 16: %s\n", clError(err)); + ERROR("Couldn't set arg 15: %s\n", clError(err)); return; } @@ -191,16 +195,16 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, ERROR("Couldn't set arg 7: %s\n", clError(err)); return; } - clSetKernelArg(gctx->kern, 12, sizeof(cl_int), + clSetKernelArg(gctx->kern, 11, sizeof(cl_int), &image->det.panels[p].min_x); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 12: %s\n", clError(err)); + ERROR("Couldn't set arg 11: %s\n", clError(err)); return; } - clSetKernelArg(gctx->kern, 13, sizeof(cl_int), + clSetKernelArg(gctx->kern, 12, sizeof(cl_int), &image->det.panels[p].min_y); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 13: %s\n", clError(err)); + ERROR("Couldn't set arg 12: %s\n", clError(err)); return; } @@ -263,7 +267,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, /* Setup the OpenCL stuff, create buffers, load the structure factor table */ struct gpu_context *setup_gpu(int no_sfac, struct image *image, - struct molecule *molecule) + struct molecule *molecule, int na, int nb, int nc) { struct gpu_context *gctx; cl_uint nplat; @@ -274,6 +278,9 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, size_t sfac_size; float *sfac_ptr; size_t maxwgsize; + size_t sinc_lut_size; + cl_image_format fmt; + int i; if ( molecule == NULL ) return NULL; @@ -332,13 +339,11 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, sfac_size = IDIM*IDIM*IDIM*sizeof(cl_float)*2; /* complex */ sfac_ptr = malloc(sfac_size); if ( !no_sfac ) { - int i; for ( i=0; i<IDIM*IDIM*IDIM; i++ ) { sfac_ptr[2*i+0] = creal(molecule->reflections[i]); sfac_ptr[2*i+1] = cimag(molecule->reflections[i]); } } else { - int i; for ( i=0; i<IDIM*IDIM*IDIM; i++ ) { sfac_ptr[2*i+0] = 10000.0; sfac_ptr[2*i+1] = 0.0; @@ -377,6 +382,67 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, return NULL; } + fmt.image_channel_order = CL_INTENSITY; + fmt.image_channel_data_type = CL_FLOAT; + sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float); + + /* Set up sinc LUT for a* direction */ + gctx->func_a_ptr = malloc(sinc_lut_size); + gctx->func_a_ptr[0] = na; + for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { + double x, val; + x = (double)i/SINC_LUT_ELEMENTS; + val = sin(M_PI*na*x)/sin(M_PI*x); + gctx->func_a_ptr[i] = val; + } + gctx->func_a = clCreateImage2D(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &fmt, SINC_LUT_ELEMENTS, 1, 0, + gctx->func_a_ptr, &err); + clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->func_a); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 16: %s\n", clError(err)); + return NULL; + } + + /* Set up sinc LUT for b* direction */ + gctx->func_b_ptr = malloc(sinc_lut_size); + gctx->func_b_ptr[0] = nb; + for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { + double x, val; + x = (double)i/SINC_LUT_ELEMENTS; + val = sin(M_PI*nb*x)/sin(M_PI*x); + gctx->func_b_ptr[i] = val; + } + gctx->func_b = clCreateImage2D(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &fmt, SINC_LUT_ELEMENTS, 1, 0, + gctx->func_b_ptr, &err); + clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->func_b); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 17: %s\n", clError(err)); + return NULL; + } + + /* Set up sinc LUT for c* direction */ + gctx->func_c_ptr = malloc(sinc_lut_size); + gctx->func_c_ptr[0] = nc; + for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { + double x, val; + x = (double)i/SINC_LUT_ELEMENTS; + val = sin(M_PI*nc*x)/sin(M_PI*x); + gctx->func_c_ptr[i] = val; + } + gctx->func_c = clCreateImage2D(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &fmt, SINC_LUT_ELEMENTS, 1, 0, + gctx->func_c_ptr, &err); + clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->func_c); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 18: %s\n", clError(err)); + return NULL; + } + STATUS("done\n"); clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h index f3bed8bf..05f393a1 100644 --- a/src/diffraction-gpu.h +++ b/src/diffraction-gpu.h @@ -23,10 +23,10 @@ struct gpu_context; #if HAVE_OPENCL -extern void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, - int na, int nb, int nc); +extern void get_diffraction_gpu(struct gpu_context *gctx, struct image *image); extern struct gpu_context *setup_gpu(int no_sfac, struct image *image, - struct molecule *molecule); + struct molecule *molecule, + int na, int nb, int nc); extern void cleanup_gpu(struct gpu_context *gctx); #else diff --git a/src/indexamajig.c b/src/indexamajig.c index 5033b73c..22bc2ea4 100644 --- a/src/indexamajig.c +++ b/src/indexamajig.c @@ -242,10 +242,10 @@ int main(int argc, char *argv[]) if ( config_gpu ) { if ( gctx == NULL ) { gctx = setup_gpu(0, &image, - image.molecule); + image.molecule, + 24, 24, 40); } - get_diffraction_gpu(gctx, &image, - 24, 24, 40); + get_diffraction_gpu(gctx, &image); } else { get_diffraction(&image, 8, 8, 8, 0, 0); } diff --git a/src/pattern_sim.c b/src/pattern_sim.c index 65e020ac..4eee7c57 100644 --- a/src/pattern_sim.c +++ b/src/pattern_sim.c @@ -276,9 +276,9 @@ int main(int argc, char *argv[]) if ( config_gpu ) { if ( gctx == NULL ) { gctx = setup_gpu(config_nosfac, &image, - image.molecule); + image.molecule, na, nb, nc); } - get_diffraction_gpu(gctx, &image, na, nb, nc); + get_diffraction_gpu(gctx, &image); } else { get_diffraction(&image, na, nb, nc, config_nosfac, !config_nowater); |