From f213d4dc787d4d6edb8981c41138f4ace1e2f324 Mon Sep 17 00:00:00 2001 From: Thomas White Date: Mon, 1 Mar 2010 17:10:41 +0100 Subject: Use a lookup table for sinc values in GPU calculation --- src/diffraction-gpu.c | 102 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 84 insertions(+), 18 deletions(-) (limited to 'src/diffraction-gpu.c') diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index 69d21912..c54b9aea 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -29,6 +29,8 @@ #define BWSAMPLING (10) #define BANDWIDTH (1.0 / 100.0) +#define SINC_LUT_ELEMENTS (4096) + struct gpu_context { @@ -43,6 +45,13 @@ struct gpu_context cl_mem diff; size_t diff_size; + + cl_mem func_a; + cl_float *func_a_ptr; + cl_mem func_b; + cl_float *func_b_ptr; + cl_mem func_c; + cl_float *func_c_ptr; }; @@ -128,27 +137,22 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, ERROR("Couldn't set arg 10: %s\n", clError(err)); return; } - clSetKernelArg(gctx->kern, 11, sizeof(cl_int4), &ncells); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 11: %s\n", clError(err)); - return; - } - clSetKernelArg(gctx->kern, 14, sizeof(cl_int), &sampling); + clSetKernelArg(gctx->kern, 13, sizeof(cl_int), &sampling); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 14: %s\n", clError(err)); + ERROR("Couldn't set arg 13: %s\n", clError(err)); return; } /* Local memory for reduction */ - clSetKernelArg(gctx->kern, 15, + clSetKernelArg(gctx->kern, 14, BWSAMPLING*SAMPLING*SAMPLING*sizeof(cl_float), NULL); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 15: %s\n", clError(err)); + ERROR("Couldn't set arg 14: %s\n", clError(err)); return; } /* Bandwidth sampling step */ - clSetKernelArg(gctx->kern, 16, sizeof(cl_float), &bwstep); + clSetKernelArg(gctx->kern, 15, sizeof(cl_float), &bwstep); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 16: %s\n", clError(err)); + ERROR("Couldn't set arg 15: %s\n", clError(err)); return; } @@ -191,16 +195,16 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, ERROR("Couldn't set arg 7: %s\n", clError(err)); return; } - clSetKernelArg(gctx->kern, 12, sizeof(cl_int), + clSetKernelArg(gctx->kern, 11, sizeof(cl_int), &image->det.panels[p].min_x); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 12: %s\n", clError(err)); + ERROR("Couldn't set arg 11: %s\n", clError(err)); return; } - clSetKernelArg(gctx->kern, 13, sizeof(cl_int), + clSetKernelArg(gctx->kern, 12, sizeof(cl_int), &image->det.panels[p].min_y); if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 13: %s\n", clError(err)); + ERROR("Couldn't set arg 12: %s\n", clError(err)); return; } @@ -263,7 +267,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, /* Setup the OpenCL stuff, create buffers, load the structure factor table */ struct gpu_context *setup_gpu(int no_sfac, struct image *image, - struct molecule *molecule) + struct molecule *molecule, int na, int nb, int nc) { struct gpu_context *gctx; cl_uint nplat; @@ -274,6 +278,9 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, size_t sfac_size; float *sfac_ptr; size_t maxwgsize; + size_t sinc_lut_size; + cl_image_format fmt; + int i; if ( molecule == NULL ) return NULL; @@ -332,13 +339,11 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, sfac_size = IDIM*IDIM*IDIM*sizeof(cl_float)*2; /* complex */ sfac_ptr = malloc(sfac_size); if ( !no_sfac ) { - int i; for ( i=0; ireflections[i]); sfac_ptr[2*i+1] = cimag(molecule->reflections[i]); } } else { - int i; for ( i=0; ifunc_a_ptr = malloc(sinc_lut_size); + gctx->func_a_ptr[0] = na; + for ( i=1; ifunc_a_ptr[i] = val; + } + gctx->func_a = clCreateImage2D(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &fmt, SINC_LUT_ELEMENTS, 1, 0, + gctx->func_a_ptr, &err); + clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->func_a); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 16: %s\n", clError(err)); + return NULL; + } + + /* Set up sinc LUT for b* direction */ + gctx->func_b_ptr = malloc(sinc_lut_size); + gctx->func_b_ptr[0] = nb; + for ( i=1; ifunc_b_ptr[i] = val; + } + gctx->func_b = clCreateImage2D(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &fmt, SINC_LUT_ELEMENTS, 1, 0, + gctx->func_b_ptr, &err); + clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->func_b); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 17: %s\n", clError(err)); + return NULL; + } + + /* Set up sinc LUT for c* direction */ + gctx->func_c_ptr = malloc(sinc_lut_size); + gctx->func_c_ptr[0] = nc; + for ( i=1; ifunc_c_ptr[i] = val; + } + gctx->func_c = clCreateImage2D(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &fmt, SINC_LUT_ELEMENTS, 1, 0, + gctx->func_c_ptr, &err); + clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->func_c); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 18: %s\n", clError(err)); + return NULL; + } + STATUS("done\n"); clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, -- cgit v1.2.3