diff options
author | Thomas White <taw@physics.org> | 2010-03-10 12:14:25 +0100 |
---|---|---|
committer | Thomas White <taw@physics.org> | 2010-03-10 12:14:25 +0100 |
commit | bda63e5f6eeed249f0effb4f990e887b3912dad6 (patch) | |
tree | 42070462d4c14d9e3b2cdc657a12c1f374d8d9c3 /src/diffraction-gpu.c | |
parent | fd8f9222599951749edf3c6ba27289da04d84b82 (diff) |
Allow multiple sinc LUTs
Diffstat (limited to 'src/diffraction-gpu.c')
-rw-r--r-- | src/diffraction-gpu.c | 175 |
1 file changed, 99 insertions, 76 deletions
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index 29bfe3e8..f71df74e 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -46,15 +46,65 @@ struct gpu_context cl_mem diff; size_t diff_size; - cl_mem func_a; - cl_float *func_a_ptr; - cl_mem func_b; - cl_float *func_b_ptr; - cl_mem func_c; - cl_float *func_c_ptr; + /* Array of sinc LUTs */ + cl_mem *sinc_luts; + cl_float **sinc_lut_ptrs; + int max_sinc_lut; /* Number of LUTs, i.e. one greater than the maximum + * index. This equals the highest allowable "n". */ }; +static void check_sinc_lut(struct gpu_context *gctx, int n) +{ + cl_int err; + size_t sinc_lut_size; + cl_image_format fmt; + int i; + + if ( n > gctx->max_sinc_lut ) { + + STATUS("Allocating %i -> %i\n", gctx->max_sinc_lut, n); + + gctx->sinc_luts = realloc(gctx->sinc_luts, + n*sizeof(*gctx->sinc_luts)); + gctx->sinc_lut_ptrs = realloc(gctx->sinc_lut_ptrs, + n*sizeof(*gctx->sinc_lut_ptrs)); + + for ( i=gctx->max_sinc_lut; i<n; i++ ) { + STATUS("zeroing %i\n", i); + gctx->sinc_lut_ptrs[i] = NULL; + } + + gctx->max_sinc_lut = n; + } + + fmt.image_channel_order = CL_INTENSITY; + fmt.image_channel_data_type = CL_FLOAT; + sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float); + + /* Create a new sinc LUT */ + gctx->sinc_lut_ptrs[n-1] = malloc(sinc_lut_size); + gctx->sinc_lut_ptrs[n-1][0] = n; + if ( n == 1 ) { + for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { + gctx->sinc_lut_ptrs[n-1][i] = 1.0; + } + } else { + for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { + double x, val; + x = (double)i/SINC_LUT_ELEMENTS; + val = fabs(sin(M_PI*n*x)/sin(M_PI*x)); + gctx->sinc_lut_ptrs[n-1][i] = val; + } + } + + gctx->sinc_luts[n-1] = clCreateImage2D(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + &fmt, SINC_LUT_ELEMENTS, 1, 0, + gctx->sinc_lut_ptrs[n-1], &err); +} + + void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, int na, int nb, int nc, int no_sfac) { @@ -102,6 +152,11 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct 
image *image, ncells[2] = nc; ncells[3] = 0; /* unused */ + /* Ensure all required LUTs are available */ + check_sinc_lut(gctx, na); + check_sinc_lut(gctx, nb); + check_sinc_lut(gctx, nc); + err = clSetKernelArg(gctx->kern, 0, sizeof(cl_mem), &gctx->diff); if ( err != CL_SUCCESS ) { ERROR("Couldn't set arg 0: %s\n", clError(err)); @@ -156,6 +211,27 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, return; } + /* LUT in 'a' direction */ + clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->sinc_luts[na-1]); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 16: %s\n", clError(err)); + return; + } + + /* LUT in 'b' direction */ + clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->sinc_luts[nb-1]); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 17: %s\n", clError(err)); + return; + } + + /* LUT in 'c' direction */ + clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->sinc_luts[nc-1]); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set arg 18: %s\n", clError(err)); + return; + } + /* Iterate over panels */ event = malloc(image->det.n_panels * sizeof(cl_event)); for ( p=0; p<image->det.n_panels; p++ ) { @@ -267,7 +343,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, /* Setup the OpenCL stuff, create buffers, load the structure factor table */ struct gpu_context *setup_gpu(int no_sfac, struct image *image, - struct molecule *molecule, int na, int nb, int nc) + struct molecule *molecule) { struct gpu_context *gctx; cl_uint nplat; @@ -278,8 +354,6 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, size_t sfac_size; float *sfac_ptr; size_t maxwgsize; - size_t sinc_lut_size; - cl_image_format fmt; int i; if ( molecule == NULL ) return NULL; @@ -382,69 +456,12 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, return NULL; } - fmt.image_channel_order = CL_INTENSITY; - fmt.image_channel_data_type = CL_FLOAT; - sinc_lut_size = SINC_LUT_ELEMENTS*sizeof(cl_float); - - /* Set up 
sinc LUT for a* direction */ - gctx->func_a_ptr = malloc(sinc_lut_size); - gctx->func_a_ptr[0] = na; - for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { - double x, val; - x = (double)i/SINC_LUT_ELEMENTS; - val = fabs(sin(M_PI*na*x)/sin(M_PI*x)); - gctx->func_a_ptr[i] = val; - } - gctx->func_a = clCreateImage2D(gctx->ctx, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &fmt, SINC_LUT_ELEMENTS, 1, 0, - gctx->func_a_ptr, &err); - clSetKernelArg(gctx->kern, 16, sizeof(cl_mem), &gctx->func_a); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 16: %s\n", clError(err)); - return NULL; - } - - /* Set up sinc LUT for b* direction */ - gctx->func_b_ptr = malloc(sinc_lut_size); - gctx->func_b_ptr[0] = nb; - for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { - double x, val; - x = (double)i/SINC_LUT_ELEMENTS; - val = fabs(sin(M_PI*nb*x)/sin(M_PI*x)); - gctx->func_b_ptr[i] = val; - } - gctx->func_b = clCreateImage2D(gctx->ctx, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &fmt, SINC_LUT_ELEMENTS, 1, 0, - gctx->func_b_ptr, &err); - clSetKernelArg(gctx->kern, 17, sizeof(cl_mem), &gctx->func_b); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 17: %s\n", clError(err)); - return NULL; - } - - /* Set up sinc LUT for c* direction */ - gctx->func_c_ptr = malloc(sinc_lut_size); - gctx->func_c_ptr[0] = nc; - for ( i=1; i<SINC_LUT_ELEMENTS; i++ ) { - double x, val; - x = (double)i/SINC_LUT_ELEMENTS; - val = fabs(sin(M_PI*nc*x)/sin(M_PI*x)); - gctx->func_c_ptr[i] = val; - } - gctx->func_c = clCreateImage2D(gctx->ctx, - CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, - &fmt, SINC_LUT_ELEMENTS, 1, 0, - gctx->func_c_ptr, &err); - clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->func_c); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set arg 18: %s\n", clError(err)); - return NULL; - } - STATUS("done\n"); + gctx->max_sinc_lut = 0; + gctx->sinc_lut_ptrs = NULL; + gctx->sinc_luts = NULL; + clGetDeviceInfo(dev, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), &maxwgsize, NULL); STATUS("Maximum work group size = 
%lli\n", (long long int)maxwgsize); @@ -455,16 +472,22 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, void cleanup_gpu(struct gpu_context *gctx) { + int i; + clReleaseProgram(gctx->prog); clReleaseMemObject(gctx->diff); clReleaseMemObject(gctx->tt); clReleaseMemObject(gctx->sfacs); - clReleaseMemObject(gctx->func_a); - clReleaseMemObject(gctx->func_b); - clReleaseMemObject(gctx->func_c); - free(gctx->func_a_ptr); - free(gctx->func_b_ptr); - free(gctx->func_c_ptr); + + /* Release LUTs */ + for ( i=1; i<=gctx->max_sinc_lut; i++ ) { + if ( gctx->sinc_lut_ptrs[i-1] != NULL ) { + STATUS("freeing %i\n", i-1); + clReleaseMemObject(gctx->sinc_luts[i-1]); + free(gctx->sinc_lut_ptrs[i-1]); + } + } + clReleaseCommandQueue(gctx->cq); clReleaseContext(gctx->ctx); free(gctx); |