diff options
author | Thomas White <taw@physics.org> | 2010-02-19 15:11:44 +0100 |
---|---|---|
committer | Thomas White <taw@physics.org> | 2010-02-19 15:11:44 +0100 |
commit | 4ecbf9d41ece5e412e3fada32a3998775ffd9676 (patch) | |
tree | 20fa15a45084b391b4799e01a4d4e51dbba5ac33 | |
parent | 803fbcf98c0c68cc5000331d6e00f9297a1cf5f0 (diff) |
Honour detector panels in GPU calculation
-rw-r--r-- | data/diffraction.cl | 7 | ||||
-rw-r--r-- | src/diffraction-gpu.c | 81 |
2 files changed, 59 insertions, 29 deletions
```diff
diff --git a/data/diffraction.cl b/data/diffraction.cl
index ade56484..82a4adcf 100644
--- a/data/diffraction.cl
+++ b/data/diffraction.cl
@@ -130,11 +130,12 @@ float2 get_sfac(global float2 *sfacs, float16 cell, float4 q)
 kernel void diffraction(global float2 *diff, global float *tt, float k,
                         int w, float cx, float cy,
                         float res, float clen, float16 cell,
-                        global float2 *sfacs, float4 z, int4 ncells)
+                        global float2 *sfacs, float4 z, int4 ncells,
+                        int xmin, int ymin)
 {
 	float ttv;
-	const int x = get_global_id(0);
-	const int y = get_global_id(1);
+	const int x = get_global_id(0) + xmin;
+	const int y = get_global_id(1) + ymin;
 	float f_lattice;
 	float2 f_molecule;
 
diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c
index 5ffb576c..dbf5c1aa 100644
--- a/src/diffraction-gpu.c
+++ b/src/diffraction-gpu.c
@@ -141,8 +141,9 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
 	double bx, by, bz;
 	double cx, cy, cz;
 	float kc;
-	const size_t dims[2] = {1024, 1024};
+	size_t dims[2];
 	cl_event event_d;
+	int p;
 
 	cl_mem sfacs;
 	size_t sfac_size;
@@ -277,26 +278,6 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
 		ERROR("Couldn't set arg 3: %s\n", clError(err));
 		return;
 	}
-	clSetKernelArg(kern, 4, sizeof(cl_float), &image->det.panels[0].cx);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 4: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(kern, 5, sizeof(cl_float), &image->det.panels[0].cy);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 5: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(kern, 6, sizeof(cl_float), &image->det.panels[0].res);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 6: %s\n", clError(err));
-		return;
-	}
-	clSetKernelArg(kern, 7, sizeof(cl_float), &image->det.panels[0].clen);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't set arg 7: %s\n", clError(err));
-		return;
-	}
 	clSetKernelArg(kern, 8, sizeof(cl_float16), &cell);
 	if ( err != CL_SUCCESS ) {
 		ERROR("Couldn't set arg 8: %s\n", clError(err));
@@ -318,11 +299,59 @@ void get_diffraction_gpu(struct image *image, int na, int nb, int nc,
 		return;
 	}
 
-	err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL,
-	                             0, NULL, &event_d);
-	if ( err != CL_SUCCESS ) {
-		ERROR("Couldn't enqueue diffraction kernel: %s\n", clError(err));
-		return;
+	/* Iterate over panels */
+	for ( p=0; p<image->det.n_panels; p++ ) {
+
+		/* In a future version of OpenCL, this could be done
+		 * with a global work offset. But not yet... */
+		dims[0] = image->det.panels[0].max_x-image->det.panels[0].min_x;
+		dims[1] = image->det.panels[0].max_y-image->det.panels[0].min_y;
+
+		clSetKernelArg(kern, 4, sizeof(cl_float),
+		               &image->det.panels[p].cx);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't set arg 4: %s\n", clError(err));
+			return;
+		}
+		clSetKernelArg(kern, 5, sizeof(cl_float),
+		               &image->det.panels[p].cy);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't set arg 5: %s\n", clError(err));
+			return;
+		}
+		clSetKernelArg(kern, 6, sizeof(cl_float),
+		               &image->det.panels[p].res);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't set arg 6: %s\n", clError(err));
+			return;
+		}
+		clSetKernelArg(kern, 7, sizeof(cl_float),
+		               &image->det.panels[p].clen);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't set arg 7: %s\n", clError(err));
+			return;
+		}
+
+		clSetKernelArg(kern, 12, sizeof(cl_int),
+		               &image->det.panels[p].min_x);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't set arg 12: %s\n", clError(err));
+			return;
+		}
+		clSetKernelArg(kern, 13, sizeof(cl_int),
+		               &image->det.panels[p].min_y);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't set arg 13: %s\n", clError(err));
+			return;
+		}
+
+		err = clEnqueueNDRangeKernel(cq, kern, 2, NULL, dims, NULL,
+		                             0, NULL, &event_d);
+		if ( err != CL_SUCCESS ) {
+			ERROR("Couldn't enqueue diffraction kernel: %s\n",
+			      clError(err));
+			return;
+		}
 	}
 
 	diff_ptr = clEnqueueMapBuffer(cq, diff, CL_TRUE, CL_MAP_READ, 0,
```