diff options
-rw-r--r-- | data/diffraction.cl | 89 | ||||
-rw-r--r-- | src/diffraction-gpu.c | 72 | ||||
-rw-r--r-- | src/diffraction.c | 97 | ||||
-rw-r--r-- | src/pattern_sim.h | 2 |
4 files changed, 56 insertions, 204 deletions
diff --git a/data/diffraction.cl b/data/diffraction.cl index f8be5179..f6318378 100644 --- a/data/diffraction.cl +++ b/data/diffraction.cl @@ -13,7 +13,7 @@ /* Maxmimum index to hold values up to (can be increased if necessary) * WARNING: Altering this value constitutes an ABI change, and means you must * update src/pattern_sim.h then recompile and reinstall everything. */ -#define INDMAX 200 +#define INDMAX 120 #define IDIM (INDMAX*2 +1) #ifndef M_PI @@ -34,15 +34,13 @@ const sampler_t sampler_c = CLK_NORMALIZED_COORDS_TRUE float4 get_q(float fs, float ss, float res, float clen, float k, float *ttp, float corner_x, float corner_y, - float fsx, float fsy, float ssx, float ssy, - float xdiv, float ydiv) + float fsx, float fsy, float ssx, float ssy) { float rx, ry, r; float az, tt; float4 q; float xs, ys; float kx, ky, kz; - float kxn, kyn, kzn; xs = fs*fsx + ss*ssx; ys = fs*fsy + ss*ssy; @@ -57,20 +55,9 @@ float4 get_q(float fs, float ss, float res, float clen, float k, az = atan2(ry, rx); - kxn = k*native_sin(tt)*native_cos(az); - kyn = k*native_sin(tt)*native_sin(az); - kzn = k*(native_cos(tt)-1.0); - - /* x divergence */ - kx = kxn*native_cos(xdiv) +kzn*native_sin(xdiv); - ky = kyn; - kz = -kxn*native_sin(xdiv) +kzn*native_cos(xdiv); - kxn = kx; kyn = ky; kzn = kz; - - /* y divergence */ - kx = kxn; - ky = kyn*cos(ydiv) +kzn*sin(ydiv); - kz = -kyn*sin(ydiv) +kzn*cos(ydiv); + kx = k*native_sin(tt)*native_cos(az); + ky = k*native_sin(tt)*native_sin(az); + kz = k*(native_cos(tt)-1.0); q = (float4)(kx, ky, kz, 0.0); @@ -224,81 +211,39 @@ float molecule_factor(global float *intensities, global float *flags, } -kernel void diffraction(global float *diff, global float *tt, float klow, +kernel void diffraction(global float *diff, global float *ttp, float k, int w, float corner_x, float corner_y, float res, float clen, float16 cell, global float *intensities, - int min_fs, int min_ss, int sampling, local float *tmp, - float kstep, read_only image2d_t func_a, read_only image2d_t func_b, read_only image2d_t func_c, global float *flags, - float fsx, float fsy, float ssx, float ssy, - float divxlow, float divxstep, int divxsamp, - float divylow, float divystep, int divysamp) + float fsx, float fsy, float ssx, float ssy) { - float ttv; + float tt; float fs, ss; float f_lattice, I_lattice; float I_molecule; float4 q; - float k = klow + kstep * get_local_id(2); float intensity; - const int ls0 = get_local_size(0); - const int ls1 = get_local_size(1); - const int ls2 = get_local_size(2) / (divxsamp*divysamp); - const int ls3 = divxsamp; - const int ls4 = divysamp; - const int li0 = get_local_id(0); - const int li1 = get_local_id(1); - const int li234 = get_local_id(2); - const int li2 = li234 / (ls3*ls4); - const int li234leftover = li234 % (ls3*ls4); - const int li3 = li234leftover / ls4; - const int li4 = li234leftover % ls4; - const int ls = ls0 * ls1 * ls2 * ls3 * ls4; - float xdiv = divxlow + divxstep*ls4; - float ydiv = divylow + divystep*ls3; + int idx; /* Calculate fractional coordinates in fs/ss */ - fs = convert_float(get_global_id(0)) / convert_float(sampling); - ss = convert_float(get_global_id(1)) / convert_float(sampling); + fs = convert_float(get_global_id(0)); + ss = convert_float(get_global_id(1)); /* Get the scattering vector */ - q = get_q(fs, ss, res, clen, k, &ttv, - corner_x, corner_y, fsx, fsy, ssx, ssy, xdiv, ydiv); + q = get_q(fs, ss, res, clen, k, &tt, + corner_x, corner_y, fsx, fsy, ssx, ssy); /* Calculate the diffraction */ f_lattice = lattice_factor(cell, q, func_a, func_b, func_c); I_molecule = molecule_factor(intensities, flags, cell, q); I_lattice = pow(f_lattice, 2.0f); - /* Write the value to local memory */ - intensity = I_molecule * I_lattice; - tmp[li0 + ls0*li1 + ls0*ls1*li2 + ls0*ls1*ls2*li3 - + ls0*ls1*ls2*ls3*li4] = intensity; - - /* Memory fence */ - barrier(CLK_LOCAL_MEM_FENCE); - - /* Leader thread sums the values */ - if ( li0 + li1 + li2 + li3 + li4 == 0 ) { - - int i; - float sum = 0.0; - float val; - int idx; - - idx = convert_int_rtz(fs) + w*convert_int_rtz(ss); - - for ( i=0; i<ls; i++ ) sum += tmp[i]; - - val = sum / convert_float(ls); - diff[idx] = val; - - /* Leader thread also records the 2theta value */ - tt[idx] = ttv; - - } + /* Write the value to memory */ + idx = convert_int_rtz(fs) + w*convert_int_rtz(ss); + diff[idx] = I_molecule * I_lattice; + ttp[idx] = tt; } diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index b3950570..464adf02 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -34,9 +34,6 @@ #include "pattern_sim.h" -#define SAMPLING (4) -#define BWSAMPLING (10) -#define DIVSAMPLING (1) #define SINC_LUT_ELEMENTS (4096) @@ -160,18 +157,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, double ax, ay, az; double bx, by, bz; double cx, cy, cz; - float klow, khigh; int i; cl_float16 cell; cl_int4 ncells; - const int sampling = SAMPLING; - cl_float bwstep; int n_inf = 0; int n_neg = 0; - cl_float divxlow, divxstep; - cl_float divylow, divystep; int n_nan = 0; - int sprod; if ( gctx == NULL ) { ERROR("GPU setup failed.\n"); @@ -183,17 +174,6 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, cell.s[3] = bx; cell.s[4] = by; cell.s[5] = bz; cell.s[6] = cx; cell.s[7] = cy; cell.s[8] = cz; - /* Calculate wavelength */ - klow = 1.0/(image->lambda*(1.0 + image->beam->bandwidth/2.0)); - khigh = 1.0/(image->lambda*(1.0 - image->beam->bandwidth/2.0)); - bwstep = (khigh-klow) / BWSAMPLING; - - /* Calculate divergence stuff */ - divxlow = -image->beam->divergence/2.0; - divylow = -image->beam->divergence/2.0; - divxstep = image->beam->divergence / DIVSAMPLING; - divystep = image->beam->divergence / DIVSAMPLING; - ncells.s[0] = na; ncells.s[1] = nb; ncells.s[2] = nc; @@ -204,20 +184,12 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, check_sinc_lut(gctx, nb); check_sinc_lut(gctx, nc); - if ( set_arg_float(gctx, 2, klow) ) return; + if ( set_arg_float(gctx, 2, 1.0/image->lambda) ) return; if ( set_arg_mem(gctx, 9, gctx->intensities) ) return; - if ( set_arg_int(gctx, 12, sampling) ) return; - if ( set_arg_float(gctx, 14, bwstep) ) return; - if ( set_arg_mem(gctx, 15, gctx->sinc_luts[na-1]) ) return; - if ( set_arg_mem(gctx, 16, gctx->sinc_luts[nb-1]) ) return; - if ( set_arg_mem(gctx, 17, gctx->sinc_luts[nc-1]) ) return; - if ( set_arg_mem(gctx, 18, gctx->flags) ) return; - if ( set_arg_float(gctx, 23, divxlow) ) return; - if ( set_arg_float(gctx, 24, divxstep) ) return; - if ( set_arg_int(gctx, 25, DIVSAMPLING) ) return; - if ( set_arg_float(gctx, 26, divylow) ) return; - if ( set_arg_float(gctx, 27, divystep) ) return; - if ( set_arg_int(gctx, 28, DIVSAMPLING) ) return; + if ( set_arg_mem(gctx, 10, gctx->sinc_luts[na-1]) ) return; + if ( set_arg_mem(gctx, 11, gctx->sinc_luts[nb-1]) ) return; + if ( set_arg_mem(gctx, 12, gctx->sinc_luts[nc-1]) ) return; + if ( set_arg_mem(gctx, 13, gctx->flags) ) return; /* Unit cell */ err = clSetKernelArg(gctx->kern, 8, sizeof(cl_float16), &cell); @@ -226,14 +198,6 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, return; } - /* Local memory for reduction */ - sprod = BWSAMPLING*SAMPLING*SAMPLING*DIVSAMPLING*DIVSAMPLING; - err = clSetKernelArg(gctx->kern, 13, sprod*sizeof(cl_float), NULL); - if ( err != CL_SUCCESS ) { - ERROR("Couldn't set local memory: %s\n", clError(err)); - return; - } - /* Allocate memory for the result */ image->data = calloc(image->width * image->height, sizeof(float)); image->twotheta = calloc(image->width * image->height, sizeof(double)); @@ -241,9 +205,8 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, /* Iterate over panels */ for ( i=0; i<image->det->n_panels; i++ ) { - size_t dims[3]; - size_t ldims[3] = {SAMPLING, SAMPLING, - BWSAMPLING * DIVSAMPLING * DIVSAMPLING}; + size_t dims[2]; + size_t ldims[2] = {1, 1}; struct panel *p; cl_mem tt; size_t tt_size; @@ -282,18 +245,15 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, if ( set_arg_float(gctx, 5, p->cny) ) return; if ( set_arg_float(gctx, 6, p->res) ) return; if ( set_arg_float(gctx, 7, p->clen) ) return; - if ( set_arg_int(gctx, 10, p->min_fs) ) return; - if ( set_arg_int(gctx, 11, p->min_ss) ) return; - if ( set_arg_float(gctx, 19, p->fsx) ) return; - if ( set_arg_float(gctx, 20, p->fsy) ) return; - if ( set_arg_float(gctx, 21, p->ssx) ) return; - if ( set_arg_float(gctx, 22, p->ssy) ) return; - - dims[0] = pan_width * SAMPLING; - dims[1] = pan_height * SAMPLING; - dims[2] = BWSAMPLING * DIVSAMPLING * DIVSAMPLING; - - err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 3, NULL, + if ( set_arg_float(gctx, 14, p->fsx) ) return; + if ( set_arg_float(gctx, 15, p->fsy) ) return; + if ( set_arg_float(gctx, 16, p->ssx) ) return; + if ( set_arg_float(gctx, 17, p->ssy) ) return; + + dims[0] = pan_width; + dims[1] = pan_height; + + err = clEnqueueNDRangeKernel(gctx->cq, gctx->kern, 2, NULL, dims, ldims, 0, NULL, NULL); if ( err != CL_SUCCESS ) { ERROR("Couldn't enqueue diffraction kernel: %s\n", diff --git a/src/diffraction.c b/src/diffraction.c index de994133..a37c2fcb 100644 --- a/src/diffraction.c +++ b/src/diffraction.c @@ -27,9 +27,6 @@ #include "pattern_sim.h" -#define SAMPLING (4) -#define BWSAMPLING (10) -#define DIVSAMPLING (1) #define SINC_LUT_ELEMENTS (4096) @@ -355,11 +352,9 @@ void get_diffraction(struct image *image, int na, int nb, int nc, double ax, ay, az; double bx, by, bz; double cx, cy, cz; - float klow, khigh, bwstep; double *lut_a; double *lut_b; double *lut_c; - double divxlow, divylow, divxstep, divystep; cell_get_cartesian(cell, &ax, &ay, &az, &bx, &by, &bz, &cx, &cy, &cz); @@ -369,15 +364,6 @@ void get_diffraction(struct image *image, int na, int nb, int nc, /* Needed later for Lorentz calculation */ image->twotheta = malloc(image->width * image->height * sizeof(double)); - klow = 1.0/(image->lambda*(1.0 + image->beam->bandwidth/2.0)); - khigh = 1.0/(image->lambda*(1.0 - image->beam->bandwidth/2.0)); - bwstep = (khigh-klow) / BWSAMPLING; - - divxlow = -image->beam->divergence/2.0; - divylow = -image->beam->divergence/2.0; - divxstep = image->beam->divergence / DIVSAMPLING; - divystep = image->beam->divergence / DIVSAMPLING; - lut_a = get_sinc_lut(na); lut_b = get_sinc_lut(nb); lut_c = get_sinc_lut(nc); @@ -385,73 +371,34 @@ void get_diffraction(struct image *image, int na, int nb, int nc, for ( fs=0; fs<image->width; fs++ ) { for ( ss=0; ss<image->height; ss++ ) { - int fs_step, ss_step, kstep; - int divxval, divyval; - int idx = fs + image->width*ss; - - for ( fs_step=0; fs_step<SAMPLING; fs_step++ ) { - for ( ss_step=0; ss_step<SAMPLING; ss_step++ ) { - for ( kstep=0; kstep<BWSAMPLING; kstep++ ) { - for ( divxval=0; divxval<DIVSAMPLING; divxval++ ) { - for ( divyval=0; divyval<DIVSAMPLING; divyval++ ) { - - double k; - double intensity; - double f_lattice, I_lattice; - double I_molecule; - struct rvec q, qn; - double twotheta; - const double dfs = (double)fs - + ((double)fs_step / SAMPLING); - const double dss = (double)ss - + ((double)ss_step / SAMPLING); - - double xdiv = divxlow + divxstep*(double)divxval; - double ydiv = divylow + divystep*(double)divyval; - - /* Calculate k this time round */ - k = klow + (double)kstep * bwstep; - - qn = get_q(image, dfs, dss, &twotheta, k); - - /* x divergence */ - q.u = qn.u*cos(xdiv) +qn.w*sin(xdiv); - q.v = qn.v; - q.w = -qn.u*sin(xdiv) +qn.w*cos(xdiv); + int idx; + double k; + double intensity; + double f_lattice, I_lattice; + double I_molecule; + struct rvec q; + double twotheta; - qn = q; + /* Calculate k this time round */ + k = 1.0/image->lambda; - /* y divergence */ - q.v = qn.v*cos(ydiv) +qn.w*sin(ydiv); - q.w = -qn.v*sin(ydiv) +qn.w*cos(ydiv); + q = get_q(image, fs, ss, &twotheta, k); - f_lattice = lattice_factor(q, ax, ay, az, - bx, by, bz, - cx, cy, cz, - lut_a, lut_b, lut_c); + f_lattice = lattice_factor(q, ax, ay, az, + bx, by, bz, + cx, cy, cz, + lut_a, lut_b, lut_c); - I_molecule = molecule_factor(intensities, - phases, flags, q, - ax,ay,az,bx,by,bz,cx,cy,cz, - m, sym); + I_molecule = molecule_factor(intensities, + phases, flags, q, + ax,ay,az,bx,by,bz,cx,cy,cz, + m, sym); - I_lattice = pow(f_lattice, 2.0); - intensity = I_lattice * I_molecule; - - image->data[idx] += intensity; - - if ( fs_step + ss_step + kstep == 0 ) { - image->twotheta[idx] = twotheta; - } - - } - } - } - } - } + I_lattice = pow(f_lattice, 2.0); - image->data[idx] /= (SAMPLING*SAMPLING*BWSAMPLING - *DIVSAMPLING*DIVSAMPLING); + idx = fs + image->width*ss; + image->data[idx] = I_lattice * I_molecule; + image->twotheta[idx] = twotheta; } diff --git a/src/pattern_sim.h b/src/pattern_sim.h index fb376596..5d31dadf 100644 --- a/src/pattern_sim.h +++ b/src/pattern_sim.h @@ -20,7 +20,7 @@ /* Maxmimum index to hold values up to (can be increased if necessary) * WARNING: Altering this value constitutes an ABI change, and means you must * update data/diffraction.cl then recompile and reinstall everything. */ -#define INDMAX 200 +#define INDMAX 120 /* Array size */ #define IDIM (INDMAX*2 +1) |