diff options
author | Thomas White <taw@physics.org> | 2010-12-03 16:43:19 +0100 |
---|---|---|
committer | Thomas White <taw@physics.org> | 2012-02-22 15:27:07 +0100 |
commit | 9aafea1bdb0255ad7d2491d96174ac3407a6ca69 (patch) | |
tree | 69bb5865124beea2285917fa4eaa62e092896525 | |
parent | c25120f4b71da8b82476c8a14b1617c8f7b72d57 (diff) |
Use symmetry when simulating on the GPU
-rw-r--r-- | data/diffraction.cl | 116 | ||||
-rw-r--r-- | src/cl-utils.c | 12 | ||||
-rw-r--r-- | src/cl-utils.h | 3 | ||||
-rw-r--r-- | src/diffraction-gpu.c | 56 | ||||
-rw-r--r-- | src/diffraction-gpu.h | 6 | ||||
-rw-r--r-- | src/indexamajig.c | 2 | ||||
-rw-r--r-- | src/pattern_sim.c | 3 |
7 files changed, 170 insertions, 28 deletions
diff --git a/data/diffraction.cl b/data/diffraction.cl index ff07edff..9eae1b4c 100644 --- a/data/diffraction.cl +++ b/data/diffraction.cl @@ -70,21 +70,12 @@ float lattice_factor(float16 cell, float4 q, } -float get_intensity(global float *intensities, float16 cell, float4 q) +float lookup_intensity(global float *intensities, + signed int h, signed int k, signed int l) { - float hf, kf, lf; - int h, k, l; int idx; - hf = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z; /* h */ - kf = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z; /* k */ - lf = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z; /* l */ - - h = round(hf); - k = round(kf); - l = round(lf); - - /* Return a silly value if indices are out of range */ + /* Out of range? */ if ( (abs(h) > INDMAX) || (abs(k) > INDMAX) || (abs(l) > INDMAX) ) { return 0.0; } @@ -93,14 +84,106 @@ float get_intensity(global float *intensities, float16 cell, float4 q) k = (k>=0) ? k : k+IDIM; l = (l>=0) ? l : l+IDIM; - if ( (h>=IDIM) || (k>=IDIM) || (l>=IDIM) ) return 0.0; - idx = h + (IDIM*k) + (IDIM*IDIM*l); return intensities[idx]; } +float lookup_flagged_intensity(global float *intensities, global float *flags, + signed int h, signed int k, signed int l) +{ + return lookup_intensity(intensities, h, k, l) + * lookup_intensity(flags, h, k, l); +} + + +float molecule_factor(global float *intensities, global float *flags, + float16 cell, float4 q) +{ + float hf, kf, lf; + int h, k, l; + float val = 0.0; + signed int i; + + #ifdef FLAT_INTENSITIES + return 1.0e5; + #else + + hf = cell.s0*q.x + cell.s1*q.y + cell.s2*q.z; /* h */ + kf = cell.s3*q.x + cell.s4*q.y + cell.s5*q.z; /* k */ + lf = cell.s6*q.x + cell.s7*q.y + cell.s8*q.z; /* l */ + + h = round(hf); + k = round(kf); + l = round(lf); + i = -h-k; + + #ifdef PG1 + val += lookup_flagged_intensity(intensities, flags, h, k, l); + #endif /* PG1 */ + + #ifdef PG1BAR + val += lookup_flagged_intensity(intensities, flags, h, k, l); + val += lookup_flagged_intensity(intensities, flags, -h, -k, -l); + #endif /* PG1BAR */ + + #ifdef PG6 + val += lookup_flagged_intensity(intensities, flags, h, k, l); + val += lookup_flagged_intensity(intensities, flags, i, h, l); + val += lookup_flagged_intensity(intensities, flags, k, i, l); + val += lookup_flagged_intensity(intensities, flags, -h, -k, l); + val += lookup_flagged_intensity(intensities, flags, -i, -h, l); + val += lookup_flagged_intensity(intensities, flags, -k, -i, l); + #endif /* PG6 */ + + #ifdef PG6M + val += lookup_flagged_intensity(intensities, flags, h, k, l); + val += lookup_flagged_intensity(intensities, flags, i, h, l); + val += lookup_flagged_intensity(intensities, flags, k, i, l); + val += lookup_flagged_intensity(intensities, flags, -h, -k, l); + val += lookup_flagged_intensity(intensities, flags, -i, -h, l); + val += lookup_flagged_intensity(intensities, flags, -k, -i, l); + val += lookup_flagged_intensity(intensities, flags, -h, -k, -l); + val += lookup_flagged_intensity(intensities, flags, -i, -h, -l); + val += lookup_flagged_intensity(intensities, flags, -k, -i, -l); + val += lookup_flagged_intensity(intensities, flags, h, k, -l); + val += lookup_flagged_intensity(intensities, flags, i, h, -l); + val += lookup_flagged_intensity(intensities, flags, k, i, -l); + #endif /* PG6M */ + + #ifdef PG6MMM + val += lookup_flagged_intensity(intensities, flags, h, k, l); + val += lookup_flagged_intensity(intensities, flags, i, h, l); + val += lookup_flagged_intensity(intensities, flags, k, i, l); + val += lookup_flagged_intensity(intensities, flags, -h, -k, l); + val += lookup_flagged_intensity(intensities, flags, -i, -h, l); + val += lookup_flagged_intensity(intensities, flags, -k, -i, l); + val += lookup_flagged_intensity(intensities, flags, k, h, -l); + val += lookup_flagged_intensity(intensities, flags, h, i, -l); + val += lookup_flagged_intensity(intensities, flags, i, k, -l); + val += lookup_flagged_intensity(intensities, flags, -k, -h, -l); + val += lookup_flagged_intensity(intensities, flags, -h, -i, -l); + val += lookup_flagged_intensity(intensities, flags, -i, -k, -l); + val += lookup_flagged_intensity(intensities, flags, -h, -k, -l); + val += lookup_flagged_intensity(intensities, flags, -i, -h, -l); + val += lookup_flagged_intensity(intensities, flags, -k, i, -l); + val += lookup_flagged_intensity(intensities, flags, h, k, -l); + val += lookup_flagged_intensity(intensities, flags, i, h, -l); + val += lookup_flagged_intensity(intensities, flags, k, i, -l); + val += lookup_flagged_intensity(intensities, flags, -k, -h, l); + val += lookup_flagged_intensity(intensities, flags, -h, -i, l); + val += lookup_flagged_intensity(intensities, flags, -i, -k, l); + val += lookup_flagged_intensity(intensities, flags, k, h, l); + val += lookup_flagged_intensity(intensities, flags, h, i, l); + val += lookup_flagged_intensity(intensities, flags, i, k, l); + #endif /* PG6MMM */ + + return val; + #endif /* FLAT_INTENSITIIES */ +} + + kernel void diffraction(global float *diff, global float *tt, float klow, int w, float cx, float cy, float res, float clen, float16 cell, @@ -109,7 +192,8 @@ kernel void diffraction(global float *diff, global float *tt, float klow, float kstep, read_only image2d_t func_a, read_only image2d_t func_b, - read_only image2d_t func_c) + read_only image2d_t func_c, + global float *flags) { float ttv; const int x = get_global_id(0) + (xmin*sampling); @@ -128,7 +212,7 @@ kernel void diffraction(global float *diff, global float *tt, float klow, /* Calculate value */ q = get_q(x, y, cx, cy, res, clen, k, &ttv, sampling); f_lattice = lattice_factor(cell, q, func_a, func_b, func_c); - I_molecule = get_intensity(intensities, cell, q); + I_molecule = molecule_factor(intensities, flags, cell, q); I_lattice = pow(f_lattice, 2.0f); /* Write the value to local memory */ diff --git a/src/cl-utils.c b/src/cl-utils.c index 1b7b9811..0c6dce27 100644 --- a/src/cl-utils.c +++ b/src/cl-utils.c @@ -146,13 +146,14 @@ static void show_build_log(cl_program prog, cl_device_id dev) cl_program load_program(const char *filename, cl_context ctx, - cl_device_id dev, cl_int *err) + cl_device_id dev, cl_int *err, const char *extra_cflags) { FILE *fh; cl_program prog; char *source; size_t len; cl_int r; + char cflags[1024] = ""; fh = fopen(filename, "r"); if ( fh == NULL ) { @@ -172,9 +173,12 @@ cl_program load_program(const char *filename, cl_context ctx, return 0; } - r = clBuildProgram(prog, 0, NULL, - "-Werror -I"DATADIR"/crystfel/ -cl-no-signed-zeros", - NULL, NULL); + snprintf(cflags, 1023, "-Werror "); + strncat(cflags, "-I"DATADIR"/crystfel/ ", 1023-strlen(cflags)); + strncat(cflags, "-cl-no-signed-zeros ", 1023-strlen(cflags)); + strncat(cflags, extra_cflags, 1023-strlen(cflags)); + + r = clBuildProgram(prog, 0, NULL, cflags, NULL, NULL); if ( r != CL_SUCCESS ) { ERROR("Couldn't build program '%s'\n", filename); show_build_log(prog, dev); diff --git a/src/cl-utils.h b/src/cl-utils.h index 790a149a..21a7ecd2 100644 --- a/src/cl-utils.h +++ b/src/cl-utils.h @@ -20,7 +20,8 @@ extern const char *clError(cl_int err); extern cl_device_id get_cl_dev(cl_context ctx, int n); extern cl_program load_program(const char *filename, cl_context ctx, - cl_device_id dev, cl_int *err); + cl_device_id dev, cl_int *err, + const char *extra_cflags); #endif /* CLUTILS_H */ diff --git a/src/diffraction-gpu.c b/src/diffraction-gpu.c index 2d8eda95..dd382ede 100644 --- a/src/diffraction-gpu.c +++ b/src/diffraction-gpu.c @@ -39,6 +39,7 @@ struct gpu_context cl_program prog; cl_kernel kern; cl_mem intensities; + cl_mem flags; cl_mem tt; size_t tt_size; @@ -219,6 +220,13 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, return; } + /* Flag array */ + clSetKernelArg(gctx->kern, 18, sizeof(cl_mem), &gctx->flags); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't set flag array: %s\n", clError(err)); + return; + } + /* Iterate over panels */ event = malloc(image->det->n_panels * sizeof(cl_event)); for ( p=0; p<image->det->n_panels; p++ ) { @@ -331,7 +339,7 @@ void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, /* Setup the OpenCL stuff, create buffers, load the structure factor table */ struct gpu_context *setup_gpu(int no_sfac, struct image *image, const double *intensities, unsigned char *flags, - int dev_num) + const char *sym, int dev_num) { struct gpu_context *gctx; cl_uint nplat; @@ -341,8 +349,11 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, cl_device_id dev; size_t intensities_size; float *intensities_ptr; + size_t flags_size; + float *flags_ptr; size_t maxwgsize; int i; + char cflags[512] = ""; STATUS("Setting up GPU...\n"); @@ -396,8 +407,9 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, } } else { for ( i=0; i<IDIM*IDIM*IDIM; i++ ) { - intensities_ptr[i] = 1e10; + intensities_ptr[i] = 1e5; } + strncat(cflags, "-DFLAT_INTENSITIES ", 511-strlen(cflags)); } gctx->intensities = clCreateBuffer(gctx->ctx, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, @@ -409,6 +421,44 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, } free(intensities_ptr); + if ( strcmp(sym, "1") == 0 ) { + strncat(cflags, "-DPG1 ", 511-strlen(cflags)); + } else if ( strcmp(sym, "-1") == 0 ) { + strncat(cflags, "-DPG1BAR ", 511-strlen(cflags)); + } else if ( strcmp(sym, "6/mmm") == 0 ) { + strncat(cflags, "-DPG6MMM ", 511-strlen(cflags)); + } else if ( strcmp(sym, "6") == 0 ) { + strncat(cflags, "-DPG6 ", 511-strlen(cflags)); + } else if ( strcmp(sym, "6/m") == 0 ) { + strncat(cflags, "-DPG6M ", 511-strlen(cflags)); + } else { + ERROR("Sorry! Point group '%s' is not currently supported" + " on the GPU. I'm using '1' instead.\n", sym); + strncat(cflags, "-DPG1 ", 511-strlen(cflags)); + } + + /* Create a flag array */ + flags_size = IDIM*IDIM*IDIM*sizeof(cl_float); + flags_ptr = malloc(flags_size); + if ( flags != NULL ) { + for ( i=0; i<IDIM*IDIM*IDIM; i++ ) { + flags_ptr[i] = flags[i]; + } + } else { + for ( i=0; i<IDIM*IDIM*IDIM; i++ ) { + flags_ptr[i] = 1.0; + } + } + gctx->flags = clCreateBuffer(gctx->ctx, + CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, + flags_size, flags_ptr, &err); + if ( err != CL_SUCCESS ) { + ERROR("Couldn't allocate flag buffer\n"); + free(gctx); + return NULL; + } + free(flags_ptr); + gctx->tt_size = image->width*image->height*sizeof(cl_float); gctx->tt = clCreateBuffer(gctx->ctx, CL_MEM_WRITE_ONLY, gctx->tt_size, NULL, &err); @@ -419,7 +469,7 @@ struct gpu_context *setup_gpu(int no_sfac, struct image *image, } gctx->prog = load_program(DATADIR"/crystfel/diffraction.cl", gctx->ctx, - dev, &err); + dev, &err, cflags); if ( err != CL_SUCCESS ) { free(gctx); return NULL; diff --git a/src/diffraction-gpu.h b/src/diffraction-gpu.h index 7e6048b3..02115317 100644 --- a/src/diffraction-gpu.h +++ b/src/diffraction-gpu.h @@ -27,7 +27,8 @@ extern void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, int na, int nb, int nc, UnitCell *ucell); extern struct gpu_context *setup_gpu(int no_sfac, struct image *image, const double *intensities, - const unsigned char *flags, int dev_num); + const unsigned char *flags, + const char *sym, int dev_num); extern void cleanup_gpu(struct gpu_context *gctx); #else @@ -41,7 +42,8 @@ static void get_diffraction_gpu(struct gpu_context *gctx, struct image *image, static struct gpu_context *setup_gpu(int no_sfac, struct image *image, const double *intensities, - const unsigned char *flags, int dev_num) + const unsigned char *flags, + const char *sym, int dev_num) { return NULL; } diff --git a/src/indexamajig.c b/src/indexamajig.c index e99b1bd3..2fb6dd85 100644 --- a/src/indexamajig.c +++ b/src/indexamajig.c @@ -286,7 +286,7 @@ static void simulate_and_write(struct image *simage, struct gpu_context **gctx, * Unfortunately, setup has to go here since until now we don't know * enough about the situation. */ if ( (gctx != NULL) && (*gctx == NULL) ) { - *gctx = setup_gpu(0, simage, intensities, flags, gpu_dev); + *gctx = setup_gpu(0, simage, intensities, flags, sym, gpu_dev); } if ( (gctx != NULL) && (*gctx != NULL) ) { diff --git a/src/pattern_sim.c b/src/pattern_sim.c index 41dd3a26..92a0765a 100644 --- a/src/pattern_sim.c +++ b/src/pattern_sim.c @@ -539,7 +539,8 @@ int main(int argc, char *argv[]) if ( config_gpu ) { if ( gctx == NULL ) { gctx = setup_gpu(config_nosfac, &image, - intensities, flags, gpu_dev); + intensities, flags, sym, + gpu_dev); } get_diffraction_gpu(gctx, &image, na, nb, nc, cell); } else { |