aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorThomas White <taw@physics.org>2020-06-23 17:57:37 +0200
committerThomas White <taw@physics.org>2020-07-29 18:53:44 +0200
commitbed1e11f220c315e1a01b917abf200e927a1621a (patch)
tree5fca620942d0588b13d27e421cfa999d1c10075b
parent83f4aece3488f72496aad2ed2d9d559a06271557 (diff)
Path part of event expansion
-rw-r--r--libcrystfel/src/image-hdf5.c217
-rw-r--r--tests/CMakeLists.txt6
-rw-r--r--tests/ev_enum1.c95
-rw-r--r--tests/ev_enum1.h5bin0 -> 46504 bytes
-rw-r--r--tests/gen-ev-test.py23
5 files changed, 338 insertions, 3 deletions
diff --git a/libcrystfel/src/image-hdf5.c b/libcrystfel/src/image-hdf5.c
index a59154fc..82968575 100644
--- a/libcrystfel/src/image-hdf5.c
+++ b/libcrystfel/src/image-hdf5.c
@@ -1131,10 +1131,221 @@ ImageFeatureList *image_hdf5_read_peaks_hdf5(const DataTemplate *dtempl,
}
-struct event_list *image_hdf5_expand_frames(const DataTemplate *dtempl,
- const char *filename)
+/* Compare one link name against one component of the path pattern.
+ *
+ * Two kinds of pattern component are understood: a literal string (no
+ * placeholders), which must equal the name exactly, and a lone "%", which
+ * matches any name.  This could be extended later, but pattern matching is
+ * in general not that easy.
+ *
+ * Returns a newly allocated event string, to be freed by the caller:
+ *   - for "%": ev_str_old followed by "/" and the name,
+ *   - for a matching literal: a copy of ev_str_old.
+ * Returns NULL if the name does not match, or if allocation fails. */
+static char *matches_pattern(const char *name, const char *pattern,
+                             const char *ev_str_old)
{
- struct event_list *master_el;
+	char *ev_str_new;
+	size_t len_old;
+	size_t len_name;
+
+	if ( strcmp(pattern, "%") != 0 ) {
+		/* Literal component: contributes nothing to the event string */
+		if ( strcmp(name, pattern) != 0 ) return NULL;
+		return strdup(ev_str_old);
+	}
+
+	/* Placeholder: old string + '/' + name + terminator */
+	len_old = strlen(ev_str_old);
+	len_name = strlen(name);
+	ev_str_new = malloc(len_old+len_name+2);
+	if ( ev_str_new == NULL ) {
+		ERROR("Couldn't allocate memory\n");
+		return NULL;
+	}
+
+	memcpy(ev_str_new, ev_str_old, len_old);
+	ev_str_new[len_old] = '/';
+	memcpy(ev_str_new+len_old+1, name, len_name+1);
+
+	return ev_str_new;
+}
+
+
+/* Private structure, just to avoid passing char *** around: a growable array of event strings, appended to by add_to_list() */
+struct ev_list
+{
+ char **events; /* The event strings; only the first n_events slots are in use */
+ int n_events; /* Number of strings currently stored */
+ int max_events; /* Number of slots allocated in events */
+};
+
+
+/* Append ev_str to the list, enlarging the array by 128 slots whenever
+ * it is full.
+ *
+ * The list takes ownership of ev_str.  If the array cannot be enlarged,
+ * the failure is reported, ev_str is freed (the caller keeps no reference
+ * to it, so it would otherwise leak) and the list is left unchanged. */
+static void add_to_list(struct ev_list *list, char *ev_str)
+{
+	if ( list->n_events == list->max_events ) {
+		char **new_events = realloc(list->events,
+		                            (list->max_events+128)*sizeof(char *));
+		if ( new_events == NULL ) {
+			ERROR("Couldn't allocate memory\n");
+			free(ev_str);
+			return;
+		}
+		list->max_events += 128;
+		list->events = new_events;
+	}
+
+	list->events[list->n_events++] = ev_str;
+}
+
+
+/* Walk the links of the group gh, matching each link name against
+ * pattern_bits[0], and recurse into matching sub-groups with the remaining
+ * pattern components.
+ *
+ * gh:             open HDF5 group (or file) handle to enumerate
+ * list:           receives one event string for every dataset reached with
+ *                 exactly one pattern component left
+ * ev_str:         event string accumulated so far (not modified)
+ * pattern_bits:   remaining pattern components, one per path level
+ * n_pattern_bits: number of entries in pattern_bits
+ *
+ * Links whose names do not match are skipped silently.  Objects which are
+ * neither groups nor datasets are ignored.
+ *
+ * Returns 0 on success, 1 on any error (after reporting it).  Events found
+ * before the error remain in the list. */
+static int rec_expand_paths(hid_t gh, struct ev_list *list,
+                            const char *ev_str,
+                            char **pattern_bits, int n_pattern_bits)
+{
+	hsize_t i;
+	H5G_info_t group_info;
+
+	/* pattern_bits[0] is read for every link, so it has to exist */
+	if ( n_pattern_bits < 1 ) {
+		ERROR("Pattern doesn't match file"
+		      " (too short)\n");
+		return 1;
+	}
+
+	if ( H5Gget_info(gh, &group_info) < 0 ) {
+		ERROR("Couldn't get group info\n");
+		return 1;
+	}
+
+	for ( i=0; i<group_info.nlinks; i++ ) {
+
+		ssize_t size;
+		char *name;
+		H5O_info_t obj_info;
+		char *ev_str_new;
+		int failed = 0;
+
+		/* First call only asks for the length of the name.
+		 * 20000 is a sanity limit on that length. */
+		size = H5Lget_name_by_idx(gh, ".", H5_INDEX_NAME,
+		                          H5_ITER_INC, i, NULL, 0,
+		                          H5P_DEFAULT);
+		if ( (size < 0) || (size > 20000) ) {
+			ERROR("Couldn't get link name\n");
+			return 1;
+		}
+
+		name = malloc(size+1);
+		if ( name == NULL ) {
+			ERROR("Couldn't allocate memory\n");
+			return 1;
+		}
+
+		if ( H5Lget_name_by_idx(gh, ".", H5_INDEX_NAME,
+		                        H5_ITER_INC, i, name, size+1,
+		                        H5P_DEFAULT) < 0 )
+		{
+			ERROR("Couldn't get name\n");
+			free(name);
+			return 1;
+		}
+
+		ev_str_new = matches_pattern(name, pattern_bits[0],
+		                             ev_str);
+		if ( ev_str_new == NULL ) {
+			free(name);
+			continue;
+		}
+
+		if ( H5Oget_info_by_idx(gh, ".", H5_INDEX_NAME,
+		                        H5_ITER_INC, i, &obj_info,
+		                        H5P_DEFAULT) < 0 )
+		{
+			ERROR("Couldn't get info\n");
+			failed = 1;
+
+		} else if ( obj_info.type == H5O_TYPE_GROUP ) {
+
+			if ( n_pattern_bits == 1 ) {
+
+				/* Nothing left to match the group's
+				 * children against */
+				ERROR("Pattern doesn't match file"
+				      " (too short)\n");
+				failed = 1;
+
+			} else {
+
+				hid_t child_gh = H5Gopen1(gh, name);
+				if ( child_gh < 0 ) {
+					ERROR("Couldn't open '%s'\n", name);
+					failed = 1;
+				} else {
+					failed = rec_expand_paths(child_gh, list,
+					                          ev_str_new,
+					                          &pattern_bits[1],
+					                          n_pattern_bits - 1);
+					/* Close the group whether or not the
+					 * recursion succeeded */
+					H5Gclose(child_gh);
+				}
+
+			}
+
+		} else if ( obj_info.type == H5O_TYPE_DATASET ) {
+
+			if ( n_pattern_bits != 1 ) {
+				/* The current component matched this
+				 * dataset; the rest are surplus */
+				ERROR("Pattern doesn't match file"
+				      " (too long by %i)\n",
+				      n_pattern_bits - 1);
+				failed = 1;
+			} else {
+				add_to_list(list, ev_str_new);
+				ev_str_new = NULL;  /* Now owned by the list */
+			}
+
+		}
+
+		/* Single clean-up point for this link.  ev_str_new is NULL
+		 * here if (and only if) it was handed to the list. */
+		free(ev_str_new);
+		free(name);
+
+		if ( failed ) return 1;
+
+	}
+
+	return 0;
+}
+
+
+/* Enumerate the events in the file fh which match an absolute path pattern
+ * such as "/data/%/array".
+ *
+ * The pattern is split at each '/' into components, which are matched level
+ * by level against the file by rec_expand_paths().  The final component has
+ * no trailing separator, so it runs to the end of the string.
+ *
+ * fh:      open HDF5 file (or group) handle at which matching starts
+ * pattern: pattern to expand; must start with '/'
+ * n_evs:   receives the number of event strings returned (0 on failure)
+ *
+ * Returns a newly allocated array of newly allocated event strings, which
+ * the caller must free, or NULL if the pattern is unusable, memory ran out,
+ * or nothing matched. */
+static char **expand_paths(hid_t fh, const char *pattern, int *n_evs)
+{
+	int n_bits;
+	char **pattern_bits;
+	struct ev_list list;
+	int i;
+	const char *p;
+	const char *start;
+
+	*n_evs = 0;
+
+	if ( pattern == NULL ) return NULL;
+	if ( pattern[0] != '/' ) return NULL;
+
+	/* Chop up the pattern into path bits: one follows each separator */
+	n_bits = 0;
+	for ( p=pattern; *p!='\0'; p++ ) {
+		if ( *p == '/' ) n_bits++;
+	}
+
+	pattern_bits = malloc(n_bits*sizeof(char *));
+	if ( pattern_bits == NULL ) return NULL;
+
+	start = pattern+1;
+	for ( i=0; i<n_bits; i++ ) {
+
+		const char *sep = strchr(start, '/');
+
+		/* Last bit: no separator follows, so take the rest */
+		if ( sep == NULL ) sep = start+strlen(start);
+
+		pattern_bits[i] = strndup(start, sep-start);
+		if ( pattern_bits[i] == NULL ) {
+			while ( i > 0 ) free(pattern_bits[--i]);
+			free(pattern_bits);
+			return NULL;
+		}
+
+		/* Step over the separator, but never past the terminator */
+		start = (*sep == '/') ? sep+1 : sep;
+
+	}
+
+	list.n_events = 0;
+	list.max_events = 0;
+	list.events = NULL;
+
+	/* Any failure has already been reported by rec_expand_paths().
+	 * Whatever was found before the failure is still returned. */
+	rec_expand_paths(fh, &list, "", pattern_bits, n_bits);
+
+	for ( i=0; i<n_bits; i++ ) {
+		free(pattern_bits[i]);
+	}
+	free(pattern_bits);
+
+	*n_evs = list.n_events;
+	return list.events;
+}
+
+
+char **image_hdf5_expand_frames(const DataTemplate *dtempl,
+ const char *filename,
+ int *pn_frames)
+{
+ char **frames = NULL;
+ int n_frames;
hid_t fh;
fh = H5Fopen(filename, H5F_ACC_RDONLY, H5P_DEFAULT);
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 78d39115..8be42f28 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -137,3 +137,9 @@ add_executable(evparse7 evparse7.c)
target_include_directories(evparse7 PRIVATE ${COMMON_INCLUDES})
target_link_libraries(evparse7 ${COMMON_LIBRARIES} -lhdf5)
add_test(evparse7 evparse7)
+
+add_executable(ev_enum1 ev_enum1.c) # Event enumeration test
+target_include_directories(ev_enum1 PRIVATE ${COMMON_INCLUDES})
+target_link_libraries(ev_enum1 ${COMMON_LIBRARIES} -lhdf5)
+add_test(NAME ev_enum1 # The test program takes the HDF5 file to read as its argument
+ COMMAND ev_enum1 ${CMAKE_CURRENT_SOURCE_DIR}/ev_enum1.h5)
diff --git a/tests/ev_enum1.c b/tests/ev_enum1.c
new file mode 100644
index 00000000..9c414dc5
--- /dev/null
+++ b/tests/ev_enum1.c
@@ -0,0 +1,95 @@
+/*
+ * ev_enum1.c
+ *
+ * Check that event enumeration works
+ *
+ * Copyright © 2020 Deutsches Elektronen-Synchrotron DESY,
+ * a research centre of the Helmholtz Association.
+ *
+ * Authors:
+ * 2020 Thomas White <taw@physics.org>
+ *
+ * This file is part of CrystFEL.
+ *
+ * CrystFEL is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * CrystFEL is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with CrystFEL. If not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <hdf5.h>
+
+#include "../libcrystfel/src/image-hdf5.c"
+
+/* Expand a fixed pattern against the HDF5 file named by the first command
+ * line argument and check that exactly the expected event IDs come back,
+ * in the expected order.
+ *
+ * Returns 0 if the check passes, 1 otherwise. */
+int main(int argc, char *argv[])
+{
+	/* Events under /data/panelA which contain a dataset called "array" */
+	static const char *const expected[] = {
+		"/ev_1/dataABCset",
+		"/ev_2/dataDEFset",
+		"/ev_3/dataGHIset",
+		"/ev_5/dataNOPset",
+	};
+	const int n_expected = sizeof(expected)/sizeof(expected[0]);
+	hid_t fh;
+	char **event_ids;
+	int n_event_ids = 0;
+	int i;
+	int r = 0;
+
+	if ( argc < 2 ) {
+		ERROR("No HDF5 file given\n");
+		return 1;
+	}
+
+	fh = H5Fopen(argv[1], H5F_ACC_RDONLY, H5P_DEFAULT);
+	if ( fh < 0 ) {
+		ERROR("Couldn't open file\n");
+		return 1;
+	}
+
+	event_ids = expand_paths(fh,
+	                         "/data/panelA/%/panel_data1t/%/array",
+	                         &n_event_ids);
+
+	if ( event_ids == NULL ) {
+		STATUS("event_ids = NULL\n");
+		H5Fclose(fh);
+		return 1;
+	}
+
+	if ( n_event_ids != n_expected ) {
+		STATUS("Number of event IDs = %i\n", n_event_ids);
+		r = 1;
+	}
+
+	/* Only compare the strings if the count was right; stop at the
+	 * first mismatch */
+	for ( i=0; (r==0) && (i<n_expected); i++ ) {
+		if ( strcmp(event_ids[i], expected[i]) != 0 ) {
+			STATUS("Wrong event id '%s'\n", event_ids[i]);
+			r = 1;
+		}
+	}
+
+	for ( i=0; i<n_event_ids; i++ ) {
+		free(event_ids[i]);
+	}
+	free(event_ids);
+
+	H5Fclose(fh);
+
+	return r;
+}
diff --git a/tests/ev_enum1.h5 b/tests/ev_enum1.h5
new file mode 100644
index 00000000..448fa539
--- /dev/null
+++ b/tests/ev_enum1.h5
Binary files differ
diff --git a/tests/gen-ev-test.py b/tests/gen-ev-test.py
new file mode 100644
index 00000000..dc7c0f0a
--- /dev/null
+++ b/tests/gen-ev-test.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+import h5py
+import numpy
+
+blank = numpy.zeros((1,1), dtype=float)
+
+with h5py.File('ev_enum1.h5', 'w') as fh:
+ fh.create_dataset('/data/panelA/ev_1/panel_data1t/dataABCset/array', data=blank)
+ fh.create_dataset('/data/panelA/ev_2/panel_data1t/dataDEFset/array', data=blank)
+ fh.create_dataset('/data/panelA/ev_3/panel_data1t/dataGHIset/array', data=blank)
+ fh.create_dataset('/data/panelA/ev_4/panel_data1t/dataKLMset/nomatch', data=blank)
+ fh.create_dataset('/data/panelA/ev_5/panel_data1t/dataNOPset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_1/panel_data1t/dataABCset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_2/panel_data1t/dataDEFset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_3/panel_data1t/dataGHIset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_4/panel_data1t/dataKLMset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_5/panel_data1t/dataNOPset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_1/dataABCset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_2/dataDEFset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_3/dataGHIset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_4/dataKLMset/array', data=blank)
+ fh.create_dataset('/data/panelB/ev_5/dataNOPset/array', data=blank)