diff options
Diffstat (limited to 'scripts/stream_grep')
-rwxr-xr-x | scripts/stream_grep | 252 |
1 files changed, 252 insertions, 0 deletions
#!/usr/bin/perl -w
#
#
# Stream_grep  Splits a stream file based on tag name/value
#
# Written by Andrew Aquila 2011
#
# Version 1.1 Dec 1 2011:
#   Now stdin and stdout work so piping is possible
#   Changed input options to match shell script test function for numbers
#   Added -v for invert-match option
#   Added cell parameter matching options
#
# Origin: scripts/stream_grep (added as a new file, mode 100755).
#

use strict;
use warnings;

use Getopt::Long;
use Scalar::Util qw(looks_like_number);

# Comparison codes handed to check_match() as its $eq_type argument.
# The numeric values are part of check_match()'s calling convention.
use constant {
    CMP_NONE => 0,    # no comparison: the tag merely has to be present
    CMP_LT   => 1,
    CMP_LE   => 2,
    CMP_EQ   => 3,
    CMP_GE   => 4,
    CMP_GT   => 5,
    CMP_NE   => 6,
};

# Command-line driver.
# Parses the options, copies the stream header (everything before the first
# "Begin chunk" marker) to the output unchanged, and then copies only those
# chunks selected by the tag/cell criterion.  Statistics go to STDERR.
# Returns the process exit status: 0 on success or --help, 1 on a usage error.
sub main
{
    my ($input_stream_name, $tag_name, $output_stream_name, $help, $v);
    my ($lt, $le, $eq, $ge, $gt, $ne);
    my %cell_flag;

    # One entry per --cell-* flag.  The single capture group in each pattern
    # picks the number that check_match() compares.
    my $num = qr/[0-9.]+/;
    my @cell_patterns = (
        [ 'cell-a',     qr/^Cell parameters ($num) $num $num/ ],
        [ 'cell-b',     qr/^Cell parameters $num ($num) $num/ ],
        [ 'cell-c',     qr/^Cell parameters $num $num ($num)/ ],
        [ 'cell-alpha', qr/($num) $num $num deg$/ ],
        [ 'cell-beta',  qr/$num ($num) $num deg$/ ],
        [ 'cell-gamma', qr/$num $num ($num) deg$/ ],
    );

    my @option_spec = (
        'help|?|h'       => \$help,
        'i|input=s'      => \$input_stream_name,
        'o|output=s'     => \$output_stream_name,
        'n|tag-name=s'   => \$tag_name,
        'v|invert-match' => \$v,
        'eq=f'           => \$eq,
        'lt=f'           => \$lt,
        'le=f'           => \$le,
        'ge=f'           => \$ge,
        'gt=f'           => \$gt,
        'ne=f'           => \$ne,
        # Value-less flag that also writes to $gt: Getopt::Long stores 1, so
        # it acts like "-gt 1".  It is absent from the help text and is kept
        # only so that existing invocations continue to parse.
        'g|greater-than' => \$gt,
    );
    foreach my $cell (@cell_patterns) {
        push @option_spec, $cell->[0], \$cell_flag{ $cell->[0] };
    }

    my $opts = GetOptions(@option_spec);

    # sanity check and error message
    if (!$opts or defined $help) {
        print STDERR "@ARGV\n";
        help_msgs();
        return $opts ? 0 : 1;    # a parse failure is an error, --help is not
    }

    # check if filtering a Cell parameter
    my $cell_type     = 0;
    my $N_name_givers = defined $tag_name ? 1 : 0;
    foreach my $cell (@cell_patterns) {
        my ($flag, $pattern) = @{$cell};
        next unless defined $cell_flag{$flag};
        $tag_name  = $pattern;
        $cell_type = 1;
        $N_name_givers++;
    }

    # A bit of error checking on number of tags
    if ($N_name_givers > 1) {
        print STDERR "More then one tag-name/cell parameret is used!\n";
        help_msgs();
        return 1;
    }
    if (!defined $tag_name) {
        print STDERR "No tag-name/cell parameret is defined!\n";
        help_msgs();
        return 1;
    }

    # set type and tag value
    my $tag_type    = CMP_NONE;
    my $tag_value   = 0;
    my $N_tag_types = 0;
    foreach my $candidate ([ CMP_LT, $lt ], [ CMP_LE, $le ], [ CMP_EQ, $eq ],
                           [ CMP_GE, $ge ], [ CMP_GT, $gt ], [ CMP_NE, $ne ]) {
        my ($type, $value) = @{$candidate};
        next unless defined $value;
        $tag_type  = $type;
        $tag_value = $value;
        $N_tag_types++;
    }

    # sanity check for xor of numeric options
    if ($N_tag_types > 1) {
        print STDERR "More then one comparison is used!\n";
        help_msgs();
        return 1;
    }

    # check_match() answers +1/-1; multiplying by -1 implements --invert-match
    my $sign = defined $v ? -1 : 1;

    # set input file handle ("-" keeps meaning stdin, as with two-arg open)
    my $FHin      = \*STDIN;
    my $opened_in = 0;
    if (defined $input_stream_name and $input_stream_name ne '-') {
        open(my $in, '<', $input_stream_name)
            or die "Can't open file $input_stream_name\n";
        $FHin      = $in;
        $opened_in = 1;
    }

    # set output file handle ("-" keeps meaning stdout)
    my $FHout      = \*STDOUT;
    my $opened_out = 0;
    if (defined $output_stream_name and $output_stream_name ne '-') {
        open(my $out, '>', $output_stream_name)
            or die "Can't open file $output_stream_name\n";
        $FHout      = $out;
        $opened_out = 1;
    }

    # initialize variables
    my @chunk     = ();
    my $N_chunks  = 0;
    my $N_matches = 0;

    # Tests the chunk collected so far and prints it when selected.  An empty
    # buffer is ignored; otherwise input without any chunk would be counted
    # as one match under --invert-match.
    my $flush_chunk = sub {
        return unless @chunk;
        my $verdict = check_match(\@chunk, $tag_name, $tag_value,
                                  $tag_type, $cell_type);
        if (($verdict * $sign) > 0) {    # simple test including inverse
            print_chunk(\@chunk, $FHout);
            $N_matches++;
        }
        return;
    };

    # loop over file
    while (my $line = <$FHin>) {
        if ($line =~ /^----- Begin chunk -----$/) {    # new chunk!
            $flush_chunk->();
            $N_chunks++;
            @chunk = ();                               # clear chunk
        }
        if ($N_chunks == 0) {                          # still in the header
            print {$FHout} $line;
        }
        else {
            push(@chunk, $line);                       # add line to the chunk
        }
    }

    # don't forget the last chunk!
    $flush_chunk->();

    # close handles if files
    if ($opened_in) {
        close($FHin);
    }
    if ($opened_out) {
        # buffered write errors only surface here
        close($FHout) or die "Can't close file $output_stream_name: $!\n";
    }

    # print useful data on the old and new streams
    print STDERR "I have read $N_chunks chunks.\n";
    print STDERR "Of those $N_matches matched the criteria.\n";

    return 0;
}

# function to print the chunk
#   $chunk_ref  reference to the array of lines making up the chunk
#   $fh         output file handle
sub print_chunk
{
    my ($chunk_ref, $fh) = @_;
    return print {$fh} @{$chunk_ref};
}

# function to match chunk
# returns 1 if TRUE and -1 if FALSE
#   $chunk_ref   reference to the array of lines making up the chunk
#   $name        pattern (string or qr//) identifying the line of interest
#   $ref_value   number the extracted value is compared against
#   $eq_type     comparison code: 0 none, 1 <, 2 <=, 3 ==, 4 >=, 5 >, 6 !=
#   $split_type  true: the value is capture group 1 of $name (cell options);
#                false: the value is the text after the first "=" on the line
# Every line matching $name is tried in turn; the first one satisfying the
# comparison decides.  For numeric comparisons, lines without a numeric value
# are skipped rather than being compared as 0.
sub check_match
{
    my ($chunk_ref, $name, $ref_value, $eq_type, $split_type) = @_;

    foreach my $line (@{$chunk_ref}) {
        next unless $line =~ $name;

        my $value;
        if ($split_type) {
            $value = $1;                             # evaluate cell parameter
        }
        else {
            (undef, $value) = split(/=/, $line);     # evaluate everything else
        }

        # presence of the tag is enough when no comparison was requested
        return 1 if $eq_type == CMP_NONE;

        next unless defined $value;                  # no "=" on this line
        $value =~ s/^\s+|\s+$//g;
        next unless looks_like_number($value);

        return 1 if _compare($eq_type, $value, $ref_value);
    }
    return -1;    # chunk is empty and nothing matches
}

# Applies the numeric comparison selected by $eq_type to ($value, $ref_value).
# Returns a true value when the comparison holds; an unknown code never holds.
sub _compare
{
    my ($eq_type, $value, $ref_value) = @_;

    return $value <  $ref_value if $eq_type == CMP_LT;
    return $value <= $ref_value if $eq_type == CMP_LE;
    return $value == $ref_value if $eq_type == CMP_EQ;
    return $value >= $ref_value if $eq_type == CMP_GE;
    return $value >  $ref_value if $eq_type == CMP_GT;
    return $value != $ref_value if $eq_type == CMP_NE;
    return 0;
}

# Prints the usage text to STDERR.  Any arguments passed in are reported
# first as an unknown option.
sub help_msgs
{
    print STDERR "Unknown option: @_\n" if (@_);
    print STDERR
        "Syntax: stream_grep [options] \n",
        "Stream_grep takes in a CrystFEL stream and outputs a stream \n",
        "with only chunks matching the specific tag-name and tag-value.\n\n",
        "-h, --help\t Displays this help message.\n",
        "-i, --input=<file>\t Input CrystFEL stream filename (default is stdin)\n",
        "-o, --output=<file>\t Output CrystFEL stream filename (default is stdout)\n",
        "-n, --tag-name=<name>\t Name of tag to match on\n",
        "-v, --invert-match\t Select non-matching chunks\n",
        "\n",
        "--cell-a\t Use the smallest unit cell length [nm] as the tag-name\n",
        "--cell-b\t Use the middle unit cell length [nm] as the tag-name\n",
        "--cell-c\t Use the largest unit cell length [nm] as the tag-name\n",
        "--cell-alpha\t Use the first rotation angle [deg] as the tag-name\n",
        "--cell-beta\t Use the second rotation angle [deg] as the tag-name\n",
        "--cell-gamma\t Use the third rotation angle [deg] as the tag-name\n",
        "\n",
        "-eq <value>,\t Match all chunks of the stream with tag values equal to the given value\n",
        "-ne <value>,\t Match all chunks of the stream with tag values not equal to the given value\n",
        "-lt <value>,\t Match all chunks of the stream with tag values less then the given value\n",
        "-le <value>,\t Match all chunks of the stream with tag values less then or equal to the given value\n",
        "-gt <value>,\t Match all chunks of the stream with tag values greater then the given value\n",
        "-ge <value>,\t Match all chunks of the stream with tag values greater then or equal to the given value\n",
        "\n",
        "Usage note: if --tag-name is specified without a comparison tag-value then ",
        "all chunks with the tag-name match.\n";
    return;
}

# Run only when executed as a script, so the subs above can be loaded alone.
exit(main()) unless caller;

1;