X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=extract_loadset;h=61326bdd0c575e3dfd069b18139926353c63aa8b;hp=9b0be3c68e23dddc9d61e05d21f69183da4eb71a;hb=ed724b63f7d32cf5bb1a6e7b4c915d056413e899;hpb=072efb63a94c69c32a84c97b92d1d30441ab72f2 diff --git a/extract_loadset b/extract_loadset old mode 100644 new mode 100755 index 9b0be3c..61326bd --- a/extract_loadset +++ b/extract_loadset @@ -1,4 +1,21 @@ #!/usr/bin/perl + +# Copyright 2009-2012, Equinox Software, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + use strict; use warnings; use open ':utf8'; @@ -8,12 +25,31 @@ use Getopt::Long; my $conf = {}; # configuration hashref initialize($conf); -open FP, '<', shift or die "Can't open input file: $!\n"; - +# build exclusion hash +open FP, '<', shift or die "Can't open matchset file: $!\n"; +my %exclude = (); while () { + chomp; + my ($lead,$sub) = split /\t/; + $sub =~ s/\s//g; # any whitespace is extraneous + $exclude{$sub} = 1 unless ($sub < $conf->{lowerbound}); +} +close FP; +# strip exclusions from marcxml file +open MI, '<', $conf->{input} or die "Can't open input file: $!\n"; +open MO, '>', $conf->{output} or die "Can't open output file: $!\n"; +while () { + m/tag="$conf->{tag}".+?(\d+){reverse}) { + print MO if $exclude{$1}; + } else { + print MO unless $exclude{$1}; + } } + sub initialize { my ($c) = @_; my @missing = (); @@ -22,15 +58,23 @@ sub initialize { binmode(STDIN, ':utf8'); my $rc = GetOptions( $c, - 'prefix|p=s', + 'lowerbound|l=i', + 'input|i=s', + 'output|o=s', + 'tag|t=i', + 'subfield|s=s', + 'reverse|r', 'help|h', ); show_help() unless $rc; show_help() if ($c->{help}); + $conf->{tag} = $conf->{tag} || 903; + $conf->{subfield} = $conf->{subfield} || 'a'; + my @keys = keys %{$c}; show_help() unless (@ARGV and @keys); - for my $key ('prefix') + for my $key ('output', 'lowerbound', 'input') { push @missing, $key unless $c->{$key} } if (@missing) { print "Required option: ", join(', ', @missing), " missing!\n"; @@ -40,7 +84,14 @@ sub initialize { sub show_help { print <