X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=extract_loadset;h=61326bdd0c575e3dfd069b18139926353c63aa8b;hp=a4f41accb46024e3c59d37c41c042c06ffcf704c;hb=f82f5ee9f659cdb5b9b3e77cea082f8f00381097;hpb=6891bd6d84b8289beabbf609643deb75467a3026 diff --git a/extract_loadset b/extract_loadset index a4f41ac..61326bd 100755 --- a/extract_loadset +++ b/extract_loadset @@ -1,4 +1,21 @@ #!/usr/bin/perl + +# Copyright 2009-2012, Equinox Software, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + use strict; use warnings; use open ':utf8'; @@ -12,17 +29,24 @@ initialize($conf); open FP, '<', shift or die "Can't open matchset file: $!\n"; my %exclude = (); while () { + chomp; my ($lead,$sub) = split /\t/; + $sub =~ s/\s//g; # any whitespace is extraneous $exclude{$sub} = 1 unless ($sub < $conf->{lowerbound}); } close FP; # strip exclusions from marcxml file -open MI, '<', $conf->{marc} or die "Can't open input file: $!\n"; +open MI, '<', $conf->{input} or die "Can't open input file: $!\n"; open MO, '>', $conf->{output} or die "Can't open output file: $!\n"; while () { - m/tag="$conf->{tag}",+?(\d+){tag}".+?(\d+){reverse}) { + print MO if $exclude{$1}; + } else { + print MO unless $exclude{$1}; + } } @@ -39,6 +63,7 @@ sub initialize { 'output|o=s', 'tag|t=i', 'subfield|s=s', + 'reverse|r', 'help|h', ); show_help() unless $rc; @@ -66,6 +91,7 @@ Usage is: extract_loadset -l BOUND -i INPUTXML -o OUTPUTXML MATCHSET --output -o MARCXML output file --tag -t MARC tag to use as identifier (default: 903) --subfield -s Subfield of --tag argument (default: 'a') + --reverse -r Output subordinate bibs rather than lead bibs HELP exit; }