#!/usr/bin/perl
+
+# Copyright 2009-2012, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
use strict;
use warnings;
use open ':utf8';
open FP, '<', shift or die "Can't open matchset file: $!\n";
my %exclude = ();
while (<FP>) {
+ chomp;
my ($lead,$sub) = split /\t/;
+ $sub =~ s/\s//g; # any whitespace is extraneous
$exclude{$sub} = 1 unless ($sub < $conf->{lowerbound});
}
close FP;
# strip exclusions from marcxml file
-open MI, '<', $conf->{marc} or die "Can't open input file: $!\n";
+open MI, '<', $conf->{input} or die "Can't open input file: $!\n";
open MO, '>', $conf->{output} or die "Can't open output file: $!\n";
while (<MI>) {
- m/tag="$conf->{tag}",+?<subfield code="$conf->{subfield}">(\d+)</
- print MO unless $exclude{$1};
+ m/tag="$conf->{tag}".+?<subfield code="$conf->{subfield}">(\d+)</;
+ next unless defined $1;
+ if ($conf->{reverse}) {
+ print MO if $exclude{$1};
+ } else {
+ print MO unless $exclude{$1};
+ }
}
binmode(STDIN, ':utf8');
my $rc = GetOptions( $c,
- 'lowerbound|l=i'
+ 'lowerbound|l=i',
'input|i=s',
'output|o=s',
'tag|t=i',
'subfield|s=s',
+ 'reverse|r',
'help|h',
);
show_help() unless $rc;
sub show_help {
print <<HELP;
-Usage is: extract_loadset -l BOUND -i MARC -o OUTPUT MATCHSET
+Usage is: extract_loadset -l BOUND -i INPUTXML -o OUTPUTXML MATCHSET
--lowerbound -l Lowest record ID which will be included in the loadset
--input -i MARCXML input file
--output -o MARCXML output file
+ --tag -t MARC tag to use as identifier (default: 903)
+ --subfield -s Subfield of --tag argument (default: 'a')
+ --reverse -r Output subordinate bibs rather than lead bibs
HELP
exit;
}