#!/usr/bin/perl
+
+# Copyright 2009-2012, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
use strict;
use warnings;
use open ':utf8';
my $conf = {}; # configuration hashref
initialize($conf);
-open FP, '<', shift or die "Can't open input file: $!\n";
-
+# build exclusion hash
+open FP, '<', shift or die "Can't open matchset file: $!\n";
+my %exclude = ();
while (<FP>) {
+ chomp;
my ($lead,$sub) = split /\t/;
- print $sub if $sub > 6999999)
+ $sub =~ s/\s//g; # any whitespace is extraneous
+ $exclude{$sub} = 1 unless ($sub < $conf->{lowerbound});
+}
+close FP;
+
+# strip exclusions from marcxml file
+open MI, '<', $conf->{input} or die "Can't open input file: $!\n";
+open MO, '>', $conf->{output} or die "Can't open output file: $!\n";
+while (<MI>) {
+ m/tag="$conf->{tag}".+?<subfield code="$conf->{subfield}">(\d+)</;
+ next unless defined $1;
+ if ($conf->{reverse}) {
+ print MO if $exclude{$1};
+ } else {
+ print MO unless $exclude{$1};
+ }
}
+
sub initialize {
my ($c) = @_;
my @missing = ();
binmode(STDIN, ':utf8');
my $rc = GetOptions( $c,
- 'filter|f=i'
+ 'lowerbound|l=i',
+ 'input|i=s',
'output|o=s',
+ 'tag|t=i',
+ 'subfield|s=s',
+ 'reverse|r',
'help|h',
);
show_help() unless $rc;
show_help() if ($c->{help});
+ $conf->{tag} = $conf->{tag} || 903;
+ $conf->{subfield} = $conf->{subfield} || 'a';
+
my @keys = keys %{$c};
show_help() unless (@ARGV and @keys);
- for my $key ('output', 'filter')
+ for my $key ('output', 'lowerbound', 'input')
{ push @missing, $key unless $c->{$key} }
if (@missing) {
print "Required option: ", join(', ', @missing), " missing!\n";
sub show_help {
print <<HELP;
-Usage is: extract_loadset [ARGS] INPUTFILE
+Usage is: extract_loadset -l BOUND -i INPUTXML -o OUTPUTXML MATCHSET
- --filter -f Record ID lower bound for
- --output -o
+ --lowerbound -l Lowest record ID which will be included in the loadset
+ --input -i MARCXML input file
+ --output -o MARCXML output file
+ --tag -t MARC tag to use as identifier (default: 903)
+ --subfield -s Subfield of --tag argument (default: 'a')
+ --reverse -r Output subordinate bibs rather than lead bibs
HELP
exit;
}