7 #use MARC::File::XML ( BinaryEncoding => 'utf-8' );
9 # configuration hashref
15 open F, "<", $conf->{idfile} or die "Can't open idfile '$conf->{idfile}': $!\n";  # abort early: an unopened handle would otherwise just read as empty
23 open $M, '<:utf8', $conf->{marcfile} or die "Can't open marcfile '$conf->{marcfile}': $!\n";  # MARCXML input, decoded as UTF-8
24 open $I, '>:utf8', $conf->{'output-import'} or die "Can't open output-import file '$conf->{'output-import'}': $!\n";  # output handle written via "print $I"
25 open $S, '>:utf8', $conf->{'output-shelve'} or die "Can't open output-shelve file '$conf->{'output-shelve'}': $!\n";  # output handle written via "print $S"
28 my $tag = $conf->{tag};
29 my $sub = $conf->{subfield};
31 /tag="$tag" ind1=" " ind2=" ">.*?<subfield code="$sub">(\d+)</;
32 if ($conf->{incoming}) {
33 print $S $_ if ($id{$1});
34 print $I $_ unless ($id{$1});;
36 print $S $_ unless ($id{$1});
37 print $I $_ if ($id{$1});;
41 unless ($conf->{count} % 100) {
42 print STDERR "\rProcessed: ",$conf->{count};
48 Performs boring script initialization. Handles argument parsing,
57 # set mode on existing filehandles
58 binmode(STDIN, ':utf8');
60 my $rc = GetOptions( $c,
71 show_help() unless $rc;
72 show_help() if ($c->{help});
74 $c->{'incoming-tag'} = 903;
75 $c->{'incoming-subfield'} = 'a';
76 $c->{'incumbent-tag'} = 901;
77 $c->{'incumbent-subfield'} = 'c';
78 my @keys = keys %{$c};
79 unless ($c->{incoming} or $c->{incumbent}) {
80 print "One of --incoming or --incumbent is required.\n";
83 if ($c->{incoming} and $c->{incumbent}) {
84 print "Only one of --incoming or --incumbent can be specified.\n";
87 for my $key ('idfile', 'marcfile', 'output-import', 'output-shelve')  # required keys; the shelve option is spelled 'output-shelve' (no trailing d), as in --output-shelve
88 { push @missing, $key unless $c->{$key} }  # collect every absent/false option so they can be reported together
90 print "Required option: ", join(', ', @missing), " missing!\n";
99 Display usage message when things go wrong
106 The purpose of this utility is to split a MARCXML file in twain,
107 producing a set of records which will be imported into Evergreen, and a
108 set of records which will not.
110 Usage is: $0 [REQUIRED ARGS]
112 --incoming \\___ One (and only one) of these two must
113 --incumbent / be specified
115 If --incoming is specified, the record ids in the file specified by
116 --idfile will be used as EXCLUSION data. That is, the given record
117 ids will be treated as records which match incumbent records and are
118 being compressed into existing data, and so WILL NOT be
119 imported. The --output-import file will contain records whose ids DO
120 NOT occur in --idfile; --output-shelve will contain the records
123 If --incumbent is specified, the reverse occurs.
125 --idfile -i File of record ids to use as source for matchpoints
126 --marcfile -m MARCXML source file
127 --output-import -oi Output MARCXML file for records to be imported
128 --output-shelve -os Output MARCXML file for records to be ignored
131 --tag -t MARC tag to use as matchpoint (default 903 for incoming,
133 --subfield -s Subfield of tag to use (default 'a' for incoming, 'c'