From 7b652e662ee5f804368620a6bac0821bd6e1999f Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Tue, 9 Dec 2008 22:38:41 +0000 Subject: [PATCH] marc-cleanup: exception dumping is better now datafield regex has been modified to handle greater than 2 indicators --- marc-cleanup | 38 +++++++++++++++++++++++++------------- 1 files changed, 25 insertions(+), 13 deletions(-) diff --git a/marc-cleanup b/marc-cleanup index cb4fda1..b90ff48 100755 --- a/marc-cleanup +++ b/marc-cleanup @@ -17,13 +17,14 @@ initialize($conf); populate_trash() if ($conf->{trashfile}); # set up files, since everything appears to be in order -my $EXMARC = ''; -open MARC, '<:utf8', (shift || 'incoming.marc.xml'); -open my $NUMARC, '>:utf8', $conf->{output}; +open MARC, '<:utf8', (shift || 'incoming.marc.xml') + or die "Can't open input file $!\n"; +open my $NUMARC, '>:utf8', $conf->{output} + or die "Can't open output file $!\n"; open my $OLD2NEW, '>', 'old2new.map' if ($conf->{'renumber-from'} and $conf->{'original-subfield'}); -#print $NUMARC '',"\n"; -#print $NUMARC '',"\n"; +my $EXMARC = 'EX'; + my @record = (); # current record storage my %recmeta = (); # metadata about current record @@ -112,7 +113,7 @@ sub do_automated_cleanups { stow_record_data(); # catch empty datafield elements - if ($record[$ptr] =~ m//) { + if ($record[$ptr] =~ m/|) { my @a = @record[0 .. $ptr - 1]; my @b = @record[$ptr + 2 .. $#record]; @@ -179,10 +180,15 @@ sub do_automated_cleanups { sub stow_record_data { # get tag data if we're looking at it - if ($record[$ptr] =~ m//) { + if ($record[$ptr] =~ m/{'original-subfield'} and $1 == $conf->{'original-tag'}) { @@ -271,10 +277,10 @@ sub write_record { my ($FH) = @_; my $trash = $conf->{trash}; - # uninitialized $EXMARC - if ($FH eq '') { + if ($FH eq 'EX') { $EXMARC = undef; - open $EXMARC, '>:utf8', $conf->{exception}; + open $EXMARC, '>:utf8', $conf->{exception} + or die "Can't open exception file $!\n"; $FH = $EXMARC; } @@ -331,8 +337,13 @@ sub write_record { } } - # and finally, actually write the record + # actually write the record print $FH @record,"\n"; + + # if we were dumping to exception file, nuke the record and set ptr + # to terminate processing loop + @record = ('a'); + $ptr = 0; } sub print_fullcontext { @@ -460,6 +471,7 @@ sub display_lines { sub dump_record { my (@explanation) = @_; + print $OUT @explanation; $recmeta{explanation} = join(' ', 'Tag', $recmeta{tag}, @explanation); write_record($EXMARC); return 1; -- 1.7.2.5