From 4c1ae3ca6b887181841f8af05a1446de534caf82 Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Wed, 8 Apr 2009 04:46:55 +0000 Subject: [PATCH] improved trash tag handling by never putting them in the record to begin with (much less code as well) --- marc_cleanup | 58 +++++++++++++++++++++++++++------------------------------- 1 files changed, 27 insertions(+), 31 deletions(-) diff --git a/marc_cleanup b/marc_cleanup index 57c5826..ee09e3c 100755 --- a/marc_cleanup +++ b/marc_cleanup @@ -127,6 +127,7 @@ print $OUT "\nDone. \n"; sub do_automated_cleanups { $ptr = 0; until ($ptr == $#record) { + # catch empty datafield elements if ($record[$ptr] =~ m/|) { @@ -181,12 +182,6 @@ sub do_automated_cleanups { message("Dollar sign corrected"); } - # clean up tags with spaces in them - $record[$ptr] =~ s/tag=" /tag="00/g; - $record[$ptr] =~ s/tag=" /tag="0/g; - $record[$ptr] =~ s/tag="-/tag="0/g; - $record[$ptr] =~ s/tag="(\d\d) /tag="0$1/g; - # automatable subfield maladies $record[$ptr] =~ s/code=" ">c/code="c">/; $record[$ptr] =~ s/code=" ">\$/code="c">\$/; @@ -287,21 +282,43 @@ to the driver loop. sub buildrecord { my $l = ''; + my $istrash = 0; + my $trash = $conf->{trash}; + $l = while (defined $l and $l !~ //); return $l unless defined $l; @record = (); %recmeta = (); $conf->{ricount}++; - until ($l =~ m||) - { push @record, $l; $l = ; } + until ($l =~ m||) { + # clean up tags with spaces in them + $l =~ s/tag=" /tag="00/g; + $l =~ s/tag=" /tag="0/g; + $l =~ s/tag="-/tag="0/g; + $l =~ s/tag="(\d\d) /tag="0$1/g; + + # excise unwanted tags + if ($istrash) { + if ($l =~ m|; next } + } + if ($l =~ m/has($1) or ($conf->{autoscrub} and $1 =~ /\D/)) + { $istrash = 1; next } + } + + push @record, $l; + $l = ; + } push @record, $l; return 1; } sub write_record { my ($FH) = @_; - my $trash = $conf->{trash}; if ($FH eq 'EX') { $EXMARC = undef; @@ -314,27 +331,6 @@ sub write_record { print $FH '\n" if(defined $recmeta{explanation}); - # excise unwanted tags - if (defined $trash or $conf->{autoscrub}) { - my @trimmed = (); - my $istrash = 0; - for my $line (@record) { - if ($istrash) { - $istrash = 0 if $line =~ m|has($tag) or ($conf->{autoscrub} and $tag =~ /\D/)) { - $istrash = 1; - next - } - } - push @trimmed, $line; - } - @record = @trimmed; - } - # add 903(?) with new record id my $renumber = ''; if ($conf->{'renumber-from'}) { @@ -597,7 +593,7 @@ sub initialize { $c->{window} = 5; if ($c->{trashfile}) { - $c->{trash} = Equinox::Migration::SimpleTagList->new($conf->{trashfile}) + $c->{trash} = Equinox::Migration::SimpleTagList->new(file => $conf->{trashfile}) } else { $c->{trash} = Equinox::Migration::SimpleTagList->new; } -- 1.7.2.5