From 4d1ec7e57f388288deae342760ab8924d7f54068 Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Fri, 3 Oct 2008 05:30:31 +0000 Subject: [PATCH] quotable sub args --- marc-cleanup | 43 +++++++++++++++++++++++++++++++++++++++---- 1 files changed, 39 insertions(+), 4 deletions(-) diff --git a/marc-cleanup b/marc-cleanup index 9a06e40..3a22e5a 100755 --- a/marc-cleanup +++ b/marc-cleanup @@ -13,6 +13,7 @@ my $OUT = $term->OUT || \*STDOUT; my $count = 0; my $reccount = 0; +my $oreccount = 0; my $line = ''; my @record = (); # current record storage @@ -62,15 +63,26 @@ while (my $line = getline()) { if ($line =~ m||) { if ($record[-2] =~ m//) { pop @record; pop @record; - print $OUT "\rEmpty datafield scrubbed at line $count\n"; + message("Empty datafield scrubbed"); next; } } + # pad short leaders + if ($line =~ m|(.+?)|) { + my $leader = $1; + if (length $leader < 24) { + $leader .= ' ' x (20 - length($leader)); + $leader .= "4500"; + $line = "$leader\n"; + message("Short leader padded"); + } + } + # clean misplaced dollarsigns if ($line =~ m|c?\d+\.\d{2}|) { $line =~ s|"\$">c?(\d+\.\d{2})|"c">\$$1|; - print $OUT "\rDollar sign in subfield code corrected at line $count\n"; + message("Dollar sign corrected"); } # clean up tags with spaces in them @@ -86,6 +98,9 @@ while (my $line = getline()) { $reccontext{ind2} = $3; } + # and stow line back in record + $record[-1] = $line; + # naked ampersands if ($line =~ /&/ && $line !~ /&\w+?;/) { edit("Naked ampersand", $line); next } @@ -121,7 +136,7 @@ Handles the Term::ReadLine loop sub edit { my ($msg, $line_in) = @_; - print $OUT "\r".$msg, " at line $count (record $reccount)\n"; + message($msg); print_context(); while (1) { @@ -173,6 +188,7 @@ sub getline { sub write_record { my ($FH) = @_; + $oreccount++ if ($FH eq $NUMARC); print $FH '\n" if(defined $reccontext{explanation}); print $FH @record; @@ -184,12 +200,31 @@ sub update_linecontext { shift @linecontext if (@linecontext > 5); } +sub message { + my ($msg) = @_; + print $OUT "\r$msg at record $reccount/",$oreccount + 1,"\n"; + +} + #----------------------------------------------------------------------------------- # command routines #----------------------------------------------------------------------------------- sub substitute { - my ($line_in, $ofrom, $to) = @_; + my ($line_in, @chunks) = @_; + my $ofrom = shift @chunks; + if ($ofrom =~ /^'/ or !@chunks) { + until ($ofrom =~ /'$/) + { $ofrom .= join(' ','',shift @chunks) } + $ofrom =~ s/^'//; $ofrom =~ s/'$//; + } + my $to = shift @chunks; + if ($to =~ /^'/) { + until ($to =~ /'$/ or !@chunks) + { $to .= join(' ','',shift @chunks) } + $to =~ s/^'//; $to =~ s/'$//; + } + my $from = ''; for my $char (split(//,$ofrom)) { $char = "\\" . $char if ($char =~ /\W/); -- 1.7.2.5