From b664ee171f477a69133212ce37a4c33177b3bedb Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Fri, 19 Sep 2008 22:05:40 +0000 Subject: [PATCH] prelim readline work --- yaz-cleanup | 84 +++++++++++++++++++++++++++++++++++++++++++++------------- 1 files changed, 65 insertions(+), 19 deletions(-) diff --git a/yaz-cleanup b/yaz-cleanup index 1f9d93d..091682c 100755 --- a/yaz-cleanup +++ b/yaz-cleanup @@ -3,34 +3,37 @@ use strict; use warnings; -my $skip = shift || 0; +use Getopt::Long; +use Term::ReadLine; + +my $term = new Term::ReadLine 'yaz-cleanup'; +my $OUT = $term->OUT || \*STDOUT; + my $count = 0; -$| = 1; +my $line = ''; + +my @record = (); +my @context= (); + +my %commands = ( '?' => \&help, + h => \&help, + c => \&print_context, + d => \&dump_record, + q => \&quit, + ); + open MARC, '<', 'incoming.marc.xml'; open NUMARC, '>', 'incoming.clean.marc.xml'; -if ($skip) { - until ($count == ($skip - 1)) { - my $t = ; - print NUMARC $t; - $count++; - printf("\rSpinning on to record %s (%2.2f%%)", $skip, ($count / $skip *100)) - unless ($count % 1000); - } - print "\nScrubbing resumes...\n" if $skip; -} - -my $line1 = ; +my $line1 = getline(); -while (my $line2 = ) { - $count++; +while (my $line2 = getline()) { # catch empty datafield elements if ($line1 =~ m//) { if ($line2 =~ m||) { print "Empty datafield scrubbed at line $count\n"; - $line1 = ; - $count++; + $line1 = getline(); next; } } @@ -48,7 +51,7 @@ while (my $line2 = ) { $line1 =~ s/tag="(\d\d) /tag="0$1/g; # naked ampersands - die "Looks like naked ampersand at line $count: $line1" + edit("Looks like naked ampersand", $line1) if ($line1 =~ /&/ && $line1 !~ /&\w{1,7};/); # subfields can't be non-alphanumeric @@ -60,3 +63,46 @@ while (my $line2 = ) { $line1 = $line2; } print NUMARC $line1; + +sub edit { + my ($msg, $line_in) = @_; + print $OUT "\n".$msg, " at line $count:\n"; + print $OUT "\t$line_in\n"; + while (1) { + my $line = $term->readline('yaz-cleanup>'); + $commands{$line}->(); + } +} + +sub print_context { + print $OUT "\n", join(' ','',@context[0..2]); + print $OUT '==>', $context[3]; + print $OUT ' ', $context[4],"\n"; +} + +sub getline { + my $l = ; + $count++; + if (defined $l) { + if ($l =~ //) + { @record = ($l) } + else + { push @record, $l } + push @context, $l; + shift @context if (@context > 5); + } + return $l; +} + +sub help { +print $OUT <