9 my $term = new Term::ReadLine 'yaz-cleanup';
10 my $OUT = $term->OUT || \*STDOUT;
18 my %commands = ( '?' => \&help,
26 open MARC, '<', 'incoming.marc.xml';
27 open NUMARC, '>', 'incoming.clean.marc.xml';
29 my $line1 = getline();
31 while (my $line2 = getline()) {
32 # catch empty datafield elements
33 if ($line1 =~ m/<datafield tag="..." ind1="." ind2=".">/) {
34 if ($line2 =~ m|</datafield>|) {
35 print "Empty datafield scrubbed at line $count\n";
41 # clean misplaced dollarsigns
42 if ($line1 =~ m|<subfield code="\$">c?\d+\.\d{2}|) {
43 $line1 =~ s|"\$">c?(\d+\.\d{2})|"c">\$$1|;
44 print "Dollar sign in subfield code corrected at line $count\n";
47 # clean up tags with spaces in them
48 $line1 =~ s/tag=" /tag="00/g;
49 $line1 =~ s/tag=" /tag="0/g;
50 $line1 =~ s/tag="-/tag="0/g;
51 $line1 =~ s/tag="(\d\d) /tag="0$1/g;
54 edit("Looks like naked ampersand", $line1)
55 if ($line1 =~ /&/ && $line1 !~ /&\w{1,7};/);
57 # subfields can't be non-alphanumeric
58 die "Junk in subfield at line $count: $line1"
59 if $line1 =~ /<subfield code="[^[:alnum:]]"/;
68 my ($msg, $line_in) = @_;
69 print $OUT "\n".$msg, " at line $count:\n";
70 print $OUT "\t$line_in\n";
72 my $line = $term->readline('yaz-cleanup>');
78 print $OUT "\n", join(' ','',@context[0..2]);
79 print $OUT '==>', $context[3];
80 print $OUT ' ', $context[4],"\n";
92 shift @context if (@context > 5);
100 Enter a replacement for this line, a blank line to dump this line, or a command.
101 Commands: c Show line context
102 d Dump this record (redirect to exceptions file)