my %reccontext = ();
my @linecontext= (); # last 5 lines of file
-open MARC, '<', 'incoming.marc.xml';
+my $input = shift || 'incoming.marc.xml';
+
+open MARC, '<', $input;
open my $NUMARC, '>', 'incoming.clean.marc.xml';
print $NUMARC '<?xml version="1.0" encoding="UTF-8"?>',"\n";
open my $EXMARC, '>', 'incoming.exceptions.marc.xml';
print $EXMARC '<?xml version="1.0" encoding="UTF-8"?>',"\n";
-open MARC2, '<', 'incoming.marc.xml';
+open MARC2, '<', $input;
<MARC2>;
# this is the dispatch table which drives command selection in
C => \&print_linecontext,
k => \&kill_line,
o => \&show_original,
+ m => \&merge_lines,
t => \&commit_edit,
x => \&dump_record,
q => \&quit,
}
# naked ampersands
- if ($line =~ /&/ && $line !~ /&\w{1,7};/)
+ if ($line =~ /&/ && $line !~ /&\w+?;/)
{ edit("Looks like naked ampersand", $line); next }
# subfields can't be non-alphanumeric
- if ($line =~ /<subfield code="[^[:alnum:]]"/)
- { edit("Junk in subfield", $line); next }
+ if ($line =~ /<subfield code="(.+?)"/) {
+ my $match = $1;
+ if ($match =~ /\P{IsAlnum}/) {
+ print $OUT "\n$match\n";
+ edit("Junk in subfield", $line);
+ next;
+ }
+ }
}
print $NUMARC "</xml>\n";
print_context();
while (1) {
my $line = $term->readline('yaz-cleanup>');
- if (length $line == 1)
+ if (length $line < 2)
{ next unless (defined $commands{$line}) }
if (defined $commands{$line}) {
my $term = $commands{$line}->($line_in);
%reccontext = ();
$reccount++;
} elsif ($l =~ m|</record>|) {
+ push @record, $l;
write_record($NUMARC) if $reccount;
} else {
push @record, $l;
#-----------------------------------------------------------------------------------
sub print_context {
- print "\n Tag: ",$reccontext{tag}, " Ind1:'",
+ print "\n Tag:",$reccontext{tag}, " Ind1:'",
$reccontext{ind1},"' Ind2:'", $reccontext{ind2}, "'";
print_linecontext();
return 0;
return 0;
}
+sub merge_lines {
+ my $last = pop @record;
+ $last =~ s/^\s+//;
+ $record[-1] =~ s/\n//;
+ $record[-1] = join('', $record[-1], $last);
+ my @temp = ("\n");
+ push @temp, @linecontext[0..1];
+ $temp[3] = $record[-1];
+ $temp[4] = $linecontext[4];
+ @linecontext = @temp;
+ print_linecontext();
+ return 0;
+}
+
sub kill_line {
pop @record;
$linecontext[3] = " [LINE KILLED]\n";
Commands: c Show record context ('C' for brief context)
k Kill this line (remove from record)
- m Merge indicated line with previous line [NOT IMPLEMENTED]
+ m Merge indicated line with previous line
o Show original line
t Commit changes and resume stream edit
x Write this record to the exception file instead of output