# edit(), below
my %commands = ( c => \&print_context,
C => \&print_linecontext,
- k => \&kill_line,
o => \&show_original,
+ f => \&flip_lines,
+ k => \&kill_line,
m => \&merge_lines,
+ n => \&next_line,
+ p => \&prev_line,
s => \&substitute,
t => \&commit_edit,
x => \&dump_record,
my $sidx = 0;
while ( buildrecord() ) {
- print "\rWorking... ", $spinner[$sidx];
- $sidx = ($sidx == $#spinner) ? 0 : $sidx + 1;
+ unless ($conf->{ricount} % 100) {
+ print "\rWorking... ", $spinner[$sidx];
+ $sidx = ($sidx == $#spinner) ? 0 : $sidx + 1;
+ }
do_automated_cleanups();
}
$recptr++;
}
-
write_record($NUMARC);
}
print $NUMARC "</collection>\n";
$recptr = 0;
until ($recptr == $#record) {
# catch empty datafield elements
- if ($record[$recptr] =~ m|</datafield>|) {
- if ($record[$recptr + 1] =~ m/<datafield tag="..." ind1="." ind2=".">/) {
- my @a = @record[0, $recptr - 1];
- my @b = @record[$recptr + 1, $#record];
+ if ($record[$recptr] =~ m/<datafield tag="..." ind1="." ind2=".">/) {
+ if ($record[$recptr + 1] =~ m|</datafield>|) {
+ my @a = @record[0 .. $recptr - 1];
+ my @b = @record[$recptr + 2 .. $#record];
@record = (@a, @b);
message("Empty datafield scrubbed");
$recptr = 0;
}
}
# and quasi-empty subfields
- if ($record[$recptr] =~ m|<subfield code="\s*">\s*</sub|) {
- delete $record[$recptr];
- message("Empty subfield scrubbed");
- $recptr = 0;
- next;
+ if ($record[$recptr] =~ m|<subfield code="(.*?)">(.*?)</sub|) {
+ my $code = $1; my $content = $2;
+ if ($code =~ /\W/ and ($content =~ /\s+/ or $content eq '')) {
+ my @a = @record[0 .. $recptr - 1];
+ my @b = @record[$recptr + 1 .. $#record];
+ @record = (@a, @b);
+ message("Empty subfield scrubbed");
+ $recptr = 0;
+ next;
+ }
}
$recptr++;
}
message("Short leader padded");
}
}
+ if ($record[$recptr] =~ m|<controlfield tag="008">(.+?)</control|) {
+ #pad short 008
+ my $content = $1;
+ if (length $content < 40) {
+ $content .= ' ' x (40 - length($content));
+ $record[$recptr] = "<controlfield tag=\"008\">$content</controlfield>\n";
+ message("Short 008 padded");
+ }
+ }
# clean misplaced dollarsigns
if ($record[$recptr] =~ m|<subfield code="\$">c?\d+\.\d{2}|) {
sub edit {
my ($msg) = @_;
- my $trash = $conf->{trash};
- return if $trash->{ $recmeta{tag} };
- message($msg);
+ return if $conf->{trash}{ $recmeta{tag} };
+ message($msg, 1);
print_context();
+ # stow original problem line
+ $conf->{origline} = $record[$recptr];
+
while (1) {
my $line = $term->readline('marc-cleanup>');
my @chunks = split /\s+/, $line;
=cut
sub buildrecord {
- my $l = <MARC>;
+ my $l = '';
+ $l = <MARC> while (defined $l and $l !~ /<record>/);
return $l unless defined $l;
-
- $l = <MARC> until ($l =~ /<record>/);
@record = ($l);
%recmeta = ();
$conf->{ricount}++;
print $FH "</record>\n";
}
+sub print_context {
+ print " Tag:",$recmeta{tag}, " Ind1:'",
+ $recmeta{ind1},"' Ind2:'", $recmeta{ind2}, "'\n";
+ print_linecontext();
+ return 0;
+}
+
+sub print_linecontext {
+ my $low = ($recptr - 3 < 0) ? 0 : $recptr - 3;
+ print $OUT ' |', $record[$_] for ($low .. $recptr - 1);
+ print $OUT '==> |', $record[$recptr];
+ print $OUT ' |', $record[$recptr + 1], "\n";
+ return 0;
+}
+
sub message {
- my ($msg) = @_;
- print $OUT "\r", ' ' x 72, "\n";
- print $OUT "$msg at ",$conf->{ricount},"/",$conf->{rocount} + 1,"\n";
+ my ($msg, $new) = @_;
+ print $OUT "\r", ' ' x 72, "\n" if $new;
+ print $OUT "\r$msg at ",$conf->{ricount},"/",$conf->{rocount} + 1,"\n";
}
}
sub merge_lines {
+ # remove <subfield stuff; extract (probably wrong) subfield code
$record[$recptr] =~ s/^\s*<subfield code="(.*?)">//;
+ # and move to front of line
$record[$recptr] = join(' ', $1 , $record[$recptr]);
- $record[$recptr - 1] =~ s|<subfield>\n||;
+ # tear off trailing subfield tag from preceeding line
+ $record[$recptr - 1] =~ s|</subfield>\n||;
+ # join current line onto preceeding line
$record[$recptr - 1] = join('', $record[$recptr - 1], $record[$recptr]);
+ # erase current line
+ my @a = @record[0 .. $recptr - 1];
+ my @b = @record[$recptr + 1 .. $#record];
+ @record = (@a, @b);
+ # move record pointer to previous line
+ prev_line();
print_linecontext();
return 0;
}
sub kill_line {
- delete $record[$recptr];
+ my @a = @record[0 .. $recptr - 1];
+ my @b = @record[$recptr + 1 .. $#record];
+ @record = (@a, @b);
print_linecontext();
return 0;
}
return 1;
}
-sub commit_edit { return 1 }
-
-sub print_context {
- print " Tag:",$recmeta{tag}, " Ind1:'",
- $recmeta{ind1},"' Ind2:'", $recmeta{ind2}, "'";
+sub next_line {
+ $recptr++;
print_linecontext();
return 0;
}
-sub print_linecontext {
- my $low = ($recptr - 3 < 0) ? 0 : $recptr - 3;
- print $OUT ' ', $record[$_], "\n" for ($low .. $recptr - 1);
- print $OUT '==> |', $record[$recptr];
- print $OUT ' ', $record[$recptr + 1], "\n";
+sub prev_line {
+ $recptr--;
+ print_linecontext();
return 0;
}
sub show_original {
my ($line_in) = @_;
- print $OUT "\n$line_in\n";
+ print $OUT "\n", $conf->{origline}, "\n";
return 0;
}
+sub commit_edit { return 1 }
+
sub help {
print $OUT <<HELP;