X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=marc_cleanup;h=e2a9390ecf6a33ce6cf52dc91b5664faf2989c36;hp=2c3a005df8e9566383aca6d37144077af5cc29f4;hb=9d35fadc55c2eb48cf90d72229d24d7c52d3ba96;hpb=583e4b7c22ba15f37168c0deb49728929a65e6fe
diff --git a/marc_cleanup b/marc_cleanup
index 2c3a005..e2a9390 100755
--- a/marc_cleanup
+++ b/marc_cleanup
@@ -93,7 +93,7 @@ while ( buildrecord() ) {
until ($ptr == $#record) {
# get datafield/tag data if we have it
$rc = stow_record_data() if ($c->{'renumber-from'} and $c->{'original-tag'});
- return $rc if $rc;
+ next if $rc;
# naked ampersands
if ($record[$ptr] =~ /&/ && $record[$ptr] !~ /&\w+?;/)
@@ -115,7 +115,7 @@ while ( buildrecord() ) {
next;
}
}
- # subfields can't be non-alphanumeric
+ # subfields can't be larger than 1 char (technically you could make the MARC format accommodate that:)
if ($record[$ptr] =~ /|) {
+ $record[$ptr] =~ s|\s{10,}||;
+ message("Trailing whitespace trimmed from subfield contents");
+ }
+
# automatable subfield maladies
$record[$ptr] =~ s/code=" ">c/code="c">/;
$record[$ptr] =~ s/code=" ">\$/code="c">\$/;
+
+ if ($c->{'fix-subfield'}) {
+ $record[$ptr] =~ s/code="&">/code="$c->{'fix-subfield'}">/;
+ $record[$ptr] =~ s/code="\P{IsAlnum}">/code="$c->{'fix-subfield'}">/;
+ $record[$ptr] =~ s/code="">/code="$c->{'fix-subfield'}">/;
+ }
}
return 0;
}
@@ -316,7 +328,7 @@ sub buildrecord {
my $istrash = 0;
my $trash = $c->{trash};
- $l = while (defined $l and $l !~ //);
+ $l = while (defined $l and $l !~ /{ricount}++;
@@ -609,6 +621,7 @@ sub initialize {
'renumber-subfield|rs=s',
'original-tag|ot=i',
'original-subfield|os=s',
+ 'fix-subfield|fs=s',
'script',
'no-strip9',
'trashfile|t=s',
@@ -672,13 +685,15 @@ Options
and renumbering is in effect, an old-to-new mapping
file (old2new.map) will be generated.
- --autoscrub -a Automatically remove non-numeric tags in data
- --nocollapse -n Don't compress records to one line on output
- --no-strip9 Don't autoremove 901/903 tags in data
- --trashfile -t File containing trash tag data (see --trashhelp)
+ --autoscrub -a Automatically remove non-numeric tags in data
+ --fix-subfield -fs Subfield code to use in place of non-alphanumeric
+ or empty subfield codes
+ --nocollapse -n Don't compress records to one line on output
+ --no-strip9 Don't autoremove 901/903 tags in data
+ --trashfile -t File containing trash tag data (see --trashhelp)
- --fullauto No manual edits. All problematic records dumped to
- exception file.
+ --fullauto No manual edits. All problematic records dumped to
+ exception file.
HELP
exit;