X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=marc_cleanup;h=524109eb3dcde5e7bf41a9717eb01e55c142d6f2;hp=e79180ac2d2f8a37fe4c775ba4c5645ed1db2b85;hb=8c0be645ca11beb5124c93618480f281dfaee6be;hpb=5a192db46cdadef98017b1b52acfc09946561188 diff --git a/marc_cleanup b/marc_cleanup index e79180a..524109e 100755 --- a/marc_cleanup +++ b/marc_cleanup @@ -111,7 +111,7 @@ while ( buildrecord() ) { # subfields can't be non-alphanumeric if ($record[$ptr] =~ /c/code="c">/; - $record[$ptr] =~ s/code=" ">\$/code="c">\$/; + if ($record[$ptr] =~ /code=" ">c/) { + message('Fixing probable subfield c, scenario 1'); + $record[$ptr] =~ s/code=" ">c/code="c">/; + } + if ($record[$ptr] =~ /code=" ">\$/) { + message('Fixing probable subfield c, scenario 2'); + $record[$ptr] =~ s/code=" ">\$/code="c">\$/; + } if ($c->{'fix-subfield'}) { - $record[$ptr] =~ s/code="&">/code="$c->{'fix-subfield'}">/; - $record[$ptr] =~ s/code="\P{IsAlnum}">/code="$c->{'fix-subfield'}">/; - $record[$ptr] =~ s/code="">/code="$c->{'fix-subfield'}">/; + if ($record[$ptr] =~ /code="&">/) { + message('Fixing & for subfield code'); + $record[$ptr] =~ s/code="&">/code="$c->{'fix-subfield'}">/; + } + if ($record[$ptr] =~ /code="(.*?\P{IsAlnum}.*?)">/) { + message("Fixing non-alphanumeric subfield code: $1 -> " . $c->{'fix-subfield'}); + $record[$ptr] =~ s/code=".*?\P{IsAlnum}.*?">/code="$c->{'fix-subfield'}">/; + } + if ($record[$ptr] =~ /code="">/) { + message('Fixing null subfield code'); + $record[$ptr] =~ s/code="">/code="$c->{'fix-subfield'}">/; + } } } return 0; @@ -645,6 +660,13 @@ sub initialize { $c->{'renumber-tag'} = 903 unless defined $c->{'renumber-tag'}; $c->{'renumber-subfield'} = 'a' unless defined $c->{'renumber-subfield'}; $c->{window} = 9; + if ($c->{marcfile} and $c->{prefix}) { abort('You can not declare a marc file and prefix.'); } + if ($c->{marcfile}) { + $c->{output} = join('.',$c->{marcfile},'clean') + unless $c->{output}; + $c->{exception} = join('.',$c->{marcfile},'exception') + unless $c->{exception}; + } if ($c->{prefix}) { $c->{output} = join('.',$c->{prefix},'clean','marc','xml') unless $c->{output}; @@ -670,6 +692,12 @@ sub initialize { if ( $c->{'original-tag'} and $c->{trash}->has($c->{'original-tag'}) ); } +sub abort { + my $msg = shift; + print STDERR "$0: $msg", "\n"; + exit 1; +} + sub show_help { print <