X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=marc_cleanup;h=e0c20a781afd6f13c10402f017df98f913e9e8cf;hp=53801f3438baf905823f94aaaf87a768761fb271;hb=adfcc2989bbcc6e6c2aab72a4464fd31771d5354;hpb=26d762670d7610ed9ccb1c36a8dbab3169e4e2a3
diff --git a/marc_cleanup b/marc_cleanup
index 53801f3..e0c20a7 100755
--- a/marc_cleanup
+++ b/marc_cleanup
@@ -1,4 +1,21 @@
#!/usr/bin/perl
+
+# Copyright 2009-2012, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
require 5.10.0;
use strict;
@@ -76,7 +93,7 @@ while ( buildrecord() ) {
until ($ptr == $#record) {
# get datafield/tag data if we have it
$rc = stow_record_data() if ($c->{'renumber-from'} and $c->{'original-tag'});
- return $rc if $rc;
+ next if $rc;
# naked ampersands
if ($record[$ptr] =~ /&/ && $record[$ptr] !~ /&\w+?;/)
@@ -92,12 +109,12 @@ while ( buildrecord() ) {
}
# subfields can't be non-alphanumeric
- #if ($record[$ptr] =~ /|) {
+ $record[$ptr] =~ s|\s{10,}||;
+ message("Trailing whitespace trimmed from subfield contents");
+ }
+
# automatable subfield maladies
$record[$ptr] =~ s/code=" ">c/code="c">/;
$record[$ptr] =~ s/code=" ">\$/code="c">\$/;
@@ -186,13 +209,13 @@ sub do_automated_cleanups {
sub stow_record_data {
# get tag data if we're looking at it
my $tag = 0;
- if ($record[$ptr] =~ m/<(control|data)field tag="(?<TAG>.{3})"/) {
- $recmeta{tag} = $+{TAG};
+ if ($record[$ptr] =~ m/<(?:control|data)field tag="(.{3})"/) {
+ $recmeta{tag} = $1;
$tag = $recmeta{tag};
- $record[$ptr] =~ m/ind1="(?<IND1>.)"/;
- $recmeta{ind1} = $+{IND1} || '';
- $record[$ptr] =~ m/ind2="(?<IND2>.)"/;
- $recmeta{ind2} = $+{IND2} || '';
+ $record[$ptr] =~ m/ind1="(.)"/;
+ $recmeta{ind1} = $1 || '';
+ $record[$ptr] =~ m/ind2="(.)"/;
+ $recmeta{ind2} = $1 || '';
unless ($tag) {
message("Autokill record: no detectable tag");
@@ -299,7 +322,7 @@ sub buildrecord {
my $istrash = 0;
my $trash = $c->{trash};
- $l = while (defined $l and $l !~ //);
+ $l = while (defined $l and $l !~ /{ricount}++;
@@ -603,7 +626,7 @@ sub initialize {
show_trashhelp() if ($c->{trashhelp});
# defaults
- my $pfx = $c->{prefix} // "bibs";
+ my $pfx = defined($c->{prefix}) ? $c->{prefix} : "bibs";
$c->{ricount} = 0;
$c->{rocount} = 0;
$c->{'renumber-tag'} = 903 unless defined $c->{'renumber-tag'};
@@ -641,7 +664,7 @@ Options
--output -o Cleaned MARCXML output filename
--exception -x Exception (dumped records) MARCXML filename
or
- --prefix=<PREFIX>> -p Shared prefix for output/exception files. Will produce
+ --prefix=<PREFIX> -p Shared prefix for output/exception files. Will produce
PREFIX.clean.marc.xml and PREFIX.exception.marc.xml
--marcfile -m Input filename. Defaults to PREFIX.marc.xml