From 22906a39927ccd8905b4bab0980c1eaa216ace6a Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Thu, 18 Sep 2008 20:10:29 +0000 Subject: [PATCH] spin on argument --- yaz-cleanup | 31 +++++++++++++++++++++++++++++-- 1 files changed, 29 insertions(+), 2 deletions(-) diff --git a/yaz-cleanup b/yaz-cleanup index fc08740..b3d5eda 100755 --- a/yaz-cleanup +++ b/yaz-cleanup @@ -1,21 +1,48 @@ #!/usr/bin/perl +use strict; +use warnings; + +my $skip = shift || 0; +my $count = 0; +$| = 1; + open MARC, '<', 'incoming.marc.xml'; open NUMARC, '>', 'incoming.clean.marc.xml'; -$line1 = ; +until ($count == ($skip - 1)) { + my $t = ; + print NUMARC $t; + $count++; + printf("\rSpinning on to record %s (%2.2f%%)", $skip, ($count / $skip *100)) + unless ($count % 1000); +} +print "\nScrubbing resumes...\n" if $skip; + +my $line1 = ; -while ($line2 = ) { +while (my $line2 = ) { + $count++; + # catch empty datafield elements if ($line1 =~ m//) { if ($line2 =~ m||) { $line1 = ; + $count++; next; } } + + # clean up tags with spaces in them $line1 =~ s/tag=" /tag="00/g; $line1 =~ s/tag=" /tag="0/g; $line1 =~ s/tag="-/tag="0/g; $line1 =~ s/tag="(\d\d) /tag="0$1/g; + + # subfields can't be non-alphanumeric + die "Junk in subfield at line $count: $line1" + if $line1 =~ /