From 26d762670d7610ed9ccb1c36a8dbab3169e4e2a3 Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Tue, 13 Apr 2010 19:18:52 +0000 Subject: [PATCH] recover gracefully after an XML parsing error --- fingerprinter | 10 +++++++++- marc_cleanup | 12 ++++++------ 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/fingerprinter b/fingerprinter index 93df981..dcf2e29 100755 --- a/fingerprinter +++ b/fingerprinter @@ -27,7 +27,15 @@ for my $file (@ARGV) { $batch->strict_off(); $batch->warnings_off(); - while ( $record = $batch->next ) { + my $record; + while ( 1 ) { + eval { $record = $batch->next; }; + if ($@) { + import MARC::File::XML; + print "bad record\n"; + next; + } + last unless $record; $count++; progress_ticker(); my $marc = undef; unless ( defined $record ) diff --git a/marc_cleanup b/marc_cleanup index 89f8ab0..53801f3 100755 --- a/marc_cleanup +++ b/marc_cleanup @@ -92,12 +92,12 @@ while ( buildrecord() ) { } # subfields can't be non-alphanumeric - if ($record[$ptr] =~ /