recover gracefully after an XML parsing error
author Galen Charlton <gmc@esilibrary.com>
Tue, 13 Apr 2010 19:18:52 +0000 (19:18 +0000)
committer Galen Charlton <gmc@esilibrary.com>
Tue, 13 Apr 2010 19:18:52 +0000 (19:18 +0000)
fingerprinter
marc_cleanup

index 93df981..dcf2e29 100755 (executable)
@@ -27,7 +27,15 @@ for my $file (@ARGV) {
     $batch->strict_off();
     $batch->warnings_off();
 
-    while ( $record = $batch->next ) {
+    my $record;
+    while ( 1 ) {
+        eval { $record = $batch->next; };
+        if ($@) {
+            import MARC::File::XML;
+            print "bad record\n";
+            next;
+        }
+        last unless $record;
         $count++; progress_ticker();
         my $marc = undef;
         unless ( defined $record )
index 89f8ab0..53801f3 100755 (executable)
@@ -92,12 +92,12 @@ while ( buildrecord() ) {
         }
 
         # subfields can't be non-alphanumeric
-        if ($record[$ptr] =~ /<subfield code="(.*?)"/) {
-            if ($1 =~ /\P{IsAlnum}/ or $1 eq '') {
-                edit("Junk in subfield code/Null subfield code");
-                next;
-            }
-        }
+        #if ($record[$ptr] =~ /<subfield code="(.*?)"/) {
+            #if ($1 =~ /\P{IsAlnum}/ or $1 eq '') {
+                #edit("Junk in subfield code/Null subfield code");
+                #next;
+            #}
+        #}
         # subfields can't be non-alphanumeric
         if ($record[$ptr] =~ /<subfield code="(\w{2,})"/) {
             edit("Subfield code larger than 1 char");