From: Galen Charlton
Date: Mon, 11 Apr 2016 14:18:18 +0000 (-0400)
Subject: staged bib overlay: tweak bib filtering
X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=71f0b86281768fc929a70a0db5b0affdd475635d

staged bib overlay: tweak bib filtering

Filtering now always ignores staged bibs that are imported
or already marked as not to be imported. Also, bibs whose
XML is not well-formed are now excluded.

Signed-off-by: Galen Charlton
---

diff --git a/eg_staged_bib_overlay b/eg_staged_bib_overlay
index 8e723be..76bcf62 100755
--- a/eg_staged_bib_overlay
+++ b/eg_staged_bib_overlay
@@ -226,6 +226,8 @@ sub handle_filter_bibs {
                 FROM biblio.record_entry
                 WHERE deleted
             )
+            AND to_import
+            AND NOT imported
     });
     $sth1->execute();
     my $ct = $sth1->rows;
@@ -240,11 +242,24 @@ sub handle_filter_bibs {
                 FROM biblio.record_entry
                 WHERE edit_date >= ?
             )
-            AND to_import;
+            AND to_import
+            AND NOT imported
     });
     $sth2->execute($cutoff);
     $ct = $sth2->rows;
     report_progress("Filtering out $ct records edited after cutoff date of $cutoff");
+
+    my $sth3 = $dbh->prepare(qq{
+        UPDATE $schema.$batch
+        SET to_import = FALSE,
+            skip_reason = 'XML is not well-formed'
+        WHERE NOT xml_is_well_formed(marc)
+        AND to_import
+        AND NOT imported
+    });
+    $sth3->execute();
+    $ct = $sth3->rows;
+    report_progress("Filtering out $ct records whose XML is not well-formed");
 }
 
 sub handle_load_bibs {