X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=eg_staged_bib_overlay;h=a2d776cfa97d0b6ffd0e37993df1e49a71b031b0;hp=1d8f04a50e5a8d7b63d4d1465d2112c5769b76ec;hb=97d3c06d4fe2a6050868121a55f14ff4d5298226;hpb=be81214f660ec475396d21085ab6e28df3c7cddf diff --git a/eg_staged_bib_overlay b/eg_staged_bib_overlay index 1d8f04a..a2d776c 100755 --- a/eg_staged_bib_overlay +++ b/eg_staged_bib_overlay @@ -34,6 +34,8 @@ my $dbhost; my $batch; my $cutoff; my $wait = 1; +my $output; +my $link_skipped; my $ret = GetOptions( 'action:s' => \$action, @@ -45,6 +47,8 @@ my $ret = GetOptions( 'batch:s' => \$batch, 'cutoff:s' => \$cutoff, 'wait:i' => \$wait, + 'output:s' => \$output, + 'link-skipped' => \$link_skipped, ); abort('must specify --action') unless defined $action; @@ -57,7 +61,8 @@ abort('must specify --batch') unless defined $batch; abort(q{--action must be "stage_bibs", "filter_bibs", "load_bibs", "stage_auths", "match_auths", "load_new_auths", "overlay_auths_stage1", -"overlay_auths_stage2", "overlay_auths_stage3", "link_auth_auth"}) unless +"overlay_auths_stage2", "overlay_auths_stage3", "link_auth_auth", +"link_auth_bib", "export_skipped_bibs", or "export_skipped_auths"}) unless $action eq 'filter_bibs' or $action eq 'stage_bibs' or $action eq 'load_bibs' or @@ -68,7 +73,9 @@ abort(q{--action must be "stage_bibs", "filter_bibs", "load_bibs", "stage_auths" $action eq 'overlay_auths_stage2' or $action eq 'overlay_auths_stage3' or $action eq 'link_auth_auth' or - $action eq 'link_auth_bib' + $action eq 'link_auth_bib' or + $action eq 'export_skipped_bibs' or + $action eq 'export_skipped_auths' ; my $dbh = connect_db($db, $dbuser, $dbpw, $dbhost); @@ -114,7 +121,16 @@ if ($action eq 'link_auth_auth') { handle_link_auth_auth($dbh, $schema, $batch); } if ($action eq 'link_auth_bib') { - handle_link_auth_bib($dbh, $schema, $batch); + handle_link_auth_bib($dbh, $schema, $batch, $link_skipped); +} + +if ($action eq 'export_skipped_bibs') { + 
abort('must specify output file') unless defined $output; + handle_export_skipped_bibs($dbh, $schema, $batch, $output); +} +if ($action eq 'export_skipped_auths') { + abort('must specify output file') unless defined $output; + handle_export_skipped_auths($dbh, $schema, $batch, $output); } sub abort { @@ -155,10 +171,10 @@ This program has several modes controlled by the --action switch: main heading. --action load_new_auths - load new (unmatched) authorities --action overlay_auths_stage1 - overlay based on LCCN where - heading has not change; this step + heading has NOT changed; this step disables propagation to bib records --action overlay_auths_stage2 - overlay based on LCCN where heading - has NOT changed; propagates changes + HAS changed; propagates changes to bib records --action overlay_auths_stage3 - overlay for records where a cancelled LCCN is replaced with a new one @@ -167,7 +183,18 @@ This program has several modes controlled by the --action switch: or added in this batch. --action link_auth_bib - run authority_control_fields.pl for the bibs that were overlaid in this - batch. + batch. Add --link-skipped to specify + that bibs that were matched but + skipped due to having been edited after + the cutoff should be linked (rather + than linking the imported bibs) + --action export_skipped_bibs - export to ISO2709 file whose name is + specified by --output those bibs + that had been edited after the cutoff. + --action export_skipped_auths - export to ISO2709 file whose name is + specified by --output those authorities + that could not be definitively + handled as updates or adds.
Several switches are used regardless of the specified action: @@ -254,14 +281,19 @@ sub handle_stage_bibs { $dbh->commit; $dbh->begin_work; } - my $marc = MARC::Record->new_from_usmarc($_); - my $bibid = $marc->subfield('901', 'c'); - if ($bibid !~ /^\d+$/) { - print STDERR "Record $i is suspect; skipping\n"; + eval { + my $marc = MARC::Record->new_from_usmarc($_); + my $bibid = $marc->subfield('901', 'c'); + if ($bibid !~ /^\d+$/) { + die('Subfield 901$c is not numeric or missing.'); + } + my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record()); + $ins->execute($xml, $bibid); + }; + if ($@) { + warn("Record $i is bad: $@; skipping."); next; } - my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record()); - $ins->execute($xml, $bibid); } $dbh->commit; report_progress("Records staged", $i) if 0 != $i % 100; @@ -355,7 +387,7 @@ sub handle_load_bibs { FROM $schema.$batch WHERE to_import AND NOT imported - ORDER BY id + ORDER BY bib_id DESC LIMIT 1 ) }); @@ -418,25 +450,31 @@ sub handle_stage_auths { $dbh->commit; $dbh->begin_work; } - my $marc = MARC::Record->new_from_usmarc($_); - my $authid = $marc->subfield('901', 'c'); - if (defined($authid) && $authid !~ /^\d+$/) { - undef $authid; - } - my $lccn = $marc->subfield('010', 'a'); - if (defined $lccn) { - $lccn =~ s/^\s+//; - $lccn =~ s/\s+$//; - $lccn =~ s/\s+/ /g; - } - my $cancelled_lccn = $marc->subfield('010', 'z'); - if (defined $cancelled_lccn) { - $cancelled_lccn =~ s/^\s+//; - $cancelled_lccn =~ s/\s+$//; - $cancelled_lccn =~ s/\s+/ /g; + eval { + my $marc = MARC::Record->new_from_usmarc($_); + my $authid = $marc->subfield('901', 'c'); + if (defined($authid) && $authid !~ /^\d+$/) { + undef $authid; + } + my $lccn = $marc->subfield('010', 'a'); + if (defined $lccn) { + $lccn =~ s/^\s+//; + $lccn =~ s/\s+$//; + $lccn =~ s/\s+/ /g; + } + my $cancelled_lccn = $marc->subfield('010', 'z'); + if (defined $cancelled_lccn) { + $cancelled_lccn =~ s/^\s+//; + $cancelled_lccn =~ 
s/\s+$//; + $cancelled_lccn =~ s/\s+/ /g; + } + my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record()); + $ins->execute($xml, $authid, $lccn, $cancelled_lccn, $xml); + }; + if ($@) { + warn("Record $i is bad: $@; skipping."); + next; } - my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record()); - $ins->execute($xml, $authid, $lccn, $cancelled_lccn, $xml); } $dbh->commit; report_progress("Records staged", $i) if 0 != $i % 100; @@ -556,6 +594,7 @@ sub handle_load_new_auths { ) }); $dbh->commit; + sleep $wait; } } @@ -777,13 +816,27 @@ sub handle_link_auth_bib { my $dbh = shift; my $schema = shift; my $batch = shift; + my $link_skipped = shift; + + my $query; + if ($link_skipped) { + $query = qq{ + SELECT bib_id AS id + FROM $schema.$batch + WHERE NOT imported + AND skip_reason ~ '^edit' + ORDER BY 1 + }; + } else { + $query = qq{ + SELECT bib_id AS id + FROM $schema.$batch + WHERE imported + ORDER BY 1 + }; + } - my $sth = $dbh->prepare(qq{ - SELECT bib_id AS id - FROM $schema.$batch - WHERE imported - ORDER BY 1 - }); + my $sth = $dbh->prepare($query); $sth->execute(); my @ids = map { $_->{id} } @{ $sth->fetchall_arrayref({}) }; my $i = 0; @@ -795,3 +848,53 @@ sub handle_link_auth_bib { } } + +sub handle_export_skipped_bibs { + my $dbh = shift; + my $schema = shift; + my $batch = shift; + my $output = shift; + + my $outfh; + open($outfh, '>', $output) or die("Could not open input file $output: $!\n"); + binmode $outfh, ':utf8'; + + my $sth = $dbh->prepare(qq{ + SELECT marc + FROM $schema.$batch + WHERE skip_reason ~ '^edit' + ORDER BY id + }); + $sth->execute(); + + while (my $row = $sth->fetchrow_hashref()) { + my $marc = MARC::Record->new_from_xml($row->{marc}); + print $outfh $marc->as_usmarc(); + } + $outfh->close(); +} + +sub handle_export_skipped_auths { + my $dbh = shift; + my $schema = shift; + my $batch = shift; + my $output = shift; + + my $outfh; + open($outfh, '>', $output) or die("Could not open input file 
$output: $!\n"); + binmode $outfh, ':utf8'; + + my $sth = $dbh->prepare(qq{ + SELECT marc + FROM $schema.auths_$batch + WHERE NOT imported + ORDER BY id + }); + $sth->execute(); + + while (my $row = $sth->fetchrow_hashref()) { + my $marc = MARC::Record->new_from_xml($row->{marc}); + print $outfh $marc->as_usmarc(); + } + $outfh->close(); +}