X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=eg_staged_bib_overlay;h=4df0cdeaba9c32ec28dbb4c286c47f5615833bf3;hp=53c4080ed20398479c5fb54ff8c25de9d5cb1716;hb=5daa7e37fb3828e5bc77a037afce5c94f755cd4d;hpb=284ea45f5f7470e58fc5ba1750797b956b1e9749 diff --git a/eg_staged_bib_overlay b/eg_staged_bib_overlay index 53c4080..4df0cde 100755 --- a/eg_staged_bib_overlay +++ b/eg_staged_bib_overlay @@ -33,7 +33,9 @@ my $dbpw; my $dbhost; my $batch; my $cutoff; -my $wait = 1; +my $wait = 0; +my $output; +my $link_skipped; my $ret = GetOptions( 'action:s' => \$action, @@ -45,6 +47,8 @@ my $ret = GetOptions( 'batch:s' => \$batch, 'cutoff:s' => \$cutoff, 'wait:i' => \$wait, + 'output:s' => \$output, + 'link-skipped' => \$link_skipped, ); abort('must specify --action') unless defined $action; @@ -57,7 +61,8 @@ abort('must specify --batch') unless defined $batch; abort(q{--action must be "stage_bibs", "filter_bibs", "load_bibs", "stage_auths", "match_auths", "load_new_auths", "overlay_auths_stage1", -"overlay_auths_stage2", "overlay_auths_stage3", "link_auth_auth"}) unless +"overlay_auths_stage2", "overlay_auths_stage3", "link_auth_auth", +"link_auth_bib", "export_skipped_bibs", or "export_skipped_auths"}) unless $action eq 'filter_bibs' or $action eq 'stage_bibs' or $action eq 'load_bibs' or @@ -68,7 +73,9 @@ abort(q{--action must be "stage_bibs", "filter_bibs", "load_bibs", "stage_auths" $action eq 'overlay_auths_stage2' or $action eq 'overlay_auths_stage3' or $action eq 'link_auth_auth' or - $action eq 'link_auth_bib' + $action eq 'link_auth_bib' or + $action eq 'export_skipped_bibs' or + $action eq 'export_skipped_auths' ; my $dbh = connect_db($db, $dbuser, $dbpw, $dbhost); @@ -114,7 +121,16 @@ if ($action eq 'link_auth_auth') { handle_link_auth_auth($dbh, $schema, $batch); } if ($action eq 'link_auth_bib') { - handle_link_auth_bib($dbh, $schema, $batch); + handle_link_auth_bib($dbh, $schema, $batch, $link_skipped); +} + +if ($action eq 'export_skipped_bibs') { + abort('must specify output file') unless defined $output; + handle_export_skipped_bibs($dbh, $schema, $batch, $output); +} +if ($action eq 'export_skipped_auths') { + abort('must specify output file') unless defined $output; + handle_export_skipped_auths($dbh, $schema, $batch, $output); } sub abort { @@ -167,7 +183,18 @@ This program has several modes controlled by the --action switch: or added in this batch. --action link_auth_bib - run authority_control_fields.pl for the bibs that were overlaid in this - batch. + batch. Add --link-skipped to specify + that bibs that were matched but + skipped due to having be edited after + the cutoff should be linked (rather + than linking the imported bibs) + --action export_skipped_bibs - export to ISO2709 file whose name is + specified by --output those bibs + that had been edited after the cutoff. + --action export_skipped_auths - export to ISO2709 file whose name is + specified by --output those authorities + that could not be definitively + handled as updates or adds. Several switches are used regardless of the specified action: @@ -254,14 +281,19 @@ sub handle_stage_bibs { $dbh->commit; $dbh->begin_work; } - my $marc = MARC::Record->new_from_usmarc($_); - my $bibid = $marc->subfield('901', 'c'); - if ($bibid !~ /^\d+$/) { - print STDERR "Record $i is suspect; skipping\n"; + eval { + my $marc = MARC::Record->new_from_usmarc($_); + my $bibid = $marc->subfield('901', 'c'); + if ($bibid !~ /^\d+$/) { + die('Subfield 901$c is not numeric or missing.'); + } + my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record()); + $ins->execute($xml, $bibid); + }; + if ($@) { + warn("Record $i is bad: $@; skipping."); next; } - my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record()); - $ins->execute($xml, $bibid); } $dbh->commit; report_progress("Records staged", $i) if 0 != $i % 100; @@ -372,7 +404,7 @@ sub handle_load_bibs { ) }); $dbh->commit; - sleep $wait; + sleep $wait if ($wait); } } @@ -418,25 +450,31 @@ sub handle_stage_auths { $dbh->commit; $dbh->begin_work; } - my $marc = MARC::Record->new_from_usmarc($_); - my $authid = $marc->subfield('901', 'c'); - if (defined($authid) && $authid !~ /^\d+$/) { - undef $authid; - } - my $lccn = $marc->subfield('010', 'a'); - if (defined $lccn) { - $lccn =~ s/^\s+//; - $lccn =~ s/\s+$//; - $lccn =~ s/\s+/ /g; - } - my $cancelled_lccn = $marc->subfield('010', 'z'); - if (defined $cancelled_lccn) { - $cancelled_lccn =~ s/^\s+//; - $cancelled_lccn =~ s/\s+$//; - $cancelled_lccn =~ s/\s+/ /g; + eval { + my $marc = MARC::Record->new_from_usmarc($_); + my $authid = $marc->subfield('901', 'c'); + if (defined($authid) && $authid !~ /^\d+$/) { + undef $authid; + } + my $lccn = $marc->subfield('010', 'a'); + if (defined $lccn) { + $lccn =~ s/^\s+//; + $lccn =~ s/\s+$//; + $lccn =~ s/\s+/ /g; + } + my $cancelled_lccn = $marc->subfield('010', 'z'); + if (defined $cancelled_lccn) { + $cancelled_lccn =~ s/^\s+//; + $cancelled_lccn =~ s/\s+$//; + $cancelled_lccn =~ s/\s+/ /g; + } + my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record()); + $ins->execute($xml, $authid, $lccn, $cancelled_lccn, $xml); + }; + if ($@) { + warn("Record $i is bad: $@; skipping."); + next; } - my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record()); - $ins->execute($xml, $authid, $lccn, $cancelled_lccn, $xml); } $dbh->commit; report_progress("Records staged", $i) if 0 != $i % 100; @@ -777,13 +815,27 @@ sub handle_link_auth_bib { my $dbh = shift; my $schema = shift; my $batch = shift; + my $link_skipped = shift; + + my $query; + if ($link_skipped) { + $query = qq{ + SELECT bib_id AS id + FROM $schema.$batch + WHERE NOT imported + AND skip_reason ~ '^edit' + ORDER BY 1 + }; + } else { + $query = qq{ + SELECT bib_id AS id + FROM $schema.$batch + WHERE imported + ORDER BY 1 + }; + } - my $sth = $dbh->prepare(qq{ - SELECT bib_id AS id - FROM $schema.$batch - WHERE imported - ORDER BY 1 - }); + my $sth = $dbh->prepare($query); $sth->execute(); my @ids = map { $_->{id} } @{ $sth->fetchall_arrayref({}) }; my $i = 0; @@ -795,3 +847,53 @@ sub handle_link_auth_bib { } } + +sub handle_export_skipped_bibs { + my $dbh = shift; + my $schema = shift; + my $batch = shift; + my $output = shift; + + my $outfh; + open($outfh, '>', $output) or die("Could not open input file $output: $!\n"); + binmode $outfh, ':utf8'; + + my $sth = $dbh->prepare(qq{ + SELECT marc + FROM $schema.$batch + WHERE skip_reason ~ '^edit' + ORDER BY id + }); + $sth->execute(); + + while (my $row = $sth->fetchrow_hashref()) { + my $marc = MARC::Record->new_from_xml($row->{marc}); + print $outfh $marc->as_usmarc(); + } + $outfh->close(); +} + +sub handle_export_skipped_auths { + my $dbh = shift; + my $schema = shift; + my $batch = shift; + my $output = shift; + + my $outfh; + open($outfh, '>', $output) or die("Could not open input file $output: $!\n"); + binmode $outfh, ':utf8'; + + my $sth = $dbh->prepare(qq{ + SELECT marc + FROM $schema.auths_$batch + WHERE NOT imported + ORDER BY id + }); + $sth->execute(); + + while (my $row = $sth->fetchrow_hashref()) { + my $marc = MARC::Record->new_from_xml($row->{marc}); + print $outfh $marc->as_usmarc(); + } + $outfh->close(); +}