3 # Copyright (c) 2016 Equinox Software, Inc.
4 # Author: Galen Charlton <gmc@esilibrary.com>
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2, or (at your option)
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program. If not, see <http://www.gnu.org/licenses/>
24 use MARC::File::XML (BinaryEncoding => 'utf8');
26 use OpenILS::Application::AppUtils;
# Default Pg schema for the staging table; --schema overrides it.
29 my $schema = 'bib_loads';
# Command-line switch specs (these look like Getopt::Long entries; the
# enclosing call is not visible here).  In Getopt::Long, ':s' means the
# switch takes an optional string value.
39 'action:s' => \$action,
40 'schema:s' => \$schema,
42 'dbuser:s' => \$dbuser,
43 'dbhost:s' => \$dbhost,
46 'cutoff:s' => \$cutoff,
# Every switch below is mandatory; abort() is called with a message when
# one is missing.
50 abort('must specify --action') unless defined $action;
# NOTE(review): $schema is initialised to 'bib_loads' above, so this
# check looks unreachable as written -- confirm whether it is still wanted.
51 abort('must specify --schema') unless defined $schema;
52 abort('must specify --db') unless defined $db;
53 abort('must specify --dbuser') unless defined $dbuser;
54 abort('must specify --dbhost') unless defined $dbhost;
55 abort('must specify --dbpw') unless defined $dbpw;
56 abort('must specify --batch') unless defined $batch;
# Only the three known actions are accepted.
58 abort('--action must be "stage_bibs" or "filter_bibs" or "load_bibs"') unless
59 $action eq 'filter_bibs' or
60 $action eq 'stage_bibs' or
61 $action eq 'load_bibs';
# One database handle is opened up front and handed to whichever action
# handler runs.
63 my $dbh = connect_db($db, $dbuser, $dbpw, $dbhost);
# stage_bibs: needs at least one input file name left in @ARGV.
65 if ($action eq 'stage_bibs') {
66 abort('must specify at least one input file') unless @ARGV;
67 handle_stage_bibs($dbh, $schema, $batch);
# filter_bibs: additionally requires --cutoff.
70 if ($action eq 'filter_bibs') {
71 abort('must specify cutoff date when filtering') unless defined $cutoff;
72 handle_filter_bibs($dbh, $schema, $batch, $cutoff);
# load_bibs: $wait is passed through to the handler (the usage text
# describes it as a per-bib delay in seconds).
75 if ($action eq 'load_bibs' ) {
76 handle_load_bibs($dbh, $schema, $batch, $wait);
81 print STDERR "$0: $msg", "\n";
89 Utility to stage and overlay bib records in an Evergreen database. This
90 expects that the incoming records will have been previously exported
91 from that Evergreen database and modified in some fashion (e.g., for
92 authority record processing) and that the bib ID can be found in the
95 This program has several modes controlled by the --action switch:
97 --action stage_bibs - load MARC bib records into a staging table
98 --action filter_bibs - mark previously staged bibs that should
99 be excluded from a subsequent load, either
100 because the target bib is deleted in Evergreen
101 or the record was modified after a date
102 specified by the --cutoff switch
103 --action load_bibs - overlay bib records using a previously staged
104 batch, one at a time. After each bib, it will
105 wait the number of seconds specified by the
108 Several switches are used regardless of the specified action:
110 --schema - Pg schema in which staging table will live; should be
112 --batch - name of bib batch; will also be used as the name
115 --dbuser - database user
116 --dbpw - database password
117 --dbhost - database host
121 $0 --schema bib_load --batch bibs_2016_01 --db evergreen \\
122 --dbuser evergreen --dbpw evergreen --dbhost localhost \\
123 --action stage_bibs -- file1.mrc file2.mrc [...]
125 $0 --schema bib_load --batch bibs_2016_01 --db evergreen \\
126 --dbuser evergreen --dbpw evergreen --dbhost localhost \\
127 --action filter_bibs --cutoff 2016-01-02
129 $0 --schema bib_load --batch bibs_2016_01 --db evergreen \\
130 --dbuser evergreen --dbpw evergreen --dbhost localhost \\
131 --action load_bibs --wait 2
# report_progress($msg [, $counter])
#
# Write a progress line to STDERR.  When $counter is defined the line is
# "$msg: $counter"; the other print emits just "$msg" (presumably the
# else branch -- the branch keyword itself is not visible in this view).
137 sub report_progress {
138 my ($msg, $counter) = @_;
139 if (defined $counter) {
140 print STDERR "$msg: $counter\n";
142 print STDERR "$msg\n";
# Arguments, in order: database name, user, password, host.
147 my ($db, $dbuser, $dbpw, $dbhost) = @_;
# NOTE(review): the port is hard-coded to 5432; a server listening on
# another port cannot be reached without editing this DSN.
149 my $dsn = "dbi:Pg:host=$dbhost;dbname=$db;port=5432";
# One of the DBI connection attributes: include the failing SQL statement
# in DBI error messages.
152 ShowErrorStatement => 1,
# NOTE(review): no check of the connect() result is visible in this view;
# confirm that a failed connection is reported (e.g. via RaiseError).
157 my $dbh = DBI->connect($dsn, $dbuser, $dbpw, $attrs);
# handle_stage_bibs($dbh, $schema, $batch)
#
# Stage incoming MARC records into the table $schema.$batch: the table is
# dropped if it exists and re-created, each record is inserted as MARCXML
# together with its bib ID, and indexes are created afterwards.
# Only part of this routine is visible here; the comments below describe
# the visible statements only.
162 sub handle_stage_bibs {
168 DROP TABLE IF EXISTS $schema.$batch;
171 CREATE TABLE $schema.$batch (
175 imported BOOLEAN DEFAULT FALSE,
176 to_import BOOLEAN DEFAULT TRUE,
# NOTE(review): the caller insists on input file names in @ARGV, yet the
# UTF-8 layer is applied to STDIN.  Confirm which handle the records are
# actually read from, and whether a :utf8 layer is appropriate for binary
# MARC input.
183 binmode STDIN, ':utf8';
# The MARCXML and the bib ID are bound as placeholders; $schema and $batch
# are interpolated as identifiers and therefore must be trusted values.
184 my $ins = $dbh->prepare("INSERT INTO $schema.$batch (marc, bib_id) VALUES (?, ?)");
# Periodic progress report (the enclosing condition is not visible here).
189 report_progress("Records staged", $i);
# Parse the current record in $_ as binary (ISO 2709) MARC.
193 my $marc = MARC::Record->new_from_usmarc($_);
# The target bib ID is expected in 901$c.
194 my $bibid = $marc->subfield('901', 'c');
# Anything other than a plain run of digits is reported as suspect.
# NOTE(review): a record with no 901$c leaves $bibid undef, which would
# raise an "uninitialized value" warning in this match if warnings are on.
195 if ($bibid !~ /^\d+$/) {
196 print STDERR "Record $i is suspect; skipping\n";
# Serialise to MARCXML and run it through AppUtils->entityize (presumably
# converts non-ASCII characters to XML entities -- confirm).
199 my $xml = OpenILS::Application::AppUtils->entityize($marc->as_xml_record());
200 $ins->execute($xml, $bibid);
# Final tally, unless the count is an exact multiple of 100 (presumably
# already reported by the periodic call above).
203 report_progress("Records staged", $i) if 0 != $i % 100;
205 CREATE INDEX ${batch}_bib_id_idx ON
206 $schema.$batch (bib_id);
209 CREATE INDEX ${batch}_id_idx ON
# handle_filter_bibs($dbh, $schema, $batch, $cutoff)
#
# Flag staged rows in $schema.$batch that must not be loaded by setting
# to_import = FALSE and recording a skip_reason.  Two UPDATE passes are
# visible, both drawing on biblio.record_entry: one for 'deleted' and one
# for records edited after $cutoff.  The WHERE clauses are not visible in
# this view.
214 sub handle_filter_bibs {
# Pass 1: rows whose skip_reason becomes 'deleted'.
220 my $sth1 = $dbh->prepare(qq{
221 UPDATE $schema.$batch
222 SET to_import = FALSE,
223 skip_reason = 'deleted'
226 FROM biblio.record_entry
# Number of rows touched by the first UPDATE.
231 my $ct = $sth1->rows;
232 report_progress("Filtering out $ct records that are currently deleted");
# Pass 2: rows edited after the cutoff.
# NOTE(review): $cutoff is interpolated straight into the SQL text for
# skip_reason while also being passed as a bind value to execute() below;
# a value containing a single quote would break (or alter) the statement.
234 my $sth2 = $dbh->prepare(qq{
235 UPDATE $schema.$batch
236 SET to_import = FALSE,
237 skip_reason = 'edited after cutoff of $cutoff'
240 FROM biblio.record_entry
245 $sth2->execute($cutoff);
# NOTE(review): no refresh of $ct from $sth2->rows is visible in this view;
# if there is none, this message repeats the count from the 'deleted' pass.
247 report_progress("Filtering out $ct records edited after cutoff date of $cutoff");
250 sub handle_load_bibs {
256 my $getct = $dbh->prepare(qq{
263 my $max = $getct->fetchrow_arrayref()->[0];
265 report_progress('Number of bibs to update', $max);
266 for (my $i = 1; $i <= $max; $i++) {
267 report_progress('... bibs updated', $i) if 0 == $i % 10 or $i == $max;
270 UPDATE biblio.record_entry a
272 FROM $schema.$batch b
273 WHERE a.id = b.bib_id
284 UPDATE $schema.$batch