6 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenILS::Application::AppUtils;
10 use OpenILS::Utils::Fieldmapper;
11 use OpenSRF::Utils::JSON;
12 use Unicode::Normalize;
14 use Time::HiRes qw/time/;
17 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
21 MARC::Charset->ignore_errors(1);
23 # Command line options, with applicable defaults
24 my ($idsubfield, $bibfield, $bibsubfield, @files, $libmap, $quiet, $help);
28 my $config = '/openils/conf/opensrf_core.xml';
29 my $marctype = 'USMARC';
31 my $parse_options = GetOptions(
32 'idfield=s' => \$idfield,
33 'idsubfield=s' => \$idsubfield,
34 'bibfield=s'=> \$bibfield,
35 'bibsubfield=s'=> \$bibsubfield,
36 'startid=i' => \$count,
38 'config=s' => \$config,
39 'marctype=s' => \$marctype,
41 'libmap=s' => \$libmap,
46 if (!$parse_options or $help) {
50 @files = @ARGV if (!@files);
52 my $U = 'OpenILS::Application::AppUtils';
58 $lib_id_map = map_libraries_to_ID($libmap);
61 OpenSRF::System->bootstrap_client( config_file => $config );
62 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
64 my ($result, $evt) = get_user_id($user);
65 if ($evt || !$result->id) {
66 print("Could not retrieve user with user name '$user'\n");
72 select STDERR; $| = 1;
73 select STDOUT; $| = 1;
75 my $batch = new MARC::Batch ( $marctype, @files );
77 $batch->warnings_off();
81 while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
86 $record_field = $rec->field($idfield, $idsubfield);
88 $record_field = $rec->field($idfield);
92 # On some systems, the 001 actually points to the record ID.
93 # We need to attach to the call number to handle holdings in different libraries,
94 # but we can work out call numbers later in SQL by the record ID + call number text.
96 $record = $record_field->data;
97 $record =~ s/^.*?(\d+).*?$/$1/o;
100 # If we have been given bibfield / bibsubfield values, use those to find
101 # a matching bib record for $record and use _that_ as our record instead
103 my ($result, $evt) = map_id_to_bib($record);
104 if ($evt || !$result->record) {
105 print("Could not find matching bibliographic record for $record\n");
107 $record = $result->record;
110 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
111 $xml =~ s/^<\?xml.+\?\s*>//go;
112 $xml =~ s/>\s+</></go;
113 $xml =~ s/\p{Cc}//go;
114 $xml = OpenILS::Application::AppUtils->entityize($xml);
115 $xml =~ s/[\x00-\x1f]//go;
117 my $bib = new Fieldmapper::serial::record_entry;
119 $bib->record($record);
123 $bib->creator($user);
124 $bib->create_date('now');
126 $bib->edit_date('now');
127 $bib->last_xact_id('IMPORT-'.$starttime);
130 my $lib_id = get_library_id($rec);
132 $bib->owning_lib($lib_id);
136 print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
140 if (!$quiet && !($count % 20)) {
141 print STDERR "\r$count\t". $count / (time - $starttime);
145 # Generate a hash of library names (as found in the 852b in the MFHD record) to
146 # integers representing actor.org_unit ID values
147 sub map_libraries_to_ID {
148 my $map_filename = shift;
152 open(MAP_FH, '<', $map_filename) or die "Could not load [$map_filename] $!";
154 my ($lib, $id) = $_ =~ /^(.*?)\t(.*?)$/;
155 $lib_id_map{$lib} = $id;
161 # Look up the actor.org_unit.id value for this library name
165 my $lib_name = $record->field('852')->subfield('b');
166 my $lib_id = $lib_id_map->{$lib_name};
171 # Get the actor.usr.id value for the given username
173 my $username = shift;
177 $result = $U->cstorereq(
178 'open-ils.cstore.direct.actor.user.search',
179 { usrname => $username, deleted => 'f' }
181 $evt = OpenILS::Event->new('ACTOR_USR_NOT_FOUND') unless $result;
183 return ($result, $evt);
186 # Get the biblio.record_entry.id value for the given identifier; note that this
187 # approach uses a wildcard to match anything that precedes the identifier value
195 value => { like => '%' . $record }
199 $search{'subfield'} = $bibsubfield;
202 $result = $U->cstorereq(
203 'open-ils.cstore.direct.metabib.full_rec.search', \%search
205 $evt = OpenILS::Event->new('METABIB_FULL_REC_NOT_FOUND') unless $result; # test the lookup result, not the input identifier, so a failed search yields an event
207 return ($result, $evt);
214 marc2sre.pl - Convert MARC Format for Holdings Data (MFHD) records to SRE
215 (serial.record_entry) JSON objects
219 C<marc2sre.pl> [B<--config>=I<opensrf_core.xml>]
220 [B<--idfield>=I<MARC-tag> [B<--idsubfield>=I<MARC-code>]] [B<--startid>=I<start-ID>]
221 [B<--user>=I<db-username>] [B<--marctype>=I<fileformat>]
222 [B<--file>=I<MARC-filename>[, ...]] [B<--libmap>=I<map-file>] [B<--quiet>=I<quiet>]
223 [B<--bibfield>=I<MARC-tag> [B<--bibsubfield>=I<MARC-code>]]
227 For one or more files containing MFHD records, iterate through the records
228 and generate SRE (serial.record_entry) JSON objects.
234 =item * B<-c> I<config-file>, B<--config>=I<config-file>
236 Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
237 Defaults to F</openils/conf/opensrf_core.xml>
239 =item * B<--idfield> I<MARC-field>
241 Specifies the MFHD field where the identifier of the corresponding
242 bibliographic record is found. Defaults to '004'.
244 =item * B<--idsubfield> I<MARC-code>
246 Specifies the MFHD subfield, if any, where the identifier of the corresponding
247 bibliographic record is found. This option is ignored unless it is accompanied
248 by the B<--idfield> option. Defaults to null.
250 =item * B<--bibfield> I<MARC-field>
252 Specifies the field in the bibliographic record that holds the identifier
253 value. Defaults to null.
255 =item * B<--bibsubfield> I<MARC-code>
257 Specifies the subfield in the bibliographic record, if any, that holds the
258 identifier value. This option is ignored unless it is accompanied by the
259 B<--bibfield> option. Defaults to null.
261 =item * B<-u> I<username>, B<--user>=I<username>
263 Specifies the Evergreen user that will own these serial records.
265 =item * B<-m> I<file-format>, B<--marctype>=I<file-format>
267 Specifies whether the files containing the MFHD records are in MARC21 ('USMARC')
268 or MARC21XML ('XML') format. Defaults to 'USMARC'.
270 =item * B<-l> I<map-file>, B<--libmap>=I<map-file>
272 Points to a file containing a mapping of library names to integers.
273 The integer represents the actor.org_unit.id value of the library. This enables
274 us to generate an ingest file that does not subsequently need to manually
277 The library name must correspond to the 'b' subfield of the 852 field.
278 Well, it does not have to, but you will have to modify this script
281 The format of the map file should be the name of the library, followed
282 by a tab, followed by the desired numeric ID of the library. For example:
287 =item * B<-q>, B<--quiet>
289 Suppresses the record counter output.
295 marc2sre.pl --idfield 004 --bibfield 035 --bibsubfield a --user cat1 serial_holding.xml
297 Processes MFHD records in the B<serial_holding.xml> file. The script pulls the
298 bibliographic record identifier from the 004 control field of the MFHD record
299 and searches for a matching value in the bibliographic record in data field
300 035, subfield a. The "cat1" user will own the processed MFHD records.
304 Dan Scott <dscott@laurentian.ca>
306 =head1 COPYRIGHT AND LICENSE
308 Copyright 2010-2011 by Dan Scott
310 This program is free software; you can redistribute it and/or
311 modify it under the terms of the GNU General Public License
312 as published by the Free Software Foundation; either version 2
313 of the License, or (at your option) any later version.
315 This program is distributed in the hope that it will be useful,
316 but WITHOUT ANY WARRANTY; without even the implied warranty of
317 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
318 GNU General Public License for more details.
320 You should have received a copy of the GNU General Public License
321 along with this program; if not, write to the Free Software
322 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.