#!/usr/bin/perl
# X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=mig-bin%2Fmig-loadbibs;h=a6840dec8a8d76f8d89559d409c26ba0a6d461df;hb=8778d74ad0833c120f7db371376b2420c3edc6c4

###############################################################################
=pod

=over 4

=item B<mig-loadbibs> --stage_file foo.mrc

Takes a load of bibs from a binary MARC file and loads them into the mig
staging table for biblio_record_entry (biblio_record_entry_stage).

Takes these optional arguments:

--append

When used, the staging table is not dropped; the records are added onto it
instead.

--source

Sets the x_source value on the staged rows to the one supplied instead of the
default of none (NULL).

--xml

By default the program assumes a USMARC file.  This flag will identify it as
a MARCXML file instead.

=back

=cut

###############################################################################

use strict;
use warnings;

use DBI;
use Data::Dumper;
use MARC::Record;
use MARC::Batch;
use MARC::File;
use MARC::File::XML;
use Env qw(
    HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
    MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
);
use Pod::Usage;
use Switch;
use Cwd 'abs_path';
use FindBin;
use UNIVERSAL;
use Unicode::Normalize;
my $mig_bin = "$FindBin::Bin/";
use lib "$FindBin::Bin/";
use Mig;

pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help';
pod2usage(-verbose => 1) if ! $ARGV[1];

my $append_is_false = 1;    # cleared by --append
my $source;                 # undef is bound as NULL, i.e. "no source"
my $file_is_xml = 0;        # set by --xml
my $infile;                 # value following --stage_file
my $i = 0;                  # number of records staged so far
my $batch;
binmode STDIN, ':utf8';

# Walk a copy of @ARGV.  Options that take a value consume the next element;
# anything unrecognised is silently ignored.
my @args = @ARGV;
while (@args) {
    my $arg = shift @args;
    if ($arg eq '--stage_file') {
        $infile = shift @args;
    }
    elsif ($arg eq '--source') {
        # The original tested the --stage_file flag here, so the value
        # following --source was never captured.
        $source = shift @args;
    }
    elsif ($arg eq '--append') {
        $append_is_false = 0;
    }
    elsif ($arg eq '--xml') {
        $file_is_xml = 1;
    }
}

die "mig-loadbibs: --stage_file <file> is required\n"
    unless defined $infile;

# Connect only once the command line is known to be usable.
my $dbh = Mig::db_connect();

if ($append_is_false) {
    # Fresh load: rebuild the staging table from scratch.
    $dbh->do(qq{
        DROP TABLE IF EXISTS $MIGSCHEMA.biblio_record_entry_stage;
        CREATE UNLOGGED TABLE $MIGSCHEMA.biblio_record_entry_stage (
            l_bib_id        TEXT,
            x_source        TEXT,
            x_warnings      TEXT,
            x_migrate       BOOLEAN DEFAULT TRUE
        ) INHERITS ($MIGSCHEMA.biblio_record_entry);
    });
}

$batch = MARC::Batch->new( ($file_is_xml ? 'XML' : 'USMARC'), $infile );
$batch->strict_off();

# One prepared statement with bind values: record XML, source names and
# parser warnings may all contain quote characters, so they must never be
# interpolated into the SQL text.  Only the schema name (an identifier, which
# cannot be bound) is interpolated.
my $sth = $dbh->prepare(qq{
    INSERT INTO $MIGSCHEMA.biblio_record_entry_stage
        (marc, x_source, x_warnings)
    VALUES (?, ?, ?)
});

while ( my $record = $batch->next() ) {
    $i++;

    # MARC::Batch yields MARC::Record objects for both USMARC and MARCXML
    # input, so the record is serialised to XML in either mode.
    my $xml = clean_marc( $record->as_xml_record() );

    # warnings() returns the warnings accumulated for the record just read.
    my @warnings = $batch->warnings();
    my $warning_string = @warnings ? join(':', @warnings) : 'none';

    $sth->execute($xml, $source, $warning_string);

    # Report once per 100 records rather than on every other record.
    report_progress("Records staged", $i) if 0 == $i % 100;
}

# The index is created together with a freshly built table; when appending,
# the table (and its index) come from an earlier run, and re-issuing
# CREATE INDEX under the same name would fail.
if ($append_is_false) {
    $dbh->do(qq/
        CREATE INDEX ${MIGSCHEMA}_biblio_record_entry_stage_idx ON
            $MIGSCHEMA.biblio_record_entry_stage (id);
    /);
}

print "Finis.\n";

sub clean_marc {
    my $xml = shift;
    $xml =~ s/\n//sog;
    $xml =~ s/^<\?xml.+\?\s*>//go;
    $xml =~ s/>\s+