--- /dev/null
+#!/usr/bin/perl
+
+###############################################################################
+=pod
+
+=item B<loadbibs> --stage_file foo.mrc
+
+Takes a load of bibs from a binary MARC file and loads them into the mig staging
+table for biblio_record_entry (biblio_record_entry_stage).
+
+Takes these optional arguments:
+
+--append
+
+When used it does not drop the staging table and instead adds onto it.
+
+--source
+
+Sets an x_source value on the staging table to the one supplied instead of the
+default of none.
+
+--xml
+
+By default the program assumes a USMARC file. This flag will identify it as
+a MARCXML file instead.
+
+=back
+
+=cut
+
+###############################################################################
+
+use strict;
+use warnings;
+
+use DBI;
+use Data::Dumper;
+use MARC::Record;
+use MARC::Batch;
+use MARC::File;
+use MARC::File::XML;
+use Env qw(
+ HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
+ MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
+);
+use Pod::Usage;
+use Switch;
+use Cwd 'abs_path';
+use FindBin;
+use UNIVERSAL;
+use Unicode::Normalize;
+my $mig_bin = "$FindBin::Bin/";
+use lib "$FindBin::Bin/";
+use Mig;
+
+# Show the full manual on --help, or brief usage when fewer than two
+# arguments were supplied (the smallest useful call is "--stage_file FILE").
+pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help';
+pod2usage(-verbose => 1) if @ARGV < 2;
+
+# Parser state. Each "next_arg_is_*" flag is raised by an option that takes
+# a value; the argument that follows is then consumed as that value.
+my $next_arg_is_file = 0;
+my $append_is_false = 1;        # cleared by --append
+my $next_arg_is_source = 0;
+my $source = 'default';         # replaced by the value following --source
+my $file_is_xml = 0;            # set by --xml
+my $dbh = Mig::db_connect();
+my $infile;                     # value following --stage_file
+my $i = 0;                      # count of records staged so far
+my $batch;
+binmode STDIN, ':utf8';
+
+foreach my $arg (@ARGV) {
+    if ($arg eq '--stage_file') {
+        $next_arg_is_file = 1;
+        next;
+    }
+    if ($next_arg_is_file) {
+        $infile = $arg;
+        $next_arg_is_file = 0;
+        next;
+    }
+    if ($arg eq '--source') {
+        $next_arg_is_source = 1;
+        next;
+    }
+    # This branch must test the *source* flag; testing the file flag here
+    # made the branch unreachable and silently discarded the --source value.
+    if ($next_arg_is_source) {
+        $source = $arg;
+        $next_arg_is_source = 0;
+        next;
+    }
+    if ($arg eq '--append') {
+        $append_is_false = 0;
+        next;
+    }
+    if ($arg eq '--xml') {
+        $file_is_xml = 1;
+        next;
+    }
+}
+
+# Refuse to continue without an input file or with a dangling option, so
+# that nothing later in the script runs against incomplete arguments.
+abort('--stage_file requires a file name')
+    if $next_arg_is_file || !defined $infile;
+abort('--source requires a value') if $next_arg_is_source;
+
+# Unless --append was requested, rebuild the staging table from scratch:
+# drop any previous copy and recreate it as an UNLOGGED child of
+# biblio_record_entry carrying the extra l_/x_ bookkeeping columns.
+if ($append_is_false) {
+    my $rebuild_stage_sql = qq{
+ DROP TABLE IF EXISTS $MIGSCHEMA.biblio_record_entry_stage;
+ CREATE UNLOGGED TABLE $MIGSCHEMA.biblio_record_entry_stage (
+ l_bib_id TEXT,
+ x_source TEXT,
+ x_warnings TEXT,
+ x_migrate BOOLEAN DEFAULT TRUE
+ ) INHERITS ($MIGSCHEMA.biblio_record_entry);
+ };
+    $dbh->do($rebuild_stage_sql);
+}
+
+# Open the input with the MARC::Batch reader matching the --xml flag
+# ('XML' for MARCXML, 'USMARC' otherwise), then turn strict mode off.
+# NOTE(review): per the MARC::Batch docs strict_off lets next() carry on past
+# records it considers bad; problems are collected via warnings() instead.
+$batch = MARC::Batch->new( ($file_is_xml ? 'XML' : 'USMARC'), $infile );
+$batch->strict_off();
+
+# Stage every record of the batch as one row of biblio_record_entry_stage.
+#
+# The INSERT is prepared once and all values are bound as parameters, so
+# quotes (or a "$_$" sequence) in the MARC data, the --source value or the
+# parser warnings can no longer break or alter the statement.
+#
+# When no --source was given ($source still holds its 'default' sentinel)
+# the SQL keyword DEFAULT is used for x_source, which is what the previous
+# unquoted interpolation of that sentinel amounted to.
+{
+    my $source_is_default = $source eq 'default';
+    my $source_sql        = $source_is_default ? 'DEFAULT' : '?';
+    my $insert_sth        = $dbh->prepare(
+        "INSERT INTO $MIGSCHEMA.biblio_record_entry_stage (marc,x_source,x_warnings) "
+        . "VALUES (?,$source_sql,?);"
+    );
+
+    while ( my $record = $batch->next() ) {
+        # $record is a MARC::Record object whichever reader produced it, so
+        # it is always serialized to MARCXML before being cleaned.
+        my $xml = $record->as_xml_record();
+        $i++;
+        $xml = clean_marc($xml);
+
+        # Warnings the batch collected for this record, ':'-joined, or the
+        # literal 'none' when there were no warnings.
+        my @warnings       = $batch->warnings();
+        my $warning_string = @warnings ? join(':', @warnings) : 'none';
+
+        my @bind_values = $source_is_default
+            ? ($xml, $warning_string)
+            : ($xml, $source, $warning_string);
+        $insert_sth->execute(@bind_values);
+
+        # Report once per 100 staged records.
+        report_progress("Records staged", $i) if 0 == $i % 100;
+    }
+}
+
+# Index the staging table on id once loading is finished.
+# The index is dropped first when it already exists: after an earlier run,
+# a later --append run keeps the table (and therefore its index), and a bare
+# CREATE INDEX would then fail because the name is already taken.
+# NOTE(review): the id column is not declared in this script; presumably it
+# is inherited from biblio_record_entry - confirm.
+$dbh->do(qq/
+ DROP INDEX IF EXISTS $MIGSCHEMA.${MIGSCHEMA}_biblio_record_entry_stage_idx;
+ CREATE INDEX ${MIGSCHEMA}_biblio_record_entry_stage_idx ON
+ $MIGSCHEMA.biblio_record_entry_stage (id);
+/);
+
+print "Finis.\n";
+
+# clean_marc($xml) -> $cleaned_xml
+#
+# Normalizes one serialized MARCXML record for storage:
+#   * removes newlines and a leading <?xml ...?> declaration,
+#   * removes whitespace between adjacent tags,
+#   * strips control characters (Unicode category Cc),
+#   * applies Unicode NFC normalization,
+#   * escapes bare ampersands (those not starting an entity) as &amp;,
+#   * rewrites characters U+0080..U+FFFD as hexadecimal character references.
+#
+# Takes the XML as a string and returns the cleaned string.
+sub clean_marc {
+    my $xml = shift;
+    $xml =~ s/\n//g;
+    # Non-greedy, so only the declaration itself is removed even when a
+    # later "?>" sequence occurs inside the record.
+    $xml =~ s/^<\?xml.+?\?\s*>//;
+    $xml =~ s/>\s+</></g;
+    $xml =~ s/\p{Cc}//g;
+    $xml = NFC($xml);
+    # An ampersand not followed by "non-space characters then ';'" is not
+    # the start of an entity and has to be escaped.
+    $xml =~ s/&(?!\S+;)/&amp;/g;
+    $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/ge;
+    return $xml;
+}
+
+
+# abort($msg)
+#
+# Prints "<program name>: <msg>" followed by a newline to STDERR and ends the
+# process with exit status 1. Does not return.
+sub abort {
+    my ($msg) = @_;
+    my $complaint = "$0: $msg";
+    print STDERR $complaint, "\n";
+    exit 1;
+}
+
+# report_progress($msg, $counter)
+#
+# Writes one progress line to STDERR: "<msg>: <counter>" when a counter is
+# supplied, otherwise just "<msg>".
+sub report_progress {
+    my ($msg, $counter) = @_;
+    my $line = defined $counter
+        ? "$msg: $counter\n"
+        : "$msg\n";
+    print STDERR $line;
+}