From 000e8df6405da270e1ca30fd631f251b14ac0ddc Mon Sep 17 00:00:00 2001 From: Rogan Hamby Date: Thu, 9 May 2019 15:39:25 -0400 Subject: [PATCH] new mig-loadbibs --- mig | 2 + mig-bin/mig-loadbibs | 199 ------------------------------------------------- mig-bin/mig-stagebibs | 190 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 192 insertions(+), 199 deletions(-) delete mode 100755 mig-bin/mig-loadbibs create mode 100755 mig-bin/mig-stagebibs diff --git a/mig b/mig index e0a6754..3d53a6c 100755 --- a/mig +++ b/mig @@ -67,6 +67,8 @@ Using B should go something like this: =item mig gsheet --pull foo_tab_name OR --push foo_pg_table_name +=item mig stagebibs --file foo.xml + =back =head1 COMMANDS diff --git a/mig-bin/mig-loadbibs b/mig-bin/mig-loadbibs deleted file mode 100755 index a6db91c..0000000 --- a/mig-bin/mig-loadbibs +++ /dev/null @@ -1,199 +0,0 @@ -#!/usr/bin/perl - -############################################################################### -=pod - -=item B --stage_file foo.mrc - -Takes a load of bibs from a binary marc file and loads them into mig staging table -of bibio_record_entry. - -Takes these optional arguments: - ---append - -When used it does not drop the staging table and instead adds onto it. - ---source - -Sets an x_source value on the staging table to the one supplied instead of the -default of none. - ---xml - -By default the program assumes a USMARC file. This flag will identify it as -a MARCXML file instead. - -=back - -=cut - -############################################################################### - -use strict; -use warnings; - -use DBI; -#binmode STDIN, ':bytes'; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use Switch; -use Cwd 'abs_path'; -use FindBin; -use UNIVERSAL; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; -pod2usage(-verbose => 1) if ! $ARGV[1]; - -my $next_arg_is_file = 0; -my $append = 0; -my $next_arg_is_source = 0; -my $next_arg_is_stage = 0; -my $stage_table = 'biblio_record_entry'; -my $source = 'default'; -my $file_is_xml = 0; -my $dbh = Mig::db_connect(); -my $infile; -my $i = 0; -my $batch; -binmode STDIN, ':utf8'; - -#MARC::Charset->assume_unicode(1); -MARC::Charset->ignore_errors(1); - -foreach my $arg (@ARGV) { - if ($arg eq '--stage_file') { - $next_arg_is_file = 1; - next; - } - if ($next_arg_is_file) { - $infile = $arg; - $next_arg_is_file = 0; - next; - } - if ($arg eq '--source') { - $next_arg_is_source = 1; - next; - } - if ($next_arg_is_source) { - $source = $arg; - $next_arg_is_source = 0; - next; - } -} - -my $bre_test = check_for_table($dbh,'biblio_record_entry'); -if ($bre_test == 0) { create_child_bre($dbh); } - -my $xmig_test = check_for_column($dbh,'biblio_record_entry','x_migrate'); -if ($xmig_test == 0) { add_column($dbh,'biblio_record_entry','x_migrate','BOOLEAN DEFAULT TRUE'); - -my $xsource_test = check_for_column($dbh,'biblio_record_entry','x_source'); -if ($xsource_test == 0) { add_column($dbh,'biblio_record_entry','x_source','TEXT'); - -my $last_xact; -if ($source) { $last_xact = "'$MIGSCHEMA $source'" } else { $last_xact = "'$MIGSCHEMA'"; } - -#flatten out MARC XML FILE -open my $xml, "<:encoding(utf8)", $infile or abort('could not open MARC XML file'); -$i = 0; -my $record; -while(my $line = <$xml>) { - if ($line =~ /^<\/?collection/) { next; } - chomp $line; - $record = $record . $line; - if ($line =~ /^<\/record/) { - stage_record($dbh,$record,$last_xact); - $record = ''; - } - -close $xml; - - -#load the MARC XML FILE TO STAGING -report_progress("Records staged", $i) if 0 != $i % 100; - -print "Finis.\n"; - -# beyond here be functions - -sub create_child_bre { - my $dbh = shift; - $dbh->do("DO \$\$ - DECLARE - t BOOLEAN; - BEGIN - SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = 'biblio_record_entry') INTO t; - IF t = FALSE THEN - PERFORM migration_tools.build_specific_base_staging_table ('$MIGSCHEMA','biblio.record_entry'); - END IF; - END \$\$;"); - - return (); -} - -sub abort { - my $msg = shift; - print STDERR "$0: $msg", "\n"; - exit 1; -} - -sub report_progress { - my ($msg, $counter) = @_; - if (defined $counter) { - print STDERR "$msg: $counter\n"; - } else { - print STDERR "$msg\n"; - } -} - -sub stage_record { - my $dbh = shift; - my $record = shift; - my $last_xact = shift; - $record = '$_$' . $record . '$_$'; - my $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry (last_xact_id,marc) VALUES ($last_xact,$record);"; - my $sth = $dbh->prepare($sql); - $sth->execute(); - return; -} - -sub check_for_table { - my $dbh = shift; - my $table = shift; - my $sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table';"; - my $sth = $dbh->prepare($sql); - $sth->execute(); - my @sqlresult = $sth->fetchrow_array; - my $r = pop @sqlresult; - if ($r) { return $r; } else { return 0; } -} - -sub check_for_column { - my $dbh = shift; - my $table = shift; - my $column = shift; - my $sql = "SELECT 1 FROM information_schema.columns WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table' AND column_name = $column;"; - my $sth = $dbh->prepare($sql); - $sth->execute(); - my @sqlresult = $sth->fetchrow_array; - my $r = pop @sqlresult; - if ($r) { return $r; } else { return 0; } -} - -sub add_column { - my $dbh = shift; - my $table = shift; - my $column = shift; - my $column_type = shift; - my $sql = "ALTER TABLE $MIGSCHEMA.$table ADD COLUMN $COLUMN $COLUMN_TYPE;"; - my $r = check_for_column($dbh,$table,$column); - if ($r == 0) { abort('failed to create column'; } else { return $r; } -} - diff --git a/mig-bin/mig-stagebibs b/mig-bin/mig-stagebibs new file mode 100755 index 0000000..fb76b0c --- /dev/null +++ b/mig-bin/mig-stagebibs @@ -0,0 +1,190 @@ +#!/usr/bin/perl + +############################################################################### +=pod + +=item B --file foo.xml + +Takes a load of bibs from a UTF-8 MARC XML file and loads them into mig staging +table of bibio_record_entry. This is done with no checking of file validity +so records should be checked before hand and cleaned. + +Takes one optional arguments: + +--source + +Sets an x_source value on the staging table to the one supplied instead of the +default of none. + +=back + +=cut + +############################################################################### + +use strict; +use warnings; + +use DBI; +#binmode STDIN, ':bytes'; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Data::Dumper; +use Pod::Usage; +use Switch; +use Cwd 'abs_path'; +use FindBin; +use UNIVERSAL; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; +pod2usage(-verbose => 1) if ! $ARGV[1]; + +my $next_arg_is_file = 0; +my $append = 0; +my $next_arg_is_source = 0; +my $next_arg_is_stage = 0; +my $stage_table = 'biblio_record_entry'; +my $source = 'default'; +my $file_is_xml = 0; +my $dbh = Mig::db_connect(); +my $infile; +my $i = 0; +my $batch; +binmode STDIN, ':utf8'; + +foreach my $arg (@ARGV) { + if ($arg eq '--file') { + $next_arg_is_file = 1; + next; + } + if ($next_arg_is_file) { + $infile = $arg; + $next_arg_is_file = 0; + next; + } + if ($arg eq '--source') { + $next_arg_is_source = 1; + next; + } + if ($next_arg_is_source) { + $source = $arg; + $next_arg_is_source = 0; + next; + } +} + +my $bre_test = check_for_table($dbh,'biblio_record_entry'); +if ($bre_test == 0) { create_child_bre($dbh); } + +my $xmig_test = check_for_column($dbh,'biblio_record_entry','x_migrate'); +if ($xmig_test == 0) { add_column($dbh,'biblio_record_entry','x_migrate','BOOLEAN DEFAULT TRUE'); } + +my $xsource_test = check_for_column($dbh,'biblio_record_entry','x_source'); +if ($xsource_test == 0) { add_column($dbh,'biblio_record_entry','x_source','TEXT'); } + +#flatten out MARC XML FILE +open my $xml, "<:encoding(utf8)", $infile or abort('could not open MARC XML file'); +$i = 0; +my $record = ''; +while(my $line = <$xml>) { + if ($line =~ /^<\/?collection/) { next; } + chomp $line; + $record = $record . $line; + if ($line =~ /^<\/record/) { + stage_record($dbh,$record,$source); + $record = ''; + $i++; + if (($i % 100) == 0) { report_progress('Records stage', $i); } + } +} +close $xml; + +print "Finis.\n"; + +# beyond here be functions + +sub create_child_bre { + my $dbh = shift; + $dbh->do("DO \$\$ + DECLARE + t BOOLEAN; + BEGIN + SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = 'biblio_record_entry') INTO t; + IF t = FALSE THEN + PERFORM migration_tools.build_specific_base_staging_table ('$MIGSCHEMA','biblio.record_entry'); + END IF; + END \$\$;"); + + return (); +} + +sub abort { + my $msg = shift; + print STDERR "$0: $msg", "\n"; + exit 1; +} + +sub report_progress { + my ($msg, $counter) = @_; + if (defined $counter) { + print STDERR "$msg: $counter\n"; + } else { + print STDERR "$msg\n"; + } +} + +sub stage_record { + my $dbh = shift; + my $record = shift; + my $source = shift; + my $last_xact = "'$MIGSCHEMA'"; + $record = '$_$' . $record . '$_$'; + my $sql; + if ($source eq 'default') { $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry (last_xact_id,marc) VALUES ($last_xact,$record);"; } + else { $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry (last_xact_id,marc,x_source) VALUES ($last_xact,$record,'$source');"; } + my $sth = $dbh->prepare($sql); + $sth->execute(); + return; +} + +sub check_for_table { + my $dbh = shift; + my $table = shift; + my $sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table';"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = pop @sqlresult; + if ($r) { return $r; } else { return 0; } +} + +sub check_for_column { + my $dbh = shift; + my $table = shift; + my $column = shift; + my $sql = "SELECT 1 FROM information_schema.columns WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table' AND column_name = '$column';"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = pop @sqlresult; + if ($r) { return $r; } else { return 0; } +} + +sub add_column { + my $dbh = shift; + my $table = shift; + my $column = shift; + my $column_type = shift; + my $sql = "ALTER TABLE $MIGSCHEMA.$table ADD COLUMN $column $column_type;"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = check_for_column($dbh,$table,$column); + if ($r == 0) { abort('failed to create column'); } else { return $r; } +} + -- 1.7.2.5