X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=mig-bin%2Fmig-loadbibs;h=a6db91c5f4a0614aa1e52d09452699ca408d035c;hp=a6840dec8a8d76f8d89559d409c26ba0a6d461df;hb=56f5223cb936c8f62a928eece4b6f40ea5bc9b06;hpb=8778d74ad0833c120f7db371376b2420c3edc6c4 diff --git a/mig-bin/mig-loadbibs b/mig-bin/mig-loadbibs index a6840de..a6db91c 100755 --- a/mig-bin/mig-loadbibs +++ b/mig-bin/mig-loadbibs @@ -34,11 +34,7 @@ use strict; use warnings; use DBI; -use Data::Dumper; -use MARC::Record; -use MARC::Batch; -use MARC::File; -use MARC::File::XML; +#binmode STDIN, ':bytes'; use Env qw( HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR @@ -48,7 +44,6 @@ use Switch; use Cwd 'abs_path'; use FindBin; use UNIVERSAL; -use Unicode::Normalize; my $mig_bin = "$FindBin::Bin/"; use lib "$FindBin::Bin/"; use Mig; @@ -57,8 +52,10 @@ pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; pod2usage(-verbose => 1) if ! $ARGV[1]; my $next_arg_is_file = 0; -my $append_is_false = 1; +my $append = 0; my $next_arg_is_source = 0; +my $next_arg_is_stage = 0; +my $stage_table = 'biblio_record_entry'; my $source = 'default'; my $file_is_xml = 0; my $dbh = Mig::db_connect(); @@ -67,6 +64,9 @@ my $i = 0; my $batch; binmode STDIN, ':utf8'; +#MARC::Charset->assume_unicode(1); +MARC::Charset->ignore_errors(1); + foreach my $arg (@ARGV) { if ($arg eq '--stage_file') { $next_arg_is_file = 1; @@ -81,77 +81,63 @@ foreach my $arg (@ARGV) { $next_arg_is_source = 1; next; } - if ($next_arg_is_file) { + if ($next_arg_is_source) { $source = $arg; $next_arg_is_source = 0; next; } - if ($arg eq '--append') { - $append_is_false = 0; - next; - } - if ($arg eq '--xml') { - $file_is_xml = 1; - next; - } } -if ($append_is_false) { - $dbh->do(qq{ - DROP TABLE IF EXISTS $MIGSCHEMA.biblio_record_entry_stage; - CREATE UNLOGGED TABLE $MIGSCHEMA.biblio_record_entry_stage ( - l_bib_id TEXT, - x_source TEXT, - x_warnings TEXT, - x_migrate BOOLEAN DEFAULT TRUE - ) INHERITS ($MIGSCHEMA.biblio_record_entry); - }); -} +my $bre_test = check_for_table($dbh,'biblio_record_entry'); +if ($bre_test == 0) { create_child_bre($dbh); } + +my $xmig_test = check_for_column($dbh,'biblio_record_entry','x_migrate'); +if ($xmig_test == 0) { add_column($dbh,'biblio_record_entry','x_migrate','BOOLEAN DEFAULT TRUE'); + +my $xsource_test = check_for_column($dbh,'biblio_record_entry','x_source'); +if ($xsource_test == 0) { add_column($dbh,'biblio_record_entry','x_source','TEXT'); + +my $last_xact; +if ($source) { $last_xact = "'$MIGSCHEMA $source'" } else { $last_xact = "'$MIGSCHEMA'"; } + +#flatten out MARC XML FILE +open my $xml, "<:encoding(utf8)", $infile or abort('could not open MARC XML file'); +$i = 0; +my $record; +while(my $line = <$xml>) { + if ($line =~ /^<\/?collection/) { next; } + chomp $line; + $record = $record . $line; + if ($line =~ /^<\/record/) { + stage_record($dbh,$record,$last_xact); + $record = ''; + } + +close $xml; -if ($file_is_xml) { - $batch = MARC::Batch->new('XML',$infile); -} else { - $batch = MARC::Batch->new('USMARC',$infile); -} -$batch->strict_off(); - -while ( my $record = $batch->next() ) { - my $xml; - if ($file_is_xml) { $xml = $record; } - else { $xml = $record->as_xml_record(); } - $i++; - $xml = clean_marc($xml); - $xml = '$_$' . $xml . '$_$'; - my @warnings = $batch->warnings(); - my $warning_string; - if (@warnings) { $warning_string = "'" . join(':',@warnings) . "'"; } else { $warning_string = "'none'"; } - my $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry_stage (marc,x_source,x_warnings) VALUES ($xml,$source,$warning_string);"; - my $sth = $dbh->prepare($sql); - $sth->execute(); - report_progress("Records staged", $i) if 0 != $i % 100; -} -$dbh->do(qq/ - CREATE INDEX ${MIGSCHEMA}_biblio_record_entry_stage_idx ON - $MIGSCHEMA.biblio_record_entry_stage (id); -/); +#load the MARC XML FILE TO STAGING +report_progress("Records staged", $i) if 0 != $i % 100; print "Finis.\n"; -sub clean_marc { - my $xml = shift; - $xml =~ s/\n//sog; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+do("DO \$\$ + DECLARE + t BOOLEAN; + BEGIN + SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = 'biblio_record_entry') INTO t; + IF t = FALSE THEN + PERFORM migration_tools.build_specific_base_staging_table ('$MIGSCHEMA','biblio.record_entry'); + END IF; + END \$\$;"); + + return (); } - sub abort { my $msg = shift; print STDERR "$0: $msg", "\n"; @@ -166,3 +152,48 @@ sub report_progress { print STDERR "$msg\n"; } } + +sub stage_record { + my $dbh = shift; + my $record = shift; + my $last_xact = shift; + $record = '$_$' . $record . '$_$'; + my $sql = "INSERT INTO $MIGSCHEMA.biblio_record_entry (last_xact_id,marc) VALUES ($last_xact,$record);"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + return; +} + +sub check_for_table { + my $dbh = shift; + my $table = shift; + my $sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table';"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = pop @sqlresult; + if ($r) { return $r; } else { return 0; } +} + +sub check_for_column { + my $dbh = shift; + my $table = shift; + my $column = shift; + my $sql = "SELECT 1 FROM information_schema.columns WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table' AND column_name = $column;"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = pop @sqlresult; + if ($r) { return $r; } else { return 0; } +} + +sub add_column { + my $dbh = shift; + my $table = shift; + my $column = shift; + my $column_type = shift; + my $sql = "ALTER TABLE $MIGSCHEMA.$table ADD COLUMN $COLUMN $COLUMN_TYPE;"; + my $r = check_for_column($dbh,$table,$column); + if ($r == 0) { abort('failed to create column'; } else { return $r; } +} +