From 72c3a70056e1d3aab8c35add6ed5d343aef5f157 Mon Sep 17 00:00:00 2001 From: Jason Etheridge Date: Fri, 10 Apr 2020 16:01:53 -0400 Subject: [PATCH] seed csv2mysql with csv2sql --- text/csv2mysql | 188 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 188 insertions(+), 0 deletions(-) create mode 100755 text/csv2mysql diff --git a/text/csv2mysql b/text/csv2mysql new file mode 100755 index 0000000..2d74d0a --- /dev/null +++ b/text/csv2mysql @@ -0,0 +1,188 @@ +#!/usr/bin/perl -w +use Getopt::Long; +use Text::CSV::Auto; +use Data::Dumper; +use DBI; +use File::Basename; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); + +my $dbh; +my $cfg; +my $csv_config; + +sub init { + if ($PGHOST and $PGPORT and $PGUSER and $PGDATABASE) + { + $dbh = connect_db($PGDATABASE,$PGUSER,undef,$PGHOST) or die $DBI::errstr; + } else { + our %config; + eval `cat /openils/conf/offline-config.pl`; + $dbh = DBI->connect( $config{dsn}, $config{usr}, $config{pw} ) or die $DBI::errstr; + } + + $cfg = { + schema => 'm_foo', + auto_options => { + } + }; + our %CSV_options = ( + binary => 1, + auto_diag => 1, + diag_verbose => 1, + ); + $cfg->{auto_options}->{csv_options} = \%CSV_options; + + GetOptions( + 'config=s' => \$csv_config, + 'no-legacy-prefix' => \($cfg->{no_legacy_prefix}), + 'use-no-headers-file' => \($cfg->{use_no_headers_file}), + 'add-x-migrate' => \($cfg->{add_x_migrate}), + 'outfile=s' => \($cfg->{outfile}), + 'schema=s' => \($cfg->{schema}), + 'parent=s' => \($cfg->{parent}), + 'help|?' => \$help + ); + if ($help || ((@ARGV == 0) && (-t STDIN))) { + die qq^\n\t$0 [--config ] [--add-x-migrate] [--no-legacy-prefix] [--schema ] [--parent ] [--outfile ] <"clean" file from clean_csv script>\n\n^; + } + if ($csv_config && ! -e $csv_config) { + die "$csv_config does not exist\n"; + } + if ($csv_config && -e $csv_config) { + eval `cat $csv_config`; + } + if (! -e $ARGV[0]) { + die "$ARGV[0] does not exist\n"; + } +} + +sub connect_db { + my ($db, $dbuser, $dbpw, $dbhost) = @_; + + my $dsn = "dbi:Pg:host=$dbhost;dbname=$db;port=5432"; + + my $attrs = { + ShowErrorStatement => 1, + RaiseError => 1, + PrintError => 1, + pg_enable_utf8 => 1, + }; + my $dbh = DBI->connect($dsn, $dbuser, $dbpw, $attrs); + + return $dbh; +} + + +sub write_sql_sample { + my $cfg = shift; + my $info = shift; + my $fn = $cfg->{outfile} || $cfg->{auto_options}->{file} . '.sql'; + + print "\twriting $fn\n"; + local *SQL; + open SQL, ">$fn"; + print SQL "-- $cfg->{auto_options}->{file}\n/*\n"; + open IN, $cfg->{auto_options}->{file}; + foreach (1..5) { + my $line = ; + print SQL $line; + } + close IN; + print SQL "*/\n"; + return *SQL; +} + +sub write_sql_table { + my $sql = shift; + my $cfg = shift; + my $info = shift; + my $fn = $cfg->{auto_options}->{file}; + my @indices = (); + + print "\twriting table definition\n"; + if ($cfg->{parent}) { + $cfg->{table_name} = $cfg->{parent} . '_legacy'; + } else { + $cfg->{table_name} = lc(basename($fn)); $cfg->{table_name} =~ s/[\-\. ]/_/g; + } + print $sql "DROP TABLE IF EXISTS $cfg->{schema}.$cfg->{table_name};\n"; + print $sql "CREATE UNLOGGED TABLE $cfg->{schema}.$cfg->{table_name} (\n"; + my $idx = 0; + if ($cfg->{add_x_migrate}) { + print $sql " x_migrate BOOLEAN\n"; + $idx++; + push @indices, 'x_migrate'; + } + foreach my $column (@{ $info }) { + my $cn = $column->{'header'}; + if ($cn =~ /^x_/) { + push @indices, $cn; + } + my $col_info = Dumper($column); + $col_info =~ s/^\$VAR1 = //; + print $sql " " . ($idx++ ? ',' : ' '); + print $sql "l_" unless $cfg->{no_legacy_prefix} or $column->{'header'} =~ /^x_/ or $column->{'header'} =~ /^l_/; + print $sql "$cn " . ($cn eq 'x_eg_bib_id' ? 'BIGINT' : 'TEXT'); + print $sql " /*\n $col_info */\n"; + } + if ($cfg->{parent}) { + print $sql ') INHERITS (' . $cfg->{schema} . '.' . $cfg->{parent} . ");\n"; + } else { + print $sql ");\n"; + } + foreach my $cn (@indices) { + print $sql "CREATE INDEX ON $cfg->{schema}.$cfg->{table_name} ($cn);\n"; + } +} + +sub write_sql_loader { + my $sql = shift; + my $cfg = shift; + my $auto = shift; + my $info = shift; + my $fn = $cfg->{auto_options}->{file} . ($cfg->{use_no_headers_file} ? '.no_headers' : ''); + + print "\twriting copy statement\n"; + print $sql "\n\\COPY $cfg->{schema}.$cfg->{table_name} ("; + my $idx = 0; + foreach my $column (@{ $info }) { + print $sql ($idx++ ? ',' : ''); + print $sql "l_" unless $cfg->{no_legacy_prefix} or $column->{'header'} =~ /^x_/ or $column->{'header'} =~ /^l_/; + print $sql $column->{'header'}; + } + print $sql ") FROM '$fn'"; + if ($auto->csv->sep_char eq chr(9) && ! defined $auto->csv->quote_char && ! defined $auto->csv->escape_char) { + # true .tsv, don't treat as csv + } elsif ($auto->csv->sep_char eq chr(9)) { + # probably good enough .tsv, don't treat as csv + } else { + print $sql " WITH csv " . ($cfg->{use_no_headers_file} ? "" : "header"); + print $sql " delimiter " . $dbh->quote( $auto->csv->sep_char ) unless $dbh->quote( $auto->csv->sep_char ) eq 'NULL'; + print $sql " quote " . $dbh->quote( $auto->csv->quote_char ) unless $dbh->quote( $auto->csv->quote_char ) eq 'NULL'; + print $sql " escape " . $dbh->quote( $auto->csv->escape_char ) unless $dbh->quote( $auto->csv->escape_char ) eq 'NULL'; + } + print $sql "\n"; +} + +sub main { + init(); + foreach my $fn (@ARGV) { + print "processing $fn\n"; + $cfg->{auto_options}->{file} = $fn; + my $auto = Text::CSV::Auto->new($cfg->{auto_options}); + + my $info = $auto->analyze(); + my $sql = write_sql_sample($cfg,$info); + write_sql_table($sql,$cfg,$info); + write_sql_loader($sql,$cfg,$auto,$info); + close $sql; + + print "\tdone.\n"; + } +} + +main(); + -- 1.7.2.5