X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=text%2Fcsv2mysql;fp=text%2Fcsv2mysql;h=0000000000000000000000000000000000000000;hp=2d74d0a9e361b81302be9a6b9cca1fb61333f160;hb=d9d2b1fbafcb6d379e74531b4c8583bb28ed37ad;hpb=72c3a70056e1d3aab8c35add6ed5d343aef5f157 diff --git a/text/csv2mysql b/text/csv2mysql deleted file mode 100755 index 2d74d0a..0000000 --- a/text/csv2mysql +++ /dev/null @@ -1,188 +0,0 @@ -#!/usr/bin/perl -w -use Getopt::Long; -use Text::CSV::Auto; -use Data::Dumper; -use DBI; -use File::Basename; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); - -my $dbh; -my $cfg; -my $csv_config; - -sub init { - if ($PGHOST and $PGPORT and $PGUSER and $PGDATABASE) - { - $dbh = connect_db($PGDATABASE,$PGUSER,undef,$PGHOST) or die $DBI::errstr; - } else { - our %config; - eval `cat /openils/conf/offline-config.pl`; - $dbh = DBI->connect( $config{dsn}, $config{usr}, $config{pw} ) or die $DBI::errstr; - } - - $cfg = { - schema => 'm_foo', - auto_options => { - } - }; - our %CSV_options = ( - binary => 1, - auto_diag => 1, - diag_verbose => 1, - ); - $cfg->{auto_options}->{csv_options} = \%CSV_options; - - GetOptions( - 'config=s' => \$csv_config, - 'no-legacy-prefix' => \($cfg->{no_legacy_prefix}), - 'use-no-headers-file' => \($cfg->{use_no_headers_file}), - 'add-x-migrate' => \($cfg->{add_x_migrate}), - 'outfile=s' => \($cfg->{outfile}), - 'schema=s' => \($cfg->{schema}), - 'parent=s' => \($cfg->{parent}), - 'help|?' => \$help - ); - if ($help || ((@ARGV == 0) && (-t STDIN))) { - die qq^\n\t$0 [--config ] [--add-x-migrate] [--no-legacy-prefix] [--schema ] [--parent ] [--outfile ] <"clean" file from clean_csv script>\n\n^; - } - if ($csv_config && ! -e $csv_config) { - die "$csv_config does not exist\n"; - } - if ($csv_config && -e $csv_config) { - eval `cat $csv_config`; - } - if (! -e $ARGV[0]) { - die "$ARGV[0] does not exist\n"; - } -} - -sub connect_db { - my ($db, $dbuser, $dbpw, $dbhost) = @_; - - my $dsn = "dbi:Pg:host=$dbhost;dbname=$db;port=5432"; - - my $attrs = { - ShowErrorStatement => 1, - RaiseError => 1, - PrintError => 1, - pg_enable_utf8 => 1, - }; - my $dbh = DBI->connect($dsn, $dbuser, $dbpw, $attrs); - - return $dbh; -} - - -sub write_sql_sample { - my $cfg = shift; - my $info = shift; - my $fn = $cfg->{outfile} || $cfg->{auto_options}->{file} . '.sql'; - - print "\twriting $fn\n"; - local *SQL; - open SQL, ">$fn"; - print SQL "-- $cfg->{auto_options}->{file}\n/*\n"; - open IN, $cfg->{auto_options}->{file}; - foreach (1..5) { - my $line = ; - print SQL $line; - } - close IN; - print SQL "*/\n"; - return *SQL; -} - -sub write_sql_table { - my $sql = shift; - my $cfg = shift; - my $info = shift; - my $fn = $cfg->{auto_options}->{file}; - my @indices = (); - - print "\twriting table definition\n"; - if ($cfg->{parent}) { - $cfg->{table_name} = $cfg->{parent} . '_legacy'; - } else { - $cfg->{table_name} = lc(basename($fn)); $cfg->{table_name} =~ s/[\-\. ]/_/g; - } - print $sql "DROP TABLE IF EXISTS $cfg->{schema}.$cfg->{table_name};\n"; - print $sql "CREATE UNLOGGED TABLE $cfg->{schema}.$cfg->{table_name} (\n"; - my $idx = 0; - if ($cfg->{add_x_migrate}) { - print $sql " x_migrate BOOLEAN\n"; - $idx++; - push @indices, 'x_migrate'; - } - foreach my $column (@{ $info }) { - my $cn = $column->{'header'}; - if ($cn =~ /^x_/) { - push @indices, $cn; - } - my $col_info = Dumper($column); - $col_info =~ s/^\$VAR1 = //; - print $sql " " . ($idx++ ? ',' : ' '); - print $sql "l_" unless $cfg->{no_legacy_prefix} or $column->{'header'} =~ /^x_/ or $column->{'header'} =~ /^l_/; - print $sql "$cn " . ($cn eq 'x_eg_bib_id' ? 'BIGINT' : 'TEXT'); - print $sql " /*\n $col_info */\n"; - } - if ($cfg->{parent}) { - print $sql ') INHERITS (' . $cfg->{schema} . '.' . $cfg->{parent} . ");\n"; - } else { - print $sql ");\n"; - } - foreach my $cn (@indices) { - print $sql "CREATE INDEX ON $cfg->{schema}.$cfg->{table_name} ($cn);\n"; - } -} - -sub write_sql_loader { - my $sql = shift; - my $cfg = shift; - my $auto = shift; - my $info = shift; - my $fn = $cfg->{auto_options}->{file} . ($cfg->{use_no_headers_file} ? '.no_headers' : ''); - - print "\twriting copy statement\n"; - print $sql "\n\\COPY $cfg->{schema}.$cfg->{table_name} ("; - my $idx = 0; - foreach my $column (@{ $info }) { - print $sql ($idx++ ? ',' : ''); - print $sql "l_" unless $cfg->{no_legacy_prefix} or $column->{'header'} =~ /^x_/ or $column->{'header'} =~ /^l_/; - print $sql $column->{'header'}; - } - print $sql ") FROM '$fn'"; - if ($auto->csv->sep_char eq chr(9) && ! defined $auto->csv->quote_char && ! defined $auto->csv->escape_char) { - # true .tsv, don't treat as csv - } elsif ($auto->csv->sep_char eq chr(9)) { - # probably good enough .tsv, don't treat as csv - } else { - print $sql " WITH csv " . ($cfg->{use_no_headers_file} ? "" : "header"); - print $sql " delimiter " . $dbh->quote( $auto->csv->sep_char ) unless $dbh->quote( $auto->csv->sep_char ) eq 'NULL'; - print $sql " quote " . $dbh->quote( $auto->csv->quote_char ) unless $dbh->quote( $auto->csv->quote_char ) eq 'NULL'; - print $sql " escape " . $dbh->quote( $auto->csv->escape_char ) unless $dbh->quote( $auto->csv->escape_char ) eq 'NULL'; - } - print $sql "\n"; -} - -sub main { - init(); - foreach my $fn (@ARGV) { - print "processing $fn\n"; - $cfg->{auto_options}->{file} = $fn; - my $auto = Text::CSV::Auto->new($cfg->{auto_options}); - - my $info = $auto->analyze(); - my $sql = write_sql_sample($cfg,$info); - write_sql_table($sql,$cfg,$info); - write_sql_loader($sql,$cfg,$auto,$info); - close $sql; - - print "\tdone.\n"; - } -} - -main(); -