X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=mig-bin%2Fmig-quicksheet;fp=mig-bin%2Fmig-quicksheet;h=0000000000000000000000000000000000000000;hp=22ed33ed0c8dbfabf6beb6552cd931f03598a8e7;hb=155eb9eac077ca803f75d1295e584e7012e1b883;hpb=69588457ab8f70fbb77af29cc0653933d24ed2ac diff --git a/mig-bin/mig-quicksheet b/mig-bin/mig-quicksheet deleted file mode 100755 index 22ed33e..0000000 --- a/mig-bin/mig-quicksheet +++ /dev/null @@ -1,594 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-quicksheet - -By default: - -Quickly produces an Excel spreadsheet based on the tracked file suitable -for simple end-user mapping. The new file is named after the tracked file, but -ends in .mapping.xls - -Multiple files may be specified, in which case all of the results are -concatenated into one spreadsheet named .mapping.xls - -If using --outfile: - -This specifies the exact name to use for the Excel file. If not specified, and -there is also no --outtable, then the naming convention will be as specified -above. - -If using --outtable: - -This specifies a summary table and prefix to use within the migration schema for -recording the output either in addition to or instead of the Excel file. Unless ---force is specified, it will not overwrite existing tables. - -If using --drop with --outable: - -This will delete the summary table specified and all related sub-tables. - -=head1 SYNOPSIS - -B [--force|--drop|--outfile |--outtable ] [...] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Try::Tiny; -use Pod::Usage; -use Getopt::Long; -use DBI; -use Spreadsheet::WriteExcel; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -my $outtable = ''; -my $outfile = ''; -my $force; -my $drop; -my $help; - -GetOptions( - 'outtable=s' => \$outtable, - 'outfile=s' => \$outfile, - 'force' => \$force, - 'drop' => \$drop, - 'help|?' => \$help -); -pod2usage(-verbose => 2) if $help || ! $ARGV[0]; - -if (! $outtable && ! $outfile) { - if (scalar(@ARGV) > 1) { - $outfile = $MIGSCHEMA . '.mapping.xls'; - } else { - $outfile = abs_path($ARGV[0]) . '.mapping.xls'; - } -} - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $workbook; -my @worksheets = (); -my $first_sheet; -my $first_table; -my $toc; -my $sheet_row_offset = 0; -my $sheet_row_start = 4; -my $table; -my $file; -my $fdata; -my $has_x_source = 0; -my $bold; -my $left; -my $counter = 0; - -if (!$drop) { - init_workbook(); - foreach my $f (@ARGV) { - $file = abs_path($f); - $counter++; - if ($toc) { - $toc->write($counter,0,$counter); - $toc->write($counter,1,$f); - } - handle_file(); - write_worksheets(); - } - close_workbook(); -} else { - if (Mig::check_db_migschema_for_specific_table($outtable)) { - drop_existing_outtable(); - } -} - -sub handle_file { - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - $fdata = Mig::status_this_file($file); - } else { - die "File not currently tracked: $file\n"; - } - $table = $fdata->{staged_table}; - if (!$table) { - die "No staged staged table for file: $file\n"; - } -} - -sub init_workbook { - if ($outfile) { - print "Writing $outfile\n"; - $workbook = Spreadsheet::WriteExcel->new( $outfile ); - $bold = $workbook->add_format(); - $bold->set_bold(); - $bold->set_align('left'); - $left = $workbook->add_format(); - $left->set_align('left'); - if (scalar(@ARGV) > 1) { - $toc = $workbook->add_worksheet('Files'); - } - } - if ($outtable) { - if (Mig::check_db_migschema_for_specific_table($outtable)) { - if ($force) { - drop_existing_outtable(); - } else { - die "$outtable already exists. Use --force to wipe and redo tables.\n"; - } - } - create_new_outtable(); - } -} - -sub drop_existing_outtable { - - # we want a transaction for this one - my $dbh = Mig::db_connect(); - $dbh->{AutoCommit} = 0; - $dbh->{RaiseError} = 1; - - try { - # gather subordinate tables - - my @tables = (); - my $sth = $dbh->prepare(" - SELECT summary_table - FROM $MIGSCHEMA.$outtable - ORDER BY 1;" - ); - my $rv = $sth->execute(); - my $rows = $sth->fetchall_arrayref; - for my $row ( @$rows ) { - push @tables, $row->[0] - } - - # drop them - - foreach my $table (@tables) { - print "Dropping $MIGSCHEMA.$table\n"; - $dbh->do("DROP TABLE $MIGSCHEMA.\"$table\";"); - } - - # drop master table - - print "Dropping $MIGSCHEMA.$outtable\n"; - $dbh->do("DROP TABLE $MIGSCHEMA.$outtable;"); - - $dbh->commit; - } catch { - warn "Transaction aborted because $_\n"; - eval { $dbh->rollback }; - die "Aborting mig-quicksheet\n"; - }; - - Mig::db_disconnect($dbh); -} - -sub create_new_outtable { - my $dbh = Mig::db_connect(); - print "Creating table $MIGSCHEMA.$outtable\n"; - my $rv = $dbh->do(" - CREATE UNLOGGED TABLE $MIGSCHEMA.$outtable ( - file TEXT, - summary_table TEXT UNIQUE - ); - ") || die "Error creating outtable ($MIGSCHEMA.$outtable): $!\n"; - Mig::db_disconnect($dbh); -} - -sub create_new_subtable { - my $subtable = shift; - my $dbh = Mig::db_connect(); - $dbh->{AutoCommit} = 0; - $dbh->{RaiseError} = 1; - - try { - print "Creating table $MIGSCHEMA.\"$subtable\"\n"; - my $rv = $dbh->do(" - CREATE UNLOGGED TABLE $MIGSCHEMA.\"$subtable\" (); - ") || die "Error creating subtable ($MIGSCHEMA.\"$subtable\"): $!\n"; - $rv = $dbh->do(" - INSERT INTO $MIGSCHEMA.$outtable (file,summary_table) VALUES (" . $dbh->quote($file) . ',' . $dbh->quote($subtable) . "); - ") || die "Error inserting into outtable ($MIGSCHEMA.$outtable): $!\n"; - $dbh->commit; - } catch { - warn "Transaction aborted because $_\n"; - eval { $dbh->rollback }; - die "Aborting mig-quicksheet\n"; - }; - - Mig::db_disconnect($dbh); -} - -sub write_worksheets { - print 'File #' . $counter . "\n"; - print "Sheet: Field Summary\n"; - my $tab_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . 'Field Summary'; - $tab_name = substr($tab_name,0,31); # truncate for WriteExcel - if ($outfile) { - $first_sheet = $workbook->add_worksheet( $tab_name ); - $first_sheet->set_column(0,6,30); - } - if ($outtable) { - $first_table = "$outtable $tab_name"; - create_new_subtable( $first_table ); - } - - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT COUNT(*) - FROM $MIGSCHEMA.$table - LIMIT 1; - "); - my $rv = $sth->execute() - || die "Error retrieving data from information_schema: $!"; - - my @cols = $sth->fetchrow_array; - $sth->finish; - my $count = $cols[0]; - - $sheet_row_start = 0; - - if ($outfile) { - $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold); - $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold); - $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold); - $first_sheet->write($sheet_row_start,3,'Min Value',$bold); - $first_sheet->write($sheet_row_start,4,'Min Length',$bold); - $first_sheet->write($sheet_row_start,5,'Max Value',$bold); - $first_sheet->write($sheet_row_start,6,'Max Length',$bold); - } - if ($outtable) { - try { - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Legacy Column" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Non-Empty Rows" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Distinct Non-NULL Values" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Value" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Length" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Value" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Length" TEXT;'); - } catch { - die "Error modifying subtable ($MIGSCHEMA.$first_table): $_\n"; - }; - } - - handle_list(); - handle_columns(); - - if ($outfile) { - $first_sheet->write($count + 3,0,'Source File:',$bold); - $first_sheet->write($count + 3,1,$file,$left); - $first_sheet->write($count + 4,0,'Number of Rows:',$bold); - $first_sheet->write($count + 4,1,$count,$left); - } - if ($outtable) { - try { - $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column") VALUES (NULL);'); - $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . - ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Source File:'," . $dbh->quote($file) . ");"); - $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . - ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Number of Rows:',$count);"); - } catch { - die "Error inserting into subtable ($MIGSCHEMA.$first_table): $_\n"; - }; - } - - Mig::db_disconnect($dbh); -} - -sub close_workbook { - if ($outfile) { - $workbook->close(); - } -} - -exit 0; - -############################################################################### - -sub handle_list { - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM " . $MIGSCHEMA. "." . $table . " - LIMIT 65530; - "); - my $rv = $sth->execute() - || die "Error retrieving data from staging table: $!"; - my $list_sheet; - - $sheet_row_offset = 0; - $has_x_source = 0; - if ($outfile) { - print "Sheet: $table\n"; - $list_sheet = $workbook->add_worksheet( $table ); - } - - my $handle_headers = 1; - - while (my $data = $sth->fetchrow_hashref) { - if ($handle_headers) { - my $_idx = 0; - foreach my $col (sort keys %{ $data }) { - $list_sheet->write($sheet_row_start + $sheet_row_offset,$_idx++,$col,$bold); - } - $handle_headers = 0; - } - $sheet_row_offset++; - my $idx = 0; - foreach my $col (sort keys %{ $data }) { - my $cdata = $$data{$col}; - if (!defined $cdata) { $cdata = '\N'; } - if ($outfile) { - $list_sheet->write($sheet_row_start + $sheet_row_offset,$idx++,$cdata,$left); - } - } - } -} - -sub handle_columns { - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM information_schema.columns - WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " - AND table_name = " . $dbh->quote($table) . " - ORDER BY dtd_identifier::INTEGER ASC; - "); - my $rv = $sth->execute() - || die "Error retrieving data from information_schema: $!"; - - $sheet_row_offset = 0; - $has_x_source = 0; - - while (my $data = $sth->fetchrow_hashref) { - my $column = $data->{column_name}; - if ($column eq 'x_source') { - $has_x_source = 1; - } - if ($column =~ /^l_/ - || ($column =~ /^x_/ - && ( $column ne 'x_migrate' - && $column ne 'x_source' - && $column ne 'x_egid' - && $column ne 'x_hseq' - ) - ) - ) { - $sheet_row_offset++; - my $cdata = column_summary($column); - if ($outfile) { - $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left); - } - if ($outtable) { - $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$first_table" VALUES (^ . join(',' - ,$cdata->{non_empty_count} - ,$cdata->{distinct_value_count} - ,$dbh->quote($cdata->{min_value}) - ,$cdata->{min_length} - ,$dbh->quote($cdata->{max_value}) - ,$cdata->{max_length} - ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$first_table\": $!"; - } - if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) { - group_by($column); - } - } - } - $sth->finish; - Mig::db_disconnect($dbh); -} - -sub column_summary { - - my $column = shift; - - my $dbh = Mig::db_connect(); - - ### non_empty_count - my $sth = $dbh->prepare(" - SELECT COUNT(*) - FROM $MIGSCHEMA.$table - WHERE $column IS NOT NULL AND BTRIM($column) <> ''; - "); - my $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - my $non_empty_count = $cols[0]; - - ### distinct_value_count - $sth = $dbh->prepare(" - SELECT COUNT(DISTINCT $column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $distinct_value_count = $cols[0]; - - ### min_value - $sth = $dbh->prepare(" - SELECT MIN($column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $min_value = $cols[0]; - - ### min_length - $sth = $dbh->prepare(" - SELECT MIN(LENGTH($column)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $min_length = $cols[0]; - - ### max_value - $sth = $dbh->prepare(" - SELECT MAX($column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $max_value = $cols[0]; - - ### max_length - $sth = $dbh->prepare(" - SELECT MAX(LENGTH($column)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $max_length = $cols[0]; - - return { - non_empty_count => $non_empty_count - ,distinct_value_count => $distinct_value_count - ,min_value => defined $min_value ? $min_value : '' - ,min_length => defined $min_length ? $min_length : '' - ,max_value => defined $max_value ? $max_value : '' - ,max_length => defined $max_length ? $max_length : '' - }; -} - -sub group_by { - my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2"); - - my $dbh = Mig::db_connect(); - my $sth; - my $rv; - - my $col_sheet_row_start = 0; - my $col_sheet_row_offset = 0; - my $col_sheet; - my $col_table; - - my $sheet_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . $column; - $sheet_name = substr($sheet_name,0,31); - - print "Sheet: $sheet_name\n"; - if ($has_x_source) { - $option = "GROUP BY 2,3 ORDER BY 2,3"; - } - - if ($outfile) { - $col_sheet = $workbook->add_worksheet( $sheet_name ); - push @worksheets, $col_sheet; - $col_sheet->set_column(0,6,30); - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold); - if ($has_x_source) { - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold); - } - $col_sheet->write( - $col_sheet_row_start + $col_sheet_row_offset - ,$has_x_source ? 2 : 1 - ,"Legacy Value for $column" - ,$bold - ); - } - - if ($outtable) { - $col_table = "$outtable $sheet_name"; - create_new_subtable( $col_table ); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Count" TEXT;') - || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; - if ($has_x_source) { - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Source" TEXT;') - || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; - } - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "' . $dbh->quote("Legacy value for $column") . '" TEXT;') - || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; - } - - $sth = $dbh->prepare(" - SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . "$column - FROM $MIGSCHEMA.$table - $option; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - - while (my @cols = $sth->fetchrow_array) { - $col_sheet_row_offset++; - my $count = $cols[0]; - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left) if $outfile; - my $value; - my $source; - if ($has_x_source) { - $source = defined $cols[1] ? $cols[1] : ''; - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left) if $outfile; - $value = defined $cols[2] ? $cols[2] : ''; - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left) if $outfile; - } else { - $value = defined $cols[1] ? $cols[1] : ''; - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left) if $outfile; - } - if ($outtable) { - if ($has_x_source) { - $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(',' - ,$count - ,$dbh->quote($source) - ,$dbh->quote($value) - ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!"; - } else { - $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(',' - ,$count - ,$dbh->quote($value) - ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!"; - } - } - } - $sth->finish; -} -