#!/usr/bin/perl -w ############################################################################### =pod =head1 NAME mig-quicksheet By default: Quickly produces an Excel spreadsheet based on the tracked file suitable for simple end-user mapping. The new file is named after the tracked file, but ends in .mapping.xls Multiple files may be specified, in which case all of the results are concatenated into one spreadsheet named .mapping.xls If using --outfile: This specifies the exact name to use for the Excel file. If not specified, and there is also no --outtable, then the naming convention will be as specified above. If using --outtable: This specifies a summary table and prefix to use within the migration schema for recording the output either in addition to or instead of the Excel file. Unless --force is specified, it will not overwrite existing tables. If using --drop with --outable: This will delete the summary table specified and all related sub-tables. =head1 SYNOPSIS B [--force|--drop|--outfile |--outtable ] [...] =cut ############################################################################### use strict; use Switch; use Env qw( HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR ); use Try::Tiny; use Pod::Usage; use Getopt::Long; use DBI; use Spreadsheet::WriteExcel; use Cwd 'abs_path'; use FindBin; my $mig_bin = "$FindBin::Bin/"; use lib "$FindBin::Bin/"; use Mig; my $outtable = ''; my $outfile = ''; my $force; my $drop; my $help; GetOptions( 'outtable=s' => \$outtable, 'outfile=s' => \$outfile, 'force' => \$force, 'drop' => \$drop, 'help|?' => \$help ); pod2usage(-verbose => 2) if $help || ! $ARGV[0]; if (! $outtable && ! $outfile) { if (scalar(@ARGV) > 1) { $outfile = $MIGSCHEMA . '.mapping.xls'; } else { $outfile = abs_path($ARGV[0]) . '.mapping.xls'; } } Mig::die_if_no_env_migschema(); Mig::die_if_mig_tracking_table_does_not_exist(); my $workbook; my @worksheets = (); my $first_sheet; my $first_table; my $toc; my $sheet_row_offset = 0; my $sheet_row_start = 4; my $table; my $file; my $fdata; my $has_x_source = 0; my $bold; my $left; my $counter = 0; if (!$drop) { init_workbook(); foreach my $f (@ARGV) { $file = abs_path($f); $counter++; if ($toc) { $toc->write($counter,0,$counter); $toc->write($counter,1,$f); } handle_file(); write_worksheets(); } close_workbook(); } else { if (Mig::check_db_migschema_for_specific_table($outtable)) { drop_existing_outtable(); } } sub handle_file { my $tracked_file_id = Mig::check_for_tracked_file($file); if ($tracked_file_id) { $fdata = Mig::status_this_file($file); } else { die "File not currently tracked: $file\n"; } $table = $fdata->{staged_table}; if (!$table) { die "No staged staged table for file: $file\n"; } } sub init_workbook { if ($outfile) { print "Writing $outfile\n"; $workbook = Spreadsheet::WriteExcel->new( $outfile ); $bold = $workbook->add_format(); $bold->set_bold(); $bold->set_align('left'); $left = $workbook->add_format(); $left->set_align('left'); if (scalar(@ARGV) > 1) { $toc = $workbook->add_worksheet('Files'); } } if ($outtable) { if (Mig::check_db_migschema_for_specific_table($outtable)) { if ($force) { drop_existing_outtable(); } else { die "$outtable already exists. Use --force to wipe and redo tables.\n"; } } create_new_outtable(); } } sub drop_existing_outtable { # we want a transaction for this one my $dbh = Mig::db_connect(); $dbh->{AutoCommit} = 0; $dbh->{RaiseError} = 1; try { # gather subordinate tables my @tables = (); my $sth = $dbh->prepare(" SELECT summary_table FROM $MIGSCHEMA.$outtable ORDER BY 1;" ); my $rv = $sth->execute(); my $rows = $sth->fetchall_arrayref; for my $row ( @$rows ) { push @tables, $row->[0] } # drop them foreach my $table (@tables) { print "Dropping $MIGSCHEMA.$table\n"; $dbh->do("DROP TABLE $MIGSCHEMA.\"$table\";"); } # drop master table print "Dropping $MIGSCHEMA.$outtable\n"; $dbh->do("DROP TABLE $MIGSCHEMA.$outtable;"); $dbh->commit; } catch { warn "Transaction aborted because $_\n"; eval { $dbh->rollback }; die "Aborting mig-quicksheet\n"; }; Mig::db_disconnect($dbh); } sub create_new_outtable { my $dbh = Mig::db_connect(); print "Creating table $MIGSCHEMA.$outtable\n"; my $rv = $dbh->do(" CREATE UNLOGGED TABLE $MIGSCHEMA.$outtable ( file TEXT, summary_table TEXT UNIQUE ); ") || die "Error creating outtable ($MIGSCHEMA.$outtable): $!\n"; Mig::db_disconnect($dbh); } sub create_new_subtable { my $subtable = shift; my $dbh = Mig::db_connect(); $dbh->{AutoCommit} = 0; $dbh->{RaiseError} = 1; try { print "Creating table $MIGSCHEMA.\"$subtable\"\n"; my $rv = $dbh->do(" CREATE UNLOGGED TABLE $MIGSCHEMA.\"$subtable\" (); ") || die "Error creating subtable ($MIGSCHEMA.\"$subtable\"): $!\n"; $rv = $dbh->do(" INSERT INTO $MIGSCHEMA.$outtable (file,summary_table) VALUES (" . $dbh->quote($file) . ',' . $dbh->quote($subtable) . "); ") || die "Error inserting into outtable ($MIGSCHEMA.$outtable): $!\n"; $dbh->commit; } catch { warn "Transaction aborted because $_\n"; eval { $dbh->rollback }; die "Aborting mig-quicksheet\n"; }; Mig::db_disconnect($dbh); } sub write_worksheets { print 'File #' . $counter . "\n"; print "Sheet: Field Summary\n"; my $tab_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . 'Field Summary'; $tab_name = substr($tab_name,0,31); # truncate for WriteExcel if ($outfile) { $first_sheet = $workbook->add_worksheet( $tab_name ); $first_sheet->set_column(0,6,30); } if ($outtable) { $first_table = "$outtable $tab_name"; create_new_subtable( $first_table ); } my $dbh = Mig::db_connect(); my $sth = $dbh->prepare(" SELECT COUNT(*) FROM $MIGSCHEMA.$table LIMIT 1; "); my $rv = $sth->execute() || die "Error retrieving data from information_schema: $!"; my @cols = $sth->fetchrow_array; $sth->finish; my $count = $cols[0]; $sheet_row_start = 0; if ($outfile) { $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold); $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold); $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold); $first_sheet->write($sheet_row_start,3,'Min Value',$bold); $first_sheet->write($sheet_row_start,4,'Min Length',$bold); $first_sheet->write($sheet_row_start,5,'Max Value',$bold); $first_sheet->write($sheet_row_start,6,'Max Length',$bold); } if ($outtable) { try { $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Legacy Column" TEXT;'); $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Non-Empty Rows" TEXT;'); $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Distinct Non-NULL Values" TEXT;'); $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Value" TEXT;'); $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Length" TEXT;'); $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Value" TEXT;'); $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Length" TEXT;'); } catch { die "Error modifying subtable ($MIGSCHEMA.$first_table): $_\n"; }; } handle_columns(); if ($outfile) { $first_sheet->write($count + 3,0,'Source File:',$bold); $first_sheet->write($count + 3,1,$file,$left); $first_sheet->write($count + 4,0,'Number of Rows:',$bold); $first_sheet->write($count + 4,1,$count,$left); } if ($outtable) { try { $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column") VALUES (NULL);'); $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Source File:'," . $dbh->quote($file) . ");"); $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Number of Rows:',$count);"); } catch { die "Error inserting into subtable ($MIGSCHEMA.$first_table): $_\n"; }; } Mig::db_disconnect($dbh); } sub close_workbook { if ($outfile) { $workbook->close(); } } exit 0; ############################################################################### sub handle_columns { my $dbh = Mig::db_connect(); my $sth = $dbh->prepare(" SELECT * FROM information_schema.columns WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " AND table_name = " . $dbh->quote($table) . " ORDER BY dtd_identifier::INTEGER ASC; "); my $rv = $sth->execute() || die "Error retrieving data from information_schema: $!"; $sheet_row_offset = 0; $has_x_source = 0; while (my $data = $sth->fetchrow_hashref) { my $column = $data->{column_name}; if ($column eq 'x_source') { $has_x_source = 1; } if ($column =~ /^l_/ || ($column =~ /^x_/ && ( $column ne 'x_migrate' && $column ne 'x_source' && $column ne 'x_egid' && $column ne 'x_hseq' ) ) ) { $sheet_row_offset++; my $cdata = column_summary($column); if ($outfile) { $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left); } if ($outtable) { $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$first_table" VALUES (^ . join(',' ,$cdata->{non_empty_count} ,$cdata->{distinct_value_count} ,$dbh->quote($cdata->{min_value}) ,$cdata->{min_length} ,$dbh->quote($cdata->{max_value}) ,$cdata->{max_length} ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$first_table\": $!"; } if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) { group_by($column); } } } $sth->finish; Mig::db_disconnect($dbh); } sub column_summary { my $column = shift; my $dbh = Mig::db_connect(); ### non_empty_count my $sth = $dbh->prepare(" SELECT COUNT(*) FROM $MIGSCHEMA.$table WHERE $column IS NOT NULL AND BTRIM($column) <> ''; "); my $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; my @cols = $sth->fetchrow_array; $sth->finish; my $non_empty_count = $cols[0]; ### distinct_value_count $sth = $dbh->prepare(" SELECT COUNT(DISTINCT $column) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $distinct_value_count = $cols[0]; ### min_value $sth = $dbh->prepare(" SELECT MIN($column) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $min_value = $cols[0]; ### min_length $sth = $dbh->prepare(" SELECT MIN(LENGTH($column)) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $min_length = $cols[0]; ### max_value $sth = $dbh->prepare(" SELECT MAX($column) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $max_value = $cols[0]; ### max_length $sth = $dbh->prepare(" SELECT MAX(LENGTH($column)) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $max_length = $cols[0]; return { non_empty_count => $non_empty_count ,distinct_value_count => $distinct_value_count ,min_value => defined $min_value ? $min_value : '' ,min_length => defined $min_length ? $min_length : '' ,max_value => defined $max_value ? $max_value : '' ,max_length => defined $max_length ? $max_length : '' }; } sub group_by { my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2"); my $dbh = Mig::db_connect(); my $sth; my $rv; my $col_sheet_row_start = 0; my $col_sheet_row_offset = 0; my $col_sheet; my $col_table; my $sheet_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . $column; $sheet_name = substr($sheet_name,0,31); print "Sheet: $sheet_name\n"; if ($has_x_source) { $option = "GROUP BY 2,3 ORDER BY 2,3"; } if ($outfile) { $col_sheet = $workbook->add_worksheet( $sheet_name ); push @worksheets, $col_sheet; $col_sheet->set_column(0,6,30); $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold); if ($has_x_source) { $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold); } $col_sheet->write( $col_sheet_row_start + $col_sheet_row_offset ,$has_x_source ? 2 : 1 ,"Legacy Value for $column" ,$bold ); } if ($outtable) { $col_table = "$outtable $sheet_name"; create_new_subtable( $col_table ); $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Count" TEXT;') || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; if ($has_x_source) { $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Source" TEXT;') || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; } $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "' . $dbh->quote("Legacy value for $column") . '" TEXT;') || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; } $sth = $dbh->prepare(" SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . "$column FROM $MIGSCHEMA.$table $option; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; while (my @cols = $sth->fetchrow_array) { $col_sheet_row_offset++; my $count = $cols[0]; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left) if $outfile; my $value; my $source; if ($has_x_source) { $source = defined $cols[1] ? $cols[1] : ''; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left) if $outfile; $value = defined $cols[2] ? $cols[2] : ''; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left) if $outfile; } else { $value = defined $cols[1] ? $cols[1] : ''; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left) if $outfile; } if ($outtable) { if ($has_x_source) { $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(',' ,$count ,$dbh->quote($source) ,$dbh->quote($value) ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!"; } else { $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(',' ,$count ,$dbh->quote($value) ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!"; } } } $sth->finish; }