#!/usr/bin/perl -w ############################################################################### =pod =head1 NAME mig-quicksheet Quickly produces a simple Excel spreadsheet based on the tracked file suitable for simple end-user mapping. The new file is named after the tracked file, but ends in .mapping.xls Multiple files may be specified, in which case all of the results are concatenated into one spreadsheet named .mapping.xls =head1 SYNOPSIS B [...] =cut ############################################################################### use strict; use Switch; use Env qw( HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR ); use Pod::Usage; use DBI; use Spreadsheet::WriteExcel; use Cwd 'abs_path'; use FindBin; my $mig_bin = "$FindBin::Bin/"; use lib "$FindBin::Bin/"; use Mig; pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; Mig::die_if_no_env_migschema(); Mig::die_if_mig_tracking_table_does_not_exist(); my $workbook; my @worksheets = (); my $first_sheet; my $toc; my $sheet_row_offset = 0; my $sheet_row_start = 4; my $table; my $file; my $outfile; my $fdata; my $has_x_source = 0; my $bold; my $left; my $counter = 0; init_workbook(); foreach my $f (@ARGV) { $file = abs_path($f); $counter++; if ($toc) { $toc->write($counter,0,$counter); $toc->write($counter,1,$f); } handle_file(); write_worksheets(); } close_workbook(); sub handle_file { my $tracked_file_id = Mig::check_for_tracked_file($file); if ($tracked_file_id) { $fdata = Mig::status_this_file($file); } else { die "File not currently tracked: $file\n"; } $table = $fdata->{staged_table}; if (!$table) { die "No staged staged table for file: $file\n"; } } sub init_workbook { if (scalar(@ARGV) > 1) { $outfile = $MIGSCHEMA . '.mapping.xls'; } else { $outfile = abs_path($ARGV[0]) . '.mapping.xls'; } print "Writing $outfile\n"; $workbook = Spreadsheet::WriteExcel->new( $outfile ); $bold = $workbook->add_format(); $bold->set_bold(); $bold->set_align('left'); $left = $workbook->add_format(); $left->set_align('left'); if (scalar(@ARGV) > 1) { $toc = $workbook->add_worksheet('Files'); } } sub write_worksheets { if ($toc) { print 'File #' . $counter . "\n"; } print "Sheet: Field Summary\n"; $first_sheet = $workbook->add_worksheet( ($toc ? $counter . ') ' : '') . 'Field Summary'); $first_sheet->set_column(0,6,30); my $dbh = Mig::db_connect(); my $sth = $dbh->prepare(" SELECT COUNT(*) FROM $MIGSCHEMA.$table LIMIT 1; "); my $rv = $sth->execute() || die "Error retrieving data from information_schema: $!"; my @cols = $sth->fetchrow_array; $sth->finish; my $count = $cols[0]; Mig::db_disconnect($dbh); $first_sheet->write(0,0,'Source File:',$bold); $first_sheet->write(0,1,$file,$left); $first_sheet->write(1,0,'Number of Rows:',$bold); $first_sheet->write(1,1,$count,$left); $sheet_row_start = 4; $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold); $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold); $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold); $first_sheet->write($sheet_row_start,3,'Min Value',$bold); $first_sheet->write($sheet_row_start,4,'Min Length',$bold); $first_sheet->write($sheet_row_start,5,'Max Value',$bold); $first_sheet->write($sheet_row_start,6,'Max Length',$bold); handle_columns(); } sub close_workbook { $workbook->close(); } exit 0; ############################################################################### sub handle_columns { my $dbh = Mig::db_connect(); my $sth = $dbh->prepare(" SELECT * FROM information_schema.columns WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " AND table_name = " . $dbh->quote($table) . " ORDER BY dtd_identifier::INTEGER ASC; "); my $rv = $sth->execute() || die "Error retrieving data from information_schema: $!"; $sheet_row_offset = 0; while (my $data = $sth->fetchrow_hashref) { my $column = $data->{column_name}; if ($column eq 'x_source') { $has_x_source = 1; } if ($column =~ /^l_/ || ($column =~ /^x_/ && ( $column ne 'x_migrate' && $column ne 'x_source' && $column ne 'x_egid' && $column ne 'x_hseq' ) ) ) { $sheet_row_offset++; my $cdata = column_summary($column); $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left); $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left); #if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) { group_by($column); #} } } $sth->finish; Mig::db_disconnect($dbh); } sub column_summary { my $column = shift; my $dbh = Mig::db_connect(); ### non_empty_count my $sth = $dbh->prepare(" SELECT COUNT(*) FROM $MIGSCHEMA.$table WHERE $column IS NOT NULL AND BTRIM($column) <> ''; "); my $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; my @cols = $sth->fetchrow_array; $sth->finish; my $non_empty_count = $cols[0]; ### distinct_value_count $sth = $dbh->prepare(" SELECT COUNT(DISTINCT $column) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $distinct_value_count = $cols[0]; ### min_value $sth = $dbh->prepare(" SELECT MIN($column) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $min_value = $cols[0]; ### min_length $sth = $dbh->prepare(" SELECT MIN(LENGTH($column)) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $min_length = $cols[0]; ### max_value $sth = $dbh->prepare(" SELECT MAX($column) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $max_value = $cols[0]; ### max_length $sth = $dbh->prepare(" SELECT MAX(LENGTH($column)) FROM $MIGSCHEMA.$table; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; @cols = $sth->fetchrow_array; $sth->finish; my $max_length = $cols[0]; return { non_empty_count => $non_empty_count ,distinct_value_count => $distinct_value_count ,min_value => defined $min_value ? $min_value : '' ,min_length => defined $min_length ? $min_length : '' ,max_value => defined $max_value ? $max_value : '' ,max_length => defined $max_length ? $max_length : '' }; } sub group_by { my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2"); print "Sheet: $column\n"; my $col_sheet = $workbook->add_worksheet( ($toc ? $counter . ') ' : '') . substr($column,0,31)); push @worksheets, $col_sheet; $col_sheet->set_column(0,6,30); my $col_sheet_row_start = 0; my $col_sheet_row_offset = 0; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold); if ($has_x_source) { $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold); $option = "GROUP BY 2,3 ORDER BY 2,3"; } $col_sheet->write( $col_sheet_row_start + $col_sheet_row_offset ,$has_x_source ? 2 : 1 ,"Legacy Value for $column" ,$bold ); my $dbh = Mig::db_connect(); my $sth; my $rv; $sth = $dbh->prepare(" SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . "$column FROM $MIGSCHEMA.$table $option; "); $rv = $sth->execute() || die "Error retrieving data from $MIGSCHEMA.$table: $!"; while (my @cols = $sth->fetchrow_array) { $col_sheet_row_offset++; my $count = $cols[0]; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left); my $value; if ($has_x_source) { my $source = defined $cols[1] ? $cols[1] : ''; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left); $value = defined $cols[2] ? $cols[2] : ''; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left); } else { $value = defined $cols[1] ? $cols[1] : ''; $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left); } } $sth->finish; }