2 ###############################################################################
11 Quickly produces an Excel spreadsheet based on the tracked file suitable
12 for simple end-user mapping. The new file is named after the tracked file, but
15 Multiple files may be specified, in which case all of the results are
16 concatenated into one spreadsheet named <migration_schema>.mapping.xls
20 This specifies the exact name to use for the Excel file. If not specified, and
21 there is also no --outtable, then the naming convention will be as specified
26 This specifies a summary table and prefix to use within the migration schema for
27 recording the output either in addition to or instead of the Excel file. Unless
28 --force is specified, it will not overwrite existing tables.
30 If using --drop with --outable:
32 This will delete the summary table specified and all related sub-tables.
36 B<mig-quicksheet> [--force|--drop|--outfile <file>|--outtable <table_name>] <file> [<file>...]
40 ###############################################################################
45 HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
46 MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
52 use Spreadsheet::WriteExcel;
55 my $mig_bin = "$FindBin::Bin/";
56 use lib "$FindBin::Bin/";
66 'outtable=s' => \$outtable,
67 'outfile=s' => \$outfile,
72 pod2usage(-verbose => 2) if $help || ! $ARGV[0];
74 if (! $outtable && ! $outfile) {
75 if (scalar(@ARGV) > 1) {
76 $outfile = $MIGSCHEMA . '.mapping.xls';
78 $outfile = abs_path($ARGV[0]) . '.mapping.xls';
82 Mig::die_if_no_env_migschema();
83 Mig::die_if_mig_tracking_table_does_not_exist();
90 my $sheet_row_offset = 0;
91 my $sheet_row_start = 4;
102 foreach my $f (@ARGV) {
103 $file = abs_path($f);
106 $toc->write($counter,0,$counter);
107 $toc->write($counter,1,$f);
114 if (Mig::check_db_migschema_for_specific_table($outtable)) {
115 drop_existing_outtable();
120 my $tracked_file_id = Mig::check_for_tracked_file($file);
121 if ($tracked_file_id) {
122 $fdata = Mig::status_this_file($file);
124 die "File not currently tracked: $file\n";
126 $table = $fdata->{staged_table};
128 die "No staged staged table for file: $file\n";
134 print "Writing $outfile\n";
135 $workbook = Spreadsheet::WriteExcel->new( $outfile );
136 $bold = $workbook->add_format();
138 $bold->set_align('left');
139 $left = $workbook->add_format();
140 $left->set_align('left');
141 if (scalar(@ARGV) > 1) {
142 $toc = $workbook->add_worksheet('Files');
146 if (Mig::check_db_migschema_for_specific_table($outtable)) {
148 drop_existing_outtable();
150 die "$outtable already exists. Use --force to wipe and redo tables.\n";
153 create_new_outtable();
157 sub drop_existing_outtable {
159 # we want a transaction for this one
160 my $dbh = Mig::db_connect();
161 $dbh->{AutoCommit} = 0;
162 $dbh->{RaiseError} = 1;
165 # gather subordinate tables
168 my $sth = $dbh->prepare("
170 FROM $MIGSCHEMA.$outtable
173 my $rv = $sth->execute();
174 my $rows = $sth->fetchall_arrayref;
175 for my $row ( @$rows ) {
176 push @tables, $row->[0]
181 foreach my $table (@tables) {
182 print "Dropping $MIGSCHEMA.$table\n";
183 $dbh->do("DROP TABLE $MIGSCHEMA.\"$table\";");
188 print "Dropping $MIGSCHEMA.$outtable\n";
189 $dbh->do("DROP TABLE $MIGSCHEMA.$outtable;");
193 warn "Transaction aborted because $_\n";
194 eval { $dbh->rollback };
195 die "Aborting mig-quicksheet\n";
198 Mig::db_disconnect($dbh);
201 sub create_new_outtable {
202 my $dbh = Mig::db_connect();
203 print "Creating table $MIGSCHEMA.$outtable\n";
205 CREATE UNLOGGED TABLE $MIGSCHEMA.$outtable (
207 summary_table TEXT UNIQUE
209 ") || die "Error creating outtable ($MIGSCHEMA.$outtable): $!\n";
210 Mig::db_disconnect($dbh);
213 sub create_new_subtable {
214 my $subtable = shift;
215 my $dbh = Mig::db_connect();
216 $dbh->{AutoCommit} = 0;
217 $dbh->{RaiseError} = 1;
220 print "Creating table $MIGSCHEMA.\"$subtable\"\n";
222 CREATE UNLOGGED TABLE $MIGSCHEMA.\"$subtable\" ();
223 ") || die "Error creating subtable ($MIGSCHEMA.\"$subtable\"): $!\n";
225 INSERT INTO $MIGSCHEMA.$outtable (file,summary_table) VALUES (" . $dbh->quote($file) . ',' . $dbh->quote($subtable) . ");
226 ") || die "Error inserting into outtable ($MIGSCHEMA.$outtable): $!\n";
229 warn "Transaction aborted because $_\n";
230 eval { $dbh->rollback };
231 die "Aborting mig-quicksheet\n";
234 Mig::db_disconnect($dbh);
237 sub write_worksheets {
238 print 'File #' . $counter . "\n";
239 print "Sheet: Field Summary\n";
240 my $tab_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . 'Field Summary';
241 $tab_name = substr($tab_name,0,31); # truncate for WriteExcel
243 $first_sheet = $workbook->add_worksheet( $tab_name );
244 $first_sheet->set_column(0,6,30);
247 $first_table = "$outtable $tab_name";
248 create_new_subtable( $first_table );
251 my $dbh = Mig::db_connect();
252 my $sth = $dbh->prepare("
254 FROM $MIGSCHEMA.$table
257 my $rv = $sth->execute()
258 || die "Error retrieving data from information_schema: $!";
260 my @cols = $sth->fetchrow_array;
262 my $count = $cols[0];
264 $sheet_row_start = 0;
267 $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold);
268 $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold);
269 $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold);
270 $first_sheet->write($sheet_row_start,3,'Min Value',$bold);
271 $first_sheet->write($sheet_row_start,4,'Min Length',$bold);
272 $first_sheet->write($sheet_row_start,5,'Max Value',$bold);
273 $first_sheet->write($sheet_row_start,6,'Max Length',$bold);
277 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Legacy Column" TEXT;');
278 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Non-Empty Rows" TEXT;');
279 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Distinct Non-NULL Values" TEXT;');
280 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Value" TEXT;');
281 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Length" TEXT;');
282 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Value" TEXT;');
283 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Length" TEXT;');
285 die "Error modifying subtable ($MIGSCHEMA.$first_table): $_\n";
293 $first_sheet->write($count + 3,0,'Source File:',$bold);
294 $first_sheet->write($count + 3,1,$file,$left);
295 $first_sheet->write($count + 4,0,'Number of Rows:',$bold);
296 $first_sheet->write($count + 4,1,$count,$left);
300 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column") VALUES (NULL);');
301 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ .
302 ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Source File:'," . $dbh->quote($file) . ");");
303 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ .
304 ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Number of Rows:',$count);");
306 die "Error inserting into subtable ($MIGSCHEMA.$first_table): $_\n";
310 Mig::db_disconnect($dbh);
321 ###############################################################################
324 my $dbh = Mig::db_connect();
325 my $sth = $dbh->prepare("
327 FROM " . $MIGSCHEMA. "." . $table . "
330 my $rv = $sth->execute()
331 || die "Error retrieving data from staging table: $!";
334 $sheet_row_offset = 0;
337 print "Sheet: $table\n";
338 $list_sheet = $workbook->add_worksheet( $table );
341 my $handle_headers = 1;
343 while (my $data = $sth->fetchrow_hashref) {
344 if ($handle_headers) {
346 foreach my $col (sort keys %{ $data }) {
347 $list_sheet->write($sheet_row_start + $sheet_row_offset,$_idx++,$col,$bold);
353 foreach my $col (sort keys %{ $data }) {
354 my $cdata = $$data{$col};
355 if (!defined $cdata) { $cdata = '\N'; }
357 $list_sheet->write($sheet_row_start + $sheet_row_offset,$idx++,$cdata,$left);
364 my $dbh = Mig::db_connect();
365 my $sth = $dbh->prepare("
367 FROM information_schema.columns
368 WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . "
369 AND table_name = " . $dbh->quote($table) . "
370 ORDER BY dtd_identifier::INTEGER ASC;
372 my $rv = $sth->execute()
373 || die "Error retrieving data from information_schema: $!";
375 $sheet_row_offset = 0;
378 while (my $data = $sth->fetchrow_hashref) {
379 my $column = $data->{column_name};
380 if ($column eq 'x_source') {
385 && ( $column ne 'x_migrate'
386 && $column ne 'x_source'
387 && $column ne 'x_egid'
388 && $column ne 'x_hseq'
393 my $cdata = column_summary($column);
395 $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left);
396 $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left);
397 $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left);
398 $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left);
399 $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left);
400 $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left);
401 $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left);
404 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$first_table" VALUES (^ . join(','
405 ,$cdata->{non_empty_count}
406 ,$cdata->{distinct_value_count}
407 ,$dbh->quote($cdata->{min_value})
408 ,$cdata->{min_length}
409 ,$dbh->quote($cdata->{max_value})
410 ,$cdata->{max_length}
411 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$first_table\": $!";
413 if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) {
419 Mig::db_disconnect($dbh);
426 my $dbh = Mig::db_connect();
429 my $sth = $dbh->prepare("
431 FROM $MIGSCHEMA.$table
432 WHERE $column IS NOT NULL AND BTRIM($column) <> '';
434 my $rv = $sth->execute()
435 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
436 my @cols = $sth->fetchrow_array;
438 my $non_empty_count = $cols[0];
440 ### distinct_value_count
441 $sth = $dbh->prepare("
442 SELECT COUNT(DISTINCT $column)
443 FROM $MIGSCHEMA.$table;
445 $rv = $sth->execute()
446 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
447 @cols = $sth->fetchrow_array;
449 my $distinct_value_count = $cols[0];
452 $sth = $dbh->prepare("
454 FROM $MIGSCHEMA.$table;
456 $rv = $sth->execute()
457 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
458 @cols = $sth->fetchrow_array;
460 my $min_value = $cols[0];
463 $sth = $dbh->prepare("
464 SELECT MIN(LENGTH($column))
465 FROM $MIGSCHEMA.$table;
467 $rv = $sth->execute()
468 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
469 @cols = $sth->fetchrow_array;
471 my $min_length = $cols[0];
474 $sth = $dbh->prepare("
476 FROM $MIGSCHEMA.$table;
478 $rv = $sth->execute()
479 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
480 @cols = $sth->fetchrow_array;
482 my $max_value = $cols[0];
485 $sth = $dbh->prepare("
486 SELECT MAX(LENGTH($column))
487 FROM $MIGSCHEMA.$table;
489 $rv = $sth->execute()
490 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
491 @cols = $sth->fetchrow_array;
493 my $max_length = $cols[0];
496 non_empty_count => $non_empty_count
497 ,distinct_value_count => $distinct_value_count
498 ,min_value => defined $min_value ? $min_value : '<NULL>'
499 ,min_length => defined $min_length ? $min_length : '<NULL>'
500 ,max_value => defined $max_value ? $max_value : '<NULL>'
501 ,max_length => defined $max_length ? $max_length : '<NULL>'
506 my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2");
508 my $dbh = Mig::db_connect();
512 my $col_sheet_row_start = 0;
513 my $col_sheet_row_offset = 0;
517 my $sheet_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . $column;
518 $sheet_name = substr($sheet_name,0,31);
520 print "Sheet: $sheet_name\n";
522 $option = "GROUP BY 2,3 ORDER BY 2,3";
526 $col_sheet = $workbook->add_worksheet( $sheet_name );
527 push @worksheets, $col_sheet;
528 $col_sheet->set_column(0,6,30);
529 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold);
531 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold);
534 $col_sheet_row_start + $col_sheet_row_offset
535 ,$has_x_source ? 2 : 1
536 ,"Legacy Value for $column"
542 $col_table = "$outtable $sheet_name";
543 create_new_subtable( $col_table );
544 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Count" TEXT;')
545 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
547 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Source" TEXT;')
548 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
550 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "' . $dbh->quote("Legacy value for $column") . '" TEXT;')
551 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
554 $sth = $dbh->prepare("
555 SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . "$column
556 FROM $MIGSCHEMA.$table
559 $rv = $sth->execute()
560 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
562 while (my @cols = $sth->fetchrow_array) {
563 $col_sheet_row_offset++;
564 my $count = $cols[0];
565 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left) if $outfile;
569 $source = defined $cols[1] ? $cols[1] : '<NULL>';
570 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left) if $outfile;
571 $value = defined $cols[2] ? $cols[2] : '<NULL>';
572 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left) if $outfile;
574 $value = defined $cols[1] ? $cols[1] : '<NULL>';
575 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left) if $outfile;
579 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(','
581 ,$dbh->quote($source)
583 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!";
585 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(','
588 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!";