2 ###############################################################################
11 Quickly produces an Excel spreadsheet based on the tracked file suitable
12 for simple end-user mapping. The new file is named after the tracked file, but
15 Multiple files may be specified, in which case all of the results are
16 concatenated into one spreadsheet named <migration_schema>.mapping.xls
20 This specifies the exact name to use for the Excel file. If not specified, and
21 there is also no --outtable, then the naming convention will be as specified
26 This specifies a summary table and prefix to use within the migration schema for
27 recording the output either in addition to or instead of the Excel file. Unless
28 --force is specified, it will not overwrite existing tables.
30 If using --drop with --outable:
32 This will delete the summary table specified and all related sub-tables.
36 B<mig-quicksheet> [--force|--drop|--outfile <file>|--outtable <table_name>] <file> [<file>...]
40 ###############################################################################
45 HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
46 MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
52 use Spreadsheet::WriteExcel;
55 my $mig_bin = "$FindBin::Bin/";
56 use lib "$FindBin::Bin/";
67 'outtable=s' => \$outtable,
68 'outfile=s' => \$outfile,
74 pod2usage(-verbose => 2) if $help || ! $ARGV[0];
76 if (! $outtable && ! $outfile) {
77 if (scalar(@ARGV) > 1) {
78 $outfile = $MIGSCHEMA . '.mapping.xls';
80 $outfile = abs_path($ARGV[0]) . '.mapping.xls';
84 EMig::die_if_no_env_migschema();
85 EMig::die_if_mig_tracking_table_does_not_exist();
92 my $sheet_row_offset = 0;
93 my $sheet_row_start = 4;
104 foreach my $f (@ARGV) {
105 $file = abs_path($f);
108 $toc->write($counter,0,$counter);
109 $toc->write($counter,1,$f);
116 if (EMig::check_db_migschema_for_specific_table($outtable)) {
117 drop_existing_outtable();
122 my $tracked_file_id = EMig::check_for_tracked_file($file);
123 if ($tracked_file_id) {
124 $fdata = EMig::status_this_file($file);
126 die "File not currently tracked: $file\n";
128 $table = $fdata->{staged_table};
130 die "No staged staged table for file: $file\n";
136 print "Writing $outfile\n";
137 $workbook = Spreadsheet::WriteExcel->new( $outfile );
138 $bold = $workbook->add_format();
140 $bold->set_align('left');
141 $left = $workbook->add_format();
142 $left->set_align('left');
143 if (scalar(@ARGV) > 1) {
144 $toc = $workbook->add_worksheet('Files');
148 if (EMig::check_db_migschema_for_specific_table($outtable)) {
150 drop_existing_outtable();
152 die "$outtable already exists. Use --force to wipe and redo tables.\n";
155 create_new_outtable();
159 sub drop_existing_outtable {
161 # we want a transaction for this one
162 my $dbh = EMig::db_connect();
163 $dbh->{AutoCommit} = 0;
164 $dbh->{RaiseError} = 1;
167 # gather subordinate tables
170 my $sth = $dbh->prepare("
172 FROM $MIGSCHEMA.$outtable
175 my $rv = $sth->execute();
176 my $rows = $sth->fetchall_arrayref;
177 for my $row ( @$rows ) {
178 push @tables, $row->[0]
183 foreach my $table (@tables) {
184 print "Dropping $MIGSCHEMA.$table\n";
185 $dbh->do("DROP TABLE $MIGSCHEMA.\"$table\";");
190 print "Dropping $MIGSCHEMA.$outtable\n";
191 $dbh->do("DROP TABLE $MIGSCHEMA.$outtable;");
195 warn "Transaction aborted because $_\n";
196 eval { $dbh->rollback };
197 die "Aborting mig-quicksheet\n";
200 EMig::db_disconnect($dbh);
203 sub create_new_outtable {
204 my $dbh = EMig::db_connect();
205 print "Creating table $MIGSCHEMA.$outtable\n";
207 CREATE UNLOGGED TABLE $MIGSCHEMA.$outtable (
209 summary_table TEXT UNIQUE
211 ") || die "Error creating outtable ($MIGSCHEMA.$outtable): $!\n";
212 EMig::db_disconnect($dbh);
215 sub create_new_subtable {
216 my $subtable = shift;
217 my $dbh = EMig::db_connect();
218 $dbh->{AutoCommit} = 0;
219 $dbh->{RaiseError} = 1;
222 print "Creating table $MIGSCHEMA.\"$subtable\"\n";
224 CREATE UNLOGGED TABLE $MIGSCHEMA.\"$subtable\" ();
225 ") || die "Error creating subtable ($MIGSCHEMA.\"$subtable\"): $!\n";
227 INSERT INTO $MIGSCHEMA.$outtable (file,summary_table) VALUES (" . $dbh->quote($file) . ',' . $dbh->quote($subtable) . ");
228 ") || die "Error inserting into outtable ($MIGSCHEMA.$outtable): $!\n";
231 warn "Transaction aborted because $_\n";
232 eval { $dbh->rollback };
233 die "Aborting mig-quicksheet\n";
236 EMig::db_disconnect($dbh);
239 sub write_worksheets {
240 print 'File #' . $counter . "\n";
241 print "Sheet: Field Summary\n";
242 my $tab_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . 'Field Summary';
243 $tab_name = substr($tab_name,0,31); # truncate for WriteExcel
245 $first_sheet = $workbook->add_worksheet( substr($tab_name,0,31) );
246 $first_sheet->set_column(0,6,30);
249 $first_table = "$outtable $tab_name";
250 create_new_subtable( $first_table );
253 my $dbh = EMig::db_connect();
254 my $sth = $dbh->prepare("
256 FROM $MIGSCHEMA.$table
259 my $rv = $sth->execute()
260 || die "Error retrieving data from information_schema: $!";
262 my @cols = $sth->fetchrow_array;
264 my $count = $cols[0];
266 $sheet_row_start = 0;
269 $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold);
270 $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold);
271 $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold);
272 $first_sheet->write($sheet_row_start,3,'Min Value',$bold);
273 $first_sheet->write($sheet_row_start,4,'Min Length',$bold);
274 $first_sheet->write($sheet_row_start,5,'Max Value',$bold);
275 $first_sheet->write($sheet_row_start,6,'Max Length',$bold);
279 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Legacy Column" TEXT;');
280 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Non-Empty Rows" TEXT;');
281 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Distinct Non-NULL Values" TEXT;');
282 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Value" TEXT;');
283 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Length" TEXT;');
284 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Value" TEXT;');
285 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Length" TEXT;');
287 die "Error modifying subtable ($MIGSCHEMA.$first_table): $_\n";
291 handle_list() if $list;
295 $first_sheet->write($count + 3,0,'Source File:',$bold);
296 $first_sheet->write($count + 3,1,$file,$left);
297 $first_sheet->write($count + 4,0,'Number of Rows:',$bold);
298 $first_sheet->write($count + 4,1,$count,$left);
302 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column") VALUES (NULL);');
303 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ .
304 ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Source File:'," . $dbh->quote($file) . ");");
305 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ .
306 ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Number of Rows:',$count);");
308 die "Error inserting into subtable ($MIGSCHEMA.$first_table): $_\n";
312 EMig::db_disconnect($dbh);
323 ###############################################################################
326 my $dbh = EMig::db_connect();
327 my $sth = $dbh->prepare("
329 FROM " . $MIGSCHEMA. "." . $table . "
332 my $rv = $sth->execute()
333 || die "Error retrieving data from staging table: $!";
336 $sheet_row_offset = 0;
339 print "Sheet: $table\n";
340 $list_sheet = $workbook->add_worksheet( substr($table,0,31) );
343 my $handle_headers = 1;
345 while (my $data = $sth->fetchrow_hashref) {
346 if ($handle_headers) {
348 foreach my $col (sort keys %{ $data }) {
349 $list_sheet->write($sheet_row_start + $sheet_row_offset,$_idx++,$col,$bold);
355 foreach my $col (sort keys %{ $data }) {
356 my $cdata = $$data{$col};
357 if (!defined $cdata) { $cdata = '\N'; }
359 $list_sheet->write($sheet_row_start + $sheet_row_offset,$idx++,$cdata,$left);
366 my $dbh = EMig::db_connect();
367 my $sth = $dbh->prepare("
369 FROM information_schema.columns
370 WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . "
371 AND table_name = " . $dbh->quote($table) . "
372 ORDER BY dtd_identifier::INTEGER ASC;
374 my $rv = $sth->execute()
375 || die "Error retrieving data from information_schema: $!";
377 $sheet_row_offset = 0;
380 while (my $data = $sth->fetchrow_hashref) {
381 my $column = $data->{column_name};
382 if ($column eq 'x_source') {
387 && ( $column ne 'x_migrate'
388 && $column ne 'x_source'
389 && $column ne 'x_egid'
390 && $column ne 'x_hseq'
395 my $cdata = column_summary($column);
397 $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left);
398 $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left);
399 $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left);
400 $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left);
401 $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left);
402 $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left);
403 $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left);
406 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$first_table" VALUES (^ . join(','
407 ,$cdata->{non_empty_count}
408 ,$cdata->{distinct_value_count}
409 ,$dbh->quote($cdata->{min_value})
410 ,$cdata->{min_length}
411 ,$dbh->quote($cdata->{max_value})
412 ,$cdata->{max_length}
413 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$first_table\": $!";
415 if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) {
421 EMig::db_disconnect($dbh);
428 my $dbh = EMig::db_connect();
431 my $sth = $dbh->prepare("
433 FROM $MIGSCHEMA.$table
434 WHERE $column IS NOT NULL AND BTRIM($column) <> '';
436 my $rv = $sth->execute()
437 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
438 my @cols = $sth->fetchrow_array;
440 my $non_empty_count = $cols[0];
442 ### distinct_value_count
443 $sth = $dbh->prepare("
444 SELECT COUNT(DISTINCT $column)
445 FROM $MIGSCHEMA.$table;
447 $rv = $sth->execute()
448 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
449 @cols = $sth->fetchrow_array;
451 my $distinct_value_count = $cols[0];
454 $sth = $dbh->prepare("
456 FROM $MIGSCHEMA.$table;
458 $rv = $sth->execute()
459 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
460 @cols = $sth->fetchrow_array;
462 my $min_value = $cols[0];
465 $sth = $dbh->prepare("
466 SELECT MIN(LENGTH($column))
467 FROM $MIGSCHEMA.$table;
469 $rv = $sth->execute()
470 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
471 @cols = $sth->fetchrow_array;
473 my $min_length = $cols[0];
476 $sth = $dbh->prepare("
478 FROM $MIGSCHEMA.$table;
480 $rv = $sth->execute()
481 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
482 @cols = $sth->fetchrow_array;
484 my $max_value = $cols[0];
487 $sth = $dbh->prepare("
488 SELECT MAX(LENGTH($column))
489 FROM $MIGSCHEMA.$table;
491 $rv = $sth->execute()
492 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
493 @cols = $sth->fetchrow_array;
495 my $max_length = $cols[0];
498 non_empty_count => $non_empty_count
499 ,distinct_value_count => $distinct_value_count
500 ,min_value => defined $min_value ? $min_value : '<NULL>'
501 ,min_length => defined $min_length ? $min_length : '<NULL>'
502 ,max_value => defined $max_value ? $max_value : '<NULL>'
503 ,max_length => defined $max_length ? $max_length : '<NULL>'
508 my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2");
510 my $dbh = EMig::db_connect();
514 my $col_sheet_row_start = 0;
515 my $col_sheet_row_offset = 0;
519 my $sheet_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . $column;
520 $sheet_name = substr($sheet_name,0,31);
522 print "Sheet: $sheet_name\n";
524 $option = "GROUP BY 2,3 ORDER BY 2,3";
528 $col_sheet = $workbook->add_worksheet( substr($sheet_name,0,31) );
529 push @worksheets, $col_sheet;
530 $col_sheet->set_column(0,6,30);
531 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold);
533 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold);
536 $col_sheet_row_start + $col_sheet_row_offset
537 ,$has_x_source ? 2 : 1
538 ,"Legacy Value for $column"
544 $col_table = "$outtable $sheet_name";
545 create_new_subtable( $col_table );
546 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Count" TEXT;')
547 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
549 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Source" TEXT;')
550 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
552 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "' . $dbh->quote("Legacy value for $column") . '" TEXT;')
553 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
556 $sth = $dbh->prepare("
557 SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . "$column
558 FROM $MIGSCHEMA.$table
561 $rv = $sth->execute()
562 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
564 while (my @cols = $sth->fetchrow_array) {
565 $col_sheet_row_offset++;
566 my $count = $cols[0];
567 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left) if $outfile;
571 $source = defined $cols[1] ? $cols[1] : '<NULL>';
572 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left) if $outfile;
573 $value = defined $cols[2] ? $cols[2] : '<NULL>';
574 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left) if $outfile;
576 $value = defined $cols[1] ? $cols[1] : '<NULL>';
577 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left) if $outfile;
581 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(','
583 ,$dbh->quote($source)
585 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!";
587 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(','
590 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!";