2 ###############################################################################
11 Quickly produces an Excel spreadsheet based on the tracked file suitable
12 for simple end-user mapping. The new file is named after the tracked file, but
15 Multiple files may be specified, in which case all of the results are
16 concatenated into one spreadsheet named <migration_schema>.mapping.xls
20 This specifies the exact name to use for the Excel file. If not specified, and
21 there is also no --outtable, then the naming convention will be as specified
26 This specifies a summary table and prefix to use within the migration schema for
27 recording the output either in addition to or instead of the Excel file. Unless
28 --force is specified, it will not overwrite existing tables.
30 If using --drop with --outable:
32 This will delete the summary table specified and all related sub-tables.
36 B<mig-quicksheet> [--force|--drop|--outfile <file>|--outtable <table_name>] <file> [<file>...]
40 ###############################################################################
45 HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
46 MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
52 use Spreadsheet::WriteExcel;
55 my $mig_bin = "$FindBin::Bin/";
56 use lib "$FindBin::Bin/";
66 'outtable=s' => \$outtable,
67 'outfile=s' => \$outfile,
72 pod2usage(-verbose => 2) if $help || ! $ARGV[0];
74 if (! $outtable && ! $outfile) {
75 if (scalar(@ARGV) > 1) {
76 $outfile = $MIGSCHEMA . '.mapping.xls';
78 $outfile = abs_path($ARGV[0]) . '.mapping.xls';
82 Mig::die_if_no_env_migschema();
83 Mig::die_if_mig_tracking_table_does_not_exist();
90 my $sheet_row_offset = 0;
91 my $sheet_row_start = 4;
102 foreach my $f (@ARGV) {
103 $file = abs_path($f);
106 $toc->write($counter,0,$counter);
107 $toc->write($counter,1,$f);
114 if (Mig::check_db_migschema_for_specific_table($outtable)) {
115 drop_existing_outtable();
120 my $tracked_file_id = Mig::check_for_tracked_file($file);
121 if ($tracked_file_id) {
122 $fdata = Mig::status_this_file($file);
124 die "File not currently tracked: $file\n";
126 $table = $fdata->{staged_table};
128 die "No staged staged table for file: $file\n";
134 print "Writing $outfile\n";
135 $workbook = Spreadsheet::WriteExcel->new( $outfile );
136 $bold = $workbook->add_format();
138 $bold->set_align('left');
139 $left = $workbook->add_format();
140 $left->set_align('left');
141 if (scalar(@ARGV) > 1) {
142 $toc = $workbook->add_worksheet('Files');
146 if (Mig::check_db_migschema_for_specific_table($outtable)) {
148 drop_existing_outtable();
150 die "$outtable already exists. Use --force to wipe and redo tables.\n";
153 create_new_outtable();
157 sub drop_existing_outtable {
159 # we want a transaction for this one
160 my $dbh = Mig::db_connect();
161 $dbh->{AutoCommit} = 0;
162 $dbh->{RaiseError} = 1;
165 # gather subordinate tables
168 my $sth = $dbh->prepare("
170 FROM $MIGSCHEMA.$outtable
173 my $rv = $sth->execute();
174 my $rows = $sth->fetchall_arrayref;
175 for my $row ( @$rows ) {
176 push @tables, $row->[0]
181 foreach my $table (@tables) {
182 print "Dropping $MIGSCHEMA.$table\n";
183 $dbh->do("DROP TABLE $MIGSCHEMA.\"$table\";");
188 print "Dropping $MIGSCHEMA.$outtable\n";
189 $dbh->do("DROP TABLE $MIGSCHEMA.$outtable;");
193 warn "Transaction aborted because $_\n";
194 eval { $dbh->rollback };
195 die "Aborting mig-quicksheet\n";
198 Mig::db_disconnect($dbh);
201 sub create_new_outtable {
202 my $dbh = Mig::db_connect();
203 print "Creating table $MIGSCHEMA.$outtable\n";
205 CREATE UNLOGGED TABLE $MIGSCHEMA.$outtable (
207 summary_table TEXT UNIQUE
209 ") || die "Error creating outtable ($MIGSCHEMA.$outtable): $!\n";
210 Mig::db_disconnect($dbh);
213 sub create_new_subtable {
214 my $subtable = shift;
215 my $dbh = Mig::db_connect();
216 $dbh->{AutoCommit} = 0;
217 $dbh->{RaiseError} = 1;
220 print "Creating table $MIGSCHEMA.\"$subtable\"\n";
222 CREATE UNLOGGED TABLE $MIGSCHEMA.\"$subtable\" ();
223 ") || die "Error creating subtable ($MIGSCHEMA.\"$subtable\"): $!\n";
225 INSERT INTO $MIGSCHEMA.$outtable (file,summary_table) VALUES (" . $dbh->quote($file) . ',' . $dbh->quote($subtable) . ");
226 ") || die "Error inserting into outtable ($MIGSCHEMA.$outtable): $!\n";
229 warn "Transaction aborted because $_\n";
230 eval { $dbh->rollback };
231 die "Aborting mig-quicksheet\n";
234 Mig::db_disconnect($dbh);
237 sub write_worksheets {
238 print 'File #' . $counter . "\n";
239 print "Sheet: Field Summary\n";
240 my $tab_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . 'Field Summary';
242 $first_sheet = $workbook->add_worksheet( $tab_name );
243 $first_sheet->set_column(0,6,30);
246 $first_table = "$outtable $tab_name";
247 create_new_subtable( $first_table );
250 my $dbh = Mig::db_connect();
251 my $sth = $dbh->prepare("
253 FROM $MIGSCHEMA.$table
256 my $rv = $sth->execute()
257 || die "Error retrieving data from information_schema: $!";
259 my @cols = $sth->fetchrow_array;
261 my $count = $cols[0];
263 $sheet_row_start = 0;
266 $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold);
267 $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold);
268 $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold);
269 $first_sheet->write($sheet_row_start,3,'Min Value',$bold);
270 $first_sheet->write($sheet_row_start,4,'Min Length',$bold);
271 $first_sheet->write($sheet_row_start,5,'Max Value',$bold);
272 $first_sheet->write($sheet_row_start,6,'Max Length',$bold);
276 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Legacy Column" TEXT;');
277 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Non-Empty Rows" TEXT;');
278 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Distinct Non-NULL Values" TEXT;');
279 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Value" TEXT;');
280 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Length" TEXT;');
281 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Value" TEXT;');
282 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Length" TEXT;');
284 die "Error modifying subtable ($MIGSCHEMA.$first_table): $_\n";
291 $first_sheet->write($count + 3,0,'Source File:',$bold);
292 $first_sheet->write($count + 3,1,$file,$left);
293 $first_sheet->write($count + 4,0,'Number of Rows:',$bold);
294 $first_sheet->write($count + 4,1,$count,$left);
298 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column") VALUES (NULL);');
299 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ .
300 ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Source File:'," . $dbh->quote($file) . ");");
301 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ .
302 ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Number of Rows:',$count);");
304 die "Error inserting into subtable ($MIGSCHEMA.$first_table): $_\n";
308 Mig::db_disconnect($dbh);
319 ###############################################################################
322 my $dbh = Mig::db_connect();
323 my $sth = $dbh->prepare("
325 FROM information_schema.columns
326 WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . "
327 AND table_name = " . $dbh->quote($table) . "
328 ORDER BY dtd_identifier::INTEGER ASC;
330 my $rv = $sth->execute()
331 || die "Error retrieving data from information_schema: $!";
333 $sheet_row_offset = 0;
336 while (my $data = $sth->fetchrow_hashref) {
337 my $column = $data->{column_name};
338 if ($column eq 'x_source') {
343 && ( $column ne 'x_migrate'
344 && $column ne 'x_source'
345 && $column ne 'x_egid'
346 && $column ne 'x_hseq'
351 my $cdata = column_summary($column);
353 $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left);
354 $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left);
355 $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left);
356 $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left);
357 $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left);
358 $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left);
359 $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left);
362 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$first_table" VALUES (^ . join(','
363 ,$cdata->{non_empty_count}
364 ,$cdata->{distinct_value_count}
365 ,$dbh->quote($cdata->{min_value})
366 ,$cdata->{min_length}
367 ,$dbh->quote($cdata->{max_value})
368 ,$cdata->{max_length}
369 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$first_table\": $!";
371 if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) {
377 Mig::db_disconnect($dbh);
384 my $dbh = Mig::db_connect();
387 my $sth = $dbh->prepare("
389 FROM $MIGSCHEMA.$table
390 WHERE $column IS NOT NULL AND BTRIM($column) <> '';
392 my $rv = $sth->execute()
393 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
394 my @cols = $sth->fetchrow_array;
396 my $non_empty_count = $cols[0];
398 ### distinct_value_count
399 $sth = $dbh->prepare("
400 SELECT COUNT(DISTINCT $column)
401 FROM $MIGSCHEMA.$table;
403 $rv = $sth->execute()
404 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
405 @cols = $sth->fetchrow_array;
407 my $distinct_value_count = $cols[0];
410 $sth = $dbh->prepare("
412 FROM $MIGSCHEMA.$table;
414 $rv = $sth->execute()
415 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
416 @cols = $sth->fetchrow_array;
418 my $min_value = $cols[0];
421 $sth = $dbh->prepare("
422 SELECT MIN(LENGTH($column))
423 FROM $MIGSCHEMA.$table;
425 $rv = $sth->execute()
426 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
427 @cols = $sth->fetchrow_array;
429 my $min_length = $cols[0];
432 $sth = $dbh->prepare("
434 FROM $MIGSCHEMA.$table;
436 $rv = $sth->execute()
437 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
438 @cols = $sth->fetchrow_array;
440 my $max_value = $cols[0];
443 $sth = $dbh->prepare("
444 SELECT MAX(LENGTH($column))
445 FROM $MIGSCHEMA.$table;
447 $rv = $sth->execute()
448 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
449 @cols = $sth->fetchrow_array;
451 my $max_length = $cols[0];
454 non_empty_count => $non_empty_count
455 ,distinct_value_count => $distinct_value_count
456 ,min_value => defined $min_value ? $min_value : '<NULL>'
457 ,min_length => defined $min_length ? $min_length : '<NULL>'
458 ,max_value => defined $max_value ? $max_value : '<NULL>'
459 ,max_length => defined $max_length ? $max_length : '<NULL>'
464 my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2");
466 my $dbh = Mig::db_connect();
470 my $col_sheet_row_start = 0;
471 my $col_sheet_row_offset = 0;
475 my $sheet_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . substr($column,0,31);
477 print "Sheet: $sheet_name\n";
479 $option = "GROUP BY 2,3 ORDER BY 2,3";
483 $col_sheet = $workbook->add_worksheet( $sheet_name );
484 push @worksheets, $col_sheet;
485 $col_sheet->set_column(0,6,30);
486 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold);
488 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold);
491 $col_sheet_row_start + $col_sheet_row_offset
492 ,$has_x_source ? 2 : 1
493 ,"Legacy Value for $column"
499 $col_table = "$outtable $sheet_name";
500 create_new_subtable( $col_table );
501 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Count" TEXT;')
502 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
504 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Source" TEXT;')
505 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
507 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "' . $dbh->quote("Legacy value for $column") . '" TEXT;')
508 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
511 $sth = $dbh->prepare("
512 SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . "$column
513 FROM $MIGSCHEMA.$table
516 $rv = $sth->execute()
517 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
519 while (my @cols = $sth->fetchrow_array) {
520 $col_sheet_row_offset++;
521 my $count = $cols[0];
522 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left) if $outfile;
526 $source = defined $cols[1] ? $cols[1] : '<NULL>';
527 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left) if $outfile;
528 $value = defined $cols[2] ? $cols[2] : '<NULL>';
529 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left) if $outfile;
531 $value = defined $cols[1] ? $cols[1] : '<NULL>';
532 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left) if $outfile;
536 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(','
538 ,$dbh->quote($source)
540 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!";
542 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(','
545 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!";