2 ###############################################################################
11 Quickly produces an Excel spreadsheet based on the tracked file suitable
12 for simple end-user mapping. The new file is named after the tracked file, but
15 Multiple files may be specified, in which case all of the results are
16 concatenated into one spreadsheet named <migration_schema>.mapping.xls
20 This specifies the exact name to use for the Excel file. If not specified, and
21 there is also no --outtable, then the naming convention will be as specified
26 This specifies a summary table and prefix to use within the migration schema for
27 recording the output either in addition to or instead of the Excel file. Unless
28 --force is specified, it will not overwrite existing tables.
30 If using --drop with --outable:
32 This will delete the summary table specified and all related sub-tables.
36 B<mig-quicksheet> [--force|--drop|--outfile <file>|--outtable <table_name>] <file> [<file>...]
40 ###############################################################################
45 HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
46 MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
52 use Spreadsheet::WriteExcel;
55 my $mig_bin = "$FindBin::Bin/";
56 use lib "$FindBin::Bin/";
66 'outtable=s' => \$outtable,
67 'outfile=s' => \$outfile,
72 pod2usage(-verbose => 2) if $help || ! $ARGV[0];
74 if (! $outtable && ! $outfile) {
75 if (scalar(@ARGV) > 1) {
76 $outfile = $MIGSCHEMA . '.mapping.xls';
78 $outfile = abs_path($ARGV[0]) . '.mapping.xls';
82 Mig::die_if_no_env_migschema();
83 Mig::die_if_mig_tracking_table_does_not_exist();
90 my $sheet_row_offset = 0;
91 my $sheet_row_start = 4;
102 foreach my $f (@ARGV) {
103 $file = abs_path($f);
106 $toc->write($counter,0,$counter);
107 $toc->write($counter,1,$f);
114 if (Mig::check_db_migschema_for_specific_table($outtable)) {
115 drop_existing_outtable();
120 my $tracked_file_id = Mig::check_for_tracked_file($file);
121 if ($tracked_file_id) {
122 $fdata = Mig::status_this_file($file);
124 die "File not currently tracked: $file\n";
126 $table = $fdata->{staged_table};
128 die "No staged staged table for file: $file\n";
134 print "Writing $outfile\n";
135 $workbook = Spreadsheet::WriteExcel->new( $outfile );
136 $bold = $workbook->add_format();
138 $bold->set_align('left');
139 $left = $workbook->add_format();
140 $left->set_align('left');
141 if (scalar(@ARGV) > 1) {
142 $toc = $workbook->add_worksheet('Files');
146 if (Mig::check_db_migschema_for_specific_table($outtable)) {
148 drop_existing_outtable();
150 die "$outtable already exists. Use --force to wipe and redo tables.\n";
153 create_new_outtable();
157 sub drop_existing_outtable {
159 # we want a transaction for this one
160 my $dbh = Mig::db_connect();
161 $dbh->{AutoCommit} = 0;
162 $dbh->{RaiseError} = 1;
165 # gather subordinate tables
168 my $sth = $dbh->prepare("
170 FROM $MIGSCHEMA.$outtable
173 my $rv = $sth->execute();
174 my $rows = $sth->fetchall_arrayref;
175 for my $row ( @$rows ) {
176 push @tables, $row->[0]
181 foreach my $table (@tables) {
182 print "Dropping $MIGSCHEMA.$table\n";
183 $dbh->do("DROP TABLE $MIGSCHEMA.\"$table\";");
188 print "Dropping $MIGSCHEMA.$outtable\n";
189 $dbh->do("DROP TABLE $MIGSCHEMA.$outtable;");
193 warn "Transaction aborted because $_\n";
194 eval { $dbh->rollback };
195 die "Aborting mig-quicksheet\n";
198 Mig::db_disconnect($dbh);
201 sub create_new_outtable {
202 my $dbh = Mig::db_connect();
203 print "Creating table $MIGSCHEMA.$outtable\n";
205 CREATE UNLOGGED TABLE $MIGSCHEMA.$outtable (
207 summary_table TEXT UNIQUE
209 ") || die "Error creating outtable ($MIGSCHEMA.$outtable): $!\n";
210 Mig::db_disconnect($dbh);
213 sub create_new_subtable {
214 my $subtable = shift;
215 my $dbh = Mig::db_connect();
216 $dbh->{AutoCommit} = 0;
217 $dbh->{RaiseError} = 1;
220 print "Creating table $MIGSCHEMA.\"$subtable\"\n";
222 CREATE UNLOGGED TABLE $MIGSCHEMA.\"$subtable\" ();
223 ") || die "Error creating subtable ($MIGSCHEMA.\"$subtable\"): $!\n";
225 INSERT INTO $MIGSCHEMA.$outtable (file,summary_table) VALUES (" . $dbh->quote($file) . ',' . $dbh->quote($subtable) . ");
226 ") || die "Error inserting into outtable ($MIGSCHEMA.$outtable): $!\n";
229 warn "Transaction aborted because $_\n";
230 eval { $dbh->rollback };
231 die "Aborting mig-quicksheet\n";
234 Mig::db_disconnect($dbh);
237 sub write_worksheets {
238 print 'File #' . $counter . "\n";
239 print "Sheet: Field Summary\n";
240 my $tab_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . 'Field Summary';
241 $tab_name = substr($tab_name,0,31); # truncate for WriteExcel
243 $first_sheet = $workbook->add_worksheet( $tab_name );
244 $first_sheet->set_column(0,6,30);
247 $first_table = "$outtable $tab_name";
248 create_new_subtable( $first_table );
251 my $dbh = Mig::db_connect();
252 my $sth = $dbh->prepare("
254 FROM $MIGSCHEMA.$table
257 my $rv = $sth->execute()
258 || die "Error retrieving data from information_schema: $!";
260 my @cols = $sth->fetchrow_array;
262 my $count = $cols[0];
264 $sheet_row_start = 0;
267 $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold);
268 $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold);
269 $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold);
270 $first_sheet->write($sheet_row_start,3,'Min Value',$bold);
271 $first_sheet->write($sheet_row_start,4,'Min Length',$bold);
272 $first_sheet->write($sheet_row_start,5,'Max Value',$bold);
273 $first_sheet->write($sheet_row_start,6,'Max Length',$bold);
277 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Legacy Column" TEXT;');
278 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Non-Empty Rows" TEXT;');
279 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Distinct Non-NULL Values" TEXT;');
280 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Value" TEXT;');
281 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Length" TEXT;');
282 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Value" TEXT;');
283 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Length" TEXT;');
285 die "Error modifying subtable ($MIGSCHEMA.$first_table): $_\n";
292 $first_sheet->write($count + 3,0,'Source File:',$bold);
293 $first_sheet->write($count + 3,1,$file,$left);
294 $first_sheet->write($count + 4,0,'Number of Rows:',$bold);
295 $first_sheet->write($count + 4,1,$count,$left);
299 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column") VALUES (NULL);');
300 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ .
301 ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Source File:'," . $dbh->quote($file) . ");");
302 $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ .
303 ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Number of Rows:',$count);");
305 die "Error inserting into subtable ($MIGSCHEMA.$first_table): $_\n";
309 Mig::db_disconnect($dbh);
320 ###############################################################################
323 my $dbh = Mig::db_connect();
324 my $sth = $dbh->prepare("
326 FROM information_schema.columns
327 WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . "
328 AND table_name = " . $dbh->quote($table) . "
329 ORDER BY dtd_identifier::INTEGER ASC;
331 my $rv = $sth->execute()
332 || die "Error retrieving data from information_schema: $!";
334 $sheet_row_offset = 0;
337 while (my $data = $sth->fetchrow_hashref) {
338 my $column = $data->{column_name};
339 if ($column eq 'x_source') {
344 && ( $column ne 'x_migrate'
345 && $column ne 'x_source'
346 && $column ne 'x_egid'
347 && $column ne 'x_hseq'
352 my $cdata = column_summary($column);
354 $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left);
355 $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left);
356 $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left);
357 $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left);
358 $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left);
359 $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left);
360 $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left);
363 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$first_table" VALUES (^ . join(','
364 ,$cdata->{non_empty_count}
365 ,$cdata->{distinct_value_count}
366 ,$dbh->quote($cdata->{min_value})
367 ,$cdata->{min_length}
368 ,$dbh->quote($cdata->{max_value})
369 ,$cdata->{max_length}
370 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$first_table\": $!";
372 if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) {
378 Mig::db_disconnect($dbh);
385 my $dbh = Mig::db_connect();
388 my $sth = $dbh->prepare("
390 FROM $MIGSCHEMA.$table
391 WHERE $column IS NOT NULL AND BTRIM($column) <> '';
393 my $rv = $sth->execute()
394 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
395 my @cols = $sth->fetchrow_array;
397 my $non_empty_count = $cols[0];
399 ### distinct_value_count
400 $sth = $dbh->prepare("
401 SELECT COUNT(DISTINCT $column)
402 FROM $MIGSCHEMA.$table;
404 $rv = $sth->execute()
405 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
406 @cols = $sth->fetchrow_array;
408 my $distinct_value_count = $cols[0];
411 $sth = $dbh->prepare("
413 FROM $MIGSCHEMA.$table;
415 $rv = $sth->execute()
416 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
417 @cols = $sth->fetchrow_array;
419 my $min_value = $cols[0];
422 $sth = $dbh->prepare("
423 SELECT MIN(LENGTH($column))
424 FROM $MIGSCHEMA.$table;
426 $rv = $sth->execute()
427 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
428 @cols = $sth->fetchrow_array;
430 my $min_length = $cols[0];
433 $sth = $dbh->prepare("
435 FROM $MIGSCHEMA.$table;
437 $rv = $sth->execute()
438 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
439 @cols = $sth->fetchrow_array;
441 my $max_value = $cols[0];
444 $sth = $dbh->prepare("
445 SELECT MAX(LENGTH($column))
446 FROM $MIGSCHEMA.$table;
448 $rv = $sth->execute()
449 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
450 @cols = $sth->fetchrow_array;
452 my $max_length = $cols[0];
455 non_empty_count => $non_empty_count
456 ,distinct_value_count => $distinct_value_count
457 ,min_value => defined $min_value ? $min_value : '<NULL>'
458 ,min_length => defined $min_length ? $min_length : '<NULL>'
459 ,max_value => defined $max_value ? $max_value : '<NULL>'
460 ,max_length => defined $max_length ? $max_length : '<NULL>'
465 my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2");
467 my $dbh = Mig::db_connect();
471 my $col_sheet_row_start = 0;
472 my $col_sheet_row_offset = 0;
476 my $sheet_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . $column;
477 $sheet_name = substr($sheet_name,0,31);
479 print "Sheet: $sheet_name\n";
481 $option = "GROUP BY 2,3 ORDER BY 2,3";
485 $col_sheet = $workbook->add_worksheet( $sheet_name );
486 push @worksheets, $col_sheet;
487 $col_sheet->set_column(0,6,30);
488 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold);
490 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold);
493 $col_sheet_row_start + $col_sheet_row_offset
494 ,$has_x_source ? 2 : 1
495 ,"Legacy Value for $column"
501 $col_table = "$outtable $sheet_name";
502 create_new_subtable( $col_table );
503 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Count" TEXT;')
504 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
506 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Source" TEXT;')
507 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
509 $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "' . $dbh->quote("Legacy value for $column") . '" TEXT;')
510 || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^;
513 $sth = $dbh->prepare("
514 SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . "$column
515 FROM $MIGSCHEMA.$table
518 $rv = $sth->execute()
519 || die "Error retrieving data from $MIGSCHEMA.$table: $!";
521 while (my @cols = $sth->fetchrow_array) {
522 $col_sheet_row_offset++;
523 my $count = $cols[0];
524 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left) if $outfile;
528 $source = defined $cols[1] ? $cols[1] : '<NULL>';
529 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left) if $outfile;
530 $value = defined $cols[2] ? $cols[2] : '<NULL>';
531 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left) if $outfile;
533 $value = defined $cols[1] ? $cols[1] : '<NULL>';
534 $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left) if $outfile;
538 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(','
540 ,$dbh->quote($source)
542 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!";
544 $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(','
547 ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!";