#!/usr/bin/perl ############################################################################### =pod =item B --analyst "Analyst Name" --report_title "Report Title" Generates an asciidoc file in the git working directory that can be converted to any appropriate format. The analyst and report parameters are required. Optional parameters are : --added_page_title and --added_page_file If one is used both must be. The added page file can be plain text or asciidoc. This adds an extra arbitrary page of notes to the report. Mig assumes the page file is in the mig git directory. --tags This will define a set of tags to use, if not set it will default to Circs, Holds, Actors, Bibs, Assets & Money. --debug Gives more information about what is happening. --reports_xml Allows you to override the default evergreen_staged_report.xml in the mig-xml folder. --excel_output or --excel Pushes output to an Excel file instead of asciidoc file. --captions or --captions_off Adds the captions tag to asciidoc header to turn off captions in generated output. =back =cut ############################################################################### use strict; use warnings; use DBI; use Data::Dumper; use XML::LibXML; use Env qw( HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR ); use Excel::Writer::XLSX; use Pod::Usage; use Switch; use Cwd 'abs_path'; use Cwd qw(getcwd); use FindBin; my $mig_bin = "$FindBin::Bin/"; use lib "$FindBin::Bin/"; use Mig; use open ':encoding(utf8)'; pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; pod2usage(-verbose => 1) if ! $ARGV[1]; my $analyst; my $next_arg_is_analyst; my $report_title; my $next_arg_is_report_title; my $reports_xml; my $next_arg_is_reports_xml; my $tags; my $next_arg_is_tags; my $added_page_title; my $next_arg_is_added_page_title; my $added_page_file; my $next_arg_is_added_page_file; my $excel_output = 0; my $captions_off = 0; my $i = 0; my $parser = XML::LibXML->new(); my $lines_per_page = 42; my $debug_flag = 0; my $workbook; my $fh; foreach my $arg (@ARGV) { if ($arg eq '--report_title') { $next_arg_is_report_title = 1; next; } if ($next_arg_is_report_title) { $report_title = $arg; $next_arg_is_report_title = 0; next; } if ($arg eq '--analyst') { $next_arg_is_analyst = 1; next; } if ($next_arg_is_analyst) { $analyst = $arg; $next_arg_is_analyst = 0; next; } if ($arg eq '--reports_xml') { $next_arg_is_reports_xml = 1; next; } if ($next_arg_is_reports_xml) { $reports_xml = $arg; $next_arg_is_reports_xml = 0; next; } if ($arg eq '--tags') { $next_arg_is_tags = 1; next; } if ($next_arg_is_tags) { $tags = $arg; $next_arg_is_tags = 0; next; } if ($arg eq '--added_page_title') { $next_arg_is_added_page_title = 1; next; } if ($next_arg_is_added_page_title) { $added_page_title = $arg; $next_arg_is_added_page_title = 0; next; } if ($arg eq '--added_page_file') { $next_arg_is_added_page_file = 1; next; } if ($next_arg_is_added_page_file) { $added_page_file = $arg; $next_arg_is_added_page_file = 0; next; } if ($arg eq '--excel_output' or $arg eq '--excel') { $excel_output = 1; next; } if ($arg eq '--captions_off' or $arg eq '--captions') { $captions_off = 1; next; } if ($arg eq '--debug') { $debug_flag = 1; next; } } if (!defined $tags) {$tags = 'circs.holds.actors.bibs.assets.money'}; if (!defined $report_title) { abort('--report_title must be supplied'); } if ($excel_output == 0 and !defined $analyst) { abort('--analyst must be supplied'); } my $mig_path = abs_path($0); $mig_path =~ s|[^/]+$||; $reports_xml = find_xml($reports_xml,$mig_path,$excel_output); if (!defined $reports_xml) { abort("Can not find xml reports file."); } my $dom = $parser->parse_file($reports_xml); if (defined $added_page_file or defined $added_page_title) { abort('must specify --added_page_file and --added_page_title') unless defined $added_page_file and defined $added_page_title; } if (defined $added_page_file) { $added_page_file = $MIGGITDIR . $added_page_file; } my $dbh = Mig::db_connect(); my $report_file = create_report_name($report_title,$excel_output); $report_file = $MIGGITDIR . $report_file; if ($excel_output == 1) { $workbook = Excel::Writer::XLSX->new( $report_file ); } else { open($fh, '>', $report_file) or abort("Could not open output file!"); write_title_page($report_title,$fh,$analyst,$captions_off); }; if (defined $added_page_file and defined $added_page_title) { print $fh "<<<\n"; print $fh "== $added_page_title\n"; print "$added_page_file\t$added_page_title\n"; open(my $an,'<:encoding(UTF-8)', $added_page_file) or abort("Could not open $added_page_file!"); while ( my $line = <$an> ) { print $fh $line; } print $fh "\n"; close $an; } foreach my $func ($dom->findnodes('//function')) { my $fdrop = $func->findvalue('./drop'); my $fcreate = $func->findvalue('./create'); my $fname = $func->findvalue('./name'); my $sdrop = $dbh->prepare($fdrop); my $screate = $dbh->prepare($fcreate); print "dropping function $fname ... "; $sdrop->execute(); print "creating function $fname\n\n"; $screate->execute(); } $tags = lc($tags); my @report_tags = split(/\./,$tags); foreach my $t (@report_tags) { print "\n\n=========== Starting to process tag $t\n"; print "==========================================\n\n"; my @asset_files; foreach my $asset ($dom->findnodes('//asset')) { if (index($asset->findvalue('./tag'),$t) != -1) { push @asset_files, $asset->findvalue('./file'); } } foreach my $fname (@asset_files) { my $asset_path = $mig_path . '../mig-asc/' . $fname; open my $a, $asset_path or abort("Could not open $fname."); while ( my $l = <$a> ) { print $fh $l; } print $fh "<<<\n"; } if ($excel_output == 0) { print_section_header(ucfirst($t),$fh); } my $linecount = $lines_per_page; my $r; undef @asset_files; foreach my $asset ($dom->findnodes('//asset')) { if (index($asset->findvalue('./tag'),$t) != -1) { push @asset_files, $asset->findvalue('./file'); } } my @report_names; foreach my $report ($dom->findnodes('//report')) { if (index($report->findvalue('./tag'),$t) != -1 and $report->findvalue('./iteration') eq '0') { push @report_names, $report->findvalue('./name'); if ($excel_output == 1) { print_query_to_excel($workbook,$report); } } } #only has one level of failover now but could change to array of hashes and loops #but this keeps it simple and in practice I haven't needed more than two if ($excel_output == 0) { foreach my $rname (@report_names) { my %report0; my %report1; my $check_tables0; my $check_tables1; if ($debug_flag == 1) {print "\nchecking for $rname ... ";} %report0 = find_report($dom,$t,$rname,'0',$debug_flag); $check_tables0 = check_table($report0{query},$MIGSCHEMA,$debug_flag,$rname); if ($check_tables0 == 1) { $r = print_query($fh,%report0); } else { %report1 = find_report($dom,$t,$rname,'1',$debug_flag); if (defined $report1{query}) { $check_tables1 = check_table($report1{query},$MIGSCHEMA,$debug_flag,$rname); if ($check_tables1 == 1) { $r = print_query($fh,%report1); } } } } } } print "\n"; if ($excel_output eq 1) { $workbook->close(); } else { close $fh; } ############ end of main logic sub find_xml { my $reports_xml = shift; my $mig_path = shift; my $excel_output = shift; if (!defined $reports_xml) { if ($excel_output == 0) { $reports_xml = $mig_path . '../mig-xml/evergreen_staged_report.xml'; return $reports_xml; } else { $reports_xml = $mig_path . '../mig-xml/excel_mapping_reports.xml'; return $reports_xml; } }; if ($reports_xml =~ m/\//) { return $reports_xml; } my $mig_test_file = $mig_path . '/../mig-xml/' . $reports_xml; my $working_test_dir = getcwd(); my $working_test_file = $working_test_dir . '/' . $reports_xml; if (-e $mig_test_file) { return $mig_test_file; } if (-e $working_test_file) { return $working_test_file; } return undef; } sub find_report { my $dom = shift; my $tag = shift; my $name = shift; my $iteration = shift; my $debug_flag = shift; my %report; if ($debug_flag == 1) {print "iteration $iteration ";} foreach my $node ($dom->findnodes('//report')) { if ($node->findvalue('./tag') =~ $tag and $node->findvalue('./iteration') eq $iteration and $node->findvalue('./name') eq $name) { if ($debug_flag == 1) {print "succeeded ... \n";} %report = ( name => $node->findvalue('./name'), report_title => $node->findvalue('./report_title'), query => $node->findvalue('./query'), heading => $node->findvalue('./heading'), tag => $node->findvalue('./tag'), iteration => $node->findvalue('./iteration'), note => $node->findvalue('./note'), ); return %report; } } if ($debug_flag == 1) {print "failed ... \n";} return %report = ( name => "eaten by grue" ); } sub print_section_header { my $t = shift; my $fh = shift; $t =~ s/_/ /g; #$t =~ s/(\w+)/\u$1/g;; print $fh "<<<\n"; print $fh "== $t Reports\n"; return; } sub create_report_name { my $rt = shift; my $excel_output = shift; my @abbr = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); $year += 1900; my $date = $year . '_' . $abbr[$mon] . '_' . $mday; my $report_file; if ($excel_output == 0) { $report_file = $rt . ' ' . $date . '.asciidoc'; } else { $report_file = $rt . ' ' . $date . '.xlsx'; } $report_file =~ s/ /_/g; return $report_file; } sub write_title_page { my $rt = shift; my $fh = shift; my $a = shift; my $captions_off = shift; my @abbr = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); my $l = length($report_title); my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); $year += 1900; print $fh "= $rt\n"; print $fh "$mday $abbr[$mon] $year\n"; print $fh "$a\n"; #print $fh ":title-logo-image: image::eolilogosmall.png[pdfwidth=3in]\n"; print $fh ":toc:\n"; if ($captions_off == 1) { print $fh ":caption:\n"; } print $fh "\n"; } sub check_table { my $query = shift; my $MIGSCHEMA = shift; my $debug_flag = shift; my $report_name = shift; if ($debug_flag == 1) {print "$query\n";} my $i; my $return_flag = 1; my @qe = split(/ /,$query); $i = @qe; $i--; my @tables; while ($i > -1) { if ($qe[$i] eq 'FROM' or $qe[$i] eq 'JOIN') { my $q = $i + 1; if ($qe[$q] ne '(SELECT') { push @tables, $qe[$q]; } } $i--; } if ($debug_flag == 1) {print "checking tables ... ";} $i = 0; foreach my $table (@tables) { my $sql; my $schema; if (index($table,'.') != -1) { $schema = (split /\./,$table)[0]; $table = (split /\./,$table)[1]; } $table = clean_query_string($table); if (defined $schema) { $schema = clean_query_string($schema); $sql = 'SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = \'' . $schema . '\' AND table_name = \'' . $table . '\');'; } else { $sql = 'SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = \'' . $MIGSCHEMA . '\' AND table_name = \'' . $table . '\');'; } my $sth = $dbh->prepare($sql); $sth->execute(); while (my @row = $sth->fetchrow_array) { if ($row[0] eq '1') { next; } else { $return_flag = 0; if ($debug_flag == 1) {print "detecting $table failed...\n";} } if ($row[0] eq '0') {$return_flag = 0;} } } if ($return_flag == 1 and $debug_flag == 1) {print "succeeded ...\n";} if ($return_flag == 0) {print "! a table failed the find test for report $report_name\n\n";} return $return_flag; } sub clean_query_string { my $str = shift; $str =~ s/(?!_)[[:punct:]]//g; #remove punct except underscores $str =~ s/\n//g; $str =~ s/\r//g; return $str; } sub print_query { my $fh = shift; my %report = @_; my $query = $report{query}; my $sth = $dbh->prepare($query); $sth->execute(); my $header_flag = 0; while (my @row = $sth->fetchrow_array) { if ($header_flag == 0) { print $fh "\n.*$report{report_title}*\n"; print $fh "|===\n"; my @h = split(/\./,$report{heading}); my $h_length = @h; my $h_count = 1; while ($h_count <= $h_length) { print $fh "|$h[$h_count-1] "; $h_count++; } print $fh "\n"; $header_flag = 1; } my $row_length = @row; my $r = 1; while ($r <= $row_length) { if (! defined $row[$r-1] ) { $row[$r-1] = 'none'; } print $fh "|$row[$r-1] "; $r++; } print $fh "\n"; } if ($header_flag == 1) { print $fh "|===\n\n"; print $fh $report{note}; print $fh "\n\n"; } print "successfully wrote output for $report{name}.\n\n"; } sub print_query_to_excel { my $workbook = shift; my $report = shift; my $header_format = $workbook->add_format( bold => 1, color => 'green', size => 16); my $note_format = $workbook->add_format( bold => 1, color => 'red', size => 14); my $query = $report->findvalue('./query'); my $title = $report->findvalue('./report_title'); my $headings = $report->findnodes('./heading'); my $sth = $dbh->prepare($query); $sth->execute(); my $worksheet = $workbook->add_worksheet( $title ); my $cell = ""; my $col = ""; my @h = split(/\./,$headings); my $h_length = @h; my $h_count = 1; while ($h_count <= $h_length) { $col = give_column($h_count-1); $cell = $col . '1'; $worksheet->write($cell,$h[$h_count-1],$header_format); $h_count++; } my $cur_row = 1; while (my @row = $sth->fetchrow_array) { $cur_row++; my $row_length = @row; my $r = 1; print Dumper(@row); while ($r <= $row_length) { if (! defined $row[$r-1] ) { $row[$r-1] = 'none'; } $col = give_column($r-1); $cell = $col . $cur_row; $worksheet->write($cell,$row[$r-1]); $r++; } } $cur_row = $cur_row + 2; $cell = "A" . "$cur_row"; $worksheet->write($cell,$report->findvalue('./note'),$note_format); print "Printed Query for $title.\n"; } sub give_column { my $i = shift; my $col = ""; do { $col .= chr( ( $i % 26 ) + ord('A') ); $i = int( $i / 26 ) - 1; } while ( $i >= 0 ); return scalar reverse $col; } sub abort { my $msg = shift; print STDERR "$0: $msg", "\n"; exit 1; }