From 155eb9eac077ca803f75d1295e584e7012e1b883 Mon Sep 17 00:00:00 2001 From: Jason Etheridge Date: Fri, 10 Apr 2020 14:05:56 -0400 Subject: [PATCH] toward renaming mig to emig and tweaking the directory layout --- emig | 334 ++++++ emig.d/asc/dedupe_process.asciidoc | 42 + emig.d/bin/Mig.pm | 268 +++++ emig.d/bin/mig-add | 127 +++ emig.d/bin/mig-bibstats | 206 ++++ emig.d/bin/mig-clean | 127 +++ emig.d/bin/mig-convert | 131 +++ emig.d/bin/mig-dump | 88 ++ emig.d/bin/mig-env | 268 +++++ emig.d/bin/mig-gsheet | 419 ++++++++ emig.d/bin/mig-iconv | 107 ++ emig.d/bin/mig-init | 93 ++ emig.d/bin/mig-link | 87 ++ emig.d/bin/mig-mapper | 778 ++++++++++++++ emig.d/bin/mig-quick | 66 ++ emig.d/bin/mig-quicksheet | 594 +++++++++++ emig.d/bin/mig-remove | 67 ++ emig.d/bin/mig-reporter | 507 +++++++++ emig.d/bin/mig-skip-clean | 100 ++ emig.d/bin/mig-skip-iconv | 87 ++ emig.d/bin/mig-sql | 48 + emig.d/bin/mig-stage | 128 +++ emig.d/bin/mig-stagebibs | 244 +++++ emig.d/bin/mig-status | 87 ++ emig.d/bin/mig-unlink | 71 ++ emig.d/sql/init/000-tracked_column.sql | 15 + emig.d/sql/init/000-tracked_file.sql | 14 + emig.d/sql/init/010_gsheet_tracking.sql | 15 + emig.d/sql/init/011_reporter_tables.sql | 5 + emig.d/sql/init/020_common_tables.sql | 354 +++++++ emig.d/sql/system/tlc/030_tlc_mapping_tables.sql | 375 +++++++ emig.d/xml/evergreen_full_system.xml | 519 ++++++++++ emig.d/xml/evergreen_staged_report.xml | 1182 ++++++++++++++++++++++ emig.d/xml/mapping_reports.xml | 854 ++++++++++++++++ mig | 334 ------ mig-asc/dedupe_process.asciidoc | 42 - mig-bin/Mig.pm | 268 ----- mig-bin/mig-add | 127 --- mig-bin/mig-bibstats | 206 ---- mig-bin/mig-clean | 127 --- mig-bin/mig-convert | 131 --- mig-bin/mig-dump | 88 -- mig-bin/mig-env | 268 ----- mig-bin/mig-gsheet | 419 -------- mig-bin/mig-iconv | 107 -- mig-bin/mig-init | 93 -- mig-bin/mig-link | 87 -- mig-bin/mig-mapper | 778 -------------- mig-bin/mig-quick | 66 -- mig-bin/mig-quicksheet | 594 ----------- mig-bin/mig-remove | 67 -- 
mig-bin/mig-reporter | 507 --------- mig-bin/mig-skip-clean | 100 -- mig-bin/mig-skip-iconv | 87 -- mig-bin/mig-sql | 48 - mig-bin/mig-stage | 128 --- mig-bin/mig-stagebibs | 244 ----- mig-bin/mig-status | 87 -- mig-bin/mig-unlink | 71 -- mig-sql/init/000-tracked_column.sql | 15 - mig-sql/init/000-tracked_file.sql | 14 - mig-sql/init/010_gsheet_tracking.sql | 15 - mig-sql/init/011_reporter_tables.sql | 5 - mig-sql/init/020_common_tables.sql | 354 ------- mig-sql/system/tlc/030_tlc_mapping_tables.sql | 375 ------- mig-xml/evergreen_full_system.xml | 519 ---------- mig-xml/evergreen_staged_report.xml | 1182 ---------------------- mig-xml/mapping_reports.xml | 854 ---------------- 68 files changed, 8407 insertions(+), 8407 deletions(-) create mode 100755 emig create mode 100644 emig.d/asc/dedupe_process.asciidoc create mode 100644 emig.d/bin/Mig.pm create mode 100755 emig.d/bin/mig-add create mode 100755 emig.d/bin/mig-bibstats create mode 100755 emig.d/bin/mig-clean create mode 100755 emig.d/bin/mig-convert create mode 100755 emig.d/bin/mig-dump create mode 100755 emig.d/bin/mig-env create mode 100755 emig.d/bin/mig-gsheet create mode 100755 emig.d/bin/mig-iconv create mode 100755 emig.d/bin/mig-init create mode 100755 emig.d/bin/mig-link create mode 100755 emig.d/bin/mig-mapper create mode 100755 emig.d/bin/mig-quick create mode 100755 emig.d/bin/mig-quicksheet create mode 100755 emig.d/bin/mig-remove create mode 100755 emig.d/bin/mig-reporter create mode 100755 emig.d/bin/mig-skip-clean create mode 100755 emig.d/bin/mig-skip-iconv create mode 100755 emig.d/bin/mig-sql create mode 100755 emig.d/bin/mig-stage create mode 100755 emig.d/bin/mig-stagebibs create mode 100755 emig.d/bin/mig-status create mode 100755 emig.d/bin/mig-unlink create mode 100644 emig.d/sql/init/000-tracked_column.sql create mode 100644 emig.d/sql/init/000-tracked_file.sql create mode 100644 emig.d/sql/init/010_gsheet_tracking.sql create mode 100644 emig.d/sql/init/011_reporter_tables.sql create 
mode 100644 emig.d/sql/init/020_common_tables.sql create mode 100644 emig.d/sql/system/tlc/030_tlc_mapping_tables.sql create mode 100644 emig.d/xml/evergreen_full_system.xml create mode 100644 emig.d/xml/evergreen_staged_report.xml create mode 100644 emig.d/xml/mapping_reports.xml delete mode 100755 mig delete mode 100644 mig-asc/dedupe_process.asciidoc delete mode 100644 mig-bin/Mig.pm delete mode 100755 mig-bin/mig-add delete mode 100755 mig-bin/mig-bibstats delete mode 100755 mig-bin/mig-clean delete mode 100755 mig-bin/mig-convert delete mode 100755 mig-bin/mig-dump delete mode 100755 mig-bin/mig-env delete mode 100755 mig-bin/mig-gsheet delete mode 100755 mig-bin/mig-iconv delete mode 100755 mig-bin/mig-init delete mode 100755 mig-bin/mig-link delete mode 100755 mig-bin/mig-mapper delete mode 100755 mig-bin/mig-quick delete mode 100755 mig-bin/mig-quicksheet delete mode 100755 mig-bin/mig-remove delete mode 100755 mig-bin/mig-reporter delete mode 100755 mig-bin/mig-skip-clean delete mode 100755 mig-bin/mig-skip-iconv delete mode 100755 mig-bin/mig-sql delete mode 100755 mig-bin/mig-stage delete mode 100755 mig-bin/mig-stagebibs delete mode 100755 mig-bin/mig-status delete mode 100755 mig-bin/mig-unlink delete mode 100644 mig-sql/init/000-tracked_column.sql delete mode 100644 mig-sql/init/000-tracked_file.sql delete mode 100644 mig-sql/init/010_gsheet_tracking.sql delete mode 100644 mig-sql/init/011_reporter_tables.sql delete mode 100644 mig-sql/init/020_common_tables.sql delete mode 100644 mig-sql/system/tlc/030_tlc_mapping_tables.sql delete mode 100644 mig-xml/evergreen_full_system.xml delete mode 100644 mig-xml/evergreen_staged_report.xml delete mode 100644 mig-xml/mapping_reports.xml diff --git a/emig b/emig new file mode 100755 index 0000000..3d53a6c --- /dev/null +++ b/emig @@ -0,0 +1,334 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig - git-like program for tracking and 
manipulating legacy data files for +migrations + +=head1 SYNOPSIS + +B [argument] [...] + +=head1 DESCRIPTION + +B is used to track and manipulate CSV or CSV-like text files exported from +legacy systems for migration into Evergreen. It can be a wrapper for some +other migration tools and tracks state using a PostgreSQL table in a given +migration schema. + +It makes use of certain environment variables that may be set by the B +tool: PGHOST, PGPORT, PGUSER, PGDATABASE, MIGSCHEMA, and MIGWORKDIR + +For most commands, if the current working directory falls outside of the +directory specified by MIGWORKDIR, then mig will assume that environment is +also incorrect and bail before doing any actual work. + +~/.pgpass should also be configured, as B will not prompt for a database +password. + +Only the B and B commands work without the MIGSCHEMA environment +variable being set. + +=head1 OVERVIEW + +Using B should go something like this: + +=over 15 + +=item mig env create m_foo # Sets up the environment + +=item mig env use m_foo # Spawns a shell using the configured environment + +=item mig init # creates the m_foo schema in the database if needed, and other tables + +=item mig add patrons.tsv # tracks an incoming data file; repeat for additional files + +=item mig iconv patrons.tsv # convert it to UTF8, creating patrons.tsv.utf8 + +=item mig clean patrons.tsv # cleans the file, creating patrons.tsv.utf8.clean + +=item mig link patrons.tsv actor_usr # makes the soon-to-be staging table a child of m_foo.actor_usr + +=item mig convert patrons.tsv # creates a .sql file for staging the data + +=item mig stage patrons.tsv # load said .sql file + +=item mig mapper patrons.tsv # interactive tool for analyzing/mapping the staging table + +=item mig analysis patrons.tsv # writes a summary .tsv file of mapped/flagged fields from the staging table + +=item mig map patrons.tsv # apply configured mappings + +=item mig write_prod patrons.tsv # creates a .sql file for pushing the 
staging data into production + +=item mig reporter --analyst "Foo Fooer" --report_title "Foo Load Analysis" #creates an asciidoc report + +=item mig gsheet --pull foo_tab_name OR --push foo_pg_table_name + +=item mig stagebibs --file foo.xml + +=back + +=head1 COMMANDS + +=over 15 + +=item B [command] + +Display this very same documentation, or specific documentation for one of the +commands listed here. + +=item B + +Invokes B with the same arguments. I can set important +environment variables and spawn a shell with those variables, and it also does +some directory creation and symlinking. + +=item B + +Create or re-create the PostgreSQL tracking table for the schema specified by +the MIGSCHEMA environment variable. If needed, create the migration schema +itself and run migration_tools.init() and build() if the migration_tools schema +exists. + +=item B [file] [...] + +Show status information for either the specified files or all tracked files if +no argument is given. + +=item B [--no-headers|--headers] [file|--no-headers|--headers] [...] + +Add the specified files to the migration tracker. Until --no-headers is +specified, the tracker will assume the files have headers. + +You can do crazy stuff like +B + +=item B [file] [...] + +Remove the specified files from the migration tracker. + +=item B [other arguments...] + +Attempts to invoke B on the specified tracked file, placing the output in +.utf8 + +If given no other arguments, the invocation will lool like + +=over 5 + +iconv -f ISO-8859-1 -t UTF-8 -o .utf8 + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +iconv [other arguments...] -o .utf8 + +=back + +=item B + +If this is used instead of B, then B will look for an existing +.utf8 and use it instead of attempting to create one. + +=item B [other arguments...] 
+ +Attempts to invoke B on the iconv-converted specified tracked file, +placing the output in .utf8.clean + +If given no other arguments, the invocation will lool like + +=over 5 + +clean_csv --config scripts/clean.conf --fix --apply <--create-headers> + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +clean_csv [other arguments...] + +=back + +=item B + +If this is used instead of B, then B will look for an existing +.utf8.clean and use it instead of attempting to create one. + +=item B + +Associate the specified file with a parent table within the migration schema. + +Linking multiple files to the same parent table is not allowed currently. + +=item B + +Removes any association between the specified file and a parent table within +the migration schema. + +=item B + +Attempts to invoke B on the .utf8.clean version of the specified +tracked file, creating either [file].utf8.clean.stage.sql or +_stage.sql depending on whether the file has been linked to a +parent table within the migration schema or not. + +If given no other arguments, the invocation will lool like + +=over 5 + +csv2sql --config scripts/clean.conf --add-x-migrate --schema [--parent ] -o <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +csv2sql [other arguments...] -o <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean + +=back + +=item B [other arguments...] + +Load the SQL-converted version of the specified file into the migration schema. + +Extra arguments are passed to the underlying call to psql + +=item B + +Interactive session for analyzing, flagging, and mapping legacy field data to +Evergreen fields. + +Upon exit, generate either [file].clean.map.sql or _map.sql. The +SQL generated will be UPDATE's for setting the Evergreen-specific columns for a +given file's staging tables, and TRUNCATE's and INSERT's for auxilary tables. 
+The files will have \include hooks for pulling in additional mapping files +(for example, end-user mappings for circ modifiers, etc.) + +=item B [file] + +Writes a MIGSCHEMA.tsv file containing a break-down of mapped and flagged +fields from the specified file, or all staged files if no file is specified. + +The main goal of the tsv file is to present end-user mappable data for circ +modifiers, shelving locations, patron profiles, etc. We use spreadsheets for +this now but may move to a dedicated UI in the future. + +=item B [file] + +Applies the mapping sql to the migration schema for the specified mapped file, +or for all mapped files if no file is specified. + +=item B [file] + +Generates _prod.sql for the specified linked and mapped file, or +all such files if no file is specified. + +=item B [arguments...] + +A wrapper around the psql command. At some point the plan is to shove mig-tracked variables into psql sessions. + +=item B --analyst "Analyst Name" --report_title "Report Title" + +Generates an asciidoc file in the git working directory that can be converted to +any appropriate format. The analyst and report parameters are required. + +Optional parameters are : + +--added_page_title and --added_page_file + +If one is used both must be. The added page file can be plain text or asciidoc. This +adds an extra arbitrary page of notes to the report. Mig assumes the page file is in the mig git directory. + +--tags + +This will define a set of tags to use, if not set it will default to Circs, +Holds, Actors, Bibs, Assets & Money. + +--debug + +Gives more information about what is happening. + +--reports_xml + +Allows you to override the default evergreen_staged_report.xml in the mig-xml folder. + +=item B --pull or --push spreadsheet_tab + +This uses the gsheet_tracked_table and gsheet_tracked column tables to map a Google Docs Spreadsheet tabs +with Postgres tables in the mig schema. The spreadsheet is assumed to share the name as the mig schema. 
+Tab names must be unique. Each spreadsheet column needs a header that matches the column name in the matching +table. An oauth session key is also needed for your Google account and mig gsheet will look for it in the +.mig directory. + +=back + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use FindBin; +my $mig_bin = "$FindBin::Bin/mig-bin/"; +use lib "$FindBin::Bin/mig-bin"; +use Mig; + +pod2usage(-verbose => 2) if ! $ARGV[0]; +switch($ARGV[0]) { + case "help" { + if (defined $ARGV[1]) { + my $cmd = $mig_bin . "mig-$ARGV[1]"; + if (-e $cmd) { + system( $mig_bin . "mig-$ARGV[1]", '--help' ); + } else { + pod2usage(-verbose => 2); + } + } else { + pod2usage(-verbose => 2); + } + } + case "map" { + } + case "load" { + } + case "wdir" { + print "$MIGWORKDIR\n"; + } + case "gdir" { + print "$MIGBASEGITDIR\n"; + } + case "sdir" { + print "$MIGGITDIR\n"; + } + else { + standard_invocation(@ARGV); + } +} + +sub standard_invocation { + my $cmd = shift; + + if ($cmd ne 'env') { Mig::die_if_no_env_migschema(); } + if (-e $mig_bin . "mig-$cmd") { + system( $mig_bin . "mig-$cmd", @_ ); + } else { + system( "mig-$cmd", @_ ) == 0 or die pod2usage(1); + } + +} + + diff --git a/emig.d/asc/dedupe_process.asciidoc b/emig.d/asc/dedupe_process.asciidoc new file mode 100644 index 0000000..b88ff1b --- /dev/null +++ b/emig.d/asc/dedupe_process.asciidoc @@ -0,0 +1,42 @@ +== Migration Deduplication Process + +The deduplication process covers what is considered a descriptive bibliographic record for items. Other kinds of MARC records such as authorities and MFHDs for serials are not included in the deduplication process. This process is updated periodically as we review the best ways to do this given that metadata in Evergreen changes as do cataloging practices. 
+ +Two important terms to understand as part of the deduplication process are incumbent bibliographic records and incoming bibliographic records. Incumbent records are defined as all the bib records in the library's database except those that are being brought in as part of the migration. Incoming bib records are those that are being imported as part of the migration. The migration deduplication is not tailored to a specific catalog's needs nor meant to be comprehensive of all scenarios. It is meant to reduce duplication of bibs that have common identifiers while preventing merges of materials that should not share a common bib record. It is not an inclusive deduplication, which is to say that duplicate incumbent items are not matched against each other - only incoming materials are matched against incumbent ones. + +The process begins by creating one list of incoming bib records and one of incumbent bib records. Each list is then assigned a set of values. Most of these are from the first value found from a list of possible values. + +* Author - derived from the 100$a, or 110$a, or 111$a, then made all lowercase and some punctuation is removed +* Title - derived from the 245$p and $n combined, then made all lowercase and some punctuation is removed +* Publication Date derived from the 260$c or 264$c with an indicator 2 value of '1', this tracks only the first one found even in the case of a record with multiples and all non-digit values removed +* Identifiers - a list of all the $a & $z from the 020 and 024 with non-alphanumeric values removed +* Search Formats - These are values calculated by Evergreen from MARC attributes, primarily the 007 and 008s to identify kinds of materials. Common values include: blu-ray, book, braille, casaudiiobook, casmusic, cdmusic, dvd, eaudio, electronic, equip, evideo, kit, map, music, microform, phonomusic, picture, score, serial, software and vhs. 
A single bib record can have multiple search formats depending on what information is present in the bib record. Duplicate search formats are removed, so if information is present due to multiple 008s for more than one book then book is only recorded once. + +A full description of how the physical description fixed fields of the 007 and fixed length data elements of the 008 interact is beyond the scope of the deduplication description but you can find more information at the Library of Congress' web site at https://www.loc.gov/marc/bibliographic/bd007.html and https://www.loc.gov/marc/bibliographic/bd008.html. + +After the lists of incumbent and incoming bibs are created they are compared and a list of potential matches are created. To be considered a potential match the following criteria must be met: + +* One identifier must match between the lists. +* The author must match exactly. +* The title must match exactly. +* The search formats must match. So, for example, if a record has entries for book, large print book and dvd then it will only match to another record that has book, large print book and dvd. If a single format is different between the two records it will not match. +* The publication date must match. + +Once a list of potential matches are built, the scoring begins. Scoring is done for each incoming bib record that there is a potential match among the incumbents for and for each incumbent that is a potential match. + +Scoring is done by evaluating each MARC record and giving it a score between 0 and 2 billion. However, most records have a score between 10 and 100 with some very detailed records getting much higher scores. 
+ +A record's score is built as follows: + +* 1 point for each identifier subfield from the 020, 022 and 024 MARC fields +* 2 points for each subject subfield from the 6xx MARC fields +* 1 point for each title subfield from the 210, 222 and 24x MARC fields as long as they are one of the following descriptive subfields: 'a','b','c','f','g','k','n','p','s', or '0' +* 1 point for each author subfield from the 100, 110, 111 and 130 MARC fields as long as they are one of the following descriptive subfields: 'a','b','c','d','e','f','g','j','k','l','n','p','q','t', or 'u' +* 1 point for each added entry from the 70x, 71x,72x, 73x, 74x, 75x, 80x, 81x, 83x + +Once the scores are created, the highest scoring incumbent bib record is selected as the one for the incoming bib matching it to be merged to, unless the incumbent's score is lower than the incoming bib's, in which case the incoming bib is retained and no merge is created. + +The list of merges is then run as a process. This process is very system intensive and involves merging all of the assets associated with one bib to the other, such as notes, copies, volumes, parts, transfering title level holds and so on. 856 tags from the incoming bib are transferred to the incumbent but no other MARC data is transferred. + +The process runs conservatively and varies but averages 1,000 bibs per hour. 
+ diff --git a/emig.d/bin/Mig.pm b/emig.d/bin/Mig.pm new file mode 100644 index 0000000..f1b97f2 --- /dev/null +++ b/emig.d/bin/Mig.pm @@ -0,0 +1,268 @@ +package Mig; + +use strict; +use Exporter; +use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); + +$VERSION = 1.00; +@ISA = qw(Exporter); +@EXPORT = (); +@EXPORT_OK = qw(); +%EXPORT_TAGS = ( + DEFAULT => [] +); + +use DBI; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); + +sub db_connect { + my $dbh; + if ($PGHOST) { + $dbh = DBI->connect( + "dbi:Pg:host=$PGHOST;dbname=$PGDATABASE;port=$PGPORT" + ,$PGUSER + ,undef + ) || die "Unable to connect to $PGHOST:$PGPORT:$PGDATABASE:$PGUSER : $!\n"; + } else { + $dbh = DBI->connect("dbi:Pg:dbname=$PGDATABASE", "", "") || die "Unable to connect to $PGDATABASE : $!\n"; + } + $dbh->do("SET search_path TO $MIGSCHEMA, evergreen, pg_catalog, public"); + return $dbh; +} + +sub db_disconnect { + my $dbh = shift; + $dbh->disconnect; +} + +sub sql { + my $sql = shift; + chomp $sql; + $sql =~ s/\n//g; + print "\n$sql\n"; + return $sql; +} + +sub die_if_no_env_migschema { + die "MIGSCHEMA environment variable not set. See 'mig env help'\n" + unless $MIGSCHEMA; +} + +sub check_for_db_migschema { + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT EXISTS( + SELECT 1 + FROM pg_namespace + WHERE nspname = ? 
+ );" + ); + my $rv = $sth->execute($MIGSCHEMA) + || die "Error checking for migration schema ($MIGSCHEMA): $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + my $found; + if ($cols[0]) { + print "Found migration schema ($MIGSCHEMA) at $PGHOST:$PGPORT:$PGDATABASE:$PGUSER\n"; + $found = 1; + } else { + print "Migration schema ($MIGSCHEMA) does not exist at $PGHOST:$PGPORT:$PGDATABASE:$PGUSER\n"; + $found = 0; + } + db_disconnect($dbh); + return $found; +} + +sub check_db_migschema_for_migration_tables { + my $found = check_db_migschema_for_specific_table('asset_copy'); + if (!$found) { + print "Missing migration tables (such as $MIGSCHEMA.asset_copy)\n"; + } + return $found; +} + +sub check_db_migschema_for_specific_table { + my $table = shift; + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT EXISTS( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = " . $dbh->quote( $MIGSCHEMA ) . " + AND table_name = " . $dbh->quote( $table ) . " + );" + ); + my $rv = $sth->execute() + || die "Error checking migration schema ($MIGSCHEMA) for table ($table): $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + my $found; + if ($cols[0]) { + $found = 1; + } else { + $found = 0; + } + db_disconnect($dbh); + return $found; +} + +sub check_for_migration_tools { + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT EXISTS( + SELECT 1 + FROM pg_namespace + WHERE nspname = 'migration_tools' + );" + ); + my $rv = $sth->execute() + || die "Error checking for migration_tools schema: $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + db_disconnect($dbh); + return $cols[0]; +} + +sub die_if_no_migration_tools { + if (check_for_migration_tools()) { + print "Found migration_tools schema\n"; + } else { + die "Missing migration_tools schema\n"; + } +} + +sub check_for_mig_tracking_table { + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT EXISTS( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = " . 
$dbh->quote( $MIGSCHEMA ) . " + AND table_name = 'tracked_file' + );" + ); + my $rv = $sth->execute() + || die "Error checking for table (tracked_file): $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + db_disconnect($dbh); + return $cols[0]; +} + +sub die_if_mig_tracking_table_exists { + if (check_for_mig_tracking_table()) { + die "Table $MIGSCHEMA.tracked_file already exists. Bailing init...\n"; + } +} + +sub die_if_mig_tracking_table_does_not_exist { + if (!check_for_mig_tracking_table()) { + die "Table $MIGSCHEMA.tracked_file does not exist. Bailing...\n"; + } +} + +sub check_for_mig_column_tracking_table { + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT EXISTS( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = " . $dbh->quote( $MIGSCHEMA ) . " + AND table_name = 'tracked_column' + );" + ); + my $rv = $sth->execute() + || die "Error checking for table (tracked_column): $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + db_disconnect($dbh); + return $cols[0]; +} + +sub die_if_mig_column_tracking_table_exists { + if (check_for_mig_column_tracking_table()) { + die "Table $MIGSCHEMA.tracked_column already exists. Bailing init...\n"; + } +} + +sub die_if_mig_column_tracking_table_does_not_exist { + if (!check_for_mig_column_tracking_table()) { + die "Table $MIGSCHEMA.tracked_column does not exist. Bailing...\n"; + } +} + +sub check_for_tracked_file { + my $file = shift; + my $options = shift; + if (! -e $file) { + die "file not found: $file\n" unless $options && $options->{'allow_missing'}; + } + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT id + FROM $MIGSCHEMA.tracked_file + WHERE base_filename = " . $dbh->quote( $file ) . 
";" + ); + my $rv = $sth->execute() + || die "Error checking table (tracked_file) for base_filename ($file): $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + db_disconnect($dbh); + return $cols[0]; +} + +sub check_for_tracked_column { + my ($table,$column,$options) = (shift,shift,shift); + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT id + FROM $MIGSCHEMA.tracked_column + WHERE staged_table = " . $dbh->quote( $table ) . " + AND staged_column = " . $dbh->quote( $column ) . ";" + ); + my $rv = $sth->execute() + || die "Error checking table (tracked_column) for $table.$column: $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + db_disconnect($dbh); + return $cols[0]; +} + +sub status_this_file { + my $file = shift; + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT * + FROM $MIGSCHEMA.tracked_file + WHERE base_filename = " . $dbh->quote( $file ) . ";" + ); + my $rv = $sth->execute() + || die "Error retrieving data from table (tracked_file) for base_filename ($file): $!"; + my $data = $sth->fetchrow_hashref; + $sth->finish; + db_disconnect($dbh); + return $data; +} + +sub status_this_column { + my ($table,$column) = (shift,shift); + my $dbh = db_connect(); + my $sth = $dbh->prepare(" + SELECT * + FROM $MIGSCHEMA.tracked_column + WHERE staged_table = " . $dbh->quote( $table ) . " + AND staged_column = " . $dbh->quote( $column ) . 
";" + ); + my $rv = $sth->execute() + || die "Error checking table (tracked_column) for $table.$column: $!"; + my $data = $sth->fetchrow_hashref; + $sth->finish; + db_disconnect($dbh); + return $data; +} + +1; + diff --git a/emig.d/bin/mig-add b/emig.d/bin/mig-add new file mode 100755 index 0000000..3e433c5 --- /dev/null +++ b/emig.d/bin/mig-add @@ -0,0 +1,127 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-add - This will add the specified files to the mig tracking table for the +schema pointed to by the MIGSCHEMA environment variable in the PostgreSQL +database specified by various PG environment variables. + +--headers (the default) and --no-headers are repeatable, and indicate whether +subsequent files have headers or not + +--headers-file specifies a text file defining the column headers for +the next added , which should contain one line per header + +--headers-file will automatically invoke --no-headers + +You'll need to invoke B prior to using commands like B + +=head1 SYNOPSIS + +B [--no-headers|--headers|--headers-file ] [file|--no-headers|--headers|--headers-file ] [...] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $has_headers = 1; +my $headers_file; +my $next_arg_is_headers_file = 0; + +foreach my $arg (@ARGV) { + if ($next_arg_is_headers_file) { + $next_arg_is_headers_file = 0; + $headers_file = abs_path($arg); + next; + } + if ($arg eq '--headers') { + $has_headers = 1; + next; + } + if ($arg eq '--no-headers') { + $has_headers = 0; + next; + } + if ($arg eq '--headers-file') { + $next_arg_is_headers_file = 1; + $has_headers = 0; + next; + } + my $file = abs_path($arg); + if ($file =~ /^$MIGBASEWORKDIR/) { + if (-e $file) { + if (-f $file) { + add_this_file($file,$has_headers,$headers_file); + $headers_file = ''; # clear after applying to just one file + } else { + print "Not a real file: $file\n"; + } + } else { + print "Could not find file: $file\n"; + } + } else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; + } +} + +exit 0; + +############################################################################### + +sub add_this_file { + my $file = shift; + my $headers = shift; + my $headers_file = shift; + if ($headers_file) { + if (! (-e $headers_file && -f $headers_file)) { + print "Could not find headers file $headers_file, skipping $file\n"; + return; + } + } + if (Mig::check_for_tracked_file($file)) { + print "File already tracked: $file\n"; + } else { + print 'Adding ('; + if ($headers_file) { + print "with headers file = $headers_file"; + } else { + print ($headers ? ' with headers' : 'without headers'); + } + print '): ' . "$file\n"; + my $dbh = Mig::db_connect(); + my $rv = $dbh->do(" + INSERT INTO $MIGSCHEMA.tracked_file ( + base_filename + ,has_headers + ,headers_file + ) VALUES ( + " . $dbh->quote($file) . " + ," . $dbh->quote($headers) . " + ," . $dbh->quote($headers_file) . 
" + ); + ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } +} + diff --git a/emig.d/bin/mig-bibstats b/emig.d/bin/mig-bibstats new file mode 100755 index 0000000..e0db266 --- /dev/null +++ b/emig.d/bin/mig-bibstats @@ -0,0 +1,206 @@ +#!/usr/bin/perl +# -*- coding: iso-8859-15 -*- +############################################################################### +=pod + +=item B --file foo.mrc + +Reads through a marc file to generate statistical information about the file +for quick analysis. + +--uri_threshold defaults to 1, only shows URI values with more than that +frequency + +--ingore_filetype true will have it not care what file returns as the type and +always treat it as marc21 +=back + +=cut + +############################################################################### + +use strict; +use warnings; + +use Data::Dumper; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use Switch; +use Getopt::Long; +use MARC::Batch; +use MARC::Record; +use MARC::Field; +use Cwd 'abs_path'; +use Cwd qw(getcwd); +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; +use open ':encoding(utf8)'; + +pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; +pod2usage(-verbose => 1) if ! 
$ARGV[1]; + +my $file; +my $uri_threshold = 1; +my $p_holding_code; +my $p_barcode_subfield; +my $p_ils_name = 'Runtime ILS'; +my $holding_threshold = 50; +my $p_ignore_filetype = 'false'; + +my $ret = GetOptions( + 'file:s' => \$file, + 'uri_threshold:i' => \$uri_threshold, + 'holding_code:s' => \$p_holding_code, + 'barcode:s' => \$p_barcode_subfield, + 'ignore_filetype:s' => \$p_ignore_filetype, + 'ils_name:s' => \$p_ils_name, + 'holding_threshold:s' => \$holding_threshold +); + +if ($p_holding_code and length $p_holding_code != 3) { abort('Holdings codes must be three characters.'); } + +if ($p_barcode_subfield) { + if (!defined $p_holding_code) { abort('A barcode field can not be used without a holding code.'); } + if (length $p_barcode_subfield != 1) { abort('Barcode subfields must be a single character code.'); } +} + +my @ilses = ( + ['Mandarin','852','p'], + ['Evergreen','852','p'], + ['Polaris','852','p'], + ['TLC','949','g'], + ['Koha','952','p'], + ['Sympony','999','i'] +); + +my @temp; +if ($p_holding_code) { + push @temp, $p_ils_name; + push @temp, $p_holding_code; + if ($p_barcode_subfield) { push @temp, lc $p_barcode_subfield; } +} +push @ilses, @temp; + + + +my $batch = MARC::Batch->new('USMARC', $file); +$batch->strict_off(); +my $filetype = `file $file`; +if ($filetype =~ m/MARC21/ or $p_ignore_filetype eq 'true') { print "$filetype.\n" } + else { abort("File is not MARC21."); } + +my $i = 0; +my $uri_count = 0; +my $uri_valid_count = 0; +my $uri_sub9_count = 0; +my $author_sub0 = 0; +my $title_sub0 = 0; +my @uris; +my @fields; +my @codes; +my @holding_code_strings; +my %holding_counts; +my %barcode_counts; + +foreach (@ilses) { + $holding_counts{@$_[0]} = 0; + $barcode_counts{@$_[0]} = 0; +} + +while ( my $record = $batch->next() ) { + $i++; + #check holdings, bit time consuming but more future proof + foreach (@ilses) { + my $ils = @$_[0]; + my $hcode = @$_[1]; + my $barcode = @$_[2]; + my @holding_fields = $record->field($hcode); + my $l = 
scalar @holding_fields; + my $v = $holding_counts{$ils}; + if ($l) { $holding_counts{$ils} = $v + $l; } + } + #process 856s + @fields = $record->field('856'); + my $ldr = substr $record->leader(), 9, 1; + push @codes, $ldr; + foreach my $f (@fields) { + my $u = $f->subfield('u'); + my $n = $f->subfield('9'); + if (defined $n) { $uri_sub9_count++; } + if (defined $u) { + $uri_count++; + my $ind1 = $f->indicator('1'); + my $ind2 = $f->indicator('2'); + if ($ind1 eq '4') { + if ($ind2 eq '0' or $ind2 eq '1') { $uri_valid_count++; } + } + my $ustring = lc $f->as_string('u'); + $ustring =~ s/http:\/\///; + $ustring =~ s/ftp:\/\///; + $ustring =~ s/https:\/\///; + $ustring =~ s/\/.*//; + push @uris, $ustring; + } + } + #check for authority linking on 100s and 245s, if present may need to scrub them + @fields = $record->field('100'); + foreach my $f (@fields) { + my $t = $f->subfield('0'); + if (defined $t) { $title_sub0++; } + } + @fields = $record->field('245'); + foreach my $f (@fields) { + my $t = $f->subfield('0'); + if (defined $t) { $author_sub0++; } + } + if(($i % 1000) == 0) { print "Processing bib $i.\n"; } +} + +my %uri_counts; +$uri_counts{$_}++ for @uris; + +my %code_counts; +$code_counts{$_}++ for @codes; + +print "\n$filetype\n"; +print "$i bibs read in file\n\n"; + +print "=== Leader 09 codes\n"; +foreach my $key (keys %code_counts) { + my $value = $code_counts{$key}; + print "=== $key $value\n"; +} +print "\n"; + +print "$uri_count 856 fields with a subfield u\n"; +print "$uri_valid_count 856 fields with a subfield u and valid indicators\n"; +print "$uri_sub9_count 856 fields have subfield 9s\n"; +print "$title_sub0 100 fields have a subfield 0\n"; +print "$author_sub0 245 fields have a subfield 0\n"; + +print "\n=== Holdings Analysis\n"; +foreach my $key (keys %holding_counts) { + my $c = $holding_counts{$key}; + if (((100/$i)*$c) >= $holding_threshold) { print "Could be $key $holding_counts{$key} holdings tags\n"; } +} + +print "\nURI values are domains 
and filtered to only show those with more than $uri_threshold\n"; +foreach my $key (keys %uri_counts) { + my $value = $uri_counts{$key}; + if ($value > $uri_threshold) { print "=== $key $value\n"; } +} + +close $file; + +########### functions + +sub abort { + my $msg = shift; + print STDERR "$0: $msg", "\n"; + exit 1; +} diff --git a/emig.d/bin/mig-clean b/emig.d/bin/mig-clean new file mode 100755 index 0000000..b9cb013 --- /dev/null +++ b/emig.d/bin/mig-clean @@ -0,0 +1,127 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-clean + +Attempts to invoke B on the specified tracked file, placing the +output in [file].clean + +If given no other arguments, the invocation will lool like + +=over 5 + +clean_csv --config scripts/clean.conf --fix --apply [--create-headers|--use-headers ] + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +clean_csv [other arguments...] + +=back + +You'll need to invoke B or B prior to using commands +like B + +=head1 SYNOPSIS + +B [other arguments...] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $file = abs_path($ARGV[0]); +if ($file =~ /^$MIGBASEWORKDIR/) { + call_clean_csv(@ARGV); +} else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; +} + +exit 0; + +############################################################################### + +sub call_clean_csv { + my $file = abs_path(shift); + my @args = @_; + + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + my $data = Mig::status_this_file($file); + + if (! $data->{'utf8_filename'}) { + die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; + } + + my $utf8_file = $data->{'utf8_filename'}; + if (! -e $utf8_file) { + die "missing file: $utf8_file\n"; + } + + print "cleaning tracked file: $file\n"; + + if (scalar(@args) == 0) { + @args = ( + '--config' + ,'scripts/clean.conf' + ,'--fix' + ,'--apply' + ,'--backslash' + ,'--pad' + ); + if (! $data->{'has_headers'}) { + if ($data->{'headers_file'}) { + push @args, '--use-headers'; + push @args, $data->{'headers_file'}; + } else { + push @args, '--create-headers'; + } + } + } + + print join(' ',@args) . "\n"; + system('clean_csv', @args, $utf8_file); + + my $dbh = Mig::db_connect(); + my $clean_file = $dbh->quote($utf8_file . '.clean'); + if (! -e $utf8_file . '.clean') { + print "clean file does not exist: $clean_file\n"; + $clean_file = $dbh->quote(''); + } + + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_file + SET clean_filename = $clean_file + WHERE base_filename = " . $dbh->quote($file) . 
" + ; + ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } else { + print "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/bin/mig-convert b/emig.d/bin/mig-convert new file mode 100755 index 0000000..6fe2172 --- /dev/null +++ b/emig.d/bin/mig-convert @@ -0,0 +1,131 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-convert + +Attempts to invoke B on the .utf8.clean version of the specified +tracked file, creating either [file].utf8.clean.stage.sql or +_stage.sql depending on whether the file has been linked to a +parent table within the migration schema or not. + +If given no other arguments, the invocation will lool like + +=over 5 + +csv2sql --config scripts/clean.conf --add-x-migrate --schema [--parent ] --outfile <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +csv2sql [other arguments...] --schema [--parent ] --outfile <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean + +=back + +=head1 SYNOPSIS + +B [other arguments...] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $file = abs_path($ARGV[0]); +if ($file =~ /^$MIGBASEWORKDIR/) { + call_convert_csv(@ARGV); +} else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; +} + +exit 0; + +############################################################################### + +sub call_convert_csv { + my $file = abs_path(shift); + my @args = @_; + + my $stage_sql_filename; + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + my $data = Mig::status_this_file($file); + + if (! $data->{'utf8_filename'}) { + die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; + } + + if (! $data->{'clean_filename'}) { + die "mig-clean or mig-skip-clean needed for .clean version of file: $file\n"; + } + + my $clean_file = $data->{'clean_filename'}; + if (! -e $clean_file) { + die "missing file: $clean_file\n"; + } + + print "converting tracked file: $file\n"; + + if (scalar(@args) == 0) { + @args = ( + '--config' + ,'scripts/clean.conf' + ,'--add-x-migrate' + ); + } + push @args, '--use-no-headers-file'; + push @args, '--schema'; + push @args, $MIGSCHEMA; + if ($data->{'parent_table'}) { + push @args, '--parent'; + push @args, $data->{'parent_table'}; + $stage_sql_filename = $data->{'parent_table'} . '.stage.sql'; + } else { + $stage_sql_filename = "$clean_file.stage.sql"; + } + push @args, '--outfile'; + push @args, $stage_sql_filename; + + print "args: " . join(',',@args) . "\n"; + system('csv2sql', @args, $clean_file); + + my $dbh = Mig::db_connect(); + if (! -e $stage_sql_filename) { + print "SQL converted file does not exist: $stage_sql_filename\n"; + $stage_sql_filename = ''; + } + + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_file + SET stage_sql_filename = " . $dbh->quote($stage_sql_filename) . " + WHERE base_filename = " . $dbh->quote($file) . 
" + ; + ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } else { + print "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/bin/mig-dump b/emig.d/bin/mig-dump new file mode 100755 index 0000000..57edeab --- /dev/null +++ b/emig.d/bin/mig-dump @@ -0,0 +1,88 @@ +#!/usr/bin/perl + +############################################################################### +=pod + +=head1 NAME + +mig-dump + +A wrapper around the pg_dump command that saves a table in the mig schema with a time stamp in the working directory. + +=head1 SYNOPSIS + +B [arguments...] + +=cut + +############################################################################### + +use strict; +use warnings; + +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use Switch; +use Cwd 'abs_path'; +use Cwd qw(getcwd); +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; +use open ':encoding(utf8)'; + +pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; +pod2usage(-verbose => 1) if ! $ARGV[1]; + +my $fh; +my $outfile; + +my $table; +my $next_arg_is_table; + +foreach my $arg (@ARGV) { + if ($arg eq '--table') { + $next_arg_is_table = 1; + next; + } + if ($next_arg_is_table) { + $table = $arg; + $next_arg_is_table = 0; + next; + } +} + +my $outfilewpath = create_dumpfile_name($table); + +my $syscmd = 'pg_dump --format plain --data-only --file ' . $outfilewpath . ' --table ' . $MIGSCHEMA . '.' . $table . ' ' . $PGUSER; + +print "pgdump command: \n"; +print "$syscmd\n"; + +system($syscmd); + +####### beyond here be functions + +sub create_dumpfile_name { + my $table_name = shift; + $table_name =~ s/\./_/; + my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); + $year += 1900; + my $date = $year . '-' . $mon . '-' . $mday; + my $dump_file = $table_name . ' ' . $date . 
'.pg'; + $dump_file =~ s/ /_/g; + $dump_file = $MIGGITDIR . $dump_file; + print "$dump_file \n"; + return $dump_file; +} + +sub abort { + my $msg = shift; + print STDERR "$0: $msg", "\n"; + exit 1; +} + + diff --git a/emig.d/bin/mig-env b/emig.d/bin/mig-env new file mode 100755 index 0000000..dceec4f --- /dev/null +++ b/emig.d/bin/mig-env @@ -0,0 +1,268 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-env - This tool is for tracking and setting environment variables used by +B and its sub-tools. + +=head1 SYNOPSIS + +B + +B [migration_schema] + +B [orig_migration_schema] [new_migration_schema] + +B + +B + +=head1 DESCRIPTION + +For most invocations, B will either create or use a migration-specific +file (~/.mig/.env) for setting the following environment +variables: + +=over 15 + +=item MIGSCHEMA + +The name of the migration schema. Convention has this being a single lowercased +word or acronym identifying the library, prefixed with 'm_'. + +=item MIGWORKDIR + +The base working directory for containing migration data, scripts, and other +files. + +=item PGHOST + +The IP address or hostname for the PostgreSQL database used for a migration. + +=item PGPORT + +The TCP port for the PostgreSQL database. + +=item PGUSER + +The PostgreSQL user to use for the database. + +=item PGDATABASE + +The name of the actual database containing the migration schema. + +=back + +This script may also setup a symlink from a specified Git repository to a +scripts/ directory within the migration work directory. The default for this is +~/git/migration-work/MIGSCHEMA --> MIGWORKDIR/scripts + +It may also create the migration work directory if necessary. 
+ +=head1 COMMANDS + +=over 15 + +=item B + +This invocation will prompt for various values and create a .env file for the +specified migration schema, and a symlink between the specified Git repository +and migration work directory (which will also be created if needed). + +=item B + +This command will spawn a bash shell that executes the corresponding +~/.mig/.env script for setting up environment variables encoded during +B. + +=item B [schema] + +This command will show the contents of the corresponding ~/.mig/.env +script, or, if no schema is specified, then it will list pertinent variables in +the current environment if they exist. + +=item B [orig schema] [new schema] + +This command will create a "shallow" clone of the orig schema, in that it will +share database credentials as well as git and data directories, but will have a +separate schema name. + +=item B + +This command will list migration schemas found in ~/.mig + +=item B + +Display the documentation you're reading now. + +=back + +=cut + +############################################################################### + +use strict; +use 5.012; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use File::Path qw(make_path); +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; + +pod2usage(-verbose => 2) if ! $ARGV[0]; + +my $migration_schema = $ARGV[1] || ''; +my $filename = "$HOME/.mig/$migration_schema.env"; +switch($ARGV[0]) { + case "--help" { + pod2usage(-verbose => 2); + } + case "help" { + pod2usage(-verbose => 2); + } + case "create" { + pod2usage(-verbose => 1) if ! $ARGV[1]; + mig_env_create(); + } + case "clone" { + pod2usage(-verbose => 1) if ! $ARGV[2]; + $migration_schema = $ARGV[2] || ''; + $filename = "$HOME/.mig/$migration_schema.env"; + mig_env_clone(); + } + case "use" { + pod2usage(-verbose => 1) if ! 
$ARGV[1]; + if (-e $filename) { + exec '/bin/bash', '--init-file', $filename; + } else { + die "\n$filename does not exist\n"; + } + } + case "show" { + if (-e $filename) { + exec '/bin/cat', $filename; + } else { + print `env | sort | egrep 'MIG|PG'`; + } + } + case "list" { + opendir(my $dh, "$HOME/.mig") || die "can't open $HOME/.mig: $!"; + while (readdir $dh) { + if (/^(.*)\.env$/) { + print "$1\n"; + } + } + closedir $dh; + } + else { + pod2usage(1); + } +} + +sub mig_env_create { + if (-e $filename) { + print "Re-Creating $filename\n"; + print `cat $filename`; + } else { + print "Creating $filename\n"; + } + print "\n"; + + # directories + + $MIGBASEWORKDIR = "$HOME/data/" unless $MIGBASEWORKDIR; + my $migworkdir_default = "$MIGBASEWORKDIR$migration_schema/"; + print "Main work directory (default $migworkdir_default): "; + my $MIGWORKDIR = ; + chomp $MIGWORKDIR; + if (! $MIGWORKDIR) { + $MIGWORKDIR = $migworkdir_default; + } + $MIGBASEGITDIR = "$HOME/git/migration-work/" unless $MIGBASEGITDIR; + my $miggitdir_default = "${MIGBASEGITDIR}/$migration_schema/"; + print "git repo for migration-specific scripts (default $miggitdir_default): "; + my $MIGGITDIR = ; + chomp $MIGGITDIR; + if (! $MIGGITDIR) { + $MIGGITDIR = $miggitdir_default; + } + + # PostgreSQL + + $PGHOST = 'localhost' unless $PGHOST; + my $pghost_default = $PGHOST; + print "PGHOST (default $pghost_default): "; + $PGHOST = ; + chomp $PGHOST; + if (! $PGHOST) { + $PGHOST = $pghost_default; + } + $PGPORT = 5432 unless $PGPORT; + my $pgport_default = $PGPORT; + print "PGPORT (default $pgport_default): "; + $PGPORT = ; + chomp $PGPORT; + if (! $PGPORT) { + $PGPORT = $pgport_default; + } + $PGDATABASE = 'evergreen' unless $PGDATABASE; + my $pgdatabase_default = $PGDATABASE; + print "PGDATABASE (default $pgdatabase_default): "; + $PGDATABASE = ; + chomp $PGDATABASE; + if (! 
$PGDATABASE) { + $PGDATABASE = $pgdatabase_default; + } + $PGUSER = $PGDATABASE unless $PGUSER; + my $pguser_default = $PGUSER; + print "PGUSER (default $pguser_default): "; + my $PGUSER = ; + chomp $PGUSER; + if (! $PGUSER) { + $PGUSER = $pguser_default; + } + + # create files and directories if needed + + mkdir "$HOME/.mig"; + make_path($MIGGITDIR, { verbose => 1 }); + `touch $MIGGITDIR/README`; + make_path($MIGWORKDIR, { verbose => 1 }); + symlink $MIGGITDIR, "$MIGWORKDIR/scripts"; + open FILE, ">$filename"; + print FILE "export PGHOST=$PGHOST\n"; + print FILE "export PGPORT=$PGPORT\n"; + print FILE "export PGDATABASE=$PGDATABASE\n"; + print FILE "export PGUSER=$PGUSER\n"; + print FILE "export PGOPTIONS='-c search_path=$migration_schema,public,evergreen'\n"; + print FILE "export MIGENVPROMPT=$migration_schema\n"; + print FILE "export MIGSCHEMA=$migration_schema\n"; + print FILE "export MIGBASEWORKDIR=$MIGBASEWORKDIR\n"; + print FILE "export MIGWORKDIR=$MIGWORKDIR\n"; + print FILE "export MIGBASEGITDIR=$MIGBASEGITDIR\n"; + print FILE "export MIGGITDIR=$MIGGITDIR\n"; + print FILE "alias wcd='cd `mig wdir`'\n"; + print FILE "alias gcd='cd `mig gdir`'\n"; + print FILE "alias scd='cd `mig sdir`'\n"; + print FILE "source ~/.profile\n"; + print FILE "env | sort | egrep 'PG|MIG'\n"; + print FILE 'echo shell PID = $$' . 
"\n"; + close FILE; +} + +sub mig_env_clone { + my $orig_migration_schema = $ARGV[1] || ''; + my $orig_filename = "$HOME/.mig/$orig_migration_schema.env"; + `cp $orig_filename $filename`; + `sed -i 's/export PGOPTIONS=.*/export PGOPTIONS='"'"'-c search_path=$migration_schema,public,evergreen'"'"'/' $filename`; + `sed -i 's/export MIGENVPROMPT=.*/export MIGENVPROMPT=$migration_schema/' $filename`; + `sed -i 's/export MIGSCHEMA=.*/export MIGSCHEMA=$migration_schema/' $filename`; +} + diff --git a/emig.d/bin/mig-gsheet b/emig.d/bin/mig-gsheet new file mode 100755 index 0000000..5975bd6 --- /dev/null +++ b/emig.d/bin/mig-gsheet @@ -0,0 +1,419 @@ +#!/usr/bin/perl + +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Net::Google::Spreadsheets; +use Net::Google::DataAPI::Auth::OAuth2; +use Net::OAuth2::AccessToken; +use Storable; +use DBI; +use FindBin; +use lib "$FindBin::Bin/"; +my $mig_bin = "$FindBin::Bin/"; +use Mig; +use strict; +use Switch; +use Cwd 'abs_path'; +use Pod::Usage; +use Data::Dumper; +use DateTime; + +pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +die_if_gsheet_tracked_table_does_not_exist(); +die_if_gsheet_tracked_column_does_not_exist(); + +my $cmd_push; +my $next_arg_is_push; +my $cmd_pull; +my $next_arg_is_pull; +my @worksheet_names; +my $cmd_export = 0; +my @table_names; +my $sql; +my $sth; +my @ws; +my @tracked_ws_names; +my $authfile = $ENV{HOME} . 
'/.mig/oauth.env'; +my $next_arg_is_authfile; + +foreach my $arg (@ARGV) { + if ($arg eq '--push') { + $next_arg_is_push = 1; + next; + } + if ($next_arg_is_push) { + $cmd_push = $arg; + $next_arg_is_push = 0; + next; + } + if ($arg eq '--pull') { + $next_arg_is_pull = 1; + next; + } + if ($next_arg_is_pull) { + $cmd_pull = $arg; + $next_arg_is_pull = 0; + next; + } + if ($arg eq '--authfile') { + $next_arg_is_authfile = 1; + next; + } + if ($next_arg_is_authfile) { + $authfile = $arg; + $next_arg_is_authfile = 0; + next; + } + if ($arg eq '--export') { + $cmd_export = 1; + next; + } +} + +abort('must specify --push (db->worksheets) or --pull (worksheets->db)') unless (defined $cmd_push or defined $cmd_pull); +if (defined $cmd_push and defined $cmd_pull) { abort('you can not specify both a --push and --pull on the same command'); } + +my $clientid; +my $clientsecret; +my $sessionfile; + +open (my $fh, '<', $authfile) or abort("Could not open $authfile"); +while (my $var = <$fh>) { + chomp $var; + my ($var1, $var2) = split /=/,$var; + if ($var1 eq 'CLIENTID') { $clientid = $var2; } + if ($var1 eq 'CLIENTSECRET') { $clientsecret = $var2; } + if ($var1 eq 'SESSIONFILE') { $sessionfile = $var2; } +} +my $dbh = Mig::db_connect(); +my $spreadsheet = connect_gsheet($clientid,$clientsecret,$sessionfile); +abort('could not connect to google sheet') unless (defined $spreadsheet); + +$sql = 'SELECT tab_name FROM gsheet_tracked_table;'; +$sth = $dbh->prepare($sql); +my $ra = $sth->execute(); +while (my @row = $sth->fetchrow_array) { + push @tracked_ws_names, $row[0]; +} + +if (defined $cmd_pull) { + print "Pulling "; + if ($cmd_pull eq 'all') { + print "all worksheets.\n"; + @ws = $spreadsheet->worksheets; + foreach my $wsn (@ws) { push @worksheet_names, $wsn->title; } + } else { + print "only worksheet $cmd_pull.\n"; + if (!defined $cmd_pull) { abort('command incomplete'); } + push @worksheet_names, $cmd_pull; + } + my @m = array_match(\@worksheet_names,\@tracked_ws_names); + 
foreach my $w (@m) { + my $pull_ws = $spreadsheet->worksheet( {title => $w} ); + my $push_tb = get_table_name($w,$dbh); + my @rows = $pull_ws->rows; + my @content; + map { $content[$_->row - 1][$_->col - 1] = $_->content } $pull_ws->cells; + my @tab_headers = shift @content; + my $tab_headers_length = $#{ $tab_headers[0] }; + my @pg_headers; + for my $i ( 0 .. $tab_headers_length ) { + push @pg_headers, $tab_headers[0][$i]; + } + shift @content; + #todo: check for clean headers at some point ... + truncate_table($push_tb,$dbh); + print "Inserting from $w to $push_tb.\n"; + for my $j (@content) { + insert_row($MIGSCHEMA,$push_tb,$dbh,\@pg_headers,$j); + } + timestamp($push_tb,$dbh,'pull'); + if ($cmd_export == 1) { export_table($dbh,$push_tb); } + } +} + +if (defined $cmd_push) { + print "Pushing "; + my @tab_names; + if ($cmd_push eq 'all') { + print "all worksheets.\n"; + $sql = 'SELECT tab_name FROM gsheet_tracked_table;'; + $sth = $dbh->prepare($sql); + $ra = $sth->execute(); + while (my @row = $sth->fetchrow_array) { + push @tab_names, $row[0]; + } + } else { + print "only worksheet $cmd_push.\n"; + if (!defined $cmd_push) { abort('command incomplete'); } + push @tab_names, $cmd_push; + } + foreach my $push_ws_name (@tab_names) { + my $pull_tb = get_table_name($push_ws_name,$dbh); + my @table_headers = get_pg_column_headers($pull_tb,$MIGSCHEMA); + print "worksheetname: $push_ws_name\n"; + my $push_ws = $spreadsheet->worksheet( {title => $push_ws_name} ); + if (!defined $push_ws) { next; } + my @rows; + my $i = 0; + foreach my $rth (@table_headers) { $rows[0][$i] = $rth; $i++; } + $sql = "SELECT * FROM $pull_tb;"; + $sth = $dbh->prepare($sql); + $sth->execute(); + my $grabhash = $sth->fetchall_arrayref({}); + erase_sheet($push_ws,$push_ws_name); + + #get from postgres the headers to use in the sheet from tracked columns + $sql = 'SELECT column_name FROM gsheet_tracked_column WHERE table_id = (SELECT id FROM gsheet_tracked_table WHERE table_name = \'' . 
$pull_tb . '\')'; + $sth = $dbh->prepare($sql); + $sth->execute(); + my $sheet_headers = $sth->fetchall_arrayref(); + my $sheet_headers_length = @$sheet_headers; + #now I need to do new rows using those headers + my @content; + foreach my $row ( @{$grabhash} ) { + my $record = {}; + for my $column ( sort keys %{ $row } ) { + #print Dumper(@$sheet_headers); + #print "column: $column\n"; + my $clean_column = $column; + $clean_column =~ s/_//g; + if ( $column ~~ @$sheet_headers ) { + $record->{$clean_column} = $row->{$column}; + } + } + push @content, $record; + } + print "Writing to $push_ws_name\n"; + foreach my $fillsheet (@content) { + my $new_row = $push_ws->add_row ( + $fillsheet + ); + } + timestamp($pull_tb,$dbh,'push'); + if ($cmd_export == 1) { export_table($dbh,$pull_tb); } + } +} + +sub export_table { + my $dbh = shift; + my $table = shift; + + my $dt = DateTime->now; + my $date = $dt->ymd; + my $hms = $dt->hms; + my $efile = $MIGGITDIR . $table . '_' . $date . '_' . $hms . '.tsv'; + my @data; + my $record_count = 0; + $dbh->do("COPY $table TO STDOUT CSV DELIMITER E'\t' HEADER;"); + 1 while $dbh->pg_getcopydata(\$data[$record_count++]) >= 0; + open (my $eout, '>', $efile) or abort("Could NOT open $efile."); + foreach my $d (@data) { + print $eout $d; + } + print "$efile written.\n"; + close $eout; + return; +} + +sub die_if_gsheet_tracked_table_does_not_exist { + if (!check_for_gsheet_tracked_table()) { + die "Table gsheet_tracked_table does not exist. Bailing...\n"; + } +} + +sub array_match { + my ($xa,$xb) = @_; + my @a = @{ $xa }; + my @b = @{ $xb }; + my @r; + + foreach my $av (@a) { + foreach my $bv (@b) { + if ($av eq $bv) { push @r, $bv; } + } + } + return @r; +} + +sub get_pg_column_headers { + my $table_name = shift; + my $schema_name = shift; + my @headers; + my $dbh = Mig::db_connect(); + $sql = 'SELECT column_name FROM information_schema.columns WHERE table_schema = ' . $dbh->quote( $schema_name ) . ' AND table_name = ' . 
$dbh->quote( $table_name ) . ';'; + $sth = $dbh->prepare($sql); + $ra = $sth->execute(); + while (my @row = $sth->fetchrow_array) { + push @headers, $row[0]; + } + return @headers; +} + +sub erase_sheet { + my $ws = shift; + my $ws_name = shift; + + print "Erasing $ws_name.\n"; + my @rows = $ws->rows; + splice @rows, 0, 1; + my $i = @rows; + while ($i > 0) { + my $row = pop @rows; + $row->delete; + $i--; + } + return; +} + +sub check_for_gsheet_tracked_table { + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT EXISTS( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = " . $dbh->quote( $MIGSCHEMA ) . " + AND table_name = 'gsheet_tracked_table' + );" + ); + my $rv = $sth->execute() + || die "Error checking for table (tracked_gsheet_table): $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + Mig::db_disconnect($dbh); + return $cols[0]; +} + +sub die_if_gsheet_tracked_column_does_not_exist { + if (!check_for_gsheet_tracked_column()) { + die "Table $MIGSCHEMA.gsheet_tracked_column does not exist. Bailing...\n"; + } +} + +sub get_table_name { + my $worksheet = shift; + my $dbh = shift; + + my $sql = 'SELECT table_name FROM gsheet_tracked_table WHERE tab_name = \'' . $worksheet . '\';'; + my $sth = $dbh->prepare($sql); + my $xs = $sth->execute(); + my $table_name; + while (my @row = $sth->fetchrow_array) { + $table_name = $row[0]; + } + + return $table_name; +} + +#sub get_worksheet_name { +# my $table = shift; +# my $dbh = shift; +# +# my $sql = 'SELECT tab_name FROM gsheet_tracked_table WHERE table_name = \'' . $table . '\';'; +# print "$sql \n"; +# my $sth = $dbh->prepare($sql); +# my $xs = $sth->execute(); +# my $worksheet_name; +# while (my @row = $sth->fetchrow_array) { +# $worksheet_name = $row[0]; +# } +# +# return $worksheet_name; +#} + + +sub check_for_gsheet_tracked_column { + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT EXISTS( + SELECT 1 + FROM information_schema.tables + WHERE table_schema = " . 
$dbh->quote( $MIGSCHEMA ) . " + AND table_name = 'gsheet_tracked_column' + );" + ); + my $rv = $sth->execute() + || die "Error checking for table (gsheet_tracked_column): $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + Mig::db_disconnect($dbh); + return $cols[0]; +} + +sub insert_row { + my ($schema, $table, $dbh, $headers_ref, $row_ref) = @_; + my @headers = @{ $headers_ref }; + my @row_data = @{ $row_ref }; + + my $header_string = '(' . join(",", @headers) . ')'; + map {s/\'/\'\'/g; } @row_data; + my $row_string = '(' . join(",", map {qq/'$_'/} @row_data) . ')'; + #print "INSERT INTO $schema.$table $header_string VALUES $row_string\n"; + $dbh->do(qq/ + INSERT INTO $schema.$table $header_string VALUES $row_string ; + /); +} + +sub timestamp { + my ($table, $dbh, $action) = @_; + + my $column; + if ($action eq 'pull') { $column = 'last_pulled' } + else { $column = 'last_pushed' }; + + $dbh->do(qq/ + UPDATE gsheet_tracked_table SET $column = NOW() WHERE table_name = '$table'; + /); + +} + + +sub truncate_table { + my $table = shift; + my $dbh = shift; + + $dbh->do(qq/ + TRUNCATE TABLE $table;; + /); + print "Table $table truncated.\n"; +} + +sub abort { + my $msg = shift; + print STDERR "$0: $msg", "\n"; + exit 1; +} + +sub connect_gsheet { + + my ($clientid,$clientsecret,$sessionfile) = @_; + + my $oauth2 = Net::Google::DataAPI::Auth::OAuth2->new( + client_id => $clientid, + client_secret => $clientsecret, + scope => ['http://spreadsheets.google.com/feeds/'], + redirect_uri => 'https://developers.google.com/oauthplayground', + ); + if ($sessionfile =~ m/~/) {$sessionfile =~ s/~/$ENV{HOME}/; } + my $session = retrieve($sessionfile); + my $restored_token = Net::OAuth2::AccessToken->session_thaw( + $session, + auto_refresh => 1, + profile => $oauth2->oauth2_webserver, + ); + $oauth2->access_token($restored_token); + my $service = Net::Google::Spreadsheets->new(auth => $oauth2); + + my $spreadsheet = $service->spreadsheet( + { + title => $MIGSCHEMA + } + ); + 
return $spreadsheet; +} + + diff --git a/emig.d/bin/mig-iconv b/emig.d/bin/mig-iconv new file mode 100755 index 0000000..88acdd0 --- /dev/null +++ b/emig.d/bin/mig-iconv @@ -0,0 +1,107 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-iconv + +Attempts to invoke B on the specified tracked file, placing the +output in [file].iconv + +If given no other arguments, the invocation will lool like + +=over 5 + +iconv -f ISO-8859-1 -t UTF-8 -o .utf8 + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +iconv [other arguments...] -o .utf8 + +=back + +You'll need to invoke B prior to using commands like B + +=head1 SYNOPSIS + +B [other arguments...] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $file = abs_path($ARGV[0]); +if ($file =~ /^$MIGBASEWORKDIR/) { + call_iconv(@ARGV); +} else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; +} + +exit 0; + +############################################################################### + +sub call_iconv { + my $file = abs_path(shift); + my @args = @_; + + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + my $data = Mig::status_this_file($file); + print "iconv'ing tracked file: $file\n"; + + if (scalar(@args) == 0) { + @args = ( + '-f' + ,'ISO-8859-1' + ,'-t' + ,'UTF-8' + ,'--verbose' + ); + } + + system('iconv', @args, '-o', $file . '.utf8', $file); + system('touch', $file . 
'.utf8'); # handle 0-byte files + + my $dbh = Mig::db_connect(); + my $utf8_file = $dbh->quote($file . '.utf8'); + if (! -e $file . '.utf8') { + print "utf8 file does not exist: $utf8_file\n"; + $utf8_file = $dbh->quote(''); + } + + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_file + SET utf8_filename = $utf8_file + WHERE base_filename = " . $dbh->quote($file) . " + ; + ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } else { + print "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/bin/mig-init b/emig.d/bin/mig-init new file mode 100755 index 0000000..98f92b5 --- /dev/null +++ b/emig.d/bin/mig-init @@ -0,0 +1,93 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-init - This will add or recreate tracking tables for the B toolset to +the migration schema specified by the MIGSCHEMA environment variable, in the +PostgreSQL database specified by various PG environment variables. + +In practice, you should invoke 'mig env use schema_name' prior to calling +B + +=head1 SYNOPSIS + +B + +B + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +my $mig_sql = $mig_bin . "../mig-sql/init/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if $ARGV[0]; + +Mig::die_if_no_env_migschema(); + +if (! Mig::check_for_db_migschema()) { + try_to_create_schema(); +} + +if (! 
Mig::check_db_migschema_for_migration_tables()) { + try_to_init_schema_with_migration_tools(); +} +Mig::die_if_mig_tracking_table_exists(); +Mig::die_if_mig_column_tracking_table_exists(); +loop_through_mig_sql_templates(); + +exit 0; + +############################################################################### + +sub try_to_create_schema { + if ($MIGSCHEMA =~ /[^\w_]/) { + die "$MIGSCHEMA is not suitable for a schema name in PostgreSQL\n"; + } + my $dbh = Mig::db_connect(); + my $rv = $dbh->do("CREATE SCHEMA $MIGSCHEMA;") + || die "Error creating migration schema ($MIGSCHEMA): $!\n"; + print "Created schema $MIGSCHEMA\n"; + Mig::db_disconnect($dbh); +} + +sub try_to_init_schema_with_migration_tools { + Mig::die_if_no_migration_tools(); + print "Calling migration_tools.init() and .build()\n"; + my $dbh = Mig::db_connect(); + my $rv = $dbh->do("SELECT migration_tools.init(" . $dbh->quote($MIGSCHEMA) . ");") + || die "Error running migration_tools.init($MIGSCHEMA): $!\n"; + print "migration_tools.init() finished\n"; + my $rv2 = $dbh->do("SELECT migration_tools.build(" . $dbh->quote($MIGSCHEMA) . ");") + || die "Error running migration_tools.build($MIGSCHEMA): $!\n"; + print "migration_tools.build() finished\n"; + Mig::db_disconnect($dbh); +} + +sub loop_through_mig_sql_templates { + print "Looping through mig-sql/init/ templates\n"; + opendir my $dir, $mig_sql or die "Cannot open directory: $!"; + my @files = sort readdir $dir; + closedir $dir; + foreach my $file (@files) { + if ($file =~ /.sql$/) { + print "executing $file:\n"; + system( $mig_bin . "mig-sql", ('-f',$mig_sql . $file) ) + } + } +} + diff --git a/emig.d/bin/mig-link b/emig.d/bin/mig-link new file mode 100755 index 0000000..1a8ccd7 --- /dev/null +++ b/emig.d/bin/mig-link @@ -0,0 +1,87 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-link + +Associate the specified file with a parent table within the migration schema. 
+ +=head1 SYNOPSIS + +B + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $file = abs_path($ARGV[0]); +if ($file =~ /^$MIGBASEWORKDIR/) { + link_table(@ARGV); +} else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; +} + +exit 0; + +############################################################################### + +sub link_table { + my $file = abs_path(shift); + my $table = shift; + + if (! Mig::check_db_migschema_for_specific_table($table)) { + die "table not found in MIGSCHEMA ($MIGSCHEMA): $table\n"; + } + + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + my $data = Mig::status_this_file($file); + + print "linking file to parent table: $file -> $table\n"; + + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT base_filename + FROM $MIGSCHEMA.tracked_file + WHERE parent_table = " . $dbh->quote($table) . " + AND base_filename <> " . $dbh->quote($file) . ";" + ); + my $rv = $sth->execute() + || die "Error checking $MIGSCHEMA.tracked_file: $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + if ($cols[0]) { # found + die "table ($table) already linked to a different file: $cols[0]\n"; + } + $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_file + SET parent_table = " . $dbh->quote($table) . " + WHERE base_filename = " . $dbh->quote($file) . 
" + ; + ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } else { + print "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/bin/mig-mapper b/emig.d/bin/mig-mapper new file mode 100755 index 0000000..6841cf7 --- /dev/null +++ b/emig.d/bin/mig-mapper @@ -0,0 +1,778 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-mapper + +Interactive session for analyzing, flagging, and mapping legacy field data to +Evergreen fields. + +Upon exit, generate either [file].clean.map.sql or _map.sql. The +SQL generated will be UPDATE's for setting the Evergreen-specific columns for a +given file's staging tables, and TRUNCATE's and INSERT's for auxilary tables. +The files will have \include hooks for pulling in additional mapping files +(for example, end-user mappings for circ modifiers, etc.) + +=head1 SYNOPSIS + +B + +=cut + +############################################################################### + +use strict; +use Term::ReadLine; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $column_filter = 1; # show all fields +my $file = abs_path($ARGV[0]); +my $fdata; +my $tracked_file_id = Mig::check_for_tracked_file($file); +if ($tracked_file_id) { + $fdata = Mig::status_this_file($file); +} else { + die "File not currently tracked: $file\n"; +} + +my $table = $fdata->{staged_table}; +if (!$table) { + die "No staged staged table for file: $file\n"; +} + +my $loop = 1; +my $term = Term::ReadLine->new('mapper'); +my $prompt; +my $OUT = $term->OUT || \*STDOUT; +my @dtd_identifiers; + +table_menu(); +$prompt = "$fdata->{staged_table}: "; +while ( $loop && defined (my $cmd = $term->readline($prompt)) ) { +top: + $cmd =~ s/^\s+//; + $cmd =~ s/\s+$//; + $term->addhistory($cmd) if $cmd =~ /\S/; + if ($cmd =~ /^\d+$/) { + my $ret = column_menu($cmd); + if ($ret) { + $cmd = $ret; + goto top; + } + } else { + switch($cmd) { + case /^(ls|\?|\.|;)$/ { + table_menu(); + } + case '' { + table_menu(); + } + case 'l' { + list_ten(); + } + case 'f1' { + $column_filter = 1; + table_menu(); + } + case 'f2' { + $column_filter = 2; + table_menu(); + } + case 'f3' { + $column_filter = 3; + table_menu(); + } + } + } + $loop = 0 if $cmd =~ /^q/io; +} + +exit 0; + +############################################################################### + +sub table_menu { + print "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; + print "$table"; + print "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; + print "\n"; + print " l) SELECT * FROM $fdata->{staged_table} LIMIT 10;\n"; + print "f1) show all fields (default)\n"; + print "f2) show legacy fields\n"; + print "f3) show EG fields\n"; + print " q) quit\n\n"; + printf "%-36s", "Columns (* for required)"; + printf "%-30s", "Target"; + printf "%-30s", "Transform"; + printf "%-30s", "First Row"; + 
printf "%-30s", "Migration Note"; + print "\n"; + printf "%-36s", "-------"; + printf "%-30s", "------"; + printf "%-30s", "---------"; + printf "%-30s", "---------"; + printf "%-30s", "--------------"; + print "\n"; + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT * + FROM information_schema.columns + WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " + AND table_name = " . $dbh->quote($table) . " + ORDER BY dtd_identifier::INTEGER ASC; + "); + my $rv = $sth->execute() + || die "Error retrieving data from information_schema: $!"; + my $sth2 = $dbh->prepare(" + SELECT * + FROM $MIGSCHEMA.$table + LIMIT 1; + "); + my $rv2 = $sth2->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + my $row = $sth2->fetchrow_hashref; + + open LESS, "|less -F"; + @dtd_identifiers = (); + while (my $data = $sth->fetchrow_hashref) { + my $column = $data->{column_name}; + if ($column_filter == 2 && !($column =~ /^[xl]_/)) { + next; + } + if ($column_filter == 3 && ($column =~ /^[xl]_/)) { + next; + } + my $cdata = status_this_column($column); + printf LESS $cdata->{required} ? '*' : ' '; + printf LESS "%3s) ", $data->{dtd_identifier}; + push @dtd_identifiers, $data->{dtd_identifier}; + printf LESS "%-30s", $column; + printf LESS "%-30s", defined $cdata->{target_table} + ? ( $cdata->{target_table} ne $table ? $cdata->{target_table} . '.' : '') . $cdata->{target_column} + : ''; + printf LESS "%-30s", defined $cdata->{transform} ? $cdata->{transform} : ''; + printf LESS "%-30s", defined $$row{$column} ? $$row{$column} : ''; + printf LESS "%-30s", defined $cdata->{comment} ? $cdata->{comment} : ''; + print LESS "\n"; + } + close LESS; + print "\n"; + $sth->finish; + $sth2->finish; + Mig::db_disconnect($dbh); +} + +sub column_menu { + my $dtd_identifier = shift; + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT * + FROM information_schema.columns + WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " + AND table_name = " . 
$dbh->quote($table) . " + AND dtd_identifier = " . $dbh->quote($dtd_identifier) . "; + "); + my $rv = $sth->execute() + || die "Error retrieving data from information_schema: $!"; + my $data = $sth->fetchrow_hashref; + $sth->finish; + Mig::db_disconnect($dbh); + + my $column = $data->{column_name}; + + my $prompt = "$table.$column: "; + + sub print_menu { + my $column = shift; + my $cdata = status_this_column($column); + print "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; + print "$column"; + print "\n-------------------------------------------------------------------------------------------------\n"; + print " target: " . ( + defined $cdata->{target_table} + ? ( $cdata->{target_table} ne $table ? $cdata->{target_table} . '.' : '') . $cdata->{target_column} + : '' + ) . "\n"; + print "transform: " . (defined $cdata->{transform} ? $cdata->{transform} : '') . "\n"; + print " comment: " . (defined $cdata->{comment} ? $cdata->{comment} : '') . 
"\n"; + print "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; + print "\n"; + print " l) SELECT $column FROM $fdata->{staged_table} LIMIT 10;\n"; + print " s) summarize\n" if $column ne 'x_migrate'; + print " g) group browse\n"; + print "g2) group browse (order by count desc)\n"; + print " c) comment\n"; + print " f) flag for end-user mapping\n"; + print " t) target\n"; + print " e) eval/transform\n"; + print " n) next column\n"; + print " p) prev column\n"; + print " q) quit back to table menu\n"; + print "\n"; + } + print_menu($column); + + my $loop = 1; + while ( $loop && defined (my $cmd = $term->readline($prompt)) ) { + $cmd =~ s/^\s+//; + $cmd =~ s/\s+$//; + $term->addhistory($cmd) if $cmd =~ /\S/; + $loop = 0 if $cmd =~ /^q/io; + switch($cmd) { + case /^(ls|\?|\.|;)$/ { + print_menu($column); + } + case '' { + print_menu($column); + } + case 'l' { + list_ten($column); + } + case 's' { + summarize($column); + } + case 'g' { + group_browse($column); + } + case 'g2' { + group_browse($column,'GROUP BY 1 ORDER BY 2 DESC'); + } + case /^c/io { + if ($cmd =~ /^c\s+(.+)$/) { + set_comment($column,$1); + } + } + case /^t/io { + if ($cmd =~ /^t\s+(.+)$/) { + set_target($column,$1); + } + } + case /^e/io { + if ($cmd =~ /^e\s+(.+)$/) { + set_transform($column,$1); + } + } + case 'n' { + my( $index )= grep { $dtd_identifiers[$_] eq $dtd_identifier } 0..$#dtd_identifiers; + return $dtd_identifiers[$index + 1]; + } + case 'p' { + my( $index )= grep { $dtd_identifiers[$_] eq $dtd_identifier } 0..$#dtd_identifiers; + return $dtd_identifiers[$index - 1]; + } + } + } +} + +sub list_ten { + my $column = shift; + + my $dbh = Mig::db_connect(); + my $sth; + my $rv; + my @cols; + + $sth = $dbh->prepare(Mig::sql(" + SELECT " . (defined $column ? $column : '*') . 
" + FROM $MIGSCHEMA.$table + LIMIT 10; + ")); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + print "\n"; + while (@cols = $sth->fetchrow_array) { + print "\t" . join(',',map {defined $_ ? $_ : ''} @cols) . "\n"; + } + print "\n"; + $sth->finish; +} + +sub summarize { + my $column = shift; + + my $count; + my $non_empty_count; + my $distinct_value_count; + my $distinct_integer_value_count; + my $distinct_money6_value_count; + my $distinct_money8_value_count; + my $distinct_date_value_count; + my $distinct_timestamptz_value_count; + + my $min_value; + my $min_length; + my $min_length_min_value; + my $max_value; + my $max_length; + my $max_length_max_value; + + my $min_value_as_integer; + my $max_value_as_integer; + + my $min_value_as_money6; + my $max_value_as_money6; + + my $min_value_as_money8; + my $max_value_as_money8; + + my $min_value_as_date; + my $max_value_as_date; + + my $min_value_as_timestamptz; + my $max_value_as_timestamptz; + + my $dbh = Mig::db_connect(); + my $sth; + my $rv; + my @cols; + + ### count + $sth = $dbh->prepare(" + SELECT COUNT(*) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $count = $cols[0]; + + ### non_empty_count + $sth = $dbh->prepare(" + SELECT COUNT(*) + FROM $MIGSCHEMA.$table + WHERE $column IS NOT NULL AND BTRIM($column) <> ''; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $non_empty_count = $cols[0]; + + ### distinct_value_count + $sth = $dbh->prepare(" + SELECT COUNT(DISTINCT $column) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $distinct_value_count = $cols[0]; + + ### distinct_integer_value_count + $sth = $dbh->prepare(" + SELECT COUNT(DISTINCT 
migration_tools.attempt_cast($column,'INTEGER')::INTEGER) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $distinct_integer_value_count = $cols[0]; + + ### distinct_money6_value_count + $sth = $dbh->prepare(" + SELECT COUNT(DISTINCT NULLIF(migration_tools.attempt_money6($column,'-0.01'),-0.01)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $distinct_money6_value_count = $cols[0]; + + ### distinct_money8_value_count + $sth = $dbh->prepare(" + SELECT COUNT(DISTINCT NULLIF(migration_tools.attempt_money($column,'-0.01'),-0.01)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $distinct_money8_value_count = $cols[0]; + + ### distinct_date_value_count + $sth = $dbh->prepare(" + SELECT COUNT(DISTINCT NULLIF(migration_tools.attempt_date($column,'1969-06-09'),'1969-06-09'::DATE)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $distinct_date_value_count = $cols[0]; + + ### distinct_timestamptz_value_count + $sth = $dbh->prepare(" + SELECT COUNT(DISTINCT NULLIF(migration_tools.attempt_timestamptz($column,'1969-06-09'),'1969-06-09'::TIMESTAMPTZ)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $distinct_timestamptz_value_count = $cols[0]; + + ### min_value + $sth = $dbh->prepare(" + SELECT MIN($column) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $min_value = $cols[0]; + 
+ ### min_length + $sth = $dbh->prepare(" + SELECT MIN(LENGTH($column)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $min_length = $cols[0]; + + ### min_length_min_value + $sth = $dbh->prepare(" + SELECT MIN($column) + FROM $MIGSCHEMA.$table + WHERE LENGTH($column) = $min_length; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $min_length_min_value = $cols[0]; + + ### min_value_as_integer + $sth = $dbh->prepare(" + SELECT MIN(migration_tools.attempt_cast($column,'INTEGER')::INTEGER) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $min_value_as_integer = $cols[0]; + + ### min_value_as_money6 + $sth = $dbh->prepare(" + SELECT MIN(NULLIF(migration_tools.attempt_money6($column,'-0.01'),-0.01)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $min_value_as_money6 = $cols[0]; + + ### min_value_as_money8 + $sth = $dbh->prepare(" + SELECT MIN(NULLIF(migration_tools.attempt_money($column,'-0.01'),-0.01)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $min_value_as_money8 = $cols[0]; + + ### min_value_as_date + $sth = $dbh->prepare(" + SELECT MIN(NULLIF(migration_tools.attempt_date($column,'1969-06-09'),'1969-06-09'::DATE)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $min_value_as_date = $cols[0]; + + ### min_value_as_timestamptz + $sth = $dbh->prepare(" + SELECT 
MIN(NULLIF(migration_tools.attempt_timestamptz($column,'1969-06-09'),'1969-06-09'::TIMESTAMPTZ)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $min_value_as_timestamptz = $cols[0]; + + ### max_value + $sth = $dbh->prepare(" + SELECT MAX($column) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $max_value = $cols[0]; + + ### max_length + $sth = $dbh->prepare(" + SELECT MAX(LENGTH($column)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $max_length = $cols[0]; + + ### max_length_max_value + $sth = $dbh->prepare(" + SELECT MAX($column) + FROM $MIGSCHEMA.$table + WHERE LENGTH($column) = $max_length; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $max_length_max_value = $cols[0]; + + ### max_value_as_integer + $sth = $dbh->prepare(" + SELECT MAX(migration_tools.attempt_cast($column,'INTEGER')::INTEGER) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $max_value_as_integer = $cols[0]; + + ### max_value_as_money6 + $sth = $dbh->prepare(" + SELECT MAX(NULLIF(migration_tools.attempt_money6($column,'-0.01'),-0.01)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $max_value_as_money6 = $cols[0]; + + ### max_value_as_money8 + $sth = $dbh->prepare(" + SELECT MAX(NULLIF(migration_tools.attempt_money($column,'-0.01'),-0.01)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error 
retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $max_value_as_money8 = $cols[0]; + + ### max_value_as_date + $sth = $dbh->prepare(" + SELECT MAX(NULLIF(migration_tools.attempt_date($column,'1969-06-09'),'1969-06-09'::DATE)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $max_value_as_date = $cols[0]; + + ### max_value_as_timestamptz + $sth = $dbh->prepare(" + SELECT MAX(NULLIF(migration_tools.attempt_timestamptz($column,'1969-06-09'),'1969-06-09'::TIMESTAMPTZ)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + $max_value_as_timestamptz = $cols[0]; + + Mig::db_disconnect($dbh); + + print "\n"; + print "# of rows = $count\n"; + print "# of non-empty rows = $non_empty_count\n"; + print "# of distinct values (as text) = $distinct_value_count\n"; + print "# of distinct values (as integer) = $distinct_integer_value_count\n"; + print "# of distinct values (as money6) = $distinct_money6_value_count\n"; + print "# of distinct values (as money8) = $distinct_money8_value_count\n"; + print "# of distinct values (as date) = $distinct_date_value_count\n"; + print "# of distinct values (as timestamptz) = $distinct_timestamptz_value_count\n"; + print "\n"; + print "minimum value (as text) = $min_value\n"; + print "maximum value (as text) = $max_value\n"; + print "\n"; + print "minimum value length (as text) = $min_length (min value: $min_length_min_value)\n"; + print "maximum value length (as text) = $max_length (max value: $max_length_max_value)\n"; + print "\n"; + print "minimum value (as integer) = " . ($min_value_as_integer ? $min_value_as_integer : '') . "\n"; + print "maximum value (as integer) = " . ($max_value_as_integer ? $max_value_as_integer : '') . 
"\n"; + print "\n"; + print "minimum value (as money6) = " . ($min_value_as_money6 ? $min_value_as_money6 : '') . "\n"; + print "maximum value (as money6) = " . ($max_value_as_money6 ? $max_value_as_money6 : '') . "\n"; + print "\n"; + print "minimum value (as money8) = " . ($min_value_as_money8 ? $min_value_as_money8 : '') . "\n"; + print "maximum value (as money8) = " . ($max_value_as_money8 ? $max_value_as_money8 : '') . "\n"; + print "\n"; + print "minimum value (as date) = " . ($min_value_as_date ? $min_value_as_date : '') . "\n"; + print "maximum value (as date) = " . ($max_value_as_date ? $max_value_as_date : '') . "\n"; + print "\n"; + print "minimum value (as timestamptz) = " . ($min_value_as_timestamptz ? $min_value_as_timestamptz : '') . "\n"; + print "maximum value (as timestamptz) = " . ($max_value_as_timestamptz ? $max_value_as_timestamptz : '') . "\n"; + print "\n"; +} + +sub group_browse { + my ($column,$option) = (shift,shift||"GROUP BY 1 ORDER BY 1"); + + my $dbh = Mig::db_connect(); + my $sth; + my $rv; + + $sth = $dbh->prepare(Mig::sql(" + SELECT $column, COUNT(*) + FROM $MIGSCHEMA.$table + $option; + ")); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + + print "\n"; + open LESS, "|less -F"; + printf LESS "%-30s", "Value:"; + print LESS "Count:\n\n"; + while (my @cols = $sth->fetchrow_array) { + my $value = $cols[0]; + my $count = $cols[1]; + printf LESS "%-30s", defined $value ? $value : ''; + print LESS "$count\n"; + } + close LESS; + print "\n"; + $sth->finish; +} + +############################################################################### + +sub add_this_column { + my $column = shift; + if (!Mig::check_for_tracked_column($table,$column)) { + my $dbh = Mig::db_connect(); + my $rv = $dbh->do(" + INSERT INTO $MIGSCHEMA.tracked_column ( + base_filename + ,parent_table + ,staged_table + ,staged_column + ) VALUES ( + " . $dbh->quote($file) . " + ," . $dbh->quote($fdata->{parent_table}) . " + ," . 
$dbh->quote($table) . " + ," . $dbh->quote($column) . " + ); + ") || die "Error inserting into table $MIGSCHEMA.tracked_column: $!\n"; + Mig::db_disconnect($dbh); + } +} + +sub status_this_column { + my $column = shift; + my $data = Mig::status_this_column($table,$column); + if (!$data) { + add_this_column($column); + $data = Mig::status_this_column($table,$column); + } + if ($$data{parent_table}) { + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT * + FROM $MIGSCHEMA.fields_requiring_mapping + WHERE table_name = " . $dbh->quote( $$data{parent_table} ) . " + AND column_name = " . $dbh->quote( $column ) . ";" + ); + my $rv = $sth->execute() + || die "Error checking table (tracked_column) for $table.$column: $!"; + my $data2 = $sth->fetchrow_hashref; + if ($data2) { + $$data{required} = 1; + } else { + $$data{required} = 0; + } + $sth->finish; + Mig::db_disconnect($dbh); + } + return $data; +} + +sub set_comment { + my ($column,$comment) = (shift,shift); + if ($comment) { + my $data = status_this_column($column); + my $dbh = Mig::db_connect(); + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_column + SET comment = " . $dbh->quote($comment) . " + WHERE id = " . $dbh->quote($data->{id}) . "; + ") || die "Error updating table $MIGSCHEMA.tracked_column: $!\n"; + Mig::db_disconnect($dbh); + } +} + +sub set_transform { + my ($column,$transform) = (shift,shift); + if ($transform) { + my $data = status_this_column($column); + my $dbh = Mig::db_connect(); + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_column + SET transform = " . $dbh->quote($transform) . " + WHERE id = " . $dbh->quote($data->{id}) . 
"; + ") || die "Error updating table $MIGSCHEMA.tracked_column: $!\n"; + Mig::db_disconnect($dbh); + } +} + +sub set_target { + my ($column,$target) = (shift,shift); + my $target_table = $table; + my $target_column = $target; + if ($target) { + if ($target =~ /^(.+)\.(.+)$/) { + $target_table = $1; + $target_column = $2; + } + my $data = status_this_column($column); + my $dbh = Mig::db_connect(); + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_column + SET target_table = " . $dbh->quote($target_table) . " + ,target_column = " . $dbh->quote($target_column) . " + WHERE id = " . $dbh->quote($data->{id}) . "; + ") || die "Error updating table $MIGSCHEMA.tracked_column: $!\n"; + Mig::db_disconnect($dbh); + } +} diff --git a/emig.d/bin/mig-quick b/emig.d/bin/mig-quick new file mode 100755 index 0000000..59b0843 --- /dev/null +++ b/emig.d/bin/mig-quick @@ -0,0 +1,66 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-quick + +A wrapper for running the following mig commands on the specified files: + +=over 15 + +mig add +mig skip-iconv +mig clean +mig convert +mig stage + +=back + +Arguments take the form of --cmd--argument or --cmd--argument=value. + +This form is NOT supported: --cmd--argument value + +cmd must be substituted with either add, skip-iconv, clean, convert, or stage, +and determines which mig command to apply the argument toward. + +=head1 SYNOPSIS + +B [arguments...] [ ...] + +=cut + +############################################################################### + +use strict; +use Pod::Usage; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +my @files = grep {!/^--/} @ARGV; +my %pass_thru = ('add'=>[],'skip-iconv'=>[],'clean'=>[],'convert'=>[],'stage'=>[]); +foreach my $a (@ARGV) { + if ($a =~ /^--([a-z]+)-(.*)$/) { + $pass_thru{$1} = [] if ! 
defined $pass_thru{$1}; + unshift @{ $pass_thru{$1} }, "--$2"; + } +} + +foreach my $file (@files) { + foreach my $cmd (('add','skip-iconv','clean','convert','stage')) { + print "mig $cmd $file " . (join ' ', @{ $pass_thru{$cmd} }) . "\n"; + my @MYARGV = ( + 'mig' + ,$cmd + ,$file + ); + system(@MYARGV,@{ $pass_thru{$cmd} }); + } +} + +exit 0; + diff --git a/emig.d/bin/mig-quicksheet b/emig.d/bin/mig-quicksheet new file mode 100755 index 0000000..22ed33e --- /dev/null +++ b/emig.d/bin/mig-quicksheet @@ -0,0 +1,594 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-quicksheet + +By default: + +Quickly produces an Excel spreadsheet based on the tracked file suitable +for simple end-user mapping. The new file is named after the tracked file, but +ends in .mapping.xls + +Multiple files may be specified, in which case all of the results are +concatenated into one spreadsheet named .mapping.xls + +If using --outfile: + +This specifies the exact name to use for the Excel file. If not specified, and +there is also no --outtable, then the naming convention will be as specified +above. + +If using --outtable: + +This specifies a summary table and prefix to use within the migration schema for +recording the output either in addition to or instead of the Excel file. Unless +--force is specified, it will not overwrite existing tables. + +If using --drop with --outable: + +This will delete the summary table specified and all related sub-tables. + +=head1 SYNOPSIS + +B [--force|--drop|--outfile |--outtable ] [...] 
+ +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Try::Tiny; +use Pod::Usage; +use Getopt::Long; +use DBI; +use Spreadsheet::WriteExcel; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +my $outtable = ''; +my $outfile = ''; +my $force; +my $drop; +my $help; + +GetOptions( + 'outtable=s' => \$outtable, + 'outfile=s' => \$outfile, + 'force' => \$force, + 'drop' => \$drop, + 'help|?' => \$help +); +pod2usage(-verbose => 2) if $help || ! $ARGV[0]; + +if (! $outtable && ! $outfile) { + if (scalar(@ARGV) > 1) { + $outfile = $MIGSCHEMA . '.mapping.xls'; + } else { + $outfile = abs_path($ARGV[0]) . '.mapping.xls'; + } +} + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $workbook; +my @worksheets = (); +my $first_sheet; +my $first_table; +my $toc; +my $sheet_row_offset = 0; +my $sheet_row_start = 4; +my $table; +my $file; +my $fdata; +my $has_x_source = 0; +my $bold; +my $left; +my $counter = 0; + +if (!$drop) { + init_workbook(); + foreach my $f (@ARGV) { + $file = abs_path($f); + $counter++; + if ($toc) { + $toc->write($counter,0,$counter); + $toc->write($counter,1,$f); + } + handle_file(); + write_worksheets(); + } + close_workbook(); +} else { + if (Mig::check_db_migschema_for_specific_table($outtable)) { + drop_existing_outtable(); + } +} + +sub handle_file { + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + $fdata = Mig::status_this_file($file); + } else { + die "File not currently tracked: $file\n"; + } + $table = $fdata->{staged_table}; + if (!$table) { + die "No staged staged table for file: $file\n"; + } +} + +sub init_workbook { + if ($outfile) { + print "Writing $outfile\n"; + $workbook = Spreadsheet::WriteExcel->new( $outfile ); + $bold 
= $workbook->add_format(); + $bold->set_bold(); + $bold->set_align('left'); + $left = $workbook->add_format(); + $left->set_align('left'); + if (scalar(@ARGV) > 1) { + $toc = $workbook->add_worksheet('Files'); + } + } + if ($outtable) { + if (Mig::check_db_migschema_for_specific_table($outtable)) { + if ($force) { + drop_existing_outtable(); + } else { + die "$outtable already exists. Use --force to wipe and redo tables.\n"; + } + } + create_new_outtable(); + } +} + +sub drop_existing_outtable { + + # we want a transaction for this one + my $dbh = Mig::db_connect(); + $dbh->{AutoCommit} = 0; + $dbh->{RaiseError} = 1; + + try { + # gather subordinate tables + + my @tables = (); + my $sth = $dbh->prepare(" + SELECT summary_table + FROM $MIGSCHEMA.$outtable + ORDER BY 1;" + ); + my $rv = $sth->execute(); + my $rows = $sth->fetchall_arrayref; + for my $row ( @$rows ) { + push @tables, $row->[0] + } + + # drop them + + foreach my $table (@tables) { + print "Dropping $MIGSCHEMA.$table\n"; + $dbh->do("DROP TABLE $MIGSCHEMA.\"$table\";"); + } + + # drop master table + + print "Dropping $MIGSCHEMA.$outtable\n"; + $dbh->do("DROP TABLE $MIGSCHEMA.$outtable;"); + + $dbh->commit; + } catch { + warn "Transaction aborted because $_\n"; + eval { $dbh->rollback }; + die "Aborting mig-quicksheet\n"; + }; + + Mig::db_disconnect($dbh); +} + +sub create_new_outtable { + my $dbh = Mig::db_connect(); + print "Creating table $MIGSCHEMA.$outtable\n"; + my $rv = $dbh->do(" + CREATE UNLOGGED TABLE $MIGSCHEMA.$outtable ( + file TEXT, + summary_table TEXT UNIQUE + ); + ") || die "Error creating outtable ($MIGSCHEMA.$outtable): $!\n"; + Mig::db_disconnect($dbh); +} + +sub create_new_subtable { + my $subtable = shift; + my $dbh = Mig::db_connect(); + $dbh->{AutoCommit} = 0; + $dbh->{RaiseError} = 1; + + try { + print "Creating table $MIGSCHEMA.\"$subtable\"\n"; + my $rv = $dbh->do(" + CREATE UNLOGGED TABLE $MIGSCHEMA.\"$subtable\" (); + ") || die "Error creating subtable 
($MIGSCHEMA.\"$subtable\"): $!\n"; + $rv = $dbh->do(" + INSERT INTO $MIGSCHEMA.$outtable (file,summary_table) VALUES (" . $dbh->quote($file) . ',' . $dbh->quote($subtable) . "); + ") || die "Error inserting into outtable ($MIGSCHEMA.$outtable): $!\n"; + $dbh->commit; + } catch { + warn "Transaction aborted because $_\n"; + eval { $dbh->rollback }; + die "Aborting mig-quicksheet\n"; + }; + + Mig::db_disconnect($dbh); +} + +sub write_worksheets { + print 'File #' . $counter . "\n"; + print "Sheet: Field Summary\n"; + my $tab_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . 'Field Summary'; + $tab_name = substr($tab_name,0,31); # truncate for WriteExcel + if ($outfile) { + $first_sheet = $workbook->add_worksheet( $tab_name ); + $first_sheet->set_column(0,6,30); + } + if ($outtable) { + $first_table = "$outtable $tab_name"; + create_new_subtable( $first_table ); + } + + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT COUNT(*) + FROM $MIGSCHEMA.$table + LIMIT 1; + "); + my $rv = $sth->execute() + || die "Error retrieving data from information_schema: $!"; + + my @cols = $sth->fetchrow_array; + $sth->finish; + my $count = $cols[0]; + + $sheet_row_start = 0; + + if ($outfile) { + $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold); + $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold); + $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold); + $first_sheet->write($sheet_row_start,3,'Min Value',$bold); + $first_sheet->write($sheet_row_start,4,'Min Length',$bold); + $first_sheet->write($sheet_row_start,5,'Max Value',$bold); + $first_sheet->write($sheet_row_start,6,'Max Length',$bold); + } + if ($outtable) { + try { + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Legacy Column" TEXT;'); + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Non-Empty Rows" TEXT;'); + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . 
' ADD COLUMN "Distinct Non-NULL Values" TEXT;'); + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Value" TEXT;'); + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Length" TEXT;'); + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Value" TEXT;'); + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Length" TEXT;'); + } catch { + die "Error modifying subtable ($MIGSCHEMA.$first_table): $_\n"; + }; + } + + handle_list(); + handle_columns(); + + if ($outfile) { + $first_sheet->write($count + 3,0,'Source File:',$bold); + $first_sheet->write($count + 3,1,$file,$left); + $first_sheet->write($count + 4,0,'Number of Rows:',$bold); + $first_sheet->write($count + 4,1,$count,$left); + } + if ($outtable) { + try { + $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column") VALUES (NULL);'); + $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . + ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Source File:'," . $dbh->quote($file) . ");"); + $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . + ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Number of Rows:',$count);"); + } catch { + die "Error inserting into subtable ($MIGSCHEMA.$first_table): $_\n"; + }; + } + + Mig::db_disconnect($dbh); +} + +sub close_workbook { + if ($outfile) { + $workbook->close(); + } +} + +exit 0; + +############################################################################### + +sub handle_list { + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT * + FROM " . $MIGSCHEMA. "." . $table . 
" + LIMIT 65530; + "); + my $rv = $sth->execute() + || die "Error retrieving data from staging table: $!"; + my $list_sheet; + + $sheet_row_offset = 0; + $has_x_source = 0; + if ($outfile) { + print "Sheet: $table\n"; + $list_sheet = $workbook->add_worksheet( $table ); + } + + my $handle_headers = 1; + + while (my $data = $sth->fetchrow_hashref) { + if ($handle_headers) { + my $_idx = 0; + foreach my $col (sort keys %{ $data }) { + $list_sheet->write($sheet_row_start + $sheet_row_offset,$_idx++,$col,$bold); + } + $handle_headers = 0; + } + $sheet_row_offset++; + my $idx = 0; + foreach my $col (sort keys %{ $data }) { + my $cdata = $$data{$col}; + if (!defined $cdata) { $cdata = '\N'; } + if ($outfile) { + $list_sheet->write($sheet_row_start + $sheet_row_offset,$idx++,$cdata,$left); + } + } + } +} + +sub handle_columns { + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT * + FROM information_schema.columns + WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " + AND table_name = " . $dbh->quote($table) . 
" + ORDER BY dtd_identifier::INTEGER ASC; + "); + my $rv = $sth->execute() + || die "Error retrieving data from information_schema: $!"; + + $sheet_row_offset = 0; + $has_x_source = 0; + + while (my $data = $sth->fetchrow_hashref) { + my $column = $data->{column_name}; + if ($column eq 'x_source') { + $has_x_source = 1; + } + if ($column =~ /^l_/ + || ($column =~ /^x_/ + && ( $column ne 'x_migrate' + && $column ne 'x_source' + && $column ne 'x_egid' + && $column ne 'x_hseq' + ) + ) + ) { + $sheet_row_offset++; + my $cdata = column_summary($column); + if ($outfile) { + $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left); + $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left); + $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left); + $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left); + $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left); + $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left); + $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left); + } + if ($outtable) { + $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$first_table" VALUES (^ . join(',' + ,$cdata->{non_empty_count} + ,$cdata->{distinct_value_count} + ,$dbh->quote($cdata->{min_value}) + ,$cdata->{min_length} + ,$dbh->quote($cdata->{max_value}) + ,$cdata->{max_length} + ) . 
');') || die "Error inserting into subtable $MIGSCHEMA.\"$first_table\": $!"; + } + if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) { + group_by($column); + } + } + } + $sth->finish; + Mig::db_disconnect($dbh); +} + +sub column_summary { + + my $column = shift; + + my $dbh = Mig::db_connect(); + + ### non_empty_count + my $sth = $dbh->prepare(" + SELECT COUNT(*) + FROM $MIGSCHEMA.$table + WHERE $column IS NOT NULL AND BTRIM($column) <> ''; + "); + my $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + my @cols = $sth->fetchrow_array; + $sth->finish; + my $non_empty_count = $cols[0]; + + ### distinct_value_count + $sth = $dbh->prepare(" + SELECT COUNT(DISTINCT $column) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + my $distinct_value_count = $cols[0]; + + ### min_value + $sth = $dbh->prepare(" + SELECT MIN($column) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + my $min_value = $cols[0]; + + ### min_length + $sth = $dbh->prepare(" + SELECT MIN(LENGTH($column)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + my $min_length = $cols[0]; + + ### max_value + $sth = $dbh->prepare(" + SELECT MAX($column) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + my $max_value = $cols[0]; + + ### max_length + $sth = $dbh->prepare(" + SELECT MAX(LENGTH($column)) + FROM $MIGSCHEMA.$table; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + @cols = $sth->fetchrow_array; + $sth->finish; + my $max_length = $cols[0]; 
+ + return { + non_empty_count => $non_empty_count + ,distinct_value_count => $distinct_value_count + ,min_value => defined $min_value ? $min_value : '' + ,min_length => defined $min_length ? $min_length : '' + ,max_value => defined $max_value ? $max_value : '' + ,max_length => defined $max_length ? $max_length : '' + }; +} + +sub group_by { + my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2"); + + my $dbh = Mig::db_connect(); + my $sth; + my $rv; + + my $col_sheet_row_start = 0; + my $col_sheet_row_offset = 0; + my $col_sheet; + my $col_table; + + my $sheet_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . $column; + $sheet_name = substr($sheet_name,0,31); + + print "Sheet: $sheet_name\n"; + if ($has_x_source) { + $option = "GROUP BY 2,3 ORDER BY 2,3"; + } + + if ($outfile) { + $col_sheet = $workbook->add_worksheet( $sheet_name ); + push @worksheets, $col_sheet; + $col_sheet->set_column(0,6,30); + $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold); + if ($has_x_source) { + $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold); + } + $col_sheet->write( + $col_sheet_row_start + $col_sheet_row_offset + ,$has_x_source ? 2 : 1 + ,"Legacy Value for $column" + ,$bold + ); + } + + if ($outtable) { + $col_table = "$outtable $sheet_name"; + create_new_subtable( $col_table ); + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Count" TEXT;') + || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; + if ($has_x_source) { + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Source" TEXT;') + || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; + } + $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "' . $dbh->quote("Legacy value for $column") . '" TEXT;') + || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; + } + + $sth = $dbh->prepare(" + SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . 
"$column + FROM $MIGSCHEMA.$table + $option; + "); + $rv = $sth->execute() + || die "Error retrieving data from $MIGSCHEMA.$table: $!"; + + while (my @cols = $sth->fetchrow_array) { + $col_sheet_row_offset++; + my $count = $cols[0]; + $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left) if $outfile; + my $value; + my $source; + if ($has_x_source) { + $source = defined $cols[1] ? $cols[1] : ''; + $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left) if $outfile; + $value = defined $cols[2] ? $cols[2] : ''; + $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left) if $outfile; + } else { + $value = defined $cols[1] ? $cols[1] : ''; + $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left) if $outfile; + } + if ($outtable) { + if ($has_x_source) { + $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(',' + ,$count + ,$dbh->quote($source) + ,$dbh->quote($value) + ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!"; + } else { + $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(',' + ,$count + ,$dbh->quote($value) + ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!"; + } + } + } + $sth->finish; +} + diff --git a/emig.d/bin/mig-remove b/emig.d/bin/mig-remove new file mode 100755 index 0000000..cf70eda --- /dev/null +++ b/emig.d/bin/mig-remove @@ -0,0 +1,67 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-remove - This will remove the specified files from the mig tracking table +for the schema pointed to by the MIGSCHEMA environment variable in the +PostgreSQL database specified by various PG environment variables. + +You'll need to invoke B prior to using commands like B + +=head1 SYNOPSIS + +B [file] [...] 
+ +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +foreach my $arg (@ARGV) { + my $file = abs_path($arg); + if ($file =~ /^$MIGBASEWORKDIR/) { + remove_this_file($file); + } else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; + } +} + +exit 0; + +############################################################################### + +sub remove_this_file { + my $file = shift; + my $tracked_file_id = Mig::check_for_tracked_file($file,{'allow_missing'=>1}); + if ($tracked_file_id) { + print "removing tracked file: $file\n"; + my $dbh = Mig::db_connect(); + my $rv = $dbh->do(" + DELETE FROM $MIGSCHEMA.tracked_file WHERE id = $tracked_file_id; + ") || die "Error deleting from table $MIGSCHEMA.tracked_file (id = $tracked_file_id): $!\n"; + Mig::db_disconnect($dbh); + } else { + print "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/bin/mig-reporter b/emig.d/bin/mig-reporter new file mode 100755 index 0000000..f87059e --- /dev/null +++ b/emig.d/bin/mig-reporter @@ -0,0 +1,507 @@ +#!/usr/bin/perl +# -*- coding: iso-8859-15 -*- +############################################################################### +=pod + +=item B --title "Report Title" + +Generates an asciidoc file in the git working directory that can be converted to +any appropriate format. The analyst and report parameters are required. + +Optional parameters are : + +-- analyst + +Default to "Equinox Open Library Initiative" + +--added_page_title and --added_page_file + +If one is used both must be. 
The added page file can be plain text or asciidoc. This +adds an extra arbitrary page of notes to the report. Mig assumes the page file is in the mig git directory. + +--tags + +This will define a set of tags to use, if not set it will default to Circs, +Holds, Actors, Bibs, Assets & Money. + +--debug on + +Gives more information about what is happening. Defaults to off. + +--reports_xml + +Allows you to override the default evergreen_staged_report.xml in the mig-xml folder. + +--captions on OR --captions off + +Adds the captions tag to asciidoc header to turn off captions in generated output. +Defaults to off. + +=back + +=cut + +############################################################################### + +use strict; +use warnings; + +use DBI; +use Data::Dumper; +use XML::LibXML; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use Switch; +use Getopt::Long; +use Cwd 'abs_path'; +use Cwd qw(getcwd); +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; +use open ':encoding(utf8)'; + +pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; +pod2usage(-verbose => 1) if ! 
$ARGV[1]; + +my $analyst = 'Equinox Open Library Initiative';; +my $report_title; +my $reports_xml = 'evergreen_staged_report.xml'; +my $tags; +my $added_page_title; +my $added_page_file; +my $captions = 'off'; +my $i = 0; +my $parser = XML::LibXML->new(); +my $lines_per_page = 42; +my $debug = 'off'; +my $workbook; +my $fh; + +my $ret = GetOptions( + 'analyst:s' => \$analyst, + 'report_title:s' => \$report_title, + 'title:s' => \$report_title, + 'reports_xml:s' => \$reports_xml, + 'tags:s' => \$tags, + 'added_page_title:s' => \$added_page_title, + 'added_page_file:s' => \$added_page_file, + 'captions:s' => \$captions, + 'debug:s' => \$debug +); + +if (!defined $tags) {$tags = 'circs.holds.actors.bibs.assets.money.notices'}; +if (!defined $report_title) { abort('--report_title or --title must be supplied'); } +if (!defined $analyst) { abort('--analyst must be supplied'); } + +my $mig_path = abs_path($0); +$mig_path =~ s|[^/]+$||; +$reports_xml = find_xml($reports_xml,$mig_path); +if (!defined $reports_xml) { abort("Can not find xml reports file."); } +my $dom = $parser->parse_file($reports_xml); + +if (defined $added_page_file or defined $added_page_title) { + abort('must specify --added_page_file and --added_page_title') unless defined $added_page_file and defined $added_page_title; + } +if (defined $added_page_file) { $added_page_file = $MIGGITDIR . $added_page_file; } + +my $dbh = Mig::db_connect(); +my $report_file = create_report_name($report_title); +$report_file = $MIGGITDIR . 
$report_file; + +open($fh, '>', $report_file) or abort("Could not open output file $report_file!"); +write_title_page($report_title,$fh,$analyst,$captions); +load_javascript($fh); + +if (defined $added_page_file and defined $added_page_title) { + print $fh "<<<\n"; + print $fh "== $added_page_title\n"; + print "$added_page_file\t$added_page_title\n"; + open(my $an,'<:encoding(UTF-8)', $added_page_file) or abort("Could not open $added_page_file!"); + while ( my $line = <$an> ) { + print $fh $line; + } + print $fh "\n"; + close $an; +} + +foreach my $func ($dom->findnodes('//function')) { + my $fdrop = $func->findvalue('./drop'); + my $fcreate = $func->findvalue('./create'); + my $fname = $func->findvalue('./name'); + my $sdrop = $dbh->prepare($fdrop); + my $screate = $dbh->prepare($fcreate); + print "dropping function $fname ... "; + $sdrop->execute(); + print "creating function $fname\n\n"; + $screate->execute(); +} + +foreach my $table ($dom->findnodes('//table')) { + my $tdrop = $table->findvalue('./drop'); + my $tcreate = $table->findvalue('./create'); + my $tname = $table->findvalue('./name'); + my $sdrop = $dbh->prepare($tdrop); + my $screate = $dbh->prepare($tcreate); + print "dropping table $tname ... "; + $sdrop->execute(); + print "creating table $tname\n\n"; + $screate->execute(); +} + +$tags = lc($tags); +my @report_tags = split(/\./,$tags); +foreach my $t (@report_tags) { + print "\n\n=========== Starting to process tag $t\n"; + print "==========================================\n\n"; + + my @asset_files; + foreach my $asset ($dom->findnodes('//asset')) { + if (index($asset->findvalue('./tag'),$t) != -1) { + push @asset_files, $asset->findvalue('./file'); + } + } + + foreach my $fname (@asset_files) { + my $asset_path = $mig_path . '../mig-asc/' . 
$fname; + open my $a, $asset_path or abort("Could not open $fname."); + while ( my $l = <$a> ) { + print $fh $l; + } + print $fh "<<<\n"; + } + + print_section_header(ucfirst($t),$fh); + my $linecount = $lines_per_page; + my $r; + + undef @asset_files; + foreach my $asset ($dom->findnodes('//asset')) { + if (index($asset->findvalue('./tag'),$t) != -1) { + push @asset_files, $asset->findvalue('./file'); + } + } + + my @report_names; + foreach my $report ($dom->findnodes('//report')) { + if (index($report->findvalue('./tag'),$t) != -1 and $report->findvalue('./iteration') eq '0') { + push @report_names, $report->findvalue('./name'); + } + } + + #only has one level of failover now but could change to array of hashes and loops + #but this keeps it simple and in practice I haven't needed more than two + + + foreach my $rname (@report_names) { + my %report0; + my %report1; + my $check_tables0; + my $check_tables1; + + if ($debug eq 'on') {print "\nchecking for $rname ... ";} + %report0 = find_report($dom,$t,$rname,'0',$debug); + $check_tables0 = check_table($report0{query},$MIGSCHEMA,$debug,$rname); + if ($check_tables0 == 1) { $r = print_query($fh,%report0); } else { + %report1 = find_report($dom,$t,$rname,'1',$debug); + if (defined $report1{query}) { + $check_tables1 = check_table($report1{query},$MIGSCHEMA,$debug,$rname); + if ($check_tables1 == 1) { $r = print_query($fh,%report1); } + } + } + } + +} + +print "\n"; + +foreach my $table ($dom->findnodes('//table')) { + my $tdrop = $table->findvalue('./drop'); + my $tname = $table->findvalue('./name'); + my $sdrop = $dbh->prepare($tdrop); + print "cleaning up table $tname ... \n"; + $sdrop->execute(); +} + +close $fh; + +############ end of main logic + +sub find_xml { + my $reports_xml = shift; + my $mig_path = shift; + + if ($reports_xml =~ m/\//) { return $reports_xml; } + + my $mig_test_file = $mig_path . '/../mig-xml/' . $reports_xml; + my $working_test_dir = getcwd(); + my $working_test_file = $working_test_dir . 
'/' . $reports_xml; + + if (-e $mig_test_file) { return $mig_test_file; } + if (-e $working_test_file) { return $working_test_file; } + + return undef; +} + +sub find_report { + my $dom = shift; + my $tag = shift; + my $name = shift; + my $iteration = shift; + my $debug = shift; + my %report; + + if ($debug eq 'on') {print "iteration $iteration ";} + foreach my $node ($dom->findnodes('//report')) { + if ($node->findvalue('./tag') =~ $tag and $node->findvalue('./iteration') eq $iteration and $node->findvalue('./name') eq $name) { + if ($debug eq 'on') {print "succeeded ... \n";} + %report = ( + name => $node->findvalue('./name'), + report_title => $node->findvalue('./report_title'), + query => $node->findvalue('./query'), + heading => $node->findvalue('./heading'), + tag => $node->findvalue('./tag'), + iteration => $node->findvalue('./iteration'), + note => $node->findvalue('./note'), + display => $node->findvalue('./display'), + chart_labels => $node->findvalue('./chart_labels'), + divwidth => $node->findvalue('./divwidth'), + divheight => $node->findvalue('./divheight'), + ); + return %report; + } + } + if ($debug eq 'on') {print "failed ... \n";} + return %report = ( + name => "eaten by grue" + ); +} + +sub print_section_header { + my $t = shift; + my $fh = shift; + + $t =~ s/_/ /g; + #$t =~ s/(\w+)/\u$1/g;; + print $fh "<<<\n"; + print $fh "== $t Reports\n"; + return; +} + +sub create_report_name { + my $rt = shift; + + my @abbr = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); + my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); + $year += 1900; + my $date = $year . '_' . $abbr[$mon] . '_' . $mday; + my $report_file; + $report_file = $rt . ' ' . $date . 
'.asciidoc'; + $report_file =~ s/ /_/g; + return $report_file; +} + +sub write_title_page { + my $rt = shift; + my $fh = shift; + my $a = shift; + my $captions = shift; + + my @abbr = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); + my $l = length($report_title); + my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); + $year += 1900; + print $fh "= $rt\n"; + print $fh "$mday $abbr[$mon] $year\n"; + print $fh "$a\n"; + #print $fh ":title-logo-image: image::eolilogosmall.png[pdfwidth=3in]\n"; + print $fh ":toc:\n"; + if ($captions eq 'on') { print $fh ":caption:\n"; } + print $fh "\n"; +} + +sub load_javascript { + my $fh = shift; + + print $fh "++++\n"; + print $fh "\n"; + print $fh "++++\n"; +} + +sub check_table { + my $query = shift; + my $MIGSCHEMA = shift; + my $debug = shift; + my $report_name = shift; + + if ($debug eq 'on') {print "$query\n";} + + my $i; + my $return_flag = 1; + my @qe = split(/ /,$query); + $i = @qe; + $i--; + my @tables; + while ($i > -1) { + if ($qe[$i] eq 'FROM' or $qe[$i] eq 'JOIN') { + my $q = $i + 1; + if ($qe[$q] ne '(SELECT') { + push @tables, $qe[$q]; + } + } + $i--; + } + if ($debug eq 'on') {print "checking tables ... ";} + + $i = 0; + foreach my $table (@tables) { + my $sql; + my $schema; + if (index($table,'.') != -1) { + $schema = (split /\./,$table)[0]; + $table = (split /\./,$table)[1]; + } + $table = clean_query_string($table); + if (defined $schema) { + $schema = clean_query_string($schema); + $sql = 'SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = \'' . $schema . '\' AND table_name = \'' . $table . '\');'; + } else { + $sql = 'SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = \'' . $MIGSCHEMA . '\' AND table_name = \'' . $table . 
'\');'; + } + my $sth = $dbh->prepare($sql); + $sth->execute(); + while (my @row = $sth->fetchrow_array) { + if ($row[0] eq '1') { + next; + } else { + $return_flag = 0; + if ($debug eq 'on') {print "detecting $table failed...\n";} + } + if ($row[0] eq '0') {$return_flag = 0;} + } + } + if ($return_flag == 1 and $debug eq 'on') {print "succeeded ...\n";} + if ($return_flag == 0) {print "! a table failed the find test for report $report_name\n\n";} + return $return_flag; +} + +sub clean_query_string { + my $str = shift; + + $str =~ s/(?!_)[[:punct:]]//g; #remove punct except underscores + $str =~ s/\n//g; + $str =~ s/\r//g; + return $str; +} + +sub print_query { + my $fh = shift; + my %report = @_; + + my $display = $report{display}; + my $height = $report{divheight}; + my $width = $report{divwidth}; + if (!defined $display or length $display == 0) { $display = 'table'; } + my $rname = $report{name}; + my $query = $report{query}; + my $title = $report{report_title}; + my $sth = $dbh->prepare($query); + $sth->execute(); + + if ($height) { $height = $height . 'px'; } + if ($width) { $width = $width . 'px'; } + my $header_flag = 0; + + #print asciidoc + if ($display eq 'table') { + while (my @row = $sth->fetchrow_array) { + if ($header_flag == 0) { + print $fh "\n.*$report{report_title}*\n"; + print $fh "|===\n"; + my @h = split(/\./,$report{heading}); + my $h_length = @h; + my $h_count = 1; + while ($h_count <= $h_length) { + print $fh "|*$h[$h_count-1]* "; + $h_count++; + } + print $fh "\n"; + $header_flag = 1; + } + my $row_length = @row; + my $r = 1; + while ($r <= $row_length) { + if (! 
defined $row[$r-1] ) { + $row[$r-1] = 'none'; + } + print $fh "|$row[$r-1] "; + $r++; + } + print $fh "\n"; + } + if ($header_flag == 1) { + print $fh "|===\n\n"; + print $fh $report{note}; + print $fh "\n\n"; + } + } + + #print chart + if ($display eq 'pie_chart' or $display eq 'donut_chart') { + my @h = split(/\./,$report{heading}); + my @l = split(/\./,$report{chart_labels}); + + print $fh "++++\n"; + if (defined $height and defined $width) { print $fh "
\n"; } + else { print $fh "
\n"; } + print $fh "\n"; + print $fh "++++\n"; + } + + print "successfully wrote output for $report{name}.\n\n"; +} + +sub give_column { + my $i = shift; + my $col = ""; + + do { + $col .= chr( ( $i % 26 ) + ord('A') ); + $i = int( $i / 26 ) - 1; + } while ( $i >= 0 ); + + return scalar reverse $col; +} + +sub abort { + my $msg = shift; + print STDERR "$0: $msg", "\n"; + exit 1; +} + + diff --git a/emig.d/bin/mig-skip-clean b/emig.d/bin/mig-skip-clean new file mode 100755 index 0000000..5b60cb1 --- /dev/null +++ b/emig.d/bin/mig-skip-clean @@ -0,0 +1,100 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-skip-clean + +Allows you to either use an existing file named .utf8.clean or a +named [cleaned file] as if it were the one created by mig-clean + +Note that the clean file, however specified, should contain headers. The +remaining tools in the chain will expect this. + +=head1 SYNOPSIS + +B [cleaned file] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! ($ARGV[0]||$ARGV[1]) || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $file = abs_path($ARGV[0]); +my $clean_file; +if ($ARGV[1]) { + $clean_file = abs_path($ARGV[1]); +} else { + $clean_file = $file; +} +if ($clean_file && ! 
$clean_file =~ /^$MIGBASEWORKDIR/) { + die "File falls outside of MIGWORKDIR ($MIGWORKDIR): $clean_file\n"; +} + +if ($file =~ /^$MIGBASEWORKDIR/) { + skip_clean($file,$clean_file); +} else { + die "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; +} + +exit 0; + +############################################################################### + +sub skip_clean { + my $file = shift; + my $clean_file = shift; + + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + my $data = Mig::status_this_file($file); + + if (! $data->{'utf8_filename'}) { + die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; + } + + my $utf8_file = $data->{'utf8_filename'}; + if (! -e $utf8_file) { + die "missing file: $utf8_file\n"; + } + + print "skipping cleaning of tracked file: $file\n"; + + my $dbh = Mig::db_connect(); + if (! $clean_file) { + $clean_file = $utf8_file . '.clean'; + } + if (! -e $clean_file) { + die "clean file does not exist: $clean_file\n"; + } + + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_file + SET clean_filename = " . $dbh->quote($clean_file) . " + WHERE base_filename = " . $dbh->quote($file) . 
" + ; + ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } else { + die "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/bin/mig-skip-iconv b/emig.d/bin/mig-skip-iconv new file mode 100755 index 0000000..fec558d --- /dev/null +++ b/emig.d/bin/mig-skip-iconv @@ -0,0 +1,87 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-skip-iconv + +Allows you to either use an existing file named .utf8 or a named +[utf8 file] as if it were the one created by mig-iconv + +=head1 SYNOPSIS + +B [utf8 file] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! ($ARGV[0]||$ARGV[1]) || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $file = abs_path($ARGV[0]); +my $utf8_file; +if ($ARGV[1]) { + $utf8_file = abs_path($ARGV[1]); +} else { + $utf8_file = $file; +} +if ($utf8_file && ! $utf8_file =~ /^$MIGBASEWORKDIR/) { + die "File falls outside of MIGWORKDIR ($MIGWORKDIR): $utf8_file\n"; +} + +if ($file =~ /^$MIGBASEWORKDIR/) { + skip_iconv($file,$utf8_file); +} else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; +} + +exit 0; + +############################################################################### + +sub skip_iconv { + my $file = shift; + my $utf8_file = shift; + + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + my $data = Mig::status_this_file($file); + print "skipping the iconv'ing of tracked file: $file\n"; + + my $dbh = Mig::db_connect(); + if (! 
$utf8_file) { + $utf8_file = $file . '.utf8'; + } + if (! -e $utf8_file) { + die "utf8 file does not exist: $utf8_file\n"; + } + + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_file + SET utf8_filename = " . $dbh->quote($utf8_file) . " + WHERE base_filename = " . $dbh->quote($file) . " + ; + ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } else { + print "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/bin/mig-sql b/emig.d/bin/mig-sql new file mode 100755 index 0000000..3909ab3 --- /dev/null +++ b/emig.d/bin/mig-sql @@ -0,0 +1,48 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-sql + +A wrapper around the psql command. At some point the plan is to shove mig-tracked variables into psql sessions. + +=head1 SYNOPSIS + +B [arguments...] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR + BIBSTART +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +my @MYARGV = ( + 'psql' + ,'-vmigschema=' . $MIGSCHEMA + ,'-vmigschema_text=\'' . $MIGSCHEMA . '\'' + ,'-F ' . "\t" +); +if (defined $BIBSTART) { + push @MYARGV, '-vbibstart=' . $BIBSTART; +} +# TODO inject more mig-tracked variables here + +system(@MYARGV, @ARGV); + +exit 0; + diff --git a/emig.d/bin/mig-stage b/emig.d/bin/mig-stage new file mode 100755 index 0000000..6e7faf5 --- /dev/null +++ b/emig.d/bin/mig-stage @@ -0,0 +1,128 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-stage + +Load the SQL-converted version of the specified file into the migration schema. 
+ +Extra arguments are passed to the underlying call to psql + +If the tracked file was previously staged with a different table, drop that +table. + + +=head1 SYNOPSIS + +B [other arguments...] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $file = abs_path($ARGV[0]); +if ($file =~ /^$MIGBASEWORKDIR/) { + stage_csv(@ARGV); +} else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; +} + +exit 0; + +############################################################################### + +sub stage_csv { + my $file = abs_path(shift); + my @args = @_; + + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + my $data = Mig::status_this_file($file); + + if (! $data->{'utf8_filename'}) { + die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; + } + + if (! $data->{'clean_filename'}) { + die "mig-clean or mig-skip-clean needed for .clean version of file: $file\n"; + } + + if (! $data->{'stage_sql_filename'}) { + die "mig-convert needed for .stage.sql version of file: $file\n"; + } + + my $stage_sql_filename = $data->{'stage_sql_filename'}; + if (! 
-e $stage_sql_filename) { + die "missing file: $stage_sql_filename\n"; + } + + my $schema_table = `grep 'CREATE UNLOGGED TABLE' $stage_sql_filename | cut -f4 -d\\ | head -1`; + chomp $schema_table; + my ($schema,$table) = split /\./, $schema_table; + + if (defined $data->{'staged_table'} && $data->{'staged_table'} ne $table) { + my $dbh2 = Mig::db_connect(); + print "dropping previously staged table: $MIGSCHEMA.$data->{staged_table}\n"; + my $rv2 = $dbh2->do(" + DROP TABLE $MIGSCHEMA.$data->{staged_table}; + ") || die "Error dropping table $data->{staged_table}: $!\n"; + print "changing references to old tables\n"; + my $rv3 = $dbh2->do(" + UPDATE $MIGSCHEMA.tracked_column + SET staged_table = " . $dbh2->quote($table) . " + WHERE staged_table = " . $dbh2->quote($data->{staged_table}) . " + ") || die "Error changing references to $data->{staged_table}: $!\n"; + my $rv4 = $dbh2->do(" + UPDATE $MIGSCHEMA.tracked_column + SET target_table = " . $dbh2->quote($table) . " + WHERE target_table = " . $dbh2->quote($data->{staged_table}) . " + ") || die "Error changing references to $data->{staged_table}: $!\n"; + Mig::db_disconnect($dbh2); + } + + print "running staging SQL: $stage_sql_filename\n"; + + system('psql', @args, '-f', $stage_sql_filename); + + if ($schema ne $MIGSCHEMA) { + die "Schema mismatch: env => $MIGSCHEMA sql => $schema\n"; + } + if (! Mig::check_db_migschema_for_specific_table($table)) { + die "Missing staged table: $schema_table\n"; + } else { + print "table staged: $schema_table\n"; + } + + my $dbh = Mig::db_connect(); + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_file + SET staged_table = " . $dbh->quote($table) . " + WHERE base_filename = " . $dbh->quote($file) . 
" + ; + ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } else { + print "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/bin/mig-stagebibs b/emig.d/bin/mig-stagebibs new file mode 100755 index 0000000..f045a13 --- /dev/null +++ b/emig.d/bin/mig-stagebibs @@ -0,0 +1,244 @@ +#!/usr/bin/perl + +############################################################################### +=pod + +=item B --file foo.mrc.xml + +Takes a load of bibs from a UTF-8 MARC XML file and loads them into mig staging +table of bibio_record_entry_legacy. This is done with no checking of file validity +so records should be checked before hand and cleaned. + +Takes three optional arguments: + + +--source + +Takes a numeric value and set the x_source of the bib record to that. Defaults to +2 which is local system. + +--x_source + +Sets an x_source value on the staging table to the one supplied instead of the +default of none. + +--auth foo.mrc.xml + +This will load bibs into the authority_record_entry_legacy. + +--serial foo.mrc.xml + +This will load bibs into the serial_record_entry_legacy. + +=back + +=cut + +############################################################################### + +use strict; +use warnings; + +use DBI; +#binmode STDIN, ':bytes'; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Data::Dumper; +use Pod::Usage; +use Switch; +use Cwd 'abs_path'; +use FindBin; +use UNIVERSAL; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; +use Getopt::Long; + +pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; +pod2usage(-verbose => 1) if ! 
$ARGV[1]; + +my $append = 0; +my $base_table; +my $stage_table; +my $marc_column = 'marc'; +my $auth = ''; +my $serial = ''; +my $source = 2; +my $x_source = 'default'; +my $no_source_or_last_xact_id; +my $dbh = Mig::db_connect(); +my $infile; +my $i = 0; +my $batch; +binmode STDIN, ':utf8'; + +my $ret = GetOptions( + 'file:s' => \$infile, + 'serial:s' => \$serial, + 'auth:s' => \$auth, + 'x_source:s' => \$x_source, + 'source:i' => \$source, + 'base_table:s' => \$base_table, + 'stage_table:s' => \$stage_table, + 'marc_column:s' => \$marc_column, + 'no_source_or_last_xact_id' => \$no_source_or_last_xact_id +); + +#if in file is empty then fail +#if auth and serial = 1 fail + +if ($serial == 1) { + $base_table = 'm_serial_record_entry'; +} + +if ($auth == 1) { + $base_table = 'm_authority_record_entry'; +} + +if ($auth == 1 and $serial == 1) { abort('are you sure you want to load these as authorities and serials?'); } + +if (!$base_table) { + $base_table = 'm_biblio_record_entry'; +} + +if (!$stage_table) { + $stage_table = $base_table . 
'_legacy'; +} + +my $bre_test = check_for_table($dbh,$base_table); +my $bre_legacy_test = check_for_table($dbh,$stage_table); +if ($bre_test == 0 and $bre_legacy_test == 0 ) { create_bre($dbh); create_child_bre($dbh); } +if ($bre_test == 1 and $bre_legacy_test == 0 ) { create_child_bre($dbh); } + +my $xmig_test = check_for_column($dbh,$stage_table,'x_migrate'); +if ($xmig_test == 0) { add_column($dbh,$stage_table,'x_migrate','BOOLEAN DEFAULT TRUE'); } + +my $xx_source_test = check_for_column($dbh,$stage_table,'x_source'); +if ($xx_source_test == 0) { add_column($dbh,$stage_table,'x_source','TEXT'); } + +my $xmarc_test = check_for_column($dbh,$stage_table,$marc_column); +if ($xmarc_test == 0) { add_column($dbh,$stage_table,$marc_column,'TEXT'); } + + +#flatten out MARC XML FILE +open my $xml, "<:encoding(utf8)", $infile or abort('could not open MARC XML file'); +$i = 0; +my $record = ''; +while(my $line = <$xml>) { + if ($line =~ /^<\/?collection/) { next; } + chomp $line; + $record = $record . 
$line; + if ($line =~ /<\/record>$/) { + stage_record($dbh,$record,$x_source,$source); + $record = ''; + $i++; + if (($i % 100) == 0) { report_progress('Records stage', $i); } + } +} +close $xml; + +if ($i == 0) { print "No XML was processed, are you sure this is an XML file?\n"; } +print "Finis.\n"; + +# beyond here be functions + +sub create_bre { + my $dbh = shift; + $dbh->do("DO \$\$ + DECLARE + t BOOLEAN; + BEGIN + SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$base_table') INTO t; + IF t = FALSE THEN + PERFORM migration_tools.build_specific_base_staging_table ('$MIGSCHEMA',REGEXP_REPLACE('$base_table','_','.')); + END IF; + END \$\$;"); + + return (); +} + +sub create_child_bre { + my $dbh = shift; + $dbh->do("DO \$\$ + BEGIN + CREATE TABLE $MIGSCHEMA.$stage_table (x_migrate BOOLEAN DEFAULT TRUE, x_source TEXT) INHERITS ($MIGSCHEMA.$base_table); + END \$\$;"); + + return (); +} + +sub abort { + my $msg = shift; + print STDERR "$0: $msg", "\n"; + exit 1; +} + +sub report_progress { + my ($msg, $counter) = @_; + if (defined $counter) { + print STDERR "$msg: $counter\n"; + } else { + print STDERR "$msg\n"; + } +} + +sub stage_record { + my $dbh = shift; + my $record = shift; + my $x_source = shift; + my $source = shift; + my $last_xact = "'$MIGSCHEMA'"; + $record = '$_$' . $record . 
'$_$'; + my $sql; + if ($no_source_or_last_xact_id) { + $sql = "INSERT INTO $MIGSCHEMA.$stage_table ($marc_column) VALUES ($record);"; + } else { + if ($x_source eq 'default') { + $sql = "INSERT INTO $MIGSCHEMA.$stage_table (last_xact_id,$marc_column,source) VALUES ($last_xact,$record,$source);"; + } else { + $sql = "INSERT INTO $MIGSCHEMA.$stage_table (last_xact_id,$marc_column,x_source,source) VALUES ($last_xact,$record,'$x_source',$source);"; + } + } + my $sth = $dbh->prepare($sql); + $sth->execute(); + return; +} + +sub check_for_table { + my $dbh = shift; + my $table = shift; + my $sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table';"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = pop @sqlresult; + if ($r) { return $r; } else { return 0; } +} + +sub check_for_column { + my $dbh = shift; + my $table = shift; + my $column = shift; + my $sql = "SELECT 1 FROM information_schema.columns WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table' AND column_name = '$column';"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = pop @sqlresult; + if ($r) { return $r; } else { return 0; } +} + +sub add_column { + my $dbh = shift; + my $table = shift; + my $column = shift; + my $column_type = shift; + my $sql = "ALTER TABLE $MIGSCHEMA.$table ADD COLUMN $column $column_type;"; + my $sth = $dbh->prepare($sql); + $sth->execute(); + my @sqlresult = $sth->fetchrow_array; + my $r = check_for_column($dbh,$table,$column); + if ($r == 0) { abort('failed to create column'); } else { return $r; } +} + diff --git a/emig.d/bin/mig-status b/emig.d/bin/mig-status new file mode 100755 index 0000000..0d78b18 --- /dev/null +++ b/emig.d/bin/mig-status @@ -0,0 +1,87 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-status - This will show tracking 
information for either the specified files +or all tracked files if no argument is given. + +You'll need to invoke B prior to using commands like B + +=head1 SYNOPSIS + +B [file] [...] + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if scalar(@ARGV) > 0 && $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my @files = @ARGV; +if (scalar(@files) == 0) { + @files = (); + my $dbh = Mig::db_connect(); + my $sth = $dbh->prepare(" + SELECT base_filename + FROM $MIGSCHEMA.tracked_file + ORDER BY 1;" + ); + my $rv = $sth->execute() + || die "Error retrieving data from table (tracked_file): $!"; + my $rows = $sth->fetchall_arrayref; + for my $row ( @$rows ) { + push @files, $row->[0] + } + $sth->finish; + Mig::db_disconnect($dbh); +} + +foreach my $arg (sort @files) { + my $file = abs_path($arg); + my $data = Mig::status_this_file($file); + print "=-=-=\n"; + foreach my $key ( + 'base_filename' + ,'has_headers' + ,'headers_file' + ,'utf8_filename' + ,'clean_filename' + ,'parent_table' + ,'stage_sql_filename' + ,'staged_table' + ,'map_sql_filename' + ,'prod_sql_filename' + ) { + printf "%-20s:\t", $key; + print $data->{$key} ? $data->{$key} : ""; + if ($key =~ /filename$/ && $data->{$key} && ! 
-e $data->{$key}) { + print " (FILE MISSING)"; + } + print "\n"; + } +} + +exit 0; + +############################################################################### + + diff --git a/emig.d/bin/mig-unlink b/emig.d/bin/mig-unlink new file mode 100755 index 0000000..5bf34e4 --- /dev/null +++ b/emig.d/bin/mig-unlink @@ -0,0 +1,71 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig-unlink + +Clear any association between the specified file and a parent table within the +migration schema. + +=head1 SYNOPSIS + +B + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use DBI; +use Cwd 'abs_path'; +use FindBin; +my $mig_bin = "$FindBin::Bin/"; +use lib "$FindBin::Bin/"; +use Mig; + +pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; + +Mig::die_if_no_env_migschema(); +Mig::die_if_mig_tracking_table_does_not_exist(); + +my $file = abs_path($ARGV[0]); +if ($file =~ /^$MIGBASEWORKDIR/) { + unlink_table(@ARGV); +} else { + print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; +} + +exit 0; + +############################################################################### + +sub unlink_table { + my $file = abs_path(shift); + + my $tracked_file_id = Mig::check_for_tracked_file($file); + if ($tracked_file_id) { + my $data = Mig::status_this_file($file); + my $table = $data->{'parent_table'} || ''; + + print "unlinking table ($table) from file: $file\n"; + + my $dbh = Mig::db_connect(); + my $rv = $dbh->do(" + UPDATE $MIGSCHEMA.tracked_file + SET parent_table = '' + WHERE base_filename = " . $dbh->quote($file) . 
" + ; + ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; + Mig::db_disconnect($dbh); + } else { + print "File not currently tracked: $file\n"; + } +} diff --git a/emig.d/sql/init/000-tracked_column.sql b/emig.d/sql/init/000-tracked_column.sql new file mode 100644 index 0000000..3f73454 --- /dev/null +++ b/emig.d/sql/init/000-tracked_column.sql @@ -0,0 +1,15 @@ +DROP TABLE IF EXISTS tracked_column; +CREATE TABLE tracked_column ( + id serial + ,base_filename TEXT + ,parent_table TEXT + ,staged_table TEXT + ,staged_column TEXT + ,comment TEXT + ,target_table TEXT + ,target_column TEXT + ,transform TEXT + ,summarize BOOLEAN +); +CREATE INDEX ON tracked_column(target_table,target_column); +CREATE INDEX ON tracked_column(base_filename); diff --git a/emig.d/sql/init/000-tracked_file.sql b/emig.d/sql/init/000-tracked_file.sql new file mode 100644 index 0000000..d80b12d --- /dev/null +++ b/emig.d/sql/init/000-tracked_file.sql @@ -0,0 +1,14 @@ +DROP TABLE IF EXISTS tracked_file; +CREATE TABLE tracked_file ( + id serial + ,base_filename TEXT UNIQUE + ,has_headers BOOLEAN + ,headers_file TEXT + ,utf8_filename TEXT + ,clean_filename TEXT + ,stage_sql_filename TEXT + ,map_sql_filename TEXT + ,prod_sql_filename TEXT + ,parent_table TEXT + ,staged_table TEXT +); diff --git a/emig.d/sql/init/010_gsheet_tracking.sql b/emig.d/sql/init/010_gsheet_tracking.sql new file mode 100644 index 0000000..ff2e4dc --- /dev/null +++ b/emig.d/sql/init/010_gsheet_tracking.sql @@ -0,0 +1,15 @@ +CREATE TABLE gsheet_tracked_table ( + id SERIAL PRIMARY KEY + ,sheet_name TEXT NOT NULL + ,table_name TEXT NOT NULL + ,tab_name TEXT + ,created TIMESTAMP + ,last_pulled TIMESTAMP + ,last_pushed TIMESTAMP +); + +CREATE TABLE gsheet_tracked_column ( + id SERIAL + ,table_id INTEGER REFERENCES gsheet_tracked_table (id) + ,column_name TEXT NOT NULL +); diff --git a/emig.d/sql/init/011_reporter_tables.sql b/emig.d/sql/init/011_reporter_tables.sql new file mode 100644 index 0000000..5a0bc77 --- 
/dev/null +++ b/emig.d/sql/init/011_reporter_tables.sql @@ -0,0 +1,5 @@ +CREATE TABLE report (id SERIAL, create_date TIMESTAMPTZ, name TEXT); +CREATE TABLE reporter_columns (id SERIAL, report INTEGER, header TEXT, ordinal_position INTEGER); +CREATE TABLE reporter_rows (id SERIAL, report INTEGER, row INTEGER, ordinal_position INTEGER); + + diff --git a/emig.d/sql/init/020_common_tables.sql b/emig.d/sql/init/020_common_tables.sql new file mode 100644 index 0000000..99b3ce4 --- /dev/null +++ b/emig.d/sql/init/020_common_tables.sql @@ -0,0 +1,354 @@ +CREATE TABLE map_hold_policies ( + l_user_home_ou TEXT + ,l_request_ou TEXT + ,l_item_owning_ou TEXT + ,l_item_circ_ou TEXT + ,l_requestor_grp TEXT + ,l_circ_modifier TEXT + ,l_active TEXT + ,l_holdable TEXT + ,l_max_holds TEXT + ,l_includes_frozen_holds TEXT + ,l_distance_is_from_owner TEXT + ,l_transit_range TEXT + ,l_usr_grp TEXT + ,x_user_home_ou INTEGER + ,x_request_ou INTEGER + ,x_item_owning_ou INTEGER + ,x_item_circ_ou INTEGER + ,x_requestor_grp INTEGER + ,x_circ_modifier TEXT + ,x_active BOOLEAN + ,x_holdable BOOLEAN + ,x_max_holds INTEGER + ,x_includes_frozen_holds BOOLEAN + ,x_distance_is_from_owner BOOLEAN + ,x_transit_range INTEGER + ,x_usr_grp INTEGER + ,x_migrate BOOLEAN DEFAULT TRUE +); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_hold_policies','Hold Policies',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_user_home_ou') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_request_ou') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_item_owning_ou') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_item_circ_ou') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold 
Policies'),'l_requestor_grp') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_circ_modifier') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_active') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_holdable') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_max_holds') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_includes_frozen_holds') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_distance_is_from_owner') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_transit_range') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_usr_grp') +; + + +CREATE TABLE map_circ_policies ( + l_org_unit TEXT + ,l_user_group TEXT + ,l_copy_owning_lib TEXT + ,l_user_home_lib TEXT + ,l_circ_mod TEXT + ,l_copy_location TEXT + ,l_circulate TEXT + ,l_circ_limit_set TEXT + ,l_duration_rule TEXT + ,l_fine_rule TEXT + ,l_grace_override TEXT + ,l_max_fine TEXT + ,l_notes TEXT + ,x_org_unit INTEGER + ,x_user_group INTEGER + ,x_copy_owning_lib INTEGER + ,x_user_home_lib INTEGER + ,x_circ_mod TEXT + ,x_copy_location INTEGER + ,x_circulate BOOLEAN + ,x_circ_limit_set INTEGER + ,x_duration_rule INTEGER + ,x_fine_rule INTEGER + ,x_grace_override INTERVAL + ,x_max_fine INTEGER + ,x_circ_limit_quantity INTEGER + ,x_circ_limit_parts INTEGER + ,x_circ_limit_ou_name TEXT + ,x_circ_limit_ou_id INTEGER + ,x_circ_limit_id INTEGER + ,x_migrate BOOLEAN DEFAULT FALSE +); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_circ_policies','Circ Policies',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_org_unit') + ,((SELECT id FROM 
gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_user_group') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_copy_owning_lib') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_user_home_lib') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_circ_mod') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_copy_location') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_circulate') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_circ_limit_set') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_duration_rule') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_fine_rule') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_grace_override') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_max_fine') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_notes') +; + +CREATE TABLE map_circ_limit_sets ( + l_owning_lib TEXT + ,l_name TEXT + ,l_items_out TEXT + ,l_depth TEXT + ,l_global TEXT + ,l_description TEXT + ,l_circ_mod TEXT + ,l_copy_loc TEXT + ,x_owning_lib INTEGER + ,x_name TEXT + ,x_items_out INTEGER + ,x_global BOOLEAN + ,x_depth INTEGER + ,x_description TEXT +); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_circ_limit_sets','Circ Limit Sets',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_owning_lib') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_name') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_items_out') + ,((SELECT id FROM gsheet_tracked_table 
WHERE tab_name = 'Circ Limit Sets'),'l_depth') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_global') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_description') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_circ_mod') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_copy_loc') +; + +CREATE TABLE map_create_shelving_location ( + l_id SERIAL + ,l_owning_lib TEXT + ,l_copy_location TEXT + ,l_opac_visible TEXT + ,l_checkin_alert TEXT + ,l_holdable TEXT + ,l_circulate TEXT + ,l_note TEXT + ,x_migrate BOOLEAN NOT NULL DEFAULT TRUE + ,x_shelf INTEGER +) INHERITS (m_asset_copy_location); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_create_shelving_location','New Copy Locations',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_owning_lib') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_copy_location') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_opac_visible') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_checkin_alert') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_holdable') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_circulate') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_note') +; + +CREATE TABLE map_create_account ( + l_id SERIAL + ,l_barcode TEXT + ,l_usrname TEXT + ,l_first_name TEXT + ,l_family_name TEXT + ,l_email TEXT + ,l_password TEXT + ,l_home_library TEXT + ,l_profile1 TEXT + ,l_profile2 TEXT + ,l_profile3 TEXT + ,l_work_ou1 TEXT + ,l_work_ou2 TEXT + ,l_work_ou3 TEXT 
+ ,l_work_ou4 TEXT + ,l_work_ou5 TEXT + ,l_work_ou6 TEXT + ,l_work_ou7 TEXT + ,l_work_ou8 TEXT + ,l_work_ou9 TEXT + ,l_work_ou10 TEXT + ,l_work_ou11 TEXT + ,l_work_ou12 TEXT + ,l_work_ou13 TEXT + ,l_note TEXT + ,x_migrate BOOLEAN NOT NULL DEFAULT TRUE +); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_create_account','New Accounts',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_usrname') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_barcode') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_first_name') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_family_name') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_email') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_password') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_home_library') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_profile1') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_profile2') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_profile3') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou1') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou2') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou3') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou4') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou5') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou6') + ,((SELECT id FROM gsheet_tracked_table 
WHERE tab_name = 'New Accounts'),'l_work_ou7') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou8') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou9') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou10') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou11') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou12') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou13') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_note') +; + +CREATE TABLE map_threshold ( + id SERIAL + ,library TEXT + ,profile TEXT + ,checkout_threshold TEXT + ,fine_threshold TEXT + ,overdue_threshold TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_threshold','Patron Thresholds',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'profile') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'library') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'checkout_threshold') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'fine_threshold') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'overdue_threshold') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'note') +; + + +CREATE TABLE map_misc ( + id SERIAL + ,count TEXT + ,option TEXT + ,choice TEXT + ,value TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 
'search_path'),'map_misc','Miscellaneous Options',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'option') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'choice') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'value') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'note') +; + +CREATE TABLE map_org_setting ( + l_id SERIAL + ,l_name TEXT + ,l_label TEXT + ,l_entry_type TEXT + ,l_org_unit TEXT + ,l_value TEXT + ,l_note TEXT +); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_org_setting','Org Settings',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_name') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_label') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_entry_type') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_org_unit') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_value') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_note') +; + + +CREATE TABLE map_bib_manipulations ( + id SERIAL + ,action TEXT + ,field TEXT + ,subfield TEXT + ,matching_value TEXT + ,target_value TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (sheet_name,table_name,tab_name,created) +VALUES + ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_bib_manipulations','Bib Records',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES 
+ ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'name') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'action') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'field') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'subfield') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'matching_value') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'target_value') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'note') +; + + diff --git a/emig.d/sql/system/tlc/030_tlc_mapping_tables.sql b/emig.d/sql/system/tlc/030_tlc_mapping_tables.sql new file mode 100644 index 0000000..210cc73 --- /dev/null +++ b/emig.d/sql/system/tlc/030_tlc_mapping_tables.sql @@ -0,0 +1,375 @@ +CREATE TABLE map_tlc_branches ( + id SERIAL + ,tlc_branch_id TEXT + ,tlc_name TEXT + ,org_unit TEXT + ,mig_patrons TEXT + ,mig_items TEXT + ,note TEXT + ,x_org_id INTEGER + +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_branches','Branches Present in Extract',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'tlc_branch_id') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'tlc_name') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'org_unit') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'note') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'mig_patrons') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'mig_items') +; + +-- ############################################ + 
+CREATE TABLE map_tlc_perm_group ( + id SERIAL + ,x_count TEXT + ,legacy_group TEXT + ,target_group TEXT + ,stat_cat_name TEXT + ,stat_cat_entry TEXT + ,dnm TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_perm_group','Patron Type',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'legacy_group') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'target_group') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'stat_cat_name') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'stat_cat_entry') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'dnm') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'note') +; + +-- ############################################ + +CREATE TABLE map_tlc_patron_expire ( + id SERIAL + ,x_count TEXT + ,expire_year TEXT + ,set_to_date TEXT + ,dnm TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_patron_expire','Patrons by Expiration Date',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'expire_year') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'set_to_date') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'dnm') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'note') +; + +-- ############################################ + +CREATE TABLE map_tlc_patron_last_active ( + id SERIAL + ,x_count 
TEXT + ,last_active TEXT + ,inactive TEXT + ,dnm TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_patron_last_active','Patrons by Last Active Date',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'last_active') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'inactive') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'dnm') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'note') +; + +-- ############################################ + +CREATE TABLE map_tlc_billing_type ( + id SERIAL + ,x_count TEXT + ,tlc_code TEXT + ,billing_type TEXT + ,dnm TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_billing_type','Migrating Bills by Bill Type',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'tlc_code') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'billing_type') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'dnm') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'note') +; + +-- ############################################ + +CREATE TABLE map_tlc_password ( + id SERIAL + ,x_count TEXT + ,note TEXT + ,migrate_available TEXT + ,fill_in_method TEXT + ,static_value TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + 
('map_tlc_password','Patrons w NULL Passwords',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'note') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'migrate_available') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'fill_in_method') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'static_value') +; + +-- ############################################ + +CREATE TABLE map_tlc_block_status ( + id SERIAL + ,x_count TEXT + ,tlc_block_status TEXT + ,block TEXT + ,bar TEXT + ,dnm TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_block_status','Patrons by Block Status',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'tlc_block_status') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'block') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'bar') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'dnm') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'note') +; + + +-- ############################################ + +CREATE TABLE map_tlc_patron_gender ( + id SERIAL + ,x_count TEXT + ,gender TEXT + ,stat_cat TEXT + ,stat_cat_entry TEXT + ,show TEXT + ,required TEXT + ,dnm TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_patron_gender','Patrons by Gender',NOW()) +; + +INSERT INTO 
gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'gender') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'stat_cat') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'stat_cat_entry') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'show') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'required') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'dnm') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'note') +; + + +-- ############################################ + + +CREATE TABLE map_tlc_holding_code ( + id SERIAL + ,x_count TEXT + ,holding_code TEXT + ,shelving_location TEXT + ,org_unit TEXT + ,circ_mod TEXT + ,alert TEXT + ,alert_message TEXT + ,dnm TEXT + ,note TEXT + ,reference TEXT + ,item_status TEXT + ,stat_cat_title TEXT + ,stat_cat_entry TEXT + ,x_migrate TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_holding_code','Holdings Code',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'holding_code') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'shelving_location') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'org_unit') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'circ_mod') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'alert') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'alert_message') + ,((SELECT id FROM 
gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'dnm') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'note') +; + + + + +-- ############################################ + + +CREATE TABLE map_tlc_stat_cat ( + id SERIAL + ,x_count TEXT + ,tlc_stat_cat TEXT + ,tlc_stat_cat_value TEXT + ,stat_cat TEXT + ,stat_cat_entry TEXT + ,show TEXT + ,required TEXT + ,dnm TEXT + ,note TEXT + ,note2 TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_stat_cat','Patron Stat Cats',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'tlc_stat_cat') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'tlc_stat_cat_value') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'stat_cat') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'stat_cat_entry') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'show') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'required') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'dnm') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'note') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'note2') +; + + +-- ############################################ + +CREATE TABLE map_tlc_patron_note ( + id SERIAL + ,x_count TEXT + ,note_type TEXT + ,subset_values TEXT + ,matching_text TEXT + ,action TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_patron_note','Patron Notes',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 
'Patron Notes'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'note_type') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'subset_values') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'matching_text') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'action') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'note') +; + +-- ############################################ + +CREATE TABLE map_tlc_item_note ( + id SERIAL + ,x_count TEXT + ,note_type TEXT + ,subset_values TEXT + ,matching_text TEXT + ,action TEXT + ,note TEXT +); + +INSERT INTO gsheet_tracked_table + (table_name,tab_name,created) +VALUES + ('map_tlc_item_note','Item Notes',NOW()) +; + +INSERT INTO gsheet_tracked_column + (table_id,column_name) +VALUES + ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'x_count') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'note_type') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'subset_values') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'matching_text') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'action') + ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'note') +; + + diff --git a/emig.d/xml/evergreen_full_system.xml b/emig.d/xml/evergreen_full_system.xml new file mode 100644 index 0000000..e972f6a --- /dev/null +++ b/emig.d/xml/evergreen_full_system.xml @@ -0,0 +1,519 @@ + + + + + circ_count + circs + 0 + Open Circulations + Circulation Status.Count of Circs + SELECT 'Closed Circulations', COUNT(id) FROM action.circulation WHERE xact_finish IS NOT NULL UNION ALL SELECT 'Open Circulations', COUNT(id) FROM action.circulation WHERE xact_finish IS NULL + + + + circ_by_orgunit + circs + 0 + Circulations by Org Unit + Circulations Count.Org Unit + SELECT COUNT(acirc.id), aou.name 
FROM action.circulation acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib WHERE acirc.xact_finish IS NULL GROUP BY 2 + + + + circs_by_duration + circs + 0 + Circulations by Duration, Fine and Max Fine Rules + Count of Circs.Duration.Fine.Max Fine + SELECT COUNT(id), duration_rule, recurring_fine_rule, max_fine_rule FROM action.circulation GROUP BY 2, 3, 4 ORDER BY 2, 3, 4 + + + + circs_by_usrgroup + circs + 0 + Circulations by Rules and Patron Group + Count of Circs.Duration.Fine.Max Fine.User Group + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, pgt.name FROM action.circulation acirc JOIN actor.usr au ON au.id = acirc.usr JOIN permission.grp_tree pgt ON pgt.id = au.profile + GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 + + + + circs_by_circmod + circs + 0 + Circulations by Rules and Circulation Modifier + Count of Circs.Duration.Fine.Max Fine.Circulation Modifier + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, ac.circ_modifier FROM action.circulation acirc JOIN asset.copy ac ON ac.id = acirc.target_copy + GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 + + + + circs_by_orgunit + circs + 0 + Circulations by Rules and Org Unit + Count of Circs.Duration.Fine.Max Fine.Library Branch + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, aou.name FROM action.circulation acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 + + + + non_cat_circs + circs + 0 + Non-Cataloged Circulation + Circulations Count + SELECT COUNT(id) FROM action.non_cataloged_circulation + + + + in_house + circs + 0 + In House Use + In House Use Records + SELECT COUNT(id) FROM action.in_house_use + + + + circs_missing_rules + circs + 0 + Circs Missing Rules + Count.Field Missing + SELECT COUNT(id), 'Duration Rule Value' FROM action.circulation WHERE duration IS NULL + UNION ALL SELECT COUNT(id), 'Recurring Fine Rule Value' FROM 
action.circulation WHERE recurring_fine IS NULL + UNION ALL SELECT COUNT(id), 'Max Fine Rule Value' FROM action.circulation WHERE max_fine IS NULL + UNION ALL SELECT COUNT(id), 'Duration Rule' FROM action.circulation WHERE duration_rule IS NULL + UNION ALL SELECT COUNT(id), 'Recurring Fine Rule' FROM action.circulation WHERE recurring_fine_rule IS NULL + UNION ALL SELECT COUNT(id), 'Max Fine Rule' FROM action.circulation WHERE max_fine_rule IS NULL + + + + + + + holds + holds + 0 + Holds + Hold Type.Hold Count + SELECT 'Closed Holds', COUNT(id) FROM action.hold_request WHERE (expire_time::TIMESTAMP < now()) OR cancel_time IS NOT NULL OR fulfillment_time IS NOT NULL UNION ALL SELECT 'Open Holds', COUNT(id) FROM action.hold_request WHERE (expire_time IS NULL OR expire_time::TIMESTAMP > now()) AND cancel_time IS NULL AND fulfillment_time IS NULL + + + + holds_bytype + holds + 0 + Holds By Type + Hold Type.Hold Count + SELECT hold_type as "Hold Type", COUNT(id) FROM action.hold_request GROUP BY 1 + + + + transit_open_by_item_status + holds + 0 + Transit Copy Records and Status of Linked Items + Count.Status + SELECT COUNT(atc.id), ccs.name FROM action.transit_copy atc JOIN asset.copy ac ON ac.id = atc.target_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE atc.id IN (SELECT id FROM action.transit_copy) AND atc.dest_recv_time IS NULL GROUP BY 2 ORDER BY 2 + + + + transit_copies_by_status + holds + 0 + Status of Items with Count of Open In Transits + Count.Status.Count of Open Transits + SELECT COUNT(ac.id), ccs.name, SUM(CASE WHEN atc.id IS NULL THEN 0 ELSE 1 END) FROM asset.copy ac JOIN config.copy_status ccs ON ccs.id = ac.status LEFT JOIN (SELECT * FROM action.transit_copy WHERE id IN (SELECT id FROM action.transit_copy) AND dest_recv_time IS NULL) atc ON atc.target_copy = ac.id WHERE ac.id IN (SELECT id from asset.copy) GROUP BY 2 ORDER BY 2 + + + + hold_copies_by_status + holds + 0 + Captured Holds with Status of Items + Count of Captured Hold.Status of 
Item + SELECT COUNT(ahr.id), ccs.name FROM action.hold_request ahr JOIN asset.copy ac ON ac.id = ahr.current_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE ahr.capture_time IS NOT NULL AND ahr.fulfillment_time IS NULL and ahr.cancel_time IS NULL AND ahr.id IN (SELECT id FROM action.hold_request) GROUP BY 2 ORDER By 2 + + + + + + asset.copy_count + Count of Copies by Library + assets + 0 + Copy Count.Library + SELECT COUNT(ac.id), aou.name FROM asset.copy ac JOIN actor.org_unit aou ON aou.id = ac.circ_lib GROUP BY 2 ORDER BY 2 + + + + asset.deleted_copies + Deleted Copies + assets + 0 + Copy Count.Deleted + SELECT COUNT(ac.id), ac.deleted::TEXT FROM asset.copy ac GROUP BY 2 + + + + asset.copies_by_status + Copies by Status + assets + 0 + Copy Count.Status + SELECT COUNT(ac.id), cs.name FROM asset.copy ac JOIN config.copy_status cs ON cs.id = ac.status GROUP BY 2 ORDER BY 2 + + + + asset.circ_mod_copies_count + Copies by Circulation Modifier + assets + 0 + Copy Count.Circulation Modifier + SELECT COUNT(ac.id), ac.circ_modifier FROM asset.copy ac GROUP BY 2 ORDER BY 2 + + + + asset.copy_notes + Copy Notes + assets + 0 + Note Count.Public + SELECT COUNT(acnote.id), acnote.pub::TEXT FROM asset.copy_note acnote GROUP BY 2 ORDER BY 2 + + + + asset.copy_notes + Copy Notes + assets + 0 + Note Count.Public + SELECT COUNT(acnote.id), acnote.pub::TEXT FROM asset.copy_note acnote GROUP BY 2 ORDER BY 2 + + + + asset.vols_by_lib + Volumes by Library + assets + 0 + Volume Count.Library + SELECT COUNT(acn.id), aou.name FROM asset.call_number acn JOIN actor.org_unit aou ON aou.id = acn.owning_lib GROUP BY 2 ORDER BY 2 + + + + asset.vols_by_lib + Volumes by Library + assets + 0 + Volume Count.Library + SELECT COUNT(acn.id), aou.name FROM asset.call_number acn JOIN actor.org_unit aou ON aou.id = acn.owning_lib GROUP BY 2 ORDER BY 2 + + + + asset.cops_by_loc_and_org + Copies by Location + assets + 0 + Copy Count.Library.Circ Library + SELECT COUNT(ac.id), acl.name, 
aou.name FROM asset.copy ac JOIN asset.copy_location acl ON acl.id = ac.location JOIN actor.org_unit aou ON aou.id = ac.circ_lib GROUP BY 2, 3 ORDER BY 2, 3 + + + + asset.barcode_lengths + Barcode Lengths by Library + assets + 0 + Count of Barcode.Barcode Length.Library + SELECT COUNT(ac.id), LENGTH(ac.barcode), aou.name FROM asset.copy ac JOIN actor.org_unit aou ON aou.id = ac.circ_lib GROUP BY 2, 3 ORDER BY 3, 2 + + + + asset.stat_cats + Copy Statistical Categories + assets + 0 + Stat Cat Count.Library.Statistical Category + SELECT COUNT(ac_sc.id), aou.name, ac_sc.name FROM asset.stat_cat ac_sc JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 + + + + asset.stat_cats + Copy Statistical Categories + assets + 0 + Stat Cat Count.Library.Statistical Category + SELECT COUNT(ac_sc.id), aou.name, ac_sc.name FROM asset.stat_cat ac_sc JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 + + + + asset.stat_cat_entries + Copy Stat Cat User Entries + assets + 0 + Copy Stat Count.Library.Statistical Category + SELECT COUNT(map.id), aou.name, ac_sc.name FROM asset.stat_cat_entry_copy_map map JOIN asset.stat_cat ac_sc ON ac_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 + + + + asset.stat_cat_entries + Copy Stat Cat User Entries + assets + 0 + Copy Stat Count.Library.Statistical Category + SELECT COUNT(map.id), aou.name, ac_sc.name FROM asset.stat_cat_entry_copy_map map JOIN asset.stat_cat ac_sc ON ac_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou. 
+id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 + + + + asset.copy_tags + Copy Tags + assets + 0 + Tag Count.Copy Tag Type.Copy Tag Label.Staff Note.Public + SELECT COUNT(map.id), tag.tag_type, tag.label, tag.staff_note, tag.pub FROM asset.copy_tag tag JOIN asset.copy_tag_copy_map map ON map.tag = tag.id GROUP BY 2,3,4,5 ORDER BY 2,3 + + + + + + money.billing_voided + Bills Voided And Not + money + 0 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.billing a GROUP BY 2 ORDER BY 2 + + + + money.billing_voided + Bills Voided And Not + money + 0 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.billing a GROUP BY 2 ORDER BY 2, 3 + + + + money.billing_by_type + Bills by Type + money + 0 + Count.Billing Type + SELECT COUNT(a.id), a.billing_type FROM money.billing a GROUP BY 2 ORDER BY 2 + + + + money.billing_by_type + Bills by Type + money + 0 + Count.Billing Type + SELECT COUNT(a.id), a.billing_type FROM money.billing a GROUP BY 2 ORDER BY 2 + + + + money.cash_payment + Cash Payments + money + 0 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.cash_payment a GROUP BY 2 ORDER BY 2 + + + + money.cash_payment + Cash Payments + money + 0 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.cash_payment a GROUP BY 2 ORDER BY 2 + + + + money.check_payment + Check Payments + money + 0 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.check_payment a GROUP BY 2 ORDER BY 2 + + + + money.forgive_payment + Forgive Payments + money + 0 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.forgive_payment a GROUP BY 2 ORDER BY 2 + + + + + bibswovolumes + Bibliographic Records Without Volumes + bibs + 0 + Count + SELECT COUNT(id) FROM biblio.record_entry where id NOT IN (SELECT DISTINCT record FROM asset.call_number) AND deleted IS FALSE + + + + bibs_notes + Bib Record Notes + bibs + 0 + Count + SELECT 
COUNT(b.id) FROM biblio.record_note b + + + + bibs_peers + Peer Bib Copies + bibs + 0 + Count + SELECT COUNT(b.id) FROM biblio.peer_bib_copy_map b + + + + bibs_parts + Monograph Parts + bibs + 0 + Count + SELECT COUNT(b.id) FROM biblio.monograph_part b + + + + + + usrsbyorg + Patrons by Home Org + actors + 0 + Count.Library.Deleted + SELECT COUNT(au.id), aou.name, au.deleted::TEXT FROM actor.usr au JOIN actor.org_unit aou ON aou.id = au.home_ou GROUP BY 2, 3 ORDER BY 2, 3 + + + + usrsbypgt + Patrons by Permission Group + actors + 0 + Count.Permission Group + SELECT COUNT(au.id), pgt.name FROM actor.usr au JOIN permission.grp_tree pgt ON pgt.id = au.profile GROUP BY 2 ORDER BY 2 + + + + active_usrs + Patrons by Active Status + actors + 0 + Count of Users.Active + SELECT COUNT(id), active::TEXT FROM actor.usr GROUP BY 2 + + + + active_usr_barcodes + Patron Barcodes by Active Status + actors + 0 + Count of Barcodes.Active + SELECT COUNT(id), active::TEXT FROM actor.card GROUP BY 2 + + + + usr_barcode_lengths + Barcode Lengths by Library + actors + 0 + Count of Barcode.Barcode Length.Library + SELECT COUNT(acard.id), LENGTH(acard.barcode), aou.name FROM actor.card acard JOIN actor.usr au ON au.id = acard.usr JOIN actor.org_unit aou ON aou.id = au.home_ou GROUP BY 2, 3 ORDER BY 3, 2 + + + + usr_barcode_patterns + Common Barcode Starting Patterns + actors + 0 + Count of Barcodes (greater than 10).Left 60% of Characters + SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM actor.card acard GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 + + + + usr_addressses_status + Patron Addresses by Valid Status + actors + 0 + Count.Valid + SELECT COUNT(aua.id), valid::TEXT FROM actor.usr_address aua GROUP BY 2 + + + + usr_addresses_pending + Patron Addresses by Pending Status + actors + 0 + Count of Addresses.Pending + SELECT COUNT(aua.id), pending::TEXT FROM actor.usr_address aua GROUP BY 2 + + + + usr_messages + Patron Messages + actors + 0 + 
Count.Deleted + SELECT COUNT(aum.id), deleted::TEXT FROM actor.usr_message aum GROUP BY 2 + + + + usr_notes + Patron Notes + actors + 0 + Count.Public + SELECT COUNT(aun.id), pub::TEXT FROM actor.usr_note aun GROUP BY 2 + + + + usr_stat_cats + Patron Statistical Categories + actors + 0 + Stat Cat Count.Library.Statistical Category + SELECT COUNT(au_sc.id), aou.name, au_sc.name FROM actor.stat_cat au_sc JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3 ORDER BY 2, 3 + + + + usr_stat_cat_entries + Patron Stat Cat User Entries + actors + 0 + Patron Stat Count.Library.Statistical Category + SELECT COUNT(map.id), aou.name, au_sc.name FROM actor.stat_cat_entry_usr_map map JOIN actor.stat_cat au_sc ON au_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3 ORDER BY 2,3 + + + + + fund_count + 0 + acq + Funds + Number of Funds + SELECT COUNT(id) FROM acq.fund; + + + + invoice_count + 0 + acq + Invoices + Number of Invoices + SELECT COUNT(id) FROM acq.invoice; + + + + diff --git a/emig.d/xml/evergreen_staged_report.xml b/emig.d/xml/evergreen_staged_report.xml new file mode 100644 index 0000000..050df4a --- /dev/null +++ b/emig.d/xml/evergreen_staged_report.xml @@ -0,0 +1,1182 @@ + + + find_cmm + DROP FUNCTION IF EXISTS find_cmm(BIGINT) + + CREATE OR REPLACE FUNCTION find_cmm(circ_id BIGINT) + RETURNS SETOF INTEGER[] + LANGUAGE plpgsql + AS $function$ + DECLARE + aou INTEGER; + ac INTEGER; + au INTEGER; + r INTEGER[]; + BEGIN + SELECT circ_lib FROM action.circulation WHERE id = circ_id INTO aou; + SELECT target_copy FROM action.circulation WHERE id = circ_id INTO ac; + SELECT usr FROM action.circulation WHERE id = circ_id INTO au; + + FOR r IN SELECT buildrows FROM action.find_circ_matrix_matchpoint(aou,ac,au,FALSE) + LOOP + RETURN NEXT r; + END LOOP; + RETURN; + END + $function$ + + + + create_subfield_u + DROP TABLE IF EXISTS subfield_u + CREATE UNLOGGED TABLE subfield_u AS SELECT UNNEST(oils_xpath(
'//*[@tag="856"]/*[@code="u"]/text()', marc)) AS value FROM m_biblio_record_entry_legacy WHERE x_migrate +
+ + + + + + + + circ_count + circs + 0 + Migrated Circulations + Circulation Status.Count of Circs + SELECT 'Closed Circulations', COUNT(id) FROM m_action_circulation_legacy WHERE xact_finish IS NOT NULL AND x_migrate + UNION ALL SELECT 'Open Circulations', COUNT(id) FROM m_action_circulation_legacy WHERE xact_finish IS NULL AND x_migrate + + + + circ_count + circs + 1 + Open Circulations + Circulation Status.Count of Circs + SELECT 'Closed Circulations', COUNT(id) FROM m_action_circulation WHERE xact_finish IS NOT NULL UNION ALL SELECT 'Open Circulations', COUNT(id) FROM m_action_circulation WHERE xact_finish IS NULL + + + + circ_by_orgunit + circs + 0 + Circulations by Org Unit + Circulations Count.Org Unit + SELECT COUNT(acirc.id), aou.name FROM m_action_circulation_legacy acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib WHERE acirc.xact_finish IS NULL AND x_migrate = TRUE GROUP BY 2 + + + + circ_by_orgunit + circs + 1 + Circulations by Org Unit + Circulations Count.Org Unit + SELECT COUNT(acirc.id), aou.name FROM m_action_circulation acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib WHERE acirc.xact_finish IS NULL GROUP BY 2 + + + + circs_by_duration + circs + 0 + Migrated Circulations by Duration, Fine and Max Fine + Count of Circs.Duration.Fine.Max Fine + SELECT COUNT(id), duration_rule, recurring_fine_rule, max_fine_rule FROM m_action_circulation_legacy WHERE x_migrate = TRUE GROUP BY 2, 3, 4 + + + + circs_by_duration + circs + 1 + Circulations by Duration, Fine and Max Fine + Count of Circs.Duration.Fine.Max Fine + SELECT COUNT(id), duration_rule, recurring_fine_rule, max_fine_rule FROM m_action_circulation GROUP BY 2, 3, 4 ORDER BY 2, 3, 4 + + + + circs_by_usrgroup + circs + 0 + Circulations by Rules and Patron Group + Count of Circs.Duration.Fine.Max Fine.User Group.Matchpoints + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, pgt.name, x.buildrows FROM m_action_circulation_legacy acirc JOIN 
actor.usr au ON au.id = acirc.usr JOIN permission.grp_tree pgt ON pgt.id = au.profile JOIN (SELECT acirc.id, ARRAY_TO_STRING(find_cmm(acirc.id),',') AS buildrows FROM m_action_circulation_legacy acirc WHERE acirc.x_migrate = TRUE) x ON x.id = acirc.id WHERE acirc.x_migrate = TRUE GROUP BY 2, 3, 4, 5, 6 ORDER BY 2, 3, 4, 5, 6 + + + + circs_by_usrgroup + circs + 1 + Circulations by Rules and Patron Group + Count of Circs.Duration.Fine.Max Fine.User Group.Matchpoints + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, pgt.name, x.buildrows FROM m_action_circulation acirc JOIN actor.usr au ON au.id = acirc.usr JOIN permission.grp_tree pgt ON pgt.id = au.profile JOIN (SELECT acirc.id, ARRAY_TO_STRING(find_cmm(acirc.id),',') AS buildrows FROM m_action_circulation acirc) x ON x.id = acirc.id GROUP BY 2, 3, 4, 5, 6 ORDER BY 2, 3, 4, 5, 6 + + + + circs_by_circmod + circs + 0 + Circulations by Rules and Circulation Modifier + Count of Circs.Duration.Fine.Max Fine.Circulation Modifier.Matchpoints + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, ac.circ_modifier, x.buildrows FROM m_action_circulation_legacy acirc JOIN asset.copy ac ON ac.id = acirc.target_copy JOIN (SELECT acirc.id, ARRAY_TO_STRING(find_cmm(acirc.id),',') AS buildrows FROM m_action_circulation_legacy acirc WHERE acirc.x_migrate = TRUE) x ON x.id = acirc.id WHERE acirc.x_migrate = TRUE GROUP BY 2, 3, 4, 5, 6 ORDER BY 2, 3, 4, 5, 6 + + + + circs_by_circmod + circs + 1 + Circulations by Rules and Circulation Modifier + Count of Circs.Duration.Fine.Max Fine.Circulation Modifier.Matchpoints + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, ac.circ_modifier, x.buildrows FROM m_action_circulation acirc JOIN asset.copy ac ON ac.id = acirc.target_copy JOIN (SELECT acirc.id, ARRAY_TO_STRING(find_cmm(acirc.id),',') AS buildrows FROM m_action_circulation acirc) x ON x.id = acirc.id + GROUP 
BY 2, 3, 4, 5, 6 ORDER BY 2, 3, 4, 5, 6 + + + + circs_by_orgunit + circs + 0 + Circulations by Rules and Org Unit + Count of Circs.Duration.Fine.Max Fine.Library Branch + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, aou.name FROM m_action_circulation_legacy acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib WHERE acirc.x_migrate = TRUE GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 + + + + circs_by_orgunit + circs + 1 + Circulations by Rules and Org Unit + Count of Circs.Duration.Fine.Max Fine.Library Branch + SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, aou.name FROM m_action_circulation acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 + + + + non_cat_circs + circs + 0 + Non-Cataloged Circulation + Circulations Count.Migrated + SELECT COUNT(id), x_migrate::TEXT FROM m_action_non_cataloged_circulation_legacy GROUP BY 2 + + + + non_cat_circs + circs + 1 + Non-Cataloged Circulation + Circulations Count + SELECT COUNT(id) FROM m_action_non_cataloged_circulation + + + + in_house + circs + 0 + In House Use + In House Use Records.Migrated + SELECT COUNT(id), x_migrate::TEXT FROM m_action_in_house_use_legacy GROUP BY 2 + + + + in_house + circs + 1 + In House Use + In House Use Records + SELECT COUNT(id) FROM m_action_in_house_use + + + + circs_missing_rules + circs + 1 + Circs Missing Rules + Count.Field Missing + SELECT COUNT(id), 'Duration Rule Value' FROM m_action_circulation WHERE duration IS NULL + UNION ALL SELECT COUNT(id), 'Recurring Fine Rule Value' FROM m_action_circulation WHERE recurring_fine IS NULL + UNION ALL SELECT COUNT(id), 'Max Fine Rule Value' FROM m_action_circulation WHERE max_fine IS NULL + UNION ALL SELECT COUNT(id), 'Duration Rule' FROM m_action_circulation WHERE duration_rule IS NULL + UNION ALL SELECT COUNT(id), 'Recurring Fine Rule' FROM m_action_circulation WHERE recurring_fine_rule IS NULL + UNION ALL 
SELECT COUNT(id), 'Max Fine Rule' FROM m_action_circulation WHERE max_fine_rule IS NULL + + + + + circ_open_by_item_status + circs + 0 + Status of Currently Circulating Items + Count.Status + SELECT COUNT(acirc.id), ccs.name FROM action.circulation acirc JOIN asset.copy ac ON ac.id = acirc.target_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE acirc.xact_finish IS NULL AND acirc.checkin_time IS NULL AND acirc.id IN (SELECT id FROM m_action_circulation) GROUP BY 2 ORDER BY 2 + + + + + + holds + holds + 0 + Migrated and Non-Migrated Holds + Hold Type.Hold Count.Migrated + SELECT 'Closed Holds', COUNT(id), x_migrate::TEXT FROM m_action_hold_request_legacy WHERE (expire_time::TIMESTAMP < now()) OR cancel_time IS NOT NULL OR fulfillment_time IS NOT NULL GROUP BY 3 UNION ALL SELECT 'Open Holds', COUNT(id), x_migrate::TEXT FROM m_action_hold_request_legacy WHERE (expire_time IS NULL OR expire_time::TIMESTAMP > now()) AND cancel_time IS NULL AND fulfillment_time IS NULL GROUP BY 3 + + + + holds_bytype + holds + 0 + Migrated Holds By Type + Hold Type.Hold Count + SELECT hold_type as "Hold Type", COUNT(id) FROM m_action_hold_request_legacy WHERE x_migrate = TRUE GROUP BY 1 + + + + transit_open_by_item_status + holds + 0 + Transit Copy Records and Status of Linked Items + Count.Status + SELECT COUNT(atc.id), ccs.name FROM action.transit_copy atc JOIN asset.copy ac ON ac.id = atc.target_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE atc.id IN (SELECT id FROM m_action_transit_copy) AND atc.dest_recv_time IS NULL GROUP BY 2 ORDER BY 2 + + + + transit_copies_by_status + holds + 0 + Status of Items with Count of Open In Transits + Count.Status.Count of Open Transits + SELECT COUNT(ac.id), ccs.name, SUM(CASE WHEN atc.id IS NULL THEN 0 ELSE 1 END) FROM asset.copy ac JOIN config.copy_status ccs ON ccs.id = ac.status LEFT JOIN (SELECT * FROM action.transit_copy WHERE id IN (SELECT id FROM m_action_transit_copy) AND dest_recv_time IS NULL) atc ON 
atc.target_copy = ac.id WHERE ac.id IN (SELECT id from m_asset_copy) GROUP BY 2 ORDER BY 2 + + + + hold_copies_by_status + holds + 0 + Captured Holds with Status of Items + Count of Captured Hold.Status of Item + SELECT COUNT(ahr.id), ccs.name FROM action.hold_request ahr JOIN asset.copy ac ON ac.id = ahr.current_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE ahr.capture_time IS NOT NULL AND ahr.fulfillment_time IS NULL and ahr.cancel_time IS NULL AND ahr.id IN (SELECT id FROM m_action_hold_request) GROUP BY 2 ORDER By 2 + + + + hold_depth + holds + 0 + Depth of Unfilled Holds + Count.Depth + SELECT COUNT(ahr.id), ahr.selection_depth FROM action.hold_request ahr WHERE ahr.id IN (SELECT id FROM m_action_hold_request) AND ahr.cancel_time IS NULL AND ahr.capture_time IS NULL AND ahr.fulfillment_time IS NULL GROUP BY 2 ORDER BY 2 + + + + + + m_asset_copy_count + Count of Copies by Library + assets + 0 + Copy Count.Library + SELECT COUNT(ac.id), aou.name FROM m_asset_copy_legacy ac JOIN actor.org_unit aou ON aou.id = ac.circ_lib WHERE ac.x_migrate = TRUE GROUP BY 2 ORDER BY 2 + + + + m_asset_copy_count_non_migrated + Non-Migrated Count of Copies by Library + assets + 0 + Copy Count.Library + SELECT COUNT(ac.id), aou.name FROM m_asset_copy_legacy ac JOIN actor.org_unit aou ON aou.id = ac.circ_lib WHERE ac.x_migrate = FALSE GROUP BY 2 ORDER BY 2 + + + + m_asset_copies_by_status + Copies by Status + assets + 0 + Copy Count.Status + SELECT COUNT(ac.id), cs.name FROM m_asset_copy_legacy ac JOIN config.copy_status cs ON cs.id = ac.status WHERE ac.x_migrate = TRUE GROUP BY 2 ORDER BY 2 + + + + m_asset_circ_mod_copies_count + Copies by Circulation Modifier + assets + 0 + Copy Count.Circulation Modifier + SELECT COUNT(ac.id), ac.circ_modifier FROM m_asset_copy_legacy ac WHERE ac.x_migrate = TRUE GROUP BY 2 ORDER BY 2 + + + + m_asset_copy_notes + Copy Notes + assets + 0 + Note Count.Public + SELECT COUNT(acnote.id), acnote.pub::TEXT FROM m_asset_copy_note_legacy 
acnote WHERE acnote.x_migrate = TRUE GROUP BY 2 ORDER BY 2 + + + + m_asset_copy_notes + Copy Notes + assets + 1 + Note Count.Public + SELECT COUNT(acnote.id), acnote.pub::TEXT FROM m_asset_copy_note acnote GROUP BY 2 ORDER BY 2 + + + + m_asset_vols_by_lib + Volumes by Library + assets + 0 + Volume Count.Library + SELECT COUNT(acn.id), aou.name FROM m_asset_call_number_legacy acn JOIN m_actor_org_unit_legacy aou ON aou.id = acn.owning_lib WHERE acn.x_migrate = TRUE GROUP BY 2 ORDER BY 2 + + + + m_asset_vols_by_lib + Volumes by Library + assets + 1 + Volume Count.Library + SELECT COUNT(acn.id), aou.name FROM m_asset_call_number acn JOIN actor.org_unit aou ON aou.id = acn.owning_lib GROUP BY 2 ORDER BY 2 + + + + m_asset_cops_by_loc_and_org + Copies by Location + assets + 0 + Copy Count.Location.Circ Library + SELECT COUNT(ac.id), acl.name, aou.name FROM m_asset_copy_legacy ac JOIN asset.copy_location acl ON acl.id = ac.location JOIN actor.org_unit aou ON aou.id = ac.circ_lib WHERE ac.x_migrate = TRUE GROUP BY 2, 3 ORDER BY 2, 3 + + + + m_asset_cops_w_loc_one + Copies with a Location of Stacks + assets + 0 + Barcode + SELECT barcode FROM m_asset_copy_legacy WHERE location = 1 AND x_migrate + + + + m_asset_no_barcode + Items Without Barcodes + assets + 0 + Assigned Barcode + SELECT barcode FROM m_asset_copy_legacy WHERE barcode ~* 'no_barocde' AND x_migrate + + + + m_asset_barcode_patterns + Common Barcode Starting Patterns + assets + 0 + Count of Barcodes (greater than 10).Left 60% of Characters + SELECT COUNT(ac.id), LEFT(ac.barcode,(ROUND(LENGTH(ac.barcode)*.6))::INT) FROM m_asset_copy_legacy ac WHERE ac.x_migrate = TRUE GROUP BY 2 HAVING COUNT(ac.id) > 10 ORDER BY 2 + + + + m_asset_barcode_collisions + Copy Barcode Collisions + assets + 0 + Collision Count + SELECT COUNT(id) FROM m_asset_copy_legacy WHERE x_migrate = TRUE AND barcode ~* '^x_' + Incumbent collisions are those where the migrated barcodes collide with existing barcodes in the database. 
+ + + + m_asset_barcode_collisions + Copy Barcode Collisions + assets + 1 + Collision Count + SELECT COUNT(id) FROM m_asset_copy WHERE barcode ~* '^x_' + Incumbent collisions are those where the migrated barcodes collide with existing barcodes in the database. + + + + m_asset_barcode_collisions_shortlist + Copy Barcode Collisions (first 20) + assets + 0 + Collision List + SELECT ac.barcode FROM m_asset_copy_legacy ac WHERE ac.barcode ~* '^x_' AND ac.x_migrate = TRUE ORDER BY 1 LIMIT 20 + This is a shortlist of copy barcode collisions that maxes out at 20. If there are more collisions we will need to run a custom report. + + + + m_asset_barcode_collisions_shortlist + Copy Barcode Collisions (first 20) + assets + 1 + Collision List + SELECT ac.barcode FROM m_asset_copy ac WHERE ac.barcode ~* '^x_' ORDER BY 1 LIMIT 20 + This is a shortlist of patron barcode collisions that maxes out at 20. If there are more collisions we will need to run a custom report. + + + + m_asset_barcode_collision_patterns + Common Copy Barcode Collision Patterns + assets + 0 + Number of Barcodes Matching Pattern Greater than 10.Left 60% of Characters + SELECT COUNT(ac.id), LEFT(ac.barcode,(ROUND(LENGTH(ac.barcode)*.6))::INT) FROM m_asset_copy_legacy ac WHERE barcode ~* '^x_' AND ac.x_migrate = TRUE GROUP BY 2 HAVING COUNT(ac.id) > 10 ORDER BY 2 + + + + m_asset_barcode_collision_patterns + Common Copy Barcode Collision Patterns + assets + 1 + Number of Barcodes Matching Pattern Greater than 10.Left 60% of Characters + SELECT COUNT(ac.id), LEFT(ac.barcode,(ROUND(LENGTH(ac.barcode)*.6))::INT) FROM m_asset_copy ac WHERE barcode ~* '^x_' GROUP BY 2 HAVING COUNT(ac.id) > 10 ORDER BY 2 + + + + m_asset_stat_cats + Copy Statistical Categories + assets + 0 + Stat Cat Count.Library.Statistical Category + SELECT COUNT(ac_sc.id), aou.name, ac_sc.name FROM m_asset_stat_cat_legacy ac_sc JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 + + + + m_asset_stat_cats + Copy Statistical 
Categories + assets + 1 + Stat Cat Count.Library.Statistical Category + SELECT COUNT(ac_sc.id), aou.name, ac_sc.name FROM m_asset_stat_cat ac_sc JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 + + + + m_asset_stat_cat_entries + Copy Stat Cat User Entries + assets + 0 + Copy Stat Count.Library.Statistical Category + SELECT COUNT(map.id), aou.name, ac_sc.name FROM m_asset_stat_cat_entry_copy_map_legacy map JOIN m_asset_stat_cat_legacy ac_sc ON ac_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 + + + + m_asset_stat_cat_entries + Copy Stat Cat User Entries + assets + 1 + Copy Stat Count.Library.Statistical Category + SELECT COUNT(map.id), aou.name, ac_sc.name FROM m_asset_stat_cat_entry_copy_map map JOIN m_asset_stat_cat ac_sc ON ac_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou. +id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 + + + + m_asset_copy_tags + Copy Tags + assets + 0 + Tag Count.Copy Tag Type.Copy Tag Label.Staff Note.Public + SELECT COUNT(map.id), tag.tag_type, tag.label, tag.staff_note, tag.pub FROM m_asset_copy_tag tag JOIN m_asset_copy_tag_copy_map map ON map.tag = tag.id GROUP BY 2,3,4,5 ORDER BY 2,3 + + + + m_asset_copy_alerts + Copy Alerts + assets + 0 + Alert Count.Alert Type + SELECT COUNT(*), cat.name FROM m_asset_copy_alert aca JOIN config.copy_alert_type cat ON cat.id = aca.alert_type GROUP BY 2 + + + + + + m_money_billing_voided + Bills Voided And Not + money + 0 + Count.Voided.Sum.Migrated + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_billing_legacy a GROUP BY 2, 4 ORDER BY 2, 4 + + + + m_money_billing_voided + Bills Voided And Not + money + 1 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM m_money_billing a GROUP BY 2 ORDER BY 2, 3 + + + + m_money_billing_by_type + Bills by Type + money + 0 + Count.Billing Type.Migrated + SELECT COUNT(a.id), a.billing_type, a.x_migrate::TEXT FROM m_money_billing_legacy a 
GROUP BY 2, 3 ORDER BY 2, 3 + + + + m_money_billing_by_type + Bills by Type + money + 1 + Count.Billing Type + SELECT COUNT(a.id), a.billing_type FROM m_money_billing a GROUP BY 2 ORDER BY 2 + + + + m_money_cash_payment + Cash Payments + money + 0 + Count.Voided.Sum.Migrated + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_cash_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 + + + + m_money_cash_payment + Cash Payments + money + 1 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM m_money_cash_payment a GROUP BY 2 ORDER BY 2 + + + + m_money_check_payment + Check Payments + money + 0 + Count.Voided.Sum.Migrated + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_check_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 + + + + m_money_forgive_payment + Forgive Payments + money + 0 + Count.Voided.Sum.Migrated + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_forgive_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 + + + + m_money_forgive_payment + Forgive Payments + money + 1 + Count.Voided.Sum + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM m_money_forgive_payment a GROUP BY 2 ORDER BY 2 + + + + m_money_goods_payment + Goods Payments + money + 0 + Count.Voided.Sum.Migrated + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_goods_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 + + + + m_money_work_payment + Work Payments + money + 0 + Count.Voided.Sum.Migrated + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_work_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 + + + + m_money_credit_card_payment + Credit Card Payments + money + 0 + Count.Voided.Sum.Migrated + SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_credit_card_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 + + + + m_money_credit_payment + Credit Payments + money + 0 + Count.Voided.Sum.Migrated + 
SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_credit_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 + + + + + + + bibs_loaded + Loaded Bibliographic Records + bibs + 0 + Count + SELECT COUNT(bre.id) FROM m_biblio_record_entry_legacy bre WHERE id IN (SELECT id FROM biblio.record_entry) ORDER BY 1 + + + + bibs_loaded + Loaded Bibliographic Records + bibs + 1 + Count + SELECT COUNT(bre.id) FROM m_biblio_record_entry bre WHERE id IN (SELECT id FROM biblio.record_entry) ORDER BY 1 + + + + bibswovolumes + Bibliographic Records Without Volumes + bibs + 0 + Count + SELECT COUNT(id) FROM m_biblio_record_entry where id NOT IN (SELECT DISTINCT record FROM m_asset_call_number) + These records would not have been loaded but many may be deduplicated against incumbent records. + + + + bibswovolumesanduri + Bibliographic Records Without Volumes And Recognized URI + bibs + 0 + Service.Domain.Count + SELECT '3M','ebook.3m.com', COUNT(*) FROM subfield_u WHERE value ~* 'ebook.3m.com' GROUP BY 1, 2 UNION ALL + SELECT 'Axis 360','axis360.baker-taylor.com', COUNT(*) FROM subfield_u WHERE value ~* 'axis360.baker-taylor.com' GROUP BY 1, 2 UNION ALL + SELECT 'Book Flix','bookflix.digital.scholastic.com', COUNT(*) FROM subfield_u WHERE value ~* 'bookflix.digital.scholastic.com' GROUP BY 1, 2 UNION ALL + SELECT 'Book Flix','bkflix.grolier.com', COUNT(*) FROM subfield_u WHERE value ~* 'bkflix.grolier.com' GROUP BY 1, 2 UNION ALL + SELECT 'Comics Plus','library.comicsplusapp.com', COUNT(*) FROM subfield_u WHERE value ~* 'library.comicsplusapp.com' GROUP BY 1, 2 UNION ALL + SELECT 'Ebrary','site.ebrary.com', COUNT(*) FROM subfield_u WHERE value ~* 'site.ebrary.com' GROUP BY 1, 2 UNION ALL + SELECT 'Freading','freading.com', COUNT(*) FROM subfield_u WHERE value ~* 'freading.com' GROUP BY 1, 2 UNION ALL + SELECT 'Hoopla','hoopladigital.com', COUNT(*) FROM subfield_u WHERE value ~* 'hoopladigital.com' GROUP BY 1, 2 UNION ALL + SELECT 'Infobase','avod.infobase.com', 
COUNT(*) FROM subfield_u WHERE value ~* 'avod.infobase.com' GROUP BY 1, 2 UNION ALL + SELECT 'Learning Express','learningexpresslibrary.com', COUNT(*) FROM subfield_u WHERE value ~* 'learningexpresslibrary.com' GROUP BY 1, 2 UNION ALL + SELECT 'Missouri Overdrive','molib2go.org', COUNT(*) FROM subfield_u WHERE value ~* 'molib2go.org' GROUP BY 1, 2 UNION ALL + SELECT 'netLibrary','netLibrary.com', COUNT(*) FROM subfield_u WHERE value ~* 'netLibrary.com' GROUP BY 1, 2 UNION ALL + SELECT 'OneClickDigital','oneclickdigital.com', COUNT(*) FROM subfield_u WHERE value ~* 'oneclickdigital.com' GROUP BY 1, 2 UNION ALL + SELECT 'Overdrive','overdrive.com', COUNT(*) FROM subfield_u WHERE value ~* 'overdrive.com' GROUP BY 1, 2 UNION ALL + SELECT 'ProQuest','ebookcentral.proquest.com', COUNT(*) FROM subfield_u WHERE value ~* 'ebookcentral.proquest.com' GROUP BY 1, 2 UNION ALL + SELECT 'RB Digital','rbdigital.com', COUNT(*) FROM subfield_u WHERE value ~* 'rbdigital.com' GROUP BY 1, 2 UNION ALL + SELECT 'U.S. Government Sites','.gov', COUNT(*) FROM subfield_u WHERE value ~* '\.gov' GROUP BY 1,2;; + + This list is built from known services and domains. If you have records for electronic resources that are not here please let us know. 
+ + + + bibswuri + Bibliographic Records With 856$9s + bibs + 0 + Count + SELECT COUNT(id) FROM m_biblio_record_entry where id IN (SELECT record FROM asset.call_number WHERE label ~* '##URI##') + + + + bibsff + Bibliographic Records with Adjusted Fixed Fields + bibs + 0 + Count.Original Search Format.New Search Format + SELECT COUNT(*), ARRAY_TO_STRING(x_search_format,','), ARRAY_TO_STRING(x_after_search_format,',') FROM m_biblio_record_entry_legacy WHERE x_migrate AND x_after_search_format IS NOT NULL GROUP BY 2, 3 ORDER BY 3,2 + + + + bibs_notes + Bib Record Notes + bibs + 0 + Count.Migrated + SELECT COUNT(b.id), b.x_migrate::TEXT FROM m_biblio_record_note_legacy b GROUP BY 2 + + + + bibs_notes + Bib Record Notes + bibs + 1 + Count + SELECT COUNT(b.id) FROM m_biblio_record_note b + + + + bibs_peers + Peer Bib Copies + bibs + 0 + Count.Migrated + SELECT COUNT(b.id), b.x_migrate::TEXT FROM m_biblio_peer_bib_copy_map_legacy b GROUP BY 2 + + + + bibs_peers + Peer Bib Copies + bibs + 1 + Count + SELECT COUNT(b.id) FROM m_biblio_peer_bib_copy_map b + + + + bibs_parts + Monograph Parts + bibs + 0 + Count.Migrated + SELECT COUNT(b.id), b.x_migrate::TEXT FROM m_biblio_monograph_part_legacy b GROUP BY 2 + + + + bibs_parts + Monograph Parts + bibs + 1 + Count + SELECT COUNT(b.id) FROM m_biblio_monograph_part b + + + + bib_merges + Bibliographic Merge Count + bibs + 0 + Records Merged.Incumbent Records Merged Into + SELECT SUM(array_length(records,1)), COUNT(*) FROM groups + + + + + + usrsbyorg + Migrated Patrons by Home Org + actors + 0 + Count.Library + SELECT COUNT(au.id), aou.name FROM m_actor_usr_legacy au JOIN actor.org_unit aou ON aou.id = au.home_ou WHERE au.x_migrate = TRUE GROUP BY 2 ORDER BY 2 + + + + nonmigusrsbyorg + Non-Migrated Patrons by Home Org + actors + 0 + Count.Library + SELECT COUNT(au.id), aou.name FROM m_actor_usr_legacy au JOIN actor.org_unit aou ON aou.id = au.home_ou WHERE au.x_migrate = FALSE GROUP BY 2 ORDER BY 2 + + + + usrsbypgt + Migrated 
Patrons by Permission Group + actors + 0 + Count.Permission Group + SELECT COUNT(au.id), pgt.name FROM m_actor_usr_legacy au JOIN permission.grp_tree pgt ON pgt.id = au.profile WHERE au.x_migrate = TRUE GROUP BY 2 ORDER BY 2 + + + + active_usrs + Patron by Active Status + actors + 0 + Count of Users.Active + SELECT COUNT(id), active::TEXT FROM m_actor_usr_legacy WHERE x_migrate = TRUE GROUP BY 2 + + + + active_usrs + Patron Cards + actors + 1 + Count of Users + SELECT COUNT(id) FROM m_actor_usr + + + + active_usr_barcodes + Patron Barcodes by Active Status + actors + 0 + Count of Barcodes.Active.Migrated + SELECT COUNT(id), active::TEXT, x_migrate::TEXT FROM m_actor_card_legacy GROUP BY 2, 3 + + + + active_usr_barcodes + Patron Barcodes by Active Status + actors + 1 + Count of Barcodes.Active + SELECT COUNT(id), active::TEXT FROM m_actor_card GROUP BY 2 + + + + usr_barcode_patterns + Common Barcode Starting Patterns + actors + 0 + Count of Barcodes (greater than 10).Left 60% of Characters + SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM m_actor_card_legacy acard WHERE acard.x_migrate = TRUE GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 + + + + usr_barcode_patterns + Common Barcode Starting Patterns + actors + 1 + Count of Barcodes (greater than 10).Left 60% of Characters + SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM m_actor_card acard GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 + + + + usr_barcode_collisions + Patron Barcode Collisions + actors + 0 + Collision Count + SELECT COUNT(acard.id) FROM m_actor_card_legacy acard WHERE barcode ~* '^x_' AND x_migrate = TRUE + + + + usr_barcode_collisions + Patron Barcode Collisions + actors + 1 + Collision Count + SELECT COUNT(acard.id) FROM m_actor_card acard WHERE barcode ~* '^x_' + + + + usr_barcode_collision_shortlist + Patron Barcode Collisions (first 20) + actors + 0 + Collision List + SELECT acard.barcode FROM 
m_actor_card_legacy acard WHERE acard.barcode ~* '^x_' AND acard.x_migrate = TRUE ORDER BY 1 LIMIT 20 + This is a shortlist of patron barcode collisions that maxes out at 20. If there are more collisions we will need to run a custom report. In some cases we may flag individual accounts to not migrate. + + + + usr_barcode_collision_shortlist + Patron Barcode Collisions (first 20) + actors + 1 + Collision List + SELECT acard.barcode FROM m_actor_card acard WHERE acard.barcode ~* '^x_' ORDER BY 1 LIMIT 20 + This is a shortlist of patron barcode collisions that maxes out at 20. If there are more collisions we will need to run a custom report. In some cases we may flag individual accounts to not migrate. + + + + usr_barcode_collision_patterns + Common Patron Barcode Collision Patterns + actors + 0 + Number of Barcodes Matching Pattern Greater than 10.Left 60% of Characters + SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM m_actor_card_legacy acard WHERE (acard.barcode ~* 'collision' OR acard.barcode ~* '^x_') AND acard.x_migrate = TRUE GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 + + + + usr_barcode_collision_patterns + Common Patron Barcode Collision Patterns + actors + 1 + Number of Barcodes Matching Pattern Greater than 10.Left 60% of Characters + SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM m_actor_card acard WHERE (acard.barcode ~* 'collision' OR acard.barcode ~* '^x_') GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 + + + + usr_addressses_status + Patron Addresses + actors + 0 + Count + SELECT COUNT(aua.id) FROM m_actor_usr_address_legacy aua WHERE aua.x_migrate = TRUE + + + + usr_addressses_status + Patron Addresses + actors + 1 + Count + SELECT COUNT(aua.id) FROM m_actor_usr_address aua + + + + usr_addresses_pending + Patron Addresses by Pending Status + actors + 0 + Count of Addresses.Pending + SELECT COUNT(aua.id), pending::TEXT FROM 
m_actor_usr_address_legacy aua WHERE aua.x_migrate = TRUE GROUP BY 2 + + + + usr_addresses_pending + Patron Addresses by Pending Status + actors + 1 + Count of Addresses.Pending + SELECT COUNT(aua.id), pending::TEXT FROM m_actor_usr_address aua GROUP BY 2 + + + + usr_messages + Patron Messages + actors + 0 + Count.Deleted.Migrated + SELECT COUNT(aum.id), deleted::TEXT, x_migrate::TEXT FROM m_actor_usr_message_legacy aum GROUP BY 2, 3 + + + + usr_messages + Patron Messages + actors + 1 + Count.Deleted + SELECT COUNT(aum.id), deleted::TEXT FROM m_actor_usr_message aum GROUP BY 2 + + + + usr_notes + Patron Notes + actors + 0 + Count.Public.Migrated + SELECT COUNT(aun.id), pub::TEXT, x_migrate::TEXT FROM m_actor_usr_note_legacy aun GROUP BY 2, 3 + + + + usr_notes + Patron Notes + actors + 1 + Count.Public + SELECT COUNT(aun.id), pub::TEXT FROM m_actor_usr_note aun GROUP BY 2 + + + + usr_stat_cats + Patron Statistical Categories + actors + 0 + Stat Cat Count.Library.Statistical Category.Migrated + SELECT COUNT(au_sc.id), aou.name, au_sc.name, au_sc.x_migrate::TEXT FROM m_actor_stat_cat_legacy au_sc JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3, 4 ORDER BY 2, 3, 4 + + + + usr_stat_cats + Patron Statistical Categories + actors + 1 + Stat Cat Count.Library.Statistical Category + SELECT COUNT(au_sc.id), aou.name, au_sc.name FROM m_actor_stat_cat au_sc JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3 ORDER BY 2, 3 + + + + usr_stat_cat_entries + Patron Stat Cat User Entries + actors + 0 + Patron Stat Count.Library.Statistical Category.Migrated + SELECT COUNT(map.id), aou.name, au_sc.name, map.x_migrate::TEXT FROM m_actor_stat_cat_entry_usr_map_legacy map JOIN m_actor_stat_cat_legacy au_sc ON au_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3, 4 ORDER BY 2,3, 4 + + + + usr_stat_cat_entries + Patron Stat Cat User Entries + actors + 1 + Patron Stat Count.Library.Statistical Category + SELECT COUNT(map.id), 
aou.name, au_sc.name FROM m_actor_stat_cat_entry_usr_map map JOIN m_actor_stat_cat au_sc ON au_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3 ORDER BY 2,3 + + + + usr_thresholds + Patron Thresholds + actors + 0 + Pateron Group.Org Unit.Penalty.Threshold + SELECT pgt.name, aou.shortname, sp.name, p.threshold FROM permission_grp_penalty_threshold p + JOIN actor.org_unit aou ON aou.id = p.org_unit JOIN permission.grp_tree pgt ON pgt.id = p.grp JOIN config.standing_penalty sp ON sp.id = p.penalty + ORDER BY 2, 1, 3 + + + + usr_settings + Patron Settings + actors + 0 + Count.Setting.Value + SELECT COUNT(*), name, 'User''s Phone' FROM m_actor_usr_setting WHERE name IN ('opac.default_phone') GROUP BY 2, 3 + UNION ALL SELECT COUNT(*), name, value FROM m_actor_usr_setting WHERE name IN ('opac.hold_notify') GROUP BY 2, 3 + UNION ALL SELECT COUNT(*), a.name, aou.shortname FROM m_actor_usr_setting a JOIN actor.org_unit aou ON aou.id = REPLACE(a.value,'"','')::INTEGER + WHERE a.name IN ('opac.default_pickup_location','opac.default_search_location') GROUP BY 2, 3 ORDER BY 2, 3; + + + + + fund_count + 0 + acq + Migrated Funds + Number of Funds.Migrated + SELECT COUNT(id), x_migrate::TEXT FROM m_acq_fund_legacy GROUP BY 2; + + + + fund_count + 1 + acq + Migrated Funds + Number of Funds + SELECT COUNT(id) FROM m_acq_fund; + + + + invoice_count + 0 + acq + Migrated Invoices + Number of Invoices.Migrated + SELECT COUNT(id), x_migrate::TEXT FROM m_acq_invoice_legacy GROUP BY 2; + + + + invoice_count + 1 + acq + Migrated Invoices + Number of Funds + SELECT COUNT(id) FROM m_acq_invoice; + + + + + serials_mfhd_count + serials + 0 + Migrated Serial MFHDs + Number of MFHDs + SELECT COUNT(id) FROM m_serial_record_entry + + + + + + dedupe_explain + dedupe + dedupe_process.asciidoc + + + + dedupe_bib_groups + dedupe + 0 + Scoring and Bib Record Groups + Count.Bib Record Groups + SELECT COUNT(id), 'Total Bibs Being Evaluated' FROM biblio.record_entry WHERE 
deleted IS FALSE AND id IN (SELECT eg::BIGINT FROM bib_id_map) + UNION ALL SELECT (COUNT(DISTINCT incoming_bib)), 'Incoming Bibs With Matches Found' FROM bib_matches + UNION ALL SELECT (COUNT(bre.id) - (SELECT COUNT(DISTINCT incoming_bib) FROM bib_matches)), 'Incoming Bibs With No Match' + FROM biblio.record_entry bre WHERE bre.deleted IS FALSE AND bre.id IN (SELECT eg::BIGINT FROM bib_id_map) + UNION ALL SELECT COUNT(DISTINCT incoming_bib), 'Incoming Bibs Being Merged into Incumbent' FROM bib_matches WHERE incumbent_bib_score >= incoming_bib_score + UNION ALL SELECT COUNT(id), 'Incumbent Bibs With Higher Scores to Incoming' FROM bib_matches WHERE incumbent_bib_score > incoming_bib_score + UNION ALL SELECT COUNT(id), 'Incumbent Bibs With Equal Scores to Incoming' FROM bib_matches WHERE incumbent_bib_score = incoming_bib_score + UNION ALL SELECT COUNT(id), 'Incumbent Bibs With Lower Scores to Incoming' FROM bib_matches WHERE incumbent_bib_score < incoming_bib_score + ; + + + + dedupe_format_count + dedupe + 0 + Count of Items Matching By Format + Count.Format(s) + SELECT COUNT(id), search_formats FROM bib_matches GROUP BY 2 ORDER BY 2; + + + + dedupe_score_ranges + dedupe + 0 + Count of Items Matching By Format + Lowest Record Score.Largest Record Score.Record Set + SELECT MIN(incumbent_bib_score), MAX(incumbent_bib_score), 'Incumbent Records' FROM bib_matches + UNION ALL SELECT MIN(incoming_bib_score), MAX(incoming_bib_score), 'Incoming Records' FROM bib_matches ; + + + + + + dedupe_sample_set + dedupe + 0 + Sample of 20 Matching Dedupe Record Sets + Bib Being Merged Into.Bib Being Merged + SELECT incumbent_bib, incoming_bib FROM bib_matches WHERE incumbent_bib_score >= incoming_bib_score LIMIT 20 ; + + + + + + + notices_overview + notices + 0 + Action Triggers Setup for Notices + ID.Active.Owner.Name + SELECT ed.id, ed.active::TEXT, aou.shortname, ed.name + FROM action_trigger.event_definition ed + JOIN actor.org_unit aou ON aou.id = ed.owner + WHERE ed.owner IN 
(SELECT DISTINCT home_ou FROM m_actor_usr) + OR ed.owner IN (SELECT DISTINCT parent_ou FROM actor.org_unit WHERE id in (SELECT DISTINCT home_ou FROM m_actor_usr)); + + + + + notices_count + notices + 0 + Count of Notices Run with State + Count of Notices.State.ID.Owner.Name + SELECT COUNT(ate.id), ate.state, ed.id, aou.shortname, ed.name + FROM action_trigger.event_definition ed + JOIN actor.org_unit aou ON aou.id = ed.owner + JOIN action_trigger.event ate ON ate.event_def = ed.id + WHERE ed.owner IN (SELECT DISTINCT home_ou FROM m_actor_usr) + OR ed.owner IN (SELECT DISTINCT parent_ou FROM actor.org_unit WHERE id in (SELECT DISTINCT home_ou FROM m_actor_usr)) + GROUP BY 2,3,4; + + + + +
+ diff --git a/emig.d/xml/mapping_reports.xml b/emig.d/xml/mapping_reports.xml new file mode 100644 index 0000000..53665d5 --- /dev/null +++ b/emig.d/xml/mapping_reports.xml @@ -0,0 +1,854 @@ + + + + + + + evg_m_asset_copy_statuses + Statuses + evergreen + 0 + Copy Count.Migrating Status.New Evergreen Status.Notes + SELECT COUNT(ac.l_id), cs.l_name FROM m_asset_copy_legacy ac JOIN config_copy_status_legacy cs ON cs.l_id = ac.l_status GROUP BY 2 ORDER BY 2 + You only need to fill this sheet out if you use custom statuses that need to be migrated. + + + + evg_m_asset_circ_mods + Circulation Modifiers + evergreen + 0 + Copy Count.Migrating Circ Mod.New Circ Mod.Notes + SELECT COUNT(ac.l_id), ac.l_circ_modifier FROM m_asset_copy_legacy ac GROUP BY 2 ORDER BY 2 + + + + evg_m_asset_copy_locs + Copy Locations + evergreen + 0 + Count.Library.Migrating Copy Location.New Copy Location.Notes + SELECT COUNT(ac.l_id), aou.l_name, acl.l_name FROM m_asset_copy_location_legacy acl JOIN m_actor_org_unit_legacy aou ON aou.l_id = acl.l_owning_lib JOIN m_asset_copy_legacy ac ON ac.l_location = acl.l_id GROUP BY 2, 3 ORDER BY 2, 3 + Any locations not mapped can be moved over as their existing locations. 
+ + + + evg_permission_grps + Permission Groups + evergreen + 0 + Count.Migrating Permission Group.New Permission Group.Notes + SELECT COUNT(au.l_id), pgt.l_name FROM m_actor_usr_legacy au JOIN permission_grp_tree_legacy pgt ON pgt.l_id = au.l_profile GROUP BY 2 ORDER BY 2 + + + + + tlc_load_branches_list + tlc + 0 + Branches Present in Extract + Name.Evergreen Org Unit.Notes + SELECT l_name FROM ctbranches_tsv_clean ORDER BY 1 + + + + tlc_load_m_asset_notes + Item Notes + tlc + 0 + Count.Note Type.Notes + SELECT COUNT(l_itemcomment)::TEXT, 'Item Comments' FROM ititeminformation_tsv_clean WHERE l_itemcomment IS NOT NULL GROUP BY 2 + UNION ALL SELECT COUNT(l_physicalcondition)::TEXT, 'Condition Notes' FROM ctlocitem_tsv_clean WHERE l_physicalcondition IS NOT NULL GROUP BY 2 + UNION ALL SELECT COUNT(l_checkinoutnote)::TEXT, 'Circ Notes' FROM ctlocitem_tsv_clean WHERE l_checkinoutnote IS NOT NULL GROUP BY 2 + UNION ALL (SELECT DISTINCT 'Sample Item Comment', l_itemcomment FROM ititeminformation_tsv_clean WHERE l_itemcomment IS NOT NULL LIMIT 20) + UNION ALL (SELECT DISTINCT 'Sample Physical Condition', l_physicalcondition FROM ctlocitem_tsv_clean WHERE l_physicalcondition IS NOT NULL LIMIT 20) + UNION ALL (SELECT DISTINCT 'Sample Circ Note', l_checkinoutnote FROM ctlocitem_tsv_clean WHERE l_checkinoutnote IS NOT NULL LIMIT 20) + + + + + tlc_load_m_asset_holdings_codes + Holdings Codes + tlc + 0 + Count.Holdings Codes.Evergreen Circulation Modifier.Evergreen Shelving Locatione + SELECT COUNT(l_barcode), l_activeholdingscode FROM ctlocitem_tsv_clean GROUP BY 2 ORDER BY 2 + + + + + tlc_load_m_money_migrating_bills + Migrating Bills By Bill Type + tlc + 0 + Count.Billing Type.Evergreen Bill Type + SELECT COUNT(a.l_chargenumber), b.l_description FROM itpayment_tsv_clean a JOIN itpaymenttype_tsv_clean b ON b.l_paymenttype = a.l_paymenttype WHERE a.l_dueamount::INTEGER - (a.l_waiveamount::INTEGER + a.l_tenderamount::INTEGER) > 0 GROUP BY 2 ORDER BY 2 + + + + 
tlc_load_usrs_bygroup + Patrons by Agency Type + tlc + 0 + Count.Permission Group.Evergreen Permission Group + SELECT COUNT(l_agencynumber), l_agencytype FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 + + + + tlc_load_usrs_byexpiration + Patrons by Expiration Date + tlc + 0 + Count.Year of Expiration.Do Not Migrate? + SELECT COUNT(l_agencynumber), LEFT(l_expirationdate,4) FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 + + + + tlc_load_usrs_byactive + Patrons by Last Active Date + tlc + 0 + Count.Year Last Active.Migrate as Active Flag? + SELECT COUNT(l_agencynumber), LEFT(l_lastactivedate,4) FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 + We can set the active flag based on this if desired. + + + + tlc_load_usrs_blocks + Patrons by Block Status + tlc + 0 + Count.Block Status.Migration Note + SELECT COUNT(l_agencynumber), l_blockstatus FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 + + + + tlc_load_usrs_gender + Patrons by Gender + tlc + 0 + Count.Gender.Migrate as Stat Cat + SELECT COUNT(l_agencynumber), l_gender FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 + Evergreen can load these as statistical categories + + + + tlc_load_active_usr_passwords + Count of Patrons w Passwords + tlc + 0 + Count of NULL PINs.Default Password + SELECT COUNT(l_agencynumber) FROM itagency_tsv_clean WHERE l_pin IS NULL + If any PINS are blank we will have to choose a method to supply them as Evergreen requires passwords. + + + + tlc_load_usr_notes + Count of Patron Notes + tlc + 0 + Count.Note Type.Action + SELECT COUNT(l_agencynumber), 'Patron Note' FROM itagency_tsv_clean WHERE l_agencycomment IS NOT NULL UNION ALL SELECT COUNT(l_agencynumber), 'Address Comments' FROM itaddress_tsv_clean WHERE l_addresscomment IS NOT NULL + + + + tlc_load_usr_balance + Count and Sum of Balances + tlc + 0 + Count.SUM in Pennies.Migrate? 
+ SELECT COUNT(l_agencynumber), SUM(l_accountbalance::INTEGER) FROM itagency_tsv_clean WHERE l_accountbalance != '0' + If this is being migrated there are a few options of how to do it and each will have different workflows. + + + + tlc_load_usr_stat_cats + Patron Stat Cats and Counts + tlc + 0 + Patron Count.Stat Cat.Migrate? + SELECT COUNT(b.l_agencynumber), a.l_description FROM itagencyfields_tsv_clean a JOIN itagencydata_tsv_clean b ON b.l_agencyfieldnumber = a.l_agencyfieldnumber GROUP BY 2 ORDER BY 1 + + + + + + destiny_load_usr_by_gradelevel + Patrons by Destiny Grade Level + destiny + 0 + Count.Graduation Year + SELECT COUNT(*), grade_level FROM patrons_csv GROUP BY 2 ORDER BY 2 + Transfer to note or stat cat? + + + + destiny_load_usr_by_gender + Patrons by Destiny Gender + destiny + 0 + Count.Gender + SELECT COUNT(*), gender FROM patrons_csv GROUP BY 2 ORDER BY 2 + Transfer to stat cat? + + + + destiny_load_usr_by_patrontype + Patrons by Destiny Patron Type + destiny + 0 + Count.Patron Type.Permission Group + SELECT COUNT(*), patron_type FROM patrons_csv GROUP BY 2 ORDER BY 2 + + + + destiny_load_usr_by_status + Patrons by Destiny Status + destiny + 0 + Count.Status + SELECT COUNT(*), status FROM patrons_csv GROUP BY 2 ORDER BY 2 + + + + destiny_load_usr_by_municipality + Patrons by Municipality + destiny + 0 + Count.Municipality + SELECT COUNT(*), municipality FROM patrons_csv GROUP BY 2 ORDER BY 2 + State cat? + + + + destiny_load_usr_notes + Types of and Counts of Notes + destiny + 0 + Type of Note.Count + SELECT 'General Note', COUNT(*) FROM patrons_csv WHERE note_general IS NOT NULL and note_general != '' UNION ALL SELECT 'Important Note', COUNT(*) FROM patrons_csv WHERE note_important IS NOT NULL and note_important != '' + Messsage / alert / note? + + + + destiny_load_usr_userdefined5 + User Defined Field 5 + destiny + 0 + Count.Values + SELECT COUNT(*), user_defined_5 FROM patrons_csv GROUP BY 2 ORDER BY 2 + Retain somewhere? 
+ + + + destiny_load_usrs_pswdconfigured + Patrons by Password Configured + destiny + 0 + Count.Password Configured + SELECT COUNT(*), password_configured FROM patrons_csv GROUP BY 2 ORDER BY 2 + Need how they want passwords set since we don't have them to migrate and do they want these that are configured with some special note? + + + + destiny_load_usrs_phonefields + Phone Fields + destiny + 0 + Phone Field.Count.Evergreen Phone Field + SELECT 'Primary Phone 1', COUNT(*) FROM patrons_csv WHERE primary_phone_1 IS NOT NULL AND primary_phone_1 != '' UNION ALL SELECT 'Primary Phone 2', COUNT(*) FROM patrons_csv WHERE primary_phone_2 IS NOT NULL AND primary_phone_2 != '' UNION ALL SELECT 'Secondary Phone 1', COUNT(*) FROM patrons_csv WHERE secondary_phone_1 IS NOT NULL AND secondary_phone_1 != '' UNION ALL SELECT 'Secondary Phone 2', COUNT(*) FROM patrons_csv WHERE secondary_phone_2 IS NOT NULL AND secondary_phone_2 != '' + + + + destiny_load_m_asset_categories + Count of Categories + destiny + 0 + Count.Category.Circ Mod? + SELECT COUNT(*), category FROM copies_csv GROUP BY 2 ORDER BY 2 + + + + destiny_load_m_asset_notes + Copies by Note Types + destiny + 0 + Note Type.Count + SELECT 'General Note', COUNT(*) FROM copies_csv WHERE note_general IS NOT NULL and note_general != '' UNION ALL SELECT 'Important Note', COUNT(*) FROM copies_csv WHERE note_important IS NOT NULL and note_important != '' + Retain? + + + + destiny_load_m_asset_sublocation + Copies by Sub Location + destiny + 0 + Count.Sub Location.Shelving Location? + SELECT COUNT(*), sublocation FROM copies_csv GROUP BY 2 ORDER BY 2 + + + + destiny_load_m_asset_vendor + Copies by Vendor + destiny + 0 + Count.Vendor + SELECT COUNT(*), vendor FROM copies_csv GROUP BY 2 ORDER BY 2 + Retain? 
+ + + + destiny_load_m_asset_descriptions + Copies with Description Fields + destiny + 0 + Description Field.Count + SELECT 'Description Field 1', COUNT(*) FROM copies_csv WHERE description_1 IS NOT NULL and description_1 != '' UNION ALL SELECT 'Description Field 2', COUNT(*) FROM copies_csv WHERE description_2 IS NOT NULL and description_2 != '' UNION ALL SELECT 'Description Field 3', COUNT(*) FROM copies_csv WHERE description_3 IS NOT NULL and description_3 != '' + Need report? Retain? + + + + destiny_load_fines_byreason + destiny + 0 + Fines by Reason + Count.Reason + SELECT COUNT(*), reason FROM fines_csv GROUP BY 2 ORDER BY 2 + + + + + + + circ_bystatus + Circulations by Status + apollo + 0 + Count.Status.Type + SELECT COUNT(id), l_status, l_type FROM m_action_circulation_legacy GROUP BY 2, 3 + Circulations will only not be migrated if they can't be attached to a migrated patron and holding. + + + + hold_bystatus + Holds by Status + apollo + 0 + Count.Status + SELECT COUNT(id), l_status FROM m_action_hold_request_legacy GROUP BY 2 + Only unfilled holds are being migrated. + + + + m_asset_pending_bibs + Pending Records + apollo + 0 + Count.Year of Last Edit.Count of Copies Attached + SELECT COUNT(bre.id), CASE WHEN LENGTH(bre.l_edited) > 1 THEN EXTRACT(YEAR FROM bre.l_edited::TIMESTAMP) ELSE '1900' END, COUNT(ac.id) FROM m_biblio_record_entry_legacy bre LEFT JOIN m_asset_copy_legacy ac ON ac.l_biblio = bre.l_id WHERE bre.l_status = 'pending' GROUP BY 2 ORDER BY 2 + + + + + m_asset_copies_by_status + Copies by Status + apollo + 0 + Count.Status + SELECT COUNT(id), l_status FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 + Deleted copies with a delete date were not exported additionally those with the deleted status won't get loaded. 
+ + + + m_asset_pending_copies + Pending Copies by Last Edit + apollo + 0 + Count.Last Edited + SELECT COUNT(id), CASE WHEN LENGTH(l_edited) > 1 THEN EXTRACT(YEAR FROM l_edited::TIMESTAMP) ELSE '1900' END FROM m_asset_copy_legacy WHERE l_status = 'pending' GROUP BY 2 ORDER BY 2 + + + + assets_by_memberships + Copies by Memberships + apollo + 0 + Count.Membership Number.Membership Name + SELECT COUNT(ac.id), acl.l_membership_number, acl.l_membership_name FROM m_asset_copy_legacy ac JOIN m_asset_copy_location_legacy acl ON acl.l_membership_name = ac.l_memberships GROUP BY 2,3 ORDER BY 2 + + + + + m_money_bills + Bills + apollo + 0 + Count.Status + SELECT COUNT(id), l_status FROM m_money_billing_legacy GROUP BY 2 ORDER BY 2 + Unless there is a good reason to do so forgiven and paid bills will not be migrated. + + + + m_actor_groups + Patron Membership Groups + apollo + 0 + Membership List Name.Membership List ID.Membership Number.Membership Name.Membership Length in Monthst + SELECT l_membership_list_name, l_membership_list_id, l_membership_number, l_membership_name, l_membership_length_months FROM m_actor_usr_legacy_groups_tsv ORDER BY 1, 3 + Age looks like a good target for a stat cat and / or juvenile setting while the patron type can map to profiles. 
+ + + + m_actor_by_groups + Patrons by Membership Groups + apollo + 0 + Count.Membership List Name.Membership Number + SELECT COUNT(*), l_membership_list_name, l_membership_number FROM (SELECT id, UNNEST(STRING_TO_ARRAY(l_memberships,'|')) AS m FROM m_actor_usr_legacy ) x JOIN m_actor_usr_legacy_groups_tsv t ON t.l_membership_name = x.m GROUP BY 2, 3 ORDER BY 2, 3 + + + + m_actor_addresses_nulls + Patron Addresses + apollo + 0 + Address Field.Nulls + SELECT 'Street Address', COUNT(id) FROM m_actor_usr_address_legacy WHERE l_lines IS NULL UNION ALL SELECT 'City', COUNT(id) FROM m_actor_usr_address_legacy UNION ALL SELECT 'State', COUNT(id) FROM m_actor_usr_address_legacy WHERE l_country_division IS NULL UNION ALL SELECT 'Postal Code', COUNT(id) FROM m_actor_usr_address_legacy WHERE l_postal_code IS NULL + If any of these fields are null then we need defaults to fill in, note the extract had no city data. + + + + m_actor_phones + Patron Phones + apollo + 0 + Count.Type + SELECT COUNT(*), l_type FROM m_actor_usr_phones_tsv GROUP BY 2 ORDER BY 2 + These need to be mapped to Evergreen phone types. 
+ + + + + + + hz_borrowersbybtypes + Borrowers by Borrower Types + horizon + 0 + Count.Borrower Type + SELECT COUNT(id), l_borrower_btype FROM m_actor_usr_legacy GROUP BY 2 ORDER BY 2; + + + + hz_borrowersbybtypes + Borrowers by Borrower Types + horizon2 + 0 + Count.Borrower Type.Description + SELECT COUNT(*), a.btype, b.descr FROM borrower_csv_clean a JOIN btype_csv_clean b ON b.btype = a.btype GROUP BY 2, 3 ORDER BY 2; + + + + hz_borrowerpincount + Borrower PINs Count + horizon + 0 + Count of Migratable Passwords / PINs + SELECT COUNT(l_borrower_pin) FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_pin) > 1; + + + + hz_borrowerpincount + Borrower PINs Count + horizon2 + 0 + Count of Migratable Passwords / PINs + SELECT COUNT(pin) FROM borrower_csv_clean WHERE LENGTH(pin) > 1; + + + + hz_blocks + Borrower Blocks + horizon2 + 0 + Count of Entries.Block Description + SELECT COUNT(*), b.descr FROM burb_csv_clean a JOIN block_csv_clean b ON a.block = b.block GROUP BY 2; + + + + hz_borrowernotesample + Borrower Note Field Samples + horizon + 0 + Sample of Migratable Notes + SELECT l_borrower_borrower_note FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_borrower_note) > 1 LIMIT 20; + + + + hz_borrowernotesample + Borrower Note Field Samples + horizon2 + 0 + Sample of Migratable Notes + SELECT borrower_note FROM borrower_csv_clean WHERE LENGTH(borrower_note) > 1 LIMIT 20; + + + + hz_borrowernotescount + Count of Migratable Borrower Notes + horizon + 0 + Count + SELECT COUNT(l_borrower_borrower_note) FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_borrower_note) > 1; + + + + hz_borrowernotescount + Count of Migratable Borrower Notes + horizon2 + 0 + Count + SELECT COUNT(borrower_note) FROM borrower_csv_clean WHERE LENGTH(borrower_note) > 1; + + + + hz_borrowernotesample2 + Borrower Note Field 2 Samples + horizon + 0 + Count + SELECT l_borrower_note2 FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_note2) > 1 LIMIT 20; + + + + hz_borrowernotesample3 + Borrower Note Field 3 
Samples + horizon + 0 + Count + SELECT l_borrower_note3 FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_note3) > 1 LIMIT 20; + + + + hz_phones + Borrower Phones + horizon + 0 + Count.Borrower Phone Type + + SELECT COUNT(*), b + FROM (SELECT l_borrower_phone_1_phone_type AS b FROM m_actor_usr_legacy + UNION ALL SELECT l_borrower_phone_2_phone_type AS b FROM m_actor_usr_legacy + UNION ALL SELECT l_borrower_phone_3_phone_type AS b FROM m_actor_usr_legacy + UNION ALL SELECT l_borrower_phone_4_phone_type AS b FROM m_actor_usr_legacy) x + GROUP BY 2 ORDER BY 2 + + + + + hz_phones + Borrower Phones + horizon2 + 0 + Count.Borrower Phone Position + + SELECT COUNT(*), '0' FROM borrower_phone_csv_clean WHERE ord = '0' + UNION ALL SELECT COUNT(*), '1' FROM borrower_phone_csv_clean WHERE ord = '1' + UNION ALL SELECT COUNT(*), '2' FROM borrower_phone_csv_clean WHERE ord = '2' + UNION ALL SELECT COUNT(*), '3' FROM borrower_phone_csv_clean WHERE ord = '3' + UNION ALL SELECT COUNT(*), '4' FROM borrower_phone_csv_clean WHERE ord = '4' + + + + + hz_bstats + Borrower B-Stats + horizon + 0 + Count.BStat + SELECT COUNT(*), b + FROM (SELECT l_borrower_bstat_1_bstat AS b FROM m_actor_usr_legacy + UNION ALL + SELECT l_borrower_bstat_2_bstat AS b FROM m_actor_usr_legacy + UNION ALL + SELECT l_borrower_bstat_3_bstat AS b FROM m_actor_usr_legacy) x + GROUP BY 2 ORDER BY 1; + + + + + hz_bstats + Borrower B-Stats + horizon2 + 0 + Count.B-Stat.Description + SELECT COUNT(*), a.bstat, b.descr FROM borrower_bstat_csv_clean a JOIN bstat_csv_clean b ON b.bstat = a.bstat GROUP BY 2, 3; + + + + + hz_copybycollection + Copies by Collection + horizon + 0 + Count.Collection + SELECT COUNT(id), l_collection FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2; + + + + hz_copybycollection + Copies by Collection + horizon2 + 0 + Count.Collection.Description.PAC Description + SELECT COUNT(*), a.collection, c.descr, c.pac_descr FROM item_csv_clean a JOIN collection_csv_clean c ON c.collection = a.collection GROUP BY 
2, 3, 4 ORDER BY 2, 3, 4; + + + + hz_itemsbyitype + Items by IType + horizon + 0 + Count.Item Type (itype) + SELECT COUNT(id), l_itype FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2; + + + + hz_itemsbyitype + Items by IType + horizon2 + 0 + Count.Item Type (itype).Description + SELECT COUNT(*), a.itype, b.descr FROM item_csv_clean a JOIN itype_csv_clean b ON b.itype = a.itype GROUP BY 2, 3 ORDER BY 2; + + + + hz_internalnotescount + Internal/Check In Item Notes + horizon + 0 + Count + SELECT COUNT(l_internal_note) FROM m_asset_copy_legacy WHERE LENGTH(l_internal_note) > 1; + + + + hz_internalnotescount + Internal/Check In Item Notes + horizon2 + 0 + Count + SELECT COUNT(cki_notes) FROM item_csv_clean WHERE LENGTH(cki_notes) > 1; + + + + hz_internalnotesample + Internal/Check In Item Notes Sample + horizon + 0 + Count + SELECT l_internal_note FROM m_asset_copy_legacy WHERE LENGTH(l_internal_note) > 1 LIMIT 20; + + + + hz_internalnotesample + Internal/Check In Item Notes Sample + horizon2 + 0 + Count + SELECT cki_notes FROM item_csv_clean WHERE LENGTH(cki_notes) > 1 LIMIT 20; + + + + hz_burbbills + Count of Bills by Type + horizon2 + 0 + Count.Bill Type + SELECT COUNT(*), block FROM burb_csv_clean WHERE amount::INTEGER > 0 GROUP BY 2; + + + + + + rm_load_circ_count + rm + 0 + Circs by Status + Count of Circs.Status + SELECT COUNT(id), l_is_checked_out FROM m_asset_copy_legacy GROUP BY 2 + + + + rm_load_m_asset_by_resource_type + Resource Type + rm + 0 + Count.Resource Type + SELECT COUNT(*), l_resource_type FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 + + + + rm_load_m_asset_by_location + Copies by Location + rm + 0 + Count.Location + SELECT COUNT(*), l_location FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 + + + + rm_load_m_asset_by_category + Copies by Category + rm + 0 + Count.Category + SELECT COUNT(*), l_category FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 + + + + rm_load_m_asset_by_status + Copies by Status + rm + 0 + Count.Status + SELECT COUNT(*), 
l_status FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 + + + + rm_m_actor_groups + Patrons by User Groups + rm + 0 + Count.Group + SELECT COUNT(id), l_user_group FROM m_actor_usr_legacy GROUP BY 2 ORDER BY 2; + + + + + rm_m_actor_access + Patrons by Access Field + rm + 0 + Count.Access + SELECT COUNT(id), l_access_if_applicable FROM m_actor_usr_legacy GROUP BY 2 ORDER BY 2; + + + + + rm_m_actor_comments + Patron Comments + rm + 0 + Count.Sample + SELECT COUNT(id), 'All Comments' FROM m_actor_usr_legacy WHERE LENGTH(l_comments) > 1 + UNION ALL SELECT NULL, l_comments FROM m_actor_usr_legacy WHERE LENGTH(l_comments) > 1 LIMIT 10 + + + + + rm_m_actor_circulation_note + Patron Circ Notes + rm + 0 + Count.Sample + SELECT COUNT(id), 'All Notes' FROM m_actor_usr_legacy WHERE LENGTH(l_circulation_note) > 1 + UNION ALL SELECT NULL, l_circulation_note FROM m_actor_usr_legacy WHERE LENGTH(l_circulation_note) > 1 LIMIT 10 + + + + + diff --git a/mig b/mig deleted file mode 100755 index 3d53a6c..0000000 --- a/mig +++ /dev/null @@ -1,334 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig - git-like program for tracking and manipulating legacy data files for -migrations - -=head1 SYNOPSIS - -B [argument] [...] - -=head1 DESCRIPTION - -B is used to track and manipulate CSV or CSV-like text files exported from -legacy systems for migration into Evergreen. It can be a wrapper for some -other migration tools and tracks state using a PostgreSQL table in a given -migration schema. - -It makes use of certain environment variables that may be set by the B -tool: PGHOST, PGPORT, PGUSER, PGDATABASE, MIGSCHEMA, and MIGWORKDIR - -For most commands, if the current working directory falls outside of the -directory specified by MIGWORKDIR, then mig will assume that environment is -also incorrect and bail before doing any actual work. 
- -~/.pgpass should also be configured, as B will not prompt for a database -password. - -Only the B and B commands work without the MIGSCHEMA environment -variable being set. - -=head1 OVERVIEW - -Using B should go something like this: - -=over 15 - -=item mig env create m_foo # Sets up the environment - -=item mig env use m_foo # Spawns a shell using the configured environment - -=item mig init # creates the m_foo schema in the database if needed, and other tables - -=item mig add patrons.tsv # tracks an incoming data file; repeat for additional files - -=item mig iconv patrons.tsv # convert it to UTF8, creating patrons.tsv.utf8 - -=item mig clean patrons.tsv # cleans the file, creating patrons.tsv.utf8.clean - -=item mig link patrons.tsv actor_usr # makes the soon-to-be staging table a child of m_foo.actor_usr - -=item mig convert patrons.tsv # creates a .sql file for staging the data - -=item mig stage patrons.tsv # load said .sql file - -=item mig mapper patrons.tsv # interactive tool for analyzing/mapping the staging table - -=item mig analysis patrons.tsv # writes a summary .tsv file of mapped/flagged fields from the staging table - -=item mig map patrons.tsv # apply configured mappings - -=item mig write_prod patrons.tsv # creates a .sql file for pushing the staging data into production - -=item mig reporter --analyst "Foo Fooer" --report_title "Foo Load Analysis" #creates an asciidoc report - -=item mig gsheet --pull foo_tab_name OR --push foo_pg_table_name - -=item mig stagebibs --file foo.xml - -=back - -=head1 COMMANDS - -=over 15 - -=item B [command] - -Display this very same documentation, or specific documentation for one of the -commands listed here. - -=item B - -Invokes B with the same arguments. I can set important -environment variables and spawn a shell with those variables, and it also does -some directory creation and symlinking. 
- -=item B - -Create or re-create the PostgreSQL tracking table for the schema specified by -the MIGSCHEMA environment variable. If needed, create the migration schema -itself and run migration_tools.init() and build() if the migration_tools schema -exists. - -=item B [file] [...] - -Show status information for either the specified files or all tracked files if -no argument is given. - -=item B [--no-headers|--headers] [file|--no-headers|--headers] [...] - -Add the specified files to the migration tracker. Until --no-headers is -specified, the tracker will assume the files have headers. - -You can do crazy stuff like -B - -=item B [file] [...] - -Remove the specified files from the migration tracker. - -=item B [other arguments...] - -Attempts to invoke B on the specified tracked file, placing the output in -.utf8 - -If given no other arguments, the invocation will lool like - -=over 5 - -iconv -f ISO-8859-1 -t UTF-8 -o .utf8 - -=back - -otherwise, the arguments will be passed through like so - -=over 5 - -iconv [other arguments...] -o .utf8 - -=back - -=item B - -If this is used instead of B, then B will look for an existing -.utf8 and use it instead of attempting to create one. - -=item B [other arguments...] - -Attempts to invoke B on the iconv-converted specified tracked file, -placing the output in .utf8.clean - -If given no other arguments, the invocation will lool like - -=over 5 - -clean_csv --config scripts/clean.conf --fix --apply <--create-headers> - -=back - -otherwise, the arguments will be passed through like so - -=over 5 - -clean_csv [other arguments...] - -=back - -=item B - -If this is used instead of B, then B will look for an existing -.utf8.clean and use it instead of attempting to create one. - -=item B - -Associate the specified file with a parent table within the migration schema. - -Linking multiple files to the same parent table is not allowed currently. 
- -=item B - -Removes any association between the specified file and a parent table within -the migration schema. - -=item B - -Attempts to invoke B on the .utf8.clean version of the specified -tracked file, creating either [file].utf8.clean.stage.sql or -_stage.sql depending on whether the file has been linked to a -parent table within the migration schema or not. - -If given no other arguments, the invocation will lool like - -=over 5 - -csv2sql --config scripts/clean.conf --add-x-migrate --schema [--parent ] -o <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean - -=back - -otherwise, the arguments will be passed through like so - -=over 5 - -csv2sql [other arguments...] -o <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean - -=back - -=item B [other arguments...] - -Load the SQL-converted version of the specified file into the migration schema. - -Extra arguments are passed to the underlying call to psql - -=item B - -Interactive session for analyzing, flagging, and mapping legacy field data to -Evergreen fields. - -Upon exit, generate either [file].clean.map.sql or _map.sql. The -SQL generated will be UPDATE's for setting the Evergreen-specific columns for a -given file's staging tables, and TRUNCATE's and INSERT's for auxilary tables. -The files will have \include hooks for pulling in additional mapping files -(for example, end-user mappings for circ modifiers, etc.) - -=item B [file] - -Writes a MIGSCHEMA.tsv file containing a break-down of mapped and flagged -fields from the specified file, or all staged files if no file is specified. - -The main goal of the tsv file is to present end-user mappable data for circ -modifiers, shelving locations, patron profiles, etc. We use spreadsheets for -this now but may move to a dedicated UI in the future. - -=item B [file] - -Applies the mapping sql to the migration schema for the specified mapped file, -or for all mapped files if no file is specified. 
- -=item B [file] - -Generates _prod.sql for the specified linked and mapped file, or -all such files if no file is specified. - -=item B [arguments...] - -A wrapper around the psql command. At some point the plan is to shove mig-tracked variables into psql sessions. - -=item B --analyst "Analyst Name" --report_title "Report Title" - -Generates an asciidoc file in the git working directory that can be converted to -any appropriate format. The analyst and report parameters are required. - -Optional parameters are : - ---added_page_title and --added_page_file - -If one is used both must be. The added page file can be plain text or asciidoc. This -adds an extra arbitrary page of notes to the report. Mig assumes the page file is in the mig git directory. - ---tags - -This will define a set of tags to use, if not set it will default to Circs, -Holds, Actors, Bibs, Assets & Money. - ---debug - -Gives more information about what is happening. - ---reports_xml - -Allows you to override the default evergreen_staged_report.xml in the mig-xml folder. - -=item B --pull or --push spreadsheet_tab - -This uses the gsheet_tracked_table and gsheet_tracked column tables to map a Google Docs Spreadsheet tabs -with Postgres tables in the mig schema. The spreadsheet is assumed to share the name as the mig schema. -Tab names must be unique. Each spreadsheet column needs a header that matches the column name in the matching -table. An oauth session key is also needed for your Google account and mig gsheet will look for it in the -.mig directory. - -=back - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use FindBin; -my $mig_bin = "$FindBin::Bin/mig-bin/"; -use lib "$FindBin::Bin/mig-bin"; -use Mig; - -pod2usage(-verbose => 2) if ! 
$ARGV[0]; -switch($ARGV[0]) { - case "help" { - if (defined $ARGV[1]) { - my $cmd = $mig_bin . "mig-$ARGV[1]"; - if (-e $cmd) { - system( $mig_bin . "mig-$ARGV[1]", '--help' ); - } else { - pod2usage(-verbose => 2); - } - } else { - pod2usage(-verbose => 2); - } - } - case "map" { - } - case "load" { - } - case "wdir" { - print "$MIGWORKDIR\n"; - } - case "gdir" { - print "$MIGBASEGITDIR\n"; - } - case "sdir" { - print "$MIGGITDIR\n"; - } - else { - standard_invocation(@ARGV); - } -} - -sub standard_invocation { - my $cmd = shift; - - if ($cmd ne 'env') { Mig::die_if_no_env_migschema(); } - if (-e $mig_bin . "mig-$cmd") { - system( $mig_bin . "mig-$cmd", @_ ); - } else { - system( "mig-$cmd", @_ ) == 0 or die pod2usage(1); - } - -} - - diff --git a/mig-asc/dedupe_process.asciidoc b/mig-asc/dedupe_process.asciidoc deleted file mode 100644 index b88ff1b..0000000 --- a/mig-asc/dedupe_process.asciidoc +++ /dev/null @@ -1,42 +0,0 @@ -== Migration Deduplication Process - -The deduplication process covers what is considered a descriptive bibliographic record for items. Other kinds of MARC records such as authorities and MFHDs for serials are not included in the deduplication process. This process is updated periodically as we review the best ways to do this given that metadata in Evergreen changes as do cataloging practices. - -Two important terms to understand as part of the deduplication process are incumbent bibliographic records and incoming bibliographic records. Incumbent records are defined as all the bib records in the library's database except those that are being brought in as part of the migration. Incoming bib records are those that are being imported as part of the migration. The migration deduplication is not tailored to a specific catalog's needs nor meant to be comprehensive of all scenarios. It is meant to reduce duplication of bibs that have common identifiers while preventing merges of materials that should not share a common bib record. 
It is not an inclusive deduplication, which is to say that duplicate incumbent items are not matched against each other - only incoming materials are matched against incumbent ones. - -The process begins by creating one list of incoming bib records and one of incumbent bib records. Each list is then assigned a set of values. Most of these are from the first value found from a list of possible values. - -* Author - derived from the 100$a, or 110$a, or 111$a, then made all lowercase and some punctuation is removed -* Title - derived from the 245$p and $n combined, then made all lowercase and some punctuation is removed -* Publication Date derived from the 260$c or 264$c with an indicator 2 value of '1', this tracks only the first one found even in the case of a record with multiples and all non-digit values removed -* Identifiers - a list of all the $a & $z from the 020 and 024 with non-alphanumeric values removed -* Search Formats - These are values calculated by Evergreen from MARC attributes, primarily the 007 and 008s to identify kinds of materials. Common values include: blu-ray, book, braille, casaudiiobook, casmusic, cdmusic, dvd, eaudio, electronic, equip, evideo, kit, map, music, microform, phonomusic, picture, score, serial, software and vhs. A single bib record can have multiple search formats depending on what information is present in the bib record. Duplicate search formats are removed, so if information is present due to multiple 008s for more than one book then book is only recorded once. - -A full description of how the physical description fixed fields of the 007 and fixed length data elements of the 008 interact is beyond the scope of the deduplication description but you can find more information at the Library of Congress' web site at https://www.loc.gov/marc/bibliographic/bd007.html and https://www.loc.gov/marc/bibliographic/bd008.html. 
- -After the lists of incumbent and incoming bibs are created they are compared and a list of potential matches are created. To be considered a potential match the following criteria must be met: - -* One identifier must match between the lists. -* The author must match exactly. -* The title must match exactly. -* The search formats must match. So, for example, if a record has entries for book, large print book and dvd then it will only match to another record that has book, large print book and dvd. If a single format is different between the two records it will not match. -* The publication date must match. - -Once a list of potential matches are built, the scoring begins. Scoring is done for each incoming bib record that there is a potential match among the incumbents for and for each incumbent that is a potential match. - -Scoring is done by evaluating each MARC record and giving it a score between 0 and 2 billion. However, most records have a score between 10 and 100 with some very detailed records getting much higher scores. - -A record's score is built as follows: - -* 1 point for each identifier subfield from the 020, 022 and 024 MARC fields -* 2 points for each subject subfield from the 6xx MARC fields -* 1 point for each title subfield from the 210, 222 and 24x MARC fields as long as they are one of the following descriptive subfields: 'a','b','c','f','g','k','n','p','s', or '0' -* 1 point for each author subfield from the 100, 110, 111 and 130 MARC fields as long as they are one of the following descriptive subfields: 'a','b','c','d','e','f','g','j','k','l','n','p','q','t', or 'u' -* 1 point for each added entry from the 70x, 71x,72x, 73x, 74x, 75x, 80x, 81x, 83x - -Once the scores are created, the highest scoring incumbent bib record is selected as the one for the incoming bib matching it to be merged to, unless the incumbent's score is lower than the incoming bib's, in which case the incoming bib is retained and no merge is created. 
- -The list of merges is then run as a process. This process is very system intensive and involves merging all of the assets associated with one bib to the other, such as notes, copies, volumes, parts, transfering title level holds and so on. 856 tags from the incoming bib are transferred to the incumbent but no other MARC data is transferred. - -The process runs conservatively and varies but averages 1,000 bibs per hour. - diff --git a/mig-bin/Mig.pm b/mig-bin/Mig.pm deleted file mode 100644 index f1b97f2..0000000 --- a/mig-bin/Mig.pm +++ /dev/null @@ -1,268 +0,0 @@ -package Mig; - -use strict; -use Exporter; -use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS); - -$VERSION = 1.00; -@ISA = qw(Exporter); -@EXPORT = (); -@EXPORT_OK = qw(); -%EXPORT_TAGS = ( - DEFAULT => [] -); - -use DBI; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); - -sub db_connect { - my $dbh; - if ($PGHOST) { - $dbh = DBI->connect( - "dbi:Pg:host=$PGHOST;dbname=$PGDATABASE;port=$PGPORT" - ,$PGUSER - ,undef - ) || die "Unable to connect to $PGHOST:$PGPORT:$PGDATABASE:$PGUSER : $!\n"; - } else { - $dbh = DBI->connect("dbi:Pg:dbname=$PGDATABASE", "", "") || die "Unable to connect to $PGDATABASE : $!\n"; - } - $dbh->do("SET search_path TO $MIGSCHEMA, evergreen, pg_catalog, public"); - return $dbh; -} - -sub db_disconnect { - my $dbh = shift; - $dbh->disconnect; -} - -sub sql { - my $sql = shift; - chomp $sql; - $sql =~ s/\n//g; - print "\n$sql\n"; - return $sql; -} - -sub die_if_no_env_migschema { - die "MIGSCHEMA environment variable not set. See 'mig env help'\n" - unless $MIGSCHEMA; -} - -sub check_for_db_migschema { - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT EXISTS( - SELECT 1 - FROM pg_namespace - WHERE nspname = ? 
- );" - ); - my $rv = $sth->execute($MIGSCHEMA) - || die "Error checking for migration schema ($MIGSCHEMA): $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - my $found; - if ($cols[0]) { - print "Found migration schema ($MIGSCHEMA) at $PGHOST:$PGPORT:$PGDATABASE:$PGUSER\n"; - $found = 1; - } else { - print "Migration schema ($MIGSCHEMA) does not exist at $PGHOST:$PGPORT:$PGDATABASE:$PGUSER\n"; - $found = 0; - } - db_disconnect($dbh); - return $found; -} - -sub check_db_migschema_for_migration_tables { - my $found = check_db_migschema_for_specific_table('asset_copy'); - if (!$found) { - print "Missing migration tables (such as $MIGSCHEMA.asset_copy)\n"; - } - return $found; -} - -sub check_db_migschema_for_specific_table { - my $table = shift; - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT EXISTS( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = " . $dbh->quote( $MIGSCHEMA ) . " - AND table_name = " . $dbh->quote( $table ) . " - );" - ); - my $rv = $sth->execute() - || die "Error checking migration schema ($MIGSCHEMA) for table ($table): $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - my $found; - if ($cols[0]) { - $found = 1; - } else { - $found = 0; - } - db_disconnect($dbh); - return $found; -} - -sub check_for_migration_tools { - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT EXISTS( - SELECT 1 - FROM pg_namespace - WHERE nspname = 'migration_tools' - );" - ); - my $rv = $sth->execute() - || die "Error checking for migration_tools schema: $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - db_disconnect($dbh); - return $cols[0]; -} - -sub die_if_no_migration_tools { - if (check_for_migration_tools()) { - print "Found migration_tools schema\n"; - } else { - die "Missing migration_tools schema\n"; - } -} - -sub check_for_mig_tracking_table { - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT EXISTS( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = " . 
$dbh->quote( $MIGSCHEMA ) . " - AND table_name = 'tracked_file' - );" - ); - my $rv = $sth->execute() - || die "Error checking for table (tracked_file): $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - db_disconnect($dbh); - return $cols[0]; -} - -sub die_if_mig_tracking_table_exists { - if (check_for_mig_tracking_table()) { - die "Table $MIGSCHEMA.tracked_file already exists. Bailing init...\n"; - } -} - -sub die_if_mig_tracking_table_does_not_exist { - if (!check_for_mig_tracking_table()) { - die "Table $MIGSCHEMA.tracked_file does not exist. Bailing...\n"; - } -} - -sub check_for_mig_column_tracking_table { - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT EXISTS( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = " . $dbh->quote( $MIGSCHEMA ) . " - AND table_name = 'tracked_column' - );" - ); - my $rv = $sth->execute() - || die "Error checking for table (tracked_column): $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - db_disconnect($dbh); - return $cols[0]; -} - -sub die_if_mig_column_tracking_table_exists { - if (check_for_mig_column_tracking_table()) { - die "Table $MIGSCHEMA.tracked_column already exists. Bailing init...\n"; - } -} - -sub die_if_mig_column_tracking_table_does_not_exist { - if (!check_for_mig_column_tracking_table()) { - die "Table $MIGSCHEMA.tracked_column does not exist. Bailing...\n"; - } -} - -sub check_for_tracked_file { - my $file = shift; - my $options = shift; - if (! -e $file) { - die "file not found: $file\n" unless $options && $options->{'allow_missing'}; - } - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT id - FROM $MIGSCHEMA.tracked_file - WHERE base_filename = " . $dbh->quote( $file ) . 
";" - ); - my $rv = $sth->execute() - || die "Error checking table (tracked_file) for base_filename ($file): $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - db_disconnect($dbh); - return $cols[0]; -} - -sub check_for_tracked_column { - my ($table,$column,$options) = (shift,shift,shift); - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT id - FROM $MIGSCHEMA.tracked_column - WHERE staged_table = " . $dbh->quote( $table ) . " - AND staged_column = " . $dbh->quote( $column ) . ";" - ); - my $rv = $sth->execute() - || die "Error checking table (tracked_column) for $table.$column: $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - db_disconnect($dbh); - return $cols[0]; -} - -sub status_this_file { - my $file = shift; - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM $MIGSCHEMA.tracked_file - WHERE base_filename = " . $dbh->quote( $file ) . ";" - ); - my $rv = $sth->execute() - || die "Error retrieving data from table (tracked_file) for base_filename ($file): $!"; - my $data = $sth->fetchrow_hashref; - $sth->finish; - db_disconnect($dbh); - return $data; -} - -sub status_this_column { - my ($table,$column) = (shift,shift); - my $dbh = db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM $MIGSCHEMA.tracked_column - WHERE staged_table = " . $dbh->quote( $table ) . " - AND staged_column = " . $dbh->quote( $column ) . 
";" - ); - my $rv = $sth->execute() - || die "Error checking table (tracked_column) for $table.$column: $!"; - my $data = $sth->fetchrow_hashref; - $sth->finish; - db_disconnect($dbh); - return $data; -} - -1; - diff --git a/mig-bin/mig-add b/mig-bin/mig-add deleted file mode 100755 index 3e433c5..0000000 --- a/mig-bin/mig-add +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-add - This will add the specified files to the mig tracking table for the -schema pointed to by the MIGSCHEMA environment variable in the PostgreSQL -database specified by various PG environment variables. - ---headers (the default) and --no-headers are repeatable, and indicate whether -subsequent files have headers or not - ---headers-file specifies a text file defining the column headers for -the next added , which should contain one line per header - ---headers-file will automatically invoke --no-headers - -You'll need to invoke B prior to using commands like B - -=head1 SYNOPSIS - -B [--no-headers|--headers|--headers-file ] [file|--no-headers|--headers|--headers-file ] [...] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $has_headers = 1; -my $headers_file; -my $next_arg_is_headers_file = 0; - -foreach my $arg (@ARGV) { - if ($next_arg_is_headers_file) { - $next_arg_is_headers_file = 0; - $headers_file = abs_path($arg); - next; - } - if ($arg eq '--headers') { - $has_headers = 1; - next; - } - if ($arg eq '--no-headers') { - $has_headers = 0; - next; - } - if ($arg eq '--headers-file') { - $next_arg_is_headers_file = 1; - $has_headers = 0; - next; - } - my $file = abs_path($arg); - if ($file =~ /^$MIGBASEWORKDIR/) { - if (-e $file) { - if (-f $file) { - add_this_file($file,$has_headers,$headers_file); - $headers_file = ''; # clear after applying to just one file - } else { - print "Not a real file: $file\n"; - } - } else { - print "Could not find file: $file\n"; - } - } else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; - } -} - -exit 0; - -############################################################################### - -sub add_this_file { - my $file = shift; - my $headers = shift; - my $headers_file = shift; - if ($headers_file) { - if (! (-e $headers_file && -f $headers_file)) { - print "Could not find headers file $headers_file, skipping $file\n"; - return; - } - } - if (Mig::check_for_tracked_file($file)) { - print "File already tracked: $file\n"; - } else { - print 'Adding ('; - if ($headers_file) { - print "with headers file = $headers_file"; - } else { - print ($headers ? ' with headers' : 'without headers'); - } - print '): ' . "$file\n"; - my $dbh = Mig::db_connect(); - my $rv = $dbh->do(" - INSERT INTO $MIGSCHEMA.tracked_file ( - base_filename - ,has_headers - ,headers_file - ) VALUES ( - " . $dbh->quote($file) . " - ," . $dbh->quote($headers) . " - ," . $dbh->quote($headers_file) . 
" - ); - ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } -} - diff --git a/mig-bin/mig-bibstats b/mig-bin/mig-bibstats deleted file mode 100755 index e0db266..0000000 --- a/mig-bin/mig-bibstats +++ /dev/null @@ -1,206 +0,0 @@ -#!/usr/bin/perl -# -*- coding: iso-8859-15 -*- -############################################################################### -=pod - -=item B --file foo.mrc - -Reads through a marc file to generate statistical information about the file -for quick analysis. - ---uri_threshold defaults to 1, only shows URI values with more than that -frequency - ---ingore_filetype true will have it not care what file returns as the type and -always treat it as marc21 -=back - -=cut - -############################################################################### - -use strict; -use warnings; - -use Data::Dumper; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use Switch; -use Getopt::Long; -use MARC::Batch; -use MARC::Record; -use MARC::Field; -use Cwd 'abs_path'; -use Cwd qw(getcwd); -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; -use open ':encoding(utf8)'; - -pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; -pod2usage(-verbose => 1) if ! 
$ARGV[1]; - -my $file; -my $uri_threshold = 1; -my $p_holding_code; -my $p_barcode_subfield; -my $p_ils_name = 'Runtime ILS'; -my $holding_threshold = 50; -my $p_ignore_filetype = 'false'; - -my $ret = GetOptions( - 'file:s' => \$file, - 'uri_threshold:i' => \$uri_threshold, - 'holding_code:s' => \$p_holding_code, - 'barcode:s' => \$p_barcode_subfield, - 'ignore_filetype:s' => \$p_ignore_filetype, - 'ils_name:s' => \$p_ils_name, - 'holding_threshold:s' => \$holding_threshold -); - -if ($p_holding_code and length $p_holding_code != 3) { abort('Holdings codes must be three characters.'); } - -if ($p_barcode_subfield) { - if (!defined $p_holding_code) { abort('A barcode field can not be used without a holding code.'); } - if (length $p_barcode_subfield != 1) { abort('Barcode subfields must be a single character code.'); } -} - -my @ilses = ( - ['Mandarin','852','p'], - ['Evergreen','852','p'], - ['Polaris','852','p'], - ['TLC','949','g'], - ['Koha','952','p'], - ['Sympony','999','i'] -); - -my @temp; -if ($p_holding_code) { - push @temp, $p_ils_name; - push @temp, $p_holding_code; - if ($p_barcode_subfield) { push @temp, lc $p_barcode_subfield; } -} -push @ilses, @temp; - - - -my $batch = MARC::Batch->new('USMARC', $file); -$batch->strict_off(); -my $filetype = `file $file`; -if ($filetype =~ m/MARC21/ or $p_ignore_filetype eq 'true') { print "$filetype.\n" } - else { abort("File is not MARC21."); } - -my $i = 0; -my $uri_count = 0; -my $uri_valid_count = 0; -my $uri_sub9_count = 0; -my $author_sub0 = 0; -my $title_sub0 = 0; -my @uris; -my @fields; -my @codes; -my @holding_code_strings; -my %holding_counts; -my %barcode_counts; - -foreach (@ilses) { - $holding_counts{@$_[0]} = 0; - $barcode_counts{@$_[0]} = 0; -} - -while ( my $record = $batch->next() ) { - $i++; - #check holdings, bit time consuming but more future proof - foreach (@ilses) { - my $ils = @$_[0]; - my $hcode = @$_[1]; - my $barcode = @$_[2]; - my @holding_fields = $record->field($hcode); - my $l = 
scalar @holding_fields; - my $v = $holding_counts{$ils}; - if ($l) { $holding_counts{$ils} = $v + $l; } - } - #process 856s - @fields = $record->field('856'); - my $ldr = substr $record->leader(), 9, 1; - push @codes, $ldr; - foreach my $f (@fields) { - my $u = $f->subfield('u'); - my $n = $f->subfield('9'); - if (defined $n) { $uri_sub9_count++; } - if (defined $u) { - $uri_count++; - my $ind1 = $f->indicator('1'); - my $ind2 = $f->indicator('2'); - if ($ind1 eq '4') { - if ($ind2 eq '0' or $ind2 eq '1') { $uri_valid_count++; } - } - my $ustring = lc $f->as_string('u'); - $ustring =~ s/http:\/\///; - $ustring =~ s/ftp:\/\///; - $ustring =~ s/https:\/\///; - $ustring =~ s/\/.*//; - push @uris, $ustring; - } - } - #check for authority linking on 100s and 245s, if present may need to scrub them - @fields = $record->field('100'); - foreach my $f (@fields) { - my $t = $f->subfield('0'); - if (defined $t) { $title_sub0++; } - } - @fields = $record->field('245'); - foreach my $f (@fields) { - my $t = $f->subfield('0'); - if (defined $t) { $author_sub0++; } - } - if(($i % 1000) == 0) { print "Processing bib $i.\n"; } -} - -my %uri_counts; -$uri_counts{$_}++ for @uris; - -my %code_counts; -$code_counts{$_}++ for @codes; - -print "\n$filetype\n"; -print "$i bibs read in file\n\n"; - -print "=== Leader 09 codes\n"; -foreach my $key (keys %code_counts) { - my $value = $code_counts{$key}; - print "=== $key $value\n"; -} -print "\n"; - -print "$uri_count 856 fields with a subfield u\n"; -print "$uri_valid_count 856 fields with a subfield u and valid indicators\n"; -print "$uri_sub9_count 856 fields have subfield 9s\n"; -print "$title_sub0 100 fields have a subfield 0\n"; -print "$author_sub0 245 fields have a subfield 0\n"; - -print "\n=== Holdings Analysis\n"; -foreach my $key (keys %holding_counts) { - my $c = $holding_counts{$key}; - if (((100/$i)*$c) >= $holding_threshold) { print "Could be $key $holding_counts{$key} holdings tags\n"; } -} - -print "\nURI values are domains 
and filtered to only show those with more than $uri_threshold\n"; -foreach my $key (keys %uri_counts) { - my $value = $uri_counts{$key}; - if ($value > $uri_threshold) { print "=== $key $value\n"; } -} - -close $file; - -########### functions - -sub abort { - my $msg = shift; - print STDERR "$0: $msg", "\n"; - exit 1; -} diff --git a/mig-bin/mig-clean b/mig-bin/mig-clean deleted file mode 100755 index b9cb013..0000000 --- a/mig-bin/mig-clean +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-clean - -Attempts to invoke B on the specified tracked file, placing the -output in [file].clean - -If given no other arguments, the invocation will lool like - -=over 5 - -clean_csv --config scripts/clean.conf --fix --apply [--create-headers|--use-headers ] - -=back - -otherwise, the arguments will be passed through like so - -=over 5 - -clean_csv [other arguments...] - -=back - -You'll need to invoke B or B prior to using commands -like B - -=head1 SYNOPSIS - -B [other arguments...] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $file = abs_path($ARGV[0]); -if ($file =~ /^$MIGBASEWORKDIR/) { - call_clean_csv(@ARGV); -} else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; -} - -exit 0; - -############################################################################### - -sub call_clean_csv { - my $file = abs_path(shift); - my @args = @_; - - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - my $data = Mig::status_this_file($file); - - if (! $data->{'utf8_filename'}) { - die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; - } - - my $utf8_file = $data->{'utf8_filename'}; - if (! -e $utf8_file) { - die "missing file: $utf8_file\n"; - } - - print "cleaning tracked file: $file\n"; - - if (scalar(@args) == 0) { - @args = ( - '--config' - ,'scripts/clean.conf' - ,'--fix' - ,'--apply' - ,'--backslash' - ,'--pad' - ); - if (! $data->{'has_headers'}) { - if ($data->{'headers_file'}) { - push @args, '--use-headers'; - push @args, $data->{'headers_file'}; - } else { - push @args, '--create-headers'; - } - } - } - - print join(' ',@args) . "\n"; - system('clean_csv', @args, $utf8_file); - - my $dbh = Mig::db_connect(); - my $clean_file = $dbh->quote($utf8_file . '.clean'); - if (! -e $utf8_file . '.clean') { - print "clean file does not exist: $clean_file\n"; - $clean_file = $dbh->quote(''); - } - - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file - SET clean_filename = $clean_file - WHERE base_filename = " . $dbh->quote($file) . 
" - ; - ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } else { - print "File not currently tracked: $file\n"; - } -} diff --git a/mig-bin/mig-convert b/mig-bin/mig-convert deleted file mode 100755 index 6fe2172..0000000 --- a/mig-bin/mig-convert +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-convert - -Attempts to invoke B on the .utf8.clean version of the specified -tracked file, creating either [file].utf8.clean.stage.sql or -_stage.sql depending on whether the file has been linked to a -parent table within the migration schema or not. - -If given no other arguments, the invocation will lool like - -=over 5 - -csv2sql --config scripts/clean.conf --add-x-migrate --schema [--parent ] --outfile <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean - -=back - -otherwise, the arguments will be passed through like so - -=over 5 - -csv2sql [other arguments...] --schema [--parent ] --outfile <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean - -=back - -=head1 SYNOPSIS - -B [other arguments...] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $file = abs_path($ARGV[0]); -if ($file =~ /^$MIGBASEWORKDIR/) { - call_convert_csv(@ARGV); -} else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; -} - -exit 0; - -############################################################################### - -sub call_convert_csv { - my $file = abs_path(shift); - my @args = @_; - - my $stage_sql_filename; - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - my $data = Mig::status_this_file($file); - - if (! $data->{'utf8_filename'}) { - die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; - } - - if (! $data->{'clean_filename'}) { - die "mig-clean or mig-skip-clean needed for .clean version of file: $file\n"; - } - - my $clean_file = $data->{'clean_filename'}; - if (! -e $clean_file) { - die "missing file: $clean_file\n"; - } - - print "converting tracked file: $file\n"; - - if (scalar(@args) == 0) { - @args = ( - '--config' - ,'scripts/clean.conf' - ,'--add-x-migrate' - ); - } - push @args, '--use-no-headers-file'; - push @args, '--schema'; - push @args, $MIGSCHEMA; - if ($data->{'parent_table'}) { - push @args, '--parent'; - push @args, $data->{'parent_table'}; - $stage_sql_filename = $data->{'parent_table'} . '.stage.sql'; - } else { - $stage_sql_filename = "$clean_file.stage.sql"; - } - push @args, '--outfile'; - push @args, $stage_sql_filename; - - print "args: " . join(',',@args) . "\n"; - system('csv2sql', @args, $clean_file); - - my $dbh = Mig::db_connect(); - if (! -e $stage_sql_filename) { - print "SQL converted file does not exist: $stage_sql_filename\n"; - $stage_sql_filename = ''; - } - - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file - SET stage_sql_filename = " . $dbh->quote($stage_sql_filename) . " - WHERE base_filename = " . $dbh->quote($file) . 
" - ; - ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } else { - print "File not currently tracked: $file\n"; - } -} diff --git a/mig-bin/mig-dump b/mig-bin/mig-dump deleted file mode 100755 index 57edeab..0000000 --- a/mig-bin/mig-dump +++ /dev/null @@ -1,88 +0,0 @@ -#!/usr/bin/perl - -############################################################################### -=pod - -=head1 NAME - -mig-dump - -A wrapper around the pg_dump command that saves a table in the mig schema with a time stamp in the working directory. - -=head1 SYNOPSIS - -B [arguments...] - -=cut - -############################################################################### - -use strict; -use warnings; - -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use Switch; -use Cwd 'abs_path'; -use Cwd qw(getcwd); -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; -use open ':encoding(utf8)'; - -pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; -pod2usage(-verbose => 1) if ! $ARGV[1]; - -my $fh; -my $outfile; - -my $table; -my $next_arg_is_table; - -foreach my $arg (@ARGV) { - if ($arg eq '--table') { - $next_arg_is_table = 1; - next; - } - if ($next_arg_is_table) { - $table = $arg; - $next_arg_is_table = 0; - next; - } -} - -my $outfilewpath = create_dumpfile_name($table); - -my $syscmd = 'pg_dump --format plain --data-only --file ' . $outfilewpath . ' --table ' . $MIGSCHEMA . '.' . $table . ' ' . $PGUSER; - -print "pgdump command: \n"; -print "$syscmd\n"; - -system($syscmd); - -####### beyond here be functions - -sub create_dumpfile_name { - my $table_name = shift; - $table_name =~ s/\./_/; - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); - $year += 1900; - my $date = $year . '-' . $mon . '-' . $mday; - my $dump_file = $table_name . ' ' . $date . 
'.pg'; - $dump_file =~ s/ /_/g; - $dump_file = $MIGGITDIR . $dump_file; - print "$dump_file \n"; - return $dump_file; -} - -sub abort { - my $msg = shift; - print STDERR "$0: $msg", "\n"; - exit 1; -} - - diff --git a/mig-bin/mig-env b/mig-bin/mig-env deleted file mode 100755 index dceec4f..0000000 --- a/mig-bin/mig-env +++ /dev/null @@ -1,268 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-env - This tool is for tracking and setting environment variables used by -B and its sub-tools. - -=head1 SYNOPSIS - -B - -B [migration_schema] - -B [orig_migration_schema] [new_migration_schema] - -B - -B - -=head1 DESCRIPTION - -For most invocations, B will either create or use a migration-specific -file (~/.mig/.env) for setting the following environment -variables: - -=over 15 - -=item MIGSCHEMA - -The name of the migration schema. Convention has this being a single lowercased -word or acronym identifying the library, prefixed with 'm_'. - -=item MIGWORKDIR - -The base working directory for containing migration data, scripts, and other -files. - -=item PGHOST - -The IP address or hostname for the PostgreSQL database used for a migration. - -=item PGPORT - -The TCP port for the PostgreSQL database. - -=item PGUSER - -The PostgreSQL user to use for the database. - -=item PGDATABASE - -The name of the actual database containing the migration schema. - -=back - -This script may also setup a symlink from a specified Git repository to a -scripts/ directory within the migration work directory. The default for this is -~/git/migration-work/MIGSCHEMA --> MIGWORKDIR/scripts - -It may also create the migration work directory if necessary. 
- -=head1 COMMANDS - -=over 15 - -=item B - -This invocation will prompt for various values and create a .env file for the -specified migration schema, and a symlink between the specified Git repository -and migration work directory (which will also be created if needed). - -=item B - -This command will spawn a bash shell that executes the corresponding -~/.mig/.env script for setting up environment variables encoded during -B. - -=item B [schema] - -This command will show the contents of the corresponding ~/.mig/.env -script, or, if no schema is specified, then it will list pertinent variables in -the current environment if they exist. - -=item B [orig schema] [new schema] - -This command will create a "shallow" clone of the orig schema, in that it will -share database credentials as well as git and data directories, but will have a -separate schema name. - -=item B - -This command will list migration schemas found in ~/.mig - -=item B - -Display the documentation you're reading now. - -=back - -=cut - -############################################################################### - -use strict; -use 5.012; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use File::Path qw(make_path); -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; - -pod2usage(-verbose => 2) if ! $ARGV[0]; - -my $migration_schema = $ARGV[1] || ''; -my $filename = "$HOME/.mig/$migration_schema.env"; -switch($ARGV[0]) { - case "--help" { - pod2usage(-verbose => 2); - } - case "help" { - pod2usage(-verbose => 2); - } - case "create" { - pod2usage(-verbose => 1) if ! $ARGV[1]; - mig_env_create(); - } - case "clone" { - pod2usage(-verbose => 1) if ! $ARGV[2]; - $migration_schema = $ARGV[2] || ''; - $filename = "$HOME/.mig/$migration_schema.env"; - mig_env_clone(); - } - case "use" { - pod2usage(-verbose => 1) if ! 
$ARGV[1]; - if (-e $filename) { - exec '/bin/bash', '--init-file', $filename; - } else { - die "\n$filename does not exist\n"; - } - } - case "show" { - if (-e $filename) { - exec '/bin/cat', $filename; - } else { - print `env | sort | egrep 'MIG|PG'`; - } - } - case "list" { - opendir(my $dh, "$HOME/.mig") || die "can't open $HOME/.mig: $!"; - while (readdir $dh) { - if (/^(.*)\.env$/) { - print "$1\n"; - } - } - closedir $dh; - } - else { - pod2usage(1); - } -} - -sub mig_env_create { - if (-e $filename) { - print "Re-Creating $filename\n"; - print `cat $filename`; - } else { - print "Creating $filename\n"; - } - print "\n"; - - # directories - - $MIGBASEWORKDIR = "$HOME/data/" unless $MIGBASEWORKDIR; - my $migworkdir_default = "$MIGBASEWORKDIR$migration_schema/"; - print "Main work directory (default $migworkdir_default): "; - my $MIGWORKDIR = ; - chomp $MIGWORKDIR; - if (! $MIGWORKDIR) { - $MIGWORKDIR = $migworkdir_default; - } - $MIGBASEGITDIR = "$HOME/git/migration-work/" unless $MIGBASEGITDIR; - my $miggitdir_default = "${MIGBASEGITDIR}/$migration_schema/"; - print "git repo for migration-specific scripts (default $miggitdir_default): "; - my $MIGGITDIR = ; - chomp $MIGGITDIR; - if (! $MIGGITDIR) { - $MIGGITDIR = $miggitdir_default; - } - - # PostgreSQL - - $PGHOST = 'localhost' unless $PGHOST; - my $pghost_default = $PGHOST; - print "PGHOST (default $pghost_default): "; - $PGHOST = ; - chomp $PGHOST; - if (! $PGHOST) { - $PGHOST = $pghost_default; - } - $PGPORT = 5432 unless $PGPORT; - my $pgport_default = $PGPORT; - print "PGPORT (default $pgport_default): "; - $PGPORT = ; - chomp $PGPORT; - if (! $PGPORT) { - $PGPORT = $pgport_default; - } - $PGDATABASE = 'evergreen' unless $PGDATABASE; - my $pgdatabase_default = $PGDATABASE; - print "PGDATABASE (default $pgdatabase_default): "; - $PGDATABASE = ; - chomp $PGDATABASE; - if (! 
$PGDATABASE) { - $PGDATABASE = $pgdatabase_default; - } - $PGUSER = $PGDATABASE unless $PGUSER; - my $pguser_default = $PGUSER; - print "PGUSER (default $pguser_default): "; - my $PGUSER = ; - chomp $PGUSER; - if (! $PGUSER) { - $PGUSER = $pguser_default; - } - - # create files and directories if needed - - mkdir "$HOME/.mig"; - make_path($MIGGITDIR, { verbose => 1 }); - `touch $MIGGITDIR/README`; - make_path($MIGWORKDIR, { verbose => 1 }); - symlink $MIGGITDIR, "$MIGWORKDIR/scripts"; - open FILE, ">$filename"; - print FILE "export PGHOST=$PGHOST\n"; - print FILE "export PGPORT=$PGPORT\n"; - print FILE "export PGDATABASE=$PGDATABASE\n"; - print FILE "export PGUSER=$PGUSER\n"; - print FILE "export PGOPTIONS='-c search_path=$migration_schema,public,evergreen'\n"; - print FILE "export MIGENVPROMPT=$migration_schema\n"; - print FILE "export MIGSCHEMA=$migration_schema\n"; - print FILE "export MIGBASEWORKDIR=$MIGBASEWORKDIR\n"; - print FILE "export MIGWORKDIR=$MIGWORKDIR\n"; - print FILE "export MIGBASEGITDIR=$MIGBASEGITDIR\n"; - print FILE "export MIGGITDIR=$MIGGITDIR\n"; - print FILE "alias wcd='cd `mig wdir`'\n"; - print FILE "alias gcd='cd `mig gdir`'\n"; - print FILE "alias scd='cd `mig sdir`'\n"; - print FILE "source ~/.profile\n"; - print FILE "env | sort | egrep 'PG|MIG'\n"; - print FILE 'echo shell PID = $$' . 
"\n"; - close FILE; -} - -sub mig_env_clone { - my $orig_migration_schema = $ARGV[1] || ''; - my $orig_filename = "$HOME/.mig/$orig_migration_schema.env"; - `cp $orig_filename $filename`; - `sed -i 's/export PGOPTIONS=.*/export PGOPTIONS='"'"'-c search_path=$migration_schema,public,evergreen'"'"'/' $filename`; - `sed -i 's/export MIGENVPROMPT=.*/export MIGENVPROMPT=$migration_schema/' $filename`; - `sed -i 's/export MIGSCHEMA=.*/export MIGSCHEMA=$migration_schema/' $filename`; -} - diff --git a/mig-bin/mig-gsheet b/mig-bin/mig-gsheet deleted file mode 100755 index 5975bd6..0000000 --- a/mig-bin/mig-gsheet +++ /dev/null @@ -1,419 +0,0 @@ -#!/usr/bin/perl - -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Net::Google::Spreadsheets; -use Net::Google::DataAPI::Auth::OAuth2; -use Net::OAuth2::AccessToken; -use Storable; -use DBI; -use FindBin; -use lib "$FindBin::Bin/"; -my $mig_bin = "$FindBin::Bin/"; -use Mig; -use strict; -use Switch; -use Cwd 'abs_path'; -use Pod::Usage; -use Data::Dumper; -use DateTime; - -pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -die_if_gsheet_tracked_table_does_not_exist(); -die_if_gsheet_tracked_column_does_not_exist(); - -my $cmd_push; -my $next_arg_is_push; -my $cmd_pull; -my $next_arg_is_pull; -my @worksheet_names; -my $cmd_export = 0; -my @table_names; -my $sql; -my $sth; -my @ws; -my @tracked_ws_names; -my $authfile = $ENV{HOME} . 
'/.mig/oauth.env'; -my $next_arg_is_authfile; - -foreach my $arg (@ARGV) { - if ($arg eq '--push') { - $next_arg_is_push = 1; - next; - } - if ($next_arg_is_push) { - $cmd_push = $arg; - $next_arg_is_push = 0; - next; - } - if ($arg eq '--pull') { - $next_arg_is_pull = 1; - next; - } - if ($next_arg_is_pull) { - $cmd_pull = $arg; - $next_arg_is_pull = 0; - next; - } - if ($arg eq '--authfile') { - $next_arg_is_authfile = 1; - next; - } - if ($next_arg_is_authfile) { - $authfile = $arg; - $next_arg_is_authfile = 0; - next; - } - if ($arg eq '--export') { - $cmd_export = 1; - next; - } -} - -abort('must specify --push (db->worksheets) or --pull (worksheets->db)') unless (defined $cmd_push or defined $cmd_pull); -if (defined $cmd_push and defined $cmd_pull) { abort('you can not specify both a --push and --pull on the same command'); } - -my $clientid; -my $clientsecret; -my $sessionfile; - -open (my $fh, '<', $authfile) or abort("Could not open $authfile"); -while (my $var = <$fh>) { - chomp $var; - my ($var1, $var2) = split /=/,$var; - if ($var1 eq 'CLIENTID') { $clientid = $var2; } - if ($var1 eq 'CLIENTSECRET') { $clientsecret = $var2; } - if ($var1 eq 'SESSIONFILE') { $sessionfile = $var2; } -} -my $dbh = Mig::db_connect(); -my $spreadsheet = connect_gsheet($clientid,$clientsecret,$sessionfile); -abort('could not connect to google sheet') unless (defined $spreadsheet); - -$sql = 'SELECT tab_name FROM gsheet_tracked_table;'; -$sth = $dbh->prepare($sql); -my $ra = $sth->execute(); -while (my @row = $sth->fetchrow_array) { - push @tracked_ws_names, $row[0]; -} - -if (defined $cmd_pull) { - print "Pulling "; - if ($cmd_pull eq 'all') { - print "all worksheets.\n"; - @ws = $spreadsheet->worksheets; - foreach my $wsn (@ws) { push @worksheet_names, $wsn->title; } - } else { - print "only worksheet $cmd_pull.\n"; - if (!defined $cmd_pull) { abort('command incomplete'); } - push @worksheet_names, $cmd_pull; - } - my @m = array_match(\@worksheet_names,\@tracked_ws_names); - 
foreach my $w (@m) { - my $pull_ws = $spreadsheet->worksheet( {title => $w} ); - my $push_tb = get_table_name($w,$dbh); - my @rows = $pull_ws->rows; - my @content; - map { $content[$_->row - 1][$_->col - 1] = $_->content } $pull_ws->cells; - my @tab_headers = shift @content; - my $tab_headers_length = $#{ $tab_headers[0] }; - my @pg_headers; - for my $i ( 0 .. $tab_headers_length ) { - push @pg_headers, $tab_headers[0][$i]; - } - shift @content; - #todo: check for clean headers at some point ... - truncate_table($push_tb,$dbh); - print "Inserting from $w to $push_tb.\n"; - for my $j (@content) { - insert_row($MIGSCHEMA,$push_tb,$dbh,\@pg_headers,$j); - } - timestamp($push_tb,$dbh,'pull'); - if ($cmd_export == 1) { export_table($dbh,$push_tb); } - } -} - -if (defined $cmd_push) { - print "Pushing "; - my @tab_names; - if ($cmd_push eq 'all') { - print "all worksheets.\n"; - $sql = 'SELECT tab_name FROM gsheet_tracked_table;'; - $sth = $dbh->prepare($sql); - $ra = $sth->execute(); - while (my @row = $sth->fetchrow_array) { - push @tab_names, $row[0]; - } - } else { - print "only worksheet $cmd_push.\n"; - if (!defined $cmd_push) { abort('command incomplete'); } - push @tab_names, $cmd_push; - } - foreach my $push_ws_name (@tab_names) { - my $pull_tb = get_table_name($push_ws_name,$dbh); - my @table_headers = get_pg_column_headers($pull_tb,$MIGSCHEMA); - print "worksheetname: $push_ws_name\n"; - my $push_ws = $spreadsheet->worksheet( {title => $push_ws_name} ); - if (!defined $push_ws) { next; } - my @rows; - my $i = 0; - foreach my $rth (@table_headers) { $rows[0][$i] = $rth; $i++; } - $sql = "SELECT * FROM $pull_tb;"; - $sth = $dbh->prepare($sql); - $sth->execute(); - my $grabhash = $sth->fetchall_arrayref({}); - erase_sheet($push_ws,$push_ws_name); - - #get from postgres the headers to use in the sheet from tracked columns - $sql = 'SELECT column_name FROM gsheet_tracked_column WHERE table_id = (SELECT id FROM gsheet_tracked_table WHERE table_name = \'' . 
$pull_tb . '\')'; - $sth = $dbh->prepare($sql); - $sth->execute(); - my $sheet_headers = $sth->fetchall_arrayref(); - my $sheet_headers_length = @$sheet_headers; - #now I need to do new rows using those headers - my @content; - foreach my $row ( @{$grabhash} ) { - my $record = {}; - for my $column ( sort keys %{ $row } ) { - #print Dumper(@$sheet_headers); - #print "column: $column\n"; - my $clean_column = $column; - $clean_column =~ s/_//g; - if ( $column ~~ @$sheet_headers ) { - $record->{$clean_column} = $row->{$column}; - } - } - push @content, $record; - } - print "Writing to $push_ws_name\n"; - foreach my $fillsheet (@content) { - my $new_row = $push_ws->add_row ( - $fillsheet - ); - } - timestamp($pull_tb,$dbh,'push'); - if ($cmd_export == 1) { export_table($dbh,$pull_tb); } - } -} - -sub export_table { - my $dbh = shift; - my $table = shift; - - my $dt = DateTime->now; - my $date = $dt->ymd; - my $hms = $dt->hms; - my $efile = $MIGGITDIR . $table . '_' . $date . '_' . $hms . '.tsv'; - my @data; - my $record_count = 0; - $dbh->do("COPY $table TO STDOUT CSV DELIMITER E'\t' HEADER;"); - 1 while $dbh->pg_getcopydata(\$data[$record_count++]) >= 0; - open (my $eout, '>', $efile) or abort("Could NOT open $efile."); - foreach my $d (@data) { - print $eout $d; - } - print "$efile written.\n"; - close $eout; - return; -} - -sub die_if_gsheet_tracked_table_does_not_exist { - if (!check_for_gsheet_tracked_table()) { - die "Table gsheet_tracked_table does not exist. Bailing...\n"; - } -} - -sub array_match { - my ($xa,$xb) = @_; - my @a = @{ $xa }; - my @b = @{ $xb }; - my @r; - - foreach my $av (@a) { - foreach my $bv (@b) { - if ($av eq $bv) { push @r, $bv; } - } - } - return @r; -} - -sub get_pg_column_headers { - my $table_name = shift; - my $schema_name = shift; - my @headers; - my $dbh = Mig::db_connect(); - $sql = 'SELECT column_name FROM information_schema.columns WHERE table_schema = ' . $dbh->quote( $schema_name ) . ' AND table_name = ' . 
$dbh->quote( $table_name ) . ';'; - $sth = $dbh->prepare($sql); - $ra = $sth->execute(); - while (my @row = $sth->fetchrow_array) { - push @headers, $row[0]; - } - return @headers; -} - -sub erase_sheet { - my $ws = shift; - my $ws_name = shift; - - print "Erasing $ws_name.\n"; - my @rows = $ws->rows; - splice @rows, 0, 1; - my $i = @rows; - while ($i > 0) { - my $row = pop @rows; - $row->delete; - $i--; - } - return; -} - -sub check_for_gsheet_tracked_table { - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT EXISTS( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = " . $dbh->quote( $MIGSCHEMA ) . " - AND table_name = 'gsheet_tracked_table' - );" - ); - my $rv = $sth->execute() - || die "Error checking for table (tracked_gsheet_table): $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - Mig::db_disconnect($dbh); - return $cols[0]; -} - -sub die_if_gsheet_tracked_column_does_not_exist { - if (!check_for_gsheet_tracked_column()) { - die "Table $MIGSCHEMA.gsheet_tracked_column does not exist. Bailing...\n"; - } -} - -sub get_table_name { - my $worksheet = shift; - my $dbh = shift; - - my $sql = 'SELECT table_name FROM gsheet_tracked_table WHERE tab_name = \'' . $worksheet . '\';'; - my $sth = $dbh->prepare($sql); - my $xs = $sth->execute(); - my $table_name; - while (my @row = $sth->fetchrow_array) { - $table_name = $row[0]; - } - - return $table_name; -} - -#sub get_worksheet_name { -# my $table = shift; -# my $dbh = shift; -# -# my $sql = 'SELECT tab_name FROM gsheet_tracked_table WHERE table_name = \'' . $table . '\';'; -# print "$sql \n"; -# my $sth = $dbh->prepare($sql); -# my $xs = $sth->execute(); -# my $worksheet_name; -# while (my @row = $sth->fetchrow_array) { -# $worksheet_name = $row[0]; -# } -# -# return $worksheet_name; -#} - - -sub check_for_gsheet_tracked_column { - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT EXISTS( - SELECT 1 - FROM information_schema.tables - WHERE table_schema = " . 
$dbh->quote( $MIGSCHEMA ) . " - AND table_name = 'gsheet_tracked_column' - );" - ); - my $rv = $sth->execute() - || die "Error checking for table (gsheet_tracked_column): $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - Mig::db_disconnect($dbh); - return $cols[0]; -} - -sub insert_row { - my ($schema, $table, $dbh, $headers_ref, $row_ref) = @_; - my @headers = @{ $headers_ref }; - my @row_data = @{ $row_ref }; - - my $header_string = '(' . join(",", @headers) . ')'; - map {s/\'/\'\'/g; } @row_data; - my $row_string = '(' . join(",", map {qq/'$_'/} @row_data) . ')'; - #print "INSERT INTO $schema.$table $header_string VALUES $row_string\n"; - $dbh->do(qq/ - INSERT INTO $schema.$table $header_string VALUES $row_string ; - /); -} - -sub timestamp { - my ($table, $dbh, $action) = @_; - - my $column; - if ($action eq 'pull') { $column = 'last_pulled' } - else { $column = 'last_pushed' }; - - $dbh->do(qq/ - UPDATE gsheet_tracked_table SET $column = NOW() WHERE table_name = '$table'; - /); - -} - - -sub truncate_table { - my $table = shift; - my $dbh = shift; - - $dbh->do(qq/ - TRUNCATE TABLE $table;; - /); - print "Table $table truncated.\n"; -} - -sub abort { - my $msg = shift; - print STDERR "$0: $msg", "\n"; - exit 1; -} - -sub connect_gsheet { - - my ($clientid,$clientsecret,$sessionfile) = @_; - - my $oauth2 = Net::Google::DataAPI::Auth::OAuth2->new( - client_id => $clientid, - client_secret => $clientsecret, - scope => ['http://spreadsheets.google.com/feeds/'], - redirect_uri => 'https://developers.google.com/oauthplayground', - ); - if ($sessionfile =~ m/~/) {$sessionfile =~ s/~/$ENV{HOME}/; } - my $session = retrieve($sessionfile); - my $restored_token = Net::OAuth2::AccessToken->session_thaw( - $session, - auto_refresh => 1, - profile => $oauth2->oauth2_webserver, - ); - $oauth2->access_token($restored_token); - my $service = Net::Google::Spreadsheets->new(auth => $oauth2); - - my $spreadsheet = $service->spreadsheet( - { - title => $MIGSCHEMA - } - ); - 
return $spreadsheet; -} - - diff --git a/mig-bin/mig-iconv b/mig-bin/mig-iconv deleted file mode 100755 index 88acdd0..0000000 --- a/mig-bin/mig-iconv +++ /dev/null @@ -1,107 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-iconv - -Attempts to invoke B on the specified tracked file, placing the -output in [file].iconv - -If given no other arguments, the invocation will lool like - -=over 5 - -iconv -f ISO-8859-1 -t UTF-8 -o .utf8 - -=back - -otherwise, the arguments will be passed through like so - -=over 5 - -iconv [other arguments...] -o .utf8 - -=back - -You'll need to invoke B prior to using commands like B - -=head1 SYNOPSIS - -B [other arguments...] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $file = abs_path($ARGV[0]); -if ($file =~ /^$MIGBASEWORKDIR/) { - call_iconv(@ARGV); -} else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; -} - -exit 0; - -############################################################################### - -sub call_iconv { - my $file = abs_path(shift); - my @args = @_; - - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - my $data = Mig::status_this_file($file); - print "iconv'ing tracked file: $file\n"; - - if (scalar(@args) == 0) { - @args = ( - '-f' - ,'ISO-8859-1' - ,'-t' - ,'UTF-8' - ,'--verbose' - ); - } - - system('iconv', @args, '-o', $file . '.utf8', $file); - system('touch', $file . 
'.utf8'); # handle 0-byte files - - my $dbh = Mig::db_connect(); - my $utf8_file = $dbh->quote($file . '.utf8'); - if (! -e $file . '.utf8') { - print "utf8 file does not exist: $utf8_file\n"; - $utf8_file = $dbh->quote(''); - } - - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file - SET utf8_filename = $utf8_file - WHERE base_filename = " . $dbh->quote($file) . " - ; - ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } else { - print "File not currently tracked: $file\n"; - } -} diff --git a/mig-bin/mig-init b/mig-bin/mig-init deleted file mode 100755 index 98f92b5..0000000 --- a/mig-bin/mig-init +++ /dev/null @@ -1,93 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-init - This will add or recreate tracking tables for the B toolset to -the migration schema specified by the MIGSCHEMA environment variable, in the -PostgreSQL database specified by various PG environment variables. - -In practice, you should invoke 'mig env use schema_name' prior to calling -B - -=head1 SYNOPSIS - -B - -B - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -my $mig_sql = $mig_bin . "../mig-sql/init/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if $ARGV[0]; - -Mig::die_if_no_env_migschema(); - -if (! Mig::check_for_db_migschema()) { - try_to_create_schema(); -} - -if (! 
Mig::check_db_migschema_for_migration_tables()) { - try_to_init_schema_with_migration_tools(); -} -Mig::die_if_mig_tracking_table_exists(); -Mig::die_if_mig_column_tracking_table_exists(); -loop_through_mig_sql_templates(); - -exit 0; - -############################################################################### - -sub try_to_create_schema { - if ($MIGSCHEMA =~ /[^\w_]/) { - die "$MIGSCHEMA is not suitable for a schema name in PostgreSQL\n"; - } - my $dbh = Mig::db_connect(); - my $rv = $dbh->do("CREATE SCHEMA $MIGSCHEMA;") - || die "Error creating migration schema ($MIGSCHEMA): $!\n"; - print "Created schema $MIGSCHEMA\n"; - Mig::db_disconnect($dbh); -} - -sub try_to_init_schema_with_migration_tools { - Mig::die_if_no_migration_tools(); - print "Calling migration_tools.init() and .build()\n"; - my $dbh = Mig::db_connect(); - my $rv = $dbh->do("SELECT migration_tools.init(" . $dbh->quote($MIGSCHEMA) . ");") - || die "Error running migration_tools.init($MIGSCHEMA): $!\n"; - print "migration_tools.init() finished\n"; - my $rv2 = $dbh->do("SELECT migration_tools.build(" . $dbh->quote($MIGSCHEMA) . ");") - || die "Error running migration_tools.build($MIGSCHEMA): $!\n"; - print "migration_tools.build() finished\n"; - Mig::db_disconnect($dbh); -} - -sub loop_through_mig_sql_templates { - print "Looping through mig-sql/init/ templates\n"; - opendir my $dir, $mig_sql or die "Cannot open directory: $!"; - my @files = sort readdir $dir; - closedir $dir; - foreach my $file (@files) { - if ($file =~ /.sql$/) { - print "executing $file:\n"; - system( $mig_bin . "mig-sql", ('-f',$mig_sql . $file) ) - } - } -} - diff --git a/mig-bin/mig-link b/mig-bin/mig-link deleted file mode 100755 index 1a8ccd7..0000000 --- a/mig-bin/mig-link +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-link - -Associate the specified file with a parent table within the migration schema. 
- -=head1 SYNOPSIS - -B - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $file = abs_path($ARGV[0]); -if ($file =~ /^$MIGBASEWORKDIR/) { - link_table(@ARGV); -} else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; -} - -exit 0; - -############################################################################### - -sub link_table { - my $file = abs_path(shift); - my $table = shift; - - if (! Mig::check_db_migschema_for_specific_table($table)) { - die "table not found in MIGSCHEMA ($MIGSCHEMA): $table\n"; - } - - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - my $data = Mig::status_this_file($file); - - print "linking file to parent table: $file -> $table\n"; - - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT base_filename - FROM $MIGSCHEMA.tracked_file - WHERE parent_table = " . $dbh->quote($table) . " - AND base_filename <> " . $dbh->quote($file) . ";" - ); - my $rv = $sth->execute() - || die "Error checking $MIGSCHEMA.tracked_file: $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - if ($cols[0]) { # found - die "table ($table) already linked to a different file: $cols[0]\n"; - } - $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file - SET parent_table = " . $dbh->quote($table) . " - WHERE base_filename = " . $dbh->quote($file) . 
" - ; - ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } else { - print "File not currently tracked: $file\n"; - } -} diff --git a/mig-bin/mig-mapper b/mig-bin/mig-mapper deleted file mode 100755 index 6841cf7..0000000 --- a/mig-bin/mig-mapper +++ /dev/null @@ -1,778 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-mapper - -Interactive session for analyzing, flagging, and mapping legacy field data to -Evergreen fields. - -Upon exit, generate either [file].clean.map.sql or _map.sql. The -SQL generated will be UPDATE's for setting the Evergreen-specific columns for a -given file's staging tables, and TRUNCATE's and INSERT's for auxilary tables. -The files will have \include hooks for pulling in additional mapping files -(for example, end-user mappings for circ modifiers, etc.) - -=head1 SYNOPSIS - -B - -=cut - -############################################################################### - -use strict; -use Term::ReadLine; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! 
$ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $column_filter = 1; # show all fields -my $file = abs_path($ARGV[0]); -my $fdata; -my $tracked_file_id = Mig::check_for_tracked_file($file); -if ($tracked_file_id) { - $fdata = Mig::status_this_file($file); -} else { - die "File not currently tracked: $file\n"; -} - -my $table = $fdata->{staged_table}; -if (!$table) { - die "No staged staged table for file: $file\n"; -} - -my $loop = 1; -my $term = Term::ReadLine->new('mapper'); -my $prompt; -my $OUT = $term->OUT || \*STDOUT; -my @dtd_identifiers; - -table_menu(); -$prompt = "$fdata->{staged_table}: "; -while ( $loop && defined (my $cmd = $term->readline($prompt)) ) { -top: - $cmd =~ s/^\s+//; - $cmd =~ s/\s+$//; - $term->addhistory($cmd) if $cmd =~ /\S/; - if ($cmd =~ /^\d+$/) { - my $ret = column_menu($cmd); - if ($ret) { - $cmd = $ret; - goto top; - } - } else { - switch($cmd) { - case /^(ls|\?|\.|;)$/ { - table_menu(); - } - case '' { - table_menu(); - } - case 'l' { - list_ten(); - } - case 'f1' { - $column_filter = 1; - table_menu(); - } - case 'f2' { - $column_filter = 2; - table_menu(); - } - case 'f3' { - $column_filter = 3; - table_menu(); - } - } - } - $loop = 0 if $cmd =~ /^q/io; -} - -exit 0; - -############################################################################### - -sub table_menu { - print "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; - print "$table"; - print "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; - print "\n"; - print " l) SELECT * FROM $fdata->{staged_table} LIMIT 10;\n"; - print "f1) show all fields (default)\n"; - print "f2) show legacy fields\n"; - print "f3) show EG fields\n"; - print " q) quit\n\n"; - printf "%-36s", "Columns (* for required)"; - printf "%-30s", "Target"; - printf "%-30s", "Transform"; - printf "%-30s", "First Row"; - 
printf "%-30s", "Migration Note"; - print "\n"; - printf "%-36s", "-------"; - printf "%-30s", "------"; - printf "%-30s", "---------"; - printf "%-30s", "---------"; - printf "%-30s", "--------------"; - print "\n"; - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM information_schema.columns - WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " - AND table_name = " . $dbh->quote($table) . " - ORDER BY dtd_identifier::INTEGER ASC; - "); - my $rv = $sth->execute() - || die "Error retrieving data from information_schema: $!"; - my $sth2 = $dbh->prepare(" - SELECT * - FROM $MIGSCHEMA.$table - LIMIT 1; - "); - my $rv2 = $sth2->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - my $row = $sth2->fetchrow_hashref; - - open LESS, "|less -F"; - @dtd_identifiers = (); - while (my $data = $sth->fetchrow_hashref) { - my $column = $data->{column_name}; - if ($column_filter == 2 && !($column =~ /^[xl]_/)) { - next; - } - if ($column_filter == 3 && ($column =~ /^[xl]_/)) { - next; - } - my $cdata = status_this_column($column); - printf LESS $cdata->{required} ? '*' : ' '; - printf LESS "%3s) ", $data->{dtd_identifier}; - push @dtd_identifiers, $data->{dtd_identifier}; - printf LESS "%-30s", $column; - printf LESS "%-30s", defined $cdata->{target_table} - ? ( $cdata->{target_table} ne $table ? $cdata->{target_table} . '.' : '') . $cdata->{target_column} - : ''; - printf LESS "%-30s", defined $cdata->{transform} ? $cdata->{transform} : ''; - printf LESS "%-30s", defined $$row{$column} ? $$row{$column} : ''; - printf LESS "%-30s", defined $cdata->{comment} ? $cdata->{comment} : ''; - print LESS "\n"; - } - close LESS; - print "\n"; - $sth->finish; - $sth2->finish; - Mig::db_disconnect($dbh); -} - -sub column_menu { - my $dtd_identifier = shift; - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM information_schema.columns - WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " - AND table_name = " . 
$dbh->quote($table) . " - AND dtd_identifier = " . $dbh->quote($dtd_identifier) . "; - "); - my $rv = $sth->execute() - || die "Error retrieving data from information_schema: $!"; - my $data = $sth->fetchrow_hashref; - $sth->finish; - Mig::db_disconnect($dbh); - - my $column = $data->{column_name}; - - my $prompt = "$table.$column: "; - - sub print_menu { - my $column = shift; - my $cdata = status_this_column($column); - print "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; - print "$column"; - print "\n-------------------------------------------------------------------------------------------------\n"; - print " target: " . ( - defined $cdata->{target_table} - ? ( $cdata->{target_table} ne $table ? $cdata->{target_table} . '.' : '') . $cdata->{target_column} - : '' - ) . "\n"; - print "transform: " . (defined $cdata->{transform} ? $cdata->{transform} : '') . "\n"; - print " comment: " . (defined $cdata->{comment} ? $cdata->{comment} : '') . 
"\n"; - print "\n=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=\n"; - print "\n"; - print " l) SELECT $column FROM $fdata->{staged_table} LIMIT 10;\n"; - print " s) summarize\n" if $column ne 'x_migrate'; - print " g) group browse\n"; - print "g2) group browse (order by count desc)\n"; - print " c) comment\n"; - print " f) flag for end-user mapping\n"; - print " t) target\n"; - print " e) eval/transform\n"; - print " n) next column\n"; - print " p) prev column\n"; - print " q) quit back to table menu\n"; - print "\n"; - } - print_menu($column); - - my $loop = 1; - while ( $loop && defined (my $cmd = $term->readline($prompt)) ) { - $cmd =~ s/^\s+//; - $cmd =~ s/\s+$//; - $term->addhistory($cmd) if $cmd =~ /\S/; - $loop = 0 if $cmd =~ /^q/io; - switch($cmd) { - case /^(ls|\?|\.|;)$/ { - print_menu($column); - } - case '' { - print_menu($column); - } - case 'l' { - list_ten($column); - } - case 's' { - summarize($column); - } - case 'g' { - group_browse($column); - } - case 'g2' { - group_browse($column,'GROUP BY 1 ORDER BY 2 DESC'); - } - case /^c/io { - if ($cmd =~ /^c\s+(.+)$/) { - set_comment($column,$1); - } - } - case /^t/io { - if ($cmd =~ /^t\s+(.+)$/) { - set_target($column,$1); - } - } - case /^e/io { - if ($cmd =~ /^e\s+(.+)$/) { - set_transform($column,$1); - } - } - case 'n' { - my( $index )= grep { $dtd_identifiers[$_] eq $dtd_identifier } 0..$#dtd_identifiers; - return $dtd_identifiers[$index + 1]; - } - case 'p' { - my( $index )= grep { $dtd_identifiers[$_] eq $dtd_identifier } 0..$#dtd_identifiers; - return $dtd_identifiers[$index - 1]; - } - } - } -} - -sub list_ten { - my $column = shift; - - my $dbh = Mig::db_connect(); - my $sth; - my $rv; - my @cols; - - $sth = $dbh->prepare(Mig::sql(" - SELECT " . (defined $column ? $column : '*') . 
" - FROM $MIGSCHEMA.$table - LIMIT 10; - ")); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - print "\n"; - while (@cols = $sth->fetchrow_array) { - print "\t" . join(',',map {defined $_ ? $_ : ''} @cols) . "\n"; - } - print "\n"; - $sth->finish; -} - -sub summarize { - my $column = shift; - - my $count; - my $non_empty_count; - my $distinct_value_count; - my $distinct_integer_value_count; - my $distinct_money6_value_count; - my $distinct_money8_value_count; - my $distinct_date_value_count; - my $distinct_timestamptz_value_count; - - my $min_value; - my $min_length; - my $min_length_min_value; - my $max_value; - my $max_length; - my $max_length_max_value; - - my $min_value_as_integer; - my $max_value_as_integer; - - my $min_value_as_money6; - my $max_value_as_money6; - - my $min_value_as_money8; - my $max_value_as_money8; - - my $min_value_as_date; - my $max_value_as_date; - - my $min_value_as_timestamptz; - my $max_value_as_timestamptz; - - my $dbh = Mig::db_connect(); - my $sth; - my $rv; - my @cols; - - ### count - $sth = $dbh->prepare(" - SELECT COUNT(*) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $count = $cols[0]; - - ### non_empty_count - $sth = $dbh->prepare(" - SELECT COUNT(*) - FROM $MIGSCHEMA.$table - WHERE $column IS NOT NULL AND BTRIM($column) <> ''; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $non_empty_count = $cols[0]; - - ### distinct_value_count - $sth = $dbh->prepare(" - SELECT COUNT(DISTINCT $column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $distinct_value_count = $cols[0]; - - ### distinct_integer_value_count - $sth = $dbh->prepare(" - SELECT COUNT(DISTINCT 
migration_tools.attempt_cast($column,'INTEGER')::INTEGER) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $distinct_integer_value_count = $cols[0]; - - ### distinct_money6_value_count - $sth = $dbh->prepare(" - SELECT COUNT(DISTINCT NULLIF(migration_tools.attempt_money6($column,'-0.01'),-0.01)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $distinct_money6_value_count = $cols[0]; - - ### distinct_money8_value_count - $sth = $dbh->prepare(" - SELECT COUNT(DISTINCT NULLIF(migration_tools.attempt_money($column,'-0.01'),-0.01)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $distinct_money8_value_count = $cols[0]; - - ### distinct_date_value_count - $sth = $dbh->prepare(" - SELECT COUNT(DISTINCT NULLIF(migration_tools.attempt_date($column,'1969-06-09'),'1969-06-09'::DATE)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $distinct_date_value_count = $cols[0]; - - ### distinct_timestamptz_value_count - $sth = $dbh->prepare(" - SELECT COUNT(DISTINCT NULLIF(migration_tools.attempt_timestamptz($column,'1969-06-09'),'1969-06-09'::TIMESTAMPTZ)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $distinct_timestamptz_value_count = $cols[0]; - - ### min_value - $sth = $dbh->prepare(" - SELECT MIN($column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $min_value = $cols[0]; - 
- ### min_length - $sth = $dbh->prepare(" - SELECT MIN(LENGTH($column)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $min_length = $cols[0]; - - ### min_length_min_value - $sth = $dbh->prepare(" - SELECT MIN($column) - FROM $MIGSCHEMA.$table - WHERE LENGTH($column) = $min_length; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $min_length_min_value = $cols[0]; - - ### min_value_as_integer - $sth = $dbh->prepare(" - SELECT MIN(migration_tools.attempt_cast($column,'INTEGER')::INTEGER) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $min_value_as_integer = $cols[0]; - - ### min_value_as_money6 - $sth = $dbh->prepare(" - SELECT MIN(NULLIF(migration_tools.attempt_money6($column,'-0.01'),-0.01)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $min_value_as_money6 = $cols[0]; - - ### min_value_as_money8 - $sth = $dbh->prepare(" - SELECT MIN(NULLIF(migration_tools.attempt_money($column,'-0.01'),-0.01)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $min_value_as_money8 = $cols[0]; - - ### min_value_as_date - $sth = $dbh->prepare(" - SELECT MIN(NULLIF(migration_tools.attempt_date($column,'1969-06-09'),'1969-06-09'::DATE)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $min_value_as_date = $cols[0]; - - ### min_value_as_timestamptz - $sth = $dbh->prepare(" - SELECT 
MIN(NULLIF(migration_tools.attempt_timestamptz($column,'1969-06-09'),'1969-06-09'::TIMESTAMPTZ)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $min_value_as_timestamptz = $cols[0]; - - ### max_value - $sth = $dbh->prepare(" - SELECT MAX($column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $max_value = $cols[0]; - - ### max_length - $sth = $dbh->prepare(" - SELECT MAX(LENGTH($column)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $max_length = $cols[0]; - - ### max_length_max_value - $sth = $dbh->prepare(" - SELECT MAX($column) - FROM $MIGSCHEMA.$table - WHERE LENGTH($column) = $max_length; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $max_length_max_value = $cols[0]; - - ### max_value_as_integer - $sth = $dbh->prepare(" - SELECT MAX(migration_tools.attempt_cast($column,'INTEGER')::INTEGER) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $max_value_as_integer = $cols[0]; - - ### max_value_as_money6 - $sth = $dbh->prepare(" - SELECT MAX(NULLIF(migration_tools.attempt_money6($column,'-0.01'),-0.01)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $max_value_as_money6 = $cols[0]; - - ### max_value_as_money8 - $sth = $dbh->prepare(" - SELECT MAX(NULLIF(migration_tools.attempt_money($column,'-0.01'),-0.01)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error 
retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $max_value_as_money8 = $cols[0]; - - ### max_value_as_date - $sth = $dbh->prepare(" - SELECT MAX(NULLIF(migration_tools.attempt_date($column,'1969-06-09'),'1969-06-09'::DATE)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $max_value_as_date = $cols[0]; - - ### max_value_as_timestamptz - $sth = $dbh->prepare(" - SELECT MAX(NULLIF(migration_tools.attempt_timestamptz($column,'1969-06-09'),'1969-06-09'::TIMESTAMPTZ)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - $max_value_as_timestamptz = $cols[0]; - - Mig::db_disconnect($dbh); - - print "\n"; - print "# of rows = $count\n"; - print "# of non-empty rows = $non_empty_count\n"; - print "# of distinct values (as text) = $distinct_value_count\n"; - print "# of distinct values (as integer) = $distinct_integer_value_count\n"; - print "# of distinct values (as money6) = $distinct_money6_value_count\n"; - print "# of distinct values (as money8) = $distinct_money8_value_count\n"; - print "# of distinct values (as date) = $distinct_date_value_count\n"; - print "# of distinct values (as timestamptz) = $distinct_timestamptz_value_count\n"; - print "\n"; - print "minimum value (as text) = $min_value\n"; - print "maximum value (as text) = $max_value\n"; - print "\n"; - print "minimum value length (as text) = $min_length (min value: $min_length_min_value)\n"; - print "maximum value length (as text) = $max_length (max value: $max_length_max_value)\n"; - print "\n"; - print "minimum value (as integer) = " . ($min_value_as_integer ? $min_value_as_integer : '') . "\n"; - print "maximum value (as integer) = " . ($max_value_as_integer ? $max_value_as_integer : '') . 
"\n"; - print "\n"; - print "minimum value (as money6) = " . ($min_value_as_money6 ? $min_value_as_money6 : '') . "\n"; - print "maximum value (as money6) = " . ($max_value_as_money6 ? $max_value_as_money6 : '') . "\n"; - print "\n"; - print "minimum value (as money8) = " . ($min_value_as_money8 ? $min_value_as_money8 : '') . "\n"; - print "maximum value (as money8) = " . ($max_value_as_money8 ? $max_value_as_money8 : '') . "\n"; - print "\n"; - print "minimum value (as date) = " . ($min_value_as_date ? $min_value_as_date : '') . "\n"; - print "maximum value (as date) = " . ($max_value_as_date ? $max_value_as_date : '') . "\n"; - print "\n"; - print "minimum value (as timestamptz) = " . ($min_value_as_timestamptz ? $min_value_as_timestamptz : '') . "\n"; - print "maximum value (as timestamptz) = " . ($max_value_as_timestamptz ? $max_value_as_timestamptz : '') . "\n"; - print "\n"; -} - -sub group_browse { - my ($column,$option) = (shift,shift||"GROUP BY 1 ORDER BY 1"); - - my $dbh = Mig::db_connect(); - my $sth; - my $rv; - - $sth = $dbh->prepare(Mig::sql(" - SELECT $column, COUNT(*) - FROM $MIGSCHEMA.$table - $option; - ")); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - - print "\n"; - open LESS, "|less -F"; - printf LESS "%-30s", "Value:"; - print LESS "Count:\n\n"; - while (my @cols = $sth->fetchrow_array) { - my $value = $cols[0]; - my $count = $cols[1]; - printf LESS "%-30s", defined $value ? $value : ''; - print LESS "$count\n"; - } - close LESS; - print "\n"; - $sth->finish; -} - -############################################################################### - -sub add_this_column { - my $column = shift; - if (!Mig::check_for_tracked_column($table,$column)) { - my $dbh = Mig::db_connect(); - my $rv = $dbh->do(" - INSERT INTO $MIGSCHEMA.tracked_column ( - base_filename - ,parent_table - ,staged_table - ,staged_column - ) VALUES ( - " . $dbh->quote($file) . " - ," . $dbh->quote($fdata->{parent_table}) . " - ," . 
$dbh->quote($table) . " - ," . $dbh->quote($column) . " - ); - ") || die "Error inserting into table $MIGSCHEMA.tracked_column: $!\n"; - Mig::db_disconnect($dbh); - } -} - -sub status_this_column { - my $column = shift; - my $data = Mig::status_this_column($table,$column); - if (!$data) { - add_this_column($column); - $data = Mig::status_this_column($table,$column); - } - if ($$data{parent_table}) { - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM $MIGSCHEMA.fields_requiring_mapping - WHERE table_name = " . $dbh->quote( $$data{parent_table} ) . " - AND column_name = " . $dbh->quote( $column ) . ";" - ); - my $rv = $sth->execute() - || die "Error checking table (tracked_column) for $table.$column: $!"; - my $data2 = $sth->fetchrow_hashref; - if ($data2) { - $$data{required} = 1; - } else { - $$data{required} = 0; - } - $sth->finish; - Mig::db_disconnect($dbh); - } - return $data; -} - -sub set_comment { - my ($column,$comment) = (shift,shift); - if ($comment) { - my $data = status_this_column($column); - my $dbh = Mig::db_connect(); - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_column - SET comment = " . $dbh->quote($comment) . " - WHERE id = " . $dbh->quote($data->{id}) . "; - ") || die "Error updating table $MIGSCHEMA.tracked_column: $!\n"; - Mig::db_disconnect($dbh); - } -} - -sub set_transform { - my ($column,$transform) = (shift,shift); - if ($transform) { - my $data = status_this_column($column); - my $dbh = Mig::db_connect(); - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_column - SET transform = " . $dbh->quote($transform) . " - WHERE id = " . $dbh->quote($data->{id}) . 
"; - ") || die "Error updating table $MIGSCHEMA.tracked_column: $!\n"; - Mig::db_disconnect($dbh); - } -} - -sub set_target { - my ($column,$target) = (shift,shift); - my $target_table = $table; - my $target_column = $target; - if ($target) { - if ($target =~ /^(.+)\.(.+)$/) { - $target_table = $1; - $target_column = $2; - } - my $data = status_this_column($column); - my $dbh = Mig::db_connect(); - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_column - SET target_table = " . $dbh->quote($target_table) . " - ,target_column = " . $dbh->quote($target_column) . " - WHERE id = " . $dbh->quote($data->{id}) . "; - ") || die "Error updating table $MIGSCHEMA.tracked_column: $!\n"; - Mig::db_disconnect($dbh); - } -} diff --git a/mig-bin/mig-quick b/mig-bin/mig-quick deleted file mode 100755 index 59b0843..0000000 --- a/mig-bin/mig-quick +++ /dev/null @@ -1,66 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-quick - -A wrapper for running the following mig commands on the specified files: - -=over 15 - -mig add -mig skip-iconv -mig clean -mig convert -mig stage - -=back - -Arguments take the form of --cmd--argument or --cmd--argument=value. - -This form is NOT supported: --cmd--argument value - -cmd must be substituted with either add, skip-iconv, clean, convert, or stage, -and determines which mig command to apply the argument toward. - -=head1 SYNOPSIS - -B [arguments...] [ ...] - -=cut - -############################################################################### - -use strict; -use Pod::Usage; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -my @files = grep {!/^--/} @ARGV; -my %pass_thru = ('add'=>[],'skip-iconv'=>[],'clean'=>[],'convert'=>[],'stage'=>[]); -foreach my $a (@ARGV) { - if ($a =~ /^--([a-z]+)-(.*)$/) { - $pass_thru{$1} = [] if ! 
defined $pass_thru{$1}; - unshift @{ $pass_thru{$1} }, "--$2"; - } -} - -foreach my $file (@files) { - foreach my $cmd (('add','skip-iconv','clean','convert','stage')) { - print "mig $cmd $file " . (join ' ', @{ $pass_thru{$cmd} }) . "\n"; - my @MYARGV = ( - 'mig' - ,$cmd - ,$file - ); - system(@MYARGV,@{ $pass_thru{$cmd} }); - } -} - -exit 0; - diff --git a/mig-bin/mig-quicksheet b/mig-bin/mig-quicksheet deleted file mode 100755 index 22ed33e..0000000 --- a/mig-bin/mig-quicksheet +++ /dev/null @@ -1,594 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-quicksheet - -By default: - -Quickly produces an Excel spreadsheet based on the tracked file suitable -for simple end-user mapping. The new file is named after the tracked file, but -ends in .mapping.xls - -Multiple files may be specified, in which case all of the results are -concatenated into one spreadsheet named .mapping.xls - -If using --outfile: - -This specifies the exact name to use for the Excel file. If not specified, and -there is also no --outtable, then the naming convention will be as specified -above. - -If using --outtable: - -This specifies a summary table and prefix to use within the migration schema for -recording the output either in addition to or instead of the Excel file. Unless ---force is specified, it will not overwrite existing tables. - -If using --drop with --outable: - -This will delete the summary table specified and all related sub-tables. - -=head1 SYNOPSIS - -B [--force|--drop|--outfile |--outtable ] [...] 
- -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Try::Tiny; -use Pod::Usage; -use Getopt::Long; -use DBI; -use Spreadsheet::WriteExcel; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -my $outtable = ''; -my $outfile = ''; -my $force; -my $drop; -my $help; - -GetOptions( - 'outtable=s' => \$outtable, - 'outfile=s' => \$outfile, - 'force' => \$force, - 'drop' => \$drop, - 'help|?' => \$help -); -pod2usage(-verbose => 2) if $help || ! $ARGV[0]; - -if (! $outtable && ! $outfile) { - if (scalar(@ARGV) > 1) { - $outfile = $MIGSCHEMA . '.mapping.xls'; - } else { - $outfile = abs_path($ARGV[0]) . '.mapping.xls'; - } -} - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $workbook; -my @worksheets = (); -my $first_sheet; -my $first_table; -my $toc; -my $sheet_row_offset = 0; -my $sheet_row_start = 4; -my $table; -my $file; -my $fdata; -my $has_x_source = 0; -my $bold; -my $left; -my $counter = 0; - -if (!$drop) { - init_workbook(); - foreach my $f (@ARGV) { - $file = abs_path($f); - $counter++; - if ($toc) { - $toc->write($counter,0,$counter); - $toc->write($counter,1,$f); - } - handle_file(); - write_worksheets(); - } - close_workbook(); -} else { - if (Mig::check_db_migschema_for_specific_table($outtable)) { - drop_existing_outtable(); - } -} - -sub handle_file { - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - $fdata = Mig::status_this_file($file); - } else { - die "File not currently tracked: $file\n"; - } - $table = $fdata->{staged_table}; - if (!$table) { - die "No staged staged table for file: $file\n"; - } -} - -sub init_workbook { - if ($outfile) { - print "Writing $outfile\n"; - $workbook = Spreadsheet::WriteExcel->new( $outfile ); - $bold 
= $workbook->add_format(); - $bold->set_bold(); - $bold->set_align('left'); - $left = $workbook->add_format(); - $left->set_align('left'); - if (scalar(@ARGV) > 1) { - $toc = $workbook->add_worksheet('Files'); - } - } - if ($outtable) { - if (Mig::check_db_migschema_for_specific_table($outtable)) { - if ($force) { - drop_existing_outtable(); - } else { - die "$outtable already exists. Use --force to wipe and redo tables.\n"; - } - } - create_new_outtable(); - } -} - -sub drop_existing_outtable { - - # we want a transaction for this one - my $dbh = Mig::db_connect(); - $dbh->{AutoCommit} = 0; - $dbh->{RaiseError} = 1; - - try { - # gather subordinate tables - - my @tables = (); - my $sth = $dbh->prepare(" - SELECT summary_table - FROM $MIGSCHEMA.$outtable - ORDER BY 1;" - ); - my $rv = $sth->execute(); - my $rows = $sth->fetchall_arrayref; - for my $row ( @$rows ) { - push @tables, $row->[0] - } - - # drop them - - foreach my $table (@tables) { - print "Dropping $MIGSCHEMA.$table\n"; - $dbh->do("DROP TABLE $MIGSCHEMA.\"$table\";"); - } - - # drop master table - - print "Dropping $MIGSCHEMA.$outtable\n"; - $dbh->do("DROP TABLE $MIGSCHEMA.$outtable;"); - - $dbh->commit; - } catch { - warn "Transaction aborted because $_\n"; - eval { $dbh->rollback }; - die "Aborting mig-quicksheet\n"; - }; - - Mig::db_disconnect($dbh); -} - -sub create_new_outtable { - my $dbh = Mig::db_connect(); - print "Creating table $MIGSCHEMA.$outtable\n"; - my $rv = $dbh->do(" - CREATE UNLOGGED TABLE $MIGSCHEMA.$outtable ( - file TEXT, - summary_table TEXT UNIQUE - ); - ") || die "Error creating outtable ($MIGSCHEMA.$outtable): $!\n"; - Mig::db_disconnect($dbh); -} - -sub create_new_subtable { - my $subtable = shift; - my $dbh = Mig::db_connect(); - $dbh->{AutoCommit} = 0; - $dbh->{RaiseError} = 1; - - try { - print "Creating table $MIGSCHEMA.\"$subtable\"\n"; - my $rv = $dbh->do(" - CREATE UNLOGGED TABLE $MIGSCHEMA.\"$subtable\" (); - ") || die "Error creating subtable 
($MIGSCHEMA.\"$subtable\"): $!\n"; - $rv = $dbh->do(" - INSERT INTO $MIGSCHEMA.$outtable (file,summary_table) VALUES (" . $dbh->quote($file) . ',' . $dbh->quote($subtable) . "); - ") || die "Error inserting into outtable ($MIGSCHEMA.$outtable): $!\n"; - $dbh->commit; - } catch { - warn "Transaction aborted because $_\n"; - eval { $dbh->rollback }; - die "Aborting mig-quicksheet\n"; - }; - - Mig::db_disconnect($dbh); -} - -sub write_worksheets { - print 'File #' . $counter . "\n"; - print "Sheet: Field Summary\n"; - my $tab_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . 'Field Summary'; - $tab_name = substr($tab_name,0,31); # truncate for WriteExcel - if ($outfile) { - $first_sheet = $workbook->add_worksheet( $tab_name ); - $first_sheet->set_column(0,6,30); - } - if ($outtable) { - $first_table = "$outtable $tab_name"; - create_new_subtable( $first_table ); - } - - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT COUNT(*) - FROM $MIGSCHEMA.$table - LIMIT 1; - "); - my $rv = $sth->execute() - || die "Error retrieving data from information_schema: $!"; - - my @cols = $sth->fetchrow_array; - $sth->finish; - my $count = $cols[0]; - - $sheet_row_start = 0; - - if ($outfile) { - $first_sheet->write($sheet_row_start,0,'Legacy Column',$bold); - $first_sheet->write($sheet_row_start,1,'Non-Empty Rows',$bold); - $first_sheet->write($sheet_row_start,2,'Distinct Non-NULL Values',$bold); - $first_sheet->write($sheet_row_start,3,'Min Value',$bold); - $first_sheet->write($sheet_row_start,4,'Min Length',$bold); - $first_sheet->write($sheet_row_start,5,'Max Value',$bold); - $first_sheet->write($sheet_row_start,6,'Max Length',$bold); - } - if ($outtable) { - try { - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Legacy Column" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Non-Empty Rows" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . 
' ADD COLUMN "Distinct Non-NULL Values" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Value" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Min Length" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Value" TEXT;'); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$first_table"^ . ' ADD COLUMN "Max Length" TEXT;'); - } catch { - die "Error modifying subtable ($MIGSCHEMA.$first_table): $_\n"; - }; - } - - handle_list(); - handle_columns(); - - if ($outfile) { - $first_sheet->write($count + 3,0,'Source File:',$bold); - $first_sheet->write($count + 3,1,$file,$left); - $first_sheet->write($count + 4,0,'Number of Rows:',$bold); - $first_sheet->write($count + 4,1,$count,$left); - } - if ($outtable) { - try { - $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . ' ("Legacy Column") VALUES (NULL);'); - $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . - ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Source File:'," . $dbh->quote($file) . ");"); - $rv = $dbh->do('INSERT INTO ' . qq^$MIGSCHEMA."$first_table"^ . - ' ("Legacy Column","Non-Empty Rows") ' . "VALUES ('Number of Rows:',$count);"); - } catch { - die "Error inserting into subtable ($MIGSCHEMA.$first_table): $_\n"; - }; - } - - Mig::db_disconnect($dbh); -} - -sub close_workbook { - if ($outfile) { - $workbook->close(); - } -} - -exit 0; - -############################################################################### - -sub handle_list { - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM " . $MIGSCHEMA. "." . $table . 
" - LIMIT 65530; - "); - my $rv = $sth->execute() - || die "Error retrieving data from staging table: $!"; - my $list_sheet; - - $sheet_row_offset = 0; - $has_x_source = 0; - if ($outfile) { - print "Sheet: $table\n"; - $list_sheet = $workbook->add_worksheet( $table ); - } - - my $handle_headers = 1; - - while (my $data = $sth->fetchrow_hashref) { - if ($handle_headers) { - my $_idx = 0; - foreach my $col (sort keys %{ $data }) { - $list_sheet->write($sheet_row_start + $sheet_row_offset,$_idx++,$col,$bold); - } - $handle_headers = 0; - } - $sheet_row_offset++; - my $idx = 0; - foreach my $col (sort keys %{ $data }) { - my $cdata = $$data{$col}; - if (!defined $cdata) { $cdata = '\N'; } - if ($outfile) { - $list_sheet->write($sheet_row_start + $sheet_row_offset,$idx++,$cdata,$left); - } - } - } -} - -sub handle_columns { - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT * - FROM information_schema.columns - WHERE table_schema = " . $dbh->quote($MIGSCHEMA) . " - AND table_name = " . $dbh->quote($table) . 
" - ORDER BY dtd_identifier::INTEGER ASC; - "); - my $rv = $sth->execute() - || die "Error retrieving data from information_schema: $!"; - - $sheet_row_offset = 0; - $has_x_source = 0; - - while (my $data = $sth->fetchrow_hashref) { - my $column = $data->{column_name}; - if ($column eq 'x_source') { - $has_x_source = 1; - } - if ($column =~ /^l_/ - || ($column =~ /^x_/ - && ( $column ne 'x_migrate' - && $column ne 'x_source' - && $column ne 'x_egid' - && $column ne 'x_hseq' - ) - ) - ) { - $sheet_row_offset++; - my $cdata = column_summary($column); - if ($outfile) { - $first_sheet->write($sheet_row_start + $sheet_row_offset,0,$column,$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,1,$cdata->{non_empty_count},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,2,$cdata->{distinct_value_count},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,3,$cdata->{min_value},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,4,$cdata->{min_length},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,5,$cdata->{max_value},$left); - $first_sheet->write($sheet_row_start + $sheet_row_offset,6,$cdata->{max_length},$left); - } - if ($outtable) { - $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$first_table" VALUES (^ . join(',' - ,$cdata->{non_empty_count} - ,$cdata->{distinct_value_count} - ,$dbh->quote($cdata->{min_value}) - ,$cdata->{min_length} - ,$dbh->quote($cdata->{max_value}) - ,$cdata->{max_length} - ) . 
');') || die "Error inserting into subtable $MIGSCHEMA.\"$first_table\": $!"; - } - if ($cdata->{distinct_value_count} > 1 && $cdata->{distinct_value_count} <= 500) { - group_by($column); - } - } - } - $sth->finish; - Mig::db_disconnect($dbh); -} - -sub column_summary { - - my $column = shift; - - my $dbh = Mig::db_connect(); - - ### non_empty_count - my $sth = $dbh->prepare(" - SELECT COUNT(*) - FROM $MIGSCHEMA.$table - WHERE $column IS NOT NULL AND BTRIM($column) <> ''; - "); - my $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - my @cols = $sth->fetchrow_array; - $sth->finish; - my $non_empty_count = $cols[0]; - - ### distinct_value_count - $sth = $dbh->prepare(" - SELECT COUNT(DISTINCT $column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $distinct_value_count = $cols[0]; - - ### min_value - $sth = $dbh->prepare(" - SELECT MIN($column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $min_value = $cols[0]; - - ### min_length - $sth = $dbh->prepare(" - SELECT MIN(LENGTH($column)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $min_length = $cols[0]; - - ### max_value - $sth = $dbh->prepare(" - SELECT MAX($column) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $max_value = $cols[0]; - - ### max_length - $sth = $dbh->prepare(" - SELECT MAX(LENGTH($column)) - FROM $MIGSCHEMA.$table; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - @cols = $sth->fetchrow_array; - $sth->finish; - my $max_length = $cols[0]; 
- - return { - non_empty_count => $non_empty_count - ,distinct_value_count => $distinct_value_count - ,min_value => defined $min_value ? $min_value : '' - ,min_length => defined $min_length ? $min_length : '' - ,max_value => defined $max_value ? $max_value : '' - ,max_length => defined $max_length ? $max_length : '' - }; -} - -sub group_by { - my ($column,$option) = (shift,"GROUP BY 2 ORDER BY 2"); - - my $dbh = Mig::db_connect(); - my $sth; - my $rv; - - my $col_sheet_row_start = 0; - my $col_sheet_row_offset = 0; - my $col_sheet; - my $col_table; - - my $sheet_name = (scalar(@ARGV) > 1 ? $counter . ') ' : '') . $column; - $sheet_name = substr($sheet_name,0,31); - - print "Sheet: $sheet_name\n"; - if ($has_x_source) { - $option = "GROUP BY 2,3 ORDER BY 2,3"; - } - - if ($outfile) { - $col_sheet = $workbook->add_worksheet( $sheet_name ); - push @worksheets, $col_sheet; - $col_sheet->set_column(0,6,30); - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,'Count',$bold); - if ($has_x_source) { - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,'Source',$bold); - } - $col_sheet->write( - $col_sheet_row_start + $col_sheet_row_offset - ,$has_x_source ? 2 : 1 - ,"Legacy Value for $column" - ,$bold - ); - } - - if ($outtable) { - $col_table = "$outtable $sheet_name"; - create_new_subtable( $col_table ); - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Count" TEXT;') - || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; - if ($has_x_source) { - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "Source" TEXT;') - || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; - } - $rv = $dbh->do('ALTER TABLE ' . qq^$MIGSCHEMA."$col_table"^ . ' ADD COLUMN "' . $dbh->quote("Legacy value for $column") . '" TEXT;') - || die qq^Error altering subtable $MIGSCHEMA."$col_table": $!\n^; - } - - $sth = $dbh->prepare(" - SELECT COUNT(*), " . ($has_x_source ? 'x_source, ' : '') . 
"$column - FROM $MIGSCHEMA.$table - $option; - "); - $rv = $sth->execute() - || die "Error retrieving data from $MIGSCHEMA.$table: $!"; - - while (my @cols = $sth->fetchrow_array) { - $col_sheet_row_offset++; - my $count = $cols[0]; - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,0,$count,$left) if $outfile; - my $value; - my $source; - if ($has_x_source) { - $source = defined $cols[1] ? $cols[1] : ''; - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$source,$left) if $outfile; - $value = defined $cols[2] ? $cols[2] : ''; - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,2,$value,$left) if $outfile; - } else { - $value = defined $cols[1] ? $cols[1] : ''; - $col_sheet->write($col_sheet_row_start + $col_sheet_row_offset,1,$value,$left) if $outfile; - } - if ($outtable) { - if ($has_x_source) { - $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(',' - ,$count - ,$dbh->quote($source) - ,$dbh->quote($value) - ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!"; - } else { - $rv = $dbh->do(qq^INSERT INTO $MIGSCHEMA."$col_table" VALUES (^ . join(',' - ,$count - ,$dbh->quote($value) - ) . ');') || die "Error inserting into subtable $MIGSCHEMA.\"$col_table\": $!"; - } - } - } - $sth->finish; -} - diff --git a/mig-bin/mig-remove b/mig-bin/mig-remove deleted file mode 100755 index cf70eda..0000000 --- a/mig-bin/mig-remove +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-remove - This will remove the specified files from the mig tracking table -for the schema pointed to by the MIGSCHEMA environment variable in the -PostgreSQL database specified by various PG environment variables. - -You'll need to invoke B prior to using commands like B - -=head1 SYNOPSIS - -B [file] [...] 
- -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -foreach my $arg (@ARGV) { - my $file = abs_path($arg); - if ($file =~ /^$MIGBASEWORKDIR/) { - remove_this_file($file); - } else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; - } -} - -exit 0; - -############################################################################### - -sub remove_this_file { - my $file = shift; - my $tracked_file_id = Mig::check_for_tracked_file($file,{'allow_missing'=>1}); - if ($tracked_file_id) { - print "removing tracked file: $file\n"; - my $dbh = Mig::db_connect(); - my $rv = $dbh->do(" - DELETE FROM $MIGSCHEMA.tracked_file WHERE id = $tracked_file_id; - ") || die "Error deleting from table $MIGSCHEMA.tracked_file (id = $tracked_file_id): $!\n"; - Mig::db_disconnect($dbh); - } else { - print "File not currently tracked: $file\n"; - } -} diff --git a/mig-bin/mig-reporter b/mig-bin/mig-reporter deleted file mode 100755 index f87059e..0000000 --- a/mig-bin/mig-reporter +++ /dev/null @@ -1,507 +0,0 @@ -#!/usr/bin/perl -# -*- coding: iso-8859-15 -*- -############################################################################### -=pod - -=item B --title "Report Title" - -Generates an asciidoc file in the git working directory that can be converted to -any appropriate format. The analyst and report parameters are required. - -Optional parameters are : - --- analyst - -Default to "Equinox Open Library Initiative" - ---added_page_title and --added_page_file - -If one is used both must be. 
The added page file can be plain text or asciidoc. This -adds an extra arbitrary page of notes to the report. Mig assumes the page file is in the mig git directory. - ---tags - -This will define a set of tags to use, if not set it will default to Circs, -Holds, Actors, Bibs, Assets & Money. - ---debug on - -Gives more information about what is happening. Defaults to off. - ---reports_xml - -Allows you to override the default evergreen_staged_report.xml in the mig-xml folder. - ---captions on OR --captions off - -Adds the captions tag to asciidoc header to turn off captions in generated output. -Defaults to off. - -=back - -=cut - -############################################################################### - -use strict; -use warnings; - -use DBI; -use Data::Dumper; -use XML::LibXML; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use Switch; -use Getopt::Long; -use Cwd 'abs_path'; -use Cwd qw(getcwd); -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; -use open ':encoding(utf8)'; - -pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; -pod2usage(-verbose => 1) if ! 
$ARGV[1]; - -my $analyst = 'Equinox Open Library Initiative';; -my $report_title; -my $reports_xml = 'evergreen_staged_report.xml'; -my $tags; -my $added_page_title; -my $added_page_file; -my $captions = 'off'; -my $i = 0; -my $parser = XML::LibXML->new(); -my $lines_per_page = 42; -my $debug = 'off'; -my $workbook; -my $fh; - -my $ret = GetOptions( - 'analyst:s' => \$analyst, - 'report_title:s' => \$report_title, - 'title:s' => \$report_title, - 'reports_xml:s' => \$reports_xml, - 'tags:s' => \$tags, - 'added_page_title:s' => \$added_page_title, - 'added_page_file:s' => \$added_page_file, - 'captions:s' => \$captions, - 'debug:s' => \$debug -); - -if (!defined $tags) {$tags = 'circs.holds.actors.bibs.assets.money.notices'}; -if (!defined $report_title) { abort('--report_title or --title must be supplied'); } -if (!defined $analyst) { abort('--analyst must be supplied'); } - -my $mig_path = abs_path($0); -$mig_path =~ s|[^/]+$||; -$reports_xml = find_xml($reports_xml,$mig_path); -if (!defined $reports_xml) { abort("Can not find xml reports file."); } -my $dom = $parser->parse_file($reports_xml); - -if (defined $added_page_file or defined $added_page_title) { - abort('must specify --added_page_file and --added_page_title') unless defined $added_page_file and defined $added_page_title; - } -if (defined $added_page_file) { $added_page_file = $MIGGITDIR . $added_page_file; } - -my $dbh = Mig::db_connect(); -my $report_file = create_report_name($report_title); -$report_file = $MIGGITDIR . 
$report_file; - -open($fh, '>', $report_file) or abort("Could not open output file $report_file!"); -write_title_page($report_title,$fh,$analyst,$captions); -load_javascript($fh); - -if (defined $added_page_file and defined $added_page_title) { - print $fh "<<<\n"; - print $fh "== $added_page_title\n"; - print "$added_page_file\t$added_page_title\n"; - open(my $an,'<:encoding(UTF-8)', $added_page_file) or abort("Could not open $added_page_file!"); - while ( my $line = <$an> ) { - print $fh $line; - } - print $fh "\n"; - close $an; -} - -foreach my $func ($dom->findnodes('//function')) { - my $fdrop = $func->findvalue('./drop'); - my $fcreate = $func->findvalue('./create'); - my $fname = $func->findvalue('./name'); - my $sdrop = $dbh->prepare($fdrop); - my $screate = $dbh->prepare($fcreate); - print "dropping function $fname ... "; - $sdrop->execute(); - print "creating function $fname\n\n"; - $screate->execute(); -} - -foreach my $table ($dom->findnodes('//table')) { - my $tdrop = $table->findvalue('./drop'); - my $tcreate = $table->findvalue('./create'); - my $tname = $table->findvalue('./name'); - my $sdrop = $dbh->prepare($tdrop); - my $screate = $dbh->prepare($tcreate); - print "dropping table $tname ... "; - $sdrop->execute(); - print "creating table $tname\n\n"; - $screate->execute(); -} - -$tags = lc($tags); -my @report_tags = split(/\./,$tags); -foreach my $t (@report_tags) { - print "\n\n=========== Starting to process tag $t\n"; - print "==========================================\n\n"; - - my @asset_files; - foreach my $asset ($dom->findnodes('//asset')) { - if (index($asset->findvalue('./tag'),$t) != -1) { - push @asset_files, $asset->findvalue('./file'); - } - } - - foreach my $fname (@asset_files) { - my $asset_path = $mig_path . '../mig-asc/' . 
$fname; - open my $a, $asset_path or abort("Could not open $fname."); - while ( my $l = <$a> ) { - print $fh $l; - } - print $fh "<<<\n"; - } - - print_section_header(ucfirst($t),$fh); - my $linecount = $lines_per_page; - my $r; - - undef @asset_files; - foreach my $asset ($dom->findnodes('//asset')) { - if (index($asset->findvalue('./tag'),$t) != -1) { - push @asset_files, $asset->findvalue('./file'); - } - } - - my @report_names; - foreach my $report ($dom->findnodes('//report')) { - if (index($report->findvalue('./tag'),$t) != -1 and $report->findvalue('./iteration') eq '0') { - push @report_names, $report->findvalue('./name'); - } - } - - #only has one level of failover now but could change to array of hashes and loops - #but this keeps it simple and in practice I haven't needed more than two - - - foreach my $rname (@report_names) { - my %report0; - my %report1; - my $check_tables0; - my $check_tables1; - - if ($debug eq 'on') {print "\nchecking for $rname ... ";} - %report0 = find_report($dom,$t,$rname,'0',$debug); - $check_tables0 = check_table($report0{query},$MIGSCHEMA,$debug,$rname); - if ($check_tables0 == 1) { $r = print_query($fh,%report0); } else { - %report1 = find_report($dom,$t,$rname,'1',$debug); - if (defined $report1{query}) { - $check_tables1 = check_table($report1{query},$MIGSCHEMA,$debug,$rname); - if ($check_tables1 == 1) { $r = print_query($fh,%report1); } - } - } - } - -} - -print "\n"; - -foreach my $table ($dom->findnodes('//table')) { - my $tdrop = $table->findvalue('./drop'); - my $tname = $table->findvalue('./name'); - my $sdrop = $dbh->prepare($tdrop); - print "cleaning up table $tname ... \n"; - $sdrop->execute(); -} - -close $fh; - -############ end of main logic - -sub find_xml { - my $reports_xml = shift; - my $mig_path = shift; - - if ($reports_xml =~ m/\//) { return $reports_xml; } - - my $mig_test_file = $mig_path . '/../mig-xml/' . $reports_xml; - my $working_test_dir = getcwd(); - my $working_test_file = $working_test_dir . 
'/' . $reports_xml; - - if (-e $mig_test_file) { return $mig_test_file; } - if (-e $working_test_file) { return $working_test_file; } - - return undef; -} - -sub find_report { - my $dom = shift; - my $tag = shift; - my $name = shift; - my $iteration = shift; - my $debug = shift; - my %report; - - if ($debug eq 'on') {print "iteration $iteration ";} - foreach my $node ($dom->findnodes('//report')) { - if ($node->findvalue('./tag') =~ $tag and $node->findvalue('./iteration') eq $iteration and $node->findvalue('./name') eq $name) { - if ($debug eq 'on') {print "succeeded ... \n";} - %report = ( - name => $node->findvalue('./name'), - report_title => $node->findvalue('./report_title'), - query => $node->findvalue('./query'), - heading => $node->findvalue('./heading'), - tag => $node->findvalue('./tag'), - iteration => $node->findvalue('./iteration'), - note => $node->findvalue('./note'), - display => $node->findvalue('./display'), - chart_labels => $node->findvalue('./chart_labels'), - divwidth => $node->findvalue('./divwidth'), - divheight => $node->findvalue('./divheight'), - ); - return %report; - } - } - if ($debug eq 'on') {print "failed ... \n";} - return %report = ( - name => "eaten by grue" - ); -} - -sub print_section_header { - my $t = shift; - my $fh = shift; - - $t =~ s/_/ /g; - #$t =~ s/(\w+)/\u$1/g;; - print $fh "<<<\n"; - print $fh "== $t Reports\n"; - return; -} - -sub create_report_name { - my $rt = shift; - - my @abbr = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); - $year += 1900; - my $date = $year . '_' . $abbr[$mon] . '_' . $mday; - my $report_file; - $report_file = $rt . ' ' . $date . 
'.asciidoc'; - $report_file =~ s/ /_/g; - return $report_file; -} - -sub write_title_page { - my $rt = shift; - my $fh = shift; - my $a = shift; - my $captions = shift; - - my @abbr = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec); - my $l = length($report_title); - my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(time); - $year += 1900; - print $fh "= $rt\n"; - print $fh "$mday $abbr[$mon] $year\n"; - print $fh "$a\n"; - #print $fh ":title-logo-image: image::eolilogosmall.png[pdfwidth=3in]\n"; - print $fh ":toc:\n"; - if ($captions eq 'on') { print $fh ":caption:\n"; } - print $fh "\n"; -} - -sub load_javascript { - my $fh = shift; - - print $fh "++++\n"; - print $fh "\n"; - print $fh "++++\n"; -} - -sub check_table { - my $query = shift; - my $MIGSCHEMA = shift; - my $debug = shift; - my $report_name = shift; - - if ($debug eq 'on') {print "$query\n";} - - my $i; - my $return_flag = 1; - my @qe = split(/ /,$query); - $i = @qe; - $i--; - my @tables; - while ($i > -1) { - if ($qe[$i] eq 'FROM' or $qe[$i] eq 'JOIN') { - my $q = $i + 1; - if ($qe[$q] ne '(SELECT') { - push @tables, $qe[$q]; - } - } - $i--; - } - if ($debug eq 'on') {print "checking tables ... ";} - - $i = 0; - foreach my $table (@tables) { - my $sql; - my $schema; - if (index($table,'.') != -1) { - $schema = (split /\./,$table)[0]; - $table = (split /\./,$table)[1]; - } - $table = clean_query_string($table); - if (defined $schema) { - $schema = clean_query_string($schema); - $sql = 'SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = \'' . $schema . '\' AND table_name = \'' . $table . '\');'; - } else { - $sql = 'SELECT EXISTS (SELECT 1 FROM information_schema.tables WHERE table_schema = \'' . $MIGSCHEMA . '\' AND table_name = \'' . $table . 
'\');'; - } - my $sth = $dbh->prepare($sql); - $sth->execute(); - while (my @row = $sth->fetchrow_array) { - if ($row[0] eq '1') { - next; - } else { - $return_flag = 0; - if ($debug eq 'on') {print "detecting $table failed...\n";} - } - if ($row[0] eq '0') {$return_flag = 0;} - } - } - if ($return_flag == 1 and $debug eq 'on') {print "succeeded ...\n";} - if ($return_flag == 0) {print "! a table failed the find test for report $report_name\n\n";} - return $return_flag; -} - -sub clean_query_string { - my $str = shift; - - $str =~ s/(?!_)[[:punct:]]//g; #remove punct except underscores - $str =~ s/\n//g; - $str =~ s/\r//g; - return $str; -} - -sub print_query { - my $fh = shift; - my %report = @_; - - my $display = $report{display}; - my $height = $report{divheight}; - my $width = $report{divwidth}; - if (!defined $display or length $display == 0) { $display = 'table'; } - my $rname = $report{name}; - my $query = $report{query}; - my $title = $report{report_title}; - my $sth = $dbh->prepare($query); - $sth->execute(); - - if ($height) { $height = $height . 'px'; } - if ($width) { $width = $width . 'px'; } - my $header_flag = 0; - - #print asciidoc - if ($display eq 'table') { - while (my @row = $sth->fetchrow_array) { - if ($header_flag == 0) { - print $fh "\n.*$report{report_title}*\n"; - print $fh "|===\n"; - my @h = split(/\./,$report{heading}); - my $h_length = @h; - my $h_count = 1; - while ($h_count <= $h_length) { - print $fh "|*$h[$h_count-1]* "; - $h_count++; - } - print $fh "\n"; - $header_flag = 1; - } - my $row_length = @row; - my $r = 1; - while ($r <= $row_length) { - if (! 
defined $row[$r-1] ) { - $row[$r-1] = 'none'; - } - print $fh "|$row[$r-1] "; - $r++; - } - print $fh "\n"; - } - if ($header_flag == 1) { - print $fh "|===\n\n"; - print $fh $report{note}; - print $fh "\n\n"; - } - } - - #print chart - if ($display eq 'pie_chart' or $display eq 'donut_chart') { - my @h = split(/\./,$report{heading}); - my @l = split(/\./,$report{chart_labels}); - - print $fh "++++\n"; - if (defined $height and defined $width) { print $fh "
\n"; } - else { print $fh "
\n"; } - print $fh "\n"; - print $fh "++++\n"; - } - - print "successfully wrote output for $report{name}.\n\n"; -} - -sub give_column { - my $i = shift; - my $col = ""; - - do { - $col .= chr( ( $i % 26 ) + ord('A') ); - $i = int( $i / 26 ) - 1; - } while ( $i >= 0 ); - - return scalar reverse $col; -} - -sub abort { - my $msg = shift; - print STDERR "$0: $msg", "\n"; - exit 1; -} - - diff --git a/mig-bin/mig-skip-clean b/mig-bin/mig-skip-clean deleted file mode 100755 index 5b60cb1..0000000 --- a/mig-bin/mig-skip-clean +++ /dev/null @@ -1,100 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-skip-clean - -Allows you to either use an existing file named .utf8.clean or a -named [cleaned file] as if it were the one created by mig-clean - -Note that the clean file, however specified, should contain headers. The -remaining tools in the chain will expect this. - -=head1 SYNOPSIS - -B [cleaned file] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! ($ARGV[0]||$ARGV[1]) || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $file = abs_path($ARGV[0]); -my $clean_file; -if ($ARGV[1]) { - $clean_file = abs_path($ARGV[1]); -} else { - $clean_file = $file; -} -if ($clean_file && ! 
$clean_file =~ /^$MIGBASEWORKDIR/) { - die "File falls outside of MIGWORKDIR ($MIGWORKDIR): $clean_file\n"; -} - -if ($file =~ /^$MIGBASEWORKDIR/) { - skip_clean($file,$clean_file); -} else { - die "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; -} - -exit 0; - -############################################################################### - -sub skip_clean { - my $file = shift; - my $clean_file = shift; - - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - my $data = Mig::status_this_file($file); - - if (! $data->{'utf8_filename'}) { - die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; - } - - my $utf8_file = $data->{'utf8_filename'}; - if (! -e $utf8_file) { - die "missing file: $utf8_file\n"; - } - - print "skipping cleaning of tracked file: $file\n"; - - my $dbh = Mig::db_connect(); - if (! $clean_file) { - $clean_file = $utf8_file . '.clean'; - } - if (! -e $clean_file) { - die "clean file does not exist: $clean_file\n"; - } - - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file - SET clean_filename = " . $dbh->quote($clean_file) . " - WHERE base_filename = " . $dbh->quote($file) . 
" - ; - ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } else { - die "File not currently tracked: $file\n"; - } -} diff --git a/mig-bin/mig-skip-iconv b/mig-bin/mig-skip-iconv deleted file mode 100755 index fec558d..0000000 --- a/mig-bin/mig-skip-iconv +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-skip-iconv - -Allows you to either use an existing file named .utf8 or a named -[utf8 file] as if it were the one created by mig-iconv - -=head1 SYNOPSIS - -B [utf8 file] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! ($ARGV[0]||$ARGV[1]) || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $file = abs_path($ARGV[0]); -my $utf8_file; -if ($ARGV[1]) { - $utf8_file = abs_path($ARGV[1]); -} else { - $utf8_file = $file; -} -if ($utf8_file && ! $utf8_file =~ /^$MIGBASEWORKDIR/) { - die "File falls outside of MIGWORKDIR ($MIGWORKDIR): $utf8_file\n"; -} - -if ($file =~ /^$MIGBASEWORKDIR/) { - skip_iconv($file,$utf8_file); -} else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; -} - -exit 0; - -############################################################################### - -sub skip_iconv { - my $file = shift; - my $utf8_file = shift; - - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - my $data = Mig::status_this_file($file); - print "skipping the iconv'ing of tracked file: $file\n"; - - my $dbh = Mig::db_connect(); - if (! 
$utf8_file) { - $utf8_file = $file . '.utf8'; - } - if (! -e $utf8_file) { - die "utf8 file does not exist: $utf8_file\n"; - } - - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file - SET utf8_filename = " . $dbh->quote($utf8_file) . " - WHERE base_filename = " . $dbh->quote($file) . " - ; - ") || die "Error inserting into table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } else { - print "File not currently tracked: $file\n"; - } -} diff --git a/mig-bin/mig-sql b/mig-bin/mig-sql deleted file mode 100755 index 3909ab3..0000000 --- a/mig-bin/mig-sql +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-sql - -A wrapper around the psql command. At some point the plan is to shove mig-tracked variables into psql sessions. - -=head1 SYNOPSIS - -B [arguments...] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR - BIBSTART -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -my @MYARGV = ( - 'psql' - ,'-vmigschema=' . $MIGSCHEMA - ,'-vmigschema_text=\'' . $MIGSCHEMA . '\'' - ,'-F ' . "\t" -); -if (defined $BIBSTART) { - push @MYARGV, '-vbibstart=' . $BIBSTART; -} -# TODO inject more mig-tracked variables here - -system(@MYARGV, @ARGV); - -exit 0; - diff --git a/mig-bin/mig-stage b/mig-bin/mig-stage deleted file mode 100755 index 6e7faf5..0000000 --- a/mig-bin/mig-stage +++ /dev/null @@ -1,128 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-stage - -Load the SQL-converted version of the specified file into the migration schema. 
- -Extra arguments are passed to the underlying call to psql - -If the tracked file was previously staged with a different table, drop that -table. - - -=head1 SYNOPSIS - -B [other arguments...] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $file = abs_path($ARGV[0]); -if ($file =~ /^$MIGBASEWORKDIR/) { - stage_csv(@ARGV); -} else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; -} - -exit 0; - -############################################################################### - -sub stage_csv { - my $file = abs_path(shift); - my @args = @_; - - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - my $data = Mig::status_this_file($file); - - if (! $data->{'utf8_filename'}) { - die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n"; - } - - if (! $data->{'clean_filename'}) { - die "mig-clean or mig-skip-clean needed for .clean version of file: $file\n"; - } - - if (! $data->{'stage_sql_filename'}) { - die "mig-convert needed for .stage.sql version of file: $file\n"; - } - - my $stage_sql_filename = $data->{'stage_sql_filename'}; - if (! 
-e $stage_sql_filename) { - die "missing file: $stage_sql_filename\n"; - } - - my $schema_table = `grep 'CREATE UNLOGGED TABLE' $stage_sql_filename | cut -f4 -d\\ | head -1`; - chomp $schema_table; - my ($schema,$table) = split /\./, $schema_table; - - if (defined $data->{'staged_table'} && $data->{'staged_table'} ne $table) { - my $dbh2 = Mig::db_connect(); - print "dropping previously staged table: $MIGSCHEMA.$data->{staged_table}\n"; - my $rv2 = $dbh2->do(" - DROP TABLE $MIGSCHEMA.$data->{staged_table}; - ") || die "Error dropping table $data->{staged_table}: $!\n"; - print "changing references to old tables\n"; - my $rv3 = $dbh2->do(" - UPDATE $MIGSCHEMA.tracked_column - SET staged_table = " . $dbh2->quote($table) . " - WHERE staged_table = " . $dbh2->quote($data->{staged_table}) . " - ") || die "Error changing references to $data->{staged_table}: $!\n"; - my $rv4 = $dbh2->do(" - UPDATE $MIGSCHEMA.tracked_column - SET target_table = " . $dbh2->quote($table) . " - WHERE target_table = " . $dbh2->quote($data->{staged_table}) . " - ") || die "Error changing references to $data->{staged_table}: $!\n"; - Mig::db_disconnect($dbh2); - } - - print "running staging SQL: $stage_sql_filename\n"; - - system('psql', @args, '-f', $stage_sql_filename); - - if ($schema ne $MIGSCHEMA) { - die "Schema mismatch: env => $MIGSCHEMA sql => $schema\n"; - } - if (! Mig::check_db_migschema_for_specific_table($table)) { - die "Missing staged table: $schema_table\n"; - } else { - print "table staged: $schema_table\n"; - } - - my $dbh = Mig::db_connect(); - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file - SET staged_table = " . $dbh->quote($table) . " - WHERE base_filename = " . $dbh->quote($file) . 
" - ; - ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } else { - print "File not currently tracked: $file\n"; - } -} diff --git a/mig-bin/mig-stagebibs b/mig-bin/mig-stagebibs deleted file mode 100755 index f045a13..0000000 --- a/mig-bin/mig-stagebibs +++ /dev/null @@ -1,244 +0,0 @@ -#!/usr/bin/perl - -############################################################################### -=pod - -=item B --file foo.mrc.xml - -Takes a load of bibs from a UTF-8 MARC XML file and loads them into mig staging -table of bibio_record_entry_legacy. This is done with no checking of file validity -so records should be checked before hand and cleaned. - -Takes three optional arguments: - - ---source - -Takes a numeric value and set the x_source of the bib record to that. Defaults to -2 which is local system. - ---x_source - -Sets an x_source value on the staging table to the one supplied instead of the -default of none. - ---auth foo.mrc.xml - -This will load bibs into the authority_record_entry_legacy. - ---serial foo.mrc.xml - -This will load bibs into the serial_record_entry_legacy. - -=back - -=cut - -############################################################################### - -use strict; -use warnings; - -use DBI; -#binmode STDIN, ':bytes'; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Data::Dumper; -use Pod::Usage; -use Switch; -use Cwd 'abs_path'; -use FindBin; -use UNIVERSAL; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; -use Getopt::Long; - -pod2usage(-verbose => 2) if defined $ARGV[0] && $ARGV[0] eq '--help'; -pod2usage(-verbose => 1) if ! 
$ARGV[1]; - -my $append = 0; -my $base_table; -my $stage_table; -my $marc_column = 'marc'; -my $auth = ''; -my $serial = ''; -my $source = 2; -my $x_source = 'default'; -my $no_source_or_last_xact_id; -my $dbh = Mig::db_connect(); -my $infile; -my $i = 0; -my $batch; -binmode STDIN, ':utf8'; - -my $ret = GetOptions( - 'file:s' => \$infile, - 'serial:s' => \$serial, - 'auth:s' => \$auth, - 'x_source:s' => \$x_source, - 'source:i' => \$source, - 'base_table:s' => \$base_table, - 'stage_table:s' => \$stage_table, - 'marc_column:s' => \$marc_column, - 'no_source_or_last_xact_id' => \$no_source_or_last_xact_id -); - -#if in file is empty then fail -#if auth and serial = 1 fail - -if ($serial == 1) { - $base_table = 'm_authority_record_entry'; -} - -if ($auth == 1) { - $base_table = 'm_serial_record_entry'; -} - -if ($auth == 1 and $serial == 1) { abort('are you sure you want to load these as authorities and serials?'); } - -if (!$base_table) { - $base_table = 'm_biblio_record_entry'; -} - -if (!$stage_table) { - $stage_table = $base_table . 
'_legacy'; -} - -my $bre_test = check_for_table($dbh,$base_table); -my $bre_legacy_test = check_for_table($dbh,$stage_table); -if ($bre_test == 0 and $bre_legacy_test == 0 ) { create_bre($dbh); create_child_bre($dbh); } -if ($bre_test == 1 and $bre_legacy_test == 0 ) { create_child_bre($dbh); } - -my $xmig_test = check_for_column($dbh,$stage_table,'x_migrate'); -if ($xmig_test == 0) { add_column($dbh,$stage_table,'x_migrate','BOOLEAN DEFAULT TRUE'); } - -my $xx_source_test = check_for_column($dbh,$stage_table,'x_source'); -if ($xx_source_test == 0) { add_column($dbh,$stage_table,'x_source','TEXT'); } - -my $xmarc_test = check_for_column($dbh,$stage_table,$marc_column); -if ($xmarc_test == 0) { add_column($dbh,$stage_table,$marc_column,'TEXT'); } - - -#flatten out MARC XML FILE -open my $xml, "<:encoding(utf8)", $infile or abort('could not open MARC XML file'); -$i = 0; -my $record = ''; -while(my $line = <$xml>) { - if ($line =~ /^<\/?collection/) { next; } - chomp $line; - $record = $record . 
$line; - if ($line =~ /<\/record>$/) { - stage_record($dbh,$record,$x_source,$source); - $record = ''; - $i++; - if (($i % 100) == 0) { report_progress('Records stage', $i); } - } -} -close $xml; - -if ($i == 0) { print "No XML was processed, are you sure this is an XML file?\n"; } -print "Finis.\n"; - -# beyond here be functions - -sub create_bre { - my $dbh = shift; - $dbh->do("DO \$\$ - DECLARE - t BOOLEAN; - BEGIN - SELECT EXISTS(SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$base_table') INTO t; - IF t = FALSE THEN - PERFORM migration_tools.build_specific_base_staging_table ('$MIGSCHEMA',REGEXP_REPLACE('$base_table','_','.')); - END IF; - END \$\$;"); - - return (); -} - -sub create_child_bre { - my $dbh = shift; - $dbh->do("DO \$\$ - BEGIN - CREATE TABLE $MIGSCHEMA.$stage_table (x_migrate BOOLEAN DEFAULT TRUE, x_source TEXT) INHERITS ($MIGSCHEMA.$base_table); - END \$\$;"); - - return (); -} - -sub abort { - my $msg = shift; - print STDERR "$0: $msg", "\n"; - exit 1; -} - -sub report_progress { - my ($msg, $counter) = @_; - if (defined $counter) { - print STDERR "$msg: $counter\n"; - } else { - print STDERR "$msg\n"; - } -} - -sub stage_record { - my $dbh = shift; - my $record = shift; - my $x_source = shift; - my $source = shift; - my $last_xact = "'$MIGSCHEMA'"; - $record = '$_$' . $record . 
'$_$'; - my $sql; - if ($no_source_or_last_xact_id) { - $sql = "INSERT INTO $MIGSCHEMA.$stage_table ($marc_column) VALUES ($record);"; - } else { - if ($x_source eq 'default') { - $sql = "INSERT INTO $MIGSCHEMA.$stage_table (last_xact_id,$marc_column,source) VALUES ($last_xact,$record,$source);"; - } else { - $sql = "INSERT INTO $MIGSCHEMA.$stage_table (last_xact_id,$marc_column,x_source,source) VALUES ($last_xact,$record,'$x_source',$source);"; - } - } - my $sth = $dbh->prepare($sql); - $sth->execute(); - return; -} - -sub check_for_table { - my $dbh = shift; - my $table = shift; - my $sql = "SELECT 1 FROM information_schema.tables WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table';"; - my $sth = $dbh->prepare($sql); - $sth->execute(); - my @sqlresult = $sth->fetchrow_array; - my $r = pop @sqlresult; - if ($r) { return $r; } else { return 0; } -} - -sub check_for_column { - my $dbh = shift; - my $table = shift; - my $column = shift; - my $sql = "SELECT 1 FROM information_schema.columns WHERE table_schema = '$MIGSCHEMA' AND table_name = '$table' AND column_name = '$column';"; - my $sth = $dbh->prepare($sql); - $sth->execute(); - my @sqlresult = $sth->fetchrow_array; - my $r = pop @sqlresult; - if ($r) { return $r; } else { return 0; } -} - -sub add_column { - my $dbh = shift; - my $table = shift; - my $column = shift; - my $column_type = shift; - my $sql = "ALTER TABLE $MIGSCHEMA.$table ADD COLUMN $column $column_type;"; - my $sth = $dbh->prepare($sql); - $sth->execute(); - my @sqlresult = $sth->fetchrow_array; - my $r = check_for_column($dbh,$table,$column); - if ($r == 0) { abort('failed to create column'); } else { return $r; } -} - diff --git a/mig-bin/mig-status b/mig-bin/mig-status deleted file mode 100755 index 0d78b18..0000000 --- a/mig-bin/mig-status +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-status - This will show tracking 
information for either the specified files -or all tracked files if no argument is given. - -You'll need to invoke B prior to using commands like B - -=head1 SYNOPSIS - -B [file] [...] - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if scalar(@ARGV) > 0 && $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my @files = @ARGV; -if (scalar(@files) == 0) { - @files = (); - my $dbh = Mig::db_connect(); - my $sth = $dbh->prepare(" - SELECT base_filename - FROM $MIGSCHEMA.tracked_file - ORDER BY 1;" - ); - my $rv = $sth->execute() - || die "Error retrieving data from table (tracked_file): $!"; - my $rows = $sth->fetchall_arrayref; - for my $row ( @$rows ) { - push @files, $row->[0] - } - $sth->finish; - Mig::db_disconnect($dbh); -} - -foreach my $arg (sort @files) { - my $file = abs_path($arg); - my $data = Mig::status_this_file($file); - print "=-=-=\n"; - foreach my $key ( - 'base_filename' - ,'has_headers' - ,'headers_file' - ,'utf8_filename' - ,'clean_filename' - ,'parent_table' - ,'stage_sql_filename' - ,'staged_table' - ,'map_sql_filename' - ,'prod_sql_filename' - ) { - printf "%-20s:\t", $key; - print $data->{$key} ? $data->{$key} : ""; - if ($key =~ /filename$/ && $data->{$key} && ! 
-e $data->{$key}) { - print " (FILE MISSING)"; - } - print "\n"; - } -} - -exit 0; - -############################################################################### - - diff --git a/mig-bin/mig-unlink b/mig-bin/mig-unlink deleted file mode 100755 index 5bf34e4..0000000 --- a/mig-bin/mig-unlink +++ /dev/null @@ -1,71 +0,0 @@ -#!/usr/bin/perl -w -############################################################################### -=pod - -=head1 NAME - -mig-unlink - -Clear any association between the specified file and a parent table within the -migration schema. - -=head1 SYNOPSIS - -B - -=cut - -############################################################################### - -use strict; -use Switch; -use Env qw( - HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA - MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR -); -use Pod::Usage; -use DBI; -use Cwd 'abs_path'; -use FindBin; -my $mig_bin = "$FindBin::Bin/"; -use lib "$FindBin::Bin/"; -use Mig; - -pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help'; - -Mig::die_if_no_env_migschema(); -Mig::die_if_mig_tracking_table_does_not_exist(); - -my $file = abs_path($ARGV[0]); -if ($file =~ /^$MIGBASEWORKDIR/) { - unlink_table(@ARGV); -} else { - print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n"; -} - -exit 0; - -############################################################################### - -sub unlink_table { - my $file = abs_path(shift); - - my $tracked_file_id = Mig::check_for_tracked_file($file); - if ($tracked_file_id) { - my $data = Mig::status_this_file($file); - my $table = $data->{'parent_table'} || ''; - - print "unlinking table ($table) from file: $file\n"; - - my $dbh = Mig::db_connect(); - my $rv = $dbh->do(" - UPDATE $MIGSCHEMA.tracked_file - SET parent_table = '' - WHERE base_filename = " . $dbh->quote($file) . 
" - ; - ") || die "Error updating table $MIGSCHEMA.tracked_file: $!\n"; - Mig::db_disconnect($dbh); - } else { - print "File not currently tracked: $file\n"; - } -} diff --git a/mig-sql/init/000-tracked_column.sql b/mig-sql/init/000-tracked_column.sql deleted file mode 100644 index 3f73454..0000000 --- a/mig-sql/init/000-tracked_column.sql +++ /dev/null @@ -1,15 +0,0 @@ -DROP TABLE IF EXISTS tracked_column; -CREATE TABLE tracked_column ( - id serial - ,base_filename TEXT - ,parent_table TEXT - ,staged_table TEXT - ,staged_column TEXT - ,comment TEXT - ,target_table TEXT - ,target_column TEXT - ,transform TEXT - ,summarize BOOLEAN -); -CREATE INDEX ON tracked_column(target_table,target_column); -CREATE INDEX ON tracked_column(base_filename); diff --git a/mig-sql/init/000-tracked_file.sql b/mig-sql/init/000-tracked_file.sql deleted file mode 100644 index d80b12d..0000000 --- a/mig-sql/init/000-tracked_file.sql +++ /dev/null @@ -1,14 +0,0 @@ -DROP TABLE IF EXISTS tracked_file; -CREATE TABLE tracked_file ( - id serial - ,base_filename TEXT UNIQUE - ,has_headers BOOLEAN - ,headers_file TEXT - ,utf8_filename TEXT - ,clean_filename TEXT - ,stage_sql_filename TEXT - ,map_sql_filename TEXT - ,prod_sql_filename TEXT - ,parent_table TEXT - ,staged_table TEXT -); diff --git a/mig-sql/init/010_gsheet_tracking.sql b/mig-sql/init/010_gsheet_tracking.sql deleted file mode 100644 index ff2e4dc..0000000 --- a/mig-sql/init/010_gsheet_tracking.sql +++ /dev/null @@ -1,15 +0,0 @@ -CREATE TABLE gsheet_tracked_table ( - id SERIAL PRIMARY KEY - ,sheet_name TEXT NOT NULL - ,table_name TEXT NOT NULL - ,tab_name TEXT - ,created TIMESTAMP - ,last_pulled TIMESTAMP - ,last_pushed TIMESTAMP -); - -CREATE TABLE gsheet_tracked_column ( - id SERIAL - ,table_id INTEGER REFERENCES gsheet_tracked_table (id) - ,column_name TEXT NOT NULL -); diff --git a/mig-sql/init/011_reporter_tables.sql b/mig-sql/init/011_reporter_tables.sql deleted file mode 100644 index 5a0bc77..0000000 --- 
a/mig-sql/init/011_reporter_tables.sql +++ /dev/null @@ -1,5 +0,0 @@ -CREATE TABLE report (id SERIAL, create_date TIMESTAMPTZ, name TEXT); -CREATE TABLE reporter_columns (id SERIAL, report INTEGER, header TEXT, ordinal_position INTEGER); -CREATE TABLE reporter_rows (id SERIAL, report INTEGER, row INTEGER, ordinal_position INTEGER); - - diff --git a/mig-sql/init/020_common_tables.sql b/mig-sql/init/020_common_tables.sql deleted file mode 100644 index 99b3ce4..0000000 --- a/mig-sql/init/020_common_tables.sql +++ /dev/null @@ -1,354 +0,0 @@ -CREATE TABLE map_hold_policies ( - l_user_home_ou TEXT - ,l_request_ou TEXT - ,l_item_owning_ou TEXT - ,l_item_circ_ou TEXT - ,l_requestor_grp TEXT - ,l_circ_modifier TEXT - ,l_active TEXT - ,l_holdable TEXT - ,l_max_holds TEXT - ,l_includes_frozen_holds TEXT - ,l_distance_is_from_owner TEXT - ,l_transit_range TEXT - ,l_usr_grp TEXT - ,x_user_home_ou INTEGER - ,x_request_ou INTEGER - ,x_item_owning_ou INTEGER - ,x_item_circ_ou INTEGER - ,x_requestor_grp INTEGER - ,x_circ_modifier TEXT - ,x_active BOOLEAN - ,x_holdable BOOLEAN - ,x_max_holds INTEGER - ,x_includes_frozen_holds BOOLEAN - ,x_distance_is_from_owner BOOLEAN - ,x_transit_range INTEGER - ,x_usr_grp INTEGER - ,x_migrate BOOLEAN DEFAULT TRUE -); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_hold_policies','Hold Policies',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_user_home_ou') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_request_ou') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_item_owning_ou') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_item_circ_ou') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold 
Policies'),'l_requestor_grp') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_circ_modifier') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_active') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_holdable') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_max_holds') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_includes_frozen_holds') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_distance_is_from_owner') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_transit_range') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Hold Policies'),'l_usr_grp') -; - - -CREATE TABLE map_circ_policies ( - l_org_unit TEXT - ,l_user_group TEXT - ,l_copy_owning_lib TEXT - ,l_user_home_lib TEXT - ,l_circ_mod TEXT - ,l_copy_location TEXT - ,l_circulate TEXT - ,l_circ_limit_set TEXT - ,l_duration_rule TEXT - ,l_fine_rule TEXT - ,l_grace_override TEXT - ,l_max_fine TEXT - ,l_notes TEXT - ,x_org_unit INTEGER - ,x_user_group INTEGER - ,x_copy_owning_lib INTEGER - ,x_user_home_lib INTEGER - ,x_circ_mod TEXT - ,x_copy_location INTEGER - ,x_circulate BOOLEAN - ,x_circ_limit_set INTEGER - ,x_duration_rule INTEGER - ,x_fine_rule INTEGER - ,x_grace_override INTERVAL - ,x_max_fine INTEGER - ,x_circ_limit_quantity INTEGER - ,x_circ_limit_parts INTEGER - ,x_circ_limit_ou_name TEXT - ,x_circ_limit_ou_id INTEGER - ,x_circ_limit_id INTEGER - ,x_migrate BOOLEAN DEFAULT FALSE -); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_circ_policies','Circ Policies',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_org_unit') - ,((SELECT id FROM 
gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_user_group') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_copy_owning') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_user_home_lib') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_circ_mod') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_copy_location') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_circulate') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_circ_limit_set') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_duration_rule') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_fine_rule') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_grace_override') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_max_fine') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Policies'),'l_notes') -; - -CREATE TABLE map_circ_limit_sets ( - l_owning_lib TEXT - ,l_name TEXT - ,l_items_out TEXT - ,l_depth TEXT - ,l_global TEXT - ,l_description TEXT - ,l_circ_mod TEXT - ,l_copy_loc TEXT - ,x_owning_lib INTEGER - ,x_name TEXT - ,x_items_out INTEGER - ,x_global BOOLEAN - ,x_depth INTEGER - ,x_description TEXT -); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_circ_limit_sets','Circ Limit Sets',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_owning_lib') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_name') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_items_out') - ,((SELECT id FROM gsheet_tracked_table 
WHERE tab_name = 'Circ Limit Sets'),'l_depth') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_global') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_description') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_circ_mod') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Circ Limit Sets'),'l_copy_loc') -; - -CREATE TABLE map_create_shelving_location ( - l_id SERIAL - ,l_owning_lib TEXT - ,l_copy_location TEXT - ,l_opac_visible TEXT - ,l_checkin_alert TEXT - ,l_holdable TEXT - ,l_circulate TEXT - ,l_note TEXT - ,x_migrate BOOLEAN NOT NULL DEFAULT TRUE - ,x_shelf INTEGER -) INHERITS (m_asset_copy_location); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_create_shelving_location','New Copy Locations',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_owning_lib') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_copy_location') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_opac_visible') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_checkin_alert') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_holdable') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_circulate') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Copy Locations'),'l_note') -; - -CREATE TABLE map_create_account ( - l_id SERIAL - ,l_barcode TEXT - ,l_usrname TEXT - ,l_first_name TEXT - ,l_family_name TEXT - ,l_email TEXT - ,l_password TEXT - ,l_home_library TEXT - ,l_profile1 TEXT - ,l_profile2 TEXT - ,l_profile3 TEXT - ,l_work_ou1 TEXT - ,l_work_ou2 TEXT - ,l_work_ou3 TEXT 
- ,l_work_ou4 TEXT - ,l_work_ou5 TEXT - ,l_work_ou6 TEXT - ,l_work_ou7 TEXT - ,l_work_ou8 TEXT - ,l_work_ou9 TEXT - ,l_work_ou10 TEXT - ,l_work_ou11 TEXT - ,l_work_ou12 TEXT - ,l_work_ou13 TEXT - ,l_note TEXT - ,x_migrate BOOLEAN NOT NULL DEFAULT TRUE -); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_create_account','New Accounts',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_usrname') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_barcode') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_first_name') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_family_name') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_email') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_password') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_home_library') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_profile1') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_profile2') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_profile3') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou1') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou2') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou3') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou4') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou5') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou6') - ,((SELECT id FROM gsheet_tracked_table 
WHERE tab_name = 'New Accounts'),'l_work_ou7') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou8') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou9') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou10') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou11') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou12') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_work_ou13') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'New Accounts'),'l_note') -; - -CREATE TABLE map_threshold ( - id SERIAL - ,library TEXT - ,profile TEXT - ,checkout_threshold TEXT - ,fine_threshold TEXT - ,overdue_threshold TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_threshold','Patron Thresholds',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'profile') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'library') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'checkout_threshold') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'fine_threshold') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'overdue_threshold') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Thresholds'),'note') -; - - -CREATE TABLE map_misc ( - id SERIAL - ,count TEXT - ,option TEXT - ,choice TEXT - ,value TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 
'search_path'),'map_misc','Miscellaneous Options',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'option') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'Choice') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'value') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Miscellaneous Options'),'note') -; - -CREATE TABLE map_org_setting ( - l_id SERIAL - ,l_name TEXT - ,l_label TEXT - ,l_entry_type TEXT - ,l_org_unit TEXT - ,l_value TEXT - ,l_note TEXT -); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_org_setting','Org Settings',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_name') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_label') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_entry_type') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_org_unit') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_value') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Org Settings'),'l_note') -; - - -CREATE TABLE map_bib_manipulations ( - id SERIAL - ,action TEXT - ,field TEXT - ,subfield TEXT - ,matching_value TEXT - ,target_value TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (sheet_name,table_name,tab_name,created) -VALUES - ((SELECT SPLIT_PART(reset_val,',',1) FROM pg_settings WHERE name = 'search_path'),'map_bib_manipulations','Bib Records',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES 
- ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'name') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'action') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'field') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'subfield') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'matching_value') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'target_value') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Bib Records'),'note') -; - - diff --git a/mig-sql/system/tlc/030_tlc_mapping_tables.sql b/mig-sql/system/tlc/030_tlc_mapping_tables.sql deleted file mode 100644 index 210cc73..0000000 --- a/mig-sql/system/tlc/030_tlc_mapping_tables.sql +++ /dev/null @@ -1,375 +0,0 @@ -CREATE TABLE map_tlc_branches ( - id SERIAL - ,tlc_branch_id TEXT - ,tlc_name TEXT - ,org_unit TEXT - ,mig_patrons TEXT - ,mig_items TEXT - ,note TEXT - ,x_org_id INTEGER - -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_branches','Branches Present in Extract',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'tlc_branch_id') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'tlc_name') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'org_unit') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'note') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'mig_patrons') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Branches Present in Extract'),'mig_items') -; - --- ############################################ - -CREATE 
TABLE map_tlc_perm_group ( - id SERIAL - ,x_count TEXT - ,legacy_group TEXT - ,target_group TEXT - ,stat_cat_name TEXT - ,stat_cat_entry TEXT - ,dnm TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_perm_group','Patron Type',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'legacy_group') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'target_group') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'stat_cat_name') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'stat_cat_entry') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'dmn') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Type'),'note') -; - --- ############################################ - -CREATE TABLE map_tlc_patron_expire ( - id SERIAL - ,x_count TEXT - ,expire_year TEXT - ,set_to_date TEXT - ,dnm TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_patron_expire','Patrons by Expiration Date',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'expire_year') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'set_to_date') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'dnm') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Expiration Date'),'note') -; - --- ############################################ - -CREATE TABLE map_tlc_patron_last_active ( - id SERIAL - ,x_count TEXT - 
,last_active TEXT - ,inactive TEXT - ,dnm TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_patron_last_active','Patrons by Last Active Date',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'last_active') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'inactive') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'dnm') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Last Active Date'),'note') -; - --- ############################################ - -CREATE TABLE map_tlc_billing_type ( - id SERIAL - ,x_count TEXT - ,tlc_code TEXT - ,billing_type TEXT - ,dnm TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_billing_type','Migrating Bills by Bill Type',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'tlc_code') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'billing_type') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'dnm') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Migrating Bills by Bill Type'),'note') -; - --- ############################################ - -CREATE TABLE map_tlc_password ( - id SERIAL - ,x_count TEXT - ,note TEXT - ,migrate_available TEXT - ,fill_in_method TEXT - ,static_value TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_password','Patrons w 
NULL Passwords',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'note') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'migrate_available') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'fill_in_method') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons w NULL Passwords'),'static_value') -; - --- ############################################ - -CREATE TABLE map_tlc_block_status ( - id SERIAL - ,x_count TEXT - ,tlc_block_status TEXT - ,block TEXT - ,bar TEXT - ,dnm TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_block_status','Patrons by Block Status',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'tlc_block_status') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'block') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'bar') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'dnm') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Block Status'),'note') -; - - --- ############################################ - -CREATE TABLE map_tlc_patron_gender ( - id SERIAL - ,x_count TEXT - ,gender TEXT - ,stat_cat TEXT - ,stat_cat_entry TEXT - ,show TEXT - ,required TEXT - ,dnm TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_patron_gender','Patrons by Gender',NOW()) -; - -INSERT INTO gsheet_tracked_column - 
(table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'gender') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'stat_cat') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'stat_cat_entry') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'show') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'required') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'dnm') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patrons by Gender'),'note') -; - - --- ############################################ - - -CREATE TABLE map_tlc_holding_code ( - id SERIAL - ,x_count TEXT - ,holding_code TEXT - ,shelving_location TEXT - ,org_unit TEXT - ,circ_mod TEXT - ,alert TEXT - ,alert_message TEXT - ,dnm TEXT - ,note TEXT - ,reference TEXT - ,item_status TEXT - ,stat_cat_title TEXT - ,stat_cat_entry TEXT - ,x_migrate TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_holding_code','Holdings Code',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'holding_code') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'shelving_location') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'org_unit') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'circ_mod') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'alert') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'alert_message') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 
'Holdings Code'),'dnm') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Holdings Code'),'note') -; - - - - --- ############################################ - - -CREATE TABLE map_tlc_stat_cat ( - id SERIAL - ,x_count TEXT - ,tlc_stat_cat TEXT - ,tlc_stat_cat_value TEXT - ,stat_cat TEXT - ,stat_cat_entry TEXT - ,show TEXT - ,required TEXT - ,dnm TEXT - ,note TEXT - ,note2 TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_stat_cat','Patron Stat Cats',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'tlc_stat_cat') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'tlc_stat_cat_value') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'stat_cat') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'stat_cat_entry') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'show') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'required') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'dnm') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'note') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Stat Cats'),'note2') -; - - --- ############################################ - -CREATE TABLE map_tlc_patron_note ( - id SERIAL - ,x_count TEXT - ,note_type TEXT - ,subset_values TEXT - ,matching_text TEXT - ,action TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_patron_note','Patron Notes',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'x_count') - ,((SELECT 
id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'note_type') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'subset_values') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'matching_text') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'action') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Patron Notes'),'note') -; - --- ############################################ - -CREATE TABLE map_tlc_item_note ( - id SERIAL - ,x_count TEXT - ,note_type TEXT - ,subset_values TEXT - ,matching_text TEXT - ,action TEXT - ,note TEXT -); - -INSERT INTO gsheet_tracked_table - (table_name,tab_name,created) -VALUES - ('map_tlc_item_note','Item Notes',NOW()) -; - -INSERT INTO gsheet_tracked_column - (table_id,column_name) -VALUES - ((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'x_count') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'note_type') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'subset_values') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'matching_text') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'action') - ,((SELECT id FROM gsheet_tracked_table WHERE tab_name = 'Item Notes'),'note') -; - - diff --git a/mig-xml/evergreen_full_system.xml b/mig-xml/evergreen_full_system.xml deleted file mode 100644 index e972f6a..0000000 --- a/mig-xml/evergreen_full_system.xml +++ /dev/null @@ -1,519 +0,0 @@ - - - - - circ_count - circs - 0 - Open Circulations - Circulation Status.Count of Circs - SELECT 'Closed Circulations', COUNT(id) FROM action.circulation WHERE xact_finish IS NOT NULL UNION ALL SELECT 'Open Circulations', COUNT(id) FROM action.circulation WHERE xact_finish IS NULL - - - - circ_by_orgunit - circs - 0 - Circulations by Org Unit - Circulations Count.Org Unit - SELECT COUNT(acirc.id), aou.name FROM action.circulation acirc JOIN 
actor.org_unit aou ON aou.id = acirc.circ_lib WHERE acirc.xact_finish IS NULL GROUP BY 2 - - - - circs_by_duration - circs - 0 - Circulations by Duration, Fine and Max Fine Rules - Count of Circs.Duration.Fine.Max Fine - SELECT COUNT(id), duration_rule, recurring_fine_rule, max_fine_rule FROM action.circulation GROUP BY 2, 3, 4 ORDER BY 2, 3, 4 - - - - circs_by_usrgroup - circs - 0 - Circulations by Rules and Patron Group - Count of Circs.Duration.Fine.Max Fine.User Group - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, pgt.name FROM action.circulation acirc JOIN actor.usr au ON au.id = acirc.usr JOIN permission.grp_tree pgt ON pgt.id = au.profile - GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 - - - - circs_by_circmod - circs - 0 - Circulations by Rules and Circulation Modifier - Count of Circs.Duration.Fine.Max Fine.Circulation Modifier - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, ac.circ_modifier FROM action.circulation acirc JOIN asset.copy ac ON ac.id = acirc.target_copy - GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 - - - - circs_by_orgunit - circs - 0 - Circulations by Rules and Org Unit - Count of Circs.Duration.Fine.Max Fine.Library Branch - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, aou.name FROM action.circulation acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 - - - - non_cat_circs - circs - 0 - Non-Cataloged Circulation - Circulations Count - SELECT COUNT(id) FROM action.non_cataloged_circulation - - - - in_house - circs - 0 - In House Use - In House Use Records - SELECT COUNT(id) FROM action.in_house_use - - - - circs_missing_rules - circs - 0 - Circs Missing Rules - Count.Field Missing - SELECT COUNT(id), 'Duration Rule Value' FROM action.circulation WHERE duration IS NULL - UNION ALL SELECT COUNT(id), 'Recurring Fine Rule Value' FROM action.circulation WHERE 
recurring_fine IS NULL - UNION ALL SELECT COUNT(id), 'Max Fine Rule Value' FROM action.circulation WHERE max_fine IS NULL - UNION ALL SELECT COUNT(id), 'Duration Rule' FROM action.circulation WHERE duration_rule IS NULL - UNION ALL SELECT COUNT(id), 'Recurring Fine Rule' FROM action.circulation WHERE recurring_fine_rule IS NULL - UNION ALL SELECT COUNT(id), 'Max Fine Rule' FROM action.circulation WHERE max_fine_rule IS NULL - - - - - - - holds - holds - 0 - Holds - Hold Type.Hold Count - SELECT 'Closed Holds', COUNT(id) FROM action.hold_request WHERE (expire_time::TIMESTAMP < now()) OR cancel_time IS NOT NULL OR fulfillment_time IS NOT NULL UNION ALL SELECT 'Open Holds', COUNT(id) FROM action.hold_request WHERE (expire_time IS NULL OR expire_time::TIMESTAMP > now()) AND cancel_time IS NULL AND fulfillment_time IS NULL - - - - holds_bytype - holds - 0 - Holds By Type - Hold Type.Hold Count - SELECT hold_type as "Hold Type", COUNT(id) FROM action.hold_request GROUP BY 1 - - - - transit_open_by_item_status - holds - 0 - Transit Copy Records and Status of Linked Items - Count.Status - SELECT COUNT(atc.id), ccs.name FROM action.transit_copy atc JOIN asset.copy ac ON ac.id = atc.target_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE atc.id IN (SELECT id FROM action.transit_copy) AND atc.dest_recv_time IS NULL GROUP BY 2 ORDER BY 2 - - - - transit_copies_by_status - holds - 0 - Status of Items with Count of Open In Transits - Count.Status.Count of Open Transits - SELECT COUNT(ac.id), ccs.name, SUM(CASE WHEN atc.id IS NULL THEN 0 ELSE 1 END) FROM asset.copy ac JOIN config.copy_status ccs ON ccs.id = ac.status LEFT JOIN (SELECT * FROM action.transit_copy WHERE id IN (SELECT id FROM action.transit_copy) AND dest_recv_time IS NULL) atc ON atc.target_copy = ac.id WHERE ac.id IN (SELECT id from asset.copy) GROUP BY 2 ORDER BY 2 - - - - hold_copies_by_status - holds - 0 - Captured Holds with Status of Items - Count of Captured Hold.Status of Item - SELECT 
COUNT(ahr.id), ccs.name FROM action.hold_request ahr JOIN asset.copy ac ON ac.id = ahr.current_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE ahr.capture_time IS NOT NULL AND ahr.fulfillment_time IS NULL and ahr.cancel_time IS NULL AND ahr.id IN (SELECT id FROM action.hold_request) GROUP BY 2 ORDER By 2 - - - - - - asset.copy_count - Count of Copies by Library - assets - 0 - Copy Count.Library - SELECT COUNT(ac.id), aou.name FROM asset.copy ac JOIN actor.org_unit aou ON aou.id = ac.circ_lib GROUP BY 2 ORDER BY 2 - - - - asset.deleted_copies - Deleted Copies - assets - 0 - Copy Count.Deleted - SELECT COUNT(ac.id), ac.deleted::TEXT FROM asset.copy ac GROUP BY 2 - - - - asset.copies_by_status - Copies by Status - assets - 0 - Copy Count.Status - SELECT COUNT(ac.id), cs.name FROM asset.copy ac JOIN config.copy_status cs ON cs.id = ac.status GROUP BY 2 ORDER BY 2 - - - - asset.circ_mod_copies_count - Copies by Circulation Modifier - assets - 0 - Copy Count.Circulation Modifier - SELECT COUNT(ac.id), ac.circ_modifier FROM asset.copy ac GROUP BY 2 ORDER BY 2 - - - - asset.copy_notes - Copy Notes - assets - 0 - Note Count.Public - SELECT COUNT(acnote.id), acnote.pub::TEXT FROM asset.copy_note acnote GROUP BY 2 ORDER BY 2 - - - - asset.copy_notes - Copy Notes - assets - 0 - Note Count.Public - SELECT COUNT(acnote.id), acnote.pub::TEXT FROM asset.copy_note acnote GROUP BY 2 ORDER BY 2 - - - - asset.vols_by_lib - Volumes by Library - assets - 0 - Volume Count.Library - SELECT COUNT(acn.id), aou.name FROM asset.call_number acn JOIN actor.org_unit aou ON aou.id = acn.owning_lib GROUP BY 2 ORDER BY 2 - - - - asset.vols_by_lib - Volumes by Library - assets - 0 - Volume Count.Library - SELECT COUNT(acn.id), aou.name FROM asset.call_number acn JOIN actor.org_unit aou ON aou.id = acn.owning_lib GROUP BY 2 ORDER BY 2 - - - - asset.cops_by_loc_and_org - Copies by Location - assets - 0 - Copy Count.Library.Circ Library - SELECT COUNT(ac.id), acl.name, aou.name FROM 
asset.copy ac JOIN asset.copy_location acl ON acl.id = ac.location JOIN actor.org_unit aou ON aou.id = ac.circ_lib GROUP BY 2, 3 ORDER BY 2, 3 - - - - asset.barcode_lengths - Barcode Lengths by Library - assets - 0 - Count of Barcode.Barcode Length.Library - SELECT COUNT(ac.id), LENGTH(ac.barcode), aou.name FROM asset.copy ac JOIN actor.org_unit aou ON aou.id = ac.circ_lib GROUP BY 2, 3 ORDER BY 3, 2 - - - - asset.stat_cats - Copy Statistical Categories - assets - 0 - Stat Cat Count.Library.Statistical Category - SELECT COUNT(ac_sc.id), aou.name, ac_sc.name FROM asset.stat_cat ac_sc JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 - - - - asset.stat_cats - Copy Statistical Categories - assets - 0 - Stat Cat Count.Library.Statistical Category - SELECT COUNT(ac_sc.id), aou.name, ac_sc.name FROM asset.stat_cat ac_sc JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 - - - - asset.stat_cat_entries - Copy Stat Cat User Entries - assets - 0 - Copy Stat Count.Library.Statistical Category - SELECT COUNT(map.id), aou.name, ac_sc.name FROM asset.stat_cat_entry_copy_map map JOIN asset.stat_cat ac_sc ON ac_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 - - - - asset.stat_cat_entries - Copy Stat Cat User Entries - assets - 0 - Copy Stat Count.Library.Statistical Category - SELECT COUNT(map.id), aou.name, ac_sc.name FROM asset.stat_cat_entry_copy_map map JOIN asset.stat_cat ac_sc ON ac_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou. 
-id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 - - - - asset.copy_tags - Copy Tags - assets - 0 - Tag Count.Copy Tag Type.Copy Tag Label.Staff Note.Public - SELECT COUNT(map.id), tag.tag_type, tag.label, tag.staff_note, tag.pub FROM asset.copy_tag tag JOIN asset.copy_tag_copy_map map ON map.tag = tag.id GROUP BY 2,3,4,5 ORDER BY 2,3 - - - - - - money.billing_voided - Bills Voided And Not - money - 0 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.billing a GROUP BY 2 ORDER BY 2 - - - - money.billing_voided - Bills Voided And Not - money - 0 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.billing a GROUP BY 2 ORDER BY 2, 3 - - - - money.billing_by_type - Bills by Type - money - 0 - Count.Billing Type - SELECT COUNT(a.id), a.billing_type FROM money.billing a GROUP BY 2 ORDER BY 2 - - - - money.billing_by_type - Bills by Type - money - 0 - Count.Billing Type - SELECT COUNT(a.id), a.billing_type FROM money.billing a GROUP BY 2 ORDER BY 2 - - - - money.cash_payment - Cash Payments - money - 0 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.cash_payment a GROUP BY 2 ORDER BY 2 - - - - money.cash_payment - Cash Payments - money - 0 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.cash_payment a GROUP BY 2 ORDER BY 2 - - - - money.check_payment - Check Payments - money - 0 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.check_payment a GROUP BY 2 ORDER BY 2 - - - - money.forgive_payment - Forgive Payments - money - 0 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM money.forgive_payment a GROUP BY 2 ORDER BY 2 - - - - - bibswovolumes - Bibliographic Records Without Volumes - bibs - 0 - Count - SELECT COUNT(id) FROM biblio.record_entry where id NOT IN (SELECT DISTINCT record FROM asset.call_number) AND deleted IS FALSE - - - - bibs_notes - Bib Record Notes - bibs - 0 - Count - SELECT 
COUNT(b.id) FROM biblio.record_note b - - - - bibs_peers - Peer Bib Copies - bibs - 0 - Count - SELECT COUNT(b.id) FROM biblio.peer_bib_copy_map b - - - - bibs_parts - Monograph Parts - bibs - 0 - Count - SELECT COUNT(b.id) FROM biblio.monograph_part b - - - - - - usrsbyorg - Patrons by Home Org - actors - 0 - Count.Library.Deleted - SELECT COUNT(au.id), aou.name, au.deleted::TEXT FROM actor.usr au JOIN actor.org_unit aou ON aou.id = au.home_ou GROUP BY 2, 3 ORDER BY 2, 3 - - - - usrsbypgt - Patrons by Permission Group - actors - 0 - Count.Permission Group - SELECT COUNT(au.id), pgt.name FROM actor.usr au JOIN permission.grp_tree pgt ON pgt.id = au.profile GROUP BY 2 ORDER BY 2 - - - - active_usrs - Patrons by Active Status - actors - 0 - Count of Users.Active - SELECT COUNT(id), active::TEXT FROM actor.usr GROUP BY 2 - - - - active_usr_barcodes - Patron Barcodes by Active Status - actors - 0 - Count of Barcodes.Active - SELECT COUNT(id), active::TEXT FROM actor.card GROUP BY 2 - - - - usr_barcode_lengths - Barcode Lengths by Library - actors - 0 - Count of Barcode.Barcode Length.Library - SELECT COUNT(acard.id), LENGTH(acard.barcode), aou.name FROM actor.card acard JOIN actor.usr au ON au.id = acard.usr JOIN actor.org_unit aou ON aou.id = au.home_ou GROUP BY 2, 3 ORDER BY 3, 2 - - - - usr_barcode_patterns - Common Barcode Starting Patterns - actors - 0 - Count of Barcodes (greater than 10).Left 60% of Characters - SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM actor.card acard GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 - - - - usr_addressses_status - Patron Addresses by Valid Status - actors - 0 - Count.Valid - SELECT COUNT(aua.id), valid::TEXT FROM actor.usr_address aua GROUP BY 2 - - - - usr_addresses_pending - Patron Addresses by Pending Status - actors - 0 - Count of Addresses.Pending - SELECT COUNT(aua.id), pending::TEXT FROM actor.usr_address aua GROUP BY 2 - - - - usr_messages - Patron Messages - actors - 0 - 
Count.Deleted - SELECT COUNT(aum.id), deleted::TEXT FROM actor.usr_message aum GROUP BY 2 - - - - usr_notes - Patron Notes - actors - 0 - Count.Public - SELECT COUNT(aun.id), pub::TEXT FROM actor.usr_note aun GROUP BY 2 - - - - usr_stat_cats - Patron Statistical Categories - actors - 0 - Stat Cat Count.Library.Statistical Category - SELECT COUNT(au_sc.id), aou.name, au_sc.name FROM actor.stat_cat au_sc JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3 ORDER BY 2, 3 - - - - usr_stat_cat_entries - Patron Stat Cat User Entries - actors - 0 - Patron Stat Count.Library.Statistical Category - SELECT COUNT(map.id), aou.name, au_sc.name FROM actor.stat_cat_entry_usr_map map JOIN actor.stat_cat au_sc ON au_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3 ORDER BY 2,3 - - - - - fund_count - 0 - acq - Funds - Number of Funds - SELECT COUNT(id) FROM acq.fund; - - - - invoice_count - 0 - acq - Invoices - Number of Funds - SELECT COUNT(id) FROM acq.invoice; - - - - diff --git a/mig-xml/evergreen_staged_report.xml b/mig-xml/evergreen_staged_report.xml deleted file mode 100644 index 050df4a..0000000 --- a/mig-xml/evergreen_staged_report.xml +++ /dev/null @@ -1,1182 +0,0 @@ - - - find_cmm - DROP FUNCTION IF EXISTS find_cmm(BIGINT) - - CREATE OR REPLACE FUNCTION find_cmm(circ_id BIGINT) - RETURNS SETOF INTEGER[] - LANGUAGE plpgsql - AS $function$ - DECLARE - aou INTEGER; - ac INTEGER; - au INTEGER; - r INTEGER[]; - BEGIN - SELECT circ_lib FROM action.circulation WHERE id = circ_id INTO aou; - SELECT target_copy FROM action.circulation WHERE id = circ_id INTO ac; - SELECT usr FROM action.circulation WHERE id = circ_id INTO au; - - FOR r IN SELECT buildrows FROM action.find_circ_matrix_matchpoint(aou,ac,au,FALSE) - LOOP - RETURN NEXT r; - END LOOP; - RETURN; - END - $function$ - - - - create_subfield_u - DROP TABLE IF EXISTS subfield_u - CREATE UNLOGGED TABLE subfield_u AS SELECT UNNEST(oils_xpath( '//*[@tag="856"]/*[@code="u"]/text()', 
marc)) AS value FROM m_biblio_record_entry_legacy WHERE x_migrate -
- - - - - - - - circ_count - circs - 0 - Migrated Circulations - Circulation Status.Count of Circs - SELECT 'Closed Circulations', COUNT(id) FROM m_action_circulation_legacy WHERE xact_finish IS NOT NULL AND x_migrate - UNION ALL SELECT 'Open Circulations', COUNT(id) FROM m_action_circulation_legacy WHERE xact_finish IS NULL AND x_migrate - - - - circ_count - circs - 1 - Open Circulations - Circulation Status.Count of Circs - SELECT 'Closed Circulations', COUNT(id) FROM m_action_circulation WHERE xact_finish IS NOT NULL UNION ALL SELECT 'Open Circulations', COUNT(id) FROM m_action_circulation WHERE xact_finish IS NULL - - - - circ_by_orgunit - circs - 0 - Circulations by Org Unit - Circulations Count.Org Unit - SELECT COUNT(acirc.id), aou.name FROM m_action_circulation_legacy acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib WHERE acirc.xact_finish IS NULL AND x_migrate = TRUE GROUP BY 2 - - - - circ_by_orgunit - circs - 1 - Circulations by Org Unit - Circulations Count.Org Unit - SELECT COUNT(acirc.id), aou.name FROM m_action_circulation acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib WHERE acirc.xact_finish IS NULL GROUP BY 2 - - - - circs_by_duration - circs - 0 - Migrated Circulations by Duration, Fine and Max Fine - Count of Circs.Duration.Fine.Max Fine - SELECT COUNT(id), duration_rule, recurring_fine_rule, max_fine_rule FROM m_action_circulation_legacy WHERE x_migrate = TRUE GROUP BY 2, 3, 4 - - - - circs_by_duration - circs - 1 - Circulations by Duration, Fine and Max Fine - Count of Circs.Duration.Fine.Max Fine - SELECT COUNT(id), duration_rule, recurring_fine_rule, max_fine_rule FROM m_action_circulation GROUP BY 2, 3, 4 ORDER BY 2, 3, 4 - - - - circs_by_usrgroup - circs - 0 - Circulations by Rules and Patron Group - Count of Circs.Duration.Fine.Max Fine.User Group.Matchpoints - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, pgt.name, x.buildrows FROM m_action_circulation_legacy acirc JOIN 
actor.usr au ON au.id = acirc.usr JOIN permission.grp_tree pgt ON pgt.id = au.profile JOIN (SELECT acirc.id, ARRAY_TO_STRING(find_cmm(acirc.id),',') AS buildrows FROM m_action_circulation_legacy acirc WHERE acirc.x_migrate = TRUE) x ON x.id = acirc.id WHERE acirc.x_migrate = TRUE GROUP BY 2, 3, 4, 5, 6 ORDER BY 2, 3, 4, 5, 6 - - - - circs_by_usrgroup - circs - 1 - Circulations by Rules and Patron Group - Count of Circs.Duration.Fine.Max Fine.User Group.Matchpoints - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, pgt.name, x.buildrows FROM m_action_circulation acirc JOIN actor.usr au ON au.id = acirc.usr JOIN permission.grp_tree pgt ON pgt.id = au.profile JOIN (SELECT acirc.id, ARRAY_TO_STRING(find_cmm(acirc.id),',') AS buildrows FROM m_action_circulation acirc) x ON x.id = acirc.id GROUP BY 2, 3, 4, 5, 6 ORDER BY 2, 3, 4, 5, 6 - - - - circs_by_circmod - circs - 0 - Circulations by Rules and Circulation Modifier - Count of Circs.Duration.Fine.Max Fine.Circulation Modifier.Matchpoints - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, ac.circ_modifier, x.buildrows FROM m_action_circulation_legacy acirc JOIN asset.copy ac ON ac.id = acirc.target_copy JOIN (SELECT acirc.id, ARRAY_TO_STRING(find_cmm(acirc.id),',') AS buildrows FROM m_action_circulation_legacy acirc WHERE acirc.x_migrate = TRUE) x ON x.id = acirc.id WHERE acirc.x_migrate = TRUE GROUP BY 2, 3, 4, 5, 6 ORDER BY 2, 3, 4, 5, 6 - - - - circs_by_circmod - circs - 1 - Circulations by Rules and Circulation Modifier - Count of Circs.Duration.Fine.Max Fine.Circulation Modifier.Matchpoints - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, ac.circ_modifier, x.buildrows FROM m_action_circulation acirc JOIN asset.copy ac ON ac.id = acirc.target_copy JOIN (SELECT acirc.id, ARRAY_TO_STRING(find_cmm(acirc.id),',') AS buildrows FROM m_action_circulation acirc) x ON x.id = acirc.id - GROUP 
BY 2, 3, 4, 5, 6 ORDER BY 2, 3, 4, 5, 6 - - - - circs_by_orgunit - circs - 0 - Circulations by Rules and Org Unit - Count of Circs.Duration.Fine.Max Fine.Library Branch - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, aou.name FROM m_action_circulation_legacy acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib WHERE acirc.x_migrate = TRUE GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 - - - - circs_by_orgunit - circs - 1 - Circulations by Rules and Org Unit - Count of Circs.Duration.Fine.Max Fine.Library Branch - SELECT COUNT(acirc.id), acirc.duration_rule, acirc.recurring_fine_rule, acirc.max_fine_rule, aou.name FROM m_action_circulation acirc JOIN actor.org_unit aou ON aou.id = acirc.circ_lib GROUP BY 2, 3, 4, 5 ORDER BY 2, 3, 4, 5 - - - - non_cat_circs - circs - 0 - Non-Cataloged Circulation - Circulations Count.Migrated - SELECT COUNT(id), x_migrate::TEXT FROM m_action_non_cataloged_circulation_legacy GROUP BY 2 - - - - non_cat_circs - circs - 1 - Non-Cataloged Circulation - Circulations Count - SELECT COUNT(id) FROM m_action_non_cataloged_circulation - - - - in_house - circs - 0 - In House Use - In House Use Records.Migrated - SELECT COUNT(id), x_migrate::TEXT FROM m_action_in_house_use_legacy GROUP BY 2 - - - - in_house - circs - 1 - In House Use - In House Use Records - SELECT COUNT(id) FROM m_action_in_house_use - - - - circs_missing_rules - circs - 1 - Circs Missing Rules - Count.Field Missing - SELECT COUNT(id), 'Duration Rule Value' FROM m_action_circulation WHERE duration IS NULL - UNION ALL SELECT COUNT(id), 'Recurring Fine Rule Value' FROM m_action_circulation WHERE recurring_fine IS NULL - UNION ALL SELECT COUNT(id), 'Max Fine Rule Value' FROM m_action_circulation WHERE max_fine IS NULL - UNION ALL SELECT COUNT(id), 'Duration Rule' FROM m_action_circulation WHERE duration_rule IS NULL - UNION ALL SELECT COUNT(id), 'Recurring Fine Rule' FROM m_action_circulation WHERE recurring_fine_rule IS NULL - UNION ALL 
SELECT COUNT(id), 'Max Fine Rule' FROM m_action_circulation WHERE max_fine_rule IS NULL - - - - - circ_open_by_item_status - circs - 0 - Status of Currently Circulating Items - Count.Status - SELECT COUNT(acirc.id), ccs.name FROM action.circulation acirc JOIN asset.copy ac ON ac.id = acirc.target_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE acirc.xact_finish IS NULL AND acirc.checkin_time IS NULL AND acirc.id IN (SELECT id FROM m_action_circulation) GROUP BY 2 ORDER BY 2 - - - - - - holds - holds - 0 - Migrated and Non-Migrated Holds - Hold Type.Hold Count.Migrated - SELECT 'Closed Holds', COUNT(id), x_migrate::TEXT FROM m_action_hold_request_legacy WHERE (expire_time::TIMESTAMP < now()) OR cancel_time IS NOT NULL OR fulfillment_time IS NOT NULL GROUP BY 3 UNION ALL SELECT 'Open Holds', COUNT(id), x_migrate::TEXT FROM m_action_hold_request_legacy WHERE (expire_time IS NULL OR expire_time::TIMESTAMP > now()) AND cancel_time IS NULL AND fulfillment_time IS NULL GROUP BY 3 - - - - holds_bytype - holds - 0 - Migrated Holds By Type - Hold Type.Hold Count - SELECT hold_type as "Hold Type", COUNT(id) FROM m_action_hold_request_legacy WHERE x_migrate = TRUE GROUP BY 1 - - - - transit_open_by_item_status - holds - 0 - Transit Copy Records and Status of Linked Items - Count.Status - SELECT COUNT(atc.id), ccs.name FROM action.transit_copy atc JOIN asset.copy ac ON ac.id = atc.target_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE atc.id IN (SELECT id FROM m_action_transit_copy) AND atc.dest_recv_time IS NULL GROUP BY 2 ORDER BY 2 - - - - transit_copies_by_status - holds - 0 - Status of Items with Count of Open In Transits - Count.Status.Count of Open Transits - SELECT COUNT(ac.id), ccs.name, SUM(CASE WHEN atc.id IS NULL THEN 0 ELSE 1 END) FROM asset.copy ac JOIN config.copy_status ccs ON ccs.id = ac.status LEFT JOIN (SELECT * FROM action.transit_copy WHERE id IN (SELECT id FROM m_action_transit_copy) AND dest_recv_time IS NULL) atc ON 
atc.target_copy = ac.id WHERE ac.id IN (SELECT id from m_asset_copy) GROUP BY 2 ORDER BY 2 - - - - hold_copies_by_status - holds - 0 - Captured Holds with Status of Items - Count of Captured Hold.Status of Item - SELECT COUNT(ahr.id), ccs.name FROM action.hold_request ahr JOIN asset.copy ac ON ac.id = ahr.current_copy JOIN config.copy_status ccs ON ccs.id = ac.status WHERE ahr.capture_time IS NOT NULL AND ahr.fulfillment_time IS NULL and ahr.cancel_time IS NULL AND ahr.id IN (SELECT id FROM m_action_hold_request) GROUP BY 2 ORDER By 2 - - - - hold_depth - holds - 0 - Depth of Unfilled Holds - Count.Depth - SELECT COUNT(ahr.id), ahr.selection_depth FROM action.hold_request ahr WHERE ahr.id IN (SELECT id FROM m_action_hold_request) AND ahr.cancel_time IS NULL AND ahr.capture_time IS NULL AND ahr.fulfillment_time IS NULL GROUP BY 2 ORDER BY 2 - - - - - - m_asset_copy_count - Count of Copies by Library - assets - 0 - Copy Count.Library - SELECT COUNT(ac.id), aou.name FROM m_asset_copy_legacy ac JOIN actor.org_unit aou ON aou.id = ac.circ_lib WHERE ac.x_migrate = TRUE GROUP BY 2 ORDER BY 2 - - - - m_asset_copy_count_non_migrated - Non-Migrated Count of Copies by Library - assets - 0 - Copy Count.Library - SELECT COUNT(ac.id), aou.name FROM m_asset_copy_legacy ac JOIN actor.org_unit aou ON aou.id = ac.circ_lib WHERE ac.x_migrate = FALSE GROUP BY 2 ORDER BY 2 - - - - m_asset_copies_by_status - Copies by Status - assets - 0 - Copy Count.Status - SELECT COUNT(ac.id), cs.name FROM m_asset_copy_legacy ac JOIN config.copy_status cs ON cs.id = ac.status WHERE ac.x_migrate = TRUE GROUP BY 2 ORDER BY 2 - - - - m_asset_circ_mod_copies_count - Copies by Circulation Modifier - assets - 0 - Copy Count.Circulation Modifier - SELECT COUNT(ac.id), ac.circ_modifier FROM m_asset_copy_legacy ac WHERE ac.x_migrate = TRUE GROUP BY 2 ORDER BY 2 - - - - m_asset_copy_notes - Copy Notes - assets - 0 - Note Count.Public - SELECT COUNT(acnote.id), acnote.pub::TEXT FROM m_asset_copy_note_legacy 
acnote WHERE acnote.x_migrate = TRUE GROUP BY 2 ORDER BY 2 - - - - m_asset_copy_notes - Copy Notes - assets - 1 - Note Count.Public - SELECT COUNT(acnote.id), acnote.pub::TEXT FROM m_asset_copy_note acnote GROUP BY 2 ORDER BY 2 - - - - m_asset_vols_by_lib - Volumes by Library - assets - 0 - Volume Count.Library - SELECT COUNT(acn.id), aou.name FROM m_asset_call_number_legacy acn JOIN m_actor_org_unit_legacy aou ON aou.id = acn.owning_lib WHERE acn.x_migrate = TRUE GROUP BY 2 ORDER BY 2 - - - - m_asset_vols_by_lib - Volumes by Library - assets - 1 - Volume Count.Library - SELECT COUNT(acn.id), aou.name FROM m_asset_call_number acn JOIN actor.org_unit aou ON aou.id = acn.owning_lib GROUP BY 2 ORDER BY 2 - - - - m_asset_cops_by_loc_and_org - Copies by Location - assets - 0 - Copy Count.Location.Circ Library - SELECT COUNT(ac.id), acl.name, aou.name FROM m_asset_copy_legacy ac JOIN asset.copy_location acl ON acl.id = ac.location JOIN actor.org_unit aou ON aou.id = ac.circ_lib WHERE ac.x_migrate = TRUE GROUP BY 2, 3 ORDER BY 2, 3 - - - - m_asset_cops_w_loc_one - Copies with a Location of Stacks - assets - 0 - Barcode - SELECT barcode FROM m_asset_copy_legacy WHERE location = 1 AND x_migrate - - - - m_asset_no_barcode - Items Without Barcodes - assets - 0 - Assigned Barcode - SELECT barcode FROM m_asset_copy_legacy WHERE barcode ~* 'no_barocde' AND x_migrate - - - - m_asset_barcode_patterns - Common Barcode Starting Patterns - assets - 0 - Count of Barcodes (greater than 10).Left 60% of Characters - SELECT COUNT(ac.id), LEFT(ac.barcode,(ROUND(LENGTH(ac.barcode)*.6))::INT) FROM m_asset_copy_legacy ac WHERE ac.x_migrate = TRUE GROUP BY 2 HAVING COUNT(ac.id) > 10 ORDER BY 2 - - - - m_asset_barcode_collisions - Copy Barcode Collisions - assets - 0 - Collision Count - SELECT COUNT(id) FROM m_asset_copy_legacy WHERE x_migrate = TRUE AND barcode ~* '^x_' - Incumbent collisions are those where the migrated barcodes collide with existing barcodes in the database. 
- - - - m_asset_barcode_collisions - Copy Barcode Collisions - assets - 1 - Collision Count - SELECT COUNT(id) FROM m_asset_copy WHERE barcode ~* '^x_' - Incumbent collisions are those where the migrated barcodes collide with existing barcodes in the database. - - - - m_asset_barcode_collisions_shortlist - Copy Barcode Collisions (first 20) - assets - 0 - Collision List - SELECT ac.barcode FROM m_asset_copy_legacy ac WHERE ac.barcode ~* '^x_' AND ac.x_migrate = TRUE ORDER BY 1 LIMIT 20 - This is a shortlist of copy barcode collisions that maxes out at 20. If there are more collisions we will need to run a custom report. - - - - m_asset_barcode_collisions_shortlist - Copy Barcode Collisions (first 20) - assets - 1 - Collision List - SELECT ac.barcode FROM m_asset_copy ac WHERE ac.barcode ~* '^x_' ORDER BY 1 LIMIT 20 - This is a shortlist of patron barcode collisions that maxes out at 20. If there are more collisions we will need to run a custom report. - - - - m_asset_barcode_collision_patterns - Common Copy Barcode Collision Patterns - assets - 0 - Number of Barcodes Matching Pattern Greater than 10.Left 60% of Characters - SELECT COUNT(ac.id), LEFT(ac.barcode,(ROUND(LENGTH(ac.barcode)*.6))::INT) FROM m_asset_copy_legacy ac WHERE barcode ~* '^x_' AND ac.x_migrate = TRUE GROUP BY 2 HAVING COUNT(ac.id) > 10 ORDER BY 2 - - - - m_asset_barcode_collision_patterns - Common Copy Barcode Collision Patterns - assets - 1 - Number of Barcodes Matching Pattern Greater than 10.Left 60% of Characters - SELECT COUNT(ac.id), LEFT(ac.barcode,(ROUND(LENGTH(ac.barcode)*.6))::INT) FROM m_asset_copy ac WHERE barcode ~* '^x_' GROUP BY 2 HAVING COUNT(ac.id) > 10 ORDER BY 2 - - - - m_asset_stat_cats - Copy Statistical Categories - assets - 0 - Stat Cat Count.Library.Statistical Category - SELECT COUNT(ac_sc.id), aou.name, ac_sc.name FROM m_asset_stat_cat_legacy ac_sc JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 - - - - m_asset_stat_cats - Copy Statistical 
Categories - assets - 1 - Stat Cat Count.Library.Statistical Category - SELECT COUNT(ac_sc.id), aou.name, ac_sc.name FROM m_asset_stat_cat ac_sc JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 - - - - m_asset_stat_cat_entries - Copy Stat Cat User Entries - assets - 0 - Copy Stat Count.Library.Statistical Category - SELECT COUNT(map.id), aou.name, ac_sc.name FROM m_asset_stat_cat_entry_copy_map_legacy map JOIN m_asset_stat_cat_legacy ac_sc ON ac_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 - - - - m_asset_stat_cat_entries - Copy Stat Cat User Entries - assets - 1 - Copy Stat Count.Library.Statistical Category - SELECT COUNT(map.id), aou.name, ac_sc.name FROM m_asset_stat_cat_entry_copy_map map JOIN m_asset_stat_cat ac_sc ON ac_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou. -id = ac_sc.owner GROUP BY 2,3 ORDER BY 2,3 - - - - m_asset_copy_tags - Copy Tags - assets - 0 - Tag Count.Copy Tag Type.Copy Tag Label.Staff Note.Public - SELECT COUNT(map.id), tag.tag_type, tag.label, tag.staff_note, tag.pub FROM m_asset_copy_tag tag JOIN m_asset_copy_tag_copy_map map ON map.tag = tag.id GROUP BY 2,3,4,5 ORDER BY 2,3 - - - - m_asset_copy_alerts - Copy Alerts - assets - 0 - Alert Count.Alert Type - SELECT COUNT(*), cat.name FROM m_asset_copy_alert aca JOIN config.copy_alert_type cat ON cat.id = aca.alert_type GROUP BY 2 - - - - - - m_money_billing_voided - Bills Voided And Not - money - 0 - Count.Voided.Sum.Migrated - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_billing_legacy a GROUP BY 2, 4 ORDER BY 2, 4 - - - - m_money_billing_voided - Bills Voided And Not - money - 1 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM m_money_billing a GROUP BY 2 ORDER BY 2, 3 - - - - m_money_billing_by_type - Bills by Type - money - 0 - Count.Billing Type.Migrated - SELECT COUNT(a.id), a.billing_type, a.x_migrate::TEXT FROM m_money_billing_legacy a 
GROUP BY 2, 3 ORDER BY 2, 3 - - - - m_money_billing_by_type - Bills by Type - money - 1 - Count.Billing Type - SELECT COUNT(a.id), a.billing_type FROM m_money_billing a GROUP BY 2 ORDER BY 2 - - - - m_money_cash_payment - Cash Payments - money - 0 - Count.Voided.Sum.Migrated - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_cash_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 - - - - m_money_cash_payment - Cash Payments - money - 1 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM m_money_cash_payment a GROUP BY 2 ORDER BY 2 - - - - m_money_check_payment - Check Payments - money - 0 - Count.Voided.Sum.Migrated - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_check_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 - - - - m_money_forgive_payment - Forgive Payments - money - 0 - Count.Voided.Sum.Migrated - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_forgive_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 - - - - m_money_forgive_payment - Forgive Payments - money - 1 - Count.Voided.Sum - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount) FROM m_money_forgive_paymen a GROUP BY 2 ORDER BY 2 - - - - m_money_goods_payment - Goods Payments - money - 0 - Count.Voided.Sum.Migrated - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_goods_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 - - - - m_money_work_payment - Work Payments - money - 0 - Count.Voided.Sum.Migrated - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_work_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 - - - - m_money_credit_card_payment - Credit Card Payments - money - 0 - Count.Voided.Sum.Migrated - SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_credit_card_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 - - - - m_money_credit_payment - Credit Payments - money - 0 - Count.Voided.Sum.Migrated - 
SELECT COUNT(a.id), a.voided::TEXT, SUM(a.amount), a.x_migrate::TEXT FROM m_money_credit_payment_legacy a GROUP BY 2, 4 ORDER BY 2, 4 - - - - - - - bibs_loaded - Loaded Bibliographic Records - bibs - 0 - Count - SELECT COUNT(bre.id) FROM m_biblio_record_entry_legacy bre WHERE id IN (SELECT id FROM biblio.record_entry) ORDER BY 1 - - - - bibs_loaded - Loaded Bibliographic Records - bibs - 1 - Count - SELECT COUNT(bre.id) FROM m_biblio_record_entry bre WHERE id IN (SELECT id FROM biblio.record_entry) ORDER BY 1 - - - - bibswovolumes - Bibliographic Records Without Volumes - bibs - 0 - Count - SELECT COUNT(id) FROM m_biblio_record_entry where id NOT IN (SELECT DISTINCT record FROM m_asset_call_number) - These records would not have been loaded but many may be deduplicated against incumbent records. - - - - bibswovolumesanduri - Bibliographic Records Without Volumes And Recognized URI - bibs - 0 - Service.Domain.Count - SELECT '3M','ebook.3m.com', COUNT(*) FROM subfield_u WHERE value ~* 'ebook.3m.com' GROUP BY 1, 2 UNION ALL - SELECT 'Axis 360','axis360.baker-taylor.com', COUNT(*) FROM subfield_u WHERE value ~* 'axis360.baker-taylor.com' GROUP BY 1, 2 UNION ALL - SELECT 'Book Flix','bookflix.digital.scholastic.com', COUNT(*) FROM subfield_u WHERE value ~* 'bookflix.digital.scholastic.com' GROUP BY 1, 2 UNION ALL - SELECT 'Book Flix','bkflix.grolier.com', COUNT(*) FROM subfield_u WHERE value ~* 'bkflix.grolier.com' GROUP BY 1, 2 UNION ALL - SELECT 'Comics Plus','library.comicsplusapp.com', COUNT(*) FROM subfield_u WHERE value ~* 'library.comicsplusapp.com' GROUP BY 1, 2 UNION ALL - SELECT 'Ebrary','site.ebrary.com', COUNT(*) FROM subfield_u WHERE value ~* 'site.ebrary.com' GROUP BY 1, 2 UNION ALL - SELECT 'Freading','freading.com', COUNT(*) FROM subfield_u WHERE value ~* 'freading.com' GROUP BY 1, 2 UNION ALL - SELECT 'Hoopla','hoopladigital.com', COUNT(*) FROM subfield_u WHERE value ~* 'hoopladigital.com' GROUP BY 1, 2 UNION ALL - SELECT 'Infobase','avod.infobase.com', 
COUNT(*) FROM subfield_u WHERE value ~* 'avod.infobase.com' GROUP BY 1, 2 UNION ALL - SELECT 'Learning Express','learningexpresslibrary.com', COUNT(*) FROM subfield_u WHERE value ~* 'learningexpresslibrary.com' GROUP BY 1, 2 UNION ALL - SELECT 'Missouri Overdrive','molib2go.org', COUNT(*) FROM subfield_u WHERE value ~* 'molib2go.org' GROUP BY 1, 2 UNION ALL - SELECT 'netLibrary','netLibrary.com', COUNT(*) FROM subfield_u WHERE value ~* 'netLibrary.com' GROUP BY 1, 2 UNION ALL - SELECT 'OneClickDigital','oneclickdigital.com', COUNT(*) FROM subfield_u WHERE value ~* 'oneclickdigital.com' GROUP BY 1, 2 UNION ALL - SELECT 'Overdrive','overdrive.com', COUNT(*) FROM subfield_u WHERE value ~* 'overdrive.com' GROUP BY 1, 2 UNION ALL - SELECT 'ProQuest','ebookcentral.proquest.com', COUNT(*) FROM subfield_u WHERE value ~* 'ebookcentral.proquest.com' GROUP BY 1, 2 UNION ALL - SELECT 'RB Digital','rbdigital.com', COUNT(*) FROM subfield_u WHERE value ~* 'rbdigital.com' GROUP BY 1, 2 UNION ALL - SELECT 'U.S. Government Sites','.gov', COUNT(*) FROM subfield_u WHERE value ~* '\.gov' GROUP BY 1,2;; - - This list is built from known services and domains. If you have records for electronic resources that are not here please let us know. 
- - - - bibswuri - Bibliographic Records With 856$9s - bibs - 0 - Count - SELECT COUNT(id) FROM m_biblio_record_entry where id IN (SELECT record FROM asset.call_number WHERE label ~* '##URI##') - - - - bibsff - Bibliographic Records with Adjusted Fixed Fields - bibs - 0 - Count.Original Search Format.New Search Format - SELECT COUNT(*), ARRAY_TO_STRING(x_search_format,','), ARRAY_TO_STRING(x_after_search_format,',') FROM m_biblio_record_entry_legacy WHERE x_migrate AND x_after_search_format IS NOT NULL GROUP BY 2, 3 ORDER BY 3,2 - - - - bibs_notes - Bib Record Notes - bibs - 0 - Count.Migrated - SELECT COUNT(b.id), b.x_migrate::TEXT FROM m_biblio_record_note_legacy b GROUP BY 2 - - - - bibs_notes - Bib Record Notes - bibs - 1 - Count - SELECT COUNT(b.id) FROM m_biblio_record_note b - - - - bibs_peers - Peer Bib Copies - bibs - 0 - Count.Migrated - SELECT COUNT(b.id), b.x_migrate::TEXT FROM m_biblio_peer_bib_copy_map_legacy b GROUP BY 2 - - - - bibs_peers - Peer Bib Copies - bibs - 1 - Count - SELECT COUNT(b.id) FROM m_biblio_peer_bib_copy_map b - - - - bibs_parts - Monograph Parts - bibs - 0 - Count.Migrated - SELECT COUNT(b.id), b.x_migrate::TEXT FROM m_biblio_monograph_part_legacy b GROUP BY 2 - - - - bibs_parts - Monograph Parts - bibs - 1 - Count - SELECT COUNT(b.id) FROM m_biblio_monograph_part b - - - - bib_merges - Bibliographic Merge Count - bibs - 0 - Records Merged.Incumbent Records Merged Into - SELECT SUM(array_length(records,1)), COUNT(*) FROM groups - - - - - - usrsbyorg - Migrated Patrons by Home Org - actors - 0 - Count.Library - SELECT COUNT(au.id), aou.name FROM m_actor_usr_legacy au JOIN actor.org_unit aou ON aou.id = au.home_ou WHERE au.x_migrate = TRUE GROUP BY 2 ORDER BY 2 - - - - nonmigusrsbyorg - Non-Migrated Patrons by Home Org - actors - 0 - Count.Library - SELECT COUNT(au.id), aou.name FROM m_actor_usr_legacy au JOIN actor.org_unit aou ON aou.id = au.home_ou WHERE au.x_migrate = FALSE GROUP BY 2 ORDER BY 2 - - - - usrsbypgt - Migrated 
Patrons by Permission Group - actors - 0 - Count.Permission Group - SELECT COUNT(au.id), pgt.name FROM m_actor_usr_legacy au JOIN permission.grp_tree pgt ON pgt.id = au.profile WHERE au.x_migrate = TRUE GROUP BY 2 ORDER BY 2 - - - - active_usrs - Patron by Active Status - actors - 0 - Count of Users.Active - SELECT COUNT(id), active::TEXT FROM m_actor_usr_legacy WHERE x_migrate = TRUE GROUP BY 2 - - - - active_usrs - Patron Cards - actors - 1 - Count of Users - SELECT COUNT(id) FROM m_actor_usr - - - - active_usr_barcodes - Patron Barcodes by Active Status - actors - 0 - Count of Barcodes.Active.Migrated - SELECT COUNT(id), active::TEXT, x_migrate::TEXT FROM m_actor_card_legacy GROUP BY 2, 3 - - - - active_usr_barcodes - Patron Barcodes by Active Status - actors - 1 - Count of Barcodes.Active - SELECT COUNT(id), active::TEXT FROM m_actor_card GROUP BY 2 - - - - usr_barcode_patterns - Common Barcode Starting Patterns - actors - 0 - Count of Barcodes (greater than 10).Left 60% of Characters - SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM m_actor_card_legacy acard WHERE acard.x_migrate = TRUE GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 - - - - usr_barcode_patterns - Common Barcode Starting Patterns - actors - 1 - Count of Barcodes (greater than 10).Left 60% of Characters - SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM m_actor_card acard GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 - - - - usr_barcode_collisions - Patron Barcode Collisions - actors - 0 - Collision Count - SELECT COUNT(acard.id) FROM m_actor_card_legacy acard WHERE barcode ~* '^x_' AND x_migrate = TRUE - - - - usr_barcode_collisions - Patron Barcode Collisions - actors - 1 - Collision Count - SELECT COUNT(acard.id) FROM m_actor_card acard WHERE barcode ~* '^x_' - - - - usr_barcode_collision_shortlist - Patron Barcode Collisions (first 20) - actors - 0 - Collision List - SELECT acard.barcode FROM 
m_actor_card_legacy acard WHERE acard.barcode ~* '^x_' AND acard.x_migrate = TRUE ORDER BY 1 LIMIT 20 - This is a shortlist of patron barcode collisions that maxes out at 20. If there are more collisions we will need to run a custom report. In some cases we may flag individual accounts to not migrate. - - - - usr_barcode_collision_shortlist - Patron Barcode Collisions (first 20) - actors - 1 - Collision List - SELECT acard.barcode FROM m_actor_card acard WHERE acard.barcode ~* '^x_%' ORDER BY 1 LIMIT 20 - This is a shortlist of patron barcode collisions that maxes out at 20. If there are more collisions we will need to run a custom report. In some cases we may flag individual accounts to not migrate. - - - - usr_barcode_collision_patterns - Common Patron Barcode Collision Patterns a.x_migrate - actors - 0 - Number of Barcodes Matching Pattern Greater than 10.Left 60% of Characters - SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM m_actor_card_legacy acard WHERE (acard.barcode ~* 'collision' OR acard.barcode ~* '^x_') AND acard.x_migrate = TRUE GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 - - - - usr_barcode_collision_patterns - Common Patron Barcode Collision Patterns a.x_migrate - actors - 1 - Number of Barcodes Matching Pattern Greater than 10.Left 60% of Characters - SELECT COUNT(acard.id), LEFT(acard.barcode,(ROUND(LENGTH(acard.barcode)*.6))::INT) FROM m_actor_card acard WHERE (acard.barcode ~* 'collision' OR acard.barcode ~* '^x_') GROUP BY 2 HAVING COUNT(acard.id) > 10 ORDER BY 2 - - - - usr_addressses_status - Patron Addresses - actors - 0 - Count - SELECT COUNT(aua.id) FROM m_actor_usr_address_legacy aua WHERE aua.x_migrate = TRUE - - - - usr_addressses_status - Patron Addresses - actors - 1 - Count - SELECT COUNT(aua.id) FROM m_actor_usr_address aua - - - - usr_addresses_pending - Patron Addresses by Pending Status - actors - 0 - Count of Addresses.Pending - SELECT COUNT(aua.id), pending::TEXT FROM 
m_actor_usr_address_legacy aua WHERE aua.x_migrate = TRUE GROUP BY 2 - - - - usr_addresses_pending - Patron Addresses by Pending Status - actors - 1 - Count of Addresses.Pending - SELECT COUNT(aua.id), pending::TEXT FROM m_actor_usr_address aua GROUP BY 2 - - - - usr_messages - Patron Messages - actors - 0 - Count.Deleted.Migrated - SELECT COUNT(aum.id), deleted::TEXT, x_migrate::TEXT FROM m_actor_usr_message_legacy aum GROUP BY 2, 3 - - - - usr_messages - Patron Messages - actors - 1 - Count.Deleted - SELECT COUNT(aum.id), deleted::TEXT FROM m_actor_usr_message_legacy aum GROUP BY 2 - - - - usr_notes - Patron Notes - actors - 0 - Count.Public.Migrated - SELECT COUNT(aun.id), pub::TEXT, x_migrate::TEXT FROM m_actor_usr_note_legacy aun GROUP BY 2, 3 - - - - usr_notes - Patron Notes - actors - 1 - Count.Public - SELECT COUNT(aun.id), pub::TEXT FROM m_actor_usr_note aun GROUP BY 2 - - - - usr_stat_cats - Patron Statistical Categories - actors - 0 - Stat Cat Count.Library.Statistical Category.Migrated - SELECT COUNT(au_sc.id), aou.name, au_sc.name, au_sc.x_migrate::TEXT FROM m_actor_stat_cat_legacy au_sc JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3, 4 ORDER BY 2, 3, 4 - - - - usr_stat_cats - Patron Statistical Categories - actors - 1 - Stat Cat Count.Library.Statistical Category - SELECT COUNT(au_sc.id), aou.name, au_sc.name FROM m_actor_stat_cat au_sc JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3 ORDER BY 2, 3 - - - - usr_stat_cat_entries - Patron Stat Cat User Entries - actors - 0 - Patron Stat Count.Library.Statistical Category.Migrated - SELECT COUNT(map.id), aou.name, au_sc.name, map.x_migrate::TEXT FROM m_actor_stat_cat_entry_usr_map_legacy map JOIN m_actor_stat_cat_legacy au_sc ON au_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3, 4 ORDER BY 2,3, 4 - - - - usr_stat_cat_entries - Patron Stat Cat User Entries - actors - 1 - Patron Stat Count.Library.Statistical Category - SELECT COUNT(map.id), 
aou.name, au_sc.name FROM m_actor_stat_cat_entry_usr_map map JOIN m_actor_stat_cat au_sc ON au_sc.id = map.stat_cat JOIN actor.org_unit aou ON aou.id = au_sc.owner GROUP BY 2, 3 ORDER BY 2,3 - - - - usr_thresholds - Patron Thresholds - actors - 0 - Pateron Group.Org Unit.Penalty.Threshold - SELECT pgt.name, aou.shortname, sp.name, p.threshold FROM permission_grp_penalty_threshold p - JOIN actor.org_unit aou ON aou.id = p.org_unit JOIN permission.grp_tree pgt ON pgt.id = p.grp JOIN config.standing_penalty sp ON sp.id = p.penalty - ORDER BY 2, 1, 3 - - - - usr_settings - Patron Settings - actors - 0 - Count.Setting.Value - SELECT COUNT(*), name, 'User''s Phone' FROM m_actor_usr_setting WHERE name IN ('opac.default_phone') GROUP BY 2, 3 - UNION ALL SELECT COUNT(*), name, value FROM m_actor_usr_setting WHERE name IN ('opac.hold_notify') GROUP BY 2, 3 - UNION ALL SELECT COUNT(*), a.name, aou.shortname FROM m_actor_usr_setting a JOIN actor.org_unit aou ON aou.id = REPLACE(a.value,'"','')::INTEGER - WHERE a.name IN ('opac.default_pickup_location','opac.default_search_location') GROUP BY 2, 3 ORDER BY 2, 3; - - - - - fund_count - 0 - acq - Migrated Funds - Number of Funds.Migrated - SELECT COUNT(id), x_migrate::TEXT FROM m_acq_fund_legacy GROUP BY 2; - - - - fund_count - 1 - acq - Migrated Funds - Number of Funds - SELECT COUNT(id) FROM m_acq_fund; - - - - invoice_count - 0 - acq - Migrated Invoices - Number of Invoices.Migrated - SELECT COUNT(id), x_migrate::TEXT FROM m_acq_invoice_legacy GROUP BY 2; - - - - invoice_count - 1 - acq - Migrated Invoices - Number of Funds - SELECT COUNT(id) FROM m_acq_invoice; - - - - - serials_mfhd_count - serials - 0 - Migrated Serial MFHDs - Number of MFHDs - SELECT COUNT(id) FROM m_serial_record_entry - - - - - - dedupe_explain - dedupe - dedupe_process.asciidoc - - - - dedupe_bib_groups - dedupe - 0 - Scoring and Bib Record Groups - Count.Bib Record Groups - SELECT COUNT(id), 'Total Bibs Being Evaluated' FROM biblio.record_entry WHERE 
deleted IS FALSE AND id IN (SELECT eg::BIGINT FROM bib_id_map) - UNION ALL SELECT (COUNT(DISTINCT incoming_bib)), 'Incoming Bibs With Matches Found' FROM bib_matches - UNION ALL SELECT (COUNT(bre.id) - (SELECT COUNT(DISTINCT incoming_bib) FROM bib_matches)), 'Incoming Bibs With No Match' - FROM biblio.record_entry bre WHERE bre.deleted IS FALSE AND bre.id IN (SELECT eg::BIGINT FROM bib_id_map) - UNION ALL SELECT COUNT(DISTINCT incoming_bib), 'Incoming Bibs Being Merged into Incumbent' FROM bib_matches WHERE incumbent_bib_score >= incoming_bib_score - UNION ALL SELECT COUNT(id), 'Incumbent Bibs With Higher Scores to Incoming' FROM bib_matches WHERE incumbent_bib_score > incoming_bib_score - UNION ALL SELECT COUNT(id), 'Incumbent Bibs With Equal Scores to Incoming' FROM bib_matches WHERE incumbent_bib_score = incoming_bib_score - UNION ALL SELECT COUNT(id), 'Incumbent Bibs With Lower Scores to Incoming' FROM bib_matches WHERE incumbent_bib_score < incoming_bib_score - ; - - - - dedupe_format_count - dedupe - 0 - Count of Items Matching By Format - Count.Format(s) - SELECT COUNT(id), search_formats FROM bib_matches GROUP BY 2 ORDER BY 2; - - - - dedupe_score_ranges - dedupe - 0 - Count of Items Matching By Format - Lowest Record Score.Largest Record Score.Record Set - SELECT MIN(incumbent_bib_score), MAX(incumbent_bib_score), 'Incumbent Records' FROM bib_matches - UNION ALL SELECT MIN(incoming_bib_score), MAX(incoming_bib_score), 'Incoming Records' FROM bib_matches ; - - - - - - dedupe_sample_set - dedupe - 0 - Sample of 20 Matching Dedupe Record Sets - Bib Being Merged Into.Bib Being Merged - SELECT incumbent_bib, incoming_bib FROM bib_matches WHERE incumbent_bib_score >= incoming_bib_score LIMIT 20 ; - - - - - - - notices_overview - notices - 0 - Action Triggers Setup for Notices - ID.Active.Owner.Name - SELECT ed.id, ed.active::TEXT, aou.shortname, ed.name - FROM action_trigger.event_definition ed - JOIN actor.org_unit aou ON aou.id = ed.owner - WHERE ed.owner IN 
(SELECT DISTINCT home_ou FROM m_actor_usr) - OR ed.owner IN (SELECT DISTINCT parent_ou FROM actor.org_unit WHERE id in (SELECT DISTINCT home_ou FROM m_actor_usr)); - - - - - notices_count - notices - 0 - Count of Notices Run with State - Count of Notices.State.ID.Owner.Name - SELECT COUNT(ate.id), ate.state, ed.id, aou.shortname, ed.name - FROM action_trigger.event_definition ed - JOIN actor.org_unit aou ON aou.id = ed.owner - JOIN action_trigger.event ate ON ate.event_def = ed.id - WHERE ed.owner IN (SELECT DISTINCT home_ou FROM m_actor_usr) - OR ed.owner IN (SELECT DISTINCT parent_ou FROM actor.org_unit WHERE id in (SELECT DISTINCT home_ou FROM m_actor_usr)) - GROUP BY 2,3,4; - - - - -
- diff --git a/mig-xml/mapping_reports.xml b/mig-xml/mapping_reports.xml deleted file mode 100644 index 53665d5..0000000 --- a/mig-xml/mapping_reports.xml +++ /dev/null @@ -1,854 +0,0 @@ - - - - - - - evg_m_asset_copy_statuses - Statuses - evergreen - 0 - Copy Count.Migrating Status.New Evergreen Status.Notes - SELECT COUNT(ac.l_id), cs.l_name FROM m_asset_copy_legacy ac JOIN config_copy_status_legacy cs ON cs.l_id = ac.l_status GROUP BY 2 ORDER BY 2 - You only need to fill this sheet out if you use custom statuses that need to be migrated. - - - - evg_m_asset_circ_mods - Circulation Modifiers - evergreen - 0 - Copy Count.Migrating Circ Mod.New Circ Mod.Notes - SELECT COUNT(ac.l_id), ac.l_circ_modifier FROM m_asset_copy_legacy ac GROUP BY 2 ORDER BY 2 - - - - evg_m_asset_copy_locs - Copy Locations - evergreen - 0 - Count.Library.Migrating Copy Location.New Copy Location.Notes - SELECT COUNT(ac.l_id), aou.l_name, acl.l_name FROM m_asset_copy_location_legacy acl JOIN m_actor_org_unit_legacy aou ON aou.l_id = acl.l_owning_lib JOIN m_asset_copy_legacy ac ON ac.l_location = acl.l_id GROUP BY 2, 3 ORDER BY 2, 3 - Any locations not mapped can be moved over as their existing locations. 
- - - - evg_permission_grps - Permission Groups - evergreen - 0 - Count.Migrating Permission Group.New Permission Group.Notes - SELECT COUNT(au.l_id), pgt.l_name FROM m_actor_usr_legacy au JOIN permission_grp_tree_legacy pgt ON pgt.l_id = au.l_profile GROUP BY 2 ORDER BY 2 - - - - - tlc_load_branches_list - tlc - 0 - Branches Present in Extract - Name.Evergreen Org Unit.Notes - SELECT l_name FROM ctbranches_tsv_clean ORDER BY 1 - - - - tlc_load_m_asset_notes - Item Notes - tlc - 0 - Count.Note Type.Notes - SELECT COUNT(l_itemcomment)::TEXT, 'Item Comments' FROM ititeminformation_tsv_clean WHERE l_itemcomment IS NOT NULL GROUP BY 2 - UNION ALL SELECT COUNT(l_physicalcondition)::TEXT, 'Condition Notes' FROM ctlocitem_tsv_clean WHERE l_physicalcondition IS NOT NULL GROUP BY 2 - UNION ALL SELECT COUNT(l_checkinoutnote)::TEXT, 'Circ Notes' FROM ctlocitem_tsv_clean WHERE l_checkinoutnote IS NOT NULL GROUP BY 2 - UNION ALL (SELECT DISTINCT 'Sample Item Comment', l_itemcomment FROM ititeminformation_tsv_clean WHERE l_itemcomment IS NOT NULL LIMIT 20) - UNION ALL (SELECT DISTINCT 'Sample Physical Condition', l_physicalcondition FROM ctlocitem_tsv_clean WHERE l_physicalcondition IS NOT NULL LIMIT 20) - UNION ALL (SELECT DISTINCT 'Sample Circ Note', l_checkinoutnote FROM ctlocitem_tsv_clean WHERE l_checkinoutnote IS NOT NULL LIMIT 20) - - - - - tlc_load_m_asset_holdings_codes - Holdings Codes - tlc - 0 - Count.Holdings Codes.Evergreen Circulation Modifier.Evergreen Shelving Locatione - SELECT COUNT(l_barcode), l_activeholdingscode FROM ctlocitem_tsv_clean GROUP BY 2 ORDER BY 2 - - - - - tlc_load_m_money_migrating_bills - Migrating Bills By Bill Type - tlc - 0 - Count.Billing Type.Evergreen Bill Type - SELECT COUNT(a.l_chargenumber), b.l_description FROM itpayment_tsv_clean a JOIN itpaymenttype_tsv_clean b ON b.l_paymenttype = a.l_paymenttype WHERE a.l_dueamount::INTEGER - (a.l_waiveamount::INTEGER + a.l_tenderamount::INTEGER) > 0 GROUP BY 2 ORDER BY 2 - - - - 
tlc_load_usrs_bygroup - Patrons by Agency Type - tlc - 0 - Count.Permission Group.Evergreen Permission Group - SELECT COUNT(l_agencynumber), l_agencytype FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 - - - - tlc_load_usrs_byexpiration - Patrons by Expiration Date - tlc - 0 - Count.Year of Expiration.Do Not Migrate? - SELECT COUNT(l_agencynumber), LEFT(l_expirationdate,4) FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 - - - - tlc_load_usrs_byactive - Patrons by Last Active Date - tlc - 0 - Count.Year Last Active.Migrate as Active Flag? - SELECT COUNT(l_agencynumber), LEFT(l_lastactivedate,4) FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 - We can set the active flag based on this if desired. - - - - tlc_load_usrs_blocks - Patrons by Block Status - tlc - 0 - Count.Block Status.Migration Note - SELECT COUNT(l_agencynumber), l_blockstatus FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 - - - - tlc_load_usrs_gender - Patrons by Gender - tlc - 0 - Count.Gender.Migrate as Stat Cat - SELECT COUNT(l_agencynumber), l_gender FROM itagency_tsv_clean GROUP BY 2 ORDER BY 2 - Evergreen can load these as statistical categories - - - - tlc_load_active_usr_passwords - Count of Patrons w Passwords - tlc - 0 - Count of NULL PINs.Default Password - SELECT COUNT(l_agencynumber) FROM itagency_tsv_clean WHERE l_pin IS NULL - If any PINS are blank we will have to choose a method to supply them as Evergreen requires passwords. - - - - tlc_load_usr_notes - Count of Patron Notes - tlc - 0 - Count.Note Type.Action - SELECT COUNT(l_agencynumber), 'Patron Note' FROM itagency_tsv_clean WHERE l_agencycomment IS NOT NULL UNION ALL SELECT COUNT(l_agencynumber), 'Address Comments' FROM itaddress_tsv_clean WHERE l_addresscomment IS NOT NULL - - - - tlc_load_usr_balance - Count and Sum of Balances - tlc - 0 - Count.SUM in Pennies.Migrate? 
- SELECT COUNT(l_agencynumber), SUM(l_accountbalance::INTEGER) FROM itagency_tsv_clean WHERE l_accountbalance != '0' - If this is being migrated there are a few options of how to do it and each will have different workflows. - - - - tlc_load_usr_stat_cats - Patron Stat Cats and Counts - tlc - 0 - Patron Count.Stat Cat.Migrate? - SELECT COUNT(b.l_agencynumber), a.l_description FROM itagencyfields_tsv_clean a JOIN itagencydata_tsv_clean b ON b.l_agencyfieldnumber = a.l_agencyfieldnumber GROUP BY 2 ORDER BY 1 - - - - - - destiny_load_usr_by_gradelevel - Patrons by Destiny Grade Level - destiny - 0 - Count.Graduation Year - SELECT COUNT(*), grade_level FROM patrons_csv GROUP BY 2 ORDER BY 2 - Transfer to note or stat cat? - - - - destiny_load_usr_by_gender - Patrons by Destiny Gender - destiny - 0 - Count.Gender - SELECT COUNT(*), gender FROM patrons_csv GROUP BY 2 ORDER BY 2 - Transfer to stat cat? - - - - destiny_load_usr_by_patrontype - Patrons by Destiny Patron Type - destiny - 0 - Count.Patron Type.Permission Group - SELECT COUNT(*), patron_type FROM patrons_csv GROUP BY 2 ORDER BY 2 - - - - destiny_load_usr_by_status - Patrons by Destiny Status - destiny - 0 - Count.Status - SELECT COUNT(*), status FROM patrons_csv GROUP BY 2 ORDER BY 2 - - - - destiny_load_usr_by_municipality - Patrons by Municipality - destiny - 0 - Count.Municipality - SELECT COUNT(*), municipality FROM patrons_csv GROUP BY 2 ORDER BY 2 - State cat? - - - - destiny_load_usr_notes - Types of and Counts of Notes - destiny - 0 - Type of Note.Count - SELECT 'General Note', COUNT(*) FROM patrons_csv WHERE note_general IS NOT NULL and note_general != '' UNION ALL SELECT 'Important Note', COUNT(*) FROM patrons_csv WHERE note_important IS NOT NULL and note_important != '' - Messsage / alert / note? - - - - destiny_load_usr_userdefined5 - User Defined Field 5 - destiny - 0 - Count.Values - SELECT COUNT(*), user_defined_5 FROM patrons_csv GROUP BY 2 ORDER BY 2 - Retain somewhere? 
- - - - destiny_load_usrs_pswdconfigured - Patrons by Password Configured - destiny - 0 - Count.Password Configured - SELECT COUNT(*), password_configured FROM patrons_csv GROUP BY 2 ORDER BY 2 - Need how they want passwords set since we don't have them to migrate and do they want these that are configured with some special note? - - - - destiny_load_usrs_phonefields - Phone Fields - destiny - 0 - Phone Field.Count.Evergreen Phone Field - SELECT 'Primary Phone 1', COUNT(*) FROM patrons_csv WHERE primary_phone_1 IS NOT NULL AND primary_phone_1 != '' UNION ALL SELECT 'Primary Phone 2', COUNT(*) FROM patrons_csv WHERE primary_phone_2 IS NOT NULL AND primary_phone_2 != '' UNION ALL SELECT 'Secondary Phone 1', COUNT(*) FROM patrons_csv WHERE secondary_phone_1 IS NOT NULL AND secondary_phone_1 != '' UNION ALL SELECT 'Secondary Phone 2', COUNT(*) FROM patrons_csv WHERE secondary_phone_2 IS NOT NULL AND secondary_phone_2 != '' - - - - destiny_load_m_asset_categories - Count of Categories - destiny - 0 - Count.Category.Circ Mod? - SELECT COUNT(*), category FROM copies_csv GROUP BY 2 ORDER BY 2 - - - - destiny_load_m_asset_notes - Copies by Note Types - destiny - 0 - Note Type.Count - SELECT 'General Note', COUNT(*) FROM copies_csv WHERE note_general IS NOT NULL and note_general != '' UNION ALL SELECT 'Important Note', COUNT(*) FROM copies_csv WHERE note_important IS NOT NULL and note_important != '' - Retain? - - - - destiny_load_m_asset_sublocation - Copies by Sub Location - destiny - 0 - Count.Sub Location.Shelving Location? - SELECT COUNT(*), sublocation FROM copies_csv GROUP BY 2 ORDER BY 2 - - - - destiny_load_m_asset_vendor - Copies by Vendor - destiny - 0 - Count.Vendor - SELECT COUNT(*), vendor FROM copies_csv GROUP BY 2 ORDER BY 2 - Retain? 
- - - - destiny_load_m_asset_descriptions - Copies with Description Fields - destiny - 0 - Description Field.Count - SELECT 'Description Field 1', COUNT(*) FROM copies_csv WHERE description_1 IS NOT NULL and description_1 != '' UNION ALL SELECT 'Description Field 2', COUNT(*) FROM copies_csv WHERE description_2 IS NOT NULL and description_2 != '' UNION ALL SELECT 'Description Field 3', COUNT(*) FROM copies_csv WHERE description_3 IS NOT NULL and description_3 != '' - Need report? Retain? - - - - destiny_load_fines_byreason - destiny - 0 - Fines by Reason - Count.Reason - SELECT COUNT(*), reason FROM fines_csv GROUP BY 2 ORDER BY 2 - - - - - - - circ_bystatus - Circulations by Status - apollo - 0 - Count.Status.Type - SELECT COUNT(id), l_status, l_type FROM m_action_circulation_legacy GROUP BY 2, 3 - Circulations will only not be migrated if they can't be attached to a migrated patron and holding. - - - - hold_bystatus - Holds by Status - apollo - 0 - Count.Status - SELECT COUNT(id), l_status FROM m_action_hold_request_legacy GROUP BY 2 - Only unfilled holds are being migrated. - - - - m_asset_pending_bibs - Pending Records - apollo - 0 - Count.Year of Last Edit.Count of Copies Attached - SELECT COUNT(bre.id), CASE WHEN LENGTH(bre.l_edited) > 1 THEN EXTRACT(YEAR FROM bre.l_edited::TIMESTAMP) ELSE '1900' END, COUNT(ac.id) FROM m_biblio_record_entry_legacy bre LEFT JOIN m_asset_copy_legacy ac ON ac.l_biblio = bre.l_id WHERE bre.l_status = 'pending' GROUP BY 2 ORDER BY 2 - - - - - m_asset_copies_by_status - Copies by Status - apollo - 0 - Count.Status - SELECT COUNT(id), l_status FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 - Deleted copies with a delete date were not exported additionally those with the deleted status won't get loaded. 
- - - - m_asset_pending_copies - Pending Copies by Last Edit - apollo - 0 - Count.Last Edited - SELECT COUNT(id), CASE WHEN LENGTH(l_edited) > 1 THEN EXTRACT(YEAR FROM l_edited::TIMESTAMP) ELSE '1900' END FROM m_asset_copy_legacy WHERE l_status = 'pending' GROUP BY 2 ORDER BY 2 - - - - assets_by_memberships - Copies by Memberships - apollo - 0 - Count.Membership Number.Membership Name - SELECT COUNT(ac.id), acl.l_membership_number, acl.l_membership_name FROM m_asset_copy_legacy ac JOIN m_asset_copy_location_legacy acl ON acl.l_membership_name = ac.l_memberships GROUP BY 2,3 ORDER BY 2 - - - - - m_money_bills - Bills - apollo - 0 - Count.Status - SELECT COUNT(id), l_status FROM m_money_billing_legacy GROUP BY 2 ORDER BY 2 - Unless there is a good reason to do so forgiven and paid bills will not be migrated. - - - - m_actor_groups - Patron Membership Groups - apollo - 0 - Membership List Name.Membership List ID.Membership Number.Membership Name.Membership Length in Monthst - SELECT l_membership_list_name, l_membership_list_id, l_membership_number, l_membership_name, l_membership_length_months FROM m_actor_usr_legacy_groups_tsv ORDER BY 1, 3 - Age looks like a good target for a stat cat and / or juvenile setting while the patron type can map to profiles. 
- - - - m_actor_by_groups - Patrons by Membership Groups - apollo - 0 - Count.Membership List Name.Membership Number - SELECT COUNT(*), l_membership_list_name, l_membership_number FROM (SELECT id, UNNEST(STRING_TO_ARRAY(l_memberships,'|')) AS m FROM m_actor_usr_legacy ) x JOIN m_actor_usr_legacy_groups_tsv t ON t.l_membership_name = x.m GROUP BY 2, 3 ORDER BY 2, 3 - - - - m_actor_addresses_nulls - Patron Addresses - apollo - 0 - Address Field.Nulls - SELECT 'Street Address', COUNT(id) FROM m_actor_usr_address_legacy WHERE l_lines IS NULL UNION ALL SELECT 'City', COUNT(id) FROM m_actor_usr_address_legacy UNION ALL SELECT 'State', COUNT(id) FROM m_actor_usr_address_legacy WHERE l_country_division IS NULL UNION ALL SELECT 'Postal Code', COUNT(id) FROM m_actor_usr_address_legacy WHERE l_postal_code IS NULL - If any of these fields are null then we need defaults to fill in, note the extract had no city data. - - - - m_actor_phones - Patron Phones - apollo - 0 - Count.Type - SELECT COUNT(*), l_type FROM m_actor_usr_phones_tsv GROUP BY 2 ORDER BY 2 - These need to be mapped to Evergreen phone types. 
- - - - - - - hz_borrowersbybtypes - Borrowers by Borrower Types - horizon - 0 - Count.Borrower Type - SELECT COUNT(id), l_borrower_btype FROM m_actor_usr_legacy GROUP BY 2 ORDER BY 2; - - - - hz_borrowersbybtypes - Borrowers by Borrower Types - horizon2 - 0 - Count.Borrower Type.Description - SELECT COUNT(*), a.btype, b.descr FROM borrower_csv_clean a JOIN btype_csv_clean b ON b.btype = a.btype GROUP BY 2, 3 ORDER BY 2; - - - - hz_borrowerpincount - Borrower PINs Count - horizon - 0 - Count of Migratable Passwords / PINs - SELECT COUNT(l_borrower_pin) FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_pin) > 1; - - - - hz_borrowerpincount - Borrower PINs Count - horizon2 - 0 - Count of Migratable Passwords / PINs - SELECT COUNT(pin) FROM borrower_csv_clean WHERE LENGTH(pin) > 1; - - - - hz_blocks - Borrower Blocks - horizon2 - 0 - Count of Entries.Block Description - SELECT COUNT(*), b.descr FROM burb_csv_clean a JOIN block_csv_clean b ON a.block = b.block GROUP BY 2; - - - - hz_borrowernotesample - Borrower Note Field Samples - horizon - 0 - Sample of Migratable Notes - SELECT l_borrower_borrower_note FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_borrower_note) > 1 LIMIT 20; - - - - hz_borrowernotesample - Borrower Note Field Samples - horizon2 - 0 - Sample of Migratable Notes - SELECT borrower_note FROM borrower_csv_clean WHERE LENGTH(borrower_note) > 1 LIMIT 20; - - - - hz_borrowernotescount - Count of Migratable Borrower Notes - horizon - 0 - Count - SELECT COUNT(l_borrower_borrower_note) FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_borrower_note) > 1; - - - - hz_borrowernotescount - Count of Migratable Borrower Notes - horizon2 - 0 - Count - SELECT COUNT(borrower_note) FROM borrower_csv_clean WHERE LENGTH(borrower_note) > 1; - - - - hz_borrowernotesample2 - Borrower Note Field 2 Samples - horizon - 0 - Count - SELECT l_borrower_note2 FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_note2) > 1 LIMIT 20; - - - - hz_borrowernotesample3 - Borrower Note Field 3 
Samples - horizon - 0 - Count - SELECT l_borrower_note3 FROM m_actor_usr_legacy WHERE LENGTH(l_borrower_note3) > 1 LIMIT 20; - - - - hz_phones - Borrower Phones - horizon - 0 - Count.Borrower Phone Type - - SELECT COUNT(*), b - FROM (SELECT l_borrower_phone_1_phone_type AS b FROM m_actor_usr_legacy - UNION ALL SELECT l_borrower_phone_2_phone_type AS b FROM m_actor_usr_legacy - UNION ALL SELECT l_borrower_phone_3_phone_type AS b FROM m_actor_usr_legacy - UNION ALL SELECT l_borrower_phone_4_phone_type AS b FROM m_actor_usr_legacy) x - GROUP BY 2 ORDER BY 2 - - - - - hz_phones - Borrower Phones - horizon2 - 0 - Count.Borrower Phone Position - - SELECT COUNT(*), '0' FROM borrower_phone_csv_clean WHERE ord = '0' - UNION ALL SELECT COUNT(*), '1' FROM borrower_phone_csv_clean WHERE ord = '1' - UNION ALL SELECT COUNT(*), '2' FROM borrower_phone_csv_clean WHERE ord = '2' - UNION ALL SELECT COUNT(*), '3' FROM borrower_phone_csv_clean WHERE ord = '3' - UNION ALL SELECT COUNT(*), '4' FROM borrower_phone_csv_clean WHERE ord = '4' - - - - - hz_bstats - Borrower B-Stats - horizon - 0 - Count.BStat - SELECT COUNT(*), b - FROM (SELECT l_borrower_bstat_1_bstat AS b FROM m_actor_usr_legacy - UNION ALL - SELECT l_borrower_bstat_2_bstat AS b FROM m_actor_usr_legacy - UNION ALL - SELECT l_borrower_bstat_3_bstat AS b FROM m_actor_usr_legacy) x - GROUP BY 2 ORDER BY 1; - - - - - hz_bstats - Borrower B-Stats - horizon2 - 0 - Count.B-Stat.Description - SELECT COUNT(*), a.bstat, b.descr FROM borrower_bstat_csv_clean a JOIN bstat_csv_clean b ON b.bstat = a.bstat GROUP BY 2, 3; - - - - - hz_copybycollection - Copies by Collection - horizon - 0 - Count.Collection - SELECT COUNT(id), l_collection FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2; - - - - hz_copybycollection - Copies by Collection - horizon2 - 0 - Count.Collection.Description.PAC Description - SELECT COUNT(*), a.collection, c.descr, c.pac_descr FROM item_csv_clean a JOIN collection_csv_clean c ON c.collection = a.collection GROUP BY 
2, 3, 4 ORDER BY 2, 3, 4; - - - - hz_itemsbyitype - Items by IType - horizon - 0 - Count.Item Type (itype) - SELECT COUNT(id), l_itype FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2; - - - - hz_itemsbyitype - Items by IType - horizon2 - 0 - Count.Item Type (itype).Description - SELECT COUNT(*), a.itype, b.descr FROM item_csv_clean a JOIN itype_csv_clean b ON b.itype = a.itype GROUP BY 2, 3 ORDER BY 2; - - - - hz_internalnotescount - Internal/Check In Item Notes - horizon - 0 - Count - SELECT COUNT(l_internal_note) FROM m_asset_copy_legacy WHERE LENGTH(l_internal_note) > 1; - - - - hz_internalnotescount - Internal/Check In Item Notes - horizon2 - 0 - Count - SELECT COUNT(cki_notes) FROM item_csv_clean WHERE LENGTH(cki_notes) > 1; - - - - hz_internalnotesample - Internal/Check In Item Notes Sample - horizon - 0 - Count - SELECT l_internal_note FROM m_asset_copy_legacy WHERE LENGTH(l_internal_note) > 1 LIMIT 20; - - - - hz_internalnotesample - Internal/Check In Item Notes Sample - horizon2 - 0 - Count - SELECT cki_notes FROM item_csv_clean WHERE LENGTH(cki_notes) > 1 LIMIT 20; - - - - hz_burbbills - Count of Bills by Type - horizon2 - 0 - Count.Bill Type - SELECT COUNT(*), block FROM burb_csv_clean WHERE amount::INTEGER > 0 GROUP BY 2; - - - - - - rm_load_circ_count - rm - 0 - Circs by Status - Count of Circs.Status - SELECT COUNT(id), l_is_checked_out FROM m_asset_copy_legacy GROUP BY 2 - - - - rm_load_m_asset_by_resource_type - Resource Type - rm - 0 - Count.Resource Type - SELECT COUNT(*), l_resource_type FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 - - - - rm_load_m_asset_by_location - Copies by Location - rm - 0 - Count.Location - SELECT COUNT(*), l_location FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 - - - - rm_load_m_asset_by_category - Copies by Category - rm - 0 - Count.Category - SELECT COUNT(*), l_category FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 - - - - rm_load_m_asset_by_status - Copies by Status - rm - 0 - Count.Status - SELECT COUNT(*), 
l_status FROM m_asset_copy_legacy GROUP BY 2 ORDER BY 2 - - - - rm_m_actor_groups - Patrons by User Groups - rm - 0 - Count.Group - SELECT COUNT(id), l_user_group FROM m_actor_usr_legacy GROUP BY 2 ORDER BY 2; - - - - - rm_m_actor_access - Patrons by Access Field - rm - 0 - Count.Access - SELECT COUNT(id), l_access_if_applicable FROM m_actor_usr_legacy GROUP BY 2 ORDER BY 2; - - - - - rm_m_actor_comments - Patron Comments - rm - 0 - Count.Sample - SELECT COUNT(id), 'All Comments' FROM m_actor_usr_legacy WHERE LENGTH(l_comments) > 1 - UNION ALL SELECT NULL, l_comments FROM m_actor_usr_legacy WHERE LENGTH(l_comments) > 1 LIMIT 10 - - - - - rm_m_actor_circulation_note - Patron Circ Notes - rm - 0 - Count.Sample - SELECT COUNT(id), 'All Notes' FROM m_actor_usr_legacy WHERE LENGTH(l_circulation_note) > 1 - UNION ALL SELECT NULL, l_circulation_note FROM m_actor_usr_legacy WHERE LENGTH(l_circulation_note) > 1 LIMIT 10 - - - - - -- 1.7.2.5