X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=mig;fp=mig;h=ea6cfd1e8078a4ab7d506c9159b7e1ef4a09d750;hp=0000000000000000000000000000000000000000;hb=f9201dc2d1699f5161e5e29690a1634e8063bb85;hpb=d1812fa8c4c9e220978d650adb3611c978a2a56b diff --git a/mig b/mig new file mode 100755 index 0000000..ea6cfd1 --- /dev/null +++ b/mig @@ -0,0 +1,345 @@ +#!/usr/bin/perl -w +############################################################################### +=pod + +=head1 NAME + +mig - git-like program for tracking and manipulating legacy data files for +migrations + +=head1 SYNOPSIS + +B [argument] [...] + +=head1 DESCRIPTION + +B is used to track and manipulate CSV or CSV-like text files exported from +legacy systems for migration into Evergreen. It can be a wrapper for some +other migration tools and tracks state using a PostgreSQL table in a given +migration schema. + +It makes use of certain environment variables that may be set by the B +tool: PGHOST, PGPORT, PGUSER, PGDATABASE, MIGSCHEMA, and MIGWORKDIR + +For most commands, if the current working directory falls outside of the +directory specified by MIGWORKDIR, then mig will assume that environment is +also incorrect and bail before doing any actual work. + +~/.pgpass should also be configured, as B will not prompt for a database +password. + +Only the B and B commands work without the MIGSCHEMA environment +variable being set. + +=head1 OVERVIEW + +Using B should go something like this: + +=over 15 + +=item mig env create m_foo # Sets up the environment + +=item mig env use m_foo # Spawns a shell using the configured environment + +=item mig init # creates the m_foo schema in the database if needed, and other tables + +=item mig add patrons.tsv # tracks an incoming data file; repeat for additional files + +=item mig iconv patrons.tsv # convert it to UTF8, creating patrons.tsv.utf8 + +=item mig clean patrons.tsv # cleans the file, creating patrons.tsv.utf8.clean + +=item mig link patrons.tsv actor_usr # makes the soon-to-be staging table a child of m_foo.actor_usr + +=item mig convert patrons.tsv # creates a .sql file for staging the data + +=item mig stage patrons.tsv # load said .sql file + +=item mig mapper patrons.tsv # interactive tool for analyzing/mapping the staging table + +=item mig analysis patrons.tsv # writes a summary .tsv file of mapped/flagged fields from the staging table + +=item mig map patrons.tsv # apply configured mappings + +=item mig write_prod patrons.tsv # creates a .sql file for pushing the staging data into production + +=back + +=head1 COMMANDS + +=over 15 + +=item B [command] + +Display this very same documentation, or specific documentation for one of the +commands listed here. + +=item B + +Invokes B with the same arguments. I can set important +environment variables and spawn a shell with those variables, and it also does +some directory creation and symlinking. + +=item B + +Create or re-create the PostgreSQL tracking table for the schema specified by +the MIGSCHEMA environment variable. If needed, create the migration schema +itself and run migration_tools.init() and build() if the migration_tools schema +exists. + +=item B [file] [...] + +Show status information for either the specified files or all tracked files if +no argument is given. + +=item B [--no-headers|--headers] [file|--no-headers|--headers] [...] + +Add the specified files to the migration tracker. Until --no-headers is +specified, the tracker will assume the files have headers. + +You can do crazy stuff like +B + +=item B [file] [...] + +Remove the specified files from the migration tracker. + +=item B [other arguments...] + +Attempts to invoke B on the specified tracked file, placing the output in +.utf8 + +If given no other arguments, the invocation will lool like + +=over 5 + +iconv -f ISO-8859-1 -t UTF-8 -o .utf8 + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +iconv [other arguments...] -o .utf8 + +=back + +=item B + +If this is used instead of B, then B will look for an existing +.utf8 and use it instead of attempting to create one. + +=item B [other arguments...] + +Attempts to invoke B on the iconv-converted specified tracked file, +placing the output in .utf8.clean + +If given no other arguments, the invocation will lool like + +=over 5 + +clean_csv --config scripts/clean.conf --fix --apply <--create-headers> + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +clean_csv [other arguments...] + +=back + +=item B + +If this is used instead of B, then B will look for an existing +.utf8.clean and use it instead of attempting to create one. + +=item B + +Associate the specified file with a parent table within the migration schema. + +Linking multiple files to the same parent table is not allowed currently. + +=item B + +Removes any association between the specified file and a parent table within +the migration schema. + +=item B + +Attempts to invoke B on the .utf8.clean version of the specified +tracked file, creating either [file].utf8.clean.stage.sql or +_stage.sql depending on whether the file has been linked to a +parent table within the migration schema or not. + +If given no other arguments, the invocation will lool like + +=over 5 + +csv2sql --config scripts/clean.conf --add-x-migrate --schema [--parent ] -o <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean + +=back + +otherwise, the arguments will be passed through like so + +=over 5 + +csv2sql [other arguments...] -o <[.utf8.clean.stage.sql]|[parent_table_stage.sql]> .utf8.clean + +=back + +=item B [other arguments...] + +Load the SQL-converted version of the specified file into the migration schema. + +Extra arguments are passed to the underlying call to psql + +=item B + +Interactive session for analyzing, flagging, and mapping legacy field data to +Evergreen fields. + +Upon exit, generate either [file].clean.map.sql or _map.sql. The +SQL generated will be UPDATE's for setting the Evergreen-specific columns for a +given file's staging tables, and TRUNCATE's and INSERT's for auxilary tables. +The files will have \include hooks for pulling in additional mapping files +(for example, end-user mappings for circ modifiers, etc.) + +=item B [file] + +Writes a MIGSCHEMA.tsv file containing a break-down of mapped and flagged +fields from the specified file, or all staged files if no file is specified. + +The main goal of the tsv file is to present end-user mappable data for circ +modifiers, shelving locations, patron profiles, etc. We use spreadsheets for +this now but may move to a dedicated UI in the future. + +=item B [file] + +Applies the mapping sql to the migration schema for the specified mapped file, +or for all mapped files if no file is specified. + +=item B [file] + +Generates _prod.sql for the specified linked and mapped file, or +all such files if no file is specified. + +=back + +=cut + +############################################################################### + +use strict; +use Switch; +use Env qw( + HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA + MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR +); +use Pod::Usage; +use FindBin; +my $mig_bin = "$FindBin::Bin/mig-bin/"; +use lib "$FindBin::Bin/mig-bin"; +use Mig; + +pod2usage(-verbose => 2) if ! $ARGV[0]; +switch($ARGV[0]) { + case "help" { + if (defined $ARGV[1]) { + my $cmd = $mig_bin . "mig-$ARGV[1]"; + if (-e $cmd) { + system( $mig_bin . "mig-$ARGV[1]", '--help' ); + } else { + pod2usage(-verbose => 2); + } + } else { + pod2usage(-verbose => 2); + } + } + case "env" { + standard_invocation(@ARGV); + } + case "init" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "status" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "add" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "remove" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "iconv" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "skip-iconv" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "clean" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "skip-clean" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "link" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "unlink" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "convert" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "stage" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "mapper" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "quicksheet" { + Mig::die_if_no_env_migschema(); + standard_invocation(@ARGV); + } + case "map" { + Mig::die_if_no_env_migschema(); + } + case "load" { + Mig::die_if_no_env_migschema(); + } + case "wdir" { + print "$MIGWORKDIR\n"; + } + case "gdir" { + print "$MIGBASEGITDIR\n"; + } + case "sdir" { + print "$MIGGITDIR\n"; + } + else { + pod2usage(1); + } +} + +sub standard_invocation { + my $cmd = shift; + system( $mig_bin . "mig-$cmd", @_ ); +} + +