mig

   1 #!/usr/bin/perl -w
   2 ###############################################################################
   3 =pod
   4
   5 =head1 NAME
   6
   7 mig - git-like program for tracking and manipulating legacy data files for
   8 migrations
   9
  10 =head1 SYNOPSIS
  11
  12 B<mig> <command> [argument] [...]
  13
  14 =head1 DESCRIPTION
  15
  16 B<mig> is used to track and manipulate CSV or CSV-like text files exported from
  17 legacy systems for migration into Evergreen.  It can be a wrapper for some
  18 other migration tools and tracks state using a PostgreSQL table in a given
  19 migration schema.
  20
  21 It makes use of certain environment variables that may be set by the B<mig-env>
  22 tool: PGHOST, PGPORT, PGUSER, PGDATABASE, MIGSCHEMA, and MIGWORKDIR
  23
  24 For most commands, if the current working directory falls outside of the
  25 directory specified by MIGWORKDIR, then mig will assume that environment is
  26 also incorrect and bail before doing any actual work.
  27
  28 ~/.pgpass should also be configured, as B<mig> will not prompt for a database
  29 password.
  30
  31 Only the B<env> and B<help> commands work without the MIGSCHEMA environment
  32 variable being set.
  33
  34 =head1 OVERVIEW
  35
  36 Using B<mig> should go something like this:
  37
  38 =over 15
  39
  40 =item mig env create m_foo # Sets up the environment
  41
  42 =item mig env use m_foo # Spawns a shell using the configured environment
  43
  44 =item mig init # creates the m_foo schema in the database if needed, and other tables
  45
  46 =item mig add patrons.tsv # tracks an incoming data file; repeat for additional files
  47
  48 =item mig iconv patrons.tsv # convert it to UTF8, creating patrons.tsv.utf8
  49
  50 =item mig clean patrons.tsv # cleans the file, creating patrons.tsv.utf8.clean
  51
  52 =item mig link patrons.tsv actor_usr # makes the soon-to-be staging table a child of m_foo.actor_usr
  53
  54 =item mig convert patrons.tsv # creates a .sql file for staging the data
  55
  56 =item mig stage patrons.tsv # load said .sql file
  57
  58 =item mig mapper patrons.tsv # interactive tool for analyzing/mapping the staging table
  59
  60 =item mig analysis patrons.tsv # writes a summary .tsv file of mapped/flagged fields from the staging table
  61
  62 =item mig map patrons.tsv # apply configured mappings
  63
  64 =item mig write_prod patrons.tsv # creates a .sql file for pushing the staging data into production
  65
  66 =item mig reporter --analyst "Foo Fooer" --report_title "Foo Load Analysis" #creates an asciidoc report
  67
  68 =item mig gsheet --pull foo_tab_name OR --push foo_pg_table_name
  69
  70 =back
  71
  72 =head1 COMMANDS
  73
  74 =over 15
  75
  76 =item B<help> [command]
  77
  78 Display this very same documentation, or specific documentation for one of the
  79 commands listed here.
  80
  81 =item B<env> <create|use|show> <schema>
  82
  83 Invokes B<mig-env> with the same arguments.  I<mig-env> can set important
  84 environment variables and spawn a shell with those variables, and it also does
  85 some directory creation and symlinking.
  86
  87 =item B<init>
  88
  89 Create or re-create the PostgreSQL tracking table for the schema specified by
  90 the MIGSCHEMA environment variable.  If needed, create the migration schema
  91 itself and run migration_tools.init() and build() if the migration_tools schema
  92 exists.
  93
  94 =item B<status> [file] [...]
  95
  96 Show status information for either the specified files or all tracked files if
  97 no argument is given.
  98
  99 =item B<add> [--no-headers|--headers] <file> [file|--no-headers|--headers] [...]
 100
 101 Add the specified files to the migration tracker.  Until --no-headers is
 102 specified, the tracker will assume the files have headers.
 103
 104 You can do crazy stuff like
 105 B<mig add file1 --no-headers file2 file3 --headers file4>
 106
 107 =item B<remove> <file> [file] [...]
 108
 109 Remove the specified files from the migration tracker.
 110
 111 =item B<iconv> <file> [other arguments...]
 112
 113 Attempts to invoke B<iconv> on the specified tracked file, placing the output in
 114 <file>.utf8
 115
 116 If given no other arguments, the invocation will lool like
 117
 118 =over 5
 119
 120 iconv -f ISO-8859-1 -t UTF-8 -o <file>.utf8 <file>
 121
 122 =back
 123
 124 otherwise, the arguments will be passed through like so
 125
 126 =over 5
 127
 128 iconv [other arguments...] -o <file>.utf8 <file>
 129
 130 =back
 131
 132 =item B<skip-iconv> <file>
 133
 134 If this is used instead of B<iconv>, then B<mig> will look for an existing
 135 <file>.utf8 and use it instead of attempting to create one.
 136
 137 =item B<clean> <file> [other arguments...]
 138
 139 Attempts to invoke B<clean_csv> on the iconv-converted specified tracked file,
 140 placing the output in <file>.utf8.clean
 141
 142 If given no other arguments, the invocation will lool like
 143
 144 =over 5
 145
 146 clean_csv --config scripts/clean.conf --fix --apply <--create-headers> <file>
 147
 148 =back
 149
 150 otherwise, the arguments will be passed through like so
 151
 152 =over 5
 153
 154 clean_csv [other arguments...] <file>
 155
 156 =back
 157
 158 =item B<skip-clean> <file>
 159
 160 If this is used instead of B<clean>, then B<mig> will look for an existing
 161 <file>.utf8.clean and use it instead of attempting to create one.
 162
 163 =item B<link> <file> <parent table>
 164
 165 Associate the specified file with a parent table within the migration schema.
 166
 167 Linking multiple files to the same parent table is not allowed currently.
 168
 169 =item B<unlink> <file>
 170
 171 Removes any association between the specified file and a parent table within
 172 the migration schema.
 173
 174 =item B<convert> <file>
 175
 176 Attempts to invoke B<csv2sql> on the .utf8.clean version of the specified
 177 tracked file, creating either [file].utf8.clean.stage.sql or
 178 <parent table>_stage.sql depending on whether the file has been linked to a
 179 parent table within the migration schema or not.
 180
 181 If given no other arguments, the invocation will lool like
 182
 183 =over 5
 184
 185 csv2sql --config scripts/clean.conf --add-x-migrate --schema <MIGSCHEMA> [--parent <PARENT TABLE>] -o <[<file>.utf8.clean.stage.sql]|[parent_table_stage.sql]> <FILE>.utf8.clean
 186
 187 =back
 188
 189 otherwise, the arguments will be passed through like so
 190
 191 =over 5
 192
 193 csv2sql [other arguments...] -o <[<file>.utf8.clean.stage.sql]|[parent_table_stage.sql]> <file>.utf8.clean
 194
 195 =back
 196
 197 =item B<stage> <file> [other arguments...]
 198
 199 Load the SQL-converted version of the specified file into the migration schema.
 200
 201 Extra arguments are passed to the underlying call to psql
 202
 203 =item B<mapper> <file>
 204
 205 Interactive session for analyzing, flagging, and mapping legacy field data to
 206 Evergreen fields.
 207
 208 Upon exit, generate either [file].clean.map.sql or <parent table>_map.sql. The
 209 SQL generated will be UPDATE's for setting the Evergreen-specific columns for a
 210 given file's staging tables, and TRUNCATE's and INSERT's for auxilary tables.
 211 The files will have \include hooks for pulling in additional mapping files
 212 (for example, end-user mappings for circ modifiers, etc.)
 213
 214 =item B<analysis> [file]
 215
 216 Writes a MIGSCHEMA.tsv file containing a break-down of mapped and flagged
 217 fields from the specified file, or all staged files if no file is specified.
 218
 219 The main goal of the tsv file is to present end-user mappable data for circ
 220 modifiers, shelving locations, patron profiles, etc.  We use spreadsheets for
 221 this now but may move to a dedicated UI in the future.
 222
 223 =item B<map> [file]
 224
 225 Applies the mapping sql to the migration schema for the specified mapped file,
 226 or for all mapped files if no file is specified.
 227
 228 =item B<write_prod> [file]
 229
 230 Generates <parent table>_prod.sql for the specified linked and mapped file, or
 231 all such files if no file is specified.
 232
 233 =item B<sql> [arguments...]
 234
 235 A wrapper around the psql command.  At some point the plan is to shove mig-tracked variables into psql sessions.
 236
 237 =item B<reporter> --analyst "Analyst Name" --report_title "Report Title"
 238
 239 Generates an asciidoc file in the git working directory that can be converted to
 240 any appropriate format.  The analyst and report parameters are required.
 241
 242 Optional parameters are :
 243
 244 --added_page_title and --added_page_file
 245
 246 If one is used both must be.  The added page file can be plain text or asciidoc.  This
 247 adds an extra arbitrary page of notes to the report.  Mig assumes the page file is in the mig git directory.
 248
 249 --tags
 250
 251 This will define a set of tags to use, if not set it will default to Circs,
 252 Holds, Actors, Bibs, Assets & Money.
 253
 254 --reports_xml
 255
 256 Allows you to override the default evergreen_staged_report.xml in the mig-xml folder.
 257
 258 =item B<gsheet> --pull spreadsheet_tab or --push postgres_table
 259
 260 This uses the gsheet_tracked_table and gsheet_tracked column tables to map a Google Docs Spreadsheet tabs
 261 with Postgres tables in the mig schema.  Multiple spreadsheets can be used but tab names must be unique.
 262 Each spreadsheet column needs a header that matches the column name in the matching table.  An oauth
 263 session key is also needed for your Google account and mig gsheet will look for it in the .mig directory.
 264
 265 =back
 266
 267 =cut
 268
 269 ###############################################################################
 270
 271 use strict;
 272 use Switch;
 273 use Env qw(
 274     HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
 275     MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
 276 );
 277 use Pod::Usage;
 278 use FindBin;
 279 my $mig_bin = "$FindBin::Bin/mig-bin/";
 280 use lib "$FindBin::Bin/mig-bin";
 281 use Mig;
 282
 283 pod2usage(-verbose => 2) if ! $ARGV[0];
 284 switch($ARGV[0]) {
 285     case "help" {
 286         if (defined $ARGV[1]) {
 287             my $cmd = $mig_bin . "mig-$ARGV[1]";
 288             if (-e $cmd) {
 289                 system( $mig_bin . "mig-$ARGV[1]", '--help' );
 290             } else {
 291                 pod2usage(-verbose => 2);
 292             }
 293         } else {
 294             pod2usage(-verbose => 2);
 295         }
 296     }
 297     case "env" {
 298         standard_invocation(@ARGV);
 299     }
 300     case "init" {
 301         Mig::die_if_no_env_migschema();
 302         standard_invocation(@ARGV);
 303     }
 304     case "status" {
 305         Mig::die_if_no_env_migschema();
 306         standard_invocation(@ARGV);
 307     }
 308     case "add" {
 309         Mig::die_if_no_env_migschema();
 310         standard_invocation(@ARGV);
 311     }
 312     case "reporter" {
 313         Mig::die_if_no_env_migschema();
 314         standard_invocation(@ARGV);
 315     }
 316     case "gsheet" {
 317         Mig::die_if_no_env_migschema();
 318         standard_invocation(@ARGV);
 319     }
 320     case "remove" {
 321         Mig::die_if_no_env_migschema();
 322         standard_invocation(@ARGV);
 323     }
 324     case "iconv" {
 325         Mig::die_if_no_env_migschema();
 326         standard_invocation(@ARGV);
 327     }
 328     case "skip-iconv" {
 329         Mig::die_if_no_env_migschema();
 330         standard_invocation(@ARGV);
 331     }
 332     case "clean" {
 333         Mig::die_if_no_env_migschema();
 334         standard_invocation(@ARGV);
 335     }
 336     case "skip-clean" {
 337         Mig::die_if_no_env_migschema();
 338         standard_invocation(@ARGV);
 339     }
 340     case "link" {
 341         Mig::die_if_no_env_migschema();
 342         standard_invocation(@ARGV);
 343     }
 344     case "unlink" {
 345         Mig::die_if_no_env_migschema();
 346         standard_invocation(@ARGV);
 347     }
 348     case "convert" {
 349         Mig::die_if_no_env_migschema();
 350         standard_invocation(@ARGV);
 351     }
 352     case "stage" {
 353         Mig::die_if_no_env_migschema();
 354         standard_invocation(@ARGV);
 355     }
 356     case "mapper" {
 357         Mig::die_if_no_env_migschema();
 358         standard_invocation(@ARGV);
 359     }
 360     case "quicksheet" {
 361         Mig::die_if_no_env_migschema();
 362         standard_invocation(@ARGV);
 363     }
 364     case "sql" {
 365         Mig::die_if_no_env_migschema();
 366         standard_invocation(@ARGV);
 367     }
 368     case "map" {
 369         Mig::die_if_no_env_migschema();
 370     }
 371     case "load" {
 372         Mig::die_if_no_env_migschema();
 373     }
 374     case "wdir" {
 375         print "$MIGWORKDIR\n";
 376     }
 377     case "gdir" {
 378         print "$MIGBASEGITDIR\n";
 379     }
 380     case "sdir" {
 381         print "$MIGGITDIR\n";
 382     }
 383     else {
 384         pod2usage(1);
 385     }
 386 }
 387
 388 sub standard_invocation {
 389     my $cmd = shift;
 390     system( $mig_bin . "mig-$cmd", @_ );
 391 }
 392
 393