mig

   1 #!/usr/bin/perl -w
   2 ###############################################################################
   3 =pod
   4
   5 =head1 NAME
   6
   7 mig - git-like program for tracking and manipulating legacy data files for
   8 migrations
   9
  10 =head1 SYNOPSIS
  11
  12 B<mig> <command> [argument] [...]
  13
  14 =head1 DESCRIPTION
  15
  16 B<mig> is used to track and manipulate CSV or CSV-like text files exported from
  17 legacy systems for migration into Evergreen.  It can be a wrapper for some
  18 other migration tools and tracks state using a PostgreSQL table in a given
  19 migration schema.
  20
  21 It makes use of certain environment variables that may be set by the B<mig-env>
  22 tool: PGHOST, PGPORT, PGUSER, PGDATABASE, MIGSCHEMA, and MIGWORKDIR
  23
  24 For most commands, if the current working directory falls outside of the
  25 directory specified by MIGWORKDIR, then mig will assume that environment is
  26 also incorrect and bail before doing any actual work.
  27
  28 ~/.pgpass should also be configured, as B<mig> will not prompt for a database
  29 password.
  30
  31 Only the B<env> and B<help> commands work without the MIGSCHEMA environment
  32 variable being set.
  33
  34 =head1 OVERVIEW
  35
  36 Using B<mig> should go something like this:
  37
  38 =over 15
  39
  40 =item mig env create m_foo # Sets up the environment
  41
  42 =item mig env use m_foo # Spawns a shell using the configured environment
  43
  44 =item mig init # creates the m_foo schema in the database if needed, and other tables
  45
  46 =item mig add patrons.tsv # tracks an incoming data file; repeat for additional files
  47
  48 =item mig iconv patrons.tsv # convert it to UTF8, creating patrons.tsv.utf8
  49
  50 =item mig clean patrons.tsv # cleans the file, creating patrons.tsv.utf8.clean
  51
  52 =item mig link patrons.tsv actor_usr # makes the soon-to-be staging table a child of m_foo.actor_usr
  53
  54 =item mig convert patrons.tsv # creates a .sql file for staging the data
  55
  56 =item mig stage patrons.tsv # load said .sql file
  57
  58 =item mig mapper patrons.tsv # interactive tool for analyzing/mapping the staging table
  59
  60 =item mig analysis patrons.tsv # writes a summary .tsv file of mapped/flagged fields from the staging table
  61
  62 =item mig map patrons.tsv # apply configured mappings
  63
  64 =item mig write_prod patrons.tsv # creates a .sql file for pushing the staging data into production
  65
  66 =item mig reporter --analyst "Foo Fooer" --report_title "Foo Load Analysis" #creates an asciidoc report
  67
  68 =back
  69
  70 =head1 COMMANDS
  71
  72 =over 15
  73
  74 =item B<help> [command]
  75
  76 Display this very same documentation, or specific documentation for one of the
  77 commands listed here.
  78
  79 =item B<env> <create|use|show> <schema>
  80
  81 Invokes B<mig-env> with the same arguments.  I<mig-env> can set important
  82 environment variables and spawn a shell with those variables, and it also does
  83 some directory creation and symlinking.
  84
  85 =item B<init>
  86
  87 Create or re-create the PostgreSQL tracking table for the schema specified by
  88 the MIGSCHEMA environment variable.  If needed, create the migration schema
  89 itself and run migration_tools.init() and build() if the migration_tools schema
  90 exists.
  91
  92 =item B<status> [file] [...]
  93
  94 Show status information for either the specified files or all tracked files if
  95 no argument is given.
  96
  97 =item B<add> [--no-headers|--headers] <file> [file|--no-headers|--headers] [...]
  98
  99 Add the specified files to the migration tracker.  Until --no-headers is
 100 specified, the tracker will assume the files have headers.
 101
 102 You can do crazy stuff like
 103 B<mig add file1 --no-headers file2 file3 --headers file4>
 104
 105 =item B<remove> <file> [file] [...]
 106
 107 Remove the specified files from the migration tracker.
 108
 109 =item B<iconv> <file> [other arguments...]
 110
 111 Attempts to invoke B<iconv> on the specified tracked file, placing the output in
 112 <file>.utf8
 113
 114 If given no other arguments, the invocation will look like
 115
 116 =over 5
 117
 118 iconv -f ISO-8859-1 -t UTF-8 -o <file>.utf8 <file>
 119
 120 =back
 121
 122 otherwise, the arguments will be passed through like so
 123
 124 =over 5
 125
 126 iconv [other arguments...] -o <file>.utf8 <file>
 127
 128 =back
 129
 130 =item B<skip-iconv> <file>
 131
 132 If this is used instead of B<iconv>, then B<mig> will look for an existing
 133 <file>.utf8 and use it instead of attempting to create one.
 134
 135 =item B<clean> <file> [other arguments...]
 136
 137 Attempts to invoke B<clean_csv> on the iconv-converted specified tracked file,
 138 placing the output in <file>.utf8.clean
 139
 140 If given no other arguments, the invocation will look like
 141
 142 =over 5
 143
 144 clean_csv --config scripts/clean.conf --fix --apply <--create-headers> <file>
 145
 146 =back
 147
 148 otherwise, the arguments will be passed through like so
 149
 150 =over 5
 151
 152 clean_csv [other arguments...] <file>
 153
 154 =back
 155
 156 =item B<skip-clean> <file>
 157
 158 If this is used instead of B<clean>, then B<mig> will look for an existing
 159 <file>.utf8.clean and use it instead of attempting to create one.
 160
 161 =item B<link> <file> <parent table>
 162
 163 Associate the specified file with a parent table within the migration schema.
 164
 165 Linking multiple files to the same parent table is not allowed currently.
 166
 167 =item B<unlink> <file>
 168
 169 Removes any association between the specified file and a parent table within
 170 the migration schema.
 171
 172 =item B<convert> <file>
 173
 174 Attempts to invoke B<csv2sql> on the .utf8.clean version of the specified
 175 tracked file, creating either [file].utf8.clean.stage.sql or
 176 <parent table>_stage.sql depending on whether the file has been linked to a
 177 parent table within the migration schema or not.
 178
 179 If given no other arguments, the invocation will look like
 180
 181 =over 5
 182
 183 csv2sql --config scripts/clean.conf --add-x-migrate --schema <MIGSCHEMA> [--parent <PARENT TABLE>] -o <[<file>.utf8.clean.stage.sql]|[parent_table_stage.sql]> <FILE>.utf8.clean
 184
 185 =back
 186
 187 otherwise, the arguments will be passed through like so
 188
 189 =over 5
 190
 191 csv2sql [other arguments...] -o <[<file>.utf8.clean.stage.sql]|[parent_table_stage.sql]> <file>.utf8.clean
 192
 193 =back
 194
 195 =item B<stage> <file> [other arguments...]
 196
 197 Load the SQL-converted version of the specified file into the migration schema.
 198
 199 Extra arguments are passed to the underlying call to psql
 200
 201 =item B<mapper> <file>
 202
 203 Interactive session for analyzing, flagging, and mapping legacy field data to
 204 Evergreen fields.
 205
 206 Upon exit, generate either [file].clean.map.sql or <parent table>_map.sql. The
 207 SQL generated will be UPDATEs for setting the Evergreen-specific columns for a
 208 given file's staging tables, and TRUNCATEs and INSERTs for auxiliary tables.
 209 The files will have \include hooks for pulling in additional mapping files
 210 (for example, end-user mappings for circ modifiers, etc.)
 211
 212 =item B<analysis> [file]
 213
 214 Writes a MIGSCHEMA.tsv file containing a break-down of mapped and flagged
 215 fields from the specified file, or all staged files if no file is specified.
 216
 217 The main goal of the tsv file is to present end-user mappable data for circ
 218 modifiers, shelving locations, patron profiles, etc.  We use spreadsheets for
 219 this now but may move to a dedicated UI in the future.
 220
 221 =item B<map> [file]
 222
 223 Applies the mapping sql to the migration schema for the specified mapped file,
 224 or for all mapped files if no file is specified.
 225
 226 =item B<write_prod> [file]
 227
 228 Generates <parent table>_prod.sql for the specified linked and mapped file, or
 229 all such files if no file is specified.
 230
 231 =item B<sql> [arguments...]
 232
 233 A wrapper around the psql command.  At some point the plan is to shove mig-tracked variables into psql sessions.
 234
 235 =item B<reporter> --analyst "Analyst Name" --report_title "Report Title"
 236
 237 Generates an asciidoc file in the git working directory that can be converted to
 238 any appropriate format.  The --analyst and --report_title parameters are required.
 239
 240 Optional parameters are :
 241
 242 --added_page_title and --added_page_file
 243
 244 If one is used, both must be.  The added page file can be plain text or asciidoc.  This
 245 adds an extra arbitrary page of notes to the report.
 246
 247 --tags
 248
 249 This will define a set of tags to use, if not set it will default to Circs,
 250 Holds, Actors, Bibs, Assets & Money.
 251
 252 --reports_xml
 253
 254 Allows you to override the default evergreen_staged_report.xml in the mig-xml folder.
 255
 256 =back
 257
 258 =cut
 259
 260 ###############################################################################
 261
 262 use strict;
 263 use Switch;
 264 use Env qw(
 265     HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
 266     MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
 267 );
 268 use Pod::Usage;
 269 use FindBin;
 270 my $mig_bin = "$FindBin::Bin/mig-bin/";
 271 use lib "$FindBin::Bin/mig-bin";
 272 use Mig;
 273
 274 pod2usage(-verbose => 2) if ! $ARGV[0];
 275 switch($ARGV[0]) {
 276     case "help" {
 277         if (defined $ARGV[1]) {
 278             my $cmd = $mig_bin . "mig-$ARGV[1]";
 279             if (-e $cmd) {
 280                 system( $mig_bin . "mig-$ARGV[1]", '--help' );
 281             } else {
 282                 pod2usage(-verbose => 2);
 283             }
 284         } else {
 285             pod2usage(-verbose => 2);
 286         }
 287     }
 288     case "env" {
 289         standard_invocation(@ARGV);
 290     }
 291     case "init" {
 292         Mig::die_if_no_env_migschema();
 293         standard_invocation(@ARGV);
 294     }
 295     case "status" {
 296         Mig::die_if_no_env_migschema();
 297         standard_invocation(@ARGV);
 298     }
 299     case "add" {
 300         Mig::die_if_no_env_migschema();
 301         standard_invocation(@ARGV);
 302     }
 303     case "reporter" {
 304         Mig::die_if_no_env_migschema();
 305         standard_invocation(@ARGV);
 306     }
 307     case "remove" {
 308         Mig::die_if_no_env_migschema();
 309         standard_invocation(@ARGV);
 310     }
 311     case "iconv" {
 312         Mig::die_if_no_env_migschema();
 313         standard_invocation(@ARGV);
 314     }
 315     case "skip-iconv" {
 316         Mig::die_if_no_env_migschema();
 317         standard_invocation(@ARGV);
 318     }
 319     case "clean" {
 320         Mig::die_if_no_env_migschema();
 321         standard_invocation(@ARGV);
 322     }
 323     case "skip-clean" {
 324         Mig::die_if_no_env_migschema();
 325         standard_invocation(@ARGV);
 326     }
 327     case "link" {
 328         Mig::die_if_no_env_migschema();
 329         standard_invocation(@ARGV);
 330     }
 331     case "unlink" {
 332         Mig::die_if_no_env_migschema();
 333         standard_invocation(@ARGV);
 334     }
 335     case "convert" {
 336         Mig::die_if_no_env_migschema();
 337         standard_invocation(@ARGV);
 338     }
 339     case "stage" {
 340         Mig::die_if_no_env_migschema();
 341         standard_invocation(@ARGV);
 342     }
 343     case "mapper" {
 344         Mig::die_if_no_env_migschema();
 345         standard_invocation(@ARGV);
 346     }
 347     case "quicksheet" {
 348         Mig::die_if_no_env_migschema();
 349         standard_invocation(@ARGV);
 350     }
 351     case "sql" {
 352         Mig::die_if_no_env_migschema();
 353         standard_invocation(@ARGV);
 354     }
 355     case "map" {
 356         Mig::die_if_no_env_migschema();
 357     }
 358     case "load" {
 359         Mig::die_if_no_env_migschema();
 360     }
 361     case "wdir" {
 362         print "$MIGWORKDIR\n";
 363     }
 364     case "gdir" {
 365         print "$MIGBASEGITDIR\n";
 366     }
 367     case "sdir" {
 368         print "$MIGGITDIR\n";
 369     }
 370     else {
 371         pod2usage(1);
 372     }
 373 }
 374
 375 sub standard_invocation {
 376     my $cmd = shift;
 377     system( $mig_bin . "mig-$cmd", @_ );
 378 }
 379
 380