kmig

   1 #!/usr/bin/perl -w
   2 ###############################################################################
   3 =pod
   4
   5 =head1 NAME
   6
   7 mig - git-like program for tracking and manipulating legacy data files for
   8 migrations. This variant of B<mig> is geared toward the Koha ILS and
   9 MySQL/MariaDB.
  10
  11 =head1 SYNOPSIS
  12
  13 B<mig> <command> [argument] [...]
  14
  15 =head1 DESCRIPTION
  16
  17 B<mig> is used to track and manipulate CSV or CSV-like text files exported from
  18 legacy systems for migration into Evergreen.  It can be a wrapper for some
  19 other migration tools and tracks state using a MySQL table for a given
  20 database.
  21
  22 For most commands, if the current working directory falls outside of the
  23 directory specified by MIGWORKDIR, then mig will assume that environment is
  24 also incorrect and bail before doing any actual work.
  25
  26 Only the B<env> and B<help> commands work without the MIGDATABASE environment
  27 variable being set.
  28
  29 =head1 OVERVIEW
  30
  31 Using B<mig> should go something like this:
  32
  33 =over 15
  34
  35 =item mig env create m_foo # Sets up the environment
  36
  37 =item mig env use m_foo # Spawns a shell using the configured environment
  38
  39 =item mig init # creates any needed auxiliary tables
  40
  41 =item mig add patrons.tsv # tracks an incoming data file; repeat for additional files
  42
  43 =item mig iconv patrons.tsv # convert it to UTF8, creating patrons.tsv.utf8
  44
  45 =item mig clean patrons.tsv # cleans the file, creating patrons.tsv.utf8.clean
  46
  47 =item mig link patrons.tsv borrowers # models the soon-to-be staging table after table 'borrowers'
  48
  49 =item mig convert patrons.tsv # creates a .sql file for staging the data
  50
  51 =item mig stage patrons.tsv # load said .sql file
  52
  53 =item mig mapper patrons.tsv # interactive tool for analyzing/mapping the staging table
  54
  55 =item mig analysis patrons.tsv # writes a summary .tsv file of mapped/flagged fields from the staging table
  56
  57 =item mig map patrons.tsv # apply configured mappings
  58
  59 =item mig write_prod patrons.tsv # creates a .sql file for pushing the staging data into production
  60
  61 =item mig reporter --analyst "Foo Fooer" --report_title "Foo Load Analysis" #creates an asciidoc report
  62
  63 =item mig gsheet --pull foo_tab_name OR --push foo_pg_table_name
  64
  65 =item mig stagebibs --file foo.xml
  66
  67 =back
  68
  69 =head1 COMMANDS
  70
  71 =over 15
  72
  73 =item B<help> [command]
  74
  75 Display this very same documentation, or specific documentation for one of the
  76 commands listed here.
  77
  78 =item B<env> <create|use|show> <schema>
  79
  80 Invokes B<mig-env> with the same arguments.  I<mig-env> can set important
  81 environment variables and spawn a shell with those variables, and it also does
  82 some directory creation and symlinking.
  83
  84 =item B<init>
  85
  86 Create or re-create the PostgreSQL tracking table for the schema specified by
  87 the MIGDATABASE environment variable.  If needed, create the migration schema
  88 itself and run migration_tools.init() and build() if the migration_tools schema
  89 exists.
  90
  91 =item B<status> [file] [...]
  92
  93 Show status information for either the specified files or all tracked files if
  94 no argument is given.
  95
  96 =item B<add> [--no-headers|--headers] <file> [file|--no-headers|--headers] [...]
  97
  98 Add the specified files to the migration tracker.  Until --no-headers is
  99 specified, the tracker will assume the files have headers.
 100
 101 You can do crazy stuff like
 102 B<mig add file1 --no-headers file2 file3 --headers file4>
 103
 104 =item B<remove> <file> [file] [...]
 105
 106 Remove the specified files from the migration tracker.
 107
 108 =item B<iconv> <file> [other arguments...]
 109
 110 Attempts to invoke B<iconv> on the specified tracked file, placing the output in
 111 <file>.utf8
 112
 113 If given no other arguments, the invocation will look like
 114
 115 =over 5
 116
 117 iconv -f ISO-8859-1 -t UTF-8 -o <file>.utf8 <file>
 118
 119 =back
 120
 121 otherwise, the arguments will be passed through like so
 122
 123 =over 5
 124
 125 iconv [other arguments...] -o <file>.utf8 <file>
 126
 127 =back
 128
 129 =item B<skip-iconv> <file>
 130
 131 If this is used instead of B<iconv>, then B<mig> will look for an existing
 132 <file>.utf8 and use it instead of attempting to create one.
 133
 134 =item B<clean> <file> [other arguments...]
 135
 136 Attempts to invoke B<clean_csv> on the iconv-converted specified tracked file,
 137 placing the output in <file>.utf8.clean
 138
 139 If given no other arguments, the invocation will look like
 140
 141 =over 5
 142
 143 clean_csv --config scripts/clean.conf --fix --apply <--create-headers> <file>
 144
 145 =back
 146
 147 otherwise, the arguments will be passed through like so
 148
 149 =over 5
 150
 151 clean_csv [other arguments...] <file>
 152
 153 =back
 154
 155 =item B<skip-clean> <file>
 156
 157 If this is used instead of B<clean>, then B<mig> will look for an existing
 158 <file>.utf8.clean and use it instead of attempting to create one.
 159
 160 =item B<link> <file> <parent table>
 161
 162 Associate the specified file with a parent table within the migration schema.
 163
 164 Linking multiple files to the same parent table is not allowed currently.
 165
 166 =item B<unlink> <file>
 167
 168 Removes any association between the specified file and a parent table within
 169 the migration schema.
 170
 171 =item B<convert> <file>
 172
 173 Attempts to invoke B<csv2sql> on the .utf8.clean version of the specified
 174 tracked file, creating either [file].utf8.clean.stage.sql or
 175 <parent table>_stage.sql depending on whether the file has been linked to a
 176 parent table within the migration schema or not.
 177
 178 If given no other arguments, the invocation will look like
 179
 180 =over 5
 181
 182 csv2sql --config scripts/clean.conf --add-x-migrate --schema <MIGDATABASE> [--parent <PARENT TABLE>] -o <[<file>.utf8.clean.stage.sql]|[parent_table_stage.sql]> <FILE>.utf8.clean
 183
 184 =back
 185
 186 otherwise, the arguments will be passed through like so
 187
 188 =over 5
 189
 190 csv2sql [other arguments...] -o <[<file>.utf8.clean.stage.sql]|[parent_table_stage.sql]> <file>.utf8.clean
 191
 192 =back
 193
 194 =item B<stage> <file> [other arguments...]
 195
 196 Load the SQL-converted version of the specified file into the migration schema.
 197
 198 Extra arguments are passed to the underlying call to psql
 199
 200 =item B<mapper> <file>
 201
 202 Interactive session for analyzing, flagging, and mapping legacy field data to
 203 Evergreen fields.
 204
 205 Upon exit, generate either [file].clean.map.sql or <parent table>_map.sql. The
 206 SQL generated will be UPDATEs for setting the Evergreen-specific columns for a
 207 given file's staging tables, and TRUNCATEs and INSERTs for auxiliary tables.
 208 The files will have \include hooks for pulling in additional mapping files
 209 (for example, end-user mappings for circ modifiers, etc.)
 210
 211 =item B<analysis> [file]
 212
 213 Writes a MIGDATABASE.tsv file containing a break-down of mapped and flagged
 214 fields from the specified file, or all staged files if no file is specified.
 215
 216 The main goal of the tsv file is to present end-user mappable data for circ
 217 modifiers, shelving locations, patron profiles, etc.  We use spreadsheets for
 218 this now but may move to a dedicated UI in the future.
 219
 220 =item B<map> [file]
 221
 222 Applies the mapping sql to the migration schema for the specified mapped file,
 223 or for all mapped files if no file is specified.
 224
 225 =item B<write_prod> [file]
 226
 227 Generates <parent table>_prod.sql for the specified linked and mapped file, or
 228 all such files if no file is specified.
 229
 230 =item B<sql> [arguments...]
 231
 232 A wrapper around the psql command.  At some point the plan is to shove mig-tracked variables into psql sessions.
 233
 234 =item B<reporter> --analyst "Analyst Name" --report_title "Report Title"
 235
 236 Generates an asciidoc file in the git working directory that can be converted to
 237 any appropriate format.  The --analyst and --report_title parameters are required.
 238
 239 Optional parameters are:
 240
 241 --added_page_title and --added_page_file
 242
 243 If one is used, both must be.  The added page file can be plain text or asciidoc.  This
 244 adds an extra arbitrary page of notes to the report.  KMig assumes the page file is in the mig git directory.
 245
 246 --tags
 247
 248 This will define a set of tags to use, if not set it will default to Circs,
 249 Holds, Actors, Bibs, Assets & Money.
 250
 251 --debug
 252
 253 Gives more information about what is happening.
 254
 255 --reports_xml
 256
 257 Allows you to override the default evergreen_staged_report.xml in the mig-xml folder.
 258
 259 =item B<gsheet> --pull or --push spreadsheet_tab
 260
 261 This uses the gsheet_tracked_table and gsheet_tracked_column tables to map Google Docs spreadsheet tabs
 262 to Postgres tables in the mig schema.  The spreadsheet is assumed to share the same name as the mig schema.
 263 Tab names must be unique.  Each spreadsheet column needs a header that matches the column name in the matching
 264 table.  An oauth session key is also needed for your Google account and mig gsheet will look for it in the
 265 .mig directory.
 266
 267 =back
 268
 269 =cut
 270
 271 ###############################################################################
 272
 273 use strict;
 274 use Switch;
 275 use Env qw(
 276     HOME PGHOST PGPORT PGUSER PGDATABASE MIGDATABASE
 277     MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
 278 );
 279 use Pod::Usage;
 280 use FindBin;
 281 my $mig_bin = "$FindBin::Bin/kmig.d/bin/";
 282 use lib "$FindBin::Bin/kmig.d/bin";
 283 use KMig;
 284
 285 pod2usage(-verbose => 2) if ! $ARGV[0];
 286 switch($ARGV[0]) {
 287     case "help" {
 288         if (defined $ARGV[1]) {
 289             my $cmd = $mig_bin . "mig-$ARGV[1]";
 290             if (-e $cmd) {
 291                 system( $mig_bin . "mig-$ARGV[1]", '--help' );
 292             } else {
 293                 pod2usage(-verbose => 2);
 294             }
 295         } else {
 296             pod2usage(-verbose => 2);
 297         }
 298     }
 299     case "map" {
 300     }
 301     case "load" {
 302     }
 303     case "wdir" {
 304         print "$MIGWORKDIR\n";
 305     }
 306     case "gdir" {
 307         print "$MIGBASEGITDIR\n";
 308     }
 309     case "sdir" {
 310         print "$MIGGITDIR\n";
 311     }
 312     else {
 313         standard_invocation(@ARGV);
 314     }
 315 }
 316
 317 sub standard_invocation {
 318     my $cmd = shift;
 319
 320     if ($cmd ne 'env') { KMig::die_if_no_env_migschema(); }
 321     if (-e $mig_bin . "mig-$cmd") {
 322         system( $mig_bin . "mig-$cmd", @_ );
 323     } else {
 324         system( "mig-$cmd", @_ ) == 0 or die pod2usage(1);
 325     }
 326 }
 327
 328