kmig

   1 #!/usr/bin/perl -w
   2 ###############################################################################
   3 =pod
   4
   5 =head1 NAME
   6
   7 mig - git-like program for tracking and manipulating legacy data files for
   8 migrations. This variant of B<mig> is geared toward the Koha ILS and
   9 MySQL/MariaDB.
  10
  11 =head1 SYNOPSIS
  12
  13 B<mig> <command> [argument] [...]
  14
  15 =head1 DESCRIPTION
  16
  17 B<mig> is used to track and manipulate CSV or CSV-like text files exported from
  18 legacy systems for migration into Koha.  It can be a wrapper for some
  19 other migration tools and tracks state using a MySQL table for a given
  20 database.
  21
  22 For most commands, if the current working directory falls outside of the
  23 directory specified by MIGWORKDIR, then mig will assume that environment is
  24 also incorrect and bail before doing any actual work.
  25
  26 Only the B<env> and B<help> commands work without the MIGDATABASE environment
  27 variable being set.
  28
  29 =head1 OVERVIEW
  30
  31 Using B<mig> should go something like this:
  32
  33 =over 15
  34
  35 =item mig env create m_foo # Sets up the environment
  36
  37 =item mig env use m_foo # Spawns a shell using the configured environment
  38
  39 =item mig init # creates any needed auxiliary tables
  40
  41 =item mig add patrons.tsv # tracks an incoming data file; repeat for additional files
  42
  43 =item mig iconv patrons.tsv # convert it to UTF8, creating patrons.tsv.utf8
  44
  45 =item mig bibstats foo.mrc # get summarized data about bibs and export barcode list
  46
  47 =item mig clean patrons.tsv # cleans the file, creating patrons.tsv.utf8.clean
  48
  49 =item mig link patrons.tsv borrowers # models the soon-to-be staging table after table 'borrowers'
  50
  51 =item mig convert patrons.tsv # creates a .sql file for staging the data
  52
  53 =item mig export foo # exports koha setup elements for importing elsewhere
  54
  55 =item mig stage patrons.tsv # load said .sql file
  56
  57 =item mig mapper patrons.tsv # interactive tool for analyzing/mapping the staging table
  58
  59 =item mig analysis patrons.tsv # writes a summary .tsv file of mapped/flagged fields from the staging table
  60
  61 =item mig map patrons.tsv # apply configured mappings
  62
  63 =item mig write_prod patrons.tsv # creates a .sql file for pushing the staging data into production
  64
  65 =item mig reporter --analyst "Foo Fooer" --report_title "Foo Load Analysis" #creates an asciidoc report
  66
  67 =item mig gsheet --pull foo_tab_name OR --push foo_pg_table_name
  68
  69 =item mig stagebibs --file foo.xml
  70
  71 =back
  72
  73 =head1 COMMANDS
  74
  75 =over 15
  76
  77 =item B<help> [command]
  78
  79 Display this very same documentation, or specific documentation for one of the
  80 commands listed here.
  81
  82 =item B<env> <create|use|show> <schema>
  83
  84 Invokes B<mig-env> with the same arguments.  I<mig-env> can set important
  85 environment variables and spawn a shell with those variables, and it also does
  86 some directory creation and symlinking.
  87
  88 =item B<init>
  89
  90 Create or re-create the PostgreSQL tracking table for the schema specified by
  91 the MIGDATABASE environment variable.  If needed, create the migration schema
  92 itself and run migration_tools.init() and build() if the migration_tools schema
  93 exists.
  94
  95 =item B<status> [file] [...]
  96
  97 Show status information for either the specified files or all tracked files if
  98 no argument is given.
  99
 100 =item B<add> [--no-headers|--headers] <file> [file|--no-headers|--headers] [...]
 101
 102 Add the specified files to the migration tracker.  Until --no-headers is
 103 specified, the tracker will assume the files have headers.
 104
 105 You can do crazy stuff like
 106 B<mig add file1 --no-headers file2 file3 --headers file4>
 107
 108 =item B<remove> <file> [file] [...]
 109
 110 Remove the specified files from the migration tracker.
 111
 112 =item B<iconv> <file> [other arguments...]
 113
 114 Attempts to invoke B<iconv> on the specified tracked file, placing the output in
 115 <file>.utf8
 116
 117 If given no other arguments, the invocation will look like
 118
 119 =over 5
 120
 121 iconv -f ISO-8859-1 -t UTF-8 -o <file>.utf8 <file>
 122
 123 =back
 124
 125 otherwise, the arguments will be passed through like so
 126
 127 =over 5
 128
 129 iconv [other arguments...] -o <file>.utf8 <file>
 130
 131 =back
 132
 133 =item B<skip-iconv> <file>
 134
 135 If this is used instead of B<iconv>, then B<mig> will look for an existing
 136 <file>.utf8 and use it instead of attempting to create one.
 137
 138 =item B<clean> <file> [other arguments...]
 139
 140 Attempts to invoke B<clean_csv> on the iconv-converted specified tracked file,
 141 placing the output in <file>.utf8.clean
 142
 143 If given no other arguments, the invocation will look like
 144
 145 =over 5
 146
 147 clean_csv --config scripts/clean.conf --fix --apply <--create-headers> <file>
 148
 149 =back
 150
 151 otherwise, the arguments will be passed through like so
 152
 153 =over 5
 154
 155 clean_csv [other arguments...] <file>
 156
 157 =back
 158
 159 =item B<skip-clean> <file>
 160
 161 If this is used instead of B<clean>, then B<mig> will look for an existing
 162 <file>.utf8.clean and use it instead of attempting to create one.
 163
 164 =item B<link> <file> <parent table>
 165
 166 Associate the specified file with a parent table within the migration schema.
 167
 168 Linking multiple files to the same parent table is not allowed currently.
 169
 170 =item B<unlink> <file>
 171
 172 Removes any association between the specified file and a parent table within
 173 the migration schema.
 174
 175 =item B<convert> <file>
 176
 177 Attempts to invoke B<csv2sql> on the .utf8.clean version of the specified
 178 tracked file, creating either [file].utf8.clean.stage.sql or
 179 <parent table>_stage.sql depending on whether the file has been linked to a
 180 parent table within the migration schema or not.
 181
 182 If given no other arguments, the invocation will look like
 183
 184 =over 5
 185
 186 csv2sql --config scripts/clean.conf --add-x-migrate --schema <MIGDATABASE> [--parent <PARENT TABLE>] -o <[<file>.utf8.clean.stage.sql]|[parent_table_stage.sql]> <FILE>.utf8.clean
 187
 188 =back
 189
 190 otherwise, the arguments will be passed through like so
 191
 192 =over 5
 193
 194 csv2sql [other arguments...] -o <[<file>.utf8.clean.stage.sql]|[parent_table_stage.sql]> <file>.utf8.clean
 195
 196 =back
 197
 198 =item B<stage> <file> [other arguments...]
 199
 200 Load the SQL-converted version of the specified file into the migration schema.
 201
 202 Extra arguments are passed to the underlying call to psql
 203
 204 =item B<mapper> <file>
 205
 206 Interactive session for analyzing, flagging, and mapping legacy field data to
 207 Evergreen fields.
 208
 209 Upon exit, generate either [file].clean.map.sql or <parent table>_map.sql. The
 210 SQL generated will be UPDATE's for setting the Evergreen-specific columns for a
 211 given file's staging tables, and TRUNCATE's and INSERT's for auxiliary tables.
 212 The files will have \include hooks for pulling in additional mapping files
 213 (for example, end-user mappings for circ modifiers, etc.)
 214
 215 =item B<analysis> [file]
 216
 217 Writes a MIGDATABASE.tsv file containing a break-down of mapped and flagged
 218 fields from the specified file, or all staged files if no file is specified.
 219
 220 The main goal of the tsv file is to present end-user mappable data for circ
 221 modifiers, shelving locations, patron profiles, etc.  We use spreadsheets for
 222 this now but may move to a dedicated UI in the future.
 223
 224 =item B<map> [file]
 225
 226 Applies the mapping sql to the migration schema for the specified mapped file,
 227 or for all mapped files if no file is specified.
 228
 229 =item B<write_prod> [file]
 230
 231 Generates <parent table>_prod.sql for the specified linked and mapped file, or
 232 all such files if no file is specified.
 233
 234 =item B<sql> [arguments...]
 235
 236 A wrapper around the psql command.  At some point the plan is to shove mig-tracked variables into psql sessions.
 237
 238 =item B<reporter> --analyst "Analyst Name" --report_title "Report Title"
 239
 240 Generates an asciidoc file in the git working directory that can be converted to
 241 any appropriate format.  The analyst and report parameters are required.
 242
 243 Optional parameters are :
 244
 245 --added_page_title and --added_page_file
 246
 247 If one is used both must be.  The added page file can be plain text or asciidoc.  This
 248 adds an extra arbitrary page of notes to the report.  KMig assumes the page file is in the mig git directory.
 249
 250 --tags
 251
 252 This will define a set of tags to use, if not set it will default to Circs,
 253 Holds, Actors, Bibs, Assets & Money.
 254
 255 --debug
 256
 257 Gives more information about what is happening.
 258
 259 --reports_xml
 260
 261 Allows you to override the default evergreen_staged_report.xml in the mig-xml folder.
 262
 263 =item B<gsheet> --pull or --push spreadsheet_tab
 264
 265 This uses the gsheet_tracked_table and gsheet_tracked_column tables to map Google Docs spreadsheet tabs
 266 to Postgres tables in the mig schema.  The spreadsheet is assumed to share the same name as the mig schema.
 267 Tab names must be unique.  Each spreadsheet column needs a header that matches the column name in the matching
 268 table.  An oauth session key is also needed for your Google account and mig gsheet will look for it in the
 269 .mig directory.
 270
 271 =back
 272
 273 =cut
 274
 275 ###############################################################################
 276
 277 use strict;
 278 use Switch;
 279 use Env qw(
 280     HOME PGHOST PGPORT PGUSER PGDATABASE MIGDATABASE
 281     MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
 282 );
 283 use Pod::Usage;
 284 use FindBin;
 285 my $mig_bin = "$FindBin::Bin/kmig.d/bin/";
 286 use lib "$FindBin::Bin/kmig.d/bin";
 287 use KMig;
 288
 # Top-level command dispatch.
 #
 # With no command at all, print the full manual; pod2usage() exits.
 pod2usage(-verbose => 2) if !$ARGV[0];

 # A plain if/elsif chain replaces the block syntax provided by the Switch
 # module.  Switch is a source filter that left the Perl core in 5.14, and
 # string equality is all this dispatch needs.
 my $command = $ARGV[0];

 if ($command eq 'help') {
     # "help <command>" defers to that helper's own --help when a file named
     # mig-<command> exists under $mig_bin; otherwise show the full manual.
     my $helper;
     if (defined $ARGV[1]) {
         $helper = $mig_bin . "mig-$ARGV[1]";
     }

     if (defined $helper and -e $helper) {
         system($helper, '--help');
     }
     else {
         pod2usage(-verbose => 2);
     }
 }
 elsif ($command eq 'map' or $command eq 'load') {
     # These commands are accepted but do nothing in this script; they are
     # not forwarded to standard_invocation().
 }
 elsif ($command eq 'wdir') {
     print "$MIGWORKDIR\n";
 }
 elsif ($command eq 'gdir') {
     print "$MIGBASEGITDIR\n";
 }
 elsif ($command eq 'sdir') {
     print "$MIGGITDIR\n";
 }
 else {
     # Everything else is handled by an external mig-<command> helper.
     standard_invocation(@ARGV);
 }
 320
 # Run the helper program for a command and relay its failure status.
 #
 # Arguments: the command name, followed by the arguments that are handed to
 # the helper unchanged.
 #
 # Every command except 'env' first goes through
 # KMig::die_if_no_env_migschema().  The helper is "mig-<command>": the copy
 # under $mig_bin is used when a file of that name exists there, otherwise the
 # name is handed to system() as-is so that it is looked up via PATH.
 #
 # Returns nothing on success.  If the bundled helper fails, the script exits
 # with the helper's exit code (128 + signal number if it was killed by a
 # signal, 1 if it could not be started) rather than reporting success.  If
 # the PATH fallback fails, brief usage is printed by pod2usage(1), which
 # exits with status 1.
 sub standard_invocation {
     my ($cmd, @args) = @_;

     KMig::die_if_no_env_migschema() if $cmd ne 'env';

     my $bundled = $mig_bin . "mig-$cmd";

     if (!-e $bundled) {
         # Not shipped alongside this script: fall back to PATH.  pod2usage()
         # terminates the program itself, so nothing needs to follow it.
         system("mig-$cmd", @args) == 0 or pod2usage(1);
         return;
     }

     my $status = system($bundled, @args);
     return if $status == 0;

     # system() returns -1 when the program could not be started at all.
     if ($status == -1) {
         warn "mig: could not run $bundled: $!\n";
         exit 1;
     }

     # The low 7 bits of the wait status hold the terminating signal, if any;
     # the high byte holds the helper's own exit code.
     my $signal = $status & 127;
     exit(128 + $signal) if $signal;
     exit($status >> 8);
 }
 331
 332