#!/usr/bin/perl -w
###############################################################################
=pod

=head1 NAME

mig-clean

Attempts to invoke B<clean_csv> on the specified tracked file, placing the
output in [file].clean

If given no other arguments, the invocation will look like

=over 5

clean_csv --config scripts/clean.conf --fix --apply --backslash --pad [--create-headers|--use-headers <headers_file>] <utf8_file>

=back

otherwise, the arguments will be passed through like so

=over 5

clean_csv [other arguments...] <utf8_file>

=back

You'll need to invoke B<mig-iconv> or B<mig-skip-iconv> prior to using commands
like B<mig-clean>

=head1 SYNOPSIS

B<mig-clean> <file> [other arguments...]

=cut

###############################################################################

use strict;
use Switch;
use Env qw(
    HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
    MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
);
use Pod::Usage;
use DBI;
use Cwd 'abs_path';
use FindBin;
my $mig_bin = "$FindBin::Bin/";
use lib "$FindBin::Bin/";
use Mig;

# No file argument (or an explicit --help) shows the full POD and exits.
pod2usage(-verbose => 2) if ! $ARGV[0] || $ARGV[0] eq '--help';

Mig::die_if_no_env_migschema();
Mig::die_if_mig_tracking_table_does_not_exist();

my $file = abs_path($ARGV[0]);

# abs_path() can return undef when the path cannot be resolved; fail with a
# clear message instead of propagating undef into the checks below.
die "Unable to resolve path: $ARGV[0]\n" if ! defined $file;

# \Q...\E makes the directory a literal prefix; without it any regex
# metacharacters in the path (., +, parentheses, ...) would be interpreted.
# NOTE(review): the check uses MIGBASEWORKDIR while the message reports
# MIGWORKDIR -- confirm which one is intended.
if ($file =~ /^\Q$MIGBASEWORKDIR\E/) {
    call_clean_csv(@ARGV);
} else {
    print "File falls outside of MIGWORKDIR ($MIGWORKDIR): $file\n";
}

exit 0;

###############################################################################

# call_clean_csv($file, @args)
#
# Runs the external clean_csv command against the UTF8 version of a tracked
# file and records the resulting "<utf8 file>.clean" path in the
# clean_filename column of $MIGSCHEMA.tracked_file.
#
#   $file - path of the tracked file (resolved with abs_path)
#   @args - optional arguments handed to clean_csv verbatim; when none are
#           given, a default argument list is built (see below)
#
# Dies when the file has no recorded UTF8 version, when that UTF8 file is
# missing on disk, when the path cannot be resolved, or when the database
# update fails. Prints a notice and returns when the file is not tracked.
sub call_clean_csv {
    my $file = abs_path(shift);
    my @args = @_;

    die "Unable to resolve path of file to clean\n" if ! defined $file;

    my $tracked_file_id = Mig::check_for_tracked_file($file);
    if (! $tracked_file_id) {
        print "File not currently tracked: $file\n";
        return;
    }

    my $data = Mig::status_this_file($file);

    if (! $data->{'utf8_filename'}) {
        die "mig-iconv or mig-skip-iconv needed for UTF8 version of file: $file\n";
    }

    my $utf8_file = $data->{'utf8_filename'};
    if (! -e $utf8_file) {
        die "missing file: $utf8_file\n";
    }

    print "cleaning tracked file: $file\n";

    # Build the default clean_csv invocation only when the caller supplied
    # no arguments of their own.
    if (scalar(@args) == 0) {
        @args = (
             '--config'
            ,'scripts/clean.conf'
            ,'--fix'
            ,'--apply'
            ,'--backslash'
            ,'--pad'
        );
        if (! $data->{'has_headers'}) {
            if ($data->{'headers_file'}) {
                push @args, '--use-headers';
                push @args, $data->{'headers_file'};
            } else {
                push @args, '--create-headers';
            }
        }
    }

    print join(' ',@args) . "\n";

    # List-form system() bypasses the shell. Report failures on STDERR but
    # carry on, so the tracking table is still updated from whatever is (or
    # is not) on disk, as before.
    my $status = system('clean_csv', @args, $utf8_file);
    if ($status == -1) {
        warn "failed to execute clean_csv: $!\n";
    } elsif ($status != 0) {
        warn "clean_csv did not exit cleanly (wait status $status)\n";
    }

    my $dbh = Mig::db_connect();

    # Record an empty clean_filename when clean_csv produced no output file.
    my $clean_file = $dbh->quote($utf8_file . '.clean');
    if (! -e $utf8_file . '.clean') {
        print "clean file does not exist: $clean_file\n";
        $clean_file = $dbh->quote('');
    }

    # On failure report the DBI error string; $! (errno) is unrelated to a
    # database error.
    $dbh->do("
        UPDATE $MIGSCHEMA.tracked_file
        SET clean_filename = $clean_file
        WHERE base_filename = " . $dbh->quote($file) . "
        ;
    ") or die "Error updating table $MIGSCHEMA.tracked_file: "
            . $dbh->errstr . "\n";

    Mig::db_disconnect($dbh);
}