From 8f3eec5b9155b53c3b570fe6184ab7d82075ecfb Mon Sep 17 00:00:00 2001
From: Galen Charlton
Date: Tue, 16 Apr 2013 17:02:25 -0400
Subject: [PATCH] dump_oracle_table_for_pg: try to ensure dump output is UTF-8

Adds a --source-charset command-line parameter to specify
the source character encoding.

Note that CLOB/BLOB columns are not considered (not that
they were before this commit).

Signed-off-by: Galen Charlton
---
 dump_oracle_table_for_pg |   11 +++++++++--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/dump_oracle_table_for_pg b/dump_oracle_table_for_pg
index f8969e9..b226bc2 100755
--- a/dump_oracle_table_for_pg
+++ b/dump_oracle_table_for_pg
@@ -24,6 +24,7 @@ use warnings;
 use Carp;
 use DBI;
 use Getopt::Long;
+use Encode;
 
 my $host = 'localhost';
 my $sid = $ENV{ORACLE_SID};
@@ -36,6 +37,7 @@ my $pg_table;
 my $base_table;
 my $column_prefix = '';
 my $show_help;
+my $src_charset;
 
 my $result = GetOptions(
     'sid=s' => \$sid,
@@ -48,6 +50,7 @@ my $result = GetOptions(
     'pg-table=s' => \$pg_table,
     'column-prefix=s' => \$column_prefix,
     'inherits-from=s' => \$base_table,
+    'source-charset=s' => \$src_charset,
     'help' => \$show_help,
 );
 
@@ -94,7 +97,7 @@ sub export_table {
         my @data = map { normalize_value_for_tsv($_) } @$row;
         my $str = join("\t", @data);
         $str =~ s/\0//g;
-        print $fh "$str\n";
+        print $fh encode('utf8', "$str\n");
     }
 
     $sth->finish();
@@ -120,7 +123,11 @@ sub normalize_value_for_tsv {
         $val =~ s/\n/\\n/g;
         $val =~ s/\t/\\t/g;
         $val =~ s/\v/\\v/g;
-        return $val;
+        if ($src_charset) {
+            return decode($src_charset, $val);
+        } else {
+            return $val;
+        }
     } else {
         return '\N';
     }
-- 
1.7.2.5