X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=unicorn%2Funicorn_to_tsv.pl;h=a6cdf475229b736110fb78da2f7e9b99e0232491;hp=3af493f56667efdf18eda321502c74efd59174a2;hb=9de57ed3344ac49d97437f9f13362bb7ba1bd64b;hpb=3021cf9f0f0e23e6c003802e2425273618f56087 diff --git a/unicorn/unicorn_to_tsv.pl b/unicorn/unicorn_to_tsv.pl index 3af493f..a6cdf47 100755 --- a/unicorn/unicorn_to_tsv.pl +++ b/unicorn/unicorn_to_tsv.pl @@ -1,5 +1,21 @@ #!/usr/bin/perl -w +# Copyright 2009-2012, Equinox Software, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + # Converts a Unicorn users.data, bill.data, or charge.data file to a tab-separated file. # 2009-08-10 Ben Ostrowsky @@ -21,7 +37,7 @@ while (<>) { $line = 0; $serial++; $section = ''; # just in case this didn't get reset in the previous record - # print STDERR "Processing record $serial.\n"; + print STDERR "Processing record $serial.\n"; next; } @@ -36,7 +52,7 @@ while (<>) { # Is this line the beginning of a block of data (typically an address or a note)? if ( /^\.(.*?)_BEGIN.$/ ) { - # print STDERR "I think this might be the beginning of a beautiful " . $1 . ".\n"; + print STDERR "I think this might be the beginning of a beautiful " . $1 . ".\n"; $section = "$1."; next; } @@ -46,14 +62,14 @@ while (<>) { if ("$1." ne $section) { print STDERR "Error in record $serial, line $line (input line $.): got an end-of-$1 but I thought I was in a $section block!\n"; } - # print STDERR "It's been fun, guys, but... this is the end of the " . $1 . ".\n"; + print STDERR "It's been fun, guys, but... this is the end of the " . $1 . ".\n"; $section = ''; next; } # Looks like we've got some actual data! Let's store it. # FIXME: For large batches of data, we may run out of memory and should store this on disk. - if ( /^\.([A-Z]*?).\s+(\|a)?(.*)$/ ) { + if ( /^\.([A-Z0-9_\/]+?)\.\s+(\|a)?(.*)$/ ) { # Build the name of this field (taking note of whether we're in a named section of data) $field = ''; @@ -68,7 +84,7 @@ while (<>) { # Now we can actually store this line of data! $records[$serial]{$field} = $3; - # print STDERR "Data extracted: \$records[$serial]{'$field'} = '$3'\n"; + print STDERR "Data extracted: \$records[$serial]{'$field'} = '$3'\n"; next; } @@ -77,7 +93,7 @@ while (<>) { else { chomp($_); $records[$serial]{$field} .= ' ' . $_; - # print STDERR "Appended data to previous field. \$records[$serial]{'$field'} is now '" . $records[$serial]{$field} . "'.\n"; + print STDERR "Appended data to previous field. \$records[$serial]{'$field'} is now '" . $records[$serial]{$field} . "'.\n"; } }