X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=destiny%2Fdestiny_xml_to_tsv.pl;fp=destiny%2Fdestiny_xml_to_tsv.pl;h=a1ff0523213130cd47ac72f65789c9429a077e4f;hp=0000000000000000000000000000000000000000;hb=fb59b32671696276f326886f978f155426b137f1;hpb=693b0fe39fc1dc61f807359a9632fae668659d8c

diff --git a/destiny/destiny_xml_to_tsv.pl b/destiny/destiny_xml_to_tsv.pl
new file mode 100755
index 0000000..a1ff052
--- /dev/null
+++ b/destiny/destiny_xml_to_tsv.pl
@@ -0,0 +1,105 @@
+#!/usr/bin/perl -w
+
+# Copyright 2009-2016, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
+# Take a file that starts off like this:
+#
+# <?xml version="1.0" encoding="UTF-8" ?>
+# <DestinyCustomReport>
+#  <Row>
+#
+# Munge it like so:
+#
+# cat file | xml2 | cut -f4- -d/  > file.munged
+#
+# And feed the results to this script to create a .tsv version of the data.
+#
+# destiny_xml_to_tsv.pl file.munged > file.munged.tsv
+#
+
+
+use strict;
+use warnings;
+
+# Reads the munged xml2 output described above (one "field=value" line per
+# field, an empty line between records) from the files named on the command
+# line or from STDIN, and writes one tab-separated row per record to STDOUT.
+# The first column, SERIAL, is the zero-based position of the record in the
+# input; the remaining columns are the field names in sorted order.
+# Progress and debugging messages go to STDERR.
+
+my @records;          # $records[$serial] is a hashref of field name => value
+my $serial = 0;       # index of the record currently being loaded
+my $field;            # last field seen in the current record, if any
+my %unique_fields;    # every field name seen anywhere; these become the columns
+
+
+# Load each record
+# FIXME: For large batches of data, we may run out of memory and should store this on disk.
+while ( my $input = <> ) {
+    $input =~ s/\r?\n\z//;    # drop the line ending, whether DOS or Unix
+
+    # An empty line is the start of a new record.
+    if ( $input eq '' ) {
+        $serial++;
+        undef $field;    # a continuation must never extend a field named by an earlier record
+        print STDERR "Processing record $serial.\n";
+        next;
+    }
+
+    # A literal tab in a name or value would shift every later column of the row.
+    $input =~ tr/\t/ /;
+
+    # Looks like we've got some actual data!  Let's store it.
+    if ( $input =~ /\A([^=]*)=(.*)\z/s ) {
+        my $value = $2;
+        $field = $1;
+        $unique_fields{$field} = 1;
+        $records[$serial]{$field} = $value;
+        print STDERR "Data extracted: \$records[$serial]{'$field'} = '$value'\n";
+        next;
+    }
+
+    # Otherwise this is the continuation of the previous line.  That only makes
+    # sense when the current record already has a field to extend.
+    if ( !defined $field ) {
+        print STDERR "Ignoring line $. of input: no field to continue in record $serial.\n";
+        next;
+    }
+    $records[$serial]{$field} .= ' ' . $input;
+    print STDERR "Appended data to previous field. \$records[$serial]{'$field'} is now '" . $records[$serial]{$field} . "'.\n";
+}
+
+print STDERR "Loaded " . scalar(@records) . " records.\n";
+
+
+# Print a header line.  Header and data lines alike keep a tab before the
+# newline so that existing consumers see the same number of columns.
+my @sorted_fields = sort keys %unique_fields;
+print join( "\t", 'SERIAL', @sorted_fields ), "\t\n";
+
+
+# Print the results, one row per serial number.  Fields a record does not have
+# (and serial numbers that never received any data) are left as empty columns.
+for my $u ( 0 .. $#records ) {
+    my $record = $records[$u] || {};
+    my @columns = map { defined $record->{$_} ? $record->{$_} : '' } @sorted_fields;
+    print join( "\t", $u, @columns ), "\t\n";
+}
+
+print STDERR "Wrote " . scalar(@records) . " records.\n";
+# uh-bdee-uh-bdee-uh-bdee-uh- THAT'S ALL, FOLKS