From: Ben Ostrowsky Date: Thu, 18 Nov 2010 19:29:55 +0000 (+0000) Subject: Works fine on TITLE.DB, but misses the boat on other files. X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=3ba063702677ea5378145ca05e51a1efbc523144 Works fine on TITLE.DB, but misses the boat on other files. --- diff --git a/bibliofile/parse_db.pl b/bibliofile/parse_db.pl new file mode 100755 index 0000000..e366b48 --- /dev/null +++ b/bibliofile/parse_db.pl @@ -0,0 +1,88 @@ +#!/usr/bin/perl -w + +# Parses Bibliofile files. +# Usage: parse_db.pl TITLE.DB +# Works fine on TITLE.DB, but misses the boat on other files; probably different block sizes or something. + +use strict; + +$/ = undef; + +my $startOfColumnTypes = 8; +my $startOfRealData = 4096; +my $blockSize = 4096; +my $initialOffset = 6; +my %dataTypes = ( + 'A' => 'Text', + 'N' => 'Numeric', + 'S' => 'Integer' +); + +my $rowLength; +my @fieldLengths; +my @fieldNames; +my @fieldTypes; + +my $db = $ARGV[0]; +my $dbd = $db . "D"; + +open (DBD, $dbd); + +while () { + + my $data = $_; + + $rowLength = ord substr($data, 0, 1); + #print "Row length: $rowLength\n"; + + my $numColumns = ord substr($data, 2, 1); + #print "Columns: $numColumns\n"; + + my $namedata = substr($data, $startOfColumnTypes + ($numColumns * 7) - 2); + @fieldNames = split(/\x00/, $namedata); + + for (my $i = 0; $i < $numColumns; $i++) { + $fieldTypes[$i] = substr($data, ($i * 7) + $startOfColumnTypes, 1); + $fieldLengths[$i] = ord substr($data, ($i * 7) + $startOfColumnTypes + 1, 1); + } + +} + +close(DBD); + +print join("\t", @fieldNames) . "\n"; + +open (DB, $db); + +my $blocks = 0; + +while (read DB, my $data, $blockSize) { + $blocks++; + next if ($blocks == 1); + my $maxRecords = int( $blockSize / $rowLength); + my $indexIndicator = ord substr($data, 7, 1); + next if ($indexIndicator == 0); + +# for (my $i = 1; $i <= scalar(@fieldLengths); $i++) { +# print "Field $i has length $fieldLengths[$i-1]\n"; +# } + + for (my $r = 0; $r < $maxRecords; $r++) { + + my $pos = 0; + my @field; + + for (my $f = 0; $f < scalar(@fieldLengths); $f++) { + $field[$f] = substr($data, $initialOffset + ($r * $rowLength) + $pos, $fieldLengths[$f]); + if ($fieldTypes[$f] eq 'S') { $field[$f] = ord $field[$f]; } + $pos += $fieldLengths[$f]; + } + + if ($field[0] =~ m/[^\x00]/) { + print join("\t", @field) . "\n"; + } + } + +} + +close(DB);