X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=bibliofile%2Fparse_db.pl;h=6d1ae4fd8320092197eccc5b8378d69d53eb1931;hp=e366b481049dd8145bfd35ed83cce34c7663402e;hb=3bb6926bbb549af97fe1ce2698b7fc15852607fa;hpb=3ba063702677ea5378145ca05e51a1efbc523144

diff --git a/bibliofile/parse_db.pl b/bibliofile/parse_db.pl
index e366b48..6d1ae4f 100755
--- a/bibliofile/parse_db.pl
+++ b/bibliofile/parse_db.pl
@@ -1,10 +1,32 @@
 #!/usr/bin/perl -w
 
+# Copyright 2009-2012, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+
 # Parses Bibliofile files.
-# Usage: parse_db.pl TITLE.DB
-# Works fine on TITLE.DB, but misses the boat on other files; probably different block sizes or something.
+# Usage: parse_db.pl TITLE.DB [--ignore-indexes]
+# Choosing --ignore-indexes will find data you'd otherwise miss, but also grabs a lot of junk you'll need to filter out.
 
 use strict;
+use POSIX;
+use Getopt::Long;
+
+my $ignoreIndexes = '';  # becomes true when --ignore-indexes is given
+
+my $opts = GetOptions('ignore-indexes' => \$ignoreIndexes) or die "Usage: $0 TITLE.DB [--ignore-indexes]\n";  # GetOptions returns false on bad options
 
 $/ = undef;
 
@@ -32,11 +54,11 @@ while (<DBD>) {
 
   my $data = $_;
 
-  $rowLength = ord substr($data, 0, 1);
-  #print "Row length: $rowLength\n";
+  $rowLength = ord(substr($data, 0, 1)) + (256 * (ord(substr($data, 1, 1))));
+  #print STDERR "Row length: $rowLength\n";
 
   my $numColumns = ord substr($data, 2, 1);
-  #print "Columns:    $numColumns\n";
+  #print STDERR "Columns:    $numColumns\n";
 
   my $namedata = substr($data, $startOfColumnTypes + ($numColumns * 7) - 2);
   @fieldNames = split(/\x00/, $namedata);
@@ -59,19 +81,21 @@ my $blocks = 0;
 while (read DB, my $data, $blockSize) {
   $blocks++;
   next if ($blocks == 1);
-  my $maxRecords = int( $blockSize / $rowLength);
-  my $indexIndicator = ord substr($data, 7, 1);
-  next if ($indexIndicator == 0);
-
-#  for (my $i = 1; $i <= scalar(@fieldLengths); $i++) {
-#    print "Field $i has length $fieldLengths[$i-1]\n";
-#  }
+  my $maxRecords = POSIX::floor($blockSize / $rowLength);
+  unless ($ignoreIndexes) {  # default mode: filter blocks by their indicator bytes (presumably index blocks; confirm against format)
+    my $indexIndicator1 = ord substr($data, 1, 1);  # byte at offset 1 of this block
+    next if ($indexIndicator1 != 0);  # skip the block when that byte is non-zero
+    my $indexIndicator2 = ord substr($data, 7, 1);  # byte at offset 7 of this block
+    next if ($indexIndicator2 == 0);  # skip the block when that byte is zero
+  }
 
   for (my $r = 0; $r < $maxRecords; $r++) {
 
     my $pos = 0;
     my @field;
 
+    #print STDERR "Record " . ($r+1) . " of $maxRecords\n";
+
     for (my $f = 0; $f < scalar(@fieldLengths); $f++) {
       $field[$f] = substr($data, $initialOffset + ($r * $rowLength) + $pos, $fieldLengths[$f]);
       if ($fieldTypes[$f] eq 'S') { $field[$f] = ord $field[$f]; }
@@ -81,6 +105,7 @@ while (read DB, my $data, $blockSize) {
     if ($field[0] =~ m/[^\x00]/) {
       print join("\t", @field) . "\n";
     }
+
   }
 
 }