From 812b9bcd7d6996dd35a9e8231a7c259108e81676 Mon Sep 17 00:00:00 2001
From: Ben Ostrowsky <ben@esilibrary.com>
Date: Mon, 22 Nov 2010 19:26:45 +0000
Subject: [PATCH] Added --ignore-indexes option to grab more data, much of which will be junk to filter out downstream

---
 bibliofile/parse_db.pl |   24 +++++++++++++-----------
 1 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/bibliofile/parse_db.pl b/bibliofile/parse_db.pl
index c14f958..d5459ef 100755
--- a/bibliofile/parse_db.pl
+++ b/bibliofile/parse_db.pl
@@ -1,11 +1,16 @@
 #!/usr/bin/perl -w
 
 # Parses Bibliofile files.
-# Usage: parse_db.pl TITLE.DB
-# Works fine on TITLE.DB, but misses the boat on other files; probably different block sizes or something.
+# Usage: parse_db.pl TITLE.DB [--ignore-indexes]
+# Choosing --ignore-indexes will find data you'd otherwise miss, but also grabs a lot of junk you'll need to filter out.
 
 use strict;
 use POSIX;
+use Getopt::Long;
+
+my $ignoreIndexes = '';  # becomes true when --ignore-indexes is given
+
+my $opts = GetOptions('ignore-indexes' => \$ignoreIndexes) or die "Usage: $0 TITLE.DB [--ignore-indexes]\n";  # GetOptions returns false on bad options
 
 $/ = undef;
 
@@ -61,14 +66,12 @@ while (read DB, my $data, $blockSize) {
   $blocks++;
   next if ($blocks == 1);
   my $maxRecords = POSIX::floor($blockSize / $rowLength);
-  my $indexIndicator1 = ord substr($data, 1, 1);
-  next if ($indexIndicator1 != 0);
-  my $indexIndicator2 = ord substr($data, 7, 1);
-  next if ($indexIndicator2 == 0);
-
-#  for (my $i = 1; $i <= scalar(@fieldLengths); $i++) {
-#    print "Field $i has length $fieldLengths[$i-1]\n";
-#  }
+  unless ($ignoreIndexes) {  # --ignore-indexes disables both block filters below
+    my $indexIndicator1 = ord substr($data, 1, 1);
+    next if ($indexIndicator1 != 0);  # skip block: byte at offset 1 is non-zero
+    my $indexIndicator2 = ord substr($data, 7, 1);
+    next if ($indexIndicator2 == 0);  # skip block: byte at offset 7 is zero
+  }
 
   for (my $r = 0; $r < $maxRecords; $r++) {
 
@@ -77,7 +80,6 @@ while (read DB, my $data, $blockSize) {
 
     #print STDERR "Record " . ($r+1) . " of $maxRecords\n";
 
-
     for (my $f = 0; $f < scalar(@fieldLengths); $f++) {
       $field[$f] = substr($data, $initialOffset + ($r * $rowLength) + $pos, $fieldLengths[$f]);
       if ($fieldTypes[$f] eq 'S') { $field[$f] = ord $field[$f]; }
-- 
1.7.2.5