From 812b9bcd7d6996dd35a9e8231a7c259108e81676 Mon Sep 17 00:00:00 2001
From: Ben Ostrowsky
Date: Mon, 22 Nov 2010 19:26:45 +0000
Subject: [PATCH] Added --ignore-indexes option to grab more data, much of which will be junk to filter out downstream

---
Review notes (text between the "---" line and the diff is not part of the
commit message):
 * The new guard is written as `unless ($ignoreIndexes) { ... }`. Perl's
   block form is `unless (EXPR) BLOCK`; without the parentheses the script
   does not compile.
 * Indentation and the placement of blank context lines were reconstructed
   from the hunk line counts; if the patch does not apply cleanly, retry
   with whitespace differences ignored.

 bibliofile/parse_db.pl |   24 +++++++++++++-----------
 1 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/bibliofile/parse_db.pl b/bibliofile/parse_db.pl
index c14f958..d5459ef 100755
--- a/bibliofile/parse_db.pl
+++ b/bibliofile/parse_db.pl
@@ -1,11 +1,16 @@
 #!/usr/bin/perl -w
 
 # Parses Bibliofile files.
-# Usage: parse_db.pl TITLE.DB
-# Works fine on TITLE.DB, but misses the boat on other files; probably different block sizes or something.
+# Usage: parse_db.pl TITLE.DB [--ignore-indexes]
+# Choosing --ignore-indexes will find data you'd otherwise miss, but also grabs a lot of junk you'll need to filter out.
 
 use strict;
 use POSIX;
+use Getopt::Long;
+
+my $ignoreIndexes = '';
+
+my $opts = GetOptions('ignore-indexes' => \$ignoreIndexes);
 
 $/ = undef;
 
@@ -61,14 +66,12 @@ while (read DB, my $data, $blockSize) {
     $blocks++;
     next if ($blocks == 1);
     my $maxRecords = POSIX::floor($blockSize / $rowLength);
-    my $indexIndicator1 = ord substr($data, 1, 1);
-    next if ($indexIndicator1 != 0);
-    my $indexIndicator2 = ord substr($data, 7, 1);
-    next if ($indexIndicator2 == 0);
-
-#    for (my $i = 1; $i <= scalar(@fieldLengths); $i++) {
-#        print "Field $i has length $fieldLengths[$i-1]\n";
-#    }
+    unless ($ignoreIndexes) {
+        my $indexIndicator1 = ord substr($data, 1, 1);
+        next if ($indexIndicator1 != 0);
+        my $indexIndicator2 = ord substr($data, 7, 1);
+        next if ($indexIndicator2 == 0);
+    }
 
     for (my $r = 0; $r < $maxRecords; $r++) {
 
@@ -77,7 +80,6 @@ while (read DB, my $data, $blockSize) {
 
         #print STDERR "Record " . ($r+1) . " of $maxRecords\n";
 
-
         for (my $f = 0; $f < scalar(@fieldLengths); $f++) {
             $field[$f] = substr($data, $initialOffset + ($r * $rowLength) + $pos, $fieldLengths[$f]);
             if ($fieldTypes[$f] eq 'S') { $field[$f] = ord $field[$f]; }
-- 
1.7.2.5