X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=bibliofile%2Fparse_db.pl;h=6d1ae4fd8320092197eccc5b8378d69d53eb1931;hp=b7b691d4f1eeff2d3ee97f16fdff0133a7a2185a;hb=3bb6926bbb549af97fe1ce2698b7fc15852607fa;hpb=e657089751e935005c4338f0cd1b7d52074abac3 diff --git a/bibliofile/parse_db.pl b/bibliofile/parse_db.pl index b7b691d..6d1ae4f 100755 --- a/bibliofile/parse_db.pl +++ b/bibliofile/parse_db.pl @@ -1,11 +1,32 @@ #!/usr/bin/perl -w +# Copyright 2009-2012, Equinox Software, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + # Parses Bibliofile files. -# Usage: parse_db.pl TITLE.DB -# Works fine on TITLE.DB, but misses the boat on other files; probably different block sizes or something. +# Usage: parse_db.pl TITLE.DB [--ignore-indexes] +# Choosing --ignore-indexes will find data you'd otherwise miss, but also grabs a lot of junk you'll need to filter out. 
use strict; use POSIX; +use Getopt::Long; + +my $ignoreIndexes = ''; + +my $opts = GetOptions('ignore-indexes' => \$ignoreIndexes); $/ = undef; @@ -61,14 +82,12 @@ while (read DB, my $data, $blockSize) { $blocks++; next if ($blocks == 1); my $maxRecords = POSIX::floor($blockSize / $rowLength); - my $indexIndicator1 = ord substr($data, 1, 1); - next if ($indexIndicator1 != 0); - my $indexIndicator2 = ord substr($data, 7, 1); - next if ($indexIndicator2 == 0); - -# for (my $i = 1; $i <= scalar(@fieldLengths); $i++) { -# print "Field $i has length $fieldLengths[$i-1]\n"; -# } + unless ($ignoreIndexes) { + my $indexIndicator1 = ord substr($data, 1, 1); + next if ($indexIndicator1 != 0); + my $indexIndicator2 = ord substr($data, 7, 1); + next if ($indexIndicator2 == 0); + } for (my $r = 0; $r < $maxRecords; $r++) { @@ -77,7 +96,6 @@ while (read DB, my $data, $blockSize) { #print STDERR "Record " . ($r+1) . " of $maxRecords\n"; - for (my $f = 0; $f < scalar(@fieldLengths); $f++) { $field[$f] = substr($data, $initialOffset + ($r * $rowLength) + $pos, $fieldLengths[$f]); if ($fieldTypes[$f] eq 'S') { $field[$f] = ord $field[$f]; } @@ -86,7 +104,6 @@ while (read DB, my $data, $blockSize) { if ($field[0] =~ m/[^\x00]/) { print join("\t", @field) . "\n"; - #print STDERR "Length: $pos\n"; } }