bibliofile/parse_db.pl

   1 #!/usr/bin/perl -w
   2
   3 # Copyright 2009-2012, Equinox Software, Inc.
   4 #
   5 # This program is free software; you can redistribute it and/or
   6 # modify it under the terms of the GNU General Public License
   7 # as published by the Free Software Foundation; either version 2
   8 # of the License, or (at your option) any later version.
   9 #
  10 # This program is distributed in the hope that it will be useful,
  11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13 # GNU General Public License for more details.
  14 #
  15 # You should have received a copy of the GNU General Public License
  16 # along with this program; if not, write to the Free Software
  17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
  18
  19 # Parses Bibliofile files.
  20 # Usage: parse_db.pl TITLE.DB [--ignore-indexes]
  21 # Choosing --ignore-indexes will find data you'd otherwise miss, but also grabs a lot of junk you'll need to filter out.
  22
  23 use strict;
  24 use POSIX;
  25 use Getopt::Long;
  26
  # --- Command line ----------------------------------------------------------
  # --ignore-indexes disables the per-block filter in the data-file loop, so
  # every block after the first is dumped.
  my $ignoreIndexes = '';

  my $usage = "Usage: $0 TITLE.DB [--ignore-indexes]\n";

  # GetOptions returns false when it meets an option it cannot parse; stop
  # with the usage text instead of carrying on with a half-parsed @ARGV.
  my $opts = GetOptions('ignore-indexes' => \$ignoreIndexes);
  die $usage unless $opts;

  # Slurp mode: the descriptor file is pulled in with a single <> read.
  $/ = undef;

  # --- File layout -----------------------------------------------------------
  my $startOfColumnTypes = 8;   # offset of the first 7-byte column descriptor in the .DBD file
  my $startOfRealData = 4096;   # not referenced anywhere in the visible script
  my $blockSize = 4096;         # the data file is read in chunks of this many bytes
  my $initialOffset = 6;        # offset inside each data block at which the first row starts

  # Human-readable names for the column type codes.  Not referenced anywhere
  # in the visible script; kept for reference.
  my %dataTypes = (
    'A' => 'Text',
    'N' => 'Numeric',
    'S' => 'Integer'
  );

  # --- Shared state filled from the descriptor file ---------------------------
  my $rowLength;      # bytes per row
  my @fieldLengths;   # per-column width in bytes
  my @fieldNames;     # per-column name, in file order
  my @fieldTypes;     # per-column one-character type code

  # GetOptions has already removed the switches, so the first remaining
  # argument is the data file.  Without it there is nothing to parse.
  my $db = $ARGV[0];
  die $usage unless defined $db && length $db;

  # The descriptor file sits next to the data file: TITLE.DB -> TITLE.DBD.
  my $dbd = $db . "D";
  50
  # Read the column descriptor file (TITLE.DBD) in one piece and fill in
  # $rowLength, @fieldTypes, @fieldLengths and @fieldNames, then print the
  # tab-separated header line.
  #
  # Layout as decoded here:
  #   bytes 0-1  row length, low byte first
  #   byte  2    number of columns
  #   from $startOfColumnTypes: one 7-byte descriptor per column, of which
  #              byte 0 is the type code and byte 1 the field width
  #   then       the column names, separated by NUL bytes
  my $descriptorSize = 7;

  # Three-argument open on a lexical handle: the file name can never be
  # mistaken for an open mode, and a failure is reported instead of silently
  # producing an empty header.
  open(my $dbdHandle, '<', $dbd) or die "Cannot open $dbd: $!\n";
  binmode($dbdHandle);   # binary file: no newline translation, no I/O layers

  # local $/ makes this a whole-file read whatever $/ is set to globally.
  my $dbdData = do { local $/; <$dbdHandle> };
  close($dbdHandle);
  $dbdData = '' unless defined $dbdData;

  die "$dbd is too short to contain a header\n" if length($dbdData) < 3;

  # 'v' = unsigned 16-bit little-endian, 'C' = unsigned byte.
  my $numColumns;
  ($rowLength, $numColumns) = unpack('v C', $dbdData);
  #print STDERR "Row length: $rowLength\n";
  #print STDERR "Columns:    $numColumns\n";

  die "$dbd declares a row length of zero\n" if $rowLength == 0;

  # The name table is taken from two bytes before the computed end of the
  # descriptor table.  NOTE(review): the reason for the -2 is not visible
  # here -- presumably a quirk of the format; confirm against a sample file.
  my $namesOffset = $startOfColumnTypes + ($numColumns * $descriptorSize) - 2;
  die "$dbd is truncated: expected at least $namesOffset bytes, found "
    . length($dbdData) . "\n"
    if length($dbdData) < $namesOffset;

  @fieldNames = split(/\x00/, substr($dbdData, $namesOffset));

  @fieldTypes = ();
  @fieldLengths = ();
  for my $column (0 .. $numColumns - 1) {
    my $descriptorStart = $startOfColumnTypes + ($column * $descriptorSize);
    # 'a' keeps the type byte verbatim; 'C' reads the width as a number.
    my ($type, $width) = unpack('a C', substr($dbdData, $descriptorStart, 2));
    push @fieldTypes, $type;
    push @fieldLengths, $width;
  }

  print join("\t", @fieldNames) . "\n";
  76
  # Walk the data file (TITLE.DB) one $blockSize chunk at a time and print
  # every row whose first field contains something other than NUL bytes, as a
  # tab-separated line on STDOUT.

  # Without a usable row length the records-per-block division below would
  # fail with a bare "Illegal division by zero".
  die "Row length is missing or zero; cannot split $db into rows\n"
    unless defined $rowLength && $rowLength > 0;

  # Three-argument open on a lexical handle, with the failure reported.
  open(my $dbHandle, '<', $db) or die "Cannot open $db: $!\n";
  binmode($dbHandle);   # binary file: no newline translation, no I/O layers

  # Same for every block, so computed once.
  my $maxRecords = POSIX::floor($blockSize / $rowLength);

  my $blocks = 0;

  while (1) {
    # read() returns undef on error and 0 at end of file; tell them apart.
    my $bytesRead = read($dbHandle, my $data, $blockSize);
    die "Error while reading $db: $!\n" unless defined $bytesRead;
    last if $bytesRead == 0;

    $blocks++;
    next if ($blocks == 1);   # the first block never holds rows we print

    unless ($ignoreIndexes) {
      # Both indicator bytes must be present; a shorter block could never
      # pass the second test below.
      next if length($data) < 8;
      # Keep only blocks whose byte 1 is zero and whose byte 7 is non-zero.
      # Per the usage notes at the top of the file, disabling this filter
      # finds more data but also lets a lot of junk through.
      my $indexIndicator1 = ord substr($data, 1, 1);
      next if ($indexIndicator1 != 0);
      my $indexIndicator2 = ord substr($data, 7, 1);
      next if ($indexIndicator2 == 0);
    }

    for my $r (0 .. $maxRecords - 1) {

      my $recordStart = $initialOffset + ($r * $rowLength);
      # A short (final) block may end before this row starts.
      last if $recordStart >= length($data);

      my $pos = 0;
      my @field;

      #print STDERR "Record " . ($r+1) . " of $maxRecords\n";

      for my $f (0 .. $#fieldLengths) {
        my $fieldStart = $recordStart + $pos;
        # Fields that fall past the end of the block come out empty, rather
        # than undef accompanied by a "substr outside of string" warning.
        my $value = $fieldStart < length($data)
          ? substr($data, $fieldStart, $fieldLengths[$f])
          : '';
        # NOTE(review): ord() decodes only the first byte.  If 'S' columns
        # can be wider than one byte the printed value is truncated --
        # confirm the field width against real data.
        if ($fieldTypes[$f] eq 'S') { $value = ord $value; }
        push @field, $value;
        $pos += $fieldLengths[$f];
      }

      # Rows whose first field is all NUL bytes are treated as unused slots.
      if (@field && $field[0] =~ m/[^\x00]/) {
        print join("\t", @field) . "\n";
      }

    }

  }

  close($dbHandle);