miker's script
author: Don McMorris <dmcmorris@esilibrary.com>
Tue, 5 Aug 2008 21:13:43 +0000 (21:13 +0000)
committer: Don McMorris <dmcmorris@esilibrary.com>
Tue, 5 Aug 2008 21:13:43 +0000 (21:13 +0000)
miker-filter_incumbents.pl [new file with mode: 0644]

diff --git a/miker-filter_incumbents.pl b/miker-filter_incumbents.pl
new file mode 100644 (file)
index 0000000..c68b9c7
--- /dev/null
@@ -0,0 +1,83 @@
+#!/usr/bin/perl
+
+# Split a MARCXML file, line by line, into an "import" file and a "shelve"
+# file according to a list of record ids.
+#
+# Usage: miker-filter_incumbents.pl IDFILE MARCFILE IMPORTFILE SHELVEFILE
+#
+#   IDFILE      one id per line
+#   MARCFILE    input, read one line at a time (presumably one MARCXML
+#               record per line -- confirm against the producer)
+#   IMPORTFILE  receives every line whose 901$a id is listed in IDFILE
+#   SHELVEFILE  receives every other line, including lines with no 901$a
+#
+# Running totals and a lines-per-second rate go to STDERR every 100 lines.
+
+use strict;
+use warnings;
+
+use Time::HiRes qw/time/;
+use MARC::Record;
+use MARC::File::XML ( BinaryEncoding => 'utf-8' );
+
+my ($idfile, $marcfile, $import, $shelve) = @ARGV;
+die "Usage: $0 IDFILE MARCFILE IMPORTFILE SHELVEFILE\n"
+       unless defined $shelve;
+
+# Load the wanted ids into a hash so each lookup is a single hash access.
+my %id;
+open my $ids_fh, '<', $idfile or die "Cannot read $idfile: $!\n";
+while (my $line = <$ids_fh>) {
+       $line =~ s/\s+\z//;     # drop the newline, any CR, trailing blanks
+       next if $line eq '';    # a blank line must not become an id
+       $id{$line} = 1;
+}
+close $ids_fh;
+
+open my $marc_fh,   '<:utf8', $marcfile or die "Cannot read $marcfile: $!\n";
+open my $import_fh, '>:utf8', $import   or die "Cannot write $import: $!\n";
+open my $shelve_fh, '>:utf8', $shelve   or die "Cannot write $shelve: $!\n";
+
+# Record id: subfield a of a 901 field whose indicators are both blank.
+my $id_re = qr/tag="901" ind1=" " ind2=" "><subfield code="a">(\d+)</;
+
+my $starttime = time;
+my $count  = 0;    # lines read
+my $icount = 0;    # lines written to the import file
+my $scount = 0;    # lines written to the shelve file
+
+while (my $record = <$marc_fh>) {
+
+       # Take the id from the match result itself. An unchecked $1 is not
+       # cleared by a failed match, so a line without a 901$a could
+       # otherwise be routed by an earlier line's id.
+       my ($rec_id) = $record =~ $id_re;
+
+       if (defined $rec_id && $id{$rec_id}) {
+               print {$import_fh} $record;
+               $icount++;
+       } else {
+               print {$shelve_fh} $record;
+               $scount++;
+       }
+       $count++;
+
+       report_progress() unless $count % 100;
+}
+
+# Final totals, then a newline so the shell prompt starts on a fresh line.
+report_progress();
+print STDERR "\n";
+
+close $marc_fh;
+# Buffered write errors only surface at close, so check both outputs.
+close $import_fh or die "Error closing $import: $!\n";
+close $shelve_fh or die "Error closing $shelve: $!\n";
+
+# Print the running totals and the rate (lines per second) to STDERR.
+sub report_progress {
+       my $elapsed = time - $starttime;
+       my $rate = $elapsed > 0 ? $count / $elapsed : 0;    # no divide by zero
+       print STDERR "\r$count\t(shelved: $scount, import: $icount)\t$rate";
+}
+