From 4e4e2c556fb0a8299cd4b751f3538a5632ae41c9 Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Mon, 10 Nov 2008 15:11:16 +0000 Subject: [PATCH] misc --- filter_record.ids | 121 ++++++++++++++++++++++++++++++++++++++++++++ marc-cleanup | 13 +++-- miker-filter_incoming.pl | 6 ++- miker-filter_incumbents.pl | 10 ++-- 4 files changed, 139 insertions(+), 11 deletions(-) create mode 100644 filter_record.ids diff --git a/filter_record.ids b/filter_record.ids new file mode 100644 index 0000000..99c6171 --- /dev/null +++ b/filter_record.ids @@ -0,0 +1,121 @@ +#!/usr/bin/perl +use warnings; +use strict; + +use Getopt::Long; +use Time::HiRes qw/time/; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); + +# THIS FILE EXTRACTS NONMATCHING RECORDS + +# configuration hashref +my $conf = (); +#initialize($conf); + +my $idfile = shift; +my $marcfile = shift; +my $import = shift; +my $shelve = shift; + +my %id; + +open F, "<$idfile"; +while () { + chomp; + $id{$_} = 1; +} + +close F; + +my $M; my $I; my $S; +open $M, '<:utf8', $marcfile; +open $I, '>:utf8', $import; +open $S, '>:utf8', $shelve; + +my $starttime = time; +my $count = 0; +my $icount = 0; +my $scount = 0; +while (<$M>) { + /tag="903" ind1=" " ind2=" ">.*?(\d+){help}); + + $c->{'incoming-tag'} = 903; + $c->{'incoming-subfield'} = 'a'; + $c->{'incoming-matchfile'} = ''; + $c->{'incoming-nomatchfile'} = ''; + $c->{'incumbent-tag'} = 901; + $c->{'incumbent-subfield'} = 'a'; + $c->{'incumbent-matchfile'} = ''; + $c->{'incumbent-nomatchfile'} = ''; + my @keys = keys %{$c}; + show_help() unless (@ARGV and @keys); + for my $key ('renumber-from', 'tag', 'subfield', 'output') + { push @missing, $key unless $c->{$key} } + if (@missing) { + print "Required option: ", join(', ', @missing), " missing!\n"; + show_help(); + } + +} + + +=head2 show_help + +Display usage message when things go wrong + +=cut + +sub show_help { +print < -o Output filename + +Any number of input files may be specified; one output file will result. +HELP +exit 1; +} diff --git a/marc-cleanup b/marc-cleanup index b3e3b73..58b59a8 100755 --- a/marc-cleanup +++ b/marc-cleanup @@ -33,8 +33,9 @@ open my $EXMARC, '>:utf8', $conf->{exception}; # edit(), below my %commands = ( c => \&print_context, C => \&print_linecontext, - k => \&kill_line, o => \&show_original, + f => \&flip_lines, + k => \&kill_line, m => \&merge_lines, n => \&next_line, p => \&prev_line, @@ -189,6 +190,9 @@ sub edit { message($msg, 1); print_context(); + # stow original problem line + $conf->{origline} = $record[$recptr]; + while (1) { my $line = $term->readline('marc-cleanup>'); my @chunks = split /\s+/, $line; @@ -343,6 +347,7 @@ sub merge_lines { @record = (@a, @b); # move record pointer to previous line prev_line(); + print_linecontext(); return 0; } @@ -373,14 +378,14 @@ sub prev_line { return 0; } -sub commit_edit { return 1 } - sub show_original { my ($line_in) = @_; - print $OUT "\n$line_in\n"; + print $OUT "\n", $conf->{origline}, "\n"; return 0; } +sub commit_edit { return 1 } + sub help { print $OUT < 'utf-8' ); +# THIS FILE EXTRACTS NONMATCHING RECORDS + # configuration hashref my $conf = (); #initialize($conf); @@ -39,10 +41,10 @@ while (<$M>) { /tag="903" ind1=" " ind2=" ">.*?(\d+) 'utf-8' ); +use Time::HiRes qw/time/; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); # configuration hashref my $conf = (); -initialize($conf); +#initialize($conf); my $idfile = shift; my $marcfile = shift; @@ -26,7 +26,7 @@ while () { close F; -my $M; +my $M; my $I; my $S; open $M, '<:utf8', $marcfile; open $I, '>:utf8', $import; open $S, '>:utf8', $shelve; -- 1.7.2.5