From dd820db3b17371409aea7755ac6b1fa40cfbfcb8 Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Fri, 19 Dec 2008 16:58:42 +0000 Subject: [PATCH] completing cleanup and organization --- dump_inverse_select_marc.pl | 59 ---------------- oneliners/dump_inverse_select_marc.pl | 59 ++++++++++++++++ oneliners/select_marc.pl | 55 +++++++++++++++ oneliners/select_marc_as_text.pl | 60 ++++++++++++++++ oneliners/spit_csv.pl | 64 +++++++++++++++++ oneliners/spit_marc_903a_001.pl | 36 ++++++++++ oneliners/spit_sample.pl | 9 +++ oneliners/spit_tag_multiplication.pl | 74 ++++++++++++++++++++ oneliners/split_marc.pl | 48 +++++++++++++ oneliners/spot_check.pl | 44 ++++++++++++ oneliners/trim_marc_based_on_tag_subfield_value.pl | 51 ++++++++++++++ select_marc.pl | 55 --------------- select_marc_as_text.pl | 60 ---------------- spit_csv.pl | 64 ----------------- spit_marc_903a_001.pl | 36 ---------- spit_sample.pl | 9 --- spit_tag_multiplication.pl | 74 -------------------- split_marc.pl | 48 ------------- spot_check.pl | 44 ------------ trim_marc_based_on_tag_subfield_value.pl | 51 -------------- 20 files changed, 500 insertions(+), 500 deletions(-) delete mode 100755 dump_inverse_select_marc.pl create mode 100755 oneliners/dump_inverse_select_marc.pl create mode 100755 oneliners/select_marc.pl create mode 100755 oneliners/select_marc_as_text.pl create mode 100755 oneliners/spit_csv.pl create mode 100755 oneliners/spit_marc_903a_001.pl create mode 100755 oneliners/spit_sample.pl create mode 100755 oneliners/spit_tag_multiplication.pl create mode 100644 oneliners/split_marc.pl create mode 100644 oneliners/spot_check.pl create mode 100755 oneliners/trim_marc_based_on_tag_subfield_value.pl delete mode 100755 select_marc.pl delete mode 100755 select_marc_as_text.pl delete mode 100755 spit_csv.pl delete mode 100755 spit_marc_903a_001.pl delete mode 100755 spit_sample.pl delete mode 100755 spit_tag_multiplication.pl delete mode 100644 split_marc.pl delete mode 100644 spot_check.pl delete mode 100755 trim_marc_based_on_tag_subfield_value.pl diff --git a/dump_inverse_select_marc.pl b/dump_inverse_select_marc.pl deleted file mode 100755 index 249cad5..0000000 --- a/dump_inverse_select_marc.pl +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/perl -use open ':utf8'; -use MARC::Batch; -use MARC::Record; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Field; - -my $format = $ARGV[0]; - -my $record_id_file = $ARGV[1]; -my %record_ids; - -open FILE, $record_id_file; -while (my $record_id = ) { - chomp($record_id); $record_ids{ $record_id } = 1; -} -close FILE; - -my $id_tag = $ARGV[2]; my $id_subfield = $ARGV[3]; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); - -my $M; - -foreach $argnum ( 4 .. $#ARGV ) { - - print STDERR "Processing " . $ARGV[$argnum] . "\n"; - - open $M, '<:utf8', $ARGV[$argnum]; - - my $batch = MARC::Batch->new('XML',$M); - $batch->strict_off(); - $batch->warnings_off(); - - my $count = 0; - - while ( my $record = $batch->next() ) { - - $count++; - - my $id = $record->field($id_tag); - if (!$id) { - print STDERR "ERROR: This record is missing a $id_tag field.\n" . $record->as_formatted() . "\n=====\n"; - next; - } - $id = $id->as_string($id_subfield); - - if (! defined $record_ids{ $id }) { - if ($format eq 'text') { - print STDOUT '=-' x 39 . "\n"; - print STDOUT $record->as_formatted() . "\n"; - } else { - print STDOUT $record->as_xml() . "\n"; - } - } - } - print STDERR "Processed $count records.\n"; -} diff --git a/oneliners/dump_inverse_select_marc.pl b/oneliners/dump_inverse_select_marc.pl new file mode 100755 index 0000000..249cad5 --- /dev/null +++ b/oneliners/dump_inverse_select_marc.pl @@ -0,0 +1,59 @@ +#!/usr/bin/perl +use open ':utf8'; +use MARC::Batch; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +use MARC::Field; + +my $format = $ARGV[0]; + +my $record_id_file = $ARGV[1]; +my %record_ids; + +open FILE, $record_id_file; +while (my $record_id = ) { + chomp($record_id); $record_ids{ $record_id } = 1; +} +close FILE; + +my $id_tag = $ARGV[2]; my $id_subfield = $ARGV[3]; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); + +my $M; + +foreach $argnum ( 4 .. $#ARGV ) { + + print STDERR "Processing " . $ARGV[$argnum] . "\n"; + + open $M, '<:utf8', $ARGV[$argnum]; + + my $batch = MARC::Batch->new('XML',$M); + $batch->strict_off(); + $batch->warnings_off(); + + my $count = 0; + + while ( my $record = $batch->next() ) { + + $count++; + + my $id = $record->field($id_tag); + if (!$id) { + print STDERR "ERROR: This record is missing a $id_tag field.\n" . $record->as_formatted() . "\n=====\n"; + next; + } + $id = $id->as_string($id_subfield); + + if (! defined $record_ids{ $id }) { + if ($format eq 'text') { + print STDOUT '=-' x 39 . "\n"; + print STDOUT $record->as_formatted() . "\n"; + } else { + print STDOUT $record->as_xml() . "\n"; + } + } + } + print STDERR "Processed $count records.\n"; +} diff --git a/oneliners/select_marc.pl b/oneliners/select_marc.pl new file mode 100755 index 0000000..cbd7fd9 --- /dev/null +++ b/oneliners/select_marc.pl @@ -0,0 +1,55 @@ +#!/usr/bin/perl +use open ':utf8'; +use MARC::Batch; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +use MARC::Field; + +my $record_id_file = $ARGV[0]; +my %record_ids; + +open FILE, $record_id_file; +while (my $record_id = ) { + chomp($record_id); $record_ids{ $record_id } = 1; +} +close FILE; + +my $id_tag = $ARGV[1]; my $id_subfield = $ARGV[2]; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); + +my $M; + +foreach $argnum ( 3 .. $#ARGV ) { + + print STDERR "Processing " . $ARGV[$argnum] . "\n"; + + open $M, '<:utf8', $ARGV[$argnum]; + + my $batch = MARC::Batch->new('XML',$M); + $batch->strict_off(); + $batch->warnings_off(); + + my $count = 0; + + while ( my $record = $batch->next() ) { + + $count++; + + my $id = $record->field($id_tag); + if (!$id) { + print STDERR "ERROR: This record is missing a $id_tag field.\n" . $record->as_formatted() . "\n=====\n"; + next; + } + $id = $id->as_string($id_subfield); + + if (defined $record_ids{ $id }) { + open FILE, ">$id"; + binmode(FILE, ':utf8'); + print FILE $record->as_xml(); + close FILE; + } + } + print STDERR "Processed $count records.\n"; +} diff --git a/oneliners/select_marc_as_text.pl b/oneliners/select_marc_as_text.pl new file mode 100755 index 0000000..6a28e88 --- /dev/null +++ b/oneliners/select_marc_as_text.pl @@ -0,0 +1,60 @@ +#!/usr/bin/perl +use open ':utf8'; +use MARC::Batch; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +use MARC::Field; + +my $inverse = $ARGV[0] eq "inverse"; + +my $record_id_file = $ARGV[1]; +my %record_ids; + +open FILE, $record_id_file; +while (my $record_id = ) { + chomp($record_id); $record_ids{ $record_id } = 1; +} +close FILE; + +my $id_tag = $ARGV[2]; my $id_subfield = $ARGV[3]; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); + +my $M; + +foreach $argnum ( 4 .. $#ARGV ) { + + print STDERR "Processing " . $ARGV[$argnum] . "\n"; + + open $M, '<:utf8', $ARGV[$argnum]; + + my $batch = MARC::Batch->new('XML',$M); + $batch->strict_off(); + $batch->warnings_off(); + + my $count = 0; + + while ( my $record = $batch->next() ) { + + $count++; + + my $id = $record->field($id_tag); + if (!$id) { + print STDERR "ERROR: This record is missing a $id_tag field.\n" . $record->as_formatted() . "\n=====\n"; + next; + } + $id = $id->as_string($id_subfield); + + if ( + ( ! $inverse && defined $record_ids{ $id } ) || + ( $inverse && ! defined $record_ids{ $id } ) + ) { + open FILE, ">$id.txt"; + binmode(FILE, ':utf8'); + print FILE $record->as_formatted(); + close FILE; + } + } + print STDERR "Processed $count records.\n"; +} diff --git a/oneliners/spit_csv.pl b/oneliners/spit_csv.pl new file mode 100755 index 0000000..8a02003 --- /dev/null +++ b/oneliners/spit_csv.pl @@ -0,0 +1,64 @@ +#!/usr/bin/perl +use MARC::Batch; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +#use MARC::Field; +use Unicode::Normalize; + + +my @desired_tags_subfields = (); +foreach my $argnum ( 1 .. $#ARGV) { + print STDERR $ARGV[$argnum] . "\n"; + push @desired_tags_subfields, $ARGV[$argnum]; +} + +my $count = 0; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); + +foreach my $argnum ( 0 .. 0 ) { + + print STDERR "Processing " . $ARGV[$argnum] . "\n"; + + #my $M; + #open $M, '<:utf8', $ARGV[$argnum]; + #my $batch = MARC::Batch->new('XML',$M); + my $batch = MARC::Batch->new('XML',$ARGV[$argnum]); + + $batch->strict_off(); + $batch->warnings_off(); + + while ( my $record = $batch->next() ) { + + $count++; + + print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); + + my $first = 1; + for (my $i = 0; $i < scalar(@desired_tags_subfields); $i+=2) { + my @tags = (); + if ($record->field($desired_tags_subfields[$i])) { + @tags = $record->field($desired_tags_subfields[$i]); + } + if (scalar(@tags)>1) { + die "Multiple $desired_tags_subfields[$i]\n"; + } elsif (scalar(@tags)==0) { + print STDERR "Record $count missing $desired_tags_subfields[$i]\n"; + goto END_OF_WHILE; + } + foreach my $f ( @tags ) { + if ($f->subfield($desired_tags_subfields[$i+1])) { + if ($first) { + $first = 0; + } else { + print STDOUT "\t"; + } + print STDOUT $f->subfield($desired_tags_subfields[$i+1]); + } + } + } + print STDOUT "\n"; + END_OF_WHILE: + } + print STDERR "Processed $count records\n"; +} diff --git a/oneliners/spit_marc_903a_001.pl b/oneliners/spit_marc_903a_001.pl new file mode 100755 index 0000000..b70b2d2 --- /dev/null +++ b/oneliners/spit_marc_903a_001.pl @@ -0,0 +1,36 @@ +#!/usr/bin/perl +use MARC::Batch; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +use MARC::Field; +use Unicode::Normalize; + +my $count = 0; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); + +foreach my $argnum ( 0 .. $#ARGV ) { + + print STDERR "Processing " . $ARGV[$argnum] . "\n"; + + my $M; + open $M, '<:utf8', $ARGV[$argnum]; + my $batch = MARC::Batch->new('XML',$M); + + $batch->strict_off(); + $batch->warnings_off(); + + while ( my $record = $batch->next() ) { + + $count++; + + print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); + + my @my001 = $record->field('001'); + if (scalar(@my001) == 0 || scalar(@my001) > 1) { die "Wrong number of 001 tags for record $count\n"; } + my @my903 = $record->field('903'); + if (scalar(@my903) == 0 || scalar(@my903) > 1) { die "Wrong number of 903 tags for record $count\n"; } + print $my903[0]->subfield('a') . "\t" . $my001[0]->as_string() . "\n" + } + print STDERR "Processed $count records\n"; +} diff --git a/oneliners/spit_sample.pl b/oneliners/spit_sample.pl new file mode 100755 index 0000000..69a72db --- /dev/null +++ b/oneliners/spit_sample.pl @@ -0,0 +1,9 @@ +#!/usr/bin/perl +my @lines = <>; + +foreach my $i ( 1..20 ) { + $length = scalar( @lines ); + $idx = int rand ($length); + print $lines[$idx]; + splice(@lines,$idx,1); +} diff --git a/oneliners/spit_tag_multiplication.pl b/oneliners/spit_tag_multiplication.pl new file mode 100755 index 0000000..7a8408a --- /dev/null +++ b/oneliners/spit_tag_multiplication.pl @@ -0,0 +1,74 @@ +#!/usr/bin/perl +use MARC::Batch; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +#use MARC::Field; +use Unicode::Normalize; + +my $filetype = $ARGV[0]; # XML or USMARC +my $filename = $ARGV[1]; +my $tag1 = $ARGV[2]; # use NONE for no subfield, such as in 001 +my $subfield1 = $ARGV[3]; +my $tag2 = $ARGV[4]; +my $subfield2 = $ARGV[5]; + +die "required arguments: filename tag1 subfield1 tag2 subfield2\n" if (! ($filename && $tag1 && $subfield1 && $tag2 && $subfield2) ); + +my $count = 0; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); + +print STDERR "Processing $filename\n"; + +my $batch = MARC::Batch->new($filetype,$filename); $batch->strict_off(); $batch->warnings_off(); + +while ( my $record = $batch->next() ) { + + $count++; + + print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); + + my @tags1 = (); if ($record->field($tag1)) { @tags1 = $record->field($tag1); } else { next; } + + foreach my $f1 ( @tags1 ) { + if ($subfield1 eq 'NONE' ) { + + #*********************************************************************************************************************** + + my @tags2 = (); if ($record->field($tag2)) { @tags2 = $record->field($tag2); } else { next; } + + foreach my $f2 ( @tags2 ) { + if ($f2->subfield($subfield2)) { + my @subfields2 = $f2->subfield($subfield2); + foreach my $s2 ( @subfields2 ) { + print $f1->as_string() . "\t$s2\n"; + } + } + } + + #*********************************************************************************************************************** + + } else { + if ($f1->subfield($subfield1)) { + my @subfields1 = $f1->subfield($subfield1); + foreach my $s1 ( @subfields1 ) { + #*********************************************************************************************************************** + + my @tags2 = (); if ($record->field($tag2)) { @tags2 = $record->field($tag2); } else { next; } + + foreach my $f2 ( @tags2 ) { + if ($f2->subfield($subfield2)) { + my @subfields2 = $f2->subfield($subfield2); + foreach my $s2 ( @subfields2 ) { + print "$s1\t$s2\n"; + } + } + } + + #*********************************************************************************************************************** + } + } + } + } +} +print STDERR "Processed $count records\n"; diff --git a/oneliners/split_marc.pl b/oneliners/split_marc.pl new file mode 100644 index 0000000..e6088b1 --- /dev/null +++ b/oneliners/split_marc.pl @@ -0,0 +1,48 @@ +#!/usr/bin/perl +use open ':utf8'; +use MARC::Batch; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +use MARC::Field; +use POSIX; +use Error qw/:try/; + +my $split_every = $ARGV[0]; +my $count = 0; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); +my $M; + +foreach $argnum ( 1 .. $#ARGV ) { + + open $M, '<:utf8', $ARGV[$argnum]; + + print STDERR "Processing " . $ARGV[$argnum] . "\n"; + + my $batch = MARC::Batch->new('XML', $M); + $batch->strict_off(); + $batch->warnings_off(); + + my $record; + while ( try { $record = $batch->next() } otherwise { $record = -1 } ) { + next if ($record == -1); + $count++; + + my $filename = $ARGV[$argnum] . ".split." . floor( $count / $split_every ) . ".xml"; + + open FILE, ">>$filename"; + binmode(FILE, ':utf8'); + print FILE $record->as_xml(); + close FILE; + + $record = undef; + + unless ($count % 1000) { + print STDERR "$count\r" + } + + } + print STDERR "Processed $count records.\n"; +} + diff --git a/oneliners/spot_check.pl b/oneliners/spot_check.pl new file mode 100644 index 0000000..6f8011b --- /dev/null +++ b/oneliners/spot_check.pl @@ -0,0 +1,44 @@ +#!/usr/bin/perl +use open ':utf8'; +use MARC::Batch; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +use MARC::Field; + +my $count = 0; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); + +my $M; + +foreach $argnum ( 0 .. $#ARGV ) { + + print STDERR "Processing " . $ARGV[$argnum] . "\n"; + + open $M, '<:utf8', $ARGV[$argnum]; + + my $batch = MARC::Batch->new('XML',$M); + $batch->strict_off(); + $batch->warnings_off(); + + my $last_successful_record; + + eval { + while ( my $record = $batch->next() ) { + + $count++; + + $last_successful_record = $record->as_xml(); + + print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); + + unless ($count % 1000) { + print STDERR "$count\r" + } + + } + }; + print STDERR "Processed $count records. Last successful record = " . $last_successful_record . "\n"; + warn $@ if $@; +} diff --git a/oneliners/trim_marc_based_on_tag_subfield_value.pl b/oneliners/trim_marc_based_on_tag_subfield_value.pl new file mode 100755 index 0000000..b7ba249 --- /dev/null +++ b/oneliners/trim_marc_based_on_tag_subfield_value.pl @@ -0,0 +1,51 @@ +#!/usr/bin/perl +use open ':utf8'; +use MARC::Batch; +use MARC::File::XML ( BinaryEncoding => 'utf-8' ); +use MARC::Field; +use Unicode::Normalize; + + +my $tag_number = $ARGV[0]; +my $tag_subfield = $ARGV[1]; +my $tag_value = $ARGV[2]; + +my $count = 0; + +binmode(STDOUT, ':utf8'); +binmode(STDIN, ':utf8'); + +foreach $argnum ( 3 .. $#ARGV ) { + + print STDERR "Processing " . $ARGV[$argnum] . "\n"; + + my $M; + open $M, '<:utf8', $ARGV[$argnum]; + my $batch = MARC::Batch->new('XML',$M); + + $batch->strict_off(); + $batch->warnings_off(); + + while ( my $record = $batch->next() ) { + + $count++; + + print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); + + my $keep_me = 0; + + my @tags = (); + my @tags; if ($record->field($tag_number)) { @tags = $record->field($tag_number); } + foreach my $f ( @tags ) { + if ($f->subfield($tag_subfield)) { + if ( $f->subfield($tag_subfield)=~ m/($tag_value)/i ) { $keep_me = 1; } + } + } + + if ($keep_me) { + print STDOUT $record->as_xml(); + } + + } + print STDERR "Processed $count records\n"; +} diff --git a/select_marc.pl b/select_marc.pl deleted file mode 100755 index cbd7fd9..0000000 --- a/select_marc.pl +++ /dev/null @@ -1,55 +0,0 @@ -#!/usr/bin/perl -use open ':utf8'; -use MARC::Batch; -use MARC::Record; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Field; - -my $record_id_file = $ARGV[0]; -my %record_ids; - -open FILE, $record_id_file; -while (my $record_id = ) { - chomp($record_id); $record_ids{ $record_id } = 1; -} -close FILE; - -my $id_tag = $ARGV[1]; my $id_subfield = $ARGV[2]; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); - -my $M; - -foreach $argnum ( 3 .. $#ARGV ) { - - print STDERR "Processing " . $ARGV[$argnum] . "\n"; - - open $M, '<:utf8', $ARGV[$argnum]; - - my $batch = MARC::Batch->new('XML',$M); - $batch->strict_off(); - $batch->warnings_off(); - - my $count = 0; - - while ( my $record = $batch->next() ) { - - $count++; - - my $id = $record->field($id_tag); - if (!$id) { - print STDERR "ERROR: This record is missing a $id_tag field.\n" . $record->as_formatted() . "\n=====\n"; - next; - } - $id = $id->as_string($id_subfield); - - if (defined $record_ids{ $id }) { - open FILE, ">$id"; - binmode(FILE, ':utf8'); - print FILE $record->as_xml(); - close FILE; - } - } - print STDERR "Processed $count records.\n"; -} diff --git a/select_marc_as_text.pl b/select_marc_as_text.pl deleted file mode 100755 index 6a28e88..0000000 --- a/select_marc_as_text.pl +++ /dev/null @@ -1,60 +0,0 @@ -#!/usr/bin/perl -use open ':utf8'; -use MARC::Batch; -use MARC::Record; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Field; - -my $inverse = $ARGV[0] eq "inverse"; - -my $record_id_file = $ARGV[1]; -my %record_ids; - -open FILE, $record_id_file; -while (my $record_id = ) { - chomp($record_id); $record_ids{ $record_id } = 1; -} -close FILE; - -my $id_tag = $ARGV[2]; my $id_subfield = $ARGV[3]; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); - -my $M; - -foreach $argnum ( 4 .. $#ARGV ) { - - print STDERR "Processing " . $ARGV[$argnum] . "\n"; - - open $M, '<:utf8', $ARGV[$argnum]; - - my $batch = MARC::Batch->new('XML',$M); - $batch->strict_off(); - $batch->warnings_off(); - - my $count = 0; - - while ( my $record = $batch->next() ) { - - $count++; - - my $id = $record->field($id_tag); - if (!$id) { - print STDERR "ERROR: This record is missing a $id_tag field.\n" . $record->as_formatted() . "\n=====\n"; - next; - } - $id = $id->as_string($id_subfield); - - if ( - ( ! $inverse && defined $record_ids{ $id } ) || - ( $inverse && ! defined $record_ids{ $id } ) - ) { - open FILE, ">$id.txt"; - binmode(FILE, ':utf8'); - print FILE $record->as_formatted(); - close FILE; - } - } - print STDERR "Processed $count records.\n"; -} diff --git a/spit_csv.pl b/spit_csv.pl deleted file mode 100755 index 8a02003..0000000 --- a/spit_csv.pl +++ /dev/null @@ -1,64 +0,0 @@ -#!/usr/bin/perl -use MARC::Batch; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -#use MARC::Field; -use Unicode::Normalize; - - -my @desired_tags_subfields = (); -foreach my $argnum ( 1 .. $#ARGV) { - print STDERR $ARGV[$argnum] . "\n"; - push @desired_tags_subfields, $ARGV[$argnum]; -} - -my $count = 0; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); - -foreach my $argnum ( 0 .. 0 ) { - - print STDERR "Processing " . $ARGV[$argnum] . "\n"; - - #my $M; - #open $M, '<:utf8', $ARGV[$argnum]; - #my $batch = MARC::Batch->new('XML',$M); - my $batch = MARC::Batch->new('XML',$ARGV[$argnum]); - - $batch->strict_off(); - $batch->warnings_off(); - - while ( my $record = $batch->next() ) { - - $count++; - - print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); - - my $first = 1; - for (my $i = 0; $i < scalar(@desired_tags_subfields); $i+=2) { - my @tags = (); - if ($record->field($desired_tags_subfields[$i])) { - @tags = $record->field($desired_tags_subfields[$i]); - } - if (scalar(@tags)>1) { - die "Multiple $desired_tags_subfields[$i]\n"; - } elsif (scalar(@tags)==0) { - print STDERR "Record $count missing $desired_tags_subfields[$i]\n"; - goto END_OF_WHILE; - } - foreach my $f ( @tags ) { - if ($f->subfield($desired_tags_subfields[$i+1])) { - if ($first) { - $first = 0; - } else { - print STDOUT "\t"; - } - print STDOUT $f->subfield($desired_tags_subfields[$i+1]); - } - } - } - print STDOUT "\n"; - END_OF_WHILE: - } - print STDERR "Processed $count records\n"; -} diff --git a/spit_marc_903a_001.pl b/spit_marc_903a_001.pl deleted file mode 100755 index b70b2d2..0000000 --- a/spit_marc_903a_001.pl +++ /dev/null @@ -1,36 +0,0 @@ -#!/usr/bin/perl -use MARC::Batch; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Field; -use Unicode::Normalize; - -my $count = 0; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); - -foreach my $argnum ( 0 .. $#ARGV ) { - - print STDERR "Processing " . $ARGV[$argnum] . "\n"; - - my $M; - open $M, '<:utf8', $ARGV[$argnum]; - my $batch = MARC::Batch->new('XML',$M); - - $batch->strict_off(); - $batch->warnings_off(); - - while ( my $record = $batch->next() ) { - - $count++; - - print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); - - my @my001 = $record->field('001'); - if (scalar(@my001) == 0 || scalar(@my001) > 1) { die "Wrong number of 001 tags for record $count\n"; } - my @my903 = $record->field('903'); - if (scalar(@my903) == 0 || scalar(@my903) > 1) { die "Wrong number of 903 tags for record $count\n"; } - print $my903[0]->subfield('a') . "\t" . $my001[0]->as_string() . "\n" - } - print STDERR "Processed $count records\n"; -} diff --git a/spit_sample.pl b/spit_sample.pl deleted file mode 100755 index 69a72db..0000000 --- a/spit_sample.pl +++ /dev/null @@ -1,9 +0,0 @@ -#!/usr/bin/perl -my @lines = <>; - -foreach my $i ( 1..20 ) { - $length = scalar( @lines ); - $idx = int rand ($length); - print $lines[$idx]; - splice(@lines,$idx,1); -} diff --git a/spit_tag_multiplication.pl b/spit_tag_multiplication.pl deleted file mode 100755 index 7a8408a..0000000 --- a/spit_tag_multiplication.pl +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/perl -use MARC::Batch; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -#use MARC::Field; -use Unicode::Normalize; - -my $filetype = $ARGV[0]; # XML or USMARC -my $filename = $ARGV[1]; -my $tag1 = $ARGV[2]; # use NONE for no subfield, such as in 001 -my $subfield1 = $ARGV[3]; -my $tag2 = $ARGV[4]; -my $subfield2 = $ARGV[5]; - -die "required arguments: filename tag1 subfield1 tag2 subfield2\n" if (! ($filename && $tag1 && $subfield1 && $tag2 && $subfield2) ); - -my $count = 0; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); - -print STDERR "Processing $filename\n"; - -my $batch = MARC::Batch->new($filetype,$filename); $batch->strict_off(); $batch->warnings_off(); - -while ( my $record = $batch->next() ) { - - $count++; - - print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); - - my @tags1 = (); if ($record->field($tag1)) { @tags1 = $record->field($tag1); } else { next; } - - foreach my $f1 ( @tags1 ) { - if ($subfield1 eq 'NONE' ) { - - #*********************************************************************************************************************** - - my @tags2 = (); if ($record->field($tag2)) { @tags2 = $record->field($tag2); } else { next; } - - foreach my $f2 ( @tags2 ) { - if ($f2->subfield($subfield2)) { - my @subfields2 = $f2->subfield($subfield2); - foreach my $s2 ( @subfields2 ) { - print $f1->as_string() . "\t$s2\n"; - } - } - } - - #*********************************************************************************************************************** - - } else { - if ($f1->subfield($subfield1)) { - my @subfields1 = $f1->subfield($subfield1); - foreach my $s1 ( @subfields1 ) { - #*********************************************************************************************************************** - - my @tags2 = (); if ($record->field($tag2)) { @tags2 = $record->field($tag2); } else { next; } - - foreach my $f2 ( @tags2 ) { - if ($f2->subfield($subfield2)) { - my @subfields2 = $f2->subfield($subfield2); - foreach my $s2 ( @subfields2 ) { - print "$s1\t$s2\n"; - } - } - } - - #*********************************************************************************************************************** - } - } - } - } -} -print STDERR "Processed $count records\n"; diff --git a/split_marc.pl b/split_marc.pl deleted file mode 100644 index e6088b1..0000000 --- a/split_marc.pl +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/perl -use open ':utf8'; -use MARC::Batch; -use MARC::Record; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Field; -use POSIX; -use Error qw/:try/; - -my $split_every = $ARGV[0]; -my $count = 0; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); -my $M; - -foreach $argnum ( 1 .. $#ARGV ) { - - open $M, '<:utf8', $ARGV[$argnum]; - - print STDERR "Processing " . $ARGV[$argnum] . "\n"; - - my $batch = MARC::Batch->new('XML', $M); - $batch->strict_off(); - $batch->warnings_off(); - - my $record; - while ( try { $record = $batch->next() } otherwise { $record = -1 } ) { - next if ($record == -1); - $count++; - - my $filename = $ARGV[$argnum] . ".split." . floor( $count / $split_every ) . ".xml"; - - open FILE, ">>$filename"; - binmode(FILE, ':utf8'); - print FILE $record->as_xml(); - close FILE; - - $record = undef; - - unless ($count % 1000) { - print STDERR "$count\r" - } - - } - print STDERR "Processed $count records.\n"; -} - diff --git a/spot_check.pl b/spot_check.pl deleted file mode 100644 index 6f8011b..0000000 --- a/spot_check.pl +++ /dev/null @@ -1,44 +0,0 @@ -#!/usr/bin/perl -use open ':utf8'; -use MARC::Batch; -use MARC::Record; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Field; - -my $count = 0; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); - -my $M; - -foreach $argnum ( 0 .. $#ARGV ) { - - print STDERR "Processing " . $ARGV[$argnum] . "\n"; - - open $M, '<:utf8', $ARGV[$argnum]; - - my $batch = MARC::Batch->new('XML',$M); - $batch->strict_off(); - $batch->warnings_off(); - - my $last_successful_record; - - eval { - while ( my $record = $batch->next() ) { - - $count++; - - $last_successful_record = $record->as_xml(); - - print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); - - unless ($count % 1000) { - print STDERR "$count\r" - } - - } - }; - print STDERR "Processed $count records. Last successful record = " . $last_successful_record . "\n"; - warn $@ if $@; -} diff --git a/trim_marc_based_on_tag_subfield_value.pl b/trim_marc_based_on_tag_subfield_value.pl deleted file mode 100755 index b7ba249..0000000 --- a/trim_marc_based_on_tag_subfield_value.pl +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/perl -use open ':utf8'; -use MARC::Batch; -use MARC::File::XML ( BinaryEncoding => 'utf-8' ); -use MARC::Field; -use Unicode::Normalize; - - -my $tag_number = $ARGV[0]; -my $tag_subfield = $ARGV[1]; -my $tag_value = $ARGV[2]; - -my $count = 0; - -binmode(STDOUT, ':utf8'); -binmode(STDIN, ':utf8'); - -foreach $argnum ( 3 .. $#ARGV ) { - - print STDERR "Processing " . $ARGV[$argnum] . "\n"; - - my $M; - open $M, '<:utf8', $ARGV[$argnum]; - my $batch = MARC::Batch->new('XML',$M); - - $batch->strict_off(); - $batch->warnings_off(); - - while ( my $record = $batch->next() ) { - - $count++; - - print STDERR "WARNINGS: Record $count : " . join(":",@warnings) . " : continuing...\n" if ( @warnings ); - - my $keep_me = 0; - - my @tags = (); - my @tags; if ($record->field($tag_number)) { @tags = $record->field($tag_number); } - foreach my $f ( @tags ) { - if ($f->subfield($tag_subfield)) { - if ( $f->subfield($tag_subfield)=~ m/($tag_value)/i ) { $keep_me = 1; } - } - } - - if ($keep_me) { - print STDOUT $record->as_xml(); - } - - } - print STDERR "Processed $count records\n"; -} -- 1.7.2.5