4 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
6 use Unicode::Normalize;
# Take the record-id field tag and subfield code from the second and third
# command-line arguments. $ARGV[0] is not read in the lines visible here.
# NOTE(review): $ARGV[0] is presumably consumed elsewhere (possibly as the
# $which mode switch used when printing fingerprints) -- confirm.
10 my $id_tag = $ARGV[1]; my $id_subfield = $ARGV[2];
# Put the :utf8 I/O layer on both standard streams.
12 binmode(STDOUT, ':utf8');
13 binmode(STDIN, ':utf8');
# Every command-line argument from index 3 onward is treated as an input
# file; each one is announced on STDERR before it is opened.
15 foreach $argnum ( 3 .. $#ARGV ) {
17 print STDERR "Processing " . $ARGV[$argnum] . "\n";
# Open the file as a MARC::Batch reading the 'XML' format.
19 my $batch = MARC::Batch->new('XML',$ARGV[$argnum]);
# NOTE(review): per the MARC::Batch documentation this stops warnings from
# being printed by the batch itself -- confirm against the installed version.
21 $batch->warnings_off();
# Fetch the next record from the batch.
# NOTE(review): the enclosing loop is not visible here; a later message
# reports a trapped exception for this call, so it presumably sits inside
# an eval -- confirm.
27 $record = $batch->next();
# Look up the field whose tag was supplied on the command line.
29 my $id = $record->field($id_tag);
# Report a record without that field, followed by a formatted dump of it.
# NOTE(review): presumably guarded by a "no $id" test on a line not visible
# here -- confirm.
31 print STDERR "ERROR: This record is missing a $id_tag field.\n" . $record->as_formatted() . "\n=====\n";
# Replace the field object with the string form of the requested subfield.
34 $id = $id->as_string($id_subfield);
# If @warnings is non-empty, log its contents against this record id and
# keep going. @warnings is populated outside the lines visible here.
35 print STDERR "WARNINGS: Record id " . $id . " : " . join(":",@warnings) . " : continuing...\n" if ( @warnings );
# Read the record leader and keep the single characters at offsets 6 and 7
# as $record_type and $bib_lvl.
37 my $leader = $record->leader();
38 my $record_type = substr($leader,6,1);
39 my $bib_lvl = substr($leader,7,1);
# Fetch field 008 and, when the record has one, flatten it to a string.
41 my $my_008 = $record->field('008');
42 $my_008 = $my_008->as_string() if ($my_008);
# date1 / date2 are the four characters at offsets 7 and 11 of the 008
# string; both are undef when there is no usable 008.
# A ternary is used instead of "my ... if (...)": perlsyn documents a `my`
# combined with a statement modifier as undefined behaviour.
43 my $date1 = $my_008 ? substr($my_008,7,4) : undef;
44 my $date2 = $my_008 ? substr($my_008,11,4) : undef;
# Pick the 008 offset that $item_form is read from. When the record type is
# one of g, k, r, o, e or f (the "MAP, VIS" case) the character at offset 29
# is used; the other assignment reads offset 23.
# NOTE(review): the else / closing-brace lines are not visible here; the
# offset-23 assignment is presumably the else branch -- confirm.
46 if ( $record_type =~ /[gkroef]/ ) { # MAP, VIS
47 $item_form = substr($my_008,29,1) if ($my_008);
49 $item_form = substr($my_008,23,1) if ($my_008);
# Title: subfield a of field 245, when the record has a 245.
52 my $title = $record->field('245');
53 if ( $title ) { $title = $title->subfield('a'); }
# Standard numbers: for every 020 field and then every 024 field, the first
# run of non-whitespace characters in subfield a is pushed onto @isbns.
# Values from 024 therefore share the same list as those from 020.
# @isbns is declared outside the lines visible here.
56 my @isbns_020; if ($record->field('020')) { @isbns_020 = $record->field('020'); }
57 foreach my $f ( @isbns_020 ) { if ($f->subfield('a')) { if ( $f->subfield('a')=~/(\S+)/ ) { push @isbns, $1; } } }
58 my @isbns_024; if ($record->field('024')) { @isbns_024 = $record->field('024'); }
59 foreach my $f ( @isbns_024 ) { if ($f->subfield('a')) { if ( $f->subfield('a')=~/(\S+)/ ) { push @isbns, $1; } } }
# ISSN (022 subfield a) and LCCN (010 subfield a), when those fields exist.
61 my $issn = $record->field('022');
62 if ( $issn ) { $issn = $issn->subfield('a'); }
63 my $lccn = $record->field('010');
64 if ( $lccn ) { $lccn = $lccn->subfield('a'); }
# Author: subfield a of field 100, 110 or 111.
# NOTE(review): in the visible lines the three tests are independent, so a
# later field would overwrite an earlier one; elided lines may make them
# mutually exclusive -- confirm. $author is declared outside this view.
66 if ($record->field('100')) { $author = $record->field('100')->subfield('a'); }
68 if ($record->field('110')) { $author = $record->field('110')->subfield('a'); }
71 if ($record->field('111')) { $author = $record->field('111')->subfield('a'); }
# Physical description: subfield a of field 300; the first run of digits in
# it is kept as $pages ($pages is declared outside this view).
73 my $desc = $record->field('300');
74 if ( $desc ) { $desc = $desc->subfield('a'); }
76 if ($desc =~ /(\d+)/) { $pages = $1; }
# Publisher (260 subfield b) and publication year (260 subfield c); both are
# undef when the record has no 260 field.
# A ternary is used instead of "my ... if (...)": perlsyn documents a `my`
# combined with a statement modifier as undefined behaviour.
77 my $my_260 = $record->field('260');
78 my $publisher = $my_260 ? $my_260->subfield('b') : undef;
79 my $pubyear = $my_260 ? $my_260->subfield('c') : undef;
# Reduce the year to its first run of four digits; with no such run the
# year becomes the empty string.
81 if ( $pubyear =~ /(\d\d\d\d)/ ) { $pubyear = $1; } else { $pubyear = ''; }
# Edition statement: subfield a of field 250, when the record has a 250.
83 my $edition = $record->field('250');
84 if ( $edition ) { $edition = $edition->subfield('a'); }
# Default a blank record type to 'a'.
# This must be a string comparison: with numeric ==, a blank and any
# non-numeric type code both evaluate to 0, so the test also matched codes
# such as 'g' and overwrote them with 'a'.
87 if ($record_type eq ' ') { $record_type = 'a'; }
# Title: decompose with NFD, then delete every character in the range
# U+0080..U+FFFF, which removes the combining marks NFD split off along
# with any other character in that range.
89 $title = NFD($title); $title =~ s/[\x{80}-\x{ffff}]//go;
# Author: same decomposition and stripping, then lower-case and drop any
# trailing non-word characters.
94 $author = NFD($author); $author =~ s/[\x{80}-\x{ffff}]//go;
95 $author = lc($author);
96 $author =~ s/\W+$//go;
# Capture the leading run of word characters of the author.
# NOTE(review): the body that uses the capture is not visible here --
# presumably it keeps only that first word; confirm.
97 if ($author =~ /^(\w+)/) {
# Publisher: identical treatment to the author above.
102 $publisher = NFD($publisher); $publisher =~ s/[\x{80}-\x{ffff}]//go;
103 $publisher = lc($publisher);
104 $publisher =~ s/\W+$//go;
# NOTE(review): as with the author, the body of this test is not visible.
105 if ($publisher =~ /^(\w+)/) {
110 # SPIT OUT FINGERPRINTS FROM THE "LOIS ALGORITHM"
111 # If we're not getting good matches, we may want to change this. The same thing goes for some other fields.
# Fingerprints are written only when the baseline values are all present:
# an item form, a four-digit date1, a record type, a bib level and a title.
112 if ($item_form && ($date1 =~ /\d\d\d\d/) && $record_type && $bib_lvl && $title) {
# In "primary" mode a single tab-separated baseline line is written to
# STDOUT. $which is set outside the lines visible here.
114 if ($which eq "primary") {
115 print STDOUT join("\t",$id,$item_form,$date1,$record_type,$bib_lvl,$title) . "\n";
# The remaining lines each append a case label and extra fields to the
# baseline values.
# NOTE(review): the branch structure separating these from "primary" mode is
# not visible here -- confirm.
118 # case a : isbn and pages
119 if (scalar(@isbns)>0 && $pages) {
# One output line per collected standard number.
120 foreach my $isbn ( @isbns ) {
121 print STDOUT join("\t",$id,"case a",$item_form,$date1,$record_type,$bib_lvl,$title,$isbn,$pages) . "\n";
# Cases b, c and d append the edition, ISSN and LCCN respectively.
# NOTE(review): their guarding conditions are not visible here; presumably
# each tests the value it appends -- confirm.
127 print STDOUT join("\t",$id,"case b",$item_form,$date1,$record_type,$bib_lvl,$title,$edition) . "\n";
132 print STDOUT join("\t",$id,"case c",$item_form,$date1,$record_type,$bib_lvl,$title,$issn) . "\n";
137 print STDOUT join("\t",$id,"case d",$item_form,$date1,$record_type,$bib_lvl,$title,$lccn) . "\n";
140 # case e : author, publisher, pubyear, pages
141 if ($author && $publisher && $pubyear && $pages) {
142 print STDOUT join("\t",$id,"case e",$item_form,$date1,$record_type,$bib_lvl,$title,$author,$publisher,$pubyear,$pages) . "\n";
# Explain on STDERR why a record produced no fingerprint: one message for
# each baseline value that is missing or invalid.
# NOTE(review): presumably the else branch of the baseline test -- the
# branch keyword itself is not visible here; confirm.
148 print STDERR "Record " . $id . " did not make the cut: ";
149 print STDERR "Missing item_form. " unless ($item_form);
150 print STDERR "Missing valid date1. " unless ($date1 =~ /\d\d\d\d/);
151 print STDERR "Missing record_type. " unless ($record_type);
152 print STDERR "Missing bib_lvl. " unless ($bib_lvl);
153 print STDERR "Missing title. " unless ($title);
# Report whatever error is left in $@ after fetching a record.
# NOTE(review): the eval that would set $@ is not visible here -- confirm.
158 print STDERR "Trapped exception for MARC::Batch->next on record $count: $@\n" if ($@);
# Final tally; $count is maintained outside the lines visible here.
160 print STDERR "Processed $count records\n";