7 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
9 use Unicode::Normalize;
# Take the next argument off the argument list; it is later passed to
# as_string() to choose which subfield of the ID field is used as the record id.
# (presumably shifted from @ARGV -- assumes this is top-level script code; confirm)
14 my $id_subfield = shift;
# Apply the :utf8 I/O layer to both standard streams.
# NOTE(review): ':utf8' marks data as UTF-8 without validating it;
# ':encoding(UTF-8)' is the checked form -- confirm which one is wanted.
16 binmode(STDOUT, ':utf8');
17 binmode(STDIN, ':utf8');
# Handle every remaining command-line argument as an input file.
19 for my $file (@ARGV) {
# Progress message goes to STDERR so it does not mix with the STDOUT output.
21 print STDERR "Processing $file\n";
# NOTE(review): the return value of open is not checked on this line, so an
# unreadable $file is not reported here; consider handling the failure.
23 open my $M, '<:utf8', $file;
# Wrap the handle in a MARC::Batch reader using its 'XML' record format.
25 my $batch = MARC::Batch->new('XML',$M);
# Turn off MARC::Batch's warning output for this batch.
27 $batch->warnings_off();
# Pull the next record from the batch.
# (the declaration of $record and any enclosing per-record loop are not
# visible in this excerpt -- TODO confirm)
32 $record = $batch->next();
# Look up the field that carries the record identifier; $id_tag is defined
# outside the visible lines.
34 my $id = $record->field($id_tag);
# Report a record that has no $id_tag field, dumping the formatted record
# to STDERR. (the guarding condition is not visible here -- presumably this
# runs only when $id is false; confirm)
36 print STDERR "ERROR: This record is missing a $id_tag field.\n"
37 . $record->as_formatted() . "\n=====\n";
# Replace the field object with its string form for the chosen subfield.
40 $id = $id->as_string($id_subfield);
# Read two single characters from the record leader: offset 6 becomes
# $record_type and offset 7 becomes $bib_lvl (substr offsets are 0-based).
42 my $leader = $record->leader();
43 my $record_type = substr($leader,6,1);
44 my $bib_lvl = substr($leader,7,1);
# Fetch the 008 field and, when present, keep only its string form.
46 my $my_008 = $record->field('008');
47 $my_008 = $my_008->as_string() if ($my_008);
# Four characters starting at offset 7 are date1, the next four are date2.
# NOTE(review): 'my ... if (...)' combines a declaration with a statement
# modifier; perlsyn documents the resulting behaviour as undefined. A
# declaration followed by a separate conditional assignment avoids that.
# $date2 is not referenced again in the visible lines.
48 my $date1 = substr($my_008,7,4) if ($my_008);
49 my $date2 = substr($my_008,11,4) if ($my_008);
# The item-form character is read from offset 29 of the 008 for record types
# g, k, r, o, e and f, and from offset 23 in the other assignment below.
# (the 'else' separating the two assignments and the declaration of
# $item_form are not visible in this excerpt -- confirm)
51 if ( $record_type =~ /[gkroef]/ ) { # MAP, VIS
52 $item_form = substr($my_008,29,1) if ($my_008);
54 $item_form = substr($my_008,23,1) if ($my_008);
# Title: subfield a of the 245 field, when the record has one.
57 my $title = $record->field('245');
58 if ( $title ) { $title = $title->subfield('a'); }
# Collect the 020 fields and inspect subfield a of each one. The pattern
# captures the first run of non-whitespace characters.
# (what is done with the capture, and the declaration of @isbns_020, are not
# visible in this excerpt)
62 if ($record->field('020')) { @isbns_020 = $record->field('020'); }
63 foreach my $f ( @isbns_020 ) {
64 if ($f->subfield('a')) {
65 if ( $f->subfield('a')=~/(\S+)/ ) {
# Same treatment for the 024 fields.
71 if ($record->field('024')) { @isbns_024 = $record->field('024'); }
72 foreach my $f ( @isbns_024 ) {
73 if ($f->subfield('a')) {
74 if ( $f->subfield('a')=~/(\S+)/ ) {
# ISSN: subfield a of the 022 field, when present.
80 my $issn = $record->field('022');
81 if ( $issn ) { $issn = $issn->subfield('a'); }
# LCCN: subfield a of the 010 field, when present.
82 my $lccn = $record->field('010');
83 if ( $lccn ) { $lccn = $lccn->subfield('a'); }
# Author: subfield a of the 100, 110 or 111 field.
# (as far as the visible lines show, each assignment overwrites the previous
# one when several of these fields exist; the lines between the branches and
# the declaration of $author are not visible -- confirm)
85 if ($record->field('100'))
86 { $author = $record->field('100')->subfield('a'); }
88 if ($record->field('110')) {
89 $author = $record->field('110')->subfield('a');
93 if ($record->field('111')) {
94 $author = $record->field('111')->subfield('a');
# Physical description: subfield a of the 300 field.
97 my $desc = $record->field('300');
98 if ( $desc ) { $desc = $desc->subfield('a'); }
# $pages is the first run of digits found in the description, if any.
100 if (defined $desc and $desc =~ /(\d+)/) { $pages = $1; }
# Publisher and publication year come from subfields b and c of the 260 field.
# NOTE(review): 'my ... if (...)' combines a declaration with a statement
# modifier; perlsyn documents the resulting behaviour as undefined.
101 my $my_260 = $record->field('260');
102 my $publisher = $my_260->subfield('b') if ( $my_260 );
103 my $pubyear = $my_260->subfield('c') if ( $my_260 );
# Look for four consecutive digits in the publication year.
# (the body of this 'if' is not visible in this excerpt)
# NOTE(review): $pubyear can be undef here when there is no 260 field or no
# subfield c, so this match may operate on an undefined value.
105 if ( $pubyear =~ /(\d\d\d\d)/ )
# Edition: subfield a of the 250 field.
110 my $edition = $record->field('250');
111 if ( $edition ) { $edition = $edition->subfield('a'); }
# A blank record type is treated as 'a'.
114 $record_type = 'a' if ($record_type eq ' ');
# Title: decompose with NFD, then delete every character in the range
# U+0080..U+FFFF. After decomposition this removes the combining marks and
# leaves the ASCII base letters; characters above U+FFFF are not matched by
# this class.
# NOTE(review): the /o flag has no effect on a pattern without interpolated
# variables, and /g is redundant on the end-anchored patterns below.
116 $title = NFD($title); $title =~ s/[\x{80}-\x{ffff}]//go;
# Drop trailing non-word characters.
118 $title =~ s/\W+$//go;
# Author: same stripping, plus lower-casing. The final test matches the
# leading run of word characters.
# (the body of that 'if' is not visible in this excerpt)
121 $author = NFD($author); $author =~ s/[\x{80}-\x{ffff}]//go;
122 $author = lc($author);
123 $author =~ s/\W+$//go;
124 if ($author =~ /^(\w+)/) {
# Publisher: identical treatment to the author.
129 $publisher = NFD($publisher); $publisher =~ s/[\x{80}-\x{ffff}]//go;
130 $publisher = lc($publisher);
131 $publisher =~ s/\W+$//go;
132 if ($publisher =~ /^(\w+)/) {
137 # SPIT OUT FINGERPRINTS FROM THE "LOIS ALGORITHM"
138 # If we're not getting good matches, we may want to change this.
139 # The same thing goes for some other fields.
# A record is fingerprinted only when item_form, record_type, bib_lvl and
# title are all true values and date1 contains four consecutive digits.
140 if ($item_form && ($date1 =~ /\d\d\d\d/)
141 && $record_type && $bib_lvl && $title) {
# "primary" mode: a single tab-separated line per record.
# ($which is set outside the visible lines; the statement that consumes this
# join() is not visible -- presumably a print to STDOUT; confirm)
142 if ($which eq "primary") {
144 join("\t",$id,$item_form,$date1,$record_type,$bib_lvl,$title)
# Otherwise a line is built per matching case. Each case line repeats the
# common columns (id, case label, item_form, date1, record_type, bib_lvl,
# title) followed by the case-specific values.
147 # case a : isbn and pages
# One line per entry in @isbns, only when there is at least one ISBN and a
# page count. (how @isbns is filled is not visible in this excerpt)
148 if (scalar(@isbns)>0 && $pages) {
149 foreach my $isbn ( @isbns ) {
151 join("\t", $id, "case a", $item_form, $date1,
152 $record_type, $bib_lvl, $title,$isbn,$pages)
# case b appends the edition. (its guarding condition is not visible)
159 join("\t", $id, "case b", $item_form, $date1,
160 $record_type, $bib_lvl, $title,$edition), "\n";
# case c appends the ISSN. (its guarding condition is not visible)
164 print STDOUT join("\t", $id, "case c", $item_form, $date1,
165 $record_type, $bib_lvl, $title,$issn)
# case d appends the LCCN. (its guarding condition is not visible)
170 print STDOUT join("\t", $id, "case d", $item_form, $date1,
171 $record_type, $bib_lvl, $title, $lccn)
174 # case e : author, publisher, pubyear, pages
# Emitted only when all four values are true.
175 if ($author && $publisher && $pubyear && $pages) {
176 print STDOUT join("\t", $id, "case e", $item_form, $date1,
177 $record_type, $bib_lvl, $title, $author,
178 $publisher, $pubyear, $pages), "\n";
# Explain on STDERR why a record produced no fingerprint. Each test mirrors
# one term of the fingerprinting condition (item_form, date1, record_type,
# bib_lvl, title).
# (presumably the 'else' branch of that condition -- the branch keyword and
# closing braces are not visible in this excerpt; confirm)
182 print STDERR "Record " . $id . " did not make the cut: ";
183 print STDERR "Missing item_form. " unless ($item_form);
184 print STDERR "Missing valid date1. " unless ($date1 =~ /\d\d\d\d/);
185 print STDERR "Missing record_type. " unless ($record_type);
186 print STDERR "Missing bib_lvl. " unless ($bib_lvl);
187 print STDERR "Missing title. " unless ($title);
# Final tally. ($count is declared and updated outside the visible lines)
191 print STDERR "Processed $count records\n";