2 # ./filter_out_mfhd.pl marcfile > out 2> err
3 # Looks for tcn_id.map2 containing lines like: 001_or_035value|eg_bib_id
4 # Spits out mfhd.tsv (eg_bib_id<tab>marcxml<tab>eg_bib_id) and mfhd.bad.mrc
5 # For marcfile, it expects a "title record", followed by one or more MFHD records. Rinse, repeat.
12 use Unicode::Normalize;
13 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
# NOTE(review): this listing is fragmentary -- closing braces, else-branches
# and several statements that sit between the visible lines are not shown.
# The comments below describe only what the visible lines establish; anything
# that depends on the hidden lines is marked as an assumption.
#
# Create a MARC::Batch reader, in USMARC format, over the file(s) named on the
# command line.
16 my $batch = MARC::Batch->new( 'USMARC', @ARGV );
# Turn off the batch reader's warning output.
18 $batch->warnings_off();
# Read the identifier map from tcn_id.map2 in the current directory.
# NOTE(review): two-argument open on a bareword handle, and the return value
# is not checked -- a missing or unreadable map file is not reported here.
28 open FILE, "tcn_id.map2";
29 while (my $line = <FILE>) {
# Split the line at its LAST '|' (the leading .+ is greedy): $1 is the
# non-empty text before it, $2 is whatever follows (possibly empty).
# presumably the captures are stored in %tcn2bid (identifier => bib id), which
# is read further down; the assignment is not visible here -- TODO confirm.
30 if ($line =~ /^(.+)\|(.*)$/) {
# Open both output files for writing (truncating any existing content).
# NOTE(review): neither open is checked for failure.
36 open MFHD, ">mfhd.tsv";
37 open BADMFHD, ">mfhd.bad.mrc";
# Main loop: one iteration per record returned by the batch reader.
38 while ( my $marc = $batch->next ) {
# Look up the fields used to classify the record. field() is called in scalar
# context, so each variable holds a single field object or a false value when
# the tag is absent. These variables are not declared with `my` on these lines.
39 $tag001 = $marc->field('001');
40 $tag035 = $marc->field('035');
41 $tag245 = $marc->field('245');
42 $tag852 = $marc->field('852');
43 $tag866 = $marc->field('866');
# A record carrying an 852 or an 866 field is handled by this branch
# (the holdings/MFHD case described in the file header).
44 if ($tag852 || $tag866) {
# Build a new field whose value is the bib id mapped from $current_title, or
# the text 'missing: <current title id>' when that mapping is absent or false.
# The tag/indicator/subfield arguments are not visible in this listing.
46 my $field = MARC::Field->new(
48 $tcn2bid{$current_title}
49 ? $tcn2bid{$current_title}
50 : 'missing: ' . $current_title
# Add the new field to the record at its tag-ordered position.
52 $marc->insert_fields_ordered( $field );
# Only records whose current title has a (true) mapped bib id go to mfhd.tsv.
53 if ($tcn2bid{$current_title}) {
54 my $string = $marc->as_xml_record();
# Strip the XML declaration from the serialized record.
56 $string =~ s/<\?xml version="1\.0" encoding="UTF-8"\?>//;
# Row layout: bib id, TAB, MARC XML, TAB, bib id, newline.
57 print MFHD $tcn2bid{$current_title} . "\t$string\t" . $tcn2bid{$current_title} . "\n";
# Write the record in binary MARC form to mfhd.bad.mrc.
# presumably this is the else-branch for records with no mapped bib id; the
# `else` itself is not visible here -- TODO confirm.
59 print BADMFHD $marc->as_usmarc();
# From here on the record is treated as a title record, identified by its 001.
# presumably guarded by a check that $tag001 is set (not visible) -- TODO confirm.
63 my $tcnv = $tag001->as_string();
# A 001 that is empty or made up solely of digits is reported as a suspect
# MFHD: a short note on STDOUT, and the note plus the formatted record on STDERR.
64 if ($tcnv =~ /^\d*$/) {
65 print "fishy MFHD? with 001 $tcnv\n";
66 print STDERR "=== fishy MFHD? with 001 $tcnv\n";
67 print STDERR $marc->as_formatted() . "\n";
# Report the title and remember its 001 value as the current title id; the
# 852/866 branch above uses $current_title to look up the bib id.
# NOTE(review): $tcn2bid{$tcnv} is interpolated without an existence check, so
# an unmapped id prints an empty value.
69 print "title with 001 $tcnv, eg bib id = $tcn2bid{$tcnv}\n";
70 $current_title = $tcnv;
# Same handling keyed on the 035 field's string value.
# presumably reached only when there is no usable 001 -- TODO confirm.
74 my $tcnv = $tag035->as_string();
75 print "title with 035 $tcnv, eg bib id = $tcn2bid{$tcnv}\n";
76 $current_title = $tcnv;
# Fallback shown by the messages below: neither 001 nor 035 is available, so
# the 245 field's string value is used for reporting only.
# NOTE(review): $tcnv is assigned without `my` on this line, unlike the two
# assignments above.
80 $tcnv = $tag245->as_string();
82 print "fishy title? missing 001 and 035: $tcnv\n";
83 print STDERR "=== fishy title? missing 001 and 035: $tcnv\n";
84 print STDERR $marc->as_formatted() . "\n";
# Store a "fishy: "-prefixed value as the current title id.
# presumably no map key has that form, so following MFHD records would land in
# mfhd.bad.mrc -- TODO confirm.
85 $current_title = "fishy: $tcnv";