2 # -*- coding: iso-8859-15 -*-
3 ###############################################################################
6 =item B<bibstats> --file foo.mrc
8 Reads through a MARC file to generate statistical information about the file
11 --uri_threshold defaults to 1; it only shows URI values with more than that
18 ###############################################################################
25 HOME PGHOST PGPORT PGUSER PGDATABASE MIGSCHEMA
26 MIGBASEWORKDIR MIGBASEGITDIR MIGGITDIR MIGWORKDIR
37 my $mig_bin = "$FindBin::Bin/";
38 use lib "$FindBin::Bin/";
40 use open ':encoding(utf8)';
42 pod2usage(-verbose => 2) if (defined($ARGV[0]) and $ARGV[0] eq '--help');  # full manual when the first argument is --help
43 pod2usage(-verbose => 1) unless $ARGV[1];  # brief usage when the second argument is missing or false
46 my $uri_threshold = 1;
50 'uri_threshold:i' => \$uri_threshold
53 my $batch = MARC::Batch->new('USMARC', $file);
55 my $filetype = `file -- \Q$file\E`;  # \Q..\E backslash-escapes shell metacharacters in the name; -- stops file(1) reading a leading '-' as an option
56 if ($filetype =~ m/MARC21/) { print "$filetype.\n" }  # echo what file(1) reported when it identifies the input as MARC21
57 else { abort("File is not MARC21."); }  # anything else is rejected before records are read
61 my $uri_valid_count = 0;
62 my $uri_sub9_count = 0;
68 while ( my $record = $batch->next() ) {  # one pass per record returned by the batch reader
70 @fields = $record->field('856');  # every 856 field of this record
71 my $ldr = substr $record->leader(), 9, 1;  # the single character at offset 9 of the leader
73 foreach my $f (@fields) {
74 my $u = $f->subfield('u');
75 my $n = $f->subfield('9');
76 if (defined $n) { $uri_sub9_count++; }  # count 856 fields that carry a subfield 9
79 my $ind1 = $f->indicator('1');
80 my $ind2 = $f->indicator('2');
82 if ($ind2 eq '0' or $ind2 eq '1') { $uri_valid_count++; }  # only a second indicator of '0' or '1' is counted here
84 my $ustring = lc($f->as_string('u'));  # lower-cased text of the field's subfield u content
85 $ustring =~ s{http://}{};  # drop the first "http://" found
86 $ustring =~ s{ftp://}{};  # drop the first "ftp://" found
87 $ustring =~ s{https://}{};  # drop the first "https://" found
92 @fields = $record->field('100');  # every 100 field of this record
93 foreach my $f (@fields) {
94 my $t = $f->subfield('0');
95 if (defined $t) { $title_sub0++; }  # NOTE(review): despite its name, $title_sub0 counts 100 fields and is reported as the 100-field total; the name looks swapped with $author_sub0 - confirm before renaming
97 @fields = $record->field('245');  # every 245 field of this record
98 foreach my $f (@fields) {
99 my $t = $f->subfield('0');
100 if (defined $t) { $author_sub0++; }  # NOTE(review): despite its name, $author_sub0 counts 245 fields and is reported as the 245-field total; the name looks swapped with $title_sub0 - confirm before renaming
102 if(($i % 1000) == 0) { print "Processing bib $i.\n"; }
106 $uri_counts{$_}++ for @uris;  # tally how many times each value in @uris occurs
109 $code_counts{$_}++ for @codes;  # tally how many times each value in @codes occurs
111 print "\n$filetype\n";
112 print "$i bibs read in file\n\n";
115 foreach my $key (sort keys %code_counts) {  # sorted: plain keys() order differs between runs, which made reports hard to compare
116 my $value = $code_counts{$key};
117 print "=== $key $value\n";  # one "=== value count" line per distinct code
121 print "$uri_count 856 fields with a subfield u\n";
122 print "$uri_valid_count 856 fields with a subfield u and valid indicators\n";
123 print "$uri_sub9_count 856 fields have subfield 9s\n";
124 print "$title_sub0 100 fields have a subfield 0\n";
125 print "$author_sub0 245 fields have a subfield 0\n";
127 print "\nURI values are domains and filtered to only show those with more than $uri_threshold\n";
128 foreach my $key (sort keys %uri_counts) {  # sorted: plain keys() order differs between runs, which made reports hard to compare
129 my $value = $uri_counts{$key};
130 if ($value > $uri_threshold) { print "=== $key $value\n"; }  # only values seen more than $uri_threshold times are listed
135 ########### functions
139 print STDERR "$0: $msg", "\n";