X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=emig.d%2Fbin%2Fmig-bibstats;h=0161176adc8fb58ed0f20df3e04bf22947695439;hp=cfb382a4f8a8028289ed326ac41908e541d865c6;hb=1c61b119580d507b820b9d209ac56f7ad297cc7a;hpb=3aaa60eb13b14b0019b79fd2228cbc0a30e42d01 diff --git a/emig.d/bin/mig-bibstats b/emig.d/bin/mig-bibstats index cfb382a..0161176 100755 --- a/emig.d/bin/mig-bibstats +++ b/emig.d/bin/mig-bibstats @@ -24,6 +24,10 @@ with one of the ILSes so provide the name --exportbarcodesfile will use this file name for a barcode export instead of the generic 'barcodes_export.txt' +--item_type_subfield will make use of --holding_code and provide a breakdown of +bib types by item types. If --branch_subfield is also provided, then the +breakdown will be further subdivided by branch. + =back =cut @@ -60,6 +64,8 @@ my $file; my $uri_threshold = 1; my $p_holding_code; my $p_barcode_subfield; +my $p_item_type_subfield; +my $p_branch_subfield; my $p_ils_name = ''; my $holding_threshold = 50; my $p_ignore_filetype = 'false'; @@ -69,15 +75,17 @@ my $exportbarcodes; my $exportbarcodesfile; my $ret = GetOptions( - 'file:s' => \$file, - 'uri_threshold:i' => \$uri_threshold, - 'holding_code:s' => \$p_holding_code, - 'barcode_subfield:s' => \$p_barcode_subfield, - 'ignore_filetype:s' => \$p_ignore_filetype, - 'ils:s' => \$p_ils_name, - 'exportbarcodes:s' => \$exportbarcodes, - 'exportbarcodesfile:s' => \$exportbarcodesfile, - 'holding_threshold:s' => \$holding_threshold + 'file:s' => \$file, + 'uri_threshold:i' => \$uri_threshold, + 'holding_code:s' => \$p_holding_code, + 'barcode_subfield:s' => \$p_barcode_subfield, + 'item_type_subfield:s' => \$p_item_type_subfield, + 'branch_subfield:s' => \$p_branch_subfield, + 'ignore_filetype:s' => \$p_ignore_filetype, + 'ils:s' => \$p_ils_name, + 'exportbarcodes:s' => \$exportbarcodes, + 'exportbarcodesfile:s' => \$exportbarcodesfile, + 'holding_threshold:s' => \$holding_threshold ); if ($exportbarcodesfile and !defined $exportbarcodes) { abort('You have to provide an ILS name if you want a barcode export file.'); } @@ -85,36 +93,49 @@ if ($exportbarcodesfile and !defined $exportbarcodes) { abort('You have to provi if ($p_holding_code and length $p_holding_code != 3) { abort('Holdings codes must be three characters.'); } if ($p_barcode_subfield) { - if (!defined $p_holding_code) { abort('A barcode field can not be used without a holding code.'); } - if (length $p_barcode_subfield != 1) { abort('Barcode subfields must be a single character code.'); } + if (!defined $p_holding_code) { abort('A barcode field can not be used without a holding code.'); } + if (length $p_barcode_subfield != 1) { abort('Barcode subfields must be a single character code.'); } +} + +if ($p_item_type_subfield) { + if (!defined $p_holding_code) { abort('An item type field can not be used without a holding code.'); } + if (length $p_item_type_subfield != 1) { abort('Item type subfields must be a single character code.'); } +} + +if ($p_branch_subfield) { + if (!defined $p_holding_code) { abort('A branch field can not be used without a holding code.'); } + if (length $p_branch_subfield != 1) { abort('Branch subfields must be a single character code.'); } } # ils name, holding tag, barcode subfield my @ilses = ( - ['Mandarin','852','p'], - ['Evergreen','852','p'], - ['Polaris','852','p'], - ['TLC','949','g'], - ['Koha','952','p'], - ['Sympony','999','i'], + ['Mandarin','852','p'], + ['Evergreen','852','p'], + ['Polaris','852','p'], + ['TLC','949','g'], + ['Koha','952','p'], + ['Sympony','999','i'], ['Destiny','852','p'] ); my @temp; -if ($p_holding_code) { - push @temp, $p_ils_name; - push @temp, $p_holding_code; - if ($p_barcode_subfield) { push @temp, lc $p_barcode_subfield; } - push @ilses, [@temp]; +if (defined $p_holding_code && defined $p_ils_name && defined $p_barcode_subfield) { + push @temp, $p_ils_name; + push @temp, $p_holding_code; + if ($p_barcode_subfield) { push @temp, lc $p_barcode_subfield; } + push @ilses, [@temp]; } #to do - add a check for exportbarcodes being in @ilses -my $batch = MARC::Batch->new('USMARC', $file); -$batch->strict_off(); my $filetype = `file $file`; -if ($filetype =~ m/MARC21/ or $p_ignore_filetype eq 'true') { print "$filetype.\n" } - else { abort("File is not MARC21."); } +my $batch; +if ($filetype =~ m/MARC21/) { + $batch = MARC::Batch->new( 'USMARC', $file ); +} else { + $batch = MARC::Batch->new( 'XML', $file ); +} +$batch->strict_off(); my $i = 0; my $uri_count = 0; @@ -126,65 +147,84 @@ my @uris; my @fields; my @encodings; my @types; +my %bib_types_by_item_type; my @holding_code_strings; my %holding_counts; my %barcode_counts; foreach (@ilses) { - $holding_counts{@$_[0]} = 0; - $barcode_counts{@$_[0]} = 0; + $holding_counts{@$_[0]} = 0; + $barcode_counts{@$_[0]} = 0; } while ( my $record = $batch->next() ) { $i++; - #check holdings, bit time consuming but more future proof - foreach (@ilses) { - my $ils = @$_[0]; - my $hcode = @$_[1]; - my $barcode = @$_[2]; - my @holding_fields = $record->field($hcode); + #check holdings, bit time consuming but more future proof + foreach (@ilses) { + my $ils = @$_[0]; + my $hcode = @$_[1]; + my $barcode = @$_[2]; + my @holding_fields = $record->field($hcode); foreach my $hf (@holding_fields) { - my @h; - my $barcode_string = $hf->subfield($barcode); - push @h, $ils; - push @h, $barcode_string; - push @holdings, [@h]; - } - my $l = scalar @holding_fields; - my $v = $holding_counts{$ils}; - if ($l) { $holding_counts{$ils} = $v + $l; } - } + my @h; + my $barcode_string = $hf->subfield($barcode); + push @h, $ils; + push @h, $barcode_string; + push @holdings, [@h]; + } + my $l = scalar @holding_fields; + my $v = $holding_counts{$ils}; + if ($l) { $holding_counts{$ils} = $v + $l; } + } #process 856s - @fields = $record->field('856'); - my $enc = substr $record->leader(), 9, 1; - push @encodings, $enc; + @fields = $record->field('856'); + my $enc = substr $record->leader(), 9, 1; + push @encodings, $enc; my $type = substr $record->leader(), 6, 1; push @types, $type; - foreach my $f (@fields) { - my $u = $f->subfield('u'); + # bib type by branch and by item type if item subfield (and optionally branch subfield) provided + if (defined $p_holding_code && defined $p_item_type_subfield) { + my @holding_fields = $record->field($p_holding_code); + foreach my $hf (@holding_fields) { + my $item_type = $hf->subfield($p_item_type_subfield) || ''; + my $branch = $p_branch_subfield ? $hf->subfield($p_branch_subfield) : 'default'; + if (! defined $bib_types_by_item_type{ $branch }) { + $bib_types_by_item_type{ $branch } = {}; + } + if (! defined $bib_types_by_item_type{ $branch }{ $type }) { + $bib_types_by_item_type{ $branch }{ $type } = {}; + } + if (! defined $bib_types_by_item_type{ $branch }{ $type }{ $item_type }) { + $bib_types_by_item_type{ $branch }{ $type }{ $item_type } = 0; + } + $bib_types_by_item_type{ $branch }{ $type }{ $item_type }++; + } + } + foreach my $f (@fields) { + my $u = $f->subfield('u'); my $n = $f->subfield('9'); if (defined $n) { $uri_sub9_count++; } - if (defined $u) { - $uri_count++; - my $ind1 = $f->indicator('1'); - my $ind2 = $f->indicator('2'); - if ($ind1 eq '4') { - if ($ind2 eq '0' or $ind2 eq '1') { $uri_valid_count++; } - } - my $ustring = lc $f->as_string('u'); - $ustring =~ s/http:\/\///; + if (defined $u) { + $uri_count++; + my $ind1 = $f->indicator('1'); + my $ind2 = $f->indicator('2'); + if ($ind1 eq '4') { + if ($ind2 eq '0' or $ind2 eq '1') { $uri_valid_count++; } + } + my $ustring = lc $f->as_string('u'); + $ustring =~ s/http:\/\///; $ustring =~ s/ftp:\/\///; - $ustring =~ s/https:\/\///; - $ustring =~ s/\/.*//; - push @uris, $ustring; - } - } + $ustring =~ s/https:\/\///; + $ustring =~ s/\/.*//; + push @uris, $ustring; + } + } #check for authority linking on 100s and 245s, if present may need to scrub them - @fields = $record->field('100'); - foreach my $f (@fields) { - my $t = $f->subfield('0'); - if (defined $t) { $title_sub0++; } - } + @fields = $record->field('100'); + foreach my $f (@fields) { + my $t = $f->subfield('0'); + if (defined $t) { $title_sub0++; } + } @fields = $record->field('245'); foreach my $f (@fields) { my $t = $f->subfield('0'); @@ -194,11 +234,11 @@ while ( my $record = $batch->next() ) { } foreach (@ilses) { - my $ils = @$_[0]; + my $ils = @$_[0]; my @temp_barcodes; foreach my $h (@holdings) { - my $temp_ils_name = @$h[0]; - if ($temp_ils_name eq $ils) { push @temp_barcodes, @$h[1]; } + my $temp_ils_name = @$h[0]; + if ($temp_ils_name eq $ils) { push @temp_barcodes, @$h[1]; } } my @uniq_barcodes = uniq @temp_barcodes;; $barcode_counts{$ils} = scalar @uniq_barcodes; @@ -231,6 +271,19 @@ foreach my $key (keys %type_counts) { } print "\n"; +if ($p_item_type_subfield) { + print "===== Branch / Leader 06 / Item Type\n"; + foreach my $branch (keys %bib_types_by_item_type) { + foreach my $btype (keys %{ $bib_types_by_item_type{$branch} }) { + foreach my $itype (keys %{ $bib_types_by_item_type{$branch}{$btype} }) { + my $count = $bib_types_by_item_type{$branch}{$btype}{$itype}; + print "$branch\t$btype (" . give_type($btype) . ")\t$itype\t$count\n"; + } + } + } + print "\n"; +} + print "===== Summary of Select Field Counts\n"; print " $uri_count 856 fields with a subfield u\n"; print " $uri_valid_count 856 fields with a subfield u and valid indicators\n"; @@ -240,14 +293,14 @@ print " $author_sub0 245 fields have a subfield 0\n"; print "\n===== Holdings Analysis\n"; foreach my $key (keys %holding_counts) { - my $c = $holding_counts{$key}; - if (((100/$i)*$c) >= $holding_threshold) { print " $key $holding_counts{$key} holdings in $i bibs with $barcode_counts{$key} unique barcodes\n"; } + my $c = $holding_counts{$key}; + if (((100/$i)*$c) >= $holding_threshold) { print " $key $holding_counts{$key} holdings in $i bibs with $barcode_counts{$key} unique barcodes\n"; } } print "\n===== URI values are domains and filtered to only show those with more than $uri_threshold\n"; foreach my $key (keys %uri_counts) { - my $value = $uri_counts{$key}; - if ($value > $uri_threshold) { print " $key $value\n"; } + my $value = $uri_counts{$key}; + if ($value > $uri_threshold) { print " $key $value\n"; } } if ($exportbarcodes) { @@ -257,8 +310,8 @@ if ($exportbarcodes) { open my $out_fh, '>:utf8', $outfile or abort('can not open output file for barcode list'); foreach my $h (@holdings) { my $temp_ils_name = @$h[0]; - my $barcode = @$h[1]; - if (!defined $barcode) { $barcode = 'no barcode found'; } + my $barcode = @$h[1]; + if (!defined $barcode) { $barcode = 'no barcode found'; } if ($temp_ils_name eq $exportbarcodes) { print $out_fh "@$h[1]\n" } } close $out_fh; @@ -275,7 +328,7 @@ sub abort { } sub give_type { - my $type = shift; + my $type = shift; if ($type eq 'a') { return 'Language material'; } if ($type eq 'c') { return 'Notated Music'; } if ($type eq 'd') { return 'Manuscript notated music'; }