--exportbarcodesfile will use this file name for a barcode export instead
of the generic 'barcodes_export.txt'
+--item_type_subfield will make use of --holding_code and provide a breakdown of
+bib types by item types. If --branch_subfield is also provided, then the
+breakdown will be further subdivided by branch.
+
=back
=cut
my $uri_threshold = 1;
my $p_holding_code;
my $p_barcode_subfield;
+my $p_item_type_subfield;
+my $p_branch_subfield;
my $p_ils_name = '';
my $holding_threshold = 50;
my $p_ignore_filetype = 'false';
my $exportbarcodesfile;
my $ret = GetOptions(
- 'file:s' => \$file,
- 'uri_threshold:i' => \$uri_threshold,
- 'holding_code:s' => \$p_holding_code,
- 'barcode_subfield:s' => \$p_barcode_subfield,
- 'ignore_filetype:s' => \$p_ignore_filetype,
- 'ils:s' => \$p_ils_name,
- 'exportbarcodes:s' => \$exportbarcodes,
- 'exportbarcodesfile:s' => \$exportbarcodesfile,
- 'holding_threshold:s' => \$holding_threshold
+ 'file:s' => \$file,
+ 'uri_threshold:i' => \$uri_threshold,
+ 'holding_code:s' => \$p_holding_code,
+ 'barcode_subfield:s' => \$p_barcode_subfield,
+ 'item_type_subfield:s' => \$p_item_type_subfield,
+ 'branch_subfield:s' => \$p_branch_subfield,
+ 'ignore_filetype:s' => \$p_ignore_filetype,
+ 'ils:s' => \$p_ils_name,
+ 'exportbarcodes:s' => \$exportbarcodes,
+ 'exportbarcodesfile:s' => \$exportbarcodesfile,
+ 'holding_threshold:s' => \$holding_threshold
);
if ($exportbarcodesfile and !defined $exportbarcodes) { abort('You have to provide an ILS name if you want a barcode export file.'); }
if ($p_holding_code and length $p_holding_code != 3) { abort('Holdings codes must be three characters.'); }
if ($p_barcode_subfield) {
- if (!defined $p_holding_code) { abort('A barcode field can not be used without a holding code.'); }
- if (length $p_barcode_subfield != 1) { abort('Barcode subfields must be a single character code.'); }
+ if (!defined $p_holding_code) { abort('A barcode field can not be used without a holding code.'); }
+ if (length $p_barcode_subfield != 1) { abort('Barcode subfields must be a single character code.'); }
+}
+
+if ($p_item_type_subfield) {
+ if (!defined $p_holding_code) { abort('An item type field can not be used without a holding code.'); }
+ if (length $p_item_type_subfield != 1) { abort('Item type subfields must be a single character code.'); }
+}
+
+if ($p_branch_subfield) {
+ if (!defined $p_holding_code) { abort('A branch field can not be used without a holding code.'); }
+ if (length $p_branch_subfield != 1) { abort('Branch subfields must be a single character code.'); }
}
# ils name, holding tag, barcode subfield
my @ilses = (
- ['Mandarin','852','p'],
- ['Evergreen','852','p'],
- ['Polaris','852','p'],
- ['TLC','949','g'],
- ['Koha','952','p'],
- ['Sympony','999','i'],
+ ['Mandarin','852','p'],
+ ['Evergreen','852','p'],
+ ['Polaris','852','p'],
+ ['TLC','949','g'],
+ ['Koha','952','p'],
+ ['Sympony','999','i'],
['Destiny','852','p']
);
my @temp;
-if ($p_holding_code) {
- push @temp, $p_ils_name;
- push @temp, $p_holding_code;
- if ($p_barcode_subfield) { push @temp, lc $p_barcode_subfield; }
- push @ilses, [@temp];
+if (defined $p_holding_code && defined $p_ils_name && defined $p_barcode_subfield) {
+ push @temp, $p_ils_name;
+ push @temp, $p_holding_code;
+ if ($p_barcode_subfield) { push @temp, lc $p_barcode_subfield; }
+ push @ilses, [@temp];
}
#to do - add a check for exportbarcodes being in @ilses
-my $batch = MARC::Batch->new('USMARC', $file);
-$batch->strict_off();
my $filetype = `file $file`;
-if ($filetype =~ m/MARC21/ or $p_ignore_filetype eq 'true') { print "$filetype.\n" }
- else { abort("File is not MARC21."); }
+my $batch;
+if ($filetype =~ m/MARC21/) {
+ $batch = MARC::Batch->new( 'USMARC', $file );
+} else {
+ $batch = MARC::Batch->new( 'XML', $file );
+}
+$batch->strict_off();
my $i = 0;
my $uri_count = 0;
my @fields;
my @encodings;
my @types;
+my %bib_types_by_item_type;
my @holding_code_strings;
my %holding_counts;
my %barcode_counts;
foreach (@ilses) {
- $holding_counts{@$_[0]} = 0;
- $barcode_counts{@$_[0]} = 0;
+ $holding_counts{@$_[0]} = 0;
+ $barcode_counts{@$_[0]} = 0;
}
while ( my $record = $batch->next() ) {
$i++;
- #check holdings, bit time consuming but more future proof
- foreach (@ilses) {
- my $ils = @$_[0];
- my $hcode = @$_[1];
- my $barcode = @$_[2];
- my @holding_fields = $record->field($hcode);
+ #check holdings, bit time consuming but more future proof
+ foreach (@ilses) {
+ my $ils = @$_[0];
+ my $hcode = @$_[1];
+ my $barcode = @$_[2];
+ my @holding_fields = $record->field($hcode);
foreach my $hf (@holding_fields) {
- my @h;
- my $barcode_string = $hf->subfield($barcode);
- push @h, $ils;
- push @h, $barcode_string;
- push @holdings, [@h];
- }
- my $l = scalar @holding_fields;
- my $v = $holding_counts{$ils};
- if ($l) { $holding_counts{$ils} = $v + $l; }
- }
+ my @h;
+ my $barcode_string = $hf->subfield($barcode);
+ push @h, $ils;
+ push @h, $barcode_string;
+ push @holdings, [@h];
+ }
+ my $l = scalar @holding_fields;
+ my $v = $holding_counts{$ils};
+ if ($l) { $holding_counts{$ils} = $v + $l; }
+ }
#process 856s
- @fields = $record->field('856');
- my $enc = substr $record->leader(), 9, 1;
- push @encodings, $enc;
+ @fields = $record->field('856');
+ my $enc = substr $record->leader(), 9, 1;
+ push @encodings, $enc;
my $type = substr $record->leader(), 6, 1;
push @types, $type;
- foreach my $f (@fields) {
- my $u = $f->subfield('u');
+ # bib type by branch and by item type if item subfield (and optionally branch subfield) provided
+ if (defined $p_holding_code && defined $p_item_type_subfield) {
+ my @holding_fields = $record->field($p_holding_code);
+ foreach my $hf (@holding_fields) {
+ my $item_type = $hf->subfield($p_item_type_subfield) || '<missing item type subfield>';
+ my $branch = $p_branch_subfield ? $f->subfield($p_branch_subfield) || 'default';
+ if (! defined $bib_types_by_item_type{ $branch }) {
+ $bib_types_by_item_type{ $branch } = {};
+ }
+ if (! defined $bib_types_by_item_type{ $branch }{ $type }) {
+ $bib_types_by_item_type{ $branch }{ $type } = {};
+ }
+ if (! defined $bib_types_by_item_type{ $branch }{ $type }{ $item_type }) {
+ $bib_types_by_item_type{ $branch }{ $type }{ $item_type } = 0;
+ }
+ $bib_types_by_item_type{ $branch }{ $type }{ $item_type }++;
+ }
+ }
+ foreach my $f (@fields) {
+ my $u = $f->subfield('u');
my $n = $f->subfield('9');
if (defined $n) { $uri_sub9_count++; }
- if (defined $u) {
- $uri_count++;
- my $ind1 = $f->indicator('1');
- my $ind2 = $f->indicator('2');
- if ($ind1 eq '4') {
- if ($ind2 eq '0' or $ind2 eq '1') { $uri_valid_count++; }
- }
- my $ustring = lc $f->as_string('u');
- $ustring =~ s/http:\/\///;
+ if (defined $u) {
+ $uri_count++;
+ my $ind1 = $f->indicator('1');
+ my $ind2 = $f->indicator('2');
+ if ($ind1 eq '4') {
+ if ($ind2 eq '0' or $ind2 eq '1') { $uri_valid_count++; }
+ }
+ my $ustring = lc $f->as_string('u');
+ $ustring =~ s/http:\/\///;
$ustring =~ s/ftp:\/\///;
- $ustring =~ s/https:\/\///;
- $ustring =~ s/\/.*//;
- push @uris, $ustring;
- }
- }
+ $ustring =~ s/https:\/\///;
+ $ustring =~ s/\/.*//;
+ push @uris, $ustring;
+ }
+ }
#check for authority linking on 100s and 245s, if present may need to scrub them
- @fields = $record->field('100');
- foreach my $f (@fields) {
- my $t = $f->subfield('0');
- if (defined $t) { $title_sub0++; }
- }
+ @fields = $record->field('100');
+ foreach my $f (@fields) {
+ my $t = $f->subfield('0');
+ if (defined $t) { $title_sub0++; }
+ }
@fields = $record->field('245');
foreach my $f (@fields) {
my $t = $f->subfield('0');
}
foreach (@ilses) {
- my $ils = @$_[0];
+ my $ils = @$_[0];
my @temp_barcodes;
foreach my $h (@holdings) {
- my $temp_ils_name = @$h[0];
- if ($temp_ils_name eq $ils) { push @temp_barcodes, @$h[1]; }
+ my $temp_ils_name = @$h[0];
+ if ($temp_ils_name eq $ils) { push @temp_barcodes, @$h[1]; }
}
my @uniq_barcodes = uniq @temp_barcodes;;
$barcode_counts{$ils} = scalar @uniq_barcodes;
}
print "\n";
+if ($p_item_type_subfield) {
+ print "===== Branch / Leader 06 / Item Type\n";
+ foreach my $branch (keys %bib_types_by_item_type) {
+ foreach my $btype (keys %{ $bib_types_by_item_type{$branch} }) {
+ foreach my $itype (keys %{ $bib_types_by_item_type{$branch}{$btype} }) {
+ my $count = $bib_types_by_item_type{$branch}{$btype}{$itype};
+ print "$branch\t$btype (" . give_type($btype) . ")\t$itype\t$count\n";
+ }
+ }
+ }
+ print "\n";
+}
+
print "===== Summary of Select Field Counts\n";
print " $uri_count 856 fields with a subfield u\n";
print " $uri_valid_count 856 fields with a subfield u and valid indicators\n";
print "\n===== Holdings Analysis\n";
foreach my $key (keys %holding_counts) {
- my $c = $holding_counts{$key};
- if (((100/$i)*$c) >= $holding_threshold) { print " $key $holding_counts{$key} holdings in $i bibs with $barcode_counts{$key} unique barcodes\n"; }
+ my $c = $holding_counts{$key};
+ if (((100/$i)*$c) >= $holding_threshold) { print " $key $holding_counts{$key} holdings in $i bibs with $barcode_counts{$key} unique barcodes\n"; }
}
print "\n===== URI values are domains and filtered to only show those with more than $uri_threshold\n";
foreach my $key (keys %uri_counts) {
- my $value = $uri_counts{$key};
- if ($value > $uri_threshold) { print " $key $value\n"; }
+ my $value = $uri_counts{$key};
+ if ($value > $uri_threshold) { print " $key $value\n"; }
}
if ($exportbarcodes) {
open my $out_fh, '>:utf8', $outfile or abort('can not open output file for barcode list');
foreach my $h (@holdings) {
my $temp_ils_name = @$h[0];
- my $barcode = @$h[1];
- if (!defined $barcode) { $barcode = 'no barcode found'; }
+ my $barcode = @$h[1];
+ if (!defined $barcode) { $barcode = 'no barcode found'; }
if ($temp_ils_name eq $exportbarcodes) { print $out_fh "@$h[1]\n" }
}
close $out_fh;
}
sub give_type {
- my $type = shift;
+ my $type = shift;
if ($type eq 'a') { return 'Language material'; }
if ($type eq 'c') { return 'Notated Music'; }
if ($type eq 'd') { return 'Manuscript notated music'; }