#!/usr/bin/perl use strict; use warnings; use XML::Twig; use YAML::Tiny; use Getopt::Long; my $conf = initialize(); my $marcxml = shift; open HOLDINGS, '>', $conf->{output}; open X, '>', $conf->{pubnotesfile}; open Z, '>', $conf->{privnotesfile}; my $holdings = {}; my %sample = ( x => {}, z => {} ); # hash of all subfields in all 852s my $copyid = 0; $| = 1; my $count = 0; my $total = `grep -c 'new; my $t = XML::Twig->new( twig_handlers => { record => \&record } ); $t->parsefile($marcxml); $yaml->[0] = \%sample; $yaml->write('holdings.sample'); print "\n\n"; sub record { my($t, $r)= @_; $holdings = { copies => [] }; my @dfields = $r->children('datafield'); for my $d (@dfields) { process_datafields($d) } for my $copy (@{$holdings->{copies}}) { print_reports($copy) } $r->purge; $count++; $percent = int(($count / $total) * 100); print "\r$percent% done ($count)" if ($percent != $prevper); $prevper = $percent; } sub process_datafields { my ($d) = @_; # get 903 if ($d->{'att'}->{'tag'} == 903) { my $s = $d->first_child('subfield'); $holdings->{id} = $s->text;; } # and holdings data if ($d->{'att'}->{'tag'} == $conf->{tag}) { push @{$holdings->{copies}}, { x =>[], z => [] }; $holdings->{copies}[-1]{copyid} = $copyid; my @subs = $d->children('subfield'); for my $s (@subs) { process_subs($s) } $copyid++; } } sub process_subs { my ($s) = @_; my $copy = $holdings->{copies}[-1]; my $code = $s->{'att'}->{'code'}; my $value = $s->text; if ($code eq $conf->{pubnotes} or $code eq $conf->{privnotes}) { push @{$copy->{$code}}, $value; my ($k,$v) = split /:/, $value; $sample{$code}{$k} = $v; } else { $copy->{$code} = $value; $sample{$code} = $value; } } sub print_reports { return unless defined $holdings->{id}; my ($copy) = @_; my $note = 0; for (@{$copy->{x}}) { print X join("\t", $holdings->{id}, $copy->{copyid}, $note, $_), "\n"; $note++; } $note = 0; for (@{$copy->{z}}) { print Z join("\t", $holdings->{id}, $copy->{copyid}, $note, $_), "\n"; $note++; } my @fields = (); for ( @{$conf->{fields}} ) { $copy->{$_} = '' unless defined $copy->{$_}; push @fields, $copy->{$_} } print HOLDINGS join("\t", $holdings->{id}, $copy->{copyid}, @fields), "\n"; } #------------------------------------------------ sub initialize { my $c = {}; my @missing = (); # set mode on existing filehandles binmode(STDIN, ':utf8'); my $rc = GetOptions( $c, 'fields|f=s', 'output|o=s', 'prefix|p=s', 'pubnotes|pub=i', 'pubnotesfile=s', 'privnotes|priv=s', 'privnotesfile=s', 'tag|t=i', 'help|h', ); show_help() unless $rc; show_help() if ($c->{help}); # set defaults $c->{prefix} = (defined $c->{prefix}) ? ($c->{prefix} . '.') : ''; $c->{tag} = $c->{tag} || '852'; $c->{output} = $c->{output} || join('', $c->{prefix}, "holdings.pg"); $c->{pubnotes} = $c->{pubnotes} || 'x'; $c->{pubnotesfile} = $c->{pubnotesfile} || join('', $c->{prefix}, "holdings.pubnote.pg"); $c->{privnotes} = $c->{privnotes} || 'z'; $c->{privnotesfile} = $c->{privnotesfile} || join('', $c->{prefix}, "holdings.privnote.pg"); my @keys = keys %{$c}; show_help() unless (@ARGV and @keys); for my $key ('fields', 'tag') { push @missing, $key unless $c->{$key} } if (@missing) { print "Required option: ", join(', ', @missing), " missing!\n"; show_help(); } # explode and validate fields string process_fields($c); return $c; } sub process_fields { my ($c) = @_; my @holdings_fields = split /,/, $c->{fields}; for (@holdings_fields) { die "Field names must be alphanumeric!\n" if /\W/; die "Field names must be single characters!\n" if /\w{2,}/; } $c->{fields} = \@holdings_fields; } sub show_help { print <