use warnings;
use XML::Twig;
-use YAML;
-use JSON;
+use YAML::Tiny;
+use Getopt::Long;
-my $marcxml = shift || help();
-open HOLDINGS, '>', "holdings";
-open X, '>', "holdings.x";
-open Z, '>', "holdings.z";
-open ALL852, '>', "holdings.all852";
+my $conf = initialize();
+my $marcxml = shift;
-$| = 1;
+open HOLDINGS, '>', $conf->{output};
+open X, '>', $conf->{pubnotesfile};
+open Z, '>', $conf->{privnotesfile};
my $holdings = {};
+my %sample = ( x => {}, z => {} ); # hash of all subfields in all 852s
my $copyid = 0;
+
+
+$| = 1;
my $count = 0;
-my %all852 = ( x => {}, z => {} ); # hash of all subfields in all 852s
+my $total = `grep -c '<record' $marcxml`;
+my $percent = 0;
+my $prevper = -1;
+my $yaml = YAML::Tiny->new;
my $t = XML::Twig->new( twig_handlers => { record => \&record } );
$t->parsefile($marcxml);
-#print ALL852 to_json(\%all852);
-print ALL852 Dump(%all852);
+$yaml->[0] = \%sample;
+$yaml->write('holdings.sample');
+print "\n\n";
sub record {
my($t, $r)= @_;
$holdings = { copies => [] };
my @dfields = $r->children('datafield');
- for my $d (@dfields) {
- process_datafields($d)
- }
+ for my $d (@dfields)
+ { process_datafields($d) }
for my $copy (@{$holdings->{copies}})
{ print_reports($copy) }
$r->purge;
- $count++; print "\r$count";
+
+ $count++;
+ $percent = int(($count / $total) * 100);
+ print "\r$percent% done ($count)" if ($percent != $prevper);
+ $prevper = $percent;
}
sub process_datafields {
}
# and holdings data
- if ($d->{'att'}->{'tag'} == 852) {
+ if ($d->{'att'}->{'tag'} == $conf->{tag}) {
push @{$holdings->{copies}}, { x =>[], z => [] };
$holdings->{copies}[-1]{copyid} = $copyid;
my @subs = $d->children('subfield');
- for my $s (@subs)
+ for my $s (@subs)
{ process_subs($s) }
$copyid++;
}
my $code = $s->{'att'}->{'code'};
my $value = $s->text;
- if ($code eq 'x' or $code eq 'z') {
+ if ($code eq $conf->{pubnotes} or $code eq $conf->{privnotes}) {
push @{$copy->{$code}}, $value;
my ($k,$v) = split /:/, $value;
- $all852{$code}{$k} = $v;
- } else {
+ $sample{$code}{$k} = $v;
+ } else {
$copy->{$code} = $value;
- $all852{$code} = $value;
+ $sample{$code} = $value;
}
}
sub print_reports {
+ return unless defined $holdings->{id};
my ($copy) = @_;
my $note = 0;
for (@{$copy->{x}}) {
print Z join("\t", $holdings->{id}, $copy->{copyid}, $note, $_), "\n";
$note++;
}
- print HOLDINGS join("\t", $holdings->{id}, $copy->{copyid},
- $copy->{b}, $copy->{p}, $copy->{h}, $copy->{9}), "\n";
+ my @fields = ();
+ for ( @{$conf->{fields}} )
+ { $copy->{$_} = '' unless defined $copy->{$_}; push @fields, $copy->{$_} }
+ print HOLDINGS join("\t", $holdings->{id}, $copy->{copyid}, @fields), "\n";
}
-sub help {
+#------------------------------------------------
+
+
+sub initialize {
+ my $c = {};
+ my @missing = ();
+
+ # set mode on existing filehandles
+ binmode(STDIN, ':utf8');
+
+ my $rc = GetOptions( $c,
+ 'fields|f=s',
+ 'output|o=s',
+ 'prefix|p=s',
+ 'pubnotes|pub=i',
+ 'pubnotesfile=s',
+ 'privnotes|priv=s',
+ 'privnotesfile=s',
+ 'tag|t=i',
+ 'help|h',
+ );
+ show_help() unless $rc;
+ show_help() if ($c->{help});
+
+ # set defaults
+ $c->{prefix} = (defined $c->{prefix}) ? ($c->{prefix} . '.') : '';
+ $c->{tag} = $c->{tag} || '852';
+ $c->{output} =
+ $c->{output} || join('', $c->{prefix}, "holdings.pg");
+ $c->{pubnotes} = $c->{pubnotes} || 'x';
+ $c->{pubnotesfile} =
+ $c->{pubnotesfile} || join('', $c->{prefix}, "holdings.pubnote.pg");
+ $c->{privnotes} = $c->{privnotes} || 'z';
+ $c->{privnotesfile} =
+ $c->{privnotesfile} || join('', $c->{prefix}, "holdings.privnote.pg");
+
+ my @keys = keys %{$c};
+ show_help() unless (@ARGV and @keys);
+ for my $key ('fields', 'tag')
+ { push @missing, $key unless $c->{$key} }
+ if (@missing) {
+ print "Required option: ", join(', ', @missing), " missing!\n";
+ show_help();
+ }
+
+ # explode and validate fields string
+ process_fields($c);
+ return $c;
+}
+
+sub process_fields {
+ my ($c) = @_;
+ my @holdings_fields = split /,/, $c->{fields};
+ for (@holdings_fields) {
+ die "Field names must be alphanumeric!\n" if /\W/;
+ die "Field names must be single characters!\n"
+ if /\w{2,}/;
+ }
+ $c->{fields} = \@holdings_fields;
+}
+
+sub show_help {
print <<HELP;
-Usage is: extract_holdings MARCXML_FILE
+Usage is: extract_holdings MARCXML_FILE HOLDINGS_TAG
HELP
exit;
}