From fb1c3443e7fdaa5754c61fba439b145e38471bad Mon Sep 17 00:00:00 2001
From: Shawn Boyette
Date: Wed, 14 Jan 2009 13:43:23 +0000
Subject: [PATCH] adding extract_holdings script

---
Review note: extract_holdings below is a revised version of the script, so
the blob id on its index line no longer matches the content.

 extract_holdings |  133 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 marc_cleanup     |    4 +-
 2 files changed, 135 insertions(+), 2 deletions(-)
 create mode 100755 extract_holdings

diff --git a/extract_holdings b/extract_holdings
new file mode 100755
index 0000000..8415c6a
--- /dev/null
+++ b/extract_holdings
@@ -0,0 +1,133 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+use XML::Twig;
+use YAML;
+use JSON;
+
+# Usage: extract_holdings MARCXML_FILE
+#
+# Runs every record of a MARCXML file through record() and writes four
+# reports into the current directory (report columns are tab-separated):
+#   holdings         id, copyid, then 852 subfields b, p, h and 9
+#   holdings.x       id, copyid, note number, text of each 852 subfield x
+#   holdings.z       id, copyid, note number, text of each 852 subfield z
+#   holdings.all852  YAML dump of the subfield codes and values seen in 852s
+# The id is the text of the first subfield of the record's 903.
+
+my $marcxml = shift;
+die "usage: $0 MARCXML_FILE\n" unless defined $marcxml;
+
+# Lexical handles with checked opens: a failed open must stop the run rather
+# than let every later print be discarded.  Twig hands back decoded text, so
+# the handles encode it as UTF-8 on the way out.
+open my $HOLDINGS, '>:utf8', "holdings"
+    or die "cannot write holdings: $!\n";
+open my $X, '>:utf8', "holdings.x"
+    or die "cannot write holdings.x: $!\n";
+open my $Z, '>:utf8', "holdings.z"
+    or die "cannot write holdings.z: $!\n";
+open my $ALL852, '>:utf8', "holdings.all852"
+    or die "cannot write holdings.all852: $!\n";
+
+my $holdings = {};
+my $copyid = 0;
+my %all852 = ( x => {}, z => {} ); # hash of all subfields in all 852s
+
+my $t = XML::Twig->new( twig_handlers => { record => \&record } );
+$t->parsefile($marcxml);
+#print $ALL852 to_json(\%all852);
+# Dump takes a list of structures and writes one YAML document per item, so
+# the hash has to go in as a single reference to keep keys with their values.
+print $ALL852 Dump(\%all852);
+
+# Buffered write errors only show up at close, so check every handle.
+for my $fh ($HOLDINGS, $X, $Z, $ALL852) {
+    close $fh or die "error closing report file: $!\n";
+}
+
+# Twig handler for one complete record element: gathers the 903 id and the
+# 852 copies, prints the report rows for each copy, then purges the element.
+sub record {
+    my ($twig, $r) = @_;
+    $holdings = { copies => [] };
+
+    for my $d ($r->children('datafield')) {
+        process_datafields($d);
+    }
+
+    for my $copy (@{$holdings->{copies}}) {
+        print_reports($copy);
+    }
+    $r->purge;
+}
+
+# Handles one datafield: a 903 supplies the record id, an 852 starts a new
+# copy (numbered by the file-wide $copyid counter) and has its subfields read.
+sub process_datafields {
+    my ($d) = @_;
+    my $tag = $d->att('tag');
+    return unless defined $tag;
+
+    # get 903
+    # (tags are compared as strings; == warns on any tag that is not numeric)
+    if ($tag eq '903') {
+        my $s = $d->first_child('subfield');
+        # a 903 without subfields leaves the id unset instead of dying
+        $holdings->{id} = $s->text if $s;
+    }
+
+    # and holdings data
+    if ($tag eq '852') {
+        push @{$holdings->{copies}}, { x => [], z => [], copyid => $copyid };
+        for my $s ($d->children('subfield')) {
+            process_subs($s);
+        }
+        $copyid++;
+    }
+}
+
+# Records one 852 subfield on the current (last pushed) copy and in %all852.
+# x and z values are appended to per-copy lists and their text is also split
+# at the first colon into a key/value pair for %all852.
+sub process_subs {
+    my ($s) = @_;
+    my $copy = $holdings->{copies}[-1];
+
+    my $code = $s->att('code');
+    my $value = $s->text;
+    return unless defined $code;
+
+    if ($code eq 'x' or $code eq 'z') {
+        push @{$copy->{$code}}, $value;
+        # limit 2: anything after the first colon, colons included, is value
+        my ($k, $v) = split /:/, $value, 2;
+        $all852{$code}{$k} = $v if defined $k;
+    } else {
+        $copy->{$code} = $value;
+        $all852{$code} = $value;
+    }
+}
+
+# Prints the report rows for one copy: a row per x note, a row per z note
+# (notes are numbered from 0 within the copy), then the holdings row.
+sub print_reports {
+    my ($copy) = @_;
+    # undef prints as an empty column either way; defaulting just avoids the
+    # "uninitialized value" warnings for records without a 903 id or copies
+    # without some subfield
+    my $id = defined $holdings->{id} ? $holdings->{id} : '';
+
+    for my $report ([ x => $X ], [ z => $Z ]) {
+        my ($code, $fh) = @$report;
+        my $note = 0;
+        for my $text (@{$copy->{$code}}) {
+            print {$fh} join("\t", $id, $copy->{copyid}, $note, $text), "\n";
+            $note++;
+        }
+    }
+
+    my @cols = map { defined $copy->{$_} ? $copy->{$_} : '' } qw(b p h 9);
+    print {$HOLDINGS} join("\t", $id, $copy->{copyid}, @cols), "\n";
+}
diff --git a/marc_cleanup b/marc_cleanup
index d1479a4..853de84 100755
--- a/marc_cleanup
+++ b/marc_cleanup
@@ -25,7 +25,7 @@ open my $NUMARC, '>:utf8', $conf->{output}
 open my $OLD2NEW, '>', 'old2new.map'
 if ($conf->{'renumber-from'} and $conf->{'original-subfield'});
 my $EXMARC = 'EX';
-
+print $NUMARC "\n";
 my @record = (); # current record storage
 my %recmeta = (); # metadata about current record
 
@@ -103,7 +103,7 @@ while ( buildrecord() ) {
 }
 write_record($NUMARC);
 }
-#print $NUMARC "\n";
+print $NUMARC "\n";
 print $OUT "\nDone. \n";
 
 
-- 
1.7.2.5