use warnings;
use XML::Twig;
-use YAML;
-use JSON;
+use YAML::Tiny;
+use Getopt::Long;
+use Equinox::Migration::SubfieldMapper;
-my $marcxml = shift || help();
+# Autoflush the currently selected output handle so the "\r...% done"
+# progress line is shown as records are processed.
+$| = 1;
-open HOLDINGS, '>', "holdings";
-open X, '>', "holdings.x";
-open Z, '>', "holdings.z";
-open ALL852, '>', "holdings.all852";
-
-my $holdings = {};
+# Parse options, build the subfield map and open the output files.
+my $c = initialize();
+# First argument left after option parsing: the MARCXML file to process.
+my $marcxml = shift;
my $copyid = 0;
-my %all852 = ( x => {}, z => {} ); # hash of all subfields in all 852s
+my $holdings;
+
+my $count = 0;
+# Count the lines containing '<record' (what `grep -c '<record'` reports)
+# in Perl itself, so the filename never passes through a shell: names with
+# spaces or metacharacters work, and $total is a clean integer.
+my $total = 0;
+open my $count_fh, '<', $marcxml or die "Can't open $marcxml: $!\n";
+while (my $line = <$count_fh>) {
+    $total++ if $line =~ /<record/;
+}
+close $count_fh;
+my $percent = 0;
+my $prevper = -1;
+my $yaml = YAML::Tiny->new;
my $t = XML::Twig->new( twig_handlers => { record => \&record } );
$t->parsefile($marcxml);
-#print ALL852 to_json(\%all852);
-print ALL852 Dump(%all852);
+# Was misspelled "write_sample_fieds", which dies with "Undefined subroutine"
+# only after the whole file has been parsed.
+write_sample_fields();
+# XML::Twig handler registered for 'record' elements.
+#   $t - the twig object
+#   $r - the record element
+# Resets the per-record $holdings structure, feeds every datafield child to
+# process_datafields(), writes the collected copies out, purges the twig and
+# updates the progress line on STDOUT.
sub record {
my($t, $r)= @_;
- $holdings = { copies => [] };
+    $holdings = {};
my @dfields = $r->children('datafield');
for my $d (@dfields) {
- process_datafields($d)
+    process_datafields($d);
}
-
- for my $copy (@{$holdings->{copies}})
- { print_reports($copy) }
+    write_data_out();
$r->purge;
+
+    $count++;
+    # $total comes from a record count that may be empty or zero (e.g. the
+    # count failed); avoid "Illegal division by zero" and just report 0%.
+    my $denominator = $total || 0;
+    $percent = $denominator > 0 ? int(($count / $denominator) * 100) : 0;
+    print "\r$percent% done ($count)";# if ($percent != $prevper);
+    $prevper = $percent;
}
+# Process one datafield element of the current record.
+#   $d - the datafield element
+# A 903 field supplies the record id (text of its first subfield). Any other
+# tag known to the subfield map starts a new copy entry in
+# $holdings->{copies} and has each of its subfields passed to process_subs().
sub process_datafields {
my ($d) = @_;
- # get 903
- if ($d->{'att'}->{'tag'} == 903) {
+    my $tag = $d->{'att'}->{'tag'};
+
+    # Tags are strings; comparing with 'eq' avoids "isn't numeric" warnings
+    # when a record carries a non-numeric tag.
+    if ($tag eq '903') {
my $s = $d->first_child('subfield');
$holdings->{id} = $s->text;;
- }
-
- # and holdings data
- if ($d->{'att'}->{'tag'} == 852) {
- push @{$holdings->{copies}}, { x =>[], z => [] };
- $holdings->{copies}[-1]{copyid} = $copyid;
+    } elsif ($c->{map}->has($tag)) {
+        push @{$holdings->{copies}}, { tag => $tag };
my @subs = $d->children('subfield');
- for my $s (@subs)
- { process_subs($s) }
- $copyid++;
+        for my $sub (@subs)
+            { process_subs($tag,$sub) }
}
}
+# Process one subfield of a mapped datafield.
+#   $tag - tag of the enclosing datafield
+#   $sub - the subfield element
+# Unmapped subfield codes are recorded under $c->{sample}{$tag} when $tag is
+# one of the requested sample tags, and otherwise ignored. Mapped subfields
+# are stored on the most recently added copy: 'multi' fields accumulate in
+# $copy->{multi}{"$tag$code"}, everything else goes to $copy->{uni}{$code}.
sub process_subs {
- my ($s) = @_;
+    my ($tag, $sub) = @_;
+    my $code = $sub->{'att'}->{'code'};
+
+    unless ($c->{map}->has($tag, $code)) {
+        # this is a subfield code we don't have mapped. report on it if this is a sample tag
+        # (look up the actual tag, not the literal key 'tag'; testing
+        # $c->{sample} first keeps the lookup from autovivifying it)
+        push @{$c->{sample}{$tag}}, $code
+            if $c->{sample} and defined $c->{sample}{$tag};
+        return;
+    }
+
my $copy = $holdings->{copies}[-1];
+    my $field = $c->{map}->field($tag, $code);
+    # NOTE(review): mod() appears able to return nothing for a field without
+    # a modifier; treat that as "not multi" instead of comparing undef.
+    my $mod = $c->{map}->mod($field);
+    if (defined $mod and $mod eq 'multi') {
+        my $name = $tag . $code;
+        push @{$copy->{multi}{$name}}, $sub->text;
+    } else {
+        $copy->{uni}{$code} = $sub->text;
+    }
+}
- my $code = $s->{'att'}->{'code'};
- my $value = $s->text;
- if ($code eq 'x' or $code eq 'z') {
- push @{$copy->{$code}}, $value;
- my ($k,$v) = split /:/, $value;
- $all852{$code}{$k} = $v;
- } else {
- $copy->{$code} = $value;
- $all852{$code} = $value;
+#------------------------------------------------
+
+# Write every copy collected for the current record.
+# For each copy, one tab-separated row goes to HOLDINGS: record id, copy
+# index within the record, tag, then one column per mapped field (sorted by
+# field name, fields whose modifier starts with "bib" skipped, empty when the
+# copy has no value). Each 'multi' field of the copy is written as its own
+# row (record id, copy index, values) to that field's filehandle in
+# $c->{files}{multi}.
+sub write_data_out {
+    my $i = 0;
+
+    for my $copy (@{$holdings->{copies}}) {
+        print HOLDINGS $holdings->{id}, "\t$i\t", $copy->{tag};
+        for my $field ( sort keys %{ $c->{map}{fields} } ) {
+            # NOTE(review): mod() appears able to return nothing for a field
+            # without a modifier; guard so the match doesn't warn on undef.
+            my $mod = $c->{map}->mod($field);
+            next if (defined $mod and $mod =~ /^bib/);
+            my $value = $copy->{uni}{ $c->{map}{fields}{$field}{sub} };
+            if (defined $value) {
+                print HOLDINGS "\t", $value;
+            } else {
+                print HOLDINGS "\t";
+            }
+        }
+        print HOLDINGS "\n";
+
+        for my $m (sort keys %{$copy->{multi}}) {
+            my $fh = $c->{files}{multi}{$m};
+            print {$fh} join("\t", $holdings->{id}, $i, @{$copy->{multi}{$m}}), "\n";
+        }
+        $i++;
}
}
-sub print_reports {
- my ($copy) = @_;
- my $note = 0;
- for (@{$copy->{x}}) {
- print X join("\t", $holdings->{id}, $copy->{copyid}, $note, $_), "\n";
- $note++;
+# Placeholder: currently does nothing.
+sub write_sample_fields {
+}
+
+#------------------------------------------------
+
+
+# Parse command-line options and prepare everything the run needs.
+# Shows the help text (and exits) on bad options, on --help, when no options
+# or no file argument are given, or when --prefix, --map or --ils is missing.
+# Returns a hashref holding the parsed options plus:
+#   map    - Equinox::Migration::SubfieldMapper built from the --map file
+#   sample - hash of sample tag => [] (only when --sample was given)
+#   files  - {multi}{"<tag><sub>"} => output filehandle
+# Side effects: opens the HOLDINGS filehandle and the per-field output files,
+# and writes the SQL header (BEGIN / CREATE TABLE / COPY) to HOLDINGS.
+sub initialize {
+    my $c = {};
+    my @missing = ();
+
+    # set mode on existing filehandles
+    binmode(STDIN, ':utf8');
+
+    my $rc = GetOptions( $c,
+                         'sample|s=s',
+                         'map|m=s',
+                         'ils=s',
+                         'library=s',
+                         'prefix|p=s',
+                         'help|h',
+                       );
+    show_help() unless $rc;
+    show_help() if ($c->{help});
+
+    my @keys = keys %{$c};
+    show_help() unless (@ARGV and @keys);
+    for my $key ('prefix', 'map', 'ils')
+        { push @missing, $key unless $c->{$key} }
+    if (@missing) {
+        print "Required option: ", join(', ', @missing), " missing!\n";
+        show_help();
}
- $note = 0;
- for (@{$copy->{z}}) {
- print Z join("\t", $holdings->{id}, $copy->{copyid}, $note, $_), "\n";
- $note++;
+
+    # generate subfield map
+    $c->{map} = Equinox::Migration::SubfieldMapper->new( file => $c->{map} );
+
+    # explode sample tags string
+    if (defined $c->{sample}) {
+        my $sample = $c->{sample};
+        $c->{sample} = {};
+        # split the saved option string; $c->{sample} is already the new hash
+        for my $sample_tag (split /,/, $sample) {
+            $c->{sample}{$sample_tag} = [];
+        }
}
- print HOLDINGS join("\t", $holdings->{id}, $copy->{copyid},
- $copy->{b}, $copy->{p}, $copy->{h}, $copy->{9}), "\n";
-}
+    # open required files
+    my $holdings_file = $c->{prefix} . ".holdings.pg";
+    open HOLDINGS, '>', $holdings_file
+        or die "Can't open $holdings_file: $!\n";
+    for my $f (keys %{$c->{map}{fields}}) {
+        if ($c->{map}->mod($f)) {
+            next if ($c->{map}->mod($f) =~ /bib/);
+            my $field_tag = $c->{map}{fields}{$f}{tag};
+            my $field_sub = $c->{map}{fields}{$f}{sub};
+            my $multi_file = join('.', $c->{prefix}, "holdings",
+                                  $field_tag, $field_sub, "pg");
+            open my $mfh, '>', $multi_file
+                or die "Can't open $multi_file: $!\n";
+            $c->{files}{multi}{ ($field_tag . $field_sub) } = $mfh;
+        }
+    }
+
+    # print file headers
+    print HOLDINGS "BEGIN;\n";
+    print HOLDINGS "CREATE TABLE ", $c->{prefix}, ".asset_copy_", $c->{ils};
+    print HOLDINGS $c->{library} if (defined $c->{library});
+    print HOLDINGS " (eg_bib_id INTEGER, eg_copy_id INTEGER, l_tag INTEGER";
+    for my $field ( sort keys %{ $c->{map}{fields} } ) {
+        # a field may have no modifier at all; don't match against undef
+        next if (($c->{map}->mod($field) || '') =~ /bib/);
+        print HOLDINGS ", l_", $field, " TEXT";
+    }
+    print HOLDINGS ") INHERITS (", $c->{prefix}, ".asset_copy);\n";
+    print HOLDINGS "COPY ", $c->{prefix}, ".asset_copy_", $c->{ils};
+    print HOLDINGS $c->{library} if (defined $c->{library});
+    print HOLDINGS " (eg_bib_id, eg_copy_id, l_tag";
+    for my $field ( sort keys %{ $c->{map}{fields} } ) {
+        # skip bib-level fields here too, so the COPY column list matches
+        # the columns created by the CREATE TABLE statement
+        next if (($c->{map}->mod($field) || '') =~ /bib/);
+        print HOLDINGS ", l_", $field;
+    }
+    print HOLDINGS ") FROM STDIN;\n";
+
+
+    return $c;
+}
-sub help {
+# Print the usage summary to STDOUT and exit.
+sub show_help {
print <<HELP;
-Usage is: extract_holdings MARCXML_FILE
+Usage is: extract_holdings --prefix PREFIX --map MAPFILE --ils ILS [options] MARCXML_FILE
+
+Required:
+  --prefix, -p PREFIX   prefix for output file names and SQL table names
+  --map, -m MAPFILE     subfield map file
+  --ils ILS             ILS name (part of the generated table name)
+
+Options:
+  --library LIBRARY     library name appended to the generated table name
+  --sample, -s TAGS     comma-separated list of tags to sample
+  --help, -h            show this message
HELP
exit;
}