From 5a0ffda818989fddf5c3315d00c87fdf348cae92 Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Fri, 12 Mar 2010 20:32:45 +0000 Subject: [PATCH] improved handling for drawing data from fields other than the holdings tag Suppose the 852 is the main holdings tag, but information is needed from the 590$a (e.g., as an item note). The extract_holdings configuration file can now work as follows: # to map the subfield contents to the item record barcode 852 p note 590 a # to map, but have the 590$a information be associated # only with the first item in the bib; if m:first is # not supplied, the 590$a note will be supplied # to all items associated with the bib barcode 852 p note 590 a m:first # to put all occurrences of the 590$a into a separate # file for linking with the appropriate items. m:multi # *must* be supplied if the 590 tag occurs more than once # in the bib records barcode 852 p note 590 a m:multi # to concatenate repeats of $a in the 590 together barcode 852 p note 590 a m:concatenate # to concatenate with a custom separator barcode 852 p note 590 a m:concatenate c:' / ' Development sponsored by the Indiana State Library --- .../lib/Equinox/Migration/MapDrivenMARCXMLProc.pm | 38 ++++++++++++++++++- .../lib/Equinox/Migration/SubfieldMapper.pm | 22 ++++++++++-- extract_holdings | 34 ++++++++++++----- 3 files changed, 79 insertions(+), 15 deletions(-) diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index c5b1bbc..b98a7ff 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -21,7 +21,7 @@ our $VERSION = '1.005'; my $dstore; my $sfmap; -my @modlist = qw( multi ignoremulti bib required ); +my @modlist = qw( multi ignoremulti bib required first concatenate ); my %allmods = (); my $multis = {}; my $reccount; @@ -154,6 +154,7 @@ sub process_subs { # fetch our datafield struct and field and mods my $dataf = $crec->{tags}[-1]; my $field = $sfmap->field($tag, $code); + my $sep = $sfmap->sep($field); $allmods{$field} = $sfmap->mods($field) unless $allmods{$field}; my $mods = $allmods{$field}; @@ -164,8 +165,27 @@ sub process_subs { # handle multi modifier if ($mods->{multi}) { - push @{$dataf->{multi}{$code}}, $sub->text; $multis->{$tag}{$code} = 1; + if ($mods->{concatenate}) { + if (exists($dataf->{multi}{$code})) { + $dataf->{multi}{$code}[0] .= $sep . $sub->text; + } else { + push @{$dataf->{multi}{$code}}, $sub->text; + } + $multis->{$tag}{$code} = 1; + } else { + push @{$dataf->{multi}{$code}}, $sub->text; + } + return; + } + + + if ($mods->{concatenate}) { + if (exists($dataf->{uni}{$code})) { + $dataf->{uni}{$code} .= $sep . $sub->text; + } else { + $dataf->{uni}{$code} = $sub->text; + } return; } @@ -216,6 +236,20 @@ Returns mapped fieldname when passed a tag, and code sub name { my ($self, $t, $c) = @_; return $sfmap->field($t, $c) } +=head2 first_only + +Returns whether mapped fieldname is to be applied only to first +item in a bib + +=cut + +sub first_only { + my ($self, $t, $c) = @_; + my $field = $sfmap->field($t, $c); + my $mods = $sfmap->mods($field); + return exists($mods->{first}); +} + =head2 get_multis Returns hashref of C<{tag}{code}> for all mapped multi fields diff --git a/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm b/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm index a115ea4..d67d11c 100644 --- a/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm +++ b/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm @@ -206,6 +206,19 @@ sub filters { return $self->{fields}{$field}{filt}; } +=head2 sep + +Returns the separator string set on a mapping. Used only +if concatenating. + +=cut + +sub sep { + my ($self, $field) = @_; + return undef unless $self->has($field); + return $self->{fields}{$field}{sep}; +} + =head1 MAP CONSTRUCTION METHODS These methods are not generally accessed from user code. @@ -227,12 +240,12 @@ sub generate { chomp; my @tokens = split /\s+/; - my $map = { mods => [], filt => [] }; + my $map = { mods => [], filt => [], sep => ' ' }; $map->{field} = shift @tokens; $map->{tag} = shift @tokens; while (defined (my $tok = shift @tokens)) { last if ($tok =~ m/^#/); - if ($tok =~ m/^[a-z]:'/ and $tok !~ /'$/) { + if ($tok =~ m/^[a-z]:'/ and $tok !~ /^'$/) { $tok .= ' ' . shift @tokens until ($tokens[0] =~ m/'$/); $tok .= ' ' . shift @tokens; @@ -245,6 +258,8 @@ sub generate { { push @{$map->{filt}}, $tok } elsif ($tok =~ m/^[a-z0-9]$/) { $map->{sub} = $tok } + elsif ($tok =~ /^c:(.*)$/) + { $map->{sep} = $1 } else { die "Unknown chunk '$tok' at line $.\n" } } @@ -296,7 +311,8 @@ sub add { $self->{fields}{ $map->{field} } = { tag => $map->{tag}, sub => $map->{sub}, mods => $map->{mods}, - filt => $map->{filt} + filt => $map->{filt}, + sep => $map->{sep}, }; # and to the tags hash $self->{tags}{ $map->{tag} }{ $map->{sub} } = $map->{field}; diff --git a/extract_holdings b/extract_holdings index 01f6a45..d39aac9 100755 --- a/extract_holdings +++ b/extract_holdings @@ -33,6 +33,7 @@ sub extract_holdings { mapfile => $c->{map}, verbose => 1, ); + print "Writing holdings to output file(s)...\n"; # open main holdings file open HOLDINGS, '>', ($c->{prefix} . "-HOLDINGS.pg"); @@ -79,20 +80,33 @@ sub extract_holdings { # now get everything else in the mapping for my $othertag ( sort keys %{$rec->{tmap}} ) { next if $othertag eq $c->{holdings}; # ignoring the holdings, o'course - my $idx = $rec->{tmap}{$othertag}[0]; # get index into tags struct - unless (defined $idx) { + my $test_idx = $rec->{tmap}{$othertag}[0]; # get index into tags struct + unless (defined $test_idx) { push @out, ''; next; } - for my $sub ( sort keys %{$rec->{tags}[$idx]{uni}} ) { - push @out, $rec->{tags}[$idx]{uni}{$sub}; - print HOLDINGS "l_", $m->name($rec->{tags}[$idx]{tag}, $sub), ", " - unless $j; + # handle only first other tag unless it is known to be multi + my $limit = 0; + if (exists($multis->{$othertag})) { + $limit = $#{ $rec->{tmap}{$othertag} }; } - for my $sub ( sort keys %{$multis->{$othertag}} ) { - for my $value ( @{$rec->{tags}[$idx]{multi}{$sub}} ) { - my $fh = $MULTIFILE{"$othertag$sub"}; - print $fh join("\t", $rec->{egid}, $j, $value), "\n"; + foreach my $idx (0..$limit) { + for my $sub ( sort keys %{$rec->{tags}[$idx]{uni}} ) { + if ($m->first_only($rec->{tags}[$idx]{tag}, $sub)) { + push @out, ($k == 1) ? $rec->{tags}[$idx]{uni}{$sub} : ''; + } else { + push @out, $rec->{tags}[$idx]{uni}{$sub}; + } + print HOLDINGS "l_", $m->name($rec->{tags}[$idx]{tag}, $sub), ", " + unless $j; + } + next unless exists($multis->{$othertag}); + for my $sub ( sort keys %{$multis->{$othertag}} ) { + next if $m->first_only($rec->{tags}[$idx]{tag}, $sub) and ($k > 1); + for my $value ( @{$rec->{tags}[$idx]{multi}{$sub}} ) { + my $fh = $MULTIFILE{"$othertag$sub"}; + print $fh join("\t", $rec->{egid}, $j, $value), "\n"; + } } } } -- 1.7.2.5