X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=Equinox-Migration%2Flib%2FEquinox%2FMigration%2FMapDrivenMARCXMLProc.pm;h=77a96dc535a6e5ee3b8ed58044938ec126ba8503;hp=dc45bafce752f4517aa998a2647aa0a5f26b9959;hb=f066013ab0a65299aaddf1466c438585e95ee2bf;hpb=7aa7dfc0b3a13d24379e3e7bc407a29714b3a04f diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index dc45baf..77a96dc 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -4,7 +4,6 @@ use warnings; use strict; use XML::Twig; -use DBM::Deep; use Equinox::Migration::SubfieldMapper 1.004; @@ -14,15 +13,16 @@ Equinox::Migration::MapDrivenMARCXMLProc =head1 VERSION -Version 1.004 +Version 1.005 =cut -our $VERSION = '1.004'; +our $VERSION = '1.005'; my $dstore; my $sfmap; -my @mods = qw( multi bib required ); +my @modlist = qw( multi ignoremulti bib required first concatenate ); +my %allmods = (); my $multis = {}; my $reccount; my $verbose = 0; @@ -60,16 +60,9 @@ sub new { # initialize map and taglist die "Argument 'mapfile' must be specified\n" unless ($args{mapfile}); $sfmap = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile}, - mods => \@mods ); + mods => \@modlist ); # initialize datastore - #die "Datastore file 'EMMXSSTORAGE.dbmd' already exists. Exiting.\n" - # if (-e "EMMXSSTORAGE.dbmd"); - #$dstore = DBM::Deep->new( file => "EMMXSSTORAGE.dbmd", - # max_buckets => 64, - # #data_sector_size => 256, - # autoflush => 0, - # ); $dstore = {}; $reccount = 0; # next record ptr $dstore->{tags} = $sfmap->tags; # list of all tags @@ -101,6 +94,22 @@ sub parse_record { for my $f (@fields) { process_field($f, $crec) } + # fill in blank values if needed + for my $mappedtag ( @{ $sfmap->tags }) { + unless (exists $crec->{tmap}{$mappedtag}) { + push @{ $crec->{tags} }, {}; + for my $mappedsub ( @{ $sfmap->subfields($mappedtag) } ) { + my $fieldname = $sfmap->field($mappedtag, $mappedsub); + my $mods = $sfmap->mods($fieldname); + next if $mods->{multi}; + $crec->{tags}[-1]{uni}{$mappedsub} = ''; + $crec->{tags}[-1]{multi} = undef; + $crec->{tags}[-1]{tag} = $mappedtag; + } + push @{ $crec->{tmap}{$mappedtag} }, $#{ $crec->{tags} }; + } + } + # cleanup memory and increment pointer $record->purge; $reccount++; @@ -144,10 +153,6 @@ sub process_field { $crec->{tags}[-1]{uni}{$mappedsub} = '' unless defined $crec->{tags}[-1]{uni}{$mappedsub}; } - for my $mappedtag ( @{ $sfmap->tags }) { - $crec->{tmap}{$mappedtag} = undef - unless defined $crec->{tmap}{$mappedtag}; - } } } @@ -158,9 +163,12 @@ sub process_subs { # handle unmapped tag/subs return unless ($sfmap->has($tag, $code)); - # fetch our datafield struct and fiel + # fetch our datafield struct and field and mods my $dataf = $crec->{tags}[-1]; my $field = $sfmap->field($tag, $code); + my $sep = $sfmap->sep($field); + $allmods{$field} = $sfmap->mods($field) unless $allmods{$field}; + my $mods = $allmods{$field}; # test filters for my $filter ( @{$sfmap->filters($field)} ) { @@ -168,17 +176,34 @@ sub process_subs { } # handle multi modifier - if (my $mods = $sfmap->mods($field)) { - if ($mods->{multi}) { - push @{$dataf->{multi}{$code}}, $sub->text; + if ($mods->{multi}) { + $multis->{$tag}{$code} = 1; + if ($mods->{concatenate}) { + if (exists($dataf->{multi}{$code})) { + $dataf->{multi}{$code}[0] .= $sep . $sub->text; + } else { + push @{$dataf->{multi}{$code}}, $sub->text; + } $multis->{$tag}{$code} = 1; - return; + } else { + push @{$dataf->{multi}{$code}}, $sub->text; + } + return; + } + + + if ($mods->{concatenate}) { + if (exists($dataf->{uni}{$code})) { + $dataf->{uni}{$code} .= $sep . $sub->text; + } else { + $dataf->{uni}{$code} = $sub->text; } + return; } # if this were a multi field, it would be handled already. make sure its a singleton die "Multiple occurances of a non-multi field: $tag$code at rec ", - ($reccount + 1),"\n" if (defined $dataf->{uni}{$code}); + ($reccount + 1),"\n" if (defined $dataf->{uni}{$code} and !$mods->{ignoremulti}); # everything seems okay $dataf->{uni}{$code} = $sub->text; @@ -223,6 +248,20 @@ Returns mapped fieldname when passed a tag, and code sub name { my ($self, $t, $c) = @_; return $sfmap->field($t, $c) } +=head2 first_only + +Returns whether mapped fieldname is to be applied only to first +item in a bib + +=cut + +sub first_only { + my ($self, $t, $c) = @_; + my $field = $sfmap->field($t, $c); + my $mods = $sfmap->mods($field); + return exists($mods->{first}); +} + =head2 get_multis Returns hashref of C<{tag}{code}> for all mapped multi fields