X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=Equinox-Migration%2Flib%2FEquinox%2FMigration%2FMapDrivenMARCXMLProc.pm;h=77a96dc535a6e5ee3b8ed58044938ec126ba8503;hp=2e4a455a80c4a65c01d79a8da54e78e98c3e26d9;hb=f066013ab0a65299aaddf1466c438585e95ee2bf;hpb=00059d35f8272adedcbbef95066acae210c613ec diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index 2e4a455..77a96dc 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -4,7 +4,6 @@ use warnings; use strict; use XML::Twig; -use DBM::Deep; use Equinox::Migration::SubfieldMapper 1.004; @@ -14,15 +13,16 @@ Equinox::Migration::MapDrivenMARCXMLProc =head1 VERSION -Version 1.003 +Version 1.005 =cut -our $VERSION = '1.003'; +our $VERSION = '1.005'; my $dstore; my $sfmap; -my @mods = qw( multi bib required ); +my @modlist = qw( multi ignoremulti bib required first concatenate ); +my %allmods = (); my $multis = {}; my $reccount; my $verbose = 0; @@ -60,15 +60,10 @@ sub new { # initialize map and taglist die "Argument 'mapfile' must be specified\n" unless ($args{mapfile}); $sfmap = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile}, - mods => \@mods ); + mods => \@modlist ); # initialize datastore - die "Datastore file 'EMMXSSTORAGE.dbmd' already exists. Exiting.\n" - if (-e "EMMXSSTORAGE.dbmd"); - $dstore = DBM::Deep->new( file => "EMMXSSTORAGE.dbmd", - data_sector_size => 256, - autoflush => 0, - ); + $dstore = {}; $reccount = 0; # next record ptr $dstore->{tags} = $sfmap->tags; # list of all tags $self->{data} = $dstore; @@ -85,8 +80,6 @@ sub new { return $self; } -sub DESTROY { unlink "EMMXSSTORAGE.dbmd" } - =head2 parse_record Extracts data from the next record, per the mapping file. @@ -101,12 +94,28 @@ sub parse_record { for my $f (@fields) { process_field($f, $crec) } + # fill in blank values if needed + for my $mappedtag ( @{ $sfmap->tags }) { + unless (exists $crec->{tmap}{$mappedtag}) { + push @{ $crec->{tags} }, {}; + for my $mappedsub ( @{ $sfmap->subfields($mappedtag) } ) { + my $fieldname = $sfmap->field($mappedtag, $mappedsub); + my $mods = $sfmap->mods($fieldname); + next if $mods->{multi}; + $crec->{tags}[-1]{uni}{$mappedsub} = ''; + $crec->{tags}[-1]{multi} = undef; + $crec->{tags}[-1]{tag} = $mappedtag; + } + push @{ $crec->{tmap}{$mappedtag} }, $#{ $crec->{tags} }; + } + } + # cleanup memory and increment pointer $record->purge; $reccount++; # check for required fields - check_required(); + check_required($crec); push @{ $dstore->{recs} }, $crec; print STDERR "$reccount\n" @@ -144,10 +153,6 @@ sub process_field { $crec->{tags}[-1]{uni}{$mappedsub} = '' unless defined $crec->{tags}[-1]{uni}{$mappedsub}; } - for my $mappedtag ( @{ $sfmap->tags }) { - $crec->{tmap}{$mappedtag} = undef - unless defined $crec->{tmap}{$mappedtag}; - } } } @@ -158,9 +163,12 @@ sub process_subs { # handle unmapped tag/subs return unless ($sfmap->has($tag, $code)); - # fetch our datafield struct and fiel + # fetch our datafield struct and field and mods my $dataf = $crec->{tags}[-1]; my $field = $sfmap->field($tag, $code); + my $sep = $sfmap->sep($field); + $allmods{$field} = $sfmap->mods($field) unless $allmods{$field}; + my $mods = $allmods{$field}; # test filters for my $filter ( @{$sfmap->filters($field)} ) { @@ -168,17 +176,34 @@ sub process_subs { } # handle multi modifier - if (my $mods = $sfmap->mods($field)) { - if ($mods->{multi}) { - push @{$dataf->{multi}{$code}}, $sub->text; + if ($mods->{multi}) { + $multis->{$tag}{$code} = 1; + if ($mods->{concatenate}) { + if (exists($dataf->{multi}{$code})) { + $dataf->{multi}{$code}[0] .= $sep . $sub->text; + } else { + push @{$dataf->{multi}{$code}}, $sub->text; + } $multis->{$tag}{$code} = 1; - return; + } else { + push @{$dataf->{multi}{$code}}, $sub->text; } + return; + } + + + if ($mods->{concatenate}) { + if (exists($dataf->{uni}{$code})) { + $dataf->{uni}{$code} .= $sep . $sub->text; + } else { + $dataf->{uni}{$code} = $sub->text; + } + return; } # if this were a multi field, it would be handled already. make sure its a singleton die "Multiple occurances of a non-multi field: $tag$code at rec ", - ($reccount + 1),"\n" if (defined $dataf->{uni}{$code}); + ($reccount + 1),"\n" if (defined $dataf->{uni}{$code} and !$mods->{ignoremulti}); # everything seems okay $dataf->{uni}{$code} = $sub->text; @@ -186,8 +211,8 @@ sub process_subs { sub check_required { + my ($crec) = @_; my $mods = $sfmap->mods; - my $crec = $dstore->{crec}; for my $tag_id (keys %{$mods->{required}}) { for my $code (@{$mods->{required}{$tag_id}}) { @@ -223,6 +248,20 @@ Returns mapped fieldname when passed a tag, and code sub name { my ($self, $t, $c) = @_; return $sfmap->field($t, $c) } +=head2 first_only + +Returns whether mapped fieldname is to be applied only to first +item in a bib + +=cut + +sub first_only { + my ($self, $t, $c) = @_; + my $field = $sfmap->field($t, $c); + my $mods = $sfmap->mods($field); + return exists($mods->{first}); +} + =head2 get_multis Returns hashref of C<{tag}{code}> for all mapped multi fields