X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=Equinox-Migration%2Flib%2FEquinox%2FMigration%2FMapDrivenMARCXMLProc.pm;h=77a96dc535a6e5ee3b8ed58044938ec126ba8503;hp=9c698a1dd545c3921ff30bb12f2fccc04c49e452;hb=f066013ab0a65299aaddf1466c438585e95ee2bf;hpb=ef8cc7d38891b4fab1ce59c2c9df60ed08f9446c diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index 9c698a1..77a96dc 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -4,7 +4,6 @@ use warnings; use strict; use XML::Twig; -use DBM::Deep; use Equinox::Migration::SubfieldMapper 1.004; @@ -14,17 +13,21 @@ Equinox::Migration::MapDrivenMARCXMLProc =head1 VERSION -Version 1.002 +Version 1.005 =cut -our $VERSION = '1.002'; +our $VERSION = '1.005'; my $dstore; my $sfmap; -my @mods = qw( multi bib required ); +my @modlist = qw( multi ignoremulti bib required first concatenate ); +my %allmods = (); +my $multis = {}; +my $reccount; my $verbose = 0; + =head1 SYNOPSIS Foo @@ -49,18 +52,19 @@ and C (the MARC data to be processed). sub new { my ($class, %args) = @_; - my $self = bless { + $verbose = 1 if $args{verbose}; + + my $self = bless { multis => \$multis, }, $class; # initialize map and taglist die "Argument 'mapfile' must be specified\n" unless ($args{mapfile}); $sfmap = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile}, - mods => \@mods ); + mods => \@modlist ); # initialize datastore - $dstore = DBM::Deep->new( file => "EMMXSSTORAGE.dbmd", - data_sector_size => 256 ); - $dstore->{rcnt} = 0; # next record ptr + $dstore = {}; + $reccount = 0; # next record ptr $dstore->{tags} = $sfmap->tags; # list of all tags $self->{data} = $dstore; @@ -76,8 +80,6 @@ sub new { return $self; } -sub DESTROY { unlink "EMMXSSTORAGE.dbmd" } - =head2 parse_record Extracts data from the next record, per the mapping file. @@ -92,13 +94,32 @@ sub parse_record { for my $f (@fields) { process_field($f, $crec) } + # fill in blank values if needed + for my $mappedtag ( @{ $sfmap->tags }) { + unless (exists $crec->{tmap}{$mappedtag}) { + push @{ $crec->{tags} }, {}; + for my $mappedsub ( @{ $sfmap->subfields($mappedtag) } ) { + my $fieldname = $sfmap->field($mappedtag, $mappedsub); + my $mods = $sfmap->mods($fieldname); + next if $mods->{multi}; + $crec->{tags}[-1]{uni}{$mappedsub} = ''; + $crec->{tags}[-1]{multi} = undef; + $crec->{tags}[-1]{tag} = $mappedtag; + } + push @{ $crec->{tmap}{$mappedtag} }, $#{ $crec->{tags} }; + } + } + # cleanup memory and increment pointer $record->purge; - $dstore->{rcnt}++; + $reccount++; # check for required fields - check_required(); + check_required($crec); push @{ $dstore->{recs} }, $crec; + + print STDERR "$reccount\n" + if ($verbose and !($reccount % 1000)); } sub process_field { @@ -125,16 +146,13 @@ sub process_field { { process_subs($tag, $sub, $crec) } # check map to ensure all declared tags and subs have a value - my $mods = $sfmap->mods($field); for my $mappedsub ( @{ $sfmap->subfields($tag) } ) { + my $fieldname = $sfmap->field($tag, $mappedsub); + my $mods = $sfmap->mods($fieldname); next if $mods->{multi}; $crec->{tags}[-1]{uni}{$mappedsub} = '' unless defined $crec->{tags}[-1]{uni}{$mappedsub}; } - for my $mappedtag ( @{ $sfmap->tags }) { - $crec->{tmap}{$mappedtag} = undef - unless defined $crec->{tmap}{$mappedtag}; - } } } @@ -145,26 +163,47 @@ sub process_subs { # handle unmapped tag/subs return unless ($sfmap->has($tag, $code)); - # fetch our datafield struct and fieldname + # fetch our datafield struct and field and mods my $dataf = $crec->{tags}[-1]; my $field = $sfmap->field($tag, $code); - $crec->{names}{$tag}{$code} = $field; + my $sep = $sfmap->sep($field); + $allmods{$field} = $sfmap->mods($field) unless $allmods{$field}; + my $mods = $allmods{$field}; # test filters for my $filter ( @{$sfmap->filters($field)} ) { return if ($sub->text =~ /$filter/i); } + # handle multi modifier - if (my $mods = $sfmap->mods($field)) { - if ($mods->{multi}) { + if ($mods->{multi}) { + $multis->{$tag}{$code} = 1; + if ($mods->{concatenate}) { + if (exists($dataf->{multi}{$code})) { + $dataf->{multi}{$code}[0] .= $sep . $sub->text; + } else { + push @{$dataf->{multi}{$code}}, $sub->text; + } + $multis->{$tag}{$code} = 1; + } else { push @{$dataf->{multi}{$code}}, $sub->text; - return; } + return; + } + + + if ($mods->{concatenate}) { + if (exists($dataf->{uni}{$code})) { + $dataf->{uni}{$code} .= $sep . $sub->text; + } else { + $dataf->{uni}{$code} = $sub->text; + } + return; } # if this were a multi field, it would be handled already. make sure its a singleton die "Multiple occurances of a non-multi field: $tag$code at rec ", - ($dstore->{rcnt} + 1),"\n" if (defined $dataf->{uni}{$code}); + ($reccount + 1),"\n" if (defined $dataf->{uni}{$code} and !$mods->{ignoremulti}); # everything seems okay $dataf->{uni}{$code} = $sub->text; @@ -172,8 +211,8 @@ sub process_subs { sub check_required { + my ($crec) = @_; my $mods = $sfmap->mods; - my $crec = $dstore->{crec}; for my $tag_id (keys %{$mods->{required}}) { for my $code (@{$mods->{required}{$tag_id}}) { @@ -184,7 +223,7 @@ sub check_required { $found = 1 if ($tag->{uni}{$code}); } - die "Required mapping $tag_id$code not found in rec ",$dstore->{rcnt},"\n" + die "Required mapping $tag_id$code not found in rec ",$reccount,"\n" unless ($found); } } @@ -201,13 +240,38 @@ sub recno { my ($self) = @_; return $self->{data}{rcnt} } =head2 name -Returns mapped fieldname when passed a record number, tag, and code +Returns mapped fieldname when passed a tag, and code + + my $name = $m->name(999,'a'); + +=cut + +sub name { my ($self, $t, $c) = @_; return $sfmap->field($t, $c) } + +=head2 first_only - my $name = $m->name(3,999,'a'); +Returns whether mapped fieldname is to be applied only to first +item in a bib =cut -sub name { my ($self, $r, $t, $c) = @_; return $dstore->{recs}[$r]{names}{$t}{$c} }; +sub first_only { + my ($self, $t, $c) = @_; + my $field = $sfmap->field($t, $c); + my $mods = $sfmap->mods($field); + return exists($mods->{first}); +} + +=head2 get_multis + +Returns hashref of C<{tag}{code}> for all mapped multi fields + +=cut + +sub get_multis { + my ($self) = @_; + return $multis; +} =head1 MODIFIERS