X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=Equinox-Migration%2Flib%2FEquinox%2FMigration%2FMapDrivenMARCXMLProc.pm;h=ca82ca7e6328c6a17b48de26f1216a791a64cd30;hp=9c698a1dd545c3921ff30bb12f2fccc04c49e452;hb=d1d70c501f6592ae84fc351e4325e5a5f9a67376;hpb=ef8cc7d38891b4fab1ce59c2c9df60ed08f9446c diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index 9c698a1..ca82ca7 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -4,7 +4,6 @@ use warnings; use strict; use XML::Twig; -use DBM::Deep; use Equinox::Migration::SubfieldMapper 1.004; @@ -14,17 +13,20 @@ Equinox::Migration::MapDrivenMARCXMLProc =head1 VERSION -Version 1.002 +Version 1.004 =cut -our $VERSION = '1.002'; +our $VERSION = '1.004'; my $dstore; my $sfmap; my @mods = qw( multi bib required ); +my $multis = {}; +my $reccount; my $verbose = 0; + =head1 SYNOPSIS Foo @@ -49,7 +51,9 @@ and C (the MARC data to be processed). sub new { my ($class, %args) = @_; - my $self = bless { + $verbose = 1 if $args{verbose}; + + my $self = bless { multis => \$multis, }, $class; # initialize map and taglist @@ -58,9 +62,8 @@ sub new { mods => \@mods ); # initialize datastore - $dstore = DBM::Deep->new( file => "EMMXSSTORAGE.dbmd", - data_sector_size => 256 ); - $dstore->{rcnt} = 0; # next record ptr + $dstore = {}; + $reccount = 0; # next record ptr $dstore->{tags} = $sfmap->tags; # list of all tags $self->{data} = $dstore; @@ -76,8 +79,6 @@ sub new { return $self; } -sub DESTROY { unlink "EMMXSSTORAGE.dbmd" } - =head2 parse_record Extracts data from the next record, per the mapping file. @@ -94,11 +95,14 @@ sub parse_record { # cleanup memory and increment pointer $record->purge; - $dstore->{rcnt}++; + $reccount++; # check for required fields - check_required(); + check_required($crec); push @{ $dstore->{recs} }, $crec; + + print STDERR "$reccount\n" + if ($verbose and !($reccount % 1000)); } sub process_field { @@ -125,8 +129,9 @@ sub process_field { { process_subs($tag, $sub, $crec) } # check map to ensure all declared tags and subs have a value - my $mods = $sfmap->mods($field); for my $mappedsub ( @{ $sfmap->subfields($tag) } ) { + my $fieldname = $sfmap->field($tag, $mappedsub); + my $mods = $sfmap->mods($fieldname); next if $mods->{multi}; $crec->{tags}[-1]{uni}{$mappedsub} = '' unless defined $crec->{tags}[-1]{uni}{$mappedsub}; @@ -145,26 +150,27 @@ sub process_subs { # handle unmapped tag/subs return unless ($sfmap->has($tag, $code)); - # fetch our datafield struct and fieldname + # fetch our datafield struct and fiel my $dataf = $crec->{tags}[-1]; my $field = $sfmap->field($tag, $code); - $crec->{names}{$tag}{$code} = $field; # test filters for my $filter ( @{$sfmap->filters($field)} ) { return if ($sub->text =~ /$filter/i); } + # handle multi modifier if (my $mods = $sfmap->mods($field)) { if ($mods->{multi}) { push @{$dataf->{multi}{$code}}, $sub->text; + $multis->{$tag}{$code} = 1; return; } } # if this were a multi field, it would be handled already. make sure its a singleton die "Multiple occurances of a non-multi field: $tag$code at rec ", - ($dstore->{rcnt} + 1),"\n" if (defined $dataf->{uni}{$code}); + ($reccount + 1),"\n" if (defined $dataf->{uni}{$code}); # everything seems okay $dataf->{uni}{$code} = $sub->text; @@ -172,8 +178,8 @@ sub process_subs { sub check_required { + my ($crec) = @_; my $mods = $sfmap->mods; - my $crec = $dstore->{crec}; for my $tag_id (keys %{$mods->{required}}) { for my $code (@{$mods->{required}{$tag_id}}) { @@ -184,7 +190,7 @@ sub check_required { $found = 1 if ($tag->{uni}{$code}); } - die "Required mapping $tag_id$code not found in rec ",$dstore->{rcnt},"\n" + die "Required mapping $tag_id$code not found in rec ",$reccount,"\n" unless ($found); } } @@ -201,13 +207,24 @@ sub recno { my ($self) = @_; return $self->{data}{rcnt} } =head2 name -Returns mapped fieldname when passed a record number, tag, and code +Returns mapped fieldname when passed a tag, and code - my $name = $m->name(3,999,'a'); + my $name = $m->name(999,'a'); =cut -sub name { my ($self, $r, $t, $c) = @_; return $dstore->{recs}[$r]{names}{$t}{$c} }; +sub name { my ($self, $t, $c) = @_; return $sfmap->field($t, $c) } + +=head2 get_multis + +Returns hashref of C<{tag}{code}> for all mapped multi fields + +=cut + +sub get_multis { + my ($self) = @_; + return $multis; +} =head1 MODIFIERS