From: Shawn Boyette Date: Sat, 25 Apr 2009 04:12:18 +0000 (+0000) Subject: all mapped subfields will now be DEFINED even if their values are NULL X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=a85d1ead29a140aefd716b303f0f0e67a96cee75 all mapped subfields will now be DEFINED even if their values are NULL --- diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index dc8a8ee..ab053bb 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -4,7 +4,7 @@ use warnings; use strict; use XML::Twig; -use Equinox::Migration::SubfieldMapper; +use Equinox::Migration::SubfieldMapper 1.002; =head1 NAME @@ -119,35 +119,39 @@ sub parse_record { return $self->{data}{crec}; } -=head2 process_field - -=cut - sub process_field { my ($self, $field) = @_; my $map = $self->{map}; my $tag = $field->{'att'}->{'tag'}; my $crec = $self->{data}{crec}; + # leader + unless (defined $tag) { + #FIXME + return; + } + # datafields - if (defined $tag) { - if ($tag == 903) { - my $sub = $field->first_child('subfield'); - $crec->{egid} = $sub->text;; - } elsif ($map->has($tag)) { - push @{$crec->{tags}}, { tag => $tag, uni => undef, multi => undef }; - my @subs = $field->children('subfield'); - for my $sub (@subs) - { $self->process_subs($tag, $sub) } - # check map to ensure all declared subs are in + if ($tag == 903) { + my $sub = $field->first_child('subfield'); + $crec->{egid} = $sub->text; + return; + } + if ($map->has($tag)) { + push @{$crec->{tags}}, { tag => $tag, uni => undef, multi => undef }; + my @subs = $field->children('subfield'); + for my $sub (@subs) + { $self->process_subs($tag, $sub) } + # check map to ensure all declared subs have a value + my $mods = $map->mods($field); + for my $mappedsub ( @{ $map->subfields($tag) } ) { + next if $mods->{multi}; + $crec->{tags}[-1]{uni}{$mappedsub} = '' + unless defined $crec->{tags}[-1]{uni}{$mappedsub}; } } } -=head2 process_subs - -=cut - sub process_subs { my ($self, $tag, $sub) = @_; my $map = $self->{map}; @@ -167,6 +171,7 @@ sub process_subs { return; } + # fetch our datafield struct and fieldname my $dataf = $self->{data}{crec}{tags}[-1]; my $field = $map->field($tag, $code); @@ -183,6 +188,13 @@ sub process_subs { =head1 PARSED RECORDS +Given: + + my $m = Equinox::Migration::MapDrivenMARCXMLProc->new(ARGUMENTS); + $rec = $m->parse_record; + +Then C<$rec> will look like: + { egid => evergreen_record_id, bib => { @@ -206,32 +218,40 @@ key which points to an arrayref. =head3 C -This hashref holds extracted data which should occur once per record -(the default assumption is that a tag/subfield pair can occur multiple -times per record). The keys are composed of tag id and subfield code, -catenated (e.g. 901c). The values are the contents of that subfield of -that tag. +A reference to a hash which holds extracted data which occurs only +once per record (and is therefore "bib-level"; the default assumption +is that a tag/subfield pair can occur multiple times per record). The +keys are composed of tag id and subfield code, catenated +(e.g. 901c). The values are the contents of that subfield of that tag. -If there are no tags defined as bib-level, C will be C. +If there are no tags defined as bib-level in the mapfile, C will +be C. =head3 C -This arrayref holds anonymous hashrefs, one for each instance of each -tag which occurs in the map. Each tag hashref holds its own id -(e.g. C<998>), and two more hashrefs, C and C. +A reference to a list of anonymous hashes, one for each instance of +each tag which occurs in the map. + +Each tag hash holds its own id (e.g. C<998>), and two references to +two more hashrefs, C and C. -The C hashref holds the extracted data for tag/sub mappings -which have the C modifier on them. The keys in C are +The C hash holds the extracted data for tag/sub mappings which +have the C modifier on them. The keys in C are composed of the tag id and subfield code, catenated (e.g. C<901c>). The values are arrayrefs containing the content of all -instances of that subfield in that instance of that tag. - -The C hashref holds data for tag/sub mappings which occur only -once per instance of a tag (but may occur multiple times in a record -due to there being multiple instances of that tag in a record). Keys -are subfield codes and values are subfield content. - -If no tags are defined as C, it will be C. +instances of that subfield in that instance of that tag. If no tags +are defined as C, it will be C. + +The C hash holds data for tag/sub mappings which occur only once +per instance of a tag (but may occur multiple times in a record due to +there being multiple instances of that tag in a record). Keys are +subfield codes and values are subfield content. + +All C subfields occuring in the map are guaranteed to be +defined. Sufields which are mapped but do not occur in a particular +datafield will be given a value of '' (the null string) in the current +record struct. Oppose subfields which are not mapped, which will be +C. =head1 UNMAPPED TAGS