X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=Equinox-Migration%2Flib%2FEquinox%2FMigration%2FMapDrivenMARCXMLProc.pm;h=dc8a8eecec173b7967a3998111045278b0eaf1b7;hp=3eb37662852a6eca30ca2a7e3d4e7129c20a06eb;hb=047d6bb9cda46d837eff371322290e530266c065;hpb=6122a9e8332fdd507a98251a9eed96662e05647a diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index 3eb3766..dc8a8ee 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -67,10 +67,11 @@ sub new { }, $class; # initialize map and taglist + die "Argument 'mapfile' must be specified\n" unless (defined $args{mapfile}); my @mods = keys %{$self->{mods}}; $self->{map} = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile}, mods => \@mods ); - $self->{tags} = $self->{map}->tags; + $self->{data}{tags} = $self->{map}->tags; # initialize twig die "Argument 'marcfile' must be specified\n" unless (defined $args{marcfile}); @@ -89,10 +90,11 @@ sub new { =head2 parse_record -Extracts data from the next record, per the mapping file. Returns 1 on -success, 0 otherwise. +Extracts data from the next record, per the mapping file. Returns a +normalized datastructure (see L for details) on +success; returns 0 otherwise. 
- while ($m->parse_record) { + while (my $rec = $m->parse_record) { # handle extracted record data } @@ -104,7 +106,7 @@ sub parse_record { # get the next record and wipe current parsed record return 0 unless defined $self->{data}{recs}[ $self->{data}{rptr} ]; my $record = $self->{data}{recs}[ $self->{data}{rptr} ]; - $self->{data}{crec} = {}; + $self->{data}{crec} = { egid => undef, bib => undef, tags => undef }; my @fields = $record->children; for my $f (@fields) @@ -113,6 +115,8 @@ sub parse_record { # cleanup memory and increment pointer $record->purge; $self->{data}{rptr}++; + + return $self->{data}{crec}; } =head2 process_field @@ -123,16 +127,20 @@ sub process_field { my ($self, $field) = @_; my $map = $self->{map}; my $tag = $field->{'att'}->{'tag'}; - my $parsed = $self->{data}{crec}; - - if ($tag == 903) { - my $sub = $field->first_child('subfield'); - $parsed->{egid} = $sub->text;; - } elsif ($map->has($tag)) { - push @{$parsed->{tags}}, { tag => $tag }; - my @subs = $field->children('subfield'); - for my $sub (@subs) - { $self->process_subs($tag, $sub) } + my $crec = $self->{data}{crec}; + + # datafields + if (defined $tag) { + if ($tag == 903) { + my $sub = $field->first_child('subfield'); + $crec->{egid} = $sub->text;; + } elsif ($map->has($tag)) { + push @{$crec->{tags}}, { tag => $tag, uni => undef, multi => undef }; + my @subs = $field->children('subfield'); + for my $sub (@subs) + { $self->process_subs($tag, $sub) } + # check map to ensure all declared subs are in + } } } @@ -151,18 +159,25 @@ sub process_subs { my $s = $self->{data}{stag}; return unless (defined $s->{$tag}); + # set a value, total-seen count and records-seen-in count $u->{$tag}{$code}{value} = $sub->text unless defined $u->{$tag}{$code}; $u->{$tag}{$code}{count}++; + $u->{$tag}{$code}{rcnt}++ unless ($u->{$tag}{$code}{last} == $self->{data}{rptr}); + $u->{$tag}{$code}{last} = $self->{data}{rptr}; return; } - my $data = $self->{data}{crec}{tags}[-1]; + my $dataf = 
$self->{data}{crec}{tags}[-1]; my $field = $map->field($tag, $code); - if ($map->mod($field) eq 'multi') { - my $name = $tag . $code; - push @{$data->{multi}{$name}}, $sub->text; + + # handle modifiers, or slug data in normally + if (my $mods = $map->mods($field)) { + if ($mods->{multi}) { + my $name = $tag . $code; + push @{$dataf->{multi}{$name}}, $sub->text; + } } else { - $data->{uni}{$code} = $sub->text; + $dataf->{uni}{$code} = $sub->text; } } @@ -197,6 +212,8 @@ times per record). The keys are composed of tag id and subfield code, catenated (e.g. 901c). The values are the contents of that subfield of that tag. +If there are no tags defined as bib-level, C<bib> will be C<undef>. + =head3 C<tags> This arrayref holds anonymous hashrefs, one for each instance of each @@ -214,6 +231,8 @@ once per instance of a tag (but may occur multiple times in a record due to there being multiple instances of that tag in a record). Keys are subfield codes and values are subfield content. +If no tags are defined as C<uni>, it will be C<undef>. + =head1 UNMAPPED TAGS { tag_id => {