From: Shawn Boyette Date: Mon, 20 Apr 2009 06:35:21 +0000 (+0000) Subject: progress X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=93facadd2821eba167e9dd7e4348b772e3d31337 progress --- diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index 3eb3766..bf9144d 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -67,10 +67,11 @@ sub new { }, $class; # initialize map and taglist + die "Argument 'mapfile' must be specified\n" unless (defined $args{mapfile}); my @mods = keys %{$self->{mods}}; $self->{map} = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile}, mods => \@mods ); - $self->{tags} = $self->{map}->tags; + $self->{data}{tags} = $self->{map}->tags; # initialize twig die "Argument 'marcfile' must be specified\n" unless (defined $args{marcfile}); @@ -89,10 +90,11 @@ sub new { =head2 parse_record -Extracts data from the next record, per the mapping file. Returns 1 on -success, 0 otherwise. +Extracts data from the next record, per the mapping file. Returns a +normalized datastructure (see L for details) on +success; returns 0 otherwise. - while ($m->parse_record) { + while (my $rec = $m->parse_record) { # handle extracted record data } @@ -104,7 +106,7 @@ sub parse_record { # get the next record and wipe current parsed record return 0 unless defined $self->{data}{recs}[ $self->{data}{rptr} ]; my $record = $self->{data}{recs}[ $self->{data}{rptr} ]; - $self->{data}{crec} = {}; + $self->{data}{crec} = { bib => undef, multi => undef }; my @fields = $record->children; for my $f (@fields) @@ -113,6 +115,8 @@ sub parse_record { # cleanup memory and increment pointer $record->purge; $self->{data}{rptr}++; + + return $self->format_record; } =head2 process_field @@ -125,14 +129,17 @@ sub process_field { my $tag = $field->{'att'}->{'tag'}; my $parsed = $self->{data}{crec}; - if ($tag == 903) { - my $sub = $field->first_child('subfield'); - $parsed->{egid} = $sub->text;; - } elsif ($map->has($tag)) { - push @{$parsed->{tags}}, { tag => $tag }; - my @subs = $field->children('subfield'); - for my $sub (@subs) - { $self->process_subs($tag, $sub) } + # datafields + if (defined $tag) { + if ($tag == 903) { + my $sub = $field->first_child('subfield'); + $parsed->{egid} = $sub->text;; + } elsif ($map->has($tag)) { + push @{$parsed->{tags}}, { tag => $tag }; + my @subs = $field->children('subfield'); + for my $sub (@subs) + { $self->process_subs($tag, $sub) } + } } } @@ -158,12 +165,16 @@ sub process_subs { my $data = $self->{data}{crec}{tags}[-1]; my $field = $map->field($tag, $code); - if ($map->mod($field) eq 'multi') { - my $name = $tag . $code; - push @{$data->{multi}{$name}}, $sub->text; - } else { - $data->{uni}{$code} = $sub->text; + + # handle modifiers + if (defined $map->mods($field)) { + if ($map->mods($field) eq 'multi') { + my $name = $tag . $code; + push @{$data->{multi}{$name}}, $sub->text; + } } + + $data->{uni}{$code} = $sub->text; } =head1 PARSED RECORDS @@ -197,6 +208,8 @@ times per record). The keys are composed of tag id and subfield code, catenated (e.g. 901c). The values are the contents of that subfield of that tag. +If there are no tags defined as bib-level, C will be C. + =head3 C This arrayref holds anonymous hashrefs, one for each instance of each @@ -214,6 +227,8 @@ once per instance of a tag (but may occur multiple times in a record due to there being multiple instances of that tag in a record). Keys are subfield codes and values are subfield content. +If no tags are defined as C, it will be C. + =head1 UNMAPPED TAGS { tag_id => { diff --git a/Equinox-Migration/t/03-MapDrivenMARCXMLProc.t b/Equinox-Migration/t/03-MapDrivenMARCXMLProc.t index 3c3540c..c3e3ba0 100644 --- a/Equinox-Migration/t/03-MapDrivenMARCXMLProc.t +++ b/Equinox-Migration/t/03-MapDrivenMARCXMLProc.t @@ -2,9 +2,27 @@ #use Test::More tests => 39; use Test::More qw(no_plan); -use Equinox::Migration::MapDrivenXMLProc; +use Equinox::Migration::MapDrivenMARCXMLProc; + +# fails +eval { my $mp = + Equinox::Migration::MapDrivenMARCXMLProc->new(marcfile => 't/corpus/mdmp-0.txt') }; +is ($@, "Argument 'mapfile' must be specified\n", 'no mapfile'); + +eval { my $mp = + Equinox::Migration::MapDrivenMARCXMLProc->new(mapfile => 't/corpus/mdmpmap-00.txt') }; +is ($@, "Argument 'marcfile' must be specified\n", 'no marcfile'); + +eval { my $mp = Equinox::Migration::MapDrivenMARCXMLProc->new }; +is ($@, "Argument 'mapfile' must be specified\n", 'no mapfile'); + # baseline object creation -my $sm = Equinox::Migration::MapDrivenXMLProc->new(); -is(ref $sm, "Equinox::Migration::MapDrivenXMLProc", "self is self"); +my $mp = Equinox::Migration::MapDrivenMARCXMLProc->new( marcfile => 't/corpus/mdmp-0.txt', + mapfile => 't/corpus/mdmpmap-00.txt', + ); +is(ref $mp, "Equinox::Migration::MapDrivenMARCXMLProc", "self is self"); +# parsing +my $rec = $mp->parse_record; +is (defined $rec, 1);