From 8e63d0bddf6634ebf2a8d5b8e160c285c6676d0e Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Wed, 29 Apr 2009 20:01:38 +0000 Subject: [PATCH] yanking bib references; adding tag map to MDMP; cleanup and new test corpus data --- Equinox-Migration/MANIFEST | 4 ++ .../lib/Equinox/Migration/MARCXMLSampler.pm | 9 --- .../lib/Equinox/Migration/MapDrivenMARCXMLProc.pm | 52 ++++---------------- Equinox-Migration/t/03-MapDrivenMARCXMLProc.t | 21 ++++++++- Equinox-Migration/t/corpus/mdmpmap-04.txt | 3 + 5 files changed, 37 insertions(+), 52 deletions(-) create mode 100644 Equinox-Migration/t/corpus/mdmpmap-04.txt diff --git a/Equinox-Migration/MANIFEST b/Equinox-Migration/MANIFEST index b5b7c15..da963bb 100644 --- a/Equinox-Migration/MANIFEST +++ b/Equinox-Migration/MANIFEST @@ -8,3 +8,7 @@ lib/Equinox/Migration/MARCXMLSampler.pm lib/Equinox/Migration/SubfieldMapper.pm lib/Equinox/Migration/SubfieldMapper.pm t/00-load.t +t/01-SimpleTagList.t +t/02-SubfieldMapper.t +t/03-MapDrivenMARCXMLProc.t +t/04-MARCXMLSampler.t diff --git a/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm b/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm index e0cc670..ef89c6a 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm @@ -6,15 +6,6 @@ use strict; use XML::Twig; use Equinox::Migration::SimpleTagList 1.001; -# FIXME -# -# sample functionality should be extracted into a new module which -# uses E::M::SM to drive sampling of individual datafields, and -# reports ALL datafields which occur -# -# --sample should give the list of all datafields -# --samplefile should take a SM map as teh argument and introspect the mapped datafields - =head1 NAME diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index 904ac25..666551f 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ 
b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -6,15 +6,6 @@ use strict; use XML::Twig; use Equinox::Migration::SubfieldMapper 1.003; -# FIXME -# -# sample functionality should be extracted into a new module which -# uses E::M::SM to drive sampling of individual datafields, and -# reports ALL datafields which occur -# -# --sample should give the list of all datafields -# --samplefile should take a SM map as teh argument and introspect the mapped datafields - =head1 NAME @@ -54,12 +45,12 @@ sub new { my ($class, %args) = @_; my $self = bless { mods => { multi => {}, - bib => {}, required => {}, }, data => { recs => undef, # X::T record objects rptr => 0, # next record pointer crec => undef, # parsed record storage + tmap => undef, # tag_id-to-tag array map }, }, $class; @@ -103,7 +94,8 @@ sub parse_record { # get the next record and wipe current parsed record return 0 unless defined $self->{data}{recs}[ $self->{data}{rptr} ]; my $record = $self->{data}{recs}[ $self->{data}{rptr} ]; - $self->{data}{crec} = { egid => undef, bib => undef, tags => undef }; + $self->{data}{crec} = { egid => undef, tags => undef }; + $self->{data}{tmap} = {}; my @fields = $record->children; for my $f (@fields) @@ -124,6 +116,7 @@ sub process_field { my $map = $self->{map}; my $tag = $field->{'att'}->{'tag'}; my $crec = $self->{data}{crec}; + my $tmap = $self->{data}{tmap}; # leader unless (defined $tag) { @@ -139,6 +132,7 @@ sub process_field { } if ($map->has($tag)) { push @{$crec->{tags}}, { tag => $tag, uni => undef, multi => undef }; + push @{$tmap->{$tag}}, (@{$crec->{tags}} - 1); my @subs = $field->children('subfield'); for my $sub (@subs) { $self->process_subs($tag, $sub) } @@ -188,7 +182,6 @@ sub check_required { for my $code (@{$mods->{required}{$tag_id}}) { my $found = 0; - $found = 1 if ($crec->{bib}{($tag_id . $code)}); for my $tag (@{$crec->{tags}}) { $found = 1 if ($tag->{multi}{($tag_id . 
$code)}); $found = 1 if ($tag->{uni}{$code}); @@ -219,19 +212,11 @@ Occurring zero or one time is legal for a C<multi> mapping. A mapping which is not flagged as C<multi>, but which occurs more than once per datafield will cause a fatal error. -=head2 bib - -The C<bib> modifier declares that a mapping is "bib-level", and should -be encountered once per B<record> instead of once per B<datafield> -- -which is another way of saying that it occurs in a non-repeating -datafield or in a controlfield. - =head2 required -By default, if a mapping does not occur in a datafield (or record, in -the case of C<bib> mappings), processing continues normally. if a -mapping has the C<required> modifier, however, it must appear, or a -fatal error will occur. +By default, if a mapping does not occur in a datafield, processing +continues normally. If a mapping has the C<required> modifier, +however, it must appear, or a fatal error will occur. =head1 PARSED RECORDS @@ -243,12 +228,7 @@ Given: Then C<$rec> will look like: { - egid => evergreen_record_id, - bib => { - (tag_id . sub_code)1 => value1, - (tag_id . sub_code)2 => value2, - ... - }, + egid => evergreen_record_id, tags => [ { tag => tag_id, @@ -260,19 +240,7 @@ Then C<$rec> will look like: } That is, there is an C<egid> key which points to the Evergreen ID of -that record, a C<bib> key which points to a hashref, and a C<tags> -key which points to an arrayref. - -=head3 C<bib> - -A reference to a hash which holds extracted data which occurs only -once per record (and is therefore "bib-level"; the default assumption -is that a tag/subfield pair can occur multiple times per record). The -keys are composed of tag id and subfield code, catenated -(e.g. 901c). The values are the contents of that subfield of that tag. - -If there are no tags defined as bib-level in the mapfile, C<bib> will -be C<undef>. +that record, and a C<tags> key which points to an arrayref. 
=head3 C diff --git a/Equinox-Migration/t/03-MapDrivenMARCXMLProc.t b/Equinox-Migration/t/03-MapDrivenMARCXMLProc.t index 938363b..d258a42 100644 --- a/Equinox-Migration/t/03-MapDrivenMARCXMLProc.t +++ b/Equinox-Migration/t/03-MapDrivenMARCXMLProc.t @@ -34,7 +34,6 @@ is ($rec->{tags}[0]{uni}{a}, "MYS DEM", 'single-ocurrance subfield "a" should be is ($rec->{tags}[0]{uni}{b}, undef, 'only one uni subfield defined'); is ($rec->{tags}[0]{multi}, undef, 'no multi subfields were defined'); is ($rec->{tags}[1], undef, 'Only one tag in map'); -is ($rec->{bib}, undef, 'No bib-level fields in map'); # let's go ahead and look at the rest of the file $rec = $mp->parse_record; is ($rec->{egid}, 9000001, '903 #2'); @@ -81,3 +80,23 @@ is ($@, "Required mapping 999s not found in rec 1\n", '999$s removed from this r eval { $rec = $mp->parse_record }; is ($@, "", '999$s exists here tho'); +# map-04 has fields in 999 and 250, and multi data +$mp = Equinox::Migration::MapDrivenMARCXMLProc->new( marcfile => 't/corpus/mdmp-0.txt', + mapfile => 't/corpus/mdmpmap-04.txt'); +$rec = $mp->parse_record; +is ($rec->{tags}[0]{tag}, 250, 'should be 250'); +is ($rec->{tags}[0]{uni}{a}, "1st ed.", '999$a'); +is ($rec->{tags}[1]{tag}, 999, 'should be 999'); +is ($rec->{tags}[1]{uni}{a}, "MYS DEM", '999$a'); +is_deeply ($rec->{tags}[1]{multi}{'999x'}, ['MYSTERY'], '999$x - multi'); +is_deeply ($mp->{data}{tmap}{250}, [0], 'tag map test 1a'); +is_deeply ($mp->{data}{tmap}{999}, [1], 'tag map test 1b'); +$rec = $mp->parse_record; +$rec = $mp->parse_record; +$rec = $mp->parse_record; +is ($rec->{tags}[0]{tag}, 999, '250 doesnt exist in this record'); +is ($rec->{tags}[0]{uni}{a}, "FIC DEV", 'subfield value 4'); +is_deeply ($rec->{tags}[0]{multi}{'999x'}, ['FICTION','FICTION2','FICTION3','FICTION4'], + '999$x - multi'); +is ($mp->{data}{tmap}{250}, undef, 'tag map test 2a'); +is_deeply ($mp->{data}{tmap}{999}, [0], 'tag map test 2b'); diff --git a/Equinox-Migration/t/corpus/mdmpmap-04.txt 
b/Equinox-Migration/t/corpus/mdmpmap-04.txt new file mode 100644 index 0000000..8ac86f4 --- /dev/null +++ b/Equinox-Migration/t/corpus/mdmpmap-04.txt @@ -0,0 +1,3 @@ +call_num 999 a +multi_field 999 x m:multi +edition 250 a -- 1.7.2.5