From f31b41545e735198c20f969107794b8e024ecfda Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Mon, 6 Apr 2009 09:15:48 +0000 Subject: [PATCH] E::M::SM retooling --- .../lib/Equinox/Migration/SubfieldMapper.pm | 135 +++++++++++++------ Equinox-Migration/t/02-SubfieldMapper.t | 19 ++-- Equinox-Migration/t/corpus/sm0.txt | 6 +- 3 files changed, 104 insertions(+), 56 deletions(-) diff --git a/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm b/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm index 7d6f069..ae33db0 100644 --- a/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm +++ b/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm @@ -51,7 +51,7 @@ Returns a E::M::STL object. sub new { my ($class, %args) = @_; - my $self = bless { conf => { mods => { multi => 1, bib => 1, req => 1, bibreq => 1 } }, + my $self = bless { conf => { mods => undef }, fields => {}, tags => {} }, $class; @@ -64,12 +64,19 @@ sub new { } } + if ($args{mods}) { + die "Argument 'mods' is wrong type\n" + unless (ref $args{mods} eq "ARRAY"); + for my $mod ( @{$args{mods}} ) + { $self->{conf}{mods}{$mod} = 1 } + } + return $self; } =head2 has -Ask it whether you mapping has various things, and it'll let you know. +Ask it whether your mapping has various things, and it'll let you know. $sm->has('fieldname') # is this fieldname mapped? $sm->has(901) # are there any mappings for this tag? @@ -126,33 +133,42 @@ sub field { return $self->{tags}{$tag}{$sub}; } -=head2 mod +=head2 mods -Returns the modifier set on a mapping. +Returns the modifiers set on a mapping. - if ($sm->mod('field) eq "bib") + $self->mods('fieldname') -If there is no modifier, C<0> will be returned. At the moment, the -valid mappings are +If there are no modifiers, C will be returned. Else a listref +will be returned. - * multi - This field is expected to be seen multiple times per - datafield +=cut - * bib - This is a bib-level field, and is expected to be seen only - once per record (normal is once per datafield) +sub mods { + my ($self, $field) = @_; + return undef unless $self->has($field); + return $self->{fields}{$field}{mods}; +} - * req - This field is required to occur before output +=head2 filters - * bibreq - Both 'bib' and 'req' +Returns the content filters set on a mapping + + $self->filters('fieldname') + +If there are no filters, C will be returned. Else a listref +will be returned. =cut -sub mod { +sub filters { my ($self, $field) = @_; return undef unless $self->has($field); - return $self->{fields}{$field}{mod}; + return $self->{fields}{$field}{filt}; } + + =head2 generate Generate initial mapping from file. @@ -170,36 +186,64 @@ sub generate { chomp; my @tokens = split /\s+/; - if (defined $tokens[3]) { - $self->add( field => $tokens[0], tag => $tokens[1], - sub => $tokens[2], mod => $tokens[3] ); - } else { - $self->add( field => $tokens[0], tag => $tokens[1], sub => $tokens[2] ); + my $map = { mods => [], filt => [] }; + $map->{field} = shift @tokens; + $map->{tag} = shift @tokens; + for my $tok (@tokens) { + last if ($tok =~ m/^#/); + if ($tok =~ m/^m:/) + { push @{$map->{mods}}, $tok } + elsif ($tok =~ m/^f:/) + { push @{$map->{filt}}, $tok } + elsif ($tok =~ m/^[a-z0-9]$/) + { $map->{sub} = $tok } + else + { die "Unknown chunk '$tok' at line $.\n" } } + $self->add($map); } - } =head2 add Add new item to mapping. Not usually called directly from user code. - $sm->add( field => 'value', tag => num, sub => 'c' ); - $sm->add( field => 'value', tag => num, - sub => 'c', mod => 'modifier' ); + $sm->add( $map ); + +Where C<$map> is a hashref that, at a minimum, looks like + + { field => "value", tag => NNN, sub => X } + +and may also have the key/value pairs + + mods => [ ITEMS ] + filt => [ ITEMS ] =cut sub add { - my ($self, %toks) = @_; + my ($self, $map) = @_; - # check bits for validity - $self->validate(\%toks); - - $toks{mod} = (defined $toks{mod} and $toks{mod} !~ /^#/) ? $toks{mod} : 0; + # trim the mods and filters + my $mods = []; my $filt = []; + for my $m (@{$map->{mods}}) + { $m =~ s/^m://; push @{$mods}, $m } + for my $f (@{$map->{filt}}) + { $f =~ s/^f://; push @{$filt}, $f } + $map->{mods} = $mods; + $map->{filt} = $filt; - $self->{fields}{$toks{field}} = { tag => $toks{tag}, sub => $toks{sub}, mod => $toks{mod}}; - $self->{tags}{$toks{tag}}{$toks{sub}} = $toks{field}; + # check bits for validity + $self->validate($map); + + # add data to the fields hash + $self->{fields}{ $map->{field} } = { tag => $map->{tag}, + sub => $map->{sub}, + mods => $map->{mods}, + filt => $map->{filt} + }; + # and to the tags hash + $self->{tags}{ $map->{tag} }{ $map->{sub} } = $map->{field}; } =head2 validate @@ -213,37 +257,42 @@ contents for common errors and dies if there is an issue. * tag must be between 0 and 999 * subfield code must be a single alphanumeric character * tag+subfield can only be mapped once + * if a list of allowable mod values was given in the call to + C, any modifiers must be on that list =cut sub validate { - my ($self, $toks) = @_; + my ($self, $map) = @_; $.= 1 unless defined $.; die "Required field missing (line $.)\n" - unless (defined $toks->{field} and defined $toks->{tag} and defined $toks->{sub}); + unless (defined $map->{field} and defined $map->{tag} and defined $map->{sub}); die "Fieldnames must start with letter (line $.)\n" - unless ($toks->{field} =~ /^[a-zA-z]/); + unless ($map->{field} =~ /^[a-zA-z]/); die "Invalid tag (line $.)\n" - if ($toks->{tag} =~ /[^\d\-]/ or $toks->{tag} < 0 or $toks->{tag} > 999); + if ($map->{tag} =~ /[^\d\-]/ or $map->{tag} < 0 or $map->{tag} > 999); die "Invalid subfield code (line $.)\n" - if (length $toks->{sub} != 1 or $toks->{sub} =~ /[^a-zA-Z0-9]/); + if (length $map->{sub} != 1 or $map->{sub} =~ /[^a-zA-Z0-9]/); - # the next thing (if it exists), must be a comment or valid modifier - if (defined $toks->{mod}) { - die "Unknown chunk (line $.)\n" - unless (defined $self->{conf}{mods}{$toks->{mod}} or $toks->{mod} =~ /^#/); + # test mod names if we have a set to check against + if (defined $self->{conf}{mods}) { + for my $mod ( @{$map->{mods}} ) { + die "Modifier '$mod' not allowed\n" + unless $self->{conf}{mods}{$mod}; + } } die "Fieldnames must be unique (line $.)\n" - if (defined $self->{fields}{$toks->{field}}); + if (defined $self->{fields}{$map->{field}}); + + die "Subfields cannot be mapped twice (line $.)\n" + if (defined $self->{tags}{$map->{tag}}{$map->{sub}}); - die "Subfields cannot be multimapped (line $.)\n" - if (defined $self->{tags}{$toks->{tag}}{$toks->{sub}}); } diff --git a/Equinox-Migration/t/02-SubfieldMapper.t b/Equinox-Migration/t/02-SubfieldMapper.t index d1095a5..058f3c2 100644 --- a/Equinox-Migration/t/02-SubfieldMapper.t +++ b/Equinox-Migration/t/02-SubfieldMapper.t @@ -1,6 +1,6 @@ #!perl -T -use Test::More tests => 39; +use Test::More tests => 40; #use Test::More qw(no_plan); use Equinox::Migration::SubfieldMapper; @@ -46,10 +46,6 @@ $tokens = { field => 'foo', tag => 650, sub => 'qq' }; eval { $sm->validate($tokens) }; is ($@, "Invalid subfield code (line 1)\n", 'over-length subfield'); -$tokens = { field => 'foo', tag => 650, sub => 'a', mod => 'bar' }; -eval { $sm->validate($tokens) }; -is ($@, "Unknown chunk (line 1)\n", 'Extra, non-comment content'); - # and some which should have no problems $tokens = { field => 'foo', tag => 650, sub => 'a' }; eval { $sm->validate($tokens) }; @@ -66,7 +62,7 @@ is ($@, "Fieldnames must be unique (line 1)\n", 'dupe fieldname'); $sm->{tags}{650}{a} = 1; $tokens = { field => 'bar', tag => 650, sub => 'a', mod => '#', 'this', 'is', 'a', 'comment' }; eval { $sm->validate($tokens) }; -is ($@, "Subfields cannot be multimapped (line 1)\n", 'dupe fieldname'); +is ($@, "Subfields cannot be mapped twice (line 1)\n", 'dupe fieldname'); # test load from file $sm = Equinox::Migration::SubfieldMapper->new( file => "./t/corpus/sm0.txt" ); @@ -94,7 +90,10 @@ is ($sm->field(650,'z'), undef, 'tag+code not mapped'); is ($sm->field(949,'a'), 'call_number', 'mapping returned'); # mod method tests -is ($sm->{fields}{type}{mod}, 0); -is ($sm->{fields}{note}{mod}, 'multi'); -is ($sm->mod('zzz'), undef, 'nonexistant field'); -is ($sm->mod('note'), 'multi', 'multi'); +is ($sm->{fields}{note}{mods}[0], 'multi'); +is ($sm->mods('zzz'), undef, 'nonexistant field'); +is_deeply ($sm->mods('note'), ['multi'], 'multi'); +is_deeply ($sm->mods('note_alt'), ['multi', 'req'], 'multi, req'); +is_deeply ($sm->mods('date_a'), ['foo', 'bar', 'quux']); +is_deeply ($sm->filters('date_a'), ['baz']); + diff --git a/Equinox-Migration/t/corpus/sm0.txt b/Equinox-Migration/t/corpus/sm0.txt index 26b0114..175bac3 100644 --- a/Equinox-Migration/t/corpus/sm0.txt +++ b/Equinox-Migration/t/corpus/sm0.txt @@ -17,11 +17,11 @@ location 949 l # comments can go here, too location_alt 999 l library 949 m library_alt 999 m -note 949 o multi -note_alt 999 o multi +note 949 o m:multi +note_alt 999 o m:multi m:req type 949 t type_alt 999 t -date_a 949 v +date_a 949 v m:foo m:bar f:baz m:quux date_a_alt 999 v date_b 949 u date_b_alt 999 u -- 1.7.2.5