X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=Equinox-Migration%2Flib%2FEquinox%2FMigration%2FSubfieldMapper.pm;h=ae33db0ddf02bf60318cb8aad66d978e41e3d931;hp=7d6f0690f85f96ec62899de1204570e6843ac4f2;hb=f31b41545e735198c20f969107794b8e024ecfda;hpb=15e39d692fa06ef50b9fede216b9c8edd0d3f7de diff --git a/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm b/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm index 7d6f069..ae33db0 100644 --- a/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm +++ b/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm @@ -51,7 +51,7 @@ Returns a E::M::STL object. sub new { my ($class, %args) = @_; - my $self = bless { conf => { mods => { multi => 1, bib => 1, req => 1, bibreq => 1 } }, + my $self = bless { conf => { mods => undef }, fields => {}, tags => {} }, $class; @@ -64,12 +64,19 @@ sub new { } } + if ($args{mods}) { + die "Argument 'mods' is wrong type\n" + unless (ref $args{mods} eq "ARRAY"); + for my $mod ( @{$args{mods}} ) + { $self->{conf}{mods}{$mod} = 1 } + } + return $self; } =head2 has -Ask it whether you mapping has various things, and it'll let you know. +Ask it whether your mapping has various things, and it'll let you know. $sm->has('fieldname') # is this fieldname mapped? $sm->has(901) # are there any mappings for this tag? @@ -126,33 +133,42 @@ sub field { return $self->{tags}{$tag}{$sub}; } -=head2 mod +=head2 mods -Returns the modifier set on a mapping. +Returns the modifiers set on a mapping. - if ($sm->mod('field) eq "bib") + $self->mods('fieldname') -If there is no modifier, C<0> will be returned. At the moment, the -valid mappings are +If there are no modifiers, C will be returned. Else a listref +will be returned. - * multi - This field is expected to be seen multiple times per - datafield +=cut - * bib - This is a bib-level field, and is expected to be seen only - once per record (normal is once per datafield) +sub mods { + my ($self, $field) = @_; + return undef unless $self->has($field); + return $self->{fields}{$field}{mods}; +} - * req - This field is required to occur before output +=head2 filters - * bibreq - Both 'bib' and 'req' +Returns the content filters set on a mapping + + $self->filters('fieldname') + +If there are no filters, C will be returned. Else a listref +will be returned. =cut -sub mod { +sub filters { my ($self, $field) = @_; return undef unless $self->has($field); - return $self->{fields}{$field}{mod}; + return $self->{fields}{$field}{filt}; } + + =head2 generate Generate initial mapping from file. @@ -170,36 +186,64 @@ sub generate { chomp; my @tokens = split /\s+/; - if (defined $tokens[3]) { - $self->add( field => $tokens[0], tag => $tokens[1], - sub => $tokens[2], mod => $tokens[3] ); - } else { - $self->add( field => $tokens[0], tag => $tokens[1], sub => $tokens[2] ); + my $map = { mods => [], filt => [] }; + $map->{field} = shift @tokens; + $map->{tag} = shift @tokens; + for my $tok (@tokens) { + last if ($tok =~ m/^#/); + if ($tok =~ m/^m:/) + { push @{$map->{mods}}, $tok } + elsif ($tok =~ m/^f:/) + { push @{$map->{filt}}, $tok } + elsif ($tok =~ m/^[a-z0-9]$/) + { $map->{sub} = $tok } + else + { die "Unknown chunk '$tok' at line $.\n" } } + $self->add($map); } - } =head2 add Add new item to mapping. Not usually called directly from user code. - $sm->add( field => 'value', tag => num, sub => 'c' ); - $sm->add( field => 'value', tag => num, - sub => 'c', mod => 'modifier' ); + $sm->add( $map ); + +Where C<$map> is a hashref that, at a minimum, looks like + + { field => "value", tag => NNN, sub => X } + +and may also have the key/value pairs + + mods => [ ITEMS ] + filt => [ ITEMS ] =cut sub add { - my ($self, %toks) = @_; + my ($self, $map) = @_; - # check bits for validity - $self->validate(\%toks); - - $toks{mod} = (defined $toks{mod} and $toks{mod} !~ /^#/) ? $toks{mod} : 0; + # trim the mods and filters + my $mods = []; my $filt = []; + for my $m (@{$map->{mods}}) + { $m =~ s/^m://; push @{$mods}, $m } + for my $f (@{$map->{filt}}) + { $f =~ s/^f://; push @{$filt}, $f } + $map->{mods} = $mods; + $map->{filt} = $filt; - $self->{fields}{$toks{field}} = { tag => $toks{tag}, sub => $toks{sub}, mod => $toks{mod}}; - $self->{tags}{$toks{tag}}{$toks{sub}} = $toks{field}; + # check bits for validity + $self->validate($map); + + # add data to the fields hash + $self->{fields}{ $map->{field} } = { tag => $map->{tag}, + sub => $map->{sub}, + mods => $map->{mods}, + filt => $map->{filt} + }; + # and to the tags hash + $self->{tags}{ $map->{tag} }{ $map->{sub} } = $map->{field}; } =head2 validate @@ -213,37 +257,42 @@ contents for common errors and dies if there is an issue. * tag must be between 0 and 999 * subfield code must be a single alphanumeric character * tag+subfield can only be mapped once + * if a list of allowable mod values was given in the call to + C, any modifiers must be on that list =cut sub validate { - my ($self, $toks) = @_; + my ($self, $map) = @_; $.= 1 unless defined $.; die "Required field missing (line $.)\n" - unless (defined $toks->{field} and defined $toks->{tag} and defined $toks->{sub}); + unless (defined $map->{field} and defined $map->{tag} and defined $map->{sub}); die "Fieldnames must start with letter (line $.)\n" - unless ($toks->{field} =~ /^[a-zA-z]/); + unless ($map->{field} =~ /^[a-zA-z]/); die "Invalid tag (line $.)\n" - if ($toks->{tag} =~ /[^\d\-]/ or $toks->{tag} < 0 or $toks->{tag} > 999); + if ($map->{tag} =~ /[^\d\-]/ or $map->{tag} < 0 or $map->{tag} > 999); die "Invalid subfield code (line $.)\n" - if (length $toks->{sub} != 1 or $toks->{sub} =~ /[^a-zA-Z0-9]/); + if (length $map->{sub} != 1 or $map->{sub} =~ /[^a-zA-Z0-9]/); - # the next thing (if it exists), must be a comment or valid modifier - if (defined $toks->{mod}) { - die "Unknown chunk (line $.)\n" - unless (defined $self->{conf}{mods}{$toks->{mod}} or $toks->{mod} =~ /^#/); + # test mod names if we have a set to check against + if (defined $self->{conf}{mods}) { + for my $mod ( @{$map->{mods}} ) { + die "Modifier '$mod' not allowed\n" + unless $self->{conf}{mods}{$mod}; + } } die "Fieldnames must be unique (line $.)\n" - if (defined $self->{fields}{$toks->{field}}); + if (defined $self->{fields}{$map->{field}}); + + die "Subfields cannot be mapped twice (line $.)\n" + if (defined $self->{tags}{$map->{tag}}{$map->{sub}}); - die "Subfields cannot be multimapped (line $.)\n" - if (defined $self->{tags}{$toks->{tag}}{$toks->{sub}}); }