From 787d7acc9b22dbb935074dd3d5c2da470819686b Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Mon, 23 Mar 2009 09:59:38 +0000 Subject: [PATCH] adding subfield mapper stuffs --- .../lib/Equinox/Migration/SubfieldMapper.pm | 192 ++++++++++++++++++++ Equinox-Migration/t/02-SubfieldMapper.t | 77 ++++++++ Equinox-Migration/t/corpus/sm0.txt | 31 +++ 3 files changed, 300 insertions(+), 0 deletions(-) create mode 100644 Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm create mode 100644 Equinox-Migration/t/02-SubfieldMapper.t create mode 100644 Equinox-Migration/t/corpus/sm0.txt diff --git a/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm b/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm new file mode 100644 index 0000000..dd32d31 --- /dev/null +++ b/Equinox-Migration/lib/Equinox/Migration/SubfieldMapper.pm @@ -0,0 +1,192 @@ +package Equinox::Migration::SubfieldMapper; + +use warnings; +use strict; + +=head1 NAME + +Equinox::Migration::SubfieldMapper - Generate named-field to MARC tag map from file + +=head1 VERSION + +Version 1.000 + +=cut + +our $VERSION = '1.000'; + + +=head1 SYNOPSIS + +Using a file as input, E::M::SM generates a mapping of MARC subfields +to arbitrary field names, and provides several access mechanisms to +that set. + + use Equinox::Migration::SubfieldMapper; + + my $stl = Equinox::Migration::SubfieldMapper->new( file => ".txt" ); + my $tags = $stl->as_hashref; + +or + + my $stl = Equinox::Migration::SubfieldMapper->new( file => ".txt" ); + if ( $stl->has($foo) ) { + # if $foo is an element of $stl's parsed list + # do stuff ... + } + + +=head1 ROUTINES + + +=head2 new + +Takes one optional argument, C. If this is speficied, the tag +list will be populated as per that file on instantiation. + +Returns a E::M::STL object. + +=cut + +sub new { + my ($class, %args) = @_; + + my $self = bless { conf => { mods => { multi => 1, biblevel => 1} }, + fields => {}, + tags => {} }, $class; + + if ($args{file}) { + if (-r $args{file}) { + $self->{conf}{file} = $args{file}; + $self->generate; + } else { + die "Can't open file: $!\n"; + } + } + + return $self; +} + + +=head2 generate + +=cut + +sub has { + my ($self, @chunks) = @_; + return undef unless (defined $chunks[0]); + + if ($chunks[0] =~ /^\d/) { + if (defined $chunks[1]) { + return 1 if ( defined $self->{tags}{$chunks[0]}{$chunks[1]} ); + return 0; + } else { + return 1 if ( defined $self->{tags}{$chunks[0]} ); + return 0; + } + } else { + if (defined $chunks[2]) { + return 1 if ( $self->{fields}{$chunks[0]}{tag} eq $chunks[1] and + $self->{fields}{$chunks[0]}{sub} eq $chunks[2] ); + return undef; + } elsif (defined $chunks[1]) { + return 1 if ( $self->{fields}{$chunks[0]}{tag} eq $chunks[1] ); + return undef; + } else { + return 1 if ( defined $self->{fields}{$chunks[0]} ); + return undef; + } + } +} + + +sub generate { + my ($self, $file) = @_; + + open TAGFILE, '<', $self->{conf}{file}; + while () { + next if m/^#/; + next if m/^\s*\n$/; + + chomp; + my @tokens = split /\s+/; + + if (defined $tokens[3]) { + $self->add( field => $tokens[0], tag => $tokens[1], + sub => $tokens[2], mod => $tokens[3] ); + } else { + $self->add( field => $tokens[0], tag => $tokens[1], sub => $tokens[2] ); + } + } + +} + +sub add { + my ($self, %toks) = @_; + + # check bits for validity + $self->validate(\%toks); + + $toks{mod} = (defined $toks{mod}) ? $toks{mod} : 0; + + $self->{fields}{$toks{field}} = { tag => $toks{tag}, sub => $toks{sub}, mod => $toks{mod}}; + $self->{tags}{$toks{tag}}{$toks{sub}} = $toks{field}; +} + +sub validate { + my ($self, $toks) = @_; + + $.= 1 unless defined $.; + + die "Required field missing (line $.)\n" + unless (defined $toks->{field} and defined $toks->{tag} and defined $toks->{sub}); + + die "Fieldnames must start with letter (line $.)\n" + unless ($toks->{field} =~ /^\w/); + + die "Invalid tag (line $.)\n" + if ($toks->{tag} =~ /\D/ or $toks->{tag} < 0 or $toks->{tag} > 999); + + die "Invalid subfield code (line $.)\n" + if (length $toks->{sub} != 1 or $toks->{sub} =~ /[^a-z0-9]/); + + # the next thing (if it exists), must be a comment or valid modifier + if (defined $toks->{mod}) { + die "Unknown chunk (line $.)\n" + unless (defined $self->{conf}{mods}{$toks->{mod}} or $toks->{mod} =~ /^#/); + } + + die "Fieldnames must be unique (line $.)\n" + if (defined $self->{fields}{$toks->{field}}); + + die "Subfields cannot be multimapped (line $.)\n" + if (defined $self->{tags}{$toks->{tag}}{$toks->{sub}}); +} + + +=head1 AUTHOR + +Shawn Boyette, C<< >> + +=head1 BUGS + +Please report any bugs or feature requests to the above email address. + +=head1 SUPPORT + +You can find documentation for this module with the perldoc command. + + perldoc Equinox::Migration::SubfieldMapper + + +=head1 COPYRIGHT & LICENSE + +Copyright 2009 Equinox, all rights reserved. + +This program is free software; you can redistribute it and/or modify it +under the same terms as Perl itself. + + +=cut + +1; # End of Equinox::Migration::SimpleTagList diff --git a/Equinox-Migration/t/02-SubfieldMapper.t b/Equinox-Migration/t/02-SubfieldMapper.t new file mode 100644 index 0000000..680f348 --- /dev/null +++ b/Equinox-Migration/t/02-SubfieldMapper.t @@ -0,0 +1,77 @@ +#!perl -T + +#use Test::More tests => 33; +use Test::More qw(no_plan); +use Equinox::Migration::SubfieldMapper; + +# baseline object creation +my $sm = Equinox::Migration::SubfieldMapper->new(); +is(ref $sm, "Equinox::Migration::SubfieldMapper", "self is self"); + +# test validation death routines +my $tokens = {}; +eval { $sm->validate($tokens) }; +is ($@, "Required field missing (line 1)\n", 'nothing there'); +$tokens = { field => 'foo' }; +eval { $sm->validate($tokens) }; +is ($@, "Required field missing (line 1)\n", 'only 1 field'); +$tokens = { field => 'foo', tag => 99 }; +eval { $sm->validate($tokens) }; +is ($@, "Required field missing (line 1)\n", 'only 2 fields'); + +$tokens = { field => 'foo', tag => -1, sub => 'a' }; +eval { $sm->validate($tokens) }; +is ($@, "Invalid tag (line 1)\n", 'tag value < 0'); +$tokens = { field => 'foo', tag => 1042, sub => 'a' }; +eval { $sm->validate($tokens) }; +is ($@, "Invalid tag (line 1)\n", 'tag value > 999'); + +$tokens = { field => 'foo', tag => 650, sub => '%' }; +eval { $sm->validate($tokens) }; +is ($@, "Invalid subfield code (line 1)\n", 'non-alphanum subfield'); +$tokens = { field => 'foo', tag => 650, sub => '' }; +eval { $sm->validate($tokens) }; +is ($@, "Invalid subfield code (line 1)\n", 'zero-length subfield'); +$tokens = { field => 'foo', tag => 650, sub => 'qq' }; +eval { $sm->validate($tokens) }; +is ($@, "Invalid subfield code (line 1)\n", 'over-length subfield'); + +$tokens = { field => 'foo', tag => 650, sub => 'a', mod => 'bar' }; +eval { $sm->validate($tokens) }; +is ($@, "Unknown chunk (line 1)\n", 'Extra, non-comment content'); + +# and some which should have no problems +$tokens = { field => 'foo', tag => 650, sub => 'a' }; +eval { $sm->validate($tokens) }; +is ($@, '', 'should be fine!'); +$tokens = { field => 'foo', tag => 650, sub => 'a', mod => '#', 'this', 'is', 'a', 'comment' }; +eval { $sm->validate($tokens) }; +is ($@, '', 'should be fine!'); + +# two more death: dupes +$sm->{fields}{foo} = 1; +$tokens = { field => 'foo', tag => 650, sub => 'a', mod => '#', 'this', 'is', 'a', 'comment' }; +eval { $sm->validate($tokens) }; +is ($@, "Fieldnames must be unique (line 1)\n", 'dupe fieldname'); +$sm->{tags}{650}{a} = 1; +$tokens = { field => 'bar', tag => 650, sub => 'a', mod => '#', 'this', 'is', 'a', 'comment' }; +eval { $sm->validate($tokens) }; +is ($@, "Subfields cannot be multimapped (line 1)\n", 'dupe fieldname'); + +# test load from file +$sm = Equinox::Migration::SubfieldMapper->new( file => "./t/corpus/sm0.txt" ); +is(ref $sm, "Equinox::Migration::SubfieldMapper", "self is self"); +is ($sm->{tags}{949}{a}, 'call_number'); +is ($sm->{tags}{999}{a}, 'call_number_alt'); + +is ($sm->has(949), 1, 'has tag'); +is ($sm->has(999, 'a'), 1, 'has tag and subfield'); +is ($sm->has('call_number'), 1, 'has fieldname'); +is ($sm->has('call_number', 949), 1, 'has fieldname'); +is ($sm->has('call_number', 949, 'a'), 1, 'has fieldname'); + + +is ($sm->{fields}{call_number}{tag}, 949); +is ($sm->{fields}{call_number}{sub}, 'a'); +is ($sm->{fields}{type}{mod}, 0); +is ($sm->{fields}{note}{mod}, 'multi'); diff --git a/Equinox-Migration/t/corpus/sm0.txt b/Equinox-Migration/t/corpus/sm0.txt new file mode 100644 index 0000000..26b0114 --- /dev/null +++ b/Equinox-Migration/t/corpus/sm0.txt @@ -0,0 +1,31 @@ +# comment lines are comments + +# blanks are ignored + + +# format is as follows: +# +# fieldname tag sub [modifier] [# line comment] + +call_number 949 a +call_number_alt 999 a +copy_seq 949 c +copy_seq_alt 999 c +barcode 949 i +barcode_alt 999 i +location 949 l # comments can go here, too +location_alt 999 l +library 949 m +library_alt 999 m +note 949 o multi +note_alt 999 o multi +type 949 t +type_alt 999 t +date_a 949 v +date_a_alt 999 v +date_b 949 u +date_b_alt 999 u +cat_a 949 w +cat_a_alt 999 w +cat_b 949 x +cat_b_alt 999 x -- 1.7.2.5