X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=Equinox-Migration%2Flib%2FEquinox%2FMigration%2FMapDrivenMARCXMLProc.pm;h=9844daf68f02ef213b23c7d195801e177ea3cb4f;hp=ab053bba0fb782367d4ada7750f612ac5925e935;hb=ca9dd06ee372e59b4fe0ecdcbe24cf7468d8577f;hpb=a85d1ead29a140aefd716b303f0f0e67a96cee75 diff --git a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm index ab053bb..9844daf 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MapDrivenMARCXMLProc.pm @@ -6,6 +6,16 @@ use strict; use XML::Twig; use Equinox::Migration::SubfieldMapper 1.002; +# FIXME +# +# sample functionality should be extracted into a new module which +# uses E::M::SM to drive sampling of individual datafields, and +# reports ALL datafields which occur +# +# --sample should give the list of all datafields +# --samplefile should take a SM map as the argument and introspect the mapped datafields + + =head1 NAME Equinox::Migration::MapDrivenMARCXMLProc @@ -84,6 +94,13 @@ sub new { die "Can't open marc file: $!\n"; } + # if we have a sample arg, set up the sample set and umap hash + if (defined $args{sample}) { + for my $s ( @{$args{sample}}) + { $self->{data}{stag}{$s} = 1 } + $self->{data}{umap} = {}; + } + return $self; } @@ -166,7 +183,8 @@ sub process_subs { # set a value, total-seen count and records-seen-in count $u->{$tag}{$code}{value} = $sub->text unless defined $u->{$tag}{$code}; $u->{$tag}{$code}{count}++; - $u->{$tag}{$code}{rcnt}++ unless ($u->{$tag}{$code}{last} == $self->{data}{rptr}); + $u->{$tag}{$code}{rcnt}++ unless ( defined $u->{$tag}{$code}{last} and + $u->{$tag}{$code}{last} == $self->{data}{rptr} ); $u->{$tag}{$code}{last} = $self->{data}{rptr}; return; } @@ -182,10 +200,34 @@ sub process_subs { push @{$dataf->{multi}{$name}}, $sub->text; } } else { + die "Multiple occurances of a non-multi field: 
\n" + if (defined $dataf->{uni}{$code}); $dataf->{uni}{$code} = $sub->text; } } +=head1 MODIFIERS + +MapDrivenMARCXMLProc implements the following modifiers, and passes +them to L<Equinox::Migration::SubfieldMapper>, meaning that specifying +any other modifiers in a MDMP map file will cause a fatal error when +it is processed. + +=head2 multi + +If a mapping is declared to be C<multi>, then MDMP expects to see more +than one instance of that subfield per datafield, and the data is +handled accordingly (see L</PARSED RECORDS> below). + +Occurring zero or one time is legal for a C<multi> mapping. + +A mapping which is not flagged as C<multi>, but which occurs more than +once per datafield, will cause a fatal error. + +=head2 bib + +=head2 required + =head1 PARSED RECORDS Given: @@ -255,14 +297,30 @@ C. =head1 UNMAPPED TAGS +If the C<sample> argument is passed to L</new>, there will also be a +structure which holds data about unmapped subfields encountered in +mapped tags which are also in the declared sample set. This +information is collected over the life of the object and is not reset +for every record processed (as the current record data necessarily +is). + { tag_id => { - sub_code => { value => VALUE, count => COUNT }, - sub_code2 => { value => VALUE, count => COUNT }, + sub_code => { value => VALUE, + count => COUNT, + rcnt => RCOUNT + }, ... }, ... } +For each mapped tag, for each unmapped subfield, there is a hash of +data about that subfield containing + + * value - A sample of the subfield text + * count - Total number of times the subfield was seen + * rcnt - The number of records the subfield was seen in + =head1 AUTHOR Shawn Boyette, C<< >>