From: Shawn Boyette Date: Tue, 5 May 2009 16:32:18 +0000 (+0000) Subject: fixes to tag counts X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=f74cdf720391cddb8a329e1fdfb688ab3012515a;hp=92480d2b668d7ddd2237cca8d4d6c20f5f2e43f9 fixes to tag counts --- diff --git a/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm b/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm index 1fbfefe..3ec869f 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm @@ -61,7 +61,8 @@ sub new { my ($class, %args) = @_; my $self = bless { data => { recs => undef, # X::T record objects - rcnt => 0, # next record counter + rcnt => 0, # record counter + tcnt => 0, # tag counter samp => {}, # data samples tags => {}, # all found tags }, @@ -117,6 +118,7 @@ sub process_field { return unless ($tag and $tag > 9); # increment raw tag count + $self->{data}{tcnt}++; $self->{data}{tags}{$tag}++; if ($map and $map->has($tag)) { @@ -136,9 +138,9 @@ sub process_subs { # set a value, total-seen count and records-seen-in count $samp->{$tag}{$code}{value} = $sub->text unless defined $samp->{$tag}{$code}; $samp->{$tag}{$code}{count}++; - $samp->{$tag}{$code}{rcnt}++ unless ( defined $samp->{$tag}{$code}{last} and - $samp->{$tag}{$code}{last} == $self->{data}{rcnt} ); - $samp->{$tag}{$code}{last} = $self->{data}{rcnt}; + $samp->{$tag}{$code}{tcnt}++ unless ( defined $samp->{$tag}{$code}{last} and + $samp->{$tag}{$code}{last} == $self->{data}{tcnt} ); + $samp->{$tag}{$code}{last} = $self->{data}{tcnt}; #FIXME tcnt not rcnt } @@ -151,7 +153,7 @@ structure will be constructed which holds data about tags in the map. { tag_id => { sub_code => { value => VALUE, count => COUNT, - rcnt => RCOUNT + tcnt => TAGCOUNT }, ... }, @@ -163,7 +165,7 @@ that subfield containing * value - A sample of the subfield text * count - Total number of times the subfield was seen - * rcnt - The number of records the subfield was seen in + * tcnt - The number of tags the subfield was seen in =head1 AUTHOR diff --git a/Equinox-Migration/t/04-MARCXMLSampler.t b/Equinox-Migration/t/04-MARCXMLSampler.t index d1251aa..1b3e0a4 100644 --- a/Equinox-Migration/t/04-MARCXMLSampler.t +++ b/Equinox-Migration/t/04-MARCXMLSampler.t @@ -24,8 +24,8 @@ is (defined $sample->{999}, 1); is (defined $sample->{999}{x}, 1); is ($sample->{999}{x}{value}, 'MYSTERY', 'Should be the first seen value'); is ($sample->{999}{x}{count}, 7, 'One real in each record, plus 3 synthetic in last rec'); -is ($sample->{999}{x}{rcnt}, 4, 'Occurs in all records'); -is ($sample->{999}{s}{rcnt}, 3, 'Was removed from one record'); +is ($sample->{999}{x}{tcnt}, 4, 'Occurs in all records'); +is ($sample->{999}{s}{tcnt}, 3, 'Was removed from one record'); my $tags = $mp->{data}{tags}; is ($tags->{961}, 4);