From f74cdf720391cddb8a329e1fdfb688ab3012515a Mon Sep 17 00:00:00 2001 From: Shawn Boyette Date: Tue, 5 May 2009 16:32:18 +0000 Subject: [PATCH] fixes to tag counts --- .../lib/Equinox/Migration/MARCXMLSampler.pm | 14 ++++++++------ Equinox-Migration/t/04-MARCXMLSampler.t | 4 ++-- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm b/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm index 1fbfefe..3ec869f 100644 --- a/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm +++ b/Equinox-Migration/lib/Equinox/Migration/MARCXMLSampler.pm @@ -61,7 +61,8 @@ sub new { my ($class, %args) = @_; my $self = bless { data => { recs => undef, # X::T record objects - rcnt => 0, # next record counter + rcnt => 0, # record counter + tcnt => 0, # tag counter samp => {}, # data samples tags => {}, # all found tags }, @@ -117,6 +118,7 @@ sub process_field { return unless ($tag and $tag > 9); # increment raw tag count + $self->{data}{tcnt}++; $self->{data}{tags}{$tag}++; if ($map and $map->has($tag)) { @@ -136,9 +138,9 @@ sub process_subs { # set a value, total-seen count and records-seen-in count $samp->{$tag}{$code}{value} = $sub->text unless defined $samp->{$tag}{$code}; $samp->{$tag}{$code}{count}++; - $samp->{$tag}{$code}{rcnt}++ unless ( defined $samp->{$tag}{$code}{last} and - $samp->{$tag}{$code}{last} == $self->{data}{rcnt} ); - $samp->{$tag}{$code}{last} = $self->{data}{rcnt}; + $samp->{$tag}{$code}{tcnt}++ unless ( defined $samp->{$tag}{$code}{last} and + $samp->{$tag}{$code}{last} == $self->{data}{tcnt} ); + $samp->{$tag}{$code}{last} = $self->{data}{tcnt}; #FIXME tcnt not rcnt } @@ -151,7 +153,7 @@ structure will be constructed which holds data about tags in the map. { tag_id => { sub_code => { value => VALUE, count => COUNT, - rcnt => RCOUNT + tcnt => TAGCOUNT }, ... }, @@ -163,7 +165,7 @@ that subfield containing * value - A sample of the subfield text * count - Total number of times the subfield was seen - * rcnt - The number of records the subfield was seen in + * tcnt - The number of tags the subfield was seen in =head1 AUTHOR diff --git a/Equinox-Migration/t/04-MARCXMLSampler.t b/Equinox-Migration/t/04-MARCXMLSampler.t index d1251aa..1b3e0a4 100644 --- a/Equinox-Migration/t/04-MARCXMLSampler.t +++ b/Equinox-Migration/t/04-MARCXMLSampler.t @@ -24,8 +24,8 @@ is (defined $sample->{999}, 1); is (defined $sample->{999}{x}, 1); is ($sample->{999}{x}{value}, 'MYSTERY', 'Should be the first seen value'); is ($sample->{999}{x}{count}, 7, 'One real in each record, plus 3 synthetic in last rec'); -is ($sample->{999}{x}{rcnt}, 4, 'Occurs in all records'); -is ($sample->{999}{s}{rcnt}, 3, 'Was removed from one record'); +is ($sample->{999}{x}{tcnt}, 4, 'Occurs in all records'); +is ($sample->{999}{s}{tcnt}, 3, 'Was removed from one record'); my $tags = $mp->{data}{tags}; is ($tags->{961}, 4); -- 1.7.2.5