1 package Equinox::Migration::MARCXMLSampler;
7 use Equinox::Migration::SimpleTagList 1.001;
12 Equinox::Migration::MARCXMLSampler
20 our $VERSION = '1.002';
25 Produce a list of all fields in a MARCXML file which have a C<tag>
26 attribute, and count how many times each occurs
28 my $s = E::M::MARCXMLSampler->new( marcfile => "foo.marc.xml" );
31 Also deeply introspect certain tags, producing lists of all subfields,
32 and counts of how many times each subfield occurs I<in toto> and how
33 many records each subfield appears in
35 my $s = E::M::MARCXMLSampler->new( marcfile => "foo.marc.xml",
36 mapfile => "foo.map" );
39 my $s = E::M::MARCXMLSampler->new( marcfile => "foo.marc.xml",
40 mapstring => "852 999" );
49 Takes one required argument, C<marcfile>, which points to the MARCXML
52 Has two mutually-exclusive optional arguments, C<mapfile> and
53 C<mapstring>". The former should point to a file which will be used as
54 a L<Equinox::Migration::SimpleTagList> map; the latter should have as
55 its value a text string which will be used in the same way (handy for
56 when you only want deep introspection on a handful of tags).
61 my ($class, %args) = @_;
63 my $self = bless { data => { recs => undef, # X::T record objects
64 rcnt => 0, # record counter
65 tcnt => 0, # tag counter
66 scnt => {}, # subfield/tag counters
67 samp => {}, # data samples
68 tags => {}, # all found tags
73 die "Argument 'marcfile' must be specified\n" unless ($args{marcfile});
74 if (-r $args{marcfile}) {
75 $self->{twig} = XML::Twig->new;
76 $self->{conf}{marc} = $args{marcfile};
78 die "Can't open marc file: $!\n";
81 # if we have a sample arg, create the sample map
82 die "Can't use a mapfile and mapstring\n"
83 if ($args{mapfile} and $args{mapstring});
84 $self->{map} = Equinox::Migration::SimpleTagList->new(file => $args{mapfile})
86 $self->{map} = Equinox::Migration::SimpleTagList->new(str => $args{mapstring})
87 if ($args{mapstring});
95 Extracts data from MARC records, per the mapping file.
102 $self->{twig}->parsefile( $self->{conf}{marc} );
103 for my $record ( $self->{twig}->root->children ) {
104 my @fields = $record->children;
106 { $self->process_field($f); $f->purge }
108 # cleanup memory and increment pointer
110 $self->{data}{rcnt}++;
115 my ($self, $field) = @_;
116 my $map = $self->{map};
117 my $tag = $field->{'att'}->{'tag'};
118 return unless ($tag and $tag > 9);
120 # increment raw tag count
121 $self->{data}{tcnt}++;
122 $self->{data}{tags}{$tag}++;
124 if ($map and $map->has($tag)) {
125 my @subs = $field->children('subfield');
128 { $self->process_subs($tag, $sub); $sub->purge; $i++ }
130 # increment sub length counter
131 $self->{data}{scnt}{$tag}{$i}++;
136 my ($self, $tag, $sub) = @_;
137 my $map = $self->{map};
138 my $code = $sub->{'att'}->{'code'};
140 # handle unmapped tag/subs
141 my $samp = $self->{data}{samp};
142 # set a value, total-seen count and records-seen-in count
143 $samp->{$tag}{$code}{value} = $sub->text unless $samp->{$tag}{$code};
144 $samp->{$tag}{$code}{count}++;
145 $samp->{$tag}{$code}{tcnt}++ unless ( defined $samp->{$tag}{$code}{last} and
146 $samp->{$tag}{$code}{last} == $self->{data}{tcnt} );
147 $samp->{$tag}{$code}{last} = $self->{data}{tcnt};
153 If the C<mapfile> or C<mapstring> arguments are passed to L</new>, a
154 structure will be constructed which holds data about tags in the map.
157 sub_code => { value => VALUE,
166 For each subfield in each mapped tag, there is a hash of data about
167 that subfield containing
169 * value - A sample of the subfield text
170 * count - Total number of times the subfield was seen
171 * tcnt - The number of tags the subfield was seen in
175 Shawn Boyette, C<< <sboyette at esilibrary.com> >>
179 Please report any bugs or feature requests to the above email address.
183 You can find documentation for this module with the perldoc command.
185 perldoc Equinox::Migration::MARCXMLSampler
188 =head1 COPYRIGHT & LICENSE
190 Copyright 2009 Equinox, all rights reserved.
192 This program is free software; you can redistribute it and/or modify it
193 under the same terms as Perl itself.
198 1; # End of Equinox::Migration::MARCXMLSampler