1 package Equinox::Migration::MARCXMLSampler;
7 use Equinox::Migration::SimpleTagList 1.001;
12 Equinox::Migration::MARCXMLSampler
20 our $VERSION = '1.000';
27 use Equinox::Migration::MARCXMLSampler;
# Constructor body: receives the invoking class and a hash of named arguments.
38 my ($class, %args) = @_;
# Object state is a blessed hash; 'data' holds everything gathered while sampling.
40 my $self = bless { data => { recs => undef, # X::T record objects
41 rcnt => 0, # next record counter
42 samp => {}, # data samples
43 tags => {}, # all found tags
# 'marcfile' is mandatory: a missing or false value is fatal.
48 die "Argument 'marcfile' must be specified\n" unless ($args{marcfile});
# Parse only when the file passes the -r (readable) test.
49 if (-r $args{marcfile}) {
# The whole file is parsed with XML::Twig; the children of the root element
# become the record list, stored by reference in data.recs.
50 $self->{twig} = XML::Twig->new;
51 $self->{twig}->parsefile($args{marcfile});
52 my @records = $self->{twig}->root->children;
53 $self->{data}{recs} = \@records;
# NOTE(review): presumably the failure path of the -r test above -- confirm.
55 die "Can't open marc file: $!\n";
58 # build the tag map: 'mapfile' is handed to SimpleTagList as file =>, 'mapstring' (when true) as str =>
59 $self->{map} = Equinox::Migration::SimpleTagList->new(file => $args{mapfile})
61 $self->{map} = Equinox::Migration::SimpleTagList->new(str => $args{mapstring})
62 if ($args{mapstring});
70 Extracts data from MARC records, per the mapping file.
# Walk every record element held in data.recs (filled from the parsed file's
# root children by the constructor code).
77 for my $record ( @{$self->{data}{recs}} ) {
# Collect the record's child elements; each one is handed to process_field.
# NOTE(review): $f below presumably iterates over @fields -- confirm.
78 my @fields = $record->children;
80 { $self->process_field($f) }
82 # cleanup memory and increment pointer
# rcnt is the per-record marker that process_subs saves in 'last', which lets
# it count a subfield at most once per record.
84 $self->{data}{rcnt}++;
# Per-field handler: receives the object and one field element.
89 my ($self, $field) = @_;
90 my $map = $self->{map};
# The tag is read straight from the element's attribute hash.
91 my $tag = $field->{'att'}->{'tag'};
# Ignore elements whose tag is missing/false or numerically 9 or below.
92 return unless ($tag and $tag > 9);
94 # count every occurrence of this tag, whether or not it is in the map
95 $self->{data}{tags}{$tag}++;
# Subfields are examined only when a map exists and it lists this tag.
97 if ($map and $map->has($tag)) {
# Each 'subfield' child is passed on to process_subs together with the tag.
# NOTE(review): $sub below presumably iterates over @subs -- confirm.
98 my @subs = $field->children('subfield');
100 { $self->process_subs($tag, $sub) }
# Per-subfield sampler: receives the object, the parent tag and one subfield
# element, and updates the statistics kept under data.samp->{tag}{code}.
105 my ($self, $tag, $sub) = @_;
# NOTE(review): $map is fetched but not used by any statement visible here.
106 my $map = $self->{map};
# The subfield code is read straight from the element's attribute hash.
107 my $code = $sub->{'att'}->{'code'};
109 # handle unmapped tag/subs
110 my $samp = $self->{data}{samp};
111 # set a value, total-seen count and records-seen-in count
# value: text of the first occurrence only (set while no entry exists yet).
112 $samp->{$tag}{$code}{value} = $sub->text unless defined $samp->{$tag}{$code};
# count: incremented on every occurrence.
113 $samp->{$tag}{$code}{count}++;
# rcnt: incremented unless 'last' already equals the current record counter,
# i.e. at most once per record.
114 $samp->{$tag}{$code}{rcnt}++ unless ( defined $samp->{$tag}{$code}{last} and
115 $samp->{$tag}{$code}{last} == $self->{data}{rcnt} );
# Remember the record counter value at which this tag/code was last seen.
116 $samp->{$tag}{$code}{last} = $self->{data}{rcnt};
122 If the C<sample> argument is passed to L</new>, there will also be a
123 structure which holds data about unmapped subfields encountered in
124 mapped tags which are also in the declared sample set. This
125 information is collected over the life of the object and is not reset
126 for every record processed (as the current record data necessarily
130 sub_code => { value => VALUE,
139 For each mapped tag, for each unmapped subfield, there is a hash of
140 data about that subfield containing
142 * value - A sample of the subfield text
143 * count - Total number of times the subfield was seen
144 * rcnt - The number of records the subfield was seen in
148 Shawn Boyette, C<< <sboyette at esilibrary.com> >>
152 Please report any bugs or feature requests to the above email address.
156 You can find documentation for this module with the perldoc command.
158 perldoc Equinox::Migration::MARCXMLSampler
161 =head1 COPYRIGHT & LICENSE
163 Copyright 2009 Equinox, all rights reserved.
165 This program is free software; you can redistribute it and/or modify it
166 under the same terms as Perl itself.
171 1; # End of Equinox::Migration::MARCXMLSampler