1 package Equinox::Migration::MapDrivenMARCXMLProc;
7 use Equinox::Migration::SubfieldMapper;
11 Equinox::Migration::MapDrivenMARCXMLProc
19 our $VERSION = '1.000';
26 use Equinox::Migration::MapDrivenMARCXMLProc;
34 Takes two required arguments: C<mapfile> (which will be passed along
35 to L<Equinox::Migration::SubfieldMapper> as the basis for its map),
36 and C<marcfile> (the MARC data to be processed).
38 my $m = Equinox::Migration::MapDrivenMARCXMLProc->new( mapfile => FILE,
41 There is an optional third, argument, C<sample>, which specifies a
42 arrayref of datafields to "sample" by reporting on subfields which are
43 found in the data but not in the map.
45 my $m = Equinox::Migration::MapDrivenMARCXMLProc->new( mapfile => FILE,
50 See L</UNMAPPED TAGS> for more info.
55 my ($class, %args) = @_;
57 my $self = bless { mods => { multi => {},
61 data => { recs => undef, # X::T record objects
62 rptr => 0, # next record pointer
63 crec => undef, # parsed record storage
64 stag => undef, # list of tags to sample
65 umap => undef, # unmapped data samples
69 # initialize map and taglist
70 my @mods = keys %{$self->{mods}};
71 $self->{map} = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile},
73 $self->{tags} = $self->{map}->tags;
76 die "Argument 'marcfile' must be specified\n" unless (defined $args{marcfile});
77 if (-r $args{marcfile}) {
78 $self->{twig} = XML::Twig->new;
79 $self->{twig}->parsefile($args{marcfile});
80 my @records = $self->{twig}->root->children;
81 $self->{data}{recs} = \@records;
83 die "Can't open marc file: $!\n";
92 Extracts data from the next record, per the mapping file. Returns 1 on
95 while ($m->parse_record) {
96 # handle extracted record data
104 # get the next record and wipe current parsed record
105 return 0 unless defined $self->{data}{recs}[ $self->{data}{rptr} ];
106 my $record = $self->{data}{recs}[ $self->{data}{rptr} ];
107 $self->{data}{crec} = {};
109 my @fields = $record->children;
111 { $self->process_field($f) }
113 # cleanup memory and increment pointer
115 $self->{data}{rptr}++;
123 my ($self, $field) = @_;
124 my $map = $self->{map};
125 my $tag = $field->{'att'}->{'tag'};
126 my $parsed = $self->{data}{crec};
129 my $sub = $field->first_child('subfield');
130 $parsed->{egid} = $sub->text;;
131 } elsif ($map->has($tag)) {
132 push @{$parsed->{tags}}, { tag => $tag };
133 my @subs = $field->children('subfield');
135 { $self->process_subs($tag, $sub) }
144 my ($self, $tag, $sub) = @_;
145 my $map = $self->{map};
146 my $code = $sub->{'att'}->{'code'};
148 # handle unmapped tag/subs
149 unless ($map->has($tag, $code)) {
150 my $u = $self->{data}{umap};
151 my $s = $self->{data}{stag};
152 return unless (defined $s->{$tag});
154 $u->{$tag}{$code}{value} = $sub->text unless defined $u->{$tag}{$code};
155 $u->{$tag}{$code}{count}++;
159 my $data = $self->{data}{crec}{tags}[-1];
160 my $field = $map->field($tag, $code);
161 if ($map->mod($field) eq 'multi') {
162 my $name = $tag . $code;
163 push @{$data->{multi}{$name}}, $sub->text;
165 $data->{uni}{$code} = $sub->text;
169 =head1 PARSED RECORDS
172 egid => evergreen_record_id,
174 (tag_id . sub_code)1 => value1,
175 (tag_id . sub_code)2 => value2,
181 multi => { (tag_id . sub_code) => [ val1, val2, ... ] },
182 uni => { code => value, code2 => value2, ... },
188 That is, there is an C<egid> key which points to the Evergreen ID of
189 that record, a C<bib> key which points to a hashref, and a C<tags>
190 key which points to an arrayref.
194 This hashref holds extracted data which should occur once per record
195 (the default assumption is that a tag/subfield pair can occur multiple
196 times per record). The keys are composed of tag id and subfield code,
197 catenated (e.g. 901c). The values are the contents of that subfield of
202 This arrayref holds anonymous hashrefs, one for each instance of each
203 tag which occurs in the map. Each tag hashref holds its own id
204 (e.g. C<998>), and two more hashrefs, C<multi> and C<uni>.
206 The C<multi> hashref holds the extracted data for tag/sub mappings
207 which have the C<multiple> modifier on them. The keys in C<multi> are
208 composed of the tag id and subfield code, catenated
209 (e.g. C<901c>). The values are arrayrefs containing the content of all
210 instances of that subfield in that instance of that tag.
212 The C<uni> hashref holds data for tag/sub mappings which occur only
213 once per instance of a tag (but may occur multiple times in a record
214 due to there being multiple instances of that tag in a record). Keys
215 are subfield codes and values are subfield content.
220 sub_code => { value => VALUE, count => COUNT },
221 sub_code2 => { value => VALUE, count => COUNT },
229 Shawn Boyette, C<< <sboyette at esilibrary.com> >>
233 Please report any bugs or feature requests to the above email address.
237 You can find documentation for this module with the perldoc command.
239 perldoc Equinox::Migration::MapDrivenMARCXMLProc
242 =head1 COPYRIGHT & LICENSE
244 Copyright 2009 Equinox, all rights reserved.
246 This program is free software; you can redistribute it and/or modify it
247 under the same terms as Perl itself.
252 1; # End of Equinox::Migration::MapDrivenMARCXMLProc