1 package Equinox::Migration::MapDrivenMARCXMLProc;
7 use Equinox::Migration::SubfieldMapper 1.003;
12 Equinox::Migration::MapDrivenMARCXMLProc
20 our $VERSION = '1.000';
27 use Equinox::Migration::MapDrivenMARCXMLProc;
35 Takes two required arguments: C<mapfile> (which will be passed along
36 to L<Equinox::Migration::SubfieldMapper> as the basis for its map),
37 and C<marcfile> (the MARC data to be processed).
39 my $m = Equinox::Migration::MapDrivenMARCXMLProc->new( mapfile => FILE,
45 my ($class, %args) = @_;
47 my $self = bless { mods => { multi => {},
51 data => { recs => undef, # X::T record objects
52 rptr => 0, # next record pointer
53 crec => undef, # parsed record storage
57 # initialize map and taglist
58 die "Argument 'mapfile' must be specified\n" unless (defined $args{mapfile});
59 my @mods = keys %{$self->{mods}};
60 $self->{map} = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile},
62 $self->{data}{tags} = $self->{map}->tags;
65 die "Argument 'marcfile' must be specified\n" unless (defined $args{marcfile});
66 if (-r $args{marcfile}) {
67 $self->{twig} = XML::Twig->new;
68 $self->{twig}->parsefile($args{marcfile});
69 my @records = $self->{twig}->root->children;
70 $self->{data}{recs} = \@records;
72 die "Can't open marc file: $!\n";
81 Extracts data from the next record, per the mapping file. Returns a
82 normalized datastructure (see L</format_record> for details) on
83 success; returns 0 otherwise.
85 while (my $rec = $m->parse_record) {
86 # handle extracted record data
94 # get the next record and wipe current parsed record
95 return 0 unless defined $self->{data}{recs}[ $self->{data}{rptr} ];
96 my $record = $self->{data}{recs}[ $self->{data}{rptr} ];
97 $self->{data}{crec} = { egid => undef, tags => undef };
99 my @fields = $record->children;
101 { $self->process_field($f) }
103 # cleanup memory and increment pointer
105 $self->{data}{rptr}++;
107 # check for required fields
108 $self->check_required;
110 return $self->{data}{crec};
114 my ($self, $field) = @_;
115 my $map = $self->{map};
116 my $tag = $field->{'att'}->{'tag'};
117 my $crec = $self->{data}{crec};
120 unless (defined $tag) {
127 my $sub = $field->first_child('subfield');
128 $crec->{egid} = $sub->text;
131 if ($map->has($tag)) {
132 push @{$crec->{tags}}, { tag => $tag, uni => undef, multi => undef };
133 push @{$crec->{tmap}{$tag}}, (@{$crec->{tags}} - 1);
134 my @subs = $field->children('subfield');
136 { $self->process_subs($tag, $sub) }
137 # check map to ensure all declared subs have a value
138 my $mods = $map->mods($field);
139 for my $mappedsub ( @{ $map->subfields($tag) } ) {
140 next if $mods->{multi};
141 $crec->{tags}[-1]{uni}{$mappedsub} = ''
142 unless defined $crec->{tags}[-1]{uni}{$mappedsub};
148 my ($self, $tag, $sub) = @_;
149 my $map = $self->{map};
150 my $code = $sub->{'att'}->{'code'};
152 # handle unmapped tag/subs
153 return unless ($map->has($tag, $code));
155 # fetch our datafield struct and fieldname
156 my $dataf = $self->{data}{crec}{tags}[-1];
157 my $field = $map->field($tag, $code);
160 for my $filter ( @{$map->filters($field)} ) {
161 return if ($sub->text =~ /$filter/i);
163 # handle multi modifier
164 if (my $mods = $map->mods($field)) {
165 if ($mods->{multi}) {
166 my $name = $tag . $code;
167 push @{$dataf->{multi}{$name}}, $sub->text;
172 # if this were a multi field, it would be handled already. make sure its a singleton
173 die "Multiple occurances of a non-multi field: $tag$code at rec ",
174 ($self->{data}{rptr} + 1),"\n" if (defined $dataf->{uni}{$code});
176 # everything seems okay
177 $dataf->{uni}{$code} = $sub->text;
183 my $mods = $self->{map}->mods;
184 my $crec = $self->{data}{crec};
186 for my $tag_id (keys %{$mods->{required}}) {
187 for my $code (@{$mods->{required}{$tag_id}}) {
190 for my $tag (@{$crec->{tags}}) {
191 $found = 1 if ($tag->{multi}{($tag_id . $code)});
192 $found = 1 if ($tag->{uni}{$code});
195 die "Required mapping $tag_id$code not found in rec ",$self->{data}{rptr},"\n"
204 MapDrivenMARCXMLProc implements the following modifiers, and passes
205 them to L<Equinox::Migration::SubfieldMapper>, meaning that specifying
206 any other modifiers in a MDMP map file will cause a fatal error when
211 If a mapping is declared to be C<multi>, then MDMP expects to see more
212 than one instance of that subfield per datafield, and the data is
213 handled accordingly (see L</PARSED RECORDS> below).
215 Occurring zero or one time is legal for a C<multi> mapping.
217 A mapping which is not flagged as C<multi>, but which occurs more than
218 once per datafield will cause a fatal error.
222 By default, if a mapping does not occur in a datafield, processing
223 continues normally. if a mapping has the C<required> modifier,
224 however, it must appear, or a fatal error will occur.
226 =head1 PARSED RECORDS
230 my $m = Equinox::Migration::MapDrivenMARCXMLProc->new(ARGUMENTS);
231 $rec = $m->parse_record;
233 Then C<$rec> will look like:
236 egid => evergreen_record_id,
240 multi => { (tag_id . sub_code) => [ val1, val2, ... ] },
241 uni => { code => value, code2 => value2, ... },
245 tmap => { tag_id => [ INDEX_LIST ], tag_id2 => [ INDEX_LIST ], ... }
248 That is, there is an C<egid> key which points to the Evergreen ID of
249 that record, a C<tags> key which points to an arrayref, and a C<tmap>
250 key which points to a hashref.
254 A reference to a list of anonymous hashes, one for each instance of
255 each tag which occurs in the map.
257 Each tag hash holds its own id (e.g. C<998>), and two references to
258 two more hashrefs, C<multi> and C<uni>.
260 The C<multi> hash holds the extracted data for tag/sub mappings which
261 have the C<multiple> modifier on them. The keys in C<multi> are
262 composed of the tag id and subfield code, catenated
263 (e.g. C<901c>). The values are arrayrefs containing the content of all
264 instances of that subfield in that instance of that tag. If no tags
265 are defined as C<multi>, it will be C<undef>.
267 The C<uni> hash holds data for tag/sub mappings which occur only once
268 per instance of a tag (but may occur multiple times in a record due to
269 there being multiple instances of that tag in a record). Keys are
270 subfield codes and values are subfield content.
272 All C<uni> subfields occuring in the map are guaranteed to be
273 defined. Sufields which are mapped but do not occur in a particular
274 datafield will be given a value of '' (the null string) in the current
275 record struct. Oppose subfields which are not mapped, which will be
280 A hashref, where each key (a tag id like "650") points to a listref
281 containing the index (or indices) of C<tags> where that tag has
284 The intended use of this is to simplify the processing of data from
285 tags which can appear more than once in a MARC record, like
286 holdings. If your holdings data is in 852, C<tmap->{852}> will be a
287 listref with the indices of C<tags> which hold the data from the 852
290 Complimentarily, C<tmap> prevents data from singular datafields from
291 having to be copied for every instance of a multiple datafield, as it
292 lets you get the data from that record's one instance of whichever
293 field you're looking for.
297 Shawn Boyette, C<< <sboyette at esilibrary.com> >>
301 Please report any bugs or feature requests to the above email address.
305 You can find documentation for this module with the perldoc command.
307 perldoc Equinox::Migration::MapDrivenMARCXMLProc
310 =head1 COPYRIGHT & LICENSE
312 Copyright 2009 Equinox, all rights reserved.
314 This program is free software; you can redistribute it and/or modify it
315 under the same terms as Perl itself.
320 1; # End of Equinox::Migration::MapDrivenMARCXMLProc