package Equinox::Migration::MapDrivenMARCXMLProc;
+# Copyright 2009-2012, Equinox Software, Inc.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License
+# as published by the Free Software Foundation; either version 2
+# of the License, or (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+
use warnings;
use strict;
use XML::Twig;
-use DBM::Deep;
use Equinox::Migration::SubfieldMapper 1.004;
=head1 VERSION
-Version 1.002
+Version 1.005
=cut
-our $VERSION = '1.002';
+our $VERSION = '1.005';
my $dstore;
my $sfmap;
-my @mods = qw( multi bib required );
+my @modlist = qw( multi ignoremulti bib required first concatenate );
+my %allmods = ();
+my $multis = {};
my $reccount;
my $verbose = 0;
$verbose = 1 if $args{verbose};
- my $self = bless {
+ my $self = bless { multis => \$multis,
}, $class;
# initialize map and taglist
die "Argument 'mapfile' must be specified\n" unless ($args{mapfile});
$sfmap = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile},
- mods => \@mods );
+ mods => \@modlist );
# initialize datastore
- $dstore = DBM::Deep->new( file => "EMMXSSTORAGE.dbmd",
- data_sector_size => 256 );
+ $dstore = {};
$reccount = 0; # next record ptr
$dstore->{tags} = $sfmap->tags; # list of all tags
$self->{data} = $dstore;
return $self;
}
-sub DESTROY { unlink "EMMXSSTORAGE.dbmd" }
-
=head2 parse_record
Extracts data from the next record, per the mapping file.
for my $f (@fields)
{ process_field($f, $crec) }
+ # fill in blank values if needed
+ for my $mappedtag ( @{ $sfmap->tags }) {
+ unless (exists $crec->{tmap}{$mappedtag}) {
+ push @{ $crec->{tags} }, {};
+ for my $mappedsub ( @{ $sfmap->subfields($mappedtag) } ) {
+ my $fieldname = $sfmap->field($mappedtag, $mappedsub);
+ my $mods = $sfmap->mods($fieldname);
+ next if $mods->{multi};
+ $crec->{tags}[-1]{uni}{$mappedsub} = '';
+ $crec->{tags}[-1]{multi} = undef;
+ $crec->{tags}[-1]{tag} = $mappedtag;
+ }
+ push @{ $crec->{tmap}{$mappedtag} }, $#{ $crec->{tags} };
+ }
+ }
+
# cleanup memory and increment pointer
$record->purge;
$reccount++;
# check for required fields
- check_required();
+ check_required($crec);
push @{ $dstore->{recs} }, $crec;
print STDERR "$reccount\n"
{ process_subs($tag, $sub, $crec) }
# check map to ensure all declared tags and subs have a value
- my $mods = $sfmap->mods($field);
for my $mappedsub ( @{ $sfmap->subfields($tag) } ) {
+ my $fieldname = $sfmap->field($tag, $mappedsub);
+ my $mods = $sfmap->mods($fieldname);
next if $mods->{multi};
$crec->{tags}[-1]{uni}{$mappedsub} = ''
unless defined $crec->{tags}[-1]{uni}{$mappedsub};
}
- for my $mappedtag ( @{ $sfmap->tags }) {
- $crec->{tmap}{$mappedtag} = undef
- unless defined $crec->{tmap}{$mappedtag};
- }
}
}
# handle unmapped tag/subs
return unless ($sfmap->has($tag, $code));
- # fetch our datafield struct and fieldname
+ # fetch our datafield struct and field and mods
my $dataf = $crec->{tags}[-1];
my $field = $sfmap->field($tag, $code);
- $crec->{names}{$tag}{$code} = $field;
+ my $sep = $sfmap->sep($field);
+ $allmods{$field} = $sfmap->mods($field) unless $allmods{$field};
+ my $mods = $allmods{$field};
# test filters
for my $filter ( @{$sfmap->filters($field)} ) {
return if ($sub->text =~ /$filter/i);
}
+
# handle multi modifier
- if (my $mods = $sfmap->mods($field)) {
- if ($mods->{multi}) {
+ if ($mods->{multi}) {
+ $multis->{$tag}{$code} = 1;
+ if ($mods->{concatenate}) {
+ if (exists($dataf->{multi}{$code})) {
+ $dataf->{multi}{$code}[0] .= $sep . $sub->text;
+ } else {
+ push @{$dataf->{multi}{$code}}, $sub->text;
+ }
+ $multis->{$tag}{$code} = 1;
+ } else {
push @{$dataf->{multi}{$code}}, $sub->text;
- return;
}
+ return;
+ }
+
+
+ if ($mods->{concatenate}) {
+ if (exists($dataf->{uni}{$code})) {
+ $dataf->{uni}{$code} .= $sep . $sub->text;
+ } else {
+ $dataf->{uni}{$code} = $sub->text;
+ }
+ return;
}
# if this were a multi field, it would be handled already. make sure its a singleton
die "Multiple occurances of a non-multi field: $tag$code at rec ",
- ($reccount + 1),"\n" if (defined $dataf->{uni}{$code});
+ ($reccount + 1),"\n" if (defined $dataf->{uni}{$code} and !$mods->{ignoremulti});
# everything seems okay
$dataf->{uni}{$code} = $sub->text;
sub check_required {
+ my ($crec) = @_;
my $mods = $sfmap->mods;
- my $crec = $dstore->{crec};
for my $tag_id (keys %{$mods->{required}}) {
for my $code (@{$mods->{required}{$tag_id}}) {
=head2 name
-Returns mapped fieldname when passed a record number, tag, and code
+Returns mapped fieldname when passed a tag, and code
- my $name = $m->name(3,999,'a');
+ my $name = $m->name(999,'a');
=cut
-sub name { my ($self, $r, $t, $c) = @_; return $dstore->{recs}[$r]{names}{$t}{$c} };
+sub name { my ($self, $t, $c) = @_; return $sfmap->field($t, $c) }
+
+=head2 first_only
+
+Returns whether mapped fieldname is to be applied only to first
+item in a bib
+
+=cut
+
+sub first_only {
+ my ($self, $t, $c) = @_;
+ my $field = $sfmap->field($t, $c);
+ my $mods = $sfmap->mods($field);
+ return exists($mods->{first});
+}
+
+=head2 get_multis
+
+Returns hashref of C<{tag}{code}> for all mapped multi fields
+
+=cut
+
+sub get_multis {
+ my ($self) = @_;
+ return $multis;
+}
=head1 MODIFIERS