}, $class;
# initialize map and taglist
+ die "Argument 'mapfile' must be specified\n" unless (defined $args{mapfile});
my @mods = keys %{$self->{mods}};
$self->{map} = Equinox::Migration::SubfieldMapper->new( file => $args{mapfile},
mods => \@mods );
- $self->{tags} = $self->{map}->tags;
+ $self->{data}{tags} = $self->{map}->tags;
# initialize twig
die "Argument 'marcfile' must be specified\n" unless (defined $args{marcfile});
=head2 parse_record
-Extracts data from the next record, per the mapping file. Returns 1 on
-success, 0 otherwise.
+Extracts data from the next record, per the mapping file. Returns a
+normalized data structure (see L</format_record> for details) on
+success; returns 0 otherwise.
- while ($m->parse_record) {
+ while (my $rec = $m->parse_record) {
# handle extracted record data
}
# get the next record and wipe current parsed record
return 0 unless defined $self->{data}{recs}[ $self->{data}{rptr} ];
my $record = $self->{data}{recs}[ $self->{data}{rptr} ];
- $self->{data}{crec} = {};
+ $self->{data}{crec} = { egid => undef, bib => undef, tags => undef };
my @fields = $record->children;
for my $f (@fields)
# cleanup memory and increment pointer
$record->purge;
$self->{data}{rptr}++;
+
+ return $self->{data}{crec};
}
=head2 process_field
my ($self, $field) = @_;
my $map = $self->{map};
my $tag = $field->{'att'}->{'tag'};
- my $parsed = $self->{data}{crec};
-
- if ($tag == 903) {
- my $sub = $field->first_child('subfield');
- $parsed->{egid} = $sub->text;;
- } elsif ($map->has($tag)) {
- push @{$parsed->{tags}}, { tag => $tag };
- my @subs = $field->children('subfield');
- for my $sub (@subs)
- { $self->process_subs($tag, $sub) }
+ my $crec = $self->{data}{crec};
+
+ # datafields
+ if (defined $tag) {
+ if ($tag == 903) {
+ my $sub = $field->first_child('subfield');
+ $crec->{egid} = $sub->text;;
+ } elsif ($map->has($tag)) {
+ push @{$crec->{tags}}, { tag => $tag, uni => undef, multi => undef };
+ my @subs = $field->children('subfield');
+ for my $sub (@subs)
+ { $self->process_subs($tag, $sub) }
+ # check map to ensure all declared subs are in
+ }
}
}
my $s = $self->{data}{stag};
return unless (defined $s->{$tag});
+ # set a value, total-seen count and records-seen-in count
$u->{$tag}{$code}{value} = $sub->text unless defined $u->{$tag}{$code};
$u->{$tag}{$code}{count}++;
+ $u->{$tag}{$code}{rcnt}++ unless ($u->{$tag}{$code}{last} == $self->{data}{rptr});
+ $u->{$tag}{$code}{last} = $self->{data}{rptr};
return;
}
- my $data = $self->{data}{crec}{tags}[-1];
+ my $dataf = $self->{data}{crec}{tags}[-1];
my $field = $map->field($tag, $code);
- if ($map->mod($field) eq 'multi') {
- my $name = $tag . $code;
- push @{$data->{multi}{$name}}, $sub->text;
- } else {
- $data->{uni}{$code} = $sub->text;
+
+ # handle modifiers
+ if (defined $map->mods($field)) {
+ if ($map->mods($field) eq 'multi') {
+ my $name = $tag . $code;
+ push @{$dataf->{multi}{$name}}, $sub->text;
+ }
}
+
+ $dataf->{uni}{$code} = $sub->text;
}
=head1 PARSED RECORDS
catenated (e.g. 901c). The values are the contents of that subfield of
that tag.
+If there are no tags defined as bib-level, C<bib> will be C<undef>.
+
=head3 C<tags>
This arrayref holds anonymous hashrefs, one for each instance of each
due to there being multiple instances of that tag in a record). Keys
are subfield codes and values are subfield content.
+If no tags are defined as C<multi>, the C<multi> value will be C<undef>.
+
=head1 UNMAPPED TAGS
{ tag_id => {