# get the next record and wipe current parsed record
return 0 unless defined $self->{data}{recs}[ $self->{data}{rptr} ];
my $record = $self->{data}{recs}[ $self->{data}{rptr} ];
- $self->{data}{crec} = { bib => undef, multi => undef };
+ $self->{data}{crec} = { egid => undef, bib => undef, tags => undef };
my @fields = $record->children;
for my $f (@fields)
$record->purge;
$self->{data}{rptr}++;
- return $self->format_record;
+ return $self->{data}{crec};
}
=head2 process_field
my ($self, $field) = @_;
my $map = $self->{map};
my $tag = $field->{'att'}->{'tag'};
- my $parsed = $self->{data}{crec};
+ my $crec = $self->{data}{crec};
# datafields
if (defined $tag) {
if ($tag == 903) {
my $sub = $field->first_child('subfield');
- $parsed->{egid} = $sub->text;;
+ $crec->{egid} = $sub->text;;
} elsif ($map->has($tag)) {
- push @{$parsed->{tags}}, { tag => $tag };
+ push @{$crec->{tags}}, { tag => $tag, uni => undef, multi => undef };
my @subs = $field->children('subfield');
for my $sub (@subs)
{ $self->process_subs($tag, $sub) }
+ # check the map to ensure all declared subfields are present
}
}
}
my $s = $self->{data}{stag};
return unless (defined $s->{$tag});
+ # record the first-seen value, the total number of times this subfield
+ # was seen, and the number of distinct records it was seen in
$u->{$tag}{$code}{value} = $sub->text unless defined $u->{$tag}{$code};
$u->{$tag}{$code}{count}++;
+ $u->{$tag}{$code}{rcnt}++ unless ($u->{$tag}{$code}{last} == $self->{data}{rptr});
+ $u->{$tag}{$code}{last} = $self->{data}{rptr};
return;
}
- my $data = $self->{data}{crec}{tags}[-1];
+ my $dataf = $self->{data}{crec}{tags}[-1];
my $field = $map->field($tag, $code);
# handle modifiers
if (defined $map->mods($field)) {
if ($map->mods($field) eq 'multi') {
my $name = $tag . $code;
- push @{$data->{multi}{$name}}, $sub->text;
+ push @{$dataf->{multi}{$name}}, $sub->text;
}
}
- $data->{uni}{$code} = $sub->text;
+ $dataf->{uni}{$code} = $sub->text;
}
=head1 PARSED RECORDS
mapfile => 't/corpus/mdmpmap-00.txt',
);
is(ref $mp, "Equinox::Migration::MapDrivenMARCXMLProc", "self is self");
-
# parsing
+#
+# with map-00, only the 999$a should be captured
+# 903$a will *always* be captured, of course
my $rec = $mp->parse_record;
is (defined $rec, 1);
+is ($rec->{egid}, 9000000, '903 captured');
+is ($rec->{tags}[0]{tag}, 999, 'first (only) tag should be 999');
+is ($rec->{tags}[0]{uni}{a}, "MYS DEM", 'single-ocurrance subfield "a" should be "MYS DEM"');
+is ($rec->{tags}[0]{uni}{b}, undef, 'only one uni subfield defined');
+is ($rec->{tags}[0]{multi}, undef, 'no multi subfields were defined');
+is ($rec->{tags}[1], undef, 'Only one tag in map');
+is ($rec->{bib}, undef, 'No bib-level fields in map');
+# let's go ahead and look at the rest of the file
+$rec = $mp->parse_record;
+is ($rec->{egid}, 9000001, '903 #2');
+is ($rec->{tags}[0]{tag}, 999, 'tag id 2');
+is ($rec->{tags}[0]{uni}{a}, "MYS 2", 'subfield value 2');
+$rec = $mp->parse_record;
+is ($rec->{egid}, 9000002, '903 #3');
+is ($rec->{tags}[0]{tag}, 999, 'tag id 3');
+is ($rec->{tags}[0]{uni}{a}, "FOO BAR", 'subfield value 3');
+$rec = $mp->parse_record;
+is ($rec->{egid}, 9000003, '903 #4');
+is ($rec->{tags}[0]{tag}, 999, 'tag id 4');
+is ($rec->{tags}[0]{uni}{a}, "FIC DEV", 'subfield value 4');
+$rec = $mp->parse_record;
+is ($rec, 0, 'no more records');
<collection>
<record> <leader>00708nam a2200229u 4500</leader> <controlfield tag="001">ocm03650663</controlfield> <controlfield tag="003">OCoLC</controlfield> <controlfield tag="005">19840321193705.0</controlfield> <controlfield tag="008">971231s1978 nyu 00011 eng </controlfield> <datafield tag="010" ind1=" " ind2=" "> <subfield code="a"> 77091474</subfield> </datafield> <datafield tag="020" ind1=" " ind2=" "> <subfield code="a">0151152780</subfield> </datafield> <datafield tag="040" ind1=" " ind2=" "> <subfield code="a">DLC</subfield> <subfield code="c">DLC</subfield> <subfield code="d">IIA</subfield> </datafield> <datafield tag="050" ind1="0" ind2=" "> <subfield code="a">PZ4.D3792</subfield> <subfield code="b">By</subfield> <subfield code="a">PS3554.E472</subfield> </datafield> <datafield tag="082" ind1=" " ind2=" "> <subfield code="a">813/.5/4</subfield> </datafield> <datafield tag="092" ind1=" " ind2=" "> <subfield code="a">Fic</subfield> <subfield code="b">DeM</subfield> </datafield> <datafield tag="100" ind1="1" ind2="0"> <subfield code="a">DeMille, Nelson.</subfield> </datafield> <datafield tag="245" ind1="1" ind2="0"> <subfield code="a">By the rivers of Babylon :</subfield> <subfield code="b">a novel /</subfield> <subfield code="c">by Nelson De Mille.</subfield> </datafield> <datafield tag="250" ind1=" " ind2=" "> <subfield code="a">1st ed.</subfield> </datafield> <datafield tag="260" ind1="0" ind2=" "> <subfield code="a">New York :</subfield> <subfield code="b">Harcourt Brace Jovanovich,</subfield> <subfield code="c">c1978.</subfield> </datafield> <datafield tag="300" ind1=" " ind2=" "> <subfield code="a">391 p. 
;</subfield> <subfield code="c">24 cm.</subfield> </datafield> <datafield tag="998" ind1=" " ind2=" "> <subfield code="a">a3</subfield> </datafield> <datafield tag="999" ind1=" " ind2=" "> <subfield code="a">MYS DEM</subfield> <subfield code="w">DEWEY</subfield> <subfield code="c">1</subfield> <subfield code="i">010003</subfield> <subfield code="d">7/9/2008</subfield> <subfield code="e">6/30/2008</subfield> <subfield code="l">MYSTERY</subfield> <subfield code="m">CARNEGIE</subfield> <subfield code="n">8</subfield> <subfield code="p">$25.00</subfield> <subfield code="r">Y</subfield> <subfield code="s">Y</subfield> <subfield code="t">BOOK</subfield> <subfield code="u">11/22/2002</subfield> <subfield code="x">MYSTERY</subfield> <subfield code="z">ADULT</subfield> </datafield> <datafield tag="903" ind1=" " ind2=" "> <subfield code="a">9000000</subfield></datafield></record>
-<record> <leader>00770nam a2200265u 4500</leader> <controlfield tag="001">ocm07175079</controlfield> <controlfield tag="003">OCoLC</controlfield> <controlfield tag="005">19810601170611.0</controlfield> <controlfield tag="008">971231s1981 nyu 00011 eng </controlfield> <datafield tag="010" ind1=" " ind2=" "> <subfield code="a"> 80029126</subfield> </datafield> <datafield tag="020" ind1=" " ind2=" "> <subfield code="a">044001140X</subfield> </datafield> <datafield tag="040" ind1=" " ind2=" "> <subfield code="a">DLC</subfield> <subfield code="c">DLC</subfield> <subfield code="d">IIA</subfield> </datafield> <datafield tag="050" ind1="0" ind2=" "> <subfield code="a">PS3554.E472</subfield> <subfield code="b">C3</subfield> </datafield> <datafield tag="082" ind1="0" ind2=" "> <subfield code="a">813/.54</subfield> <subfield code="2">19</subfield> </datafield> <datafield tag="092" ind1=" " ind2=" "> <subfield code="a">Fic</subfield> <subfield code="b">DeM</subfield> </datafield> <datafield tag="100" ind1="1" ind2="0"> <subfield code="a">DeMille, Nelson.</subfield> </datafield> <datafield tag="245" ind1="1" ind2="0"> <subfield code="a">Cathedral :</subfield> <subfield code="b">a novel /</subfield> <subfield code="c">by Nelson DeMille.</subfield> </datafield> <datafield tag="260" ind1="0" ind2=" "> <subfield code="a">New York :</subfield> <subfield code="b">Delacorte Press,</subfield> <subfield code="c">c1981.</subfield> </datafield> <datafield tag="263" ind1=" " ind2=" "> <subfield code="a">8105</subfield> </datafield> <datafield tag="300" ind1=" " ind2=" "> <subfield code="a">483 p.</subfield> </datafield> <datafield tag="500" ind1=" " ind2=" "> <subfield code="a">"A Bernard Geis Associates book."</subfield> </datafield> <datafield tag="852" ind1=" " ind2=" "> <subfield code="c">MYS</subfield> <subfield code="p">10004</subfield> <subfield code="h">MYS DeM</subfield> </datafield> <datafield tag="961" ind1="w" ind2="l"> <subfield code="t">80</subfield> </datafield> <datafield 
tag="998" ind1=" " ind2=" "> <subfield code="a">a4</subfield> </datafield> <datafield tag="999" ind1=" " ind2=" "> <subfield code="a">MYS DEM</subfield> <subfield code="w">DEWEY</subfield> <subfield code="c">1</subfield> <subfield code="i">010004</subfield> <subfield code="d">11/17/2007</subfield> <subfield code="e">10/27/2007</subfield> <subfield code="l">MYSTERY</subfield> <subfield code="m">CARNEGIE</subfield> <subfield code="n">10</subfield> <subfield code="p">$25.00</subfield> <subfield code="r">Y</subfield> <subfield code="s">Y</subfield> <subfield code="t">BOOK</subfield> <subfield code="u">11/22/2002</subfield> <subfield code="x">MYSTERY</subfield> <subfield code="z">ADULT</subfield> </datafield> <datafield tag="903" ind1=" " ind2=" "> <subfield code="a">9000001</subfield></datafield></record>
-<record> <leader>00725nam a2200241u 4500</leader> <controlfield tag="001">ocm17300504</controlfield> <controlfield tag="003">OCoLC</controlfield> <controlfield tag="005">19910515200843.0</controlfield> <controlfield tag="008">971231s1988 nyu 00011 eng </controlfield> <datafield tag="010" ind1=" " ind2=" "> <subfield code="a"> 87034637</subfield> </datafield> <datafield tag="020" ind1=" " ind2=" "> <subfield code="a">0446513059 :</subfield> <subfield code="c">$17.95</subfield> </datafield> <datafield tag="040" ind1=" " ind2=" "> <subfield code="a">DLC</subfield> <subfield code="c">DLC</subfield> <subfield code="d">IIA</subfield> </datafield> <datafield tag="050" ind1="0" ind2=" "> <subfield code="a">PS3554.E472</subfield> <subfield code="b">C48 1988</subfield> </datafield> <datafield tag="082" ind1="0" ind2=" "> <subfield code="a">813/.54</subfield> <subfield code="2">19</subfield> </datafield> <datafield tag="092" ind1=" " ind2=" "> <subfield code="a">Fic</subfield> <subfield code="b">DeM</subfield> </datafield> <datafield tag="100" ind1="1" ind2="0"> <subfield code="a">DeMille, Nelson.</subfield> </datafield> <datafield tag="245" ind1="1" ind2="4"> <subfield code="a">The charm school /</subfield> <subfield code="c">Nelson DeMille.</subfield> </datafield> <datafield tag="260" ind1="0" ind2=" "> <subfield code="a">New York, NY :</subfield> <subfield code="b">Warner Books,</subfield> <subfield code="c">1988.</subfield> </datafield> <datafield tag="300" ind1=" " ind2=" "> <subfield code="a">533 p. 
;</subfield> <subfield code="c">24 cm.</subfield> </datafield> <datafield tag="852" ind1=" " ind2=" "> <subfield code="c">MYS</subfield> <subfield code="p">10012</subfield> <subfield code="9">17.95</subfield> <subfield code="h">MYS DeM</subfield> </datafield> <datafield tag="961" ind1="w" ind2="l"> <subfield code="t">80</subfield> </datafield> <datafield tag="998" ind1=" " ind2=" "> <subfield code="a">a8</subfield> </datafield> <datafield tag="999" ind1=" " ind2=" "> <subfield code="a">MYS DEM</subfield> <subfield code="w">DEWEY</subfield> <subfield code="c">1</subfield> <subfield code="i">010012</subfield> <subfield code="d">11/20/2007</subfield> <subfield code="e">11/15/2007</subfield> <subfield code="l">MYSTERY</subfield> <subfield code="m">CARNEGIE</subfield> <subfield code="n">10</subfield> <subfield code="p">$17.95</subfield> <subfield code="r">Y</subfield> <subfield code="s">Y</subfield> <subfield code="t">BOOK</subfield> <subfield code="u">11/22/2002</subfield> <subfield code="x">MYSTERY</subfield> <subfield code="z">ADULT</subfield> </datafield> <datafield tag="903" ind1=" " ind2=" "> <subfield code="a">9000002</subfield></datafield></record>
+<record> <leader>00770nam a2200265u 4500</leader> <controlfield tag="001">ocm07175079</controlfield> <controlfield tag="003">OCoLC</controlfield> <controlfield tag="005">19810601170611.0</controlfield> <controlfield tag="008">971231s1981 nyu 00011 eng </controlfield> <datafield tag="010" ind1=" " ind2=" "> <subfield code="a"> 80029126</subfield> </datafield> <datafield tag="020" ind1=" " ind2=" "> <subfield code="a">044001140X</subfield> </datafield> <datafield tag="040" ind1=" " ind2=" "> <subfield code="a">DLC</subfield> <subfield code="c">DLC</subfield> <subfield code="d">IIA</subfield> </datafield> <datafield tag="050" ind1="0" ind2=" "> <subfield code="a">PS3554.E472</subfield> <subfield code="b">C3</subfield> </datafield> <datafield tag="082" ind1="0" ind2=" "> <subfield code="a">813/.54</subfield> <subfield code="2">19</subfield> </datafield> <datafield tag="092" ind1=" " ind2=" "> <subfield code="a">Fic</subfield> <subfield code="b">DeM</subfield> </datafield> <datafield tag="100" ind1="1" ind2="0"> <subfield code="a">DeMille, Nelson.</subfield> </datafield> <datafield tag="245" ind1="1" ind2="0"> <subfield code="a">Cathedral :</subfield> <subfield code="b">a novel /</subfield> <subfield code="c">by Nelson DeMille.</subfield> </datafield> <datafield tag="260" ind1="0" ind2=" "> <subfield code="a">New York :</subfield> <subfield code="b">Delacorte Press,</subfield> <subfield code="c">c1981.</subfield> </datafield> <datafield tag="263" ind1=" " ind2=" "> <subfield code="a">8105</subfield> </datafield> <datafield tag="300" ind1=" " ind2=" "> <subfield code="a">483 p.</subfield> </datafield> <datafield tag="500" ind1=" " ind2=" "> <subfield code="a">"A Bernard Geis Associates book."</subfield> </datafield> <datafield tag="852" ind1=" " ind2=" "> <subfield code="c">MYS</subfield> <subfield code="p">10004</subfield> <subfield code="h">MYS DeM</subfield> </datafield> <datafield tag="961" ind1="w" ind2="l"> <subfield code="t">80</subfield> </datafield> <datafield 
tag="998" ind1=" " ind2=" "> <subfield code="a">a4</subfield> </datafield> <datafield tag="999" ind1=" " ind2=" "> <subfield code="a">MYS 2</subfield> <subfield code="w">DEWEY</subfield> <subfield code="c">1</subfield> <subfield code="i">010004</subfield> <subfield code="d">11/17/2007</subfield> <subfield code="e">10/27/2007</subfield> <subfield code="l">MYSTERY</subfield> <subfield code="m">CARNEGIE</subfield> <subfield code="n">10</subfield> <subfield code="p">$25.00</subfield> <subfield code="r">Y</subfield> <subfield code="s">Y</subfield> <subfield code="t">BOOK</subfield> <subfield code="u">11/22/2002</subfield> <subfield code="x">MYSTERY</subfield> <subfield code="z">ADULT</subfield> </datafield> <datafield tag="903" ind1=" " ind2=" "> <subfield code="a">9000001</subfield></datafield></record>
+<record> <leader>00725nam a2200241u 4500</leader> <controlfield tag="001">ocm17300504</controlfield> <controlfield tag="003">OCoLC</controlfield> <controlfield tag="005">19910515200843.0</controlfield> <controlfield tag="008">971231s1988 nyu 00011 eng </controlfield> <datafield tag="010" ind1=" " ind2=" "> <subfield code="a"> 87034637</subfield> </datafield> <datafield tag="020" ind1=" " ind2=" "> <subfield code="a">0446513059 :</subfield> <subfield code="c">$17.95</subfield> </datafield> <datafield tag="040" ind1=" " ind2=" "> <subfield code="a">DLC</subfield> <subfield code="c">DLC</subfield> <subfield code="d">IIA</subfield> </datafield> <datafield tag="050" ind1="0" ind2=" "> <subfield code="a">PS3554.E472</subfield> <subfield code="b">C48 1988</subfield> </datafield> <datafield tag="082" ind1="0" ind2=" "> <subfield code="a">813/.54</subfield> <subfield code="2">19</subfield> </datafield> <datafield tag="092" ind1=" " ind2=" "> <subfield code="a">Fic</subfield> <subfield code="b">DeM</subfield> </datafield> <datafield tag="100" ind1="1" ind2="0"> <subfield code="a">DeMille, Nelson.</subfield> </datafield> <datafield tag="245" ind1="1" ind2="4"> <subfield code="a">The charm school /</subfield> <subfield code="c">Nelson DeMille.</subfield> </datafield> <datafield tag="260" ind1="0" ind2=" "> <subfield code="a">New York, NY :</subfield> <subfield code="b">Warner Books,</subfield> <subfield code="c">1988.</subfield> </datafield> <datafield tag="300" ind1=" " ind2=" "> <subfield code="a">533 p. 
;</subfield> <subfield code="c">24 cm.</subfield> </datafield> <datafield tag="852" ind1=" " ind2=" "> <subfield code="c">MYS</subfield> <subfield code="p">10012</subfield> <subfield code="9">17.95</subfield> <subfield code="h">MYS DeM</subfield> </datafield> <datafield tag="961" ind1="w" ind2="l"> <subfield code="t">80</subfield> </datafield> <datafield tag="998" ind1=" " ind2=" "> <subfield code="a">a8</subfield> </datafield> <datafield tag="999" ind1=" " ind2=" "> <subfield code="a">FOO BAR</subfield> <subfield code="w">DEWEY</subfield> <subfield code="c">1</subfield> <subfield code="i">010012</subfield> <subfield code="d">11/20/2007</subfield> <subfield code="e">11/15/2007</subfield> <subfield code="l">MYSTERY</subfield> <subfield code="m">CARNEGIE</subfield> <subfield code="n">10</subfield> <subfield code="p">$17.95</subfield> <subfield code="r">Y</subfield> <subfield code="s">Y</subfield> <subfield code="t">BOOK</subfield> <subfield code="u">11/22/2002</subfield> <subfield code="x">MYSTERY</subfield> <subfield code="z">ADULT</subfield> </datafield> <datafield tag="903" ind1=" " ind2=" "> <subfield code="a">9000002</subfield></datafield></record>
<record> <leader>00792nam a2200277u 4500</leader> <controlfield tag="001">ocm21442030</controlfield> <controlfield tag="003">OCoLC</controlfield> <controlfield tag="005">19900806213946.0</controlfield> <controlfield tag="008">971231s1990 nyu 00011 eng </controlfield> <datafield tag="010" ind1=" " ind2=" "> <subfield code="a"> 90036314</subfield> </datafield> <datafield tag="020" ind1=" " ind2=" "> <subfield code="a">0671689754 :</subfield> <subfield code="c">$18.95</subfield> </datafield> <datafield tag="040" ind1=" " ind2=" "> <subfield code="a">DLC</subfield> <subfield code="c">DLC</subfield> <subfield code="d">IIA</subfield> </datafield> <datafield tag="050" ind1="0" ind2="0"> <subfield code="a">PS3554.E9273</subfield> <subfield code="b">M6 1990</subfield> </datafield> <datafield tag="082" ind1="0" ind2="0"> <subfield code="a">813/.54</subfield> <subfield code="2">20</subfield> </datafield> <datafield tag="092" ind1=" " ind2=" "> <subfield code="a">Fic</subfield> <subfield code="b">Dev</subfield> </datafield> <datafield tag="100" ind1="1" ind2="0"> <subfield code="a">Deveraux, Jude.</subfield> </datafield> <datafield tag="245" ind1="1" ind2="0"> <subfield code="a">Mountain laurel /</subfield> <subfield code="c">Jude Deveraux.</subfield> </datafield> <datafield tag="260" ind1="0" ind2=" "> <subfield code="a">New York :</subfield> <subfield code="b">Pocket Books,</subfield> <subfield code="c">c1990.</subfield> </datafield> <datafield tag="263" ind1=" " ind2=" "> <subfield code="a">9007</subfield> </datafield> <datafield tag="300" ind1=" " ind2=" "> <subfield code="a">312 p.</subfield> </datafield> <datafield tag="852" ind1=" " ind2=" "> <subfield code="c">FIC</subfield> <subfield code="h">Fic Dev</subfield> <subfield code="p">10015</subfield> <subfield code="9">18.95</subfield> </datafield> <datafield tag="852" ind1=" " ind2=" "> <subfield code="c">STORAGE</subfield> <subfield code="h">Fic Dev</subfield> <subfield code="p">14355</subfield> <subfield 
code="9">18.95</subfield> </datafield> <datafield tag="961" ind1="w" ind2="l"> <subfield code="t">2</subfield> </datafield> <datafield tag="961" ind1="w" ind2="l"> <subfield code="t">71</subfield> </datafield> <datafield tag="998" ind1=" " ind2=" "> <subfield code="a">a9</subfield> </datafield> <datafield tag="999" ind1=" " ind2=" "> <subfield code="a">FIC DEV</subfield> <subfield code="w">DEWEY</subfield> <subfield code="c">1</subfield> <subfield code="i">010015</subfield> <subfield code="d">3/9/2009</subfield> <subfield code="e">2/16/2009</subfield> <subfield code="l">FICTION</subfield> <subfield code="m">CARNEGIE</subfield> <subfield code="n">14</subfield> <subfield code="p">$18.95</subfield> <subfield code="r">Y</subfield> <subfield code="s">Y</subfield> <subfield code="t">BOOK</subfield> <subfield code="u">11/22/2002</subfield> <subfield code="x">FICTION</subfield> <subfield code="z">ADULT</subfield> </datafield> <datafield tag="903" ind1=" " ind2=" "> <subfield code="a">9000003</subfield></datafield></record>
</collection>