X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=fingerprinter;h=dcf2e29e63f4bdb8611a0419926fa8b6f39d40f8;hp=6e04e0ba89cf4eb08983896c79f2ff01755dd0ee;hb=26d762670d7610ed9ccb1c36a8dbab3169e4e2a3;hpb=4ee76d3146c0b0f2b9c42b04dac2487c777e9165 diff --git a/fingerprinter b/fingerprinter index 6e04e0b..dcf2e29 100755 --- a/fingerprinter +++ b/fingerprinter @@ -27,7 +27,15 @@ for my $file (@ARGV) { $batch->strict_off(); $batch->warnings_off(); - while ( $record = $batch->next ) { + my $record; + while ( 1 ) { + eval { $record = $batch->next; }; + if ($@) { + import MARC::File::XML; + print "bad record\n"; + next; + } + last unless $record; $count++; progress_ticker(); my $marc = undef; unless ( defined $record ) @@ -88,11 +96,14 @@ sub populate_marc { } unless ($marc{date1} and $marc{date1} =~ /\d{4}/) { my $my_260 = $record->field('260'); - my $date1 = $my_260->subfield('c') if $my_260; - if (defined $date1 and $date1 =~ /\d{4}/) { - $marc{date1} = $date1; - $marc{fudgedate} = 1; - print XF ">> using 260c as date1 at rec $count\n"; + if ($my_260 and $my_260->subfield('c')) { + my $date1 = $my_260->subfield('c'); + $date1 =~ s/\D//g; + if (defined $date1 and $date1 =~ /\d{4}/) { + $marc{date1} = $date1; + $marc{fudgedate} = 1; + print XF ">> using 260c as date1 at rec $count\n"; + } } } @@ -130,6 +141,11 @@ sub populate_marc { if (defined $oclc and $oclc =~ /\(OCoLC\)/ and $oclc =~/([0-9]+)/); } + if ($record->field('999')) { + my $koha_bib_id = $record->field('999')->subfield('c'); + $marc{koha_bib_id} = $koha_bib_id if defined $koha_bib_id and $koha_bib_id =~ /^\d+$/; + } + # "Accompanying material" and check for "copy" (300) if ($record->field('300')) { $marc{accomp} = $record->field('300')->subfield('e'); @@ -317,6 +333,14 @@ sub dump_fingerprints { } } + if ($conf->{fingerprints}{koha_bib_id} and exists $marc->{koha_bib_id}) { + print OF join("\t", $marc->{score}, $marc->{id}, "z_koha_bib_id", + $marc->{item_form}, $marc->{date1}, + $marc->{record_type}, + $marc->{bib_lvl}, $marc->{title}, + $marc->{koha_bib_id}), "\n"; + } + if ($conf->{fingerprints}{isbn}) { if ((scalar @{ $marc->{isbns} } > 0) and $marc->{pages}) { foreach my $isbn ( @{ $marc->{isbns}} ) { @@ -477,6 +501,7 @@ sub initialize { my %valid_fps = ( oclc => 1, isbn => 1, issn => 1, lccn => 1, edition => 1, accomp => 1, authpub => 1, baseline => 1, crap => 1, + koha_bib_id => 1, ); for (split /,/, $c->{fingerprints}) { die "Invalid fingerprint '$_'\n" unless $valid_fps{$_}; @@ -565,7 +590,7 @@ Options --fingerprints=LIST Fingerprints to generate, comma separated Default: oclc,isbn,edition,issn,lccn,accomp,authpub - Others: baseline + Others: baseline,koha_bib_id --excludelist=FILE Name of fingerprints exclusions file --scores=LIST Scores to calculate, comma separated