X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=fingerprinter;h=f388174a447f8bc0554d384aa58b9389b71a4eb2;hp=a42efc1bc67b8313a5aec71419ca12f10d1f37f8;hb=af66e5408e31e25bc6fed62b99cc39be95d480ef;hpb=55223e093c954354c1f28dca9c14de5a9d0d88da diff --git a/fingerprinter b/fingerprinter index a42efc1..f388174 100755 --- a/fingerprinter +++ b/fingerprinter @@ -25,6 +25,7 @@ use MARC::Batch; use Unicode::Normalize; use MARC::File::XML ( BinaryEncoding => 'utf-8' ); use Equinox::Migration::SubfieldMapper; +use Equinox::Migration::Utils qw/normalize_oclc_number/; my $conf = {}; # configuration hashref my $count = 0; my $scount = 0; @@ -34,7 +35,7 @@ $| = 1; initialize($conf); open OF, '>', $conf->{output} or die "$0: cannot open output file $conf->{output}: $!\n"; -open XF, '>', $conf->{exception} or die "$0: cannot open exception file $conf->{output}: $!\n"; +open XF, '>', $conf->{exception} or die "$0: cannot open exception file $conf->{exception}: $!\n"; for my $file (@ARGV) { print XF "Processing $file\n"; @@ -148,13 +149,19 @@ sub populate_marc { # oclc $marc{oclc} = []; - push @{ $marc{oclc} }, $record->field('001')->as_string() - if ($record->field('001') and $record->field('003') and - $record->field('003')->as_string() =~ /OCo{0,1}LC/); + if ($record->field('001') && + $record->field('003') && + $record->field('003')->as_string() =~ /OCo{0,1}LC/ && + defined normalize_oclc_number($record->field('001')->as_string())) { + push @{ $marc{oclc} }, normalize_oclc_number($record->field('001')->as_string()); + } for ($record->field('035')) { my $oclc = $_->subfield('a'); - push @{ $marc{oclc} }, $oclc - if (defined $oclc and $oclc =~ /\(OCoLC\)/ and $oclc =~/([0-9]+)/); + if (defined $oclc && + ($oclc =~ /\(OCoLC\)/ || $oclc =~ /(ocm|ocl7|ocn|on)/) && + defined normalize_oclc_number($oclc)) { + push @{ $marc{oclc} }, normalize_oclc_number($oclc); + } } if ($record->field('999')) { @@ -373,11 +380,11 @@ sub dump_fingerprints { } } - if ($conf->{fingerprints}{edition} and $marc->{edition}) { + if ($conf->{fingerprints}{edition} and $marc->{edition} and $marc->{author}) { print OF join("\t", $marc->{score}, $marc->{id}, "edition", $marc->{item_form}, $marc->{date1}, $marc->{record_type}, $marc->{bib_lvl}, - $marc->{title}, $marc->{edition}), "\n"; + $marc->{title}, $marc->{author}, $marc->{edition}), "\n"; } if ($conf->{fingerprints}{issn} and $marc->{issn}) {