X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=fingerprinter;h=e228c6edeb6751ac791c2bbb9d367bc0222d7694;hp=f8ba533dc73cf34afdd3f2556c7c5b6241355433;hb=209c24bd96a2534843af84d1c0176e6dbe80b82d;hpb=ba8ebf6c2e40ca0fdab9d3a5238b8de19fc43eb2 diff --git a/fingerprinter b/fingerprinter index f8ba533..e228c6e 100755 --- a/fingerprinter +++ b/fingerprinter @@ -226,6 +226,8 @@ sub score_marc { # static criteria scoring #---------------------------------- $marc->{misc_score} = 999; + $marc->{age_score} = 999999999999; + # -1 if 008 has been padded, -2 if it doesn't exist if ($marc->{tag008}) { $marc->{misc_score}-- if ($marc->{tag008} =~ /\|$/) } @@ -237,6 +239,19 @@ sub score_marc { $marc->{misc_score}-- if (defined $marc->{tag300a} and $marc->{tag300a} =~ /copy/i); + # subtract record id if we want older records to win + #$marc->{age_score} -= $marc->{id} unless ($conf->{newwins}); + # handle arbitrary adjustments + $marc->{age_score} = 1; + if ($conf->{'arbitrarily-lose-above'}) { + $marc->{age_score} = 0 + if ($marc->{id} >= $conf->{'arbitrarily-lose-above'}); + } + if ($conf->{'arbitrarily-lose-below'}) { + $marc->{age_score} = 0 + if ($marc->{id} <= $conf->{'arbitrarily-lose-below'}); + } + #---------------------------------- # dynamic calculated scoring #---------------------------------- @@ -273,7 +288,7 @@ sub score_marc { } $json .= 'misc:' . $marc->{misc_score} . '}'; - my $compact = join('', $marc->{misc_score}, @score); + my $compact = join('-', $marc->{age_score}, $marc->{misc_score}, @score); $marc->{score} = "$compact\t$json"; } @@ -401,6 +416,9 @@ sub initialize { 'tag|t=s', 'fingerprints=s', 'scores=s', + 'arbitrarily-lose-above=i', + 'arbitrarily-lose-below=i', + 'newwins', 'quiet|q', 'help|h', ); @@ -499,6 +517,12 @@ Options --scores=LIST Scores to calculate, comma separated Default: oclc,dlc,num_650,num_tags,enc_level + --newwins New record IDs score higher (default is old wins) + --arbitrarily-lose-above + --arbitrarily-lose-below + --arbitrarily-decrease-score-by + Modify fingerprint scoring of records whose EG id is above or below a + given value, inclusive (so 5 is <= 5 or >= 5) such that they lose. --marctype=TYPE Defaults to 'XML' HELP