X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=fingerprinter;h=e228c6edeb6751ac791c2bbb9d367bc0222d7694;hp=b7319c66af9c3dfdf0e0654cdb7db6200e2b5567;hb=209c24bd96a2534843af84d1c0176e6dbe80b82d;hpb=dd6bdc2102066cf5d257d4ad5f2bdbce5efc6aab diff --git a/fingerprinter b/fingerprinter index b7319c6..e228c6e 100755 --- a/fingerprinter +++ b/fingerprinter @@ -225,9 +225,9 @@ sub score_marc { #---------------------------------- # static criteria scoring #---------------------------------- - $marc->{misc_score} = 999999999999; - # subtract record id if we want older records to win - $marc->{misc_score} -= $marc->{id} unless ($conf->{newwins}); + $marc->{misc_score} = 999; + $marc->{age_score} = 999999999999; + # -1 if 008 has been padded, -2 if it doesn't exist if ($marc->{tag008}) { $marc->{misc_score}-- if ($marc->{tag008} =~ /\|$/) } @@ -239,6 +239,19 @@ sub score_marc { $marc->{misc_score}-- if (defined $marc->{tag300a} and $marc->{tag300a} =~ /copy/i); + # subtract record id if we want older records to win + #$marc->{age_score} -= $marc->{id} unless ($conf->{newwins}); + # handle arbitrary adjustments + $marc->{age_score} = 1; + if ($conf->{'arbitrarily-lose-above'}) { + $marc->{age_score} = 0 + if ($marc->{id} >= $conf->{'arbitrarily-lose-above'}); + } + if ($conf->{'arbitrarily-lose-below'}) { + $marc->{age_score} = 0 + if ($marc->{id} <= $conf->{'arbitrarily-lose-below'}); + } + #---------------------------------- # dynamic calculated scoring #---------------------------------- @@ -275,7 +288,7 @@ sub score_marc { } $json .= 'misc:' . $marc->{misc_score} . '}'; - my $compact = join('', $marc->{misc_score}, @score); + my $compact = join('-', $marc->{age_score}, $marc->{misc_score}, @score); $marc->{score} = "$compact\t$json"; } @@ -403,6 +416,8 @@ sub initialize { 'tag|t=s', 'fingerprints=s', 'scores=s', + 'arbitrarily-lose-above=i', + 'arbitrarily-lose-below=i', 'newwins', 'quiet|q', 'help|h', @@ -503,6 +518,11 @@ Options --scores=LIST Scores to calculate, comma separated Default: oclc,dlc,num_650,num_tags,enc_level --newwins New record IDs score higher (default is old wins) + --arbitrarily-lose-above + --arbitrarily-lose-below + --arbitrarily-decrease-score-by + Modify fingerprint scoring of records whose EG id is above or below a + given value, inclusive (so 5 is <= 5 or >= 5) such that they lose. --marctype=TYPE Defaults to 'XML' HELP