X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=fingerprinter;h=e228c6edeb6751ac791c2bbb9d367bc0222d7694;hp=0bee3041e2c274655a15476e4d98d7694178e5bf;hb=209c24bd96a2534843af84d1c0176e6dbe80b82d;hpb=55fbb56da41ed6f2e8eea310f6ce079df2dc7861 diff --git a/fingerprinter b/fingerprinter index 0bee304..e228c6e 100755 --- a/fingerprinter +++ b/fingerprinter @@ -74,7 +74,7 @@ sub populate_marc { # date1, date2 my $my_008 = $record->field('008'); - $marc{tag008} = $my_008->as_string();# if ($my_008); + $marc{tag008} = $my_008->as_string() if ($my_008); if (defined $marc{tag008}) { unless (length $marc{tag008} == 40) { $marc{tag008} = $marc{tag008} . ('|' x (40 - length($marc{tag008}))); @@ -226,6 +226,8 @@ sub score_marc { # static criteria scoring #---------------------------------- $marc->{misc_score} = 999; + $marc->{age_score} = 999999999999; + # -1 if 008 has been padded, -2 if it doesn't exist if ($marc->{tag008}) { $marc->{misc_score}-- if ($marc->{tag008} =~ /\|$/) } @@ -237,6 +239,19 @@ sub score_marc { $marc->{misc_score}-- if (defined $marc->{tag300a} and $marc->{tag300a} =~ /copy/i); + # subtract record id if we want older records to win + #$marc->{age_score} -= $marc->{id} unless ($conf->{newwins}); + # handle arbitrary adjustments + $marc->{age_score} = 1; + if ($conf->{'arbitrarily-lose-above'}) { + $marc->{age_score} = 0 + if ($marc->{id} >= $conf->{'arbitrarily-lose-above'}); + } + if ($conf->{'arbitrarily-lose-below'}) { + $marc->{age_score} = 0 + if ($marc->{id} <= $conf->{'arbitrarily-lose-below'}); + } + #---------------------------------- # dynamic calculated scoring #---------------------------------- @@ -273,7 +288,7 @@ sub score_marc { } $json .= 'misc:' . $marc->{misc_score} . '}'; - my $compact = join('', $marc->{misc_score}, @score); + my $compact = join('-', $marc->{age_score}, $marc->{misc_score}, @score); $marc->{score} = "$compact\t$json"; } @@ -401,6 +416,9 @@ sub initialize { 'tag|t=s', 'fingerprints=s', 'scores=s', + 'arbitrarily-lose-above=i', + 'arbitrarily-lose-below=i', + 'newwins', 'quiet|q', 'help|h', ); @@ -499,6 +517,12 @@ Options --scores=LIST Scores to calculate, comma separated Default: oclc,dlc,num_650,num_tags,enc_level + --newwins New record IDs score higher (default is old wins) + --arbitrarily-lose-above + --arbitrarily-lose-below + --arbitrarily-decrease-score-by + Modify fingerprint scoring of records whose EG id is above or below a + given value, inclusive (so 5 is <= 5 or >= 5) such that they lose. --marctype=TYPE Defaults to 'XML' HELP