# Positional command-line arguments, in order:
#   0  dataset      - not referenced anywhere in the visible part of the script
#   1  match_to     - presumably the path of the records to match against
#                     (verify against the code that opens it)
#   2  match_these  - path of the tab-separated incoming records
#   3  match_to_score, 4  match_these_score - paths of the '|'-separated
#                     score files
# Missing arguments simply leave the corresponding variable undefined.
my ( $dataset, $match_to, $match_these, $match_to_score, $match_these_score )
    = @ARGV[ 0 .. 4 ];

# Echo the two main inputs so the run can be identified in captured output.
print "match_to: $match_to match_these: $match_these\n";
# Build %pines: fingerprint => [ record ids sharing that fingerprint ].
# Each line read from FILE is split on tabs; the first column is the record
# id and the remaining columns, joined with '^', form the fingerprint.
# NOTE(review): the open() that attaches FILE is not visible in this excerpt;
# presumably it opens $match_to - confirm.
while ( defined( my $record = <FILE> ) ) {
    my @columns     = split /\t/, $record;
    my $record_id   = shift @columns;
    my $fingerprint = join '^', @columns;

    # push autovivifies the array reference the first time a fingerprint is seen.
    push @{ $pines{$fingerprint} }, $record_id;
}
# Build %incoming: fingerprint => [ record ids sharing that fingerprint ],
# read from the file named by $match_these.
#
# Each line is tab-separated: the first field is the record id and the
# remaining fields, joined with '^', form the fingerprint.
#
# The file is opened with three-argument open on a lexical handle so that a
# path beginning with '>' or '|' cannot change the open mode, and a failure
# to open is reported instead of silently producing an empty %incoming.
open my $incoming_fh, '<', $match_these
    or die "Cannot open match_these file '$match_these' for reading: $!";

while ( my $line = <$incoming_fh> ) {
    my @fields = split /\t/, $line;
    my $id     = shift @fields;
    my $fp     = join '^', @fields;

    # push autovivifies the array reference for a new fingerprint.
    push @{ $incoming{$fp} }, $id;
}

close $incoming_fh;
# Load scoring data into %score: numeric record id => [ holdings, subtitle ].
#
# Every line of a score file has '|'-separated fields in the order
#   id | holdings | subtitle
# Non-digit characters are stripped from id and holdings; the subtitle has
# leading and trailing whitespace removed.
#
# Fix: the second file used to be named through $match_from_score, a variable
# that is never assigned; the fifth command-line argument is stored in
# $match_these_score, so that is the file that must be read here.
foreach my $file ( $match_to_score, $match_these_score ) {

    # An absent argument is skipped quietly so the script can still run
    # without score data.
    next unless defined $file && length $file;

    my $score_fh;
    unless ( open $score_fh, '<', $file ) {
        warn "Cannot open score file '$file' for reading: $!\n";
        next;
    }

    while ( my $line = <$score_fh> ) {
        my @fields = split /\|/, $line;

        # Short lines yield undefined fields; treat them as empty strings so
        # the substitutions below operate on defined values.
        my ( $id, $holdings, $subtitle ) =
            map { defined $_ ? $_ : '' } @fields[ 0 .. 2 ];

        $id       =~ s/\D//g;
        $holdings =~ s/\D//g;
        $holdings = 0 unless length $holdings;    # compared numerically later
        $subtitle =~ s/^\s+//;
        $subtitle =~ s/\s+$//;

        $score{$id} = [ $holdings, $subtitle ];
    }

    close $score_fh;
}
# Find every incoming fingerprint that also exists in %pines.
#
# For each incoming record id under such a fingerprint:
#   - the id is written to match.record_ids (one per line), and
#   - the fingerprint is appended to $candidate_match{$id}.
#
# The RECORD_IDS handle keeps its bareword name because later code prints to
# it; the open itself is now three-argument and checked, so an unwritable
# output file stops the run instead of silently discarding the output.
open( RECORD_IDS, '>', 'match.record_ids' )
    or die "Cannot open match.record_ids for writing: $!";

foreach my $fp ( keys %incoming ) {

    # Only fingerprints present on both sides are matches.
    next unless defined $pines{$fp};

    foreach my $id ( @{ $incoming{$fp} } ) {
        print RECORD_IDS "$id\n";

        # push autovivifies the list the first time an id is seen.
        push @{ $candidate_match{$id} }, $fp;
    }
}
# For every incoming record id in %candidate_match, walk the record ids stored
# in %pines under each of its candidate fingerprints, track a "best" id in
# $best_pines_id, write that id to RECORD_IDS and append the incoming id to
# the group kept in %match under it.
# NOTE(review): this excerpt omits several lines (closing braces and,
# presumably, the declarations of $subtitle, $pines_subtitle, $pines_holdings
# and $best_pines_id), so the exact nesting below cannot be confirmed here.
67 foreach my $id ( keys %candidate_match ) {
# Subtitle of the incoming record, taken from its score entry when one exists.
69 if (defined $score{ $id })
70 { $subtitle = $score{ $id }[1]; }
# All fingerprints under which this incoming id matched.
72 my @fps = @{ $candidate_match{ $id } };
# Initialised here but not used in the visible lines.
73 my @candidate_pines = ();
# Per-id selection state: whether a subtitle match has been seen, and the
# largest holdings value seen so far.
75 my $subtitle_matched = 0;
76 my $highest_holdings = 0;
79 foreach my $fp ( @fps ) {
80 foreach my $pines_id ( @{ $pines{ $fp } } ) {
# Subtitle and holdings of the candidate, when it has a score entry.
82 if (defined $score{ $pines_id })
83 { $pines_subtitle = $score{ $pines_id }[1]; }
85 if (defined $score{ $pines_id })
86 { $pines_holdings = $score{ $pines_id }[0]; }
# The first candidate whose subtitle equals the incoming subtitle becomes the
# current best and resets the holdings threshold to -1.
87 if ($pines_subtitle eq $subtitle) {
88 if (! $subtitle_matched) {
89 $subtitle_matched = 1;
90 $best_pines_id = $pines_id;
91 $highest_holdings = -1;
# NOTE(review): the lines between the assignment above and this test are not
# visible; presumably this sits in the else branch of the subtitle comparison,
# skipping non-matching candidates once a subtitle match exists - confirm.
94 if ($subtitle_matched) { next; }
# A candidate with strictly more holdings than any seen so far becomes the best.
96 if ( $pines_holdings > $highest_holdings ) {
97 $highest_holdings = $pines_holdings;
98 $best_pines_id = $pines_id;
# Record the chosen id, then file the incoming id under it; a new group is
# seeded with the chosen id itself as its first member.
102 print RECORD_IDS "$best_pines_id\n";
103 if (! defined $match{ $best_pines_id } )
104 { $match{ $best_pines_id } = [ $best_pines_id ]; }
105 push @{ $match{ $best_pines_id } }, $id;
# Write the file match.groupings: for each key of %match, print a record whose
# fields are joined with '^'.
# NOTE(review): the first arguments of the outer join and the end of the print
# statement are not visible in this excerpt; the open is unchecked.
110 open GROUPINGS, ">match.groupings";
111 foreach my $k ( keys %match ) {
112 print GROUPINGS join("^",
# The two visible fields are the same comma-joined member list of the group.
116 join(",",@{ $match{ $k } }),
117 join(",",@{ $match{ $k } })