X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=blobdiff_plain;f=cleanup_merge_map.pl;h=5227d42f3bd7b5e58d5165afc0f2385517594a63;hp=b00c9e35b6d521228ebd5a73eb14b0f049de312a;hb=8efe188d49411e78e81a13deba43ea8d474e0716;hpb=359d17a6e36acf6e093bf6975170e845a40742ba diff --git a/cleanup_merge_map.pl b/cleanup_merge_map.pl old mode 100644 new mode 100755 index b00c9e3..5227d42 --- a/cleanup_merge_map.pl +++ b/cleanup_merge_map.pl @@ -1,5 +1,21 @@ #!/usr/bin/perl +# Copyright 2011-2014, Equinox Software, Inc. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License +# as published by the Free Software Foundation; either version 2 +# of the License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + use strict; use warnings; @@ -14,21 +30,48 @@ multiple merge maps. =cut -my %bad_subs = (); -my %map = (); +my %leads = (); + +# load merge map while (<>) { chomp; my ($lead, $sub) = split /\t/, $_, -1; - next if exists $bad_subs{$sub}; - if (exists $map{$sub}) { - $bad_subs{$sub}++; - delete $map{$sub}; - next; - } - $map{$sub} = $lead; + $leads{$sub}->{$lead}++; } -foreach my $sub (sort keys %map) { - print "$map{$sub}\t$sub\n"; + +# run this twice to ensure that cycles are +# excluded +cleanup_map() foreach (1..2); + +foreach my $sub (sort numerically keys %leads) { + if (1 == keys(%{ $leads{$sub} })) { + print join("\t", keys(%{ $leads{$sub} }), $sub), "\n"; + } } +sub cleanup_map { + foreach my $sub (keys %leads) { + my @leads_to_prune = (); + my @leads_to_add = (); + foreach my $lead (keys %{ $leads{$sub} }) { + if (exists($leads{$lead})) { + # lead bib itself is slated to be merged, + # so it's no longer going to be the direct + # lead for the current sub + push @leads_to_prune, $lead; + # the current sub gets potential + # leads from its previous lead + foreach my $second_lead (keys %{ $leads{$lead} }) { + push @leads_to_add, $second_lead unless exists($leads{$sub}->{$second_lead}); + } + } + } + delete($leads{$sub}->{$_}) foreach @leads_to_prune; + $leads{$sub}->{$_}++ foreach @leads_to_add; + } +} + +sub numerically { + return $a <=> $b; +}