#!/usr/bin/perl
use strict;
use warnings;

# compile_zips
#
# Recovered from a one-line gitweb "blobdiff_plain" dump of compile_zips in
# migration-tools.git (hb=2bd7fcdce9da8d43c45d746859dcdbb13ba5d706); the diff
# header and the leading "+" markers were removed to restore a runnable script.
#
# Given input like "Miami Springs\tFL\t33166\n" derived from patron addresses,
# this utility prints, for each ZIP code, the city and state that occur with it
# most often, as "City\tST\tZIP\n" lines sorted by ZIP code (string order).
#
# Input is read from the files named on the command line, or from STDIN when
# none are given.
#
# Ties are broken deterministically: among equally frequent city/state pairs,
# the one whose "city\tSTATE" string sorts first wins.

# tally_zips($fh)
#
# Reads tab-separated "city\tstate\tzip" records from the filehandle $fh and
# counts the city-state combinations seen for each ZIP code.
#
# Returns a hash reference of the form
#     { $zip5 => { "$city\t$STATE" => $occurrences, ... }, ... }
# where $zip5 is the first run of five digits found in the ZIP field and
# $STATE is the state field upper-cased. Records with fewer than three fields,
# or whose ZIP field contains no five-digit run, are skipped.
sub tally_zips {
    my ($fh) = @_;
    my %count_for;

    while (my $line = <$fh>) {
        chomp $line;
        my ($city, $state, $zip) = split /\t/, $line;

        # Blank or short lines leave $zip undefined; skip them up front so we
        # never match against (or upper-case) an undefined value.
        next unless defined $zip;

        # If it doesn't have 5 digits in a row, it's not a ZIP. Capture the
        # digits from the same match that validates the field, so the stored
        # key is always exactly the 5-digit ZIP (e.g. "33166-1234" and
        # "FL 33166" both become "33166").
        my ($zip5) = $zip =~ m/(\d{5})/
            or next;

        my $citystate = join "\t", $city, uc $state;
        $count_for{$zip5}{$citystate}++;
    }

    return \%count_for;
}

# pick_winners($count_for)
#
# Takes the hash reference produced by tally_zips() and picks, for each ZIP
# code, the city-state combination with the highest count.
#
# Returns a list of "city\tSTATE\tzip" strings (no trailing newline), one per
# ZIP code, ordered by ZIP code.
sub pick_winners {
    my ($count_for) = @_;
    my @winners;

    for my $zip (sort keys %{$count_for}) {
        my $tally      = $count_for->{$zip};
        my $best       = "";
        my $best_count = 0;

        # Hash key order is randomised per process, so walk the candidates in
        # sorted order; combined with the strict ">" below, this makes the
        # winner of a tie the same on every run.
        for my $citystate (sort keys %{$tally}) {
            if ($tally->{$citystate} > $best_count) {
                $best_count = $tally->{$citystate};
                $best       = $citystate;
            }
        }

        push @winners, "$best\t$zip";
    }

    return @winners;
}

# main()
#
# Script entry point: tallies the records from ARGV/STDIN and prints one
# winning "city\tSTATE\tzip" line per ZIP code to STDOUT.
sub main {
    # \*ARGV is the handle behind the <> operator, so files named on the
    # command line (or STDIN) are read exactly as before.
    my $count_for = tally_zips(\*ARGV);

    for my $winner (pick_winners($count_for)) {
        print "$winner\n";
    }

    return;
}

# Run only when executed as a script, so the subs above can be loaded and
# exercised without consuming STDIN.
main() unless caller;