#!/usr/bin/perl -w
# Copyright 2009-2012, Equinox Software, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

use strict;
use warnings;

my $VERSION = '1.00';

=pod

=head1 NAME

elect_zips - Utility to elect a winning city/state for each ZIP code based on patron data

=head1 USAGE

    psql -U evergreen -A -t -F $'\t' -c 'SELECT city, state, post_code FROM actor.usr_address' > raw-csz.tsv
    elect_zips < raw-csz.tsv > winning-zips.tsv

=head1 NOTES

Given input like "Miami Springs\tFL\t33166\n" derived from patron addresses,
this utility will print a city and state for each ZIP that has the maximum
number of occurrences.

The ZIP is taken to be the first run of five digits in the third field; lines
without such a run (or with fewer than three tab-separated fields) are
ignored.  Leading and trailing whitespace is removed from the city and the
state, and the state is upper-cased, before tallying.

No attempt is made to pick the "best" candidate when there is a tie, but the
result is reproducible: candidates are examined in sorted order, so among
city/state pairs with the same count the one that sorts first wins.

You can also feed the output of elect_zips directly into a follow-on utility.
(The name of that utility is truncated in this copy of the documentation;
restore it from the upstream source.)

=cut

# Turn one raw input line ("city\tstate\tzip") into a normalized record.
#
# Returns the two-element list (five-digit ZIP, "City\tSTATE"), or an empty
# list when the line cannot be used (too few fields, or no five-digit run in
# the ZIP field).
sub _parse_record {
    my ($line) = @_;
    return unless defined $line;

    chomp $line;
    my ($city, $state, $zip) = split /\t/, $line;

    # Fewer than three fields leaves $zip undefined; skip the line quietly
    # instead of tripping an "uninitialized value" warning in the match below.
    return unless defined $zip;

    # If it doesn't have 5 digits in a row, it's not a ZIP.  Capture the run
    # directly so that values such as " 33166" or "33166-1234" all collapse
    # to the same 5-digit key.
    return unless $zip =~ /([0-9]{5})/;
    my $zip5 = $1;

    # Normalize both halves of the candidate so that stray whitespace or
    # letter case in the source data does not split the vote.
    for my $field ($city, $state) {
        $field =~ s/^\s+//;
        $field =~ s/\s+$//;
    }
    $state = uc $state;

    return ($zip5, "$city\t$state");
}

# Pick the winning "City\tSTATE" string from a hashref that maps candidate
# strings to their occurrence counts.
#
# Candidates are visited in sorted order and only a strictly larger count
# displaces the current leader, which makes tie-breaking deterministic.
# Returns the empty string when the hashref has no candidates.
sub _elect_winner {
    my ($count_for) = @_;

    my $winner = '';
    my $max    = 0;
    for my $citystate (sort keys %{$count_for}) {
        if ($count_for->{$citystate} > $max) {
            $max    = $count_for->{$citystate};
            $winner = $citystate;
        }
    }
    return $winner;
}

# Read records from the files named on the command line (or STDIN), tally
# the city/state combinations seen for each ZIP code, then print one
# "City\tSTATE\tZIP" line per ZIP code, ordered by ZIP.
sub main {
    my %tally_for;

    # Go through the input and tally the city-state combinations for each ZIP code
    while (my $line = <>) {
        my @record = _parse_record($line);
        next unless @record;
        my ($zip, $citystate) = @record;
        $tally_for{$zip}{$citystate}++;
    }

    # Pick and print a winner for each ZIP code
    for my $zip (sort keys %tally_for) {
        my $citystate = _elect_winner($tally_for{$zip});
        print "$citystate\t$zip\n";
    }
    return;
}

# Run only when executed as a script, so the helpers can be loaded on their own.
main() unless caller;

1;