From 2bd7fcdce9da8d43c45d746859dcdbb13ba5d706 Mon Sep 17 00:00:00 2001
From: Ben Ostrowsky
Date: Wed, 11 May 2011 13:25:01 +0000
Subject: [PATCH] New utility. Give it city-state-zip data in tab-separated format and it will give you the winning city/state for each zip in the same format.

---
 compile_zips |   46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 46 insertions(+), 0 deletions(-)
 create mode 100755 compile_zips

diff --git a/compile_zips b/compile_zips
new file mode 100755
index 0000000..7cc9a58
--- /dev/null
+++ b/compile_zips
@@ -0,0 +1,46 @@
+#!/usr/bin/perl
+use strict;
+use warnings;
+
+# Given input like "Miami Springs\tFL\t33166\n" derived from patron addresses,
+# this utility prints, for each ZIP code, the city and state that occur most
+# often with it, in the same tab-separated format (city, state, ZIP), sorted
+# by ZIP.
+#
+# Ties are broken deterministically: the city/state combination that reaches
+# the maximum count first in the input wins.
+#
+# Lines with fewer than three tab-separated fields, or whose third field does
+# not contain five digits in a row, are skipped.
+
+my %count_for;    # $count_for{$zip}{"$city\t$state"} = occurrences seen so far
+my %winner_for;   # $winner_for{$zip} = current leading "$city\t$state"
+
+# Tally the city-state combinations for each ZIP code, tracking the leader
+while (my $line = <>) {
+    chomp $line;
+    my ($city, $state, $zip) = split /\t/, $line;
+
+    # Skip blank or short lines instead of warning about undefined fields
+    next unless defined $zip;
+
+    # If it doesn't have 5 digits in a row, it's not a ZIP.  Keep only those
+    # five digits, which also drops a ZIP+4 suffix or a stray "\r".
+    next unless $zip =~ m/(\d{5})/;
+    $zip = $1;
+
+    my $citystate = "$city\t" . uc($state);
+    my $count     = ++$count_for{$zip}{$citystate};
+
+    # Only a strictly higher count takes the lead, so on a tie the
+    # combination that got there first keeps it.
+    if (!exists $winner_for{$zip}
+        || $count > $count_for{$zip}{ $winner_for{$zip} }) {
+        $winner_for{$zip} = $citystate;
+    }
+}
+
+# Print the winner for each ZIP code
+foreach my $zip (sort keys %winner_for) {
+    print "$winner_for{$zip}\t$zip\n";
+}
-- 
1.7.2.5