#!/usr/bin/perl -w

# Copyright 2009-2012, Equinox Software, Inc.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

use strict;
use warnings;
use Getopt::Long;

my $VERSION = '1.01';

=pod

=head1 NAME

enrich_zips - Utility to add county information to city/state/ZIP data
(and optionally use zips.txt format)

=head1 USAGE

    enrich_zips --db US.txt < citystatezip.tsv

    enrich_zips --makezips --db US.txt < citystatezip.tsv > zips.txt

=head1 NOTES

Input is read line by line as tab-separated city, state and ZIP fields.
By default each record is written back out as tab-separated city, state,
ZIP and county; with --makezips it is written as a pipe-delimited
zips.txt line instead.

Geonames database can be downloaded from
http://download.geonames.org/export/zip/US.zip

Add the --believegn flag if you want to believe the Geonames database
when it conflicts with patron data.  It only takes effect together with
--makezips.

Add the --warn flag to report, on STDERR, ZIP codes that are missing from
the Geonames database and records whose city or state differs from it.

=cut

my ($db, $makezips, %zips, $warn, $believegn);

# GetOptions has already printed a diagnostic when it returns false, so
# just stop instead of running with a half-parsed command line.
GetOptions(
    "db=s"      => \$db,
    "makezips"  => \$makezips,
    "warn"      => \$warn,
    "believegn" => \$believegn,
) or die "Invalid command-line options; see the USAGE section of this script\n";

die "Please specify the location of the Geonames database with --db US.txt\n"
  . "HINT: You can download it at http://download.geonames.org/export/zip/US.zip"
  . "\n\nProgram halted"
  unless defined($db);

# Three-argument open on a lexical handle: the file name is taken literally,
# so a --db value cannot smuggle in an open mode or a pipe.
open(my $db_fh, '<', $db)
    or die "Couldn't open Geonames database $db: $!\n";

# Slurp in the Geonames database
while (my $db_line = <$db_fh>) {
    chomp $db_line;
    my @f = split(/\t/, $db_line);

    # Skip blank or truncated lines that carry no ZIP code.
    next unless defined $f[1] && length $f[1];

    # $zips{"33166"} == ["FL", "Miami Springs", "Miami-Dade"]
    # A later line for the same ZIP code replaces an earlier one.
    $zips{$f[1]} = [ map { defined $_ ? $_ : '' } @f[4, 2, 5] ];
}
close($db_fh);

# Enrich each city/state/ZIP record read from the remaining arguments or STDIN.
while (my $line = <>) {
    chomp $line;
    my @fields = split(/\t/, $line);

    # Nothing to parse (empty line, or a line made only of tabs).
    next unless @fields;

    # split drops trailing empty fields, so a record with an empty ZIP (or
    # state) comes back short; treat the missing fields as empty strings.
    my ($city, $state, $zip) = @fields;
    for my $field ($city, $state, $zip) {
        $field = '' unless defined $field;
    }

    my $entry = $zips{$zip};
    my ($dbstate, $dbcity, $county) = $entry ? @$entry : (undef, undef, "");

    # True when Geonames knows the ZIP but disagrees on city or state.
    my $conflict = $entry && ($city ne $dbcity || $state ne $dbstate);

    if ($warn) {
        # Diagnostics only: the record is still written below, exactly as it
        # is when --warn is not given.
        if (!$entry) {
            print STDERR "No county data found for ZIP code $zip ($city, $state)\n\n";
        }
        elsif ($conflict) {
            print STDERR "Patron data input says $zip is $city, $state\n"
                       . "Geonames database says $zip is $dbcity, $dbstate\n\n";
        }
    }

    if ($makezips) {
        if ($conflict && $believegn) {
            $city  = $dbcity;
            $state = $dbstate;
        }
        print "|" . join("|", ($state, $city, $zip, "1", "", $county)) . "||\n";
    }
    else {
        print join("\t", ($city, $state, $zip, $county)) . "\n";
    }
}