1 CREATE OR REPLACE FUNCTION msgration_tools.address_parse_out_citystatezip (TEXT) RETURNS TEXT[] AS $$
3 city_state_zip TEXT := $1;
8 zip := CASE WHEN city_state_zip ~ E'\\d\\d\\d\\d\\d' THEN REGEXP_REPLACE( city_state_zip, E'^.*(\\d\\d\\d\\d\\d-?\\d*).*$', E'\\1' ) ELSE '' END;
9 city_state_zip := REGEXP_REPLACE( city_state_zip, E'^(.*)\\d\\d\\d\\d\\d-?\\d*(.*)$', E'\\1\\2');
10 IF city_state_zip ~ ',' THEN
11 state := REGEXP_REPLACE( city_state_zip, E'^(.*),(.*)$', E'\\2');
12 city := REGEXP_REPLACE( city_state_zip, E'^(.*),(.*)$', E'\\1');
14 IF city_state_zip ~ E'\\s+[A-Z][A-Z]\\s*$' THEN
15 state := REGEXP_REPLACE( city_state_zip, E'^.*,?\\s+([A-Z][A-Z])\\s*$', E'\\1' );
16 city := REGEXP_REPLACE( city_state_zip, E'^(.*?),?\\s+[A-Z][A-Z](\\s*)$', E'\\1\\2' );
18 IF city_state_zip ~ E'^\\S+$' THEN
19 city := city_state_zip;
22 state := REGEXP_REPLACE( city_state_zip, E'^(.*?),?\\s*(\\S+)\\s*$', E'\\2');
23 city := REGEXP_REPLACE( city_state_zip, E'^(.*?),?\\s*(\\S+)\\s*$', E'\\1');
27 RETURN ARRAY[ TRIM(BOTH ' ' FROM city), TRIM(BOTH ' ' FROM state), TRIM(BOTH ' ' FROM zip) ];
29 $$ LANGUAGE PLPGSQL STRICT VOLATILE;
31 -- try to parse data like this: 100 MAIN ST$COVINGTON, GA 30016
32 CREATE OR REPLACE FUNCTION migration_tools.parse_out_address (TEXT) RETURNS TEXT[] AS $$
34 fullstring TEXT := $1;
44 WHEN fullstring ~ E'\\d\\d\\d\\d\\d'
45 THEN REGEXP_REPLACE( fullstring, E'^.*(\\d\\d\\d\\d\\d-?\\d*).*$', E'\\1' )
48 fullstring := REGEXP_REPLACE( fullstring, E'^(.*)\\d\\d\\d\\d\\d-?\\d*(.*)$', E'\\1\\2');
50 IF fullstring ~ ',' THEN
51 state := REGEXP_REPLACE( fullstring, E'^(.*),(.*)$', E'\\2');
52 scratch1 := REGEXP_REPLACE( fullstring, E'^(.*),(.*)$', E'\\1');
54 IF fullstring ~ E'\\s+[A-Z][A-Z]\\s*$' THEN
55 state := REGEXP_REPLACE( fullstring, E'^.*,?\\s+([A-Z][A-Z])\\s*$', E'\\1' );
56 scratch1 := REGEXP_REPLACE( fullstring, E'^(.*?),?\\s+[A-Z][A-Z](\\s*)$', E'\\1\\2' );
58 IF fullstring ~ E'^\\S+$' THEN
59 scratch1 := fullstring;
62 state := REGEXP_REPLACE( fullstring, E'^(.*?),?\\s*(\\S+)\\s*$', E'\\2');
63 scratch1 := REGEXP_REPLACE( fullstring, E'^(.*?),?\\s*(\\S+)\\s*$', E'\\1');
68 IF scratch1 ~ '[\$]' THEN
69 scratch2 := REGEXP_REPLACE( scratch1, E'^(.+)[\$](.+?)$', E'\\1');
70 city := REGEXP_REPLACE( scratch1, E'^(.+)[\$](.+?)$', E'\\2');
72 IF scratch1 ~ '\s' THEN
73 scratch2 := REGEXP_REPLACE( scratch1, E'^(.+)\\s+(.+?)$', E'\\1');
74 city := REGEXP_REPLACE( scratch1, E'^(.+)\\s+(.+?)$', E'\\2');
81 IF scratch2 ~ '^\d' THEN
85 address1 := REGEXP_REPLACE( scratch2, E'^(.+?)(\\d.+)$', E'\\1');
86 address2 := REGEXP_REPLACE( scratch2, E'^(.+?)(\\d.+)$', E'\\2');
90 TRIM(BOTH ' ' FROM address1)
91 ,TRIM(BOTH ' ' FROM address2)
92 ,TRIM(BOTH ' ' FROM city)
93 ,TRIM(BOTH ' ' FROM state)
94 ,TRIM(BOTH ' ' FROM zip)
97 $$ LANGUAGE PLPGSQL STRICT VOLATILE;
99 CREATE OR REPLACE FUNCTION migration_tools.parse_out_address2 (TEXT) RETURNS TEXT AS $$
102 use Geo::StreetAddress::US;
104 my $a = Geo::StreetAddress::US->parse_location($address);
107 "$a->{number} $a->{prefix} $a->{street} $a->{type} $a->{suffix}"
108 ,"$a->{sec_unit_type} $a->{sec_unit_num}"
113 $$ LANGUAGE PLPERLU STABLE;
115 DROP TABLE IF EXISTS migration_tools.usps_suffixes;
116 CREATE TABLE migration_tools.usps_suffixes ( suffix_from TEXT, suffix_to TEXT );
117 INSERT INTO migration_tools.usps_suffixes VALUES
150 ('BOULEVARD','BLVD'),
254 ('EXPRESSWAY','EXPY'),
259 ('EXTENSIONS','EXTS'),
370 ('JUNCTIONS','JCTS'),
432 ('MOUNTAINS','MTNS'),
557 ('STRAVENUE','STRA'),
577 ('THROUGHWAY','TRWY'),
584 ('TRAFFICWAY','TRFY'),
605 ('UNDERPASS','UPAS'),
650 -- this function should get a smaller range of inputs and benefit more from STABLE, hopefully speeding things up
651 CREATE OR REPLACE FUNCTION migration_tools._normalize_address_suffix (TEXT) RETURNS TEXT AS $$
656 --RAISE INFO 'suffix = %', suffix;
657 FOR _r IN (SELECT * FROM migration_tools.usps_suffixes) LOOP
658 suffix := REGEXP_REPLACE( suffix, _r.suffix_from, _r.suffix_to, 'i');
662 $$ LANGUAGE PLPGSQL STRICT STABLE;
664 CREATE OR REPLACE FUNCTION migration_tools.normalize_address_suffix (TEXT) RETURNS TEXT AS $$
667 WHEN $1 ~ '\s\S+$' THEN REGEXP_REPLACE( $1, '^(.*\s)(\S+)$', '\1' ) || migration_tools._normalize_address_suffix( REGEXP_REPLACE( $1, '^(.*\s)(\S+)$', '\2' ) )
671 $$ LANGUAGE PLPGSQL STRICT STABLE;