From: Jason Etheridge Date: Thu, 10 Jan 2019 15:49:41 +0000 (-0500) Subject: better usps suffix handling X-Git-Url: http://git.equinoxoli.org/?p=migration-tools.git;a=commitdiff_plain;h=97209b5f6e56350a5d4311db2e4fd948fda95e13;hp=ecb0988b2b160054fef62485ae34956511514f9c better usps suffix handling Signed-off-by: Jason Etheridge --- diff --git a/sql/base/base.sql b/sql/base/base.sql index 3dffd83..9914aef 100644 --- a/sql/base/base.sql +++ b/sql/base/base.sql @@ -1183,15 +1183,26 @@ INSERT INTO migration_tools.usps_suffixes VALUES ('WY','WAY'), ('XING','XING'); -CREATE OR REPLACE FUNCTION migration_tools.normalize_address_suffix (TEXT) RETURNS TEXT AS $$ +-- this function should get a smaller range of inputs and benefit more from STABLE, hopefully speeding things up +CREATE OR REPLACE FUNCTION migration_tools._normalize_address_suffix (TEXT) RETURNS TEXT AS $$ DECLARE - street1 TEXT := $1; + suffix TEXT := $1; _r RECORD; BEGIN + --RAISE INFO 'suffix = %', suffix; FOR _r IN (SELECT * FROM migration_tools.usps_suffixes) LOOP - street1 := REGEXP_REPLACE( street1, ' ' || _r.suffix_from || '$', ' ' || _r.suffix_to, 'i'); + suffix := REGEXP_REPLACE( suffix, _r.suffix_from, _r.suffix_to, 'i'); END LOOP; - RETURN street1; + RETURN suffix; + END; +$$ LANGUAGE PLPGSQL STRICT STABLE; + +CREATE OR REPLACE FUNCTION migration_tools.normalize_address_suffix (TEXT) RETURNS TEXT AS $$ + BEGIN + RETURN CASE + WHEN $1 ~ '\s\S+$' THEN REGEXP_REPLACE( $1, '^(.*\s)(\S+)$', '\1' ) || migration_tools._normalize_address_suffix( REGEXP_REPLACE( $1, '^(.*\s)(\S+)$', '\2' ) ) + ELSE $1 + END; END; $$ LANGUAGE PLPGSQL STRICT STABLE;