From 97209b5f6e56350a5d4311db2e4fd948fda95e13 Mon Sep 17 00:00:00 2001 From: Jason Etheridge Date: Thu, 10 Jan 2019 10:49:41 -0500 Subject: [PATCH 1/1] better usps suffix handling Signed-off-by: Jason Etheridge --- sql/base/base.sql | 19 +++++++++++++++---- 1 files changed, 15 insertions(+), 4 deletions(-) diff --git a/sql/base/base.sql b/sql/base/base.sql index 3dffd83..9914aef 100644 --- a/sql/base/base.sql +++ b/sql/base/base.sql @@ -1183,15 +1183,26 @@ INSERT INTO migration_tools.usps_suffixes VALUES ('WY','WAY'), ('XING','XING'); -CREATE OR REPLACE FUNCTION migration_tools.normalize_address_suffix (TEXT) RETURNS TEXT AS $$ +-- this function should get a smaller range of inputs and benefit more from STABLE, hopefully speeding things up +CREATE OR REPLACE FUNCTION migration_tools._normalize_address_suffix (TEXT) RETURNS TEXT AS $$ DECLARE - street1 TEXT := $1; + suffix TEXT := $1; _r RECORD; BEGIN + --RAISE INFO 'suffix = %', suffix; FOR _r IN (SELECT * FROM migration_tools.usps_suffixes) LOOP - street1 := REGEXP_REPLACE( street1, ' ' || _r.suffix_from || '$', ' ' || _r.suffix_to, 'i'); + suffix := REGEXP_REPLACE( suffix, _r.suffix_from, _r.suffix_to, 'i'); END LOOP; - RETURN street1; + RETURN suffix; + END; +$$ LANGUAGE PLPGSQL STRICT STABLE; + +CREATE OR REPLACE FUNCTION migration_tools.normalize_address_suffix (TEXT) RETURNS TEXT AS $$ + BEGIN + RETURN CASE + WHEN $1 ~ '\s\S+$' THEN REGEXP_REPLACE( $1, '^(.*\s)(\S+)$', '\1' ) || migration_tools._normalize_address_suffix( REGEXP_REPLACE( $1, '^(.*\s)(\S+)$', '\2' ) ) + ELSE $1 + END; END; $$ LANGUAGE PLPGSQL STRICT STABLE; -- 1.7.2.5