From 27f784377e3d4dac68e131025725c0fa20a2a2b7 Mon Sep 17 00:00:00 2001 From: Jason Etheridge Date: Wed, 11 Oct 2017 03:55:36 -0400 Subject: [PATCH] toward better name parsing (handles names like "Van Horn, Fiona") Signed-off-by: Jason Etheridge --- sql/base/base.sql | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 files changed, 76 insertions(+), 0 deletions(-) diff --git a/sql/base/base.sql b/sql/base/base.sql index 891759a..22f259e 100644 --- a/sql/base/base.sql +++ b/sql/base/base.sql @@ -421,6 +421,82 @@ CREATE OR REPLACE FUNCTION migration_tools.name_parse_out_last_first_middle_and_ END; $$ LANGUAGE PLPGSQL STRICT IMMUTABLE; +CREATE OR REPLACE FUNCTION migration_tools.name_parse_out_fuller_last_first_middle_and_random_affix (TEXT) RETURNS TEXT[] AS $$ + DECLARE + full_name TEXT := $1; + temp TEXT; + family_name TEXT := ''; + first_given_name TEXT := ''; + second_given_name TEXT := ''; + suffix TEXT := ''; + prefix TEXT := ''; + BEGIN + temp := full_name; + -- Use values, not structure, for prefix/suffix, unless we come up with a better idea + IF temp ilike '%MR.%' THEN + prefix := 'Mr.'; + temp := REGEXP_REPLACE( temp, E'MR\.\\s*', '', 'i' ); + END IF; + IF temp ilike '%MRS.%' THEN + prefix := 'Mrs.'; + temp := REGEXP_REPLACE( temp, E'MRS\.\\s*', '', 'i' ); + END IF; + IF temp ilike '%MS.%' THEN + prefix := 'Ms.'; + temp := REGEXP_REPLACE( temp, E'MS\.\\s*', '', 'i' ); + END IF; + IF temp ilike '%DR.%' THEN + prefix := 'Dr.'; + temp := REGEXP_REPLACE( temp, E'DR\.\\s*', '', 'i' ); + END IF; + IF temp ilike '%JR.%' THEN + suffix := 'Jr.'; + temp := REGEXP_REPLACE( temp, E'JR\.\\s*', '', 'i' ); + END IF; + IF temp ilike '%JR,%' THEN + suffix := 'Jr.'; + temp := REGEXP_REPLACE( temp, E'JR,\\s*', ',', 'i' ); + END IF; + IF temp ilike '%SR.%' THEN + suffix := 'Sr.'; + temp := REGEXP_REPLACE( temp, E'SR\.\\s*', '', 'i' ); + END IF; + IF temp ilike '%SR,%' THEN + suffix := 'Sr.'; + temp := REGEXP_REPLACE( temp, E'SR,\\s*', ',', 'i' ); + END IF; + IF temp like '%III%' THEN + suffix := 'III'; + temp := REGEXP_REPLACE( temp, E'III', '' ); + END IF; + IF temp like '%II%' THEN + suffix := 'II'; + temp := REGEXP_REPLACE( temp, E'II', '' ); + END IF; + IF temp like '%IV%' THEN + suffix := 'IV'; + temp := REGEXP_REPLACE( temp, E'IV', '' ); + END IF; + + + IF temp ~ ',' THEN + family_name = BTRIM(REGEXP_REPLACE(temp,E'^(.*?,).*$',E'\\1')); + temp := REPLACE( temp, family_name, '' ); + family_name := REPLACE( family_name, ',', '' ); + first_given_name := BTRIM( REGEXP_REPLACE(temp,E'^(\\S+).*?$',E'\\1') ); + temp := REPLACE( temp, first_given_name, '' ); + second_given_name := BTRIM(temp); + ELSE + first_given_name := BTRIM( REGEXP_REPLACE(temp,E'^(\\S+).*?$',E'\\1') ); + temp := REPLACE( temp, first_given_name, '' ); + family_name := BTRIM( REGEXP_REPLACE(temp,E'^.*?(\\S+)$',E'\\1') ); + temp := REPLACE( temp, family_name, '' ); + second_given_name := BTRIM(temp); + END IF; + + RETURN ARRAY[ family_name, prefix, first_given_name, second_given_name, suffix ]; + END; +$$ LANGUAGE PLPGSQL STRICT IMMUTABLE; CREATE OR REPLACE FUNCTION migration_tools.address_parse_out_citystatezip (TEXT) RETURNS TEXT[] AS $$ DECLARE -- 1.7.2.5