1 CREATE OR REPLACE FUNCTION migration_tools.attempt_phone (TEXT,TEXT) RETURNS TEXT AS $$
10 temp := REGEXP_REPLACE(temp, '^1*[^0-9]*(?=[0-9])', '');
11 temp := REGEXP_REPLACE(temp, '[^0-9]*([0-9]{3})[^0-9]*([0-9]{3})[^0-9]*([0-9]{4})', E'\\1-\\2-\\3');
12 n_digits := LENGTH(REGEXP_REPLACE(REGEXP_REPLACE(temp, '(.*)?[a-zA-Z].*', E'\\1') , '[^0-9]', '', 'g'));
13 IF n_digits = 7 AND areacode <> '' THEN
14 temp := REGEXP_REPLACE(temp, '[^0-9]*([0-9]{3})[^0-9]*([0-9]{4})', E'\\1-\\2');
15 output := (areacode || '-' || temp);
22 $$ LANGUAGE PLPGSQL STRICT VOLATILE;
24 CREATE OR REPLACE FUNCTION migration_tools.address_parse_out_citystatezip (TEXT) RETURNS TEXT[] AS $$
26 city_state_zip TEXT := $1;
31 zip := CASE WHEN city_state_zip ~ E'\\d\\d\\d\\d\\d' THEN REGEXP_REPLACE( city_state_zip, E'^.*(\\d\\d\\d\\d\\d-?\\d*).*$', E'\\1' ) ELSE '' END;
32 city_state_zip := REGEXP_REPLACE( city_state_zip, E'^(.*)\\d\\d\\d\\d\\d-?\\d*(.*)$', E'\\1\\2');
33 IF city_state_zip ~ ',' THEN
34 state := REGEXP_REPLACE( city_state_zip, E'^(.*),(.*)$', E'\\2');
35 city := REGEXP_REPLACE( city_state_zip, E'^(.*),(.*)$', E'\\1');
37 IF city_state_zip ~ E'\\s+[A-Z][A-Z]\\s*$' THEN
38 state := REGEXP_REPLACE( city_state_zip, E'^.*,?\\s+([A-Z][A-Z])\\s*$', E'\\1' );
39 city := REGEXP_REPLACE( city_state_zip, E'^(.*?),?\\s+[A-Z][A-Z](\\s*)$', E'\\1\\2' );
41 IF city_state_zip ~ E'^\\S+$' THEN
42 city := city_state_zip;
45 state := REGEXP_REPLACE( city_state_zip, E'^(.*?),?\\s*(\\S+)\\s*$', E'\\2');
46 city := REGEXP_REPLACE( city_state_zip, E'^(.*?),?\\s*(\\S+)\\s*$', E'\\1');
50 RETURN ARRAY[ TRIM(BOTH ' ' FROM city), TRIM(BOTH ' ' FROM state), TRIM(BOTH ' ' FROM zip) ];
52 $$ LANGUAGE PLPGSQL STRICT VOLATILE;
54 -- try to parse data like this: 100 MAIN ST$COVINGTON, GA 30016
55 CREATE OR REPLACE FUNCTION migration_tools.parse_out_address (TEXT) RETURNS TEXT[] AS $$
57 fullstring TEXT := $1;
67 WHEN fullstring ~ E'\\d\\d\\d\\d\\d'
68 THEN REGEXP_REPLACE( fullstring, E'^.*(\\d\\d\\d\\d\\d-?\\d*).*$', E'\\1' )
71 fullstring := REGEXP_REPLACE( fullstring, E'^(.*)\\d\\d\\d\\d\\d-?\\d*(.*)$', E'\\1\\2');
73 IF fullstring ~ ',' THEN
74 state := REGEXP_REPLACE( fullstring, E'^(.*),(.*)$', E'\\2');
75 scratch1 := REGEXP_REPLACE( fullstring, E'^(.*),(.*)$', E'\\1');
77 IF fullstring ~ E'\\s+[A-Z][A-Z]\\s*$' THEN
78 state := REGEXP_REPLACE( fullstring, E'^.*,?\\s+([A-Z][A-Z])\\s*$', E'\\1' );
79 scratch1 := REGEXP_REPLACE( fullstring, E'^(.*?),?\\s+[A-Z][A-Z](\\s*)$', E'\\1\\2' );
81 IF fullstring ~ E'^\\S+$' THEN
82 scratch1 := fullstring;
85 state := REGEXP_REPLACE( fullstring, E'^(.*?),?\\s*(\\S+)\\s*$', E'\\2');
86 scratch1 := REGEXP_REPLACE( fullstring, E'^(.*?),?\\s*(\\S+)\\s*$', E'\\1');
91 IF scratch1 ~ '[\$]' THEN
92 scratch2 := REGEXP_REPLACE( scratch1, E'^(.+)[\$](.+?)$', E'\\1');
93 city := REGEXP_REPLACE( scratch1, E'^(.+)[\$](.+?)$', E'\\2');
95 IF scratch1 ~ '\s' THEN
96 scratch2 := REGEXP_REPLACE( scratch1, E'^(.+)\\s+(.+?)$', E'\\1');
97 city := REGEXP_REPLACE( scratch1, E'^(.+)\\s+(.+?)$', E'\\2');
104 IF scratch2 ~ '^\d' THEN
105 address1 := scratch2;
108 address1 := REGEXP_REPLACE( scratch2, E'^(.+?)(\\d.+)$', E'\\1');
109 address2 := REGEXP_REPLACE( scratch2, E'^(.+?)(\\d.+)$', E'\\2');
113 TRIM(BOTH ' ' FROM address1)
114 ,TRIM(BOTH ' ' FROM address2)
115 ,TRIM(BOTH ' ' FROM city)
116 ,TRIM(BOTH ' ' FROM state)
117 ,TRIM(BOTH ' ' FROM zip)
120 $$ LANGUAGE PLPGSQL STRICT VOLATILE;
122 CREATE OR REPLACE FUNCTION migration_tools.parse_out_address2 (TEXT) RETURNS TEXT AS $$
125 use Geo::StreetAddress::US;
127 my $a = Geo::StreetAddress::US->parse_location($address);
130 "$a->{number} $a->{prefix} $a->{street} $a->{type} $a->{suffix}"
131 ,"$a->{sec_unit_type} $a->{sec_unit_num}"
136 $$ LANGUAGE PLPERLU STABLE;
138 DROP TABLE IF EXISTS migration_tools.usps_suffixes;
139 CREATE TABLE migration_tools.usps_suffixes ( suffix_from TEXT, suffix_to TEXT );
140 INSERT INTO migration_tools.usps_suffixes VALUES
173 ('BOULEVARD','BLVD'),
277 ('EXPRESSWAY','EXPY'),
282 ('EXTENSIONS','EXTS'),
393 ('JUNCTIONS','JCTS'),
455 ('MOUNTAINS','MTNS'),
580 ('STRAVENUE','STRA'),
600 ('THROUGHWAY','TRWY'),
607 ('TRAFFICWAY','TRFY'),
628 ('UNDERPASS','UPAS'),
673 -- this function should get a smaller range of inputs and benefit more from STABLE, hopefully speeding things up
674 CREATE OR REPLACE FUNCTION migration_tools._normalize_address_suffix (TEXT) RETURNS TEXT AS $$
679 --RAISE INFO 'suffix = %', suffix;
680 FOR _r IN (SELECT * FROM migration_tools.usps_suffixes) LOOP
681 suffix := REGEXP_REPLACE( suffix, _r.suffix_from, _r.suffix_to, 'i');
685 $$ LANGUAGE PLPGSQL STRICT STABLE;
687 CREATE OR REPLACE FUNCTION migration_tools.normalize_address_suffix (TEXT) RETURNS TEXT AS $$
690 WHEN $1 ~ '\s\S+$' THEN REGEXP_REPLACE( $1, '^(.*\s)(\S+)$', '\1' ) || migration_tools._normalize_address_suffix( REGEXP_REPLACE( $1, '^(.*\s)(\S+)$', '\2' ) )
694 $$ LANGUAGE PLPGSQL STRICT STABLE;
696 CREATE OR REPLACE FUNCTION migration_tools.zip_to_city_state_county (TEXT) RETURNS TEXT[] AS $$
701 open (FH, '<', '/openils/var/data/zips.txt') or return ('No File Found', 'No File Found', 'No File Found');
705 my ($junk, $state, $city, $zip, $foo, $bar, $county, $baz, $morejunk) = split(/\|/);
706 $zipdata{$zip} = [$city, $state, $county];
709 if (defined $zipdata{$input}) {
710 my ($city, $state, $county) = @{$zipdata{$input}};
711 return [$city, $state, $county];
712 } elsif (defined $zipdata{substr $input, 0, 5}) {
713 my ($city, $state, $county) = @{$zipdata{substr $input, 0, 5}};
714 return [$city, $state, $county];
716 return ['ZIP not found', 'ZIP not found', 'ZIP not found'];
719 $$ LANGUAGE PLPERLU STABLE;