-
Notifications
You must be signed in to change notification settings - Fork 6
Expand file tree
/
Copy pathcologne_phonetic_function.sql
More file actions
148 lines (138 loc) · 4.68 KB
/
cologne_phonetic_function.sql
File metadata and controls
148 lines (138 loc) · 4.68 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
drop function if exists strip_non_alpha;
drop function if exists soundex_de;
drop function if exists koelner_match;
delimiter $$
create function strip_non_alpha (string varchar(255)) returns varchar(255)
deterministic
begin
declare res varchar(255) default '';
set string = lower(string);
while char_length(string) > 0 do
if string regexp '^[a-zäöüß]' then
set res = concat(res, substr(string, 1, 1));
end if;
set string = substr(string, 2);
end while;
return res;
end
$$
create function soundex_de (string varchar(255)) returns varchar(255)
deterministic
begin
declare res varchar(255) default '';
declare tmp varchar(255) default '';
declare i tinyint unsigned default 1;
set string = strip_non_alpha(string);
while char_length(string) > 0 do
if string regexp '^[0-9]' then
set res = concat(res, substr(string, 1, 1));
set string = substr(string, 2);
elseif string regexp '^[aeijouyäöüÄÖÜ]' then
set res = concat(res, '0');
set string = substr(string, 2);
elseif string regexp '^ph' then
set res = concat(res, '3');
set string = substr(string, 3);
elseif string regexp '^[bp]' then
set res = concat(res, '1');
set string = substr(string, 2);
elseif string regexp '^[dt][csz]' then
set res = concat(res, '8');
set string = substr(string, 3);
elseif string regexp '^[dt]' then
set res = concat(res, '2');
set string = substr(string, 2);
elseif string regexp '^[fvw]' then
set res = concat(res, '3');
set string = substr(string, 2);
elseif string regexp '^[gkq]' then
set res = concat(res, '4');
set string = substr(string, 2);
elseif string regexp '^c[ahkloqrux]' then
set res = concat(res, '4');
set string = substr(string, 2);
elseif string regexp '^[^sz]c[ahkloqrux]' then
set string = concat(substr(string, 1, 1), '4', substr(string, 3));
elseif string regexp '^[^ckq]x+' then
set tmp = substr(string, 1, 1);
set string = substr(string, 3);
while substr(string, 1, 1) = 'x' do
set string = substr(string, 2);
end while;
set string = concat(tmp, 48, string);
elseif string regexp '^l' then
set res = concat(res, '5');
set string = substr(string, 2);
elseif string regexp '^[mn]' then
set res = concat(res, '6');
set string = substr(string, 2);
elseif string regexp '^r' then
set res = concat(res, '7');
set string = substr(string, 2);
elseif string regexp '^[sz]c' then
set string = concat(substr(string, 1, 1), '8', substr(string, 3));
elseif string regexp '^[szß]' then
set res = concat(res, '8');
set string = substr(string, 2);
elseif string regexp '^c[^ahkloqrux]?' then
set res = concat(res, '8');
set string = substr(string, 2);
elseif string regexp '^[ckq]x' then
set string = concat(substr(string, 1, 1), '8', substr(string, 3));
else
set string = substr(string, 2);
end if;
end while;
set res = replace(res, '070', '@');
set res = replace(res, '07', '0');
set res = replace(res, '@', '070'); # improved handling of silent r after vowels
set res = replace(res, '3', '8'); # for phone calls, where F sounds like S
set tmp = '@';
while i <= char_length(res) do
if substr(res, i, 1) = tmp then
set res = concat(substr(res, 1, i - 1), substr(res, i + 1));
else
set tmp = substr(res, i, 1);
set i = i + 1;
end if;
end while;
set res = concat(substr(res, 1, 1), replace(substr(res, 2), '0', ''));
return res;
end
$$
create function koelner_match (needle varchar(128), haystack text, splitChar varchar(1)) returns tinyint
deterministic
begin
declare spacePos int;
declare searchLen int default length(haystack);
declare curWord varchar(128) default '';
declare tempStr text default haystack;
declare tmp text default '';
declare soundx1 varchar(64) default soundex_de(needle);
declare soundx2 varchar(64) default '';
set spacePos = locate(splitChar, tempStr);
while searchLen > 0 do
if spacePos = 0 then
set tmp = tempStr;
select soundex_de(tmp) into soundx2;
if soundx1 = soundx2 then
return 1;
else
return 0;
end if;
end if;
if spacePos != 0 then
set tmp = substr(tempStr, 1, spacePos-1);
set soundx2 = soundex_de(tmp);
if soundx1 = soundx2 then
return 1;
end if;
set tempStr = substr(tempStr, spacePos+1);
set searchLen = length(tempStr);
end if;
set spacePos = locate(splitChar, tempStr);
end while;
return 0;
end
$$
delimiter ;