xref: /webtrees/app/Soundex.php (revision 13abd6f3a37322f885d85df150e105d27ad81f8d)
1a25f0a04SGreg Roach<?php
2a25f0a04SGreg Roach/**
3a25f0a04SGreg Roach * webtrees: online genealogy
4369c0ce6SGreg Roach * Copyright (C) 2016 webtrees development team
5a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify
6a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by
7a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or
8a25f0a04SGreg Roach * (at your option) any later version.
9a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful,
10a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
11a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12a25f0a04SGreg Roach * GNU General Public License for more details.
13a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License
14a25f0a04SGreg Roach * along with this program. If not, see <http://www.gnu.org/licenses/>.
15a25f0a04SGreg Roach */
1676692c8bSGreg Roachnamespace Fisharebest\Webtrees;
17a25f0a04SGreg Roach
18a25f0a04SGreg Roach/**
1976692c8bSGreg Roach * Phonetic matching of strings.
20a25f0a04SGreg Roach */
21a25f0a04SGreg Roachclass Soundex {
22a25f0a04SGreg Roach	/**
2376692c8bSGreg Roach	 * Which algorithms are supported.
2476692c8bSGreg Roach	 *
25a25f0a04SGreg Roach	 * @return string[]
26a25f0a04SGreg Roach	 */
27a25f0a04SGreg Roach	public static function getAlgorithms() {
28*13abd6f3SGreg Roach		return [
29a25f0a04SGreg Roach			'std' => /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell'),
30a25f0a04SGreg Roach			'dm'  => /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff'),
31*13abd6f3SGreg Roach		];
32a25f0a04SGreg Roach	}
33a25f0a04SGreg Roach
34a25f0a04SGreg Roach	/**
35a25f0a04SGreg Roach	 * Is there a match between two soundex codes?
36a25f0a04SGreg Roach	 *
37a25f0a04SGreg Roach	 * @param string $soundex1
38a25f0a04SGreg Roach	 * @param string $soundex2
39a25f0a04SGreg Roach	 *
40cbc1590aSGreg Roach	 * @return bool
41a25f0a04SGreg Roach	 */
42a25f0a04SGreg Roach	public static function compare($soundex1, $soundex2) {
43a25f0a04SGreg Roach		if ($soundex1 && $soundex2) {
44a25f0a04SGreg Roach			foreach (explode(':', $soundex1) as $code) {
45a25f0a04SGreg Roach				if (strpos($soundex2, $code) !== false) {
46a25f0a04SGreg Roach					return true;
47a25f0a04SGreg Roach				}
48a25f0a04SGreg Roach			}
49a25f0a04SGreg Roach		}
50a25f0a04SGreg Roach
51a25f0a04SGreg Roach		return false;
52a25f0a04SGreg Roach	}
53a25f0a04SGreg Roach
54a25f0a04SGreg Roach	/**
55a25f0a04SGreg Roach	 * Generate Russell soundex codes for a given text.
56a25f0a04SGreg Roach	 *
57a25f0a04SGreg Roach	 * @param $text
58a25f0a04SGreg Roach	 *
59a25f0a04SGreg Roach	 * @return null|string
60a25f0a04SGreg Roach	 */
61a25f0a04SGreg Roach	public static function russell($text) {
62a25f0a04SGreg Roach		$words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
63*13abd6f3SGreg Roach		$soundex_array = [];
64a25f0a04SGreg Roach		foreach ($words as $word) {
65a25f0a04SGreg Roach			$soundex = soundex($word);
66a25f0a04SGreg Roach			// Only return codes from recognisable sounds
67a25f0a04SGreg Roach			if ($soundex !== '0000') {
68a25f0a04SGreg Roach				$soundex_array[] = $soundex;
69a25f0a04SGreg Roach			}
70a25f0a04SGreg Roach		}
71a25f0a04SGreg Roach		// Combine words, e.g. “New York” as “Newyork”
72a25f0a04SGreg Roach		if (count($words) > 1) {
73a25f0a04SGreg Roach			$soundex_array[] = soundex(strtr($text, ' ', ''));
74a25f0a04SGreg Roach		}
75a25f0a04SGreg Roach		// A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
76a25f0a04SGreg Roach		$soundex_array = array_slice(array_unique($soundex_array), 0, 51);
77a25f0a04SGreg Roach
78a25f0a04SGreg Roach		if ($soundex_array) {
79a25f0a04SGreg Roach			return implode(':', $soundex_array);
80a25f0a04SGreg Roach		} else {
8115e87d46SGreg Roach			return '';
82a25f0a04SGreg Roach		}
83a25f0a04SGreg Roach	}
84a25f0a04SGreg Roach
85a25f0a04SGreg Roach	/**
86a25f0a04SGreg Roach	 * Generate Daitch–Mokotoff soundex codes for a given text.
87a25f0a04SGreg Roach	 *
88a25f0a04SGreg Roach	 * @param $text
89a25f0a04SGreg Roach	 *
90a25f0a04SGreg Roach	 * @return null|string
91a25f0a04SGreg Roach	 */
92a25f0a04SGreg Roach	public static function daitchMokotoff($text) {
93a25f0a04SGreg Roach		$words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
94*13abd6f3SGreg Roach		$soundex_array = [];
95a25f0a04SGreg Roach		foreach ($words as $word) {
96a25f0a04SGreg Roach			$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
97a25f0a04SGreg Roach		}
98a25f0a04SGreg Roach		// Combine words, e.g. “New York” as “Newyork”
99a25f0a04SGreg Roach		if (count($words) > 1) {
100a25f0a04SGreg Roach			$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(strtr($text, ' ', '')));
101a25f0a04SGreg Roach		}
102a25f0a04SGreg Roach		// A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
103a25f0a04SGreg Roach		$soundex_array = array_slice(array_unique($soundex_array), 0, 36);
104a25f0a04SGreg Roach
105a25f0a04SGreg Roach		if ($soundex_array) {
106a25f0a04SGreg Roach			return implode(':', $soundex_array);
107a25f0a04SGreg Roach		} else {
10815e87d46SGreg Roach			return '';
109a25f0a04SGreg Roach		}
110a25f0a04SGreg Roach	}
111a25f0a04SGreg Roach
112a25f0a04SGreg Roach	// Determine the Daitch–Mokotoff Soundex code for a word
113a25f0a04SGreg Roach	// Original implementation by Gerry Kroll, and analysis by Meliza Amity
114a25f0a04SGreg Roach
	/**
	 * Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
	 *
	 * This is the 7-byte limit on sound-table keys that the $dmsounds
	 * documentation refers to.
	 */
	const MAXCHAR = 7;
117a25f0a04SGreg Roach
	/**
	 * Name transformation arrays.
	 * Used to transform the Name string to simplify the "sounds like" table.
	 * This is especially useful in Hebrew.
	 *
	 * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text)
	 * function call to achieve the desired transformations.
	 * NOTE(review): the code that applies this table is not visible here. Confirm
	 * whether the "from" values are used as regular expressions (the "$" in two of
	 * the entries suggests so) or as literal strings, and that they are applied in
	 * the order listed.
	 *
	 * Note about the use of "\x01":
	 * This code, which can’t legitimately occur in the kind of text we're dealing with,
	 * is used as a place-holder so that conditional string replacements can be done.
	 *
	 * @var string[][]
	 */
	private static $transformNameTable = [
		// Force Yiddish ligatures to be treated as separate letters
		['װ', 'וו'],
		['ײ', 'יי'],
		['ױ', 'וי'],
		['בו', 'בע'],
		['פו', 'פע'],
		['ומ', 'עמ'],
		['ום', 'עם'],
		['ונ', 'ענ'],
		['ון', 'ען'],
		['וו', 'ב'],
		// Remove any place-holder already present in the text
		["\x01", ''],
		// Replace these two "יי" sequences with the place-holder ...
		['ייה$', "\x01ה"],
		['ייע$', "\x01ע"],
		// ... so that only the remaining "יי" sequences are converted ...
		['יי', 'ע'],
		// ... and then turn the place-holder back into "יי"
		["\x01", 'יי'],
	];
150a25f0a04SGreg Roach
151a25f0a04SGreg Roach	/**
152a25f0a04SGreg Roach	 * The DM sound coding table is organized this way:
153a25f0a04SGreg Roach	 * key: a variable-length string that corresponds to the UTF-8 character sequence
	 * represented by the table entry. Currently, that string can be up to 7
	 * bytes long. This maximum length is defined by the value of the class
	 * constant MAXCHAR.
157a25f0a04SGreg Roach	 *
158a25f0a04SGreg Roach	 * value: an array as follows:
159a25f0a04SGreg Roach	 * [0]:  zero if not a vowel
160a25f0a04SGreg Roach	 * [1]:  sound value when this string is at the beginning of the word
161a25f0a04SGreg Roach	 * [2]:  sound value when this string is followed by a vowel
162a25f0a04SGreg Roach	 * [3]:  sound value for other cases
163a25f0a04SGreg Roach	 * [1],[2],[3] can be repeated several times to create branches in the code
164a25f0a04SGreg Roach	 * an empty sound value means "ignore in this state"
165a25f0a04SGreg Roach	 *
166a25f0a04SGreg Roach	 * @var string[][]
167a25f0a04SGreg Roach	 */
168*13abd6f3SGreg Roach	private static $dmsounds = [
169*13abd6f3SGreg Roach		'A'       => ['1', '0', '', ''],
170*13abd6f3SGreg Roach		'À'       => ['1', '0', '', ''],
171*13abd6f3SGreg Roach		'Á'       => ['1', '0', '', ''],
172*13abd6f3SGreg Roach		'Â'       => ['1', '0', '', ''],
173*13abd6f3SGreg Roach		'Ã'       => ['1', '0', '', ''],
174*13abd6f3SGreg Roach		'Ä'       => ['1', '0', '1', '', '0', '', ''],
175*13abd6f3SGreg Roach		'Å'       => ['1', '0', '', ''],
176*13abd6f3SGreg Roach		'Ă'       => ['1', '0', '', ''],
177*13abd6f3SGreg Roach		'Ą'       => ['1', '', '', '', '', '', '6'],
178*13abd6f3SGreg Roach		'Ạ'       => ['1', '0', '', ''],
179*13abd6f3SGreg Roach		'Ả'       => ['1', '0', '', ''],
180*13abd6f3SGreg Roach		'Ấ'       => ['1', '0', '', ''],
181*13abd6f3SGreg Roach		'Ầ'       => ['1', '0', '', ''],
182*13abd6f3SGreg Roach		'Ẩ'       => ['1', '0', '', ''],
183*13abd6f3SGreg Roach		'Ẫ'       => ['1', '0', '', ''],
184*13abd6f3SGreg Roach		'Ậ'       => ['1', '0', '', ''],
185*13abd6f3SGreg Roach		'Ắ'       => ['1', '0', '', ''],
186*13abd6f3SGreg Roach		'Ằ'       => ['1', '0', '', ''],
187*13abd6f3SGreg Roach		'Ẳ'       => ['1', '0', '', ''],
188*13abd6f3SGreg Roach		'Ẵ'       => ['1', '0', '', ''],
189*13abd6f3SGreg Roach		'Ặ'       => ['1', '0', '', ''],
190*13abd6f3SGreg Roach		'AE'      => ['1', '0', '1', ''],
191*13abd6f3SGreg Roach		'Æ'       => ['1', '0', '1', ''],
192*13abd6f3SGreg Roach		'AI'      => ['1', '0', '1', ''],
193*13abd6f3SGreg Roach		'AJ'      => ['1', '0', '1', ''],
194*13abd6f3SGreg Roach		'AU'      => ['1', '0', '7', ''],
195*13abd6f3SGreg Roach		'AV'      => ['1', '0', '7', '', '7', '7', '7'],
196*13abd6f3SGreg Roach		'ÄU'      => ['1', '0', '1', ''],
197*13abd6f3SGreg Roach		'AY'      => ['1', '0', '1', ''],
198*13abd6f3SGreg Roach		'B'       => ['0', '7', '7', '7'],
199*13abd6f3SGreg Roach		'C'       => ['0', '5', '5', '5', '34', '4', '4'],
200*13abd6f3SGreg Roach		'Ć'       => ['0', '4', '4', '4'],
201*13abd6f3SGreg Roach		'Č'       => ['0', '4', '4', '4'],
202*13abd6f3SGreg Roach		'Ç'       => ['0', '4', '4', '4'],
203*13abd6f3SGreg Roach		'CH'      => ['0', '5', '5', '5', '34', '4', '4'],
204*13abd6f3SGreg Roach		'CHS'     => ['0', '5', '54', '54'],
205*13abd6f3SGreg Roach		'CK'      => ['0', '5', '5', '5', '45', '45', '45'],
206*13abd6f3SGreg Roach		'CCS'     => ['0', '4', '4', '4'],
207*13abd6f3SGreg Roach		'CS'      => ['0', '4', '4', '4'],
208*13abd6f3SGreg Roach		'CSZ'     => ['0', '4', '4', '4'],
209*13abd6f3SGreg Roach		'CZ'      => ['0', '4', '4', '4'],
210*13abd6f3SGreg Roach		'CZS'     => ['0', '4', '4', '4'],
211*13abd6f3SGreg Roach		'D'       => ['0', '3', '3', '3'],
212*13abd6f3SGreg Roach		'Ď'       => ['0', '3', '3', '3'],
213*13abd6f3SGreg Roach		'Đ'       => ['0', '3', '3', '3'],
214*13abd6f3SGreg Roach		'DRS'     => ['0', '4', '4', '4'],
215*13abd6f3SGreg Roach		'DRZ'     => ['0', '4', '4', '4'],
216*13abd6f3SGreg Roach		'DS'      => ['0', '4', '4', '4'],
217*13abd6f3SGreg Roach		'DSH'     => ['0', '4', '4', '4'],
218*13abd6f3SGreg Roach		'DSZ'     => ['0', '4', '4', '4'],
219*13abd6f3SGreg Roach		'DT'      => ['0', '3', '3', '3'],
220*13abd6f3SGreg Roach		'DDZ'     => ['0', '4', '4', '4'],
221*13abd6f3SGreg Roach		'DDZS'    => ['0', '4', '4', '4'],
222*13abd6f3SGreg Roach		'DZ'      => ['0', '4', '4', '4'],
223*13abd6f3SGreg Roach		'DŹ'      => ['0', '4', '4', '4'],
224*13abd6f3SGreg Roach		'DŻ'      => ['0', '4', '4', '4'],
225*13abd6f3SGreg Roach		'DZH'     => ['0', '4', '4', '4'],
226*13abd6f3SGreg Roach		'DZS'     => ['0', '4', '4', '4'],
227*13abd6f3SGreg Roach		'E'       => ['1', '0', '', ''],
228*13abd6f3SGreg Roach		'È'       => ['1', '0', '', ''],
229*13abd6f3SGreg Roach		'É'       => ['1', '0', '', ''],
230*13abd6f3SGreg Roach		'Ê'       => ['1', '0', '', ''],
231*13abd6f3SGreg Roach		'Ë'       => ['1', '0', '', ''],
232*13abd6f3SGreg Roach		'Ĕ'       => ['1', '0', '', ''],
233*13abd6f3SGreg Roach		'Ė'       => ['1', '0', '', ''],
234*13abd6f3SGreg Roach		'Ę'       => ['1', '', '', '6', '', '', ''],
235*13abd6f3SGreg Roach		'Ẹ'       => ['1', '0', '', ''],
236*13abd6f3SGreg Roach		'Ẻ'       => ['1', '0', '', ''],
237*13abd6f3SGreg Roach		'Ẽ'       => ['1', '0', '', ''],
238*13abd6f3SGreg Roach		'Ế'       => ['1', '0', '', ''],
239*13abd6f3SGreg Roach		'Ề'       => ['1', '0', '', ''],
240*13abd6f3SGreg Roach		'Ể'       => ['1', '0', '', ''],
241*13abd6f3SGreg Roach		'Ễ'       => ['1', '0', '', ''],
242*13abd6f3SGreg Roach		'Ệ'       => ['1', '0', '', ''],
243*13abd6f3SGreg Roach		'EAU'     => ['1', '0', '', ''],
244*13abd6f3SGreg Roach		'EI'      => ['1', '0', '1', ''],
245*13abd6f3SGreg Roach		'EJ'      => ['1', '0', '1', ''],
246*13abd6f3SGreg Roach		'EU'      => ['1', '1', '1', ''],
247*13abd6f3SGreg Roach		'EY'      => ['1', '0', '1', ''],
248*13abd6f3SGreg Roach		'F'       => ['0', '7', '7', '7'],
249*13abd6f3SGreg Roach		'FB'      => ['0', '7', '7', '7'],
250*13abd6f3SGreg Roach		'G'       => ['0', '5', '5', '5', '34', '4', '4'],
251*13abd6f3SGreg Roach		'Ğ'       => ['0', '', '', ''],
252*13abd6f3SGreg Roach		'GGY'     => ['0', '5', '5', '5'],
253*13abd6f3SGreg Roach		'GY'      => ['0', '5', '5', '5'],
254*13abd6f3SGreg Roach		'H'       => ['0', '5', '5', '', '5', '5', '5'],
255*13abd6f3SGreg Roach		'I'       => ['1', '0', '', ''],
256*13abd6f3SGreg Roach		'Ì'       => ['1', '0', '', ''],
257*13abd6f3SGreg Roach		'Í'       => ['1', '0', '', ''],
258*13abd6f3SGreg Roach		'Î'       => ['1', '0', '', ''],
259*13abd6f3SGreg Roach		'Ï'       => ['1', '0', '', ''],
260*13abd6f3SGreg Roach		'Ĩ'       => ['1', '0', '', ''],
261*13abd6f3SGreg Roach		'Į'       => ['1', '0', '', ''],
262*13abd6f3SGreg Roach		'İ'       => ['1', '0', '', ''],
263*13abd6f3SGreg Roach		'Ỉ'       => ['1', '0', '', ''],
264*13abd6f3SGreg Roach		'Ị'       => ['1', '0', '', ''],
265*13abd6f3SGreg Roach		'IA'      => ['1', '1', '', ''],
266*13abd6f3SGreg Roach		'IE'      => ['1', '1', '', ''],
267*13abd6f3SGreg Roach		'IO'      => ['1', '1', '', ''],
268*13abd6f3SGreg Roach		'IU'      => ['1', '1', '', ''],
269*13abd6f3SGreg Roach		'J'       => ['0', '1', '', '', '4', '4', '4', '5', '5', ''],
270*13abd6f3SGreg Roach		'K'       => ['0', '5', '5', '5'],
271*13abd6f3SGreg Roach		'KH'      => ['0', '5', '5', '5'],
272*13abd6f3SGreg Roach		'KS'      => ['0', '5', '54', '54'],
273*13abd6f3SGreg Roach		'L'       => ['0', '8', '8', '8'],
274*13abd6f3SGreg Roach		'Ľ'       => ['0', '8', '8', '8'],
275*13abd6f3SGreg Roach		'Ĺ'       => ['0', '8', '8', '8'],
276*13abd6f3SGreg Roach		'Ł'       => ['0', '7', '7', '7', '8', '8', '8'],
277*13abd6f3SGreg Roach		'LL'      => ['0', '8', '8', '8', '58', '8', '8', '1', '8', '8'],
278*13abd6f3SGreg Roach		'LLY'     => ['0', '8', '8', '8', '1', '8', '8'],
279*13abd6f3SGreg Roach		'LY'      => ['0', '8', '8', '8', '1', '8', '8'],
280*13abd6f3SGreg Roach		'M'       => ['0', '6', '6', '6'],
281*13abd6f3SGreg Roach		'MĔ'      => ['0', '66', '66', '66'],
282*13abd6f3SGreg Roach		'MN'      => ['0', '66', '66', '66'],
283*13abd6f3SGreg Roach		'N'       => ['0', '6', '6', '6'],
284*13abd6f3SGreg Roach		'Ń'       => ['0', '6', '6', '6'],
285*13abd6f3SGreg Roach		'Ň'       => ['0', '6', '6', '6'],
286*13abd6f3SGreg Roach		'Ñ'       => ['0', '6', '6', '6'],
287*13abd6f3SGreg Roach		'NM'      => ['0', '66', '66', '66'],
288*13abd6f3SGreg Roach		'O'       => ['1', '0', '', ''],
289*13abd6f3SGreg Roach		'Ò'       => ['1', '0', '', ''],
290*13abd6f3SGreg Roach		'Ó'       => ['1', '0', '', ''],
291*13abd6f3SGreg Roach		'Ô'       => ['1', '0', '', ''],
292*13abd6f3SGreg Roach		'Õ'       => ['1', '0', '', ''],
293*13abd6f3SGreg Roach		'Ö'       => ['1', '0', '', ''],
294*13abd6f3SGreg Roach		'Ø'       => ['1', '0', '', ''],
295*13abd6f3SGreg Roach		'Ő'       => ['1', '0', '', ''],
296*13abd6f3SGreg Roach		'Œ'       => ['1', '0', '', ''],
297*13abd6f3SGreg Roach		'Ơ'       => ['1', '0', '', ''],
298*13abd6f3SGreg Roach		'Ọ'       => ['1', '0', '', ''],
299*13abd6f3SGreg Roach		'Ỏ'       => ['1', '0', '', ''],
300*13abd6f3SGreg Roach		'Ố'       => ['1', '0', '', ''],
301*13abd6f3SGreg Roach		'Ồ'       => ['1', '0', '', ''],
302*13abd6f3SGreg Roach		'Ổ'       => ['1', '0', '', ''],
303*13abd6f3SGreg Roach		'Ỗ'       => ['1', '0', '', ''],
304*13abd6f3SGreg Roach		'Ộ'       => ['1', '0', '', ''],
305*13abd6f3SGreg Roach		'Ớ'       => ['1', '0', '', ''],
306*13abd6f3SGreg Roach		'Ờ'       => ['1', '0', '', ''],
307*13abd6f3SGreg Roach		'Ở'       => ['1', '0', '', ''],
308*13abd6f3SGreg Roach		'Ỡ'       => ['1', '0', '', ''],
309*13abd6f3SGreg Roach		'Ợ'       => ['1', '0', '', ''],
310*13abd6f3SGreg Roach		'OE'      => ['1', '0', '', ''],
311*13abd6f3SGreg Roach		'OI'      => ['1', '0', '1', ''],
312*13abd6f3SGreg Roach		'OJ'      => ['1', '0', '1', ''],
313*13abd6f3SGreg Roach		'OU'      => ['1', '0', '', ''],
314*13abd6f3SGreg Roach		'OY'      => ['1', '0', '1', ''],
315*13abd6f3SGreg Roach		'P'       => ['0', '7', '7', '7'],
316*13abd6f3SGreg Roach		'PF'      => ['0', '7', '7', '7'],
317*13abd6f3SGreg Roach		'PH'      => ['0', '7', '7', '7'],
318*13abd6f3SGreg Roach		'Q'       => ['0', '5', '5', '5'],
319*13abd6f3SGreg Roach		'R'       => ['0', '9', '9', '9'],
320*13abd6f3SGreg Roach		'Ř'       => ['0', '4', '4', '4'],
321*13abd6f3SGreg Roach		'RS'      => ['0', '4', '4', '4', '94', '94', '94'],
322*13abd6f3SGreg Roach		'RZ'      => ['0', '4', '4', '4', '94', '94', '94'],
323*13abd6f3SGreg Roach		'S'       => ['0', '4', '4', '4'],
324*13abd6f3SGreg Roach		'Ś'       => ['0', '4', '4', '4'],
325*13abd6f3SGreg Roach		'Š'       => ['0', '4', '4', '4'],
326*13abd6f3SGreg Roach		'Ş'       => ['0', '4', '4', '4'],
327*13abd6f3SGreg Roach		'SC'      => ['0', '2', '4', '4'],
328*13abd6f3SGreg Roach		'ŠČ'      => ['0', '2', '4', '4'],
329*13abd6f3SGreg Roach		'SCH'     => ['0', '4', '4', '4'],
330*13abd6f3SGreg Roach		'SCHD'    => ['0', '2', '43', '43'],
331*13abd6f3SGreg Roach		'SCHT'    => ['0', '2', '43', '43'],
332*13abd6f3SGreg Roach		'SCHTCH'  => ['0', '2', '4', '4'],
333*13abd6f3SGreg Roach		'SCHTSCH' => ['0', '2', '4', '4'],
334*13abd6f3SGreg Roach		'SCHTSH'  => ['0', '2', '4', '4'],
335*13abd6f3SGreg Roach		'SD'      => ['0', '2', '43', '43'],
336*13abd6f3SGreg Roach		'SH'      => ['0', '4', '4', '4'],
337*13abd6f3SGreg Roach		'SHCH'    => ['0', '2', '4', '4'],
338*13abd6f3SGreg Roach		'SHD'     => ['0', '2', '43', '43'],
339*13abd6f3SGreg Roach		'SHT'     => ['0', '2', '43', '43'],
340*13abd6f3SGreg Roach		'SHTCH'   => ['0', '2', '4', '4'],
341*13abd6f3SGreg Roach		'SHTSH'   => ['0', '2', '4', '4'],
342*13abd6f3SGreg Roach		'ß'       => ['0', '', '4', '4'],
343*13abd6f3SGreg Roach		'ST'      => ['0', '2', '43', '43'],
344*13abd6f3SGreg Roach		'STCH'    => ['0', '2', '4', '4'],
345*13abd6f3SGreg Roach		'STRS'    => ['0', '2', '4', '4'],
346*13abd6f3SGreg Roach		'STRZ'    => ['0', '2', '4', '4'],
347*13abd6f3SGreg Roach		'STSCH'   => ['0', '2', '4', '4'],
348*13abd6f3SGreg Roach		'STSH'    => ['0', '2', '4', '4'],
349*13abd6f3SGreg Roach		'SSZ'     => ['0', '4', '4', '4'],
350*13abd6f3SGreg Roach		'SZ'      => ['0', '4', '4', '4'],
351*13abd6f3SGreg Roach		'SZCS'    => ['0', '2', '4', '4'],
352*13abd6f3SGreg Roach		'SZCZ'    => ['0', '2', '4', '4'],
353*13abd6f3SGreg Roach		'SZD'     => ['0', '2', '43', '43'],
354*13abd6f3SGreg Roach		'SZT'     => ['0', '2', '43', '43'],
355*13abd6f3SGreg Roach		'T'       => ['0', '3', '3', '3'],
356*13abd6f3SGreg Roach		'Ť'       => ['0', '3', '3', '3'],
357*13abd6f3SGreg Roach		'Ţ'       => ['0', '3', '3', '3', '4', '4', '4'],
358*13abd6f3SGreg Roach		'TC'      => ['0', '4', '4', '4'],
359*13abd6f3SGreg Roach		'TCH'     => ['0', '4', '4', '4'],
360*13abd6f3SGreg Roach		'TH'      => ['0', '3', '3', '3'],
361*13abd6f3SGreg Roach		'TRS'     => ['0', '4', '4', '4'],
362*13abd6f3SGreg Roach		'TRZ'     => ['0', '4', '4', '4'],
363*13abd6f3SGreg Roach		'TS'      => ['0', '4', '4', '4'],
364*13abd6f3SGreg Roach		'TSCH'    => ['0', '4', '4', '4'],
365*13abd6f3SGreg Roach		'TSH'     => ['0', '4', '4', '4'],
366*13abd6f3SGreg Roach		'TSZ'     => ['0', '4', '4', '4'],
367*13abd6f3SGreg Roach		'TTCH'    => ['0', '4', '4', '4'],
368*13abd6f3SGreg Roach		'TTS'     => ['0', '4', '4', '4'],
369*13abd6f3SGreg Roach		'TTSCH'   => ['0', '4', '4', '4'],
370*13abd6f3SGreg Roach		'TTSZ'    => ['0', '4', '4', '4'],
371*13abd6f3SGreg Roach		'TTZ'     => ['0', '4', '4', '4'],
372*13abd6f3SGreg Roach		'TZ'      => ['0', '4', '4', '4'],
373*13abd6f3SGreg Roach		'TZS'     => ['0', '4', '4', '4'],
374*13abd6f3SGreg Roach		'U'       => ['1', '0', '', ''],
375*13abd6f3SGreg Roach		'Ù'       => ['1', '0', '', ''],
376*13abd6f3SGreg Roach		'Ú'       => ['1', '0', '', ''],
377*13abd6f3SGreg Roach		'Û'       => ['1', '0', '', ''],
378*13abd6f3SGreg Roach		'Ü'       => ['1', '0', '', ''],
379*13abd6f3SGreg Roach		'Ũ'       => ['1', '0', '', ''],
380*13abd6f3SGreg Roach		'Ū'       => ['1', '0', '', ''],
381*13abd6f3SGreg Roach		'Ů'       => ['1', '0', '', ''],
382*13abd6f3SGreg Roach		'Ű'       => ['1', '0', '', ''],
383*13abd6f3SGreg Roach		'Ų'       => ['1', '0', '', ''],
384*13abd6f3SGreg Roach		'Ư'       => ['1', '0', '', ''],
385*13abd6f3SGreg Roach		'Ụ'       => ['1', '0', '', ''],
386*13abd6f3SGreg Roach		'Ủ'       => ['1', '0', '', ''],
387*13abd6f3SGreg Roach		'Ứ'       => ['1', '0', '', ''],
388*13abd6f3SGreg Roach		'Ừ'       => ['1', '0', '', ''],
389*13abd6f3SGreg Roach		'Ử'       => ['1', '0', '', ''],
390*13abd6f3SGreg Roach		'Ữ'       => ['1', '0', '', ''],
391*13abd6f3SGreg Roach		'Ự'       => ['1', '0', '', ''],
392*13abd6f3SGreg Roach		'UE'      => ['1', '0', '', ''],
393*13abd6f3SGreg Roach		'UI'      => ['1', '0', '1', ''],
394*13abd6f3SGreg Roach		'UJ'      => ['1', '0', '1', ''],
395*13abd6f3SGreg Roach		'UY'      => ['1', '0', '1', ''],
396*13abd6f3SGreg Roach		'UW'      => ['1', '0', '1', '', '0', '7', '7'],
397*13abd6f3SGreg Roach		'V'       => ['0', '7', '7', '7'],
398*13abd6f3SGreg Roach		'W'       => ['0', '7', '7', '7'],
399*13abd6f3SGreg Roach		'X'       => ['0', '5', '54', '54'],
400*13abd6f3SGreg Roach		'Y'       => ['1', '1', '', ''],
401*13abd6f3SGreg Roach		'Ý'       => ['1', '1', '', ''],
402*13abd6f3SGreg Roach		'Ỳ'       => ['1', '1', '', ''],
403*13abd6f3SGreg Roach		'Ỵ'       => ['1', '1', '', ''],
404*13abd6f3SGreg Roach		'Ỷ'       => ['1', '1', '', ''],
405*13abd6f3SGreg Roach		'Ỹ'       => ['1', '1', '', ''],
406*13abd6f3SGreg Roach		'Z'       => ['0', '4', '4', '4'],
407*13abd6f3SGreg Roach		'Ź'       => ['0', '4', '4', '4'],
408*13abd6f3SGreg Roach		'Ż'       => ['0', '4', '4', '4'],
409*13abd6f3SGreg Roach		'Ž'       => ['0', '4', '4', '4'],
410*13abd6f3SGreg Roach		'ZD'      => ['0', '2', '43', '43'],
411*13abd6f3SGreg Roach		'ZDZ'     => ['0', '2', '4', '4'],
412*13abd6f3SGreg Roach		'ZDZH'    => ['0', '2', '4', '4'],
413*13abd6f3SGreg Roach		'ZH'      => ['0', '4', '4', '4'],
414*13abd6f3SGreg Roach		'ZHD'     => ['0', '2', '43', '43'],
415*13abd6f3SGreg Roach		'ZHDZH'   => ['0', '2', '4', '4'],
416*13abd6f3SGreg Roach		'ZS'      => ['0', '4', '4', '4'],
417*13abd6f3SGreg Roach		'ZSCH'    => ['0', '4', '4', '4'],
418*13abd6f3SGreg Roach		'ZSH'     => ['0', '4', '4', '4'],
419*13abd6f3SGreg Roach		'ZZS'     => ['0', '4', '4', '4'],
420a25f0a04SGreg Roach		// Cyrillic alphabet
421*13abd6f3SGreg Roach		'А'   => ['1', '0', '', ''],
422*13abd6f3SGreg Roach		'Б'   => ['0', '7', '7', '7'],
423*13abd6f3SGreg Roach		'В'   => ['0', '7', '7', '7'],
424*13abd6f3SGreg Roach		'Г'   => ['0', '5', '5', '5'],
425*13abd6f3SGreg Roach		'Д'   => ['0', '3', '3', '3'],
426*13abd6f3SGreg Roach		'ДЗ'  => ['0', '4', '4', '4'],
427*13abd6f3SGreg Roach		'Е'   => ['1', '0', '', ''],
428*13abd6f3SGreg Roach		'Ё'   => ['1', '0', '', ''],
429*13abd6f3SGreg Roach		'Ж'   => ['0', '4', '4', '4'],
430*13abd6f3SGreg Roach		'З'   => ['0', '4', '4', '4'],
431*13abd6f3SGreg Roach		'И'   => ['1', '0', '', ''],
432*13abd6f3SGreg Roach		'Й'   => ['1', '1', '', '', '4', '4', '4'],
433*13abd6f3SGreg Roach		'К'   => ['0', '5', '5', '5'],
434*13abd6f3SGreg Roach		'Л'   => ['0', '8', '8', '8'],
435*13abd6f3SGreg Roach		'М'   => ['0', '6', '6', '6'],
436*13abd6f3SGreg Roach		'Н'   => ['0', '6', '6', '6'],
437*13abd6f3SGreg Roach		'О'   => ['1', '0', '', ''],
438*13abd6f3SGreg Roach		'П'   => ['0', '7', '7', '7'],
439*13abd6f3SGreg Roach		'Р'   => ['0', '9', '9', '9'],
440*13abd6f3SGreg Roach		'РЖ'  => ['0', '4', '4', '4'],
441*13abd6f3SGreg Roach		'С'   => ['0', '4', '4', '4'],
442*13abd6f3SGreg Roach		'Т'   => ['0', '3', '3', '3'],
443*13abd6f3SGreg Roach		'У'   => ['1', '0', '', ''],
444*13abd6f3SGreg Roach		'Ф'   => ['0', '7', '7', '7'],
445*13abd6f3SGreg Roach		'Х'   => ['0', '5', '5', '5'],
446*13abd6f3SGreg Roach		'Ц'   => ['0', '4', '4', '4'],
447*13abd6f3SGreg Roach		'Ч'   => ['0', '4', '4', '4'],
448*13abd6f3SGreg Roach		'Ш'   => ['0', '4', '4', '4'],
449*13abd6f3SGreg Roach		'Щ'   => ['0', '2', '4', '4'],
450*13abd6f3SGreg Roach		'Ъ'   => ['0', '', '', ''],
451*13abd6f3SGreg Roach		'Ы'   => ['0', '1', '', ''],
452*13abd6f3SGreg Roach		'Ь'   => ['0', '', '', ''],
453*13abd6f3SGreg Roach		'Э'   => ['1', '0', '', ''],
454*13abd6f3SGreg Roach		'Ю'   => ['0', '1', '', ''],
455*13abd6f3SGreg Roach		'Я'   => ['0', '1', '', ''],
456a25f0a04SGreg Roach		// Greek alphabet
457*13abd6f3SGreg Roach		'Α'   => ['1', '0', '', ''],
458*13abd6f3SGreg Roach		'Ά'   => ['1', '0', '', ''],
459*13abd6f3SGreg Roach		'ΑΙ'  => ['1', '0', '1', ''],
460*13abd6f3SGreg Roach		'ΑΥ'  => ['1', '0', '1', ''],
461*13abd6f3SGreg Roach		'Β'   => ['0', '7', '7', '7'],
462*13abd6f3SGreg Roach		'Γ'   => ['0', '5', '5', '5'],
463*13abd6f3SGreg Roach		'Δ'   => ['0', '3', '3', '3'],
464*13abd6f3SGreg Roach		'Ε'   => ['1', '0', '', ''],
465*13abd6f3SGreg Roach		'Έ'   => ['1', '0', '', ''],
466*13abd6f3SGreg Roach		'ΕΙ'  => ['1', '0', '1', ''],
467*13abd6f3SGreg Roach		'ΕΥ'  => ['1', '1', '1', ''],
468*13abd6f3SGreg Roach		'Ζ'   => ['0', '4', '4', '4'],
469*13abd6f3SGreg Roach		'Η'   => ['1', '0', '', ''],
470*13abd6f3SGreg Roach		'Ή'   => ['1', '0', '', ''],
471*13abd6f3SGreg Roach		'Θ'   => ['0', '3', '3', '3'],
472*13abd6f3SGreg Roach		'Ι'   => ['1', '0', '', ''],
473*13abd6f3SGreg Roach		'Ί'   => ['1', '0', '', ''],
474*13abd6f3SGreg Roach		'Ϊ'   => ['1', '0', '', ''],
475*13abd6f3SGreg Roach		'ΐ'   => ['1', '0', '', ''],
476*13abd6f3SGreg Roach		'Κ'   => ['0', '5', '5', '5'],
477*13abd6f3SGreg Roach		'Λ'   => ['0', '8', '8', '8'],
478*13abd6f3SGreg Roach		'Μ'   => ['0', '6', '6', '6'],
479*13abd6f3SGreg Roach		'ΜΠ'  => ['0', '7', '7', '7'],
480*13abd6f3SGreg Roach		'Ν'   => ['0', '6', '6', '6'],
481*13abd6f3SGreg Roach		'ΝΤ'  => ['0', '3', '3', '3'],
482*13abd6f3SGreg Roach		'Ξ'   => ['0', '5', '54', '54'],
483*13abd6f3SGreg Roach		'Ο'   => ['1', '0', '', ''],
484*13abd6f3SGreg Roach		'Ό'   => ['1', '0', '', ''],
485*13abd6f3SGreg Roach		'ΟΙ'  => ['1', '0', '1', ''],
486*13abd6f3SGreg Roach		'ΟΥ'  => ['1', '0', '1', ''],
487*13abd6f3SGreg Roach		'Π'   => ['0', '7', '7', '7'],
488*13abd6f3SGreg Roach		'Ρ'   => ['0', '9', '9', '9'],
489*13abd6f3SGreg Roach		'Σ'   => ['0', '4', '4', '4'],
490*13abd6f3SGreg Roach		'ς'   => ['0', '', '', '4'],
491*13abd6f3SGreg Roach		'Τ'   => ['0', '3', '3', '3'],
492*13abd6f3SGreg Roach		'ΤΖ'  => ['0', '4', '4', '4'],
493*13abd6f3SGreg Roach		'ΤΣ'  => ['0', '4', '4', '4'],
494*13abd6f3SGreg Roach		'Υ'   => ['1', '1', '', ''],
495*13abd6f3SGreg Roach		'Ύ'   => ['1', '1', '', ''],
496*13abd6f3SGreg Roach		'Ϋ'   => ['1', '1', '', ''],
497*13abd6f3SGreg Roach		'ΰ'   => ['1', '1', '', ''],
498*13abd6f3SGreg Roach		'ΥΚ'  => ['1', '5', '5', '5'],
499*13abd6f3SGreg Roach		'ΥΥ'  => ['1', '65', '65', '65'],
500*13abd6f3SGreg Roach		'Φ'   => ['0', '7', '7', '7'],
501*13abd6f3SGreg Roach		'Χ'   => ['0', '5', '5', '5'],
502*13abd6f3SGreg Roach		'Ψ'   => ['0', '7', '7', '7'],
503*13abd6f3SGreg Roach		'Ω'   => ['1', '0', '', ''],
504*13abd6f3SGreg Roach		'Ώ'   => ['1', '0', '', ''],
505a25f0a04SGreg Roach		// Hebrew alphabet
506*13abd6f3SGreg Roach		'א'     => ['1', '0', '', ''],
507*13abd6f3SGreg Roach		'או'    => ['1', '0', '7', ''],
508*13abd6f3SGreg Roach		'אג'    => ['1', '4', '4', '4', '5', '5', '5', '34', '34', '34'],
509*13abd6f3SGreg Roach		'בב'    => ['0', '7', '7', '7', '77', '77', '77'],
510*13abd6f3SGreg Roach		'ב'     => ['0', '7', '7', '7'],
511*13abd6f3SGreg Roach		'גג'    => ['0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'],
512*13abd6f3SGreg Roach		'גד'    => ['0', '43', '43', '43', '53', '53', '53'],
513*13abd6f3SGreg Roach		'גה'    => ['0', '45', '45', '45', '55', '55', '55'],
514*13abd6f3SGreg Roach		'גז'    => ['0', '44', '44', '44', '45', '45', '45'],
515*13abd6f3SGreg Roach		'גח'    => ['0', '45', '45', '45', '55', '55', '55'],
516*13abd6f3SGreg Roach		'גכ'    => ['0', '45', '45', '45', '55', '55', '55'],
517*13abd6f3SGreg Roach		'גך'    => ['0', '45', '45', '45', '55', '55', '55'],
518*13abd6f3SGreg Roach		'גצ'    => ['0', '44', '44', '44', '45', '45', '45'],
519*13abd6f3SGreg Roach		'גץ'    => ['0', '44', '44', '44', '45', '45', '45'],
520*13abd6f3SGreg Roach		'גק'    => ['0', '45', '45', '45', '54', '54', '54'],
521*13abd6f3SGreg Roach		'גש'    => ['0', '44', '44', '44', '54', '54', '54'],
522*13abd6f3SGreg Roach		'גת'    => ['0', '43', '43', '43', '53', '53', '53'],
523*13abd6f3SGreg Roach		'ג'     => ['0', '4', '4', '4', '5', '5', '5'],
524*13abd6f3SGreg Roach		'דז'    => ['0', '4', '4', '4'],
525*13abd6f3SGreg Roach		'דד'    => ['0', '3', '3', '3', '33', '33', '33'],
526*13abd6f3SGreg Roach		'דט'    => ['0', '33', '33', '33'],
527*13abd6f3SGreg Roach		'דש'    => ['0', '4', '4', '4'],
528*13abd6f3SGreg Roach		'דצ'    => ['0', '4', '4', '4'],
529*13abd6f3SGreg Roach		'דץ'    => ['0', '4', '4', '4'],
530*13abd6f3SGreg Roach		'ד'     => ['0', '3', '3', '3'],
531*13abd6f3SGreg Roach		'הג'    => ['0', '54', '54', '54', '55', '55', '55'],
532*13abd6f3SGreg Roach		'הכ'    => ['0', '55', '55', '55'],
533*13abd6f3SGreg Roach		'הח'    => ['0', '55', '55', '55'],
534*13abd6f3SGreg Roach		'הק'    => ['0', '55', '55', '55', '5', '5', '5'],
535*13abd6f3SGreg Roach		'הה'    => ['0', '5', '5', '', '55', '55', ''],
536*13abd6f3SGreg Roach		'ה'     => ['0', '5', '5', ''],
537*13abd6f3SGreg Roach		'וי'    => ['1', '', '', '', '7', '7', '7'],
538*13abd6f3SGreg Roach		'ו'     => ['1', '7', '7', '7', '7', '', ''],
539*13abd6f3SGreg Roach		'וו'    => ['1', '7', '7', '7', '7', '', ''],
540*13abd6f3SGreg Roach		'וופ'   => ['1', '7', '7', '7', '77', '77', '77'],
541*13abd6f3SGreg Roach		'זש'    => ['0', '4', '4', '4', '44', '44', '44'],
542*13abd6f3SGreg Roach		'זדז'   => ['0', '2', '4', '4'],
543*13abd6f3SGreg Roach		'ז'     => ['0', '4', '4', '4'],
544*13abd6f3SGreg Roach		'זג'    => ['0', '44', '44', '44', '45', '45', '45'],
545*13abd6f3SGreg Roach		'זז'    => ['0', '4', '4', '4', '44', '44', '44'],
546*13abd6f3SGreg Roach		'זס'    => ['0', '44', '44', '44'],
547*13abd6f3SGreg Roach		'זצ'    => ['0', '44', '44', '44'],
548*13abd6f3SGreg Roach		'זץ'    => ['0', '44', '44', '44'],
549*13abd6f3SGreg Roach		'חג'    => ['0', '54', '54', '54', '53', '53', '53'],
550*13abd6f3SGreg Roach		'חח'    => ['0', '5', '5', '5', '55', '55', '55'],
551*13abd6f3SGreg Roach		'חק'    => ['0', '55', '55', '55', '5', '5', '5'],
552*13abd6f3SGreg Roach		'חכ'    => ['0', '45', '45', '45', '55', '55', '55'],
553*13abd6f3SGreg Roach		'חס'    => ['0', '5', '54', '54'],
554*13abd6f3SGreg Roach		'חש'    => ['0', '5', '54', '54'],
555*13abd6f3SGreg Roach		'ח'     => ['0', '5', '5', '5'],
556*13abd6f3SGreg Roach		'טש'    => ['0', '4', '4', '4'],
557*13abd6f3SGreg Roach		'טד'    => ['0', '33', '33', '33'],
558*13abd6f3SGreg Roach		'טי'    => ['0', '3', '3', '3', '4', '4', '4', '3', '3', '34'],
559*13abd6f3SGreg Roach		'טת'    => ['0', '33', '33', '33'],
560*13abd6f3SGreg Roach		'טט'    => ['0', '3', '3', '3', '33', '33', '33'],
561*13abd6f3SGreg Roach		'ט'     => ['0', '3', '3', '3'],
562*13abd6f3SGreg Roach		'י'     => ['1', '1', '', ''],
563*13abd6f3SGreg Roach		'יא'    => ['1', '1', '', '', '1', '1', '1'],
564*13abd6f3SGreg Roach		'כג'    => ['0', '55', '55', '55', '54', '54', '54'],
565*13abd6f3SGreg Roach		'כש'    => ['0', '5', '54', '54'],
566*13abd6f3SGreg Roach		'כס'    => ['0', '5', '54', '54'],
567*13abd6f3SGreg Roach		'ככ'    => ['0', '5', '5', '5', '55', '55', '55'],
568*13abd6f3SGreg Roach		'כך'    => ['0', '5', '5', '5', '55', '55', '55'],
569*13abd6f3SGreg Roach		'כ'     => ['0', '5', '5', '5'],
570*13abd6f3SGreg Roach		'כח'    => ['0', '55', '55', '55', '5', '5', '5'],
571*13abd6f3SGreg Roach		'ך'     => ['0', '', '5', '5'],
572*13abd6f3SGreg Roach		'ל'     => ['0', '8', '8', '8'],
573*13abd6f3SGreg Roach		'לל'    => ['0', '88', '88', '88', '8', '8', '8'],
574*13abd6f3SGreg Roach		'מנ'    => ['0', '66', '66', '66'],
575*13abd6f3SGreg Roach		'מן'    => ['0', '66', '66', '66'],
576*13abd6f3SGreg Roach		'ממ'    => ['0', '6', '6', '6', '66', '66', '66'],
577*13abd6f3SGreg Roach		'מם'    => ['0', '6', '6', '6', '66', '66', '66'],
578*13abd6f3SGreg Roach		'מ'     => ['0', '6', '6', '6'],
579*13abd6f3SGreg Roach		'ם'     => ['0', '', '6', '6'],
580*13abd6f3SGreg Roach		'נמ'    => ['0', '66', '66', '66'],
581*13abd6f3SGreg Roach		'נם'    => ['0', '66', '66', '66'],
582*13abd6f3SGreg Roach		'ננ'    => ['0', '6', '6', '6', '66', '66', '66'],
583*13abd6f3SGreg Roach		'נן'    => ['0', '6', '6', '6', '66', '66', '66'],
584*13abd6f3SGreg Roach		'נ'     => ['0', '6', '6', '6'],
585*13abd6f3SGreg Roach		'ן'     => ['0', '', '6', '6'],
586*13abd6f3SGreg Roach		'סתש'   => ['0', '2', '4', '4'],
587*13abd6f3SGreg Roach		'סתז'   => ['0', '2', '4', '4'],
588*13abd6f3SGreg Roach		'סטז'   => ['0', '2', '4', '4'],
589*13abd6f3SGreg Roach		'סטש'   => ['0', '2', '4', '4'],
590*13abd6f3SGreg Roach		'סצד'   => ['0', '2', '4', '4'],
591*13abd6f3SGreg Roach		'סט'    => ['0', '2', '4', '4', '43', '43', '43'],
592*13abd6f3SGreg Roach		'סת'    => ['0', '2', '4', '4', '43', '43', '43'],
593*13abd6f3SGreg Roach		'סג'    => ['0', '44', '44', '44', '4', '4', '4'],
594*13abd6f3SGreg Roach		'סס'    => ['0', '4', '4', '4', '44', '44', '44'],
595*13abd6f3SGreg Roach		'סצ'    => ['0', '44', '44', '44'],
596*13abd6f3SGreg Roach		'סץ'    => ['0', '44', '44', '44'],
597*13abd6f3SGreg Roach		'סז'    => ['0', '44', '44', '44'],
598*13abd6f3SGreg Roach		'סש'    => ['0', '44', '44', '44'],
599*13abd6f3SGreg Roach		'ס'     => ['0', '4', '4', '4'],
600*13abd6f3SGreg Roach		'ע'     => ['1', '0', '', ''],
601*13abd6f3SGreg Roach		'פב'    => ['0', '7', '7', '7', '77', '77', '77'],
602*13abd6f3SGreg Roach		'פוו'   => ['0', '7', '7', '7', '77', '77', '77'],
603*13abd6f3SGreg Roach		'פפ'    => ['0', '7', '7', '7', '77', '77', '77'],
604*13abd6f3SGreg Roach		'פף'    => ['0', '7', '7', '7', '77', '77', '77'],
605*13abd6f3SGreg Roach		'פ'     => ['0', '7', '7', '7'],
606*13abd6f3SGreg Roach		'ף'     => ['0', '', '7', '7'],
607*13abd6f3SGreg Roach		'צג'    => ['0', '44', '44', '44', '45', '45', '45'],
608*13abd6f3SGreg Roach		'צז'    => ['0', '44', '44', '44'],
609*13abd6f3SGreg Roach		'צס'    => ['0', '44', '44', '44'],
610*13abd6f3SGreg Roach		'צצ'    => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'],
611*13abd6f3SGreg Roach		'צץ'    => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'],
612*13abd6f3SGreg Roach		'צש'    => ['0', '44', '44', '44', '4', '4', '4', '5', '5', '5'],
613*13abd6f3SGreg Roach		'צ'     => ['0', '4', '4', '4', '5', '5', '5'],
614*13abd6f3SGreg Roach		'ץ'     => ['0', '', '4', '4'],
615*13abd6f3SGreg Roach		'קה'    => ['0', '55', '55', '5'],
616*13abd6f3SGreg Roach		'קס'    => ['0', '5', '54', '54'],
617*13abd6f3SGreg Roach		'קש'    => ['0', '5', '54', '54'],
618*13abd6f3SGreg Roach		'קק'    => ['0', '5', '5', '5', '55', '55', '55'],
619*13abd6f3SGreg Roach		'קח'    => ['0', '55', '55', '55'],
620*13abd6f3SGreg Roach		'קכ'    => ['0', '55', '55', '55'],
621*13abd6f3SGreg Roach		'קך'    => ['0', '55', '55', '55'],
622*13abd6f3SGreg Roach		'קג'    => ['0', '55', '55', '55', '54', '54', '54'],
623*13abd6f3SGreg Roach		'ק'     => ['0', '5', '5', '5'],
624*13abd6f3SGreg Roach		'רר'    => ['0', '99', '99', '99', '9', '9', '9'],
625*13abd6f3SGreg Roach		'ר'     => ['0', '9', '9', '9'],
626*13abd6f3SGreg Roach		'שטז'   => ['0', '2', '4', '4'],
627*13abd6f3SGreg Roach		'שתש'   => ['0', '2', '4', '4'],
628*13abd6f3SGreg Roach		'שתז'   => ['0', '2', '4', '4'],
629*13abd6f3SGreg Roach		'שטש'   => ['0', '2', '4', '4'],
630*13abd6f3SGreg Roach		'שד'    => ['0', '2', '43', '43'],
631*13abd6f3SGreg Roach		'שז'    => ['0', '44', '44', '44'],
632*13abd6f3SGreg Roach		'שס'    => ['0', '44', '44', '44'],
633*13abd6f3SGreg Roach		'שת'    => ['0', '2', '43', '43'],
634*13abd6f3SGreg Roach		'שג'    => ['0', '4', '4', '4', '44', '44', '44', '4', '43', '43'],
635*13abd6f3SGreg Roach		'שט'    => ['0', '2', '43', '43', '44', '44', '44'],
636*13abd6f3SGreg Roach		'שצ'    => ['0', '44', '44', '44', '45', '45', '45'],
637*13abd6f3SGreg Roach		'שץ'    => ['0', '44', '', '44', '45', '', '45'],
638*13abd6f3SGreg Roach		'שש'    => ['0', '4', '4', '4', '44', '44', '44'],
639*13abd6f3SGreg Roach		'ש'     => ['0', '4', '4', '4'],
640*13abd6f3SGreg Roach		'תג'    => ['0', '34', '34', '34'],
641*13abd6f3SGreg Roach		'תז'    => ['0', '34', '34', '34'],
642*13abd6f3SGreg Roach		'תש'    => ['0', '4', '4', '4'],
643*13abd6f3SGreg Roach		'תת'    => ['0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'],
644*13abd6f3SGreg Roach		'ת'     => ['0', '3', '3', '3', '4', '4', '4'],
645a25f0a04SGreg Roach		// Arabic alphabet
646*13abd6f3SGreg Roach		'ا'   => ['1', '0', '', ''],
647*13abd6f3SGreg Roach		'ب'   => ['0', '7', '7', '7'],
648*13abd6f3SGreg Roach		'ت'   => ['0', '3', '3', '3'],
649*13abd6f3SGreg Roach		'ث'   => ['0', '3', '3', '3'],
650*13abd6f3SGreg Roach		'ج'   => ['0', '4', '4', '4'],
651*13abd6f3SGreg Roach		'ح'   => ['0', '5', '5', '5'],
652*13abd6f3SGreg Roach		'خ'   => ['0', '5', '5', '5'],
653*13abd6f3SGreg Roach		'د'   => ['0', '3', '3', '3'],
654*13abd6f3SGreg Roach		'ذ'   => ['0', '3', '3', '3'],
655*13abd6f3SGreg Roach		'ر'   => ['0', '9', '9', '9'],
656*13abd6f3SGreg Roach		'ز'   => ['0', '4', '4', '4'],
657*13abd6f3SGreg Roach		'س'   => ['0', '4', '4', '4'],
658*13abd6f3SGreg Roach		'ش'   => ['0', '4', '4', '4'],
659*13abd6f3SGreg Roach		'ص'   => ['0', '4', '4', '4'],
660*13abd6f3SGreg Roach		'ض'   => ['0', '3', '3', '3'],
661*13abd6f3SGreg Roach		'ط'   => ['0', '3', '3', '3'],
662*13abd6f3SGreg Roach		'ظ'   => ['0', '4', '4', '4'],
663*13abd6f3SGreg Roach		'ع'   => ['1', '0', '', ''],
664*13abd6f3SGreg Roach		'غ'   => ['0', '0', '', ''],
665*13abd6f3SGreg Roach		'ف'   => ['0', '7', '7', '7'],
666*13abd6f3SGreg Roach		'ق'   => ['0', '5', '5', '5'],
667*13abd6f3SGreg Roach		'ك'   => ['0', '5', '5', '5'],
668*13abd6f3SGreg Roach		'ل'   => ['0', '8', '8', '8'],
669*13abd6f3SGreg Roach		'لا'  => ['0', '8', '8', '8'],
670*13abd6f3SGreg Roach		'م'   => ['0', '6', '6', '6'],
671*13abd6f3SGreg Roach		'ن'   => ['0', '6', '6', '6'],
672*13abd6f3SGreg Roach		'هن'  => ['0', '66', '66', '66'],
673*13abd6f3SGreg Roach		'ه'   => ['0', '5', '5', ''],
674*13abd6f3SGreg Roach		'و'   => ['1', '', '', '', '7', '', ''],
675*13abd6f3SGreg Roach		'ي'   => ['0', '1', '', ''],
676*13abd6f3SGreg Roach		'آ'   => ['0', '1', '', ''],
677*13abd6f3SGreg Roach		'ة'   => ['0', '', '', '3'],
678*13abd6f3SGreg Roach		'ی'   => ['0', '1', '', ''],
679*13abd6f3SGreg Roach		'ى'   => ['1', '1', '', ''],
680*13abd6f3SGreg Roach	];
681a25f0a04SGreg Roach
/**
 * Calculate the Daitch-Mokotoff soundex for a word.
 *
 * The word is upper-cased, rewritten using self::$transformNameTable and then
 * scanned from left to right. At each position the longest chunk that has an
 * entry in self::$dmsounds is coded.
 *
 * Layout of a self::$dmsounds entry, as used by this method:
 *  - index 0 is '0' for a non-vowel chunk, anything else for a vowel chunk;
 *  - the remaining values come in triplets, one value per state
 *    (1: start of the word, 2: before a vowel, 3: anywhere else);
 *  - every additional triplet is an alternative coding, so each one forks
 *    the set of candidate codes;
 *  - an empty value means "this chunk is not coded in this state".
 *
 * @param string $name
 *
 * @return string[] List of possible DM codes for the word.
 */
private static function daitchMokotoffWord($name) {
	// Apply special transformation rules to the input string
	$name = I18N::strtoupper($name);
	foreach (self::$transformNameTable as $transformRule) {
		$name = str_replace($transformRule[0], $transformRule[1], $name);
	}

	// Initialize
	$name_script = I18N::textScript($name);
	$noVowels    = ($name_script === 'Hebr' || $name_script === 'Arab');

	$lastPos       = strlen($name) - 1; // Offsets are byte offsets, not character offsets
	$currPos       = 0;
	$state         = 1; // 1: start of input string, 2: before vowel, 3: other
	$result        = []; // accumulate complete 6-digit D-M codes here
	$partialResult = [['!']]; // incomplete D-M codes; the leading '!' stops the "duplicate sound" check

	// Loop through the input string.
	// Stop when the string is exhausted or when no more partial results remain
	while (count($partialResult) !== 0 && $currPos <= $lastPos) {
		// Find the DM coding table entry for the chunk at the current position
		$thisEntry = self::dmLongestTableChunk($name, $currPos);
		if ($thisEntry === '') {
			$currPos++; // Not in table: advance pointer to next byte
			continue; // and try again
		}

		$soundTableEntry = self::$dmsounds[$thisEntry];
		$workingResult   = $partialResult;
		$partialResult   = [];
		$currPos += strlen($thisEntry);

		// Not at beginning of input string
		if ($state !== 1) {
			// Determine whether the next chunk is a vowel
			$nextEntry = $currPos <= $lastPos ? self::dmLongestTableChunk($name, $currPos) : '';
			if ($nextEntry !== '' && self::$dmsounds[$nextEntry][0] !== '0') {
				// Next chunk is a vowel
				$state = 2;
			} else {
				// Next chunk is a consonant, is unknown, or there is no next chunk
				$state = 3;
			}
		}

		// Visit the value for the current state in every triplet of the entry.
		// Note that this loop also moves $state away from 1, which is what marks
		// every later chunk as "not at the beginning of the input string".
		while ($state < count($soundTableEntry)) {
			$sound = $soundTableEntry[$state];

			if ($sound === '') {
				// empty means 'ignore this sound in this state'
				foreach ($workingResult as $workingEntry) {
					$workingEntry[count($workingEntry) - 1] .= '!'; // Prevent false 'doubles'
					$partialResult[] = $workingEntry;
				}
			} else {
				foreach ($workingResult as $workingEntry) {
					// A sound that repeats the previous one is normally dropped.
					// For Hebrew and Arabic the repeated sound is kept.
					// NOTE(review): the original comment described emitting a pair of codes
					// (one with a single occurrence, one with both); only the doubled
					// variant is produced here - confirm which behaviour is intended.
					if ($sound !== $workingEntry[count($workingEntry) - 1] || $noVowels) {
						$workingEntry[] = $sound;
					}

					if (count($workingEntry) < 7) {
						$partialResult[] = $workingEntry;
					} else {
						// This is the 6th code in the sequence
						// We're looking for 7 entries because the first is '!' and doesn't count
						$code = self::dmPadCode($workingEntry);
						// Only return codes from recognisable sounds
						if ($code !== '') {
							$result[] = $code;
						}
					}
				}
			}
			$state += 3; // Advance to next triplet while keeping the same basic state
		}
	}

	// Zero-fill and copy all remaining partial results
	foreach ($partialResult as $workingEntry) {
		$code = self::dmPadCode($workingEntry);
		// Only return codes from recognisable sounds
		if ($code !== '') {
			$result[] = $code;
		}
	}

	return $result;
}

/**
 * Find the longest chunk of a word, starting at a byte offset, that has an
 * entry in self::$dmsounds.
 *
 * @param string $name     The (already transformed) word.
 * @param int    $position Byte offset at which the chunk must start.
 *
 * @return string The matching chunk, or an empty string when nothing matches.
 */
private static function dmLongestTableChunk($name, $position) {
	// Start with the maximum length chunk and shorten it one byte at a time.
	// The cast covers PHP versions where substr() returns false instead of ''.
	$chunk = (string) substr($name, $position, self::MAXCHAR);
	while ($chunk !== '' && !isset(self::$dmsounds[$chunk])) {
		$chunk = (string) substr($chunk, 0, -1);
	}

	return $chunk;
}

/**
 * Turn a list of accumulated sound codes into a 6-digit D-M code.
 *
 * The '!' markers are removed and the code is zero-filled or truncated to
 * six characters. The emptiness test is an explicit string comparison: a
 * plain truthiness test would wrongly discard the valid code "0" (a word
 * whose only coded sound is an initial vowel), because PHP treats the
 * string "0" as false.
 *
 * @param string[] $workingEntry Accumulated codes, including '!' markers.
 *
 * @return string The 6-digit code, or an empty string when no sound was coded.
 */
private static function dmPadCode(array $workingEntry) {
	$code = str_replace('!', '', implode('', $workingEntry));
	if ($code === '') {
		return '';
	}

	return substr($code . '000000', 0, 6);
}
800a25f0a04SGreg Roach}
801