xref: /webtrees/app/Soundex.php (revision 1062a1429914c995339f502856821457aa975a5a)
1a25f0a04SGreg Roach<?php
2a25f0a04SGreg Roach/**
3a25f0a04SGreg Roach * webtrees: online genealogy
4*1062a142SGreg Roach * Copyright (C) 2018 webtrees development team
5a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify
6a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by
7a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or
8a25f0a04SGreg Roach * (at your option) any later version.
9a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful,
10a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
11a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12a25f0a04SGreg Roach * GNU General Public License for more details.
13a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License
14a25f0a04SGreg Roach * along with this program. If not, see <http://www.gnu.org/licenses/>.
15a25f0a04SGreg Roach */
1676692c8bSGreg Roachnamespace Fisharebest\Webtrees;
17a25f0a04SGreg Roach
18a25f0a04SGreg Roach/**
1976692c8bSGreg Roach * Phonetic matching of strings.
20a25f0a04SGreg Roach */
21a25f0a04SGreg Roachclass Soundex {
/**
 * List the phonetic algorithms that this class implements.
 *
 * The keys are the internal algorithm identifiers ('std' and 'dm');
 * the values are the translated, human-readable algorithm names.
 *
 * @return string[]
 */
public static function getAlgorithms() {
	$russell = /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell');

	$daitch_mokotoff = /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff');

	return [
		'std' => $russell,
		'dm'  => $daitch_mokotoff,
	];
}
33a25f0a04SGreg Roach
/**
 * Is there a match between two soundex codes?
 *
 * Each argument is a colon-separated list of codes, in the format
 * produced by russell() and daitchMokotoff(). The two lists match
 * when they share at least one complete code.
 *
 * @param string $soundex1 Colon-separated list of codes
 * @param string $soundex2 Colon-separated list of codes
 *
 * @return bool
 */
public static function compare($soundex1, $soundex2) {
	// An empty (or missing) list cannot match anything.
	if (!$soundex1 || !$soundex2) {
		return false;
	}

	// Stray delimiters (e.g. "A123:") produce empty segments, which are not codes.
	$not_empty = function ($code) {
		return $code !== '';
	};

	$codes1 = array_filter(explode(':', $soundex1), $not_empty);
	$codes2 = array_filter(explode(':', $soundex2), $not_empty);

	// Compare whole codes. A substring search would also accept a partial
	// code (e.g. "A12" inside "A123") and would mis-handle empty segments.
	return (bool) array_intersect($codes1, $codes2);
}
53a25f0a04SGreg Roach
/**
 * Generate Russell soundex codes for a given text.
 *
 * The text is split on whitespace and a code is generated for each word.
 * When there is more than one word, a further code is generated for all
 * the words joined together. Codes of '0000' (no recognisable sound) and
 * duplicate codes are discarded.
 *
 * @param string $text
 *
 * @return string Colon-separated list of codes, or an empty string if there are none
 */
public static function russell($text) {
	$words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

	$candidates = [];
	foreach ($words as $word) {
		$candidates[] = soundex($word);
	}

	// Combine words, e.g. “New York” as “Newyork”
	if (count($words) > 1) {
		// Join the words explicitly: strtr($text, ' ', '') returns $text unchanged.
		$candidates[] = soundex(implode('', $words));
	}

	// Only return codes from recognisable sounds - this applies to the
	// combined code as well as to the codes for the individual words.
	$soundex_array = [];
	foreach ($candidates as $candidate) {
		if ($candidate !== '0000') {
			$soundex_array[] = $candidate;
		}
	}

	// A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
	$soundex_array = array_slice(array_unique($soundex_array), 0, 51);

	// implode() of an empty list gives an empty string.
	return implode(':', $soundex_array);
}
84a25f0a04SGreg Roach
/**
 * Generate Daitch–Mokotoff soundex codes for a given text.
 *
 * The text is split on whitespace and the codes for each word are
 * collected. When there is more than one word, the codes for all the
 * words joined together are added as well. Duplicate codes are discarded.
 *
 * @param string $text
 *
 * @return string Colon-separated list of codes, or an empty string if there are none
 */
public static function daitchMokotoff($text) {
	$words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

	// daitchMokotoffWord() is merged with array_merge(), so it is expected to return a list of codes.
	$soundex_array = [];
	foreach ($words as $word) {
		$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
	}

	// Combine words, e.g. “New York” as “Newyork”
	if (count($words) > 1) {
		// Join the words explicitly: strtr($text, ' ', '') returns $text
		// unchanged, which left the spaces in the "combined" word.
		$combined      = implode('', $words);
		$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($combined));
	}

	// A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
	$soundex_array = array_slice(array_unique($soundex_array), 0, 36);

	// implode() of an empty list gives an empty string.
	return implode(':', $soundex_array);
}
111a25f0a04SGreg Roach
112a25f0a04SGreg Roach	// Determine the Daitch–Mokotoff Soundex code for a word
113a25f0a04SGreg Roach	// Original implementation by Gerry Kroll, and analysis by Meliza Amity
114a25f0a04SGreg Roach
/**
 * Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
 *
 * This is a byte count, so a key made of multi-byte UTF-8 characters
 * reaches the limit with fewer characters than an ASCII key.
 * NOTE(review): presumably the upper bound on how many bytes are looked
 * up at once in the sound table - confirm against daitchMokotoffWord().
 */
const MAXCHAR = 7;
117a25f0a04SGreg Roach
/**
 * Name transformation table.
 * Used to transform the name string, to simplify the "sounds like" table.
 * This is especially useful in Hebrew.
 *
 * Each entry is a two-element list: [0] is the text to search for ("from")
 * and [1] is its replacement ("to"). The order of the entries matters,
 * because later entries rely on the output of earlier ones.
 *
 * Note about the use of "\x01":
 * This code, which can’t legitimately occur in the kind of text we're dealing with,
 * is used as a place-holder so that conditional string replacements can be done:
 * it is first removed from the text, then substituted for a two-letter sequence
 * that must be protected from the following rule, and finally converted back
 * to that sequence by the last entry.
 *
 * NOTE(review): two "from" values end in "$", which looks like a regular-expression
 * end-of-string anchor. Confirm that the code which applies this table treats the
 * "from" values as patterns - with a literal string replacement, these two entries
 * would only match text containing an actual "$" character.
 *
 * @var string[][]
 */
private static $transformNameTable = [
	// Force Yiddish ligatures to be treated as separate letters
	['װ', 'וו'],
	['ײ', 'יי'],
	['ױ', 'וי'],
	['בו', 'בע'],
	['פו', 'פע'],
	['ומ', 'עמ'],
	['ום', 'עם'],
	['ונ', 'ענ'],
	['ון', 'ען'],
	['וו', 'ב'],
	// Remove any existing place-holder, so that it can be used safely below
	["\x01", ''],
	// Protect the sequence in these two contexts...
	['ייה$', "\x01ה"],
	['ייע$', "\x01ע"],
	// ...replace it everywhere else...
	['יי', 'ע'],
	// ...and then restore the protected occurrences
	["\x01", 'יי'],
];
150a25f0a04SGreg Roach
151a25f0a04SGreg Roach	/**
152a25f0a04SGreg Roach	 * The DM sound coding table is organized this way:
153a25f0a04SGreg Roach	 * key: a variable-length string that corresponds to the UTF-8 character sequence
	 * represented by the table entry. Currently, that string can be up to 7
	 * bytes long. This maximum length is defined by the class constant
	 * self::MAXCHAR.
157a25f0a04SGreg Roach	 *
158a25f0a04SGreg Roach	 * value: an array as follows:
159a25f0a04SGreg Roach	 * [0]:  zero if not a vowel
160a25f0a04SGreg Roach	 * [1]:  sound value when this string is at the beginning of the word
161a25f0a04SGreg Roach	 * [2]:  sound value when this string is followed by a vowel
162a25f0a04SGreg Roach	 * [3]:  sound value for other cases
163a25f0a04SGreg Roach	 * [1],[2],[3] can be repeated several times to create branches in the code
164a25f0a04SGreg Roach	 * an empty sound value means "ignore in this state"
165a25f0a04SGreg Roach	 *
166a25f0a04SGreg Roach	 * @var string[][]
167a25f0a04SGreg Roach	 */
16813abd6f3SGreg Roach	private static $dmsounds = [
16913abd6f3SGreg Roach		'A'       => ['1', '0', '', ''],
17013abd6f3SGreg Roach		'À'       => ['1', '0', '', ''],
17113abd6f3SGreg Roach		'Á'       => ['1', '0', '', ''],
17213abd6f3SGreg Roach		'Â'       => ['1', '0', '', ''],
17313abd6f3SGreg Roach		'Ã'       => ['1', '0', '', ''],
17413abd6f3SGreg Roach		'Ä'       => ['1', '0', '1', '', '0', '', ''],
17513abd6f3SGreg Roach		'Å'       => ['1', '0', '', ''],
17613abd6f3SGreg Roach		'Ă'       => ['1', '0', '', ''],
17713abd6f3SGreg Roach		'Ą'       => ['1', '', '', '', '', '', '6'],
17813abd6f3SGreg Roach		'Ạ'       => ['1', '0', '', ''],
17913abd6f3SGreg Roach		'Ả'       => ['1', '0', '', ''],
18013abd6f3SGreg Roach		'Ấ'       => ['1', '0', '', ''],
18113abd6f3SGreg Roach		'Ầ'       => ['1', '0', '', ''],
18213abd6f3SGreg Roach		'Ẩ'       => ['1', '0', '', ''],
18313abd6f3SGreg Roach		'Ẫ'       => ['1', '0', '', ''],
18413abd6f3SGreg Roach		'Ậ'       => ['1', '0', '', ''],
18513abd6f3SGreg Roach		'Ắ'       => ['1', '0', '', ''],
18613abd6f3SGreg Roach		'Ằ'       => ['1', '0', '', ''],
18713abd6f3SGreg Roach		'Ẳ'       => ['1', '0', '', ''],
18813abd6f3SGreg Roach		'Ẵ'       => ['1', '0', '', ''],
18913abd6f3SGreg Roach		'Ặ'       => ['1', '0', '', ''],
19013abd6f3SGreg Roach		'AE'      => ['1', '0', '1', ''],
19113abd6f3SGreg Roach		'Æ'       => ['1', '0', '1', ''],
19213abd6f3SGreg Roach		'AI'      => ['1', '0', '1', ''],
19313abd6f3SGreg Roach		'AJ'      => ['1', '0', '1', ''],
19413abd6f3SGreg Roach		'AU'      => ['1', '0', '7', ''],
19513abd6f3SGreg Roach		'AV'      => ['1', '0', '7', '', '7', '7', '7'],
19613abd6f3SGreg Roach		'ÄU'      => ['1', '0', '1', ''],
19713abd6f3SGreg Roach		'AY'      => ['1', '0', '1', ''],
19813abd6f3SGreg Roach		'B'       => ['0', '7', '7', '7'],
19913abd6f3SGreg Roach		'C'       => ['0', '5', '5', '5', '34', '4', '4'],
20013abd6f3SGreg Roach		'Ć'       => ['0', '4', '4', '4'],
20113abd6f3SGreg Roach		'Č'       => ['0', '4', '4', '4'],
20213abd6f3SGreg Roach		'Ç'       => ['0', '4', '4', '4'],
20313abd6f3SGreg Roach		'CH'      => ['0', '5', '5', '5', '34', '4', '4'],
20413abd6f3SGreg Roach		'CHS'     => ['0', '5', '54', '54'],
20513abd6f3SGreg Roach		'CK'      => ['0', '5', '5', '5', '45', '45', '45'],
20613abd6f3SGreg Roach		'CCS'     => ['0', '4', '4', '4'],
20713abd6f3SGreg Roach		'CS'      => ['0', '4', '4', '4'],
20813abd6f3SGreg Roach		'CSZ'     => ['0', '4', '4', '4'],
20913abd6f3SGreg Roach		'CZ'      => ['0', '4', '4', '4'],
21013abd6f3SGreg Roach		'CZS'     => ['0', '4', '4', '4'],
21113abd6f3SGreg Roach		'D'       => ['0', '3', '3', '3'],
21213abd6f3SGreg Roach		'Ď'       => ['0', '3', '3', '3'],
21313abd6f3SGreg Roach		'Đ'       => ['0', '3', '3', '3'],
21413abd6f3SGreg Roach		'DRS'     => ['0', '4', '4', '4'],
21513abd6f3SGreg Roach		'DRZ'     => ['0', '4', '4', '4'],
21613abd6f3SGreg Roach		'DS'      => ['0', '4', '4', '4'],
21713abd6f3SGreg Roach		'DSH'     => ['0', '4', '4', '4'],
21813abd6f3SGreg Roach		'DSZ'     => ['0', '4', '4', '4'],
21913abd6f3SGreg Roach		'DT'      => ['0', '3', '3', '3'],
22013abd6f3SGreg Roach		'DDZ'     => ['0', '4', '4', '4'],
22113abd6f3SGreg Roach		'DDZS'    => ['0', '4', '4', '4'],
22213abd6f3SGreg Roach		'DZ'      => ['0', '4', '4', '4'],
22313abd6f3SGreg Roach		'DŹ'      => ['0', '4', '4', '4'],
22413abd6f3SGreg Roach		'DŻ'      => ['0', '4', '4', '4'],
22513abd6f3SGreg Roach		'DZH'     => ['0', '4', '4', '4'],
22613abd6f3SGreg Roach		'DZS'     => ['0', '4', '4', '4'],
22713abd6f3SGreg Roach		'E'       => ['1', '0', '', ''],
22813abd6f3SGreg Roach		'È'       => ['1', '0', '', ''],
22913abd6f3SGreg Roach		'É'       => ['1', '0', '', ''],
23013abd6f3SGreg Roach		'Ê'       => ['1', '0', '', ''],
23113abd6f3SGreg Roach		'Ë'       => ['1', '0', '', ''],
23213abd6f3SGreg Roach		'Ĕ'       => ['1', '0', '', ''],
23313abd6f3SGreg Roach		'Ė'       => ['1', '0', '', ''],
23413abd6f3SGreg Roach		'Ę'       => ['1', '', '', '6', '', '', ''],
23513abd6f3SGreg Roach		'Ẹ'       => ['1', '0', '', ''],
23613abd6f3SGreg Roach		'Ẻ'       => ['1', '0', '', ''],
23713abd6f3SGreg Roach		'Ẽ'       => ['1', '0', '', ''],
23813abd6f3SGreg Roach		'Ế'       => ['1', '0', '', ''],
23913abd6f3SGreg Roach		'Ề'       => ['1', '0', '', ''],
24013abd6f3SGreg Roach		'Ể'       => ['1', '0', '', ''],
24113abd6f3SGreg Roach		'Ễ'       => ['1', '0', '', ''],
24213abd6f3SGreg Roach		'Ệ'       => ['1', '0', '', ''],
24313abd6f3SGreg Roach		'EAU'     => ['1', '0', '', ''],
24413abd6f3SGreg Roach		'EI'      => ['1', '0', '1', ''],
24513abd6f3SGreg Roach		'EJ'      => ['1', '0', '1', ''],
24613abd6f3SGreg Roach		'EU'      => ['1', '1', '1', ''],
24713abd6f3SGreg Roach		'EY'      => ['1', '0', '1', ''],
24813abd6f3SGreg Roach		'F'       => ['0', '7', '7', '7'],
24913abd6f3SGreg Roach		'FB'      => ['0', '7', '7', '7'],
25013abd6f3SGreg Roach		'G'       => ['0', '5', '5', '5', '34', '4', '4'],
25113abd6f3SGreg Roach		'Ğ'       => ['0', '', '', ''],
25213abd6f3SGreg Roach		'GGY'     => ['0', '5', '5', '5'],
25313abd6f3SGreg Roach		'GY'      => ['0', '5', '5', '5'],
25413abd6f3SGreg Roach		'H'       => ['0', '5', '5', '', '5', '5', '5'],
25513abd6f3SGreg Roach		'I'       => ['1', '0', '', ''],
25613abd6f3SGreg Roach		'Ì'       => ['1', '0', '', ''],
25713abd6f3SGreg Roach		'Í'       => ['1', '0', '', ''],
25813abd6f3SGreg Roach		'Î'       => ['1', '0', '', ''],
25913abd6f3SGreg Roach		'Ï'       => ['1', '0', '', ''],
26013abd6f3SGreg Roach		'Ĩ'       => ['1', '0', '', ''],
26113abd6f3SGreg Roach		'Į'       => ['1', '0', '', ''],
26213abd6f3SGreg Roach		'İ'       => ['1', '0', '', ''],
26313abd6f3SGreg Roach		'Ỉ'       => ['1', '0', '', ''],
26413abd6f3SGreg Roach		'Ị'       => ['1', '0', '', ''],
26513abd6f3SGreg Roach		'IA'      => ['1', '1', '', ''],
26613abd6f3SGreg Roach		'IE'      => ['1', '1', '', ''],
26713abd6f3SGreg Roach		'IO'      => ['1', '1', '', ''],
26813abd6f3SGreg Roach		'IU'      => ['1', '1', '', ''],
26913abd6f3SGreg Roach		'J'       => ['0', '1', '', '', '4', '4', '4', '5', '5', ''],
27013abd6f3SGreg Roach		'K'       => ['0', '5', '5', '5'],
27113abd6f3SGreg Roach		'KH'      => ['0', '5', '5', '5'],
27213abd6f3SGreg Roach		'KS'      => ['0', '5', '54', '54'],
27313abd6f3SGreg Roach		'L'       => ['0', '8', '8', '8'],
27413abd6f3SGreg Roach		'Ľ'       => ['0', '8', '8', '8'],
27513abd6f3SGreg Roach		'Ĺ'       => ['0', '8', '8', '8'],
27613abd6f3SGreg Roach		'Ł'       => ['0', '7', '7', '7', '8', '8', '8'],
27713abd6f3SGreg Roach		'LL'      => ['0', '8', '8', '8', '58', '8', '8', '1', '8', '8'],
27813abd6f3SGreg Roach		'LLY'     => ['0', '8', '8', '8', '1', '8', '8'],
27913abd6f3SGreg Roach		'LY'      => ['0', '8', '8', '8', '1', '8', '8'],
28013abd6f3SGreg Roach		'M'       => ['0', '6', '6', '6'],
28113abd6f3SGreg Roach		'MĔ'      => ['0', '66', '66', '66'],
28213abd6f3SGreg Roach		'MN'      => ['0', '66', '66', '66'],
28313abd6f3SGreg Roach		'N'       => ['0', '6', '6', '6'],
28413abd6f3SGreg Roach		'Ń'       => ['0', '6', '6', '6'],
28513abd6f3SGreg Roach		'Ň'       => ['0', '6', '6', '6'],
28613abd6f3SGreg Roach		'Ñ'       => ['0', '6', '6', '6'],
28713abd6f3SGreg Roach		'NM'      => ['0', '66', '66', '66'],
28813abd6f3SGreg Roach		'O'       => ['1', '0', '', ''],
28913abd6f3SGreg Roach		'Ò'       => ['1', '0', '', ''],
29013abd6f3SGreg Roach		'Ó'       => ['1', '0', '', ''],
29113abd6f3SGreg Roach		'Ô'       => ['1', '0', '', ''],
29213abd6f3SGreg Roach		'Õ'       => ['1', '0', '', ''],
29313abd6f3SGreg Roach		'Ö'       => ['1', '0', '', ''],
29413abd6f3SGreg Roach		'Ø'       => ['1', '0', '', ''],
29513abd6f3SGreg Roach		'Ő'       => ['1', '0', '', ''],
29613abd6f3SGreg Roach		'Œ'       => ['1', '0', '', ''],
29713abd6f3SGreg Roach		'Ơ'       => ['1', '0', '', ''],
29813abd6f3SGreg Roach		'Ọ'       => ['1', '0', '', ''],
29913abd6f3SGreg Roach		'Ỏ'       => ['1', '0', '', ''],
30013abd6f3SGreg Roach		'Ố'       => ['1', '0', '', ''],
30113abd6f3SGreg Roach		'Ồ'       => ['1', '0', '', ''],
30213abd6f3SGreg Roach		'Ổ'       => ['1', '0', '', ''],
30313abd6f3SGreg Roach		'Ỗ'       => ['1', '0', '', ''],
30413abd6f3SGreg Roach		'Ộ'       => ['1', '0', '', ''],
30513abd6f3SGreg Roach		'Ớ'       => ['1', '0', '', ''],
30613abd6f3SGreg Roach		'Ờ'       => ['1', '0', '', ''],
30713abd6f3SGreg Roach		'Ở'       => ['1', '0', '', ''],
30813abd6f3SGreg Roach		'Ỡ'       => ['1', '0', '', ''],
30913abd6f3SGreg Roach		'Ợ'       => ['1', '0', '', ''],
31013abd6f3SGreg Roach		'OE'      => ['1', '0', '', ''],
31113abd6f3SGreg Roach		'OI'      => ['1', '0', '1', ''],
31213abd6f3SGreg Roach		'OJ'      => ['1', '0', '1', ''],
31313abd6f3SGreg Roach		'OU'      => ['1', '0', '', ''],
31413abd6f3SGreg Roach		'OY'      => ['1', '0', '1', ''],
31513abd6f3SGreg Roach		'P'       => ['0', '7', '7', '7'],
31613abd6f3SGreg Roach		'PF'      => ['0', '7', '7', '7'],
31713abd6f3SGreg Roach		'PH'      => ['0', '7', '7', '7'],
31813abd6f3SGreg Roach		'Q'       => ['0', '5', '5', '5'],
31913abd6f3SGreg Roach		'R'       => ['0', '9', '9', '9'],
32013abd6f3SGreg Roach		'Ř'       => ['0', '4', '4', '4'],
32113abd6f3SGreg Roach		'RS'      => ['0', '4', '4', '4', '94', '94', '94'],
32213abd6f3SGreg Roach		'RZ'      => ['0', '4', '4', '4', '94', '94', '94'],
32313abd6f3SGreg Roach		'S'       => ['0', '4', '4', '4'],
32413abd6f3SGreg Roach		'Ś'       => ['0', '4', '4', '4'],
32513abd6f3SGreg Roach		'Š'       => ['0', '4', '4', '4'],
32613abd6f3SGreg Roach		'Ş'       => ['0', '4', '4', '4'],
32713abd6f3SGreg Roach		'SC'      => ['0', '2', '4', '4'],
32813abd6f3SGreg Roach		'ŠČ'      => ['0', '2', '4', '4'],
32913abd6f3SGreg Roach		'SCH'     => ['0', '4', '4', '4'],
33013abd6f3SGreg Roach		'SCHD'    => ['0', '2', '43', '43'],
33113abd6f3SGreg Roach		'SCHT'    => ['0', '2', '43', '43'],
33213abd6f3SGreg Roach		'SCHTCH'  => ['0', '2', '4', '4'],
33313abd6f3SGreg Roach		'SCHTSCH' => ['0', '2', '4', '4'],
33413abd6f3SGreg Roach		'SCHTSH'  => ['0', '2', '4', '4'],
33513abd6f3SGreg Roach		'SD'      => ['0', '2', '43', '43'],
33613abd6f3SGreg Roach		'SH'      => ['0', '4', '4', '4'],
33713abd6f3SGreg Roach		'SHCH'    => ['0', '2', '4', '4'],
33813abd6f3SGreg Roach		'SHD'     => ['0', '2', '43', '43'],
33913abd6f3SGreg Roach		'SHT'     => ['0', '2', '43', '43'],
34013abd6f3SGreg Roach		'SHTCH'   => ['0', '2', '4', '4'],
34113abd6f3SGreg Roach		'SHTSH'   => ['0', '2', '4', '4'],
34213abd6f3SGreg Roach		'ß'       => ['0', '', '4', '4'],
34313abd6f3SGreg Roach		'ST'      => ['0', '2', '43', '43'],
34413abd6f3SGreg Roach		'STCH'    => ['0', '2', '4', '4'],
34513abd6f3SGreg Roach		'STRS'    => ['0', '2', '4', '4'],
34613abd6f3SGreg Roach		'STRZ'    => ['0', '2', '4', '4'],
34713abd6f3SGreg Roach		'STSCH'   => ['0', '2', '4', '4'],
34813abd6f3SGreg Roach		'STSH'    => ['0', '2', '4', '4'],
34913abd6f3SGreg Roach		'SSZ'     => ['0', '4', '4', '4'],
35013abd6f3SGreg Roach		'SZ'      => ['0', '4', '4', '4'],
35113abd6f3SGreg Roach		'SZCS'    => ['0', '2', '4', '4'],
35213abd6f3SGreg Roach		'SZCZ'    => ['0', '2', '4', '4'],
35313abd6f3SGreg Roach		'SZD'     => ['0', '2', '43', '43'],
35413abd6f3SGreg Roach		'SZT'     => ['0', '2', '43', '43'],
35513abd6f3SGreg Roach		'T'       => ['0', '3', '3', '3'],
35613abd6f3SGreg Roach		'Ť'       => ['0', '3', '3', '3'],
35713abd6f3SGreg Roach		'Ţ'       => ['0', '3', '3', '3', '4', '4', '4'],
35813abd6f3SGreg Roach		'TC'      => ['0', '4', '4', '4'],
35913abd6f3SGreg Roach		'TCH'     => ['0', '4', '4', '4'],
36013abd6f3SGreg Roach		'TH'      => ['0', '3', '3', '3'],
36113abd6f3SGreg Roach		'TRS'     => ['0', '4', '4', '4'],
36213abd6f3SGreg Roach		'TRZ'     => ['0', '4', '4', '4'],
36313abd6f3SGreg Roach		'TS'      => ['0', '4', '4', '4'],
36413abd6f3SGreg Roach		'TSCH'    => ['0', '4', '4', '4'],
36513abd6f3SGreg Roach		'TSH'     => ['0', '4', '4', '4'],
36613abd6f3SGreg Roach		'TSZ'     => ['0', '4', '4', '4'],
36713abd6f3SGreg Roach		'TTCH'    => ['0', '4', '4', '4'],
36813abd6f3SGreg Roach		'TTS'     => ['0', '4', '4', '4'],
36913abd6f3SGreg Roach		'TTSCH'   => ['0', '4', '4', '4'],
37013abd6f3SGreg Roach		'TTSZ'    => ['0', '4', '4', '4'],
37113abd6f3SGreg Roach		'TTZ'     => ['0', '4', '4', '4'],
37213abd6f3SGreg Roach		'TZ'      => ['0', '4', '4', '4'],
37313abd6f3SGreg Roach		'TZS'     => ['0', '4', '4', '4'],
37413abd6f3SGreg Roach		'U'       => ['1', '0', '', ''],
37513abd6f3SGreg Roach		'Ù'       => ['1', '0', '', ''],
37613abd6f3SGreg Roach		'Ú'       => ['1', '0', '', ''],
37713abd6f3SGreg Roach		'Û'       => ['1', '0', '', ''],
37813abd6f3SGreg Roach		'Ü'       => ['1', '0', '', ''],
37913abd6f3SGreg Roach		'Ũ'       => ['1', '0', '', ''],
38013abd6f3SGreg Roach		'Ū'       => ['1', '0', '', ''],
38113abd6f3SGreg Roach		'Ů'       => ['1', '0', '', ''],
38213abd6f3SGreg Roach		'Ű'       => ['1', '0', '', ''],
38313abd6f3SGreg Roach		'Ų'       => ['1', '0', '', ''],
38413abd6f3SGreg Roach		'Ư'       => ['1', '0', '', ''],
38513abd6f3SGreg Roach		'Ụ'       => ['1', '0', '', ''],
38613abd6f3SGreg Roach		'Ủ'       => ['1', '0', '', ''],
38713abd6f3SGreg Roach		'Ứ'       => ['1', '0', '', ''],
38813abd6f3SGreg Roach		'Ừ'       => ['1', '0', '', ''],
38913abd6f3SGreg Roach		'Ử'       => ['1', '0', '', ''],
39013abd6f3SGreg Roach		'Ữ'       => ['1', '0', '', ''],
39113abd6f3SGreg Roach		'Ự'       => ['1', '0', '', ''],
39213abd6f3SGreg Roach		'UE'      => ['1', '0', '', ''],
39313abd6f3SGreg Roach		'UI'      => ['1', '0', '1', ''],
39413abd6f3SGreg Roach		'UJ'      => ['1', '0', '1', ''],
39513abd6f3SGreg Roach		'UY'      => ['1', '0', '1', ''],
39613abd6f3SGreg Roach		'UW'      => ['1', '0', '1', '', '0', '7', '7'],
39713abd6f3SGreg Roach		'V'       => ['0', '7', '7', '7'],
39813abd6f3SGreg Roach		'W'       => ['0', '7', '7', '7'],
39913abd6f3SGreg Roach		'X'       => ['0', '5', '54', '54'],
40013abd6f3SGreg Roach		'Y'       => ['1', '1', '', ''],
40113abd6f3SGreg Roach		'Ý'       => ['1', '1', '', ''],
40213abd6f3SGreg Roach		'Ỳ'       => ['1', '1', '', ''],
40313abd6f3SGreg Roach		'Ỵ'       => ['1', '1', '', ''],
40413abd6f3SGreg Roach		'Ỷ'       => ['1', '1', '', ''],
40513abd6f3SGreg Roach		'Ỹ'       => ['1', '1', '', ''],
40613abd6f3SGreg Roach		'Z'       => ['0', '4', '4', '4'],
40713abd6f3SGreg Roach		'Ź'       => ['0', '4', '4', '4'],
40813abd6f3SGreg Roach		'Ż'       => ['0', '4', '4', '4'],
40913abd6f3SGreg Roach		'Ž'       => ['0', '4', '4', '4'],
41013abd6f3SGreg Roach		'ZD'      => ['0', '2', '43', '43'],
41113abd6f3SGreg Roach		'ZDZ'     => ['0', '2', '4', '4'],
41213abd6f3SGreg Roach		'ZDZH'    => ['0', '2', '4', '4'],
41313abd6f3SGreg Roach		'ZH'      => ['0', '4', '4', '4'],
41413abd6f3SGreg Roach		'ZHD'     => ['0', '2', '43', '43'],
41513abd6f3SGreg Roach		'ZHDZH'   => ['0', '2', '4', '4'],
41613abd6f3SGreg Roach		'ZS'      => ['0', '4', '4', '4'],
41713abd6f3SGreg Roach		'ZSCH'    => ['0', '4', '4', '4'],
41813abd6f3SGreg Roach		'ZSH'     => ['0', '4', '4', '4'],
41913abd6f3SGreg Roach		'ZZS'     => ['0', '4', '4', '4'],
420a25f0a04SGreg Roach		// Cyrillic alphabet
42113abd6f3SGreg Roach		'А'   => ['1', '0', '', ''],
42213abd6f3SGreg Roach		'Б'   => ['0', '7', '7', '7'],
42313abd6f3SGreg Roach		'В'   => ['0', '7', '7', '7'],
42413abd6f3SGreg Roach		'Г'   => ['0', '5', '5', '5'],
42513abd6f3SGreg Roach		'Д'   => ['0', '3', '3', '3'],
42613abd6f3SGreg Roach		'ДЗ'  => ['0', '4', '4', '4'],
42713abd6f3SGreg Roach		'Е'   => ['1', '0', '', ''],
42813abd6f3SGreg Roach		'Ё'   => ['1', '0', '', ''],
42913abd6f3SGreg Roach		'Ж'   => ['0', '4', '4', '4'],
43013abd6f3SGreg Roach		'З'   => ['0', '4', '4', '4'],
43113abd6f3SGreg Roach		'И'   => ['1', '0', '', ''],
43213abd6f3SGreg Roach		'Й'   => ['1', '1', '', '', '4', '4', '4'],
43313abd6f3SGreg Roach		'К'   => ['0', '5', '5', '5'],
43413abd6f3SGreg Roach		'Л'   => ['0', '8', '8', '8'],
43513abd6f3SGreg Roach		'М'   => ['0', '6', '6', '6'],
43613abd6f3SGreg Roach		'Н'   => ['0', '6', '6', '6'],
43713abd6f3SGreg Roach		'О'   => ['1', '0', '', ''],
43813abd6f3SGreg Roach		'П'   => ['0', '7', '7', '7'],
43913abd6f3SGreg Roach		'Р'   => ['0', '9', '9', '9'],
44013abd6f3SGreg Roach		'РЖ'  => ['0', '4', '4', '4'],
44113abd6f3SGreg Roach		'С'   => ['0', '4', '4', '4'],
44213abd6f3SGreg Roach		'Т'   => ['0', '3', '3', '3'],
44313abd6f3SGreg Roach		'У'   => ['1', '0', '', ''],
44413abd6f3SGreg Roach		'Ф'   => ['0', '7', '7', '7'],
44513abd6f3SGreg Roach		'Х'   => ['0', '5', '5', '5'],
44613abd6f3SGreg Roach		'Ц'   => ['0', '4', '4', '4'],
44713abd6f3SGreg Roach		'Ч'   => ['0', '4', '4', '4'],
44813abd6f3SGreg Roach		'Ш'   => ['0', '4', '4', '4'],
44913abd6f3SGreg Roach		'Щ'   => ['0', '2', '4', '4'],
45013abd6f3SGreg Roach		'Ъ'   => ['0', '', '', ''],
45113abd6f3SGreg Roach		'Ы'   => ['0', '1', '', ''],
45213abd6f3SGreg Roach		'Ь'   => ['0', '', '', ''],
45313abd6f3SGreg Roach		'Э'   => ['1', '0', '', ''],
45413abd6f3SGreg Roach		'Ю'   => ['0', '1', '', ''],
45513abd6f3SGreg Roach		'Я'   => ['0', '1', '', ''],
456a25f0a04SGreg Roach		// Greek alphabet
45713abd6f3SGreg Roach		'Α'   => ['1', '0', '', ''],
45813abd6f3SGreg Roach		'Ά'   => ['1', '0', '', ''],
45913abd6f3SGreg Roach		'ΑΙ'  => ['1', '0', '1', ''],
46013abd6f3SGreg Roach		'ΑΥ'  => ['1', '0', '1', ''],
46113abd6f3SGreg Roach		'Β'   => ['0', '7', '7', '7'],
46213abd6f3SGreg Roach		'Γ'   => ['0', '5', '5', '5'],
46313abd6f3SGreg Roach		'Δ'   => ['0', '3', '3', '3'],
46413abd6f3SGreg Roach		'Ε'   => ['1', '0', '', ''],
46513abd6f3SGreg Roach		'Έ'   => ['1', '0', '', ''],
46613abd6f3SGreg Roach		'ΕΙ'  => ['1', '0', '1', ''],
46713abd6f3SGreg Roach		'ΕΥ'  => ['1', '1', '1', ''],
46813abd6f3SGreg Roach		'Ζ'   => ['0', '4', '4', '4'],
46913abd6f3SGreg Roach		'Η'   => ['1', '0', '', ''],
47013abd6f3SGreg Roach		'Ή'   => ['1', '0', '', ''],
47113abd6f3SGreg Roach		'Θ'   => ['0', '3', '3', '3'],
47213abd6f3SGreg Roach		'Ι'   => ['1', '0', '', ''],
47313abd6f3SGreg Roach		'Ί'   => ['1', '0', '', ''],
47413abd6f3SGreg Roach		'Ϊ'   => ['1', '0', '', ''],
47513abd6f3SGreg Roach		'ΐ'   => ['1', '0', '', ''],
47613abd6f3SGreg Roach		'Κ'   => ['0', '5', '5', '5'],
47713abd6f3SGreg Roach		'Λ'   => ['0', '8', '8', '8'],
47813abd6f3SGreg Roach		'Μ'   => ['0', '6', '6', '6'],
47913abd6f3SGreg Roach		'ΜΠ'  => ['0', '7', '7', '7'],
48013abd6f3SGreg Roach		'Ν'   => ['0', '6', '6', '6'],
48113abd6f3SGreg Roach		'ΝΤ'  => ['0', '3', '3', '3'],
48213abd6f3SGreg Roach		'Ξ'   => ['0', '5', '54', '54'],
48313abd6f3SGreg Roach		'Ο'   => ['1', '0', '', ''],
48413abd6f3SGreg Roach		'Ό'   => ['1', '0', '', ''],
48513abd6f3SGreg Roach		'ΟΙ'  => ['1', '0', '1', ''],
48613abd6f3SGreg Roach		'ΟΥ'  => ['1', '0', '1', ''],
48713abd6f3SGreg Roach		'Π'   => ['0', '7', '7', '7'],
48813abd6f3SGreg Roach		'Ρ'   => ['0', '9', '9', '9'],
48913abd6f3SGreg Roach		'Σ'   => ['0', '4', '4', '4'],
49013abd6f3SGreg Roach		'ς'   => ['0', '', '', '4'],
49113abd6f3SGreg Roach		'Τ'   => ['0', '3', '3', '3'],
49213abd6f3SGreg Roach		'ΤΖ'  => ['0', '4', '4', '4'],
49313abd6f3SGreg Roach		'ΤΣ'  => ['0', '4', '4', '4'],
49413abd6f3SGreg Roach		'Υ'   => ['1', '1', '', ''],
49513abd6f3SGreg Roach		'Ύ'   => ['1', '1', '', ''],
49613abd6f3SGreg Roach		'Ϋ'   => ['1', '1', '', ''],
49713abd6f3SGreg Roach		'ΰ'   => ['1', '1', '', ''],
49813abd6f3SGreg Roach		'ΥΚ'  => ['1', '5', '5', '5'],
49913abd6f3SGreg Roach		'ΥΥ'  => ['1', '65', '65', '65'],
50013abd6f3SGreg Roach		'Φ'   => ['0', '7', '7', '7'],
50113abd6f3SGreg Roach		'Χ'   => ['0', '5', '5', '5'],
50213abd6f3SGreg Roach		'Ψ'   => ['0', '7', '7', '7'],
50313abd6f3SGreg Roach		'Ω'   => ['1', '0', '', ''],
50413abd6f3SGreg Roach		'Ώ'   => ['1', '0', '', ''],
505a25f0a04SGreg Roach		// Hebrew alphabet
50613abd6f3SGreg Roach		'א'     => ['1', '0', '', ''],
50713abd6f3SGreg Roach		'או'    => ['1', '0', '7', ''],
50813abd6f3SGreg Roach		'אג'    => ['1', '4', '4', '4', '5', '5', '5', '34', '34', '34'],
50913abd6f3SGreg Roach		'בב'    => ['0', '7', '7', '7', '77', '77', '77'],
51013abd6f3SGreg Roach		'ב'     => ['0', '7', '7', '7'],
51113abd6f3SGreg Roach		'גג'    => ['0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'],
51213abd6f3SGreg Roach		'גד'    => ['0', '43', '43', '43', '53', '53', '53'],
51313abd6f3SGreg Roach		'גה'    => ['0', '45', '45', '45', '55', '55', '55'],
51413abd6f3SGreg Roach		'גז'    => ['0', '44', '44', '44', '45', '45', '45'],
51513abd6f3SGreg Roach		'גח'    => ['0', '45', '45', '45', '55', '55', '55'],
51613abd6f3SGreg Roach		'גכ'    => ['0', '45', '45', '45', '55', '55', '55'],
51713abd6f3SGreg Roach		'גך'    => ['0', '45', '45', '45', '55', '55', '55'],
51813abd6f3SGreg Roach		'גצ'    => ['0', '44', '44', '44', '45', '45', '45'],
51913abd6f3SGreg Roach		'גץ'    => ['0', '44', '44', '44', '45', '45', '45'],
52013abd6f3SGreg Roach		'גק'    => ['0', '45', '45', '45', '54', '54', '54'],
52113abd6f3SGreg Roach		'גש'    => ['0', '44', '44', '44', '54', '54', '54'],
52213abd6f3SGreg Roach		'גת'    => ['0', '43', '43', '43', '53', '53', '53'],
52313abd6f3SGreg Roach		'ג'     => ['0', '4', '4', '4', '5', '5', '5'],
52413abd6f3SGreg Roach		'דז'    => ['0', '4', '4', '4'],
52513abd6f3SGreg Roach		'דד'    => ['0', '3', '3', '3', '33', '33', '33'],
52613abd6f3SGreg Roach		'דט'    => ['0', '33', '33', '33'],
52713abd6f3SGreg Roach		'דש'    => ['0', '4', '4', '4'],
52813abd6f3SGreg Roach		'דצ'    => ['0', '4', '4', '4'],
52913abd6f3SGreg Roach		'דץ'    => ['0', '4', '4', '4'],
53013abd6f3SGreg Roach		'ד'     => ['0', '3', '3', '3'],
53113abd6f3SGreg Roach		'הג'    => ['0', '54', '54', '54', '55', '55', '55'],
53213abd6f3SGreg Roach		'הכ'    => ['0', '55', '55', '55'],
53313abd6f3SGreg Roach		'הח'    => ['0', '55', '55', '55'],
53413abd6f3SGreg Roach		'הק'    => ['0', '55', '55', '55', '5', '5', '5'],
53513abd6f3SGreg Roach		'הה'    => ['0', '5', '5', '', '55', '55', ''],
53613abd6f3SGreg Roach		'ה'     => ['0', '5', '5', ''],
53713abd6f3SGreg Roach		'וי'    => ['1', '', '', '', '7', '7', '7'],
53813abd6f3SGreg Roach		'ו'     => ['1', '7', '7', '7', '7', '', ''],
53913abd6f3SGreg Roach		'וו'    => ['1', '7', '7', '7', '7', '', ''],
54013abd6f3SGreg Roach		'וופ'   => ['1', '7', '7', '7', '77', '77', '77'],
54113abd6f3SGreg Roach		'זש'    => ['0', '4', '4', '4', '44', '44', '44'],
54213abd6f3SGreg Roach		'זדז'   => ['0', '2', '4', '4'],
54313abd6f3SGreg Roach		'ז'     => ['0', '4', '4', '4'],
54413abd6f3SGreg Roach		'זג'    => ['0', '44', '44', '44', '45', '45', '45'],
54513abd6f3SGreg Roach		'זז'    => ['0', '4', '4', '4', '44', '44', '44'],
54613abd6f3SGreg Roach		'זס'    => ['0', '44', '44', '44'],
54713abd6f3SGreg Roach		'זצ'    => ['0', '44', '44', '44'],
54813abd6f3SGreg Roach		'זץ'    => ['0', '44', '44', '44'],
54913abd6f3SGreg Roach		'חג'    => ['0', '54', '54', '54', '53', '53', '53'],
55013abd6f3SGreg Roach		'חח'    => ['0', '5', '5', '5', '55', '55', '55'],
55113abd6f3SGreg Roach		'חק'    => ['0', '55', '55', '55', '5', '5', '5'],
55213abd6f3SGreg Roach		'חכ'    => ['0', '45', '45', '45', '55', '55', '55'],
55313abd6f3SGreg Roach		'חס'    => ['0', '5', '54', '54'],
55413abd6f3SGreg Roach		'חש'    => ['0', '5', '54', '54'],
55513abd6f3SGreg Roach		'ח'     => ['0', '5', '5', '5'],
55613abd6f3SGreg Roach		'טש'    => ['0', '4', '4', '4'],
55713abd6f3SGreg Roach		'טד'    => ['0', '33', '33', '33'],
55813abd6f3SGreg Roach		'טי'    => ['0', '3', '3', '3', '4', '4', '4', '3', '3', '34'],
55913abd6f3SGreg Roach		'טת'    => ['0', '33', '33', '33'],
56013abd6f3SGreg Roach		'טט'    => ['0', '3', '3', '3', '33', '33', '33'],
56113abd6f3SGreg Roach		'ט'     => ['0', '3', '3', '3'],
56213abd6f3SGreg Roach		'י'     => ['1', '1', '', ''],
56313abd6f3SGreg Roach		'יא'    => ['1', '1', '', '', '1', '1', '1'],
56413abd6f3SGreg Roach		'כג'    => ['0', '55', '55', '55', '54', '54', '54'],
56513abd6f3SGreg Roach		'כש'    => ['0', '5', '54', '54'],
56613abd6f3SGreg Roach		'כס'    => ['0', '5', '54', '54'],
56713abd6f3SGreg Roach		'ככ'    => ['0', '5', '5', '5', '55', '55', '55'],
56813abd6f3SGreg Roach		'כך'    => ['0', '5', '5', '5', '55', '55', '55'],
56913abd6f3SGreg Roach		'כ'     => ['0', '5', '5', '5'],
57013abd6f3SGreg Roach		'כח'    => ['0', '55', '55', '55', '5', '5', '5'],
57113abd6f3SGreg Roach		'ך'     => ['0', '', '5', '5'],
57213abd6f3SGreg Roach		'ל'     => ['0', '8', '8', '8'],
57313abd6f3SGreg Roach		'לל'    => ['0', '88', '88', '88', '8', '8', '8'],
57413abd6f3SGreg Roach		'מנ'    => ['0', '66', '66', '66'],
57513abd6f3SGreg Roach		'מן'    => ['0', '66', '66', '66'],
57613abd6f3SGreg Roach		'ממ'    => ['0', '6', '6', '6', '66', '66', '66'],
57713abd6f3SGreg Roach		'מם'    => ['0', '6', '6', '6', '66', '66', '66'],
57813abd6f3SGreg Roach		'מ'     => ['0', '6', '6', '6'],
57913abd6f3SGreg Roach		'ם'     => ['0', '', '6', '6'],
58013abd6f3SGreg Roach		'נמ'    => ['0', '66', '66', '66'],
58113abd6f3SGreg Roach		'נם'    => ['0', '66', '66', '66'],
58213abd6f3SGreg Roach		'ננ'    => ['0', '6', '6', '6', '66', '66', '66'],
58313abd6f3SGreg Roach		'נן'    => ['0', '6', '6', '6', '66', '66', '66'],
58413abd6f3SGreg Roach		'נ'     => ['0', '6', '6', '6'],
58513abd6f3SGreg Roach		'ן'     => ['0', '', '6', '6'],
58613abd6f3SGreg Roach		'סתש'   => ['0', '2', '4', '4'],
58713abd6f3SGreg Roach		'סתז'   => ['0', '2', '4', '4'],
58813abd6f3SGreg Roach		'סטז'   => ['0', '2', '4', '4'],
58913abd6f3SGreg Roach		'סטש'   => ['0', '2', '4', '4'],
59013abd6f3SGreg Roach		'סצד'   => ['0', '2', '4', '4'],
59113abd6f3SGreg Roach		'סט'    => ['0', '2', '4', '4', '43', '43', '43'],
59213abd6f3SGreg Roach		'סת'    => ['0', '2', '4', '4', '43', '43', '43'],
59313abd6f3SGreg Roach		'סג'    => ['0', '44', '44', '44', '4', '4', '4'],
59413abd6f3SGreg Roach		'סס'    => ['0', '4', '4', '4', '44', '44', '44'],
59513abd6f3SGreg Roach		'סצ'    => ['0', '44', '44', '44'],
59613abd6f3SGreg Roach		'סץ'    => ['0', '44', '44', '44'],
59713abd6f3SGreg Roach		'סז'    => ['0', '44', '44', '44'],
59813abd6f3SGreg Roach		'סש'    => ['0', '44', '44', '44'],
59913abd6f3SGreg Roach		'ס'     => ['0', '4', '4', '4'],
60013abd6f3SGreg Roach		'ע'     => ['1', '0', '', ''],
60113abd6f3SGreg Roach		'פב'    => ['0', '7', '7', '7', '77', '77', '77'],
60213abd6f3SGreg Roach		'פוו'   => ['0', '7', '7', '7', '77', '77', '77'],
60313abd6f3SGreg Roach		'פפ'    => ['0', '7', '7', '7', '77', '77', '77'],
60413abd6f3SGreg Roach		'פף'    => ['0', '7', '7', '7', '77', '77', '77'],
60513abd6f3SGreg Roach		'פ'     => ['0', '7', '7', '7'],
60613abd6f3SGreg Roach		'ף'     => ['0', '', '7', '7'],
60713abd6f3SGreg Roach		'צג'    => ['0', '44', '44', '44', '45', '45', '45'],
60813abd6f3SGreg Roach		'צז'    => ['0', '44', '44', '44'],
60913abd6f3SGreg Roach		'צס'    => ['0', '44', '44', '44'],
61013abd6f3SGreg Roach		'צצ'    => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'],
61113abd6f3SGreg Roach		'צץ'    => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'],
61213abd6f3SGreg Roach		'צש'    => ['0', '44', '44', '44', '4', '4', '4', '5', '5', '5'],
61313abd6f3SGreg Roach		'צ'     => ['0', '4', '4', '4', '5', '5', '5'],
61413abd6f3SGreg Roach		'ץ'     => ['0', '', '4', '4'],
61513abd6f3SGreg Roach		'קה'    => ['0', '55', '55', '5'],
61613abd6f3SGreg Roach		'קס'    => ['0', '5', '54', '54'],
61713abd6f3SGreg Roach		'קש'    => ['0', '5', '54', '54'],
61813abd6f3SGreg Roach		'קק'    => ['0', '5', '5', '5', '55', '55', '55'],
61913abd6f3SGreg Roach		'קח'    => ['0', '55', '55', '55'],
62013abd6f3SGreg Roach		'קכ'    => ['0', '55', '55', '55'],
62113abd6f3SGreg Roach		'קך'    => ['0', '55', '55', '55'],
62213abd6f3SGreg Roach		'קג'    => ['0', '55', '55', '55', '54', '54', '54'],
62313abd6f3SGreg Roach		'ק'     => ['0', '5', '5', '5'],
62413abd6f3SGreg Roach		'רר'    => ['0', '99', '99', '99', '9', '9', '9'],
62513abd6f3SGreg Roach		'ר'     => ['0', '9', '9', '9'],
62613abd6f3SGreg Roach		'שטז'   => ['0', '2', '4', '4'],
62713abd6f3SGreg Roach		'שתש'   => ['0', '2', '4', '4'],
62813abd6f3SGreg Roach		'שתז'   => ['0', '2', '4', '4'],
62913abd6f3SGreg Roach		'שטש'   => ['0', '2', '4', '4'],
63013abd6f3SGreg Roach		'שד'    => ['0', '2', '43', '43'],
63113abd6f3SGreg Roach		'שז'    => ['0', '44', '44', '44'],
63213abd6f3SGreg Roach		'שס'    => ['0', '44', '44', '44'],
63313abd6f3SGreg Roach		'שת'    => ['0', '2', '43', '43'],
63413abd6f3SGreg Roach		'שג'    => ['0', '4', '4', '4', '44', '44', '44', '4', '43', '43'],
63513abd6f3SGreg Roach		'שט'    => ['0', '2', '43', '43', '44', '44', '44'],
63613abd6f3SGreg Roach		'שצ'    => ['0', '44', '44', '44', '45', '45', '45'],
63713abd6f3SGreg Roach		'שץ'    => ['0', '44', '', '44', '45', '', '45'],
63813abd6f3SGreg Roach		'שש'    => ['0', '4', '4', '4', '44', '44', '44'],
63913abd6f3SGreg Roach		'ש'     => ['0', '4', '4', '4'],
64013abd6f3SGreg Roach		'תג'    => ['0', '34', '34', '34'],
64113abd6f3SGreg Roach		'תז'    => ['0', '34', '34', '34'],
64213abd6f3SGreg Roach		'תש'    => ['0', '4', '4', '4'],
64313abd6f3SGreg Roach		'תת'    => ['0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'],
64413abd6f3SGreg Roach		'ת'     => ['0', '3', '3', '3', '4', '4', '4'],
645a25f0a04SGreg Roach		// Arabic alphabet
64613abd6f3SGreg Roach		'ا'   => ['1', '0', '', ''],
64713abd6f3SGreg Roach		'ب'   => ['0', '7', '7', '7'],
64813abd6f3SGreg Roach		'ت'   => ['0', '3', '3', '3'],
64913abd6f3SGreg Roach		'ث'   => ['0', '3', '3', '3'],
65013abd6f3SGreg Roach		'ج'   => ['0', '4', '4', '4'],
65113abd6f3SGreg Roach		'ح'   => ['0', '5', '5', '5'],
65213abd6f3SGreg Roach		'خ'   => ['0', '5', '5', '5'],
65313abd6f3SGreg Roach		'د'   => ['0', '3', '3', '3'],
65413abd6f3SGreg Roach		'ذ'   => ['0', '3', '3', '3'],
65513abd6f3SGreg Roach		'ر'   => ['0', '9', '9', '9'],
65613abd6f3SGreg Roach		'ز'   => ['0', '4', '4', '4'],
65713abd6f3SGreg Roach		'س'   => ['0', '4', '4', '4'],
65813abd6f3SGreg Roach		'ش'   => ['0', '4', '4', '4'],
65913abd6f3SGreg Roach		'ص'   => ['0', '4', '4', '4'],
66013abd6f3SGreg Roach		'ض'   => ['0', '3', '3', '3'],
66113abd6f3SGreg Roach		'ط'   => ['0', '3', '3', '3'],
66213abd6f3SGreg Roach		'ظ'   => ['0', '4', '4', '4'],
66313abd6f3SGreg Roach		'ع'   => ['1', '0', '', ''],
66413abd6f3SGreg Roach		'غ'   => ['0', '0', '', ''],
66513abd6f3SGreg Roach		'ف'   => ['0', '7', '7', '7'],
66613abd6f3SGreg Roach		'ق'   => ['0', '5', '5', '5'],
66713abd6f3SGreg Roach		'ك'   => ['0', '5', '5', '5'],
66813abd6f3SGreg Roach		'ل'   => ['0', '8', '8', '8'],
66913abd6f3SGreg Roach		'لا'  => ['0', '8', '8', '8'],
67013abd6f3SGreg Roach		'م'   => ['0', '6', '6', '6'],
67113abd6f3SGreg Roach		'ن'   => ['0', '6', '6', '6'],
67213abd6f3SGreg Roach		'هن'  => ['0', '66', '66', '66'],
67313abd6f3SGreg Roach		'ه'   => ['0', '5', '5', ''],
67413abd6f3SGreg Roach		'و'   => ['1', '', '', '', '7', '', ''],
67513abd6f3SGreg Roach		'ي'   => ['0', '1', '', ''],
67613abd6f3SGreg Roach		'آ'   => ['0', '1', '', ''],
67713abd6f3SGreg Roach		'ة'   => ['0', '', '', '3'],
67813abd6f3SGreg Roach		'ی'   => ['0', '1', '', ''],
67913abd6f3SGreg Roach		'ى'   => ['1', '1', '', ''],
68013abd6f3SGreg Roach	];
681a25f0a04SGreg Roach
682a25f0a04SGreg Roach	/**
68376692c8bSGreg Roach	 * Calculate the Daitch-Mokotoff soundex for a word.
68476692c8bSGreg Roach	 *
685a25f0a04SGreg Roach	 * @param string $name
686a25f0a04SGreg Roach	 *
687a25f0a04SGreg Roach	 * @return string[] List of possible DM codes for the word.
688a25f0a04SGreg Roach	 */
689a25f0a04SGreg Roach	private static function daitchMokotoffWord($name) {
690a25f0a04SGreg Roach		// Apply special transformation rules to the input string
691a25f0a04SGreg Roach		$name = I18N::strtoupper($name);
692a25f0a04SGreg Roach		foreach (self::$transformNameTable as $transformRule) {
693a25f0a04SGreg Roach			$name = str_replace($transformRule[0], $transformRule[1], $name);
694a25f0a04SGreg Roach		}
695a25f0a04SGreg Roach
696a25f0a04SGreg Roach		// Initialize
697a25f0a04SGreg Roach		$name_script = I18N::textScript($name);
698a25f0a04SGreg Roach		$noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');
699a25f0a04SGreg Roach
700a25f0a04SGreg Roach		$lastPos         = strlen($name) - 1;
701a25f0a04SGreg Roach		$currPos         = 0;
702a25f0a04SGreg Roach		$state           = 1; // 1: start of input string, 2: before vowel, 3: other
70313abd6f3SGreg Roach		$result          = []; // accumulate complete 6-digit D-M codes here
70413abd6f3SGreg Roach		$partialResult   = []; // accumulate incomplete D-M codes here
70513abd6f3SGreg Roach		$partialResult[] = ['!']; // initialize 1st partial result  ('!' stops "duplicate sound" check)
706a25f0a04SGreg Roach
707a25f0a04SGreg Roach		// Loop through the input string.
708a25f0a04SGreg Roach		// Stop when the string is exhausted or when no more partial results remain
709a25f0a04SGreg Roach		while (count($partialResult) !== 0 && $currPos <= $lastPos) {
710a25f0a04SGreg Roach			// Find the DM coding table entry for the chunk at the current position
711a25f0a04SGreg Roach			$thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
712a25f0a04SGreg Roach			while ($thisEntry != '') {
713a25f0a04SGreg Roach				if (isset(self::$dmsounds[$thisEntry])) {
714a25f0a04SGreg Roach					break;
715a25f0a04SGreg Roach				}
716a25f0a04SGreg Roach				$thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
717a25f0a04SGreg Roach			}
718a25f0a04SGreg Roach			if ($thisEntry === '') {
719a25f0a04SGreg Roach				$currPos++; // Not in table: advance pointer to next byte
720a25f0a04SGreg Roach				continue; // and try again
721a25f0a04SGreg Roach			}
722a25f0a04SGreg Roach
723a25f0a04SGreg Roach			$soundTableEntry = self::$dmsounds[$thisEntry];
724a25f0a04SGreg Roach			$workingResult   = $partialResult;
72513abd6f3SGreg Roach			$partialResult   = [];
726a25f0a04SGreg Roach			$currPos += strlen($thisEntry);
727a25f0a04SGreg Roach
728a25f0a04SGreg Roach			// Not at beginning of input string
729a25f0a04SGreg Roach			if ($state != 1) {
730a25f0a04SGreg Roach				if ($currPos <= $lastPos) {
731a25f0a04SGreg Roach					// Determine whether the next chunk is a vowel
732a25f0a04SGreg Roach					$nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
733a25f0a04SGreg Roach					while ($nextEntry != '') {
734a25f0a04SGreg Roach						if (isset(self::$dmsounds[$nextEntry])) {
735a25f0a04SGreg Roach							break;
736a25f0a04SGreg Roach						}
737a25f0a04SGreg Roach						$nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
738a25f0a04SGreg Roach					}
739a25f0a04SGreg Roach				} else {
740a25f0a04SGreg Roach					$nextEntry = '';
741a25f0a04SGreg Roach				}
742a25f0a04SGreg Roach				if ($nextEntry != '' && self::$dmsounds[$nextEntry][0] != '0') {
743a25f0a04SGreg Roach					$state = 2;
744a25f0a04SGreg Roach				} else {
745a25f0a04SGreg Roach					// Next chunk is a vowel
746a25f0a04SGreg Roach					$state = 3;
747a25f0a04SGreg Roach				}
748a25f0a04SGreg Roach			}
749a25f0a04SGreg Roach
750a25f0a04SGreg Roach			while ($state < count($soundTableEntry)) {
751a25f0a04SGreg Roach				// empty means 'ignore this sound in this state'
752a25f0a04SGreg Roach				if ($soundTableEntry[$state] == '') {
753a25f0a04SGreg Roach					foreach ($workingResult as $workingEntry) {
754a25f0a04SGreg Roach						$tempEntry = $workingEntry;
755a25f0a04SGreg Roach						$tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
756a25f0a04SGreg Roach						$partialResult[] = $tempEntry;
757a25f0a04SGreg Roach					}
758a25f0a04SGreg Roach				} else {
759a25f0a04SGreg Roach					foreach ($workingResult as $workingEntry) {
760a25f0a04SGreg Roach						if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) {
761a25f0a04SGreg Roach							// Incoming sound isn't a duplicate of the previous sound
762a25f0a04SGreg Roach							$workingEntry[] = $soundTableEntry[$state];
763a25f0a04SGreg Roach						} else {
764a25f0a04SGreg Roach							// Incoming sound is a duplicate of the previous sound
765a25f0a04SGreg Roach							// For Hebrew and Arabic, we need to create a pair of D-M sound codes,
766a25f0a04SGreg Roach							// one of the pair with only a single occurrence of the duplicate sound,
767a25f0a04SGreg Roach							// the other with both occurrences
768a25f0a04SGreg Roach							if ($noVowels) {
769a25f0a04SGreg Roach								$workingEntry[] = $soundTableEntry[$state];
770a25f0a04SGreg Roach							}
771a25f0a04SGreg Roach						}
772a25f0a04SGreg Roach						if (count($workingEntry) < 7) {
773a25f0a04SGreg Roach							$partialResult[] = $workingEntry;
774a25f0a04SGreg Roach						} else {
775a25f0a04SGreg Roach							// This is the 6th code in the sequence
776a25f0a04SGreg Roach							// We're looking for 7 entries because the first is '!' and doesn't count
777a25f0a04SGreg Roach							$tempResult = str_replace('!', '', implode('', $workingEntry));
778a25f0a04SGreg Roach							// Only return codes from recognisable sounds
779a25f0a04SGreg Roach							if ($tempResult) {
780a25f0a04SGreg Roach								$result[] = substr($tempResult . '000000', 0, 6);
781a25f0a04SGreg Roach							}
782a25f0a04SGreg Roach						}
783a25f0a04SGreg Roach					}
784a25f0a04SGreg Roach				}
785a25f0a04SGreg Roach				$state = $state + 3; // Advance to next triplet while keeping the same basic state
786a25f0a04SGreg Roach			}
787a25f0a04SGreg Roach		}
788a25f0a04SGreg Roach
789a25f0a04SGreg Roach		// Zero-fill and copy all remaining partial results
790a25f0a04SGreg Roach		foreach ($partialResult as $workingEntry) {
791a25f0a04SGreg Roach			$tempResult = str_replace('!', '', implode('', $workingEntry));
792a25f0a04SGreg Roach			// Only return codes from recognisable sounds
793a25f0a04SGreg Roach			if ($tempResult) {
794a25f0a04SGreg Roach				$result[] = substr($tempResult . '000000', 0, 6);
795a25f0a04SGreg Roach			}
796a25f0a04SGreg Roach		}
797a25f0a04SGreg Roach
798a25f0a04SGreg Roach		return $result;
799a25f0a04SGreg Roach	}
800a25f0a04SGreg Roach}
801