xref: /webtrees/app/Soundex.php (revision 15e87d462e9435ddccb0faee11a1184bbac749ba)
1a25f0a04SGreg Roach<?php
2a25f0a04SGreg Roach/**
3a25f0a04SGreg Roach * webtrees: online genealogy
4369c0ce6SGreg Roach * Copyright (C) 2016 webtrees development team
5a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify
6a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by
7a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or
8a25f0a04SGreg Roach * (at your option) any later version.
9a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful,
10a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
11a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12a25f0a04SGreg Roach * GNU General Public License for more details.
13a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License
14a25f0a04SGreg Roach * along with this program. If not, see <http://www.gnu.org/licenses/>.
15a25f0a04SGreg Roach */
1676692c8bSGreg Roachnamespace Fisharebest\Webtrees;
17a25f0a04SGreg Roach
18a25f0a04SGreg Roach/**
1976692c8bSGreg Roach * Phonetic matching of strings.
20a25f0a04SGreg Roach */
21a25f0a04SGreg Roachclass Soundex {
/**
 * List the phonetic algorithms that this class implements.
 *
 * @return string[] Translated algorithm names, keyed by algorithm code ('std', 'dm').
 */
public static function getAlgorithms() {
	$algorithms = array();

	// Keep each translator hint immediately in front of its translate() call.
	$algorithms['std'] = /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell');
	$algorithms['dm']  = /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff');

	return $algorithms;
}
33a25f0a04SGreg Roach
/**
 * Is there a match between two soundex codes?
 *
 * Each argument is a colon-separated list of codes, in the format returned
 * by russell() and daitchMokotoff(). The lists match when they have at
 * least one complete code in common.
 *
 * @param string $soundex1
 * @param string $soundex2
 *
 * @return bool
 */
public static function compare($soundex1, $soundex2) {
	if (!$soundex1 || !$soundex2) {
		return false;
	}

	// Compare complete codes rather than searching for substrings: a substring
	// search treats an empty segment (e.g. from a stray delimiter) as a match
	// for everything, and lets a fragment of a code count as a match.
	$codes1 = array_filter(explode(':', $soundex1), 'strlen');
	$codes2 = array_filter(explode(':', $soundex2), 'strlen');

	return count(array_intersect($codes1, $codes2)) > 0;
}
53a25f0a04SGreg Roach
/**
 * Generate Russell soundex codes for a given text.
 *
 * One code is generated for each whitespace-separated word, plus one for
 * all the words joined together. Codes of "0000" (no recognisable sounds)
 * are discarded, and duplicates are removed.
 *
 * @param string $text
 *
 * @return string Up to 51 codes, separated by colons; an empty string if there are none.
 */
public static function russell($text) {
	$words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

	// Combine words, e.g. “New York” as “Newyork”.
	// Join the split words: strtr() with an empty replacement string would
	// leave the text unchanged.
	$candidates = $words;
	if (count($words) > 1) {
		$candidates[] = implode('', $words);
	}

	$soundex_array = array();
	foreach ($candidates as $candidate) {
		$soundex = soundex($candidate);
		// Only return codes from recognisable sounds
		// (this applies to the combined words as well as to each single word).
		if ($soundex !== '0000') {
			$soundex_array[] = $soundex;
		}
	}

	// A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
	$soundex_array = array_slice(array_unique($soundex_array), 0, 51);

	// implode() of an empty list gives the empty string.
	return implode(':', $soundex_array);
}
84a25f0a04SGreg Roach
/**
 * Generate Daitch–Mokotoff soundex codes for a given text.
 *
 * Codes are generated for each whitespace-separated word, plus for all
 * the words joined together. Duplicates are removed.
 *
 * NOTE(review): daitchMokotoffWord() is defined elsewhere in this class; it
 * is assumed to return an array of codes, since its result is passed to
 * array_merge() - confirm.
 *
 * @param string $text
 *
 * @return string Up to 36 codes, separated by colons; an empty string if there are none.
 */
public static function daitchMokotoff($text) {
	$words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

	// Combine words, e.g. “New York” as “Newyork”.
	// Join the split words: strtr() with an empty replacement string would
	// leave the text (and the spaces between the words) unchanged.
	$candidates = $words;
	if (count($words) > 1) {
		$candidates[] = implode('', $words);
	}

	$soundex_array = array();
	foreach ($candidates as $candidate) {
		$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($candidate));
	}

	// A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
	$soundex_array = array_slice(array_unique($soundex_array), 0, 36);

	// implode() of an empty list gives the empty string.
	return implode(':', $soundex_array);
}
111a25f0a04SGreg Roach
112a25f0a04SGreg Roach	// Determine the Daitch–Mokotoff Soundex code for a word
113a25f0a04SGreg Roach	// Original implementation by Gerry Kroll, and analysis by Meliza Amity
114a25f0a04SGreg Roach
/**
 * Maximum length of a key in the sound table, measured in bytes
 * (NOT in UTF-8 characters).
 */
const MAXCHAR = 7;
117a25f0a04SGreg Roach
/**
 * Name transformation rules.
 * Used to transform the name string to simplify the "sounds like" table.
 * This is especially useful in Hebrew.
 *
 * Each entry is a two-element array holding the "from" and "to" arguments
 * of a replacement call of the form preg($from, $to, $text).
 *
 * NOTE(review): the code that consumes this table is not part of this
 * declaration. The place-holder rules presumably rely on the entries being
 * applied from top to bottom. Confirm which replacement function is used,
 * and therefore whether the trailing "$" in two of the "from" strings acts
 * as an end-of-text anchor or as a literal character.
 *
 * Note about the use of "\x01":
 * This code, which can’t legitimately occur in the kind of text we’re dealing with,
 * is used as a place-holder so that conditional string replacements can be done.
 *
 * @var string[][]
 */
private static $transformNameTable = array(
	// Force Yiddish ligatures to be treated as separate letters
	array('װ', 'וו'),
	array('ײ', 'יי'),
	array('ױ', 'וי'),
	array('בו', 'בע'),
	array('פו', 'פע'),
	array('ומ', 'עמ'),
	array('ום', 'עם'),
	array('ונ', 'ענ'),
	array('ון', 'ען'),
	array('וו', 'ב'),
	// Remove any place-holder that is already present in the text
	array("\x01", ''),
	// Mark two sequences with the place-holder, rewrite the remaining
	// double letter, then expand the place-holder again
	array('ייה$', "\x01ה"),
	array('ייע$', "\x01ע"),
	array('יי', 'ע'),
	array("\x01", 'יי'),
);
150a25f0a04SGreg Roach
151a25f0a04SGreg Roach	/**
152a25f0a04SGreg Roach	 * The DM sound coding table is organized this way:
153a25f0a04SGreg Roach	 * key: a variable-length string that corresponds to the UTF-8 character sequence
154a25f0a04SGreg Roach	 * represented by the table entry. Currently, that string can be up to 7
	 * bytes long. This maximum length is defined by the class constant
	 * MAXCHAR.
157a25f0a04SGreg Roach	 *
158a25f0a04SGreg Roach	 * value: an array as follows:
159a25f0a04SGreg Roach	 * [0]:  zero if not a vowel
160a25f0a04SGreg Roach	 * [1]:  sound value when this string is at the beginning of the word
161a25f0a04SGreg Roach	 * [2]:  sound value when this string is followed by a vowel
162a25f0a04SGreg Roach	 * [3]:  sound value for other cases
163a25f0a04SGreg Roach	 * [1],[2],[3] can be repeated several times to create branches in the code
164a25f0a04SGreg Roach	 * an empty sound value means "ignore in this state"
165a25f0a04SGreg Roach	 *
166a25f0a04SGreg Roach	 * @var string[][]
167a25f0a04SGreg Roach	 */
168a25f0a04SGreg Roach	private static $dmsounds = array(
169a25f0a04SGreg Roach		'A'       => array('1', '0', '', ''),
170a25f0a04SGreg Roach		'À'       => array('1', '0', '', ''),
171a25f0a04SGreg Roach		'Á'       => array('1', '0', '', ''),
172a25f0a04SGreg Roach		'Â'       => array('1', '0', '', ''),
173a25f0a04SGreg Roach		'Ã'       => array('1', '0', '', ''),
174a25f0a04SGreg Roach		'Ä'       => array('1', '0', '1', '', '0', '', ''),
175a25f0a04SGreg Roach		'Å'       => array('1', '0', '', ''),
176a25f0a04SGreg Roach		'Ă'       => array('1', '0', '', ''),
177a25f0a04SGreg Roach		'Ą'       => array('1', '', '', '', '', '', '6'),
178a25f0a04SGreg Roach		'Ạ'       => array('1', '0', '', ''),
179a25f0a04SGreg Roach		'Ả'       => array('1', '0', '', ''),
180a25f0a04SGreg Roach		'Ấ'       => array('1', '0', '', ''),
181a25f0a04SGreg Roach		'Ầ'       => array('1', '0', '', ''),
182a25f0a04SGreg Roach		'Ẩ'       => array('1', '0', '', ''),
183a25f0a04SGreg Roach		'Ẫ'       => array('1', '0', '', ''),
184a25f0a04SGreg Roach		'Ậ'       => array('1', '0', '', ''),
185a25f0a04SGreg Roach		'Ắ'       => array('1', '0', '', ''),
186a25f0a04SGreg Roach		'Ằ'       => array('1', '0', '', ''),
187a25f0a04SGreg Roach		'Ẳ'       => array('1', '0', '', ''),
188a25f0a04SGreg Roach		'Ẵ'       => array('1', '0', '', ''),
189a25f0a04SGreg Roach		'Ặ'       => array('1', '0', '', ''),
190a25f0a04SGreg Roach		'AE'      => array('1', '0', '1', ''),
191a25f0a04SGreg Roach		'Æ'       => array('1', '0', '1', ''),
192a25f0a04SGreg Roach		'AI'      => array('1', '0', '1', ''),
193a25f0a04SGreg Roach		'AJ'      => array('1', '0', '1', ''),
194a25f0a04SGreg Roach		'AU'      => array('1', '0', '7', ''),
195a25f0a04SGreg Roach		'AV'      => array('1', '0', '7', '', '7', '7', '7'),
196a25f0a04SGreg Roach		'ÄU'      => array('1', '0', '1', ''),
197a25f0a04SGreg Roach		'AY'      => array('1', '0', '1', ''),
198a25f0a04SGreg Roach		'B'       => array('0', '7', '7', '7'),
199a25f0a04SGreg Roach		'C'       => array('0', '5', '5', '5', '34', '4', '4'),
200a25f0a04SGreg Roach		'Ć'       => array('0', '4', '4', '4'),
201a25f0a04SGreg Roach		'Č'       => array('0', '4', '4', '4'),
202a25f0a04SGreg Roach		'Ç'       => array('0', '4', '4', '4'),
203a25f0a04SGreg Roach		'CH'      => array('0', '5', '5', '5', '34', '4', '4'),
204a25f0a04SGreg Roach		'CHS'     => array('0', '5', '54', '54'),
205a25f0a04SGreg Roach		'CK'      => array('0', '5', '5', '5', '45', '45', '45'),
206a25f0a04SGreg Roach		'CCS'     => array('0', '4', '4', '4'),
207a25f0a04SGreg Roach		'CS'      => array('0', '4', '4', '4'),
208a25f0a04SGreg Roach		'CSZ'     => array('0', '4', '4', '4'),
209a25f0a04SGreg Roach		'CZ'      => array('0', '4', '4', '4'),
210a25f0a04SGreg Roach		'CZS'     => array('0', '4', '4', '4'),
211a25f0a04SGreg Roach		'D'       => array('0', '3', '3', '3'),
212a25f0a04SGreg Roach		'Ď'       => array('0', '3', '3', '3'),
213a25f0a04SGreg Roach		'Đ'       => array('0', '3', '3', '3'),
214a25f0a04SGreg Roach		'DRS'     => array('0', '4', '4', '4'),
215a25f0a04SGreg Roach		'DRZ'     => array('0', '4', '4', '4'),
216a25f0a04SGreg Roach		'DS'      => array('0', '4', '4', '4'),
217a25f0a04SGreg Roach		'DSH'     => array('0', '4', '4', '4'),
218a25f0a04SGreg Roach		'DSZ'     => array('0', '4', '4', '4'),
219a25f0a04SGreg Roach		'DT'      => array('0', '3', '3', '3'),
220a25f0a04SGreg Roach		'DDZ'     => array('0', '4', '4', '4'),
221a25f0a04SGreg Roach		'DDZS'    => array('0', '4', '4', '4'),
222a25f0a04SGreg Roach		'DZ'      => array('0', '4', '4', '4'),
223a25f0a04SGreg Roach		'DŹ'      => array('0', '4', '4', '4'),
224a25f0a04SGreg Roach		'DŻ'      => array('0', '4', '4', '4'),
225a25f0a04SGreg Roach		'DZH'     => array('0', '4', '4', '4'),
226a25f0a04SGreg Roach		'DZS'     => array('0', '4', '4', '4'),
227a25f0a04SGreg Roach		'E'       => array('1', '0', '', ''),
228a25f0a04SGreg Roach		'È'       => array('1', '0', '', ''),
229a25f0a04SGreg Roach		'É'       => array('1', '0', '', ''),
230a25f0a04SGreg Roach		'Ê'       => array('1', '0', '', ''),
231a25f0a04SGreg Roach		'Ë'       => array('1', '0', '', ''),
232a25f0a04SGreg Roach		'Ĕ'       => array('1', '0', '', ''),
233a25f0a04SGreg Roach		'Ė'       => array('1', '0', '', ''),
234a25f0a04SGreg Roach		'Ę'       => array('1', '', '', '6', '', '', ''),
235a25f0a04SGreg Roach		'Ẹ'       => array('1', '0', '', ''),
236a25f0a04SGreg Roach		'Ẻ'       => array('1', '0', '', ''),
237a25f0a04SGreg Roach		'Ẽ'       => array('1', '0', '', ''),
238a25f0a04SGreg Roach		'Ế'       => array('1', '0', '', ''),
239a25f0a04SGreg Roach		'Ề'       => array('1', '0', '', ''),
240a25f0a04SGreg Roach		'Ể'       => array('1', '0', '', ''),
241a25f0a04SGreg Roach		'Ễ'       => array('1', '0', '', ''),
242a25f0a04SGreg Roach		'Ệ'       => array('1', '0', '', ''),
243a25f0a04SGreg Roach		'EAU'     => array('1', '0', '', ''),
244a25f0a04SGreg Roach		'EI'      => array('1', '0', '1', ''),
245a25f0a04SGreg Roach		'EJ'      => array('1', '0', '1', ''),
246a25f0a04SGreg Roach		'EU'      => array('1', '1', '1', ''),
247a25f0a04SGreg Roach		'EY'      => array('1', '0', '1', ''),
248a25f0a04SGreg Roach		'F'       => array('0', '7', '7', '7'),
249a25f0a04SGreg Roach		'FB'      => array('0', '7', '7', '7'),
250a25f0a04SGreg Roach		'G'       => array('0', '5', '5', '5', '34', '4', '4'),
251a25f0a04SGreg Roach		'Ğ'       => array('0', '', '', ''),
252a25f0a04SGreg Roach		'GGY'     => array('0', '5', '5', '5'),
253a25f0a04SGreg Roach		'GY'      => array('0', '5', '5', '5'),
254a25f0a04SGreg Roach		'H'       => array('0', '5', '5', '', '5', '5', '5'),
255a25f0a04SGreg Roach		'I'       => array('1', '0', '', ''),
256a25f0a04SGreg Roach		'Ì'       => array('1', '0', '', ''),
257a25f0a04SGreg Roach		'Í'       => array('1', '0', '', ''),
258a25f0a04SGreg Roach		'Î'       => array('1', '0', '', ''),
259a25f0a04SGreg Roach		'Ï'       => array('1', '0', '', ''),
260a25f0a04SGreg Roach		'Ĩ'       => array('1', '0', '', ''),
261a25f0a04SGreg Roach		'Į'       => array('1', '0', '', ''),
262a25f0a04SGreg Roach		'İ'       => array('1', '0', '', ''),
263a25f0a04SGreg Roach		'Ỉ'       => array('1', '0', '', ''),
264a25f0a04SGreg Roach		'Ị'       => array('1', '0', '', ''),
265a25f0a04SGreg Roach		'IA'      => array('1', '1', '', ''),
266a25f0a04SGreg Roach		'IE'      => array('1', '1', '', ''),
267a25f0a04SGreg Roach		'IO'      => array('1', '1', '', ''),
268a25f0a04SGreg Roach		'IU'      => array('1', '1', '', ''),
269a25f0a04SGreg Roach		'J'       => array('0', '1', '', '', '4', '4', '4', '5', '5', ''),
270a25f0a04SGreg Roach		'K'       => array('0', '5', '5', '5'),
271a25f0a04SGreg Roach		'KH'      => array('0', '5', '5', '5'),
272a25f0a04SGreg Roach		'KS'      => array('0', '5', '54', '54'),
273a25f0a04SGreg Roach		'L'       => array('0', '8', '8', '8'),
274a25f0a04SGreg Roach		'Ľ'       => array('0', '8', '8', '8'),
275a25f0a04SGreg Roach		'Ĺ'       => array('0', '8', '8', '8'),
276a25f0a04SGreg Roach		'Ł'       => array('0', '7', '7', '7', '8', '8', '8'),
277a25f0a04SGreg Roach		'LL'      => array('0', '8', '8', '8', '58', '8', '8', '1', '8', '8'),
278a25f0a04SGreg Roach		'LLY'     => array('0', '8', '8', '8', '1', '8', '8'),
279a25f0a04SGreg Roach		'LY'      => array('0', '8', '8', '8', '1', '8', '8'),
280a25f0a04SGreg Roach		'M'       => array('0', '6', '6', '6'),
281a25f0a04SGreg Roach		'MĔ'      => array('0', '66', '66', '66'),
282a25f0a04SGreg Roach		'MN'      => array('0', '66', '66', '66'),
283a25f0a04SGreg Roach		'N'       => array('0', '6', '6', '6'),
284a25f0a04SGreg Roach		'Ń'       => array('0', '6', '6', '6'),
285a25f0a04SGreg Roach		'Ň'       => array('0', '6', '6', '6'),
286a25f0a04SGreg Roach		'Ñ'       => array('0', '6', '6', '6'),
287a25f0a04SGreg Roach		'NM'      => array('0', '66', '66', '66'),
288a25f0a04SGreg Roach		'O'       => array('1', '0', '', ''),
289a25f0a04SGreg Roach		'Ò'       => array('1', '0', '', ''),
290a25f0a04SGreg Roach		'Ó'       => array('1', '0', '', ''),
291a25f0a04SGreg Roach		'Ô'       => array('1', '0', '', ''),
292a25f0a04SGreg Roach		'Õ'       => array('1', '0', '', ''),
293a25f0a04SGreg Roach		'Ö'       => array('1', '0', '', ''),
294a25f0a04SGreg Roach		'Ø'       => array('1', '0', '', ''),
295a25f0a04SGreg Roach		'Ő'       => array('1', '0', '', ''),
296a25f0a04SGreg Roach		'Œ'       => array('1', '0', '', ''),
297a25f0a04SGreg Roach		'Ơ'       => array('1', '0', '', ''),
298a25f0a04SGreg Roach		'Ọ'       => array('1', '0', '', ''),
299a25f0a04SGreg Roach		'Ỏ'       => array('1', '0', '', ''),
300a25f0a04SGreg Roach		'Ố'       => array('1', '0', '', ''),
301a25f0a04SGreg Roach		'Ồ'       => array('1', '0', '', ''),
302a25f0a04SGreg Roach		'Ổ'       => array('1', '0', '', ''),
303a25f0a04SGreg Roach		'Ỗ'       => array('1', '0', '', ''),
304a25f0a04SGreg Roach		'Ộ'       => array('1', '0', '', ''),
305a25f0a04SGreg Roach		'Ớ'       => array('1', '0', '', ''),
306a25f0a04SGreg Roach		'Ờ'       => array('1', '0', '', ''),
307a25f0a04SGreg Roach		'Ở'       => array('1', '0', '', ''),
308a25f0a04SGreg Roach		'Ỡ'       => array('1', '0', '', ''),
309a25f0a04SGreg Roach		'Ợ'       => array('1', '0', '', ''),
310a25f0a04SGreg Roach		'OE'      => array('1', '0', '', ''),
311a25f0a04SGreg Roach		'OI'      => array('1', '0', '1', ''),
312a25f0a04SGreg Roach		'OJ'      => array('1', '0', '1', ''),
313a25f0a04SGreg Roach		'OU'      => array('1', '0', '', ''),
314a25f0a04SGreg Roach		'OY'      => array('1', '0', '1', ''),
315a25f0a04SGreg Roach		'P'       => array('0', '7', '7', '7'),
316a25f0a04SGreg Roach		'PF'      => array('0', '7', '7', '7'),
317a25f0a04SGreg Roach		'PH'      => array('0', '7', '7', '7'),
318a25f0a04SGreg Roach		'Q'       => array('0', '5', '5', '5'),
319a25f0a04SGreg Roach		'R'       => array('0', '9', '9', '9'),
320a25f0a04SGreg Roach		'Ř'       => array('0', '4', '4', '4'),
321a25f0a04SGreg Roach		'RS'      => array('0', '4', '4', '4', '94', '94', '94'),
322a25f0a04SGreg Roach		'RZ'      => array('0', '4', '4', '4', '94', '94', '94'),
323a25f0a04SGreg Roach		'S'       => array('0', '4', '4', '4'),
324a25f0a04SGreg Roach		'Ś'       => array('0', '4', '4', '4'),
325a25f0a04SGreg Roach		'Š'       => array('0', '4', '4', '4'),
326a25f0a04SGreg Roach		'Ş'       => array('0', '4', '4', '4'),
327a25f0a04SGreg Roach		'SC'      => array('0', '2', '4', '4'),
328a25f0a04SGreg Roach		'ŠČ'      => array('0', '2', '4', '4'),
329a25f0a04SGreg Roach		'SCH'     => array('0', '4', '4', '4'),
330a25f0a04SGreg Roach		'SCHD'    => array('0', '2', '43', '43'),
331a25f0a04SGreg Roach		'SCHT'    => array('0', '2', '43', '43'),
332a25f0a04SGreg Roach		'SCHTCH'  => array('0', '2', '4', '4'),
333a25f0a04SGreg Roach		'SCHTSCH' => array('0', '2', '4', '4'),
334a25f0a04SGreg Roach		'SCHTSH'  => array('0', '2', '4', '4'),
335a25f0a04SGreg Roach		'SD'      => array('0', '2', '43', '43'),
336a25f0a04SGreg Roach		'SH'      => array('0', '4', '4', '4'),
337a25f0a04SGreg Roach		'SHCH'    => array('0', '2', '4', '4'),
338a25f0a04SGreg Roach		'SHD'     => array('0', '2', '43', '43'),
339a25f0a04SGreg Roach		'SHT'     => array('0', '2', '43', '43'),
340a25f0a04SGreg Roach		'SHTCH'   => array('0', '2', '4', '4'),
341a25f0a04SGreg Roach		'SHTSH'   => array('0', '2', '4', '4'),
342a25f0a04SGreg Roach		'ß'       => array('0', '', '4', '4'),
343a25f0a04SGreg Roach		'ST'      => array('0', '2', '43', '43'),
344a25f0a04SGreg Roach		'STCH'    => array('0', '2', '4', '4'),
345a25f0a04SGreg Roach		'STRS'    => array('0', '2', '4', '4'),
346a25f0a04SGreg Roach		'STRZ'    => array('0', '2', '4', '4'),
347a25f0a04SGreg Roach		'STSCH'   => array('0', '2', '4', '4'),
348a25f0a04SGreg Roach		'STSH'    => array('0', '2', '4', '4'),
349a25f0a04SGreg Roach		'SSZ'     => array('0', '4', '4', '4'),
350a25f0a04SGreg Roach		'SZ'      => array('0', '4', '4', '4'),
351a25f0a04SGreg Roach		'SZCS'    => array('0', '2', '4', '4'),
352a25f0a04SGreg Roach		'SZCZ'    => array('0', '2', '4', '4'),
353a25f0a04SGreg Roach		'SZD'     => array('0', '2', '43', '43'),
354a25f0a04SGreg Roach		'SZT'     => array('0', '2', '43', '43'),
355a25f0a04SGreg Roach		'T'       => array('0', '3', '3', '3'),
356a25f0a04SGreg Roach		'Ť'       => array('0', '3', '3', '3'),
357a25f0a04SGreg Roach		'Ţ'       => array('0', '3', '3', '3', '4', '4', '4'),
358a25f0a04SGreg Roach		'TC'      => array('0', '4', '4', '4'),
359a25f0a04SGreg Roach		'TCH'     => array('0', '4', '4', '4'),
360a25f0a04SGreg Roach		'TH'      => array('0', '3', '3', '3'),
361a25f0a04SGreg Roach		'TRS'     => array('0', '4', '4', '4'),
362a25f0a04SGreg Roach		'TRZ'     => array('0', '4', '4', '4'),
363a25f0a04SGreg Roach		'TS'      => array('0', '4', '4', '4'),
364a25f0a04SGreg Roach		'TSCH'    => array('0', '4', '4', '4'),
365a25f0a04SGreg Roach		'TSH'     => array('0', '4', '4', '4'),
366a25f0a04SGreg Roach		'TSZ'     => array('0', '4', '4', '4'),
367a25f0a04SGreg Roach		'TTCH'    => array('0', '4', '4', '4'),
368a25f0a04SGreg Roach		'TTS'     => array('0', '4', '4', '4'),
369a25f0a04SGreg Roach		'TTSCH'   => array('0', '4', '4', '4'),
370a25f0a04SGreg Roach		'TTSZ'    => array('0', '4', '4', '4'),
371a25f0a04SGreg Roach		'TTZ'     => array('0', '4', '4', '4'),
372a25f0a04SGreg Roach		'TZ'      => array('0', '4', '4', '4'),
373a25f0a04SGreg Roach		'TZS'     => array('0', '4', '4', '4'),
374a25f0a04SGreg Roach		'U'       => array('1', '0', '', ''),
375a25f0a04SGreg Roach		'Ù'       => array('1', '0', '', ''),
376a25f0a04SGreg Roach		'Ú'       => array('1', '0', '', ''),
377a25f0a04SGreg Roach		'Û'       => array('1', '0', '', ''),
378a25f0a04SGreg Roach		'Ü'       => array('1', '0', '', ''),
379a25f0a04SGreg Roach		'Ũ'       => array('1', '0', '', ''),
380a25f0a04SGreg Roach		'Ū'       => array('1', '0', '', ''),
381a25f0a04SGreg Roach		'Ů'       => array('1', '0', '', ''),
382a25f0a04SGreg Roach		'Ű'       => array('1', '0', '', ''),
383a25f0a04SGreg Roach		'Ų'       => array('1', '0', '', ''),
384a25f0a04SGreg Roach		'Ư'       => array('1', '0', '', ''),
385a25f0a04SGreg Roach		'Ụ'       => array('1', '0', '', ''),
386a25f0a04SGreg Roach		'Ủ'       => array('1', '0', '', ''),
387a25f0a04SGreg Roach		'Ứ'       => array('1', '0', '', ''),
388a25f0a04SGreg Roach		'Ừ'       => array('1', '0', '', ''),
389a25f0a04SGreg Roach		'Ử'       => array('1', '0', '', ''),
390a25f0a04SGreg Roach		'Ữ'       => array('1', '0', '', ''),
391a25f0a04SGreg Roach		'Ự'       => array('1', '0', '', ''),
392a25f0a04SGreg Roach		'UE'      => array('1', '0', '', ''),
393a25f0a04SGreg Roach		'UI'      => array('1', '0', '1', ''),
394a25f0a04SGreg Roach		'UJ'      => array('1', '0', '1', ''),
395a25f0a04SGreg Roach		'UY'      => array('1', '0', '1', ''),
396a25f0a04SGreg Roach		'UW'      => array('1', '0', '1', '', '0', '7', '7'),
397a25f0a04SGreg Roach		'V'       => array('0', '7', '7', '7'),
398a25f0a04SGreg Roach		'W'       => array('0', '7', '7', '7'),
399a25f0a04SGreg Roach		'X'       => array('0', '5', '54', '54'),
400a25f0a04SGreg Roach		'Y'       => array('1', '1', '', ''),
401a25f0a04SGreg Roach		'Ý'       => array('1', '1', '', ''),
402a25f0a04SGreg Roach		'Ỳ'       => array('1', '1', '', ''),
403a25f0a04SGreg Roach		'Ỵ'       => array('1', '1', '', ''),
404a25f0a04SGreg Roach		'Ỷ'       => array('1', '1', '', ''),
405a25f0a04SGreg Roach		'Ỹ'       => array('1', '1', '', ''),
406a25f0a04SGreg Roach		'Z'       => array('0', '4', '4', '4'),
407a25f0a04SGreg Roach		'Ź'       => array('0', '4', '4', '4'),
408a25f0a04SGreg Roach		'Ż'       => array('0', '4', '4', '4'),
409a25f0a04SGreg Roach		'Ž'       => array('0', '4', '4', '4'),
410a25f0a04SGreg Roach		'ZD'      => array('0', '2', '43', '43'),
411a25f0a04SGreg Roach		'ZDZ'     => array('0', '2', '4', '4'),
412a25f0a04SGreg Roach		'ZDZH'    => array('0', '2', '4', '4'),
413a25f0a04SGreg Roach		'ZH'      => array('0', '4', '4', '4'),
414a25f0a04SGreg Roach		'ZHD'     => array('0', '2', '43', '43'),
415a25f0a04SGreg Roach		'ZHDZH'   => array('0', '2', '4', '4'),
416a25f0a04SGreg Roach		'ZS'      => array('0', '4', '4', '4'),
417a25f0a04SGreg Roach		'ZSCH'    => array('0', '4', '4', '4'),
418a25f0a04SGreg Roach		'ZSH'     => array('0', '4', '4', '4'),
419a25f0a04SGreg Roach		'ZZS'     => array('0', '4', '4', '4'),
420a25f0a04SGreg Roach		// Cyrillic alphabet
421a25f0a04SGreg Roach		'А'   => array('1', '0', '', ''),
422a25f0a04SGreg Roach		'Б'   => array('0', '7', '7', '7'),
423a25f0a04SGreg Roach		'В'   => array('0', '7', '7', '7'),
424a25f0a04SGreg Roach		'Г'   => array('0', '5', '5', '5'),
425a25f0a04SGreg Roach		'Д'   => array('0', '3', '3', '3'),
426a25f0a04SGreg Roach		'ДЗ'  => array('0', '4', '4', '4'),
427a25f0a04SGreg Roach		'Е'   => array('1', '0', '', ''),
428a25f0a04SGreg Roach		'Ё'   => array('1', '0', '', ''),
429a25f0a04SGreg Roach		'Ж'   => array('0', '4', '4', '4'),
430a25f0a04SGreg Roach		'З'   => array('0', '4', '4', '4'),
431a25f0a04SGreg Roach		'И'   => array('1', '0', '', ''),
432a25f0a04SGreg Roach		'Й'   => array('1', '1', '', '', '4', '4', '4'),
433a25f0a04SGreg Roach		'К'   => array('0', '5', '5', '5'),
434a25f0a04SGreg Roach		'Л'   => array('0', '8', '8', '8'),
435a25f0a04SGreg Roach		'М'   => array('0', '6', '6', '6'),
436a25f0a04SGreg Roach		'Н'   => array('0', '6', '6', '6'),
437a25f0a04SGreg Roach		'О'   => array('1', '0', '', ''),
438a25f0a04SGreg Roach		'П'   => array('0', '7', '7', '7'),
439a25f0a04SGreg Roach		'Р'   => array('0', '9', '9', '9'),
440a25f0a04SGreg Roach		'РЖ'  => array('0', '4', '4', '4'),
441a25f0a04SGreg Roach		'С'   => array('0', '4', '4', '4'),
442a25f0a04SGreg Roach		'Т'   => array('0', '3', '3', '3'),
443a25f0a04SGreg Roach		'У'   => array('1', '0', '', ''),
444a25f0a04SGreg Roach		'Ф'   => array('0', '7', '7', '7'),
445a25f0a04SGreg Roach		'Х'   => array('0', '5', '5', '5'),
446a25f0a04SGreg Roach		'Ц'   => array('0', '4', '4', '4'),
447a25f0a04SGreg Roach		'Ч'   => array('0', '4', '4', '4'),
448a25f0a04SGreg Roach		'Ш'   => array('0', '4', '4', '4'),
449a25f0a04SGreg Roach		'Щ'   => array('0', '2', '4', '4'),
450a25f0a04SGreg Roach		'Ъ'   => array('0', '', '', ''),
451a25f0a04SGreg Roach		'Ы'   => array('0', '1', '', ''),
452a25f0a04SGreg Roach		'Ь'   => array('0', '', '', ''),
453a25f0a04SGreg Roach		'Э'   => array('1', '0', '', ''),
454a25f0a04SGreg Roach		'Ю'   => array('0', '1', '', ''),
455a25f0a04SGreg Roach		'Я'   => array('0', '1', '', ''),
456a25f0a04SGreg Roach		// Greek alphabet
457a25f0a04SGreg Roach		'Α'   => array('1', '0', '', ''),
458a25f0a04SGreg Roach		'Ά'   => array('1', '0', '', ''),
459a25f0a04SGreg Roach		'ΑΙ'  => array('1', '0', '1', ''),
460a25f0a04SGreg Roach		'ΑΥ'  => array('1', '0', '1', ''),
461a25f0a04SGreg Roach		'Β'   => array('0', '7', '7', '7'),
462a25f0a04SGreg Roach		'Γ'   => array('0', '5', '5', '5'),
463a25f0a04SGreg Roach		'Δ'   => array('0', '3', '3', '3'),
464a25f0a04SGreg Roach		'Ε'   => array('1', '0', '', ''),
465a25f0a04SGreg Roach		'Έ'   => array('1', '0', '', ''),
466a25f0a04SGreg Roach		'ΕΙ'  => array('1', '0', '1', ''),
467a25f0a04SGreg Roach		'ΕΥ'  => array('1', '1', '1', ''),
468a25f0a04SGreg Roach		'Ζ'   => array('0', '4', '4', '4'),
469a25f0a04SGreg Roach		'Η'   => array('1', '0', '', ''),
470a25f0a04SGreg Roach		'Ή'   => array('1', '0', '', ''),
471a25f0a04SGreg Roach		'Θ'   => array('0', '3', '3', '3'),
472a25f0a04SGreg Roach		'Ι'   => array('1', '0', '', ''),
473a25f0a04SGreg Roach		'Ί'   => array('1', '0', '', ''),
474a25f0a04SGreg Roach		'Ϊ'   => array('1', '0', '', ''),
475a25f0a04SGreg Roach		'ΐ'   => array('1', '0', '', ''),
476a25f0a04SGreg Roach		'Κ'   => array('0', '5', '5', '5'),
477a25f0a04SGreg Roach		'Λ'   => array('0', '8', '8', '8'),
478a25f0a04SGreg Roach		'Μ'   => array('0', '6', '6', '6'),
479a25f0a04SGreg Roach		'ΜΠ'  => array('0', '7', '7', '7'),
480a25f0a04SGreg Roach		'Ν'   => array('0', '6', '6', '6'),
481a25f0a04SGreg Roach		'ΝΤ'  => array('0', '3', '3', '3'),
482a25f0a04SGreg Roach		'Ξ'   => array('0', '5', '54', '54'),
483a25f0a04SGreg Roach		'Ο'   => array('1', '0', '', ''),
484a25f0a04SGreg Roach		'Ό'   => array('1', '0', '', ''),
485a25f0a04SGreg Roach		'ΟΙ'  => array('1', '0', '1', ''),
486a25f0a04SGreg Roach		'ΟΥ'  => array('1', '0', '1', ''),
487a25f0a04SGreg Roach		'Π'   => array('0', '7', '7', '7'),
488a25f0a04SGreg Roach		'Ρ'   => array('0', '9', '9', '9'),
489a25f0a04SGreg Roach		'Σ'   => array('0', '4', '4', '4'),
490a25f0a04SGreg Roach		'ς'   => array('0', '', '', '4'),
491a25f0a04SGreg Roach		'Τ'   => array('0', '3', '3', '3'),
492a25f0a04SGreg Roach		'ΤΖ'  => array('0', '4', '4', '4'),
493a25f0a04SGreg Roach		'ΤΣ'  => array('0', '4', '4', '4'),
494a25f0a04SGreg Roach		'Υ'   => array('1', '1', '', ''),
495a25f0a04SGreg Roach		'Ύ'   => array('1', '1', '', ''),
496a25f0a04SGreg Roach		'Ϋ'   => array('1', '1', '', ''),
497a25f0a04SGreg Roach		'ΰ'   => array('1', '1', '', ''),
498a25f0a04SGreg Roach		'ΥΚ'  => array('1', '5', '5', '5'),
499a25f0a04SGreg Roach		'ΥΥ'  => array('1', '65', '65', '65'),
500a25f0a04SGreg Roach		'Φ'   => array('0', '7', '7', '7'),
501a25f0a04SGreg Roach		'Χ'   => array('0', '5', '5', '5'),
502a25f0a04SGreg Roach		'Ψ'   => array('0', '7', '7', '7'),
503a25f0a04SGreg Roach		'Ω'   => array('1', '0', '', ''),
504a25f0a04SGreg Roach		'Ώ'   => array('1', '0', '', ''),
505a25f0a04SGreg Roach		// Hebrew alphabet
506a25f0a04SGreg Roach		'א'     => array('1', '0', '', ''),
507a25f0a04SGreg Roach		'או'    => array('1', '0', '7', ''),
508a25f0a04SGreg Roach		'אג'    => array('1', '4', '4', '4', '5', '5', '5', '34', '34', '34'),
509a25f0a04SGreg Roach		'בב'    => array('0', '7', '7', '7', '77', '77', '77'),
510a25f0a04SGreg Roach		'ב'     => array('0', '7', '7', '7'),
511a25f0a04SGreg Roach		'גג'    => array('0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'),
512a25f0a04SGreg Roach		'גד'    => array('0', '43', '43', '43', '53', '53', '53'),
513a25f0a04SGreg Roach		'גה'    => array('0', '45', '45', '45', '55', '55', '55'),
514a25f0a04SGreg Roach		'גז'    => array('0', '44', '44', '44', '45', '45', '45'),
515a25f0a04SGreg Roach		'גח'    => array('0', '45', '45', '45', '55', '55', '55'),
516a25f0a04SGreg Roach		'גכ'    => array('0', '45', '45', '45', '55', '55', '55'),
517a25f0a04SGreg Roach		'גך'    => array('0', '45', '45', '45', '55', '55', '55'),
518a25f0a04SGreg Roach		'גצ'    => array('0', '44', '44', '44', '45', '45', '45'),
519a25f0a04SGreg Roach		'גץ'    => array('0', '44', '44', '44', '45', '45', '45'),
520a25f0a04SGreg Roach		'גק'    => array('0', '45', '45', '45', '54', '54', '54'),
521a25f0a04SGreg Roach		'גש'    => array('0', '44', '44', '44', '54', '54', '54'),
522a25f0a04SGreg Roach		'גת'    => array('0', '43', '43', '43', '53', '53', '53'),
523a25f0a04SGreg Roach		'ג'     => array('0', '4', '4', '4', '5', '5', '5'),
524a25f0a04SGreg Roach		'דז'    => array('0', '4', '4', '4'),
525a25f0a04SGreg Roach		'דד'    => array('0', '3', '3', '3', '33', '33', '33'),
526a25f0a04SGreg Roach		'דט'    => array('0', '33', '33', '33'),
527a25f0a04SGreg Roach		'דש'    => array('0', '4', '4', '4'),
528a25f0a04SGreg Roach		'דצ'    => array('0', '4', '4', '4'),
529a25f0a04SGreg Roach		'דץ'    => array('0', '4', '4', '4'),
530a25f0a04SGreg Roach		'ד'     => array('0', '3', '3', '3'),
531a25f0a04SGreg Roach		'הג'    => array('0', '54', '54', '54', '55', '55', '55'),
532a25f0a04SGreg Roach		'הכ'    => array('0', '55', '55', '55'),
533a25f0a04SGreg Roach		'הח'    => array('0', '55', '55', '55'),
534a25f0a04SGreg Roach		'הק'    => array('0', '55', '55', '55', '5', '5', '5'),
535a25f0a04SGreg Roach		'הה'    => array('0', '5', '5', '', '55', '55', ''),
536a25f0a04SGreg Roach		'ה'     => array('0', '5', '5', ''),
537a25f0a04SGreg Roach		'וי'    => array('1', '', '', '', '7', '7', '7'),
538a25f0a04SGreg Roach		'ו'     => array('1', '7', '7', '7', '7', '', ''),
539a25f0a04SGreg Roach		'וו'    => array('1', '7', '7', '7', '7', '', ''),
540a25f0a04SGreg Roach		'וופ'   => array('1', '7', '7', '7', '77', '77', '77'),
541a25f0a04SGreg Roach		'זש'    => array('0', '4', '4', '4', '44', '44', '44'),
542a25f0a04SGreg Roach		'זדז'   => array('0', '2', '4', '4'),
543a25f0a04SGreg Roach		'ז'     => array('0', '4', '4', '4'),
544a25f0a04SGreg Roach		'זג'    => array('0', '44', '44', '44', '45', '45', '45'),
545a25f0a04SGreg Roach		'זז'    => array('0', '4', '4', '4', '44', '44', '44'),
546a25f0a04SGreg Roach		'זס'    => array('0', '44', '44', '44'),
547a25f0a04SGreg Roach		'זצ'    => array('0', '44', '44', '44'),
548a25f0a04SGreg Roach		'זץ'    => array('0', '44', '44', '44'),
549a25f0a04SGreg Roach		'חג'    => array('0', '54', '54', '54', '53', '53', '53'),
550a25f0a04SGreg Roach		'חח'    => array('0', '5', '5', '5', '55', '55', '55'),
551a25f0a04SGreg Roach		'חק'    => array('0', '55', '55', '55', '5', '5', '5'),
552a25f0a04SGreg Roach		'חכ'    => array('0', '45', '45', '45', '55', '55', '55'),
553a25f0a04SGreg Roach		'חס'    => array('0', '5', '54', '54'),
554a25f0a04SGreg Roach		'חש'    => array('0', '5', '54', '54'),
555a25f0a04SGreg Roach		'ח'     => array('0', '5', '5', '5'),
556a25f0a04SGreg Roach		'טש'    => array('0', '4', '4', '4'),
557a25f0a04SGreg Roach		'טד'    => array('0', '33', '33', '33'),
558a25f0a04SGreg Roach		'טי'    => array('0', '3', '3', '3', '4', '4', '4', '3', '3', '34'),
559a25f0a04SGreg Roach		'טת'    => array('0', '33', '33', '33'),
560a25f0a04SGreg Roach		'טט'    => array('0', '3', '3', '3', '33', '33', '33'),
561a25f0a04SGreg Roach		'ט'     => array('0', '3', '3', '3'),
562a25f0a04SGreg Roach		'י'     => array('1', '1', '', ''),
563a25f0a04SGreg Roach		'יא'    => array('1', '1', '', '', '1', '1', '1'),
564a25f0a04SGreg Roach		'כג'    => array('0', '55', '55', '55', '54', '54', '54'),
565a25f0a04SGreg Roach		'כש'    => array('0', '5', '54', '54'),
566a25f0a04SGreg Roach		'כס'    => array('0', '5', '54', '54'),
567a25f0a04SGreg Roach		'ככ'    => array('0', '5', '5', '5', '55', '55', '55'),
568a25f0a04SGreg Roach		'כך'    => array('0', '5', '5', '5', '55', '55', '55'),
569a25f0a04SGreg Roach		'כ'     => array('0', '5', '5', '5'),
570a25f0a04SGreg Roach		'כח'    => array('0', '55', '55', '55', '5', '5', '5'),
571a25f0a04SGreg Roach		'ך'     => array('0', '', '5', '5'),
572a25f0a04SGreg Roach		'ל'     => array('0', '8', '8', '8'),
573a25f0a04SGreg Roach		'לל'    => array('0', '88', '88', '88', '8', '8', '8'),
574a25f0a04SGreg Roach		'מנ'    => array('0', '66', '66', '66'),
575a25f0a04SGreg Roach		'מן'    => array('0', '66', '66', '66'),
576a25f0a04SGreg Roach		'ממ'    => array('0', '6', '6', '6', '66', '66', '66'),
577a25f0a04SGreg Roach		'מם'    => array('0', '6', '6', '6', '66', '66', '66'),
578a25f0a04SGreg Roach		'מ'     => array('0', '6', '6', '6'),
579a25f0a04SGreg Roach		'ם'     => array('0', '', '6', '6'),
580a25f0a04SGreg Roach		'נמ'    => array('0', '66', '66', '66'),
581a25f0a04SGreg Roach		'נם'    => array('0', '66', '66', '66'),
582a25f0a04SGreg Roach		'ננ'    => array('0', '6', '6', '6', '66', '66', '66'),
583a25f0a04SGreg Roach		'נן'    => array('0', '6', '6', '6', '66', '66', '66'),
584a25f0a04SGreg Roach		'נ'     => array('0', '6', '6', '6'),
585a25f0a04SGreg Roach		'ן'     => array('0', '', '6', '6'),
586a25f0a04SGreg Roach		'סתש'   => array('0', '2', '4', '4'),
587a25f0a04SGreg Roach		'סתז'   => array('0', '2', '4', '4'),
588a25f0a04SGreg Roach		'סטז'   => array('0', '2', '4', '4'),
589a25f0a04SGreg Roach		'סטש'   => array('0', '2', '4', '4'),
590a25f0a04SGreg Roach		'סצד'   => array('0', '2', '4', '4'),
591a25f0a04SGreg Roach		'סט'    => array('0', '2', '4', '4', '43', '43', '43'),
592a25f0a04SGreg Roach		'סת'    => array('0', '2', '4', '4', '43', '43', '43'),
593a25f0a04SGreg Roach		'סג'    => array('0', '44', '44', '44', '4', '4', '4'),
594a25f0a04SGreg Roach		'סס'    => array('0', '4', '4', '4', '44', '44', '44'),
595a25f0a04SGreg Roach		'סצ'    => array('0', '44', '44', '44'),
596a25f0a04SGreg Roach		'סץ'    => array('0', '44', '44', '44'),
597a25f0a04SGreg Roach		'סז'    => array('0', '44', '44', '44'),
598a25f0a04SGreg Roach		'סש'    => array('0', '44', '44', '44'),
599a25f0a04SGreg Roach		'ס'     => array('0', '4', '4', '4'),
600a25f0a04SGreg Roach		'ע'     => array('1', '0', '', ''),
601a25f0a04SGreg Roach		'פב'    => array('0', '7', '7', '7', '77', '77', '77'),
602a25f0a04SGreg Roach		'פוו'   => array('0', '7', '7', '7', '77', '77', '77'),
603a25f0a04SGreg Roach		'פפ'    => array('0', '7', '7', '7', '77', '77', '77'),
604a25f0a04SGreg Roach		'פף'    => array('0', '7', '7', '7', '77', '77', '77'),
605a25f0a04SGreg Roach		'פ'     => array('0', '7', '7', '7'),
606a25f0a04SGreg Roach		'ף'     => array('0', '', '7', '7'),
607a25f0a04SGreg Roach		'צג'    => array('0', '44', '44', '44', '45', '45', '45'),
608a25f0a04SGreg Roach		'צז'    => array('0', '44', '44', '44'),
609a25f0a04SGreg Roach		'צס'    => array('0', '44', '44', '44'),
610a25f0a04SGreg Roach		'צצ'    => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'),
611a25f0a04SGreg Roach		'צץ'    => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'),
612a25f0a04SGreg Roach		'צש'    => array('0', '44', '44', '44', '4', '4', '4', '5', '5', '5'),
613a25f0a04SGreg Roach		'צ'     => array('0', '4', '4', '4', '5', '5', '5'),
614a25f0a04SGreg Roach		'ץ'     => array('0', '', '4', '4'),
615a25f0a04SGreg Roach		'קה'    => array('0', '55', '55', '5'),
616a25f0a04SGreg Roach		'קס'    => array('0', '5', '54', '54'),
617a25f0a04SGreg Roach		'קש'    => array('0', '5', '54', '54'),
618a25f0a04SGreg Roach		'קק'    => array('0', '5', '5', '5', '55', '55', '55'),
619a25f0a04SGreg Roach		'קח'    => array('0', '55', '55', '55'),
620a25f0a04SGreg Roach		'קכ'    => array('0', '55', '55', '55'),
621a25f0a04SGreg Roach		'קך'    => array('0', '55', '55', '55'),
622a25f0a04SGreg Roach		'קג'    => array('0', '55', '55', '55', '54', '54', '54'),
623a25f0a04SGreg Roach		'ק'     => array('0', '5', '5', '5'),
624a25f0a04SGreg Roach		'רר'    => array('0', '99', '99', '99', '9', '9', '9'),
625a25f0a04SGreg Roach		'ר'     => array('0', '9', '9', '9'),
626a25f0a04SGreg Roach		'שטז'   => array('0', '2', '4', '4'),
627a25f0a04SGreg Roach		'שתש'   => array('0', '2', '4', '4'),
628a25f0a04SGreg Roach		'שתז'   => array('0', '2', '4', '4'),
629a25f0a04SGreg Roach		'שטש'   => array('0', '2', '4', '4'),
630a25f0a04SGreg Roach		'שד'    => array('0', '2', '43', '43'),
631a25f0a04SGreg Roach		'שז'    => array('0', '44', '44', '44'),
632a25f0a04SGreg Roach		'שס'    => array('0', '44', '44', '44'),
633a25f0a04SGreg Roach		'שת'    => array('0', '2', '43', '43'),
634a25f0a04SGreg Roach		'שג'    => array('0', '4', '4', '4', '44', '44', '44', '4', '43', '43'),
635a25f0a04SGreg Roach		'שט'    => array('0', '2', '43', '43', '44', '44', '44'),
636a25f0a04SGreg Roach		'שצ'    => array('0', '44', '44', '44', '45', '45', '45'),
637a25f0a04SGreg Roach		'שץ'    => array('0', '44', '', '44', '45', '', '45'),
638a25f0a04SGreg Roach		'שש'    => array('0', '4', '4', '4', '44', '44', '44'),
639a25f0a04SGreg Roach		'ש'     => array('0', '4', '4', '4'),
640a25f0a04SGreg Roach		'תג'    => array('0', '34', '34', '34'),
641a25f0a04SGreg Roach		'תז'    => array('0', '34', '34', '34'),
642a25f0a04SGreg Roach		'תש'    => array('0', '4', '4', '4'),
643a25f0a04SGreg Roach		'תת'    => array('0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'),
644a25f0a04SGreg Roach		'ת'     => array('0', '3', '3', '3', '4', '4', '4'),
645a25f0a04SGreg Roach		// Arabic alphabet
646a25f0a04SGreg Roach		'ا'   => array('1', '0', '', ''),
647a25f0a04SGreg Roach		'ب'   => array('0', '7', '7', '7'),
648a25f0a04SGreg Roach		'ت'   => array('0', '3', '3', '3'),
649a25f0a04SGreg Roach		'ث'   => array('0', '3', '3', '3'),
650a25f0a04SGreg Roach		'ج'   => array('0', '4', '4', '4'),
651a25f0a04SGreg Roach		'ح'   => array('0', '5', '5', '5'),
652a25f0a04SGreg Roach		'خ'   => array('0', '5', '5', '5'),
653a25f0a04SGreg Roach		'د'   => array('0', '3', '3', '3'),
654a25f0a04SGreg Roach		'ذ'   => array('0', '3', '3', '3'),
655a25f0a04SGreg Roach		'ر'   => array('0', '9', '9', '9'),
656a25f0a04SGreg Roach		'ز'   => array('0', '4', '4', '4'),
657a25f0a04SGreg Roach		'س'   => array('0', '4', '4', '4'),
658a25f0a04SGreg Roach		'ش'   => array('0', '4', '4', '4'),
659a25f0a04SGreg Roach		'ص'   => array('0', '4', '4', '4'),
660a25f0a04SGreg Roach		'ض'   => array('0', '3', '3', '3'),
661a25f0a04SGreg Roach		'ط'   => array('0', '3', '3', '3'),
662a25f0a04SGreg Roach		'ظ'   => array('0', '4', '4', '4'),
663a25f0a04SGreg Roach		'ع'   => array('1', '0', '', ''),
664a25f0a04SGreg Roach		'غ'   => array('0', '0', '', ''),
665a25f0a04SGreg Roach		'ف'   => array('0', '7', '7', '7'),
666a25f0a04SGreg Roach		'ق'   => array('0', '5', '5', '5'),
667a25f0a04SGreg Roach		'ك'   => array('0', '5', '5', '5'),
668a25f0a04SGreg Roach		'ل'   => array('0', '8', '8', '8'),
669a25f0a04SGreg Roach		'لا'  => array('0', '8', '8', '8'),
670a25f0a04SGreg Roach		'م'   => array('0', '6', '6', '6'),
671a25f0a04SGreg Roach		'ن'   => array('0', '6', '6', '6'),
672a25f0a04SGreg Roach		'هن'  => array('0', '66', '66', '66'),
673a25f0a04SGreg Roach		'ه'   => array('0', '5', '5', ''),
674a25f0a04SGreg Roach		'و'   => array('1', '', '', '', '7', '', ''),
675a25f0a04SGreg Roach		'ي'   => array('0', '1', '', ''),
676a25f0a04SGreg Roach		'آ'   => array('0', '1', '', ''),
677a25f0a04SGreg Roach		'ة'   => array('0', '', '', '3'),
678a25f0a04SGreg Roach		'ی'   => array('0', '1', '', ''),
679a25f0a04SGreg Roach		'ى'   => array('1', '1', '', ''),
680a25f0a04SGreg Roach	);
681a25f0a04SGreg Roach
/**
 * Calculate the Daitch-Mokotoff soundex for a word.
 *
 * The word is passed through I18N::strtoupper(), rewritten using
 * self::$transformNameTable, and then scanned from left to right. At each
 * position the longest chunk (at most self::MAXCHAR bytes) that is a key of
 * self::$dmsounds is consumed; bytes that match no key are skipped.
 *
 * Each table entry holds a vowel flag at index 0, followed by one or more
 * triplets of codes: (start of word, before a vowel, any other position).
 * An entry with several triplets yields several alternative codes, so one
 * word can produce more than one result.
 *
 * @param string $name
 *
 * @return string[] List of possible DM codes for the word. Each code is six
 *                  characters long (zero-padded or truncated). The list is
 *                  not de-duplicated and is empty when no sound was recognised.
 */
private static function daitchMokotoffWord($name) {
	// Apply special transformation rules to the input string
	$name = I18N::strtoupper($name);
	foreach (self::$transformNameTable as $transformRule) {
		$name = str_replace($transformRule[0], $transformRule[1], $name);
	}

	// Initialize
	$name_script = I18N::textScript($name);
	$noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');

	// Positions are byte offsets: the table keys are matched byte-wise with substr()
	$lastPos         = strlen($name) - 1;
	$currPos         = 0;
	$state           = 1; // 1: start of input string, 2: before vowel, 3: other
	$result          = array(); // accumulate complete 6-digit D-M codes here
	$partialResult   = array(); // accumulate incomplete D-M codes here
	$partialResult[] = array('!'); // initialize 1st partial result  ('!' stops "duplicate sound" check)

	// Loop through the input string.
	// Stop when the string is exhausted or when no more partial results remain
	while (count($partialResult) !== 0 && $currPos <= $lastPos) {
		// Find the DM coding table entry for the chunk at the current position
		$thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
		while ($thisEntry !== '' && !isset(self::$dmsounds[$thisEntry])) {
			$thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
		}
		if ($thisEntry === '') {
			$currPos++; // Not in table: advance pointer to next byte
			continue; // and try again
		}

		$soundTableEntry = self::$dmsounds[$thisEntry];
		$workingResult   = $partialResult;
		$partialResult   = array();
		$currPos += strlen($thisEntry);

		// Not at beginning of input string
		if ($state != 1) {
			// Look ahead: find the table entry (if any) for the chunk that follows
			$nextEntry = '';
			if ($currPos <= $lastPos) {
				$nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
				while ($nextEntry !== '' && !isset(self::$dmsounds[$nextEntry])) {
					$nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
				}
			}
			if ($nextEntry !== '' && self::$dmsounds[$nextEntry][0] != '0') {
				// Next chunk is a vowel
				$state = 2;
			} else {
				// Next chunk is not a vowel, is unknown, or we are at the end of the string
				$state = 3;
			}
		}

		// Visit the code for this state in every triplet of the table entry
		while ($state < count($soundTableEntry)) {
			$sound = $soundTableEntry[$state];

			if ($sound == '') {
				// empty means 'ignore this sound in this state'
				foreach ($workingResult as $workingEntry) {
					$tempEntry = $workingEntry;
					$tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
					$partialResult[] = $tempEntry;
				}
			} else {
				foreach ($workingResult as $workingEntry) {
					if ($sound !== $workingEntry[count($workingEntry) - 1]) {
						// Incoming sound isn't a duplicate of the previous sound
						$workingEntry[] = $sound;
					} elseif ($noVowels) {
						// Incoming sound is a duplicate of the previous sound.
						// For Hebrew and Arabic the repeated sound is kept;
						// for all other scripts it is dropped.
						$workingEntry[] = $sound;
					}

					if (count($workingEntry) < 7) {
						$partialResult[] = $workingEntry;
					} else {
						// This is the 6th code in the sequence
						// We're looking for 7 entries because the first is '!' and doesn't count
						$tempResult = str_replace('!', '', implode('', $workingEntry));
						// Only return codes from recognisable sounds.
						// Compare with '' explicitly: the string "0" is falsy in PHP.
						if ($tempResult !== '') {
							$result[] = substr($tempResult . '000000', 0, 6);
						}
					}
				}
			}
			$state = $state + 3; // Advance to next triplet while keeping the same basic state
		}
	}

	// Zero-fill and copy all remaining partial results
	foreach ($partialResult as $workingEntry) {
		$tempResult = str_replace('!', '', implode('', $workingEntry));
		// Only return codes from recognisable sounds.
		// Compare with '' explicitly: a word whose only sound is an initial
		// vowel produces "0", which PHP would otherwise treat as false.
		if ($tempResult !== '') {
			$result[] = substr($tempResult . '000000', 0, 6);
		}
	}

	return $result;
}
800a25f0a04SGreg Roach}
801