xref: /webtrees/app/Soundex.php (revision cbc1590a8c715aa2d88bd745610b899587bd9563)
1a25f0a04SGreg Roach<?php
2dd04c183SGreg Roachnamespace Fisharebest\Webtrees;
3a25f0a04SGreg Roach
4a25f0a04SGreg Roach/**
5a25f0a04SGreg Roach * webtrees: online genealogy
6a25f0a04SGreg Roach * Copyright (C) 2015 webtrees development team
7a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify
8a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by
9a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or
10a25f0a04SGreg Roach * (at your option) any later version.
11a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful,
12a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
13a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14a25f0a04SGreg Roach * GNU General Public License for more details.
15a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License
16a25f0a04SGreg Roach * along with this program. If not, see <http://www.gnu.org/licenses/>.
17a25f0a04SGreg Roach */
18a25f0a04SGreg Roach
19a25f0a04SGreg Roach/**
20a25f0a04SGreg Roach * Class Soundex Functions for phonetic matching of strings
21a25f0a04SGreg Roach */
22a25f0a04SGreg Roachclass Soundex {
23a25f0a04SGreg Roach	/**
24a25f0a04SGreg Roach	 * @return string[]
25a25f0a04SGreg Roach	 */
public static function getAlgorithms() {
	// Keyed by algorithm identifier; each value is the label returned by
	// I18N::translate(). The inline I18N comments stay directly in front of
	// the calls they annotate.
	$algorithms = array();

	$algorithms['std'] = /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell');
	$algorithms['dm']  = /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff');

	return $algorithms;
}
32a25f0a04SGreg Roach
33a25f0a04SGreg Roach	/**
34a25f0a04SGreg Roach	 * Is there a match between two soundex codes?
35a25f0a04SGreg Roach	 *
36a25f0a04SGreg Roach	 * @param string $soundex1
37a25f0a04SGreg Roach	 * @param string $soundex2
38a25f0a04SGreg Roach	 *
39*cbc1590aSGreg Roach	 * @return bool
40a25f0a04SGreg Roach	 */
public static function compare($soundex1, $soundex2) {
	// Two lists of ":"-separated codes match when any code from the first
	// list occurs in the second. An empty list on either side never matches.
	if (!$soundex1 || !$soundex2) {
		return false;
	}

	foreach (explode(':', $soundex1) as $code) {
		// Skip empty segments (leading, trailing or doubled ":").
		// strpos() with an empty needle raises a warning before PHP 8 and
		// returns 0 from PHP 8 onwards, which would report a match against
		// every non-empty $soundex2.
		if ($code === '') {
			continue;
		}

		if (strpos($soundex2, $code) !== false) {
			return true;
		}
	}

	return false;
}
52a25f0a04SGreg Roach
53a25f0a04SGreg Roach	/**
54a25f0a04SGreg Roach	 * Generate Russell soundex codes for a given text.
55a25f0a04SGreg Roach	 *
56a25f0a04SGreg Roach	 * @param $text
57a25f0a04SGreg Roach	 *
58a25f0a04SGreg Roach	 * @return null|string
59a25f0a04SGreg Roach	 */
public static function russell($text) {
	// Split on whitespace; empty pieces are discarded.
	$words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

	// Encode every word on its own, then - for multi-word input - the words
	// joined together, e.g. “New York” as “Newyork”.
	// implode() is used for the join because strtr($text, ' ', '') leaves the
	// text unchanged: strtr() ignores the surplus characters when its "from"
	// and "to" arguments differ in length.
	$candidates = $words;
	if (count($words) > 1) {
		$candidates[] = implode('', $words);
	}

	$soundex_array = array();
	foreach ($candidates as $candidate) {
		$soundex = soundex($candidate);
		// Only return codes from recognisable sounds. This also applies to
		// the joined form, so input without any letters yields no code at all.
		if ($soundex !== '0000') {
			$soundex_array[] = $soundex;
		}
	}

	// A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
	$soundex_array = array_slice(array_unique($soundex_array), 0, 51);

	// No usable code is reported as null rather than as an empty string.
	return $soundex_array ? implode(':', $soundex_array) : null;
}
83a25f0a04SGreg Roach
84a25f0a04SGreg Roach	/**
85a25f0a04SGreg Roach	 * Generate Daitch–Mokotoff soundex codes for a given text.
86a25f0a04SGreg Roach	 *
87a25f0a04SGreg Roach	 * @param $text
88a25f0a04SGreg Roach	 *
89a25f0a04SGreg Roach	 * @return null|string
90a25f0a04SGreg Roach	 */
public static function daitchMokotoff($text) {
	// Split on whitespace; empty pieces are discarded.
	$words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

	// Encode every word on its own, then - for multi-word input - the words
	// joined together, e.g. “New York” as “Newyork”.
	// implode() is used for the join because strtr($text, ' ', '') leaves the
	// text unchanged (strtr() ignores the surplus characters when its "from"
	// and "to" arguments differ in length), so the whitespace was never removed.
	$candidates = $words;
	if (count($words) > 1) {
		$candidates[] = implode('', $words);
	}

	$soundex_array = array();
	foreach ($candidates as $candidate) {
		// daitchMokotoffWord() is merged with array_merge(), so it is expected
		// to return an array of codes for the given word.
		$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($candidate));
	}

	// A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
	$soundex_array = array_slice(array_unique($soundex_array), 0, 36);

	// No usable code is reported as null rather than as an empty string.
	return $soundex_array ? implode(':', $soundex_array) : null;
}
110a25f0a04SGreg Roach
111a25f0a04SGreg Roach	// Determine the Daitch–Mokotoff Soundex code for a word
112a25f0a04SGreg Roach	// Original implementation by Gerry Kroll, and analysis by Meliza Amity
113a25f0a04SGreg Roach
114a25f0a04SGreg Roach	// Max. length of a key in the DM sound table, counted in bytes -- NOT in UTF-8 characters!
115a25f0a04SGreg Roach	const MAXCHAR = 7;
116a25f0a04SGreg Roach
117a25f0a04SGreg Roach	/**
118a25f0a04SGreg Roach	 * Name transformation arrays.
119a25f0a04SGreg Roach	 * Used to transform the Name string to simplify the "sounds like" table.
120a25f0a04SGreg Roach	 * This is especially useful in Hebrew.
121a25f0a04SGreg Roach	 *
122a25f0a04SGreg Roach	 * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text)
123a25f0a04SGreg Roach	 * function call to achieve the desired transformations.
124a25f0a04SGreg Roach	 *
125a25f0a04SGreg Roach	 * Note about the use of "\x01":
126a25f0a04SGreg Roach	 * This code, which can’t legitimately occur in the kind of text we're dealing with,
127a25f0a04SGreg Roach	 * is used as a place-holder so that conditional string replacements can be done.
128a25f0a04SGreg Roach	 *
129a25f0a04SGreg Roach	 * @var string[][]
130a25f0a04SGreg Roach	 */
131a25f0a04SGreg Roach	private static $transformNameTable = array(
		// Each entry is a (from, to) pair.
		// NOTE(review): the place-holder entries further down only make sense
		// if the pairs are applied one after another in array order - confirm
		// against the code that consumes this table.
132a25f0a04SGreg Roach		// Force Yiddish ligatures to be treated as separate letters
133a25f0a04SGreg Roach		array('װ', 'וו'),
134a25f0a04SGreg Roach		array('ײ', 'יי'),
135a25f0a04SGreg Roach		array('ױ', 'וי'),
		// Two-letter sequences rewritten to a different two-letter sequence
136a25f0a04SGreg Roach		array('בו', 'בע'),
137a25f0a04SGreg Roach		array('פו', 'פע'),
138a25f0a04SGreg Roach		array('ומ', 'עמ'),
139a25f0a04SGreg Roach		array('ום', 'עם'),
140a25f0a04SGreg Roach		array('ונ', 'ענ'),
141a25f0a04SGreg Roach		array('ון', 'ען'),
		// A doubled letter collapsed to a single, different letter
142a25f0a04SGreg Roach		array('וו', 'ב'),
		// Conditional replacement using the "\x01" place-holder:
		// 1. remove any "\x01" already present, so the marker is unambiguous
143a25f0a04SGreg Roach		array("\x01", ''),
		// 2. in the two listed endings, swap the doubled letter for the marker
		//    (the trailing "$" is presumably a regex end-of-text anchor)
144a25f0a04SGreg Roach		array('ייה$', "\x01ה"),
145a25f0a04SGreg Roach		array('ייע$', "\x01ע"),
		// 3. replace every remaining occurrence of the doubled letter
146a25f0a04SGreg Roach		array('יי', 'ע'),
		// 4. turn the marker back into the doubled letter it stood for
147a25f0a04SGreg Roach		array("\x01", 'יי'),
148a25f0a04SGreg Roach	);
149a25f0a04SGreg Roach
150a25f0a04SGreg Roach	/**
151a25f0a04SGreg Roach	 * The DM sound coding table is organized this way:
152a25f0a04SGreg Roach	 * key: a variable-length string that corresponds to the UTF-8 character sequence
153a25f0a04SGreg Roach	 * represented by the table entry.  Currently, that string can be up to 7
154a25f0a04SGreg Roach	 * bytes long.  This maximum length is defined by the class constant
155a25f0a04SGreg Roach	 * self::MAXCHAR.
156a25f0a04SGreg Roach	 *
157a25f0a04SGreg Roach	 * value: an array as follows:
158a25f0a04SGreg Roach	 * [0]:  zero if not a vowel
159a25f0a04SGreg Roach	 * [1]:  sound value when this string is at the beginning of the word
160a25f0a04SGreg Roach	 * [2]:  sound value when this string is followed by a vowel
161a25f0a04SGreg Roach	 * [3]:  sound value for other cases
162a25f0a04SGreg Roach	 * [1],[2],[3] can be repeated several times to create branches in the code
163a25f0a04SGreg Roach	 * an empty sound value means "ignore in this state"
164a25f0a04SGreg Roach	 *
165a25f0a04SGreg Roach	 * @var string[][]
166a25f0a04SGreg Roach	 */
167a25f0a04SGreg Roach	private static $dmsounds = array(
168a25f0a04SGreg Roach		'A'       => array('1', '0', '', ''),
169a25f0a04SGreg Roach		'À'      => array('1', '0', '', ''),
170a25f0a04SGreg Roach		'Á'      => array('1', '0', '', ''),
171a25f0a04SGreg Roach		'Â'      => array('1', '0', '', ''),
172a25f0a04SGreg Roach		'Ã'      => array('1', '0', '', ''),
173a25f0a04SGreg Roach		'Ä'      => array('1', '0', '1', '', '0', '', ''),
174a25f0a04SGreg Roach		'Å'      => array('1', '0', '', ''),
175a25f0a04SGreg Roach		'Ă'      => array('1', '0', '', ''),
176a25f0a04SGreg Roach		'Ą'      => array('1', '', '', '', '', '', '6'),
177a25f0a04SGreg Roach		'Ạ'     => array('1', '0', '', ''),
178a25f0a04SGreg Roach		'Ả'     => array('1', '0', '', ''),
179a25f0a04SGreg Roach		'Ấ'     => array('1', '0', '', ''),
180a25f0a04SGreg Roach		'Ầ'     => array('1', '0', '', ''),
181a25f0a04SGreg Roach		'Ẩ'     => array('1', '0', '', ''),
182a25f0a04SGreg Roach		'Ẫ'     => array('1', '0', '', ''),
183a25f0a04SGreg Roach		'Ậ'     => array('1', '0', '', ''),
184a25f0a04SGreg Roach		'Ắ'     => array('1', '0', '', ''),
185a25f0a04SGreg Roach		'Ằ'     => array('1', '0', '', ''),
186a25f0a04SGreg Roach		'Ẳ'     => array('1', '0', '', ''),
187a25f0a04SGreg Roach		'Ẵ'     => array('1', '0', '', ''),
188a25f0a04SGreg Roach		'Ặ'     => array('1', '0', '', ''),
189a25f0a04SGreg Roach		'AE'      => array('1', '0', '1', ''),
190a25f0a04SGreg Roach		'Æ'      => array('1', '0', '1', ''),
191a25f0a04SGreg Roach		'AI'      => array('1', '0', '1', ''),
192a25f0a04SGreg Roach		'AJ'      => array('1', '0', '1', ''),
193a25f0a04SGreg Roach		'AU'      => array('1', '0', '7', ''),
194a25f0a04SGreg Roach		'AV'      => array('1', '0', '7', '', '7', '7', '7'),
195a25f0a04SGreg Roach		'ÄU'     => array('1', '0', '1', ''),
196a25f0a04SGreg Roach		'AY'      => array('1', '0', '1', ''),
197a25f0a04SGreg Roach		'B'       => array('0', '7', '7', '7'),
198a25f0a04SGreg Roach		'C'       => array('0', '5', '5', '5', '34', '4', '4'),
199a25f0a04SGreg Roach		'Ć'      => array('0', '4', '4', '4'),
200a25f0a04SGreg Roach		'Č'      => array('0', '4', '4', '4'),
201a25f0a04SGreg Roach		'Ç'      => array('0', '4', '4', '4'),
202a25f0a04SGreg Roach		'CH'      => array('0', '5', '5', '5', '34', '4', '4'),
203a25f0a04SGreg Roach		'CHS'     => array('0', '5', '54', '54'),
204a25f0a04SGreg Roach		'CK'      => array('0', '5', '5', '5', '45', '45', '45'),
205a25f0a04SGreg Roach		'CCS'     => array('0', '4', '4', '4'),
206a25f0a04SGreg Roach		'CS'      => array('0', '4', '4', '4'),
207a25f0a04SGreg Roach		'CSZ'     => array('0', '4', '4', '4'),
208a25f0a04SGreg Roach		'CZ'      => array('0', '4', '4', '4'),
209a25f0a04SGreg Roach		'CZS'     => array('0', '4', '4', '4'),
210a25f0a04SGreg Roach		'D'       => array('0', '3', '3', '3'),
211a25f0a04SGreg Roach		'Ď'      => array('0', '3', '3', '3'),
212a25f0a04SGreg Roach		'Đ'      => array('0', '3', '3', '3'),
213a25f0a04SGreg Roach		'DRS'     => array('0', '4', '4', '4'),
214a25f0a04SGreg Roach		'DRZ'     => array('0', '4', '4', '4'),
215a25f0a04SGreg Roach		'DS'      => array('0', '4', '4', '4'),
216a25f0a04SGreg Roach		'DSH'     => array('0', '4', '4', '4'),
217a25f0a04SGreg Roach		'DSZ'     => array('0', '4', '4', '4'),
218a25f0a04SGreg Roach		'DT'      => array('0', '3', '3', '3'),
219a25f0a04SGreg Roach		'DDZ'     => array('0', '4', '4', '4'),
220a25f0a04SGreg Roach		'DDZS'    => array('0', '4', '4', '4'),
221a25f0a04SGreg Roach		'DZ'      => array('0', '4', '4', '4'),
222a25f0a04SGreg Roach		'DŹ'     => array('0', '4', '4', '4'),
223a25f0a04SGreg Roach		'DŻ'     => array('0', '4', '4', '4'),
224a25f0a04SGreg Roach		'DZH'     => array('0', '4', '4', '4'),
225a25f0a04SGreg Roach		'DZS'     => array('0', '4', '4', '4'),
226a25f0a04SGreg Roach		'E'       => array('1', '0', '', ''),
227a25f0a04SGreg Roach		'È'      => array('1', '0', '', ''),
228a25f0a04SGreg Roach		'É'      => array('1', '0', '', ''),
229a25f0a04SGreg Roach		'Ê'      => array('1', '0', '', ''),
230a25f0a04SGreg Roach		'Ë'      => array('1', '0', '', ''),
231a25f0a04SGreg Roach		'Ĕ'      => array('1', '0', '', ''),
232a25f0a04SGreg Roach		'Ė'      => array('1', '0', '', ''),
233a25f0a04SGreg Roach		'Ę'      => array('1', '', '', '6', '', '', ''),
234a25f0a04SGreg Roach		'Ẹ'     => array('1', '0', '', ''),
235a25f0a04SGreg Roach		'Ẻ'     => array('1', '0', '', ''),
236a25f0a04SGreg Roach		'Ẽ'     => array('1', '0', '', ''),
237a25f0a04SGreg Roach		'Ế'     => array('1', '0', '', ''),
238a25f0a04SGreg Roach		'Ề'     => array('1', '0', '', ''),
239a25f0a04SGreg Roach		'Ể'     => array('1', '0', '', ''),
240a25f0a04SGreg Roach		'Ễ'     => array('1', '0', '', ''),
241a25f0a04SGreg Roach		'Ệ'     => array('1', '0', '', ''),
242a25f0a04SGreg Roach		'EAU'     => array('1', '0', '', ''),
243a25f0a04SGreg Roach		'EI'      => array('1', '0', '1', ''),
244a25f0a04SGreg Roach		'EJ'      => array('1', '0', '1', ''),
245a25f0a04SGreg Roach		'EU'      => array('1', '1', '1', ''),
246a25f0a04SGreg Roach		'EY'      => array('1', '0', '1', ''),
247a25f0a04SGreg Roach		'F'       => array('0', '7', '7', '7'),
248a25f0a04SGreg Roach		'FB'      => array('0', '7', '7', '7'),
249a25f0a04SGreg Roach		'G'       => array('0', '5', '5', '5', '34', '4', '4'),
250a25f0a04SGreg Roach		'Ğ'      => array('0', '', '', ''),
251a25f0a04SGreg Roach		'GGY'     => array('0', '5', '5', '5'),
252a25f0a04SGreg Roach		'GY'      => array('0', '5', '5', '5'),
253a25f0a04SGreg Roach		'H'       => array('0', '5', '5', '', '5', '5', '5'),
254a25f0a04SGreg Roach		'I'       => array('1', '0', '', ''),
255a25f0a04SGreg Roach		'Ì'      => array('1', '0', '', ''),
256a25f0a04SGreg Roach		'Í'      => array('1', '0', '', ''),
257a25f0a04SGreg Roach		'Î'      => array('1', '0', '', ''),
258a25f0a04SGreg Roach		'Ï'      => array('1', '0', '', ''),
259a25f0a04SGreg Roach		'Ĩ'      => array('1', '0', '', ''),
260a25f0a04SGreg Roach		'Į'      => array('1', '0', '', ''),
261a25f0a04SGreg Roach		'İ'      => array('1', '0', '', ''),
262a25f0a04SGreg Roach		'Ỉ'     => array('1', '0', '', ''),
263a25f0a04SGreg Roach		'Ị'     => array('1', '0', '', ''),
264a25f0a04SGreg Roach		'IA'      => array('1', '1', '', ''),
265a25f0a04SGreg Roach		'IE'      => array('1', '1', '', ''),
266a25f0a04SGreg Roach		'IO'      => array('1', '1', '', ''),
267a25f0a04SGreg Roach		'IU'      => array('1', '1', '', ''),
268a25f0a04SGreg Roach		'J'       => array('0', '1', '', '', '4', '4', '4', '5', '5', ''),
269a25f0a04SGreg Roach		'K'       => array('0', '5', '5', '5'),
270a25f0a04SGreg Roach		'KH'      => array('0', '5', '5', '5'),
271a25f0a04SGreg Roach		'KS'      => array('0', '5', '54', '54'),
272a25f0a04SGreg Roach		'L'       => array('0', '8', '8', '8'),
273a25f0a04SGreg Roach		'Ľ'      => array('0', '8', '8', '8'),
274a25f0a04SGreg Roach		'Ĺ'      => array('0', '8', '8', '8'),
275a25f0a04SGreg Roach		'Ł'      => array('0', '7', '7', '7', '8', '8', '8'),
276a25f0a04SGreg Roach		'LL'      => array('0', '8', '8', '8', '58', '8', '8', '1', '8', '8'),
277a25f0a04SGreg Roach		'LLY'     => array('0', '8', '8', '8', '1', '8', '8'),
278a25f0a04SGreg Roach		'LY'      => array('0', '8', '8', '8', '1', '8', '8'),
279a25f0a04SGreg Roach		'M'       => array('0', '6', '6', '6'),
280a25f0a04SGreg Roach		'MĔ'     => array('0', '66', '66', '66'),
281a25f0a04SGreg Roach		'MN'      => array('0', '66', '66', '66'),
282a25f0a04SGreg Roach		'N'       => array('0', '6', '6', '6'),
283a25f0a04SGreg Roach		'Ń'      => array('0', '6', '6', '6'),
284a25f0a04SGreg Roach		'Ň'      => array('0', '6', '6', '6'),
285a25f0a04SGreg Roach		'Ñ'      => array('0', '6', '6', '6'),
286a25f0a04SGreg Roach		'NM'      => array('0', '66', '66', '66'),
287a25f0a04SGreg Roach		'O'       => array('1', '0', '', ''),
288a25f0a04SGreg Roach		'Ò'      => array('1', '0', '', ''),
289a25f0a04SGreg Roach		'Ó'      => array('1', '0', '', ''),
290a25f0a04SGreg Roach		'Ô'      => array('1', '0', '', ''),
291a25f0a04SGreg Roach		'Õ'      => array('1', '0', '', ''),
292a25f0a04SGreg Roach		'Ö'      => array('1', '0', '', ''),
293a25f0a04SGreg Roach		'Ø'      => array('1', '0', '', ''),
294a25f0a04SGreg Roach		'Ő'      => array('1', '0', '', ''),
295a25f0a04SGreg Roach		'Œ'      => array('1', '0', '', ''),
296a25f0a04SGreg Roach		'Ơ'      => array('1', '0', '', ''),
297a25f0a04SGreg Roach		'Ọ'     => array('1', '0', '', ''),
298a25f0a04SGreg Roach		'Ỏ'     => array('1', '0', '', ''),
299a25f0a04SGreg Roach		'Ố'     => array('1', '0', '', ''),
300a25f0a04SGreg Roach		'Ồ'     => array('1', '0', '', ''),
301a25f0a04SGreg Roach		'Ổ'     => array('1', '0', '', ''),
302a25f0a04SGreg Roach		'Ỗ'     => array('1', '0', '', ''),
303a25f0a04SGreg Roach		'Ộ'     => array('1', '0', '', ''),
304a25f0a04SGreg Roach		'Ớ'     => array('1', '0', '', ''),
305a25f0a04SGreg Roach		'Ờ'     => array('1', '0', '', ''),
306a25f0a04SGreg Roach		'Ở'     => array('1', '0', '', ''),
307a25f0a04SGreg Roach		'Ỡ'     => array('1', '0', '', ''),
308a25f0a04SGreg Roach		'Ợ'     => array('1', '0', '', ''),
309a25f0a04SGreg Roach		'OE'      => array('1', '0', '', ''),
310a25f0a04SGreg Roach		'OI'      => array('1', '0', '1', ''),
311a25f0a04SGreg Roach		'OJ'      => array('1', '0', '1', ''),
312a25f0a04SGreg Roach		'OU'      => array('1', '0', '', ''),
313a25f0a04SGreg Roach		'OY'      => array('1', '0', '1', ''),
314a25f0a04SGreg Roach		'P'       => array('0', '7', '7', '7'),
315a25f0a04SGreg Roach		'PF'      => array('0', '7', '7', '7'),
316a25f0a04SGreg Roach		'PH'      => array('0', '7', '7', '7'),
317a25f0a04SGreg Roach		'Q'       => array('0', '5', '5', '5'),
318a25f0a04SGreg Roach		'R'       => array('0', '9', '9', '9'),
319a25f0a04SGreg Roach		'Ř'      => array('0', '4', '4', '4'),
320a25f0a04SGreg Roach		'RS'      => array('0', '4', '4', '4', '94', '94', '94'),
321a25f0a04SGreg Roach		'RZ'      => array('0', '4', '4', '4', '94', '94', '94'),
322a25f0a04SGreg Roach		'S'       => array('0', '4', '4', '4'),
323a25f0a04SGreg Roach		'Ś'      => array('0', '4', '4', '4'),
324a25f0a04SGreg Roach		'Š'      => array('0', '4', '4', '4'),
325a25f0a04SGreg Roach		'Ş'      => array('0', '4', '4', '4'),
326a25f0a04SGreg Roach		'SC'      => array('0', '2', '4', '4'),
327a25f0a04SGreg Roach		'ŠČ'    => array('0', '2', '4', '4'),
328a25f0a04SGreg Roach		'SCH'     => array('0', '4', '4', '4'),
329a25f0a04SGreg Roach		'SCHD'    => array('0', '2', '43', '43'),
330a25f0a04SGreg Roach		'SCHT'    => array('0', '2', '43', '43'),
331a25f0a04SGreg Roach		'SCHTCH'  => array('0', '2', '4', '4'),
332a25f0a04SGreg Roach		'SCHTSCH' => array('0', '2', '4', '4'),
333a25f0a04SGreg Roach		'SCHTSH'  => array('0', '2', '4', '4'),
334a25f0a04SGreg Roach		'SD'      => array('0', '2', '43', '43'),
335a25f0a04SGreg Roach		'SH'      => array('0', '4', '4', '4'),
336a25f0a04SGreg Roach		'SHCH'    => array('0', '2', '4', '4'),
337a25f0a04SGreg Roach		'SHD'     => array('0', '2', '43', '43'),
338a25f0a04SGreg Roach		'SHT'     => array('0', '2', '43', '43'),
339a25f0a04SGreg Roach		'SHTCH'   => array('0', '2', '4', '4'),
340a25f0a04SGreg Roach		'SHTSH'   => array('0', '2', '4', '4'),
341a25f0a04SGreg Roach		'ß'      => array('0', '', '4', '4'),
342a25f0a04SGreg Roach		'ST'      => array('0', '2', '43', '43'),
343a25f0a04SGreg Roach		'STCH'    => array('0', '2', '4', '4'),
344a25f0a04SGreg Roach		'STRS'    => array('0', '2', '4', '4'),
345a25f0a04SGreg Roach		'STRZ'    => array('0', '2', '4', '4'),
346a25f0a04SGreg Roach		'STSCH'   => array('0', '2', '4', '4'),
347a25f0a04SGreg Roach		'STSH'    => array('0', '2', '4', '4'),
348a25f0a04SGreg Roach		'SSZ'     => array('0', '4', '4', '4'),
349a25f0a04SGreg Roach		'SZ'      => array('0', '4', '4', '4'),
350a25f0a04SGreg Roach		'SZCS'    => array('0', '2', '4', '4'),
351a25f0a04SGreg Roach		'SZCZ'    => array('0', '2', '4', '4'),
352a25f0a04SGreg Roach		'SZD'     => array('0', '2', '43', '43'),
353a25f0a04SGreg Roach		'SZT'     => array('0', '2', '43', '43'),
354a25f0a04SGreg Roach		'T'       => array('0', '3', '3', '3'),
355a25f0a04SGreg Roach		'Ť'      => array('0', '3', '3', '3'),
356a25f0a04SGreg Roach		'Ţ'      => array('0', '3', '3', '3', '4', '4', '4'),
357a25f0a04SGreg Roach		'TC'      => array('0', '4', '4', '4'),
358a25f0a04SGreg Roach		'TCH'     => array('0', '4', '4', '4'),
359a25f0a04SGreg Roach		'TH'      => array('0', '3', '3', '3'),
360a25f0a04SGreg Roach		'TRS'     => array('0', '4', '4', '4'),
361a25f0a04SGreg Roach		'TRZ'     => array('0', '4', '4', '4'),
362a25f0a04SGreg Roach		'TS'      => array('0', '4', '4', '4'),
363a25f0a04SGreg Roach		'TSCH'    => array('0', '4', '4', '4'),
364a25f0a04SGreg Roach		'TSH'     => array('0', '4', '4', '4'),
365a25f0a04SGreg Roach		'TSZ'     => array('0', '4', '4', '4'),
366a25f0a04SGreg Roach		'TTCH'    => array('0', '4', '4', '4'),
367a25f0a04SGreg Roach		'TTS'     => array('0', '4', '4', '4'),
368a25f0a04SGreg Roach		'TTSCH'   => array('0', '4', '4', '4'),
369a25f0a04SGreg Roach		'TTSZ'    => array('0', '4', '4', '4'),
370a25f0a04SGreg Roach		'TTZ'     => array('0', '4', '4', '4'),
371a25f0a04SGreg Roach		'TZ'      => array('0', '4', '4', '4'),
372a25f0a04SGreg Roach		'TZS'     => array('0', '4', '4', '4'),
373a25f0a04SGreg Roach		'U'       => array('1', '0', '', ''),
374a25f0a04SGreg Roach		'Ù'      => array('1', '0', '', ''),
375a25f0a04SGreg Roach		'Ú'      => array('1', '0', '', ''),
376a25f0a04SGreg Roach		'Û'      => array('1', '0', '', ''),
377a25f0a04SGreg Roach		'Ü'      => array('1', '0', '', ''),
378a25f0a04SGreg Roach		'Ũ'      => array('1', '0', '', ''),
379a25f0a04SGreg Roach		'Ū'      => array('1', '0', '', ''),
380a25f0a04SGreg Roach		'Ů'      => array('1', '0', '', ''),
381a25f0a04SGreg Roach		'Ű'      => array('1', '0', '', ''),
382a25f0a04SGreg Roach		'Ų'      => array('1', '0', '', ''),
383a25f0a04SGreg Roach		'Ư'      => array('1', '0', '', ''),
384a25f0a04SGreg Roach		'Ụ'     => array('1', '0', '', ''),
385a25f0a04SGreg Roach		'Ủ'     => array('1', '0', '', ''),
386a25f0a04SGreg Roach		'Ứ'     => array('1', '0', '', ''),
387a25f0a04SGreg Roach		'Ừ'     => array('1', '0', '', ''),
388a25f0a04SGreg Roach		'Ử'     => array('1', '0', '', ''),
389a25f0a04SGreg Roach		'Ữ'     => array('1', '0', '', ''),
390a25f0a04SGreg Roach		'Ự'     => array('1', '0', '', ''),
391a25f0a04SGreg Roach		'UE'      => array('1', '0', '', ''),
392a25f0a04SGreg Roach		'UI'      => array('1', '0', '1', ''),
393a25f0a04SGreg Roach		'UJ'      => array('1', '0', '1', ''),
394a25f0a04SGreg Roach		'UY'      => array('1', '0', '1', ''),
395a25f0a04SGreg Roach		'UW'      => array('1', '0', '1', '', '0', '7', '7'),
396a25f0a04SGreg Roach		'V'       => array('0', '7', '7', '7'),
397a25f0a04SGreg Roach		'W'       => array('0', '7', '7', '7'),
398a25f0a04SGreg Roach		'X'       => array('0', '5', '54', '54'),
399a25f0a04SGreg Roach		'Y'       => array('1', '1', '', ''),
400a25f0a04SGreg Roach		'Ý'      => array('1', '1', '', ''),
401a25f0a04SGreg Roach		'Ỳ'     => array('1', '1', '', ''),
402a25f0a04SGreg Roach		'Ỵ'     => array('1', '1', '', ''),
403a25f0a04SGreg Roach		'Ỷ'     => array('1', '1', '', ''),
404a25f0a04SGreg Roach		'Ỹ'     => array('1', '1', '', ''),
405a25f0a04SGreg Roach		'Z'       => array('0', '4', '4', '4'),
406a25f0a04SGreg Roach		'Ź'      => array('0', '4', '4', '4'),
407a25f0a04SGreg Roach		'Ż'      => array('0', '4', '4', '4'),
408a25f0a04SGreg Roach		'Ž'      => array('0', '4', '4', '4'),
409a25f0a04SGreg Roach		'ZD'      => array('0', '2', '43', '43'),
410a25f0a04SGreg Roach		'ZDZ'     => array('0', '2', '4', '4'),
411a25f0a04SGreg Roach		'ZDZH'    => array('0', '2', '4', '4'),
412a25f0a04SGreg Roach		'ZH'      => array('0', '4', '4', '4'),
413a25f0a04SGreg Roach		'ZHD'     => array('0', '2', '43', '43'),
414a25f0a04SGreg Roach		'ZHDZH'   => array('0', '2', '4', '4'),
415a25f0a04SGreg Roach		'ZS'      => array('0', '4', '4', '4'),
416a25f0a04SGreg Roach		'ZSCH'    => array('0', '4', '4', '4'),
417a25f0a04SGreg Roach		'ZSH'     => array('0', '4', '4', '4'),
418a25f0a04SGreg Roach		'ZZS'     => array('0', '4', '4', '4'),
419a25f0a04SGreg Roach		// Cyrillic alphabet
420a25f0a04SGreg Roach		'А'   => array('1', '0', '', ''),
421a25f0a04SGreg Roach		'Б'   => array('0', '7', '7', '7'),
422a25f0a04SGreg Roach		'В'   => array('0', '7', '7', '7'),
423a25f0a04SGreg Roach		'Г'   => array('0', '5', '5', '5'),
424a25f0a04SGreg Roach		'Д'   => array('0', '3', '3', '3'),
425a25f0a04SGreg Roach		'ДЗ' => array('0', '4', '4', '4'),
426a25f0a04SGreg Roach		'Е'   => array('1', '0', '', ''),
427a25f0a04SGreg Roach		'Ё'   => array('1', '0', '', ''),
428a25f0a04SGreg Roach		'Ж'   => array('0', '4', '4', '4'),
429a25f0a04SGreg Roach		'З'   => array('0', '4', '4', '4'),
430a25f0a04SGreg Roach		'И'   => array('1', '0', '', ''),
431a25f0a04SGreg Roach		'Й'   => array('1', '1', '', '', '4', '4', '4'),
432a25f0a04SGreg Roach		'К'   => array('0', '5', '5', '5'),
433a25f0a04SGreg Roach		'Л'   => array('0', '8', '8', '8'),
434a25f0a04SGreg Roach		'М'   => array('0', '6', '6', '6'),
435a25f0a04SGreg Roach		'Н'   => array('0', '6', '6', '6'),
436a25f0a04SGreg Roach		'О'   => array('1', '0', '', ''),
437a25f0a04SGreg Roach		'П'   => array('0', '7', '7', '7'),
438a25f0a04SGreg Roach		'Р'   => array('0', '9', '9', '9'),
439a25f0a04SGreg Roach		'РЖ' => array('0', '4', '4', '4'),
440a25f0a04SGreg Roach		'С'   => array('0', '4', '4', '4'),
441a25f0a04SGreg Roach		'Т'   => array('0', '3', '3', '3'),
442a25f0a04SGreg Roach		'У'   => array('1', '0', '', ''),
443a25f0a04SGreg Roach		'Ф'   => array('0', '7', '7', '7'),
444a25f0a04SGreg Roach		'Х'   => array('0', '5', '5', '5'),
445a25f0a04SGreg Roach		'Ц'   => array('0', '4', '4', '4'),
446a25f0a04SGreg Roach		'Ч'   => array('0', '4', '4', '4'),
447a25f0a04SGreg Roach		'Ш'   => array('0', '4', '4', '4'),
448a25f0a04SGreg Roach		'Щ'   => array('0', '2', '4', '4'),
449a25f0a04SGreg Roach		'Ъ'   => array('0', '', '', ''),
450a25f0a04SGreg Roach		'Ы'   => array('0', '1', '', ''),
451a25f0a04SGreg Roach		'Ь'   => array('0', '', '', ''),
452a25f0a04SGreg Roach		'Э'   => array('1', '0', '', ''),
453a25f0a04SGreg Roach		'Ю'   => array('0', '1', '', ''),
454a25f0a04SGreg Roach		'Я'   => array('0', '1', '', ''),
455a25f0a04SGreg Roach		// Greek alphabet
456a25f0a04SGreg Roach		'Α'   => array('1', '0', '', ''),
457a25f0a04SGreg Roach		'Ά'   => array('1', '0', '', ''),
458a25f0a04SGreg Roach		'ΑΙ' => array('1', '0', '1', ''),
459a25f0a04SGreg Roach		'ΑΥ' => array('1', '0', '1', ''),
460a25f0a04SGreg Roach		'Β'   => array('0', '7', '7', '7'),
461a25f0a04SGreg Roach		'Γ'   => array('0', '5', '5', '5'),
462a25f0a04SGreg Roach		'Δ'   => array('0', '3', '3', '3'),
463a25f0a04SGreg Roach		'Ε'   => array('1', '0', '', ''),
464a25f0a04SGreg Roach		'Έ'   => array('1', '0', '', ''),
465a25f0a04SGreg Roach		'ΕΙ' => array('1', '0', '1', ''),
466a25f0a04SGreg Roach		'ΕΥ' => array('1', '1', '1', ''),
467a25f0a04SGreg Roach		'Ζ'   => array('0', '4', '4', '4'),
468a25f0a04SGreg Roach		'Η'   => array('1', '0', '', ''),
469a25f0a04SGreg Roach		'Ή'   => array('1', '0', '', ''),
470a25f0a04SGreg Roach		'Θ'   => array('0', '3', '3', '3'),
471a25f0a04SGreg Roach		'Ι'   => array('1', '0', '', ''),
472a25f0a04SGreg Roach		'Ί'   => array('1', '0', '', ''),
473a25f0a04SGreg Roach		'Ϊ'   => array('1', '0', '', ''),
474a25f0a04SGreg Roach		'ΐ'   => array('1', '0', '', ''),
475a25f0a04SGreg Roach		'Κ'   => array('0', '5', '5', '5'),
476a25f0a04SGreg Roach		'Λ'   => array('0', '8', '8', '8'),
477a25f0a04SGreg Roach		'Μ'   => array('0', '6', '6', '6'),
478a25f0a04SGreg Roach		'ΜΠ' => array('0', '7', '7', '7'),
479a25f0a04SGreg Roach		'Ν'   => array('0', '6', '6', '6'),
480a25f0a04SGreg Roach		'ΝΤ' => array('0', '3', '3', '3'),
481a25f0a04SGreg Roach		'Ξ'   => array('0', '5', '54', '54'),
482a25f0a04SGreg Roach		'Ο'   => array('1', '0', '', ''),
483a25f0a04SGreg Roach		'Ό'   => array('1', '0', '', ''),
484a25f0a04SGreg Roach		'ΟΙ' => array('1', '0', '1', ''),
485a25f0a04SGreg Roach		'ΟΥ' => array('1', '0', '1', ''),
486a25f0a04SGreg Roach		'Π'   => array('0', '7', '7', '7'),
487a25f0a04SGreg Roach		'Ρ'   => array('0', '9', '9', '9'),
488a25f0a04SGreg Roach		'Σ'   => array('0', '4', '4', '4'),
489a25f0a04SGreg Roach		'ς'   => array('0', '', '', '4'),
490a25f0a04SGreg Roach		'Τ'   => array('0', '3', '3', '3'),
491a25f0a04SGreg Roach		'ΤΖ' => array('0', '4', '4', '4'),
492a25f0a04SGreg Roach		'ΤΣ' => array('0', '4', '4', '4'),
493a25f0a04SGreg Roach		'Υ'   => array('1', '1', '', ''),
494a25f0a04SGreg Roach		'Ύ'   => array('1', '1', '', ''),
495a25f0a04SGreg Roach		'Ϋ'   => array('1', '1', '', ''),
496a25f0a04SGreg Roach		'ΰ'   => array('1', '1', '', ''),
497a25f0a04SGreg Roach		'ΥΚ' => array('1', '5', '5', '5'),
498a25f0a04SGreg Roach		'ΥΥ' => array('1', '65', '65', '65'),
499a25f0a04SGreg Roach		'Φ'   => array('0', '7', '7', '7'),
500a25f0a04SGreg Roach		'Χ'   => array('0', '5', '5', '5'),
501a25f0a04SGreg Roach		'Ψ'   => array('0', '7', '7', '7'),
502a25f0a04SGreg Roach		'Ω'   => array('1', '0', '', ''),
503a25f0a04SGreg Roach		'Ώ'   => array('1', '0', '', ''),
504a25f0a04SGreg Roach		// Hebrew alphabet
505a25f0a04SGreg Roach		'א'     => array('1', '0', '', ''),
506a25f0a04SGreg Roach		'או'   => array('1', '0', '7', ''),
507a25f0a04SGreg Roach		'אג'   => array('1', '4', '4', '4', '5', '5', '5', '34', '34', '34'),
508a25f0a04SGreg Roach		'בב'   => array('0', '7', '7', '7', '77', '77', '77'),
509a25f0a04SGreg Roach		'ב'     => array('0', '7', '7', '7'),
510a25f0a04SGreg Roach		'גג'   => array('0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'),
511a25f0a04SGreg Roach		'גד'   => array('0', '43', '43', '43', '53', '53', '53'),
512a25f0a04SGreg Roach		'גה'   => array('0', '45', '45', '45', '55', '55', '55'),
513a25f0a04SGreg Roach		'גז'   => array('0', '44', '44', '44', '45', '45', '45'),
514a25f0a04SGreg Roach		'גח'   => array('0', '45', '45', '45', '55', '55', '55'),
515a25f0a04SGreg Roach		'גכ'   => array('0', '45', '45', '45', '55', '55', '55'),
516a25f0a04SGreg Roach		'גך'   => array('0', '45', '45', '45', '55', '55', '55'),
517a25f0a04SGreg Roach		'גצ'   => array('0', '44', '44', '44', '45', '45', '45'),
518a25f0a04SGreg Roach		'גץ'   => array('0', '44', '44', '44', '45', '45', '45'),
519a25f0a04SGreg Roach		'גק'   => array('0', '45', '45', '45', '54', '54', '54'),
520a25f0a04SGreg Roach		'גש'   => array('0', '44', '44', '44', '54', '54', '54'),
521a25f0a04SGreg Roach		'גת'   => array('0', '43', '43', '43', '53', '53', '53'),
522a25f0a04SGreg Roach		'ג'     => array('0', '4', '4', '4', '5', '5', '5'),
523a25f0a04SGreg Roach		'דז'   => array('0', '4', '4', '4'),
524a25f0a04SGreg Roach		'דד'   => array('0', '3', '3', '3', '33', '33', '33'),
525a25f0a04SGreg Roach		'דט'   => array('0', '33', '33', '33'),
526a25f0a04SGreg Roach		'דש'   => array('0', '4', '4', '4'),
527a25f0a04SGreg Roach		'דצ'   => array('0', '4', '4', '4'),
528a25f0a04SGreg Roach		'דץ'   => array('0', '4', '4', '4'),
529a25f0a04SGreg Roach		'ד'     => array('0', '3', '3', '3'),
530a25f0a04SGreg Roach		'הג'   => array('0', '54', '54', '54', '55', '55', '55'),
531a25f0a04SGreg Roach		'הכ'   => array('0', '55', '55', '55'),
532a25f0a04SGreg Roach		'הח'   => array('0', '55', '55', '55'),
533a25f0a04SGreg Roach		'הק'   => array('0', '55', '55', '55', '5', '5', '5'),
534a25f0a04SGreg Roach		'הה'   => array('0', '5', '5', '', '55', '55', ''),
535a25f0a04SGreg Roach		'ה'     => array('0', '5', '5', ''),
536a25f0a04SGreg Roach		'וי'   => array('1', '', '', '', '7', '7', '7'),
537a25f0a04SGreg Roach		'ו'     => array('1', '7', '7', '7', '7', '', ''),
538a25f0a04SGreg Roach		'וו'   => array('1', '7', '7', '7', '7', '', ''),
539a25f0a04SGreg Roach		'וופ' => array('1', '7', '7', '7', '77', '77', '77'),
540a25f0a04SGreg Roach		'זש'   => array('0', '4', '4', '4', '44', '44', '44'),
541a25f0a04SGreg Roach		'זדז' => array('0', '2', '4', '4'),
542a25f0a04SGreg Roach		'ז'     => array('0', '4', '4', '4'),
543a25f0a04SGreg Roach		'זג'   => array('0', '44', '44', '44', '45', '45', '45'),
544a25f0a04SGreg Roach		'זז'   => array('0', '4', '4', '4', '44', '44', '44'),
545a25f0a04SGreg Roach		'זס'   => array('0', '44', '44', '44'),
546a25f0a04SGreg Roach		'זצ'   => array('0', '44', '44', '44'),
547a25f0a04SGreg Roach		'זץ'   => array('0', '44', '44', '44'),
548a25f0a04SGreg Roach		'חג'   => array('0', '54', '54', '54', '53', '53', '53'),
549a25f0a04SGreg Roach		'חח'   => array('0', '5', '5', '5', '55', '55', '55'),
550a25f0a04SGreg Roach		'חק'   => array('0', '55', '55', '55', '5', '5', '5'),
551a25f0a04SGreg Roach		'חכ'   => array('0', '45', '45', '45', '55', '55', '55'),
552a25f0a04SGreg Roach		'חס'   => array('0', '5', '54', '54'),
553a25f0a04SGreg Roach		'חש'   => array('0', '5', '54', '54'),
554a25f0a04SGreg Roach		'ח'     => array('0', '5', '5', '5'),
555a25f0a04SGreg Roach		'טש'   => array('0', '4', '4', '4'),
556a25f0a04SGreg Roach		'טד'   => array('0', '33', '33', '33'),
557a25f0a04SGreg Roach		'טי'   => array('0', '3', '3', '3', '4', '4', '4', '3', '3', '34'),
558a25f0a04SGreg Roach		'טת'   => array('0', '33', '33', '33'),
559a25f0a04SGreg Roach		'טט'   => array('0', '3', '3', '3', '33', '33', '33'),
560a25f0a04SGreg Roach		'ט'     => array('0', '3', '3', '3'),
561a25f0a04SGreg Roach		'י'     => array('1', '1', '', ''),
562a25f0a04SGreg Roach		'יא'   => array('1', '1', '', '', '1', '1', '1'),
563a25f0a04SGreg Roach		'כג'   => array('0', '55', '55', '55', '54', '54', '54'),
564a25f0a04SGreg Roach		'כש'   => array('0', '5', '54', '54'),
565a25f0a04SGreg Roach		'כס'   => array('0', '5', '54', '54'),
566a25f0a04SGreg Roach		'ככ'   => array('0', '5', '5', '5', '55', '55', '55'),
567a25f0a04SGreg Roach		'כך'   => array('0', '5', '5', '5', '55', '55', '55'),
568a25f0a04SGreg Roach		'כ'     => array('0', '5', '5', '5'),
569a25f0a04SGreg Roach		'כח'   => array('0', '55', '55', '55', '5', '5', '5'),
570a25f0a04SGreg Roach		'ך'     => array('0', '', '5', '5'),
571a25f0a04SGreg Roach		'ל'     => array('0', '8', '8', '8'),
572a25f0a04SGreg Roach		'לל'   => array('0', '88', '88', '88', '8', '8', '8'),
573a25f0a04SGreg Roach		'מנ'   => array('0', '66', '66', '66'),
574a25f0a04SGreg Roach		'מן'   => array('0', '66', '66', '66'),
575a25f0a04SGreg Roach		'ממ'   => array('0', '6', '6', '6', '66', '66', '66'),
576a25f0a04SGreg Roach		'מם'   => array('0', '6', '6', '6', '66', '66', '66'),
577a25f0a04SGreg Roach		'מ'     => array('0', '6', '6', '6'),
578a25f0a04SGreg Roach		'ם'     => array('0', '', '6', '6'),
579a25f0a04SGreg Roach		'נמ'   => array('0', '66', '66', '66'),
580a25f0a04SGreg Roach		'נם'   => array('0', '66', '66', '66'),
581a25f0a04SGreg Roach		'ננ'   => array('0', '6', '6', '6', '66', '66', '66'),
582a25f0a04SGreg Roach		'נן'   => array('0', '6', '6', '6', '66', '66', '66'),
583a25f0a04SGreg Roach		'נ'     => array('0', '6', '6', '6'),
584a25f0a04SGreg Roach		'ן'     => array('0', '', '6', '6'),
585a25f0a04SGreg Roach		'סתש' => array('0', '2', '4', '4'),
586a25f0a04SGreg Roach		'סתז' => array('0', '2', '4', '4'),
587a25f0a04SGreg Roach		'סטז' => array('0', '2', '4', '4'),
588a25f0a04SGreg Roach		'סטש' => array('0', '2', '4', '4'),
589a25f0a04SGreg Roach		'סצד' => array('0', '2', '4', '4'),
590a25f0a04SGreg Roach		'סט'   => array('0', '2', '4', '4', '43', '43', '43'),
591a25f0a04SGreg Roach		'סת'   => array('0', '2', '4', '4', '43', '43', '43'),
592a25f0a04SGreg Roach		'סג'   => array('0', '44', '44', '44', '4', '4', '4'),
593a25f0a04SGreg Roach		'סס'   => array('0', '4', '4', '4', '44', '44', '44'),
594a25f0a04SGreg Roach		'סצ'   => array('0', '44', '44', '44'),
595a25f0a04SGreg Roach		'סץ'   => array('0', '44', '44', '44'),
596a25f0a04SGreg Roach		'סז'   => array('0', '44', '44', '44'),
597a25f0a04SGreg Roach		'סש'   => array('0', '44', '44', '44'),
598a25f0a04SGreg Roach		'ס'     => array('0', '4', '4', '4'),
599a25f0a04SGreg Roach		'ע'     => array('1', '0', '', ''),
600a25f0a04SGreg Roach		'פב'   => array('0', '7', '7', '7', '77', '77', '77'),
601a25f0a04SGreg Roach		'פוו' => array('0', '7', '7', '7', '77', '77', '77'),
602a25f0a04SGreg Roach		'פפ'   => array('0', '7', '7', '7', '77', '77', '77'),
603a25f0a04SGreg Roach		'פף'   => array('0', '7', '7', '7', '77', '77', '77'),
604a25f0a04SGreg Roach		'פ'     => array('0', '7', '7', '7'),
605a25f0a04SGreg Roach		'ף'     => array('0', '', '7', '7'),
606a25f0a04SGreg Roach		'צג'   => array('0', '44', '44', '44', '45', '45', '45'),
607a25f0a04SGreg Roach		'צז'   => array('0', '44', '44', '44'),
608a25f0a04SGreg Roach		'צס'   => array('0', '44', '44', '44'),
609a25f0a04SGreg Roach		'צצ'   => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'),
610a25f0a04SGreg Roach		'צץ'   => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'),
611a25f0a04SGreg Roach		'צש'   => array('0', '44', '44', '44', '4', '4', '4', '5', '5', '5'),
612a25f0a04SGreg Roach		'צ'     => array('0', '4', '4', '4', '5', '5', '5'),
613a25f0a04SGreg Roach		'ץ'     => array('0', '', '4', '4'),
614a25f0a04SGreg Roach		'קה'   => array('0', '55', '55', '5'),
615a25f0a04SGreg Roach		'קס'   => array('0', '5', '54', '54'),
616a25f0a04SGreg Roach		'קש'   => array('0', '5', '54', '54'),
617a25f0a04SGreg Roach		'קק'   => array('0', '5', '5', '5', '55', '55', '55'),
618a25f0a04SGreg Roach		'קח'   => array('0', '55', '55', '55'),
619a25f0a04SGreg Roach		'קכ'   => array('0', '55', '55', '55'),
620a25f0a04SGreg Roach		'קך'   => array('0', '55', '55', '55'),
621a25f0a04SGreg Roach		'קג'   => array('0', '55', '55', '55', '54', '54', '54'),
622a25f0a04SGreg Roach		'ק'     => array('0', '5', '5', '5'),
623a25f0a04SGreg Roach		'רר'   => array('0', '99', '99', '99', '9', '9', '9'),
624a25f0a04SGreg Roach		'ר'     => array('0', '9', '9', '9'),
625a25f0a04SGreg Roach		'שטז' => array('0', '2', '4', '4'),
626a25f0a04SGreg Roach		'שתש' => array('0', '2', '4', '4'),
627a25f0a04SGreg Roach		'שתז' => array('0', '2', '4', '4'),
628a25f0a04SGreg Roach		'שטש' => array('0', '2', '4', '4'),
629a25f0a04SGreg Roach		'שד'   => array('0', '2', '43', '43'),
630a25f0a04SGreg Roach		'שז'   => array('0', '44', '44', '44'),
631a25f0a04SGreg Roach		'שס'   => array('0', '44', '44', '44'),
632a25f0a04SGreg Roach		'שת'   => array('0', '2', '43', '43'),
633a25f0a04SGreg Roach		'שג'   => array('0', '4', '4', '4', '44', '44', '44', '4', '43', '43'),
634a25f0a04SGreg Roach		'שט'   => array('0', '2', '43', '43', '44', '44', '44'),
635a25f0a04SGreg Roach		'שצ'   => array('0', '44', '44', '44', '45', '45', '45'),
636a25f0a04SGreg Roach		'שץ'   => array('0', '44', '', '44', '45', '', '45'),
637a25f0a04SGreg Roach		'שש'   => array('0', '4', '4', '4', '44', '44', '44'),
638a25f0a04SGreg Roach		'ש'     => array('0', '4', '4', '4'),
639a25f0a04SGreg Roach		'תג'   => array('0', '34', '34', '34'),
640a25f0a04SGreg Roach		'תז'   => array('0', '34', '34', '34'),
641a25f0a04SGreg Roach		'תש'   => array('0', '4', '4', '4'),
642a25f0a04SGreg Roach		'תת'   => array('0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'),
643a25f0a04SGreg Roach		'ת'     => array('0', '3', '3', '3', '4', '4', '4'),
644a25f0a04SGreg Roach		// Arabic alphabet
645a25f0a04SGreg Roach		'ا'   => array('1', '0', '', ''),
646a25f0a04SGreg Roach		'ب'   => array('0', '7', '7', '7'),
647a25f0a04SGreg Roach		'ت'   => array('0', '3', '3', '3'),
648a25f0a04SGreg Roach		'ث'   => array('0', '3', '3', '3'),
649a25f0a04SGreg Roach		'ج'   => array('0', '4', '4', '4'),
650a25f0a04SGreg Roach		'ح'   => array('0', '5', '5', '5'),
651a25f0a04SGreg Roach		'خ'   => array('0', '5', '5', '5'),
652a25f0a04SGreg Roach		'د'   => array('0', '3', '3', '3'),
653a25f0a04SGreg Roach		'ذ'   => array('0', '3', '3', '3'),
654a25f0a04SGreg Roach		'ر'   => array('0', '9', '9', '9'),
655a25f0a04SGreg Roach		'ز'   => array('0', '4', '4', '4'),
656a25f0a04SGreg Roach		'س'   => array('0', '4', '4', '4'),
657a25f0a04SGreg Roach		'ش'   => array('0', '4', '4', '4'),
658a25f0a04SGreg Roach		'ص'   => array('0', '4', '4', '4'),
659a25f0a04SGreg Roach		'ض'   => array('0', '3', '3', '3'),
660a25f0a04SGreg Roach		'ط'   => array('0', '3', '3', '3'),
661a25f0a04SGreg Roach		'ظ'   => array('0', '4', '4', '4'),
662a25f0a04SGreg Roach		'ع'   => array('1', '0', '', ''),
663a25f0a04SGreg Roach		'غ'   => array('0', '0', '', ''),
664a25f0a04SGreg Roach		'ف'   => array('0', '7', '7', '7'),
665a25f0a04SGreg Roach		'ق'   => array('0', '5', '5', '5'),
666a25f0a04SGreg Roach		'ك'   => array('0', '5', '5', '5'),
667a25f0a04SGreg Roach		'ل'   => array('0', '8', '8', '8'),
668a25f0a04SGreg Roach		'لا' => array('0', '8', '8', '8'),
669a25f0a04SGreg Roach		'م'   => array('0', '6', '6', '6'),
670a25f0a04SGreg Roach		'ن'   => array('0', '6', '6', '6'),
671a25f0a04SGreg Roach		'هن' => array('0', '66', '66', '66'),
672a25f0a04SGreg Roach		'ه'   => array('0', '5', '5', ''),
673a25f0a04SGreg Roach		'و'   => array('1', '', '', '', '7', '', ''),
674a25f0a04SGreg Roach		'ي'   => array('0', '1', '', ''),
675a25f0a04SGreg Roach		'آ'   => array('0', '1', '', ''),
676a25f0a04SGreg Roach		'ة'   => array('0', '', '', '3'),
677a25f0a04SGreg Roach		'ی'   => array('0', '1', '', ''),
678a25f0a04SGreg Roach		'ى'   => array('1', '1', '', ''),
679a25f0a04SGreg Roach	);
680a25f0a04SGreg Roach
/**
 * Calculate the Daitch-Mokotoff soundex codes for a single word.
 *
 * The word is upper-cased, rewritten using self::$transformNameTable and then
 * consumed from left to right.  At each position the longest chunk that has an
 * entry in self::$dmsounds is looked up.  Each table entry holds a vowel flag in
 * element 0 followed by one or more triplets of codes; within a triplet the
 * code used depends on the position of the chunk:
 *   1 - the chunk is the first one coded in the word,
 *   2 - the chunk is followed by a vowel,
 *   3 - any other position.
 * Every additional triplet in an entry is an alternative pronunciation, so a
 * single word can produce several codes.
 *
 * @param string $name
 *
 * @return string[] List of possible DM codes for the word.  Each code is exactly
 *                  six digits long (zero-padded or truncated).  The list is empty
 *                  when no part of the word produces a code.
 */
private static function daitchMokotoffWord($name) {
	// Apply special transformation rules to the input string
	$name = I18N::strtoupper($name);
	foreach (self::$transformNameTable as $transformRule) {
		$name = str_replace($transformRule[0], $transformRule[1], $name);
	}

	// For Hebrew and Arabic names, repeated sounds are kept rather than merged (see below)
	$name_script = I18N::textScript($name);
	$noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');

	// The string is scanned byte-wise; the table keys are matched as raw byte strings
	$lastPos       = strlen($name) - 1;
	$currPos       = 0;
	$atStart       = true; // true until the first chunk has been coded
	$result        = array(); // accumulate complete 6-digit D-M codes here
	$partialResult = array(array('!')); // incomplete D-M codes ('!' stops the "duplicate sound" check)

	// Loop through the input string.
	// Stop when the string is exhausted or when no more partial results remain
	while (count($partialResult) !== 0 && $currPos <= $lastPos) {
		// Find the longest chunk (at most self::MAXCHAR bytes) at the current position
		// that has an entry in the coding table, dropping one byte at a time.
		$thisEntry = (string) substr($name, $currPos, self::MAXCHAR);
		while ($thisEntry !== '' && !isset(self::$dmsounds[$thisEntry])) {
			$thisEntry = (string) substr($thisEntry, 0, -1);
		}
		if ($thisEntry === '') {
			$currPos++; // Not in table: advance pointer to next byte
			continue; // and try again
		}

		$soundTableEntry = self::$dmsounds[$thisEntry];
		$entrySize       = count($soundTableEntry);
		$workingResult   = $partialResult;
		$partialResult   = array();
		$currPos += strlen($thisEntry);

		if ($atStart) {
			// First coded chunk of the word
			$state   = 1;
			$atStart = false;
		} else {
			// Look ahead: find the chunk that follows this one (if any)
			$nextEntry = '';
			if ($currPos <= $lastPos) {
				$nextEntry = (string) substr($name, $currPos, self::MAXCHAR);
				while ($nextEntry !== '' && !isset(self::$dmsounds[$nextEntry])) {
					$nextEntry = (string) substr($nextEntry, 0, -1);
				}
			}
			// Element 0 of a table entry is '0' for chunks that are not vowels
			$nextIsVowel = ($nextEntry !== '' && self::$dmsounds[$nextEntry][0] != '0');
			// 2: this chunk comes before a vowel, 3: any other position
			$state = $nextIsVowel ? 2 : 3;
		}

		// Visit the code for this state in every triplet (alternative pronunciation) of the entry
		for ($column = $state; $column < $entrySize; $column += 3) {
			$sound = $soundTableEntry[$column];

			// empty means 'ignore this sound in this state'
			if ($sound == '') {
				foreach ($workingResult as $workingEntry) {
					// Mark the last code so that the next sound is never treated as its duplicate
					$workingEntry[count($workingEntry) - 1] .= '!';
					$partialResult[] = $workingEntry;
				}
				continue;
			}

			foreach ($workingResult as $workingEntry) {
				$isDuplicate = ($sound === $workingEntry[count($workingEntry) - 1]);
				// A sound that repeats the previous one is normally dropped.
				// For Hebrew and Arabic it is appended anyway, so the code contains both occurrences.
				if (!$isDuplicate || $noVowels) {
					$workingEntry[] = $sound;
				}

				if (count($workingEntry) < 7) {
					$partialResult[] = $workingEntry;
					continue;
				}

				// This is the 6th code in the sequence
				// We're looking for 7 entries because the first is '!' and doesn't count
				$tempResult = str_replace('!', '', implode('', $workingEntry));
				// Only return codes from recognisable sounds.
				// Compare with '' explicitly: the code '0' is valid but is falsy in PHP.
				if ($tempResult !== '') {
					$result[] = substr($tempResult . '000000', 0, 6);
				}
			}
		}
	}

	// Zero-fill and copy all remaining partial results
	foreach ($partialResult as $workingEntry) {
		$tempResult = str_replace('!', '', implode('', $workingEntry));
		// Only return codes from recognisable sounds.
		// Compare with '' explicitly: the code '0' is valid but is falsy in PHP,
		// and would otherwise be lost instead of becoming '000000'.
		if ($tempResult !== '') {
			$result[] = substr($tempResult . '000000', 0, 6);
		}
	}

	return $result;
}
797a25f0a04SGreg Roach}
798