xref: /webtrees/app/Soundex.php (revision dd04c183d8beed05be2226b30b7dda485ea4538a)
1a25f0a04SGreg Roach<?php
2*dd04c183SGreg Roachnamespace Fisharebest\Webtrees;
3a25f0a04SGreg Roach
4a25f0a04SGreg Roach/**
5a25f0a04SGreg Roach * webtrees: online genealogy
6a25f0a04SGreg Roach * Copyright (C) 2015 webtrees development team
7a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify
8a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by
9a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or
10a25f0a04SGreg Roach * (at your option) any later version.
11a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful,
12a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
13a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14a25f0a04SGreg Roach * GNU General Public License for more details.
15a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License
16a25f0a04SGreg Roach * along with this program. If not, see <http://www.gnu.org/licenses/>.
17a25f0a04SGreg Roach */
18a25f0a04SGreg Roach
19a25f0a04SGreg Roach/**
20a25f0a04SGreg Roach * Class Soundex Functions for phonetic matching of strings
21a25f0a04SGreg Roach */
22a25f0a04SGreg Roachclass Soundex {
23a25f0a04SGreg Roach	/**
24a25f0a04SGreg Roach	 * @return string[]
25a25f0a04SGreg Roach	 */
26a25f0a04SGreg Roach	public static function getAlgorithms() {
27a25f0a04SGreg Roach		return array(
28a25f0a04SGreg Roach			'std' => /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell'),
29a25f0a04SGreg Roach			'dm'  => /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff'),
30a25f0a04SGreg Roach		);
31a25f0a04SGreg Roach	}
32a25f0a04SGreg Roach
33a25f0a04SGreg Roach	/**
34a25f0a04SGreg Roach	 * @param string $algorithm
35a25f0a04SGreg Roach	 * @param string $text
36a25f0a04SGreg Roach	 *
37a25f0a04SGreg Roach	 * @return string
38a25f0a04SGreg Roach	 */
39a25f0a04SGreg Roach	public static function soundex($algorithm, $text) {
40a25f0a04SGreg Roach		switch ($algorithm) {
41a25f0a04SGreg Roach		case 'std':
42a25f0a04SGreg Roach			return self::russell($text);
43a25f0a04SGreg Roach		case 'dm':
44a25f0a04SGreg Roach			return self::daitchMokotoff($text);
45a25f0a04SGreg Roach		default:
46a25f0a04SGreg Roach			throw new \InvalidArgumentException('Bad argument to Soundex::soundex()');
47a25f0a04SGreg Roach		}
48a25f0a04SGreg Roach	}
49a25f0a04SGreg Roach
50a25f0a04SGreg Roach	/**
51a25f0a04SGreg Roach	 * Is there a match between two soundex codes?
52a25f0a04SGreg Roach	 *
53a25f0a04SGreg Roach	 * @param string $soundex1
54a25f0a04SGreg Roach	 * @param string $soundex2
55a25f0a04SGreg Roach	 *
56a25f0a04SGreg Roach	 * @return boolean
57a25f0a04SGreg Roach	 */
58a25f0a04SGreg Roach	public static function compare($soundex1, $soundex2) {
59a25f0a04SGreg Roach		if ($soundex1 && $soundex2) {
60a25f0a04SGreg Roach			foreach (explode(':', $soundex1) as $code) {
61a25f0a04SGreg Roach				if (strpos($soundex2, $code) !== false) {
62a25f0a04SGreg Roach					return true;
63a25f0a04SGreg Roach				}
64a25f0a04SGreg Roach			}
65a25f0a04SGreg Roach		}
66a25f0a04SGreg Roach
67a25f0a04SGreg Roach		return false;
68a25f0a04SGreg Roach	}
69a25f0a04SGreg Roach
70a25f0a04SGreg Roach	/**
71a25f0a04SGreg Roach	 * Generate Russell soundex codes for a given text.
72a25f0a04SGreg Roach	 *
73a25f0a04SGreg Roach	 * @param $text
74a25f0a04SGreg Roach	 *
75a25f0a04SGreg Roach	 * @return null|string
76a25f0a04SGreg Roach	 */
77a25f0a04SGreg Roach	public static function russell($text) {
78a25f0a04SGreg Roach		$words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
79a25f0a04SGreg Roach		$soundex_array = array();
80a25f0a04SGreg Roach		foreach ($words as $word) {
81a25f0a04SGreg Roach			$soundex = soundex($word);
82a25f0a04SGreg Roach			// Only return codes from recognisable sounds
83a25f0a04SGreg Roach			if ($soundex !== '0000') {
84a25f0a04SGreg Roach				$soundex_array[] = $soundex;
85a25f0a04SGreg Roach			}
86a25f0a04SGreg Roach		}
87a25f0a04SGreg Roach		// Combine words, e.g. “New York” as “Newyork”
88a25f0a04SGreg Roach		if (count($words) > 1) {
89a25f0a04SGreg Roach			$soundex_array[] = soundex(strtr($text, ' ', ''));
90a25f0a04SGreg Roach		}
91a25f0a04SGreg Roach		// A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
92a25f0a04SGreg Roach		$soundex_array = array_slice(array_unique($soundex_array), 0, 51);
93a25f0a04SGreg Roach
94a25f0a04SGreg Roach		if ($soundex_array) {
95a25f0a04SGreg Roach			return implode(':', $soundex_array);
96a25f0a04SGreg Roach		} else {
97a25f0a04SGreg Roach			return null;
98a25f0a04SGreg Roach		}
99a25f0a04SGreg Roach	}
100a25f0a04SGreg Roach
101a25f0a04SGreg Roach	/**
102a25f0a04SGreg Roach	 * Generate Daitch–Mokotoff soundex codes for a given text.
103a25f0a04SGreg Roach	 *
104a25f0a04SGreg Roach	 * @param $text
105a25f0a04SGreg Roach	 *
106a25f0a04SGreg Roach	 * @return null|string
107a25f0a04SGreg Roach	 */
108a25f0a04SGreg Roach	public static function daitchMokotoff($text) {
109a25f0a04SGreg Roach		$words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
110a25f0a04SGreg Roach		$soundex_array = array();
111a25f0a04SGreg Roach		foreach ($words as $word) {
112a25f0a04SGreg Roach			$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
113a25f0a04SGreg Roach		}
114a25f0a04SGreg Roach		// Combine words, e.g. “New York” as “Newyork”
115a25f0a04SGreg Roach		if (count($words) > 1) {
116a25f0a04SGreg Roach			$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(strtr($text, ' ', '')));
117a25f0a04SGreg Roach		}
118a25f0a04SGreg Roach		// A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
119a25f0a04SGreg Roach		$soundex_array = array_slice(array_unique($soundex_array), 0, 36);
120a25f0a04SGreg Roach
121a25f0a04SGreg Roach		if ($soundex_array) {
122a25f0a04SGreg Roach			return implode(':', $soundex_array);
123a25f0a04SGreg Roach		} else {
124a25f0a04SGreg Roach			return null;
125a25f0a04SGreg Roach		}
126a25f0a04SGreg Roach	}
127a25f0a04SGreg Roach
128a25f0a04SGreg Roach	// Determine the Daitch–Mokotoff Soundex code for a word
129a25f0a04SGreg Roach	// Original implementation by Gerry Kroll, and analysis by Meliza Amity
130a25f0a04SGreg Roach
131a25f0a04SGreg Roach	// Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
132a25f0a04SGreg Roach	const MAXCHAR = 7;
133a25f0a04SGreg Roach
134a25f0a04SGreg Roach	/**
135a25f0a04SGreg Roach	 * Name transformation arrays.
136a25f0a04SGreg Roach	 * Used to transform the Name string to simplify the "sounds like" table.
137a25f0a04SGreg Roach	 * This is especially useful in Hebrew.
138a25f0a04SGreg Roach	 *
139a25f0a04SGreg Roach	 * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text)
140a25f0a04SGreg Roach	 * function call to achieve the desired transformations.
141a25f0a04SGreg Roach	 *
142a25f0a04SGreg Roach	 * Note about the use of "\x01":
143a25f0a04SGreg Roach	 * This code, which can’t legitimately occur in the kind of text we're dealing with,
144a25f0a04SGreg Roach	 * is used as a place-holder so that conditional string replacements can be done.
145a25f0a04SGreg Roach	 *
146a25f0a04SGreg Roach	 * @var string[][]
147a25f0a04SGreg Roach	 */
148a25f0a04SGreg Roach	private static $transformNameTable = array(
149a25f0a04SGreg Roach		// Force Yiddish ligatures to be treated as separate letters
150a25f0a04SGreg Roach		array('װ', 'וו'),
151a25f0a04SGreg Roach		array('ײ', 'יי'),
152a25f0a04SGreg Roach		array('ױ', 'וי'),
153a25f0a04SGreg Roach		array('בו', 'בע'),
154a25f0a04SGreg Roach		array('פו', 'פע'),
155a25f0a04SGreg Roach		array('ומ', 'עמ'),
156a25f0a04SGreg Roach		array('ום', 'עם'),
157a25f0a04SGreg Roach		array('ונ', 'ענ'),
158a25f0a04SGreg Roach		array('ון', 'ען'),
159a25f0a04SGreg Roach		array('וו', 'ב'),
160a25f0a04SGreg Roach		array("\x01", ''),
161a25f0a04SGreg Roach		array('ייה$', "\x01ה"),
162a25f0a04SGreg Roach		array('ייע$', "\x01ע"),
163a25f0a04SGreg Roach		array('יי', 'ע'),
164a25f0a04SGreg Roach		array("\x01", 'יי'),
165a25f0a04SGreg Roach	);
166a25f0a04SGreg Roach
167a25f0a04SGreg Roach	/**
168a25f0a04SGreg Roach	 * The DM sound coding table is organized this way:
169a25f0a04SGreg Roach	 * key: a variable-length string that corresponds to the UTF-8 character sequence
170a25f0a04SGreg Roach	 * represented by the table entry.  Currently, that string can be up to 7
171a25f0a04SGreg Roach	 * bytes long.  This maximum length is defined by the value of the class
172a25f0a04SGreg Roach	 * constant MAXCHAR.
173a25f0a04SGreg Roach	 *
174a25f0a04SGreg Roach	 * value: an array as follows:
175a25f0a04SGreg Roach	 * [0]:  zero if not a vowel
176a25f0a04SGreg Roach	 * [1]:  sound value when this string is at the beginning of the word
177a25f0a04SGreg Roach	 * [2]:  sound value when this string is followed by a vowel
178a25f0a04SGreg Roach	 * [3]:  sound value for other cases
179a25f0a04SGreg Roach	 * [1],[2],[3] can be repeated several times to create branches in the code
180a25f0a04SGreg Roach	 * an empty sound value means "ignore in this state"
181a25f0a04SGreg Roach	 *
182a25f0a04SGreg Roach	 * @var string[][]
183a25f0a04SGreg Roach	 */
184a25f0a04SGreg Roach	private static $dmsounds = array(
185a25f0a04SGreg Roach		'A' => array('1', '0', '', ''),
186a25f0a04SGreg Roach		'À' => array('1', '0', '', ''),
187a25f0a04SGreg Roach		'Á' => array('1', '0', '', ''),
188a25f0a04SGreg Roach		'Â' => array('1', '0', '', ''),
189a25f0a04SGreg Roach		'Ã' => array('1', '0', '', ''),
190a25f0a04SGreg Roach		'Ä' => array('1', '0', '1', '', '0', '', ''),
191a25f0a04SGreg Roach		'Å' => array('1', '0', '', ''),
192a25f0a04SGreg Roach		'Ă' => array('1', '0', '', ''),
193a25f0a04SGreg Roach		'Ą' => array('1', '', '', '', '', '', '6'),
194a25f0a04SGreg Roach		'Ạ' => array('1', '0', '', ''),
195a25f0a04SGreg Roach		'Ả' => array('1', '0', '', ''),
196a25f0a04SGreg Roach		'Ấ' => array('1', '0', '', ''),
197a25f0a04SGreg Roach		'Ầ' => array('1', '0', '', ''),
198a25f0a04SGreg Roach		'Ẩ' => array('1', '0', '', ''),
199a25f0a04SGreg Roach		'Ẫ' => array('1', '0', '', ''),
200a25f0a04SGreg Roach		'Ậ' => array('1', '0', '', ''),
201a25f0a04SGreg Roach		'Ắ' => array('1', '0', '', ''),
202a25f0a04SGreg Roach		'Ằ' => array('1', '0', '', ''),
203a25f0a04SGreg Roach		'Ẳ' => array('1', '0', '', ''),
204a25f0a04SGreg Roach		'Ẵ' => array('1', '0', '', ''),
205a25f0a04SGreg Roach		'Ặ' => array('1', '0', '', ''),
206a25f0a04SGreg Roach		'AE' => array('1', '0', '1', ''),
207a25f0a04SGreg Roach		'Æ' => array('1', '0', '1', ''),
208a25f0a04SGreg Roach		'AI' => array('1', '0', '1', ''),
209a25f0a04SGreg Roach		'AJ' => array('1', '0', '1', ''),
210a25f0a04SGreg Roach		'AU' => array('1', '0', '7', ''),
211a25f0a04SGreg Roach		'AV' => array('1', '0', '7', '', '7', '7', '7'),
212a25f0a04SGreg Roach		'ÄU' => array('1', '0', '1', ''),
213a25f0a04SGreg Roach		'AY' => array('1', '0', '1', ''),
214a25f0a04SGreg Roach		'B' => array('0', '7', '7', '7'),
215a25f0a04SGreg Roach		'C' => array('0', '5', '5', '5', '34', '4', '4'),
216a25f0a04SGreg Roach		'Ć' => array('0', '4', '4', '4'),
217a25f0a04SGreg Roach		'Č' => array('0', '4', '4', '4'),
218a25f0a04SGreg Roach		'Ç' => array('0', '4', '4', '4'),
219a25f0a04SGreg Roach		'CH' => array('0', '5', '5', '5', '34', '4', '4'),
220a25f0a04SGreg Roach		'CHS' => array('0', '5', '54', '54'),
221a25f0a04SGreg Roach		'CK' => array('0', '5', '5', '5', '45', '45', '45'),
222a25f0a04SGreg Roach		'CCS' => array('0', '4', '4', '4'),
223a25f0a04SGreg Roach		'CS' => array('0', '4', '4', '4'),
224a25f0a04SGreg Roach		'CSZ' => array('0', '4', '4', '4'),
225a25f0a04SGreg Roach		'CZ' => array('0', '4', '4', '4'),
226a25f0a04SGreg Roach		'CZS' => array('0', '4', '4', '4'),
227a25f0a04SGreg Roach		'D' => array('0', '3', '3', '3'),
228a25f0a04SGreg Roach		'Ď' => array('0', '3', '3', '3'),
229a25f0a04SGreg Roach		'Đ' => array('0', '3', '3', '3'),
230a25f0a04SGreg Roach		'DRS' => array('0', '4', '4', '4'),
231a25f0a04SGreg Roach		'DRZ' => array('0', '4', '4', '4'),
232a25f0a04SGreg Roach		'DS' => array('0', '4', '4', '4'),
233a25f0a04SGreg Roach		'DSH' => array('0', '4', '4', '4'),
234a25f0a04SGreg Roach		'DSZ' => array('0', '4', '4', '4'),
235a25f0a04SGreg Roach		'DT' => array('0', '3', '3', '3'),
236a25f0a04SGreg Roach		'DDZ' => array('0', '4', '4', '4'),
237a25f0a04SGreg Roach		'DDZS' => array('0', '4', '4', '4'),
238a25f0a04SGreg Roach		'DZ' => array('0', '4', '4', '4'),
239a25f0a04SGreg Roach		'DŹ' => array('0', '4', '4', '4'),
240a25f0a04SGreg Roach		'DŻ' => array('0', '4', '4', '4'),
241a25f0a04SGreg Roach		'DZH' => array('0', '4', '4', '4'),
242a25f0a04SGreg Roach		'DZS' => array('0', '4', '4', '4'),
243a25f0a04SGreg Roach		'E' => array('1', '0', '', ''),
244a25f0a04SGreg Roach		'È' => array('1', '0', '', ''),
245a25f0a04SGreg Roach		'É' => array('1', '0', '', ''),
246a25f0a04SGreg Roach		'Ê' => array('1', '0', '', ''),
247a25f0a04SGreg Roach		'Ë' => array('1', '0', '', ''),
248a25f0a04SGreg Roach		'Ĕ' => array('1', '0', '', ''),
249a25f0a04SGreg Roach		'Ė' => array('1', '0', '', ''),
250a25f0a04SGreg Roach		'Ę' => array('1', '', '', '6', '', '', ''),
251a25f0a04SGreg Roach		'Ẹ' => array('1', '0', '', ''),
252a25f0a04SGreg Roach		'Ẻ' => array('1', '0', '', ''),
253a25f0a04SGreg Roach		'Ẽ' => array('1', '0', '', ''),
254a25f0a04SGreg Roach		'Ế' => array('1', '0', '', ''),
255a25f0a04SGreg Roach		'Ề' => array('1', '0', '', ''),
256a25f0a04SGreg Roach		'Ể' => array('1', '0', '', ''),
257a25f0a04SGreg Roach		'Ễ' => array('1', '0', '', ''),
258a25f0a04SGreg Roach		'Ệ' => array('1', '0', '', ''),
259a25f0a04SGreg Roach		'EAU' => array('1', '0', '', ''),
260a25f0a04SGreg Roach		'EI' => array('1', '0', '1', ''),
261a25f0a04SGreg Roach		'EJ' => array('1', '0', '1', ''),
262a25f0a04SGreg Roach		'EU' => array('1', '1', '1', ''),
263a25f0a04SGreg Roach		'EY' => array('1', '0', '1', ''),
264a25f0a04SGreg Roach		'F' => array('0', '7', '7', '7'),
265a25f0a04SGreg Roach		'FB' => array('0', '7', '7', '7'),
266a25f0a04SGreg Roach		'G' => array('0', '5', '5', '5', '34', '4', '4'),
267a25f0a04SGreg Roach		'Ğ' => array('0', '', '', ''),
268a25f0a04SGreg Roach		'GGY' => array('0', '5', '5', '5'),
269a25f0a04SGreg Roach		'GY' => array('0', '5', '5', '5'),
270a25f0a04SGreg Roach		'H' => array('0', '5', '5', '', '5', '5', '5'),
271a25f0a04SGreg Roach		'I' => array('1', '0', '', ''),
272a25f0a04SGreg Roach		'Ì' => array('1', '0', '', ''),
273a25f0a04SGreg Roach		'Í' => array('1', '0', '', ''),
274a25f0a04SGreg Roach		'Î' => array('1', '0', '', ''),
275a25f0a04SGreg Roach		'Ï' => array('1', '0', '', ''),
276a25f0a04SGreg Roach		'Ĩ' => array('1', '0', '', ''),
277a25f0a04SGreg Roach		'Į' => array('1', '0', '', ''),
278a25f0a04SGreg Roach		'İ' => array('1', '0', '', ''),
279a25f0a04SGreg Roach		'Ỉ' => array('1', '0', '', ''),
280a25f0a04SGreg Roach		'Ị' => array('1', '0', '', ''),
281a25f0a04SGreg Roach		'IA' => array('1', '1', '', ''),
282a25f0a04SGreg Roach		'IE' => array('1', '1', '', ''),
283a25f0a04SGreg Roach		'IO' => array('1', '1', '', ''),
284a25f0a04SGreg Roach		'IU' => array('1', '1', '', ''),
285a25f0a04SGreg Roach		'J' => array('0', '1', '', '', '4', '4', '4', '5', '5', ''),
286a25f0a04SGreg Roach		'K' => array('0', '5', '5', '5'),
287a25f0a04SGreg Roach		'KH' => array('0', '5', '5', '5'),
288a25f0a04SGreg Roach		'KS' => array('0', '5', '54', '54'),
289a25f0a04SGreg Roach		'L' => array('0', '8', '8', '8'),
290a25f0a04SGreg Roach		'Ľ' => array('0', '8', '8', '8'),
291a25f0a04SGreg Roach		'Ĺ' => array('0', '8', '8', '8'),
292a25f0a04SGreg Roach		'Ł' => array('0', '7', '7', '7', '8', '8', '8'),
293a25f0a04SGreg Roach		'LL' => array('0', '8', '8', '8', '58', '8', '8', '1', '8', '8'),
294a25f0a04SGreg Roach		'LLY' => array('0', '8', '8', '8', '1', '8', '8'),
295a25f0a04SGreg Roach		'LY' => array('0', '8', '8', '8', '1', '8', '8'),
296a25f0a04SGreg Roach		'M' => array('0', '6', '6', '6'),
297a25f0a04SGreg Roach		'MĔ' => array('0', '66', '66', '66'),
298a25f0a04SGreg Roach		'MN' => array('0', '66', '66', '66'),
299a25f0a04SGreg Roach		'N' => array('0', '6', '6', '6'),
300a25f0a04SGreg Roach		'Ń' => array('0', '6', '6', '6'),
301a25f0a04SGreg Roach		'Ň' => array('0', '6', '6', '6'),
302a25f0a04SGreg Roach		'Ñ' => array('0', '6', '6', '6'),
303a25f0a04SGreg Roach		'NM' => array('0', '66', '66', '66'),
304a25f0a04SGreg Roach		'O' => array('1', '0', '', ''),
305a25f0a04SGreg Roach		'Ò' => array('1', '0', '', ''),
306a25f0a04SGreg Roach		'Ó' => array('1', '0', '', ''),
307a25f0a04SGreg Roach		'Ô' => array('1', '0', '', ''),
308a25f0a04SGreg Roach		'Õ' => array('1', '0', '', ''),
309a25f0a04SGreg Roach		'Ö' => array('1', '0', '', ''),
310a25f0a04SGreg Roach		'Ø' => array('1', '0', '', ''),
311a25f0a04SGreg Roach		'Ő' => array('1', '0', '', ''),
312a25f0a04SGreg Roach		'Œ' => array('1', '0', '', ''),
313a25f0a04SGreg Roach		'Ơ' => array('1', '0', '', ''),
314a25f0a04SGreg Roach		'Ọ' => array('1', '0', '', ''),
315a25f0a04SGreg Roach		'Ỏ' => array('1', '0', '', ''),
316a25f0a04SGreg Roach		'Ố' => array('1', '0', '', ''),
317a25f0a04SGreg Roach		'Ồ' => array('1', '0', '', ''),
318a25f0a04SGreg Roach		'Ổ' => array('1', '0', '', ''),
319a25f0a04SGreg Roach		'Ỗ' => array('1', '0', '', ''),
320a25f0a04SGreg Roach		'Ộ' => array('1', '0', '', ''),
321a25f0a04SGreg Roach		'Ớ' => array('1', '0', '', ''),
322a25f0a04SGreg Roach		'Ờ' => array('1', '0', '', ''),
323a25f0a04SGreg Roach		'Ở' => array('1', '0', '', ''),
324a25f0a04SGreg Roach		'Ỡ' => array('1', '0', '', ''),
325a25f0a04SGreg Roach		'Ợ' => array('1', '0', '', ''),
326a25f0a04SGreg Roach		'OE' => array('1', '0', '', ''),
327a25f0a04SGreg Roach		'OI' => array('1', '0', '1', ''),
328a25f0a04SGreg Roach		'OJ' => array('1', '0', '1', ''),
329a25f0a04SGreg Roach		'OU' => array('1', '0', '', ''),
330a25f0a04SGreg Roach		'OY' => array('1', '0', '1', ''),
331a25f0a04SGreg Roach		'P' => array('0', '7', '7', '7'),
332a25f0a04SGreg Roach		'PF' => array('0', '7', '7', '7'),
333a25f0a04SGreg Roach		'PH' => array('0', '7', '7', '7'),
334a25f0a04SGreg Roach		'Q' => array('0', '5', '5', '5'),
335a25f0a04SGreg Roach		'R' => array('0', '9', '9', '9'),
336a25f0a04SGreg Roach		'Ř' => array('0', '4', '4', '4'),
337a25f0a04SGreg Roach		'RS' => array('0', '4', '4', '4', '94', '94', '94'),
338a25f0a04SGreg Roach		'RZ' => array('0', '4', '4', '4', '94', '94', '94'),
339a25f0a04SGreg Roach		'S' => array('0', '4', '4', '4'),
340a25f0a04SGreg Roach		'Ś' => array('0', '4', '4', '4'),
341a25f0a04SGreg Roach		'Š' => array('0', '4', '4', '4'),
342a25f0a04SGreg Roach		'Ş' => array('0', '4', '4', '4'),
343a25f0a04SGreg Roach		'SC' => array('0', '2', '4', '4'),
344a25f0a04SGreg Roach		'ŠČ' => array('0', '2', '4', '4'),
345a25f0a04SGreg Roach		'SCH' => array('0', '4', '4', '4'),
346a25f0a04SGreg Roach		'SCHD' => array('0', '2', '43', '43'),
347a25f0a04SGreg Roach		'SCHT' => array('0', '2', '43', '43'),
348a25f0a04SGreg Roach		'SCHTCH' => array('0', '2', '4', '4'),
349a25f0a04SGreg Roach		'SCHTSCH' => array('0', '2', '4', '4'),
350a25f0a04SGreg Roach		'SCHTSH' => array('0', '2', '4', '4'),
351a25f0a04SGreg Roach		'SD' => array('0', '2', '43', '43'),
352a25f0a04SGreg Roach		'SH' => array('0', '4', '4', '4'),
353a25f0a04SGreg Roach		'SHCH' => array('0', '2', '4', '4'),
354a25f0a04SGreg Roach		'SHD' => array('0', '2', '43', '43'),
355a25f0a04SGreg Roach		'SHT' => array('0', '2', '43', '43'),
356a25f0a04SGreg Roach		'SHTCH' => array('0', '2', '4', '4'),
357a25f0a04SGreg Roach		'SHTSH' => array('0', '2', '4', '4'),
358a25f0a04SGreg Roach		'ß' => array('0', '', '4', '4'),
359a25f0a04SGreg Roach		'ST' => array('0', '2', '43', '43'),
360a25f0a04SGreg Roach		'STCH' => array('0', '2', '4', '4'),
361a25f0a04SGreg Roach		'STRS' => array('0', '2', '4', '4'),
362a25f0a04SGreg Roach		'STRZ' => array('0', '2', '4', '4'),
363a25f0a04SGreg Roach		'STSCH' => array('0', '2', '4', '4'),
364a25f0a04SGreg Roach		'STSH' => array('0', '2', '4', '4'),
365a25f0a04SGreg Roach		'SSZ' => array('0', '4', '4', '4'),
366a25f0a04SGreg Roach		'SZ' => array('0', '4', '4', '4'),
367a25f0a04SGreg Roach		'SZCS' => array('0', '2', '4', '4'),
368a25f0a04SGreg Roach		'SZCZ' => array('0', '2', '4', '4'),
369a25f0a04SGreg Roach		'SZD' => array('0', '2', '43', '43'),
370a25f0a04SGreg Roach		'SZT' => array('0', '2', '43', '43'),
371a25f0a04SGreg Roach		'T' => array('0', '3', '3', '3'),
372a25f0a04SGreg Roach		'Ť' => array('0', '3', '3', '3'),
373a25f0a04SGreg Roach		'Ţ' => array('0', '3', '3', '3', '4', '4', '4'),
374a25f0a04SGreg Roach		'TC' => array('0', '4', '4', '4'),
375a25f0a04SGreg Roach		'TCH' => array('0', '4', '4', '4'),
376a25f0a04SGreg Roach		'TH' => array('0', '3', '3', '3'),
377a25f0a04SGreg Roach		'TRS' => array('0', '4', '4', '4'),
378a25f0a04SGreg Roach		'TRZ' => array('0', '4', '4', '4'),
379a25f0a04SGreg Roach		'TS' => array('0', '4', '4', '4'),
380a25f0a04SGreg Roach		'TSCH' => array('0', '4', '4', '4'),
381a25f0a04SGreg Roach		'TSH' => array('0', '4', '4', '4'),
382a25f0a04SGreg Roach		'TSZ' => array('0', '4', '4', '4'),
383a25f0a04SGreg Roach		'TTCH' => array('0', '4', '4', '4'),
384a25f0a04SGreg Roach		'TTS' => array('0', '4', '4', '4'),
385a25f0a04SGreg Roach		'TTSCH' => array('0', '4', '4', '4'),
386a25f0a04SGreg Roach		'TTSZ' => array('0', '4', '4', '4'),
387a25f0a04SGreg Roach		'TTZ' => array('0', '4', '4', '4'),
388a25f0a04SGreg Roach		'TZ' => array('0', '4', '4', '4'),
389a25f0a04SGreg Roach		'TZS' => array('0', '4', '4', '4'),
390a25f0a04SGreg Roach		'U' => array('1', '0', '', ''),
391a25f0a04SGreg Roach		'Ù' => array('1', '0', '', ''),
392a25f0a04SGreg Roach		'Ú' => array('1', '0', '', ''),
393a25f0a04SGreg Roach		'Û' => array('1', '0', '', ''),
394a25f0a04SGreg Roach		'Ü' => array('1', '0', '', ''),
395a25f0a04SGreg Roach		'Ũ' => array('1', '0', '', ''),
396a25f0a04SGreg Roach		'Ū' => array('1', '0', '', ''),
397a25f0a04SGreg Roach		'Ů' => array('1', '0', '', ''),
398a25f0a04SGreg Roach		'Ű' => array('1', '0', '', ''),
399a25f0a04SGreg Roach		'Ų' => array('1', '0', '', ''),
400a25f0a04SGreg Roach		'Ư' => array('1', '0', '', ''),
401a25f0a04SGreg Roach		'Ụ' => array('1', '0', '', ''),
402a25f0a04SGreg Roach		'Ủ' => array('1', '0', '', ''),
403a25f0a04SGreg Roach		'Ứ' => array('1', '0', '', ''),
404a25f0a04SGreg Roach		'Ừ' => array('1', '0', '', ''),
405a25f0a04SGreg Roach		'Ử' => array('1', '0', '', ''),
406a25f0a04SGreg Roach		'Ữ' => array('1', '0', '', ''),
407a25f0a04SGreg Roach		'Ự' => array('1', '0', '', ''),
408a25f0a04SGreg Roach		'UE' => array('1', '0', '', ''),
409a25f0a04SGreg Roach		'UI' => array('1', '0', '1', ''),
410a25f0a04SGreg Roach		'UJ' => array('1', '0', '1', ''),
411a25f0a04SGreg Roach		'UY' => array('1', '0', '1', ''),
412a25f0a04SGreg Roach		'UW' => array('1', '0', '1', '', '0', '7', '7'),
413a25f0a04SGreg Roach		'V' => array('0', '7', '7', '7'),
414a25f0a04SGreg Roach		'W' => array('0', '7', '7', '7'),
415a25f0a04SGreg Roach		'X' => array('0', '5', '54', '54'),
416a25f0a04SGreg Roach		'Y' => array('1', '1', '', ''),
417a25f0a04SGreg Roach		'Ý' => array('1', '1', '', ''),
418a25f0a04SGreg Roach		'Ỳ' => array('1', '1', '', ''),
419a25f0a04SGreg Roach		'Ỵ' => array('1', '1', '', ''),
420a25f0a04SGreg Roach		'Ỷ' => array('1', '1', '', ''),
421a25f0a04SGreg Roach		'Ỹ' => array('1', '1', '', ''),
422a25f0a04SGreg Roach		'Z' => array('0', '4', '4', '4'),
423a25f0a04SGreg Roach		'Ź' => array('0', '4', '4', '4'),
424a25f0a04SGreg Roach		'Ż' => array('0', '4', '4', '4'),
425a25f0a04SGreg Roach		'Ž' => array('0', '4', '4', '4'),
426a25f0a04SGreg Roach		'ZD' => array('0', '2', '43', '43'),
427a25f0a04SGreg Roach		'ZDZ' => array('0', '2', '4', '4'),
428a25f0a04SGreg Roach		'ZDZH' => array('0', '2', '4', '4'),
429a25f0a04SGreg Roach		'ZH' => array('0', '4', '4', '4'),
430a25f0a04SGreg Roach		'ZHD' => array('0', '2', '43', '43'),
431a25f0a04SGreg Roach		'ZHDZH' => array('0', '2', '4', '4'),
432a25f0a04SGreg Roach		'ZS' => array('0', '4', '4', '4'),
433a25f0a04SGreg Roach		'ZSCH' => array('0', '4', '4', '4'),
434a25f0a04SGreg Roach		'ZSH' => array('0', '4', '4', '4'),
435a25f0a04SGreg Roach		'ZZS' => array('0', '4', '4', '4'),
436a25f0a04SGreg Roach		// Cyrillic alphabet
437a25f0a04SGreg Roach		'А' => array('1', '0', '', ''),
438a25f0a04SGreg Roach		'Б' => array('0', '7', '7', '7'),
439a25f0a04SGreg Roach		'В' => array('0', '7', '7', '7'),
440a25f0a04SGreg Roach		'Г' => array('0', '5', '5', '5'),
441a25f0a04SGreg Roach		'Д' => array('0', '3', '3', '3'),
442a25f0a04SGreg Roach		'ДЗ' => array('0', '4', '4', '4'),
443a25f0a04SGreg Roach		'Е' => array('1', '0', '', ''),
444a25f0a04SGreg Roach		'Ё' => array('1', '0', '', ''),
445a25f0a04SGreg Roach		'Ж' => array('0', '4', '4', '4'),
446a25f0a04SGreg Roach		'З' => array('0', '4', '4', '4'),
447a25f0a04SGreg Roach		'И' => array('1', '0', '', ''),
448a25f0a04SGreg Roach		'Й' => array('1', '1', '', '', '4', '4', '4'),
449a25f0a04SGreg Roach		'К' => array('0', '5', '5', '5'),
450a25f0a04SGreg Roach		'Л' => array('0', '8', '8', '8'),
451a25f0a04SGreg Roach		'М' => array('0', '6', '6', '6'),
452a25f0a04SGreg Roach		'Н' => array('0', '6', '6', '6'),
453a25f0a04SGreg Roach		'О' => array('1', '0', '', ''),
454a25f0a04SGreg Roach		'П' => array('0', '7', '7', '7'),
455a25f0a04SGreg Roach		'Р' => array('0', '9', '9', '9'),
456a25f0a04SGreg Roach		'РЖ' => array('0', '4', '4', '4'),
457a25f0a04SGreg Roach		'С' => array('0', '4', '4', '4'),
458a25f0a04SGreg Roach		'Т' => array('0', '3', '3', '3'),
459a25f0a04SGreg Roach		'У' => array('1', '0', '', ''),
460a25f0a04SGreg Roach		'Ф' => array('0', '7', '7', '7'),
461a25f0a04SGreg Roach		'Х' => array('0', '5', '5', '5'),
462a25f0a04SGreg Roach		'Ц' => array('0', '4', '4', '4'),
463a25f0a04SGreg Roach		'Ч' => array('0', '4', '4', '4'),
464a25f0a04SGreg Roach		'Ш' => array('0', '4', '4', '4'),
465a25f0a04SGreg Roach		'Щ' => array('0', '2', '4', '4'),
466a25f0a04SGreg Roach		'Ъ' => array('0', '', '', ''),
467a25f0a04SGreg Roach		'Ы' => array('0', '1', '', ''),
468a25f0a04SGreg Roach		'Ь' => array('0', '', '', ''),
469a25f0a04SGreg Roach		'Э' => array('1', '0', '', ''),
470a25f0a04SGreg Roach		'Ю' => array('0', '1', '', ''),
471a25f0a04SGreg Roach		'Я' => array('0', '1', '', ''),
472a25f0a04SGreg Roach		// Greek alphabet
473a25f0a04SGreg Roach		'Α' => array('1', '0', '', ''),
474a25f0a04SGreg Roach		'Ά' => array('1', '0', '', ''),
475a25f0a04SGreg Roach		'ΑΙ' => array('1', '0', '1', ''),
476a25f0a04SGreg Roach		'ΑΥ' => array('1', '0', '1', ''),
477a25f0a04SGreg Roach		'Β' => array('0', '7', '7', '7'),
478a25f0a04SGreg Roach		'Γ' => array('0', '5', '5', '5'),
479a25f0a04SGreg Roach		'Δ' => array('0', '3', '3', '3'),
480a25f0a04SGreg Roach		'Ε' => array('1', '0', '', ''),
481a25f0a04SGreg Roach		'Έ' => array('1', '0', '', ''),
482a25f0a04SGreg Roach		'ΕΙ' => array('1', '0', '1', ''),
483a25f0a04SGreg Roach		'ΕΥ' => array('1', '1', '1', ''),
484a25f0a04SGreg Roach		'Ζ' => array('0', '4', '4', '4'),
485a25f0a04SGreg Roach		'Η' => array('1', '0', '', ''),
486a25f0a04SGreg Roach		'Ή' => array('1', '0', '', ''),
487a25f0a04SGreg Roach		'Θ' => array('0', '3', '3', '3'),
488a25f0a04SGreg Roach		'Ι' => array('1', '0', '', ''),
489a25f0a04SGreg Roach		'Ί' => array('1', '0', '', ''),
490a25f0a04SGreg Roach		'Ϊ' => array('1', '0', '', ''),
491a25f0a04SGreg Roach		'ΐ' => array('1', '0', '', ''),
492a25f0a04SGreg Roach		'Κ' => array('0', '5', '5', '5'),
493a25f0a04SGreg Roach		'Λ' => array('0', '8', '8', '8'),
494a25f0a04SGreg Roach		'Μ' => array('0', '6', '6', '6'),
495a25f0a04SGreg Roach		'ΜΠ' => array('0', '7', '7', '7'),
496a25f0a04SGreg Roach		'Ν' => array('0', '6', '6', '6'),
497a25f0a04SGreg Roach		'ΝΤ' => array('0', '3', '3', '3'),
498a25f0a04SGreg Roach		'Ξ' => array('0', '5', '54', '54'),
499a25f0a04SGreg Roach		'Ο' => array('1', '0', '', ''),
500a25f0a04SGreg Roach		'Ό' => array('1', '0', '', ''),
501a25f0a04SGreg Roach		'ΟΙ' => array('1', '0', '1', ''),
502a25f0a04SGreg Roach		'ΟΥ' => array('1', '0', '1', ''),
503a25f0a04SGreg Roach		'Π' => array('0', '7', '7', '7'),
504a25f0a04SGreg Roach		'Ρ' => array('0', '9', '9', '9'),
505a25f0a04SGreg Roach		'Σ' => array('0', '4', '4', '4'),
506a25f0a04SGreg Roach		'ς' => array('0', '', '', '4'),
507a25f0a04SGreg Roach		'Τ' => array('0', '3', '3', '3'),
508a25f0a04SGreg Roach		'ΤΖ' => array('0', '4', '4', '4'),
509a25f0a04SGreg Roach		'ΤΣ' => array('0', '4', '4', '4'),
510a25f0a04SGreg Roach		'Υ' => array('1', '1', '', ''),
511a25f0a04SGreg Roach		'Ύ' => array('1', '1', '', ''),
512a25f0a04SGreg Roach		'Ϋ' => array('1', '1', '', ''),
513a25f0a04SGreg Roach		'ΰ' => array('1', '1', '', ''),
514a25f0a04SGreg Roach		'ΥΚ' => array('1', '5', '5', '5'),
515a25f0a04SGreg Roach		'ΥΥ' => array('1', '65', '65', '65'),
516a25f0a04SGreg Roach		'Φ' => array('0', '7', '7', '7'),
517a25f0a04SGreg Roach		'Χ' => array('0', '5', '5', '5'),
518a25f0a04SGreg Roach		'Ψ' => array('0', '7', '7', '7'),
519a25f0a04SGreg Roach		'Ω' => array('1', '0', '', ''),
520a25f0a04SGreg Roach		'Ώ' => array('1', '0', '', ''),
521a25f0a04SGreg Roach		// Hebrew alphabet
522a25f0a04SGreg Roach		'א' => array('1', '0', '', ''),
523a25f0a04SGreg Roach		'או' => array('1', '0', '7', ''),
524a25f0a04SGreg Roach		'אג' => array('1', '4', '4', '4', '5', '5', '5', '34', '34', '34'),
525a25f0a04SGreg Roach		'בב' => array('0', '7', '7', '7', '77', '77', '77'),
526a25f0a04SGreg Roach		'ב' => array('0', '7', '7', '7'),
527a25f0a04SGreg Roach		'גג' => array('0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'),
528a25f0a04SGreg Roach		'גד' => array('0', '43', '43', '43', '53', '53', '53'),
529a25f0a04SGreg Roach		'גה' => array('0', '45', '45', '45', '55', '55', '55'),
530a25f0a04SGreg Roach		'גז' => array('0', '44', '44', '44', '45', '45', '45'),
531a25f0a04SGreg Roach		'גח' => array('0', '45', '45', '45', '55', '55', '55'),
532a25f0a04SGreg Roach		'גכ' => array('0', '45', '45', '45', '55', '55', '55'),
533a25f0a04SGreg Roach		'גך' => array('0', '45', '45', '45', '55', '55', '55'),
534a25f0a04SGreg Roach		'גצ' => array('0', '44', '44', '44', '45', '45', '45'),
535a25f0a04SGreg Roach		'גץ' => array('0', '44', '44', '44', '45', '45', '45'),
536a25f0a04SGreg Roach		'גק' => array('0', '45', '45', '45', '54', '54', '54'),
537a25f0a04SGreg Roach		'גש' => array('0', '44', '44', '44', '54', '54', '54'),
538a25f0a04SGreg Roach		'גת' => array('0', '43', '43', '43', '53', '53', '53'),
539a25f0a04SGreg Roach		'ג' => array('0', '4', '4', '4', '5', '5', '5'),
540a25f0a04SGreg Roach		'דז' => array('0', '4', '4', '4'),
541a25f0a04SGreg Roach		'דד' => array('0', '3', '3', '3', '33', '33', '33'),
542a25f0a04SGreg Roach		'דט' => array('0', '33', '33', '33'),
543a25f0a04SGreg Roach		'דש' => array('0', '4', '4', '4'),
544a25f0a04SGreg Roach		'דצ' => array('0', '4', '4', '4'),
545a25f0a04SGreg Roach		'דץ' => array('0', '4', '4', '4'),
546a25f0a04SGreg Roach		'ד' => array('0', '3', '3', '3'),
547a25f0a04SGreg Roach		'הג' => array('0', '54', '54', '54', '55', '55', '55'),
548a25f0a04SGreg Roach		'הכ' => array('0', '55', '55', '55'),
549a25f0a04SGreg Roach		'הח' => array('0', '55', '55', '55'),
550a25f0a04SGreg Roach		'הק' => array('0', '55', '55', '55', '5', '5', '5'),
551a25f0a04SGreg Roach		'הה' => array('0', '5', '5', '', '55', '55', ''),
552a25f0a04SGreg Roach		'ה' => array('0', '5', '5', ''),
553a25f0a04SGreg Roach		'וי' => array('1', '', '', '', '7', '7', '7'),
554a25f0a04SGreg Roach		'ו' => array('1', '7', '7', '7', '7', '', ''),
555a25f0a04SGreg Roach		'וו' => array('1', '7', '7', '7', '7', '', ''),
556a25f0a04SGreg Roach		'וופ' => array('1', '7', '7', '7', '77', '77', '77'),
557a25f0a04SGreg Roach		'זש' => array('0', '4', '4', '4', '44', '44', '44'),
558a25f0a04SGreg Roach		'זדז' => array('0', '2', '4', '4'),
559a25f0a04SGreg Roach		'ז' => array('0', '4', '4', '4'),
560a25f0a04SGreg Roach		'זג' => array('0', '44', '44', '44', '45', '45', '45'),
561a25f0a04SGreg Roach		'זז' => array('0', '4', '4', '4', '44', '44', '44'),
562a25f0a04SGreg Roach		'זס' => array('0', '44', '44', '44'),
563a25f0a04SGreg Roach		'זצ' => array('0', '44', '44', '44'),
564a25f0a04SGreg Roach		'זץ' => array('0', '44', '44', '44'),
565a25f0a04SGreg Roach		'חג' => array('0', '54', '54', '54', '53', '53', '53'),
566a25f0a04SGreg Roach		'חח' => array('0', '5', '5', '5', '55', '55', '55'),
567a25f0a04SGreg Roach		'חק' => array('0', '55', '55', '55', '5', '5', '5'),
568a25f0a04SGreg Roach		'חכ' => array('0', '45', '45', '45', '55', '55', '55'),
569a25f0a04SGreg Roach		'חס' => array('0', '5', '54', '54'),
570a25f0a04SGreg Roach		'חש' => array('0', '5', '54', '54'),
571a25f0a04SGreg Roach		'ח' => array('0', '5', '5', '5'),
572a25f0a04SGreg Roach		'טש' => array('0', '4', '4', '4'),
573a25f0a04SGreg Roach		'טד' => array('0', '33', '33', '33'),
574a25f0a04SGreg Roach		'טי' => array('0', '3', '3', '3', '4', '4', '4', '3', '3', '34'),
575a25f0a04SGreg Roach		'טת' => array('0', '33', '33', '33'),
576a25f0a04SGreg Roach		'טט' => array('0', '3', '3', '3', '33', '33', '33'),
577a25f0a04SGreg Roach		'ט' => array('0', '3', '3', '3'),
578a25f0a04SGreg Roach		'י' => array('1', '1', '', ''),
579a25f0a04SGreg Roach		'יא' => array('1', '1', '', '', '1', '1', '1'),
580a25f0a04SGreg Roach		'כג' => array('0', '55', '55', '55', '54', '54', '54'),
581a25f0a04SGreg Roach		'כש' => array('0', '5', '54', '54'),
582a25f0a04SGreg Roach		'כס' => array('0', '5', '54', '54'),
583a25f0a04SGreg Roach		'ככ' => array('0', '5', '5', '5', '55', '55', '55'),
584a25f0a04SGreg Roach		'כך' => array('0', '5', '5', '5', '55', '55', '55'),
585a25f0a04SGreg Roach		'כ' => array('0', '5', '5', '5'),
586a25f0a04SGreg Roach		'כח' => array('0', '55', '55', '55', '5', '5', '5'),
587a25f0a04SGreg Roach		'ך' => array('0', '', '5', '5'),
588a25f0a04SGreg Roach		'ל' => array('0', '8', '8', '8'),
589a25f0a04SGreg Roach		'לל' => array('0', '88', '88', '88', '8', '8', '8'),
590a25f0a04SGreg Roach		'מנ' => array('0', '66', '66', '66'),
591a25f0a04SGreg Roach		'מן' => array('0', '66', '66', '66'),
592a25f0a04SGreg Roach		'ממ' => array('0', '6', '6', '6', '66', '66', '66'),
593a25f0a04SGreg Roach		'מם' => array('0', '6', '6', '6', '66', '66', '66'),
594a25f0a04SGreg Roach		'מ' => array('0', '6', '6', '6'),
595a25f0a04SGreg Roach		'ם' => array('0', '', '6', '6'),
596a25f0a04SGreg Roach		'נמ' => array('0', '66', '66', '66'),
597a25f0a04SGreg Roach		'נם' => array('0', '66', '66', '66'),
598a25f0a04SGreg Roach		'ננ' => array('0', '6', '6', '6', '66', '66', '66'),
599a25f0a04SGreg Roach		'נן' => array('0', '6', '6', '6', '66', '66', '66'),
600a25f0a04SGreg Roach		'נ' => array('0', '6', '6', '6'),
601a25f0a04SGreg Roach		'ן' => array('0', '', '6', '6'),
602a25f0a04SGreg Roach		'סתש' => array('0', '2', '4', '4'),
603a25f0a04SGreg Roach		'סתז' => array('0', '2', '4', '4'),
604a25f0a04SGreg Roach		'סטז' => array('0', '2', '4', '4'),
605a25f0a04SGreg Roach		'סטש' => array('0', '2', '4', '4'),
606a25f0a04SGreg Roach		'סצד' => array('0', '2', '4', '4'),
607a25f0a04SGreg Roach		'סט' => array('0', '2', '4', '4', '43', '43', '43'),
608a25f0a04SGreg Roach		'סת' => array('0', '2', '4', '4', '43', '43', '43'),
609a25f0a04SGreg Roach		'סג' => array('0', '44', '44', '44', '4', '4', '4'),
610a25f0a04SGreg Roach		'סס' => array('0', '4', '4', '4', '44', '44', '44'),
611a25f0a04SGreg Roach		'סצ' => array('0', '44', '44', '44'),
612a25f0a04SGreg Roach		'סץ' => array('0', '44', '44', '44'),
613a25f0a04SGreg Roach		'סז' => array('0', '44', '44', '44'),
614a25f0a04SGreg Roach		'סש' => array('0', '44', '44', '44'),
615a25f0a04SGreg Roach		'ס' => array('0', '4', '4', '4'),
616a25f0a04SGreg Roach		'ע' => array('1', '0', '', ''),
617a25f0a04SGreg Roach		'פב' => array('0', '7', '7', '7', '77', '77', '77'),
618a25f0a04SGreg Roach		'פוו' => array('0', '7', '7', '7', '77', '77', '77'),
619a25f0a04SGreg Roach		'פפ' => array('0', '7', '7', '7', '77', '77', '77'),
620a25f0a04SGreg Roach		'פף' => array('0', '7', '7', '7', '77', '77', '77'),
621a25f0a04SGreg Roach		'פ' => array('0', '7', '7', '7'),
622a25f0a04SGreg Roach		'ף' => array('0', '', '7', '7'),
623a25f0a04SGreg Roach		'צג' => array('0', '44', '44', '44', '45', '45', '45'),
624a25f0a04SGreg Roach		'צז' => array('0', '44', '44', '44'),
625a25f0a04SGreg Roach		'צס' => array('0', '44', '44', '44'),
626a25f0a04SGreg Roach		'צצ' => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'),
627a25f0a04SGreg Roach		'צץ' => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'),
628a25f0a04SGreg Roach		'צש' => array('0', '44', '44', '44', '4', '4', '4', '5', '5', '5'),
629a25f0a04SGreg Roach		'צ' => array('0', '4', '4', '4', '5', '5', '5'),
630a25f0a04SGreg Roach		'ץ' => array('0', '', '4', '4'),
631a25f0a04SGreg Roach		'קה' => array('0', '55', '55', '5'),
632a25f0a04SGreg Roach		'קס' => array('0', '5', '54', '54'),
633a25f0a04SGreg Roach		'קש' => array('0', '5', '54', '54'),
634a25f0a04SGreg Roach		'קק' => array('0', '5', '5', '5', '55', '55', '55'),
635a25f0a04SGreg Roach		'קח' => array('0', '55', '55', '55'),
636a25f0a04SGreg Roach		'קכ' => array('0', '55', '55', '55'),
637a25f0a04SGreg Roach		'קך' => array('0', '55', '55', '55'),
638a25f0a04SGreg Roach		'קג' => array('0', '55', '55', '55', '54', '54', '54'),
639a25f0a04SGreg Roach		'ק' => array('0', '5', '5', '5'),
640a25f0a04SGreg Roach		'רר' => array('0', '99', '99', '99', '9', '9', '9'),
641a25f0a04SGreg Roach		'ר' => array('0', '9', '9', '9'),
642a25f0a04SGreg Roach		'שטז' => array('0', '2', '4', '4'),
643a25f0a04SGreg Roach		'שתש' => array('0', '2', '4', '4'),
644a25f0a04SGreg Roach		'שתז' => array('0', '2', '4', '4'),
645a25f0a04SGreg Roach		'שטש' => array('0', '2', '4', '4'),
646a25f0a04SGreg Roach		'שד' => array('0', '2', '43', '43'),
647a25f0a04SGreg Roach		'שז' => array('0', '44', '44', '44'),
648a25f0a04SGreg Roach		'שס' => array('0', '44', '44', '44'),
649a25f0a04SGreg Roach		'שת' => array('0', '2', '43', '43'),
650a25f0a04SGreg Roach		'שג' => array('0', '4', '4', '4', '44', '44', '44', '4', '43', '43'),
651a25f0a04SGreg Roach		'שט' => array('0', '2', '43', '43', '44', '44', '44'),
652a25f0a04SGreg Roach		'שצ' => array('0', '44', '44', '44', '45', '45', '45'),
653a25f0a04SGreg Roach		'שץ' => array('0', '44', '', '44', '45', '', '45'),
654a25f0a04SGreg Roach		'שש' => array('0', '4', '4', '4', '44', '44', '44'),
655a25f0a04SGreg Roach		'ש' => array('0', '4', '4', '4'),
656a25f0a04SGreg Roach		'תג' => array('0', '34', '34', '34'),
657a25f0a04SGreg Roach		'תז' => array('0', '34', '34', '34'),
658a25f0a04SGreg Roach		'תש' => array('0', '4', '4', '4'),
659a25f0a04SGreg Roach		'תת' => array('0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'),
660a25f0a04SGreg Roach		'ת' => array('0', '3', '3', '3', '4', '4', '4'),
661a25f0a04SGreg Roach		// Arabic alphabet
662a25f0a04SGreg Roach		'ا' => array('1', '0', '', ''),
663a25f0a04SGreg Roach		'ب' => array('0', '7', '7', '7'),
664a25f0a04SGreg Roach		'ت' => array('0', '3', '3', '3'),
665a25f0a04SGreg Roach		'ث' => array('0', '3', '3', '3'),
666a25f0a04SGreg Roach		'ج' => array('0', '4', '4', '4'),
667a25f0a04SGreg Roach		'ح' => array('0', '5', '5', '5'),
668a25f0a04SGreg Roach		'خ' => array('0', '5', '5', '5'),
669a25f0a04SGreg Roach		'د' => array('0', '3', '3', '3'),
670a25f0a04SGreg Roach		'ذ' => array('0', '3', '3', '3'),
671a25f0a04SGreg Roach		'ر' => array('0', '9', '9', '9'),
672a25f0a04SGreg Roach		'ز' => array('0', '4', '4', '4'),
673a25f0a04SGreg Roach		'س' => array('0', '4', '4', '4'),
674a25f0a04SGreg Roach		'ش' => array('0', '4', '4', '4'),
675a25f0a04SGreg Roach		'ص' => array('0', '4', '4', '4'),
676a25f0a04SGreg Roach		'ض' => array('0', '3', '3', '3'),
677a25f0a04SGreg Roach		'ط' => array('0', '3', '3', '3'),
678a25f0a04SGreg Roach		'ظ' => array('0', '4', '4', '4'),
679a25f0a04SGreg Roach		'ع' => array('1', '0', '', ''),
680a25f0a04SGreg Roach		'غ' => array('0', '0', '', ''),
681a25f0a04SGreg Roach		'ف' => array('0', '7', '7', '7'),
682a25f0a04SGreg Roach		'ق' => array('0', '5', '5', '5'),
683a25f0a04SGreg Roach		'ك' => array('0', '5', '5', '5'),
684a25f0a04SGreg Roach		'ل' => array('0', '8', '8', '8'),
685a25f0a04SGreg Roach		'لا' => array('0', '8', '8', '8'),
686a25f0a04SGreg Roach		'م' => array('0', '6', '6', '6'),
687a25f0a04SGreg Roach		'ن' => array('0', '6', '6', '6'),
688a25f0a04SGreg Roach		'هن' => array('0', '66', '66', '66'),
689a25f0a04SGreg Roach		'ه' => array('0', '5', '5', ''),
690a25f0a04SGreg Roach		'و' => array('1', '', '', '', '7', '', ''),
691a25f0a04SGreg Roach		'ي' => array('0', '1', '', ''),
692a25f0a04SGreg Roach		'آ' => array('0', '1', '', ''),
693a25f0a04SGreg Roach		'ة' => array('0', '', '', '3'),
694a25f0a04SGreg Roach		'ی' => array('0', '1', '', ''),
695a25f0a04SGreg Roach		'ى' => array('1', '1', '', ''),
696a25f0a04SGreg Roach	);
697a25f0a04SGreg Roach
698a25f0a04SGreg Roach	/**
699a25f0a04SGreg Roach	 * @param string $name
700a25f0a04SGreg Roach	 *
701a25f0a04SGreg Roach	 * @return string[] List of possible DM codes for the word.
702a25f0a04SGreg Roach	 */
703a25f0a04SGreg Roach	private static function daitchMokotoffWord($name) {
704a25f0a04SGreg Roach		// Apply special transformation rules to the input string
705a25f0a04SGreg Roach		$name = I18N::strtoupper($name);
706a25f0a04SGreg Roach		foreach (self::$transformNameTable as $transformRule) {
707a25f0a04SGreg Roach			$name = str_replace($transformRule[0], $transformRule[1], $name);
708a25f0a04SGreg Roach		}
709a25f0a04SGreg Roach
710a25f0a04SGreg Roach		// Initialize
711a25f0a04SGreg Roach		$name_script = I18N::textScript($name);
712a25f0a04SGreg Roach		$noVowels = ($name_script == 'Hebr' || $name_script == 'Arab');
713a25f0a04SGreg Roach
714a25f0a04SGreg Roach		$lastPos         = strlen($name) - 1;
715a25f0a04SGreg Roach		$currPos         = 0;
716a25f0a04SGreg Roach		$state           = 1; // 1: start of input string, 2: before vowel, 3: other
717a25f0a04SGreg Roach		$result          = array(); // accumulate complete 6-digit D-M codes here
718a25f0a04SGreg Roach		$partialResult   = array(); // accumulate incomplete D-M codes here
719a25f0a04SGreg Roach		$partialResult[] = array('!'); // initialize 1st partial result  ('!' stops "duplicate sound" check)
720a25f0a04SGreg Roach
721a25f0a04SGreg Roach		// Loop through the input string.
722a25f0a04SGreg Roach		// Stop when the string is exhausted or when no more partial results remain
723a25f0a04SGreg Roach		while (count($partialResult) !== 0 && $currPos <= $lastPos) {
724a25f0a04SGreg Roach			// Find the DM coding table entry for the chunk at the current position
725a25f0a04SGreg Roach			$thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
726a25f0a04SGreg Roach			while ($thisEntry != '') {
727a25f0a04SGreg Roach				if (isset(self::$dmsounds[$thisEntry])) {
728a25f0a04SGreg Roach					break;
729a25f0a04SGreg Roach				}
730a25f0a04SGreg Roach				$thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
731a25f0a04SGreg Roach			}
732a25f0a04SGreg Roach			if ($thisEntry === '') {
733a25f0a04SGreg Roach				$currPos++; // Not in table: advance pointer to next byte
734a25f0a04SGreg Roach				continue; // and try again
735a25f0a04SGreg Roach			}
736a25f0a04SGreg Roach
737a25f0a04SGreg Roach			$soundTableEntry = self::$dmsounds[$thisEntry];
738a25f0a04SGreg Roach			$workingResult   = $partialResult;
739a25f0a04SGreg Roach			$partialResult   = array();
740a25f0a04SGreg Roach			$currPos += strlen($thisEntry);
741a25f0a04SGreg Roach
742a25f0a04SGreg Roach			// Not at beginning of input string
743a25f0a04SGreg Roach			if ($state != 1) {
744a25f0a04SGreg Roach				if ($currPos <= $lastPos) {
745a25f0a04SGreg Roach					// Determine whether the next chunk is a vowel
746a25f0a04SGreg Roach					$nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
747a25f0a04SGreg Roach					while ($nextEntry != '') {
748a25f0a04SGreg Roach						if (isset(self::$dmsounds[$nextEntry])) {
749a25f0a04SGreg Roach							break;
750a25f0a04SGreg Roach						}
751a25f0a04SGreg Roach						$nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
752a25f0a04SGreg Roach					}
753a25f0a04SGreg Roach				} else {
754a25f0a04SGreg Roach					$nextEntry = '';
755a25f0a04SGreg Roach				}
756a25f0a04SGreg Roach				if ($nextEntry != '' && self::$dmsounds[$nextEntry][0] != '0') {
757a25f0a04SGreg Roach					$state = 2;
758a25f0a04SGreg Roach				} else {
759a25f0a04SGreg Roach					// Next chunk is a vowel
760a25f0a04SGreg Roach					$state = 3;
761a25f0a04SGreg Roach				}
762a25f0a04SGreg Roach			}
763a25f0a04SGreg Roach
764a25f0a04SGreg Roach			while ($state < count($soundTableEntry)) {
765a25f0a04SGreg Roach				// empty means 'ignore this sound in this state'
766a25f0a04SGreg Roach				if ($soundTableEntry[$state] == '') {
767a25f0a04SGreg Roach					foreach ($workingResult as $workingEntry) {
768a25f0a04SGreg Roach						$tempEntry = $workingEntry;
769a25f0a04SGreg Roach						$tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
770a25f0a04SGreg Roach						$partialResult[] = $tempEntry;
771a25f0a04SGreg Roach					}
772a25f0a04SGreg Roach				} else {
773a25f0a04SGreg Roach					foreach ($workingResult as $workingEntry) {
774a25f0a04SGreg Roach						if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) {
775a25f0a04SGreg Roach							// Incoming sound isn't a duplicate of the previous sound
776a25f0a04SGreg Roach							$workingEntry[] = $soundTableEntry[$state];
777a25f0a04SGreg Roach						} else {
778a25f0a04SGreg Roach							// Incoming sound is a duplicate of the previous sound
779a25f0a04SGreg Roach							// For Hebrew and Arabic, we need to create a pair of D-M sound codes,
780a25f0a04SGreg Roach							// one of the pair with only a single occurrence of the duplicate sound,
781a25f0a04SGreg Roach							// the other with both occurrences
782a25f0a04SGreg Roach							if ($noVowels) {
783a25f0a04SGreg Roach								$workingEntry[] = $soundTableEntry[$state];
784a25f0a04SGreg Roach							}
785a25f0a04SGreg Roach						}
786a25f0a04SGreg Roach						if (count($workingEntry) < 7) {
787a25f0a04SGreg Roach							$partialResult[] = $workingEntry;
788a25f0a04SGreg Roach						} else {
789a25f0a04SGreg Roach							// This is the 6th code in the sequence
790a25f0a04SGreg Roach							// We're looking for 7 entries because the first is '!' and doesn't count
791a25f0a04SGreg Roach							$tempResult = str_replace('!', '', implode('', $workingEntry));
792a25f0a04SGreg Roach							// Only return codes from recognisable sounds
793a25f0a04SGreg Roach							if ($tempResult) {
794a25f0a04SGreg Roach								$result[] = substr($tempResult . '000000', 0, 6);
795a25f0a04SGreg Roach							}
796a25f0a04SGreg Roach						}
797a25f0a04SGreg Roach					}
798a25f0a04SGreg Roach				}
799a25f0a04SGreg Roach				$state = $state + 3; // Advance to next triplet while keeping the same basic state
800a25f0a04SGreg Roach			}
801a25f0a04SGreg Roach		}
802a25f0a04SGreg Roach
803a25f0a04SGreg Roach		// Zero-fill and copy all remaining partial results
804a25f0a04SGreg Roach		foreach ($partialResult as $workingEntry) {
805a25f0a04SGreg Roach			$tempResult = str_replace('!', '', implode('', $workingEntry));
806a25f0a04SGreg Roach			// Only return codes from recognisable sounds
807a25f0a04SGreg Roach			if ($tempResult) {
808a25f0a04SGreg Roach				$result[] = substr($tempResult . '000000', 0, 6);
809a25f0a04SGreg Roach			}
810a25f0a04SGreg Roach		}
811a25f0a04SGreg Roach
812a25f0a04SGreg Roach		return $result;
813a25f0a04SGreg Roach	}
814a25f0a04SGreg Roach}
815