xref: /webtrees/app/Soundex.php (revision a25f0a04682c4c39c1947220c90af4118c713952)
1*a25f0a04SGreg Roach<?php
2*a25f0a04SGreg Roachnamespace Webtrees;
3*a25f0a04SGreg Roach
4*a25f0a04SGreg Roach/**
5*a25f0a04SGreg Roach * webtrees: online genealogy
6*a25f0a04SGreg Roach * Copyright (C) 2015 webtrees development team
7*a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify
8*a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by
9*a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or
10*a25f0a04SGreg Roach * (at your option) any later version.
11*a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful,
12*a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
13*a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14*a25f0a04SGreg Roach * GNU General Public License for more details.
15*a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License
16*a25f0a04SGreg Roach * along with this program. If not, see <http://www.gnu.org/licenses/>.
17*a25f0a04SGreg Roach */
18*a25f0a04SGreg Roach
19*a25f0a04SGreg Roach/**
20*a25f0a04SGreg Roach * Class Soundex Functions for phonetic matching of strings
21*a25f0a04SGreg Roach */
22*a25f0a04SGreg Roachclass Soundex {
23*a25f0a04SGreg Roach	/**
24*a25f0a04SGreg Roach	 * @return string[]
25*a25f0a04SGreg Roach	 */
26*a25f0a04SGreg Roach	public static function getAlgorithms() {
27*a25f0a04SGreg Roach		return array(
28*a25f0a04SGreg Roach			'std' => /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell'),
29*a25f0a04SGreg Roach			'dm'  => /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff'),
30*a25f0a04SGreg Roach		);
31*a25f0a04SGreg Roach	}
32*a25f0a04SGreg Roach
33*a25f0a04SGreg Roach	/**
34*a25f0a04SGreg Roach	 * @param string $algorithm
35*a25f0a04SGreg Roach	 * @param string $text
36*a25f0a04SGreg Roach	 *
37*a25f0a04SGreg Roach	 * @return string
38*a25f0a04SGreg Roach	 */
39*a25f0a04SGreg Roach	public static function soundex($algorithm, $text) {
40*a25f0a04SGreg Roach		switch ($algorithm) {
41*a25f0a04SGreg Roach		case 'std':
42*a25f0a04SGreg Roach			return self::russell($text);
43*a25f0a04SGreg Roach		case 'dm':
44*a25f0a04SGreg Roach			return self::daitchMokotoff($text);
45*a25f0a04SGreg Roach		default:
46*a25f0a04SGreg Roach			throw new \InvalidArgumentException('Bad argument to Soundex::soundex()');
47*a25f0a04SGreg Roach		}
48*a25f0a04SGreg Roach	}
49*a25f0a04SGreg Roach
50*a25f0a04SGreg Roach	/**
51*a25f0a04SGreg Roach	 * Is there a match between two soundex codes?
52*a25f0a04SGreg Roach	 *
53*a25f0a04SGreg Roach	 * @param string $soundex1
54*a25f0a04SGreg Roach	 * @param string $soundex2
55*a25f0a04SGreg Roach	 *
56*a25f0a04SGreg Roach	 * @return boolean
57*a25f0a04SGreg Roach	 */
58*a25f0a04SGreg Roach	public static function compare($soundex1, $soundex2) {
59*a25f0a04SGreg Roach		if ($soundex1 && $soundex2) {
60*a25f0a04SGreg Roach			foreach (explode(':', $soundex1) as $code) {
61*a25f0a04SGreg Roach				if (strpos($soundex2, $code) !== false) {
62*a25f0a04SGreg Roach					return true;
63*a25f0a04SGreg Roach				}
64*a25f0a04SGreg Roach			}
65*a25f0a04SGreg Roach		}
66*a25f0a04SGreg Roach
67*a25f0a04SGreg Roach		return false;
68*a25f0a04SGreg Roach	}
69*a25f0a04SGreg Roach
70*a25f0a04SGreg Roach	/**
71*a25f0a04SGreg Roach	 * Generate Russell soundex codes for a given text.
72*a25f0a04SGreg Roach	 *
73*a25f0a04SGreg Roach	 * @param $text
74*a25f0a04SGreg Roach	 *
75*a25f0a04SGreg Roach	 * @return null|string
76*a25f0a04SGreg Roach	 */
77*a25f0a04SGreg Roach	public static function russell($text) {
78*a25f0a04SGreg Roach		$words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
79*a25f0a04SGreg Roach		$soundex_array = array();
80*a25f0a04SGreg Roach		foreach ($words as $word) {
81*a25f0a04SGreg Roach			$soundex = soundex($word);
82*a25f0a04SGreg Roach			// Only return codes from recognisable sounds
83*a25f0a04SGreg Roach			if ($soundex !== '0000') {
84*a25f0a04SGreg Roach				$soundex_array[] = $soundex;
85*a25f0a04SGreg Roach			}
86*a25f0a04SGreg Roach		}
87*a25f0a04SGreg Roach		// Combine words, e.g. “New York” as “Newyork”
88*a25f0a04SGreg Roach		if (count($words) > 1) {
89*a25f0a04SGreg Roach			$soundex_array[] = soundex(strtr($text, ' ', ''));
90*a25f0a04SGreg Roach		}
91*a25f0a04SGreg Roach		// A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
92*a25f0a04SGreg Roach		$soundex_array = array_slice(array_unique($soundex_array), 0, 51);
93*a25f0a04SGreg Roach
94*a25f0a04SGreg Roach		if ($soundex_array) {
95*a25f0a04SGreg Roach			return implode(':', $soundex_array);
96*a25f0a04SGreg Roach		} else {
97*a25f0a04SGreg Roach			return null;
98*a25f0a04SGreg Roach		}
99*a25f0a04SGreg Roach	}
100*a25f0a04SGreg Roach
101*a25f0a04SGreg Roach	/**
102*a25f0a04SGreg Roach	 * Generate Daitch–Mokotoff soundex codes for a given text.
103*a25f0a04SGreg Roach	 *
104*a25f0a04SGreg Roach	 * @param $text
105*a25f0a04SGreg Roach	 *
106*a25f0a04SGreg Roach	 * @return null|string
107*a25f0a04SGreg Roach	 */
108*a25f0a04SGreg Roach	public static function daitchMokotoff($text) {
109*a25f0a04SGreg Roach		$words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
110*a25f0a04SGreg Roach		$soundex_array = array();
111*a25f0a04SGreg Roach		foreach ($words as $word) {
112*a25f0a04SGreg Roach			$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
113*a25f0a04SGreg Roach		}
114*a25f0a04SGreg Roach		// Combine words, e.g. “New York” as “Newyork”
115*a25f0a04SGreg Roach		if (count($words) > 1) {
116*a25f0a04SGreg Roach			$soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(strtr($text, ' ', '')));
117*a25f0a04SGreg Roach		}
118*a25f0a04SGreg Roach		// A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
119*a25f0a04SGreg Roach		$soundex_array = array_slice(array_unique($soundex_array), 0, 36);
120*a25f0a04SGreg Roach
121*a25f0a04SGreg Roach		if ($soundex_array) {
122*a25f0a04SGreg Roach			return implode(':', $soundex_array);
123*a25f0a04SGreg Roach		} else {
124*a25f0a04SGreg Roach			return null;
125*a25f0a04SGreg Roach		}
126*a25f0a04SGreg Roach	}
127*a25f0a04SGreg Roach
128*a25f0a04SGreg Roach	// Determine the Daitch–Mokotoff Soundex code for a word
129*a25f0a04SGreg Roach	// Original implementation by Gerry Kroll, and analysis by Meliza Amity
130*a25f0a04SGreg Roach
131*a25f0a04SGreg Roach	// Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
132*a25f0a04SGreg Roach	const MAXCHAR = 7;
133*a25f0a04SGreg Roach
134*a25f0a04SGreg Roach	/**
135*a25f0a04SGreg Roach	 * Name transformation arrays.
136*a25f0a04SGreg Roach	 * Used to transform the Name string to simplify the "sounds like" table.
137*a25f0a04SGreg Roach	 * This is especially useful in Hebrew.
138*a25f0a04SGreg Roach	 *
139*a25f0a04SGreg Roach	 * Each array entry defines the "from" and "to" arguments of an preg($from, $to, $text)
140*a25f0a04SGreg Roach	 * function call to achieve the desired transformations.
141*a25f0a04SGreg Roach	 *
142*a25f0a04SGreg Roach	 * Note about the use of "\x01":
143*a25f0a04SGreg Roach	 * This code, which can’t legitimately occur in the kind of text we're dealing with,
144*a25f0a04SGreg Roach	 * is used as a place-holder so that conditional string replacements can be done.
145*a25f0a04SGreg Roach	 *
146*a25f0a04SGreg Roach	 * @var string[][]
147*a25f0a04SGreg Roach	 */
148*a25f0a04SGreg Roach	private static $transformNameTable = array(
149*a25f0a04SGreg Roach		// Force Yiddish ligatures to be treated as separate letters
150*a25f0a04SGreg Roach		array('װ', 'וו'),
151*a25f0a04SGreg Roach		array('ײ', 'יי'),
152*a25f0a04SGreg Roach		array('ױ', 'וי'),
153*a25f0a04SGreg Roach		array('בו', 'בע'),
154*a25f0a04SGreg Roach		array('פו', 'פע'),
155*a25f0a04SGreg Roach		array('ומ', 'עמ'),
156*a25f0a04SGreg Roach		array('ום', 'עם'),
157*a25f0a04SGreg Roach		array('ונ', 'ענ'),
158*a25f0a04SGreg Roach		array('ון', 'ען'),
159*a25f0a04SGreg Roach		array('וו', 'ב'),
160*a25f0a04SGreg Roach		array("\x01", ''),
161*a25f0a04SGreg Roach		array('ייה$', "\x01ה"),
162*a25f0a04SGreg Roach		array('ייע$', "\x01ע"),
163*a25f0a04SGreg Roach		array('יי', 'ע'),
164*a25f0a04SGreg Roach		array("\x01", 'יי'),
165*a25f0a04SGreg Roach	);
166*a25f0a04SGreg Roach
167*a25f0a04SGreg Roach	/**
168*a25f0a04SGreg Roach	 * The DM sound coding table is organized this way:
169*a25f0a04SGreg Roach	 * key: a variable-length string that corresponds to the UTF-8 character sequence
170*a25f0a04SGreg Roach	 * represented by the table entry.  Currently, that string can be up to 7
	 * bytes long.  This maximum length is defined by the class constant
	 * MAXCHAR.
173*a25f0a04SGreg Roach	 *
174*a25f0a04SGreg Roach	 * value: an array as follows:
175*a25f0a04SGreg Roach	 * [0]:  zero if not a vowel
176*a25f0a04SGreg Roach	 * [1]:  sound value when this string is at the beginning of the word
177*a25f0a04SGreg Roach	 * [2]:  sound value when this string is followed by a vowel
178*a25f0a04SGreg Roach	 * [3]:  sound value for other cases
179*a25f0a04SGreg Roach	 * [1],[2],[3] can be repeated several times to create branches in the code
180*a25f0a04SGreg Roach	 * an empty sound value means "ignore in this state"
181*a25f0a04SGreg Roach	 *
182*a25f0a04SGreg Roach	 * @var string[][]
183*a25f0a04SGreg Roach	 */
184*a25f0a04SGreg Roach	private static $dmsounds = array(
185*a25f0a04SGreg Roach		'A' => array('1', '0', '', ''),
186*a25f0a04SGreg Roach		'À' => array('1', '0', '', ''),
187*a25f0a04SGreg Roach		'Á' => array('1', '0', '', ''),
188*a25f0a04SGreg Roach		'Â' => array('1', '0', '', ''),
189*a25f0a04SGreg Roach		'Ã' => array('1', '0', '', ''),
190*a25f0a04SGreg Roach		'Ä' => array('1', '0', '1', '', '0', '', ''),
191*a25f0a04SGreg Roach		'Å' => array('1', '0', '', ''),
192*a25f0a04SGreg Roach		'Ă' => array('1', '0', '', ''),
193*a25f0a04SGreg Roach		'Ą' => array('1', '', '', '', '', '', '6'),
194*a25f0a04SGreg Roach		'Ạ' => array('1', '0', '', ''),
195*a25f0a04SGreg Roach		'Ả' => array('1', '0', '', ''),
196*a25f0a04SGreg Roach		'Ấ' => array('1', '0', '', ''),
197*a25f0a04SGreg Roach		'Ầ' => array('1', '0', '', ''),
198*a25f0a04SGreg Roach		'Ẩ' => array('1', '0', '', ''),
199*a25f0a04SGreg Roach		'Ẫ' => array('1', '0', '', ''),
200*a25f0a04SGreg Roach		'Ậ' => array('1', '0', '', ''),
201*a25f0a04SGreg Roach		'Ắ' => array('1', '0', '', ''),
202*a25f0a04SGreg Roach		'Ằ' => array('1', '0', '', ''),
203*a25f0a04SGreg Roach		'Ẳ' => array('1', '0', '', ''),
204*a25f0a04SGreg Roach		'Ẵ' => array('1', '0', '', ''),
205*a25f0a04SGreg Roach		'Ặ' => array('1', '0', '', ''),
206*a25f0a04SGreg Roach		'AE' => array('1', '0', '1', ''),
207*a25f0a04SGreg Roach		'Æ' => array('1', '0', '1', ''),
208*a25f0a04SGreg Roach		'AI' => array('1', '0', '1', ''),
209*a25f0a04SGreg Roach		'AJ' => array('1', '0', '1', ''),
210*a25f0a04SGreg Roach		'AU' => array('1', '0', '7', ''),
211*a25f0a04SGreg Roach		'AV' => array('1', '0', '7', '', '7', '7', '7'),
212*a25f0a04SGreg Roach		'ÄU' => array('1', '0', '1', ''),
213*a25f0a04SGreg Roach		'AY' => array('1', '0', '1', ''),
214*a25f0a04SGreg Roach		'B' => array('0', '7', '7', '7'),
215*a25f0a04SGreg Roach		'C' => array('0', '5', '5', '5', '34', '4', '4'),
216*a25f0a04SGreg Roach		'Ć' => array('0', '4', '4', '4'),
217*a25f0a04SGreg Roach		'Č' => array('0', '4', '4', '4'),
218*a25f0a04SGreg Roach		'Ç' => array('0', '4', '4', '4'),
219*a25f0a04SGreg Roach		'CH' => array('0', '5', '5', '5', '34', '4', '4'),
220*a25f0a04SGreg Roach		'CHS' => array('0', '5', '54', '54'),
221*a25f0a04SGreg Roach		'CK' => array('0', '5', '5', '5', '45', '45', '45'),
222*a25f0a04SGreg Roach		'CCS' => array('0', '4', '4', '4'),
223*a25f0a04SGreg Roach		'CS' => array('0', '4', '4', '4'),
224*a25f0a04SGreg Roach		'CSZ' => array('0', '4', '4', '4'),
225*a25f0a04SGreg Roach		'CZ' => array('0', '4', '4', '4'),
226*a25f0a04SGreg Roach		'CZS' => array('0', '4', '4', '4'),
227*a25f0a04SGreg Roach		'D' => array('0', '3', '3', '3'),
228*a25f0a04SGreg Roach		'Ď' => array('0', '3', '3', '3'),
229*a25f0a04SGreg Roach		'Đ' => array('0', '3', '3', '3'),
230*a25f0a04SGreg Roach		'DRS' => array('0', '4', '4', '4'),
231*a25f0a04SGreg Roach		'DRZ' => array('0', '4', '4', '4'),
232*a25f0a04SGreg Roach		'DS' => array('0', '4', '4', '4'),
233*a25f0a04SGreg Roach		'DSH' => array('0', '4', '4', '4'),
234*a25f0a04SGreg Roach		'DSZ' => array('0', '4', '4', '4'),
235*a25f0a04SGreg Roach		'DT' => array('0', '3', '3', '3'),
236*a25f0a04SGreg Roach		'DDZ' => array('0', '4', '4', '4'),
237*a25f0a04SGreg Roach		'DDZS' => array('0', '4', '4', '4'),
238*a25f0a04SGreg Roach		'DZ' => array('0', '4', '4', '4'),
239*a25f0a04SGreg Roach		'DŹ' => array('0', '4', '4', '4'),
240*a25f0a04SGreg Roach		'DŻ' => array('0', '4', '4', '4'),
241*a25f0a04SGreg Roach		'DZH' => array('0', '4', '4', '4'),
242*a25f0a04SGreg Roach		'DZS' => array('0', '4', '4', '4'),
243*a25f0a04SGreg Roach		'E' => array('1', '0', '', ''),
244*a25f0a04SGreg Roach		'È' => array('1', '0', '', ''),
245*a25f0a04SGreg Roach		'É' => array('1', '0', '', ''),
246*a25f0a04SGreg Roach		'Ê' => array('1', '0', '', ''),
247*a25f0a04SGreg Roach		'Ë' => array('1', '0', '', ''),
248*a25f0a04SGreg Roach		'Ĕ' => array('1', '0', '', ''),
249*a25f0a04SGreg Roach		'Ė' => array('1', '0', '', ''),
250*a25f0a04SGreg Roach		'Ę' => array('1', '', '', '6', '', '', ''),
251*a25f0a04SGreg Roach		'Ẹ' => array('1', '0', '', ''),
252*a25f0a04SGreg Roach		'Ẻ' => array('1', '0', '', ''),
253*a25f0a04SGreg Roach		'Ẽ' => array('1', '0', '', ''),
254*a25f0a04SGreg Roach		'Ế' => array('1', '0', '', ''),
255*a25f0a04SGreg Roach		'Ề' => array('1', '0', '', ''),
256*a25f0a04SGreg Roach		'Ể' => array('1', '0', '', ''),
257*a25f0a04SGreg Roach		'Ễ' => array('1', '0', '', ''),
258*a25f0a04SGreg Roach		'Ệ' => array('1', '0', '', ''),
259*a25f0a04SGreg Roach		'EAU' => array('1', '0', '', ''),
260*a25f0a04SGreg Roach		'EI' => array('1', '0', '1', ''),
261*a25f0a04SGreg Roach		'EJ' => array('1', '0', '1', ''),
262*a25f0a04SGreg Roach		'EU' => array('1', '1', '1', ''),
263*a25f0a04SGreg Roach		'EY' => array('1', '0', '1', ''),
264*a25f0a04SGreg Roach		'F' => array('0', '7', '7', '7'),
265*a25f0a04SGreg Roach		'FB' => array('0', '7', '7', '7'),
266*a25f0a04SGreg Roach		'G' => array('0', '5', '5', '5', '34', '4', '4'),
267*a25f0a04SGreg Roach		'Ğ' => array('0', '', '', ''),
268*a25f0a04SGreg Roach		'GGY' => array('0', '5', '5', '5'),
269*a25f0a04SGreg Roach		'GY' => array('0', '5', '5', '5'),
270*a25f0a04SGreg Roach		'H' => array('0', '5', '5', '', '5', '5', '5'),
271*a25f0a04SGreg Roach		'I' => array('1', '0', '', ''),
272*a25f0a04SGreg Roach		'Ì' => array('1', '0', '', ''),
273*a25f0a04SGreg Roach		'Í' => array('1', '0', '', ''),
274*a25f0a04SGreg Roach		'Î' => array('1', '0', '', ''),
275*a25f0a04SGreg Roach		'Ï' => array('1', '0', '', ''),
276*a25f0a04SGreg Roach		'Ĩ' => array('1', '0', '', ''),
277*a25f0a04SGreg Roach		'Į' => array('1', '0', '', ''),
278*a25f0a04SGreg Roach		'İ' => array('1', '0', '', ''),
279*a25f0a04SGreg Roach		'Ỉ' => array('1', '0', '', ''),
280*a25f0a04SGreg Roach		'Ị' => array('1', '0', '', ''),
281*a25f0a04SGreg Roach		'IA' => array('1', '1', '', ''),
282*a25f0a04SGreg Roach		'IE' => array('1', '1', '', ''),
283*a25f0a04SGreg Roach		'IO' => array('1', '1', '', ''),
284*a25f0a04SGreg Roach		'IU' => array('1', '1', '', ''),
285*a25f0a04SGreg Roach		'J' => array('0', '1', '', '', '4', '4', '4', '5', '5', ''),
286*a25f0a04SGreg Roach		'K' => array('0', '5', '5', '5'),
287*a25f0a04SGreg Roach		'KH' => array('0', '5', '5', '5'),
288*a25f0a04SGreg Roach		'KS' => array('0', '5', '54', '54'),
289*a25f0a04SGreg Roach		'L' => array('0', '8', '8', '8'),
290*a25f0a04SGreg Roach		'Ľ' => array('0', '8', '8', '8'),
291*a25f0a04SGreg Roach		'Ĺ' => array('0', '8', '8', '8'),
292*a25f0a04SGreg Roach		'Ł' => array('0', '7', '7', '7', '8', '8', '8'),
293*a25f0a04SGreg Roach		'LL' => array('0', '8', '8', '8', '58', '8', '8', '1', '8', '8'),
294*a25f0a04SGreg Roach		'LLY' => array('0', '8', '8', '8', '1', '8', '8'),
295*a25f0a04SGreg Roach		'LY' => array('0', '8', '8', '8', '1', '8', '8'),
296*a25f0a04SGreg Roach		'M' => array('0', '6', '6', '6'),
297*a25f0a04SGreg Roach		'MĔ' => array('0', '66', '66', '66'),
298*a25f0a04SGreg Roach		'MN' => array('0', '66', '66', '66'),
299*a25f0a04SGreg Roach		'N' => array('0', '6', '6', '6'),
300*a25f0a04SGreg Roach		'Ń' => array('0', '6', '6', '6'),
301*a25f0a04SGreg Roach		'Ň' => array('0', '6', '6', '6'),
302*a25f0a04SGreg Roach		'Ñ' => array('0', '6', '6', '6'),
303*a25f0a04SGreg Roach		'NM' => array('0', '66', '66', '66'),
304*a25f0a04SGreg Roach		'O' => array('1', '0', '', ''),
305*a25f0a04SGreg Roach		'Ò' => array('1', '0', '', ''),
306*a25f0a04SGreg Roach		'Ó' => array('1', '0', '', ''),
307*a25f0a04SGreg Roach		'Ô' => array('1', '0', '', ''),
308*a25f0a04SGreg Roach		'Õ' => array('1', '0', '', ''),
309*a25f0a04SGreg Roach		'Ö' => array('1', '0', '', ''),
310*a25f0a04SGreg Roach		'Ø' => array('1', '0', '', ''),
311*a25f0a04SGreg Roach		'Ő' => array('1', '0', '', ''),
312*a25f0a04SGreg Roach		'Œ' => array('1', '0', '', ''),
313*a25f0a04SGreg Roach		'Ơ' => array('1', '0', '', ''),
314*a25f0a04SGreg Roach		'Ọ' => array('1', '0', '', ''),
315*a25f0a04SGreg Roach		'Ỏ' => array('1', '0', '', ''),
316*a25f0a04SGreg Roach		'Ố' => array('1', '0', '', ''),
317*a25f0a04SGreg Roach		'Ồ' => array('1', '0', '', ''),
318*a25f0a04SGreg Roach		'Ổ' => array('1', '0', '', ''),
319*a25f0a04SGreg Roach		'Ỗ' => array('1', '0', '', ''),
320*a25f0a04SGreg Roach		'Ộ' => array('1', '0', '', ''),
321*a25f0a04SGreg Roach		'Ớ' => array('1', '0', '', ''),
322*a25f0a04SGreg Roach		'Ờ' => array('1', '0', '', ''),
323*a25f0a04SGreg Roach		'Ở' => array('1', '0', '', ''),
324*a25f0a04SGreg Roach		'Ỡ' => array('1', '0', '', ''),
325*a25f0a04SGreg Roach		'Ợ' => array('1', '0', '', ''),
326*a25f0a04SGreg Roach		'OE' => array('1', '0', '', ''),
327*a25f0a04SGreg Roach		'OI' => array('1', '0', '1', ''),
328*a25f0a04SGreg Roach		'OJ' => array('1', '0', '1', ''),
329*a25f0a04SGreg Roach		'OU' => array('1', '0', '', ''),
330*a25f0a04SGreg Roach		'OY' => array('1', '0', '1', ''),
331*a25f0a04SGreg Roach		'P' => array('0', '7', '7', '7'),
332*a25f0a04SGreg Roach		'PF' => array('0', '7', '7', '7'),
333*a25f0a04SGreg Roach		'PH' => array('0', '7', '7', '7'),
334*a25f0a04SGreg Roach		'Q' => array('0', '5', '5', '5'),
335*a25f0a04SGreg Roach		'R' => array('0', '9', '9', '9'),
336*a25f0a04SGreg Roach		'Ř' => array('0', '4', '4', '4'),
337*a25f0a04SGreg Roach		'RS' => array('0', '4', '4', '4', '94', '94', '94'),
338*a25f0a04SGreg Roach		'RZ' => array('0', '4', '4', '4', '94', '94', '94'),
339*a25f0a04SGreg Roach		'S' => array('0', '4', '4', '4'),
340*a25f0a04SGreg Roach		'Ś' => array('0', '4', '4', '4'),
341*a25f0a04SGreg Roach		'Š' => array('0', '4', '4', '4'),
342*a25f0a04SGreg Roach		'Ş' => array('0', '4', '4', '4'),
343*a25f0a04SGreg Roach		'SC' => array('0', '2', '4', '4'),
344*a25f0a04SGreg Roach		'ŠČ' => array('0', '2', '4', '4'),
345*a25f0a04SGreg Roach		'SCH' => array('0', '4', '4', '4'),
346*a25f0a04SGreg Roach		'SCHD' => array('0', '2', '43', '43'),
347*a25f0a04SGreg Roach		'SCHT' => array('0', '2', '43', '43'),
348*a25f0a04SGreg Roach		'SCHTCH' => array('0', '2', '4', '4'),
349*a25f0a04SGreg Roach		'SCHTSCH' => array('0', '2', '4', '4'),
350*a25f0a04SGreg Roach		'SCHTSH' => array('0', '2', '4', '4'),
351*a25f0a04SGreg Roach		'SD' => array('0', '2', '43', '43'),
352*a25f0a04SGreg Roach		'SH' => array('0', '4', '4', '4'),
353*a25f0a04SGreg Roach		'SHCH' => array('0', '2', '4', '4'),
354*a25f0a04SGreg Roach		'SHD' => array('0', '2', '43', '43'),
355*a25f0a04SGreg Roach		'SHT' => array('0', '2', '43', '43'),
356*a25f0a04SGreg Roach		'SHTCH' => array('0', '2', '4', '4'),
357*a25f0a04SGreg Roach		'SHTSH' => array('0', '2', '4', '4'),
358*a25f0a04SGreg Roach		'ß' => array('0', '', '4', '4'),
359*a25f0a04SGreg Roach		'ST' => array('0', '2', '43', '43'),
360*a25f0a04SGreg Roach		'STCH' => array('0', '2', '4', '4'),
361*a25f0a04SGreg Roach		'STRS' => array('0', '2', '4', '4'),
362*a25f0a04SGreg Roach		'STRZ' => array('0', '2', '4', '4'),
363*a25f0a04SGreg Roach		'STSCH' => array('0', '2', '4', '4'),
364*a25f0a04SGreg Roach		'STSH' => array('0', '2', '4', '4'),
365*a25f0a04SGreg Roach		'SSZ' => array('0', '4', '4', '4'),
366*a25f0a04SGreg Roach		'SZ' => array('0', '4', '4', '4'),
367*a25f0a04SGreg Roach		'SZCS' => array('0', '2', '4', '4'),
368*a25f0a04SGreg Roach		'SZCZ' => array('0', '2', '4', '4'),
369*a25f0a04SGreg Roach		'SZD' => array('0', '2', '43', '43'),
370*a25f0a04SGreg Roach		'SZT' => array('0', '2', '43', '43'),
371*a25f0a04SGreg Roach		'T' => array('0', '3', '3', '3'),
372*a25f0a04SGreg Roach		'Ť' => array('0', '3', '3', '3'),
373*a25f0a04SGreg Roach		'Ţ' => array('0', '3', '3', '3', '4', '4', '4'),
374*a25f0a04SGreg Roach		'TC' => array('0', '4', '4', '4'),
375*a25f0a04SGreg Roach		'TCH' => array('0', '4', '4', '4'),
376*a25f0a04SGreg Roach		'TH' => array('0', '3', '3', '3'),
377*a25f0a04SGreg Roach		'TRS' => array('0', '4', '4', '4'),
378*a25f0a04SGreg Roach		'TRZ' => array('0', '4', '4', '4'),
379*a25f0a04SGreg Roach		'TS' => array('0', '4', '4', '4'),
380*a25f0a04SGreg Roach		'TSCH' => array('0', '4', '4', '4'),
381*a25f0a04SGreg Roach		'TSH' => array('0', '4', '4', '4'),
382*a25f0a04SGreg Roach		'TSZ' => array('0', '4', '4', '4'),
383*a25f0a04SGreg Roach		'TTCH' => array('0', '4', '4', '4'),
384*a25f0a04SGreg Roach		'TTS' => array('0', '4', '4', '4'),
385*a25f0a04SGreg Roach		'TTSCH' => array('0', '4', '4', '4'),
386*a25f0a04SGreg Roach		'TTSZ' => array('0', '4', '4', '4'),
387*a25f0a04SGreg Roach		'TTZ' => array('0', '4', '4', '4'),
388*a25f0a04SGreg Roach		'TZ' => array('0', '4', '4', '4'),
389*a25f0a04SGreg Roach		'TZS' => array('0', '4', '4', '4'),
390*a25f0a04SGreg Roach		'U' => array('1', '0', '', ''),
391*a25f0a04SGreg Roach		'Ù' => array('1', '0', '', ''),
392*a25f0a04SGreg Roach		'Ú' => array('1', '0', '', ''),
393*a25f0a04SGreg Roach		'Û' => array('1', '0', '', ''),
394*a25f0a04SGreg Roach		'Ü' => array('1', '0', '', ''),
395*a25f0a04SGreg Roach		'Ũ' => array('1', '0', '', ''),
396*a25f0a04SGreg Roach		'Ū' => array('1', '0', '', ''),
397*a25f0a04SGreg Roach		'Ů' => array('1', '0', '', ''),
398*a25f0a04SGreg Roach		'Ű' => array('1', '0', '', ''),
399*a25f0a04SGreg Roach		'Ų' => array('1', '0', '', ''),
400*a25f0a04SGreg Roach		'Ư' => array('1', '0', '', ''),
401*a25f0a04SGreg Roach		'Ụ' => array('1', '0', '', ''),
402*a25f0a04SGreg Roach		'Ủ' => array('1', '0', '', ''),
403*a25f0a04SGreg Roach		'Ứ' => array('1', '0', '', ''),
404*a25f0a04SGreg Roach		'Ừ' => array('1', '0', '', ''),
405*a25f0a04SGreg Roach		'Ử' => array('1', '0', '', ''),
406*a25f0a04SGreg Roach		'Ữ' => array('1', '0', '', ''),
407*a25f0a04SGreg Roach		'Ự' => array('1', '0', '', ''),
408*a25f0a04SGreg Roach		'UE' => array('1', '0', '', ''),
409*a25f0a04SGreg Roach		'UI' => array('1', '0', '1', ''),
410*a25f0a04SGreg Roach		'UJ' => array('1', '0', '1', ''),
411*a25f0a04SGreg Roach		'UY' => array('1', '0', '1', ''),
412*a25f0a04SGreg Roach		'UW' => array('1', '0', '1', '', '0', '7', '7'),
413*a25f0a04SGreg Roach		'V' => array('0', '7', '7', '7'),
414*a25f0a04SGreg Roach		'W' => array('0', '7', '7', '7'),
415*a25f0a04SGreg Roach		'X' => array('0', '5', '54', '54'),
416*a25f0a04SGreg Roach		'Y' => array('1', '1', '', ''),
417*a25f0a04SGreg Roach		'Ý' => array('1', '1', '', ''),
418*a25f0a04SGreg Roach		'Ỳ' => array('1', '1', '', ''),
419*a25f0a04SGreg Roach		'Ỵ' => array('1', '1', '', ''),
420*a25f0a04SGreg Roach		'Ỷ' => array('1', '1', '', ''),
421*a25f0a04SGreg Roach		'Ỹ' => array('1', '1', '', ''),
422*a25f0a04SGreg Roach		'Z' => array('0', '4', '4', '4'),
423*a25f0a04SGreg Roach		'Ź' => array('0', '4', '4', '4'),
424*a25f0a04SGreg Roach		'Ż' => array('0', '4', '4', '4'),
425*a25f0a04SGreg Roach		'Ž' => array('0', '4', '4', '4'),
426*a25f0a04SGreg Roach		'ZD' => array('0', '2', '43', '43'),
427*a25f0a04SGreg Roach		'ZDZ' => array('0', '2', '4', '4'),
428*a25f0a04SGreg Roach		'ZDZH' => array('0', '2', '4', '4'),
429*a25f0a04SGreg Roach		'ZH' => array('0', '4', '4', '4'),
430*a25f0a04SGreg Roach		'ZHD' => array('0', '2', '43', '43'),
431*a25f0a04SGreg Roach		'ZHDZH' => array('0', '2', '4', '4'),
432*a25f0a04SGreg Roach		'ZS' => array('0', '4', '4', '4'),
433*a25f0a04SGreg Roach		'ZSCH' => array('0', '4', '4', '4'),
434*a25f0a04SGreg Roach		'ZSH' => array('0', '4', '4', '4'),
435*a25f0a04SGreg Roach		'ZZS' => array('0', '4', '4', '4'),
436*a25f0a04SGreg Roach		// Cyrillic alphabet
437*a25f0a04SGreg Roach		'А' => array('1', '0', '', ''),
438*a25f0a04SGreg Roach		'Б' => array('0', '7', '7', '7'),
439*a25f0a04SGreg Roach		'В' => array('0', '7', '7', '7'),
440*a25f0a04SGreg Roach		'Г' => array('0', '5', '5', '5'),
441*a25f0a04SGreg Roach		'Д' => array('0', '3', '3', '3'),
442*a25f0a04SGreg Roach		'ДЗ' => array('0', '4', '4', '4'),
443*a25f0a04SGreg Roach		'Е' => array('1', '0', '', ''),
444*a25f0a04SGreg Roach		'Ё' => array('1', '0', '', ''),
445*a25f0a04SGreg Roach		'Ж' => array('0', '4', '4', '4'),
446*a25f0a04SGreg Roach		'З' => array('0', '4', '4', '4'),
447*a25f0a04SGreg Roach		'И' => array('1', '0', '', ''),
448*a25f0a04SGreg Roach		'Й' => array('1', '1', '', '', '4', '4', '4'),
449*a25f0a04SGreg Roach		'К' => array('0', '5', '5', '5'),
450*a25f0a04SGreg Roach		'Л' => array('0', '8', '8', '8'),
451*a25f0a04SGreg Roach		'М' => array('0', '6', '6', '6'),
452*a25f0a04SGreg Roach		'Н' => array('0', '6', '6', '6'),
453*a25f0a04SGreg Roach		'О' => array('1', '0', '', ''),
454*a25f0a04SGreg Roach		'П' => array('0', '7', '7', '7'),
455*a25f0a04SGreg Roach		'Р' => array('0', '9', '9', '9'),
456*a25f0a04SGreg Roach		'РЖ' => array('0', '4', '4', '4'),
457*a25f0a04SGreg Roach		'С' => array('0', '4', '4', '4'),
458*a25f0a04SGreg Roach		'Т' => array('0', '3', '3', '3'),
459*a25f0a04SGreg Roach		'У' => array('1', '0', '', ''),
460*a25f0a04SGreg Roach		'Ф' => array('0', '7', '7', '7'),
461*a25f0a04SGreg Roach		'Х' => array('0', '5', '5', '5'),
462*a25f0a04SGreg Roach		'Ц' => array('0', '4', '4', '4'),
463*a25f0a04SGreg Roach		'Ч' => array('0', '4', '4', '4'),
464*a25f0a04SGreg Roach		'Ш' => array('0', '4', '4', '4'),
465*a25f0a04SGreg Roach		'Щ' => array('0', '2', '4', '4'),
466*a25f0a04SGreg Roach		'Ъ' => array('0', '', '', ''),
467*a25f0a04SGreg Roach		'Ы' => array('0', '1', '', ''),
468*a25f0a04SGreg Roach		'Ь' => array('0', '', '', ''),
469*a25f0a04SGreg Roach		'Э' => array('1', '0', '', ''),
470*a25f0a04SGreg Roach		'Ю' => array('0', '1', '', ''),
471*a25f0a04SGreg Roach		'Я' => array('0', '1', '', ''),
472*a25f0a04SGreg Roach		// Greek alphabet
473*a25f0a04SGreg Roach		'Α' => array('1', '0', '', ''),
474*a25f0a04SGreg Roach		'Ά' => array('1', '0', '', ''),
475*a25f0a04SGreg Roach		'ΑΙ' => array('1', '0', '1', ''),
476*a25f0a04SGreg Roach		'ΑΥ' => array('1', '0', '1', ''),
477*a25f0a04SGreg Roach		'Β' => array('0', '7', '7', '7'),
478*a25f0a04SGreg Roach		'Γ' => array('0', '5', '5', '5'),
479*a25f0a04SGreg Roach		'Δ' => array('0', '3', '3', '3'),
480*a25f0a04SGreg Roach		'Ε' => array('1', '0', '', ''),
481*a25f0a04SGreg Roach		'Έ' => array('1', '0', '', ''),
482*a25f0a04SGreg Roach		'ΕΙ' => array('1', '0', '1', ''),
483*a25f0a04SGreg Roach		'ΕΥ' => array('1', '1', '1', ''),
484*a25f0a04SGreg Roach		'Ζ' => array('0', '4', '4', '4'),
485*a25f0a04SGreg Roach		'Η' => array('1', '0', '', ''),
486*a25f0a04SGreg Roach		'Ή' => array('1', '0', '', ''),
487*a25f0a04SGreg Roach		'Θ' => array('0', '3', '3', '3'),
488*a25f0a04SGreg Roach		'Ι' => array('1', '0', '', ''),
489*a25f0a04SGreg Roach		'Ί' => array('1', '0', '', ''),
490*a25f0a04SGreg Roach		'Ϊ' => array('1', '0', '', ''),
491*a25f0a04SGreg Roach		'ΐ' => array('1', '0', '', ''),
492*a25f0a04SGreg Roach		'Κ' => array('0', '5', '5', '5'),
493*a25f0a04SGreg Roach		'Λ' => array('0', '8', '8', '8'),
494*a25f0a04SGreg Roach		'Μ' => array('0', '6', '6', '6'),
495*a25f0a04SGreg Roach		'ΜΠ' => array('0', '7', '7', '7'),
496*a25f0a04SGreg Roach		'Ν' => array('0', '6', '6', '6'),
497*a25f0a04SGreg Roach		'ΝΤ' => array('0', '3', '3', '3'),
498*a25f0a04SGreg Roach		'Ξ' => array('0', '5', '54', '54'),
499*a25f0a04SGreg Roach		'Ο' => array('1', '0', '', ''),
500*a25f0a04SGreg Roach		'Ό' => array('1', '0', '', ''),
501*a25f0a04SGreg Roach		'ΟΙ' => array('1', '0', '1', ''),
502*a25f0a04SGreg Roach		'ΟΥ' => array('1', '0', '1', ''),
503*a25f0a04SGreg Roach		'Π' => array('0', '7', '7', '7'),
504*a25f0a04SGreg Roach		'Ρ' => array('0', '9', '9', '9'),
505*a25f0a04SGreg Roach		'Σ' => array('0', '4', '4', '4'),
506*a25f0a04SGreg Roach		'ς' => array('0', '', '', '4'),
507*a25f0a04SGreg Roach		'Τ' => array('0', '3', '3', '3'),
508*a25f0a04SGreg Roach		'ΤΖ' => array('0', '4', '4', '4'),
509*a25f0a04SGreg Roach		'ΤΣ' => array('0', '4', '4', '4'),
510*a25f0a04SGreg Roach		'Υ' => array('1', '1', '', ''),
511*a25f0a04SGreg Roach		'Ύ' => array('1', '1', '', ''),
512*a25f0a04SGreg Roach		'Ϋ' => array('1', '1', '', ''),
513*a25f0a04SGreg Roach		'ΰ' => array('1', '1', '', ''),
514*a25f0a04SGreg Roach		'ΥΚ' => array('1', '5', '5', '5'),
515*a25f0a04SGreg Roach		'ΥΥ' => array('1', '65', '65', '65'),
516*a25f0a04SGreg Roach		'Φ' => array('0', '7', '7', '7'),
517*a25f0a04SGreg Roach		'Χ' => array('0', '5', '5', '5'),
518*a25f0a04SGreg Roach		'Ψ' => array('0', '7', '7', '7'),
519*a25f0a04SGreg Roach		'Ω' => array('1', '0', '', ''),
520*a25f0a04SGreg Roach		'Ώ' => array('1', '0', '', ''),
521*a25f0a04SGreg Roach		// Hebrew alphabet
522*a25f0a04SGreg Roach		'א' => array('1', '0', '', ''),
523*a25f0a04SGreg Roach		'או' => array('1', '0', '7', ''),
524*a25f0a04SGreg Roach		'אג' => array('1', '4', '4', '4', '5', '5', '5', '34', '34', '34'),
525*a25f0a04SGreg Roach		'בב' => array('0', '7', '7', '7', '77', '77', '77'),
526*a25f0a04SGreg Roach		'ב' => array('0', '7', '7', '7'),
527*a25f0a04SGreg Roach		'גג' => array('0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'),
528*a25f0a04SGreg Roach		'גד' => array('0', '43', '43', '43', '53', '53', '53'),
529*a25f0a04SGreg Roach		'גה' => array('0', '45', '45', '45', '55', '55', '55'),
530*a25f0a04SGreg Roach		'גז' => array('0', '44', '44', '44', '45', '45', '45'),
531*a25f0a04SGreg Roach		'גח' => array('0', '45', '45', '45', '55', '55', '55'),
532*a25f0a04SGreg Roach		'גכ' => array('0', '45', '45', '45', '55', '55', '55'),
533*a25f0a04SGreg Roach		'גך' => array('0', '45', '45', '45', '55', '55', '55'),
534*a25f0a04SGreg Roach		'גצ' => array('0', '44', '44', '44', '45', '45', '45'),
535*a25f0a04SGreg Roach		'גץ' => array('0', '44', '44', '44', '45', '45', '45'),
536*a25f0a04SGreg Roach		'גק' => array('0', '45', '45', '45', '54', '54', '54'),
537*a25f0a04SGreg Roach		'גש' => array('0', '44', '44', '44', '54', '54', '54'),
538*a25f0a04SGreg Roach		'גת' => array('0', '43', '43', '43', '53', '53', '53'),
539*a25f0a04SGreg Roach		'ג' => array('0', '4', '4', '4', '5', '5', '5'),
540*a25f0a04SGreg Roach		'דז' => array('0', '4', '4', '4'),
541*a25f0a04SGreg Roach		'דד' => array('0', '3', '3', '3', '33', '33', '33'),
542*a25f0a04SGreg Roach		'דט' => array('0', '33', '33', '33'),
543*a25f0a04SGreg Roach		'דש' => array('0', '4', '4', '4'),
544*a25f0a04SGreg Roach		'דצ' => array('0', '4', '4', '4'),
545*a25f0a04SGreg Roach		'דץ' => array('0', '4', '4', '4'),
546*a25f0a04SGreg Roach		'ד' => array('0', '3', '3', '3'),
547*a25f0a04SGreg Roach		'הג' => array('0', '54', '54', '54', '55', '55', '55'),
548*a25f0a04SGreg Roach		'הכ' => array('0', '55', '55', '55'),
549*a25f0a04SGreg Roach		'הח' => array('0', '55', '55', '55'),
550*a25f0a04SGreg Roach		'הק' => array('0', '55', '55', '55', '5', '5', '5'),
551*a25f0a04SGreg Roach		'הה' => array('0', '5', '5', '', '55', '55', ''),
552*a25f0a04SGreg Roach		'ה' => array('0', '5', '5', ''),
553*a25f0a04SGreg Roach		'וי' => array('1', '', '', '', '7', '7', '7'),
554*a25f0a04SGreg Roach		'ו' => array('1', '7', '7', '7', '7', '', ''),
555*a25f0a04SGreg Roach		'וו' => array('1', '7', '7', '7', '7', '', ''),
556*a25f0a04SGreg Roach		'וופ' => array('1', '7', '7', '7', '77', '77', '77'),
557*a25f0a04SGreg Roach		'זש' => array('0', '4', '4', '4', '44', '44', '44'),
558*a25f0a04SGreg Roach		'זדז' => array('0', '2', '4', '4'),
559*a25f0a04SGreg Roach		'ז' => array('0', '4', '4', '4'),
560*a25f0a04SGreg Roach		'זג' => array('0', '44', '44', '44', '45', '45', '45'),
561*a25f0a04SGreg Roach		'זז' => array('0', '4', '4', '4', '44', '44', '44'),
562*a25f0a04SGreg Roach		'זס' => array('0', '44', '44', '44'),
563*a25f0a04SGreg Roach		'זצ' => array('0', '44', '44', '44'),
564*a25f0a04SGreg Roach		'זץ' => array('0', '44', '44', '44'),
565*a25f0a04SGreg Roach		'חג' => array('0', '54', '54', '54', '53', '53', '53'),
566*a25f0a04SGreg Roach		'חח' => array('0', '5', '5', '5', '55', '55', '55'),
567*a25f0a04SGreg Roach		'חק' => array('0', '55', '55', '55', '5', '5', '5'),
568*a25f0a04SGreg Roach		'חכ' => array('0', '45', '45', '45', '55', '55', '55'),
569*a25f0a04SGreg Roach		'חס' => array('0', '5', '54', '54'),
570*a25f0a04SGreg Roach		'חש' => array('0', '5', '54', '54'),
571*a25f0a04SGreg Roach		'ח' => array('0', '5', '5', '5'),
572*a25f0a04SGreg Roach		'טש' => array('0', '4', '4', '4'),
573*a25f0a04SGreg Roach		'טד' => array('0', '33', '33', '33'),
574*a25f0a04SGreg Roach		'טי' => array('0', '3', '3', '3', '4', '4', '4', '3', '3', '34'),
575*a25f0a04SGreg Roach		'טת' => array('0', '33', '33', '33'),
576*a25f0a04SGreg Roach		'טט' => array('0', '3', '3', '3', '33', '33', '33'),
577*a25f0a04SGreg Roach		'ט' => array('0', '3', '3', '3'),
578*a25f0a04SGreg Roach		'י' => array('1', '1', '', ''),
579*a25f0a04SGreg Roach		'יא' => array('1', '1', '', '', '1', '1', '1'),
580*a25f0a04SGreg Roach		'כג' => array('0', '55', '55', '55', '54', '54', '54'),
581*a25f0a04SGreg Roach		'כש' => array('0', '5', '54', '54'),
582*a25f0a04SGreg Roach		'כס' => array('0', '5', '54', '54'),
583*a25f0a04SGreg Roach		'ככ' => array('0', '5', '5', '5', '55', '55', '55'),
584*a25f0a04SGreg Roach		'כך' => array('0', '5', '5', '5', '55', '55', '55'),
585*a25f0a04SGreg Roach		'כ' => array('0', '5', '5', '5'),
586*a25f0a04SGreg Roach		'כח' => array('0', '55', '55', '55', '5', '5', '5'),
587*a25f0a04SGreg Roach		'ך' => array('0', '', '5', '5'),
588*a25f0a04SGreg Roach		'ל' => array('0', '8', '8', '8'),
589*a25f0a04SGreg Roach		'לל' => array('0', '88', '88', '88', '8', '8', '8'),
590*a25f0a04SGreg Roach		'מנ' => array('0', '66', '66', '66'),
591*a25f0a04SGreg Roach		'מן' => array('0', '66', '66', '66'),
592*a25f0a04SGreg Roach		'ממ' => array('0', '6', '6', '6', '66', '66', '66'),
593*a25f0a04SGreg Roach		'מם' => array('0', '6', '6', '6', '66', '66', '66'),
594*a25f0a04SGreg Roach		'מ' => array('0', '6', '6', '6'),
595*a25f0a04SGreg Roach		'ם' => array('0', '', '6', '6'),
596*a25f0a04SGreg Roach		'נמ' => array('0', '66', '66', '66'),
597*a25f0a04SGreg Roach		'נם' => array('0', '66', '66', '66'),
598*a25f0a04SGreg Roach		'ננ' => array('0', '6', '6', '6', '66', '66', '66'),
599*a25f0a04SGreg Roach		'נן' => array('0', '6', '6', '6', '66', '66', '66'),
600*a25f0a04SGreg Roach		'נ' => array('0', '6', '6', '6'),
601*a25f0a04SGreg Roach		'ן' => array('0', '', '6', '6'),
602*a25f0a04SGreg Roach		'סתש' => array('0', '2', '4', '4'),
603*a25f0a04SGreg Roach		'סתז' => array('0', '2', '4', '4'),
604*a25f0a04SGreg Roach		'סטז' => array('0', '2', '4', '4'),
605*a25f0a04SGreg Roach		'סטש' => array('0', '2', '4', '4'),
606*a25f0a04SGreg Roach		'סצד' => array('0', '2', '4', '4'),
607*a25f0a04SGreg Roach		'סט' => array('0', '2', '4', '4', '43', '43', '43'),
608*a25f0a04SGreg Roach		'סת' => array('0', '2', '4', '4', '43', '43', '43'),
609*a25f0a04SGreg Roach		'סג' => array('0', '44', '44', '44', '4', '4', '4'),
610*a25f0a04SGreg Roach		'סס' => array('0', '4', '4', '4', '44', '44', '44'),
611*a25f0a04SGreg Roach		'סצ' => array('0', '44', '44', '44'),
612*a25f0a04SGreg Roach		'סץ' => array('0', '44', '44', '44'),
613*a25f0a04SGreg Roach		'סז' => array('0', '44', '44', '44'),
614*a25f0a04SGreg Roach		'סש' => array('0', '44', '44', '44'),
615*a25f0a04SGreg Roach		'ס' => array('0', '4', '4', '4'),
616*a25f0a04SGreg Roach		'ע' => array('1', '0', '', ''),
617*a25f0a04SGreg Roach		'פב' => array('0', '7', '7', '7', '77', '77', '77'),
618*a25f0a04SGreg Roach		'פוו' => array('0', '7', '7', '7', '77', '77', '77'),
619*a25f0a04SGreg Roach		'פפ' => array('0', '7', '7', '7', '77', '77', '77'),
620*a25f0a04SGreg Roach		'פף' => array('0', '7', '7', '7', '77', '77', '77'),
621*a25f0a04SGreg Roach		'פ' => array('0', '7', '7', '7'),
622*a25f0a04SGreg Roach		'ף' => array('0', '', '7', '7'),
623*a25f0a04SGreg Roach		'צג' => array('0', '44', '44', '44', '45', '45', '45'),
624*a25f0a04SGreg Roach		'צז' => array('0', '44', '44', '44'),
625*a25f0a04SGreg Roach		'צס' => array('0', '44', '44', '44'),
626*a25f0a04SGreg Roach		'צצ' => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'),
627*a25f0a04SGreg Roach		'צץ' => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'),
628*a25f0a04SGreg Roach		'צש' => array('0', '44', '44', '44', '4', '4', '4', '5', '5', '5'),
629*a25f0a04SGreg Roach		'צ' => array('0', '4', '4', '4', '5', '5', '5'),
630*a25f0a04SGreg Roach		'ץ' => array('0', '', '4', '4'),
631*a25f0a04SGreg Roach		'קה' => array('0', '55', '55', '5'),
632*a25f0a04SGreg Roach		'קס' => array('0', '5', '54', '54'),
633*a25f0a04SGreg Roach		'קש' => array('0', '5', '54', '54'),
634*a25f0a04SGreg Roach		'קק' => array('0', '5', '5', '5', '55', '55', '55'),
635*a25f0a04SGreg Roach		'קח' => array('0', '55', '55', '55'),
636*a25f0a04SGreg Roach		'קכ' => array('0', '55', '55', '55'),
637*a25f0a04SGreg Roach		'קך' => array('0', '55', '55', '55'),
638*a25f0a04SGreg Roach		'קג' => array('0', '55', '55', '55', '54', '54', '54'),
639*a25f0a04SGreg Roach		'ק' => array('0', '5', '5', '5'),
640*a25f0a04SGreg Roach		'רר' => array('0', '99', '99', '99', '9', '9', '9'),
641*a25f0a04SGreg Roach		'ר' => array('0', '9', '9', '9'),
642*a25f0a04SGreg Roach		'שטז' => array('0', '2', '4', '4'),
643*a25f0a04SGreg Roach		'שתש' => array('0', '2', '4', '4'),
644*a25f0a04SGreg Roach		'שתז' => array('0', '2', '4', '4'),
645*a25f0a04SGreg Roach		'שטש' => array('0', '2', '4', '4'),
646*a25f0a04SGreg Roach		'שד' => array('0', '2', '43', '43'),
647*a25f0a04SGreg Roach		'שז' => array('0', '44', '44', '44'),
648*a25f0a04SGreg Roach		'שס' => array('0', '44', '44', '44'),
649*a25f0a04SGreg Roach		'שת' => array('0', '2', '43', '43'),
650*a25f0a04SGreg Roach		'שג' => array('0', '4', '4', '4', '44', '44', '44', '4', '43', '43'),
651*a25f0a04SGreg Roach		'שט' => array('0', '2', '43', '43', '44', '44', '44'),
652*a25f0a04SGreg Roach		'שצ' => array('0', '44', '44', '44', '45', '45', '45'),
653*a25f0a04SGreg Roach		'שץ' => array('0', '44', '', '44', '45', '', '45'),
654*a25f0a04SGreg Roach		'שש' => array('0', '4', '4', '4', '44', '44', '44'),
655*a25f0a04SGreg Roach		'ש' => array('0', '4', '4', '4'),
656*a25f0a04SGreg Roach		'תג' => array('0', '34', '34', '34'),
657*a25f0a04SGreg Roach		'תז' => array('0', '34', '34', '34'),
658*a25f0a04SGreg Roach		'תש' => array('0', '4', '4', '4'),
659*a25f0a04SGreg Roach		'תת' => array('0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'),
660*a25f0a04SGreg Roach		'ת' => array('0', '3', '3', '3', '4', '4', '4'),
661*a25f0a04SGreg Roach		// Arabic alphabet
662*a25f0a04SGreg Roach		'ا' => array('1', '0', '', ''),
663*a25f0a04SGreg Roach		'ب' => array('0', '7', '7', '7'),
664*a25f0a04SGreg Roach		'ت' => array('0', '3', '3', '3'),
665*a25f0a04SGreg Roach		'ث' => array('0', '3', '3', '3'),
666*a25f0a04SGreg Roach		'ج' => array('0', '4', '4', '4'),
667*a25f0a04SGreg Roach		'ح' => array('0', '5', '5', '5'),
668*a25f0a04SGreg Roach		'خ' => array('0', '5', '5', '5'),
669*a25f0a04SGreg Roach		'د' => array('0', '3', '3', '3'),
670*a25f0a04SGreg Roach		'ذ' => array('0', '3', '3', '3'),
671*a25f0a04SGreg Roach		'ر' => array('0', '9', '9', '9'),
672*a25f0a04SGreg Roach		'ز' => array('0', '4', '4', '4'),
673*a25f0a04SGreg Roach		'س' => array('0', '4', '4', '4'),
674*a25f0a04SGreg Roach		'ش' => array('0', '4', '4', '4'),
675*a25f0a04SGreg Roach		'ص' => array('0', '4', '4', '4'),
676*a25f0a04SGreg Roach		'ض' => array('0', '3', '3', '3'),
677*a25f0a04SGreg Roach		'ط' => array('0', '3', '3', '3'),
678*a25f0a04SGreg Roach		'ظ' => array('0', '4', '4', '4'),
679*a25f0a04SGreg Roach		'ع' => array('1', '0', '', ''),
680*a25f0a04SGreg Roach		'غ' => array('0', '0', '', ''),
681*a25f0a04SGreg Roach		'ف' => array('0', '7', '7', '7'),
682*a25f0a04SGreg Roach		'ق' => array('0', '5', '5', '5'),
683*a25f0a04SGreg Roach		'ك' => array('0', '5', '5', '5'),
684*a25f0a04SGreg Roach		'ل' => array('0', '8', '8', '8'),
685*a25f0a04SGreg Roach		'لا' => array('0', '8', '8', '8'),
686*a25f0a04SGreg Roach		'م' => array('0', '6', '6', '6'),
687*a25f0a04SGreg Roach		'ن' => array('0', '6', '6', '6'),
688*a25f0a04SGreg Roach		'هن' => array('0', '66', '66', '66'),
689*a25f0a04SGreg Roach		'ه' => array('0', '5', '5', ''),
690*a25f0a04SGreg Roach		'و' => array('1', '', '', '', '7', '', ''),
691*a25f0a04SGreg Roach		'ي' => array('0', '1', '', ''),
692*a25f0a04SGreg Roach		'آ' => array('0', '1', '', ''),
693*a25f0a04SGreg Roach		'ة' => array('0', '', '', '3'),
694*a25f0a04SGreg Roach		'ی' => array('0', '1', '', ''),
695*a25f0a04SGreg Roach		'ى' => array('1', '1', '', ''),
696*a25f0a04SGreg Roach	);
697*a25f0a04SGreg Roach
698*a25f0a04SGreg Roach	/**
699*a25f0a04SGreg Roach	 * @param string $name
700*a25f0a04SGreg Roach	 *
701*a25f0a04SGreg Roach	 * @return string[] List of possible DM codes for the word.
702*a25f0a04SGreg Roach	 */
private static function daitchMokotoffWord($name) {
	// How the coding table is read below: element 0 of a self::$dmsounds entry
	// flags the chunk as a vowel (any value other than '0').  The remaining
	// elements come in triplets, indexed by $state: 1 = chunk is at the start of
	// the name, 2 = chunk is followed by a vowel, 3 = anything else.  Every
	// additional triplet is an alternative coding and forks the partial results.

	// Apply special transformation rules to the input string
	$name = I18N::strtoupper($name);
	foreach (self::$transformNameTable as $transformRule) {
		$name = str_replace($transformRule[0], $transformRule[1], $name);
	}

	// Initialize
	$name_script = I18N::textScript($name);
	$noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');

	$lastPos         = strlen($name) - 1; // Byte offset: table keys are matched byte-wise
	$currPos         = 0;
	$state           = 1; // 1: start of input string, 2: before vowel, 3: other
	$result          = array(); // accumulate complete 6-digit D-M codes here
	$partialResult   = array(); // accumulate incomplete D-M codes here
	$partialResult[] = array('!'); // initialize 1st partial result  ('!' stops "duplicate sound" check)

	// Loop through the input string.
	// Stop when the string is exhausted or when no more partial results remain
	while (count($partialResult) !== 0 && $currPos <= $lastPos) {
		// Find the DM coding table entry for the chunk at the current position
		$thisEntry = self::dmLongestChunk($name, $currPos);
		if ($thisEntry === '') {
			$currPos++; // Not in table: advance pointer to next byte
			continue; // and try again
		}

		$soundTableEntry = self::$dmsounds[$thisEntry];
		$workingResult   = $partialResult;
		$partialResult   = array();
		$currPos += strlen($thisEntry);

		// Not at beginning of input string: the code to use depends on what follows.
		// ($state is left above 3 by the triplet loop below, so this branch is taken
		// for every chunk after the first one that was found in the table.)
		if ($state != 1) {
			$nextEntry = ($currPos <= $lastPos) ? self::dmLongestChunk($name, $currPos) : '';
			if ($nextEntry !== '' && self::$dmsounds[$nextEntry][0] != '0') {
				// Next chunk is a vowel
				$state = 2;
			} else {
				// Next chunk is not a vowel, is unknown, or the input is exhausted
				$state = 3;
			}
		}

		while ($state < count($soundTableEntry)) {
			// empty means 'ignore this sound in this state'
			if ($soundTableEntry[$state] == '') {
				foreach ($workingResult as $workingEntry) {
					$tempEntry = $workingEntry;
					$tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
					$partialResult[] = $tempEntry;
				}
			} else {
				foreach ($workingResult as $workingEntry) {
					if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) {
						// Incoming sound isn't a duplicate of the previous sound
						$workingEntry[] = $soundTableEntry[$state];
					} elseif ($noVowels) {
						// Incoming sound is a duplicate of the previous sound.
						// For Hebrew and Arabic the duplicate is kept, so that both the
						// single and the doubled sound end up among the results.
						$workingEntry[] = $soundTableEntry[$state];
					}
					if (count($workingEntry) < 7) {
						$partialResult[] = $workingEntry;
					} else {
						// This is the 6th code in the sequence
						// We're looking for 7 entries because the first is '!' and doesn't count
						$code = self::dmCompleteCode($workingEntry);
						if ($code !== '') {
							$result[] = $code;
						}
					}
				}
			}
			$state = $state + 3; // Advance to next triplet while keeping the same basic state
		}
	}

	// Zero-fill and copy all remaining partial results
	foreach ($partialResult as $workingEntry) {
		$code = self::dmCompleteCode($workingEntry);
		if ($code !== '') {
			$result[] = $code;
		}
	}

	return $result;
}

/**
 * Find the longest chunk of $name, starting at byte $offset, that has an
 * entry in the D-M coding table.
 *
 * @param string $name
 * @param int    $offset
 *
 * @return string The matching chunk, or an empty string when nothing matches.
 */
private static function dmLongestChunk($name, $offset) {
	$chunk = substr($name, $offset, self::MAXCHAR); // Get maximum length chunk
	while ($chunk != '' && !isset(self::$dmsounds[$chunk])) {
		$chunk = substr($chunk, 0, -1); // Not in table: try a shorter chunk
	}

	// Older PHP versions may hand back false instead of '' from substr()
	return (string) $chunk;
}

/**
 * Turn a list of accumulated sound codes into a 6-digit D-M code.
 *
 * The '!' markers are removed, then the digits are zero-filled or truncated
 * to exactly six characters.
 *
 * @param string[] $soundCodes
 *
 * @return string The 6-digit code, or an empty string when no sound was coded.
 */
private static function dmCompleteCode($soundCodes) {
	$digits = str_replace('!', '', implode('', $soundCodes));

	// Compare against '' explicitly: the single digit '0' is a valid code,
	// but PHP treats the string '0' as false in a boolean test.
	if ($digits === '') {
		return '';
	}

	return substr($digits . '000000', 0, 6);
}
814*a25f0a04SGreg Roach}
815