<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2016 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
namespace Fisharebest\Webtrees;

/**
 * Phonetic matching of strings.
 */
class Soundex {
    /**
     * Which algorithms are supported.
     *
     * @return string[] Translated algorithm names, keyed by the algorithm code ('std' or 'dm').
     */
    public static function getAlgorithms() {
        return [
            'std' => /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell'),
            'dm'  => /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff'),
        ];
    }

    /**
     * Is there a match between two soundex codes?
     *
     * Each argument is a colon-separated list of codes, as produced by
     * russell() and daitchMokotoff(). Two lists match when they have at
     * least one complete code in common.
     *
     * @param string $soundex1
     * @param string $soundex2
     *
     * @return bool
     */
    public static function compare($soundex1, $soundex2) {
        if ($soundex1 && $soundex2) {
            // Compare whole codes rather than substrings, and discard empty
            // fragments (e.g. from a trailing ":"). An empty fragment used as
            // a strpos() needle matches everything on PHP 8 and raises a
            // warning on earlier versions.
            $codes1 = array_filter(explode(':', $soundex1), 'strlen');
            $codes2 = array_filter(explode(':', $soundex2), 'strlen');

            return (bool) array_intersect($codes1, $codes2);
        }

        return false;
    }

    /**
     * Generate Russell soundex codes for a given text.
     *
     * One code is generated for each whitespace-separated word, plus one for
     * all the words joined together when there is more than one word.
     *
     * @param string $text
     *
     * @return string Colon-separated list of unique codes; an empty string if there are none.
     */
    public static function russell($text) {
        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
        $soundex_array = [];
        foreach ($words as $word) {
            $soundex = soundex($word);
            // Only return codes from recognisable sounds
            if ($soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }
        // Combine words, e.g. “New York” as “Newyork”
        if (count($words) > 1) {
            // Join the already-split words: strtr() with an empty replacement
            // string leaves its input unchanged, so it never removed the spaces.
            $soundex = soundex(implode('', $words));
            // Apply the same "recognisable sounds" filter as for single words.
            if ($soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }
        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        // implode() of an empty array is an empty string.
        return implode(':', $soundex_array);
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
     *
     * Codes are generated for each whitespace-separated word, plus those for
     * all the words joined together when there is more than one word.
     *
     * @param string $text
     *
     * @return string Colon-separated list of unique codes; an empty string if there are none.
     */
    public static function daitchMokotoff($text) {
        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
        $soundex_array = [];
        foreach ($words as $word) {
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
        }
        // Combine words, e.g. “New York” as “Newyork”
        if (count($words) > 1) {
            // Join the already-split words: strtr() with an empty replacement
            // string leaves its input unchanged, so the spaces were never removed.
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(implode('', $words)));
        }
        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);

        // implode() of an empty array is an empty string.
        return implode(':', $soundex_array);
    }

    // Determine the Daitch–Mokotoff Soundex code for a word
    // Original implementation by Gerry Kroll, and analysis by Meliza Amity

    // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
    const MAXCHAR = 7;

    /**
     * Name transformation arrays.
     * Used to transform the Name string to simplify the "sounds like" table.
     * This is especially useful in Hebrew.
     *
     * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text)
     * function call to achieve the desired transformations.
     *
     * Note about the use of "\x01":
     * This code, which can’t legitimately occur in the kind of text we're dealing with,
     * is used as a place-holder so that conditional string replacements can be done.
129a25f0a04SGreg Roach * 130a25f0a04SGreg Roach * @var string[][] 131a25f0a04SGreg Roach */ 132*13abd6f3SGreg Roach private static $transformNameTable = [ 133a25f0a04SGreg Roach // Force Yiddish ligatures to be treated as separate letters 134*13abd6f3SGreg Roach ['װ', 'וו'], 135*13abd6f3SGreg Roach ['ײ', 'יי'], 136*13abd6f3SGreg Roach ['ױ', 'וי'], 137*13abd6f3SGreg Roach ['בו', 'בע'], 138*13abd6f3SGreg Roach ['פו', 'פע'], 139*13abd6f3SGreg Roach ['ומ', 'עמ'], 140*13abd6f3SGreg Roach ['ום', 'עם'], 141*13abd6f3SGreg Roach ['ונ', 'ענ'], 142*13abd6f3SGreg Roach ['ון', 'ען'], 143*13abd6f3SGreg Roach ['וו', 'ב'], 144*13abd6f3SGreg Roach ["\x01", ''], 145*13abd6f3SGreg Roach ['ייה$', "\x01ה"], 146*13abd6f3SGreg Roach ['ייע$', "\x01ע"], 147*13abd6f3SGreg Roach ['יי', 'ע'], 148*13abd6f3SGreg Roach ["\x01", 'יי'], 149*13abd6f3SGreg Roach ]; 150a25f0a04SGreg Roach 151a25f0a04SGreg Roach /** 152a25f0a04SGreg Roach * The DM sound coding table is organized this way: 153a25f0a04SGreg Roach * key: a variable-length string that corresponds to the UTF-8 character sequence 154a25f0a04SGreg Roach * represented by the table entry. Currently, that string can be up to 7 155a25f0a04SGreg Roach * bytes long. This maximum length is defined by the value of global variable 156a25f0a04SGreg Roach * $maxchar. 
157a25f0a04SGreg Roach * 158a25f0a04SGreg Roach * value: an array as follows: 159a25f0a04SGreg Roach * [0]: zero if not a vowel 160a25f0a04SGreg Roach * [1]: sound value when this string is at the beginning of the word 161a25f0a04SGreg Roach * [2]: sound value when this string is followed by a vowel 162a25f0a04SGreg Roach * [3]: sound value for other cases 163a25f0a04SGreg Roach * [1],[2],[3] can be repeated several times to create branches in the code 164a25f0a04SGreg Roach * an empty sound value means "ignore in this state" 165a25f0a04SGreg Roach * 166a25f0a04SGreg Roach * @var string[][] 167a25f0a04SGreg Roach */ 168*13abd6f3SGreg Roach private static $dmsounds = [ 169*13abd6f3SGreg Roach 'A' => ['1', '0', '', ''], 170*13abd6f3SGreg Roach 'À' => ['1', '0', '', ''], 171*13abd6f3SGreg Roach 'Á' => ['1', '0', '', ''], 172*13abd6f3SGreg Roach 'Â' => ['1', '0', '', ''], 173*13abd6f3SGreg Roach 'Ã' => ['1', '0', '', ''], 174*13abd6f3SGreg Roach 'Ä' => ['1', '0', '1', '', '0', '', ''], 175*13abd6f3SGreg Roach 'Å' => ['1', '0', '', ''], 176*13abd6f3SGreg Roach 'Ă' => ['1', '0', '', ''], 177*13abd6f3SGreg Roach 'Ą' => ['1', '', '', '', '', '', '6'], 178*13abd6f3SGreg Roach 'Ạ' => ['1', '0', '', ''], 179*13abd6f3SGreg Roach 'Ả' => ['1', '0', '', ''], 180*13abd6f3SGreg Roach 'Ấ' => ['1', '0', '', ''], 181*13abd6f3SGreg Roach 'Ầ' => ['1', '0', '', ''], 182*13abd6f3SGreg Roach 'Ẩ' => ['1', '0', '', ''], 183*13abd6f3SGreg Roach 'Ẫ' => ['1', '0', '', ''], 184*13abd6f3SGreg Roach 'Ậ' => ['1', '0', '', ''], 185*13abd6f3SGreg Roach 'Ắ' => ['1', '0', '', ''], 186*13abd6f3SGreg Roach 'Ằ' => ['1', '0', '', ''], 187*13abd6f3SGreg Roach 'Ẳ' => ['1', '0', '', ''], 188*13abd6f3SGreg Roach 'Ẵ' => ['1', '0', '', ''], 189*13abd6f3SGreg Roach 'Ặ' => ['1', '0', '', ''], 190*13abd6f3SGreg Roach 'AE' => ['1', '0', '1', ''], 191*13abd6f3SGreg Roach 'Æ' => ['1', '0', '1', ''], 192*13abd6f3SGreg Roach 'AI' => ['1', '0', '1', ''], 193*13abd6f3SGreg Roach 'AJ' => ['1', '0', '1', ''], 
194*13abd6f3SGreg Roach 'AU' => ['1', '0', '7', ''], 195*13abd6f3SGreg Roach 'AV' => ['1', '0', '7', '', '7', '7', '7'], 196*13abd6f3SGreg Roach 'ÄU' => ['1', '0', '1', ''], 197*13abd6f3SGreg Roach 'AY' => ['1', '0', '1', ''], 198*13abd6f3SGreg Roach 'B' => ['0', '7', '7', '7'], 199*13abd6f3SGreg Roach 'C' => ['0', '5', '5', '5', '34', '4', '4'], 200*13abd6f3SGreg Roach 'Ć' => ['0', '4', '4', '4'], 201*13abd6f3SGreg Roach 'Č' => ['0', '4', '4', '4'], 202*13abd6f3SGreg Roach 'Ç' => ['0', '4', '4', '4'], 203*13abd6f3SGreg Roach 'CH' => ['0', '5', '5', '5', '34', '4', '4'], 204*13abd6f3SGreg Roach 'CHS' => ['0', '5', '54', '54'], 205*13abd6f3SGreg Roach 'CK' => ['0', '5', '5', '5', '45', '45', '45'], 206*13abd6f3SGreg Roach 'CCS' => ['0', '4', '4', '4'], 207*13abd6f3SGreg Roach 'CS' => ['0', '4', '4', '4'], 208*13abd6f3SGreg Roach 'CSZ' => ['0', '4', '4', '4'], 209*13abd6f3SGreg Roach 'CZ' => ['0', '4', '4', '4'], 210*13abd6f3SGreg Roach 'CZS' => ['0', '4', '4', '4'], 211*13abd6f3SGreg Roach 'D' => ['0', '3', '3', '3'], 212*13abd6f3SGreg Roach 'Ď' => ['0', '3', '3', '3'], 213*13abd6f3SGreg Roach 'Đ' => ['0', '3', '3', '3'], 214*13abd6f3SGreg Roach 'DRS' => ['0', '4', '4', '4'], 215*13abd6f3SGreg Roach 'DRZ' => ['0', '4', '4', '4'], 216*13abd6f3SGreg Roach 'DS' => ['0', '4', '4', '4'], 217*13abd6f3SGreg Roach 'DSH' => ['0', '4', '4', '4'], 218*13abd6f3SGreg Roach 'DSZ' => ['0', '4', '4', '4'], 219*13abd6f3SGreg Roach 'DT' => ['0', '3', '3', '3'], 220*13abd6f3SGreg Roach 'DDZ' => ['0', '4', '4', '4'], 221*13abd6f3SGreg Roach 'DDZS' => ['0', '4', '4', '4'], 222*13abd6f3SGreg Roach 'DZ' => ['0', '4', '4', '4'], 223*13abd6f3SGreg Roach 'DŹ' => ['0', '4', '4', '4'], 224*13abd6f3SGreg Roach 'DŻ' => ['0', '4', '4', '4'], 225*13abd6f3SGreg Roach 'DZH' => ['0', '4', '4', '4'], 226*13abd6f3SGreg Roach 'DZS' => ['0', '4', '4', '4'], 227*13abd6f3SGreg Roach 'E' => ['1', '0', '', ''], 228*13abd6f3SGreg Roach 'È' => ['1', '0', '', ''], 229*13abd6f3SGreg Roach 'É' => ['1', '0', '', 
''], 230*13abd6f3SGreg Roach 'Ê' => ['1', '0', '', ''], 231*13abd6f3SGreg Roach 'Ë' => ['1', '0', '', ''], 232*13abd6f3SGreg Roach 'Ĕ' => ['1', '0', '', ''], 233*13abd6f3SGreg Roach 'Ė' => ['1', '0', '', ''], 234*13abd6f3SGreg Roach 'Ę' => ['1', '', '', '6', '', '', ''], 235*13abd6f3SGreg Roach 'Ẹ' => ['1', '0', '', ''], 236*13abd6f3SGreg Roach 'Ẻ' => ['1', '0', '', ''], 237*13abd6f3SGreg Roach 'Ẽ' => ['1', '0', '', ''], 238*13abd6f3SGreg Roach 'Ế' => ['1', '0', '', ''], 239*13abd6f3SGreg Roach 'Ề' => ['1', '0', '', ''], 240*13abd6f3SGreg Roach 'Ể' => ['1', '0', '', ''], 241*13abd6f3SGreg Roach 'Ễ' => ['1', '0', '', ''], 242*13abd6f3SGreg Roach 'Ệ' => ['1', '0', '', ''], 243*13abd6f3SGreg Roach 'EAU' => ['1', '0', '', ''], 244*13abd6f3SGreg Roach 'EI' => ['1', '0', '1', ''], 245*13abd6f3SGreg Roach 'EJ' => ['1', '0', '1', ''], 246*13abd6f3SGreg Roach 'EU' => ['1', '1', '1', ''], 247*13abd6f3SGreg Roach 'EY' => ['1', '0', '1', ''], 248*13abd6f3SGreg Roach 'F' => ['0', '7', '7', '7'], 249*13abd6f3SGreg Roach 'FB' => ['0', '7', '7', '7'], 250*13abd6f3SGreg Roach 'G' => ['0', '5', '5', '5', '34', '4', '4'], 251*13abd6f3SGreg Roach 'Ğ' => ['0', '', '', ''], 252*13abd6f3SGreg Roach 'GGY' => ['0', '5', '5', '5'], 253*13abd6f3SGreg Roach 'GY' => ['0', '5', '5', '5'], 254*13abd6f3SGreg Roach 'H' => ['0', '5', '5', '', '5', '5', '5'], 255*13abd6f3SGreg Roach 'I' => ['1', '0', '', ''], 256*13abd6f3SGreg Roach 'Ì' => ['1', '0', '', ''], 257*13abd6f3SGreg Roach 'Í' => ['1', '0', '', ''], 258*13abd6f3SGreg Roach 'Î' => ['1', '0', '', ''], 259*13abd6f3SGreg Roach 'Ï' => ['1', '0', '', ''], 260*13abd6f3SGreg Roach 'Ĩ' => ['1', '0', '', ''], 261*13abd6f3SGreg Roach 'Į' => ['1', '0', '', ''], 262*13abd6f3SGreg Roach 'İ' => ['1', '0', '', ''], 263*13abd6f3SGreg Roach 'Ỉ' => ['1', '0', '', ''], 264*13abd6f3SGreg Roach 'Ị' => ['1', '0', '', ''], 265*13abd6f3SGreg Roach 'IA' => ['1', '1', '', ''], 266*13abd6f3SGreg Roach 'IE' => ['1', '1', '', ''], 267*13abd6f3SGreg Roach 'IO' => ['1', 
'1', '', ''], 268*13abd6f3SGreg Roach 'IU' => ['1', '1', '', ''], 269*13abd6f3SGreg Roach 'J' => ['0', '1', '', '', '4', '4', '4', '5', '5', ''], 270*13abd6f3SGreg Roach 'K' => ['0', '5', '5', '5'], 271*13abd6f3SGreg Roach 'KH' => ['0', '5', '5', '5'], 272*13abd6f3SGreg Roach 'KS' => ['0', '5', '54', '54'], 273*13abd6f3SGreg Roach 'L' => ['0', '8', '8', '8'], 274*13abd6f3SGreg Roach 'Ľ' => ['0', '8', '8', '8'], 275*13abd6f3SGreg Roach 'Ĺ' => ['0', '8', '8', '8'], 276*13abd6f3SGreg Roach 'Ł' => ['0', '7', '7', '7', '8', '8', '8'], 277*13abd6f3SGreg Roach 'LL' => ['0', '8', '8', '8', '58', '8', '8', '1', '8', '8'], 278*13abd6f3SGreg Roach 'LLY' => ['0', '8', '8', '8', '1', '8', '8'], 279*13abd6f3SGreg Roach 'LY' => ['0', '8', '8', '8', '1', '8', '8'], 280*13abd6f3SGreg Roach 'M' => ['0', '6', '6', '6'], 281*13abd6f3SGreg Roach 'MĔ' => ['0', '66', '66', '66'], 282*13abd6f3SGreg Roach 'MN' => ['0', '66', '66', '66'], 283*13abd6f3SGreg Roach 'N' => ['0', '6', '6', '6'], 284*13abd6f3SGreg Roach 'Ń' => ['0', '6', '6', '6'], 285*13abd6f3SGreg Roach 'Ň' => ['0', '6', '6', '6'], 286*13abd6f3SGreg Roach 'Ñ' => ['0', '6', '6', '6'], 287*13abd6f3SGreg Roach 'NM' => ['0', '66', '66', '66'], 288*13abd6f3SGreg Roach 'O' => ['1', '0', '', ''], 289*13abd6f3SGreg Roach 'Ò' => ['1', '0', '', ''], 290*13abd6f3SGreg Roach 'Ó' => ['1', '0', '', ''], 291*13abd6f3SGreg Roach 'Ô' => ['1', '0', '', ''], 292*13abd6f3SGreg Roach 'Õ' => ['1', '0', '', ''], 293*13abd6f3SGreg Roach 'Ö' => ['1', '0', '', ''], 294*13abd6f3SGreg Roach 'Ø' => ['1', '0', '', ''], 295*13abd6f3SGreg Roach 'Ő' => ['1', '0', '', ''], 296*13abd6f3SGreg Roach 'Œ' => ['1', '0', '', ''], 297*13abd6f3SGreg Roach 'Ơ' => ['1', '0', '', ''], 298*13abd6f3SGreg Roach 'Ọ' => ['1', '0', '', ''], 299*13abd6f3SGreg Roach 'Ỏ' => ['1', '0', '', ''], 300*13abd6f3SGreg Roach 'Ố' => ['1', '0', '', ''], 301*13abd6f3SGreg Roach 'Ồ' => ['1', '0', '', ''], 302*13abd6f3SGreg Roach 'Ổ' => ['1', '0', '', ''], 303*13abd6f3SGreg Roach 'Ỗ' => ['1', 
'0', '', ''], 304*13abd6f3SGreg Roach 'Ộ' => ['1', '0', '', ''], 305*13abd6f3SGreg Roach 'Ớ' => ['1', '0', '', ''], 306*13abd6f3SGreg Roach 'Ờ' => ['1', '0', '', ''], 307*13abd6f3SGreg Roach 'Ở' => ['1', '0', '', ''], 308*13abd6f3SGreg Roach 'Ỡ' => ['1', '0', '', ''], 309*13abd6f3SGreg Roach 'Ợ' => ['1', '0', '', ''], 310*13abd6f3SGreg Roach 'OE' => ['1', '0', '', ''], 311*13abd6f3SGreg Roach 'OI' => ['1', '0', '1', ''], 312*13abd6f3SGreg Roach 'OJ' => ['1', '0', '1', ''], 313*13abd6f3SGreg Roach 'OU' => ['1', '0', '', ''], 314*13abd6f3SGreg Roach 'OY' => ['1', '0', '1', ''], 315*13abd6f3SGreg Roach 'P' => ['0', '7', '7', '7'], 316*13abd6f3SGreg Roach 'PF' => ['0', '7', '7', '7'], 317*13abd6f3SGreg Roach 'PH' => ['0', '7', '7', '7'], 318*13abd6f3SGreg Roach 'Q' => ['0', '5', '5', '5'], 319*13abd6f3SGreg Roach 'R' => ['0', '9', '9', '9'], 320*13abd6f3SGreg Roach 'Ř' => ['0', '4', '4', '4'], 321*13abd6f3SGreg Roach 'RS' => ['0', '4', '4', '4', '94', '94', '94'], 322*13abd6f3SGreg Roach 'RZ' => ['0', '4', '4', '4', '94', '94', '94'], 323*13abd6f3SGreg Roach 'S' => ['0', '4', '4', '4'], 324*13abd6f3SGreg Roach 'Ś' => ['0', '4', '4', '4'], 325*13abd6f3SGreg Roach 'Š' => ['0', '4', '4', '4'], 326*13abd6f3SGreg Roach 'Ş' => ['0', '4', '4', '4'], 327*13abd6f3SGreg Roach 'SC' => ['0', '2', '4', '4'], 328*13abd6f3SGreg Roach 'ŠČ' => ['0', '2', '4', '4'], 329*13abd6f3SGreg Roach 'SCH' => ['0', '4', '4', '4'], 330*13abd6f3SGreg Roach 'SCHD' => ['0', '2', '43', '43'], 331*13abd6f3SGreg Roach 'SCHT' => ['0', '2', '43', '43'], 332*13abd6f3SGreg Roach 'SCHTCH' => ['0', '2', '4', '4'], 333*13abd6f3SGreg Roach 'SCHTSCH' => ['0', '2', '4', '4'], 334*13abd6f3SGreg Roach 'SCHTSH' => ['0', '2', '4', '4'], 335*13abd6f3SGreg Roach 'SD' => ['0', '2', '43', '43'], 336*13abd6f3SGreg Roach 'SH' => ['0', '4', '4', '4'], 337*13abd6f3SGreg Roach 'SHCH' => ['0', '2', '4', '4'], 338*13abd6f3SGreg Roach 'SHD' => ['0', '2', '43', '43'], 339*13abd6f3SGreg Roach 'SHT' => ['0', '2', '43', '43'], 
340*13abd6f3SGreg Roach 'SHTCH' => ['0', '2', '4', '4'], 341*13abd6f3SGreg Roach 'SHTSH' => ['0', '2', '4', '4'], 342*13abd6f3SGreg Roach 'ß' => ['0', '', '4', '4'], 343*13abd6f3SGreg Roach 'ST' => ['0', '2', '43', '43'], 344*13abd6f3SGreg Roach 'STCH' => ['0', '2', '4', '4'], 345*13abd6f3SGreg Roach 'STRS' => ['0', '2', '4', '4'], 346*13abd6f3SGreg Roach 'STRZ' => ['0', '2', '4', '4'], 347*13abd6f3SGreg Roach 'STSCH' => ['0', '2', '4', '4'], 348*13abd6f3SGreg Roach 'STSH' => ['0', '2', '4', '4'], 349*13abd6f3SGreg Roach 'SSZ' => ['0', '4', '4', '4'], 350*13abd6f3SGreg Roach 'SZ' => ['0', '4', '4', '4'], 351*13abd6f3SGreg Roach 'SZCS' => ['0', '2', '4', '4'], 352*13abd6f3SGreg Roach 'SZCZ' => ['0', '2', '4', '4'], 353*13abd6f3SGreg Roach 'SZD' => ['0', '2', '43', '43'], 354*13abd6f3SGreg Roach 'SZT' => ['0', '2', '43', '43'], 355*13abd6f3SGreg Roach 'T' => ['0', '3', '3', '3'], 356*13abd6f3SGreg Roach 'Ť' => ['0', '3', '3', '3'], 357*13abd6f3SGreg Roach 'Ţ' => ['0', '3', '3', '3', '4', '4', '4'], 358*13abd6f3SGreg Roach 'TC' => ['0', '4', '4', '4'], 359*13abd6f3SGreg Roach 'TCH' => ['0', '4', '4', '4'], 360*13abd6f3SGreg Roach 'TH' => ['0', '3', '3', '3'], 361*13abd6f3SGreg Roach 'TRS' => ['0', '4', '4', '4'], 362*13abd6f3SGreg Roach 'TRZ' => ['0', '4', '4', '4'], 363*13abd6f3SGreg Roach 'TS' => ['0', '4', '4', '4'], 364*13abd6f3SGreg Roach 'TSCH' => ['0', '4', '4', '4'], 365*13abd6f3SGreg Roach 'TSH' => ['0', '4', '4', '4'], 366*13abd6f3SGreg Roach 'TSZ' => ['0', '4', '4', '4'], 367*13abd6f3SGreg Roach 'TTCH' => ['0', '4', '4', '4'], 368*13abd6f3SGreg Roach 'TTS' => ['0', '4', '4', '4'], 369*13abd6f3SGreg Roach 'TTSCH' => ['0', '4', '4', '4'], 370*13abd6f3SGreg Roach 'TTSZ' => ['0', '4', '4', '4'], 371*13abd6f3SGreg Roach 'TTZ' => ['0', '4', '4', '4'], 372*13abd6f3SGreg Roach 'TZ' => ['0', '4', '4', '4'], 373*13abd6f3SGreg Roach 'TZS' => ['0', '4', '4', '4'], 374*13abd6f3SGreg Roach 'U' => ['1', '0', '', ''], 375*13abd6f3SGreg Roach 'Ù' => ['1', '0', '', ''], 
376*13abd6f3SGreg Roach 'Ú' => ['1', '0', '', ''], 377*13abd6f3SGreg Roach 'Û' => ['1', '0', '', ''], 378*13abd6f3SGreg Roach 'Ü' => ['1', '0', '', ''], 379*13abd6f3SGreg Roach 'Ũ' => ['1', '0', '', ''], 380*13abd6f3SGreg Roach 'Ū' => ['1', '0', '', ''], 381*13abd6f3SGreg Roach 'Ů' => ['1', '0', '', ''], 382*13abd6f3SGreg Roach 'Ű' => ['1', '0', '', ''], 383*13abd6f3SGreg Roach 'Ų' => ['1', '0', '', ''], 384*13abd6f3SGreg Roach 'Ư' => ['1', '0', '', ''], 385*13abd6f3SGreg Roach 'Ụ' => ['1', '0', '', ''], 386*13abd6f3SGreg Roach 'Ủ' => ['1', '0', '', ''], 387*13abd6f3SGreg Roach 'Ứ' => ['1', '0', '', ''], 388*13abd6f3SGreg Roach 'Ừ' => ['1', '0', '', ''], 389*13abd6f3SGreg Roach 'Ử' => ['1', '0', '', ''], 390*13abd6f3SGreg Roach 'Ữ' => ['1', '0', '', ''], 391*13abd6f3SGreg Roach 'Ự' => ['1', '0', '', ''], 392*13abd6f3SGreg Roach 'UE' => ['1', '0', '', ''], 393*13abd6f3SGreg Roach 'UI' => ['1', '0', '1', ''], 394*13abd6f3SGreg Roach 'UJ' => ['1', '0', '1', ''], 395*13abd6f3SGreg Roach 'UY' => ['1', '0', '1', ''], 396*13abd6f3SGreg Roach 'UW' => ['1', '0', '1', '', '0', '7', '7'], 397*13abd6f3SGreg Roach 'V' => ['0', '7', '7', '7'], 398*13abd6f3SGreg Roach 'W' => ['0', '7', '7', '7'], 399*13abd6f3SGreg Roach 'X' => ['0', '5', '54', '54'], 400*13abd6f3SGreg Roach 'Y' => ['1', '1', '', ''], 401*13abd6f3SGreg Roach 'Ý' => ['1', '1', '', ''], 402*13abd6f3SGreg Roach 'Ỳ' => ['1', '1', '', ''], 403*13abd6f3SGreg Roach 'Ỵ' => ['1', '1', '', ''], 404*13abd6f3SGreg Roach 'Ỷ' => ['1', '1', '', ''], 405*13abd6f3SGreg Roach 'Ỹ' => ['1', '1', '', ''], 406*13abd6f3SGreg Roach 'Z' => ['0', '4', '4', '4'], 407*13abd6f3SGreg Roach 'Ź' => ['0', '4', '4', '4'], 408*13abd6f3SGreg Roach 'Ż' => ['0', '4', '4', '4'], 409*13abd6f3SGreg Roach 'Ž' => ['0', '4', '4', '4'], 410*13abd6f3SGreg Roach 'ZD' => ['0', '2', '43', '43'], 411*13abd6f3SGreg Roach 'ZDZ' => ['0', '2', '4', '4'], 412*13abd6f3SGreg Roach 'ZDZH' => ['0', '2', '4', '4'], 413*13abd6f3SGreg Roach 'ZH' => ['0', '4', '4', '4'], 
414*13abd6f3SGreg Roach 'ZHD' => ['0', '2', '43', '43'], 415*13abd6f3SGreg Roach 'ZHDZH' => ['0', '2', '4', '4'], 416*13abd6f3SGreg Roach 'ZS' => ['0', '4', '4', '4'], 417*13abd6f3SGreg Roach 'ZSCH' => ['0', '4', '4', '4'], 418*13abd6f3SGreg Roach 'ZSH' => ['0', '4', '4', '4'], 419*13abd6f3SGreg Roach 'ZZS' => ['0', '4', '4', '4'], 420a25f0a04SGreg Roach // Cyrillic alphabet 421*13abd6f3SGreg Roach 'А' => ['1', '0', '', ''], 422*13abd6f3SGreg Roach 'Б' => ['0', '7', '7', '7'], 423*13abd6f3SGreg Roach 'В' => ['0', '7', '7', '7'], 424*13abd6f3SGreg Roach 'Г' => ['0', '5', '5', '5'], 425*13abd6f3SGreg Roach 'Д' => ['0', '3', '3', '3'], 426*13abd6f3SGreg Roach 'ДЗ' => ['0', '4', '4', '4'], 427*13abd6f3SGreg Roach 'Е' => ['1', '0', '', ''], 428*13abd6f3SGreg Roach 'Ё' => ['1', '0', '', ''], 429*13abd6f3SGreg Roach 'Ж' => ['0', '4', '4', '4'], 430*13abd6f3SGreg Roach 'З' => ['0', '4', '4', '4'], 431*13abd6f3SGreg Roach 'И' => ['1', '0', '', ''], 432*13abd6f3SGreg Roach 'Й' => ['1', '1', '', '', '4', '4', '4'], 433*13abd6f3SGreg Roach 'К' => ['0', '5', '5', '5'], 434*13abd6f3SGreg Roach 'Л' => ['0', '8', '8', '8'], 435*13abd6f3SGreg Roach 'М' => ['0', '6', '6', '6'], 436*13abd6f3SGreg Roach 'Н' => ['0', '6', '6', '6'], 437*13abd6f3SGreg Roach 'О' => ['1', '0', '', ''], 438*13abd6f3SGreg Roach 'П' => ['0', '7', '7', '7'], 439*13abd6f3SGreg Roach 'Р' => ['0', '9', '9', '9'], 440*13abd6f3SGreg Roach 'РЖ' => ['0', '4', '4', '4'], 441*13abd6f3SGreg Roach 'С' => ['0', '4', '4', '4'], 442*13abd6f3SGreg Roach 'Т' => ['0', '3', '3', '3'], 443*13abd6f3SGreg Roach 'У' => ['1', '0', '', ''], 444*13abd6f3SGreg Roach 'Ф' => ['0', '7', '7', '7'], 445*13abd6f3SGreg Roach 'Х' => ['0', '5', '5', '5'], 446*13abd6f3SGreg Roach 'Ц' => ['0', '4', '4', '4'], 447*13abd6f3SGreg Roach 'Ч' => ['0', '4', '4', '4'], 448*13abd6f3SGreg Roach 'Ш' => ['0', '4', '4', '4'], 449*13abd6f3SGreg Roach 'Щ' => ['0', '2', '4', '4'], 450*13abd6f3SGreg Roach 'Ъ' => ['0', '', '', ''], 451*13abd6f3SGreg Roach 'Ы' => 
['0', '1', '', ''], 452*13abd6f3SGreg Roach 'Ь' => ['0', '', '', ''], 453*13abd6f3SGreg Roach 'Э' => ['1', '0', '', ''], 454*13abd6f3SGreg Roach 'Ю' => ['0', '1', '', ''], 455*13abd6f3SGreg Roach 'Я' => ['0', '1', '', ''], 456a25f0a04SGreg Roach // Greek alphabet 457*13abd6f3SGreg Roach 'Α' => ['1', '0', '', ''], 458*13abd6f3SGreg Roach 'Ά' => ['1', '0', '', ''], 459*13abd6f3SGreg Roach 'ΑΙ' => ['1', '0', '1', ''], 460*13abd6f3SGreg Roach 'ΑΥ' => ['1', '0', '1', ''], 461*13abd6f3SGreg Roach 'Β' => ['0', '7', '7', '7'], 462*13abd6f3SGreg Roach 'Γ' => ['0', '5', '5', '5'], 463*13abd6f3SGreg Roach 'Δ' => ['0', '3', '3', '3'], 464*13abd6f3SGreg Roach 'Ε' => ['1', '0', '', ''], 465*13abd6f3SGreg Roach 'Έ' => ['1', '0', '', ''], 466*13abd6f3SGreg Roach 'ΕΙ' => ['1', '0', '1', ''], 467*13abd6f3SGreg Roach 'ΕΥ' => ['1', '1', '1', ''], 468*13abd6f3SGreg Roach 'Ζ' => ['0', '4', '4', '4'], 469*13abd6f3SGreg Roach 'Η' => ['1', '0', '', ''], 470*13abd6f3SGreg Roach 'Ή' => ['1', '0', '', ''], 471*13abd6f3SGreg Roach 'Θ' => ['0', '3', '3', '3'], 472*13abd6f3SGreg Roach 'Ι' => ['1', '0', '', ''], 473*13abd6f3SGreg Roach 'Ί' => ['1', '0', '', ''], 474*13abd6f3SGreg Roach 'Ϊ' => ['1', '0', '', ''], 475*13abd6f3SGreg Roach 'ΐ' => ['1', '0', '', ''], 476*13abd6f3SGreg Roach 'Κ' => ['0', '5', '5', '5'], 477*13abd6f3SGreg Roach 'Λ' => ['0', '8', '8', '8'], 478*13abd6f3SGreg Roach 'Μ' => ['0', '6', '6', '6'], 479*13abd6f3SGreg Roach 'ΜΠ' => ['0', '7', '7', '7'], 480*13abd6f3SGreg Roach 'Ν' => ['0', '6', '6', '6'], 481*13abd6f3SGreg Roach 'ΝΤ' => ['0', '3', '3', '3'], 482*13abd6f3SGreg Roach 'Ξ' => ['0', '5', '54', '54'], 483*13abd6f3SGreg Roach 'Ο' => ['1', '0', '', ''], 484*13abd6f3SGreg Roach 'Ό' => ['1', '0', '', ''], 485*13abd6f3SGreg Roach 'ΟΙ' => ['1', '0', '1', ''], 486*13abd6f3SGreg Roach 'ΟΥ' => ['1', '0', '1', ''], 487*13abd6f3SGreg Roach 'Π' => ['0', '7', '7', '7'], 488*13abd6f3SGreg Roach 'Ρ' => ['0', '9', '9', '9'], 489*13abd6f3SGreg Roach 'Σ' => ['0', '4', '4', '4'], 
490*13abd6f3SGreg Roach 'ς' => ['0', '', '', '4'], 491*13abd6f3SGreg Roach 'Τ' => ['0', '3', '3', '3'], 492*13abd6f3SGreg Roach 'ΤΖ' => ['0', '4', '4', '4'], 493*13abd6f3SGreg Roach 'ΤΣ' => ['0', '4', '4', '4'], 494*13abd6f3SGreg Roach 'Υ' => ['1', '1', '', ''], 495*13abd6f3SGreg Roach 'Ύ' => ['1', '1', '', ''], 496*13abd6f3SGreg Roach 'Ϋ' => ['1', '1', '', ''], 497*13abd6f3SGreg Roach 'ΰ' => ['1', '1', '', ''], 498*13abd6f3SGreg Roach 'ΥΚ' => ['1', '5', '5', '5'], 499*13abd6f3SGreg Roach 'ΥΥ' => ['1', '65', '65', '65'], 500*13abd6f3SGreg Roach 'Φ' => ['0', '7', '7', '7'], 501*13abd6f3SGreg Roach 'Χ' => ['0', '5', '5', '5'], 502*13abd6f3SGreg Roach 'Ψ' => ['0', '7', '7', '7'], 503*13abd6f3SGreg Roach 'Ω' => ['1', '0', '', ''], 504*13abd6f3SGreg Roach 'Ώ' => ['1', '0', '', ''], 505a25f0a04SGreg Roach // Hebrew alphabet 506*13abd6f3SGreg Roach 'א' => ['1', '0', '', ''], 507*13abd6f3SGreg Roach 'או' => ['1', '0', '7', ''], 508*13abd6f3SGreg Roach 'אג' => ['1', '4', '4', '4', '5', '5', '5', '34', '34', '34'], 509*13abd6f3SGreg Roach 'בב' => ['0', '7', '7', '7', '77', '77', '77'], 510*13abd6f3SGreg Roach 'ב' => ['0', '7', '7', '7'], 511*13abd6f3SGreg Roach 'גג' => ['0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'], 512*13abd6f3SGreg Roach 'גד' => ['0', '43', '43', '43', '53', '53', '53'], 513*13abd6f3SGreg Roach 'גה' => ['0', '45', '45', '45', '55', '55', '55'], 514*13abd6f3SGreg Roach 'גז' => ['0', '44', '44', '44', '45', '45', '45'], 515*13abd6f3SGreg Roach 'גח' => ['0', '45', '45', '45', '55', '55', '55'], 516*13abd6f3SGreg Roach 'גכ' => ['0', '45', '45', '45', '55', '55', '55'], 517*13abd6f3SGreg Roach 'גך' => ['0', '45', '45', '45', '55', '55', '55'], 518*13abd6f3SGreg Roach 'גצ' => ['0', '44', '44', '44', '45', '45', '45'], 519*13abd6f3SGreg Roach 'גץ' => ['0', '44', '44', '44', '45', '45', '45'], 520*13abd6f3SGreg Roach 'גק' => ['0', '45', '45', '45', '54', '54', '54'], 521*13abd6f3SGreg Roach 'גש' => ['0', '44', '44', '44', 
'54', '54', '54'], 522*13abd6f3SGreg Roach 'גת' => ['0', '43', '43', '43', '53', '53', '53'], 523*13abd6f3SGreg Roach 'ג' => ['0', '4', '4', '4', '5', '5', '5'], 524*13abd6f3SGreg Roach 'דז' => ['0', '4', '4', '4'], 525*13abd6f3SGreg Roach 'דד' => ['0', '3', '3', '3', '33', '33', '33'], 526*13abd6f3SGreg Roach 'דט' => ['0', '33', '33', '33'], 527*13abd6f3SGreg Roach 'דש' => ['0', '4', '4', '4'], 528*13abd6f3SGreg Roach 'דצ' => ['0', '4', '4', '4'], 529*13abd6f3SGreg Roach 'דץ' => ['0', '4', '4', '4'], 530*13abd6f3SGreg Roach 'ד' => ['0', '3', '3', '3'], 531*13abd6f3SGreg Roach 'הג' => ['0', '54', '54', '54', '55', '55', '55'], 532*13abd6f3SGreg Roach 'הכ' => ['0', '55', '55', '55'], 533*13abd6f3SGreg Roach 'הח' => ['0', '55', '55', '55'], 534*13abd6f3SGreg Roach 'הק' => ['0', '55', '55', '55', '5', '5', '5'], 535*13abd6f3SGreg Roach 'הה' => ['0', '5', '5', '', '55', '55', ''], 536*13abd6f3SGreg Roach 'ה' => ['0', '5', '5', ''], 537*13abd6f3SGreg Roach 'וי' => ['1', '', '', '', '7', '7', '7'], 538*13abd6f3SGreg Roach 'ו' => ['1', '7', '7', '7', '7', '', ''], 539*13abd6f3SGreg Roach 'וו' => ['1', '7', '7', '7', '7', '', ''], 540*13abd6f3SGreg Roach 'וופ' => ['1', '7', '7', '7', '77', '77', '77'], 541*13abd6f3SGreg Roach 'זש' => ['0', '4', '4', '4', '44', '44', '44'], 542*13abd6f3SGreg Roach 'זדז' => ['0', '2', '4', '4'], 543*13abd6f3SGreg Roach 'ז' => ['0', '4', '4', '4'], 544*13abd6f3SGreg Roach 'זג' => ['0', '44', '44', '44', '45', '45', '45'], 545*13abd6f3SGreg Roach 'זז' => ['0', '4', '4', '4', '44', '44', '44'], 546*13abd6f3SGreg Roach 'זס' => ['0', '44', '44', '44'], 547*13abd6f3SGreg Roach 'זצ' => ['0', '44', '44', '44'], 548*13abd6f3SGreg Roach 'זץ' => ['0', '44', '44', '44'], 549*13abd6f3SGreg Roach 'חג' => ['0', '54', '54', '54', '53', '53', '53'], 550*13abd6f3SGreg Roach 'חח' => ['0', '5', '5', '5', '55', '55', '55'], 551*13abd6f3SGreg Roach 'חק' => ['0', '55', '55', '55', '5', '5', '5'], 552*13abd6f3SGreg Roach 'חכ' => ['0', '45', '45', '45', '55', '55', 
'55'], 553*13abd6f3SGreg Roach 'חס' => ['0', '5', '54', '54'], 554*13abd6f3SGreg Roach 'חש' => ['0', '5', '54', '54'], 555*13abd6f3SGreg Roach 'ח' => ['0', '5', '5', '5'], 556*13abd6f3SGreg Roach 'טש' => ['0', '4', '4', '4'], 557*13abd6f3SGreg Roach 'טד' => ['0', '33', '33', '33'], 558*13abd6f3SGreg Roach 'טי' => ['0', '3', '3', '3', '4', '4', '4', '3', '3', '34'], 559*13abd6f3SGreg Roach 'טת' => ['0', '33', '33', '33'], 560*13abd6f3SGreg Roach 'טט' => ['0', '3', '3', '3', '33', '33', '33'], 561*13abd6f3SGreg Roach 'ט' => ['0', '3', '3', '3'], 562*13abd6f3SGreg Roach 'י' => ['1', '1', '', ''], 563*13abd6f3SGreg Roach 'יא' => ['1', '1', '', '', '1', '1', '1'], 564*13abd6f3SGreg Roach 'כג' => ['0', '55', '55', '55', '54', '54', '54'], 565*13abd6f3SGreg Roach 'כש' => ['0', '5', '54', '54'], 566*13abd6f3SGreg Roach 'כס' => ['0', '5', '54', '54'], 567*13abd6f3SGreg Roach 'ככ' => ['0', '5', '5', '5', '55', '55', '55'], 568*13abd6f3SGreg Roach 'כך' => ['0', '5', '5', '5', '55', '55', '55'], 569*13abd6f3SGreg Roach 'כ' => ['0', '5', '5', '5'], 570*13abd6f3SGreg Roach 'כח' => ['0', '55', '55', '55', '5', '5', '5'], 571*13abd6f3SGreg Roach 'ך' => ['0', '', '5', '5'], 572*13abd6f3SGreg Roach 'ל' => ['0', '8', '8', '8'], 573*13abd6f3SGreg Roach 'לל' => ['0', '88', '88', '88', '8', '8', '8'], 574*13abd6f3SGreg Roach 'מנ' => ['0', '66', '66', '66'], 575*13abd6f3SGreg Roach 'מן' => ['0', '66', '66', '66'], 576*13abd6f3SGreg Roach 'ממ' => ['0', '6', '6', '6', '66', '66', '66'], 577*13abd6f3SGreg Roach 'מם' => ['0', '6', '6', '6', '66', '66', '66'], 578*13abd6f3SGreg Roach 'מ' => ['0', '6', '6', '6'], 579*13abd6f3SGreg Roach 'ם' => ['0', '', '6', '6'], 580*13abd6f3SGreg Roach 'נמ' => ['0', '66', '66', '66'], 581*13abd6f3SGreg Roach 'נם' => ['0', '66', '66', '66'], 582*13abd6f3SGreg Roach 'ננ' => ['0', '6', '6', '6', '66', '66', '66'], 583*13abd6f3SGreg Roach 'נן' => ['0', '6', '6', '6', '66', '66', '66'], 584*13abd6f3SGreg Roach 'נ' => ['0', '6', '6', '6'], 585*13abd6f3SGreg Roach 
'ן' => ['0', '', '6', '6'], 586*13abd6f3SGreg Roach 'סתש' => ['0', '2', '4', '4'], 587*13abd6f3SGreg Roach 'סתז' => ['0', '2', '4', '4'], 588*13abd6f3SGreg Roach 'סטז' => ['0', '2', '4', '4'], 589*13abd6f3SGreg Roach 'סטש' => ['0', '2', '4', '4'], 590*13abd6f3SGreg Roach 'סצד' => ['0', '2', '4', '4'], 591*13abd6f3SGreg Roach 'סט' => ['0', '2', '4', '4', '43', '43', '43'], 592*13abd6f3SGreg Roach 'סת' => ['0', '2', '4', '4', '43', '43', '43'], 593*13abd6f3SGreg Roach 'סג' => ['0', '44', '44', '44', '4', '4', '4'], 594*13abd6f3SGreg Roach 'סס' => ['0', '4', '4', '4', '44', '44', '44'], 595*13abd6f3SGreg Roach 'סצ' => ['0', '44', '44', '44'], 596*13abd6f3SGreg Roach 'סץ' => ['0', '44', '44', '44'], 597*13abd6f3SGreg Roach 'סז' => ['0', '44', '44', '44'], 598*13abd6f3SGreg Roach 'סש' => ['0', '44', '44', '44'], 599*13abd6f3SGreg Roach 'ס' => ['0', '4', '4', '4'], 600*13abd6f3SGreg Roach 'ע' => ['1', '0', '', ''], 601*13abd6f3SGreg Roach 'פב' => ['0', '7', '7', '7', '77', '77', '77'], 602*13abd6f3SGreg Roach 'פוו' => ['0', '7', '7', '7', '77', '77', '77'], 603*13abd6f3SGreg Roach 'פפ' => ['0', '7', '7', '7', '77', '77', '77'], 604*13abd6f3SGreg Roach 'פף' => ['0', '7', '7', '7', '77', '77', '77'], 605*13abd6f3SGreg Roach 'פ' => ['0', '7', '7', '7'], 606*13abd6f3SGreg Roach 'ף' => ['0', '', '7', '7'], 607*13abd6f3SGreg Roach 'צג' => ['0', '44', '44', '44', '45', '45', '45'], 608*13abd6f3SGreg Roach 'צז' => ['0', '44', '44', '44'], 609*13abd6f3SGreg Roach 'צס' => ['0', '44', '44', '44'], 610*13abd6f3SGreg Roach 'צצ' => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'], 611*13abd6f3SGreg Roach 'צץ' => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'], 612*13abd6f3SGreg Roach 'צש' => ['0', '44', '44', '44', '4', '4', '4', '5', '5', '5'], 613*13abd6f3SGreg Roach 'צ' => ['0', '4', '4', '4', '5', '5', '5'], 614*13abd6f3SGreg Roach 'ץ' => ['0', '', '4', '4'], 615*13abd6f3SGreg Roach 'קה' => ['0', '55', '55', '5'], 
616*13abd6f3SGreg Roach 'קס' => ['0', '5', '54', '54'], 617*13abd6f3SGreg Roach 'קש' => ['0', '5', '54', '54'], 618*13abd6f3SGreg Roach 'קק' => ['0', '5', '5', '5', '55', '55', '55'], 619*13abd6f3SGreg Roach 'קח' => ['0', '55', '55', '55'], 620*13abd6f3SGreg Roach 'קכ' => ['0', '55', '55', '55'], 621*13abd6f3SGreg Roach 'קך' => ['0', '55', '55', '55'], 622*13abd6f3SGreg Roach 'קג' => ['0', '55', '55', '55', '54', '54', '54'], 623*13abd6f3SGreg Roach 'ק' => ['0', '5', '5', '5'], 624*13abd6f3SGreg Roach 'רר' => ['0', '99', '99', '99', '9', '9', '9'], 625*13abd6f3SGreg Roach 'ר' => ['0', '9', '9', '9'], 626*13abd6f3SGreg Roach 'שטז' => ['0', '2', '4', '4'], 627*13abd6f3SGreg Roach 'שתש' => ['0', '2', '4', '4'], 628*13abd6f3SGreg Roach 'שתז' => ['0', '2', '4', '4'], 629*13abd6f3SGreg Roach 'שטש' => ['0', '2', '4', '4'], 630*13abd6f3SGreg Roach 'שד' => ['0', '2', '43', '43'], 631*13abd6f3SGreg Roach 'שז' => ['0', '44', '44', '44'], 632*13abd6f3SGreg Roach 'שס' => ['0', '44', '44', '44'], 633*13abd6f3SGreg Roach 'שת' => ['0', '2', '43', '43'], 634*13abd6f3SGreg Roach 'שג' => ['0', '4', '4', '4', '44', '44', '44', '4', '43', '43'], 635*13abd6f3SGreg Roach 'שט' => ['0', '2', '43', '43', '44', '44', '44'], 636*13abd6f3SGreg Roach 'שצ' => ['0', '44', '44', '44', '45', '45', '45'], 637*13abd6f3SGreg Roach 'שץ' => ['0', '44', '', '44', '45', '', '45'], 638*13abd6f3SGreg Roach 'שש' => ['0', '4', '4', '4', '44', '44', '44'], 639*13abd6f3SGreg Roach 'ש' => ['0', '4', '4', '4'], 640*13abd6f3SGreg Roach 'תג' => ['0', '34', '34', '34'], 641*13abd6f3SGreg Roach 'תז' => ['0', '34', '34', '34'], 642*13abd6f3SGreg Roach 'תש' => ['0', '4', '4', '4'], 643*13abd6f3SGreg Roach 'תת' => ['0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'], 644*13abd6f3SGreg Roach 'ת' => ['0', '3', '3', '3', '4', '4', '4'], 645a25f0a04SGreg Roach // Arabic alphabet 646*13abd6f3SGreg Roach 'ا' => ['1', '0', '', ''], 647*13abd6f3SGreg Roach 'ب' => ['0', '7', 
'7', '7'], 648*13abd6f3SGreg Roach 'ت' => ['0', '3', '3', '3'], 649*13abd6f3SGreg Roach 'ث' => ['0', '3', '3', '3'], 650*13abd6f3SGreg Roach 'ج' => ['0', '4', '4', '4'], 651*13abd6f3SGreg Roach 'ح' => ['0', '5', '5', '5'], 652*13abd6f3SGreg Roach 'خ' => ['0', '5', '5', '5'], 653*13abd6f3SGreg Roach 'د' => ['0', '3', '3', '3'], 654*13abd6f3SGreg Roach 'ذ' => ['0', '3', '3', '3'], 655*13abd6f3SGreg Roach 'ر' => ['0', '9', '9', '9'], 656*13abd6f3SGreg Roach 'ز' => ['0', '4', '4', '4'], 657*13abd6f3SGreg Roach 'س' => ['0', '4', '4', '4'], 658*13abd6f3SGreg Roach 'ش' => ['0', '4', '4', '4'], 659*13abd6f3SGreg Roach 'ص' => ['0', '4', '4', '4'], 660*13abd6f3SGreg Roach 'ض' => ['0', '3', '3', '3'], 661*13abd6f3SGreg Roach 'ط' => ['0', '3', '3', '3'], 662*13abd6f3SGreg Roach 'ظ' => ['0', '4', '4', '4'], 663*13abd6f3SGreg Roach 'ع' => ['1', '0', '', ''], 664*13abd6f3SGreg Roach 'غ' => ['0', '0', '', ''], 665*13abd6f3SGreg Roach 'ف' => ['0', '7', '7', '7'], 666*13abd6f3SGreg Roach 'ق' => ['0', '5', '5', '5'], 667*13abd6f3SGreg Roach 'ك' => ['0', '5', '5', '5'], 668*13abd6f3SGreg Roach 'ل' => ['0', '8', '8', '8'], 669*13abd6f3SGreg Roach 'لا' => ['0', '8', '8', '8'], 670*13abd6f3SGreg Roach 'م' => ['0', '6', '6', '6'], 671*13abd6f3SGreg Roach 'ن' => ['0', '6', '6', '6'], 672*13abd6f3SGreg Roach 'هن' => ['0', '66', '66', '66'], 673*13abd6f3SGreg Roach 'ه' => ['0', '5', '5', ''], 674*13abd6f3SGreg Roach 'و' => ['1', '', '', '', '7', '', ''], 675*13abd6f3SGreg Roach 'ي' => ['0', '1', '', ''], 676*13abd6f3SGreg Roach 'آ' => ['0', '1', '', ''], 677*13abd6f3SGreg Roach 'ة' => ['0', '', '', '3'], 678*13abd6f3SGreg Roach 'ی' => ['0', '1', '', ''], 679*13abd6f3SGreg Roach 'ى' => ['1', '1', '', ''], 680*13abd6f3SGreg Roach ]; 681a25f0a04SGreg Roach 682a25f0a04SGreg Roach /** 68376692c8bSGreg Roach * Calculate the Daitch-Mokotoff soundex for a word. 
*
	 * The name is upper-cased and rewritten with the rules in
	 * self::$transformNameTable, then scanned from left to right. At each
	 * position the longest chunk found in self::$dmsounds is coded, using the
	 * column that matches its context (start of name, before a vowel, or
	 * anywhere else). A table entry holding more than one triplet of codes makes
	 * every partial result branch, so a single word can yield several codes.
	 *
	 * @param string $name
	 *
	 * @return string[] List of possible DM codes for the word.
	 */
	private static function daitchMokotoffWord($name) {
		// Apply special transformation rules to the input string
		$name = I18N::strtoupper($name);
		foreach (self::$transformNameTable as $transformRule) {
			$name = str_replace($transformRule[0], $transformRule[1], $name);
		}

		// Hebrew and Arabic names keep repeated sounds (see the duplicate check below)
		$name_script = I18N::textScript($name);
		$noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');

		// All positions below are byte offsets into $name
		$lastPos       = strlen($name) - 1;
		$currPos       = 0;
		$state         = 1; // 1: start of input string, 2: before vowel, 3: other
		$result        = []; // accumulate complete 6-digit D-M codes here
		$partialResult = [['!']]; // incomplete D-M codes; the leading '!' stops the "duplicate sound" check

		// Loop through the input string.
		// Stop when the string is exhausted or when no more partial results remain
		while (count($partialResult) !== 0 && $currPos <= $lastPos) {
			// Find the DM coding table entry for the chunk at the current position
			$thisEntry = self::dmLongestChunk($name, $currPos);
			if ($thisEntry === '') {
				$currPos++; // Not in table: advance pointer to next byte
				continue; // and try again
			}

			$soundTableEntry = self::$dmsounds[$thisEntry];
			$workingResult   = $partialResult;
			$partialResult   = [];
			$currPos += strlen($thisEntry);

			// Not at beginning of input string: the state depends on what follows this chunk
			if ($state !== 1) {
				$nextEntry = $currPos <= $lastPos ? self::dmLongestChunk($name, $currPos) : '';
				if ($nextEntry !== '' && self::$dmsounds[$nextEntry][0] !== '0') {
					// Next chunk is a vowel
					$state = 2;
				} else {
					// Next chunk is not a vowel, or there is no recognisable next chunk
					$state = 3;
				}
			}

			// Each pass handles one triplet of the table entry; entries with several
			// triplets are alternative codings, and each one extends every working result.
			while ($state < count($soundTableEntry)) {
				$sound = $soundTableEntry[$state];

				foreach ($workingResult as $workingEntry) {
					if ($sound === '') {
						// Empty means 'ignore this sound in this state'
						$workingEntry[count($workingEntry) - 1] .= '!'; // Prevent false 'doubles'
						$partialResult[] = $workingEntry;
						continue;
					}

					// A sound equal to the previous one is normally collapsed.
					// For Hebrew and Arabic the repeated sound is appended anyway.
					// NOTE(review): an earlier comment here described producing a pair of
					// codes (with and without the repeat); the code only keeps the variant
					// containing both occurrences - confirm that this is intended.
					$isDuplicate = ($sound === $workingEntry[count($workingEntry) - 1]);
					if (!$isDuplicate || $noVowels) {
						$workingEntry[] = $sound;
					}

					if (count($workingEntry) < 7) {
						$partialResult[] = $workingEntry;
						continue;
					}

					// This is the 6th code in the sequence
					// We're looking for 7 entries because the first is '!' and doesn't count
					$code = self::dmFinalizeCode($workingEntry);
					if ($code !== null) {
						$result[] = $code;
					}
				}

				$state += 3; // Advance to next triplet while keeping the same basic state
			}
		}

		// Zero-fill and copy all remaining partial results
		foreach ($partialResult as $workingEntry) {
			$code = self::dmFinalizeCode($workingEntry);
			if ($code !== null) {
				$result[] = $code;
			}
		}

		return $result;
	}

	/**
	 * Find the longest chunk of a name, starting at a byte offset, that has an
	 * entry in the D-M sound table.
	 *
	 * Starts with a chunk of self::MAXCHAR bytes and drops one byte from the end
	 * until the chunk is a key of self::$dmsounds or nothing is left.
	 *
	 * @param string $name
	 * @param int    $offset Byte offset into $name
	 *
	 * @return string The matching table key, or '' if no chunk at this offset is in the table.
	 */
	private static function dmLongestChunk($name, $offset) {
		// The cast turns the false that substr() can return on older PHP versions into ''
		$chunk = (string) substr($name, $offset, self::MAXCHAR);
		while ($chunk !== '' && !isset(self::$dmsounds[$chunk])) {
			$chunk = (string) substr($chunk, 0, -1); // Not in table: try a shorter chunk
		}

		return $chunk;
	}

	/**
	 * Turn a list of accumulated sound codes into a 6-digit D-M code.
	 *
	 * The '!' markers are removed, the codes are joined, and the result is
	 * zero-filled or truncated to six characters.
	 *
	 * @param string[] $entry Accumulated codes, including the '!' markers
	 *
	 * @return string|null The 6-character code, or null when no recognisable sound was coded.
	 */
	private static function dmFinalizeCode($entry) {
		$code = str_replace('!', '', implode('', $entry));

		// Only return codes from recognisable sounds.
		// A lone '0' (which would pad to 000000) is rejected along with the empty string.
		if ($code === '' || $code === '0') {
			return null;
		}

		return substr($code . '000000', 0, 6);
	}
}