<?php
namespace Webtrees;

/**
 * webtrees: online genealogy
 * Copyright (C) 2015 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Class Soundex - Functions for phonetic matching of strings
 */
class Soundex {
    /**
     * List the supported soundex algorithms.
     *
     * The array keys are the identifiers accepted by self::soundex();
     * the values are the translated, human-readable algorithm names.
     *
     * @return string[]
     */
    public static function getAlgorithms() {
        return array(
            'std' => /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell'),
            'dm'  => /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff'),
        );
    }

    /**
     * Generate soundex codes for a text, using the named algorithm.
     *
     * @param string $algorithm Either 'std' (Russell) or 'dm' (Daitch-Mokotoff)
     * @param string $text      The text to encode
     *
     * @return null|string Colon-delimited codes, or null when no code could be generated
     *
     * @throws \InvalidArgumentException If $algorithm is not a supported identifier
     */
    public static function soundex($algorithm, $text) {
        switch ($algorithm) {
            case 'std':
                return self::russell($text);
            case 'dm':
                return self::daitchMokotoff($text);
            default:
                throw new \InvalidArgumentException('Bad argument to Soundex::soundex()');
        }
    }

    /**
     * Is there a match between two soundex codes?
     *
     * Each argument is a colon-delimited list of codes, as produced by
     * self::russell() or self::daitchMokotoff(). The lists match when any
     * code of the first list occurs in the second.
     *
     * @param string $soundex1
     * @param string $soundex2
     *
     * @return boolean
     */
    public static function compare($soundex1, $soundex2) {
        // Nothing can match an empty/null list.
        if (!$soundex1 || !$soundex2) {
            return false;
        }

        foreach (explode(':', $soundex1) as $code) {
            // Skip empty fragments (e.g. from a stray delimiter): an empty needle
            // matches every haystack in PHP 8 and raises a warning in earlier versions.
            if ($code !== '' && strpos($soundex2, $code) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Generate Russell soundex codes for a given text.
     *
     * One code is generated per whitespace-separated word, plus one for all
     * the words joined together. Duplicate codes are removed.
     *
     * @param string $text
     *
     * @return null|string Colon-delimited codes, or null when the text has no recognisable sounds
     */
    public static function russell($text) {
        $words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

        // Combine words, e.g. “New York” as “Newyork”.
        // Join the already-split words: strtr($text, ' ', '') is a no-op, because
        // strtr() ignores the surplus characters when "from" and "to" differ in length.
        $candidates = $words;
        if (count($words) > 1) {
            $candidates[] = implode('', $words);
        }

        $soundex_array = array();
        foreach ($candidates as $candidate) {
            $soundex = soundex($candidate);
            // Only return codes from recognisable sounds
            if ($soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }

        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        return $soundex_array ? implode(':', $soundex_array) : null;
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
     *
     * The codes of each whitespace-separated word are collected via
     * self::daitchMokotoffWord(), followed by the codes of all the words
     * joined together. Duplicate codes are removed.
     *
     * @param string $text
     *
     * @return null|string Colon-delimited codes, or null when no code was generated
     */
    public static function daitchMokotoff($text) {
        $words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

        // Combine words, e.g. “New York” as “Newyork”.
        // Join the already-split words: strtr($text, ' ', '') is a no-op, so the
        // combined word used to be encoded with its whitespace still in place.
        $candidates = $words;
        if (count($words) > 1) {
            $candidates[] = implode('', $words);
        }

        $soundex_array = array();
        foreach ($candidates as $candidate) {
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($candidate));
        }

        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);

        return $soundex_array ? implode(':', $soundex_array) : null;
    }

    // Determine the Daitch–Mokotoff Soundex code for a word
    // Original implementation by Gerry Kroll, and analysis by Meliza Amity

    // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
    const MAXCHAR = 7;

    /**
     * Name transformation arrays.
     * Used to transform the Name string to simplify the "sounds like" table.
     * This is especially useful in Hebrew.
     *
     * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text)
     * function call to achieve the desired transformations.
     *
     * Note about the use of "\x01":
     * This code, which can’t legitimately occur in the kind of text we're dealing with,
     * is used as a place-holder so that conditional string replacements can be done.
     *
     * @var string[][]
     */
    private static $transformNameTable = array(
        // Force Yiddish ligatures to be treated as separate letters
        array('װ', 'וו'),
        array('ײ', 'יי'),
        array('ױ', 'וי'),
        array('בו', 'בע'),
        array('פו', 'פע'),
        array('ומ', 'עמ'),
        array('ום', 'עם'),
        array('ונ', 'ענ'),
        array('ון', 'ען'),
        array('וו', 'ב'),
        array("\x01", ''),
        array('ייה$', "\x01ה"),
        array('ייע$', "\x01ע"),
        array('יי', 'ע'),
        array("\x01", 'יי'),
    );

    /**
     * The DM sound coding table is organized this way:
     * key: a variable-length string that corresponds to the UTF-8 character sequence
     * represented by the table entry. Currently, that string can be up to 7
     * bytes long. This maximum length is defined by the class constant
     * MAXCHAR.
173*a25f0a04SGreg Roach * 174*a25f0a04SGreg Roach * value: an array as follows: 175*a25f0a04SGreg Roach * [0]: zero if not a vowel 176*a25f0a04SGreg Roach * [1]: sound value when this string is at the beginning of the word 177*a25f0a04SGreg Roach * [2]: sound value when this string is followed by a vowel 178*a25f0a04SGreg Roach * [3]: sound value for other cases 179*a25f0a04SGreg Roach * [1],[2],[3] can be repeated several times to create branches in the code 180*a25f0a04SGreg Roach * an empty sound value means "ignore in this state" 181*a25f0a04SGreg Roach * 182*a25f0a04SGreg Roach * @var string[][] 183*a25f0a04SGreg Roach */ 184*a25f0a04SGreg Roach private static $dmsounds = array( 185*a25f0a04SGreg Roach 'A' => array('1', '0', '', ''), 186*a25f0a04SGreg Roach 'À' => array('1', '0', '', ''), 187*a25f0a04SGreg Roach 'Á' => array('1', '0', '', ''), 188*a25f0a04SGreg Roach 'Â' => array('1', '0', '', ''), 189*a25f0a04SGreg Roach 'Ã' => array('1', '0', '', ''), 190*a25f0a04SGreg Roach 'Ä' => array('1', '0', '1', '', '0', '', ''), 191*a25f0a04SGreg Roach 'Å' => array('1', '0', '', ''), 192*a25f0a04SGreg Roach 'Ă' => array('1', '0', '', ''), 193*a25f0a04SGreg Roach 'Ą' => array('1', '', '', '', '', '', '6'), 194*a25f0a04SGreg Roach 'Ạ' => array('1', '0', '', ''), 195*a25f0a04SGreg Roach 'Ả' => array('1', '0', '', ''), 196*a25f0a04SGreg Roach 'Ấ' => array('1', '0', '', ''), 197*a25f0a04SGreg Roach 'Ầ' => array('1', '0', '', ''), 198*a25f0a04SGreg Roach 'Ẩ' => array('1', '0', '', ''), 199*a25f0a04SGreg Roach 'Ẫ' => array('1', '0', '', ''), 200*a25f0a04SGreg Roach 'Ậ' => array('1', '0', '', ''), 201*a25f0a04SGreg Roach 'Ắ' => array('1', '0', '', ''), 202*a25f0a04SGreg Roach 'Ằ' => array('1', '0', '', ''), 203*a25f0a04SGreg Roach 'Ẳ' => array('1', '0', '', ''), 204*a25f0a04SGreg Roach 'Ẵ' => array('1', '0', '', ''), 205*a25f0a04SGreg Roach 'Ặ' => array('1', '0', '', ''), 206*a25f0a04SGreg Roach 'AE' => array('1', '0', '1', ''), 207*a25f0a04SGreg Roach 'Æ' => array('1', '0', 
'1', ''), 208*a25f0a04SGreg Roach 'AI' => array('1', '0', '1', ''), 209*a25f0a04SGreg Roach 'AJ' => array('1', '0', '1', ''), 210*a25f0a04SGreg Roach 'AU' => array('1', '0', '7', ''), 211*a25f0a04SGreg Roach 'AV' => array('1', '0', '7', '', '7', '7', '7'), 212*a25f0a04SGreg Roach 'ÄU' => array('1', '0', '1', ''), 213*a25f0a04SGreg Roach 'AY' => array('1', '0', '1', ''), 214*a25f0a04SGreg Roach 'B' => array('0', '7', '7', '7'), 215*a25f0a04SGreg Roach 'C' => array('0', '5', '5', '5', '34', '4', '4'), 216*a25f0a04SGreg Roach 'Ć' => array('0', '4', '4', '4'), 217*a25f0a04SGreg Roach 'Č' => array('0', '4', '4', '4'), 218*a25f0a04SGreg Roach 'Ç' => array('0', '4', '4', '4'), 219*a25f0a04SGreg Roach 'CH' => array('0', '5', '5', '5', '34', '4', '4'), 220*a25f0a04SGreg Roach 'CHS' => array('0', '5', '54', '54'), 221*a25f0a04SGreg Roach 'CK' => array('0', '5', '5', '5', '45', '45', '45'), 222*a25f0a04SGreg Roach 'CCS' => array('0', '4', '4', '4'), 223*a25f0a04SGreg Roach 'CS' => array('0', '4', '4', '4'), 224*a25f0a04SGreg Roach 'CSZ' => array('0', '4', '4', '4'), 225*a25f0a04SGreg Roach 'CZ' => array('0', '4', '4', '4'), 226*a25f0a04SGreg Roach 'CZS' => array('0', '4', '4', '4'), 227*a25f0a04SGreg Roach 'D' => array('0', '3', '3', '3'), 228*a25f0a04SGreg Roach 'Ď' => array('0', '3', '3', '3'), 229*a25f0a04SGreg Roach 'Đ' => array('0', '3', '3', '3'), 230*a25f0a04SGreg Roach 'DRS' => array('0', '4', '4', '4'), 231*a25f0a04SGreg Roach 'DRZ' => array('0', '4', '4', '4'), 232*a25f0a04SGreg Roach 'DS' => array('0', '4', '4', '4'), 233*a25f0a04SGreg Roach 'DSH' => array('0', '4', '4', '4'), 234*a25f0a04SGreg Roach 'DSZ' => array('0', '4', '4', '4'), 235*a25f0a04SGreg Roach 'DT' => array('0', '3', '3', '3'), 236*a25f0a04SGreg Roach 'DDZ' => array('0', '4', '4', '4'), 237*a25f0a04SGreg Roach 'DDZS' => array('0', '4', '4', '4'), 238*a25f0a04SGreg Roach 'DZ' => array('0', '4', '4', '4'), 239*a25f0a04SGreg Roach 'DŹ' => array('0', '4', '4', '4'), 240*a25f0a04SGreg Roach 'DŻ' => 
array('0', '4', '4', '4'), 241*a25f0a04SGreg Roach 'DZH' => array('0', '4', '4', '4'), 242*a25f0a04SGreg Roach 'DZS' => array('0', '4', '4', '4'), 243*a25f0a04SGreg Roach 'E' => array('1', '0', '', ''), 244*a25f0a04SGreg Roach 'È' => array('1', '0', '', ''), 245*a25f0a04SGreg Roach 'É' => array('1', '0', '', ''), 246*a25f0a04SGreg Roach 'Ê' => array('1', '0', '', ''), 247*a25f0a04SGreg Roach 'Ë' => array('1', '0', '', ''), 248*a25f0a04SGreg Roach 'Ĕ' => array('1', '0', '', ''), 249*a25f0a04SGreg Roach 'Ė' => array('1', '0', '', ''), 250*a25f0a04SGreg Roach 'Ę' => array('1', '', '', '6', '', '', ''), 251*a25f0a04SGreg Roach 'Ẹ' => array('1', '0', '', ''), 252*a25f0a04SGreg Roach 'Ẻ' => array('1', '0', '', ''), 253*a25f0a04SGreg Roach 'Ẽ' => array('1', '0', '', ''), 254*a25f0a04SGreg Roach 'Ế' => array('1', '0', '', ''), 255*a25f0a04SGreg Roach 'Ề' => array('1', '0', '', ''), 256*a25f0a04SGreg Roach 'Ể' => array('1', '0', '', ''), 257*a25f0a04SGreg Roach 'Ễ' => array('1', '0', '', ''), 258*a25f0a04SGreg Roach 'Ệ' => array('1', '0', '', ''), 259*a25f0a04SGreg Roach 'EAU' => array('1', '0', '', ''), 260*a25f0a04SGreg Roach 'EI' => array('1', '0', '1', ''), 261*a25f0a04SGreg Roach 'EJ' => array('1', '0', '1', ''), 262*a25f0a04SGreg Roach 'EU' => array('1', '1', '1', ''), 263*a25f0a04SGreg Roach 'EY' => array('1', '0', '1', ''), 264*a25f0a04SGreg Roach 'F' => array('0', '7', '7', '7'), 265*a25f0a04SGreg Roach 'FB' => array('0', '7', '7', '7'), 266*a25f0a04SGreg Roach 'G' => array('0', '5', '5', '5', '34', '4', '4'), 267*a25f0a04SGreg Roach 'Ğ' => array('0', '', '', ''), 268*a25f0a04SGreg Roach 'GGY' => array('0', '5', '5', '5'), 269*a25f0a04SGreg Roach 'GY' => array('0', '5', '5', '5'), 270*a25f0a04SGreg Roach 'H' => array('0', '5', '5', '', '5', '5', '5'), 271*a25f0a04SGreg Roach 'I' => array('1', '0', '', ''), 272*a25f0a04SGreg Roach 'Ì' => array('1', '0', '', ''), 273*a25f0a04SGreg Roach 'Í' => array('1', '0', '', ''), 274*a25f0a04SGreg Roach 'Î' => array('1', '0', 
'', ''), 275*a25f0a04SGreg Roach 'Ï' => array('1', '0', '', ''), 276*a25f0a04SGreg Roach 'Ĩ' => array('1', '0', '', ''), 277*a25f0a04SGreg Roach 'Į' => array('1', '0', '', ''), 278*a25f0a04SGreg Roach 'İ' => array('1', '0', '', ''), 279*a25f0a04SGreg Roach 'Ỉ' => array('1', '0', '', ''), 280*a25f0a04SGreg Roach 'Ị' => array('1', '0', '', ''), 281*a25f0a04SGreg Roach 'IA' => array('1', '1', '', ''), 282*a25f0a04SGreg Roach 'IE' => array('1', '1', '', ''), 283*a25f0a04SGreg Roach 'IO' => array('1', '1', '', ''), 284*a25f0a04SGreg Roach 'IU' => array('1', '1', '', ''), 285*a25f0a04SGreg Roach 'J' => array('0', '1', '', '', '4', '4', '4', '5', '5', ''), 286*a25f0a04SGreg Roach 'K' => array('0', '5', '5', '5'), 287*a25f0a04SGreg Roach 'KH' => array('0', '5', '5', '5'), 288*a25f0a04SGreg Roach 'KS' => array('0', '5', '54', '54'), 289*a25f0a04SGreg Roach 'L' => array('0', '8', '8', '8'), 290*a25f0a04SGreg Roach 'Ľ' => array('0', '8', '8', '8'), 291*a25f0a04SGreg Roach 'Ĺ' => array('0', '8', '8', '8'), 292*a25f0a04SGreg Roach 'Ł' => array('0', '7', '7', '7', '8', '8', '8'), 293*a25f0a04SGreg Roach 'LL' => array('0', '8', '8', '8', '58', '8', '8', '1', '8', '8'), 294*a25f0a04SGreg Roach 'LLY' => array('0', '8', '8', '8', '1', '8', '8'), 295*a25f0a04SGreg Roach 'LY' => array('0', '8', '8', '8', '1', '8', '8'), 296*a25f0a04SGreg Roach 'M' => array('0', '6', '6', '6'), 297*a25f0a04SGreg Roach 'MĔ' => array('0', '66', '66', '66'), 298*a25f0a04SGreg Roach 'MN' => array('0', '66', '66', '66'), 299*a25f0a04SGreg Roach 'N' => array('0', '6', '6', '6'), 300*a25f0a04SGreg Roach 'Ń' => array('0', '6', '6', '6'), 301*a25f0a04SGreg Roach 'Ň' => array('0', '6', '6', '6'), 302*a25f0a04SGreg Roach 'Ñ' => array('0', '6', '6', '6'), 303*a25f0a04SGreg Roach 'NM' => array('0', '66', '66', '66'), 304*a25f0a04SGreg Roach 'O' => array('1', '0', '', ''), 305*a25f0a04SGreg Roach 'Ò' => array('1', '0', '', ''), 306*a25f0a04SGreg Roach 'Ó' => array('1', '0', '', ''), 307*a25f0a04SGreg Roach 'Ô' => 
array('1', '0', '', ''), 308*a25f0a04SGreg Roach 'Õ' => array('1', '0', '', ''), 309*a25f0a04SGreg Roach 'Ö' => array('1', '0', '', ''), 310*a25f0a04SGreg Roach 'Ø' => array('1', '0', '', ''), 311*a25f0a04SGreg Roach 'Ő' => array('1', '0', '', ''), 312*a25f0a04SGreg Roach 'Œ' => array('1', '0', '', ''), 313*a25f0a04SGreg Roach 'Ơ' => array('1', '0', '', ''), 314*a25f0a04SGreg Roach 'Ọ' => array('1', '0', '', ''), 315*a25f0a04SGreg Roach 'Ỏ' => array('1', '0', '', ''), 316*a25f0a04SGreg Roach 'Ố' => array('1', '0', '', ''), 317*a25f0a04SGreg Roach 'Ồ' => array('1', '0', '', ''), 318*a25f0a04SGreg Roach 'Ổ' => array('1', '0', '', ''), 319*a25f0a04SGreg Roach 'Ỗ' => array('1', '0', '', ''), 320*a25f0a04SGreg Roach 'Ộ' => array('1', '0', '', ''), 321*a25f0a04SGreg Roach 'Ớ' => array('1', '0', '', ''), 322*a25f0a04SGreg Roach 'Ờ' => array('1', '0', '', ''), 323*a25f0a04SGreg Roach 'Ở' => array('1', '0', '', ''), 324*a25f0a04SGreg Roach 'Ỡ' => array('1', '0', '', ''), 325*a25f0a04SGreg Roach 'Ợ' => array('1', '0', '', ''), 326*a25f0a04SGreg Roach 'OE' => array('1', '0', '', ''), 327*a25f0a04SGreg Roach 'OI' => array('1', '0', '1', ''), 328*a25f0a04SGreg Roach 'OJ' => array('1', '0', '1', ''), 329*a25f0a04SGreg Roach 'OU' => array('1', '0', '', ''), 330*a25f0a04SGreg Roach 'OY' => array('1', '0', '1', ''), 331*a25f0a04SGreg Roach 'P' => array('0', '7', '7', '7'), 332*a25f0a04SGreg Roach 'PF' => array('0', '7', '7', '7'), 333*a25f0a04SGreg Roach 'PH' => array('0', '7', '7', '7'), 334*a25f0a04SGreg Roach 'Q' => array('0', '5', '5', '5'), 335*a25f0a04SGreg Roach 'R' => array('0', '9', '9', '9'), 336*a25f0a04SGreg Roach 'Ř' => array('0', '4', '4', '4'), 337*a25f0a04SGreg Roach 'RS' => array('0', '4', '4', '4', '94', '94', '94'), 338*a25f0a04SGreg Roach 'RZ' => array('0', '4', '4', '4', '94', '94', '94'), 339*a25f0a04SGreg Roach 'S' => array('0', '4', '4', '4'), 340*a25f0a04SGreg Roach 'Ś' => array('0', '4', '4', '4'), 341*a25f0a04SGreg Roach 'Š' => array('0', '4', '4', '4'), 
342*a25f0a04SGreg Roach 'Ş' => array('0', '4', '4', '4'), 343*a25f0a04SGreg Roach 'SC' => array('0', '2', '4', '4'), 344*a25f0a04SGreg Roach 'ŠČ' => array('0', '2', '4', '4'), 345*a25f0a04SGreg Roach 'SCH' => array('0', '4', '4', '4'), 346*a25f0a04SGreg Roach 'SCHD' => array('0', '2', '43', '43'), 347*a25f0a04SGreg Roach 'SCHT' => array('0', '2', '43', '43'), 348*a25f0a04SGreg Roach 'SCHTCH' => array('0', '2', '4', '4'), 349*a25f0a04SGreg Roach 'SCHTSCH' => array('0', '2', '4', '4'), 350*a25f0a04SGreg Roach 'SCHTSH' => array('0', '2', '4', '4'), 351*a25f0a04SGreg Roach 'SD' => array('0', '2', '43', '43'), 352*a25f0a04SGreg Roach 'SH' => array('0', '4', '4', '4'), 353*a25f0a04SGreg Roach 'SHCH' => array('0', '2', '4', '4'), 354*a25f0a04SGreg Roach 'SHD' => array('0', '2', '43', '43'), 355*a25f0a04SGreg Roach 'SHT' => array('0', '2', '43', '43'), 356*a25f0a04SGreg Roach 'SHTCH' => array('0', '2', '4', '4'), 357*a25f0a04SGreg Roach 'SHTSH' => array('0', '2', '4', '4'), 358*a25f0a04SGreg Roach 'ß' => array('0', '', '4', '4'), 359*a25f0a04SGreg Roach 'ST' => array('0', '2', '43', '43'), 360*a25f0a04SGreg Roach 'STCH' => array('0', '2', '4', '4'), 361*a25f0a04SGreg Roach 'STRS' => array('0', '2', '4', '4'), 362*a25f0a04SGreg Roach 'STRZ' => array('0', '2', '4', '4'), 363*a25f0a04SGreg Roach 'STSCH' => array('0', '2', '4', '4'), 364*a25f0a04SGreg Roach 'STSH' => array('0', '2', '4', '4'), 365*a25f0a04SGreg Roach 'SSZ' => array('0', '4', '4', '4'), 366*a25f0a04SGreg Roach 'SZ' => array('0', '4', '4', '4'), 367*a25f0a04SGreg Roach 'SZCS' => array('0', '2', '4', '4'), 368*a25f0a04SGreg Roach 'SZCZ' => array('0', '2', '4', '4'), 369*a25f0a04SGreg Roach 'SZD' => array('0', '2', '43', '43'), 370*a25f0a04SGreg Roach 'SZT' => array('0', '2', '43', '43'), 371*a25f0a04SGreg Roach 'T' => array('0', '3', '3', '3'), 372*a25f0a04SGreg Roach 'Ť' => array('0', '3', '3', '3'), 373*a25f0a04SGreg Roach 'Ţ' => array('0', '3', '3', '3', '4', '4', '4'), 374*a25f0a04SGreg Roach 'TC' => 
array('0', '4', '4', '4'), 375*a25f0a04SGreg Roach 'TCH' => array('0', '4', '4', '4'), 376*a25f0a04SGreg Roach 'TH' => array('0', '3', '3', '3'), 377*a25f0a04SGreg Roach 'TRS' => array('0', '4', '4', '4'), 378*a25f0a04SGreg Roach 'TRZ' => array('0', '4', '4', '4'), 379*a25f0a04SGreg Roach 'TS' => array('0', '4', '4', '4'), 380*a25f0a04SGreg Roach 'TSCH' => array('0', '4', '4', '4'), 381*a25f0a04SGreg Roach 'TSH' => array('0', '4', '4', '4'), 382*a25f0a04SGreg Roach 'TSZ' => array('0', '4', '4', '4'), 383*a25f0a04SGreg Roach 'TTCH' => array('0', '4', '4', '4'), 384*a25f0a04SGreg Roach 'TTS' => array('0', '4', '4', '4'), 385*a25f0a04SGreg Roach 'TTSCH' => array('0', '4', '4', '4'), 386*a25f0a04SGreg Roach 'TTSZ' => array('0', '4', '4', '4'), 387*a25f0a04SGreg Roach 'TTZ' => array('0', '4', '4', '4'), 388*a25f0a04SGreg Roach 'TZ' => array('0', '4', '4', '4'), 389*a25f0a04SGreg Roach 'TZS' => array('0', '4', '4', '4'), 390*a25f0a04SGreg Roach 'U' => array('1', '0', '', ''), 391*a25f0a04SGreg Roach 'Ù' => array('1', '0', '', ''), 392*a25f0a04SGreg Roach 'Ú' => array('1', '0', '', ''), 393*a25f0a04SGreg Roach 'Û' => array('1', '0', '', ''), 394*a25f0a04SGreg Roach 'Ü' => array('1', '0', '', ''), 395*a25f0a04SGreg Roach 'Ũ' => array('1', '0', '', ''), 396*a25f0a04SGreg Roach 'Ū' => array('1', '0', '', ''), 397*a25f0a04SGreg Roach 'Ů' => array('1', '0', '', ''), 398*a25f0a04SGreg Roach 'Ű' => array('1', '0', '', ''), 399*a25f0a04SGreg Roach 'Ų' => array('1', '0', '', ''), 400*a25f0a04SGreg Roach 'Ư' => array('1', '0', '', ''), 401*a25f0a04SGreg Roach 'Ụ' => array('1', '0', '', ''), 402*a25f0a04SGreg Roach 'Ủ' => array('1', '0', '', ''), 403*a25f0a04SGreg Roach 'Ứ' => array('1', '0', '', ''), 404*a25f0a04SGreg Roach 'Ừ' => array('1', '0', '', ''), 405*a25f0a04SGreg Roach 'Ử' => array('1', '0', '', ''), 406*a25f0a04SGreg Roach 'Ữ' => array('1', '0', '', ''), 407*a25f0a04SGreg Roach 'Ự' => array('1', '0', '', ''), 408*a25f0a04SGreg Roach 'UE' => array('1', '0', '', ''), 
409*a25f0a04SGreg Roach 'UI' => array('1', '0', '1', ''), 410*a25f0a04SGreg Roach 'UJ' => array('1', '0', '1', ''), 411*a25f0a04SGreg Roach 'UY' => array('1', '0', '1', ''), 412*a25f0a04SGreg Roach 'UW' => array('1', '0', '1', '', '0', '7', '7'), 413*a25f0a04SGreg Roach 'V' => array('0', '7', '7', '7'), 414*a25f0a04SGreg Roach 'W' => array('0', '7', '7', '7'), 415*a25f0a04SGreg Roach 'X' => array('0', '5', '54', '54'), 416*a25f0a04SGreg Roach 'Y' => array('1', '1', '', ''), 417*a25f0a04SGreg Roach 'Ý' => array('1', '1', '', ''), 418*a25f0a04SGreg Roach 'Ỳ' => array('1', '1', '', ''), 419*a25f0a04SGreg Roach 'Ỵ' => array('1', '1', '', ''), 420*a25f0a04SGreg Roach 'Ỷ' => array('1', '1', '', ''), 421*a25f0a04SGreg Roach 'Ỹ' => array('1', '1', '', ''), 422*a25f0a04SGreg Roach 'Z' => array('0', '4', '4', '4'), 423*a25f0a04SGreg Roach 'Ź' => array('0', '4', '4', '4'), 424*a25f0a04SGreg Roach 'Ż' => array('0', '4', '4', '4'), 425*a25f0a04SGreg Roach 'Ž' => array('0', '4', '4', '4'), 426*a25f0a04SGreg Roach 'ZD' => array('0', '2', '43', '43'), 427*a25f0a04SGreg Roach 'ZDZ' => array('0', '2', '4', '4'), 428*a25f0a04SGreg Roach 'ZDZH' => array('0', '2', '4', '4'), 429*a25f0a04SGreg Roach 'ZH' => array('0', '4', '4', '4'), 430*a25f0a04SGreg Roach 'ZHD' => array('0', '2', '43', '43'), 431*a25f0a04SGreg Roach 'ZHDZH' => array('0', '2', '4', '4'), 432*a25f0a04SGreg Roach 'ZS' => array('0', '4', '4', '4'), 433*a25f0a04SGreg Roach 'ZSCH' => array('0', '4', '4', '4'), 434*a25f0a04SGreg Roach 'ZSH' => array('0', '4', '4', '4'), 435*a25f0a04SGreg Roach 'ZZS' => array('0', '4', '4', '4'), 436*a25f0a04SGreg Roach // Cyrillic alphabet 437*a25f0a04SGreg Roach 'А' => array('1', '0', '', ''), 438*a25f0a04SGreg Roach 'Б' => array('0', '7', '7', '7'), 439*a25f0a04SGreg Roach 'В' => array('0', '7', '7', '7'), 440*a25f0a04SGreg Roach 'Г' => array('0', '5', '5', '5'), 441*a25f0a04SGreg Roach 'Д' => array('0', '3', '3', '3'), 442*a25f0a04SGreg Roach 'ДЗ' => array('0', '4', '4', '4'), 
443*a25f0a04SGreg Roach 'Е' => array('1', '0', '', ''), 444*a25f0a04SGreg Roach 'Ё' => array('1', '0', '', ''), 445*a25f0a04SGreg Roach 'Ж' => array('0', '4', '4', '4'), 446*a25f0a04SGreg Roach 'З' => array('0', '4', '4', '4'), 447*a25f0a04SGreg Roach 'И' => array('1', '0', '', ''), 448*a25f0a04SGreg Roach 'Й' => array('1', '1', '', '', '4', '4', '4'), 449*a25f0a04SGreg Roach 'К' => array('0', '5', '5', '5'), 450*a25f0a04SGreg Roach 'Л' => array('0', '8', '8', '8'), 451*a25f0a04SGreg Roach 'М' => array('0', '6', '6', '6'), 452*a25f0a04SGreg Roach 'Н' => array('0', '6', '6', '6'), 453*a25f0a04SGreg Roach 'О' => array('1', '0', '', ''), 454*a25f0a04SGreg Roach 'П' => array('0', '7', '7', '7'), 455*a25f0a04SGreg Roach 'Р' => array('0', '9', '9', '9'), 456*a25f0a04SGreg Roach 'РЖ' => array('0', '4', '4', '4'), 457*a25f0a04SGreg Roach 'С' => array('0', '4', '4', '4'), 458*a25f0a04SGreg Roach 'Т' => array('0', '3', '3', '3'), 459*a25f0a04SGreg Roach 'У' => array('1', '0', '', ''), 460*a25f0a04SGreg Roach 'Ф' => array('0', '7', '7', '7'), 461*a25f0a04SGreg Roach 'Х' => array('0', '5', '5', '5'), 462*a25f0a04SGreg Roach 'Ц' => array('0', '4', '4', '4'), 463*a25f0a04SGreg Roach 'Ч' => array('0', '4', '4', '4'), 464*a25f0a04SGreg Roach 'Ш' => array('0', '4', '4', '4'), 465*a25f0a04SGreg Roach 'Щ' => array('0', '2', '4', '4'), 466*a25f0a04SGreg Roach 'Ъ' => array('0', '', '', ''), 467*a25f0a04SGreg Roach 'Ы' => array('0', '1', '', ''), 468*a25f0a04SGreg Roach 'Ь' => array('0', '', '', ''), 469*a25f0a04SGreg Roach 'Э' => array('1', '0', '', ''), 470*a25f0a04SGreg Roach 'Ю' => array('0', '1', '', ''), 471*a25f0a04SGreg Roach 'Я' => array('0', '1', '', ''), 472*a25f0a04SGreg Roach // Greek alphabet 473*a25f0a04SGreg Roach 'Α' => array('1', '0', '', ''), 474*a25f0a04SGreg Roach 'Ά' => array('1', '0', '', ''), 475*a25f0a04SGreg Roach 'ΑΙ' => array('1', '0', '1', ''), 476*a25f0a04SGreg Roach 'ΑΥ' => array('1', '0', '1', ''), 477*a25f0a04SGreg Roach 'Β' => array('0', '7', '7', '7'), 
478*a25f0a04SGreg Roach 'Γ' => array('0', '5', '5', '5'), 479*a25f0a04SGreg Roach 'Δ' => array('0', '3', '3', '3'), 480*a25f0a04SGreg Roach 'Ε' => array('1', '0', '', ''), 481*a25f0a04SGreg Roach 'Έ' => array('1', '0', '', ''), 482*a25f0a04SGreg Roach 'ΕΙ' => array('1', '0', '1', ''), 483*a25f0a04SGreg Roach 'ΕΥ' => array('1', '1', '1', ''), 484*a25f0a04SGreg Roach 'Ζ' => array('0', '4', '4', '4'), 485*a25f0a04SGreg Roach 'Η' => array('1', '0', '', ''), 486*a25f0a04SGreg Roach 'Ή' => array('1', '0', '', ''), 487*a25f0a04SGreg Roach 'Θ' => array('0', '3', '3', '3'), 488*a25f0a04SGreg Roach 'Ι' => array('1', '0', '', ''), 489*a25f0a04SGreg Roach 'Ί' => array('1', '0', '', ''), 490*a25f0a04SGreg Roach 'Ϊ' => array('1', '0', '', ''), 491*a25f0a04SGreg Roach 'ΐ' => array('1', '0', '', ''), 492*a25f0a04SGreg Roach 'Κ' => array('0', '5', '5', '5'), 493*a25f0a04SGreg Roach 'Λ' => array('0', '8', '8', '8'), 494*a25f0a04SGreg Roach 'Μ' => array('0', '6', '6', '6'), 495*a25f0a04SGreg Roach 'ΜΠ' => array('0', '7', '7', '7'), 496*a25f0a04SGreg Roach 'Ν' => array('0', '6', '6', '6'), 497*a25f0a04SGreg Roach 'ΝΤ' => array('0', '3', '3', '3'), 498*a25f0a04SGreg Roach 'Ξ' => array('0', '5', '54', '54'), 499*a25f0a04SGreg Roach 'Ο' => array('1', '0', '', ''), 500*a25f0a04SGreg Roach 'Ό' => array('1', '0', '', ''), 501*a25f0a04SGreg Roach 'ΟΙ' => array('1', '0', '1', ''), 502*a25f0a04SGreg Roach 'ΟΥ' => array('1', '0', '1', ''), 503*a25f0a04SGreg Roach 'Π' => array('0', '7', '7', '7'), 504*a25f0a04SGreg Roach 'Ρ' => array('0', '9', '9', '9'), 505*a25f0a04SGreg Roach 'Σ' => array('0', '4', '4', '4'), 506*a25f0a04SGreg Roach 'ς' => array('0', '', '', '4'), 507*a25f0a04SGreg Roach 'Τ' => array('0', '3', '3', '3'), 508*a25f0a04SGreg Roach 'ΤΖ' => array('0', '4', '4', '4'), 509*a25f0a04SGreg Roach 'ΤΣ' => array('0', '4', '4', '4'), 510*a25f0a04SGreg Roach 'Υ' => array('1', '1', '', ''), 511*a25f0a04SGreg Roach 'Ύ' => array('1', '1', '', ''), 512*a25f0a04SGreg Roach 'Ϋ' => array('1', '1', 
'', ''), 513*a25f0a04SGreg Roach 'ΰ' => array('1', '1', '', ''), 514*a25f0a04SGreg Roach 'ΥΚ' => array('1', '5', '5', '5'), 515*a25f0a04SGreg Roach 'ΥΥ' => array('1', '65', '65', '65'), 516*a25f0a04SGreg Roach 'Φ' => array('0', '7', '7', '7'), 517*a25f0a04SGreg Roach 'Χ' => array('0', '5', '5', '5'), 518*a25f0a04SGreg Roach 'Ψ' => array('0', '7', '7', '7'), 519*a25f0a04SGreg Roach 'Ω' => array('1', '0', '', ''), 520*a25f0a04SGreg Roach 'Ώ' => array('1', '0', '', ''), 521*a25f0a04SGreg Roach // Hebrew alphabet 522*a25f0a04SGreg Roach 'א' => array('1', '0', '', ''), 523*a25f0a04SGreg Roach 'או' => array('1', '0', '7', ''), 524*a25f0a04SGreg Roach 'אג' => array('1', '4', '4', '4', '5', '5', '5', '34', '34', '34'), 525*a25f0a04SGreg Roach 'בב' => array('0', '7', '7', '7', '77', '77', '77'), 526*a25f0a04SGreg Roach 'ב' => array('0', '7', '7', '7'), 527*a25f0a04SGreg Roach 'גג' => array('0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'), 528*a25f0a04SGreg Roach 'גד' => array('0', '43', '43', '43', '53', '53', '53'), 529*a25f0a04SGreg Roach 'גה' => array('0', '45', '45', '45', '55', '55', '55'), 530*a25f0a04SGreg Roach 'גז' => array('0', '44', '44', '44', '45', '45', '45'), 531*a25f0a04SGreg Roach 'גח' => array('0', '45', '45', '45', '55', '55', '55'), 532*a25f0a04SGreg Roach 'גכ' => array('0', '45', '45', '45', '55', '55', '55'), 533*a25f0a04SGreg Roach 'גך' => array('0', '45', '45', '45', '55', '55', '55'), 534*a25f0a04SGreg Roach 'גצ' => array('0', '44', '44', '44', '45', '45', '45'), 535*a25f0a04SGreg Roach 'גץ' => array('0', '44', '44', '44', '45', '45', '45'), 536*a25f0a04SGreg Roach 'גק' => array('0', '45', '45', '45', '54', '54', '54'), 537*a25f0a04SGreg Roach 'גש' => array('0', '44', '44', '44', '54', '54', '54'), 538*a25f0a04SGreg Roach 'גת' => array('0', '43', '43', '43', '53', '53', '53'), 539*a25f0a04SGreg Roach 'ג' => array('0', '4', '4', '4', '5', '5', '5'), 540*a25f0a04SGreg Roach 'דז' => array('0', '4', '4', '4'), 
541*a25f0a04SGreg Roach 'דד' => array('0', '3', '3', '3', '33', '33', '33'), 542*a25f0a04SGreg Roach 'דט' => array('0', '33', '33', '33'), 543*a25f0a04SGreg Roach 'דש' => array('0', '4', '4', '4'), 544*a25f0a04SGreg Roach 'דצ' => array('0', '4', '4', '4'), 545*a25f0a04SGreg Roach 'דץ' => array('0', '4', '4', '4'), 546*a25f0a04SGreg Roach 'ד' => array('0', '3', '3', '3'), 547*a25f0a04SGreg Roach 'הג' => array('0', '54', '54', '54', '55', '55', '55'), 548*a25f0a04SGreg Roach 'הכ' => array('0', '55', '55', '55'), 549*a25f0a04SGreg Roach 'הח' => array('0', '55', '55', '55'), 550*a25f0a04SGreg Roach 'הק' => array('0', '55', '55', '55', '5', '5', '5'), 551*a25f0a04SGreg Roach 'הה' => array('0', '5', '5', '', '55', '55', ''), 552*a25f0a04SGreg Roach 'ה' => array('0', '5', '5', ''), 553*a25f0a04SGreg Roach 'וי' => array('1', '', '', '', '7', '7', '7'), 554*a25f0a04SGreg Roach 'ו' => array('1', '7', '7', '7', '7', '', ''), 555*a25f0a04SGreg Roach 'וו' => array('1', '7', '7', '7', '7', '', ''), 556*a25f0a04SGreg Roach 'וופ' => array('1', '7', '7', '7', '77', '77', '77'), 557*a25f0a04SGreg Roach 'זש' => array('0', '4', '4', '4', '44', '44', '44'), 558*a25f0a04SGreg Roach 'זדז' => array('0', '2', '4', '4'), 559*a25f0a04SGreg Roach 'ז' => array('0', '4', '4', '4'), 560*a25f0a04SGreg Roach 'זג' => array('0', '44', '44', '44', '45', '45', '45'), 561*a25f0a04SGreg Roach 'זז' => array('0', '4', '4', '4', '44', '44', '44'), 562*a25f0a04SGreg Roach 'זס' => array('0', '44', '44', '44'), 563*a25f0a04SGreg Roach 'זצ' => array('0', '44', '44', '44'), 564*a25f0a04SGreg Roach 'זץ' => array('0', '44', '44', '44'), 565*a25f0a04SGreg Roach 'חג' => array('0', '54', '54', '54', '53', '53', '53'), 566*a25f0a04SGreg Roach 'חח' => array('0', '5', '5', '5', '55', '55', '55'), 567*a25f0a04SGreg Roach 'חק' => array('0', '55', '55', '55', '5', '5', '5'), 568*a25f0a04SGreg Roach 'חכ' => array('0', '45', '45', '45', '55', '55', '55'), 569*a25f0a04SGreg Roach 'חס' => array('0', '5', '54', '54'), 
570*a25f0a04SGreg Roach 'חש' => array('0', '5', '54', '54'), 571*a25f0a04SGreg Roach 'ח' => array('0', '5', '5', '5'), 572*a25f0a04SGreg Roach 'טש' => array('0', '4', '4', '4'), 573*a25f0a04SGreg Roach 'טד' => array('0', '33', '33', '33'), 574*a25f0a04SGreg Roach 'טי' => array('0', '3', '3', '3', '4', '4', '4', '3', '3', '34'), 575*a25f0a04SGreg Roach 'טת' => array('0', '33', '33', '33'), 576*a25f0a04SGreg Roach 'טט' => array('0', '3', '3', '3', '33', '33', '33'), 577*a25f0a04SGreg Roach 'ט' => array('0', '3', '3', '3'), 578*a25f0a04SGreg Roach 'י' => array('1', '1', '', ''), 579*a25f0a04SGreg Roach 'יא' => array('1', '1', '', '', '1', '1', '1'), 580*a25f0a04SGreg Roach 'כג' => array('0', '55', '55', '55', '54', '54', '54'), 581*a25f0a04SGreg Roach 'כש' => array('0', '5', '54', '54'), 582*a25f0a04SGreg Roach 'כס' => array('0', '5', '54', '54'), 583*a25f0a04SGreg Roach 'ככ' => array('0', '5', '5', '5', '55', '55', '55'), 584*a25f0a04SGreg Roach 'כך' => array('0', '5', '5', '5', '55', '55', '55'), 585*a25f0a04SGreg Roach 'כ' => array('0', '5', '5', '5'), 586*a25f0a04SGreg Roach 'כח' => array('0', '55', '55', '55', '5', '5', '5'), 587*a25f0a04SGreg Roach 'ך' => array('0', '', '5', '5'), 588*a25f0a04SGreg Roach 'ל' => array('0', '8', '8', '8'), 589*a25f0a04SGreg Roach 'לל' => array('0', '88', '88', '88', '8', '8', '8'), 590*a25f0a04SGreg Roach 'מנ' => array('0', '66', '66', '66'), 591*a25f0a04SGreg Roach 'מן' => array('0', '66', '66', '66'), 592*a25f0a04SGreg Roach 'ממ' => array('0', '6', '6', '6', '66', '66', '66'), 593*a25f0a04SGreg Roach 'מם' => array('0', '6', '6', '6', '66', '66', '66'), 594*a25f0a04SGreg Roach 'מ' => array('0', '6', '6', '6'), 595*a25f0a04SGreg Roach 'ם' => array('0', '', '6', '6'), 596*a25f0a04SGreg Roach 'נמ' => array('0', '66', '66', '66'), 597*a25f0a04SGreg Roach 'נם' => array('0', '66', '66', '66'), 598*a25f0a04SGreg Roach 'ננ' => array('0', '6', '6', '6', '66', '66', '66'), 599*a25f0a04SGreg Roach 'נן' => array('0', '6', '6', '6', '66', 
'66', '66'), 600*a25f0a04SGreg Roach 'נ' => array('0', '6', '6', '6'), 601*a25f0a04SGreg Roach 'ן' => array('0', '', '6', '6'), 602*a25f0a04SGreg Roach 'סתש' => array('0', '2', '4', '4'), 603*a25f0a04SGreg Roach 'סתז' => array('0', '2', '4', '4'), 604*a25f0a04SGreg Roach 'סטז' => array('0', '2', '4', '4'), 605*a25f0a04SGreg Roach 'סטש' => array('0', '2', '4', '4'), 606*a25f0a04SGreg Roach 'סצד' => array('0', '2', '4', '4'), 607*a25f0a04SGreg Roach 'סט' => array('0', '2', '4', '4', '43', '43', '43'), 608*a25f0a04SGreg Roach 'סת' => array('0', '2', '4', '4', '43', '43', '43'), 609*a25f0a04SGreg Roach 'סג' => array('0', '44', '44', '44', '4', '4', '4'), 610*a25f0a04SGreg Roach 'סס' => array('0', '4', '4', '4', '44', '44', '44'), 611*a25f0a04SGreg Roach 'סצ' => array('0', '44', '44', '44'), 612*a25f0a04SGreg Roach 'סץ' => array('0', '44', '44', '44'), 613*a25f0a04SGreg Roach 'סז' => array('0', '44', '44', '44'), 614*a25f0a04SGreg Roach 'סש' => array('0', '44', '44', '44'), 615*a25f0a04SGreg Roach 'ס' => array('0', '4', '4', '4'), 616*a25f0a04SGreg Roach 'ע' => array('1', '0', '', ''), 617*a25f0a04SGreg Roach 'פב' => array('0', '7', '7', '7', '77', '77', '77'), 618*a25f0a04SGreg Roach 'פוו' => array('0', '7', '7', '7', '77', '77', '77'), 619*a25f0a04SGreg Roach 'פפ' => array('0', '7', '7', '7', '77', '77', '77'), 620*a25f0a04SGreg Roach 'פף' => array('0', '7', '7', '7', '77', '77', '77'), 621*a25f0a04SGreg Roach 'פ' => array('0', '7', '7', '7'), 622*a25f0a04SGreg Roach 'ף' => array('0', '', '7', '7'), 623*a25f0a04SGreg Roach 'צג' => array('0', '44', '44', '44', '45', '45', '45'), 624*a25f0a04SGreg Roach 'צז' => array('0', '44', '44', '44'), 625*a25f0a04SGreg Roach 'צס' => array('0', '44', '44', '44'), 626*a25f0a04SGreg Roach 'צצ' => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'), 627*a25f0a04SGreg Roach 'צץ' => array('0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'), 628*a25f0a04SGreg Roach 'צש' => 
array('0', '44', '44', '44', '4', '4', '4', '5', '5', '5'), 629*a25f0a04SGreg Roach 'צ' => array('0', '4', '4', '4', '5', '5', '5'), 630*a25f0a04SGreg Roach 'ץ' => array('0', '', '4', '4'), 631*a25f0a04SGreg Roach 'קה' => array('0', '55', '55', '5'), 632*a25f0a04SGreg Roach 'קס' => array('0', '5', '54', '54'), 633*a25f0a04SGreg Roach 'קש' => array('0', '5', '54', '54'), 634*a25f0a04SGreg Roach 'קק' => array('0', '5', '5', '5', '55', '55', '55'), 635*a25f0a04SGreg Roach 'קח' => array('0', '55', '55', '55'), 636*a25f0a04SGreg Roach 'קכ' => array('0', '55', '55', '55'), 637*a25f0a04SGreg Roach 'קך' => array('0', '55', '55', '55'), 638*a25f0a04SGreg Roach 'קג' => array('0', '55', '55', '55', '54', '54', '54'), 639*a25f0a04SGreg Roach 'ק' => array('0', '5', '5', '5'), 640*a25f0a04SGreg Roach 'רר' => array('0', '99', '99', '99', '9', '9', '9'), 641*a25f0a04SGreg Roach 'ר' => array('0', '9', '9', '9'), 642*a25f0a04SGreg Roach 'שטז' => array('0', '2', '4', '4'), 643*a25f0a04SGreg Roach 'שתש' => array('0', '2', '4', '4'), 644*a25f0a04SGreg Roach 'שתז' => array('0', '2', '4', '4'), 645*a25f0a04SGreg Roach 'שטש' => array('0', '2', '4', '4'), 646*a25f0a04SGreg Roach 'שד' => array('0', '2', '43', '43'), 647*a25f0a04SGreg Roach 'שז' => array('0', '44', '44', '44'), 648*a25f0a04SGreg Roach 'שס' => array('0', '44', '44', '44'), 649*a25f0a04SGreg Roach 'שת' => array('0', '2', '43', '43'), 650*a25f0a04SGreg Roach 'שג' => array('0', '4', '4', '4', '44', '44', '44', '4', '43', '43'), 651*a25f0a04SGreg Roach 'שט' => array('0', '2', '43', '43', '44', '44', '44'), 652*a25f0a04SGreg Roach 'שצ' => array('0', '44', '44', '44', '45', '45', '45'), 653*a25f0a04SGreg Roach 'שץ' => array('0', '44', '', '44', '45', '', '45'), 654*a25f0a04SGreg Roach 'שש' => array('0', '4', '4', '4', '44', '44', '44'), 655*a25f0a04SGreg Roach 'ש' => array('0', '4', '4', '4'), 656*a25f0a04SGreg Roach 'תג' => array('0', '34', '34', '34'), 657*a25f0a04SGreg Roach 'תז' => array('0', '34', '34', '34'), 
658*a25f0a04SGreg Roach 'תש' => array('0', '4', '4', '4'), 659*a25f0a04SGreg Roach 'תת' => array('0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'), 660*a25f0a04SGreg Roach 'ת' => array('0', '3', '3', '3', '4', '4', '4'), 661*a25f0a04SGreg Roach // Arabic alphabet 662*a25f0a04SGreg Roach 'ا' => array('1', '0', '', ''), 663*a25f0a04SGreg Roach 'ب' => array('0', '7', '7', '7'), 664*a25f0a04SGreg Roach 'ت' => array('0', '3', '3', '3'), 665*a25f0a04SGreg Roach 'ث' => array('0', '3', '3', '3'), 666*a25f0a04SGreg Roach 'ج' => array('0', '4', '4', '4'), 667*a25f0a04SGreg Roach 'ح' => array('0', '5', '5', '5'), 668*a25f0a04SGreg Roach 'خ' => array('0', '5', '5', '5'), 669*a25f0a04SGreg Roach 'د' => array('0', '3', '3', '3'), 670*a25f0a04SGreg Roach 'ذ' => array('0', '3', '3', '3'), 671*a25f0a04SGreg Roach 'ر' => array('0', '9', '9', '9'), 672*a25f0a04SGreg Roach 'ز' => array('0', '4', '4', '4'), 673*a25f0a04SGreg Roach 'س' => array('0', '4', '4', '4'), 674*a25f0a04SGreg Roach 'ش' => array('0', '4', '4', '4'), 675*a25f0a04SGreg Roach 'ص' => array('0', '4', '4', '4'), 676*a25f0a04SGreg Roach 'ض' => array('0', '3', '3', '3'), 677*a25f0a04SGreg Roach 'ط' => array('0', '3', '3', '3'), 678*a25f0a04SGreg Roach 'ظ' => array('0', '4', '4', '4'), 679*a25f0a04SGreg Roach 'ع' => array('1', '0', '', ''), 680*a25f0a04SGreg Roach 'غ' => array('0', '0', '', ''), 681*a25f0a04SGreg Roach 'ف' => array('0', '7', '7', '7'), 682*a25f0a04SGreg Roach 'ق' => array('0', '5', '5', '5'), 683*a25f0a04SGreg Roach 'ك' => array('0', '5', '5', '5'), 684*a25f0a04SGreg Roach 'ل' => array('0', '8', '8', '8'), 685*a25f0a04SGreg Roach 'لا' => array('0', '8', '8', '8'), 686*a25f0a04SGreg Roach 'م' => array('0', '6', '6', '6'), 687*a25f0a04SGreg Roach 'ن' => array('0', '6', '6', '6'), 688*a25f0a04SGreg Roach 'هن' => array('0', '66', '66', '66'), 689*a25f0a04SGreg Roach 'ه' => array('0', '5', '5', ''), 690*a25f0a04SGreg Roach 'و' => array('1', '', '', '', 
'7', '', ''),
        'ي' => array('0', '1', '', ''),
        'آ' => array('0', '1', '', ''),
        'ة' => array('0', '', '', '3'),
        'ی' => array('0', '1', '', ''),
        'ى' => array('1', '1', '', ''),
    );

    /**
     * Find the longest chunk of $name, starting at byte offset $offset, that is
     * a key of self::$dmsounds.
     *
     * The search starts with a chunk of self::MAXCHAR bytes and drops one
     * trailing byte at a time until a table key is found.
     *
     * @param string $name
     * @param int    $offset Byte offset into $name.
     *
     * @return string The matching chunk, or '' when nothing in the table matches here.
     */
    private static function longestDmSoundsKey($name, $offset) {
        $chunk = substr($name, $offset, self::MAXCHAR);
        while ($chunk != '' && !isset(self::$dmsounds[$chunk])) {
            $chunk = substr($chunk, 0, -1);
        }

        // substr() can return false instead of '' on older PHP versions; normalise it.
        return (string) $chunk;
    }

    /**
     * Calculate the Daitch-Mokotoff codes for a single word.
     *
     * The word is upper-cased, rewritten using self::$transformNameTable and then
     * consumed from left to right. At each position the longest chunk found in
     * self::$dmsounds is used; bytes that match nothing are skipped.
     *
     * Layout of a self::$dmsounds entry, as used below:
     *  - index 0: vowel flag ('0' means the chunk is not a vowel),
     *  - index 1, 2, 3: the sound at the start of the word, before a vowel, and
     *    in any other position,
     *  - index 4-6, 7-9, ...: alternative sounds, in the same order. Every
     *    alternative forks every partial code, so one word can give several codes.
     * An empty sound means "ignore this chunk in this position".
     *
     * @param string $name
     *
     * @return string[] List of possible DM codes for the word, each 6 characters long.
     */
    private static function daitchMokotoffWord($name) {
        // Apply special transformation rules to the input string
        $name = I18N::strtoupper($name);
        foreach (self::$transformNameTable as $transformRule) {
            $name = str_replace($transformRule[0], $transformRule[1], $name);
        }

        // Initialize
        $name_script = I18N::textScript($name);
        $noVowels    = ($name_script === 'Hebr' || $name_script === 'Arab');

        $lastPos       = strlen($name) - 1;
        $currPos       = 0;
        $state         = 1; // 1: start of input string, 2: before vowel, 3: other
        $finished      = array(); // sound lists that already hold 6 sounds
        $partialResult = array(array('!')); // unfinished sound lists ('!' stops the "duplicate sound" check)

        // Loop through the input string.
        // Stop when the string is exhausted or when no more partial results remain
        while (count($partialResult) !== 0 && $currPos <= $lastPos) {
            // Find the DM coding table entry for the chunk at the current position
            $thisEntry = self::longestDmSoundsKey($name, $currPos);
            if ($thisEntry === '') {
                $currPos++; // Not in table: advance pointer to next byte
                continue; // and try again
            }

            $soundTableEntry = self::$dmsounds[$thisEntry];
            $workingResult   = $partialResult;
            $partialResult   = array();
            $currPos += strlen($thisEntry);

            // Not at beginning of input string: the state depends on what follows this chunk
            if ($state !== 1) {
                $nextEntry = ($currPos <= $lastPos) ? self::longestDmSoundsKey($name, $currPos) : '';
                if ($nextEntry !== '' && self::$dmsounds[$nextEntry][0] !== '0') {
                    // Next chunk is a vowel
                    $state = 2;
                } else {
                    // Next chunk is not a vowel, is unknown, or we are at the end of the word
                    $state = 3;
                }
            }

            // Visit the sound for this state in every triplet of the table entry
            while ($state < count($soundTableEntry)) {
                $sound = $soundTableEntry[$state];
                foreach ($workingResult as $workingEntry) {
                    // $workingEntry is a copy, so $workingResult stays intact for the next triplet
                    if ($sound === '') {
                        // Empty means 'ignore this sound in this state'.
                        // Mark the previous sound so that it cannot match the next one.
                        $workingEntry[count($workingEntry) - 1] .= '!'; // Prevent false 'doubles'
                        $partialResult[] = $workingEntry;
                        continue;
                    }

                    // A sound that repeats the previous one is dropped, except for Hebrew
                    // and Arabic text, where the repeated sound is appended as well.
                    if ($noVowels || $sound !== $workingEntry[count($workingEntry) - 1]) {
                        $workingEntry[] = $sound;
                    }

                    // We're looking for 7 entries because the first is '!' and doesn't count
                    if (count($workingEntry) < 7) {
                        $partialResult[] = $workingEntry;
                    } else {
                        $finished[] = $workingEntry;
                    }
                }
                $state += 3; // Advance to next triplet while keeping the same basic state
            }
        }

        // Convert the finished lists first, then all remaining partial ones, to 6-character codes
        $result = array();
        foreach (array_merge($finished, $partialResult) as $workingEntry) {
            $tempResult = str_replace('!', '', implode('', $workingEntry));
            // Only return codes from recognisable sounds.
            // Compare against '' explicitly: the code '0' (a lone initial vowel) is a valid
            // result, but the string '0' is falsy in PHP.
            if ($tempResult !== '') {
                $result[] = substr($tempResult . '000000', 0, 6); // Zero-fill / truncate
            }
        }

        return $result;
    }
}