1a25f0a04SGreg Roach<?php 23976b470SGreg Roach 3a25f0a04SGreg Roach/** 4a25f0a04SGreg Roach * webtrees: online genealogy 589f7189bSGreg Roach * Copyright (C) 2021 webtrees development team 6a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify 7a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by 8a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or 9a25f0a04SGreg Roach * (at your option) any later version. 10a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful, 11a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of 12a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13a25f0a04SGreg Roach * GNU General Public License for more details. 14a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License 1589f7189bSGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>. 16a25f0a04SGreg Roach */ 17fcfa147eSGreg Roach 18e7f56f2aSGreg Roachdeclare(strict_types=1); 19e7f56f2aSGreg Roach 2076692c8bSGreg Roachnamespace Fisharebest\Webtrees; 21a25f0a04SGreg Roach 22a25f0a04SGreg Roach/** 2376692c8bSGreg Roach * Phonetic matching of strings. 24a25f0a04SGreg Roach */ 25c1010edaSGreg Roachclass Soundex 26c1010edaSGreg Roach{ 27a25f0a04SGreg Roach // Determine the Daitch–Mokotoff Soundex code for a word 28a25f0a04SGreg Roach // Original implementation by Gerry Kroll, and analysis by Meliza Amity 29a25f0a04SGreg Roach 30a25f0a04SGreg Roach // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!) 3116cfb0b9SGreg Roach private const MAXCHAR = 7; 32a25f0a04SGreg Roach 33a25f0a04SGreg Roach /** 34a25f0a04SGreg Roach * Name transformation arrays. 35a25f0a04SGreg Roach * Used to transform the Name string to simplify the "sounds like" table. 36a25f0a04SGreg Roach * This is especially useful in Hebrew. 
37a25f0a04SGreg Roach * 38a25f0a04SGreg Roach * Each array entry defines the "from" and "to" arguments of an preg($from, $to, $text) 39a25f0a04SGreg Roach * function call to achieve the desired transformations. 40a25f0a04SGreg Roach * 41a25f0a04SGreg Roach * Note about the use of "\x01": 42a25f0a04SGreg Roach * This code, which can’t legitimately occur in the kind of text we're dealing with, 43a25f0a04SGreg Roach * is used as a place-holder so that conditional string replacements can be done. 44a25f0a04SGreg Roach */ 4516cfb0b9SGreg Roach private const TRANSFORM_NAMES = [ 46a25f0a04SGreg Roach // Force Yiddish ligatures to be treated as separate letters 47*4096896cSGreg Roach ['װ', 'וו'], 48*4096896cSGreg Roach ['ײ', 'יי'], 49*4096896cSGreg Roach ['ױ', 'וי'], 50*4096896cSGreg Roach ['בו', 'בע'], 51*4096896cSGreg Roach ['פו', 'פע'], 52*4096896cSGreg Roach ['ומ', 'עמ'], 53*4096896cSGreg Roach ['ום', 'עם'], 54*4096896cSGreg Roach ['ונ', 'ענ'], 55*4096896cSGreg Roach ['ון', 'ען'], 56*4096896cSGreg Roach ['וו', 'ב'], 57*4096896cSGreg Roach ["\x01", ''], 58*4096896cSGreg Roach ['ייה$', "\x01ה"], 59*4096896cSGreg Roach ['ייע$', "\x01ע"], 60*4096896cSGreg Roach ['יי', 'ע'], 61*4096896cSGreg Roach ["\x01", 'יי'], 6213abd6f3SGreg Roach ]; 63a25f0a04SGreg Roach 64a25f0a04SGreg Roach /** 65a25f0a04SGreg Roach * The DM sound coding table is organized this way: 66a25f0a04SGreg Roach * key: a variable-length string that corresponds to the UTF-8 character sequence 67a25f0a04SGreg Roach * represented by the table entry. Currently, that string can be up to 7 68a25f0a04SGreg Roach * bytes long. This maximum length is defined by the value of global variable 69a25f0a04SGreg Roach * $maxchar. 
70a25f0a04SGreg Roach * 71a25f0a04SGreg Roach * value: an array as follows: 72a25f0a04SGreg Roach * [0]: zero if not a vowel 73a25f0a04SGreg Roach * [1]: sound value when this string is at the beginning of the word 74a25f0a04SGreg Roach * [2]: sound value when this string is followed by a vowel 75a25f0a04SGreg Roach * [3]: sound value for other cases 76a25f0a04SGreg Roach * [1],[2],[3] can be repeated several times to create branches in the code 77a25f0a04SGreg Roach * an empty sound value means "ignore in this state" 78a25f0a04SGreg Roach */ 7916cfb0b9SGreg Roach private const DM_SOUNDS = [ 80*4096896cSGreg Roach 'A' => ['1', '0', '', ''], 81*4096896cSGreg Roach 'À' => ['1', '0', '', ''], 82*4096896cSGreg Roach 'Á' => ['1', '0', '', ''], 83*4096896cSGreg Roach 'Â' => ['1', '0', '', ''], 84*4096896cSGreg Roach 'Ã' => ['1', '0', '', ''], 85*4096896cSGreg Roach 'Ä' => ['1', '0', '1', '', '0', '', ''], 86*4096896cSGreg Roach 'Å' => ['1', '0', '', ''], 87*4096896cSGreg Roach 'Ă' => ['1', '0', '', ''], 88*4096896cSGreg Roach 'Ą' => ['1', '', '', '', '', '', '6'], 89*4096896cSGreg Roach 'Ạ' => ['1', '0', '', ''], 90*4096896cSGreg Roach 'Ả' => ['1', '0', '', ''], 91*4096896cSGreg Roach 'Ấ' => ['1', '0', '', ''], 92*4096896cSGreg Roach 'Ầ' => ['1', '0', '', ''], 93*4096896cSGreg Roach 'Ẩ' => ['1', '0', '', ''], 94*4096896cSGreg Roach 'Ẫ' => ['1', '0', '', ''], 95*4096896cSGreg Roach 'Ậ' => ['1', '0', '', ''], 96*4096896cSGreg Roach 'Ắ' => ['1', '0', '', ''], 97*4096896cSGreg Roach 'Ằ' => ['1', '0', '', ''], 98*4096896cSGreg Roach 'Ẳ' => ['1', '0', '', ''], 99*4096896cSGreg Roach 'Ẵ' => ['1', '0', '', ''], 100*4096896cSGreg Roach 'Ặ' => ['1', '0', '', ''], 101*4096896cSGreg Roach 'AE' => ['1', '0', '1', ''], 102*4096896cSGreg Roach 'Æ' => ['1', '0', '1', ''], 103*4096896cSGreg Roach 'AI' => ['1', '0', '1', ''], 104*4096896cSGreg Roach 'AJ' => ['1', '0', '1', ''], 105*4096896cSGreg Roach 'AU' => ['1', '0', '7', ''], 106*4096896cSGreg Roach 'AV' => ['1', '0', '7', '', '7', 
'7', '7'], 107*4096896cSGreg Roach 'ÄU' => ['1', '0', '1', ''], 108*4096896cSGreg Roach 'AY' => ['1', '0', '1', ''], 109*4096896cSGreg Roach 'B' => ['0', '7', '7', '7'], 110*4096896cSGreg Roach 'C' => ['0', '5', '5', '5', '34', '4', '4'], 111*4096896cSGreg Roach 'Ć' => ['0', '4', '4', '4'], 112*4096896cSGreg Roach 'Č' => ['0', '4', '4', '4'], 113*4096896cSGreg Roach 'Ç' => ['0', '4', '4', '4'], 114*4096896cSGreg Roach 'CH' => ['0', '5', '5', '5', '34', '4', '4'], 115*4096896cSGreg Roach 'CHS' => ['0', '5', '54', '54'], 116*4096896cSGreg Roach 'CK' => ['0', '5', '5', '5', '45', '45', '45'], 117*4096896cSGreg Roach 'CCS' => ['0', '4', '4', '4'], 118*4096896cSGreg Roach 'CS' => ['0', '4', '4', '4'], 119*4096896cSGreg Roach 'CSZ' => ['0', '4', '4', '4'], 120*4096896cSGreg Roach 'CZ' => ['0', '4', '4', '4'], 121*4096896cSGreg Roach 'CZS' => ['0', '4', '4', '4'], 122*4096896cSGreg Roach 'D' => ['0', '3', '3', '3'], 123*4096896cSGreg Roach 'Ď' => ['0', '3', '3', '3'], 124*4096896cSGreg Roach 'Đ' => ['0', '3', '3', '3'], 125*4096896cSGreg Roach 'DRS' => ['0', '4', '4', '4'], 126*4096896cSGreg Roach 'DRZ' => ['0', '4', '4', '4'], 127*4096896cSGreg Roach 'DS' => ['0', '4', '4', '4'], 128*4096896cSGreg Roach 'DSH' => ['0', '4', '4', '4'], 129*4096896cSGreg Roach 'DSZ' => ['0', '4', '4', '4'], 130*4096896cSGreg Roach 'DT' => ['0', '3', '3', '3'], 131*4096896cSGreg Roach 'DDZ' => ['0', '4', '4', '4'], 132*4096896cSGreg Roach 'DDZS' => ['0', '4', '4', '4'], 133*4096896cSGreg Roach 'DZ' => ['0', '4', '4', '4'], 134*4096896cSGreg Roach 'DŹ' => ['0', '4', '4', '4'], 135*4096896cSGreg Roach 'DŻ' => ['0', '4', '4', '4'], 136*4096896cSGreg Roach 'DZH' => ['0', '4', '4', '4'], 137*4096896cSGreg Roach 'DZS' => ['0', '4', '4', '4'], 138*4096896cSGreg Roach 'E' => ['1', '0', '', ''], 139*4096896cSGreg Roach 'È' => ['1', '0', '', ''], 140*4096896cSGreg Roach 'É' => ['1', '0', '', ''], 141*4096896cSGreg Roach 'Ê' => ['1', '0', '', ''], 142*4096896cSGreg Roach 'Ë' => ['1', '0', '', ''], 
143*4096896cSGreg Roach 'Ĕ' => ['1', '0', '', ''], 144*4096896cSGreg Roach 'Ė' => ['1', '0', '', ''], 145*4096896cSGreg Roach 'Ę' => ['1', '', '', '6', '', '', ''], 146*4096896cSGreg Roach 'Ẹ' => ['1', '0', '', ''], 147*4096896cSGreg Roach 'Ẻ' => ['1', '0', '', ''], 148*4096896cSGreg Roach 'Ẽ' => ['1', '0', '', ''], 149*4096896cSGreg Roach 'Ế' => ['1', '0', '', ''], 150*4096896cSGreg Roach 'Ề' => ['1', '0', '', ''], 151*4096896cSGreg Roach 'Ể' => ['1', '0', '', ''], 152*4096896cSGreg Roach 'Ễ' => ['1', '0', '', ''], 153*4096896cSGreg Roach 'Ệ' => ['1', '0', '', ''], 154*4096896cSGreg Roach 'EAU' => ['1', '0', '', ''], 155*4096896cSGreg Roach 'EI' => ['1', '0', '1', ''], 156*4096896cSGreg Roach 'EJ' => ['1', '0', '1', ''], 157*4096896cSGreg Roach 'EU' => ['1', '1', '1', ''], 158*4096896cSGreg Roach 'EY' => ['1', '0', '1', ''], 159*4096896cSGreg Roach 'F' => ['0', '7', '7', '7'], 160*4096896cSGreg Roach 'FB' => ['0', '7', '7', '7'], 161*4096896cSGreg Roach 'G' => ['0', '5', '5', '5', '34', '4', '4'], 162*4096896cSGreg Roach 'Ğ' => ['0', '', '', ''], 163*4096896cSGreg Roach 'GGY' => ['0', '5', '5', '5'], 164*4096896cSGreg Roach 'GY' => ['0', '5', '5', '5'], 165*4096896cSGreg Roach 'H' => ['0', '5', '5', '', '5', '5', '5'], 166*4096896cSGreg Roach 'I' => ['1', '0', '', ''], 167*4096896cSGreg Roach 'Ì' => ['1', '0', '', ''], 168*4096896cSGreg Roach 'Í' => ['1', '0', '', ''], 169*4096896cSGreg Roach 'Î' => ['1', '0', '', ''], 170*4096896cSGreg Roach 'Ï' => ['1', '0', '', ''], 171*4096896cSGreg Roach 'Ĩ' => ['1', '0', '', ''], 172*4096896cSGreg Roach 'Į' => ['1', '0', '', ''], 173*4096896cSGreg Roach 'İ' => ['1', '0', '', ''], 174*4096896cSGreg Roach 'Ỉ' => ['1', '0', '', ''], 175*4096896cSGreg Roach 'Ị' => ['1', '0', '', ''], 176*4096896cSGreg Roach 'IA' => ['1', '1', '', ''], 177*4096896cSGreg Roach 'IE' => ['1', '1', '', ''], 178*4096896cSGreg Roach 'IO' => ['1', '1', '', ''], 179*4096896cSGreg Roach 'IU' => ['1', '1', '', ''], 180*4096896cSGreg Roach 'J' => ['0', '1', 
'', '', '4', '4', '4', '5', '5', ''], 181*4096896cSGreg Roach 'K' => ['0', '5', '5', '5'], 182*4096896cSGreg Roach 'KH' => ['0', '5', '5', '5'], 183*4096896cSGreg Roach 'KS' => ['0', '5', '54', '54'], 184*4096896cSGreg Roach 'L' => ['0', '8', '8', '8'], 185*4096896cSGreg Roach 'Ľ' => ['0', '8', '8', '8'], 186*4096896cSGreg Roach 'Ĺ' => ['0', '8', '8', '8'], 187*4096896cSGreg Roach 'Ł' => ['0', '7', '7', '7', '8', '8', '8'], 188*4096896cSGreg Roach 'LL' => ['0', '8', '8', '8', '58', '8', '8', '1', '8', '8'], 189*4096896cSGreg Roach 'LLY' => ['0', '8', '8', '8', '1', '8', '8'], 190*4096896cSGreg Roach 'LY' => ['0', '8', '8', '8', '1', '8', '8'], 191*4096896cSGreg Roach 'M' => ['0', '6', '6', '6'], 192*4096896cSGreg Roach 'MĔ' => ['0', '66', '66', '66'], 193*4096896cSGreg Roach 'MN' => ['0', '66', '66', '66'], 194*4096896cSGreg Roach 'N' => ['0', '6', '6', '6'], 195*4096896cSGreg Roach 'Ń' => ['0', '6', '6', '6'], 196*4096896cSGreg Roach 'Ň' => ['0', '6', '6', '6'], 197*4096896cSGreg Roach 'Ñ' => ['0', '6', '6', '6'], 198*4096896cSGreg Roach 'NM' => ['0', '66', '66', '66'], 199*4096896cSGreg Roach 'O' => ['1', '0', '', ''], 200*4096896cSGreg Roach 'Ò' => ['1', '0', '', ''], 201*4096896cSGreg Roach 'Ó' => ['1', '0', '', ''], 202*4096896cSGreg Roach 'Ô' => ['1', '0', '', ''], 203*4096896cSGreg Roach 'Õ' => ['1', '0', '', ''], 204*4096896cSGreg Roach 'Ö' => ['1', '0', '', ''], 205*4096896cSGreg Roach 'Ø' => ['1', '0', '', ''], 206*4096896cSGreg Roach 'Ő' => ['1', '0', '', ''], 207*4096896cSGreg Roach 'Œ' => ['1', '0', '', ''], 208*4096896cSGreg Roach 'Ơ' => ['1', '0', '', ''], 209*4096896cSGreg Roach 'Ọ' => ['1', '0', '', ''], 210*4096896cSGreg Roach 'Ỏ' => ['1', '0', '', ''], 211*4096896cSGreg Roach 'Ố' => ['1', '0', '', ''], 212*4096896cSGreg Roach 'Ồ' => ['1', '0', '', ''], 213*4096896cSGreg Roach 'Ổ' => ['1', '0', '', ''], 214*4096896cSGreg Roach 'Ỗ' => ['1', '0', '', ''], 215*4096896cSGreg Roach 'Ộ' => ['1', '0', '', ''], 216*4096896cSGreg Roach 'Ớ' => ['1', '0', 
'', ''], 217*4096896cSGreg Roach 'Ờ' => ['1', '0', '', ''], 218*4096896cSGreg Roach 'Ở' => ['1', '0', '', ''], 219*4096896cSGreg Roach 'Ỡ' => ['1', '0', '', ''], 220*4096896cSGreg Roach 'Ợ' => ['1', '0', '', ''], 221*4096896cSGreg Roach 'OE' => ['1', '0', '', ''], 222*4096896cSGreg Roach 'OI' => ['1', '0', '1', ''], 223*4096896cSGreg Roach 'OJ' => ['1', '0', '1', ''], 224*4096896cSGreg Roach 'OU' => ['1', '0', '', ''], 225*4096896cSGreg Roach 'OY' => ['1', '0', '1', ''], 226*4096896cSGreg Roach 'P' => ['0', '7', '7', '7'], 227*4096896cSGreg Roach 'PF' => ['0', '7', '7', '7'], 228*4096896cSGreg Roach 'PH' => ['0', '7', '7', '7'], 229*4096896cSGreg Roach 'Q' => ['0', '5', '5', '5'], 230*4096896cSGreg Roach 'R' => ['0', '9', '9', '9'], 231*4096896cSGreg Roach 'Ř' => ['0', '4', '4', '4'], 232*4096896cSGreg Roach 'RS' => ['0', '4', '4', '4', '94', '94', '94'], 233*4096896cSGreg Roach 'RZ' => ['0', '4', '4', '4', '94', '94', '94'], 234*4096896cSGreg Roach 'S' => ['0', '4', '4', '4'], 235*4096896cSGreg Roach 'Ś' => ['0', '4', '4', '4'], 236*4096896cSGreg Roach 'Š' => ['0', '4', '4', '4'], 237*4096896cSGreg Roach 'Ş' => ['0', '4', '4', '4'], 238*4096896cSGreg Roach 'SC' => ['0', '2', '4', '4'], 239*4096896cSGreg Roach 'ŠČ' => ['0', '2', '4', '4'], 240*4096896cSGreg Roach 'SCH' => ['0', '4', '4', '4'], 241*4096896cSGreg Roach 'SCHD' => ['0', '2', '43', '43'], 242*4096896cSGreg Roach 'SCHT' => ['0', '2', '43', '43'], 243*4096896cSGreg Roach 'SCHTCH' => ['0', '2', '4', '4'], 244*4096896cSGreg Roach 'SCHTSCH' => ['0', '2', '4', '4'], 245*4096896cSGreg Roach 'SCHTSH' => ['0', '2', '4', '4'], 246*4096896cSGreg Roach 'SD' => ['0', '2', '43', '43'], 247*4096896cSGreg Roach 'SH' => ['0', '4', '4', '4'], 248*4096896cSGreg Roach 'SHCH' => ['0', '2', '4', '4'], 249*4096896cSGreg Roach 'SHD' => ['0', '2', '43', '43'], 250*4096896cSGreg Roach 'SHT' => ['0', '2', '43', '43'], 251*4096896cSGreg Roach 'SHTCH' => ['0', '2', '4', '4'], 252*4096896cSGreg Roach 'SHTSH' => ['0', '2', '4', '4'], 
253*4096896cSGreg Roach 'ß' => ['0', '', '4', '4'], 254*4096896cSGreg Roach 'ST' => ['0', '2', '43', '43'], 255*4096896cSGreg Roach 'STCH' => ['0', '2', '4', '4'], 256*4096896cSGreg Roach 'STRS' => ['0', '2', '4', '4'], 257*4096896cSGreg Roach 'STRZ' => ['0', '2', '4', '4'], 258*4096896cSGreg Roach 'STSCH' => ['0', '2', '4', '4'], 259*4096896cSGreg Roach 'STSH' => ['0', '2', '4', '4'], 260*4096896cSGreg Roach 'SSZ' => ['0', '4', '4', '4'], 261*4096896cSGreg Roach 'SZ' => ['0', '4', '4', '4'], 262*4096896cSGreg Roach 'SZCS' => ['0', '2', '4', '4'], 263*4096896cSGreg Roach 'SZCZ' => ['0', '2', '4', '4'], 264*4096896cSGreg Roach 'SZD' => ['0', '2', '43', '43'], 265*4096896cSGreg Roach 'SZT' => ['0', '2', '43', '43'], 266*4096896cSGreg Roach 'T' => ['0', '3', '3', '3'], 267*4096896cSGreg Roach 'Ť' => ['0', '3', '3', '3'], 268*4096896cSGreg Roach 'Ţ' => ['0', '3', '3', '3', '4', '4', '4'], 269*4096896cSGreg Roach 'TC' => ['0', '4', '4', '4'], 270*4096896cSGreg Roach 'TCH' => ['0', '4', '4', '4'], 271*4096896cSGreg Roach 'TH' => ['0', '3', '3', '3'], 272*4096896cSGreg Roach 'TRS' => ['0', '4', '4', '4'], 273*4096896cSGreg Roach 'TRZ' => ['0', '4', '4', '4'], 274*4096896cSGreg Roach 'TS' => ['0', '4', '4', '4'], 275*4096896cSGreg Roach 'TSCH' => ['0', '4', '4', '4'], 276*4096896cSGreg Roach 'TSH' => ['0', '4', '4', '4'], 277*4096896cSGreg Roach 'TSZ' => ['0', '4', '4', '4'], 278*4096896cSGreg Roach 'TTCH' => ['0', '4', '4', '4'], 279*4096896cSGreg Roach 'TTS' => ['0', '4', '4', '4'], 280*4096896cSGreg Roach 'TTSCH' => ['0', '4', '4', '4'], 281*4096896cSGreg Roach 'TTSZ' => ['0', '4', '4', '4'], 282*4096896cSGreg Roach 'TTZ' => ['0', '4', '4', '4'], 283*4096896cSGreg Roach 'TZ' => ['0', '4', '4', '4'], 284*4096896cSGreg Roach 'TZS' => ['0', '4', '4', '4'], 285*4096896cSGreg Roach 'U' => ['1', '0', '', ''], 286*4096896cSGreg Roach 'Ù' => ['1', '0', '', ''], 287*4096896cSGreg Roach 'Ú' => ['1', '0', '', ''], 288*4096896cSGreg Roach 'Û' => ['1', '0', '', ''], 
289*4096896cSGreg Roach 'Ü' => ['1', '0', '', ''], 290*4096896cSGreg Roach 'Ũ' => ['1', '0', '', ''], 291*4096896cSGreg Roach 'Ū' => ['1', '0', '', ''], 292*4096896cSGreg Roach 'Ů' => ['1', '0', '', ''], 293*4096896cSGreg Roach 'Ű' => ['1', '0', '', ''], 294*4096896cSGreg Roach 'Ų' => ['1', '0', '', ''], 295*4096896cSGreg Roach 'Ư' => ['1', '0', '', ''], 296*4096896cSGreg Roach 'Ụ' => ['1', '0', '', ''], 297*4096896cSGreg Roach 'Ủ' => ['1', '0', '', ''], 298*4096896cSGreg Roach 'Ứ' => ['1', '0', '', ''], 299*4096896cSGreg Roach 'Ừ' => ['1', '0', '', ''], 300*4096896cSGreg Roach 'Ử' => ['1', '0', '', ''], 301*4096896cSGreg Roach 'Ữ' => ['1', '0', '', ''], 302*4096896cSGreg Roach 'Ự' => ['1', '0', '', ''], 303*4096896cSGreg Roach 'UE' => ['1', '0', '', ''], 304*4096896cSGreg Roach 'UI' => ['1', '0', '1', ''], 305*4096896cSGreg Roach 'UJ' => ['1', '0', '1', ''], 306*4096896cSGreg Roach 'UY' => ['1', '0', '1', ''], 307*4096896cSGreg Roach 'UW' => ['1', '0', '1', '', '0', '7', '7'], 308*4096896cSGreg Roach 'V' => ['0', '7', '7', '7'], 309*4096896cSGreg Roach 'W' => ['0', '7', '7', '7'], 310*4096896cSGreg Roach 'X' => ['0', '5', '54', '54'], 311*4096896cSGreg Roach 'Y' => ['1', '1', '', ''], 312*4096896cSGreg Roach 'Ý' => ['1', '1', '', ''], 313*4096896cSGreg Roach 'Ỳ' => ['1', '1', '', ''], 314*4096896cSGreg Roach 'Ỵ' => ['1', '1', '', ''], 315*4096896cSGreg Roach 'Ỷ' => ['1', '1', '', ''], 316*4096896cSGreg Roach 'Ỹ' => ['1', '1', '', ''], 317*4096896cSGreg Roach 'Z' => ['0', '4', '4', '4'], 318*4096896cSGreg Roach 'Ź' => ['0', '4', '4', '4'], 319*4096896cSGreg Roach 'Ż' => ['0', '4', '4', '4'], 320*4096896cSGreg Roach 'Ž' => ['0', '4', '4', '4'], 321*4096896cSGreg Roach 'ZD' => ['0', '2', '43', '43'], 322*4096896cSGreg Roach 'ZDZ' => ['0', '2', '4', '4'], 323*4096896cSGreg Roach 'ZDZH' => ['0', '2', '4', '4'], 324*4096896cSGreg Roach 'ZH' => ['0', '4', '4', '4'], 325*4096896cSGreg Roach 'ZHD' => ['0', '2', '43', '43'], 326*4096896cSGreg Roach 'ZHDZH' => ['0', '2', 
'4', '4'], 327*4096896cSGreg Roach 'ZS' => ['0', '4', '4', '4'], 328*4096896cSGreg Roach 'ZSCH' => ['0', '4', '4', '4'], 329*4096896cSGreg Roach 'ZSH' => ['0', '4', '4', '4'], 330*4096896cSGreg Roach 'ZZS' => ['0', '4', '4', '4'], 331a25f0a04SGreg Roach // Cyrillic alphabet 332*4096896cSGreg Roach 'А' => ['1', '0', '', ''], 333*4096896cSGreg Roach 'Б' => ['0', '7', '7', '7'], 334*4096896cSGreg Roach 'В' => ['0', '7', '7', '7'], 335*4096896cSGreg Roach 'Г' => ['0', '5', '5', '5'], 336*4096896cSGreg Roach 'Д' => ['0', '3', '3', '3'], 337*4096896cSGreg Roach 'ДЗ' => ['0', '4', '4', '4'], 338*4096896cSGreg Roach 'Е' => ['1', '0', '', ''], 339*4096896cSGreg Roach 'Ё' => ['1', '0', '', ''], 340*4096896cSGreg Roach 'Ж' => ['0', '4', '4', '4'], 341*4096896cSGreg Roach 'З' => ['0', '4', '4', '4'], 342*4096896cSGreg Roach 'И' => ['1', '0', '', ''], 343*4096896cSGreg Roach 'Й' => ['1', '1', '', '', '4', '4', '4'], 344*4096896cSGreg Roach 'К' => ['0', '5', '5', '5'], 345*4096896cSGreg Roach 'Л' => ['0', '8', '8', '8'], 346*4096896cSGreg Roach 'М' => ['0', '6', '6', '6'], 347*4096896cSGreg Roach 'Н' => ['0', '6', '6', '6'], 348*4096896cSGreg Roach 'О' => ['1', '0', '', ''], 349*4096896cSGreg Roach 'П' => ['0', '7', '7', '7'], 350*4096896cSGreg Roach 'Р' => ['0', '9', '9', '9'], 351*4096896cSGreg Roach 'РЖ' => ['0', '4', '4', '4'], 352*4096896cSGreg Roach 'С' => ['0', '4', '4', '4'], 353*4096896cSGreg Roach 'Т' => ['0', '3', '3', '3'], 354*4096896cSGreg Roach 'У' => ['1', '0', '', ''], 355*4096896cSGreg Roach 'Ф' => ['0', '7', '7', '7'], 356*4096896cSGreg Roach 'Х' => ['0', '5', '5', '5'], 357*4096896cSGreg Roach 'Ц' => ['0', '4', '4', '4'], 358*4096896cSGreg Roach 'Ч' => ['0', '4', '4', '4'], 359*4096896cSGreg Roach 'Ш' => ['0', '4', '4', '4'], 360*4096896cSGreg Roach 'Щ' => ['0', '2', '4', '4'], 361*4096896cSGreg Roach 'Ъ' => ['0', '', '', ''], 362*4096896cSGreg Roach 'Ы' => ['0', '1', '', ''], 363*4096896cSGreg Roach 'Ь' => ['0', '', '', ''], 364*4096896cSGreg Roach 'Э' => 
['1', '0', '', ''], 365*4096896cSGreg Roach 'Ю' => ['0', '1', '', ''], 366*4096896cSGreg Roach 'Я' => ['0', '1', '', ''], 367a25f0a04SGreg Roach // Greek alphabet 368*4096896cSGreg Roach 'Α' => ['1', '0', '', ''], 369*4096896cSGreg Roach 'Ά' => ['1', '0', '', ''], 370*4096896cSGreg Roach 'ΑΙ' => ['1', '0', '1', ''], 371*4096896cSGreg Roach 'ΑΥ' => ['1', '0', '1', ''], 372*4096896cSGreg Roach 'Β' => ['0', '7', '7', '7'], 373*4096896cSGreg Roach 'Γ' => ['0', '5', '5', '5'], 374*4096896cSGreg Roach 'Δ' => ['0', '3', '3', '3'], 375*4096896cSGreg Roach 'Ε' => ['1', '0', '', ''], 376*4096896cSGreg Roach 'Έ' => ['1', '0', '', ''], 377*4096896cSGreg Roach 'ΕΙ' => ['1', '0', '1', ''], 378*4096896cSGreg Roach 'ΕΥ' => ['1', '1', '1', ''], 379*4096896cSGreg Roach 'Ζ' => ['0', '4', '4', '4'], 380*4096896cSGreg Roach 'Η' => ['1', '0', '', ''], 381*4096896cSGreg Roach 'Ή' => ['1', '0', '', ''], 382*4096896cSGreg Roach 'Θ' => ['0', '3', '3', '3'], 383*4096896cSGreg Roach 'Ι' => ['1', '0', '', ''], 384*4096896cSGreg Roach 'Ί' => ['1', '0', '', ''], 385*4096896cSGreg Roach 'Ϊ' => ['1', '0', '', ''], 386*4096896cSGreg Roach 'ΐ' => ['1', '0', '', ''], 387*4096896cSGreg Roach 'Κ' => ['0', '5', '5', '5'], 388*4096896cSGreg Roach 'Λ' => ['0', '8', '8', '8'], 389*4096896cSGreg Roach 'Μ' => ['0', '6', '6', '6'], 390*4096896cSGreg Roach 'ΜΠ' => ['0', '7', '7', '7'], 391*4096896cSGreg Roach 'Ν' => ['0', '6', '6', '6'], 392*4096896cSGreg Roach 'ΝΤ' => ['0', '3', '3', '3'], 393*4096896cSGreg Roach 'Ξ' => ['0', '5', '54', '54'], 394*4096896cSGreg Roach 'Ο' => ['1', '0', '', ''], 395*4096896cSGreg Roach 'Ό' => ['1', '0', '', ''], 396*4096896cSGreg Roach 'ΟΙ' => ['1', '0', '1', ''], 397*4096896cSGreg Roach 'ΟΥ' => ['1', '0', '1', ''], 398*4096896cSGreg Roach 'Π' => ['0', '7', '7', '7'], 399*4096896cSGreg Roach 'Ρ' => ['0', '9', '9', '9'], 400*4096896cSGreg Roach 'Σ' => ['0', '4', '4', '4'], 401*4096896cSGreg Roach 'ς' => ['0', '', '', '4'], 402*4096896cSGreg Roach 'Τ' => ['0', '3', '3', '3'], 
403*4096896cSGreg Roach 'ΤΖ' => ['0', '4', '4', '4'], 404*4096896cSGreg Roach 'ΤΣ' => ['0', '4', '4', '4'], 405*4096896cSGreg Roach 'Υ' => ['1', '1', '', ''], 406*4096896cSGreg Roach 'Ύ' => ['1', '1', '', ''], 407*4096896cSGreg Roach 'Ϋ' => ['1', '1', '', ''], 408*4096896cSGreg Roach 'ΰ' => ['1', '1', '', ''], 409*4096896cSGreg Roach 'ΥΚ' => ['1', '5', '5', '5'], 410*4096896cSGreg Roach 'ΥΥ' => ['1', '65', '65', '65'], 411*4096896cSGreg Roach 'Φ' => ['0', '7', '7', '7'], 412*4096896cSGreg Roach 'Χ' => ['0', '5', '5', '5'], 413*4096896cSGreg Roach 'Ψ' => ['0', '7', '7', '7'], 414*4096896cSGreg Roach 'Ω' => ['1', '0', '', ''], 415*4096896cSGreg Roach 'Ώ' => ['1', '0', '', ''], 416a25f0a04SGreg Roach // Hebrew alphabet 417*4096896cSGreg Roach 'א' => ['1', '0', '', ''], 418*4096896cSGreg Roach 'או' => ['1', '0', '7', ''], 419*4096896cSGreg Roach 'אג' => ['1', '4', '4', '4', '5', '5', '5', '34', '34', '34'], 420*4096896cSGreg Roach 'בב' => ['0', '7', '7', '7', '77', '77', '77'], 421*4096896cSGreg Roach 'ב' => ['0', '7', '7', '7'], 422*4096896cSGreg Roach 'גג' => ['0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'], 423*4096896cSGreg Roach 'גד' => ['0', '43', '43', '43', '53', '53', '53'], 424*4096896cSGreg Roach 'גה' => ['0', '45', '45', '45', '55', '55', '55'], 425*4096896cSGreg Roach 'גז' => ['0', '44', '44', '44', '45', '45', '45'], 426*4096896cSGreg Roach 'גח' => ['0', '45', '45', '45', '55', '55', '55'], 427*4096896cSGreg Roach 'גכ' => ['0', '45', '45', '45', '55', '55', '55'], 428*4096896cSGreg Roach 'גך' => ['0', '45', '45', '45', '55', '55', '55'], 429*4096896cSGreg Roach 'גצ' => ['0', '44', '44', '44', '45', '45', '45'], 430*4096896cSGreg Roach 'גץ' => ['0', '44', '44', '44', '45', '45', '45'], 431*4096896cSGreg Roach 'גק' => ['0', '45', '45', '45', '54', '54', '54'], 432*4096896cSGreg Roach 'גש' => ['0', '44', '44', '44', '54', '54', '54'], 433*4096896cSGreg Roach 'גת' => ['0', '43', '43', '43', '53', '53', '53'], 
434*4096896cSGreg Roach 'ג' => ['0', '4', '4', '4', '5', '5', '5'], 435*4096896cSGreg Roach 'דז' => ['0', '4', '4', '4'], 436*4096896cSGreg Roach 'דד' => ['0', '3', '3', '3', '33', '33', '33'], 437*4096896cSGreg Roach 'דט' => ['0', '33', '33', '33'], 438*4096896cSGreg Roach 'דש' => ['0', '4', '4', '4'], 439*4096896cSGreg Roach 'דצ' => ['0', '4', '4', '4'], 440*4096896cSGreg Roach 'דץ' => ['0', '4', '4', '4'], 441*4096896cSGreg Roach 'ד' => ['0', '3', '3', '3'], 442*4096896cSGreg Roach 'הג' => ['0', '54', '54', '54', '55', '55', '55'], 443*4096896cSGreg Roach 'הכ' => ['0', '55', '55', '55'], 444*4096896cSGreg Roach 'הח' => ['0', '55', '55', '55'], 445*4096896cSGreg Roach 'הק' => ['0', '55', '55', '55', '5', '5', '5'], 446*4096896cSGreg Roach 'הה' => ['0', '5', '5', '', '55', '55', ''], 447*4096896cSGreg Roach 'ה' => ['0', '5', '5', ''], 448*4096896cSGreg Roach 'וי' => ['1', '', '', '', '7', '7', '7'], 449*4096896cSGreg Roach 'ו' => ['1', '7', '7', '7', '7', '', ''], 450*4096896cSGreg Roach 'וו' => ['1', '7', '7', '7', '7', '', ''], 451*4096896cSGreg Roach 'וופ' => ['1', '7', '7', '7', '77', '77', '77'], 452*4096896cSGreg Roach 'זש' => ['0', '4', '4', '4', '44', '44', '44'], 453*4096896cSGreg Roach 'זדז' => ['0', '2', '4', '4'], 454*4096896cSGreg Roach 'ז' => ['0', '4', '4', '4'], 455*4096896cSGreg Roach 'זג' => ['0', '44', '44', '44', '45', '45', '45'], 456*4096896cSGreg Roach 'זז' => ['0', '4', '4', '4', '44', '44', '44'], 457*4096896cSGreg Roach 'זס' => ['0', '44', '44', '44'], 458*4096896cSGreg Roach 'זצ' => ['0', '44', '44', '44'], 459*4096896cSGreg Roach 'זץ' => ['0', '44', '44', '44'], 460*4096896cSGreg Roach 'חג' => ['0', '54', '54', '54', '53', '53', '53'], 461*4096896cSGreg Roach 'חח' => ['0', '5', '5', '5', '55', '55', '55'], 462*4096896cSGreg Roach 'חק' => ['0', '55', '55', '55', '5', '5', '5'], 463*4096896cSGreg Roach 'חכ' => ['0', '45', '45', '45', '55', '55', '55'], 464*4096896cSGreg Roach 'חס' => ['0', '5', '54', '54'], 465*4096896cSGreg Roach 'חש' => 
['0', '5', '54', '54'], 466*4096896cSGreg Roach 'ח' => ['0', '5', '5', '5'], 467*4096896cSGreg Roach 'טש' => ['0', '4', '4', '4'], 468*4096896cSGreg Roach 'טד' => ['0', '33', '33', '33'], 469*4096896cSGreg Roach 'טי' => ['0', '3', '3', '3', '4', '4', '4', '3', '3', '34'], 470*4096896cSGreg Roach 'טת' => ['0', '33', '33', '33'], 471*4096896cSGreg Roach 'טט' => ['0', '3', '3', '3', '33', '33', '33'], 472*4096896cSGreg Roach 'ט' => ['0', '3', '3', '3'], 473*4096896cSGreg Roach 'י' => ['1', '1', '', ''], 474*4096896cSGreg Roach 'יא' => ['1', '1', '', '', '1', '1', '1'], 475*4096896cSGreg Roach 'כג' => ['0', '55', '55', '55', '54', '54', '54'], 476*4096896cSGreg Roach 'כש' => ['0', '5', '54', '54'], 477*4096896cSGreg Roach 'כס' => ['0', '5', '54', '54'], 478*4096896cSGreg Roach 'ככ' => ['0', '5', '5', '5', '55', '55', '55'], 479*4096896cSGreg Roach 'כך' => ['0', '5', '5', '5', '55', '55', '55'], 480*4096896cSGreg Roach 'כ' => ['0', '5', '5', '5'], 481*4096896cSGreg Roach 'כח' => ['0', '55', '55', '55', '5', '5', '5'], 482*4096896cSGreg Roach 'ך' => ['0', '', '5', '5'], 483*4096896cSGreg Roach 'ל' => ['0', '8', '8', '8'], 484*4096896cSGreg Roach 'לל' => ['0', '88', '88', '88', '8', '8', '8'], 485*4096896cSGreg Roach 'מנ' => ['0', '66', '66', '66'], 486*4096896cSGreg Roach 'מן' => ['0', '66', '66', '66'], 487*4096896cSGreg Roach 'ממ' => ['0', '6', '6', '6', '66', '66', '66'], 488*4096896cSGreg Roach 'מם' => ['0', '6', '6', '6', '66', '66', '66'], 489*4096896cSGreg Roach 'מ' => ['0', '6', '6', '6'], 490*4096896cSGreg Roach 'ם' => ['0', '', '6', '6'], 491*4096896cSGreg Roach 'נמ' => ['0', '66', '66', '66'], 492*4096896cSGreg Roach 'נם' => ['0', '66', '66', '66'], 493*4096896cSGreg Roach 'ננ' => ['0', '6', '6', '6', '66', '66', '66'], 494*4096896cSGreg Roach 'נן' => ['0', '6', '6', '6', '66', '66', '66'], 495*4096896cSGreg Roach 'נ' => ['0', '6', '6', '6'], 496*4096896cSGreg Roach 'ן' => ['0', '', '6', '6'], 497*4096896cSGreg Roach 'סתש' => ['0', '2', '4', '4'], 
498*4096896cSGreg Roach 'סתז' => ['0', '2', '4', '4'], 499*4096896cSGreg Roach 'סטז' => ['0', '2', '4', '4'], 500*4096896cSGreg Roach 'סטש' => ['0', '2', '4', '4'], 501*4096896cSGreg Roach 'סצד' => ['0', '2', '4', '4'], 502*4096896cSGreg Roach 'סט' => ['0', '2', '4', '4', '43', '43', '43'], 503*4096896cSGreg Roach 'סת' => ['0', '2', '4', '4', '43', '43', '43'], 504*4096896cSGreg Roach 'סג' => ['0', '44', '44', '44', '4', '4', '4'], 505*4096896cSGreg Roach 'סס' => ['0', '4', '4', '4', '44', '44', '44'], 506*4096896cSGreg Roach 'סצ' => ['0', '44', '44', '44'], 507*4096896cSGreg Roach 'סץ' => ['0', '44', '44', '44'], 508*4096896cSGreg Roach 'סז' => ['0', '44', '44', '44'], 509*4096896cSGreg Roach 'סש' => ['0', '44', '44', '44'], 510*4096896cSGreg Roach 'ס' => ['0', '4', '4', '4'], 511*4096896cSGreg Roach 'ע' => ['1', '0', '', ''], 512*4096896cSGreg Roach 'פב' => ['0', '7', '7', '7', '77', '77', '77'], 513*4096896cSGreg Roach 'פוו' => ['0', '7', '7', '7', '77', '77', '77'], 514*4096896cSGreg Roach 'פפ' => ['0', '7', '7', '7', '77', '77', '77'], 515*4096896cSGreg Roach 'פף' => ['0', '7', '7', '7', '77', '77', '77'], 516*4096896cSGreg Roach 'פ' => ['0', '7', '7', '7'], 517*4096896cSGreg Roach 'ף' => ['0', '', '7', '7'], 518*4096896cSGreg Roach 'צג' => ['0', '44', '44', '44', '45', '45', '45'], 519*4096896cSGreg Roach 'צז' => ['0', '44', '44', '44'], 520*4096896cSGreg Roach 'צס' => ['0', '44', '44', '44'], 521*4096896cSGreg Roach 'צצ' => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'], 522*4096896cSGreg Roach 'צץ' => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'], 523*4096896cSGreg Roach 'צש' => ['0', '44', '44', '44', '4', '4', '4', '5', '5', '5'], 524*4096896cSGreg Roach 'צ' => ['0', '4', '4', '4', '5', '5', '5'], 525*4096896cSGreg Roach 'ץ' => ['0', '', '4', '4'], 526*4096896cSGreg Roach 'קה' => ['0', '55', '55', '5'], 527*4096896cSGreg Roach 'קס' => ['0', '5', '54', '54'], 528*4096896cSGreg Roach 'קש' 
=> ['0', '5', '54', '54'], 529*4096896cSGreg Roach 'קק' => ['0', '5', '5', '5', '55', '55', '55'], 530*4096896cSGreg Roach 'קח' => ['0', '55', '55', '55'], 531*4096896cSGreg Roach 'קכ' => ['0', '55', '55', '55'], 532*4096896cSGreg Roach 'קך' => ['0', '55', '55', '55'], 533*4096896cSGreg Roach 'קג' => ['0', '55', '55', '55', '54', '54', '54'], 534*4096896cSGreg Roach 'ק' => ['0', '5', '5', '5'], 535*4096896cSGreg Roach 'רר' => ['0', '99', '99', '99', '9', '9', '9'], 536*4096896cSGreg Roach 'ר' => ['0', '9', '9', '9'], 537*4096896cSGreg Roach 'שטז' => ['0', '2', '4', '4'], 538*4096896cSGreg Roach 'שתש' => ['0', '2', '4', '4'], 539*4096896cSGreg Roach 'שתז' => ['0', '2', '4', '4'], 540*4096896cSGreg Roach 'שטש' => ['0', '2', '4', '4'], 541*4096896cSGreg Roach 'שד' => ['0', '2', '43', '43'], 542*4096896cSGreg Roach 'שז' => ['0', '44', '44', '44'], 543*4096896cSGreg Roach 'שס' => ['0', '44', '44', '44'], 544*4096896cSGreg Roach 'שת' => ['0', '2', '43', '43'], 545*4096896cSGreg Roach 'שג' => ['0', '4', '4', '4', '44', '44', '44', '4', '43', '43'], 546*4096896cSGreg Roach 'שט' => ['0', '2', '43', '43', '44', '44', '44'], 547*4096896cSGreg Roach 'שצ' => ['0', '44', '44', '44', '45', '45', '45'], 548*4096896cSGreg Roach 'שץ' => ['0', '44', '', '44', '45', '', '45'], 549*4096896cSGreg Roach 'שש' => ['0', '4', '4', '4', '44', '44', '44'], 550*4096896cSGreg Roach 'ש' => ['0', '4', '4', '4'], 551*4096896cSGreg Roach 'תג' => ['0', '34', '34', '34'], 552*4096896cSGreg Roach 'תז' => ['0', '34', '34', '34'], 553*4096896cSGreg Roach 'תש' => ['0', '4', '4', '4'], 554*4096896cSGreg Roach 'תת' => ['0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'], 555*4096896cSGreg Roach 'ת' => ['0', '3', '3', '3', '4', '4', '4'], 556a25f0a04SGreg Roach // Arabic alphabet 557*4096896cSGreg Roach 'ا' => ['1', '0', '', ''], 558*4096896cSGreg Roach 'ب' => ['0', '7', '7', '7'], 559*4096896cSGreg Roach 'ت' => ['0', '3', '3', '3'], 560*4096896cSGreg 
Roach 'ث' => ['0', '3', '3', '3'], 561*4096896cSGreg Roach 'ج' => ['0', '4', '4', '4'], 562*4096896cSGreg Roach 'ح' => ['0', '5', '5', '5'], 563*4096896cSGreg Roach 'خ' => ['0', '5', '5', '5'], 564*4096896cSGreg Roach 'د' => ['0', '3', '3', '3'], 565*4096896cSGreg Roach 'ذ' => ['0', '3', '3', '3'], 566*4096896cSGreg Roach 'ر' => ['0', '9', '9', '9'], 567*4096896cSGreg Roach 'ز' => ['0', '4', '4', '4'], 568*4096896cSGreg Roach 'س' => ['0', '4', '4', '4'], 569*4096896cSGreg Roach 'ش' => ['0', '4', '4', '4'], 570*4096896cSGreg Roach 'ص' => ['0', '4', '4', '4'], 571*4096896cSGreg Roach 'ض' => ['0', '3', '3', '3'], 572*4096896cSGreg Roach 'ط' => ['0', '3', '3', '3'], 573*4096896cSGreg Roach 'ظ' => ['0', '4', '4', '4'], 574*4096896cSGreg Roach 'ع' => ['1', '0', '', ''], 575*4096896cSGreg Roach 'غ' => ['0', '0', '', ''], 576*4096896cSGreg Roach 'ف' => ['0', '7', '7', '7'], 577*4096896cSGreg Roach 'ق' => ['0', '5', '5', '5'], 578*4096896cSGreg Roach 'ك' => ['0', '5', '5', '5'], 579*4096896cSGreg Roach 'ل' => ['0', '8', '8', '8'], 580*4096896cSGreg Roach 'لا' => ['0', '8', '8', '8'], 581*4096896cSGreg Roach 'م' => ['0', '6', '6', '6'], 582*4096896cSGreg Roach 'ن' => ['0', '6', '6', '6'], 583*4096896cSGreg Roach 'هن' => ['0', '66', '66', '66'], 584*4096896cSGreg Roach 'ه' => ['0', '5', '5', ''], 585*4096896cSGreg Roach 'و' => ['1', '', '', '', '7', '', ''], 586*4096896cSGreg Roach 'ي' => ['0', '1', '', ''], 587*4096896cSGreg Roach 'آ' => ['0', '1', '', ''], 588*4096896cSGreg Roach 'ة' => ['0', '', '', '3'], 589*4096896cSGreg Roach 'ی' => ['0', '1', '', ''], 590*4096896cSGreg Roach 'ى' => ['1', '1', '', ''], 59113abd6f3SGreg Roach ]; 592a25f0a04SGreg Roach 593a25f0a04SGreg Roach /** 59416cfb0b9SGreg Roach * Which algorithms are supported. 
*
     * The array is keyed by the algorithm identifier ('std', 'dm'); each value
     * is the translated display name of that algorithm.
     *
     * @return array<string>
     */
    public static function getAlgorithms(): array
    {
        // Keep each I18N comment directly above the string it annotates.
        /* I18N: https://en.wikipedia.org/wiki/Soundex */
        $russell_label = I18N::translate('Russell');
        /* I18N: https://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
        $daitch_mokotoff_label = I18N::translate('Daitch-Mokotoff');

        return [
            'std' => $russell_label,
            'dm'  => $daitch_mokotoff_label,
        ];
    }

    /**
     * Is there a match between two soundex codes?
     *
     * Each argument is a ':'-separated list of codes. The lists match when
     * they have at least one code in common. An empty argument never matches.
     *
     * @param string $soundex1
     * @param string $soundex2
     *
     * @return bool
     */
    public static function compare(string $soundex1, string $soundex2): bool
    {
        // Nothing to compare against: no match.
        if ($soundex1 === '' || $soundex2 === '') {
            return false;
        }

        $codes1 = explode(':', $soundex1);
        $codes2 = explode(':', $soundex2);
        $shared = array_intersect($codes1, $codes2);

        return $shared !== [];
    }

    /**
     * Generate Russell soundex codes for a given text.
*
     * Every space-separated word is coded on its own. When the text has more
     * than one word, the text with its spaces removed is coded as well.
     * Codes that carry no sound are dropped, duplicates are removed, and the
     * remaining codes are joined with ':'.
     *
     * @param string $text
     *
     * @return string
     */
    public static function russell(string $text): string
    {
        $candidates = explode(' ', $text);

        // Combine words, e.g. “New York” as “Newyork”
        if (count($candidates) > 1) {
            $candidates[] = str_replace(' ', '', $text);
        }

        $soundex_array = [];

        foreach ($candidates as $candidate) {
            $soundex = soundex($candidate);

            // Only return codes from recognisable sounds.
            // soundex() gives '0000' when the input has no letters A-Z, and an empty
            // string (false before PHP 8.0) for empty input, such as the pieces produced
            // by doubled, leading or trailing spaces. An empty code must never be stored:
            // compare() would treat two texts that both contain one as a match.
            if ((string) $soundex !== '' && $soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }

        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        return implode(':', $soundex_array);
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
*
     * Every space-separated word is coded on its own. When the text has more
     * than one word, the text with its spaces removed is coded as well.
     * Duplicate codes are removed and the remaining codes are joined with ':'.
     *
     * @param string $text
     *
     * @return string
     */
    public static function daitchMokotoff(string $text): string
    {
        $parts = explode(' ', $text);

        // Combine words, e.g. “New York” as “Newyork”
        if (count($parts) > 1) {
            $parts[] = str_replace(' ', '', $text);
        }

        // Collect the codes of every part, in order.
        $codes = [];
        foreach ($parts as $part) {
            foreach (self::daitchMokotoffWord($part) as $code) {
                $codes[] = $code;
            }
        }

        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $codes = array_slice(array_unique($codes), 0, 36);

        return implode(':', $codes);
    }

    /**
     * Calculate the Daitch-Mokotoff soundex for a word.
     *
     * @param string $name
     *
     * @return array<string> List of possible DM codes for the word.
*
     * Each returned code is exactly six characters long: shorter digit
     * sequences are padded on the right with zeros. A word that produces no
     * digits at all yields no code.
     */
    private static function daitchMokotoffWord(string $name): array
    {
        // Apply special transformation rules to the input string
        // NOTE(review): the rules are applied with str_replace(), so a "$" inside a rule
        // is matched literally rather than as an end-of-string anchor - confirm this is intended.
        $name = I18N::strtoupper($name);
        foreach (self::TRANSFORM_NAMES as $transformRule) {
            $name = str_replace($transformRule[0], $transformRule[1], $name);
        }

        // Initialize
        $name_script = I18N::textScript($name);
        $noVowels    = $name_script === 'Hebr' || $name_script === 'Arab';

        $lastPos       = strlen($name) - 1;
        $currPos       = 0;
        $state         = 1; // 1: start of input string, 2: before vowel, 3: other
        $result        = []; // accumulate complete 6-digit D-M codes here
        $partialResult = [['!']]; // incomplete D-M codes; the initial '!' stops the "duplicate sound" check

        // Loop through the input string.
        // Stop when the string is exhausted or when no more partial results remain
        while (count($partialResult) !== 0 && $currPos <= $lastPos) {
            // Find the DM coding table entry for the chunk at the current position
            $thisEntry = self::longestSoundTableKey($name, $currPos);
            if ($thisEntry === '') {
                $currPos++; // Not in table: advance pointer to next byte
                continue;   // and try again
            }

            $soundTableEntry = self::DM_SOUNDS[$thisEntry];
            $workingResult   = $partialResult;
            $partialResult   = [];
            $currPos         += strlen($thisEntry);

            // Not at beginning of input string
            if ($state !== 1) {
                // Determine whether the next chunk is a vowel
                $nextEntry = self::longestSoundTableKey($name, $currPos);
                if ($nextEntry !== '' && self::DM_SOUNDS[$nextEntry][0] !== '0') {
                    // Next chunk is a vowel
                    $state = 2;
                } else {
                    // Next chunk is not a vowel, or there is no next chunk
                    $state = 3;
                }
            }

            while ($state < count($soundTableEntry)) {
                $sound = $soundTableEntry[$state];

                if ($sound === '') {
                    // empty means 'ignore this sound in this state'
                    foreach ($workingResult as $workingEntry) {
                        $tempEntry = $workingEntry;
                        $tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
                        $partialResult[] = $tempEntry;
                    }
                } else {
                    foreach ($workingResult as $workingEntry) {
                        // Append the incoming sound unless it duplicates the previous sound.
                        // For Hebrew and Arabic, the duplicate occurrence is appended as well.
                        if ($sound !== $workingEntry[count($workingEntry) - 1] || $noVowels) {
                            $workingEntry[] = $sound;
                        }

                        if (count($workingEntry) < 7) {
                            $partialResult[] = $workingEntry;
                        } else {
                            // This is the 6th code in the sequence
                            // We're looking for 7 entries because the first is '!' and doesn't count
                            $code = self::zeroFilledCode($workingEntry);
                            if ($code !== '') {
                                $result[] = $code;
                            }
                        }
                    }
                }
                $state += 3; // Advance to next triplet while keeping the same basic state
            }
        }

        // Zero-fill and copy all remaining partial results
        foreach ($partialResult as $workingEntry) {
            $code = self::zeroFilledCode($workingEntry);
            if ($code !== '') {
                $result[] = $code;
            }
        }

        return $result;
    }

    /**
     * Find the longest key of the DM sound table that starts at a byte offset.
     *
     * @param string $name   Text being coded.
     * @param int    $offset Byte offset into $name.
     *
     * @return string The matching key, or '' when nothing matches or the offset is past the end.
     */
    private static function longestSoundTableKey(string $name, int $offset): string
    {
        if ($offset >= strlen($name)) {
            return '';
        }

        $chunk = substr($name, $offset, self::MAXCHAR); // Get maximum length chunk
        while ($chunk !== '' && !isset(self::DM_SOUNDS[$chunk])) {
            $chunk = substr($chunk, 0, -1); // Not in table: try a shorter chunk
        }

        return $chunk;
    }

    /**
     * Turn a list of sound codes into a six-character D-M code.
     *
     * The '!' markers are removed and the digits are zero-filled or truncated to six characters.
     * Only codes from recognisable sounds are returned. The emptiness test is a strict
     * string comparison, because the single digit "0" is falsy in PHP yet is a valid sound.
     *
     * @param array<string> $entry Sound codes, possibly containing '!' markers.
     *
     * @return string The six-character code, or '' when the entry holds no digits.
     */
    private static function zeroFilledCode(array $entry): string
    {
        $digits = str_replace('!', '', implode('', $entry));

        if ($digits === '') {
            return '';
        }

        return substr($digits . '000000', 0, 6);
    }
}