<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2018 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
namespace Fisharebest\Webtrees;

/**
 * Phonetic matching of strings.
 *
 * Codes are exchanged as colon-separated lists (e.g. "N000:Y620:N620"), one
 * code per word plus one for all the words joined together.
 */
class Soundex {
    /**
     * Which algorithms are supported.
     *
     * @return string[] Translated algorithm names, keyed by algorithm identifier ('std', 'dm').
     */
    public static function getAlgorithms() {
        return [
            'std' => /* I18N: http://en.wikipedia.org/wiki/Soundex */ I18N::translate('Russell'),
            'dm'  => /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ I18N::translate('Daitch-Mokotoff'),
        ];
    }

    /**
     * Is there a match between two soundex codes?
     *
     * Each argument is a colon-separated list of codes. The lists match when
     * any code from the first list occurs in the second.
     *
     * @param string $soundex1 Colon-separated list of codes.
     * @param string $soundex2 Colon-separated list of codes.
     *
     * @return bool True if at least one code is shared; false if either list is empty.
     */
    public static function compare($soundex1, $soundex2) {
        if (!$soundex1 || !$soundex2) {
            return false;
        }

        foreach (explode(':', $soundex1) as $code) {
            // A stray delimiter yields an empty fragment. It must never count as
            // a match (strpos() with an empty needle reports position 0 on PHP 8).
            if ($code !== '' && strpos($soundex2, $code) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Generate Russell soundex codes for a given text.
     *
     * @param string $text One or more whitespace-separated words.
     *
     * @return string Colon-separated list of unique codes, or '' if no word produced a usable code.
     */
    public static function russell($text) {
        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
        $soundex_array = [];

        foreach ($words as $word) {
            $soundex = soundex($word);
            // Only return codes from recognisable sounds
            if ($soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }

        // Combine words, e.g. “New York” as “Newyork”
        if (count($words) > 1) {
            // Join the already-split words: strtr($text, ' ', '') leaves the text
            // unchanged, because strtr() ignores an empty replacement string.
            $soundex = soundex(implode('', $words));
            // The combined code needs the same filter as the single words, otherwise
            // every multi-word name without recognisable sounds is stored as "0000".
            if ($soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }

        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        // implode() of an empty list is already the empty string.
        return implode(':', $soundex_array);
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
     *
     * @param string $text One or more whitespace-separated words.
     *
     * @return string Colon-separated list of unique codes, or '' if no codes were generated.
     */
    public static function daitchMokotoff($text) {
        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
        $soundex_array = [];

        foreach ($words as $word) {
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
        }

        // Combine words, e.g. “New York” as “Newyork”
        if (count($words) > 1) {
            // Join the already-split words: strtr($text, ' ', '') leaves the text
            // unchanged, because strtr() ignores an empty replacement string.
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(implode('', $words)));
        }

        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);

        // implode() of an empty list is already the empty string.
        return implode(':', $soundex_array);
    }

    // Determine the Daitch–Mokotoff Soundex code for a word
    // Original implementation by Gerry Kroll, and analysis by Meliza Amity

    // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
    const MAXCHAR = 7;

    /**
     * Name transformation arrays.
     * Used to transform the Name string to simplify the "sounds like" table.
     * This is especially useful in Hebrew.
     *
     * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text)
     * function call to achieve the desired transformations.
     *
     * Note about the use of "\x01":
     * This code, which can’t legitimately occur in the kind of text we're dealing with,
     * is used as a place-holder so that conditional string replacements can be done.
*
     * @var string[][]
     */
    private static $transformNameTable = [
        // Force Yiddish ligatures to be treated as separate letters
        ['װ', 'וו'],
        ['ײ', 'יי'],
        ['ױ', 'וי'],
        ['בו', 'בע'],
        ['פו', 'פע'],
        ['ומ', 'עמ'],
        ['ום', 'עם'],
        ['ונ', 'ענ'],
        ['ון', 'ען'],
        ['וו', 'ב'],
        ["\x01", ''],
        ['ייה$', "\x01ה"],
        ['ייע$', "\x01ע"],
        ['יי', 'ע'],
        ["\x01", 'יי'],
    ];

    /**
     * The DM sound coding table is organized this way:
     * key: a variable-length string that corresponds to the UTF-8 character sequence
     * represented by the table entry. Currently, that string can be up to 7
     * bytes long. This maximum length is defined by the class constant MAXCHAR.
     *
     * value: an array as follows:
     * [0]: zero if not a vowel
     * [1]: sound value when this string is at the beginning of the word
     * [2]: sound value when this string is followed by a vowel
     * [3]: sound value for other cases
     * [1],[2],[3] can be repeated several times to create branches in the code
     * an empty sound value means "ignore in this state"
     *
     * @var string[][]
     */
    private static $dmsounds = [
        'A' => ['1', '0', '', ''],
        'À' => ['1', '0', '', ''],
        'Á' => ['1', '0', '', ''],
        'Â' => ['1', '0', '', ''],
        'Ã' => ['1', '0', '', ''],
        'Ä' => ['1', '0', '1', '', '0', '', ''],
        'Å' => ['1', '0', '', ''],
        'Ă' => ['1', '0', '', ''],
        'Ą' => ['1', '', '', '', '', '', '6'],
        'Ạ' => ['1', '0', '', ''],
        'Ả' => ['1', '0', '', ''],
        'Ấ' => ['1', '0', '', ''],
        'Ầ' => ['1', '0', '', ''],
        'Ẩ' => ['1', '0', '', ''],
        'Ẫ' => ['1', '0', '', ''],
        'Ậ' => ['1', '0', '', ''],
        'Ắ' => ['1', '0', '', ''],
        'Ằ' => ['1', '0', '', ''],
        'Ẳ' => ['1', '0', '', ''],
        'Ẵ' => ['1', '0', '', ''],
        'Ặ' => ['1', '0', '', ''],
        'AE' => ['1', '0', '1', ''],
        'Æ' => ['1', '0', '1', ''],
        'AI' => ['1', '0', '1', ''],
        'AJ' => ['1', '0', '1', ''],
        'AU' => ['1', '0', '7', ''],
        'AV' => ['1', '0', '7', '', '7', '7', '7'],
        'ÄU' => ['1', '0', '1', ''],
        'AY' => ['1', '0', '1', ''],
        'B' => ['0', '7', '7', '7'],
        'C' => ['0', '5', '5', '5', '34', '4', '4'],
        'Ć' => ['0', '4', '4', '4'],
        'Č' => ['0', '4', '4', '4'],
        'Ç' => ['0', '4', '4', '4'],
        'CH' => ['0', '5', '5', '5', '34', '4', '4'],
        'CHS' => ['0', '5', '54', '54'],
        'CK' => ['0', '5', '5', '5', '45', '45', '45'],
        'CCS' => ['0', '4', '4', '4'],
        'CS' => ['0', '4', '4', '4'],
        'CSZ' => ['0', '4', '4', '4'],
        'CZ' => ['0', '4', '4', '4'],
        'CZS' => ['0', '4', '4', '4'],
        'D' => ['0', '3', '3', '3'],
        'Ď' => ['0', '3', '3', '3'],
        'Đ' => ['0', '3', '3', '3'],
        'DRS' => ['0', '4', '4', '4'],
        'DRZ' => ['0', '4', '4', '4'],
        'DS' => ['0', '4', '4', '4'],
        'DSH' => ['0', '4', '4', '4'],
        'DSZ' => ['0', '4', '4', '4'],
        'DT' => ['0', '3', '3', '3'],
        'DDZ' => ['0', '4', '4', '4'],
        'DDZS' => ['0', '4', '4', '4'],
        'DZ' => ['0', '4', '4', '4'],
        'DŹ' => ['0', '4', '4', '4'],
        'DŻ' => ['0', '4', '4', '4'],
        'DZH' => ['0', '4', '4', '4'],
        'DZS' => ['0', '4', '4', '4'],
        'E' => ['1', '0', '', ''],
        'È' => ['1', '0', '', ''],
        'É' => ['1', '0', '', ''],
        'Ê' => ['1', '0', '', ''],
        'Ë' => ['1', '0', '', ''],
        'Ĕ' => ['1', '0', '', ''],
        'Ė' => ['1', '0', '', ''],
        'Ę' => ['1', '', '', '6', '', '', ''],
        'Ẹ' => ['1', '0', '', ''],
        'Ẻ' => ['1', '0', '', ''],
        'Ẽ' => ['1', '0', '', ''],
        'Ế' => ['1', '0', '', ''],
        'Ề' => ['1', '0', '', ''],
        'Ể' => ['1', '0', '', ''],
        'Ễ' => ['1', '0', '', ''],
        'Ệ' => ['1', '0', '', ''],
        'EAU' => ['1', '0', '', ''],
        'EI' => ['1', '0', '1', ''],
        'EJ' => ['1', '0', '1', ''],
        'EU' => ['1', '1', '1', ''],
        'EY' => ['1', '0', '1', ''],
        'F' => ['0', '7', '7', '7'],
        'FB' => ['0', '7', '7', '7'],
        'G' => ['0', '5', '5', '5', '34', '4', '4'],
        'Ğ' => ['0', '', '', ''],
        'GGY' => ['0', '5', '5', '5'],
        'GY' => ['0', '5', '5', '5'],
        'H' => ['0', '5', '5', '', '5', '5', '5'],
        'I' => ['1', '0', '', ''],
        'Ì' => ['1', '0', '', ''],
        'Í' => ['1', '0', '', ''],
        'Î' => ['1', '0', '', ''],
        'Ï' => ['1', '0', '', ''],
        'Ĩ' => ['1', '0', '', ''],
        'Į' => ['1', '0', '', ''],
        'İ' => ['1', '0', '', ''],
        'Ỉ' => ['1', '0', '', ''],
        'Ị' => ['1', '0', '', ''],
        'IA' => ['1', '1', '', ''],
        'IE' => ['1', '1', '', ''],
        'IO' => ['1', '1', '', ''],
        'IU' => ['1', '1', '', ''],
        'J' => ['0', '1', '', '', '4', '4', '4', '5', '5', ''],
        'K' => ['0', '5', '5', '5'],
        'KH' => ['0', '5', '5', '5'],
        'KS' => ['0', '5', '54', '54'],
        'L' => ['0', '8', '8', '8'],
        'Ľ' => ['0', '8', '8', '8'],
        'Ĺ' => ['0', '8', '8', '8'],
        'Ł' => ['0', '7', '7', '7', '8', '8', '8'],
        'LL' => ['0', '8', '8', '8', '58', '8', '8', '1', '8', '8'],
        'LLY' => ['0', '8', '8', '8', '1', '8', '8'],
        'LY' => ['0', '8', '8', '8', '1', '8', '8'],
        'M' => ['0', '6', '6', '6'],
        'MĔ' => ['0', '66', '66', '66'],
        'MN' => ['0', '66', '66', '66'],
        'N' => ['0', '6', '6', '6'],
        'Ń' => ['0', '6', '6', '6'],
        'Ň' => ['0', '6', '6', '6'],
        'Ñ' => ['0', '6', '6', '6'],
        'NM' => ['0', '66', '66', '66'],
        'O' => ['1', '0', '', ''],
        'Ò' => ['1', '0', '', ''],
        'Ó' => ['1', '0', '', ''],
        'Ô' => ['1', '0', '', ''],
        'Õ' => ['1', '0', '', ''],
        'Ö' => ['1', '0', '', ''],
        'Ø' => ['1', '0', '', ''],
        'Ő' => ['1', '0', '', ''],
        'Œ' => ['1', '0', '', ''],
        'Ơ' => ['1', '0', '', ''],
        'Ọ' => ['1', '0', '', ''],
        'Ỏ' => ['1', '0', '', ''],
        'Ố' => ['1', '0', '', ''],
        'Ồ' => ['1', '0', '', ''],
        'Ổ' => ['1', '0', '', ''],
        'Ỗ' => ['1', '0', '', ''],
        'Ộ' => ['1', '0', '', ''],
        'Ớ' => ['1', '0', '', ''],
        'Ờ' => ['1', '0', '', ''],
        'Ở' => ['1', '0', '', ''],
        'Ỡ' => ['1', '0', '', ''],
        'Ợ' => ['1', '0', '', ''],
        'OE' => ['1', '0', '', ''],
        'OI' => ['1', '0', '1', ''],
        'OJ' => ['1', '0', '1', ''],
        'OU' => ['1', '0', '', ''],
        'OY' => ['1', '0', '1', ''],
        'P' => ['0', '7', '7', '7'],
        'PF' => ['0', '7', '7', '7'],
        'PH' => ['0', '7', '7', '7'],
        'Q' => ['0', '5', '5', '5'],
        'R' => ['0', '9', '9', '9'],
        'Ř' => ['0', '4', '4', '4'],
        'RS' => ['0', '4', '4', '4', '94', '94', '94'],
        'RZ' => ['0', '4', '4', '4', '94', '94', '94'],
        'S' => ['0', '4', '4', '4'],
        'Ś' => ['0', '4', '4', '4'],
        'Š' => ['0', '4', '4', '4'],
        'Ş' => ['0', '4', '4', '4'],
        'SC' => ['0', '2', '4', '4'],
        'ŠČ' => ['0', '2', '4', '4'],
        'SCH' => ['0', '4', '4', '4'],
        'SCHD' => ['0', '2', '43', '43'],
        'SCHT' => ['0', '2', '43', '43'],
        'SCHTCH' => ['0', '2', '4', '4'],
        'SCHTSCH' => ['0', '2', '4', '4'],
        'SCHTSH' => ['0', '2', '4', '4'],
        'SD' => ['0', '2', '43', '43'],
        'SH' => ['0', '4', '4', '4'],
        'SHCH' => ['0', '2', '4', '4'],
        'SHD' => ['0', '2', '43', '43'],
        'SHT' => ['0', '2', '43', '43'],
        'SHTCH' => ['0', '2', '4', '4'],
        'SHTSH' => ['0', '2', '4', '4'],
        'ß' => ['0', '', '4', '4'],
        'ST' => ['0', '2', '43', '43'],
        'STCH' => ['0', '2', '4', '4'],
        'STRS' => ['0', '2', '4', '4'],
        'STRZ' => ['0', '2', '4', '4'],
        'STSCH' => ['0', '2', '4', '4'],
        'STSH' => ['0', '2', '4', '4'],
        'SSZ' => ['0', '4', '4', '4'],
        'SZ' => ['0', '4', '4', '4'],
        'SZCS' => ['0', '2', '4', '4'],
        'SZCZ' => ['0', '2', '4', '4'],
        'SZD' => ['0', '2', '43', '43'],
        'SZT' => ['0', '2', '43', '43'],
        'T' => ['0', '3', '3', '3'],
        'Ť' => ['0', '3', '3', '3'],
        'Ţ' => ['0', '3', '3', '3', '4', '4', '4'],
        'TC' => ['0', '4', '4', '4'],
        'TCH' => ['0', '4', '4', '4'],
        'TH' => ['0', '3', '3', '3'],
        'TRS' => ['0', '4', '4', '4'],
        'TRZ' => ['0', '4', '4', '4'],
        'TS' => ['0', '4', '4', '4'],
        'TSCH' => ['0', '4', '4', '4'],
        'TSH' => ['0', '4', '4', '4'],
        'TSZ' => ['0', '4', '4', '4'],
        'TTCH' => ['0', '4', '4', '4'],
        'TTS' => ['0', '4', '4', '4'],
        'TTSCH' => ['0', '4', '4', '4'],
        'TTSZ' => ['0', '4', '4', '4'],
        'TTZ' => ['0', '4', '4', '4'],
        'TZ' => ['0', '4', '4', '4'],
        'TZS' => ['0', '4', '4', '4'],
        'U' => ['1', '0', '', ''],
        'Ù' => ['1', '0', '', ''],
        'Ú' => ['1', '0', '', ''],
        'Û' => ['1', '0', '', ''],
        'Ü' => ['1', '0', '', ''],
        'Ũ' => ['1', '0', '', ''],
        'Ū' => ['1', '0', '', ''],
        'Ů' => ['1', '0', '', ''],
        'Ű' => ['1', '0', '', ''],
        'Ų' => ['1', '0', '', ''],
        'Ư' => ['1', '0', '', ''],
        'Ụ' => ['1', '0', '', ''],
        'Ủ' => ['1', '0', '', ''],
        'Ứ' => ['1', '0', '', ''],
        'Ừ' => ['1', '0', '', ''],
        'Ử' => ['1', '0', '', ''],
        'Ữ' => ['1', '0', '', ''],
        'Ự' => ['1', '0', '', ''],
        'UE' => ['1', '0', '', ''],
        'UI' => ['1', '0', '1', ''],
        'UJ' => ['1', '0', '1', ''],
        'UY' => ['1', '0', '1', ''],
        'UW' => ['1', '0', '1', '', '0', '7', '7'],
        'V' => ['0', '7', '7', '7'],
        'W' => ['0', '7', '7', '7'],
        'X' => ['0', '5', '54', '54'],
        'Y' => ['1', '1', '', ''],
        'Ý' => ['1', '1', '', ''],
        'Ỳ' => ['1', '1', '', ''],
        'Ỵ' => ['1', '1', '', ''],
        'Ỷ' => ['1', '1', '', ''],
        'Ỹ' => ['1', '1', '', ''],
        'Z' => ['0', '4', '4', '4'],
        'Ź' => ['0', '4', '4', '4'],
        'Ż' => ['0', '4', '4', '4'],
        'Ž' => ['0', '4', '4', '4'],
        'ZD' => ['0', '2', '43', '43'],
        'ZDZ' => ['0', '2', '4', '4'],
        'ZDZH' => ['0', '2', '4', '4'],
        'ZH' => ['0', '4', '4', '4'],
        'ZHD' => ['0', '2', '43', '43'],
        'ZHDZH' => ['0', '2', '4', '4'],
        'ZS' => ['0', '4', '4', '4'],
        'ZSCH' => ['0', '4', '4', '4'],
        'ZSH' => ['0', '4', '4', '4'],
        'ZZS' => ['0', '4', '4', '4'],
        // Cyrillic alphabet
        'А' => ['1', '0', '', ''],
        'Б' => ['0', '7', '7', '7'],
        'В' => ['0', '7', '7', '7'],
        'Г' => ['0', '5', '5', '5'],
        'Д' => ['0', '3', '3', '3'],
        'ДЗ' => ['0', '4', '4', '4'],
        'Е' => ['1', '0', '', ''],
        'Ё' => ['1', '0', '', ''],
        'Ж' => ['0', '4', '4', '4'],
        'З' => ['0', '4', '4', '4'],
        'И' => ['1', '0', '', ''],
        'Й' => ['1', '1', '', '', '4', '4', '4'],
        'К' => ['0', '5', '5', '5'],
        'Л' => ['0', '8', '8', '8'],
        'М' => ['0', '6', '6', '6'],
        'Н' => ['0', '6', '6', '6'],
        'О' => ['1', '0', '', ''],
        'П' => ['0', '7', '7', '7'],
        'Р' => ['0', '9', '9', '9'],
        'РЖ' => ['0', '4', '4', '4'],
        'С' => ['0', '4', '4', '4'],
        'Т' => ['0', '3', '3', '3'],
        'У' => ['1', '0', '', ''],
        'Ф' => ['0', '7', '7', '7'],
        'Х' => ['0', '5', '5', '5'],
        'Ц' => ['0', '4', '4', '4'],
        'Ч' => ['0', '4', '4', '4'],
        'Ш' => ['0', '4', '4', '4'],
        'Щ' => ['0', '2', '4', '4'],
        'Ъ' => ['0', '', '', ''],
        'Ы' => ['0', '1', '', ''],
        'Ь' => ['0', '', '', ''],
        'Э' => ['1', '0', '', ''],
        'Ю' => ['0', '1', '', ''],
        'Я' => ['0', '1', '', ''],
        // Greek alphabet
        'Α' => ['1', '0', '', ''],
        'Ά' => ['1', '0', '', ''],
        'ΑΙ' => ['1', '0', '1', ''],
        'ΑΥ' => ['1', '0', '1', ''],
        'Β' => ['0', '7', '7', '7'],
        'Γ' => ['0', '5', '5', '5'],
        'Δ' => ['0', '3', '3', '3'],
        'Ε' => ['1', '0', '', ''],
        'Έ' => ['1', '0', '', ''],
        'ΕΙ' => ['1', '0', '1', ''],
        'ΕΥ' => ['1', '1', '1', ''],
        'Ζ' => ['0', '4', '4', '4'],
        'Η' => ['1', '0', '', ''],
        'Ή' => ['1', '0', '', ''],
        'Θ' => ['0', '3', '3', '3'],
        'Ι' => ['1', '0', '', ''],
        'Ί' => ['1', '0', '', ''],
        'Ϊ' => ['1', '0', '', ''],
        'ΐ' => ['1', '0', '', ''],
        'Κ' => ['0', '5', '5', '5'],
        'Λ' => ['0', '8', '8', '8'],
        'Μ' => ['0', '6', '6', '6'],
        'ΜΠ' => ['0', '7', '7', '7'],
        'Ν' => ['0', '6', '6', '6'],
        'ΝΤ' => ['0', '3', '3', '3'],
        'Ξ' => ['0', '5', '54', '54'],
        'Ο' => ['1', '0', '', ''],
        'Ό' => ['1', '0', '', ''],
        'ΟΙ' => ['1', '0', '1', ''],
        'ΟΥ' => ['1', '0', '1', ''],
        'Π' => ['0', '7', '7', '7'],
        'Ρ' => ['0', '9', '9', '9'],
        'Σ' => ['0', '4', '4', '4'],
        'ς' => ['0', '', '', '4'],
        'Τ' => ['0', '3', '3', '3'],
        'ΤΖ' => ['0', '4', '4', '4'],
        'ΤΣ' => ['0', '4', '4', '4'],
        'Υ' => ['1', '1', '', ''],
        'Ύ' => ['1', '1', '', ''],
        'Ϋ' => ['1', '1', '', ''],
        'ΰ' => ['1', '1', '', ''],
        'ΥΚ' => ['1', '5', '5', '5'],
        'ΥΥ' => ['1', '65', '65', '65'],
        'Φ' => ['0', '7', '7', '7'],
        'Χ' => ['0', '5', '5', '5'],
        'Ψ' => ['0', '7', '7', '7'],
        'Ω' => ['1', '0', '', ''],
        'Ώ' => ['1', '0', '', ''],
        // Hebrew alphabet
        'א' => ['1', '0', '', ''],
        'או' => ['1', '0', '7', ''],
        'אג' => ['1', '4', '4', '4', '5', '5', '5', '34', '34', '34'],
        'בב' => ['0', '7', '7', '7', '77', '77', '77'],
        'ב' => ['0', '7', '7', '7'],
        'גג' => ['0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'],
        'גד' => ['0', '43', '43', '43', '53', '53', '53'],
        'גה' => ['0', '45', '45', '45', '55', '55', '55'],
        'גז' => ['0', '44', '44', '44', '45', '45', '45'],
        'גח' => ['0', '45', '45', '45', '55', '55', '55'],
        'גכ' => ['0', '45', '45', '45', '55', '55', '55'],
        'גך' => ['0', '45', '45', '45', '55', '55', '55'],
        'גצ' => ['0', '44', '44', '44', '45', '45', '45'],
        'גץ' => ['0', '44', '44', '44', '45', '45', '45'],
        'גק' => ['0', '45', '45', '45', '54', '54', '54'],
        'גש' => ['0', '44', '44', '44', '54', '54', '54'],
        'גת' => ['0', '43', '43', '43', '53', '53', '53'],
        'ג' => ['0', '4', '4', '4', '5', '5', '5'],
        'דז' => ['0', '4', '4', '4'],
        'דד' => ['0', '3', '3', '3', '33', '33', '33'],
        'דט' => ['0', '33', '33', '33'],
        'דש' => ['0', '4', '4', '4'],
        'דצ' => ['0', '4', '4', '4'],
        'דץ' => ['0', '4', '4', '4'],
        'ד' => ['0', '3', '3', '3'],
        'הג' => ['0', '54', '54', '54', '55', '55', '55'],
        'הכ' => ['0', '55', '55', '55'],
        'הח' => ['0', '55', '55', '55'],
        'הק' => ['0', '55', '55', '55', '5', '5', '5'],
        'הה' => ['0', '5', '5', '', '55', '55', ''],
        'ה' => ['0', '5', '5', ''],
        'וי' => ['1', '', '', '', '7', '7', '7'],
        'ו' => ['1', '7', '7', '7', '7', '', ''],
        'וו' => ['1', '7', '7', '7', '7', '', ''],
        'וופ' => ['1', '7', '7', '7', '77', '77', '77'],
        'זש' => ['0', '4', '4', '4', '44', '44', '44'],
        'זדז' => ['0', '2', '4', '4'],
        'ז' => ['0', '4', '4', '4'],
        'זג' => ['0', '44', '44', '44', '45', '45', '45'],
        'זז' => ['0', '4', '4', '4', '44', '44', '44'],
        'זס' => ['0', '44', '44', '44'],
        'זצ' => ['0', '44', '44', '44'],
        'זץ' => ['0', '44', '44', '44'],
        'חג' => ['0', '54', '54', '54', '53', '53', '53'],
        'חח' => ['0', '5', '5', '5', '55', '55', '55'],
        'חק' => ['0', '55', '55', '55', '5', '5', '5'],
        'חכ' => ['0', '45', '45', '45', '55', '55', '55'],
        'חס' => ['0', '5', '54', '54'],
        'חש' => ['0', '5', '54', '54'],
        'ח' => ['0', '5', '5', '5'],
        'טש' => ['0', '4', '4', '4'],
        'טד' => ['0', '33', '33', '33'],
        'טי' => ['0', '3', '3', '3', '4', '4', '4', '3', '3', '34'],
        'טת' => ['0', '33', '33', '33'],
        'טט' => ['0', '3', '3', '3', '33', '33', '33'],
        'ט' => ['0', '3', '3', '3'],
        'י' => ['1', '1', '', ''],
        'יא' => ['1', '1', '', '', '1', '1', '1'],
        'כג' => ['0', '55', '55', '55', '54', '54', '54'],
        'כש' => ['0', '5', '54', '54'],
        'כס' => ['0', '5', '54', '54'],
        'ככ' => ['0', '5', '5', '5', '55', '55', '55'],
        'כך' => ['0', '5', '5', '5', '55', '55', '55'],
        'כ' => ['0', '5', '5', '5'],
        'כח' => ['0', '55', '55', '55', '5', '5', '5'],
        'ך' => ['0', '', '5', '5'],
        'ל' => ['0', '8', '8', '8'],
        'לל' => ['0', '88', '88', '88', '8', '8', '8'],
        'מנ' => ['0', '66', '66', '66'],
        'מן' => ['0', '66', '66', '66'],
        'ממ' => ['0', '6', '6', '6', '66', '66', '66'],
        'מם' => ['0', '6', '6', '6', '66', '66', '66'],
        'מ' => ['0', '6', '6', '6'],
        'ם' => ['0', '', '6', '6'],
        'נמ' => ['0', '66', '66', '66'],
        'נם' => ['0', '66', '66', '66'],
        'ננ' => ['0', '6', '6', '6', '66', '66', '66'],
        'נן' => ['0', '6', '6', '6', '66', '66', '66'],
        'נ' => ['0', '6', '6', '6'],
        'ן' => ['0', '', '6', '6'],
        'סתש' => ['0', '2', '4', '4'],
        'סתז' => ['0', '2', '4', '4'],
        'סטז' => ['0', '2', '4', '4'],
        'סטש' => ['0', '2', '4', '4'],
        'סצד' => ['0', '2', '4', '4'],
        'סט' => ['0', '2', '4', '4', '43', '43', '43'],
        'סת' => ['0', '2', '4', '4', '43', '43', '43'],
        'סג' => ['0', '44', '44', '44', '4', '4', '4'],
        'סס' => ['0', '4', '4', '4', '44', '44', '44'],
        'סצ' => ['0', '44', '44', '44'],
        'סץ' => ['0', '44', '44', '44'],
        'סז' => ['0', '44', '44', '44'],
        'סש' => ['0', '44', '44', '44'],
        'ס' => ['0', '4', '4', '4'],
        'ע' => ['1', '0', '', ''],
        'פב' => ['0', '7', '7', '7', '77', '77', '77'],
        'פוו' => ['0', '7', '7', '7', '77', '77', '77'],
        'פפ' => ['0', '7', '7', '7', '77', '77', '77'],
        'פף' => ['0', '7', '7', '7', '77', '77', '77'],
        'פ' => ['0', '7', '7', '7'],
        'ף' => ['0', '', '7', '7'],
        'צג' => ['0', '44', '44', '44', '45', '45', '45'],
        'צז' => ['0', '44', '44', '44'],
        'צס' => ['0', '44', '44', '44'],
        'צצ' => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'],
        'צץ' => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'],
        'צש' => ['0', '44', '44', '44', '4', '4', '4', '5', '5', '5'],
        'צ' => ['0', '4', '4', '4', '5', '5', '5'],
        'ץ' => ['0', '', '4', '4'],
        'קה' => ['0', '55', '55', '5'],
        'קס' => ['0', '5', '54', '54'],
        'קש' => ['0', '5', '54', '54'],
        'קק' => ['0', '5', '5', '5', '55', '55', '55'],
        'קח' => ['0', '55', '55', '55'],
        'קכ' => ['0', '55', '55', '55'],
        'קך' => ['0', '55', '55', '55'],
        'קג' => ['0', '55', '55', '55', '54', '54', '54'],
        'ק' => ['0', '5', '5', '5'],
        'רר' => ['0', '99', '99', '99', '9', '9', '9'],
        'ר' => ['0', '9', '9', '9'],
        'שטז' => ['0', '2', '4', '4'],
        'שתש' => ['0', '2', '4', '4'],
        'שתז' => ['0', '2', '4', '4'],
        'שטש' => ['0', '2', '4', '4'],
        'שד' => ['0', '2', '43', '43'],
        'שז' => ['0', '44', '44', '44'],
        'שס' => ['0', '44', '44', '44'],
        'שת' => ['0', '2', '43', '43'],
        'שג' => ['0', '4', '4', '4', '44', '44', '44', '4', '43', '43'],
        'שט' => ['0', '2', '43', '43', '44', '44', '44'],
        'שצ' => ['0', '44', '44', '44', '45', '45', '45'],
        'שץ' => ['0', '44', '', '44', '45', '', '45'],
        'שש' => ['0', '4', '4', '4', '44', '44', '44'],
        'ש' => ['0', '4', '4', '4'],
        'תג' => ['0', '34', '34', '34'],
        'תז' => ['0', '34', '34', '34'],
        'תש' => ['0', '4', '4', '4'],
        'תת' => ['0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'],
        'ת' => ['0', '3', '3', '3', '4', '4', '4'],
        // Arabic alphabet
        'ا' => ['1', '0', '', ''],
        'ب' => ['0', '7', '7', '7'],
        'ت' => ['0', '3', '3', '3'],
        'ث' => ['0', '3', '3', '3'],
        'ج' => ['0', '4', '4', '4'],
        'ح' => ['0', '5', '5', '5'],
        'خ' => ['0', '5', '5', '5'],
        'د' => ['0', '3', '3', '3'],
        'ذ' => ['0', '3', '3', '3'],
        'ر' => ['0', '9', '9', '9'],
        'ز' => ['0', '4', '4', '4'],
        'س' => ['0', '4', '4', '4'],
        'ش' => ['0', '4', '4', '4'],
        'ص' => ['0', '4', '4', '4'],
        'ض' => ['0', '3', '3', '3'],
        'ط' => ['0', '3', '3', '3'],
        'ظ' => ['0', '4', '4', '4'],
        'ع' => ['1', '0', '', ''],
        'غ' => ['0', '0', '', ''],
        'ف' => ['0', '7', '7', '7'],
        'ق' => ['0', '5', '5', '5'],
        'ك' => ['0', '5', '5', '5'],
        'ل' => ['0', '8', '8', '8'],
        'لا' => ['0', '8', '8', '8'],
        'م' => ['0', '6', '6', '6'],
        'ن' => ['0', '6', '6', '6'],
        'هن' => ['0', '66', '66', '66'],
        'ه' => ['0', '5', '5', ''],
        'و' => ['1', '', '', '', '7', '', ''],
        'ي' => ['0', '1', '', ''],
        'آ' => ['0', '1', '', ''],
        'ة' => ['0', '', '', '3'],
        'ی' => ['0', '1', '', ''],
        'ى' => ['1', '1', '', ''],
    ];

    /**
     * Calculate the Daitch-Mokotoff soundex for a word.
     *
     * @param string $name
     *
     * @return string[] List of possible DM codes for the word.
*/
    private static function daitchMokotoffWord($name) {
        // Normalise the input: upper-case it, then apply every (search, replace)
        // pair from the transformation table, in table order.
        $name = I18N::strtoupper($name);
        foreach (self::$transformNameTable as $rule) {
            $name = str_replace($rule[0], $rule[1], $name);
        }

        // For Hebrew and Arabic text a repeated sound is kept rather than collapsed.
        // (Loose comparison, exactly as `==` would do.)
        $keepDuplicates = in_array(I18N::textScript($name), ['Hebr', 'Arab']);

        // Converts a list of code fragments into a zero-padded 6-digit code.
        // Returns '' when the fragments hold no usable digits; both '' and '0'
        // count as "no recognisable sound" (the same values a plain truthiness
        // test on the string would reject).
        $finish = function (array $fragments) {
            $digits = str_replace('!', '', implode('', $fragments));

            if ($digits === '' || $digits === '0') {
                return '';
            }

            return str_pad(substr($digits, 0, 6), 6, '0');
        };

        $length   = strlen($name); // measured in bytes, like every offset below
        $offset   = 0;
        $state    = 1;             // 1: start of input, 2: before a vowel, 3: anything else
        $codes    = [];            // finished 6-digit codes
        $partials = [['!']];       // unfinished codes; the leading '!' can never match a sound,
                                   // so the first real sound is never treated as a duplicate

        // Consume the name chunk by chunk until it is exhausted or no candidate code survives.
        while ($partials !== [] && $offset < $length) {
            // Longest chunk (at most self::MAXCHAR bytes) at this offset that the sound table knows.
            $chunk = substr($name, $offset, self::MAXCHAR);
            while ($chunk != '' && !isset(self::$dmsounds[$chunk])) {
                $chunk = substr($chunk, 0, -1);
            }
            if ($chunk === '') {
                // Nothing in the table starts here: skip one byte and retry.
                $offset++;
                continue;
            }

            $sounds   = self::$dmsounds[$chunk];
            $previous = $partials;
            $partials = [];
            $offset  += strlen($chunk);

            // After the first chunk, the state depends on what follows this chunk.
            if ($state !== 1) {
                $lookahead = '';
                if ($offset < $length) {
                    $lookahead = substr($name, $offset, self::MAXCHAR);
                    while ($lookahead != '' && !isset(self::$dmsounds[$lookahead])) {
                        $lookahead = substr($lookahead, 0, -1);
                    }
                }

                // Element 0 of a table row acts as the vowel flag: anything other than '0'
                // means the following chunk is a vowel, which selects state 2.
                $beforeVowel = $lookahead != '' && self::$dmsounds[$lookahead][0] != '0';
                $state       = $beforeVowel ? 2 : 3;
            }

            // A row may hold several triplets of alternatives; stepping by 3 visits the
            // entry for the same basic state in each triplet. $state is deliberately left
            // past the end of the row afterwards, so it is no longer 1 on the next chunk.
            for ($total = count($sounds); $state < $total; $state += 3) {
                $sound = $sounds[$state];

                foreach ($previous as $entry) {
                    $last = count($entry) - 1;

                    // An empty code means "this sound is silent in this state".
                    if ($sound == '') {
                        $entry[$last] .= '!'; // mark the tail so a later equal sound is not seen as a double
                        $partials[]    = $entry;
                        continue;
                    }

                    // Append the sound unless it merely repeats the previous one
                    // (repeats are kept for Hebrew and Arabic).
                    if ($keepDuplicates || $sound !== $entry[$last]) {
                        $entry[] = $sound;
                    }

                    // Seven elements = the '!' placeholder plus six codes: the entry is complete.
                    if (count($entry) < 7) {
                        $partials[] = $entry;
                        continue;
                    }

                    $code = $finish($entry);
                    if ($code !== '') {
                        $codes[] = $code;
                    }
                }
            }
        }

        // Whatever is still incomplete gets zero-filled and returned as well.
        foreach ($partials as $entry) {
            $code = $finish($entry);
            if ($code !== '') {
                $codes[] = $code;
            }
        }

        return $codes;
    }
}