1a25f0a04SGreg Roach<?php 23976b470SGreg Roach 3a25f0a04SGreg Roach/** 4a25f0a04SGreg Roach * webtrees: online genealogy 55bfc6897SGreg Roach * Copyright (C) 2022 webtrees development team 6a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify 7a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by 8a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or 9a25f0a04SGreg Roach * (at your option) any later version. 10a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful, 11a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of 12a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13a25f0a04SGreg Roach * GNU General Public License for more details. 14a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License 1589f7189bSGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>. 16a25f0a04SGreg Roach */ 17fcfa147eSGreg Roach 18e7f56f2aSGreg Roachdeclare(strict_types=1); 19e7f56f2aSGreg Roach 2076692c8bSGreg Roachnamespace Fisharebest\Webtrees; 21a25f0a04SGreg Roach 2210e06497SGreg Roachuse function array_slice; 2310e06497SGreg Roachuse function count; 2410e06497SGreg Roachuse function strlen; 2510e06497SGreg Roach 26a25f0a04SGreg Roach/** 2776692c8bSGreg Roach * Phonetic matching of strings. 28a25f0a04SGreg Roach */ 29c1010edaSGreg Roachclass Soundex 30c1010edaSGreg Roach{ 31a25f0a04SGreg Roach // Determine the Daitch–Mokotoff Soundex code for a word 32a25f0a04SGreg Roach // Original implementation by Gerry Kroll, and analysis by Meliza Amity 33a25f0a04SGreg Roach 34a25f0a04SGreg Roach // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!) 3516cfb0b9SGreg Roach private const MAXCHAR = 7; 36a25f0a04SGreg Roach 37a25f0a04SGreg Roach /** 38a25f0a04SGreg Roach * Name transformation arrays. 
39a25f0a04SGreg Roach * Used to transform the Name string to simplify the "sounds like" table. 40a25f0a04SGreg Roach * This is especially useful in Hebrew. 41a25f0a04SGreg Roach * 42a25f0a04SGreg Roach * Each array entry defines the "from" and "to" arguments of an preg($from, $to, $text) 43a25f0a04SGreg Roach * function call to achieve the desired transformations. 44a25f0a04SGreg Roach * 45a25f0a04SGreg Roach * Note about the use of "\x01": 46a25f0a04SGreg Roach * This code, which can’t legitimately occur in the kind of text we're dealing with, 47a25f0a04SGreg Roach * is used as a place-holder so that conditional string replacements can be done. 48a25f0a04SGreg Roach */ 4916cfb0b9SGreg Roach private const TRANSFORM_NAMES = [ 50a25f0a04SGreg Roach // Force Yiddish ligatures to be treated as separate letters 514096896cSGreg Roach ['װ', 'וו'], 524096896cSGreg Roach ['ײ', 'יי'], 534096896cSGreg Roach ['ױ', 'וי'], 544096896cSGreg Roach ['בו', 'בע'], 554096896cSGreg Roach ['פו', 'פע'], 564096896cSGreg Roach ['ומ', 'עמ'], 574096896cSGreg Roach ['ום', 'עם'], 584096896cSGreg Roach ['ונ', 'ענ'], 594096896cSGreg Roach ['ון', 'ען'], 604096896cSGreg Roach ['וו', 'ב'], 614096896cSGreg Roach ["\x01", ''], 624096896cSGreg Roach ['ייה$', "\x01ה"], 634096896cSGreg Roach ['ייע$', "\x01ע"], 644096896cSGreg Roach ['יי', 'ע'], 654096896cSGreg Roach ["\x01", 'יי'], 6613abd6f3SGreg Roach ]; 67a25f0a04SGreg Roach 68a25f0a04SGreg Roach /** 69a25f0a04SGreg Roach * The DM sound coding table is organized this way: 70a25f0a04SGreg Roach * key: a variable-length string that corresponds to the UTF-8 character sequence 71a25f0a04SGreg Roach * represented by the table entry. Currently, that string can be up to 7 72a25f0a04SGreg Roach * bytes long. This maximum length is defined by the value of global variable 73a25f0a04SGreg Roach * $maxchar. 
74a25f0a04SGreg Roach * 75a25f0a04SGreg Roach * value: an array as follows: 76a25f0a04SGreg Roach * [0]: zero if not a vowel 77a25f0a04SGreg Roach * [1]: sound value when this string is at the beginning of the word 78a25f0a04SGreg Roach * [2]: sound value when this string is followed by a vowel 79a25f0a04SGreg Roach * [3]: sound value for other cases 80a25f0a04SGreg Roach * [1],[2],[3] can be repeated several times to create branches in the code 81a25f0a04SGreg Roach * an empty sound value means "ignore in this state" 82a25f0a04SGreg Roach */ 8316cfb0b9SGreg Roach private const DM_SOUNDS = [ 844096896cSGreg Roach 'A' => ['1', '0', '', ''], 854096896cSGreg Roach 'À' => ['1', '0', '', ''], 864096896cSGreg Roach 'Á' => ['1', '0', '', ''], 874096896cSGreg Roach 'Â' => ['1', '0', '', ''], 884096896cSGreg Roach 'Ã' => ['1', '0', '', ''], 894096896cSGreg Roach 'Ä' => ['1', '0', '1', '', '0', '', ''], 904096896cSGreg Roach 'Å' => ['1', '0', '', ''], 914096896cSGreg Roach 'Ă' => ['1', '0', '', ''], 924096896cSGreg Roach 'Ą' => ['1', '', '', '', '', '', '6'], 934096896cSGreg Roach 'Ạ' => ['1', '0', '', ''], 944096896cSGreg Roach 'Ả' => ['1', '0', '', ''], 954096896cSGreg Roach 'Ấ' => ['1', '0', '', ''], 964096896cSGreg Roach 'Ầ' => ['1', '0', '', ''], 974096896cSGreg Roach 'Ẩ' => ['1', '0', '', ''], 984096896cSGreg Roach 'Ẫ' => ['1', '0', '', ''], 994096896cSGreg Roach 'Ậ' => ['1', '0', '', ''], 1004096896cSGreg Roach 'Ắ' => ['1', '0', '', ''], 1014096896cSGreg Roach 'Ằ' => ['1', '0', '', ''], 1024096896cSGreg Roach 'Ẳ' => ['1', '0', '', ''], 1034096896cSGreg Roach 'Ẵ' => ['1', '0', '', ''], 1044096896cSGreg Roach 'Ặ' => ['1', '0', '', ''], 1054096896cSGreg Roach 'AE' => ['1', '0', '1', ''], 1064096896cSGreg Roach 'Æ' => ['1', '0', '1', ''], 1074096896cSGreg Roach 'AI' => ['1', '0', '1', ''], 1084096896cSGreg Roach 'AJ' => ['1', '0', '1', ''], 1094096896cSGreg Roach 'AU' => ['1', '0', '7', ''], 1104096896cSGreg Roach 'AV' => ['1', '0', '7', '', '7', '7', '7'], 
1114096896cSGreg Roach 'ÄU' => ['1', '0', '1', ''], 1124096896cSGreg Roach 'AY' => ['1', '0', '1', ''], 1134096896cSGreg Roach 'B' => ['0', '7', '7', '7'], 1144096896cSGreg Roach 'C' => ['0', '5', '5', '5', '34', '4', '4'], 1154096896cSGreg Roach 'Ć' => ['0', '4', '4', '4'], 1164096896cSGreg Roach 'Č' => ['0', '4', '4', '4'], 1174096896cSGreg Roach 'Ç' => ['0', '4', '4', '4'], 1184096896cSGreg Roach 'CH' => ['0', '5', '5', '5', '34', '4', '4'], 1194096896cSGreg Roach 'CHS' => ['0', '5', '54', '54'], 1204096896cSGreg Roach 'CK' => ['0', '5', '5', '5', '45', '45', '45'], 1214096896cSGreg Roach 'CCS' => ['0', '4', '4', '4'], 1224096896cSGreg Roach 'CS' => ['0', '4', '4', '4'], 1234096896cSGreg Roach 'CSZ' => ['0', '4', '4', '4'], 1244096896cSGreg Roach 'CZ' => ['0', '4', '4', '4'], 1254096896cSGreg Roach 'CZS' => ['0', '4', '4', '4'], 1264096896cSGreg Roach 'D' => ['0', '3', '3', '3'], 1274096896cSGreg Roach 'Ď' => ['0', '3', '3', '3'], 1284096896cSGreg Roach 'Đ' => ['0', '3', '3', '3'], 1294096896cSGreg Roach 'DRS' => ['0', '4', '4', '4'], 1304096896cSGreg Roach 'DRZ' => ['0', '4', '4', '4'], 1314096896cSGreg Roach 'DS' => ['0', '4', '4', '4'], 1324096896cSGreg Roach 'DSH' => ['0', '4', '4', '4'], 1334096896cSGreg Roach 'DSZ' => ['0', '4', '4', '4'], 1344096896cSGreg Roach 'DT' => ['0', '3', '3', '3'], 1354096896cSGreg Roach 'DDZ' => ['0', '4', '4', '4'], 1364096896cSGreg Roach 'DDZS' => ['0', '4', '4', '4'], 1374096896cSGreg Roach 'DZ' => ['0', '4', '4', '4'], 1384096896cSGreg Roach 'DŹ' => ['0', '4', '4', '4'], 1394096896cSGreg Roach 'DŻ' => ['0', '4', '4', '4'], 1404096896cSGreg Roach 'DZH' => ['0', '4', '4', '4'], 1414096896cSGreg Roach 'DZS' => ['0', '4', '4', '4'], 1424096896cSGreg Roach 'E' => ['1', '0', '', ''], 1434096896cSGreg Roach 'È' => ['1', '0', '', ''], 1444096896cSGreg Roach 'É' => ['1', '0', '', ''], 1454096896cSGreg Roach 'Ê' => ['1', '0', '', ''], 1464096896cSGreg Roach 'Ë' => ['1', '0', '', ''], 1474096896cSGreg Roach 'Ĕ' => ['1', '0', '', ''], 
1484096896cSGreg Roach 'Ė' => ['1', '0', '', ''], 1494096896cSGreg Roach 'Ę' => ['1', '', '', '6', '', '', ''], 1504096896cSGreg Roach 'Ẹ' => ['1', '0', '', ''], 1514096896cSGreg Roach 'Ẻ' => ['1', '0', '', ''], 1524096896cSGreg Roach 'Ẽ' => ['1', '0', '', ''], 1534096896cSGreg Roach 'Ế' => ['1', '0', '', ''], 1544096896cSGreg Roach 'Ề' => ['1', '0', '', ''], 1554096896cSGreg Roach 'Ể' => ['1', '0', '', ''], 1564096896cSGreg Roach 'Ễ' => ['1', '0', '', ''], 1574096896cSGreg Roach 'Ệ' => ['1', '0', '', ''], 1584096896cSGreg Roach 'EAU' => ['1', '0', '', ''], 1594096896cSGreg Roach 'EI' => ['1', '0', '1', ''], 1604096896cSGreg Roach 'EJ' => ['1', '0', '1', ''], 1614096896cSGreg Roach 'EU' => ['1', '1', '1', ''], 1624096896cSGreg Roach 'EY' => ['1', '0', '1', ''], 1634096896cSGreg Roach 'F' => ['0', '7', '7', '7'], 1644096896cSGreg Roach 'FB' => ['0', '7', '7', '7'], 1654096896cSGreg Roach 'G' => ['0', '5', '5', '5', '34', '4', '4'], 1664096896cSGreg Roach 'Ğ' => ['0', '', '', ''], 1674096896cSGreg Roach 'GGY' => ['0', '5', '5', '5'], 1684096896cSGreg Roach 'GY' => ['0', '5', '5', '5'], 1694096896cSGreg Roach 'H' => ['0', '5', '5', '', '5', '5', '5'], 1704096896cSGreg Roach 'I' => ['1', '0', '', ''], 1714096896cSGreg Roach 'Ì' => ['1', '0', '', ''], 1724096896cSGreg Roach 'Í' => ['1', '0', '', ''], 1734096896cSGreg Roach 'Î' => ['1', '0', '', ''], 1744096896cSGreg Roach 'Ï' => ['1', '0', '', ''], 1754096896cSGreg Roach 'Ĩ' => ['1', '0', '', ''], 1764096896cSGreg Roach 'Į' => ['1', '0', '', ''], 1774096896cSGreg Roach 'İ' => ['1', '0', '', ''], 1784096896cSGreg Roach 'Ỉ' => ['1', '0', '', ''], 1794096896cSGreg Roach 'Ị' => ['1', '0', '', ''], 1804096896cSGreg Roach 'IA' => ['1', '1', '', ''], 1814096896cSGreg Roach 'IE' => ['1', '1', '', ''], 1824096896cSGreg Roach 'IO' => ['1', '1', '', ''], 1834096896cSGreg Roach 'IU' => ['1', '1', '', ''], 1844096896cSGreg Roach 'J' => ['0', '1', '', '', '4', '4', '4', '5', '5', ''], 1854096896cSGreg Roach 'K' => ['0', '5', '5', 
'5'], 1864096896cSGreg Roach 'KH' => ['0', '5', '5', '5'], 1874096896cSGreg Roach 'KS' => ['0', '5', '54', '54'], 1884096896cSGreg Roach 'L' => ['0', '8', '8', '8'], 1894096896cSGreg Roach 'Ľ' => ['0', '8', '8', '8'], 1904096896cSGreg Roach 'Ĺ' => ['0', '8', '8', '8'], 1914096896cSGreg Roach 'Ł' => ['0', '7', '7', '7', '8', '8', '8'], 1924096896cSGreg Roach 'LL' => ['0', '8', '8', '8', '58', '8', '8', '1', '8', '8'], 1934096896cSGreg Roach 'LLY' => ['0', '8', '8', '8', '1', '8', '8'], 1944096896cSGreg Roach 'LY' => ['0', '8', '8', '8', '1', '8', '8'], 1954096896cSGreg Roach 'M' => ['0', '6', '6', '6'], 1964096896cSGreg Roach 'MĔ' => ['0', '66', '66', '66'], 1974096896cSGreg Roach 'MN' => ['0', '66', '66', '66'], 1984096896cSGreg Roach 'N' => ['0', '6', '6', '6'], 1994096896cSGreg Roach 'Ń' => ['0', '6', '6', '6'], 2004096896cSGreg Roach 'Ň' => ['0', '6', '6', '6'], 2014096896cSGreg Roach 'Ñ' => ['0', '6', '6', '6'], 2024096896cSGreg Roach 'NM' => ['0', '66', '66', '66'], 2034096896cSGreg Roach 'O' => ['1', '0', '', ''], 2044096896cSGreg Roach 'Ò' => ['1', '0', '', ''], 2054096896cSGreg Roach 'Ó' => ['1', '0', '', ''], 2064096896cSGreg Roach 'Ô' => ['1', '0', '', ''], 2074096896cSGreg Roach 'Õ' => ['1', '0', '', ''], 2084096896cSGreg Roach 'Ö' => ['1', '0', '', ''], 2094096896cSGreg Roach 'Ø' => ['1', '0', '', ''], 2104096896cSGreg Roach 'Ő' => ['1', '0', '', ''], 2114096896cSGreg Roach 'Œ' => ['1', '0', '', ''], 2124096896cSGreg Roach 'Ơ' => ['1', '0', '', ''], 2134096896cSGreg Roach 'Ọ' => ['1', '0', '', ''], 2144096896cSGreg Roach 'Ỏ' => ['1', '0', '', ''], 2154096896cSGreg Roach 'Ố' => ['1', '0', '', ''], 2164096896cSGreg Roach 'Ồ' => ['1', '0', '', ''], 2174096896cSGreg Roach 'Ổ' => ['1', '0', '', ''], 2184096896cSGreg Roach 'Ỗ' => ['1', '0', '', ''], 2194096896cSGreg Roach 'Ộ' => ['1', '0', '', ''], 2204096896cSGreg Roach 'Ớ' => ['1', '0', '', ''], 2214096896cSGreg Roach 'Ờ' => ['1', '0', '', ''], 2224096896cSGreg Roach 'Ở' => ['1', '0', '', ''], 
2234096896cSGreg Roach 'Ỡ' => ['1', '0', '', ''], 2244096896cSGreg Roach 'Ợ' => ['1', '0', '', ''], 2254096896cSGreg Roach 'OE' => ['1', '0', '', ''], 2264096896cSGreg Roach 'OI' => ['1', '0', '1', ''], 2274096896cSGreg Roach 'OJ' => ['1', '0', '1', ''], 2284096896cSGreg Roach 'OU' => ['1', '0', '', ''], 2294096896cSGreg Roach 'OY' => ['1', '0', '1', ''], 2304096896cSGreg Roach 'P' => ['0', '7', '7', '7'], 2314096896cSGreg Roach 'PF' => ['0', '7', '7', '7'], 2324096896cSGreg Roach 'PH' => ['0', '7', '7', '7'], 2334096896cSGreg Roach 'Q' => ['0', '5', '5', '5'], 2344096896cSGreg Roach 'R' => ['0', '9', '9', '9'], 2354096896cSGreg Roach 'Ř' => ['0', '4', '4', '4'], 2364096896cSGreg Roach 'RS' => ['0', '4', '4', '4', '94', '94', '94'], 2374096896cSGreg Roach 'RZ' => ['0', '4', '4', '4', '94', '94', '94'], 2384096896cSGreg Roach 'S' => ['0', '4', '4', '4'], 2394096896cSGreg Roach 'Ś' => ['0', '4', '4', '4'], 2404096896cSGreg Roach 'Š' => ['0', '4', '4', '4'], 2414096896cSGreg Roach 'Ş' => ['0', '4', '4', '4'], 2424096896cSGreg Roach 'SC' => ['0', '2', '4', '4'], 2434096896cSGreg Roach 'ŠČ' => ['0', '2', '4', '4'], 2444096896cSGreg Roach 'SCH' => ['0', '4', '4', '4'], 2454096896cSGreg Roach 'SCHD' => ['0', '2', '43', '43'], 2464096896cSGreg Roach 'SCHT' => ['0', '2', '43', '43'], 2474096896cSGreg Roach 'SCHTCH' => ['0', '2', '4', '4'], 2484096896cSGreg Roach 'SCHTSCH' => ['0', '2', '4', '4'], 2494096896cSGreg Roach 'SCHTSH' => ['0', '2', '4', '4'], 2504096896cSGreg Roach 'SD' => ['0', '2', '43', '43'], 2514096896cSGreg Roach 'SH' => ['0', '4', '4', '4'], 2524096896cSGreg Roach 'SHCH' => ['0', '2', '4', '4'], 2534096896cSGreg Roach 'SHD' => ['0', '2', '43', '43'], 2544096896cSGreg Roach 'SHT' => ['0', '2', '43', '43'], 2554096896cSGreg Roach 'SHTCH' => ['0', '2', '4', '4'], 2564096896cSGreg Roach 'SHTSH' => ['0', '2', '4', '4'], 2574096896cSGreg Roach 'ß' => ['0', '', '4', '4'], 2584096896cSGreg Roach 'ST' => ['0', '2', '43', '43'], 2594096896cSGreg Roach 'STCH' => ['0', 
'2', '4', '4'], 2604096896cSGreg Roach 'STRS' => ['0', '2', '4', '4'], 2614096896cSGreg Roach 'STRZ' => ['0', '2', '4', '4'], 2624096896cSGreg Roach 'STSCH' => ['0', '2', '4', '4'], 2634096896cSGreg Roach 'STSH' => ['0', '2', '4', '4'], 2644096896cSGreg Roach 'SSZ' => ['0', '4', '4', '4'], 2654096896cSGreg Roach 'SZ' => ['0', '4', '4', '4'], 2664096896cSGreg Roach 'SZCS' => ['0', '2', '4', '4'], 2674096896cSGreg Roach 'SZCZ' => ['0', '2', '4', '4'], 2684096896cSGreg Roach 'SZD' => ['0', '2', '43', '43'], 2694096896cSGreg Roach 'SZT' => ['0', '2', '43', '43'], 2704096896cSGreg Roach 'T' => ['0', '3', '3', '3'], 2714096896cSGreg Roach 'Ť' => ['0', '3', '3', '3'], 2724096896cSGreg Roach 'Ţ' => ['0', '3', '3', '3', '4', '4', '4'], 2734096896cSGreg Roach 'TC' => ['0', '4', '4', '4'], 2744096896cSGreg Roach 'TCH' => ['0', '4', '4', '4'], 2754096896cSGreg Roach 'TH' => ['0', '3', '3', '3'], 2764096896cSGreg Roach 'TRS' => ['0', '4', '4', '4'], 2774096896cSGreg Roach 'TRZ' => ['0', '4', '4', '4'], 2784096896cSGreg Roach 'TS' => ['0', '4', '4', '4'], 2794096896cSGreg Roach 'TSCH' => ['0', '4', '4', '4'], 2804096896cSGreg Roach 'TSH' => ['0', '4', '4', '4'], 2814096896cSGreg Roach 'TSZ' => ['0', '4', '4', '4'], 2824096896cSGreg Roach 'TTCH' => ['0', '4', '4', '4'], 2834096896cSGreg Roach 'TTS' => ['0', '4', '4', '4'], 2844096896cSGreg Roach 'TTSCH' => ['0', '4', '4', '4'], 2854096896cSGreg Roach 'TTSZ' => ['0', '4', '4', '4'], 2864096896cSGreg Roach 'TTZ' => ['0', '4', '4', '4'], 2874096896cSGreg Roach 'TZ' => ['0', '4', '4', '4'], 2884096896cSGreg Roach 'TZS' => ['0', '4', '4', '4'], 2894096896cSGreg Roach 'U' => ['1', '0', '', ''], 2904096896cSGreg Roach 'Ù' => ['1', '0', '', ''], 2914096896cSGreg Roach 'Ú' => ['1', '0', '', ''], 2924096896cSGreg Roach 'Û' => ['1', '0', '', ''], 2934096896cSGreg Roach 'Ü' => ['1', '0', '', ''], 2944096896cSGreg Roach 'Ũ' => ['1', '0', '', ''], 2954096896cSGreg Roach 'Ū' => ['1', '0', '', ''], 2964096896cSGreg Roach 'Ů' => ['1', '0', '', 
''], 2974096896cSGreg Roach 'Ű' => ['1', '0', '', ''], 2984096896cSGreg Roach 'Ų' => ['1', '0', '', ''], 2994096896cSGreg Roach 'Ư' => ['1', '0', '', ''], 3004096896cSGreg Roach 'Ụ' => ['1', '0', '', ''], 3014096896cSGreg Roach 'Ủ' => ['1', '0', '', ''], 3024096896cSGreg Roach 'Ứ' => ['1', '0', '', ''], 3034096896cSGreg Roach 'Ừ' => ['1', '0', '', ''], 3044096896cSGreg Roach 'Ử' => ['1', '0', '', ''], 3054096896cSGreg Roach 'Ữ' => ['1', '0', '', ''], 3064096896cSGreg Roach 'Ự' => ['1', '0', '', ''], 3074096896cSGreg Roach 'UE' => ['1', '0', '', ''], 3084096896cSGreg Roach 'UI' => ['1', '0', '1', ''], 3094096896cSGreg Roach 'UJ' => ['1', '0', '1', ''], 3104096896cSGreg Roach 'UY' => ['1', '0', '1', ''], 3114096896cSGreg Roach 'UW' => ['1', '0', '1', '', '0', '7', '7'], 3124096896cSGreg Roach 'V' => ['0', '7', '7', '7'], 3134096896cSGreg Roach 'W' => ['0', '7', '7', '7'], 3144096896cSGreg Roach 'X' => ['0', '5', '54', '54'], 3154096896cSGreg Roach 'Y' => ['1', '1', '', ''], 3164096896cSGreg Roach 'Ý' => ['1', '1', '', ''], 3174096896cSGreg Roach 'Ỳ' => ['1', '1', '', ''], 3184096896cSGreg Roach 'Ỵ' => ['1', '1', '', ''], 3194096896cSGreg Roach 'Ỷ' => ['1', '1', '', ''], 3204096896cSGreg Roach 'Ỹ' => ['1', '1', '', ''], 3214096896cSGreg Roach 'Z' => ['0', '4', '4', '4'], 3224096896cSGreg Roach 'Ź' => ['0', '4', '4', '4'], 3234096896cSGreg Roach 'Ż' => ['0', '4', '4', '4'], 3244096896cSGreg Roach 'Ž' => ['0', '4', '4', '4'], 3254096896cSGreg Roach 'ZD' => ['0', '2', '43', '43'], 3264096896cSGreg Roach 'ZDZ' => ['0', '2', '4', '4'], 3274096896cSGreg Roach 'ZDZH' => ['0', '2', '4', '4'], 3284096896cSGreg Roach 'ZH' => ['0', '4', '4', '4'], 3294096896cSGreg Roach 'ZHD' => ['0', '2', '43', '43'], 3304096896cSGreg Roach 'ZHDZH' => ['0', '2', '4', '4'], 3314096896cSGreg Roach 'ZS' => ['0', '4', '4', '4'], 3324096896cSGreg Roach 'ZSCH' => ['0', '4', '4', '4'], 3334096896cSGreg Roach 'ZSH' => ['0', '4', '4', '4'], 3344096896cSGreg Roach 'ZZS' => ['0', '4', '4', '4'], 
335a25f0a04SGreg Roach // Cyrillic alphabet 3364096896cSGreg Roach 'А' => ['1', '0', '', ''], 3374096896cSGreg Roach 'Б' => ['0', '7', '7', '7'], 3384096896cSGreg Roach 'В' => ['0', '7', '7', '7'], 3394096896cSGreg Roach 'Г' => ['0', '5', '5', '5'], 3404096896cSGreg Roach 'Д' => ['0', '3', '3', '3'], 3414096896cSGreg Roach 'ДЗ' => ['0', '4', '4', '4'], 3424096896cSGreg Roach 'Е' => ['1', '0', '', ''], 3434096896cSGreg Roach 'Ё' => ['1', '0', '', ''], 3444096896cSGreg Roach 'Ж' => ['0', '4', '4', '4'], 3454096896cSGreg Roach 'З' => ['0', '4', '4', '4'], 3464096896cSGreg Roach 'И' => ['1', '0', '', ''], 3474096896cSGreg Roach 'Й' => ['1', '1', '', '', '4', '4', '4'], 3484096896cSGreg Roach 'К' => ['0', '5', '5', '5'], 3494096896cSGreg Roach 'Л' => ['0', '8', '8', '8'], 3504096896cSGreg Roach 'М' => ['0', '6', '6', '6'], 3514096896cSGreg Roach 'Н' => ['0', '6', '6', '6'], 3524096896cSGreg Roach 'О' => ['1', '0', '', ''], 3534096896cSGreg Roach 'П' => ['0', '7', '7', '7'], 3544096896cSGreg Roach 'Р' => ['0', '9', '9', '9'], 3554096896cSGreg Roach 'РЖ' => ['0', '4', '4', '4'], 3564096896cSGreg Roach 'С' => ['0', '4', '4', '4'], 3574096896cSGreg Roach 'Т' => ['0', '3', '3', '3'], 3584096896cSGreg Roach 'У' => ['1', '0', '', ''], 3594096896cSGreg Roach 'Ф' => ['0', '7', '7', '7'], 3604096896cSGreg Roach 'Х' => ['0', '5', '5', '5'], 3614096896cSGreg Roach 'Ц' => ['0', '4', '4', '4'], 3624096896cSGreg Roach 'Ч' => ['0', '4', '4', '4'], 3634096896cSGreg Roach 'Ш' => ['0', '4', '4', '4'], 3644096896cSGreg Roach 'Щ' => ['0', '2', '4', '4'], 3654096896cSGreg Roach 'Ъ' => ['0', '', '', ''], 3664096896cSGreg Roach 'Ы' => ['0', '1', '', ''], 3674096896cSGreg Roach 'Ь' => ['0', '', '', ''], 3684096896cSGreg Roach 'Э' => ['1', '0', '', ''], 3694096896cSGreg Roach 'Ю' => ['0', '1', '', ''], 3704096896cSGreg Roach 'Я' => ['0', '1', '', ''], 371a25f0a04SGreg Roach // Greek alphabet 3724096896cSGreg Roach 'Α' => ['1', '0', '', ''], 3734096896cSGreg Roach 'Ά' => ['1', '0', '', ''], 
3744096896cSGreg Roach 'ΑΙ' => ['1', '0', '1', ''], 3754096896cSGreg Roach 'ΑΥ' => ['1', '0', '1', ''], 3764096896cSGreg Roach 'Β' => ['0', '7', '7', '7'], 3774096896cSGreg Roach 'Γ' => ['0', '5', '5', '5'], 3784096896cSGreg Roach 'Δ' => ['0', '3', '3', '3'], 3794096896cSGreg Roach 'Ε' => ['1', '0', '', ''], 3804096896cSGreg Roach 'Έ' => ['1', '0', '', ''], 3814096896cSGreg Roach 'ΕΙ' => ['1', '0', '1', ''], 3824096896cSGreg Roach 'ΕΥ' => ['1', '1', '1', ''], 3834096896cSGreg Roach 'Ζ' => ['0', '4', '4', '4'], 3844096896cSGreg Roach 'Η' => ['1', '0', '', ''], 3854096896cSGreg Roach 'Ή' => ['1', '0', '', ''], 3864096896cSGreg Roach 'Θ' => ['0', '3', '3', '3'], 3874096896cSGreg Roach 'Ι' => ['1', '0', '', ''], 3884096896cSGreg Roach 'Ί' => ['1', '0', '', ''], 3894096896cSGreg Roach 'Ϊ' => ['1', '0', '', ''], 3904096896cSGreg Roach 'ΐ' => ['1', '0', '', ''], 3914096896cSGreg Roach 'Κ' => ['0', '5', '5', '5'], 3924096896cSGreg Roach 'Λ' => ['0', '8', '8', '8'], 3934096896cSGreg Roach 'Μ' => ['0', '6', '6', '6'], 3944096896cSGreg Roach 'ΜΠ' => ['0', '7', '7', '7'], 3954096896cSGreg Roach 'Ν' => ['0', '6', '6', '6'], 3964096896cSGreg Roach 'ΝΤ' => ['0', '3', '3', '3'], 3974096896cSGreg Roach 'Ξ' => ['0', '5', '54', '54'], 3984096896cSGreg Roach 'Ο' => ['1', '0', '', ''], 3994096896cSGreg Roach 'Ό' => ['1', '0', '', ''], 4004096896cSGreg Roach 'ΟΙ' => ['1', '0', '1', ''], 4014096896cSGreg Roach 'ΟΥ' => ['1', '0', '1', ''], 4024096896cSGreg Roach 'Π' => ['0', '7', '7', '7'], 4034096896cSGreg Roach 'Ρ' => ['0', '9', '9', '9'], 4044096896cSGreg Roach 'Σ' => ['0', '4', '4', '4'], 4054096896cSGreg Roach 'ς' => ['0', '', '', '4'], 4064096896cSGreg Roach 'Τ' => ['0', '3', '3', '3'], 4074096896cSGreg Roach 'ΤΖ' => ['0', '4', '4', '4'], 4084096896cSGreg Roach 'ΤΣ' => ['0', '4', '4', '4'], 4094096896cSGreg Roach 'Υ' => ['1', '1', '', ''], 4104096896cSGreg Roach 'Ύ' => ['1', '1', '', ''], 4114096896cSGreg Roach 'Ϋ' => ['1', '1', '', ''], 4124096896cSGreg Roach 'ΰ' => ['1', '1', '', 
''], 4134096896cSGreg Roach 'ΥΚ' => ['1', '5', '5', '5'], 4144096896cSGreg Roach 'ΥΥ' => ['1', '65', '65', '65'], 4154096896cSGreg Roach 'Φ' => ['0', '7', '7', '7'], 4164096896cSGreg Roach 'Χ' => ['0', '5', '5', '5'], 4174096896cSGreg Roach 'Ψ' => ['0', '7', '7', '7'], 4184096896cSGreg Roach 'Ω' => ['1', '0', '', ''], 4194096896cSGreg Roach 'Ώ' => ['1', '0', '', ''], 420a25f0a04SGreg Roach // Hebrew alphabet 4214096896cSGreg Roach 'א' => ['1', '0', '', ''], 4224096896cSGreg Roach 'או' => ['1', '0', '7', ''], 4234096896cSGreg Roach 'אג' => ['1', '4', '4', '4', '5', '5', '5', '34', '34', '34'], 4244096896cSGreg Roach 'בב' => ['0', '7', '7', '7', '77', '77', '77'], 4254096896cSGreg Roach 'ב' => ['0', '7', '7', '7'], 4264096896cSGreg Roach 'גג' => ['0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'], 4274096896cSGreg Roach 'גד' => ['0', '43', '43', '43', '53', '53', '53'], 4284096896cSGreg Roach 'גה' => ['0', '45', '45', '45', '55', '55', '55'], 4294096896cSGreg Roach 'גז' => ['0', '44', '44', '44', '45', '45', '45'], 4304096896cSGreg Roach 'גח' => ['0', '45', '45', '45', '55', '55', '55'], 4314096896cSGreg Roach 'גכ' => ['0', '45', '45', '45', '55', '55', '55'], 4324096896cSGreg Roach 'גך' => ['0', '45', '45', '45', '55', '55', '55'], 4334096896cSGreg Roach 'גצ' => ['0', '44', '44', '44', '45', '45', '45'], 4344096896cSGreg Roach 'גץ' => ['0', '44', '44', '44', '45', '45', '45'], 4354096896cSGreg Roach 'גק' => ['0', '45', '45', '45', '54', '54', '54'], 4364096896cSGreg Roach 'גש' => ['0', '44', '44', '44', '54', '54', '54'], 4374096896cSGreg Roach 'גת' => ['0', '43', '43', '43', '53', '53', '53'], 4384096896cSGreg Roach 'ג' => ['0', '4', '4', '4', '5', '5', '5'], 4394096896cSGreg Roach 'דז' => ['0', '4', '4', '4'], 4404096896cSGreg Roach 'דד' => ['0', '3', '3', '3', '33', '33', '33'], 4414096896cSGreg Roach 'דט' => ['0', '33', '33', '33'], 4424096896cSGreg Roach 'דש' => ['0', '4', '4', '4'], 4434096896cSGreg Roach 'דצ' => ['0', 
'4', '4', '4'], 4444096896cSGreg Roach 'דץ' => ['0', '4', '4', '4'], 4454096896cSGreg Roach 'ד' => ['0', '3', '3', '3'], 4464096896cSGreg Roach 'הג' => ['0', '54', '54', '54', '55', '55', '55'], 4474096896cSGreg Roach 'הכ' => ['0', '55', '55', '55'], 4484096896cSGreg Roach 'הח' => ['0', '55', '55', '55'], 4494096896cSGreg Roach 'הק' => ['0', '55', '55', '55', '5', '5', '5'], 4504096896cSGreg Roach 'הה' => ['0', '5', '5', '', '55', '55', ''], 4514096896cSGreg Roach 'ה' => ['0', '5', '5', ''], 4524096896cSGreg Roach 'וי' => ['1', '', '', '', '7', '7', '7'], 4534096896cSGreg Roach 'ו' => ['1', '7', '7', '7', '7', '', ''], 4544096896cSGreg Roach 'וו' => ['1', '7', '7', '7', '7', '', ''], 4554096896cSGreg Roach 'וופ' => ['1', '7', '7', '7', '77', '77', '77'], 4564096896cSGreg Roach 'זש' => ['0', '4', '4', '4', '44', '44', '44'], 4574096896cSGreg Roach 'זדז' => ['0', '2', '4', '4'], 4584096896cSGreg Roach 'ז' => ['0', '4', '4', '4'], 4594096896cSGreg Roach 'זג' => ['0', '44', '44', '44', '45', '45', '45'], 4604096896cSGreg Roach 'זז' => ['0', '4', '4', '4', '44', '44', '44'], 4614096896cSGreg Roach 'זס' => ['0', '44', '44', '44'], 4624096896cSGreg Roach 'זצ' => ['0', '44', '44', '44'], 4634096896cSGreg Roach 'זץ' => ['0', '44', '44', '44'], 4644096896cSGreg Roach 'חג' => ['0', '54', '54', '54', '53', '53', '53'], 4654096896cSGreg Roach 'חח' => ['0', '5', '5', '5', '55', '55', '55'], 4664096896cSGreg Roach 'חק' => ['0', '55', '55', '55', '5', '5', '5'], 4674096896cSGreg Roach 'חכ' => ['0', '45', '45', '45', '55', '55', '55'], 4684096896cSGreg Roach 'חס' => ['0', '5', '54', '54'], 4694096896cSGreg Roach 'חש' => ['0', '5', '54', '54'], 4704096896cSGreg Roach 'ח' => ['0', '5', '5', '5'], 4714096896cSGreg Roach 'טש' => ['0', '4', '4', '4'], 4724096896cSGreg Roach 'טד' => ['0', '33', '33', '33'], 4734096896cSGreg Roach 'טי' => ['0', '3', '3', '3', '4', '4', '4', '3', '3', '34'], 4744096896cSGreg Roach 'טת' => ['0', '33', '33', '33'], 4754096896cSGreg Roach 'טט' => ['0', '3', 
'3', '3', '33', '33', '33'], 4764096896cSGreg Roach 'ט' => ['0', '3', '3', '3'], 4774096896cSGreg Roach 'י' => ['1', '1', '', ''], 4784096896cSGreg Roach 'יא' => ['1', '1', '', '', '1', '1', '1'], 4794096896cSGreg Roach 'כג' => ['0', '55', '55', '55', '54', '54', '54'], 4804096896cSGreg Roach 'כש' => ['0', '5', '54', '54'], 4814096896cSGreg Roach 'כס' => ['0', '5', '54', '54'], 4824096896cSGreg Roach 'ככ' => ['0', '5', '5', '5', '55', '55', '55'], 4834096896cSGreg Roach 'כך' => ['0', '5', '5', '5', '55', '55', '55'], 4844096896cSGreg Roach 'כ' => ['0', '5', '5', '5'], 4854096896cSGreg Roach 'כח' => ['0', '55', '55', '55', '5', '5', '5'], 4864096896cSGreg Roach 'ך' => ['0', '', '5', '5'], 4874096896cSGreg Roach 'ל' => ['0', '8', '8', '8'], 4884096896cSGreg Roach 'לל' => ['0', '88', '88', '88', '8', '8', '8'], 4894096896cSGreg Roach 'מנ' => ['0', '66', '66', '66'], 4904096896cSGreg Roach 'מן' => ['0', '66', '66', '66'], 4914096896cSGreg Roach 'ממ' => ['0', '6', '6', '6', '66', '66', '66'], 4924096896cSGreg Roach 'מם' => ['0', '6', '6', '6', '66', '66', '66'], 4934096896cSGreg Roach 'מ' => ['0', '6', '6', '6'], 4944096896cSGreg Roach 'ם' => ['0', '', '6', '6'], 4954096896cSGreg Roach 'נמ' => ['0', '66', '66', '66'], 4964096896cSGreg Roach 'נם' => ['0', '66', '66', '66'], 4974096896cSGreg Roach 'ננ' => ['0', '6', '6', '6', '66', '66', '66'], 4984096896cSGreg Roach 'נן' => ['0', '6', '6', '6', '66', '66', '66'], 4994096896cSGreg Roach 'נ' => ['0', '6', '6', '6'], 5004096896cSGreg Roach 'ן' => ['0', '', '6', '6'], 5014096896cSGreg Roach 'סתש' => ['0', '2', '4', '4'], 5024096896cSGreg Roach 'סתז' => ['0', '2', '4', '4'], 5034096896cSGreg Roach 'סטז' => ['0', '2', '4', '4'], 5044096896cSGreg Roach 'סטש' => ['0', '2', '4', '4'], 5054096896cSGreg Roach 'סצד' => ['0', '2', '4', '4'], 5064096896cSGreg Roach 'סט' => ['0', '2', '4', '4', '43', '43', '43'], 5074096896cSGreg Roach 'סת' => ['0', '2', '4', '4', '43', '43', '43'], 5084096896cSGreg Roach 'סג' => ['0', '44', '44', 
'44', '4', '4', '4'], 5094096896cSGreg Roach 'סס' => ['0', '4', '4', '4', '44', '44', '44'], 5104096896cSGreg Roach 'סצ' => ['0', '44', '44', '44'], 5114096896cSGreg Roach 'סץ' => ['0', '44', '44', '44'], 5124096896cSGreg Roach 'סז' => ['0', '44', '44', '44'], 5134096896cSGreg Roach 'סש' => ['0', '44', '44', '44'], 5144096896cSGreg Roach 'ס' => ['0', '4', '4', '4'], 5154096896cSGreg Roach 'ע' => ['1', '0', '', ''], 5164096896cSGreg Roach 'פב' => ['0', '7', '7', '7', '77', '77', '77'], 5174096896cSGreg Roach 'פוו' => ['0', '7', '7', '7', '77', '77', '77'], 5184096896cSGreg Roach 'פפ' => ['0', '7', '7', '7', '77', '77', '77'], 5194096896cSGreg Roach 'פף' => ['0', '7', '7', '7', '77', '77', '77'], 5204096896cSGreg Roach 'פ' => ['0', '7', '7', '7'], 5214096896cSGreg Roach 'ף' => ['0', '', '7', '7'], 5224096896cSGreg Roach 'צג' => ['0', '44', '44', '44', '45', '45', '45'], 5234096896cSGreg Roach 'צז' => ['0', '44', '44', '44'], 5244096896cSGreg Roach 'צס' => ['0', '44', '44', '44'], 5254096896cSGreg Roach 'צצ' => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'], 5264096896cSGreg Roach 'צץ' => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'], 5274096896cSGreg Roach 'צש' => ['0', '44', '44', '44', '4', '4', '4', '5', '5', '5'], 5284096896cSGreg Roach 'צ' => ['0', '4', '4', '4', '5', '5', '5'], 5294096896cSGreg Roach 'ץ' => ['0', '', '4', '4'], 5304096896cSGreg Roach 'קה' => ['0', '55', '55', '5'], 5314096896cSGreg Roach 'קס' => ['0', '5', '54', '54'], 5324096896cSGreg Roach 'קש' => ['0', '5', '54', '54'], 5334096896cSGreg Roach 'קק' => ['0', '5', '5', '5', '55', '55', '55'], 5344096896cSGreg Roach 'קח' => ['0', '55', '55', '55'], 5354096896cSGreg Roach 'קכ' => ['0', '55', '55', '55'], 5364096896cSGreg Roach 'קך' => ['0', '55', '55', '55'], 5374096896cSGreg Roach 'קג' => ['0', '55', '55', '55', '54', '54', '54'], 5384096896cSGreg Roach 'ק' => ['0', '5', '5', '5'], 5394096896cSGreg Roach 'רר' => ['0', '99', 
'99', '99', '9', '9', '9'], 5404096896cSGreg Roach 'ר' => ['0', '9', '9', '9'], 5414096896cSGreg Roach 'שטז' => ['0', '2', '4', '4'], 5424096896cSGreg Roach 'שתש' => ['0', '2', '4', '4'], 5434096896cSGreg Roach 'שתז' => ['0', '2', '4', '4'], 5444096896cSGreg Roach 'שטש' => ['0', '2', '4', '4'], 5454096896cSGreg Roach 'שד' => ['0', '2', '43', '43'], 5464096896cSGreg Roach 'שז' => ['0', '44', '44', '44'], 5474096896cSGreg Roach 'שס' => ['0', '44', '44', '44'], 5484096896cSGreg Roach 'שת' => ['0', '2', '43', '43'], 5494096896cSGreg Roach 'שג' => ['0', '4', '4', '4', '44', '44', '44', '4', '43', '43'], 5504096896cSGreg Roach 'שט' => ['0', '2', '43', '43', '44', '44', '44'], 5514096896cSGreg Roach 'שצ' => ['0', '44', '44', '44', '45', '45', '45'], 5524096896cSGreg Roach 'שץ' => ['0', '44', '', '44', '45', '', '45'], 5534096896cSGreg Roach 'שש' => ['0', '4', '4', '4', '44', '44', '44'], 5544096896cSGreg Roach 'ש' => ['0', '4', '4', '4'], 5554096896cSGreg Roach 'תג' => ['0', '34', '34', '34'], 5564096896cSGreg Roach 'תז' => ['0', '34', '34', '34'], 5574096896cSGreg Roach 'תש' => ['0', '4', '4', '4'], 5584096896cSGreg Roach 'תת' => ['0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'], 5594096896cSGreg Roach 'ת' => ['0', '3', '3', '3', '4', '4', '4'], 560a25f0a04SGreg Roach // Arabic alphabet 5614096896cSGreg Roach 'ا' => ['1', '0', '', ''], 5624096896cSGreg Roach 'ب' => ['0', '7', '7', '7'], 5634096896cSGreg Roach 'ت' => ['0', '3', '3', '3'], 5644096896cSGreg Roach 'ث' => ['0', '3', '3', '3'], 5654096896cSGreg Roach 'ج' => ['0', '4', '4', '4'], 5664096896cSGreg Roach 'ح' => ['0', '5', '5', '5'], 5674096896cSGreg Roach 'خ' => ['0', '5', '5', '5'], 5684096896cSGreg Roach 'د' => ['0', '3', '3', '3'], 5694096896cSGreg Roach 'ذ' => ['0', '3', '3', '3'], 5704096896cSGreg Roach 'ر' => ['0', '9', '9', '9'], 5714096896cSGreg Roach 'ز' => ['0', '4', '4', '4'], 5724096896cSGreg Roach 'س' => ['0', '4', '4', '4'], 5734096896cSGreg 
Roach 'ش' => ['0', '4', '4', '4'], 5744096896cSGreg Roach 'ص' => ['0', '4', '4', '4'], 5754096896cSGreg Roach 'ض' => ['0', '3', '3', '3'], 5764096896cSGreg Roach 'ط' => ['0', '3', '3', '3'], 5774096896cSGreg Roach 'ظ' => ['0', '4', '4', '4'], 5784096896cSGreg Roach 'ع' => ['1', '0', '', ''], 5794096896cSGreg Roach 'غ' => ['0', '0', '', ''], 5804096896cSGreg Roach 'ف' => ['0', '7', '7', '7'], 5814096896cSGreg Roach 'ق' => ['0', '5', '5', '5'], 5824096896cSGreg Roach 'ك' => ['0', '5', '5', '5'], 5834096896cSGreg Roach 'ل' => ['0', '8', '8', '8'], 5844096896cSGreg Roach 'لا' => ['0', '8', '8', '8'], 5854096896cSGreg Roach 'م' => ['0', '6', '6', '6'], 5864096896cSGreg Roach 'ن' => ['0', '6', '6', '6'], 5874096896cSGreg Roach 'هن' => ['0', '66', '66', '66'], 5884096896cSGreg Roach 'ه' => ['0', '5', '5', ''], 5894096896cSGreg Roach 'و' => ['1', '', '', '', '7', '', ''], 5904096896cSGreg Roach 'ي' => ['0', '1', '', ''], 5914096896cSGreg Roach 'آ' => ['0', '1', '', ''], 5924096896cSGreg Roach 'ة' => ['0', '', '', '3'], 5934096896cSGreg Roach 'ی' => ['0', '1', '', ''], 5944096896cSGreg Roach 'ى' => ['1', '1', '', ''], 59513abd6f3SGreg Roach ]; 596a25f0a04SGreg Roach 597a25f0a04SGreg Roach /** 59816cfb0b9SGreg Roach * Which algorithms are supported. 59916cfb0b9SGreg Roach * 60024f2a3afSGreg Roach * @return array<string> 60116cfb0b9SGreg Roach */ 60216cfb0b9SGreg Roach public static function getAlgorithms(): array 60316cfb0b9SGreg Roach { 60416cfb0b9SGreg Roach return [ 605ad3143ccSGreg Roach /* I18N: https://en.wikipedia.org/wiki/Soundex */ 60616cfb0b9SGreg Roach 'std' => I18N::translate('Russell'), 607ad3143ccSGreg Roach /* I18N: https://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ 60816cfb0b9SGreg Roach 'dm' => I18N::translate('Daitch-Mokotoff'), 60916cfb0b9SGreg Roach ]; 61016cfb0b9SGreg Roach } 61116cfb0b9SGreg Roach 61216cfb0b9SGreg Roach /** 61316cfb0b9SGreg Roach * Is there a match between two soundex codes? 
/**
 * Is there a match between two soundex codes?
 *
 * Each argument is a colon-separated list of codes, as produced by
 * russell() or daitchMokotoff(). The lists match when they have at
 * least one code in common. An empty list never matches anything.
 *
 * @param string $soundex1
 * @param string $soundex2
 *
 * @return bool
 */
public static function compare(string $soundex1, string $soundex2): bool
{
    if ($soundex1 === '' || $soundex2 === '') {
        return false;
    }

    $shared = array_intersect(explode(':', $soundex1), explode(':', $soundex2));

    return $shared !== [];
}

/**
 * Generate Russell soundex codes for a given text.
 *
 * The text is split on spaces. A code is generated for each word and,
 * when there are several words, for all the words joined together.
 * Codes without any recognisable sound are left out, so the result may
 * be an empty string.
 *
 * @param string $text
 *
 * @return string Up to 51 distinct codes, separated by colons.
 */
public static function russell(string $text): string
{
    $words      = explode(' ', $text);
    $candidates = $words;

    // Combine words, e.g. “New York” as “Newyork”
    if (count($words) > 1) {
        $candidates[] = implode('', $words);
    }

    $soundex_array = [];

    foreach ($candidates as $candidate) {
        // Consecutive spaces produce empty words. These have no sound, and
        // soundex() did not return a usable code for them (false before PHP 8.0).
        if ($candidate === '') {
            continue;
        }

        $soundex = soundex($candidate);

        // Only return codes from recognisable sounds.
        // This applies to the combined words as well as to each single word.
        if ($soundex !== '0000') {
            $soundex_array[] = $soundex;
        }
    }

    // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
    $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

    return implode(':', $soundex_array);
}

/**
 * Generate Daitch–Mokotoff soundex codes for a given text.
 *
 * The text is split on spaces. Codes are generated for each word and,
 * when there are several words, for all the words joined together.
 *
 * @param string $text
 *
 * @return string Up to 36 distinct codes, separated by colons.
 */
public static function daitchMokotoff(string $text): string
{
    $words         = explode(' ', $text);
    $soundex_array = [];

    foreach ($words as $word) {
        $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
    }

    // Combine words, e.g. “New York” as “Newyork”
    if (count($words) > 1) {
        $combined      = str_replace(' ', '', $text);
        $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($combined));
    }

    // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
    $soundex_array = array_slice(array_unique($soundex_array), 0, 36);

    return implode(':', $soundex_array);
}
/**
 * Calculate the Daitch-Mokotoff soundex for a word.
 *
 * The word is converted to upper case, rewritten by each TRANSFORM_NAMES
 * rule in turn, and then scanned from left to right (by byte offset),
 * always taking the longest DM_SOUNDS key found at the current offset.
 *
 * Each DM_SOUNDS entry holds a flag in column 0, followed by one or more
 * triplets of sounds. The column used within a triplet depends on where
 * the chunk occurs: 1 = start of the word, 2 = before a vowel, 3 = other.
 * An entry with several triplets gives several alternatives for every
 * code built so far.
 *
 * @param string $name
 *
 * @return array<string> List of possible DM codes for the word.
 */
private static function daitchMokotoffWord(string $name): array
{
    // Normalise the input: upper case, then apply each rewrite rule in table order.
    $name = I18N::strtoupper($name);
    foreach (self::TRANSFORM_NAMES as [$search, $replacement]) {
        // NOTE(review): str_replace() matches literally, so a rule whose pattern
        // ends in "$" only fires on a literal dollar sign - confirm whether an
        // end-of-word anchor was intended.
        $name = str_replace($search, $replacement, $name);
    }

    // In Hebrew and Arabic text, a repeated sound is kept rather than dropped.
    $script         = I18N::textScript($name);
    $keepDuplicates = $script === 'Hebr' || $script === 'Arab';

    $lastPos = strlen($name) - 1;
    $currPos = 0;

    // Column of the sound table to read.
    // 1: start of input string, 2: before vowel, 3: other
    $state = 1;

    // Complete 6-digit codes.
    $result = [];

    // Incomplete codes, each one a list of sounds. The leading '!' can never
    // equal a real sound, so the first sound is never taken for a duplicate.
    $partialResult = [['!']];

    // Stop when the string is exhausted or when no more partial results remain.
    while ($partialResult !== [] && $currPos <= $lastPos) {
        $chunk = self::longestSoundKey($name, $currPos);

        if ($chunk === '') {
            // Nothing in the table starts at this byte: skip it and try the next.
            $currPos++;
            continue;
        }

        $sounds        = self::DM_SOUNDS[$chunk];
        $columns       = count($sounds);
        $pending       = $partialResult;
        $partialResult = [];

        $currPos += strlen($chunk);

        // Anywhere but at the start of the input, the column depends on whether
        // the chunk that follows is a vowel (its flag is something other than '0').
        if ($state !== 1) {
            $following   = $currPos <= $lastPos ? self::longestSoundKey($name, $currPos) : '';
            $beforeVowel = $following !== '' && self::DM_SOUNDS[$following][0] !== '0';
            $state       = $beforeVowel ? 2 : 3;
        }

        // Read the same column from every triplet of the entry.
        for (; $state < $columns; $state += 3) {
            $sound = $sounds[$state];

            foreach ($pending as $sequence) {
                $previous = count($sequence) - 1;

                if ($sound === '') {
                    // Empty means 'ignore this sound in this state'. Marking the
                    // previous sound with '!' prevents false 'doubles' later on.
                    $sequence[$previous] .= '!';
                    $partialResult[]     = $sequence;
                    continue;
                }

                // A repeat of the previous sound is dropped, except for
                // Hebrew and Arabic, where both occurrences are kept.
                if ($keepDuplicates || $sound !== $sequence[$previous]) {
                    $sequence[] = $sound;
                }

                if (count($sequence) < 7) {
                    $partialResult[] = $sequence;
                    continue;
                }

                // Seven elements are the initial '!' plus six sounds: the code is complete.
                $code = self::finishCode($sequence);
                if ($code !== '') {
                    $result[] = $code;
                }
            }
        }
    }

    // Zero-fill and copy all remaining partial results.
    foreach ($partialResult as $sequence) {
        $code = self::finishCode($sequence);
        if ($code !== '') {
            $result[] = $code;
        }
    }

    return $result;
}

/**
 * Find the longest key of the sound table that starts at a given offset.
 *
 * @param string $text
 * @param int    $offset Byte offset into $text
 *
 * @return string The key, or an empty string when no key starts at this offset.
 */
private static function longestSoundKey(string $text, int $offset): string
{
    // Begin with the longest possible chunk, then drop one trailing byte at a time.
    for ($key = substr($text, $offset, self::MAXCHAR); $key !== ''; $key = substr($key, 0, -1)) {
        if (isset(self::DM_SOUNDS[$key])) {
            return $key;
        }
    }

    return '';
}

/**
 * Convert a list of sounds to a 6-digit code.
 *
 * @param array<string> $sequence Sounds, which may carry '!' markers
 *
 * @return string The zero-filled code, or an empty string when there are no digits.
 */
private static function finishCode(array $sequence): string
{
    $digits = str_replace('!', '', implode('', $sequence));

    // Only return codes from recognisable sounds.
    return $digits === '' ? '' : substr($digits . '000000', 0, 6);
}
} // end of class Soundex