xref: /webtrees/app/Soundex.php (revision 5bfc689774bb9a6401271c4ed15a6d50652c991b)
1a25f0a04SGreg Roach<?php
23976b470SGreg Roach
3a25f0a04SGreg Roach/**
4a25f0a04SGreg Roach * webtrees: online genealogy
5*5bfc6897SGreg Roach * Copyright (C) 2022 webtrees development team
6a25f0a04SGreg Roach * This program is free software: you can redistribute it and/or modify
7a25f0a04SGreg Roach * it under the terms of the GNU General Public License as published by
8a25f0a04SGreg Roach * the Free Software Foundation, either version 3 of the License, or
9a25f0a04SGreg Roach * (at your option) any later version.
10a25f0a04SGreg Roach * This program is distributed in the hope that it will be useful,
11a25f0a04SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
12a25f0a04SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13a25f0a04SGreg Roach * GNU General Public License for more details.
14a25f0a04SGreg Roach * You should have received a copy of the GNU General Public License
1589f7189bSGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>.
16a25f0a04SGreg Roach */
17fcfa147eSGreg Roach
18e7f56f2aSGreg Roachdeclare(strict_types=1);
19e7f56f2aSGreg Roach
2076692c8bSGreg Roachnamespace Fisharebest\Webtrees;
21a25f0a04SGreg Roach
22a25f0a04SGreg Roach/**
2376692c8bSGreg Roach * Phonetic matching of strings.
24a25f0a04SGreg Roach */
25c1010edaSGreg Roachclass Soundex
26c1010edaSGreg Roach{
27a25f0a04SGreg Roach    // Determine the Daitch–Mokotoff Soundex code for a word
28a25f0a04SGreg Roach    // Original implementation by Gerry Kroll, and analysis by Meliza Amity
29a25f0a04SGreg Roach
30a25f0a04SGreg Roach    // Maximum length of a DM_SOUNDS table key, counted in bytes -- NOT in UTF-8 characters!
3116cfb0b9SGreg Roach    private const MAXCHAR = 7; // e.g. 'SCHTSCH' (7 bytes) is the longest key in the visible part of the table
32a25f0a04SGreg Roach
33a25f0a04SGreg Roach    /**
34a25f0a04SGreg Roach     * Name transformation arrays.
35a25f0a04SGreg Roach     * Used to transform the Name string to simplify the "sounds like" table.
36a25f0a04SGreg Roach     * This is especially useful in Hebrew.
37a25f0a04SGreg Roach     *
38a25f0a04SGreg Roach     * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text)
39a25f0a04SGreg Roach     * function call to achieve the desired transformations.
40a25f0a04SGreg Roach     *
41a25f0a04SGreg Roach     * Note about the use of "\x01":
42a25f0a04SGreg Roach     * This code, which can’t legitimately occur in the kind of text we're dealing with,
43a25f0a04SGreg Roach     * is used as a place-holder so that conditional string replacements can be done.
     *
     * NOTE(review): the entries whose "from" ends in "$" only behave as end-of-word
     * matches if the consumer treats "from" as a regular expression -- confirm against
     * the calling code.
     * NOTE(review): the first "\x01" entry presumably strips any stray place-holder from
     * the input before the place-holder is relied upon, and the last one restores the
     * protected letters -- this assumes the entries are applied in array order.
44a25f0a04SGreg Roach     */
4516cfb0b9SGreg Roach    private const TRANSFORM_NAMES = [
46a25f0a04SGreg Roach        // Force Yiddish ligatures to be treated as separate letters
474096896cSGreg Roach        ['װ', 'וו'],
484096896cSGreg Roach        ['ײ', 'יי'],
494096896cSGreg Roach        ['ױ', 'וי'],
504096896cSGreg Roach        ['בו', 'בע'],
514096896cSGreg Roach        ['פו', 'פע'],
524096896cSGreg Roach        ['ומ', 'עמ'],
534096896cSGreg Roach        ['ום', 'עם'],
544096896cSGreg Roach        ['ונ', 'ענ'],
554096896cSGreg Roach        ['ון', 'ען'],
564096896cSGreg Roach        ['וו', 'ב'],
574096896cSGreg Roach        ["\x01", ''],
584096896cSGreg Roach        ['ייה$', "\x01ה"],
594096896cSGreg Roach        ['ייע$', "\x01ע"],
604096896cSGreg Roach        ['יי', 'ע'],
614096896cSGreg Roach        ["\x01", 'יי'],
6213abd6f3SGreg Roach    ];
63a25f0a04SGreg Roach
64a25f0a04SGreg Roach    /**
65a25f0a04SGreg Roach     * The DM sound coding table is organized this way:
66a25f0a04SGreg Roach     * key: a variable-length string that corresponds to the UTF-8 character sequence
67a25f0a04SGreg Roach     * represented by the table entry. Currently, that string can be up to 7
68a25f0a04SGreg Roach     * bytes long. This maximum length is defined by the value of the class constant
69a25f0a04SGreg Roach     * MAXCHAR.
70a25f0a04SGreg Roach     *
71a25f0a04SGreg Roach     * value: an array as follows:
72a25f0a04SGreg Roach     * [0]:  zero if not a vowel
73a25f0a04SGreg Roach     * [1]:  sound value when this string is at the beginning of the word
74a25f0a04SGreg Roach     * [2]:  sound value when this string is followed by a vowel
75a25f0a04SGreg Roach     * [3]:  sound value for other cases
76a25f0a04SGreg Roach     * [1],[2],[3] can be repeated several times to create branches in the code
77a25f0a04SGreg Roach     * an empty sound value means "ignore in this state"
78a25f0a04SGreg Roach     */
7916cfb0b9SGreg Roach    private const DM_SOUNDS = [
804096896cSGreg Roach        'A'       => ['1', '0', '', ''],
814096896cSGreg Roach        'À'       => ['1', '0', '', ''],
824096896cSGreg Roach        'Á'       => ['1', '0', '', ''],
834096896cSGreg Roach        'Â'       => ['1', '0', '', ''],
844096896cSGreg Roach        'Ã'       => ['1', '0', '', ''],
854096896cSGreg Roach        'Ä'       => ['1', '0', '1', '', '0', '', ''],
864096896cSGreg Roach        'Å'       => ['1', '0', '', ''],
874096896cSGreg Roach        'Ă'       => ['1', '0', '', ''],
884096896cSGreg Roach        'Ą'       => ['1', '', '', '', '', '', '6'],
894096896cSGreg Roach        'Ạ'       => ['1', '0', '', ''],
904096896cSGreg Roach        'Ả'       => ['1', '0', '', ''],
914096896cSGreg Roach        'Ấ'       => ['1', '0', '', ''],
924096896cSGreg Roach        'Ầ'       => ['1', '0', '', ''],
934096896cSGreg Roach        'Ẩ'       => ['1', '0', '', ''],
944096896cSGreg Roach        'Ẫ'       => ['1', '0', '', ''],
954096896cSGreg Roach        'Ậ'       => ['1', '0', '', ''],
964096896cSGreg Roach        'Ắ'       => ['1', '0', '', ''],
974096896cSGreg Roach        'Ằ'       => ['1', '0', '', ''],
984096896cSGreg Roach        'Ẳ'       => ['1', '0', '', ''],
994096896cSGreg Roach        'Ẵ'       => ['1', '0', '', ''],
1004096896cSGreg Roach        'Ặ'       => ['1', '0', '', ''],
1014096896cSGreg Roach        'AE'      => ['1', '0', '1', ''],
1024096896cSGreg Roach        'Æ'       => ['1', '0', '1', ''],
1034096896cSGreg Roach        'AI'      => ['1', '0', '1', ''],
1044096896cSGreg Roach        'AJ'      => ['1', '0', '1', ''],
1054096896cSGreg Roach        'AU'      => ['1', '0', '7', ''],
1064096896cSGreg Roach        'AV'      => ['1', '0', '7', '', '7', '7', '7'],
1074096896cSGreg Roach        'ÄU'      => ['1', '0', '1', ''],
1084096896cSGreg Roach        'AY'      => ['1', '0', '1', ''],
1094096896cSGreg Roach        'B'       => ['0', '7', '7', '7'],
1104096896cSGreg Roach        'C'       => ['0', '5', '5', '5', '34', '4', '4'],
1114096896cSGreg Roach        'Ć'       => ['0', '4', '4', '4'],
1124096896cSGreg Roach        'Č'       => ['0', '4', '4', '4'],
1134096896cSGreg Roach        'Ç'       => ['0', '4', '4', '4'],
1144096896cSGreg Roach        'CH'      => ['0', '5', '5', '5', '34', '4', '4'],
1154096896cSGreg Roach        'CHS'     => ['0', '5', '54', '54'],
1164096896cSGreg Roach        'CK'      => ['0', '5', '5', '5', '45', '45', '45'],
1174096896cSGreg Roach        'CCS'     => ['0', '4', '4', '4'],
1184096896cSGreg Roach        'CS'      => ['0', '4', '4', '4'],
1194096896cSGreg Roach        'CSZ'     => ['0', '4', '4', '4'],
1204096896cSGreg Roach        'CZ'      => ['0', '4', '4', '4'],
1214096896cSGreg Roach        'CZS'     => ['0', '4', '4', '4'],
1224096896cSGreg Roach        'D'       => ['0', '3', '3', '3'],
1234096896cSGreg Roach        'Ď'       => ['0', '3', '3', '3'],
1244096896cSGreg Roach        'Đ'       => ['0', '3', '3', '3'],
1254096896cSGreg Roach        'DRS'     => ['0', '4', '4', '4'],
1264096896cSGreg Roach        'DRZ'     => ['0', '4', '4', '4'],
1274096896cSGreg Roach        'DS'      => ['0', '4', '4', '4'],
1284096896cSGreg Roach        'DSH'     => ['0', '4', '4', '4'],
1294096896cSGreg Roach        'DSZ'     => ['0', '4', '4', '4'],
1304096896cSGreg Roach        'DT'      => ['0', '3', '3', '3'],
1314096896cSGreg Roach        'DDZ'     => ['0', '4', '4', '4'],
1324096896cSGreg Roach        'DDZS'    => ['0', '4', '4', '4'],
1334096896cSGreg Roach        'DZ'      => ['0', '4', '4', '4'],
1344096896cSGreg Roach        'DŹ'      => ['0', '4', '4', '4'],
1354096896cSGreg Roach        'DŻ'      => ['0', '4', '4', '4'],
1364096896cSGreg Roach        'DZH'     => ['0', '4', '4', '4'],
1374096896cSGreg Roach        'DZS'     => ['0', '4', '4', '4'],
1384096896cSGreg Roach        'E'       => ['1', '0', '', ''],
1394096896cSGreg Roach        'È'       => ['1', '0', '', ''],
1404096896cSGreg Roach        'É'       => ['1', '0', '', ''],
1414096896cSGreg Roach        'Ê'       => ['1', '0', '', ''],
1424096896cSGreg Roach        'Ë'       => ['1', '0', '', ''],
1434096896cSGreg Roach        'Ĕ'       => ['1', '0', '', ''],
1444096896cSGreg Roach        'Ė'       => ['1', '0', '', ''],
1454096896cSGreg Roach        'Ę'       => ['1', '', '', '6', '', '', ''],
1464096896cSGreg Roach        'Ẹ'       => ['1', '0', '', ''],
1474096896cSGreg Roach        'Ẻ'       => ['1', '0', '', ''],
1484096896cSGreg Roach        'Ẽ'       => ['1', '0', '', ''],
1494096896cSGreg Roach        'Ế'       => ['1', '0', '', ''],
1504096896cSGreg Roach        'Ề'       => ['1', '0', '', ''],
1514096896cSGreg Roach        'Ể'       => ['1', '0', '', ''],
1524096896cSGreg Roach        'Ễ'       => ['1', '0', '', ''],
1534096896cSGreg Roach        'Ệ'       => ['1', '0', '', ''],
1544096896cSGreg Roach        'EAU'     => ['1', '0', '', ''],
1554096896cSGreg Roach        'EI'      => ['1', '0', '1', ''],
1564096896cSGreg Roach        'EJ'      => ['1', '0', '1', ''],
1574096896cSGreg Roach        'EU'      => ['1', '1', '1', ''],
1584096896cSGreg Roach        'EY'      => ['1', '0', '1', ''],
1594096896cSGreg Roach        'F'       => ['0', '7', '7', '7'],
1604096896cSGreg Roach        'FB'      => ['0', '7', '7', '7'],
1614096896cSGreg Roach        'G'       => ['0', '5', '5', '5', '34', '4', '4'],
1624096896cSGreg Roach        'Ğ'       => ['0', '', '', ''],
1634096896cSGreg Roach        'GGY'     => ['0', '5', '5', '5'],
1644096896cSGreg Roach        'GY'      => ['0', '5', '5', '5'],
1654096896cSGreg Roach        'H'       => ['0', '5', '5', '', '5', '5', '5'],
1664096896cSGreg Roach        'I'       => ['1', '0', '', ''],
1674096896cSGreg Roach        'Ì'       => ['1', '0', '', ''],
1684096896cSGreg Roach        'Í'       => ['1', '0', '', ''],
1694096896cSGreg Roach        'Î'       => ['1', '0', '', ''],
1704096896cSGreg Roach        'Ï'       => ['1', '0', '', ''],
1714096896cSGreg Roach        'Ĩ'       => ['1', '0', '', ''],
1724096896cSGreg Roach        'Į'       => ['1', '0', '', ''],
1734096896cSGreg Roach        'İ'       => ['1', '0', '', ''],
1744096896cSGreg Roach        'Ỉ'       => ['1', '0', '', ''],
1754096896cSGreg Roach        'Ị'       => ['1', '0', '', ''],
1764096896cSGreg Roach        'IA'      => ['1', '1', '', ''],
1774096896cSGreg Roach        'IE'      => ['1', '1', '', ''],
1784096896cSGreg Roach        'IO'      => ['1', '1', '', ''],
1794096896cSGreg Roach        'IU'      => ['1', '1', '', ''],
1804096896cSGreg Roach        'J'       => ['0', '1', '', '', '4', '4', '4', '5', '5', ''],
1814096896cSGreg Roach        'K'       => ['0', '5', '5', '5'],
1824096896cSGreg Roach        'KH'      => ['0', '5', '5', '5'],
1834096896cSGreg Roach        'KS'      => ['0', '5', '54', '54'],
1844096896cSGreg Roach        'L'       => ['0', '8', '8', '8'],
1854096896cSGreg Roach        'Ľ'       => ['0', '8', '8', '8'],
1864096896cSGreg Roach        'Ĺ'       => ['0', '8', '8', '8'],
1874096896cSGreg Roach        'Ł'       => ['0', '7', '7', '7', '8', '8', '8'],
1884096896cSGreg Roach        'LL'      => ['0', '8', '8', '8', '58', '8', '8', '1', '8', '8'],
1894096896cSGreg Roach        'LLY'     => ['0', '8', '8', '8', '1', '8', '8'],
1904096896cSGreg Roach        'LY'      => ['0', '8', '8', '8', '1', '8', '8'],
1914096896cSGreg Roach        'M'       => ['0', '6', '6', '6'],
1924096896cSGreg Roach        'MĔ'      => ['0', '66', '66', '66'],
1934096896cSGreg Roach        'MN'      => ['0', '66', '66', '66'],
1944096896cSGreg Roach        'N'       => ['0', '6', '6', '6'],
1954096896cSGreg Roach        'Ń'       => ['0', '6', '6', '6'],
1964096896cSGreg Roach        'Ň'       => ['0', '6', '6', '6'],
1974096896cSGreg Roach        'Ñ'       => ['0', '6', '6', '6'],
1984096896cSGreg Roach        'NM'      => ['0', '66', '66', '66'],
1994096896cSGreg Roach        'O'       => ['1', '0', '', ''],
2004096896cSGreg Roach        'Ò'       => ['1', '0', '', ''],
2014096896cSGreg Roach        'Ó'       => ['1', '0', '', ''],
2024096896cSGreg Roach        'Ô'       => ['1', '0', '', ''],
2034096896cSGreg Roach        'Õ'       => ['1', '0', '', ''],
2044096896cSGreg Roach        'Ö'       => ['1', '0', '', ''],
2054096896cSGreg Roach        'Ø'       => ['1', '0', '', ''],
2064096896cSGreg Roach        'Ő'       => ['1', '0', '', ''],
2074096896cSGreg Roach        'Œ'       => ['1', '0', '', ''],
2084096896cSGreg Roach        'Ơ'       => ['1', '0', '', ''],
2094096896cSGreg Roach        'Ọ'       => ['1', '0', '', ''],
2104096896cSGreg Roach        'Ỏ'       => ['1', '0', '', ''],
2114096896cSGreg Roach        'Ố'       => ['1', '0', '', ''],
2124096896cSGreg Roach        'Ồ'       => ['1', '0', '', ''],
2134096896cSGreg Roach        'Ổ'       => ['1', '0', '', ''],
2144096896cSGreg Roach        'Ỗ'       => ['1', '0', '', ''],
2154096896cSGreg Roach        'Ộ'       => ['1', '0', '', ''],
2164096896cSGreg Roach        'Ớ'       => ['1', '0', '', ''],
2174096896cSGreg Roach        'Ờ'       => ['1', '0', '', ''],
2184096896cSGreg Roach        'Ở'       => ['1', '0', '', ''],
2194096896cSGreg Roach        'Ỡ'       => ['1', '0', '', ''],
2204096896cSGreg Roach        'Ợ'       => ['1', '0', '', ''],
2214096896cSGreg Roach        'OE'      => ['1', '0', '', ''],
2224096896cSGreg Roach        'OI'      => ['1', '0', '1', ''],
2234096896cSGreg Roach        'OJ'      => ['1', '0', '1', ''],
2244096896cSGreg Roach        'OU'      => ['1', '0', '', ''],
2254096896cSGreg Roach        'OY'      => ['1', '0', '1', ''],
2264096896cSGreg Roach        'P'       => ['0', '7', '7', '7'],
2274096896cSGreg Roach        'PF'      => ['0', '7', '7', '7'],
2284096896cSGreg Roach        'PH'      => ['0', '7', '7', '7'],
2294096896cSGreg Roach        'Q'       => ['0', '5', '5', '5'],
2304096896cSGreg Roach        'R'       => ['0', '9', '9', '9'],
2314096896cSGreg Roach        'Ř'       => ['0', '4', '4', '4'],
2324096896cSGreg Roach        'RS'      => ['0', '4', '4', '4', '94', '94', '94'],
2334096896cSGreg Roach        'RZ'      => ['0', '4', '4', '4', '94', '94', '94'],
2344096896cSGreg Roach        'S'       => ['0', '4', '4', '4'],
2354096896cSGreg Roach        'Ś'       => ['0', '4', '4', '4'],
2364096896cSGreg Roach        'Š'       => ['0', '4', '4', '4'],
2374096896cSGreg Roach        'Ş'       => ['0', '4', '4', '4'],
2384096896cSGreg Roach        'SC'      => ['0', '2', '4', '4'],
2394096896cSGreg Roach        'ŠČ'      => ['0', '2', '4', '4'],
2404096896cSGreg Roach        'SCH'     => ['0', '4', '4', '4'],
2414096896cSGreg Roach        'SCHD'    => ['0', '2', '43', '43'],
2424096896cSGreg Roach        'SCHT'    => ['0', '2', '43', '43'],
2434096896cSGreg Roach        'SCHTCH'  => ['0', '2', '4', '4'],
2444096896cSGreg Roach        'SCHTSCH' => ['0', '2', '4', '4'],
2454096896cSGreg Roach        'SCHTSH'  => ['0', '2', '4', '4'],
2464096896cSGreg Roach        'SD'      => ['0', '2', '43', '43'],
2474096896cSGreg Roach        'SH'      => ['0', '4', '4', '4'],
2484096896cSGreg Roach        'SHCH'    => ['0', '2', '4', '4'],
2494096896cSGreg Roach        'SHD'     => ['0', '2', '43', '43'],
2504096896cSGreg Roach        'SHT'     => ['0', '2', '43', '43'],
2514096896cSGreg Roach        'SHTCH'   => ['0', '2', '4', '4'],
2524096896cSGreg Roach        'SHTSH'   => ['0', '2', '4', '4'],
2534096896cSGreg Roach        'ß'       => ['0', '', '4', '4'],
2544096896cSGreg Roach        'ST'      => ['0', '2', '43', '43'],
2554096896cSGreg Roach        'STCH'    => ['0', '2', '4', '4'],
2564096896cSGreg Roach        'STRS'    => ['0', '2', '4', '4'],
2574096896cSGreg Roach        'STRZ'    => ['0', '2', '4', '4'],
2584096896cSGreg Roach        'STSCH'   => ['0', '2', '4', '4'],
2594096896cSGreg Roach        'STSH'    => ['0', '2', '4', '4'],
2604096896cSGreg Roach        'SSZ'     => ['0', '4', '4', '4'],
2614096896cSGreg Roach        'SZ'      => ['0', '4', '4', '4'],
2624096896cSGreg Roach        'SZCS'    => ['0', '2', '4', '4'],
2634096896cSGreg Roach        'SZCZ'    => ['0', '2', '4', '4'],
2644096896cSGreg Roach        'SZD'     => ['0', '2', '43', '43'],
2654096896cSGreg Roach        'SZT'     => ['0', '2', '43', '43'],
2664096896cSGreg Roach        'T'       => ['0', '3', '3', '3'],
2674096896cSGreg Roach        'Ť'       => ['0', '3', '3', '3'],
2684096896cSGreg Roach        'Ţ'       => ['0', '3', '3', '3', '4', '4', '4'],
2694096896cSGreg Roach        'TC'      => ['0', '4', '4', '4'],
2704096896cSGreg Roach        'TCH'     => ['0', '4', '4', '4'],
2714096896cSGreg Roach        'TH'      => ['0', '3', '3', '3'],
2724096896cSGreg Roach        'TRS'     => ['0', '4', '4', '4'],
2734096896cSGreg Roach        'TRZ'     => ['0', '4', '4', '4'],
2744096896cSGreg Roach        'TS'      => ['0', '4', '4', '4'],
2754096896cSGreg Roach        'TSCH'    => ['0', '4', '4', '4'],
2764096896cSGreg Roach        'TSH'     => ['0', '4', '4', '4'],
2774096896cSGreg Roach        'TSZ'     => ['0', '4', '4', '4'],
2784096896cSGreg Roach        'TTCH'    => ['0', '4', '4', '4'],
2794096896cSGreg Roach        'TTS'     => ['0', '4', '4', '4'],
2804096896cSGreg Roach        'TTSCH'   => ['0', '4', '4', '4'],
2814096896cSGreg Roach        'TTSZ'    => ['0', '4', '4', '4'],
2824096896cSGreg Roach        'TTZ'     => ['0', '4', '4', '4'],
2834096896cSGreg Roach        'TZ'      => ['0', '4', '4', '4'],
2844096896cSGreg Roach        'TZS'     => ['0', '4', '4', '4'],
2854096896cSGreg Roach        'U'       => ['1', '0', '', ''],
2864096896cSGreg Roach        'Ù'       => ['1', '0', '', ''],
2874096896cSGreg Roach        'Ú'       => ['1', '0', '', ''],
2884096896cSGreg Roach        'Û'       => ['1', '0', '', ''],
2894096896cSGreg Roach        'Ü'       => ['1', '0', '', ''],
2904096896cSGreg Roach        'Ũ'       => ['1', '0', '', ''],
2914096896cSGreg Roach        'Ū'       => ['1', '0', '', ''],
2924096896cSGreg Roach        'Ů'       => ['1', '0', '', ''],
2934096896cSGreg Roach        'Ű'       => ['1', '0', '', ''],
2944096896cSGreg Roach        'Ų'       => ['1', '0', '', ''],
2954096896cSGreg Roach        'Ư'       => ['1', '0', '', ''],
2964096896cSGreg Roach        'Ụ'       => ['1', '0', '', ''],
2974096896cSGreg Roach        'Ủ'       => ['1', '0', '', ''],
2984096896cSGreg Roach        'Ứ'       => ['1', '0', '', ''],
2994096896cSGreg Roach        'Ừ'       => ['1', '0', '', ''],
3004096896cSGreg Roach        'Ử'       => ['1', '0', '', ''],
3014096896cSGreg Roach        'Ữ'       => ['1', '0', '', ''],
3024096896cSGreg Roach        'Ự'       => ['1', '0', '', ''],
3034096896cSGreg Roach        'UE'      => ['1', '0', '', ''],
3044096896cSGreg Roach        'UI'      => ['1', '0', '1', ''],
3054096896cSGreg Roach        'UJ'      => ['1', '0', '1', ''],
3064096896cSGreg Roach        'UY'      => ['1', '0', '1', ''],
3074096896cSGreg Roach        'UW'      => ['1', '0', '1', '', '0', '7', '7'],
3084096896cSGreg Roach        'V'       => ['0', '7', '7', '7'],
3094096896cSGreg Roach        'W'       => ['0', '7', '7', '7'],
3104096896cSGreg Roach        'X'       => ['0', '5', '54', '54'],
3114096896cSGreg Roach        'Y'       => ['1', '1', '', ''],
3124096896cSGreg Roach        'Ý'       => ['1', '1', '', ''],
3134096896cSGreg Roach        'Ỳ'       => ['1', '1', '', ''],
3144096896cSGreg Roach        'Ỵ'       => ['1', '1', '', ''],
3154096896cSGreg Roach        'Ỷ'       => ['1', '1', '', ''],
3164096896cSGreg Roach        'Ỹ'       => ['1', '1', '', ''],
3174096896cSGreg Roach        'Z'       => ['0', '4', '4', '4'],
3184096896cSGreg Roach        'Ź'       => ['0', '4', '4', '4'],
3194096896cSGreg Roach        'Ż'       => ['0', '4', '4', '4'],
3204096896cSGreg Roach        'Ž'       => ['0', '4', '4', '4'],
3214096896cSGreg Roach        'ZD'      => ['0', '2', '43', '43'],
3224096896cSGreg Roach        'ZDZ'     => ['0', '2', '4', '4'],
3234096896cSGreg Roach        'ZDZH'    => ['0', '2', '4', '4'],
3244096896cSGreg Roach        'ZH'      => ['0', '4', '4', '4'],
3254096896cSGreg Roach        'ZHD'     => ['0', '2', '43', '43'],
3264096896cSGreg Roach        'ZHDZH'   => ['0', '2', '4', '4'],
3274096896cSGreg Roach        'ZS'      => ['0', '4', '4', '4'],
3284096896cSGreg Roach        'ZSCH'    => ['0', '4', '4', '4'],
3294096896cSGreg Roach        'ZSH'     => ['0', '4', '4', '4'],
3304096896cSGreg Roach        'ZZS'     => ['0', '4', '4', '4'],
331a25f0a04SGreg Roach        // Cyrillic alphabet
3324096896cSGreg Roach        'А'       => ['1', '0', '', ''],
3334096896cSGreg Roach        'Б'       => ['0', '7', '7', '7'],
3344096896cSGreg Roach        'В'       => ['0', '7', '7', '7'],
3354096896cSGreg Roach        'Г'       => ['0', '5', '5', '5'],
3364096896cSGreg Roach        'Д'       => ['0', '3', '3', '3'],
3374096896cSGreg Roach        'ДЗ'      => ['0', '4', '4', '4'],
3384096896cSGreg Roach        'Е'       => ['1', '0', '', ''],
3394096896cSGreg Roach        'Ё'       => ['1', '0', '', ''],
3404096896cSGreg Roach        'Ж'       => ['0', '4', '4', '4'],
3414096896cSGreg Roach        'З'       => ['0', '4', '4', '4'],
3424096896cSGreg Roach        'И'       => ['1', '0', '', ''],
3434096896cSGreg Roach        'Й'       => ['1', '1', '', '', '4', '4', '4'],
3444096896cSGreg Roach        'К'       => ['0', '5', '5', '5'],
3454096896cSGreg Roach        'Л'       => ['0', '8', '8', '8'],
3464096896cSGreg Roach        'М'       => ['0', '6', '6', '6'],
3474096896cSGreg Roach        'Н'       => ['0', '6', '6', '6'],
3484096896cSGreg Roach        'О'       => ['1', '0', '', ''],
3494096896cSGreg Roach        'П'       => ['0', '7', '7', '7'],
3504096896cSGreg Roach        'Р'       => ['0', '9', '9', '9'],
3514096896cSGreg Roach        'РЖ'      => ['0', '4', '4', '4'],
3524096896cSGreg Roach        'С'       => ['0', '4', '4', '4'],
3534096896cSGreg Roach        'Т'       => ['0', '3', '3', '3'],
3544096896cSGreg Roach        'У'       => ['1', '0', '', ''],
3554096896cSGreg Roach        'Ф'       => ['0', '7', '7', '7'],
3564096896cSGreg Roach        'Х'       => ['0', '5', '5', '5'],
3574096896cSGreg Roach        'Ц'       => ['0', '4', '4', '4'],
3584096896cSGreg Roach        'Ч'       => ['0', '4', '4', '4'],
3594096896cSGreg Roach        'Ш'       => ['0', '4', '4', '4'],
3604096896cSGreg Roach        'Щ'       => ['0', '2', '4', '4'],
3614096896cSGreg Roach        'Ъ'       => ['0', '', '', ''],
3624096896cSGreg Roach        'Ы'       => ['0', '1', '', ''],
3634096896cSGreg Roach        'Ь'       => ['0', '', '', ''],
3644096896cSGreg Roach        'Э'       => ['1', '0', '', ''],
3654096896cSGreg Roach        'Ю'       => ['0', '1', '', ''],
3664096896cSGreg Roach        'Я'       => ['0', '1', '', ''],
367a25f0a04SGreg Roach        // Greek alphabet
3684096896cSGreg Roach        'Α'       => ['1', '0', '', ''],
3694096896cSGreg Roach        'Ά'       => ['1', '0', '', ''],
3704096896cSGreg Roach        'ΑΙ'      => ['1', '0', '1', ''],
3714096896cSGreg Roach        'ΑΥ'      => ['1', '0', '1', ''],
3724096896cSGreg Roach        'Β'       => ['0', '7', '7', '7'],
3734096896cSGreg Roach        'Γ'       => ['0', '5', '5', '5'],
3744096896cSGreg Roach        'Δ'       => ['0', '3', '3', '3'],
3754096896cSGreg Roach        'Ε'       => ['1', '0', '', ''],
3764096896cSGreg Roach        'Έ'       => ['1', '0', '', ''],
3774096896cSGreg Roach        'ΕΙ'      => ['1', '0', '1', ''],
3784096896cSGreg Roach        'ΕΥ'      => ['1', '1', '1', ''],
3794096896cSGreg Roach        'Ζ'       => ['0', '4', '4', '4'],
3804096896cSGreg Roach        'Η'       => ['1', '0', '', ''],
3814096896cSGreg Roach        'Ή'       => ['1', '0', '', ''],
3824096896cSGreg Roach        'Θ'       => ['0', '3', '3', '3'],
3834096896cSGreg Roach        'Ι'       => ['1', '0', '', ''],
3844096896cSGreg Roach        'Ί'       => ['1', '0', '', ''],
3854096896cSGreg Roach        'Ϊ'       => ['1', '0', '', ''],
3864096896cSGreg Roach        'ΐ'       => ['1', '0', '', ''],
3874096896cSGreg Roach        'Κ'       => ['0', '5', '5', '5'],
3884096896cSGreg Roach        'Λ'       => ['0', '8', '8', '8'],
3894096896cSGreg Roach        'Μ'       => ['0', '6', '6', '6'],
3904096896cSGreg Roach        'ΜΠ'      => ['0', '7', '7', '7'],
3914096896cSGreg Roach        'Ν'       => ['0', '6', '6', '6'],
3924096896cSGreg Roach        'ΝΤ'      => ['0', '3', '3', '3'],
3934096896cSGreg Roach        'Ξ'       => ['0', '5', '54', '54'],
3944096896cSGreg Roach        'Ο'       => ['1', '0', '', ''],
3954096896cSGreg Roach        'Ό'       => ['1', '0', '', ''],
3964096896cSGreg Roach        'ΟΙ'      => ['1', '0', '1', ''],
3974096896cSGreg Roach        'ΟΥ'      => ['1', '0', '1', ''],
3984096896cSGreg Roach        'Π'       => ['0', '7', '7', '7'],
3994096896cSGreg Roach        'Ρ'       => ['0', '9', '9', '9'],
4004096896cSGreg Roach        'Σ'       => ['0', '4', '4', '4'],
4014096896cSGreg Roach        'ς'       => ['0', '', '', '4'],
4024096896cSGreg Roach        'Τ'       => ['0', '3', '3', '3'],
4034096896cSGreg Roach        'ΤΖ'      => ['0', '4', '4', '4'],
4044096896cSGreg Roach        'ΤΣ'      => ['0', '4', '4', '4'],
4054096896cSGreg Roach        'Υ'       => ['1', '1', '', ''],
4064096896cSGreg Roach        'Ύ'       => ['1', '1', '', ''],
4074096896cSGreg Roach        'Ϋ'       => ['1', '1', '', ''],
4084096896cSGreg Roach        'ΰ'       => ['1', '1', '', ''],
4094096896cSGreg Roach        'ΥΚ'      => ['1', '5', '5', '5'],
4104096896cSGreg Roach        'ΥΥ'      => ['1', '65', '65', '65'],
4114096896cSGreg Roach        'Φ'       => ['0', '7', '7', '7'],
4124096896cSGreg Roach        'Χ'       => ['0', '5', '5', '5'],
4134096896cSGreg Roach        'Ψ'       => ['0', '7', '7', '7'],
4144096896cSGreg Roach        'Ω'       => ['1', '0', '', ''],
4154096896cSGreg Roach        'Ώ'       => ['1', '0', '', ''],
416a25f0a04SGreg Roach        // Hebrew alphabet
4174096896cSGreg Roach        'א'       => ['1', '0', '', ''],
4184096896cSGreg Roach        'או'      => ['1', '0', '7', ''],
4194096896cSGreg Roach        'אג'      => ['1', '4', '4', '4', '5', '5', '5', '34', '34', '34'],
4204096896cSGreg Roach        'בב'      => ['0', '7', '7', '7', '77', '77', '77'],
4214096896cSGreg Roach        'ב'       => ['0', '7', '7', '7'],
4224096896cSGreg Roach        'גג'      => ['0', '4', '4', '4', '5', '5', '5', '45', '45', '45', '55', '55', '55', '54', '54', '54'],
4234096896cSGreg Roach        'גד'      => ['0', '43', '43', '43', '53', '53', '53'],
4244096896cSGreg Roach        'גה'      => ['0', '45', '45', '45', '55', '55', '55'],
4254096896cSGreg Roach        'גז'      => ['0', '44', '44', '44', '45', '45', '45'],
4264096896cSGreg Roach        'גח'      => ['0', '45', '45', '45', '55', '55', '55'],
4274096896cSGreg Roach        'גכ'      => ['0', '45', '45', '45', '55', '55', '55'],
4284096896cSGreg Roach        'גך'      => ['0', '45', '45', '45', '55', '55', '55'],
4294096896cSGreg Roach        'גצ'      => ['0', '44', '44', '44', '45', '45', '45'],
4304096896cSGreg Roach        'גץ'      => ['0', '44', '44', '44', '45', '45', '45'],
4314096896cSGreg Roach        'גק'      => ['0', '45', '45', '45', '54', '54', '54'],
4324096896cSGreg Roach        'גש'      => ['0', '44', '44', '44', '54', '54', '54'],
4334096896cSGreg Roach        'גת'      => ['0', '43', '43', '43', '53', '53', '53'],
4344096896cSGreg Roach        'ג'       => ['0', '4', '4', '4', '5', '5', '5'],
4354096896cSGreg Roach        'דז'      => ['0', '4', '4', '4'],
4364096896cSGreg Roach        'דד'      => ['0', '3', '3', '3', '33', '33', '33'],
4374096896cSGreg Roach        'דט'      => ['0', '33', '33', '33'],
4384096896cSGreg Roach        'דש'      => ['0', '4', '4', '4'],
4394096896cSGreg Roach        'דצ'      => ['0', '4', '4', '4'],
4404096896cSGreg Roach        'דץ'      => ['0', '4', '4', '4'],
4414096896cSGreg Roach        'ד'       => ['0', '3', '3', '3'],
4424096896cSGreg Roach        'הג'      => ['0', '54', '54', '54', '55', '55', '55'],
4434096896cSGreg Roach        'הכ'      => ['0', '55', '55', '55'],
4444096896cSGreg Roach        'הח'      => ['0', '55', '55', '55'],
4454096896cSGreg Roach        'הק'      => ['0', '55', '55', '55', '5', '5', '5'],
4464096896cSGreg Roach        'הה'      => ['0', '5', '5', '', '55', '55', ''],
4474096896cSGreg Roach        'ה'       => ['0', '5', '5', ''],
4484096896cSGreg Roach        'וי'      => ['1', '', '', '', '7', '7', '7'],
4494096896cSGreg Roach        'ו'       => ['1', '7', '7', '7', '7', '', ''],
4504096896cSGreg Roach        'וו'      => ['1', '7', '7', '7', '7', '', ''],
4514096896cSGreg Roach        'וופ'     => ['1', '7', '7', '7', '77', '77', '77'],
4524096896cSGreg Roach        'זש'      => ['0', '4', '4', '4', '44', '44', '44'],
4534096896cSGreg Roach        'זדז'     => ['0', '2', '4', '4'],
4544096896cSGreg Roach        'ז'       => ['0', '4', '4', '4'],
4554096896cSGreg Roach        'זג'      => ['0', '44', '44', '44', '45', '45', '45'],
4564096896cSGreg Roach        'זז'      => ['0', '4', '4', '4', '44', '44', '44'],
4574096896cSGreg Roach        'זס'      => ['0', '44', '44', '44'],
4584096896cSGreg Roach        'זצ'      => ['0', '44', '44', '44'],
4594096896cSGreg Roach        'זץ'      => ['0', '44', '44', '44'],
4604096896cSGreg Roach        'חג'      => ['0', '54', '54', '54', '53', '53', '53'],
4614096896cSGreg Roach        'חח'      => ['0', '5', '5', '5', '55', '55', '55'],
4624096896cSGreg Roach        'חק'      => ['0', '55', '55', '55', '5', '5', '5'],
4634096896cSGreg Roach        'חכ'      => ['0', '45', '45', '45', '55', '55', '55'],
4644096896cSGreg Roach        'חס'      => ['0', '5', '54', '54'],
4654096896cSGreg Roach        'חש'      => ['0', '5', '54', '54'],
4664096896cSGreg Roach        'ח'       => ['0', '5', '5', '5'],
4674096896cSGreg Roach        'טש'      => ['0', '4', '4', '4'],
4684096896cSGreg Roach        'טד'      => ['0', '33', '33', '33'],
4694096896cSGreg Roach        'טי'      => ['0', '3', '3', '3', '4', '4', '4', '3', '3', '34'],
4704096896cSGreg Roach        'טת'      => ['0', '33', '33', '33'],
4714096896cSGreg Roach        'טט'      => ['0', '3', '3', '3', '33', '33', '33'],
4724096896cSGreg Roach        'ט'       => ['0', '3', '3', '3'],
4734096896cSGreg Roach        'י'       => ['1', '1', '', ''],
4744096896cSGreg Roach        'יא'      => ['1', '1', '', '', '1', '1', '1'],
4754096896cSGreg Roach        'כג'      => ['0', '55', '55', '55', '54', '54', '54'],
4764096896cSGreg Roach        'כש'      => ['0', '5', '54', '54'],
4774096896cSGreg Roach        'כס'      => ['0', '5', '54', '54'],
4784096896cSGreg Roach        'ככ'      => ['0', '5', '5', '5', '55', '55', '55'],
4794096896cSGreg Roach        'כך'      => ['0', '5', '5', '5', '55', '55', '55'],
4804096896cSGreg Roach        'כ'       => ['0', '5', '5', '5'],
4814096896cSGreg Roach        'כח'      => ['0', '55', '55', '55', '5', '5', '5'],
4824096896cSGreg Roach        'ך'       => ['0', '', '5', '5'],
4834096896cSGreg Roach        'ל'       => ['0', '8', '8', '8'],
4844096896cSGreg Roach        'לל'      => ['0', '88', '88', '88', '8', '8', '8'],
4854096896cSGreg Roach        'מנ'      => ['0', '66', '66', '66'],
4864096896cSGreg Roach        'מן'      => ['0', '66', '66', '66'],
4874096896cSGreg Roach        'ממ'      => ['0', '6', '6', '6', '66', '66', '66'],
4884096896cSGreg Roach        'מם'      => ['0', '6', '6', '6', '66', '66', '66'],
4894096896cSGreg Roach        'מ'       => ['0', '6', '6', '6'],
4904096896cSGreg Roach        'ם'       => ['0', '', '6', '6'],
4914096896cSGreg Roach        'נמ'      => ['0', '66', '66', '66'],
4924096896cSGreg Roach        'נם'      => ['0', '66', '66', '66'],
4934096896cSGreg Roach        'ננ'      => ['0', '6', '6', '6', '66', '66', '66'],
4944096896cSGreg Roach        'נן'      => ['0', '6', '6', '6', '66', '66', '66'],
4954096896cSGreg Roach        'נ'       => ['0', '6', '6', '6'],
4964096896cSGreg Roach        'ן'       => ['0', '', '6', '6'],
4974096896cSGreg Roach        'סתש'     => ['0', '2', '4', '4'],
4984096896cSGreg Roach        'סתז'     => ['0', '2', '4', '4'],
4994096896cSGreg Roach        'סטז'     => ['0', '2', '4', '4'],
5004096896cSGreg Roach        'סטש'     => ['0', '2', '4', '4'],
5014096896cSGreg Roach        'סצד'     => ['0', '2', '4', '4'],
5024096896cSGreg Roach        'סט'      => ['0', '2', '4', '4', '43', '43', '43'],
5034096896cSGreg Roach        'סת'      => ['0', '2', '4', '4', '43', '43', '43'],
5044096896cSGreg Roach        'סג'      => ['0', '44', '44', '44', '4', '4', '4'],
5054096896cSGreg Roach        'סס'      => ['0', '4', '4', '4', '44', '44', '44'],
5064096896cSGreg Roach        'סצ'      => ['0', '44', '44', '44'],
5074096896cSGreg Roach        'סץ'      => ['0', '44', '44', '44'],
5084096896cSGreg Roach        'סז'      => ['0', '44', '44', '44'],
5094096896cSGreg Roach        'סש'      => ['0', '44', '44', '44'],
5104096896cSGreg Roach        'ס'       => ['0', '4', '4', '4'],
5114096896cSGreg Roach        'ע'       => ['1', '0', '', ''],
5124096896cSGreg Roach        'פב'      => ['0', '7', '7', '7', '77', '77', '77'],
5134096896cSGreg Roach        'פוו'     => ['0', '7', '7', '7', '77', '77', '77'],
5144096896cSGreg Roach        'פפ'      => ['0', '7', '7', '7', '77', '77', '77'],
5154096896cSGreg Roach        'פף'      => ['0', '7', '7', '7', '77', '77', '77'],
5164096896cSGreg Roach        'פ'       => ['0', '7', '7', '7'],
5174096896cSGreg Roach        'ף'       => ['0', '', '7', '7'],
5184096896cSGreg Roach        'צג'      => ['0', '44', '44', '44', '45', '45', '45'],
5194096896cSGreg Roach        'צז'      => ['0', '44', '44', '44'],
5204096896cSGreg Roach        'צס'      => ['0', '44', '44', '44'],
5214096896cSGreg Roach        'צצ'      => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54', '45', '45', '45'],
5224096896cSGreg Roach        'צץ'      => ['0', '4', '4', '4', '5', '5', '5', '44', '44', '44', '54', '54', '54'],
5234096896cSGreg Roach        'צש'      => ['0', '44', '44', '44', '4', '4', '4', '5', '5', '5'],
5244096896cSGreg Roach        'צ'       => ['0', '4', '4', '4', '5', '5', '5'],
5254096896cSGreg Roach        'ץ'       => ['0', '', '4', '4'],
5264096896cSGreg Roach        'קה'      => ['0', '55', '55', '5'],
5274096896cSGreg Roach        'קס'      => ['0', '5', '54', '54'],
5284096896cSGreg Roach        'קש'      => ['0', '5', '54', '54'],
5294096896cSGreg Roach        'קק'      => ['0', '5', '5', '5', '55', '55', '55'],
5304096896cSGreg Roach        'קח'      => ['0', '55', '55', '55'],
5314096896cSGreg Roach        'קכ'      => ['0', '55', '55', '55'],
5324096896cSGreg Roach        'קך'      => ['0', '55', '55', '55'],
5334096896cSGreg Roach        'קג'      => ['0', '55', '55', '55', '54', '54', '54'],
5344096896cSGreg Roach        'ק'       => ['0', '5', '5', '5'],
5354096896cSGreg Roach        'רר'      => ['0', '99', '99', '99', '9', '9', '9'],
5364096896cSGreg Roach        'ר'       => ['0', '9', '9', '9'],
5374096896cSGreg Roach        'שטז'     => ['0', '2', '4', '4'],
5384096896cSGreg Roach        'שתש'     => ['0', '2', '4', '4'],
5394096896cSGreg Roach        'שתז'     => ['0', '2', '4', '4'],
5404096896cSGreg Roach        'שטש'     => ['0', '2', '4', '4'],
5414096896cSGreg Roach        'שד'      => ['0', '2', '43', '43'],
5424096896cSGreg Roach        'שז'      => ['0', '44', '44', '44'],
5434096896cSGreg Roach        'שס'      => ['0', '44', '44', '44'],
5444096896cSGreg Roach        'שת'      => ['0', '2', '43', '43'],
5454096896cSGreg Roach        'שג'      => ['0', '4', '4', '4', '44', '44', '44', '4', '43', '43'],
5464096896cSGreg Roach        'שט'      => ['0', '2', '43', '43', '44', '44', '44'],
5474096896cSGreg Roach        'שצ'      => ['0', '44', '44', '44', '45', '45', '45'],
5484096896cSGreg Roach        'שץ'      => ['0', '44', '', '44', '45', '', '45'],
5494096896cSGreg Roach        'שש'      => ['0', '4', '4', '4', '44', '44', '44'],
5504096896cSGreg Roach        'ש'       => ['0', '4', '4', '4'],
5514096896cSGreg Roach        'תג'      => ['0', '34', '34', '34'],
5524096896cSGreg Roach        'תז'      => ['0', '34', '34', '34'],
5534096896cSGreg Roach        'תש'      => ['0', '4', '4', '4'],
5544096896cSGreg Roach        'תת'      => ['0', '3', '3', '3', '4', '4', '4', '33', '33', '33', '44', '44', '44', '34', '34', '34', '43', '43', '43'],
5554096896cSGreg Roach        'ת'       => ['0', '3', '3', '3', '4', '4', '4'],
556a25f0a04SGreg Roach        // Arabic alphabet
5574096896cSGreg Roach        'ا'       => ['1', '0', '', ''],
5584096896cSGreg Roach        'ب'       => ['0', '7', '7', '7'],
5594096896cSGreg Roach        'ت'       => ['0', '3', '3', '3'],
5604096896cSGreg Roach        'ث'       => ['0', '3', '3', '3'],
5614096896cSGreg Roach        'ج'       => ['0', '4', '4', '4'],
5624096896cSGreg Roach        'ح'       => ['0', '5', '5', '5'],
5634096896cSGreg Roach        'خ'       => ['0', '5', '5', '5'],
5644096896cSGreg Roach        'د'       => ['0', '3', '3', '3'],
5654096896cSGreg Roach        'ذ'       => ['0', '3', '3', '3'],
5664096896cSGreg Roach        'ر'       => ['0', '9', '9', '9'],
5674096896cSGreg Roach        'ز'       => ['0', '4', '4', '4'],
5684096896cSGreg Roach        'س'       => ['0', '4', '4', '4'],
5694096896cSGreg Roach        'ش'       => ['0', '4', '4', '4'],
5704096896cSGreg Roach        'ص'       => ['0', '4', '4', '4'],
5714096896cSGreg Roach        'ض'       => ['0', '3', '3', '3'],
5724096896cSGreg Roach        'ط'       => ['0', '3', '3', '3'],
5734096896cSGreg Roach        'ظ'       => ['0', '4', '4', '4'],
5744096896cSGreg Roach        'ع'       => ['1', '0', '', ''],
5754096896cSGreg Roach        'غ'       => ['0', '0', '', ''],
5764096896cSGreg Roach        'ف'       => ['0', '7', '7', '7'],
5774096896cSGreg Roach        'ق'       => ['0', '5', '5', '5'],
5784096896cSGreg Roach        'ك'       => ['0', '5', '5', '5'],
5794096896cSGreg Roach        'ل'       => ['0', '8', '8', '8'],
5804096896cSGreg Roach        'لا'      => ['0', '8', '8', '8'],
5814096896cSGreg Roach        'م'       => ['0', '6', '6', '6'],
5824096896cSGreg Roach        'ن'       => ['0', '6', '6', '6'],
5834096896cSGreg Roach        'هن'      => ['0', '66', '66', '66'],
5844096896cSGreg Roach        'ه'       => ['0', '5', '5', ''],
5854096896cSGreg Roach        'و'       => ['1', '', '', '', '7', '', ''],
5864096896cSGreg Roach        'ي'       => ['0', '1', '', ''],
5874096896cSGreg Roach        'آ'       => ['0', '1', '', ''],
5884096896cSGreg Roach        'ة'       => ['0', '', '', '3'],
5894096896cSGreg Roach        'ی'       => ['0', '1', '', ''],
5904096896cSGreg Roach        'ى'       => ['1', '1', '', ''],
59113abd6f3SGreg Roach    ];
592a25f0a04SGreg Roach
593a25f0a04SGreg Roach    /**
59416cfb0b9SGreg Roach     * Which algorithms are supported.
59516cfb0b9SGreg Roach     *
59624f2a3afSGreg Roach     * @return array<string>
59716cfb0b9SGreg Roach     */
59816cfb0b9SGreg Roach    public static function getAlgorithms(): array
59916cfb0b9SGreg Roach    {
60016cfb0b9SGreg Roach        return [
601ad3143ccSGreg Roach            /* I18N: https://en.wikipedia.org/wiki/Soundex */
60216cfb0b9SGreg Roach            'std' => I18N::translate('Russell'),
603ad3143ccSGreg Roach            /* I18N: https://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
60416cfb0b9SGreg Roach            'dm'  => I18N::translate('Daitch-Mokotoff'),
60516cfb0b9SGreg Roach        ];
60616cfb0b9SGreg Roach    }
60716cfb0b9SGreg Roach
60816cfb0b9SGreg Roach    /**
60916cfb0b9SGreg Roach     * Is there a match between two soundex codes?
61016cfb0b9SGreg Roach     *
61116cfb0b9SGreg Roach     * @param string $soundex1
61216cfb0b9SGreg Roach     * @param string $soundex2
61316cfb0b9SGreg Roach     *
61416cfb0b9SGreg Roach     * @return bool
61516cfb0b9SGreg Roach     */
61624f2a3afSGreg Roach    public static function compare(string $soundex1, string $soundex2): bool
61716cfb0b9SGreg Roach    {
61816cfb0b9SGreg Roach        if ($soundex1 !== '' && $soundex2 !== '') {
61954c1ab5eSGreg Roach            return array_intersect(explode(':', $soundex1), explode(':', $soundex2)) !== [];
62016cfb0b9SGreg Roach        }
62116cfb0b9SGreg Roach
62216cfb0b9SGreg Roach        return false;
62316cfb0b9SGreg Roach    }
62416cfb0b9SGreg Roach
62516cfb0b9SGreg Roach    /**
62616cfb0b9SGreg Roach     * Generate Russell soundex codes for a given text.
62716cfb0b9SGreg Roach     *
62816cfb0b9SGreg Roach     * @param string $text
62916cfb0b9SGreg Roach     *
63016cfb0b9SGreg Roach     * @return string
63116cfb0b9SGreg Roach     */
63216cfb0b9SGreg Roach    public static function russell(string $text): string
63316cfb0b9SGreg Roach    {
63416cfb0b9SGreg Roach        $words         = explode(' ', $text);
63516cfb0b9SGreg Roach        $soundex_array = [];
63616cfb0b9SGreg Roach
63716cfb0b9SGreg Roach        foreach ($words as $word) {
63816cfb0b9SGreg Roach            $soundex = soundex($word);
63916cfb0b9SGreg Roach
64016cfb0b9SGreg Roach            // Only return codes from recognisable sounds
64116cfb0b9SGreg Roach            if ($soundex !== '0000') {
64216cfb0b9SGreg Roach                $soundex_array[] = $soundex;
64316cfb0b9SGreg Roach            }
64416cfb0b9SGreg Roach        }
64516cfb0b9SGreg Roach
64616cfb0b9SGreg Roach        // Combine words, e.g. “New York” as “Newyork”
64716cfb0b9SGreg Roach        if (count($words) > 1) {
648e364afe4SGreg Roach            $soundex_array[] = soundex(str_replace(' ', '', $text));
64916cfb0b9SGreg Roach        }
65016cfb0b9SGreg Roach
65116cfb0b9SGreg Roach        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
65216cfb0b9SGreg Roach        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);
65316cfb0b9SGreg Roach
65416cfb0b9SGreg Roach        return implode(':', $soundex_array);
65516cfb0b9SGreg Roach    }
65616cfb0b9SGreg Roach
65716cfb0b9SGreg Roach    /**
65816cfb0b9SGreg Roach     * Generate Daitch–Mokotoff soundex codes for a given text.
65916cfb0b9SGreg Roach     *
66016cfb0b9SGreg Roach     * @param string $text
66116cfb0b9SGreg Roach     *
66216cfb0b9SGreg Roach     * @return string
66316cfb0b9SGreg Roach     */
66416cfb0b9SGreg Roach    public static function daitchMokotoff(string $text): string
66516cfb0b9SGreg Roach    {
66616cfb0b9SGreg Roach        $words         = explode(' ', $text);
66716cfb0b9SGreg Roach        $soundex_array = [];
66816cfb0b9SGreg Roach
66916cfb0b9SGreg Roach        foreach ($words as $word) {
67016cfb0b9SGreg Roach            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
67116cfb0b9SGreg Roach        }
67216cfb0b9SGreg Roach        // Combine words, e.g. “New York” as “Newyork”
67316cfb0b9SGreg Roach        if (count($words) > 1) {
674e364afe4SGreg Roach            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(str_replace(' ', '', $text)));
67516cfb0b9SGreg Roach        }
67616cfb0b9SGreg Roach
67716cfb0b9SGreg Roach        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
67816cfb0b9SGreg Roach        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);
67916cfb0b9SGreg Roach
68016cfb0b9SGreg Roach        return implode(':', $soundex_array);
68116cfb0b9SGreg Roach    }
68216cfb0b9SGreg Roach
68316cfb0b9SGreg Roach    /**
68476692c8bSGreg Roach     * Calculate the Daitch-Mokotoff soundex for a word.
68576692c8bSGreg Roach     *
686a25f0a04SGreg Roach     * @param string $name
687a25f0a04SGreg Roach     *
68824f2a3afSGreg Roach     * @return array<string> List of possible DM codes for the word.
689a25f0a04SGreg Roach     */
69024f2a3afSGreg Roach    private static function daitchMokotoffWord(string $name): array
691c1010edaSGreg Roach    {
692a25f0a04SGreg Roach        // Apply special transformation rules to the input string
693a25f0a04SGreg Roach        $name = I18N::strtoupper($name);
69416cfb0b9SGreg Roach        foreach (self::TRANSFORM_NAMES as $transformRule) {
695a25f0a04SGreg Roach            $name = str_replace($transformRule[0], $transformRule[1], $name);
696a25f0a04SGreg Roach        }
697a25f0a04SGreg Roach
698a25f0a04SGreg Roach        // Initialize
699a25f0a04SGreg Roach        $name_script = I18N::textScript($name);
700dd71ff6bSGreg Roach        $noVowels    = $name_script === 'Hebr' || $name_script === 'Arab';
701a25f0a04SGreg Roach
702a25f0a04SGreg Roach        $lastPos         = strlen($name) - 1;
703a25f0a04SGreg Roach        $currPos         = 0;
704a25f0a04SGreg Roach        $state           = 1; // 1: start of input string, 2: before vowel, 3: other
70513abd6f3SGreg Roach        $result          = []; // accumulate complete 6-digit D-M codes here
70613abd6f3SGreg Roach        $partialResult   = []; // accumulate incomplete D-M codes here
70713abd6f3SGreg Roach        $partialResult[] = ['!']; // initialize 1st partial result  ('!' stops "duplicate sound" check)
708a25f0a04SGreg Roach
709a25f0a04SGreg Roach        // Loop through the input string.
710a25f0a04SGreg Roach        // Stop when the string is exhausted or when no more partial results remain
711a25f0a04SGreg Roach        while (count($partialResult) !== 0 && $currPos <= $lastPos) {
712a25f0a04SGreg Roach            // Find the DM coding table entry for the chunk at the current position
713a25f0a04SGreg Roach            $thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
714e364afe4SGreg Roach            while ($thisEntry !== '') {
71516cfb0b9SGreg Roach                if (isset(self::DM_SOUNDS[$thisEntry])) {
716a25f0a04SGreg Roach                    break;
717a25f0a04SGreg Roach                }
718a25f0a04SGreg Roach                $thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
719a25f0a04SGreg Roach            }
720a25f0a04SGreg Roach            if ($thisEntry === '') {
721a25f0a04SGreg Roach                $currPos++; // Not in table: advance pointer to next byte
722a25f0a04SGreg Roach                continue; // and try again
723a25f0a04SGreg Roach            }
724a25f0a04SGreg Roach
72516cfb0b9SGreg Roach            $soundTableEntry = self::DM_SOUNDS[$thisEntry];
726a25f0a04SGreg Roach            $workingResult   = $partialResult;
72713abd6f3SGreg Roach            $partialResult   = [];
728a25f0a04SGreg Roach            $currPos += strlen($thisEntry);
729a25f0a04SGreg Roach
730a25f0a04SGreg Roach            // Not at beginning of input string
731e364afe4SGreg Roach            if ($state !== 1) {
732a25f0a04SGreg Roach                if ($currPos <= $lastPos) {
733a25f0a04SGreg Roach                    // Determine whether the next chunk is a vowel
734a25f0a04SGreg Roach                    $nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
735e364afe4SGreg Roach                    while ($nextEntry !== '') {
73616cfb0b9SGreg Roach                        if (isset(self::DM_SOUNDS[$nextEntry])) {
737a25f0a04SGreg Roach                            break;
738a25f0a04SGreg Roach                        }
739a25f0a04SGreg Roach                        $nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
740a25f0a04SGreg Roach                    }
741a25f0a04SGreg Roach                } else {
742a25f0a04SGreg Roach                    $nextEntry = '';
743a25f0a04SGreg Roach                }
744e364afe4SGreg Roach                if ($nextEntry !== '' && self::DM_SOUNDS[$nextEntry][0] !== '0') {
745a25f0a04SGreg Roach                    $state = 2;
746a25f0a04SGreg Roach                } else {
747a25f0a04SGreg Roach                    // Next chunk is a vowel
748a25f0a04SGreg Roach                    $state = 3;
749a25f0a04SGreg Roach                }
750a25f0a04SGreg Roach            }
751a25f0a04SGreg Roach
752a25f0a04SGreg Roach            while ($state < count($soundTableEntry)) {
753a25f0a04SGreg Roach                // empty means 'ignore this sound in this state'
754e364afe4SGreg Roach                if ($soundTableEntry[$state] === '') {
755a25f0a04SGreg Roach                    foreach ($workingResult as $workingEntry) {
756a25f0a04SGreg Roach                        $tempEntry                        = $workingEntry;
757a25f0a04SGreg Roach                        $tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
758a25f0a04SGreg Roach                        $partialResult[]                  = $tempEntry;
759a25f0a04SGreg Roach                    }
760a25f0a04SGreg Roach                } else {
761a25f0a04SGreg Roach                    foreach ($workingResult as $workingEntry) {
762a25f0a04SGreg Roach                        if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) {
763a25f0a04SGreg Roach                            // Incoming sound isn't a duplicate of the previous sound
764a25f0a04SGreg Roach                            $workingEntry[] = $soundTableEntry[$state];
765e364afe4SGreg Roach                        } elseif ($noVowels) {
766a25f0a04SGreg Roach                            // Incoming sound is a duplicate of the previous sound
767a25f0a04SGreg Roach                            // For Hebrew and Arabic, we need to create a pair of D-M sound codes,
768a25f0a04SGreg Roach                            // one of the pair with only a single occurrence of the duplicate sound,
769a25f0a04SGreg Roach                            // the other with both occurrences
770a25f0a04SGreg Roach                            $workingEntry[] = $soundTableEntry[$state];
771a25f0a04SGreg Roach                        }
772e364afe4SGreg Roach
773a25f0a04SGreg Roach                        if (count($workingEntry) < 7) {
774a25f0a04SGreg Roach                            $partialResult[] = $workingEntry;
775a25f0a04SGreg Roach                        } else {
776a25f0a04SGreg Roach                            // This is the 6th code in the sequence
777a25f0a04SGreg Roach                            // We're looking for 7 entries because the first is '!' and doesn't count
778a25f0a04SGreg Roach                            $tempResult = str_replace('!', '', implode('', $workingEntry));
779a25f0a04SGreg Roach                            // Only return codes from recognisable sounds
780a25f0a04SGreg Roach                            if ($tempResult) {
781a25f0a04SGreg Roach                                $result[] = substr($tempResult . '000000', 0, 6);
782a25f0a04SGreg Roach                            }
783a25f0a04SGreg Roach                        }
784a25f0a04SGreg Roach                    }
785a25f0a04SGreg Roach                }
786e364afe4SGreg Roach                $state += 3; // Advance to next triplet while keeping the same basic state
787a25f0a04SGreg Roach            }
788a25f0a04SGreg Roach        }
789a25f0a04SGreg Roach
790a25f0a04SGreg Roach        // Zero-fill and copy all remaining partial results
791a25f0a04SGreg Roach        foreach ($partialResult as $workingEntry) {
792a25f0a04SGreg Roach            $tempResult = str_replace('!', '', implode('', $workingEntry));
793a25f0a04SGreg Roach            // Only return codes from recognisable sounds
794a25f0a04SGreg Roach            if ($tempResult) {
795a25f0a04SGreg Roach                $result[] = substr($tempResult . '000000', 0, 6);
796a25f0a04SGreg Roach            }
797a25f0a04SGreg Roach        }
798a25f0a04SGreg Roach
799a25f0a04SGreg Roach        return $result;
800a25f0a04SGreg Roach    }
801a25f0a04SGreg Roach}
802