xref: /webtrees/app/Soundex.php (revision 8fcd0d32e56ee262912bbdb593202cfd1cbc1615)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2019 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20/**
21 * Phonetic matching of strings.
22 */
23class Soundex
24{
25    /**
26     * Which algorithms are supported.
27     *
28     * @return string[]
29     */
30    public static function getAlgorithms(): array
31    {
32        return [
33            /* I18N: http://en.wikipedia.org/wiki/Soundex */
34            'std' => I18N::translate('Russell'),
35            /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
36            'dm'  => I18N::translate('Daitch-Mokotoff'),
37        ];
38    }
39
40    /**
41     * Is there a match between two soundex codes?
42     *
43     * @param string $soundex1
44     * @param string $soundex2
45     *
46     * @return bool
47     */
48    public static function compare($soundex1, $soundex2): bool
49    {
50        if ($soundex1 !== '' && $soundex2 !== '') {
51            return !empty(array_intersect(explode(':', $soundex1), explode(':', $soundex2)));
52        }
53
54        return false;
55    }
56
57    /**
58     * Generate Russell soundex codes for a given text.
59     *
60     * @param string $text
61     *
62     * @return string
63     */
64    public static function russell(string $text): string
65    {
66        $words         = explode(' ', $text);
67        $soundex_array = [];
68
69        foreach ($words as $word) {
70            $soundex = soundex($word);
71
72            // Only return codes from recognisable sounds
73            if ($soundex !== '0000') {
74                $soundex_array[] = $soundex;
75            }
76        }
77
78        // Combine words, e.g. “New York” as “Newyork”
79        if (count($words) > 1) {
80            $soundex_array[] = soundex(strtr($text, ' ', ''));
81        }
82
83        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
84        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);
85
86        return implode(':', $soundex_array);
87    }
88
89    /**
90     * Generate Daitch–Mokotoff soundex codes for a given text.
91     *
92     * @param string $text
93     *
94     * @return string
95     */
96    public static function daitchMokotoff(string $text): string
97    {
98        $words         = explode(' ', $text);
99        $soundex_array = [];
100
101        foreach ($words as $word) {
102            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
103        }
104        // Combine words, e.g. “New York” as “Newyork”
105        if (count($words) > 1) {
106            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(strtr($text, ' ', '')));
107        }
108
109        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
110        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);
111
112        return implode(':', $soundex_array);
113    }
114
115    // Determine the Daitch–Mokotoff Soundex code for a word
116    // Original implementation by Gerry Kroll, and analysis by Meliza Amity
117
    // Maximum length of a table key, measured in bytes (NOT in UTF-8 characters!)
119    const MAXCHAR = 7;
120
121    /**
122     * Name transformation arrays.
123     * Used to transform the Name string to simplify the "sounds like" table.
124     * This is especially useful in Hebrew.
125     *
126     * Each array entry defines the "from" and "to" arguments of an preg($from, $to, $text)
127     * function call to achieve the desired transformations.
128     *
129     * Note about the use of "\x01":
130     * This code, which can’t legitimately occur in the kind of text we're dealing with,
131     * is used as a place-holder so that conditional string replacements can be done.
132     *
133     * @var string[][]
134     */
135    private static $transformNameTable = [
136        // Force Yiddish ligatures to be treated as separate letters
137        [
138            'װ',
139            'וו',
140        ],
141        [
142            'ײ',
143            'יי',
144        ],
145        [
146            'ױ',
147            'וי',
148        ],
149        [
150            'בו',
151            'בע',
152        ],
153        [
154            'פו',
155            'פע',
156        ],
157        [
158            'ומ',
159            'עמ',
160        ],
161        [
162            'ום',
163            'עם',
164        ],
165        [
166            'ונ',
167            'ענ',
168        ],
169        [
170            'ון',
171            'ען',
172        ],
173        [
174            'וו',
175            'ב',
176        ],
177        [
178            "\x01",
179            '',
180        ],
181        [
182            'ייה$',
183            "\x01ה",
184        ],
185        [
186            'ייע$',
187            "\x01ע",
188        ],
189        [
190            'יי',
191            'ע',
192        ],
193        [
194            "\x01",
195            'יי',
196        ],
197    ];
198
199    /**
200     * The DM sound coding table is organized this way:
201     * key: a variable-length string that corresponds to the UTF-8 character sequence
     * represented by the table entry. Currently, that string can be up to 7
     * bytes long. This maximum length is defined by the class constant
     * self::MAXCHAR.
205     *
206     * value: an array as follows:
207     * [0]:  zero if not a vowel
208     * [1]:  sound value when this string is at the beginning of the word
209     * [2]:  sound value when this string is followed by a vowel
210     * [3]:  sound value for other cases
211     * [1],[2],[3] can be repeated several times to create branches in the code
212     * an empty sound value means "ignore in this state"
213     *
214     * @var string[][]
215     */
216    private static $dmsounds = [
217        'A'       => [
218            '1',
219            '0',
220            '',
221            '',
222        ],
223        'À'       => [
224            '1',
225            '0',
226            '',
227            '',
228        ],
229        'Á'       => [
230            '1',
231            '0',
232            '',
233            '',
234        ],
235        'Â'       => [
236            '1',
237            '0',
238            '',
239            '',
240        ],
241        'Ã'       => [
242            '1',
243            '0',
244            '',
245            '',
246        ],
247        'Ä'       => [
248            '1',
249            '0',
250            '1',
251            '',
252            '0',
253            '',
254            '',
255        ],
256        'Å'       => [
257            '1',
258            '0',
259            '',
260            '',
261        ],
262        'Ă'       => [
263            '1',
264            '0',
265            '',
266            '',
267        ],
268        'Ą'       => [
269            '1',
270            '',
271            '',
272            '',
273            '',
274            '',
275            '6',
276        ],
277        'Ạ'       => [
278            '1',
279            '0',
280            '',
281            '',
282        ],
283        'Ả'       => [
284            '1',
285            '0',
286            '',
287            '',
288        ],
289        'Ấ'       => [
290            '1',
291            '0',
292            '',
293            '',
294        ],
295        'Ầ'       => [
296            '1',
297            '0',
298            '',
299            '',
300        ],
301        'Ẩ'       => [
302            '1',
303            '0',
304            '',
305            '',
306        ],
307        'Ẫ'       => [
308            '1',
309            '0',
310            '',
311            '',
312        ],
313        'Ậ'       => [
314            '1',
315            '0',
316            '',
317            '',
318        ],
319        'Ắ'       => [
320            '1',
321            '0',
322            '',
323            '',
324        ],
325        'Ằ'       => [
326            '1',
327            '0',
328            '',
329            '',
330        ],
331        'Ẳ'       => [
332            '1',
333            '0',
334            '',
335            '',
336        ],
337        'Ẵ'       => [
338            '1',
339            '0',
340            '',
341            '',
342        ],
343        'Ặ'       => [
344            '1',
345            '0',
346            '',
347            '',
348        ],
349        'AE'      => [
350            '1',
351            '0',
352            '1',
353            '',
354        ],
355        'Æ'       => [
356            '1',
357            '0',
358            '1',
359            '',
360        ],
361        'AI'      => [
362            '1',
363            '0',
364            '1',
365            '',
366        ],
367        'AJ'      => [
368            '1',
369            '0',
370            '1',
371            '',
372        ],
373        'AU'      => [
374            '1',
375            '0',
376            '7',
377            '',
378        ],
379        'AV'      => [
380            '1',
381            '0',
382            '7',
383            '',
384            '7',
385            '7',
386            '7',
387        ],
388        'ÄU'      => [
389            '1',
390            '0',
391            '1',
392            '',
393        ],
394        'AY'      => [
395            '1',
396            '0',
397            '1',
398            '',
399        ],
400        'B'       => [
401            '0',
402            '7',
403            '7',
404            '7',
405        ],
406        'C'       => [
407            '0',
408            '5',
409            '5',
410            '5',
411            '34',
412            '4',
413            '4',
414        ],
415        'Ć'       => [
416            '0',
417            '4',
418            '4',
419            '4',
420        ],
421        'Č'       => [
422            '0',
423            '4',
424            '4',
425            '4',
426        ],
427        'Ç'       => [
428            '0',
429            '4',
430            '4',
431            '4',
432        ],
433        'CH'      => [
434            '0',
435            '5',
436            '5',
437            '5',
438            '34',
439            '4',
440            '4',
441        ],
442        'CHS'     => [
443            '0',
444            '5',
445            '54',
446            '54',
447        ],
448        'CK'      => [
449            '0',
450            '5',
451            '5',
452            '5',
453            '45',
454            '45',
455            '45',
456        ],
457        'CCS'     => [
458            '0',
459            '4',
460            '4',
461            '4',
462        ],
463        'CS'      => [
464            '0',
465            '4',
466            '4',
467            '4',
468        ],
469        'CSZ'     => [
470            '0',
471            '4',
472            '4',
473            '4',
474        ],
475        'CZ'      => [
476            '0',
477            '4',
478            '4',
479            '4',
480        ],
481        'CZS'     => [
482            '0',
483            '4',
484            '4',
485            '4',
486        ],
487        'D'       => [
488            '0',
489            '3',
490            '3',
491            '3',
492        ],
493        'Ď'       => [
494            '0',
495            '3',
496            '3',
497            '3',
498        ],
499        'Đ'       => [
500            '0',
501            '3',
502            '3',
503            '3',
504        ],
505        'DRS'     => [
506            '0',
507            '4',
508            '4',
509            '4',
510        ],
511        'DRZ'     => [
512            '0',
513            '4',
514            '4',
515            '4',
516        ],
517        'DS'      => [
518            '0',
519            '4',
520            '4',
521            '4',
522        ],
523        'DSH'     => [
524            '0',
525            '4',
526            '4',
527            '4',
528        ],
529        'DSZ'     => [
530            '0',
531            '4',
532            '4',
533            '4',
534        ],
535        'DT'      => [
536            '0',
537            '3',
538            '3',
539            '3',
540        ],
541        'DDZ'     => [
542            '0',
543            '4',
544            '4',
545            '4',
546        ],
547        'DDZS'    => [
548            '0',
549            '4',
550            '4',
551            '4',
552        ],
553        'DZ'      => [
554            '0',
555            '4',
556            '4',
557            '4',
558        ],
559        'DŹ'      => [
560            '0',
561            '4',
562            '4',
563            '4',
564        ],
565        'DŻ'      => [
566            '0',
567            '4',
568            '4',
569            '4',
570        ],
571        'DZH'     => [
572            '0',
573            '4',
574            '4',
575            '4',
576        ],
577        'DZS'     => [
578            '0',
579            '4',
580            '4',
581            '4',
582        ],
583        'E'       => [
584            '1',
585            '0',
586            '',
587            '',
588        ],
589        'È'       => [
590            '1',
591            '0',
592            '',
593            '',
594        ],
595        'É'       => [
596            '1',
597            '0',
598            '',
599            '',
600        ],
601        'Ê'       => [
602            '1',
603            '0',
604            '',
605            '',
606        ],
607        'Ë'       => [
608            '1',
609            '0',
610            '',
611            '',
612        ],
613        'Ĕ'       => [
614            '1',
615            '0',
616            '',
617            '',
618        ],
619        'Ė'       => [
620            '1',
621            '0',
622            '',
623            '',
624        ],
625        'Ę'       => [
626            '1',
627            '',
628            '',
629            '6',
630            '',
631            '',
632            '',
633        ],
634        'Ẹ'       => [
635            '1',
636            '0',
637            '',
638            '',
639        ],
640        'Ẻ'       => [
641            '1',
642            '0',
643            '',
644            '',
645        ],
646        'Ẽ'       => [
647            '1',
648            '0',
649            '',
650            '',
651        ],
652        'Ế'       => [
653            '1',
654            '0',
655            '',
656            '',
657        ],
658        'Ề'       => [
659            '1',
660            '0',
661            '',
662            '',
663        ],
664        'Ể'       => [
665            '1',
666            '0',
667            '',
668            '',
669        ],
670        'Ễ'       => [
671            '1',
672            '0',
673            '',
674            '',
675        ],
676        'Ệ'       => [
677            '1',
678            '0',
679            '',
680            '',
681        ],
682        'EAU'     => [
683            '1',
684            '0',
685            '',
686            '',
687        ],
688        'EI'      => [
689            '1',
690            '0',
691            '1',
692            '',
693        ],
694        'EJ'      => [
695            '1',
696            '0',
697            '1',
698            '',
699        ],
700        'EU'      => [
701            '1',
702            '1',
703            '1',
704            '',
705        ],
706        'EY'      => [
707            '1',
708            '0',
709            '1',
710            '',
711        ],
712        'F'       => [
713            '0',
714            '7',
715            '7',
716            '7',
717        ],
718        'FB'      => [
719            '0',
720            '7',
721            '7',
722            '7',
723        ],
724        'G'       => [
725            '0',
726            '5',
727            '5',
728            '5',
729            '34',
730            '4',
731            '4',
732        ],
733        'Ğ'       => [
734            '0',
735            '',
736            '',
737            '',
738        ],
739        'GGY'     => [
740            '0',
741            '5',
742            '5',
743            '5',
744        ],
745        'GY'      => [
746            '0',
747            '5',
748            '5',
749            '5',
750        ],
751        'H'       => [
752            '0',
753            '5',
754            '5',
755            '',
756            '5',
757            '5',
758            '5',
759        ],
760        'I'       => [
761            '1',
762            '0',
763            '',
764            '',
765        ],
766        'Ì'       => [
767            '1',
768            '0',
769            '',
770            '',
771        ],
772        'Í'       => [
773            '1',
774            '0',
775            '',
776            '',
777        ],
778        'Î'       => [
779            '1',
780            '0',
781            '',
782            '',
783        ],
784        'Ï'       => [
785            '1',
786            '0',
787            '',
788            '',
789        ],
790        'Ĩ'       => [
791            '1',
792            '0',
793            '',
794            '',
795        ],
796        'Į'       => [
797            '1',
798            '0',
799            '',
800            '',
801        ],
802        'İ'       => [
803            '1',
804            '0',
805            '',
806            '',
807        ],
808        'Ỉ'       => [
809            '1',
810            '0',
811            '',
812            '',
813        ],
814        'Ị'       => [
815            '1',
816            '0',
817            '',
818            '',
819        ],
820        'IA'      => [
821            '1',
822            '1',
823            '',
824            '',
825        ],
826        'IE'      => [
827            '1',
828            '1',
829            '',
830            '',
831        ],
832        'IO'      => [
833            '1',
834            '1',
835            '',
836            '',
837        ],
838        'IU'      => [
839            '1',
840            '1',
841            '',
842            '',
843        ],
844        'J'       => [
845            '0',
846            '1',
847            '',
848            '',
849            '4',
850            '4',
851            '4',
852            '5',
853            '5',
854            '',
855        ],
856        'K'       => [
857            '0',
858            '5',
859            '5',
860            '5',
861        ],
862        'KH'      => [
863            '0',
864            '5',
865            '5',
866            '5',
867        ],
868        'KS'      => [
869            '0',
870            '5',
871            '54',
872            '54',
873        ],
874        'L'       => [
875            '0',
876            '8',
877            '8',
878            '8',
879        ],
880        'Ľ'       => [
881            '0',
882            '8',
883            '8',
884            '8',
885        ],
886        'Ĺ'       => [
887            '0',
888            '8',
889            '8',
890            '8',
891        ],
892        'Ł'       => [
893            '0',
894            '7',
895            '7',
896            '7',
897            '8',
898            '8',
899            '8',
900        ],
901        'LL'      => [
902            '0',
903            '8',
904            '8',
905            '8',
906            '58',
907            '8',
908            '8',
909            '1',
910            '8',
911            '8',
912        ],
913        'LLY'     => [
914            '0',
915            '8',
916            '8',
917            '8',
918            '1',
919            '8',
920            '8',
921        ],
922        'LY'      => [
923            '0',
924            '8',
925            '8',
926            '8',
927            '1',
928            '8',
929            '8',
930        ],
931        'M'       => [
932            '0',
933            '6',
934            '6',
935            '6',
936        ],
937        'MĔ'      => [
938            '0',
939            '66',
940            '66',
941            '66',
942        ],
943        'MN'      => [
944            '0',
945            '66',
946            '66',
947            '66',
948        ],
949        'N'       => [
950            '0',
951            '6',
952            '6',
953            '6',
954        ],
955        'Ń'       => [
956            '0',
957            '6',
958            '6',
959            '6',
960        ],
961        'Ň'       => [
962            '0',
963            '6',
964            '6',
965            '6',
966        ],
967        'Ñ'       => [
968            '0',
969            '6',
970            '6',
971            '6',
972        ],
973        'NM'      => [
974            '0',
975            '66',
976            '66',
977            '66',
978        ],
979        'O'       => [
980            '1',
981            '0',
982            '',
983            '',
984        ],
985        'Ò'       => [
986            '1',
987            '0',
988            '',
989            '',
990        ],
991        'Ó'       => [
992            '1',
993            '0',
994            '',
995            '',
996        ],
997        'Ô'       => [
998            '1',
999            '0',
1000            '',
1001            '',
1002        ],
1003        'Õ'       => [
1004            '1',
1005            '0',
1006            '',
1007            '',
1008        ],
1009        'Ö'       => [
1010            '1',
1011            '0',
1012            '',
1013            '',
1014        ],
1015        'Ø'       => [
1016            '1',
1017            '0',
1018            '',
1019            '',
1020        ],
1021        'Ő'       => [
1022            '1',
1023            '0',
1024            '',
1025            '',
1026        ],
1027        'Œ'       => [
1028            '1',
1029            '0',
1030            '',
1031            '',
1032        ],
1033        'Ơ'       => [
1034            '1',
1035            '0',
1036            '',
1037            '',
1038        ],
1039        'Ọ'       => [
1040            '1',
1041            '0',
1042            '',
1043            '',
1044        ],
1045        'Ỏ'       => [
1046            '1',
1047            '0',
1048            '',
1049            '',
1050        ],
1051        'Ố'       => [
1052            '1',
1053            '0',
1054            '',
1055            '',
1056        ],
1057        'Ồ'       => [
1058            '1',
1059            '0',
1060            '',
1061            '',
1062        ],
1063        'Ổ'       => [
1064            '1',
1065            '0',
1066            '',
1067            '',
1068        ],
1069        'Ỗ'       => [
1070            '1',
1071            '0',
1072            '',
1073            '',
1074        ],
1075        'Ộ'       => [
1076            '1',
1077            '0',
1078            '',
1079            '',
1080        ],
1081        'Ớ'       => [
1082            '1',
1083            '0',
1084            '',
1085            '',
1086        ],
1087        'Ờ'       => [
1088            '1',
1089            '0',
1090            '',
1091            '',
1092        ],
1093        'Ở'       => [
1094            '1',
1095            '0',
1096            '',
1097            '',
1098        ],
1099        'Ỡ'       => [
1100            '1',
1101            '0',
1102            '',
1103            '',
1104        ],
1105        'Ợ'       => [
1106            '1',
1107            '0',
1108            '',
1109            '',
1110        ],
1111        'OE'      => [
1112            '1',
1113            '0',
1114            '',
1115            '',
1116        ],
1117        'OI'      => [
1118            '1',
1119            '0',
1120            '1',
1121            '',
1122        ],
1123        'OJ'      => [
1124            '1',
1125            '0',
1126            '1',
1127            '',
1128        ],
1129        'OU'      => [
1130            '1',
1131            '0',
1132            '',
1133            '',
1134        ],
1135        'OY'      => [
1136            '1',
1137            '0',
1138            '1',
1139            '',
1140        ],
1141        'P'       => [
1142            '0',
1143            '7',
1144            '7',
1145            '7',
1146        ],
1147        'PF'      => [
1148            '0',
1149            '7',
1150            '7',
1151            '7',
1152        ],
1153        'PH'      => [
1154            '0',
1155            '7',
1156            '7',
1157            '7',
1158        ],
1159        'Q'       => [
1160            '0',
1161            '5',
1162            '5',
1163            '5',
1164        ],
1165        'R'       => [
1166            '0',
1167            '9',
1168            '9',
1169            '9',
1170        ],
1171        'Ř'       => [
1172            '0',
1173            '4',
1174            '4',
1175            '4',
1176        ],
1177        'RS'      => [
1178            '0',
1179            '4',
1180            '4',
1181            '4',
1182            '94',
1183            '94',
1184            '94',
1185        ],
1186        'RZ'      => [
1187            '0',
1188            '4',
1189            '4',
1190            '4',
1191            '94',
1192            '94',
1193            '94',
1194        ],
1195        'S'       => [
1196            '0',
1197            '4',
1198            '4',
1199            '4',
1200        ],
1201        'Ś'       => [
1202            '0',
1203            '4',
1204            '4',
1205            '4',
1206        ],
1207        'Š'       => [
1208            '0',
1209            '4',
1210            '4',
1211            '4',
1212        ],
1213        'Ş'       => [
1214            '0',
1215            '4',
1216            '4',
1217            '4',
1218        ],
1219        'SC'      => [
1220            '0',
1221            '2',
1222            '4',
1223            '4',
1224        ],
1225        'ŠČ'      => [
1226            '0',
1227            '2',
1228            '4',
1229            '4',
1230        ],
1231        'SCH'     => [
1232            '0',
1233            '4',
1234            '4',
1235            '4',
1236        ],
1237        'SCHD'    => [
1238            '0',
1239            '2',
1240            '43',
1241            '43',
1242        ],
1243        'SCHT'    => [
1244            '0',
1245            '2',
1246            '43',
1247            '43',
1248        ],
1249        'SCHTCH'  => [
1250            '0',
1251            '2',
1252            '4',
1253            '4',
1254        ],
1255        'SCHTSCH' => [
1256            '0',
1257            '2',
1258            '4',
1259            '4',
1260        ],
1261        'SCHTSH'  => [
1262            '0',
1263            '2',
1264            '4',
1265            '4',
1266        ],
1267        'SD'      => [
1268            '0',
1269            '2',
1270            '43',
1271            '43',
1272        ],
1273        'SH'      => [
1274            '0',
1275            '4',
1276            '4',
1277            '4',
1278        ],
1279        'SHCH'    => [
1280            '0',
1281            '2',
1282            '4',
1283            '4',
1284        ],
1285        'SHD'     => [
1286            '0',
1287            '2',
1288            '43',
1289            '43',
1290        ],
1291        'SHT'     => [
1292            '0',
1293            '2',
1294            '43',
1295            '43',
1296        ],
1297        'SHTCH'   => [
1298            '0',
1299            '2',
1300            '4',
1301            '4',
1302        ],
1303        'SHTSH'   => [
1304            '0',
1305            '2',
1306            '4',
1307            '4',
1308        ],
1309        'ß'       => [
1310            '0',
1311            '',
1312            '4',
1313            '4',
1314        ],
1315        'ST'      => [
1316            '0',
1317            '2',
1318            '43',
1319            '43',
1320        ],
1321        'STCH'    => [
1322            '0',
1323            '2',
1324            '4',
1325            '4',
1326        ],
1327        'STRS'    => [
1328            '0',
1329            '2',
1330            '4',
1331            '4',
1332        ],
1333        'STRZ'    => [
1334            '0',
1335            '2',
1336            '4',
1337            '4',
1338        ],
1339        'STSCH'   => [
1340            '0',
1341            '2',
1342            '4',
1343            '4',
1344        ],
1345        'STSH'    => [
1346            '0',
1347            '2',
1348            '4',
1349            '4',
1350        ],
1351        'SSZ'     => [
1352            '0',
1353            '4',
1354            '4',
1355            '4',
1356        ],
1357        'SZ'      => [
1358            '0',
1359            '4',
1360            '4',
1361            '4',
1362        ],
1363        'SZCS'    => [
1364            '0',
1365            '2',
1366            '4',
1367            '4',
1368        ],
1369        'SZCZ'    => [
1370            '0',
1371            '2',
1372            '4',
1373            '4',
1374        ],
1375        'SZD'     => [
1376            '0',
1377            '2',
1378            '43',
1379            '43',
1380        ],
1381        'SZT'     => [
1382            '0',
1383            '2',
1384            '43',
1385            '43',
1386        ],
1387        'T'       => [
1388            '0',
1389            '3',
1390            '3',
1391            '3',
1392        ],
1393        'Ť'       => [
1394            '0',
1395            '3',
1396            '3',
1397            '3',
1398        ],
1399        'Ţ'       => [
1400            '0',
1401            '3',
1402            '3',
1403            '3',
1404            '4',
1405            '4',
1406            '4',
1407        ],
1408        'TC'      => [
1409            '0',
1410            '4',
1411            '4',
1412            '4',
1413        ],
1414        'TCH'     => [
1415            '0',
1416            '4',
1417            '4',
1418            '4',
1419        ],
1420        'TH'      => [
1421            '0',
1422            '3',
1423            '3',
1424            '3',
1425        ],
1426        'TRS'     => [
1427            '0',
1428            '4',
1429            '4',
1430            '4',
1431        ],
1432        'TRZ'     => [
1433            '0',
1434            '4',
1435            '4',
1436            '4',
1437        ],
1438        'TS'      => [
1439            '0',
1440            '4',
1441            '4',
1442            '4',
1443        ],
1444        'TSCH'    => [
1445            '0',
1446            '4',
1447            '4',
1448            '4',
1449        ],
1450        'TSH'     => [
1451            '0',
1452            '4',
1453            '4',
1454            '4',
1455        ],
1456        'TSZ'     => [
1457            '0',
1458            '4',
1459            '4',
1460            '4',
1461        ],
1462        'TTCH'    => [
1463            '0',
1464            '4',
1465            '4',
1466            '4',
1467        ],
1468        'TTS'     => [
1469            '0',
1470            '4',
1471            '4',
1472            '4',
1473        ],
1474        'TTSCH'   => [
1475            '0',
1476            '4',
1477            '4',
1478            '4',
1479        ],
1480        'TTSZ'    => [
1481            '0',
1482            '4',
1483            '4',
1484            '4',
1485        ],
1486        'TTZ'     => [
1487            '0',
1488            '4',
1489            '4',
1490            '4',
1491        ],
1492        'TZ'      => [
1493            '0',
1494            '4',
1495            '4',
1496            '4',
1497        ],
1498        'TZS'     => [
1499            '0',
1500            '4',
1501            '4',
1502            '4',
1503        ],
1504        'U'       => [
1505            '1',
1506            '0',
1507            '',
1508            '',
1509        ],
1510        'Ù'       => [
1511            '1',
1512            '0',
1513            '',
1514            '',
1515        ],
1516        'Ú'       => [
1517            '1',
1518            '0',
1519            '',
1520            '',
1521        ],
1522        'Û'       => [
1523            '1',
1524            '0',
1525            '',
1526            '',
1527        ],
1528        'Ü'       => [
1529            '1',
1530            '0',
1531            '',
1532            '',
1533        ],
1534        'Ũ'       => [
1535            '1',
1536            '0',
1537            '',
1538            '',
1539        ],
1540        'Ū'       => [
1541            '1',
1542            '0',
1543            '',
1544            '',
1545        ],
1546        'Ů'       => [
1547            '1',
1548            '0',
1549            '',
1550            '',
1551        ],
1552        'Ű'       => [
1553            '1',
1554            '0',
1555            '',
1556            '',
1557        ],
1558        'Ų'       => [
1559            '1',
1560            '0',
1561            '',
1562            '',
1563        ],
1564        'Ư'       => [
1565            '1',
1566            '0',
1567            '',
1568            '',
1569        ],
1570        'Ụ'       => [
1571            '1',
1572            '0',
1573            '',
1574            '',
1575        ],
1576        'Ủ'       => [
1577            '1',
1578            '0',
1579            '',
1580            '',
1581        ],
1582        'Ứ'       => [
1583            '1',
1584            '0',
1585            '',
1586            '',
1587        ],
1588        'Ừ'       => [
1589            '1',
1590            '0',
1591            '',
1592            '',
1593        ],
1594        'Ử'       => [
1595            '1',
1596            '0',
1597            '',
1598            '',
1599        ],
1600        'Ữ'       => [
1601            '1',
1602            '0',
1603            '',
1604            '',
1605        ],
1606        'Ự'       => [
1607            '1',
1608            '0',
1609            '',
1610            '',
1611        ],
1612        'UE'      => [
1613            '1',
1614            '0',
1615            '',
1616            '',
1617        ],
1618        'UI'      => [
1619            '1',
1620            '0',
1621            '1',
1622            '',
1623        ],
1624        'UJ'      => [
1625            '1',
1626            '0',
1627            '1',
1628            '',
1629        ],
1630        'UY'      => [
1631            '1',
1632            '0',
1633            '1',
1634            '',
1635        ],
1636        'UW'      => [
1637            '1',
1638            '0',
1639            '1',
1640            '',
1641            '0',
1642            '7',
1643            '7',
1644        ],
1645        'V'       => [
1646            '0',
1647            '7',
1648            '7',
1649            '7',
1650        ],
1651        'W'       => [
1652            '0',
1653            '7',
1654            '7',
1655            '7',
1656        ],
1657        'X'       => [
1658            '0',
1659            '5',
1660            '54',
1661            '54',
1662        ],
1663        'Y'       => [
1664            '1',
1665            '1',
1666            '',
1667            '',
1668        ],
1669        'Ý'       => [
1670            '1',
1671            '1',
1672            '',
1673            '',
1674        ],
1675        'Ỳ'       => [
1676            '1',
1677            '1',
1678            '',
1679            '',
1680        ],
1681        'Ỵ'       => [
1682            '1',
1683            '1',
1684            '',
1685            '',
1686        ],
1687        'Ỷ'       => [
1688            '1',
1689            '1',
1690            '',
1691            '',
1692        ],
1693        'Ỹ'       => [
1694            '1',
1695            '1',
1696            '',
1697            '',
1698        ],
1699        'Z'       => [
1700            '0',
1701            '4',
1702            '4',
1703            '4',
1704        ],
1705        'Ź'       => [
1706            '0',
1707            '4',
1708            '4',
1709            '4',
1710        ],
1711        'Ż'       => [
1712            '0',
1713            '4',
1714            '4',
1715            '4',
1716        ],
1717        'Ž'       => [
1718            '0',
1719            '4',
1720            '4',
1721            '4',
1722        ],
1723        'ZD'      => [
1724            '0',
1725            '2',
1726            '43',
1727            '43',
1728        ],
1729        'ZDZ'     => [
1730            '0',
1731            '2',
1732            '4',
1733            '4',
1734        ],
1735        'ZDZH'    => [
1736            '0',
1737            '2',
1738            '4',
1739            '4',
1740        ],
1741        'ZH'      => [
1742            '0',
1743            '4',
1744            '4',
1745            '4',
1746        ],
1747        'ZHD'     => [
1748            '0',
1749            '2',
1750            '43',
1751            '43',
1752        ],
1753        'ZHDZH'   => [
1754            '0',
1755            '2',
1756            '4',
1757            '4',
1758        ],
1759        'ZS'      => [
1760            '0',
1761            '4',
1762            '4',
1763            '4',
1764        ],
1765        'ZSCH'    => [
1766            '0',
1767            '4',
1768            '4',
1769            '4',
1770        ],
1771        'ZSH'     => [
1772            '0',
1773            '4',
1774            '4',
1775            '4',
1776        ],
1777        'ZZS'     => [
1778            '0',
1779            '4',
1780            '4',
1781            '4',
1782        ],
1783        // Cyrillic alphabet
1784        'А'       => [
1785            '1',
1786            '0',
1787            '',
1788            '',
1789        ],
1790        'Б'       => [
1791            '0',
1792            '7',
1793            '7',
1794            '7',
1795        ],
1796        'В'       => [
1797            '0',
1798            '7',
1799            '7',
1800            '7',
1801        ],
1802        'Г'       => [
1803            '0',
1804            '5',
1805            '5',
1806            '5',
1807        ],
1808        'Д'       => [
1809            '0',
1810            '3',
1811            '3',
1812            '3',
1813        ],
1814        'ДЗ'      => [
1815            '0',
1816            '4',
1817            '4',
1818            '4',
1819        ],
1820        'Е'       => [
1821            '1',
1822            '0',
1823            '',
1824            '',
1825        ],
1826        'Ё'       => [
1827            '1',
1828            '0',
1829            '',
1830            '',
1831        ],
1832        'Ж'       => [
1833            '0',
1834            '4',
1835            '4',
1836            '4',
1837        ],
1838        'З'       => [
1839            '0',
1840            '4',
1841            '4',
1842            '4',
1843        ],
1844        'И'       => [
1845            '1',
1846            '0',
1847            '',
1848            '',
1849        ],
1850        'Й'       => [
1851            '1',
1852            '1',
1853            '',
1854            '',
1855            '4',
1856            '4',
1857            '4',
1858        ],
1859        'К'       => [
1860            '0',
1861            '5',
1862            '5',
1863            '5',
1864        ],
1865        'Л'       => [
1866            '0',
1867            '8',
1868            '8',
1869            '8',
1870        ],
1871        'М'       => [
1872            '0',
1873            '6',
1874            '6',
1875            '6',
1876        ],
1877        'Н'       => [
1878            '0',
1879            '6',
1880            '6',
1881            '6',
1882        ],
1883        'О'       => [
1884            '1',
1885            '0',
1886            '',
1887            '',
1888        ],
1889        'П'       => [
1890            '0',
1891            '7',
1892            '7',
1893            '7',
1894        ],
1895        'Р'       => [
1896            '0',
1897            '9',
1898            '9',
1899            '9',
1900        ],
1901        'РЖ'      => [
1902            '0',
1903            '4',
1904            '4',
1905            '4',
1906        ],
1907        'С'       => [
1908            '0',
1909            '4',
1910            '4',
1911            '4',
1912        ],
1913        'Т'       => [
1914            '0',
1915            '3',
1916            '3',
1917            '3',
1918        ],
1919        'У'       => [
1920            '1',
1921            '0',
1922            '',
1923            '',
1924        ],
1925        'Ф'       => [
1926            '0',
1927            '7',
1928            '7',
1929            '7',
1930        ],
1931        'Х'       => [
1932            '0',
1933            '5',
1934            '5',
1935            '5',
1936        ],
1937        'Ц'       => [
1938            '0',
1939            '4',
1940            '4',
1941            '4',
1942        ],
1943        'Ч'       => [
1944            '0',
1945            '4',
1946            '4',
1947            '4',
1948        ],
1949        'Ш'       => [
1950            '0',
1951            '4',
1952            '4',
1953            '4',
1954        ],
1955        'Щ'       => [
1956            '0',
1957            '2',
1958            '4',
1959            '4',
1960        ],
1961        'Ъ'       => [
1962            '0',
1963            '',
1964            '',
1965            '',
1966        ],
1967        'Ы'       => [
1968            '0',
1969            '1',
1970            '',
1971            '',
1972        ],
1973        'Ь'       => [
1974            '0',
1975            '',
1976            '',
1977            '',
1978        ],
1979        'Э'       => [
1980            '1',
1981            '0',
1982            '',
1983            '',
1984        ],
1985        'Ю'       => [
1986            '0',
1987            '1',
1988            '',
1989            '',
1990        ],
1991        'Я'       => [
1992            '0',
1993            '1',
1994            '',
1995            '',
1996        ],
1997        // Greek alphabet
1998        'Α'       => [
1999            '1',
2000            '0',
2001            '',
2002            '',
2003        ],
2004        'Ά'       => [
2005            '1',
2006            '0',
2007            '',
2008            '',
2009        ],
2010        'ΑΙ'      => [
2011            '1',
2012            '0',
2013            '1',
2014            '',
2015        ],
2016        'ΑΥ'      => [
2017            '1',
2018            '0',
2019            '1',
2020            '',
2021        ],
2022        'Β'       => [
2023            '0',
2024            '7',
2025            '7',
2026            '7',
2027        ],
2028        'Γ'       => [
2029            '0',
2030            '5',
2031            '5',
2032            '5',
2033        ],
2034        'Δ'       => [
2035            '0',
2036            '3',
2037            '3',
2038            '3',
2039        ],
2040        'Ε'       => [
2041            '1',
2042            '0',
2043            '',
2044            '',
2045        ],
2046        'Έ'       => [
2047            '1',
2048            '0',
2049            '',
2050            '',
2051        ],
2052        'ΕΙ'      => [
2053            '1',
2054            '0',
2055            '1',
2056            '',
2057        ],
2058        'ΕΥ'      => [
2059            '1',
2060            '1',
2061            '1',
2062            '',
2063        ],
2064        'Ζ'       => [
2065            '0',
2066            '4',
2067            '4',
2068            '4',
2069        ],
2070        'Η'       => [
2071            '1',
2072            '0',
2073            '',
2074            '',
2075        ],
2076        'Ή'       => [
2077            '1',
2078            '0',
2079            '',
2080            '',
2081        ],
2082        'Θ'       => [
2083            '0',
2084            '3',
2085            '3',
2086            '3',
2087        ],
2088        'Ι'       => [
2089            '1',
2090            '0',
2091            '',
2092            '',
2093        ],
2094        'Ί'       => [
2095            '1',
2096            '0',
2097            '',
2098            '',
2099        ],
2100        'Ϊ'       => [
2101            '1',
2102            '0',
2103            '',
2104            '',
2105        ],
2106        'ΐ'       => [
2107            '1',
2108            '0',
2109            '',
2110            '',
2111        ],
2112        'Κ'       => [
2113            '0',
2114            '5',
2115            '5',
2116            '5',
2117        ],
2118        'Λ'       => [
2119            '0',
2120            '8',
2121            '8',
2122            '8',
2123        ],
2124        'Μ'       => [
2125            '0',
2126            '6',
2127            '6',
2128            '6',
2129        ],
2130        'ΜΠ'      => [
2131            '0',
2132            '7',
2133            '7',
2134            '7',
2135        ],
2136        'Ν'       => [
2137            '0',
2138            '6',
2139            '6',
2140            '6',
2141        ],
2142        'ΝΤ'      => [
2143            '0',
2144            '3',
2145            '3',
2146            '3',
2147        ],
2148        'Ξ'       => [
2149            '0',
2150            '5',
2151            '54',
2152            '54',
2153        ],
2154        'Ο'       => [
2155            '1',
2156            '0',
2157            '',
2158            '',
2159        ],
2160        'Ό'       => [
2161            '1',
2162            '0',
2163            '',
2164            '',
2165        ],
2166        'ΟΙ'      => [
2167            '1',
2168            '0',
2169            '1',
2170            '',
2171        ],
2172        'ΟΥ'      => [
2173            '1',
2174            '0',
2175            '1',
2176            '',
2177        ],
2178        'Π'       => [
2179            '0',
2180            '7',
2181            '7',
2182            '7',
2183        ],
2184        'Ρ'       => [
2185            '0',
2186            '9',
2187            '9',
2188            '9',
2189        ],
2190        'Σ'       => [
2191            '0',
2192            '4',
2193            '4',
2194            '4',
2195        ],
2196        'ς'       => [
2197            '0',
2198            '',
2199            '',
2200            '4',
2201        ],
2202        'Τ'       => [
2203            '0',
2204            '3',
2205            '3',
2206            '3',
2207        ],
2208        'ΤΖ'      => [
2209            '0',
2210            '4',
2211            '4',
2212            '4',
2213        ],
2214        'ΤΣ'      => [
2215            '0',
2216            '4',
2217            '4',
2218            '4',
2219        ],
2220        'Υ'       => [
2221            '1',
2222            '1',
2223            '',
2224            '',
2225        ],
2226        'Ύ'       => [
2227            '1',
2228            '1',
2229            '',
2230            '',
2231        ],
2232        'Ϋ'       => [
2233            '1',
2234            '1',
2235            '',
2236            '',
2237        ],
2238        'ΰ'       => [
2239            '1',
2240            '1',
2241            '',
2242            '',
2243        ],
2244        'ΥΚ'      => [
2245            '1',
2246            '5',
2247            '5',
2248            '5',
2249        ],
2250        'ΥΥ'      => [
2251            '1',
2252            '65',
2253            '65',
2254            '65',
2255        ],
2256        'Φ'       => [
2257            '0',
2258            '7',
2259            '7',
2260            '7',
2261        ],
2262        'Χ'       => [
2263            '0',
2264            '5',
2265            '5',
2266            '5',
2267        ],
2268        'Ψ'       => [
2269            '0',
2270            '7',
2271            '7',
2272            '7',
2273        ],
2274        'Ω'       => [
2275            '1',
2276            '0',
2277            '',
2278            '',
2279        ],
2280        'Ώ'       => [
2281            '1',
2282            '0',
2283            '',
2284            '',
2285        ],
2286        // Hebrew alphabet
2287        'א'       => [
2288            '1',
2289            '0',
2290            '',
2291            '',
2292        ],
2293        'או'      => [
2294            '1',
2295            '0',
2296            '7',
2297            '',
2298        ],
2299        'אג'      => [
2300            '1',
2301            '4',
2302            '4',
2303            '4',
2304            '5',
2305            '5',
2306            '5',
2307            '34',
2308            '34',
2309            '34',
2310        ],
2311        'בב'      => [
2312            '0',
2313            '7',
2314            '7',
2315            '7',
2316            '77',
2317            '77',
2318            '77',
2319        ],
2320        'ב'       => [
2321            '0',
2322            '7',
2323            '7',
2324            '7',
2325        ],
2326        'גג'      => [
2327            '0',
2328            '4',
2329            '4',
2330            '4',
2331            '5',
2332            '5',
2333            '5',
2334            '45',
2335            '45',
2336            '45',
2337            '55',
2338            '55',
2339            '55',
2340            '54',
2341            '54',
2342            '54',
2343        ],
2344        'גד'      => [
2345            '0',
2346            '43',
2347            '43',
2348            '43',
2349            '53',
2350            '53',
2351            '53',
2352        ],
2353        'גה'      => [
2354            '0',
2355            '45',
2356            '45',
2357            '45',
2358            '55',
2359            '55',
2360            '55',
2361        ],
2362        'גז'      => [
2363            '0',
2364            '44',
2365            '44',
2366            '44',
2367            '45',
2368            '45',
2369            '45',
2370        ],
2371        'גח'      => [
2372            '0',
2373            '45',
2374            '45',
2375            '45',
2376            '55',
2377            '55',
2378            '55',
2379        ],
2380        'גכ'      => [
2381            '0',
2382            '45',
2383            '45',
2384            '45',
2385            '55',
2386            '55',
2387            '55',
2388        ],
2389        'גך'      => [
2390            '0',
2391            '45',
2392            '45',
2393            '45',
2394            '55',
2395            '55',
2396            '55',
2397        ],
2398        'גצ'      => [
2399            '0',
2400            '44',
2401            '44',
2402            '44',
2403            '45',
2404            '45',
2405            '45',
2406        ],
2407        'גץ'      => [
2408            '0',
2409            '44',
2410            '44',
2411            '44',
2412            '45',
2413            '45',
2414            '45',
2415        ],
2416        'גק'      => [
2417            '0',
2418            '45',
2419            '45',
2420            '45',
2421            '54',
2422            '54',
2423            '54',
2424        ],
2425        'גש'      => [
2426            '0',
2427            '44',
2428            '44',
2429            '44',
2430            '54',
2431            '54',
2432            '54',
2433        ],
2434        'גת'      => [
2435            '0',
2436            '43',
2437            '43',
2438            '43',
2439            '53',
2440            '53',
2441            '53',
2442        ],
2443        'ג'       => [
2444            '0',
2445            '4',
2446            '4',
2447            '4',
2448            '5',
2449            '5',
2450            '5',
2451        ],
2452        'דז'      => [
2453            '0',
2454            '4',
2455            '4',
2456            '4',
2457        ],
2458        'דד'      => [
2459            '0',
2460            '3',
2461            '3',
2462            '3',
2463            '33',
2464            '33',
2465            '33',
2466        ],
2467        'דט'      => [
2468            '0',
2469            '33',
2470            '33',
2471            '33',
2472        ],
2473        'דש'      => [
2474            '0',
2475            '4',
2476            '4',
2477            '4',
2478        ],
2479        'דצ'      => [
2480            '0',
2481            '4',
2482            '4',
2483            '4',
2484        ],
2485        'דץ'      => [
2486            '0',
2487            '4',
2488            '4',
2489            '4',
2490        ],
2491        'ד'       => [
2492            '0',
2493            '3',
2494            '3',
2495            '3',
2496        ],
2497        'הג'      => [
2498            '0',
2499            '54',
2500            '54',
2501            '54',
2502            '55',
2503            '55',
2504            '55',
2505        ],
2506        'הכ'      => [
2507            '0',
2508            '55',
2509            '55',
2510            '55',
2511        ],
2512        'הח'      => [
2513            '0',
2514            '55',
2515            '55',
2516            '55',
2517        ],
2518        'הק'      => [
2519            '0',
2520            '55',
2521            '55',
2522            '55',
2523            '5',
2524            '5',
2525            '5',
2526        ],
2527        'הה'      => [
2528            '0',
2529            '5',
2530            '5',
2531            '',
2532            '55',
2533            '55',
2534            '',
2535        ],
2536        'ה'       => [
2537            '0',
2538            '5',
2539            '5',
2540            '',
2541        ],
2542        'וי'      => [
2543            '1',
2544            '',
2545            '',
2546            '',
2547            '7',
2548            '7',
2549            '7',
2550        ],
2551        'ו'       => [
2552            '1',
2553            '7',
2554            '7',
2555            '7',
2556            '7',
2557            '',
2558            '',
2559        ],
2560        'וו'      => [
2561            '1',
2562            '7',
2563            '7',
2564            '7',
2565            '7',
2566            '',
2567            '',
2568        ],
2569        'וופ'     => [
2570            '1',
2571            '7',
2572            '7',
2573            '7',
2574            '77',
2575            '77',
2576            '77',
2577        ],
2578        'זש'      => [
2579            '0',
2580            '4',
2581            '4',
2582            '4',
2583            '44',
2584            '44',
2585            '44',
2586        ],
2587        'זדז'     => [
2588            '0',
2589            '2',
2590            '4',
2591            '4',
2592        ],
2593        'ז'       => [
2594            '0',
2595            '4',
2596            '4',
2597            '4',
2598        ],
2599        'זג'      => [
2600            '0',
2601            '44',
2602            '44',
2603            '44',
2604            '45',
2605            '45',
2606            '45',
2607        ],
2608        'זז'      => [
2609            '0',
2610            '4',
2611            '4',
2612            '4',
2613            '44',
2614            '44',
2615            '44',
2616        ],
2617        'זס'      => [
2618            '0',
2619            '44',
2620            '44',
2621            '44',
2622        ],
2623        'זצ'      => [
2624            '0',
2625            '44',
2626            '44',
2627            '44',
2628        ],
2629        'זץ'      => [
2630            '0',
2631            '44',
2632            '44',
2633            '44',
2634        ],
2635        'חג'      => [
2636            '0',
2637            '54',
2638            '54',
2639            '54',
2640            '53',
2641            '53',
2642            '53',
2643        ],
2644        'חח'      => [
2645            '0',
2646            '5',
2647            '5',
2648            '5',
2649            '55',
2650            '55',
2651            '55',
2652        ],
2653        'חק'      => [
2654            '0',
2655            '55',
2656            '55',
2657            '55',
2658            '5',
2659            '5',
2660            '5',
2661        ],
2662        'חכ'      => [
2663            '0',
2664            '45',
2665            '45',
2666            '45',
2667            '55',
2668            '55',
2669            '55',
2670        ],
2671        'חס'      => [
2672            '0',
2673            '5',
2674            '54',
2675            '54',
2676        ],
2677        'חש'      => [
2678            '0',
2679            '5',
2680            '54',
2681            '54',
2682        ],
2683        'ח'       => [
2684            '0',
2685            '5',
2686            '5',
2687            '5',
2688        ],
2689        'טש'      => [
2690            '0',
2691            '4',
2692            '4',
2693            '4',
2694        ],
2695        'טד'      => [
2696            '0',
2697            '33',
2698            '33',
2699            '33',
2700        ],
2701        'טי'      => [
2702            '0',
2703            '3',
2704            '3',
2705            '3',
2706            '4',
2707            '4',
2708            '4',
2709            '3',
2710            '3',
2711            '34',
2712        ],
2713        'טת'      => [
2714            '0',
2715            '33',
2716            '33',
2717            '33',
2718        ],
2719        'טט'      => [
2720            '0',
2721            '3',
2722            '3',
2723            '3',
2724            '33',
2725            '33',
2726            '33',
2727        ],
2728        'ט'       => [
2729            '0',
2730            '3',
2731            '3',
2732            '3',
2733        ],
2734        'י'       => [
2735            '1',
2736            '1',
2737            '',
2738            '',
2739        ],
2740        'יא'      => [
2741            '1',
2742            '1',
2743            '',
2744            '',
2745            '1',
2746            '1',
2747            '1',
2748        ],
2749        'כג'      => [
2750            '0',
2751            '55',
2752            '55',
2753            '55',
2754            '54',
2755            '54',
2756            '54',
2757        ],
2758        'כש'      => [
2759            '0',
2760            '5',
2761            '54',
2762            '54',
2763        ],
2764        'כס'      => [
2765            '0',
2766            '5',
2767            '54',
2768            '54',
2769        ],
2770        'ככ'      => [
2771            '0',
2772            '5',
2773            '5',
2774            '5',
2775            '55',
2776            '55',
2777            '55',
2778        ],
2779        'כך'      => [
2780            '0',
2781            '5',
2782            '5',
2783            '5',
2784            '55',
2785            '55',
2786            '55',
2787        ],
2788        'כ'       => [
2789            '0',
2790            '5',
2791            '5',
2792            '5',
2793        ],
2794        'כח'      => [
2795            '0',
2796            '55',
2797            '55',
2798            '55',
2799            '5',
2800            '5',
2801            '5',
2802        ],
2803        'ך'       => [
2804            '0',
2805            '',
2806            '5',
2807            '5',
2808        ],
2809        'ל'       => [
2810            '0',
2811            '8',
2812            '8',
2813            '8',
2814        ],
2815        'לל'      => [
2816            '0',
2817            '88',
2818            '88',
2819            '88',
2820            '8',
2821            '8',
2822            '8',
2823        ],
2824        'מנ'      => [
2825            '0',
2826            '66',
2827            '66',
2828            '66',
2829        ],
2830        'מן'      => [
2831            '0',
2832            '66',
2833            '66',
2834            '66',
2835        ],
2836        'ממ'      => [
2837            '0',
2838            '6',
2839            '6',
2840            '6',
2841            '66',
2842            '66',
2843            '66',
2844        ],
2845        'מם'      => [
2846            '0',
2847            '6',
2848            '6',
2849            '6',
2850            '66',
2851            '66',
2852            '66',
2853        ],
2854        'מ'       => [
2855            '0',
2856            '6',
2857            '6',
2858            '6',
2859        ],
2860        'ם'       => [
2861            '0',
2862            '',
2863            '6',
2864            '6',
2865        ],
2866        'נמ'      => [
2867            '0',
2868            '66',
2869            '66',
2870            '66',
2871        ],
2872        'נם'      => [
2873            '0',
2874            '66',
2875            '66',
2876            '66',
2877        ],
2878        'ננ'      => [
2879            '0',
2880            '6',
2881            '6',
2882            '6',
2883            '66',
2884            '66',
2885            '66',
2886        ],
2887        'נן'      => [
2888            '0',
2889            '6',
2890            '6',
2891            '6',
2892            '66',
2893            '66',
2894            '66',
2895        ],
2896        'נ'       => [
2897            '0',
2898            '6',
2899            '6',
2900            '6',
2901        ],
2902        'ן'       => [
2903            '0',
2904            '',
2905            '6',
2906            '6',
2907        ],
2908        'סתש'     => [
2909            '0',
2910            '2',
2911            '4',
2912            '4',
2913        ],
2914        'סתז'     => [
2915            '0',
2916            '2',
2917            '4',
2918            '4',
2919        ],
2920        'סטז'     => [
2921            '0',
2922            '2',
2923            '4',
2924            '4',
2925        ],
2926        'סטש'     => [
2927            '0',
2928            '2',
2929            '4',
2930            '4',
2931        ],
2932        'סצד'     => [
2933            '0',
2934            '2',
2935            '4',
2936            '4',
2937        ],
2938        'סט'      => [
2939            '0',
2940            '2',
2941            '4',
2942            '4',
2943            '43',
2944            '43',
2945            '43',
2946        ],
2947        'סת'      => [
2948            '0',
2949            '2',
2950            '4',
2951            '4',
2952            '43',
2953            '43',
2954            '43',
2955        ],
2956        'סג'      => [
2957            '0',
2958            '44',
2959            '44',
2960            '44',
2961            '4',
2962            '4',
2963            '4',
2964        ],
2965        'סס'      => [
2966            '0',
2967            '4',
2968            '4',
2969            '4',
2970            '44',
2971            '44',
2972            '44',
2973        ],
2974        'סצ'      => [
2975            '0',
2976            '44',
2977            '44',
2978            '44',
2979        ],
2980        'סץ'      => [
2981            '0',
2982            '44',
2983            '44',
2984            '44',
2985        ],
2986        'סז'      => [
2987            '0',
2988            '44',
2989            '44',
2990            '44',
2991        ],
2992        'סש'      => [
2993            '0',
2994            '44',
2995            '44',
2996            '44',
2997        ],
2998        'ס'       => [
2999            '0',
3000            '4',
3001            '4',
3002            '4',
3003        ],
3004        'ע'       => [
3005            '1',
3006            '0',
3007            '',
3008            '',
3009        ],
3010        'פב'      => [
3011            '0',
3012            '7',
3013            '7',
3014            '7',
3015            '77',
3016            '77',
3017            '77',
3018        ],
3019        'פוו'     => [
3020            '0',
3021            '7',
3022            '7',
3023            '7',
3024            '77',
3025            '77',
3026            '77',
3027        ],
3028        'פפ'      => [
3029            '0',
3030            '7',
3031            '7',
3032            '7',
3033            '77',
3034            '77',
3035            '77',
3036        ],
3037        'פף'      => [
3038            '0',
3039            '7',
3040            '7',
3041            '7',
3042            '77',
3043            '77',
3044            '77',
3045        ],
3046        'פ'       => [
3047            '0',
3048            '7',
3049            '7',
3050            '7',
3051        ],
3052        'ף'       => [
3053            '0',
3054            '',
3055            '7',
3056            '7',
3057        ],
3058        'צג'      => [
3059            '0',
3060            '44',
3061            '44',
3062            '44',
3063            '45',
3064            '45',
3065            '45',
3066        ],
3067        'צז'      => [
3068            '0',
3069            '44',
3070            '44',
3071            '44',
3072        ],
3073        'צס'      => [
3074            '0',
3075            '44',
3076            '44',
3077            '44',
3078        ],
3079        'צצ'      => [
3080            '0',
3081            '4',
3082            '4',
3083            '4',
3084            '5',
3085            '5',
3086            '5',
3087            '44',
3088            '44',
3089            '44',
3090            '54',
3091            '54',
3092            '54',
3093            '45',
3094            '45',
3095            '45',
3096        ],
3097        'צץ'      => [
3098            '0',
3099            '4',
3100            '4',
3101            '4',
3102            '5',
3103            '5',
3104            '5',
3105            '44',
3106            '44',
3107            '44',
3108            '54',
3109            '54',
3110            '54',
3111        ],
3112        'צש'      => [
3113            '0',
3114            '44',
3115            '44',
3116            '44',
3117            '4',
3118            '4',
3119            '4',
3120            '5',
3121            '5',
3122            '5',
3123        ],
3124        'צ'       => [
3125            '0',
3126            '4',
3127            '4',
3128            '4',
3129            '5',
3130            '5',
3131            '5',
3132        ],
3133        'ץ'       => [
3134            '0',
3135            '',
3136            '4',
3137            '4',
3138        ],
3139        'קה'      => [
3140            '0',
3141            '55',
3142            '55',
3143            '5',
3144        ],
3145        'קס'      => [
3146            '0',
3147            '5',
3148            '54',
3149            '54',
3150        ],
3151        'קש'      => [
3152            '0',
3153            '5',
3154            '54',
3155            '54',
3156        ],
3157        'קק'      => [
3158            '0',
3159            '5',
3160            '5',
3161            '5',
3162            '55',
3163            '55',
3164            '55',
3165        ],
3166        'קח'      => [
3167            '0',
3168            '55',
3169            '55',
3170            '55',
3171        ],
3172        'קכ'      => [
3173            '0',
3174            '55',
3175            '55',
3176            '55',
3177        ],
3178        'קך'      => [
3179            '0',
3180            '55',
3181            '55',
3182            '55',
3183        ],
3184        'קג'      => [
3185            '0',
3186            '55',
3187            '55',
3188            '55',
3189            '54',
3190            '54',
3191            '54',
3192        ],
3193        'ק'       => [
3194            '0',
3195            '5',
3196            '5',
3197            '5',
3198        ],
3199        'רר'      => [
3200            '0',
3201            '99',
3202            '99',
3203            '99',
3204            '9',
3205            '9',
3206            '9',
3207        ],
3208        'ר'       => [
3209            '0',
3210            '9',
3211            '9',
3212            '9',
3213        ],
3214        'שטז'     => [
3215            '0',
3216            '2',
3217            '4',
3218            '4',
3219        ],
3220        'שתש'     => [
3221            '0',
3222            '2',
3223            '4',
3224            '4',
3225        ],
3226        'שתז'     => [
3227            '0',
3228            '2',
3229            '4',
3230            '4',
3231        ],
3232        'שטש'     => [
3233            '0',
3234            '2',
3235            '4',
3236            '4',
3237        ],
3238        'שד'      => [
3239            '0',
3240            '2',
3241            '43',
3242            '43',
3243        ],
3244        'שז'      => [
3245            '0',
3246            '44',
3247            '44',
3248            '44',
3249        ],
3250        'שס'      => [
3251            '0',
3252            '44',
3253            '44',
3254            '44',
3255        ],
3256        'שת'      => [
3257            '0',
3258            '2',
3259            '43',
3260            '43',
3261        ],
3262        'שג'      => [
3263            '0',
3264            '4',
3265            '4',
3266            '4',
3267            '44',
3268            '44',
3269            '44',
3270            '4',
3271            '43',
3272            '43',
3273        ],
3274        'שט'      => [
3275            '0',
3276            '2',
3277            '43',
3278            '43',
3279            '44',
3280            '44',
3281            '44',
3282        ],
3283        'שצ'      => [
3284            '0',
3285            '44',
3286            '44',
3287            '44',
3288            '45',
3289            '45',
3290            '45',
3291        ],
3292        'שץ'      => [
3293            '0',
3294            '44',
3295            '',
3296            '44',
3297            '45',
3298            '',
3299            '45',
3300        ],
3301        'שש'      => [
3302            '0',
3303            '4',
3304            '4',
3305            '4',
3306            '44',
3307            '44',
3308            '44',
3309        ],
3310        'ש'       => [
3311            '0',
3312            '4',
3313            '4',
3314            '4',
3315        ],
3316        'תג'      => [
3317            '0',
3318            '34',
3319            '34',
3320            '34',
3321        ],
3322        'תז'      => [
3323            '0',
3324            '34',
3325            '34',
3326            '34',
3327        ],
3328        'תש'      => [
3329            '0',
3330            '4',
3331            '4',
3332            '4',
3333        ],
3334        'תת'      => [
3335            '0',
3336            '3',
3337            '3',
3338            '3',
3339            '4',
3340            '4',
3341            '4',
3342            '33',
3343            '33',
3344            '33',
3345            '44',
3346            '44',
3347            '44',
3348            '34',
3349            '34',
3350            '34',
3351            '43',
3352            '43',
3353            '43',
3354        ],
3355        'ת'       => [
3356            '0',
3357            '3',
3358            '3',
3359            '3',
3360            '4',
3361            '4',
3362            '4',
3363        ],
3364        // Arabic alphabet
3365        'ا'       => [
3366            '1',
3367            '0',
3368            '',
3369            '',
3370        ],
3371        'ب'       => [
3372            '0',
3373            '7',
3374            '7',
3375            '7',
3376        ],
3377        'ت'       => [
3378            '0',
3379            '3',
3380            '3',
3381            '3',
3382        ],
3383        'ث'       => [
3384            '0',
3385            '3',
3386            '3',
3387            '3',
3388        ],
3389        'ج'       => [
3390            '0',
3391            '4',
3392            '4',
3393            '4',
3394        ],
3395        'ح'       => [
3396            '0',
3397            '5',
3398            '5',
3399            '5',
3400        ],
3401        'خ'       => [
3402            '0',
3403            '5',
3404            '5',
3405            '5',
3406        ],
3407        'د'       => [
3408            '0',
3409            '3',
3410            '3',
3411            '3',
3412        ],
3413        'ذ'       => [
3414            '0',
3415            '3',
3416            '3',
3417            '3',
3418        ],
3419        'ر'       => [
3420            '0',
3421            '9',
3422            '9',
3423            '9',
3424        ],
3425        'ز'       => [
3426            '0',
3427            '4',
3428            '4',
3429            '4',
3430        ],
3431        'س'       => [
3432            '0',
3433            '4',
3434            '4',
3435            '4',
3436        ],
3437        'ش'       => [
3438            '0',
3439            '4',
3440            '4',
3441            '4',
3442        ],
3443        'ص'       => [
3444            '0',
3445            '4',
3446            '4',
3447            '4',
3448        ],
3449        'ض'       => [
3450            '0',
3451            '3',
3452            '3',
3453            '3',
3454        ],
3455        'ط'       => [
3456            '0',
3457            '3',
3458            '3',
3459            '3',
3460        ],
3461        'ظ'       => [
3462            '0',
3463            '4',
3464            '4',
3465            '4',
3466        ],
3467        'ع'       => [
3468            '1',
3469            '0',
3470            '',
3471            '',
3472        ],
3473        'غ'       => [
3474            '0',
3475            '0',
3476            '',
3477            '',
3478        ],
3479        'ف'       => [
3480            '0',
3481            '7',
3482            '7',
3483            '7',
3484        ],
3485        'ق'       => [
3486            '0',
3487            '5',
3488            '5',
3489            '5',
3490        ],
3491        'ك'       => [
3492            '0',
3493            '5',
3494            '5',
3495            '5',
3496        ],
3497        'ل'       => [
3498            '0',
3499            '8',
3500            '8',
3501            '8',
3502        ],
3503        'لا'      => [
3504            '0',
3505            '8',
3506            '8',
3507            '8',
3508        ],
3509        'م'       => [
3510            '0',
3511            '6',
3512            '6',
3513            '6',
3514        ],
3515        'ن'       => [
3516            '0',
3517            '6',
3518            '6',
3519            '6',
3520        ],
3521        'هن'      => [
3522            '0',
3523            '66',
3524            '66',
3525            '66',
3526        ],
3527        'ه'       => [
3528            '0',
3529            '5',
3530            '5',
3531            '',
3532        ],
3533        'و'       => [
3534            '1',
3535            '',
3536            '',
3537            '',
3538            '7',
3539            '',
3540            '',
3541        ],
3542        'ي'       => [
3543            '0',
3544            '1',
3545            '',
3546            '',
3547        ],
3548        'آ'       => [
3549            '0',
3550            '1',
3551            '',
3552            '',
3553        ],
3554        'ة'       => [
3555            '0',
3556            '',
3557            '',
3558            '3',
3559        ],
3560        'ی'       => [
3561            '0',
3562            '1',
3563            '',
3564            '',
3565        ],
3566        'ى'       => [
3567            '1',
3568            '1',
3569            '',
3570            '',
3571        ],
3572    ];
3573
3574    /**
3575     * Calculate the Daitch-Mokotoff soundex for a word.
3576     *
3577     * @param string $name
3578     *
3579     * @return string[] List of possible DM codes for the word.
3580     */
3581    private static function daitchMokotoffWord($name): array
3582    {
3583        // Apply special transformation rules to the input string
3584        $name = I18N::strtoupper($name);
3585        foreach (self::$transformNameTable as $transformRule) {
3586            $name = str_replace($transformRule[0], $transformRule[1], $name);
3587        }
3588
3589        // Initialize
3590        $name_script = I18N::textScript($name);
3591        $noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');
3592
3593        $lastPos         = strlen($name) - 1;
3594        $currPos         = 0;
3595        $state           = 1; // 1: start of input string, 2: before vowel, 3: other
3596        $result          = []; // accumulate complete 6-digit D-M codes here
3597        $partialResult   = []; // accumulate incomplete D-M codes here
3598        $partialResult[] = ['!']; // initialize 1st partial result  ('!' stops "duplicate sound" check)
3599
3600        // Loop through the input string.
3601        // Stop when the string is exhausted or when no more partial results remain
3602        while (count($partialResult) !== 0 && $currPos <= $lastPos) {
3603            // Find the DM coding table entry for the chunk at the current position
3604            $thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
3605            while ($thisEntry != '') {
3606                if (isset(self::$dmsounds[$thisEntry])) {
3607                    break;
3608                }
3609                $thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
3610            }
3611            if ($thisEntry === '') {
3612                $currPos++; // Not in table: advance pointer to next byte
3613                continue; // and try again
3614            }
3615
3616            $soundTableEntry = self::$dmsounds[$thisEntry];
3617            $workingResult   = $partialResult;
3618            $partialResult   = [];
3619            $currPos += strlen($thisEntry);
3620
3621            // Not at beginning of input string
3622            if ($state != 1) {
3623                if ($currPos <= $lastPos) {
3624                    // Determine whether the next chunk is a vowel
3625                    $nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
3626                    while ($nextEntry != '') {
3627                        if (isset(self::$dmsounds[$nextEntry])) {
3628                            break;
3629                        }
3630                        $nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
3631                    }
3632                } else {
3633                    $nextEntry = '';
3634                }
3635                if ($nextEntry != '' && self::$dmsounds[$nextEntry][0] != '0') {
3636                    $state = 2;
3637                } else {
3638                    // Next chunk is a vowel
3639                    $state = 3;
3640                }
3641            }
3642
3643            while ($state < count($soundTableEntry)) {
3644                // empty means 'ignore this sound in this state'
3645                if ($soundTableEntry[$state] == '') {
3646                    foreach ($workingResult as $workingEntry) {
3647                        $tempEntry                        = $workingEntry;
3648                        $tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
3649                        $partialResult[]                  = $tempEntry;
3650                    }
3651                } else {
3652                    foreach ($workingResult as $workingEntry) {
3653                        if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) {
3654                            // Incoming sound isn't a duplicate of the previous sound
3655                            $workingEntry[] = $soundTableEntry[$state];
3656                        } else {
3657                            // Incoming sound is a duplicate of the previous sound
3658                            // For Hebrew and Arabic, we need to create a pair of D-M sound codes,
3659                            // one of the pair with only a single occurrence of the duplicate sound,
3660                            // the other with both occurrences
3661                            if ($noVowels) {
3662                                $workingEntry[] = $soundTableEntry[$state];
3663                            }
3664                        }
3665                        if (count($workingEntry) < 7) {
3666                            $partialResult[] = $workingEntry;
3667                        } else {
3668                            // This is the 6th code in the sequence
3669                            // We're looking for 7 entries because the first is '!' and doesn't count
3670                            $tempResult = str_replace('!', '', implode('', $workingEntry));
3671                            // Only return codes from recognisable sounds
3672                            if ($tempResult) {
3673                                $result[] = substr($tempResult . '000000', 0, 6);
3674                            }
3675                        }
3676                    }
3677                }
3678                $state = $state + 3; // Advance to next triplet while keeping the same basic state
3679            }
3680        }
3681
3682        // Zero-fill and copy all remaining partial results
3683        foreach ($partialResult as $workingEntry) {
3684            $tempResult = str_replace('!', '', implode('', $workingEntry));
3685            // Only return codes from recognisable sounds
3686            if ($tempResult) {
3687                $result[] = substr($tempResult . '000000', 0, 6);
3688            }
3689        }
3690
3691        return $result;
3692    }
3693}
3694