xref: /webtrees/app/Soundex.php (revision 67994fb087e1b24564a780e4ae8aeff801733e35)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2018 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20/**
21 * Phonetic matching of strings.
22 */
23class Soundex
24{
25    /**
26     * Which algorithms are supported.
27     *
28     * @return string[]
29     */
30    public static function getAlgorithms(): array
31    {
32        return [
33            /* I18N: http://en.wikipedia.org/wiki/Soundex */
34            'std' => I18N::translate('Russell'),
35            /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
36            'dm'  => I18N::translate('Daitch-Mokotoff'),
37        ];
38    }
39
40    /**
41     * Is there a match between two soundex codes?
42     *
43     * @param string $soundex1
44     * @param string $soundex2
45     *
46     * @return bool
47     */
48    public static function compare($soundex1, $soundex2): bool
49    {
50        if ($soundex1 !== '' && $soundex2 !== '') {
51            return !empty(array_intersect(explode(':', $soundex1), explode(':', $soundex2)));
52        }
53
54        return false;
55    }
56
57    /**
58     * Generate Russell soundex codes for a given text.
59     *
60     * @param string $text
61     *
62     * @return string
63     */
64    public static function russell(string $text): string
65    {
66        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
67        $soundex_array = [];
68        foreach ($words as $word) {
69            $soundex = soundex($word);
70            // Only return codes from recognisable sounds
71            if ($soundex !== '0000') {
72                $soundex_array[] = $soundex;
73            }
74        }
75        // Combine words, e.g. “New York” as “Newyork”
76        if (count($words) > 1) {
77            $soundex_array[] = soundex(strtr($text, ' ', ''));
78        }
79        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
80        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);
81
82        if ($soundex_array) {
83            return implode(':', $soundex_array);
84        }
85
86        return '';
87    }
88
89    /**
90     * Generate Daitch–Mokotoff soundex codes for a given text.
91     *
92     * @param string $text
93     *
94     * @return string
95     */
96    public static function daitchMokotoff(string $text): string
97    {
98        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
99        $soundex_array = [];
100        foreach ($words as $word) {
101            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
102        }
103        // Combine words, e.g. “New York” as “Newyork”
104        if (count($words) > 1) {
105            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(strtr($text, ' ', '')));
106        }
107        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
108        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);
109
110        if ($soundex_array) {
111            return implode(':', $soundex_array);
112        }
113
114        return '';
115    }
116
117    // Determine the Daitch–Mokotoff Soundex code for a word
118    // Original implementation by Gerry Kroll, and analysis by Meliza Amity
119
    /**
     * Maximum length of a sound table key.
     * This is measured in bytes, NOT in UTF-8 characters.
     */
    const MAXCHAR = 7;
122
123    /**
124     * Name transformation arrays.
125     * Used to transform the Name string to simplify the "sounds like" table.
126     * This is especially useful in Hebrew.
127     *
128     * Each array entry defines the "from" and "to" arguments of an preg($from, $to, $text)
129     * function call to achieve the desired transformations.
130     *
131     * Note about the use of "\x01":
132     * This code, which can’t legitimately occur in the kind of text we're dealing with,
133     * is used as a place-holder so that conditional string replacements can be done.
134     *
135     * @var string[][]
136     */
137    private static $transformNameTable = [
138        // Force Yiddish ligatures to be treated as separate letters
139        [
140            'װ',
141            'וו',
142        ],
143        [
144            'ײ',
145            'יי',
146        ],
147        [
148            'ױ',
149            'וי',
150        ],
151        [
152            'בו',
153            'בע',
154        ],
155        [
156            'פו',
157            'פע',
158        ],
159        [
160            'ומ',
161            'עמ',
162        ],
163        [
164            'ום',
165            'עם',
166        ],
167        [
168            'ונ',
169            'ענ',
170        ],
171        [
172            'ון',
173            'ען',
174        ],
175        [
176            'וו',
177            'ב',
178        ],
179        [
180            "\x01",
181            '',
182        ],
183        [
184            'ייה$',
185            "\x01ה",
186        ],
187        [
188            'ייע$',
189            "\x01ע",
190        ],
191        [
192            'יי',
193            'ע',
194        ],
195        [
196            "\x01",
197            'יי',
198        ],
199    ];
200
201    /**
202     * The DM sound coding table is organized this way:
203     * key: a variable-length string that corresponds to the UTF-8 character sequence
204     * represented by the table entry. Currently, that string can be up to 7
     * bytes long. This maximum length is defined by the class constant
     * self::MAXCHAR.
207     *
208     * value: an array as follows:
209     * [0]:  zero if not a vowel
210     * [1]:  sound value when this string is at the beginning of the word
211     * [2]:  sound value when this string is followed by a vowel
212     * [3]:  sound value for other cases
213     * [1],[2],[3] can be repeated several times to create branches in the code
214     * an empty sound value means "ignore in this state"
215     *
216     * @var string[][]
217     */
218    private static $dmsounds = [
219        'A'       => [
220            '1',
221            '0',
222            '',
223            '',
224        ],
225        'À'       => [
226            '1',
227            '0',
228            '',
229            '',
230        ],
231        'Á'       => [
232            '1',
233            '0',
234            '',
235            '',
236        ],
237        'Â'       => [
238            '1',
239            '0',
240            '',
241            '',
242        ],
243        'Ã'       => [
244            '1',
245            '0',
246            '',
247            '',
248        ],
249        'Ä'       => [
250            '1',
251            '0',
252            '1',
253            '',
254            '0',
255            '',
256            '',
257        ],
258        'Å'       => [
259            '1',
260            '0',
261            '',
262            '',
263        ],
264        'Ă'       => [
265            '1',
266            '0',
267            '',
268            '',
269        ],
270        'Ą'       => [
271            '1',
272            '',
273            '',
274            '',
275            '',
276            '',
277            '6',
278        ],
279        'Ạ'       => [
280            '1',
281            '0',
282            '',
283            '',
284        ],
285        'Ả'       => [
286            '1',
287            '0',
288            '',
289            '',
290        ],
291        'Ấ'       => [
292            '1',
293            '0',
294            '',
295            '',
296        ],
297        'Ầ'       => [
298            '1',
299            '0',
300            '',
301            '',
302        ],
303        'Ẩ'       => [
304            '1',
305            '0',
306            '',
307            '',
308        ],
309        'Ẫ'       => [
310            '1',
311            '0',
312            '',
313            '',
314        ],
315        'Ậ'       => [
316            '1',
317            '0',
318            '',
319            '',
320        ],
321        'Ắ'       => [
322            '1',
323            '0',
324            '',
325            '',
326        ],
327        'Ằ'       => [
328            '1',
329            '0',
330            '',
331            '',
332        ],
333        'Ẳ'       => [
334            '1',
335            '0',
336            '',
337            '',
338        ],
339        'Ẵ'       => [
340            '1',
341            '0',
342            '',
343            '',
344        ],
345        'Ặ'       => [
346            '1',
347            '0',
348            '',
349            '',
350        ],
351        'AE'      => [
352            '1',
353            '0',
354            '1',
355            '',
356        ],
357        'Æ'       => [
358            '1',
359            '0',
360            '1',
361            '',
362        ],
363        'AI'      => [
364            '1',
365            '0',
366            '1',
367            '',
368        ],
369        'AJ'      => [
370            '1',
371            '0',
372            '1',
373            '',
374        ],
375        'AU'      => [
376            '1',
377            '0',
378            '7',
379            '',
380        ],
381        'AV'      => [
382            '1',
383            '0',
384            '7',
385            '',
386            '7',
387            '7',
388            '7',
389        ],
390        'ÄU'      => [
391            '1',
392            '0',
393            '1',
394            '',
395        ],
396        'AY'      => [
397            '1',
398            '0',
399            '1',
400            '',
401        ],
402        'B'       => [
403            '0',
404            '7',
405            '7',
406            '7',
407        ],
408        'C'       => [
409            '0',
410            '5',
411            '5',
412            '5',
413            '34',
414            '4',
415            '4',
416        ],
417        'Ć'       => [
418            '0',
419            '4',
420            '4',
421            '4',
422        ],
423        'Č'       => [
424            '0',
425            '4',
426            '4',
427            '4',
428        ],
429        'Ç'       => [
430            '0',
431            '4',
432            '4',
433            '4',
434        ],
435        'CH'      => [
436            '0',
437            '5',
438            '5',
439            '5',
440            '34',
441            '4',
442            '4',
443        ],
444        'CHS'     => [
445            '0',
446            '5',
447            '54',
448            '54',
449        ],
450        'CK'      => [
451            '0',
452            '5',
453            '5',
454            '5',
455            '45',
456            '45',
457            '45',
458        ],
459        'CCS'     => [
460            '0',
461            '4',
462            '4',
463            '4',
464        ],
465        'CS'      => [
466            '0',
467            '4',
468            '4',
469            '4',
470        ],
471        'CSZ'     => [
472            '0',
473            '4',
474            '4',
475            '4',
476        ],
477        'CZ'      => [
478            '0',
479            '4',
480            '4',
481            '4',
482        ],
483        'CZS'     => [
484            '0',
485            '4',
486            '4',
487            '4',
488        ],
489        'D'       => [
490            '0',
491            '3',
492            '3',
493            '3',
494        ],
495        'Ď'       => [
496            '0',
497            '3',
498            '3',
499            '3',
500        ],
501        'Đ'       => [
502            '0',
503            '3',
504            '3',
505            '3',
506        ],
507        'DRS'     => [
508            '0',
509            '4',
510            '4',
511            '4',
512        ],
513        'DRZ'     => [
514            '0',
515            '4',
516            '4',
517            '4',
518        ],
519        'DS'      => [
520            '0',
521            '4',
522            '4',
523            '4',
524        ],
525        'DSH'     => [
526            '0',
527            '4',
528            '4',
529            '4',
530        ],
531        'DSZ'     => [
532            '0',
533            '4',
534            '4',
535            '4',
536        ],
537        'DT'      => [
538            '0',
539            '3',
540            '3',
541            '3',
542        ],
543        'DDZ'     => [
544            '0',
545            '4',
546            '4',
547            '4',
548        ],
549        'DDZS'    => [
550            '0',
551            '4',
552            '4',
553            '4',
554        ],
555        'DZ'      => [
556            '0',
557            '4',
558            '4',
559            '4',
560        ],
561        'DŹ'      => [
562            '0',
563            '4',
564            '4',
565            '4',
566        ],
567        'DŻ'      => [
568            '0',
569            '4',
570            '4',
571            '4',
572        ],
573        'DZH'     => [
574            '0',
575            '4',
576            '4',
577            '4',
578        ],
579        'DZS'     => [
580            '0',
581            '4',
582            '4',
583            '4',
584        ],
585        'E'       => [
586            '1',
587            '0',
588            '',
589            '',
590        ],
591        'È'       => [
592            '1',
593            '0',
594            '',
595            '',
596        ],
597        'É'       => [
598            '1',
599            '0',
600            '',
601            '',
602        ],
603        'Ê'       => [
604            '1',
605            '0',
606            '',
607            '',
608        ],
609        'Ë'       => [
610            '1',
611            '0',
612            '',
613            '',
614        ],
615        'Ĕ'       => [
616            '1',
617            '0',
618            '',
619            '',
620        ],
621        'Ė'       => [
622            '1',
623            '0',
624            '',
625            '',
626        ],
627        'Ę'       => [
628            '1',
629            '',
630            '',
631            '6',
632            '',
633            '',
634            '',
635        ],
636        'Ẹ'       => [
637            '1',
638            '0',
639            '',
640            '',
641        ],
642        'Ẻ'       => [
643            '1',
644            '0',
645            '',
646            '',
647        ],
648        'Ẽ'       => [
649            '1',
650            '0',
651            '',
652            '',
653        ],
654        'Ế'       => [
655            '1',
656            '0',
657            '',
658            '',
659        ],
660        'Ề'       => [
661            '1',
662            '0',
663            '',
664            '',
665        ],
666        'Ể'       => [
667            '1',
668            '0',
669            '',
670            '',
671        ],
672        'Ễ'       => [
673            '1',
674            '0',
675            '',
676            '',
677        ],
678        'Ệ'       => [
679            '1',
680            '0',
681            '',
682            '',
683        ],
684        'EAU'     => [
685            '1',
686            '0',
687            '',
688            '',
689        ],
690        'EI'      => [
691            '1',
692            '0',
693            '1',
694            '',
695        ],
696        'EJ'      => [
697            '1',
698            '0',
699            '1',
700            '',
701        ],
702        'EU'      => [
703            '1',
704            '1',
705            '1',
706            '',
707        ],
708        'EY'      => [
709            '1',
710            '0',
711            '1',
712            '',
713        ],
714        'F'       => [
715            '0',
716            '7',
717            '7',
718            '7',
719        ],
720        'FB'      => [
721            '0',
722            '7',
723            '7',
724            '7',
725        ],
726        'G'       => [
727            '0',
728            '5',
729            '5',
730            '5',
731            '34',
732            '4',
733            '4',
734        ],
735        'Ğ'       => [
736            '0',
737            '',
738            '',
739            '',
740        ],
741        'GGY'     => [
742            '0',
743            '5',
744            '5',
745            '5',
746        ],
747        'GY'      => [
748            '0',
749            '5',
750            '5',
751            '5',
752        ],
753        'H'       => [
754            '0',
755            '5',
756            '5',
757            '',
758            '5',
759            '5',
760            '5',
761        ],
762        'I'       => [
763            '1',
764            '0',
765            '',
766            '',
767        ],
768        'Ì'       => [
769            '1',
770            '0',
771            '',
772            '',
773        ],
774        'Í'       => [
775            '1',
776            '0',
777            '',
778            '',
779        ],
780        'Î'       => [
781            '1',
782            '0',
783            '',
784            '',
785        ],
786        'Ï'       => [
787            '1',
788            '0',
789            '',
790            '',
791        ],
792        'Ĩ'       => [
793            '1',
794            '0',
795            '',
796            '',
797        ],
798        'Į'       => [
799            '1',
800            '0',
801            '',
802            '',
803        ],
804        'İ'       => [
805            '1',
806            '0',
807            '',
808            '',
809        ],
810        'Ỉ'       => [
811            '1',
812            '0',
813            '',
814            '',
815        ],
816        'Ị'       => [
817            '1',
818            '0',
819            '',
820            '',
821        ],
822        'IA'      => [
823            '1',
824            '1',
825            '',
826            '',
827        ],
828        'IE'      => [
829            '1',
830            '1',
831            '',
832            '',
833        ],
834        'IO'      => [
835            '1',
836            '1',
837            '',
838            '',
839        ],
840        'IU'      => [
841            '1',
842            '1',
843            '',
844            '',
845        ],
846        'J'       => [
847            '0',
848            '1',
849            '',
850            '',
851            '4',
852            '4',
853            '4',
854            '5',
855            '5',
856            '',
857        ],
858        'K'       => [
859            '0',
860            '5',
861            '5',
862            '5',
863        ],
864        'KH'      => [
865            '0',
866            '5',
867            '5',
868            '5',
869        ],
870        'KS'      => [
871            '0',
872            '5',
873            '54',
874            '54',
875        ],
876        'L'       => [
877            '0',
878            '8',
879            '8',
880            '8',
881        ],
882        'Ľ'       => [
883            '0',
884            '8',
885            '8',
886            '8',
887        ],
888        'Ĺ'       => [
889            '0',
890            '8',
891            '8',
892            '8',
893        ],
894        'Ł'       => [
895            '0',
896            '7',
897            '7',
898            '7',
899            '8',
900            '8',
901            '8',
902        ],
903        'LL'      => [
904            '0',
905            '8',
906            '8',
907            '8',
908            '58',
909            '8',
910            '8',
911            '1',
912            '8',
913            '8',
914        ],
915        'LLY'     => [
916            '0',
917            '8',
918            '8',
919            '8',
920            '1',
921            '8',
922            '8',
923        ],
924        'LY'      => [
925            '0',
926            '8',
927            '8',
928            '8',
929            '1',
930            '8',
931            '8',
932        ],
933        'M'       => [
934            '0',
935            '6',
936            '6',
937            '6',
938        ],
939        'MĔ'      => [
940            '0',
941            '66',
942            '66',
943            '66',
944        ],
945        'MN'      => [
946            '0',
947            '66',
948            '66',
949            '66',
950        ],
951        'N'       => [
952            '0',
953            '6',
954            '6',
955            '6',
956        ],
957        'Ń'       => [
958            '0',
959            '6',
960            '6',
961            '6',
962        ],
963        'Ň'       => [
964            '0',
965            '6',
966            '6',
967            '6',
968        ],
969        'Ñ'       => [
970            '0',
971            '6',
972            '6',
973            '6',
974        ],
975        'NM'      => [
976            '0',
977            '66',
978            '66',
979            '66',
980        ],
981        'O'       => [
982            '1',
983            '0',
984            '',
985            '',
986        ],
987        'Ò'       => [
988            '1',
989            '0',
990            '',
991            '',
992        ],
993        'Ó'       => [
994            '1',
995            '0',
996            '',
997            '',
998        ],
999        'Ô'       => [
1000            '1',
1001            '0',
1002            '',
1003            '',
1004        ],
1005        'Õ'       => [
1006            '1',
1007            '0',
1008            '',
1009            '',
1010        ],
1011        'Ö'       => [
1012            '1',
1013            '0',
1014            '',
1015            '',
1016        ],
1017        'Ø'       => [
1018            '1',
1019            '0',
1020            '',
1021            '',
1022        ],
1023        'Ő'       => [
1024            '1',
1025            '0',
1026            '',
1027            '',
1028        ],
1029        'Œ'       => [
1030            '1',
1031            '0',
1032            '',
1033            '',
1034        ],
1035        'Ơ'       => [
1036            '1',
1037            '0',
1038            '',
1039            '',
1040        ],
1041        'Ọ'       => [
1042            '1',
1043            '0',
1044            '',
1045            '',
1046        ],
1047        'Ỏ'       => [
1048            '1',
1049            '0',
1050            '',
1051            '',
1052        ],
1053        'Ố'       => [
1054            '1',
1055            '0',
1056            '',
1057            '',
1058        ],
1059        'Ồ'       => [
1060            '1',
1061            '0',
1062            '',
1063            '',
1064        ],
1065        'Ổ'       => [
1066            '1',
1067            '0',
1068            '',
1069            '',
1070        ],
1071        'Ỗ'       => [
1072            '1',
1073            '0',
1074            '',
1075            '',
1076        ],
1077        'Ộ'       => [
1078            '1',
1079            '0',
1080            '',
1081            '',
1082        ],
1083        'Ớ'       => [
1084            '1',
1085            '0',
1086            '',
1087            '',
1088        ],
1089        'Ờ'       => [
1090            '1',
1091            '0',
1092            '',
1093            '',
1094        ],
1095        'Ở'       => [
1096            '1',
1097            '0',
1098            '',
1099            '',
1100        ],
1101        'Ỡ'       => [
1102            '1',
1103            '0',
1104            '',
1105            '',
1106        ],
1107        'Ợ'       => [
1108            '1',
1109            '0',
1110            '',
1111            '',
1112        ],
1113        'OE'      => [
1114            '1',
1115            '0',
1116            '',
1117            '',
1118        ],
1119        'OI'      => [
1120            '1',
1121            '0',
1122            '1',
1123            '',
1124        ],
1125        'OJ'      => [
1126            '1',
1127            '0',
1128            '1',
1129            '',
1130        ],
1131        'OU'      => [
1132            '1',
1133            '0',
1134            '',
1135            '',
1136        ],
1137        'OY'      => [
1138            '1',
1139            '0',
1140            '1',
1141            '',
1142        ],
1143        'P'       => [
1144            '0',
1145            '7',
1146            '7',
1147            '7',
1148        ],
1149        'PF'      => [
1150            '0',
1151            '7',
1152            '7',
1153            '7',
1154        ],
1155        'PH'      => [
1156            '0',
1157            '7',
1158            '7',
1159            '7',
1160        ],
1161        'Q'       => [
1162            '0',
1163            '5',
1164            '5',
1165            '5',
1166        ],
1167        'R'       => [
1168            '0',
1169            '9',
1170            '9',
1171            '9',
1172        ],
1173        'Ř'       => [
1174            '0',
1175            '4',
1176            '4',
1177            '4',
1178        ],
1179        'RS'      => [
1180            '0',
1181            '4',
1182            '4',
1183            '4',
1184            '94',
1185            '94',
1186            '94',
1187        ],
1188        'RZ'      => [
1189            '0',
1190            '4',
1191            '4',
1192            '4',
1193            '94',
1194            '94',
1195            '94',
1196        ],
1197        'S'       => [
1198            '0',
1199            '4',
1200            '4',
1201            '4',
1202        ],
1203        'Ś'       => [
1204            '0',
1205            '4',
1206            '4',
1207            '4',
1208        ],
1209        'Š'       => [
1210            '0',
1211            '4',
1212            '4',
1213            '4',
1214        ],
1215        'Ş'       => [
1216            '0',
1217            '4',
1218            '4',
1219            '4',
1220        ],
1221        'SC'      => [
1222            '0',
1223            '2',
1224            '4',
1225            '4',
1226        ],
1227        'ŠČ'      => [
1228            '0',
1229            '2',
1230            '4',
1231            '4',
1232        ],
1233        'SCH'     => [
1234            '0',
1235            '4',
1236            '4',
1237            '4',
1238        ],
1239        'SCHD'    => [
1240            '0',
1241            '2',
1242            '43',
1243            '43',
1244        ],
1245        'SCHT'    => [
1246            '0',
1247            '2',
1248            '43',
1249            '43',
1250        ],
1251        'SCHTCH'  => [
1252            '0',
1253            '2',
1254            '4',
1255            '4',
1256        ],
1257        'SCHTSCH' => [
1258            '0',
1259            '2',
1260            '4',
1261            '4',
1262        ],
1263        'SCHTSH'  => [
1264            '0',
1265            '2',
1266            '4',
1267            '4',
1268        ],
1269        'SD'      => [
1270            '0',
1271            '2',
1272            '43',
1273            '43',
1274        ],
1275        'SH'      => [
1276            '0',
1277            '4',
1278            '4',
1279            '4',
1280        ],
1281        'SHCH'    => [
1282            '0',
1283            '2',
1284            '4',
1285            '4',
1286        ],
1287        'SHD'     => [
1288            '0',
1289            '2',
1290            '43',
1291            '43',
1292        ],
1293        'SHT'     => [
1294            '0',
1295            '2',
1296            '43',
1297            '43',
1298        ],
1299        'SHTCH'   => [
1300            '0',
1301            '2',
1302            '4',
1303            '4',
1304        ],
1305        'SHTSH'   => [
1306            '0',
1307            '2',
1308            '4',
1309            '4',
1310        ],
1311        'ß'       => [
1312            '0',
1313            '',
1314            '4',
1315            '4',
1316        ],
1317        'ST'      => [
1318            '0',
1319            '2',
1320            '43',
1321            '43',
1322        ],
1323        'STCH'    => [
1324            '0',
1325            '2',
1326            '4',
1327            '4',
1328        ],
1329        'STRS'    => [
1330            '0',
1331            '2',
1332            '4',
1333            '4',
1334        ],
1335        'STRZ'    => [
1336            '0',
1337            '2',
1338            '4',
1339            '4',
1340        ],
1341        'STSCH'   => [
1342            '0',
1343            '2',
1344            '4',
1345            '4',
1346        ],
1347        'STSH'    => [
1348            '0',
1349            '2',
1350            '4',
1351            '4',
1352        ],
1353        'SSZ'     => [
1354            '0',
1355            '4',
1356            '4',
1357            '4',
1358        ],
1359        'SZ'      => [
1360            '0',
1361            '4',
1362            '4',
1363            '4',
1364        ],
1365        'SZCS'    => [
1366            '0',
1367            '2',
1368            '4',
1369            '4',
1370        ],
1371        'SZCZ'    => [
1372            '0',
1373            '2',
1374            '4',
1375            '4',
1376        ],
1377        'SZD'     => [
1378            '0',
1379            '2',
1380            '43',
1381            '43',
1382        ],
1383        'SZT'     => [
1384            '0',
1385            '2',
1386            '43',
1387            '43',
1388        ],
1389        'T'       => [
1390            '0',
1391            '3',
1392            '3',
1393            '3',
1394        ],
1395        'Ť'       => [
1396            '0',
1397            '3',
1398            '3',
1399            '3',
1400        ],
1401        'Ţ'       => [
1402            '0',
1403            '3',
1404            '3',
1405            '3',
1406            '4',
1407            '4',
1408            '4',
1409        ],
1410        'TC'      => [
1411            '0',
1412            '4',
1413            '4',
1414            '4',
1415        ],
1416        'TCH'     => [
1417            '0',
1418            '4',
1419            '4',
1420            '4',
1421        ],
1422        'TH'      => [
1423            '0',
1424            '3',
1425            '3',
1426            '3',
1427        ],
1428        'TRS'     => [
1429            '0',
1430            '4',
1431            '4',
1432            '4',
1433        ],
1434        'TRZ'     => [
1435            '0',
1436            '4',
1437            '4',
1438            '4',
1439        ],
1440        'TS'      => [
1441            '0',
1442            '4',
1443            '4',
1444            '4',
1445        ],
1446        'TSCH'    => [
1447            '0',
1448            '4',
1449            '4',
1450            '4',
1451        ],
1452        'TSH'     => [
1453            '0',
1454            '4',
1455            '4',
1456            '4',
1457        ],
1458        'TSZ'     => [
1459            '0',
1460            '4',
1461            '4',
1462            '4',
1463        ],
1464        'TTCH'    => [
1465            '0',
1466            '4',
1467            '4',
1468            '4',
1469        ],
1470        'TTS'     => [
1471            '0',
1472            '4',
1473            '4',
1474            '4',
1475        ],
1476        'TTSCH'   => [
1477            '0',
1478            '4',
1479            '4',
1480            '4',
1481        ],
1482        'TTSZ'    => [
1483            '0',
1484            '4',
1485            '4',
1486            '4',
1487        ],
1488        'TTZ'     => [
1489            '0',
1490            '4',
1491            '4',
1492            '4',
1493        ],
1494        'TZ'      => [
1495            '0',
1496            '4',
1497            '4',
1498            '4',
1499        ],
1500        'TZS'     => [
1501            '0',
1502            '4',
1503            '4',
1504            '4',
1505        ],
1506        'U'       => [
1507            '1',
1508            '0',
1509            '',
1510            '',
1511        ],
1512        'Ù'       => [
1513            '1',
1514            '0',
1515            '',
1516            '',
1517        ],
1518        'Ú'       => [
1519            '1',
1520            '0',
1521            '',
1522            '',
1523        ],
1524        'Û'       => [
1525            '1',
1526            '0',
1527            '',
1528            '',
1529        ],
1530        'Ü'       => [
1531            '1',
1532            '0',
1533            '',
1534            '',
1535        ],
1536        'Ũ'       => [
1537            '1',
1538            '0',
1539            '',
1540            '',
1541        ],
1542        'Ū'       => [
1543            '1',
1544            '0',
1545            '',
1546            '',
1547        ],
1548        'Ů'       => [
1549            '1',
1550            '0',
1551            '',
1552            '',
1553        ],
1554        'Ű'       => [
1555            '1',
1556            '0',
1557            '',
1558            '',
1559        ],
1560        'Ų'       => [
1561            '1',
1562            '0',
1563            '',
1564            '',
1565        ],
1566        'Ư'       => [
1567            '1',
1568            '0',
1569            '',
1570            '',
1571        ],
1572        'Ụ'       => [
1573            '1',
1574            '0',
1575            '',
1576            '',
1577        ],
1578        'Ủ'       => [
1579            '1',
1580            '0',
1581            '',
1582            '',
1583        ],
1584        'Ứ'       => [
1585            '1',
1586            '0',
1587            '',
1588            '',
1589        ],
1590        'Ừ'       => [
1591            '1',
1592            '0',
1593            '',
1594            '',
1595        ],
1596        'Ử'       => [
1597            '1',
1598            '0',
1599            '',
1600            '',
1601        ],
1602        'Ữ'       => [
1603            '1',
1604            '0',
1605            '',
1606            '',
1607        ],
1608        'Ự'       => [
1609            '1',
1610            '0',
1611            '',
1612            '',
1613        ],
1614        'UE'      => [
1615            '1',
1616            '0',
1617            '',
1618            '',
1619        ],
1620        'UI'      => [
1621            '1',
1622            '0',
1623            '1',
1624            '',
1625        ],
1626        'UJ'      => [
1627            '1',
1628            '0',
1629            '1',
1630            '',
1631        ],
1632        'UY'      => [
1633            '1',
1634            '0',
1635            '1',
1636            '',
1637        ],
1638        'UW'      => [
1639            '1',
1640            '0',
1641            '1',
1642            '',
1643            '0',
1644            '7',
1645            '7',
1646        ],
1647        'V'       => [
1648            '0',
1649            '7',
1650            '7',
1651            '7',
1652        ],
1653        'W'       => [
1654            '0',
1655            '7',
1656            '7',
1657            '7',
1658        ],
1659        'X'       => [
1660            '0',
1661            '5',
1662            '54',
1663            '54',
1664        ],
1665        'Y'       => [
1666            '1',
1667            '1',
1668            '',
1669            '',
1670        ],
1671        'Ý'       => [
1672            '1',
1673            '1',
1674            '',
1675            '',
1676        ],
1677        'Ỳ'       => [
1678            '1',
1679            '1',
1680            '',
1681            '',
1682        ],
1683        'Ỵ'       => [
1684            '1',
1685            '1',
1686            '',
1687            '',
1688        ],
1689        'Ỷ'       => [
1690            '1',
1691            '1',
1692            '',
1693            '',
1694        ],
1695        'Ỹ'       => [
1696            '1',
1697            '1',
1698            '',
1699            '',
1700        ],
1701        'Z'       => [
1702            '0',
1703            '4',
1704            '4',
1705            '4',
1706        ],
1707        'Ź'       => [
1708            '0',
1709            '4',
1710            '4',
1711            '4',
1712        ],
1713        'Ż'       => [
1714            '0',
1715            '4',
1716            '4',
1717            '4',
1718        ],
1719        'Ž'       => [
1720            '0',
1721            '4',
1722            '4',
1723            '4',
1724        ],
1725        'ZD'      => [
1726            '0',
1727            '2',
1728            '43',
1729            '43',
1730        ],
1731        'ZDZ'     => [
1732            '0',
1733            '2',
1734            '4',
1735            '4',
1736        ],
1737        'ZDZH'    => [
1738            '0',
1739            '2',
1740            '4',
1741            '4',
1742        ],
1743        'ZH'      => [
1744            '0',
1745            '4',
1746            '4',
1747            '4',
1748        ],
1749        'ZHD'     => [
1750            '0',
1751            '2',
1752            '43',
1753            '43',
1754        ],
1755        'ZHDZH'   => [
1756            '0',
1757            '2',
1758            '4',
1759            '4',
1760        ],
1761        'ZS'      => [
1762            '0',
1763            '4',
1764            '4',
1765            '4',
1766        ],
1767        'ZSCH'    => [
1768            '0',
1769            '4',
1770            '4',
1771            '4',
1772        ],
1773        'ZSH'     => [
1774            '0',
1775            '4',
1776            '4',
1777            '4',
1778        ],
1779        'ZZS'     => [
1780            '0',
1781            '4',
1782            '4',
1783            '4',
1784        ],
1785        // Cyrillic alphabet
1786        'А'       => [
1787            '1',
1788            '0',
1789            '',
1790            '',
1791        ],
1792        'Б'       => [
1793            '0',
1794            '7',
1795            '7',
1796            '7',
1797        ],
1798        'В'       => [
1799            '0',
1800            '7',
1801            '7',
1802            '7',
1803        ],
1804        'Г'       => [
1805            '0',
1806            '5',
1807            '5',
1808            '5',
1809        ],
1810        'Д'       => [
1811            '0',
1812            '3',
1813            '3',
1814            '3',
1815        ],
1816        'ДЗ'      => [
1817            '0',
1818            '4',
1819            '4',
1820            '4',
1821        ],
1822        'Е'       => [
1823            '1',
1824            '0',
1825            '',
1826            '',
1827        ],
1828        'Ё'       => [
1829            '1',
1830            '0',
1831            '',
1832            '',
1833        ],
1834        'Ж'       => [
1835            '0',
1836            '4',
1837            '4',
1838            '4',
1839        ],
1840        'З'       => [
1841            '0',
1842            '4',
1843            '4',
1844            '4',
1845        ],
1846        'И'       => [
1847            '1',
1848            '0',
1849            '',
1850            '',
1851        ],
1852        'Й'       => [
1853            '1',
1854            '1',
1855            '',
1856            '',
1857            '4',
1858            '4',
1859            '4',
1860        ],
1861        'К'       => [
1862            '0',
1863            '5',
1864            '5',
1865            '5',
1866        ],
1867        'Л'       => [
1868            '0',
1869            '8',
1870            '8',
1871            '8',
1872        ],
1873        'М'       => [
1874            '0',
1875            '6',
1876            '6',
1877            '6',
1878        ],
1879        'Н'       => [
1880            '0',
1881            '6',
1882            '6',
1883            '6',
1884        ],
1885        'О'       => [
1886            '1',
1887            '0',
1888            '',
1889            '',
1890        ],
1891        'П'       => [
1892            '0',
1893            '7',
1894            '7',
1895            '7',
1896        ],
1897        'Р'       => [
1898            '0',
1899            '9',
1900            '9',
1901            '9',
1902        ],
1903        'РЖ'      => [
1904            '0',
1905            '4',
1906            '4',
1907            '4',
1908        ],
1909        'С'       => [
1910            '0',
1911            '4',
1912            '4',
1913            '4',
1914        ],
1915        'Т'       => [
1916            '0',
1917            '3',
1918            '3',
1919            '3',
1920        ],
1921        'У'       => [
1922            '1',
1923            '0',
1924            '',
1925            '',
1926        ],
1927        'Ф'       => [
1928            '0',
1929            '7',
1930            '7',
1931            '7',
1932        ],
1933        'Х'       => [
1934            '0',
1935            '5',
1936            '5',
1937            '5',
1938        ],
1939        'Ц'       => [
1940            '0',
1941            '4',
1942            '4',
1943            '4',
1944        ],
1945        'Ч'       => [
1946            '0',
1947            '4',
1948            '4',
1949            '4',
1950        ],
1951        'Ш'       => [
1952            '0',
1953            '4',
1954            '4',
1955            '4',
1956        ],
1957        'Щ'       => [
1958            '0',
1959            '2',
1960            '4',
1961            '4',
1962        ],
1963        'Ъ'       => [
1964            '0',
1965            '',
1966            '',
1967            '',
1968        ],
1969        'Ы'       => [
1970            '0',
1971            '1',
1972            '',
1973            '',
1974        ],
1975        'Ь'       => [
1976            '0',
1977            '',
1978            '',
1979            '',
1980        ],
1981        'Э'       => [
1982            '1',
1983            '0',
1984            '',
1985            '',
1986        ],
1987        'Ю'       => [
1988            '0',
1989            '1',
1990            '',
1991            '',
1992        ],
1993        'Я'       => [
1994            '0',
1995            '1',
1996            '',
1997            '',
1998        ],
1999        // Greek alphabet
2000        'Α'       => [
2001            '1',
2002            '0',
2003            '',
2004            '',
2005        ],
2006        'Ά'       => [
2007            '1',
2008            '0',
2009            '',
2010            '',
2011        ],
2012        'ΑΙ'      => [
2013            '1',
2014            '0',
2015            '1',
2016            '',
2017        ],
2018        'ΑΥ'      => [
2019            '1',
2020            '0',
2021            '1',
2022            '',
2023        ],
2024        'Β'       => [
2025            '0',
2026            '7',
2027            '7',
2028            '7',
2029        ],
2030        'Γ'       => [
2031            '0',
2032            '5',
2033            '5',
2034            '5',
2035        ],
2036        'Δ'       => [
2037            '0',
2038            '3',
2039            '3',
2040            '3',
2041        ],
2042        'Ε'       => [
2043            '1',
2044            '0',
2045            '',
2046            '',
2047        ],
2048        'Έ'       => [
2049            '1',
2050            '0',
2051            '',
2052            '',
2053        ],
2054        'ΕΙ'      => [
2055            '1',
2056            '0',
2057            '1',
2058            '',
2059        ],
2060        'ΕΥ'      => [
2061            '1',
2062            '1',
2063            '1',
2064            '',
2065        ],
2066        'Ζ'       => [
2067            '0',
2068            '4',
2069            '4',
2070            '4',
2071        ],
2072        'Η'       => [
2073            '1',
2074            '0',
2075            '',
2076            '',
2077        ],
2078        'Ή'       => [
2079            '1',
2080            '0',
2081            '',
2082            '',
2083        ],
2084        'Θ'       => [
2085            '0',
2086            '3',
2087            '3',
2088            '3',
2089        ],
2090        'Ι'       => [
2091            '1',
2092            '0',
2093            '',
2094            '',
2095        ],
2096        'Ί'       => [
2097            '1',
2098            '0',
2099            '',
2100            '',
2101        ],
2102        'Ϊ'       => [
2103            '1',
2104            '0',
2105            '',
2106            '',
2107        ],
2108        'ΐ'       => [
2109            '1',
2110            '0',
2111            '',
2112            '',
2113        ],
2114        'Κ'       => [
2115            '0',
2116            '5',
2117            '5',
2118            '5',
2119        ],
2120        'Λ'       => [
2121            '0',
2122            '8',
2123            '8',
2124            '8',
2125        ],
2126        'Μ'       => [
2127            '0',
2128            '6',
2129            '6',
2130            '6',
2131        ],
2132        'ΜΠ'      => [
2133            '0',
2134            '7',
2135            '7',
2136            '7',
2137        ],
2138        'Ν'       => [
2139            '0',
2140            '6',
2141            '6',
2142            '6',
2143        ],
2144        'ΝΤ'      => [
2145            '0',
2146            '3',
2147            '3',
2148            '3',
2149        ],
2150        'Ξ'       => [
2151            '0',
2152            '5',
2153            '54',
2154            '54',
2155        ],
2156        'Ο'       => [
2157            '1',
2158            '0',
2159            '',
2160            '',
2161        ],
2162        'Ό'       => [
2163            '1',
2164            '0',
2165            '',
2166            '',
2167        ],
2168        'ΟΙ'      => [
2169            '1',
2170            '0',
2171            '1',
2172            '',
2173        ],
2174        'ΟΥ'      => [
2175            '1',
2176            '0',
2177            '1',
2178            '',
2179        ],
2180        'Π'       => [
2181            '0',
2182            '7',
2183            '7',
2184            '7',
2185        ],
2186        'Ρ'       => [
2187            '0',
2188            '9',
2189            '9',
2190            '9',
2191        ],
2192        'Σ'       => [
2193            '0',
2194            '4',
2195            '4',
2196            '4',
2197        ],
2198        'ς'       => [
2199            '0',
2200            '',
2201            '',
2202            '4',
2203        ],
2204        'Τ'       => [
2205            '0',
2206            '3',
2207            '3',
2208            '3',
2209        ],
2210        'ΤΖ'      => [
2211            '0',
2212            '4',
2213            '4',
2214            '4',
2215        ],
2216        'ΤΣ'      => [
2217            '0',
2218            '4',
2219            '4',
2220            '4',
2221        ],
2222        'Υ'       => [
2223            '1',
2224            '1',
2225            '',
2226            '',
2227        ],
2228        'Ύ'       => [
2229            '1',
2230            '1',
2231            '',
2232            '',
2233        ],
2234        'Ϋ'       => [
2235            '1',
2236            '1',
2237            '',
2238            '',
2239        ],
2240        'ΰ'       => [
2241            '1',
2242            '1',
2243            '',
2244            '',
2245        ],
2246        'ΥΚ'      => [
2247            '1',
2248            '5',
2249            '5',
2250            '5',
2251        ],
2252        'ΥΥ'      => [
2253            '1',
2254            '65',
2255            '65',
2256            '65',
2257        ],
2258        'Φ'       => [
2259            '0',
2260            '7',
2261            '7',
2262            '7',
2263        ],
2264        'Χ'       => [
2265            '0',
2266            '5',
2267            '5',
2268            '5',
2269        ],
2270        'Ψ'       => [
2271            '0',
2272            '7',
2273            '7',
2274            '7',
2275        ],
2276        'Ω'       => [
2277            '1',
2278            '0',
2279            '',
2280            '',
2281        ],
2282        'Ώ'       => [
2283            '1',
2284            '0',
2285            '',
2286            '',
2287        ],
2288        // Hebrew alphabet
2289        'א'       => [
2290            '1',
2291            '0',
2292            '',
2293            '',
2294        ],
2295        'או'      => [
2296            '1',
2297            '0',
2298            '7',
2299            '',
2300        ],
2301        'אג'      => [
2302            '1',
2303            '4',
2304            '4',
2305            '4',
2306            '5',
2307            '5',
2308            '5',
2309            '34',
2310            '34',
2311            '34',
2312        ],
2313        'בב'      => [
2314            '0',
2315            '7',
2316            '7',
2317            '7',
2318            '77',
2319            '77',
2320            '77',
2321        ],
2322        'ב'       => [
2323            '0',
2324            '7',
2325            '7',
2326            '7',
2327        ],
2328        'גג'      => [
2329            '0',
2330            '4',
2331            '4',
2332            '4',
2333            '5',
2334            '5',
2335            '5',
2336            '45',
2337            '45',
2338            '45',
2339            '55',
2340            '55',
2341            '55',
2342            '54',
2343            '54',
2344            '54',
2345        ],
2346        'גד'      => [
2347            '0',
2348            '43',
2349            '43',
2350            '43',
2351            '53',
2352            '53',
2353            '53',
2354        ],
2355        'גה'      => [
2356            '0',
2357            '45',
2358            '45',
2359            '45',
2360            '55',
2361            '55',
2362            '55',
2363        ],
2364        'גז'      => [
2365            '0',
2366            '44',
2367            '44',
2368            '44',
2369            '45',
2370            '45',
2371            '45',
2372        ],
2373        'גח'      => [
2374            '0',
2375            '45',
2376            '45',
2377            '45',
2378            '55',
2379            '55',
2380            '55',
2381        ],
2382        'גכ'      => [
2383            '0',
2384            '45',
2385            '45',
2386            '45',
2387            '55',
2388            '55',
2389            '55',
2390        ],
2391        'גך'      => [
2392            '0',
2393            '45',
2394            '45',
2395            '45',
2396            '55',
2397            '55',
2398            '55',
2399        ],
2400        'גצ'      => [
2401            '0',
2402            '44',
2403            '44',
2404            '44',
2405            '45',
2406            '45',
2407            '45',
2408        ],
2409        'גץ'      => [
2410            '0',
2411            '44',
2412            '44',
2413            '44',
2414            '45',
2415            '45',
2416            '45',
2417        ],
2418        'גק'      => [
2419            '0',
2420            '45',
2421            '45',
2422            '45',
2423            '54',
2424            '54',
2425            '54',
2426        ],
2427        'גש'      => [
2428            '0',
2429            '44',
2430            '44',
2431            '44',
2432            '54',
2433            '54',
2434            '54',
2435        ],
2436        'גת'      => [
2437            '0',
2438            '43',
2439            '43',
2440            '43',
2441            '53',
2442            '53',
2443            '53',
2444        ],
2445        'ג'       => [
2446            '0',
2447            '4',
2448            '4',
2449            '4',
2450            '5',
2451            '5',
2452            '5',
2453        ],
2454        'דז'      => [
2455            '0',
2456            '4',
2457            '4',
2458            '4',
2459        ],
2460        'דד'      => [
2461            '0',
2462            '3',
2463            '3',
2464            '3',
2465            '33',
2466            '33',
2467            '33',
2468        ],
2469        'דט'      => [
2470            '0',
2471            '33',
2472            '33',
2473            '33',
2474        ],
2475        'דש'      => [
2476            '0',
2477            '4',
2478            '4',
2479            '4',
2480        ],
2481        'דצ'      => [
2482            '0',
2483            '4',
2484            '4',
2485            '4',
2486        ],
2487        'דץ'      => [
2488            '0',
2489            '4',
2490            '4',
2491            '4',
2492        ],
2493        'ד'       => [
2494            '0',
2495            '3',
2496            '3',
2497            '3',
2498        ],
2499        'הג'      => [
2500            '0',
2501            '54',
2502            '54',
2503            '54',
2504            '55',
2505            '55',
2506            '55',
2507        ],
2508        'הכ'      => [
2509            '0',
2510            '55',
2511            '55',
2512            '55',
2513        ],
2514        'הח'      => [
2515            '0',
2516            '55',
2517            '55',
2518            '55',
2519        ],
2520        'הק'      => [
2521            '0',
2522            '55',
2523            '55',
2524            '55',
2525            '5',
2526            '5',
2527            '5',
2528        ],
2529        'הה'      => [
2530            '0',
2531            '5',
2532            '5',
2533            '',
2534            '55',
2535            '55',
2536            '',
2537        ],
2538        'ה'       => [
2539            '0',
2540            '5',
2541            '5',
2542            '',
2543        ],
2544        'וי'      => [
2545            '1',
2546            '',
2547            '',
2548            '',
2549            '7',
2550            '7',
2551            '7',
2552        ],
2553        'ו'       => [
2554            '1',
2555            '7',
2556            '7',
2557            '7',
2558            '7',
2559            '',
2560            '',
2561        ],
2562        'וו'      => [
2563            '1',
2564            '7',
2565            '7',
2566            '7',
2567            '7',
2568            '',
2569            '',
2570        ],
2571        'וופ'     => [
2572            '1',
2573            '7',
2574            '7',
2575            '7',
2576            '77',
2577            '77',
2578            '77',
2579        ],
2580        'זש'      => [
2581            '0',
2582            '4',
2583            '4',
2584            '4',
2585            '44',
2586            '44',
2587            '44',
2588        ],
2589        'זדז'     => [
2590            '0',
2591            '2',
2592            '4',
2593            '4',
2594        ],
2595        'ז'       => [
2596            '0',
2597            '4',
2598            '4',
2599            '4',
2600        ],
2601        'זג'      => [
2602            '0',
2603            '44',
2604            '44',
2605            '44',
2606            '45',
2607            '45',
2608            '45',
2609        ],
2610        'זז'      => [
2611            '0',
2612            '4',
2613            '4',
2614            '4',
2615            '44',
2616            '44',
2617            '44',
2618        ],
2619        'זס'      => [
2620            '0',
2621            '44',
2622            '44',
2623            '44',
2624        ],
2625        'זצ'      => [
2626            '0',
2627            '44',
2628            '44',
2629            '44',
2630        ],
2631        'זץ'      => [
2632            '0',
2633            '44',
2634            '44',
2635            '44',
2636        ],
2637        'חג'      => [
2638            '0',
2639            '54',
2640            '54',
2641            '54',
2642            '53',
2643            '53',
2644            '53',
2645        ],
2646        'חח'      => [
2647            '0',
2648            '5',
2649            '5',
2650            '5',
2651            '55',
2652            '55',
2653            '55',
2654        ],
2655        'חק'      => [
2656            '0',
2657            '55',
2658            '55',
2659            '55',
2660            '5',
2661            '5',
2662            '5',
2663        ],
2664        'חכ'      => [
2665            '0',
2666            '45',
2667            '45',
2668            '45',
2669            '55',
2670            '55',
2671            '55',
2672        ],
2673        'חס'      => [
2674            '0',
2675            '5',
2676            '54',
2677            '54',
2678        ],
2679        'חש'      => [
2680            '0',
2681            '5',
2682            '54',
2683            '54',
2684        ],
2685        'ח'       => [
2686            '0',
2687            '5',
2688            '5',
2689            '5',
2690        ],
2691        'טש'      => [
2692            '0',
2693            '4',
2694            '4',
2695            '4',
2696        ],
2697        'טד'      => [
2698            '0',
2699            '33',
2700            '33',
2701            '33',
2702        ],
2703        'טי'      => [
2704            '0',
2705            '3',
2706            '3',
2707            '3',
2708            '4',
2709            '4',
2710            '4',
2711            '3',
2712            '3',
2713            '34',
2714        ],
2715        'טת'      => [
2716            '0',
2717            '33',
2718            '33',
2719            '33',
2720        ],
2721        'טט'      => [
2722            '0',
2723            '3',
2724            '3',
2725            '3',
2726            '33',
2727            '33',
2728            '33',
2729        ],
2730        'ט'       => [
2731            '0',
2732            '3',
2733            '3',
2734            '3',
2735        ],
2736        'י'       => [
2737            '1',
2738            '1',
2739            '',
2740            '',
2741        ],
2742        'יא'      => [
2743            '1',
2744            '1',
2745            '',
2746            '',
2747            '1',
2748            '1',
2749            '1',
2750        ],
2751        'כג'      => [
2752            '0',
2753            '55',
2754            '55',
2755            '55',
2756            '54',
2757            '54',
2758            '54',
2759        ],
2760        'כש'      => [
2761            '0',
2762            '5',
2763            '54',
2764            '54',
2765        ],
2766        'כס'      => [
2767            '0',
2768            '5',
2769            '54',
2770            '54',
2771        ],
2772        'ככ'      => [
2773            '0',
2774            '5',
2775            '5',
2776            '5',
2777            '55',
2778            '55',
2779            '55',
2780        ],
2781        'כך'      => [
2782            '0',
2783            '5',
2784            '5',
2785            '5',
2786            '55',
2787            '55',
2788            '55',
2789        ],
2790        'כ'       => [
2791            '0',
2792            '5',
2793            '5',
2794            '5',
2795        ],
2796        'כח'      => [
2797            '0',
2798            '55',
2799            '55',
2800            '55',
2801            '5',
2802            '5',
2803            '5',
2804        ],
2805        'ך'       => [
2806            '0',
2807            '',
2808            '5',
2809            '5',
2810        ],
2811        'ל'       => [
2812            '0',
2813            '8',
2814            '8',
2815            '8',
2816        ],
2817        'לל'      => [
2818            '0',
2819            '88',
2820            '88',
2821            '88',
2822            '8',
2823            '8',
2824            '8',
2825        ],
2826        'מנ'      => [
2827            '0',
2828            '66',
2829            '66',
2830            '66',
2831        ],
2832        'מן'      => [
2833            '0',
2834            '66',
2835            '66',
2836            '66',
2837        ],
2838        'ממ'      => [
2839            '0',
2840            '6',
2841            '6',
2842            '6',
2843            '66',
2844            '66',
2845            '66',
2846        ],
2847        'מם'      => [
2848            '0',
2849            '6',
2850            '6',
2851            '6',
2852            '66',
2853            '66',
2854            '66',
2855        ],
2856        'מ'       => [
2857            '0',
2858            '6',
2859            '6',
2860            '6',
2861        ],
2862        'ם'       => [
2863            '0',
2864            '',
2865            '6',
2866            '6',
2867        ],
2868        'נמ'      => [
2869            '0',
2870            '66',
2871            '66',
2872            '66',
2873        ],
2874        'נם'      => [
2875            '0',
2876            '66',
2877            '66',
2878            '66',
2879        ],
2880        'ננ'      => [
2881            '0',
2882            '6',
2883            '6',
2884            '6',
2885            '66',
2886            '66',
2887            '66',
2888        ],
2889        'נן'      => [
2890            '0',
2891            '6',
2892            '6',
2893            '6',
2894            '66',
2895            '66',
2896            '66',
2897        ],
2898        'נ'       => [
2899            '0',
2900            '6',
2901            '6',
2902            '6',
2903        ],
2904        'ן'       => [
2905            '0',
2906            '',
2907            '6',
2908            '6',
2909        ],
2910        'סתש'     => [
2911            '0',
2912            '2',
2913            '4',
2914            '4',
2915        ],
2916        'סתז'     => [
2917            '0',
2918            '2',
2919            '4',
2920            '4',
2921        ],
2922        'סטז'     => [
2923            '0',
2924            '2',
2925            '4',
2926            '4',
2927        ],
2928        'סטש'     => [
2929            '0',
2930            '2',
2931            '4',
2932            '4',
2933        ],
2934        'סצד'     => [
2935            '0',
2936            '2',
2937            '4',
2938            '4',
2939        ],
2940        'סט'      => [
2941            '0',
2942            '2',
2943            '4',
2944            '4',
2945            '43',
2946            '43',
2947            '43',
2948        ],
2949        'סת'      => [
2950            '0',
2951            '2',
2952            '4',
2953            '4',
2954            '43',
2955            '43',
2956            '43',
2957        ],
2958        'סג'      => [
2959            '0',
2960            '44',
2961            '44',
2962            '44',
2963            '4',
2964            '4',
2965            '4',
2966        ],
2967        'סס'      => [
2968            '0',
2969            '4',
2970            '4',
2971            '4',
2972            '44',
2973            '44',
2974            '44',
2975        ],
2976        'סצ'      => [
2977            '0',
2978            '44',
2979            '44',
2980            '44',
2981        ],
2982        'סץ'      => [
2983            '0',
2984            '44',
2985            '44',
2986            '44',
2987        ],
2988        'סז'      => [
2989            '0',
2990            '44',
2991            '44',
2992            '44',
2993        ],
2994        'סש'      => [
2995            '0',
2996            '44',
2997            '44',
2998            '44',
2999        ],
3000        'ס'       => [
3001            '0',
3002            '4',
3003            '4',
3004            '4',
3005        ],
3006        'ע'       => [
3007            '1',
3008            '0',
3009            '',
3010            '',
3011        ],
3012        'פב'      => [
3013            '0',
3014            '7',
3015            '7',
3016            '7',
3017            '77',
3018            '77',
3019            '77',
3020        ],
3021        'פוו'     => [
3022            '0',
3023            '7',
3024            '7',
3025            '7',
3026            '77',
3027            '77',
3028            '77',
3029        ],
3030        'פפ'      => [
3031            '0',
3032            '7',
3033            '7',
3034            '7',
3035            '77',
3036            '77',
3037            '77',
3038        ],
3039        'פף'      => [
3040            '0',
3041            '7',
3042            '7',
3043            '7',
3044            '77',
3045            '77',
3046            '77',
3047        ],
3048        'פ'       => [
3049            '0',
3050            '7',
3051            '7',
3052            '7',
3053        ],
3054        'ף'       => [
3055            '0',
3056            '',
3057            '7',
3058            '7',
3059        ],
3060        'צג'      => [
3061            '0',
3062            '44',
3063            '44',
3064            '44',
3065            '45',
3066            '45',
3067            '45',
3068        ],
3069        'צז'      => [
3070            '0',
3071            '44',
3072            '44',
3073            '44',
3074        ],
3075        'צס'      => [
3076            '0',
3077            '44',
3078            '44',
3079            '44',
3080        ],
3081        'צצ'      => [
3082            '0',
3083            '4',
3084            '4',
3085            '4',
3086            '5',
3087            '5',
3088            '5',
3089            '44',
3090            '44',
3091            '44',
3092            '54',
3093            '54',
3094            '54',
3095            '45',
3096            '45',
3097            '45',
3098        ],
3099        'צץ'      => [
3100            '0',
3101            '4',
3102            '4',
3103            '4',
3104            '5',
3105            '5',
3106            '5',
3107            '44',
3108            '44',
3109            '44',
3110            '54',
3111            '54',
3112            '54',
3113        ],
3114        'צש'      => [
3115            '0',
3116            '44',
3117            '44',
3118            '44',
3119            '4',
3120            '4',
3121            '4',
3122            '5',
3123            '5',
3124            '5',
3125        ],
3126        'צ'       => [
3127            '0',
3128            '4',
3129            '4',
3130            '4',
3131            '5',
3132            '5',
3133            '5',
3134        ],
3135        'ץ'       => [
3136            '0',
3137            '',
3138            '4',
3139            '4',
3140        ],
3141        'קה'      => [
3142            '0',
3143            '55',
3144            '55',
3145            '5',
3146        ],
3147        'קס'      => [
3148            '0',
3149            '5',
3150            '54',
3151            '54',
3152        ],
3153        'קש'      => [
3154            '0',
3155            '5',
3156            '54',
3157            '54',
3158        ],
3159        'קק'      => [
3160            '0',
3161            '5',
3162            '5',
3163            '5',
3164            '55',
3165            '55',
3166            '55',
3167        ],
3168        'קח'      => [
3169            '0',
3170            '55',
3171            '55',
3172            '55',
3173        ],
3174        'קכ'      => [
3175            '0',
3176            '55',
3177            '55',
3178            '55',
3179        ],
3180        'קך'      => [
3181            '0',
3182            '55',
3183            '55',
3184            '55',
3185        ],
3186        'קג'      => [
3187            '0',
3188            '55',
3189            '55',
3190            '55',
3191            '54',
3192            '54',
3193            '54',
3194        ],
3195        'ק'       => [
3196            '0',
3197            '5',
3198            '5',
3199            '5',
3200        ],
3201        'רר'      => [
3202            '0',
3203            '99',
3204            '99',
3205            '99',
3206            '9',
3207            '9',
3208            '9',
3209        ],
3210        'ר'       => [
3211            '0',
3212            '9',
3213            '9',
3214            '9',
3215        ],
3216        'שטז'     => [
3217            '0',
3218            '2',
3219            '4',
3220            '4',
3221        ],
3222        'שתש'     => [
3223            '0',
3224            '2',
3225            '4',
3226            '4',
3227        ],
3228        'שתז'     => [
3229            '0',
3230            '2',
3231            '4',
3232            '4',
3233        ],
3234        'שטש'     => [
3235            '0',
3236            '2',
3237            '4',
3238            '4',
3239        ],
3240        'שד'      => [
3241            '0',
3242            '2',
3243            '43',
3244            '43',
3245        ],
3246        'שז'      => [
3247            '0',
3248            '44',
3249            '44',
3250            '44',
3251        ],
3252        'שס'      => [
3253            '0',
3254            '44',
3255            '44',
3256            '44',
3257        ],
3258        'שת'      => [
3259            '0',
3260            '2',
3261            '43',
3262            '43',
3263        ],
3264        'שג'      => [
3265            '0',
3266            '4',
3267            '4',
3268            '4',
3269            '44',
3270            '44',
3271            '44',
3272            '4',
3273            '43',
3274            '43',
3275        ],
3276        'שט'      => [
3277            '0',
3278            '2',
3279            '43',
3280            '43',
3281            '44',
3282            '44',
3283            '44',
3284        ],
3285        'שצ'      => [
3286            '0',
3287            '44',
3288            '44',
3289            '44',
3290            '45',
3291            '45',
3292            '45',
3293        ],
3294        'שץ'      => [
3295            '0',
3296            '44',
3297            '',
3298            '44',
3299            '45',
3300            '',
3301            '45',
3302        ],
3303        'שש'      => [
3304            '0',
3305            '4',
3306            '4',
3307            '4',
3308            '44',
3309            '44',
3310            '44',
3311        ],
3312        'ש'       => [
3313            '0',
3314            '4',
3315            '4',
3316            '4',
3317        ],
3318        'תג'      => [
3319            '0',
3320            '34',
3321            '34',
3322            '34',
3323        ],
3324        'תז'      => [
3325            '0',
3326            '34',
3327            '34',
3328            '34',
3329        ],
3330        'תש'      => [
3331            '0',
3332            '4',
3333            '4',
3334            '4',
3335        ],
3336        'תת'      => [
3337            '0',
3338            '3',
3339            '3',
3340            '3',
3341            '4',
3342            '4',
3343            '4',
3344            '33',
3345            '33',
3346            '33',
3347            '44',
3348            '44',
3349            '44',
3350            '34',
3351            '34',
3352            '34',
3353            '43',
3354            '43',
3355            '43',
3356        ],
3357        'ת'       => [
3358            '0',
3359            '3',
3360            '3',
3361            '3',
3362            '4',
3363            '4',
3364            '4',
3365        ],
3366        // Arabic alphabet
3367        'ا'       => [
3368            '1',
3369            '0',
3370            '',
3371            '',
3372        ],
3373        'ب'       => [
3374            '0',
3375            '7',
3376            '7',
3377            '7',
3378        ],
3379        'ت'       => [
3380            '0',
3381            '3',
3382            '3',
3383            '3',
3384        ],
3385        'ث'       => [
3386            '0',
3387            '3',
3388            '3',
3389            '3',
3390        ],
3391        'ج'       => [
3392            '0',
3393            '4',
3394            '4',
3395            '4',
3396        ],
3397        'ح'       => [
3398            '0',
3399            '5',
3400            '5',
3401            '5',
3402        ],
3403        'خ'       => [
3404            '0',
3405            '5',
3406            '5',
3407            '5',
3408        ],
3409        'د'       => [
3410            '0',
3411            '3',
3412            '3',
3413            '3',
3414        ],
3415        'ذ'       => [
3416            '0',
3417            '3',
3418            '3',
3419            '3',
3420        ],
3421        'ر'       => [
3422            '0',
3423            '9',
3424            '9',
3425            '9',
3426        ],
3427        'ز'       => [
3428            '0',
3429            '4',
3430            '4',
3431            '4',
3432        ],
3433        'س'       => [
3434            '0',
3435            '4',
3436            '4',
3437            '4',
3438        ],
3439        'ش'       => [
3440            '0',
3441            '4',
3442            '4',
3443            '4',
3444        ],
3445        'ص'       => [
3446            '0',
3447            '4',
3448            '4',
3449            '4',
3450        ],
3451        'ض'       => [
3452            '0',
3453            '3',
3454            '3',
3455            '3',
3456        ],
3457        'ط'       => [
3458            '0',
3459            '3',
3460            '3',
3461            '3',
3462        ],
3463        'ظ'       => [
3464            '0',
3465            '4',
3466            '4',
3467            '4',
3468        ],
3469        'ع'       => [
3470            '1',
3471            '0',
3472            '',
3473            '',
3474        ],
3475        'غ'       => [
3476            '0',
3477            '0',
3478            '',
3479            '',
3480        ],
3481        'ف'       => [
3482            '0',
3483            '7',
3484            '7',
3485            '7',
3486        ],
3487        'ق'       => [
3488            '0',
3489            '5',
3490            '5',
3491            '5',
3492        ],
3493        'ك'       => [
3494            '0',
3495            '5',
3496            '5',
3497            '5',
3498        ],
3499        'ل'       => [
3500            '0',
3501            '8',
3502            '8',
3503            '8',
3504        ],
3505        'لا'      => [
3506            '0',
3507            '8',
3508            '8',
3509            '8',
3510        ],
3511        'م'       => [
3512            '0',
3513            '6',
3514            '6',
3515            '6',
3516        ],
3517        'ن'       => [
3518            '0',
3519            '6',
3520            '6',
3521            '6',
3522        ],
3523        'هن'      => [
3524            '0',
3525            '66',
3526            '66',
3527            '66',
3528        ],
3529        'ه'       => [
3530            '0',
3531            '5',
3532            '5',
3533            '',
3534        ],
3535        'و'       => [
3536            '1',
3537            '',
3538            '',
3539            '',
3540            '7',
3541            '',
3542            '',
3543        ],
3544        'ي'       => [
3545            '0',
3546            '1',
3547            '',
3548            '',
3549        ],
3550        'آ'       => [
3551            '0',
3552            '1',
3553            '',
3554            '',
3555        ],
3556        'ة'       => [
3557            '0',
3558            '',
3559            '',
3560            '3',
3561        ],
3562        'ی'       => [
3563            '0',
3564            '1',
3565            '',
3566            '',
3567        ],
3568        'ى'       => [
3569            '1',
3570            '1',
3571            '',
3572            '',
3573        ],
3574    ];
3575
3576    /**
3577     * Calculate the Daitch-Mokotoff soundex for a word.
3578     *
3579     * @param string $name
3580     *
3581     * @return string[] List of possible DM codes for the word.
3582     */
3583    private static function daitchMokotoffWord($name): array
3584    {
3585        // Apply special transformation rules to the input string
3586        $name = I18N::strtoupper($name);
3587        foreach (self::$transformNameTable as $transformRule) {
3588            $name = str_replace($transformRule[0], $transformRule[1], $name);
3589        }
3590
3591        // Initialize
3592        $name_script = I18N::textScript($name);
3593        $noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');
3594
3595        $lastPos         = strlen($name) - 1;
3596        $currPos         = 0;
3597        $state           = 1; // 1: start of input string, 2: before vowel, 3: other
3598        $result          = []; // accumulate complete 6-digit D-M codes here
3599        $partialResult   = []; // accumulate incomplete D-M codes here
3600        $partialResult[] = ['!']; // initialize 1st partial result  ('!' stops "duplicate sound" check)
3601
3602        // Loop through the input string.
3603        // Stop when the string is exhausted or when no more partial results remain
3604        while (count($partialResult) !== 0 && $currPos <= $lastPos) {
3605            // Find the DM coding table entry for the chunk at the current position
3606            $thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
3607            while ($thisEntry != '') {
3608                if (isset(self::$dmsounds[$thisEntry])) {
3609                    break;
3610                }
3611                $thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
3612            }
3613            if ($thisEntry === '') {
3614                $currPos++; // Not in table: advance pointer to next byte
3615                continue; // and try again
3616            }
3617
3618            $soundTableEntry = self::$dmsounds[$thisEntry];
3619            $workingResult   = $partialResult;
3620            $partialResult   = [];
3621            $currPos += strlen($thisEntry);
3622
3623            // Not at beginning of input string
3624            if ($state != 1) {
3625                if ($currPos <= $lastPos) {
3626                    // Determine whether the next chunk is a vowel
3627                    $nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
3628                    while ($nextEntry != '') {
3629                        if (isset(self::$dmsounds[$nextEntry])) {
3630                            break;
3631                        }
3632                        $nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
3633                    }
3634                } else {
3635                    $nextEntry = '';
3636                }
3637                if ($nextEntry != '' && self::$dmsounds[$nextEntry][0] != '0') {
3638                    $state = 2;
3639                } else {
3640                    // Next chunk is a vowel
3641                    $state = 3;
3642                }
3643            }
3644
3645            while ($state < count($soundTableEntry)) {
3646                // empty means 'ignore this sound in this state'
3647                if ($soundTableEntry[$state] == '') {
3648                    foreach ($workingResult as $workingEntry) {
3649                        $tempEntry                        = $workingEntry;
3650                        $tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
3651                        $partialResult[]                  = $tempEntry;
3652                    }
3653                } else {
3654                    foreach ($workingResult as $workingEntry) {
3655                        if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) {
3656                            // Incoming sound isn't a duplicate of the previous sound
3657                            $workingEntry[] = $soundTableEntry[$state];
3658                        } else {
3659                            // Incoming sound is a duplicate of the previous sound
3660                            // For Hebrew and Arabic, we need to create a pair of D-M sound codes,
3661                            // one of the pair with only a single occurrence of the duplicate sound,
3662                            // the other with both occurrences
3663                            if ($noVowels) {
3664                                $workingEntry[] = $soundTableEntry[$state];
3665                            }
3666                        }
3667                        if (count($workingEntry) < 7) {
3668                            $partialResult[] = $workingEntry;
3669                        } else {
3670                            // This is the 6th code in the sequence
3671                            // We're looking for 7 entries because the first is '!' and doesn't count
3672                            $tempResult = str_replace('!', '', implode('', $workingEntry));
3673                            // Only return codes from recognisable sounds
3674                            if ($tempResult) {
3675                                $result[] = substr($tempResult . '000000', 0, 6);
3676                            }
3677                        }
3678                    }
3679                }
3680                $state = $state + 3; // Advance to next triplet while keeping the same basic state
3681            }
3682        }
3683
3684        // Zero-fill and copy all remaining partial results
3685        foreach ($partialResult as $workingEntry) {
3686            $tempResult = str_replace('!', '', implode('', $workingEntry));
3687            // Only return codes from recognisable sounds
3688            if ($tempResult) {
3689                $result[] = substr($tempResult . '000000', 0, 6);
3690            }
3691        }
3692
3693        return $result;
3694    }
3695}
3696