xref: /webtrees/app/Soundex.php (revision e24444eeefe1caed93aa11866313e0407b4ce028)
1<?php
2/**
3 * webtrees: online genealogy
4 * Copyright (C) 2018 webtrees development team
5 * This program is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License
14 * along with this program. If not, see <http://www.gnu.org/licenses/>.
15 */
16declare(strict_types=1);
17
18namespace Fisharebest\Webtrees;
19
20/**
21 * Phonetic matching of strings.
22 */
23class Soundex
24{
25    /**
26     * Which algorithms are supported.
27     *
28     * @return string[]
29     */
30    public static function getAlgorithms(): array
31    {
32        return [
33            /* I18N: http://en.wikipedia.org/wiki/Soundex */
34            'std' => I18N::translate('Russell'),
35            /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
36            'dm'  => I18N::translate('Daitch-Mokotoff'),
37        ];
38    }
39
40    /**
41     * Is there a match between two soundex codes?
42     *
43     * @param string $soundex1
44     * @param string $soundex2
45     *
46     * @return bool
47     */
48    public static function compare($soundex1, $soundex2): bool
49    {
50        if ($soundex1 !== '' && $soundex2 !== '') {
51            return !empty(array_intersect(explode(':', $soundex1), explode(':', $soundex2)));
52        }
53
54        return false;
55    }
56
57    /**
58     * Generate Russell soundex codes for a given text.
59     *
60     * @param string $text
61     *
62     * @return string
63     */
64    public static function russell(string $text): string
65    {
66        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
67        $soundex_array = [];
68        foreach ($words as $word) {
69            $soundex = soundex($word);
70            // Only return codes from recognisable sounds
71            if ($soundex !== '0000') {
72                $soundex_array[] = $soundex;
73            }
74        }
75        // Combine words, e.g. “New York” as “Newyork”
76        if (count($words) > 1) {
77            $soundex_array[] = soundex(strtr($text, ' ', ''));
78        }
79        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
80        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);
81
82        return implode(':', $soundex_array);
83    }
84
85    /**
86     * Generate Daitch–Mokotoff soundex codes for a given text.
87     *
88     * @param string $text
89     *
90     * @return string
91     */
92    public static function daitchMokotoff(string $text): string
93    {
94        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
95        $soundex_array = [];
96        foreach ($words as $word) {
97            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
98        }
99        // Combine words, e.g. “New York” as “Newyork”
100        if (count($words) > 1) {
101            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(strtr($text, ' ', '')));
102        }
103        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
104        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);
105
106        return implode(':', $soundex_array);
107    }
108
109    // Determine the Daitch–Mokotoff Soundex code for a word
110    // Original implementation by Gerry Kroll, and analysis by Meliza Amity
111
    /**
     * Maximum length of a sound table key.
     *
     * The length is counted in bytes, NOT in UTF-8 characters, so a key made
     * of multi-byte characters holds fewer characters than this value.
     */
    const MAXCHAR = 7;
114
115    /**
116     * Name transformation arrays.
117     * Used to transform the Name string to simplify the "sounds like" table.
118     * This is especially useful in Hebrew.
119     *
120     * Each array entry defines the "from" and "to" arguments of an preg($from, $to, $text)
121     * function call to achieve the desired transformations.
122     *
123     * Note about the use of "\x01":
124     * This code, which can’t legitimately occur in the kind of text we're dealing with,
125     * is used as a place-holder so that conditional string replacements can be done.
126     *
127     * @var string[][]
128     */
129    private static $transformNameTable = [
130        // Force Yiddish ligatures to be treated as separate letters
131        [
132            'װ',
133            'וו',
134        ],
135        [
136            'ײ',
137            'יי',
138        ],
139        [
140            'ױ',
141            'וי',
142        ],
143        [
144            'בו',
145            'בע',
146        ],
147        [
148            'פו',
149            'פע',
150        ],
151        [
152            'ומ',
153            'עמ',
154        ],
155        [
156            'ום',
157            'עם',
158        ],
159        [
160            'ונ',
161            'ענ',
162        ],
163        [
164            'ון',
165            'ען',
166        ],
167        [
168            'וו',
169            'ב',
170        ],
171        [
172            "\x01",
173            '',
174        ],
175        [
176            'ייה$',
177            "\x01ה",
178        ],
179        [
180            'ייע$',
181            "\x01ע",
182        ],
183        [
184            'יי',
185            'ע',
186        ],
187        [
188            "\x01",
189            'יי',
190        ],
191    ];
192
193    /**
194     * The DM sound coding table is organized this way:
     * key: a variable-length string that corresponds to the UTF-8 character sequence
     * represented by the table entry. Currently, that string can be up to 7
     * bytes long. This maximum length is defined by the class constant
     * self::MAXCHAR.
199     *
200     * value: an array as follows:
201     * [0]:  zero if not a vowel
202     * [1]:  sound value when this string is at the beginning of the word
203     * [2]:  sound value when this string is followed by a vowel
204     * [3]:  sound value for other cases
205     * [1],[2],[3] can be repeated several times to create branches in the code
206     * an empty sound value means "ignore in this state"
207     *
208     * @var string[][]
209     */
210    private static $dmsounds = [
211        'A'       => [
212            '1',
213            '0',
214            '',
215            '',
216        ],
217        'À'       => [
218            '1',
219            '0',
220            '',
221            '',
222        ],
223        'Á'       => [
224            '1',
225            '0',
226            '',
227            '',
228        ],
229        'Â'       => [
230            '1',
231            '0',
232            '',
233            '',
234        ],
235        'Ã'       => [
236            '1',
237            '0',
238            '',
239            '',
240        ],
241        'Ä'       => [
242            '1',
243            '0',
244            '1',
245            '',
246            '0',
247            '',
248            '',
249        ],
250        'Å'       => [
251            '1',
252            '0',
253            '',
254            '',
255        ],
256        'Ă'       => [
257            '1',
258            '0',
259            '',
260            '',
261        ],
262        'Ą'       => [
263            '1',
264            '',
265            '',
266            '',
267            '',
268            '',
269            '6',
270        ],
271        'Ạ'       => [
272            '1',
273            '0',
274            '',
275            '',
276        ],
277        'Ả'       => [
278            '1',
279            '0',
280            '',
281            '',
282        ],
283        'Ấ'       => [
284            '1',
285            '0',
286            '',
287            '',
288        ],
289        'Ầ'       => [
290            '1',
291            '0',
292            '',
293            '',
294        ],
295        'Ẩ'       => [
296            '1',
297            '0',
298            '',
299            '',
300        ],
301        'Ẫ'       => [
302            '1',
303            '0',
304            '',
305            '',
306        ],
307        'Ậ'       => [
308            '1',
309            '0',
310            '',
311            '',
312        ],
313        'Ắ'       => [
314            '1',
315            '0',
316            '',
317            '',
318        ],
319        'Ằ'       => [
320            '1',
321            '0',
322            '',
323            '',
324        ],
325        'Ẳ'       => [
326            '1',
327            '0',
328            '',
329            '',
330        ],
331        'Ẵ'       => [
332            '1',
333            '0',
334            '',
335            '',
336        ],
337        'Ặ'       => [
338            '1',
339            '0',
340            '',
341            '',
342        ],
343        'AE'      => [
344            '1',
345            '0',
346            '1',
347            '',
348        ],
349        'Æ'       => [
350            '1',
351            '0',
352            '1',
353            '',
354        ],
355        'AI'      => [
356            '1',
357            '0',
358            '1',
359            '',
360        ],
361        'AJ'      => [
362            '1',
363            '0',
364            '1',
365            '',
366        ],
367        'AU'      => [
368            '1',
369            '0',
370            '7',
371            '',
372        ],
373        'AV'      => [
374            '1',
375            '0',
376            '7',
377            '',
378            '7',
379            '7',
380            '7',
381        ],
382        'ÄU'      => [
383            '1',
384            '0',
385            '1',
386            '',
387        ],
388        'AY'      => [
389            '1',
390            '0',
391            '1',
392            '',
393        ],
394        'B'       => [
395            '0',
396            '7',
397            '7',
398            '7',
399        ],
400        'C'       => [
401            '0',
402            '5',
403            '5',
404            '5',
405            '34',
406            '4',
407            '4',
408        ],
409        'Ć'       => [
410            '0',
411            '4',
412            '4',
413            '4',
414        ],
415        'Č'       => [
416            '0',
417            '4',
418            '4',
419            '4',
420        ],
421        'Ç'       => [
422            '0',
423            '4',
424            '4',
425            '4',
426        ],
427        'CH'      => [
428            '0',
429            '5',
430            '5',
431            '5',
432            '34',
433            '4',
434            '4',
435        ],
436        'CHS'     => [
437            '0',
438            '5',
439            '54',
440            '54',
441        ],
442        'CK'      => [
443            '0',
444            '5',
445            '5',
446            '5',
447            '45',
448            '45',
449            '45',
450        ],
451        'CCS'     => [
452            '0',
453            '4',
454            '4',
455            '4',
456        ],
457        'CS'      => [
458            '0',
459            '4',
460            '4',
461            '4',
462        ],
463        'CSZ'     => [
464            '0',
465            '4',
466            '4',
467            '4',
468        ],
469        'CZ'      => [
470            '0',
471            '4',
472            '4',
473            '4',
474        ],
475        'CZS'     => [
476            '0',
477            '4',
478            '4',
479            '4',
480        ],
481        'D'       => [
482            '0',
483            '3',
484            '3',
485            '3',
486        ],
487        'Ď'       => [
488            '0',
489            '3',
490            '3',
491            '3',
492        ],
493        'Đ'       => [
494            '0',
495            '3',
496            '3',
497            '3',
498        ],
499        'DRS'     => [
500            '0',
501            '4',
502            '4',
503            '4',
504        ],
505        'DRZ'     => [
506            '0',
507            '4',
508            '4',
509            '4',
510        ],
511        'DS'      => [
512            '0',
513            '4',
514            '4',
515            '4',
516        ],
517        'DSH'     => [
518            '0',
519            '4',
520            '4',
521            '4',
522        ],
523        'DSZ'     => [
524            '0',
525            '4',
526            '4',
527            '4',
528        ],
529        'DT'      => [
530            '0',
531            '3',
532            '3',
533            '3',
534        ],
535        'DDZ'     => [
536            '0',
537            '4',
538            '4',
539            '4',
540        ],
541        'DDZS'    => [
542            '0',
543            '4',
544            '4',
545            '4',
546        ],
547        'DZ'      => [
548            '0',
549            '4',
550            '4',
551            '4',
552        ],
553        'DŹ'      => [
554            '0',
555            '4',
556            '4',
557            '4',
558        ],
559        'DŻ'      => [
560            '0',
561            '4',
562            '4',
563            '4',
564        ],
565        'DZH'     => [
566            '0',
567            '4',
568            '4',
569            '4',
570        ],
571        'DZS'     => [
572            '0',
573            '4',
574            '4',
575            '4',
576        ],
577        'E'       => [
578            '1',
579            '0',
580            '',
581            '',
582        ],
583        'È'       => [
584            '1',
585            '0',
586            '',
587            '',
588        ],
589        'É'       => [
590            '1',
591            '0',
592            '',
593            '',
594        ],
595        'Ê'       => [
596            '1',
597            '0',
598            '',
599            '',
600        ],
601        'Ë'       => [
602            '1',
603            '0',
604            '',
605            '',
606        ],
607        'Ĕ'       => [
608            '1',
609            '0',
610            '',
611            '',
612        ],
613        'Ė'       => [
614            '1',
615            '0',
616            '',
617            '',
618        ],
619        'Ę'       => [
620            '1',
621            '',
622            '',
623            '6',
624            '',
625            '',
626            '',
627        ],
628        'Ẹ'       => [
629            '1',
630            '0',
631            '',
632            '',
633        ],
634        'Ẻ'       => [
635            '1',
636            '0',
637            '',
638            '',
639        ],
640        'Ẽ'       => [
641            '1',
642            '0',
643            '',
644            '',
645        ],
646        'Ế'       => [
647            '1',
648            '0',
649            '',
650            '',
651        ],
652        'Ề'       => [
653            '1',
654            '0',
655            '',
656            '',
657        ],
658        'Ể'       => [
659            '1',
660            '0',
661            '',
662            '',
663        ],
664        'Ễ'       => [
665            '1',
666            '0',
667            '',
668            '',
669        ],
670        'Ệ'       => [
671            '1',
672            '0',
673            '',
674            '',
675        ],
676        'EAU'     => [
677            '1',
678            '0',
679            '',
680            '',
681        ],
682        'EI'      => [
683            '1',
684            '0',
685            '1',
686            '',
687        ],
688        'EJ'      => [
689            '1',
690            '0',
691            '1',
692            '',
693        ],
694        'EU'      => [
695            '1',
696            '1',
697            '1',
698            '',
699        ],
700        'EY'      => [
701            '1',
702            '0',
703            '1',
704            '',
705        ],
706        'F'       => [
707            '0',
708            '7',
709            '7',
710            '7',
711        ],
712        'FB'      => [
713            '0',
714            '7',
715            '7',
716            '7',
717        ],
718        'G'       => [
719            '0',
720            '5',
721            '5',
722            '5',
723            '34',
724            '4',
725            '4',
726        ],
727        'Ğ'       => [
728            '0',
729            '',
730            '',
731            '',
732        ],
733        'GGY'     => [
734            '0',
735            '5',
736            '5',
737            '5',
738        ],
739        'GY'      => [
740            '0',
741            '5',
742            '5',
743            '5',
744        ],
745        'H'       => [
746            '0',
747            '5',
748            '5',
749            '',
750            '5',
751            '5',
752            '5',
753        ],
754        'I'       => [
755            '1',
756            '0',
757            '',
758            '',
759        ],
760        'Ì'       => [
761            '1',
762            '0',
763            '',
764            '',
765        ],
766        'Í'       => [
767            '1',
768            '0',
769            '',
770            '',
771        ],
772        'Î'       => [
773            '1',
774            '0',
775            '',
776            '',
777        ],
778        'Ï'       => [
779            '1',
780            '0',
781            '',
782            '',
783        ],
784        'Ĩ'       => [
785            '1',
786            '0',
787            '',
788            '',
789        ],
790        'Į'       => [
791            '1',
792            '0',
793            '',
794            '',
795        ],
796        'İ'       => [
797            '1',
798            '0',
799            '',
800            '',
801        ],
802        'Ỉ'       => [
803            '1',
804            '0',
805            '',
806            '',
807        ],
808        'Ị'       => [
809            '1',
810            '0',
811            '',
812            '',
813        ],
814        'IA'      => [
815            '1',
816            '1',
817            '',
818            '',
819        ],
820        'IE'      => [
821            '1',
822            '1',
823            '',
824            '',
825        ],
826        'IO'      => [
827            '1',
828            '1',
829            '',
830            '',
831        ],
832        'IU'      => [
833            '1',
834            '1',
835            '',
836            '',
837        ],
838        'J'       => [
839            '0',
840            '1',
841            '',
842            '',
843            '4',
844            '4',
845            '4',
846            '5',
847            '5',
848            '',
849        ],
850        'K'       => [
851            '0',
852            '5',
853            '5',
854            '5',
855        ],
856        'KH'      => [
857            '0',
858            '5',
859            '5',
860            '5',
861        ],
862        'KS'      => [
863            '0',
864            '5',
865            '54',
866            '54',
867        ],
868        'L'       => [
869            '0',
870            '8',
871            '8',
872            '8',
873        ],
874        'Ľ'       => [
875            '0',
876            '8',
877            '8',
878            '8',
879        ],
880        'Ĺ'       => [
881            '0',
882            '8',
883            '8',
884            '8',
885        ],
886        'Ł'       => [
887            '0',
888            '7',
889            '7',
890            '7',
891            '8',
892            '8',
893            '8',
894        ],
895        'LL'      => [
896            '0',
897            '8',
898            '8',
899            '8',
900            '58',
901            '8',
902            '8',
903            '1',
904            '8',
905            '8',
906        ],
907        'LLY'     => [
908            '0',
909            '8',
910            '8',
911            '8',
912            '1',
913            '8',
914            '8',
915        ],
916        'LY'      => [
917            '0',
918            '8',
919            '8',
920            '8',
921            '1',
922            '8',
923            '8',
924        ],
925        'M'       => [
926            '0',
927            '6',
928            '6',
929            '6',
930        ],
931        'MĔ'      => [
932            '0',
933            '66',
934            '66',
935            '66',
936        ],
937        'MN'      => [
938            '0',
939            '66',
940            '66',
941            '66',
942        ],
943        'N'       => [
944            '0',
945            '6',
946            '6',
947            '6',
948        ],
949        'Ń'       => [
950            '0',
951            '6',
952            '6',
953            '6',
954        ],
955        'Ň'       => [
956            '0',
957            '6',
958            '6',
959            '6',
960        ],
961        'Ñ'       => [
962            '0',
963            '6',
964            '6',
965            '6',
966        ],
967        'NM'      => [
968            '0',
969            '66',
970            '66',
971            '66',
972        ],
973        'O'       => [
974            '1',
975            '0',
976            '',
977            '',
978        ],
979        'Ò'       => [
980            '1',
981            '0',
982            '',
983            '',
984        ],
985        'Ó'       => [
986            '1',
987            '0',
988            '',
989            '',
990        ],
991        'Ô'       => [
992            '1',
993            '0',
994            '',
995            '',
996        ],
997        'Õ'       => [
998            '1',
999            '0',
1000            '',
1001            '',
1002        ],
1003        'Ö'       => [
1004            '1',
1005            '0',
1006            '',
1007            '',
1008        ],
1009        'Ø'       => [
1010            '1',
1011            '0',
1012            '',
1013            '',
1014        ],
1015        'Ő'       => [
1016            '1',
1017            '0',
1018            '',
1019            '',
1020        ],
1021        'Œ'       => [
1022            '1',
1023            '0',
1024            '',
1025            '',
1026        ],
1027        'Ơ'       => [
1028            '1',
1029            '0',
1030            '',
1031            '',
1032        ],
1033        'Ọ'       => [
1034            '1',
1035            '0',
1036            '',
1037            '',
1038        ],
1039        'Ỏ'       => [
1040            '1',
1041            '0',
1042            '',
1043            '',
1044        ],
1045        'Ố'       => [
1046            '1',
1047            '0',
1048            '',
1049            '',
1050        ],
1051        'Ồ'       => [
1052            '1',
1053            '0',
1054            '',
1055            '',
1056        ],
1057        'Ổ'       => [
1058            '1',
1059            '0',
1060            '',
1061            '',
1062        ],
1063        'Ỗ'       => [
1064            '1',
1065            '0',
1066            '',
1067            '',
1068        ],
1069        'Ộ'       => [
1070            '1',
1071            '0',
1072            '',
1073            '',
1074        ],
1075        'Ớ'       => [
1076            '1',
1077            '0',
1078            '',
1079            '',
1080        ],
1081        'Ờ'       => [
1082            '1',
1083            '0',
1084            '',
1085            '',
1086        ],
1087        'Ở'       => [
1088            '1',
1089            '0',
1090            '',
1091            '',
1092        ],
1093        'Ỡ'       => [
1094            '1',
1095            '0',
1096            '',
1097            '',
1098        ],
1099        'Ợ'       => [
1100            '1',
1101            '0',
1102            '',
1103            '',
1104        ],
1105        'OE'      => [
1106            '1',
1107            '0',
1108            '',
1109            '',
1110        ],
1111        'OI'      => [
1112            '1',
1113            '0',
1114            '1',
1115            '',
1116        ],
1117        'OJ'      => [
1118            '1',
1119            '0',
1120            '1',
1121            '',
1122        ],
1123        'OU'      => [
1124            '1',
1125            '0',
1126            '',
1127            '',
1128        ],
1129        'OY'      => [
1130            '1',
1131            '0',
1132            '1',
1133            '',
1134        ],
1135        'P'       => [
1136            '0',
1137            '7',
1138            '7',
1139            '7',
1140        ],
1141        'PF'      => [
1142            '0',
1143            '7',
1144            '7',
1145            '7',
1146        ],
1147        'PH'      => [
1148            '0',
1149            '7',
1150            '7',
1151            '7',
1152        ],
1153        'Q'       => [
1154            '0',
1155            '5',
1156            '5',
1157            '5',
1158        ],
1159        'R'       => [
1160            '0',
1161            '9',
1162            '9',
1163            '9',
1164        ],
1165        'Ř'       => [
1166            '0',
1167            '4',
1168            '4',
1169            '4',
1170        ],
1171        'RS'      => [
1172            '0',
1173            '4',
1174            '4',
1175            '4',
1176            '94',
1177            '94',
1178            '94',
1179        ],
1180        'RZ'      => [
1181            '0',
1182            '4',
1183            '4',
1184            '4',
1185            '94',
1186            '94',
1187            '94',
1188        ],
1189        'S'       => [
1190            '0',
1191            '4',
1192            '4',
1193            '4',
1194        ],
1195        'Ś'       => [
1196            '0',
1197            '4',
1198            '4',
1199            '4',
1200        ],
1201        'Š'       => [
1202            '0',
1203            '4',
1204            '4',
1205            '4',
1206        ],
1207        'Ş'       => [
1208            '0',
1209            '4',
1210            '4',
1211            '4',
1212        ],
1213        'SC'      => [
1214            '0',
1215            '2',
1216            '4',
1217            '4',
1218        ],
1219        'ŠČ'      => [
1220            '0',
1221            '2',
1222            '4',
1223            '4',
1224        ],
1225        'SCH'     => [
1226            '0',
1227            '4',
1228            '4',
1229            '4',
1230        ],
1231        'SCHD'    => [
1232            '0',
1233            '2',
1234            '43',
1235            '43',
1236        ],
1237        'SCHT'    => [
1238            '0',
1239            '2',
1240            '43',
1241            '43',
1242        ],
1243        'SCHTCH'  => [
1244            '0',
1245            '2',
1246            '4',
1247            '4',
1248        ],
1249        'SCHTSCH' => [
1250            '0',
1251            '2',
1252            '4',
1253            '4',
1254        ],
1255        'SCHTSH'  => [
1256            '0',
1257            '2',
1258            '4',
1259            '4',
1260        ],
1261        'SD'      => [
1262            '0',
1263            '2',
1264            '43',
1265            '43',
1266        ],
1267        'SH'      => [
1268            '0',
1269            '4',
1270            '4',
1271            '4',
1272        ],
1273        'SHCH'    => [
1274            '0',
1275            '2',
1276            '4',
1277            '4',
1278        ],
1279        'SHD'     => [
1280            '0',
1281            '2',
1282            '43',
1283            '43',
1284        ],
1285        'SHT'     => [
1286            '0',
1287            '2',
1288            '43',
1289            '43',
1290        ],
1291        'SHTCH'   => [
1292            '0',
1293            '2',
1294            '4',
1295            '4',
1296        ],
1297        'SHTSH'   => [
1298            '0',
1299            '2',
1300            '4',
1301            '4',
1302        ],
1303        'ß'       => [
1304            '0',
1305            '',
1306            '4',
1307            '4',
1308        ],
1309        'ST'      => [
1310            '0',
1311            '2',
1312            '43',
1313            '43',
1314        ],
1315        'STCH'    => [
1316            '0',
1317            '2',
1318            '4',
1319            '4',
1320        ],
1321        'STRS'    => [
1322            '0',
1323            '2',
1324            '4',
1325            '4',
1326        ],
1327        'STRZ'    => [
1328            '0',
1329            '2',
1330            '4',
1331            '4',
1332        ],
1333        'STSCH'   => [
1334            '0',
1335            '2',
1336            '4',
1337            '4',
1338        ],
1339        'STSH'    => [
1340            '0',
1341            '2',
1342            '4',
1343            '4',
1344        ],
1345        'SSZ'     => [
1346            '0',
1347            '4',
1348            '4',
1349            '4',
1350        ],
1351        'SZ'      => [
1352            '0',
1353            '4',
1354            '4',
1355            '4',
1356        ],
1357        'SZCS'    => [
1358            '0',
1359            '2',
1360            '4',
1361            '4',
1362        ],
1363        'SZCZ'    => [
1364            '0',
1365            '2',
1366            '4',
1367            '4',
1368        ],
1369        'SZD'     => [
1370            '0',
1371            '2',
1372            '43',
1373            '43',
1374        ],
1375        'SZT'     => [
1376            '0',
1377            '2',
1378            '43',
1379            '43',
1380        ],
1381        'T'       => [
1382            '0',
1383            '3',
1384            '3',
1385            '3',
1386        ],
1387        'Ť'       => [
1388            '0',
1389            '3',
1390            '3',
1391            '3',
1392        ],
1393        'Ţ'       => [
1394            '0',
1395            '3',
1396            '3',
1397            '3',
1398            '4',
1399            '4',
1400            '4',
1401        ],
1402        'TC'      => [
1403            '0',
1404            '4',
1405            '4',
1406            '4',
1407        ],
1408        'TCH'     => [
1409            '0',
1410            '4',
1411            '4',
1412            '4',
1413        ],
1414        'TH'      => [
1415            '0',
1416            '3',
1417            '3',
1418            '3',
1419        ],
1420        'TRS'     => [
1421            '0',
1422            '4',
1423            '4',
1424            '4',
1425        ],
1426        'TRZ'     => [
1427            '0',
1428            '4',
1429            '4',
1430            '4',
1431        ],
1432        'TS'      => [
1433            '0',
1434            '4',
1435            '4',
1436            '4',
1437        ],
1438        'TSCH'    => [
1439            '0',
1440            '4',
1441            '4',
1442            '4',
1443        ],
1444        'TSH'     => [
1445            '0',
1446            '4',
1447            '4',
1448            '4',
1449        ],
1450        'TSZ'     => [
1451            '0',
1452            '4',
1453            '4',
1454            '4',
1455        ],
1456        'TTCH'    => [
1457            '0',
1458            '4',
1459            '4',
1460            '4',
1461        ],
1462        'TTS'     => [
1463            '0',
1464            '4',
1465            '4',
1466            '4',
1467        ],
1468        'TTSCH'   => [
1469            '0',
1470            '4',
1471            '4',
1472            '4',
1473        ],
1474        'TTSZ'    => [
1475            '0',
1476            '4',
1477            '4',
1478            '4',
1479        ],
1480        'TTZ'     => [
1481            '0',
1482            '4',
1483            '4',
1484            '4',
1485        ],
1486        'TZ'      => [
1487            '0',
1488            '4',
1489            '4',
1490            '4',
1491        ],
1492        'TZS'     => [
1493            '0',
1494            '4',
1495            '4',
1496            '4',
1497        ],
1498        'U'       => [
1499            '1',
1500            '0',
1501            '',
1502            '',
1503        ],
1504        'Ù'       => [
1505            '1',
1506            '0',
1507            '',
1508            '',
1509        ],
1510        'Ú'       => [
1511            '1',
1512            '0',
1513            '',
1514            '',
1515        ],
1516        'Û'       => [
1517            '1',
1518            '0',
1519            '',
1520            '',
1521        ],
1522        'Ü'       => [
1523            '1',
1524            '0',
1525            '',
1526            '',
1527        ],
1528        'Ũ'       => [
1529            '1',
1530            '0',
1531            '',
1532            '',
1533        ],
1534        'Ū'       => [
1535            '1',
1536            '0',
1537            '',
1538            '',
1539        ],
1540        'Ů'       => [
1541            '1',
1542            '0',
1543            '',
1544            '',
1545        ],
1546        'Ű'       => [
1547            '1',
1548            '0',
1549            '',
1550            '',
1551        ],
1552        'Ų'       => [
1553            '1',
1554            '0',
1555            '',
1556            '',
1557        ],
1558        'Ư'       => [
1559            '1',
1560            '0',
1561            '',
1562            '',
1563        ],
1564        'Ụ'       => [
1565            '1',
1566            '0',
1567            '',
1568            '',
1569        ],
1570        'Ủ'       => [
1571            '1',
1572            '0',
1573            '',
1574            '',
1575        ],
1576        'Ứ'       => [
1577            '1',
1578            '0',
1579            '',
1580            '',
1581        ],
1582        'Ừ'       => [
1583            '1',
1584            '0',
1585            '',
1586            '',
1587        ],
1588        'Ử'       => [
1589            '1',
1590            '0',
1591            '',
1592            '',
1593        ],
1594        'Ữ'       => [
1595            '1',
1596            '0',
1597            '',
1598            '',
1599        ],
1600        'Ự'       => [
1601            '1',
1602            '0',
1603            '',
1604            '',
1605        ],
1606        'UE'      => [
1607            '1',
1608            '0',
1609            '',
1610            '',
1611        ],
1612        'UI'      => [
1613            '1',
1614            '0',
1615            '1',
1616            '',
1617        ],
1618        'UJ'      => [
1619            '1',
1620            '0',
1621            '1',
1622            '',
1623        ],
1624        'UY'      => [
1625            '1',
1626            '0',
1627            '1',
1628            '',
1629        ],
1630        'UW'      => [
1631            '1',
1632            '0',
1633            '1',
1634            '',
1635            '0',
1636            '7',
1637            '7',
1638        ],
1639        'V'       => [
1640            '0',
1641            '7',
1642            '7',
1643            '7',
1644        ],
1645        'W'       => [
1646            '0',
1647            '7',
1648            '7',
1649            '7',
1650        ],
1651        'X'       => [
1652            '0',
1653            '5',
1654            '54',
1655            '54',
1656        ],
1657        'Y'       => [
1658            '1',
1659            '1',
1660            '',
1661            '',
1662        ],
1663        'Ý'       => [
1664            '1',
1665            '1',
1666            '',
1667            '',
1668        ],
1669        'Ỳ'       => [
1670            '1',
1671            '1',
1672            '',
1673            '',
1674        ],
1675        'Ỵ'       => [
1676            '1',
1677            '1',
1678            '',
1679            '',
1680        ],
1681        'Ỷ'       => [
1682            '1',
1683            '1',
1684            '',
1685            '',
1686        ],
1687        'Ỹ'       => [
1688            '1',
1689            '1',
1690            '',
1691            '',
1692        ],
1693        'Z'       => [
1694            '0',
1695            '4',
1696            '4',
1697            '4',
1698        ],
1699        'Ź'       => [
1700            '0',
1701            '4',
1702            '4',
1703            '4',
1704        ],
1705        'Ż'       => [
1706            '0',
1707            '4',
1708            '4',
1709            '4',
1710        ],
1711        'Ž'       => [
1712            '0',
1713            '4',
1714            '4',
1715            '4',
1716        ],
1717        'ZD'      => [
1718            '0',
1719            '2',
1720            '43',
1721            '43',
1722        ],
1723        'ZDZ'     => [
1724            '0',
1725            '2',
1726            '4',
1727            '4',
1728        ],
1729        'ZDZH'    => [
1730            '0',
1731            '2',
1732            '4',
1733            '4',
1734        ],
1735        'ZH'      => [
1736            '0',
1737            '4',
1738            '4',
1739            '4',
1740        ],
1741        'ZHD'     => [
1742            '0',
1743            '2',
1744            '43',
1745            '43',
1746        ],
1747        'ZHDZH'   => [
1748            '0',
1749            '2',
1750            '4',
1751            '4',
1752        ],
1753        'ZS'      => [
1754            '0',
1755            '4',
1756            '4',
1757            '4',
1758        ],
1759        'ZSCH'    => [
1760            '0',
1761            '4',
1762            '4',
1763            '4',
1764        ],
1765        'ZSH'     => [
1766            '0',
1767            '4',
1768            '4',
1769            '4',
1770        ],
1771        'ZZS'     => [
1772            '0',
1773            '4',
1774            '4',
1775            '4',
1776        ],
1777        // Cyrillic alphabet
1778        'А'       => [
1779            '1',
1780            '0',
1781            '',
1782            '',
1783        ],
1784        'Б'       => [
1785            '0',
1786            '7',
1787            '7',
1788            '7',
1789        ],
1790        'В'       => [
1791            '0',
1792            '7',
1793            '7',
1794            '7',
1795        ],
1796        'Г'       => [
1797            '0',
1798            '5',
1799            '5',
1800            '5',
1801        ],
1802        'Д'       => [
1803            '0',
1804            '3',
1805            '3',
1806            '3',
1807        ],
1808        'ДЗ'      => [
1809            '0',
1810            '4',
1811            '4',
1812            '4',
1813        ],
1814        'Е'       => [
1815            '1',
1816            '0',
1817            '',
1818            '',
1819        ],
1820        'Ё'       => [
1821            '1',
1822            '0',
1823            '',
1824            '',
1825        ],
1826        'Ж'       => [
1827            '0',
1828            '4',
1829            '4',
1830            '4',
1831        ],
1832        'З'       => [
1833            '0',
1834            '4',
1835            '4',
1836            '4',
1837        ],
1838        'И'       => [
1839            '1',
1840            '0',
1841            '',
1842            '',
1843        ],
1844        'Й'       => [
1845            '1',
1846            '1',
1847            '',
1848            '',
1849            '4',
1850            '4',
1851            '4',
1852        ],
1853        'К'       => [
1854            '0',
1855            '5',
1856            '5',
1857            '5',
1858        ],
1859        'Л'       => [
1860            '0',
1861            '8',
1862            '8',
1863            '8',
1864        ],
1865        'М'       => [
1866            '0',
1867            '6',
1868            '6',
1869            '6',
1870        ],
1871        'Н'       => [
1872            '0',
1873            '6',
1874            '6',
1875            '6',
1876        ],
1877        'О'       => [
1878            '1',
1879            '0',
1880            '',
1881            '',
1882        ],
1883        'П'       => [
1884            '0',
1885            '7',
1886            '7',
1887            '7',
1888        ],
1889        'Р'       => [
1890            '0',
1891            '9',
1892            '9',
1893            '9',
1894        ],
1895        'РЖ'      => [
1896            '0',
1897            '4',
1898            '4',
1899            '4',
1900        ],
1901        'С'       => [
1902            '0',
1903            '4',
1904            '4',
1905            '4',
1906        ],
1907        'Т'       => [
1908            '0',
1909            '3',
1910            '3',
1911            '3',
1912        ],
1913        'У'       => [
1914            '1',
1915            '0',
1916            '',
1917            '',
1918        ],
1919        'Ф'       => [
1920            '0',
1921            '7',
1922            '7',
1923            '7',
1924        ],
1925        'Х'       => [
1926            '0',
1927            '5',
1928            '5',
1929            '5',
1930        ],
1931        'Ц'       => [
1932            '0',
1933            '4',
1934            '4',
1935            '4',
1936        ],
1937        'Ч'       => [
1938            '0',
1939            '4',
1940            '4',
1941            '4',
1942        ],
1943        'Ш'       => [
1944            '0',
1945            '4',
1946            '4',
1947            '4',
1948        ],
1949        'Щ'       => [
1950            '0',
1951            '2',
1952            '4',
1953            '4',
1954        ],
1955        'Ъ'       => [
1956            '0',
1957            '',
1958            '',
1959            '',
1960        ],
1961        'Ы'       => [
1962            '0',
1963            '1',
1964            '',
1965            '',
1966        ],
1967        'Ь'       => [
1968            '0',
1969            '',
1970            '',
1971            '',
1972        ],
1973        'Э'       => [
1974            '1',
1975            '0',
1976            '',
1977            '',
1978        ],
1979        'Ю'       => [
1980            '0',
1981            '1',
1982            '',
1983            '',
1984        ],
1985        'Я'       => [
1986            '0',
1987            '1',
1988            '',
1989            '',
1990        ],
1991        // Greek alphabet
1992        'Α'       => [
1993            '1',
1994            '0',
1995            '',
1996            '',
1997        ],
1998        'Ά'       => [
1999            '1',
2000            '0',
2001            '',
2002            '',
2003        ],
2004        'ΑΙ'      => [
2005            '1',
2006            '0',
2007            '1',
2008            '',
2009        ],
2010        'ΑΥ'      => [
2011            '1',
2012            '0',
2013            '1',
2014            '',
2015        ],
2016        'Β'       => [
2017            '0',
2018            '7',
2019            '7',
2020            '7',
2021        ],
2022        'Γ'       => [
2023            '0',
2024            '5',
2025            '5',
2026            '5',
2027        ],
2028        'Δ'       => [
2029            '0',
2030            '3',
2031            '3',
2032            '3',
2033        ],
2034        'Ε'       => [
2035            '1',
2036            '0',
2037            '',
2038            '',
2039        ],
2040        'Έ'       => [
2041            '1',
2042            '0',
2043            '',
2044            '',
2045        ],
2046        'ΕΙ'      => [
2047            '1',
2048            '0',
2049            '1',
2050            '',
2051        ],
2052        'ΕΥ'      => [
2053            '1',
2054            '1',
2055            '1',
2056            '',
2057        ],
2058        'Ζ'       => [
2059            '0',
2060            '4',
2061            '4',
2062            '4',
2063        ],
2064        'Η'       => [
2065            '1',
2066            '0',
2067            '',
2068            '',
2069        ],
2070        'Ή'       => [
2071            '1',
2072            '0',
2073            '',
2074            '',
2075        ],
2076        'Θ'       => [
2077            '0',
2078            '3',
2079            '3',
2080            '3',
2081        ],
2082        'Ι'       => [
2083            '1',
2084            '0',
2085            '',
2086            '',
2087        ],
2088        'Ί'       => [
2089            '1',
2090            '0',
2091            '',
2092            '',
2093        ],
2094        'Ϊ'       => [
2095            '1',
2096            '0',
2097            '',
2098            '',
2099        ],
2100        'ΐ'       => [
2101            '1',
2102            '0',
2103            '',
2104            '',
2105        ],
2106        'Κ'       => [
2107            '0',
2108            '5',
2109            '5',
2110            '5',
2111        ],
2112        'Λ'       => [
2113            '0',
2114            '8',
2115            '8',
2116            '8',
2117        ],
2118        'Μ'       => [
2119            '0',
2120            '6',
2121            '6',
2122            '6',
2123        ],
2124        'ΜΠ'      => [
2125            '0',
2126            '7',
2127            '7',
2128            '7',
2129        ],
2130        'Ν'       => [
2131            '0',
2132            '6',
2133            '6',
2134            '6',
2135        ],
2136        'ΝΤ'      => [
2137            '0',
2138            '3',
2139            '3',
2140            '3',
2141        ],
2142        'Ξ'       => [
2143            '0',
2144            '5',
2145            '54',
2146            '54',
2147        ],
2148        'Ο'       => [
2149            '1',
2150            '0',
2151            '',
2152            '',
2153        ],
2154        'Ό'       => [
2155            '1',
2156            '0',
2157            '',
2158            '',
2159        ],
2160        'ΟΙ'      => [
2161            '1',
2162            '0',
2163            '1',
2164            '',
2165        ],
2166        'ΟΥ'      => [
2167            '1',
2168            '0',
2169            '1',
2170            '',
2171        ],
2172        'Π'       => [
2173            '0',
2174            '7',
2175            '7',
2176            '7',
2177        ],
2178        'Ρ'       => [
2179            '0',
2180            '9',
2181            '9',
2182            '9',
2183        ],
2184        'Σ'       => [
2185            '0',
2186            '4',
2187            '4',
2188            '4',
2189        ],
2190        'ς'       => [
2191            '0',
2192            '',
2193            '',
2194            '4',
2195        ],
2196        'Τ'       => [
2197            '0',
2198            '3',
2199            '3',
2200            '3',
2201        ],
2202        'ΤΖ'      => [
2203            '0',
2204            '4',
2205            '4',
2206            '4',
2207        ],
2208        'ΤΣ'      => [
2209            '0',
2210            '4',
2211            '4',
2212            '4',
2213        ],
2214        'Υ'       => [
2215            '1',
2216            '1',
2217            '',
2218            '',
2219        ],
2220        'Ύ'       => [
2221            '1',
2222            '1',
2223            '',
2224            '',
2225        ],
2226        'Ϋ'       => [
2227            '1',
2228            '1',
2229            '',
2230            '',
2231        ],
2232        'ΰ'       => [
2233            '1',
2234            '1',
2235            '',
2236            '',
2237        ],
2238        'ΥΚ'      => [
2239            '1',
2240            '5',
2241            '5',
2242            '5',
2243        ],
2244        'ΥΥ'      => [
2245            '1',
2246            '65',
2247            '65',
2248            '65',
2249        ],
2250        'Φ'       => [
2251            '0',
2252            '7',
2253            '7',
2254            '7',
2255        ],
2256        'Χ'       => [
2257            '0',
2258            '5',
2259            '5',
2260            '5',
2261        ],
2262        'Ψ'       => [
2263            '0',
2264            '7',
2265            '7',
2266            '7',
2267        ],
2268        'Ω'       => [
2269            '1',
2270            '0',
2271            '',
2272            '',
2273        ],
2274        'Ώ'       => [
2275            '1',
2276            '0',
2277            '',
2278            '',
2279        ],
2280        // Hebrew alphabet
2281        'א'       => [
2282            '1',
2283            '0',
2284            '',
2285            '',
2286        ],
2287        'או'      => [
2288            '1',
2289            '0',
2290            '7',
2291            '',
2292        ],
2293        'אג'      => [
2294            '1',
2295            '4',
2296            '4',
2297            '4',
2298            '5',
2299            '5',
2300            '5',
2301            '34',
2302            '34',
2303            '34',
2304        ],
2305        'בב'      => [
2306            '0',
2307            '7',
2308            '7',
2309            '7',
2310            '77',
2311            '77',
2312            '77',
2313        ],
2314        'ב'       => [
2315            '0',
2316            '7',
2317            '7',
2318            '7',
2319        ],
2320        'גג'      => [
2321            '0',
2322            '4',
2323            '4',
2324            '4',
2325            '5',
2326            '5',
2327            '5',
2328            '45',
2329            '45',
2330            '45',
2331            '55',
2332            '55',
2333            '55',
2334            '54',
2335            '54',
2336            '54',
2337        ],
2338        'גד'      => [
2339            '0',
2340            '43',
2341            '43',
2342            '43',
2343            '53',
2344            '53',
2345            '53',
2346        ],
2347        'גה'      => [
2348            '0',
2349            '45',
2350            '45',
2351            '45',
2352            '55',
2353            '55',
2354            '55',
2355        ],
2356        'גז'      => [
2357            '0',
2358            '44',
2359            '44',
2360            '44',
2361            '45',
2362            '45',
2363            '45',
2364        ],
2365        'גח'      => [
2366            '0',
2367            '45',
2368            '45',
2369            '45',
2370            '55',
2371            '55',
2372            '55',
2373        ],
2374        'גכ'      => [
2375            '0',
2376            '45',
2377            '45',
2378            '45',
2379            '55',
2380            '55',
2381            '55',
2382        ],
2383        'גך'      => [
2384            '0',
2385            '45',
2386            '45',
2387            '45',
2388            '55',
2389            '55',
2390            '55',
2391        ],
2392        'גצ'      => [
2393            '0',
2394            '44',
2395            '44',
2396            '44',
2397            '45',
2398            '45',
2399            '45',
2400        ],
2401        'גץ'      => [
2402            '0',
2403            '44',
2404            '44',
2405            '44',
2406            '45',
2407            '45',
2408            '45',
2409        ],
2410        'גק'      => [
2411            '0',
2412            '45',
2413            '45',
2414            '45',
2415            '54',
2416            '54',
2417            '54',
2418        ],
2419        'גש'      => [
2420            '0',
2421            '44',
2422            '44',
2423            '44',
2424            '54',
2425            '54',
2426            '54',
2427        ],
2428        'גת'      => [
2429            '0',
2430            '43',
2431            '43',
2432            '43',
2433            '53',
2434            '53',
2435            '53',
2436        ],
2437        'ג'       => [
2438            '0',
2439            '4',
2440            '4',
2441            '4',
2442            '5',
2443            '5',
2444            '5',
2445        ],
2446        'דז'      => [
2447            '0',
2448            '4',
2449            '4',
2450            '4',
2451        ],
2452        'דד'      => [
2453            '0',
2454            '3',
2455            '3',
2456            '3',
2457            '33',
2458            '33',
2459            '33',
2460        ],
2461        'דט'      => [
2462            '0',
2463            '33',
2464            '33',
2465            '33',
2466        ],
2467        'דש'      => [
2468            '0',
2469            '4',
2470            '4',
2471            '4',
2472        ],
2473        'דצ'      => [
2474            '0',
2475            '4',
2476            '4',
2477            '4',
2478        ],
2479        'דץ'      => [
2480            '0',
2481            '4',
2482            '4',
2483            '4',
2484        ],
2485        'ד'       => [
2486            '0',
2487            '3',
2488            '3',
2489            '3',
2490        ],
2491        'הג'      => [
2492            '0',
2493            '54',
2494            '54',
2495            '54',
2496            '55',
2497            '55',
2498            '55',
2499        ],
2500        'הכ'      => [
2501            '0',
2502            '55',
2503            '55',
2504            '55',
2505        ],
2506        'הח'      => [
2507            '0',
2508            '55',
2509            '55',
2510            '55',
2511        ],
2512        'הק'      => [
2513            '0',
2514            '55',
2515            '55',
2516            '55',
2517            '5',
2518            '5',
2519            '5',
2520        ],
2521        'הה'      => [
2522            '0',
2523            '5',
2524            '5',
2525            '',
2526            '55',
2527            '55',
2528            '',
2529        ],
2530        'ה'       => [
2531            '0',
2532            '5',
2533            '5',
2534            '',
2535        ],
2536        'וי'      => [
2537            '1',
2538            '',
2539            '',
2540            '',
2541            '7',
2542            '7',
2543            '7',
2544        ],
2545        'ו'       => [
2546            '1',
2547            '7',
2548            '7',
2549            '7',
2550            '7',
2551            '',
2552            '',
2553        ],
2554        'וו'      => [
2555            '1',
2556            '7',
2557            '7',
2558            '7',
2559            '7',
2560            '',
2561            '',
2562        ],
2563        'וופ'     => [
2564            '1',
2565            '7',
2566            '7',
2567            '7',
2568            '77',
2569            '77',
2570            '77',
2571        ],
2572        'זש'      => [
2573            '0',
2574            '4',
2575            '4',
2576            '4',
2577            '44',
2578            '44',
2579            '44',
2580        ],
2581        'זדז'     => [
2582            '0',
2583            '2',
2584            '4',
2585            '4',
2586        ],
2587        'ז'       => [
2588            '0',
2589            '4',
2590            '4',
2591            '4',
2592        ],
2593        'זג'      => [
2594            '0',
2595            '44',
2596            '44',
2597            '44',
2598            '45',
2599            '45',
2600            '45',
2601        ],
2602        'זז'      => [
2603            '0',
2604            '4',
2605            '4',
2606            '4',
2607            '44',
2608            '44',
2609            '44',
2610        ],
2611        'זס'      => [
2612            '0',
2613            '44',
2614            '44',
2615            '44',
2616        ],
2617        'זצ'      => [
2618            '0',
2619            '44',
2620            '44',
2621            '44',
2622        ],
2623        'זץ'      => [
2624            '0',
2625            '44',
2626            '44',
2627            '44',
2628        ],
2629        'חג'      => [
2630            '0',
2631            '54',
2632            '54',
2633            '54',
2634            '53',
2635            '53',
2636            '53',
2637        ],
2638        'חח'      => [
2639            '0',
2640            '5',
2641            '5',
2642            '5',
2643            '55',
2644            '55',
2645            '55',
2646        ],
2647        'חק'      => [
2648            '0',
2649            '55',
2650            '55',
2651            '55',
2652            '5',
2653            '5',
2654            '5',
2655        ],
2656        'חכ'      => [
2657            '0',
2658            '45',
2659            '45',
2660            '45',
2661            '55',
2662            '55',
2663            '55',
2664        ],
2665        'חס'      => [
2666            '0',
2667            '5',
2668            '54',
2669            '54',
2670        ],
2671        'חש'      => [
2672            '0',
2673            '5',
2674            '54',
2675            '54',
2676        ],
2677        'ח'       => [
2678            '0',
2679            '5',
2680            '5',
2681            '5',
2682        ],
2683        'טש'      => [
2684            '0',
2685            '4',
2686            '4',
2687            '4',
2688        ],
2689        'טד'      => [
2690            '0',
2691            '33',
2692            '33',
2693            '33',
2694        ],
2695        'טי'      => [
2696            '0',
2697            '3',
2698            '3',
2699            '3',
2700            '4',
2701            '4',
2702            '4',
2703            '3',
2704            '3',
2705            '34',
2706        ],
2707        'טת'      => [
2708            '0',
2709            '33',
2710            '33',
2711            '33',
2712        ],
2713        'טט'      => [
2714            '0',
2715            '3',
2716            '3',
2717            '3',
2718            '33',
2719            '33',
2720            '33',
2721        ],
2722        'ט'       => [
2723            '0',
2724            '3',
2725            '3',
2726            '3',
2727        ],
2728        'י'       => [
2729            '1',
2730            '1',
2731            '',
2732            '',
2733        ],
2734        'יא'      => [
2735            '1',
2736            '1',
2737            '',
2738            '',
2739            '1',
2740            '1',
2741            '1',
2742        ],
2743        'כג'      => [
2744            '0',
2745            '55',
2746            '55',
2747            '55',
2748            '54',
2749            '54',
2750            '54',
2751        ],
2752        'כש'      => [
2753            '0',
2754            '5',
2755            '54',
2756            '54',
2757        ],
2758        'כס'      => [
2759            '0',
2760            '5',
2761            '54',
2762            '54',
2763        ],
2764        'ככ'      => [
2765            '0',
2766            '5',
2767            '5',
2768            '5',
2769            '55',
2770            '55',
2771            '55',
2772        ],
2773        'כך'      => [
2774            '0',
2775            '5',
2776            '5',
2777            '5',
2778            '55',
2779            '55',
2780            '55',
2781        ],
2782        'כ'       => [
2783            '0',
2784            '5',
2785            '5',
2786            '5',
2787        ],
2788        'כח'      => [
2789            '0',
2790            '55',
2791            '55',
2792            '55',
2793            '5',
2794            '5',
2795            '5',
2796        ],
2797        'ך'       => [
2798            '0',
2799            '',
2800            '5',
2801            '5',
2802        ],
2803        'ל'       => [
2804            '0',
2805            '8',
2806            '8',
2807            '8',
2808        ],
2809        'לל'      => [
2810            '0',
2811            '88',
2812            '88',
2813            '88',
2814            '8',
2815            '8',
2816            '8',
2817        ],
2818        'מנ'      => [
2819            '0',
2820            '66',
2821            '66',
2822            '66',
2823        ],
2824        'מן'      => [
2825            '0',
2826            '66',
2827            '66',
2828            '66',
2829        ],
2830        'ממ'      => [
2831            '0',
2832            '6',
2833            '6',
2834            '6',
2835            '66',
2836            '66',
2837            '66',
2838        ],
2839        'מם'      => [
2840            '0',
2841            '6',
2842            '6',
2843            '6',
2844            '66',
2845            '66',
2846            '66',
2847        ],
2848        'מ'       => [
2849            '0',
2850            '6',
2851            '6',
2852            '6',
2853        ],
2854        'ם'       => [
2855            '0',
2856            '',
2857            '6',
2858            '6',
2859        ],
2860        'נמ'      => [
2861            '0',
2862            '66',
2863            '66',
2864            '66',
2865        ],
2866        'נם'      => [
2867            '0',
2868            '66',
2869            '66',
2870            '66',
2871        ],
2872        'ננ'      => [
2873            '0',
2874            '6',
2875            '6',
2876            '6',
2877            '66',
2878            '66',
2879            '66',
2880        ],
2881        'נן'      => [
2882            '0',
2883            '6',
2884            '6',
2885            '6',
2886            '66',
2887            '66',
2888            '66',
2889        ],
2890        'נ'       => [
2891            '0',
2892            '6',
2893            '6',
2894            '6',
2895        ],
2896        'ן'       => [
2897            '0',
2898            '',
2899            '6',
2900            '6',
2901        ],
2902        'סתש'     => [
2903            '0',
2904            '2',
2905            '4',
2906            '4',
2907        ],
2908        'סתז'     => [
2909            '0',
2910            '2',
2911            '4',
2912            '4',
2913        ],
2914        'סטז'     => [
2915            '0',
2916            '2',
2917            '4',
2918            '4',
2919        ],
2920        'סטש'     => [
2921            '0',
2922            '2',
2923            '4',
2924            '4',
2925        ],
2926        'סצד'     => [
2927            '0',
2928            '2',
2929            '4',
2930            '4',
2931        ],
2932        'סט'      => [
2933            '0',
2934            '2',
2935            '4',
2936            '4',
2937            '43',
2938            '43',
2939            '43',
2940        ],
2941        'סת'      => [
2942            '0',
2943            '2',
2944            '4',
2945            '4',
2946            '43',
2947            '43',
2948            '43',
2949        ],
2950        'סג'      => [
2951            '0',
2952            '44',
2953            '44',
2954            '44',
2955            '4',
2956            '4',
2957            '4',
2958        ],
2959        'סס'      => [
2960            '0',
2961            '4',
2962            '4',
2963            '4',
2964            '44',
2965            '44',
2966            '44',
2967        ],
2968        'סצ'      => [
2969            '0',
2970            '44',
2971            '44',
2972            '44',
2973        ],
2974        'סץ'      => [
2975            '0',
2976            '44',
2977            '44',
2978            '44',
2979        ],
2980        'סז'      => [
2981            '0',
2982            '44',
2983            '44',
2984            '44',
2985        ],
2986        'סש'      => [
2987            '0',
2988            '44',
2989            '44',
2990            '44',
2991        ],
2992        'ס'       => [
2993            '0',
2994            '4',
2995            '4',
2996            '4',
2997        ],
2998        'ע'       => [
2999            '1',
3000            '0',
3001            '',
3002            '',
3003        ],
3004        'פב'      => [
3005            '0',
3006            '7',
3007            '7',
3008            '7',
3009            '77',
3010            '77',
3011            '77',
3012        ],
3013        'פוו'     => [
3014            '0',
3015            '7',
3016            '7',
3017            '7',
3018            '77',
3019            '77',
3020            '77',
3021        ],
3022        'פפ'      => [
3023            '0',
3024            '7',
3025            '7',
3026            '7',
3027            '77',
3028            '77',
3029            '77',
3030        ],
3031        'פף'      => [
3032            '0',
3033            '7',
3034            '7',
3035            '7',
3036            '77',
3037            '77',
3038            '77',
3039        ],
3040        'פ'       => [
3041            '0',
3042            '7',
3043            '7',
3044            '7',
3045        ],
3046        'ף'       => [
3047            '0',
3048            '',
3049            '7',
3050            '7',
3051        ],
3052        'צג'      => [
3053            '0',
3054            '44',
3055            '44',
3056            '44',
3057            '45',
3058            '45',
3059            '45',
3060        ],
3061        'צז'      => [
3062            '0',
3063            '44',
3064            '44',
3065            '44',
3066        ],
3067        'צס'      => [
3068            '0',
3069            '44',
3070            '44',
3071            '44',
3072        ],
3073        'צצ'      => [
3074            '0',
3075            '4',
3076            '4',
3077            '4',
3078            '5',
3079            '5',
3080            '5',
3081            '44',
3082            '44',
3083            '44',
3084            '54',
3085            '54',
3086            '54',
3087            '45',
3088            '45',
3089            '45',
3090        ],
3091        'צץ'      => [
3092            '0',
3093            '4',
3094            '4',
3095            '4',
3096            '5',
3097            '5',
3098            '5',
3099            '44',
3100            '44',
3101            '44',
3102            '54',
3103            '54',
3104            '54',
3105        ],
3106        'צש'      => [
3107            '0',
3108            '44',
3109            '44',
3110            '44',
3111            '4',
3112            '4',
3113            '4',
3114            '5',
3115            '5',
3116            '5',
3117        ],
3118        'צ'       => [
3119            '0',
3120            '4',
3121            '4',
3122            '4',
3123            '5',
3124            '5',
3125            '5',
3126        ],
3127        'ץ'       => [
3128            '0',
3129            '',
3130            '4',
3131            '4',
3132        ],
3133        'קה'      => [
3134            '0',
3135            '55',
3136            '55',
3137            '5',
3138        ],
3139        'קס'      => [
3140            '0',
3141            '5',
3142            '54',
3143            '54',
3144        ],
3145        'קש'      => [
3146            '0',
3147            '5',
3148            '54',
3149            '54',
3150        ],
3151        'קק'      => [
3152            '0',
3153            '5',
3154            '5',
3155            '5',
3156            '55',
3157            '55',
3158            '55',
3159        ],
3160        'קח'      => [
3161            '0',
3162            '55',
3163            '55',
3164            '55',
3165        ],
3166        'קכ'      => [
3167            '0',
3168            '55',
3169            '55',
3170            '55',
3171        ],
3172        'קך'      => [
3173            '0',
3174            '55',
3175            '55',
3176            '55',
3177        ],
3178        'קג'      => [
3179            '0',
3180            '55',
3181            '55',
3182            '55',
3183            '54',
3184            '54',
3185            '54',
3186        ],
3187        'ק'       => [
3188            '0',
3189            '5',
3190            '5',
3191            '5',
3192        ],
3193        'רר'      => [
3194            '0',
3195            '99',
3196            '99',
3197            '99',
3198            '9',
3199            '9',
3200            '9',
3201        ],
3202        'ר'       => [
3203            '0',
3204            '9',
3205            '9',
3206            '9',
3207        ],
3208        'שטז'     => [
3209            '0',
3210            '2',
3211            '4',
3212            '4',
3213        ],
3214        'שתש'     => [
3215            '0',
3216            '2',
3217            '4',
3218            '4',
3219        ],
3220        'שתז'     => [
3221            '0',
3222            '2',
3223            '4',
3224            '4',
3225        ],
3226        'שטש'     => [
3227            '0',
3228            '2',
3229            '4',
3230            '4',
3231        ],
3232        'שד'      => [
3233            '0',
3234            '2',
3235            '43',
3236            '43',
3237        ],
3238        'שז'      => [
3239            '0',
3240            '44',
3241            '44',
3242            '44',
3243        ],
3244        'שס'      => [
3245            '0',
3246            '44',
3247            '44',
3248            '44',
3249        ],
3250        'שת'      => [
3251            '0',
3252            '2',
3253            '43',
3254            '43',
3255        ],
3256        'שג'      => [
3257            '0',
3258            '4',
3259            '4',
3260            '4',
3261            '44',
3262            '44',
3263            '44',
3264            '4',
3265            '43',
3266            '43',
3267        ],
3268        'שט'      => [
3269            '0',
3270            '2',
3271            '43',
3272            '43',
3273            '44',
3274            '44',
3275            '44',
3276        ],
3277        'שצ'      => [
3278            '0',
3279            '44',
3280            '44',
3281            '44',
3282            '45',
3283            '45',
3284            '45',
3285        ],
3286        'שץ'      => [
3287            '0',
3288            '44',
3289            '',
3290            '44',
3291            '45',
3292            '',
3293            '45',
3294        ],
3295        'שש'      => [
3296            '0',
3297            '4',
3298            '4',
3299            '4',
3300            '44',
3301            '44',
3302            '44',
3303        ],
3304        'ש'       => [
3305            '0',
3306            '4',
3307            '4',
3308            '4',
3309        ],
3310        'תג'      => [
3311            '0',
3312            '34',
3313            '34',
3314            '34',
3315        ],
3316        'תז'      => [
3317            '0',
3318            '34',
3319            '34',
3320            '34',
3321        ],
3322        'תש'      => [
3323            '0',
3324            '4',
3325            '4',
3326            '4',
3327        ],
3328        'תת'      => [
3329            '0',
3330            '3',
3331            '3',
3332            '3',
3333            '4',
3334            '4',
3335            '4',
3336            '33',
3337            '33',
3338            '33',
3339            '44',
3340            '44',
3341            '44',
3342            '34',
3343            '34',
3344            '34',
3345            '43',
3346            '43',
3347            '43',
3348        ],
3349        'ת'       => [
3350            '0',
3351            '3',
3352            '3',
3353            '3',
3354            '4',
3355            '4',
3356            '4',
3357        ],
3358        // Arabic alphabet
3359        'ا'       => [
3360            '1',
3361            '0',
3362            '',
3363            '',
3364        ],
3365        'ب'       => [
3366            '0',
3367            '7',
3368            '7',
3369            '7',
3370        ],
3371        'ت'       => [
3372            '0',
3373            '3',
3374            '3',
3375            '3',
3376        ],
3377        'ث'       => [
3378            '0',
3379            '3',
3380            '3',
3381            '3',
3382        ],
3383        'ج'       => [
3384            '0',
3385            '4',
3386            '4',
3387            '4',
3388        ],
3389        'ح'       => [
3390            '0',
3391            '5',
3392            '5',
3393            '5',
3394        ],
3395        'خ'       => [
3396            '0',
3397            '5',
3398            '5',
3399            '5',
3400        ],
3401        'د'       => [
3402            '0',
3403            '3',
3404            '3',
3405            '3',
3406        ],
3407        'ذ'       => [
3408            '0',
3409            '3',
3410            '3',
3411            '3',
3412        ],
3413        'ر'       => [
3414            '0',
3415            '9',
3416            '9',
3417            '9',
3418        ],
3419        'ز'       => [
3420            '0',
3421            '4',
3422            '4',
3423            '4',
3424        ],
3425        'س'       => [
3426            '0',
3427            '4',
3428            '4',
3429            '4',
3430        ],
3431        'ش'       => [
3432            '0',
3433            '4',
3434            '4',
3435            '4',
3436        ],
3437        'ص'       => [
3438            '0',
3439            '4',
3440            '4',
3441            '4',
3442        ],
3443        'ض'       => [
3444            '0',
3445            '3',
3446            '3',
3447            '3',
3448        ],
3449        'ط'       => [
3450            '0',
3451            '3',
3452            '3',
3453            '3',
3454        ],
3455        'ظ'       => [
3456            '0',
3457            '4',
3458            '4',
3459            '4',
3460        ],
3461        'ع'       => [
3462            '1',
3463            '0',
3464            '',
3465            '',
3466        ],
3467        'غ'       => [
3468            '0',
3469            '0',
3470            '',
3471            '',
3472        ],
3473        'ف'       => [
3474            '0',
3475            '7',
3476            '7',
3477            '7',
3478        ],
3479        'ق'       => [
3480            '0',
3481            '5',
3482            '5',
3483            '5',
3484        ],
3485        'ك'       => [
3486            '0',
3487            '5',
3488            '5',
3489            '5',
3490        ],
3491        'ل'       => [
3492            '0',
3493            '8',
3494            '8',
3495            '8',
3496        ],
3497        'لا'      => [
3498            '0',
3499            '8',
3500            '8',
3501            '8',
3502        ],
3503        'م'       => [
3504            '0',
3505            '6',
3506            '6',
3507            '6',
3508        ],
3509        'ن'       => [
3510            '0',
3511            '6',
3512            '6',
3513            '6',
3514        ],
3515        'هن'      => [
3516            '0',
3517            '66',
3518            '66',
3519            '66',
3520        ],
3521        'ه'       => [
3522            '0',
3523            '5',
3524            '5',
3525            '',
3526        ],
3527        'و'       => [
3528            '1',
3529            '',
3530            '',
3531            '',
3532            '7',
3533            '',
3534            '',
3535        ],
3536        'ي'       => [
3537            '0',
3538            '1',
3539            '',
3540            '',
3541        ],
3542        'آ'       => [
3543            '0',
3544            '1',
3545            '',
3546            '',
3547        ],
3548        'ة'       => [
3549            '0',
3550            '',
3551            '',
3552            '3',
3553        ],
3554        'ی'       => [
3555            '0',
3556            '1',
3557            '',
3558            '',
3559        ],
3560        'ى'       => [
3561            '1',
3562            '1',
3563            '',
3564            '',
3565        ],
3566    ];
3567
3568    /**
3569     * Calculate the Daitch-Mokotoff soundex for a word.
3570     *
3571     * @param string $name
3572     *
3573     * @return string[] List of possible DM codes for the word.
3574     */
3575    private static function daitchMokotoffWord($name): array
3576    {
3577        // Apply special transformation rules to the input string
3578        $name = I18N::strtoupper($name);
3579        foreach (self::$transformNameTable as $transformRule) {
3580            $name = str_replace($transformRule[0], $transformRule[1], $name);
3581        }
3582
3583        // Initialize
3584        $name_script = I18N::textScript($name);
3585        $noVowels    = ($name_script == 'Hebr' || $name_script == 'Arab');
3586
3587        $lastPos         = strlen($name) - 1;
3588        $currPos         = 0;
3589        $state           = 1; // 1: start of input string, 2: before vowel, 3: other
3590        $result          = []; // accumulate complete 6-digit D-M codes here
3591        $partialResult   = []; // accumulate incomplete D-M codes here
3592        $partialResult[] = ['!']; // initialize 1st partial result  ('!' stops "duplicate sound" check)
3593
3594        // Loop through the input string.
3595        // Stop when the string is exhausted or when no more partial results remain
3596        while (count($partialResult) !== 0 && $currPos <= $lastPos) {
3597            // Find the DM coding table entry for the chunk at the current position
3598            $thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
3599            while ($thisEntry != '') {
3600                if (isset(self::$dmsounds[$thisEntry])) {
3601                    break;
3602                }
3603                $thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk
3604            }
3605            if ($thisEntry === '') {
3606                $currPos++; // Not in table: advance pointer to next byte
3607                continue; // and try again
3608            }
3609
3610            $soundTableEntry = self::$dmsounds[$thisEntry];
3611            $workingResult   = $partialResult;
3612            $partialResult   = [];
3613            $currPos += strlen($thisEntry);
3614
3615            // Not at beginning of input string
3616            if ($state != 1) {
3617                if ($currPos <= $lastPos) {
3618                    // Determine whether the next chunk is a vowel
3619                    $nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk
3620                    while ($nextEntry != '') {
3621                        if (isset(self::$dmsounds[$nextEntry])) {
3622                            break;
3623                        }
3624                        $nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk
3625                    }
3626                } else {
3627                    $nextEntry = '';
3628                }
3629                if ($nextEntry != '' && self::$dmsounds[$nextEntry][0] != '0') {
3630                    $state = 2;
3631                } else {
3632                    // Next chunk is a vowel
3633                    $state = 3;
3634                }
3635            }
3636
3637            while ($state < count($soundTableEntry)) {
3638                // empty means 'ignore this sound in this state'
3639                if ($soundTableEntry[$state] == '') {
3640                    foreach ($workingResult as $workingEntry) {
3641                        $tempEntry                        = $workingEntry;
3642                        $tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles'
3643                        $partialResult[]                  = $tempEntry;
3644                    }
3645                } else {
3646                    foreach ($workingResult as $workingEntry) {
3647                        if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) {
3648                            // Incoming sound isn't a duplicate of the previous sound
3649                            $workingEntry[] = $soundTableEntry[$state];
3650                        } else {
3651                            // Incoming sound is a duplicate of the previous sound
3652                            // For Hebrew and Arabic, we need to create a pair of D-M sound codes,
3653                            // one of the pair with only a single occurrence of the duplicate sound,
3654                            // the other with both occurrences
3655                            if ($noVowels) {
3656                                $workingEntry[] = $soundTableEntry[$state];
3657                            }
3658                        }
3659                        if (count($workingEntry) < 7) {
3660                            $partialResult[] = $workingEntry;
3661                        } else {
3662                            // This is the 6th code in the sequence
3663                            // We're looking for 7 entries because the first is '!' and doesn't count
3664                            $tempResult = str_replace('!', '', implode('', $workingEntry));
3665                            // Only return codes from recognisable sounds
3666                            if ($tempResult) {
3667                                $result[] = substr($tempResult . '000000', 0, 6);
3668                            }
3669                        }
3670                    }
3671                }
3672                $state = $state + 3; // Advance to next triplet while keeping the same basic state
3673            }
3674        }
3675
3676        // Zero-fill and copy all remaining partial results
3677        foreach ($partialResult as $workingEntry) {
3678            $tempResult = str_replace('!', '', implode('', $workingEntry));
3679            // Only return codes from recognisable sounds
3680            if ($tempResult) {
3681                $result[] = substr($tempResult . '000000', 0, 6);
3682            }
3683        }
3684
3685        return $result;
3686    }
3687}
3688