<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2018 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

/**
 * Phonetic matching of strings.
 */
class Soundex
{
    /**
     * List the phonetic algorithms this class can generate codes for.
     *
     * @return string[] Algorithm names as returned by I18N::translate(), keyed by
     *                  algorithm identifier ('std' and 'dm')
     */
    public static function getAlgorithms(): array
    {
        $algorithms = [];

        /* I18N: http://en.wikipedia.org/wiki/Soundex */
        $algorithms['std'] = I18N::translate('Russell');
        /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
        $algorithms['dm'] = I18N::translate('Daitch-Mokotoff');

        return $algorithms;
    }

    /**
     * Do two colon-separated lists of soundex codes have at least one code in common?
     *
     * @param string $soundex1 Colon-separated soundex codes
     * @param string $soundex2 Colon-separated soundex codes
     *
     * @return bool False when either list is the empty string, or when no code occurs in both
     */
    public static function compare($soundex1, $soundex2): bool
    {
        // An empty list can never match anything.
        if ($soundex1 === '' || $soundex2 === '') {
            return false;
        }

        $shared_codes = array_intersect(explode(':', $soundex1), explode(':', $soundex2));

        return count($shared_codes) > 0;
    }

    /**
     * Generate Russell soundex codes for a given text.
*
     * Each whitespace-separated word is encoded on its own. When the text holds
     * more than one word, the words are also joined together and encoded as a
     * single word. Duplicate codes are removed, keeping first-seen order.
     *
     * @param string $text
     *
     * @return string Colon-separated soundex codes, or an empty string if there are none
     */
    public static function russell(string $text): string
    {
        // preg_split() returns false on a PCRE error; treat that as "no words".
        $words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        // Combine words, e.g. “New York” as “Newyork”.
        // Joining the split words removes every separator. The former
        // strtr($text, ' ', '') replaced nothing: strtr() ignores the surplus
        // characters of the longer argument, so an empty "to" makes it a no-op.
        $candidates = $words;
        if (count($words) > 1) {
            $candidates[] = implode('', $words);
        }

        $soundex_array = [];
        foreach ($candidates as $candidate) {
            $soundex = soundex($candidate);
            // Only return codes from recognisable sounds. This now also covers
            // the combined word, which was previously added unfiltered.
            if ($soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }

        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        // implode() of an empty list is the empty string.
        return implode(':', $soundex_array);
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
     *
     * Each whitespace-separated word is passed to self::daitchMokotoffWord()
     * (defined elsewhere in this class). When the text holds more than one
     * word, the words are also joined together and encoded as a single word.
     * Duplicate codes are removed, keeping first-seen order.
     *
     * @param string $text
     *
     * @return string Colon-separated soundex codes, or an empty string if there are none
     */
    public static function daitchMokotoff(string $text): string
    {
        // preg_split() returns false on a PCRE error; treat that as "no words".
        $words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY) ?: [];

        // Combine words, e.g. “New York” as “Newyork”.
        // Joining the split words removes every separator. The former
        // strtr($text, ' ', '') replaced nothing, so the "combined" word
        // still contained its spaces.
        $candidates = $words;
        if (count($words) > 1) {
            $candidates[] = implode('', $words);
        }

        $soundex_array = [];
        foreach ($candidates as $candidate) {
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($candidate));
        }

        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);

        // implode() of an empty list is the empty string.
        return implode(':', $soundex_array);
    }

    // Determine the Daitch–Mokotoff Soundex code for a word
    // Original implementation by Gerry Kroll, and analysis by Meliza Amity

    // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
    const MAXCHAR = 7;

    /**
     * Name transformation arrays.
     * Used to transform the Name string to simplify the "sounds like" table.
* This is especially useful in Hebrew.
     *
     * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text)
     * function call to achieve the desired transformations.
     *
     * NOTE(review): the code that applies these rules is not part of this excerpt.
     * Confirm whether entries are applied in list order, and whether "from" is
     * treated as a regular expression (the trailing "$" in two rules looks like an
     * end-of-string anchor) or as a literal string.
     *
     * Note about the use of "\x01":
     * This code, which can’t legitimately occur in the kind of text we're dealing with,
     * is used as a place-holder so that conditional string replacements can be done.
     *
     * @var string[][]
     */
    private static $transformNameTable = [
        // Force Yiddish ligatures to be treated as separate letters
        ['װ', 'וו'],
        ['ײ', 'יי'],
        ['ױ', 'וי'],
        ['בו', 'בע'],
        ['פו', 'פע'],
        ['ומ', 'עמ'],
        ['ום', 'עם'],
        ['ונ', 'ענ'],
        ['ון', 'ען'],
        ['וו', 'ב'],
        // Drop any "\x01" already in the text before it is used as a place-holder
        ["\x01", ''],
        ['ייה$', "\x01ה"],
        ['ייע$', "\x01ע"],
        ['יי', 'ע'],
        // Turn the place-holder back into the letters it stood for
        ["\x01", 'יי'],
    ];

    /**
     * The DM sound coding table is organized this way:
     * key: a variable-length string that corresponds to the UTF-8 character sequence
     * represented by the table entry. Currently, that string can be up to 7
     * bytes long. This maximum length is defined by the class constant
     * self::MAXCHAR.
207 * 208 * value: an array as follows: 209 * [0]: zero if not a vowel 210 * [1]: sound value when this string is at the beginning of the word 211 * [2]: sound value when this string is followed by a vowel 212 * [3]: sound value for other cases 213 * [1],[2],[3] can be repeated several times to create branches in the code 214 * an empty sound value means "ignore in this state" 215 * 216 * @var string[][] 217 */ 218 private static $dmsounds = [ 219 'A' => [ 220 '1', 221 '0', 222 '', 223 '', 224 ], 225 'À' => [ 226 '1', 227 '0', 228 '', 229 '', 230 ], 231 'Á' => [ 232 '1', 233 '0', 234 '', 235 '', 236 ], 237 'Â' => [ 238 '1', 239 '0', 240 '', 241 '', 242 ], 243 'Ã' => [ 244 '1', 245 '0', 246 '', 247 '', 248 ], 249 'Ä' => [ 250 '1', 251 '0', 252 '1', 253 '', 254 '0', 255 '', 256 '', 257 ], 258 'Å' => [ 259 '1', 260 '0', 261 '', 262 '', 263 ], 264 'Ă' => [ 265 '1', 266 '0', 267 '', 268 '', 269 ], 270 'Ą' => [ 271 '1', 272 '', 273 '', 274 '', 275 '', 276 '', 277 '6', 278 ], 279 'Ạ' => [ 280 '1', 281 '0', 282 '', 283 '', 284 ], 285 'Ả' => [ 286 '1', 287 '0', 288 '', 289 '', 290 ], 291 'Ấ' => [ 292 '1', 293 '0', 294 '', 295 '', 296 ], 297 'Ầ' => [ 298 '1', 299 '0', 300 '', 301 '', 302 ], 303 'Ẩ' => [ 304 '1', 305 '0', 306 '', 307 '', 308 ], 309 'Ẫ' => [ 310 '1', 311 '0', 312 '', 313 '', 314 ], 315 'Ậ' => [ 316 '1', 317 '0', 318 '', 319 '', 320 ], 321 'Ắ' => [ 322 '1', 323 '0', 324 '', 325 '', 326 ], 327 'Ằ' => [ 328 '1', 329 '0', 330 '', 331 '', 332 ], 333 'Ẳ' => [ 334 '1', 335 '0', 336 '', 337 '', 338 ], 339 'Ẵ' => [ 340 '1', 341 '0', 342 '', 343 '', 344 ], 345 'Ặ' => [ 346 '1', 347 '0', 348 '', 349 '', 350 ], 351 'AE' => [ 352 '1', 353 '0', 354 '1', 355 '', 356 ], 357 'Æ' => [ 358 '1', 359 '0', 360 '1', 361 '', 362 ], 363 'AI' => [ 364 '1', 365 '0', 366 '1', 367 '', 368 ], 369 'AJ' => [ 370 '1', 371 '0', 372 '1', 373 '', 374 ], 375 'AU' => [ 376 '1', 377 '0', 378 '7', 379 '', 380 ], 381 'AV' => [ 382 '1', 383 '0', 384 '7', 385 '', 386 '7', 387 '7', 388 '7', 389 ], 390 
'ÄU' => [ 391 '1', 392 '0', 393 '1', 394 '', 395 ], 396 'AY' => [ 397 '1', 398 '0', 399 '1', 400 '', 401 ], 402 'B' => [ 403 '0', 404 '7', 405 '7', 406 '7', 407 ], 408 'C' => [ 409 '0', 410 '5', 411 '5', 412 '5', 413 '34', 414 '4', 415 '4', 416 ], 417 'Ć' => [ 418 '0', 419 '4', 420 '4', 421 '4', 422 ], 423 'Č' => [ 424 '0', 425 '4', 426 '4', 427 '4', 428 ], 429 'Ç' => [ 430 '0', 431 '4', 432 '4', 433 '4', 434 ], 435 'CH' => [ 436 '0', 437 '5', 438 '5', 439 '5', 440 '34', 441 '4', 442 '4', 443 ], 444 'CHS' => [ 445 '0', 446 '5', 447 '54', 448 '54', 449 ], 450 'CK' => [ 451 '0', 452 '5', 453 '5', 454 '5', 455 '45', 456 '45', 457 '45', 458 ], 459 'CCS' => [ 460 '0', 461 '4', 462 '4', 463 '4', 464 ], 465 'CS' => [ 466 '0', 467 '4', 468 '4', 469 '4', 470 ], 471 'CSZ' => [ 472 '0', 473 '4', 474 '4', 475 '4', 476 ], 477 'CZ' => [ 478 '0', 479 '4', 480 '4', 481 '4', 482 ], 483 'CZS' => [ 484 '0', 485 '4', 486 '4', 487 '4', 488 ], 489 'D' => [ 490 '0', 491 '3', 492 '3', 493 '3', 494 ], 495 'Ď' => [ 496 '0', 497 '3', 498 '3', 499 '3', 500 ], 501 'Đ' => [ 502 '0', 503 '3', 504 '3', 505 '3', 506 ], 507 'DRS' => [ 508 '0', 509 '4', 510 '4', 511 '4', 512 ], 513 'DRZ' => [ 514 '0', 515 '4', 516 '4', 517 '4', 518 ], 519 'DS' => [ 520 '0', 521 '4', 522 '4', 523 '4', 524 ], 525 'DSH' => [ 526 '0', 527 '4', 528 '4', 529 '4', 530 ], 531 'DSZ' => [ 532 '0', 533 '4', 534 '4', 535 '4', 536 ], 537 'DT' => [ 538 '0', 539 '3', 540 '3', 541 '3', 542 ], 543 'DDZ' => [ 544 '0', 545 '4', 546 '4', 547 '4', 548 ], 549 'DDZS' => [ 550 '0', 551 '4', 552 '4', 553 '4', 554 ], 555 'DZ' => [ 556 '0', 557 '4', 558 '4', 559 '4', 560 ], 561 'DŹ' => [ 562 '0', 563 '4', 564 '4', 565 '4', 566 ], 567 'DŻ' => [ 568 '0', 569 '4', 570 '4', 571 '4', 572 ], 573 'DZH' => [ 574 '0', 575 '4', 576 '4', 577 '4', 578 ], 579 'DZS' => [ 580 '0', 581 '4', 582 '4', 583 '4', 584 ], 585 'E' => [ 586 '1', 587 '0', 588 '', 589 '', 590 ], 591 'È' => [ 592 '1', 593 '0', 594 '', 595 '', 596 ], 597 'É' => [ 598 '1', 599 '0', 600 
'', 601 '', 602 ], 603 'Ê' => [ 604 '1', 605 '0', 606 '', 607 '', 608 ], 609 'Ë' => [ 610 '1', 611 '0', 612 '', 613 '', 614 ], 615 'Ĕ' => [ 616 '1', 617 '0', 618 '', 619 '', 620 ], 621 'Ė' => [ 622 '1', 623 '0', 624 '', 625 '', 626 ], 627 'Ę' => [ 628 '1', 629 '', 630 '', 631 '6', 632 '', 633 '', 634 '', 635 ], 636 'Ẹ' => [ 637 '1', 638 '0', 639 '', 640 '', 641 ], 642 'Ẻ' => [ 643 '1', 644 '0', 645 '', 646 '', 647 ], 648 'Ẽ' => [ 649 '1', 650 '0', 651 '', 652 '', 653 ], 654 'Ế' => [ 655 '1', 656 '0', 657 '', 658 '', 659 ], 660 'Ề' => [ 661 '1', 662 '0', 663 '', 664 '', 665 ], 666 'Ể' => [ 667 '1', 668 '0', 669 '', 670 '', 671 ], 672 'Ễ' => [ 673 '1', 674 '0', 675 '', 676 '', 677 ], 678 'Ệ' => [ 679 '1', 680 '0', 681 '', 682 '', 683 ], 684 'EAU' => [ 685 '1', 686 '0', 687 '', 688 '', 689 ], 690 'EI' => [ 691 '1', 692 '0', 693 '1', 694 '', 695 ], 696 'EJ' => [ 697 '1', 698 '0', 699 '1', 700 '', 701 ], 702 'EU' => [ 703 '1', 704 '1', 705 '1', 706 '', 707 ], 708 'EY' => [ 709 '1', 710 '0', 711 '1', 712 '', 713 ], 714 'F' => [ 715 '0', 716 '7', 717 '7', 718 '7', 719 ], 720 'FB' => [ 721 '0', 722 '7', 723 '7', 724 '7', 725 ], 726 'G' => [ 727 '0', 728 '5', 729 '5', 730 '5', 731 '34', 732 '4', 733 '4', 734 ], 735 'Ğ' => [ 736 '0', 737 '', 738 '', 739 '', 740 ], 741 'GGY' => [ 742 '0', 743 '5', 744 '5', 745 '5', 746 ], 747 'GY' => [ 748 '0', 749 '5', 750 '5', 751 '5', 752 ], 753 'H' => [ 754 '0', 755 '5', 756 '5', 757 '', 758 '5', 759 '5', 760 '5', 761 ], 762 'I' => [ 763 '1', 764 '0', 765 '', 766 '', 767 ], 768 'Ì' => [ 769 '1', 770 '0', 771 '', 772 '', 773 ], 774 'Í' => [ 775 '1', 776 '0', 777 '', 778 '', 779 ], 780 'Î' => [ 781 '1', 782 '0', 783 '', 784 '', 785 ], 786 'Ï' => [ 787 '1', 788 '0', 789 '', 790 '', 791 ], 792 'Ĩ' => [ 793 '1', 794 '0', 795 '', 796 '', 797 ], 798 'Į' => [ 799 '1', 800 '0', 801 '', 802 '', 803 ], 804 'İ' => [ 805 '1', 806 '0', 807 '', 808 '', 809 ], 810 'Ỉ' => [ 811 '1', 812 '0', 813 '', 814 '', 815 ], 816 'Ị' => [ 817 '1', 818 '0', 819 '', 
820 '', 821 ], 822 'IA' => [ 823 '1', 824 '1', 825 '', 826 '', 827 ], 828 'IE' => [ 829 '1', 830 '1', 831 '', 832 '', 833 ], 834 'IO' => [ 835 '1', 836 '1', 837 '', 838 '', 839 ], 840 'IU' => [ 841 '1', 842 '1', 843 '', 844 '', 845 ], 846 'J' => [ 847 '0', 848 '1', 849 '', 850 '', 851 '4', 852 '4', 853 '4', 854 '5', 855 '5', 856 '', 857 ], 858 'K' => [ 859 '0', 860 '5', 861 '5', 862 '5', 863 ], 864 'KH' => [ 865 '0', 866 '5', 867 '5', 868 '5', 869 ], 870 'KS' => [ 871 '0', 872 '5', 873 '54', 874 '54', 875 ], 876 'L' => [ 877 '0', 878 '8', 879 '8', 880 '8', 881 ], 882 'Ľ' => [ 883 '0', 884 '8', 885 '8', 886 '8', 887 ], 888 'Ĺ' => [ 889 '0', 890 '8', 891 '8', 892 '8', 893 ], 894 'Ł' => [ 895 '0', 896 '7', 897 '7', 898 '7', 899 '8', 900 '8', 901 '8', 902 ], 903 'LL' => [ 904 '0', 905 '8', 906 '8', 907 '8', 908 '58', 909 '8', 910 '8', 911 '1', 912 '8', 913 '8', 914 ], 915 'LLY' => [ 916 '0', 917 '8', 918 '8', 919 '8', 920 '1', 921 '8', 922 '8', 923 ], 924 'LY' => [ 925 '0', 926 '8', 927 '8', 928 '8', 929 '1', 930 '8', 931 '8', 932 ], 933 'M' => [ 934 '0', 935 '6', 936 '6', 937 '6', 938 ], 939 'MĔ' => [ 940 '0', 941 '66', 942 '66', 943 '66', 944 ], 945 'MN' => [ 946 '0', 947 '66', 948 '66', 949 '66', 950 ], 951 'N' => [ 952 '0', 953 '6', 954 '6', 955 '6', 956 ], 957 'Ń' => [ 958 '0', 959 '6', 960 '6', 961 '6', 962 ], 963 'Ň' => [ 964 '0', 965 '6', 966 '6', 967 '6', 968 ], 969 'Ñ' => [ 970 '0', 971 '6', 972 '6', 973 '6', 974 ], 975 'NM' => [ 976 '0', 977 '66', 978 '66', 979 '66', 980 ], 981 'O' => [ 982 '1', 983 '0', 984 '', 985 '', 986 ], 987 'Ò' => [ 988 '1', 989 '0', 990 '', 991 '', 992 ], 993 'Ó' => [ 994 '1', 995 '0', 996 '', 997 '', 998 ], 999 'Ô' => [ 1000 '1', 1001 '0', 1002 '', 1003 '', 1004 ], 1005 'Õ' => [ 1006 '1', 1007 '0', 1008 '', 1009 '', 1010 ], 1011 'Ö' => [ 1012 '1', 1013 '0', 1014 '', 1015 '', 1016 ], 1017 'Ø' => [ 1018 '1', 1019 '0', 1020 '', 1021 '', 1022 ], 1023 'Ő' => [ 1024 '1', 1025 '0', 1026 '', 1027 '', 1028 ], 1029 'Œ' => [ 1030 '1', 1031 
'0', 1032 '', 1033 '', 1034 ], 1035 'Ơ' => [ 1036 '1', 1037 '0', 1038 '', 1039 '', 1040 ], 1041 'Ọ' => [ 1042 '1', 1043 '0', 1044 '', 1045 '', 1046 ], 1047 'Ỏ' => [ 1048 '1', 1049 '0', 1050 '', 1051 '', 1052 ], 1053 'Ố' => [ 1054 '1', 1055 '0', 1056 '', 1057 '', 1058 ], 1059 'Ồ' => [ 1060 '1', 1061 '0', 1062 '', 1063 '', 1064 ], 1065 'Ổ' => [ 1066 '1', 1067 '0', 1068 '', 1069 '', 1070 ], 1071 'Ỗ' => [ 1072 '1', 1073 '0', 1074 '', 1075 '', 1076 ], 1077 'Ộ' => [ 1078 '1', 1079 '0', 1080 '', 1081 '', 1082 ], 1083 'Ớ' => [ 1084 '1', 1085 '0', 1086 '', 1087 '', 1088 ], 1089 'Ờ' => [ 1090 '1', 1091 '0', 1092 '', 1093 '', 1094 ], 1095 'Ở' => [ 1096 '1', 1097 '0', 1098 '', 1099 '', 1100 ], 1101 'Ỡ' => [ 1102 '1', 1103 '0', 1104 '', 1105 '', 1106 ], 1107 'Ợ' => [ 1108 '1', 1109 '0', 1110 '', 1111 '', 1112 ], 1113 'OE' => [ 1114 '1', 1115 '0', 1116 '', 1117 '', 1118 ], 1119 'OI' => [ 1120 '1', 1121 '0', 1122 '1', 1123 '', 1124 ], 1125 'OJ' => [ 1126 '1', 1127 '0', 1128 '1', 1129 '', 1130 ], 1131 'OU' => [ 1132 '1', 1133 '0', 1134 '', 1135 '', 1136 ], 1137 'OY' => [ 1138 '1', 1139 '0', 1140 '1', 1141 '', 1142 ], 1143 'P' => [ 1144 '0', 1145 '7', 1146 '7', 1147 '7', 1148 ], 1149 'PF' => [ 1150 '0', 1151 '7', 1152 '7', 1153 '7', 1154 ], 1155 'PH' => [ 1156 '0', 1157 '7', 1158 '7', 1159 '7', 1160 ], 1161 'Q' => [ 1162 '0', 1163 '5', 1164 '5', 1165 '5', 1166 ], 1167 'R' => [ 1168 '0', 1169 '9', 1170 '9', 1171 '9', 1172 ], 1173 'Ř' => [ 1174 '0', 1175 '4', 1176 '4', 1177 '4', 1178 ], 1179 'RS' => [ 1180 '0', 1181 '4', 1182 '4', 1183 '4', 1184 '94', 1185 '94', 1186 '94', 1187 ], 1188 'RZ' => [ 1189 '0', 1190 '4', 1191 '4', 1192 '4', 1193 '94', 1194 '94', 1195 '94', 1196 ], 1197 'S' => [ 1198 '0', 1199 '4', 1200 '4', 1201 '4', 1202 ], 1203 'Ś' => [ 1204 '0', 1205 '4', 1206 '4', 1207 '4', 1208 ], 1209 'Š' => [ 1210 '0', 1211 '4', 1212 '4', 1213 '4', 1214 ], 1215 'Ş' => [ 1216 '0', 1217 '4', 1218 '4', 1219 '4', 1220 ], 1221 'SC' => [ 1222 '0', 1223 '2', 1224 '4', 1225 '4', 1226 ], 
1227 'ŠČ' => [ 1228 '0', 1229 '2', 1230 '4', 1231 '4', 1232 ], 1233 'SCH' => [ 1234 '0', 1235 '4', 1236 '4', 1237 '4', 1238 ], 1239 'SCHD' => [ 1240 '0', 1241 '2', 1242 '43', 1243 '43', 1244 ], 1245 'SCHT' => [ 1246 '0', 1247 '2', 1248 '43', 1249 '43', 1250 ], 1251 'SCHTCH' => [ 1252 '0', 1253 '2', 1254 '4', 1255 '4', 1256 ], 1257 'SCHTSCH' => [ 1258 '0', 1259 '2', 1260 '4', 1261 '4', 1262 ], 1263 'SCHTSH' => [ 1264 '0', 1265 '2', 1266 '4', 1267 '4', 1268 ], 1269 'SD' => [ 1270 '0', 1271 '2', 1272 '43', 1273 '43', 1274 ], 1275 'SH' => [ 1276 '0', 1277 '4', 1278 '4', 1279 '4', 1280 ], 1281 'SHCH' => [ 1282 '0', 1283 '2', 1284 '4', 1285 '4', 1286 ], 1287 'SHD' => [ 1288 '0', 1289 '2', 1290 '43', 1291 '43', 1292 ], 1293 'SHT' => [ 1294 '0', 1295 '2', 1296 '43', 1297 '43', 1298 ], 1299 'SHTCH' => [ 1300 '0', 1301 '2', 1302 '4', 1303 '4', 1304 ], 1305 'SHTSH' => [ 1306 '0', 1307 '2', 1308 '4', 1309 '4', 1310 ], 1311 'ß' => [ 1312 '0', 1313 '', 1314 '4', 1315 '4', 1316 ], 1317 'ST' => [ 1318 '0', 1319 '2', 1320 '43', 1321 '43', 1322 ], 1323 'STCH' => [ 1324 '0', 1325 '2', 1326 '4', 1327 '4', 1328 ], 1329 'STRS' => [ 1330 '0', 1331 '2', 1332 '4', 1333 '4', 1334 ], 1335 'STRZ' => [ 1336 '0', 1337 '2', 1338 '4', 1339 '4', 1340 ], 1341 'STSCH' => [ 1342 '0', 1343 '2', 1344 '4', 1345 '4', 1346 ], 1347 'STSH' => [ 1348 '0', 1349 '2', 1350 '4', 1351 '4', 1352 ], 1353 'SSZ' => [ 1354 '0', 1355 '4', 1356 '4', 1357 '4', 1358 ], 1359 'SZ' => [ 1360 '0', 1361 '4', 1362 '4', 1363 '4', 1364 ], 1365 'SZCS' => [ 1366 '0', 1367 '2', 1368 '4', 1369 '4', 1370 ], 1371 'SZCZ' => [ 1372 '0', 1373 '2', 1374 '4', 1375 '4', 1376 ], 1377 'SZD' => [ 1378 '0', 1379 '2', 1380 '43', 1381 '43', 1382 ], 1383 'SZT' => [ 1384 '0', 1385 '2', 1386 '43', 1387 '43', 1388 ], 1389 'T' => [ 1390 '0', 1391 '3', 1392 '3', 1393 '3', 1394 ], 1395 'Ť' => [ 1396 '0', 1397 '3', 1398 '3', 1399 '3', 1400 ], 1401 'Ţ' => [ 1402 '0', 1403 '3', 1404 '3', 1405 '3', 1406 '4', 1407 '4', 1408 '4', 1409 ], 1410 'TC' => [ 1411 
'0', 1412 '4', 1413 '4', 1414 '4', 1415 ], 1416 'TCH' => [ 1417 '0', 1418 '4', 1419 '4', 1420 '4', 1421 ], 1422 'TH' => [ 1423 '0', 1424 '3', 1425 '3', 1426 '3', 1427 ], 1428 'TRS' => [ 1429 '0', 1430 '4', 1431 '4', 1432 '4', 1433 ], 1434 'TRZ' => [ 1435 '0', 1436 '4', 1437 '4', 1438 '4', 1439 ], 1440 'TS' => [ 1441 '0', 1442 '4', 1443 '4', 1444 '4', 1445 ], 1446 'TSCH' => [ 1447 '0', 1448 '4', 1449 '4', 1450 '4', 1451 ], 1452 'TSH' => [ 1453 '0', 1454 '4', 1455 '4', 1456 '4', 1457 ], 1458 'TSZ' => [ 1459 '0', 1460 '4', 1461 '4', 1462 '4', 1463 ], 1464 'TTCH' => [ 1465 '0', 1466 '4', 1467 '4', 1468 '4', 1469 ], 1470 'TTS' => [ 1471 '0', 1472 '4', 1473 '4', 1474 '4', 1475 ], 1476 'TTSCH' => [ 1477 '0', 1478 '4', 1479 '4', 1480 '4', 1481 ], 1482 'TTSZ' => [ 1483 '0', 1484 '4', 1485 '4', 1486 '4', 1487 ], 1488 'TTZ' => [ 1489 '0', 1490 '4', 1491 '4', 1492 '4', 1493 ], 1494 'TZ' => [ 1495 '0', 1496 '4', 1497 '4', 1498 '4', 1499 ], 1500 'TZS' => [ 1501 '0', 1502 '4', 1503 '4', 1504 '4', 1505 ], 1506 'U' => [ 1507 '1', 1508 '0', 1509 '', 1510 '', 1511 ], 1512 'Ù' => [ 1513 '1', 1514 '0', 1515 '', 1516 '', 1517 ], 1518 'Ú' => [ 1519 '1', 1520 '0', 1521 '', 1522 '', 1523 ], 1524 'Û' => [ 1525 '1', 1526 '0', 1527 '', 1528 '', 1529 ], 1530 'Ü' => [ 1531 '1', 1532 '0', 1533 '', 1534 '', 1535 ], 1536 'Ũ' => [ 1537 '1', 1538 '0', 1539 '', 1540 '', 1541 ], 1542 'Ū' => [ 1543 '1', 1544 '0', 1545 '', 1546 '', 1547 ], 1548 'Ů' => [ 1549 '1', 1550 '0', 1551 '', 1552 '', 1553 ], 1554 'Ű' => [ 1555 '1', 1556 '0', 1557 '', 1558 '', 1559 ], 1560 'Ų' => [ 1561 '1', 1562 '0', 1563 '', 1564 '', 1565 ], 1566 'Ư' => [ 1567 '1', 1568 '0', 1569 '', 1570 '', 1571 ], 1572 'Ụ' => [ 1573 '1', 1574 '0', 1575 '', 1576 '', 1577 ], 1578 'Ủ' => [ 1579 '1', 1580 '0', 1581 '', 1582 '', 1583 ], 1584 'Ứ' => [ 1585 '1', 1586 '0', 1587 '', 1588 '', 1589 ], 1590 'Ừ' => [ 1591 '1', 1592 '0', 1593 '', 1594 '', 1595 ], 1596 'Ử' => [ 1597 '1', 1598 '0', 1599 '', 1600 '', 1601 ], 1602 'Ữ' => [ 1603 '1', 1604 '0', 
1605 '', 1606 '', 1607 ], 1608 'Ự' => [ 1609 '1', 1610 '0', 1611 '', 1612 '', 1613 ], 1614 'UE' => [ 1615 '1', 1616 '0', 1617 '', 1618 '', 1619 ], 1620 'UI' => [ 1621 '1', 1622 '0', 1623 '1', 1624 '', 1625 ], 1626 'UJ' => [ 1627 '1', 1628 '0', 1629 '1', 1630 '', 1631 ], 1632 'UY' => [ 1633 '1', 1634 '0', 1635 '1', 1636 '', 1637 ], 1638 'UW' => [ 1639 '1', 1640 '0', 1641 '1', 1642 '', 1643 '0', 1644 '7', 1645 '7', 1646 ], 1647 'V' => [ 1648 '0', 1649 '7', 1650 '7', 1651 '7', 1652 ], 1653 'W' => [ 1654 '0', 1655 '7', 1656 '7', 1657 '7', 1658 ], 1659 'X' => [ 1660 '0', 1661 '5', 1662 '54', 1663 '54', 1664 ], 1665 'Y' => [ 1666 '1', 1667 '1', 1668 '', 1669 '', 1670 ], 1671 'Ý' => [ 1672 '1', 1673 '1', 1674 '', 1675 '', 1676 ], 1677 'Ỳ' => [ 1678 '1', 1679 '1', 1680 '', 1681 '', 1682 ], 1683 'Ỵ' => [ 1684 '1', 1685 '1', 1686 '', 1687 '', 1688 ], 1689 'Ỷ' => [ 1690 '1', 1691 '1', 1692 '', 1693 '', 1694 ], 1695 'Ỹ' => [ 1696 '1', 1697 '1', 1698 '', 1699 '', 1700 ], 1701 'Z' => [ 1702 '0', 1703 '4', 1704 '4', 1705 '4', 1706 ], 1707 'Ź' => [ 1708 '0', 1709 '4', 1710 '4', 1711 '4', 1712 ], 1713 'Ż' => [ 1714 '0', 1715 '4', 1716 '4', 1717 '4', 1718 ], 1719 'Ž' => [ 1720 '0', 1721 '4', 1722 '4', 1723 '4', 1724 ], 1725 'ZD' => [ 1726 '0', 1727 '2', 1728 '43', 1729 '43', 1730 ], 1731 'ZDZ' => [ 1732 '0', 1733 '2', 1734 '4', 1735 '4', 1736 ], 1737 'ZDZH' => [ 1738 '0', 1739 '2', 1740 '4', 1741 '4', 1742 ], 1743 'ZH' => [ 1744 '0', 1745 '4', 1746 '4', 1747 '4', 1748 ], 1749 'ZHD' => [ 1750 '0', 1751 '2', 1752 '43', 1753 '43', 1754 ], 1755 'ZHDZH' => [ 1756 '0', 1757 '2', 1758 '4', 1759 '4', 1760 ], 1761 'ZS' => [ 1762 '0', 1763 '4', 1764 '4', 1765 '4', 1766 ], 1767 'ZSCH' => [ 1768 '0', 1769 '4', 1770 '4', 1771 '4', 1772 ], 1773 'ZSH' => [ 1774 '0', 1775 '4', 1776 '4', 1777 '4', 1778 ], 1779 'ZZS' => [ 1780 '0', 1781 '4', 1782 '4', 1783 '4', 1784 ], 1785 // Cyrillic alphabet 1786 'А' => [ 1787 '1', 1788 '0', 1789 '', 1790 '', 1791 ], 1792 'Б' => [ 1793 '0', 1794 '7', 1795 '7', 
1796 '7', 1797 ], 1798 'В' => [ 1799 '0', 1800 '7', 1801 '7', 1802 '7', 1803 ], 1804 'Г' => [ 1805 '0', 1806 '5', 1807 '5', 1808 '5', 1809 ], 1810 'Д' => [ 1811 '0', 1812 '3', 1813 '3', 1814 '3', 1815 ], 1816 'ДЗ' => [ 1817 '0', 1818 '4', 1819 '4', 1820 '4', 1821 ], 1822 'Е' => [ 1823 '1', 1824 '0', 1825 '', 1826 '', 1827 ], 1828 'Ё' => [ 1829 '1', 1830 '0', 1831 '', 1832 '', 1833 ], 1834 'Ж' => [ 1835 '0', 1836 '4', 1837 '4', 1838 '4', 1839 ], 1840 'З' => [ 1841 '0', 1842 '4', 1843 '4', 1844 '4', 1845 ], 1846 'И' => [ 1847 '1', 1848 '0', 1849 '', 1850 '', 1851 ], 1852 'Й' => [ 1853 '1', 1854 '1', 1855 '', 1856 '', 1857 '4', 1858 '4', 1859 '4', 1860 ], 1861 'К' => [ 1862 '0', 1863 '5', 1864 '5', 1865 '5', 1866 ], 1867 'Л' => [ 1868 '0', 1869 '8', 1870 '8', 1871 '8', 1872 ], 1873 'М' => [ 1874 '0', 1875 '6', 1876 '6', 1877 '6', 1878 ], 1879 'Н' => [ 1880 '0', 1881 '6', 1882 '6', 1883 '6', 1884 ], 1885 'О' => [ 1886 '1', 1887 '0', 1888 '', 1889 '', 1890 ], 1891 'П' => [ 1892 '0', 1893 '7', 1894 '7', 1895 '7', 1896 ], 1897 'Р' => [ 1898 '0', 1899 '9', 1900 '9', 1901 '9', 1902 ], 1903 'РЖ' => [ 1904 '0', 1905 '4', 1906 '4', 1907 '4', 1908 ], 1909 'С' => [ 1910 '0', 1911 '4', 1912 '4', 1913 '4', 1914 ], 1915 'Т' => [ 1916 '0', 1917 '3', 1918 '3', 1919 '3', 1920 ], 1921 'У' => [ 1922 '1', 1923 '0', 1924 '', 1925 '', 1926 ], 1927 'Ф' => [ 1928 '0', 1929 '7', 1930 '7', 1931 '7', 1932 ], 1933 'Х' => [ 1934 '0', 1935 '5', 1936 '5', 1937 '5', 1938 ], 1939 'Ц' => [ 1940 '0', 1941 '4', 1942 '4', 1943 '4', 1944 ], 1945 'Ч' => [ 1946 '0', 1947 '4', 1948 '4', 1949 '4', 1950 ], 1951 'Ш' => [ 1952 '0', 1953 '4', 1954 '4', 1955 '4', 1956 ], 1957 'Щ' => [ 1958 '0', 1959 '2', 1960 '4', 1961 '4', 1962 ], 1963 'Ъ' => [ 1964 '0', 1965 '', 1966 '', 1967 '', 1968 ], 1969 'Ы' => [ 1970 '0', 1971 '1', 1972 '', 1973 '', 1974 ], 1975 'Ь' => [ 1976 '0', 1977 '', 1978 '', 1979 '', 1980 ], 1981 'Э' => [ 1982 '1', 1983 '0', 1984 '', 1985 '', 1986 ], 1987 'Ю' => [ 1988 '0', 1989 '1', 1990 '', 1991 
'', 1992 ], 1993 'Я' => [ 1994 '0', 1995 '1', 1996 '', 1997 '', 1998 ], 1999 // Greek alphabet 2000 'Α' => [ 2001 '1', 2002 '0', 2003 '', 2004 '', 2005 ], 2006 'Ά' => [ 2007 '1', 2008 '0', 2009 '', 2010 '', 2011 ], 2012 'ΑΙ' => [ 2013 '1', 2014 '0', 2015 '1', 2016 '', 2017 ], 2018 'ΑΥ' => [ 2019 '1', 2020 '0', 2021 '1', 2022 '', 2023 ], 2024 'Β' => [ 2025 '0', 2026 '7', 2027 '7', 2028 '7', 2029 ], 2030 'Γ' => [ 2031 '0', 2032 '5', 2033 '5', 2034 '5', 2035 ], 2036 'Δ' => [ 2037 '0', 2038 '3', 2039 '3', 2040 '3', 2041 ], 2042 'Ε' => [ 2043 '1', 2044 '0', 2045 '', 2046 '', 2047 ], 2048 'Έ' => [ 2049 '1', 2050 '0', 2051 '', 2052 '', 2053 ], 2054 'ΕΙ' => [ 2055 '1', 2056 '0', 2057 '1', 2058 '', 2059 ], 2060 'ΕΥ' => [ 2061 '1', 2062 '1', 2063 '1', 2064 '', 2065 ], 2066 'Ζ' => [ 2067 '0', 2068 '4', 2069 '4', 2070 '4', 2071 ], 2072 'Η' => [ 2073 '1', 2074 '0', 2075 '', 2076 '', 2077 ], 2078 'Ή' => [ 2079 '1', 2080 '0', 2081 '', 2082 '', 2083 ], 2084 'Θ' => [ 2085 '0', 2086 '3', 2087 '3', 2088 '3', 2089 ], 2090 'Ι' => [ 2091 '1', 2092 '0', 2093 '', 2094 '', 2095 ], 2096 'Ί' => [ 2097 '1', 2098 '0', 2099 '', 2100 '', 2101 ], 2102 'Ϊ' => [ 2103 '1', 2104 '0', 2105 '', 2106 '', 2107 ], 2108 'ΐ' => [ 2109 '1', 2110 '0', 2111 '', 2112 '', 2113 ], 2114 'Κ' => [ 2115 '0', 2116 '5', 2117 '5', 2118 '5', 2119 ], 2120 'Λ' => [ 2121 '0', 2122 '8', 2123 '8', 2124 '8', 2125 ], 2126 'Μ' => [ 2127 '0', 2128 '6', 2129 '6', 2130 '6', 2131 ], 2132 'ΜΠ' => [ 2133 '0', 2134 '7', 2135 '7', 2136 '7', 2137 ], 2138 'Ν' => [ 2139 '0', 2140 '6', 2141 '6', 2142 '6', 2143 ], 2144 'ΝΤ' => [ 2145 '0', 2146 '3', 2147 '3', 2148 '3', 2149 ], 2150 'Ξ' => [ 2151 '0', 2152 '5', 2153 '54', 2154 '54', 2155 ], 2156 'Ο' => [ 2157 '1', 2158 '0', 2159 '', 2160 '', 2161 ], 2162 'Ό' => [ 2163 '1', 2164 '0', 2165 '', 2166 '', 2167 ], 2168 'ΟΙ' => [ 2169 '1', 2170 '0', 2171 '1', 2172 '', 2173 ], 2174 'ΟΥ' => [ 2175 '1', 2176 '0', 2177 '1', 2178 '', 2179 ], 2180 'Π' => [ 2181 '0', 2182 '7', 2183 '7', 2184 '7', 2185 ], 
2186 'Ρ' => [ 2187 '0', 2188 '9', 2189 '9', 2190 '9', 2191 ], 2192 'Σ' => [ 2193 '0', 2194 '4', 2195 '4', 2196 '4', 2197 ], 2198 'ς' => [ 2199 '0', 2200 '', 2201 '', 2202 '4', 2203 ], 2204 'Τ' => [ 2205 '0', 2206 '3', 2207 '3', 2208 '3', 2209 ], 2210 'ΤΖ' => [ 2211 '0', 2212 '4', 2213 '4', 2214 '4', 2215 ], 2216 'ΤΣ' => [ 2217 '0', 2218 '4', 2219 '4', 2220 '4', 2221 ], 2222 'Υ' => [ 2223 '1', 2224 '1', 2225 '', 2226 '', 2227 ], 2228 'Ύ' => [ 2229 '1', 2230 '1', 2231 '', 2232 '', 2233 ], 2234 'Ϋ' => [ 2235 '1', 2236 '1', 2237 '', 2238 '', 2239 ], 2240 'ΰ' => [ 2241 '1', 2242 '1', 2243 '', 2244 '', 2245 ], 2246 'ΥΚ' => [ 2247 '1', 2248 '5', 2249 '5', 2250 '5', 2251 ], 2252 'ΥΥ' => [ 2253 '1', 2254 '65', 2255 '65', 2256 '65', 2257 ], 2258 'Φ' => [ 2259 '0', 2260 '7', 2261 '7', 2262 '7', 2263 ], 2264 'Χ' => [ 2265 '0', 2266 '5', 2267 '5', 2268 '5', 2269 ], 2270 'Ψ' => [ 2271 '0', 2272 '7', 2273 '7', 2274 '7', 2275 ], 2276 'Ω' => [ 2277 '1', 2278 '0', 2279 '', 2280 '', 2281 ], 2282 'Ώ' => [ 2283 '1', 2284 '0', 2285 '', 2286 '', 2287 ], 2288 // Hebrew alphabet 2289 'א' => [ 2290 '1', 2291 '0', 2292 '', 2293 '', 2294 ], 2295 'או' => [ 2296 '1', 2297 '0', 2298 '7', 2299 '', 2300 ], 2301 'אג' => [ 2302 '1', 2303 '4', 2304 '4', 2305 '4', 2306 '5', 2307 '5', 2308 '5', 2309 '34', 2310 '34', 2311 '34', 2312 ], 2313 'בב' => [ 2314 '0', 2315 '7', 2316 '7', 2317 '7', 2318 '77', 2319 '77', 2320 '77', 2321 ], 2322 'ב' => [ 2323 '0', 2324 '7', 2325 '7', 2326 '7', 2327 ], 2328 'גג' => [ 2329 '0', 2330 '4', 2331 '4', 2332 '4', 2333 '5', 2334 '5', 2335 '5', 2336 '45', 2337 '45', 2338 '45', 2339 '55', 2340 '55', 2341 '55', 2342 '54', 2343 '54', 2344 '54', 2345 ], 2346 'גד' => [ 2347 '0', 2348 '43', 2349 '43', 2350 '43', 2351 '53', 2352 '53', 2353 '53', 2354 ], 2355 'גה' => [ 2356 '0', 2357 '45', 2358 '45', 2359 '45', 2360 '55', 2361 '55', 2362 '55', 2363 ], 2364 'גז' => [ 2365 '0', 2366 '44', 2367 '44', 2368 '44', 2369 '45', 2370 '45', 2371 '45', 2372 ], 2373 'גח' => [ 2374 '0', 2375 
'45', 2376 '45', 2377 '45', 2378 '55', 2379 '55', 2380 '55', 2381 ], 2382 'גכ' => [ 2383 '0', 2384 '45', 2385 '45', 2386 '45', 2387 '55', 2388 '55', 2389 '55', 2390 ], 2391 'גך' => [ 2392 '0', 2393 '45', 2394 '45', 2395 '45', 2396 '55', 2397 '55', 2398 '55', 2399 ], 2400 'גצ' => [ 2401 '0', 2402 '44', 2403 '44', 2404 '44', 2405 '45', 2406 '45', 2407 '45', 2408 ], 2409 'גץ' => [ 2410 '0', 2411 '44', 2412 '44', 2413 '44', 2414 '45', 2415 '45', 2416 '45', 2417 ], 2418 'גק' => [ 2419 '0', 2420 '45', 2421 '45', 2422 '45', 2423 '54', 2424 '54', 2425 '54', 2426 ], 2427 'גש' => [ 2428 '0', 2429 '44', 2430 '44', 2431 '44', 2432 '54', 2433 '54', 2434 '54', 2435 ], 2436 'גת' => [ 2437 '0', 2438 '43', 2439 '43', 2440 '43', 2441 '53', 2442 '53', 2443 '53', 2444 ], 2445 'ג' => [ 2446 '0', 2447 '4', 2448 '4', 2449 '4', 2450 '5', 2451 '5', 2452 '5', 2453 ], 2454 'דז' => [ 2455 '0', 2456 '4', 2457 '4', 2458 '4', 2459 ], 2460 'דד' => [ 2461 '0', 2462 '3', 2463 '3', 2464 '3', 2465 '33', 2466 '33', 2467 '33', 2468 ], 2469 'דט' => [ 2470 '0', 2471 '33', 2472 '33', 2473 '33', 2474 ], 2475 'דש' => [ 2476 '0', 2477 '4', 2478 '4', 2479 '4', 2480 ], 2481 'דצ' => [ 2482 '0', 2483 '4', 2484 '4', 2485 '4', 2486 ], 2487 'דץ' => [ 2488 '0', 2489 '4', 2490 '4', 2491 '4', 2492 ], 2493 'ד' => [ 2494 '0', 2495 '3', 2496 '3', 2497 '3', 2498 ], 2499 'הג' => [ 2500 '0', 2501 '54', 2502 '54', 2503 '54', 2504 '55', 2505 '55', 2506 '55', 2507 ], 2508 'הכ' => [ 2509 '0', 2510 '55', 2511 '55', 2512 '55', 2513 ], 2514 'הח' => [ 2515 '0', 2516 '55', 2517 '55', 2518 '55', 2519 ], 2520 'הק' => [ 2521 '0', 2522 '55', 2523 '55', 2524 '55', 2525 '5', 2526 '5', 2527 '5', 2528 ], 2529 'הה' => [ 2530 '0', 2531 '5', 2532 '5', 2533 '', 2534 '55', 2535 '55', 2536 '', 2537 ], 2538 'ה' => [ 2539 '0', 2540 '5', 2541 '5', 2542 '', 2543 ], 2544 'וי' => [ 2545 '1', 2546 '', 2547 '', 2548 '', 2549 '7', 2550 '7', 2551 '7', 2552 ], 2553 'ו' => [ 2554 '1', 2555 '7', 2556 '7', 2557 '7', 2558 '7', 2559 '', 2560 '', 2561 ], 2562 
'וו' => [ 2563 '1', 2564 '7', 2565 '7', 2566 '7', 2567 '7', 2568 '', 2569 '', 2570 ], 2571 'וופ' => [ 2572 '1', 2573 '7', 2574 '7', 2575 '7', 2576 '77', 2577 '77', 2578 '77', 2579 ], 2580 'זש' => [ 2581 '0', 2582 '4', 2583 '4', 2584 '4', 2585 '44', 2586 '44', 2587 '44', 2588 ], 2589 'זדז' => [ 2590 '0', 2591 '2', 2592 '4', 2593 '4', 2594 ], 2595 'ז' => [ 2596 '0', 2597 '4', 2598 '4', 2599 '4', 2600 ], 2601 'זג' => [ 2602 '0', 2603 '44', 2604 '44', 2605 '44', 2606 '45', 2607 '45', 2608 '45', 2609 ], 2610 'זז' => [ 2611 '0', 2612 '4', 2613 '4', 2614 '4', 2615 '44', 2616 '44', 2617 '44', 2618 ], 2619 'זס' => [ 2620 '0', 2621 '44', 2622 '44', 2623 '44', 2624 ], 2625 'זצ' => [ 2626 '0', 2627 '44', 2628 '44', 2629 '44', 2630 ], 2631 'זץ' => [ 2632 '0', 2633 '44', 2634 '44', 2635 '44', 2636 ], 2637 'חג' => [ 2638 '0', 2639 '54', 2640 '54', 2641 '54', 2642 '53', 2643 '53', 2644 '53', 2645 ], 2646 'חח' => [ 2647 '0', 2648 '5', 2649 '5', 2650 '5', 2651 '55', 2652 '55', 2653 '55', 2654 ], 2655 'חק' => [ 2656 '0', 2657 '55', 2658 '55', 2659 '55', 2660 '5', 2661 '5', 2662 '5', 2663 ], 2664 'חכ' => [ 2665 '0', 2666 '45', 2667 '45', 2668 '45', 2669 '55', 2670 '55', 2671 '55', 2672 ], 2673 'חס' => [ 2674 '0', 2675 '5', 2676 '54', 2677 '54', 2678 ], 2679 'חש' => [ 2680 '0', 2681 '5', 2682 '54', 2683 '54', 2684 ], 2685 'ח' => [ 2686 '0', 2687 '5', 2688 '5', 2689 '5', 2690 ], 2691 'טש' => [ 2692 '0', 2693 '4', 2694 '4', 2695 '4', 2696 ], 2697 'טד' => [ 2698 '0', 2699 '33', 2700 '33', 2701 '33', 2702 ], 2703 'טי' => [ 2704 '0', 2705 '3', 2706 '3', 2707 '3', 2708 '4', 2709 '4', 2710 '4', 2711 '3', 2712 '3', 2713 '34', 2714 ], 2715 'טת' => [ 2716 '0', 2717 '33', 2718 '33', 2719 '33', 2720 ], 2721 'טט' => [ 2722 '0', 2723 '3', 2724 '3', 2725 '3', 2726 '33', 2727 '33', 2728 '33', 2729 ], 2730 'ט' => [ 2731 '0', 2732 '3', 2733 '3', 2734 '3', 2735 ], 2736 'י' => [ 2737 '1', 2738 '1', 2739 '', 2740 '', 2741 ], 2742 'יא' => [ 2743 '1', 2744 '1', 2745 '', 2746 '', 2747 '1', 2748 '1', 2749 '1', 
2750 ], 2751 'כג' => [ 2752 '0', 2753 '55', 2754 '55', 2755 '55', 2756 '54', 2757 '54', 2758 '54', 2759 ], 2760 'כש' => [ 2761 '0', 2762 '5', 2763 '54', 2764 '54', 2765 ], 2766 'כס' => [ 2767 '0', 2768 '5', 2769 '54', 2770 '54', 2771 ], 2772 'ככ' => [ 2773 '0', 2774 '5', 2775 '5', 2776 '5', 2777 '55', 2778 '55', 2779 '55', 2780 ], 2781 'כך' => [ 2782 '0', 2783 '5', 2784 '5', 2785 '5', 2786 '55', 2787 '55', 2788 '55', 2789 ], 2790 'כ' => [ 2791 '0', 2792 '5', 2793 '5', 2794 '5', 2795 ], 2796 'כח' => [ 2797 '0', 2798 '55', 2799 '55', 2800 '55', 2801 '5', 2802 '5', 2803 '5', 2804 ], 2805 'ך' => [ 2806 '0', 2807 '', 2808 '5', 2809 '5', 2810 ], 2811 'ל' => [ 2812 '0', 2813 '8', 2814 '8', 2815 '8', 2816 ], 2817 'לל' => [ 2818 '0', 2819 '88', 2820 '88', 2821 '88', 2822 '8', 2823 '8', 2824 '8', 2825 ], 2826 'מנ' => [ 2827 '0', 2828 '66', 2829 '66', 2830 '66', 2831 ], 2832 'מן' => [ 2833 '0', 2834 '66', 2835 '66', 2836 '66', 2837 ], 2838 'ממ' => [ 2839 '0', 2840 '6', 2841 '6', 2842 '6', 2843 '66', 2844 '66', 2845 '66', 2846 ], 2847 'מם' => [ 2848 '0', 2849 '6', 2850 '6', 2851 '6', 2852 '66', 2853 '66', 2854 '66', 2855 ], 2856 'מ' => [ 2857 '0', 2858 '6', 2859 '6', 2860 '6', 2861 ], 2862 'ם' => [ 2863 '0', 2864 '', 2865 '6', 2866 '6', 2867 ], 2868 'נמ' => [ 2869 '0', 2870 '66', 2871 '66', 2872 '66', 2873 ], 2874 'נם' => [ 2875 '0', 2876 '66', 2877 '66', 2878 '66', 2879 ], 2880 'ננ' => [ 2881 '0', 2882 '6', 2883 '6', 2884 '6', 2885 '66', 2886 '66', 2887 '66', 2888 ], 2889 'נן' => [ 2890 '0', 2891 '6', 2892 '6', 2893 '6', 2894 '66', 2895 '66', 2896 '66', 2897 ], 2898 'נ' => [ 2899 '0', 2900 '6', 2901 '6', 2902 '6', 2903 ], 2904 'ן' => [ 2905 '0', 2906 '', 2907 '6', 2908 '6', 2909 ], 2910 'סתש' => [ 2911 '0', 2912 '2', 2913 '4', 2914 '4', 2915 ], 2916 'סתז' => [ 2917 '0', 2918 '2', 2919 '4', 2920 '4', 2921 ], 2922 'סטז' => [ 2923 '0', 2924 '2', 2925 '4', 2926 '4', 2927 ], 2928 'סטש' => [ 2929 '0', 2930 '2', 2931 '4', 2932 '4', 2933 ], 2934 'סצד' => [ 2935 '0', 2936 '2', 2937 
'4', 2938 '4', 2939 ], 2940 'סט' => [ 2941 '0', 2942 '2', 2943 '4', 2944 '4', 2945 '43', 2946 '43', 2947 '43', 2948 ], 2949 'סת' => [ 2950 '0', 2951 '2', 2952 '4', 2953 '4', 2954 '43', 2955 '43', 2956 '43', 2957 ], 2958 'סג' => [ 2959 '0', 2960 '44', 2961 '44', 2962 '44', 2963 '4', 2964 '4', 2965 '4', 2966 ], 2967 'סס' => [ 2968 '0', 2969 '4', 2970 '4', 2971 '4', 2972 '44', 2973 '44', 2974 '44', 2975 ], 2976 'סצ' => [ 2977 '0', 2978 '44', 2979 '44', 2980 '44', 2981 ], 2982 'סץ' => [ 2983 '0', 2984 '44', 2985 '44', 2986 '44', 2987 ], 2988 'סז' => [ 2989 '0', 2990 '44', 2991 '44', 2992 '44', 2993 ], 2994 'סש' => [ 2995 '0', 2996 '44', 2997 '44', 2998 '44', 2999 ], 3000 'ס' => [ 3001 '0', 3002 '4', 3003 '4', 3004 '4', 3005 ], 3006 'ע' => [ 3007 '1', 3008 '0', 3009 '', 3010 '', 3011 ], 3012 'פב' => [ 3013 '0', 3014 '7', 3015 '7', 3016 '7', 3017 '77', 3018 '77', 3019 '77', 3020 ], 3021 'פוו' => [ 3022 '0', 3023 '7', 3024 '7', 3025 '7', 3026 '77', 3027 '77', 3028 '77', 3029 ], 3030 'פפ' => [ 3031 '0', 3032 '7', 3033 '7', 3034 '7', 3035 '77', 3036 '77', 3037 '77', 3038 ], 3039 'פף' => [ 3040 '0', 3041 '7', 3042 '7', 3043 '7', 3044 '77', 3045 '77', 3046 '77', 3047 ], 3048 'פ' => [ 3049 '0', 3050 '7', 3051 '7', 3052 '7', 3053 ], 3054 'ף' => [ 3055 '0', 3056 '', 3057 '7', 3058 '7', 3059 ], 3060 'צג' => [ 3061 '0', 3062 '44', 3063 '44', 3064 '44', 3065 '45', 3066 '45', 3067 '45', 3068 ], 3069 'צז' => [ 3070 '0', 3071 '44', 3072 '44', 3073 '44', 3074 ], 3075 'צס' => [ 3076 '0', 3077 '44', 3078 '44', 3079 '44', 3080 ], 3081 'צצ' => [ 3082 '0', 3083 '4', 3084 '4', 3085 '4', 3086 '5', 3087 '5', 3088 '5', 3089 '44', 3090 '44', 3091 '44', 3092 '54', 3093 '54', 3094 '54', 3095 '45', 3096 '45', 3097 '45', 3098 ], 3099 'צץ' => [ 3100 '0', 3101 '4', 3102 '4', 3103 '4', 3104 '5', 3105 '5', 3106 '5', 3107 '44', 3108 '44', 3109 '44', 3110 '54', 3111 '54', 3112 '54', 3113 ], 3114 'צש' => [ 3115 '0', 3116 '44', 3117 '44', 3118 '44', 3119 '4', 3120 '4', 3121 '4', 3122 '5', 3123 '5', 3124 
'5', 3125 ], 3126 'צ' => [ 3127 '0', 3128 '4', 3129 '4', 3130 '4', 3131 '5', 3132 '5', 3133 '5', 3134 ], 3135 'ץ' => [ 3136 '0', 3137 '', 3138 '4', 3139 '4', 3140 ], 3141 'קה' => [ 3142 '0', 3143 '55', 3144 '55', 3145 '5', 3146 ], 3147 'קס' => [ 3148 '0', 3149 '5', 3150 '54', 3151 '54', 3152 ], 3153 'קש' => [ 3154 '0', 3155 '5', 3156 '54', 3157 '54', 3158 ], 3159 'קק' => [ 3160 '0', 3161 '5', 3162 '5', 3163 '5', 3164 '55', 3165 '55', 3166 '55', 3167 ], 3168 'קח' => [ 3169 '0', 3170 '55', 3171 '55', 3172 '55', 3173 ], 3174 'קכ' => [ 3175 '0', 3176 '55', 3177 '55', 3178 '55', 3179 ], 3180 'קך' => [ 3181 '0', 3182 '55', 3183 '55', 3184 '55', 3185 ], 3186 'קג' => [ 3187 '0', 3188 '55', 3189 '55', 3190 '55', 3191 '54', 3192 '54', 3193 '54', 3194 ], 3195 'ק' => [ 3196 '0', 3197 '5', 3198 '5', 3199 '5', 3200 ], 3201 'רר' => [ 3202 '0', 3203 '99', 3204 '99', 3205 '99', 3206 '9', 3207 '9', 3208 '9', 3209 ], 3210 'ר' => [ 3211 '0', 3212 '9', 3213 '9', 3214 '9', 3215 ], 3216 'שטז' => [ 3217 '0', 3218 '2', 3219 '4', 3220 '4', 3221 ], 3222 'שתש' => [ 3223 '0', 3224 '2', 3225 '4', 3226 '4', 3227 ], 3228 'שתז' => [ 3229 '0', 3230 '2', 3231 '4', 3232 '4', 3233 ], 3234 'שטש' => [ 3235 '0', 3236 '2', 3237 '4', 3238 '4', 3239 ], 3240 'שד' => [ 3241 '0', 3242 '2', 3243 '43', 3244 '43', 3245 ], 3246 'שז' => [ 3247 '0', 3248 '44', 3249 '44', 3250 '44', 3251 ], 3252 'שס' => [ 3253 '0', 3254 '44', 3255 '44', 3256 '44', 3257 ], 3258 'שת' => [ 3259 '0', 3260 '2', 3261 '43', 3262 '43', 3263 ], 3264 'שג' => [ 3265 '0', 3266 '4', 3267 '4', 3268 '4', 3269 '44', 3270 '44', 3271 '44', 3272 '4', 3273 '43', 3274 '43', 3275 ], 3276 'שט' => [ 3277 '0', 3278 '2', 3279 '43', 3280 '43', 3281 '44', 3282 '44', 3283 '44', 3284 ], 3285 'שצ' => [ 3286 '0', 3287 '44', 3288 '44', 3289 '44', 3290 '45', 3291 '45', 3292 '45', 3293 ], 3294 'שץ' => [ 3295 '0', 3296 '44', 3297 '', 3298 '44', 3299 '45', 3300 '', 3301 '45', 3302 ], 3303 'שש' => [ 3304 '0', 3305 '4', 3306 '4', 3307 '4', 3308 '44', 3309 '44', 3310 '44', 
3311 ], 3312 'ש' => [ 3313 '0', 3314 '4', 3315 '4', 3316 '4', 3317 ], 3318 'תג' => [ 3319 '0', 3320 '34', 3321 '34', 3322 '34', 3323 ], 3324 'תז' => [ 3325 '0', 3326 '34', 3327 '34', 3328 '34', 3329 ], 3330 'תש' => [ 3331 '0', 3332 '4', 3333 '4', 3334 '4', 3335 ], 3336 'תת' => [ 3337 '0', 3338 '3', 3339 '3', 3340 '3', 3341 '4', 3342 '4', 3343 '4', 3344 '33', 3345 '33', 3346 '33', 3347 '44', 3348 '44', 3349 '44', 3350 '34', 3351 '34', 3352 '34', 3353 '43', 3354 '43', 3355 '43', 3356 ], 3357 'ת' => [ 3358 '0', 3359 '3', 3360 '3', 3361 '3', 3362 '4', 3363 '4', 3364 '4', 3365 ], 3366 // Arabic alphabet 3367 'ا' => [ 3368 '1', 3369 '0', 3370 '', 3371 '', 3372 ], 3373 'ب' => [ 3374 '0', 3375 '7', 3376 '7', 3377 '7', 3378 ], 3379 'ت' => [ 3380 '0', 3381 '3', 3382 '3', 3383 '3', 3384 ], 3385 'ث' => [ 3386 '0', 3387 '3', 3388 '3', 3389 '3', 3390 ], 3391 'ج' => [ 3392 '0', 3393 '4', 3394 '4', 3395 '4', 3396 ], 3397 'ح' => [ 3398 '0', 3399 '5', 3400 '5', 3401 '5', 3402 ], 3403 'خ' => [ 3404 '0', 3405 '5', 3406 '5', 3407 '5', 3408 ], 3409 'د' => [ 3410 '0', 3411 '3', 3412 '3', 3413 '3', 3414 ], 3415 'ذ' => [ 3416 '0', 3417 '3', 3418 '3', 3419 '3', 3420 ], 3421 'ر' => [ 3422 '0', 3423 '9', 3424 '9', 3425 '9', 3426 ], 3427 'ز' => [ 3428 '0', 3429 '4', 3430 '4', 3431 '4', 3432 ], 3433 'س' => [ 3434 '0', 3435 '4', 3436 '4', 3437 '4', 3438 ], 3439 'ش' => [ 3440 '0', 3441 '4', 3442 '4', 3443 '4', 3444 ], 3445 'ص' => [ 3446 '0', 3447 '4', 3448 '4', 3449 '4', 3450 ], 3451 'ض' => [ 3452 '0', 3453 '3', 3454 '3', 3455 '3', 3456 ], 3457 'ط' => [ 3458 '0', 3459 '3', 3460 '3', 3461 '3', 3462 ], 3463 'ظ' => [ 3464 '0', 3465 '4', 3466 '4', 3467 '4', 3468 ], 3469 'ع' => [ 3470 '1', 3471 '0', 3472 '', 3473 '', 3474 ], 3475 'غ' => [ 3476 '0', 3477 '0', 3478 '', 3479 '', 3480 ], 3481 'ف' => [ 3482 '0', 3483 '7', 3484 '7', 3485 '7', 3486 ], 3487 'ق' => [ 3488 '0', 3489 '5', 3490 '5', 3491 '5', 3492 ], 3493 'ك' => [ 3494 '0', 3495 '5', 3496 '5', 3497 '5', 3498 ], 3499 'ل' => [ 3500 '0', 3501 '8', 
'8',
            '8',
        ],
        'لا' => [
            '0',
            '8',
            '8',
            '8',
        ],
        'م' => [
            '0',
            '6',
            '6',
            '6',
        ],
        'ن' => [
            '0',
            '6',
            '6',
            '6',
        ],
        'هن' => [
            '0',
            '66',
            '66',
            '66',
        ],
        'ه' => [
            '0',
            '5',
            '5',
            '',
        ],
        'و' => [
            '1',
            '',
            '',
            '',
            '7',
            '',
            '',
        ],
        'ي' => [
            '0',
            '1',
            '',
            '',
        ],
        'آ' => [
            '0',
            '1',
            '',
            '',
        ],
        'ة' => [
            '0',
            '',
            '',
            '3',
        ],
        'ی' => [
            '0',
            '1',
            '',
            '',
        ],
        'ى' => [
            '1',
            '1',
            '',
            '',
        ],
    ];

    /**
     * Calculate the Daitch-Mokotoff soundex for a word.
     *
     * The word is upper-cased, rewritten with self::$transformNameTable and then
     * consumed from left to right. At each position the longest key of
     * self::$dmsounds (at most self::MAXCHAR bytes) is looked up; a byte at which
     * no key starts is skipped.
     *
     * A sound table row is read as: index 0 = vowel flag ('0' means "not a vowel"),
     * followed by one or more triplets of codes. Within a triplet the code is chosen
     * by the current state (1: start of word, 2: before a vowel, 3: other). Every
     * triplet of a row is applied to every partial code collected so far, so rows
     * with several triplets multiply the number of candidate codes.
     *
     * @param string $name
     *
     * @return string[] List of possible DM codes for the word.
     */
    private static function daitchMokotoffWord(string $name): array
    {
        // Apply special transformation rules to the input string
        $name = I18N::strtoupper($name);
        foreach (self::$transformNameTable as $transformRule) {
            $name = str_replace($transformRule[0], $transformRule[1], $name);
        }

        // Repeated sounds are not collapsed for Hebrew and Arabic text (see below)
        $name_script = I18N::textScript($name);
        $noVowels    = $name_script === 'Hebr' || $name_script === 'Arab';

        // Positions and lengths are in bytes, to match the byte-based self::MAXCHAR
        $lastPos       = strlen($name) - 1;
        $currPos       = 0;
        $state         = 1; // 1: start of input string, 2: before vowel, 3: other
        $result        = []; // accumulate complete 6-digit D-M codes here
        $partialResult = [['!']]; // incomplete D-M codes; the leading '!' stops the "duplicate sound" check

        // Loop through the input string.
        // Stop when the string is exhausted or when no more partial results remain
        while ($partialResult !== [] && $currPos <= $lastPos) {
            // Find the DM coding table entry for the chunk at the current position
            $thisEntry = self::longestSoundTableKey($name, $currPos);
            if ($thisEntry === '') {
                $currPos++; // Not in table: advance pointer to next byte
                continue; // and try again
            }

            $soundTableEntry = self::$dmsounds[$thisEntry];
            $workingResult   = $partialResult;
            $partialResult   = [];
            $currPos         += strlen($thisEntry);

            // Not at beginning of input string: the state depends on the chunk that follows
            if ($state !== 1) {
                $nextEntry = $currPos <= $lastPos ? self::longestSoundTableKey($name, $currPos) : '';

                if ($nextEntry !== '' && self::$dmsounds[$nextEntry][0] !== '0') {
                    // Next chunk is a vowel
                    $state = 2;
                } else {
                    // Next chunk is not a vowel, is unknown, or we are at the end of the string
                    $state = 3;
                }
            }

            // Visit the code for this state in every triplet of the table row.
            // After the first chunk this also moves $state away from 1.
            while ($state < count($soundTableEntry)) {
                $sound = $soundTableEntry[$state];

                foreach ($workingResult as $workingEntry) {
                    $lastIndex = count($workingEntry) - 1;

                    // empty means 'ignore this sound in this state'
                    if ($sound === '') {
                        $workingEntry[$lastIndex] .= '!'; // Prevent false 'doubles'
                        $partialResult[]          = $workingEntry;
                        continue;
                    }

                    // A sound that repeats the previous one is dropped, except for Hebrew
                    // and Arabic, where both occurrences are kept.
                    // NOTE(review): an earlier comment here described emitting a pair of codes
                    // (single and doubled) for Hebrew/Arabic; only the doubled variant has ever
                    // been generated by this code - confirm which is intended.
                    if ($noVowels || $sound !== $workingEntry[$lastIndex]) {
                        $workingEntry[] = $sound;
                    }

                    if (count($workingEntry) < 7) {
                        $partialResult[] = $workingEntry;
                        continue;
                    }

                    // This is the 6th code in the sequence
                    // We're looking for 7 entries because the first is '!' and doesn't count
                    $tempResult = str_replace('!', '', implode('', $workingEntry));
                    // Only return codes from recognisable sounds ('' and a lone '0' are skipped)
                    if ($tempResult !== '' && $tempResult !== '0') {
                        $result[] = substr($tempResult . '000000', 0, 6);
                    }
                }

                $state += 3; // Advance to next triplet while keeping the same basic state
            }
        }

        // Zero-fill and copy all remaining partial results
        foreach ($partialResult as $workingEntry) {
            $tempResult = str_replace('!', '', implode('', $workingEntry));
            // Only return codes from recognisable sounds ('' and a lone '0' are skipped)
            if ($tempResult !== '' && $tempResult !== '0') {
                $result[] = substr($tempResult . '000000', 0, 6);
            }
        }

        return $result;
    }

    /**
     * Find the longest key of the D-M sound table that starts at a byte offset.
     *
     * @param string $name   Text to search
     * @param int    $offset Byte offset into $name
     *
     * @return string The matching key, or an empty string when no key starts at this offset.
     */
    private static function longestSoundTableKey(string $name, int $offset): string
    {
        // Start with the maximum length chunk. The cast covers PHP versions where
        // substr() returns false instead of an empty string.
        $chunk = (string) substr($name, $offset, self::MAXCHAR);

        while ($chunk !== '' && !isset(self::$dmsounds[$chunk])) {
            // Not in table: try a shorter chunk
            $chunk = (string) substr($chunk, 0, -1);
        }

        return $chunk;
    }
}