<?php
/**
 * webtrees: online genealogy
 * Copyright (C) 2018 webtrees development team
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
declare(strict_types=1);

namespace Fisharebest\Webtrees;

/**
 * Phonetic matching of strings.
 */
class Soundex
{
    /**
     * List the supported soundex algorithms.
     *
     * The array keys are the short algorithm identifiers ('std', 'dm');
     * the values are the translated, human-readable algorithm names.
     *
     * @return string[]
     */
    public static function getAlgorithms(): array
    {
        $algorithms = [];

        /* I18N: http://en.wikipedia.org/wiki/Soundex */
        $algorithms['std'] = I18N::translate('Russell');

        /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
        $algorithms['dm'] = I18N::translate('Daitch-Mokotoff');

        return $algorithms;
    }

    /**
     * Do two soundex strings have at least one code in common?
     *
     * Each argument is a ':'-delimited list of codes. An empty string
     * never matches anything, not even another empty string.
     *
     * @param string $soundex1
     * @param string $soundex2
     *
     * @return bool
     */
    public static function compare($soundex1, $soundex2): bool
    {
        // Guard clause: nothing to compare if either side has no codes.
        if ($soundex1 === '' || $soundex2 === '') {
            return false;
        }

        $shared_codes = array_intersect(
            explode(':', $soundex1),
            explode(':', $soundex2)
        );

        return $shared_codes !== [];
    }

    /**
     * Generate Russell soundex codes for a given text.
*
     * The text is split on whitespace and every word is encoded with PHP's
     * soundex(). When there is more than one word, the words are also joined
     * together and encoded as a single word. Codes of '0000' (no recognisable
     * sound) are dropped, duplicates are removed, and at most 51 codes are
     * returned, joined with ':'.
     *
     * @param string $text
     *
     * @return string ':'-delimited list of codes; '' when nothing could be encoded
     */
    public static function russell(string $text): string
    {
        $words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);

        $candidates = [];
        foreach ($words as $word) {
            $candidates[] = soundex($word);
        }

        // Combine words, e.g. “New York” as “Newyork”.
        // Join the already-split words: strtr($text, ' ', '') returns its input
        // unchanged when the replacement string is empty, and would not cover
        // tabs or newlines either.
        if (count($words) > 1) {
            $candidates[] = soundex(implode('', $words));
        }

        // Only return codes from recognisable sounds (this includes the combined code)
        $soundex_array = [];
        foreach ($candidates as $soundex) {
            if ($soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }

        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        return implode(':', $soundex_array);
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
     *
     * The text is split on whitespace and every word is passed to
     * daitchMokotoffWord() (defined elsewhere in this class; presumably it
     * returns the list of codes for one word). When there is more than one
     * word, the words are also joined together and encoded as a single word.
     * Duplicates are removed and at most 36 codes are returned, joined with ':'.
     *
     * @param string $text
     *
     * @return string ':'-delimited list of codes
     */
    public static function daitchMokotoff(string $text): string
    {
        $words         = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY);
        $soundex_array = [];

        foreach ($words as $word) {
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word));
        }

        // Combine words, e.g. “New York” as “Newyork”.
        // Join the already-split words: strtr($text, ' ', '') returns its input
        // unchanged when the replacement string is empty, so the whitespace
        // was never actually removed.
        if (count($words) > 1) {
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(implode('', $words)));
        }

        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);

        return implode(':', $soundex_array);
    }

    // Determine the Daitch–Mokotoff Soundex code for a word
    // Original implementation by Gerry Kroll, and analysis by Meliza Amity

    // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!)
    const MAXCHAR = 7;

    /**
     * Name transformation arrays.
     * Used to transform the Name string to simplify the "sounds like" table.
     * This is especially useful in Hebrew.
*
     * Each array entry is a [from, to] pair: the "from" and "to" arguments of a
     * preg($from, $to, $text) function call that achieves the desired
     * transformation. The entries are listed in the order they are meant to be
     * applied (the code that consumes this table is outside this definition).
     *
     * Note about the use of "\x01":
     * This code, which can’t legitimately occur in the kind of text we're dealing with,
     * is used as a place-holder so that conditional string replacements can be done.
     *
     * Layout note: every string sits on its own line so that right-to-left text
     * is not visually reordered around the array punctuation.
     *
     * @var string[][]
     */
    private static $transformNameTable = [
        // Force Yiddish ligatures to be treated as separate letters
        [
            'װ',
            'וו',
        ],
        [
            'ײ',
            'יי',
        ],
        [
            'ױ',
            'וי',
        ],
        // Letter-pair rewrites
        [
            'בו',
            'בע',
        ],
        [
            'פו',
            'פע',
        ],
        [
            'ומ',
            'עמ',
        ],
        [
            'ום',
            'עם',
        ],
        [
            'ונ',
            'ענ',
        ],
        [
            'ון',
            'ען',
        ],
        [
            'וו',
            'ב',
        ],
        // Place-holder sequence: first remove any "\x01" already in the text...
        [
            "\x01",
            '',
        ],
        // ...then mark the two "$"-suffixed patterns with the place-holder
        // (presumably "$" anchors the match to the end of the name)...
        [
            'ייה$',
            "\x01ה",
        ],
        [
            'ייע$',
            "\x01ע",
        ],
        // ...rewrite every remaining occurrence of the pair...
        [
            'יי',
            'ע',
        ],
        // ...and finally turn the place-holder back into the original pair.
        [
            "\x01",
            'יי',
        ],
    ];

    /**
     * The DM sound coding table is organized this way:
     * key: a variable-length string that corresponds to the UTF-8 character sequence
     * represented by the table entry. Currently, that string can be up to 7
     * bytes long. This maximum length is defined by the value of the class
     * constant MAXCHAR.
199 * 200 * value: an array as follows: 201 * [0]: zero if not a vowel 202 * [1]: sound value when this string is at the beginning of the word 203 * [2]: sound value when this string is followed by a vowel 204 * [3]: sound value for other cases 205 * [1],[2],[3] can be repeated several times to create branches in the code 206 * an empty sound value means "ignore in this state" 207 * 208 * @var string[][] 209 */ 210 private static $dmsounds = [ 211 'A' => [ 212 '1', 213 '0', 214 '', 215 '', 216 ], 217 'À' => [ 218 '1', 219 '0', 220 '', 221 '', 222 ], 223 'Á' => [ 224 '1', 225 '0', 226 '', 227 '', 228 ], 229 'Â' => [ 230 '1', 231 '0', 232 '', 233 '', 234 ], 235 'Ã' => [ 236 '1', 237 '0', 238 '', 239 '', 240 ], 241 'Ä' => [ 242 '1', 243 '0', 244 '1', 245 '', 246 '0', 247 '', 248 '', 249 ], 250 'Å' => [ 251 '1', 252 '0', 253 '', 254 '', 255 ], 256 'Ă' => [ 257 '1', 258 '0', 259 '', 260 '', 261 ], 262 'Ą' => [ 263 '1', 264 '', 265 '', 266 '', 267 '', 268 '', 269 '6', 270 ], 271 'Ạ' => [ 272 '1', 273 '0', 274 '', 275 '', 276 ], 277 'Ả' => [ 278 '1', 279 '0', 280 '', 281 '', 282 ], 283 'Ấ' => [ 284 '1', 285 '0', 286 '', 287 '', 288 ], 289 'Ầ' => [ 290 '1', 291 '0', 292 '', 293 '', 294 ], 295 'Ẩ' => [ 296 '1', 297 '0', 298 '', 299 '', 300 ], 301 'Ẫ' => [ 302 '1', 303 '0', 304 '', 305 '', 306 ], 307 'Ậ' => [ 308 '1', 309 '0', 310 '', 311 '', 312 ], 313 'Ắ' => [ 314 '1', 315 '0', 316 '', 317 '', 318 ], 319 'Ằ' => [ 320 '1', 321 '0', 322 '', 323 '', 324 ], 325 'Ẳ' => [ 326 '1', 327 '0', 328 '', 329 '', 330 ], 331 'Ẵ' => [ 332 '1', 333 '0', 334 '', 335 '', 336 ], 337 'Ặ' => [ 338 '1', 339 '0', 340 '', 341 '', 342 ], 343 'AE' => [ 344 '1', 345 '0', 346 '1', 347 '', 348 ], 349 'Æ' => [ 350 '1', 351 '0', 352 '1', 353 '', 354 ], 355 'AI' => [ 356 '1', 357 '0', 358 '1', 359 '', 360 ], 361 'AJ' => [ 362 '1', 363 '0', 364 '1', 365 '', 366 ], 367 'AU' => [ 368 '1', 369 '0', 370 '7', 371 '', 372 ], 373 'AV' => [ 374 '1', 375 '0', 376 '7', 377 '', 378 '7', 379 '7', 380 '7', 381 ], 382 
'ÄU' => [ 383 '1', 384 '0', 385 '1', 386 '', 387 ], 388 'AY' => [ 389 '1', 390 '0', 391 '1', 392 '', 393 ], 394 'B' => [ 395 '0', 396 '7', 397 '7', 398 '7', 399 ], 400 'C' => [ 401 '0', 402 '5', 403 '5', 404 '5', 405 '34', 406 '4', 407 '4', 408 ], 409 'Ć' => [ 410 '0', 411 '4', 412 '4', 413 '4', 414 ], 415 'Č' => [ 416 '0', 417 '4', 418 '4', 419 '4', 420 ], 421 'Ç' => [ 422 '0', 423 '4', 424 '4', 425 '4', 426 ], 427 'CH' => [ 428 '0', 429 '5', 430 '5', 431 '5', 432 '34', 433 '4', 434 '4', 435 ], 436 'CHS' => [ 437 '0', 438 '5', 439 '54', 440 '54', 441 ], 442 'CK' => [ 443 '0', 444 '5', 445 '5', 446 '5', 447 '45', 448 '45', 449 '45', 450 ], 451 'CCS' => [ 452 '0', 453 '4', 454 '4', 455 '4', 456 ], 457 'CS' => [ 458 '0', 459 '4', 460 '4', 461 '4', 462 ], 463 'CSZ' => [ 464 '0', 465 '4', 466 '4', 467 '4', 468 ], 469 'CZ' => [ 470 '0', 471 '4', 472 '4', 473 '4', 474 ], 475 'CZS' => [ 476 '0', 477 '4', 478 '4', 479 '4', 480 ], 481 'D' => [ 482 '0', 483 '3', 484 '3', 485 '3', 486 ], 487 'Ď' => [ 488 '0', 489 '3', 490 '3', 491 '3', 492 ], 493 'Đ' => [ 494 '0', 495 '3', 496 '3', 497 '3', 498 ], 499 'DRS' => [ 500 '0', 501 '4', 502 '4', 503 '4', 504 ], 505 'DRZ' => [ 506 '0', 507 '4', 508 '4', 509 '4', 510 ], 511 'DS' => [ 512 '0', 513 '4', 514 '4', 515 '4', 516 ], 517 'DSH' => [ 518 '0', 519 '4', 520 '4', 521 '4', 522 ], 523 'DSZ' => [ 524 '0', 525 '4', 526 '4', 527 '4', 528 ], 529 'DT' => [ 530 '0', 531 '3', 532 '3', 533 '3', 534 ], 535 'DDZ' => [ 536 '0', 537 '4', 538 '4', 539 '4', 540 ], 541 'DDZS' => [ 542 '0', 543 '4', 544 '4', 545 '4', 546 ], 547 'DZ' => [ 548 '0', 549 '4', 550 '4', 551 '4', 552 ], 553 'DŹ' => [ 554 '0', 555 '4', 556 '4', 557 '4', 558 ], 559 'DŻ' => [ 560 '0', 561 '4', 562 '4', 563 '4', 564 ], 565 'DZH' => [ 566 '0', 567 '4', 568 '4', 569 '4', 570 ], 571 'DZS' => [ 572 '0', 573 '4', 574 '4', 575 '4', 576 ], 577 'E' => [ 578 '1', 579 '0', 580 '', 581 '', 582 ], 583 'È' => [ 584 '1', 585 '0', 586 '', 587 '', 588 ], 589 'É' => [ 590 '1', 591 '0', 592 
'', 593 '', 594 ], 595 'Ê' => [ 596 '1', 597 '0', 598 '', 599 '', 600 ], 601 'Ë' => [ 602 '1', 603 '0', 604 '', 605 '', 606 ], 607 'Ĕ' => [ 608 '1', 609 '0', 610 '', 611 '', 612 ], 613 'Ė' => [ 614 '1', 615 '0', 616 '', 617 '', 618 ], 619 'Ę' => [ 620 '1', 621 '', 622 '', 623 '6', 624 '', 625 '', 626 '', 627 ], 628 'Ẹ' => [ 629 '1', 630 '0', 631 '', 632 '', 633 ], 634 'Ẻ' => [ 635 '1', 636 '0', 637 '', 638 '', 639 ], 640 'Ẽ' => [ 641 '1', 642 '0', 643 '', 644 '', 645 ], 646 'Ế' => [ 647 '1', 648 '0', 649 '', 650 '', 651 ], 652 'Ề' => [ 653 '1', 654 '0', 655 '', 656 '', 657 ], 658 'Ể' => [ 659 '1', 660 '0', 661 '', 662 '', 663 ], 664 'Ễ' => [ 665 '1', 666 '0', 667 '', 668 '', 669 ], 670 'Ệ' => [ 671 '1', 672 '0', 673 '', 674 '', 675 ], 676 'EAU' => [ 677 '1', 678 '0', 679 '', 680 '', 681 ], 682 'EI' => [ 683 '1', 684 '0', 685 '1', 686 '', 687 ], 688 'EJ' => [ 689 '1', 690 '0', 691 '1', 692 '', 693 ], 694 'EU' => [ 695 '1', 696 '1', 697 '1', 698 '', 699 ], 700 'EY' => [ 701 '1', 702 '0', 703 '1', 704 '', 705 ], 706 'F' => [ 707 '0', 708 '7', 709 '7', 710 '7', 711 ], 712 'FB' => [ 713 '0', 714 '7', 715 '7', 716 '7', 717 ], 718 'G' => [ 719 '0', 720 '5', 721 '5', 722 '5', 723 '34', 724 '4', 725 '4', 726 ], 727 'Ğ' => [ 728 '0', 729 '', 730 '', 731 '', 732 ], 733 'GGY' => [ 734 '0', 735 '5', 736 '5', 737 '5', 738 ], 739 'GY' => [ 740 '0', 741 '5', 742 '5', 743 '5', 744 ], 745 'H' => [ 746 '0', 747 '5', 748 '5', 749 '', 750 '5', 751 '5', 752 '5', 753 ], 754 'I' => [ 755 '1', 756 '0', 757 '', 758 '', 759 ], 760 'Ì' => [ 761 '1', 762 '0', 763 '', 764 '', 765 ], 766 'Í' => [ 767 '1', 768 '0', 769 '', 770 '', 771 ], 772 'Î' => [ 773 '1', 774 '0', 775 '', 776 '', 777 ], 778 'Ï' => [ 779 '1', 780 '0', 781 '', 782 '', 783 ], 784 'Ĩ' => [ 785 '1', 786 '0', 787 '', 788 '', 789 ], 790 'Į' => [ 791 '1', 792 '0', 793 '', 794 '', 795 ], 796 'İ' => [ 797 '1', 798 '0', 799 '', 800 '', 801 ], 802 'Ỉ' => [ 803 '1', 804 '0', 805 '', 806 '', 807 ], 808 'Ị' => [ 809 '1', 810 '0', 811 '', 
812 '', 813 ], 814 'IA' => [ 815 '1', 816 '1', 817 '', 818 '', 819 ], 820 'IE' => [ 821 '1', 822 '1', 823 '', 824 '', 825 ], 826 'IO' => [ 827 '1', 828 '1', 829 '', 830 '', 831 ], 832 'IU' => [ 833 '1', 834 '1', 835 '', 836 '', 837 ], 838 'J' => [ 839 '0', 840 '1', 841 '', 842 '', 843 '4', 844 '4', 845 '4', 846 '5', 847 '5', 848 '', 849 ], 850 'K' => [ 851 '0', 852 '5', 853 '5', 854 '5', 855 ], 856 'KH' => [ 857 '0', 858 '5', 859 '5', 860 '5', 861 ], 862 'KS' => [ 863 '0', 864 '5', 865 '54', 866 '54', 867 ], 868 'L' => [ 869 '0', 870 '8', 871 '8', 872 '8', 873 ], 874 'Ľ' => [ 875 '0', 876 '8', 877 '8', 878 '8', 879 ], 880 'Ĺ' => [ 881 '0', 882 '8', 883 '8', 884 '8', 885 ], 886 'Ł' => [ 887 '0', 888 '7', 889 '7', 890 '7', 891 '8', 892 '8', 893 '8', 894 ], 895 'LL' => [ 896 '0', 897 '8', 898 '8', 899 '8', 900 '58', 901 '8', 902 '8', 903 '1', 904 '8', 905 '8', 906 ], 907 'LLY' => [ 908 '0', 909 '8', 910 '8', 911 '8', 912 '1', 913 '8', 914 '8', 915 ], 916 'LY' => [ 917 '0', 918 '8', 919 '8', 920 '8', 921 '1', 922 '8', 923 '8', 924 ], 925 'M' => [ 926 '0', 927 '6', 928 '6', 929 '6', 930 ], 931 'MĔ' => [ 932 '0', 933 '66', 934 '66', 935 '66', 936 ], 937 'MN' => [ 938 '0', 939 '66', 940 '66', 941 '66', 942 ], 943 'N' => [ 944 '0', 945 '6', 946 '6', 947 '6', 948 ], 949 'Ń' => [ 950 '0', 951 '6', 952 '6', 953 '6', 954 ], 955 'Ň' => [ 956 '0', 957 '6', 958 '6', 959 '6', 960 ], 961 'Ñ' => [ 962 '0', 963 '6', 964 '6', 965 '6', 966 ], 967 'NM' => [ 968 '0', 969 '66', 970 '66', 971 '66', 972 ], 973 'O' => [ 974 '1', 975 '0', 976 '', 977 '', 978 ], 979 'Ò' => [ 980 '1', 981 '0', 982 '', 983 '', 984 ], 985 'Ó' => [ 986 '1', 987 '0', 988 '', 989 '', 990 ], 991 'Ô' => [ 992 '1', 993 '0', 994 '', 995 '', 996 ], 997 'Õ' => [ 998 '1', 999 '0', 1000 '', 1001 '', 1002 ], 1003 'Ö' => [ 1004 '1', 1005 '0', 1006 '', 1007 '', 1008 ], 1009 'Ø' => [ 1010 '1', 1011 '0', 1012 '', 1013 '', 1014 ], 1015 'Ő' => [ 1016 '1', 1017 '0', 1018 '', 1019 '', 1020 ], 1021 'Œ' => [ 1022 '1', 1023 '0', 1024 
'', 1025 '', 1026 ], 1027 'Ơ' => [ 1028 '1', 1029 '0', 1030 '', 1031 '', 1032 ], 1033 'Ọ' => [ 1034 '1', 1035 '0', 1036 '', 1037 '', 1038 ], 1039 'Ỏ' => [ 1040 '1', 1041 '0', 1042 '', 1043 '', 1044 ], 1045 'Ố' => [ 1046 '1', 1047 '0', 1048 '', 1049 '', 1050 ], 1051 'Ồ' => [ 1052 '1', 1053 '0', 1054 '', 1055 '', 1056 ], 1057 'Ổ' => [ 1058 '1', 1059 '0', 1060 '', 1061 '', 1062 ], 1063 'Ỗ' => [ 1064 '1', 1065 '0', 1066 '', 1067 '', 1068 ], 1069 'Ộ' => [ 1070 '1', 1071 '0', 1072 '', 1073 '', 1074 ], 1075 'Ớ' => [ 1076 '1', 1077 '0', 1078 '', 1079 '', 1080 ], 1081 'Ờ' => [ 1082 '1', 1083 '0', 1084 '', 1085 '', 1086 ], 1087 'Ở' => [ 1088 '1', 1089 '0', 1090 '', 1091 '', 1092 ], 1093 'Ỡ' => [ 1094 '1', 1095 '0', 1096 '', 1097 '', 1098 ], 1099 'Ợ' => [ 1100 '1', 1101 '0', 1102 '', 1103 '', 1104 ], 1105 'OE' => [ 1106 '1', 1107 '0', 1108 '', 1109 '', 1110 ], 1111 'OI' => [ 1112 '1', 1113 '0', 1114 '1', 1115 '', 1116 ], 1117 'OJ' => [ 1118 '1', 1119 '0', 1120 '1', 1121 '', 1122 ], 1123 'OU' => [ 1124 '1', 1125 '0', 1126 '', 1127 '', 1128 ], 1129 'OY' => [ 1130 '1', 1131 '0', 1132 '1', 1133 '', 1134 ], 1135 'P' => [ 1136 '0', 1137 '7', 1138 '7', 1139 '7', 1140 ], 1141 'PF' => [ 1142 '0', 1143 '7', 1144 '7', 1145 '7', 1146 ], 1147 'PH' => [ 1148 '0', 1149 '7', 1150 '7', 1151 '7', 1152 ], 1153 'Q' => [ 1154 '0', 1155 '5', 1156 '5', 1157 '5', 1158 ], 1159 'R' => [ 1160 '0', 1161 '9', 1162 '9', 1163 '9', 1164 ], 1165 'Ř' => [ 1166 '0', 1167 '4', 1168 '4', 1169 '4', 1170 ], 1171 'RS' => [ 1172 '0', 1173 '4', 1174 '4', 1175 '4', 1176 '94', 1177 '94', 1178 '94', 1179 ], 1180 'RZ' => [ 1181 '0', 1182 '4', 1183 '4', 1184 '4', 1185 '94', 1186 '94', 1187 '94', 1188 ], 1189 'S' => [ 1190 '0', 1191 '4', 1192 '4', 1193 '4', 1194 ], 1195 'Ś' => [ 1196 '0', 1197 '4', 1198 '4', 1199 '4', 1200 ], 1201 'Š' => [ 1202 '0', 1203 '4', 1204 '4', 1205 '4', 1206 ], 1207 'Ş' => [ 1208 '0', 1209 '4', 1210 '4', 1211 '4', 1212 ], 1213 'SC' => [ 1214 '0', 1215 '2', 1216 '4', 1217 '4', 1218 ], 1219 'ŠČ' => 
[ 1220 '0', 1221 '2', 1222 '4', 1223 '4', 1224 ], 1225 'SCH' => [ 1226 '0', 1227 '4', 1228 '4', 1229 '4', 1230 ], 1231 'SCHD' => [ 1232 '0', 1233 '2', 1234 '43', 1235 '43', 1236 ], 1237 'SCHT' => [ 1238 '0', 1239 '2', 1240 '43', 1241 '43', 1242 ], 1243 'SCHTCH' => [ 1244 '0', 1245 '2', 1246 '4', 1247 '4', 1248 ], 1249 'SCHTSCH' => [ 1250 '0', 1251 '2', 1252 '4', 1253 '4', 1254 ], 1255 'SCHTSH' => [ 1256 '0', 1257 '2', 1258 '4', 1259 '4', 1260 ], 1261 'SD' => [ 1262 '0', 1263 '2', 1264 '43', 1265 '43', 1266 ], 1267 'SH' => [ 1268 '0', 1269 '4', 1270 '4', 1271 '4', 1272 ], 1273 'SHCH' => [ 1274 '0', 1275 '2', 1276 '4', 1277 '4', 1278 ], 1279 'SHD' => [ 1280 '0', 1281 '2', 1282 '43', 1283 '43', 1284 ], 1285 'SHT' => [ 1286 '0', 1287 '2', 1288 '43', 1289 '43', 1290 ], 1291 'SHTCH' => [ 1292 '0', 1293 '2', 1294 '4', 1295 '4', 1296 ], 1297 'SHTSH' => [ 1298 '0', 1299 '2', 1300 '4', 1301 '4', 1302 ], 1303 'ß' => [ 1304 '0', 1305 '', 1306 '4', 1307 '4', 1308 ], 1309 'ST' => [ 1310 '0', 1311 '2', 1312 '43', 1313 '43', 1314 ], 1315 'STCH' => [ 1316 '0', 1317 '2', 1318 '4', 1319 '4', 1320 ], 1321 'STRS' => [ 1322 '0', 1323 '2', 1324 '4', 1325 '4', 1326 ], 1327 'STRZ' => [ 1328 '0', 1329 '2', 1330 '4', 1331 '4', 1332 ], 1333 'STSCH' => [ 1334 '0', 1335 '2', 1336 '4', 1337 '4', 1338 ], 1339 'STSH' => [ 1340 '0', 1341 '2', 1342 '4', 1343 '4', 1344 ], 1345 'SSZ' => [ 1346 '0', 1347 '4', 1348 '4', 1349 '4', 1350 ], 1351 'SZ' => [ 1352 '0', 1353 '4', 1354 '4', 1355 '4', 1356 ], 1357 'SZCS' => [ 1358 '0', 1359 '2', 1360 '4', 1361 '4', 1362 ], 1363 'SZCZ' => [ 1364 '0', 1365 '2', 1366 '4', 1367 '4', 1368 ], 1369 'SZD' => [ 1370 '0', 1371 '2', 1372 '43', 1373 '43', 1374 ], 1375 'SZT' => [ 1376 '0', 1377 '2', 1378 '43', 1379 '43', 1380 ], 1381 'T' => [ 1382 '0', 1383 '3', 1384 '3', 1385 '3', 1386 ], 1387 'Ť' => [ 1388 '0', 1389 '3', 1390 '3', 1391 '3', 1392 ], 1393 'Ţ' => [ 1394 '0', 1395 '3', 1396 '3', 1397 '3', 1398 '4', 1399 '4', 1400 '4', 1401 ], 1402 'TC' => [ 1403 '0', 1404 '4', 
1405 '4', 1406 '4', 1407 ], 1408 'TCH' => [ 1409 '0', 1410 '4', 1411 '4', 1412 '4', 1413 ], 1414 'TH' => [ 1415 '0', 1416 '3', 1417 '3', 1418 '3', 1419 ], 1420 'TRS' => [ 1421 '0', 1422 '4', 1423 '4', 1424 '4', 1425 ], 1426 'TRZ' => [ 1427 '0', 1428 '4', 1429 '4', 1430 '4', 1431 ], 1432 'TS' => [ 1433 '0', 1434 '4', 1435 '4', 1436 '4', 1437 ], 1438 'TSCH' => [ 1439 '0', 1440 '4', 1441 '4', 1442 '4', 1443 ], 1444 'TSH' => [ 1445 '0', 1446 '4', 1447 '4', 1448 '4', 1449 ], 1450 'TSZ' => [ 1451 '0', 1452 '4', 1453 '4', 1454 '4', 1455 ], 1456 'TTCH' => [ 1457 '0', 1458 '4', 1459 '4', 1460 '4', 1461 ], 1462 'TTS' => [ 1463 '0', 1464 '4', 1465 '4', 1466 '4', 1467 ], 1468 'TTSCH' => [ 1469 '0', 1470 '4', 1471 '4', 1472 '4', 1473 ], 1474 'TTSZ' => [ 1475 '0', 1476 '4', 1477 '4', 1478 '4', 1479 ], 1480 'TTZ' => [ 1481 '0', 1482 '4', 1483 '4', 1484 '4', 1485 ], 1486 'TZ' => [ 1487 '0', 1488 '4', 1489 '4', 1490 '4', 1491 ], 1492 'TZS' => [ 1493 '0', 1494 '4', 1495 '4', 1496 '4', 1497 ], 1498 'U' => [ 1499 '1', 1500 '0', 1501 '', 1502 '', 1503 ], 1504 'Ù' => [ 1505 '1', 1506 '0', 1507 '', 1508 '', 1509 ], 1510 'Ú' => [ 1511 '1', 1512 '0', 1513 '', 1514 '', 1515 ], 1516 'Û' => [ 1517 '1', 1518 '0', 1519 '', 1520 '', 1521 ], 1522 'Ü' => [ 1523 '1', 1524 '0', 1525 '', 1526 '', 1527 ], 1528 'Ũ' => [ 1529 '1', 1530 '0', 1531 '', 1532 '', 1533 ], 1534 'Ū' => [ 1535 '1', 1536 '0', 1537 '', 1538 '', 1539 ], 1540 'Ů' => [ 1541 '1', 1542 '0', 1543 '', 1544 '', 1545 ], 1546 'Ű' => [ 1547 '1', 1548 '0', 1549 '', 1550 '', 1551 ], 1552 'Ų' => [ 1553 '1', 1554 '0', 1555 '', 1556 '', 1557 ], 1558 'Ư' => [ 1559 '1', 1560 '0', 1561 '', 1562 '', 1563 ], 1564 'Ụ' => [ 1565 '1', 1566 '0', 1567 '', 1568 '', 1569 ], 1570 'Ủ' => [ 1571 '1', 1572 '0', 1573 '', 1574 '', 1575 ], 1576 'Ứ' => [ 1577 '1', 1578 '0', 1579 '', 1580 '', 1581 ], 1582 'Ừ' => [ 1583 '1', 1584 '0', 1585 '', 1586 '', 1587 ], 1588 'Ử' => [ 1589 '1', 1590 '0', 1591 '', 1592 '', 1593 ], 1594 'Ữ' => [ 1595 '1', 1596 '0', 1597 '', 1598 
'', 1599 ], 1600 'Ự' => [ 1601 '1', 1602 '0', 1603 '', 1604 '', 1605 ], 1606 'UE' => [ 1607 '1', 1608 '0', 1609 '', 1610 '', 1611 ], 1612 'UI' => [ 1613 '1', 1614 '0', 1615 '1', 1616 '', 1617 ], 1618 'UJ' => [ 1619 '1', 1620 '0', 1621 '1', 1622 '', 1623 ], 1624 'UY' => [ 1625 '1', 1626 '0', 1627 '1', 1628 '', 1629 ], 1630 'UW' => [ 1631 '1', 1632 '0', 1633 '1', 1634 '', 1635 '0', 1636 '7', 1637 '7', 1638 ], 1639 'V' => [ 1640 '0', 1641 '7', 1642 '7', 1643 '7', 1644 ], 1645 'W' => [ 1646 '0', 1647 '7', 1648 '7', 1649 '7', 1650 ], 1651 'X' => [ 1652 '0', 1653 '5', 1654 '54', 1655 '54', 1656 ], 1657 'Y' => [ 1658 '1', 1659 '1', 1660 '', 1661 '', 1662 ], 1663 'Ý' => [ 1664 '1', 1665 '1', 1666 '', 1667 '', 1668 ], 1669 'Ỳ' => [ 1670 '1', 1671 '1', 1672 '', 1673 '', 1674 ], 1675 'Ỵ' => [ 1676 '1', 1677 '1', 1678 '', 1679 '', 1680 ], 1681 'Ỷ' => [ 1682 '1', 1683 '1', 1684 '', 1685 '', 1686 ], 1687 'Ỹ' => [ 1688 '1', 1689 '1', 1690 '', 1691 '', 1692 ], 1693 'Z' => [ 1694 '0', 1695 '4', 1696 '4', 1697 '4', 1698 ], 1699 'Ź' => [ 1700 '0', 1701 '4', 1702 '4', 1703 '4', 1704 ], 1705 'Ż' => [ 1706 '0', 1707 '4', 1708 '4', 1709 '4', 1710 ], 1711 'Ž' => [ 1712 '0', 1713 '4', 1714 '4', 1715 '4', 1716 ], 1717 'ZD' => [ 1718 '0', 1719 '2', 1720 '43', 1721 '43', 1722 ], 1723 'ZDZ' => [ 1724 '0', 1725 '2', 1726 '4', 1727 '4', 1728 ], 1729 'ZDZH' => [ 1730 '0', 1731 '2', 1732 '4', 1733 '4', 1734 ], 1735 'ZH' => [ 1736 '0', 1737 '4', 1738 '4', 1739 '4', 1740 ], 1741 'ZHD' => [ 1742 '0', 1743 '2', 1744 '43', 1745 '43', 1746 ], 1747 'ZHDZH' => [ 1748 '0', 1749 '2', 1750 '4', 1751 '4', 1752 ], 1753 'ZS' => [ 1754 '0', 1755 '4', 1756 '4', 1757 '4', 1758 ], 1759 'ZSCH' => [ 1760 '0', 1761 '4', 1762 '4', 1763 '4', 1764 ], 1765 'ZSH' => [ 1766 '0', 1767 '4', 1768 '4', 1769 '4', 1770 ], 1771 'ZZS' => [ 1772 '0', 1773 '4', 1774 '4', 1775 '4', 1776 ], 1777 // Cyrillic alphabet 1778 'А' => [ 1779 '1', 1780 '0', 1781 '', 1782 '', 1783 ], 1784 'Б' => [ 1785 '0', 1786 '7', 1787 '7', 1788 '7', 1789 ], 
1790 'В' => [ 1791 '0', 1792 '7', 1793 '7', 1794 '7', 1795 ], 1796 'Г' => [ 1797 '0', 1798 '5', 1799 '5', 1800 '5', 1801 ], 1802 'Д' => [ 1803 '0', 1804 '3', 1805 '3', 1806 '3', 1807 ], 1808 'ДЗ' => [ 1809 '0', 1810 '4', 1811 '4', 1812 '4', 1813 ], 1814 'Е' => [ 1815 '1', 1816 '0', 1817 '', 1818 '', 1819 ], 1820 'Ё' => [ 1821 '1', 1822 '0', 1823 '', 1824 '', 1825 ], 1826 'Ж' => [ 1827 '0', 1828 '4', 1829 '4', 1830 '4', 1831 ], 1832 'З' => [ 1833 '0', 1834 '4', 1835 '4', 1836 '4', 1837 ], 1838 'И' => [ 1839 '1', 1840 '0', 1841 '', 1842 '', 1843 ], 1844 'Й' => [ 1845 '1', 1846 '1', 1847 '', 1848 '', 1849 '4', 1850 '4', 1851 '4', 1852 ], 1853 'К' => [ 1854 '0', 1855 '5', 1856 '5', 1857 '5', 1858 ], 1859 'Л' => [ 1860 '0', 1861 '8', 1862 '8', 1863 '8', 1864 ], 1865 'М' => [ 1866 '0', 1867 '6', 1868 '6', 1869 '6', 1870 ], 1871 'Н' => [ 1872 '0', 1873 '6', 1874 '6', 1875 '6', 1876 ], 1877 'О' => [ 1878 '1', 1879 '0', 1880 '', 1881 '', 1882 ], 1883 'П' => [ 1884 '0', 1885 '7', 1886 '7', 1887 '7', 1888 ], 1889 'Р' => [ 1890 '0', 1891 '9', 1892 '9', 1893 '9', 1894 ], 1895 'РЖ' => [ 1896 '0', 1897 '4', 1898 '4', 1899 '4', 1900 ], 1901 'С' => [ 1902 '0', 1903 '4', 1904 '4', 1905 '4', 1906 ], 1907 'Т' => [ 1908 '0', 1909 '3', 1910 '3', 1911 '3', 1912 ], 1913 'У' => [ 1914 '1', 1915 '0', 1916 '', 1917 '', 1918 ], 1919 'Ф' => [ 1920 '0', 1921 '7', 1922 '7', 1923 '7', 1924 ], 1925 'Х' => [ 1926 '0', 1927 '5', 1928 '5', 1929 '5', 1930 ], 1931 'Ц' => [ 1932 '0', 1933 '4', 1934 '4', 1935 '4', 1936 ], 1937 'Ч' => [ 1938 '0', 1939 '4', 1940 '4', 1941 '4', 1942 ], 1943 'Ш' => [ 1944 '0', 1945 '4', 1946 '4', 1947 '4', 1948 ], 1949 'Щ' => [ 1950 '0', 1951 '2', 1952 '4', 1953 '4', 1954 ], 1955 'Ъ' => [ 1956 '0', 1957 '', 1958 '', 1959 '', 1960 ], 1961 'Ы' => [ 1962 '0', 1963 '1', 1964 '', 1965 '', 1966 ], 1967 'Ь' => [ 1968 '0', 1969 '', 1970 '', 1971 '', 1972 ], 1973 'Э' => [ 1974 '1', 1975 '0', 1976 '', 1977 '', 1978 ], 1979 'Ю' => [ 1980 '0', 1981 '1', 1982 '', 1983 '', 1984 ], 1985 
'Я' => [ 1986 '0', 1987 '1', 1988 '', 1989 '', 1990 ], 1991 // Greek alphabet 1992 'Α' => [ 1993 '1', 1994 '0', 1995 '', 1996 '', 1997 ], 1998 'Ά' => [ 1999 '1', 2000 '0', 2001 '', 2002 '', 2003 ], 2004 'ΑΙ' => [ 2005 '1', 2006 '0', 2007 '1', 2008 '', 2009 ], 2010 'ΑΥ' => [ 2011 '1', 2012 '0', 2013 '1', 2014 '', 2015 ], 2016 'Β' => [ 2017 '0', 2018 '7', 2019 '7', 2020 '7', 2021 ], 2022 'Γ' => [ 2023 '0', 2024 '5', 2025 '5', 2026 '5', 2027 ], 2028 'Δ' => [ 2029 '0', 2030 '3', 2031 '3', 2032 '3', 2033 ], 2034 'Ε' => [ 2035 '1', 2036 '0', 2037 '', 2038 '', 2039 ], 2040 'Έ' => [ 2041 '1', 2042 '0', 2043 '', 2044 '', 2045 ], 2046 'ΕΙ' => [ 2047 '1', 2048 '0', 2049 '1', 2050 '', 2051 ], 2052 'ΕΥ' => [ 2053 '1', 2054 '1', 2055 '1', 2056 '', 2057 ], 2058 'Ζ' => [ 2059 '0', 2060 '4', 2061 '4', 2062 '4', 2063 ], 2064 'Η' => [ 2065 '1', 2066 '0', 2067 '', 2068 '', 2069 ], 2070 'Ή' => [ 2071 '1', 2072 '0', 2073 '', 2074 '', 2075 ], 2076 'Θ' => [ 2077 '0', 2078 '3', 2079 '3', 2080 '3', 2081 ], 2082 'Ι' => [ 2083 '1', 2084 '0', 2085 '', 2086 '', 2087 ], 2088 'Ί' => [ 2089 '1', 2090 '0', 2091 '', 2092 '', 2093 ], 2094 'Ϊ' => [ 2095 '1', 2096 '0', 2097 '', 2098 '', 2099 ], 2100 'ΐ' => [ 2101 '1', 2102 '0', 2103 '', 2104 '', 2105 ], 2106 'Κ' => [ 2107 '0', 2108 '5', 2109 '5', 2110 '5', 2111 ], 2112 'Λ' => [ 2113 '0', 2114 '8', 2115 '8', 2116 '8', 2117 ], 2118 'Μ' => [ 2119 '0', 2120 '6', 2121 '6', 2122 '6', 2123 ], 2124 'ΜΠ' => [ 2125 '0', 2126 '7', 2127 '7', 2128 '7', 2129 ], 2130 'Ν' => [ 2131 '0', 2132 '6', 2133 '6', 2134 '6', 2135 ], 2136 'ΝΤ' => [ 2137 '0', 2138 '3', 2139 '3', 2140 '3', 2141 ], 2142 'Ξ' => [ 2143 '0', 2144 '5', 2145 '54', 2146 '54', 2147 ], 2148 'Ο' => [ 2149 '1', 2150 '0', 2151 '', 2152 '', 2153 ], 2154 'Ό' => [ 2155 '1', 2156 '0', 2157 '', 2158 '', 2159 ], 2160 'ΟΙ' => [ 2161 '1', 2162 '0', 2163 '1', 2164 '', 2165 ], 2166 'ΟΥ' => [ 2167 '1', 2168 '0', 2169 '1', 2170 '', 2171 ], 2172 'Π' => [ 2173 '0', 2174 '7', 2175 '7', 2176 '7', 2177 ], 2178 'Ρ' => [ 2179 
'0', 2180 '9', 2181 '9', 2182 '9', 2183 ], 2184 'Σ' => [ 2185 '0', 2186 '4', 2187 '4', 2188 '4', 2189 ], 2190 'ς' => [ 2191 '0', 2192 '', 2193 '', 2194 '4', 2195 ], 2196 'Τ' => [ 2197 '0', 2198 '3', 2199 '3', 2200 '3', 2201 ], 2202 'ΤΖ' => [ 2203 '0', 2204 '4', 2205 '4', 2206 '4', 2207 ], 2208 'ΤΣ' => [ 2209 '0', 2210 '4', 2211 '4', 2212 '4', 2213 ], 2214 'Υ' => [ 2215 '1', 2216 '1', 2217 '', 2218 '', 2219 ], 2220 'Ύ' => [ 2221 '1', 2222 '1', 2223 '', 2224 '', 2225 ], 2226 'Ϋ' => [ 2227 '1', 2228 '1', 2229 '', 2230 '', 2231 ], 2232 'ΰ' => [ 2233 '1', 2234 '1', 2235 '', 2236 '', 2237 ], 2238 'ΥΚ' => [ 2239 '1', 2240 '5', 2241 '5', 2242 '5', 2243 ], 2244 'ΥΥ' => [ 2245 '1', 2246 '65', 2247 '65', 2248 '65', 2249 ], 2250 'Φ' => [ 2251 '0', 2252 '7', 2253 '7', 2254 '7', 2255 ], 2256 'Χ' => [ 2257 '0', 2258 '5', 2259 '5', 2260 '5', 2261 ], 2262 'Ψ' => [ 2263 '0', 2264 '7', 2265 '7', 2266 '7', 2267 ], 2268 'Ω' => [ 2269 '1', 2270 '0', 2271 '', 2272 '', 2273 ], 2274 'Ώ' => [ 2275 '1', 2276 '0', 2277 '', 2278 '', 2279 ], 2280 // Hebrew alphabet 2281 'א' => [ 2282 '1', 2283 '0', 2284 '', 2285 '', 2286 ], 2287 'או' => [ 2288 '1', 2289 '0', 2290 '7', 2291 '', 2292 ], 2293 'אג' => [ 2294 '1', 2295 '4', 2296 '4', 2297 '4', 2298 '5', 2299 '5', 2300 '5', 2301 '34', 2302 '34', 2303 '34', 2304 ], 2305 'בב' => [ 2306 '0', 2307 '7', 2308 '7', 2309 '7', 2310 '77', 2311 '77', 2312 '77', 2313 ], 2314 'ב' => [ 2315 '0', 2316 '7', 2317 '7', 2318 '7', 2319 ], 2320 'גג' => [ 2321 '0', 2322 '4', 2323 '4', 2324 '4', 2325 '5', 2326 '5', 2327 '5', 2328 '45', 2329 '45', 2330 '45', 2331 '55', 2332 '55', 2333 '55', 2334 '54', 2335 '54', 2336 '54', 2337 ], 2338 'גד' => [ 2339 '0', 2340 '43', 2341 '43', 2342 '43', 2343 '53', 2344 '53', 2345 '53', 2346 ], 2347 'גה' => [ 2348 '0', 2349 '45', 2350 '45', 2351 '45', 2352 '55', 2353 '55', 2354 '55', 2355 ], 2356 'גז' => [ 2357 '0', 2358 '44', 2359 '44', 2360 '44', 2361 '45', 2362 '45', 2363 '45', 2364 ], 2365 'גח' => [ 2366 '0', 2367 '45', 2368 '45', 2369 
'45', 2370 '55', 2371 '55', 2372 '55', 2373 ], 2374 'גכ' => [ 2375 '0', 2376 '45', 2377 '45', 2378 '45', 2379 '55', 2380 '55', 2381 '55', 2382 ], 2383 'גך' => [ 2384 '0', 2385 '45', 2386 '45', 2387 '45', 2388 '55', 2389 '55', 2390 '55', 2391 ], 2392 'גצ' => [ 2393 '0', 2394 '44', 2395 '44', 2396 '44', 2397 '45', 2398 '45', 2399 '45', 2400 ], 2401 'גץ' => [ 2402 '0', 2403 '44', 2404 '44', 2405 '44', 2406 '45', 2407 '45', 2408 '45', 2409 ], 2410 'גק' => [ 2411 '0', 2412 '45', 2413 '45', 2414 '45', 2415 '54', 2416 '54', 2417 '54', 2418 ], 2419 'גש' => [ 2420 '0', 2421 '44', 2422 '44', 2423 '44', 2424 '54', 2425 '54', 2426 '54', 2427 ], 2428 'גת' => [ 2429 '0', 2430 '43', 2431 '43', 2432 '43', 2433 '53', 2434 '53', 2435 '53', 2436 ], 2437 'ג' => [ 2438 '0', 2439 '4', 2440 '4', 2441 '4', 2442 '5', 2443 '5', 2444 '5', 2445 ], 2446 'דז' => [ 2447 '0', 2448 '4', 2449 '4', 2450 '4', 2451 ], 2452 'דד' => [ 2453 '0', 2454 '3', 2455 '3', 2456 '3', 2457 '33', 2458 '33', 2459 '33', 2460 ], 2461 'דט' => [ 2462 '0', 2463 '33', 2464 '33', 2465 '33', 2466 ], 2467 'דש' => [ 2468 '0', 2469 '4', 2470 '4', 2471 '4', 2472 ], 2473 'דצ' => [ 2474 '0', 2475 '4', 2476 '4', 2477 '4', 2478 ], 2479 'דץ' => [ 2480 '0', 2481 '4', 2482 '4', 2483 '4', 2484 ], 2485 'ד' => [ 2486 '0', 2487 '3', 2488 '3', 2489 '3', 2490 ], 2491 'הג' => [ 2492 '0', 2493 '54', 2494 '54', 2495 '54', 2496 '55', 2497 '55', 2498 '55', 2499 ], 2500 'הכ' => [ 2501 '0', 2502 '55', 2503 '55', 2504 '55', 2505 ], 2506 'הח' => [ 2507 '0', 2508 '55', 2509 '55', 2510 '55', 2511 ], 2512 'הק' => [ 2513 '0', 2514 '55', 2515 '55', 2516 '55', 2517 '5', 2518 '5', 2519 '5', 2520 ], 2521 'הה' => [ 2522 '0', 2523 '5', 2524 '5', 2525 '', 2526 '55', 2527 '55', 2528 '', 2529 ], 2530 'ה' => [ 2531 '0', 2532 '5', 2533 '5', 2534 '', 2535 ], 2536 'וי' => [ 2537 '1', 2538 '', 2539 '', 2540 '', 2541 '7', 2542 '7', 2543 '7', 2544 ], 2545 'ו' => [ 2546 '1', 2547 '7', 2548 '7', 2549 '7', 2550 '7', 2551 '', 2552 '', 2553 ], 2554 'וו' => [ 2555 '1', 2556 
'7', 2557 '7', 2558 '7', 2559 '7', 2560 '', 2561 '', 2562 ], 2563 'וופ' => [ 2564 '1', 2565 '7', 2566 '7', 2567 '7', 2568 '77', 2569 '77', 2570 '77', 2571 ], 2572 'זש' => [ 2573 '0', 2574 '4', 2575 '4', 2576 '4', 2577 '44', 2578 '44', 2579 '44', 2580 ], 2581 'זדז' => [ 2582 '0', 2583 '2', 2584 '4', 2585 '4', 2586 ], 2587 'ז' => [ 2588 '0', 2589 '4', 2590 '4', 2591 '4', 2592 ], 2593 'זג' => [ 2594 '0', 2595 '44', 2596 '44', 2597 '44', 2598 '45', 2599 '45', 2600 '45', 2601 ], 2602 'זז' => [ 2603 '0', 2604 '4', 2605 '4', 2606 '4', 2607 '44', 2608 '44', 2609 '44', 2610 ], 2611 'זס' => [ 2612 '0', 2613 '44', 2614 '44', 2615 '44', 2616 ], 2617 'זצ' => [ 2618 '0', 2619 '44', 2620 '44', 2621 '44', 2622 ], 2623 'זץ' => [ 2624 '0', 2625 '44', 2626 '44', 2627 '44', 2628 ], 2629 'חג' => [ 2630 '0', 2631 '54', 2632 '54', 2633 '54', 2634 '53', 2635 '53', 2636 '53', 2637 ], 2638 'חח' => [ 2639 '0', 2640 '5', 2641 '5', 2642 '5', 2643 '55', 2644 '55', 2645 '55', 2646 ], 2647 'חק' => [ 2648 '0', 2649 '55', 2650 '55', 2651 '55', 2652 '5', 2653 '5', 2654 '5', 2655 ], 2656 'חכ' => [ 2657 '0', 2658 '45', 2659 '45', 2660 '45', 2661 '55', 2662 '55', 2663 '55', 2664 ], 2665 'חס' => [ 2666 '0', 2667 '5', 2668 '54', 2669 '54', 2670 ], 2671 'חש' => [ 2672 '0', 2673 '5', 2674 '54', 2675 '54', 2676 ], 2677 'ח' => [ 2678 '0', 2679 '5', 2680 '5', 2681 '5', 2682 ], 2683 'טש' => [ 2684 '0', 2685 '4', 2686 '4', 2687 '4', 2688 ], 2689 'טד' => [ 2690 '0', 2691 '33', 2692 '33', 2693 '33', 2694 ], 2695 'טי' => [ 2696 '0', 2697 '3', 2698 '3', 2699 '3', 2700 '4', 2701 '4', 2702 '4', 2703 '3', 2704 '3', 2705 '34', 2706 ], 2707 'טת' => [ 2708 '0', 2709 '33', 2710 '33', 2711 '33', 2712 ], 2713 'טט' => [ 2714 '0', 2715 '3', 2716 '3', 2717 '3', 2718 '33', 2719 '33', 2720 '33', 2721 ], 2722 'ט' => [ 2723 '0', 2724 '3', 2725 '3', 2726 '3', 2727 ], 2728 'י' => [ 2729 '1', 2730 '1', 2731 '', 2732 '', 2733 ], 2734 'יא' => [ 2735 '1', 2736 '1', 2737 '', 2738 '', 2739 '1', 2740 '1', 2741 '1', 2742 ], 2743 'כג' => [ 
2744 '0', 2745 '55', 2746 '55', 2747 '55', 2748 '54', 2749 '54', 2750 '54', 2751 ], 2752 'כש' => [ 2753 '0', 2754 '5', 2755 '54', 2756 '54', 2757 ], 2758 'כס' => [ 2759 '0', 2760 '5', 2761 '54', 2762 '54', 2763 ], 2764 'ככ' => [ 2765 '0', 2766 '5', 2767 '5', 2768 '5', 2769 '55', 2770 '55', 2771 '55', 2772 ], 2773 'כך' => [ 2774 '0', 2775 '5', 2776 '5', 2777 '5', 2778 '55', 2779 '55', 2780 '55', 2781 ], 2782 'כ' => [ 2783 '0', 2784 '5', 2785 '5', 2786 '5', 2787 ], 2788 'כח' => [ 2789 '0', 2790 '55', 2791 '55', 2792 '55', 2793 '5', 2794 '5', 2795 '5', 2796 ], 2797 'ך' => [ 2798 '0', 2799 '', 2800 '5', 2801 '5', 2802 ], 2803 'ל' => [ 2804 '0', 2805 '8', 2806 '8', 2807 '8', 2808 ], 2809 'לל' => [ 2810 '0', 2811 '88', 2812 '88', 2813 '88', 2814 '8', 2815 '8', 2816 '8', 2817 ], 2818 'מנ' => [ 2819 '0', 2820 '66', 2821 '66', 2822 '66', 2823 ], 2824 'מן' => [ 2825 '0', 2826 '66', 2827 '66', 2828 '66', 2829 ], 2830 'ממ' => [ 2831 '0', 2832 '6', 2833 '6', 2834 '6', 2835 '66', 2836 '66', 2837 '66', 2838 ], 2839 'מם' => [ 2840 '0', 2841 '6', 2842 '6', 2843 '6', 2844 '66', 2845 '66', 2846 '66', 2847 ], 2848 'מ' => [ 2849 '0', 2850 '6', 2851 '6', 2852 '6', 2853 ], 2854 'ם' => [ 2855 '0', 2856 '', 2857 '6', 2858 '6', 2859 ], 2860 'נמ' => [ 2861 '0', 2862 '66', 2863 '66', 2864 '66', 2865 ], 2866 'נם' => [ 2867 '0', 2868 '66', 2869 '66', 2870 '66', 2871 ], 2872 'ננ' => [ 2873 '0', 2874 '6', 2875 '6', 2876 '6', 2877 '66', 2878 '66', 2879 '66', 2880 ], 2881 'נן' => [ 2882 '0', 2883 '6', 2884 '6', 2885 '6', 2886 '66', 2887 '66', 2888 '66', 2889 ], 2890 'נ' => [ 2891 '0', 2892 '6', 2893 '6', 2894 '6', 2895 ], 2896 'ן' => [ 2897 '0', 2898 '', 2899 '6', 2900 '6', 2901 ], 2902 'סתש' => [ 2903 '0', 2904 '2', 2905 '4', 2906 '4', 2907 ], 2908 'סתז' => [ 2909 '0', 2910 '2', 2911 '4', 2912 '4', 2913 ], 2914 'סטז' => [ 2915 '0', 2916 '2', 2917 '4', 2918 '4', 2919 ], 2920 'סטש' => [ 2921 '0', 2922 '2', 2923 '4', 2924 '4', 2925 ], 2926 'סצד' => [ 2927 '0', 2928 '2', 2929 '4', 2930 '4', 2931 ], 
2932 'סט' => [ 2933 '0', 2934 '2', 2935 '4', 2936 '4', 2937 '43', 2938 '43', 2939 '43', 2940 ], 2941 'סת' => [ 2942 '0', 2943 '2', 2944 '4', 2945 '4', 2946 '43', 2947 '43', 2948 '43', 2949 ], 2950 'סג' => [ 2951 '0', 2952 '44', 2953 '44', 2954 '44', 2955 '4', 2956 '4', 2957 '4', 2958 ], 2959 'סס' => [ 2960 '0', 2961 '4', 2962 '4', 2963 '4', 2964 '44', 2965 '44', 2966 '44', 2967 ], 2968 'סצ' => [ 2969 '0', 2970 '44', 2971 '44', 2972 '44', 2973 ], 2974 'סץ' => [ 2975 '0', 2976 '44', 2977 '44', 2978 '44', 2979 ], 2980 'סז' => [ 2981 '0', 2982 '44', 2983 '44', 2984 '44', 2985 ], 2986 'סש' => [ 2987 '0', 2988 '44', 2989 '44', 2990 '44', 2991 ], 2992 'ס' => [ 2993 '0', 2994 '4', 2995 '4', 2996 '4', 2997 ], 2998 'ע' => [ 2999 '1', 3000 '0', 3001 '', 3002 '', 3003 ], 3004 'פב' => [ 3005 '0', 3006 '7', 3007 '7', 3008 '7', 3009 '77', 3010 '77', 3011 '77', 3012 ], 3013 'פוו' => [ 3014 '0', 3015 '7', 3016 '7', 3017 '7', 3018 '77', 3019 '77', 3020 '77', 3021 ], 3022 'פפ' => [ 3023 '0', 3024 '7', 3025 '7', 3026 '7', 3027 '77', 3028 '77', 3029 '77', 3030 ], 3031 'פף' => [ 3032 '0', 3033 '7', 3034 '7', 3035 '7', 3036 '77', 3037 '77', 3038 '77', 3039 ], 3040 'פ' => [ 3041 '0', 3042 '7', 3043 '7', 3044 '7', 3045 ], 3046 'ף' => [ 3047 '0', 3048 '', 3049 '7', 3050 '7', 3051 ], 3052 'צג' => [ 3053 '0', 3054 '44', 3055 '44', 3056 '44', 3057 '45', 3058 '45', 3059 '45', 3060 ], 3061 'צז' => [ 3062 '0', 3063 '44', 3064 '44', 3065 '44', 3066 ], 3067 'צס' => [ 3068 '0', 3069 '44', 3070 '44', 3071 '44', 3072 ], 3073 'צצ' => [ 3074 '0', 3075 '4', 3076 '4', 3077 '4', 3078 '5', 3079 '5', 3080 '5', 3081 '44', 3082 '44', 3083 '44', 3084 '54', 3085 '54', 3086 '54', 3087 '45', 3088 '45', 3089 '45', 3090 ], 3091 'צץ' => [ 3092 '0', 3093 '4', 3094 '4', 3095 '4', 3096 '5', 3097 '5', 3098 '5', 3099 '44', 3100 '44', 3101 '44', 3102 '54', 3103 '54', 3104 '54', 3105 ], 3106 'צש' => [ 3107 '0', 3108 '44', 3109 '44', 3110 '44', 3111 '4', 3112 '4', 3113 '4', 3114 '5', 3115 '5', 3116 '5', 3117 ], 3118 'צ' => [ 
3119 '0', 3120 '4', 3121 '4', 3122 '4', 3123 '5', 3124 '5', 3125 '5', 3126 ], 3127 'ץ' => [ 3128 '0', 3129 '', 3130 '4', 3131 '4', 3132 ], 3133 'קה' => [ 3134 '0', 3135 '55', 3136 '55', 3137 '5', 3138 ], 3139 'קס' => [ 3140 '0', 3141 '5', 3142 '54', 3143 '54', 3144 ], 3145 'קש' => [ 3146 '0', 3147 '5', 3148 '54', 3149 '54', 3150 ], 3151 'קק' => [ 3152 '0', 3153 '5', 3154 '5', 3155 '5', 3156 '55', 3157 '55', 3158 '55', 3159 ], 3160 'קח' => [ 3161 '0', 3162 '55', 3163 '55', 3164 '55', 3165 ], 3166 'קכ' => [ 3167 '0', 3168 '55', 3169 '55', 3170 '55', 3171 ], 3172 'קך' => [ 3173 '0', 3174 '55', 3175 '55', 3176 '55', 3177 ], 3178 'קג' => [ 3179 '0', 3180 '55', 3181 '55', 3182 '55', 3183 '54', 3184 '54', 3185 '54', 3186 ], 3187 'ק' => [ 3188 '0', 3189 '5', 3190 '5', 3191 '5', 3192 ], 3193 'רר' => [ 3194 '0', 3195 '99', 3196 '99', 3197 '99', 3198 '9', 3199 '9', 3200 '9', 3201 ], 3202 'ר' => [ 3203 '0', 3204 '9', 3205 '9', 3206 '9', 3207 ], 3208 'שטז' => [ 3209 '0', 3210 '2', 3211 '4', 3212 '4', 3213 ], 3214 'שתש' => [ 3215 '0', 3216 '2', 3217 '4', 3218 '4', 3219 ], 3220 'שתז' => [ 3221 '0', 3222 '2', 3223 '4', 3224 '4', 3225 ], 3226 'שטש' => [ 3227 '0', 3228 '2', 3229 '4', 3230 '4', 3231 ], 3232 'שד' => [ 3233 '0', 3234 '2', 3235 '43', 3236 '43', 3237 ], 3238 'שז' => [ 3239 '0', 3240 '44', 3241 '44', 3242 '44', 3243 ], 3244 'שס' => [ 3245 '0', 3246 '44', 3247 '44', 3248 '44', 3249 ], 3250 'שת' => [ 3251 '0', 3252 '2', 3253 '43', 3254 '43', 3255 ], 3256 'שג' => [ 3257 '0', 3258 '4', 3259 '4', 3260 '4', 3261 '44', 3262 '44', 3263 '44', 3264 '4', 3265 '43', 3266 '43', 3267 ], 3268 'שט' => [ 3269 '0', 3270 '2', 3271 '43', 3272 '43', 3273 '44', 3274 '44', 3275 '44', 3276 ], 3277 'שצ' => [ 3278 '0', 3279 '44', 3280 '44', 3281 '44', 3282 '45', 3283 '45', 3284 '45', 3285 ], 3286 'שץ' => [ 3287 '0', 3288 '44', 3289 '', 3290 '44', 3291 '45', 3292 '', 3293 '45', 3294 ], 3295 'שש' => [ 3296 '0', 3297 '4', 3298 '4', 3299 '4', 3300 '44', 3301 '44', 3302 '44', 3303 ], 3304 'ש' => [ 3305 
'0', 3306 '4', 3307 '4', 3308 '4', 3309 ], 3310 'תג' => [ 3311 '0', 3312 '34', 3313 '34', 3314 '34', 3315 ], 3316 'תז' => [ 3317 '0', 3318 '34', 3319 '34', 3320 '34', 3321 ], 3322 'תש' => [ 3323 '0', 3324 '4', 3325 '4', 3326 '4', 3327 ], 3328 'תת' => [ 3329 '0', 3330 '3', 3331 '3', 3332 '3', 3333 '4', 3334 '4', 3335 '4', 3336 '33', 3337 '33', 3338 '33', 3339 '44', 3340 '44', 3341 '44', 3342 '34', 3343 '34', 3344 '34', 3345 '43', 3346 '43', 3347 '43', 3348 ], 3349 'ת' => [ 3350 '0', 3351 '3', 3352 '3', 3353 '3', 3354 '4', 3355 '4', 3356 '4', 3357 ], 3358 // Arabic alphabet 3359 'ا' => [ 3360 '1', 3361 '0', 3362 '', 3363 '', 3364 ], 3365 'ب' => [ 3366 '0', 3367 '7', 3368 '7', 3369 '7', 3370 ], 3371 'ت' => [ 3372 '0', 3373 '3', 3374 '3', 3375 '3', 3376 ], 3377 'ث' => [ 3378 '0', 3379 '3', 3380 '3', 3381 '3', 3382 ], 3383 'ج' => [ 3384 '0', 3385 '4', 3386 '4', 3387 '4', 3388 ], 3389 'ح' => [ 3390 '0', 3391 '5', 3392 '5', 3393 '5', 3394 ], 3395 'خ' => [ 3396 '0', 3397 '5', 3398 '5', 3399 '5', 3400 ], 3401 'د' => [ 3402 '0', 3403 '3', 3404 '3', 3405 '3', 3406 ], 3407 'ذ' => [ 3408 '0', 3409 '3', 3410 '3', 3411 '3', 3412 ], 3413 'ر' => [ 3414 '0', 3415 '9', 3416 '9', 3417 '9', 3418 ], 3419 'ز' => [ 3420 '0', 3421 '4', 3422 '4', 3423 '4', 3424 ], 3425 'س' => [ 3426 '0', 3427 '4', 3428 '4', 3429 '4', 3430 ], 3431 'ش' => [ 3432 '0', 3433 '4', 3434 '4', 3435 '4', 3436 ], 3437 'ص' => [ 3438 '0', 3439 '4', 3440 '4', 3441 '4', 3442 ], 3443 'ض' => [ 3444 '0', 3445 '3', 3446 '3', 3447 '3', 3448 ], 3449 'ط' => [ 3450 '0', 3451 '3', 3452 '3', 3453 '3', 3454 ], 3455 'ظ' => [ 3456 '0', 3457 '4', 3458 '4', 3459 '4', 3460 ], 3461 'ع' => [ 3462 '1', 3463 '0', 3464 '', 3465 '', 3466 ], 3467 'غ' => [ 3468 '0', 3469 '0', 3470 '', 3471 '', 3472 ], 3473 'ف' => [ 3474 '0', 3475 '7', 3476 '7', 3477 '7', 3478 ], 3479 'ق' => [ 3480 '0', 3481 '5', 3482 '5', 3483 '5', 3484 ], 3485 'ك' => [ 3486 '0', 3487 '5', 3488 '5', 3489 '5', 3490 ], 3491 'ل' => [ 3492 '0', 3493 '8', 3494 '8', 3495 '8', 3496 ], 
'لا' => [
            '0',
            '8',
            '8',
            '8',
        ],
        'م' => [
            '0',
            '6',
            '6',
            '6',
        ],
        'ن' => [
            '0',
            '6',
            '6',
            '6',
        ],
        'هن' => [
            '0',
            '66',
            '66',
            '66',
        ],
        'ه' => [
            '0',
            '5',
            '5',
            '',
        ],
        'و' => [
            '1',
            '',
            '',
            '',
            '7',
            '',
            '',
        ],
        'ي' => [
            '0',
            '1',
            '',
            '',
        ],
        'آ' => [
            '0',
            '1',
            '',
            '',
        ],
        'ة' => [
            '0',
            '',
            '',
            '3',
        ],
        'ی' => [
            '0',
            '1',
            '',
            '',
        ],
        'ى' => [
            '1',
            '1',
            '',
            '',
        ],
    ];

    /**
     * Find the longest sound-table key that matches the name at a byte offset.
     *
     * Keys are tried from self::MAXCHAR bytes downwards, one byte at a time.
     *
     * @param string $name   The (already transformed) name, handled as raw bytes
     * @param int    $offset Byte offset to match at; must be less than strlen($name)
     *
     * @return string The matching key, or '' when no key of any length matches
     */
    private static function longestSoundKey(string $name, int $offset): string
    {
        $chunk = (string) substr($name, $offset, self::MAXCHAR);

        for ($length = strlen($chunk); $length > 0; $length--) {
            $key = substr($chunk, 0, $length);
            if (isset(self::$dmsounds[$key])) {
                return $key;
            }
        }

        return '';
    }

    /**
     * Turn a list of accumulated sounds into a 6-digit D-M code.
     *
     * The '!' markers (used only to stop the "duplicate sound" check) are removed,
     * then the code is zero-filled or truncated to six digits.
     *
     * @param string[] $sounds Accumulated sounds, including the leading '!' marker
     *
     * @return string The 6-digit code, or '' when no sound was recognised
     */
    private static function completeCode(array $sounds): string
    {
        $code = str_replace('!', '', implode('', $sounds));

        // Compare with '' rather than testing truthiness: the single sound '0'
        // (a word consisting only of an initial vowel) is a valid code, but the
        // string '0' is falsy in PHP.
        if ($code === '') {
            return '';
        }

        return substr($code . '000000', 0, 6);
    }

    /**
     * Calculate the Daitch-Mokotoff soundex for a word.
     *
     * The word is upper-cased, rewritten using the name transformation table and
     * then scanned from left to right. At each position the longest matching key
     * of the sound table is looked up; every alternative coding of that key forks
     * the set of partial codes. Bytes that match no key are skipped.
     *
     * @param string $name
     *
     * @return string[] List of possible DM codes for the word.
     */
    private static function daitchMokotoffWord(string $name): array
    {
        // Apply special transformation rules to the input string
        $name = I18N::strtoupper($name);
        foreach (self::$transformNameTable as $transformRule) {
            $name = str_replace($transformRule[0], $transformRule[1], $name);
        }

        // Initialize
        $name_script = I18N::textScript($name);
        $noVowels    = $name_script === 'Hebr' || $name_script === 'Arab';

        $length        = strlen($name); // Positions are byte offsets, not characters
        $currPos       = 0;
        $state         = 1; // 1: start of input string, 2: before vowel, 3: other
        $result        = []; // accumulate complete 6-digit D-M codes here
        $partialResult = [['!']]; // 1st partial result ('!' stops "duplicate sound" check)

        // Loop through the input string.
        // Stop when the string is exhausted or when no more partial results remain
        while ($partialResult !== [] && $currPos < $length) {
            // Find the DM coding table entry for the chunk at the current position
            $thisEntry = self::longestSoundKey($name, $currPos);
            if ($thisEntry === '') {
                $currPos++; // Not in table: advance pointer to next byte
                continue; // and try again
            }

            $soundTableEntry = self::$dmsounds[$thisEntry];
            $workingResult   = $partialResult;
            $partialResult   = [];
            $currPos += strlen($thisEntry);

            // Not at beginning of input string
            if ($state !== 1) {
                $nextEntry = $currPos < $length ? self::longestSoundKey($name, $currPos) : '';

                // Element 0 of a table entry is the vowel flag ('0' = not a vowel)
                if ($nextEntry !== '' && self::$dmsounds[$nextEntry][0] !== '0') {
                    // Next chunk is a vowel
                    $state = 2;
                } else {
                    $state = 3;
                }
            }

            // Elements 1-3 hold the primary coding for the three states;
            // each further triplet holds an alternative coding.
            $columns = count($soundTableEntry);
            while ($state < $columns) {
                $sound = $soundTableEntry[$state];

                foreach ($workingResult as $workingEntry) {
                    $lastIndex = count($workingEntry) - 1;

                    // empty means 'ignore this sound in this state'
                    if ($sound === '') {
                        $workingEntry[$lastIndex] .= '!'; // Prevent false 'doubles'
                        $partialResult[] = $workingEntry;
                        continue;
                    }

                    // A sound that repeats the previous one is normally dropped.
                    // For Hebrew and Arabic the repeated sound is kept, so the code
                    // contains both occurrences.
                    // NOTE(review): the original comment described producing a pair of
                    // codes (single and doubled); only the doubled form is generated.
                    if ($sound !== $workingEntry[$lastIndex] || $noVowels) {
                        $workingEntry[] = $sound;
                    }

                    if (count($workingEntry) < 7) {
                        $partialResult[] = $workingEntry;
                    } else {
                        // This is the 6th code in the sequence
                        // We're looking for 7 entries because the first is '!' and doesn't count
                        $code = self::completeCode($workingEntry);
                        // Only return codes from recognisable sounds
                        if ($code !== '') {
                            $result[] = $code;
                        }
                    }
                }

                $state += 3; // Advance to next triplet while keeping the same basic state
            }
        }

        // Zero-fill and copy all remaining partial results
        foreach ($partialResult as $workingEntry) {
            $code = self::completeCode($workingEntry);
            // Only return codes from recognisable sounds
            if ($code !== '') {
                $result[] = $code;
            }
        }

        return $result;
    }
}