1<?php 2/** 3 * webtrees: online genealogy 4 * Copyright (C) 2018 webtrees development team 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * You should have received a copy of the GNU General Public License 14 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 */ 16namespace Fisharebest\Webtrees; 17 18/** 19 * Phonetic matching of strings. 20 */ 21class Soundex 22{ 23 /** 24 * Which algorithms are supported. 25 * 26 * @return string[] 27 */ 28 public static function getAlgorithms() 29 { 30 return [ 31 'std' => /* I18N: http://en.wikipedia.org/wiki/Soundex */ 32 I18N::translate('Russell'), 33 'dm' => /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ 34 I18N::translate('Daitch-Mokotoff'), 35 ]; 36 } 37 38 /** 39 * Is there a match between two soundex codes? 40 * 41 * @param string $soundex1 42 * @param string $soundex2 43 * 44 * @return bool 45 */ 46 public static function compare($soundex1, $soundex2) 47 { 48 if ($soundex1 !== '' && $soundex2 !== '') { 49 return !empty(array_intersect(explode(':', $soundex1), explode(':', $soundex2))); 50 } 51 52 return false; 53 } 54 55 /** 56 * Generate Russell soundex codes for a given text. 57 * 58 * @param $text 59 * 60 * @return null|string 61 */ 62 public static function russell($text) 63 { 64 $words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY); 65 $soundex_array = []; 66 foreach ($words as $word) { 67 $soundex = soundex($word); 68 // Only return codes from recognisable sounds 69 if ($soundex !== '0000') { 70 $soundex_array[] = $soundex; 71 } 72 } 73 // Combine words, e.g. “New York” as “Newyork” 74 if (count($words) > 1) { 75 $soundex_array[] = soundex(strtr($text, ' ', '')); 76 } 77 // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters) 78 $soundex_array = array_slice(array_unique($soundex_array), 0, 51); 79 80 if ($soundex_array) { 81 return implode(':', $soundex_array); 82 } else { 83 return ''; 84 } 85 } 86 87 /** 88 * Generate Daitch–Mokotoff soundex codes for a given text. 89 * 90 * @param $text 91 * 92 * @return string 93 */ 94 public static function daitchMokotoff($text) 95 { 96 $words = preg_split('/\s/', $text, -1, PREG_SPLIT_NO_EMPTY); 97 $soundex_array = []; 98 foreach ($words as $word) { 99 $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($word)); 100 } 101 // Combine words, e.g. “New York” as “Newyork” 102 if (count($words) > 1) { 103 $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord(strtr($text, ' ', ''))); 104 } 105 // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters) 106 $soundex_array = array_slice(array_unique($soundex_array), 0, 36); 107 108 if ($soundex_array) { 109 return implode(':', $soundex_array); 110 } else { 111 return ''; 112 } 113 } 114 115 // Determine the Daitch–Mokotoff Soundex code for a word 116 // Original implementation by Gerry Kroll, and analysis by Meliza Amity 117 118 // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!) 119 const MAXCHAR = 7; 120 121 /** 122 * Name transformation arrays. 123 * Used to transform the Name string to simplify the "sounds like" table. 124 * This is especially useful in Hebrew. 125 * 126 * Each array entry defines the "from" and "to" arguments of an preg($from, $to, $text) 127 * function call to achieve the desired transformations. 128 * 129 * Note about the use of "\x01": 130 * This code, which can’t legitimately occur in the kind of text we're dealing with, 131 * is used as a place-holder so that conditional string replacements can be done. 132 * 133 * @var string[][] 134 */ 135 private static $transformNameTable = [ 136 // Force Yiddish ligatures to be treated as separate letters 137 [ 138 'װ', 139 'וו', 140 ], 141 [ 142 'ײ', 143 'יי', 144 ], 145 [ 146 'ױ', 147 'וי', 148 ], 149 [ 150 'בו', 151 'בע', 152 ], 153 [ 154 'פו', 155 'פע', 156 ], 157 [ 158 'ומ', 159 'עמ', 160 ], 161 [ 162 'ום', 163 'עם', 164 ], 165 [ 166 'ונ', 167 'ענ', 168 ], 169 [ 170 'ון', 171 'ען', 172 ], 173 [ 174 'וו', 175 'ב', 176 ], 177 [ 178 "\x01", 179 '', 180 ], 181 [ 182 'ייה$', 183 "\x01ה", 184 ], 185 [ 186 'ייע$', 187 "\x01ע", 188 ], 189 [ 190 'יי', 191 'ע', 192 ], 193 [ 194 "\x01", 195 'יי', 196 ], 197 ]; 198 199 /** 200 * The DM sound coding table is organized this way: 201 * key: a variable-length string that corresponds to the UTF-8 character sequence 202 * represented by the table entry. Currently, that string can be up to 7 203 * bytes long. This maximum length is defined by the value of global variable 204 * $maxchar. 205 * 206 * value: an array as follows: 207 * [0]: zero if not a vowel 208 * [1]: sound value when this string is at the beginning of the word 209 * [2]: sound value when this string is followed by a vowel 210 * [3]: sound value for other cases 211 * [1],[2],[3] can be repeated several times to create branches in the code 212 * an empty sound value means "ignore in this state" 213 * 214 * @var string[][] 215 */ 216 private static $dmsounds = [ 217 'A' => [ 218 '1', 219 '0', 220 '', 221 '', 222 ], 223 'À' => [ 224 '1', 225 '0', 226 '', 227 '', 228 ], 229 'Á' => [ 230 '1', 231 '0', 232 '', 233 '', 234 ], 235 'Â' => [ 236 '1', 237 '0', 238 '', 239 '', 240 ], 241 'Ã' => [ 242 '1', 243 '0', 244 '', 245 '', 246 ], 247 'Ä' => [ 248 '1', 249 '0', 250 '1', 251 '', 252 '0', 253 '', 254 '', 255 ], 256 'Å' => [ 257 '1', 258 '0', 259 '', 260 '', 261 ], 262 'Ă' => [ 263 '1', 264 '0', 265 '', 266 '', 267 ], 268 'Ą' => [ 269 '1', 270 '', 271 '', 272 '', 273 '', 274 '', 275 '6', 276 ], 277 'Ạ' => [ 278 '1', 279 '0', 280 '', 281 '', 282 ], 283 'Ả' => [ 284 '1', 285 '0', 286 '', 287 '', 288 ], 289 'Ấ' => [ 290 '1', 291 '0', 292 '', 293 '', 294 ], 295 'Ầ' => [ 296 '1', 297 '0', 298 '', 299 '', 300 ], 301 'Ẩ' => [ 302 '1', 303 '0', 304 '', 305 '', 306 ], 307 'Ẫ' => [ 308 '1', 309 '0', 310 '', 311 '', 312 ], 313 'Ậ' => [ 314 '1', 315 '0', 316 '', 317 '', 318 ], 319 'Ắ' => [ 320 '1', 321 '0', 322 '', 323 '', 324 ], 325 'Ằ' => [ 326 '1', 327 '0', 328 '', 329 '', 330 ], 331 'Ẳ' => [ 332 '1', 333 '0', 334 '', 335 '', 336 ], 337 'Ẵ' => [ 338 '1', 339 '0', 340 '', 341 '', 342 ], 343 'Ặ' => [ 344 '1', 345 '0', 346 '', 347 '', 348 ], 349 'AE' => [ 350 '1', 351 '0', 352 '1', 353 '', 354 ], 355 'Æ' => [ 356 '1', 357 '0', 358 '1', 359 '', 360 ], 361 'AI' => [ 362 '1', 363 '0', 364 '1', 365 '', 366 ], 367 'AJ' => [ 368 '1', 369 '0', 370 '1', 371 '', 372 ], 373 'AU' => [ 374 '1', 375 '0', 376 '7', 377 '', 378 ], 379 'AV' => [ 380 '1', 381 '0', 382 '7', 383 '', 384 '7', 385 '7', 386 '7', 387 ], 388 'ÄU' => [ 389 '1', 390 '0', 391 '1', 392 '', 393 ], 394 'AY' => [ 395 '1', 396 '0', 397 '1', 398 '', 399 ], 400 'B' => [ 401 '0', 402 '7', 403 '7', 404 '7', 405 ], 406 'C' => [ 407 '0', 408 '5', 409 '5', 410 '5', 411 '34', 412 '4', 413 '4', 414 ], 415 'Ć' => [ 416 '0', 417 '4', 418 '4', 419 '4', 420 ], 421 'Č' => [ 422 '0', 423 '4', 424 '4', 425 '4', 426 ], 427 'Ç' => [ 428 '0', 429 '4', 430 '4', 431 '4', 432 ], 433 'CH' => [ 434 '0', 435 '5', 436 '5', 437 '5', 438 '34', 439 '4', 440 '4', 441 ], 442 'CHS' => [ 443 '0', 444 '5', 445 '54', 446 '54', 447 ], 448 'CK' => [ 449 '0', 450 '5', 451 '5', 452 '5', 453 '45', 454 '45', 455 '45', 456 ], 457 'CCS' => [ 458 '0', 459 '4', 460 '4', 461 '4', 462 ], 463 'CS' => [ 464 '0', 465 '4', 466 '4', 467 '4', 468 ], 469 'CSZ' => [ 470 '0', 471 '4', 472 '4', 473 '4', 474 ], 475 'CZ' => [ 476 '0', 477 '4', 478 '4', 479 '4', 480 ], 481 'CZS' => [ 482 '0', 483 '4', 484 '4', 485 '4', 486 ], 487 'D' => [ 488 '0', 489 '3', 490 '3', 491 '3', 492 ], 493 'Ď' => [ 494 '0', 495 '3', 496 '3', 497 '3', 498 ], 499 'Đ' => [ 500 '0', 501 '3', 502 '3', 503 '3', 504 ], 505 'DRS' => [ 506 '0', 507 '4', 508 '4', 509 '4', 510 ], 511 'DRZ' => [ 512 '0', 513 '4', 514 '4', 515 '4', 516 ], 517 'DS' => [ 518 '0', 519 '4', 520 '4', 521 '4', 522 ], 523 'DSH' => [ 524 '0', 525 '4', 526 '4', 527 '4', 528 ], 529 'DSZ' => [ 530 '0', 531 '4', 532 '4', 533 '4', 534 ], 535 'DT' => [ 536 '0', 537 '3', 538 '3', 539 '3', 540 ], 541 'DDZ' => [ 542 '0', 543 '4', 544 '4', 545 '4', 546 ], 547 'DDZS' => [ 548 '0', 549 '4', 550 '4', 551 '4', 552 ], 553 'DZ' => [ 554 '0', 555 '4', 556 '4', 557 '4', 558 ], 559 'DŹ' => [ 560 '0', 561 '4', 562 '4', 563 '4', 564 ], 565 'DŻ' => [ 566 '0', 567 '4', 568 '4', 569 '4', 570 ], 571 'DZH' => [ 572 '0', 573 '4', 574 '4', 575 '4', 576 ], 577 'DZS' => [ 578 '0', 579 '4', 580 '4', 581 '4', 582 ], 583 'E' => [ 584 '1', 585 '0', 586 '', 587 '', 588 ], 589 'È' => [ 590 '1', 591 '0', 592 '', 593 '', 594 ], 595 'É' => [ 596 '1', 597 '0', 598 '', 599 '', 600 ], 601 'Ê' => [ 602 '1', 603 '0', 604 '', 605 '', 606 ], 607 'Ë' => [ 608 '1', 609 '0', 610 '', 611 '', 612 ], 613 'Ĕ' => [ 614 '1', 615 '0', 616 '', 617 '', 618 ], 619 'Ė' => [ 620 '1', 621 '0', 622 '', 623 '', 624 ], 625 'Ę' => [ 626 '1', 627 '', 628 '', 629 '6', 630 '', 631 '', 632 '', 633 ], 634 'Ẹ' => [ 635 '1', 636 '0', 637 '', 638 '', 639 ], 640 'Ẻ' => [ 641 '1', 642 '0', 643 '', 644 '', 645 ], 646 'Ẽ' => [ 647 '1', 648 '0', 649 '', 650 '', 651 ], 652 'Ế' => [ 653 '1', 654 '0', 655 '', 656 '', 657 ], 658 'Ề' => [ 659 '1', 660 '0', 661 '', 662 '', 663 ], 664 'Ể' => [ 665 '1', 666 '0', 667 '', 668 '', 669 ], 670 'Ễ' => [ 671 '1', 672 '0', 673 '', 674 '', 675 ], 676 'Ệ' => [ 677 '1', 678 '0', 679 '', 680 '', 681 ], 682 'EAU' => [ 683 '1', 684 '0', 685 '', 686 '', 687 ], 688 'EI' => [ 689 '1', 690 '0', 691 '1', 692 '', 693 ], 694 'EJ' => [ 695 '1', 696 '0', 697 '1', 698 '', 699 ], 700 'EU' => [ 701 '1', 702 '1', 703 '1', 704 '', 705 ], 706 'EY' => [ 707 '1', 708 '0', 709 '1', 710 '', 711 ], 712 'F' => [ 713 '0', 714 '7', 715 '7', 716 '7', 717 ], 718 'FB' => [ 719 '0', 720 '7', 721 '7', 722 '7', 723 ], 724 'G' => [ 725 '0', 726 '5', 727 '5', 728 '5', 729 '34', 730 '4', 731 '4', 732 ], 733 'Ğ' => [ 734 '0', 735 '', 736 '', 737 '', 738 ], 739 'GGY' => [ 740 '0', 741 '5', 742 '5', 743 '5', 744 ], 745 'GY' => [ 746 '0', 747 '5', 748 '5', 749 '5', 750 ], 751 'H' => [ 752 '0', 753 '5', 754 '5', 755 '', 756 '5', 757 '5', 758 '5', 759 ], 760 'I' => [ 761 '1', 762 '0', 763 '', 764 '', 765 ], 766 'Ì' => [ 767 '1', 768 '0', 769 '', 770 '', 771 ], 772 'Í' => [ 773 '1', 774 '0', 775 '', 776 '', 777 ], 778 'Î' => [ 779 '1', 780 '0', 781 '', 782 '', 783 ], 784 'Ï' => [ 785 '1', 786 '0', 787 '', 788 '', 789 ], 790 'Ĩ' => [ 791 '1', 792 '0', 793 '', 794 '', 795 ], 796 'Į' => [ 797 '1', 798 '0', 799 '', 800 '', 801 ], 802 'İ' => [ 803 '1', 804 '0', 805 '', 806 '', 807 ], 808 'Ỉ' => [ 809 '1', 810 '0', 811 '', 812 '', 813 ], 814 'Ị' => [ 815 '1', 816 '0', 817 '', 818 '', 819 ], 820 'IA' => [ 821 '1', 822 '1', 823 '', 824 '', 825 ], 826 'IE' => [ 827 '1', 828 '1', 829 '', 830 '', 831 ], 832 'IO' => [ 833 '1', 834 '1', 835 '', 836 '', 837 ], 838 'IU' => [ 839 '1', 840 '1', 841 '', 842 '', 843 ], 844 'J' => [ 845 '0', 846 '1', 847 '', 848 '', 849 '4', 850 '4', 851 '4', 852 '5', 853 '5', 854 '', 855 ], 856 'K' => [ 857 '0', 858 '5', 859 '5', 860 '5', 861 ], 862 'KH' => [ 863 '0', 864 '5', 865 '5', 866 '5', 867 ], 868 'KS' => [ 869 '0', 870 '5', 871 '54', 872 '54', 873 ], 874 'L' => [ 875 '0', 876 '8', 877 '8', 878 '8', 879 ], 880 'Ľ' => [ 881 '0', 882 '8', 883 '8', 884 '8', 885 ], 886 'Ĺ' => [ 887 '0', 888 '8', 889 '8', 890 '8', 891 ], 892 'Ł' => [ 893 '0', 894 '7', 895 '7', 896 '7', 897 '8', 898 '8', 899 '8', 900 ], 901 'LL' => [ 902 '0', 903 '8', 904 '8', 905 '8', 906 '58', 907 '8', 908 '8', 909 '1', 910 '8', 911 '8', 912 ], 913 'LLY' => [ 914 '0', 915 '8', 916 '8', 917 '8', 918 '1', 919 '8', 920 '8', 921 ], 922 'LY' => [ 923 '0', 924 '8', 925 '8', 926 '8', 927 '1', 928 '8', 929 '8', 930 ], 931 'M' => [ 932 '0', 933 '6', 934 '6', 935 '6', 936 ], 937 'MĔ' => [ 938 '0', 939 '66', 940 '66', 941 '66', 942 ], 943 'MN' => [ 944 '0', 945 '66', 946 '66', 947 '66', 948 ], 949 'N' => [ 950 '0', 951 '6', 952 '6', 953 '6', 954 ], 955 'Ń' => [ 956 '0', 957 '6', 958 '6', 959 '6', 960 ], 961 'Ň' => [ 962 '0', 963 '6', 964 '6', 965 '6', 966 ], 967 'Ñ' => [ 968 '0', 969 '6', 970 '6', 971 '6', 972 ], 973 'NM' => [ 974 '0', 975 '66', 976 '66', 977 '66', 978 ], 979 'O' => [ 980 '1', 981 '0', 982 '', 983 '', 984 ], 985 'Ò' => [ 986 '1', 987 '0', 988 '', 989 '', 990 ], 991 'Ó' => [ 992 '1', 993 '0', 994 '', 995 '', 996 ], 997 'Ô' => [ 998 '1', 999 '0', 1000 '', 1001 '', 1002 ], 1003 'Õ' => [ 1004 '1', 1005 '0', 1006 '', 1007 '', 1008 ], 1009 'Ö' => [ 1010 '1', 1011 '0', 1012 '', 1013 '', 1014 ], 1015 'Ø' => [ 1016 '1', 1017 '0', 1018 '', 1019 '', 1020 ], 1021 'Ő' => [ 1022 '1', 1023 '0', 1024 '', 1025 '', 1026 ], 1027 'Œ' => [ 1028 '1', 1029 '0', 1030 '', 1031 '', 1032 ], 1033 'Ơ' => [ 1034 '1', 1035 '0', 1036 '', 1037 '', 1038 ], 1039 'Ọ' => [ 1040 '1', 1041 '0', 1042 '', 1043 '', 1044 ], 1045 'Ỏ' => [ 1046 '1', 1047 '0', 1048 '', 1049 '', 1050 ], 1051 'Ố' => [ 1052 '1', 1053 '0', 1054 '', 1055 '', 1056 ], 1057 'Ồ' => [ 1058 '1', 1059 '0', 1060 '', 1061 '', 1062 ], 1063 'Ổ' => [ 1064 '1', 1065 '0', 1066 '', 1067 '', 1068 ], 1069 'Ỗ' => [ 1070 '1', 1071 '0', 1072 '', 1073 '', 1074 ], 1075 'Ộ' => [ 1076 '1', 1077 '0', 1078 '', 1079 '', 1080 ], 1081 'Ớ' => [ 1082 '1', 1083 '0', 1084 '', 1085 '', 1086 ], 1087 'Ờ' => [ 1088 '1', 1089 '0', 1090 '', 1091 '', 1092 ], 1093 'Ở' => [ 1094 '1', 1095 '0', 1096 '', 1097 '', 1098 ], 1099 'Ỡ' => [ 1100 '1', 1101 '0', 1102 '', 1103 '', 1104 ], 1105 'Ợ' => [ 1106 '1', 1107 '0', 1108 '', 1109 '', 1110 ], 1111 'OE' => [ 1112 '1', 1113 '0', 1114 '', 1115 '', 1116 ], 1117 'OI' => [ 1118 '1', 1119 '0', 1120 '1', 1121 '', 1122 ], 1123 'OJ' => [ 1124 '1', 1125 '0', 1126 '1', 1127 '', 1128 ], 1129 'OU' => [ 1130 '1', 1131 '0', 1132 '', 1133 '', 1134 ], 1135 'OY' => [ 1136 '1', 1137 '0', 1138 '1', 1139 '', 1140 ], 1141 'P' => [ 1142 '0', 1143 '7', 1144 '7', 1145 '7', 1146 ], 1147 'PF' => [ 1148 '0', 1149 '7', 1150 '7', 1151 '7', 1152 ], 1153 'PH' => [ 1154 '0', 1155 '7', 1156 '7', 1157 '7', 1158 ], 1159 'Q' => [ 1160 '0', 1161 '5', 1162 '5', 1163 '5', 1164 ], 1165 'R' => [ 1166 '0', 1167 '9', 1168 '9', 1169 '9', 1170 ], 1171 'Ř' => [ 1172 '0', 1173 '4', 1174 '4', 1175 '4', 1176 ], 1177 'RS' => [ 1178 '0', 1179 '4', 1180 '4', 1181 '4', 1182 '94', 1183 '94', 1184 '94', 1185 ], 1186 'RZ' => [ 1187 '0', 1188 '4', 1189 '4', 1190 '4', 1191 '94', 1192 '94', 1193 '94', 1194 ], 1195 'S' => [ 1196 '0', 1197 '4', 1198 '4', 1199 '4', 1200 ], 1201 'Ś' => [ 1202 '0', 1203 '4', 1204 '4', 1205 '4', 1206 ], 1207 'Š' => [ 1208 '0', 1209 '4', 1210 '4', 1211 '4', 1212 ], 1213 'Ş' => [ 1214 '0', 1215 '4', 1216 '4', 1217 '4', 1218 ], 1219 'SC' => [ 1220 '0', 1221 '2', 1222 '4', 1223 '4', 1224 ], 1225 'ŠČ' => [ 1226 '0', 1227 '2', 1228 '4', 1229 '4', 1230 ], 1231 'SCH' => [ 1232 '0', 1233 '4', 1234 '4', 1235 '4', 1236 ], 1237 'SCHD' => [ 1238 '0', 1239 '2', 1240 '43', 1241 '43', 1242 ], 1243 'SCHT' => [ 1244 '0', 1245 '2', 1246 '43', 1247 '43', 1248 ], 1249 'SCHTCH' => [ 1250 '0', 1251 '2', 1252 '4', 1253 '4', 1254 ], 1255 'SCHTSCH' => [ 1256 '0', 1257 '2', 1258 '4', 1259 '4', 1260 ], 1261 'SCHTSH' => [ 1262 '0', 1263 '2', 1264 '4', 1265 '4', 1266 ], 1267 'SD' => [ 1268 '0', 1269 '2', 1270 '43', 1271 '43', 1272 ], 1273 'SH' => [ 1274 '0', 1275 '4', 1276 '4', 1277 '4', 1278 ], 1279 'SHCH' => [ 1280 '0', 1281 '2', 1282 '4', 1283 '4', 1284 ], 1285 'SHD' => [ 1286 '0', 1287 '2', 1288 '43', 1289 '43', 1290 ], 1291 'SHT' => [ 1292 '0', 1293 '2', 1294 '43', 1295 '43', 1296 ], 1297 'SHTCH' => [ 1298 '0', 1299 '2', 1300 '4', 1301 '4', 1302 ], 1303 'SHTSH' => [ 1304 '0', 1305 '2', 1306 '4', 1307 '4', 1308 ], 1309 'ß' => [ 1310 '0', 1311 '', 1312 '4', 1313 '4', 1314 ], 1315 'ST' => [ 1316 '0', 1317 '2', 1318 '43', 1319 '43', 1320 ], 1321 'STCH' => [ 1322 '0', 1323 '2', 1324 '4', 1325 '4', 1326 ], 1327 'STRS' => [ 1328 '0', 1329 '2', 1330 '4', 1331 '4', 1332 ], 1333 'STRZ' => [ 1334 '0', 1335 '2', 1336 '4', 1337 '4', 1338 ], 1339 'STSCH' => [ 1340 '0', 1341 '2', 1342 '4', 1343 '4', 1344 ], 1345 'STSH' => [ 1346 '0', 1347 '2', 1348 '4', 1349 '4', 1350 ], 1351 'SSZ' => [ 1352 '0', 1353 '4', 1354 '4', 1355 '4', 1356 ], 1357 'SZ' => [ 1358 '0', 1359 '4', 1360 '4', 1361 '4', 1362 ], 1363 'SZCS' => [ 1364 '0', 1365 '2', 1366 '4', 1367 '4', 1368 ], 1369 'SZCZ' => [ 1370 '0', 1371 '2', 1372 '4', 1373 '4', 1374 ], 1375 'SZD' => [ 1376 '0', 1377 '2', 1378 '43', 1379 '43', 1380 ], 1381 'SZT' => [ 1382 '0', 1383 '2', 1384 '43', 1385 '43', 1386 ], 1387 'T' => [ 1388 '0', 1389 '3', 1390 '3', 1391 '3', 1392 ], 1393 'Ť' => [ 1394 '0', 1395 '3', 1396 '3', 1397 '3', 1398 ], 1399 'Ţ' => [ 1400 '0', 1401 '3', 1402 '3', 1403 '3', 1404 '4', 1405 '4', 1406 '4', 1407 ], 1408 'TC' => [ 1409 '0', 1410 '4', 1411 '4', 1412 '4', 1413 ], 1414 'TCH' => [ 1415 '0', 1416 '4', 1417 '4', 1418 '4', 1419 ], 1420 'TH' => [ 1421 '0', 1422 '3', 1423 '3', 1424 '3', 1425 ], 1426 'TRS' => [ 1427 '0', 1428 '4', 1429 '4', 1430 '4', 1431 ], 1432 'TRZ' => [ 1433 '0', 1434 '4', 1435 '4', 1436 '4', 1437 ], 1438 'TS' => [ 1439 '0', 1440 '4', 1441 '4', 1442 '4', 1443 ], 1444 'TSCH' => [ 1445 '0', 1446 '4', 1447 '4', 1448 '4', 1449 ], 1450 'TSH' => [ 1451 '0', 1452 '4', 1453 '4', 1454 '4', 1455 ], 1456 'TSZ' => [ 1457 '0', 1458 '4', 1459 '4', 1460 '4', 1461 ], 1462 'TTCH' => [ 1463 '0', 1464 '4', 1465 '4', 1466 '4', 1467 ], 1468 'TTS' => [ 1469 '0', 1470 '4', 1471 '4', 1472 '4', 1473 ], 1474 'TTSCH' => [ 1475 '0', 1476 '4', 1477 '4', 1478 '4', 1479 ], 1480 'TTSZ' => [ 1481 '0', 1482 '4', 1483 '4', 1484 '4', 1485 ], 1486 'TTZ' => [ 1487 '0', 1488 '4', 1489 '4', 1490 '4', 1491 ], 1492 'TZ' => [ 1493 '0', 1494 '4', 1495 '4', 1496 '4', 1497 ], 1498 'TZS' => [ 1499 '0', 1500 '4', 1501 '4', 1502 '4', 1503 ], 1504 'U' => [ 1505 '1', 1506 '0', 1507 '', 1508 '', 1509 ], 1510 'Ù' => [ 1511 '1', 1512 '0', 1513 '', 1514 '', 1515 ], 1516 'Ú' => [ 1517 '1', 1518 '0', 1519 '', 1520 '', 1521 ], 1522 'Û' => [ 1523 '1', 1524 '0', 1525 '', 1526 '', 1527 ], 1528 'Ü' => [ 1529 '1', 1530 '0', 1531 '', 1532 '', 1533 ], 1534 'Ũ' => [ 1535 '1', 1536 '0', 1537 '', 1538 '', 1539 ], 1540 'Ū' => [ 1541 '1', 1542 '0', 1543 '', 1544 '', 1545 ], 1546 'Ů' => [ 1547 '1', 1548 '0', 1549 '', 1550 '', 1551 ], 1552 'Ű' => [ 1553 '1', 1554 '0', 1555 '', 1556 '', 1557 ], 1558 'Ų' => [ 1559 '1', 1560 '0', 1561 '', 1562 '', 1563 ], 1564 'Ư' => [ 1565 '1', 1566 '0', 1567 '', 1568 '', 1569 ], 1570 'Ụ' => [ 1571 '1', 1572 '0', 1573 '', 1574 '', 1575 ], 1576 'Ủ' => [ 1577 '1', 1578 '0', 1579 '', 1580 '', 1581 ], 1582 'Ứ' => [ 1583 '1', 1584 '0', 1585 '', 1586 '', 1587 ], 1588 'Ừ' => [ 1589 '1', 1590 '0', 1591 '', 1592 '', 1593 ], 1594 'Ử' => [ 1595 '1', 1596 '0', 1597 '', 1598 '', 1599 ], 1600 'Ữ' => [ 1601 '1', 1602 '0', 1603 '', 1604 '', 1605 ], 1606 'Ự' => [ 1607 '1', 1608 '0', 1609 '', 1610 '', 1611 ], 1612 'UE' => [ 1613 '1', 1614 '0', 1615 '', 1616 '', 1617 ], 1618 'UI' => [ 1619 '1', 1620 '0', 1621 '1', 1622 '', 1623 ], 1624 'UJ' => [ 1625 '1', 1626 '0', 1627 '1', 1628 '', 1629 ], 1630 'UY' => [ 1631 '1', 1632 '0', 1633 '1', 1634 '', 1635 ], 1636 'UW' => [ 1637 '1', 1638 '0', 1639 '1', 1640 '', 1641 '0', 1642 '7', 1643 '7', 1644 ], 1645 'V' => [ 1646 '0', 1647 '7', 1648 '7', 1649 '7', 1650 ], 1651 'W' => [ 1652 '0', 1653 '7', 1654 '7', 1655 '7', 1656 ], 1657 'X' => [ 1658 '0', 1659 '5', 1660 '54', 1661 '54', 1662 ], 1663 'Y' => [ 1664 '1', 1665 '1', 1666 '', 1667 '', 1668 ], 1669 'Ý' => [ 1670 '1', 1671 '1', 1672 '', 1673 '', 1674 ], 1675 'Ỳ' => [ 1676 '1', 1677 '1', 1678 '', 1679 '', 1680 ], 1681 'Ỵ' => [ 1682 '1', 1683 '1', 1684 '', 1685 '', 1686 ], 1687 'Ỷ' => [ 1688 '1', 1689 '1', 1690 '', 1691 '', 1692 ], 1693 'Ỹ' => [ 1694 '1', 1695 '1', 1696 '', 1697 '', 1698 ], 1699 'Z' => [ 1700 '0', 1701 '4', 1702 '4', 1703 '4', 1704 ], 1705 'Ź' => [ 1706 '0', 1707 '4', 1708 '4', 1709 '4', 1710 ], 1711 'Ż' => [ 1712 '0', 1713 '4', 1714 '4', 1715 '4', 1716 ], 1717 'Ž' => [ 1718 '0', 1719 '4', 1720 '4', 1721 '4', 1722 ], 1723 'ZD' => [ 1724 '0', 1725 '2', 1726 '43', 1727 '43', 1728 ], 1729 'ZDZ' => [ 1730 '0', 1731 '2', 1732 '4', 1733 '4', 1734 ], 1735 'ZDZH' => [ 1736 '0', 1737 '2', 1738 '4', 1739 '4', 1740 ], 1741 'ZH' => [ 1742 '0', 1743 '4', 1744 '4', 1745 '4', 1746 ], 1747 'ZHD' => [ 1748 '0', 1749 '2', 1750 '43', 1751 '43', 1752 ], 1753 'ZHDZH' => [ 1754 '0', 1755 '2', 1756 '4', 1757 '4', 1758 ], 1759 'ZS' => [ 1760 '0', 1761 '4', 1762 '4', 1763 '4', 1764 ], 1765 'ZSCH' => [ 1766 '0', 1767 '4', 1768 '4', 1769 '4', 1770 ], 1771 'ZSH' => [ 1772 '0', 1773 '4', 1774 '4', 1775 '4', 1776 ], 1777 'ZZS' => [ 1778 '0', 1779 '4', 1780 '4', 1781 '4', 1782 ], 1783 // Cyrillic alphabet 1784 'А' => [ 1785 '1', 1786 '0', 1787 '', 1788 '', 1789 ], 1790 'Б' => [ 1791 '0', 1792 '7', 1793 '7', 1794 '7', 1795 ], 1796 'В' => [ 1797 '0', 1798 '7', 1799 '7', 1800 '7', 1801 ], 1802 'Г' => [ 1803 '0', 1804 '5', 1805 '5', 1806 '5', 1807 ], 1808 'Д' => [ 1809 '0', 1810 '3', 1811 '3', 1812 '3', 1813 ], 1814 'ДЗ' => [ 1815 '0', 1816 '4', 1817 '4', 1818 '4', 1819 ], 1820 'Е' => [ 1821 '1', 1822 '0', 1823 '', 1824 '', 1825 ], 1826 'Ё' => [ 1827 '1', 1828 '0', 1829 '', 1830 '', 1831 ], 1832 'Ж' => [ 1833 '0', 1834 '4', 1835 '4', 1836 '4', 1837 ], 1838 'З' => [ 1839 '0', 1840 '4', 1841 '4', 1842 '4', 1843 ], 1844 'И' => [ 1845 '1', 1846 '0', 1847 '', 1848 '', 1849 ], 1850 'Й' => [ 1851 '1', 1852 '1', 1853 '', 1854 '', 1855 '4', 1856 '4', 1857 '4', 1858 ], 1859 'К' => [ 1860 '0', 1861 '5', 1862 '5', 1863 '5', 1864 ], 1865 'Л' => [ 1866 '0', 1867 '8', 1868 '8', 1869 '8', 1870 ], 1871 'М' => [ 1872 '0', 1873 '6', 1874 '6', 1875 '6', 1876 ], 1877 'Н' => [ 1878 '0', 1879 '6', 1880 '6', 1881 '6', 1882 ], 1883 'О' => [ 1884 '1', 1885 '0', 1886 '', 1887 '', 1888 ], 1889 'П' => [ 1890 '0', 1891 '7', 1892 '7', 1893 '7', 1894 ], 1895 'Р' => [ 1896 '0', 1897 '9', 1898 '9', 1899 '9', 1900 ], 1901 'РЖ' => [ 1902 '0', 1903 '4', 1904 '4', 1905 '4', 1906 ], 1907 'С' => [ 1908 '0', 1909 '4', 1910 '4', 1911 '4', 1912 ], 1913 'Т' => [ 1914 '0', 1915 '3', 1916 '3', 1917 '3', 1918 ], 1919 'У' => [ 1920 '1', 1921 '0', 1922 '', 1923 '', 1924 ], 1925 'Ф' => [ 1926 '0', 1927 '7', 1928 '7', 1929 '7', 1930 ], 1931 'Х' => [ 1932 '0', 1933 '5', 1934 '5', 1935 '5', 1936 ], 1937 'Ц' => [ 1938 '0', 1939 '4', 1940 '4', 1941 '4', 1942 ], 1943 'Ч' => [ 1944 '0', 1945 '4', 1946 '4', 1947 '4', 1948 ], 1949 'Ш' => [ 1950 '0', 1951 '4', 1952 '4', 1953 '4', 1954 ], 1955 'Щ' => [ 1956 '0', 1957 '2', 1958 '4', 1959 '4', 1960 ], 1961 'Ъ' => [ 1962 '0', 1963 '', 1964 '', 1965 '', 1966 ], 1967 'Ы' => [ 1968 '0', 1969 '1', 1970 '', 1971 '', 1972 ], 1973 'Ь' => [ 1974 '0', 1975 '', 1976 '', 1977 '', 1978 ], 1979 'Э' => [ 1980 '1', 1981 '0', 1982 '', 1983 '', 1984 ], 1985 'Ю' => [ 1986 '0', 1987 '1', 1988 '', 1989 '', 1990 ], 1991 'Я' => [ 1992 '0', 1993 '1', 1994 '', 1995 '', 1996 ], 1997 // Greek alphabet 1998 'Α' => [ 1999 '1', 2000 '0', 2001 '', 2002 '', 2003 ], 2004 'Ά' => [ 2005 '1', 2006 '0', 2007 '', 2008 '', 2009 ], 2010 'ΑΙ' => [ 2011 '1', 2012 '0', 2013 '1', 2014 '', 2015 ], 2016 'ΑΥ' => [ 2017 '1', 2018 '0', 2019 '1', 2020 '', 2021 ], 2022 'Β' => [ 2023 '0', 2024 '7', 2025 '7', 2026 '7', 2027 ], 2028 'Γ' => [ 2029 '0', 2030 '5', 2031 '5', 2032 '5', 2033 ], 2034 'Δ' => [ 2035 '0', 2036 '3', 2037 '3', 2038 '3', 2039 ], 2040 'Ε' => [ 2041 '1', 2042 '0', 2043 '', 2044 '', 2045 ], 2046 'Έ' => [ 2047 '1', 2048 '0', 2049 '', 2050 '', 2051 ], 2052 'ΕΙ' => [ 2053 '1', 2054 '0', 2055 '1', 2056 '', 2057 ], 2058 'ΕΥ' => [ 2059 '1', 2060 '1', 2061 '1', 2062 '', 2063 ], 2064 'Ζ' => [ 2065 '0', 2066 '4', 2067 '4', 2068 '4', 2069 ], 2070 'Η' => [ 2071 '1', 2072 '0', 2073 '', 2074 '', 2075 ], 2076 'Ή' => [ 2077 '1', 2078 '0', 2079 '', 2080 '', 2081 ], 2082 'Θ' => [ 2083 '0', 2084 '3', 2085 '3', 2086 '3', 2087 ], 2088 'Ι' => [ 2089 '1', 2090 '0', 2091 '', 2092 '', 2093 ], 2094 'Ί' => [ 2095 '1', 2096 '0', 2097 '', 2098 '', 2099 ], 2100 'Ϊ' => [ 2101 '1', 2102 '0', 2103 '', 2104 '', 2105 ], 2106 'ΐ' => [ 2107 '1', 2108 '0', 2109 '', 2110 '', 2111 ], 2112 'Κ' => [ 2113 '0', 2114 '5', 2115 '5', 2116 '5', 2117 ], 2118 'Λ' => [ 2119 '0', 2120 '8', 2121 '8', 2122 '8', 2123 ], 2124 'Μ' => [ 2125 '0', 2126 '6', 2127 '6', 2128 '6', 2129 ], 2130 'ΜΠ' => [ 2131 '0', 2132 '7', 2133 '7', 2134 '7', 2135 ], 2136 'Ν' => [ 2137 '0', 2138 '6', 2139 '6', 2140 '6', 2141 ], 2142 'ΝΤ' => [ 2143 '0', 2144 '3', 2145 '3', 2146 '3', 2147 ], 2148 'Ξ' => [ 2149 '0', 2150 '5', 2151 '54', 2152 '54', 2153 ], 2154 'Ο' => [ 2155 '1', 2156 '0', 2157 '', 2158 '', 2159 ], 2160 'Ό' => [ 2161 '1', 2162 '0', 2163 '', 2164 '', 2165 ], 2166 'ΟΙ' => [ 2167 '1', 2168 '0', 2169 '1', 2170 '', 2171 ], 2172 'ΟΥ' => [ 2173 '1', 2174 '0', 2175 '1', 2176 '', 2177 ], 2178 'Π' => [ 2179 '0', 2180 '7', 2181 '7', 2182 '7', 2183 ], 2184 'Ρ' => [ 2185 '0', 2186 '9', 2187 '9', 2188 '9', 2189 ], 2190 'Σ' => [ 2191 '0', 2192 '4', 2193 '4', 2194 '4', 2195 ], 2196 'ς' => [ 2197 '0', 2198 '', 2199 '', 2200 '4', 2201 ], 2202 'Τ' => [ 2203 '0', 2204 '3', 2205 '3', 2206 '3', 2207 ], 2208 'ΤΖ' => [ 2209 '0', 2210 '4', 2211 '4', 2212 '4', 2213 ], 2214 'ΤΣ' => [ 2215 '0', 2216 '4', 2217 '4', 2218 '4', 2219 ], 2220 'Υ' => [ 2221 '1', 2222 '1', 2223 '', 2224 '', 2225 ], 2226 'Ύ' => [ 2227 '1', 2228 '1', 2229 '', 2230 '', 2231 ], 2232 'Ϋ' => [ 2233 '1', 2234 '1', 2235 '', 2236 '', 2237 ], 2238 'ΰ' => [ 2239 '1', 2240 '1', 2241 '', 2242 '', 2243 ], 2244 'ΥΚ' => [ 2245 '1', 2246 '5', 2247 '5', 2248 '5', 2249 ], 2250 'ΥΥ' => [ 2251 '1', 2252 '65', 2253 '65', 2254 '65', 2255 ], 2256 'Φ' => [ 2257 '0', 2258 '7', 2259 '7', 2260 '7', 2261 ], 2262 'Χ' => [ 2263 '0', 2264 '5', 2265 '5', 2266 '5', 2267 ], 2268 'Ψ' => [ 2269 '0', 2270 '7', 2271 '7', 2272 '7', 2273 ], 2274 'Ω' => [ 2275 '1', 2276 '0', 2277 '', 2278 '', 2279 ], 2280 'Ώ' => [ 2281 '1', 2282 '0', 2283 '', 2284 '', 2285 ], 2286 // Hebrew alphabet 2287 'א' => [ 2288 '1', 2289 '0', 2290 '', 2291 '', 2292 ], 2293 'או' => [ 2294 '1', 2295 '0', 2296 '7', 2297 '', 2298 ], 2299 'אג' => [ 2300 '1', 2301 '4', 2302 '4', 2303 '4', 2304 '5', 2305 '5', 2306 '5', 2307 '34', 2308 '34', 2309 '34', 2310 ], 2311 'בב' => [ 2312 '0', 2313 '7', 2314 '7', 2315 '7', 2316 '77', 2317 '77', 2318 '77', 2319 ], 2320 'ב' => [ 2321 '0', 2322 '7', 2323 '7', 2324 '7', 2325 ], 2326 'גג' => [ 2327 '0', 2328 '4', 2329 '4', 2330 '4', 2331 '5', 2332 '5', 2333 '5', 2334 '45', 2335 '45', 2336 '45', 2337 '55', 2338 '55', 2339 '55', 2340 '54', 2341 '54', 2342 '54', 2343 ], 2344 'גד' => [ 2345 '0', 2346 '43', 2347 '43', 2348 '43', 2349 '53', 2350 '53', 2351 '53', 2352 ], 2353 'גה' => [ 2354 '0', 2355 '45', 2356 '45', 2357 '45', 2358 '55', 2359 '55', 2360 '55', 2361 ], 2362 'גז' => [ 2363 '0', 2364 '44', 2365 '44', 2366 '44', 2367 '45', 2368 '45', 2369 '45', 2370 ], 2371 'גח' => [ 2372 '0', 2373 '45', 2374 '45', 2375 '45', 2376 '55', 2377 '55', 2378 '55', 2379 ], 2380 'גכ' => [ 2381 '0', 2382 '45', 2383 '45', 2384 '45', 2385 '55', 2386 '55', 2387 '55', 2388 ], 2389 'גך' => [ 2390 '0', 2391 '45', 2392 '45', 2393 '45', 2394 '55', 2395 '55', 2396 '55', 2397 ], 2398 'גצ' => [ 2399 '0', 2400 '44', 2401 '44', 2402 '44', 2403 '45', 2404 '45', 2405 '45', 2406 ], 2407 'גץ' => [ 2408 '0', 2409 '44', 2410 '44', 2411 '44', 2412 '45', 2413 '45', 2414 '45', 2415 ], 2416 'גק' => [ 2417 '0', 2418 '45', 2419 '45', 2420 '45', 2421 '54', 2422 '54', 2423 '54', 2424 ], 2425 'גש' => [ 2426 '0', 2427 '44', 2428 '44', 2429 '44', 2430 '54', 2431 '54', 2432 '54', 2433 ], 2434 'גת' => [ 2435 '0', 2436 '43', 2437 '43', 2438 '43', 2439 '53', 2440 '53', 2441 '53', 2442 ], 2443 'ג' => [ 2444 '0', 2445 '4', 2446 '4', 2447 '4', 2448 '5', 2449 '5', 2450 '5', 2451 ], 2452 'דז' => [ 2453 '0', 2454 '4', 2455 '4', 2456 '4', 2457 ], 2458 'דד' => [ 2459 '0', 2460 '3', 2461 '3', 2462 '3', 2463 '33', 2464 '33', 2465 '33', 2466 ], 2467 'דט' => [ 2468 '0', 2469 '33', 2470 '33', 2471 '33', 2472 ], 2473 'דש' => [ 2474 '0', 2475 '4', 2476 '4', 2477 '4', 2478 ], 2479 'דצ' => [ 2480 '0', 2481 '4', 2482 '4', 2483 '4', 2484 ], 2485 'דץ' => [ 2486 '0', 2487 '4', 2488 '4', 2489 '4', 2490 ], 2491 'ד' => [ 2492 '0', 2493 '3', 2494 '3', 2495 '3', 2496 ], 2497 'הג' => [ 2498 '0', 2499 '54', 2500 '54', 2501 '54', 2502 '55', 2503 '55', 2504 '55', 2505 ], 2506 'הכ' => [ 2507 '0', 2508 '55', 2509 '55', 2510 '55', 2511 ], 2512 'הח' => [ 2513 '0', 2514 '55', 2515 '55', 2516 '55', 2517 ], 2518 'הק' => [ 2519 '0', 2520 '55', 2521 '55', 2522 '55', 2523 '5', 2524 '5', 2525 '5', 2526 ], 2527 'הה' => [ 2528 '0', 2529 '5', 2530 '5', 2531 '', 2532 '55', 2533 '55', 2534 '', 2535 ], 2536 'ה' => [ 2537 '0', 2538 '5', 2539 '5', 2540 '', 2541 ], 2542 'וי' => [ 2543 '1', 2544 '', 2545 '', 2546 '', 2547 '7', 2548 '7', 2549 '7', 2550 ], 2551 'ו' => [ 2552 '1', 2553 '7', 2554 '7', 2555 '7', 2556 '7', 2557 '', 2558 '', 2559 ], 2560 'וו' => [ 2561 '1', 2562 '7', 2563 '7', 2564 '7', 2565 '7', 2566 '', 2567 '', 2568 ], 2569 'וופ' => [ 2570 '1', 2571 '7', 2572 '7', 2573 '7', 2574 '77', 2575 '77', 2576 '77', 2577 ], 2578 'זש' => [ 2579 '0', 2580 '4', 2581 '4', 2582 '4', 2583 '44', 2584 '44', 2585 '44', 2586 ], 2587 'זדז' => [ 2588 '0', 2589 '2', 2590 '4', 2591 '4', 2592 ], 2593 'ז' => [ 2594 '0', 2595 '4', 2596 '4', 2597 '4', 2598 ], 2599 'זג' => [ 2600 '0', 2601 '44', 2602 '44', 2603 '44', 2604 '45', 2605 '45', 2606 '45', 2607 ], 2608 'זז' => [ 2609 '0', 2610 '4', 2611 '4', 2612 '4', 2613 '44', 2614 '44', 2615 '44', 2616 ], 2617 'זס' => [ 2618 '0', 2619 '44', 2620 '44', 2621 '44', 2622 ], 2623 'זצ' => [ 2624 '0', 2625 '44', 2626 '44', 2627 '44', 2628 ], 2629 'זץ' => [ 2630 '0', 2631 '44', 2632 '44', 2633 '44', 2634 ], 2635 'חג' => [ 2636 '0', 2637 '54', 2638 '54', 2639 '54', 2640 '53', 2641 '53', 2642 '53', 2643 ], 2644 'חח' => [ 2645 '0', 2646 '5', 2647 '5', 2648 '5', 2649 '55', 2650 '55', 2651 '55', 2652 ], 2653 'חק' => [ 2654 '0', 2655 '55', 2656 '55', 2657 '55', 2658 '5', 2659 '5', 2660 '5', 2661 ], 2662 'חכ' => [ 2663 '0', 2664 '45', 2665 '45', 2666 '45', 2667 '55', 2668 '55', 2669 '55', 2670 ], 2671 'חס' => [ 2672 '0', 2673 '5', 2674 '54', 2675 '54', 2676 ], 2677 'חש' => [ 2678 '0', 2679 '5', 2680 '54', 2681 '54', 2682 ], 2683 'ח' => [ 2684 '0', 2685 '5', 2686 '5', 2687 '5', 2688 ], 2689 'טש' => [ 2690 '0', 2691 '4', 2692 '4', 2693 '4', 2694 ], 2695 'טד' => [ 2696 '0', 2697 '33', 2698 '33', 2699 '33', 2700 ], 2701 'טי' => [ 2702 '0', 2703 '3', 2704 '3', 2705 '3', 2706 '4', 2707 '4', 2708 '4', 2709 '3', 2710 '3', 2711 '34', 2712 ], 2713 'טת' => [ 2714 '0', 2715 '33', 2716 '33', 2717 '33', 2718 ], 2719 'טט' => [ 2720 '0', 2721 '3', 2722 '3', 2723 '3', 2724 '33', 2725 '33', 2726 '33', 2727 ], 2728 'ט' => [ 2729 '0', 2730 '3', 2731 '3', 2732 '3', 2733 ], 2734 'י' => [ 2735 '1', 2736 '1', 2737 '', 2738 '', 2739 ], 2740 'יא' => [ 2741 '1', 2742 '1', 2743 '', 2744 '', 2745 '1', 2746 '1', 2747 '1', 2748 ], 2749 'כג' => [ 2750 '0', 2751 '55', 2752 '55', 2753 '55', 2754 '54', 2755 '54', 2756 '54', 2757 ], 2758 'כש' => [ 2759 '0', 2760 '5', 2761 '54', 2762 '54', 2763 ], 2764 'כס' => [ 2765 '0', 2766 '5', 2767 '54', 2768 '54', 2769 ], 2770 'ככ' => [ 2771 '0', 2772 '5', 2773 '5', 2774 '5', 2775 '55', 2776 '55', 2777 '55', 2778 ], 2779 'כך' => [ 2780 '0', 2781 '5', 2782 '5', 2783 '5', 2784 '55', 2785 '55', 2786 '55', 2787 ], 2788 'כ' => [ 2789 '0', 2790 '5', 2791 '5', 2792 '5', 2793 ], 2794 'כח' => [ 2795 '0', 2796 '55', 2797 '55', 2798 '55', 2799 '5', 2800 '5', 2801 '5', 2802 ], 2803 'ך' => [ 2804 '0', 2805 '', 2806 '5', 2807 '5', 2808 ], 2809 'ל' => [ 2810 '0', 2811 '8', 2812 '8', 2813 '8', 2814 ], 2815 'לל' => [ 2816 '0', 2817 '88', 2818 '88', 2819 '88', 2820 '8', 2821 '8', 2822 '8', 2823 ], 2824 'מנ' => [ 2825 '0', 2826 '66', 2827 '66', 2828 '66', 2829 ], 2830 'מן' => [ 2831 '0', 2832 '66', 2833 '66', 2834 '66', 2835 ], 2836 'ממ' => [ 2837 '0', 2838 '6', 2839 '6', 2840 '6', 2841 '66', 2842 '66', 2843 '66', 2844 ], 2845 'מם' => [ 2846 '0', 2847 '6', 2848 '6', 2849 '6', 2850 '66', 2851 '66', 2852 '66', 2853 ], 2854 'מ' => [ 2855 '0', 2856 '6', 2857 '6', 2858 '6', 2859 ], 2860 'ם' => [ 2861 '0', 2862 '', 2863 '6', 2864 '6', 2865 ], 2866 'נמ' => [ 2867 '0', 2868 '66', 2869 '66', 2870 '66', 2871 ], 2872 'נם' => [ 2873 '0', 2874 '66', 2875 '66', 2876 '66', 2877 ], 2878 'ננ' => [ 2879 '0', 2880 '6', 2881 '6', 2882 '6', 2883 '66', 2884 '66', 2885 '66', 2886 ], 2887 'נן' => [ 2888 '0', 2889 '6', 2890 '6', 2891 '6', 2892 '66', 2893 '66', 2894 '66', 2895 ], 2896 'נ' => [ 2897 '0', 2898 '6', 2899 '6', 2900 '6', 2901 ], 2902 'ן' => [ 2903 '0', 2904 '', 2905 '6', 2906 '6', 2907 ], 2908 'סתש' => [ 2909 '0', 2910 '2', 2911 '4', 2912 '4', 2913 ], 2914 'סתז' => [ 2915 '0', 2916 '2', 2917 '4', 2918 '4', 2919 ], 2920 'סטז' => [ 2921 '0', 2922 '2', 2923 '4', 2924 '4', 2925 ], 2926 'סטש' => [ 2927 '0', 2928 '2', 2929 '4', 2930 '4', 2931 ], 2932 'סצד' => [ 2933 '0', 2934 '2', 2935 '4', 2936 '4', 2937 ], 2938 'סט' => [ 2939 '0', 2940 '2', 2941 '4', 2942 '4', 2943 '43', 2944 '43', 2945 '43', 2946 ], 2947 'סת' => [ 2948 '0', 2949 '2', 2950 '4', 2951 '4', 2952 '43', 2953 '43', 2954 '43', 2955 ], 2956 'סג' => [ 2957 '0', 2958 '44', 2959 '44', 2960 '44', 2961 '4', 2962 '4', 2963 '4', 2964 ], 2965 'סס' => [ 2966 '0', 2967 '4', 2968 '4', 2969 '4', 2970 '44', 2971 '44', 2972 '44', 2973 ], 2974 'סצ' => [ 2975 '0', 2976 '44', 2977 '44', 2978 '44', 2979 ], 2980 'סץ' => [ 2981 '0', 2982 '44', 2983 '44', 2984 '44', 2985 ], 2986 'סז' => [ 2987 '0', 2988 '44', 2989 '44', 2990 '44', 2991 ], 2992 'סש' => [ 2993 '0', 2994 '44', 2995 '44', 2996 '44', 2997 ], 2998 'ס' => [ 2999 '0', 3000 '4', 3001 '4', 3002 '4', 3003 ], 3004 'ע' => [ 3005 '1', 3006 '0', 3007 '', 3008 '', 3009 ], 3010 'פב' => [ 3011 '0', 3012 '7', 3013 '7', 3014 '7', 3015 '77', 3016 '77', 3017 '77', 3018 ], 3019 'פוו' => [ 3020 '0', 3021 '7', 3022 '7', 3023 '7', 3024 '77', 3025 '77', 3026 '77', 3027 ], 3028 'פפ' => [ 3029 '0', 3030 '7', 3031 '7', 3032 '7', 3033 '77', 3034 '77', 3035 '77', 3036 ], 3037 'פף' => [ 3038 '0', 3039 '7', 3040 '7', 3041 '7', 3042 '77', 3043 '77', 3044 '77', 3045 ], 3046 'פ' => [ 3047 '0', 3048 '7', 3049 '7', 3050 '7', 3051 ], 3052 'ף' => [ 3053 '0', 3054 '', 3055 '7', 3056 '7', 3057 ], 3058 'צג' => [ 3059 '0', 3060 '44', 3061 '44', 3062 '44', 3063 '45', 3064 '45', 3065 '45', 3066 ], 3067 'צז' => [ 3068 '0', 3069 '44', 3070 '44', 3071 '44', 3072 ], 3073 'צס' => [ 3074 '0', 3075 '44', 3076 '44', 3077 '44', 3078 ], 3079 'צצ' => [ 3080 '0', 3081 '4', 3082 '4', 3083 '4', 3084 '5', 3085 '5', 3086 '5', 3087 '44', 3088 '44', 3089 '44', 3090 '54', 3091 '54', 3092 '54', 3093 '45', 3094 '45', 3095 '45', 3096 ], 3097 'צץ' => [ 3098 '0', 3099 '4', 3100 '4', 3101 '4', 3102 '5', 3103 '5', 3104 '5', 3105 '44', 3106 '44', 3107 '44', 3108 '54', 3109 '54', 3110 '54', 3111 ], 3112 'צש' => [ 3113 '0', 3114 '44', 3115 '44', 3116 '44', 3117 '4', 3118 '4', 3119 '4', 3120 '5', 3121 '5', 3122 '5', 3123 ], 3124 'צ' => [ 3125 '0', 3126 '4', 3127 '4', 3128 '4', 3129 '5', 3130 '5', 3131 '5', 3132 ], 3133 'ץ' => [ 3134 '0', 3135 '', 3136 '4', 3137 '4', 3138 ], 3139 'קה' => [ 3140 '0', 3141 '55', 3142 '55', 3143 '5', 3144 ], 3145 'קס' => [ 3146 '0', 3147 '5', 3148 '54', 3149 '54', 3150 ], 3151 'קש' => [ 3152 '0', 3153 '5', 3154 '54', 3155 '54', 3156 ], 3157 'קק' => [ 3158 '0', 3159 '5', 3160 '5', 3161 '5', 3162 '55', 3163 '55', 3164 '55', 3165 ], 3166 'קח' => [ 3167 '0', 3168 '55', 3169 '55', 3170 '55', 3171 ], 3172 'קכ' => [ 3173 '0', 3174 '55', 3175 '55', 3176 '55', 3177 ], 3178 'קך' => [ 3179 '0', 3180 '55', 3181 '55', 3182 '55', 3183 ], 3184 'קג' => [ 3185 '0', 3186 '55', 3187 '55', 3188 '55', 3189 '54', 3190 '54', 3191 '54', 3192 ], 3193 'ק' => [ 3194 '0', 3195 '5', 3196 '5', 3197 '5', 3198 ], 3199 'רר' => [ 3200 '0', 3201 '99', 3202 '99', 3203 '99', 3204 '9', 3205 '9', 3206 '9', 3207 ], 3208 'ר' => [ 3209 '0', 3210 '9', 3211 '9', 3212 '9', 3213 ], 3214 'שטז' => [ 3215 '0', 3216 '2', 3217 '4', 3218 '4', 3219 ], 3220 'שתש' => [ 3221 '0', 3222 '2', 3223 '4', 3224 '4', 3225 ], 3226 'שתז' => [ 3227 '0', 3228 '2', 3229 '4', 3230 '4', 3231 ], 3232 'שטש' => [ 3233 '0', 3234 '2', 3235 '4', 3236 '4', 3237 ], 3238 'שד' => [ 3239 '0', 3240 '2', 3241 '43', 3242 '43', 3243 ], 3244 'שז' => [ 3245 '0', 3246 '44', 3247 '44', 3248 '44', 3249 ], 3250 'שס' => [ 3251 '0', 3252 '44', 3253 '44', 3254 '44', 3255 ], 3256 'שת' => [ 3257 '0', 3258 '2', 3259 '43', 3260 '43', 3261 ], 3262 'שג' => [ 3263 '0', 3264 '4', 3265 '4', 3266 '4', 3267 '44', 3268 '44', 3269 '44', 3270 '4', 3271 '43', 3272 '43', 3273 ], 3274 'שט' => [ 3275 '0', 3276 '2', 3277 '43', 3278 '43', 3279 '44', 3280 '44', 3281 '44', 3282 ], 3283 'שצ' => [ 3284 '0', 3285 '44', 3286 '44', 3287 '44', 3288 '45', 3289 '45', 3290 '45', 3291 ], 3292 'שץ' => [ 3293 '0', 3294 '44', 3295 '', 3296 '44', 3297 '45', 3298 '', 3299 '45', 3300 ], 3301 'שש' => [ 3302 '0', 3303 '4', 3304 '4', 3305 '4', 3306 '44', 3307 '44', 3308 '44', 3309 ], 3310 'ש' => [ 3311 '0', 3312 '4', 3313 '4', 3314 '4', 3315 ], 3316 'תג' => [ 3317 '0', 3318 '34', 3319 '34', 3320 '34', 3321 ], 3322 'תז' => [ 3323 '0', 3324 '34', 3325 '34', 3326 '34', 3327 ], 3328 'תש' => [ 3329 '0', 3330 '4', 3331 '4', 3332 '4', 3333 ], 3334 'תת' => [ 3335 '0', 3336 '3', 3337 '3', 3338 '3', 3339 '4', 3340 '4', 3341 '4', 3342 '33', 3343 '33', 3344 '33', 3345 '44', 3346 '44', 3347 '44', 3348 '34', 3349 '34', 3350 '34', 3351 '43', 3352 '43', 3353 '43', 3354 ], 3355 'ת' => [ 3356 '0', 3357 '3', 3358 '3', 3359 '3', 3360 '4', 3361 '4', 3362 '4', 3363 ], 3364 // Arabic alphabet 3365 'ا' => [ 3366 '1', 3367 '0', 3368 '', 3369 '', 3370 ], 3371 'ب' => [ 3372 '0', 3373 '7', 3374 '7', 3375 '7', 3376 ], 3377 'ت' => [ 3378 '0', 3379 '3', 3380 '3', 3381 '3', 3382 ], 3383 'ث' => [ 3384 '0', 3385 '3', 3386 '3', 3387 '3', 3388 ], 3389 'ج' => [ 3390 '0', 3391 '4', 3392 '4', 3393 '4', 3394 ], 3395 'ح' => [ 3396 '0', 3397 '5', 3398 '5', 3399 '5', 3400 ], 3401 'خ' => [ 3402 '0', 3403 '5', 3404 '5', 3405 '5', 3406 ], 3407 'د' => [ 3408 '0', 3409 '3', 3410 '3', 3411 '3', 3412 ], 3413 'ذ' => [ 3414 '0', 3415 '3', 3416 '3', 3417 '3', 3418 ], 3419 'ر' => [ 3420 '0', 3421 '9', 3422 '9', 3423 '9', 3424 ], 3425 'ز' => [ 3426 '0', 3427 '4', 3428 '4', 3429 '4', 3430 ], 3431 'س' => [ 3432 '0', 3433 '4', 3434 '4', 3435 '4', 3436 ], 3437 'ش' => [ 3438 '0', 3439 '4', 3440 '4', 3441 '4', 3442 ], 3443 'ص' => [ 3444 '0', 3445 '4', 3446 '4', 3447 '4', 3448 ], 3449 'ض' => [ 3450 '0', 3451 '3', 3452 '3', 3453 '3', 3454 ], 3455 'ط' => [ 3456 '0', 3457 '3', 3458 '3', 3459 '3', 3460 ], 3461 'ظ' => [ 3462 '0', 3463 '4', 3464 '4', 3465 '4', 3466 ], 3467 'ع' => [ 3468 '1', 3469 '0', 3470 '', 3471 '', 3472 ], 3473 'غ' => [ 3474 '0', 3475 '0', 3476 '', 3477 '', 3478 ], 3479 'ف' => [ 3480 '0', 3481 '7', 3482 '7', 3483 '7', 3484 ], 3485 'ق' => [ 3486 '0', 3487 '5', 3488 '5', 3489 '5', 3490 ], 3491 'ك' => [ 3492 '0', 3493 '5', 3494 '5', 3495 '5', 3496 ], 3497 'ل' => [ 3498 '0', 3499 '8', 3500 '8', 3501 '8', 3502 ], 3503 'لا' => [ 3504 '0', 3505 '8', 3506 '8', 3507 '8', 3508 ], 3509 'م' => [ 3510 '0', 3511 '6', 3512 '6', 3513 '6', 3514 ], 3515 'ن' => [ 3516 '0', 3517 '6', 3518 '6', 3519 '6', 3520 ], 3521 'هن' => [ 3522 '0', 3523 '66', 3524 '66', 3525 '66', 3526 ], 3527 'ه' => [ 3528 '0', 3529 '5', 3530 '5', 3531 '', 3532 ], 3533 'و' => [ 3534 '1', 3535 '', 3536 '', 3537 '', 3538 '7', 3539 '', 3540 '', 3541 ], 3542 'ي' => [ 3543 '0', 3544 '1', 3545 '', 3546 '', 3547 ], 3548 'آ' => [ 3549 '0', 3550 '1', 3551 '', 3552 '', 3553 ], 3554 'ة' => [ 3555 '0', 3556 '', 3557 '', 3558 '3', 3559 ], 3560 'ی' => [ 3561 '0', 3562 '1', 3563 '', 3564 '', 3565 ], 3566 'ى' => [ 3567 '1', 3568 '1', 3569 '', 3570 '', 3571 ], 3572 ]; 3573 3574 /** 3575 * Calculate the Daitch-Mokotoff soundex for a word. 3576 * 3577 * @param string $name 3578 * 3579 * @return string[] List of possible DM codes for the word. 3580 */ 3581 private static function daitchMokotoffWord($name) 3582 { 3583 // Apply special transformation rules to the input string 3584 $name = I18N::strtoupper($name); 3585 foreach (self::$transformNameTable as $transformRule) { 3586 $name = str_replace($transformRule[0], $transformRule[1], $name); 3587 } 3588 3589 // Initialize 3590 $name_script = I18N::textScript($name); 3591 $noVowels = ($name_script == 'Hebr' || $name_script == 'Arab'); 3592 3593 $lastPos = strlen($name) - 1; 3594 $currPos = 0; 3595 $state = 1; // 1: start of input string, 2: before vowel, 3: other 3596 $result = []; // accumulate complete 6-digit D-M codes here 3597 $partialResult = []; // accumulate incomplete D-M codes here 3598 $partialResult[] = ['!']; // initialize 1st partial result ('!' stops "duplicate sound" check) 3599 3600 // Loop through the input string. 3601 // Stop when the string is exhausted or when no more partial results remain 3602 while (count($partialResult) !== 0 && $currPos <= $lastPos) { 3603 // Find the DM coding table entry for the chunk at the current position 3604 $thisEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk 3605 while ($thisEntry != '') { 3606 if (isset(self::$dmsounds[$thisEntry])) { 3607 break; 3608 } 3609 $thisEntry = substr($thisEntry, 0, -1); // Not in table: try a shorter chunk 3610 } 3611 if ($thisEntry === '') { 3612 $currPos++; // Not in table: advance pointer to next byte 3613 continue; // and try again 3614 } 3615 3616 $soundTableEntry = self::$dmsounds[$thisEntry]; 3617 $workingResult = $partialResult; 3618 $partialResult = []; 3619 $currPos += strlen($thisEntry); 3620 3621 // Not at beginning of input string 3622 if ($state != 1) { 3623 if ($currPos <= $lastPos) { 3624 // Determine whether the next chunk is a vowel 3625 $nextEntry = substr($name, $currPos, self::MAXCHAR); // Get maximum length chunk 3626 while ($nextEntry != '') { 3627 if (isset(self::$dmsounds[$nextEntry])) { 3628 break; 3629 } 3630 $nextEntry = substr($nextEntry, 0, -1); // Not in table: try a shorter chunk 3631 } 3632 } else { 3633 $nextEntry = ''; 3634 } 3635 if ($nextEntry != '' && self::$dmsounds[$nextEntry][0] != '0') { 3636 $state = 2; 3637 } else { 3638 // Next chunk is a vowel 3639 $state = 3; 3640 } 3641 } 3642 3643 while ($state < count($soundTableEntry)) { 3644 // empty means 'ignore this sound in this state' 3645 if ($soundTableEntry[$state] == '') { 3646 foreach ($workingResult as $workingEntry) { 3647 $tempEntry = $workingEntry; 3648 $tempEntry[count($tempEntry) - 1] .= '!'; // Prevent false 'doubles' 3649 $partialResult[] = $tempEntry; 3650 } 3651 } else { 3652 foreach ($workingResult as $workingEntry) { 3653 if ($soundTableEntry[$state] !== $workingEntry[count($workingEntry) - 1]) { 3654 // Incoming sound isn't a duplicate of the previous sound 3655 $workingEntry[] = $soundTableEntry[$state]; 3656 } else { 3657 // Incoming sound is a duplicate of the previous sound 3658 // For Hebrew and Arabic, we need to create a pair of D-M sound codes, 3659 // one of the pair with only a single occurrence of the duplicate sound, 3660 // the other with both occurrences 3661 if ($noVowels) { 3662 $workingEntry[] = $soundTableEntry[$state]; 3663 } 3664 } 3665 if (count($workingEntry) < 7) { 3666 $partialResult[] = $workingEntry; 3667 } else { 3668 // This is the 6th code in the sequence 3669 // We're looking for 7 entries because the first is '!' and doesn't count 3670 $tempResult = str_replace('!', '', implode('', $workingEntry)); 3671 // Only return codes from recognisable sounds 3672 if ($tempResult) { 3673 $result[] = substr($tempResult . '000000', 0, 6); 3674 } 3675 } 3676 } 3677 } 3678 $state = $state + 3; // Advance to next triplet while keeping the same basic state 3679 } 3680 } 3681 3682 // Zero-fill and copy all remaining partial results 3683 foreach ($partialResult as $workingEntry) { 3684 $tempResult = str_replace('!', '', implode('', $workingEntry)); 3685 // Only return codes from recognisable sounds 3686 if ($tempResult) { 3687 $result[] = substr($tempResult . '000000', 0, 6); 3688 } 3689 } 3690 3691 return $result; 3692 } 3693} 3694