1<?php 2/** 3 * webtrees: online genealogy 4 * Copyright (C) 2019 webtrees development team 5 * This program is free software: you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation, either version 3 of the License, or 8 * (at your option) any later version. 9 * This program is distributed in the hope that it will be useful, 10 * but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 * GNU General Public License for more details. 13 * You should have received a copy of the GNU General Public License 14 * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 */ 16declare(strict_types=1); 17 18namespace Fisharebest\Webtrees; 19 20/** 21 * Phonetic matching of strings. 22 */ 23class Soundex 24{ 25 // Determine the Daitch–Mokotoff Soundex code for a word 26 // Original implementation by Gerry Kroll, and analysis by Meliza Amity 27 28 // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!) 29 private const MAXCHAR = 7; 30 31 /** 32 * Name transformation arrays. 33 * Used to transform the Name string to simplify the "sounds like" table. 34 * This is especially useful in Hebrew. 35 * 36 * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text) 37 * function call to achieve the desired transformations. 38 * 39 * Note about the use of "\x01": 40 * This code, which can’t legitimately occur in the kind of text we're dealing with, 41 * is used as a place-holder so that conditional string replacements can be done. 
42 */ 43 private const TRANSFORM_NAMES = [ 44 // Force Yiddish ligatures to be treated as separate letters 45 [ 46 'װ', 47 'וו', 48 ], 49 [ 50 'ײ', 51 'יי', 52 ], 53 [ 54 'ױ', 55 'וי', 56 ], 57 [ 58 'בו', 59 'בע', 60 ], 61 [ 62 'פו', 63 'פע', 64 ], 65 [ 66 'ומ', 67 'עמ', 68 ], 69 [ 70 'ום', 71 'עם', 72 ], 73 [ 74 'ונ', 75 'ענ', 76 ], 77 [ 78 'ון', 79 'ען', 80 ], 81 [ 82 'וו', 83 'ב', 84 ], 85 [ 86 "\x01", 87 '', 88 ], 89 [ 90 'ייה$', 91 "\x01ה", 92 ], 93 [ 94 'ייע$', 95 "\x01ע", 96 ], 97 [ 98 'יי', 99 'ע', 100 ], 101 [ 102 "\x01", 103 'יי', 104 ], 105 ]; 106 107 /** 108 * The DM sound coding table is organized this way: 109 * key: a variable-length string that corresponds to the UTF-8 character sequence 110 * represented by the table entry. Currently, that string can be up to 7 111 * bytes long. This maximum length is defined by the class 112 * constant MAXCHAR. 113 * 114 * value: an array as follows: 115 * [0]: zero if not a vowel 116 * [1]: sound value when this string is at the beginning of the word 117 * [2]: sound value when this string is followed by a vowel 118 * [3]: sound value for other cases 119 * [1],[2],[3] can be repeated several times to create branches in the code 120 * an empty sound value means "ignore in this state" 121 */ 122 private const DM_SOUNDS = [ 123 'A' => [ 124 '1', 125 '0', 126 '', 127 '', 128 ], 129 'À' => [ 130 '1', 131 '0', 132 '', 133 '', 134 ], 135 'Á' => [ 136 '1', 137 '0', 138 '', 139 '', 140 ], 141 'Â' => [ 142 '1', 143 '0', 144 '', 145 '', 146 ], 147 'Ã' => [ 148 '1', 149 '0', 150 '', 151 '', 152 ], 153 'Ä' => [ 154 '1', 155 '0', 156 '1', 157 '', 158 '0', 159 '', 160 '', 161 ], 162 'Å' => [ 163 '1', 164 '0', 165 '', 166 '', 167 ], 168 'Ă' => [ 169 '1', 170 '0', 171 '', 172 '', 173 ], 174 'Ą' => [ 175 '1', 176 '', 177 '', 178 '', 179 '', 180 '', 181 '6', 182 ], 183 'Ạ' => [ 184 '1', 185 '0', 186 '', 187 '', 188 ], 189 'Ả' => [ 190 '1', 191 '0', 192 '', 193 '', 194 ], 195 'Ấ' => [ 196 '1', 197 '0', 198 '', 199 '', 200 
], 201 'Ầ' => [ 202 '1', 203 '0', 204 '', 205 '', 206 ], 207 'Ẩ' => [ 208 '1', 209 '0', 210 '', 211 '', 212 ], 213 'Ẫ' => [ 214 '1', 215 '0', 216 '', 217 '', 218 ], 219 'Ậ' => [ 220 '1', 221 '0', 222 '', 223 '', 224 ], 225 'Ắ' => [ 226 '1', 227 '0', 228 '', 229 '', 230 ], 231 'Ằ' => [ 232 '1', 233 '0', 234 '', 235 '', 236 ], 237 'Ẳ' => [ 238 '1', 239 '0', 240 '', 241 '', 242 ], 243 'Ẵ' => [ 244 '1', 245 '0', 246 '', 247 '', 248 ], 249 'Ặ' => [ 250 '1', 251 '0', 252 '', 253 '', 254 ], 255 'AE' => [ 256 '1', 257 '0', 258 '1', 259 '', 260 ], 261 'Æ' => [ 262 '1', 263 '0', 264 '1', 265 '', 266 ], 267 'AI' => [ 268 '1', 269 '0', 270 '1', 271 '', 272 ], 273 'AJ' => [ 274 '1', 275 '0', 276 '1', 277 '', 278 ], 279 'AU' => [ 280 '1', 281 '0', 282 '7', 283 '', 284 ], 285 'AV' => [ 286 '1', 287 '0', 288 '7', 289 '', 290 '7', 291 '7', 292 '7', 293 ], 294 'ÄU' => [ 295 '1', 296 '0', 297 '1', 298 '', 299 ], 300 'AY' => [ 301 '1', 302 '0', 303 '1', 304 '', 305 ], 306 'B' => [ 307 '0', 308 '7', 309 '7', 310 '7', 311 ], 312 'C' => [ 313 '0', 314 '5', 315 '5', 316 '5', 317 '34', 318 '4', 319 '4', 320 ], 321 'Ć' => [ 322 '0', 323 '4', 324 '4', 325 '4', 326 ], 327 'Č' => [ 328 '0', 329 '4', 330 '4', 331 '4', 332 ], 333 'Ç' => [ 334 '0', 335 '4', 336 '4', 337 '4', 338 ], 339 'CH' => [ 340 '0', 341 '5', 342 '5', 343 '5', 344 '34', 345 '4', 346 '4', 347 ], 348 'CHS' => [ 349 '0', 350 '5', 351 '54', 352 '54', 353 ], 354 'CK' => [ 355 '0', 356 '5', 357 '5', 358 '5', 359 '45', 360 '45', 361 '45', 362 ], 363 'CCS' => [ 364 '0', 365 '4', 366 '4', 367 '4', 368 ], 369 'CS' => [ 370 '0', 371 '4', 372 '4', 373 '4', 374 ], 375 'CSZ' => [ 376 '0', 377 '4', 378 '4', 379 '4', 380 ], 381 'CZ' => [ 382 '0', 383 '4', 384 '4', 385 '4', 386 ], 387 'CZS' => [ 388 '0', 389 '4', 390 '4', 391 '4', 392 ], 393 'D' => [ 394 '0', 395 '3', 396 '3', 397 '3', 398 ], 399 'Ď' => [ 400 '0', 401 '3', 402 '3', 403 '3', 404 ], 405 'Đ' => [ 406 '0', 407 '3', 408 '3', 409 '3', 410 ], 411 'DRS' => [ 412 '0', 413 '4', 414 
'4', 415 '4', 416 ], 417 'DRZ' => [ 418 '0', 419 '4', 420 '4', 421 '4', 422 ], 423 'DS' => [ 424 '0', 425 '4', 426 '4', 427 '4', 428 ], 429 'DSH' => [ 430 '0', 431 '4', 432 '4', 433 '4', 434 ], 435 'DSZ' => [ 436 '0', 437 '4', 438 '4', 439 '4', 440 ], 441 'DT' => [ 442 '0', 443 '3', 444 '3', 445 '3', 446 ], 447 'DDZ' => [ 448 '0', 449 '4', 450 '4', 451 '4', 452 ], 453 'DDZS' => [ 454 '0', 455 '4', 456 '4', 457 '4', 458 ], 459 'DZ' => [ 460 '0', 461 '4', 462 '4', 463 '4', 464 ], 465 'DŹ' => [ 466 '0', 467 '4', 468 '4', 469 '4', 470 ], 471 'DŻ' => [ 472 '0', 473 '4', 474 '4', 475 '4', 476 ], 477 'DZH' => [ 478 '0', 479 '4', 480 '4', 481 '4', 482 ], 483 'DZS' => [ 484 '0', 485 '4', 486 '4', 487 '4', 488 ], 489 'E' => [ 490 '1', 491 '0', 492 '', 493 '', 494 ], 495 'È' => [ 496 '1', 497 '0', 498 '', 499 '', 500 ], 501 'É' => [ 502 '1', 503 '0', 504 '', 505 '', 506 ], 507 'Ê' => [ 508 '1', 509 '0', 510 '', 511 '', 512 ], 513 'Ë' => [ 514 '1', 515 '0', 516 '', 517 '', 518 ], 519 'Ĕ' => [ 520 '1', 521 '0', 522 '', 523 '', 524 ], 525 'Ė' => [ 526 '1', 527 '0', 528 '', 529 '', 530 ], 531 'Ę' => [ 532 '1', 533 '', 534 '', 535 '6', 536 '', 537 '', 538 '', 539 ], 540 'Ẹ' => [ 541 '1', 542 '0', 543 '', 544 '', 545 ], 546 'Ẻ' => [ 547 '1', 548 '0', 549 '', 550 '', 551 ], 552 'Ẽ' => [ 553 '1', 554 '0', 555 '', 556 '', 557 ], 558 'Ế' => [ 559 '1', 560 '0', 561 '', 562 '', 563 ], 564 'Ề' => [ 565 '1', 566 '0', 567 '', 568 '', 569 ], 570 'Ể' => [ 571 '1', 572 '0', 573 '', 574 '', 575 ], 576 'Ễ' => [ 577 '1', 578 '0', 579 '', 580 '', 581 ], 582 'Ệ' => [ 583 '1', 584 '0', 585 '', 586 '', 587 ], 588 'EAU' => [ 589 '1', 590 '0', 591 '', 592 '', 593 ], 594 'EI' => [ 595 '1', 596 '0', 597 '1', 598 '', 599 ], 600 'EJ' => [ 601 '1', 602 '0', 603 '1', 604 '', 605 ], 606 'EU' => [ 607 '1', 608 '1', 609 '1', 610 '', 611 ], 612 'EY' => [ 613 '1', 614 '0', 615 '1', 616 '', 617 ], 618 'F' => [ 619 '0', 620 '7', 621 '7', 622 '7', 623 ], 624 'FB' => [ 625 '0', 626 '7', 627 '7', 628 '7', 629 ], 630 
'G' => [ 631 '0', 632 '5', 633 '5', 634 '5', 635 '34', 636 '4', 637 '4', 638 ], 639 'Ğ' => [ 640 '0', 641 '', 642 '', 643 '', 644 ], 645 'GGY' => [ 646 '0', 647 '5', 648 '5', 649 '5', 650 ], 651 'GY' => [ 652 '0', 653 '5', 654 '5', 655 '5', 656 ], 657 'H' => [ 658 '0', 659 '5', 660 '5', 661 '', 662 '5', 663 '5', 664 '5', 665 ], 666 'I' => [ 667 '1', 668 '0', 669 '', 670 '', 671 ], 672 'Ì' => [ 673 '1', 674 '0', 675 '', 676 '', 677 ], 678 'Í' => [ 679 '1', 680 '0', 681 '', 682 '', 683 ], 684 'Î' => [ 685 '1', 686 '0', 687 '', 688 '', 689 ], 690 'Ï' => [ 691 '1', 692 '0', 693 '', 694 '', 695 ], 696 'Ĩ' => [ 697 '1', 698 '0', 699 '', 700 '', 701 ], 702 'Į' => [ 703 '1', 704 '0', 705 '', 706 '', 707 ], 708 'İ' => [ 709 '1', 710 '0', 711 '', 712 '', 713 ], 714 'Ỉ' => [ 715 '1', 716 '0', 717 '', 718 '', 719 ], 720 'Ị' => [ 721 '1', 722 '0', 723 '', 724 '', 725 ], 726 'IA' => [ 727 '1', 728 '1', 729 '', 730 '', 731 ], 732 'IE' => [ 733 '1', 734 '1', 735 '', 736 '', 737 ], 738 'IO' => [ 739 '1', 740 '1', 741 '', 742 '', 743 ], 744 'IU' => [ 745 '1', 746 '1', 747 '', 748 '', 749 ], 750 'J' => [ 751 '0', 752 '1', 753 '', 754 '', 755 '4', 756 '4', 757 '4', 758 '5', 759 '5', 760 '', 761 ], 762 'K' => [ 763 '0', 764 '5', 765 '5', 766 '5', 767 ], 768 'KH' => [ 769 '0', 770 '5', 771 '5', 772 '5', 773 ], 774 'KS' => [ 775 '0', 776 '5', 777 '54', 778 '54', 779 ], 780 'L' => [ 781 '0', 782 '8', 783 '8', 784 '8', 785 ], 786 'Ľ' => [ 787 '0', 788 '8', 789 '8', 790 '8', 791 ], 792 'Ĺ' => [ 793 '0', 794 '8', 795 '8', 796 '8', 797 ], 798 'Ł' => [ 799 '0', 800 '7', 801 '7', 802 '7', 803 '8', 804 '8', 805 '8', 806 ], 807 'LL' => [ 808 '0', 809 '8', 810 '8', 811 '8', 812 '58', 813 '8', 814 '8', 815 '1', 816 '8', 817 '8', 818 ], 819 'LLY' => [ 820 '0', 821 '8', 822 '8', 823 '8', 824 '1', 825 '8', 826 '8', 827 ], 828 'LY' => [ 829 '0', 830 '8', 831 '8', 832 '8', 833 '1', 834 '8', 835 '8', 836 ], 837 'M' => [ 838 '0', 839 '6', 840 '6', 841 '6', 842 ], 843 'MĔ' => [ 844 '0', 845 '66', 846 '66', 
847 '66', 848 ], 849 'MN' => [ 850 '0', 851 '66', 852 '66', 853 '66', 854 ], 855 'N' => [ 856 '0', 857 '6', 858 '6', 859 '6', 860 ], 861 'Ń' => [ 862 '0', 863 '6', 864 '6', 865 '6', 866 ], 867 'Ň' => [ 868 '0', 869 '6', 870 '6', 871 '6', 872 ], 873 'Ñ' => [ 874 '0', 875 '6', 876 '6', 877 '6', 878 ], 879 'NM' => [ 880 '0', 881 '66', 882 '66', 883 '66', 884 ], 885 'O' => [ 886 '1', 887 '0', 888 '', 889 '', 890 ], 891 'Ò' => [ 892 '1', 893 '0', 894 '', 895 '', 896 ], 897 'Ó' => [ 898 '1', 899 '0', 900 '', 901 '', 902 ], 903 'Ô' => [ 904 '1', 905 '0', 906 '', 907 '', 908 ], 909 'Õ' => [ 910 '1', 911 '0', 912 '', 913 '', 914 ], 915 'Ö' => [ 916 '1', 917 '0', 918 '', 919 '', 920 ], 921 'Ø' => [ 922 '1', 923 '0', 924 '', 925 '', 926 ], 927 'Ő' => [ 928 '1', 929 '0', 930 '', 931 '', 932 ], 933 'Œ' => [ 934 '1', 935 '0', 936 '', 937 '', 938 ], 939 'Ơ' => [ 940 '1', 941 '0', 942 '', 943 '', 944 ], 945 'Ọ' => [ 946 '1', 947 '0', 948 '', 949 '', 950 ], 951 'Ỏ' => [ 952 '1', 953 '0', 954 '', 955 '', 956 ], 957 'Ố' => [ 958 '1', 959 '0', 960 '', 961 '', 962 ], 963 'Ồ' => [ 964 '1', 965 '0', 966 '', 967 '', 968 ], 969 'Ổ' => [ 970 '1', 971 '0', 972 '', 973 '', 974 ], 975 'Ỗ' => [ 976 '1', 977 '0', 978 '', 979 '', 980 ], 981 'Ộ' => [ 982 '1', 983 '0', 984 '', 985 '', 986 ], 987 'Ớ' => [ 988 '1', 989 '0', 990 '', 991 '', 992 ], 993 'Ờ' => [ 994 '1', 995 '0', 996 '', 997 '', 998 ], 999 'Ở' => [ 1000 '1', 1001 '0', 1002 '', 1003 '', 1004 ], 1005 'Ỡ' => [ 1006 '1', 1007 '0', 1008 '', 1009 '', 1010 ], 1011 'Ợ' => [ 1012 '1', 1013 '0', 1014 '', 1015 '', 1016 ], 1017 'OE' => [ 1018 '1', 1019 '0', 1020 '', 1021 '', 1022 ], 1023 'OI' => [ 1024 '1', 1025 '0', 1026 '1', 1027 '', 1028 ], 1029 'OJ' => [ 1030 '1', 1031 '0', 1032 '1', 1033 '', 1034 ], 1035 'OU' => [ 1036 '1', 1037 '0', 1038 '', 1039 '', 1040 ], 1041 'OY' => [ 1042 '1', 1043 '0', 1044 '1', 1045 '', 1046 ], 1047 'P' => [ 1048 '0', 1049 '7', 1050 '7', 1051 '7', 1052 ], 1053 'PF' => [ 1054 '0', 1055 '7', 1056 '7', 1057 '7', 1058 ], 
1059 'PH' => [ 1060 '0', 1061 '7', 1062 '7', 1063 '7', 1064 ], 1065 'Q' => [ 1066 '0', 1067 '5', 1068 '5', 1069 '5', 1070 ], 1071 'R' => [ 1072 '0', 1073 '9', 1074 '9', 1075 '9', 1076 ], 1077 'Ř' => [ 1078 '0', 1079 '4', 1080 '4', 1081 '4', 1082 ], 1083 'RS' => [ 1084 '0', 1085 '4', 1086 '4', 1087 '4', 1088 '94', 1089 '94', 1090 '94', 1091 ], 1092 'RZ' => [ 1093 '0', 1094 '4', 1095 '4', 1096 '4', 1097 '94', 1098 '94', 1099 '94', 1100 ], 1101 'S' => [ 1102 '0', 1103 '4', 1104 '4', 1105 '4', 1106 ], 1107 'Ś' => [ 1108 '0', 1109 '4', 1110 '4', 1111 '4', 1112 ], 1113 'Š' => [ 1114 '0', 1115 '4', 1116 '4', 1117 '4', 1118 ], 1119 'Ş' => [ 1120 '0', 1121 '4', 1122 '4', 1123 '4', 1124 ], 1125 'SC' => [ 1126 '0', 1127 '2', 1128 '4', 1129 '4', 1130 ], 1131 'ŠČ' => [ 1132 '0', 1133 '2', 1134 '4', 1135 '4', 1136 ], 1137 'SCH' => [ 1138 '0', 1139 '4', 1140 '4', 1141 '4', 1142 ], 1143 'SCHD' => [ 1144 '0', 1145 '2', 1146 '43', 1147 '43', 1148 ], 1149 'SCHT' => [ 1150 '0', 1151 '2', 1152 '43', 1153 '43', 1154 ], 1155 'SCHTCH' => [ 1156 '0', 1157 '2', 1158 '4', 1159 '4', 1160 ], 1161 'SCHTSCH' => [ 1162 '0', 1163 '2', 1164 '4', 1165 '4', 1166 ], 1167 'SCHTSH' => [ 1168 '0', 1169 '2', 1170 '4', 1171 '4', 1172 ], 1173 'SD' => [ 1174 '0', 1175 '2', 1176 '43', 1177 '43', 1178 ], 1179 'SH' => [ 1180 '0', 1181 '4', 1182 '4', 1183 '4', 1184 ], 1185 'SHCH' => [ 1186 '0', 1187 '2', 1188 '4', 1189 '4', 1190 ], 1191 'SHD' => [ 1192 '0', 1193 '2', 1194 '43', 1195 '43', 1196 ], 1197 'SHT' => [ 1198 '0', 1199 '2', 1200 '43', 1201 '43', 1202 ], 1203 'SHTCH' => [ 1204 '0', 1205 '2', 1206 '4', 1207 '4', 1208 ], 1209 'SHTSH' => [ 1210 '0', 1211 '2', 1212 '4', 1213 '4', 1214 ], 1215 'ß' => [ 1216 '0', 1217 '', 1218 '4', 1219 '4', 1220 ], 1221 'ST' => [ 1222 '0', 1223 '2', 1224 '43', 1225 '43', 1226 ], 1227 'STCH' => [ 1228 '0', 1229 '2', 1230 '4', 1231 '4', 1232 ], 1233 'STRS' => [ 1234 '0', 1235 '2', 1236 '4', 1237 '4', 1238 ], 1239 'STRZ' => [ 1240 '0', 1241 '2', 1242 '4', 1243 '4', 1244 ], 1245 
'STSCH' => [ 1246 '0', 1247 '2', 1248 '4', 1249 '4', 1250 ], 1251 'STSH' => [ 1252 '0', 1253 '2', 1254 '4', 1255 '4', 1256 ], 1257 'SSZ' => [ 1258 '0', 1259 '4', 1260 '4', 1261 '4', 1262 ], 1263 'SZ' => [ 1264 '0', 1265 '4', 1266 '4', 1267 '4', 1268 ], 1269 'SZCS' => [ 1270 '0', 1271 '2', 1272 '4', 1273 '4', 1274 ], 1275 'SZCZ' => [ 1276 '0', 1277 '2', 1278 '4', 1279 '4', 1280 ], 1281 'SZD' => [ 1282 '0', 1283 '2', 1284 '43', 1285 '43', 1286 ], 1287 'SZT' => [ 1288 '0', 1289 '2', 1290 '43', 1291 '43', 1292 ], 1293 'T' => [ 1294 '0', 1295 '3', 1296 '3', 1297 '3', 1298 ], 1299 'Ť' => [ 1300 '0', 1301 '3', 1302 '3', 1303 '3', 1304 ], 1305 'Ţ' => [ 1306 '0', 1307 '3', 1308 '3', 1309 '3', 1310 '4', 1311 '4', 1312 '4', 1313 ], 1314 'TC' => [ 1315 '0', 1316 '4', 1317 '4', 1318 '4', 1319 ], 1320 'TCH' => [ 1321 '0', 1322 '4', 1323 '4', 1324 '4', 1325 ], 1326 'TH' => [ 1327 '0', 1328 '3', 1329 '3', 1330 '3', 1331 ], 1332 'TRS' => [ 1333 '0', 1334 '4', 1335 '4', 1336 '4', 1337 ], 1338 'TRZ' => [ 1339 '0', 1340 '4', 1341 '4', 1342 '4', 1343 ], 1344 'TS' => [ 1345 '0', 1346 '4', 1347 '4', 1348 '4', 1349 ], 1350 'TSCH' => [ 1351 '0', 1352 '4', 1353 '4', 1354 '4', 1355 ], 1356 'TSH' => [ 1357 '0', 1358 '4', 1359 '4', 1360 '4', 1361 ], 1362 'TSZ' => [ 1363 '0', 1364 '4', 1365 '4', 1366 '4', 1367 ], 1368 'TTCH' => [ 1369 '0', 1370 '4', 1371 '4', 1372 '4', 1373 ], 1374 'TTS' => [ 1375 '0', 1376 '4', 1377 '4', 1378 '4', 1379 ], 1380 'TTSCH' => [ 1381 '0', 1382 '4', 1383 '4', 1384 '4', 1385 ], 1386 'TTSZ' => [ 1387 '0', 1388 '4', 1389 '4', 1390 '4', 1391 ], 1392 'TTZ' => [ 1393 '0', 1394 '4', 1395 '4', 1396 '4', 1397 ], 1398 'TZ' => [ 1399 '0', 1400 '4', 1401 '4', 1402 '4', 1403 ], 1404 'TZS' => [ 1405 '0', 1406 '4', 1407 '4', 1408 '4', 1409 ], 1410 'U' => [ 1411 '1', 1412 '0', 1413 '', 1414 '', 1415 ], 1416 'Ù' => [ 1417 '1', 1418 '0', 1419 '', 1420 '', 1421 ], 1422 'Ú' => [ 1423 '1', 1424 '0', 1425 '', 1426 '', 1427 ], 1428 'Û' => [ 1429 '1', 1430 '0', 1431 '', 1432 '', 1433 ], 
1434 'Ü' => [ 1435 '1', 1436 '0', 1437 '', 1438 '', 1439 ], 1440 'Ũ' => [ 1441 '1', 1442 '0', 1443 '', 1444 '', 1445 ], 1446 'Ū' => [ 1447 '1', 1448 '0', 1449 '', 1450 '', 1451 ], 1452 'Ů' => [ 1453 '1', 1454 '0', 1455 '', 1456 '', 1457 ], 1458 'Ű' => [ 1459 '1', 1460 '0', 1461 '', 1462 '', 1463 ], 1464 'Ų' => [ 1465 '1', 1466 '0', 1467 '', 1468 '', 1469 ], 1470 'Ư' => [ 1471 '1', 1472 '0', 1473 '', 1474 '', 1475 ], 1476 'Ụ' => [ 1477 '1', 1478 '0', 1479 '', 1480 '', 1481 ], 1482 'Ủ' => [ 1483 '1', 1484 '0', 1485 '', 1486 '', 1487 ], 1488 'Ứ' => [ 1489 '1', 1490 '0', 1491 '', 1492 '', 1493 ], 1494 'Ừ' => [ 1495 '1', 1496 '0', 1497 '', 1498 '', 1499 ], 1500 'Ử' => [ 1501 '1', 1502 '0', 1503 '', 1504 '', 1505 ], 1506 'Ữ' => [ 1507 '1', 1508 '0', 1509 '', 1510 '', 1511 ], 1512 'Ự' => [ 1513 '1', 1514 '0', 1515 '', 1516 '', 1517 ], 1518 'UE' => [ 1519 '1', 1520 '0', 1521 '', 1522 '', 1523 ], 1524 'UI' => [ 1525 '1', 1526 '0', 1527 '1', 1528 '', 1529 ], 1530 'UJ' => [ 1531 '1', 1532 '0', 1533 '1', 1534 '', 1535 ], 1536 'UY' => [ 1537 '1', 1538 '0', 1539 '1', 1540 '', 1541 ], 1542 'UW' => [ 1543 '1', 1544 '0', 1545 '1', 1546 '', 1547 '0', 1548 '7', 1549 '7', 1550 ], 1551 'V' => [ 1552 '0', 1553 '7', 1554 '7', 1555 '7', 1556 ], 1557 'W' => [ 1558 '0', 1559 '7', 1560 '7', 1561 '7', 1562 ], 1563 'X' => [ 1564 '0', 1565 '5', 1566 '54', 1567 '54', 1568 ], 1569 'Y' => [ 1570 '1', 1571 '1', 1572 '', 1573 '', 1574 ], 1575 'Ý' => [ 1576 '1', 1577 '1', 1578 '', 1579 '', 1580 ], 1581 'Ỳ' => [ 1582 '1', 1583 '1', 1584 '', 1585 '', 1586 ], 1587 'Ỵ' => [ 1588 '1', 1589 '1', 1590 '', 1591 '', 1592 ], 1593 'Ỷ' => [ 1594 '1', 1595 '1', 1596 '', 1597 '', 1598 ], 1599 'Ỹ' => [ 1600 '1', 1601 '1', 1602 '', 1603 '', 1604 ], 1605 'Z' => [ 1606 '0', 1607 '4', 1608 '4', 1609 '4', 1610 ], 1611 'Ź' => [ 1612 '0', 1613 '4', 1614 '4', 1615 '4', 1616 ], 1617 'Ż' => [ 1618 '0', 1619 '4', 1620 '4', 1621 '4', 1622 ], 1623 'Ž' => [ 1624 '0', 1625 '4', 1626 '4', 1627 '4', 1628 ], 1629 'ZD' => [ 1630 '0', 
1631 '2', 1632 '43', 1633 '43', 1634 ], 1635 'ZDZ' => [ 1636 '0', 1637 '2', 1638 '4', 1639 '4', 1640 ], 1641 'ZDZH' => [ 1642 '0', 1643 '2', 1644 '4', 1645 '4', 1646 ], 1647 'ZH' => [ 1648 '0', 1649 '4', 1650 '4', 1651 '4', 1652 ], 1653 'ZHD' => [ 1654 '0', 1655 '2', 1656 '43', 1657 '43', 1658 ], 1659 'ZHDZH' => [ 1660 '0', 1661 '2', 1662 '4', 1663 '4', 1664 ], 1665 'ZS' => [ 1666 '0', 1667 '4', 1668 '4', 1669 '4', 1670 ], 1671 'ZSCH' => [ 1672 '0', 1673 '4', 1674 '4', 1675 '4', 1676 ], 1677 'ZSH' => [ 1678 '0', 1679 '4', 1680 '4', 1681 '4', 1682 ], 1683 'ZZS' => [ 1684 '0', 1685 '4', 1686 '4', 1687 '4', 1688 ], 1689 // Cyrillic alphabet 1690 'А' => [ 1691 '1', 1692 '0', 1693 '', 1694 '', 1695 ], 1696 'Б' => [ 1697 '0', 1698 '7', 1699 '7', 1700 '7', 1701 ], 1702 'В' => [ 1703 '0', 1704 '7', 1705 '7', 1706 '7', 1707 ], 1708 'Г' => [ 1709 '0', 1710 '5', 1711 '5', 1712 '5', 1713 ], 1714 'Д' => [ 1715 '0', 1716 '3', 1717 '3', 1718 '3', 1719 ], 1720 'ДЗ' => [ 1721 '0', 1722 '4', 1723 '4', 1724 '4', 1725 ], 1726 'Е' => [ 1727 '1', 1728 '0', 1729 '', 1730 '', 1731 ], 1732 'Ё' => [ 1733 '1', 1734 '0', 1735 '', 1736 '', 1737 ], 1738 'Ж' => [ 1739 '0', 1740 '4', 1741 '4', 1742 '4', 1743 ], 1744 'З' => [ 1745 '0', 1746 '4', 1747 '4', 1748 '4', 1749 ], 1750 'И' => [ 1751 '1', 1752 '0', 1753 '', 1754 '', 1755 ], 1756 'Й' => [ 1757 '1', 1758 '1', 1759 '', 1760 '', 1761 '4', 1762 '4', 1763 '4', 1764 ], 1765 'К' => [ 1766 '0', 1767 '5', 1768 '5', 1769 '5', 1770 ], 1771 'Л' => [ 1772 '0', 1773 '8', 1774 '8', 1775 '8', 1776 ], 1777 'М' => [ 1778 '0', 1779 '6', 1780 '6', 1781 '6', 1782 ], 1783 'Н' => [ 1784 '0', 1785 '6', 1786 '6', 1787 '6', 1788 ], 1789 'О' => [ 1790 '1', 1791 '0', 1792 '', 1793 '', 1794 ], 1795 'П' => [ 1796 '0', 1797 '7', 1798 '7', 1799 '7', 1800 ], 1801 'Р' => [ 1802 '0', 1803 '9', 1804 '9', 1805 '9', 1806 ], 1807 'РЖ' => [ 1808 '0', 1809 '4', 1810 '4', 1811 '4', 1812 ], 1813 'С' => [ 1814 '0', 1815 '4', 1816 '4', 1817 '4', 1818 ], 1819 'Т' => [ 1820 '0', 1821 
'3', 1822 '3', 1823 '3', 1824 ], 1825 'У' => [ 1826 '1', 1827 '0', 1828 '', 1829 '', 1830 ], 1831 'Ф' => [ 1832 '0', 1833 '7', 1834 '7', 1835 '7', 1836 ], 1837 'Х' => [ 1838 '0', 1839 '5', 1840 '5', 1841 '5', 1842 ], 1843 'Ц' => [ 1844 '0', 1845 '4', 1846 '4', 1847 '4', 1848 ], 1849 'Ч' => [ 1850 '0', 1851 '4', 1852 '4', 1853 '4', 1854 ], 1855 'Ш' => [ 1856 '0', 1857 '4', 1858 '4', 1859 '4', 1860 ], 1861 'Щ' => [ 1862 '0', 1863 '2', 1864 '4', 1865 '4', 1866 ], 1867 'Ъ' => [ 1868 '0', 1869 '', 1870 '', 1871 '', 1872 ], 1873 'Ы' => [ 1874 '0', 1875 '1', 1876 '', 1877 '', 1878 ], 1879 'Ь' => [ 1880 '0', 1881 '', 1882 '', 1883 '', 1884 ], 1885 'Э' => [ 1886 '1', 1887 '0', 1888 '', 1889 '', 1890 ], 1891 'Ю' => [ 1892 '0', 1893 '1', 1894 '', 1895 '', 1896 ], 1897 'Я' => [ 1898 '0', 1899 '1', 1900 '', 1901 '', 1902 ], 1903 // Greek alphabet 1904 'Α' => [ 1905 '1', 1906 '0', 1907 '', 1908 '', 1909 ], 1910 'Ά' => [ 1911 '1', 1912 '0', 1913 '', 1914 '', 1915 ], 1916 'ΑΙ' => [ 1917 '1', 1918 '0', 1919 '1', 1920 '', 1921 ], 1922 'ΑΥ' => [ 1923 '1', 1924 '0', 1925 '1', 1926 '', 1927 ], 1928 'Β' => [ 1929 '0', 1930 '7', 1931 '7', 1932 '7', 1933 ], 1934 'Γ' => [ 1935 '0', 1936 '5', 1937 '5', 1938 '5', 1939 ], 1940 'Δ' => [ 1941 '0', 1942 '3', 1943 '3', 1944 '3', 1945 ], 1946 'Ε' => [ 1947 '1', 1948 '0', 1949 '', 1950 '', 1951 ], 1952 'Έ' => [ 1953 '1', 1954 '0', 1955 '', 1956 '', 1957 ], 1958 'ΕΙ' => [ 1959 '1', 1960 '0', 1961 '1', 1962 '', 1963 ], 1964 'ΕΥ' => [ 1965 '1', 1966 '1', 1967 '1', 1968 '', 1969 ], 1970 'Ζ' => [ 1971 '0', 1972 '4', 1973 '4', 1974 '4', 1975 ], 1976 'Η' => [ 1977 '1', 1978 '0', 1979 '', 1980 '', 1981 ], 1982 'Ή' => [ 1983 '1', 1984 '0', 1985 '', 1986 '', 1987 ], 1988 'Θ' => [ 1989 '0', 1990 '3', 1991 '3', 1992 '3', 1993 ], 1994 'Ι' => [ 1995 '1', 1996 '0', 1997 '', 1998 '', 1999 ], 2000 'Ί' => [ 2001 '1', 2002 '0', 2003 '', 2004 '', 2005 ], 2006 'Ϊ' => [ 2007 '1', 2008 '0', 2009 '', 2010 '', 2011 ], 2012 'ΐ' => [ 2013 '1', 2014 '0', 2015 '', 2016 '', 
2017 ], 2018 'Κ' => [ 2019 '0', 2020 '5', 2021 '5', 2022 '5', 2023 ], 2024 'Λ' => [ 2025 '0', 2026 '8', 2027 '8', 2028 '8', 2029 ], 2030 'Μ' => [ 2031 '0', 2032 '6', 2033 '6', 2034 '6', 2035 ], 2036 'ΜΠ' => [ 2037 '0', 2038 '7', 2039 '7', 2040 '7', 2041 ], 2042 'Ν' => [ 2043 '0', 2044 '6', 2045 '6', 2046 '6', 2047 ], 2048 'ΝΤ' => [ 2049 '0', 2050 '3', 2051 '3', 2052 '3', 2053 ], 2054 'Ξ' => [ 2055 '0', 2056 '5', 2057 '54', 2058 '54', 2059 ], 2060 'Ο' => [ 2061 '1', 2062 '0', 2063 '', 2064 '', 2065 ], 2066 'Ό' => [ 2067 '1', 2068 '0', 2069 '', 2070 '', 2071 ], 2072 'ΟΙ' => [ 2073 '1', 2074 '0', 2075 '1', 2076 '', 2077 ], 2078 'ΟΥ' => [ 2079 '1', 2080 '0', 2081 '1', 2082 '', 2083 ], 2084 'Π' => [ 2085 '0', 2086 '7', 2087 '7', 2088 '7', 2089 ], 2090 'Ρ' => [ 2091 '0', 2092 '9', 2093 '9', 2094 '9', 2095 ], 2096 'Σ' => [ 2097 '0', 2098 '4', 2099 '4', 2100 '4', 2101 ], 2102 'ς' => [ 2103 '0', 2104 '', 2105 '', 2106 '4', 2107 ], 2108 'Τ' => [ 2109 '0', 2110 '3', 2111 '3', 2112 '3', 2113 ], 2114 'ΤΖ' => [ 2115 '0', 2116 '4', 2117 '4', 2118 '4', 2119 ], 2120 'ΤΣ' => [ 2121 '0', 2122 '4', 2123 '4', 2124 '4', 2125 ], 2126 'Υ' => [ 2127 '1', 2128 '1', 2129 '', 2130 '', 2131 ], 2132 'Ύ' => [ 2133 '1', 2134 '1', 2135 '', 2136 '', 2137 ], 2138 'Ϋ' => [ 2139 '1', 2140 '1', 2141 '', 2142 '', 2143 ], 2144 'ΰ' => [ 2145 '1', 2146 '1', 2147 '', 2148 '', 2149 ], 2150 'ΥΚ' => [ 2151 '1', 2152 '5', 2153 '5', 2154 '5', 2155 ], 2156 'ΥΥ' => [ 2157 '1', 2158 '65', 2159 '65', 2160 '65', 2161 ], 2162 'Φ' => [ 2163 '0', 2164 '7', 2165 '7', 2166 '7', 2167 ], 2168 'Χ' => [ 2169 '0', 2170 '5', 2171 '5', 2172 '5', 2173 ], 2174 'Ψ' => [ 2175 '0', 2176 '7', 2177 '7', 2178 '7', 2179 ], 2180 'Ω' => [ 2181 '1', 2182 '0', 2183 '', 2184 '', 2185 ], 2186 'Ώ' => [ 2187 '1', 2188 '0', 2189 '', 2190 '', 2191 ], 2192 // Hebrew alphabet 2193 'א' => [ 2194 '1', 2195 '0', 2196 '', 2197 '', 2198 ], 2199 'או' => [ 2200 '1', 2201 '0', 2202 '7', 2203 '', 2204 ], 2205 'אג' => [ 2206 '1', 2207 '4', 2208 '4', 2209 '4', 
2210 '5', 2211 '5', 2212 '5', 2213 '34', 2214 '34', 2215 '34', 2216 ], 2217 'בב' => [ 2218 '0', 2219 '7', 2220 '7', 2221 '7', 2222 '77', 2223 '77', 2224 '77', 2225 ], 2226 'ב' => [ 2227 '0', 2228 '7', 2229 '7', 2230 '7', 2231 ], 2232 'גג' => [ 2233 '0', 2234 '4', 2235 '4', 2236 '4', 2237 '5', 2238 '5', 2239 '5', 2240 '45', 2241 '45', 2242 '45', 2243 '55', 2244 '55', 2245 '55', 2246 '54', 2247 '54', 2248 '54', 2249 ], 2250 'גד' => [ 2251 '0', 2252 '43', 2253 '43', 2254 '43', 2255 '53', 2256 '53', 2257 '53', 2258 ], 2259 'גה' => [ 2260 '0', 2261 '45', 2262 '45', 2263 '45', 2264 '55', 2265 '55', 2266 '55', 2267 ], 2268 'גז' => [ 2269 '0', 2270 '44', 2271 '44', 2272 '44', 2273 '45', 2274 '45', 2275 '45', 2276 ], 2277 'גח' => [ 2278 '0', 2279 '45', 2280 '45', 2281 '45', 2282 '55', 2283 '55', 2284 '55', 2285 ], 2286 'גכ' => [ 2287 '0', 2288 '45', 2289 '45', 2290 '45', 2291 '55', 2292 '55', 2293 '55', 2294 ], 2295 'גך' => [ 2296 '0', 2297 '45', 2298 '45', 2299 '45', 2300 '55', 2301 '55', 2302 '55', 2303 ], 2304 'גצ' => [ 2305 '0', 2306 '44', 2307 '44', 2308 '44', 2309 '45', 2310 '45', 2311 '45', 2312 ], 2313 'גץ' => [ 2314 '0', 2315 '44', 2316 '44', 2317 '44', 2318 '45', 2319 '45', 2320 '45', 2321 ], 2322 'גק' => [ 2323 '0', 2324 '45', 2325 '45', 2326 '45', 2327 '54', 2328 '54', 2329 '54', 2330 ], 2331 'גש' => [ 2332 '0', 2333 '44', 2334 '44', 2335 '44', 2336 '54', 2337 '54', 2338 '54', 2339 ], 2340 'גת' => [ 2341 '0', 2342 '43', 2343 '43', 2344 '43', 2345 '53', 2346 '53', 2347 '53', 2348 ], 2349 'ג' => [ 2350 '0', 2351 '4', 2352 '4', 2353 '4', 2354 '5', 2355 '5', 2356 '5', 2357 ], 2358 'דז' => [ 2359 '0', 2360 '4', 2361 '4', 2362 '4', 2363 ], 2364 'דד' => [ 2365 '0', 2366 '3', 2367 '3', 2368 '3', 2369 '33', 2370 '33', 2371 '33', 2372 ], 2373 'דט' => [ 2374 '0', 2375 '33', 2376 '33', 2377 '33', 2378 ], 2379 'דש' => [ 2380 '0', 2381 '4', 2382 '4', 2383 '4', 2384 ], 2385 'דצ' => [ 2386 '0', 2387 '4', 2388 '4', 2389 '4', 2390 ], 2391 'דץ' => [ 2392 '0', 2393 '4', 2394 '4', 
2395 '4', 2396 ], 2397 'ד' => [ 2398 '0', 2399 '3', 2400 '3', 2401 '3', 2402 ], 2403 'הג' => [ 2404 '0', 2405 '54', 2406 '54', 2407 '54', 2408 '55', 2409 '55', 2410 '55', 2411 ], 2412 'הכ' => [ 2413 '0', 2414 '55', 2415 '55', 2416 '55', 2417 ], 2418 'הח' => [ 2419 '0', 2420 '55', 2421 '55', 2422 '55', 2423 ], 2424 'הק' => [ 2425 '0', 2426 '55', 2427 '55', 2428 '55', 2429 '5', 2430 '5', 2431 '5', 2432 ], 2433 'הה' => [ 2434 '0', 2435 '5', 2436 '5', 2437 '', 2438 '55', 2439 '55', 2440 '', 2441 ], 2442 'ה' => [ 2443 '0', 2444 '5', 2445 '5', 2446 '', 2447 ], 2448 'וי' => [ 2449 '1', 2450 '', 2451 '', 2452 '', 2453 '7', 2454 '7', 2455 '7', 2456 ], 2457 'ו' => [ 2458 '1', 2459 '7', 2460 '7', 2461 '7', 2462 '7', 2463 '', 2464 '', 2465 ], 2466 'וו' => [ 2467 '1', 2468 '7', 2469 '7', 2470 '7', 2471 '7', 2472 '', 2473 '', 2474 ], 2475 'וופ' => [ 2476 '1', 2477 '7', 2478 '7', 2479 '7', 2480 '77', 2481 '77', 2482 '77', 2483 ], 2484 'זש' => [ 2485 '0', 2486 '4', 2487 '4', 2488 '4', 2489 '44', 2490 '44', 2491 '44', 2492 ], 2493 'זדז' => [ 2494 '0', 2495 '2', 2496 '4', 2497 '4', 2498 ], 2499 'ז' => [ 2500 '0', 2501 '4', 2502 '4', 2503 '4', 2504 ], 2505 'זג' => [ 2506 '0', 2507 '44', 2508 '44', 2509 '44', 2510 '45', 2511 '45', 2512 '45', 2513 ], 2514 'זז' => [ 2515 '0', 2516 '4', 2517 '4', 2518 '4', 2519 '44', 2520 '44', 2521 '44', 2522 ], 2523 'זס' => [ 2524 '0', 2525 '44', 2526 '44', 2527 '44', 2528 ], 2529 'זצ' => [ 2530 '0', 2531 '44', 2532 '44', 2533 '44', 2534 ], 2535 'זץ' => [ 2536 '0', 2537 '44', 2538 '44', 2539 '44', 2540 ], 2541 'חג' => [ 2542 '0', 2543 '54', 2544 '54', 2545 '54', 2546 '53', 2547 '53', 2548 '53', 2549 ], 2550 'חח' => [ 2551 '0', 2552 '5', 2553 '5', 2554 '5', 2555 '55', 2556 '55', 2557 '55', 2558 ], 2559 'חק' => [ 2560 '0', 2561 '55', 2562 '55', 2563 '55', 2564 '5', 2565 '5', 2566 '5', 2567 ], 2568 'חכ' => [ 2569 '0', 2570 '45', 2571 '45', 2572 '45', 2573 '55', 2574 '55', 2575 '55', 2576 ], 2577 'חס' => [ 2578 '0', 2579 '5', 2580 '54', 2581 '54', 2582 ], 
2583 'חש' => [ 2584 '0', 2585 '5', 2586 '54', 2587 '54', 2588 ], 2589 'ח' => [ 2590 '0', 2591 '5', 2592 '5', 2593 '5', 2594 ], 2595 'טש' => [ 2596 '0', 2597 '4', 2598 '4', 2599 '4', 2600 ], 2601 'טד' => [ 2602 '0', 2603 '33', 2604 '33', 2605 '33', 2606 ], 2607 'טי' => [ 2608 '0', 2609 '3', 2610 '3', 2611 '3', 2612 '4', 2613 '4', 2614 '4', 2615 '3', 2616 '3', 2617 '34', 2618 ], 2619 'טת' => [ 2620 '0', 2621 '33', 2622 '33', 2623 '33', 2624 ], 2625 'טט' => [ 2626 '0', 2627 '3', 2628 '3', 2629 '3', 2630 '33', 2631 '33', 2632 '33', 2633 ], 2634 'ט' => [ 2635 '0', 2636 '3', 2637 '3', 2638 '3', 2639 ], 2640 'י' => [ 2641 '1', 2642 '1', 2643 '', 2644 '', 2645 ], 2646 'יא' => [ 2647 '1', 2648 '1', 2649 '', 2650 '', 2651 '1', 2652 '1', 2653 '1', 2654 ], 2655 'כג' => [ 2656 '0', 2657 '55', 2658 '55', 2659 '55', 2660 '54', 2661 '54', 2662 '54', 2663 ], 2664 'כש' => [ 2665 '0', 2666 '5', 2667 '54', 2668 '54', 2669 ], 2670 'כס' => [ 2671 '0', 2672 '5', 2673 '54', 2674 '54', 2675 ], 2676 'ככ' => [ 2677 '0', 2678 '5', 2679 '5', 2680 '5', 2681 '55', 2682 '55', 2683 '55', 2684 ], 2685 'כך' => [ 2686 '0', 2687 '5', 2688 '5', 2689 '5', 2690 '55', 2691 '55', 2692 '55', 2693 ], 2694 'כ' => [ 2695 '0', 2696 '5', 2697 '5', 2698 '5', 2699 ], 2700 'כח' => [ 2701 '0', 2702 '55', 2703 '55', 2704 '55', 2705 '5', 2706 '5', 2707 '5', 2708 ], 2709 'ך' => [ 2710 '0', 2711 '', 2712 '5', 2713 '5', 2714 ], 2715 'ל' => [ 2716 '0', 2717 '8', 2718 '8', 2719 '8', 2720 ], 2721 'לל' => [ 2722 '0', 2723 '88', 2724 '88', 2725 '88', 2726 '8', 2727 '8', 2728 '8', 2729 ], 2730 'מנ' => [ 2731 '0', 2732 '66', 2733 '66', 2734 '66', 2735 ], 2736 'מן' => [ 2737 '0', 2738 '66', 2739 '66', 2740 '66', 2741 ], 2742 'ממ' => [ 2743 '0', 2744 '6', 2745 '6', 2746 '6', 2747 '66', 2748 '66', 2749 '66', 2750 ], 2751 'מם' => [ 2752 '0', 2753 '6', 2754 '6', 2755 '6', 2756 '66', 2757 '66', 2758 '66', 2759 ], 2760 'מ' => [ 2761 '0', 2762 '6', 2763 '6', 2764 '6', 2765 ], 2766 'ם' => [ 2767 '0', 2768 '', 2769 '6', 2770 '6', 2771 ], 
2772 'נמ' => [ 2773 '0', 2774 '66', 2775 '66', 2776 '66', 2777 ], 2778 'נם' => [ 2779 '0', 2780 '66', 2781 '66', 2782 '66', 2783 ], 2784 'ננ' => [ 2785 '0', 2786 '6', 2787 '6', 2788 '6', 2789 '66', 2790 '66', 2791 '66', 2792 ], 2793 'נן' => [ 2794 '0', 2795 '6', 2796 '6', 2797 '6', 2798 '66', 2799 '66', 2800 '66', 2801 ], 2802 'נ' => [ 2803 '0', 2804 '6', 2805 '6', 2806 '6', 2807 ], 2808 'ן' => [ 2809 '0', 2810 '', 2811 '6', 2812 '6', 2813 ], 2814 'סתש' => [ 2815 '0', 2816 '2', 2817 '4', 2818 '4', 2819 ], 2820 'סתז' => [ 2821 '0', 2822 '2', 2823 '4', 2824 '4', 2825 ], 2826 'סטז' => [ 2827 '0', 2828 '2', 2829 '4', 2830 '4', 2831 ], 2832 'סטש' => [ 2833 '0', 2834 '2', 2835 '4', 2836 '4', 2837 ], 2838 'סצד' => [ 2839 '0', 2840 '2', 2841 '4', 2842 '4', 2843 ], 2844 'סט' => [ 2845 '0', 2846 '2', 2847 '4', 2848 '4', 2849 '43', 2850 '43', 2851 '43', 2852 ], 2853 'סת' => [ 2854 '0', 2855 '2', 2856 '4', 2857 '4', 2858 '43', 2859 '43', 2860 '43', 2861 ], 2862 'סג' => [ 2863 '0', 2864 '44', 2865 '44', 2866 '44', 2867 '4', 2868 '4', 2869 '4', 2870 ], 2871 'סס' => [ 2872 '0', 2873 '4', 2874 '4', 2875 '4', 2876 '44', 2877 '44', 2878 '44', 2879 ], 2880 'סצ' => [ 2881 '0', 2882 '44', 2883 '44', 2884 '44', 2885 ], 2886 'סץ' => [ 2887 '0', 2888 '44', 2889 '44', 2890 '44', 2891 ], 2892 'סז' => [ 2893 '0', 2894 '44', 2895 '44', 2896 '44', 2897 ], 2898 'סש' => [ 2899 '0', 2900 '44', 2901 '44', 2902 '44', 2903 ], 2904 'ס' => [ 2905 '0', 2906 '4', 2907 '4', 2908 '4', 2909 ], 2910 'ע' => [ 2911 '1', 2912 '0', 2913 '', 2914 '', 2915 ], 2916 'פב' => [ 2917 '0', 2918 '7', 2919 '7', 2920 '7', 2921 '77', 2922 '77', 2923 '77', 2924 ], 2925 'פוו' => [ 2926 '0', 2927 '7', 2928 '7', 2929 '7', 2930 '77', 2931 '77', 2932 '77', 2933 ], 2934 'פפ' => [ 2935 '0', 2936 '7', 2937 '7', 2938 '7', 2939 '77', 2940 '77', 2941 '77', 2942 ], 2943 'פף' => [ 2944 '0', 2945 '7', 2946 '7', 2947 '7', 2948 '77', 2949 '77', 2950 '77', 2951 ], 2952 'פ' => [ 2953 '0', 2954 '7', 2955 '7', 2956 '7', 2957 ], 2958 'ף' => [ 
2959 '0', 2960 '', 2961 '7', 2962 '7', 2963 ], 2964 'צג' => [ 2965 '0', 2966 '44', 2967 '44', 2968 '44', 2969 '45', 2970 '45', 2971 '45', 2972 ], 2973 'צז' => [ 2974 '0', 2975 '44', 2976 '44', 2977 '44', 2978 ], 2979 'צס' => [ 2980 '0', 2981 '44', 2982 '44', 2983 '44', 2984 ], 2985 'צצ' => [ 2986 '0', 2987 '4', 2988 '4', 2989 '4', 2990 '5', 2991 '5', 2992 '5', 2993 '44', 2994 '44', 2995 '44', 2996 '54', 2997 '54', 2998 '54', 2999 '45', 3000 '45', 3001 '45', 3002 ], 3003 'צץ' => [ 3004 '0', 3005 '4', 3006 '4', 3007 '4', 3008 '5', 3009 '5', 3010 '5', 3011 '44', 3012 '44', 3013 '44', 3014 '54', 3015 '54', 3016 '54', 3017 ], 3018 'צש' => [ 3019 '0', 3020 '44', 3021 '44', 3022 '44', 3023 '4', 3024 '4', 3025 '4', 3026 '5', 3027 '5', 3028 '5', 3029 ], 3030 'צ' => [ 3031 '0', 3032 '4', 3033 '4', 3034 '4', 3035 '5', 3036 '5', 3037 '5', 3038 ], 3039 'ץ' => [ 3040 '0', 3041 '', 3042 '4', 3043 '4', 3044 ], 3045 'קה' => [ 3046 '0', 3047 '55', 3048 '55', 3049 '5', 3050 ], 3051 'קס' => [ 3052 '0', 3053 '5', 3054 '54', 3055 '54', 3056 ], 3057 'קש' => [ 3058 '0', 3059 '5', 3060 '54', 3061 '54', 3062 ], 3063 'קק' => [ 3064 '0', 3065 '5', 3066 '5', 3067 '5', 3068 '55', 3069 '55', 3070 '55', 3071 ], 3072 'קח' => [ 3073 '0', 3074 '55', 3075 '55', 3076 '55', 3077 ], 3078 'קכ' => [ 3079 '0', 3080 '55', 3081 '55', 3082 '55', 3083 ], 3084 'קך' => [ 3085 '0', 3086 '55', 3087 '55', 3088 '55', 3089 ], 3090 'קג' => [ 3091 '0', 3092 '55', 3093 '55', 3094 '55', 3095 '54', 3096 '54', 3097 '54', 3098 ], 3099 'ק' => [ 3100 '0', 3101 '5', 3102 '5', 3103 '5', 3104 ], 3105 'רר' => [ 3106 '0', 3107 '99', 3108 '99', 3109 '99', 3110 '9', 3111 '9', 3112 '9', 3113 ], 3114 'ר' => [ 3115 '0', 3116 '9', 3117 '9', 3118 '9', 3119 ], 3120 'שטז' => [ 3121 '0', 3122 '2', 3123 '4', 3124 '4', 3125 ], 3126 'שתש' => [ 3127 '0', 3128 '2', 3129 '4', 3130 '4', 3131 ], 3132 'שתז' => [ 3133 '0', 3134 '2', 3135 '4', 3136 '4', 3137 ], 3138 'שטש' => [ 3139 '0', 3140 '2', 3141 '4', 3142 '4', 3143 ], 3144 'שד' => [ 3145 '0', 
3146 '2', 3147 '43', 3148 '43', 3149 ], 3150 'שז' => [ 3151 '0', 3152 '44', 3153 '44', 3154 '44', 3155 ], 3156 'שס' => [ 3157 '0', 3158 '44', 3159 '44', 3160 '44', 3161 ], 3162 'שת' => [ 3163 '0', 3164 '2', 3165 '43', 3166 '43', 3167 ], 3168 'שג' => [ 3169 '0', 3170 '4', 3171 '4', 3172 '4', 3173 '44', 3174 '44', 3175 '44', 3176 '4', 3177 '43', 3178 '43', 3179 ], 3180 'שט' => [ 3181 '0', 3182 '2', 3183 '43', 3184 '43', 3185 '44', 3186 '44', 3187 '44', 3188 ], 3189 'שצ' => [ 3190 '0', 3191 '44', 3192 '44', 3193 '44', 3194 '45', 3195 '45', 3196 '45', 3197 ], 3198 'שץ' => [ 3199 '0', 3200 '44', 3201 '', 3202 '44', 3203 '45', 3204 '', 3205 '45', 3206 ], 3207 'שש' => [ 3208 '0', 3209 '4', 3210 '4', 3211 '4', 3212 '44', 3213 '44', 3214 '44', 3215 ], 3216 'ש' => [ 3217 '0', 3218 '4', 3219 '4', 3220 '4', 3221 ], 3222 'תג' => [ 3223 '0', 3224 '34', 3225 '34', 3226 '34', 3227 ], 3228 'תז' => [ 3229 '0', 3230 '34', 3231 '34', 3232 '34', 3233 ], 3234 'תש' => [ 3235 '0', 3236 '4', 3237 '4', 3238 '4', 3239 ], 3240 'תת' => [ 3241 '0', 3242 '3', 3243 '3', 3244 '3', 3245 '4', 3246 '4', 3247 '4', 3248 '33', 3249 '33', 3250 '33', 3251 '44', 3252 '44', 3253 '44', 3254 '34', 3255 '34', 3256 '34', 3257 '43', 3258 '43', 3259 '43', 3260 ], 3261 'ת' => [ 3262 '0', 3263 '3', 3264 '3', 3265 '3', 3266 '4', 3267 '4', 3268 '4', 3269 ], 3270 // Arabic alphabet 3271 'ا' => [ 3272 '1', 3273 '0', 3274 '', 3275 '', 3276 ], 3277 'ب' => [ 3278 '0', 3279 '7', 3280 '7', 3281 '7', 3282 ], 3283 'ت' => [ 3284 '0', 3285 '3', 3286 '3', 3287 '3', 3288 ], 3289 'ث' => [ 3290 '0', 3291 '3', 3292 '3', 3293 '3', 3294 ], 3295 'ج' => [ 3296 '0', 3297 '4', 3298 '4', 3299 '4', 3300 ], 3301 'ح' => [ 3302 '0', 3303 '5', 3304 '5', 3305 '5', 3306 ], 3307 'خ' => [ 3308 '0', 3309 '5', 3310 '5', 3311 '5', 3312 ], 3313 'د' => [ 3314 '0', 3315 '3', 3316 '3', 3317 '3', 3318 ], 3319 'ذ' => [ 3320 '0', 3321 '3', 3322 '3', 3323 '3', 3324 ], 3325 'ر' => [ 3326 '0', 3327 '9', 3328 '9', 3329 '9', 3330 ], 3331 'ز' => [ 3332 '0', 3333 
'4',
            '4',
            '4',
        ],
        'س' => [
            '0',
            '4',
            '4',
            '4',
        ],
        'ش' => [
            '0',
            '4',
            '4',
            '4',
        ],
        'ص' => [
            '0',
            '4',
            '4',
            '4',
        ],
        'ض' => [
            '0',
            '3',
            '3',
            '3',
        ],
        'ط' => [
            '0',
            '3',
            '3',
            '3',
        ],
        'ظ' => [
            '0',
            '4',
            '4',
            '4',
        ],
        'ع' => [
            '1',
            '0',
            '',
            '',
        ],
        'غ' => [
            '0',
            '0',
            '',
            '',
        ],
        'ف' => [
            '0',
            '7',
            '7',
            '7',
        ],
        'ق' => [
            '0',
            '5',
            '5',
            '5',
        ],
        'ك' => [
            '0',
            '5',
            '5',
            '5',
        ],
        'ل' => [
            '0',
            '8',
            '8',
            '8',
        ],
        'لا' => [
            '0',
            '8',
            '8',
            '8',
        ],
        'م' => [
            '0',
            '6',
            '6',
            '6',
        ],
        'ن' => [
            '0',
            '6',
            '6',
            '6',
        ],
        'هن' => [
            '0',
            '66',
            '66',
            '66',
        ],
        'ه' => [
            '0',
            '5',
            '5',
            '',
        ],
        'و' => [
            '1',
            '',
            '',
            '',
            '7',
            '',
            '',
        ],
        'ي' => [
            '0',
            '1',
            '',
            '',
        ],
        'آ' => [
            '0',
            '1',
            '',
            '',
        ],
        'ة' => [
            '0',
            '',
            '',
            '3',
        ],
        'ی' => [
            '0',
            '1',
            '',
            '',
        ],
        'ى' => [
            '1',
            '1',
            '',
            '',
        ],
    ];

    /**
     * List the phonetic algorithms offered by this class.
     *
     * @return string[] Translated algorithm names, keyed by the identifiers
     *                  'std' (Russell) and 'dm' (Daitch-Mokotoff).
     */
    public static function getAlgorithms(): array
    {
        // Each translator note must stay directly above the call it describes.

        /* I18N: http://en.wikipedia.org/wiki/Soundex */
        $russell = I18N::translate('Russell');

        /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
        $daitch_mokotoff = I18N::translate('Daitch-Mokotoff');

        return [
            'std' => $russell,
            'dm'  => $daitch_mokotoff,
        ];
    }

    /**
     * Is there a match between two soundex codes?
*
     * Each argument is a colon-separated list of codes, as produced by
     * russell() or daitchMokotoff(). The lists match when they have at
     * least one code in common. An empty list never matches anything.
     *
     * @param string $soundex1
     * @param string $soundex2
     *
     * @return bool
     */
    public static function compare($soundex1, $soundex2): bool
    {
        if ($soundex1 === '' || $soundex2 === '') {
            return false;
        }

        $shared = array_intersect(explode(':', $soundex1), explode(':', $soundex2));

        return $shared !== [];
    }

    /**
     * Generate Russell soundex codes for a given text.
     *
     * One code is generated for each space-separated word, plus one for all
     * the words joined together. Empty words (caused by leading, trailing or
     * consecutive spaces) and texts without any recognisable sound are
     * ignored, so that the result never contains an empty or "0000" code.
     *
     * @param string $text
     *
     * @return string Colon-separated list of unique codes; may be empty.
     */
    public static function russell(string $text): string
    {
        $words = explode(' ', $text);

        // Code every individual word first ...
        $candidates = $words;

        // ... then combine words, e.g. “New York” as “Newyork”
        if (count($words) > 1) {
            $candidates[] = str_replace(' ', '', $text);
        }

        $soundex_array = [];

        foreach ($candidates as $candidate) {
            // An empty word has no sound. Coding it would add an empty code
            // to the list, which compare() would then treat as a match.
            if ($candidate === '') {
                continue;
            }

            $soundex = soundex($candidate);

            // Only return codes from recognisable sounds
            if ($soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }

        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        return implode(':', $soundex_array);
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
     *
     * Codes are generated for each space-separated word, and also for all
     * the words joined together.
     *
     * @param string $text
     *
     * @return string Colon-separated list of unique codes; may be empty.
     */
    public static function daitchMokotoff(string $text): string
    {
        $words      = explode(' ', $text);
        $candidates = $words;

        // Combine words, e.g. “New York” as “Newyork”
        if (count($words) > 1) {
            $candidates[] = str_replace(' ', '', $text);
        }

        $soundex_array = [];

        foreach ($candidates as $candidate) {
            $soundex_array = array_merge($soundex_array, self::daitchMokotoffWord($candidate));
        }

        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);

        return implode(':', $soundex_array);
    }

    /**
     * Calculate the Daitch-Mokotoff soundex for a word.
     *
     * The word is split into the longest chunks found in the sound table.
     * Each chunk contributes one sound per alternative listed in its table
     * entry, so a single word can produce several codes.
     *
     * @param string $name
     *
     * @return string[] List of possible DM codes for the word.
     */
    private static function daitchMokotoffWord(string $name): array
    {
        // Apply special transformation rules to the input string
        // NOTE(review): the rules are applied with str_replace(), so any
        // regex-style anchor in the rule table is matched literally.
        // Confirm that this is intended before changing it: stored codes
        // depend on the current behaviour.
        $name = I18N::strtoupper($name);
        foreach (self::TRANSFORM_NAMES as [$search, $replace]) {
            $name = str_replace($search, $replace, $name);
        }

        // Initialize
        $script   = I18N::textScript($name);
        $noVowels = $script === 'Hebr' || $script === 'Arab';

        $length        = strlen($name); // Length in bytes, not characters
        $currPos       = 0;
        $state         = 1;       // 1: start of input string, 2: before vowel, 3: other
        $result        = [];      // accumulate complete 6-digit D-M codes here
        $partialResult = [['!']]; // incomplete D-M codes ('!' stops "duplicate sound" check)

        // Loop through the input string.
        // Stop when the string is exhausted or when no more partial results remain
        while ($partialResult !== [] && $currPos < $length) {
            // Find the DM coding table entry for the chunk at the current position
            $thisEntry = self::longestTableKey($name, $currPos);

            if ($thisEntry === '') {
                $currPos++; // Not in table: advance pointer to next byte
                continue;   // and try again
            }

            $soundTableEntry = self::DM_SOUNDS[$thisEntry];
            $workingResult   = $partialResult;
            $partialResult   = [];
            $currPos        += strlen($thisEntry);

            // Not at beginning of input string
            if ($state !== 1) {
                // Element [0] of a table entry is '0' unless the chunk is a vowel
                $nextEntry   = self::longestTableKey($name, $currPos);
                $beforeVowel = $nextEntry !== '' && self::DM_SOUNDS[$nextEntry][0] !== '0';

                // 2: next chunk is a vowel, 3: anything else (including end of input)
                $state = $beforeVowel ? 2 : 3;
            }

            // Visit the sound for this state in every triplet of the table entry.
            // Each step of 3 moves to the next triplet while keeping the same basic state.
            for (; $state < count($soundTableEntry); $state += 3) {
                $sound = $soundTableEntry[$state];

                foreach ($workingResult as $workingEntry) {
                    $last = count($workingEntry) - 1;

                    // empty means 'ignore this sound in this state'
                    if ($sound === '') {
                        $workingEntry[$last] .= '!'; // Prevent false 'doubles'
                        $partialResult[] = $workingEntry;
                        continue;
                    }

                    // A sound that repeats the previous one is dropped, except
                    // for Hebrew and Arabic, where the repeated sound is kept.
                    // NOTE(review): the previous comment here described creating
                    // a pair of codes (with and without the repeat), but only
                    // the code with the repeat is generated.
                    if ($sound !== $workingEntry[$last] || $noVowels) {
                        $workingEntry[] = $sound;
                    }

                    // We're looking for 7 entries because the first is '!' and doesn't count
                    if (count($workingEntry) < 7) {
                        $partialResult[] = $workingEntry;
                        continue;
                    }

                    // This is the 6th code in the sequence: the code is complete
                    $code = self::finishCode($workingEntry);

                    if ($code !== '') {
                        $result[] = $code;
                    }
                }
            }
        }

        // Zero-fill and copy all remaining partial results
        foreach ($partialResult as $workingEntry) {
            $code = self::finishCode($workingEntry);

            if ($code !== '') {
                $result[] = $code;
            }
        }

        return $result;
    }

    /**
     * Find the longest sound-table key that starts at a given byte offset.
     *
     * @param string $name
     * @param int    $pos  Byte offset into $name
     *
     * @return string The matching key, or an empty string if there is none.
     */
    private static function longestTableKey(string $name, int $pos): string
    {
        $available = strlen($name) - $pos;

        // Start with the maximum length chunk, and try shorter ones until one is in the table
        for ($bytes = min(self::MAXCHAR, $available); $bytes > 0; $bytes--) {
            $chunk = substr($name, $pos, $bytes);

            if (isset(self::DM_SOUNDS[$chunk])) {
                return $chunk;
            }
        }

        return '';
    }

    /**
     * Convert a list of accumulated sounds into a 6-digit code.
     *
     * @param string[] $sounds Sound values, possibly containing '!' markers
     *
     * @return string The zero-filled code, or an empty string when there is
     *                nothing worth returning.
     */
    private static function finishCode(array $sounds): string
    {
        $digits = str_replace('!', '', implode('', $sounds));

        // Only return codes from recognisable sounds.
        // NOTE(review): a lone '0' is discarded as well as an empty string.
        // This keeps the existing behaviour; confirm whether it is intended.
        if ($digits === '' || $digits === '0') {
            return '';
        }

        return substr($digits . '000000', 0, 6);
    }
}