1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2021 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees; 21 22/** 23 * Phonetic matching of strings. 24 */ 25class Soundex 26{ 27 // Determine the Daitch–Mokotoff Soundex code for a word 28 // Original implementation by Gerry Kroll, and analysis by Meliza Amity 29 30 // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!) 31 private const MAXCHAR = 7; 32 33 /** 34 * Name transformation arrays. 35 * Used to transform the Name string to simplify the "sounds like" table. 36 * This is especially useful in Hebrew. 37 * 38 * Each array entry defines the "from" and "to" arguments of an preg($from, $to, $text) 39 * function call to achieve the desired transformations. 40 * 41 * Note about the use of "\x01": 42 * This code, which can’t legitimately occur in the kind of text we're dealing with, 43 * is used as a place-holder so that conditional string replacements can be done. 
44 */ 45 private const TRANSFORM_NAMES = [ 46 // Force Yiddish ligatures to be treated as separate letters 47 [ 48 'װ', 49 'וו', 50 ], 51 [ 52 'ײ', 53 'יי', 54 ], 55 [ 56 'ױ', 57 'וי', 58 ], 59 [ 60 'בו', 61 'בע', 62 ], 63 [ 64 'פו', 65 'פע', 66 ], 67 [ 68 'ומ', 69 'עמ', 70 ], 71 [ 72 'ום', 73 'עם', 74 ], 75 [ 76 'ונ', 77 'ענ', 78 ], 79 [ 80 'ון', 81 'ען', 82 ], 83 [ 84 'וו', 85 'ב', 86 ], 87 [ 88 "\x01", 89 '', 90 ], 91 [ 92 'ייה$', 93 "\x01ה", 94 ], 95 [ 96 'ייע$', 97 "\x01ע", 98 ], 99 [ 100 'יי', 101 'ע', 102 ], 103 [ 104 "\x01", 105 'יי', 106 ], 107 ]; 108 109 /** 110 * The DM sound coding table is organized this way: 111 * key: a variable-length string that corresponds to the UTF-8 character sequence 112 * represented by the table entry. Currently, that string can be up to 7 113 * bytes long. This maximum length is defined by the value of global variable 114 * $maxchar. 115 * 116 * value: an array as follows: 117 * [0]: zero if not a vowel 118 * [1]: sound value when this string is at the beginning of the word 119 * [2]: sound value when this string is followed by a vowel 120 * [3]: sound value for other cases 121 * [1],[2],[3] can be repeated several times to create branches in the code 122 * an empty sound value means "ignore in this state" 123 */ 124 private const DM_SOUNDS = [ 125 'A' => [ 126 '1', 127 '0', 128 '', 129 '', 130 ], 131 'À' => [ 132 '1', 133 '0', 134 '', 135 '', 136 ], 137 'Á' => [ 138 '1', 139 '0', 140 '', 141 '', 142 ], 143 'Â' => [ 144 '1', 145 '0', 146 '', 147 '', 148 ], 149 'Ã' => [ 150 '1', 151 '0', 152 '', 153 '', 154 ], 155 'Ä' => [ 156 '1', 157 '0', 158 '1', 159 '', 160 '0', 161 '', 162 '', 163 ], 164 'Å' => [ 165 '1', 166 '0', 167 '', 168 '', 169 ], 170 'Ă' => [ 171 '1', 172 '0', 173 '', 174 '', 175 ], 176 'Ą' => [ 177 '1', 178 '', 179 '', 180 '', 181 '', 182 '', 183 '6', 184 ], 185 'Ạ' => [ 186 '1', 187 '0', 188 '', 189 '', 190 ], 191 'Ả' => [ 192 '1', 193 '0', 194 '', 195 '', 196 ], 197 'Ấ' => [ 198 '1', 199 '0', 200 '', 201 '', 
202 ], 203 'Ầ' => [ 204 '1', 205 '0', 206 '', 207 '', 208 ], 209 'Ẩ' => [ 210 '1', 211 '0', 212 '', 213 '', 214 ], 215 'Ẫ' => [ 216 '1', 217 '0', 218 '', 219 '', 220 ], 221 'Ậ' => [ 222 '1', 223 '0', 224 '', 225 '', 226 ], 227 'Ắ' => [ 228 '1', 229 '0', 230 '', 231 '', 232 ], 233 'Ằ' => [ 234 '1', 235 '0', 236 '', 237 '', 238 ], 239 'Ẳ' => [ 240 '1', 241 '0', 242 '', 243 '', 244 ], 245 'Ẵ' => [ 246 '1', 247 '0', 248 '', 249 '', 250 ], 251 'Ặ' => [ 252 '1', 253 '0', 254 '', 255 '', 256 ], 257 'AE' => [ 258 '1', 259 '0', 260 '1', 261 '', 262 ], 263 'Æ' => [ 264 '1', 265 '0', 266 '1', 267 '', 268 ], 269 'AI' => [ 270 '1', 271 '0', 272 '1', 273 '', 274 ], 275 'AJ' => [ 276 '1', 277 '0', 278 '1', 279 '', 280 ], 281 'AU' => [ 282 '1', 283 '0', 284 '7', 285 '', 286 ], 287 'AV' => [ 288 '1', 289 '0', 290 '7', 291 '', 292 '7', 293 '7', 294 '7', 295 ], 296 'ÄU' => [ 297 '1', 298 '0', 299 '1', 300 '', 301 ], 302 'AY' => [ 303 '1', 304 '0', 305 '1', 306 '', 307 ], 308 'B' => [ 309 '0', 310 '7', 311 '7', 312 '7', 313 ], 314 'C' => [ 315 '0', 316 '5', 317 '5', 318 '5', 319 '34', 320 '4', 321 '4', 322 ], 323 'Ć' => [ 324 '0', 325 '4', 326 '4', 327 '4', 328 ], 329 'Č' => [ 330 '0', 331 '4', 332 '4', 333 '4', 334 ], 335 'Ç' => [ 336 '0', 337 '4', 338 '4', 339 '4', 340 ], 341 'CH' => [ 342 '0', 343 '5', 344 '5', 345 '5', 346 '34', 347 '4', 348 '4', 349 ], 350 'CHS' => [ 351 '0', 352 '5', 353 '54', 354 '54', 355 ], 356 'CK' => [ 357 '0', 358 '5', 359 '5', 360 '5', 361 '45', 362 '45', 363 '45', 364 ], 365 'CCS' => [ 366 '0', 367 '4', 368 '4', 369 '4', 370 ], 371 'CS' => [ 372 '0', 373 '4', 374 '4', 375 '4', 376 ], 377 'CSZ' => [ 378 '0', 379 '4', 380 '4', 381 '4', 382 ], 383 'CZ' => [ 384 '0', 385 '4', 386 '4', 387 '4', 388 ], 389 'CZS' => [ 390 '0', 391 '4', 392 '4', 393 '4', 394 ], 395 'D' => [ 396 '0', 397 '3', 398 '3', 399 '3', 400 ], 401 'Ď' => [ 402 '0', 403 '3', 404 '3', 405 '3', 406 ], 407 'Đ' => [ 408 '0', 409 '3', 410 '3', 411 '3', 412 ], 413 'DRS' => [ 414 '0', 415 '4', 416 
'4', 417 '4', 418 ], 419 'DRZ' => [ 420 '0', 421 '4', 422 '4', 423 '4', 424 ], 425 'DS' => [ 426 '0', 427 '4', 428 '4', 429 '4', 430 ], 431 'DSH' => [ 432 '0', 433 '4', 434 '4', 435 '4', 436 ], 437 'DSZ' => [ 438 '0', 439 '4', 440 '4', 441 '4', 442 ], 443 'DT' => [ 444 '0', 445 '3', 446 '3', 447 '3', 448 ], 449 'DDZ' => [ 450 '0', 451 '4', 452 '4', 453 '4', 454 ], 455 'DDZS' => [ 456 '0', 457 '4', 458 '4', 459 '4', 460 ], 461 'DZ' => [ 462 '0', 463 '4', 464 '4', 465 '4', 466 ], 467 'DŹ' => [ 468 '0', 469 '4', 470 '4', 471 '4', 472 ], 473 'DŻ' => [ 474 '0', 475 '4', 476 '4', 477 '4', 478 ], 479 'DZH' => [ 480 '0', 481 '4', 482 '4', 483 '4', 484 ], 485 'DZS' => [ 486 '0', 487 '4', 488 '4', 489 '4', 490 ], 491 'E' => [ 492 '1', 493 '0', 494 '', 495 '', 496 ], 497 'È' => [ 498 '1', 499 '0', 500 '', 501 '', 502 ], 503 'É' => [ 504 '1', 505 '0', 506 '', 507 '', 508 ], 509 'Ê' => [ 510 '1', 511 '0', 512 '', 513 '', 514 ], 515 'Ë' => [ 516 '1', 517 '0', 518 '', 519 '', 520 ], 521 'Ĕ' => [ 522 '1', 523 '0', 524 '', 525 '', 526 ], 527 'Ė' => [ 528 '1', 529 '0', 530 '', 531 '', 532 ], 533 'Ę' => [ 534 '1', 535 '', 536 '', 537 '6', 538 '', 539 '', 540 '', 541 ], 542 'Ẹ' => [ 543 '1', 544 '0', 545 '', 546 '', 547 ], 548 'Ẻ' => [ 549 '1', 550 '0', 551 '', 552 '', 553 ], 554 'Ẽ' => [ 555 '1', 556 '0', 557 '', 558 '', 559 ], 560 'Ế' => [ 561 '1', 562 '0', 563 '', 564 '', 565 ], 566 'Ề' => [ 567 '1', 568 '0', 569 '', 570 '', 571 ], 572 'Ể' => [ 573 '1', 574 '0', 575 '', 576 '', 577 ], 578 'Ễ' => [ 579 '1', 580 '0', 581 '', 582 '', 583 ], 584 'Ệ' => [ 585 '1', 586 '0', 587 '', 588 '', 589 ], 590 'EAU' => [ 591 '1', 592 '0', 593 '', 594 '', 595 ], 596 'EI' => [ 597 '1', 598 '0', 599 '1', 600 '', 601 ], 602 'EJ' => [ 603 '1', 604 '0', 605 '1', 606 '', 607 ], 608 'EU' => [ 609 '1', 610 '1', 611 '1', 612 '', 613 ], 614 'EY' => [ 615 '1', 616 '0', 617 '1', 618 '', 619 ], 620 'F' => [ 621 '0', 622 '7', 623 '7', 624 '7', 625 ], 626 'FB' => [ 627 '0', 628 '7', 629 '7', 630 '7', 631 ], 632 
'G' => [ 633 '0', 634 '5', 635 '5', 636 '5', 637 '34', 638 '4', 639 '4', 640 ], 641 'Ğ' => [ 642 '0', 643 '', 644 '', 645 '', 646 ], 647 'GGY' => [ 648 '0', 649 '5', 650 '5', 651 '5', 652 ], 653 'GY' => [ 654 '0', 655 '5', 656 '5', 657 '5', 658 ], 659 'H' => [ 660 '0', 661 '5', 662 '5', 663 '', 664 '5', 665 '5', 666 '5', 667 ], 668 'I' => [ 669 '1', 670 '0', 671 '', 672 '', 673 ], 674 'Ì' => [ 675 '1', 676 '0', 677 '', 678 '', 679 ], 680 'Í' => [ 681 '1', 682 '0', 683 '', 684 '', 685 ], 686 'Î' => [ 687 '1', 688 '0', 689 '', 690 '', 691 ], 692 'Ï' => [ 693 '1', 694 '0', 695 '', 696 '', 697 ], 698 'Ĩ' => [ 699 '1', 700 '0', 701 '', 702 '', 703 ], 704 'Į' => [ 705 '1', 706 '0', 707 '', 708 '', 709 ], 710 'İ' => [ 711 '1', 712 '0', 713 '', 714 '', 715 ], 716 'Ỉ' => [ 717 '1', 718 '0', 719 '', 720 '', 721 ], 722 'Ị' => [ 723 '1', 724 '0', 725 '', 726 '', 727 ], 728 'IA' => [ 729 '1', 730 '1', 731 '', 732 '', 733 ], 734 'IE' => [ 735 '1', 736 '1', 737 '', 738 '', 739 ], 740 'IO' => [ 741 '1', 742 '1', 743 '', 744 '', 745 ], 746 'IU' => [ 747 '1', 748 '1', 749 '', 750 '', 751 ], 752 'J' => [ 753 '0', 754 '1', 755 '', 756 '', 757 '4', 758 '4', 759 '4', 760 '5', 761 '5', 762 '', 763 ], 764 'K' => [ 765 '0', 766 '5', 767 '5', 768 '5', 769 ], 770 'KH' => [ 771 '0', 772 '5', 773 '5', 774 '5', 775 ], 776 'KS' => [ 777 '0', 778 '5', 779 '54', 780 '54', 781 ], 782 'L' => [ 783 '0', 784 '8', 785 '8', 786 '8', 787 ], 788 'Ľ' => [ 789 '0', 790 '8', 791 '8', 792 '8', 793 ], 794 'Ĺ' => [ 795 '0', 796 '8', 797 '8', 798 '8', 799 ], 800 'Ł' => [ 801 '0', 802 '7', 803 '7', 804 '7', 805 '8', 806 '8', 807 '8', 808 ], 809 'LL' => [ 810 '0', 811 '8', 812 '8', 813 '8', 814 '58', 815 '8', 816 '8', 817 '1', 818 '8', 819 '8', 820 ], 821 'LLY' => [ 822 '0', 823 '8', 824 '8', 825 '8', 826 '1', 827 '8', 828 '8', 829 ], 830 'LY' => [ 831 '0', 832 '8', 833 '8', 834 '8', 835 '1', 836 '8', 837 '8', 838 ], 839 'M' => [ 840 '0', 841 '6', 842 '6', 843 '6', 844 ], 845 'MĔ' => [ 846 '0', 847 '66', 848 '66', 
849 '66', 850 ], 851 'MN' => [ 852 '0', 853 '66', 854 '66', 855 '66', 856 ], 857 'N' => [ 858 '0', 859 '6', 860 '6', 861 '6', 862 ], 863 'Ń' => [ 864 '0', 865 '6', 866 '6', 867 '6', 868 ], 869 'Ň' => [ 870 '0', 871 '6', 872 '6', 873 '6', 874 ], 875 'Ñ' => [ 876 '0', 877 '6', 878 '6', 879 '6', 880 ], 881 'NM' => [ 882 '0', 883 '66', 884 '66', 885 '66', 886 ], 887 'O' => [ 888 '1', 889 '0', 890 '', 891 '', 892 ], 893 'Ò' => [ 894 '1', 895 '0', 896 '', 897 '', 898 ], 899 'Ó' => [ 900 '1', 901 '0', 902 '', 903 '', 904 ], 905 'Ô' => [ 906 '1', 907 '0', 908 '', 909 '', 910 ], 911 'Õ' => [ 912 '1', 913 '0', 914 '', 915 '', 916 ], 917 'Ö' => [ 918 '1', 919 '0', 920 '', 921 '', 922 ], 923 'Ø' => [ 924 '1', 925 '0', 926 '', 927 '', 928 ], 929 'Ő' => [ 930 '1', 931 '0', 932 '', 933 '', 934 ], 935 'Œ' => [ 936 '1', 937 '0', 938 '', 939 '', 940 ], 941 'Ơ' => [ 942 '1', 943 '0', 944 '', 945 '', 946 ], 947 'Ọ' => [ 948 '1', 949 '0', 950 '', 951 '', 952 ], 953 'Ỏ' => [ 954 '1', 955 '0', 956 '', 957 '', 958 ], 959 'Ố' => [ 960 '1', 961 '0', 962 '', 963 '', 964 ], 965 'Ồ' => [ 966 '1', 967 '0', 968 '', 969 '', 970 ], 971 'Ổ' => [ 972 '1', 973 '0', 974 '', 975 '', 976 ], 977 'Ỗ' => [ 978 '1', 979 '0', 980 '', 981 '', 982 ], 983 'Ộ' => [ 984 '1', 985 '0', 986 '', 987 '', 988 ], 989 'Ớ' => [ 990 '1', 991 '0', 992 '', 993 '', 994 ], 995 'Ờ' => [ 996 '1', 997 '0', 998 '', 999 '', 1000 ], 1001 'Ở' => [ 1002 '1', 1003 '0', 1004 '', 1005 '', 1006 ], 1007 'Ỡ' => [ 1008 '1', 1009 '0', 1010 '', 1011 '', 1012 ], 1013 'Ợ' => [ 1014 '1', 1015 '0', 1016 '', 1017 '', 1018 ], 1019 'OE' => [ 1020 '1', 1021 '0', 1022 '', 1023 '', 1024 ], 1025 'OI' => [ 1026 '1', 1027 '0', 1028 '1', 1029 '', 1030 ], 1031 'OJ' => [ 1032 '1', 1033 '0', 1034 '1', 1035 '', 1036 ], 1037 'OU' => [ 1038 '1', 1039 '0', 1040 '', 1041 '', 1042 ], 1043 'OY' => [ 1044 '1', 1045 '0', 1046 '1', 1047 '', 1048 ], 1049 'P' => [ 1050 '0', 1051 '7', 1052 '7', 1053 '7', 1054 ], 1055 'PF' => [ 1056 '0', 1057 '7', 1058 '7', 1059 '7', 1060 
], 1061 'PH' => [ 1062 '0', 1063 '7', 1064 '7', 1065 '7', 1066 ], 1067 'Q' => [ 1068 '0', 1069 '5', 1070 '5', 1071 '5', 1072 ], 1073 'R' => [ 1074 '0', 1075 '9', 1076 '9', 1077 '9', 1078 ], 1079 'Ř' => [ 1080 '0', 1081 '4', 1082 '4', 1083 '4', 1084 ], 1085 'RS' => [ 1086 '0', 1087 '4', 1088 '4', 1089 '4', 1090 '94', 1091 '94', 1092 '94', 1093 ], 1094 'RZ' => [ 1095 '0', 1096 '4', 1097 '4', 1098 '4', 1099 '94', 1100 '94', 1101 '94', 1102 ], 1103 'S' => [ 1104 '0', 1105 '4', 1106 '4', 1107 '4', 1108 ], 1109 'Ś' => [ 1110 '0', 1111 '4', 1112 '4', 1113 '4', 1114 ], 1115 'Š' => [ 1116 '0', 1117 '4', 1118 '4', 1119 '4', 1120 ], 1121 'Ş' => [ 1122 '0', 1123 '4', 1124 '4', 1125 '4', 1126 ], 1127 'SC' => [ 1128 '0', 1129 '2', 1130 '4', 1131 '4', 1132 ], 1133 'ŠČ' => [ 1134 '0', 1135 '2', 1136 '4', 1137 '4', 1138 ], 1139 'SCH' => [ 1140 '0', 1141 '4', 1142 '4', 1143 '4', 1144 ], 1145 'SCHD' => [ 1146 '0', 1147 '2', 1148 '43', 1149 '43', 1150 ], 1151 'SCHT' => [ 1152 '0', 1153 '2', 1154 '43', 1155 '43', 1156 ], 1157 'SCHTCH' => [ 1158 '0', 1159 '2', 1160 '4', 1161 '4', 1162 ], 1163 'SCHTSCH' => [ 1164 '0', 1165 '2', 1166 '4', 1167 '4', 1168 ], 1169 'SCHTSH' => [ 1170 '0', 1171 '2', 1172 '4', 1173 '4', 1174 ], 1175 'SD' => [ 1176 '0', 1177 '2', 1178 '43', 1179 '43', 1180 ], 1181 'SH' => [ 1182 '0', 1183 '4', 1184 '4', 1185 '4', 1186 ], 1187 'SHCH' => [ 1188 '0', 1189 '2', 1190 '4', 1191 '4', 1192 ], 1193 'SHD' => [ 1194 '0', 1195 '2', 1196 '43', 1197 '43', 1198 ], 1199 'SHT' => [ 1200 '0', 1201 '2', 1202 '43', 1203 '43', 1204 ], 1205 'SHTCH' => [ 1206 '0', 1207 '2', 1208 '4', 1209 '4', 1210 ], 1211 'SHTSH' => [ 1212 '0', 1213 '2', 1214 '4', 1215 '4', 1216 ], 1217 'ß' => [ 1218 '0', 1219 '', 1220 '4', 1221 '4', 1222 ], 1223 'ST' => [ 1224 '0', 1225 '2', 1226 '43', 1227 '43', 1228 ], 1229 'STCH' => [ 1230 '0', 1231 '2', 1232 '4', 1233 '4', 1234 ], 1235 'STRS' => [ 1236 '0', 1237 '2', 1238 '4', 1239 '4', 1240 ], 1241 'STRZ' => [ 1242 '0', 1243 '2', 1244 '4', 1245 '4', 1246 ], 
1247 'STSCH' => [ 1248 '0', 1249 '2', 1250 '4', 1251 '4', 1252 ], 1253 'STSH' => [ 1254 '0', 1255 '2', 1256 '4', 1257 '4', 1258 ], 1259 'SSZ' => [ 1260 '0', 1261 '4', 1262 '4', 1263 '4', 1264 ], 1265 'SZ' => [ 1266 '0', 1267 '4', 1268 '4', 1269 '4', 1270 ], 1271 'SZCS' => [ 1272 '0', 1273 '2', 1274 '4', 1275 '4', 1276 ], 1277 'SZCZ' => [ 1278 '0', 1279 '2', 1280 '4', 1281 '4', 1282 ], 1283 'SZD' => [ 1284 '0', 1285 '2', 1286 '43', 1287 '43', 1288 ], 1289 'SZT' => [ 1290 '0', 1291 '2', 1292 '43', 1293 '43', 1294 ], 1295 'T' => [ 1296 '0', 1297 '3', 1298 '3', 1299 '3', 1300 ], 1301 'Ť' => [ 1302 '0', 1303 '3', 1304 '3', 1305 '3', 1306 ], 1307 'Ţ' => [ 1308 '0', 1309 '3', 1310 '3', 1311 '3', 1312 '4', 1313 '4', 1314 '4', 1315 ], 1316 'TC' => [ 1317 '0', 1318 '4', 1319 '4', 1320 '4', 1321 ], 1322 'TCH' => [ 1323 '0', 1324 '4', 1325 '4', 1326 '4', 1327 ], 1328 'TH' => [ 1329 '0', 1330 '3', 1331 '3', 1332 '3', 1333 ], 1334 'TRS' => [ 1335 '0', 1336 '4', 1337 '4', 1338 '4', 1339 ], 1340 'TRZ' => [ 1341 '0', 1342 '4', 1343 '4', 1344 '4', 1345 ], 1346 'TS' => [ 1347 '0', 1348 '4', 1349 '4', 1350 '4', 1351 ], 1352 'TSCH' => [ 1353 '0', 1354 '4', 1355 '4', 1356 '4', 1357 ], 1358 'TSH' => [ 1359 '0', 1360 '4', 1361 '4', 1362 '4', 1363 ], 1364 'TSZ' => [ 1365 '0', 1366 '4', 1367 '4', 1368 '4', 1369 ], 1370 'TTCH' => [ 1371 '0', 1372 '4', 1373 '4', 1374 '4', 1375 ], 1376 'TTS' => [ 1377 '0', 1378 '4', 1379 '4', 1380 '4', 1381 ], 1382 'TTSCH' => [ 1383 '0', 1384 '4', 1385 '4', 1386 '4', 1387 ], 1388 'TTSZ' => [ 1389 '0', 1390 '4', 1391 '4', 1392 '4', 1393 ], 1394 'TTZ' => [ 1395 '0', 1396 '4', 1397 '4', 1398 '4', 1399 ], 1400 'TZ' => [ 1401 '0', 1402 '4', 1403 '4', 1404 '4', 1405 ], 1406 'TZS' => [ 1407 '0', 1408 '4', 1409 '4', 1410 '4', 1411 ], 1412 'U' => [ 1413 '1', 1414 '0', 1415 '', 1416 '', 1417 ], 1418 'Ù' => [ 1419 '1', 1420 '0', 1421 '', 1422 '', 1423 ], 1424 'Ú' => [ 1425 '1', 1426 '0', 1427 '', 1428 '', 1429 ], 1430 'Û' => [ 1431 '1', 1432 '0', 1433 '', 1434 '', 1435 
], 1436 'Ü' => [ 1437 '1', 1438 '0', 1439 '', 1440 '', 1441 ], 1442 'Ũ' => [ 1443 '1', 1444 '0', 1445 '', 1446 '', 1447 ], 1448 'Ū' => [ 1449 '1', 1450 '0', 1451 '', 1452 '', 1453 ], 1454 'Ů' => [ 1455 '1', 1456 '0', 1457 '', 1458 '', 1459 ], 1460 'Ű' => [ 1461 '1', 1462 '0', 1463 '', 1464 '', 1465 ], 1466 'Ų' => [ 1467 '1', 1468 '0', 1469 '', 1470 '', 1471 ], 1472 'Ư' => [ 1473 '1', 1474 '0', 1475 '', 1476 '', 1477 ], 1478 'Ụ' => [ 1479 '1', 1480 '0', 1481 '', 1482 '', 1483 ], 1484 'Ủ' => [ 1485 '1', 1486 '0', 1487 '', 1488 '', 1489 ], 1490 'Ứ' => [ 1491 '1', 1492 '0', 1493 '', 1494 '', 1495 ], 1496 'Ừ' => [ 1497 '1', 1498 '0', 1499 '', 1500 '', 1501 ], 1502 'Ử' => [ 1503 '1', 1504 '0', 1505 '', 1506 '', 1507 ], 1508 'Ữ' => [ 1509 '1', 1510 '0', 1511 '', 1512 '', 1513 ], 1514 'Ự' => [ 1515 '1', 1516 '0', 1517 '', 1518 '', 1519 ], 1520 'UE' => [ 1521 '1', 1522 '0', 1523 '', 1524 '', 1525 ], 1526 'UI' => [ 1527 '1', 1528 '0', 1529 '1', 1530 '', 1531 ], 1532 'UJ' => [ 1533 '1', 1534 '0', 1535 '1', 1536 '', 1537 ], 1538 'UY' => [ 1539 '1', 1540 '0', 1541 '1', 1542 '', 1543 ], 1544 'UW' => [ 1545 '1', 1546 '0', 1547 '1', 1548 '', 1549 '0', 1550 '7', 1551 '7', 1552 ], 1553 'V' => [ 1554 '0', 1555 '7', 1556 '7', 1557 '7', 1558 ], 1559 'W' => [ 1560 '0', 1561 '7', 1562 '7', 1563 '7', 1564 ], 1565 'X' => [ 1566 '0', 1567 '5', 1568 '54', 1569 '54', 1570 ], 1571 'Y' => [ 1572 '1', 1573 '1', 1574 '', 1575 '', 1576 ], 1577 'Ý' => [ 1578 '1', 1579 '1', 1580 '', 1581 '', 1582 ], 1583 'Ỳ' => [ 1584 '1', 1585 '1', 1586 '', 1587 '', 1588 ], 1589 'Ỵ' => [ 1590 '1', 1591 '1', 1592 '', 1593 '', 1594 ], 1595 'Ỷ' => [ 1596 '1', 1597 '1', 1598 '', 1599 '', 1600 ], 1601 'Ỹ' => [ 1602 '1', 1603 '1', 1604 '', 1605 '', 1606 ], 1607 'Z' => [ 1608 '0', 1609 '4', 1610 '4', 1611 '4', 1612 ], 1613 'Ź' => [ 1614 '0', 1615 '4', 1616 '4', 1617 '4', 1618 ], 1619 'Ż' => [ 1620 '0', 1621 '4', 1622 '4', 1623 '4', 1624 ], 1625 'Ž' => [ 1626 '0', 1627 '4', 1628 '4', 1629 '4', 1630 ], 1631 'ZD' => [ 1632 
'0', 1633 '2', 1634 '43', 1635 '43', 1636 ], 1637 'ZDZ' => [ 1638 '0', 1639 '2', 1640 '4', 1641 '4', 1642 ], 1643 'ZDZH' => [ 1644 '0', 1645 '2', 1646 '4', 1647 '4', 1648 ], 1649 'ZH' => [ 1650 '0', 1651 '4', 1652 '4', 1653 '4', 1654 ], 1655 'ZHD' => [ 1656 '0', 1657 '2', 1658 '43', 1659 '43', 1660 ], 1661 'ZHDZH' => [ 1662 '0', 1663 '2', 1664 '4', 1665 '4', 1666 ], 1667 'ZS' => [ 1668 '0', 1669 '4', 1670 '4', 1671 '4', 1672 ], 1673 'ZSCH' => [ 1674 '0', 1675 '4', 1676 '4', 1677 '4', 1678 ], 1679 'ZSH' => [ 1680 '0', 1681 '4', 1682 '4', 1683 '4', 1684 ], 1685 'ZZS' => [ 1686 '0', 1687 '4', 1688 '4', 1689 '4', 1690 ], 1691 // Cyrillic alphabet 1692 'А' => [ 1693 '1', 1694 '0', 1695 '', 1696 '', 1697 ], 1698 'Б' => [ 1699 '0', 1700 '7', 1701 '7', 1702 '7', 1703 ], 1704 'В' => [ 1705 '0', 1706 '7', 1707 '7', 1708 '7', 1709 ], 1710 'Г' => [ 1711 '0', 1712 '5', 1713 '5', 1714 '5', 1715 ], 1716 'Д' => [ 1717 '0', 1718 '3', 1719 '3', 1720 '3', 1721 ], 1722 'ДЗ' => [ 1723 '0', 1724 '4', 1725 '4', 1726 '4', 1727 ], 1728 'Е' => [ 1729 '1', 1730 '0', 1731 '', 1732 '', 1733 ], 1734 'Ё' => [ 1735 '1', 1736 '0', 1737 '', 1738 '', 1739 ], 1740 'Ж' => [ 1741 '0', 1742 '4', 1743 '4', 1744 '4', 1745 ], 1746 'З' => [ 1747 '0', 1748 '4', 1749 '4', 1750 '4', 1751 ], 1752 'И' => [ 1753 '1', 1754 '0', 1755 '', 1756 '', 1757 ], 1758 'Й' => [ 1759 '1', 1760 '1', 1761 '', 1762 '', 1763 '4', 1764 '4', 1765 '4', 1766 ], 1767 'К' => [ 1768 '0', 1769 '5', 1770 '5', 1771 '5', 1772 ], 1773 'Л' => [ 1774 '0', 1775 '8', 1776 '8', 1777 '8', 1778 ], 1779 'М' => [ 1780 '0', 1781 '6', 1782 '6', 1783 '6', 1784 ], 1785 'Н' => [ 1786 '0', 1787 '6', 1788 '6', 1789 '6', 1790 ], 1791 'О' => [ 1792 '1', 1793 '0', 1794 '', 1795 '', 1796 ], 1797 'П' => [ 1798 '0', 1799 '7', 1800 '7', 1801 '7', 1802 ], 1803 'Р' => [ 1804 '0', 1805 '9', 1806 '9', 1807 '9', 1808 ], 1809 'РЖ' => [ 1810 '0', 1811 '4', 1812 '4', 1813 '4', 1814 ], 1815 'С' => [ 1816 '0', 1817 '4', 1818 '4', 1819 '4', 1820 ], 1821 'Т' => [ 1822 '0', 
1823 '3', 1824 '3', 1825 '3', 1826 ], 1827 'У' => [ 1828 '1', 1829 '0', 1830 '', 1831 '', 1832 ], 1833 'Ф' => [ 1834 '0', 1835 '7', 1836 '7', 1837 '7', 1838 ], 1839 'Х' => [ 1840 '0', 1841 '5', 1842 '5', 1843 '5', 1844 ], 1845 'Ц' => [ 1846 '0', 1847 '4', 1848 '4', 1849 '4', 1850 ], 1851 'Ч' => [ 1852 '0', 1853 '4', 1854 '4', 1855 '4', 1856 ], 1857 'Ш' => [ 1858 '0', 1859 '4', 1860 '4', 1861 '4', 1862 ], 1863 'Щ' => [ 1864 '0', 1865 '2', 1866 '4', 1867 '4', 1868 ], 1869 'Ъ' => [ 1870 '0', 1871 '', 1872 '', 1873 '', 1874 ], 1875 'Ы' => [ 1876 '0', 1877 '1', 1878 '', 1879 '', 1880 ], 1881 'Ь' => [ 1882 '0', 1883 '', 1884 '', 1885 '', 1886 ], 1887 'Э' => [ 1888 '1', 1889 '0', 1890 '', 1891 '', 1892 ], 1893 'Ю' => [ 1894 '0', 1895 '1', 1896 '', 1897 '', 1898 ], 1899 'Я' => [ 1900 '0', 1901 '1', 1902 '', 1903 '', 1904 ], 1905 // Greek alphabet 1906 'Α' => [ 1907 '1', 1908 '0', 1909 '', 1910 '', 1911 ], 1912 'Ά' => [ 1913 '1', 1914 '0', 1915 '', 1916 '', 1917 ], 1918 'ΑΙ' => [ 1919 '1', 1920 '0', 1921 '1', 1922 '', 1923 ], 1924 'ΑΥ' => [ 1925 '1', 1926 '0', 1927 '1', 1928 '', 1929 ], 1930 'Β' => [ 1931 '0', 1932 '7', 1933 '7', 1934 '7', 1935 ], 1936 'Γ' => [ 1937 '0', 1938 '5', 1939 '5', 1940 '5', 1941 ], 1942 'Δ' => [ 1943 '0', 1944 '3', 1945 '3', 1946 '3', 1947 ], 1948 'Ε' => [ 1949 '1', 1950 '0', 1951 '', 1952 '', 1953 ], 1954 'Έ' => [ 1955 '1', 1956 '0', 1957 '', 1958 '', 1959 ], 1960 'ΕΙ' => [ 1961 '1', 1962 '0', 1963 '1', 1964 '', 1965 ], 1966 'ΕΥ' => [ 1967 '1', 1968 '1', 1969 '1', 1970 '', 1971 ], 1972 'Ζ' => [ 1973 '0', 1974 '4', 1975 '4', 1976 '4', 1977 ], 1978 'Η' => [ 1979 '1', 1980 '0', 1981 '', 1982 '', 1983 ], 1984 'Ή' => [ 1985 '1', 1986 '0', 1987 '', 1988 '', 1989 ], 1990 'Θ' => [ 1991 '0', 1992 '3', 1993 '3', 1994 '3', 1995 ], 1996 'Ι' => [ 1997 '1', 1998 '0', 1999 '', 2000 '', 2001 ], 2002 'Ί' => [ 2003 '1', 2004 '0', 2005 '', 2006 '', 2007 ], 2008 'Ϊ' => [ 2009 '1', 2010 '0', 2011 '', 2012 '', 2013 ], 2014 'ΐ' => [ 2015 '1', 2016 '0', 2017 '', 2018 
'', 2019 ], 2020 'Κ' => [ 2021 '0', 2022 '5', 2023 '5', 2024 '5', 2025 ], 2026 'Λ' => [ 2027 '0', 2028 '8', 2029 '8', 2030 '8', 2031 ], 2032 'Μ' => [ 2033 '0', 2034 '6', 2035 '6', 2036 '6', 2037 ], 2038 'ΜΠ' => [ 2039 '0', 2040 '7', 2041 '7', 2042 '7', 2043 ], 2044 'Ν' => [ 2045 '0', 2046 '6', 2047 '6', 2048 '6', 2049 ], 2050 'ΝΤ' => [ 2051 '0', 2052 '3', 2053 '3', 2054 '3', 2055 ], 2056 'Ξ' => [ 2057 '0', 2058 '5', 2059 '54', 2060 '54', 2061 ], 2062 'Ο' => [ 2063 '1', 2064 '0', 2065 '', 2066 '', 2067 ], 2068 'Ό' => [ 2069 '1', 2070 '0', 2071 '', 2072 '', 2073 ], 2074 'ΟΙ' => [ 2075 '1', 2076 '0', 2077 '1', 2078 '', 2079 ], 2080 'ΟΥ' => [ 2081 '1', 2082 '0', 2083 '1', 2084 '', 2085 ], 2086 'Π' => [ 2087 '0', 2088 '7', 2089 '7', 2090 '7', 2091 ], 2092 'Ρ' => [ 2093 '0', 2094 '9', 2095 '9', 2096 '9', 2097 ], 2098 'Σ' => [ 2099 '0', 2100 '4', 2101 '4', 2102 '4', 2103 ], 2104 'ς' => [ 2105 '0', 2106 '', 2107 '', 2108 '4', 2109 ], 2110 'Τ' => [ 2111 '0', 2112 '3', 2113 '3', 2114 '3', 2115 ], 2116 'ΤΖ' => [ 2117 '0', 2118 '4', 2119 '4', 2120 '4', 2121 ], 2122 'ΤΣ' => [ 2123 '0', 2124 '4', 2125 '4', 2126 '4', 2127 ], 2128 'Υ' => [ 2129 '1', 2130 '1', 2131 '', 2132 '', 2133 ], 2134 'Ύ' => [ 2135 '1', 2136 '1', 2137 '', 2138 '', 2139 ], 2140 'Ϋ' => [ 2141 '1', 2142 '1', 2143 '', 2144 '', 2145 ], 2146 'ΰ' => [ 2147 '1', 2148 '1', 2149 '', 2150 '', 2151 ], 2152 'ΥΚ' => [ 2153 '1', 2154 '5', 2155 '5', 2156 '5', 2157 ], 2158 'ΥΥ' => [ 2159 '1', 2160 '65', 2161 '65', 2162 '65', 2163 ], 2164 'Φ' => [ 2165 '0', 2166 '7', 2167 '7', 2168 '7', 2169 ], 2170 'Χ' => [ 2171 '0', 2172 '5', 2173 '5', 2174 '5', 2175 ], 2176 'Ψ' => [ 2177 '0', 2178 '7', 2179 '7', 2180 '7', 2181 ], 2182 'Ω' => [ 2183 '1', 2184 '0', 2185 '', 2186 '', 2187 ], 2188 'Ώ' => [ 2189 '1', 2190 '0', 2191 '', 2192 '', 2193 ], 2194 // Hebrew alphabet 2195 'א' => [ 2196 '1', 2197 '0', 2198 '', 2199 '', 2200 ], 2201 'או' => [ 2202 '1', 2203 '0', 2204 '7', 2205 '', 2206 ], 2207 'אג' => [ 2208 '1', 2209 '4', 2210 '4', 2211 
'4', 2212 '5', 2213 '5', 2214 '5', 2215 '34', 2216 '34', 2217 '34', 2218 ], 2219 'בב' => [ 2220 '0', 2221 '7', 2222 '7', 2223 '7', 2224 '77', 2225 '77', 2226 '77', 2227 ], 2228 'ב' => [ 2229 '0', 2230 '7', 2231 '7', 2232 '7', 2233 ], 2234 'גג' => [ 2235 '0', 2236 '4', 2237 '4', 2238 '4', 2239 '5', 2240 '5', 2241 '5', 2242 '45', 2243 '45', 2244 '45', 2245 '55', 2246 '55', 2247 '55', 2248 '54', 2249 '54', 2250 '54', 2251 ], 2252 'גד' => [ 2253 '0', 2254 '43', 2255 '43', 2256 '43', 2257 '53', 2258 '53', 2259 '53', 2260 ], 2261 'גה' => [ 2262 '0', 2263 '45', 2264 '45', 2265 '45', 2266 '55', 2267 '55', 2268 '55', 2269 ], 2270 'גז' => [ 2271 '0', 2272 '44', 2273 '44', 2274 '44', 2275 '45', 2276 '45', 2277 '45', 2278 ], 2279 'גח' => [ 2280 '0', 2281 '45', 2282 '45', 2283 '45', 2284 '55', 2285 '55', 2286 '55', 2287 ], 2288 'גכ' => [ 2289 '0', 2290 '45', 2291 '45', 2292 '45', 2293 '55', 2294 '55', 2295 '55', 2296 ], 2297 'גך' => [ 2298 '0', 2299 '45', 2300 '45', 2301 '45', 2302 '55', 2303 '55', 2304 '55', 2305 ], 2306 'גצ' => [ 2307 '0', 2308 '44', 2309 '44', 2310 '44', 2311 '45', 2312 '45', 2313 '45', 2314 ], 2315 'גץ' => [ 2316 '0', 2317 '44', 2318 '44', 2319 '44', 2320 '45', 2321 '45', 2322 '45', 2323 ], 2324 'גק' => [ 2325 '0', 2326 '45', 2327 '45', 2328 '45', 2329 '54', 2330 '54', 2331 '54', 2332 ], 2333 'גש' => [ 2334 '0', 2335 '44', 2336 '44', 2337 '44', 2338 '54', 2339 '54', 2340 '54', 2341 ], 2342 'גת' => [ 2343 '0', 2344 '43', 2345 '43', 2346 '43', 2347 '53', 2348 '53', 2349 '53', 2350 ], 2351 'ג' => [ 2352 '0', 2353 '4', 2354 '4', 2355 '4', 2356 '5', 2357 '5', 2358 '5', 2359 ], 2360 'דז' => [ 2361 '0', 2362 '4', 2363 '4', 2364 '4', 2365 ], 2366 'דד' => [ 2367 '0', 2368 '3', 2369 '3', 2370 '3', 2371 '33', 2372 '33', 2373 '33', 2374 ], 2375 'דט' => [ 2376 '0', 2377 '33', 2378 '33', 2379 '33', 2380 ], 2381 'דש' => [ 2382 '0', 2383 '4', 2384 '4', 2385 '4', 2386 ], 2387 'דצ' => [ 2388 '0', 2389 '4', 2390 '4', 2391 '4', 2392 ], 2393 'דץ' => [ 2394 '0', 2395 '4', 2396 
'4', 2397 '4', 2398 ], 2399 'ד' => [ 2400 '0', 2401 '3', 2402 '3', 2403 '3', 2404 ], 2405 'הג' => [ 2406 '0', 2407 '54', 2408 '54', 2409 '54', 2410 '55', 2411 '55', 2412 '55', 2413 ], 2414 'הכ' => [ 2415 '0', 2416 '55', 2417 '55', 2418 '55', 2419 ], 2420 'הח' => [ 2421 '0', 2422 '55', 2423 '55', 2424 '55', 2425 ], 2426 'הק' => [ 2427 '0', 2428 '55', 2429 '55', 2430 '55', 2431 '5', 2432 '5', 2433 '5', 2434 ], 2435 'הה' => [ 2436 '0', 2437 '5', 2438 '5', 2439 '', 2440 '55', 2441 '55', 2442 '', 2443 ], 2444 'ה' => [ 2445 '0', 2446 '5', 2447 '5', 2448 '', 2449 ], 2450 'וי' => [ 2451 '1', 2452 '', 2453 '', 2454 '', 2455 '7', 2456 '7', 2457 '7', 2458 ], 2459 'ו' => [ 2460 '1', 2461 '7', 2462 '7', 2463 '7', 2464 '7', 2465 '', 2466 '', 2467 ], 2468 'וו' => [ 2469 '1', 2470 '7', 2471 '7', 2472 '7', 2473 '7', 2474 '', 2475 '', 2476 ], 2477 'וופ' => [ 2478 '1', 2479 '7', 2480 '7', 2481 '7', 2482 '77', 2483 '77', 2484 '77', 2485 ], 2486 'זש' => [ 2487 '0', 2488 '4', 2489 '4', 2490 '4', 2491 '44', 2492 '44', 2493 '44', 2494 ], 2495 'זדז' => [ 2496 '0', 2497 '2', 2498 '4', 2499 '4', 2500 ], 2501 'ז' => [ 2502 '0', 2503 '4', 2504 '4', 2505 '4', 2506 ], 2507 'זג' => [ 2508 '0', 2509 '44', 2510 '44', 2511 '44', 2512 '45', 2513 '45', 2514 '45', 2515 ], 2516 'זז' => [ 2517 '0', 2518 '4', 2519 '4', 2520 '4', 2521 '44', 2522 '44', 2523 '44', 2524 ], 2525 'זס' => [ 2526 '0', 2527 '44', 2528 '44', 2529 '44', 2530 ], 2531 'זצ' => [ 2532 '0', 2533 '44', 2534 '44', 2535 '44', 2536 ], 2537 'זץ' => [ 2538 '0', 2539 '44', 2540 '44', 2541 '44', 2542 ], 2543 'חג' => [ 2544 '0', 2545 '54', 2546 '54', 2547 '54', 2548 '53', 2549 '53', 2550 '53', 2551 ], 2552 'חח' => [ 2553 '0', 2554 '5', 2555 '5', 2556 '5', 2557 '55', 2558 '55', 2559 '55', 2560 ], 2561 'חק' => [ 2562 '0', 2563 '55', 2564 '55', 2565 '55', 2566 '5', 2567 '5', 2568 '5', 2569 ], 2570 'חכ' => [ 2571 '0', 2572 '45', 2573 '45', 2574 '45', 2575 '55', 2576 '55', 2577 '55', 2578 ], 2579 'חס' => [ 2580 '0', 2581 '5', 2582 '54', 2583 '54', 
2584 ], 2585 'חש' => [ 2586 '0', 2587 '5', 2588 '54', 2589 '54', 2590 ], 2591 'ח' => [ 2592 '0', 2593 '5', 2594 '5', 2595 '5', 2596 ], 2597 'טש' => [ 2598 '0', 2599 '4', 2600 '4', 2601 '4', 2602 ], 2603 'טד' => [ 2604 '0', 2605 '33', 2606 '33', 2607 '33', 2608 ], 2609 'טי' => [ 2610 '0', 2611 '3', 2612 '3', 2613 '3', 2614 '4', 2615 '4', 2616 '4', 2617 '3', 2618 '3', 2619 '34', 2620 ], 2621 'טת' => [ 2622 '0', 2623 '33', 2624 '33', 2625 '33', 2626 ], 2627 'טט' => [ 2628 '0', 2629 '3', 2630 '3', 2631 '3', 2632 '33', 2633 '33', 2634 '33', 2635 ], 2636 'ט' => [ 2637 '0', 2638 '3', 2639 '3', 2640 '3', 2641 ], 2642 'י' => [ 2643 '1', 2644 '1', 2645 '', 2646 '', 2647 ], 2648 'יא' => [ 2649 '1', 2650 '1', 2651 '', 2652 '', 2653 '1', 2654 '1', 2655 '1', 2656 ], 2657 'כג' => [ 2658 '0', 2659 '55', 2660 '55', 2661 '55', 2662 '54', 2663 '54', 2664 '54', 2665 ], 2666 'כש' => [ 2667 '0', 2668 '5', 2669 '54', 2670 '54', 2671 ], 2672 'כס' => [ 2673 '0', 2674 '5', 2675 '54', 2676 '54', 2677 ], 2678 'ככ' => [ 2679 '0', 2680 '5', 2681 '5', 2682 '5', 2683 '55', 2684 '55', 2685 '55', 2686 ], 2687 'כך' => [ 2688 '0', 2689 '5', 2690 '5', 2691 '5', 2692 '55', 2693 '55', 2694 '55', 2695 ], 2696 'כ' => [ 2697 '0', 2698 '5', 2699 '5', 2700 '5', 2701 ], 2702 'כח' => [ 2703 '0', 2704 '55', 2705 '55', 2706 '55', 2707 '5', 2708 '5', 2709 '5', 2710 ], 2711 'ך' => [ 2712 '0', 2713 '', 2714 '5', 2715 '5', 2716 ], 2717 'ל' => [ 2718 '0', 2719 '8', 2720 '8', 2721 '8', 2722 ], 2723 'לל' => [ 2724 '0', 2725 '88', 2726 '88', 2727 '88', 2728 '8', 2729 '8', 2730 '8', 2731 ], 2732 'מנ' => [ 2733 '0', 2734 '66', 2735 '66', 2736 '66', 2737 ], 2738 'מן' => [ 2739 '0', 2740 '66', 2741 '66', 2742 '66', 2743 ], 2744 'ממ' => [ 2745 '0', 2746 '6', 2747 '6', 2748 '6', 2749 '66', 2750 '66', 2751 '66', 2752 ], 2753 'מם' => [ 2754 '0', 2755 '6', 2756 '6', 2757 '6', 2758 '66', 2759 '66', 2760 '66', 2761 ], 2762 'מ' => [ 2763 '0', 2764 '6', 2765 '6', 2766 '6', 2767 ], 2768 'ם' => [ 2769 '0', 2770 '', 2771 '6', 2772 '6', 
2773 ], 2774 'נמ' => [ 2775 '0', 2776 '66', 2777 '66', 2778 '66', 2779 ], 2780 'נם' => [ 2781 '0', 2782 '66', 2783 '66', 2784 '66', 2785 ], 2786 'ננ' => [ 2787 '0', 2788 '6', 2789 '6', 2790 '6', 2791 '66', 2792 '66', 2793 '66', 2794 ], 2795 'נן' => [ 2796 '0', 2797 '6', 2798 '6', 2799 '6', 2800 '66', 2801 '66', 2802 '66', 2803 ], 2804 'נ' => [ 2805 '0', 2806 '6', 2807 '6', 2808 '6', 2809 ], 2810 'ן' => [ 2811 '0', 2812 '', 2813 '6', 2814 '6', 2815 ], 2816 'סתש' => [ 2817 '0', 2818 '2', 2819 '4', 2820 '4', 2821 ], 2822 'סתז' => [ 2823 '0', 2824 '2', 2825 '4', 2826 '4', 2827 ], 2828 'סטז' => [ 2829 '0', 2830 '2', 2831 '4', 2832 '4', 2833 ], 2834 'סטש' => [ 2835 '0', 2836 '2', 2837 '4', 2838 '4', 2839 ], 2840 'סצד' => [ 2841 '0', 2842 '2', 2843 '4', 2844 '4', 2845 ], 2846 'סט' => [ 2847 '0', 2848 '2', 2849 '4', 2850 '4', 2851 '43', 2852 '43', 2853 '43', 2854 ], 2855 'סת' => [ 2856 '0', 2857 '2', 2858 '4', 2859 '4', 2860 '43', 2861 '43', 2862 '43', 2863 ], 2864 'סג' => [ 2865 '0', 2866 '44', 2867 '44', 2868 '44', 2869 '4', 2870 '4', 2871 '4', 2872 ], 2873 'סס' => [ 2874 '0', 2875 '4', 2876 '4', 2877 '4', 2878 '44', 2879 '44', 2880 '44', 2881 ], 2882 'סצ' => [ 2883 '0', 2884 '44', 2885 '44', 2886 '44', 2887 ], 2888 'סץ' => [ 2889 '0', 2890 '44', 2891 '44', 2892 '44', 2893 ], 2894 'סז' => [ 2895 '0', 2896 '44', 2897 '44', 2898 '44', 2899 ], 2900 'סש' => [ 2901 '0', 2902 '44', 2903 '44', 2904 '44', 2905 ], 2906 'ס' => [ 2907 '0', 2908 '4', 2909 '4', 2910 '4', 2911 ], 2912 'ע' => [ 2913 '1', 2914 '0', 2915 '', 2916 '', 2917 ], 2918 'פב' => [ 2919 '0', 2920 '7', 2921 '7', 2922 '7', 2923 '77', 2924 '77', 2925 '77', 2926 ], 2927 'פוו' => [ 2928 '0', 2929 '7', 2930 '7', 2931 '7', 2932 '77', 2933 '77', 2934 '77', 2935 ], 2936 'פפ' => [ 2937 '0', 2938 '7', 2939 '7', 2940 '7', 2941 '77', 2942 '77', 2943 '77', 2944 ], 2945 'פף' => [ 2946 '0', 2947 '7', 2948 '7', 2949 '7', 2950 '77', 2951 '77', 2952 '77', 2953 ], 2954 'פ' => [ 2955 '0', 2956 '7', 2957 '7', 2958 '7', 2959 ], 2960 
'ף' => [ 2961 '0', 2962 '', 2963 '7', 2964 '7', 2965 ], 2966 'צג' => [ 2967 '0', 2968 '44', 2969 '44', 2970 '44', 2971 '45', 2972 '45', 2973 '45', 2974 ], 2975 'צז' => [ 2976 '0', 2977 '44', 2978 '44', 2979 '44', 2980 ], 2981 'צס' => [ 2982 '0', 2983 '44', 2984 '44', 2985 '44', 2986 ], 2987 'צצ' => [ 2988 '0', 2989 '4', 2990 '4', 2991 '4', 2992 '5', 2993 '5', 2994 '5', 2995 '44', 2996 '44', 2997 '44', 2998 '54', 2999 '54', 3000 '54', 3001 '45', 3002 '45', 3003 '45', 3004 ], 3005 'צץ' => [ 3006 '0', 3007 '4', 3008 '4', 3009 '4', 3010 '5', 3011 '5', 3012 '5', 3013 '44', 3014 '44', 3015 '44', 3016 '54', 3017 '54', 3018 '54', 3019 ], 3020 'צש' => [ 3021 '0', 3022 '44', 3023 '44', 3024 '44', 3025 '4', 3026 '4', 3027 '4', 3028 '5', 3029 '5', 3030 '5', 3031 ], 3032 'צ' => [ 3033 '0', 3034 '4', 3035 '4', 3036 '4', 3037 '5', 3038 '5', 3039 '5', 3040 ], 3041 'ץ' => [ 3042 '0', 3043 '', 3044 '4', 3045 '4', 3046 ], 3047 'קה' => [ 3048 '0', 3049 '55', 3050 '55', 3051 '5', 3052 ], 3053 'קס' => [ 3054 '0', 3055 '5', 3056 '54', 3057 '54', 3058 ], 3059 'קש' => [ 3060 '0', 3061 '5', 3062 '54', 3063 '54', 3064 ], 3065 'קק' => [ 3066 '0', 3067 '5', 3068 '5', 3069 '5', 3070 '55', 3071 '55', 3072 '55', 3073 ], 3074 'קח' => [ 3075 '0', 3076 '55', 3077 '55', 3078 '55', 3079 ], 3080 'קכ' => [ 3081 '0', 3082 '55', 3083 '55', 3084 '55', 3085 ], 3086 'קך' => [ 3087 '0', 3088 '55', 3089 '55', 3090 '55', 3091 ], 3092 'קג' => [ 3093 '0', 3094 '55', 3095 '55', 3096 '55', 3097 '54', 3098 '54', 3099 '54', 3100 ], 3101 'ק' => [ 3102 '0', 3103 '5', 3104 '5', 3105 '5', 3106 ], 3107 'רר' => [ 3108 '0', 3109 '99', 3110 '99', 3111 '99', 3112 '9', 3113 '9', 3114 '9', 3115 ], 3116 'ר' => [ 3117 '0', 3118 '9', 3119 '9', 3120 '9', 3121 ], 3122 'שטז' => [ 3123 '0', 3124 '2', 3125 '4', 3126 '4', 3127 ], 3128 'שתש' => [ 3129 '0', 3130 '2', 3131 '4', 3132 '4', 3133 ], 3134 'שתז' => [ 3135 '0', 3136 '2', 3137 '4', 3138 '4', 3139 ], 3140 'שטש' => [ 3141 '0', 3142 '2', 3143 '4', 3144 '4', 3145 ], 3146 'שד' => [ 
3147 '0', 3148 '2', 3149 '43', 3150 '43', 3151 ], 3152 'שז' => [ 3153 '0', 3154 '44', 3155 '44', 3156 '44', 3157 ], 3158 'שס' => [ 3159 '0', 3160 '44', 3161 '44', 3162 '44', 3163 ], 3164 'שת' => [ 3165 '0', 3166 '2', 3167 '43', 3168 '43', 3169 ], 3170 'שג' => [ 3171 '0', 3172 '4', 3173 '4', 3174 '4', 3175 '44', 3176 '44', 3177 '44', 3178 '4', 3179 '43', 3180 '43', 3181 ], 3182 'שט' => [ 3183 '0', 3184 '2', 3185 '43', 3186 '43', 3187 '44', 3188 '44', 3189 '44', 3190 ], 3191 'שצ' => [ 3192 '0', 3193 '44', 3194 '44', 3195 '44', 3196 '45', 3197 '45', 3198 '45', 3199 ], 3200 'שץ' => [ 3201 '0', 3202 '44', 3203 '', 3204 '44', 3205 '45', 3206 '', 3207 '45', 3208 ], 3209 'שש' => [ 3210 '0', 3211 '4', 3212 '4', 3213 '4', 3214 '44', 3215 '44', 3216 '44', 3217 ], 3218 'ש' => [ 3219 '0', 3220 '4', 3221 '4', 3222 '4', 3223 ], 3224 'תג' => [ 3225 '0', 3226 '34', 3227 '34', 3228 '34', 3229 ], 3230 'תז' => [ 3231 '0', 3232 '34', 3233 '34', 3234 '34', 3235 ], 3236 'תש' => [ 3237 '0', 3238 '4', 3239 '4', 3240 '4', 3241 ], 3242 'תת' => [ 3243 '0', 3244 '3', 3245 '3', 3246 '3', 3247 '4', 3248 '4', 3249 '4', 3250 '33', 3251 '33', 3252 '33', 3253 '44', 3254 '44', 3255 '44', 3256 '34', 3257 '34', 3258 '34', 3259 '43', 3260 '43', 3261 '43', 3262 ], 3263 'ת' => [ 3264 '0', 3265 '3', 3266 '3', 3267 '3', 3268 '4', 3269 '4', 3270 '4', 3271 ], 3272 // Arabic alphabet 3273 'ا' => [ 3274 '1', 3275 '0', 3276 '', 3277 '', 3278 ], 3279 'ب' => [ 3280 '0', 3281 '7', 3282 '7', 3283 '7', 3284 ], 3285 'ت' => [ 3286 '0', 3287 '3', 3288 '3', 3289 '3', 3290 ], 3291 'ث' => [ 3292 '0', 3293 '3', 3294 '3', 3295 '3', 3296 ], 3297 'ج' => [ 3298 '0', 3299 '4', 3300 '4', 3301 '4', 3302 ], 3303 'ح' => [ 3304 '0', 3305 '5', 3306 '5', 3307 '5', 3308 ], 3309 'خ' => [ 3310 '0', 3311 '5', 3312 '5', 3313 '5', 3314 ], 3315 'د' => [ 3316 '0', 3317 '3', 3318 '3', 3319 '3', 3320 ], 3321 'ذ' => [ 3322 '0', 3323 '3', 3324 '3', 3325 '3', 3326 ], 3327 'ر' => [ 3328 '0', 3329 '9', 3330 '9', 3331 '9', 3332 ], 3333 'ز' => [ 3334 
'0',
            '4',
            '4',
            '4',
        ],
        'س' => [
            '0',
            '4',
            '4',
            '4',
        ],
        'ش' => [
            '0',
            '4',
            '4',
            '4',
        ],
        'ص' => [
            '0',
            '4',
            '4',
            '4',
        ],
        'ض' => [
            '0',
            '3',
            '3',
            '3',
        ],
        'ط' => [
            '0',
            '3',
            '3',
            '3',
        ],
        'ظ' => [
            '0',
            '4',
            '4',
            '4',
        ],
        'ع' => [
            '1',
            '0',
            '',
            '',
        ],
        'غ' => [
            '0',
            '0',
            '',
            '',
        ],
        'ف' => [
            '0',
            '7',
            '7',
            '7',
        ],
        'ق' => [
            '0',
            '5',
            '5',
            '5',
        ],
        'ك' => [
            '0',
            '5',
            '5',
            '5',
        ],
        'ل' => [
            '0',
            '8',
            '8',
            '8',
        ],
        'لا' => [
            '0',
            '8',
            '8',
            '8',
        ],
        'م' => [
            '0',
            '6',
            '6',
            '6',
        ],
        'ن' => [
            '0',
            '6',
            '6',
            '6',
        ],
        'هن' => [
            '0',
            '66',
            '66',
            '66',
        ],
        'ه' => [
            '0',
            '5',
            '5',
            '',
        ],
        'و' => [
            '1',
            '',
            '',
            '',
            '7',
            '',
            '',
        ],
        'ي' => [
            '0',
            '1',
            '',
            '',
        ],
        'آ' => [
            '0',
            '1',
            '',
            '',
        ],
        'ة' => [
            '0',
            '',
            '',
            '3',
        ],
        'ی' => [
            '0',
            '1',
            '',
            '',
        ],
        'ى' => [
            '1',
            '1',
            '',
            '',
        ],
    ];

    /**
     * Which algorithms are supported.
     *
     * The keys ('std', 'dm') identify the algorithm; the values are the
     * translated, human-readable names.
     *
     * @return array<string,string>
     */
    public static function getAlgorithms(): array
    {
        return [
            /* I18N: https://en.wikipedia.org/wiki/Soundex */
            'std' => I18N::translate('Russell'),
            /* I18N: https://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */
            'dm'  => I18N::translate('Daitch-Mokotoff'),
        ];
    }

    /**
     * Is there a match between two soundex codes?
     *
     * Each argument is a colon-separated list of codes, as produced by
     * russell() or daitchMokotoff(). Two lists match when they have at least
     * one code in common. An empty list never matches anything.
     *
     * @param string $soundex1
     * @param string $soundex2
     *
     * @return bool
     */
    public static function compare(string $soundex1, string $soundex2): bool
    {
        if ($soundex1 === '' || $soundex2 === '') {
            return false;
        }

        $shared_codes = array_intersect(explode(':', $soundex1), explode(':', $soundex2));

        return $shared_codes !== [];
    }

    /**
     * Generate Russell soundex codes for a given text.
     *
     * The text is split on spaces and each word is coded separately. When
     * there is more than one word, the words are also joined and coded as
     * one (e.g. “New York” as “Newyork”).
     *
     * @param string $text
     *
     * @return string Colon-separated list of unique codes; empty if no word has a recognisable sound.
     */
    public static function russell(string $text): string
    {
        $words         = explode(' ', $text);
        $soundex_array = [];

        foreach ($words as $word) {
            $soundex_array = array_merge($soundex_array, self::russellWord($word));
        }

        // Combine words, e.g. “New York” as “Newyork”
        if (count($words) > 1) {
            $soundex_array = array_merge($soundex_array, self::russellWord(str_replace(' ', '', $text)));
        }

        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        return implode(':', $soundex_array);
    }

    /**
     * Calculate the Russell soundex for a single word.
     *
     * @param string $word
     *
     * @return array<string> The code for the word, or an empty list if it has no recognisable sound.
     */
    private static function russellWord(string $word): array
    {
        // Consecutive, leading or trailing spaces produce empty words. soundex('') does not
        // return a code (it gives '' - or false before PHP 8), so it must never reach the
        // list: an empty code would make compare() match any two values that contain one.
        if ($word === '') {
            return [];
        }

        $soundex = soundex($word);

        // Only return codes from recognisable sounds.
        // '0000' is what soundex() gives for text without any letters.
        if ($soundex === '0000') {
            return [];
        }

        return [$soundex];
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
     *
     * The text is split on spaces and each word is coded separately. When
     * there is more than one word, the words are also joined and coded as
     * one (e.g. “New York” as “Newyork”).
     *
     * @param string $text
     *
     * @return string Colon-separated list of unique codes.
     */
    public static function daitchMokotoff(string $text): string
    {
        $words = explode(' ', $text);
        $codes = [];

        foreach ($words as $word) {
            $codes = array_merge($codes, self::daitchMokotoffWord($word));
        }

        // Combine words, e.g. “New York” as “Newyork”
        if (count($words) > 1) {
            $joined = str_replace(' ', '', $text);
            $codes  = array_merge($codes, self::daitchMokotoffWord($joined));
        }

        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $codes = array_slice(array_unique($codes), 0, 36);

        return implode(':', $codes);
    }

    /**
     * Calculate the Daitch-Mokotoff soundex for a word.
     *
     * The word is consumed left to right, always taking the longest chunk that
     * is a key of the sound table. Every chunk contributes one sound per
     * "branch" of its table entry, so a single word can yield several codes.
     *
     * @param string $name
     *
     * @return array<string> List of possible DM codes for the word.
     */
    private static function daitchMokotoffWord(string $name): array
    {
        // Apply special transformation rules to the input string
        $name = I18N::strtoupper($name);
        // NOTE(review): the rules are applied with str_replace(), so the '$' at the end of
        // some TRANSFORM_NAMES patterns is matched literally, not as an end-of-string anchor.
        // The constant's documentation describes a regex call - confirm which is intended.
        foreach (self::TRANSFORM_NAMES as $transformRule) {
            $name = str_replace($transformRule[0], $transformRule[1], $name);
        }

        // Initialize
        $name_script = I18N::textScript($name);
        $noVowels    = $name_script === 'Hebr' || $name_script === 'Arab';

        $lastPos       = strlen($name) - 1; // Byte offset: the table keys are byte strings
        $currPos       = 0;
        $state         = 1; // 1: start of input string, 2: before vowel, 3: other
        $result        = []; // accumulate complete 6-digit D-M codes here
        $partialResult = [['!']]; // incomplete D-M codes ('!' stops "duplicate sound" check)

        // Loop through the input string.
        // Stop when the string is exhausted or when no more partial results remain
        while ($partialResult !== [] && $currPos <= $lastPos) {
            // Find the DM coding table entry for the chunk at the current position
            $thisEntry = self::longestTableKey($name, $currPos);

            if ($thisEntry === '') {
                $currPos++; // Not in table: advance pointer to next byte
                continue;   // and try again
            }

            $soundTableEntry = self::DM_SOUNDS[$thisEntry];
            $workingResult   = $partialResult;
            $partialResult   = [];
            $currPos         += strlen($thisEntry);

            // Not at beginning of input string
            if ($state !== 1) {
                // Determine whether the next chunk is a vowel
                $nextEntry   = $currPos <= $lastPos ? self::longestTableKey($name, $currPos) : '';
                $nextIsVowel = $nextEntry !== '' && self::DM_SOUNDS[$nextEntry][0] !== '0';

                // 2: this chunk is followed by a vowel, 3: any other case
                $state = $nextIsVowel ? 2 : 3;
            }

            // Visit the sound for this state in every triplet (branch) of the table entry.
            // Adding 3 advances to the next triplet while keeping the same basic state.
            $entrySize = count($soundTableEntry);

            for (; $state < $entrySize; $state += 3) {
                $sound = $soundTableEntry[$state];

                foreach ($workingResult as $workingEntry) {
                    $lastIndex = count($workingEntry) - 1;

                    // empty means 'ignore this sound in this state'
                    if ($sound === '') {
                        $workingEntry[$lastIndex] .= '!'; // Prevent false 'doubles'
                        $partialResult[]          = $workingEntry;
                        continue;
                    }

                    // A sound that repeats the previous one is dropped, except for Hebrew and
                    // Arabic, where the repeated sound is kept.
                    // NOTE(review): the original comment promised a pair of codes (one with a
                    // single occurrence, one with both); only the doubled form is produced.
                    if ($sound !== $workingEntry[$lastIndex] || $noVowels) {
                        $workingEntry[] = $sound;
                    }

                    if (count($workingEntry) < 7) {
                        $partialResult[] = $workingEntry;
                        continue;
                    }

                    // This is the 6th code in the sequence
                    // We're looking for 7 entries because the first is '!' and doesn't count
                    $code = self::padDaitchMokotoffCode($workingEntry);

                    if ($code !== '') {
                        $result[] = $code;
                    }
                }
            }
        }

        // Zero-fill and copy all remaining partial results
        foreach ($partialResult as $workingEntry) {
            $code = self::padDaitchMokotoffCode($workingEntry);

            if ($code !== '') {
                $result[] = $code;
            }
        }

        return $result;
    }

    /**
     * Find the longest key of the sound table that starts at a byte offset of the name.
     *
     * @param string $name   Name being coded
     * @param int    $offset Byte offset into the name; must be less than its length
     *
     * @return string The matching table key, or an empty string if there is none.
     */
    private static function longestTableKey(string $name, int $offset): string
    {
        $chunk = substr($name, $offset, self::MAXCHAR); // Get maximum length chunk

        while ($chunk !== '' && !isset(self::DM_SOUNDS[$chunk])) {
            $chunk = substr($chunk, 0, -1); // Not in table: try a shorter chunk
        }

        return $chunk;
    }

    /**
     * Convert a list of accumulated sounds into a 6-digit code.
     *
     * @param array<string> $sounds Accumulated sounds, including '!' markers
     *
     * @return string Zero-padded 6-digit code, or an empty string if there are no sounds.
     */
    private static function padDaitchMokotoffCode(array $sounds): string
    {
        $digits = str_replace('!', '', implode('', $sounds));

        // Only return codes from recognisable sounds.
        // Compare with '' explicitly: the string '0' (a name whose only sound is an
        // initial vowel) is falsy in PHP, but it is a valid code.
        if ($digits === '') {
            return '';
        }

        return substr($digits . '000000', 0, 6);
    }
}