1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2019 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <http://www.gnu.org/licenses/>. 16 */ 17declare(strict_types=1); 18 19namespace Fisharebest\Webtrees; 20 21/** 22 * Phonetic matching of strings. 23 */ 24class Soundex 25{ 26 // Determine the Daitch–Mokotoff Soundex code for a word 27 // Original implementation by Gerry Kroll, and analysis by Meliza Amity 28 29 // Max. table key length (in ASCII bytes -- NOT in UTF-8 characters!) 30 private const MAXCHAR = 7; 31 32 /** 33 * Name transformation arrays. 34 * Used to transform the Name string to simplify the "sounds like" table. 35 * This is especially useful in Hebrew. 36 * 37 * Each array entry defines the "from" and "to" arguments of a preg($from, $to, $text) 38 * function call to achieve the desired transformations. 39 * 40 * Note about the use of "\x01": 41 * This code, which can’t legitimately occur in the kind of text we're dealing with, 42 * is used as a place-holder so that conditional string replacements can be done. 
43 */ 44 private const TRANSFORM_NAMES = [ 45 // Force Yiddish ligatures to be treated as separate letters 46 [ 47 'װ', 48 'וו', 49 ], 50 [ 51 'ײ', 52 'יי', 53 ], 54 [ 55 'ױ', 56 'וי', 57 ], 58 [ 59 'בו', 60 'בע', 61 ], 62 [ 63 'פו', 64 'פע', 65 ], 66 [ 67 'ומ', 68 'עמ', 69 ], 70 [ 71 'ום', 72 'עם', 73 ], 74 [ 75 'ונ', 76 'ענ', 77 ], 78 [ 79 'ון', 80 'ען', 81 ], 82 [ 83 'וו', 84 'ב', 85 ], 86 [ 87 "\x01", 88 '', 89 ], 90 [ 91 'ייה$', 92 "\x01ה", 93 ], 94 [ 95 'ייע$', 96 "\x01ע", 97 ], 98 [ 99 'יי', 100 'ע', 101 ], 102 [ 103 "\x01", 104 'יי', 105 ], 106 ]; 107 108 /** 109 * The DM sound coding table is organized this way: 110 * key: a variable-length string that corresponds to the UTF-8 character sequence 111 * represented by the table entry. Currently, that string can be up to 7 112 * bytes long. This maximum length is defined by the class constant 113 * MAXCHAR. 114 * 115 * value: an array as follows: 116 * [0]: zero if not a vowel 117 * [1]: sound value when this string is at the beginning of the word 118 * [2]: sound value when this string is followed by a vowel 119 * [3]: sound value for other cases 120 * [1],[2],[3] can be repeated several times to create branches in the code 121 * an empty sound value means "ignore in this state" 122 */ 123 private const DM_SOUNDS = [ 124 'A' => [ 125 '1', 126 '0', 127 '', 128 '', 129 ], 130 'À' => [ 131 '1', 132 '0', 133 '', 134 '', 135 ], 136 'Á' => [ 137 '1', 138 '0', 139 '', 140 '', 141 ], 142 'Â' => [ 143 '1', 144 '0', 145 '', 146 '', 147 ], 148 'Ã' => [ 149 '1', 150 '0', 151 '', 152 '', 153 ], 154 'Ä' => [ 155 '1', 156 '0', 157 '1', 158 '', 159 '0', 160 '', 161 '', 162 ], 163 'Å' => [ 164 '1', 165 '0', 166 '', 167 '', 168 ], 169 'Ă' => [ 170 '1', 171 '0', 172 '', 173 '', 174 ], 175 'Ą' => [ 176 '1', 177 '', 178 '', 179 '', 180 '', 181 '', 182 '6', 183 ], 184 'Ạ' => [ 185 '1', 186 '0', 187 '', 188 '', 189 ], 190 'Ả' => [ 191 '1', 192 '0', 193 '', 194 '', 195 ], 196 'Ấ' => [ 197 '1', 198 '0', 199 '', 200 '', 
201 ], 202 'Ầ' => [ 203 '1', 204 '0', 205 '', 206 '', 207 ], 208 'Ẩ' => [ 209 '1', 210 '0', 211 '', 212 '', 213 ], 214 'Ẫ' => [ 215 '1', 216 '0', 217 '', 218 '', 219 ], 220 'Ậ' => [ 221 '1', 222 '0', 223 '', 224 '', 225 ], 226 'Ắ' => [ 227 '1', 228 '0', 229 '', 230 '', 231 ], 232 'Ằ' => [ 233 '1', 234 '0', 235 '', 236 '', 237 ], 238 'Ẳ' => [ 239 '1', 240 '0', 241 '', 242 '', 243 ], 244 'Ẵ' => [ 245 '1', 246 '0', 247 '', 248 '', 249 ], 250 'Ặ' => [ 251 '1', 252 '0', 253 '', 254 '', 255 ], 256 'AE' => [ 257 '1', 258 '0', 259 '1', 260 '', 261 ], 262 'Æ' => [ 263 '1', 264 '0', 265 '1', 266 '', 267 ], 268 'AI' => [ 269 '1', 270 '0', 271 '1', 272 '', 273 ], 274 'AJ' => [ 275 '1', 276 '0', 277 '1', 278 '', 279 ], 280 'AU' => [ 281 '1', 282 '0', 283 '7', 284 '', 285 ], 286 'AV' => [ 287 '1', 288 '0', 289 '7', 290 '', 291 '7', 292 '7', 293 '7', 294 ], 295 'ÄU' => [ 296 '1', 297 '0', 298 '1', 299 '', 300 ], 301 'AY' => [ 302 '1', 303 '0', 304 '1', 305 '', 306 ], 307 'B' => [ 308 '0', 309 '7', 310 '7', 311 '7', 312 ], 313 'C' => [ 314 '0', 315 '5', 316 '5', 317 '5', 318 '34', 319 '4', 320 '4', 321 ], 322 'Ć' => [ 323 '0', 324 '4', 325 '4', 326 '4', 327 ], 328 'Č' => [ 329 '0', 330 '4', 331 '4', 332 '4', 333 ], 334 'Ç' => [ 335 '0', 336 '4', 337 '4', 338 '4', 339 ], 340 'CH' => [ 341 '0', 342 '5', 343 '5', 344 '5', 345 '34', 346 '4', 347 '4', 348 ], 349 'CHS' => [ 350 '0', 351 '5', 352 '54', 353 '54', 354 ], 355 'CK' => [ 356 '0', 357 '5', 358 '5', 359 '5', 360 '45', 361 '45', 362 '45', 363 ], 364 'CCS' => [ 365 '0', 366 '4', 367 '4', 368 '4', 369 ], 370 'CS' => [ 371 '0', 372 '4', 373 '4', 374 '4', 375 ], 376 'CSZ' => [ 377 '0', 378 '4', 379 '4', 380 '4', 381 ], 382 'CZ' => [ 383 '0', 384 '4', 385 '4', 386 '4', 387 ], 388 'CZS' => [ 389 '0', 390 '4', 391 '4', 392 '4', 393 ], 394 'D' => [ 395 '0', 396 '3', 397 '3', 398 '3', 399 ], 400 'Ď' => [ 401 '0', 402 '3', 403 '3', 404 '3', 405 ], 406 'Đ' => [ 407 '0', 408 '3', 409 '3', 410 '3', 411 ], 412 'DRS' => [ 413 '0', 414 '4', 415 
'4', 416 '4', 417 ], 418 'DRZ' => [ 419 '0', 420 '4', 421 '4', 422 '4', 423 ], 424 'DS' => [ 425 '0', 426 '4', 427 '4', 428 '4', 429 ], 430 'DSH' => [ 431 '0', 432 '4', 433 '4', 434 '4', 435 ], 436 'DSZ' => [ 437 '0', 438 '4', 439 '4', 440 '4', 441 ], 442 'DT' => [ 443 '0', 444 '3', 445 '3', 446 '3', 447 ], 448 'DDZ' => [ 449 '0', 450 '4', 451 '4', 452 '4', 453 ], 454 'DDZS' => [ 455 '0', 456 '4', 457 '4', 458 '4', 459 ], 460 'DZ' => [ 461 '0', 462 '4', 463 '4', 464 '4', 465 ], 466 'DŹ' => [ 467 '0', 468 '4', 469 '4', 470 '4', 471 ], 472 'DŻ' => [ 473 '0', 474 '4', 475 '4', 476 '4', 477 ], 478 'DZH' => [ 479 '0', 480 '4', 481 '4', 482 '4', 483 ], 484 'DZS' => [ 485 '0', 486 '4', 487 '4', 488 '4', 489 ], 490 'E' => [ 491 '1', 492 '0', 493 '', 494 '', 495 ], 496 'È' => [ 497 '1', 498 '0', 499 '', 500 '', 501 ], 502 'É' => [ 503 '1', 504 '0', 505 '', 506 '', 507 ], 508 'Ê' => [ 509 '1', 510 '0', 511 '', 512 '', 513 ], 514 'Ë' => [ 515 '1', 516 '0', 517 '', 518 '', 519 ], 520 'Ĕ' => [ 521 '1', 522 '0', 523 '', 524 '', 525 ], 526 'Ė' => [ 527 '1', 528 '0', 529 '', 530 '', 531 ], 532 'Ę' => [ 533 '1', 534 '', 535 '', 536 '6', 537 '', 538 '', 539 '', 540 ], 541 'Ẹ' => [ 542 '1', 543 '0', 544 '', 545 '', 546 ], 547 'Ẻ' => [ 548 '1', 549 '0', 550 '', 551 '', 552 ], 553 'Ẽ' => [ 554 '1', 555 '0', 556 '', 557 '', 558 ], 559 'Ế' => [ 560 '1', 561 '0', 562 '', 563 '', 564 ], 565 'Ề' => [ 566 '1', 567 '0', 568 '', 569 '', 570 ], 571 'Ể' => [ 572 '1', 573 '0', 574 '', 575 '', 576 ], 577 'Ễ' => [ 578 '1', 579 '0', 580 '', 581 '', 582 ], 583 'Ệ' => [ 584 '1', 585 '0', 586 '', 587 '', 588 ], 589 'EAU' => [ 590 '1', 591 '0', 592 '', 593 '', 594 ], 595 'EI' => [ 596 '1', 597 '0', 598 '1', 599 '', 600 ], 601 'EJ' => [ 602 '1', 603 '0', 604 '1', 605 '', 606 ], 607 'EU' => [ 608 '1', 609 '1', 610 '1', 611 '', 612 ], 613 'EY' => [ 614 '1', 615 '0', 616 '1', 617 '', 618 ], 619 'F' => [ 620 '0', 621 '7', 622 '7', 623 '7', 624 ], 625 'FB' => [ 626 '0', 627 '7', 628 '7', 629 '7', 630 ], 631 
'G' => [ 632 '0', 633 '5', 634 '5', 635 '5', 636 '34', 637 '4', 638 '4', 639 ], 640 'Ğ' => [ 641 '0', 642 '', 643 '', 644 '', 645 ], 646 'GGY' => [ 647 '0', 648 '5', 649 '5', 650 '5', 651 ], 652 'GY' => [ 653 '0', 654 '5', 655 '5', 656 '5', 657 ], 658 'H' => [ 659 '0', 660 '5', 661 '5', 662 '', 663 '5', 664 '5', 665 '5', 666 ], 667 'I' => [ 668 '1', 669 '0', 670 '', 671 '', 672 ], 673 'Ì' => [ 674 '1', 675 '0', 676 '', 677 '', 678 ], 679 'Í' => [ 680 '1', 681 '0', 682 '', 683 '', 684 ], 685 'Î' => [ 686 '1', 687 '0', 688 '', 689 '', 690 ], 691 'Ï' => [ 692 '1', 693 '0', 694 '', 695 '', 696 ], 697 'Ĩ' => [ 698 '1', 699 '0', 700 '', 701 '', 702 ], 703 'Į' => [ 704 '1', 705 '0', 706 '', 707 '', 708 ], 709 'İ' => [ 710 '1', 711 '0', 712 '', 713 '', 714 ], 715 'Ỉ' => [ 716 '1', 717 '0', 718 '', 719 '', 720 ], 721 'Ị' => [ 722 '1', 723 '0', 724 '', 725 '', 726 ], 727 'IA' => [ 728 '1', 729 '1', 730 '', 731 '', 732 ], 733 'IE' => [ 734 '1', 735 '1', 736 '', 737 '', 738 ], 739 'IO' => [ 740 '1', 741 '1', 742 '', 743 '', 744 ], 745 'IU' => [ 746 '1', 747 '1', 748 '', 749 '', 750 ], 751 'J' => [ 752 '0', 753 '1', 754 '', 755 '', 756 '4', 757 '4', 758 '4', 759 '5', 760 '5', 761 '', 762 ], 763 'K' => [ 764 '0', 765 '5', 766 '5', 767 '5', 768 ], 769 'KH' => [ 770 '0', 771 '5', 772 '5', 773 '5', 774 ], 775 'KS' => [ 776 '0', 777 '5', 778 '54', 779 '54', 780 ], 781 'L' => [ 782 '0', 783 '8', 784 '8', 785 '8', 786 ], 787 'Ľ' => [ 788 '0', 789 '8', 790 '8', 791 '8', 792 ], 793 'Ĺ' => [ 794 '0', 795 '8', 796 '8', 797 '8', 798 ], 799 'Ł' => [ 800 '0', 801 '7', 802 '7', 803 '7', 804 '8', 805 '8', 806 '8', 807 ], 808 'LL' => [ 809 '0', 810 '8', 811 '8', 812 '8', 813 '58', 814 '8', 815 '8', 816 '1', 817 '8', 818 '8', 819 ], 820 'LLY' => [ 821 '0', 822 '8', 823 '8', 824 '8', 825 '1', 826 '8', 827 '8', 828 ], 829 'LY' => [ 830 '0', 831 '8', 832 '8', 833 '8', 834 '1', 835 '8', 836 '8', 837 ], 838 'M' => [ 839 '0', 840 '6', 841 '6', 842 '6', 843 ], 844 'MĔ' => [ 845 '0', 846 '66', 847 '66', 
848 '66', 849 ], 850 'MN' => [ 851 '0', 852 '66', 853 '66', 854 '66', 855 ], 856 'N' => [ 857 '0', 858 '6', 859 '6', 860 '6', 861 ], 862 'Ń' => [ 863 '0', 864 '6', 865 '6', 866 '6', 867 ], 868 'Ň' => [ 869 '0', 870 '6', 871 '6', 872 '6', 873 ], 874 'Ñ' => [ 875 '0', 876 '6', 877 '6', 878 '6', 879 ], 880 'NM' => [ 881 '0', 882 '66', 883 '66', 884 '66', 885 ], 886 'O' => [ 887 '1', 888 '0', 889 '', 890 '', 891 ], 892 'Ò' => [ 893 '1', 894 '0', 895 '', 896 '', 897 ], 898 'Ó' => [ 899 '1', 900 '0', 901 '', 902 '', 903 ], 904 'Ô' => [ 905 '1', 906 '0', 907 '', 908 '', 909 ], 910 'Õ' => [ 911 '1', 912 '0', 913 '', 914 '', 915 ], 916 'Ö' => [ 917 '1', 918 '0', 919 '', 920 '', 921 ], 922 'Ø' => [ 923 '1', 924 '0', 925 '', 926 '', 927 ], 928 'Ő' => [ 929 '1', 930 '0', 931 '', 932 '', 933 ], 934 'Œ' => [ 935 '1', 936 '0', 937 '', 938 '', 939 ], 940 'Ơ' => [ 941 '1', 942 '0', 943 '', 944 '', 945 ], 946 'Ọ' => [ 947 '1', 948 '0', 949 '', 950 '', 951 ], 952 'Ỏ' => [ 953 '1', 954 '0', 955 '', 956 '', 957 ], 958 'Ố' => [ 959 '1', 960 '0', 961 '', 962 '', 963 ], 964 'Ồ' => [ 965 '1', 966 '0', 967 '', 968 '', 969 ], 970 'Ổ' => [ 971 '1', 972 '0', 973 '', 974 '', 975 ], 976 'Ỗ' => [ 977 '1', 978 '0', 979 '', 980 '', 981 ], 982 'Ộ' => [ 983 '1', 984 '0', 985 '', 986 '', 987 ], 988 'Ớ' => [ 989 '1', 990 '0', 991 '', 992 '', 993 ], 994 'Ờ' => [ 995 '1', 996 '0', 997 '', 998 '', 999 ], 1000 'Ở' => [ 1001 '1', 1002 '0', 1003 '', 1004 '', 1005 ], 1006 'Ỡ' => [ 1007 '1', 1008 '0', 1009 '', 1010 '', 1011 ], 1012 'Ợ' => [ 1013 '1', 1014 '0', 1015 '', 1016 '', 1017 ], 1018 'OE' => [ 1019 '1', 1020 '0', 1021 '', 1022 '', 1023 ], 1024 'OI' => [ 1025 '1', 1026 '0', 1027 '1', 1028 '', 1029 ], 1030 'OJ' => [ 1031 '1', 1032 '0', 1033 '1', 1034 '', 1035 ], 1036 'OU' => [ 1037 '1', 1038 '0', 1039 '', 1040 '', 1041 ], 1042 'OY' => [ 1043 '1', 1044 '0', 1045 '1', 1046 '', 1047 ], 1048 'P' => [ 1049 '0', 1050 '7', 1051 '7', 1052 '7', 1053 ], 1054 'PF' => [ 1055 '0', 1056 '7', 1057 '7', 1058 '7', 1059 ], 
1060 'PH' => [ 1061 '0', 1062 '7', 1063 '7', 1064 '7', 1065 ], 1066 'Q' => [ 1067 '0', 1068 '5', 1069 '5', 1070 '5', 1071 ], 1072 'R' => [ 1073 '0', 1074 '9', 1075 '9', 1076 '9', 1077 ], 1078 'Ř' => [ 1079 '0', 1080 '4', 1081 '4', 1082 '4', 1083 ], 1084 'RS' => [ 1085 '0', 1086 '4', 1087 '4', 1088 '4', 1089 '94', 1090 '94', 1091 '94', 1092 ], 1093 'RZ' => [ 1094 '0', 1095 '4', 1096 '4', 1097 '4', 1098 '94', 1099 '94', 1100 '94', 1101 ], 1102 'S' => [ 1103 '0', 1104 '4', 1105 '4', 1106 '4', 1107 ], 1108 'Ś' => [ 1109 '0', 1110 '4', 1111 '4', 1112 '4', 1113 ], 1114 'Š' => [ 1115 '0', 1116 '4', 1117 '4', 1118 '4', 1119 ], 1120 'Ş' => [ 1121 '0', 1122 '4', 1123 '4', 1124 '4', 1125 ], 1126 'SC' => [ 1127 '0', 1128 '2', 1129 '4', 1130 '4', 1131 ], 1132 'ŠČ' => [ 1133 '0', 1134 '2', 1135 '4', 1136 '4', 1137 ], 1138 'SCH' => [ 1139 '0', 1140 '4', 1141 '4', 1142 '4', 1143 ], 1144 'SCHD' => [ 1145 '0', 1146 '2', 1147 '43', 1148 '43', 1149 ], 1150 'SCHT' => [ 1151 '0', 1152 '2', 1153 '43', 1154 '43', 1155 ], 1156 'SCHTCH' => [ 1157 '0', 1158 '2', 1159 '4', 1160 '4', 1161 ], 1162 'SCHTSCH' => [ 1163 '0', 1164 '2', 1165 '4', 1166 '4', 1167 ], 1168 'SCHTSH' => [ 1169 '0', 1170 '2', 1171 '4', 1172 '4', 1173 ], 1174 'SD' => [ 1175 '0', 1176 '2', 1177 '43', 1178 '43', 1179 ], 1180 'SH' => [ 1181 '0', 1182 '4', 1183 '4', 1184 '4', 1185 ], 1186 'SHCH' => [ 1187 '0', 1188 '2', 1189 '4', 1190 '4', 1191 ], 1192 'SHD' => [ 1193 '0', 1194 '2', 1195 '43', 1196 '43', 1197 ], 1198 'SHT' => [ 1199 '0', 1200 '2', 1201 '43', 1202 '43', 1203 ], 1204 'SHTCH' => [ 1205 '0', 1206 '2', 1207 '4', 1208 '4', 1209 ], 1210 'SHTSH' => [ 1211 '0', 1212 '2', 1213 '4', 1214 '4', 1215 ], 1216 'ß' => [ 1217 '0', 1218 '', 1219 '4', 1220 '4', 1221 ], 1222 'ST' => [ 1223 '0', 1224 '2', 1225 '43', 1226 '43', 1227 ], 1228 'STCH' => [ 1229 '0', 1230 '2', 1231 '4', 1232 '4', 1233 ], 1234 'STRS' => [ 1235 '0', 1236 '2', 1237 '4', 1238 '4', 1239 ], 1240 'STRZ' => [ 1241 '0', 1242 '2', 1243 '4', 1244 '4', 1245 ], 1246 
'STSCH' => [ 1247 '0', 1248 '2', 1249 '4', 1250 '4', 1251 ], 1252 'STSH' => [ 1253 '0', 1254 '2', 1255 '4', 1256 '4', 1257 ], 1258 'SSZ' => [ 1259 '0', 1260 '4', 1261 '4', 1262 '4', 1263 ], 1264 'SZ' => [ 1265 '0', 1266 '4', 1267 '4', 1268 '4', 1269 ], 1270 'SZCS' => [ 1271 '0', 1272 '2', 1273 '4', 1274 '4', 1275 ], 1276 'SZCZ' => [ 1277 '0', 1278 '2', 1279 '4', 1280 '4', 1281 ], 1282 'SZD' => [ 1283 '0', 1284 '2', 1285 '43', 1286 '43', 1287 ], 1288 'SZT' => [ 1289 '0', 1290 '2', 1291 '43', 1292 '43', 1293 ], 1294 'T' => [ 1295 '0', 1296 '3', 1297 '3', 1298 '3', 1299 ], 1300 'Ť' => [ 1301 '0', 1302 '3', 1303 '3', 1304 '3', 1305 ], 1306 'Ţ' => [ 1307 '0', 1308 '3', 1309 '3', 1310 '3', 1311 '4', 1312 '4', 1313 '4', 1314 ], 1315 'TC' => [ 1316 '0', 1317 '4', 1318 '4', 1319 '4', 1320 ], 1321 'TCH' => [ 1322 '0', 1323 '4', 1324 '4', 1325 '4', 1326 ], 1327 'TH' => [ 1328 '0', 1329 '3', 1330 '3', 1331 '3', 1332 ], 1333 'TRS' => [ 1334 '0', 1335 '4', 1336 '4', 1337 '4', 1338 ], 1339 'TRZ' => [ 1340 '0', 1341 '4', 1342 '4', 1343 '4', 1344 ], 1345 'TS' => [ 1346 '0', 1347 '4', 1348 '4', 1349 '4', 1350 ], 1351 'TSCH' => [ 1352 '0', 1353 '4', 1354 '4', 1355 '4', 1356 ], 1357 'TSH' => [ 1358 '0', 1359 '4', 1360 '4', 1361 '4', 1362 ], 1363 'TSZ' => [ 1364 '0', 1365 '4', 1366 '4', 1367 '4', 1368 ], 1369 'TTCH' => [ 1370 '0', 1371 '4', 1372 '4', 1373 '4', 1374 ], 1375 'TTS' => [ 1376 '0', 1377 '4', 1378 '4', 1379 '4', 1380 ], 1381 'TTSCH' => [ 1382 '0', 1383 '4', 1384 '4', 1385 '4', 1386 ], 1387 'TTSZ' => [ 1388 '0', 1389 '4', 1390 '4', 1391 '4', 1392 ], 1393 'TTZ' => [ 1394 '0', 1395 '4', 1396 '4', 1397 '4', 1398 ], 1399 'TZ' => [ 1400 '0', 1401 '4', 1402 '4', 1403 '4', 1404 ], 1405 'TZS' => [ 1406 '0', 1407 '4', 1408 '4', 1409 '4', 1410 ], 1411 'U' => [ 1412 '1', 1413 '0', 1414 '', 1415 '', 1416 ], 1417 'Ù' => [ 1418 '1', 1419 '0', 1420 '', 1421 '', 1422 ], 1423 'Ú' => [ 1424 '1', 1425 '0', 1426 '', 1427 '', 1428 ], 1429 'Û' => [ 1430 '1', 1431 '0', 1432 '', 1433 '', 1434 ], 
1435 'Ü' => [ 1436 '1', 1437 '0', 1438 '', 1439 '', 1440 ], 1441 'Ũ' => [ 1442 '1', 1443 '0', 1444 '', 1445 '', 1446 ], 1447 'Ū' => [ 1448 '1', 1449 '0', 1450 '', 1451 '', 1452 ], 1453 'Ů' => [ 1454 '1', 1455 '0', 1456 '', 1457 '', 1458 ], 1459 'Ű' => [ 1460 '1', 1461 '0', 1462 '', 1463 '', 1464 ], 1465 'Ų' => [ 1466 '1', 1467 '0', 1468 '', 1469 '', 1470 ], 1471 'Ư' => [ 1472 '1', 1473 '0', 1474 '', 1475 '', 1476 ], 1477 'Ụ' => [ 1478 '1', 1479 '0', 1480 '', 1481 '', 1482 ], 1483 'Ủ' => [ 1484 '1', 1485 '0', 1486 '', 1487 '', 1488 ], 1489 'Ứ' => [ 1490 '1', 1491 '0', 1492 '', 1493 '', 1494 ], 1495 'Ừ' => [ 1496 '1', 1497 '0', 1498 '', 1499 '', 1500 ], 1501 'Ử' => [ 1502 '1', 1503 '0', 1504 '', 1505 '', 1506 ], 1507 'Ữ' => [ 1508 '1', 1509 '0', 1510 '', 1511 '', 1512 ], 1513 'Ự' => [ 1514 '1', 1515 '0', 1516 '', 1517 '', 1518 ], 1519 'UE' => [ 1520 '1', 1521 '0', 1522 '', 1523 '', 1524 ], 1525 'UI' => [ 1526 '1', 1527 '0', 1528 '1', 1529 '', 1530 ], 1531 'UJ' => [ 1532 '1', 1533 '0', 1534 '1', 1535 '', 1536 ], 1537 'UY' => [ 1538 '1', 1539 '0', 1540 '1', 1541 '', 1542 ], 1543 'UW' => [ 1544 '1', 1545 '0', 1546 '1', 1547 '', 1548 '0', 1549 '7', 1550 '7', 1551 ], 1552 'V' => [ 1553 '0', 1554 '7', 1555 '7', 1556 '7', 1557 ], 1558 'W' => [ 1559 '0', 1560 '7', 1561 '7', 1562 '7', 1563 ], 1564 'X' => [ 1565 '0', 1566 '5', 1567 '54', 1568 '54', 1569 ], 1570 'Y' => [ 1571 '1', 1572 '1', 1573 '', 1574 '', 1575 ], 1576 'Ý' => [ 1577 '1', 1578 '1', 1579 '', 1580 '', 1581 ], 1582 'Ỳ' => [ 1583 '1', 1584 '1', 1585 '', 1586 '', 1587 ], 1588 'Ỵ' => [ 1589 '1', 1590 '1', 1591 '', 1592 '', 1593 ], 1594 'Ỷ' => [ 1595 '1', 1596 '1', 1597 '', 1598 '', 1599 ], 1600 'Ỹ' => [ 1601 '1', 1602 '1', 1603 '', 1604 '', 1605 ], 1606 'Z' => [ 1607 '0', 1608 '4', 1609 '4', 1610 '4', 1611 ], 1612 'Ź' => [ 1613 '0', 1614 '4', 1615 '4', 1616 '4', 1617 ], 1618 'Ż' => [ 1619 '0', 1620 '4', 1621 '4', 1622 '4', 1623 ], 1624 'Ž' => [ 1625 '0', 1626 '4', 1627 '4', 1628 '4', 1629 ], 1630 'ZD' => [ 1631 '0', 
1632 '2', 1633 '43', 1634 '43', 1635 ], 1636 'ZDZ' => [ 1637 '0', 1638 '2', 1639 '4', 1640 '4', 1641 ], 1642 'ZDZH' => [ 1643 '0', 1644 '2', 1645 '4', 1646 '4', 1647 ], 1648 'ZH' => [ 1649 '0', 1650 '4', 1651 '4', 1652 '4', 1653 ], 1654 'ZHD' => [ 1655 '0', 1656 '2', 1657 '43', 1658 '43', 1659 ], 1660 'ZHDZH' => [ 1661 '0', 1662 '2', 1663 '4', 1664 '4', 1665 ], 1666 'ZS' => [ 1667 '0', 1668 '4', 1669 '4', 1670 '4', 1671 ], 1672 'ZSCH' => [ 1673 '0', 1674 '4', 1675 '4', 1676 '4', 1677 ], 1678 'ZSH' => [ 1679 '0', 1680 '4', 1681 '4', 1682 '4', 1683 ], 1684 'ZZS' => [ 1685 '0', 1686 '4', 1687 '4', 1688 '4', 1689 ], 1690 // Cyrillic alphabet 1691 'А' => [ 1692 '1', 1693 '0', 1694 '', 1695 '', 1696 ], 1697 'Б' => [ 1698 '0', 1699 '7', 1700 '7', 1701 '7', 1702 ], 1703 'В' => [ 1704 '0', 1705 '7', 1706 '7', 1707 '7', 1708 ], 1709 'Г' => [ 1710 '0', 1711 '5', 1712 '5', 1713 '5', 1714 ], 1715 'Д' => [ 1716 '0', 1717 '3', 1718 '3', 1719 '3', 1720 ], 1721 'ДЗ' => [ 1722 '0', 1723 '4', 1724 '4', 1725 '4', 1726 ], 1727 'Е' => [ 1728 '1', 1729 '0', 1730 '', 1731 '', 1732 ], 1733 'Ё' => [ 1734 '1', 1735 '0', 1736 '', 1737 '', 1738 ], 1739 'Ж' => [ 1740 '0', 1741 '4', 1742 '4', 1743 '4', 1744 ], 1745 'З' => [ 1746 '0', 1747 '4', 1748 '4', 1749 '4', 1750 ], 1751 'И' => [ 1752 '1', 1753 '0', 1754 '', 1755 '', 1756 ], 1757 'Й' => [ 1758 '1', 1759 '1', 1760 '', 1761 '', 1762 '4', 1763 '4', 1764 '4', 1765 ], 1766 'К' => [ 1767 '0', 1768 '5', 1769 '5', 1770 '5', 1771 ], 1772 'Л' => [ 1773 '0', 1774 '8', 1775 '8', 1776 '8', 1777 ], 1778 'М' => [ 1779 '0', 1780 '6', 1781 '6', 1782 '6', 1783 ], 1784 'Н' => [ 1785 '0', 1786 '6', 1787 '6', 1788 '6', 1789 ], 1790 'О' => [ 1791 '1', 1792 '0', 1793 '', 1794 '', 1795 ], 1796 'П' => [ 1797 '0', 1798 '7', 1799 '7', 1800 '7', 1801 ], 1802 'Р' => [ 1803 '0', 1804 '9', 1805 '9', 1806 '9', 1807 ], 1808 'РЖ' => [ 1809 '0', 1810 '4', 1811 '4', 1812 '4', 1813 ], 1814 'С' => [ 1815 '0', 1816 '4', 1817 '4', 1818 '4', 1819 ], 1820 'Т' => [ 1821 '0', 1822 
'3', 1823 '3', 1824 '3', 1825 ], 1826 'У' => [ 1827 '1', 1828 '0', 1829 '', 1830 '', 1831 ], 1832 'Ф' => [ 1833 '0', 1834 '7', 1835 '7', 1836 '7', 1837 ], 1838 'Х' => [ 1839 '0', 1840 '5', 1841 '5', 1842 '5', 1843 ], 1844 'Ц' => [ 1845 '0', 1846 '4', 1847 '4', 1848 '4', 1849 ], 1850 'Ч' => [ 1851 '0', 1852 '4', 1853 '4', 1854 '4', 1855 ], 1856 'Ш' => [ 1857 '0', 1858 '4', 1859 '4', 1860 '4', 1861 ], 1862 'Щ' => [ 1863 '0', 1864 '2', 1865 '4', 1866 '4', 1867 ], 1868 'Ъ' => [ 1869 '0', 1870 '', 1871 '', 1872 '', 1873 ], 1874 'Ы' => [ 1875 '0', 1876 '1', 1877 '', 1878 '', 1879 ], 1880 'Ь' => [ 1881 '0', 1882 '', 1883 '', 1884 '', 1885 ], 1886 'Э' => [ 1887 '1', 1888 '0', 1889 '', 1890 '', 1891 ], 1892 'Ю' => [ 1893 '0', 1894 '1', 1895 '', 1896 '', 1897 ], 1898 'Я' => [ 1899 '0', 1900 '1', 1901 '', 1902 '', 1903 ], 1904 // Greek alphabet 1905 'Α' => [ 1906 '1', 1907 '0', 1908 '', 1909 '', 1910 ], 1911 'Ά' => [ 1912 '1', 1913 '0', 1914 '', 1915 '', 1916 ], 1917 'ΑΙ' => [ 1918 '1', 1919 '0', 1920 '1', 1921 '', 1922 ], 1923 'ΑΥ' => [ 1924 '1', 1925 '0', 1926 '1', 1927 '', 1928 ], 1929 'Β' => [ 1930 '0', 1931 '7', 1932 '7', 1933 '7', 1934 ], 1935 'Γ' => [ 1936 '0', 1937 '5', 1938 '5', 1939 '5', 1940 ], 1941 'Δ' => [ 1942 '0', 1943 '3', 1944 '3', 1945 '3', 1946 ], 1947 'Ε' => [ 1948 '1', 1949 '0', 1950 '', 1951 '', 1952 ], 1953 'Έ' => [ 1954 '1', 1955 '0', 1956 '', 1957 '', 1958 ], 1959 'ΕΙ' => [ 1960 '1', 1961 '0', 1962 '1', 1963 '', 1964 ], 1965 'ΕΥ' => [ 1966 '1', 1967 '1', 1968 '1', 1969 '', 1970 ], 1971 'Ζ' => [ 1972 '0', 1973 '4', 1974 '4', 1975 '4', 1976 ], 1977 'Η' => [ 1978 '1', 1979 '0', 1980 '', 1981 '', 1982 ], 1983 'Ή' => [ 1984 '1', 1985 '0', 1986 '', 1987 '', 1988 ], 1989 'Θ' => [ 1990 '0', 1991 '3', 1992 '3', 1993 '3', 1994 ], 1995 'Ι' => [ 1996 '1', 1997 '0', 1998 '', 1999 '', 2000 ], 2001 'Ί' => [ 2002 '1', 2003 '0', 2004 '', 2005 '', 2006 ], 2007 'Ϊ' => [ 2008 '1', 2009 '0', 2010 '', 2011 '', 2012 ], 2013 'ΐ' => [ 2014 '1', 2015 '0', 2016 '', 2017 '', 
2018 ], 2019 'Κ' => [ 2020 '0', 2021 '5', 2022 '5', 2023 '5', 2024 ], 2025 'Λ' => [ 2026 '0', 2027 '8', 2028 '8', 2029 '8', 2030 ], 2031 'Μ' => [ 2032 '0', 2033 '6', 2034 '6', 2035 '6', 2036 ], 2037 'ΜΠ' => [ 2038 '0', 2039 '7', 2040 '7', 2041 '7', 2042 ], 2043 'Ν' => [ 2044 '0', 2045 '6', 2046 '6', 2047 '6', 2048 ], 2049 'ΝΤ' => [ 2050 '0', 2051 '3', 2052 '3', 2053 '3', 2054 ], 2055 'Ξ' => [ 2056 '0', 2057 '5', 2058 '54', 2059 '54', 2060 ], 2061 'Ο' => [ 2062 '1', 2063 '0', 2064 '', 2065 '', 2066 ], 2067 'Ό' => [ 2068 '1', 2069 '0', 2070 '', 2071 '', 2072 ], 2073 'ΟΙ' => [ 2074 '1', 2075 '0', 2076 '1', 2077 '', 2078 ], 2079 'ΟΥ' => [ 2080 '1', 2081 '0', 2082 '1', 2083 '', 2084 ], 2085 'Π' => [ 2086 '0', 2087 '7', 2088 '7', 2089 '7', 2090 ], 2091 'Ρ' => [ 2092 '0', 2093 '9', 2094 '9', 2095 '9', 2096 ], 2097 'Σ' => [ 2098 '0', 2099 '4', 2100 '4', 2101 '4', 2102 ], 2103 'ς' => [ 2104 '0', 2105 '', 2106 '', 2107 '4', 2108 ], 2109 'Τ' => [ 2110 '0', 2111 '3', 2112 '3', 2113 '3', 2114 ], 2115 'ΤΖ' => [ 2116 '0', 2117 '4', 2118 '4', 2119 '4', 2120 ], 2121 'ΤΣ' => [ 2122 '0', 2123 '4', 2124 '4', 2125 '4', 2126 ], 2127 'Υ' => [ 2128 '1', 2129 '1', 2130 '', 2131 '', 2132 ], 2133 'Ύ' => [ 2134 '1', 2135 '1', 2136 '', 2137 '', 2138 ], 2139 'Ϋ' => [ 2140 '1', 2141 '1', 2142 '', 2143 '', 2144 ], 2145 'ΰ' => [ 2146 '1', 2147 '1', 2148 '', 2149 '', 2150 ], 2151 'ΥΚ' => [ 2152 '1', 2153 '5', 2154 '5', 2155 '5', 2156 ], 2157 'ΥΥ' => [ 2158 '1', 2159 '65', 2160 '65', 2161 '65', 2162 ], 2163 'Φ' => [ 2164 '0', 2165 '7', 2166 '7', 2167 '7', 2168 ], 2169 'Χ' => [ 2170 '0', 2171 '5', 2172 '5', 2173 '5', 2174 ], 2175 'Ψ' => [ 2176 '0', 2177 '7', 2178 '7', 2179 '7', 2180 ], 2181 'Ω' => [ 2182 '1', 2183 '0', 2184 '', 2185 '', 2186 ], 2187 'Ώ' => [ 2188 '1', 2189 '0', 2190 '', 2191 '', 2192 ], 2193 // Hebrew alphabet 2194 'א' => [ 2195 '1', 2196 '0', 2197 '', 2198 '', 2199 ], 2200 'או' => [ 2201 '1', 2202 '0', 2203 '7', 2204 '', 2205 ], 2206 'אג' => [ 2207 '1', 2208 '4', 2209 '4', 2210 '4', 
2211 '5', 2212 '5', 2213 '5', 2214 '34', 2215 '34', 2216 '34', 2217 ], 2218 'בב' => [ 2219 '0', 2220 '7', 2221 '7', 2222 '7', 2223 '77', 2224 '77', 2225 '77', 2226 ], 2227 'ב' => [ 2228 '0', 2229 '7', 2230 '7', 2231 '7', 2232 ], 2233 'גג' => [ 2234 '0', 2235 '4', 2236 '4', 2237 '4', 2238 '5', 2239 '5', 2240 '5', 2241 '45', 2242 '45', 2243 '45', 2244 '55', 2245 '55', 2246 '55', 2247 '54', 2248 '54', 2249 '54', 2250 ], 2251 'גד' => [ 2252 '0', 2253 '43', 2254 '43', 2255 '43', 2256 '53', 2257 '53', 2258 '53', 2259 ], 2260 'גה' => [ 2261 '0', 2262 '45', 2263 '45', 2264 '45', 2265 '55', 2266 '55', 2267 '55', 2268 ], 2269 'גז' => [ 2270 '0', 2271 '44', 2272 '44', 2273 '44', 2274 '45', 2275 '45', 2276 '45', 2277 ], 2278 'גח' => [ 2279 '0', 2280 '45', 2281 '45', 2282 '45', 2283 '55', 2284 '55', 2285 '55', 2286 ], 2287 'גכ' => [ 2288 '0', 2289 '45', 2290 '45', 2291 '45', 2292 '55', 2293 '55', 2294 '55', 2295 ], 2296 'גך' => [ 2297 '0', 2298 '45', 2299 '45', 2300 '45', 2301 '55', 2302 '55', 2303 '55', 2304 ], 2305 'גצ' => [ 2306 '0', 2307 '44', 2308 '44', 2309 '44', 2310 '45', 2311 '45', 2312 '45', 2313 ], 2314 'גץ' => [ 2315 '0', 2316 '44', 2317 '44', 2318 '44', 2319 '45', 2320 '45', 2321 '45', 2322 ], 2323 'גק' => [ 2324 '0', 2325 '45', 2326 '45', 2327 '45', 2328 '54', 2329 '54', 2330 '54', 2331 ], 2332 'גש' => [ 2333 '0', 2334 '44', 2335 '44', 2336 '44', 2337 '54', 2338 '54', 2339 '54', 2340 ], 2341 'גת' => [ 2342 '0', 2343 '43', 2344 '43', 2345 '43', 2346 '53', 2347 '53', 2348 '53', 2349 ], 2350 'ג' => [ 2351 '0', 2352 '4', 2353 '4', 2354 '4', 2355 '5', 2356 '5', 2357 '5', 2358 ], 2359 'דז' => [ 2360 '0', 2361 '4', 2362 '4', 2363 '4', 2364 ], 2365 'דד' => [ 2366 '0', 2367 '3', 2368 '3', 2369 '3', 2370 '33', 2371 '33', 2372 '33', 2373 ], 2374 'דט' => [ 2375 '0', 2376 '33', 2377 '33', 2378 '33', 2379 ], 2380 'דש' => [ 2381 '0', 2382 '4', 2383 '4', 2384 '4', 2385 ], 2386 'דצ' => [ 2387 '0', 2388 '4', 2389 '4', 2390 '4', 2391 ], 2392 'דץ' => [ 2393 '0', 2394 '4', 2395 '4', 
2396 '4', 2397 ], 2398 'ד' => [ 2399 '0', 2400 '3', 2401 '3', 2402 '3', 2403 ], 2404 'הג' => [ 2405 '0', 2406 '54', 2407 '54', 2408 '54', 2409 '55', 2410 '55', 2411 '55', 2412 ], 2413 'הכ' => [ 2414 '0', 2415 '55', 2416 '55', 2417 '55', 2418 ], 2419 'הח' => [ 2420 '0', 2421 '55', 2422 '55', 2423 '55', 2424 ], 2425 'הק' => [ 2426 '0', 2427 '55', 2428 '55', 2429 '55', 2430 '5', 2431 '5', 2432 '5', 2433 ], 2434 'הה' => [ 2435 '0', 2436 '5', 2437 '5', 2438 '', 2439 '55', 2440 '55', 2441 '', 2442 ], 2443 'ה' => [ 2444 '0', 2445 '5', 2446 '5', 2447 '', 2448 ], 2449 'וי' => [ 2450 '1', 2451 '', 2452 '', 2453 '', 2454 '7', 2455 '7', 2456 '7', 2457 ], 2458 'ו' => [ 2459 '1', 2460 '7', 2461 '7', 2462 '7', 2463 '7', 2464 '', 2465 '', 2466 ], 2467 'וו' => [ 2468 '1', 2469 '7', 2470 '7', 2471 '7', 2472 '7', 2473 '', 2474 '', 2475 ], 2476 'וופ' => [ 2477 '1', 2478 '7', 2479 '7', 2480 '7', 2481 '77', 2482 '77', 2483 '77', 2484 ], 2485 'זש' => [ 2486 '0', 2487 '4', 2488 '4', 2489 '4', 2490 '44', 2491 '44', 2492 '44', 2493 ], 2494 'זדז' => [ 2495 '0', 2496 '2', 2497 '4', 2498 '4', 2499 ], 2500 'ז' => [ 2501 '0', 2502 '4', 2503 '4', 2504 '4', 2505 ], 2506 'זג' => [ 2507 '0', 2508 '44', 2509 '44', 2510 '44', 2511 '45', 2512 '45', 2513 '45', 2514 ], 2515 'זז' => [ 2516 '0', 2517 '4', 2518 '4', 2519 '4', 2520 '44', 2521 '44', 2522 '44', 2523 ], 2524 'זס' => [ 2525 '0', 2526 '44', 2527 '44', 2528 '44', 2529 ], 2530 'זצ' => [ 2531 '0', 2532 '44', 2533 '44', 2534 '44', 2535 ], 2536 'זץ' => [ 2537 '0', 2538 '44', 2539 '44', 2540 '44', 2541 ], 2542 'חג' => [ 2543 '0', 2544 '54', 2545 '54', 2546 '54', 2547 '53', 2548 '53', 2549 '53', 2550 ], 2551 'חח' => [ 2552 '0', 2553 '5', 2554 '5', 2555 '5', 2556 '55', 2557 '55', 2558 '55', 2559 ], 2560 'חק' => [ 2561 '0', 2562 '55', 2563 '55', 2564 '55', 2565 '5', 2566 '5', 2567 '5', 2568 ], 2569 'חכ' => [ 2570 '0', 2571 '45', 2572 '45', 2573 '45', 2574 '55', 2575 '55', 2576 '55', 2577 ], 2578 'חס' => [ 2579 '0', 2580 '5', 2581 '54', 2582 '54', 2583 ], 
2584 'חש' => [ 2585 '0', 2586 '5', 2587 '54', 2588 '54', 2589 ], 2590 'ח' => [ 2591 '0', 2592 '5', 2593 '5', 2594 '5', 2595 ], 2596 'טש' => [ 2597 '0', 2598 '4', 2599 '4', 2600 '4', 2601 ], 2602 'טד' => [ 2603 '0', 2604 '33', 2605 '33', 2606 '33', 2607 ], 2608 'טי' => [ 2609 '0', 2610 '3', 2611 '3', 2612 '3', 2613 '4', 2614 '4', 2615 '4', 2616 '3', 2617 '3', 2618 '34', 2619 ], 2620 'טת' => [ 2621 '0', 2622 '33', 2623 '33', 2624 '33', 2625 ], 2626 'טט' => [ 2627 '0', 2628 '3', 2629 '3', 2630 '3', 2631 '33', 2632 '33', 2633 '33', 2634 ], 2635 'ט' => [ 2636 '0', 2637 '3', 2638 '3', 2639 '3', 2640 ], 2641 'י' => [ 2642 '1', 2643 '1', 2644 '', 2645 '', 2646 ], 2647 'יא' => [ 2648 '1', 2649 '1', 2650 '', 2651 '', 2652 '1', 2653 '1', 2654 '1', 2655 ], 2656 'כג' => [ 2657 '0', 2658 '55', 2659 '55', 2660 '55', 2661 '54', 2662 '54', 2663 '54', 2664 ], 2665 'כש' => [ 2666 '0', 2667 '5', 2668 '54', 2669 '54', 2670 ], 2671 'כס' => [ 2672 '0', 2673 '5', 2674 '54', 2675 '54', 2676 ], 2677 'ככ' => [ 2678 '0', 2679 '5', 2680 '5', 2681 '5', 2682 '55', 2683 '55', 2684 '55', 2685 ], 2686 'כך' => [ 2687 '0', 2688 '5', 2689 '5', 2690 '5', 2691 '55', 2692 '55', 2693 '55', 2694 ], 2695 'כ' => [ 2696 '0', 2697 '5', 2698 '5', 2699 '5', 2700 ], 2701 'כח' => [ 2702 '0', 2703 '55', 2704 '55', 2705 '55', 2706 '5', 2707 '5', 2708 '5', 2709 ], 2710 'ך' => [ 2711 '0', 2712 '', 2713 '5', 2714 '5', 2715 ], 2716 'ל' => [ 2717 '0', 2718 '8', 2719 '8', 2720 '8', 2721 ], 2722 'לל' => [ 2723 '0', 2724 '88', 2725 '88', 2726 '88', 2727 '8', 2728 '8', 2729 '8', 2730 ], 2731 'מנ' => [ 2732 '0', 2733 '66', 2734 '66', 2735 '66', 2736 ], 2737 'מן' => [ 2738 '0', 2739 '66', 2740 '66', 2741 '66', 2742 ], 2743 'ממ' => [ 2744 '0', 2745 '6', 2746 '6', 2747 '6', 2748 '66', 2749 '66', 2750 '66', 2751 ], 2752 'מם' => [ 2753 '0', 2754 '6', 2755 '6', 2756 '6', 2757 '66', 2758 '66', 2759 '66', 2760 ], 2761 'מ' => [ 2762 '0', 2763 '6', 2764 '6', 2765 '6', 2766 ], 2767 'ם' => [ 2768 '0', 2769 '', 2770 '6', 2771 '6', 2772 ], 
2773 'נמ' => [ 2774 '0', 2775 '66', 2776 '66', 2777 '66', 2778 ], 2779 'נם' => [ 2780 '0', 2781 '66', 2782 '66', 2783 '66', 2784 ], 2785 'ננ' => [ 2786 '0', 2787 '6', 2788 '6', 2789 '6', 2790 '66', 2791 '66', 2792 '66', 2793 ], 2794 'נן' => [ 2795 '0', 2796 '6', 2797 '6', 2798 '6', 2799 '66', 2800 '66', 2801 '66', 2802 ], 2803 'נ' => [ 2804 '0', 2805 '6', 2806 '6', 2807 '6', 2808 ], 2809 'ן' => [ 2810 '0', 2811 '', 2812 '6', 2813 '6', 2814 ], 2815 'סתש' => [ 2816 '0', 2817 '2', 2818 '4', 2819 '4', 2820 ], 2821 'סתז' => [ 2822 '0', 2823 '2', 2824 '4', 2825 '4', 2826 ], 2827 'סטז' => [ 2828 '0', 2829 '2', 2830 '4', 2831 '4', 2832 ], 2833 'סטש' => [ 2834 '0', 2835 '2', 2836 '4', 2837 '4', 2838 ], 2839 'סצד' => [ 2840 '0', 2841 '2', 2842 '4', 2843 '4', 2844 ], 2845 'סט' => [ 2846 '0', 2847 '2', 2848 '4', 2849 '4', 2850 '43', 2851 '43', 2852 '43', 2853 ], 2854 'סת' => [ 2855 '0', 2856 '2', 2857 '4', 2858 '4', 2859 '43', 2860 '43', 2861 '43', 2862 ], 2863 'סג' => [ 2864 '0', 2865 '44', 2866 '44', 2867 '44', 2868 '4', 2869 '4', 2870 '4', 2871 ], 2872 'סס' => [ 2873 '0', 2874 '4', 2875 '4', 2876 '4', 2877 '44', 2878 '44', 2879 '44', 2880 ], 2881 'סצ' => [ 2882 '0', 2883 '44', 2884 '44', 2885 '44', 2886 ], 2887 'סץ' => [ 2888 '0', 2889 '44', 2890 '44', 2891 '44', 2892 ], 2893 'סז' => [ 2894 '0', 2895 '44', 2896 '44', 2897 '44', 2898 ], 2899 'סש' => [ 2900 '0', 2901 '44', 2902 '44', 2903 '44', 2904 ], 2905 'ס' => [ 2906 '0', 2907 '4', 2908 '4', 2909 '4', 2910 ], 2911 'ע' => [ 2912 '1', 2913 '0', 2914 '', 2915 '', 2916 ], 2917 'פב' => [ 2918 '0', 2919 '7', 2920 '7', 2921 '7', 2922 '77', 2923 '77', 2924 '77', 2925 ], 2926 'פוו' => [ 2927 '0', 2928 '7', 2929 '7', 2930 '7', 2931 '77', 2932 '77', 2933 '77', 2934 ], 2935 'פפ' => [ 2936 '0', 2937 '7', 2938 '7', 2939 '7', 2940 '77', 2941 '77', 2942 '77', 2943 ], 2944 'פף' => [ 2945 '0', 2946 '7', 2947 '7', 2948 '7', 2949 '77', 2950 '77', 2951 '77', 2952 ], 2953 'פ' => [ 2954 '0', 2955 '7', 2956 '7', 2957 '7', 2958 ], 2959 'ף' => [ 
2960 '0', 2961 '', 2962 '7', 2963 '7', 2964 ], 2965 'צג' => [ 2966 '0', 2967 '44', 2968 '44', 2969 '44', 2970 '45', 2971 '45', 2972 '45', 2973 ], 2974 'צז' => [ 2975 '0', 2976 '44', 2977 '44', 2978 '44', 2979 ], 2980 'צס' => [ 2981 '0', 2982 '44', 2983 '44', 2984 '44', 2985 ], 2986 'צצ' => [ 2987 '0', 2988 '4', 2989 '4', 2990 '4', 2991 '5', 2992 '5', 2993 '5', 2994 '44', 2995 '44', 2996 '44', 2997 '54', 2998 '54', 2999 '54', 3000 '45', 3001 '45', 3002 '45', 3003 ], 3004 'צץ' => [ 3005 '0', 3006 '4', 3007 '4', 3008 '4', 3009 '5', 3010 '5', 3011 '5', 3012 '44', 3013 '44', 3014 '44', 3015 '54', 3016 '54', 3017 '54', 3018 ], 3019 'צש' => [ 3020 '0', 3021 '44', 3022 '44', 3023 '44', 3024 '4', 3025 '4', 3026 '4', 3027 '5', 3028 '5', 3029 '5', 3030 ], 3031 'צ' => [ 3032 '0', 3033 '4', 3034 '4', 3035 '4', 3036 '5', 3037 '5', 3038 '5', 3039 ], 3040 'ץ' => [ 3041 '0', 3042 '', 3043 '4', 3044 '4', 3045 ], 3046 'קה' => [ 3047 '0', 3048 '55', 3049 '55', 3050 '5', 3051 ], 3052 'קס' => [ 3053 '0', 3054 '5', 3055 '54', 3056 '54', 3057 ], 3058 'קש' => [ 3059 '0', 3060 '5', 3061 '54', 3062 '54', 3063 ], 3064 'קק' => [ 3065 '0', 3066 '5', 3067 '5', 3068 '5', 3069 '55', 3070 '55', 3071 '55', 3072 ], 3073 'קח' => [ 3074 '0', 3075 '55', 3076 '55', 3077 '55', 3078 ], 3079 'קכ' => [ 3080 '0', 3081 '55', 3082 '55', 3083 '55', 3084 ], 3085 'קך' => [ 3086 '0', 3087 '55', 3088 '55', 3089 '55', 3090 ], 3091 'קג' => [ 3092 '0', 3093 '55', 3094 '55', 3095 '55', 3096 '54', 3097 '54', 3098 '54', 3099 ], 3100 'ק' => [ 3101 '0', 3102 '5', 3103 '5', 3104 '5', 3105 ], 3106 'רר' => [ 3107 '0', 3108 '99', 3109 '99', 3110 '99', 3111 '9', 3112 '9', 3113 '9', 3114 ], 3115 'ר' => [ 3116 '0', 3117 '9', 3118 '9', 3119 '9', 3120 ], 3121 'שטז' => [ 3122 '0', 3123 '2', 3124 '4', 3125 '4', 3126 ], 3127 'שתש' => [ 3128 '0', 3129 '2', 3130 '4', 3131 '4', 3132 ], 3133 'שתז' => [ 3134 '0', 3135 '2', 3136 '4', 3137 '4', 3138 ], 3139 'שטש' => [ 3140 '0', 3141 '2', 3142 '4', 3143 '4', 3144 ], 3145 'שד' => [ 3146 '0', 
3147 '2', 3148 '43', 3149 '43', 3150 ], 3151 'שז' => [ 3152 '0', 3153 '44', 3154 '44', 3155 '44', 3156 ], 3157 'שס' => [ 3158 '0', 3159 '44', 3160 '44', 3161 '44', 3162 ], 3163 'שת' => [ 3164 '0', 3165 '2', 3166 '43', 3167 '43', 3168 ], 3169 'שג' => [ 3170 '0', 3171 '4', 3172 '4', 3173 '4', 3174 '44', 3175 '44', 3176 '44', 3177 '4', 3178 '43', 3179 '43', 3180 ], 3181 'שט' => [ 3182 '0', 3183 '2', 3184 '43', 3185 '43', 3186 '44', 3187 '44', 3188 '44', 3189 ], 3190 'שצ' => [ 3191 '0', 3192 '44', 3193 '44', 3194 '44', 3195 '45', 3196 '45', 3197 '45', 3198 ], 3199 'שץ' => [ 3200 '0', 3201 '44', 3202 '', 3203 '44', 3204 '45', 3205 '', 3206 '45', 3207 ], 3208 'שש' => [ 3209 '0', 3210 '4', 3211 '4', 3212 '4', 3213 '44', 3214 '44', 3215 '44', 3216 ], 3217 'ש' => [ 3218 '0', 3219 '4', 3220 '4', 3221 '4', 3222 ], 3223 'תג' => [ 3224 '0', 3225 '34', 3226 '34', 3227 '34', 3228 ], 3229 'תז' => [ 3230 '0', 3231 '34', 3232 '34', 3233 '34', 3234 ], 3235 'תש' => [ 3236 '0', 3237 '4', 3238 '4', 3239 '4', 3240 ], 3241 'תת' => [ 3242 '0', 3243 '3', 3244 '3', 3245 '3', 3246 '4', 3247 '4', 3248 '4', 3249 '33', 3250 '33', 3251 '33', 3252 '44', 3253 '44', 3254 '44', 3255 '34', 3256 '34', 3257 '34', 3258 '43', 3259 '43', 3260 '43', 3261 ], 3262 'ת' => [ 3263 '0', 3264 '3', 3265 '3', 3266 '3', 3267 '4', 3268 '4', 3269 '4', 3270 ], 3271 // Arabic alphabet 3272 'ا' => [ 3273 '1', 3274 '0', 3275 '', 3276 '', 3277 ], 3278 'ب' => [ 3279 '0', 3280 '7', 3281 '7', 3282 '7', 3283 ], 3284 'ت' => [ 3285 '0', 3286 '3', 3287 '3', 3288 '3', 3289 ], 3290 'ث' => [ 3291 '0', 3292 '3', 3293 '3', 3294 '3', 3295 ], 3296 'ج' => [ 3297 '0', 3298 '4', 3299 '4', 3300 '4', 3301 ], 3302 'ح' => [ 3303 '0', 3304 '5', 3305 '5', 3306 '5', 3307 ], 3308 'خ' => [ 3309 '0', 3310 '5', 3311 '5', 3312 '5', 3313 ], 3314 'د' => [ 3315 '0', 3316 '3', 3317 '3', 3318 '3', 3319 ], 3320 'ذ' => [ 3321 '0', 3322 '3', 3323 '3', 3324 '3', 3325 ], 3326 'ر' => [ 3327 '0', 3328 '9', 3329 '9', 3330 '9', 3331 ], 3332 'ز' => [ 3333 '0', 3334 
'4', 3335 '4', 3336 '4', 3337 ], 3338 'س' => [ 3339 '0', 3340 '4', 3341 '4', 3342 '4', 3343 ], 3344 'ش' => [ 3345 '0', 3346 '4', 3347 '4', 3348 '4', 3349 ], 3350 'ص' => [ 3351 '0', 3352 '4', 3353 '4', 3354 '4', 3355 ], 3356 'ض' => [ 3357 '0', 3358 '3', 3359 '3', 3360 '3', 3361 ], 3362 'ط' => [ 3363 '0', 3364 '3', 3365 '3', 3366 '3', 3367 ], 3368 'ظ' => [ 3369 '0', 3370 '4', 3371 '4', 3372 '4', 3373 ], 3374 'ع' => [ 3375 '1', 3376 '0', 3377 '', 3378 '', 3379 ], 3380 'غ' => [ 3381 '0', 3382 '0', 3383 '', 3384 '', 3385 ], 3386 'ف' => [ 3387 '0', 3388 '7', 3389 '7', 3390 '7', 3391 ], 3392 'ق' => [ 3393 '0', 3394 '5', 3395 '5', 3396 '5', 3397 ], 3398 'ك' => [ 3399 '0', 3400 '5', 3401 '5', 3402 '5', 3403 ], 3404 'ل' => [ 3405 '0', 3406 '8', 3407 '8', 3408 '8', 3409 ], 3410 'لا' => [ 3411 '0', 3412 '8', 3413 '8', 3414 '8', 3415 ], 3416 'م' => [ 3417 '0', 3418 '6', 3419 '6', 3420 '6', 3421 ], 3422 'ن' => [ 3423 '0', 3424 '6', 3425 '6', 3426 '6', 3427 ], 3428 'هن' => [ 3429 '0', 3430 '66', 3431 '66', 3432 '66', 3433 ], 3434 'ه' => [ 3435 '0', 3436 '5', 3437 '5', 3438 '', 3439 ], 3440 'و' => [ 3441 '1', 3442 '', 3443 '', 3444 '', 3445 '7', 3446 '', 3447 '', 3448 ], 3449 'ي' => [ 3450 '0', 3451 '1', 3452 '', 3453 '', 3454 ], 3455 'آ' => [ 3456 '0', 3457 '1', 3458 '', 3459 '', 3460 ], 3461 'ة' => [ 3462 '0', 3463 '', 3464 '', 3465 '3', 3466 ], 3467 'ی' => [ 3468 '0', 3469 '1', 3470 '', 3471 '', 3472 ], 3473 'ى' => [ 3474 '1', 3475 '1', 3476 '', 3477 '', 3478 ], 3479 ]; 3480 3481 /** 3482 * Which algorithms are supported. 3483 * 3484 * @return string[] 3485 */ 3486 public static function getAlgorithms(): array 3487 { 3488 return [ 3489 /* I18N: http://en.wikipedia.org/wiki/Soundex */ 3490 'std' => I18N::translate('Russell'), 3491 /* I18N: http://en.wikipedia.org/wiki/Daitch–Mokotoff_Soundex */ 3492 'dm' => I18N::translate('Daitch-Mokotoff'), 3493 ]; 3494 } 3495 3496 /** 3497 * Is there a match between two soundex codes? 
*
     * Each argument is a colon-separated list of codes, as produced by russell()
     * or daitchMokotoff(). The lists match when they share at least one code.
     * Empty codes are ignored, so an empty list never matches anything.
     *
     * @param string $soundex1
     * @param string $soundex2
     *
     * @return bool
     */
    public static function compare($soundex1, $soundex2): bool
    {
        if ($soundex1 === '' || $soundex2 === '') {
            return false;
        }

        // Drop empty codes (e.g. the middle one in "A100::B200"), which would
        // otherwise be seen as a sound that both lists have in common.
        $codes1 = array_diff(explode(':', $soundex1), ['']);
        $codes2 = array_diff(explode(':', $soundex2), ['']);

        return array_intersect($codes1, $codes2) !== [];
    }

    /**
     * Generate Russell soundex codes for a given text.
     *
     * The text is split on spaces and each word is coded separately. When there
     * is more than one word, the words are also coded as a single run-together
     * word. Duplicate codes are removed.
     *
     * @param string $text
     *
     * @return string Colon-separated list of up to 51 four-character codes.
     */
    public static function russell(string $text): string
    {
        $words = explode(' ', $text);

        // Code each word on its own and then, for multi-word text, the
        // combined words, e.g. “New York” as “Newyork”
        $candidates = $words;
        if (count($words) > 1) {
            $candidates[] = str_replace(' ', '', $text);
        }

        $soundex_array = [];

        foreach ($candidates as $candidate) {
            // soundex() gives an empty result (false before PHP 8) for an empty
            // string, which happens here whenever the text has repeated, leading
            // or trailing spaces. The cast normalises both to ''.
            $soundex = (string) soundex($candidate);

            // Only return codes from recognisable sounds
            if ($soundex !== '' && $soundex !== '0000') {
                $soundex_array[] = $soundex;
            }
        }

        // A varchar(255) column can only hold 51 4-character codes (plus 50 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 51);

        return implode(':', $soundex_array);
    }

    /**
     * Generate Daitch–Mokotoff soundex codes for a given text.
     *
     * The text is split on spaces and each word is coded separately. When there
     * is more than one word, the words are also coded as a single run-together
     * word. Duplicate codes are removed.
     *
     * @param string $text
     *
     * @return string Colon-separated list of up to 36 six-digit codes.
     */
    public static function daitchMokotoff(string $text): string
    {
        $words = explode(' ', $text);

        // Code each word on its own and then, for multi-word text, the
        // combined words, e.g. “New York” as “Newyork”
        $candidates = $words;
        if (count($words) > 1) {
            $candidates[] = str_replace(' ', '', $text);
        }

        $soundex_array = [];

        foreach ($candidates as $candidate) {
            foreach (self::daitchMokotoffWord($candidate) as $code) {
                $soundex_array[] = $code;
            }
        }

        // A varchar(255) column can only hold 36 6-character codes (plus 35 delimiters)
        $soundex_array = array_slice(array_unique($soundex_array), 0, 36);

        return implode(':', $soundex_array);
    }

    /**
     * Calculate the Daitch-Mokotoff soundex for a word.
     *
     * The word is scanned from left to right. At each position the longest
     * chunk that has an entry in the DM_SOUNDS table is coded, and chunks with
     * no entry are skipped one byte at a time. A table entry may hold several
     * alternative sounds, in which case every code built so far is continued
     * once per alternative. This is why a single word can produce many codes.
     *
     * @param string $name
     *
     * @return string[] List of possible DM codes for the word.
     */
    private static function daitchMokotoffWord(string $name): array
    {
        // Apply special transformation rules to the input string
        $name = I18N::strtoupper($name);
        foreach (self::TRANSFORM_NAMES as $transformRule) {
            // NOTE(review): some rules end in "$", as if they were regular
            // expressions, but str_replace() treats "$" as a literal character,
            // so those rules presumably never fire. Confirm the intent before
            // changing this, as it would alter the codes that are generated.
            $name = str_replace($transformRule[0], $transformRule[1], $name);
        }

        // Initialize
        $name_script = I18N::textScript($name);
        $noVowels    = ($name_script === 'Hebr' || $name_script === 'Arab');

        // Positions are byte offsets, not character offsets
        $lastPos       = strlen($name) - 1;
        $currPos       = 0;
        $state         = 1; // 1: start of input string, 2: before vowel, 3: other
        $result        = []; // accumulate complete 6-digit D-M codes here
        $partialResult = [['!']]; // incomplete D-M codes ('!' stops "duplicate sound" check)

        // Loop through the input string.
        // Stop when the string is exhausted or when no more partial results remain
        while (count($partialResult) !== 0 && $currPos <= $lastPos) {
            // Find the DM coding table entry for the chunk at the current position
            $thisEntry = self::longestTableEntry($name, $currPos);
            if ($thisEntry === '') {
                $currPos++; // Not in table: advance pointer to next byte
                continue;   // and try again
            }

            $soundTableEntry = self::DM_SOUNDS[$thisEntry];
            $workingResult   = $partialResult;
            $partialResult   = [];
            $currPos         += strlen($thisEntry);

            // Not at beginning of input string
            if ($state !== 1) {
                // Determine whether the next chunk is a vowel
                $nextEntry = $currPos <= $lastPos ? self::longestTableEntry($name, $currPos) : '';

                if ($nextEntry !== '' && self::DM_SOUNDS[$nextEntry][0] !== '0') {
                    // Next chunk is a vowel
                    $state = 2;
                } else {
                    // Next chunk is not a vowel, or there is no next chunk
                    $state = 3;
                }
            }

            // Each triplet in the table entry is one alternative sound
            while ($state < count($soundTableEntry)) {
                $sound = $soundTableEntry[$state];

                if ($sound === '') {
                    // empty means 'ignore this sound in this state'
                    foreach ($workingResult as $workingEntry) {
                        $workingEntry[count($workingEntry) - 1] .= '!'; // Prevent false 'doubles'
                        $partialResult[] = $workingEntry;
                    }
                } else {
                    foreach ($workingResult as $workingEntry) {
                        $isDuplicate = $sound === $workingEntry[count($workingEntry) - 1];

                        // A sound that repeats the previous one is normally dropped.
                        // For Hebrew and Arabic it is kept, so both occurrences are coded.
                        if (!$isDuplicate || $noVowels) {
                            $workingEntry[] = $sound;
                        }

                        if (count($workingEntry) < 7) {
                            $partialResult[] = $workingEntry;
                        } else {
                            // This is the 6th code in the sequence
                            // We're looking for 7 entries because the first is '!' and doesn't count
                            $code = self::completeCode($workingEntry);
                            if ($code !== '') {
                                $result[] = $code;
                            }
                        }
                    }
                }

                $state += 3; // Advance to next triplet while keeping the same basic state
            }
        }

        // Zero-fill and copy all remaining partial results
        foreach ($partialResult as $workingEntry) {
            $code = self::completeCode($workingEntry);
            if ($code !== '') {
                $result[] = $code;
            }
        }

        return $result;
    }

    /**
     * Find the longest chunk of text, starting at a byte offset, that has an
     * entry in the DM_SOUNDS table.
     *
     * @param string $text
     * @param int    $offset Byte offset into $text.
     *
     * @return string The matching table key, or an empty string if there is none.
     */
    private static function longestTableEntry(string $text, int $offset): string
    {
        // Get maximum length chunk
        $chunk = (string) substr($text, $offset, self::MAXCHAR);

        // Not in table: try a shorter chunk
        while ($chunk !== '' && !isset(self::DM_SOUNDS[$chunk])) {
            $chunk = (string) substr($chunk, 0, -1);
        }

        return $chunk;
    }

    /**
     * Convert a list of sounds into a six-digit D-M code.
     *
     * @param string[] $entry Sounds, possibly containing '!' markers.
     *
     * @return string The zero-filled code, or an empty string if there are no sounds.
     */
    private static function completeCode(array $entry): string
    {
        $digits = str_replace('!', '', implode('', $entry));

        // Only return codes from recognisable sounds.
        // The test must be against '' and not a truthiness check: a word whose
        // only sound is an initial vowel gives the string "0", which PHP treats as false.
        if ($digits === '') {
            return '';
        }

        return substr($digits . '000000', 0, 6);
    }
}