1 /* 2 * Copyright 2010-2018 Haiku Inc. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Christophe Huriaux, c.huriaux@gmail.com 7 * Andrew Lindesay, apl@lindesay.co.nz 8 */ 9 10 11 #include <Url.h> 12 13 #include <ctype.h> 14 #include <cstdio> 15 #include <cstdlib> 16 #include <new> 17 18 #include <MimeType.h> 19 #include <Roster.h> 20 21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 22 #include <ICUWrapper.h> 23 #endif 24 25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 26 #include <unicode/idna.h> 27 #include <unicode/stringpiece.h> 28 #endif 29 30 31 static const char* kArchivedUrl = "be:url string"; 32 33 34 BUrl::BUrl(const char* url) 35 : 36 fUrlString(), 37 fProtocol(), 38 fUser(), 39 fPassword(), 40 fHost(), 41 fPort(0), 42 fPath(), 43 fRequest(), 44 fHasHost(false), 45 fHasFragment(false) 46 { 47 SetUrlString(url); 48 } 49 50 51 BUrl::BUrl(BMessage* archive) 52 : 53 fUrlString(), 54 fProtocol(), 55 fUser(), 56 fPassword(), 57 fHost(), 58 fPort(0), 59 fPath(), 60 fRequest(), 61 fHasHost(false), 62 fHasFragment(false) 63 { 64 BString url; 65 66 if (archive->FindString(kArchivedUrl, &url) == B_OK) 67 SetUrlString(url); 68 else 69 _ResetFields(); 70 } 71 72 73 BUrl::BUrl(const BUrl& other) 74 : 75 BArchivable(), 76 fUrlString(), 77 fProtocol(other.fProtocol), 78 fUser(other.fUser), 79 fPassword(other.fPassword), 80 fHost(other.fHost), 81 fPort(other.fPort), 82 fPath(other.fPath), 83 fRequest(other.fRequest), 84 fFragment(other.fFragment), 85 fUrlStringValid(other.fUrlStringValid), 86 fAuthorityValid(other.fAuthorityValid), 87 fUserInfoValid(other.fUserInfoValid), 88 fHasProtocol(other.fHasProtocol), 89 fHasUserName(other.fHasUserName), 90 fHasPassword(other.fHasPassword), 91 fHasHost(other.fHasHost), 92 fHasPort(other.fHasPort), 93 fHasPath(other.fHasPath), 94 fHasRequest(other.fHasRequest), 95 fHasFragment(other.fHasFragment) 96 { 97 if (fUrlStringValid) 98 fUrlString = other.fUrlString; 99 100 if (fAuthorityValid) 101 fAuthority = other.fAuthority; 102 103 if (fUserInfoValid) 104 fUserInfo = other.fUserInfo; 105 106 } 107 108 109 BUrl::BUrl(const BUrl& base, const BString& location) 110 : 111 fUrlString(), 112 fProtocol(), 113 fUser(), 114 fPassword(), 115 fHost(), 116 fPort(0), 117 fPath(), 118 fRequest(), 119 fAuthorityValid(false), 120 fUserInfoValid(false), 121 fHasUserName(false), 122 fHasPassword(false), 123 fHasHost(false), 124 fHasPort(false), 125 fHasFragment(false) 126 { 127 // This implements the algorithm in RFC3986, Section 5.2. 128 129 BUrl relative(location); 130 if (relative.HasProtocol()) { 131 SetProtocol(relative.Protocol()); 132 if (relative.HasAuthority()) 133 SetAuthority(relative.Authority()); 134 SetPath(relative.Path()); 135 SetRequest(relative.Request()); 136 } else { 137 if (relative.HasAuthority()) { 138 SetAuthority(relative.Authority()); 139 SetPath(relative.Path()); 140 SetRequest(relative.Request()); 141 } else { 142 if (relative.Path().IsEmpty()) { 143 _SetPathUnsafe(base.Path()); 144 if (relative.HasRequest()) 145 SetRequest(relative.Request()); 146 else 147 SetRequest(base.Request()); 148 } else { 149 if (relative.Path()[0] == '/') 150 SetPath(relative.Path()); 151 else { 152 BString path = base._MergePath(relative.Path()); 153 SetPath(path); 154 } 155 SetRequest(relative.Request()); 156 } 157 158 if (base.HasAuthority()) 159 SetAuthority(base.Authority()); 160 } 161 SetProtocol(base.Protocol()); 162 } 163 164 if (relative.HasFragment()) 165 SetFragment(relative.Fragment()); 166 } 167 168 169 BUrl::BUrl() 170 : 171 fUrlString(), 172 fProtocol(), 173 fUser(), 174 fPassword(), 175 fHost(), 176 fPort(0), 177 fPath(), 178 fRequest(), 179 fHasHost(false), 180 fHasFragment(false) 181 { 182 _ResetFields(); 183 } 184 185 186 BUrl::BUrl(const BPath& path) 187 : 188 fUrlString(), 189 fProtocol(), 190 fUser(), 191 fPassword(), 192 fHost(), 193 fPort(0), 194 fPath(), 195 fRequest(), 196 fHasHost(false), 197 fHasFragment(false) 198 { 199 SetUrlString(UrlEncode(path.Path(), true, true)); 200 SetProtocol("file"); 201 } 202 203 204 BUrl::~BUrl() 205 { 206 } 207 208 209 // #pragma mark URL fields modifiers 210 211 212 BUrl& 213 BUrl::SetUrlString(const BString& url) 214 { 215 _ExplodeUrlString(url); 216 return *this; 217 } 218 219 220 BUrl& 221 BUrl::SetProtocol(const BString& protocol) 222 { 223 fProtocol = protocol; 224 fHasProtocol = !fProtocol.IsEmpty(); 225 fUrlStringValid = false; 226 return *this; 227 } 228 229 230 BUrl& 231 BUrl::SetUserName(const BString& user) 232 { 233 fUser = user; 234 fHasUserName = !fUser.IsEmpty(); 235 fUrlStringValid = false; 236 fAuthorityValid = false; 237 fUserInfoValid = false; 238 return *this; 239 } 240 241 242 BUrl& 243 BUrl::SetPassword(const BString& password) 244 { 245 fPassword = password; 246 fHasPassword = !fPassword.IsEmpty(); 247 fUrlStringValid = false; 248 fAuthorityValid = false; 249 fUserInfoValid = false; 250 return *this; 251 } 252 253 254 BUrl& 255 BUrl::SetHost(const BString& host) 256 { 257 fHost = host; 258 fHasHost = !fHost.IsEmpty(); 259 fUrlStringValid = false; 260 fAuthorityValid = false; 261 return *this; 262 } 263 264 265 BUrl& 266 BUrl::SetPort(int port) 267 { 268 fPort = port; 269 fHasPort = (port != 0); 270 fUrlStringValid = false; 271 fAuthorityValid = false; 272 return *this; 273 } 274 275 276 BUrl& 277 BUrl::SetPath(const BString& path) 278 { 279 // Implements RFC3986 section 5.2.4, "Remove dot segments" 280 281 // 1. 282 BString output; 283 BString input(path); 284 285 // 2. 286 while (!input.IsEmpty()) { 287 // 2.A. 288 if (input.StartsWith("./")) { 289 input.Remove(0, 2); 290 continue; 291 } 292 293 if (input.StartsWith("../")) { 294 input.Remove(0, 3); 295 continue; 296 } 297 298 // 2.B. 299 if (input.StartsWith("/./")) { 300 input.Remove(0, 2); 301 continue; 302 } 303 304 if (input == "/.") { 305 input.Remove(1, 1); 306 continue; 307 } 308 309 // 2.C. 310 if (input.StartsWith("/../")) { 311 input.Remove(0, 3); 312 output.Truncate(output.FindLast('/')); 313 continue; 314 } 315 316 if (input == "/..") { 317 input.Remove(1, 2); 318 output.Truncate(output.FindLast('/')); 319 continue; 320 } 321 322 // 2.D. 323 if (input == "." || input == "..") { 324 break; 325 } 326 327 if (input == "/.") { 328 input.Remove(1, 1); 329 continue; 330 } 331 332 // 2.E. 333 int slashpos = input.FindFirst('/', 1); 334 if (slashpos > 0) { 335 output.Append(input, slashpos); 336 input.Remove(0, slashpos); 337 } else { 338 output.Append(input); 339 break; 340 } 341 } 342 343 _SetPathUnsafe(output); 344 return *this; 345 } 346 347 348 BUrl& 349 BUrl::SetRequest(const BString& request) 350 { 351 fRequest = request; 352 fHasRequest = !fRequest.IsEmpty(); 353 fUrlStringValid = false; 354 return *this; 355 } 356 357 358 BUrl& 359 BUrl::SetFragment(const BString& fragment) 360 { 361 fFragment = fragment; 362 fHasFragment = true; 363 fUrlStringValid = false; 364 return *this; 365 } 366 367 368 // #pragma mark URL fields access 369 370 371 const BString& 372 BUrl::UrlString() const 373 { 374 if (!fUrlStringValid) { 375 fUrlString.Truncate(0); 376 377 if (HasProtocol()) { 378 fUrlString << fProtocol << ':'; 379 } 380 381 if (HasAuthority()) { 382 fUrlString << "//"; 383 fUrlString << Authority(); 384 } 385 fUrlString << Path(); 386 387 if (HasRequest()) 388 fUrlString << '?' << fRequest; 389 390 if (HasFragment()) 391 fUrlString << '#' << fFragment; 392 393 fUrlStringValid = true; 394 } 395 396 return fUrlString; 397 } 398 399 400 const BString& 401 BUrl::Protocol() const 402 { 403 return fProtocol; 404 } 405 406 407 const BString& 408 BUrl::UserName() const 409 { 410 return fUser; 411 } 412 413 414 const BString& 415 BUrl::Password() const 416 { 417 return fPassword; 418 } 419 420 421 const BString& 422 BUrl::UserInfo() const 423 { 424 if (!fUserInfoValid) { 425 fUserInfo = fUser; 426 427 if (HasPassword()) 428 fUserInfo << ':' << fPassword; 429 430 fUserInfoValid = true; 431 } 432 433 return fUserInfo; 434 } 435 436 437 const BString& 438 BUrl::Host() const 439 { 440 return fHost; 441 } 442 443 444 int 445 BUrl::Port() const 446 { 447 return fPort; 448 } 449 450 451 const BString& 452 BUrl::Authority() const 453 { 454 if (!fAuthorityValid) { 455 fAuthority.Truncate(0); 456 457 if (HasUserInfo()) 458 fAuthority << UserInfo() << '@'; 459 fAuthority << Host(); 460 461 if (HasPort()) 462 fAuthority << ':' << fPort; 463 464 fAuthorityValid = true; 465 } 466 return fAuthority; 467 } 468 469 470 const BString& 471 BUrl::Path() const 472 { 473 return fPath; 474 } 475 476 477 const BString& 478 BUrl::Request() const 479 { 480 return fRequest; 481 } 482 483 484 const BString& 485 BUrl::Fragment() const 486 { 487 return fFragment; 488 } 489 490 491 // #pragma mark URL fields tests 492 493 494 bool 495 BUrl::IsValid() const 496 { 497 if (!fHasProtocol) 498 return false; 499 500 if (!_IsProtocolValid()) 501 return false; 502 503 // it is possible that there can be an authority but no host. 504 // wierd://tea:tree@/x 505 if (HasHost() && !(fHost.IsEmpty() && HasAuthority()) && !_IsHostValid()) 506 return false; 507 508 if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp" 509 || fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet" 510 || fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp" 511 || fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") { 512 return HasHost() && !fHost.IsEmpty(); 513 } 514 515 if (fProtocol == "file") 516 return fHasPath; 517 518 return true; 519 } 520 521 522 bool 523 BUrl::HasProtocol() const 524 { 525 return fHasProtocol; 526 } 527 528 529 bool 530 BUrl::HasAuthority() const 531 { 532 return fHasHost || fHasUserName; 533 } 534 535 536 bool 537 BUrl::HasUserName() const 538 { 539 return fHasUserName; 540 } 541 542 543 bool 544 BUrl::HasPassword() const 545 { 546 return fHasPassword; 547 } 548 549 550 bool 551 BUrl::HasUserInfo() const 552 { 553 return fHasUserName || fHasPassword; 554 } 555 556 557 bool 558 BUrl::HasHost() const 559 { 560 return fHasHost; 561 } 562 563 564 bool 565 BUrl::HasPort() const 566 { 567 return fHasPort; 568 } 569 570 571 bool 572 BUrl::HasPath() const 573 { 574 return fHasPath; 575 } 576 577 578 bool 579 BUrl::HasRequest() const 580 { 581 return fHasRequest; 582 } 583 584 585 bool 586 BUrl::HasFragment() const 587 { 588 return fHasFragment; 589 } 590 591 592 // #pragma mark URL encoding/decoding of needed fields 593 594 595 void 596 BUrl::UrlEncode(bool strict) 597 { 598 fUser = _DoUrlEncodeChunk(fUser, strict); 599 fPassword = _DoUrlEncodeChunk(fPassword, strict); 600 fHost = _DoUrlEncodeChunk(fHost, strict); 601 fFragment = _DoUrlEncodeChunk(fFragment, strict); 602 fPath = _DoUrlEncodeChunk(fPath, strict, true); 603 } 604 605 606 void 607 BUrl::UrlDecode(bool strict) 608 { 609 fUser = _DoUrlDecodeChunk(fUser, strict); 610 fPassword = _DoUrlDecodeChunk(fPassword, strict); 611 fHost = _DoUrlDecodeChunk(fHost, strict); 612 fFragment = _DoUrlDecodeChunk(fFragment, strict); 613 fPath = _DoUrlDecodeChunk(fPath, strict); 614 } 615 616 617 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 618 status_t 619 BUrl::IDNAToAscii() 620 { 621 UErrorCode err = U_ZERO_ERROR; 622 icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err); 623 icu::IDNAInfo info; 624 625 BString result; 626 BStringByteSink sink(&result); 627 converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info, 628 err); 629 630 delete converter; 631 632 if (U_FAILURE(err)) 633 return B_ERROR; 634 635 fHost = result; 636 return B_OK; 637 } 638 #endif 639 640 641 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 642 status_t 643 BUrl::IDNAToUnicode() 644 { 645 UErrorCode err = U_ZERO_ERROR; 646 icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err); 647 icu::IDNAInfo info; 648 649 BString result; 650 BStringByteSink sink(&result); 651 converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info, 652 err); 653 654 delete converter; 655 656 if (U_FAILURE(err)) 657 return B_ERROR; 658 659 fHost = result; 660 return B_OK; 661 } 662 #endif 663 664 665 // #pragma mark - utility functionality 666 667 668 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 669 bool 670 BUrl::HasPreferredApplication() const 671 { 672 BString appSignature = PreferredApplication(); 673 BMimeType mime(appSignature.String()); 674 675 if (appSignature.IFindFirst("application/") == 0 676 && mime.IsValid()) 677 return true; 678 679 return false; 680 } 681 #endif 682 683 684 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 685 BString 686 BUrl::PreferredApplication() const 687 { 688 BString appSignature; 689 BMimeType mime(_UrlMimeType().String()); 690 mime.GetPreferredApp(appSignature.LockBuffer(B_MIME_TYPE_LENGTH)); 691 appSignature.UnlockBuffer(); 692 693 return BString(appSignature); 694 } 695 #endif 696 697 698 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 699 status_t 700 BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const 701 { 702 if (!IsValid()) 703 return B_BAD_VALUE; 704 705 BString urlString = UrlString(); 706 if (urlString.Length() > B_PATH_NAME_LENGTH) { 707 // TODO: BAlert 708 // if (onProblemAskUser) 709 // BAlert ... Too long URL! 710 #if DEBUG 711 fprintf(stderr, "URL too long"); 712 #endif 713 return B_NAME_TOO_LONG; 714 } 715 716 char* argv[] = { 717 const_cast<char*>("BUrlInvokedApplication"), 718 const_cast<char*>(urlString.String()), 719 NULL 720 }; 721 722 #if DEBUG 723 if (HasPreferredApplication()) 724 printf("HasPreferredApplication() == true\n"); 725 else 726 printf("HasPreferredApplication() == false\n"); 727 #endif 728 729 status_t status = be_roster->Launch(_UrlMimeType().String(), 1, argv+1); 730 if (status != B_OK) { 731 #if DEBUG 732 fprintf(stderr, "Opening URL failed: %s\n", strerror(status)); 733 #endif 734 } 735 736 return status; 737 } 738 #endif 739 740 741 // #pragma mark Url encoding/decoding of string 742 743 744 /*static*/ BString 745 BUrl::UrlEncode(const BString& url, bool strict, bool directory) 746 { 747 return _DoUrlEncodeChunk(url, strict, directory); 748 } 749 750 751 /*static*/ BString 752 BUrl::UrlDecode(const BString& url, bool strict) 753 { 754 return _DoUrlDecodeChunk(url, strict); 755 } 756 757 758 // #pragma mark BArchivable members 759 760 761 status_t 762 BUrl::Archive(BMessage* into, bool deep) const 763 { 764 status_t ret = BArchivable::Archive(into, deep); 765 766 if (ret == B_OK) 767 ret = into->AddString(kArchivedUrl, UrlString()); 768 769 return ret; 770 } 771 772 773 /*static*/ BArchivable* 774 BUrl::Instantiate(BMessage* archive) 775 { 776 if (validate_instantiation(archive, "BUrl")) 777 return new(std::nothrow) BUrl(archive); 778 return NULL; 779 } 780 781 782 // #pragma mark URL comparison 783 784 785 bool 786 BUrl::operator==(BUrl& other) const 787 { 788 UrlString(); 789 other.UrlString(); 790 791 return fUrlString == other.fUrlString; 792 } 793 794 795 bool 796 BUrl::operator!=(BUrl& other) const 797 { 798 return !(*this == other); 799 } 800 801 802 // #pragma mark URL assignment 803 804 805 const BUrl& 806 BUrl::operator=(const BUrl& other) 807 { 808 fUrlStringValid = other.fUrlStringValid; 809 if (fUrlStringValid) 810 fUrlString = other.fUrlString; 811 812 fAuthorityValid = other.fAuthorityValid; 813 if (fAuthorityValid) 814 fAuthority = other.fAuthority; 815 816 fUserInfoValid = other.fUserInfoValid; 817 if (fUserInfoValid) 818 fUserInfo = other.fUserInfo; 819 820 fProtocol = other.fProtocol; 821 fUser = other.fUser; 822 fPassword = other.fPassword; 823 fHost = other.fHost; 824 fPort = other.fPort; 825 fPath = other.fPath; 826 fRequest = other.fRequest; 827 fFragment = other.fFragment; 828 829 fHasProtocol = other.fHasProtocol; 830 fHasUserName = other.fHasUserName; 831 fHasPassword = other.fHasPassword; 832 fHasHost = other.fHasHost; 833 fHasPort = other.fHasPort; 834 fHasPath = other.fHasPath; 835 fHasRequest = other.fHasRequest; 836 fHasFragment = other.fHasFragment; 837 838 return *this; 839 } 840 841 842 const BUrl& 843 BUrl::operator=(const BString& string) 844 { 845 SetUrlString(string); 846 return *this; 847 } 848 849 850 const BUrl& 851 BUrl::operator=(const char* string) 852 { 853 SetUrlString(string); 854 return *this; 855 } 856 857 858 // #pragma mark URL to string conversion 859 860 861 BUrl::operator const char*() const 862 { 863 return UrlString(); 864 } 865 866 867 void 868 BUrl::_ResetFields() 869 { 870 fHasProtocol = false; 871 fHasUserName = false; 872 fHasPassword = false; 873 fHasHost = false; 874 fHasPort = false; 875 fHasPath = false; 876 fHasRequest = false; 877 fHasFragment = false; 878 879 fProtocol.Truncate(0); 880 fUser.Truncate(0); 881 fPassword.Truncate(0); 882 fHost.Truncate(0); 883 fPort = 0; 884 fPath.Truncate(0); 885 fRequest.Truncate(0); 886 fFragment.Truncate(0); 887 888 // Force re-generation of these fields 889 fUrlStringValid = false; 890 fUserInfoValid = false; 891 fAuthorityValid = false; 892 } 893 894 895 bool 896 BUrl::_ContainsDelimiter(const BString& url) 897 { 898 int32 len = url.Length(); 899 900 for (int32 i = 0; i < len; i++) { 901 switch (url[i]) { 902 case ' ': 903 case '\n': 904 case '\t': 905 case '\r': 906 case '<': 907 case '>': 908 case '"': 909 return true; 910 } 911 } 912 913 return false; 914 } 915 916 917 enum explode_url_parse_state { 918 EXPLODE_PROTOCOL, 919 EXPLODE_PROTOCOLTERMINATOR, 920 EXPLODE_AUTHORITYORPATH, 921 EXPLODE_AUTHORITY, 922 EXPLODE_PATH, 923 EXPLODE_REQUEST, // query 924 EXPLODE_FRAGMENT, 925 EXPLODE_COMPLETE 926 }; 927 928 929 typedef bool (*explode_char_match_fn)(char c); 930 931 932 static bool 933 explode_is_protocol_char(char c) 934 { 935 return isalnum(c) || c == '+' || c == '.' || c == '-'; 936 } 937 938 939 static bool 940 explode_is_authority_char(char c) 941 { 942 return !(c == '/' || c == '?' || c == '#'); 943 } 944 945 946 static bool 947 explode_is_path_char(char c) 948 { 949 return !(c == '#' || c == '?'); 950 } 951 952 953 static bool 954 explode_is_request_char(char c) 955 { 956 return c != '#'; 957 } 958 959 960 static int32 961 char_offset_until_fn_false(const char* url, int32 len, int32 offset, 962 explode_char_match_fn fn) 963 { 964 while (offset < len && fn(url[offset])) 965 offset++; 966 967 return offset; 968 } 969 970 /* 971 * This function takes a URL in string-form and parses the components of the URL out. 972 */ 973 status_t 974 BUrl::_ExplodeUrlString(const BString& url) 975 { 976 _ResetFields(); 977 978 // RFC3986, Appendix C; the URL should not contain whitespace or delimiters 979 // by this point. 980 981 if (_ContainsDelimiter(url)) 982 return B_BAD_VALUE; 983 984 explode_url_parse_state state = EXPLODE_PROTOCOL; 985 int32 offset = 0; 986 int32 length = url.Length(); 987 const char *url_c = url.String(); 988 989 // The regexp is provided in RFC3986 (URI generic syntax), Appendix B 990 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))? 991 // The ensuing logic attempts to simulate the behaviour of extracting the groups 992 // from the string without requiring a group-capable regex engine. 993 994 while (offset < length) { 995 switch (state) { 996 997 case EXPLODE_PROTOCOL: 998 { 999 int32 end_protocol = char_offset_until_fn_false(url_c, length, 1000 offset, explode_is_protocol_char); 1001 1002 if (end_protocol < length) { 1003 SetProtocol(BString(&url_c[offset], end_protocol - offset)); 1004 state = EXPLODE_PROTOCOLTERMINATOR; 1005 offset = end_protocol; 1006 } else { 1007 // No protocol was found, try parsing from the string 1008 // start, beginning with authority or path 1009 SetProtocol(""); 1010 offset = 0; 1011 state = EXPLODE_AUTHORITYORPATH; 1012 } 1013 break; 1014 } 1015 1016 case EXPLODE_PROTOCOLTERMINATOR: 1017 { 1018 if (url[offset] == ':') { 1019 offset++; 1020 } else { 1021 // No protocol was found, try parsing from the string 1022 // start, beginning with authority or path 1023 SetProtocol(""); 1024 offset = 0; 1025 } 1026 state = EXPLODE_AUTHORITYORPATH; 1027 break; 1028 } 1029 1030 case EXPLODE_AUTHORITYORPATH: 1031 { 1032 // The authority must start with //. If it isn't there, skip 1033 // to parsing the path. 1034 if (strncmp(&url_c[offset], "//", 2) == 0) { 1035 state = EXPLODE_AUTHORITY; 1036 offset += 2; 1037 } else { 1038 state = EXPLODE_PATH; 1039 } 1040 break; 1041 } 1042 1043 case EXPLODE_AUTHORITY: 1044 { 1045 int end_authority = char_offset_until_fn_false(url_c, length, 1046 offset, explode_is_authority_char); 1047 SetAuthority(BString(&url_c[offset], end_authority - offset)); 1048 state = EXPLODE_PATH; 1049 offset = end_authority; 1050 break; 1051 } 1052 1053 case EXPLODE_PATH: 1054 { 1055 int end_path = char_offset_until_fn_false(url_c, length, offset, 1056 explode_is_path_char); 1057 SetPath(BString(&url_c[offset], end_path - offset)); 1058 state = EXPLODE_REQUEST; 1059 offset = end_path; 1060 break; 1061 } 1062 1063 case EXPLODE_REQUEST: // query 1064 { 1065 if (url_c[offset] == '?') { 1066 offset++; 1067 int end_request = char_offset_until_fn_false(url_c, length, 1068 offset, explode_is_request_char); 1069 SetRequest(BString(&url_c[offset], end_request - offset)); 1070 offset = end_request; 1071 } 1072 state = EXPLODE_FRAGMENT; 1073 break; 1074 } 1075 1076 case EXPLODE_FRAGMENT: 1077 { 1078 if (url_c[offset] == '#') { 1079 offset++; 1080 SetFragment(BString(&url_c[offset], length - offset)); 1081 offset = length; 1082 } 1083 state = EXPLODE_COMPLETE; 1084 break; 1085 } 1086 1087 case EXPLODE_COMPLETE: 1088 // should never be reached - keeps the compiler happy 1089 break; 1090 1091 } 1092 } 1093 1094 return B_OK; 1095 } 1096 1097 1098 BString 1099 BUrl::_MergePath(const BString& relative) const 1100 { 1101 // This implements RFC3986, Section 5.2.3. 1102 if (HasAuthority() && fPath == "") { 1103 BString result("/"); 1104 result << relative; 1105 return result; 1106 } 1107 1108 BString result(fPath); 1109 result.Truncate(result.FindLast("/") + 1); 1110 result << relative; 1111 1112 return result; 1113 } 1114 1115 1116 // This sets the path without normalizing it. If fed with a path that has . or 1117 // .. segments, this would make the URL invalid. 1118 void 1119 BUrl::_SetPathUnsafe(const BString& path) 1120 { 1121 fPath = path; 1122 fHasPath = true; // RFC says an empty path is still a path 1123 fUrlStringValid = false; 1124 } 1125 1126 1127 enum authority_parse_state { 1128 AUTHORITY_USERNAME, 1129 AUTHORITY_PASSWORD, 1130 AUTHORITY_HOST, 1131 AUTHORITY_PORT, 1132 AUTHORITY_COMPLETE 1133 }; 1134 1135 void 1136 BUrl::SetAuthority(const BString& authority) 1137 { 1138 fAuthority = authority; 1139 1140 fUser.Truncate(0); 1141 fPassword.Truncate(0); 1142 fHost.Truncate(0); 1143 fPort = 0; 1144 fHasPort = false; 1145 fHasUserName = false; 1146 fHasPassword = false; 1147 1148 bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@'); 1149 authority_parse_state state = AUTHORITY_USERNAME; 1150 int32 offset = 0; 1151 int32 length = authority.Length(); 1152 const char *authority_c = authority.String(); 1153 1154 while (AUTHORITY_COMPLETE != state && offset < length) { 1155 1156 switch (state) { 1157 1158 case AUTHORITY_USERNAME: 1159 { 1160 if (hasUsernamePassword) { 1161 int32 end_username = char_offset_until_fn_false( 1162 authority_c, length, offset, _IsUsernameChar); 1163 1164 SetUserName(BString(&authority_c[offset], 1165 end_username - offset)); 1166 1167 state = AUTHORITY_PASSWORD; 1168 offset = end_username; 1169 } else { 1170 state = AUTHORITY_HOST; 1171 } 1172 break; 1173 } 1174 1175 case AUTHORITY_PASSWORD: 1176 { 1177 if (hasUsernamePassword && ':' == authority[offset]) { 1178 offset++; // move past the delimiter 1179 int32 end_password = char_offset_until_fn_false( 1180 authority_c, length, offset, _IsPasswordChar); 1181 1182 SetPassword(BString(&authority_c[offset], 1183 end_password - offset)); 1184 1185 offset = end_password; 1186 } 1187 1188 // if the host was preceded by a username + password couple 1189 // then there will be an '@' delimiter to avoid. 1190 1191 if (authority_c[offset] == '@') { 1192 offset++; 1193 } 1194 1195 state = AUTHORITY_HOST; 1196 break; 1197 } 1198 1199 case AUTHORITY_HOST: 1200 { 1201 1202 // the host may be enclosed within brackets in order to express 1203 // an IPV6 address. 1204 1205 if (authority_c[offset] == '[') { 1206 int32 end_ipv6_host = char_offset_until_fn_false( 1207 authority_c, length, offset + 1, _IsIPV6Char); 1208 1209 if (authority_c[end_ipv6_host] == ']') { 1210 SetHost(BString(&authority_c[offset], 1211 (end_ipv6_host - offset) + 1)); 1212 state = AUTHORITY_PORT; 1213 offset = end_ipv6_host + 1; 1214 } 1215 } 1216 1217 // if an IPV6 host was not found. 1218 1219 if (AUTHORITY_HOST == state) { 1220 int32 end_host = char_offset_until_fn_false( 1221 authority_c, length, offset, _IsHostChar); 1222 1223 SetHost(BString(&authority_c[offset], end_host - offset)); 1224 state = AUTHORITY_PORT; 1225 offset = end_host; 1226 } 1227 1228 break; 1229 } 1230 1231 case AUTHORITY_PORT: 1232 { 1233 if (authority_c[offset] == ':') { 1234 offset++; 1235 int32 end_port = char_offset_until_fn_false( 1236 authority_c, length, offset, _IsPortChar); 1237 SetPort(atoi(&authority_c[offset])); 1238 offset = end_port; 1239 } 1240 1241 state = AUTHORITY_COMPLETE; 1242 1243 break; 1244 } 1245 1246 case AUTHORITY_COMPLETE: 1247 // should never be reached - keeps the compiler happy 1248 break; 1249 } 1250 } 1251 1252 // An empty authority is still an authority, making it possible to have 1253 // URLs such as file:///path/to/file. 1254 // TODO however, there is no way to unset the authority once it is set... 1255 // We may want to take a const char* parameter and allow NULL. 1256 fHasHost = true; 1257 } 1258 1259 1260 /*static*/ BString 1261 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory) 1262 { 1263 BString result; 1264 1265 for (int32 i = 0; i < chunk.Length(); i++) { 1266 if (_IsUnreserved(chunk[i]) 1267 || (directory && (chunk[i] == '/' || chunk[i] == '\\'))) { 1268 result << chunk[i]; 1269 } else { 1270 if (chunk[i] == ' ' && !strict) { 1271 result << '+'; 1272 // In non-strict mode, spaces are encoded by a plus sign 1273 } else { 1274 char hexString[5]; 1275 snprintf(hexString, 5, "%X", chunk[i]); 1276 1277 result << '%' << hexString; 1278 } 1279 } 1280 } 1281 1282 return result; 1283 } 1284 1285 1286 /*static*/ BString 1287 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict) 1288 { 1289 BString result; 1290 1291 for (int32 i = 0; i < chunk.Length(); i++) { 1292 if (chunk[i] == '+' && !strict) 1293 result << ' '; 1294 else { 1295 char decoded = 0; 1296 char* out = NULL; 1297 char hexString[3]; 1298 1299 if (chunk[i] == '%' && i < chunk.Length() - 2 1300 && isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) { 1301 hexString[0] = chunk[i + 1]; 1302 hexString[1] = chunk[i + 2]; 1303 hexString[2] = 0; 1304 decoded = (char)strtol(hexString, &out, 16); 1305 } 1306 1307 if (out == hexString + 2) { 1308 i += 2; 1309 result << decoded; 1310 } else 1311 result << chunk[i]; 1312 } 1313 } 1314 return result; 1315 } 1316 1317 1318 bool 1319 BUrl::_IsHostIPV6Valid(size_t offset, int32 length) const 1320 { 1321 for (int32 i = 0; i < length; i++) { 1322 char c = fHost[offset + i]; 1323 if (!_IsIPV6Char(c)) 1324 return false; 1325 } 1326 1327 return length > 0; 1328 } 1329 1330 1331 bool 1332 BUrl::_IsHostValid() const 1333 { 1334 if (fHost.StartsWith("[") && fHost.EndsWith("]")) 1335 return _IsHostIPV6Valid(1, fHost.Length() - 2); 1336 1337 bool lastWasDot = false; 1338 1339 for (int32 i = 0; i < fHost.Length(); i++) { 1340 char c = fHost[i]; 1341 1342 if (c == '.') { 1343 if (lastWasDot || i == 0) 1344 return false; 1345 lastWasDot = true; 1346 } else { 1347 lastWasDot = false; 1348 } 1349 1350 if (!_IsHostChar(c) && c != '.') { 1351 // the underscore is technically not allowed, but occurs sometimes 1352 // in the wild. 1353 return false; 1354 } 1355 } 1356 1357 return true; 1358 } 1359 1360 1361 bool 1362 BUrl::_IsProtocolValid() const 1363 { 1364 for (int8 index = 0; index < fProtocol.Length(); index++) { 1365 char c = fProtocol[index]; 1366 1367 if (index == 0 && !isalpha(c)) 1368 return false; 1369 else if (!isalnum(c) && c != '+' && c != '-' && c != '.') 1370 return false; 1371 } 1372 1373 return !fProtocol.IsEmpty(); 1374 } 1375 1376 1377 bool 1378 BUrl::_IsUnreserved(char c) 1379 { 1380 return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'; 1381 } 1382 1383 1384 bool 1385 BUrl::_IsGenDelim(char c) 1386 { 1387 return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' 1388 || c == ']' || c == '@'; 1389 } 1390 1391 1392 bool 1393 BUrl::_IsSubDelim(char c) 1394 { 1395 return c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' 1396 || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' 1397 || c == '='; 1398 } 1399 1400 1401 bool 1402 BUrl::_IsUsernameChar(char c) 1403 { 1404 return !(c == ':' || c == '@'); 1405 } 1406 1407 1408 bool 1409 BUrl::_IsPasswordChar(char c) 1410 { 1411 return !(c == '@'); 1412 } 1413 1414 1415 bool 1416 BUrl::_IsHostChar(char c) 1417 { 1418 return ((uint8) c) > 127 || isalnum(c) || c == '-' || c == '_' || c == '.' 1419 || c == '%'; 1420 } 1421 1422 1423 bool 1424 BUrl::_IsPortChar(char c) 1425 { 1426 return isdigit(c); 1427 } 1428 1429 1430 bool 1431 BUrl::_IsIPV6Char(char c) 1432 { 1433 return c == ':' || isxdigit(c); 1434 } 1435 1436 1437 BString 1438 BUrl::_UrlMimeType() const 1439 { 1440 BString mime; 1441 mime << "application/x-vnd.Be.URL." << fProtocol; 1442 1443 return BString(mime); 1444 } 1445