1 /* 2 * Copyright 2010-2018 Haiku Inc. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Christophe Huriaux, c.huriaux@gmail.com 7 * Andrew Lindesay, apl@lindesay.co.nz 8 */ 9 10 11 #include <Url.h> 12 13 #include <ctype.h> 14 #include <cstdio> 15 #include <cstdlib> 16 #include <new> 17 18 #include <MimeType.h> 19 #include <Roster.h> 20 21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 22 #include <ICUWrapper.h> 23 #endif 24 25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 26 #include <unicode/idna.h> 27 #include <unicode/stringpiece.h> 28 #endif 29 30 31 static const char* kArchivedUrl = "be:url string"; 32 33 /*! These flags can be combined to control the parse process. */ 34 35 const uint32 PARSE_NO_MASK_BIT = 0x00000000; 36 const uint32 PARSE_RAW_PATH_MASK_BIT = 0x00000001; 37 38 39 BUrl::BUrl(const char* url) 40 : 41 fUrlString(), 42 fProtocol(), 43 fUser(), 44 fPassword(), 45 fHost(), 46 fPort(0), 47 fPath(), 48 fRequest(), 49 fHasHost(false), 50 fHasFragment(false) 51 { 52 SetUrlString(url); 53 } 54 55 56 BUrl::BUrl(BMessage* archive) 57 : 58 fUrlString(), 59 fProtocol(), 60 fUser(), 61 fPassword(), 62 fHost(), 63 fPort(0), 64 fPath(), 65 fRequest(), 66 fHasHost(false), 67 fHasFragment(false) 68 { 69 BString url; 70 71 if (archive->FindString(kArchivedUrl, &url) == B_OK) 72 SetUrlString(url); 73 else 74 _ResetFields(); 75 } 76 77 78 BUrl::BUrl(const BUrl& other) 79 : 80 BArchivable(), 81 fUrlString(), 82 fProtocol(other.fProtocol), 83 fUser(other.fUser), 84 fPassword(other.fPassword), 85 fHost(other.fHost), 86 fPort(other.fPort), 87 fPath(other.fPath), 88 fRequest(other.fRequest), 89 fFragment(other.fFragment), 90 fUrlStringValid(other.fUrlStringValid), 91 fAuthorityValid(other.fAuthorityValid), 92 fUserInfoValid(other.fUserInfoValid), 93 fHasProtocol(other.fHasProtocol), 94 fHasUserName(other.fHasUserName), 95 fHasPassword(other.fHasPassword), 96 fHasHost(other.fHasHost), 97 fHasPort(other.fHasPort), 98 fHasPath(other.fHasPath), 99 fHasRequest(other.fHasRequest), 100 fHasFragment(other.fHasFragment) 101 { 102 if (fUrlStringValid) 103 fUrlString = other.fUrlString; 104 105 if (fAuthorityValid) 106 fAuthority = other.fAuthority; 107 108 if (fUserInfoValid) 109 fUserInfo = other.fUserInfo; 110 111 } 112 113 114 BUrl::BUrl(const BUrl& base, const BString& location) 115 : 116 fUrlString(), 117 fProtocol(), 118 fUser(), 119 fPassword(), 120 fHost(), 121 fPort(0), 122 fPath(), 123 fRequest(), 124 fAuthorityValid(false), 125 fUserInfoValid(false), 126 fHasUserName(false), 127 fHasPassword(false), 128 fHasHost(false), 129 fHasPort(false), 130 fHasFragment(false) 131 { 132 // This implements the algorithm in RFC3986, Section 5.2. 133 134 BUrl relative; 135 relative._ExplodeUrlString(location, PARSE_RAW_PATH_MASK_BIT); 136 // This parse will leave the path 'raw' so that it still carries any 137 // special sequences such as '..' and '.' in it. This way it can be 138 // later combined with the base. 139 140 if (relative.HasProtocol()) { 141 SetProtocol(relative.Protocol()); 142 if (relative.HasAuthority()) 143 SetAuthority(relative.Authority()); 144 SetPath(relative.Path()); 145 SetRequest(relative.Request()); 146 } else { 147 if (relative.HasAuthority()) { 148 SetAuthority(relative.Authority()); 149 SetPath(relative.Path()); 150 SetRequest(relative.Request()); 151 } else { 152 if (relative.Path().IsEmpty()) { 153 _SetPathUnsafe(base.Path()); 154 if (relative.HasRequest()) 155 SetRequest(relative.Request()); 156 else 157 SetRequest(base.Request()); 158 } else { 159 if (relative.Path()[0] == '/') 160 SetPath(relative.Path()); 161 else { 162 BString path = base._MergePath(relative.Path()); 163 SetPath(path); 164 } 165 SetRequest(relative.Request()); 166 } 167 168 if (base.HasAuthority()) 169 SetAuthority(base.Authority()); 170 } 171 SetProtocol(base.Protocol()); 172 } 173 174 if (relative.HasFragment()) 175 SetFragment(relative.Fragment()); 176 } 177 178 179 BUrl::BUrl() 180 : 181 fUrlString(), 182 fProtocol(), 183 fUser(), 184 fPassword(), 185 fHost(), 186 fPort(0), 187 fPath(), 188 fRequest(), 189 fHasHost(false), 190 fHasFragment(false) 191 { 192 _ResetFields(); 193 } 194 195 196 BUrl::BUrl(const BPath& path) 197 : 198 fUrlString(), 199 fProtocol(), 200 fUser(), 201 fPassword(), 202 fHost(), 203 fPort(0), 204 fPath(), 205 fRequest(), 206 fHasHost(false), 207 fHasFragment(false) 208 { 209 SetUrlString(UrlEncode(path.Path(), true, true)); 210 SetProtocol("file"); 211 } 212 213 214 BUrl::~BUrl() 215 { 216 } 217 218 219 // #pragma mark URL fields modifiers 220 221 222 BUrl& 223 BUrl::SetUrlString(const BString& url) 224 { 225 _ExplodeUrlString(url, PARSE_NO_MASK_BIT); 226 return *this; 227 } 228 229 230 BUrl& 231 BUrl::SetProtocol(const BString& protocol) 232 { 233 fProtocol = protocol; 234 fHasProtocol = !fProtocol.IsEmpty(); 235 fUrlStringValid = false; 236 return *this; 237 } 238 239 240 BUrl& 241 BUrl::SetUserName(const BString& user) 242 { 243 fUser = user; 244 fHasUserName = !fUser.IsEmpty(); 245 fUrlStringValid = false; 246 fAuthorityValid = false; 247 fUserInfoValid = false; 248 return *this; 249 } 250 251 252 BUrl& 253 BUrl::SetPassword(const BString& password) 254 { 255 fPassword = password; 256 fHasPassword = !fPassword.IsEmpty(); 257 fUrlStringValid = false; 258 fAuthorityValid = false; 259 fUserInfoValid = false; 260 return *this; 261 } 262 263 264 BUrl& 265 BUrl::SetHost(const BString& host) 266 { 267 fHost = host; 268 fHasHost = !fHost.IsEmpty(); 269 fUrlStringValid = false; 270 fAuthorityValid = false; 271 return *this; 272 } 273 274 275 BUrl& 276 BUrl::SetPort(int port) 277 { 278 fPort = port; 279 fHasPort = (port != 0); 280 fUrlStringValid = false; 281 fAuthorityValid = false; 282 return *this; 283 } 284 285 286 void 287 BUrl::_RemoveLastPathComponent(BString& path) 288 { 289 int32 outputLastSlashIdx = path.FindLast('/'); 290 291 if (outputLastSlashIdx == B_ERROR) 292 path.Truncate(0); 293 else 294 path.Truncate(outputLastSlashIdx); 295 } 296 297 298 BUrl& 299 BUrl::SetPath(const BString& path) 300 { 301 // Implements RFC3986 section 5.2.4, "Remove dot segments" 302 303 // 1. 304 BString output; 305 BString input(path); 306 307 // 2. 308 while (!input.IsEmpty()) { 309 // 2.A. 310 if (input.StartsWith("./")) { 311 input.Remove(0, 2); 312 continue; 313 } 314 315 if (input.StartsWith("../")) { 316 input.Remove(0, 3); 317 continue; 318 } 319 320 // 2.B. 321 if (input.StartsWith("/./")) { 322 input.Remove(0, 2); 323 continue; 324 } 325 326 if (input == "/.") { 327 input.Remove(1, 1); 328 continue; 329 } 330 331 // 2.C. 332 if (input.StartsWith("/../")) { 333 input.Remove(0, 3); 334 _RemoveLastPathComponent(output); 335 continue; 336 } 337 338 if (input == "/..") { 339 input.Remove(1, 2); 340 _RemoveLastPathComponent(output); 341 continue; 342 } 343 344 // 2.D. 345 if (input == "." || input == "..") { 346 break; 347 } 348 349 if (input == "/.") { 350 input.Remove(1, 1); 351 continue; 352 } 353 354 // 2.E. 355 int slashpos = input.FindFirst('/', 1); 356 if (slashpos > 0) { 357 output.Append(input, slashpos); 358 input.Remove(0, slashpos); 359 } else { 360 output.Append(input); 361 break; 362 } 363 } 364 365 _SetPathUnsafe(output); 366 return *this; 367 } 368 369 370 BUrl& 371 BUrl::SetRequest(const BString& request) 372 { 373 fRequest = request; 374 fHasRequest = !fRequest.IsEmpty(); 375 fUrlStringValid = false; 376 return *this; 377 } 378 379 380 BUrl& 381 BUrl::SetFragment(const BString& fragment) 382 { 383 fFragment = fragment; 384 fHasFragment = true; 385 fUrlStringValid = false; 386 return *this; 387 } 388 389 390 // #pragma mark URL fields access 391 392 393 const BString& 394 BUrl::UrlString() const 395 { 396 if (!fUrlStringValid) { 397 fUrlString.Truncate(0); 398 399 if (HasProtocol()) { 400 fUrlString << fProtocol << ':'; 401 } 402 403 if (HasAuthority()) { 404 fUrlString << "//"; 405 fUrlString << Authority(); 406 } 407 fUrlString << Path(); 408 409 if (HasRequest()) 410 fUrlString << '?' << fRequest; 411 412 if (HasFragment()) 413 fUrlString << '#' << fFragment; 414 415 fUrlStringValid = true; 416 } 417 418 return fUrlString; 419 } 420 421 422 const BString& 423 BUrl::Protocol() const 424 { 425 return fProtocol; 426 } 427 428 429 const BString& 430 BUrl::UserName() const 431 { 432 return fUser; 433 } 434 435 436 const BString& 437 BUrl::Password() const 438 { 439 return fPassword; 440 } 441 442 443 const BString& 444 BUrl::UserInfo() const 445 { 446 if (!fUserInfoValid) { 447 fUserInfo = fUser; 448 449 if (HasPassword()) 450 fUserInfo << ':' << fPassword; 451 452 fUserInfoValid = true; 453 } 454 455 return fUserInfo; 456 } 457 458 459 const BString& 460 BUrl::Host() const 461 { 462 return fHost; 463 } 464 465 466 int 467 BUrl::Port() const 468 { 469 return fPort; 470 } 471 472 473 const BString& 474 BUrl::Authority() const 475 { 476 if (!fAuthorityValid) { 477 fAuthority.Truncate(0); 478 479 if (HasUserInfo()) 480 fAuthority << UserInfo() << '@'; 481 fAuthority << Host(); 482 483 if (HasPort()) 484 fAuthority << ':' << fPort; 485 486 fAuthorityValid = true; 487 } 488 return fAuthority; 489 } 490 491 492 const BString& 493 BUrl::Path() const 494 { 495 return fPath; 496 } 497 498 499 const BString& 500 BUrl::Request() const 501 { 502 return fRequest; 503 } 504 505 506 const BString& 507 BUrl::Fragment() const 508 { 509 return fFragment; 510 } 511 512 513 // #pragma mark URL fields tests 514 515 516 bool 517 BUrl::IsValid() const 518 { 519 if (!fHasProtocol) 520 return false; 521 522 if (!_IsProtocolValid()) 523 return false; 524 525 // it is possible that there can be an authority but no host. 526 // wierd://tea:tree@/x 527 if (HasHost() && !(fHost.IsEmpty() && HasAuthority()) && !_IsHostValid()) 528 return false; 529 530 if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp" 531 || fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet" 532 || fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp" 533 || fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") { 534 return HasHost() && !fHost.IsEmpty(); 535 } 536 537 if (fProtocol == "file") 538 return fHasPath; 539 540 return true; 541 } 542 543 544 bool 545 BUrl::HasProtocol() const 546 { 547 return fHasProtocol; 548 } 549 550 551 bool 552 BUrl::HasAuthority() const 553 { 554 return fHasHost || fHasUserName; 555 } 556 557 558 bool 559 BUrl::HasUserName() const 560 { 561 return fHasUserName; 562 } 563 564 565 bool 566 BUrl::HasPassword() const 567 { 568 return fHasPassword; 569 } 570 571 572 bool 573 BUrl::HasUserInfo() const 574 { 575 return fHasUserName || fHasPassword; 576 } 577 578 579 bool 580 BUrl::HasHost() const 581 { 582 return fHasHost; 583 } 584 585 586 bool 587 BUrl::HasPort() const 588 { 589 return fHasPort; 590 } 591 592 593 bool 594 BUrl::HasPath() const 595 { 596 return fHasPath; 597 } 598 599 600 bool 601 BUrl::HasRequest() const 602 { 603 return fHasRequest; 604 } 605 606 607 bool 608 BUrl::HasFragment() const 609 { 610 return fHasFragment; 611 } 612 613 614 // #pragma mark URL encoding/decoding of needed fields 615 616 617 void 618 BUrl::UrlEncode(bool strict) 619 { 620 fUser = _DoUrlEncodeChunk(fUser, strict); 621 fPassword = _DoUrlEncodeChunk(fPassword, strict); 622 fHost = _DoUrlEncodeChunk(fHost, strict); 623 fFragment = _DoUrlEncodeChunk(fFragment, strict); 624 fPath = _DoUrlEncodeChunk(fPath, strict, true); 625 } 626 627 628 void 629 BUrl::UrlDecode(bool strict) 630 { 631 fUser = _DoUrlDecodeChunk(fUser, strict); 632 fPassword = _DoUrlDecodeChunk(fPassword, strict); 633 fHost = _DoUrlDecodeChunk(fHost, strict); 634 fFragment = _DoUrlDecodeChunk(fFragment, strict); 635 fPath = _DoUrlDecodeChunk(fPath, strict); 636 } 637 638 639 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 640 status_t 641 BUrl::IDNAToAscii() 642 { 643 UErrorCode err = U_ZERO_ERROR; 644 icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err); 645 icu::IDNAInfo info; 646 647 BString result; 648 BStringByteSink sink(&result); 649 converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info, 650 err); 651 652 delete converter; 653 654 if (U_FAILURE(err)) 655 return B_ERROR; 656 657 fHost = result; 658 return B_OK; 659 } 660 #endif 661 662 663 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 664 status_t 665 BUrl::IDNAToUnicode() 666 { 667 UErrorCode err = U_ZERO_ERROR; 668 icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err); 669 icu::IDNAInfo info; 670 671 BString result; 672 BStringByteSink sink(&result); 673 converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info, 674 err); 675 676 delete converter; 677 678 if (U_FAILURE(err)) 679 return B_ERROR; 680 681 fHost = result; 682 return B_OK; 683 } 684 #endif 685 686 687 // #pragma mark - utility functionality 688 689 690 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 691 bool 692 BUrl::HasPreferredApplication() const 693 { 694 BString appSignature = PreferredApplication(); 695 BMimeType mime(appSignature.String()); 696 697 if (appSignature.IFindFirst("application/") == 0 698 && mime.IsValid()) 699 return true; 700 701 return false; 702 } 703 #endif 704 705 706 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 707 BString 708 BUrl::PreferredApplication() const 709 { 710 BString appSignature; 711 BMimeType mime(_UrlMimeType().String()); 712 mime.GetPreferredApp(appSignature.LockBuffer(B_MIME_TYPE_LENGTH)); 713 appSignature.UnlockBuffer(); 714 715 return BString(appSignature); 716 } 717 #endif 718 719 720 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 721 status_t 722 BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const 723 { 724 if (!IsValid()) 725 return B_BAD_VALUE; 726 727 BString urlString = UrlString(); 728 if (urlString.Length() > B_PATH_NAME_LENGTH) { 729 // TODO: BAlert 730 // if (onProblemAskUser) 731 // BAlert ... Too long URL! 732 #if DEBUG 733 fprintf(stderr, "URL too long"); 734 #endif 735 return B_NAME_TOO_LONG; 736 } 737 738 char* argv[] = { 739 const_cast<char*>("BUrlInvokedApplication"), 740 const_cast<char*>(urlString.String()), 741 NULL 742 }; 743 744 #if DEBUG 745 if (HasPreferredApplication()) 746 printf("HasPreferredApplication() == true\n"); 747 else 748 printf("HasPreferredApplication() == false\n"); 749 #endif 750 751 status_t status = be_roster->Launch(_UrlMimeType().String(), 1, argv+1); 752 if (status != B_OK) { 753 #if DEBUG 754 fprintf(stderr, "Opening URL failed: %s\n", strerror(status)); 755 #endif 756 } 757 758 return status; 759 } 760 #endif 761 762 763 // #pragma mark Url encoding/decoding of string 764 765 766 /*static*/ BString 767 BUrl::UrlEncode(const BString& url, bool strict, bool directory) 768 { 769 return _DoUrlEncodeChunk(url, strict, directory); 770 } 771 772 773 /*static*/ BString 774 BUrl::UrlDecode(const BString& url, bool strict) 775 { 776 return _DoUrlDecodeChunk(url, strict); 777 } 778 779 780 // #pragma mark BArchivable members 781 782 783 status_t 784 BUrl::Archive(BMessage* into, bool deep) const 785 { 786 status_t ret = BArchivable::Archive(into, deep); 787 788 if (ret == B_OK) 789 ret = into->AddString(kArchivedUrl, UrlString()); 790 791 return ret; 792 } 793 794 795 /*static*/ BArchivable* 796 BUrl::Instantiate(BMessage* archive) 797 { 798 if (validate_instantiation(archive, "BUrl")) 799 return new(std::nothrow) BUrl(archive); 800 return NULL; 801 } 802 803 804 // #pragma mark URL comparison 805 806 807 bool 808 BUrl::operator==(BUrl& other) const 809 { 810 UrlString(); 811 other.UrlString(); 812 813 return fUrlString == other.fUrlString; 814 } 815 816 817 bool 818 BUrl::operator!=(BUrl& other) const 819 { 820 return !(*this == other); 821 } 822 823 824 // #pragma mark URL assignment 825 826 827 const BUrl& 828 BUrl::operator=(const BUrl& other) 829 { 830 fUrlStringValid = other.fUrlStringValid; 831 if (fUrlStringValid) 832 fUrlString = other.fUrlString; 833 834 fAuthorityValid = other.fAuthorityValid; 835 if (fAuthorityValid) 836 fAuthority = other.fAuthority; 837 838 fUserInfoValid = other.fUserInfoValid; 839 if (fUserInfoValid) 840 fUserInfo = other.fUserInfo; 841 842 fProtocol = other.fProtocol; 843 fUser = other.fUser; 844 fPassword = other.fPassword; 845 fHost = other.fHost; 846 fPort = other.fPort; 847 fPath = other.fPath; 848 fRequest = other.fRequest; 849 fFragment = other.fFragment; 850 851 fHasProtocol = other.fHasProtocol; 852 fHasUserName = other.fHasUserName; 853 fHasPassword = other.fHasPassword; 854 fHasHost = other.fHasHost; 855 fHasPort = other.fHasPort; 856 fHasPath = other.fHasPath; 857 fHasRequest = other.fHasRequest; 858 fHasFragment = other.fHasFragment; 859 860 return *this; 861 } 862 863 864 const BUrl& 865 BUrl::operator=(const BString& string) 866 { 867 SetUrlString(string); 868 return *this; 869 } 870 871 872 const BUrl& 873 BUrl::operator=(const char* string) 874 { 875 SetUrlString(string); 876 return *this; 877 } 878 879 880 // #pragma mark URL to string conversion 881 882 883 BUrl::operator const char*() const 884 { 885 return UrlString(); 886 } 887 888 889 void 890 BUrl::_ResetFields() 891 { 892 fHasProtocol = false; 893 fHasUserName = false; 894 fHasPassword = false; 895 fHasHost = false; 896 fHasPort = false; 897 fHasPath = false; 898 fHasRequest = false; 899 fHasFragment = false; 900 901 fProtocol.Truncate(0); 902 fUser.Truncate(0); 903 fPassword.Truncate(0); 904 fHost.Truncate(0); 905 fPort = 0; 906 fPath.Truncate(0); 907 fRequest.Truncate(0); 908 fFragment.Truncate(0); 909 910 // Force re-generation of these fields 911 fUrlStringValid = false; 912 fUserInfoValid = false; 913 fAuthorityValid = false; 914 } 915 916 917 bool 918 BUrl::_ContainsDelimiter(const BString& url) 919 { 920 int32 len = url.Length(); 921 922 for (int32 i = 0; i < len; i++) { 923 switch (url[i]) { 924 case ' ': 925 case '\n': 926 case '\t': 927 case '\r': 928 case '<': 929 case '>': 930 case '"': 931 return true; 932 } 933 } 934 935 return false; 936 } 937 938 939 enum explode_url_parse_state { 940 EXPLODE_PROTOCOL, 941 EXPLODE_PROTOCOLTERMINATOR, 942 EXPLODE_AUTHORITYORPATH, 943 EXPLODE_AUTHORITY, 944 EXPLODE_PATH, 945 EXPLODE_REQUEST, // query 946 EXPLODE_FRAGMENT, 947 EXPLODE_COMPLETE 948 }; 949 950 951 typedef bool (*explode_char_match_fn)(char c); 952 953 954 static bool 955 explode_is_protocol_char(char c) 956 { 957 return isalnum(c) || c == '+' || c == '.' || c == '-'; 958 } 959 960 961 static bool 962 explode_is_authority_char(char c) 963 { 964 return !(c == '/' || c == '?' || c == '#'); 965 } 966 967 968 static bool 969 explode_is_path_char(char c) 970 { 971 return !(c == '#' || c == '?'); 972 } 973 974 975 static bool 976 explode_is_request_char(char c) 977 { 978 return c != '#'; 979 } 980 981 982 static int32 983 char_offset_until_fn_false(const char* url, int32 len, int32 offset, 984 explode_char_match_fn fn) 985 { 986 while (offset < len && fn(url[offset])) 987 offset++; 988 989 return offset; 990 } 991 992 /* 993 * This function takes a URL in string-form and parses the components of the URL out. 994 */ 995 status_t 996 BUrl::_ExplodeUrlString(const BString& url, uint32 flags) 997 { 998 _ResetFields(); 999 1000 // RFC3986, Appendix C; the URL should not contain whitespace or delimiters 1001 // by this point. 1002 1003 if (_ContainsDelimiter(url)) 1004 return B_BAD_VALUE; 1005 1006 explode_url_parse_state state = EXPLODE_PROTOCOL; 1007 int32 offset = 0; 1008 int32 length = url.Length(); 1009 bool forceHasHost = false; 1010 const char *url_c = url.String(); 1011 1012 // The regexp is provided in RFC3986 (URI generic syntax), Appendix B 1013 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))? 1014 // The ensuing logic attempts to simulate the behaviour of extracting the groups 1015 // from the string without requiring a group-capable regex engine. 1016 1017 while (offset < length) { 1018 switch (state) { 1019 1020 case EXPLODE_PROTOCOL: 1021 { 1022 int32 end_protocol = char_offset_until_fn_false(url_c, length, 1023 offset, explode_is_protocol_char); 1024 1025 if (end_protocol < length) { 1026 SetProtocol(BString(&url_c[offset], end_protocol - offset)); 1027 state = EXPLODE_PROTOCOLTERMINATOR; 1028 offset = end_protocol; 1029 } else { 1030 // No protocol was found, try parsing from the string 1031 // start, beginning with authority or path 1032 SetProtocol(""); 1033 offset = 0; 1034 state = EXPLODE_AUTHORITYORPATH; 1035 } 1036 break; 1037 } 1038 1039 case EXPLODE_PROTOCOLTERMINATOR: 1040 { 1041 if (url[offset] == ':') { 1042 offset++; 1043 } else { 1044 // No protocol was found, try parsing from the string 1045 // start, beginning with authority or path 1046 SetProtocol(""); 1047 offset = 0; 1048 } 1049 state = EXPLODE_AUTHORITYORPATH; 1050 break; 1051 } 1052 1053 case EXPLODE_AUTHORITYORPATH: 1054 { 1055 // The authority must start with //. If it isn't there, skip 1056 // to parsing the path. 1057 if (strncmp(&url_c[offset], "//", 2) == 0) { 1058 state = EXPLODE_AUTHORITY; 1059 // if we see the // then this would imply that a host is 1060 // to be rendered even if no host has been parsed. 1061 forceHasHost = true; 1062 offset += 2; 1063 } else { 1064 state = EXPLODE_PATH; 1065 } 1066 break; 1067 } 1068 1069 case EXPLODE_AUTHORITY: 1070 { 1071 int end_authority = char_offset_until_fn_false(url_c, length, 1072 offset, explode_is_authority_char); 1073 SetAuthority(BString(&url_c[offset], end_authority - offset)); 1074 state = EXPLODE_PATH; 1075 offset = end_authority; 1076 break; 1077 } 1078 1079 case EXPLODE_PATH: 1080 { 1081 int end_path = char_offset_until_fn_false(url_c, length, offset, 1082 explode_is_path_char); 1083 BString path(&url_c[offset], end_path - offset); 1084 1085 if ((flags & PARSE_RAW_PATH_MASK_BIT) == 0) 1086 SetPath(path); 1087 else 1088 _SetPathUnsafe(path); 1089 state = EXPLODE_REQUEST; 1090 offset = end_path; 1091 break; 1092 } 1093 1094 case EXPLODE_REQUEST: // query 1095 { 1096 if (url_c[offset] == '?') { 1097 offset++; 1098 int end_request = char_offset_until_fn_false(url_c, length, 1099 offset, explode_is_request_char); 1100 SetRequest(BString(&url_c[offset], end_request - offset)); 1101 offset = end_request; 1102 // if there is a "?" in the parse then it is clear that 1103 // there is a 'request' / query present regardless if there 1104 // are any valid key-value pairs. 1105 fHasRequest = true; 1106 } 1107 state = EXPLODE_FRAGMENT; 1108 break; 1109 } 1110 1111 case EXPLODE_FRAGMENT: 1112 { 1113 if (url_c[offset] == '#') { 1114 offset++; 1115 SetFragment(BString(&url_c[offset], length - offset)); 1116 offset = length; 1117 } 1118 state = EXPLODE_COMPLETE; 1119 break; 1120 } 1121 1122 case EXPLODE_COMPLETE: 1123 // should never be reached - keeps the compiler happy 1124 break; 1125 1126 } 1127 } 1128 1129 if (forceHasHost) 1130 fHasHost = true; 1131 1132 return B_OK; 1133 } 1134 1135 1136 BString 1137 BUrl::_MergePath(const BString& relative) const 1138 { 1139 // This implements RFC3986, Section 5.2.3. 1140 if (HasAuthority() && fPath == "") { 1141 BString result("/"); 1142 result << relative; 1143 return result; 1144 } 1145 1146 int32 lastSlashIndex = fPath.FindLast("/"); 1147 1148 if (lastSlashIndex == B_ERROR) 1149 return relative; 1150 1151 BString result; 1152 result.SetTo(fPath, lastSlashIndex + 1); 1153 result << relative; 1154 1155 return result; 1156 } 1157 1158 1159 // This sets the path without normalizing it. If fed with a path that has . or 1160 // .. segments, this would make the URL invalid. 1161 void 1162 BUrl::_SetPathUnsafe(const BString& path) 1163 { 1164 fPath = path; 1165 fHasPath = true; // RFC says an empty path is still a path 1166 fUrlStringValid = false; 1167 } 1168 1169 1170 enum authority_parse_state { 1171 AUTHORITY_USERNAME, 1172 AUTHORITY_PASSWORD, 1173 AUTHORITY_HOST, 1174 AUTHORITY_PORT, 1175 AUTHORITY_COMPLETE 1176 }; 1177 1178 void 1179 BUrl::SetAuthority(const BString& authority) 1180 { 1181 fAuthority = authority; 1182 1183 fUser.Truncate(0); 1184 fPassword.Truncate(0); 1185 fHost.Truncate(0); 1186 fPort = 0; 1187 fHasPort = false; 1188 fHasUserName = false; 1189 fHasPassword = false; 1190 1191 bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@'); 1192 authority_parse_state state = AUTHORITY_USERNAME; 1193 int32 offset = 0; 1194 int32 length = authority.Length(); 1195 const char *authority_c = authority.String(); 1196 1197 while (AUTHORITY_COMPLETE != state && offset < length) { 1198 1199 switch (state) { 1200 1201 case AUTHORITY_USERNAME: 1202 { 1203 if (hasUsernamePassword) { 1204 int32 end_username = char_offset_until_fn_false( 1205 authority_c, length, offset, _IsUsernameChar); 1206 1207 SetUserName(BString(&authority_c[offset], 1208 end_username - offset)); 1209 1210 state = AUTHORITY_PASSWORD; 1211 offset = end_username; 1212 } else { 1213 state = AUTHORITY_HOST; 1214 } 1215 break; 1216 } 1217 1218 case AUTHORITY_PASSWORD: 1219 { 1220 if (hasUsernamePassword && ':' == authority[offset]) { 1221 offset++; // move past the delimiter 1222 int32 end_password = char_offset_until_fn_false( 1223 authority_c, length, offset, _IsPasswordChar); 1224 1225 SetPassword(BString(&authority_c[offset], 1226 end_password - offset)); 1227 1228 offset = end_password; 1229 } 1230 1231 // if the host was preceded by a username + password couple 1232 // then there will be an '@' delimiter to avoid. 1233 1234 if (authority_c[offset] == '@') { 1235 offset++; 1236 } 1237 1238 state = AUTHORITY_HOST; 1239 break; 1240 } 1241 1242 case AUTHORITY_HOST: 1243 { 1244 1245 // the host may be enclosed within brackets in order to express 1246 // an IPV6 address. 1247 1248 if (authority_c[offset] == '[') { 1249 int32 end_ipv6_host = char_offset_until_fn_false( 1250 authority_c, length, offset + 1, _IsIPV6Char); 1251 1252 if (authority_c[end_ipv6_host] == ']') { 1253 SetHost(BString(&authority_c[offset], 1254 (end_ipv6_host - offset) + 1)); 1255 state = AUTHORITY_PORT; 1256 offset = end_ipv6_host + 1; 1257 } 1258 } 1259 1260 // if an IPV6 host was not found. 1261 1262 if (AUTHORITY_HOST == state) { 1263 int32 end_host = char_offset_until_fn_false( 1264 authority_c, length, offset, _IsHostChar); 1265 1266 SetHost(BString(&authority_c[offset], end_host - offset)); 1267 state = AUTHORITY_PORT; 1268 offset = end_host; 1269 } 1270 1271 break; 1272 } 1273 1274 case AUTHORITY_PORT: 1275 { 1276 if (authority_c[offset] == ':') { 1277 offset++; 1278 int32 end_port = char_offset_until_fn_false( 1279 authority_c, length, offset, _IsPortChar); 1280 SetPort(atoi(&authority_c[offset])); 1281 offset = end_port; 1282 } 1283 1284 state = AUTHORITY_COMPLETE; 1285 1286 break; 1287 } 1288 1289 case AUTHORITY_COMPLETE: 1290 // should never be reached - keeps the compiler happy 1291 break; 1292 } 1293 } 1294 1295 // An empty authority is still an authority, making it possible to have 1296 // URLs such as file:///path/to/file. 1297 // TODO however, there is no way to unset the authority once it is set... 1298 // We may want to take a const char* parameter and allow NULL. 1299 fHasHost = true; 1300 } 1301 1302 1303 /*static*/ BString 1304 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory) 1305 { 1306 BString result; 1307 1308 for (int32 i = 0; i < chunk.Length(); i++) { 1309 if (_IsUnreserved(chunk[i]) 1310 || (directory && (chunk[i] == '/' || chunk[i] == '\\'))) { 1311 result << chunk[i]; 1312 } else { 1313 if (chunk[i] == ' ' && !strict) { 1314 result << '+'; 1315 // In non-strict mode, spaces are encoded by a plus sign 1316 } else { 1317 char hexString[5]; 1318 snprintf(hexString, 5, "%X", chunk[i]); 1319 1320 result << '%' << hexString; 1321 } 1322 } 1323 } 1324 1325 return result; 1326 } 1327 1328 1329 /*static*/ BString 1330 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict) 1331 { 1332 BString result; 1333 1334 for (int32 i = 0; i < chunk.Length(); i++) { 1335 if (chunk[i] == '+' && !strict) 1336 result << ' '; 1337 else { 1338 char decoded = 0; 1339 char* out = NULL; 1340 char hexString[3]; 1341 1342 if (chunk[i] == '%' && i < chunk.Length() - 2 1343 && isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) { 1344 hexString[0] = chunk[i + 1]; 1345 hexString[1] = chunk[i + 2]; 1346 hexString[2] = 0; 1347 decoded = (char)strtol(hexString, &out, 16); 1348 } 1349 1350 if (out == hexString + 2) { 1351 i += 2; 1352 result << decoded; 1353 } else 1354 result << chunk[i]; 1355 } 1356 } 1357 return result; 1358 } 1359 1360 1361 bool 1362 BUrl::_IsHostIPV6Valid(size_t offset, int32 length) const 1363 { 1364 for (int32 i = 0; i < length; i++) { 1365 char c = fHost[offset + i]; 1366 if (!_IsIPV6Char(c)) 1367 return false; 1368 } 1369 1370 return length > 0; 1371 } 1372 1373 1374 bool 1375 BUrl::_IsHostValid() const 1376 { 1377 if (fHost.StartsWith("[") && fHost.EndsWith("]")) 1378 return _IsHostIPV6Valid(1, fHost.Length() - 2); 1379 1380 bool lastWasDot = false; 1381 1382 for (int32 i = 0; i < fHost.Length(); i++) { 1383 char c = fHost[i]; 1384 1385 if (c == '.') { 1386 if (lastWasDot || i == 0) 1387 return false; 1388 lastWasDot = true; 1389 } else { 1390 lastWasDot = false; 1391 } 1392 1393 if (!_IsHostChar(c) && c != '.') { 1394 // the underscore is technically not allowed, but occurs sometimes 1395 // in the wild. 1396 return false; 1397 } 1398 } 1399 1400 return true; 1401 } 1402 1403 1404 bool 1405 BUrl::_IsProtocolValid() const 1406 { 1407 for (int8 index = 0; index < fProtocol.Length(); index++) { 1408 char c = fProtocol[index]; 1409 1410 if (index == 0 && !isalpha(c)) 1411 return false; 1412 else if (!isalnum(c) && c != '+' && c != '-' && c != '.') 1413 return false; 1414 } 1415 1416 return !fProtocol.IsEmpty(); 1417 } 1418 1419 1420 bool 1421 BUrl::_IsUnreserved(char c) 1422 { 1423 return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'; 1424 } 1425 1426 1427 bool 1428 BUrl::_IsGenDelim(char c) 1429 { 1430 return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' 1431 || c == ']' || c == '@'; 1432 } 1433 1434 1435 bool 1436 BUrl::_IsSubDelim(char c) 1437 { 1438 return c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' 1439 || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' 1440 || c == '='; 1441 } 1442 1443 1444 bool 1445 BUrl::_IsUsernameChar(char c) 1446 { 1447 return !(c == ':' || c == '@'); 1448 } 1449 1450 1451 bool 1452 BUrl::_IsPasswordChar(char c) 1453 { 1454 return !(c == '@'); 1455 } 1456 1457 1458 bool 1459 BUrl::_IsHostChar(char c) 1460 { 1461 return ((uint8) c) > 127 || isalnum(c) || c == '-' || c == '_' || c == '.' 1462 || c == '%'; 1463 } 1464 1465 1466 bool 1467 BUrl::_IsPortChar(char c) 1468 { 1469 return isdigit(c); 1470 } 1471 1472 1473 bool 1474 BUrl::_IsIPV6Char(char c) 1475 { 1476 return c == ':' || isxdigit(c); 1477 } 1478 1479 1480 BString 1481 BUrl::_UrlMimeType() const 1482 { 1483 BString mime; 1484 mime << "application/x-vnd.Be.URL." << fProtocol; 1485 1486 return BString(mime); 1487 } 1488