1 /* 2 * Copyright 2010-2016 Haiku Inc. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Christophe Huriaux, c.huriaux@gmail.com 7 * Andrew Lindesay, apl@lindesay.co.nz 8 */ 9 10 11 #include <Url.h> 12 13 #include <ctype.h> 14 #include <cstdio> 15 #include <cstdlib> 16 #include <new> 17 18 #include <MimeType.h> 19 #include <Roster.h> 20 21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 22 #include <ICUWrapper.h> 23 #endif 24 25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 26 #include <unicode/idna.h> 27 #include <unicode/stringpiece.h> 28 #endif 29 30 31 static const char* kArchivedUrl = "be:url string"; 32 33 34 BUrl::BUrl(const char* url) 35 : 36 fUrlString(), 37 fProtocol(), 38 fUser(), 39 fPassword(), 40 fHost(), 41 fPort(0), 42 fPath(), 43 fRequest(), 44 fHasHost(false), 45 fHasFragment(false) 46 { 47 SetUrlString(url); 48 } 49 50 51 BUrl::BUrl(BMessage* archive) 52 : 53 fUrlString(), 54 fProtocol(), 55 fUser(), 56 fPassword(), 57 fHost(), 58 fPort(0), 59 fPath(), 60 fRequest(), 61 fHasHost(false), 62 fHasFragment(false) 63 { 64 BString url; 65 66 if (archive->FindString(kArchivedUrl, &url) == B_OK) 67 SetUrlString(url); 68 else 69 _ResetFields(); 70 } 71 72 73 BUrl::BUrl(const BUrl& other) 74 : 75 BArchivable(), 76 fUrlString(), 77 fProtocol(other.fProtocol), 78 fUser(other.fUser), 79 fPassword(other.fPassword), 80 fHost(other.fHost), 81 fPort(other.fPort), 82 fPath(other.fPath), 83 fRequest(other.fRequest), 84 fFragment(other.fFragment), 85 fUrlStringValid(other.fUrlStringValid), 86 fAuthorityValid(other.fAuthorityValid), 87 fUserInfoValid(other.fUserInfoValid), 88 fHasProtocol(other.fHasProtocol), 89 fHasUserName(other.fHasUserName), 90 fHasPassword(other.fHasPassword), 91 fHasHost(other.fHasHost), 92 fHasPort(other.fHasPort), 93 fHasPath(other.fHasPath), 94 fHasRequest(other.fHasRequest), 95 fHasFragment(other.fHasFragment) 96 { 97 if (fUrlStringValid) 98 fUrlString = other.fUrlString; 99 100 if (fAuthorityValid) 101 fAuthority = other.fAuthority; 102 103 if (fUserInfoValid) 104 fUserInfo = other.fUserInfo; 105 106 } 107 108 109 BUrl::BUrl(const BUrl& base, const BString& location) 110 : 111 fUrlString(), 112 fProtocol(), 113 fUser(), 114 fPassword(), 115 fHost(), 116 fPort(0), 117 fPath(), 118 fRequest(), 119 fAuthorityValid(false), 120 fUserInfoValid(false), 121 fHasUserName(false), 122 fHasPassword(false), 123 fHasHost(false), 124 fHasPort(false), 125 fHasFragment(false) 126 { 127 // This implements the algorithm in RFC3986, Section 5.2. 128 129 BUrl relative(location); 130 if (relative.HasProtocol()) { 131 SetProtocol(relative.Protocol()); 132 if (relative.HasAuthority()) 133 SetAuthority(relative.Authority()); 134 SetPath(relative.Path()); 135 SetRequest(relative.Request()); 136 } else { 137 if (relative.HasAuthority()) { 138 SetAuthority(relative.Authority()); 139 SetPath(relative.Path()); 140 SetRequest(relative.Request()); 141 } else { 142 if (relative.Path().IsEmpty()) { 143 _SetPathUnsafe(base.Path()); 144 if (relative.HasRequest()) 145 SetRequest(relative.Request()); 146 else 147 SetRequest(base.Request()); 148 } else { 149 if (relative.Path()[0] == '/') 150 SetPath(relative.Path()); 151 else { 152 BString path = base._MergePath(relative.Path()); 153 SetPath(path); 154 } 155 SetRequest(relative.Request()); 156 } 157 158 if (base.HasAuthority()) 159 SetAuthority(base.Authority()); 160 } 161 SetProtocol(base.Protocol()); 162 } 163 164 if (relative.HasFragment()) 165 SetFragment(relative.Fragment()); 166 } 167 168 169 BUrl::BUrl() 170 : 171 fUrlString(), 172 fProtocol(), 173 fUser(), 174 fPassword(), 175 fHost(), 176 fPort(0), 177 fPath(), 178 fRequest(), 179 fHasHost(false), 180 fHasFragment(false) 181 { 182 _ResetFields(); 183 } 184 185 186 BUrl::BUrl(const BPath& path) 187 : 188 fUrlString(), 189 fProtocol(), 190 fUser(), 191 fPassword(), 192 fHost(), 193 fPort(0), 194 fPath(), 195 fRequest(), 196 fHasHost(false), 197 fHasFragment(false) 198 { 199 SetUrlString(UrlEncode(path.Path(), true, true)); 200 SetProtocol("file"); 201 } 202 203 204 BUrl::~BUrl() 205 { 206 } 207 208 209 // #pragma mark URL fields modifiers 210 211 212 BUrl& 213 BUrl::SetUrlString(const BString& url) 214 { 215 _ExplodeUrlString(url); 216 return *this; 217 } 218 219 220 BUrl& 221 BUrl::SetProtocol(const BString& protocol) 222 { 223 fProtocol = protocol; 224 fHasProtocol = !fProtocol.IsEmpty(); 225 fUrlStringValid = false; 226 return *this; 227 } 228 229 230 BUrl& 231 BUrl::SetUserName(const BString& user) 232 { 233 fUser = user; 234 fHasUserName = !fUser.IsEmpty(); 235 fUrlStringValid = false; 236 fAuthorityValid = false; 237 fUserInfoValid = false; 238 return *this; 239 } 240 241 242 BUrl& 243 BUrl::SetPassword(const BString& password) 244 { 245 fPassword = password; 246 fHasPassword = !fPassword.IsEmpty(); 247 fUrlStringValid = false; 248 fAuthorityValid = false; 249 fUserInfoValid = false; 250 return *this; 251 } 252 253 254 BUrl& 255 BUrl::SetHost(const BString& host) 256 { 257 fHost = host; 258 fHasHost = !fHost.IsEmpty(); 259 fUrlStringValid = false; 260 fAuthorityValid = false; 261 return *this; 262 } 263 264 265 BUrl& 266 BUrl::SetPort(int port) 267 { 268 fPort = port; 269 fHasPort = (port != 0); 270 fUrlStringValid = false; 271 fAuthorityValid = false; 272 return *this; 273 } 274 275 276 BUrl& 277 BUrl::SetPath(const BString& path) 278 { 279 // Implements RFC3986 section 5.2.4, "Remove dot segments" 280 281 // 1. 282 BString output; 283 BString input(path); 284 285 // 2. 286 while(!input.IsEmpty()) 287 { 288 // 2.A. 289 if (input.StartsWith("./")) 290 { 291 input.Remove(0, 2); 292 continue; 293 } 294 295 if (input.StartsWith("../")) 296 { 297 input.Remove(0, 3); 298 continue; 299 } 300 301 // 2.B. 302 if (input.StartsWith("/./")) 303 { 304 input.Remove(0, 2); 305 continue; 306 } 307 308 if (input == "/.") 309 { 310 input.Remove(1, 1); 311 continue; 312 } 313 314 // 2.C. 315 if (input.StartsWith("/../")) 316 { 317 input.Remove(0, 3); 318 output.Truncate(output.FindLast('/')); 319 continue; 320 } 321 322 if (input == "/..") 323 { 324 input.Remove(1, 2); 325 output.Truncate(output.FindLast('/')); 326 continue; 327 } 328 329 // 2.D. 330 if (input == "." || input == "..") 331 { 332 break; 333 } 334 335 if (input == "/.") 336 { 337 input.Remove(1, 1); 338 continue; 339 } 340 341 // 2.E. 342 int slashpos = input.FindFirst('/', 1); 343 if (slashpos > 0) { 344 output.Append(input, slashpos); 345 input.Remove(0, slashpos); 346 } else { 347 output.Append(input); 348 break; 349 } 350 } 351 352 _SetPathUnsafe(output); 353 return *this; 354 } 355 356 357 BUrl& 358 BUrl::SetRequest(const BString& request) 359 { 360 fRequest = request; 361 fHasRequest = !fRequest.IsEmpty(); 362 fUrlStringValid = false; 363 return *this; 364 } 365 366 367 BUrl& 368 BUrl::SetFragment(const BString& fragment) 369 { 370 fFragment = fragment; 371 fHasFragment = true; 372 fUrlStringValid = false; 373 return *this; 374 } 375 376 377 // #pragma mark URL fields access 378 379 380 const BString& 381 BUrl::UrlString() const 382 { 383 if (!fUrlStringValid) { 384 fUrlString.Truncate(0); 385 386 if (HasProtocol()) { 387 fUrlString << fProtocol << ':'; 388 } 389 390 if (HasAuthority()) { 391 fUrlString << "//"; 392 fUrlString << Authority(); 393 } 394 fUrlString << Path(); 395 396 if (HasRequest()) 397 fUrlString << '?' << fRequest; 398 399 if (HasFragment()) 400 fUrlString << '#' << fFragment; 401 402 fUrlStringValid = true; 403 } 404 405 return fUrlString; 406 } 407 408 409 const BString& 410 BUrl::Protocol() const 411 { 412 return fProtocol; 413 } 414 415 416 const BString& 417 BUrl::UserName() const 418 { 419 return fUser; 420 } 421 422 423 const BString& 424 BUrl::Password() const 425 { 426 return fPassword; 427 } 428 429 430 const BString& 431 BUrl::UserInfo() const 432 { 433 if (!fUserInfoValid) { 434 fUserInfo = fUser; 435 436 if (HasPassword()) 437 fUserInfo << ':' << fPassword; 438 439 fUserInfoValid = true; 440 } 441 442 return fUserInfo; 443 } 444 445 446 const BString& 447 BUrl::Host() const 448 { 449 return fHost; 450 } 451 452 453 int 454 BUrl::Port() const 455 { 456 return fPort; 457 } 458 459 460 const BString& 461 BUrl::Authority() const 462 { 463 if (!fAuthorityValid) { 464 fAuthority.Truncate(0); 465 466 if (HasUserInfo()) 467 fAuthority << UserInfo() << '@'; 468 fAuthority << Host(); 469 470 if (HasPort()) 471 fAuthority << ':' << fPort; 472 473 fAuthorityValid = true; 474 } 475 return fAuthority; 476 } 477 478 479 const BString& 480 BUrl::Path() const 481 { 482 return fPath; 483 } 484 485 486 const BString& 487 BUrl::Request() const 488 { 489 return fRequest; 490 } 491 492 493 const BString& 494 BUrl::Fragment() const 495 { 496 return fFragment; 497 } 498 499 500 // #pragma mark URL fields tests 501 502 503 bool 504 BUrl::IsValid() const 505 { 506 if (!fHasProtocol) 507 return false; 508 509 if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp" 510 || fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet" 511 || fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp" 512 || fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") { 513 return fHasHost && !fHost.IsEmpty(); 514 } 515 516 if (fProtocol == "file") 517 return fHasPath; 518 519 return true; 520 } 521 522 523 bool 524 BUrl::HasProtocol() const 525 { 526 return fHasProtocol; 527 } 528 529 530 bool 531 BUrl::HasAuthority() const 532 { 533 return fHasHost || fHasUserName; 534 } 535 536 537 bool 538 BUrl::HasUserName() const 539 { 540 return fHasUserName; 541 } 542 543 544 bool 545 BUrl::HasPassword() const 546 { 547 return fHasPassword; 548 } 549 550 551 bool 552 BUrl::HasUserInfo() const 553 { 554 return fHasUserName || fHasPassword; 555 } 556 557 558 bool 559 BUrl::HasHost() const 560 { 561 return fHasHost; 562 } 563 564 565 bool 566 BUrl::HasPort() const 567 { 568 return fHasPort; 569 } 570 571 572 bool 573 BUrl::HasPath() const 574 { 575 return fHasPath; 576 } 577 578 579 bool 580 BUrl::HasRequest() const 581 { 582 return fHasRequest; 583 } 584 585 586 bool 587 BUrl::HasFragment() const 588 { 589 return fHasFragment; 590 } 591 592 593 // #pragma mark URL encoding/decoding of needed fields 594 595 596 void 597 BUrl::UrlEncode(bool strict) 598 { 599 fUser = _DoUrlEncodeChunk(fUser, strict); 600 fPassword = _DoUrlEncodeChunk(fPassword, strict); 601 fHost = _DoUrlEncodeChunk(fHost, strict); 602 fFragment = _DoUrlEncodeChunk(fFragment, strict); 603 fPath = _DoUrlEncodeChunk(fPath, strict, true); 604 } 605 606 607 void 608 BUrl::UrlDecode(bool strict) 609 { 610 fUser = _DoUrlDecodeChunk(fUser, strict); 611 fPassword = _DoUrlDecodeChunk(fPassword, strict); 612 fHost = _DoUrlDecodeChunk(fHost, strict); 613 fFragment = _DoUrlDecodeChunk(fFragment, strict); 614 fPath = _DoUrlDecodeChunk(fPath, strict); 615 } 616 617 618 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 619 status_t 620 BUrl::IDNAToAscii() 621 { 622 UErrorCode err = U_ZERO_ERROR; 623 icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err); 624 icu::IDNAInfo info; 625 626 BString result; 627 BStringByteSink sink(&result); 628 converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info, 629 err); 630 631 delete converter; 632 633 if (U_FAILURE(err)) 634 return B_ERROR; 635 636 fHost = result; 637 return B_OK; 638 } 639 #endif 640 641 642 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 643 status_t 644 BUrl::IDNAToUnicode() 645 { 646 UErrorCode err = U_ZERO_ERROR; 647 icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err); 648 icu::IDNAInfo info; 649 650 BString result; 651 BStringByteSink sink(&result); 652 converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info, 653 err); 654 655 delete converter; 656 657 if (U_FAILURE(err)) 658 return B_ERROR; 659 660 fHost = result; 661 return B_OK; 662 } 663 #endif 664 665 666 // #pragma mark - utility functionality 667 668 669 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 670 bool 671 BUrl::HasPreferredApplication() const 672 { 673 BString appSignature = PreferredApplication(); 674 BMimeType mime(appSignature.String()); 675 676 if (appSignature.IFindFirst("application/") == 0 677 && mime.IsValid()) 678 return true; 679 680 return false; 681 } 682 #endif 683 684 685 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 686 BString 687 BUrl::PreferredApplication() const 688 { 689 BString appSignature; 690 BMimeType mime(_UrlMimeType().String()); 691 mime.GetPreferredApp(appSignature.LockBuffer(B_MIME_TYPE_LENGTH)); 692 appSignature.UnlockBuffer(); 693 694 return BString(appSignature); 695 } 696 #endif 697 698 699 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 700 status_t 701 BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const 702 { 703 if (!IsValid()) 704 return B_BAD_VALUE; 705 706 BString urlString = UrlString(); 707 if (urlString.Length() > B_PATH_NAME_LENGTH) { 708 // TODO: BAlert 709 // if (onProblemAskUser) 710 // BAlert ... Too long URL! 711 #if DEBUG 712 fprintf(stderr, "URL too long"); 713 #endif 714 return B_NAME_TOO_LONG; 715 } 716 717 char* argv[] = { 718 const_cast<char*>("BUrlInvokedApplication"), 719 const_cast<char*>(urlString.String()), 720 NULL 721 }; 722 723 #if DEBUG 724 if (HasPreferredApplication()) 725 printf("HasPreferredApplication() == true\n"); 726 else 727 printf("HasPreferredApplication() == false\n"); 728 #endif 729 730 status_t status = be_roster->Launch(_UrlMimeType().String(), 1, argv+1); 731 if (status != B_OK) { 732 #if DEBUG 733 fprintf(stderr, "Opening URL failed: %s\n", strerror(status)); 734 #endif 735 } 736 737 return status; 738 } 739 #endif 740 741 742 // #pragma mark Url encoding/decoding of string 743 744 745 /*static*/ BString 746 BUrl::UrlEncode(const BString& url, bool strict, bool directory) 747 { 748 return _DoUrlEncodeChunk(url, strict, directory); 749 } 750 751 752 /*static*/ BString 753 BUrl::UrlDecode(const BString& url, bool strict) 754 { 755 return _DoUrlDecodeChunk(url, strict); 756 } 757 758 759 // #pragma mark BArchivable members 760 761 762 status_t 763 BUrl::Archive(BMessage* into, bool deep) const 764 { 765 status_t ret = BArchivable::Archive(into, deep); 766 767 if (ret == B_OK) 768 ret = into->AddString(kArchivedUrl, UrlString()); 769 770 return ret; 771 } 772 773 774 /*static*/ BArchivable* 775 BUrl::Instantiate(BMessage* archive) 776 { 777 if (validate_instantiation(archive, "BUrl")) 778 return new(std::nothrow) BUrl(archive); 779 return NULL; 780 } 781 782 783 // #pragma mark URL comparison 784 785 786 bool 787 BUrl::operator==(BUrl& other) const 788 { 789 UrlString(); 790 other.UrlString(); 791 792 return fUrlString == other.fUrlString; 793 } 794 795 796 bool 797 BUrl::operator!=(BUrl& other) const 798 { 799 return !(*this == other); 800 } 801 802 803 // #pragma mark URL assignment 804 805 806 const BUrl& 807 BUrl::operator=(const BUrl& other) 808 { 809 fUrlStringValid = other.fUrlStringValid; 810 if (fUrlStringValid) 811 fUrlString = other.fUrlString; 812 813 fAuthorityValid = other.fAuthorityValid; 814 if (fAuthorityValid) 815 fAuthority = other.fAuthority; 816 817 fUserInfoValid = other.fUserInfoValid; 818 if (fUserInfoValid) 819 fUserInfo = other.fUserInfo; 820 821 fProtocol = other.fProtocol; 822 fUser = other.fUser; 823 fPassword = other.fPassword; 824 fHost = other.fHost; 825 fPort = other.fPort; 826 fPath = other.fPath; 827 fRequest = other.fRequest; 828 fFragment = other.fFragment; 829 830 fHasProtocol = other.fHasProtocol; 831 fHasUserName = other.fHasUserName; 832 fHasPassword = other.fHasPassword; 833 fHasHost = other.fHasHost; 834 fHasPort = other.fHasPort; 835 fHasPath = other.fHasPath; 836 fHasRequest = other.fHasRequest; 837 fHasFragment = other.fHasFragment; 838 839 return *this; 840 } 841 842 843 const BUrl& 844 BUrl::operator=(const BString& string) 845 { 846 SetUrlString(string); 847 return *this; 848 } 849 850 851 const BUrl& 852 BUrl::operator=(const char* string) 853 { 854 SetUrlString(string); 855 return *this; 856 } 857 858 859 // #pragma mark URL to string conversion 860 861 862 BUrl::operator const char*() const 863 { 864 return UrlString(); 865 } 866 867 868 void 869 BUrl::_ResetFields() 870 { 871 fHasProtocol = false; 872 fHasUserName = false; 873 fHasPassword = false; 874 fHasHost = false; 875 fHasPort = false; 876 fHasPath = false; 877 fHasRequest = false; 878 fHasFragment = false; 879 880 fProtocol.Truncate(0); 881 fUser.Truncate(0); 882 fPassword.Truncate(0); 883 fHost.Truncate(0); 884 fPort = 0; 885 fPath.Truncate(0); 886 fRequest.Truncate(0); 887 fFragment.Truncate(0); 888 889 // Force re-generation of these fields 890 fUrlStringValid = false; 891 fUserInfoValid = false; 892 fAuthorityValid = false; 893 } 894 895 896 bool 897 BUrl::_ContainsDelimiter(const BString& url) 898 { 899 int32 len = url.Length(); 900 901 for (int32 i = 0; i < len; i++) { 902 switch (url[i]) { 903 case ' ': 904 case '\n': 905 case '\t': 906 case '\r': 907 case '<': 908 case '>': 909 case '"': 910 return true; 911 } 912 } 913 914 return false; 915 } 916 917 918 enum explode_url_parse_state { 919 EXPLODE_PROTOCOL, 920 EXPLODE_PROTOCOLTERMINATOR, 921 EXPLODE_AUTHORITYORPATH, 922 EXPLODE_AUTHORITY, 923 EXPLODE_PATH, 924 EXPLODE_REQUEST, // query 925 EXPLODE_FRAGMENT, 926 EXPLODE_COMPLETE 927 }; 928 929 930 typedef bool (*explode_char_match_fn)(char c); 931 932 933 static bool 934 explode_is_protocol_char(char c) 935 { 936 return isalnum(c) || c == '+' || c == '.' || c == '-'; 937 } 938 939 940 static bool 941 explode_is_authority_char(char c) 942 { 943 return !(c == '/' || c == '?' || c == '#'); 944 } 945 946 947 static bool 948 explode_is_path_char(char c) 949 { 950 return !(c == '#' || c == '?'); 951 } 952 953 954 static bool 955 explode_is_request_char(char c) 956 { 957 return c != '#'; 958 } 959 960 961 static int32 962 char_offset_until_fn_false(const char* url, int32 len, int32 offset, 963 explode_char_match_fn fn) 964 { 965 while (offset < len && fn(url[offset])) 966 offset++; 967 968 return offset; 969 } 970 971 /* 972 * This function takes a URL in string-form and parses the components of the URL out. 973 */ 974 status_t 975 BUrl::_ExplodeUrlString(const BString& url) 976 { 977 _ResetFields(); 978 979 // RFC3986, Appendix C; the URL should not contain whitespace or delimiters 980 // by this point. 981 982 if (_ContainsDelimiter(url)) 983 return B_BAD_VALUE; 984 985 explode_url_parse_state state = EXPLODE_PROTOCOL; 986 int32 offset = 0; 987 int32 length = url.Length(); 988 const char *url_c = url.String(); 989 990 // The regexp is provided in RFC3986 (URI generic syntax), Appendix B 991 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))? 992 // The ensuing logic attempts to simulate the behaviour of extracting the groups 993 // from the string without requiring a group-capable regex engine. 994 995 while (offset < length) { 996 switch (state) { 997 998 case EXPLODE_PROTOCOL: 999 { 1000 int32 end_protocol = char_offset_until_fn_false(url_c, length, 1001 offset, explode_is_protocol_char); 1002 1003 if (end_protocol < length) { 1004 SetProtocol(BString(&url_c[offset], end_protocol - offset)); 1005 state = EXPLODE_PROTOCOLTERMINATOR; 1006 offset = end_protocol; 1007 } else { 1008 // No protocol was found, try parsing from the string 1009 // start, beginning with authority or path 1010 SetProtocol(""); 1011 offset = 0; 1012 state = EXPLODE_AUTHORITYORPATH; 1013 } 1014 break; 1015 } 1016 1017 case EXPLODE_PROTOCOLTERMINATOR: 1018 { 1019 if (url[offset] == ':') { 1020 offset++; 1021 } else { 1022 // No protocol was found, try parsing from the string 1023 // start, beginning with authority or path 1024 SetProtocol(""); 1025 offset = 0; 1026 } 1027 state = EXPLODE_AUTHORITYORPATH; 1028 break; 1029 } 1030 1031 case EXPLODE_AUTHORITYORPATH: 1032 { 1033 // The authority must start with //. If it isn't there, skip 1034 // to parsing the path. 1035 if (strncmp(&url_c[offset], "//", 2) == 0) { 1036 state = EXPLODE_AUTHORITY; 1037 offset += 2; 1038 } else { 1039 state = EXPLODE_PATH; 1040 } 1041 break; 1042 } 1043 1044 case EXPLODE_AUTHORITY: 1045 { 1046 int end_authority = char_offset_until_fn_false(url_c, length, 1047 offset, explode_is_authority_char); 1048 SetAuthority(BString(&url_c[offset], end_authority - offset)); 1049 state = EXPLODE_PATH; 1050 offset = end_authority; 1051 break; 1052 } 1053 1054 case EXPLODE_PATH: 1055 { 1056 int end_path = char_offset_until_fn_false(url_c, length, offset, 1057 explode_is_path_char); 1058 SetPath(BString(&url_c[offset], end_path - offset)); 1059 state = EXPLODE_REQUEST; 1060 offset = end_path; 1061 break; 1062 } 1063 1064 case EXPLODE_REQUEST: // query 1065 { 1066 if (url_c[offset] == '?') { 1067 offset++; 1068 int end_request = char_offset_until_fn_false(url_c, length, 1069 offset, explode_is_request_char); 1070 SetRequest(BString(&url_c[offset], end_request - offset)); 1071 offset = end_request; 1072 } 1073 state = EXPLODE_FRAGMENT; 1074 break; 1075 } 1076 1077 case EXPLODE_FRAGMENT: 1078 { 1079 if (url_c[offset] == '#') { 1080 offset++; 1081 SetFragment(BString(&url_c[offset], length - offset)); 1082 offset = length; 1083 } 1084 state = EXPLODE_COMPLETE; 1085 break; 1086 } 1087 1088 case EXPLODE_COMPLETE: 1089 // should never be reached - keeps the compiler happy 1090 break; 1091 1092 } 1093 } 1094 1095 return B_OK; 1096 } 1097 1098 1099 BString 1100 BUrl::_MergePath(const BString& relative) const 1101 { 1102 // This implements RFC3986, Section 5.2.3. 1103 if (HasAuthority() && fPath == "") 1104 { 1105 BString result("/"); 1106 result << relative; 1107 return result; 1108 } 1109 1110 BString result(fPath); 1111 result.Truncate(result.FindLast("/") + 1); 1112 result << relative; 1113 1114 return result; 1115 } 1116 1117 1118 // This sets the path without normalizing it. If fed with a path that has . or 1119 // .. segments, this would make the URL invalid. 1120 void 1121 BUrl::_SetPathUnsafe(const BString& path) 1122 { 1123 fPath = path; 1124 fHasPath = true; // RFC says an empty path is still a path 1125 fUrlStringValid = false; 1126 } 1127 1128 1129 enum authority_parse_state { 1130 AUTHORITY_USERNAME, 1131 AUTHORITY_PASSWORD, 1132 AUTHORITY_HOST, 1133 AUTHORITY_PORT, 1134 AUTHORITY_COMPLETE 1135 }; 1136 1137 1138 static bool 1139 authority_is_username_char(char c) 1140 { 1141 return !(c == ':' || c == '@'); 1142 } 1143 1144 1145 static bool 1146 authority_is_password_char(char c) 1147 { 1148 return !(c == '@'); 1149 } 1150 1151 1152 static bool 1153 authority_is_ipv6_host_char(char c) { 1154 return (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') 1155 || (c >= '0' && c <= '9') || c == ':'; 1156 } 1157 1158 1159 static bool 1160 authority_is_host_char(char c) { 1161 return !(c == ':' || c == '/'); 1162 } 1163 1164 1165 static bool 1166 authority_is_port_char(char c) { 1167 return c >= '0' && c <= '9'; 1168 } 1169 1170 1171 void 1172 BUrl::SetAuthority(const BString& authority) 1173 { 1174 fAuthority = authority; 1175 1176 fUser.Truncate(0); 1177 fPassword.Truncate(0); 1178 fHost.Truncate(0); 1179 fPort = 0; 1180 fHasPort = false; 1181 fHasUserName = false; 1182 fHasPassword = false; 1183 1184 bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@'); 1185 authority_parse_state state = AUTHORITY_USERNAME; 1186 int32 offset = 0; 1187 int32 length = authority.Length(); 1188 const char *authority_c = authority.String(); 1189 1190 while (AUTHORITY_COMPLETE != state && offset < length) { 1191 1192 switch (state) { 1193 1194 case AUTHORITY_USERNAME: 1195 { 1196 if (hasUsernamePassword) { 1197 int32 end_username = char_offset_until_fn_false( 1198 authority_c, length, offset, 1199 authority_is_username_char); 1200 1201 SetUserName(BString(&authority_c[offset], 1202 end_username - offset)); 1203 1204 state = AUTHORITY_PASSWORD; 1205 offset = end_username; 1206 } else { 1207 state = AUTHORITY_HOST; 1208 } 1209 break; 1210 } 1211 1212 case AUTHORITY_PASSWORD: 1213 { 1214 if (hasUsernamePassword && ':' == authority[offset]) { 1215 offset++; // move past the delimiter 1216 int32 end_password = char_offset_until_fn_false( 1217 authority_c, length, offset, 1218 authority_is_password_char); 1219 1220 SetPassword(BString(&authority_c[offset], 1221 end_password - offset)); 1222 1223 offset = end_password; 1224 } 1225 1226 // if the host was preceded by a username + password couple 1227 // then there will be an '@' delimiter to avoid. 1228 1229 if (authority_c[offset] == '@') { 1230 offset++; 1231 } 1232 1233 state = AUTHORITY_HOST; 1234 break; 1235 } 1236 1237 case AUTHORITY_HOST: 1238 { 1239 1240 // the host may be enclosed within brackets in order to express 1241 // an IPV6 address. 1242 1243 if (authority_c[offset] == '[') { 1244 int32 end_ipv6_host = char_offset_until_fn_false( 1245 authority_c, length, offset + 1, 1246 authority_is_ipv6_host_char); 1247 1248 if (authority_c[end_ipv6_host] == ']') { 1249 SetHost(BString(&authority_c[offset], 1250 (end_ipv6_host - offset) + 1)); 1251 state = AUTHORITY_PORT; 1252 offset = end_ipv6_host + 1; 1253 } 1254 } 1255 1256 // if an IPV6 host was not found. 1257 1258 if (AUTHORITY_HOST == state) { 1259 int32 end_host = char_offset_until_fn_false( 1260 authority_c, length, offset, authority_is_host_char); 1261 1262 SetHost(BString(&authority_c[offset], end_host - offset)); 1263 state = AUTHORITY_PORT; 1264 offset = end_host; 1265 } 1266 1267 break; 1268 } 1269 1270 case AUTHORITY_PORT: 1271 { 1272 if (authority_c[offset] == ':') { 1273 offset++; 1274 int32 end_port = char_offset_until_fn_false( 1275 authority_c, length, offset, authority_is_port_char); 1276 SetPort(atoi(&authority_c[offset])); 1277 offset = end_port; 1278 } 1279 1280 state = AUTHORITY_COMPLETE; 1281 1282 break; 1283 } 1284 1285 case AUTHORITY_COMPLETE: 1286 // should never be reached - keeps the compiler happy 1287 break; 1288 } 1289 } 1290 1291 // An empty authority is still an authority, making it possible to have 1292 // URLs such as file:///path/to/file. 1293 // TODO however, there is no way to unset the authority once it is set... 1294 // We may want to take a const char* parameter and allow NULL. 1295 fHasHost = true; 1296 } 1297 1298 1299 /*static*/ BString 1300 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory) 1301 { 1302 BString result; 1303 1304 for (int32 i = 0; i < chunk.Length(); i++) { 1305 if (_IsUnreserved(chunk[i]) 1306 || (directory && (chunk[i] == '/' || chunk[i] == '\\'))) { 1307 result << chunk[i]; 1308 } else { 1309 if (chunk[i] == ' ' && !strict) { 1310 result << '+'; 1311 // In non-strict mode, spaces are encoded by a plus sign 1312 } else { 1313 char hexString[5]; 1314 snprintf(hexString, 5, "%X", chunk[i]); 1315 1316 result << '%' << hexString; 1317 } 1318 } 1319 } 1320 1321 return result; 1322 } 1323 1324 1325 /*static*/ BString 1326 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict) 1327 { 1328 BString result; 1329 1330 for (int32 i = 0; i < chunk.Length(); i++) { 1331 if (chunk[i] == '+' && !strict) 1332 result << ' '; 1333 else { 1334 char decoded = 0; 1335 char* out = NULL; 1336 char hexString[3]; 1337 1338 if (chunk[i] == '%' && i < chunk.Length() - 2 1339 && isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) { 1340 hexString[0] = chunk[i + 1]; 1341 hexString[1] = chunk[i + 2]; 1342 hexString[2] = 0; 1343 decoded = (char)strtol(hexString, &out, 16); 1344 } 1345 1346 if (out == hexString + 2) { 1347 i += 2; 1348 result << decoded; 1349 } else 1350 result << chunk[i]; 1351 } 1352 } 1353 return result; 1354 } 1355 1356 1357 bool 1358 BUrl::_IsProtocolValid() 1359 { 1360 for (int8 index = 0; index < fProtocol.Length(); index++) { 1361 char c = fProtocol[index]; 1362 1363 if (index == 0 && !isalpha(c)) 1364 return false; 1365 else if (!isalnum(c) && c != '+' && c != '-' && c != '.') 1366 return false; 1367 } 1368 1369 return fProtocol.Length() > 0; 1370 } 1371 1372 1373 bool 1374 BUrl::_IsUnreserved(char c) 1375 { 1376 return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'; 1377 } 1378 1379 1380 bool 1381 BUrl::_IsGenDelim(char c) 1382 { 1383 return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' 1384 || c == ']' || c == '@'; 1385 } 1386 1387 1388 bool 1389 BUrl::_IsSubDelim(char c) 1390 { 1391 return c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' 1392 || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' 1393 || c == '='; 1394 } 1395 1396 1397 BString 1398 BUrl::_UrlMimeType() const 1399 { 1400 BString mime; 1401 mime << "application/x-vnd.Be.URL." << fProtocol; 1402 1403 return BString(mime); 1404 } 1405