1 /* 2 * Copyright 2010-2016 Haiku Inc. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Christophe Huriaux, c.huriaux@gmail.com 7 * Andrew Lindesay, apl@lindesay.co.nz 8 */ 9 10 11 #include <Url.h> 12 13 #include <ctype.h> 14 #include <cstdio> 15 #include <cstdlib> 16 #include <new> 17 18 #include <MimeType.h> 19 #include <Roster.h> 20 21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 22 #include <ICUWrapper.h> 23 #endif 24 25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 26 #include <unicode/idna.h> 27 #include <unicode/stringpiece.h> 28 #endif 29 30 31 static const char* kArchivedUrl = "be:url string"; 32 33 34 BUrl::BUrl(const char* url) 35 : 36 fUrlString(), 37 fProtocol(), 38 fUser(), 39 fPassword(), 40 fHost(), 41 fPort(0), 42 fPath(), 43 fRequest(), 44 fHasHost(false), 45 fHasFragment(false) 46 { 47 SetUrlString(url); 48 } 49 50 51 BUrl::BUrl(BMessage* archive) 52 : 53 fUrlString(), 54 fProtocol(), 55 fUser(), 56 fPassword(), 57 fHost(), 58 fPort(0), 59 fPath(), 60 fRequest(), 61 fHasHost(false), 62 fHasFragment(false) 63 { 64 BString url; 65 66 if (archive->FindString(kArchivedUrl, &url) == B_OK) 67 SetUrlString(url); 68 else 69 _ResetFields(); 70 } 71 72 73 BUrl::BUrl(const BUrl& other) 74 : 75 BArchivable(), 76 fUrlString(), 77 fProtocol(other.fProtocol), 78 fUser(other.fUser), 79 fPassword(other.fPassword), 80 fHost(other.fHost), 81 fPort(other.fPort), 82 fPath(other.fPath), 83 fRequest(other.fRequest), 84 fFragment(other.fFragment), 85 fUrlStringValid(other.fUrlStringValid), 86 fAuthorityValid(other.fAuthorityValid), 87 fUserInfoValid(other.fUserInfoValid), 88 fHasProtocol(other.fHasProtocol), 89 fHasUserName(other.fHasUserName), 90 fHasPassword(other.fHasPassword), 91 fHasHost(other.fHasHost), 92 fHasPort(other.fHasPort), 93 fHasPath(other.fHasPath), 94 fHasRequest(other.fHasRequest), 95 fHasFragment(other.fHasFragment) 96 { 97 if (fUrlStringValid) 98 fUrlString = other.fUrlString; 99 100 if (fAuthorityValid) 101 fAuthority = other.fAuthority; 102 103 if (fUserInfoValid) 104 fUserInfo = other.fUserInfo; 105 106 } 107 108 109 BUrl::BUrl(const BUrl& base, const BString& location) 110 : 111 fUrlString(), 112 fProtocol(), 113 fUser(), 114 fPassword(), 115 fHost(), 116 fPort(0), 117 fPath(), 118 fRequest(), 119 fAuthorityValid(false), 120 fUserInfoValid(false), 121 fHasUserName(false), 122 fHasPassword(false), 123 fHasHost(false), 124 fHasPort(false), 125 fHasFragment(false) 126 { 127 // This implements the algorithm in RFC3986, Section 5.2. 128 129 BUrl relative(location); 130 if (relative.HasProtocol()) { 131 SetProtocol(relative.Protocol()); 132 if (relative.HasAuthority()) 133 SetAuthority(relative.Authority()); 134 SetPath(relative.Path()); 135 SetRequest(relative.Request()); 136 } else { 137 if (relative.HasAuthority()) { 138 SetAuthority(relative.Authority()); 139 SetPath(relative.Path()); 140 SetRequest(relative.Request()); 141 } else { 142 if (relative.Path().IsEmpty()) { 143 _SetPathUnsafe(base.Path()); 144 if (relative.HasRequest()) 145 SetRequest(relative.Request()); 146 else 147 SetRequest(base.Request()); 148 } else { 149 if (relative.Path()[0] == '/') 150 SetPath(relative.Path()); 151 else { 152 BString path = base._MergePath(relative.Path()); 153 SetPath(path); 154 } 155 SetRequest(relative.Request()); 156 } 157 158 if (base.HasAuthority()) 159 SetAuthority(base.Authority()); 160 } 161 SetProtocol(base.Protocol()); 162 } 163 164 if (relative.HasFragment()) 165 SetFragment(relative.Fragment()); 166 } 167 168 169 BUrl::BUrl() 170 : 171 fUrlString(), 172 fProtocol(), 173 fUser(), 174 fPassword(), 175 fHost(), 176 fPort(0), 177 fPath(), 178 fRequest(), 179 fHasHost(false), 180 fHasFragment(false) 181 { 182 _ResetFields(); 183 } 184 185 186 BUrl::BUrl(const BPath& path) 187 : 188 fUrlString(), 189 fProtocol(), 190 fUser(), 191 fPassword(), 192 fHost(), 193 fPort(0), 194 fPath(), 195 fRequest(), 196 fHasHost(false), 197 fHasFragment(false) 198 { 199 SetUrlString(UrlEncode(path.Path(), true, true)); 200 SetProtocol("file"); 201 } 202 203 204 BUrl::~BUrl() 205 { 206 } 207 208 209 // #pragma mark URL fields modifiers 210 211 212 BUrl& 213 BUrl::SetUrlString(const BString& url) 214 { 215 _ExplodeUrlString(url); 216 return *this; 217 } 218 219 220 BUrl& 221 BUrl::SetProtocol(const BString& protocol) 222 { 223 fProtocol = protocol; 224 fHasProtocol = !fProtocol.IsEmpty(); 225 fUrlStringValid = false; 226 return *this; 227 } 228 229 230 BUrl& 231 BUrl::SetUserName(const BString& user) 232 { 233 fUser = user; 234 fHasUserName = !fUser.IsEmpty(); 235 fUrlStringValid = false; 236 fAuthorityValid = false; 237 fUserInfoValid = false; 238 return *this; 239 } 240 241 242 BUrl& 243 BUrl::SetPassword(const BString& password) 244 { 245 fPassword = password; 246 fHasPassword = !fPassword.IsEmpty(); 247 fUrlStringValid = false; 248 fAuthorityValid = false; 249 fUserInfoValid = false; 250 return *this; 251 } 252 253 254 BUrl& 255 BUrl::SetHost(const BString& host) 256 { 257 fHost = host; 258 fHasHost = !fHost.IsEmpty(); 259 fUrlStringValid = false; 260 fAuthorityValid = false; 261 return *this; 262 } 263 264 265 BUrl& 266 BUrl::SetPort(int port) 267 { 268 fPort = port; 269 fHasPort = (port != 0); 270 fUrlStringValid = false; 271 fAuthorityValid = false; 272 return *this; 273 } 274 275 276 BUrl& 277 BUrl::SetPath(const BString& path) 278 { 279 // Implements RFC3986 section 5.2.4, "Remove dot segments" 280 281 // 1. 282 BString output; 283 BString input(path); 284 285 // 2. 286 while (!input.IsEmpty()) { 287 // 2.A. 288 if (input.StartsWith("./")) { 289 input.Remove(0, 2); 290 continue; 291 } 292 293 if (input.StartsWith("../")) { 294 input.Remove(0, 3); 295 continue; 296 } 297 298 // 2.B. 299 if (input.StartsWith("/./")) { 300 input.Remove(0, 2); 301 continue; 302 } 303 304 if (input == "/.") { 305 input.Remove(1, 1); 306 continue; 307 } 308 309 // 2.C. 310 if (input.StartsWith("/../")) { 311 input.Remove(0, 3); 312 output.Truncate(output.FindLast('/')); 313 continue; 314 } 315 316 if (input == "/..") { 317 input.Remove(1, 2); 318 output.Truncate(output.FindLast('/')); 319 continue; 320 } 321 322 // 2.D. 323 if (input == "." || input == "..") { 324 break; 325 } 326 327 if (input == "/.") { 328 input.Remove(1, 1); 329 continue; 330 } 331 332 // 2.E. 333 int slashpos = input.FindFirst('/', 1); 334 if (slashpos > 0) { 335 output.Append(input, slashpos); 336 input.Remove(0, slashpos); 337 } else { 338 output.Append(input); 339 break; 340 } 341 } 342 343 _SetPathUnsafe(output); 344 return *this; 345 } 346 347 348 BUrl& 349 BUrl::SetRequest(const BString& request) 350 { 351 fRequest = request; 352 fHasRequest = !fRequest.IsEmpty(); 353 fUrlStringValid = false; 354 return *this; 355 } 356 357 358 BUrl& 359 BUrl::SetFragment(const BString& fragment) 360 { 361 fFragment = fragment; 362 fHasFragment = true; 363 fUrlStringValid = false; 364 return *this; 365 } 366 367 368 // #pragma mark URL fields access 369 370 371 const BString& 372 BUrl::UrlString() const 373 { 374 if (!fUrlStringValid) { 375 fUrlString.Truncate(0); 376 377 if (HasProtocol()) { 378 fUrlString << fProtocol << ':'; 379 } 380 381 if (HasAuthority()) { 382 fUrlString << "//"; 383 fUrlString << Authority(); 384 } 385 fUrlString << Path(); 386 387 if (HasRequest()) 388 fUrlString << '?' << fRequest; 389 390 if (HasFragment()) 391 fUrlString << '#' << fFragment; 392 393 fUrlStringValid = true; 394 } 395 396 return fUrlString; 397 } 398 399 400 const BString& 401 BUrl::Protocol() const 402 { 403 return fProtocol; 404 } 405 406 407 const BString& 408 BUrl::UserName() const 409 { 410 return fUser; 411 } 412 413 414 const BString& 415 BUrl::Password() const 416 { 417 return fPassword; 418 } 419 420 421 const BString& 422 BUrl::UserInfo() const 423 { 424 if (!fUserInfoValid) { 425 fUserInfo = fUser; 426 427 if (HasPassword()) 428 fUserInfo << ':' << fPassword; 429 430 fUserInfoValid = true; 431 } 432 433 return fUserInfo; 434 } 435 436 437 const BString& 438 BUrl::Host() const 439 { 440 return fHost; 441 } 442 443 444 int 445 BUrl::Port() const 446 { 447 return fPort; 448 } 449 450 451 const BString& 452 BUrl::Authority() const 453 { 454 if (!fAuthorityValid) { 455 fAuthority.Truncate(0); 456 457 if (HasUserInfo()) 458 fAuthority << UserInfo() << '@'; 459 fAuthority << Host(); 460 461 if (HasPort()) 462 fAuthority << ':' << fPort; 463 464 fAuthorityValid = true; 465 } 466 return fAuthority; 467 } 468 469 470 const BString& 471 BUrl::Path() const 472 { 473 return fPath; 474 } 475 476 477 const BString& 478 BUrl::Request() const 479 { 480 return fRequest; 481 } 482 483 484 const BString& 485 BUrl::Fragment() const 486 { 487 return fFragment; 488 } 489 490 491 // #pragma mark URL fields tests 492 493 494 bool 495 BUrl::IsValid() const 496 { 497 if (!fHasProtocol) 498 return false; 499 500 if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp" 501 || fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet" 502 || fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp" 503 || fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") { 504 return fHasHost && !fHost.IsEmpty(); 505 } 506 507 if (fProtocol == "file") 508 return fHasPath; 509 510 return true; 511 } 512 513 514 bool 515 BUrl::HasProtocol() const 516 { 517 return fHasProtocol; 518 } 519 520 521 bool 522 BUrl::HasAuthority() const 523 { 524 return fHasHost || fHasUserName; 525 } 526 527 528 bool 529 BUrl::HasUserName() const 530 { 531 return fHasUserName; 532 } 533 534 535 bool 536 BUrl::HasPassword() const 537 { 538 return fHasPassword; 539 } 540 541 542 bool 543 BUrl::HasUserInfo() const 544 { 545 return fHasUserName || fHasPassword; 546 } 547 548 549 bool 550 BUrl::HasHost() const 551 { 552 return fHasHost; 553 } 554 555 556 bool 557 BUrl::HasPort() const 558 { 559 return fHasPort; 560 } 561 562 563 bool 564 BUrl::HasPath() const 565 { 566 return fHasPath; 567 } 568 569 570 bool 571 BUrl::HasRequest() const 572 { 573 return fHasRequest; 574 } 575 576 577 bool 578 BUrl::HasFragment() const 579 { 580 return fHasFragment; 581 } 582 583 584 // #pragma mark URL encoding/decoding of needed fields 585 586 587 void 588 BUrl::UrlEncode(bool strict) 589 { 590 fUser = _DoUrlEncodeChunk(fUser, strict); 591 fPassword = _DoUrlEncodeChunk(fPassword, strict); 592 fHost = _DoUrlEncodeChunk(fHost, strict); 593 fFragment = _DoUrlEncodeChunk(fFragment, strict); 594 fPath = _DoUrlEncodeChunk(fPath, strict, true); 595 } 596 597 598 void 599 BUrl::UrlDecode(bool strict) 600 { 601 fUser = _DoUrlDecodeChunk(fUser, strict); 602 fPassword = _DoUrlDecodeChunk(fPassword, strict); 603 fHost = _DoUrlDecodeChunk(fHost, strict); 604 fFragment = _DoUrlDecodeChunk(fFragment, strict); 605 fPath = _DoUrlDecodeChunk(fPath, strict); 606 } 607 608 609 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 610 status_t 611 BUrl::IDNAToAscii() 612 { 613 UErrorCode err = U_ZERO_ERROR; 614 icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err); 615 icu::IDNAInfo info; 616 617 BString result; 618 BStringByteSink sink(&result); 619 converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info, 620 err); 621 622 delete converter; 623 624 if (U_FAILURE(err)) 625 return B_ERROR; 626 627 fHost = result; 628 return B_OK; 629 } 630 #endif 631 632 633 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 634 status_t 635 BUrl::IDNAToUnicode() 636 { 637 UErrorCode err = U_ZERO_ERROR; 638 icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err); 639 icu::IDNAInfo info; 640 641 BString result; 642 BStringByteSink sink(&result); 643 converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info, 644 err); 645 646 delete converter; 647 648 if (U_FAILURE(err)) 649 return B_ERROR; 650 651 fHost = result; 652 return B_OK; 653 } 654 #endif 655 656 657 // #pragma mark - utility functionality 658 659 660 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 661 bool 662 BUrl::HasPreferredApplication() const 663 { 664 BString appSignature = PreferredApplication(); 665 BMimeType mime(appSignature.String()); 666 667 if (appSignature.IFindFirst("application/") == 0 668 && mime.IsValid()) 669 return true; 670 671 return false; 672 } 673 #endif 674 675 676 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 677 BString 678 BUrl::PreferredApplication() const 679 { 680 BString appSignature; 681 BMimeType mime(_UrlMimeType().String()); 682 mime.GetPreferredApp(appSignature.LockBuffer(B_MIME_TYPE_LENGTH)); 683 appSignature.UnlockBuffer(); 684 685 return BString(appSignature); 686 } 687 #endif 688 689 690 #ifdef HAIKU_TARGET_PLATFORM_HAIKU 691 status_t 692 BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const 693 { 694 if (!IsValid()) 695 return B_BAD_VALUE; 696 697 BString urlString = UrlString(); 698 if (urlString.Length() > B_PATH_NAME_LENGTH) { 699 // TODO: BAlert 700 // if (onProblemAskUser) 701 // BAlert ... Too long URL! 702 #if DEBUG 703 fprintf(stderr, "URL too long"); 704 #endif 705 return B_NAME_TOO_LONG; 706 } 707 708 char* argv[] = { 709 const_cast<char*>("BUrlInvokedApplication"), 710 const_cast<char*>(urlString.String()), 711 NULL 712 }; 713 714 #if DEBUG 715 if (HasPreferredApplication()) 716 printf("HasPreferredApplication() == true\n"); 717 else 718 printf("HasPreferredApplication() == false\n"); 719 #endif 720 721 status_t status = be_roster->Launch(_UrlMimeType().String(), 1, argv+1); 722 if (status != B_OK) { 723 #if DEBUG 724 fprintf(stderr, "Opening URL failed: %s\n", strerror(status)); 725 #endif 726 } 727 728 return status; 729 } 730 #endif 731 732 733 // #pragma mark Url encoding/decoding of string 734 735 736 /*static*/ BString 737 BUrl::UrlEncode(const BString& url, bool strict, bool directory) 738 { 739 return _DoUrlEncodeChunk(url, strict, directory); 740 } 741 742 743 /*static*/ BString 744 BUrl::UrlDecode(const BString& url, bool strict) 745 { 746 return _DoUrlDecodeChunk(url, strict); 747 } 748 749 750 // #pragma mark BArchivable members 751 752 753 status_t 754 BUrl::Archive(BMessage* into, bool deep) const 755 { 756 status_t ret = BArchivable::Archive(into, deep); 757 758 if (ret == B_OK) 759 ret = into->AddString(kArchivedUrl, UrlString()); 760 761 return ret; 762 } 763 764 765 /*static*/ BArchivable* 766 BUrl::Instantiate(BMessage* archive) 767 { 768 if (validate_instantiation(archive, "BUrl")) 769 return new(std::nothrow) BUrl(archive); 770 return NULL; 771 } 772 773 774 // #pragma mark URL comparison 775 776 777 bool 778 BUrl::operator==(BUrl& other) const 779 { 780 UrlString(); 781 other.UrlString(); 782 783 return fUrlString == other.fUrlString; 784 } 785 786 787 bool 788 BUrl::operator!=(BUrl& other) const 789 { 790 return !(*this == other); 791 } 792 793 794 // #pragma mark URL assignment 795 796 797 const BUrl& 798 BUrl::operator=(const BUrl& other) 799 { 800 fUrlStringValid = other.fUrlStringValid; 801 if (fUrlStringValid) 802 fUrlString = other.fUrlString; 803 804 fAuthorityValid = other.fAuthorityValid; 805 if (fAuthorityValid) 806 fAuthority = other.fAuthority; 807 808 fUserInfoValid = other.fUserInfoValid; 809 if (fUserInfoValid) 810 fUserInfo = other.fUserInfo; 811 812 fProtocol = other.fProtocol; 813 fUser = other.fUser; 814 fPassword = other.fPassword; 815 fHost = other.fHost; 816 fPort = other.fPort; 817 fPath = other.fPath; 818 fRequest = other.fRequest; 819 fFragment = other.fFragment; 820 821 fHasProtocol = other.fHasProtocol; 822 fHasUserName = other.fHasUserName; 823 fHasPassword = other.fHasPassword; 824 fHasHost = other.fHasHost; 825 fHasPort = other.fHasPort; 826 fHasPath = other.fHasPath; 827 fHasRequest = other.fHasRequest; 828 fHasFragment = other.fHasFragment; 829 830 return *this; 831 } 832 833 834 const BUrl& 835 BUrl::operator=(const BString& string) 836 { 837 SetUrlString(string); 838 return *this; 839 } 840 841 842 const BUrl& 843 BUrl::operator=(const char* string) 844 { 845 SetUrlString(string); 846 return *this; 847 } 848 849 850 // #pragma mark URL to string conversion 851 852 853 BUrl::operator const char*() const 854 { 855 return UrlString(); 856 } 857 858 859 void 860 BUrl::_ResetFields() 861 { 862 fHasProtocol = false; 863 fHasUserName = false; 864 fHasPassword = false; 865 fHasHost = false; 866 fHasPort = false; 867 fHasPath = false; 868 fHasRequest = false; 869 fHasFragment = false; 870 871 fProtocol.Truncate(0); 872 fUser.Truncate(0); 873 fPassword.Truncate(0); 874 fHost.Truncate(0); 875 fPort = 0; 876 fPath.Truncate(0); 877 fRequest.Truncate(0); 878 fFragment.Truncate(0); 879 880 // Force re-generation of these fields 881 fUrlStringValid = false; 882 fUserInfoValid = false; 883 fAuthorityValid = false; 884 } 885 886 887 bool 888 BUrl::_ContainsDelimiter(const BString& url) 889 { 890 int32 len = url.Length(); 891 892 for (int32 i = 0; i < len; i++) { 893 switch (url[i]) { 894 case ' ': 895 case '\n': 896 case '\t': 897 case '\r': 898 case '<': 899 case '>': 900 case '"': 901 return true; 902 } 903 } 904 905 return false; 906 } 907 908 909 enum explode_url_parse_state { 910 EXPLODE_PROTOCOL, 911 EXPLODE_PROTOCOLTERMINATOR, 912 EXPLODE_AUTHORITYORPATH, 913 EXPLODE_AUTHORITY, 914 EXPLODE_PATH, 915 EXPLODE_REQUEST, // query 916 EXPLODE_FRAGMENT, 917 EXPLODE_COMPLETE 918 }; 919 920 921 typedef bool (*explode_char_match_fn)(char c); 922 923 924 static bool 925 explode_is_protocol_char(char c) 926 { 927 return isalnum(c) || c == '+' || c == '.' || c == '-'; 928 } 929 930 931 static bool 932 explode_is_authority_char(char c) 933 { 934 return !(c == '/' || c == '?' || c == '#'); 935 } 936 937 938 static bool 939 explode_is_path_char(char c) 940 { 941 return !(c == '#' || c == '?'); 942 } 943 944 945 static bool 946 explode_is_request_char(char c) 947 { 948 return c != '#'; 949 } 950 951 952 static int32 953 char_offset_until_fn_false(const char* url, int32 len, int32 offset, 954 explode_char_match_fn fn) 955 { 956 while (offset < len && fn(url[offset])) 957 offset++; 958 959 return offset; 960 } 961 962 /* 963 * This function takes a URL in string-form and parses the components of the URL out. 964 */ 965 status_t 966 BUrl::_ExplodeUrlString(const BString& url) 967 { 968 _ResetFields(); 969 970 // RFC3986, Appendix C; the URL should not contain whitespace or delimiters 971 // by this point. 972 973 if (_ContainsDelimiter(url)) 974 return B_BAD_VALUE; 975 976 explode_url_parse_state state = EXPLODE_PROTOCOL; 977 int32 offset = 0; 978 int32 length = url.Length(); 979 const char *url_c = url.String(); 980 981 // The regexp is provided in RFC3986 (URI generic syntax), Appendix B 982 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))? 983 // The ensuing logic attempts to simulate the behaviour of extracting the groups 984 // from the string without requiring a group-capable regex engine. 985 986 while (offset < length) { 987 switch (state) { 988 989 case EXPLODE_PROTOCOL: 990 { 991 int32 end_protocol = char_offset_until_fn_false(url_c, length, 992 offset, explode_is_protocol_char); 993 994 if (end_protocol < length) { 995 SetProtocol(BString(&url_c[offset], end_protocol - offset)); 996 state = EXPLODE_PROTOCOLTERMINATOR; 997 offset = end_protocol; 998 } else { 999 // No protocol was found, try parsing from the string 1000 // start, beginning with authority or path 1001 SetProtocol(""); 1002 offset = 0; 1003 state = EXPLODE_AUTHORITYORPATH; 1004 } 1005 break; 1006 } 1007 1008 case EXPLODE_PROTOCOLTERMINATOR: 1009 { 1010 if (url[offset] == ':') { 1011 offset++; 1012 } else { 1013 // No protocol was found, try parsing from the string 1014 // start, beginning with authority or path 1015 SetProtocol(""); 1016 offset = 0; 1017 } 1018 state = EXPLODE_AUTHORITYORPATH; 1019 break; 1020 } 1021 1022 case EXPLODE_AUTHORITYORPATH: 1023 { 1024 // The authority must start with //. If it isn't there, skip 1025 // to parsing the path. 1026 if (strncmp(&url_c[offset], "//", 2) == 0) { 1027 state = EXPLODE_AUTHORITY; 1028 offset += 2; 1029 } else { 1030 state = EXPLODE_PATH; 1031 } 1032 break; 1033 } 1034 1035 case EXPLODE_AUTHORITY: 1036 { 1037 int end_authority = char_offset_until_fn_false(url_c, length, 1038 offset, explode_is_authority_char); 1039 SetAuthority(BString(&url_c[offset], end_authority - offset)); 1040 state = EXPLODE_PATH; 1041 offset = end_authority; 1042 break; 1043 } 1044 1045 case EXPLODE_PATH: 1046 { 1047 int end_path = char_offset_until_fn_false(url_c, length, offset, 1048 explode_is_path_char); 1049 SetPath(BString(&url_c[offset], end_path - offset)); 1050 state = EXPLODE_REQUEST; 1051 offset = end_path; 1052 break; 1053 } 1054 1055 case EXPLODE_REQUEST: // query 1056 { 1057 if (url_c[offset] == '?') { 1058 offset++; 1059 int end_request = char_offset_until_fn_false(url_c, length, 1060 offset, explode_is_request_char); 1061 SetRequest(BString(&url_c[offset], end_request - offset)); 1062 offset = end_request; 1063 } 1064 state = EXPLODE_FRAGMENT; 1065 break; 1066 } 1067 1068 case EXPLODE_FRAGMENT: 1069 { 1070 if (url_c[offset] == '#') { 1071 offset++; 1072 SetFragment(BString(&url_c[offset], length - offset)); 1073 offset = length; 1074 } 1075 state = EXPLODE_COMPLETE; 1076 break; 1077 } 1078 1079 case EXPLODE_COMPLETE: 1080 // should never be reached - keeps the compiler happy 1081 break; 1082 1083 } 1084 } 1085 1086 return B_OK; 1087 } 1088 1089 1090 BString 1091 BUrl::_MergePath(const BString& relative) const 1092 { 1093 // This implements RFC3986, Section 5.2.3. 1094 if (HasAuthority() && fPath == "") { 1095 BString result("/"); 1096 result << relative; 1097 return result; 1098 } 1099 1100 BString result(fPath); 1101 result.Truncate(result.FindLast("/") + 1); 1102 result << relative; 1103 1104 return result; 1105 } 1106 1107 1108 // This sets the path without normalizing it. If fed with a path that has . or 1109 // .. segments, this would make the URL invalid. 1110 void 1111 BUrl::_SetPathUnsafe(const BString& path) 1112 { 1113 fPath = path; 1114 fHasPath = true; // RFC says an empty path is still a path 1115 fUrlStringValid = false; 1116 } 1117 1118 1119 enum authority_parse_state { 1120 AUTHORITY_USERNAME, 1121 AUTHORITY_PASSWORD, 1122 AUTHORITY_HOST, 1123 AUTHORITY_PORT, 1124 AUTHORITY_COMPLETE 1125 }; 1126 1127 1128 static bool 1129 authority_is_username_char(char c) 1130 { 1131 return !(c == ':' || c == '@'); 1132 } 1133 1134 1135 static bool 1136 authority_is_password_char(char c) 1137 { 1138 return !(c == '@'); 1139 } 1140 1141 1142 static bool 1143 authority_is_ipv6_host_char(char c) { 1144 return (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f') 1145 || (c >= '0' && c <= '9') || c == ':'; 1146 } 1147 1148 1149 static bool 1150 authority_is_host_char(char c) { 1151 return !(c == ':' || c == '/'); 1152 } 1153 1154 1155 static bool 1156 authority_is_port_char(char c) { 1157 return c >= '0' && c <= '9'; 1158 } 1159 1160 1161 void 1162 BUrl::SetAuthority(const BString& authority) 1163 { 1164 fAuthority = authority; 1165 1166 fUser.Truncate(0); 1167 fPassword.Truncate(0); 1168 fHost.Truncate(0); 1169 fPort = 0; 1170 fHasPort = false; 1171 fHasUserName = false; 1172 fHasPassword = false; 1173 1174 bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@'); 1175 authority_parse_state state = AUTHORITY_USERNAME; 1176 int32 offset = 0; 1177 int32 length = authority.Length(); 1178 const char *authority_c = authority.String(); 1179 1180 while (AUTHORITY_COMPLETE != state && offset < length) { 1181 1182 switch (state) { 1183 1184 case AUTHORITY_USERNAME: 1185 { 1186 if (hasUsernamePassword) { 1187 int32 end_username = char_offset_until_fn_false( 1188 authority_c, length, offset, 1189 authority_is_username_char); 1190 1191 SetUserName(BString(&authority_c[offset], 1192 end_username - offset)); 1193 1194 state = AUTHORITY_PASSWORD; 1195 offset = end_username; 1196 } else { 1197 state = AUTHORITY_HOST; 1198 } 1199 break; 1200 } 1201 1202 case AUTHORITY_PASSWORD: 1203 { 1204 if (hasUsernamePassword && ':' == authority[offset]) { 1205 offset++; // move past the delimiter 1206 int32 end_password = char_offset_until_fn_false( 1207 authority_c, length, offset, 1208 authority_is_password_char); 1209 1210 SetPassword(BString(&authority_c[offset], 1211 end_password - offset)); 1212 1213 offset = end_password; 1214 } 1215 1216 // if the host was preceded by a username + password couple 1217 // then there will be an '@' delimiter to avoid. 1218 1219 if (authority_c[offset] == '@') { 1220 offset++; 1221 } 1222 1223 state = AUTHORITY_HOST; 1224 break; 1225 } 1226 1227 case AUTHORITY_HOST: 1228 { 1229 1230 // the host may be enclosed within brackets in order to express 1231 // an IPV6 address. 1232 1233 if (authority_c[offset] == '[') { 1234 int32 end_ipv6_host = char_offset_until_fn_false( 1235 authority_c, length, offset + 1, 1236 authority_is_ipv6_host_char); 1237 1238 if (authority_c[end_ipv6_host] == ']') { 1239 SetHost(BString(&authority_c[offset], 1240 (end_ipv6_host - offset) + 1)); 1241 state = AUTHORITY_PORT; 1242 offset = end_ipv6_host + 1; 1243 } 1244 } 1245 1246 // if an IPV6 host was not found. 1247 1248 if (AUTHORITY_HOST == state) { 1249 int32 end_host = char_offset_until_fn_false( 1250 authority_c, length, offset, authority_is_host_char); 1251 1252 SetHost(BString(&authority_c[offset], end_host - offset)); 1253 state = AUTHORITY_PORT; 1254 offset = end_host; 1255 } 1256 1257 break; 1258 } 1259 1260 case AUTHORITY_PORT: 1261 { 1262 if (authority_c[offset] == ':') { 1263 offset++; 1264 int32 end_port = char_offset_until_fn_false( 1265 authority_c, length, offset, authority_is_port_char); 1266 SetPort(atoi(&authority_c[offset])); 1267 offset = end_port; 1268 } 1269 1270 state = AUTHORITY_COMPLETE; 1271 1272 break; 1273 } 1274 1275 case AUTHORITY_COMPLETE: 1276 // should never be reached - keeps the compiler happy 1277 break; 1278 } 1279 } 1280 1281 // An empty authority is still an authority, making it possible to have 1282 // URLs such as file:///path/to/file. 1283 // TODO however, there is no way to unset the authority once it is set... 1284 // We may want to take a const char* parameter and allow NULL. 1285 fHasHost = true; 1286 } 1287 1288 1289 /*static*/ BString 1290 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory) 1291 { 1292 BString result; 1293 1294 for (int32 i = 0; i < chunk.Length(); i++) { 1295 if (_IsUnreserved(chunk[i]) 1296 || (directory && (chunk[i] == '/' || chunk[i] == '\\'))) { 1297 result << chunk[i]; 1298 } else { 1299 if (chunk[i] == ' ' && !strict) { 1300 result << '+'; 1301 // In non-strict mode, spaces are encoded by a plus sign 1302 } else { 1303 char hexString[5]; 1304 snprintf(hexString, 5, "%X", chunk[i]); 1305 1306 result << '%' << hexString; 1307 } 1308 } 1309 } 1310 1311 return result; 1312 } 1313 1314 1315 /*static*/ BString 1316 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict) 1317 { 1318 BString result; 1319 1320 for (int32 i = 0; i < chunk.Length(); i++) { 1321 if (chunk[i] == '+' && !strict) 1322 result << ' '; 1323 else { 1324 char decoded = 0; 1325 char* out = NULL; 1326 char hexString[3]; 1327 1328 if (chunk[i] == '%' && i < chunk.Length() - 2 1329 && isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) { 1330 hexString[0] = chunk[i + 1]; 1331 hexString[1] = chunk[i + 2]; 1332 hexString[2] = 0; 1333 decoded = (char)strtol(hexString, &out, 16); 1334 } 1335 1336 if (out == hexString + 2) { 1337 i += 2; 1338 result << decoded; 1339 } else 1340 result << chunk[i]; 1341 } 1342 } 1343 return result; 1344 } 1345 1346 1347 bool 1348 BUrl::_IsProtocolValid() 1349 { 1350 for (int8 index = 0; index < fProtocol.Length(); index++) { 1351 char c = fProtocol[index]; 1352 1353 if (index == 0 && !isalpha(c)) 1354 return false; 1355 else if (!isalnum(c) && c != '+' && c != '-' && c != '.') 1356 return false; 1357 } 1358 1359 return fProtocol.Length() > 0; 1360 } 1361 1362 1363 bool 1364 BUrl::_IsUnreserved(char c) 1365 { 1366 return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~'; 1367 } 1368 1369 1370 bool 1371 BUrl::_IsGenDelim(char c) 1372 { 1373 return c == ':' || c == '/' || c == '?' || c == '#' || c == '[' 1374 || c == ']' || c == '@'; 1375 } 1376 1377 1378 bool 1379 BUrl::_IsSubDelim(char c) 1380 { 1381 return c == '!' || c == '$' || c == '&' || c == '\'' || c == '(' 1382 || c == ')' || c == '*' || c == '+' || c == ',' || c == ';' 1383 || c == '='; 1384 } 1385 1386 1387 BString 1388 BUrl::_UrlMimeType() const 1389 { 1390 BString mime; 1391 mime << "application/x-vnd.Be.URL." << fProtocol; 1392 1393 return BString(mime); 1394 } 1395