1 /*
2 * Copyright 2010-2018 Haiku Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 * Christophe Huriaux, c.huriaux@gmail.com
7 * Andrew Lindesay, apl@lindesay.co.nz
8 */
9
10
11 #include <Url.h>
12
13 #include <ctype.h>
14 #include <cstdio>
15 #include <cstdlib>
16 #include <new>
17
18 #include <MimeType.h>
19 #include <Roster.h>
20
21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
22 #include <ICUWrapper.h>
23 #endif
24
25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
26 #include <unicode/idna.h>
27 #include <unicode/stringpiece.h>
28 #endif
29
30
31 static const char* kArchivedUrl = "be:url string";
32
33 /*! These flags can be combined to control the parse process. */
34
35 const uint32 PARSE_NO_MASK_BIT = 0x00000000;
36 const uint32 PARSE_RAW_PATH_MASK_BIT = 0x00000001;
37
38
BUrl(const char * url)39 BUrl::BUrl(const char* url)
40 :
41 fUrlString(),
42 fProtocol(),
43 fUser(),
44 fPassword(),
45 fHost(),
46 fPort(0),
47 fPath(),
48 fRequest(),
49 fHasHost(false),
50 fHasFragment(false)
51 {
52 SetUrlString(url);
53 }
54
55
BUrl(BMessage * archive)56 BUrl::BUrl(BMessage* archive)
57 :
58 fUrlString(),
59 fProtocol(),
60 fUser(),
61 fPassword(),
62 fHost(),
63 fPort(0),
64 fPath(),
65 fRequest(),
66 fHasHost(false),
67 fHasFragment(false)
68 {
69 BString url;
70
71 if (archive->FindString(kArchivedUrl, &url) == B_OK)
72 SetUrlString(url);
73 else
74 _ResetFields();
75 }
76
77
/*!	Copy constructor.

	All component fields and flags are copied. The lazily generated strings
	(full URL, authority, user info) are copied only when the corresponding
	cache of \a other is marked valid.
*/
BUrl::BUrl(const BUrl& other)
	:
	BArchivable(),
	fUrlString(),
	fProtocol(other.fProtocol),
	fUser(other.fUser),
	fPassword(other.fPassword),
	fHost(other.fHost),
	fPort(other.fPort),
	fPath(other.fPath),
	fRequest(other.fRequest),
	fFragment(other.fFragment),
	fUrlStringValid(other.fUrlStringValid),
	fAuthorityValid(other.fAuthorityValid),
	fUserInfoValid(other.fUserInfoValid),
	fHasProtocol(other.fHasProtocol),
	fHasUserName(other.fHasUserName),
	fHasPassword(other.fHasPassword),
	fHasHost(other.fHasHost),
	fHasPort(other.fHasPort),
	fHasPath(other.fHasPath),
	fHasRequest(other.fHasRequest),
	fHasFragment(other.fHasFragment)
{
	// An invalid cache would be regenerated on first use anyway, so there is
	// no point in copying its content.
	if (other.fUrlStringValid)
		fUrlString = other.fUrlString;

	if (other.fAuthorityValid)
		fAuthority = other.fAuthority;

	if (other.fUserInfoValid)
		fUserInfo = other.fUserInfo;
}
112
113
/*!	Builds a URL by resolving \a location against \a base.

	This implements the reference resolution algorithm of RFC3986,
	Section 5.2.

	\param base The URL the reference is relative to.
	\param location The (possibly relative) reference to resolve.
*/
BUrl::BUrl(const BUrl& base, const BString& location)
	:
	fPort(0),
	fAuthorityValid(false),
	fUserInfoValid(false),
	fHasUserName(false),
	fHasPassword(false),
	fHasHost(false),
	fHasPort(false),
	fHasFragment(false)
{
	// Parse the reference with its path left 'raw', so that any '.' and '..'
	// segments survive and can be combined with the base path below.
	BUrl relative;
	relative._ExplodeUrlString(location, PARSE_RAW_PATH_MASK_BIT);

	if (relative.HasProtocol()) {
		// The reference is absolute: take everything from it.
		SetProtocol(relative.Protocol());
		if (relative.HasAuthority())
			SetAuthority(relative.Authority());
		SetPath(relative.Path());
		SetRequest(relative.Request());
	} else if (relative.HasAuthority()) {
		// Network-path reference: only the protocol comes from the base.
		SetAuthority(relative.Authority());
		SetPath(relative.Path());
		SetRequest(relative.Request());
		SetProtocol(base.Protocol());
	} else {
		const BString& relativePath = relative.Path();

		if (relativePath.IsEmpty()) {
			// Same document: keep the base path, and the base request
			// unless the reference brings its own.
			_SetPathUnsafe(base.Path());
			SetRequest(relative.HasRequest()
				? relative.Request() : base.Request());
		} else {
			if (relativePath[0] == '/')
				SetPath(relativePath);
			else
				SetPath(base._MergePath(relativePath));
			SetRequest(relative.Request());
		}

		if (base.HasAuthority())
			SetAuthority(base.Authority());
		SetProtocol(base.Protocol());
	}

	// The fragment always comes from the reference.
	if (relative.HasFragment())
		SetFragment(relative.Fragment());
}
177
178
BUrl()179 BUrl::BUrl()
180 :
181 fUrlString(),
182 fProtocol(),
183 fUser(),
184 fPassword(),
185 fHost(),
186 fPort(0),
187 fPath(),
188 fRequest(),
189 fHasHost(false),
190 fHasFragment(false)
191 {
192 _ResetFields();
193 }
194
195
BUrl(const BPath & path)196 BUrl::BUrl(const BPath& path)
197 :
198 fUrlString(),
199 fProtocol(),
200 fUser(),
201 fPassword(),
202 fHost(),
203 fPort(0),
204 fPath(),
205 fRequest(),
206 fHasHost(false),
207 fHasFragment(false)
208 {
209 SetUrlString(UrlEncode(path.Path(), true, true));
210 SetProtocol("file");
211 }
212
213
~BUrl()214 BUrl::~BUrl()
215 {
216 }
217
218
219 // #pragma mark URL fields modifiers
220
221
222 BUrl&
SetUrlString(const BString & url)223 BUrl::SetUrlString(const BString& url)
224 {
225 _ExplodeUrlString(url, PARSE_NO_MASK_BIT);
226 return *this;
227 }
228
229
230 BUrl&
SetProtocol(const BString & protocol)231 BUrl::SetProtocol(const BString& protocol)
232 {
233 fProtocol = protocol;
234 fHasProtocol = !fProtocol.IsEmpty();
235 fUrlStringValid = false;
236 return *this;
237 }
238
239
240 BUrl&
SetUserName(const BString & user)241 BUrl::SetUserName(const BString& user)
242 {
243 fUser = user;
244 fHasUserName = !fUser.IsEmpty();
245 fUrlStringValid = false;
246 fAuthorityValid = false;
247 fUserInfoValid = false;
248 return *this;
249 }
250
251
252 BUrl&
SetPassword(const BString & password)253 BUrl::SetPassword(const BString& password)
254 {
255 fPassword = password;
256 fHasPassword = !fPassword.IsEmpty();
257 fUrlStringValid = false;
258 fAuthorityValid = false;
259 fUserInfoValid = false;
260 return *this;
261 }
262
263
264 BUrl&
SetHost(const BString & host)265 BUrl::SetHost(const BString& host)
266 {
267 fHost = host;
268 fHasHost = !fHost.IsEmpty();
269 fUrlStringValid = false;
270 fAuthorityValid = false;
271 return *this;
272 }
273
274
275 BUrl&
SetPort(int port)276 BUrl::SetPort(int port)
277 {
278 fPort = port;
279 fHasPort = (port != 0);
280 fUrlStringValid = false;
281 fAuthorityValid = false;
282 return *this;
283 }
284
285
286 void
_RemoveLastPathComponent(BString & path)287 BUrl::_RemoveLastPathComponent(BString& path)
288 {
289 int32 outputLastSlashIdx = path.FindLast('/');
290
291 if (outputLastSlashIdx == B_ERROR)
292 path.Truncate(0);
293 else
294 path.Truncate(outputLastSlashIdx);
295 }
296
297
298 BUrl&
SetPath(const BString & path)299 BUrl::SetPath(const BString& path)
300 {
301 // Implements RFC3986 section 5.2.4, "Remove dot segments"
302
303 // 1.
304 BString output;
305 BString input(path);
306
307 // 2.
308 while (!input.IsEmpty()) {
309 // 2.A.
310 if (input.StartsWith("./")) {
311 input.Remove(0, 2);
312 continue;
313 }
314
315 if (input.StartsWith("../")) {
316 input.Remove(0, 3);
317 continue;
318 }
319
320 // 2.B.
321 if (input.StartsWith("/./")) {
322 input.Remove(0, 2);
323 continue;
324 }
325
326 if (input == "/.") {
327 input.Remove(1, 1);
328 continue;
329 }
330
331 // 2.C.
332 if (input.StartsWith("/../")) {
333 input.Remove(0, 3);
334 _RemoveLastPathComponent(output);
335 continue;
336 }
337
338 if (input == "/..") {
339 input.Remove(1, 2);
340 _RemoveLastPathComponent(output);
341 continue;
342 }
343
344 // 2.D.
345 if (input == "." || input == "..") {
346 break;
347 }
348
349 if (input == "/.") {
350 input.Remove(1, 1);
351 continue;
352 }
353
354 // 2.E.
355 int slashpos = input.FindFirst('/', 1);
356 if (slashpos > 0) {
357 output.Append(input, slashpos);
358 input.Remove(0, slashpos);
359 } else {
360 output.Append(input);
361 break;
362 }
363 }
364
365 _SetPathUnsafe(output);
366 return *this;
367 }
368
369
370 BUrl&
SetRequest(const BString & request)371 BUrl::SetRequest(const BString& request)
372 {
373 fRequest = request;
374 fHasRequest = !fRequest.IsEmpty();
375 fUrlStringValid = false;
376 return *this;
377 }
378
379
380 BUrl&
SetFragment(const BString & fragment)381 BUrl::SetFragment(const BString& fragment)
382 {
383 fFragment = fragment;
384 fHasFragment = true;
385 fUrlStringValid = false;
386 return *this;
387 }
388
389
390 // #pragma mark URL fields access
391
392
393 const BString&
UrlString() const394 BUrl::UrlString() const
395 {
396 if (!fUrlStringValid) {
397 fUrlString.Truncate(0);
398
399 if (HasProtocol()) {
400 fUrlString << fProtocol << ':';
401 }
402
403 if (HasAuthority()) {
404 fUrlString << "//";
405 fUrlString << Authority();
406 }
407 fUrlString << Path();
408
409 if (HasRequest())
410 fUrlString << '?' << fRequest;
411
412 if (HasFragment())
413 fUrlString << '#' << fFragment;
414
415 fUrlStringValid = true;
416 }
417
418 return fUrlString;
419 }
420
421
422 const BString&
Protocol() const423 BUrl::Protocol() const
424 {
425 return fProtocol;
426 }
427
428
429 const BString&
UserName() const430 BUrl::UserName() const
431 {
432 return fUser;
433 }
434
435
436 const BString&
Password() const437 BUrl::Password() const
438 {
439 return fPassword;
440 }
441
442
443 const BString&
UserInfo() const444 BUrl::UserInfo() const
445 {
446 if (!fUserInfoValid) {
447 fUserInfo = fUser;
448
449 if (HasPassword())
450 fUserInfo << ':' << fPassword;
451
452 fUserInfoValid = true;
453 }
454
455 return fUserInfo;
456 }
457
458
459 const BString&
Host() const460 BUrl::Host() const
461 {
462 return fHost;
463 }
464
465
466 int
Port() const467 BUrl::Port() const
468 {
469 return fPort;
470 }
471
472
473 const BString&
Authority() const474 BUrl::Authority() const
475 {
476 if (!fAuthorityValid) {
477 fAuthority.Truncate(0);
478
479 if (HasUserInfo())
480 fAuthority << UserInfo() << '@';
481 fAuthority << Host();
482
483 if (HasPort())
484 fAuthority << ':' << fPort;
485
486 fAuthorityValid = true;
487 }
488 return fAuthority;
489 }
490
491
492 const BString&
Path() const493 BUrl::Path() const
494 {
495 return fPath;
496 }
497
498
499 const BString&
Request() const500 BUrl::Request() const
501 {
502 return fRequest;
503 }
504
505
506 const BString&
Fragment() const507 BUrl::Fragment() const
508 {
509 return fFragment;
510 }
511
512
513 // #pragma mark URL fields tests
514
515
516 bool
IsValid() const517 BUrl::IsValid() const
518 {
519 if (!fHasProtocol)
520 return false;
521
522 if (!_IsProtocolValid())
523 return false;
524
525 // it is possible that there can be an authority but no host.
526 // wierd://tea:tree@/x
527 if (HasHost() && !(fHost.IsEmpty() && HasAuthority()) && !_IsHostValid())
528 return false;
529
530 if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp"
531 || fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet"
532 || fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp"
533 || fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") {
534 return HasHost() && !fHost.IsEmpty();
535 }
536
537 if (fProtocol == "file")
538 return fHasPath;
539
540 return true;
541 }
542
543
544 bool
HasProtocol() const545 BUrl::HasProtocol() const
546 {
547 return fHasProtocol;
548 }
549
550
551 bool
HasAuthority() const552 BUrl::HasAuthority() const
553 {
554 return fHasHost || fHasUserName;
555 }
556
557
558 bool
HasUserName() const559 BUrl::HasUserName() const
560 {
561 return fHasUserName;
562 }
563
564
565 bool
HasPassword() const566 BUrl::HasPassword() const
567 {
568 return fHasPassword;
569 }
570
571
572 bool
HasUserInfo() const573 BUrl::HasUserInfo() const
574 {
575 return fHasUserName || fHasPassword;
576 }
577
578
579 bool
HasHost() const580 BUrl::HasHost() const
581 {
582 return fHasHost;
583 }
584
585
586 bool
HasPort() const587 BUrl::HasPort() const
588 {
589 return fHasPort;
590 }
591
592
593 bool
HasPath() const594 BUrl::HasPath() const
595 {
596 return fHasPath;
597 }
598
599
600 bool
HasRequest() const601 BUrl::HasRequest() const
602 {
603 return fHasRequest;
604 }
605
606
607 bool
HasFragment() const608 BUrl::HasFragment() const
609 {
610 return fHasFragment;
611 }
612
613
614 // #pragma mark URL encoding/decoding of needed fields
615
616
617 void
UrlEncode(bool strict)618 BUrl::UrlEncode(bool strict)
619 {
620 fUser = _DoUrlEncodeChunk(fUser, strict);
621 fPassword = _DoUrlEncodeChunk(fPassword, strict);
622 fHost = _DoUrlEncodeChunk(fHost, strict);
623 fFragment = _DoUrlEncodeChunk(fFragment, strict);
624 fPath = _DoUrlEncodeChunk(fPath, strict, true);
625 }
626
627
628 void
UrlDecode(bool strict)629 BUrl::UrlDecode(bool strict)
630 {
631 fUser = _DoUrlDecodeChunk(fUser, strict);
632 fPassword = _DoUrlDecodeChunk(fPassword, strict);
633 fHost = _DoUrlDecodeChunk(fHost, strict);
634 fFragment = _DoUrlDecodeChunk(fFragment, strict);
635 fPath = _DoUrlDecodeChunk(fPath, strict);
636 }
637
638
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Converts the host to its ASCII form using ICU's UTS #46 converter.

	\return B_OK when the host was converted, B_ERROR when the converter
		could not be created or reported a failure. The host is left
		untouched on error.
*/
status_t
BUrl::IDNAToAscii()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);

	// Creation can fail; never use the converter in that case.
	if (converter == NULL || U_FAILURE(err)) {
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;

	BString result;
	BStringByteSink sink(&result);
	converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;

	// The host is part of the cached authority and URL strings.
	fAuthorityValid = false;
	fUrlStringValid = false;
	return B_OK;
}
#endif
661
662
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Converts the host to its Unicode (UTF-8) form using ICU's UTS #46
	converter.

	\return B_OK when the host was converted, B_ERROR when the converter
		could not be created or reported a failure. The host is left
		untouched on error.
*/
status_t
BUrl::IDNAToUnicode()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);

	// Creation can fail; never use the converter in that case.
	if (converter == NULL || U_FAILURE(err)) {
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;

	BString result;
	BStringByteSink sink(&result);
	converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;

	// The host is part of the cached authority and URL strings.
	fAuthorityValid = false;
	fUrlStringValid = false;
	return B_OK;
}
#endif
685
686
687 // #pragma mark - utility functionality
688
689
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Tells whether an application is registered for this URL's protocol.

	The signature returned by PreferredApplication() must start with
	"application/" (compared case-insensitively) and be a valid MIME type.
*/
bool
BUrl::HasPreferredApplication() const
{
	const BString appSignature = PreferredApplication();
	BMimeType mime(appSignature.String());

	return appSignature.IFindFirst("application/") == 0 && mime.IsValid();
}
#endif
704
705
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Returns the signature of the application preferred for the MIME type
	built from this URL's protocol (see _UrlMimeType()).

	The status returned by BMimeType::GetPreferredApp() is not checked.
*/
BString
BUrl::PreferredApplication() const
{
	BMimeType mime(_UrlMimeType().String());

	// Let the MIME database write straight into the string's buffer.
	BString appSignature;
	char* buffer = appSignature.LockBuffer(B_MIME_TYPE_LENGTH);
	mime.GetPreferredApp(buffer);
	appSignature.UnlockBuffer();

	return appSignature;
}
#endif
718
719
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Launches the application preferred for this URL's protocol, passing the
	URL string as its only argument.

	\param onProblemAskUser Currently unused (see the TODO below).
	\return B_BAD_VALUE when the URL is not valid, B_NAME_TOO_LONG when its
		string form is longer than B_PATH_NAME_LENGTH, otherwise the status
		returned by BRoster::Launch().
*/
status_t
BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const
{
	if (!IsValid())
		return B_BAD_VALUE;

	BString urlString = UrlString();
	const bool tooLong = urlString.Length() > B_PATH_NAME_LENGTH;
	if (tooLong) {
		// TODO: BAlert
		//	if (onProblemAskUser)
		//		BAlert ... Too long URL!
#if DEBUG
		fprintf(stderr, "URL too long");
#endif
		return B_NAME_TOO_LONG;
	}

	// Only the URL itself is handed over: Launch() below starts at argv + 1.
	char* argv[] = {
		const_cast<char*>("BUrlInvokedApplication"),
		const_cast<char*>(urlString.String()),
		NULL
	};

#if DEBUG
	if (HasPreferredApplication())
		printf("HasPreferredApplication() == true\n");
	else
		printf("HasPreferredApplication() == false\n");
#endif

	const status_t status
		= be_roster->Launch(_UrlMimeType().String(), 1, argv + 1);

#if DEBUG
	if (status != B_OK)
		fprintf(stderr, "Opening URL failed: %s\n", strerror(status));
#endif

	return status;
}
#endif
761
762
763 // #pragma mark Url encoding/decoding of string
764
765
766 /*static*/ BString
UrlEncode(const BString & url,bool strict,bool directory)767 BUrl::UrlEncode(const BString& url, bool strict, bool directory)
768 {
769 return _DoUrlEncodeChunk(url, strict, directory);
770 }
771
772
773 /*static*/ BString
UrlDecode(const BString & url,bool strict)774 BUrl::UrlDecode(const BString& url, bool strict)
775 {
776 return _DoUrlDecodeChunk(url, strict);
777 }
778
779
780 // #pragma mark BArchivable members
781
782
783 status_t
Archive(BMessage * into,bool deep) const784 BUrl::Archive(BMessage* into, bool deep) const
785 {
786 status_t ret = BArchivable::Archive(into, deep);
787
788 if (ret == B_OK)
789 ret = into->AddString(kArchivedUrl, UrlString());
790
791 return ret;
792 }
793
794
795 /*static*/ BArchivable*
Instantiate(BMessage * archive)796 BUrl::Instantiate(BMessage* archive)
797 {
798 if (validate_instantiation(archive, "BUrl"))
799 return new(std::nothrow) BUrl(archive);
800 return NULL;
801 }
802
803
804 // #pragma mark URL comparison
805
806
807 bool
operator ==(BUrl & other) const808 BUrl::operator==(BUrl& other) const
809 {
810 UrlString();
811 other.UrlString();
812
813 return fUrlString == other.fUrlString;
814 }
815
816
817 bool
operator !=(BUrl & other) const818 BUrl::operator!=(BUrl& other) const
819 {
820 return !(*this == other);
821 }
822
823
824 // #pragma mark URL assignment
825
826
827 const BUrl&
operator =(const BUrl & other)828 BUrl::operator=(const BUrl& other)
829 {
830 fUrlStringValid = other.fUrlStringValid;
831 if (fUrlStringValid)
832 fUrlString = other.fUrlString;
833
834 fAuthorityValid = other.fAuthorityValid;
835 if (fAuthorityValid)
836 fAuthority = other.fAuthority;
837
838 fUserInfoValid = other.fUserInfoValid;
839 if (fUserInfoValid)
840 fUserInfo = other.fUserInfo;
841
842 fProtocol = other.fProtocol;
843 fUser = other.fUser;
844 fPassword = other.fPassword;
845 fHost = other.fHost;
846 fPort = other.fPort;
847 fPath = other.fPath;
848 fRequest = other.fRequest;
849 fFragment = other.fFragment;
850
851 fHasProtocol = other.fHasProtocol;
852 fHasUserName = other.fHasUserName;
853 fHasPassword = other.fHasPassword;
854 fHasHost = other.fHasHost;
855 fHasPort = other.fHasPort;
856 fHasPath = other.fHasPath;
857 fHasRequest = other.fHasRequest;
858 fHasFragment = other.fHasFragment;
859
860 return *this;
861 }
862
863
864 const BUrl&
operator =(const BString & string)865 BUrl::operator=(const BString& string)
866 {
867 SetUrlString(string);
868 return *this;
869 }
870
871
872 const BUrl&
operator =(const char * string)873 BUrl::operator=(const char* string)
874 {
875 SetUrlString(string);
876 return *this;
877 }
878
879
880 // #pragma mark URL to string conversion
881
882
operator const char*() const883 BUrl::operator const char*() const
884 {
885 return UrlString();
886 }
887
888
889 void
_ResetFields()890 BUrl::_ResetFields()
891 {
892 fHasProtocol = false;
893 fHasUserName = false;
894 fHasPassword = false;
895 fHasHost = false;
896 fHasPort = false;
897 fHasPath = false;
898 fHasRequest = false;
899 fHasFragment = false;
900
901 fProtocol.Truncate(0);
902 fUser.Truncate(0);
903 fPassword.Truncate(0);
904 fHost.Truncate(0);
905 fPort = 0;
906 fPath.Truncate(0);
907 fRequest.Truncate(0);
908 fFragment.Truncate(0);
909
910 // Force re-generation of these fields
911 fUrlStringValid = false;
912 fUserInfoValid = false;
913 fAuthorityValid = false;
914 }
915
916
917 bool
_ContainsDelimiter(const BString & url)918 BUrl::_ContainsDelimiter(const BString& url)
919 {
920 int32 len = url.Length();
921
922 for (int32 i = 0; i < len; i++) {
923 switch (url[i]) {
924 case ' ':
925 case '\n':
926 case '\t':
927 case '\r':
928 case '<':
929 case '>':
930 case '"':
931 return true;
932 }
933 }
934
935 return false;
936 }
937
938
// States of the parser in BUrl::_ExplodeUrlString(), in the order in which
// the components appear in a URL.
enum explode_url_parse_state {
	EXPLODE_PROTOCOL = 0,
	EXPLODE_PROTOCOLTERMINATOR = 1,
	EXPLODE_AUTHORITYORPATH = 2,
	EXPLODE_AUTHORITY = 3,
	EXPLODE_PATH = 4,
	EXPLODE_REQUEST = 5, // query
	EXPLODE_FRAGMENT = 6,
	EXPLODE_COMPLETE = 7
};
949
950
// Predicate deciding whether a character still belongs to the URL component
// being scanned.
typedef bool (*explode_char_match_fn)(char);
952
953
// Tells whether c may appear in a protocol (scheme): alphanumerics, '+',
// '.' and '-'.
static bool
explode_is_protocol_char(char c)
{
	// The <ctype.h> functions require a value representable as unsigned
	// char; a plain char may be negative for bytes above 127.
	return isalnum(static_cast<unsigned char>(c)) != 0
		|| c == '+' || c == '.' || c == '-';
}
959
960
// Tells whether c belongs to the authority; '/', '?' and '#' end it.
static bool
explode_is_authority_char(char c)
{
	return c != '/' && c != '?' && c != '#';
}
966
967
// Tells whether c belongs to the path; '?' and '#' end it.
static bool
explode_is_path_char(char c)
{
	return c != '#' && c != '?';
}
973
974
// Tells whether c belongs to the request (query); only '#' ends it.
static bool
explode_is_request_char(char c)
{
	const bool startsFragment = c == '#';
	return !startsFragment;
}
980
981
982 static int32
char_offset_until_fn_false(const char * url,int32 len,int32 offset,explode_char_match_fn fn)983 char_offset_until_fn_false(const char* url, int32 len, int32 offset,
984 explode_char_match_fn fn)
985 {
986 while (offset < len && fn(url[offset]))
987 offset++;
988
989 return offset;
990 }
991
992 /*
993 * This function takes a URL in string-form and parses the components of the URL out.
994 */
995 status_t
_ExplodeUrlString(const BString & url,uint32 flags)996 BUrl::_ExplodeUrlString(const BString& url, uint32 flags)
997 {
998 _ResetFields();
999
1000 // RFC3986, Appendix C; the URL should not contain whitespace or delimiters
1001 // by this point.
1002
1003 if (_ContainsDelimiter(url))
1004 return B_BAD_VALUE;
1005
1006 explode_url_parse_state state = EXPLODE_PROTOCOL;
1007 int32 offset = 0;
1008 int32 length = url.Length();
1009 bool forceHasHost = false;
1010 const char *url_c = url.String();
1011
1012 // The regexp is provided in RFC3986 (URI generic syntax), Appendix B
1013 // ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?
1014 // The ensuing logic attempts to simulate the behaviour of extracting the groups
1015 // from the string without requiring a group-capable regex engine.
1016
1017 while (offset < length) {
1018 switch (state) {
1019
1020 case EXPLODE_PROTOCOL:
1021 {
1022 int32 end_protocol = char_offset_until_fn_false(url_c, length,
1023 offset, explode_is_protocol_char);
1024
1025 if (end_protocol < length) {
1026 SetProtocol(BString(&url_c[offset], end_protocol - offset));
1027 state = EXPLODE_PROTOCOLTERMINATOR;
1028 offset = end_protocol;
1029 } else {
1030 // No protocol was found, try parsing from the string
1031 // start, beginning with authority or path
1032 SetProtocol("");
1033 offset = 0;
1034 state = EXPLODE_AUTHORITYORPATH;
1035 }
1036 break;
1037 }
1038
1039 case EXPLODE_PROTOCOLTERMINATOR:
1040 {
1041 if (url[offset] == ':') {
1042 offset++;
1043 } else {
1044 // No protocol was found, try parsing from the string
1045 // start, beginning with authority or path
1046 SetProtocol("");
1047 offset = 0;
1048 }
1049 state = EXPLODE_AUTHORITYORPATH;
1050 break;
1051 }
1052
1053 case EXPLODE_AUTHORITYORPATH:
1054 {
1055 // The authority must start with //. If it isn't there, skip
1056 // to parsing the path.
1057 if (strncmp(&url_c[offset], "//", 2) == 0) {
1058 state = EXPLODE_AUTHORITY;
1059 // if we see the // then this would imply that a host is
1060 // to be rendered even if no host has been parsed.
1061 forceHasHost = true;
1062 offset += 2;
1063 } else {
1064 state = EXPLODE_PATH;
1065 }
1066 break;
1067 }
1068
1069 case EXPLODE_AUTHORITY:
1070 {
1071 int end_authority = char_offset_until_fn_false(url_c, length,
1072 offset, explode_is_authority_char);
1073 SetAuthority(BString(&url_c[offset], end_authority - offset));
1074 state = EXPLODE_PATH;
1075 offset = end_authority;
1076 break;
1077 }
1078
1079 case EXPLODE_PATH:
1080 {
1081 int end_path = char_offset_until_fn_false(url_c, length, offset,
1082 explode_is_path_char);
1083 BString path(&url_c[offset], end_path - offset);
1084
1085 if ((flags & PARSE_RAW_PATH_MASK_BIT) == 0)
1086 SetPath(path);
1087 else
1088 _SetPathUnsafe(path);
1089 state = EXPLODE_REQUEST;
1090 offset = end_path;
1091 break;
1092 }
1093
1094 case EXPLODE_REQUEST: // query
1095 {
1096 if (url_c[offset] == '?') {
1097 offset++;
1098 int end_request = char_offset_until_fn_false(url_c, length,
1099 offset, explode_is_request_char);
1100 SetRequest(BString(&url_c[offset], end_request - offset));
1101 offset = end_request;
1102 // if there is a "?" in the parse then it is clear that
1103 // there is a 'request' / query present regardless if there
1104 // are any valid key-value pairs.
1105 fHasRequest = true;
1106 }
1107 state = EXPLODE_FRAGMENT;
1108 break;
1109 }
1110
1111 case EXPLODE_FRAGMENT:
1112 {
1113 if (url_c[offset] == '#') {
1114 offset++;
1115 SetFragment(BString(&url_c[offset], length - offset));
1116 offset = length;
1117 }
1118 state = EXPLODE_COMPLETE;
1119 break;
1120 }
1121
1122 case EXPLODE_COMPLETE:
1123 // should never be reached - keeps the compiler happy
1124 break;
1125
1126 }
1127 }
1128
1129 if (forceHasHost)
1130 fHasHost = true;
1131
1132 return B_OK;
1133 }
1134
1135
1136 BString
_MergePath(const BString & relative) const1137 BUrl::_MergePath(const BString& relative) const
1138 {
1139 // This implements RFC3986, Section 5.2.3.
1140 if (HasAuthority() && fPath == "") {
1141 BString result("/");
1142 result << relative;
1143 return result;
1144 }
1145
1146 int32 lastSlashIndex = fPath.FindLast("/");
1147
1148 if (lastSlashIndex == B_ERROR)
1149 return relative;
1150
1151 BString result;
1152 result.SetTo(fPath, lastSlashIndex + 1);
1153 result << relative;
1154
1155 return result;
1156 }
1157
1158
1159 // This sets the path without normalizing it. If fed with a path that has . or
1160 // .. segments, this would make the URL invalid.
1161 void
_SetPathUnsafe(const BString & path)1162 BUrl::_SetPathUnsafe(const BString& path)
1163 {
1164 fPath = path;
1165 fHasPath = true; // RFC says an empty path is still a path
1166 fUrlStringValid = false;
1167 }
1168
1169
// States of the parser in BUrl::SetAuthority(), in the order in which the
// parts appear in an authority.
enum authority_parse_state {
	AUTHORITY_USERNAME = 0,
	AUTHORITY_PASSWORD = 1,
	AUTHORITY_HOST = 2,
	AUTHORITY_PORT = 3,
	AUTHORITY_COMPLETE = 4
};
1177
1178 void
SetAuthority(const BString & authority)1179 BUrl::SetAuthority(const BString& authority)
1180 {
1181 fAuthority = authority;
1182
1183 fUser.Truncate(0);
1184 fPassword.Truncate(0);
1185 fHost.Truncate(0);
1186 fPort = 0;
1187 fHasPort = false;
1188 fHasUserName = false;
1189 fHasPassword = false;
1190
1191 bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@');
1192 authority_parse_state state = AUTHORITY_USERNAME;
1193 int32 offset = 0;
1194 int32 length = authority.Length();
1195 const char *authority_c = authority.String();
1196
1197 while (AUTHORITY_COMPLETE != state && offset < length) {
1198
1199 switch (state) {
1200
1201 case AUTHORITY_USERNAME:
1202 {
1203 if (hasUsernamePassword) {
1204 int32 end_username = char_offset_until_fn_false(
1205 authority_c, length, offset, _IsUsernameChar);
1206
1207 SetUserName(BString(&authority_c[offset],
1208 end_username - offset));
1209
1210 state = AUTHORITY_PASSWORD;
1211 offset = end_username;
1212 } else {
1213 state = AUTHORITY_HOST;
1214 }
1215 break;
1216 }
1217
1218 case AUTHORITY_PASSWORD:
1219 {
1220 if (hasUsernamePassword && ':' == authority[offset]) {
1221 offset++; // move past the delimiter
1222 int32 end_password = char_offset_until_fn_false(
1223 authority_c, length, offset, _IsPasswordChar);
1224
1225 SetPassword(BString(&authority_c[offset],
1226 end_password - offset));
1227
1228 offset = end_password;
1229 }
1230
1231 // if the host was preceded by a username + password couple
1232 // then there will be an '@' delimiter to avoid.
1233
1234 if (authority_c[offset] == '@') {
1235 offset++;
1236 }
1237
1238 state = AUTHORITY_HOST;
1239 break;
1240 }
1241
1242 case AUTHORITY_HOST:
1243 {
1244
1245 // the host may be enclosed within brackets in order to express
1246 // an IPV6 address.
1247
1248 if (authority_c[offset] == '[') {
1249 int32 end_ipv6_host = char_offset_until_fn_false(
1250 authority_c, length, offset + 1, _IsIPV6Char);
1251
1252 if (authority_c[end_ipv6_host] == ']') {
1253 SetHost(BString(&authority_c[offset],
1254 (end_ipv6_host - offset) + 1));
1255 state = AUTHORITY_PORT;
1256 offset = end_ipv6_host + 1;
1257 }
1258 }
1259
1260 // if an IPV6 host was not found.
1261
1262 if (AUTHORITY_HOST == state) {
1263 int32 end_host = char_offset_until_fn_false(
1264 authority_c, length, offset, _IsHostChar);
1265
1266 SetHost(BString(&authority_c[offset], end_host - offset));
1267 state = AUTHORITY_PORT;
1268 offset = end_host;
1269 }
1270
1271 break;
1272 }
1273
1274 case AUTHORITY_PORT:
1275 {
1276 if (authority_c[offset] == ':') {
1277 offset++;
1278 int32 end_port = char_offset_until_fn_false(
1279 authority_c, length, offset, _IsPortChar);
1280 SetPort(atoi(&authority_c[offset]));
1281 offset = end_port;
1282 }
1283
1284 state = AUTHORITY_COMPLETE;
1285
1286 break;
1287 }
1288
1289 case AUTHORITY_COMPLETE:
1290 // should never be reached - keeps the compiler happy
1291 break;
1292 }
1293 }
1294
1295 // An empty authority is still an authority, making it possible to have
1296 // URLs such as file:///path/to/file.
1297 // TODO however, there is no way to unset the authority once it is set...
1298 // We may want to take a const char* parameter and allow NULL.
1299 fHasHost = true;
1300 }
1301
1302
1303 /*static*/ BString
_DoUrlEncodeChunk(const BString & chunk,bool strict,bool directory)1304 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory)
1305 {
1306 BString result;
1307
1308 for (int32 i = 0; i < chunk.Length(); i++) {
1309 if (_IsUnreserved(chunk[i])
1310 || (directory && (chunk[i] == '/' || chunk[i] == '\\'))) {
1311 result << chunk[i];
1312 } else {
1313 if (chunk[i] == ' ' && !strict) {
1314 result << '+';
1315 // In non-strict mode, spaces are encoded by a plus sign
1316 } else {
1317 char hexString[5];
1318 snprintf(hexString, 5, "%X", chunk[i]);
1319
1320 result << '%' << hexString;
1321 }
1322 }
1323 }
1324
1325 return result;
1326 }
1327
1328
1329 /*static*/ BString
_DoUrlDecodeChunk(const BString & chunk,bool strict)1330 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict)
1331 {
1332 BString result;
1333
1334 for (int32 i = 0; i < chunk.Length(); i++) {
1335 if (chunk[i] == '+' && !strict)
1336 result << ' ';
1337 else {
1338 char decoded = 0;
1339 char* out = NULL;
1340 char hexString[3];
1341
1342 if (chunk[i] == '%' && i < chunk.Length() - 2
1343 && isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) {
1344 hexString[0] = chunk[i + 1];
1345 hexString[1] = chunk[i + 2];
1346 hexString[2] = 0;
1347 decoded = (char)strtol(hexString, &out, 16);
1348 }
1349
1350 if (out == hexString + 2) {
1351 i += 2;
1352 result << decoded;
1353 } else
1354 result << chunk[i];
1355 }
1356 }
1357 return result;
1358 }
1359
1360
1361 bool
_IsHostIPV6Valid(size_t offset,int32 length) const1362 BUrl::_IsHostIPV6Valid(size_t offset, int32 length) const
1363 {
1364 for (int32 i = 0; i < length; i++) {
1365 char c = fHost[offset + i];
1366 if (!_IsIPV6Char(c))
1367 return false;
1368 }
1369
1370 return length > 0;
1371 }
1372
1373
1374 bool
_IsHostValid() const1375 BUrl::_IsHostValid() const
1376 {
1377 if (fHost.StartsWith("[") && fHost.EndsWith("]"))
1378 return _IsHostIPV6Valid(1, fHost.Length() - 2);
1379
1380 bool lastWasDot = false;
1381
1382 for (int32 i = 0; i < fHost.Length(); i++) {
1383 char c = fHost[i];
1384
1385 if (c == '.') {
1386 if (lastWasDot || i == 0)
1387 return false;
1388 lastWasDot = true;
1389 } else {
1390 lastWasDot = false;
1391 }
1392
1393 if (!_IsHostChar(c) && c != '.') {
1394 // the underscore is technically not allowed, but occurs sometimes
1395 // in the wild.
1396 return false;
1397 }
1398 }
1399
1400 return true;
1401 }
1402
1403
1404 bool
_IsProtocolValid() const1405 BUrl::_IsProtocolValid() const
1406 {
1407 for (int8 index = 0; index < fProtocol.Length(); index++) {
1408 char c = fProtocol[index];
1409
1410 if (index == 0 && !isalpha(c))
1411 return false;
1412 else if (!isalnum(c) && c != '+' && c != '-' && c != '.')
1413 return false;
1414 }
1415
1416 return !fProtocol.IsEmpty();
1417 }
1418
1419
1420 bool
_IsUnreserved(char c)1421 BUrl::_IsUnreserved(char c)
1422 {
1423 return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~';
1424 }
1425
1426
1427 bool
_IsGenDelim(char c)1428 BUrl::_IsGenDelim(char c)
1429 {
1430 return c == ':' || c == '/' || c == '?' || c == '#' || c == '['
1431 || c == ']' || c == '@';
1432 }
1433
1434
1435 bool
_IsSubDelim(char c)1436 BUrl::_IsSubDelim(char c)
1437 {
1438 return c == '!' || c == '$' || c == '&' || c == '\'' || c == '('
1439 || c == ')' || c == '*' || c == '+' || c == ',' || c == ';'
1440 || c == '=';
1441 }
1442
1443
1444 bool
_IsUsernameChar(char c)1445 BUrl::_IsUsernameChar(char c)
1446 {
1447 return !(c == ':' || c == '@');
1448 }
1449
1450
1451 bool
_IsPasswordChar(char c)1452 BUrl::_IsPasswordChar(char c)
1453 {
1454 return !(c == '@');
1455 }
1456
1457
1458 bool
_IsHostChar(char c)1459 BUrl::_IsHostChar(char c)
1460 {
1461 return ((uint8) c) > 127 || isalnum(c) || c == '-' || c == '_' || c == '.'
1462 || c == '%';
1463 }
1464
1465
1466 bool
_IsPortChar(char c)1467 BUrl::_IsPortChar(char c)
1468 {
1469 return isdigit(c);
1470 }
1471
1472
1473 bool
_IsIPV6Char(char c)1474 BUrl::_IsIPV6Char(char c)
1475 {
1476 return c == ':' || isxdigit(c);
1477 }
1478
1479
1480 BString
_UrlMimeType() const1481 BUrl::_UrlMimeType() const
1482 {
1483 BString mime;
1484 mime << "application/x-vnd.Be.URL." << fProtocol;
1485
1486 return BString(mime);
1487 }
1488