xref: /haiku/src/kits/support/Url.cpp (revision 03e5dd5273ae9bcef15db099630c4c8cf8b7bbdc)
1 /*
2  * Copyright 2010-2018 Haiku Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Christophe Huriaux, c.huriaux@gmail.com
7  *		Andrew Lindesay, apl@lindesay.co.nz
8  */
9 
10 
11 #include <Url.h>
12 
13 #include <ctype.h>
14 #include <cstdio>
15 #include <cstdlib>
16 #include <new>
17 
18 #include <MimeType.h>
19 #include <Roster.h>
20 
21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
22 	#include <ICUWrapper.h>
23 #endif
24 
25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
26 	#include <unicode/idna.h>
27 	#include <unicode/stringpiece.h>
28 #endif
29 
30 
// Name of the BMessage field under which the URL string is stored by
// Archive() and looked up by the unarchiving constructor.
static const char* kArchivedUrl = "be:url string";
32 
33 
34 BUrl::BUrl(const char* url)
35 	:
36 	fUrlString(),
37 	fProtocol(),
38 	fUser(),
39 	fPassword(),
40 	fHost(),
41 	fPort(0),
42 	fPath(),
43 	fRequest(),
44 	fHasHost(false),
45 	fHasFragment(false)
46 {
47 	SetUrlString(url);
48 }
49 
50 
51 BUrl::BUrl(BMessage* archive)
52 	:
53 	fUrlString(),
54 	fProtocol(),
55 	fUser(),
56 	fPassword(),
57 	fHost(),
58 	fPort(0),
59 	fPath(),
60 	fRequest(),
61 	fHasHost(false),
62 	fHasFragment(false)
63 {
64 	BString url;
65 
66 	if (archive->FindString(kArchivedUrl, &url) == B_OK)
67 		SetUrlString(url);
68 	else
69 		_ResetFields();
70 }
71 
72 
73 BUrl::BUrl(const BUrl& other)
74 	:
75 	BArchivable(),
76 	fUrlString(),
77 	fProtocol(other.fProtocol),
78 	fUser(other.fUser),
79 	fPassword(other.fPassword),
80 	fHost(other.fHost),
81 	fPort(other.fPort),
82 	fPath(other.fPath),
83 	fRequest(other.fRequest),
84 	fFragment(other.fFragment),
85 	fUrlStringValid(other.fUrlStringValid),
86 	fAuthorityValid(other.fAuthorityValid),
87 	fUserInfoValid(other.fUserInfoValid),
88 	fHasProtocol(other.fHasProtocol),
89 	fHasUserName(other.fHasUserName),
90 	fHasPassword(other.fHasPassword),
91 	fHasHost(other.fHasHost),
92 	fHasPort(other.fHasPort),
93 	fHasPath(other.fHasPath),
94 	fHasRequest(other.fHasRequest),
95 	fHasFragment(other.fHasFragment)
96 {
97 	if (fUrlStringValid)
98 		fUrlString = other.fUrlString;
99 
100 	if (fAuthorityValid)
101 		fAuthority = other.fAuthority;
102 
103 	if (fUserInfoValid)
104 		fUserInfo = other.fUserInfo;
105 
106 }
107 
108 
109 BUrl::BUrl(const BUrl& base, const BString& location)
110 	:
111 	fUrlString(),
112 	fProtocol(),
113 	fUser(),
114 	fPassword(),
115 	fHost(),
116 	fPort(0),
117 	fPath(),
118 	fRequest(),
119 	fAuthorityValid(false),
120 	fUserInfoValid(false),
121 	fHasUserName(false),
122 	fHasPassword(false),
123 	fHasHost(false),
124 	fHasPort(false),
125 	fHasFragment(false)
126 {
127 	// This implements the algorithm in RFC3986, Section 5.2.
128 
129 	BUrl relative(location);
130 	if (relative.HasProtocol()) {
131 		SetProtocol(relative.Protocol());
132 		if (relative.HasAuthority())
133 			SetAuthority(relative.Authority());
134 		SetPath(relative.Path());
135 		SetRequest(relative.Request());
136 	} else {
137 		if (relative.HasAuthority()) {
138 			SetAuthority(relative.Authority());
139 			SetPath(relative.Path());
140 			SetRequest(relative.Request());
141 		} else {
142 			if (relative.Path().IsEmpty()) {
143 				_SetPathUnsafe(base.Path());
144 				if (relative.HasRequest())
145 					SetRequest(relative.Request());
146 				else
147 					SetRequest(base.Request());
148 			} else {
149 				if (relative.Path()[0] == '/')
150 					SetPath(relative.Path());
151 				else {
152 					BString path = base._MergePath(relative.Path());
153 					SetPath(path);
154 				}
155 				SetRequest(relative.Request());
156 			}
157 
158 			if (base.HasAuthority())
159 				SetAuthority(base.Authority());
160 		}
161 		SetProtocol(base.Protocol());
162 	}
163 
164 	if (relative.HasFragment())
165 		SetFragment(relative.Fragment());
166 }
167 
168 
169 BUrl::BUrl()
170 	:
171 	fUrlString(),
172 	fProtocol(),
173 	fUser(),
174 	fPassword(),
175 	fHost(),
176 	fPort(0),
177 	fPath(),
178 	fRequest(),
179 	fHasHost(false),
180 	fHasFragment(false)
181 {
182 	_ResetFields();
183 }
184 
185 
186 BUrl::BUrl(const BPath& path)
187 	:
188 	fUrlString(),
189 	fProtocol(),
190 	fUser(),
191 	fPassword(),
192 	fHost(),
193 	fPort(0),
194 	fPath(),
195 	fRequest(),
196 	fHasHost(false),
197 	fHasFragment(false)
198 {
199 	SetUrlString(UrlEncode(path.Path(), true, true));
200 	SetProtocol("file");
201 }
202 
203 
/*!	Destructor. Performs no explicit work; the members clean up themselves.
*/
BUrl::~BUrl()
{
}
207 
208 
209 // #pragma mark URL fields modifiers
210 
211 
/*!	Replaces the whole URL by parsing \a url into its components.
	The status returned by the parser is not propagated; use IsValid() to
	check the result.
	\return This object, to allow call chaining.
*/
BUrl&
BUrl::SetUrlString(const BString& url)
{
	_ExplodeUrlString(url);
	return *this;
}
218 
219 
220 BUrl&
221 BUrl::SetProtocol(const BString& protocol)
222 {
223 	fProtocol = protocol;
224 	fHasProtocol = !fProtocol.IsEmpty();
225 	fUrlStringValid = false;
226 	return *this;
227 }
228 
229 
230 BUrl&
231 BUrl::SetUserName(const BString& user)
232 {
233 	fUser = user;
234 	fHasUserName = !fUser.IsEmpty();
235 	fUrlStringValid = false;
236 	fAuthorityValid = false;
237 	fUserInfoValid = false;
238 	return *this;
239 }
240 
241 
242 BUrl&
243 BUrl::SetPassword(const BString& password)
244 {
245 	fPassword = password;
246 	fHasPassword = !fPassword.IsEmpty();
247 	fUrlStringValid = false;
248 	fAuthorityValid = false;
249 	fUserInfoValid = false;
250 	return *this;
251 }
252 
253 
254 BUrl&
255 BUrl::SetHost(const BString& host)
256 {
257 	fHost = host;
258 	fHasHost = !fHost.IsEmpty();
259 	fUrlStringValid = false;
260 	fAuthorityValid = false;
261 	return *this;
262 }
263 
264 
265 BUrl&
266 BUrl::SetPort(int port)
267 {
268 	fPort = port;
269 	fHasPort = (port != 0);
270 	fUrlStringValid = false;
271 	fAuthorityValid = false;
272 	return *this;
273 }
274 
275 
276 BUrl&
277 BUrl::SetPath(const BString& path)
278 {
279 	// Implements RFC3986 section 5.2.4, "Remove dot segments"
280 
281 	// 1.
282 	BString output;
283 	BString input(path);
284 
285 	// 2.
286 	while (!input.IsEmpty()) {
287 		// 2.A.
288 		if (input.StartsWith("./")) {
289 			input.Remove(0, 2);
290 			continue;
291 		}
292 
293 		if (input.StartsWith("../")) {
294 			input.Remove(0, 3);
295 			continue;
296 		}
297 
298 		// 2.B.
299 		if (input.StartsWith("/./")) {
300 			input.Remove(0, 2);
301 			continue;
302 		}
303 
304 		if (input == "/.") {
305 			input.Remove(1, 1);
306 			continue;
307 		}
308 
309 		// 2.C.
310 		if (input.StartsWith("/../")) {
311 			input.Remove(0, 3);
312 			output.Truncate(output.FindLast('/'));
313 			continue;
314 		}
315 
316 		if (input == "/..") {
317 			input.Remove(1, 2);
318 			output.Truncate(output.FindLast('/'));
319 			continue;
320 		}
321 
322 		// 2.D.
323 		if (input == "." || input == "..") {
324 			break;
325 		}
326 
327 		if (input == "/.") {
328 			input.Remove(1, 1);
329 			continue;
330 		}
331 
332 		// 2.E.
333 		int slashpos = input.FindFirst('/', 1);
334 		if (slashpos > 0) {
335 			output.Append(input, slashpos);
336 			input.Remove(0, slashpos);
337 		} else {
338 			output.Append(input);
339 			break;
340 		}
341 	}
342 
343 	_SetPathUnsafe(output);
344 	return *this;
345 }
346 
347 
348 BUrl&
349 BUrl::SetRequest(const BString& request)
350 {
351 	fRequest = request;
352 	fHasRequest = !fRequest.IsEmpty();
353 	fUrlStringValid = false;
354 	return *this;
355 }
356 
357 
358 BUrl&
359 BUrl::SetFragment(const BString& fragment)
360 {
361 	fFragment = fragment;
362 	fHasFragment = true;
363 	fUrlStringValid = false;
364 	return *this;
365 }
366 
367 
368 // #pragma mark URL fields access
369 
370 
371 const BString&
372 BUrl::UrlString() const
373 {
374 	if (!fUrlStringValid) {
375 		fUrlString.Truncate(0);
376 
377 		if (HasProtocol()) {
378 			fUrlString << fProtocol << ':';
379 		}
380 
381 		if (HasAuthority()) {
382 			fUrlString << "//";
383 			fUrlString << Authority();
384 		}
385 		fUrlString << Path();
386 
387 		if (HasRequest())
388 			fUrlString << '?' << fRequest;
389 
390 		if (HasFragment())
391 			fUrlString << '#' << fFragment;
392 
393 		fUrlStringValid = true;
394 	}
395 
396 	return fUrlString;
397 }
398 
399 
/*!	Returns the stored protocol (scheme); empty when none is set.
*/
const BString&
BUrl::Protocol() const
{
	return fProtocol;
}
405 
406 
/*!	Returns the stored user name; empty when none is set.
*/
const BString&
BUrl::UserName() const
{
	return fUser;
}
412 
413 
/*!	Returns the stored password; empty when none is set.
*/
const BString&
BUrl::Password() const
{
	return fPassword;
}
419 
420 
421 const BString&
422 BUrl::UserInfo() const
423 {
424 	if (!fUserInfoValid) {
425 		fUserInfo = fUser;
426 
427 		if (HasPassword())
428 			fUserInfo << ':' << fPassword;
429 
430 		fUserInfoValid = true;
431 	}
432 
433 	return fUserInfo;
434 }
435 
436 
/*!	Returns the stored host; empty when none is set.
*/
const BString&
BUrl::Host() const
{
	return fHost;
}
442 
443 
/*!	Returns the stored port number; it is 0 after the fields were reset.
*/
int
BUrl::Port() const
{
	return fPort;
}
449 
450 
451 const BString&
452 BUrl::Authority() const
453 {
454 	if (!fAuthorityValid) {
455 		fAuthority.Truncate(0);
456 
457 		if (HasUserInfo())
458 			fAuthority << UserInfo() << '@';
459 		fAuthority << Host();
460 
461 		if (HasPort())
462 			fAuthority << ':' << fPort;
463 
464 		fAuthorityValid = true;
465 	}
466 	return fAuthority;
467 }
468 
469 
/*!	Returns the stored path.
*/
const BString&
BUrl::Path() const
{
	return fPath;
}
475 
476 
/*!	Returns the stored request (query) part, without the leading '?'.
*/
const BString&
BUrl::Request() const
{
	return fRequest;
}
482 
483 
/*!	Returns the stored fragment, without the leading '#'.
*/
const BString&
BUrl::Fragment() const
{
	return fFragment;
}
489 
490 
491 // #pragma mark URL fields tests
492 
493 
494 bool
495 BUrl::IsValid() const
496 {
497 	if (!fHasProtocol)
498 		return false;
499 
500 	if (!_IsProtocolValid())
501 		return false;
502 
503 	// it is possible that there can be an authority but no host.
504 	// wierd://tea:tree@/x
505 	if (HasHost() && !(fHost.IsEmpty() && HasAuthority()) && !_IsHostValid())
506 		return false;
507 
508 	if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp"
509 		|| fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet"
510 		|| fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp"
511 		|| fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") {
512 		return HasHost() && !fHost.IsEmpty();
513 	}
514 
515 	if (fProtocol == "file")
516 		return fHasPath;
517 
518 	return true;
519 }
520 
521 
/*!	Returns whether a non-empty protocol has been set.
*/
bool
BUrl::HasProtocol() const
{
	return fHasProtocol;
}
527 
528 
/*!	Returns whether the URL has an authority, which is the case when either
	a host or a user name is flagged as present.
*/
bool
BUrl::HasAuthority() const
{
	return fHasHost || fHasUserName;
}
534 
535 
/*!	Returns whether a user name is flagged as present.
*/
bool
BUrl::HasUserName() const
{
	return fHasUserName;
}
541 
542 
/*!	Returns whether a password is flagged as present.
*/
bool
BUrl::HasPassword() const
{
	return fHasPassword;
}
548 
549 
/*!	Returns whether the URL has user info, which is the case when either a
	user name or a password is flagged as present.
*/
bool
BUrl::HasUserInfo() const
{
	return fHasUserName || fHasPassword;
}
555 
556 
/*!	Returns whether a host is flagged as present. SetAuthority() sets this
	flag even when the host it parsed is empty.
*/
bool
BUrl::HasHost() const
{
	return fHasHost;
}
562 
563 
/*!	Returns whether a non-zero port has been set.
*/
bool
BUrl::HasPort() const
{
	return fHasPort;
}
569 
570 
/*!	Returns whether a path has been set; an empty path still counts once it
	has been set.
*/
bool
BUrl::HasPath() const
{
	return fHasPath;
}
576 
577 
/*!	Returns whether a non-empty request (query) has been set.
*/
bool
BUrl::HasRequest() const
{
	return fHasRequest;
}
583 
584 
/*!	Returns whether a fragment has been set; an empty fragment still counts.
*/
bool
BUrl::HasFragment() const
{
	return fHasFragment;
}
590 
591 
592 // #pragma mark URL encoding/decoding of needed fields
593 
594 
595 void
596 BUrl::UrlEncode(bool strict)
597 {
598 	fUser = _DoUrlEncodeChunk(fUser, strict);
599 	fPassword = _DoUrlEncodeChunk(fPassword, strict);
600 	fHost = _DoUrlEncodeChunk(fHost, strict);
601 	fFragment = _DoUrlEncodeChunk(fFragment, strict);
602 	fPath = _DoUrlEncodeChunk(fPath, strict, true);
603 }
604 
605 
606 void
607 BUrl::UrlDecode(bool strict)
608 {
609 	fUser = _DoUrlDecodeChunk(fUser, strict);
610 	fPassword = _DoUrlDecodeChunk(fPassword, strict);
611 	fHost = _DoUrlDecodeChunk(fHost, strict);
612 	fFragment = _DoUrlDecodeChunk(fFragment, strict);
613 	fPath = _DoUrlDecodeChunk(fPath, strict);
614 }
615 
616 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Converts the host to its ASCII form using ICU's UTS #46 IDNA
	implementation.
	\return B_OK when the host was converted, B_ERROR when the converter
		could not be created or reported a failure; the host is left
		untouched in that case.
*/
status_t
BUrl::IDNAToAscii()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
	if (converter == NULL || U_FAILURE(err)) {
		// No converter is returned on failure; it must not be used.
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;

	BString result;
	BStringByteSink sink(&result);
	converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;

	// The host is part of the cached authority and URL strings.
	fAuthorityValid = false;
	fUrlStringValid = false;
	return B_OK;
}
#endif
639 
640 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Converts the host to its Unicode (UTF-8) form using ICU's UTS #46 IDNA
	implementation.
	\return B_OK when the host was converted, B_ERROR when the converter
		could not be created or reported a failure; the host is left
		untouched in that case.
*/
status_t
BUrl::IDNAToUnicode()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
	if (converter == NULL || U_FAILURE(err)) {
		// No converter is returned on failure; it must not be used.
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;

	BString result;
	BStringByteSink sink(&result);
	converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;

	// The host is part of the cached authority and URL strings.
	fAuthorityValid = false;
	fUrlStringValid = false;
	return B_OK;
}
#endif
663 
664 
665 // #pragma mark - utility functionality
666 
667 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Returns whether a preferred application is registered for this URL's
	protocol, i.e. whether PreferredApplication() yields a valid MIME type
	that starts with "application/" (compared case-insensitively).
*/
bool
BUrl::HasPreferredApplication() const
{
	const BString signature = PreferredApplication();
	BMimeType mime(signature.String());

	const bool isAppSignature = signature.IFindFirst("application/") == 0;
	return isAppSignature && mime.IsValid();
}
#endif
682 
683 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Returns the signature of the preferred application of the MIME type
	derived from this URL's protocol (see _UrlMimeType()).
	The status returned by the MIME database lookup is not checked.
*/
BString
BUrl::PreferredApplication() const
{
	BMimeType mime(_UrlMimeType().String());

	// Let the MIME type write the signature straight into the string.
	BString appSignature;
	char* buffer = appSignature.LockBuffer(B_MIME_TYPE_LENGTH);
	mime.GetPreferredApp(buffer);
	appSignature.UnlockBuffer();

	return appSignature;
}
#endif
696 
697 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
/*!	Launches the application registered for this URL's protocol and passes
	it the URL string as its only argument.
	\param onProblemAskUser Currently unused (see the TODO below).
	\return B_BAD_VALUE when the URL is not valid, B_NAME_TOO_LONG when the
		URL string is longer than B_PATH_NAME_LENGTH, otherwise the status
		returned by the roster's Launch().
*/
status_t
BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const
{
	if (!IsValid())
		return B_BAD_VALUE;

	BString urlString = UrlString();
	if (urlString.Length() > B_PATH_NAME_LENGTH) {
		// TODO: BAlert
		//	if (onProblemAskUser)
		//		BAlert ... Too long URL!
#if DEBUG
		fprintf(stderr, "URL too long");
#endif
		return B_NAME_TOO_LONG;
	}

	// Only the URL itself (the second entry) is handed to Launch().
	char* arguments[] = {
		const_cast<char*>("BUrlInvokedApplication"),
		const_cast<char*>(urlString.String()),
		NULL
	};

#if DEBUG
	if (HasPreferredApplication())
		printf("HasPreferredApplication() == true\n");
	else
		printf("HasPreferredApplication() == false\n");
#endif

	const status_t status
		= be_roster->Launch(_UrlMimeType().String(), 1, &arguments[1]);

#if DEBUG
	if (status != B_OK)
		fprintf(stderr, "Opening URL failed: %s\n", strerror(status));
#endif

	return status;
}
#endif
739 
740 
741 // #pragma mark Url encoding/decoding of string
742 
743 
/*!	Returns a URL-encoded copy of \a url.
	\param strict When false, spaces are encoded as '+'.
	\param directory When true, '/' and '\\' are left unencoded.
*/
/*static*/ BString
BUrl::UrlEncode(const BString& url, bool strict, bool directory)
{
	return _DoUrlEncodeChunk(url, strict, directory);
}
749 
750 
/*!	Returns a URL-decoded copy of \a url.
	\param strict When false, '+' is decoded to a space.
*/
/*static*/ BString
BUrl::UrlDecode(const BString& url, bool strict)
{
	return _DoUrlDecodeChunk(url, strict);
}
756 
757 
758 // #pragma mark BArchivable members
759 
760 
761 status_t
762 BUrl::Archive(BMessage* into, bool deep) const
763 {
764 	status_t ret = BArchivable::Archive(into, deep);
765 
766 	if (ret == B_OK)
767 		ret = into->AddString(kArchivedUrl, UrlString());
768 
769 	return ret;
770 }
771 
772 
773 /*static*/ BArchivable*
774 BUrl::Instantiate(BMessage* archive)
775 {
776 	if (validate_instantiation(archive, "BUrl"))
777 		return new(std::nothrow) BUrl(archive);
778 	return NULL;
779 }
780 
781 
782 // #pragma mark URL comparison
783 
784 
785 bool
786 BUrl::operator==(BUrl& other) const
787 {
788 	UrlString();
789 	other.UrlString();
790 
791 	return fUrlString == other.fUrlString;
792 }
793 
794 
795 bool
796 BUrl::operator!=(BUrl& other) const
797 {
798 	return !(*this == other);
799 }
800 
801 
802 // #pragma mark URL assignment
803 
804 
805 const BUrl&
806 BUrl::operator=(const BUrl& other)
807 {
808 	fUrlStringValid = other.fUrlStringValid;
809 	if (fUrlStringValid)
810 		fUrlString = other.fUrlString;
811 
812 	fAuthorityValid = other.fAuthorityValid;
813 	if (fAuthorityValid)
814 		fAuthority = other.fAuthority;
815 
816 	fUserInfoValid = other.fUserInfoValid;
817 	if (fUserInfoValid)
818 		fUserInfo = other.fUserInfo;
819 
820 	fProtocol = other.fProtocol;
821 	fUser = other.fUser;
822 	fPassword = other.fPassword;
823 	fHost = other.fHost;
824 	fPort = other.fPort;
825 	fPath = other.fPath;
826 	fRequest = other.fRequest;
827 	fFragment = other.fFragment;
828 
829 	fHasProtocol = other.fHasProtocol;
830 	fHasUserName = other.fHasUserName;
831 	fHasPassword = other.fHasPassword;
832 	fHasHost = other.fHasHost;
833 	fHasPort = other.fHasPort;
834 	fHasPath = other.fHasPath;
835 	fHasRequest = other.fHasRequest;
836 	fHasFragment = other.fHasFragment;
837 
838 	return *this;
839 }
840 
841 
842 const BUrl&
843 BUrl::operator=(const BString& string)
844 {
845 	SetUrlString(string);
846 	return *this;
847 }
848 
849 
850 const BUrl&
851 BUrl::operator=(const char* string)
852 {
853 	SetUrlString(string);
854 	return *this;
855 }
856 
857 
858 // #pragma mark URL to string conversion
859 
860 
861 BUrl::operator const char*() const
862 {
863 	return UrlString();
864 }
865 
866 
867 void
868 BUrl::_ResetFields()
869 {
870 	fHasProtocol = false;
871 	fHasUserName = false;
872 	fHasPassword = false;
873 	fHasHost = false;
874 	fHasPort = false;
875 	fHasPath = false;
876 	fHasRequest = false;
877 	fHasFragment = false;
878 
879 	fProtocol.Truncate(0);
880 	fUser.Truncate(0);
881 	fPassword.Truncate(0);
882 	fHost.Truncate(0);
883 	fPort = 0;
884 	fPath.Truncate(0);
885 	fRequest.Truncate(0);
886 	fFragment.Truncate(0);
887 
888 	// Force re-generation of these fields
889 	fUrlStringValid = false;
890 	fUserInfoValid = false;
891 	fAuthorityValid = false;
892 }
893 
894 
895 bool
896 BUrl::_ContainsDelimiter(const BString& url)
897 {
898 	int32 len = url.Length();
899 
900 	for (int32 i = 0; i < len; i++) {
901 		switch (url[i]) {
902 			case ' ':
903 			case '\n':
904 			case '\t':
905 			case '\r':
906 			case '<':
907 			case '>':
908 			case '"':
909 				return true;
910 		}
911 	}
912 
913 	return false;
914 }
915 
916 
// States of the URL string parser, in the order the components appear in a
// URL string.
enum explode_url_parse_state {
	EXPLODE_PROTOCOL = 0,			// reading the protocol characters
	EXPLODE_PROTOCOLTERMINATOR = 1,	// expecting the ':' after the protocol
	EXPLODE_AUTHORITYORPATH = 2,	// deciding between "//authority" and path
	EXPLODE_AUTHORITY = 3,			// reading the authority
	EXPLODE_PATH = 4,				// reading the path
	EXPLODE_REQUEST = 5,			// reading the request (query)
	EXPLODE_FRAGMENT = 6,			// reading the fragment
	EXPLODE_COMPLETE = 7			// nothing left to parse
};
927 
928 
// Predicate telling whether a character still belongs to the component that
// is currently being scanned; used with char_offset_until_fn_false().
typedef bool (*explode_char_match_fn)(char c);
930 
931 
// Returns whether c may be part of a protocol (scheme): an alphanumeric
// character, '+', '.' or '-'.
static bool
explode_is_protocol_char(char c)
{
	// The <ctype.h> functions are only defined for values representable as
	// unsigned char (or EOF); a plain char may be negative for bytes >= 0x80.
	return isalnum((unsigned char)c) || c == '+' || c == '.' || c == '-';
}
937 
938 
// Returns whether c may be part of an authority, i.e. is none of the
// characters that end it: '/', '?' or '#'.
static bool
explode_is_authority_char(char c)
{
	return c != '/' && c != '?' && c != '#';
}
944 
945 
// Returns whether c may be part of a path, i.e. is neither '#' nor '?'.
static bool
explode_is_path_char(char c)
{
	return c != '#' && c != '?';
}
951 
952 
// Returns whether c may be part of a request (query), i.e. is not '#'.
static bool
explode_is_request_char(char c)
{
	return !(c == '#');
}
958 
959 
960 static int32
961 char_offset_until_fn_false(const char* url, int32 len, int32 offset,
962 	explode_char_match_fn fn)
963 {
964 	while (offset < len && fn(url[offset]))
965 		offset++;
966 
967 	return offset;
968 }
969 
970 /*
971  * This function takes a URL in string-form and parses the components of the URL out.
972  */
973 status_t
974 BUrl::_ExplodeUrlString(const BString& url)
975 {
976 	_ResetFields();
977 
978 	// RFC3986, Appendix C; the URL should not contain whitespace or delimiters
979 	// by this point.
980 
981 	if (_ContainsDelimiter(url))
982 		return B_BAD_VALUE;
983 
984 	explode_url_parse_state state = EXPLODE_PROTOCOL;
985 	int32 offset = 0;
986 	int32 length = url.Length();
987 	const char *url_c = url.String();
988 
989 	// The regexp is provided in RFC3986 (URI generic syntax), Appendix B
990 	// ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?
991 	// The ensuing logic attempts to simulate the behaviour of extracting the groups
992 	// from the string without requiring a group-capable regex engine.
993 
994 	while (offset < length) {
995 		switch (state) {
996 
997 			case EXPLODE_PROTOCOL:
998 			{
999 				int32 end_protocol = char_offset_until_fn_false(url_c, length,
1000 					offset, explode_is_protocol_char);
1001 
1002 				if (end_protocol < length) {
1003 					SetProtocol(BString(&url_c[offset], end_protocol - offset));
1004 					state = EXPLODE_PROTOCOLTERMINATOR;
1005 					offset = end_protocol;
1006 				} else {
1007 					// No protocol was found, try parsing from the string
1008 					// start, beginning with authority or path
1009 					SetProtocol("");
1010 					offset = 0;
1011 					state = EXPLODE_AUTHORITYORPATH;
1012 				}
1013 				break;
1014 			}
1015 
1016 			case EXPLODE_PROTOCOLTERMINATOR:
1017 			{
1018 				if (url[offset] == ':') {
1019 					offset++;
1020 				} else {
1021 					// No protocol was found, try parsing from the string
1022 					// start, beginning with authority or path
1023 					SetProtocol("");
1024 					offset = 0;
1025 				}
1026 				state = EXPLODE_AUTHORITYORPATH;
1027 				break;
1028 			}
1029 
1030 			case EXPLODE_AUTHORITYORPATH:
1031 			{
1032 				// The authority must start with //. If it isn't there, skip
1033 				// to parsing the path.
1034 				if (strncmp(&url_c[offset], "//", 2) == 0) {
1035 					state = EXPLODE_AUTHORITY;
1036 					offset += 2;
1037 				} else {
1038 					state = EXPLODE_PATH;
1039 				}
1040 				break;
1041 			}
1042 
1043 			case EXPLODE_AUTHORITY:
1044 			{
1045 				int end_authority = char_offset_until_fn_false(url_c, length,
1046 					offset, explode_is_authority_char);
1047 				SetAuthority(BString(&url_c[offset], end_authority - offset));
1048 				state = EXPLODE_PATH;
1049 				offset = end_authority;
1050 				break;
1051 			}
1052 
1053 			case EXPLODE_PATH:
1054 			{
1055 				int end_path = char_offset_until_fn_false(url_c, length, offset,
1056 					explode_is_path_char);
1057 				SetPath(BString(&url_c[offset], end_path - offset));
1058 				state = EXPLODE_REQUEST;
1059 				offset = end_path;
1060 				break;
1061 			}
1062 
1063 			case EXPLODE_REQUEST: // query
1064 			{
1065 				if (url_c[offset] == '?') {
1066 					offset++;
1067 					int end_request = char_offset_until_fn_false(url_c, length,
1068 						offset, explode_is_request_char);
1069 					SetRequest(BString(&url_c[offset], end_request - offset));
1070 					offset = end_request;
1071 				}
1072 				state = EXPLODE_FRAGMENT;
1073 				break;
1074 			}
1075 
1076 			case EXPLODE_FRAGMENT:
1077 			{
1078 				if (url_c[offset] == '#') {
1079 					offset++;
1080 					SetFragment(BString(&url_c[offset], length - offset));
1081 					offset = length;
1082 				}
1083 				state = EXPLODE_COMPLETE;
1084 				break;
1085 			}
1086 
1087 			case EXPLODE_COMPLETE:
1088 				// should never be reached - keeps the compiler happy
1089 				break;
1090 
1091 		}
1092 	}
1093 
1094 	return B_OK;
1095 }
1096 
1097 
1098 BString
1099 BUrl::_MergePath(const BString& relative) const
1100 {
1101 	// This implements RFC3986, Section 5.2.3.
1102 	if (HasAuthority() && fPath == "") {
1103 		BString result("/");
1104 		result << relative;
1105 		return result;
1106 	}
1107 
1108 	BString result(fPath);
1109 	result.Truncate(result.FindLast("/") + 1);
1110 	result << relative;
1111 
1112 	return result;
1113 }
1114 
1115 
1116 // This sets the path without normalizing it. If fed with a path that has . or
1117 // .. segments, this would make the URL invalid.
1118 void
1119 BUrl::_SetPathUnsafe(const BString& path)
1120 {
1121 	fPath = path;
1122 	fHasPath = true; // RFC says an empty path is still a path
1123 	fUrlStringValid = false;
1124 }
1125 
1126 
// States of the authority parser, in the order the parts appear in
// "user:password@host:port".
enum authority_parse_state {
	AUTHORITY_USERNAME = 0,	// reading the user name, if there is one
	AUTHORITY_PASSWORD = 1,	// reading the password and skipping the '@'
	AUTHORITY_HOST = 2,		// reading the host
	AUTHORITY_PORT = 3,		// reading the port
	AUTHORITY_COMPLETE = 4	// nothing left to parse
};
1134 
1135 void
1136 BUrl::SetAuthority(const BString& authority)
1137 {
1138 	fAuthority = authority;
1139 
1140 	fUser.Truncate(0);
1141 	fPassword.Truncate(0);
1142 	fHost.Truncate(0);
1143 	fPort = 0;
1144 	fHasPort = false;
1145 	fHasUserName = false;
1146 	fHasPassword = false;
1147 
1148 	bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@');
1149 	authority_parse_state state = AUTHORITY_USERNAME;
1150 	int32 offset = 0;
1151 	int32 length = authority.Length();
1152 	const char *authority_c = authority.String();
1153 
1154 	while (AUTHORITY_COMPLETE != state && offset < length) {
1155 
1156 		switch (state) {
1157 
1158 			case AUTHORITY_USERNAME:
1159 			{
1160 				if (hasUsernamePassword) {
1161 					int32 end_username = char_offset_until_fn_false(
1162 						authority_c, length, offset, _IsUsernameChar);
1163 
1164 					SetUserName(BString(&authority_c[offset],
1165 						end_username - offset));
1166 
1167 					state = AUTHORITY_PASSWORD;
1168 					offset = end_username;
1169 				} else {
1170 					state = AUTHORITY_HOST;
1171 				}
1172 				break;
1173 			}
1174 
1175 			case AUTHORITY_PASSWORD:
1176 			{
1177 				if (hasUsernamePassword && ':' == authority[offset]) {
1178 					offset++; // move past the delimiter
1179 					int32 end_password = char_offset_until_fn_false(
1180 						authority_c, length, offset, _IsPasswordChar);
1181 
1182 					SetPassword(BString(&authority_c[offset],
1183 						end_password - offset));
1184 
1185 					offset = end_password;
1186 				}
1187 
1188 				// if the host was preceded by a username + password couple
1189 				// then there will be an '@' delimiter to avoid.
1190 
1191 				if (authority_c[offset] == '@') {
1192 					offset++;
1193 				}
1194 
1195 				state = AUTHORITY_HOST;
1196 				break;
1197 			}
1198 
1199 			case AUTHORITY_HOST:
1200 			{
1201 
1202 				// the host may be enclosed within brackets in order to express
1203 				// an IPV6 address.
1204 
1205 				if (authority_c[offset] == '[') {
1206 					int32 end_ipv6_host = char_offset_until_fn_false(
1207 						authority_c, length, offset + 1, _IsIPV6Char);
1208 
1209 					if (authority_c[end_ipv6_host] == ']') {
1210 						SetHost(BString(&authority_c[offset],
1211 							(end_ipv6_host - offset) + 1));
1212 						state = AUTHORITY_PORT;
1213 						offset = end_ipv6_host + 1;
1214 					}
1215 				}
1216 
1217 				// if an IPV6 host was not found.
1218 
1219 				if (AUTHORITY_HOST == state) {
1220 					int32 end_host = char_offset_until_fn_false(
1221 						authority_c, length, offset, _IsHostChar);
1222 
1223 					SetHost(BString(&authority_c[offset], end_host - offset));
1224 					state = AUTHORITY_PORT;
1225 					offset = end_host;
1226 				}
1227 
1228 				break;
1229 			}
1230 
1231 			case AUTHORITY_PORT:
1232 			{
1233 				if (authority_c[offset] == ':') {
1234 					offset++;
1235 					int32 end_port = char_offset_until_fn_false(
1236 						authority_c, length, offset, _IsPortChar);
1237 					SetPort(atoi(&authority_c[offset]));
1238 					offset = end_port;
1239 				}
1240 
1241 				state = AUTHORITY_COMPLETE;
1242 
1243 				break;
1244 			}
1245 
1246 			case AUTHORITY_COMPLETE:
1247 				// should never be reached - keeps the compiler happy
1248 				break;
1249 		}
1250 	}
1251 
1252 	// An empty authority is still an authority, making it possible to have
1253 	// URLs such as file:///path/to/file.
1254 	// TODO however, there is no way to unset the authority once it is set...
1255 	// We may want to take a const char* parameter and allow NULL.
1256 	fHasHost = true;
1257 }
1258 
1259 
1260 /*static*/ BString
1261 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory)
1262 {
1263 	BString result;
1264 
1265 	for (int32 i = 0; i < chunk.Length(); i++) {
1266 		if (_IsUnreserved(chunk[i])
1267 				|| (directory && (chunk[i] == '/' || chunk[i] == '\\'))) {
1268 			result << chunk[i];
1269 		} else {
1270 			if (chunk[i] == ' ' && !strict) {
1271 				result << '+';
1272 					// In non-strict mode, spaces are encoded by a plus sign
1273 			} else {
1274 				char hexString[5];
1275 				snprintf(hexString, 5, "%X", chunk[i]);
1276 
1277 				result << '%' << hexString;
1278 			}
1279 		}
1280 	}
1281 
1282 	return result;
1283 }
1284 
1285 
1286 /*static*/ BString
1287 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict)
1288 {
1289 	BString result;
1290 
1291 	for (int32 i = 0; i < chunk.Length(); i++) {
1292 		if (chunk[i] == '+' && !strict)
1293 			result << ' ';
1294 		else {
1295 			char decoded = 0;
1296 			char* out = NULL;
1297 			char hexString[3];
1298 
1299 			if (chunk[i] == '%' && i < chunk.Length() - 2
1300 				&& isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) {
1301 				hexString[0] = chunk[i + 1];
1302 				hexString[1] = chunk[i + 2];
1303 				hexString[2] = 0;
1304 				decoded = (char)strtol(hexString, &out, 16);
1305 			}
1306 
1307 			if (out == hexString + 2) {
1308 				i += 2;
1309 				result << decoded;
1310 			} else
1311 				result << chunk[i];
1312 		}
1313 	}
1314 	return result;
1315 }
1316 
1317 
1318 bool
1319 BUrl::_IsHostIPV6Valid(size_t offset, int32 length) const
1320 {
1321 	for (int32 i = 0; i < length; i++) {
1322 		char c = fHost[offset + i];
1323 		if (!_IsIPV6Char(c))
1324 			return false;
1325 	}
1326 
1327 	return length > 0;
1328 }
1329 
1330 
1331 bool
1332 BUrl::_IsHostValid() const
1333 {
1334 	if (fHost.StartsWith("[") && fHost.EndsWith("]"))
1335 		return _IsHostIPV6Valid(1, fHost.Length() - 2);
1336 
1337 	bool lastWasDot = false;
1338 
1339 	for (int32 i = 0; i < fHost.Length(); i++) {
1340 		char c = fHost[i];
1341 
1342 		if (c == '.') {
1343 			if (lastWasDot || i == 0)
1344 				return false;
1345 			lastWasDot = true;
1346 		} else {
1347 			lastWasDot = false;
1348 		}
1349 
1350 		if (!_IsHostChar(c) && c != '.') {
1351 			// the underscore is technically not allowed, but occurs sometimes
1352 			// in the wild.
1353 			return false;
1354 		}
1355 	}
1356 
1357 	return true;
1358 }
1359 
1360 
1361 bool
1362 BUrl::_IsProtocolValid() const
1363 {
1364 	for (int8 index = 0; index < fProtocol.Length(); index++) {
1365 		char c = fProtocol[index];
1366 
1367 		if (index == 0 && !isalpha(c))
1368 			return false;
1369 		else if (!isalnum(c) && c != '+' && c != '-' && c != '.')
1370 			return false;
1371 	}
1372 
1373 	return !fProtocol.IsEmpty();
1374 }
1375 
1376 
1377 bool
1378 BUrl::_IsUnreserved(char c)
1379 {
1380 	return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~';
1381 }
1382 
1383 
1384 bool
1385 BUrl::_IsGenDelim(char c)
1386 {
1387 	return c == ':' || c == '/' || c == '?' || c == '#' || c == '['
1388 		|| c == ']' || c == '@';
1389 }
1390 
1391 
1392 bool
1393 BUrl::_IsSubDelim(char c)
1394 {
1395 	return c == '!' || c == '$' || c == '&' || c == '\'' || c == '('
1396 		|| c == ')' || c == '*' || c == '+' || c == ',' || c == ';'
1397 		|| c == '=';
1398 }
1399 
1400 
1401 bool
1402 BUrl::_IsUsernameChar(char c)
1403 {
1404 	return !(c == ':' || c == '@');
1405 }
1406 
1407 
1408 bool
1409 BUrl::_IsPasswordChar(char c)
1410 {
1411 	return !(c == '@');
1412 }
1413 
1414 
1415 bool
1416 BUrl::_IsHostChar(char c)
1417 {
1418 	return ((uint8) c) > 127 || isalnum(c) || c == '-' || c == '_' || c == '.'
1419 		|| c == '%';
1420 }
1421 
1422 
1423 bool
1424 BUrl::_IsPortChar(char c)
1425 {
1426 	return isdigit(c);
1427 }
1428 
1429 
1430 bool
1431 BUrl::_IsIPV6Char(char c)
1432 {
1433 	return c == ':' || isxdigit(c);
1434 }
1435 
1436 
1437 BString
1438 BUrl::_UrlMimeType() const
1439 {
1440 	BString mime;
1441 	mime << "application/x-vnd.Be.URL." << fProtocol;
1442 
1443 	return BString(mime);
1444 }
1445