xref: /haiku/src/kits/support/Url.cpp (revision b08627f310bb2e80bca50176e7a758182384735a)
1 /*
2  * Copyright 2010-2016 Haiku Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Christophe Huriaux, c.huriaux@gmail.com
7  *		Andrew Lindesay, apl@lindesay.co.nz
8  */
9 
10 
11 #include <Url.h>
12 
13 #include <ctype.h>
14 #include <cstdio>
15 #include <cstdlib>
16 #include <new>
17 
18 #include <MimeType.h>
19 #include <Roster.h>
20 
21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
22 	#include <ICUWrapper.h>
23 #endif
24 
25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
26 	#include <unicode/idna.h>
27 	#include <unicode/stringpiece.h>
28 #endif
29 
30 
31 static const char* kArchivedUrl = "be:url string";
32 
33 
34 BUrl::BUrl(const char* url)
35 	:
36 	fUrlString(),
37 	fProtocol(),
38 	fUser(),
39 	fPassword(),
40 	fHost(),
41 	fPort(0),
42 	fPath(),
43 	fRequest(),
44 	fHasHost(false),
45 	fHasFragment(false)
46 {
47 	SetUrlString(url);
48 }
49 
50 
51 BUrl::BUrl(BMessage* archive)
52 	:
53 	fUrlString(),
54 	fProtocol(),
55 	fUser(),
56 	fPassword(),
57 	fHost(),
58 	fPort(0),
59 	fPath(),
60 	fRequest(),
61 	fHasHost(false),
62 	fHasFragment(false)
63 {
64 	BString url;
65 
66 	if (archive->FindString(kArchivedUrl, &url) == B_OK)
67 		SetUrlString(url);
68 	else
69 		_ResetFields();
70 }
71 
72 
73 BUrl::BUrl(const BUrl& other)
74 	:
75 	BArchivable(),
76 	fUrlString(),
77 	fProtocol(other.fProtocol),
78 	fUser(other.fUser),
79 	fPassword(other.fPassword),
80 	fHost(other.fHost),
81 	fPort(other.fPort),
82 	fPath(other.fPath),
83 	fRequest(other.fRequest),
84 	fFragment(other.fFragment),
85 	fUrlStringValid(other.fUrlStringValid),
86 	fAuthorityValid(other.fAuthorityValid),
87 	fUserInfoValid(other.fUserInfoValid),
88 	fHasProtocol(other.fHasProtocol),
89 	fHasUserName(other.fHasUserName),
90 	fHasPassword(other.fHasPassword),
91 	fHasHost(other.fHasHost),
92 	fHasPort(other.fHasPort),
93 	fHasPath(other.fHasPath),
94 	fHasRequest(other.fHasRequest),
95 	fHasFragment(other.fHasFragment)
96 {
97 	if (fUrlStringValid)
98 		fUrlString = other.fUrlString;
99 
100 	if (fAuthorityValid)
101 		fAuthority = other.fAuthority;
102 
103 	if (fUserInfoValid)
104 		fUserInfo = other.fUserInfo;
105 
106 }
107 
108 
109 BUrl::BUrl(const BUrl& base, const BString& location)
110 	:
111 	fUrlString(),
112 	fProtocol(),
113 	fUser(),
114 	fPassword(),
115 	fHost(),
116 	fPort(0),
117 	fPath(),
118 	fRequest(),
119 	fAuthorityValid(false),
120 	fUserInfoValid(false),
121 	fHasUserName(false),
122 	fHasPassword(false),
123 	fHasHost(false),
124 	fHasPort(false),
125 	fHasFragment(false)
126 {
127 	// This implements the algorithm in RFC3986, Section 5.2.
128 
129 	BUrl relative(location);
130 	if (relative.HasProtocol()) {
131 		SetProtocol(relative.Protocol());
132 		if (relative.HasAuthority())
133 			SetAuthority(relative.Authority());
134 		SetPath(relative.Path());
135 		SetRequest(relative.Request());
136 	} else {
137 		if (relative.HasAuthority()) {
138 			SetAuthority(relative.Authority());
139 			SetPath(relative.Path());
140 			SetRequest(relative.Request());
141 		} else {
142 			if (relative.Path().IsEmpty()) {
143 				_SetPathUnsafe(base.Path());
144 				if (relative.HasRequest())
145 					SetRequest(relative.Request());
146 				else
147 					SetRequest(base.Request());
148 			} else {
149 				if (relative.Path()[0] == '/')
150 					SetPath(relative.Path());
151 				else {
152 					BString path = base._MergePath(relative.Path());
153 					SetPath(path);
154 				}
155 				SetRequest(relative.Request());
156 			}
157 
158 			if (base.HasAuthority())
159 				SetAuthority(base.Authority());
160 		}
161 		SetProtocol(base.Protocol());
162 	}
163 
164 	if (relative.HasFragment())
165 		SetFragment(relative.Fragment());
166 }
167 
168 
169 BUrl::BUrl()
170 	:
171 	fUrlString(),
172 	fProtocol(),
173 	fUser(),
174 	fPassword(),
175 	fHost(),
176 	fPort(0),
177 	fPath(),
178 	fRequest(),
179 	fHasHost(false),
180 	fHasFragment(false)
181 {
182 	_ResetFields();
183 }
184 
185 
186 BUrl::BUrl(const BPath& path)
187 	:
188 	fUrlString(),
189 	fProtocol(),
190 	fUser(),
191 	fPassword(),
192 	fHost(),
193 	fPort(0),
194 	fPath(),
195 	fRequest(),
196 	fHasHost(false),
197 	fHasFragment(false)
198 {
199 	SetUrlString(UrlEncode(path.Path(), true, true));
200 	SetProtocol("file");
201 }
202 
203 
204 BUrl::~BUrl()
205 {
206 }
207 
208 
209 // #pragma mark URL fields modifiers
210 
211 
212 BUrl&
213 BUrl::SetUrlString(const BString& url)
214 {
215 	_ExplodeUrlString(url);
216 	return *this;
217 }
218 
219 
220 BUrl&
221 BUrl::SetProtocol(const BString& protocol)
222 {
223 	fProtocol = protocol;
224 	fHasProtocol = !fProtocol.IsEmpty();
225 	fUrlStringValid = false;
226 	return *this;
227 }
228 
229 
230 BUrl&
231 BUrl::SetUserName(const BString& user)
232 {
233 	fUser = user;
234 	fHasUserName = !fUser.IsEmpty();
235 	fUrlStringValid = false;
236 	fAuthorityValid = false;
237 	fUserInfoValid = false;
238 	return *this;
239 }
240 
241 
242 BUrl&
243 BUrl::SetPassword(const BString& password)
244 {
245 	fPassword = password;
246 	fHasPassword = !fPassword.IsEmpty();
247 	fUrlStringValid = false;
248 	fAuthorityValid = false;
249 	fUserInfoValid = false;
250 	return *this;
251 }
252 
253 
254 BUrl&
255 BUrl::SetHost(const BString& host)
256 {
257 	fHost = host;
258 	fHasHost = !fHost.IsEmpty();
259 	fUrlStringValid = false;
260 	fAuthorityValid = false;
261 	return *this;
262 }
263 
264 
265 BUrl&
266 BUrl::SetPort(int port)
267 {
268 	fPort = port;
269 	fHasPort = (port != 0);
270 	fUrlStringValid = false;
271 	fAuthorityValid = false;
272 	return *this;
273 }
274 
275 
276 BUrl&
277 BUrl::SetPath(const BString& path)
278 {
279 	// Implements RFC3986 section 5.2.4, "Remove dot segments"
280 
281 	// 1.
282 	BString output;
283 	BString input(path);
284 
285 	// 2.
286 	while(!input.IsEmpty())
287 	{
288 		// 2.A.
289 		if (input.StartsWith("./"))
290 		{
291 			input.Remove(0, 2);
292 			continue;
293 		}
294 
295 		if (input.StartsWith("../"))
296 		{
297 			input.Remove(0, 3);
298 			continue;
299 		}
300 
301 		// 2.B.
302 		if (input.StartsWith("/./"))
303 		{
304 			input.Remove(0, 2);
305 			continue;
306 		}
307 
308 		if (input == "/.")
309 		{
310 			input.Remove(1, 1);
311 			continue;
312 		}
313 
314 		// 2.C.
315 		if (input.StartsWith("/../"))
316 		{
317 			input.Remove(0, 3);
318 			output.Truncate(output.FindLast('/'));
319 			continue;
320 		}
321 
322 		if (input == "/..")
323 		{
324 			input.Remove(1, 2);
325 			output.Truncate(output.FindLast('/'));
326 			continue;
327 		}
328 
329 		// 2.D.
330 		if (input == "." || input == "..")
331 		{
332 			break;
333 		}
334 
335 		if (input == "/.")
336 		{
337 			input.Remove(1, 1);
338 			continue;
339 		}
340 
341 		// 2.E.
342 		int slashpos = input.FindFirst('/', 1);
343 		if (slashpos > 0) {
344 			output.Append(input, slashpos);
345 			input.Remove(0, slashpos);
346 		} else {
347 			output.Append(input);
348 			break;
349 		}
350 	}
351 
352 	_SetPathUnsafe(output);
353 	return *this;
354 }
355 
356 
357 BUrl&
358 BUrl::SetRequest(const BString& request)
359 {
360 	fRequest = request;
361 	fHasRequest = !fRequest.IsEmpty();
362 	fUrlStringValid = false;
363 	return *this;
364 }
365 
366 
367 BUrl&
368 BUrl::SetFragment(const BString& fragment)
369 {
370 	fFragment = fragment;
371 	fHasFragment = true;
372 	fUrlStringValid = false;
373 	return *this;
374 }
375 
376 
377 // #pragma mark URL fields access
378 
379 
380 const BString&
381 BUrl::UrlString() const
382 {
383 	if (!fUrlStringValid) {
384 		fUrlString.Truncate(0);
385 
386 		if (HasProtocol()) {
387 			fUrlString << fProtocol << ':';
388 		}
389 
390 		if (HasAuthority()) {
391 			fUrlString << "//";
392 			fUrlString << Authority();
393 		}
394 		fUrlString << Path();
395 
396 		if (HasRequest())
397 			fUrlString << '?' << fRequest;
398 
399 		if (HasFragment())
400 			fUrlString << '#' << fFragment;
401 
402 		fUrlStringValid = true;
403 	}
404 
405 	return fUrlString;
406 }
407 
408 
409 const BString&
410 BUrl::Protocol() const
411 {
412 	return fProtocol;
413 }
414 
415 
416 const BString&
417 BUrl::UserName() const
418 {
419 	return fUser;
420 }
421 
422 
423 const BString&
424 BUrl::Password() const
425 {
426 	return fPassword;
427 }
428 
429 
430 const BString&
431 BUrl::UserInfo() const
432 {
433 	if (!fUserInfoValid) {
434 		fUserInfo = fUser;
435 
436 		if (HasPassword())
437 			fUserInfo << ':' << fPassword;
438 
439 		fUserInfoValid = true;
440 	}
441 
442 	return fUserInfo;
443 }
444 
445 
446 const BString&
447 BUrl::Host() const
448 {
449 	return fHost;
450 }
451 
452 
453 int
454 BUrl::Port() const
455 {
456 	return fPort;
457 }
458 
459 
460 const BString&
461 BUrl::Authority() const
462 {
463 	if (!fAuthorityValid) {
464 		fAuthority.Truncate(0);
465 
466 		if (HasUserInfo())
467 			fAuthority << UserInfo() << '@';
468 		fAuthority << Host();
469 
470 		if (HasPort())
471 			fAuthority << ':' << fPort;
472 
473 		fAuthorityValid = true;
474 	}
475 	return fAuthority;
476 }
477 
478 
479 const BString&
480 BUrl::Path() const
481 {
482 	return fPath;
483 }
484 
485 
486 const BString&
487 BUrl::Request() const
488 {
489 	return fRequest;
490 }
491 
492 
493 const BString&
494 BUrl::Fragment() const
495 {
496 	return fFragment;
497 }
498 
499 
500 // #pragma mark URL fields tests
501 
502 
503 bool
504 BUrl::IsValid() const
505 {
506 	if (!fHasProtocol)
507 		return false;
508 
509 	if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp"
510 		|| fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet"
511 		|| fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp"
512 		|| fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") {
513 		return fHasHost && !fHost.IsEmpty();
514 	}
515 
516 	if (fProtocol == "file")
517 		return fHasPath;
518 
519 	return true;
520 }
521 
522 
523 bool
524 BUrl::HasProtocol() const
525 {
526 	return fHasProtocol;
527 }
528 
529 
530 bool
531 BUrl::HasAuthority() const
532 {
533 	return fHasHost || fHasUserName;
534 }
535 
536 
537 bool
538 BUrl::HasUserName() const
539 {
540 	return fHasUserName;
541 }
542 
543 
544 bool
545 BUrl::HasPassword() const
546 {
547 	return fHasPassword;
548 }
549 
550 
551 bool
552 BUrl::HasUserInfo() const
553 {
554 	return fHasUserName || fHasPassword;
555 }
556 
557 
558 bool
559 BUrl::HasHost() const
560 {
561 	return fHasHost;
562 }
563 
564 
565 bool
566 BUrl::HasPort() const
567 {
568 	return fHasPort;
569 }
570 
571 
572 bool
573 BUrl::HasPath() const
574 {
575 	return fHasPath;
576 }
577 
578 
579 bool
580 BUrl::HasRequest() const
581 {
582 	return fHasRequest;
583 }
584 
585 
586 bool
587 BUrl::HasFragment() const
588 {
589 	return fHasFragment;
590 }
591 
592 
593 // #pragma mark URL encoding/decoding of needed fields
594 
595 
596 void
597 BUrl::UrlEncode(bool strict)
598 {
599 	fUser = _DoUrlEncodeChunk(fUser, strict);
600 	fPassword = _DoUrlEncodeChunk(fPassword, strict);
601 	fHost = _DoUrlEncodeChunk(fHost, strict);
602 	fFragment = _DoUrlEncodeChunk(fFragment, strict);
603 	fPath = _DoUrlEncodeChunk(fPath, strict, true);
604 }
605 
606 
607 void
608 BUrl::UrlDecode(bool strict)
609 {
610 	fUser = _DoUrlDecodeChunk(fUser, strict);
611 	fPassword = _DoUrlDecodeChunk(fPassword, strict);
612 	fHost = _DoUrlDecodeChunk(fHost, strict);
613 	fFragment = _DoUrlDecodeChunk(fFragment, strict);
614 	fPath = _DoUrlDecodeChunk(fPath, strict);
615 }
616 
617 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Converts the host name to its ASCII (IDNA) form using ICU's UTS #46
// converter. Returns B_OK on success, or B_ERROR when the converter could
// not be created or ICU reported a failure; the host is left unchanged in
// that case.
// NOTE(review): problems recorded in the IDNAInfo object are not inspected,
// same as before - confirm whether they should be treated as errors.
status_t
BUrl::IDNAToAscii()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
	if (converter == NULL || U_FAILURE(err)) {
		// Don't call through a converter that could not be created.
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;

	BString result;
	BStringByteSink sink(&result);
	converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;

	// The host is part of the cached authority and URL strings.
	fUrlStringValid = false;
	fAuthorityValid = false;
	return B_OK;
}
#endif
640 
641 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Converts the host name from its ASCII (IDNA) form to Unicode (UTF-8)
// using ICU's UTS #46 converter. Returns B_OK on success, or B_ERROR when
// the converter could not be created or ICU reported a failure; the host is
// left unchanged in that case.
// NOTE(review): problems recorded in the IDNAInfo object are not inspected,
// same as before - confirm whether they should be treated as errors.
status_t
BUrl::IDNAToUnicode()
{
	UErrorCode err = U_ZERO_ERROR;
	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
	if (converter == NULL || U_FAILURE(err)) {
		// Don't call through a converter that could not be created.
		delete converter;
		return B_ERROR;
	}

	icu::IDNAInfo info;

	BString result;
	BStringByteSink sink(&result);
	converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info,
		err);

	delete converter;

	if (U_FAILURE(err))
		return B_ERROR;

	fHost = result;

	// The host is part of the cached authority and URL strings.
	fUrlStringValid = false;
	fAuthorityValid = false;
	return B_OK;
}
#endif
664 
665 
666 // #pragma mark - utility functionality
667 
668 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Tells whether PreferredApplication() yields a usable application
// signature: it has to start with "application/" (compared without regard
// to case) and be accepted as a valid MIME type.
bool
BUrl::HasPreferredApplication() const
{
	const BString signature = PreferredApplication();
	BMimeType signatureType(signature.String());

	const bool looksLikeApplication
		= signature.IFindFirst("application/") == 0;

	return looksLikeApplication && signatureType.IsValid();
}
#endif
683 
684 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Returns the signature of the preferred application for the MIME type
// associated with this URL's protocol (see _UrlMimeType()), or an empty
// string when the lookup fails.
BString
BUrl::PreferredApplication() const
{
	// Same capacity the previous locked string buffer offered
	// (B_MIME_TYPE_LENGTH characters plus the terminator).
	char signature[B_MIME_TYPE_LENGTH + 1];
	signature[0] = '\0';

	BMimeType mime(_UrlMimeType().String());

	// Only trust the buffer when the lookup succeeded. Previously the status
	// was ignored, so a failed lookup could hand back whatever happened to
	// be in the freshly allocated buffer.
	if (mime.GetPreferredApp(signature) != B_OK)
		return BString();

	return BString(signature);
}
#endif
697 
698 
#ifdef HAIKU_TARGET_PLATFORM_HAIKU
// Asks the roster to launch the handler of this URL's protocol MIME type,
// passing the URL string as the only argument.
// Returns B_BAD_VALUE for an invalid URL, B_NAME_TOO_LONG when the URL
// string exceeds B_PATH_NAME_LENGTH, and the result of the launch otherwise.
// \a onProblemAskUser is currently unused (see the TODO below).
status_t
BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const
{
	if (!IsValid())
		return B_BAD_VALUE;

	const BString target = UrlString();
	if (target.Length() > B_PATH_NAME_LENGTH) {
		// TODO: BAlert
		//	if (onProblemAskUser)
		//		BAlert ... Too long URL!
#if DEBUG
		fprintf(stderr, "URL too long");
#endif
		return B_NAME_TOO_LONG;
	}

#if DEBUG
	if (HasPreferredApplication())
		printf("HasPreferredApplication() == true\n");
	else
		printf("HasPreferredApplication() == false\n");
#endif

	// Only the URL itself (second entry) is handed to the launched
	// application.
	char* arguments[] = {
		const_cast<char*>("BUrlInvokedApplication"),
		const_cast<char*>(target.String()),
		NULL
	};

	const status_t result
		= be_roster->Launch(_UrlMimeType().String(), 1, &arguments[1]);

#if DEBUG
	if (result != B_OK)
		fprintf(stderr, "Opening URL failed: %s\n", strerror(result));
#endif

	return result;
}
#endif
740 
741 
742 // #pragma mark Url encoding/decoding of string
743 
744 
745 /*static*/ BString
746 BUrl::UrlEncode(const BString& url, bool strict, bool directory)
747 {
748 	return _DoUrlEncodeChunk(url, strict, directory);
749 }
750 
751 
752 /*static*/ BString
753 BUrl::UrlDecode(const BString& url, bool strict)
754 {
755 	return _DoUrlDecodeChunk(url, strict);
756 }
757 
758 
759 // #pragma mark BArchivable members
760 
761 
762 status_t
763 BUrl::Archive(BMessage* into, bool deep) const
764 {
765 	status_t ret = BArchivable::Archive(into, deep);
766 
767 	if (ret == B_OK)
768 		ret = into->AddString(kArchivedUrl, UrlString());
769 
770 	return ret;
771 }
772 
773 
774 /*static*/ BArchivable*
775 BUrl::Instantiate(BMessage* archive)
776 {
777 	if (validate_instantiation(archive, "BUrl"))
778 		return new(std::nothrow) BUrl(archive);
779 	return NULL;
780 }
781 
782 
783 // #pragma mark URL comparison
784 
785 
786 bool
787 BUrl::operator==(BUrl& other) const
788 {
789 	UrlString();
790 	other.UrlString();
791 
792 	return fUrlString == other.fUrlString;
793 }
794 
795 
796 bool
797 BUrl::operator!=(BUrl& other) const
798 {
799 	return !(*this == other);
800 }
801 
802 
803 // #pragma mark URL assignment
804 
805 
806 const BUrl&
807 BUrl::operator=(const BUrl& other)
808 {
809 	fUrlStringValid = other.fUrlStringValid;
810 	if (fUrlStringValid)
811 		fUrlString = other.fUrlString;
812 
813 	fAuthorityValid = other.fAuthorityValid;
814 	if (fAuthorityValid)
815 		fAuthority = other.fAuthority;
816 
817 	fUserInfoValid = other.fUserInfoValid;
818 	if (fUserInfoValid)
819 		fUserInfo = other.fUserInfo;
820 
821 	fProtocol = other.fProtocol;
822 	fUser = other.fUser;
823 	fPassword = other.fPassword;
824 	fHost = other.fHost;
825 	fPort = other.fPort;
826 	fPath = other.fPath;
827 	fRequest = other.fRequest;
828 	fFragment = other.fFragment;
829 
830 	fHasProtocol = other.fHasProtocol;
831 	fHasUserName = other.fHasUserName;
832 	fHasPassword = other.fHasPassword;
833 	fHasHost = other.fHasHost;
834 	fHasPort = other.fHasPort;
835 	fHasPath = other.fHasPath;
836 	fHasRequest = other.fHasRequest;
837 	fHasFragment = other.fHasFragment;
838 
839 	return *this;
840 }
841 
842 
843 const BUrl&
844 BUrl::operator=(const BString& string)
845 {
846 	SetUrlString(string);
847 	return *this;
848 }
849 
850 
851 const BUrl&
852 BUrl::operator=(const char* string)
853 {
854 	SetUrlString(string);
855 	return *this;
856 }
857 
858 
859 // #pragma mark URL to string conversion
860 
861 
862 BUrl::operator const char*() const
863 {
864 	return UrlString();
865 }
866 
867 
868 void
869 BUrl::_ResetFields()
870 {
871 	fHasProtocol = false;
872 	fHasUserName = false;
873 	fHasPassword = false;
874 	fHasHost = false;
875 	fHasPort = false;
876 	fHasPath = false;
877 	fHasRequest = false;
878 	fHasFragment = false;
879 
880 	fProtocol.Truncate(0);
881 	fUser.Truncate(0);
882 	fPassword.Truncate(0);
883 	fHost.Truncate(0);
884 	fPort = 0;
885 	fPath.Truncate(0);
886 	fRequest.Truncate(0);
887 	fFragment.Truncate(0);
888 
889 	// Force re-generation of these fields
890 	fUrlStringValid = false;
891 	fUserInfoValid = false;
892 	fAuthorityValid = false;
893 }
894 
895 
896 bool
897 BUrl::_ContainsDelimiter(const BString& url)
898 {
899 	int32 len = url.Length();
900 
901 	for (int32 i = 0; i < len; i++) {
902 		switch (url[i]) {
903 			case ' ':
904 			case '\n':
905 			case '\t':
906 			case '\r':
907 			case '<':
908 			case '>':
909 			case '"':
910 				return true;
911 		}
912 	}
913 
914 	return false;
915 }
916 
917 
// States of the parser in BUrl::_ExplodeUrlString(), in the order in which
// the URL components are encountered.
enum explode_url_parse_state {
	EXPLODE_PROTOCOL = 0,
		// scanning the candidate protocol characters
	EXPLODE_PROTOCOLTERMINATOR = 1,
		// expecting the ':' that ends the protocol
	EXPLODE_AUTHORITYORPATH = 2,
		// deciding between "//authority" and a path
	EXPLODE_AUTHORITY = 3,
	EXPLODE_PATH = 4,
	EXPLODE_REQUEST = 5,
		// query
	EXPLODE_FRAGMENT = 6,
	EXPLODE_COMPLETE = 7
};
928 
929 
// Predicate deciding whether a character still belongs to the URL component
// currently being scanned.
typedef bool (*explode_char_match_fn)(char);
931 
932 
// Tells whether \a c may be part of a protocol (scheme): an alphanumeric
// character, '+', '.' or '-'.
static bool
explode_is_protocol_char(char c)
{
	// isalnum() requires a value representable as unsigned char (or EOF);
	// passing a negative char, as happens for non-ASCII bytes where char is
	// signed, is undefined behaviour.
	return isalnum(static_cast<unsigned char>(c)) != 0
		|| c == '+' || c == '.' || c == '-';
}
938 
939 
// Tells whether \a c may be part of the authority, i.e. it is none of the
// characters ('/', '?', '#') that start the next component.
static bool
explode_is_authority_char(char c)
{
	return c != '/' && c != '?' && c != '#';
}
945 
946 
// Tells whether \a c may be part of the path, i.e. it neither starts the
// query ('?') nor the fragment ('#').
static bool
explode_is_path_char(char c)
{
	return c != '#' && c != '?';
}
952 
953 
// Tells whether \a c may be part of the request (query), i.e. it does not
// start the fragment ('#').
static bool
explode_is_request_char(char c)
{
	const bool startsFragment = (c == '#');
	return !startsFragment;
}
959 
960 
961 static int32
962 char_offset_until_fn_false(const char* url, int32 len, int32 offset,
963 	explode_char_match_fn fn)
964 {
965 	while (offset < len && fn(url[offset]))
966 		offset++;
967 
968 	return offset;
969 }
970 
971 /*
972  * This function takes a URL in string-form and parses the components of the URL out.
973  */
974 status_t
975 BUrl::_ExplodeUrlString(const BString& url)
976 {
977 	_ResetFields();
978 
979 	// RFC3986, Appendix C; the URL should not contain whitespace or delimiters
980 	// by this point.
981 
982 	if (_ContainsDelimiter(url))
983 		return B_BAD_VALUE;
984 
985 	explode_url_parse_state state = EXPLODE_PROTOCOL;
986 	int32 offset = 0;
987 	int32 length = url.Length();
988 	const char *url_c = url.String();
989 
990 	// The regexp is provided in RFC3986 (URI generic syntax), Appendix B
991 	// ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?
992 	// The ensuing logic attempts to simulate the behaviour of extracting the groups
993 	// from the string without requiring a group-capable regex engine.
994 
995 	while (offset < length) {
996 		switch (state) {
997 
998 			case EXPLODE_PROTOCOL:
999 			{
1000 				int32 end_protocol = char_offset_until_fn_false(url_c, length,
1001 					offset, explode_is_protocol_char);
1002 
1003 				if (end_protocol < length) {
1004 					SetProtocol(BString(&url_c[offset], end_protocol - offset));
1005 					state = EXPLODE_PROTOCOLTERMINATOR;
1006 					offset = end_protocol;
1007 				} else {
1008 					// No protocol was found, try parsing from the string
1009 					// start, beginning with authority or path
1010 					SetProtocol("");
1011 					offset = 0;
1012 					state = EXPLODE_AUTHORITYORPATH;
1013 				}
1014 				break;
1015 			}
1016 
1017 			case EXPLODE_PROTOCOLTERMINATOR:
1018 			{
1019 				if (url[offset] == ':') {
1020 					offset++;
1021 				} else {
1022 					// No protocol was found, try parsing from the string
1023 					// start, beginning with authority or path
1024 					SetProtocol("");
1025 					offset = 0;
1026 				}
1027 				state = EXPLODE_AUTHORITYORPATH;
1028 				break;
1029 			}
1030 
1031 			case EXPLODE_AUTHORITYORPATH:
1032 			{
1033 				// The authority must start with //. If it isn't there, skip
1034 				// to parsing the path.
1035 				if (strncmp(&url_c[offset], "//", 2) == 0) {
1036 					state = EXPLODE_AUTHORITY;
1037 					offset += 2;
1038 				} else {
1039 					state = EXPLODE_PATH;
1040 				}
1041 				break;
1042 			}
1043 
1044 			case EXPLODE_AUTHORITY:
1045 			{
1046 				int end_authority = char_offset_until_fn_false(url_c, length,
1047 					offset, explode_is_authority_char);
1048 				SetAuthority(BString(&url_c[offset], end_authority - offset));
1049 				state = EXPLODE_PATH;
1050 				offset = end_authority;
1051 				break;
1052 			}
1053 
1054 			case EXPLODE_PATH:
1055 			{
1056 				int end_path = char_offset_until_fn_false(url_c, length, offset,
1057 					explode_is_path_char);
1058 				SetPath(BString(&url_c[offset], end_path - offset));
1059 				state = EXPLODE_REQUEST;
1060 				offset = end_path;
1061 				break;
1062 			}
1063 
1064 			case EXPLODE_REQUEST: // query
1065 			{
1066 				if (url_c[offset] == '?') {
1067 					offset++;
1068 					int end_request = char_offset_until_fn_false(url_c, length,
1069 						offset, explode_is_request_char);
1070 					SetRequest(BString(&url_c[offset], end_request - offset));
1071 					offset = end_request;
1072 				}
1073 				state = EXPLODE_FRAGMENT;
1074 				break;
1075 			}
1076 
1077 			case EXPLODE_FRAGMENT:
1078 			{
1079 				if (url_c[offset] == '#') {
1080 					offset++;
1081 					SetFragment(BString(&url_c[offset], length - offset));
1082 					offset = length;
1083 				}
1084 				state = EXPLODE_COMPLETE;
1085 				break;
1086 			}
1087 
1088 			case EXPLODE_COMPLETE:
1089 				// should never be reached - keeps the compiler happy
1090 				break;
1091 
1092 		}
1093 	}
1094 
1095 	return B_OK;
1096 }
1097 
1098 
1099 BString
1100 BUrl::_MergePath(const BString& relative) const
1101 {
1102 	// This implements RFC3986, Section 5.2.3.
1103 	if (HasAuthority() && fPath == "")
1104 	{
1105 		BString result("/");
1106 		result << relative;
1107 		return result;
1108 	}
1109 
1110 	BString result(fPath);
1111 	result.Truncate(result.FindLast("/") + 1);
1112 	result << relative;
1113 
1114 	return result;
1115 }
1116 
1117 
1118 // This sets the path without normalizing it. If fed with a path that has . or
1119 // .. segments, this would make the URL invalid.
1120 void
1121 BUrl::_SetPathUnsafe(const BString& path)
1122 {
1123 	fPath = path;
1124 	fHasPath = true; // RFC says an empty path is still a path
1125 	fUrlStringValid = false;
1126 }
1127 
1128 
// States of the parser in BUrl::SetAuthority(), in the order in which the
// parts of the authority are encountered.
enum authority_parse_state {
	AUTHORITY_USERNAME = 0,
	AUTHORITY_PASSWORD = 1,
	AUTHORITY_HOST = 2,
	AUTHORITY_PORT = 3,
	AUTHORITY_COMPLETE = 4
};
1136 
1137 
// Tells whether \a c may be part of the user name, i.e. it is neither the
// password separator (':') nor the user-info terminator ('@').
static bool
authority_is_username_char(char c)
{
	return c != ':' && c != '@';
}
1143 
1144 
// Tells whether \a c may be part of the password, i.e. it is not the
// user-info terminator ('@').
static bool
authority_is_password_char(char c)
{
	return c != '@';
}
1150 
1151 
// Tells whether \a c may appear inside a bracketed IPv6 host: a hexadecimal
// digit (either case) or ':'.
static bool
authority_is_ipv6_host_char(char c) {
	if (c == ':')
		return true;
	if (c >= '0' && c <= '9')
		return true;
	if (c >= 'a' && c <= 'f')
		return true;
	return c >= 'A' && c <= 'F';
}
1157 
1158 
// Tells whether \a c may be part of a (non-IPv6) host, i.e. it is neither
// the port separator (':') nor '/'.
static bool
authority_is_host_char(char c) {
	return c != ':' && c != '/';
}
1163 
1164 
// Tells whether \a c is a decimal digit, the only kind of character a port
// is made of.
static bool
authority_is_port_char(char c) {
	return '0' <= c && c <= '9';
}
1169 
1170 
1171 void
1172 BUrl::SetAuthority(const BString& authority)
1173 {
1174 	fAuthority = authority;
1175 
1176 	fUser.Truncate(0);
1177 	fPassword.Truncate(0);
1178 	fHost.Truncate(0);
1179 	fPort = 0;
1180 	fHasPort = false;
1181 	fHasUserName = false;
1182 	fHasPassword = false;
1183 
1184 	bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@');
1185 	authority_parse_state state = AUTHORITY_USERNAME;
1186 	int32 offset = 0;
1187 	int32 length = authority.Length();
1188 	const char *authority_c = authority.String();
1189 
1190 	while (AUTHORITY_COMPLETE != state && offset < length) {
1191 
1192 		switch (state) {
1193 
1194 			case AUTHORITY_USERNAME:
1195 			{
1196 				if (hasUsernamePassword) {
1197 					int32 end_username = char_offset_until_fn_false(
1198 						authority_c, length, offset,
1199 						authority_is_username_char);
1200 
1201 					SetUserName(BString(&authority_c[offset],
1202 						end_username - offset));
1203 
1204 					state = AUTHORITY_PASSWORD;
1205 					offset = end_username;
1206 				} else {
1207 					state = AUTHORITY_HOST;
1208 				}
1209 				break;
1210 			}
1211 
1212 			case AUTHORITY_PASSWORD:
1213 			{
1214 				if (hasUsernamePassword && ':' == authority[offset]) {
1215 					offset++; // move past the delimiter
1216 					int32 end_password = char_offset_until_fn_false(
1217 						authority_c, length, offset,
1218 						authority_is_password_char);
1219 
1220 					SetPassword(BString(&authority_c[offset],
1221 						end_password - offset));
1222 
1223 					offset = end_password;
1224 				}
1225 
1226 				// if the host was preceded by a username + password couple
1227 				// then there will be an '@' delimiter to avoid.
1228 
1229 				if (authority_c[offset] == '@') {
1230 					offset++;
1231 				}
1232 
1233 				state = AUTHORITY_HOST;
1234 				break;
1235 			}
1236 
1237 			case AUTHORITY_HOST:
1238 			{
1239 
1240 				// the host may be enclosed within brackets in order to express
1241 				// an IPV6 address.
1242 
1243 				if (authority_c[offset] == '[') {
1244 					int32 end_ipv6_host = char_offset_until_fn_false(
1245 						authority_c, length, offset + 1,
1246 						authority_is_ipv6_host_char);
1247 
1248 					if (authority_c[end_ipv6_host] == ']') {
1249 						SetHost(BString(&authority_c[offset],
1250 							(end_ipv6_host - offset) + 1));
1251 						state = AUTHORITY_PORT;
1252 						offset = end_ipv6_host + 1;
1253 					}
1254 				}
1255 
1256 				// if an IPV6 host was not found.
1257 
1258 				if (AUTHORITY_HOST == state) {
1259 					int32 end_host = char_offset_until_fn_false(
1260 						authority_c, length, offset, authority_is_host_char);
1261 
1262 					SetHost(BString(&authority_c[offset], end_host - offset));
1263 					state = AUTHORITY_PORT;
1264 					offset = end_host;
1265 				}
1266 
1267 				break;
1268 			}
1269 
1270 			case AUTHORITY_PORT:
1271 			{
1272 				if (authority_c[offset] == ':') {
1273 					offset++;
1274 					int32 end_port = char_offset_until_fn_false(
1275 						authority_c, length, offset, authority_is_port_char);
1276 					SetPort(atoi(&authority_c[offset]));
1277 					offset = end_port;
1278 				}
1279 
1280 				state = AUTHORITY_COMPLETE;
1281 
1282 				break;
1283 			}
1284 
1285 			case AUTHORITY_COMPLETE:
1286 				// should never be reached - keeps the compiler happy
1287 				break;
1288 		}
1289 	}
1290 
1291 	// An empty authority is still an authority, making it possible to have
1292 	// URLs such as file:///path/to/file.
1293 	// TODO however, there is no way to unset the authority once it is set...
1294 	// We may want to take a const char* parameter and allow NULL.
1295 	fHasHost = true;
1296 }
1297 
1298 
1299 /*static*/ BString
1300 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory)
1301 {
1302 	BString result;
1303 
1304 	for (int32 i = 0; i < chunk.Length(); i++) {
1305 		if (_IsUnreserved(chunk[i])
1306 				|| (directory && (chunk[i] == '/' || chunk[i] == '\\'))) {
1307 			result << chunk[i];
1308 		} else {
1309 			if (chunk[i] == ' ' && !strict) {
1310 				result << '+';
1311 					// In non-strict mode, spaces are encoded by a plus sign
1312 			} else {
1313 				char hexString[5];
1314 				snprintf(hexString, 5, "%X", chunk[i]);
1315 
1316 				result << '%' << hexString;
1317 			}
1318 		}
1319 	}
1320 
1321 	return result;
1322 }
1323 
1324 
1325 /*static*/ BString
1326 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict)
1327 {
1328 	BString result;
1329 
1330 	for (int32 i = 0; i < chunk.Length(); i++) {
1331 		if (chunk[i] == '+' && !strict)
1332 			result << ' ';
1333 		else {
1334 			char decoded = 0;
1335 			char* out = NULL;
1336 			char hexString[3];
1337 
1338 			if (chunk[i] == '%' && i < chunk.Length() - 2
1339 				&& isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) {
1340 				hexString[0] = chunk[i + 1];
1341 				hexString[1] = chunk[i + 2];
1342 				hexString[2] = 0;
1343 				decoded = (char)strtol(hexString, &out, 16);
1344 			}
1345 
1346 			if (out == hexString + 2) {
1347 				i += 2;
1348 				result << decoded;
1349 			} else
1350 				result << chunk[i];
1351 		}
1352 	}
1353 	return result;
1354 }
1355 
1356 
1357 bool
1358 BUrl::_IsProtocolValid()
1359 {
1360 	for (int8 index = 0; index < fProtocol.Length(); index++) {
1361 		char c = fProtocol[index];
1362 
1363 		if (index == 0 && !isalpha(c))
1364 			return false;
1365 		else if (!isalnum(c) && c != '+' && c != '-' && c != '.')
1366 			return false;
1367 	}
1368 
1369 	return fProtocol.Length() > 0;
1370 }
1371 
1372 
1373 bool
1374 BUrl::_IsUnreserved(char c)
1375 {
1376 	return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~';
1377 }
1378 
1379 
1380 bool
1381 BUrl::_IsGenDelim(char c)
1382 {
1383 	return c == ':' || c == '/' || c == '?' || c == '#' || c == '['
1384 		|| c == ']' || c == '@';
1385 }
1386 
1387 
1388 bool
1389 BUrl::_IsSubDelim(char c)
1390 {
1391 	return c == '!' || c == '$' || c == '&' || c == '\'' || c == '('
1392 		|| c == ')' || c == '*' || c == '+' || c == ',' || c == ';'
1393 		|| c == '=';
1394 }
1395 
1396 
1397 BString
1398 BUrl::_UrlMimeType() const
1399 {
1400 	BString mime;
1401 	mime << "application/x-vnd.Be.URL." << fProtocol;
1402 
1403 	return BString(mime);
1404 }
1405