xref: /haiku/src/kits/support/Url.cpp (revision efafab643ce980e3f3c916795ed302599f6b4f66)
1 /*
2  * Copyright 2010-2018 Haiku Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Christophe Huriaux, c.huriaux@gmail.com
7  *		Andrew Lindesay, apl@lindesay.co.nz
8  */
9 
10 
11 #include <Url.h>
12 
13 #include <ctype.h>
14 #include <cstdio>
15 #include <cstdlib>
16 #include <new>
17 
18 #include <MimeType.h>
19 #include <Roster.h>
20 
21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
22 	#include <ICUWrapper.h>
23 #endif
24 
25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
26 	#include <unicode/idna.h>
27 	#include <unicode/stringpiece.h>
28 #endif
29 
30 
// Name of the BMessage field that holds the URL string when a BUrl is
// archived (written by Archive(), read back by the BMessage constructor).
static const char* kArchivedUrl = "be:url string";

/*! These flags can be combined to control the parse process. */

// No flag set: the parsed path is normalized through SetPath().
const uint32 PARSE_NO_MASK_BIT				= 0x00000000;
// Store the parsed path as-is (via _SetPathUnsafe()), keeping any "." and
// ".." segments it contains.
const uint32 PARSE_RAW_PATH_MASK_BIT		= 0x00000001;
37 
38 
39 BUrl::BUrl(const char* url)
40 	:
41 	fUrlString(),
42 	fProtocol(),
43 	fUser(),
44 	fPassword(),
45 	fHost(),
46 	fPort(0),
47 	fPath(),
48 	fRequest(),
49 	fHasHost(false),
50 	fHasFragment(false)
51 {
52 	SetUrlString(url);
53 }
54 
55 
56 BUrl::BUrl(BMessage* archive)
57 	:
58 	fUrlString(),
59 	fProtocol(),
60 	fUser(),
61 	fPassword(),
62 	fHost(),
63 	fPort(0),
64 	fPath(),
65 	fRequest(),
66 	fHasHost(false),
67 	fHasFragment(false)
68 {
69 	BString url;
70 
71 	if (archive->FindString(kArchivedUrl, &url) == B_OK)
72 		SetUrlString(url);
73 	else
74 		_ResetFields();
75 }
76 
77 
78 BUrl::BUrl(const BUrl& other)
79 	:
80 	BArchivable(),
81 	fUrlString(),
82 	fProtocol(other.fProtocol),
83 	fUser(other.fUser),
84 	fPassword(other.fPassword),
85 	fHost(other.fHost),
86 	fPort(other.fPort),
87 	fPath(other.fPath),
88 	fRequest(other.fRequest),
89 	fFragment(other.fFragment),
90 	fUrlStringValid(other.fUrlStringValid),
91 	fAuthorityValid(other.fAuthorityValid),
92 	fUserInfoValid(other.fUserInfoValid),
93 	fHasProtocol(other.fHasProtocol),
94 	fHasUserName(other.fHasUserName),
95 	fHasPassword(other.fHasPassword),
96 	fHasHost(other.fHasHost),
97 	fHasPort(other.fHasPort),
98 	fHasPath(other.fHasPath),
99 	fHasRequest(other.fHasRequest),
100 	fHasFragment(other.fHasFragment)
101 {
102 	if (fUrlStringValid)
103 		fUrlString = other.fUrlString;
104 
105 	if (fAuthorityValid)
106 		fAuthority = other.fAuthority;
107 
108 	if (fUserInfoValid)
109 		fUserInfo = other.fUserInfo;
110 
111 }
112 
113 
114 BUrl::BUrl(const BUrl& base, const BString& location)
115 	:
116 	fUrlString(),
117 	fProtocol(),
118 	fUser(),
119 	fPassword(),
120 	fHost(),
121 	fPort(0),
122 	fPath(),
123 	fRequest(),
124 	fAuthorityValid(false),
125 	fUserInfoValid(false),
126 	fHasUserName(false),
127 	fHasPassword(false),
128 	fHasHost(false),
129 	fHasPort(false),
130 	fHasFragment(false)
131 {
132 	// This implements the algorithm in RFC3986, Section 5.2.
133 
134 	BUrl relative;
135 	relative._ExplodeUrlString(location, PARSE_RAW_PATH_MASK_BIT);
136 		// This parse will leave the path 'raw' so that it still carries any
137 		// special sequences such as '..' and '.' in it.  This way it can be
138 		// later combined with the base.
139 
140 	if (relative.HasProtocol()) {
141 		SetProtocol(relative.Protocol());
142 		if (relative.HasAuthority())
143 			SetAuthority(relative.Authority());
144 		SetPath(relative.Path());
145 		SetRequest(relative.Request());
146 	} else {
147 		if (relative.HasAuthority()) {
148 			SetAuthority(relative.Authority());
149 			SetPath(relative.Path());
150 			SetRequest(relative.Request());
151 		} else {
152 			if (relative.Path().IsEmpty()) {
153 				_SetPathUnsafe(base.Path());
154 				if (relative.HasRequest())
155 					SetRequest(relative.Request());
156 				else
157 					SetRequest(base.Request());
158 			} else {
159 				if (relative.Path()[0] == '/')
160 					SetPath(relative.Path());
161 				else {
162 					BString path = base._MergePath(relative.Path());
163 					SetPath(path);
164 				}
165 				SetRequest(relative.Request());
166 			}
167 
168 			if (base.HasAuthority())
169 				SetAuthority(base.Authority());
170 		}
171 		SetProtocol(base.Protocol());
172 	}
173 
174 	if (relative.HasFragment())
175 		SetFragment(relative.Fragment());
176 }
177 
178 
179 BUrl::BUrl()
180 	:
181 	fUrlString(),
182 	fProtocol(),
183 	fUser(),
184 	fPassword(),
185 	fHost(),
186 	fPort(0),
187 	fPath(),
188 	fRequest(),
189 	fHasHost(false),
190 	fHasFragment(false)
191 {
192 	_ResetFields();
193 }
194 
195 
196 BUrl::BUrl(const BPath& path)
197 	:
198 	fUrlString(),
199 	fProtocol(),
200 	fUser(),
201 	fPassword(),
202 	fHost(),
203 	fPort(0),
204 	fPath(),
205 	fRequest(),
206 	fHasHost(false),
207 	fHasFragment(false)
208 {
209 	SetUrlString(UrlEncode(path.Path(), true, true));
210 	SetProtocol("file");
211 }
212 
213 
// The destructor body is empty; nothing is released explicitly here.
BUrl::~BUrl()
{
}
217 
218 
219 // #pragma mark URL fields modifiers
220 
221 
/*!	Replaces the whole URL by parsing \a url.

	The status returned by the parser is not propagated; the object itself is
	returned so that calls can be chained.
*/
BUrl&
BUrl::SetUrlString(const BString& url)
{
	_ExplodeUrlString(url, PARSE_NO_MASK_BIT);
	return *this;
}
228 
229 
230 BUrl&
231 BUrl::SetProtocol(const BString& protocol)
232 {
233 	fProtocol = protocol;
234 	fHasProtocol = !fProtocol.IsEmpty();
235 	fUrlStringValid = false;
236 	return *this;
237 }
238 
239 
240 BUrl&
241 BUrl::SetUserName(const BString& user)
242 {
243 	fUser = user;
244 	fHasUserName = !fUser.IsEmpty();
245 	fUrlStringValid = false;
246 	fAuthorityValid = false;
247 	fUserInfoValid = false;
248 	return *this;
249 }
250 
251 
252 BUrl&
253 BUrl::SetPassword(const BString& password)
254 {
255 	fPassword = password;
256 	fHasPassword = !fPassword.IsEmpty();
257 	fUrlStringValid = false;
258 	fAuthorityValid = false;
259 	fUserInfoValid = false;
260 	return *this;
261 }
262 
263 
264 BUrl&
265 BUrl::SetHost(const BString& host)
266 {
267 	fHost = host;
268 	fHasHost = !fHost.IsEmpty();
269 	fUrlStringValid = false;
270 	fAuthorityValid = false;
271 	return *this;
272 }
273 
274 
275 BUrl&
276 BUrl::SetPort(int port)
277 {
278 	fPort = port;
279 	fHasPort = (port != 0);
280 	fUrlStringValid = false;
281 	fAuthorityValid = false;
282 	return *this;
283 }
284 
285 
286 void
287 BUrl::_RemoveLastPathComponent(BString& path)
288 {
289 	int32 outputLastSlashIdx = path.FindLast('/');
290 
291 	if (outputLastSlashIdx == B_ERROR)
292 		path.Truncate(0);
293 	else
294 		path.Truncate(outputLastSlashIdx);
295 }
296 
297 
298 BUrl&
299 BUrl::SetPath(const BString& path)
300 {
301 	// Implements RFC3986 section 5.2.4, "Remove dot segments"
302 
303 	// 1.
304 	BString output;
305 	BString input(path);
306 
307 	// 2.
308 	while (!input.IsEmpty()) {
309 		// 2.A.
310 		if (input.StartsWith("./")) {
311 			input.Remove(0, 2);
312 			continue;
313 		}
314 
315 		if (input.StartsWith("../")) {
316 			input.Remove(0, 3);
317 			continue;
318 		}
319 
320 		// 2.B.
321 		if (input.StartsWith("/./")) {
322 			input.Remove(0, 2);
323 			continue;
324 		}
325 
326 		if (input == "/.") {
327 			input.Remove(1, 1);
328 			continue;
329 		}
330 
331 		// 2.C.
332 		if (input.StartsWith("/../")) {
333 			input.Remove(0, 3);
334 			_RemoveLastPathComponent(output);
335 			continue;
336 		}
337 
338 		if (input == "/..") {
339 			input.Remove(1, 2);
340 			_RemoveLastPathComponent(output);
341 			continue;
342 		}
343 
344 		// 2.D.
345 		if (input == "." || input == "..") {
346 			break;
347 		}
348 
349 		if (input == "/.") {
350 			input.Remove(1, 1);
351 			continue;
352 		}
353 
354 		// 2.E.
355 		int slashpos = input.FindFirst('/', 1);
356 		if (slashpos > 0) {
357 			output.Append(input, slashpos);
358 			input.Remove(0, slashpos);
359 		} else {
360 			output.Append(input);
361 			break;
362 		}
363 	}
364 
365 	_SetPathUnsafe(output);
366 	return *this;
367 }
368 
369 
370 BUrl&
371 BUrl::SetRequest(const BString& request)
372 {
373 	fRequest = request;
374 	fHasRequest = !fRequest.IsEmpty();
375 	fUrlStringValid = false;
376 	return *this;
377 }
378 
379 
380 BUrl&
381 BUrl::SetFragment(const BString& fragment)
382 {
383 	fFragment = fragment;
384 	fHasFragment = true;
385 	fUrlStringValid = false;
386 	return *this;
387 }
388 
389 
390 // #pragma mark URL fields access
391 
392 
393 const BString&
394 BUrl::UrlString() const
395 {
396 	if (!fUrlStringValid) {
397 		fUrlString.Truncate(0);
398 
399 		if (HasProtocol()) {
400 			fUrlString << fProtocol << ':';
401 		}
402 
403 		if (HasAuthority()) {
404 			fUrlString << "//";
405 			fUrlString << Authority();
406 		}
407 		fUrlString << Path();
408 
409 		if (HasRequest())
410 			fUrlString << '?' << fRequest;
411 
412 		if (HasFragment())
413 			fUrlString << '#' << fFragment;
414 
415 		fUrlStringValid = true;
416 	}
417 
418 	return fUrlString;
419 }
420 
421 
// Returns the stored protocol (scheme); empty when none is set.
const BString&
BUrl::Protocol() const
{
	return fProtocol;
}
427 
428 
// Returns the stored user name; empty when none is set.
const BString&
BUrl::UserName() const
{
	return fUser;
}
434 
435 
// Returns the stored password; empty when none is set.
const BString&
BUrl::Password() const
{
	return fPassword;
}
441 
442 
443 const BString&
444 BUrl::UserInfo() const
445 {
446 	if (!fUserInfoValid) {
447 		fUserInfo = fUser;
448 
449 		if (HasPassword())
450 			fUserInfo << ':' << fPassword;
451 
452 		fUserInfoValid = true;
453 	}
454 
455 	return fUserInfo;
456 }
457 
458 
// Returns the stored host; empty when none is set.
const BString&
BUrl::Host() const
{
	return fHost;
}
464 
465 
// Returns the stored port; 0 when none is set.
int
BUrl::Port() const
{
	return fPort;
}
471 
472 
473 const BString&
474 BUrl::Authority() const
475 {
476 	if (!fAuthorityValid) {
477 		fAuthority.Truncate(0);
478 
479 		if (HasUserInfo())
480 			fAuthority << UserInfo() << '@';
481 		fAuthority << Host();
482 
483 		if (HasPort())
484 			fAuthority << ':' << fPort;
485 
486 		fAuthorityValid = true;
487 	}
488 	return fAuthority;
489 }
490 
491 
// Returns the stored path.
const BString&
BUrl::Path() const
{
	return fPath;
}
497 
498 
// Returns the stored request (query) string, without the leading '?'.
const BString&
BUrl::Request() const
{
	return fRequest;
}
504 
505 
// Returns the stored fragment, without the leading '#'.
const BString&
BUrl::Fragment() const
{
	return fFragment;
}
511 
512 
513 // #pragma mark URL fields tests
514 
515 
516 bool
517 BUrl::IsValid() const
518 {
519 	if (!fHasProtocol)
520 		return false;
521 
522 	if (!_IsProtocolValid())
523 		return false;
524 
525 	// it is possible that there can be an authority but no host.
526 	// wierd://tea:tree@/x
527 	if (HasHost() && !(fHost.IsEmpty() && HasAuthority()) && !_IsHostValid())
528 		return false;
529 
530 	if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp"
531 		|| fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet"
532 		|| fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp"
533 		|| fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") {
534 		return HasHost() && !fHost.IsEmpty();
535 	}
536 
537 	if (fProtocol == "file")
538 		return fHasPath;
539 
540 	return true;
541 }
542 
543 
// Returns whether a protocol is flagged as present.
bool
BUrl::HasProtocol() const
{
	return fHasProtocol;
}
549 
550 
// Returns whether the URL has an authority part, which is the case as soon
// as a host or a user name is flagged as present.
bool
BUrl::HasAuthority() const
{
	return fHasHost || fHasUserName;
}
556 
557 
// Returns whether a user name is flagged as present.
bool
BUrl::HasUserName() const
{
	return fHasUserName;
}
563 
564 
// Returns whether a password is flagged as present.
bool
BUrl::HasPassword() const
{
	return fHasPassword;
}
570 
571 
// Returns whether the URL has user info, i.e. a user name or a password.
bool
BUrl::HasUserInfo() const
{
	return fHasUserName || fHasPassword;
}
577 
578 
// Returns whether a host is flagged as present. The flag can be set while
// the host string itself is empty (see SetAuthority()).
bool
BUrl::HasHost() const
{
	return fHasHost;
}
584 
585 
// Returns whether a port is flagged as present.
bool
BUrl::HasPort() const
{
	return fHasPort;
}
591 
592 
// Returns whether a path is flagged as present. The flag is set whenever a
// path is stored, even an empty one (see _SetPathUnsafe()).
bool
BUrl::HasPath() const
{
	return fHasPath;
}
598 
599 
// Returns whether a request (query) is flagged as present.
bool
BUrl::HasRequest() const
{
	return fHasRequest;
}
605 
606 
// Returns whether a fragment is flagged as present.
bool
BUrl::HasFragment() const
{
	return fHasFragment;
}
612 
613 
614 // #pragma mark URL encoding/decoding of needed fields
615 
616 
617 void
618 BUrl::UrlEncode(bool strict)
619 {
620 	fUser = _DoUrlEncodeChunk(fUser, strict);
621 	fPassword = _DoUrlEncodeChunk(fPassword, strict);
622 	fHost = _DoUrlEncodeChunk(fHost, strict);
623 	fFragment = _DoUrlEncodeChunk(fFragment, strict);
624 	fPath = _DoUrlEncodeChunk(fPath, strict, true);
625 }
626 
627 
628 void
629 BUrl::UrlDecode(bool strict)
630 {
631 	fUser = _DoUrlDecodeChunk(fUser, strict);
632 	fPassword = _DoUrlDecodeChunk(fPassword, strict);
633 	fHost = _DoUrlDecodeChunk(fHost, strict);
634 	fFragment = _DoUrlDecodeChunk(fFragment, strict);
635 	fPath = _DoUrlDecodeChunk(fPath, strict);
636 }
637 
638 
639 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
640 status_t
641 BUrl::IDNAToAscii()
642 {
643 	UErrorCode err = U_ZERO_ERROR;
644 	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
645 	icu::IDNAInfo info;
646 
647 	BString result;
648 	BStringByteSink sink(&result);
649 	converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info,
650 		err);
651 
652 	delete converter;
653 
654 	if (U_FAILURE(err))
655 		return B_ERROR;
656 
657 	fHost = result;
658 	return B_OK;
659 }
660 #endif
661 
662 
663 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
664 status_t
665 BUrl::IDNAToUnicode()
666 {
667 	UErrorCode err = U_ZERO_ERROR;
668 	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
669 	icu::IDNAInfo info;
670 
671 	BString result;
672 	BStringByteSink sink(&result);
673 	converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info,
674 		err);
675 
676 	delete converter;
677 
678 	if (U_FAILURE(err))
679 		return B_ERROR;
680 
681 	fHost = result;
682 	return B_OK;
683 }
684 #endif
685 
686 
687 // #pragma mark - utility functionality
688 
689 
690 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
691 bool
692 BUrl::HasPreferredApplication() const
693 {
694 	BString appSignature = PreferredApplication();
695 	BMimeType mime(appSignature.String());
696 
697 	if (appSignature.IFindFirst("application/") == 0
698 		&& mime.IsValid())
699 		return true;
700 
701 	return false;
702 }
703 #endif
704 
705 
706 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
707 BString
708 BUrl::PreferredApplication() const
709 {
710 	BString appSignature;
711 	BMimeType mime(_UrlMimeType().String());
712 	mime.GetPreferredApp(appSignature.LockBuffer(B_MIME_TYPE_LENGTH));
713 	appSignature.UnlockBuffer();
714 
715 	return BString(appSignature);
716 }
717 #endif
718 
719 
720 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
721 status_t
722 BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const
723 {
724 	if (!IsValid())
725 		return B_BAD_VALUE;
726 
727 	BString urlString = UrlString();
728 	if (urlString.Length() > B_PATH_NAME_LENGTH) {
729 		// TODO: BAlert
730 		//	if (onProblemAskUser)
731 		//		BAlert ... Too long URL!
732 #if DEBUG
733 		fprintf(stderr, "URL too long");
734 #endif
735 		return B_NAME_TOO_LONG;
736 	}
737 
738 	char* argv[] = {
739 		const_cast<char*>("BUrlInvokedApplication"),
740 		const_cast<char*>(urlString.String()),
741 		NULL
742 	};
743 
744 #if DEBUG
745 	if (HasPreferredApplication())
746 		printf("HasPreferredApplication() == true\n");
747 	else
748 		printf("HasPreferredApplication() == false\n");
749 #endif
750 
751 	status_t status = be_roster->Launch(_UrlMimeType().String(), 1, argv+1);
752 	if (status != B_OK) {
753 #if DEBUG
754 		fprintf(stderr, "Opening URL failed: %s\n", strerror(status));
755 #endif
756 	}
757 
758 	return status;
759 }
760 #endif
761 
762 
763 // #pragma mark Url encoding/decoding of string
764 
765 
// Returns a percent-encoded copy of the given string. All arguments are
// forwarded unchanged to _DoUrlEncodeChunk().
/*static*/ BString
BUrl::UrlEncode(const BString& url, bool strict, bool directory)
{
	return _DoUrlEncodeChunk(url, strict, directory);
}
771 
772 
// Returns a copy of the given string with its percent escapes decoded. All
// arguments are forwarded unchanged to _DoUrlDecodeChunk().
/*static*/ BString
BUrl::UrlDecode(const BString& url, bool strict)
{
	return _DoUrlDecodeChunk(url, strict);
}
778 
779 
780 // #pragma mark BArchivable members
781 
782 
783 status_t
784 BUrl::Archive(BMessage* into, bool deep) const
785 {
786 	status_t ret = BArchivable::Archive(into, deep);
787 
788 	if (ret == B_OK)
789 		ret = into->AddString(kArchivedUrl, UrlString());
790 
791 	return ret;
792 }
793 
794 
795 /*static*/ BArchivable*
796 BUrl::Instantiate(BMessage* archive)
797 {
798 	if (validate_instantiation(archive, "BUrl"))
799 		return new(std::nothrow) BUrl(archive);
800 	return NULL;
801 }
802 
803 
804 // #pragma mark URL comparison
805 
806 
807 bool
808 BUrl::operator==(BUrl& other) const
809 {
810 	UrlString();
811 	other.UrlString();
812 
813 	return fUrlString == other.fUrlString;
814 }
815 
816 
817 bool
818 BUrl::operator!=(BUrl& other) const
819 {
820 	return !(*this == other);
821 }
822 
823 
824 // #pragma mark URL assignment
825 
826 
827 const BUrl&
828 BUrl::operator=(const BUrl& other)
829 {
830 	fUrlStringValid = other.fUrlStringValid;
831 	if (fUrlStringValid)
832 		fUrlString = other.fUrlString;
833 
834 	fAuthorityValid = other.fAuthorityValid;
835 	if (fAuthorityValid)
836 		fAuthority = other.fAuthority;
837 
838 	fUserInfoValid = other.fUserInfoValid;
839 	if (fUserInfoValid)
840 		fUserInfo = other.fUserInfo;
841 
842 	fProtocol = other.fProtocol;
843 	fUser = other.fUser;
844 	fPassword = other.fPassword;
845 	fHost = other.fHost;
846 	fPort = other.fPort;
847 	fPath = other.fPath;
848 	fRequest = other.fRequest;
849 	fFragment = other.fFragment;
850 
851 	fHasProtocol = other.fHasProtocol;
852 	fHasUserName = other.fHasUserName;
853 	fHasPassword = other.fHasPassword;
854 	fHasHost = other.fHasHost;
855 	fHasPort = other.fHasPort;
856 	fHasPath = other.fHasPath;
857 	fHasRequest = other.fHasRequest;
858 	fHasFragment = other.fHasFragment;
859 
860 	return *this;
861 }
862 
863 
864 const BUrl&
865 BUrl::operator=(const BString& string)
866 {
867 	SetUrlString(string);
868 	return *this;
869 }
870 
871 
872 const BUrl&
873 BUrl::operator=(const char* string)
874 {
875 	SetUrlString(string);
876 	return *this;
877 }
878 
879 
880 // #pragma mark URL to string conversion
881 
882 
883 BUrl::operator const char*() const
884 {
885 	return UrlString();
886 }
887 
888 
889 void
890 BUrl::_ResetFields()
891 {
892 	fHasProtocol = false;
893 	fHasUserName = false;
894 	fHasPassword = false;
895 	fHasHost = false;
896 	fHasPort = false;
897 	fHasPath = false;
898 	fHasRequest = false;
899 	fHasFragment = false;
900 
901 	fProtocol.Truncate(0);
902 	fUser.Truncate(0);
903 	fPassword.Truncate(0);
904 	fHost.Truncate(0);
905 	fPort = 0;
906 	fPath.Truncate(0);
907 	fRequest.Truncate(0);
908 	fFragment.Truncate(0);
909 
910 	// Force re-generation of these fields
911 	fUrlStringValid = false;
912 	fUserInfoValid = false;
913 	fAuthorityValid = false;
914 }
915 
916 
917 bool
918 BUrl::_ContainsDelimiter(const BString& url)
919 {
920 	int32 len = url.Length();
921 
922 	for (int32 i = 0; i < len; i++) {
923 		switch (url[i]) {
924 			case ' ':
925 			case '\n':
926 			case '\t':
927 			case '\r':
928 			case '<':
929 			case '>':
930 			case '"':
931 				return true;
932 		}
933 	}
934 
935 	return false;
936 }
937 
938 
// States of the URL parser in BUrl::_ExplodeUrlString(), named after the
// component handled in each state.
enum explode_url_parse_state {
	EXPLODE_PROTOCOL,
	EXPLODE_PROTOCOLTERMINATOR,
	EXPLODE_AUTHORITYORPATH,
	EXPLODE_AUTHORITY,
	EXPLODE_PATH,
	EXPLODE_REQUEST, // query
	EXPLODE_FRAGMENT,
	EXPLODE_COMPLETE
};


// Predicate telling whether a character belongs to the component that is
// currently being scanned; used with char_offset_until_fn_false().
typedef bool (*explode_char_match_fn)(char c);
952 
953 
// Returns whether c may be part of a protocol (scheme): alphanumeric
// characters, '+', '.' and '-'.
static bool
explode_is_protocol_char(char c)
{
	// The ctype functions require a value in the range of unsigned char;
	// passing a negative char (e.g. a byte of a UTF-8 sequence) is undefined.
	return isalnum((unsigned char)c) || c == '+' || c == '.' || c == '-';
}
959 
960 
// Returns whether c may be part of the authority, i.e. whether it is none of
// the characters that end it: '/', '?' and '#'.
static bool
explode_is_authority_char(char c)
{
	return c != '/' && c != '?' && c != '#';
}
966 
967 
// Returns whether c may be part of the path, i.e. whether it is neither '?'
// nor '#'.
static bool
explode_is_path_char(char c)
{
	return c != '#' && c != '?';
}
973 
974 
// Returns whether c may be part of the request (query); only '#' ends it.
static bool
explode_is_request_char(char c)
{
	return !(c == '#');
}
980 
981 
982 static int32
983 char_offset_until_fn_false(const char* url, int32 len, int32 offset,
984 	explode_char_match_fn fn)
985 {
986 	while (offset < len && fn(url[offset]))
987 		offset++;
988 
989 	return offset;
990 }
991 
/*
 * This function takes a URL in string-form and parses the components of the
 * URL out.
 *
 * All fields are reset first. If the string contains a character rejected by
 * _ContainsDelimiter(), B_BAD_VALUE is returned and the URL is left empty;
 * otherwise the components are stored through the regular setters and B_OK
 * is returned.
 *
 * When PARSE_RAW_PATH_MASK_BIT is set in flags, the path is stored without
 * being normalized.
 */
status_t
BUrl::_ExplodeUrlString(const BString& url, uint32 flags)
{
	_ResetFields();

	// RFC3986, Appendix C; the URL should not contain whitespace or delimiters
	// by this point.

	if (_ContainsDelimiter(url))
		return B_BAD_VALUE;

	explode_url_parse_state state = EXPLODE_PROTOCOL;
	int32 offset = 0;
	int32 length = url.Length();
	bool forceHasHost = false;
	const char *url_c = url.String();

	// The regexp is provided in RFC3986 (URI generic syntax), Appendix B
	// ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?
	// The ensuing logic attempts to simulate the behaviour of extracting the groups
	// from the string without requiring a group-capable regex engine.

	// Each iteration handles one state. The loop ends as soon as the whole
	// string has been consumed, so states for trailing components that are
	// not present are never entered.
	while (offset < length) {
		switch (state) {

			case EXPLODE_PROTOCOL:
			{
				int32 end_protocol = char_offset_until_fn_false(url_c, length,
					offset, explode_is_protocol_char);

				if (end_protocol < length) {
					// Tentatively take the leading run as the protocol; the
					// next state verifies that it is followed by a ':'.
					SetProtocol(BString(&url_c[offset], end_protocol - offset));
					state = EXPLODE_PROTOCOLTERMINATOR;
					offset = end_protocol;
				} else {
					// No protocol was found, try parsing from the string
					// start, beginning with authority or path
					SetProtocol("");
					offset = 0;
					state = EXPLODE_AUTHORITYORPATH;
				}
				break;
			}

			case EXPLODE_PROTOCOLTERMINATOR:
			{
				if (url[offset] == ':') {
					offset++;
				} else {
					// No protocol was found, try parsing from the string
					// start, beginning with authority or path
					SetProtocol("");
					offset = 0;
				}
				state = EXPLODE_AUTHORITYORPATH;
				break;
			}

			case EXPLODE_AUTHORITYORPATH:
			{
				// The authority must start with //. If it isn't there, skip
				// to parsing the path.
				if (strncmp(&url_c[offset], "//", 2) == 0) {
					state = EXPLODE_AUTHORITY;
					// if we see the // then this would imply that a host is
					// to be rendered even if no host has been parsed.
					forceHasHost = true;
					offset += 2;
				} else {
					state = EXPLODE_PATH;
				}
				break;
			}

			case EXPLODE_AUTHORITY:
			{
				// The authority runs up to the next '/', '?' or '#'.
				int end_authority = char_offset_until_fn_false(url_c, length,
					offset, explode_is_authority_char);
				SetAuthority(BString(&url_c[offset], end_authority - offset));
				state = EXPLODE_PATH;
				offset = end_authority;
				break;
			}

			case EXPLODE_PATH:
			{
				// The path runs up to the next '?' or '#'.
				int end_path = char_offset_until_fn_false(url_c, length, offset,
					explode_is_path_char);
				BString path(&url_c[offset], end_path - offset);

				if ((flags & PARSE_RAW_PATH_MASK_BIT) == 0)
					SetPath(path);
				else
					_SetPathUnsafe(path);
				state = EXPLODE_REQUEST;
				offset = end_path;
				break;
			}

			case EXPLODE_REQUEST: // query
			{
				if (url_c[offset] == '?') {
					offset++;
					int end_request = char_offset_until_fn_false(url_c, length,
						offset, explode_is_request_char);
					SetRequest(BString(&url_c[offset], end_request - offset));
					offset = end_request;
					// if there is a "?" in the parse then it is clear that
					// there is a 'request' / query present regardless if there
					// are any valid key-value pairs.
					fHasRequest = true;
				}
				state = EXPLODE_FRAGMENT;
				break;
			}

			case EXPLODE_FRAGMENT:
			{
				if (url_c[offset] == '#') {
					// The fragment is everything that follows the '#'.
					offset++;
					SetFragment(BString(&url_c[offset], length - offset));
					offset = length;
				}
				state = EXPLODE_COMPLETE;
				break;
			}

			case EXPLODE_COMPLETE:
				// should never be reached - keeps the compiler happy
				break;

		}
	}

	// Applied after the loop so that it also holds when the loop ended
	// before the authority state was entered.
	if (forceHasHost)
		fHasHost = true;

	return B_OK;
}
1134 
1135 
1136 BString
1137 BUrl::_MergePath(const BString& relative) const
1138 {
1139 	// This implements RFC3986, Section 5.2.3.
1140 	if (HasAuthority() && fPath == "") {
1141 		BString result("/");
1142 		result << relative;
1143 		return result;
1144 	}
1145 
1146 	int32 lastSlashIndex = fPath.FindLast("/");
1147 
1148 	if (lastSlashIndex == B_ERROR)
1149 		return relative;
1150 
1151 	BString result;
1152 	result.SetTo(fPath, lastSlashIndex + 1);
1153 	result << relative;
1154 
1155 	return result;
1156 }
1157 
1158 
1159 // This sets the path without normalizing it. If fed with a path that has . or
1160 // .. segments, this would make the URL invalid.
1161 void
1162 BUrl::_SetPathUnsafe(const BString& path)
1163 {
1164 	fPath = path;
1165 	fHasPath = true; // RFC says an empty path is still a path
1166 	fUrlStringValid = false;
1167 }
1168 
1169 
// States of the authority parser in BUrl::SetAuthority(), named after the
// component handled in each state.
enum authority_parse_state {
	AUTHORITY_USERNAME,
	AUTHORITY_PASSWORD,
	AUTHORITY_HOST,
	AUTHORITY_PORT,
	AUTHORITY_COMPLETE
};
1177 
1178 void
1179 BUrl::SetAuthority(const BString& authority)
1180 {
1181 	fAuthority = authority;
1182 
1183 	fUser.Truncate(0);
1184 	fPassword.Truncate(0);
1185 	fHost.Truncate(0);
1186 	fPort = 0;
1187 	fHasPort = false;
1188 	fHasUserName = false;
1189 	fHasPassword = false;
1190 
1191 	bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@');
1192 	authority_parse_state state = AUTHORITY_USERNAME;
1193 	int32 offset = 0;
1194 	int32 length = authority.Length();
1195 	const char *authority_c = authority.String();
1196 
1197 	while (AUTHORITY_COMPLETE != state && offset < length) {
1198 
1199 		switch (state) {
1200 
1201 			case AUTHORITY_USERNAME:
1202 			{
1203 				if (hasUsernamePassword) {
1204 					int32 end_username = char_offset_until_fn_false(
1205 						authority_c, length, offset, _IsUsernameChar);
1206 
1207 					SetUserName(BString(&authority_c[offset],
1208 						end_username - offset));
1209 
1210 					state = AUTHORITY_PASSWORD;
1211 					offset = end_username;
1212 				} else {
1213 					state = AUTHORITY_HOST;
1214 				}
1215 				break;
1216 			}
1217 
1218 			case AUTHORITY_PASSWORD:
1219 			{
1220 				if (hasUsernamePassword && ':' == authority[offset]) {
1221 					offset++; // move past the delimiter
1222 					int32 end_password = char_offset_until_fn_false(
1223 						authority_c, length, offset, _IsPasswordChar);
1224 
1225 					SetPassword(BString(&authority_c[offset],
1226 						end_password - offset));
1227 
1228 					offset = end_password;
1229 				}
1230 
1231 				// if the host was preceded by a username + password couple
1232 				// then there will be an '@' delimiter to avoid.
1233 
1234 				if (authority_c[offset] == '@') {
1235 					offset++;
1236 				}
1237 
1238 				state = AUTHORITY_HOST;
1239 				break;
1240 			}
1241 
1242 			case AUTHORITY_HOST:
1243 			{
1244 
1245 				// the host may be enclosed within brackets in order to express
1246 				// an IPV6 address.
1247 
1248 				if (authority_c[offset] == '[') {
1249 					int32 end_ipv6_host = char_offset_until_fn_false(
1250 						authority_c, length, offset + 1, _IsIPV6Char);
1251 
1252 					if (authority_c[end_ipv6_host] == ']') {
1253 						SetHost(BString(&authority_c[offset],
1254 							(end_ipv6_host - offset) + 1));
1255 						state = AUTHORITY_PORT;
1256 						offset = end_ipv6_host + 1;
1257 					}
1258 				}
1259 
1260 				// if an IPV6 host was not found.
1261 
1262 				if (AUTHORITY_HOST == state) {
1263 					int32 end_host = char_offset_until_fn_false(
1264 						authority_c, length, offset, _IsHostChar);
1265 
1266 					SetHost(BString(&authority_c[offset], end_host - offset));
1267 					state = AUTHORITY_PORT;
1268 					offset = end_host;
1269 				}
1270 
1271 				break;
1272 			}
1273 
1274 			case AUTHORITY_PORT:
1275 			{
1276 				if (authority_c[offset] == ':') {
1277 					offset++;
1278 					int32 end_port = char_offset_until_fn_false(
1279 						authority_c, length, offset, _IsPortChar);
1280 					SetPort(atoi(&authority_c[offset]));
1281 					offset = end_port;
1282 				}
1283 
1284 				state = AUTHORITY_COMPLETE;
1285 
1286 				break;
1287 			}
1288 
1289 			case AUTHORITY_COMPLETE:
1290 				// should never be reached - keeps the compiler happy
1291 				break;
1292 		}
1293 	}
1294 
1295 	// An empty authority is still an authority, making it possible to have
1296 	// URLs such as file:///path/to/file.
1297 	// TODO however, there is no way to unset the authority once it is set...
1298 	// We may want to take a const char* parameter and allow NULL.
1299 	fHasHost = true;
1300 }
1301 
1302 
1303 /*static*/ BString
1304 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory)
1305 {
1306 	BString result;
1307 
1308 	for (int32 i = 0; i < chunk.Length(); i++) {
1309 		if (_IsUnreserved(chunk[i])
1310 				|| (directory && (chunk[i] == '/' || chunk[i] == '\\'))) {
1311 			result << chunk[i];
1312 		} else {
1313 			if (chunk[i] == ' ' && !strict) {
1314 				result << '+';
1315 					// In non-strict mode, spaces are encoded by a plus sign
1316 			} else {
1317 				char hexString[5];
1318 				snprintf(hexString, 5, "%X", chunk[i]);
1319 
1320 				result << '%' << hexString;
1321 			}
1322 		}
1323 	}
1324 
1325 	return result;
1326 }
1327 
1328 
1329 /*static*/ BString
1330 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict)
1331 {
1332 	BString result;
1333 
1334 	for (int32 i = 0; i < chunk.Length(); i++) {
1335 		if (chunk[i] == '+' && !strict)
1336 			result << ' ';
1337 		else {
1338 			char decoded = 0;
1339 			char* out = NULL;
1340 			char hexString[3];
1341 
1342 			if (chunk[i] == '%' && i < chunk.Length() - 2
1343 				&& isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) {
1344 				hexString[0] = chunk[i + 1];
1345 				hexString[1] = chunk[i + 2];
1346 				hexString[2] = 0;
1347 				decoded = (char)strtol(hexString, &out, 16);
1348 			}
1349 
1350 			if (out == hexString + 2) {
1351 				i += 2;
1352 				result << decoded;
1353 			} else
1354 				result << chunk[i];
1355 		}
1356 	}
1357 	return result;
1358 }
1359 
1360 
1361 bool
1362 BUrl::_IsHostIPV6Valid(size_t offset, int32 length) const
1363 {
1364 	for (int32 i = 0; i < length; i++) {
1365 		char c = fHost[offset + i];
1366 		if (!_IsIPV6Char(c))
1367 			return false;
1368 	}
1369 
1370 	return length > 0;
1371 }
1372 
1373 
1374 bool
1375 BUrl::_IsHostValid() const
1376 {
1377 	if (fHost.StartsWith("[") && fHost.EndsWith("]"))
1378 		return _IsHostIPV6Valid(1, fHost.Length() - 2);
1379 
1380 	bool lastWasDot = false;
1381 
1382 	for (int32 i = 0; i < fHost.Length(); i++) {
1383 		char c = fHost[i];
1384 
1385 		if (c == '.') {
1386 			if (lastWasDot || i == 0)
1387 				return false;
1388 			lastWasDot = true;
1389 		} else {
1390 			lastWasDot = false;
1391 		}
1392 
1393 		if (!_IsHostChar(c) && c != '.') {
1394 			// the underscore is technically not allowed, but occurs sometimes
1395 			// in the wild.
1396 			return false;
1397 		}
1398 	}
1399 
1400 	return true;
1401 }
1402 
1403 
1404 bool
1405 BUrl::_IsProtocolValid() const
1406 {
1407 	for (int8 index = 0; index < fProtocol.Length(); index++) {
1408 		char c = fProtocol[index];
1409 
1410 		if (index == 0 && !isalpha(c))
1411 			return false;
1412 		else if (!isalnum(c) && c != '+' && c != '-' && c != '.')
1413 			return false;
1414 	}
1415 
1416 	return !fProtocol.IsEmpty();
1417 }
1418 
1419 
1420 bool
1421 BUrl::_IsUnreserved(char c)
1422 {
1423 	return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~';
1424 }
1425 
1426 
1427 bool
1428 BUrl::_IsGenDelim(char c)
1429 {
1430 	return c == ':' || c == '/' || c == '?' || c == '#' || c == '['
1431 		|| c == ']' || c == '@';
1432 }
1433 
1434 
1435 bool
1436 BUrl::_IsSubDelim(char c)
1437 {
1438 	return c == '!' || c == '$' || c == '&' || c == '\'' || c == '('
1439 		|| c == ')' || c == '*' || c == '+' || c == ',' || c == ';'
1440 		|| c == '=';
1441 }
1442 
1443 
1444 bool
1445 BUrl::_IsUsernameChar(char c)
1446 {
1447 	return !(c == ':' || c == '@');
1448 }
1449 
1450 
1451 bool
1452 BUrl::_IsPasswordChar(char c)
1453 {
1454 	return !(c == '@');
1455 }
1456 
1457 
1458 bool
1459 BUrl::_IsHostChar(char c)
1460 {
1461 	return ((uint8) c) > 127 || isalnum(c) || c == '-' || c == '_' || c == '.'
1462 		|| c == '%';
1463 }
1464 
1465 
1466 bool
1467 BUrl::_IsPortChar(char c)
1468 {
1469 	return isdigit(c);
1470 }
1471 
1472 
1473 bool
1474 BUrl::_IsIPV6Char(char c)
1475 {
1476 	return c == ':' || isxdigit(c);
1477 }
1478 
1479 
1480 BString
1481 BUrl::_UrlMimeType() const
1482 {
1483 	BString mime;
1484 	mime << "application/x-vnd.Be.URL." << fProtocol;
1485 
1486 	return BString(mime);
1487 }
1488