xref: /haiku/src/kits/support/Url.cpp (revision 5d0fd0e4220b461e2021d5768ebaa936c13417f8)
1 /*
2  * Copyright 2010-2016 Haiku Inc. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  *
5  * Authors:
6  *		Christophe Huriaux, c.huriaux@gmail.com
7  *		Andrew Lindesay, apl@lindesay.co.nz
8  */
9 
10 
11 #include <Url.h>
12 
13 #include <ctype.h>
14 #include <cstdio>
15 #include <cstdlib>
16 #include <new>
17 
18 #include <MimeType.h>
19 #include <Roster.h>
20 
21 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
22 	#include <ICUWrapper.h>
23 #endif
24 
25 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
26 	#include <unicode/idna.h>
27 	#include <unicode/stringpiece.h>
28 #endif
29 
30 
// Name of the BMessage string field under which Archive() stores the URL and
// from which the archive constructor reads it back. The pointer itself is
// const as well, since it must never be re-seated.
static const char* const kArchivedUrl = "be:url string";
32 
33 
34 BUrl::BUrl(const char* url)
35 	:
36 	fUrlString(),
37 	fProtocol(),
38 	fUser(),
39 	fPassword(),
40 	fHost(),
41 	fPort(0),
42 	fPath(),
43 	fRequest(),
44 	fHasHost(false),
45 	fHasFragment(false)
46 {
47 	SetUrlString(url);
48 }
49 
50 
51 BUrl::BUrl(BMessage* archive)
52 	:
53 	fUrlString(),
54 	fProtocol(),
55 	fUser(),
56 	fPassword(),
57 	fHost(),
58 	fPort(0),
59 	fPath(),
60 	fRequest(),
61 	fHasHost(false),
62 	fHasFragment(false)
63 {
64 	BString url;
65 
66 	if (archive->FindString(kArchivedUrl, &url) == B_OK)
67 		SetUrlString(url);
68 	else
69 		_ResetFields();
70 }
71 
72 
73 BUrl::BUrl(const BUrl& other)
74 	:
75 	BArchivable(),
76 	fUrlString(),
77 	fProtocol(other.fProtocol),
78 	fUser(other.fUser),
79 	fPassword(other.fPassword),
80 	fHost(other.fHost),
81 	fPort(other.fPort),
82 	fPath(other.fPath),
83 	fRequest(other.fRequest),
84 	fFragment(other.fFragment),
85 	fUrlStringValid(other.fUrlStringValid),
86 	fAuthorityValid(other.fAuthorityValid),
87 	fUserInfoValid(other.fUserInfoValid),
88 	fHasProtocol(other.fHasProtocol),
89 	fHasUserName(other.fHasUserName),
90 	fHasPassword(other.fHasPassword),
91 	fHasHost(other.fHasHost),
92 	fHasPort(other.fHasPort),
93 	fHasPath(other.fHasPath),
94 	fHasRequest(other.fHasRequest),
95 	fHasFragment(other.fHasFragment)
96 {
97 	if (fUrlStringValid)
98 		fUrlString = other.fUrlString;
99 
100 	if (fAuthorityValid)
101 		fAuthority = other.fAuthority;
102 
103 	if (fUserInfoValid)
104 		fUserInfo = other.fUserInfo;
105 
106 }
107 
108 
109 BUrl::BUrl(const BUrl& base, const BString& location)
110 	:
111 	fUrlString(),
112 	fProtocol(),
113 	fUser(),
114 	fPassword(),
115 	fHost(),
116 	fPort(0),
117 	fPath(),
118 	fRequest(),
119 	fAuthorityValid(false),
120 	fUserInfoValid(false),
121 	fHasUserName(false),
122 	fHasPassword(false),
123 	fHasHost(false),
124 	fHasPort(false),
125 	fHasFragment(false)
126 {
127 	// This implements the algorithm in RFC3986, Section 5.2.
128 
129 	BUrl relative(location);
130 	if (relative.HasProtocol()) {
131 		SetProtocol(relative.Protocol());
132 		if (relative.HasAuthority())
133 			SetAuthority(relative.Authority());
134 		SetPath(relative.Path());
135 		SetRequest(relative.Request());
136 	} else {
137 		if (relative.HasAuthority()) {
138 			SetAuthority(relative.Authority());
139 			SetPath(relative.Path());
140 			SetRequest(relative.Request());
141 		} else {
142 			if (relative.Path().IsEmpty()) {
143 				_SetPathUnsafe(base.Path());
144 				if (relative.HasRequest())
145 					SetRequest(relative.Request());
146 				else
147 					SetRequest(base.Request());
148 			} else {
149 				if (relative.Path()[0] == '/')
150 					SetPath(relative.Path());
151 				else {
152 					BString path = base._MergePath(relative.Path());
153 					SetPath(path);
154 				}
155 				SetRequest(relative.Request());
156 			}
157 
158 			if (base.HasAuthority())
159 				SetAuthority(base.Authority());
160 		}
161 		SetProtocol(base.Protocol());
162 	}
163 
164 	if (relative.HasFragment())
165 		SetFragment(relative.Fragment());
166 }
167 
168 
169 BUrl::BUrl()
170 	:
171 	fUrlString(),
172 	fProtocol(),
173 	fUser(),
174 	fPassword(),
175 	fHost(),
176 	fPort(0),
177 	fPath(),
178 	fRequest(),
179 	fHasHost(false),
180 	fHasFragment(false)
181 {
182 	_ResetFields();
183 }
184 
185 
186 BUrl::BUrl(const BPath& path)
187 	:
188 	fUrlString(),
189 	fProtocol(),
190 	fUser(),
191 	fPassword(),
192 	fHost(),
193 	fPort(0),
194 	fPath(),
195 	fRequest(),
196 	fHasHost(false),
197 	fHasFragment(false)
198 {
199 	SetUrlString(UrlEncode(path.Path(), true, true));
200 	SetProtocol("file");
201 }
202 
203 
204 BUrl::~BUrl()
205 {
206 }
207 
208 
209 // #pragma mark URL fields modifiers
210 
211 
212 BUrl&
213 BUrl::SetUrlString(const BString& url)
214 {
215 	_ExplodeUrlString(url);
216 	return *this;
217 }
218 
219 
220 BUrl&
221 BUrl::SetProtocol(const BString& protocol)
222 {
223 	fProtocol = protocol;
224 	fHasProtocol = !fProtocol.IsEmpty();
225 	fUrlStringValid = false;
226 	return *this;
227 }
228 
229 
230 BUrl&
231 BUrl::SetUserName(const BString& user)
232 {
233 	fUser = user;
234 	fHasUserName = !fUser.IsEmpty();
235 	fUrlStringValid = false;
236 	fAuthorityValid = false;
237 	fUserInfoValid = false;
238 	return *this;
239 }
240 
241 
242 BUrl&
243 BUrl::SetPassword(const BString& password)
244 {
245 	fPassword = password;
246 	fHasPassword = !fPassword.IsEmpty();
247 	fUrlStringValid = false;
248 	fAuthorityValid = false;
249 	fUserInfoValid = false;
250 	return *this;
251 }
252 
253 
254 BUrl&
255 BUrl::SetHost(const BString& host)
256 {
257 	fHost = host;
258 	fHasHost = !fHost.IsEmpty();
259 	fUrlStringValid = false;
260 	fAuthorityValid = false;
261 	return *this;
262 }
263 
264 
265 BUrl&
266 BUrl::SetPort(int port)
267 {
268 	fPort = port;
269 	fHasPort = (port != 0);
270 	fUrlStringValid = false;
271 	fAuthorityValid = false;
272 	return *this;
273 }
274 
275 
276 BUrl&
277 BUrl::SetPath(const BString& path)
278 {
279 	// Implements RFC3986 section 5.2.4, "Remove dot segments"
280 
281 	// 1.
282 	BString output;
283 	BString input(path);
284 
285 	// 2.
286 	while (!input.IsEmpty()) {
287 		// 2.A.
288 		if (input.StartsWith("./")) {
289 			input.Remove(0, 2);
290 			continue;
291 		}
292 
293 		if (input.StartsWith("../")) {
294 			input.Remove(0, 3);
295 			continue;
296 		}
297 
298 		// 2.B.
299 		if (input.StartsWith("/./")) {
300 			input.Remove(0, 2);
301 			continue;
302 		}
303 
304 		if (input == "/.") {
305 			input.Remove(1, 1);
306 			continue;
307 		}
308 
309 		// 2.C.
310 		if (input.StartsWith("/../")) {
311 			input.Remove(0, 3);
312 			output.Truncate(output.FindLast('/'));
313 			continue;
314 		}
315 
316 		if (input == "/..") {
317 			input.Remove(1, 2);
318 			output.Truncate(output.FindLast('/'));
319 			continue;
320 		}
321 
322 		// 2.D.
323 		if (input == "." || input == "..") {
324 			break;
325 		}
326 
327 		if (input == "/.") {
328 			input.Remove(1, 1);
329 			continue;
330 		}
331 
332 		// 2.E.
333 		int slashpos = input.FindFirst('/', 1);
334 		if (slashpos > 0) {
335 			output.Append(input, slashpos);
336 			input.Remove(0, slashpos);
337 		} else {
338 			output.Append(input);
339 			break;
340 		}
341 	}
342 
343 	_SetPathUnsafe(output);
344 	return *this;
345 }
346 
347 
348 BUrl&
349 BUrl::SetRequest(const BString& request)
350 {
351 	fRequest = request;
352 	fHasRequest = !fRequest.IsEmpty();
353 	fUrlStringValid = false;
354 	return *this;
355 }
356 
357 
358 BUrl&
359 BUrl::SetFragment(const BString& fragment)
360 {
361 	fFragment = fragment;
362 	fHasFragment = true;
363 	fUrlStringValid = false;
364 	return *this;
365 }
366 
367 
368 // #pragma mark URL fields access
369 
370 
371 const BString&
372 BUrl::UrlString() const
373 {
374 	if (!fUrlStringValid) {
375 		fUrlString.Truncate(0);
376 
377 		if (HasProtocol()) {
378 			fUrlString << fProtocol << ':';
379 		}
380 
381 		if (HasAuthority()) {
382 			fUrlString << "//";
383 			fUrlString << Authority();
384 		}
385 		fUrlString << Path();
386 
387 		if (HasRequest())
388 			fUrlString << '?' << fRequest;
389 
390 		if (HasFragment())
391 			fUrlString << '#' << fFragment;
392 
393 		fUrlStringValid = true;
394 	}
395 
396 	return fUrlString;
397 }
398 
399 
400 const BString&
401 BUrl::Protocol() const
402 {
403 	return fProtocol;
404 }
405 
406 
407 const BString&
408 BUrl::UserName() const
409 {
410 	return fUser;
411 }
412 
413 
414 const BString&
415 BUrl::Password() const
416 {
417 	return fPassword;
418 }
419 
420 
421 const BString&
422 BUrl::UserInfo() const
423 {
424 	if (!fUserInfoValid) {
425 		fUserInfo = fUser;
426 
427 		if (HasPassword())
428 			fUserInfo << ':' << fPassword;
429 
430 		fUserInfoValid = true;
431 	}
432 
433 	return fUserInfo;
434 }
435 
436 
437 const BString&
438 BUrl::Host() const
439 {
440 	return fHost;
441 }
442 
443 
444 int
445 BUrl::Port() const
446 {
447 	return fPort;
448 }
449 
450 
451 const BString&
452 BUrl::Authority() const
453 {
454 	if (!fAuthorityValid) {
455 		fAuthority.Truncate(0);
456 
457 		if (HasUserInfo())
458 			fAuthority << UserInfo() << '@';
459 		fAuthority << Host();
460 
461 		if (HasPort())
462 			fAuthority << ':' << fPort;
463 
464 		fAuthorityValid = true;
465 	}
466 	return fAuthority;
467 }
468 
469 
470 const BString&
471 BUrl::Path() const
472 {
473 	return fPath;
474 }
475 
476 
477 const BString&
478 BUrl::Request() const
479 {
480 	return fRequest;
481 }
482 
483 
484 const BString&
485 BUrl::Fragment() const
486 {
487 	return fFragment;
488 }
489 
490 
491 // #pragma mark URL fields tests
492 
493 
494 bool
495 BUrl::IsValid() const
496 {
497 	if (!fHasProtocol)
498 		return false;
499 
500 	if (fProtocol == "http" || fProtocol == "https" || fProtocol == "ftp"
501 		|| fProtocol == "ipp" || fProtocol == "afp" || fProtocol == "telnet"
502 		|| fProtocol == "gopher" || fProtocol == "nntp" || fProtocol == "sftp"
503 		|| fProtocol == "finger" || fProtocol == "pop" || fProtocol == "imap") {
504 		return fHasHost && !fHost.IsEmpty();
505 	}
506 
507 	if (fProtocol == "file")
508 		return fHasPath;
509 
510 	return true;
511 }
512 
513 
514 bool
515 BUrl::HasProtocol() const
516 {
517 	return fHasProtocol;
518 }
519 
520 
521 bool
522 BUrl::HasAuthority() const
523 {
524 	return fHasHost || fHasUserName;
525 }
526 
527 
528 bool
529 BUrl::HasUserName() const
530 {
531 	return fHasUserName;
532 }
533 
534 
535 bool
536 BUrl::HasPassword() const
537 {
538 	return fHasPassword;
539 }
540 
541 
542 bool
543 BUrl::HasUserInfo() const
544 {
545 	return fHasUserName || fHasPassword;
546 }
547 
548 
549 bool
550 BUrl::HasHost() const
551 {
552 	return fHasHost;
553 }
554 
555 
556 bool
557 BUrl::HasPort() const
558 {
559 	return fHasPort;
560 }
561 
562 
563 bool
564 BUrl::HasPath() const
565 {
566 	return fHasPath;
567 }
568 
569 
570 bool
571 BUrl::HasRequest() const
572 {
573 	return fHasRequest;
574 }
575 
576 
577 bool
578 BUrl::HasFragment() const
579 {
580 	return fHasFragment;
581 }
582 
583 
584 // #pragma mark URL encoding/decoding of needed fields
585 
586 
587 void
588 BUrl::UrlEncode(bool strict)
589 {
590 	fUser = _DoUrlEncodeChunk(fUser, strict);
591 	fPassword = _DoUrlEncodeChunk(fPassword, strict);
592 	fHost = _DoUrlEncodeChunk(fHost, strict);
593 	fFragment = _DoUrlEncodeChunk(fFragment, strict);
594 	fPath = _DoUrlEncodeChunk(fPath, strict, true);
595 }
596 
597 
598 void
599 BUrl::UrlDecode(bool strict)
600 {
601 	fUser = _DoUrlDecodeChunk(fUser, strict);
602 	fPassword = _DoUrlDecodeChunk(fPassword, strict);
603 	fHost = _DoUrlDecodeChunk(fHost, strict);
604 	fFragment = _DoUrlDecodeChunk(fFragment, strict);
605 	fPath = _DoUrlDecodeChunk(fPath, strict);
606 }
607 
608 
609 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
610 status_t
611 BUrl::IDNAToAscii()
612 {
613 	UErrorCode err = U_ZERO_ERROR;
614 	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
615 	icu::IDNAInfo info;
616 
617 	BString result;
618 	BStringByteSink sink(&result);
619 	converter->nameToASCII_UTF8(icu::StringPiece(fHost.String()), sink, info,
620 		err);
621 
622 	delete converter;
623 
624 	if (U_FAILURE(err))
625 		return B_ERROR;
626 
627 	fHost = result;
628 	return B_OK;
629 }
630 #endif
631 
632 
633 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
634 status_t
635 BUrl::IDNAToUnicode()
636 {
637 	UErrorCode err = U_ZERO_ERROR;
638 	icu::IDNA* converter = icu::IDNA::createUTS46Instance(0, err);
639 	icu::IDNAInfo info;
640 
641 	BString result;
642 	BStringByteSink sink(&result);
643 	converter->nameToUnicodeUTF8(icu::StringPiece(fHost.String()), sink, info,
644 		err);
645 
646 	delete converter;
647 
648 	if (U_FAILURE(err))
649 		return B_ERROR;
650 
651 	fHost = result;
652 	return B_OK;
653 }
654 #endif
655 
656 
657 // #pragma mark - utility functionality
658 
659 
660 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
661 bool
662 BUrl::HasPreferredApplication() const
663 {
664 	BString appSignature = PreferredApplication();
665 	BMimeType mime(appSignature.String());
666 
667 	if (appSignature.IFindFirst("application/") == 0
668 		&& mime.IsValid())
669 		return true;
670 
671 	return false;
672 }
673 #endif
674 
675 
676 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
677 BString
678 BUrl::PreferredApplication() const
679 {
680 	BString appSignature;
681 	BMimeType mime(_UrlMimeType().String());
682 	mime.GetPreferredApp(appSignature.LockBuffer(B_MIME_TYPE_LENGTH));
683 	appSignature.UnlockBuffer();
684 
685 	return BString(appSignature);
686 }
687 #endif
688 
689 
690 #ifdef HAIKU_TARGET_PLATFORM_HAIKU
691 status_t
692 BUrl::OpenWithPreferredApplication(bool onProblemAskUser) const
693 {
694 	if (!IsValid())
695 		return B_BAD_VALUE;
696 
697 	BString urlString = UrlString();
698 	if (urlString.Length() > B_PATH_NAME_LENGTH) {
699 		// TODO: BAlert
700 		//	if (onProblemAskUser)
701 		//		BAlert ... Too long URL!
702 #if DEBUG
703 		fprintf(stderr, "URL too long");
704 #endif
705 		return B_NAME_TOO_LONG;
706 	}
707 
708 	char* argv[] = {
709 		const_cast<char*>("BUrlInvokedApplication"),
710 		const_cast<char*>(urlString.String()),
711 		NULL
712 	};
713 
714 #if DEBUG
715 	if (HasPreferredApplication())
716 		printf("HasPreferredApplication() == true\n");
717 	else
718 		printf("HasPreferredApplication() == false\n");
719 #endif
720 
721 	status_t status = be_roster->Launch(_UrlMimeType().String(), 1, argv+1);
722 	if (status != B_OK) {
723 #if DEBUG
724 		fprintf(stderr, "Opening URL failed: %s\n", strerror(status));
725 #endif
726 	}
727 
728 	return status;
729 }
730 #endif
731 
732 
733 // #pragma mark Url encoding/decoding of string
734 
735 
736 /*static*/ BString
737 BUrl::UrlEncode(const BString& url, bool strict, bool directory)
738 {
739 	return _DoUrlEncodeChunk(url, strict, directory);
740 }
741 
742 
743 /*static*/ BString
744 BUrl::UrlDecode(const BString& url, bool strict)
745 {
746 	return _DoUrlDecodeChunk(url, strict);
747 }
748 
749 
750 // #pragma mark BArchivable members
751 
752 
753 status_t
754 BUrl::Archive(BMessage* into, bool deep) const
755 {
756 	status_t ret = BArchivable::Archive(into, deep);
757 
758 	if (ret == B_OK)
759 		ret = into->AddString(kArchivedUrl, UrlString());
760 
761 	return ret;
762 }
763 
764 
765 /*static*/ BArchivable*
766 BUrl::Instantiate(BMessage* archive)
767 {
768 	if (validate_instantiation(archive, "BUrl"))
769 		return new(std::nothrow) BUrl(archive);
770 	return NULL;
771 }
772 
773 
774 // #pragma mark URL comparison
775 
776 
777 bool
778 BUrl::operator==(BUrl& other) const
779 {
780 	UrlString();
781 	other.UrlString();
782 
783 	return fUrlString == other.fUrlString;
784 }
785 
786 
787 bool
788 BUrl::operator!=(BUrl& other) const
789 {
790 	return !(*this == other);
791 }
792 
793 
794 // #pragma mark URL assignment
795 
796 
797 const BUrl&
798 BUrl::operator=(const BUrl& other)
799 {
800 	fUrlStringValid = other.fUrlStringValid;
801 	if (fUrlStringValid)
802 		fUrlString = other.fUrlString;
803 
804 	fAuthorityValid = other.fAuthorityValid;
805 	if (fAuthorityValid)
806 		fAuthority = other.fAuthority;
807 
808 	fUserInfoValid = other.fUserInfoValid;
809 	if (fUserInfoValid)
810 		fUserInfo = other.fUserInfo;
811 
812 	fProtocol = other.fProtocol;
813 	fUser = other.fUser;
814 	fPassword = other.fPassword;
815 	fHost = other.fHost;
816 	fPort = other.fPort;
817 	fPath = other.fPath;
818 	fRequest = other.fRequest;
819 	fFragment = other.fFragment;
820 
821 	fHasProtocol = other.fHasProtocol;
822 	fHasUserName = other.fHasUserName;
823 	fHasPassword = other.fHasPassword;
824 	fHasHost = other.fHasHost;
825 	fHasPort = other.fHasPort;
826 	fHasPath = other.fHasPath;
827 	fHasRequest = other.fHasRequest;
828 	fHasFragment = other.fHasFragment;
829 
830 	return *this;
831 }
832 
833 
834 const BUrl&
835 BUrl::operator=(const BString& string)
836 {
837 	SetUrlString(string);
838 	return *this;
839 }
840 
841 
842 const BUrl&
843 BUrl::operator=(const char* string)
844 {
845 	SetUrlString(string);
846 	return *this;
847 }
848 
849 
850 // #pragma mark URL to string conversion
851 
852 
853 BUrl::operator const char*() const
854 {
855 	return UrlString();
856 }
857 
858 
859 void
860 BUrl::_ResetFields()
861 {
862 	fHasProtocol = false;
863 	fHasUserName = false;
864 	fHasPassword = false;
865 	fHasHost = false;
866 	fHasPort = false;
867 	fHasPath = false;
868 	fHasRequest = false;
869 	fHasFragment = false;
870 
871 	fProtocol.Truncate(0);
872 	fUser.Truncate(0);
873 	fPassword.Truncate(0);
874 	fHost.Truncate(0);
875 	fPort = 0;
876 	fPath.Truncate(0);
877 	fRequest.Truncate(0);
878 	fFragment.Truncate(0);
879 
880 	// Force re-generation of these fields
881 	fUrlStringValid = false;
882 	fUserInfoValid = false;
883 	fAuthorityValid = false;
884 }
885 
886 
887 bool
888 BUrl::_ContainsDelimiter(const BString& url)
889 {
890 	int32 len = url.Length();
891 
892 	for (int32 i = 0; i < len; i++) {
893 		switch (url[i]) {
894 			case ' ':
895 			case '\n':
896 			case '\t':
897 			case '\r':
898 			case '<':
899 			case '>':
900 			case '"':
901 				return true;
902 		}
903 	}
904 
905 	return false;
906 }
907 
908 
// States of the URL parser in BUrl::_ExplodeUrlString(), in the order in
// which the parts appear in a URL.
enum explode_url_parse_state {
	EXPLODE_PROTOCOL,				// reading the protocol (scheme)
	EXPLODE_PROTOCOLTERMINATOR,		// expecting the ':' after the protocol
	EXPLODE_AUTHORITYORPATH,		// deciding between "//authority" and path
	EXPLODE_AUTHORITY,				// reading the authority
	EXPLODE_PATH,					// reading the path
	EXPLODE_REQUEST,				// reading the request (query)
	EXPLODE_FRAGMENT,				// reading the fragment
	EXPLODE_COMPLETE				// nothing left to parse
};
919 
920 
// Character predicate used by char_offset_until_fn_false(): returns true as
// long as the character still belongs to the part being scanned.
typedef bool (*explode_char_match_fn)(char);
922 
923 
// Tells whether "c" may appear in a protocol (scheme) name: alphanumerics,
// '+', '.' and '-'.
static bool
explode_is_protocol_char(char c)
{
	// The <ctype.h> functions require a value representable as unsigned
	// char; a plain (possibly negative) char from a UTF-8 sequence is not.
	return isalnum(static_cast<unsigned char>(c)) != 0
		|| c == '+' || c == '.' || c == '-';
}
929 
930 
// The authority runs up to the first '/', '?' or '#'.
static bool
explode_is_authority_char(char c)
{
	switch (c) {
		case '/':
		case '?':
		case '#':
			return false;

		default:
			return true;
	}
}
936 
937 
// The path runs up to the first '?' or '#'.
static bool
explode_is_path_char(char c)
{
	if (c == '?' || c == '#')
		return false;

	return true;
}
943 
944 
// The request (query) runs up to the first '#'.
static bool
explode_is_request_char(char c)
{
	return !(c == '#');
}
950 
951 
952 static int32
953 char_offset_until_fn_false(const char* url, int32 len, int32 offset,
954 	explode_char_match_fn fn)
955 {
956 	while (offset < len && fn(url[offset]))
957 		offset++;
958 
959 	return offset;
960 }
961 
962 /*
963  * This function takes a URL in string-form and parses the components of the URL out.
964  */
965 status_t
966 BUrl::_ExplodeUrlString(const BString& url)
967 {
968 	_ResetFields();
969 
970 	// RFC3986, Appendix C; the URL should not contain whitespace or delimiters
971 	// by this point.
972 
973 	if (_ContainsDelimiter(url))
974 		return B_BAD_VALUE;
975 
976 	explode_url_parse_state state = EXPLODE_PROTOCOL;
977 	int32 offset = 0;
978 	int32 length = url.Length();
979 	const char *url_c = url.String();
980 
981 	// The regexp is provided in RFC3986 (URI generic syntax), Appendix B
982 	// ^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?
983 	// The ensuing logic attempts to simulate the behaviour of extracting the groups
984 	// from the string without requiring a group-capable regex engine.
985 
986 	while (offset < length) {
987 		switch (state) {
988 
989 			case EXPLODE_PROTOCOL:
990 			{
991 				int32 end_protocol = char_offset_until_fn_false(url_c, length,
992 					offset, explode_is_protocol_char);
993 
994 				if (end_protocol < length) {
995 					SetProtocol(BString(&url_c[offset], end_protocol - offset));
996 					state = EXPLODE_PROTOCOLTERMINATOR;
997 					offset = end_protocol;
998 				} else {
999 					// No protocol was found, try parsing from the string
1000 					// start, beginning with authority or path
1001 					SetProtocol("");
1002 					offset = 0;
1003 					state = EXPLODE_AUTHORITYORPATH;
1004 				}
1005 				break;
1006 			}
1007 
1008 			case EXPLODE_PROTOCOLTERMINATOR:
1009 			{
1010 				if (url[offset] == ':') {
1011 					offset++;
1012 				} else {
1013 					// No protocol was found, try parsing from the string
1014 					// start, beginning with authority or path
1015 					SetProtocol("");
1016 					offset = 0;
1017 				}
1018 				state = EXPLODE_AUTHORITYORPATH;
1019 				break;
1020 			}
1021 
1022 			case EXPLODE_AUTHORITYORPATH:
1023 			{
1024 				// The authority must start with //. If it isn't there, skip
1025 				// to parsing the path.
1026 				if (strncmp(&url_c[offset], "//", 2) == 0) {
1027 					state = EXPLODE_AUTHORITY;
1028 					offset += 2;
1029 				} else {
1030 					state = EXPLODE_PATH;
1031 				}
1032 				break;
1033 			}
1034 
1035 			case EXPLODE_AUTHORITY:
1036 			{
1037 				int end_authority = char_offset_until_fn_false(url_c, length,
1038 					offset, explode_is_authority_char);
1039 				SetAuthority(BString(&url_c[offset], end_authority - offset));
1040 				state = EXPLODE_PATH;
1041 				offset = end_authority;
1042 				break;
1043 			}
1044 
1045 			case EXPLODE_PATH:
1046 			{
1047 				int end_path = char_offset_until_fn_false(url_c, length, offset,
1048 					explode_is_path_char);
1049 				SetPath(BString(&url_c[offset], end_path - offset));
1050 				state = EXPLODE_REQUEST;
1051 				offset = end_path;
1052 				break;
1053 			}
1054 
1055 			case EXPLODE_REQUEST: // query
1056 			{
1057 				if (url_c[offset] == '?') {
1058 					offset++;
1059 					int end_request = char_offset_until_fn_false(url_c, length,
1060 						offset, explode_is_request_char);
1061 					SetRequest(BString(&url_c[offset], end_request - offset));
1062 					offset = end_request;
1063 				}
1064 				state = EXPLODE_FRAGMENT;
1065 				break;
1066 			}
1067 
1068 			case EXPLODE_FRAGMENT:
1069 			{
1070 				if (url_c[offset] == '#') {
1071 					offset++;
1072 					SetFragment(BString(&url_c[offset], length - offset));
1073 					offset = length;
1074 				}
1075 				state = EXPLODE_COMPLETE;
1076 				break;
1077 			}
1078 
1079 			case EXPLODE_COMPLETE:
1080 				// should never be reached - keeps the compiler happy
1081 				break;
1082 
1083 		}
1084 	}
1085 
1086 	return B_OK;
1087 }
1088 
1089 
1090 BString
1091 BUrl::_MergePath(const BString& relative) const
1092 {
1093 	// This implements RFC3986, Section 5.2.3.
1094 	if (HasAuthority() && fPath == "") {
1095 		BString result("/");
1096 		result << relative;
1097 		return result;
1098 	}
1099 
1100 	BString result(fPath);
1101 	result.Truncate(result.FindLast("/") + 1);
1102 	result << relative;
1103 
1104 	return result;
1105 }
1106 
1107 
1108 // This sets the path without normalizing it. If fed with a path that has . or
1109 // .. segments, this would make the URL invalid.
1110 void
1111 BUrl::_SetPathUnsafe(const BString& path)
1112 {
1113 	fPath = path;
1114 	fHasPath = true; // RFC says an empty path is still a path
1115 	fUrlStringValid = false;
1116 }
1117 
1118 
// States of the authority parser in BUrl::SetAuthority(), in the order in
// which the parts appear in "user:password@host:port".
enum authority_parse_state {
	AUTHORITY_USERNAME,		// reading the user name
	AUTHORITY_PASSWORD,		// reading the optional ":password" and the '@'
	AUTHORITY_HOST,			// reading the host (possibly a bracketed IPv6)
	AUTHORITY_PORT,			// reading the optional ":port"
	AUTHORITY_COMPLETE		// nothing left to parse
};
1126 
1127 
// The user name runs up to the first ':' or '@'.
static bool
authority_is_username_char(char c)
{
	if (c == ':' || c == '@')
		return false;

	return true;
}
1133 
1134 
// The password runs up to the first '@'.
static bool
authority_is_password_char(char c)
{
	return c != '@';
}
1140 
1141 
// Characters accepted between the brackets of an IPv6 host: hexadecimal
// digits (either case) and ':'.
static bool
authority_is_ipv6_host_char(char c)
{
	if (c == ':')
		return true;

	if (c >= '0' && c <= '9')
		return true;

	const bool upperHex = c >= 'A' && c <= 'F';
	const bool lowerHex = c >= 'a' && c <= 'f';
	return upperHex || lowerHex;
}
1147 
1148 
// A (non-IPv6) host runs up to the first ':' or '/'.
static bool
authority_is_host_char(char c)
{
	if (c == ':' || c == '/')
		return false;

	return true;
}
1153 
1154 
// A port consists of decimal digits only.
static bool
authority_is_port_char(char c)
{
	return '0' <= c && c <= '9';
}
1159 
1160 
1161 void
1162 BUrl::SetAuthority(const BString& authority)
1163 {
1164 	fAuthority = authority;
1165 
1166 	fUser.Truncate(0);
1167 	fPassword.Truncate(0);
1168 	fHost.Truncate(0);
1169 	fPort = 0;
1170 	fHasPort = false;
1171 	fHasUserName = false;
1172 	fHasPassword = false;
1173 
1174 	bool hasUsernamePassword = B_ERROR != fAuthority.FindFirst('@');
1175 	authority_parse_state state = AUTHORITY_USERNAME;
1176 	int32 offset = 0;
1177 	int32 length = authority.Length();
1178 	const char *authority_c = authority.String();
1179 
1180 	while (AUTHORITY_COMPLETE != state && offset < length) {
1181 
1182 		switch (state) {
1183 
1184 			case AUTHORITY_USERNAME:
1185 			{
1186 				if (hasUsernamePassword) {
1187 					int32 end_username = char_offset_until_fn_false(
1188 						authority_c, length, offset,
1189 						authority_is_username_char);
1190 
1191 					SetUserName(BString(&authority_c[offset],
1192 						end_username - offset));
1193 
1194 					state = AUTHORITY_PASSWORD;
1195 					offset = end_username;
1196 				} else {
1197 					state = AUTHORITY_HOST;
1198 				}
1199 				break;
1200 			}
1201 
1202 			case AUTHORITY_PASSWORD:
1203 			{
1204 				if (hasUsernamePassword && ':' == authority[offset]) {
1205 					offset++; // move past the delimiter
1206 					int32 end_password = char_offset_until_fn_false(
1207 						authority_c, length, offset,
1208 						authority_is_password_char);
1209 
1210 					SetPassword(BString(&authority_c[offset],
1211 						end_password - offset));
1212 
1213 					offset = end_password;
1214 				}
1215 
1216 				// if the host was preceded by a username + password couple
1217 				// then there will be an '@' delimiter to avoid.
1218 
1219 				if (authority_c[offset] == '@') {
1220 					offset++;
1221 				}
1222 
1223 				state = AUTHORITY_HOST;
1224 				break;
1225 			}
1226 
1227 			case AUTHORITY_HOST:
1228 			{
1229 
1230 				// the host may be enclosed within brackets in order to express
1231 				// an IPV6 address.
1232 
1233 				if (authority_c[offset] == '[') {
1234 					int32 end_ipv6_host = char_offset_until_fn_false(
1235 						authority_c, length, offset + 1,
1236 						authority_is_ipv6_host_char);
1237 
1238 					if (authority_c[end_ipv6_host] == ']') {
1239 						SetHost(BString(&authority_c[offset],
1240 							(end_ipv6_host - offset) + 1));
1241 						state = AUTHORITY_PORT;
1242 						offset = end_ipv6_host + 1;
1243 					}
1244 				}
1245 
1246 				// if an IPV6 host was not found.
1247 
1248 				if (AUTHORITY_HOST == state) {
1249 					int32 end_host = char_offset_until_fn_false(
1250 						authority_c, length, offset, authority_is_host_char);
1251 
1252 					SetHost(BString(&authority_c[offset], end_host - offset));
1253 					state = AUTHORITY_PORT;
1254 					offset = end_host;
1255 				}
1256 
1257 				break;
1258 			}
1259 
1260 			case AUTHORITY_PORT:
1261 			{
1262 				if (authority_c[offset] == ':') {
1263 					offset++;
1264 					int32 end_port = char_offset_until_fn_false(
1265 						authority_c, length, offset, authority_is_port_char);
1266 					SetPort(atoi(&authority_c[offset]));
1267 					offset = end_port;
1268 				}
1269 
1270 				state = AUTHORITY_COMPLETE;
1271 
1272 				break;
1273 			}
1274 
1275 			case AUTHORITY_COMPLETE:
1276 				// should never be reached - keeps the compiler happy
1277 				break;
1278 		}
1279 	}
1280 
1281 	// An empty authority is still an authority, making it possible to have
1282 	// URLs such as file:///path/to/file.
1283 	// TODO however, there is no way to unset the authority once it is set...
1284 	// We may want to take a const char* parameter and allow NULL.
1285 	fHasHost = true;
1286 }
1287 
1288 
1289 /*static*/ BString
1290 BUrl::_DoUrlEncodeChunk(const BString& chunk, bool strict, bool directory)
1291 {
1292 	BString result;
1293 
1294 	for (int32 i = 0; i < chunk.Length(); i++) {
1295 		if (_IsUnreserved(chunk[i])
1296 				|| (directory && (chunk[i] == '/' || chunk[i] == '\\'))) {
1297 			result << chunk[i];
1298 		} else {
1299 			if (chunk[i] == ' ' && !strict) {
1300 				result << '+';
1301 					// In non-strict mode, spaces are encoded by a plus sign
1302 			} else {
1303 				char hexString[5];
1304 				snprintf(hexString, 5, "%X", chunk[i]);
1305 
1306 				result << '%' << hexString;
1307 			}
1308 		}
1309 	}
1310 
1311 	return result;
1312 }
1313 
1314 
1315 /*static*/ BString
1316 BUrl::_DoUrlDecodeChunk(const BString& chunk, bool strict)
1317 {
1318 	BString result;
1319 
1320 	for (int32 i = 0; i < chunk.Length(); i++) {
1321 		if (chunk[i] == '+' && !strict)
1322 			result << ' ';
1323 		else {
1324 			char decoded = 0;
1325 			char* out = NULL;
1326 			char hexString[3];
1327 
1328 			if (chunk[i] == '%' && i < chunk.Length() - 2
1329 				&& isxdigit(chunk[i + 1]) && isxdigit(chunk[i+2])) {
1330 				hexString[0] = chunk[i + 1];
1331 				hexString[1] = chunk[i + 2];
1332 				hexString[2] = 0;
1333 				decoded = (char)strtol(hexString, &out, 16);
1334 			}
1335 
1336 			if (out == hexString + 2) {
1337 				i += 2;
1338 				result << decoded;
1339 			} else
1340 				result << chunk[i];
1341 		}
1342 	}
1343 	return result;
1344 }
1345 
1346 
1347 bool
1348 BUrl::_IsProtocolValid()
1349 {
1350 	for (int8 index = 0; index < fProtocol.Length(); index++) {
1351 		char c = fProtocol[index];
1352 
1353 		if (index == 0 && !isalpha(c))
1354 			return false;
1355 		else if (!isalnum(c) && c != '+' && c != '-' && c != '.')
1356 			return false;
1357 	}
1358 
1359 	return fProtocol.Length() > 0;
1360 }
1361 
1362 
1363 bool
1364 BUrl::_IsUnreserved(char c)
1365 {
1366 	return isalnum(c) || c == '-' || c == '.' || c == '_' || c == '~';
1367 }
1368 
1369 
1370 bool
1371 BUrl::_IsGenDelim(char c)
1372 {
1373 	return c == ':' || c == '/' || c == '?' || c == '#' || c == '['
1374 		|| c == ']' || c == '@';
1375 }
1376 
1377 
1378 bool
1379 BUrl::_IsSubDelim(char c)
1380 {
1381 	return c == '!' || c == '$' || c == '&' || c == '\'' || c == '('
1382 		|| c == ')' || c == '*' || c == '+' || c == ',' || c == ';'
1383 		|| c == '=';
1384 }
1385 
1386 
1387 BString
1388 BUrl::_UrlMimeType() const
1389 {
1390 	BString mime;
1391 	mime << "application/x-vnd.Be.URL." << fProtocol;
1392 
1393 	return BString(mime);
1394 }
1395