xref: /haiku/docs/user/support/Url.dox (revision ff1ee776fe00c4b29992cd25ef94463302ba6a92)
1/*
2 * Copyright 2019 Haiku, Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *      Nabanita Dash, dashnabanita@gmail.com
7 *
8 * Proofreaders:
9 *      Adrien Destugues, pulkomandy@gmail.com
10 *
11 * Corresponds to:
12 *		headers/os/support/Url.h	 hrev52332
13 *		src/kits/support/Url.cpp	 hrev52332
14 */
15
16/*!
17	\file Url.h
18	\ingroup support
19	\ingroup libbe
20	\brief Provides the BUrl class
21	\since Haiku R1
22*/
23
24/*!
25	\class BUrl
26	\ingroup support
27	\ingroup libbe
28	\brief Represents and manipulates an URL (Uniform Resource Locator).
29	\since Haiku R1
30
31	A "Uniform Resource Locator" identifies a place where a resource can
32	be found. It specifies both a location and a mechanism to retrieve the
33	data. For example, http://www.example.com/index.html indicates a protocol
34	(http), a hostname (www.example.com), and a file name (index.html).
35
36	Every URL consists of a sequence of up to five components:
37	protocol, authority (consisting of login and password, hostname and port),
38	path, request and fragment.
39
40	The format is provided in RFC3986 (URI generic syntax), Appendix B as a regular expression:
41	^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\\?([^#]*))?(#(.*))?
42
43	This regular expression makes it possible to parse any string as an URL (if there are no
44	special characters to separate the fields, everything will end up in the path component).
45	However, some characters are not allowed: space, newlines, tabs, <, > and ". If any of these
46	is present in the URL string, the parsing results in an empty URL.
47
48	The protocol (http, https, ftp, irc, etc.) identifies the way the resource
49	can be accessed.
50
51	Authority consists of userinfo such as username and password, a host
52	subcomponent consisting of IP address or hostname and a port subcomponent.
53
54	The path component locates the resource inside the authority's hierarchy,
55	and can have different formats (for example, directory names separated by
56	slashes) depending on the protocol in use.
57
58	The request component (preceded by a question mark) contains a query
59	string of non-hierarchical data.
60
61	The fragment contains a fragment identifier providing direction to a
62	secondary resource, usually an identifier for a specific element within the
63	resource such as a paragraph in a text.
64*/
65
66//! @{
67
68/*!
69	\fn BUrl::BUrl(const char* url);
70	\brief Constructs a BUrl and fills it.
71
72	\param url A string to parse and populate the URL fields from.
73
74	Call InitCheck() to verify that the string was successfully parsed and
75	resulted in a valid URL.
76*/
77
78/*!
79	\fn BUrl::BUrl(BMessage* archive);
80	\brief Restore an URL from archived data.
81
82	\param archive An archived BUrl (created with BArchivable::Archive()).
83
84	Usually, archived messages are restored using BArchivable::Unarchive()
85	which will automatically instantiate the correct class.
86*/
87
88/*!
89	\fn BUrl::BUrl(const BUrl& other);
90	\brief Copy constructor
91
92	\param other A BUrl object to copy.
93*/
94
95/*!
96	\fn BUrl::BUrl(const BUrl& base, const BString& relative);
97	\brief Construct a BUrl using a known base and a string representing a relative URL.
98
99	\param base A BUrl object that holds base URL.
100	\param relative A path relative to the base URL.
101
102	URLs can sometimes be represented in relative form. For example, links in
103	a webpage may refer to only a path, assuming the protocol and authority
104	are the same as the current page. This constructor applies the required
105	resolution process to construct a complete, standalone URL from such a
106	string.
107
108	For example, the following:
109
110	BUrl base("http://example.org/path/page.html");
111	BUrl relative(base, "subdirectory/otherpage.html");
112
113	results in:
114
115	"http://example.org/path/subdirectory/otherpage.html"
116
117	The relative URL can override any of the fields from the original one. The algorithm
118	for resolution is documented in RFC3986 section 5.
119*/
120
121/*!
122	\fn BUrl::BUrl(const BPath& path);
123	\brief Constructs a BUrl identifying a local file.
124
125	\param path The path to convert into an URL
126
127	The generated URL uses the file protocol, and its path component is the
128	path given as a parameter.
129*/
130
131/*!
132	\fn BUrl::BUrl();
133	\brief Constructs an empty BUrl.
134*/
135
136/*!
137	\fn BUrl::~BUrl();
138	\brief Destructor for BUrl.
139*/
140
141//! @}
142
143//! @{
144
145/*!
146	\fn BUrl& BUrl::SetUrlString(const BString& url);
147	\brief Parse a string and set the URL accordingly
148
149	\param url A string to parse as an absolute URL.
150*/
151
152/*!
153	\fn BUrl& BUrl::SetProtocol(const BString& scheme);
154	\brief Set the protocol
155
156	\param scheme The protocol to use.
157*/
158
159/*!
160	\fn BUrl& BUrl::SetUserName(const BString& user);
161	\brief Set the username in the authority component
162
163	\param user The username.
164*/
165
166/*!
167	\fn BUrl& BUrl::SetPassword(const BString& password);
168	\brief Set the password in the authority component
169
170	\param password The password.
171*/
172
173/*!
174	\fn void BUrl::SetAuthority(const BString& authority);
175	\brief Replace the complete authority component
176
177	\param authority The authority component.
178
179	The username, password, host and port fields are replaced. The authority
180	can be of the form username:password\@host:port
181*/
182
183/*!
184	\fn BUrl& BUrl::SetHost(const BString& host);
185	\brief Sets the host part of the authority component.
186
187	\param host The hostname or address to use.
188*/
189
190/*!
191	\fn BUrl& BUrl::SetPort(int port);
192	\brief Set the port of the authority component
193
194	\param port The port number to use (usually a TCP or UDP port).
195*/
196
197/*!
198	\fn BUrl& BUrl::SetPath(const BString& path);
199	\brief Set the path of the URL.
200
201	\param path The path to use.
202*/
203
204/*!
205	\fn BUrl& BUrl::SetRequest(const BString& request);
206	\brief Set the request part of the URL.
207
208	\param request The request string.
209*/
210
211/*!
212	\fn BUrl& BUrl::SetFragment(const BString& fragment);
213	\brief Set the fragment part of the URL.
214
215	\param fragment The fragment to use.
216*/
217
218//! @}
219
220//! @{
221
222/*!
223	\fn const BString& BUrl::UrlString() const;
224	\brief Returns the string representation of the URL.
225
226	\returns the string representation of the URL.
227
228	A complete URL string is of the form protocol://username:password\@host:port/path?request#fragment . All the fields are optional; for example, a file URL will
229	have only a protocol and a path.
230*/
231
232/*!
233	\fn const BString& BUrl::Protocol() const;
234	\brief Returns the protocol used in the url.
235
236	\returns The URL protocol.
237*/
238
239/*!
240	\fn const BString& BUrl::UserName() const;
241	\brief Returns the username.
242
243	\returns The username.
244*/
245
246/*!
247	\fn const BString& BUrl::Password() const;
248	\brief Returns the password.
249
250	\returns The password.
251*/
252
253/*!
254	\fn const BString& BUrl::UserInfo() const;
255	\brief Returns the user information (username:password)
256
257	\returns The username and password.
258
259	If there is no password, the username alone is returned. If there is no
260	username, a string of the form ":password" is returned.
261*/
262
263/*!
264	\fn const BString& BUrl::Host() const;
265	\brief Returns the URL host component.
266
267	\returns The URL host.
268*/
269
270/*!
271	\fn int BUrl::Port() const;
272	\brief Returns the URL port number.
273
274	\returns The URL port number.
275
276	-1 is returned if no port is set.
277*/
278
279/*!
280	\fn const BString& BUrl::Authority() const;
281	\brief Returns the authority component of the URL as a string.
282
283	\returns The authority component of the URL as a string.
284
285	The authority is of the form username:password\@host:port.
286*/
287
288/*!
289	\fn const BString& BUrl::Path() const;
290	\brief Returns the url path.
291
292	\returns The url-path.
293*/
294
295/*!
296	\fn const BString& BUrl::Request() const;
297	\brief Returns the url-request.
298
299	\returns The url-request as a string.
300*/
301
302/*!
303	\fn const BString& BUrl::Fragment() const;
304	\brief Returns the fragment of the url.
305
306	\returns The fragment of the url as a string.
307*/
308
309//! @}
310
311//! @{
312
313/*!
314	\fn bool BUrl::IsValid() const;
315	\brief Check if the URL is valid.
316
317	\returns true if the URL is valid.
318
319	This function verifies that the mandatory fields are present and performs
320	some other sanity checks on the URL.
321
322	An URL is valid if:
323	- It has a protocol, starting with an alphabetic character and followed by alphanumeric or +, -,
324	  or . characters exclusively,
325	- If the protocol requires one, there is a valid host,
326	- If the protocol requires one, there is a path,
327	- If there is a host, it is either an IPv4 address or valid DNS name, or an IPv6 address
328	  enclosed in brackets.
329
330	An invalid URL can still be modified using the various setters to turn it into a valid one.
331*/
332
333/*!
334	\fn bool BUrl::HasProtocol() const;
335	\brief Check whether the URL has a protocol.
336
337	\returns True if the URL has a protocol.
338*/
339
340/*!
341	\fn bool BUrl::HasUserName() const;
342	\brief Check whether the URL has a username.
343
344	\returns True if the URL has a username.
345*/
346
347/*!
348	\fn bool BUrl::HasPassword() const;
349	\brief Check whether the URL has a password.
350
351	\returns True if the URL has a password.
352*/
353
354/*!
355	\fn bool BUrl::HasUserInfo() const;
356	\brief Check whether the URL has user information.
357
358	\returns True if the URL has a username or password.
359*/
360
361/*!
362	\fn bool BUrl::HasHost() const;
363	\brief Check whether the URL has a host.
364
365	\returns True if the URL has a host.
366*/
367
368/*!
369	\fn bool BUrl::HasPort() const;
370	\brief Check whether the URL has a port.
371
372	\returns True if the URL has a port.
373*/
374
375/*!
376	\fn bool BUrl::HasAuthority() const;
377	\brief Check if the URL has a host or port.
378
379	\returns True if the URL has a host or port.
380*/
381
382/*!
383	\fn bool BUrl::HasPath() const;
384	\brief Check whether the URL has a path.
385
386	\returns True if the URL has a path.
387*/
388
389/*!
390	\fn bool BUrl::HasRequest() const;
391	\brief Check whether the URL has a request.
392
393	\returns True if the URL has a request.
394*/
395
396/*!
397	\fn bool BUrl::HasFragment() const;
398	\brief Check whether the URL has a fragment.
399
400	\returns True if the URL has a fragment.
401*/
402
403//! @}
404
405
406/*!
407	\fn void BUrl::UrlEncode(bool strict=false)
408	\brief Undocumented public method
409
410	\param strict Undocumented
411
412	\since Haiku R1
413*/
414
415
416/*!
417	\fn void BUrl::UrlDecode(bool strict=false)
418	\brief Undocumented public method
419
420	\param strict Undocumented
421
422	\since Haiku R1
423*/
424
425
426/*!
427	\fn status_t BUrl::IDNAToAscii()
428	\brief Undocumented public method
429
430	\return Undocumented
431	\retval <value> Undocumented
432
433	\since Haiku R1
434*/
435
436
437/*!
438	\fn status_t BUrl::IDNAToUnicode()
439	\brief Undocumented public method
440
441	\return Undocumented
442	\retval <value> Undocumented
443
444	\since Haiku R1
445*/
446
447
448/*!
449	\fn bool BUrl::HasPreferredApplication() const
450	\brief Undocumented public method
451
452	\return Undocumented
453	\retval <value> Undocumented
454
455	\since Haiku R1
456*/
457
458
459/*!
460	\fn BString BUrl::PreferredApplication() const
461	\brief Undocumented public method
462
463	\return Undocumented
464	\retval <value> Undocumented
465
466	\since Haiku R1
467*/
468
469
470/*!
471	\fn status_t BUrl::OpenWithPreferredApplication(bool onProblemAskUser=true) const
472	\brief Undocumented public method
473
474	\param onProblemAskUser Undocumented
475
476	\return Undocumented
477	\retval <value> Undocumented
478
479	\since Haiku R1
480*/
481
482
483/*!
484	\fn bool BUrl::operator==(BUrl &other) const
485	\brief Undocumented public method
486
487	\param other Undocumented
488
489	\return Undocumented
490	\retval <value> Undocumented
491
492	\since Haiku R1
493*/
494
495
496/*!
497	\fn bool BUrl::operator!=(BUrl &other) const
498	\brief Undocumented public method
499
500	\param other Undocumented
501
502	\return Undocumented
503	\retval <value> Undocumented
504
505	\since Haiku R1
506*/
507
508
509/*!
510	\fn const BUrl& BUrl::operator=(const BUrl &other)
511	\brief Undocumented public method
512
513	\param other Undocumented
514
515	\return Undocumented
516	\retval <value> Undocumented
517
518	\since Haiku R1
519*/
520
521
522/*!
523	\fn const BUrl& BUrl::operator=(const BString &string)
524	\brief Undocumented public method
525
526	\param string Undocumented
527
528	\return Undocumented
529	\retval <value> Undocumented
530
531	\since Haiku R1
532*/
533
534
535/*!
536	\fn const BUrl& BUrl::operator=(const char *string)
537	\brief Undocumented public method
538
539	\param string Undocumented
540
541	\return Undocumented
542	\retval <value> Undocumented
543
544	\since Haiku R1
545*/
546
547
548/*!
549	\fn BUrl::operator const char *() const
550	\brief Undocumented public method
551
552	\return Undocumented
553	\retval <value> Undocumented
554
555	\since Haiku R1
556*/
557
558
559/*!
560	\fn static BString BUrl::UrlEncode(const BString &url, bool strict=false, bool directory=false)
561	\brief Undocumented public method
562
563	\param url Undocumented
564	\param strict Undocumented
565	\param directory Undocumented
566
567	\return Undocumented
568	\retval <value> Undocumented
569
570	\since Haiku R1
571*/
572
573
574/*!
575	\fn static BString BUrl::UrlDecode(const BString &url, bool strict=false)
576	\brief Undocumented public method
577
578	\param url Undocumented
579	\param strict Undocumented
580
581	\return Undocumented
582	\retval <value> Undocumented
583
584	\since Haiku R1
585*/
586
587
588/*!
589	\fn static BArchivable* BUrl::Instantiate(BMessage *archive)
590	\brief Undocumented public method
591
592	\param archive Undocumented
593
594	\return Undocumented
595	\retval <value> Undocumented
596
597	\since Haiku R1
598*/
599