xref: /haiku/docs/user/locale/UnicodeChar.dox (revision f8da8f3477d3c18142e59d17d05a545982faa5a8)
1/*
2 * Copyright 2011 Haiku, Inc. All rights reserved.
3 * Distributed under the terms of the OpenBeOS License.
4 *
5 * Authors:
6 *		Axel Dörfler, axeld@pinc-software.de
7 *		John Scipione, jscipione@gmail.com
8 *
9 * Corresponds to:
10 *		headers/os/locale/UnicodeChar.h	 rev 42274
11 *		src/kits/locale/UnicodeChar.cpp	 rev 42274
12 */
13
14
15/*!
16	\file UnicodeChar.h
17	\ingroup locale
18	\ingroup libbe
19	\brief Provides the BUnicodeChar class.
20*/
21
22
23/*!
24	\class BUnicodeChar
25	\ingroup locale
26	\ingroup libbe
27	\brief Management of all information about characters.
28
29	This class provides a set of tools for managing the whole set of characters
30	defined by Unicode. This includes information about special sets of
31	characters, such as whether a character is whitespace or alphanumeric. It
32	also provides the uppercase equivalent of a character and determines whether
33	a character can be ornamented with accents.
34
35	This class consists entirely of static methods, so you do not have to
36	instantiate it. You can call one of the methods passing in the character
37	that you want to be examined.
38
39	Note that all the functions work with characters encoded in UTF-32. This is
40	not the most usual way to handle characters, but it is the fastest. To convert
41	a UTF-8 string to a UTF-32 character use the FromUTF8() method.
42*/
43
44
45/*!
46	\fn static bool BUnicodeChar::IsAlpha(uint32 c)
47	\brief Determine if \a c is alphabetic.
48
49	\returns \c true if the specified unicode character is an
50		alphabetic character.
51*/
52
53
54/*!
55	\fn static bool BUnicodeChar::IsAlNum(uint32 c)
56	\brief Determine if \a c is alphanumeric.
57
58	\returns \c true if the specified unicode character is an
59		alphabetic or numeric character.
60*/
61
62
63/*!
64	\fn static bool BUnicodeChar::IsDigit(uint32 c)
65	\brief Determine if \a c is numeric.
66
67	\returns \c true if the specified unicode character is a
68		number character.
69*/
70
71
72/*!
73	\fn static bool BUnicodeChar::IsHexDigit(uint32 c)
74	\brief Determine if \a c is a hexadecimal digit.
75
76	\returns \c true if the specified unicode character is a
77		hexadecimal number character.
78*/
79
80
81/*!
82	\fn static bool BUnicodeChar::IsUpper(uint32 c)
83	\brief Determine if \a c is uppercase.
84
85	\returns \c true if the specified unicode character is an
86		uppercase character.
87*/
88
89
90/*!
91	\fn static bool BUnicodeChar::IsLower(uint32 c)
92	\brief Determine if \a c is lowercase.
93
94	\returns \c true if the specified unicode character is a
95		lowercase character.
96*/
97
98
99/*!
100	\fn static bool BUnicodeChar::IsSpace(uint32 c)
101	\brief Determine if \a c is a space.
102
103	Unlike IsWhitespace() this function will return \c true for non-breakable
104	spaces. This method is useful for determining if the character will render
105	as an empty space which can be stretched on-screen.
106
107	\returns \c true if the specified unicode character is some
108		kind of a space character.
109
110	\sa IsWhitespace()
111*/
112
113
114/*!
115	\fn static bool BUnicodeChar::IsWhitespace(uint32 c)
116	\brief Determine if \a c is whitespace.
117
118	This method is essentially the same as IsSpace(), but excludes all
119	non-breakable spaces.
120
121	\returns \c true if the specified unicode character is a whitespace
122		character.
123
124	\sa IsSpace()
125*/
126
127
128/*!
129	\fn static bool BUnicodeChar::IsControl(uint32 c)
130	\brief Determine if \a c is a control character.
131
132	Example control characters are the non-printable ASCII characters from
133	0x0 to 0x1F.
134
135	\returns \c true if the specified unicode character is a control
136		character.
137
138	\sa IsPrintable()
139*/
140
141
142/*!
143	\fn static bool BUnicodeChar::IsPunctuation(uint32 c)
144	\brief Determine if \a c is a punctuation character.
145
146	\returns \c true if the specified unicode character is a
147		punctuation character.
148*/
149
150
151/*!
152	\fn static bool BUnicodeChar::IsPrintable(uint32 c)
153	\brief Determine if \a c is printable.
154
155	Printable characters are not control characters.
156
157	\returns \c true if the specified unicode character is a printable
158		character.
159
160	\sa IsControl()
161*/
162
163
164/*!
165	\fn static bool BUnicodeChar::IsTitle(uint32 c)
166	\brief Determine if \a c is title case.
167
168	Title case characters are a smaller version of normal uppercase letters.
169
170	\returns \c true if the specified unicode character is a title case
171		character.
172*/
173
174
175/*!
176	\fn static bool BUnicodeChar::IsDefined(uint32 c)
177	\brief Determine if \a c is defined.
178
179	In Unicode, some code points are invalid or have not been assigned yet.
180	For these code points this method will return \c false.
181
182	\returns \c true if the specified unicode character is defined.
183*/
184
185
186/*!
187	\fn static bool BUnicodeChar::IsBase(uint32 c)
188	\brief Determine if \a c can be used with a diacritic.
189
190	\note IsBase() does not determine if a unicode character is distinct.
191
192	\returns \c true if the specified unicode character is a base
193		form character that can be used with a diacritic.
194*/
195
196
197/*!
198	\fn static int8 BUnicodeChar::Type(uint32 c)
199	\brief Gets the type of a character.
200
201	\returns A member of the \c unicode_char_category enum.
202*/
203
204
205/*!
206	\fn uint32 BUnicodeChar::ToLower(uint32 c)
207	\brief Transforms \a c to lowercase.
208
209	\returns The lowercase version of the specified unicode character.
210*/
211
212
213/*!
214	\fn uint32 BUnicodeChar::ToUpper(uint32 c)
215	\brief Transforms \a c to uppercase.
216
217	\returns The uppercase version of the specified unicode character.
218*/
219
220
221/*!
222	\fn uint32 BUnicodeChar::ToTitle(uint32 c)
223	\brief Transforms \a c to title case.
224
225	\returns The title case version of the specified unicode character.
226*/
227
228
229/*!
230	\fn int32 BUnicodeChar::DigitValue(uint32 c)
231	\brief Gets the numeric value of \a c.
232
233	\returns The numeric value of the specified unicode character.
234*/
235
236
237/*!
238	\fn void BUnicodeChar::ToUTF8(uint32 c, char **out)
239	\brief Transform a character to UTF-8 encoding.
240
241	The UTF-8 encoded form of \a c is stored via \a out; nothing is returned.
242*/
243
244
245/*!
246	\fn uint32 BUnicodeChar::FromUTF8(const char **in)
247	\brief Transform a UTF-8 string to a UTF-32 character.
248
249	If the string contains multiple characters, only the first one is used.
250	This function updates the \a in pointer so that it points to the next
251	character for the following call.
252
253	\returns The UTF-32 encoded version of \a in.
254*/
255
256
257/*!
258	\fn size_t BUnicodeChar::UTF8StringLength(const char *str)
259	\brief Counts the characters in the given \c NUL terminated string.
260
261	\returns the number of UTF-8 characters in the \c NUL terminated string.
262
263	\sa BString::CountChars()
264*/
265
266
267/*!
268	\fn size_t BUnicodeChar::UTF8StringLength(const char *str, size_t maxLength)
269	\brief Counts the characters in the given string up to \a maxLength
270		characters.
271
272	The string does not need to be \c NUL terminated if you specify a
273	\a maxLength that is shorter than the maximum length of the string.
274
275	\returns the number of UTF-8 characters in the \c NUL terminated string
276		up to \a maxLength characters.
277*/
278