xref: /haiku/docs/user/locale/UnicodeChar.dox (revision 4bd0c1066b227cec4b79883bdef697c7a27f2e90)
1/*
2 * Copyright 2011 Haiku, Inc. All rights reserved.
3 * Distributed under the terms of the MIT License.
4 *
5 * Authors:
6 *		Axel Dörfler, axeld@pinc-software.de
7 *		John Scipione, jscipione@gmail.com
8 *
9 * Corresponds to:
10 *		headers/os/locale/UnicodeChar.h	 rev 42274
11 *		src/kits/locale/UnicodeChar.cpp	 rev 42274
12 */
13
14
15/*!
16	\file UnicodeChar.h
17	\ingroup locale
18	\ingroup libbe
19	\brief Provides the BUnicodeChar class.
20*/
21
22
23/*!
24	\class BUnicodeChar
25	\ingroup locale
26	\ingroup libbe
27	\brief Management of all information about characters.
28
29	This class provides a set of tools for managing the whole set of characters
30	defined by unicode. This includes information about special sets of
31	characters such as if the character is whitespace, or alphanumeric. It also
32	provides the uppercase equivalent of a character and determines whether a
33	character can be ornamented with accents.
34
35	This class consists entirely of static methods, so you do not have to
36	instantiate it. You can call one of the methods passing in the character
37	that you want to be examined.
38
39	Note that all the functions work with characters encoded in UTF-32. This is
40	not the most usual way to handle characters, but it is the fastest. To
41	convert a UTF-8 string to a UTF-32 character use the FromUTF8() method.
42
43	\since Haiku R1
44*/
45
46
47/*!
48	\fn static bool BUnicodeChar::IsAlpha(uint32 c)
49	\brief Determine if \a c is alphabetic.
50
51	\returns \c true if the specified unicode character is an
52	         alphabetic character.
53
54	\since Haiku R1
55*/
56
57
58/*!
59	\fn static bool BUnicodeChar::IsAlNum(uint32 c)
60	\brief Determine if \a c is alphanumeric.
61
62	\returns \c true if the specified unicode character is an
63	         alphabetic or numeric character.
64
65	\since Haiku R1
66*/
67
68
69/*!
70	\fn static bool BUnicodeChar::IsDigit(uint32 c)
71	\brief Determine if \a c is numeric.
72
73	\returns \c true if the specified unicode character is a
74	         number character.
75
76	\since Haiku R1
77*/
78
79
80/*!
81	\fn static bool BUnicodeChar::IsHexDigit(uint32 c)
82	\brief Determine if \a c is a hexadecimal digit.
83
84	\returns \c true if the specified unicode character is a
85	         hexadecimal number character.
86
87	\since Haiku R1
88*/
89
90
91/*!
92	\fn static bool BUnicodeChar::IsUpper(uint32 c)
93	\brief Determine if \a c is uppercase.
94
95	\returns \c true if the specified unicode character is an
96	         uppercase character.
97
98	\since Haiku R1
99*/
100
101
102/*!
103	\fn static bool BUnicodeChar::IsLower(uint32 c)
104	\brief Determine if \a c is lowercase.
105
106	\returns \c true if the specified unicode character is a
107	         lowercase character.
108
109	\since Haiku R1
110*/
111
112
113/*!
114	\fn static bool BUnicodeChar::IsSpace(uint32 c)
115	\brief Determine if \a c is a space.
116
117	Unlike IsWhitespace() this function will return \c true for non-breakable
118	spaces. This method is useful for determining if the character will render
119	as an empty space which can be stretched on-screen.
120
121	\returns \c true if the specified unicode character is some
122	         kind of a space character.
123
124	\sa IsWhitespace()
125
126	\since Haiku R1
127*/
128
129
130/*!
131	\fn static bool BUnicodeChar::IsWhitespace(uint32 c)
132	\brief Determine if \a c is whitespace.
133
134	This method is essentially the same as IsSpace(), but excludes all
135	non-breakable spaces.
136
137	\returns \c true if the specified unicode character is a whitespace
138	         character.
139
140	\sa IsSpace()
141
142	\since Haiku R1
143*/
144
145
146/*!
147	\fn static bool BUnicodeChar::IsControl(uint32 c)
148	\brief Determine if \a c is a control character.
149
150	Example control characters are the non-printable ASCII characters from
151	0x0 to 0x1F.
152
153	\returns \c true if the specified unicode character is a control
154	         character.
155
156	\sa IsPrintable()
157
158	\since Haiku R1
159*/
160
161
162/*!
163	\fn static bool BUnicodeChar::IsPunctuation(uint32 c)
164	\brief Determine if \a c is a punctuation character.
165
166	\returns \c true if the specified unicode character is a
167	         punctuation character.
168
169	\since Haiku R1
170*/
171
172
173/*!
174	\fn static bool BUnicodeChar::IsPrintable(uint32 c)
175	\brief Determine if \a c is printable.
176
177	Printable characters are not control characters.
178
179	\returns \c true if the specified unicode character is a printable
180	         character.
181
182	\sa IsControl()
183
184	\since Haiku R1
185*/
186
187
188/*!
189	\fn static bool BUnicodeChar::IsTitle(uint32 c)
190	\brief Determine if \a c is title case.
191
192	Title case characters are a smaller version of normal uppercase letters.
193
194	\returns \c true if the specified unicode character is a title case
195	         character.
196
197	\since Haiku R1
198*/
199
200
201/*!
202	\fn static bool BUnicodeChar::IsDefined(uint32 c)
203	\brief Determine if \a c is defined.
204
205	In unicode some code points are not valid or not assigned yet.
206	For these codes this method will return \c false.
207
208	\returns \c true if the specified unicode character is defined.
209
210	\since Haiku R1
211*/
212
213
214/*!
215	\fn static bool BUnicodeChar::IsBase(uint32 c)
216	\brief Determine if \a c can be used with a diacritic.
217
218	\note IsBase() does not determine if a unicode character is distinct.
219
220	\returns \c true if the specified unicode character is a base
221	         form character that can be used with a diacritic.
222
223	\since Haiku R1
224*/
225
226
227/*!
228	\fn static int8 BUnicodeChar::Type(uint32 c)
229	\brief Gets the type of a character.
230
231	\returns A member of the \c unicode_char_category enum.
232
233	\since Haiku R1
234*/
235
236
237/*!
238	\fn uint32 BUnicodeChar::ToLower(uint32 c)
239	\brief Transforms \a c to lowercase.
240
241	\returns The lowercase version of the specified unicode character.
242
243	\since Haiku R1
244*/
245
246
247/*!
248	\fn uint32 BUnicodeChar::ToUpper(uint32 c)
249	\brief Transforms \a c to uppercase.
250
251	\returns The uppercase version of the specified unicode character.
252
253	\since Haiku R1
254*/
255
256
257/*!
258	\fn uint32 BUnicodeChar::ToTitle(uint32 c)
259	\brief Transforms \a c to title case.
260
261	\returns The title case version of the specified unicode character.
262
263	\since Haiku R1
264*/
265
266
267/*!
268	\fn int32 BUnicodeChar::DigitValue(uint32 c)
269	\brief Gets the numeric value of \a c.
270
271	\returns The numeric version of the specified unicode character.
272
273	\since Haiku R1
274*/
275
276
277/*!
278	\fn void BUnicodeChar::ToUTF8(uint32 c, char** out)
279	\brief Transform a character to UTF-8 encoding.
280
281	The UTF-8 encoding of the specified unicode character is written via \a out.
282
283	\since Haiku R1
284*/
285
286
287/*!
288	\fn uint32 BUnicodeChar::FromUTF8(const char** in)
289	\brief Transform a UTF-8 string to a UTF-32 character.
290
291	If the string contains multiple characters, only the first one is used.
292	This function updates the \a in pointer so that it points to the next
293	character for the following call.
294
295	\returns The UTF-32 encoded version of \a in.
296
297	\since Haiku R1
298*/
299
300
301/*!
302	\fn size_t BUnicodeChar::UTF8StringLength(const char* string)
303	\brief Counts the characters in the given \c NUL terminated string.
304
305	\returns the number of UTF-8 characters in the \c NUL terminated string.
306
307	\sa BString::CountChars()
308
309	\since Haiku R1
310*/
311
312
313/*!
314	\fn size_t BUnicodeChar::UTF8StringLength(const char* string,
315		size_t maxLength)
316	\brief Counts the characters in the given string up to \a maxLength
317		characters.
318
319	\param string does not need to be \c NUL terminated if you specify a
320	       \a maxLength that is shorter than the maximum length of the string.
321	\param maxLength The maximum length of the string in bytes.
322
323	\returns the number of UTF-8 characters in the \c NUL terminated string
324	         up to \a maxLength characters.
325
326	\since Haiku R1
327*/
328