// Source: haiku/headers/os/locale/UnicodeChar.h
// (revision 219bf21ef68bab598416c79ee2222593fa8c9080)
#ifndef _UNICODE_CHAR_H_
#define _UNICODE_CHAR_H_

#include <SupportDefs.h>

// General category of a code point. The two-letter code in each trailing
// comment is the corresponding Unicode General_Category abbreviation.
// NOTE(review): the numbering appears to follow ICU's UCharCategory -- confirm
// before relying on value-for-value compatibility.
enum unicode_char_category {
	// Non-category for unassigned and non-character code points.
	B_UNICODE_UNASSIGNED				= 0,

	// Shares the value 0 with B_UNICODE_UNASSIGNED.
	B_UNICODE_GENERAL_OTHER_TYPES		= 0,	// Cn
	B_UNICODE_UPPERCASE_LETTER			= 1,	// Lu
	B_UNICODE_LOWERCASE_LETTER			= 2,	// Ll
	B_UNICODE_TITLECASE_LETTER			= 3,	// Lt
	B_UNICODE_MODIFIER_LETTER			= 4,	// Lm
	B_UNICODE_OTHER_LETTER				= 5,	// Lo
	B_UNICODE_NON_SPACING_MARK			= 6,	// Mn
	B_UNICODE_ENCLOSING_MARK			= 7,	// Me
	B_UNICODE_COMBINING_SPACING_MARK	= 8,	// Mc
	B_UNICODE_DECIMAL_DIGIT_NUMBER		= 9,	// Nd
	B_UNICODE_LETTER_NUMBER				= 10,	// Nl
	B_UNICODE_OTHER_NUMBER				= 11,	// No
	B_UNICODE_SPACE_SEPARATOR			= 12,	// Zs
	B_UNICODE_LINE_SEPARATOR			= 13,	// Zl
	B_UNICODE_PARAGRAPH_SEPARATOR		= 14,	// Zp
	B_UNICODE_CONTROL_CHAR				= 15,	// Cc
	B_UNICODE_FORMAT_CHAR				= 16,	// Cf
	B_UNICODE_PRIVATE_USE_CHAR			= 17,	// Co
	B_UNICODE_SURROGATE					= 18,	// Cs
	B_UNICODE_DASH_PUNCTUATION			= 19,	// Pd
	B_UNICODE_START_PUNCTUATION			= 20,	// Ps
	B_UNICODE_END_PUNCTUATION			= 21,	// Pe
	B_UNICODE_CONNECTOR_PUNCTUATION		= 22,	// Pc
	B_UNICODE_OTHER_PUNCTUATION			= 23,	// Po
	B_UNICODE_MATH_SYMBOL				= 24,	// Sm
	B_UNICODE_CURRENCY_SYMBOL			= 25,	// Sc
	B_UNICODE_MODIFIER_SYMBOL			= 26,	// Sk
	B_UNICODE_OTHER_SYMBOL				= 27,	// So
	B_UNICODE_INITIAL_PUNCTUATION		= 28,	// Pi
	B_UNICODE_FINAL_PUNCTUATION			= 29,	// Pf

	// Implicitly one past the last category (30); keep it last.
	B_UNICODE_CATEGORY_COUNT
};


// Directional property of a character, used when laying out text that mixes
// left-to-right and right-to-left runs.
// NOTE(review): the numbering appears to follow ICU's UCharDirection -- confirm
// before relying on value-for-value compatibility.
enum unicode_char_direction {
	B_UNICODE_LEFT_TO_RIGHT					= 0,
	B_UNICODE_RIGHT_TO_LEFT					= 1,
	B_UNICODE_EUROPEAN_NUMBER				= 2,
	B_UNICODE_EUROPEAN_NUMBER_SEPARATOR		= 3,
	B_UNICODE_EUROPEAN_NUMBER_TERMINATOR	= 4,
	B_UNICODE_ARABIC_NUMBER					= 5,
	B_UNICODE_COMMON_NUMBER_SEPARATOR		= 6,
	B_UNICODE_BLOCK_SEPARATOR				= 7,
	B_UNICODE_SEGMENT_SEPARATOR				= 8,
	B_UNICODE_WHITE_SPACE_NEUTRAL			= 9,
	B_UNICODE_OTHER_NEUTRAL					= 10,
	B_UNICODE_LEFT_TO_RIGHT_EMBEDDING		= 11,
	B_UNICODE_LEFT_TO_RIGHT_OVERRIDE		= 12,
	B_UNICODE_RIGHT_TO_LEFT_ARABIC			= 13,
	B_UNICODE_RIGHT_TO_LEFT_EMBEDDING		= 14,
	B_UNICODE_RIGHT_TO_LEFT_OVERRIDE		= 15,
	B_UNICODE_POP_DIRECTIONAL_FORMAT		= 16,
	B_UNICODE_DIR_NON_SPACING_MARK			= 17,
	B_UNICODE_BOUNDARY_NEUTRAL				= 18,

	// Implicitly one past the last direction value (19); keep it last.
	B_UNICODE_DIRECTION_COUNT
};


// Unicode block identifiers. Despite the enum's name, each constant names a
// code point block; the bracketed hexadecimal number in the trailing comment
// is the first code point of that block. Constants are grouped by the Unicode
// version that introduced the block, so they are not in code point order.
// NOTE(review): the numbering appears to follow ICU's UBlockCode -- confirm
// before relying on value-for-value compatibility.
enum unicode_char_script {
	// New No_Block value in Unicode 4.
	B_UNICODE_NO_BLOCK								= 0, // [none] Special range
	B_UNICODE_BASIC_LATIN							= 1, // [0000]
	B_UNICODE_LATIN_1_SUPPLEMENT					= 2, // [0080]
	B_UNICODE_LATIN_EXTENDED_A						= 3, // [0100]
	B_UNICODE_LATIN_EXTENDED_B						= 4, // [0180]
	B_UNICODE_IPA_EXTENSIONS						= 5, // [0250]
	B_UNICODE_SPACING_MODIFIER_LETTERS				= 6, // [02B0]
	B_UNICODE_COMBINING_DIACRITICAL_MARKS			= 7, // [0300]
	B_UNICODE_GREEK									= 8, // [0370]
	B_UNICODE_CYRILLIC								= 9, // [0400]
	B_UNICODE_ARMENIAN								= 10, // [0530]
	B_UNICODE_HEBREW								= 11, // [0590]
	B_UNICODE_ARABIC								= 12, // [0600]
	B_UNICODE_SYRIAC								= 13, // [0700]
	B_UNICODE_THAANA								= 14, // [0780]
	B_UNICODE_DEVANAGARI							= 15, // [0900]
	B_UNICODE_BENGALI								= 16, // [0980]
	B_UNICODE_GURMUKHI								= 17, // [0A00]
	B_UNICODE_GUJARATI								= 18, // [0A80]
	B_UNICODE_ORIYA									= 19, // [0B00]
	B_UNICODE_TAMIL									= 20, // [0B80]
	B_UNICODE_TELUGU								= 21, // [0C00]
	B_UNICODE_KANNADA								= 22, // [0C80]
	B_UNICODE_MALAYALAM								= 23, // [0D00]
	B_UNICODE_SINHALA								= 24, // [0D80]
	B_UNICODE_THAI									= 25, // [0E00]
	B_UNICODE_LAO									= 26, // [0E80]
	B_UNICODE_TIBETAN								= 27, // [0F00]
	B_UNICODE_MYANMAR								= 28, // [1000]
	B_UNICODE_GEORGIAN								= 29, // [10A0]
	B_UNICODE_HANGUL_JAMO							= 30, // [1100]
	B_UNICODE_ETHIOPIC								= 31, // [1200]
	B_UNICODE_CHEROKEE								= 32, // [13A0]
	B_UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS	= 33, // [1400]
	B_UNICODE_OGHAM									= 34, // [1680]
	B_UNICODE_RUNIC									= 35, // [16A0]
	B_UNICODE_KHMER									= 36, // [1780]
	B_UNICODE_MONGOLIAN								= 37, // [1800]
	B_UNICODE_LATIN_EXTENDED_ADDITIONAL				= 38, // [1E00]
	B_UNICODE_GREEK_EXTENDED						= 39, // [1F00]
	B_UNICODE_GENERAL_PUNCTUATION					= 40, // [2000]
	B_UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS			= 41, // [2070]
	B_UNICODE_CURRENCY_SYMBOLS						= 42, // [20A0]
	B_UNICODE_COMBINING_MARKS_FOR_SYMBOLS			= 43, // [20D0]
	B_UNICODE_LETTERLIKE_SYMBOLS					= 44, // [2100]
	B_UNICODE_NUMBER_FORMS							= 45, // [2150]
	B_UNICODE_ARROWS								= 46, // [2190]
	B_UNICODE_MATHEMATICAL_OPERATORS				= 47, // [2200]
	B_UNICODE_MISCELLANEOUS_TECHNICAL				= 48, // [2300]
	B_UNICODE_CONTROL_PICTURES						= 49, // [2400]
	B_UNICODE_OPTICAL_CHARACTER_RECOGNITION			= 50, // [2440]
	B_UNICODE_ENCLOSED_ALPHANUMERICS				= 51, // [2460]
	B_UNICODE_BOX_DRAWING							= 52, // [2500]
	B_UNICODE_BLOCK_ELEMENTS						= 53, // [2580]
	B_UNICODE_GEOMETRIC_SHAPES						= 54, // [25A0]
	B_UNICODE_MISCELLANEOUS_SYMBOLS					= 55, // [2600]
	B_UNICODE_DINGBATS								= 56, // [2700]
	B_UNICODE_BRAILLE_PATTERNS						= 57, // [2800]
	B_UNICODE_CJK_RADICALS_SUPPLEMENT				= 58, // [2E80]
	B_UNICODE_KANGXI_RADICALS						= 59, // [2F00]
	B_UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS	= 60, // [2FF0]
	B_UNICODE_CJK_SYMBOLS_AND_PUNCTUATION			= 61, // [3000]
	B_UNICODE_HIRAGANA								= 62, // [3040]
	B_UNICODE_KATAKANA								= 63, // [30A0]
	B_UNICODE_BOPOMOFO								= 64, // [3100]
	B_UNICODE_HANGUL_COMPATIBILITY_JAMO				= 65, // [3130]
	B_UNICODE_KANBUN								= 66, // [3190]
	B_UNICODE_BOPOMOFO_EXTENDED						= 67, // [31A0]
	B_UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS		= 68, // [3200]
	B_UNICODE_CJK_COMPATIBILITY						= 69, // [3300]
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A	= 70, // [3400]
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS				= 71, // [4E00]
	B_UNICODE_YI_SYLLABLES							= 72, // [A000]
	B_UNICODE_YI_RADICALS							= 73, // [A490]
	B_UNICODE_HANGUL_SYLLABLES						= 74, // [AC00]
	B_UNICODE_HIGH_SURROGATES						= 75, // [D800]
	B_UNICODE_HIGH_PRIVATE_USE_SURROGATES			= 76, // [DB80]
	B_UNICODE_LOW_SURROGATES						= 77, // [DC00]
	// Two names for the same block; both equal 78.
	B_UNICODE_PRIVATE_USE							= 78,
	B_UNICODE_PRIVATE_USE_AREA = B_UNICODE_PRIVATE_USE, // [E000]
	B_UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS			= 79, // [F900]
	B_UNICODE_ALPHABETIC_PRESENTATION_FORMS			= 80, // [FB00]
	B_UNICODE_ARABIC_PRESENTATION_FORMS_A			= 81, // [FB50]
	B_UNICODE_COMBINING_HALF_MARKS					= 82, // [FE20]
	B_UNICODE_CJK_COMPATIBILITY_FORMS				= 83, // [FE30]
	B_UNICODE_SMALL_FORM_VARIANTS					= 84, // [FE50]
	B_UNICODE_ARABIC_PRESENTATION_FORMS_B			= 85, // [FE70]
	// The next two are numbered in the opposite order to their start code
	// points (FFF0 before FF00); the values are kept as they are.
	B_UNICODE_SPECIALS								= 86, // [FFF0]
	B_UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS			= 87, // [FF00]

	// New blocks in Unicode 3.1
	B_UNICODE_OLD_ITALIC							= 88, // [10300]
	B_UNICODE_GOTHIC								= 89, // [10330]
	B_UNICODE_DESERET								= 90, // [10400]
	B_UNICODE_BYZANTINE_MUSICAL_SYMBOLS				= 91, // [1D000]
	B_UNICODE_MUSICAL_SYMBOLS						= 92, // [1D100]
	B_UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS		= 93, // [1D400]
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B	= 94, // [20000]
	B_UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, // [2F800]
	B_UNICODE_TAGS									= 96, // [E0000]

	// New blocks in Unicode 3.2
	// Two names for the same block; both equal 97.
	B_UNICODE_CYRILLIC_SUPPLEMENTARY				= 97,
	B_UNICODE_CYRILLIC_SUPPLEMENT = B_UNICODE_CYRILLIC_SUPPLEMENTARY, // [0500]
	B_UNICODE_TAGALOG								= 98, // [1700]
	B_UNICODE_HANUNOO								= 99, // [1720]
	B_UNICODE_BUHID									= 100, // [1740]
	B_UNICODE_TAGBANWA								= 101, // [1760]
	B_UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A	= 102, // [27C0]
	B_UNICODE_SUPPLEMENTAL_ARROWS_A					= 103, // [27F0]
	B_UNICODE_SUPPLEMENTAL_ARROWS_B					= 104, // [2900]
	B_UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B	= 105, // [2980]
	B_UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS	= 106, // [2A00]
	B_UNICODE_KATAKANA_PHONETIC_EXTENSIONS			= 107, // [31F0]
	B_UNICODE_VARIATION_SELECTORS					= 108, // [FE00]
	B_UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A		= 109, // [F0000]
	B_UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B		= 110, // [100000]

	// New blocks in Unicode 4
	B_UNICODE_LIMBU									= 111, // [1900]
	B_UNICODE_TAI_LE								= 112, // [1950]
	B_UNICODE_KHMER_SYMBOLS							= 113, // [19E0]
	B_UNICODE_PHONETIC_EXTENSIONS					= 114, // [1D00]
	B_UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS		= 115, // [2B00]
	B_UNICODE_YIJING_HEXAGRAM_SYMBOLS				= 116, // [4DC0]
	B_UNICODE_LINEAR_B_SYLLABARY					= 117, // [10000]
	B_UNICODE_LINEAR_B_IDEOGRAMS					= 118, // [10080]
	B_UNICODE_AEGEAN_NUMBERS						= 119, // [10100]
	B_UNICODE_UGARITIC								= 120, // [10380]
	B_UNICODE_SHAVIAN								= 121, // [10450]
	B_UNICODE_OSMANYA								= 122, // [10480]
	B_UNICODE_CYPRIOT_SYLLABARY						= 123, // [10800]
	B_UNICODE_TAI_XUAN_JING_SYMBOLS					= 124, // [1D300]
	B_UNICODE_VARIATION_SELECTORS_SUPPLEMENT		= 125, // [E0100]

	// New blocks in Unicode 4.1
	B_UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION		= 126, // [1D200]
	B_UNICODE_ANCIENT_GREEK_NUMBERS					= 127, // [10140]
	B_UNICODE_ARABIC_SUPPLEMENT						= 128, // [0750]
	B_UNICODE_BUGINESE								= 129, // [1A00]
	B_UNICODE_CJK_STROKES							= 130, // [31C0]
	B_UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, // [1DC0]
	B_UNICODE_COPTIC								= 132, // [2C80]
	B_UNICODE_ETHIOPIC_EXTENDED						= 133, // [2D80]
	B_UNICODE_ETHIOPIC_SUPPLEMENT					= 134, // [1380]
	B_UNICODE_GEORGIAN_SUPPLEMENT					= 135, // [2D00]
	B_UNICODE_GLAGOLITIC							= 136, // [2C00]
	B_UNICODE_KHAROSHTHI							= 137, // [10A00]
	B_UNICODE_MODIFIER_TONE_LETTERS					= 138, // [A700]
	B_UNICODE_NEW_TAI_LUE							= 139, // [1980]
	B_UNICODE_OLD_PERSIAN							= 140, // [103A0]
	B_UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT		= 141, // [1D80]
	B_UNICODE_SUPPLEMENTAL_PUNCTUATION				= 142, // [2E00]
	B_UNICODE_SYLOTI_NAGRI							= 143, // [A800]
	B_UNICODE_TIFINAGH								= 144, // [2D30]
	B_UNICODE_VERTICAL_FORMS						= 145, // [FE10]

	// New blocks in Unicode 5.0
	B_UNICODE_NKO									= 146, // [07C0]
	B_UNICODE_BALINESE								= 147, // [1B00]
	B_UNICODE_LATIN_EXTENDED_C						= 148, // [2C60]
	B_UNICODE_LATIN_EXTENDED_D						= 149, // [A720]
	B_UNICODE_PHAGS_PA								= 150, // [A840]
	B_UNICODE_PHOENICIAN							= 151, // [10900]
	B_UNICODE_CUNEIFORM								= 152, // [12000]
	B_UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION		= 153, // [12400]
	B_UNICODE_COUNTING_ROD_NUMERALS					= 154, // [1D360]

	// New blocks in Unicode 5.1
	B_UNICODE_SUNDANESE								= 155, // [1B80]
	B_UNICODE_LEPCHA								= 156, // [1C00]
	B_UNICODE_OL_CHIKI								= 157, // [1C50]
	B_UNICODE_CYRILLIC_EXTENDED_A					= 158, // [2DE0]
	B_UNICODE_VAI									= 159, // [A500]
	B_UNICODE_CYRILLIC_EXTENDED_B					= 160, // [A640]
	B_UNICODE_SAURASHTRA							= 161, // [A880]
	B_UNICODE_KAYAH_LI								= 162, // [A900]
	B_UNICODE_REJANG								= 163, // [A930]
	B_UNICODE_CHAM									= 164, // [AA00]
	B_UNICODE_ANCIENT_SYMBOLS						= 165, // [10190]
	B_UNICODE_PHAISTOS_DISC							= 166, // [101D0]
	B_UNICODE_LYCIAN								= 167, // [10280]
	B_UNICODE_CARIAN								= 168, // [102A0]
	B_UNICODE_LYDIAN								= 169, // [10920]
	B_UNICODE_MAHJONG_TILES							= 170, // [1F000]
	B_UNICODE_DOMINO_TILES							= 171, // [1F030]

	// New blocks in Unicode 5.2
	B_UNICODE_SAMARITAN								= 172, // [0800]
	B_UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, // [18B0]
	B_UNICODE_TAI_THAM								= 174, // [1A20]
	B_UNICODE_VEDIC_EXTENSIONS						= 175, // [1CD0]
	B_UNICODE_LISU									= 176, // [A4D0]
	B_UNICODE_BAMUM									= 177, // [A6A0]
	B_UNICODE_COMMON_INDIC_NUMBER_FORMS				= 178, // [A830]
	B_UNICODE_DEVANAGARI_EXTENDED					= 179, // [A8E0]
	B_UNICODE_HANGUL_JAMO_EXTENDED_A				= 180, // [A960]
	B_UNICODE_JAVANESE								= 181, // [A980]
	B_UNICODE_MYANMAR_EXTENDED_A					= 182, // [AA60]
	B_UNICODE_TAI_VIET								= 183, // [AA80]
	B_UNICODE_MEETEI_MAYEK							= 184, // [ABC0]
	B_UNICODE_HANGUL_JAMO_EXTENDED_B				= 185, // [D7B0]
	B_UNICODE_IMPERIAL_ARAMAIC						= 186, // [10840]
	B_UNICODE_OLD_SOUTH_ARABIAN						= 187, // [10A60]
	B_UNICODE_AVESTAN								= 188, // [10B00]
	B_UNICODE_INSCRIPTIONAL_PARTHIAN				= 189, // [10B40]
	B_UNICODE_INSCRIPTIONAL_PAHLAVI					= 190, // [10B60]
	B_UNICODE_OLD_TURKIC							= 191, // [10C00]
	B_UNICODE_RUMI_NUMERAL_SYMBOLS					= 192, // [10E60]
	B_UNICODE_KAITHI								= 193, // [11080]
	B_UNICODE_EGYPTIAN_HIEROGLYPHS					= 194, // [13000]
	B_UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT		= 195, // [1F100]
	B_UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT		= 196, // [1F200]
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C	= 197, // [2A700]

	// New blocks in Unicode 6.0
	B_UNICODE_MANDAIC								= 198, // [0840]
	B_UNICODE_BATAK									= 199, // [1BC0]
	B_UNICODE_ETHIOPIC_EXTENDED_A					= 200, // [AB00]
	B_UNICODE_BRAHMI								= 201, // [11000]
	B_UNICODE_BAMUM_SUPPLEMENT						= 202, // [16800]
	B_UNICODE_KANA_SUPPLEMENT						= 203, // [1B000]
	B_UNICODE_PLAYING_CARDS							= 204, // [1F0A0]
	B_UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS	= 205, // [1F300]
	B_UNICODE_EMOTICONS								= 206, // [1F600]
	B_UNICODE_TRANSPORT_AND_MAP_SYMBOLS				= 207, // [1F680]
	B_UNICODE_ALCHEMICAL_SYMBOLS					= 208, // [1F700]
	B_UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D	= 209, // [2B740]

	// One past the last block identifier. B_UNICODE_NO_SCRIPT is an alias
	// for the same value (210).
	B_UNICODE_SCRIPT_COUNT							= 210,
	B_UNICODE_NO_SCRIPT = B_UNICODE_SCRIPT_COUNT,

	// The only negative value; lies outside the range counted above.
	B_UNICODE_INVALID_CODE							= -1
};


// East Asian Width constants. The bracketed code in each trailing comment is
// the abbreviation of the corresponding East_Asian_Width property value.
// No explicit values are given, so the constants number upward from 0 in
// declaration order.
enum unicode_east_asian_width {
	B_UNICODE_EA_NEUTRAL,	// [N]
	B_UNICODE_EA_AMBIGUOUS,	// [A]
	B_UNICODE_EA_HALFWIDTH,	// [H]
	B_UNICODE_EA_FULLWIDTH,	// [F]
	B_UNICODE_EA_NARROW,	// [Na]
	B_UNICODE_EA_WIDE,		// [W]

	// Number of width values above (6); keep it last.
	B_UNICODE_EA_COUNT
};


327723383c0SOliver Tappe class BUnicodeChar {
328c3ac87e8SOliver Tappe 	public:
329c3ac87e8SOliver Tappe 		static bool IsAlpha(uint32 c);
330c3ac87e8SOliver Tappe 		static bool IsAlNum(uint32 c);
331c3ac87e8SOliver Tappe 		static bool IsDigit(uint32 c);
332c3ac87e8SOliver Tappe 		static bool IsHexDigit(uint32 c);
333c3ac87e8SOliver Tappe 		static bool IsUpper(uint32 c);
334c3ac87e8SOliver Tappe 		static bool IsLower(uint32 c);
335c3ac87e8SOliver Tappe 		static bool IsSpace(uint32 c);
336c3ac87e8SOliver Tappe 		static bool IsWhitespace(uint32 c);
337c3ac87e8SOliver Tappe 		static bool IsControl(uint32 c);
338c3ac87e8SOliver Tappe 		static bool IsPunctuation(uint32 c);
339c3ac87e8SOliver Tappe 		static bool IsPrintable(uint32 c);
340c3ac87e8SOliver Tappe 		static bool IsTitle(uint32 c);
341c3ac87e8SOliver Tappe 		static bool IsDefined(uint32 c);
342c3ac87e8SOliver Tappe 		static bool IsBase(uint32 c);
343c3ac87e8SOliver Tappe 
344c3ac87e8SOliver Tappe 		static int8 Type(uint32 c);
345c3ac87e8SOliver Tappe 
346c3ac87e8SOliver Tappe 		static uint32 ToLower(uint32 c);
347c3ac87e8SOliver Tappe 		static uint32 ToUpper(uint32 c);
348c3ac87e8SOliver Tappe 		static uint32 ToTitle(uint32 c);
349c3ac87e8SOliver Tappe 		static int32 DigitValue(uint32 c);
350b6fd91b4SSiarzhuk Zharski 		static unicode_east_asian_width EastAsianWidth(uint32 c);
351c3ac87e8SOliver Tappe 
352c3ac87e8SOliver Tappe 		static void ToUTF8(uint32 c, char** out);
353c3ac87e8SOliver Tappe 		static uint32 FromUTF8(const char** in);
354c3ac87e8SOliver Tappe 		static uint32 FromUTF8(const char* in);
355c3ac87e8SOliver Tappe 
356*219bf21eSJohn Scipione 		static size_t UTF8StringLength(const char* string);
357*219bf21eSJohn Scipione 		static size_t UTF8StringLength(const char* string, size_t maxLength);
358c3ac87e8SOliver Tappe 
359c3ac87e8SOliver Tappe 	private:
360c3ac87e8SOliver Tappe 		BUnicodeChar();
361c3ac87e8SOliver Tappe };
362c3ac87e8SOliver Tappe 
363c3ac87e8SOliver Tappe 
364c3ac87e8SOliver Tappe inline uint32
FromUTF8(const char * in)365c3ac87e8SOliver Tappe BUnicodeChar::FromUTF8(const char* in)
366c3ac87e8SOliver Tappe {
367c3ac87e8SOliver Tappe 	const char* string = in;
368c3ac87e8SOliver Tappe 	return FromUTF8(&string);
369c3ac87e8SOliver Tappe }
370c3ac87e8SOliver Tappe 
371c3ac87e8SOliver Tappe 
#endif	// _UNICODE_CHAR_H_