xref: /webtrees/app/Encodings/UTF8.php (revision 5bfc689774bb9a6401271c4ed15a6d50652c991b)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2022 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Encodings;
21
22use InvalidArgumentException;
23
24use function chr;
25use function mb_substitute_character;
26
27/**
28 * Convert between (potentially invalid) UTF-8 and UTF-8.
29 */
30class UTF8 extends AbstractEncoding
31{
32    public const NAME = 'UTF-8';
33
34    public const START_OF_STRING                                       = "\u{0098}";
35    public const STRING_TERMINATOR                                     = "\u{009C}";
36    public const NO_BREAK_SPACE                                        = "\u{00A0}";
37    public const INVERTED_EXCLAMATION_MARK                             = "\u{00A1}";
38    public const CENT_SIGN                                             = "\u{00A2}";
39    public const POUND_SIGN                                            = "\u{00A3}";
40    public const CURRENCY_SIGN                                         = "\u{00A4}";
41    public const YEN_SIGN                                              = "\u{00A5}";
42    public const BROKEN_BAR                                            = "\u{00A6}";
43    public const SECTION_SIGN                                          = "\u{00A7}";
44    public const DIAERESIS                                             = "\u{00A8}";
45    public const COPYRIGHT_SIGN                                        = "\u{00A9}";
46    public const FEMININE_ORDINAL_INDICATOR                            = "\u{00AA}";
47    public const LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK             = "\u{00AB}";
48    public const NOT_SIGN                                              = "\u{00AC}";
49    public const SOFT_HYPHEN                                           = "\u{00AD}";
50    public const REGISTERED_SIGN                                       = "\u{00AE}";
51    public const MACRON                                                = "\u{00AF}";
52    public const DEGREE_SIGN                                           = "\u{00B0}";
53    public const PLUS_MINUS_SIGN                                       = "\u{00B1}";
54    public const SUPERSCRIPT_TWO                                       = "\u{00B2}";
55    public const SUPERSCRIPT_THREE                                     = "\u{00B3}";
56    public const ACUTE_ACCENT                                          = "\u{00B4}";
57    public const MICRO_SIGN                                            = "\u{00B5}";
58    public const PILCROW_SIGN                                          = "\u{00B6}";
59    public const MIDDLE_DOT                                            = "\u{00B7}";
60    public const CEDILLA                                               = "\u{00B8}";
61    public const SUPERSCRIPT_ONE                                       = "\u{00B9}";
62    public const MASCULINE_ORDINAL_INDICATOR                           = "\u{00BA}";
63    public const RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK            = "\u{00BB}";
64    public const VULGAR_FRACTION_ONE_QUARTER                           = "\u{00BC}";
65    public const VULGAR_FRACTION_ONE_HALF                              = "\u{00BD}";
66    public const VULGAR_FRACTION_THREE_QUARTERS                        = "\u{00BE}";
67    public const INVERTED_QUESTION_MARK                                = "\u{00BF}";
68    public const LATIN_CAPITAL_LETTER_A_WITH_GRAVE                     = "\u{00C0}";
69    public const LATIN_CAPITAL_LETTER_A_WITH_ACUTE                     = "\u{00C1}";
70    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX                = "\u{00C2}";
71    public const LATIN_CAPITAL_LETTER_A_WITH_TILDE                     = "\u{00C3}";
72    public const LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS                 = "\u{00C4}";
73    public const LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE                = "\u{00C5}";
74    public const LATIN_CAPITAL_LETTER_AE                               = "\u{00C6}";
75    public const LATIN_CAPITAL_LETTER_C_WITH_CEDILLA                   = "\u{00C7}";
76    public const LATIN_CAPITAL_LETTER_E_WITH_GRAVE                     = "\u{00C8}";
77    public const LATIN_CAPITAL_LETTER_E_WITH_ACUTE                     = "\u{00C9}";
78    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX                = "\u{00CA}";
79    public const LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS                 = "\u{00CB}";
80    public const LATIN_CAPITAL_LETTER_I_WITH_GRAVE                     = "\u{00CC}";
81    public const LATIN_CAPITAL_LETTER_I_WITH_ACUTE                     = "\u{00CD}";
82    public const LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX                = "\u{00CE}";
83    public const LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS                 = "\u{00CF}";
84    public const LATIN_CAPITAL_LETTER_ETH                              = "\u{00D0}";
85    public const LATIN_CAPITAL_LETTER_N_WITH_TILDE                     = "\u{00D1}";
86    public const LATIN_CAPITAL_LETTER_O_WITH_GRAVE                     = "\u{00D2}";
87    public const LATIN_CAPITAL_LETTER_O_WITH_ACUTE                     = "\u{00D3}";
88    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX                = "\u{00D4}";
89    public const LATIN_CAPITAL_LETTER_O_WITH_TILDE                     = "\u{00D5}";
90    public const LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS                 = "\u{00D6}";
91    public const MULTIPLICATION_SIGN                                   = "\u{00D7}";
92    public const LATIN_CAPITAL_LETTER_O_WITH_STROKE                    = "\u{00D8}";
93    public const LATIN_CAPITAL_LETTER_U_WITH_GRAVE                     = "\u{00D9}";
94    public const LATIN_CAPITAL_LETTER_U_WITH_ACUTE                     = "\u{00DA}";
95    public const LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX                = "\u{00DB}";
96    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS                 = "\u{00DC}";
97    public const LATIN_CAPITAL_LETTER_Y_WITH_ACUTE                     = "\u{00DD}";
98    public const LATIN_CAPITAL_LETTER_THORN                            = "\u{00DE}";
99    public const LATIN_SMALL_LETTER_SHARP_S                            = "\u{00DF}";
100    public const LATIN_SMALL_LETTER_A_WITH_GRAVE                       = "\u{00E0}";
101    public const LATIN_SMALL_LETTER_A_WITH_ACUTE                       = "\u{00E1}";
102    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX                  = "\u{00E2}";
103    public const LATIN_SMALL_LETTER_A_WITH_TILDE                       = "\u{00E3}";
104    public const LATIN_SMALL_LETTER_A_WITH_DIAERESIS                   = "\u{00E4}";
105    public const LATIN_SMALL_LETTER_A_WITH_RING_ABOVE                  = "\u{00E5}";
106    public const LATIN_SMALL_LETTER_AE                                 = "\u{00E6}";
107    public const LATIN_SMALL_LETTER_C_WITH_CEDILLA                     = "\u{00E7}";
108    public const LATIN_SMALL_LETTER_E_WITH_GRAVE                       = "\u{00E8}";
109    public const LATIN_SMALL_LETTER_E_WITH_ACUTE                       = "\u{00E9}";
110    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX                  = "\u{00EA}";
111    public const LATIN_SMALL_LETTER_E_WITH_DIAERESIS                   = "\u{00EB}";
112    public const LATIN_SMALL_LETTER_I_WITH_GRAVE                       = "\u{00EC}";
113    public const LATIN_SMALL_LETTER_I_WITH_ACUTE                       = "\u{00ED}";
114    public const LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX                  = "\u{00EE}";
115    public const LATIN_SMALL_LETTER_I_WITH_DIAERESIS                   = "\u{00EF}";
116    public const LATIN_SMALL_LETTER_ETH                                = "\u{00F0}";
117    public const LATIN_SMALL_LETTER_N_WITH_TILDE                       = "\u{00F1}";
118    public const LATIN_SMALL_LETTER_O_WITH_GRAVE                       = "\u{00F2}";
119    public const LATIN_SMALL_LETTER_O_WITH_ACUTE                       = "\u{00F3}";
120    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX                  = "\u{00F4}";
121    public const LATIN_SMALL_LETTER_O_WITH_TILDE                       = "\u{00F5}";
122    public const LATIN_SMALL_LETTER_O_WITH_DIAERESIS                   = "\u{00F6}";
123    public const DIVISION_SIGN                                         = "\u{00F7}";
124    public const LATIN_SMALL_LETTER_O_WITH_STROKE                      = "\u{00F8}";
125    public const LATIN_SMALL_LETTER_U_WITH_GRAVE                       = "\u{00F9}";
126    public const LATIN_SMALL_LETTER_U_WITH_ACUTE                       = "\u{00FA}";
127    public const LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX                  = "\u{00FB}";
128    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS                   = "\u{00FC}";
129    public const LATIN_SMALL_LETTER_Y_WITH_ACUTE                       = "\u{00FD}";
130    public const LATIN_SMALL_LETTER_THORN                              = "\u{00FE}";
131    public const LATIN_SMALL_LETTER_Y_WITH_DIAERESIS                   = "\u{00FF}";
132    public const LATIN_CAPITAL_LETTER_A_WITH_MACRON                    = "\u{0100}";
133    public const LATIN_SMALL_LETTER_A_WITH_MACRON                      = "\u{0101}";
134    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE                     = "\u{0102}";
135    public const LATIN_SMALL_LETTER_A_WITH_BREVE                       = "\u{0103}";
136    public const LATIN_CAPITAL_LETTER_A_WITH_OGONEK                    = "\u{0104}";
137    public const LATIN_SMALL_LETTER_A_WITH_OGONEK                      = "\u{0105}";
138    public const LATIN_CAPITAL_LETTER_C_WITH_ACUTE                     = "\u{0106}";
139    public const LATIN_SMALL_LETTER_C_WITH_ACUTE                       = "\u{0107}";
140    public const LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX                = "\u{0108}";
141    public const LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX                  = "\u{0109}";
142    public const LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE                 = "\u{010A}";
143    public const LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE                   = "\u{010B}";
144    public const LATIN_CAPITAL_LETTER_C_WITH_CARON                     = "\u{010C}";
145    public const LATIN_SMALL_LETTER_C_WITH_CARON                       = "\u{010D}";
146    public const LATIN_CAPITAL_LETTER_D_WITH_CARON                     = "\u{010E}";
147    public const LATIN_SMALL_LETTER_D_WITH_CARON                       = "\u{010F}";
148    public const LATIN_CAPITAL_LETTER_D_WITH_STROKE                    = "\u{0110}";
149    public const LATIN_SMALL_LETTER_D_WITH_STROKE                      = "\u{0111}";
150    public const LATIN_CAPITAL_LETTER_E_WITH_MACRON                    = "\u{0112}";
151    public const LATIN_SMALL_LETTER_E_WITH_MACRON                      = "\u{0113}";
152    public const LATIN_CAPITAL_LETTER_E_WITH_BREVE                     = "\u{0114}";
153    public const LATIN_SMALL_LETTER_E_WITH_BREVE                       = "\u{0115}";
154    public const LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE                 = "\u{0116}";
155    public const LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE                   = "\u{0117}";
156    public const LATIN_CAPITAL_LETTER_E_WITH_OGONEK                    = "\u{0118}";
157    public const LATIN_SMALL_LETTER_E_WITH_OGONEK                      = "\u{0119}";
158    public const LATIN_CAPITAL_LETTER_E_WITH_CARON                     = "\u{011A}";
159    public const LATIN_SMALL_LETTER_E_WITH_CARON                       = "\u{011B}";
160    public const LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX                = "\u{011C}";
161    public const LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX                  = "\u{011D}";
162    public const LATIN_CAPITAL_LETTER_G_WITH_BREVE                     = "\u{011E}";
163    public const LATIN_SMALL_LETTER_G_WITH_BREVE                       = "\u{011F}";
164    public const LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE                 = "\u{0120}";
165    public const LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE                   = "\u{0121}";
166    public const LATIN_CAPITAL_LETTER_G_WITH_CEDILLA                   = "\u{0122}";
167    public const LATIN_SMALL_LETTER_G_WITH_CEDILLA                     = "\u{0123}";
168    public const LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX                = "\u{0124}";
169    public const LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX                  = "\u{0125}";
170    public const LATIN_CAPITAL_LETTER_H_WITH_STROKE                    = "\u{0126}";
171    public const LATIN_SMALL_LETTER_H_WITH_STROKE                      = "\u{0127}";
172    public const LATIN_CAPITAL_LETTER_I_WITH_TILDE                     = "\u{0128}";
173    public const LATIN_SMALL_LETTER_I_WITH_TILDE                       = "\u{0129}";
174    public const LATIN_CAPITAL_LETTER_I_WITH_MACRON                    = "\u{012A}";
175    public const LATIN_SMALL_LETTER_I_WITH_MACRON                      = "\u{012B}";
176    public const LATIN_CAPITAL_LETTER_I_WITH_BREVE                     = "\u{012C}";
177    public const LATIN_SMALL_LETTER_I_WITH_BREVE                       = "\u{012D}";
178    public const LATIN_CAPITAL_LETTER_I_WITH_OGONEK                    = "\u{012E}";
179    public const LATIN_SMALL_LETTER_I_WITH_OGONEK                      = "\u{012F}";
180    public const LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE                 = "\u{0130}";
181    public const LATIN_SMALL_LETTER_DOTLESS_I                          = "\u{0131}";
182    public const LATIN_CAPITAL_LIGATURE_IJ                             = "\u{0132}";
183    public const LATIN_SMALL_LIGATURE_IJ                               = "\u{0133}";
184    public const LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX                = "\u{0134}";
185    public const LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX                  = "\u{0135}";
186    public const LATIN_CAPITAL_LETTER_K_WITH_CEDILLA                   = "\u{0136}";
187    public const LATIN_SMALL_LETTER_K_WITH_CEDILLA                     = "\u{0137}";
188    public const LATIN_SMALL_LETTER_KRA                                = "\u{0138}";
189    public const LATIN_CAPITAL_LETTER_L_WITH_ACUTE                     = "\u{0139}";
190    public const LATIN_SMALL_LETTER_L_WITH_ACUTE                       = "\u{013A}";
191    public const LATIN_CAPITAL_LETTER_L_WITH_CEDILLA                   = "\u{013B}";
192    public const LATIN_SMALL_LETTER_L_WITH_CEDILLA                     = "\u{013C}";
193    public const LATIN_CAPITAL_LETTER_L_WITH_CARON                     = "\u{013D}";
194    public const LATIN_SMALL_LETTER_L_WITH_CARON                       = "\u{013E}";
195    public const LATIN_CAPITAL_LETTER_L_WITH_MIDDLE_DOT                = "\u{013F}";
196    public const LATIN_SMALL_LETTER_L_WITH_MIDDLE_DOT                  = "\u{0140}";
197    public const LATIN_CAPITAL_LETTER_L_WITH_STROKE                    = "\u{0141}";
198    public const LATIN_SMALL_LETTER_L_WITH_STROKE                      = "\u{0142}";
199    public const LATIN_CAPITAL_LETTER_N_WITH_ACUTE                     = "\u{0143}";
200    public const LATIN_SMALL_LETTER_N_WITH_ACUTE                       = "\u{0144}";
201    public const LATIN_CAPITAL_LETTER_N_WITH_CEDILLA                   = "\u{0145}";
202    public const LATIN_SMALL_LETTER_N_WITH_CEDILLA                     = "\u{0146}";
203    public const LATIN_CAPITAL_LETTER_N_WITH_CARON                     = "\u{0147}";
204    public const LATIN_SMALL_LETTER_N_WITH_CARON                       = "\u{0148}";
205    public const LATIN_SMALL_LETTER_N_PRECEDED_BY_APOSTROPHE           = "\u{0149}";
206    public const LATIN_CAPITAL_LETTER_ENG                              = "\u{014A}";
207    public const LATIN_SMALL_LETTER_ENG                                = "\u{014B}";
208    public const LATIN_CAPITAL_LETTER_O_WITH_MACRON                    = "\u{014C}";
209    public const LATIN_SMALL_LETTER_O_WITH_MACRON                      = "\u{014D}";
210    public const LATIN_CAPITAL_LETTER_O_WITH_BREVE                     = "\u{014E}";
211    public const LATIN_SMALL_LETTER_O_WITH_BREVE                       = "\u{014F}";
212    public const LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE              = "\u{0150}";
213    public const LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE                = "\u{0151}";
214    public const LATIN_CAPITAL_LIGATURE_OE                             = "\u{0152}";
215    public const LATIN_SMALL_LIGATURE_OE                               = "\u{0153}";
216    public const LATIN_CAPITAL_LETTER_R_WITH_ACUTE                     = "\u{0154}";
217    public const LATIN_SMALL_LETTER_R_WITH_ACUTE                       = "\u{0155}";
218    public const LATIN_CAPITAL_LETTER_R_WITH_CEDILLA                   = "\u{0156}";
219    public const LATIN_SMALL_LETTER_R_WITH_CEDILLA                     = "\u{0157}";
220    public const LATIN_CAPITAL_LETTER_R_WITH_CARON                     = "\u{0158}";
221    public const LATIN_SMALL_LETTER_R_WITH_CARON                       = "\u{0159}";
222    public const LATIN_CAPITAL_LETTER_S_WITH_ACUTE                     = "\u{015A}";
223    public const LATIN_SMALL_LETTER_S_WITH_ACUTE                       = "\u{015B}";
224    public const LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX                = "\u{015C}";
225    public const LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX                  = "\u{015D}";
226    public const LATIN_CAPITAL_LETTER_S_WITH_CEDILLA                   = "\u{015E}";
227    public const LATIN_SMALL_LETTER_S_WITH_CEDILLA                     = "\u{015F}";
228    public const LATIN_CAPITAL_LETTER_S_WITH_CARON                     = "\u{0160}";
229    public const LATIN_SMALL_LETTER_S_WITH_CARON                       = "\u{0161}";
230    public const LATIN_CAPITAL_LETTER_T_WITH_CEDILLA                   = "\u{0162}";
231    public const LATIN_SMALL_LETTER_T_WITH_CEDILLA                     = "\u{0163}";
232    public const LATIN_CAPITAL_LETTER_T_WITH_CARON                     = "\u{0164}";
233    public const LATIN_SMALL_LETTER_T_WITH_CARON                       = "\u{0165}";
234    public const LATIN_CAPITAL_LETTER_T_WITH_STROKE                    = "\u{0166}";
235    public const LATIN_SMALL_LETTER_T_WITH_STROKE                      = "\u{0167}";
236    public const LATIN_CAPITAL_LETTER_U_WITH_TILDE                     = "\u{0168}";
237    public const LATIN_SMALL_LETTER_U_WITH_TILDE                       = "\u{0169}";
238    public const LATIN_CAPITAL_LETTER_U_WITH_MACRON                    = "\u{016A}";
239    public const LATIN_SMALL_LETTER_U_WITH_MACRON                      = "\u{016B}";
240    public const LATIN_CAPITAL_LETTER_U_WITH_BREVE                     = "\u{016C}";
241    public const LATIN_SMALL_LETTER_U_WITH_BREVE                       = "\u{016D}";
242    public const LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE                = "\u{016E}";
243    public const LATIN_SMALL_LETTER_U_WITH_RING_ABOVE                  = "\u{016F}";
244    public const LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE              = "\u{0170}";
245    public const LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE                = "\u{0171}";
246    public const LATIN_CAPITAL_LETTER_U_WITH_OGONEK                    = "\u{0172}";
247    public const LATIN_SMALL_LETTER_U_WITH_OGONEK                      = "\u{0173}";
248    public const LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX                = "\u{0174}";
249    public const LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX                  = "\u{0175}";
250    public const LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX                = "\u{0176}";
251    public const LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX                  = "\u{0177}";
252    public const LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS                 = "\u{0178}";
253    public const LATIN_CAPITAL_LETTER_Z_WITH_ACUTE                     = "\u{0179}";
254    public const LATIN_SMALL_LETTER_Z_WITH_ACUTE                       = "\u{017A}";
255    public const LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE                 = "\u{017B}";
256    public const LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE                   = "\u{017C}";
257    public const LATIN_CAPITAL_LETTER_Z_WITH_CARON                     = "\u{017D}";
258    public const LATIN_SMALL_LETTER_Z_WITH_CARON                       = "\u{017E}";
259    public const LATIN_SMALL_LETTER_LONG_S                             = "\u{017F}";
260    public const LATIN_SMALL_LETTER_B_WITH_STROKE                      = "\u{0180}";
261    public const LATIN_CAPITAL_LETTER_B_WITH_HOOK                      = "\u{0181}";
262    public const LATIN_CAPITAL_LETTER_B_WITH_TOPBAR                    = "\u{0182}";
263    public const LATIN_SMALL_LETTER_B_WITH_TOPBAR                      = "\u{0183}";
264    public const LATIN_CAPITAL_LETTER_F_WITH_HOOK                      = "\u{0191}";
265    public const LATIN_SMALL_LETTER_F_WITH_HOOK                        = "\u{0192}";
266    public const LATIN_SMALL_LETTER_O_WITH_HORN                        = "\u{01A1}";
267    public const LATIN_CAPITAL_LETTER_O_WITH_HORN                      = "\u{01A0}";
268    public const LATIN_CAPITAL_LETTER_U_WITH_HORN                      = "\u{01AF}";
269    public const LATIN_SMALL_LETTER_U_WITH_HORN                        = "\u{01B0}";
270    public const LATIN_CAPITAL_LETTER_A_WITH_CARON                     = "\u{01CD}";
271    public const LATIN_SMALL_LETTER_A_WITH_CARON                       = "\u{01CE}";
272    public const LATIN_CAPITAL_LETTER_I_WITH_CARON                     = "\u{01CF}";
273    public const LATIN_SMALL_LETTER_I_WITH_CARON                       = "\u{01D0}";
274    public const LATIN_CAPITAL_LETTER_O_WITH_CARON                     = "\u{01D1}";
275    public const LATIN_SMALL_LETTER_O_WITH_CARON                       = "\u{01D2}";
276    public const LATIN_CAPITAL_LETTER_U_WITH_CARON                     = "\u{01D3}";
277    public const LATIN_SMALL_LETTER_U_WITH_CARON                       = "\u{01D4}";
278    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_MACRON      = "\u{01D5}";
279    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_MACRON        = "\u{01D6}";
280    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_ACUTE       = "\u{01D7}";
281    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_ACUTE         = "\u{01D8}";
282    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_CARON       = "\u{01D9}";
283    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_CARON         = "\u{01DA}";
284    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_GRAVE       = "\u{01DB}";
285    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_GRAVE         = "\u{01DC}";
286    public const LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS_AND_MACRON      = "\u{01DE}";
287    public const LATIN_SMALL_LETTER_A_WITH_DIAERESIS_AND_MACRON        = "\u{01DF}";
288    public const LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON      = "\u{01E0}";
289    public const LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON        = "\u{01E1}";
290    public const LATIN_CAPITAL_LETTER_AE_WITH_MACRON                   = "\u{01E2}";
291    public const LATIN_SMALL_LETTER_AE_WITH_MACRON                     = "\u{01E3}";
292    public const LATIN_CAPITAL_LETTER_G_WITH_CARON                     = "\u{01E6}";
293    public const LATIN_SMALL_LETTER_G_WITH_CARON                       = "\u{01E7}";
294    public const LATIN_CAPITAL_LETTER_K_WITH_CARON                     = "\u{01E8}";
295    public const LATIN_SMALL_LETTER_K_WITH_CARON                       = "\u{01E9}";
296    public const LATIN_CAPITAL_LETTER_O_WITH_OGONEK                    = "\u{01EA}";
297    public const LATIN_SMALL_LETTER_O_WITH_OGONEK                      = "\u{01EB}";
298    public const LATIN_CAPITAL_LETTER_O_WITH_OGONEK_AND_MACRON         = "\u{01EC}";
299    public const LATIN_SMALL_LETTER_O_WITH_OGONEK_AND_MACRON           = "\u{01ED}";
300    public const LATIN_SMALL_LETTER_J_WITH_CARON                       = "\u{01F0}";
301    public const LATIN_CAPITAL_LETTER_G_WITH_ACUTE                     = "\u{01F4}";
302    public const LATIN_SMALL_LETTER_G_WITH_ACUTE                       = "\u{01F5}";
303    public const LATIN_CAPITAL_LETTER_N_WITH_GRAVE                     = "\u{01F8}";
304    public const LATIN_SMALL_LETTER_N_WITH_GRAVE                       = "\u{01F9}";
305    public const LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE      = "\u{01FA}";
306    public const LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE        = "\u{01FB}";
307    public const LATIN_CAPITAL_LETTER_AE_WITH_ACUTE                    = "\u{01FC}";
308    public const LATIN_SMALL_LETTER_AE_WITH_ACUTE                      = "\u{01FD}";
309    public const LATIN_CAPITAL_LETTER_O_WITH_STROKE_AND_ACUTE          = "\u{01FE}";
310    public const LATIN_SMALL_LETTER_O_WITH_STROKE_AND_ACUTE            = "\u{01FF}";
311    public const LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW               = "\u{0218}";
312    public const LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW                 = "\u{0219}";
313    public const LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW               = "\u{021A}";
314    public const LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW                 = "\u{021B}";
315    public const LATIN_CAPITAL_LETTER_H_WITH_CARON                     = "\u{021E}";
316    public const LATIN_SMALL_LETTER_H_WITH_CARON                       = "\u{021F}";
317    public const LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE                 = "\u{0226}";
318    public const LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE                   = "\u{0227}";
319    public const LATIN_CAPITAL_LETTER_E_WITH_CEDILLA                   = "\u{0228}";
320    public const LATIN_SMALL_LETTER_E_WITH_CEDILLA                     = "\u{0229}";
321    public const LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS_AND_MACRON      = "\u{022A}";
322    public const LATIN_SMALL_LETTER_O_WITH_DIAERESIS_AND_MACRON        = "\u{022B}";
323    public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_MACRON          = "\u{022C}";
324    public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_MACRON            = "\u{022D}";
325    public const LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE                 = "\u{022E}";
326    public const LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE                   = "\u{022F}";
327    public const LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON      = "\u{0230}";
328    public const LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON        = "\u{0231}";
329    public const LATIN_CAPITAL_LETTER_Y_WITH_MACRON                    = "\u{0232}";
330    public const LATIN_SMALL_LETTER_Y_WITH_MACRON                      = "\u{0233}";
331    public const MODIFIER_LETTER_PRIME                                 = "\u{02B9}";
332    public const MODIFIER_LETTER_DOUBLE_PRIME                          = "\u{02BA}";
333    public const MODIFIER_LETTER_TURNED_COMMA                          = "\u{02BB}";
334    public const MODIFIER_LETTER_APOSTROPHE                            = "\u{02BC}";
335    public const MODIFIER_LETTER_CIRCUMFLEX_ACCENT                     = "\u{02C6}";
336    public const CARON                                                 = "\u{02C7}";
337    public const BREVE                                                 = "\u{02D8}";
338    public const DOT_ABOVE                                             = "\u{02D9}";
339    public const RING_ABOVE                                            = "\u{02DA}";
340    public const OGONEK                                                = "\u{02DB}";
341    public const SMALL_TILDE                                           = "\u{02DC}";
342    public const DOUBLE_ACUTE_ACCENT                                   = "\u{02DD}";
343    public const COMBINING_GRAVE_ACCENT                                = "\u{0300}";
344    public const COMBINING_ACUTE_ACCENT                                = "\u{0301}";
345    public const COMBINING_CIRCUMFLEX_ACCENT                           = "\u{0302}";
346    public const COMBINING_TILDE                                       = "\u{0303}";
347    public const COMBINING_MACRON                                      = "\u{0304}";
348    public const COMBINING_OVERLINE                                    = "\u{0305}";
349    public const COMBINING_BREVE                                       = "\u{0306}";
350    public const COMBINING_DOT_ABOVE                                   = "\u{0307}";
351    public const COMBINING_DIAERESIS                                   = "\u{0308}";
352    public const COMBINING_HOOK_ABOVE                                  = "\u{0309}";
353    public const COMBINING_RING_ABOVE                                  = "\u{030A}";
354    public const COMBINING_DOUBLE_ACUTE_ACCENT                         = "\u{030B}";
355    public const COMBINING_CARON                                       = "\u{030C}";
356    public const COMBINING_CANDRABINDU                                 = "\u{0310}";
357    public const COMBINING_COMMA_ABOVE                                 = "\u{0313}";
358    public const COMBINING_COMMA_ABOVE_RIGHT                           = "\u{0315}";
359    public const COMBINING_HORN                                        = "\u{031B}";
360    public const COMBINING_LEFT_HALF_RING_BELOW                        = "\u{031C}";
361    public const COMBINING_DOT_BELOW                                   = "\u{0323}";
362    public const COMBINING_DIAERESIS_BELOW                             = "\u{0324}";
363    public const COMBINING_RING_BELOW                                  = "\u{0325}";
364    public const COMBINING_COMMA_BELOW                                 = "\u{0326}";
365    public const COMBINING_CEDILLA                                     = "\u{0327}";
366    public const COMBINING_OGONEK                                      = "\u{0328}";
367    public const COMBINING_BRIDGE_BELOW                                = "\u{032A}";
368    public const COMBINING_BREVE_BELOW                                 = "\u{032E}";
369    public const COMBINING_LOW_LINE                                    = "\u{0332}";
370    public const COMBINING_DOUBLE_LOW_LINE                             = "\u{0333}";
371    public const COMBINING_LONG_SOLIDUS_OVERLAY                        = "\u{0338}";
372    public const COMBINING_DOUBLE_TILDE                                = "\u{0360}";
373    public const COMBINING_DOUBLE_INVERTED_BREVE                       = "\u{0361}";
374    public const GREEK_CAPITAL_LETTER_GAMMA                            = "\u{0393}";
375    public const GREEK_CAPITAL_LETTER_THETA                            = "\u{0398}";
376    public const GREEK_CAPITAL_LETTER_SIGMA                            = "\u{03A3}";
377    public const GREEK_CAPITAL_LETTER_PHI                              = "\u{03A6}";
378    public const GREEK_CAPITAL_LETTER_OMEGA                            = "\u{03A9}";
379    public const GREEK_SMALL_LETTER_ALPHA                              = "\u{03B1}";
380    public const GREEK_SMALL_LETTER_DELTA                              = "\u{03B4}";
381    public const GREEK_SMALL_LETTER_EPSILON                            = "\u{03B5}";
382    public const GREEK_SMALL_LETTER_PI                                 = "\u{03C0}";
383    public const GREEK_SMALL_LETTER_SIGMA                              = "\u{03C3}";
384    public const GREEK_SMALL_LETTER_TAU                                = "\u{03C4}";
385    public const GREEK_SMALL_LETTER_PHI                                = "\u{03C6}";
// Cyrillic letters (Unicode "Cyrillic" block, U+0400-U+04FF), in code-point order:
// extended capitals (U+0401-U+040F), the basic capitals (U+0410-U+042F),
// the basic small letters (U+0430-U+044F), extended small letters (U+0451-U+045F),
// and finally GHE WITH UPTURN (U+0490/U+0491).
386    public const CYRILLIC_CAPITAL_LETTER_IO                            = "\u{0401}";
387    public const CYRILLIC_CAPITAL_LETTER_DJE                           = "\u{0402}";
388    public const CYRILLIC_CAPITAL_LETTER_GJE                           = "\u{0403}";
// NOTE(review): "UKRANIAN" is spelled differently from "UKRAINIAN" in
// CYRILLIC_CAPITAL_LETTER_BYELORUSSIAN_UKRAINIAN_I below. The value is correct;
// the identifier is public, so it is left as-is here.
389    public const CYRILLIC_CAPITAL_LETTER_UKRANIAN_IE                   = "\u{0404}";
390    public const CYRILLIC_CAPITAL_LETTER_DZE                           = "\u{0405}";
391    public const CYRILLIC_CAPITAL_LETTER_BYELORUSSIAN_UKRAINIAN_I      = "\u{0406}";
392    public const CYRILLIC_CAPITAL_LETTER_YI                            = "\u{0407}";
393    public const CYRILLIC_CAPITAL_LETTER_JE                            = "\u{0408}";
394    public const CYRILLIC_CAPITAL_LETTER_LJE                           = "\u{0409}";
395    public const CYRILLIC_CAPITAL_LETTER_NJE                           = "\u{040A}";
396    public const CYRILLIC_CAPITAL_LETTER_TSHE                          = "\u{040B}";
397    public const CYRILLIC_CAPITAL_LETTER_KJE                           = "\u{040C}";
398    public const CYRILLIC_CAPITAL_LETTER_SHORT_U                       = "\u{040E}";
399    public const CYRILLIC_CAPITAL_LETTER_DZHE                          = "\u{040F}";
// Basic capital letters, U+0410-U+042F (contiguous).
400    public const CYRILLIC_CAPITAL_LETTER_A                             = "\u{0410}";
401    public const CYRILLIC_CAPITAL_LETTER_BE                            = "\u{0411}";
402    public const CYRILLIC_CAPITAL_LETTER_VE                            = "\u{0412}";
403    public const CYRILLIC_CAPITAL_LETTER_GHE                           = "\u{0413}";
404    public const CYRILLIC_CAPITAL_LETTER_DE                            = "\u{0414}";
405    public const CYRILLIC_CAPITAL_LETTER_IE                            = "\u{0415}";
406    public const CYRILLIC_CAPITAL_LETTER_ZHE                           = "\u{0416}";
407    public const CYRILLIC_CAPITAL_LETTER_ZE                            = "\u{0417}";
408    public const CYRILLIC_CAPITAL_LETTER_I                             = "\u{0418}";
409    public const CYRILLIC_CAPITAL_LETTER_SHORT_I                       = "\u{0419}";
410    public const CYRILLIC_CAPITAL_LETTER_KA                            = "\u{041A}";
411    public const CYRILLIC_CAPITAL_LETTER_EL                            = "\u{041B}";
412    public const CYRILLIC_CAPITAL_LETTER_EM                            = "\u{041C}";
413    public const CYRILLIC_CAPITAL_LETTER_EN                            = "\u{041D}";
414    public const CYRILLIC_CAPITAL_LETTER_O                             = "\u{041E}";
415    public const CYRILLIC_CAPITAL_LETTER_PE                            = "\u{041F}";
416    public const CYRILLIC_CAPITAL_LETTER_ER                            = "\u{0420}";
417    public const CYRILLIC_CAPITAL_LETTER_ES                            = "\u{0421}";
418    public const CYRILLIC_CAPITAL_LETTER_TE                            = "\u{0422}";
419    public const CYRILLIC_CAPITAL_LETTER_U                             = "\u{0423}";
420    public const CYRILLIC_CAPITAL_LETTER_EF                            = "\u{0424}";
421    public const CYRILLIC_CAPITAL_LETTER_HA                            = "\u{0425}";
422    public const CYRILLIC_CAPITAL_LETTER_TSE                           = "\u{0426}";
423    public const CYRILLIC_CAPITAL_LETTER_CHE                           = "\u{0427}";
424    public const CYRILLIC_CAPITAL_LETTER_SHA                           = "\u{0428}";
425    public const CYRILLIC_CAPITAL_LETTER_SHCHA                         = "\u{0429}";
426    public const CYRILLIC_CAPITAL_LETTER_HARD_SIGN                     = "\u{042A}";
427    public const CYRILLIC_CAPITAL_LETTER_YERU                          = "\u{042B}";
428    public const CYRILLIC_CAPITAL_LETTER_SOFT_SIGN                     = "\u{042C}";
429    public const CYRILLIC_CAPITAL_LETTER_E                             = "\u{042D}";
430    public const CYRILLIC_CAPITAL_LETTER_YU                            = "\u{042E}";
431    public const CYRILLIC_CAPITAL_LETTER_YA                            = "\u{042F}";
// Basic small letters, U+0430-U+044F (contiguous; each is the capital's code point + 0x20).
432    public const CYRILLIC_SMALL_LETTER_A                               = "\u{0430}";
433    public const CYRILLIC_SMALL_LETTER_BE                              = "\u{0431}";
434    public const CYRILLIC_SMALL_LETTER_VE                              = "\u{0432}";
435    public const CYRILLIC_SMALL_LETTER_GHE                             = "\u{0433}";
436    public const CYRILLIC_SMALL_LETTER_DE                              = "\u{0434}";
437    public const CYRILLIC_SMALL_LETTER_IE                              = "\u{0435}";
438    public const CYRILLIC_SMALL_LETTER_ZHE                             = "\u{0436}";
439    public const CYRILLIC_SMALL_LETTER_ZE                              = "\u{0437}";
440    public const CYRILLIC_SMALL_LETTER_I                               = "\u{0438}";
441    public const CYRILLIC_SMALL_LETTER_SHORT_I                         = "\u{0439}";
442    public const CYRILLIC_SMALL_LETTER_KA                              = "\u{043A}";
443    public const CYRILLIC_SMALL_LETTER_EL                              = "\u{043B}";
444    public const CYRILLIC_SMALL_LETTER_EM                              = "\u{043C}";
445    public const CYRILLIC_SMALL_LETTER_EN                              = "\u{043D}";
446    public const CYRILLIC_SMALL_LETTER_O                               = "\u{043E}";
447    public const CYRILLIC_SMALL_LETTER_PE                              = "\u{043F}";
448    public const CYRILLIC_SMALL_LETTER_ER                              = "\u{0440}";
449    public const CYRILLIC_SMALL_LETTER_ES                              = "\u{0441}";
450    public const CYRILLIC_SMALL_LETTER_TE                              = "\u{0442}";
451    public const CYRILLIC_SMALL_LETTER_U                               = "\u{0443}";
452    public const CYRILLIC_SMALL_LETTER_EF                              = "\u{0444}";
453    public const CYRILLIC_SMALL_LETTER_HA                              = "\u{0445}";
454    public const CYRILLIC_SMALL_LETTER_TSE                             = "\u{0446}";
455    public const CYRILLIC_SMALL_LETTER_CHE                             = "\u{0447}";
456    public const CYRILLIC_SMALL_LETTER_SHA                             = "\u{0448}";
457    public const CYRILLIC_SMALL_LETTER_SHCHA                           = "\u{0449}";
458    public const CYRILLIC_SMALL_LETTER_HARD_SIGN                       = "\u{044A}";
459    public const CYRILLIC_SMALL_LETTER_YERU                            = "\u{044B}";
460    public const CYRILLIC_SMALL_LETTER_SOFT_SIGN                       = "\u{044C}";
461    public const CYRILLIC_SMALL_LETTER_E                               = "\u{044D}";
462    public const CYRILLIC_SMALL_LETTER_YU                              = "\u{044E}";
463    public const CYRILLIC_SMALL_LETTER_YA                              = "\u{044F}";
// Extended small letters, mirroring the extended capitals at the top of this group.
464    public const CYRILLIC_SMALL_LETTER_IO                              = "\u{0451}";
465    public const CYRILLIC_SMALL_LETTER_DJE                             = "\u{0452}";
466    public const CYRILLIC_SMALL_LETTER_GJE                             = "\u{0453}";
// NOTE(review): same "UKRANIAN" spelling as the capital-letter constant above.
467    public const CYRILLIC_SMALL_LETTER_UKRANIAN_IE                     = "\u{0454}";
468    public const CYRILLIC_SMALL_LETTER_DZE                             = "\u{0455}";
469    public const CYRILLIC_SMALL_LETTER_BYELORUSSIAN_UKRAINIAN_I        = "\u{0456}";
470    public const CYRILLIC_SMALL_LETTER_YI                              = "\u{0457}";
471    public const CYRILLIC_SMALL_LETTER_JE                              = "\u{0458}";
472    public const CYRILLIC_SMALL_LETTER_LJE                             = "\u{0459}";
473    public const CYRILLIC_SMALL_LETTER_NJE                             = "\u{045A}";
474    public const CYRILLIC_SMALL_LETTER_TSHE                            = "\u{045B}";
475    public const CYRILLIC_SMALL_LETTER_KJE                             = "\u{045C}";
476    public const CYRILLIC_SMALL_LETTER_SHORT_U                         = "\u{045E}";
477    public const CYRILLIC_SMALL_LETTER_DZHE                            = "\u{045F}";
478    public const CYRILLIC_CAPITAL_LETTER_GHE_WITH_UPTURN               = "\u{0490}";
479    public const CYRILLIC_SMALL_LETTER_GHE_WITH_UPTURN                 = "\u{0491}";
// Precomposed Latin letters with diacritics (Unicode "Latin Extended Additional"
// block, U+1E00-U+1EFF). Entries come in capital/small pairs: the capital has the
// even code point and the small letter the following odd one. The exceptions are
// the three small-only letters at U+1E97-U+1E99 and the capital sharp S (U+1E9E).
480    public const LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW                = "\u{1E00}";
481    public const LATIN_SMALL_LETTER_A_WITH_RING_BELOW                  = "\u{1E01}";
482    public const LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE                 = "\u{1E02}";
483    public const LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE                   = "\u{1E03}";
484    public const LATIN_CAPITAL_LETTER_B_WITH_DOT_BELOW                 = "\u{1E04}";
485    public const LATIN_SMALL_LETTER_B_WITH_DOT_BELOW                   = "\u{1E05}";
486    public const LATIN_CAPITAL_LETTER_C_WITH_CEDILLA_AND_ACUTE         = "\u{1E08}";
487    public const LATIN_SMALL_LETTER_C_WITH_CEDILLA_AND_ACUTE           = "\u{1E09}";
488    public const LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE                 = "\u{1E0A}";
489    public const LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE                   = "\u{1E0B}";
490    public const LATIN_CAPITAL_LETTER_D_WITH_DOT_BELOW                 = "\u{1E0C}";
491    public const LATIN_SMALL_LETTER_D_WITH_DOT_BELOW                   = "\u{1E0D}";
// NOTE(review): U+1E9E is listed here out of code-point order (it would otherwise
// sit between U+1E99 and U+1EA0). Declaration order does not affect the value.
492    public const LATIN_CAPITAL_LETTER_SHARP_S                          = "\u{1E9E}";
493    public const LATIN_CAPITAL_LETTER_D_WITH_CEDILLA                   = "\u{1E10}";
494    public const LATIN_SMALL_LETTER_D_WITH_CEDILLA                     = "\u{1E11}";
495    public const LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_GRAVE          = "\u{1E14}";
496    public const LATIN_SMALL_LETTER_E_WITH_MACRON_AND_GRAVE            = "\u{1E15}";
497    public const LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_ACUTE          = "\u{1E16}";
498    public const LATIN_SMALL_LETTER_E_WITH_MACRON_AND_ACUTE            = "\u{1E17}";
499    public const LATIN_CAPITAL_LETTER_E_WITH_CEDILLA_AND_BREVE         = "\u{1E1C}";
500    public const LATIN_SMALL_LETTER_E_WITH_CEDILLA_AND_BREVE           = "\u{1E1D}";
501    public const LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE                 = "\u{1E1E}";
502    public const LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE                   = "\u{1E1F}";
503    public const LATIN_CAPITAL_LETTER_G_WITH_MACRON                    = "\u{1E20}";
504    public const LATIN_SMALL_LETTER_G_WITH_MACRON                      = "\u{1E21}";
505    public const LATIN_CAPITAL_LETTER_H_WITH_DOT_ABOVE                 = "\u{1E22}";
506    public const LATIN_SMALL_LETTER_H_WITH_DOT_ABOVE                   = "\u{1E23}";
507    public const LATIN_CAPITAL_LETTER_H_WITH_DOT_BELOW                 = "\u{1E24}";
508    public const LATIN_SMALL_LETTER_H_WITH_DOT_BELOW                   = "\u{1E25}";
509    public const LATIN_CAPITAL_LETTER_H_WITH_DIAERESIS                 = "\u{1E26}";
510    public const LATIN_SMALL_LETTER_H_WITH_DIAERESIS                   = "\u{1E27}";
511    public const LATIN_CAPITAL_LETTER_H_WITH_CEDILLA                   = "\u{1E28}";
512    public const LATIN_SMALL_LETTER_H_WITH_CEDILLA                     = "\u{1E29}";
513    public const LATIN_CAPITAL_LETTER_H_WITH_BREVE_BELOW               = "\u{1E2A}";
514    public const LATIN_SMALL_LETTER_H_WITH_BREVE_BELOW                 = "\u{1E2B}";
515    public const LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS_AND_ACUTE       = "\u{1E2E}";
516    public const LATIN_SMALL_LETTER_I_WITH_DIAERESIS_AND_ACUTE         = "\u{1E2F}";
517    public const LATIN_CAPITAL_LETTER_K_WITH_ACUTE                     = "\u{1E30}";
518    public const LATIN_SMALL_LETTER_K_WITH_ACUTE                       = "\u{1E31}";
519    public const LATIN_CAPITAL_LETTER_K_WITH_DOT_BELOW                 = "\u{1E32}";
520    public const LATIN_SMALL_LETTER_K_WITH_DOT_BELOW                   = "\u{1E33}";
521    public const LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW                 = "\u{1E36}";
522    public const LATIN_SMALL_LETTER_L_WITH_DOT_BELOW                   = "\u{1E37}";
523    public const LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW_AND_MACRON      = "\u{1E38}";
524    public const LATIN_SMALL_LETTER_L_WITH_DOT_BELOW_AND_MACRON        = "\u{1E39}";
525    public const LATIN_CAPITAL_LETTER_M_WITH_ACUTE                     = "\u{1E3E}";
526    public const LATIN_SMALL_LETTER_M_WITH_ACUTE                       = "\u{1E3F}";
527    public const LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE                 = "\u{1E40}";
528    public const LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE                   = "\u{1E41}";
529    public const LATIN_CAPITAL_LETTER_M_WITH_DOT_BELOW                 = "\u{1E42}";
530    public const LATIN_SMALL_LETTER_M_WITH_DOT_BELOW                   = "\u{1E43}";
531    public const LATIN_CAPITAL_LETTER_N_WITH_DOT_ABOVE                 = "\u{1E44}";
532    public const LATIN_SMALL_LETTER_N_WITH_DOT_ABOVE                   = "\u{1E45}";
533    public const LATIN_CAPITAL_LETTER_N_WITH_DOT_BELOW                 = "\u{1E46}";
534    public const LATIN_SMALL_LETTER_N_WITH_DOT_BELOW                   = "\u{1E47}";
535    public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_ACUTE           = "\u{1E4C}";
536    public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_ACUTE             = "\u{1E4D}";
537    public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_DIAERESIS       = "\u{1E4E}";
538    public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_DIAERESIS         = "\u{1E4F}";
539    public const LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_GRAVE          = "\u{1E50}";
540    public const LATIN_SMALL_LETTER_O_WITH_MACRON_AND_GRAVE            = "\u{1E51}";
541    public const LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_ACUTE          = "\u{1E52}";
542    public const LATIN_SMALL_LETTER_O_WITH_MACRON_AND_ACUTE            = "\u{1E53}";
543    public const LATIN_CAPITAL_LETTER_P_WITH_ACUTE                     = "\u{1E54}";
544    public const LATIN_SMALL_LETTER_P_WITH_ACUTE                       = "\u{1E55}";
545    public const LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE                 = "\u{1E56}";
546    public const LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE                   = "\u{1E57}";
547    public const LATIN_CAPITAL_LETTER_R_WITH_DOT_ABOVE                 = "\u{1E58}";
548    public const LATIN_SMALL_LETTER_R_WITH_DOT_ABOVE                   = "\u{1E59}";
549    public const LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW                 = "\u{1E5A}";
550    public const LATIN_SMALL_LETTER_R_WITH_DOT_BELOW                   = "\u{1E5B}";
551    public const LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW_AND_MACRON      = "\u{1E5C}";
552    public const LATIN_SMALL_LETTER_R_WITH_DOT_BELOW_AND_MACRON        = "\u{1E5D}";
553    public const LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE                 = "\u{1E60}";
554    public const LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE                   = "\u{1E61}";
555    public const LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW                 = "\u{1E62}";
556    public const LATIN_SMALL_LETTER_S_WITH_DOT_BELOW                   = "\u{1E63}";
557    public const LATIN_CAPITAL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE       = "\u{1E64}";
558    public const LATIN_SMALL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE         = "\u{1E65}";
559    public const LATIN_CAPITAL_LETTER_S_WITH_CARON_AND_DOT_ABOVE       = "\u{1E66}";
560    public const LATIN_SMALL_LETTER_S_WITH_CARON_AND_DOT_ABOVE         = "\u{1E67}";
561    public const LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE   = "\u{1E68}";
562    public const LATIN_SMALL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE     = "\u{1E69}";
563    public const LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE                 = "\u{1E6A}";
564    public const LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE                   = "\u{1E6B}";
565    public const LATIN_CAPITAL_LETTER_T_WITH_DOT_BELOW                 = "\u{1E6C}";
566    public const LATIN_SMALL_LETTER_T_WITH_DOT_BELOW                   = "\u{1E6D}";
567    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_BELOW           = "\u{1E72}";
568    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_BELOW             = "\u{1E73}";
569    public const LATIN_CAPITAL_LETTER_U_WITH_TILDE_AND_ACUTE           = "\u{1E78}";
570    public const LATIN_SMALL_LETTER_U_WITH_TILDE_AND_ACUTE             = "\u{1E79}";
571    public const LATIN_CAPITAL_LETTER_U_WITH_MACRON_AND_DIAERESIS      = "\u{1E7A}";
572    public const LATIN_SMALL_LETTER_U_WITH_MACRON_AND_DIAERESIS        = "\u{1E7B}";
573    public const LATIN_CAPITAL_LETTER_V_WITH_TILDE                     = "\u{1E7C}";
574    public const LATIN_SMALL_LETTER_V_WITH_TILDE                       = "\u{1E7D}";
575    public const LATIN_CAPITAL_LETTER_V_WITH_DOT_BELOW                 = "\u{1E7E}";
576    public const LATIN_SMALL_LETTER_V_WITH_DOT_BELOW                   = "\u{1E7F}";
577    public const LATIN_CAPITAL_LETTER_W_WITH_GRAVE                     = "\u{1E80}";
578    public const LATIN_SMALL_LETTER_W_WITH_GRAVE                       = "\u{1E81}";
579    public const LATIN_CAPITAL_LETTER_W_WITH_ACUTE                     = "\u{1E82}";
580    public const LATIN_SMALL_LETTER_W_WITH_ACUTE                       = "\u{1E83}";
581    public const LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS                 = "\u{1E84}";
582    public const LATIN_SMALL_LETTER_W_WITH_DIAERESIS                   = "\u{1E85}";
583    public const LATIN_CAPITAL_LETTER_W_WITH_DOT_ABOVE                 = "\u{1E86}";
584    public const LATIN_SMALL_LETTER_W_WITH_DOT_ABOVE                   = "\u{1E87}";
585    public const LATIN_CAPITAL_LETTER_W_WITH_DOT_BELOW                 = "\u{1E88}";
586    public const LATIN_SMALL_LETTER_W_WITH_DOT_BELOW                   = "\u{1E89}";
587    public const LATIN_CAPITAL_LETTER_X_WITH_DOT_ABOVE                 = "\u{1E8A}";
588    public const LATIN_SMALL_LETTER_X_WITH_DOT_ABOVE                   = "\u{1E8B}";
589    public const LATIN_CAPITAL_LETTER_X_WITH_DIAERESIS                 = "\u{1E8C}";
590    public const LATIN_SMALL_LETTER_X_WITH_DIAERESIS                   = "\u{1E8D}";
591    public const LATIN_CAPITAL_LETTER_Y_WITH_DOT_ABOVE                 = "\u{1E8E}";
592    public const LATIN_SMALL_LETTER_Y_WITH_DOT_ABOVE                   = "\u{1E8F}";
593    public const LATIN_CAPITAL_LETTER_Z_WITH_CIRCUMFLEX                = "\u{1E90}";
594    public const LATIN_SMALL_LETTER_Z_WITH_CIRCUMFLEX                  = "\u{1E91}";
595    public const LATIN_CAPITAL_LETTER_Z_WITH_DOT_BELOW                 = "\u{1E92}";
596    public const LATIN_SMALL_LETTER_Z_WITH_DOT_BELOW                   = "\u{1E93}";
// Small letters that have no capital counterpart in this table.
597    public const LATIN_SMALL_LETTER_T_WITH_DIAERESIS                   = "\u{1E97}";
598    public const LATIN_SMALL_LETTER_W_WITH_RING_ABOVE                  = "\u{1E98}";
599    public const LATIN_SMALL_LETTER_Y_WITH_RING_ABOVE                  = "\u{1E99}";
// From U+1EA0 onwards: vowels with dot below, hook above, and stacked
// circumflex/breve combinations, followed by the Y variants.
600    public const LATIN_CAPITAL_LETTER_A_WITH_DOT_BELOW                 = "\u{1EA0}";
601    public const LATIN_SMALL_LETTER_A_WITH_DOT_BELOW                   = "\u{1EA1}";
602    public const LATIN_CAPITAL_LETTER_A_WITH_HOOK_ABOVE                = "\u{1EA2}";
603    public const LATIN_SMALL_LETTER_A_WITH_HOOK_ABOVE                  = "\u{1EA3}";
604    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE      = "\u{1EA4}";
605    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE        = "\u{1EA5}";
606    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE      = "\u{1EA6}";
607    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE        = "\u{1EA7}";
608    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1EA8}";
609    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE   = "\u{1EA9}";
610    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE      = "\u{1EAA}";
611    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE        = "\u{1EAB}";
612    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW  = "\u{1EAC}";
613    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW    = "\u{1EAD}";
614    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_ACUTE           = "\u{1EAE}";
615    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_ACUTE             = "\u{1EAF}";
616    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_GRAVE           = "\u{1EB0}";
617    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_GRAVE             = "\u{1EB1}";
618    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE      = "\u{1EB2}";
619    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE        = "\u{1EB3}";
620    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_TILDE           = "\u{1EB4}";
621    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_TILDE             = "\u{1EB5}";
622    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_DOT_BELOW       = "\u{1EB6}";
623    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_DOT_BELOW         = "\u{1EB7}";
624    public const LATIN_CAPITAL_LETTER_E_WITH_DOT_BELOW                 = "\u{1EB8}";
625    public const LATIN_SMALL_LETTER_E_WITH_DOT_BELOW                   = "\u{1EB9}";
626    public const LATIN_CAPITAL_LETTER_E_WITH_HOOK_ABOVE                = "\u{1EBA}";
627    public const LATIN_SMALL_LETTER_E_WITH_HOOK_ABOVE                  = "\u{1EBB}";
628    public const LATIN_CAPITAL_LETTER_E_WITH_TILDE                     = "\u{1EBC}";
629    public const LATIN_SMALL_LETTER_E_WITH_TILDE                       = "\u{1EBD}";
630    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE      = "\u{1EBE}";
631    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE        = "\u{1EBF}";
632    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE      = "\u{1EC0}";
633    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE        = "\u{1EC1}";
634    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1EC2}";
635    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE   = "\u{1EC3}";
636    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE      = "\u{1EC4}";
637    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE        = "\u{1EC5}";
638    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW  = "\u{1EC6}";
639    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW    = "\u{1EC7}";
640    public const LATIN_CAPITAL_LETTER_I_WITH_HOOK_ABOVE                = "\u{1EC8}";
641    public const LATIN_SMALL_LETTER_I_WITH_HOOK_ABOVE                  = "\u{1EC9}";
642    public const LATIN_CAPITAL_LETTER_I_WITH_DOT_BELOW                 = "\u{1ECA}";
643    public const LATIN_SMALL_LETTER_I_WITH_DOT_BELOW                   = "\u{1ECB}";
644    public const LATIN_CAPITAL_LETTER_O_WITH_DOT_BELOW                 = "\u{1ECC}";
645    public const LATIN_SMALL_LETTER_O_WITH_DOT_BELOW                   = "\u{1ECD}";
646    public const LATIN_CAPITAL_LETTER_O_WITH_HOOK_ABOVE                = "\u{1ECE}";
647    public const LATIN_SMALL_LETTER_O_WITH_HOOK_ABOVE                  = "\u{1ECF}";
648    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE      = "\u{1ED0}";
649    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE        = "\u{1ED1}";
650    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE      = "\u{1ED2}";
651    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE        = "\u{1ED3}";
652    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1ED4}";
653    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE   = "\u{1ED5}";
654    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE      = "\u{1ED6}";
655    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE        = "\u{1ED7}";
656    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW  = "\u{1ED8}";
657    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW    = "\u{1ED9}";
658    public const LATIN_CAPITAL_LETTER_U_WITH_DOT_BELOW                 = "\u{1EE4}";
659    public const LATIN_SMALL_LETTER_U_WITH_DOT_BELOW                   = "\u{1EE5}";
660    public const LATIN_CAPITAL_LETTER_U_WITH_HOOK_ABOVE                = "\u{1EE6}";
661    public const LATIN_SMALL_LETTER_U_WITH_HOOK_ABOVE                  = "\u{1EE7}";
662    public const LATIN_CAPITAL_LETTER_Y_WITH_GRAVE                     = "\u{1EF2}";
663    public const LATIN_SMALL_LETTER_Y_WITH_GRAVE                       = "\u{1EF3}";
664    public const LATIN_CAPITAL_LETTER_Y_WITH_DOT_BELOW                 = "\u{1EF4}";
665    public const LATIN_SMALL_LETTER_Y_WITH_DOT_BELOW                   = "\u{1EF5}";
666    public const LATIN_CAPITAL_LETTER_Y_WITH_HOOK_ABOVE                = "\u{1EF6}";
667    public const LATIN_SMALL_LETTER_Y_WITH_HOOK_ABOVE                  = "\u{1EF7}";
668    public const LATIN_CAPITAL_LETTER_Y_WITH_TILDE                     = "\u{1EF8}";
669    public const LATIN_SMALL_LETTER_Y_WITH_TILDE                       = "\u{1EF9}";
// Punctuation and typographic symbols from the U+2000-U+21FF range:
// invisible joiners, dashes, quotation marks, daggers, and assorted signs.
// The two joiner constants are zero-width (invisible) characters.
670    public const ZERO_WIDTH_NON_JOINER                                 = "\u{200C}";
671    public const ZERO_WIDTH_JOINER                                     = "\u{200D}";
672    public const EN_DASH                                               = "\u{2013}";
673    public const EM_DASH                                               = "\u{2014}";
674    public const DOUBLE_LOW_LINE                                       = "\u{2017}";
675    public const LEFT_SINGLE_QUOTATION_MARK                            = "\u{2018}";
676    public const RIGHT_SINGLE_QUOTATION_MARK                           = "\u{2019}";
677    public const SINGLE_LOW_9_QUOTATION_MARK                           = "\u{201A}";
678    public const LEFT_DOUBLE_QUOTATION_MARK                            = "\u{201C}";
679    public const RIGHT_DOUBLE_QUOTATION_MARK                           = "\u{201D}";
680    public const DOUBLE_LOW_9_QUOTATION_MARK                           = "\u{201E}";
681    public const DAGGER                                                = "\u{2020}";
682    public const DOUBLE_DAGGER                                         = "\u{2021}";
683    public const BULLET                                                = "\u{2022}";
684    public const HORIZONTAL_ELLIPSIS                                   = "\u{2026}";
685    public const PER_MILLE_SIGN                                        = "\u{2030}";
686    public const SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK             = "\u{2039}";
687    public const SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK            = "\u{203A}";
688    public const FRACTION_SLASH                                        = "\u{2044}";
// Superscript, currency signs and letterlike symbols.
689    public const SUPERSCRIPT_LATIN_SMALL_LETTER_N                      = "\u{207F}";
690    public const PESETA_SIGN                                           = "\u{20A7}";
691    public const EURO_SIGN                                             = "\u{20AC}";
692    public const SCRIPT_SMALL_L                                        = "\u{2113}";
693    public const NUMERO_SIGN                                           = "\u{2116}";
694    public const SOUND_RECORDING_COPYRIGHT                             = "\u{2117}";
695    public const TRADE_MARK_SIGN                                       = "\u{2122}";
// Mathematical operators (U+2200-U+22FF), followed by three technical symbols
// from the U+2300 range (reversed not sign and the two integral halves).
696    public const PARTIAL_DIFFERENTIAL                                  = "\u{2202}";
697    public const INCREMENT                                             = "\u{2206}";
698    public const N_ARY_PRODUCT                                         = "\u{220F}";
699    public const N_ARY_SUMMATION                                       = "\u{2211}";
700    public const BULLET_OPERATOR                                       = "\u{2219}";
701    public const SQUARE_ROOT                                           = "\u{221A}";
702    public const INFINITY                                              = "\u{221E}";
703    public const INTERSECTION                                          = "\u{2229}";
704    public const INTEGRAL                                              = "\u{222B}";
705    public const ALMOST_EQUAL_TO                                       = "\u{2248}";
706    public const NOT_EQUAL_TO                                          = "\u{2260}";
707    public const IDENTICAL_TO                                          = "\u{2261}";
708    public const LESS_THAN_OR_EQUAL_TO                                 = "\u{2264}";
709    public const GREATER_THAN_OR_EQUAL_TO                              = "\u{2265}";
710    public const REVERSED_NOT_SIGN                                     = "\u{2310}";
711    public const TOP_HALF_INTEGRAL                                     = "\u{2320}";
712    public const BOTTOM_HALF_INTEGRAL                                  = "\u{2321}";
// Box-drawing characters (Unicode "Box Drawing" block, U+2500-U+257F):
// first the light (single-line) pieces, then the double-line and mixed
// single/double pieces at U+2550-U+256C.
713    public const BOX_DRAWINGS_LIGHT_HORIZONTAL                         = "\u{2500}";
714    public const BOX_DRAWINGS_LIGHT_VERTICAL                           = "\u{2502}";
715    public const BOX_DRAWINGS_LIGHT_DOWN_AND_RIGHT                     = "\u{250C}";
716    public const BOX_DRAWINGS_LIGHT_DOWN_AND_LEFT                      = "\u{2510}";
// NOTE(review): the next two entries are listed out of code-point order
// (U+2518 before U+2514); each name still matches its own value.
717    public const BOX_DRAWINGS_LIGHT_UP_AND_LEFT                        = "\u{2518}";
718    public const BOX_DRAWINGS_LIGHT_UP_AND_RIGHT                       = "\u{2514}";
719    public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_RIGHT                 = "\u{251C}";
720    public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_LEFT                  = "\u{2524}";
721    public const BOX_DRAWINGS_LIGHT_DOWN_AND_HORIZONTAL                = "\u{252C}";
722    public const BOX_DRAWINGS_LIGHT_UP_AND_HORIZONTAL                  = "\u{2534}";
723    public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_HORIZONTAL            = "\u{253C}";
// Double-line and mixed single/double pieces, contiguous from U+2550 to U+256C.
724    public const BOX_DRAWINGS_DOUBLE_HORIZONTAL                        = "\u{2550}";
725    public const BOX_DRAWINGS_DOUBLE_VERTICAL                          = "\u{2551}";
726    public const BOX_DRAWINGS_DOWN_SINGLE_AND_RIGHT_DOUBLE             = "\u{2552}";
727    public const BOX_DRAWINGS_DOWN_DOUBLE_AND_RIGHT_SINGLE             = "\u{2553}";
728    public const BOX_DRAWINGS_DOUBLE_DOWN_AND_RIGHT                    = "\u{2554}";
729    public const BOX_DRAWINGS_DOWN_SINGLE_AND_LEFT_DOUBLE              = "\u{2555}";
730    public const BOX_DRAWINGS_DOWN_DOUBLE_AND_LEFT_SINGLE              = "\u{2556}";
731    public const BOX_DRAWINGS_DOUBLE_DOWN_AND_LEFT                     = "\u{2557}";
732    public const BOX_DRAWINGS_UP_SINGLE_AND_RIGHT_DOUBLE               = "\u{2558}";
733    public const BOX_DRAWINGS_UP_DOUBLE_AND_RIGHT_SINGLE               = "\u{2559}";
734    public const BOX_DRAWINGS_DOUBLE_UP_AND_RIGHT                      = "\u{255A}";
735    public const BOX_DRAWINGS_UP_SINGLE_AND_LEFT_DOUBLE                = "\u{255B}";
736    public const BOX_DRAWINGS_UP_DOUBLE_AND_LEFT_SINGLE                = "\u{255C}";
737    public const BOX_DRAWINGS_DOUBLE_UP_AND_LEFT                       = "\u{255D}";
738    public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_RIGHT_DOUBLE         = "\u{255E}";
739    public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_RIGHT_SINGLE         = "\u{255F}";
740    public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_RIGHT                = "\u{2560}";
741    public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_LEFT_DOUBLE          = "\u{2561}";
742    public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_LEFT_SINGLE          = "\u{2562}";
743    public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_LEFT                 = "\u{2563}";
744    public const BOX_DRAWINGS_DOWN_SINGLE_AND_HORIZONTAL_DOUBLE        = "\u{2564}";
745    public const BOX_DRAWINGS_DOWN_DOUBLE_AND_HORIZONTAL_SINGLE        = "\u{2565}";
746    public const BOX_DRAWINGS_DOUBLE_DOWN_AND_HORIZONTAL               = "\u{2566}";
747    public const BOX_DRAWINGS_UP_SINGLE_AND_HORIZONTAL_DOUBLE          = "\u{2567}";
748    public const BOX_DRAWINGS_UP_DOUBLE_AND_HORIZONTAL_SINGLE          = "\u{2568}";
// NOTE(review): this identifier repeats the "BOX_DRAWINGS_" prefix, unlike every
// other constant in the group. The value is the double up-and-horizontal piece;
// the name is public, so it is left as-is here.
749    public const BOX_DRAWINGS_BOX_DRAWINGS_DOUBLE_UP_AND_HORIZONTAL    = "\u{2569}";
750    public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_HORIZONTAL_DOUBLE    = "\u{256A}";
751    public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_HORIZONTAL_SINGLE    = "\u{256B}";
752    public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_HORIZONTAL           = "\u{256C}";
753    public const UPPER_HALF_BLOCK                                      = "\u{2580}";
754    public const LOWER_HALF_BLOCK                                      = "\u{2584}";
755    public const FULL_BLOCK                                            = "\u{2588}";
756    public const LEFT_HALF_BLOCK                                       = "\u{258C}";
757    public const RIGHT_HALF_BLOCK                                      = "\u{2590}";
758    public const LIGHT_SHADE                                           = "\u{2591}";
759    public const MEDIUM_SHADE                                          = "\u{2592}";
760    public const DARK_SHADE                                            = "\u{2593}";
761    public const BLACK_SQUARE                                          = "\u{25A0}";
762    public const WHITE_SQUARE                                          = "\u{25A1}";
763    public const LOZENGE                                               = "\u{25CA}";
764    public const MUSIC_FLAT_SIGN                                       = "\u{266D}";
765    public const MUSIC_SHARP_SIGN                                      = "\u{266F}";
766    public const LATIN_SMALL_LIGATURE_FI                               = "\u{FB01}";
767    public const LATIN_SMALL_LIGATURE_FL                               = "\u{FB02}";
768    public const BYTE_ORDER_MARK                                       = "\u{FEFF}";
769    public const REPLACEMENT_CHARACTER                                 = "\u{FFFD}";
770
771    public const COMPOSED_CHARACTERS = [
        // Judging by the constant names, each key is an ASCII base letter
        // followed by a combining mark, and each value is the matching
        // single precomposed letter.
        // Only the pairs listed here are covered; for example there is no
        // entry for 'a' followed by an acute accent.
        // NOTE(review): how this map is consumed is not visible in this part
        // of the file - confirm before reordering or extending the entries.
772        'A' . self::COMBINING_ACUTE_ACCENT      => self::LATIN_CAPITAL_LETTER_A_WITH_ACUTE,
773        'A' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX,
774        'A' . self::COMBINING_DIAERESIS         => self::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS,
775        'A' . self::COMBINING_GRAVE_ACCENT      => self::LATIN_CAPITAL_LETTER_A_WITH_GRAVE,
776        'A' . self::COMBINING_RING_ABOVE        => self::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE,
777        'A' . self::COMBINING_TILDE             => self::LATIN_CAPITAL_LETTER_A_WITH_TILDE,
778        'C' . self::COMBINING_CEDILLA           => self::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA,
779        'E' . self::COMBINING_ACUTE_ACCENT      => self::LATIN_CAPITAL_LETTER_E_WITH_ACUTE,
780        'E' . self::COMBINING_DIAERESIS         => self::LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS,
781        'E' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX,
782        'E' . self::COMBINING_GRAVE_ACCENT      => self::LATIN_CAPITAL_LETTER_E_WITH_GRAVE,
783        'a' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX,
784        'a' . self::COMBINING_DIAERESIS         => self::LATIN_SMALL_LETTER_A_WITH_DIAERESIS,
785        'e' . self::COMBINING_ACUTE_ACCENT      => self::LATIN_SMALL_LETTER_E_WITH_ACUTE,
786        'u' . self::COMBINING_DIAERESIS         => self::LATIN_SMALL_LETTER_U_WITH_DIAERESIS,
787    ];
788
789    /**
790     * Convert text from (potentially invalid) UTF-8 to UTF-8.
791     *
792     * @param string $text
793     *
794     * @return string
795     */
796    public function fromUtf8(string $text): string
797    {
798        if (preg_match('//u', $text) === false) {
799            // Not UTF8?
800            mb_substitute_character(0xFFFD);
801
802            return mb_convert_encoding($text, 'UTF-8', 'UTF-8');
803        }
804
805        return $text;
806    }
807
808    /**
809     * Convert text from (potentially invalid) UTF-8 to UTF-8.
810     *
811     * @param string $text
812     *
813     * @return string
814     */
815    public function toUtf8(string $text): string
816    {
817        return $this->fromUtf8($text);
818    }
819
820    /**
821     * Create a UTF8 character from a code.
822     *
823     * @param int $code
824     *
825     * @return string
826     */
827    public static function chr(int $code): string
828    {
829        if ($code < 0 || $code > 0x1FFFFF) {
830            throw new InvalidArgumentException((string)$code);
831        }
832
833        if ($code <= 0x7F) {
834            return chr($code);
835        }
836
837        if ($code <= 0x7FF) {
838            return
839                chr(($code >> 6) + 0xC0) .
840                chr(($code & 0x3F) + 0x80);
841        }
842
843        if ($code <= 0xFFFF) {
844            return
845                chr(($code >> 12) + 0xE0) .
846                chr((($code >> 6) & 0x3F) + 0x80) .
847                chr(($code & 0x3F) + 0x80);
848        }
849
850        return
851            chr(($code >> 18) + 0xF0) .
852            chr((($code >> 12) & 0x3F) + 0x80) .
853            chr((($code >> 6) & 0x3F) + 0x80) .
854            chr(($code & 0x3F) + 0x80);
855    }
856}
857