xref: /webtrees/app/Encodings/UTF8.php (revision 36779af1bd0601de7819554b13a393f6edb92507)
1<?php
2
3/**
4 * webtrees: online genealogy
5 * Copyright (C) 2021 webtrees development team
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <https://www.gnu.org/licenses/>.
16 */
17
18declare(strict_types=1);
19
20namespace Fisharebest\Webtrees\Encodings;
21
22use InvalidArgumentException;
23
24use function chr;
25use function mb_substitute_character;
26use function preg_replace;
27
28/**
29 * Convert between (potentially invalid) UTF-8 and valid UTF-8.
30 */
31class UTF8 extends AbstractEncoding
32{
33    public const NAME = 'UTF-8';
34
35    public const START_OF_STRING                                       = "\u{0098}";
36    public const STRING_TERMINATOR                                     = "\u{009C}";
37    public const NO_BREAK_SPACE                                        = "\u{00A0}";
38    public const INVERTED_EXCLAMATION_MARK                             = "\u{00A1}";
39    public const CENT_SIGN                                             = "\u{00A2}";
40    public const POUND_SIGN                                            = "\u{00A3}";
41    public const CURRENCY_SIGN                                         = "\u{00A4}";
42    public const YEN_SIGN                                              = "\u{00A5}";
43    public const BROKEN_BAR                                            = "\u{00A6}";
44    public const SECTION_SIGN                                          = "\u{00A7}";
45    public const DIAERESIS                                             = "\u{00A8}";
46    public const COPYRIGHT_SIGN                                        = "\u{00A9}";
47    public const FEMININE_ORDINAL_INDICATOR                            = "\u{00AA}";
48    public const LEFT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK             = "\u{00AB}";
49    public const NOT_SIGN                                              = "\u{00AC}";
50    public const SOFT_HYPHEN                                           = "\u{00AD}";
51    public const REGISTERED_SIGN                                       = "\u{00AE}";
52    public const MACRON                                                = "\u{00AF}";
53    public const DEGREE_SIGN                                           = "\u{00B0}";
54    public const PLUS_MINUS_SIGN                                       = "\u{00B1}";
55    public const SUPERSCRIPT_TWO                                       = "\u{00B2}";
56    public const SUPERSCRIPT_THREE                                     = "\u{00B3}";
57    public const ACUTE_ACCENT                                          = "\u{00B4}";
58    public const MICRO_SIGN                                            = "\u{00B5}";
59    public const PILCROW_SIGN                                          = "\u{00B6}";
60    public const MIDDLE_DOT                                            = "\u{00B7}";
61    public const CEDILLA                                               = "\u{00B8}";
62    public const SUPERSCRIPT_ONE                                       = "\u{00B9}";
63    public const MASCULINE_ORDINAL_INDICATOR                           = "\u{00BA}";
64    public const RIGHT_POINTING_DOUBLE_ANGLE_QUOTATION_MARK            = "\u{00BB}";
65    public const VULGAR_FRACTION_ONE_QUARTER                           = "\u{00BC}";
66    public const VULGAR_FRACTION_ONE_HALF                              = "\u{00BD}";
67    public const VULGAR_FRACTION_THREE_QUARTERS                        = "\u{00BE}";
68    public const INVERTED_QUESTION_MARK                                = "\u{00BF}";
69    public const LATIN_CAPITAL_LETTER_A_WITH_GRAVE                     = "\u{00C0}";
70    public const LATIN_CAPITAL_LETTER_A_WITH_ACUTE                     = "\u{00C1}";
71    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX                = "\u{00C2}";
72    public const LATIN_CAPITAL_LETTER_A_WITH_TILDE                     = "\u{00C3}";
73    public const LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS                 = "\u{00C4}";
74    public const LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE                = "\u{00C5}";
75    public const LATIN_CAPITAL_LETTER_AE                               = "\u{00C6}";
76    public const LATIN_CAPITAL_LETTER_C_WITH_CEDILLA                   = "\u{00C7}";
77    public const LATIN_CAPITAL_LETTER_E_WITH_GRAVE                     = "\u{00C8}";
78    public const LATIN_CAPITAL_LETTER_E_WITH_ACUTE                     = "\u{00C9}";
79    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX                = "\u{00CA}";
80    public const LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS                 = "\u{00CB}";
81    public const LATIN_CAPITAL_LETTER_I_WITH_GRAVE                     = "\u{00CC}";
82    public const LATIN_CAPITAL_LETTER_I_WITH_ACUTE                     = "\u{00CD}";
83    public const LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX                = "\u{00CE}";
84    public const LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS                 = "\u{00CF}";
85    public const LATIN_CAPITAL_LETTER_ETH                              = "\u{00D0}";
86    public const LATIN_CAPITAL_LETTER_N_WITH_TILDE                     = "\u{00D1}";
87    public const LATIN_CAPITAL_LETTER_O_WITH_GRAVE                     = "\u{00D2}";
88    public const LATIN_CAPITAL_LETTER_O_WITH_ACUTE                     = "\u{00D3}";
89    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX                = "\u{00D4}";
90    public const LATIN_CAPITAL_LETTER_O_WITH_TILDE                     = "\u{00D5}";
91    public const LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS                 = "\u{00D6}";
92    public const MULTIPLICATION_SIGN                                   = "\u{00D7}";
93    public const LATIN_CAPITAL_LETTER_O_WITH_STROKE                    = "\u{00D8}";
94    public const LATIN_CAPITAL_LETTER_U_WITH_GRAVE                     = "\u{00D9}";
95    public const LATIN_CAPITAL_LETTER_U_WITH_ACUTE                     = "\u{00DA}";
96    public const LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX                = "\u{00DB}";
97    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS                 = "\u{00DC}";
98    public const LATIN_CAPITAL_LETTER_Y_WITH_ACUTE                     = "\u{00DD}";
99    public const LATIN_CAPITAL_LETTER_THORN                            = "\u{00DE}";
100    public const LATIN_SMALL_LETTER_SHARP_S                            = "\u{00DF}";
101    public const LATIN_SMALL_LETTER_A_WITH_GRAVE                       = "\u{00E0}";
102    public const LATIN_SMALL_LETTER_A_WITH_ACUTE                       = "\u{00E1}";
103    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX                  = "\u{00E2}";
104    public const LATIN_SMALL_LETTER_A_WITH_TILDE                       = "\u{00E3}";
105    public const LATIN_SMALL_LETTER_A_WITH_DIAERESIS                   = "\u{00E4}";
106    public const LATIN_SMALL_LETTER_A_WITH_RING_ABOVE                  = "\u{00E5}";
107    public const LATIN_SMALL_LETTER_AE                                 = "\u{00E6}";
108    public const LATIN_SMALL_LETTER_C_WITH_CEDILLA                     = "\u{00E7}";
109    public const LATIN_SMALL_LETTER_E_WITH_GRAVE                       = "\u{00E8}";
110    public const LATIN_SMALL_LETTER_E_WITH_ACUTE                       = "\u{00E9}";
111    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX                  = "\u{00EA}";
112    public const LATIN_SMALL_LETTER_E_WITH_DIAERESIS                   = "\u{00EB}";
113    public const LATIN_SMALL_LETTER_I_WITH_GRAVE                       = "\u{00EC}";
114    public const LATIN_SMALL_LETTER_I_WITH_ACUTE                       = "\u{00ED}";
115    public const LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX                  = "\u{00EE}";
116    public const LATIN_SMALL_LETTER_I_WITH_DIAERESIS                   = "\u{00EF}";
117    public const LATIN_SMALL_LETTER_ETH                                = "\u{00F0}";
118    public const LATIN_SMALL_LETTER_N_WITH_TILDE                       = "\u{00F1}";
119    public const LATIN_SMALL_LETTER_O_WITH_GRAVE                       = "\u{00F2}";
120    public const LATIN_SMALL_LETTER_O_WITH_ACUTE                       = "\u{00F3}";
121    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX                  = "\u{00F4}";
122    public const LATIN_SMALL_LETTER_O_WITH_TILDE                       = "\u{00F5}";
123    public const LATIN_SMALL_LETTER_O_WITH_DIAERESIS                   = "\u{00F6}";
124    public const DIVISION_SIGN                                         = "\u{00F7}";
125    public const LATIN_SMALL_LETTER_O_WITH_STROKE                      = "\u{00F8}";
126    public const LATIN_SMALL_LETTER_U_WITH_GRAVE                       = "\u{00F9}";
127    public const LATIN_SMALL_LETTER_U_WITH_ACUTE                       = "\u{00FA}";
128    public const LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX                  = "\u{00FB}";
129    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS                   = "\u{00FC}";
130    public const LATIN_SMALL_LETTER_Y_WITH_ACUTE                       = "\u{00FD}";
131    public const LATIN_SMALL_LETTER_THORN                              = "\u{00FE}";
132    public const LATIN_SMALL_LETTER_Y_WITH_DIAERESIS                   = "\u{00FF}";
133    public const LATIN_CAPITAL_LETTER_A_WITH_MACRON                    = "\u{0100}";
134    public const LATIN_SMALL_LETTER_A_WITH_MACRON                      = "\u{0101}";
135    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE                     = "\u{0102}";
136    public const LATIN_SMALL_LETTER_A_WITH_BREVE                       = "\u{0103}";
137    public const LATIN_CAPITAL_LETTER_A_WITH_OGONEK                    = "\u{0104}";
138    public const LATIN_SMALL_LETTER_A_WITH_OGONEK                      = "\u{0105}";
139    public const LATIN_CAPITAL_LETTER_C_WITH_ACUTE                     = "\u{0106}";
140    public const LATIN_SMALL_LETTER_C_WITH_ACUTE                       = "\u{0107}";
141    public const LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX                = "\u{0108}";
142    public const LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX                  = "\u{0109}";
143    public const LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE                 = "\u{010A}";
144    public const LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE                   = "\u{010B}";
145    public const LATIN_CAPITAL_LETTER_C_WITH_CARON                     = "\u{010C}";
146    public const LATIN_SMALL_LETTER_C_WITH_CARON                       = "\u{010D}";
147    public const LATIN_CAPITAL_LETTER_D_WITH_CARON                     = "\u{010E}";
148    public const LATIN_SMALL_LETTER_D_WITH_CARON                       = "\u{010F}";
149    public const LATIN_CAPITAL_LETTER_D_WITH_STROKE                    = "\u{0110}";
150    public const LATIN_SMALL_LETTER_D_WITH_STROKE                      = "\u{0111}";
151    public const LATIN_CAPITAL_LETTER_E_WITH_MACRON                    = "\u{0112}";
152    public const LATIN_SMALL_LETTER_E_WITH_MACRON                      = "\u{0113}";
153    public const LATIN_CAPITAL_LETTER_E_WITH_BREVE                     = "\u{0114}";
154    public const LATIN_SMALL_LETTER_E_WITH_BREVE                       = "\u{0115}";
155    public const LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE                 = "\u{0116}";
156    public const LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE                   = "\u{0117}";
157    public const LATIN_CAPITAL_LETTER_E_WITH_OGONEK                    = "\u{0118}";
158    public const LATIN_SMALL_LETTER_E_WITH_OGONEK                      = "\u{0119}";
159    public const LATIN_CAPITAL_LETTER_E_WITH_CARON                     = "\u{011A}";
160    public const LATIN_SMALL_LETTER_E_WITH_CARON                       = "\u{011B}";
161    public const LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX                = "\u{011C}";
162    public const LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX                  = "\u{011D}";
163    public const LATIN_CAPITAL_LETTER_G_WITH_BREVE                     = "\u{011E}";
164    public const LATIN_SMALL_LETTER_G_WITH_BREVE                       = "\u{011F}";
165    public const LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE                 = "\u{0120}";
166    public const LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE                   = "\u{0121}";
167    public const LATIN_CAPITAL_LETTER_G_WITH_CEDILLA                   = "\u{0122}";
168    public const LATIN_SMALL_LETTER_G_WITH_CEDILLA                     = "\u{0123}";
169    public const LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX                = "\u{0124}";
170    public const LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX                  = "\u{0125}";
171    public const LATIN_CAPITAL_LETTER_H_WITH_STROKE                    = "\u{0126}";
172    public const LATIN_SMALL_LETTER_H_WITH_STROKE                      = "\u{0127}";
173    public const LATIN_CAPITAL_LETTER_I_WITH_TILDE                     = "\u{0128}";
174    public const LATIN_SMALL_LETTER_I_WITH_TILDE                       = "\u{0129}";
175    public const LATIN_CAPITAL_LETTER_I_WITH_MACRON                    = "\u{012A}";
176    public const LATIN_SMALL_LETTER_I_WITH_MACRON                      = "\u{012B}";
177    public const LATIN_CAPITAL_LETTER_I_WITH_BREVE                     = "\u{012C}";
178    public const LATIN_SMALL_LETTER_I_WITH_BREVE                       = "\u{012D}";
179    public const LATIN_CAPITAL_LETTER_I_WITH_OGONEK                    = "\u{012E}";
180    public const LATIN_SMALL_LETTER_I_WITH_OGONEK                      = "\u{012F}";
181    public const LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE                 = "\u{0130}";
182    public const LATIN_SMALL_LETTER_DOTLESS_I                          = "\u{0131}";
183    public const LATIN_CAPITAL_LIGATURE_IJ                             = "\u{0132}";
184    public const LATIN_SMALL_LIGATURE_IJ                               = "\u{0133}";
185    public const LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX                = "\u{0134}";
186    public const LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX                  = "\u{0135}";
187    public const LATIN_CAPITAL_LETTER_K_WITH_CEDILLA                   = "\u{0136}";
188    public const LATIN_SMALL_LETTER_K_WITH_CEDILLA                     = "\u{0137}";
189    public const LATIN_SMALL_LETTER_KRA                                = "\u{0138}";
190    public const LATIN_CAPITAL_LETTER_L_WITH_ACUTE                     = "\u{0139}";
191    public const LATIN_SMALL_LETTER_L_WITH_ACUTE                       = "\u{013A}";
192    public const LATIN_CAPITAL_LETTER_L_WITH_CEDILLA                   = "\u{013B}";
193    public const LATIN_SMALL_LETTER_L_WITH_CEDILLA                     = "\u{013C}";
194    public const LATIN_CAPITAL_LETTER_L_WITH_CARON                     = "\u{013D}";
195    public const LATIN_SMALL_LETTER_L_WITH_CARON                       = "\u{013E}";
196    public const LATIN_CAPITAL_LETTER_L_WITH_MIDDLE_DOT                = "\u{013F}";
197    public const LATIN_SMALL_LETTER_L_WITH_MIDDLE_DOT                  = "\u{0140}";
198    public const LATIN_CAPITAL_LETTER_L_WITH_STROKE                    = "\u{0141}";
199    public const LATIN_SMALL_LETTER_L_WITH_STROKE                      = "\u{0142}";
200    public const LATIN_CAPITAL_LETTER_N_WITH_ACUTE                     = "\u{0143}";
201    public const LATIN_SMALL_LETTER_N_WITH_ACUTE                       = "\u{0144}";
202    public const LATIN_CAPITAL_LETTER_N_WITH_CEDILLA                   = "\u{0145}";
203    public const LATIN_SMALL_LETTER_N_WITH_CEDILLA                     = "\u{0146}";
204    public const LATIN_CAPITAL_LETTER_N_WITH_CARON                     = "\u{0147}";
205    public const LATIN_SMALL_LETTER_N_WITH_CARON                       = "\u{0148}";
206    public const LATIN_SMALL_LETTER_N_PRECEDED_BY_APOSTROPHE           = "\u{0149}";
207    public const LATIN_CAPITAL_LETTER_ENG                              = "\u{014A}";
208    public const LATIN_SMALL_LETTER_ENG                                = "\u{014B}";
209    public const LATIN_CAPITAL_LETTER_O_WITH_MACRON                    = "\u{014C}";
210    public const LATIN_SMALL_LETTER_O_WITH_MACRON                      = "\u{014D}";
211    public const LATIN_CAPITAL_LETTER_O_WITH_BREVE                     = "\u{014E}";
212    public const LATIN_SMALL_LETTER_O_WITH_BREVE                       = "\u{014F}";
213    public const LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE              = "\u{0150}";
214    public const LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE                = "\u{0151}";
215    public const LATIN_CAPITAL_LIGATURE_OE                             = "\u{0152}";
216    public const LATIN_SMALL_LIGATURE_OE                               = "\u{0153}";
217    public const LATIN_CAPITAL_LETTER_R_WITH_ACUTE                     = "\u{0154}";
218    public const LATIN_SMALL_LETTER_R_WITH_ACUTE                       = "\u{0155}";
219    public const LATIN_CAPITAL_LETTER_R_WITH_CEDILLA                   = "\u{0156}";
220    public const LATIN_SMALL_LETTER_R_WITH_CEDILLA                     = "\u{0157}";
221    public const LATIN_CAPITAL_LETTER_R_WITH_CARON                     = "\u{0158}";
222    public const LATIN_SMALL_LETTER_R_WITH_CARON                       = "\u{0159}";
223    public const LATIN_CAPITAL_LETTER_S_WITH_ACUTE                     = "\u{015A}";
224    public const LATIN_SMALL_LETTER_S_WITH_ACUTE                       = "\u{015B}";
225    public const LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX                = "\u{015C}";
226    public const LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX                  = "\u{015D}";
227    public const LATIN_CAPITAL_LETTER_S_WITH_CEDILLA                   = "\u{015E}";
228    public const LATIN_SMALL_LETTER_S_WITH_CEDILLA                     = "\u{015F}";
229    public const LATIN_CAPITAL_LETTER_S_WITH_CARON                     = "\u{0160}";
230    public const LATIN_SMALL_LETTER_S_WITH_CARON                       = "\u{0161}";
231    public const LATIN_CAPITAL_LETTER_T_WITH_CEDILLA                   = "\u{0162}";
232    public const LATIN_SMALL_LETTER_T_WITH_CEDILLA                     = "\u{0163}";
233    public const LATIN_CAPITAL_LETTER_T_WITH_CARON                     = "\u{0164}";
234    public const LATIN_SMALL_LETTER_T_WITH_CARON                       = "\u{0165}";
235    public const LATIN_CAPITAL_LETTER_T_WITH_STROKE                    = "\u{0166}";
236    public const LATIN_SMALL_LETTER_T_WITH_STROKE                      = "\u{0167}";
237    public const LATIN_CAPITAL_LETTER_U_WITH_TILDE                     = "\u{0168}";
238    public const LATIN_SMALL_LETTER_U_WITH_TILDE                       = "\u{0169}";
239    public const LATIN_CAPITAL_LETTER_U_WITH_MACRON                    = "\u{016A}";
240    public const LATIN_SMALL_LETTER_U_WITH_MACRON                      = "\u{016B}";
241    public const LATIN_CAPITAL_LETTER_U_WITH_BREVE                     = "\u{016C}";
242    public const LATIN_SMALL_LETTER_U_WITH_BREVE                       = "\u{016D}";
243    public const LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE                = "\u{016E}";
244    public const LATIN_SMALL_LETTER_U_WITH_RING_ABOVE                  = "\u{016F}";
245    public const LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE              = "\u{0170}";
246    public const LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE                = "\u{0171}";
247    public const LATIN_CAPITAL_LETTER_U_WITH_OGONEK                    = "\u{0172}";
248    public const LATIN_SMALL_LETTER_U_WITH_OGONEK                      = "\u{0173}";
249    public const LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX                = "\u{0174}";
250    public const LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX                  = "\u{0175}";
251    public const LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX                = "\u{0176}";
252    public const LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX                  = "\u{0177}";
253    public const LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS                 = "\u{0178}";
254    public const LATIN_CAPITAL_LETTER_Z_WITH_ACUTE                     = "\u{0179}";
255    public const LATIN_SMALL_LETTER_Z_WITH_ACUTE                       = "\u{017A}";
256    public const LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE                 = "\u{017B}";
257    public const LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE                   = "\u{017C}";
258    public const LATIN_CAPITAL_LETTER_Z_WITH_CARON                     = "\u{017D}";
259    public const LATIN_SMALL_LETTER_Z_WITH_CARON                       = "\u{017E}";
260    public const LATIN_SMALL_LETTER_LONG_S                             = "\u{017F}";
261    public const LATIN_SMALL_LETTER_B_WITH_STROKE                      = "\u{0180}";
262    public const LATIN_CAPITAL_LETTER_B_WITH_HOOK                      = "\u{0181}";
263    public const LATIN_CAPITAL_LETTER_B_WITH_TOPBAR                    = "\u{0182}";
264    public const LATIN_SMALL_LETTER_B_WITH_TOPBAR                      = "\u{0183}";
265    public const LATIN_CAPITAL_LETTER_F_WITH_HOOK                      = "\u{0191}";
266    public const LATIN_SMALL_LETTER_F_WITH_HOOK                        = "\u{0192}";
267    public const LATIN_SMALL_LETTER_O_WITH_HORN                        = "\u{01A1}";
268    public const LATIN_CAPITAL_LETTER_O_WITH_HORN                      = "\u{01A0}";
269    public const LATIN_CAPITAL_LETTER_U_WITH_HORN                      = "\u{01AF}";
270    public const LATIN_SMALL_LETTER_U_WITH_HORN                        = "\u{01B0}";
271    public const LATIN_CAPITAL_LETTER_A_WITH_CARON                     = "\u{01CD}";
272    public const LATIN_SMALL_LETTER_A_WITH_CARON                       = "\u{01CE}";
273    public const LATIN_CAPITAL_LETTER_I_WITH_CARON                     = "\u{01CF}";
274    public const LATIN_SMALL_LETTER_I_WITH_CARON                       = "\u{01D0}";
275    public const LATIN_CAPITAL_LETTER_O_WITH_CARON                     = "\u{01D1}";
276    public const LATIN_SMALL_LETTER_O_WITH_CARON                       = "\u{01D2}";
277    public const LATIN_CAPITAL_LETTER_U_WITH_CARON                     = "\u{01D3}";
278    public const LATIN_SMALL_LETTER_U_WITH_CARON                       = "\u{01D4}";
279    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_MACRON      = "\u{01D5}";
280    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_MACRON        = "\u{01D6}";
281    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_ACUTE       = "\u{01D7}";
282    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_ACUTE         = "\u{01D8}";
283    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_CARON       = "\u{01D9}";
284    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_CARON         = "\u{01DA}";
285    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_GRAVE       = "\u{01DB}";
286    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_GRAVE         = "\u{01DC}";
287    public const LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS_AND_MACRON      = "\u{01DE}";
288    public const LATIN_SMALL_LETTER_A_WITH_DIAERESIS_AND_MACRON        = "\u{01DF}";
289    public const LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON      = "\u{01E0}";
290    public const LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON        = "\u{01E1}";
291    public const LATIN_CAPITAL_LETTER_AE_WITH_MACRON                   = "\u{01E2}";
292    public const LATIN_SMALL_LETTER_AE_WITH_MACRON                     = "\u{01E3}";
293    public const LATIN_CAPITAL_LETTER_G_WITH_CARON                     = "\u{01E6}";
294    public const LATIN_SMALL_LETTER_G_WITH_CARON                       = "\u{01E7}";
295    public const LATIN_CAPITAL_LETTER_K_WITH_CARON                     = "\u{01E8}";
296    public const LATIN_SMALL_LETTER_K_WITH_CARON                       = "\u{01E9}";
297    public const LATIN_CAPITAL_LETTER_O_WITH_OGONEK                    = "\u{01EA}";
298    public const LATIN_SMALL_LETTER_O_WITH_OGONEK                      = "\u{01EB}";
299    public const LATIN_CAPITAL_LETTER_O_WITH_OGONEK_AND_MACRON         = "\u{01EC}";
300    public const LATIN_SMALL_LETTER_O_WITH_OGONEK_AND_MACRON           = "\u{01ED}";
301    public const LATIN_SMALL_LETTER_J_WITH_CARON                       = "\u{01F0}";
302    public const LATIN_CAPITAL_LETTER_G_WITH_ACUTE                     = "\u{01F4}";
303    public const LATIN_SMALL_LETTER_G_WITH_ACUTE                       = "\u{01F5}";
304    public const LATIN_CAPITAL_LETTER_N_WITH_GRAVE                     = "\u{01F8}";
305    public const LATIN_SMALL_LETTER_N_WITH_GRAVE                       = "\u{01F9}";
306    public const LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE      = "\u{01FA}";
307    public const LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE        = "\u{01FB}";
308    public const LATIN_CAPITAL_LETTER_AE_WITH_ACUTE                    = "\u{01FC}";
309    public const LATIN_SMALL_LETTER_AE_WITH_ACUTE                      = "\u{01FD}";
310    public const LATIN_CAPITAL_LETTER_O_WITH_STROKE_AND_ACUTE          = "\u{01FE}";
311    public const LATIN_SMALL_LETTER_O_WITH_STROKE_AND_ACUTE            = "\u{01FF}";
312    public const LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW               = "\u{0218}";
313    public const LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW                 = "\u{0219}";
314    public const LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW               = "\u{021A}";
315    public const LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW                 = "\u{021B}";
316    public const LATIN_CAPITAL_LETTER_H_WITH_CARON                     = "\u{021E}";
317    public const LATIN_SMALL_LETTER_H_WITH_CARON                       = "\u{021F}";
318    public const LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE                 = "\u{0226}";
319    public const LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE                   = "\u{0227}";
320    public const LATIN_CAPITAL_LETTER_E_WITH_CEDILLA                   = "\u{0228}";
321    public const LATIN_SMALL_LETTER_E_WITH_CEDILLA                     = "\u{0229}";
322    public const LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS_AND_MACRON      = "\u{022A}";
323    public const LATIN_SMALL_LETTER_O_WITH_DIAERESIS_AND_MACRON        = "\u{022B}";
324    public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_MACRON          = "\u{022C}";
325    public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_MACRON            = "\u{022D}";
326    public const LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE                 = "\u{022E}";
327    public const LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE                   = "\u{022F}";
328    public const LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON      = "\u{0230}";
329    public const LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON        = "\u{0231}";
330    public const LATIN_CAPITAL_LETTER_Y_WITH_MACRON                    = "\u{0232}";
331    public const LATIN_SMALL_LETTER_Y_WITH_MACRON                      = "\u{0233}";
332    public const MODIFIER_LETTER_PRIME                                 = "\u{02B9}";
333    public const MODIFIER_LETTER_DOUBLE_PRIME                          = "\u{02BA}";
334    public const MODIFIER_LETTER_TURNED_COMMA                          = "\u{02BB}";
335    public const MODIFIER_LETTER_APOSTROPHE                            = "\u{02BC}";
336    public const MODIFIER_LETTER_CIRCUMFLEX_ACCENT                     = "\u{02C6}";
337    public const CARON                                                 = "\u{02C7}";
338    public const BREVE                                                 = "\u{02D8}";
339    public const DOT_ABOVE                                             = "\u{02D9}";
340    public const RING_ABOVE                                            = "\u{02DA}";
341    public const OGONEK                                                = "\u{02DB}";
342    public const SMALL_TILDE                                           = "\u{02DC}";
343    public const DOUBLE_ACUTE_ACCENT                                   = "\u{02DD}";
344    public const COMBINING_GRAVE_ACCENT                                = "\u{0300}";
345    public const COMBINING_ACUTE_ACCENT                                = "\u{0301}";
346    public const COMBINING_CIRCUMFLEX_ACCENT                           = "\u{0302}";
347    public const COMBINING_TILDE                                       = "\u{0303}";
348    public const COMBINING_MACRON                                      = "\u{0304}";
349    public const COMBINING_OVERLINE                                    = "\u{0305}";
350    public const COMBINING_BREVE                                       = "\u{0306}";
351    public const COMBINING_DOT_ABOVE                                   = "\u{0307}";
352    public const COMBINING_DIAERESIS                                   = "\u{0308}";
353    public const COMBINING_HOOK_ABOVE                                  = "\u{0309}";
354    public const COMBINING_RING_ABOVE                                  = "\u{030A}";
355    public const COMBINING_DOUBLE_ACUTE_ACCENT                         = "\u{030B}";
356    public const COMBINING_CARON                                       = "\u{030C}";
357    public const COMBINING_CANDRABINDU                                 = "\u{0310}";
358    public const COMBINING_COMMA_ABOVE                                 = "\u{0313}";
359    public const COMBINING_COMMA_ABOVE_RIGHT                           = "\u{0315}";
360    public const COMBINING_HORN                                        = "\u{031B}";
361    public const COMBINING_LEFT_HALF_RING_BELOW                        = "\u{031C}";
362    public const COMBINING_DOT_BELOW                                   = "\u{0323}";
363    public const COMBINING_DIAERESIS_BELOW                             = "\u{0324}";
364    public const COMBINING_RING_BELOW                                  = "\u{0325}";
365    public const COMBINING_COMMA_BELOW                                 = "\u{0326}";
366    public const COMBINING_CEDILLA                                     = "\u{0327}";
367    public const COMBINING_OGONEK                                      = "\u{0328}";
368    public const COMBINING_BRIDGE_BELOW                                = "\u{032A}";
369    public const COMBINING_BREVE_BELOW                                 = "\u{032E}";
370    public const COMBINING_LOW_LINE                                    = "\u{0332}";
371    public const COMBINING_DOUBLE_LOW_LINE                             = "\u{0333}";
372    public const COMBINING_LONG_SOLIDUS_OVERLAY                        = "\u{0338}";
373    public const COMBINING_DOUBLE_TILDE                                = "\u{0360}";
374    public const COMBINING_DOUBLE_INVERTED_BREVE                       = "\u{0361}";
375    public const GREEK_CAPITAL_LETTER_GAMMA                            = "\u{0393}"; // Greek: only five capitals and seven small letters are defined, not the full alphabet
376    public const GREEK_CAPITAL_LETTER_THETA                            = "\u{0398}";
377    public const GREEK_CAPITAL_LETTER_SIGMA                            = "\u{03A3}";
378    public const GREEK_CAPITAL_LETTER_PHI                              = "\u{03A6}";
379    public const GREEK_CAPITAL_LETTER_OMEGA                            = "\u{03A9}";
380    public const GREEK_SMALL_LETTER_ALPHA                              = "\u{03B1}"; // Greek small letters
381    public const GREEK_SMALL_LETTER_DELTA                              = "\u{03B4}";
382    public const GREEK_SMALL_LETTER_EPSILON                            = "\u{03B5}";
383    public const GREEK_SMALL_LETTER_PI                                 = "\u{03C0}";
384    public const GREEK_SMALL_LETTER_SIGMA                              = "\u{03C3}";
385    public const GREEK_SMALL_LETTER_TAU                                = "\u{03C4}";
386    public const GREEK_SMALL_LETTER_PHI                                = "\u{03C6}";
387    public const CYRILLIC_CAPITAL_LETTER_IO                            = "\u{0401}"; // Cyrillic capital letters, U+0401-U+042F, in code-point order
388    public const CYRILLIC_CAPITAL_LETTER_DJE                           = "\u{0402}";
389    public const CYRILLIC_CAPITAL_LETTER_GJE                           = "\u{0403}";
390    public const CYRILLIC_CAPITAL_LETTER_UKRANIAN_IE                   = "\u{0404}"; // NOTE(review): "UKRANIAN" is spelled "UKRAINIAN" two constants below; rename only together with every reference
391    public const CYRILLIC_CAPITAL_LETTER_DZE                           = "\u{0405}";
392    public const CYRILLIC_CAPITAL_LETTER_BYELORUSSIAN_UKRAINIAN_I      = "\u{0406}";
393    public const CYRILLIC_CAPITAL_LETTER_YI                            = "\u{0407}";
394    public const CYRILLIC_CAPITAL_LETTER_JE                            = "\u{0408}";
395    public const CYRILLIC_CAPITAL_LETTER_LJE                           = "\u{0409}";
396    public const CYRILLIC_CAPITAL_LETTER_NJE                           = "\u{040A}";
397    public const CYRILLIC_CAPITAL_LETTER_TSHE                          = "\u{040B}";
398    public const CYRILLIC_CAPITAL_LETTER_KJE                           = "\u{040C}";
399    public const CYRILLIC_CAPITAL_LETTER_SHORT_U                       = "\u{040E}"; // U+040D has no constant in this list
400    public const CYRILLIC_CAPITAL_LETTER_DZHE                          = "\u{040F}";
401    public const CYRILLIC_CAPITAL_LETTER_A                             = "\u{0410}"; // U+0410-U+042F: contiguous run, A through YA
402    public const CYRILLIC_CAPITAL_LETTER_BE                            = "\u{0411}";
403    public const CYRILLIC_CAPITAL_LETTER_VE                            = "\u{0412}";
404    public const CYRILLIC_CAPITAL_LETTER_GHE                           = "\u{0413}";
405    public const CYRILLIC_CAPITAL_LETTER_DE                            = "\u{0414}";
406    public const CYRILLIC_CAPITAL_LETTER_IE                            = "\u{0415}";
407    public const CYRILLIC_CAPITAL_LETTER_ZHE                           = "\u{0416}";
408    public const CYRILLIC_CAPITAL_LETTER_ZE                            = "\u{0417}";
409    public const CYRILLIC_CAPITAL_LETTER_I                             = "\u{0418}";
410    public const CYRILLIC_CAPITAL_LETTER_SHORT_I                       = "\u{0419}";
411    public const CYRILLIC_CAPITAL_LETTER_KA                            = "\u{041A}";
412    public const CYRILLIC_CAPITAL_LETTER_EL                            = "\u{041B}";
413    public const CYRILLIC_CAPITAL_LETTER_EM                            = "\u{041C}";
414    public const CYRILLIC_CAPITAL_LETTER_EN                            = "\u{041D}";
415    public const CYRILLIC_CAPITAL_LETTER_O                             = "\u{041E}";
416    public const CYRILLIC_CAPITAL_LETTER_PE                            = "\u{041F}";
417    public const CYRILLIC_CAPITAL_LETTER_ER                            = "\u{0420}";
418    public const CYRILLIC_CAPITAL_LETTER_ES                            = "\u{0421}";
419    public const CYRILLIC_CAPITAL_LETTER_TE                            = "\u{0422}";
420    public const CYRILLIC_CAPITAL_LETTER_U                             = "\u{0423}";
421    public const CYRILLIC_CAPITAL_LETTER_EF                            = "\u{0424}";
422    public const CYRILLIC_CAPITAL_LETTER_HA                            = "\u{0425}";
423    public const CYRILLIC_CAPITAL_LETTER_TSE                           = "\u{0426}";
424    public const CYRILLIC_CAPITAL_LETTER_CHE                           = "\u{0427}";
425    public const CYRILLIC_CAPITAL_LETTER_SHA                           = "\u{0428}";
426    public const CYRILLIC_CAPITAL_LETTER_SHCHA                         = "\u{0429}";
427    public const CYRILLIC_CAPITAL_LETTER_HARD_SIGN                     = "\u{042A}";
428    public const CYRILLIC_CAPITAL_LETTER_YERU                          = "\u{042B}";
429    public const CYRILLIC_CAPITAL_LETTER_SOFT_SIGN                     = "\u{042C}";
430    public const CYRILLIC_CAPITAL_LETTER_E                             = "\u{042D}";
431    public const CYRILLIC_CAPITAL_LETTER_YU                            = "\u{042E}";
432    public const CYRILLIC_CAPITAL_LETTER_YA                            = "\u{042F}";
433    public const CYRILLIC_SMALL_LETTER_A                               = "\u{0430}"; // Cyrillic small letters, U+0430-U+044F: each is its capital's code point + 0x20
434    public const CYRILLIC_SMALL_LETTER_BE                              = "\u{0431}";
435    public const CYRILLIC_SMALL_LETTER_VE                              = "\u{0432}";
436    public const CYRILLIC_SMALL_LETTER_GHE                             = "\u{0433}";
437    public const CYRILLIC_SMALL_LETTER_DE                              = "\u{0434}";
438    public const CYRILLIC_SMALL_LETTER_IE                              = "\u{0435}";
439    public const CYRILLIC_SMALL_LETTER_ZHE                             = "\u{0436}";
440    public const CYRILLIC_SMALL_LETTER_ZE                              = "\u{0437}";
441    public const CYRILLIC_SMALL_LETTER_I                               = "\u{0438}";
442    public const CYRILLIC_SMALL_LETTER_SHORT_I                         = "\u{0439}";
443    public const CYRILLIC_SMALL_LETTER_KA                              = "\u{043A}";
444    public const CYRILLIC_SMALL_LETTER_EL                              = "\u{043B}";
445    public const CYRILLIC_SMALL_LETTER_EM                              = "\u{043C}";
446    public const CYRILLIC_SMALL_LETTER_EN                              = "\u{043D}";
447    public const CYRILLIC_SMALL_LETTER_O                               = "\u{043E}";
448    public const CYRILLIC_SMALL_LETTER_PE                              = "\u{043F}";
449    public const CYRILLIC_SMALL_LETTER_ER                              = "\u{0440}";
450    public const CYRILLIC_SMALL_LETTER_ES                              = "\u{0441}";
451    public const CYRILLIC_SMALL_LETTER_TE                              = "\u{0442}";
452    public const CYRILLIC_SMALL_LETTER_U                               = "\u{0443}";
453    public const CYRILLIC_SMALL_LETTER_EF                              = "\u{0444}";
454    public const CYRILLIC_SMALL_LETTER_HA                              = "\u{0445}";
455    public const CYRILLIC_SMALL_LETTER_TSE                             = "\u{0446}";
456    public const CYRILLIC_SMALL_LETTER_CHE                             = "\u{0447}";
457    public const CYRILLIC_SMALL_LETTER_SHA                             = "\u{0448}";
458    public const CYRILLIC_SMALL_LETTER_SHCHA                           = "\u{0449}";
459    public const CYRILLIC_SMALL_LETTER_HARD_SIGN                       = "\u{044A}";
460    public const CYRILLIC_SMALL_LETTER_YERU                            = "\u{044B}";
461    public const CYRILLIC_SMALL_LETTER_SOFT_SIGN                       = "\u{044C}";
462    public const CYRILLIC_SMALL_LETTER_E                               = "\u{044D}";
463    public const CYRILLIC_SMALL_LETTER_YU                              = "\u{044E}";
464    public const CYRILLIC_SMALL_LETTER_YA                              = "\u{044F}";
465    public const CYRILLIC_SMALL_LETTER_IO                              = "\u{0451}"; // U+0451-U+045F: small forms of U+0401-U+040F (capital code point + 0x50)
466    public const CYRILLIC_SMALL_LETTER_DJE                             = "\u{0452}";
467    public const CYRILLIC_SMALL_LETTER_GJE                             = "\u{0453}";
468    public const CYRILLIC_SMALL_LETTER_UKRANIAN_IE                     = "\u{0454}"; // NOTE(review): "UKRANIAN" is spelled "UKRAINIAN" two constants below; rename only together with every reference
469    public const CYRILLIC_SMALL_LETTER_DZE                             = "\u{0455}";
470    public const CYRILLIC_SMALL_LETTER_BYELORUSSIAN_UKRAINIAN_I        = "\u{0456}";
471    public const CYRILLIC_SMALL_LETTER_YI                              = "\u{0457}";
472    public const CYRILLIC_SMALL_LETTER_JE                              = "\u{0458}";
473    public const CYRILLIC_SMALL_LETTER_LJE                             = "\u{0459}";
474    public const CYRILLIC_SMALL_LETTER_NJE                             = "\u{045A}";
475    public const CYRILLIC_SMALL_LETTER_TSHE                            = "\u{045B}";
476    public const CYRILLIC_SMALL_LETTER_KJE                             = "\u{045C}";
477    public const CYRILLIC_SMALL_LETTER_SHORT_U                         = "\u{045E}"; // U+045D has no constant in this list
478    public const CYRILLIC_SMALL_LETTER_DZHE                            = "\u{045F}";
479    public const CYRILLIC_CAPITAL_LETTER_GHE_WITH_UPTURN               = "\u{0490}"; // capital/small pair outside the two main Cyrillic runs above
480    public const CYRILLIC_SMALL_LETTER_GHE_WITH_UPTURN                 = "\u{0491}";
481    public const LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW                = "\u{1E00}"; // Latin letters with additional diacritics, U+1E00 onwards; capital/small pairs sit at even/odd code points
482    public const LATIN_SMALL_LETTER_A_WITH_RING_BELOW                  = "\u{1E01}";
483    public const LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE                 = "\u{1E02}";
484    public const LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE                   = "\u{1E03}";
485    public const LATIN_CAPITAL_LETTER_B_WITH_DOT_BELOW                 = "\u{1E04}";
486    public const LATIN_SMALL_LETTER_B_WITH_DOT_BELOW                   = "\u{1E05}";
487    public const LATIN_CAPITAL_LETTER_C_WITH_CEDILLA_AND_ACUTE         = "\u{1E08}";
488    public const LATIN_SMALL_LETTER_C_WITH_CEDILLA_AND_ACUTE           = "\u{1E09}";
489    public const LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE                 = "\u{1E0A}";
490    public const LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE                   = "\u{1E0B}";
491    public const LATIN_CAPITAL_LETTER_D_WITH_DOT_BELOW                 = "\u{1E0C}";
492    public const LATIN_SMALL_LETTER_D_WITH_DOT_BELOW                   = "\u{1E0D}";
493    public const LATIN_CAPITAL_LETTER_SHARP_S                          = "\u{1E9E}"; // NOTE(review): U+1E9E is out of code-point order here (its neighbours are U+1E0D and U+1E10)
494    public const LATIN_CAPITAL_LETTER_D_WITH_CEDILLA                   = "\u{1E10}";
495    public const LATIN_SMALL_LETTER_D_WITH_CEDILLA                     = "\u{1E11}";
496    public const LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_GRAVE          = "\u{1E14}";
497    public const LATIN_SMALL_LETTER_E_WITH_MACRON_AND_GRAVE            = "\u{1E15}";
498    public const LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_ACUTE          = "\u{1E16}";
499    public const LATIN_SMALL_LETTER_E_WITH_MACRON_AND_ACUTE            = "\u{1E17}";
500    public const LATIN_CAPITAL_LETTER_E_WITH_CEDILLA_AND_BREVE         = "\u{1E1C}";
501    public const LATIN_SMALL_LETTER_E_WITH_CEDILLA_AND_BREVE           = "\u{1E1D}";
502    public const LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE                 = "\u{1E1E}";
503    public const LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE                   = "\u{1E1F}";
504    public const LATIN_CAPITAL_LETTER_G_WITH_MACRON                    = "\u{1E20}";
505    public const LATIN_SMALL_LETTER_G_WITH_MACRON                      = "\u{1E21}";
506    public const LATIN_CAPITAL_LETTER_H_WITH_DOT_ABOVE                 = "\u{1E22}";
507    public const LATIN_SMALL_LETTER_H_WITH_DOT_ABOVE                   = "\u{1E23}";
508    public const LATIN_CAPITAL_LETTER_H_WITH_DOT_BELOW                 = "\u{1E24}";
509    public const LATIN_SMALL_LETTER_H_WITH_DOT_BELOW                   = "\u{1E25}";
510    public const LATIN_CAPITAL_LETTER_H_WITH_DIAERESIS                 = "\u{1E26}";
511    public const LATIN_SMALL_LETTER_H_WITH_DIAERESIS                   = "\u{1E27}";
512    public const LATIN_CAPITAL_LETTER_H_WITH_CEDILLA                   = "\u{1E28}";
513    public const LATIN_SMALL_LETTER_H_WITH_CEDILLA                     = "\u{1E29}";
514    public const LATIN_CAPITAL_LETTER_H_WITH_BREVE_BELOW               = "\u{1E2A}";
515    public const LATIN_SMALL_LETTER_H_WITH_BREVE_BELOW                 = "\u{1E2B}";
516    public const LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS_AND_ACUTE       = "\u{1E2E}";
517    public const LATIN_SMALL_LETTER_I_WITH_DIAERESIS_AND_ACUTE         = "\u{1E2F}";
518    public const LATIN_CAPITAL_LETTER_K_WITH_ACUTE                     = "\u{1E30}";
519    public const LATIN_SMALL_LETTER_K_WITH_ACUTE                       = "\u{1E31}";
520    public const LATIN_CAPITAL_LETTER_K_WITH_DOT_BELOW                 = "\u{1E32}";
521    public const LATIN_SMALL_LETTER_K_WITH_DOT_BELOW                   = "\u{1E33}";
522    public const LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW                 = "\u{1E36}";
523    public const LATIN_SMALL_LETTER_L_WITH_DOT_BELOW                   = "\u{1E37}";
524    public const LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW_AND_MACRON      = "\u{1E38}";
525    public const LATIN_SMALL_LETTER_L_WITH_DOT_BELOW_AND_MACRON        = "\u{1E39}";
526    public const LATIN_CAPITAL_LETTER_M_WITH_ACUTE                     = "\u{1E3E}";
527    public const LATIN_SMALL_LETTER_M_WITH_ACUTE                       = "\u{1E3F}";
528    public const LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE                 = "\u{1E40}";
529    public const LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE                   = "\u{1E41}";
530    public const LATIN_CAPITAL_LETTER_M_WITH_DOT_BELOW                 = "\u{1E42}";
531    public const LATIN_SMALL_LETTER_M_WITH_DOT_BELOW                   = "\u{1E43}";
532    public const LATIN_CAPITAL_LETTER_N_WITH_DOT_ABOVE                 = "\u{1E44}";
533    public const LATIN_SMALL_LETTER_N_WITH_DOT_ABOVE                   = "\u{1E45}";
534    public const LATIN_CAPITAL_LETTER_N_WITH_DOT_BELOW                 = "\u{1E46}";
535    public const LATIN_SMALL_LETTER_N_WITH_DOT_BELOW                   = "\u{1E47}";
536    public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_ACUTE           = "\u{1E4C}";
537    public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_ACUTE             = "\u{1E4D}";
538    public const LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_DIAERESIS       = "\u{1E4E}";
539    public const LATIN_SMALL_LETTER_O_WITH_TILDE_AND_DIAERESIS         = "\u{1E4F}";
540    public const LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_GRAVE          = "\u{1E50}";
541    public const LATIN_SMALL_LETTER_O_WITH_MACRON_AND_GRAVE            = "\u{1E51}";
542    public const LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_ACUTE          = "\u{1E52}";
543    public const LATIN_SMALL_LETTER_O_WITH_MACRON_AND_ACUTE            = "\u{1E53}";
544    public const LATIN_CAPITAL_LETTER_P_WITH_ACUTE                     = "\u{1E54}";
545    public const LATIN_SMALL_LETTER_P_WITH_ACUTE                       = "\u{1E55}";
546    public const LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE                 = "\u{1E56}";
547    public const LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE                   = "\u{1E57}";
548    public const LATIN_CAPITAL_LETTER_R_WITH_DOT_ABOVE                 = "\u{1E58}";
549    public const LATIN_SMALL_LETTER_R_WITH_DOT_ABOVE                   = "\u{1E59}";
550    public const LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW                 = "\u{1E5A}";
551    public const LATIN_SMALL_LETTER_R_WITH_DOT_BELOW                   = "\u{1E5B}";
552    public const LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW_AND_MACRON      = "\u{1E5C}";
553    public const LATIN_SMALL_LETTER_R_WITH_DOT_BELOW_AND_MACRON        = "\u{1E5D}";
554    public const LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE                 = "\u{1E60}";
555    public const LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE                   = "\u{1E61}";
556    public const LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW                 = "\u{1E62}";
557    public const LATIN_SMALL_LETTER_S_WITH_DOT_BELOW                   = "\u{1E63}";
558    public const LATIN_CAPITAL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE       = "\u{1E64}";
559    public const LATIN_SMALL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE         = "\u{1E65}";
560    public const LATIN_CAPITAL_LETTER_S_WITH_CARON_AND_DOT_ABOVE       = "\u{1E66}";
561    public const LATIN_SMALL_LETTER_S_WITH_CARON_AND_DOT_ABOVE         = "\u{1E67}";
562    public const LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE   = "\u{1E68}";
563    public const LATIN_SMALL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE     = "\u{1E69}";
564    public const LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE                 = "\u{1E6A}";
565    public const LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE                   = "\u{1E6B}";
566    public const LATIN_CAPITAL_LETTER_T_WITH_DOT_BELOW                 = "\u{1E6C}";
567    public const LATIN_SMALL_LETTER_T_WITH_DOT_BELOW                   = "\u{1E6D}";
568    public const LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_BELOW           = "\u{1E72}";
569    public const LATIN_SMALL_LETTER_U_WITH_DIAERESIS_BELOW             = "\u{1E73}";
570    public const LATIN_CAPITAL_LETTER_U_WITH_TILDE_AND_ACUTE           = "\u{1E78}";
571    public const LATIN_SMALL_LETTER_U_WITH_TILDE_AND_ACUTE             = "\u{1E79}";
572    public const LATIN_CAPITAL_LETTER_U_WITH_MACRON_AND_DIAERESIS      = "\u{1E7A}";
573    public const LATIN_SMALL_LETTER_U_WITH_MACRON_AND_DIAERESIS        = "\u{1E7B}";
574    public const LATIN_CAPITAL_LETTER_V_WITH_TILDE                     = "\u{1E7C}";
575    public const LATIN_SMALL_LETTER_V_WITH_TILDE                       = "\u{1E7D}";
576    public const LATIN_CAPITAL_LETTER_V_WITH_DOT_BELOW                 = "\u{1E7E}";
577    public const LATIN_SMALL_LETTER_V_WITH_DOT_BELOW                   = "\u{1E7F}";
578    public const LATIN_CAPITAL_LETTER_W_WITH_GRAVE                     = "\u{1E80}";
579    public const LATIN_SMALL_LETTER_W_WITH_GRAVE                       = "\u{1E81}";
580    public const LATIN_CAPITAL_LETTER_W_WITH_ACUTE                     = "\u{1E82}";
581    public const LATIN_SMALL_LETTER_W_WITH_ACUTE                       = "\u{1E83}";
582    public const LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS                 = "\u{1E84}";
583    public const LATIN_SMALL_LETTER_W_WITH_DIAERESIS                   = "\u{1E85}";
584    public const LATIN_CAPITAL_LETTER_W_WITH_DOT_ABOVE                 = "\u{1E86}";
585    public const LATIN_SMALL_LETTER_W_WITH_DOT_ABOVE                   = "\u{1E87}";
586    public const LATIN_CAPITAL_LETTER_W_WITH_DOT_BELOW                 = "\u{1E88}";
587    public const LATIN_SMALL_LETTER_W_WITH_DOT_BELOW                   = "\u{1E89}";
588    public const LATIN_CAPITAL_LETTER_X_WITH_DOT_ABOVE                 = "\u{1E8A}";
589    public const LATIN_SMALL_LETTER_X_WITH_DOT_ABOVE                   = "\u{1E8B}";
590    public const LATIN_CAPITAL_LETTER_X_WITH_DIAERESIS                 = "\u{1E8C}";
591    public const LATIN_SMALL_LETTER_X_WITH_DIAERESIS                   = "\u{1E8D}";
592    public const LATIN_CAPITAL_LETTER_Y_WITH_DOT_ABOVE                 = "\u{1E8E}";
593    public const LATIN_SMALL_LETTER_Y_WITH_DOT_ABOVE                   = "\u{1E8F}";
594    public const LATIN_CAPITAL_LETTER_Z_WITH_CIRCUMFLEX                = "\u{1E90}";
595    public const LATIN_SMALL_LETTER_Z_WITH_CIRCUMFLEX                  = "\u{1E91}";
596    public const LATIN_CAPITAL_LETTER_Z_WITH_DOT_BELOW                 = "\u{1E92}";
597    public const LATIN_SMALL_LETTER_Z_WITH_DOT_BELOW                   = "\u{1E93}";
598    public const LATIN_SMALL_LETTER_T_WITH_DIAERESIS                   = "\u{1E97}"; // U+1E97-U+1E99: small letters only; no capital constants are defined for them here
599    public const LATIN_SMALL_LETTER_W_WITH_RING_ABOVE                  = "\u{1E98}";
600    public const LATIN_SMALL_LETTER_Y_WITH_RING_ABOVE                  = "\u{1E99}";
601    public const LATIN_CAPITAL_LETTER_A_WITH_DOT_BELOW                 = "\u{1EA0}"; // U+1EA0-U+1EF9: further accented forms of A, E, I, O, U and Y, as capital/small pairs
602    public const LATIN_SMALL_LETTER_A_WITH_DOT_BELOW                   = "\u{1EA1}";
603    public const LATIN_CAPITAL_LETTER_A_WITH_HOOK_ABOVE                = "\u{1EA2}";
604    public const LATIN_SMALL_LETTER_A_WITH_HOOK_ABOVE                  = "\u{1EA3}";
605    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE      = "\u{1EA4}";
606    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE        = "\u{1EA5}";
607    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE      = "\u{1EA6}";
608    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE        = "\u{1EA7}";
609    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1EA8}";
610    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE   = "\u{1EA9}";
611    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE      = "\u{1EAA}";
612    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE        = "\u{1EAB}";
613    public const LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW  = "\u{1EAC}";
614    public const LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW    = "\u{1EAD}";
615    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_ACUTE           = "\u{1EAE}";
616    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_ACUTE             = "\u{1EAF}";
617    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_GRAVE           = "\u{1EB0}";
618    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_GRAVE             = "\u{1EB1}";
619    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE      = "\u{1EB2}";
620    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE        = "\u{1EB3}";
621    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_TILDE           = "\u{1EB4}";
622    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_TILDE             = "\u{1EB5}";
623    public const LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_DOT_BELOW       = "\u{1EB6}";
624    public const LATIN_SMALL_LETTER_A_WITH_BREVE_AND_DOT_BELOW         = "\u{1EB7}";
625    public const LATIN_CAPITAL_LETTER_E_WITH_DOT_BELOW                 = "\u{1EB8}";
626    public const LATIN_SMALL_LETTER_E_WITH_DOT_BELOW                   = "\u{1EB9}";
627    public const LATIN_CAPITAL_LETTER_E_WITH_HOOK_ABOVE                = "\u{1EBA}";
628    public const LATIN_SMALL_LETTER_E_WITH_HOOK_ABOVE                  = "\u{1EBB}";
629    public const LATIN_CAPITAL_LETTER_E_WITH_TILDE                     = "\u{1EBC}";
630    public const LATIN_SMALL_LETTER_E_WITH_TILDE                       = "\u{1EBD}";
631    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE      = "\u{1EBE}";
632    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE        = "\u{1EBF}";
633    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE      = "\u{1EC0}";
634    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE        = "\u{1EC1}";
635    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1EC2}";
636    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE   = "\u{1EC3}";
637    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE      = "\u{1EC4}";
638    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE        = "\u{1EC5}";
639    public const LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW  = "\u{1EC6}";
640    public const LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW    = "\u{1EC7}";
641    public const LATIN_CAPITAL_LETTER_I_WITH_HOOK_ABOVE                = "\u{1EC8}";
642    public const LATIN_SMALL_LETTER_I_WITH_HOOK_ABOVE                  = "\u{1EC9}";
643    public const LATIN_CAPITAL_LETTER_I_WITH_DOT_BELOW                 = "\u{1ECA}";
644    public const LATIN_SMALL_LETTER_I_WITH_DOT_BELOW                   = "\u{1ECB}";
645    public const LATIN_CAPITAL_LETTER_O_WITH_DOT_BELOW                 = "\u{1ECC}";
646    public const LATIN_SMALL_LETTER_O_WITH_DOT_BELOW                   = "\u{1ECD}";
647    public const LATIN_CAPITAL_LETTER_O_WITH_HOOK_ABOVE                = "\u{1ECE}";
648    public const LATIN_SMALL_LETTER_O_WITH_HOOK_ABOVE                  = "\u{1ECF}";
649    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE      = "\u{1ED0}";
650    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE        = "\u{1ED1}";
651    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE      = "\u{1ED2}";
652    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE        = "\u{1ED3}";
653    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE = "\u{1ED4}";
654    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE   = "\u{1ED5}";
655    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE      = "\u{1ED6}";
656    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE        = "\u{1ED7}";
657    public const LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW  = "\u{1ED8}";
658    public const LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW    = "\u{1ED9}";
659    public const LATIN_CAPITAL_LETTER_U_WITH_DOT_BELOW                 = "\u{1EE4}"; // U+1EDA-U+1EE3 have no constants in this list
660    public const LATIN_SMALL_LETTER_U_WITH_DOT_BELOW                   = "\u{1EE5}";
661    public const LATIN_CAPITAL_LETTER_U_WITH_HOOK_ABOVE                = "\u{1EE6}";
662    public const LATIN_SMALL_LETTER_U_WITH_HOOK_ABOVE                  = "\u{1EE7}";
663    public const LATIN_CAPITAL_LETTER_Y_WITH_GRAVE                     = "\u{1EF2}"; // U+1EE8-U+1EF1 have no constants in this list
664    public const LATIN_SMALL_LETTER_Y_WITH_GRAVE                       = "\u{1EF3}";
665    public const LATIN_CAPITAL_LETTER_Y_WITH_DOT_BELOW                 = "\u{1EF4}";
666    public const LATIN_SMALL_LETTER_Y_WITH_DOT_BELOW                   = "\u{1EF5}";
667    public const LATIN_CAPITAL_LETTER_Y_WITH_HOOK_ABOVE                = "\u{1EF6}";
668    public const LATIN_SMALL_LETTER_Y_WITH_HOOK_ABOVE                  = "\u{1EF7}";
669    public const LATIN_CAPITAL_LETTER_Y_WITH_TILDE                     = "\u{1EF8}";
670    public const LATIN_SMALL_LETTER_Y_WITH_TILDE                       = "\u{1EF9}";
671    public const ZERO_WIDTH_NON_JOINER                                 = "\u{200C}"; // General punctuation and format characters, U+200C-U+2044
672    public const ZERO_WIDTH_JOINER                                     = "\u{200D}";
673    public const EN_DASH                                               = "\u{2013}";
674    public const EM_DASH                                               = "\u{2014}";
675    public const DOUBLE_LOW_LINE                                       = "\u{2017}";
676    public const LEFT_SINGLE_QUOTATION_MARK                            = "\u{2018}";
677    public const RIGHT_SINGLE_QUOTATION_MARK                           = "\u{2019}";
678    public const SINGLE_LOW_9_QUOTATION_MARK                           = "\u{201A}";
679    public const LEFT_DOUBLE_QUOTATION_MARK                            = "\u{201C}";
680    public const RIGHT_DOUBLE_QUOTATION_MARK                           = "\u{201D}";
681    public const DOUBLE_LOW_9_QUOTATION_MARK                           = "\u{201E}";
682    public const DAGGER                                                = "\u{2020}";
683    public const DOUBLE_DAGGER                                         = "\u{2021}";
684    public const BULLET                                                = "\u{2022}";
685    public const HORIZONTAL_ELLIPSIS                                   = "\u{2026}";
686    public const PER_MILLE_SIGN                                        = "\u{2030}";
687    public const SINGLE_LEFT_POINTING_ANGLE_QUOTATION_MARK             = "\u{2039}";
688    public const SINGLE_RIGHT_POINTING_ANGLE_QUOTATION_MARK            = "\u{203A}";
689    public const FRACTION_SLASH                                        = "\u{2044}";
690    public const SUPERSCRIPT_LATIN_SMALL_LETTER_N                      = "\u{207F}"; // Superscript, currency and letter-like symbols, U+207F-U+2122
691    public const PESETA_SIGN                                           = "\u{20A7}";
692    public const EURO_SIGN                                             = "\u{20AC}";
693    public const SCRIPT_SMALL_L                                        = "\u{2113}";
694    public const NUMERO_SIGN                                           = "\u{2116}";
695    public const SOUND_RECORDING_COPYRIGHT                             = "\u{2117}";
696    public const TRADE_MARK_SIGN                                       = "\u{2122}";
697    public const PARTIAL_DIFFERENTIAL                                  = "\u{2202}"; // Mathematical operators, U+2202-U+2265
698    public const INCREMENT                                             = "\u{2206}";
699    public const N_ARY_PRODUCT                                         = "\u{220F}";
700    public const N_ARY_SUMMATION                                       = "\u{2211}";
701    public const BULLET_OPERATOR                                       = "\u{2219}";
702    public const SQUARE_ROOT                                           = "\u{221A}";
703    public const INFINITY                                              = "\u{221E}";
704    public const INTERSECTION                                          = "\u{2229}";
705    public const INTEGRAL                                              = "\u{222B}";
706    public const ALMOST_EQUAL_TO                                       = "\u{2248}";
707    public const NOT_EQUAL_TO                                          = "\u{2260}";
708    public const IDENTICAL_TO                                          = "\u{2261}";
709    public const LESS_THAN_OR_EQUAL_TO                                 = "\u{2264}";
710    public const GREATER_THAN_OR_EQUAL_TO                              = "\u{2265}";
711    public const REVERSED_NOT_SIGN                                     = "\u{2310}"; // Technical symbols, U+2310-U+2321
712    public const TOP_HALF_INTEGRAL                                     = "\u{2320}";
713    public const BOTTOM_HALF_INTEGRAL                                  = "\u{2321}";
714    public const BOX_DRAWINGS_LIGHT_HORIZONTAL                         = "\u{2500}"; // Box drawing, light (single-line) forms, U+2500-U+253C
715    public const BOX_DRAWINGS_LIGHT_VERTICAL                           = "\u{2502}";
716    public const BOX_DRAWINGS_LIGHT_DOWN_AND_RIGHT                     = "\u{250C}";
717    public const BOX_DRAWINGS_LIGHT_DOWN_AND_LEFT                      = "\u{2510}";
718    public const BOX_DRAWINGS_LIGHT_UP_AND_LEFT                        = "\u{2518}"; // listed before U+2514, so not in code-point order; both values match their Unicode names
719    public const BOX_DRAWINGS_LIGHT_UP_AND_RIGHT                       = "\u{2514}";
720    public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_RIGHT                 = "\u{251C}";
721    public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_LEFT                  = "\u{2524}";
722    public const BOX_DRAWINGS_LIGHT_DOWN_AND_HORIZONTAL                = "\u{252C}";
723    public const BOX_DRAWINGS_LIGHT_UP_AND_HORIZONTAL                  = "\u{2534}";
724    public const BOX_DRAWINGS_LIGHT_VERTICAL_AND_HORIZONTAL            = "\u{253C}";
725    public const BOX_DRAWINGS_DOUBLE_HORIZONTAL                        = "\u{2550}"; // Box drawing, double-line and mixed single/double forms: contiguous run U+2550-U+256C
726    public const BOX_DRAWINGS_DOUBLE_VERTICAL                          = "\u{2551}";
727    public const BOX_DRAWINGS_DOWN_SINGLE_AND_RIGHT_DOUBLE             = "\u{2552}";
728    public const BOX_DRAWINGS_DOWN_DOUBLE_AND_RIGHT_SINGLE             = "\u{2553}";
729    public const BOX_DRAWINGS_DOUBLE_DOWN_AND_RIGHT                    = "\u{2554}";
730    public const BOX_DRAWINGS_DOWN_SINGLE_AND_LEFT_DOUBLE              = "\u{2555}";
731    public const BOX_DRAWINGS_DOWN_DOUBLE_AND_LEFT_SINGLE              = "\u{2556}";
732    public const BOX_DRAWINGS_DOUBLE_DOWN_AND_LEFT                     = "\u{2557}";
733    public const BOX_DRAWINGS_UP_SINGLE_AND_RIGHT_DOUBLE               = "\u{2558}";
734    public const BOX_DRAWINGS_UP_DOUBLE_AND_RIGHT_SINGLE               = "\u{2559}";
735    public const BOX_DRAWINGS_DOUBLE_UP_AND_RIGHT                      = "\u{255A}";
736    public const BOX_DRAWINGS_UP_SINGLE_AND_LEFT_DOUBLE                = "\u{255B}";
737    public const BOX_DRAWINGS_UP_DOUBLE_AND_LEFT_SINGLE                = "\u{255C}";
738    public const BOX_DRAWINGS_DOUBLE_UP_AND_LEFT                       = "\u{255D}";
739    public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_RIGHT_DOUBLE         = "\u{255E}";
740    public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_RIGHT_SINGLE         = "\u{255F}";
741    public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_RIGHT                = "\u{2560}";
742    public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_LEFT_DOUBLE          = "\u{2561}";
743    public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_LEFT_SINGLE          = "\u{2562}";
744    public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_LEFT                 = "\u{2563}";
745    public const BOX_DRAWINGS_DOWN_SINGLE_AND_HORIZONTAL_DOUBLE        = "\u{2564}";
746    public const BOX_DRAWINGS_DOWN_DOUBLE_AND_HORIZONTAL_SINGLE        = "\u{2565}";
747    public const BOX_DRAWINGS_DOUBLE_DOWN_AND_HORIZONTAL               = "\u{2566}";
748    public const BOX_DRAWINGS_UP_SINGLE_AND_HORIZONTAL_DOUBLE          = "\u{2567}";
749    public const BOX_DRAWINGS_UP_DOUBLE_AND_HORIZONTAL_SINGLE          = "\u{2568}";
750    public const BOX_DRAWINGS_BOX_DRAWINGS_DOUBLE_UP_AND_HORIZONTAL    = "\u{2569}"; // NOTE(review): "BOX_DRAWINGS_" prefix is doubled, unlike its siblings; rename only together with every reference
751    public const BOX_DRAWINGS_VERTICAL_SINGLE_AND_HORIZONTAL_DOUBLE    = "\u{256A}";
752    public const BOX_DRAWINGS_VERTICAL_DOUBLE_AND_HORIZONTAL_SINGLE    = "\u{256B}";
753    public const BOX_DRAWINGS_DOUBLE_VERTICAL_AND_HORIZONTAL           = "\u{256C}";
754    public const UPPER_HALF_BLOCK                                      = "\u{2580}"; // Block elements and shades, U+2580-U+2593
755    public const LOWER_HALF_BLOCK                                      = "\u{2584}";
756    public const FULL_BLOCK                                            = "\u{2588}";
757    public const LEFT_HALF_BLOCK                                       = "\u{258C}";
758    public const RIGHT_HALF_BLOCK                                      = "\u{2590}";
759    public const LIGHT_SHADE                                           = "\u{2591}";
760    public const MEDIUM_SHADE                                          = "\u{2592}";
761    public const DARK_SHADE                                            = "\u{2593}";
762    public const BLACK_SQUARE                                          = "\u{25A0}"; // Geometric shapes
763    public const WHITE_SQUARE                                          = "\u{25A1}";
764    public const LOZENGE                                               = "\u{25CA}";
765    public const MUSIC_FLAT_SIGN                                       = "\u{266D}"; // Musical symbols
766    public const MUSIC_SHARP_SIGN                                      = "\u{266F}";
767    public const LATIN_SMALL_LIGATURE_FI                               = "\u{FB01}";
768    public const LATIN_SMALL_LIGATURE_FL                               = "\u{FB02}";
769    public const BYTE_ORDER_MARK                                       = "\u{FEFF}";
770    public const REPLACEMENT_CHARACTER                                 = "\u{FFFD}";
771
772    public const COMPOSED_CHARACTERS = [
773        'A' . self::COMBINING_ACUTE_ACCENT      => self::LATIN_CAPITAL_LETTER_A_WITH_ACUTE,
774        'A' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX,
775        'A' . self::COMBINING_DIAERESIS         => self::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS,
776        'A' . self::COMBINING_GRAVE_ACCENT      => self::LATIN_CAPITAL_LETTER_A_WITH_GRAVE,
777        'A' . self::COMBINING_RING_ABOVE        => self::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE,
778        'A' . self::COMBINING_TILDE             => self::LATIN_CAPITAL_LETTER_A_WITH_TILDE,
779        'C' . self::COMBINING_CEDILLA           => self::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA,
780        'E' . self::COMBINING_ACUTE_ACCENT      => self::LATIN_CAPITAL_LETTER_E_WITH_ACUTE,
781        'E' . self::COMBINING_DIAERESIS         => self::LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS,
782        'E' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX,
783        'E' . self::COMBINING_GRAVE_ACCENT      => self::LATIN_CAPITAL_LETTER_E_WITH_GRAVE,
784        'a' . self::COMBINING_CIRCUMFLEX_ACCENT => self::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX,
785        'a' . self::COMBINING_DIAERESIS         => self::LATIN_SMALL_LETTER_A_WITH_DIAERESIS,
786        'e' . self::COMBINING_ACUTE_ACCENT      => self::LATIN_SMALL_LETTER_E_WITH_ACUTE,
787        'u' . self::COMBINING_DIAERESIS         => self::LATIN_SMALL_LETTER_U_WITH_DIAERESIS,
788    ];
789
790    /**
791     * Convert text from (potentially invalid) UTF-8 to UTF-8.
792     *
793     * @param string $text
794     *
795     * @return string
796     */
797    public function fromUtf8(string $text): string
798    {
799        if (preg_match('//u', $text) === false) {
800            // Not UTF8?
801            mb_substitute_character(0xFFFD);
802
803            return mb_convert_encoding($text, 'UTF-8', 'UTF-8');
804        }
805
806        return $text;
807    }
808
809    /**
810     * Convert text from (potentially invalid) UTF-8 to UTF-8.
811     *
812     * @param string $text
813     *
814     * @return string
815     */
816    public function toUtf8(string $text): string
817    {
818        return $this->fromUtf8($text);
819    }
820
821    /**
822     * Create a UTF8 character from a code.
823     *
824     * @param int $code
825     *
826     * @return string
827     */
828    public static function chr(int $code): string
829    {
830        if ($code < 0 || $code > 0x1FFFFF) {
831            throw new InvalidArgumentException((string)$code);
832        }
833
834        if ($code <= 0x7F) {
835            return chr($code);
836        }
837
838        if ($code <= 0x7FF) {
839            return
840                chr(($code >> 6) + 0xC0) .
841                chr(($code & 0x3F) + 0x80);
842        }
843
844        if ($code <= 0xFFFF) {
845            return
846                chr(($code >> 12) + 0xE0) .
847                chr((($code >> 6) & 0x3F) + 0x80) .
848                chr(($code & 0x3F) + 0x80);
849        }
850
851        return
852            chr(($code >> 18) + 0xF0) .
853            chr((($code >> 12) & 0x3F) + 0x80) .
854            chr((($code >> 6) & 0x3F) + 0x80) .
855            chr(($code & 0x3F) + 0x80);
856    }
857}
858