1<?php 2 3/** 4 * webtrees: online genealogy 5 * Copyright (C) 2023 webtrees development team 6 * This program is free software: you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation, either version 3 of the License, or 9 * (at your option) any later version. 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 13 * GNU General Public License for more details. 14 * You should have received a copy of the GNU General Public License 15 * along with this program. If not, see <https://www.gnu.org/licenses/>. 16 */ 17 18declare(strict_types=1); 19 20namespace Fisharebest\Webtrees\Encodings; 21 22use function preg_replace; 23use function strtr; 24 25/** 26 * Convert between UTF-8 and ANSEL encoding. 27 * 28 * ANSEL is the common name for the MARC-21 encoding, also known as Z39.47, which 29 * has a number of editions. These are denoted by a year suffix. 30 * 31 * The GEDCOM 5.5.1 specification (1999-10-02) specifies the Z39.47-1985 edition. 32 * It adds Es Zett (ß) at CF. 33 * 34 * According to wikipedia, other non-standard characters are also added. 35 * 36 * HEX Unicode Glyph Description 37 * BE 25A1 □ Empty box 38 * BF 25A0 ■ Black box 39 * CD 0065 e Midline e 40 * CE 006F o Midline o 41 * CF 00DF ß Es Zett 42 * FC 0338 / Combining slash 43 * 44 * @link https://en.wikipedia.org/wiki/ANSEL 45 * 46 * The MARC-21 specification has added a number of additional characters since 47 * the 1985 edition. 
48 * 49 * HEX Unicode Glyph Description 50 * 88 0098 Start of string 51 * 89 009C String terminator 52 * 8D 200D Zero width joiner 53 * 8E 200C Zero width non-joiner 54 * A7 CAB9 ʹ Single prime 55 * AC C6A0 Ơ LATIN CAPITAL LETTER O WITH HORN 56 * AD C6AF Ư LATIN CAPITAL LETTER U WITH HORN 57 * B7 CABA ʺ Double prime 58 * BC C6A1 ơ LATIN SMALL LETTER O WITH HORN 59 * BD C6B0 ư LATIN SMALL LETTER U WITH HORN 60 * C0 C2B0 ° Degree sign 61 * C1 E28493 ℓ Script small L 62 * C2 E28497 ℗ Sound recording copyright 63 * C4 E299AF ♯ Music sharp sign 64 * C7 00DF ß Es Zett 65 * C8 20AC € Euro sign 66 * E0 0309 Hook above 67 * EB 0361 Breve (first part / double) 68 * EC 0361 Breve (second part) 69 * EF 0310 Candrabindu 70 * F2 0323 Low dot 71 * F3 0324 Diaeresis below 72 * F4 0325 Ring below 73 * F5 0333 Double underline 74 * F7 0332 Underline 75 * F8 031C Comma below 76 * F9 032E Breve below 77 * FA 0360 Double tilde (first part / double). 78 * FB 0360 Double tilde (second part). 79 * FF 0338 Slash 80 * 81 * @link https://memory.loc.gov/diglib/codetables/45.html 82 * 83 * Note that this means we can expect two different representations of Es Zett. 84 * 85 * There are two multi-part diacritics. There are two ways to represent these. 
86 * 87 * ANSEL | UTF-8 | UTF-8 (preferred) 88 * ------------+---------------+----------------- 89 * FA x FB y | x FE22 y FE23 | x 0360 y 90 * EB x EC y | x FE20 y FE21 | x 0361 y 91 */ 92class ANSEL extends AbstractEncoding 93{ 94 public const string NAME = 'ANSEL'; 95 96 protected const array TO_UTF8 = [ 97 "\x80" => UTF8::REPLACEMENT_CHARACTER, 98 "\x81" => UTF8::REPLACEMENT_CHARACTER, 99 "\x82" => UTF8::REPLACEMENT_CHARACTER, 100 "\x83" => UTF8::REPLACEMENT_CHARACTER, 101 "\x84" => UTF8::REPLACEMENT_CHARACTER, 102 "\x85" => UTF8::REPLACEMENT_CHARACTER, 103 "\x86" => UTF8::REPLACEMENT_CHARACTER, 104 "\x87" => UTF8::REPLACEMENT_CHARACTER, 105 "\x88" => UTF8::START_OF_STRING, 106 "\x89" => UTF8::STRING_TERMINATOR, 107 "\x8A" => UTF8::REPLACEMENT_CHARACTER, 108 "\x8B" => UTF8::REPLACEMENT_CHARACTER, 109 "\x8C" => UTF8::REPLACEMENT_CHARACTER, 110 "\x8D" => UTF8::ZERO_WIDTH_JOINER, 111 "\x8E" => UTF8::ZERO_WIDTH_NON_JOINER, 112 "\x8F" => UTF8::REPLACEMENT_CHARACTER, 113 "\x90" => UTF8::REPLACEMENT_CHARACTER, 114 "\x91" => UTF8::REPLACEMENT_CHARACTER, 115 "\x92" => UTF8::REPLACEMENT_CHARACTER, 116 "\x93" => UTF8::REPLACEMENT_CHARACTER, 117 "\x94" => UTF8::REPLACEMENT_CHARACTER, 118 "\x95" => UTF8::REPLACEMENT_CHARACTER, 119 "\x96" => UTF8::REPLACEMENT_CHARACTER, 120 "\x97" => UTF8::REPLACEMENT_CHARACTER, 121 "\x98" => UTF8::REPLACEMENT_CHARACTER, 122 "\x99" => UTF8::REPLACEMENT_CHARACTER, 123 "\x9A" => UTF8::REPLACEMENT_CHARACTER, 124 "\x9B" => UTF8::REPLACEMENT_CHARACTER, 125 "\x9C" => UTF8::REPLACEMENT_CHARACTER, 126 "\x9D" => UTF8::REPLACEMENT_CHARACTER, 127 "\x9E" => UTF8::REPLACEMENT_CHARACTER, 128 "\x9F" => UTF8::REPLACEMENT_CHARACTER, 129 "\xA0" => UTF8::REPLACEMENT_CHARACTER, 130 "\xA1" => UTF8::LATIN_CAPITAL_LETTER_L_WITH_STROKE, 131 "\xA2" => UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE, 132 "\xA3" => UTF8::LATIN_CAPITAL_LETTER_D_WITH_STROKE, 133 "\xA4" => UTF8::LATIN_CAPITAL_LETTER_THORN, 134 "\xA5" => UTF8::LATIN_CAPITAL_LETTER_AE, 135 "\xA6" => 
UTF8::LATIN_CAPITAL_LIGATURE_OE, 136 "\xA7" => UTF8::MODIFIER_LETTER_PRIME, 137 "\xA8" => UTF8::MIDDLE_DOT, 138 "\xA9" => UTF8::MUSIC_FLAT_SIGN, 139 "\xAA" => UTF8::REGISTERED_SIGN, 140 "\xAB" => UTF8::PLUS_MINUS_SIGN, 141 "\xAC" => UTF8::LATIN_CAPITAL_LETTER_O_WITH_HORN, 142 "\xAD" => UTF8::LATIN_CAPITAL_LETTER_U_WITH_HORN, 143 "\xAE" => UTF8::MODIFIER_LETTER_APOSTROPHE, 144 "\xAF" => UTF8::REPLACEMENT_CHARACTER, 145 "\xB0" => UTF8::MODIFIER_LETTER_TURNED_COMMA, 146 "\xB1" => UTF8::LATIN_SMALL_LETTER_L_WITH_STROKE, 147 "\xB2" => UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE, 148 "\xB3" => UTF8::LATIN_SMALL_LETTER_D_WITH_STROKE, 149 "\xB4" => UTF8::LATIN_SMALL_LETTER_THORN, 150 "\xB5" => UTF8::LATIN_SMALL_LETTER_AE, 151 "\xB6" => UTF8::LATIN_SMALL_LIGATURE_OE, 152 "\xB7" => UTF8::MODIFIER_LETTER_DOUBLE_PRIME, 153 "\xB8" => UTF8::LATIN_SMALL_LETTER_DOTLESS_I, 154 "\xB9" => UTF8::POUND_SIGN, 155 "\xBA" => UTF8::LATIN_SMALL_LETTER_ETH, 156 "\xBB" => UTF8::REPLACEMENT_CHARACTER, 157 "\xBC" => UTF8::LATIN_SMALL_LETTER_O_WITH_HORN, 158 "\xBD" => UTF8::LATIN_SMALL_LETTER_U_WITH_HORN, 159 "\xBE" => UTF8::WHITE_SQUARE, 160 "\xBF" => UTF8::BLACK_SQUARE, 161 "\xC0" => UTF8::DEGREE_SIGN, 162 "\xC1" => UTF8::SCRIPT_SMALL_L, 163 "\xC2" => UTF8::SOUND_RECORDING_COPYRIGHT, 164 "\xC3" => UTF8::COPYRIGHT_SIGN, 165 "\xC4" => UTF8::MUSIC_SHARP_SIGN, 166 "\xC5" => UTF8::INVERTED_QUESTION_MARK, 167 "\xC6" => UTF8::INVERTED_EXCLAMATION_MARK, 168 "\xC7" => UTF8::LATIN_CAPITAL_LETTER_SHARP_S, 169 "\xC8" => UTF8::EURO_SIGN, 170 "\xC9" => UTF8::REPLACEMENT_CHARACTER, 171 "\xCA" => UTF8::REPLACEMENT_CHARACTER, 172 "\xCB" => UTF8::REPLACEMENT_CHARACTER, 173 "\xCC" => UTF8::REPLACEMENT_CHARACTER, 174 "\xCD" => UTF8::REPLACEMENT_CHARACTER, 175 "\xCE" => UTF8::REPLACEMENT_CHARACTER, 176 "\xCF" => UTF8::LATIN_SMALL_LETTER_SHARP_S, 177 "\xD0" => UTF8::REPLACEMENT_CHARACTER, 178 "\xD1" => UTF8::REPLACEMENT_CHARACTER, 179 "\xD2" => UTF8::REPLACEMENT_CHARACTER, 180 "\xD3" => UTF8::REPLACEMENT_CHARACTER, 181 
"\xD4" => UTF8::REPLACEMENT_CHARACTER, 182 "\xD5" => UTF8::REPLACEMENT_CHARACTER, 183 "\xD6" => UTF8::REPLACEMENT_CHARACTER, 184 "\xD7" => UTF8::REPLACEMENT_CHARACTER, 185 "\xD8" => UTF8::REPLACEMENT_CHARACTER, 186 "\xD9" => UTF8::REPLACEMENT_CHARACTER, 187 "\xDA" => UTF8::REPLACEMENT_CHARACTER, 188 "\xDB" => UTF8::REPLACEMENT_CHARACTER, 189 "\xDC" => UTF8::REPLACEMENT_CHARACTER, 190 "\xDD" => UTF8::REPLACEMENT_CHARACTER, 191 "\xDE" => UTF8::REPLACEMENT_CHARACTER, 192 "\xDF" => UTF8::REPLACEMENT_CHARACTER, 193 "\xE0" => UTF8::COMBINING_HOOK_ABOVE, 194 "\xE1" => UTF8::COMBINING_GRAVE_ACCENT, 195 "\xE2" => UTF8::COMBINING_ACUTE_ACCENT, 196 "\xE3" => UTF8::COMBINING_CIRCUMFLEX_ACCENT, 197 "\xE4" => UTF8::COMBINING_TILDE, 198 "\xE5" => UTF8::COMBINING_MACRON, 199 "\xE6" => UTF8::COMBINING_BREVE, 200 "\xE7" => UTF8::COMBINING_DOT_ABOVE, 201 "\xE8" => UTF8::COMBINING_DIAERESIS, 202 "\xE9" => UTF8::COMBINING_CARON, 203 "\xEA" => UTF8::COMBINING_RING_ABOVE, 204 "\xEB" => UTF8::COMBINING_DOUBLE_INVERTED_BREVE, 205 "\xEC" => '', 206 "\xED" => UTF8::COMBINING_COMMA_ABOVE_RIGHT, 207 "\xEE" => UTF8::COMBINING_DOUBLE_ACUTE_ACCENT, 208 "\xEF" => UTF8::COMBINING_CANDRABINDU, 209 "\xF0" => UTF8::COMBINING_CEDILLA, 210 "\xF1" => UTF8::COMBINING_OGONEK, 211 "\xF2" => UTF8::COMBINING_DOT_BELOW, 212 "\xF3" => UTF8::COMBINING_DIAERESIS_BELOW, 213 "\xF4" => UTF8::COMBINING_RING_BELOW, 214 "\xF5" => UTF8::COMBINING_DOUBLE_LOW_LINE, 215 "\xF6" => UTF8::COMBINING_LOW_LINE, 216 "\xF7" => UTF8::COMBINING_COMMA_BELOW, 217 "\xF8" => UTF8::COMBINING_LEFT_HALF_RING_BELOW, 218 "\xF9" => UTF8::COMBINING_BREVE_BELOW, 219 "\xFA" => UTF8::COMBINING_DOUBLE_TILDE, 220 "\xFB" => '', 221 "\xFC" => UTF8::REPLACEMENT_CHARACTER, 222 "\xFD" => UTF8::REPLACEMENT_CHARACTER, 223 "\xFE" => UTF8::COMBINING_COMMA_ABOVE, 224 "\xFF" => UTF8::COMBINING_LONG_SOLIDUS_OVERLAY, 225 ]; 226 227 // The subset of pre-composed UTF8 characters that can be made from ANSEL characters. 
228 private const array PRECOMPOSED_CHARACTERS = [ 229 'A' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_ACUTE, 230 'A' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE, 231 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_ACUTE, 232 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_DOT_BELOW, 233 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_GRAVE, 234 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE, 235 'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_TILDE, 236 'A' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CARON, 237 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX, 238 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE, 239 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW, 240 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE, 241 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, 242 'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE, 243 'A' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS, 244 'A' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS_AND_MACRON, 245 'A' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE, 246 'A' . UTF8::COMBINING_DOT_ABOVE . 
UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON, 247 'A' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_BELOW, 248 'A' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_GRAVE, 249 'A' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_HOOK_ABOVE, 250 'A' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_A_WITH_MACRON, 251 'A' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_A_WITH_OGONEK, 252 'A' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE, 253 'A' . UTF8::COMBINING_RING_ABOVE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE, 254 'A' . UTF8::COMBINING_RING_BELOW => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW, 255 'A' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_A_WITH_TILDE, 256 'B' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE, 257 'B' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_B_WITH_DOT_BELOW, 258 'C' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_C_WITH_ACUTE, 259 'C' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CARON, 260 'C' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA, 261 'C' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX, 262 'C' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE, 263 'C' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA_AND_ACUTE, 264 'D' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_D_WITH_CARON, 265 'D' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_D_WITH_CEDILLA, 266 'D' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE, 267 'D' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_D_WITH_DOT_BELOW, 268 'E' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_ACUTE, 269 'E' . 
UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_BREVE, 270 'E' . UTF8::COMBINING_BREVE . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CEDILLA_AND_BREVE, 271 'E' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CARON, 272 'E' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CEDILLA, 273 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX, 274 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE, 275 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW, 276 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE, 277 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, 278 'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE, 279 'E' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS, 280 'E' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE, 281 'E' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DOT_BELOW, 282 'E' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_GRAVE, 283 'E' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_HOOK_ABOVE, 284 'E' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON, 285 'E' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_ACUTE, 286 'E' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_GRAVE, 287 'E' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_E_WITH_OGONEK, 288 'E' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_E_WITH_TILDE, 289 'F' . 
UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE, 290 'G' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_G_WITH_ACUTE, 291 'G' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_G_WITH_BREVE, 292 'G' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CARON, 293 'G' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CEDILLA, 294 'G' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX, 295 'G' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE, 296 'G' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_G_WITH_MACRON, 297 'H' . UTF8::COMBINING_BREVE_BELOW => UTF8::LATIN_CAPITAL_LETTER_H_WITH_BREVE_BELOW, 298 'H' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CARON, 299 'H' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CEDILLA, 300 'H' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX, 301 'H' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DIAERESIS, 302 'H' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DOT_ABOVE, 303 'H' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DOT_BELOW, 304 'I' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_I_WITH_ACUTE, 305 'I' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_I_WITH_BREVE, 306 'I' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_I_WITH_CARON, 307 'I' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX, 308 'I' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS, 309 'I' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS_AND_ACUTE, 310 'I' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE, 311 'I' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DOT_BELOW, 312 'I' . 
UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_I_WITH_GRAVE, 313 'I' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_I_WITH_HOOK_ABOVE, 314 'I' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_I_WITH_MACRON, 315 'I' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_I_WITH_OGONEK, 316 'I' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_I_WITH_TILDE, 317 'J' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX, 318 'K' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_K_WITH_CARON, 319 'K' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_K_WITH_CEDILLA, 320 'K' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_K_WITH_ACUTE, 321 'K' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_K_WITH_DOT_BELOW, 322 'L' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_L_WITH_ACUTE, 323 'L' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_L_WITH_CARON, 324 'L' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_L_WITH_CEDILLA, 325 'L' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW, 326 'L' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW_AND_MACRON, 327 'M' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_M_WITH_ACUTE, 328 'M' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE, 329 'M' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_M_WITH_DOT_BELOW, 330 'N' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_N_WITH_ACUTE, 331 'N' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_N_WITH_CARON, 332 'N' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_N_WITH_CEDILLA, 333 'N' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_N_WITH_DOT_ABOVE, 334 'N' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_N_WITH_DOT_BELOW, 335 'N' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_N_WITH_GRAVE, 336 'N' . 
UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_N_WITH_TILDE, 337 'O' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_ACUTE, 338 'O' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_BREVE, 339 'O' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CARON, 340 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX, 341 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE, 342 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW, 343 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE, 344 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, 345 'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE, 346 'O' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS, 347 'O' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS_AND_MACRON, 348 'O' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE, 349 'O' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON, 350 'O' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_BELOW, 351 'O' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE, 352 'O' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_GRAVE, 353 'O' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_HOOK_ABOVE, 354 'O' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON, 355 'O' . UTF8::COMBINING_MACRON . 
UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_ACUTE, 356 'O' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_GRAVE, 357 'O' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_O_WITH_OGONEK, 358 'O' . UTF8::COMBINING_OGONEK . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_OGONEK_AND_MACRON, 359 'O' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE, 360 'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_ACUTE, 361 'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_DIAERESIS, 362 'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_MACRON, 363 'P' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_P_WITH_ACUTE, 364 'P' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE, 365 'R' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_R_WITH_ACUTE, 366 'R' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_R_WITH_CARON, 367 'R' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_R_WITH_CEDILLA, 368 'R' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_ABOVE, 369 'R' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW, 370 'R' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW_AND_MACRON, 371 'S' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_S_WITH_ACUTE, 372 'S' . UTF8::COMBINING_ACUTE_ACCENT . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE, 373 'S' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CARON, 374 'S' . UTF8::COMBINING_CARON . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CARON_AND_DOT_ABOVE, 375 'S' . 
UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CEDILLA, 376 'S' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX, 377 'S' . UTF8::COMBINING_COMMA_BELOW => UTF8::LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW, 378 'S' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE, 379 'S' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW, 380 'S' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE, 381 'T' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_T_WITH_CARON, 382 'T' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_CAPITAL_LETTER_T_WITH_CEDILLA, 383 'T' . UTF8::COMBINING_COMMA_BELOW => UTF8::LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW, 384 'T' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE, 385 'T' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_T_WITH_DOT_BELOW, 386 'U' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_ACUTE, 387 'U' . UTF8::COMBINING_BREVE => UTF8::LATIN_CAPITAL_LETTER_U_WITH_BREVE, 388 'U' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_U_WITH_CARON, 389 'U' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX, 390 'U' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS, 391 'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_ACUTE, 392 'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_CARON, 393 'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_GRAVE, 394 'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_MACRON, 395 'U' . UTF8::COMBINING_DIAERESIS_BELOW => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_BELOW, 396 'U' . 
UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DOT_BELOW, 397 'U' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE, 398 'U' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_GRAVE, 399 'U' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_U_WITH_HOOK_ABOVE, 400 'U' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_U_WITH_MACRON, 401 'U' . UTF8::COMBINING_MACRON . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_U_WITH_MACRON_AND_DIAERESIS, 402 'U' . UTF8::COMBINING_OGONEK => UTF8::LATIN_CAPITAL_LETTER_U_WITH_OGONEK, 403 'U' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE, 404 'U' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_U_WITH_TILDE, 405 'U' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_U_WITH_TILDE_AND_ACUTE, 406 'V' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_V_WITH_DOT_BELOW, 407 'V' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_V_WITH_TILDE, 408 'W' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_W_WITH_ACUTE, 409 'W' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX, 410 'W' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS, 411 'W' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DOT_ABOVE, 412 'W' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DOT_BELOW, 413 'W' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_W_WITH_GRAVE, 414 'X' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_X_WITH_DIAERESIS, 415 'X' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_X_WITH_DOT_ABOVE, 416 'Y' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_ACUTE, 417 'Y' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX, 418 'Y' . 
UTF8::COMBINING_DIAERESIS => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS, 419 'Y' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DOT_ABOVE, 420 'Y' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DOT_BELOW, 421 'Y' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_GRAVE, 422 'Y' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_HOOK_ABOVE, 423 'Y' . UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_MACRON, 424 'Y' . UTF8::COMBINING_TILDE => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_TILDE, 425 'Z' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_ACUTE, 426 'Z' . UTF8::COMBINING_CARON => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_CARON, 427 'Z' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_CIRCUMFLEX, 428 'Z' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE, 429 'Z' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_DOT_BELOW, 430 'a' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_ACUTE, 431 'a' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE, 432 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_ACUTE, 433 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_DOT_BELOW, 434 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_GRAVE, 435 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE, 436 'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_TILDE, 437 'a' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_A_WITH_CARON, 438 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX, 439 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . 
UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE, 440 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW, 441 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE, 442 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, 443 'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE, 444 'a' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_A_WITH_DIAERESIS, 445 'a' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_A_WITH_DIAERESIS_AND_MACRON, 446 'a' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE, 447 'a' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON, 448 'a' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_BELOW, 449 'a' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_GRAVE, 450 'a' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_HOOK_ABOVE, 451 'a' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_A_WITH_MACRON, 452 'a' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_A_WITH_OGONEK, 453 'a' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_ABOVE, 454 'a' . UTF8::COMBINING_RING_ABOVE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE, 455 'a' . UTF8::COMBINING_RING_BELOW => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_BELOW, 456 'a' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_A_WITH_TILDE, 457 'b' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE, 458 'b' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_B_WITH_DOT_BELOW, 459 'c' . 
UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_C_WITH_ACUTE, 460 'c' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_C_WITH_CARON, 461 'c' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_C_WITH_CEDILLA, 462 'c' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX, 463 'c' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE, 464 'c' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_C_WITH_CEDILLA_AND_ACUTE, 465 'd' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_D_WITH_CARON, 466 'd' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_D_WITH_CEDILLA, 467 'd' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE, 468 'd' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_D_WITH_DOT_BELOW, 469 'e' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_ACUTE, 470 'e' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_E_WITH_BREVE, 471 'e' . UTF8::COMBINING_BREVE . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_E_WITH_CEDILLA_AND_BREVE, 472 'e' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_E_WITH_CARON, 473 'e' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_E_WITH_CEDILLA, 474 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX, 475 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE, 476 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW, 477 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE, 478 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, 479 'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE, 480 'e' . 
UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_E_WITH_DIAERESIS, 481 'e' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE, 482 'e' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_E_WITH_DOT_BELOW, 483 'e' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_GRAVE, 484 'e' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_E_WITH_HOOK_ABOVE, 485 'e' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON, 486 'e' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON_AND_ACUTE, 487 'e' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON_AND_GRAVE, 488 'e' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_E_WITH_OGONEK, 489 'e' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_E_WITH_TILDE, 490 'f' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE, 491 'g' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_G_WITH_ACUTE, 492 'g' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_G_WITH_BREVE, 493 'g' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_G_WITH_CARON, 494 'g' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_G_WITH_CEDILLA, 495 'g' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX, 496 'g' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE, 497 'g' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_G_WITH_MACRON, 498 'h' . UTF8::COMBINING_BREVE_BELOW => UTF8::LATIN_SMALL_LETTER_H_WITH_BREVE_BELOW, 499 'h' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_H_WITH_CARON, 500 'h' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_H_WITH_CEDILLA, 501 'h' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX, 502 'h' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_H_WITH_DIAERESIS, 503 'h' . 
UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_H_WITH_DOT_ABOVE, 504 'h' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_H_WITH_DOT_BELOW, 505 'i' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_I_WITH_ACUTE, 506 'i' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_I_WITH_BREVE, 507 'i' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_I_WITH_CARON, 508 'i' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX, 509 'i' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_I_WITH_DIAERESIS, 510 'i' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_I_WITH_DIAERESIS_AND_ACUTE, 511 'i' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_I_WITH_DOT_BELOW, 512 'i' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_I_WITH_GRAVE, 513 'i' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_I_WITH_HOOK_ABOVE, 514 'i' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_I_WITH_MACRON, 515 'i' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_I_WITH_OGONEK, 516 'i' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_I_WITH_TILDE, 517 'j' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_J_WITH_CARON, 518 'j' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX, 519 'k' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_K_WITH_CARON, 520 'k' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_K_WITH_CEDILLA, 521 'k' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_K_WITH_ACUTE, 522 'k' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_K_WITH_DOT_BELOW, 523 'l' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_L_WITH_ACUTE, 524 'l' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_L_WITH_CARON, 525 'l' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_L_WITH_CEDILLA, 526 'l' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_L_WITH_DOT_BELOW, 527 'l' . UTF8::COMBINING_DOT_BELOW . 
UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_L_WITH_DOT_BELOW_AND_MACRON, 528 'm' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_M_WITH_ACUTE, 529 'm' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE, 530 'm' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_M_WITH_DOT_BELOW, 531 'n' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_N_WITH_ACUTE, 532 'n' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_N_WITH_CARON, 533 'n' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_N_WITH_CEDILLA, 534 'n' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_N_WITH_DOT_ABOVE, 535 'n' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_N_WITH_DOT_BELOW, 536 'n' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_N_WITH_GRAVE, 537 'n' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_N_WITH_TILDE, 538 'o' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_ACUTE, 539 'o' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_O_WITH_BREVE, 540 'o' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_O_WITH_CARON, 541 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX, 542 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE, 543 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW, 544 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE, 545 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE, 546 'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE, 547 'o' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_O_WITH_DIAERESIS, 548 'o' . UTF8::COMBINING_DIAERESIS . 
UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_DIAERESIS_AND_MACRON, 549 'o' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE, 550 'o' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON, 551 'o' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_BELOW, 552 'o' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE, 553 'o' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_GRAVE, 554 'o' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_O_WITH_HOOK_ABOVE, 555 'o' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON, 556 'o' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON_AND_ACUTE, 557 'o' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON_AND_GRAVE, 558 'o' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_O_WITH_OGONEK, 559 'o' . UTF8::COMBINING_OGONEK . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_OGONEK_AND_MACRON, 560 'o' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE, 561 'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_ACUTE, 562 'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_DIAERESIS, 563 'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_MACRON, 564 'p' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_P_WITH_ACUTE, 565 'p' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE, 566 'r' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_R_WITH_ACUTE, 567 'r' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_R_WITH_CARON, 568 'r' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_R_WITH_CEDILLA, 569 'r' . 
UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_ABOVE, 570 'r' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_BELOW, 571 'r' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_BELOW_AND_MACRON, 572 's' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_S_WITH_ACUTE, 573 's' . UTF8::COMBINING_ACUTE_ACCENT . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE, 574 's' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_S_WITH_CARON, 575 's' . UTF8::COMBINING_CARON . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_S_WITH_CARON_AND_DOT_ABOVE, 576 's' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_S_WITH_CEDILLA, 577 's' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX, 578 's' . UTF8::COMBINING_COMMA_BELOW => UTF8::LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW, 579 's' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE, 580 's' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_BELOW, 581 's' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE, 582 't' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_T_WITH_CARON, 583 't' . UTF8::COMBINING_CEDILLA => UTF8::LATIN_SMALL_LETTER_T_WITH_CEDILLA, 584 't' . UTF8::COMBINING_COMMA_BELOW => UTF8::LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW, 585 't' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_T_WITH_DIAERESIS, 586 't' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE, 587 't' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_T_WITH_DOT_BELOW, 588 'u' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_ACUTE, 589 'u' . UTF8::COMBINING_BREVE => UTF8::LATIN_SMALL_LETTER_U_WITH_BREVE, 590 'u' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_U_WITH_CARON, 591 'u' . 
UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX, 592 'u' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS, 593 'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_ACUTE, 594 'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_CARON, 595 'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_GRAVE, 596 'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_MACRON, 597 'u' . UTF8::COMBINING_DIAERESIS_BELOW => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_BELOW, 598 'u' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_U_WITH_DOT_BELOW, 599 'u' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE, 600 'u' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_GRAVE, 601 'u' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_U_WITH_HOOK_ABOVE, 602 'u' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_U_WITH_MACRON, 603 'u' . UTF8::COMBINING_MACRON . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_U_WITH_MACRON_AND_DIAERESIS, 604 'u' . UTF8::COMBINING_OGONEK => UTF8::LATIN_SMALL_LETTER_U_WITH_OGONEK, 605 'u' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_SMALL_LETTER_U_WITH_RING_ABOVE, 606 'u' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_U_WITH_TILDE, 607 'u' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_U_WITH_TILDE_AND_ACUTE, 608 'v' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_V_WITH_DOT_BELOW, 609 'v' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_V_WITH_TILDE, 610 'w' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_W_WITH_ACUTE, 611 'w' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX, 612 'w' . 
UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_W_WITH_DIAERESIS, 613 'w' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_W_WITH_DOT_ABOVE, 614 'w' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_W_WITH_DOT_BELOW, 615 'w' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_W_WITH_GRAVE, 616 'w' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_SMALL_LETTER_W_WITH_RING_ABOVE, 617 'x' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_X_WITH_DIAERESIS, 618 'x' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_X_WITH_DOT_ABOVE, 619 'y' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_Y_WITH_ACUTE, 620 'y' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX, 621 'y' . UTF8::COMBINING_DIAERESIS => UTF8::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS, 622 'y' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_Y_WITH_DOT_ABOVE, 623 'y' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_Y_WITH_DOT_BELOW, 624 'y' . UTF8::COMBINING_GRAVE_ACCENT => UTF8::LATIN_SMALL_LETTER_Y_WITH_GRAVE, 625 'y' . UTF8::COMBINING_HOOK_ABOVE => UTF8::LATIN_SMALL_LETTER_Y_WITH_HOOK_ABOVE, 626 'y' . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_Y_WITH_MACRON, 627 'y' . UTF8::COMBINING_RING_ABOVE => UTF8::LATIN_SMALL_LETTER_Y_WITH_RING_ABOVE, 628 'y' . UTF8::COMBINING_TILDE => UTF8::LATIN_SMALL_LETTER_Y_WITH_TILDE, 629 'z' . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_Z_WITH_ACUTE, 630 'z' . UTF8::COMBINING_CARON => UTF8::LATIN_SMALL_LETTER_Z_WITH_CARON, 631 'z' . UTF8::COMBINING_CIRCUMFLEX_ACCENT => UTF8::LATIN_SMALL_LETTER_Z_WITH_CIRCUMFLEX, 632 'z' . UTF8::COMBINING_DOT_ABOVE => UTF8::LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE, 633 'z' . UTF8::COMBINING_DOT_BELOW => UTF8::LATIN_SMALL_LETTER_Z_WITH_DOT_BELOW, 634 UTF8::LATIN_CAPITAL_LETTER_AE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_AE_WITH_ACUTE, 635 UTF8::LATIN_CAPITAL_LETTER_AE . 
UTF8::COMBINING_MACRON => UTF8::LATIN_CAPITAL_LETTER_AE_WITH_MACRON,
        UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE_AND_ACUTE,
        UTF8::LATIN_SMALL_LETTER_AE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_AE_WITH_ACUTE,
        UTF8::LATIN_SMALL_LETTER_AE . UTF8::COMBINING_MACRON => UTF8::LATIN_SMALL_LETTER_AE_WITH_MACRON,
        UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE_AND_ACUTE,
    ];

    // ANSEL supports O and U with a horn diacritic, but not the combining diacritic.
    //
    // The conversion is therefore done in two steps around parent::fromUtf8():
    // step 1 swaps "letter + combining horn" for a NUL-delimited placeholder,
    // step 2 swaps that placeholder for the single ANSEL byte.
    // NOTE(review): this relies on the placeholders passing through
    // parent::fromUtf8() unchanged - confirm against the parent class.
    private const array HORN_CONVERT_STEP_1 = [
        'O' . UTF8::COMBINING_HORN => "\x00O_WITH_HORN\x00",
        'U' . UTF8::COMBINING_HORN => "\x00U_WITH_HORN\x00",
        'o' . UTF8::COMBINING_HORN => "\x00o_WITH_HORN\x00",
        'u' . UTF8::COMBINING_HORN => "\x00u_WITH_HORN\x00",
    ];

    // Placeholder => ANSEL byte for the four letters with a horn.
    private const array HORN_CONVERT_STEP_2 = [
        "\x00O_WITH_HORN\x00" => "\xAC",
        "\x00U_WITH_HORN\x00" => "\xAD",
        "\x00o_WITH_HORN\x00" => "\xBC",
        "\x00u_WITH_HORN\x00" => "\xBD",
    ];

    /**
     * Convert a string from ANSEL to UTF-8.
     *
     * The conversion runs in three passes: move each run of diacritic bytes
     * behind the byte that follows it, substitute bytes using TO_UTF8, then
     * replace base letter + combining diacritic sequences using
     * PRECOMPOSED_CHARACTERS.
     *
     * @param string $text
     *
     * @return string
     *
     * @throws \RuntimeException If the regular expression engine reports an error.
     */
    public function toUtf8(string $text): string
    {
        // ANSEL diacritics are prefixes. UTF-8 diacritics are suffixes.
        // The pattern is byte-oriented (no /u modifier) and "." does not match
        // a newline, so diacritics directly before a line break stay in place.
        $reordered = preg_replace('/([\xE0-\xFF]+)(.)/', '$2$1', $text);

        // preg_replace() yields null on a PCRE error. Report it explicitly
        // instead of letting null reach strtr() and surface as a TypeError.
        if ($reordered === null) {
            throw new \RuntimeException('Cannot reorder ANSEL diacritics: ' . \preg_last_error_msg());
        }

        // Simple substitution creates denormalized UTF-8.
        $denormalized = strtr($reordered, self::TO_UTF8);

        // Convert combining diacritics into pre-composed characters.
        return strtr($denormalized, self::PRECOMPOSED_CHARACTERS);
    }

    /**
     * Convert a string from UTF-8 to ANSEL.
     *
     * Pre-composed characters are first split into base letter + combining
     * diacritics, letters with a combining horn are protected by placeholders,
     * the parent class converts the remaining characters, and finally each run
     * of diacritic bytes is moved in front of the byte it follows.
     *
     * @param string $text
     *
     * @return string
     *
     * @throws \RuntimeException If the regular expression engine reports an error.
     */
    public function fromUtf8(string $text): string
    {
        // Convert pre-composed characters into combining diacritics.
        $text = strtr($text, array_flip(self::PRECOMPOSED_CHARACTERS));

        // ANSEL supports letters with horns, but not the combining horn.
        // Hide "letter + horn" behind a placeholder before the generic conversion.
        $text = strtr($text, self::HORN_CONVERT_STEP_1);

        // Convert characters and combining diacritics separately.
        $text = parent::fromUtf8($text);

        // Replace the horn placeholders with the corresponding ANSEL bytes.
        $text = strtr($text, self::HORN_CONVERT_STEP_2);

        // ANSEL diacritics are prefixes. UTF-8 diacritics are suffixes.
        $reordered = preg_replace('/([^\xE0-\xFF])([\xE0-\xFF]+)/', '$2$1', $text);

        // preg_replace() yields null on a PCRE error. Report it explicitly
        // instead of violating the declared string return type.
        if ($reordered === null) {
            throw new \RuntimeException('Cannot reorder ANSEL diacritics: ' . \preg_last_error_msg());
        }

        return $reordered;
    }
}