xref: /webtrees/app/Encodings/ANSEL.php (revision d11be7027e34e3121be11cc025421873364403f9)
11c6adce8SGreg Roach<?php
21c6adce8SGreg Roach
31c6adce8SGreg Roach/**
41c6adce8SGreg Roach * webtrees: online genealogy
5*d11be702SGreg Roach * Copyright (C) 2023 webtrees development team
61c6adce8SGreg Roach * This program is free software: you can redistribute it and/or modify
71c6adce8SGreg Roach * it under the terms of the GNU General Public License as published by
81c6adce8SGreg Roach * the Free Software Foundation, either version 3 of the License, or
91c6adce8SGreg Roach * (at your option) any later version.
101c6adce8SGreg Roach * This program is distributed in the hope that it will be useful,
111c6adce8SGreg Roach * but WITHOUT ANY WARRANTY; without even the implied warranty of
121c6adce8SGreg Roach * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
131c6adce8SGreg Roach * GNU General Public License for more details.
141c6adce8SGreg Roach * You should have received a copy of the GNU General Public License
151c6adce8SGreg Roach * along with this program. If not, see <https://www.gnu.org/licenses/>.
161c6adce8SGreg Roach */
171c6adce8SGreg Roach
181c6adce8SGreg Roachdeclare(strict_types=1);
191c6adce8SGreg Roach
201c6adce8SGreg Roachnamespace Fisharebest\Webtrees\Encodings;
211c6adce8SGreg Roach
221c6adce8SGreg Roachuse function preg_replace;
231c6adce8SGreg Roachuse function strtr;
241c6adce8SGreg Roach
251c6adce8SGreg Roach/**
261c6adce8SGreg Roach * Convert between UTF-8 and ANSEL encoding.
271c6adce8SGreg Roach *
281c6adce8SGreg Roach * ANSEL is the common name for the MARC-21 encoding, also known as Z39.47, which
291c6adce8SGreg Roach * has a number of editions.  These are denoted by a year suffix.
301c6adce8SGreg Roach *
311c6adce8SGreg Roach * The GEDCOM 5.5.1 specification (1999-10-02) specifies the Z39.47-1985 edition.
321c6adce8SGreg Roach * It adds Es Zett (ß) at CF.
331c6adce8SGreg Roach *
341c6adce8SGreg Roach * According to wikipedia, other non-standard characters are also added.
351c6adce8SGreg Roach *
361c6adce8SGreg Roach * HEX Unicode Glyph Description
371c6adce8SGreg Roach * BE  25A1    □     Empty box
381c6adce8SGreg Roach * BF  25A0    ■     Black box
391c6adce8SGreg Roach * CD  0065    e     Midline e
401c6adce8SGreg Roach * CE  006F    o     Midline o
411c6adce8SGreg Roach * CF  00DF    ß     Es Zett
421c6adce8SGreg Roach * FC  0338    /     Combining slash
431c6adce8SGreg Roach *
441c6adce8SGreg Roach * @link https://en.wikipedia.org/wiki/ANSEL
451c6adce8SGreg Roach *
461c6adce8SGreg Roach * The MARC-21 specification has added a number of additional characters since
471c6adce8SGreg Roach * the 1985 edition.
481c6adce8SGreg Roach *
491c6adce8SGreg Roach * HEX Unicode Glyph Description
501c6adce8SGreg Roach * 88  0098          Start of string
511c6adce8SGreg Roach * 89  009C          String terminator
521c6adce8SGreg Roach * 8D  200D          Zero width joiner
531c6adce8SGreg Roach * 8E  200C          Zero width non-joiner
 * A7  02B9    ʹ     Single prime
 * AC  01A0    Ơ     LATIN CAPITAL LETTER O WITH HORN
 * AD  01AF    Ư     LATIN CAPITAL LETTER U WITH HORN
 * B7  02BA    ʺ     Double prime
 * BC  01A1    ơ     LATIN SMALL LETTER O WITH HORN
 * BD  01B0    ư     LATIN SMALL LETTER U WITH HORN
 * C0  00B0    °     Degree sign
 * C1  2113    ℓ     Script small L
 * C2  2117    ℗     Sound recording copyright
 * C4  266F    ♯     Music sharp sign
641c6adce8SGreg Roach * C7  00DF    ß     Es Zett
651c6adce8SGreg Roach * C8  20AC    €     Euro sign
661c6adce8SGreg Roach * E0  0309          Hook above
671c6adce8SGreg Roach * EB  0361          Breve (first part / double)
681c6adce8SGreg Roach * EC  0361          Breve (second part)
691c6adce8SGreg Roach * EF  0310          Candrabindu
701c6adce8SGreg Roach * F2  0323          Low dot
711c6adce8SGreg Roach * F3  0324          Diaeresis below
721c6adce8SGreg Roach * F4  0325          Ring below
731c6adce8SGreg Roach * F5  0333          Double underline
 * F6  0332          Underline
 * F7  0326          Comma below
 * F8  031C          Left half ring below
761c6adce8SGreg Roach * F9  032E          Breve below
771c6adce8SGreg Roach * FA  0360          Double tilde (first part / double).
781c6adce8SGreg Roach * FB  0360          Double tilde (second part).
791c6adce8SGreg Roach * FF  0338          Slash
801c6adce8SGreg Roach *
811c6adce8SGreg Roach * @link https://memory.loc.gov/diglib/codetables/45.html
821c6adce8SGreg Roach *
831c6adce8SGreg Roach * Note that this means we can expect two different representations of Es Zett.
841c6adce8SGreg Roach *
851c6adce8SGreg Roach * There are two multi-part diacritics.  There are two ways to represent these.
861c6adce8SGreg Roach *
 * ANSEL       | UTF-8         | UTF-8 (preferred)
 * ------------+---------------+------------------
 * FA x FB y   | x FE22 y FE23 | x 0360 y
 * EB x EC y   | x FE20 y FE21 | x 0361 y
911c6adce8SGreg Roach */
921c6adce8SGreg Roachclass ANSEL extends AbstractEncoding
931c6adce8SGreg Roach{
941c6adce8SGreg Roach    public const NAME = 'ANSEL';
951c6adce8SGreg Roach
961c6adce8SGreg Roach    protected const TO_UTF8 = [
971c6adce8SGreg Roach        "\x80" => UTF8::REPLACEMENT_CHARACTER,
981c6adce8SGreg Roach        "\x81" => UTF8::REPLACEMENT_CHARACTER,
991c6adce8SGreg Roach        "\x82" => UTF8::REPLACEMENT_CHARACTER,
1001c6adce8SGreg Roach        "\x83" => UTF8::REPLACEMENT_CHARACTER,
1011c6adce8SGreg Roach        "\x84" => UTF8::REPLACEMENT_CHARACTER,
1021c6adce8SGreg Roach        "\x85" => UTF8::REPLACEMENT_CHARACTER,
1031c6adce8SGreg Roach        "\x86" => UTF8::REPLACEMENT_CHARACTER,
1041c6adce8SGreg Roach        "\x87" => UTF8::REPLACEMENT_CHARACTER,
1051c6adce8SGreg Roach        "\x88" => UTF8::START_OF_STRING,
1061c6adce8SGreg Roach        "\x89" => UTF8::STRING_TERMINATOR,
1071c6adce8SGreg Roach        "\x8A" => UTF8::REPLACEMENT_CHARACTER,
1081c6adce8SGreg Roach        "\x8B" => UTF8::REPLACEMENT_CHARACTER,
1091c6adce8SGreg Roach        "\x8C" => UTF8::REPLACEMENT_CHARACTER,
1101c6adce8SGreg Roach        "\x8D" => UTF8::ZERO_WIDTH_JOINER,
1111c6adce8SGreg Roach        "\x8E" => UTF8::ZERO_WIDTH_NON_JOINER,
1121c6adce8SGreg Roach        "\x8F" => UTF8::REPLACEMENT_CHARACTER,
1131c6adce8SGreg Roach        "\x90" => UTF8::REPLACEMENT_CHARACTER,
1141c6adce8SGreg Roach        "\x91" => UTF8::REPLACEMENT_CHARACTER,
1151c6adce8SGreg Roach        "\x92" => UTF8::REPLACEMENT_CHARACTER,
1161c6adce8SGreg Roach        "\x93" => UTF8::REPLACEMENT_CHARACTER,
1171c6adce8SGreg Roach        "\x94" => UTF8::REPLACEMENT_CHARACTER,
1181c6adce8SGreg Roach        "\x95" => UTF8::REPLACEMENT_CHARACTER,
1191c6adce8SGreg Roach        "\x96" => UTF8::REPLACEMENT_CHARACTER,
1201c6adce8SGreg Roach        "\x97" => UTF8::REPLACEMENT_CHARACTER,
1211c6adce8SGreg Roach        "\x98" => UTF8::REPLACEMENT_CHARACTER,
1221c6adce8SGreg Roach        "\x99" => UTF8::REPLACEMENT_CHARACTER,
1231c6adce8SGreg Roach        "\x9A" => UTF8::REPLACEMENT_CHARACTER,
1241c6adce8SGreg Roach        "\x9B" => UTF8::REPLACEMENT_CHARACTER,
1251c6adce8SGreg Roach        "\x9C" => UTF8::REPLACEMENT_CHARACTER,
1261c6adce8SGreg Roach        "\x9D" => UTF8::REPLACEMENT_CHARACTER,
1271c6adce8SGreg Roach        "\x9E" => UTF8::REPLACEMENT_CHARACTER,
1281c6adce8SGreg Roach        "\x9F" => UTF8::REPLACEMENT_CHARACTER,
1291c6adce8SGreg Roach        "\xA0" => UTF8::REPLACEMENT_CHARACTER,
1301c6adce8SGreg Roach        "\xA1" => UTF8::LATIN_CAPITAL_LETTER_L_WITH_STROKE,
1311c6adce8SGreg Roach        "\xA2" => UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE,
1321c6adce8SGreg Roach        "\xA3" => UTF8::LATIN_CAPITAL_LETTER_D_WITH_STROKE,
1331c6adce8SGreg Roach        "\xA4" => UTF8::LATIN_CAPITAL_LETTER_THORN,
1341c6adce8SGreg Roach        "\xA5" => UTF8::LATIN_CAPITAL_LETTER_AE,
1351c6adce8SGreg Roach        "\xA6" => UTF8::LATIN_CAPITAL_LIGATURE_OE,
1361c6adce8SGreg Roach        "\xA7" => UTF8::MODIFIER_LETTER_PRIME,
1371c6adce8SGreg Roach        "\xA8" => UTF8::MIDDLE_DOT,
1381c6adce8SGreg Roach        "\xA9" => UTF8::MUSIC_FLAT_SIGN,
1391c6adce8SGreg Roach        "\xAA" => UTF8::REGISTERED_SIGN,
1401c6adce8SGreg Roach        "\xAB" => UTF8::PLUS_MINUS_SIGN,
1411c6adce8SGreg Roach        "\xAC" => UTF8::LATIN_CAPITAL_LETTER_O_WITH_HORN,
1421c6adce8SGreg Roach        "\xAD" => UTF8::LATIN_CAPITAL_LETTER_U_WITH_HORN,
1431c6adce8SGreg Roach        "\xAE" => UTF8::MODIFIER_LETTER_APOSTROPHE,
1441c6adce8SGreg Roach        "\xAF" => UTF8::REPLACEMENT_CHARACTER,
1451c6adce8SGreg Roach        "\xB0" => UTF8::MODIFIER_LETTER_TURNED_COMMA,
1461c6adce8SGreg Roach        "\xB1" => UTF8::LATIN_SMALL_LETTER_L_WITH_STROKE,
1471c6adce8SGreg Roach        "\xB2" => UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE,
1481c6adce8SGreg Roach        "\xB3" => UTF8::LATIN_SMALL_LETTER_D_WITH_STROKE,
1491c6adce8SGreg Roach        "\xB4" => UTF8::LATIN_SMALL_LETTER_THORN,
1501c6adce8SGreg Roach        "\xB5" => UTF8::LATIN_SMALL_LETTER_AE,
1511c6adce8SGreg Roach        "\xB6" => UTF8::LATIN_SMALL_LIGATURE_OE,
1521c6adce8SGreg Roach        "\xB7" => UTF8::MODIFIER_LETTER_DOUBLE_PRIME,
1531c6adce8SGreg Roach        "\xB8" => UTF8::LATIN_SMALL_LETTER_DOTLESS_I,
1541c6adce8SGreg Roach        "\xB9" => UTF8::POUND_SIGN,
1551c6adce8SGreg Roach        "\xBA" => UTF8::LATIN_SMALL_LETTER_ETH,
1561c6adce8SGreg Roach        "\xBB" => UTF8::REPLACEMENT_CHARACTER,
1571c6adce8SGreg Roach        "\xBC" => UTF8::LATIN_SMALL_LETTER_O_WITH_HORN,
1581c6adce8SGreg Roach        "\xBD" => UTF8::LATIN_SMALL_LETTER_U_WITH_HORN,
1591c6adce8SGreg Roach        "\xBE" => UTF8::WHITE_SQUARE,
1601c6adce8SGreg Roach        "\xBF" => UTF8::BLACK_SQUARE,
1611c6adce8SGreg Roach        "\xC0" => UTF8::DEGREE_SIGN,
1621c6adce8SGreg Roach        "\xC1" => UTF8::SCRIPT_SMALL_L,
1631c6adce8SGreg Roach        "\xC2" => UTF8::SOUND_RECORDING_COPYRIGHT,
1641c6adce8SGreg Roach        "\xC3" => UTF8::COPYRIGHT_SIGN,
1651c6adce8SGreg Roach        "\xC4" => UTF8::MUSIC_SHARP_SIGN,
1661c6adce8SGreg Roach        "\xC5" => UTF8::INVERTED_QUESTION_MARK,
1671c6adce8SGreg Roach        "\xC6" => UTF8::INVERTED_EXCLAMATION_MARK,
1681c6adce8SGreg Roach        "\xC7" => UTF8::LATIN_CAPITAL_LETTER_SHARP_S,
1691c6adce8SGreg Roach        "\xC8" => UTF8::EURO_SIGN,
1701c6adce8SGreg Roach        "\xC9" => UTF8::REPLACEMENT_CHARACTER,
1711c6adce8SGreg Roach        "\xCA" => UTF8::REPLACEMENT_CHARACTER,
1721c6adce8SGreg Roach        "\xCB" => UTF8::REPLACEMENT_CHARACTER,
1731c6adce8SGreg Roach        "\xCC" => UTF8::REPLACEMENT_CHARACTER,
1741c6adce8SGreg Roach        "\xCD" => UTF8::REPLACEMENT_CHARACTER,
1751c6adce8SGreg Roach        "\xCE" => UTF8::REPLACEMENT_CHARACTER,
1761c6adce8SGreg Roach        "\xCF" => UTF8::LATIN_SMALL_LETTER_SHARP_S,
1771c6adce8SGreg Roach        "\xD0" => UTF8::REPLACEMENT_CHARACTER,
1781c6adce8SGreg Roach        "\xD1" => UTF8::REPLACEMENT_CHARACTER,
1791c6adce8SGreg Roach        "\xD2" => UTF8::REPLACEMENT_CHARACTER,
1801c6adce8SGreg Roach        "\xD3" => UTF8::REPLACEMENT_CHARACTER,
1811c6adce8SGreg Roach        "\xD4" => UTF8::REPLACEMENT_CHARACTER,
1821c6adce8SGreg Roach        "\xD5" => UTF8::REPLACEMENT_CHARACTER,
1831c6adce8SGreg Roach        "\xD6" => UTF8::REPLACEMENT_CHARACTER,
1841c6adce8SGreg Roach        "\xD7" => UTF8::REPLACEMENT_CHARACTER,
1851c6adce8SGreg Roach        "\xD8" => UTF8::REPLACEMENT_CHARACTER,
1861c6adce8SGreg Roach        "\xD9" => UTF8::REPLACEMENT_CHARACTER,
1871c6adce8SGreg Roach        "\xDA" => UTF8::REPLACEMENT_CHARACTER,
1881c6adce8SGreg Roach        "\xDB" => UTF8::REPLACEMENT_CHARACTER,
1891c6adce8SGreg Roach        "\xDC" => UTF8::REPLACEMENT_CHARACTER,
1901c6adce8SGreg Roach        "\xDD" => UTF8::REPLACEMENT_CHARACTER,
1911c6adce8SGreg Roach        "\xDE" => UTF8::REPLACEMENT_CHARACTER,
1921c6adce8SGreg Roach        "\xDF" => UTF8::REPLACEMENT_CHARACTER,
1931c6adce8SGreg Roach        "\xE0" => UTF8::COMBINING_HOOK_ABOVE,
1941c6adce8SGreg Roach        "\xE1" => UTF8::COMBINING_GRAVE_ACCENT,
1951c6adce8SGreg Roach        "\xE2" => UTF8::COMBINING_ACUTE_ACCENT,
1961c6adce8SGreg Roach        "\xE3" => UTF8::COMBINING_CIRCUMFLEX_ACCENT,
1971c6adce8SGreg Roach        "\xE4" => UTF8::COMBINING_TILDE,
1981c6adce8SGreg Roach        "\xE5" => UTF8::COMBINING_MACRON,
1991c6adce8SGreg Roach        "\xE6" => UTF8::COMBINING_BREVE,
2001c6adce8SGreg Roach        "\xE7" => UTF8::COMBINING_DOT_ABOVE,
2011c6adce8SGreg Roach        "\xE8" => UTF8::COMBINING_DIAERESIS,
2021c6adce8SGreg Roach        "\xE9" => UTF8::COMBINING_CARON,
2031c6adce8SGreg Roach        "\xEA" => UTF8::COMBINING_RING_ABOVE,
2041c6adce8SGreg Roach        "\xEB" => UTF8::COMBINING_DOUBLE_INVERTED_BREVE,
2051c6adce8SGreg Roach        "\xEC" => '',
2061c6adce8SGreg Roach        "\xED" => UTF8::COMBINING_COMMA_ABOVE_RIGHT,
2071c6adce8SGreg Roach        "\xEE" => UTF8::COMBINING_DOUBLE_ACUTE_ACCENT,
2081c6adce8SGreg Roach        "\xEF" => UTF8::COMBINING_CANDRABINDU,
2091c6adce8SGreg Roach        "\xF0" => UTF8::COMBINING_CEDILLA,
2101c6adce8SGreg Roach        "\xF1" => UTF8::COMBINING_OGONEK,
2111c6adce8SGreg Roach        "\xF2" => UTF8::COMBINING_DOT_BELOW,
2121c6adce8SGreg Roach        "\xF3" => UTF8::COMBINING_DIAERESIS_BELOW,
2131c6adce8SGreg Roach        "\xF4" => UTF8::COMBINING_RING_BELOW,
2141c6adce8SGreg Roach        "\xF5" => UTF8::COMBINING_DOUBLE_LOW_LINE,
2151c6adce8SGreg Roach        "\xF6" => UTF8::COMBINING_LOW_LINE,
2161c6adce8SGreg Roach        "\xF7" => UTF8::COMBINING_COMMA_BELOW,
2171c6adce8SGreg Roach        "\xF8" => UTF8::COMBINING_LEFT_HALF_RING_BELOW,
2181c6adce8SGreg Roach        "\xF9" => UTF8::COMBINING_BREVE_BELOW,
2191c6adce8SGreg Roach        "\xFA" => UTF8::COMBINING_DOUBLE_TILDE,
2201c6adce8SGreg Roach        "\xFB" => '',
2211c6adce8SGreg Roach        "\xFC" => UTF8::REPLACEMENT_CHARACTER,
2221c6adce8SGreg Roach        "\xFD" => UTF8::REPLACEMENT_CHARACTER,
2231c6adce8SGreg Roach        "\xFE" => UTF8::COMBINING_COMMA_ABOVE,
2241c6adce8SGreg Roach        "\xFF" => UTF8::COMBINING_LONG_SOLIDUS_OVERLAY,
2251c6adce8SGreg Roach    ];
2261c6adce8SGreg Roach
2271c6adce8SGreg Roach    // The subset of pre-composed UTF8 characters that can be made from ANSEL characters.
2281c6adce8SGreg Roach    private const PRECOMPOSED_CHARACTERS = [
2291c6adce8SGreg Roach        'A' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_A_WITH_ACUTE,
2301c6adce8SGreg Roach        'A' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE,
2311c6adce8SGreg Roach        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_ACUTE,
2321c6adce8SGreg Roach        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_DOT_BELOW                 => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_DOT_BELOW,
2331c6adce8SGreg Roach        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_GRAVE_ACCENT              => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_GRAVE,
2341c6adce8SGreg Roach        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_HOOK_ABOVE                => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE,
2351c6adce8SGreg Roach        'A' . UTF8::COMBINING_BREVE . UTF8::COMBINING_TILDE                     => UTF8::LATIN_CAPITAL_LETTER_A_WITH_BREVE_AND_TILDE,
2361c6adce8SGreg Roach        'A' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CARON,
2371c6adce8SGreg Roach        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX,
2381c6adce8SGreg Roach        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE,
2391c6adce8SGreg Roach        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW,
2401c6adce8SGreg Roach        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE,
2411c6adce8SGreg Roach        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
2421c6adce8SGreg Roach        'A' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE,
2431c6adce8SGreg Roach        'A' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS,
2441c6adce8SGreg Roach        'A' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DIAERESIS_AND_MACRON,
2451c6adce8SGreg Roach        'A' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE,
2461c6adce8SGreg Roach        'A' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON,
2471c6adce8SGreg Roach        'A' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_DOT_BELOW,
2481c6adce8SGreg Roach        'A' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_A_WITH_GRAVE,
2491c6adce8SGreg Roach        'A' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_A_WITH_HOOK_ABOVE,
2501c6adce8SGreg Roach        'A' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_A_WITH_MACRON,
2511c6adce8SGreg Roach        'A' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_A_WITH_OGONEK,
2521c6adce8SGreg Roach        'A' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE,
2531c6adce8SGreg Roach        'A' . UTF8::COMBINING_RING_ABOVE . UTF8::COMBINING_ACUTE_ACCENT         => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE,
2541c6adce8SGreg Roach        'A' . UTF8::COMBINING_RING_BELOW                                        => UTF8::LATIN_CAPITAL_LETTER_A_WITH_RING_BELOW,
2551c6adce8SGreg Roach        'A' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_A_WITH_TILDE,
2561c6adce8SGreg Roach        'B' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_B_WITH_DOT_ABOVE,
2571c6adce8SGreg Roach        'B' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_B_WITH_DOT_BELOW,
2581c6adce8SGreg Roach        'C' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_C_WITH_ACUTE,
2591c6adce8SGreg Roach        'C' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CARON,
2601c6adce8SGreg Roach        'C' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA,
2611c6adce8SGreg Roach        'C' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CIRCUMFLEX,
2621c6adce8SGreg Roach        'C' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_C_WITH_DOT_ABOVE,
2631c6adce8SGreg Roach        'C' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_C_WITH_CEDILLA_AND_ACUTE,
2641c6adce8SGreg Roach        'D' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_D_WITH_CARON,
2651c6adce8SGreg Roach        'D' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_D_WITH_CEDILLA,
2661c6adce8SGreg Roach        'D' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_D_WITH_DOT_ABOVE,
2671c6adce8SGreg Roach        'D' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_D_WITH_DOT_BELOW,
2681c6adce8SGreg Roach        'E' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_E_WITH_ACUTE,
2691c6adce8SGreg Roach        'E' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_BREVE,
2701c6adce8SGreg Roach        'E' . UTF8::COMBINING_BREVE . UTF8::COMBINING_CEDILLA                   => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CEDILLA_AND_BREVE,
2711c6adce8SGreg Roach        'E' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CARON,
2721c6adce8SGreg Roach        'E' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CEDILLA,
2731c6adce8SGreg Roach        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX,
2741c6adce8SGreg Roach        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE,
2751c6adce8SGreg Roach        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW,
2761c6adce8SGreg Roach        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE,
2771c6adce8SGreg Roach        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
2781c6adce8SGreg Roach        'E' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_CAPITAL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE,
2791c6adce8SGreg Roach        'E' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DIAERESIS,
2801c6adce8SGreg Roach        'E' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DOT_ABOVE,
2811c6adce8SGreg Roach        'E' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_E_WITH_DOT_BELOW,
2821c6adce8SGreg Roach        'E' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_E_WITH_GRAVE,
2831c6adce8SGreg Roach        'E' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_E_WITH_HOOK_ABOVE,
2841c6adce8SGreg Roach        'E' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON,
2851c6adce8SGreg Roach        'E' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_ACUTE,
2861c6adce8SGreg Roach        'E' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_MACRON_AND_GRAVE,
2871c6adce8SGreg Roach        'E' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_E_WITH_OGONEK,
2881c6adce8SGreg Roach        'E' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_E_WITH_TILDE,
2891c6adce8SGreg Roach        'F' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_F_WITH_DOT_ABOVE,
2901c6adce8SGreg Roach        'G' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_G_WITH_ACUTE,
2911c6adce8SGreg Roach        'G' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_G_WITH_BREVE,
2921c6adce8SGreg Roach        'G' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CARON,
2931c6adce8SGreg Roach        'G' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CEDILLA,
2941c6adce8SGreg Roach        'G' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_G_WITH_CIRCUMFLEX,
2951c6adce8SGreg Roach        'G' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_G_WITH_DOT_ABOVE,
2961c6adce8SGreg Roach        'G' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_G_WITH_MACRON,
2971c6adce8SGreg Roach        'H' . UTF8::COMBINING_BREVE_BELOW                                       => UTF8::LATIN_CAPITAL_LETTER_H_WITH_BREVE_BELOW,
2981c6adce8SGreg Roach        'H' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CARON,
2991c6adce8SGreg Roach        'H' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CEDILLA,
3001c6adce8SGreg Roach        'H' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_H_WITH_CIRCUMFLEX,
3011c6adce8SGreg Roach        'H' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DIAERESIS,
3021c6adce8SGreg Roach        'H' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DOT_ABOVE,
3031c6adce8SGreg Roach        'H' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_H_WITH_DOT_BELOW,
3041c6adce8SGreg Roach        'I' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_I_WITH_ACUTE,
3051c6adce8SGreg Roach        'I' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_I_WITH_BREVE,
3061c6adce8SGreg Roach        'I' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_I_WITH_CARON,
3071c6adce8SGreg Roach        'I' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_I_WITH_CIRCUMFLEX,
3081c6adce8SGreg Roach        'I' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS,
3091c6adce8SGreg Roach        'I' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT          => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DIAERESIS_AND_ACUTE,
3101c6adce8SGreg Roach        'I' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE,
3111c6adce8SGreg Roach        'I' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_I_WITH_DOT_BELOW,
3121c6adce8SGreg Roach        'I' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_I_WITH_GRAVE,
3131c6adce8SGreg Roach        'I' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_I_WITH_HOOK_ABOVE,
3141c6adce8SGreg Roach        'I' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_I_WITH_MACRON,
3151c6adce8SGreg Roach        'I' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_I_WITH_OGONEK,
3161c6adce8SGreg Roach        'I' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_I_WITH_TILDE,
3171c6adce8SGreg Roach        'J' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_J_WITH_CIRCUMFLEX,
3181c6adce8SGreg Roach        'K' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_K_WITH_CARON,
3191c6adce8SGreg Roach        'K' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_K_WITH_CEDILLA,
3201c6adce8SGreg Roach        'K' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_K_WITH_ACUTE,
3211c6adce8SGreg Roach        'K' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_K_WITH_DOT_BELOW,
3221c6adce8SGreg Roach        'L' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_L_WITH_ACUTE,
3231c6adce8SGreg Roach        'L' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_L_WITH_CARON,
3241c6adce8SGreg Roach        'L' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_L_WITH_CEDILLA,
3251c6adce8SGreg Roach        'L' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW,
3261c6adce8SGreg Roach        'L' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_L_WITH_DOT_BELOW_AND_MACRON,
3271c6adce8SGreg Roach        'M' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_M_WITH_ACUTE,
3281c6adce8SGreg Roach        'M' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_M_WITH_DOT_ABOVE,
3291c6adce8SGreg Roach        'M' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_M_WITH_DOT_BELOW,
3301c6adce8SGreg Roach        'N' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_N_WITH_ACUTE,
3311c6adce8SGreg Roach        'N' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_N_WITH_CARON,
3321c6adce8SGreg Roach        'N' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_N_WITH_CEDILLA,
3331c6adce8SGreg Roach        'N' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_N_WITH_DOT_ABOVE,
3341c6adce8SGreg Roach        'N' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_N_WITH_DOT_BELOW,
3351c6adce8SGreg Roach        'N' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_N_WITH_GRAVE,
3361c6adce8SGreg Roach        'N' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_N_WITH_TILDE,
3371c6adce8SGreg Roach        'O' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_O_WITH_ACUTE,
3381c6adce8SGreg Roach        'O' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_BREVE,
3391c6adce8SGreg Roach        'O' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CARON,
3401c6adce8SGreg Roach        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX,
3411c6adce8SGreg Roach        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE,
3421c6adce8SGreg Roach        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW,
3431c6adce8SGreg Roach        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE,
3441c6adce8SGreg Roach        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
3451c6adce8SGreg Roach        'O' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_CAPITAL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE,
3461c6adce8SGreg Roach        'O' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS,
3471c6adce8SGreg Roach        'O' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DIAERESIS_AND_MACRON,
3481c6adce8SGreg Roach        'O' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE,
3491c6adce8SGreg Roach        'O' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON,
3501c6adce8SGreg Roach        'O' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOT_BELOW,
3511c6adce8SGreg Roach        'O' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT                               => UTF8::LATIN_CAPITAL_LETTER_O_WITH_DOUBLE_ACUTE,
3521c6adce8SGreg Roach        'O' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_O_WITH_GRAVE,
3531c6adce8SGreg Roach        'O' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_O_WITH_HOOK_ABOVE,
3541c6adce8SGreg Roach        'O' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON,
3551c6adce8SGreg Roach        'O' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_ACUTE,
3561c6adce8SGreg Roach        'O' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_MACRON_AND_GRAVE,
3571c6adce8SGreg Roach        'O' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_O_WITH_OGONEK,
3581c6adce8SGreg Roach        'O' . UTF8::COMBINING_OGONEK . UTF8::COMBINING_MACRON                   => UTF8::LATIN_CAPITAL_LETTER_O_WITH_OGONEK_AND_MACRON,
3591c6adce8SGreg Roach        'O' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE,
3601c6adce8SGreg Roach        'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_ACUTE,
3611c6adce8SGreg Roach        'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_DIAERESIS                 => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_DIAERESIS,
3621c6adce8SGreg Roach        'O' . UTF8::COMBINING_TILDE . UTF8::COMBINING_MACRON                    => UTF8::LATIN_CAPITAL_LETTER_O_WITH_TILDE_AND_MACRON,
3631c6adce8SGreg Roach        'P' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_P_WITH_ACUTE,
3641c6adce8SGreg Roach        'P' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_P_WITH_DOT_ABOVE,
3651c6adce8SGreg Roach        'R' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_R_WITH_ACUTE,
3661c6adce8SGreg Roach        'R' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_R_WITH_CARON,
3671c6adce8SGreg Roach        'R' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_R_WITH_CEDILLA,
3681c6adce8SGreg Roach        'R' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_ABOVE,
3691c6adce8SGreg Roach        'R' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW,
3701c6adce8SGreg Roach        'R' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_R_WITH_DOT_BELOW_AND_MACRON,
3711c6adce8SGreg Roach        'S' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_S_WITH_ACUTE,
3721c6adce8SGreg Roach        'S' . UTF8::COMBINING_ACUTE_ACCENT . UTF8::COMBINING_DOT_ABOVE          => UTF8::LATIN_CAPITAL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE,
3731c6adce8SGreg Roach        'S' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CARON,
3741c6adce8SGreg Roach        'S' . UTF8::COMBINING_CARON . UTF8::COMBINING_DOT_ABOVE                 => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CARON_AND_DOT_ABOVE,
3751c6adce8SGreg Roach        'S' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CEDILLA,
3761c6adce8SGreg Roach        'S' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_S_WITH_CIRCUMFLEX,
3771c6adce8SGreg Roach        'S' . UTF8::COMBINING_COMMA_BELOW                                       => UTF8::LATIN_CAPITAL_LETTER_S_WITH_COMMA_BELOW,
3781c6adce8SGreg Roach        'S' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_ABOVE,
3791c6adce8SGreg Roach        'S' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW,
3801c6adce8SGreg Roach        'S' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_DOT_ABOVE             => UTF8::LATIN_CAPITAL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE,
3811c6adce8SGreg Roach        'T' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_T_WITH_CARON,
3821c6adce8SGreg Roach        'T' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_CAPITAL_LETTER_T_WITH_CEDILLA,
3831c6adce8SGreg Roach        'T' . UTF8::COMBINING_COMMA_BELOW                                       => UTF8::LATIN_CAPITAL_LETTER_T_WITH_COMMA_BELOW,
3841c6adce8SGreg Roach        'T' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_T_WITH_DOT_ABOVE,
3851c6adce8SGreg Roach        'T' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_T_WITH_DOT_BELOW,
3861c6adce8SGreg Roach        'U' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_U_WITH_ACUTE,
3871c6adce8SGreg Roach        'U' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_CAPITAL_LETTER_U_WITH_BREVE,
3881c6adce8SGreg Roach        'U' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_U_WITH_CARON,
3891c6adce8SGreg Roach        'U' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_U_WITH_CIRCUMFLEX,
3901c6adce8SGreg Roach        'U' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS,
3911c6adce8SGreg Roach        'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT          => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_ACUTE,
3921c6adce8SGreg Roach        'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_CARON                 => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_CARON,
3931c6adce8SGreg Roach        'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_GRAVE_ACCENT          => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_GRAVE,
3941c6adce8SGreg Roach        'U' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_AND_MACRON,
3951c6adce8SGreg Roach        'U' . UTF8::COMBINING_DIAERESIS_BELOW                                   => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DIAERESIS_BELOW,
3961c6adce8SGreg Roach        'U' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DOT_BELOW,
3971c6adce8SGreg Roach        'U' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT                               => UTF8::LATIN_CAPITAL_LETTER_U_WITH_DOUBLE_ACUTE,
3981c6adce8SGreg Roach        'U' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_U_WITH_GRAVE,
3991c6adce8SGreg Roach        'U' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_U_WITH_HOOK_ABOVE,
4001c6adce8SGreg Roach        'U' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_U_WITH_MACRON,
4011c6adce8SGreg Roach        'U' . UTF8::COMBINING_MACRON . UTF8::COMBINING_DIAERESIS                => UTF8::LATIN_CAPITAL_LETTER_U_WITH_MACRON_AND_DIAERESIS,
4021c6adce8SGreg Roach        'U' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_CAPITAL_LETTER_U_WITH_OGONEK,
4031c6adce8SGreg Roach        'U' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_U_WITH_RING_ABOVE,
4041c6adce8SGreg Roach        'U' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_U_WITH_TILDE,
4051c6adce8SGreg Roach        'U' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_CAPITAL_LETTER_U_WITH_TILDE_AND_ACUTE,
4061c6adce8SGreg Roach        'V' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_V_WITH_DOT_BELOW,
4071c6adce8SGreg Roach        'V' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_V_WITH_TILDE,
4081c6adce8SGreg Roach        'W' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_W_WITH_ACUTE,
4091c6adce8SGreg Roach        'W' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_W_WITH_CIRCUMFLEX,
4101c6adce8SGreg Roach        'W' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DIAERESIS,
4111c6adce8SGreg Roach        'W' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DOT_ABOVE,
4121c6adce8SGreg Roach        'W' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_W_WITH_DOT_BELOW,
4131c6adce8SGreg Roach        'W' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_W_WITH_GRAVE,
4141c6adce8SGreg Roach        'X' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_X_WITH_DIAERESIS,
4151c6adce8SGreg Roach        'X' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_X_WITH_DOT_ABOVE,
4161c6adce8SGreg Roach        'Y' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_ACUTE,
4171c6adce8SGreg Roach        'Y' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_CIRCUMFLEX,
4181c6adce8SGreg Roach        'Y' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DIAERESIS,
4191c6adce8SGreg Roach        'Y' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DOT_ABOVE,
4201c6adce8SGreg Roach        'Y' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_DOT_BELOW,
4211c6adce8SGreg Roach        'Y' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_GRAVE,
4221c6adce8SGreg Roach        'Y' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_HOOK_ABOVE,
4231c6adce8SGreg Roach        'Y' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_MACRON,
4241c6adce8SGreg Roach        'Y' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_CAPITAL_LETTER_Y_WITH_TILDE,
4251c6adce8SGreg Roach        'Z' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_ACUTE,
4261c6adce8SGreg Roach        'Z' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_CARON,
4271c6adce8SGreg Roach        'Z' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_CIRCUMFLEX,
4281c6adce8SGreg Roach        'Z' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_DOT_ABOVE,
4291c6adce8SGreg Roach        'Z' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_CAPITAL_LETTER_Z_WITH_DOT_BELOW,
4301c6adce8SGreg Roach        'a' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_A_WITH_ACUTE,
4311c6adce8SGreg Roach        'a' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE,
4321c6adce8SGreg Roach        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_ACUTE,
4331c6adce8SGreg Roach        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_DOT_BELOW                 => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_DOT_BELOW,
4341c6adce8SGreg Roach        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_GRAVE_ACCENT              => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_GRAVE,
4351c6adce8SGreg Roach        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_HOOK_ABOVE                => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_HOOK_ABOVE,
4361c6adce8SGreg Roach        'a' . UTF8::COMBINING_BREVE . UTF8::COMBINING_TILDE                     => UTF8::LATIN_SMALL_LETTER_A_WITH_BREVE_AND_TILDE,
4371c6adce8SGreg Roach        'a' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_A_WITH_CARON,
4381c6adce8SGreg Roach        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX,
4391c6adce8SGreg Roach        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_ACUTE,
4401c6adce8SGreg Roach        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_DOT_BELOW,
4411c6adce8SGreg Roach        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_GRAVE,
4421c6adce8SGreg Roach        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
4431c6adce8SGreg Roach        'a' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_SMALL_LETTER_A_WITH_CIRCUMFLEX_AND_TILDE,
4441c6adce8SGreg Roach        'a' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_A_WITH_DIAERESIS,
4451c6adce8SGreg Roach        'a' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_A_WITH_DIAERESIS_AND_MACRON,
4461c6adce8SGreg Roach        'a' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE,
4471c6adce8SGreg Roach        'a' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_ABOVE_AND_MACRON,
4481c6adce8SGreg Roach        'a' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_A_WITH_DOT_BELOW,
4491c6adce8SGreg Roach        'a' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_A_WITH_GRAVE,
4501c6adce8SGreg Roach        'a' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_A_WITH_HOOK_ABOVE,
4511c6adce8SGreg Roach        'a' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_A_WITH_MACRON,
4521c6adce8SGreg Roach        'a' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_A_WITH_OGONEK,
4531c6adce8SGreg Roach        'a' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_ABOVE,
4541c6adce8SGreg Roach        'a' . UTF8::COMBINING_RING_ABOVE . UTF8::COMBINING_ACUTE_ACCENT         => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_ABOVE_AND_ACUTE,
4551c6adce8SGreg Roach        'a' . UTF8::COMBINING_RING_BELOW                                        => UTF8::LATIN_SMALL_LETTER_A_WITH_RING_BELOW,
4561c6adce8SGreg Roach        'a' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_A_WITH_TILDE,
4571c6adce8SGreg Roach        'b' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_B_WITH_DOT_ABOVE,
4581c6adce8SGreg Roach        'b' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_B_WITH_DOT_BELOW,
4591c6adce8SGreg Roach        'c' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_C_WITH_ACUTE,
4601c6adce8SGreg Roach        'c' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_C_WITH_CARON,
4611c6adce8SGreg Roach        'c' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_C_WITH_CEDILLA,
4621c6adce8SGreg Roach        'c' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_C_WITH_CIRCUMFLEX,
4631c6adce8SGreg Roach        'c' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_C_WITH_DOT_ABOVE,
4641c6adce8SGreg Roach        'c' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_C_WITH_CEDILLA_AND_ACUTE,
4651c6adce8SGreg Roach        'd' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_D_WITH_CARON,
4661c6adce8SGreg Roach        'd' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_D_WITH_CEDILLA,
4671c6adce8SGreg Roach        'd' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_D_WITH_DOT_ABOVE,
4681c6adce8SGreg Roach        'd' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_D_WITH_DOT_BELOW,
4691c6adce8SGreg Roach        'e' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_E_WITH_ACUTE,
4701c6adce8SGreg Roach        'e' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_E_WITH_BREVE,
4711c6adce8SGreg Roach        'e' . UTF8::COMBINING_BREVE . UTF8::COMBINING_CEDILLA                   => UTF8::LATIN_SMALL_LETTER_E_WITH_CEDILLA_AND_BREVE,
4721c6adce8SGreg Roach        'e' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_E_WITH_CARON,
4731c6adce8SGreg Roach        'e' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_E_WITH_CEDILLA,
4741c6adce8SGreg Roach        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX,
4751c6adce8SGreg Roach        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_ACUTE,
4761c6adce8SGreg Roach        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_DOT_BELOW,
4771c6adce8SGreg Roach        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_GRAVE,
4781c6adce8SGreg Roach        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
4791c6adce8SGreg Roach        'e' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_SMALL_LETTER_E_WITH_CIRCUMFLEX_AND_TILDE,
4801c6adce8SGreg Roach        'e' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_E_WITH_DIAERESIS,
4811c6adce8SGreg Roach        'e' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_E_WITH_DOT_ABOVE,
4821c6adce8SGreg Roach        'e' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_E_WITH_DOT_BELOW,
4831c6adce8SGreg Roach        'e' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_E_WITH_GRAVE,
4841c6adce8SGreg Roach        'e' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_E_WITH_HOOK_ABOVE,
4851c6adce8SGreg Roach        'e' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON,
4861c6adce8SGreg Roach        'e' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT             => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON_AND_ACUTE,
4871c6adce8SGreg Roach        'e' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT             => UTF8::LATIN_SMALL_LETTER_E_WITH_MACRON_AND_GRAVE,
4881c6adce8SGreg Roach        'e' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_E_WITH_OGONEK,
4891c6adce8SGreg Roach        'e' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_E_WITH_TILDE,
4901c6adce8SGreg Roach        'f' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_F_WITH_DOT_ABOVE,
4911c6adce8SGreg Roach        'g' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_G_WITH_ACUTE,
4921c6adce8SGreg Roach        'g' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_G_WITH_BREVE,
4931c6adce8SGreg Roach        'g' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_G_WITH_CARON,
4941c6adce8SGreg Roach        'g' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_G_WITH_CEDILLA,
4951c6adce8SGreg Roach        'g' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_G_WITH_CIRCUMFLEX,
4961c6adce8SGreg Roach        'g' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_G_WITH_DOT_ABOVE,
4971c6adce8SGreg Roach        'g' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_G_WITH_MACRON,
4981c6adce8SGreg Roach        'h' . UTF8::COMBINING_BREVE_BELOW                                       => UTF8::LATIN_SMALL_LETTER_H_WITH_BREVE_BELOW,
4991c6adce8SGreg Roach        'h' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_H_WITH_CARON,
5001c6adce8SGreg Roach        'h' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_H_WITH_CEDILLA,
5011c6adce8SGreg Roach        'h' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_H_WITH_CIRCUMFLEX,
5021c6adce8SGreg Roach        'h' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_H_WITH_DIAERESIS,
5031c6adce8SGreg Roach        'h' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_H_WITH_DOT_ABOVE,
5041c6adce8SGreg Roach        'h' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_H_WITH_DOT_BELOW,
5051c6adce8SGreg Roach        'i' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_I_WITH_ACUTE,
5061c6adce8SGreg Roach        'i' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_I_WITH_BREVE,
5071c6adce8SGreg Roach        'i' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_I_WITH_CARON,
5081c6adce8SGreg Roach        'i' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_I_WITH_CIRCUMFLEX,
5091c6adce8SGreg Roach        'i' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_I_WITH_DIAERESIS,
5101c6adce8SGreg Roach        'i' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT          => UTF8::LATIN_SMALL_LETTER_I_WITH_DIAERESIS_AND_ACUTE,
5111c6adce8SGreg Roach        'i' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_I_WITH_DOT_BELOW,
5121c6adce8SGreg Roach        'i' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_I_WITH_GRAVE,
5131c6adce8SGreg Roach        'i' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_I_WITH_HOOK_ABOVE,
5141c6adce8SGreg Roach        'i' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_I_WITH_MACRON,
5151c6adce8SGreg Roach        'i' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_I_WITH_OGONEK,
5161c6adce8SGreg Roach        'i' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_I_WITH_TILDE,
5171c6adce8SGreg Roach        'j' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_J_WITH_CARON,
5181c6adce8SGreg Roach        'j' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_J_WITH_CIRCUMFLEX,
5191c6adce8SGreg Roach        'k' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_K_WITH_CARON,
5201c6adce8SGreg Roach        'k' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_K_WITH_CEDILLA,
5211c6adce8SGreg Roach        'k' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_K_WITH_ACUTE,
5221c6adce8SGreg Roach        'k' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_K_WITH_DOT_BELOW,
5231c6adce8SGreg Roach        'l' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_L_WITH_ACUTE,
5241c6adce8SGreg Roach        'l' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_L_WITH_CARON,
5251c6adce8SGreg Roach        'l' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_L_WITH_CEDILLA,
5261c6adce8SGreg Roach        'l' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_L_WITH_DOT_BELOW,
5271c6adce8SGreg Roach        'l' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_L_WITH_DOT_BELOW_AND_MACRON,
5281c6adce8SGreg Roach        'm' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_M_WITH_ACUTE,
5291c6adce8SGreg Roach        'm' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_M_WITH_DOT_ABOVE,
5301c6adce8SGreg Roach        'm' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_M_WITH_DOT_BELOW,
5311c6adce8SGreg Roach        'n' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_N_WITH_ACUTE,
5321c6adce8SGreg Roach        'n' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_N_WITH_CARON,
5331c6adce8SGreg Roach        'n' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_N_WITH_CEDILLA,
5341c6adce8SGreg Roach        'n' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_N_WITH_DOT_ABOVE,
5351c6adce8SGreg Roach        'n' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_N_WITH_DOT_BELOW,
5361c6adce8SGreg Roach        'n' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_N_WITH_GRAVE,
5371c6adce8SGreg Roach        'n' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_N_WITH_TILDE,
5381c6adce8SGreg Roach        'o' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_O_WITH_ACUTE,
5391c6adce8SGreg Roach        'o' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_O_WITH_BREVE,
5401c6adce8SGreg Roach        'o' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_O_WITH_CARON,
5411c6adce8SGreg Roach        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX,
5421c6adce8SGreg Roach        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_ACUTE_ACCENT  => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_ACUTE,
5431c6adce8SGreg Roach        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_DOT_BELOW     => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_DOT_BELOW,
5441c6adce8SGreg Roach        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_GRAVE_ACCENT  => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_GRAVE,
5451c6adce8SGreg Roach        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_HOOK_ABOVE    => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_HOOK_ABOVE,
5461c6adce8SGreg Roach        'o' . UTF8::COMBINING_CIRCUMFLEX_ACCENT . UTF8::COMBINING_TILDE         => UTF8::LATIN_SMALL_LETTER_O_WITH_CIRCUMFLEX_AND_TILDE,
5471c6adce8SGreg Roach        'o' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_O_WITH_DIAERESIS,
5481c6adce8SGreg Roach        'o' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_O_WITH_DIAERESIS_AND_MACRON,
5491c6adce8SGreg Roach        'o' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE,
5501c6adce8SGreg Roach        'o' . UTF8::COMBINING_DOT_ABOVE . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_ABOVE_AND_MACRON,
5511c6adce8SGreg Roach        'o' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_O_WITH_DOT_BELOW,
5521c6adce8SGreg Roach        'o' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT                               => UTF8::LATIN_SMALL_LETTER_O_WITH_DOUBLE_ACUTE,
5531c6adce8SGreg Roach        'o' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_O_WITH_GRAVE,
5541c6adce8SGreg Roach        'o' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_O_WITH_HOOK_ABOVE,
5551c6adce8SGreg Roach        'o' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON,
5561c6adce8SGreg Roach        'o' . UTF8::COMBINING_MACRON . UTF8::COMBINING_ACUTE_ACCENT             => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON_AND_ACUTE,
5571c6adce8SGreg Roach        'o' . UTF8::COMBINING_MACRON . UTF8::COMBINING_GRAVE_ACCENT             => UTF8::LATIN_SMALL_LETTER_O_WITH_MACRON_AND_GRAVE,
5581c6adce8SGreg Roach        'o' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_O_WITH_OGONEK,
5591c6adce8SGreg Roach        'o' . UTF8::COMBINING_OGONEK . UTF8::COMBINING_MACRON                   => UTF8::LATIN_SMALL_LETTER_O_WITH_OGONEK_AND_MACRON,
5601c6adce8SGreg Roach        'o' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE,
5611c6adce8SGreg Roach        'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_ACUTE,
5621c6adce8SGreg Roach        'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_DIAERESIS                 => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_DIAERESIS,
5631c6adce8SGreg Roach        'o' . UTF8::COMBINING_TILDE . UTF8::COMBINING_MACRON                    => UTF8::LATIN_SMALL_LETTER_O_WITH_TILDE_AND_MACRON,
5641c6adce8SGreg Roach        'p' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_P_WITH_ACUTE,
5651c6adce8SGreg Roach        'p' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_P_WITH_DOT_ABOVE,
5661c6adce8SGreg Roach        'r' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_R_WITH_ACUTE,
5671c6adce8SGreg Roach        'r' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_R_WITH_CARON,
5681c6adce8SGreg Roach        'r' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_R_WITH_CEDILLA,
5691c6adce8SGreg Roach        'r' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_ABOVE,
5701c6adce8SGreg Roach        'r' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_BELOW,
5711c6adce8SGreg Roach        'r' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_R_WITH_DOT_BELOW_AND_MACRON,
5721c6adce8SGreg Roach        's' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_S_WITH_ACUTE,
5731c6adce8SGreg Roach        's' . UTF8::COMBINING_ACUTE_ACCENT . UTF8::COMBINING_DOT_ABOVE          => UTF8::LATIN_SMALL_LETTER_S_WITH_ACUTE_AND_DOT_ABOVE,
5741c6adce8SGreg Roach        's' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_S_WITH_CARON,
5751c6adce8SGreg Roach        's' . UTF8::COMBINING_CARON . UTF8::COMBINING_DOT_ABOVE                 => UTF8::LATIN_SMALL_LETTER_S_WITH_CARON_AND_DOT_ABOVE,
5761c6adce8SGreg Roach        's' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_S_WITH_CEDILLA,
5771c6adce8SGreg Roach        's' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_S_WITH_CIRCUMFLEX,
5781c6adce8SGreg Roach        's' . UTF8::COMBINING_COMMA_BELOW                                       => UTF8::LATIN_SMALL_LETTER_S_WITH_COMMA_BELOW,
5791c6adce8SGreg Roach        's' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_ABOVE,
5801c6adce8SGreg Roach        's' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_BELOW,
5811c6adce8SGreg Roach        's' . UTF8::COMBINING_DOT_BELOW . UTF8::COMBINING_DOT_ABOVE             => UTF8::LATIN_SMALL_LETTER_S_WITH_DOT_BELOW_AND_DOT_ABOVE,
5821c6adce8SGreg Roach        't' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_T_WITH_CARON,
5831c6adce8SGreg Roach        't' . UTF8::COMBINING_CEDILLA                                           => UTF8::LATIN_SMALL_LETTER_T_WITH_CEDILLA,
5841c6adce8SGreg Roach        't' . UTF8::COMBINING_COMMA_BELOW                                       => UTF8::LATIN_SMALL_LETTER_T_WITH_COMMA_BELOW,
5851c6adce8SGreg Roach        't' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_T_WITH_DIAERESIS,
5861c6adce8SGreg Roach        't' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_T_WITH_DOT_ABOVE,
5871c6adce8SGreg Roach        't' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_T_WITH_DOT_BELOW,
5881c6adce8SGreg Roach        'u' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_U_WITH_ACUTE,
5891c6adce8SGreg Roach        'u' . UTF8::COMBINING_BREVE                                             => UTF8::LATIN_SMALL_LETTER_U_WITH_BREVE,
5901c6adce8SGreg Roach        'u' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_U_WITH_CARON,
5911c6adce8SGreg Roach        'u' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_U_WITH_CIRCUMFLEX,
5921c6adce8SGreg Roach        'u' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS,
5931c6adce8SGreg Roach        'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_ACUTE_ACCENT          => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_ACUTE,
5941c6adce8SGreg Roach        'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_CARON                 => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_CARON,
5951c6adce8SGreg Roach        'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_GRAVE_ACCENT          => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_GRAVE,
5961c6adce8SGreg Roach        'u' . UTF8::COMBINING_DIAERESIS . UTF8::COMBINING_MACRON                => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_AND_MACRON,
5971c6adce8SGreg Roach        'u' . UTF8::COMBINING_DIAERESIS_BELOW                                   => UTF8::LATIN_SMALL_LETTER_U_WITH_DIAERESIS_BELOW,
5981c6adce8SGreg Roach        'u' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_U_WITH_DOT_BELOW,
5991c6adce8SGreg Roach        'u' . UTF8::COMBINING_DOUBLE_ACUTE_ACCENT                               => UTF8::LATIN_SMALL_LETTER_U_WITH_DOUBLE_ACUTE,
6001c6adce8SGreg Roach        'u' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_U_WITH_GRAVE,
6011c6adce8SGreg Roach        'u' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_U_WITH_HOOK_ABOVE,
6021c6adce8SGreg Roach        'u' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_U_WITH_MACRON,
6031c6adce8SGreg Roach        'u' . UTF8::COMBINING_MACRON . UTF8::COMBINING_DIAERESIS                => UTF8::LATIN_SMALL_LETTER_U_WITH_MACRON_AND_DIAERESIS,
6041c6adce8SGreg Roach        'u' . UTF8::COMBINING_OGONEK                                            => UTF8::LATIN_SMALL_LETTER_U_WITH_OGONEK,
6051c6adce8SGreg Roach        'u' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_U_WITH_RING_ABOVE,
6061c6adce8SGreg Roach        'u' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_U_WITH_TILDE,
6071c6adce8SGreg Roach        'u' . UTF8::COMBINING_TILDE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_SMALL_LETTER_U_WITH_TILDE_AND_ACUTE,
6081c6adce8SGreg Roach        'v' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_V_WITH_DOT_BELOW,
6091c6adce8SGreg Roach        'v' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_V_WITH_TILDE,
6101c6adce8SGreg Roach        'w' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_W_WITH_ACUTE,
6111c6adce8SGreg Roach        'w' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_W_WITH_CIRCUMFLEX,
6121c6adce8SGreg Roach        'w' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_W_WITH_DIAERESIS,
6131c6adce8SGreg Roach        'w' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_W_WITH_DOT_ABOVE,
6141c6adce8SGreg Roach        'w' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_W_WITH_DOT_BELOW,
6151c6adce8SGreg Roach        'w' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_W_WITH_GRAVE,
6161c6adce8SGreg Roach        'w' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_W_WITH_RING_ABOVE,
6171c6adce8SGreg Roach        'x' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_X_WITH_DIAERESIS,
6181c6adce8SGreg Roach        'x' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_X_WITH_DOT_ABOVE,
6191c6adce8SGreg Roach        'y' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_Y_WITH_ACUTE,
6201c6adce8SGreg Roach        'y' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_Y_WITH_CIRCUMFLEX,
6211c6adce8SGreg Roach        'y' . UTF8::COMBINING_DIAERESIS                                         => UTF8::LATIN_SMALL_LETTER_Y_WITH_DIAERESIS,
6221c6adce8SGreg Roach        'y' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_Y_WITH_DOT_ABOVE,
6231c6adce8SGreg Roach        'y' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_Y_WITH_DOT_BELOW,
6241c6adce8SGreg Roach        'y' . UTF8::COMBINING_GRAVE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_Y_WITH_GRAVE,
6251c6adce8SGreg Roach        'y' . UTF8::COMBINING_HOOK_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_Y_WITH_HOOK_ABOVE,
6261c6adce8SGreg Roach        'y' . UTF8::COMBINING_MACRON                                            => UTF8::LATIN_SMALL_LETTER_Y_WITH_MACRON,
6271c6adce8SGreg Roach        'y' . UTF8::COMBINING_RING_ABOVE                                        => UTF8::LATIN_SMALL_LETTER_Y_WITH_RING_ABOVE,
6281c6adce8SGreg Roach        'y' . UTF8::COMBINING_TILDE                                             => UTF8::LATIN_SMALL_LETTER_Y_WITH_TILDE,
6291c6adce8SGreg Roach        'z' . UTF8::COMBINING_ACUTE_ACCENT                                      => UTF8::LATIN_SMALL_LETTER_Z_WITH_ACUTE,
6301c6adce8SGreg Roach        'z' . UTF8::COMBINING_CARON                                             => UTF8::LATIN_SMALL_LETTER_Z_WITH_CARON,
6311c6adce8SGreg Roach        'z' . UTF8::COMBINING_CIRCUMFLEX_ACCENT                                 => UTF8::LATIN_SMALL_LETTER_Z_WITH_CIRCUMFLEX,
6321c6adce8SGreg Roach        'z' . UTF8::COMBINING_DOT_ABOVE                                         => UTF8::LATIN_SMALL_LETTER_Z_WITH_DOT_ABOVE,
6331c6adce8SGreg Roach        'z' . UTF8::COMBINING_DOT_BELOW                                         => UTF8::LATIN_SMALL_LETTER_Z_WITH_DOT_BELOW,
6341c6adce8SGreg Roach        UTF8::LATIN_CAPITAL_LETTER_AE . UTF8::COMBINING_ACUTE_ACCENT            => UTF8::LATIN_CAPITAL_LETTER_AE_WITH_ACUTE,
6351c6adce8SGreg Roach        UTF8::LATIN_CAPITAL_LETTER_AE . UTF8::COMBINING_MACRON                  => UTF8::LATIN_CAPITAL_LETTER_AE_WITH_MACRON,
6361c6adce8SGreg Roach        UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE . UTF8::COMBINING_ACUTE_ACCENT => UTF8::LATIN_CAPITAL_LETTER_O_WITH_STROKE_AND_ACUTE,
6371c6adce8SGreg Roach        UTF8::LATIN_SMALL_LETTER_AE . UTF8::COMBINING_ACUTE_ACCENT              => UTF8::LATIN_SMALL_LETTER_AE_WITH_ACUTE,
6381c6adce8SGreg Roach        UTF8::LATIN_SMALL_LETTER_AE . UTF8::COMBINING_MACRON                    => UTF8::LATIN_SMALL_LETTER_AE_WITH_MACRON,
6391c6adce8SGreg Roach        UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE . UTF8::COMBINING_ACUTE_ACCENT   => UTF8::LATIN_SMALL_LETTER_O_WITH_STROKE_AND_ACUTE,
6401c6adce8SGreg Roach    ];
6411c6adce8SGreg Roach
6421c6adce8SGreg Roach    // ANSEL supports O and U with a horn diacritic, but not the combining diacritic.
6431c6adce8SGreg Roach    private const HORN_CONVERT_STEP_1 = [
6441c6adce8SGreg Roach        'O' . UTF8::COMBINING_HORN => "\x00O_WITH_HORN\x00",
6451c6adce8SGreg Roach        'U' . UTF8::COMBINING_HORN => "\x00U_WITH_HORN\x00",
6461c6adce8SGreg Roach        'o' . UTF8::COMBINING_HORN => "\x00o_WITH_HORN\x00",
6471c6adce8SGreg Roach        'u' . UTF8::COMBINING_HORN => "\x00u_WITH_HORN\x00",
6481c6adce8SGreg Roach    ];
6491c6adce8SGreg Roach    private const HORN_CONVERT_STEP_2 = [
6501c6adce8SGreg Roach        "\x00O_WITH_HORN\x00" => "\xAC",
6511c6adce8SGreg Roach        "\x00U_WITH_HORN\x00" => "\xAD",
6521c6adce8SGreg Roach        "\x00o_WITH_HORN\x00" => "\xBC",
6531c6adce8SGreg Roach        "\x00u_WITH_HORN\x00" => "\xBD",
6541c6adce8SGreg Roach    ];
6551c6adce8SGreg Roach
6561c6adce8SGreg Roach    /**
6571c6adce8SGreg Roach     * Convert a string from another encoding to UTF-8.
6581c6adce8SGreg Roach     *
6591c6adce8SGreg Roach     * @param string $text
6601c6adce8SGreg Roach     *
6611c6adce8SGreg Roach     * @return string
6621c6adce8SGreg Roach     */
6631c6adce8SGreg Roach    public function toUtf8(string $text): string
6641c6adce8SGreg Roach    {
6651c6adce8SGreg Roach        // ANSEL diacritics are prefixes.  UTF-8 diacritics are suffixes.
6661c6adce8SGreg Roach        $text = preg_replace('/([\xE0-\xFF]+)(.)/', '$2$1', $text);
6671c6adce8SGreg Roach
6681c6adce8SGreg Roach        // Simple substitution creates denormalized UTF-8.
6691c6adce8SGreg Roach        $text = strtr($text, self::TO_UTF8);
6701c6adce8SGreg Roach
6711c6adce8SGreg Roach        // Convert combining diacritics into pre-composed characters.
6721c6adce8SGreg Roach        return strtr($text, self::PRECOMPOSED_CHARACTERS);
6731c6adce8SGreg Roach    }
6741c6adce8SGreg Roach
6751c6adce8SGreg Roach    /**
6761c6adce8SGreg Roach     * Convert a string from UTF-8 to another encoding.
6771c6adce8SGreg Roach     *
6781c6adce8SGreg Roach     * @param string $text
6791c6adce8SGreg Roach     *
6801c6adce8SGreg Roach     * @return string
6811c6adce8SGreg Roach     */
6821c6adce8SGreg Roach    public function fromUtf8(string $text): string
6831c6adce8SGreg Roach    {
6841c6adce8SGreg Roach        // Convert pre-composed characters into combining diacritics.
6851c6adce8SGreg Roach        $text = strtr($text, array_flip(self::PRECOMPOSED_CHARACTERS));
6861c6adce8SGreg Roach
6871c6adce8SGreg Roach        // ANSEL supports letters with horns, but not the combining horn.
6881c6adce8SGreg Roach        $text = strtr($text, self::HORN_CONVERT_STEP_1);
6891c6adce8SGreg Roach
6901c6adce8SGreg Roach        // Convert characters and combining diacritics separately.
6911c6adce8SGreg Roach        $text = parent::fromUtf8($text);
6921c6adce8SGreg Roach
6931c6adce8SGreg Roach        // ANSEL supports two letters with horns, but not the combining horn.
6941c6adce8SGreg Roach        $text = strtr($text, self::HORN_CONVERT_STEP_2);
6951c6adce8SGreg Roach
6961c6adce8SGreg Roach        // ANSEL diacritics are prefixes.  UTF-8 diacritics are suffixes.
6971c6adce8SGreg Roach        $text = preg_replace('/([^\xE0-\xFF])([\xE0-\xFF]+)/', '$2$1', $text);
6981c6adce8SGreg Roach
6991c6adce8SGreg Roach        return $text;
7001c6adce8SGreg Roach    }
7011c6adce8SGreg Roach}
702