1 /* 2 * Copyright (C) 1999-2002, 2004 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * TCVN-5712 23 */ 24 25 #include "flushwc.h" 26 #include "vietcomb.h" 27 28 static const unsigned char tcvn_comb_table[] = { 29 0xb0, 0xb3, 0xb2, 0xb1, 0xb4, 30 }; 31 32 /* The possible bases in viet_comp_table_data: 33 0x0041..0x0045, 0x0047..0x0049, 0x004B..0x0050, 0x0052..0x0057, 34 0x0059..0x005A, 0x0061..0x0065, 0x0067..0x0069, 0x006B..0x0070, 35 0x0072..0x0077, 0x0079..0x007A, 0x00A5, 0x00C2, 0x00CA, 0x00D3..0x00D6, 36 0x00DA, 0x00E2, 0x00EA, 0x00F3..0x00F6, 0x00FA, 0x0102..0x0103, 37 0x0168..0x0169, 0x01A0..0x01A1, 0x01AF..0x01B0. */ 38 static const unsigned int tcvn_comp_bases[] = { 39 0x06fdfbbe, 0x06fdfbbe, 0x00000000, 0x00000020, 0x04780404, 0x04780404, 40 0x0000000c, 0x00000000, 0x00000000, 0x00000300, 0x00000000, 0x00018003 41 }; 42 43 static const unsigned short tcvn_2uni_1[24] = { 44 /* 0x00 */ 45 0x0000, 0x00da, 0x1ee4, 0x0003, 0x1eea, 0x1eec, 0x1eee, 0x0007, 46 0x0008, 0x0009, 0x000a, 0x000b, 0x000c, 0x000d, 0x000e, 0x000f, 47 /* 0x10 */ 48 0x0010, 0x1ee8, 0x1ef0, 0x1ef2, 0x1ef6, 0x1ef8, 0x00dd, 0x1ef4, 49 }; 50 static const unsigned short tcvn_2uni_2[128] = { 51 /* 0x80 */ 52 0x00c0, 0x1ea2, 0x00c3, 0x00c1, 0x1ea0, 0x1eb6, 0x1eac, 0x00c8, 53 0x1eba, 0x1ebc, 0x00c9, 0x1eb8, 0x1ec6, 0x00cc, 0x1ec8, 0x0128, 54 /* 0x90 */ 55 0x00cd, 0x1eca, 0x00d2, 0x1ece, 0x00d5, 0x00d3, 0x1ecc, 0x1ed8, 56 0x1edc, 0x1ede, 0x1ee0, 0x1eda, 0x1ee2, 0x00d9, 0x1ee6, 0x0168, 57 /* 0xa0 */ 58 0x00a0, 0x0102, 0x00c2, 0x00ca, 0x00d4, 0x01a0, 0x01af, 0x0110, 59 0x0103, 0x00e2, 0x00ea, 0x00f4, 0x01a1, 0x01b0, 0x0111, 0x1eb0, 60 /* 0xb0 */ 61 0x0300, 0x0309, 0x0303, 0x0301, 0x0323, 0x00e0, 0x1ea3, 0x00e3, 62 0x00e1, 0x1ea1, 0x1eb2, 0x1eb1, 0x1eb3, 0x1eb5, 0x1eaf, 0x1eb4, 63 /* 0xc0 */ 64 0x1eae, 0x1ea6, 0x1ea8, 0x1eaa, 0x1ea4, 0x1ec0, 0x1eb7, 0x1ea7, 65 0x1ea9, 0x1eab, 0x1ea5, 0x1ead, 0x00e8, 0x1ec2, 0x1ebb, 0x1ebd, 66 /* 0xd0 */ 67 0x00e9, 0x1eb9, 0x1ec1, 0x1ec3, 0x1ec5, 0x1ebf, 0x1ec7, 0x00ec, 68 0x1ec9, 0x1ec4, 0x1ebe, 0x1ed2, 0x0129, 0x00ed, 0x1ecb, 0x00f2, 69 /* 0xe0 */ 70 0x1ed4, 0x1ecf, 0x00f5, 0x00f3, 0x1ecd, 0x1ed3, 0x1ed5, 0x1ed7, 71 0x1ed1, 0x1ed9, 0x1edd, 0x1edf, 0x1ee1, 0x1edb, 0x1ee3, 0x00f9, 72 /* 0xf0 */ 73 0x1ed6, 0x1ee7, 0x0169, 0x00fa, 0x1ee5, 0x1eeb, 0x1eed, 0x1eef, 74 0x1ee9, 0x1ef1, 0x1ef3, 0x1ef7, 0x1ef9, 0x00fd, 0x1ef5, 0x1ed0, 75 }; 76 77 /* In the TCVN to Unicode direction, the state contains a buffered 78 character, or 0 if none. */ 79 80 static int 81 tcvn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 82 { 83 unsigned char c = *s; 84 unsigned short wc; 85 unsigned short last_wc; 86 if (c < 0x18) 87 wc = tcvn_2uni_1[c]; 88 else if (c < 0x80) 89 wc = c; 90 else 91 wc = tcvn_2uni_2[c-0x80]; 92 last_wc = conv->istate; 93 if (last_wc) { 94 if (wc >= 0x0300 && wc < 0x0340) { 95 /* See whether last_wc and wc can be combined. */ 96 unsigned int k; 97 unsigned int i1, i2; 98 switch (wc) { 99 case 0x0300: k = 0; break; 100 case 0x0301: k = 1; break; 101 case 0x0303: k = 2; break; 102 case 0x0309: k = 3; break; 103 case 0x0323: k = 4; break; 104 default: abort(); 105 } 106 i1 = viet_comp_table[k].idx; 107 i2 = i1 + viet_comp_table[k].len-1; 108 if (last_wc >= viet_comp_table_data[i1].base 109 && last_wc <= viet_comp_table_data[i2].base) { 110 unsigned int i; 111 for (;;) { 112 i = (i1+i2)>>1; 113 if (last_wc == viet_comp_table_data[i].base) 114 break; 115 if (last_wc < viet_comp_table_data[i].base) { 116 if (i1 == i) 117 goto not_combining; 118 i2 = i; 119 } else { 120 if (i1 != i) 121 i1 = i; 122 else { 123 i = i2; 124 if (last_wc == viet_comp_table_data[i].base) 125 break; 126 goto not_combining; 127 } 128 } 129 } 130 last_wc = viet_comp_table_data[i].composed; 131 /* Output the combined character. */ 132 conv->istate = 0; 133 *pwc = (ucs4_t) last_wc; 134 return 1; 135 } 136 } 137 not_combining: 138 /* Output the buffered character. */ 139 conv->istate = 0; 140 *pwc = (ucs4_t) last_wc; 141 return 0; /* Don't advance the input pointer. */ 142 } 143 if (wc >= 0x0041 && wc <= 0x01b0 144 && ((tcvn_comp_bases[(wc - 0x0040) >> 5] >> (wc & 0x1f)) & 1)) { 145 /* wc is a possible match in viet_comp_table_data. Buffer it. */ 146 conv->istate = wc; 147 return RET_TOOFEW(1); 148 } else { 149 /* Output wc immediately. */ 150 *pwc = (ucs4_t) wc; 151 return 1; 152 } 153 } 154 155 #define tcvn_flushwc normal_flushwc 156 157 static const unsigned char tcvn_page00[96+184] = { 158 0xa0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 159 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa8-0xaf */ 160 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 161 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb8-0xbf */ 162 0x80, 0x83, 0xa2, 0x82, 0x00, 0x00, 0x00, 0x00, /* 0xc0-0xc7 */ 163 0x87, 0x8a, 0xa3, 0x00, 0x8d, 0x90, 0x00, 0x00, /* 0xc8-0xcf */ 164 0x00, 0x00, 0x92, 0x95, 0xa4, 0x94, 0x00, 0x00, /* 0xd0-0xd7 */ 165 0x00, 0x9d, 0x01, 0x00, 0x00, 0x16, 0x00, 0x00, /* 0xd8-0xdf */ 166 0xb5, 0xb8, 0xa9, 0xb7, 0x00, 0x00, 0x00, 0x00, /* 0xe0-0xe7 */ 167 0xcc, 0xd0, 0xaa, 0x00, 0xd7, 0xdd, 0x00, 0x00, /* 0xe8-0xef */ 168 0x00, 0x00, 0xdf, 0xe3, 0xab, 0xe2, 0x00, 0x00, /* 0xf0-0xf7 */ 169 0x00, 0xef, 0xf3, 0x00, 0x00, 0xfd, 0x00, 0x00, /* 0xf8-0xff */ 170 /* 0x0100 */ 171 0x00, 0x00, 0xa1, 0xa8, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 172 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 173 0xa7, 0xae, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 174 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 175 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 176 0x8f, 0xdc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */ 177 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */ 178 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */ 179 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */ 180 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x48-0x4f */ 181 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */ 182 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */ 183 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */ 184 0x9f, 0xf2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */ 185 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */ 186 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */ 187 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */ 188 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */ 189 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */ 190 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */ 191 0xa5, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */ 192 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xa6, /* 0xa8-0xaf */ 193 0xad, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */ 194 }; 195 static const unsigned char tcvn_page03[40] = { 196 0xb0, 0xb3, 0x00, 0xb2, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */ 197 0x00, 0xb1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */ 198 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */ 199 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */ 200 0x00, 0x00, 0x00, 0xb4, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */ 201 }; 202 static const unsigned char tcvn_page1e[96] = { 203 0x84, 0xb9, 0x81, 0xb6, 0xc4, 0xca, 0xc1, 0xc7, /* 0xa0-0xa7 */ 204 0xc2, 0xc8, 0xc3, 0xc9, 0x86, 0xcb, 0xc0, 0xbe, /* 0xa8-0xaf */ 205 0xaf, 0xbb, 0xba, 0xbc, 0xbf, 0xbd, 0x85, 0xc6, /* 0xb0-0xb7 */ 206 0x8b, 0xd1, 0x88, 0xce, 0x89, 0xcf, 0xda, 0xd5, /* 0xb8-0xbf */ 207 0xc5, 0xd2, 0xcd, 0xd3, 0xd9, 0xd4, 0x8c, 0xd6, /* 0xc0-0xc7 */ 208 0x8e, 0xd8, 0x91, 0xde, 0x96, 0xe4, 0x93, 0xe1, /* 0xc8-0xcf */ 209 0xff, 0xe8, 0xdb, 0xe5, 0xe0, 0xe6, 0xf0, 0xe7, /* 0xd0-0xd7 */ 210 0x97, 0xe9, 0x9b, 0xed, 0x98, 0xea, 0x99, 0xeb, /* 0xd8-0xdf */ 211 0x9a, 0xec, 0x9c, 0xee, 0x02, 0xf4, 0x9e, 0xf1, /* 0xe0-0xe7 */ 212 0x11, 0xf8, 0x04, 0xf5, 0x05, 0xf6, 0x06, 0xf7, /* 0xe8-0xef */ 213 0x12, 0xf9, 0x13, 0xfa, 0x17, 0xfe, 0x14, 0xfb, /* 0xf0-0xf7 */ 214 0x15, 0xfc, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xf8-0xff */ 215 }; 216 217 static int 218 tcvn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 219 { 220 unsigned char c = 0; 221 if (wc < 0x0080 && (wc >= 0x0020 || (0x00fe0076 & (1 << wc)) == 0)) { 222 *r = wc; 223 return 1; 224 } 225 else if (wc >= 0x00a0 && wc < 0x01b8) 226 c = tcvn_page00[wc-0x00a0]; 227 else if (wc >= 0x0300 && wc < 0x0328) 228 c = tcvn_page03[wc-0x0300]; 229 else if (wc >= 0x0340 && wc < 0x0342) /* deprecated Vietnamese tone marks */ 230 c = tcvn_page03[wc-0x0340]; 231 else if (wc >= 0x1ea0 && wc < 0x1f00) 232 c = tcvn_page1e[wc-0x1ea0]; 233 if (c != 0) { 234 *r = c; 235 return 1; 236 } 237 /* Try compatibility or canonical decomposition. */ 238 { 239 /* Binary search through viet_decomp_table. */ 240 unsigned int i1 = 0; 241 unsigned int i2 = sizeof(viet_decomp_table)/sizeof(viet_decomp_table[0])-1; 242 if (wc >= viet_decomp_table[i1].composed 243 && wc <= viet_decomp_table[i2].composed) { 244 unsigned int i; 245 for (;;) { 246 /* Here i2 - i1 > 0. */ 247 i = (i1+i2)>>1; 248 if (wc == viet_decomp_table[i].composed) 249 break; 250 if (wc < viet_decomp_table[i].composed) { 251 if (i1 == i) 252 return RET_ILUNI; 253 /* Here i1 < i < i2. */ 254 i2 = i; 255 } else { 256 /* Here i1 <= i < i2. */ 257 if (i1 != i) 258 i1 = i; 259 else { 260 /* Here i2 - i1 = 1. */ 261 i = i2; 262 if (wc == viet_decomp_table[i].composed) 263 break; 264 else 265 return RET_ILUNI; 266 } 267 } 268 } 269 /* Found a compatibility or canonical decomposition. */ 270 wc = viet_decomp_table[i].base; 271 /* wc is one of 0x0020, 0x0041..0x005a, 0x0061..0x007a, 0x00a5, 0x00a8, 272 0x00c2, 0x00c5..0x00c7, 0x00ca, 0x00cf, 0x00d3, 0x00d4, 0x00d6, 273 0x00d8, 0x00da, 0x00dc, 0x00e2, 0x00e5..0x00e7, 0x00ea, 0x00ef, 274 0x00f3, 0x00f4, 0x00f6, 0x00f8, 0x00fc, 0x0102, 0x0103, 0x01a0, 275 0x01a1, 0x01af, 0x01b0. */ 276 if (wc < 0x0080) 277 c = wc; 278 else { 279 c = tcvn_page00[wc-0x00a0]; 280 if (c == 0) 281 return RET_ILUNI; 282 } 283 if (n < 2) 284 return RET_TOOSMALL; 285 r[0] = c; 286 r[1] = tcvn_comb_table[viet_decomp_table[i].comb1]; 287 return 2; 288 } 289 } 290 return RET_ILUNI; 291 } 292