/*
 * Copyright (C) 1999-2001, 2004 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */

/*
 * CP1258
 */

#include <stdlib.h> /* abort() */

#include "flushwc.h"
#include "vietcomb.h"

/* CP1258 bytes of the five combining tone marks, in the order in which
   cp1258_mbtowc numbers them: U+0300 (0xcc), U+0301 (0xec), U+0303 (0xde),
   U+0309 (0xd2), U+0323 (0xf2).
   cp1258_wctomb indexes this table with the comb1 field of a
   viet_decomp_table entry. */
static const unsigned char cp1258_comb_table[] = {
  0xcc, 0xec, 0xde, 0xd2, 0xf2,
};

/* Bitmap of the characters that may start a composition.

   The possible bases in viet_comp_table_data:
   0x0041..0x0045, 0x0047..0x0049, 0x004B..0x0050, 0x0052..0x0057,
   0x0059..0x005A, 0x0061..0x0065, 0x0067..0x0069, 0x006B..0x0070,
   0x0072..0x0077, 0x0079..0x007A, 0x00A5, 0x00A8, 0x00C2, 0x00C5..0x00C7,
   0x00CA, 0x00CF, 0x00D3..0x00D4, 0x00D6, 0x00D8, 0x00DA, 0x00DC, 0x00E2,
   0x00E5..0x00E7, 0x00EA, 0x00EF, 0x00F3..0x00F4, 0x00F6, 0x00F8, 0x00FA,
   0x00FC, 0x0102..0x0103, 0x01A0..0x01A1, 0x01AF..0x01B0.

   Each element covers 32 consecutive code points starting at 0x0040:
   bit (wc & 0x1f) of element ((wc - 0x0040) >> 5) is set iff wc is one of
   the bases above.  The twelve elements span 0x0040..0x01bf. */
static const unsigned int cp1258_comp_bases[] = {
  0x06fdfbbe, 0x06fdfbbe, 0x00000000, 0x00000120, 0x155884e4, 0x155884e4,
  0x0000000c, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00018003
};

/* CP1258 to Unicode for the bytes 0x80..0xff, indexed by (byte - 0x80).
   An entry of 0xfffd marks a byte without a Unicode mapping; cp1258_mbtowc
   returns RET_ILSEQ for it.  The bytes 0xcc, 0xd2, 0xde, 0xec and 0xf2 map to
   the combining marks U+0300, U+0309, U+0303, U+0301 and U+0323. */
static const unsigned short cp1258_2uni[128] = {
  /* 0x80 */
  0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021,
  0x02c6, 0x2030, 0xfffd, 0x2039, 0x0152, 0xfffd, 0xfffd, 0xfffd,
  /* 0x90 */
  0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014,
  0x02dc, 0x2122, 0xfffd, 0x203a, 0x0153, 0xfffd, 0xfffd, 0x0178,
  /* 0xa0 */
  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7,
  0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af,
  /* 0xb0 */
  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7,
  0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf,
  /* 0xc0 */
  0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
  0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x0300, 0x00cd, 0x00ce, 0x00cf,
  /* 0xd0 */
  0x0110, 0x00d1, 0x0309, 0x00d3, 0x00d4, 0x01a0, 0x00d6, 0x00d7,
  0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x01af, 0x0303, 0x00df,
  /* 0xe0 */
  0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x00e5, 0x00e6, 0x00e7,
  0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x0301, 0x00ed, 0x00ee, 0x00ef,
  /* 0xf0 */
  0x0111, 0x00f1, 0x0323, 0x00f3, 0x00f4, 0x01a1, 0x00f6, 0x00f7,
  0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x01b0, 0x20ab, 0x00ff,
};

71 /* In the CP1258 to Unicode direction, the state contains a buffered
72 character, or 0 if none. */
73
74 static int
cp1258_mbtowc(conv_t conv,ucs4_t * pwc,const unsigned char * s,int n)75 cp1258_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
76 {
77 unsigned char c = *s;
78 unsigned short wc;
79 unsigned short last_wc;
80 if (c < 0x80) {
81 wc = c;
82 } else {
83 wc = cp1258_2uni[c-0x80];
84 if (wc == 0xfffd)
85 return RET_ILSEQ;
86 }
87 last_wc = conv->istate;
88 if (last_wc) {
89 if (wc >= 0x0300 && wc < 0x0340) {
90 /* See whether last_wc and wc can be combined. */
91 unsigned int k;
92 unsigned int i1, i2;
93 switch (wc) {
94 case 0x0300: k = 0; break;
95 case 0x0301: k = 1; break;
96 case 0x0303: k = 2; break;
97 case 0x0309: k = 3; break;
98 case 0x0323: k = 4; break;
99 default: abort();
100 }
101 i1 = viet_comp_table[k].idx;
102 i2 = i1 + viet_comp_table[k].len-1;
103 if (last_wc >= viet_comp_table_data[i1].base
104 && last_wc <= viet_comp_table_data[i2].base) {
105 unsigned int i;
106 for (;;) {
107 i = (i1+i2)>>1;
108 if (last_wc == viet_comp_table_data[i].base)
109 break;
110 if (last_wc < viet_comp_table_data[i].base) {
111 if (i1 == i)
112 goto not_combining;
113 i2 = i;
114 } else {
115 if (i1 != i)
116 i1 = i;
117 else {
118 i = i2;
119 if (last_wc == viet_comp_table_data[i].base)
120 break;
121 goto not_combining;
122 }
123 }
124 }
125 last_wc = viet_comp_table_data[i].composed;
126 /* Output the combined character. */
127 conv->istate = 0;
128 *pwc = (ucs4_t) last_wc;
129 return 1;
130 }
131 }
132 not_combining:
133 /* Output the buffered character. */
134 conv->istate = 0;
135 *pwc = (ucs4_t) last_wc;
136 return 0; /* Don't advance the input pointer. */
137 }
138 if (wc >= 0x0041 && wc <= 0x01b0
139 && ((cp1258_comp_bases[(wc - 0x0040) >> 5] >> (wc & 0x1f)) & 1)) {
140 /* wc is a possible match in viet_comp_table_data. Buffer it. */
141 conv->istate = wc;
142 return RET_TOOFEW(1);
143 } else {
144 /* Output wc immediately. */
145 *pwc = (ucs4_t) wc;
146 return 1;
147 }
148 }
149
/* CP1258 has no flush routine of its own; the name is an alias for
   normal_flushwc (not defined in this file, presumably by "flushwc.h"). */
#define cp1258_flushwc normal_flushwc

/* Unicode to CP1258 for U+00C0..U+0117, indexed by (wc - 0x00c0).
   An entry of 0x00 means that the character has no single-byte encoding;
   cp1258_wctomb then tries a canonical decomposition. */
static const unsigned char cp1258_page00[88] = {
  0xc0, 0xc1, 0xc2, 0x00, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
  0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
  0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7, /* 0xd0-0xd7 */
  0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf, /* 0xd8-0xdf */
  0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
  0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef, /* 0xe8-0xef */
  0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7, /* 0xf0-0xf7 */
  0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff, /* 0xf8-0xff */
  /* 0x0100 */
  0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
  0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
};
/* Unicode to CP1258 for U+0150..U+01B7, indexed by (wc - 0x0150).
   An entry of 0x00 means that the character has no single-byte encoding. */
static const unsigned char cp1258_page01[104] = {
  0x00, 0x00, 0x8c, 0x9c, 0x00, 0x00, 0x00, 0x00, /* 0x50-0x57 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x58-0x5f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x60-0x67 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x68-0x6f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x70-0x77 */
  0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x78-0x7f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0x87 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x88-0x8f */
  0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x90-0x97 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x98-0x9f */
  0xd5, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xa0-0xa7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd, /* 0xa8-0xaf */
  0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xb0-0xb7 */
};
/* Unicode to CP1258 for U+02C0..U+02DF, indexed by (wc - 0x02c0).
   An entry of 0x00 means that the character has no single-byte encoding. */
static const unsigned char cp1258_page02[32] = {
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00, /* 0xc0-0xc7 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xc8-0xcf */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xd0-0xd7 */
  0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00, /* 0xd8-0xdf */
};
/* Unicode to CP1258 for the combining marks U+0300..U+0327, indexed by
   (wc - 0x0300).  cp1258_wctomb also uses the first two entries for the
   deprecated tone marks U+0340 and U+0341.
   An entry of 0x00 means that the character has no single-byte encoding. */
static const unsigned char cp1258_page03[40] = {
  0xcc, 0xec, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x07 */
  0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x08-0x0f */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x18-0x1f */
  0x00, 0x00, 0x00, 0xf2, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */
};
/* Unicode to CP1258 for U+2010..U+203F, indexed by (wc - 0x2010).
   An entry of 0x00 means that the character has no single-byte encoding. */
static const unsigned char cp1258_page20[48] = {
  0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00, /* 0x10-0x17 */
  0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00, /* 0x18-0x1f */
  0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00, /* 0x20-0x27 */
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2f */
  0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
  0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3f */
};

203 static int
cp1258_wctomb(conv_t conv,unsigned char * r,ucs4_t wc,int n)204 cp1258_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
205 {
206 unsigned char c = 0;
207 if (wc < 0x0080) {
208 *r = wc;
209 return 1;
210 }
211 else if (wc >= 0x00a0 && wc < 0x00c0)
212 c = wc;
213 else if (wc >= 0x00c0 && wc < 0x0118)
214 c = cp1258_page00[wc-0x00c0];
215 else if (wc >= 0x0150 && wc < 0x01b8)
216 c = cp1258_page01[wc-0x0150];
217 else if (wc >= 0x02c0 && wc < 0x02e0)
218 c = cp1258_page02[wc-0x02c0];
219 else if (wc >= 0x0300 && wc < 0x0328)
220 c = cp1258_page03[wc-0x0300];
221 else if (wc >= 0x0340 && wc < 0x0342) /* deprecated Vietnamese tone marks */
222 c = cp1258_page03[wc-0x0340];
223 else if (wc >= 0x2010 && wc < 0x2040)
224 c = cp1258_page20[wc-0x2010];
225 else if (wc == 0x20ab)
226 c = 0xfe;
227 else if (wc == 0x20ac)
228 c = 0x80;
229 else if (wc == 0x2122)
230 c = 0x99;
231 if (c != 0) {
232 *r = c;
233 return 1;
234 }
235 /* Try canonical decomposition. */
236 {
237 /* Binary search through viet_decomp_table. */
238 unsigned int i1 = 0;
239 unsigned int i2 = sizeof(viet_decomp_table)/sizeof(viet_decomp_table[0])-1;
240 if (wc >= viet_decomp_table[i1].composed
241 && wc <= viet_decomp_table[i2].composed) {
242 unsigned int i;
243 for (;;) {
244 /* Here i2 - i1 > 0. */
245 i = (i1+i2)>>1;
246 if (wc == viet_decomp_table[i].composed)
247 break;
248 if (wc < viet_decomp_table[i].composed) {
249 if (i1 == i)
250 return RET_ILUNI;
251 /* Here i1 < i < i2. */
252 i2 = i;
253 } else {
254 /* Here i1 <= i < i2. */
255 if (i1 != i)
256 i1 = i;
257 else {
258 /* Here i2 - i1 = 1. */
259 i = i2;
260 if (wc == viet_decomp_table[i].composed)
261 break;
262 else
263 return RET_ILUNI;
264 }
265 }
266 }
267 /* Found a canonical decomposition. */
268 wc = viet_decomp_table[i].base;
269 /* wc is one of 0x0020, 0x0041..0x005a, 0x0061..0x007a, 0x00a5, 0x00a8,
270 0x00c2, 0x00c5..0x00c7, 0x00ca, 0x00cf, 0x00d3, 0x00d4, 0x00d6,
271 0x00d8, 0x00da, 0x00dc, 0x00e2, 0x00e5..0x00e7, 0x00ea, 0x00ef,
272 0x00f3, 0x00f4, 0x00f6, 0x00f8, 0x00fc, 0x0102, 0x0103, 0x01a0,
273 0x01a1, 0x01af, 0x01b0. */
274 if (wc < 0x0100)
275 c = wc;
276 else if (wc < 0x0118)
277 c = cp1258_page00[wc-0x00c0];
278 else
279 c = cp1258_page01[wc-0x0150];
280 if (n < 2)
281 return RET_TOOSMALL;
282 r[0] = c;
283 r[1] = cp1258_comb_table[viet_decomp_table[i].comb1];
284 return 2;
285 }
286 }
287 return RET_ILUNI;
288 }
289