xref: /haiku/src/libs/iconv/cp1258.h (revision 21258e2674226d6aa732321b6f8494841895af5f)
1 /*
2  * Copyright (C) 1999-2001, 2004 Free Software Foundation, Inc.
3  * This file is part of the GNU LIBICONV Library.
4  *
5  * The GNU LIBICONV Library is free software; you can redistribute it
6  * and/or modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * The GNU LIBICONV Library is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18  * Fifth Floor, Boston, MA 02110-1301, USA.
19  */
20 
21 /*
22  * CP1258
23  */
24 
25 #include "flushwc.h"
26 #include "vietcomb.h"
27 
/* CP1258 bytes of the five combining tone marks.  cp1258_wctomb indexes
   this table with the comb1 field of a viet_decomp_table entry. */
static const unsigned char cp1258_comb_table[] = {
  0xcc, /* U+0300 */
  0xec, /* U+0301 */
  0xde, /* U+0303 */
  0xd2, /* U+0309 */
  0xf2, /* U+0323 */
};
31 
/* Bitmap of the possible bases in viet_comp_table_data.  For a character
   wc in 0x0040..0x01BF, the flag is bit (wc & 0x1f) of the word at index
   (wc - 0x0040) >> 5.  The bases are:
   0x0041..0x0045, 0x0047..0x0049, 0x004B..0x0050, 0x0052..0x0057,
   0x0059..0x005A, 0x0061..0x0065, 0x0067..0x0069, 0x006B..0x0070,
   0x0072..0x0077, 0x0079..0x007A, 0x00A5, 0x00A8, 0x00C2, 0x00C5..0x00C7,
   0x00CA, 0x00CF, 0x00D3..0x00D4, 0x00D6, 0x00D8, 0x00DA, 0x00DC, 0x00E2,
   0x00E5..0x00E7, 0x00EA, 0x00EF, 0x00F3..0x00F4, 0x00F6, 0x00F8, 0x00FA,
   0x00FC, 0x0102..0x0103, 0x01A0..0x01A1, 0x01AF..0x01B0. */
static const unsigned int cp1258_comp_bases[] = {
  0x06fdfbbe, /* 0x0040..0x005F */
  0x06fdfbbe, /* 0x0060..0x007F */
  0x00000000, /* 0x0080..0x009F */
  0x00000120, /* 0x00A0..0x00BF */
  0x155884e4, /* 0x00C0..0x00DF */
  0x155884e4, /* 0x00E0..0x00FF */
  0x0000000c, /* 0x0100..0x011F */
  0x00000000, /* 0x0120..0x013F */
  0x00000000, /* 0x0140..0x015F */
  0x00000000, /* 0x0160..0x017F */
  0x00000000, /* 0x0180..0x019F */
  0x00018003  /* 0x01A0..0x01BF */
};
43 
/* Unicode value of each CP1258 byte 0x80..0xff, indexed by byte - 0x80.
   0xfffd marks a byte that cp1258_mbtowc rejects with RET_ILSEQ. */
static const unsigned short cp1258_2uni[128] = {
  0x20ac, 0xfffd, 0x201a, 0x0192, 0x201e, 0x2026, 0x2020, 0x2021, /* 0x80-0x87 */
  0x02c6, 0x2030, 0xfffd, 0x2039, 0x0152, 0xfffd, 0xfffd, 0xfffd, /* 0x88-0x8f */
  0xfffd, 0x2018, 0x2019, 0x201c, 0x201d, 0x2022, 0x2013, 0x2014, /* 0x90-0x97 */
  0x02dc, 0x2122, 0xfffd, 0x203a, 0x0153, 0xfffd, 0xfffd, 0x0178, /* 0x98-0x9f */
  0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 0x00a7, /* 0xa0-0xa7 */
  0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, /* 0xa8-0xaf */
  0x00b0, 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, /* 0xb0-0xb7 */
  0x00b8, 0x00b9, 0x00ba, 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, /* 0xb8-0xbf */
  0x00c0, 0x00c1, 0x00c2, 0x0102, 0x00c4, 0x00c5, 0x00c6, 0x00c7, /* 0xc0-0xc7 */
  0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x0300, 0x00cd, 0x00ce, 0x00cf, /* 0xc8-0xcf */
  0x0110, 0x00d1, 0x0309, 0x00d3, 0x00d4, 0x01a0, 0x00d6, 0x00d7, /* 0xd0-0xd7 */
  0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x01af, 0x0303, 0x00df, /* 0xd8-0xdf */
  0x00e0, 0x00e1, 0x00e2, 0x0103, 0x00e4, 0x00e5, 0x00e6, 0x00e7, /* 0xe0-0xe7 */
  0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x0301, 0x00ed, 0x00ee, 0x00ef, /* 0xe8-0xef */
  0x0111, 0x00f1, 0x0323, 0x00f3, 0x00f4, 0x01a1, 0x00f6, 0x00f7, /* 0xf0-0xf7 */
  0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x01b0, 0x20ab, 0x00ff, /* 0xf8-0xff */
};
70 
71 /* In the CP1258 to Unicode direction, the state contains a buffered
72    character, or 0 if none. */
73 
74 static int
75 cp1258_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
76 {
77   unsigned char c = *s;
78   unsigned short wc;
79   unsigned short last_wc;
80   if (c < 0x80) {
81     wc = c;
82   } else {
83     wc = cp1258_2uni[c-0x80];
84     if (wc == 0xfffd)
85       return RET_ILSEQ;
86   }
87   last_wc = conv->istate;
88   if (last_wc) {
89     if (wc >= 0x0300 && wc < 0x0340) {
90       /* See whether last_wc and wc can be combined. */
91       unsigned int k;
92       unsigned int i1, i2;
93       switch (wc) {
94         case 0x0300: k = 0; break;
95         case 0x0301: k = 1; break;
96         case 0x0303: k = 2; break;
97         case 0x0309: k = 3; break;
98         case 0x0323: k = 4; break;
99         default: abort();
100       }
101       i1 = viet_comp_table[k].idx;
102       i2 = i1 + viet_comp_table[k].len-1;
103       if (last_wc >= viet_comp_table_data[i1].base
104           && last_wc <= viet_comp_table_data[i2].base) {
105         unsigned int i;
106         for (;;) {
107           i = (i1+i2)>>1;
108           if (last_wc == viet_comp_table_data[i].base)
109             break;
110           if (last_wc < viet_comp_table_data[i].base) {
111             if (i1 == i)
112               goto not_combining;
113             i2 = i;
114           } else {
115             if (i1 != i)
116               i1 = i;
117             else {
118               i = i2;
119               if (last_wc == viet_comp_table_data[i].base)
120                 break;
121               goto not_combining;
122             }
123           }
124         }
125         last_wc = viet_comp_table_data[i].composed;
126         /* Output the combined character. */
127         conv->istate = 0;
128         *pwc = (ucs4_t) last_wc;
129         return 1;
130       }
131     }
132   not_combining:
133     /* Output the buffered character. */
134     conv->istate = 0;
135     *pwc = (ucs4_t) last_wc;
136     return 0; /* Don't advance the input pointer. */
137   }
138   if (wc >= 0x0041 && wc <= 0x01b0
139       && ((cp1258_comp_bases[(wc - 0x0040) >> 5] >> (wc & 0x1f)) & 1)) {
140     /* wc is a possible match in viet_comp_table_data. Buffer it. */
141     conv->istate = wc;
142     return RET_TOOFEW(1);
143   } else {
144     /* Output wc immediately. */
145     *pwc = (ucs4_t) wc;
146     return 1;
147   }
148 }
149 
/* cp1258_flushwc is an alias for normal_flushwc, which is not defined in
   this file (presumably it comes from "flushwc.h" -- confirm). */
#define cp1258_flushwc normal_flushwc
151 
/* CP1258 byte for each of U+00C0..U+0117, indexed by wc - 0x00c0.
   0x00 means the character has no single-byte encoding. */
static const unsigned char cp1258_page00[88] = {
  /* U+00C0 */ 0xc0, 0xc1, 0xc2, 0x00, 0xc4, 0xc5, 0xc6, 0xc7,
  /* U+00C8 */ 0xc8, 0xc9, 0xca, 0xcb, 0x00, 0xcd, 0xce, 0xcf,
  /* U+00D0 */ 0x00, 0xd1, 0x00, 0xd3, 0xd4, 0x00, 0xd6, 0xd7,
  /* U+00D8 */ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0x00, 0x00, 0xdf,
  /* U+00E0 */ 0xe0, 0xe1, 0xe2, 0x00, 0xe4, 0xe5, 0xe6, 0xe7,
  /* U+00E8 */ 0xe8, 0xe9, 0xea, 0xeb, 0x00, 0xed, 0xee, 0xef,
  /* U+00F0 */ 0x00, 0xf1, 0x00, 0xf3, 0xf4, 0x00, 0xf6, 0xf7,
  /* U+00F8 */ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0x00, 0x00, 0xff,
  /* U+0100 */ 0x00, 0x00, 0xc3, 0xe3, 0x00, 0x00, 0x00, 0x00,
  /* U+0108 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0110 */ 0xd0, 0xf0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
/* CP1258 byte for each of U+0150..U+01B7, indexed by wc - 0x0150.
   0x00 means the character has no single-byte encoding. */
static const unsigned char cp1258_page01[104] = {
  /* U+0150 */ 0x00, 0x00, 0x8c, 0x9c, 0x00, 0x00, 0x00, 0x00,
  /* U+0158 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0160 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0168 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0170 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0178 */ 0x9f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0180 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0188 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0190 */ 0x00, 0x00, 0x83, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0198 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+01A0 */ 0xd5, 0xf5, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+01A8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xdd,
  /* U+01B0 */ 0xfd, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
};
/* CP1258 byte for each of U+02C0..U+02DF, indexed by wc - 0x02c0.
   0x00 means the character has no single-byte encoding. */
static const unsigned char cp1258_page02[32] = {
  /* U+02C0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x88, 0x00,
  /* U+02C8 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+02D0 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+02D8 */ 0x00, 0x00, 0x00, 0x00, 0x98, 0x00, 0x00, 0x00,
};
/* CP1258 byte for each of U+0300..U+0327, indexed by wc - 0x0300.
   0x00 means the character has no single-byte encoding.  cp1258_wctomb
   also reads the first two entries for U+0340 and U+0341. */
static const unsigned char cp1258_page03[40] = {
  /* U+0300 */ 0xcc, 0xec, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00,
  /* U+0308 */ 0x00, 0xd2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0310 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0318 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+0320 */ 0x00, 0x00, 0x00, 0xf2, 0x00, 0x00, 0x00, 0x00,
};
/* CP1258 byte for each of U+2010..U+203F, indexed by wc - 0x2010.
   0x00 means the character has no single-byte encoding. */
static const unsigned char cp1258_page20[48] = {
  /* U+2010 */ 0x00, 0x00, 0x00, 0x96, 0x97, 0x00, 0x00, 0x00,
  /* U+2018 */ 0x91, 0x92, 0x82, 0x00, 0x93, 0x94, 0x84, 0x00,
  /* U+2020 */ 0x86, 0x87, 0x95, 0x00, 0x00, 0x00, 0x85, 0x00,
  /* U+2028 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+2030 */ 0x89, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  /* U+2038 */ 0x00, 0x8b, 0x9b, 0x00, 0x00, 0x00, 0x00, 0x00,
};
202 
203 static int
204 cp1258_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
205 {
206   unsigned char c = 0;
207   if (wc < 0x0080) {
208     *r = wc;
209     return 1;
210   }
211   else if (wc >= 0x00a0 && wc < 0x00c0)
212     c = wc;
213   else if (wc >= 0x00c0 && wc < 0x0118)
214     c = cp1258_page00[wc-0x00c0];
215   else if (wc >= 0x0150 && wc < 0x01b8)
216     c = cp1258_page01[wc-0x0150];
217   else if (wc >= 0x02c0 && wc < 0x02e0)
218     c = cp1258_page02[wc-0x02c0];
219   else if (wc >= 0x0300 && wc < 0x0328)
220     c = cp1258_page03[wc-0x0300];
221   else if (wc >= 0x0340 && wc < 0x0342) /* deprecated Vietnamese tone marks */
222     c = cp1258_page03[wc-0x0340];
223   else if (wc >= 0x2010 && wc < 0x2040)
224     c = cp1258_page20[wc-0x2010];
225   else if (wc == 0x20ab)
226     c = 0xfe;
227   else if (wc == 0x20ac)
228     c = 0x80;
229   else if (wc == 0x2122)
230     c = 0x99;
231   if (c != 0) {
232     *r = c;
233     return 1;
234   }
235   /* Try canonical decomposition. */
236   {
237     /* Binary search through viet_decomp_table. */
238     unsigned int i1 = 0;
239     unsigned int i2 = sizeof(viet_decomp_table)/sizeof(viet_decomp_table[0])-1;
240     if (wc >= viet_decomp_table[i1].composed
241         && wc <= viet_decomp_table[i2].composed) {
242       unsigned int i;
243       for (;;) {
244         /* Here i2 - i1 > 0. */
245         i = (i1+i2)>>1;
246         if (wc == viet_decomp_table[i].composed)
247           break;
248         if (wc < viet_decomp_table[i].composed) {
249           if (i1 == i)
250             return RET_ILUNI;
251           /* Here i1 < i < i2. */
252           i2 = i;
253         } else {
254           /* Here i1 <= i < i2. */
255           if (i1 != i)
256             i1 = i;
257           else {
258             /* Here i2 - i1 = 1. */
259             i = i2;
260             if (wc == viet_decomp_table[i].composed)
261               break;
262             else
263               return RET_ILUNI;
264           }
265         }
266       }
267       /* Found a canonical decomposition. */
268       wc = viet_decomp_table[i].base;
269       /* wc is one of 0x0020, 0x0041..0x005a, 0x0061..0x007a, 0x00a5, 0x00a8,
270          0x00c2, 0x00c5..0x00c7, 0x00ca, 0x00cf, 0x00d3, 0x00d4, 0x00d6,
271          0x00d8, 0x00da, 0x00dc, 0x00e2, 0x00e5..0x00e7, 0x00ea, 0x00ef,
272          0x00f3, 0x00f4, 0x00f6, 0x00f8, 0x00fc, 0x0102, 0x0103, 0x01a0,
273          0x01a1, 0x01af, 0x01b0. */
274       if (wc < 0x0100)
275         c = wc;
276       else if (wc < 0x0118)
277         c = cp1258_page00[wc-0x00c0];
278       else
279         c = cp1258_page01[wc-0x0150];
280       if (n < 2)
281         return RET_TOOSMALL;
282       r[0] = c;
283       r[1] = cp1258_comb_table[viet_decomp_table[i].comb1];
284       return 2;
285     }
286   }
287   return RET_ILUNI;
288 }
289