xref: /haiku/src/libs/iconv/iso2022_cn.h (revision aef5731f38da6f7b913e0f64acd8a40555491ce5)
/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */

/*
 * ISO-2022-CN
 */

/* Specification: RFC 1922 */
26*aef5731fSOliver Tappe 
/* Control bytes recognised by this converter. */
#define ESC 0x1b
#define SO  0x0e
#define SI  0x0f

/*
 * The conversion state is packed into a single integer made of three
 * fields (see SPLIT_STATE / COMBINE_STATE below).
 *
 * Bits 0..7 ("state1"): the current shift state, one of
 */
#define STATE_ASCII          0
#define STATE_TWOBYTE        1
/*
 * Bits 8..15 ("state2"): the set selected by SO, one of
 */
#define STATE2_NONE                   0
#define STATE2_DESIGNATED_GB2312      1
#define STATE2_DESIGNATED_CNS11643_1  2
/*
 * Bits 16 and up ("state3"): the set reached through ESC N, one of
 */
#define STATE3_NONE                   0
#define STATE3_DESIGNATED_CNS11643_2  1

/*
 * SPLIT_STATE declares the locals state1, state2 and state3 from a
 * variable named `state` that must already be in scope.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, \
               state2 = (state >> 8) & 0xff, \
               state3 = state >> 16
/*
 * COMBINE_STATE packs state1, state2 and state3 back into `state`.
 */
#define COMBINE_STATE \
  state = state1 | (state2 << 8) | (state3 << 16)
52*aef5731fSOliver Tappe 
53*aef5731fSOliver Tappe static int
iso2022_cn_mbtowc(conv_t conv,ucs4_t * pwc,const unsigned char * s,int n)54*aef5731fSOliver Tappe iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
55*aef5731fSOliver Tappe {
56*aef5731fSOliver Tappe   state_t state = conv->istate;
57*aef5731fSOliver Tappe   SPLIT_STATE;
58*aef5731fSOliver Tappe   int count = 0;
59*aef5731fSOliver Tappe   unsigned char c;
60*aef5731fSOliver Tappe   for (;;) {
61*aef5731fSOliver Tappe     c = *s;
62*aef5731fSOliver Tappe     if (c == ESC) {
63*aef5731fSOliver Tappe       if (n < count+4)
64*aef5731fSOliver Tappe         goto none;
65*aef5731fSOliver Tappe       if (s[1] == '$') {
66*aef5731fSOliver Tappe         if (s[2] == ')') {
67*aef5731fSOliver Tappe           if (s[3] == 'A') {
68*aef5731fSOliver Tappe             state2 = STATE2_DESIGNATED_GB2312;
69*aef5731fSOliver Tappe             s += 4; count += 4;
70*aef5731fSOliver Tappe             if (n < count+1)
71*aef5731fSOliver Tappe               goto none;
72*aef5731fSOliver Tappe             continue;
73*aef5731fSOliver Tappe           }
74*aef5731fSOliver Tappe           if (s[3] == 'G') {
75*aef5731fSOliver Tappe             state2 = STATE2_DESIGNATED_CNS11643_1;
76*aef5731fSOliver Tappe             s += 4; count += 4;
77*aef5731fSOliver Tappe             if (n < count+1)
78*aef5731fSOliver Tappe               goto none;
79*aef5731fSOliver Tappe             continue;
80*aef5731fSOliver Tappe           }
81*aef5731fSOliver Tappe         }
82*aef5731fSOliver Tappe         if (s[2] == '*') {
83*aef5731fSOliver Tappe           if (s[3] == 'H') {
84*aef5731fSOliver Tappe             state3 = STATE3_DESIGNATED_CNS11643_2;
85*aef5731fSOliver Tappe             s += 4; count += 4;
86*aef5731fSOliver Tappe             if (n < count+1)
87*aef5731fSOliver Tappe               goto none;
88*aef5731fSOliver Tappe             continue;
89*aef5731fSOliver Tappe           }
90*aef5731fSOliver Tappe         }
91*aef5731fSOliver Tappe       }
92*aef5731fSOliver Tappe       if (s[1] == 'N') {
93*aef5731fSOliver Tappe         switch (state3) {
94*aef5731fSOliver Tappe           case STATE3_NONE:
95*aef5731fSOliver Tappe             return RET_ILSEQ;
96*aef5731fSOliver Tappe           case STATE3_DESIGNATED_CNS11643_2:
97*aef5731fSOliver Tappe             if (s[2] < 0x80 && s[3] < 0x80) {
98*aef5731fSOliver Tappe               int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
99*aef5731fSOliver Tappe               if (ret == RET_ILSEQ)
100*aef5731fSOliver Tappe                 return RET_ILSEQ;
101*aef5731fSOliver Tappe               if (ret != 2) abort();
102*aef5731fSOliver Tappe               COMBINE_STATE;
103*aef5731fSOliver Tappe               conv->istate = state;
104*aef5731fSOliver Tappe               return count+4;
105*aef5731fSOliver Tappe             } else
106*aef5731fSOliver Tappe               return RET_ILSEQ;
107*aef5731fSOliver Tappe           default: abort();
108*aef5731fSOliver Tappe         }
109*aef5731fSOliver Tappe       }
110*aef5731fSOliver Tappe       return RET_ILSEQ;
111*aef5731fSOliver Tappe     }
112*aef5731fSOliver Tappe     if (c == SO) {
113*aef5731fSOliver Tappe       if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
114*aef5731fSOliver Tappe         return RET_ILSEQ;
115*aef5731fSOliver Tappe       state1 = STATE_TWOBYTE;
116*aef5731fSOliver Tappe       s++; count++;
117*aef5731fSOliver Tappe       if (n < count+1)
118*aef5731fSOliver Tappe         goto none;
119*aef5731fSOliver Tappe       continue;
120*aef5731fSOliver Tappe     }
121*aef5731fSOliver Tappe     if (c == SI) {
122*aef5731fSOliver Tappe       state1 = STATE_ASCII;
123*aef5731fSOliver Tappe       s++; count++;
124*aef5731fSOliver Tappe       if (n < count+1)
125*aef5731fSOliver Tappe         goto none;
126*aef5731fSOliver Tappe       continue;
127*aef5731fSOliver Tappe     }
128*aef5731fSOliver Tappe     break;
129*aef5731fSOliver Tappe   }
130*aef5731fSOliver Tappe   switch (state1) {
131*aef5731fSOliver Tappe     case STATE_ASCII:
132*aef5731fSOliver Tappe       if (c < 0x80) {
133*aef5731fSOliver Tappe         int ret = ascii_mbtowc(conv,pwc,s,1);
134*aef5731fSOliver Tappe         if (ret == RET_ILSEQ)
135*aef5731fSOliver Tappe           return RET_ILSEQ;
136*aef5731fSOliver Tappe         if (ret != 1) abort();
137*aef5731fSOliver Tappe         if (*pwc == 0x000a || *pwc == 0x000d) {
138*aef5731fSOliver Tappe           state2 = STATE2_NONE; state3 = STATE3_NONE;
139*aef5731fSOliver Tappe         }
140*aef5731fSOliver Tappe         COMBINE_STATE;
141*aef5731fSOliver Tappe         conv->istate = state;
142*aef5731fSOliver Tappe         return count+1;
143*aef5731fSOliver Tappe       } else
144*aef5731fSOliver Tappe         return RET_ILSEQ;
145*aef5731fSOliver Tappe     case STATE_TWOBYTE:
146*aef5731fSOliver Tappe       if (n < count+2)
147*aef5731fSOliver Tappe         goto none;
148*aef5731fSOliver Tappe       if (s[0] < 0x80 && s[1] < 0x80) {
149*aef5731fSOliver Tappe         int ret;
150*aef5731fSOliver Tappe         switch (state2) {
151*aef5731fSOliver Tappe           case STATE2_NONE:
152*aef5731fSOliver Tappe             return RET_ILSEQ;
153*aef5731fSOliver Tappe           case STATE2_DESIGNATED_GB2312:
154*aef5731fSOliver Tappe             ret = gb2312_mbtowc(conv,pwc,s,2); break;
155*aef5731fSOliver Tappe           case STATE2_DESIGNATED_CNS11643_1:
156*aef5731fSOliver Tappe             ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
157*aef5731fSOliver Tappe           default: abort();
158*aef5731fSOliver Tappe         }
159*aef5731fSOliver Tappe         if (ret == RET_ILSEQ)
160*aef5731fSOliver Tappe           return RET_ILSEQ;
161*aef5731fSOliver Tappe         if (ret != 2) abort();
162*aef5731fSOliver Tappe         COMBINE_STATE;
163*aef5731fSOliver Tappe         conv->istate = state;
164*aef5731fSOliver Tappe         return count+2;
165*aef5731fSOliver Tappe       } else
166*aef5731fSOliver Tappe         return RET_ILSEQ;
167*aef5731fSOliver Tappe     default: abort();
168*aef5731fSOliver Tappe   }
169*aef5731fSOliver Tappe 
170*aef5731fSOliver Tappe none:
171*aef5731fSOliver Tappe   COMBINE_STATE;
172*aef5731fSOliver Tappe   conv->istate = state;
173*aef5731fSOliver Tappe   return RET_TOOFEW(count);
174*aef5731fSOliver Tappe }
175*aef5731fSOliver Tappe 
176*aef5731fSOliver Tappe static int
iso2022_cn_wctomb(conv_t conv,unsigned char * r,ucs4_t wc,int n)177*aef5731fSOliver Tappe iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
178*aef5731fSOliver Tappe {
179*aef5731fSOliver Tappe   state_t state = conv->ostate;
180*aef5731fSOliver Tappe   SPLIT_STATE;
181*aef5731fSOliver Tappe   unsigned char buf[3];
182*aef5731fSOliver Tappe   int ret;
183*aef5731fSOliver Tappe 
184*aef5731fSOliver Tappe   /* There is no need to handle Unicode 3.1 tag characters and to look for
185*aef5731fSOliver Tappe      "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
186*aef5731fSOliver Tappe 
187*aef5731fSOliver Tappe   /* Try ASCII. */
188*aef5731fSOliver Tappe   ret = ascii_wctomb(conv,buf,wc,1);
189*aef5731fSOliver Tappe   if (ret != RET_ILUNI) {
190*aef5731fSOliver Tappe     if (ret != 1) abort();
191*aef5731fSOliver Tappe     if (buf[0] < 0x80) {
192*aef5731fSOliver Tappe       int count = (state1 == STATE_ASCII ? 1 : 2);
193*aef5731fSOliver Tappe       if (n < count)
194*aef5731fSOliver Tappe         return RET_TOOSMALL;
195*aef5731fSOliver Tappe       if (state1 != STATE_ASCII) {
196*aef5731fSOliver Tappe         r[0] = SI;
197*aef5731fSOliver Tappe         r += 1;
198*aef5731fSOliver Tappe         state1 = STATE_ASCII;
199*aef5731fSOliver Tappe       }
200*aef5731fSOliver Tappe       r[0] = buf[0];
201*aef5731fSOliver Tappe       if (wc == 0x000a || wc == 0x000d) {
202*aef5731fSOliver Tappe         state2 = STATE2_NONE; state3 = STATE3_NONE;
203*aef5731fSOliver Tappe       }
204*aef5731fSOliver Tappe       COMBINE_STATE;
205*aef5731fSOliver Tappe       conv->ostate = state;
206*aef5731fSOliver Tappe       return count;
207*aef5731fSOliver Tappe     }
208*aef5731fSOliver Tappe   }
209*aef5731fSOliver Tappe 
210*aef5731fSOliver Tappe   /* Try GB 2312-1980. */
211*aef5731fSOliver Tappe   ret = gb2312_wctomb(conv,buf,wc,2);
212*aef5731fSOliver Tappe   if (ret != RET_ILUNI) {
213*aef5731fSOliver Tappe     if (ret != 2) abort();
214*aef5731fSOliver Tappe     if (buf[0] < 0x80 && buf[1] < 0x80) {
215*aef5731fSOliver Tappe       int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
216*aef5731fSOliver Tappe       if (n < count)
217*aef5731fSOliver Tappe         return RET_TOOSMALL;
218*aef5731fSOliver Tappe       if (state2 != STATE2_DESIGNATED_GB2312) {
219*aef5731fSOliver Tappe         r[0] = ESC;
220*aef5731fSOliver Tappe         r[1] = '$';
221*aef5731fSOliver Tappe         r[2] = ')';
222*aef5731fSOliver Tappe         r[3] = 'A';
223*aef5731fSOliver Tappe         r += 4;
224*aef5731fSOliver Tappe         state2 = STATE2_DESIGNATED_GB2312;
225*aef5731fSOliver Tappe       }
226*aef5731fSOliver Tappe       if (state1 != STATE_TWOBYTE) {
227*aef5731fSOliver Tappe         r[0] = SO;
228*aef5731fSOliver Tappe         r += 1;
229*aef5731fSOliver Tappe         state1 = STATE_TWOBYTE;
230*aef5731fSOliver Tappe       }
231*aef5731fSOliver Tappe       r[0] = buf[0];
232*aef5731fSOliver Tappe       r[1] = buf[1];
233*aef5731fSOliver Tappe       COMBINE_STATE;
234*aef5731fSOliver Tappe       conv->ostate = state;
235*aef5731fSOliver Tappe       return count;
236*aef5731fSOliver Tappe     }
237*aef5731fSOliver Tappe   }
238*aef5731fSOliver Tappe 
239*aef5731fSOliver Tappe   ret = cns11643_wctomb(conv,buf,wc,3);
240*aef5731fSOliver Tappe   if (ret != RET_ILUNI) {
241*aef5731fSOliver Tappe     if (ret != 3) abort();
242*aef5731fSOliver Tappe 
243*aef5731fSOliver Tappe     /* Try CNS 11643-1992 Plane 1. */
244*aef5731fSOliver Tappe     if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
245*aef5731fSOliver Tappe       int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
246*aef5731fSOliver Tappe       if (n < count)
247*aef5731fSOliver Tappe         return RET_TOOSMALL;
248*aef5731fSOliver Tappe       if (state2 != STATE2_DESIGNATED_CNS11643_1) {
249*aef5731fSOliver Tappe         r[0] = ESC;
250*aef5731fSOliver Tappe         r[1] = '$';
251*aef5731fSOliver Tappe         r[2] = ')';
252*aef5731fSOliver Tappe         r[3] = 'G';
253*aef5731fSOliver Tappe         r += 4;
254*aef5731fSOliver Tappe         state2 = STATE2_DESIGNATED_CNS11643_1;
255*aef5731fSOliver Tappe       }
256*aef5731fSOliver Tappe       if (state1 != STATE_TWOBYTE) {
257*aef5731fSOliver Tappe         r[0] = SO;
258*aef5731fSOliver Tappe         r += 1;
259*aef5731fSOliver Tappe         state1 = STATE_TWOBYTE;
260*aef5731fSOliver Tappe       }
261*aef5731fSOliver Tappe       r[0] = buf[1];
262*aef5731fSOliver Tappe       r[1] = buf[2];
263*aef5731fSOliver Tappe       COMBINE_STATE;
264*aef5731fSOliver Tappe       conv->ostate = state;
265*aef5731fSOliver Tappe       return count;
266*aef5731fSOliver Tappe     }
267*aef5731fSOliver Tappe 
268*aef5731fSOliver Tappe     /* Try CNS 11643-1992 Plane 2. */
269*aef5731fSOliver Tappe     if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
270*aef5731fSOliver Tappe       int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
271*aef5731fSOliver Tappe       if (n < count)
272*aef5731fSOliver Tappe         return RET_TOOSMALL;
273*aef5731fSOliver Tappe       if (state3 != STATE3_DESIGNATED_CNS11643_2) {
274*aef5731fSOliver Tappe         r[0] = ESC;
275*aef5731fSOliver Tappe         r[1] = '$';
276*aef5731fSOliver Tappe         r[2] = '*';
277*aef5731fSOliver Tappe         r[3] = 'H';
278*aef5731fSOliver Tappe         r += 4;
279*aef5731fSOliver Tappe         state3 = STATE3_DESIGNATED_CNS11643_2;
280*aef5731fSOliver Tappe       }
281*aef5731fSOliver Tappe       r[0] = ESC;
282*aef5731fSOliver Tappe       r[1] = 'N';
283*aef5731fSOliver Tappe       r[2] = buf[1];
284*aef5731fSOliver Tappe       r[3] = buf[2];
285*aef5731fSOliver Tappe       COMBINE_STATE;
286*aef5731fSOliver Tappe       conv->ostate = state;
287*aef5731fSOliver Tappe       return count;
288*aef5731fSOliver Tappe     }
289*aef5731fSOliver Tappe   }
290*aef5731fSOliver Tappe 
291*aef5731fSOliver Tappe   return RET_ILUNI;
292*aef5731fSOliver Tappe }
293*aef5731fSOliver Tappe 
294*aef5731fSOliver Tappe static int
iso2022_cn_reset(conv_t conv,unsigned char * r,int n)295*aef5731fSOliver Tappe iso2022_cn_reset (conv_t conv, unsigned char *r, int n)
296*aef5731fSOliver Tappe {
297*aef5731fSOliver Tappe   state_t state = conv->ostate;
298*aef5731fSOliver Tappe   SPLIT_STATE;
299*aef5731fSOliver Tappe   (void)state2;
300*aef5731fSOliver Tappe   (void)state3;
301*aef5731fSOliver Tappe   if (state1 != STATE_ASCII) {
302*aef5731fSOliver Tappe     if (n < 1)
303*aef5731fSOliver Tappe       return RET_TOOSMALL;
304*aef5731fSOliver Tappe     r[0] = SI;
305*aef5731fSOliver Tappe     /* conv->ostate = 0; will be done by the caller */
306*aef5731fSOliver Tappe     return 1;
307*aef5731fSOliver Tappe   } else
308*aef5731fSOliver Tappe     return 0;
309*aef5731fSOliver Tappe }
310*aef5731fSOliver Tappe 
/* Remove the state macros defined above by this file (ESC, SO and SI are
   left defined). */
#undef STATE_ASCII
#undef STATE_TWOBYTE
#undef STATE2_NONE
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE3_NONE
#undef STATE3_DESIGNATED_CNS11643_2
#undef SPLIT_STATE
#undef COMBINE_STATE
320