/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */
20*aef5731fSOliver Tappe
/*
 * ISO-2022-CN
 */

/* Specification: RFC 1922 */
26*aef5731fSOliver Tappe
/* Control bytes recognised and emitted by this converter. */
#define ESC 0x1b  /* starts the designations ESC $ ) A/G, ESC $ * H and the ESC N prefix */
#define SO 0x0e   /* switches the shift state to STATE_TWOBYTE */
#define SI 0x0f   /* switches the shift state back to STATE_ASCII */

/*
 * The conversion state is packed into one integer (see SPLIT_STATE and
 * COMBINE_STATE below):
 *   bits 0..7    shift state, one of the STATE_* values
 *   bits 8..15   charset designated by ESC $ ) x, one of the STATE2_* values
 *   bits 16..    charset designated by ESC $ * x, one of the STATE3_* values
 */

/*
 * The state is composed of one of the following values
 */
#define STATE_ASCII 0
#define STATE_TWOBYTE 1
/*
 * and one of the following values, << 8
 */
#define STATE2_NONE 0
#define STATE2_DESIGNATED_GB2312 1
#define STATE2_DESIGNATED_CNS11643_1 2
/*
 * and one of the following values, << 16
 */
#define STATE3_NONE 0
#define STATE3_DESIGNATED_CNS11643_2 1

/*
 * SPLIT_STATE declares the locals state1, state2 and state3 from a variable
 * named `state` that must already be in scope.  It expands to a declaration
 * without the trailing semicolon, so it is written as `SPLIT_STATE;`.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16
/*
 * COMBINE_STATE packs state1, state2 and state3 back into `state`.  It does
 * not store the result anywhere else; callers assign it to the converter.
 */
#define COMBINE_STATE \
  state = (state3 << 16) | (state2 << 8) | state1
52*aef5731fSOliver Tappe
53*aef5731fSOliver Tappe static int
iso2022_cn_mbtowc(conv_t conv,ucs4_t * pwc,const unsigned char * s,int n)54*aef5731fSOliver Tappe iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
55*aef5731fSOliver Tappe {
56*aef5731fSOliver Tappe state_t state = conv->istate;
57*aef5731fSOliver Tappe SPLIT_STATE;
58*aef5731fSOliver Tappe int count = 0;
59*aef5731fSOliver Tappe unsigned char c;
60*aef5731fSOliver Tappe for (;;) {
61*aef5731fSOliver Tappe c = *s;
62*aef5731fSOliver Tappe if (c == ESC) {
63*aef5731fSOliver Tappe if (n < count+4)
64*aef5731fSOliver Tappe goto none;
65*aef5731fSOliver Tappe if (s[1] == '$') {
66*aef5731fSOliver Tappe if (s[2] == ')') {
67*aef5731fSOliver Tappe if (s[3] == 'A') {
68*aef5731fSOliver Tappe state2 = STATE2_DESIGNATED_GB2312;
69*aef5731fSOliver Tappe s += 4; count += 4;
70*aef5731fSOliver Tappe if (n < count+1)
71*aef5731fSOliver Tappe goto none;
72*aef5731fSOliver Tappe continue;
73*aef5731fSOliver Tappe }
74*aef5731fSOliver Tappe if (s[3] == 'G') {
75*aef5731fSOliver Tappe state2 = STATE2_DESIGNATED_CNS11643_1;
76*aef5731fSOliver Tappe s += 4; count += 4;
77*aef5731fSOliver Tappe if (n < count+1)
78*aef5731fSOliver Tappe goto none;
79*aef5731fSOliver Tappe continue;
80*aef5731fSOliver Tappe }
81*aef5731fSOliver Tappe }
82*aef5731fSOliver Tappe if (s[2] == '*') {
83*aef5731fSOliver Tappe if (s[3] == 'H') {
84*aef5731fSOliver Tappe state3 = STATE3_DESIGNATED_CNS11643_2;
85*aef5731fSOliver Tappe s += 4; count += 4;
86*aef5731fSOliver Tappe if (n < count+1)
87*aef5731fSOliver Tappe goto none;
88*aef5731fSOliver Tappe continue;
89*aef5731fSOliver Tappe }
90*aef5731fSOliver Tappe }
91*aef5731fSOliver Tappe }
92*aef5731fSOliver Tappe if (s[1] == 'N') {
93*aef5731fSOliver Tappe switch (state3) {
94*aef5731fSOliver Tappe case STATE3_NONE:
95*aef5731fSOliver Tappe return RET_ILSEQ;
96*aef5731fSOliver Tappe case STATE3_DESIGNATED_CNS11643_2:
97*aef5731fSOliver Tappe if (s[2] < 0x80 && s[3] < 0x80) {
98*aef5731fSOliver Tappe int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
99*aef5731fSOliver Tappe if (ret == RET_ILSEQ)
100*aef5731fSOliver Tappe return RET_ILSEQ;
101*aef5731fSOliver Tappe if (ret != 2) abort();
102*aef5731fSOliver Tappe COMBINE_STATE;
103*aef5731fSOliver Tappe conv->istate = state;
104*aef5731fSOliver Tappe return count+4;
105*aef5731fSOliver Tappe } else
106*aef5731fSOliver Tappe return RET_ILSEQ;
107*aef5731fSOliver Tappe default: abort();
108*aef5731fSOliver Tappe }
109*aef5731fSOliver Tappe }
110*aef5731fSOliver Tappe return RET_ILSEQ;
111*aef5731fSOliver Tappe }
112*aef5731fSOliver Tappe if (c == SO) {
113*aef5731fSOliver Tappe if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
114*aef5731fSOliver Tappe return RET_ILSEQ;
115*aef5731fSOliver Tappe state1 = STATE_TWOBYTE;
116*aef5731fSOliver Tappe s++; count++;
117*aef5731fSOliver Tappe if (n < count+1)
118*aef5731fSOliver Tappe goto none;
119*aef5731fSOliver Tappe continue;
120*aef5731fSOliver Tappe }
121*aef5731fSOliver Tappe if (c == SI) {
122*aef5731fSOliver Tappe state1 = STATE_ASCII;
123*aef5731fSOliver Tappe s++; count++;
124*aef5731fSOliver Tappe if (n < count+1)
125*aef5731fSOliver Tappe goto none;
126*aef5731fSOliver Tappe continue;
127*aef5731fSOliver Tappe }
128*aef5731fSOliver Tappe break;
129*aef5731fSOliver Tappe }
130*aef5731fSOliver Tappe switch (state1) {
131*aef5731fSOliver Tappe case STATE_ASCII:
132*aef5731fSOliver Tappe if (c < 0x80) {
133*aef5731fSOliver Tappe int ret = ascii_mbtowc(conv,pwc,s,1);
134*aef5731fSOliver Tappe if (ret == RET_ILSEQ)
135*aef5731fSOliver Tappe return RET_ILSEQ;
136*aef5731fSOliver Tappe if (ret != 1) abort();
137*aef5731fSOliver Tappe if (*pwc == 0x000a || *pwc == 0x000d) {
138*aef5731fSOliver Tappe state2 = STATE2_NONE; state3 = STATE3_NONE;
139*aef5731fSOliver Tappe }
140*aef5731fSOliver Tappe COMBINE_STATE;
141*aef5731fSOliver Tappe conv->istate = state;
142*aef5731fSOliver Tappe return count+1;
143*aef5731fSOliver Tappe } else
144*aef5731fSOliver Tappe return RET_ILSEQ;
145*aef5731fSOliver Tappe case STATE_TWOBYTE:
146*aef5731fSOliver Tappe if (n < count+2)
147*aef5731fSOliver Tappe goto none;
148*aef5731fSOliver Tappe if (s[0] < 0x80 && s[1] < 0x80) {
149*aef5731fSOliver Tappe int ret;
150*aef5731fSOliver Tappe switch (state2) {
151*aef5731fSOliver Tappe case STATE2_NONE:
152*aef5731fSOliver Tappe return RET_ILSEQ;
153*aef5731fSOliver Tappe case STATE2_DESIGNATED_GB2312:
154*aef5731fSOliver Tappe ret = gb2312_mbtowc(conv,pwc,s,2); break;
155*aef5731fSOliver Tappe case STATE2_DESIGNATED_CNS11643_1:
156*aef5731fSOliver Tappe ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
157*aef5731fSOliver Tappe default: abort();
158*aef5731fSOliver Tappe }
159*aef5731fSOliver Tappe if (ret == RET_ILSEQ)
160*aef5731fSOliver Tappe return RET_ILSEQ;
161*aef5731fSOliver Tappe if (ret != 2) abort();
162*aef5731fSOliver Tappe COMBINE_STATE;
163*aef5731fSOliver Tappe conv->istate = state;
164*aef5731fSOliver Tappe return count+2;
165*aef5731fSOliver Tappe } else
166*aef5731fSOliver Tappe return RET_ILSEQ;
167*aef5731fSOliver Tappe default: abort();
168*aef5731fSOliver Tappe }
169*aef5731fSOliver Tappe
170*aef5731fSOliver Tappe none:
171*aef5731fSOliver Tappe COMBINE_STATE;
172*aef5731fSOliver Tappe conv->istate = state;
173*aef5731fSOliver Tappe return RET_TOOFEW(count);
174*aef5731fSOliver Tappe }
175*aef5731fSOliver Tappe
176*aef5731fSOliver Tappe static int
iso2022_cn_wctomb(conv_t conv,unsigned char * r,ucs4_t wc,int n)177*aef5731fSOliver Tappe iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
178*aef5731fSOliver Tappe {
179*aef5731fSOliver Tappe state_t state = conv->ostate;
180*aef5731fSOliver Tappe SPLIT_STATE;
181*aef5731fSOliver Tappe unsigned char buf[3];
182*aef5731fSOliver Tappe int ret;
183*aef5731fSOliver Tappe
184*aef5731fSOliver Tappe /* There is no need to handle Unicode 3.1 tag characters and to look for
185*aef5731fSOliver Tappe "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
186*aef5731fSOliver Tappe
187*aef5731fSOliver Tappe /* Try ASCII. */
188*aef5731fSOliver Tappe ret = ascii_wctomb(conv,buf,wc,1);
189*aef5731fSOliver Tappe if (ret != RET_ILUNI) {
190*aef5731fSOliver Tappe if (ret != 1) abort();
191*aef5731fSOliver Tappe if (buf[0] < 0x80) {
192*aef5731fSOliver Tappe int count = (state1 == STATE_ASCII ? 1 : 2);
193*aef5731fSOliver Tappe if (n < count)
194*aef5731fSOliver Tappe return RET_TOOSMALL;
195*aef5731fSOliver Tappe if (state1 != STATE_ASCII) {
196*aef5731fSOliver Tappe r[0] = SI;
197*aef5731fSOliver Tappe r += 1;
198*aef5731fSOliver Tappe state1 = STATE_ASCII;
199*aef5731fSOliver Tappe }
200*aef5731fSOliver Tappe r[0] = buf[0];
201*aef5731fSOliver Tappe if (wc == 0x000a || wc == 0x000d) {
202*aef5731fSOliver Tappe state2 = STATE2_NONE; state3 = STATE3_NONE;
203*aef5731fSOliver Tappe }
204*aef5731fSOliver Tappe COMBINE_STATE;
205*aef5731fSOliver Tappe conv->ostate = state;
206*aef5731fSOliver Tappe return count;
207*aef5731fSOliver Tappe }
208*aef5731fSOliver Tappe }
209*aef5731fSOliver Tappe
210*aef5731fSOliver Tappe /* Try GB 2312-1980. */
211*aef5731fSOliver Tappe ret = gb2312_wctomb(conv,buf,wc,2);
212*aef5731fSOliver Tappe if (ret != RET_ILUNI) {
213*aef5731fSOliver Tappe if (ret != 2) abort();
214*aef5731fSOliver Tappe if (buf[0] < 0x80 && buf[1] < 0x80) {
215*aef5731fSOliver Tappe int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
216*aef5731fSOliver Tappe if (n < count)
217*aef5731fSOliver Tappe return RET_TOOSMALL;
218*aef5731fSOliver Tappe if (state2 != STATE2_DESIGNATED_GB2312) {
219*aef5731fSOliver Tappe r[0] = ESC;
220*aef5731fSOliver Tappe r[1] = '$';
221*aef5731fSOliver Tappe r[2] = ')';
222*aef5731fSOliver Tappe r[3] = 'A';
223*aef5731fSOliver Tappe r += 4;
224*aef5731fSOliver Tappe state2 = STATE2_DESIGNATED_GB2312;
225*aef5731fSOliver Tappe }
226*aef5731fSOliver Tappe if (state1 != STATE_TWOBYTE) {
227*aef5731fSOliver Tappe r[0] = SO;
228*aef5731fSOliver Tappe r += 1;
229*aef5731fSOliver Tappe state1 = STATE_TWOBYTE;
230*aef5731fSOliver Tappe }
231*aef5731fSOliver Tappe r[0] = buf[0];
232*aef5731fSOliver Tappe r[1] = buf[1];
233*aef5731fSOliver Tappe COMBINE_STATE;
234*aef5731fSOliver Tappe conv->ostate = state;
235*aef5731fSOliver Tappe return count;
236*aef5731fSOliver Tappe }
237*aef5731fSOliver Tappe }
238*aef5731fSOliver Tappe
239*aef5731fSOliver Tappe ret = cns11643_wctomb(conv,buf,wc,3);
240*aef5731fSOliver Tappe if (ret != RET_ILUNI) {
241*aef5731fSOliver Tappe if (ret != 3) abort();
242*aef5731fSOliver Tappe
243*aef5731fSOliver Tappe /* Try CNS 11643-1992 Plane 1. */
244*aef5731fSOliver Tappe if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
245*aef5731fSOliver Tappe int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
246*aef5731fSOliver Tappe if (n < count)
247*aef5731fSOliver Tappe return RET_TOOSMALL;
248*aef5731fSOliver Tappe if (state2 != STATE2_DESIGNATED_CNS11643_1) {
249*aef5731fSOliver Tappe r[0] = ESC;
250*aef5731fSOliver Tappe r[1] = '$';
251*aef5731fSOliver Tappe r[2] = ')';
252*aef5731fSOliver Tappe r[3] = 'G';
253*aef5731fSOliver Tappe r += 4;
254*aef5731fSOliver Tappe state2 = STATE2_DESIGNATED_CNS11643_1;
255*aef5731fSOliver Tappe }
256*aef5731fSOliver Tappe if (state1 != STATE_TWOBYTE) {
257*aef5731fSOliver Tappe r[0] = SO;
258*aef5731fSOliver Tappe r += 1;
259*aef5731fSOliver Tappe state1 = STATE_TWOBYTE;
260*aef5731fSOliver Tappe }
261*aef5731fSOliver Tappe r[0] = buf[1];
262*aef5731fSOliver Tappe r[1] = buf[2];
263*aef5731fSOliver Tappe COMBINE_STATE;
264*aef5731fSOliver Tappe conv->ostate = state;
265*aef5731fSOliver Tappe return count;
266*aef5731fSOliver Tappe }
267*aef5731fSOliver Tappe
268*aef5731fSOliver Tappe /* Try CNS 11643-1992 Plane 2. */
269*aef5731fSOliver Tappe if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
270*aef5731fSOliver Tappe int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
271*aef5731fSOliver Tappe if (n < count)
272*aef5731fSOliver Tappe return RET_TOOSMALL;
273*aef5731fSOliver Tappe if (state3 != STATE3_DESIGNATED_CNS11643_2) {
274*aef5731fSOliver Tappe r[0] = ESC;
275*aef5731fSOliver Tappe r[1] = '$';
276*aef5731fSOliver Tappe r[2] = '*';
277*aef5731fSOliver Tappe r[3] = 'H';
278*aef5731fSOliver Tappe r += 4;
279*aef5731fSOliver Tappe state3 = STATE3_DESIGNATED_CNS11643_2;
280*aef5731fSOliver Tappe }
281*aef5731fSOliver Tappe r[0] = ESC;
282*aef5731fSOliver Tappe r[1] = 'N';
283*aef5731fSOliver Tappe r[2] = buf[1];
284*aef5731fSOliver Tappe r[3] = buf[2];
285*aef5731fSOliver Tappe COMBINE_STATE;
286*aef5731fSOliver Tappe conv->ostate = state;
287*aef5731fSOliver Tappe return count;
288*aef5731fSOliver Tappe }
289*aef5731fSOliver Tappe }
290*aef5731fSOliver Tappe
291*aef5731fSOliver Tappe return RET_ILUNI;
292*aef5731fSOliver Tappe }
293*aef5731fSOliver Tappe
294*aef5731fSOliver Tappe static int
iso2022_cn_reset(conv_t conv,unsigned char * r,int n)295*aef5731fSOliver Tappe iso2022_cn_reset (conv_t conv, unsigned char *r, int n)
296*aef5731fSOliver Tappe {
297*aef5731fSOliver Tappe state_t state = conv->ostate;
298*aef5731fSOliver Tappe SPLIT_STATE;
299*aef5731fSOliver Tappe (void)state2;
300*aef5731fSOliver Tappe (void)state3;
301*aef5731fSOliver Tappe if (state1 != STATE_ASCII) {
302*aef5731fSOliver Tappe if (n < 1)
303*aef5731fSOliver Tappe return RET_TOOSMALL;
304*aef5731fSOliver Tappe r[0] = SI;
305*aef5731fSOliver Tappe /* conv->ostate = 0; will be done by the caller */
306*aef5731fSOliver Tappe return 1;
307*aef5731fSOliver Tappe } else
308*aef5731fSOliver Tappe return 0;
309*aef5731fSOliver Tappe }
310*aef5731fSOliver Tappe
/* Remove the helper macros defined for this converter.  ESC, SO and SI are
   not undefined here. */
#undef COMBINE_STATE
#undef SPLIT_STATE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE3_NONE
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_NONE
#undef STATE_TWOBYTE
#undef STATE_ASCII
320