xref: /haiku/src/libs/iconv/iso2022_cn.h (revision 68ea01249e1e2088933cb12f9c28d4e5c5d1c9ef)
1 /*
2  * Copyright (C) 1999-2001 Free Software Foundation, Inc.
3  * This file is part of the GNU LIBICONV Library.
4  *
5  * The GNU LIBICONV Library is free software; you can redistribute it
6  * and/or modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * The GNU LIBICONV Library is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18  * Fifth Floor, Boston, MA 02110-1301, USA.
19  */
20 
21 /*
22  * ISO-2022-CN
23  */
24 
25 /* Specification: RFC 1922 */
26 
/* Control bytes tested by the decoder and emitted by the encoder below. */
#define ESC 0x1b  /* first byte of every four-byte escape sequence used here */
#define SO  0x0e  /* switches the stream to two-byte mode */
#define SI  0x0f  /* switches the stream back to ASCII mode */

/*
 * The conversion state packs three fields into one integer; SPLIT_STATE
 * and COMBINE_STATE below unpack and repack them.
 *
 * Bits 0-7 hold the current shift mode, one of the following values
 */
#define STATE_ASCII          0
#define STATE_TWOBYTE        1
/*
 * Bits 8-15 (value << 8) hold the set used in two-byte mode:
 * set by ESC $ ) A (GB 2312) or ESC $ ) G (CNS 11643 plane 1).
 * One of the following values
 */
#define STATE2_NONE                   0
#define STATE2_DESIGNATED_GB2312      1
#define STATE2_DESIGNATED_CNS11643_1  2
/*
 * Bits 16 and up (value << 16) record whether ESC $ * H has designated
 * CNS 11643 plane 2, which is required before ESC N may be used.
 * One of the following values
 */
#define STATE3_NONE                   0
#define STATE3_DESIGNATED_CNS11643_2  1

/*
 * Declares the locals state1, state2 and state3, unpacked from a variable
 * named `state` that must already be in scope at the point of use.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16
/*
 * Repacks state1, state2 and state3 into `state`. It does not store the
 * result in the conversion descriptor; callers do that themselves.
 */
#define COMBINE_STATE \
  state = (state3 << 16) | (state2 << 8) | state1
52 
53 static int
54 iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
55 {
56   state_t state = conv->istate;
57   SPLIT_STATE;
58   int count = 0;
59   unsigned char c;
60   for (;;) {
61     c = *s;
62     if (c == ESC) {
63       if (n < count+4)
64         goto none;
65       if (s[1] == '$') {
66         if (s[2] == ')') {
67           if (s[3] == 'A') {
68             state2 = STATE2_DESIGNATED_GB2312;
69             s += 4; count += 4;
70             if (n < count+1)
71               goto none;
72             continue;
73           }
74           if (s[3] == 'G') {
75             state2 = STATE2_DESIGNATED_CNS11643_1;
76             s += 4; count += 4;
77             if (n < count+1)
78               goto none;
79             continue;
80           }
81         }
82         if (s[2] == '*') {
83           if (s[3] == 'H') {
84             state3 = STATE3_DESIGNATED_CNS11643_2;
85             s += 4; count += 4;
86             if (n < count+1)
87               goto none;
88             continue;
89           }
90         }
91       }
92       if (s[1] == 'N') {
93         switch (state3) {
94           case STATE3_NONE:
95             return RET_ILSEQ;
96           case STATE3_DESIGNATED_CNS11643_2:
97             if (s[2] < 0x80 && s[3] < 0x80) {
98               int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
99               if (ret == RET_ILSEQ)
100                 return RET_ILSEQ;
101               if (ret != 2) abort();
102               COMBINE_STATE;
103               conv->istate = state;
104               return count+4;
105             } else
106               return RET_ILSEQ;
107           default: abort();
108         }
109       }
110       return RET_ILSEQ;
111     }
112     if (c == SO) {
113       if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1)
114         return RET_ILSEQ;
115       state1 = STATE_TWOBYTE;
116       s++; count++;
117       if (n < count+1)
118         goto none;
119       continue;
120     }
121     if (c == SI) {
122       state1 = STATE_ASCII;
123       s++; count++;
124       if (n < count+1)
125         goto none;
126       continue;
127     }
128     break;
129   }
130   switch (state1) {
131     case STATE_ASCII:
132       if (c < 0x80) {
133         int ret = ascii_mbtowc(conv,pwc,s,1);
134         if (ret == RET_ILSEQ)
135           return RET_ILSEQ;
136         if (ret != 1) abort();
137         if (*pwc == 0x000a || *pwc == 0x000d) {
138           state2 = STATE2_NONE; state3 = STATE3_NONE;
139         }
140         COMBINE_STATE;
141         conv->istate = state;
142         return count+1;
143       } else
144         return RET_ILSEQ;
145     case STATE_TWOBYTE:
146       if (n < count+2)
147         goto none;
148       if (s[0] < 0x80 && s[1] < 0x80) {
149         int ret;
150         switch (state2) {
151           case STATE2_NONE:
152             return RET_ILSEQ;
153           case STATE2_DESIGNATED_GB2312:
154             ret = gb2312_mbtowc(conv,pwc,s,2); break;
155           case STATE2_DESIGNATED_CNS11643_1:
156             ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
157           default: abort();
158         }
159         if (ret == RET_ILSEQ)
160           return RET_ILSEQ;
161         if (ret != 2) abort();
162         COMBINE_STATE;
163         conv->istate = state;
164         return count+2;
165       } else
166         return RET_ILSEQ;
167     default: abort();
168   }
169 
170 none:
171   COMBINE_STATE;
172   conv->istate = state;
173   return RET_TOOFEW(count);
174 }
175 
176 static int
177 iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
178 {
179   state_t state = conv->ostate;
180   SPLIT_STATE;
181   unsigned char buf[3];
182   int ret;
183 
184   /* There is no need to handle Unicode 3.1 tag characters and to look for
185      "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
186 
187   /* Try ASCII. */
188   ret = ascii_wctomb(conv,buf,wc,1);
189   if (ret != RET_ILUNI) {
190     if (ret != 1) abort();
191     if (buf[0] < 0x80) {
192       int count = (state1 == STATE_ASCII ? 1 : 2);
193       if (n < count)
194         return RET_TOOSMALL;
195       if (state1 != STATE_ASCII) {
196         r[0] = SI;
197         r += 1;
198         state1 = STATE_ASCII;
199       }
200       r[0] = buf[0];
201       if (wc == 0x000a || wc == 0x000d) {
202         state2 = STATE2_NONE; state3 = STATE3_NONE;
203       }
204       COMBINE_STATE;
205       conv->ostate = state;
206       return count;
207     }
208   }
209 
210   /* Try GB 2312-1980. */
211   ret = gb2312_wctomb(conv,buf,wc,2);
212   if (ret != RET_ILUNI) {
213     if (ret != 2) abort();
214     if (buf[0] < 0x80 && buf[1] < 0x80) {
215       int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
216       if (n < count)
217         return RET_TOOSMALL;
218       if (state2 != STATE2_DESIGNATED_GB2312) {
219         r[0] = ESC;
220         r[1] = '$';
221         r[2] = ')';
222         r[3] = 'A';
223         r += 4;
224         state2 = STATE2_DESIGNATED_GB2312;
225       }
226       if (state1 != STATE_TWOBYTE) {
227         r[0] = SO;
228         r += 1;
229         state1 = STATE_TWOBYTE;
230       }
231       r[0] = buf[0];
232       r[1] = buf[1];
233       COMBINE_STATE;
234       conv->ostate = state;
235       return count;
236     }
237   }
238 
239   ret = cns11643_wctomb(conv,buf,wc,3);
240   if (ret != RET_ILUNI) {
241     if (ret != 3) abort();
242 
243     /* Try CNS 11643-1992 Plane 1. */
244     if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
245       int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
246       if (n < count)
247         return RET_TOOSMALL;
248       if (state2 != STATE2_DESIGNATED_CNS11643_1) {
249         r[0] = ESC;
250         r[1] = '$';
251         r[2] = ')';
252         r[3] = 'G';
253         r += 4;
254         state2 = STATE2_DESIGNATED_CNS11643_1;
255       }
256       if (state1 != STATE_TWOBYTE) {
257         r[0] = SO;
258         r += 1;
259         state1 = STATE_TWOBYTE;
260       }
261       r[0] = buf[1];
262       r[1] = buf[2];
263       COMBINE_STATE;
264       conv->ostate = state;
265       return count;
266     }
267 
268     /* Try CNS 11643-1992 Plane 2. */
269     if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
270       int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
271       if (n < count)
272         return RET_TOOSMALL;
273       if (state3 != STATE3_DESIGNATED_CNS11643_2) {
274         r[0] = ESC;
275         r[1] = '$';
276         r[2] = '*';
277         r[3] = 'H';
278         r += 4;
279         state3 = STATE3_DESIGNATED_CNS11643_2;
280       }
281       r[0] = ESC;
282       r[1] = 'N';
283       r[2] = buf[1];
284       r[3] = buf[2];
285       COMBINE_STATE;
286       conv->ostate = state;
287       return count;
288     }
289   }
290 
291   return RET_ILUNI;
292 }
293 
294 static int
295 iso2022_cn_reset (conv_t conv, unsigned char *r, int n)
296 {
297   state_t state = conv->ostate;
298   SPLIT_STATE;
299   (void)state2;
300   (void)state3;
301   if (state1 != STATE_ASCII) {
302     if (n < 1)
303       return RET_TOOSMALL;
304     r[0] = SI;
305     /* conv->ostate = 0; will be done by the caller */
306     return 1;
307   } else
308     return 0;
309 }
310 
/*
 * Remove the state macros defined earlier in this file.
 * ESC, SO and SI are not undefined here.
 */
#undef COMBINE_STATE
#undef SPLIT_STATE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE3_NONE
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_NONE
#undef STATE_TWOBYTE
#undef STATE_ASCII
320