/*
 * Copyright (C) 1999-2001 Free Software Foundation, Inc.
 * This file is part of the GNU LIBICONV Library.
 *
 * The GNU LIBICONV Library is free software; you can redistribute it
 * and/or modify it under the terms of the GNU Library General Public
 * License as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * The GNU LIBICONV Library is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Library General Public License for more details.
 *
 * You should have received a copy of the GNU Library General Public
 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301, USA.
 */

/*
 * ISO-2022-CN-EXT
 */

/* Specification: RFC 1922 */

/* Control bytes used by the encoding: escape, shift-out, shift-in. */
#define ESC 0x1b
#define SO 0x0e
#define SI 0x0f

/*
 * The conversion state is one integer holding four fields.
 *
 * Bits 0..7: the current shift state, one of the following values
 */
#define STATE_ASCII 0
#define STATE_TWOBYTE 1
/*
 * bits 8..15: the set selected by ESC $ ) F and used after SO,
 * one of the following values, << 8
 */
#define STATE2_NONE 0
#define STATE2_DESIGNATED_GB2312 1
#define STATE2_DESIGNATED_CNS11643_1 2
#define STATE2_DESIGNATED_ISO_IR_165 3
/*
 * bits 16..23: the set selected by ESC $ * F and used after ESC N,
 * one of the following values, << 16
 */
#define STATE3_NONE 0
#define STATE3_DESIGNATED_CNS11643_2 1
/*
 * bits 24 and up: the set selected by ESC $ + F and used after ESC O,
 * one of the following values, << 24
 */
#define STATE4_NONE 0
#define STATE4_DESIGNATED_CNS11643_3 1
#define STATE4_DESIGNATED_CNS11643_4 2
#define STATE4_DESIGNATED_CNS11643_5 3
#define STATE4_DESIGNATED_CNS11643_6 4
#define STATE4_DESIGNATED_CNS11643_7 5

/*
 * SPLIT_STATE declares the locals state1..state4 from a variable named
 * `state' that must be in scope; COMBINE_STATE packs them back into it.
 */
#define SPLIT_STATE \
  unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
#define COMBINE_STATE \
  state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1

63 static int
iso2022_cn_ext_mbtowc(conv_t conv,ucs4_t * pwc,const unsigned char * s,int n)64 iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
65 {
66 state_t state = conv->istate;
67 SPLIT_STATE;
68 int count = 0;
69 unsigned char c;
70 for (;;) {
71 c = *s;
72 if (c == ESC) {
73 if (n < count+4)
74 goto none;
75 if (s[1] == '$') {
76 if (s[2] == ')') {
77 if (s[3] == 'A') {
78 state2 = STATE2_DESIGNATED_GB2312;
79 s += 4; count += 4;
80 if (n < count+1)
81 goto none;
82 continue;
83 }
84 if (s[3] == 'G') {
85 state2 = STATE2_DESIGNATED_CNS11643_1;
86 s += 4; count += 4;
87 if (n < count+1)
88 goto none;
89 continue;
90 }
91 if (s[3] == 'E') {
92 state2 = STATE2_DESIGNATED_ISO_IR_165;
93 s += 4; count += 4;
94 if (n < count+1)
95 goto none;
96 continue;
97 }
98 }
99 if (s[2] == '*') {
100 if (s[3] == 'H') {
101 state3 = STATE3_DESIGNATED_CNS11643_2;
102 s += 4; count += 4;
103 if (n < count+1)
104 goto none;
105 continue;
106 }
107 }
108 if (s[2] == '+') {
109 if (s[3] == 'I') {
110 state4 = STATE4_DESIGNATED_CNS11643_3;
111 s += 4; count += 4;
112 if (n < count+1)
113 goto none;
114 continue;
115 }
116 if (s[3] == 'J') {
117 state4 = STATE4_DESIGNATED_CNS11643_4;
118 s += 4; count += 4;
119 if (n < count+1)
120 goto none;
121 continue;
122 }
123 if (s[3] == 'K') {
124 state4 = STATE4_DESIGNATED_CNS11643_5;
125 s += 4; count += 4;
126 if (n < count+1)
127 goto none;
128 continue;
129 }
130 if (s[3] == 'L') {
131 state4 = STATE4_DESIGNATED_CNS11643_6;
132 s += 4; count += 4;
133 if (n < count+1)
134 goto none;
135 continue;
136 }
137 if (s[3] == 'M') {
138 state4 = STATE4_DESIGNATED_CNS11643_7;
139 s += 4; count += 4;
140 if (n < count+1)
141 goto none;
142 continue;
143 }
144 }
145 }
146 if (s[1] == 'N') {
147 switch (state3) {
148 case STATE3_NONE:
149 return RET_ILSEQ;
150 case STATE3_DESIGNATED_CNS11643_2:
151 if (s[2] < 0x80 && s[3] < 0x80) {
152 int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
153 if (ret == RET_ILSEQ)
154 return RET_ILSEQ;
155 if (ret != 2) abort();
156 COMBINE_STATE;
157 conv->istate = state;
158 return count+4;
159 } else
160 return RET_ILSEQ;
161 default: abort();
162 }
163 }
164 if (s[1] == 'O') {
165 switch (state4) {
166 case STATE4_NONE:
167 return RET_ILSEQ;
168 case STATE4_DESIGNATED_CNS11643_3:
169 if (s[2] < 0x80 && s[3] < 0x80) {
170 int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
171 if (ret == RET_ILSEQ)
172 return RET_ILSEQ;
173 if (ret != 2) abort();
174 COMBINE_STATE;
175 conv->istate = state;
176 return count+4;
177 } else
178 return RET_ILSEQ;
179 case STATE4_DESIGNATED_CNS11643_4:
180 if (s[2] < 0x80 && s[3] < 0x80) {
181 int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
182 if (ret == RET_ILSEQ)
183 return RET_ILSEQ;
184 if (ret != 2) abort();
185 COMBINE_STATE;
186 conv->istate = state;
187 return count+4;
188 } else
189 return RET_ILSEQ;
190 case STATE4_DESIGNATED_CNS11643_5:
191 if (s[2] < 0x80 && s[3] < 0x80) {
192 int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
193 if (ret == RET_ILSEQ)
194 return RET_ILSEQ;
195 if (ret != 2) abort();
196 COMBINE_STATE;
197 conv->istate = state;
198 return count+4;
199 } else
200 return RET_ILSEQ;
201 case STATE4_DESIGNATED_CNS11643_6:
202 if (s[2] < 0x80 && s[3] < 0x80) {
203 int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
204 if (ret == RET_ILSEQ)
205 return RET_ILSEQ;
206 if (ret != 2) abort();
207 COMBINE_STATE;
208 conv->istate = state;
209 return count+4;
210 } else
211 return RET_ILSEQ;
212 case STATE4_DESIGNATED_CNS11643_7:
213 if (s[2] < 0x80 && s[3] < 0x80) {
214 int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
215 if (ret == RET_ILSEQ)
216 return RET_ILSEQ;
217 if (ret != 2) abort();
218 COMBINE_STATE;
219 conv->istate = state;
220 return count+4;
221 } else
222 return RET_ILSEQ;
223 default: abort();
224 }
225 }
226 return RET_ILSEQ;
227 }
228 if (c == SO) {
229 if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
230 return RET_ILSEQ;
231 state1 = STATE_TWOBYTE;
232 s++; count++;
233 if (n < count+1)
234 goto none;
235 continue;
236 }
237 if (c == SI) {
238 state1 = STATE_ASCII;
239 s++; count++;
240 if (n < count+1)
241 goto none;
242 continue;
243 }
244 break;
245 }
246 switch (state1) {
247 case STATE_ASCII:
248 if (c < 0x80) {
249 int ret = ascii_mbtowc(conv,pwc,s,1);
250 if (ret == RET_ILSEQ)
251 return RET_ILSEQ;
252 if (ret != 1) abort();
253 if (*pwc == 0x000a || *pwc == 0x000d) {
254 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
255 }
256 COMBINE_STATE;
257 conv->istate = state;
258 return count+1;
259 } else
260 return RET_ILSEQ;
261 case STATE_TWOBYTE:
262 if (n < count+2)
263 goto none;
264 if (s[0] < 0x80 && s[1] < 0x80) {
265 int ret;
266 switch (state2) {
267 case STATE2_NONE:
268 return RET_ILSEQ;
269 case STATE2_DESIGNATED_GB2312:
270 ret = gb2312_mbtowc(conv,pwc,s,2); break;
271 case STATE2_DESIGNATED_CNS11643_1:
272 ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
273 case STATE2_DESIGNATED_ISO_IR_165:
274 ret = isoir165_mbtowc(conv,pwc,s,2); break;
275 default: abort();
276 }
277 if (ret == RET_ILSEQ)
278 return RET_ILSEQ;
279 if (ret != 2) abort();
280 COMBINE_STATE;
281 conv->istate = state;
282 return count+2;
283 } else
284 return RET_ILSEQ;
285 default: abort();
286 }
287
288 none:
289 COMBINE_STATE;
290 conv->istate = state;
291 return RET_TOOFEW(count);
292 }
293
294 static int
iso2022_cn_ext_wctomb(conv_t conv,unsigned char * r,ucs4_t wc,int n)295 iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
296 {
297 state_t state = conv->ostate;
298 SPLIT_STATE;
299 unsigned char buf[3];
300 int ret;
301
302 /* There is no need to handle Unicode 3.1 tag characters and to look for
303 "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
304
305 /* Try ASCII. */
306 ret = ascii_wctomb(conv,buf,wc,1);
307 if (ret != RET_ILUNI) {
308 if (ret != 1) abort();
309 if (buf[0] < 0x80) {
310 int count = (state1 == STATE_ASCII ? 1 : 2);
311 if (n < count)
312 return RET_TOOSMALL;
313 if (state1 != STATE_ASCII) {
314 r[0] = SI;
315 r += 1;
316 state1 = STATE_ASCII;
317 }
318 r[0] = buf[0];
319 if (wc == 0x000a || wc == 0x000d) {
320 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
321 }
322 COMBINE_STATE;
323 conv->ostate = state;
324 return count;
325 }
326 }
327
328 /* Try GB 2312-1980. */
329 ret = gb2312_wctomb(conv,buf,wc,2);
330 if (ret != RET_ILUNI) {
331 if (ret != 2) abort();
332 if (buf[0] < 0x80 && buf[1] < 0x80) {
333 int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
334 if (n < count)
335 return RET_TOOSMALL;
336 if (state2 != STATE2_DESIGNATED_GB2312) {
337 r[0] = ESC;
338 r[1] = '$';
339 r[2] = ')';
340 r[3] = 'A';
341 r += 4;
342 state2 = STATE2_DESIGNATED_GB2312;
343 }
344 if (state1 != STATE_TWOBYTE) {
345 r[0] = SO;
346 r += 1;
347 state1 = STATE_TWOBYTE;
348 }
349 r[0] = buf[0];
350 r[1] = buf[1];
351 COMBINE_STATE;
352 conv->ostate = state;
353 return count;
354 }
355 }
356
357 ret = cns11643_wctomb(conv,buf,wc,3);
358 if (ret != RET_ILUNI) {
359 if (ret != 3) abort();
360
361 /* Try CNS 11643-1992 Plane 1. */
362 if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
363 int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
364 if (n < count)
365 return RET_TOOSMALL;
366 if (state2 != STATE2_DESIGNATED_CNS11643_1) {
367 r[0] = ESC;
368 r[1] = '$';
369 r[2] = ')';
370 r[3] = 'G';
371 r += 4;
372 state2 = STATE2_DESIGNATED_CNS11643_1;
373 }
374 if (state1 != STATE_TWOBYTE) {
375 r[0] = SO;
376 r += 1;
377 state1 = STATE_TWOBYTE;
378 }
379 r[0] = buf[1];
380 r[1] = buf[2];
381 COMBINE_STATE;
382 conv->ostate = state;
383 return count;
384 }
385
386 /* Try CNS 11643-1992 Plane 2. */
387 if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
388 int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
389 if (n < count)
390 return RET_TOOSMALL;
391 if (state3 != STATE3_DESIGNATED_CNS11643_2) {
392 r[0] = ESC;
393 r[1] = '$';
394 r[2] = '*';
395 r[3] = 'H';
396 r += 4;
397 state3 = STATE3_DESIGNATED_CNS11643_2;
398 }
399 r[0] = ESC;
400 r[1] = 'N';
401 r[2] = buf[1];
402 r[3] = buf[2];
403 COMBINE_STATE;
404 conv->ostate = state;
405 return count;
406 }
407
408 /* Try CNS 11643-1992 Plane 3. */
409 if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
410 int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
411 if (n < count)
412 return RET_TOOSMALL;
413 if (state4 != STATE4_DESIGNATED_CNS11643_3) {
414 r[0] = ESC;
415 r[1] = '$';
416 r[2] = '+';
417 r[3] = 'I';
418 r += 4;
419 state4 = STATE4_DESIGNATED_CNS11643_3;
420 }
421 r[0] = ESC;
422 r[1] = 'O';
423 r[2] = buf[1];
424 r[3] = buf[2];
425 COMBINE_STATE;
426 conv->ostate = state;
427 return count;
428 }
429
430 /* Try CNS 11643-1992 Plane 4. */
431 if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
432 int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
433 if (n < count)
434 return RET_TOOSMALL;
435 if (state4 != STATE4_DESIGNATED_CNS11643_4) {
436 r[0] = ESC;
437 r[1] = '$';
438 r[2] = '+';
439 r[3] = 'J';
440 r += 4;
441 state4 = STATE4_DESIGNATED_CNS11643_4;
442 }
443 r[0] = ESC;
444 r[1] = 'O';
445 r[2] = buf[1];
446 r[3] = buf[2];
447 COMBINE_STATE;
448 conv->ostate = state;
449 return count;
450 }
451
452 /* Try CNS 11643-1992 Plane 5. */
453 if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
454 int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
455 if (n < count)
456 return RET_TOOSMALL;
457 if (state4 != STATE4_DESIGNATED_CNS11643_5) {
458 r[0] = ESC;
459 r[1] = '$';
460 r[2] = '+';
461 r[3] = 'K';
462 r += 4;
463 state4 = STATE4_DESIGNATED_CNS11643_5;
464 }
465 r[0] = ESC;
466 r[1] = 'O';
467 r[2] = buf[1];
468 r[3] = buf[2];
469 COMBINE_STATE;
470 conv->ostate = state;
471 return count;
472 }
473
474 /* Try CNS 11643-1992 Plane 6. */
475 if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
476 int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
477 if (n < count)
478 return RET_TOOSMALL;
479 if (state4 != STATE4_DESIGNATED_CNS11643_6) {
480 r[0] = ESC;
481 r[1] = '$';
482 r[2] = '+';
483 r[3] = 'L';
484 r += 4;
485 state4 = STATE4_DESIGNATED_CNS11643_6;
486 }
487 r[0] = ESC;
488 r[1] = 'O';
489 r[2] = buf[1];
490 r[3] = buf[2];
491 COMBINE_STATE;
492 conv->ostate = state;
493 return count;
494 }
495
496 /* Try CNS 11643-1992 Plane 7. */
497 if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
498 int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
499 if (n < count)
500 return RET_TOOSMALL;
501 if (state4 != STATE4_DESIGNATED_CNS11643_7) {
502 r[0] = ESC;
503 r[1] = '$';
504 r[2] = '+';
505 r[3] = 'M';
506 r += 4;
507 state4 = STATE4_DESIGNATED_CNS11643_7;
508 }
509 r[0] = ESC;
510 r[1] = 'O';
511 r[2] = buf[1];
512 r[3] = buf[2];
513 COMBINE_STATE;
514 conv->ostate = state;
515 return count;
516 }
517
518 }
519
520 /* Try ISO-IR-165. */
521 ret = isoir165_wctomb(conv,buf,wc,2);
522 if (ret != RET_ILUNI) {
523 if (ret != 2) abort();
524 if (buf[0] < 0x80 && buf[1] < 0x80) {
525 int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
526 if (n < count)
527 return RET_TOOSMALL;
528 if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
529 r[0] = ESC;
530 r[1] = '$';
531 r[2] = ')';
532 r[3] = 'E';
533 r += 4;
534 state2 = STATE2_DESIGNATED_ISO_IR_165;
535 }
536 if (state1 != STATE_TWOBYTE) {
537 r[0] = SO;
538 r += 1;
539 state1 = STATE_TWOBYTE;
540 }
541 r[0] = buf[0];
542 r[1] = buf[1];
543 COMBINE_STATE;
544 conv->ostate = state;
545 return count;
546 }
547 }
548
549 return RET_ILUNI;
550 }
551
552 static int
iso2022_cn_ext_reset(conv_t conv,unsigned char * r,int n)553 iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n)
554 {
555 state_t state = conv->ostate;
556 SPLIT_STATE;
557 (void)state2;
558 (void)state3;
559 (void)state4;
560 if (state1 != STATE_ASCII) {
561 if (n < 1)
562 return RET_TOOSMALL;
563 r[0] = SI;
564 /* conv->ostate = 0; will be done by the caller */
565 return 1;
566 } else
567 return 0;
568 }
569
/*
 * Remove the state macros defined for this converter.
 * NOTE(review): ESC, SO and SI are not undefined here; presumably other
 * converters define them identically -- confirm before changing.
 */
#undef COMBINE_STATE
#undef SPLIT_STATE
#undef STATE4_DESIGNATED_CNS11643_7
#undef STATE4_DESIGNATED_CNS11643_6
#undef STATE4_DESIGNATED_CNS11643_5
#undef STATE4_DESIGNATED_CNS11643_4
#undef STATE4_DESIGNATED_CNS11643_3
#undef STATE4_NONE
#undef STATE3_DESIGNATED_CNS11643_2
#undef STATE3_NONE
#undef STATE2_DESIGNATED_ISO_IR_165
#undef STATE2_DESIGNATED_CNS11643_1
#undef STATE2_DESIGNATED_GB2312
#undef STATE2_NONE
#undef STATE_TWOBYTE
#undef STATE_ASCII
