xref: /haiku/src/libs/iconv/iso2022_cnext.h (revision 820dca4df6c7bf955c46e8f6521b9408f50b2900)
1 /*
2  * Copyright (C) 1999-2001 Free Software Foundation, Inc.
3  * This file is part of the GNU LIBICONV Library.
4  *
5  * The GNU LIBICONV Library is free software; you can redistribute it
6  * and/or modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * The GNU LIBICONV Library is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18  * Fifth Floor, Boston, MA 02110-1301, USA.
19  */
20 
21 /*
22  * ISO-2022-CN-EXT
23  */
24 
25 /* Specification: RFC 1922 */
26 
/* Control bytes recognised by the decoder and emitted by the encoder below:
   ESC starts the designation (ESC $ ...) and single-shift (ESC N, ESC O)
   sequences, SO switches to two-byte mode, SI switches back to ASCII. */
27 #define ESC 0x1b
28 #define SO  0x0e
29 #define SI  0x0f
30 
31 /*
32  * The state is composed of one of the following values
 * (kept in bits 0..7, called state1 below: the current SI/SO shift state)
33  */
34 #define STATE_ASCII          0
35 #define STATE_TWOBYTE        1
36 /*
37  * and one of the following values, << 8
 * (state2 below: the set designated by ESC $ ) A / G / E, which is the set
 * used for two-byte characters while in STATE_TWOBYTE)
38  */
39 #define STATE2_NONE                   0
40 #define STATE2_DESIGNATED_GB2312      1
41 #define STATE2_DESIGNATED_CNS11643_1  2
42 #define STATE2_DESIGNATED_ISO_IR_165  3
43 /*
44  * and one of the following values, << 16
 * (state3 below: the set designated by ESC $ * H, used by the single
 * shift ESC N)
45  */
46 #define STATE3_NONE                   0
47 #define STATE3_DESIGNATED_CNS11643_2  1
48 /*
49  * and one of the following values, << 24
 * (state4 below: the set designated by ESC $ + I..M, used by the single
 * shift ESC O)
50  */
51 #define STATE4_NONE                   0
52 #define STATE4_DESIGNATED_CNS11643_3  1
53 #define STATE4_DESIGNATED_CNS11643_4  2
54 #define STATE4_DESIGNATED_CNS11643_5  3
55 #define STATE4_DESIGNATED_CNS11643_6  4
56 #define STATE4_DESIGNATED_CNS11643_7  5
57 
/* SPLIT_STATE declares the locals state1..state4 from a variable named
   `state` that must already be in scope; COMBINE_STATE packs those four
   locals back into `state`.  Neither macro touches conv->istate or
   conv->ostate; the functions below do that explicitly. */
58 #define SPLIT_STATE \
59   unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24
60 #define COMBINE_STATE \
61   state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1
62 
63 static int
64 iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
65 {
66   state_t state = conv->istate;
67   SPLIT_STATE;
68   int count = 0;
69   unsigned char c;
70   for (;;) {
71     c = *s;
72     if (c == ESC) {
73       if (n < count+4)
74         goto none;
75       if (s[1] == '$') {
76         if (s[2] == ')') {
77           if (s[3] == 'A') {
78             state2 = STATE2_DESIGNATED_GB2312;
79             s += 4; count += 4;
80             if (n < count+1)
81               goto none;
82             continue;
83           }
84           if (s[3] == 'G') {
85             state2 = STATE2_DESIGNATED_CNS11643_1;
86             s += 4; count += 4;
87             if (n < count+1)
88               goto none;
89             continue;
90           }
91           if (s[3] == 'E') {
92             state2 = STATE2_DESIGNATED_ISO_IR_165;
93             s += 4; count += 4;
94             if (n < count+1)
95               goto none;
96             continue;
97           }
98         }
99         if (s[2] == '*') {
100           if (s[3] == 'H') {
101             state3 = STATE3_DESIGNATED_CNS11643_2;
102             s += 4; count += 4;
103             if (n < count+1)
104               goto none;
105             continue;
106           }
107         }
108         if (s[2] == '+') {
109           if (s[3] == 'I') {
110             state4 = STATE4_DESIGNATED_CNS11643_3;
111             s += 4; count += 4;
112             if (n < count+1)
113               goto none;
114             continue;
115           }
116           if (s[3] == 'J') {
117             state4 = STATE4_DESIGNATED_CNS11643_4;
118             s += 4; count += 4;
119             if (n < count+1)
120               goto none;
121             continue;
122           }
123           if (s[3] == 'K') {
124             state4 = STATE4_DESIGNATED_CNS11643_5;
125             s += 4; count += 4;
126             if (n < count+1)
127               goto none;
128             continue;
129           }
130           if (s[3] == 'L') {
131             state4 = STATE4_DESIGNATED_CNS11643_6;
132             s += 4; count += 4;
133             if (n < count+1)
134               goto none;
135             continue;
136           }
137           if (s[3] == 'M') {
138             state4 = STATE4_DESIGNATED_CNS11643_7;
139             s += 4; count += 4;
140             if (n < count+1)
141               goto none;
142             continue;
143           }
144         }
145       }
146       if (s[1] == 'N') {
147         switch (state3) {
148           case STATE3_NONE:
149             return RET_ILSEQ;
150           case STATE3_DESIGNATED_CNS11643_2:
151             if (s[2] < 0x80 && s[3] < 0x80) {
152               int ret = cns11643_2_mbtowc(conv,pwc,s+2,2);
153               if (ret == RET_ILSEQ)
154                 return RET_ILSEQ;
155               if (ret != 2) abort();
156               COMBINE_STATE;
157               conv->istate = state;
158               return count+4;
159             } else
160               return RET_ILSEQ;
161           default: abort();
162         }
163       }
164       if (s[1] == 'O') {
165         switch (state4) {
166           case STATE4_NONE:
167             return RET_ILSEQ;
168           case STATE4_DESIGNATED_CNS11643_3:
169             if (s[2] < 0x80 && s[3] < 0x80) {
170               int ret = cns11643_3_mbtowc(conv,pwc,s+2,2);
171               if (ret == RET_ILSEQ)
172                 return RET_ILSEQ;
173               if (ret != 2) abort();
174               COMBINE_STATE;
175               conv->istate = state;
176               return count+4;
177             } else
178               return RET_ILSEQ;
179           case STATE4_DESIGNATED_CNS11643_4:
180             if (s[2] < 0x80 && s[3] < 0x80) {
181               int ret = cns11643_4_mbtowc(conv,pwc,s+2,2);
182               if (ret == RET_ILSEQ)
183                 return RET_ILSEQ;
184               if (ret != 2) abort();
185               COMBINE_STATE;
186               conv->istate = state;
187               return count+4;
188             } else
189               return RET_ILSEQ;
190           case STATE4_DESIGNATED_CNS11643_5:
191             if (s[2] < 0x80 && s[3] < 0x80) {
192               int ret = cns11643_5_mbtowc(conv,pwc,s+2,2);
193               if (ret == RET_ILSEQ)
194                 return RET_ILSEQ;
195               if (ret != 2) abort();
196               COMBINE_STATE;
197               conv->istate = state;
198               return count+4;
199             } else
200               return RET_ILSEQ;
201           case STATE4_DESIGNATED_CNS11643_6:
202             if (s[2] < 0x80 && s[3] < 0x80) {
203               int ret = cns11643_6_mbtowc(conv,pwc,s+2,2);
204               if (ret == RET_ILSEQ)
205                 return RET_ILSEQ;
206               if (ret != 2) abort();
207               COMBINE_STATE;
208               conv->istate = state;
209               return count+4;
210             } else
211               return RET_ILSEQ;
212           case STATE4_DESIGNATED_CNS11643_7:
213             if (s[2] < 0x80 && s[3] < 0x80) {
214               int ret = cns11643_7_mbtowc(conv,pwc,s+2,2);
215               if (ret == RET_ILSEQ)
216                 return RET_ILSEQ;
217               if (ret != 2) abort();
218               COMBINE_STATE;
219               conv->istate = state;
220               return count+4;
221             } else
222               return RET_ILSEQ;
223           default: abort();
224         }
225       }
226       return RET_ILSEQ;
227     }
228     if (c == SO) {
229       if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165)
230         return RET_ILSEQ;
231       state1 = STATE_TWOBYTE;
232       s++; count++;
233       if (n < count+1)
234         goto none;
235       continue;
236     }
237     if (c == SI) {
238       state1 = STATE_ASCII;
239       s++; count++;
240       if (n < count+1)
241         goto none;
242       continue;
243     }
244     break;
245   }
246   switch (state1) {
247     case STATE_ASCII:
248       if (c < 0x80) {
249         int ret = ascii_mbtowc(conv,pwc,s,1);
250         if (ret == RET_ILSEQ)
251           return RET_ILSEQ;
252         if (ret != 1) abort();
253         if (*pwc == 0x000a || *pwc == 0x000d) {
254           state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
255         }
256         COMBINE_STATE;
257         conv->istate = state;
258         return count+1;
259       } else
260         return RET_ILSEQ;
261     case STATE_TWOBYTE:
262       if (n < count+2)
263         goto none;
264       if (s[0] < 0x80 && s[1] < 0x80) {
265         int ret;
266         switch (state2) {
267           case STATE2_NONE:
268             return RET_ILSEQ;
269           case STATE2_DESIGNATED_GB2312:
270             ret = gb2312_mbtowc(conv,pwc,s,2); break;
271           case STATE2_DESIGNATED_CNS11643_1:
272             ret = cns11643_1_mbtowc(conv,pwc,s,2); break;
273           case STATE2_DESIGNATED_ISO_IR_165:
274             ret = isoir165_mbtowc(conv,pwc,s,2); break;
275           default: abort();
276         }
277         if (ret == RET_ILSEQ)
278           return RET_ILSEQ;
279         if (ret != 2) abort();
280         COMBINE_STATE;
281         conv->istate = state;
282         return count+2;
283       } else
284         return RET_ILSEQ;
285     default: abort();
286   }
287 
288 none:
289   COMBINE_STATE;
290   conv->istate = state;
291   return RET_TOOFEW(count);
292 }
293 
294 static int
295 iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
296 {
297   state_t state = conv->ostate;
298   SPLIT_STATE;
299   unsigned char buf[3];
300   int ret;
301 
302   /* There is no need to handle Unicode 3.1 tag characters and to look for
303      "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */
304 
305   /* Try ASCII. */
306   ret = ascii_wctomb(conv,buf,wc,1);
307   if (ret != RET_ILUNI) {
308     if (ret != 1) abort();
309     if (buf[0] < 0x80) {
310       int count = (state1 == STATE_ASCII ? 1 : 2);
311       if (n < count)
312         return RET_TOOSMALL;
313       if (state1 != STATE_ASCII) {
314         r[0] = SI;
315         r += 1;
316         state1 = STATE_ASCII;
317       }
318       r[0] = buf[0];
319       if (wc == 0x000a || wc == 0x000d) {
320         state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE;
321       }
322       COMBINE_STATE;
323       conv->ostate = state;
324       return count;
325     }
326   }
327 
328   /* Try GB 2312-1980. */
329   ret = gb2312_wctomb(conv,buf,wc,2);
330   if (ret != RET_ILUNI) {
331     if (ret != 2) abort();
332     if (buf[0] < 0x80 && buf[1] < 0x80) {
333       int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
334       if (n < count)
335         return RET_TOOSMALL;
336       if (state2 != STATE2_DESIGNATED_GB2312) {
337         r[0] = ESC;
338         r[1] = '$';
339         r[2] = ')';
340         r[3] = 'A';
341         r += 4;
342         state2 = STATE2_DESIGNATED_GB2312;
343       }
344       if (state1 != STATE_TWOBYTE) {
345         r[0] = SO;
346         r += 1;
347         state1 = STATE_TWOBYTE;
348       }
349       r[0] = buf[0];
350       r[1] = buf[1];
351       COMBINE_STATE;
352       conv->ostate = state;
353       return count;
354     }
355   }
356 
357   ret = cns11643_wctomb(conv,buf,wc,3);
358   if (ret != RET_ILUNI) {
359     if (ret != 3) abort();
360 
361     /* Try CNS 11643-1992 Plane 1. */
362     if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) {
363       int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
364       if (n < count)
365         return RET_TOOSMALL;
366       if (state2 != STATE2_DESIGNATED_CNS11643_1) {
367         r[0] = ESC;
368         r[1] = '$';
369         r[2] = ')';
370         r[3] = 'G';
371         r += 4;
372         state2 = STATE2_DESIGNATED_CNS11643_1;
373       }
374       if (state1 != STATE_TWOBYTE) {
375         r[0] = SO;
376         r += 1;
377         state1 = STATE_TWOBYTE;
378       }
379       r[0] = buf[1];
380       r[1] = buf[2];
381       COMBINE_STATE;
382       conv->ostate = state;
383       return count;
384     }
385 
386     /* Try CNS 11643-1992 Plane 2. */
387     if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) {
388       int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4;
389       if (n < count)
390         return RET_TOOSMALL;
391       if (state3 != STATE3_DESIGNATED_CNS11643_2) {
392         r[0] = ESC;
393         r[1] = '$';
394         r[2] = '*';
395         r[3] = 'H';
396         r += 4;
397         state3 = STATE3_DESIGNATED_CNS11643_2;
398       }
399       r[0] = ESC;
400       r[1] = 'N';
401       r[2] = buf[1];
402       r[3] = buf[2];
403       COMBINE_STATE;
404       conv->ostate = state;
405       return count;
406     }
407 
408     /* Try CNS 11643-1992 Plane 3. */
409     if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) {
410       int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4;
411       if (n < count)
412         return RET_TOOSMALL;
413       if (state4 != STATE4_DESIGNATED_CNS11643_3) {
414         r[0] = ESC;
415         r[1] = '$';
416         r[2] = '+';
417         r[3] = 'I';
418         r += 4;
419         state4 = STATE4_DESIGNATED_CNS11643_3;
420       }
421       r[0] = ESC;
422       r[1] = 'O';
423       r[2] = buf[1];
424       r[3] = buf[2];
425       COMBINE_STATE;
426       conv->ostate = state;
427       return count;
428     }
429 
430     /* Try CNS 11643-1992 Plane 4. */
431     if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) {
432       int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4;
433       if (n < count)
434         return RET_TOOSMALL;
435       if (state4 != STATE4_DESIGNATED_CNS11643_4) {
436         r[0] = ESC;
437         r[1] = '$';
438         r[2] = '+';
439         r[3] = 'J';
440         r += 4;
441         state4 = STATE4_DESIGNATED_CNS11643_4;
442       }
443       r[0] = ESC;
444       r[1] = 'O';
445       r[2] = buf[1];
446       r[3] = buf[2];
447       COMBINE_STATE;
448       conv->ostate = state;
449       return count;
450     }
451 
452     /* Try CNS 11643-1992 Plane 5. */
453     if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) {
454       int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4;
455       if (n < count)
456         return RET_TOOSMALL;
457       if (state4 != STATE4_DESIGNATED_CNS11643_5) {
458         r[0] = ESC;
459         r[1] = '$';
460         r[2] = '+';
461         r[3] = 'K';
462         r += 4;
463         state4 = STATE4_DESIGNATED_CNS11643_5;
464       }
465       r[0] = ESC;
466       r[1] = 'O';
467       r[2] = buf[1];
468       r[3] = buf[2];
469       COMBINE_STATE;
470       conv->ostate = state;
471       return count;
472     }
473 
474     /* Try CNS 11643-1992 Plane 6. */
475     if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) {
476       int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4;
477       if (n < count)
478         return RET_TOOSMALL;
479       if (state4 != STATE4_DESIGNATED_CNS11643_6) {
480         r[0] = ESC;
481         r[1] = '$';
482         r[2] = '+';
483         r[3] = 'L';
484         r += 4;
485         state4 = STATE4_DESIGNATED_CNS11643_6;
486       }
487       r[0] = ESC;
488       r[1] = 'O';
489       r[2] = buf[1];
490       r[3] = buf[2];
491       COMBINE_STATE;
492       conv->ostate = state;
493       return count;
494     }
495 
496     /* Try CNS 11643-1992 Plane 7. */
497     if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) {
498       int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4;
499       if (n < count)
500         return RET_TOOSMALL;
501       if (state4 != STATE4_DESIGNATED_CNS11643_7) {
502         r[0] = ESC;
503         r[1] = '$';
504         r[2] = '+';
505         r[3] = 'M';
506         r += 4;
507         state4 = STATE4_DESIGNATED_CNS11643_7;
508       }
509       r[0] = ESC;
510       r[1] = 'O';
511       r[2] = buf[1];
512       r[3] = buf[2];
513       COMBINE_STATE;
514       conv->ostate = state;
515       return count;
516     }
517 
518   }
519 
520   /* Try ISO-IR-165. */
521   ret = isoir165_wctomb(conv,buf,wc,2);
522   if (ret != RET_ILUNI) {
523     if (ret != 2) abort();
524     if (buf[0] < 0x80 && buf[1] < 0x80) {
525       int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2;
526       if (n < count)
527         return RET_TOOSMALL;
528       if (state2 != STATE2_DESIGNATED_ISO_IR_165) {
529         r[0] = ESC;
530         r[1] = '$';
531         r[2] = ')';
532         r[3] = 'E';
533         r += 4;
534         state2 = STATE2_DESIGNATED_ISO_IR_165;
535       }
536       if (state1 != STATE_TWOBYTE) {
537         r[0] = SO;
538         r += 1;
539         state1 = STATE_TWOBYTE;
540       }
541       r[0] = buf[0];
542       r[1] = buf[1];
543       COMBINE_STATE;
544       conv->ostate = state;
545       return count;
546     }
547   }
548 
549   return RET_ILUNI;
550 }
551 
552 static int
553 iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n)
554 {
555   state_t state = conv->ostate;
556   SPLIT_STATE;
557   (void)state2;
558   (void)state3;
559   (void)state4;
560   if (state1 != STATE_ASCII) {
561     if (n < 1)
562       return RET_TOOSMALL;
563     r[0] = SI;
564     /* conv->ostate = 0; will be done by the caller */
565     return 1;
566   } else
567     return 0;
568 }
569 
/* Remove the state macros defined earlier in this file (SPLIT_STATE,
   COMBINE_STATE and all STATE*_ constants).  ESC, SO and SI are not
   undefined here and therefore stay defined after this point. */
570 #undef COMBINE_STATE
571 #undef SPLIT_STATE
572 #undef STATE4_DESIGNATED_CNS11643_7
573 #undef STATE4_DESIGNATED_CNS11643_6
574 #undef STATE4_DESIGNATED_CNS11643_5
575 #undef STATE4_DESIGNATED_CNS11643_4
576 #undef STATE4_DESIGNATED_CNS11643_3
577 #undef STATE4_NONE
578 #undef STATE3_DESIGNATED_CNS11643_2
579 #undef STATE3_NONE
580 #undef STATE2_DESIGNATED_ISO_IR_165
581 #undef STATE2_DESIGNATED_CNS11643_1
582 #undef STATE2_DESIGNATED_GB2312
583 #undef STATE2_NONE
584 #undef STATE_TWOBYTE
585 #undef STATE_ASCII
586