1 /* 2 * Copyright (C) 1999-2001 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * ISO-2022-CN 23 */ 24 25 /* Specification: RFC 1922 */ 26 27 #define ESC 0x1b 28 #define SO 0x0e 29 #define SI 0x0f 30 31 /* 32 * The state is composed of one of the following values 33 */ 34 #define STATE_ASCII 0 35 #define STATE_TWOBYTE 1 36 /* 37 * and one of the following values, << 8 38 */ 39 #define STATE2_NONE 0 40 #define STATE2_DESIGNATED_GB2312 1 41 #define STATE2_DESIGNATED_CNS11643_1 2 42 /* 43 * and one of the following values, << 16 44 */ 45 #define STATE3_NONE 0 46 #define STATE3_DESIGNATED_CNS11643_2 1 47 48 #define SPLIT_STATE \ 49 unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = state >> 16 50 #define COMBINE_STATE \ 51 state = (state3 << 16) | (state2 << 8) | state1 52 53 static int 54 iso2022_cn_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 55 { 56 state_t state = conv->istate; 57 SPLIT_STATE; 58 int count = 0; 59 unsigned char c; 60 for (;;) { 61 c = *s; 62 if (c == ESC) { 63 if (n < count+4) 64 goto none; 65 if (s[1] == '$') { 66 if (s[2] == ')') { 67 if (s[3] == 'A') { 68 state2 = STATE2_DESIGNATED_GB2312; 69 s += 4; count += 4; 70 if (n < count+1) 71 goto none; 72 continue; 73 } 74 if (s[3] == 'G') { 75 state2 = STATE2_DESIGNATED_CNS11643_1; 76 s += 4; count += 4; 77 if (n < count+1) 78 goto none; 79 continue; 80 } 81 } 82 if (s[2] == '*') { 83 if (s[3] == 'H') { 84 state3 = STATE3_DESIGNATED_CNS11643_2; 85 s += 4; count += 4; 86 if (n < count+1) 87 goto none; 88 continue; 89 } 90 } 91 } 92 if (s[1] == 'N') { 93 switch (state3) { 94 case STATE3_NONE: 95 return RET_ILSEQ; 96 case STATE3_DESIGNATED_CNS11643_2: 97 if (s[2] < 0x80 && s[3] < 0x80) { 98 int ret = cns11643_2_mbtowc(conv,pwc,s+2,2); 99 if (ret == RET_ILSEQ) 100 return RET_ILSEQ; 101 if (ret != 2) abort(); 102 COMBINE_STATE; 103 conv->istate = state; 104 return count+4; 105 } else 106 return RET_ILSEQ; 107 default: abort(); 108 } 109 } 110 return RET_ILSEQ; 111 } 112 if (c == SO) { 113 if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1) 114 return RET_ILSEQ; 115 state1 = STATE_TWOBYTE; 116 s++; count++; 117 if (n < count+1) 118 goto none; 119 continue; 120 } 121 if (c == SI) { 122 state1 = STATE_ASCII; 123 s++; count++; 124 if (n < count+1) 125 goto none; 126 continue; 127 } 128 break; 129 } 130 switch (state1) { 131 case STATE_ASCII: 132 if (c < 0x80) { 133 int ret = ascii_mbtowc(conv,pwc,s,1); 134 if (ret == RET_ILSEQ) 135 return RET_ILSEQ; 136 if (ret != 1) abort(); 137 if (*pwc == 0x000a || *pwc == 0x000d) { 138 state2 = STATE2_NONE; state3 = STATE3_NONE; 139 } 140 COMBINE_STATE; 141 conv->istate = state; 142 return count+1; 143 } else 144 return RET_ILSEQ; 145 case STATE_TWOBYTE: 146 if (n < count+2) 147 goto none; 148 if (s[0] < 0x80 && s[1] < 0x80) { 149 int ret; 150 switch (state2) { 151 case STATE2_NONE: 152 return RET_ILSEQ; 153 case STATE2_DESIGNATED_GB2312: 154 ret = gb2312_mbtowc(conv,pwc,s,2); break; 155 case STATE2_DESIGNATED_CNS11643_1: 156 ret = cns11643_1_mbtowc(conv,pwc,s,2); break; 157 default: abort(); 158 } 159 if (ret == RET_ILSEQ) 160 return RET_ILSEQ; 161 if (ret != 2) abort(); 162 COMBINE_STATE; 163 conv->istate = state; 164 return count+2; 165 } else 166 return RET_ILSEQ; 167 default: abort(); 168 } 169 170 none: 171 COMBINE_STATE; 172 conv->istate = state; 173 return RET_TOOFEW(count); 174 } 175 176 static int 177 iso2022_cn_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 178 { 179 state_t state = conv->ostate; 180 SPLIT_STATE; 181 unsigned char buf[3]; 182 int ret; 183 184 /* There is no need to handle Unicode 3.1 tag characters and to look for 185 "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */ 186 187 /* Try ASCII. */ 188 ret = ascii_wctomb(conv,buf,wc,1); 189 if (ret != RET_ILUNI) { 190 if (ret != 1) abort(); 191 if (buf[0] < 0x80) { 192 int count = (state1 == STATE_ASCII ? 1 : 2); 193 if (n < count) 194 return RET_TOOSMALL; 195 if (state1 != STATE_ASCII) { 196 r[0] = SI; 197 r += 1; 198 state1 = STATE_ASCII; 199 } 200 r[0] = buf[0]; 201 if (wc == 0x000a || wc == 0x000d) { 202 state2 = STATE2_NONE; state3 = STATE3_NONE; 203 } 204 COMBINE_STATE; 205 conv->ostate = state; 206 return count; 207 } 208 } 209 210 /* Try GB 2312-1980. */ 211 ret = gb2312_wctomb(conv,buf,wc,2); 212 if (ret != RET_ILUNI) { 213 if (ret != 2) abort(); 214 if (buf[0] < 0x80 && buf[1] < 0x80) { 215 int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 216 if (n < count) 217 return RET_TOOSMALL; 218 if (state2 != STATE2_DESIGNATED_GB2312) { 219 r[0] = ESC; 220 r[1] = '$'; 221 r[2] = ')'; 222 r[3] = 'A'; 223 r += 4; 224 state2 = STATE2_DESIGNATED_GB2312; 225 } 226 if (state1 != STATE_TWOBYTE) { 227 r[0] = SO; 228 r += 1; 229 state1 = STATE_TWOBYTE; 230 } 231 r[0] = buf[0]; 232 r[1] = buf[1]; 233 COMBINE_STATE; 234 conv->ostate = state; 235 return count; 236 } 237 } 238 239 ret = cns11643_wctomb(conv,buf,wc,3); 240 if (ret != RET_ILUNI) { 241 if (ret != 3) abort(); 242 243 /* Try CNS 11643-1992 Plane 1. */ 244 if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) { 245 int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 246 if (n < count) 247 return RET_TOOSMALL; 248 if (state2 != STATE2_DESIGNATED_CNS11643_1) { 249 r[0] = ESC; 250 r[1] = '$'; 251 r[2] = ')'; 252 r[3] = 'G'; 253 r += 4; 254 state2 = STATE2_DESIGNATED_CNS11643_1; 255 } 256 if (state1 != STATE_TWOBYTE) { 257 r[0] = SO; 258 r += 1; 259 state1 = STATE_TWOBYTE; 260 } 261 r[0] = buf[1]; 262 r[1] = buf[2]; 263 COMBINE_STATE; 264 conv->ostate = state; 265 return count; 266 } 267 268 /* Try CNS 11643-1992 Plane 2. */ 269 if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) { 270 int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4; 271 if (n < count) 272 return RET_TOOSMALL; 273 if (state3 != STATE3_DESIGNATED_CNS11643_2) { 274 r[0] = ESC; 275 r[1] = '$'; 276 r[2] = '*'; 277 r[3] = 'H'; 278 r += 4; 279 state3 = STATE3_DESIGNATED_CNS11643_2; 280 } 281 r[0] = ESC; 282 r[1] = 'N'; 283 r[2] = buf[1]; 284 r[3] = buf[2]; 285 COMBINE_STATE; 286 conv->ostate = state; 287 return count; 288 } 289 } 290 291 return RET_ILUNI; 292 } 293 294 static int 295 iso2022_cn_reset (conv_t conv, unsigned char *r, int n) 296 { 297 state_t state = conv->ostate; 298 SPLIT_STATE; 299 (void)state2; 300 (void)state3; 301 if (state1 != STATE_ASCII) { 302 if (n < 1) 303 return RET_TOOSMALL; 304 r[0] = SI; 305 /* conv->ostate = 0; will be done by the caller */ 306 return 1; 307 } else 308 return 0; 309 } 310 311 #undef COMBINE_STATE 312 #undef SPLIT_STATE 313 #undef STATE3_DESIGNATED_CNS11643_2 314 #undef STATE3_NONE 315 #undef STATE2_DESIGNATED_CNS11643_1 316 #undef STATE2_DESIGNATED_GB2312 317 #undef STATE2_NONE 318 #undef STATE_TWOBYTE 319 #undef STATE_ASCII 320