1 /* 2 * Copyright (C) 1999-2001 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * ISO-2022-CN-EXT 23 */ 24 25 /* Specification: RFC 1922 */ 26 27 #define ESC 0x1b 28 #define SO 0x0e 29 #define SI 0x0f 30 31 /* 32 * The state is composed of one of the following values 33 */ 34 #define STATE_ASCII 0 35 #define STATE_TWOBYTE 1 36 /* 37 * and one of the following values, << 8 38 */ 39 #define STATE2_NONE 0 40 #define STATE2_DESIGNATED_GB2312 1 41 #define STATE2_DESIGNATED_CNS11643_1 2 42 #define STATE2_DESIGNATED_ISO_IR_165 3 43 /* 44 * and one of the following values, << 16 45 */ 46 #define STATE3_NONE 0 47 #define STATE3_DESIGNATED_CNS11643_2 1 48 /* 49 * and one of the following values, << 24 50 */ 51 #define STATE4_NONE 0 52 #define STATE4_DESIGNATED_CNS11643_3 1 53 #define STATE4_DESIGNATED_CNS11643_4 2 54 #define STATE4_DESIGNATED_CNS11643_5 3 55 #define STATE4_DESIGNATED_CNS11643_6 4 56 #define STATE4_DESIGNATED_CNS11643_7 5 57 58 #define SPLIT_STATE \ 59 unsigned int state1 = state & 0xff, state2 = (state >> 8) & 0xff, state3 = (state >> 16) & 0xff, state4 = state >> 24 60 #define COMBINE_STATE \ 61 state = (state4 << 24) | (state3 << 16) | (state2 << 8) | state1 62 63 static int 64 iso2022_cn_ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 65 { 66 state_t state = conv->istate; 67 SPLIT_STATE; 68 int count = 0; 69 unsigned char c; 70 for (;;) { 71 c = *s; 72 if (c == ESC) { 73 if (n < count+4) 74 goto none; 75 if (s[1] == '$') { 76 if (s[2] == ')') { 77 if (s[3] == 'A') { 78 state2 = STATE2_DESIGNATED_GB2312; 79 s += 4; count += 4; 80 if (n < count+1) 81 goto none; 82 continue; 83 } 84 if (s[3] == 'G') { 85 state2 = STATE2_DESIGNATED_CNS11643_1; 86 s += 4; count += 4; 87 if (n < count+1) 88 goto none; 89 continue; 90 } 91 if (s[3] == 'E') { 92 state2 = STATE2_DESIGNATED_ISO_IR_165; 93 s += 4; count += 4; 94 if (n < count+1) 95 goto none; 96 continue; 97 } 98 } 99 if (s[2] == '*') { 100 if (s[3] == 'H') { 101 state3 = STATE3_DESIGNATED_CNS11643_2; 102 s += 4; count += 4; 103 if (n < count+1) 104 goto none; 105 continue; 106 } 107 } 108 if (s[2] == '+') { 109 if (s[3] == 'I') { 110 state4 = STATE4_DESIGNATED_CNS11643_3; 111 s += 4; count += 4; 112 if (n < count+1) 113 goto none; 114 continue; 115 } 116 if (s[3] == 'J') { 117 state4 = STATE4_DESIGNATED_CNS11643_4; 118 s += 4; count += 4; 119 if (n < count+1) 120 goto none; 121 continue; 122 } 123 if (s[3] == 'K') { 124 state4 = STATE4_DESIGNATED_CNS11643_5; 125 s += 4; count += 4; 126 if (n < count+1) 127 goto none; 128 continue; 129 } 130 if (s[3] == 'L') { 131 state4 = STATE4_DESIGNATED_CNS11643_6; 132 s += 4; count += 4; 133 if (n < count+1) 134 goto none; 135 continue; 136 } 137 if (s[3] == 'M') { 138 state4 = STATE4_DESIGNATED_CNS11643_7; 139 s += 4; count += 4; 140 if (n < count+1) 141 goto none; 142 continue; 143 } 144 } 145 } 146 if (s[1] == 'N') { 147 switch (state3) { 148 case STATE3_NONE: 149 return RET_ILSEQ; 150 case STATE3_DESIGNATED_CNS11643_2: 151 if (s[2] < 0x80 && s[3] < 0x80) { 152 int ret = cns11643_2_mbtowc(conv,pwc,s+2,2); 153 if (ret == RET_ILSEQ) 154 return RET_ILSEQ; 155 if (ret != 2) abort(); 156 COMBINE_STATE; 157 conv->istate = state; 158 return count+4; 159 } else 160 return RET_ILSEQ; 161 default: abort(); 162 } 163 } 164 if (s[1] == 'O') { 165 switch (state4) { 166 case STATE4_NONE: 167 return RET_ILSEQ; 168 case STATE4_DESIGNATED_CNS11643_3: 169 if (s[2] < 0x80 && s[3] < 0x80) { 170 int ret = cns11643_3_mbtowc(conv,pwc,s+2,2); 171 if (ret == RET_ILSEQ) 172 return RET_ILSEQ; 173 if (ret != 2) abort(); 174 COMBINE_STATE; 175 conv->istate = state; 176 return count+4; 177 } else 178 return RET_ILSEQ; 179 case STATE4_DESIGNATED_CNS11643_4: 180 if (s[2] < 0x80 && s[3] < 0x80) { 181 int ret = cns11643_4_mbtowc(conv,pwc,s+2,2); 182 if (ret == RET_ILSEQ) 183 return RET_ILSEQ; 184 if (ret != 2) abort(); 185 COMBINE_STATE; 186 conv->istate = state; 187 return count+4; 188 } else 189 return RET_ILSEQ; 190 case STATE4_DESIGNATED_CNS11643_5: 191 if (s[2] < 0x80 && s[3] < 0x80) { 192 int ret = cns11643_5_mbtowc(conv,pwc,s+2,2); 193 if (ret == RET_ILSEQ) 194 return RET_ILSEQ; 195 if (ret != 2) abort(); 196 COMBINE_STATE; 197 conv->istate = state; 198 return count+4; 199 } else 200 return RET_ILSEQ; 201 case STATE4_DESIGNATED_CNS11643_6: 202 if (s[2] < 0x80 && s[3] < 0x80) { 203 int ret = cns11643_6_mbtowc(conv,pwc,s+2,2); 204 if (ret == RET_ILSEQ) 205 return RET_ILSEQ; 206 if (ret != 2) abort(); 207 COMBINE_STATE; 208 conv->istate = state; 209 return count+4; 210 } else 211 return RET_ILSEQ; 212 case STATE4_DESIGNATED_CNS11643_7: 213 if (s[2] < 0x80 && s[3] < 0x80) { 214 int ret = cns11643_7_mbtowc(conv,pwc,s+2,2); 215 if (ret == RET_ILSEQ) 216 return RET_ILSEQ; 217 if (ret != 2) abort(); 218 COMBINE_STATE; 219 conv->istate = state; 220 return count+4; 221 } else 222 return RET_ILSEQ; 223 default: abort(); 224 } 225 } 226 return RET_ILSEQ; 227 } 228 if (c == SO) { 229 if (state2 != STATE2_DESIGNATED_GB2312 && state2 != STATE2_DESIGNATED_CNS11643_1 && state2 != STATE2_DESIGNATED_ISO_IR_165) 230 return RET_ILSEQ; 231 state1 = STATE_TWOBYTE; 232 s++; count++; 233 if (n < count+1) 234 goto none; 235 continue; 236 } 237 if (c == SI) { 238 state1 = STATE_ASCII; 239 s++; count++; 240 if (n < count+1) 241 goto none; 242 continue; 243 } 244 break; 245 } 246 switch (state1) { 247 case STATE_ASCII: 248 if (c < 0x80) { 249 int ret = ascii_mbtowc(conv,pwc,s,1); 250 if (ret == RET_ILSEQ) 251 return RET_ILSEQ; 252 if (ret != 1) abort(); 253 if (*pwc == 0x000a || *pwc == 0x000d) { 254 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE; 255 } 256 COMBINE_STATE; 257 conv->istate = state; 258 return count+1; 259 } else 260 return RET_ILSEQ; 261 case STATE_TWOBYTE: 262 if (n < count+2) 263 goto none; 264 if (s[0] < 0x80 && s[1] < 0x80) { 265 int ret; 266 switch (state2) { 267 case STATE2_NONE: 268 return RET_ILSEQ; 269 case STATE2_DESIGNATED_GB2312: 270 ret = gb2312_mbtowc(conv,pwc,s,2); break; 271 case STATE2_DESIGNATED_CNS11643_1: 272 ret = cns11643_1_mbtowc(conv,pwc,s,2); break; 273 case STATE2_DESIGNATED_ISO_IR_165: 274 ret = isoir165_mbtowc(conv,pwc,s,2); break; 275 default: abort(); 276 } 277 if (ret == RET_ILSEQ) 278 return RET_ILSEQ; 279 if (ret != 2) abort(); 280 COMBINE_STATE; 281 conv->istate = state; 282 return count+2; 283 } else 284 return RET_ILSEQ; 285 default: abort(); 286 } 287 288 none: 289 COMBINE_STATE; 290 conv->istate = state; 291 return RET_TOOFEW(count); 292 } 293 294 static int 295 iso2022_cn_ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 296 { 297 state_t state = conv->ostate; 298 SPLIT_STATE; 299 unsigned char buf[3]; 300 int ret; 301 302 /* There is no need to handle Unicode 3.1 tag characters and to look for 303 "zh-CN" or "zh-TW" tags, because GB2312 and CNS11643 are disjoint. */ 304 305 /* Try ASCII. */ 306 ret = ascii_wctomb(conv,buf,wc,1); 307 if (ret != RET_ILUNI) { 308 if (ret != 1) abort(); 309 if (buf[0] < 0x80) { 310 int count = (state1 == STATE_ASCII ? 1 : 2); 311 if (n < count) 312 return RET_TOOSMALL; 313 if (state1 != STATE_ASCII) { 314 r[0] = SI; 315 r += 1; 316 state1 = STATE_ASCII; 317 } 318 r[0] = buf[0]; 319 if (wc == 0x000a || wc == 0x000d) { 320 state2 = STATE2_NONE; state3 = STATE3_NONE; state4 = STATE3_NONE; 321 } 322 COMBINE_STATE; 323 conv->ostate = state; 324 return count; 325 } 326 } 327 328 /* Try GB 2312-1980. */ 329 ret = gb2312_wctomb(conv,buf,wc,2); 330 if (ret != RET_ILUNI) { 331 if (ret != 2) abort(); 332 if (buf[0] < 0x80 && buf[1] < 0x80) { 333 int count = (state2 == STATE2_DESIGNATED_GB2312 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 334 if (n < count) 335 return RET_TOOSMALL; 336 if (state2 != STATE2_DESIGNATED_GB2312) { 337 r[0] = ESC; 338 r[1] = '$'; 339 r[2] = ')'; 340 r[3] = 'A'; 341 r += 4; 342 state2 = STATE2_DESIGNATED_GB2312; 343 } 344 if (state1 != STATE_TWOBYTE) { 345 r[0] = SO; 346 r += 1; 347 state1 = STATE_TWOBYTE; 348 } 349 r[0] = buf[0]; 350 r[1] = buf[1]; 351 COMBINE_STATE; 352 conv->ostate = state; 353 return count; 354 } 355 } 356 357 ret = cns11643_wctomb(conv,buf,wc,3); 358 if (ret != RET_ILUNI) { 359 if (ret != 3) abort(); 360 361 /* Try CNS 11643-1992 Plane 1. */ 362 if (buf[0] == 1 && buf[1] < 0x80 && buf[2] < 0x80) { 363 int count = (state2 == STATE2_DESIGNATED_CNS11643_1 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 364 if (n < count) 365 return RET_TOOSMALL; 366 if (state2 != STATE2_DESIGNATED_CNS11643_1) { 367 r[0] = ESC; 368 r[1] = '$'; 369 r[2] = ')'; 370 r[3] = 'G'; 371 r += 4; 372 state2 = STATE2_DESIGNATED_CNS11643_1; 373 } 374 if (state1 != STATE_TWOBYTE) { 375 r[0] = SO; 376 r += 1; 377 state1 = STATE_TWOBYTE; 378 } 379 r[0] = buf[1]; 380 r[1] = buf[2]; 381 COMBINE_STATE; 382 conv->ostate = state; 383 return count; 384 } 385 386 /* Try CNS 11643-1992 Plane 2. */ 387 if (buf[0] == 2 && buf[1] < 0x80 && buf[2] < 0x80) { 388 int count = (state3 == STATE3_DESIGNATED_CNS11643_2 ? 0 : 4) + 4; 389 if (n < count) 390 return RET_TOOSMALL; 391 if (state3 != STATE3_DESIGNATED_CNS11643_2) { 392 r[0] = ESC; 393 r[1] = '$'; 394 r[2] = '*'; 395 r[3] = 'H'; 396 r += 4; 397 state3 = STATE3_DESIGNATED_CNS11643_2; 398 } 399 r[0] = ESC; 400 r[1] = 'N'; 401 r[2] = buf[1]; 402 r[3] = buf[2]; 403 COMBINE_STATE; 404 conv->ostate = state; 405 return count; 406 } 407 408 /* Try CNS 11643-1992 Plane 3. */ 409 if (buf[0] == 3 && buf[1] < 0x80 && buf[2] < 0x80) { 410 int count = (state4 == STATE4_DESIGNATED_CNS11643_3 ? 0 : 4) + 4; 411 if (n < count) 412 return RET_TOOSMALL; 413 if (state4 != STATE4_DESIGNATED_CNS11643_3) { 414 r[0] = ESC; 415 r[1] = '$'; 416 r[2] = '+'; 417 r[3] = 'I'; 418 r += 4; 419 state4 = STATE4_DESIGNATED_CNS11643_3; 420 } 421 r[0] = ESC; 422 r[1] = 'O'; 423 r[2] = buf[1]; 424 r[3] = buf[2]; 425 COMBINE_STATE; 426 conv->ostate = state; 427 return count; 428 } 429 430 /* Try CNS 11643-1992 Plane 4. */ 431 if (buf[0] == 4 && buf[1] < 0x80 && buf[2] < 0x80) { 432 int count = (state4 == STATE4_DESIGNATED_CNS11643_4 ? 0 : 4) + 4; 433 if (n < count) 434 return RET_TOOSMALL; 435 if (state4 != STATE4_DESIGNATED_CNS11643_4) { 436 r[0] = ESC; 437 r[1] = '$'; 438 r[2] = '+'; 439 r[3] = 'J'; 440 r += 4; 441 state4 = STATE4_DESIGNATED_CNS11643_4; 442 } 443 r[0] = ESC; 444 r[1] = 'O'; 445 r[2] = buf[1]; 446 r[3] = buf[2]; 447 COMBINE_STATE; 448 conv->ostate = state; 449 return count; 450 } 451 452 /* Try CNS 11643-1992 Plane 5. */ 453 if (buf[0] == 5 && buf[1] < 0x80 && buf[2] < 0x80) { 454 int count = (state4 == STATE4_DESIGNATED_CNS11643_5 ? 0 : 4) + 4; 455 if (n < count) 456 return RET_TOOSMALL; 457 if (state4 != STATE4_DESIGNATED_CNS11643_5) { 458 r[0] = ESC; 459 r[1] = '$'; 460 r[2] = '+'; 461 r[3] = 'K'; 462 r += 4; 463 state4 = STATE4_DESIGNATED_CNS11643_5; 464 } 465 r[0] = ESC; 466 r[1] = 'O'; 467 r[2] = buf[1]; 468 r[3] = buf[2]; 469 COMBINE_STATE; 470 conv->ostate = state; 471 return count; 472 } 473 474 /* Try CNS 11643-1992 Plane 6. */ 475 if (buf[0] == 6 && buf[1] < 0x80 && buf[2] < 0x80) { 476 int count = (state4 == STATE4_DESIGNATED_CNS11643_6 ? 0 : 4) + 4; 477 if (n < count) 478 return RET_TOOSMALL; 479 if (state4 != STATE4_DESIGNATED_CNS11643_6) { 480 r[0] = ESC; 481 r[1] = '$'; 482 r[2] = '+'; 483 r[3] = 'L'; 484 r += 4; 485 state4 = STATE4_DESIGNATED_CNS11643_6; 486 } 487 r[0] = ESC; 488 r[1] = 'O'; 489 r[2] = buf[1]; 490 r[3] = buf[2]; 491 COMBINE_STATE; 492 conv->ostate = state; 493 return count; 494 } 495 496 /* Try CNS 11643-1992 Plane 7. */ 497 if (buf[0] == 7 && buf[1] < 0x80 && buf[2] < 0x80) { 498 int count = (state4 == STATE4_DESIGNATED_CNS11643_7 ? 0 : 4) + 4; 499 if (n < count) 500 return RET_TOOSMALL; 501 if (state4 != STATE4_DESIGNATED_CNS11643_7) { 502 r[0] = ESC; 503 r[1] = '$'; 504 r[2] = '+'; 505 r[3] = 'M'; 506 r += 4; 507 state4 = STATE4_DESIGNATED_CNS11643_7; 508 } 509 r[0] = ESC; 510 r[1] = 'O'; 511 r[2] = buf[1]; 512 r[3] = buf[2]; 513 COMBINE_STATE; 514 conv->ostate = state; 515 return count; 516 } 517 518 } 519 520 /* Try ISO-IR-165. */ 521 ret = isoir165_wctomb(conv,buf,wc,2); 522 if (ret != RET_ILUNI) { 523 if (ret != 2) abort(); 524 if (buf[0] < 0x80 && buf[1] < 0x80) { 525 int count = (state2 == STATE2_DESIGNATED_ISO_IR_165 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 526 if (n < count) 527 return RET_TOOSMALL; 528 if (state2 != STATE2_DESIGNATED_ISO_IR_165) { 529 r[0] = ESC; 530 r[1] = '$'; 531 r[2] = ')'; 532 r[3] = 'E'; 533 r += 4; 534 state2 = STATE2_DESIGNATED_ISO_IR_165; 535 } 536 if (state1 != STATE_TWOBYTE) { 537 r[0] = SO; 538 r += 1; 539 state1 = STATE_TWOBYTE; 540 } 541 r[0] = buf[0]; 542 r[1] = buf[1]; 543 COMBINE_STATE; 544 conv->ostate = state; 545 return count; 546 } 547 } 548 549 return RET_ILUNI; 550 } 551 552 static int 553 iso2022_cn_ext_reset (conv_t conv, unsigned char *r, int n) 554 { 555 state_t state = conv->ostate; 556 SPLIT_STATE; 557 (void)state2; 558 (void)state3; 559 (void)state4; 560 if (state1 != STATE_ASCII) { 561 if (n < 1) 562 return RET_TOOSMALL; 563 r[0] = SI; 564 /* conv->ostate = 0; will be done by the caller */ 565 return 1; 566 } else 567 return 0; 568 } 569 570 #undef COMBINE_STATE 571 #undef SPLIT_STATE 572 #undef STATE4_DESIGNATED_CNS11643_7 573 #undef STATE4_DESIGNATED_CNS11643_6 574 #undef STATE4_DESIGNATED_CNS11643_5 575 #undef STATE4_DESIGNATED_CNS11643_4 576 #undef STATE4_DESIGNATED_CNS11643_3 577 #undef STATE4_NONE 578 #undef STATE3_DESIGNATED_CNS11643_2 579 #undef STATE3_NONE 580 #undef STATE2_DESIGNATED_ISO_IR_165 581 #undef STATE2_DESIGNATED_CNS11643_1 582 #undef STATE2_DESIGNATED_GB2312 583 #undef STATE2_NONE 584 #undef STATE_TWOBYTE 585 #undef STATE_ASCII 586