1 /* 2 * Copyright (C) 1999-2001 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * ISO-2022-KR 23 */ 24 25 /* Specification: RFC 1557 */ 26 27 /* Note: CJK.INF says the SO designator needs to appear only once at the 28 beginning of a text, but to decrease the risk of ambiguities, when 29 producing ISO-2022-KR, we repeat the designator in every line containing 30 SO characters. RFC 1557 does not mandate this. */ 31 32 #define ESC 0x1b 33 #define SO 0x0e 34 #define SI 0x0f 35 36 /* 37 * The state is composed of one of the following values 38 */ 39 #define STATE_ASCII 0 40 #define STATE_TWOBYTE 1 41 /* 42 * and one of the following values, << 8 43 */ 44 #define STATE2_NONE 0 45 #define STATE2_DESIGNATED_KSC5601 1 46 47 #define SPLIT_STATE \ 48 unsigned int state1 = state & 0xff, state2 = state >> 8 49 #define COMBINE_STATE \ 50 state = (state2 << 8) | state1 51 52 static int 53 iso2022_kr_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 54 { 55 state_t state = conv->istate; 56 SPLIT_STATE; 57 int count = 0; 58 unsigned char c; 59 for (;;) { 60 c = *s; 61 if (c == ESC) { 62 if (n < count+4) 63 goto none; 64 if (s[1] == '$') { 65 if (s[2] == ')') { 66 if (s[3] == 'C') { 67 state2 = STATE2_DESIGNATED_KSC5601; 68 s += 4; count += 4; 69 if (n < count+1) 70 goto none; 71 continue; 72 } 73 } 74 } 75 return RET_ILSEQ; 76 } 77 if (c == SO) { 78 if (state2 != STATE2_DESIGNATED_KSC5601) 79 return RET_ILSEQ; 80 state1 = STATE_TWOBYTE; 81 s++; count++; 82 if (n < count+1) 83 goto none; 84 continue; 85 } 86 if (c == SI) { 87 state1 = STATE_ASCII; 88 s++; count++; 89 if (n < count+1) 90 goto none; 91 continue; 92 } 93 break; 94 } 95 switch (state1) { 96 case STATE_ASCII: 97 if (c < 0x80) { 98 int ret = ascii_mbtowc(conv,pwc,s,1); 99 if (ret == RET_ILSEQ) 100 return RET_ILSEQ; 101 if (ret != 1) abort(); 102 #if 0 /* Accept ISO-2022-KR according to CJK.INF. */ 103 if (*pwc == 0x000a || *pwc == 0x000d) 104 state2 = STATE2_NONE; 105 #endif 106 COMBINE_STATE; 107 conv->istate = state; 108 return count+1; 109 } else 110 return RET_ILSEQ; 111 case STATE_TWOBYTE: 112 if (n < count+2) 113 goto none; 114 if (state2 != STATE2_DESIGNATED_KSC5601) abort(); 115 if (s[0] < 0x80 && s[1] < 0x80) { 116 int ret = ksc5601_mbtowc(conv,pwc,s,2); 117 if (ret == RET_ILSEQ) 118 return RET_ILSEQ; 119 if (ret != 2) abort(); 120 COMBINE_STATE; 121 conv->istate = state; 122 return count+2; 123 } else 124 return RET_ILSEQ; 125 default: abort(); 126 } 127 128 none: 129 COMBINE_STATE; 130 conv->istate = state; 131 return RET_TOOFEW(count); 132 } 133 134 static int 135 iso2022_kr_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 136 { 137 state_t state = conv->ostate; 138 SPLIT_STATE; 139 unsigned char buf[2]; 140 int ret; 141 142 /* Try ASCII. */ 143 ret = ascii_wctomb(conv,buf,wc,1); 144 if (ret != RET_ILUNI) { 145 if (ret != 1) abort(); 146 if (buf[0] < 0x80) { 147 int count = (state1 == STATE_ASCII ? 1 : 2); 148 if (n < count) 149 return RET_TOOSMALL; 150 if (state1 != STATE_ASCII) { 151 r[0] = SI; 152 r += 1; 153 state1 = STATE_ASCII; 154 } 155 r[0] = buf[0]; 156 if (wc == 0x000a || wc == 0x000d) 157 state2 = STATE2_NONE; 158 COMBINE_STATE; 159 conv->ostate = state; 160 return count; 161 } 162 } 163 164 /* Try KS C 5601-1992. */ 165 ret = ksc5601_wctomb(conv,buf,wc,2); 166 if (ret != RET_ILUNI) { 167 if (ret != 2) abort(); 168 if (buf[0] < 0x80 && buf[1] < 0x80) { 169 int count = (state2 == STATE2_DESIGNATED_KSC5601 ? 0 : 4) + (state1 == STATE_TWOBYTE ? 0 : 1) + 2; 170 if (n < count) 171 return RET_TOOSMALL; 172 if (state2 != STATE2_DESIGNATED_KSC5601) { 173 r[0] = ESC; 174 r[1] = '$'; 175 r[2] = ')'; 176 r[3] = 'C'; 177 r += 4; 178 state2 = STATE2_DESIGNATED_KSC5601; 179 } 180 if (state1 != STATE_TWOBYTE) { 181 r[0] = SO; 182 r += 1; 183 state1 = STATE_TWOBYTE; 184 } 185 r[0] = buf[0]; 186 r[1] = buf[1]; 187 COMBINE_STATE; 188 conv->ostate = state; 189 return count; 190 } 191 } 192 193 return RET_ILUNI; 194 } 195 196 static int 197 iso2022_kr_reset (conv_t conv, unsigned char *r, int n) 198 { 199 state_t state = conv->ostate; 200 SPLIT_STATE; 201 (void)state2; 202 if (state1 != STATE_ASCII) { 203 if (n < 1) 204 return RET_TOOSMALL; 205 r[0] = SI; 206 /* conv->ostate = 0; will be done by the caller */ 207 return 1; 208 } else 209 return 0; 210 } 211 212 #undef COMBINE_STATE 213 #undef SPLIT_STATE 214 #undef STATE2_DESIGNATED_KSC5601 215 #undef STATE2_NONE 216 #undef STATE_TWOBYTE 217 #undef STATE_ASCII 218