1 /* 2 * Copyright (C) 1999-2002 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * EUC-JISX0213 23 */ 24 25 /* The structure of EUC-JISX0213 is as follows: 26 27 0x00..0x7F: ASCII 28 29 0x8E{A1..FE}: JISX0201 Katakana, with prefix 0x8E, offset by +0x80. 30 31 0x8F{A1..FE}{A1..FE}: JISX0213 plane 2, with prefix 0x8F, offset by +0x8080. 32 33 0x{A1..FE}{A1..FE}: JISX0213 plane 1, offset by +0x8080. 34 35 Note that some JISX0213 characters are not contained in Unicode 3.2 36 and are therefore best represented as sequences of Unicode characters. 37 */ 38 39 #include "jisx0213.h" 40 #include "flushwc.h" 41 42 static int 43 euc_jisx0213_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 44 { 45 ucs4_t last_wc = conv->istate; 46 if (last_wc) { 47 /* Output the buffered character. */ 48 conv->istate = 0; 49 *pwc = last_wc; 50 return 0; /* Don't advance the input pointer. */ 51 } else { 52 unsigned char c = *s; 53 if (c < 0x80) { 54 /* Plain ASCII character. */ 55 *pwc = (ucs4_t) c; 56 return 1; 57 } else { 58 if ((c >= 0xa1 && c <= 0xfe) || c == 0x8e || c == 0x8f) { 59 /* Two or three byte character. */ 60 if (n >= 2) { 61 unsigned char c2 = s[1]; 62 if (c2 >= 0xa1 && c2 <= 0xfe) { 63 if (c == 0x8e) { 64 /* Half-width katakana. */ 65 if (c2 <= 0xdf) { 66 *pwc = c2 + 0xfec0; 67 return 2; 68 } 69 } else { 70 ucs4_t wc; 71 if (c == 0x8f) { 72 /* JISX 0213 plane 2. */ 73 if (n >= 3) { 74 unsigned char c3 = s[2]; 75 wc = jisx0213_to_ucs4(0x200-0x80+c2,c3^0x80); 76 } else 77 return RET_TOOFEW(0); 78 } else { 79 /* JISX 0213 plane 1. */ 80 wc = jisx0213_to_ucs4(0x100-0x80+c,c2^0x80); 81 } 82 if (wc) { 83 if (wc < 0x80) { 84 /* It's a combining character. */ 85 ucs4_t wc1 = jisx0213_to_ucs_combining[wc - 1][0]; 86 ucs4_t wc2 = jisx0213_to_ucs_combining[wc - 1][1]; 87 /* We cannot output two Unicode characters at once. So, 88 output the first character and buffer the second one. */ 89 *pwc = wc1; 90 conv->istate = wc2; 91 } else 92 *pwc = wc; 93 return (c == 0x8f ? 3 : 2); 94 } 95 } 96 } 97 } else 98 return RET_TOOFEW(0); 99 } 100 return RET_ILSEQ; 101 } 102 } 103 } 104 105 #define euc_jisx0213_flushwc normal_flushwc 106 107 /* Composition tables for each of the relevant combining characters. */ 108 static const struct { unsigned short base; unsigned short composed; } euc_jisx0213_comp_table_data[] = { 109 #define euc_jisx0213_comp_table02e5_idx 0 110 #define euc_jisx0213_comp_table02e5_len 1 111 { 0xabe4, 0xabe5 }, /* 0x12B65 = 0x12B64 U+02E5 */ 112 #define euc_jisx0213_comp_table02e9_idx (euc_jisx0213_comp_table02e5_idx+euc_jisx0213_comp_table02e5_len) 113 #define euc_jisx0213_comp_table02e9_len 1 114 { 0xabe0, 0xabe6 }, /* 0x12B66 = 0x12B60 U+02E9 */ 115 #define euc_jisx0213_comp_table0300_idx (euc_jisx0213_comp_table02e9_idx+euc_jisx0213_comp_table02e9_len) 116 #define euc_jisx0213_comp_table0300_len 5 117 { 0xa9dc, 0xabc4 }, /* 0x12B44 = 0x1295C U+0300 */ 118 { 0xabb8, 0xabc8 }, /* 0x12B48 = 0x12B38 U+0300 */ 119 { 0xabb7, 0xabca }, /* 0x12B4A = 0x12B37 U+0300 */ 120 { 0xabb0, 0xabcc }, /* 0x12B4C = 0x12B30 U+0300 */ 121 { 0xabc3, 0xabce }, /* 0x12B4E = 0x12B43 U+0300 */ 122 #define euc_jisx0213_comp_table0301_idx (euc_jisx0213_comp_table0300_idx+euc_jisx0213_comp_table0300_len) 123 #define euc_jisx0213_comp_table0301_len 4 124 { 0xabb8, 0xabc9 }, /* 0x12B49 = 0x12B38 U+0301 */ 125 { 0xabb7, 0xabcb }, /* 0x12B4B = 0x12B37 U+0301 */ 126 { 0xabb0, 0xabcd }, /* 0x12B4D = 0x12B30 U+0301 */ 127 { 0xabc3, 0xabcf }, /* 0x12B4F = 0x12B43 U+0301 */ 128 #define euc_jisx0213_comp_table309a_idx (euc_jisx0213_comp_table0301_idx+euc_jisx0213_comp_table0301_len) 129 #define euc_jisx0213_comp_table309a_len 14 130 { 0xa4ab, 0xa4f7 }, /* 0x12477 = 0x1242B U+309A */ 131 { 0xa4ad, 0xa4f8 }, /* 0x12478 = 0x1242D U+309A */ 132 { 0xa4af, 0xa4f9 }, /* 0x12479 = 0x1242F U+309A */ 133 { 0xa4b1, 0xa4fa }, /* 0x1247A = 0x12431 U+309A */ 134 { 0xa4b3, 0xa4fb }, /* 0x1247B = 0x12433 U+309A */ 135 { 0xa5ab, 0xa5f7 }, /* 0x12577 = 0x1252B U+309A */ 136 { 0xa5ad, 0xa5f8 }, /* 0x12578 = 0x1252D U+309A */ 137 { 0xa5af, 0xa5f9 }, /* 0x12579 = 0x1252F U+309A */ 138 { 0xa5b1, 0xa5fa }, /* 0x1257A = 0x12531 U+309A */ 139 { 0xa5b3, 0xa5fb }, /* 0x1257B = 0x12533 U+309A */ 140 { 0xa5bb, 0xa5fc }, /* 0x1257C = 0x1253B U+309A */ 141 { 0xa5c4, 0xa5fd }, /* 0x1257D = 0x12544 U+309A */ 142 { 0xa5c8, 0xa5fe }, /* 0x1257E = 0x12548 U+309A */ 143 { 0xa6f5, 0xa6f8 }, /* 0x12678 = 0x12675 U+309A */ 144 }; 145 146 static int 147 euc_jisx0213_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 148 { 149 int count = 0; 150 unsigned short lasttwo = conv->ostate; 151 152 if (lasttwo) { 153 /* Attempt to combine the last character with this one. */ 154 unsigned int idx; 155 unsigned int len; 156 157 if (wc == 0x02e5) 158 idx = euc_jisx0213_comp_table02e5_idx, 159 len = euc_jisx0213_comp_table02e5_len; 160 else if (wc == 0x02e9) 161 idx = euc_jisx0213_comp_table02e9_idx, 162 len = euc_jisx0213_comp_table02e9_len; 163 else if (wc == 0x0300) 164 idx = euc_jisx0213_comp_table0300_idx, 165 len = euc_jisx0213_comp_table0300_len; 166 else if (wc == 0x0301) 167 idx = euc_jisx0213_comp_table0301_idx, 168 len = euc_jisx0213_comp_table0301_len; 169 else if (wc == 0x309a) 170 idx = euc_jisx0213_comp_table309a_idx, 171 len = euc_jisx0213_comp_table309a_len; 172 else 173 goto not_combining; 174 175 do 176 if (euc_jisx0213_comp_table_data[idx].base == lasttwo) 177 break; 178 while (++idx, --len > 0); 179 180 if (len > 0) { 181 /* Output the combined character. */ 182 if (n >= 2) { 183 lasttwo = euc_jisx0213_comp_table_data[idx].composed; 184 r[0] = (lasttwo >> 8) & 0xff; 185 r[1] = lasttwo & 0xff; 186 conv->ostate = 0; 187 return 2; 188 } else 189 return RET_TOOSMALL; 190 } 191 192 not_combining: 193 /* Output the buffered character. */ 194 if (n < 2) 195 return RET_TOOSMALL; 196 r[0] = (lasttwo >> 8) & 0xff; 197 r[1] = lasttwo & 0xff; 198 r += 2; 199 count = 2; 200 } 201 202 if (wc < 0x80) { 203 /* Plain ASCII character. */ 204 if (n > count) { 205 r[0] = (unsigned char) wc; 206 conv->ostate = 0; 207 return count+1; 208 } else 209 return RET_TOOSMALL; 210 } else if (wc >= 0xff61 && wc <= 0xff9f) { 211 /* Half-width katakana. */ 212 if (n >= count+2) { 213 r[0] = 0x8e; 214 r[1] = wc - 0xfec0; 215 conv->ostate = 0; 216 return count+2; 217 } else 218 return RET_TOOSMALL; 219 } else { 220 unsigned short jch = ucs4_to_jisx0213(wc); 221 if (jch != 0) { 222 if (jch & 0x0080) { 223 /* A possible match in comp_table_data. We have to buffer it. */ 224 /* We know it's a JISX 0213 plane 1 character. */ 225 if (jch & 0x8000) abort(); 226 conv->ostate = jch | 0x8080; 227 return count+0; 228 } 229 if (jch & 0x8000) { 230 /* JISX 0213 plane 2. */ 231 if (n >= count+3) { 232 r[0] = 0x8f; 233 r[1] = (jch >> 8) | 0x80; 234 r[2] = (jch & 0xff) | 0x80; 235 conv->ostate = 0; 236 return count+3; 237 } else 238 return RET_TOOSMALL; 239 } else { 240 /* JISX 0213 plane 1. */ 241 if (n >= count+2) { 242 r[0] = (jch >> 8) | 0x80; 243 r[1] = (jch & 0xff) | 0x80; 244 conv->ostate = 0; 245 return count+2; 246 } else 247 return RET_TOOSMALL; 248 } 249 } 250 return RET_ILUNI; 251 } 252 } 253 254 static int 255 euc_jisx0213_reset (conv_t conv, unsigned char *r, int n) 256 { 257 state_t lasttwo = conv->ostate; 258 259 if (lasttwo) { 260 if (n < 2) 261 return RET_TOOSMALL; 262 r[0] = (lasttwo >> 8) & 0xff; 263 r[1] = lasttwo & 0xff; 264 /* conv->ostate = 0; will be done by the caller */ 265 return 2; 266 } else 267 return 0; 268 } 269