1 /* 2 * Copyright (C) 1999-2002 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * SHIFT_JISX0213 23 */ 24 25 /* The structure of Shift_JISX0213 is as follows: 26 27 0x00..0x7F: ISO646-JP, an ASCII variant 28 29 0x{A1..DF}: JISX0201 Katakana. 30 31 0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1. 32 33 0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping. 34 35 Note that some JISX0213 characters are not contained in Unicode 3.2 36 and are therefore best represented as sequences of Unicode characters. 37 */ 38 39 #include "jisx0213.h" 40 #include "flushwc.h" 41 42 static int 43 shift_jisx0213_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 44 { 45 ucs4_t last_wc = conv->istate; 46 if (last_wc) { 47 /* Output the buffered character. */ 48 conv->istate = 0; 49 *pwc = last_wc; 50 return 0; /* Don't advance the input pointer. */ 51 } else { 52 unsigned char c = *s; 53 if (c < 0x80) { 54 /* Plain ISO646-JP character. */ 55 if (c == 0x5c) 56 *pwc = (ucs4_t) 0x00a5; 57 else if (c == 0x7e) 58 *pwc = (ucs4_t) 0x203e; 59 else 60 *pwc = (ucs4_t) c; 61 return 1; 62 } else if (c >= 0xa1 && c <= 0xdf) { 63 *pwc = c + 0xfec0; 64 return 1; 65 } else { 66 if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) { 67 /* Two byte character. */ 68 if (n >= 2) { 69 unsigned char c2 = s[1]; 70 if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) { 71 unsigned int c1; 72 ucs4_t wc; 73 /* Convert to row and column. */ 74 if (c < 0xe0) 75 c -= 0x81; 76 else 77 c -= 0xc1; 78 if (c2 < 0x80) 79 c2 -= 0x40; 80 else 81 c2 -= 0x41; 82 /* Now 0 <= c <= 0x3b, 0 <= c2 <= 0xbb. */ 83 c1 = 2 * c; 84 if (c2 >= 0x5e) 85 c2 -= 0x5e, c1++; 86 c2 += 0x21; 87 if (c1 >= 0x5e) { 88 /* Handling of JISX 0213 plane 2 rows. */ 89 if (c1 >= 0x67) 90 c1 += 230; 91 else if (c1 >= 0x63 || c1 == 0x5f) 92 c1 += 168; 93 else 94 c1 += 162; 95 } 96 wc = jisx0213_to_ucs4(0x121+c1,c2); 97 if (wc) { 98 if (wc < 0x80) { 99 /* It's a combining character. */ 100 ucs4_t wc1 = jisx0213_to_ucs_combining[wc - 1][0]; 101 ucs4_t wc2 = jisx0213_to_ucs_combining[wc - 1][1]; 102 /* We cannot output two Unicode characters at once. So, 103 output the first character and buffer the second one. */ 104 *pwc = wc1; 105 conv->istate = wc2; 106 } else 107 *pwc = wc; 108 return 2; 109 } 110 } 111 } else 112 return RET_TOOFEW(0); 113 } 114 return RET_ILSEQ; 115 } 116 } 117 } 118 119 #define shift_jisx0213_flushwc normal_flushwc 120 121 /* Composition tables for each of the relevant combining characters. */ 122 static const struct { unsigned short base; unsigned short composed; } shift_jisx0213_comp_table_data[] = { 123 #define shift_jisx0213_comp_table02e5_idx 0 124 #define shift_jisx0213_comp_table02e5_len 1 125 { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */ 126 #define shift_jisx0213_comp_table02e9_idx (shift_jisx0213_comp_table02e5_idx+shift_jisx0213_comp_table02e5_len) 127 #define shift_jisx0213_comp_table02e9_len 1 128 { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */ 129 #define shift_jisx0213_comp_table0300_idx (shift_jisx0213_comp_table02e9_idx+shift_jisx0213_comp_table02e9_len) 130 #define shift_jisx0213_comp_table0300_len 5 131 { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */ 132 { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */ 133 { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */ 134 { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */ 135 { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */ 136 #define shift_jisx0213_comp_table0301_idx (shift_jisx0213_comp_table0300_idx+shift_jisx0213_comp_table0300_len) 137 #define shift_jisx0213_comp_table0301_len 4 138 { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */ 139 { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */ 140 { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */ 141 { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */ 142 #define shift_jisx0213_comp_table309a_idx (shift_jisx0213_comp_table0301_idx+shift_jisx0213_comp_table0301_len) 143 #define shift_jisx0213_comp_table309a_len 14 144 { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */ 145 { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */ 146 { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */ 147 { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */ 148 { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */ 149 { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */ 150 { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */ 151 { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */ 152 { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */ 153 { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */ 154 { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */ 155 { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */ 156 { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */ 157 { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */ 158 }; 159 160 static int 161 shift_jisx0213_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 162 { 163 int count = 0; 164 unsigned short lasttwo = conv->ostate; 165 166 if (lasttwo) { 167 /* Attempt to combine the last character with this one. */ 168 unsigned int idx; 169 unsigned int len; 170 171 if (wc == 0x02e5) 172 idx = shift_jisx0213_comp_table02e5_idx, 173 len = shift_jisx0213_comp_table02e5_len; 174 else if (wc == 0x02e9) 175 idx = shift_jisx0213_comp_table02e9_idx, 176 len = shift_jisx0213_comp_table02e9_len; 177 else if (wc == 0x0300) 178 idx = shift_jisx0213_comp_table0300_idx, 179 len = shift_jisx0213_comp_table0300_len; 180 else if (wc == 0x0301) 181 idx = shift_jisx0213_comp_table0301_idx, 182 len = shift_jisx0213_comp_table0301_len; 183 else if (wc == 0x309a) 184 idx = shift_jisx0213_comp_table309a_idx, 185 len = shift_jisx0213_comp_table309a_len; 186 else 187 goto not_combining; 188 189 do 190 if (shift_jisx0213_comp_table_data[idx].base == lasttwo) 191 break; 192 while (++idx, --len > 0); 193 194 if (len > 0) { 195 /* Output the combined character. */ 196 if (n >= 2) { 197 lasttwo = shift_jisx0213_comp_table_data[idx].composed; 198 r[0] = (lasttwo >> 8) & 0xff; 199 r[1] = lasttwo & 0xff; 200 conv->ostate = 0; 201 return 2; 202 } else 203 return RET_TOOSMALL; 204 } 205 206 not_combining: 207 /* Output the buffered character. */ 208 if (n < 2) 209 return RET_TOOSMALL; 210 r[0] = (lasttwo >> 8) & 0xff; 211 r[1] = lasttwo & 0xff; 212 r += 2; 213 count = 2; 214 } 215 216 if (wc < 0x80 && wc != 0x5c && wc != 0x7e) { 217 /* Plain ISO646-JP character. */ 218 if (n > count) { 219 r[0] = (unsigned char) wc; 220 conv->ostate = 0; 221 return count+1; 222 } else 223 return RET_TOOSMALL; 224 } else if (wc == 0x00a5) { 225 if (n > count) { 226 r[0] = 0x5c; 227 conv->ostate = 0; 228 return count+1; 229 } else 230 return RET_TOOSMALL; 231 } else if (wc == 0x203e) { 232 if (n > count) { 233 r[0] = 0x7e; 234 conv->ostate = 0; 235 return count+1; 236 } else 237 return RET_TOOSMALL; 238 } else if (wc >= 0xff61 && wc <= 0xff9f) { 239 /* Half-width katakana. */ 240 if (n > count) { 241 r[0] = wc - 0xfec0; 242 conv->ostate = 0; 243 return count+1; 244 } else 245 return RET_TOOSMALL; 246 } else { 247 unsigned int s1, s2; 248 unsigned short jch = ucs4_to_jisx0213(wc); 249 if (jch != 0) { 250 /* Convert it to shifted representation. */ 251 s1 = jch >> 8; 252 s2 = jch & 0x7f; 253 s1 -= 0x21; 254 s2 -= 0x21; 255 if (s1 >= 0x5e) { 256 /* Handling of JISX 0213 plane 2 rows. */ 257 if (s1 >= 0xcd) /* rows 0x26E..0x27E */ 258 s1 -= 102; 259 else if (s1 >= 0x8b || s1 == 0x87) /* rows 0x228, 0x22C..0x22F */ 260 s1 -= 40; 261 else /* rows 0x221, 0x223..0x225 */ 262 s1 -= 34; 263 /* Now 0x5e <= s1 <= 0x77. */ 264 } 265 if (s1 & 1) 266 s2 += 0x5e; 267 s1 = s1 >> 1; 268 if (s1 < 0x1f) 269 s1 += 0x81; 270 else 271 s1 += 0xc1; 272 if (s2 < 0x3f) 273 s2 += 0x40; 274 else 275 s2 += 0x41; 276 if (jch & 0x0080) { 277 /* A possible match in comp_table_data. We have to buffer it. */ 278 /* We know it's a JISX 0213 plane 1 character. */ 279 if (jch & 0x8000) abort(); 280 conv->ostate = (s1 << 8) | s2; 281 return count+0; 282 } 283 /* Output the shifted representation. */ 284 if (n >= count+2) { 285 r[0] = s1; 286 r[1] = s2; 287 conv->ostate = 0; 288 return count+2; 289 } else 290 return RET_TOOSMALL; 291 } 292 return RET_ILUNI; 293 } 294 } 295 296 static int 297 shift_jisx0213_reset (conv_t conv, unsigned char *r, int n) 298 { 299 state_t lasttwo = conv->ostate; 300 301 if (lasttwo) { 302 if (n < 2) 303 return RET_TOOSMALL; 304 r[0] = (lasttwo >> 8) & 0xff; 305 r[1] = lasttwo & 0xff; 306 /* conv->ostate = 0; will be done by the caller */ 307 return 2; 308 } else 309 return 0; 310 } 311