1 /* 2 * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * GB18030 two-byte extension 23 */ 24 25 static const unsigned short gb18030ext_2uni_pagea9[13] = { 26 /* 0xa9 */ 27 0x303e, 0x2ff0, 0x2ff1, 0x2ff2, 0x2ff3, 0x2ff4, 0x2ff5, 0x2ff6, 28 0x2ff7, 0x2ff8, 0x2ff9, 0x2ffa, 0x2ffb, 29 }; 30 static const unsigned short gb18030ext_2uni_pagefe[96] = { 31 /* 0xfe */ 32 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 33 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 0xfffd, 34 0x2e81, 0xe816, 0xe817, 0xe818, 0x2e84, 0x3473, 0x3447, 0x2e88, 35 0x2e8b, 0xe81e, 0x359e, 0x361a, 0x360e, 0x2e8c, 0x2e97, 0x396e, 36 0x3918, 0xe826, 0x39cf, 0x39df, 0x3a73, 0x39d0, 0xe82b, 0xe82c, 37 0x3b4e, 0x3c6e, 0x3ce0, 0x2ea7, 0xe831, 0xe832, 0x2eaa, 0x4056, 38 0x415f, 0x2eae, 0x4337, 0x2eb3, 0x2eb6, 0x2eb7, 0xe83b, 0x43b1, 39 0x43ac, 0x2ebb, 0x43dd, 0x44d6, 0x4661, 0x464c, 0xe843, 0x4723, 40 0x4729, 0x477c, 0x478d, 0x2eca, 0x4947, 0x497a, 0x497d, 0x4982, 41 0x4983, 0x4985, 0x4986, 0x499f, 0x499b, 0x49b7, 0x49b6, 0xe854, 42 0xe855, 0x4ca3, 0x4c9f, 0x4ca0, 0x4ca1, 0x4c77, 0x4ca2, 0x4d13, 43 0x4d14, 0x4d15, 0x4d16, 0x4d17, 0x4d18, 0x4d19, 0x4dae, 0xe864, 44 }; 45 46 static int 47 gb18030ext_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 48 { 49 unsigned char c1 = s[0]; 50 if ((c1 == 0xa2) || (c1 >= 0xa4 && c1 <= 0xa9) || (c1 == 0xd7) || (c1 == 0xfe)) { 51 if (n >= 2) { 52 unsigned char c2 = s[1]; 53 if ((c2 >= 0x40 && c2 < 0x7f) || (c2 >= 0x80 && c2 < 0xff)) { 54 unsigned int i = 190 * (c1 - 0x81) + (c2 - (c2 >= 0x80 ? 0x41 : 0x40)); 55 unsigned short wc = 0xfffd; 56 switch (c1) { 57 case 0xa2: 58 if (i >= 6376 && i <= 6381) /* 0xA2AB..0xA2B0 */ 59 wc = 0xe766 + (i - 6376); 60 else if (i == 6432) /* 0xA2E3 */ 61 wc = 0x20ac; 62 else if (i == 6433) /* 0xA2E4 */ 63 wc = 0xe76d; 64 else if (i >= 6444 && i <= 6445) /* 0xA2EF..0xA2F0 */ 65 wc = 0xe76e + (i - 6444); 66 else if (i >= 6458 && i <= 6459) /* 0xA2FD..0xA2FE */ 67 wc = 0xe770 + (i - 6458); 68 break; 69 case 0xa4: 70 if (i >= 6829 && i <= 6839) /* 0xA4F4..0xA4FE */ 71 wc = 0xe772 + (i - 6829); 72 break; 73 case 0xa5: 74 if (i >= 7022 && i <= 7029) /* 0xA5F7..0xA5FE */ 75 wc = 0xe77d + (i - 7022); 76 break; 77 case 0xa6: 78 if (i >= 7150 && i <= 7157) /* 0xA6B9..0xA6C0 */ 79 wc = 0xe785 + (i - 7150); 80 else if (i >= 7182 && i <= 7190) /* 0xA6D9..0xA6DF */ 81 wc = 0xe78d + (i - 7182); 82 else if (i >= 7201 && i <= 7202) /* 0xA6EC..0xA6ED */ 83 wc = 0xe794 + (i - 7201); 84 else if (i == 7208) /* 0xA6F3 */ 85 wc = 0xe796; 86 else if (i >= 7211 && i <= 7219) /* 0xA6F6..0xA6FE */ 87 wc = 0xe797 + (i - 7211); 88 break; 89 case 0xa7: 90 if (i >= 7349 && i <= 7363) /* 0xA7C2..0xA7D0 */ 91 wc = 0xe7a0 + (i - 7349); 92 else if (i >= 7397 && i <= 7409) /* 0xA7F2..0xA7FE */ 93 wc = 0xe7af + (i - 7397); 94 break; 95 case 0xa8: 96 if (i >= 7495 && i <= 7505) /* 0xA896..0xA8A0 */ 97 wc = 0xe7bc + (i - 7495); 98 else if (i == 7533) /* 0xA8BC */ 99 wc = 0xe7c7; 100 else if (i == 7536) /* 0xA8BF */ 101 wc = 0x01f9; 102 else if (i >= 7538 && i <= 7541) /* 0xA8C1..0xA8C4 */ 103 wc = 0xe7c9 + (i - 7538); 104 else if (i >= 7579 && i <= 7599) /* 0xA8EA..0xA8FE */ 105 wc = 0xe7cd + (i - 7579); 106 break; 107 case 0xa9: 108 if (i == 7624) /* 0xA958 */ 109 wc = 0xe7e2; 110 else if (i == 7627) /* 0xA95B */ 111 wc = 0xe7e3; 112 else if (i >= 7629 && i <= 7631) /* 0xA95D..0xA95F */ 113 wc = 0xe7e4 + (i - 7629); 114 else if (i >= 7672 && i < 7685) /* 0xA989..0xA995 */ 115 wc = gb18030ext_2uni_pagea9[i-7672]; 116 else if (i >= 7686 && i <= 7698) /* 0xA997..0xA9A3 */ 117 wc = 0xe7f4 + (i - 7686); 118 else if (i >= 7775 && i <= 7789) /* 0xA9F0..0xA9FE */ 119 wc = 0xe801 + (i - 7775); 120 break; 121 case 0xd7: 122 if (i >= 16525 && i <= 16529) /* 0xD7FA..0xD7FE */ 123 wc = 0xe810 + (i - 16525); 124 break; 125 case 0xfe: 126 if (i < 23846) 127 wc = gb18030ext_2uni_pagefe[i-23750]; 128 break; 129 default: 130 break; 131 } 132 if (wc != 0xfffd) { 133 *pwc = (ucs4_t) wc; 134 return 2; 135 } 136 } 137 return RET_ILSEQ; 138 } 139 return RET_TOOFEW(0); 140 } 141 return RET_ILSEQ; 142 } 143 144 static const unsigned short gb18030ext_page2e[80] = { 145 0x0000, 0xfe50, 0x0000, 0x0000, 0xfe54, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/ 146 0xfe57, 0x0000, 0x0000, 0xfe58, 0xfe5d, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/ 147 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe5e, /*0x90-0x97*/ 148 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x98-0x9f*/ 149 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe6b, /*0xa0-0xa7*/ 150 0x0000, 0x0000, 0xfe6e, 0x0000, 0x0000, 0x0000, 0xfe71, 0x0000, /*0xa8-0xaf*/ 151 0x0000, 0x0000, 0x0000, 0xfe73, 0x0000, 0x0000, 0xfe74, 0xfe75, /*0xb0-0xb7*/ 152 0x0000, 0x0000, 0x0000, 0xfe79, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb8-0xbf*/ 153 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc0-0xc7*/ 154 0x0000, 0x0000, 0xfe84, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc8-0xcf*/ 155 }; 156 static const unsigned short gb18030ext_page2f[16] = { 157 0xa98a, 0xa98b, 0xa98c, 0xa98d, 0xa98e, 0xa98f, 0xa990, 0xa991, /*0xf0-0xf7*/ 158 0xa992, 0xa993, 0xa994, 0xa995, 0x0000, 0x0000, 0x0000, 0x0000, /*0xf8-0xff*/ 159 }; 160 static const unsigned short gb18030ext_page34[56] = { 161 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe56, /*0x40-0x47*/ 162 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/ 163 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/ 164 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/ 165 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/ 166 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x68-0x6f*/ 167 0x0000, 0x0000, 0x0000, 0xfe55, 0x0000, 0x0000, 0x0000, 0x0000, /*0x70-0x77*/ 168 }; 169 static const unsigned short gb18030ext_page36[24] = { 170 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe5c, 0x0000, /*0x08-0x0f*/ 171 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x10-0x17*/ 172 0x0000, 0x0000, 0xfe5b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/ 173 }; 174 static const unsigned short gb18030ext_page39[24] = { 175 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe62, /*0xc8-0xcf*/ 176 0xfe65, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xd0-0xd7*/ 177 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe63, /*0xd8-0xdf*/ 178 }; 179 static const unsigned short gb18030ext_page43[56] = { 180 0x0000, 0x0000, 0x0000, 0x0000, 0xfe78, 0x0000, 0x0000, 0x0000, /*0xa8-0xaf*/ 181 0x0000, 0xfe77, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb0-0xb7*/ 182 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xb8-0xbf*/ 183 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc0-0xc7*/ 184 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xc8-0xcf*/ 185 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xd0-0xd7*/ 186 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe7a, 0x0000, 0x0000, /*0xd8-0xdf*/ 187 }; 188 static const unsigned short gb18030ext_page46[32] = { 189 0x0000, 0x0000, 0x0000, 0x0000, 0xfe7d, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/ 190 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/ 191 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/ 192 0x0000, 0xfe7c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/ 193 }; 194 static const unsigned short gb18030ext_page47_1[16] = { 195 0x0000, 0x0000, 0x0000, 0xfe80, 0x0000, 0x0000, 0x0000, 0x0000, /*0x20-0x27*/ 196 0x0000, 0xfe81, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x28-0x2f*/ 197 }; 198 static const unsigned short gb18030ext_page47_2[24] = { 199 0x0000, 0x0000, 0x0000, 0x0000, 0xfe82, 0x0000, 0x0000, 0x0000, /*0x78-0x7f*/ 200 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/ 201 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe83, 0x0000, 0x0000, /*0x88-0x8f*/ 202 }; 203 static const unsigned short gb18030ext_page49[120] = { 204 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe85, /*0x40-0x47*/ 205 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x48-0x4f*/ 206 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x50-0x57*/ 207 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x58-0x5f*/ 208 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x60-0x67*/ 209 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x68-0x6f*/ 210 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x70-0x77*/ 211 0x0000, 0x0000, 0xfe86, 0x0000, 0x0000, 0xfe87, 0x0000, 0x0000, /*0x78-0x7f*/ 212 0x0000, 0x0000, 0xfe88, 0xfe89, 0x0000, 0xfe8a, 0xfe8b, 0x0000, /*0x80-0x87*/ 213 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/ 214 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x90-0x97*/ 215 0x0000, 0x0000, 0x0000, 0xfe8d, 0x0000, 0x0000, 0x0000, 0xfe8c, /*0x98-0x9f*/ 216 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa0-0xa7*/ 217 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa8-0xaf*/ 218 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe8f, 0xfe8e, /*0xb0-0xb7*/ 219 }; 220 static const unsigned short gb18030ext_page4c[56] = { 221 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe96, /*0x70-0x77*/ 222 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x78-0x7f*/ 223 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x80-0x87*/ 224 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x88-0x8f*/ 225 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x90-0x97*/ 226 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfe93, /*0x98-0x9f*/ 227 0xfe94, 0xfe95, 0xfe97, 0xfe92, 0x0000, 0x0000, 0x0000, 0x0000, /*0xa0-0xa7*/ 228 }; 229 static const unsigned short gb18030ext_page4d[16] = { 230 0x0000, 0x0000, 0x0000, 0xfe98, 0xfe99, 0xfe9a, 0xfe9b, 0xfe9c, /*0x10-0x17*/ 231 0xfe9d, 0xfe9e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /*0x18-0x1f*/ 232 }; 233 234 static int 235 gb18030ext_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 236 { 237 if (n >= 2) { 238 unsigned short c = 0; 239 if (wc == 0x01f9) 240 c = 0xa8bf; 241 else if (wc == 0x20ac) 242 c = 0xa2e3; 243 else if (wc >= 0x2e80 && wc < 0x2ed0) 244 c = gb18030ext_page2e[wc-0x2e80]; 245 else if (wc >= 0x2ff0 && wc < 0x3000) 246 c = gb18030ext_page2f[wc-0x2ff0]; 247 else if (wc == 0x303e) 248 c = 0xa989; 249 else if (wc >= 0x3440 && wc < 0x3478) 250 c = gb18030ext_page34[wc-0x3440]; 251 else if (wc == 0x359e) 252 c = 0xfe5a; 253 else if (wc >= 0x3608 && wc < 0x3620) 254 c = gb18030ext_page36[wc-0x3608]; 255 else if (wc == 0x3918) 256 c = 0xfe60; 257 else if (wc == 0x396e) 258 c = 0xfe5f; 259 else if (wc >= 0x39c8 && wc < 0x39e0) 260 c = gb18030ext_page39[wc-0x39c8]; 261 else if (wc == 0x3a73) 262 c = 0xfe64; 263 else if (wc == 0x3b4e) 264 c = 0xfe68; 265 else if (wc == 0x3c6e) 266 c = 0xfe69; 267 else if (wc == 0x3ce0) 268 c = 0xfe6a; 269 else if (wc == 0x4056) 270 c = 0xfe6f; 271 else if (wc == 0x415f) 272 c = 0xfe70; 273 else if (wc == 0x4337) 274 c = 0xfe72; 275 else if (wc >= 0x43a8 && wc < 0x43e0) 276 c = gb18030ext_page43[wc-0x43a8]; 277 else if (wc == 0x44d6) 278 c = 0xfe7b; 279 else if (wc >= 0x4648 && wc < 0x4668) 280 c = gb18030ext_page46[wc-0x4648]; 281 else if (wc >= 0x4720 && wc < 0x4730) 282 c = gb18030ext_page47_1[wc-0x4720]; 283 else if (wc >= 0x4778 && wc < 0x4790) 284 c = gb18030ext_page47_2[wc-0x4778]; 285 else if (wc >= 0x4940 && wc < 0x49b8) 286 c = gb18030ext_page49[wc-0x4940]; 287 else if (wc >= 0x4c70 && wc < 0x4ca8) 288 c = gb18030ext_page4c[wc-0x4c70]; 289 else if (wc >= 0x4d10 && wc < 0x4d20) 290 c = gb18030ext_page4d[wc-0x4d10]; 291 else if (wc == 0x4dae) 292 c = 0xfe9f; 293 if (c != 0) { 294 r[0] = (c >> 8); r[1] = (c & 0xff); 295 return 2; 296 } 297 return RET_ILUNI; 298 } 299 return RET_TOOSMALL; 300 } 301