1 /*
2 * Copyright (C) 1999-2001, 2005 Free Software Foundation, Inc.
3 * This file is part of the GNU LIBICONV Library.
4 *
5 * The GNU LIBICONV Library is free software; you can redistribute it
6 * and/or modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either version 2
8 * of the License, or (at your option) any later version.
9 *
10 * The GNU LIBICONV Library is distributed in the hope that it will be
11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public
16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18 * Fifth Floor, Boston, MA 02110-1301, USA.
19 */
20
21 /*
22 * GB18030 four-byte extension
23 */
24
25 static const unsigned short gb18030uni_charset2uni_ranges[412] = {
26 0x0000, 0x0023, 0x0024, 0x0025, 0x0026, 0x002c, 0x002d, 0x0031,
27 0x0032, 0x0050, 0x0051, 0x0058, 0x0059, 0x005e, 0x005f, 0x005f,
28 0x0060, 0x0063, 0x0064, 0x0066, 0x0067, 0x0067, 0x0068, 0x0068,
29 0x0069, 0x006c, 0x006d, 0x007d, 0x007e, 0x0084, 0x0085, 0x0093,
30 0x0094, 0x00ab, 0x00ac, 0x00ae, 0x00af, 0x00b2, 0x00b3, 0x00cf,
31 0x00d0, 0x0131, 0x0132, 0x0132, 0x0133, 0x0133, 0x0134, 0x0134,
32 0x0135, 0x0135, 0x0136, 0x0136, 0x0137, 0x0137, 0x0138, 0x0138,
33 0x0139, 0x0154, 0x0155, 0x01ab, 0x01ac, 0x01ba, 0x01bb, 0x021f,
34 0x0220, 0x0220, 0x0221, 0x022d, 0x022e, 0x02e4, 0x02e5, 0x02e5,
35 0x02e6, 0x02ec, 0x02ed, 0x02ed, 0x02ee, 0x0324, 0x0325, 0x0332,
36 0x0333, 0x0333, 0x0334, 0x1ef1, 0x1ef2, 0x1ef3, 0x1ef4, 0x1ef4,
37 0x1ef5, 0x1ef6, 0x1ef7, 0x1efd, 0x1efe, 0x1f06, 0x1f07, 0x1f07,
38 0x1f08, 0x1f08, 0x1f09, 0x1f0d, 0x1f0e, 0x1f7d, 0x1f7e, 0x1fd3,
39 0x1fd4, 0x1fd4, 0x1fd5, 0x1fd7, 0x1fd8, 0x1fe3, 0x1fe4, 0x1fed,
40 0x1fee, 0x202b, 0x202c, 0x202f, 0x2030, 0x2045, 0x2046, 0x2047,
41 0x2048, 0x20b5, 0x20b6, 0x20bb, 0x20bc, 0x20bc, 0x20bd, 0x20bf,
42 0x20c0, 0x20c3, 0x20c4, 0x20c5, 0x20c6, 0x20c7, 0x20c8, 0x20c8,
43 0x20c9, 0x20c9, 0x20ca, 0x20cb, 0x20cc, 0x20d0, 0x20d1, 0x20d5,
44 0x20d6, 0x20df, 0x20e0, 0x20e2, 0x20e3, 0x20e7, 0x20e8, 0x20f4,
45 0x20f5, 0x20f6, 0x20f7, 0x20fc, 0x20fd, 0x2121, 0x2122, 0x2124,
46 0x2125, 0x212f, 0x2130, 0x2148, 0x2149, 0x219a, 0x219b, 0x22e7,
47 0x22e8, 0x22f1, 0x22f2, 0x2355, 0x2356, 0x2359, 0x235a, 0x2366,
48 0x2367, 0x2369, 0x236a, 0x2373, 0x2374, 0x2383, 0x2384, 0x238b,
49 0x238c, 0x2393, 0x2394, 0x2396, 0x2397, 0x2398, 0x2399, 0x23aa,
50 0x23ab, 0x23c9, 0x23ca, 0x23cb, 0x23cc, 0x2401, 0x2402, 0x2402,
51 0x2403, 0x2c40, 0x2c41, 0x2c42, 0x2c43, 0x2c45, 0x2c46, 0x2c47,
52 0x2c48, 0x2c51, 0x2c52, 0x2c60, 0x2c61, 0x2c62, 0x2c63, 0x2c65,
53 0x2c66, 0x2c69, 0x2c6a, 0x2c6b, 0x2c6c, 0x2c6e, 0x2c6f, 0x2c7c,
54 0x2c7d, 0x2da1, 0x2da2, 0x2da5, 0x2da6, 0x2da6, 0x2da7, 0x2dab,
55 0x2dac, 0x2dad, 0x2dae, 0x2dc1, 0x2dc2, 0x2dc3, 0x2dc4, 0x2dca,
56 0x2dcb, 0x2dcc, 0x2dcd, 0x2dd1, 0x2dd2, 0x2dd7, 0x2dd8, 0x2ecd,
57 0x2ece, 0x2ed4, 0x2ed5, 0x2f45, 0x2f46, 0x302f, 0x3030, 0x303b,
58 0x303c, 0x303d, 0x303e, 0x305f, 0x3060, 0x3068, 0x3069, 0x306a,
59 0x306b, 0x306c, 0x306d, 0x30dd, 0x30de, 0x3108, 0x3109, 0x3232,
60 0x3233, 0x32a1, 0x32a2, 0x32ac, 0x32ad, 0x35a9, 0x35aa, 0x35fe,
61 0x35ff, 0x365e, 0x365f, 0x366c, 0x366d, 0x36ff, 0x3700, 0x37d9,
62 0x37da, 0x38f8, 0x38f9, 0x3969, 0x396a, 0x3cde, 0x3cdf, 0x3de6,
63 0x3de7, 0x3fbd, 0x3fbe, 0x4031, 0x4032, 0x4035, 0x4036, 0x4060,
64 0x4061, 0x4158, 0x4159, 0x42cd, 0x42ce, 0x42e1, 0x42e2, 0x43a2,
65 0x43a3, 0x43a7, 0x43a8, 0x43f9, 0x43fa, 0x4409, 0x440a, 0x45c2,
66 0x45c3, 0x45f4, 0x45f5, 0x45f6, 0x45f7, 0x45fa, 0x45fb, 0x45fb,
67 0x45fc, 0x460f, 0x4610, 0x4612, 0x4613, 0x4628, 0x4629, 0x48e7,
68 0x48e8, 0x490e, 0x490f, 0x497d, 0x497e, 0x4a11, 0x4a12, 0x4a62,
69 0x4a63, 0x82bc,
70 0x82bd, 0x82bd, 0x82be, 0x82be, 0x82bf, 0x82cb,
71 0x82cc, 0x82cc, 0x82cd, 0x82d1, 0x82d2, 0x82d8, 0x82d9, 0x82dc,
72 0x82dd, 0x82e0, 0x82e1, 0x82e8, 0x82e9, 0x82ef, 0x82f0, 0x82ff,
73 0x8300, 0x830d,
74 0x830e, 0x93d4, 0x93d5, 0x9420, 0x9421, 0x943b,
75 0x943c, 0x948c, 0x948d, 0x9495, 0x9496, 0x94af, 0x94b0, 0x94b0,
76 0x94b1, 0x94b1, 0x94b2, 0x94b4, 0x94b5, 0x94ba, 0x94bb, 0x94bb,
77 0x94bc, 0x94bd, 0x94be, 0x98c3, 0x98c4, 0x98c4, 0x98c5, 0x98c8,
78 0x98c9, 0x98c9, 0x98ca, 0x98ca, 0x98cb, 0x98cb, 0x98cc, 0x9960,
79 0x9961, 0x99e1, 0x99e2, 0x99fb
80 };
81
82 static const unsigned short gb18030uni_uni2charset_ranges[412] = {
83 0x0080, 0x00a3, 0x00a5, 0x00a6, 0x00a9, 0x00af, 0x00b2, 0x00b6,
84 0x00b8, 0x00d6, 0x00d8, 0x00df, 0x00e2, 0x00e7, 0x00eb, 0x00eb,
85 0x00ee, 0x00f1, 0x00f4, 0x00f6, 0x00f8, 0x00f8, 0x00fb, 0x00fb,
86 0x00fd, 0x0100, 0x0102, 0x0112, 0x0114, 0x011a, 0x011c, 0x012a,
87 0x012c, 0x0143, 0x0145, 0x0147, 0x0149, 0x014c, 0x014e, 0x016a,
88 0x016c, 0x01cd, 0x01cf, 0x01cf, 0x01d1, 0x01d1, 0x01d3, 0x01d3,
89 0x01d5, 0x01d5, 0x01d7, 0x01d7, 0x01d9, 0x01d9, 0x01db, 0x01db,
90 0x01dd, 0x01f8, 0x01fa, 0x0250, 0x0252, 0x0260, 0x0262, 0x02c6,
91 0x02c8, 0x02c8, 0x02cc, 0x02d8, 0x02da, 0x0390, 0x03a2, 0x03a2,
92 0x03aa, 0x03b0, 0x03c2, 0x03c2, 0x03ca, 0x0400, 0x0402, 0x040f,
93 0x0450, 0x0450, 0x0452, 0x200f, 0x2011, 0x2012, 0x2017, 0x2017,
94 0x201a, 0x201b, 0x201e, 0x2024, 0x2027, 0x202f, 0x2031, 0x2031,
95 0x2034, 0x2034, 0x2036, 0x203a, 0x203c, 0x20ab, 0x20ad, 0x2102,
96 0x2104, 0x2104, 0x2106, 0x2108, 0x210a, 0x2115, 0x2117, 0x2120,
97 0x2122, 0x215f, 0x216c, 0x216f, 0x217a, 0x218f, 0x2194, 0x2195,
98 0x219a, 0x2207, 0x2209, 0x220e, 0x2210, 0x2210, 0x2212, 0x2214,
99 0x2216, 0x2219, 0x221b, 0x221c, 0x2221, 0x2222, 0x2224, 0x2224,
100 0x2226, 0x2226, 0x222c, 0x222d, 0x222f, 0x2233, 0x2238, 0x223c,
101 0x223e, 0x2247, 0x2249, 0x224b, 0x224d, 0x2251, 0x2253, 0x225f,
102 0x2262, 0x2263, 0x2268, 0x226d, 0x2270, 0x2294, 0x2296, 0x2298,
103 0x229a, 0x22a4, 0x22a6, 0x22be, 0x22c0, 0x2311, 0x2313, 0x245f,
104 0x246a, 0x2473, 0x249c, 0x24ff, 0x254c, 0x254f, 0x2574, 0x2580,
105 0x2590, 0x2592, 0x2596, 0x259f, 0x25a2, 0x25b1, 0x25b4, 0x25bb,
106 0x25be, 0x25c5, 0x25c8, 0x25ca, 0x25cc, 0x25cd, 0x25d0, 0x25e1,
107 0x25e6, 0x2604, 0x2607, 0x2608, 0x260a, 0x263f, 0x2641, 0x2641,
108 0x2643, 0x2e80, 0x2e82, 0x2e83, 0x2e85, 0x2e87, 0x2e89, 0x2e8a,
109 0x2e8d, 0x2e96, 0x2e98, 0x2ea6, 0x2ea8, 0x2ea9, 0x2eab, 0x2ead,
110 0x2eaf, 0x2eb2, 0x2eb4, 0x2eb5, 0x2eb8, 0x2eba, 0x2ebc, 0x2ec9,
111 0x2ecb, 0x2fef, 0x2ffc, 0x2fff, 0x3004, 0x3004, 0x3018, 0x301c,
112 0x301f, 0x3020, 0x302a, 0x303d, 0x303f, 0x3040, 0x3094, 0x309a,
113 0x309f, 0x30a0, 0x30f7, 0x30fb, 0x30ff, 0x3104, 0x312a, 0x321f,
114 0x322a, 0x3230, 0x3232, 0x32a2, 0x32a4, 0x338d, 0x3390, 0x339b,
115 0x339f, 0x33a0, 0x33a2, 0x33c3, 0x33c5, 0x33cd, 0x33cf, 0x33d0,
116 0x33d3, 0x33d4, 0x33d6, 0x3446, 0x3448, 0x3472, 0x3474, 0x359d,
117 0x359f, 0x360d, 0x360f, 0x3619, 0x361b, 0x3917, 0x3919, 0x396d,
118 0x396f, 0x39ce, 0x39d1, 0x39de, 0x39e0, 0x3a72, 0x3a74, 0x3b4d,
119 0x3b4f, 0x3c6d, 0x3c6f, 0x3cdf, 0x3ce1, 0x4055, 0x4057, 0x415e,
120 0x4160, 0x4336, 0x4338, 0x43ab, 0x43ad, 0x43b0, 0x43b2, 0x43dc,
121 0x43de, 0x44d5, 0x44d7, 0x464b, 0x464d, 0x4660, 0x4662, 0x4722,
122 0x4724, 0x4728, 0x472a, 0x477b, 0x477d, 0x478c, 0x478e, 0x4946,
123 0x4948, 0x4979, 0x497b, 0x497c, 0x497e, 0x4981, 0x4984, 0x4984,
124 0x4987, 0x499a, 0x499c, 0x499e, 0x49a0, 0x49b5, 0x49b8, 0x4c76,
125 0x4c78, 0x4c9e, 0x4ca4, 0x4d12, 0x4d1a, 0x4dad, 0x4daf, 0x4dff,
126 0x9fa6, 0xd7ff,
127 0xe76c, 0xe76c, 0xe7c8, 0xe7c8, 0xe7e7, 0xe7f3,
128 0xe815, 0xe815, 0xe819, 0xe81d, 0xe81f, 0xe825, 0xe827, 0xe82a,
129 0xe82d, 0xe830, 0xe833, 0xe83a, 0xe83c, 0xe842, 0xe844, 0xe853,
130 0xe856, 0xe863,
131 0xe865, 0xf92b, 0xf92d, 0xf978, 0xf97a, 0xf994,
132 0xf996, 0xf9e6, 0xf9e8, 0xf9f0, 0xf9f2, 0xfa0b, 0xfa10, 0xfa10,
133 0xfa12, 0xfa12, 0xfa15, 0xfa17, 0xfa19, 0xfa1e, 0xfa22, 0xfa22,
134 0xfa25, 0xfa26, 0xfa2a, 0xfe2f, 0xfe32, 0xfe32, 0xfe45, 0xfe48,
135 0xfe53, 0xfe53, 0xfe58, 0xfe58, 0xfe67, 0xfe67, 0xfe6c, 0xff00,
136 0xff5f, 0xffdf, 0xffe6, 0xffff
137 };
138
139 static const unsigned short gb18030uni_ranges[206] = {
140 128, 129, 131, 133, 134, 135, 137, 140,
141 142, 144, 145, 147, 148, 149, 150, 151,
142 152, 153, 154, 155, 156, 157, 158, 159,
143 160, 161, 162, 163, 164, 165, 166, 167,
144 168, 171, 172, 189, 196, 213, 220, 221,
145 285, 286, 287, 291, 293, 295, 297, 298,
146 300, 301, 302, 303, 304, 305, 306, 307,
147 308, 320, 330, 334, 338, 339, 340, 341,
148 342, 343, 347, 348, 349, 354, 355, 359,
149 360, 361, 362, 363, 365, 369, 371, 372,
150 373, 374, 375, 376, 386, 426, 502, 538,
151 553, 556, 558, 560, 562, 564, 565, 567,
152 571, 573, 574, 575, 576, 577, 578, 579,
153 581, 582, 583, 584, 585, 586, 588, 589,
154 590, 602, 606, 625, 627, 636, 637, 720,
155 724, 810, 813, 850, 860, 861, 862, 864,
156 867, 868, 869, 870, 872, 873, 874, 875,
157 876, 877, 878, 879, 880, 882, 883, 884,
158 885, 886, 887, 888, 889, 890, 891, 892,
159 893, 894, 895, 896, 897, 898, 899, 900,
160 901, 902, 903, 905, 907, 908, 909, 911,
161 912, 917, 924, 925, 21827,
162 25775, 25866, 25896,
163 25929, 25932, 25933, 25934, 25936, 25938, 25939, 25940,
164 25942,
165 25943, 25944, 25945, 25946, 25947, 25948, 25952,
166 25953, 25955, 25956, 25959, 25961, 25964, 25966, 25984,
167 25994, 25998, 26012, 26016, 26110, 26116
168 };
169
170 static int
gb18030uni_mbtowc(conv_t conv,ucs4_t * pwc,const unsigned char * s,int n)171 gb18030uni_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
172 {
173 unsigned char c1 = s[0];
174 if (c1 >= 0x81 && c1 <= 0x84) {
175 if (n >= 2) {
176 unsigned char c2 = s[1];
177 if (c2 >= 0x30 && c2 <= 0x39) {
178 if (n >= 3) {
179 unsigned char c3 = s[2];
180 if (c3 >= 0x81 && c3 <= 0xfe) {
181 if (n >= 4) {
182 unsigned char c4 = s[3];
183 if (c4 >= 0x30 && c4 <= 0x39) {
184 unsigned int i = (((c1 - 0x81) * 10 + (c2 - 0x30)) * 126 + (c3 - 0x81)) * 10 + (c4 - 0x30);
185 if (i >= 0 && i <= 39419) {
186 unsigned int k1 = 0;
187 unsigned int k2 = 205;
188 while (k1 < k2) {
189 unsigned int k = (k1 + k2) / 2;
190 if (i <= gb18030uni_charset2uni_ranges[2*k+1])
191 k2 = k;
192 else if (i >= gb18030uni_charset2uni_ranges[2*k+2])
193 k1 = k + 1;
194 else
195 return RET_ILSEQ;
196 }
197 {
198 unsigned int diff = gb18030uni_ranges[k1];
199 *pwc = (ucs4_t) (i + diff);
200 return 4;
201 }
202 }
203 }
204 return RET_ILSEQ;
205 }
206 return RET_TOOFEW(0);
207 }
208 return RET_ILSEQ;
209 }
210 return RET_TOOFEW(0);
211 }
212 return RET_ILSEQ;
213 }
214 return RET_TOOFEW(0);
215 }
216 return RET_ILSEQ;
217 }
218
219 static int
gb18030uni_wctomb(conv_t conv,unsigned char * r,ucs4_t wc,int n)220 gb18030uni_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
221 {
222 if (n >= 4) {
223 unsigned int i = wc;
224 if (i >= 0x0080 && i <= 0xffff) {
225 unsigned int k1 = 0;
226 unsigned int k2 = 205;
227 while (k1 < k2) {
228 unsigned int k = (k1 + k2) / 2;
229 if (i <= gb18030uni_uni2charset_ranges[2*k+1])
230 k2 = k;
231 else if (i >= gb18030uni_uni2charset_ranges[2*k+2])
232 k1 = k + 1;
233 else
234 return RET_ILUNI;
235 }
236 {
237 unsigned int diff = gb18030uni_ranges[k1];
238 i -= diff;
239 r[3] = (i % 10) + 0x30; i = i / 10;
240 r[2] = (i % 126) + 0x81; i = i / 126;
241 r[1] = (i % 10) + 0x30; i = i / 10;
242 r[0] = i + 0x81;
243 return 4;
244 }
245 }
246 return RET_ILUNI;
247 }
248 return RET_TOOSMALL;
249 }
250