xref: /haiku/src/libs/iconv/shift_jisx0213.h (revision aef5731f38da6f7b913e0f64acd8a40555491ce5)
1 /*
2  * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3  * This file is part of the GNU LIBICONV Library.
4  *
5  * The GNU LIBICONV Library is free software; you can redistribute it
6  * and/or modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * The GNU LIBICONV Library is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18  * Fifth Floor, Boston, MA 02110-1301, USA.
19  */
20 
21 /*
22  * SHIFT_JISX0213
23  */
24 
/* The structure of Shift_JISX0213 is as follows:

   0x00..0x7F: ISO646-JP, an ASCII variant in which 0x5C is U+00A5
   (YEN SIGN) and 0x7E is U+203E (OVERLINE).

   0x{A1..DF}: JISX0201 Katakana (single byte).

   0x{81..9F,E0..EF}{40..7E,80..FC}: JISX0213 plane 1.

   0x{F0..FC}{40..7E,80..FC}: JISX0213 plane 2, with irregular row mapping.

   Note that some JISX0213 characters are not contained in Unicode 3.2
   and are therefore best represented as sequences of Unicode characters.
*/
38 
39 #include "jisx0213.h"
40 #include "flushwc.h"
41 
42 static int
shift_jisx0213_mbtowc(conv_t conv,ucs4_t * pwc,const unsigned char * s,int n)43 shift_jisx0213_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
44 {
45   ucs4_t last_wc = conv->istate;
46   if (last_wc) {
47     /* Output the buffered character. */
48     conv->istate = 0;
49     *pwc = last_wc;
50     return 0; /* Don't advance the input pointer. */
51   } else {
52     unsigned char c = *s;
53     if (c < 0x80) {
54       /* Plain ISO646-JP character. */
55       if (c == 0x5c)
56         *pwc = (ucs4_t) 0x00a5;
57       else if (c == 0x7e)
58         *pwc = (ucs4_t) 0x203e;
59       else
60         *pwc = (ucs4_t) c;
61       return 1;
62     } else if (c >= 0xa1 && c <= 0xdf) {
63       *pwc = c + 0xfec0;
64       return 1;
65     } else {
66       if ((c >= 0x81 && c <= 0x9f) || (c >= 0xe0 && c <= 0xfc)) {
67         /* Two byte character. */
68         if (n >= 2) {
69           unsigned char c2 = s[1];
70           if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfc)) {
71             unsigned int c1;
72             ucs4_t wc;
73             /* Convert to row and column. */
74             if (c < 0xe0)
75               c -= 0x81;
76             else
77               c -= 0xc1;
78             if (c2 < 0x80)
79               c2 -= 0x40;
80             else
81               c2 -= 0x41;
82             /* Now 0 <= c <= 0x3b, 0 <= c2 <= 0xbb. */
83             c1 = 2 * c;
84             if (c2 >= 0x5e)
85               c2 -= 0x5e, c1++;
86             c2 += 0x21;
87             if (c1 >= 0x5e) {
88               /* Handling of JISX 0213 plane 2 rows. */
89               if (c1 >= 0x67)
90                 c1 += 230;
91               else if (c1 >= 0x63 || c1 == 0x5f)
92                 c1 += 168;
93               else
94                 c1 += 162;
95             }
96             wc = jisx0213_to_ucs4(0x121+c1,c2);
97             if (wc) {
98               if (wc < 0x80) {
99                 /* It's a combining character. */
100                 ucs4_t wc1 = jisx0213_to_ucs_combining[wc - 1][0];
101                 ucs4_t wc2 = jisx0213_to_ucs_combining[wc - 1][1];
102                 /* We cannot output two Unicode characters at once. So,
103                    output the first character and buffer the second one. */
104                 *pwc = wc1;
105                 conv->istate = wc2;
106               } else
107                 *pwc = wc;
108               return 2;
109             }
110           }
111         } else
112           return RET_TOOFEW(0);
113       }
114       return RET_ILSEQ;
115     }
116   }
117 }
118 
/* The flushwc entry of this converter is an alias for normal_flushwc, which
   is not defined in this file (presumably it comes from "flushwc.h"). */
#define shift_jisx0213_flushwc normal_flushwc

/* Layout of shift_jisx0213_comp_table_data.  The table is divided into one
   run of entries per combining character; <name>_idx is the index of the
   first entry of a run and <name>_len is the number of entries in it.  Each
   run starts where the previous one ends. */
#define shift_jisx0213_comp_table02e5_idx 0
#define shift_jisx0213_comp_table02e5_len 1
#define shift_jisx0213_comp_table02e9_idx (shift_jisx0213_comp_table02e5_idx+shift_jisx0213_comp_table02e5_len)
#define shift_jisx0213_comp_table02e9_len 1
#define shift_jisx0213_comp_table0300_idx (shift_jisx0213_comp_table02e9_idx+shift_jisx0213_comp_table02e9_len)
#define shift_jisx0213_comp_table0300_len 5
#define shift_jisx0213_comp_table0301_idx (shift_jisx0213_comp_table0300_idx+shift_jisx0213_comp_table0300_len)
#define shift_jisx0213_comp_table0301_len 4
#define shift_jisx0213_comp_table309a_idx (shift_jisx0213_comp_table0301_idx+shift_jisx0213_comp_table0301_len)
#define shift_jisx0213_comp_table309a_len 14

/* Composition tables for each of the relevant combining characters.
   Both fields are two-byte Shift_JISX0213 codes: when the character `base'
   is followed by the run's combining character, the pair is encoded as the
   single character `composed'.  The trailing comment on each row reads
   "composed JISX0213 code = base JISX0213 code, combining character". */
static const struct {
  unsigned short base;
  unsigned short composed;
} shift_jisx0213_comp_table_data[] = {
  /* Run for U+02E5 (1 entry). */
  { 0x8684, 0x8685 }, /* 0x12B65 = 0x12B64 U+02E5 */

  /* Run for U+02E9 (1 entry). */
  { 0x8680, 0x8686 }, /* 0x12B66 = 0x12B60 U+02E9 */

  /* Run for U+0300 (5 entries). */
  { 0x857b, 0x8663 }, /* 0x12B44 = 0x1295C U+0300 */
  { 0x8657, 0x8667 }, /* 0x12B48 = 0x12B38 U+0300 */
  { 0x8656, 0x8669 }, /* 0x12B4A = 0x12B37 U+0300 */
  { 0x864f, 0x866b }, /* 0x12B4C = 0x12B30 U+0300 */
  { 0x8662, 0x866d }, /* 0x12B4E = 0x12B43 U+0300 */

  /* Run for U+0301 (4 entries). */
  { 0x8657, 0x8668 }, /* 0x12B49 = 0x12B38 U+0301 */
  { 0x8656, 0x866a }, /* 0x12B4B = 0x12B37 U+0301 */
  { 0x864f, 0x866c }, /* 0x12B4D = 0x12B30 U+0301 */
  { 0x8662, 0x866e }, /* 0x12B4F = 0x12B43 U+0301 */

  /* Run for U+309A (14 entries). */
  { 0x82a9, 0x82f5 }, /* 0x12477 = 0x1242B U+309A */
  { 0x82ab, 0x82f6 }, /* 0x12478 = 0x1242D U+309A */
  { 0x82ad, 0x82f7 }, /* 0x12479 = 0x1242F U+309A */
  { 0x82af, 0x82f8 }, /* 0x1247A = 0x12431 U+309A */
  { 0x82b1, 0x82f9 }, /* 0x1247B = 0x12433 U+309A */
  { 0x834a, 0x8397 }, /* 0x12577 = 0x1252B U+309A */
  { 0x834c, 0x8398 }, /* 0x12578 = 0x1252D U+309A */
  { 0x834e, 0x8399 }, /* 0x12579 = 0x1252F U+309A */
  { 0x8350, 0x839a }, /* 0x1257A = 0x12531 U+309A */
  { 0x8352, 0x839b }, /* 0x1257B = 0x12533 U+309A */
  { 0x835a, 0x839c }, /* 0x1257C = 0x1253B U+309A */
  { 0x8363, 0x839d }, /* 0x1257D = 0x12544 U+309A */
  { 0x8367, 0x839e }, /* 0x1257E = 0x12548 U+309A */
  { 0x83f3, 0x83f6 }, /* 0x12678 = 0x12675 U+309A */
};
159 
160 static int
shift_jisx0213_wctomb(conv_t conv,unsigned char * r,ucs4_t wc,int n)161 shift_jisx0213_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
162 {
163   int count = 0;
164   unsigned short lasttwo = conv->ostate;
165 
166   if (lasttwo) {
167     /* Attempt to combine the last character with this one. */
168     unsigned int idx;
169     unsigned int len;
170 
171     if (wc == 0x02e5)
172       idx = shift_jisx0213_comp_table02e5_idx,
173       len = shift_jisx0213_comp_table02e5_len;
174     else if (wc == 0x02e9)
175       idx = shift_jisx0213_comp_table02e9_idx,
176       len = shift_jisx0213_comp_table02e9_len;
177     else if (wc == 0x0300)
178       idx = shift_jisx0213_comp_table0300_idx,
179       len = shift_jisx0213_comp_table0300_len;
180     else if (wc == 0x0301)
181       idx = shift_jisx0213_comp_table0301_idx,
182       len = shift_jisx0213_comp_table0301_len;
183     else if (wc == 0x309a)
184       idx = shift_jisx0213_comp_table309a_idx,
185       len = shift_jisx0213_comp_table309a_len;
186     else
187       goto not_combining;
188 
189     do
190       if (shift_jisx0213_comp_table_data[idx].base == lasttwo)
191         break;
192     while (++idx, --len > 0);
193 
194     if (len > 0) {
195       /* Output the combined character. */
196       if (n >= 2) {
197         lasttwo = shift_jisx0213_comp_table_data[idx].composed;
198         r[0] = (lasttwo >> 8) & 0xff;
199         r[1] = lasttwo & 0xff;
200         conv->ostate = 0;
201         return 2;
202       } else
203         return RET_TOOSMALL;
204     }
205 
206   not_combining:
207     /* Output the buffered character. */
208     if (n < 2)
209       return RET_TOOSMALL;
210     r[0] = (lasttwo >> 8) & 0xff;
211     r[1] = lasttwo & 0xff;
212     r += 2;
213     count = 2;
214   }
215 
216   if (wc < 0x80 && wc != 0x5c && wc != 0x7e) {
217     /* Plain ISO646-JP character. */
218     if (n > count) {
219       r[0] = (unsigned char) wc;
220       conv->ostate = 0;
221       return count+1;
222     } else
223       return RET_TOOSMALL;
224   } else if (wc == 0x00a5) {
225     if (n > count) {
226       r[0] = 0x5c;
227       conv->ostate = 0;
228       return count+1;
229     } else
230       return RET_TOOSMALL;
231   } else if (wc == 0x203e) {
232     if (n > count) {
233       r[0] = 0x7e;
234       conv->ostate = 0;
235       return count+1;
236     } else
237       return RET_TOOSMALL;
238   } else if (wc >= 0xff61 && wc <= 0xff9f) {
239     /* Half-width katakana. */
240     if (n > count) {
241       r[0] = wc - 0xfec0;
242       conv->ostate = 0;
243       return count+1;
244     } else
245       return RET_TOOSMALL;
246   } else {
247     unsigned int s1, s2;
248     unsigned short jch = ucs4_to_jisx0213(wc);
249     if (jch != 0) {
250       /* Convert it to shifted representation. */
251       s1 = jch >> 8;
252       s2 = jch & 0x7f;
253       s1 -= 0x21;
254       s2 -= 0x21;
255       if (s1 >= 0x5e) {
256         /* Handling of JISX 0213 plane 2 rows. */
257         if (s1 >= 0xcd) /* rows 0x26E..0x27E */
258           s1 -= 102;
259         else if (s1 >= 0x8b || s1 == 0x87) /* rows 0x228, 0x22C..0x22F */
260           s1 -= 40;
261         else /* rows 0x221, 0x223..0x225 */
262           s1 -= 34;
263         /* Now 0x5e <= s1 <= 0x77. */
264       }
265       if (s1 & 1)
266         s2 += 0x5e;
267       s1 = s1 >> 1;
268       if (s1 < 0x1f)
269         s1 += 0x81;
270       else
271         s1 += 0xc1;
272       if (s2 < 0x3f)
273         s2 += 0x40;
274       else
275         s2 += 0x41;
276       if (jch & 0x0080) {
277         /* A possible match in comp_table_data. We have to buffer it. */
278         /* We know it's a JISX 0213 plane 1 character. */
279         if (jch & 0x8000) abort();
280         conv->ostate = (s1 << 8) | s2;
281         return count+0;
282       }
283       /* Output the shifted representation. */
284       if (n >= count+2) {
285         r[0] = s1;
286         r[1] = s2;
287         conv->ostate = 0;
288         return count+2;
289       } else
290         return RET_TOOSMALL;
291     }
292     return RET_ILUNI;
293   }
294 }
295 
296 static int
shift_jisx0213_reset(conv_t conv,unsigned char * r,int n)297 shift_jisx0213_reset (conv_t conv, unsigned char *r, int n)
298 {
299   state_t lasttwo = conv->ostate;
300 
301   if (lasttwo) {
302     if (n < 2)
303       return RET_TOOSMALL;
304     r[0] = (lasttwo >> 8) & 0xff;
305     r[1] = lasttwo & 0xff;
306     /* conv->ostate = 0; will be done by the caller */
307     return 2;
308   } else
309     return 0;
310 }
311