xref: /haiku/src/libs/iconv/c99.h (revision 893988af824e65e49e55f517b157db8386e8002b)
1 /*
2  * Copyright (C) 1999-2002 Free Software Foundation, Inc.
3  * This file is part of the GNU LIBICONV Library.
4  *
5  * The GNU LIBICONV Library is free software; you can redistribute it
6  * and/or modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * The GNU LIBICONV Library is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18  * Fifth Floor, Boston, MA 02110-1301, USA.
19  */
20 
21 /*
22  * C99
23  * This is ASCII with \uXXXX and \UXXXXXXXX escape sequences, denoting Unicode
24  * characters. See ISO/IEC 9899:1999, section 6.4.3.
25  * The treatment of control characters in the range U+0080..U+009F is not
26  * specified; we pass them through unmodified.
27  */
28 
29 static int
30 c99_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
31 {
32   unsigned char c;
33   ucs4_t wc;
34   int i;
35 
36   c = s[0];
37   if (c < 0xa0) {
38     if (c != '\\') {
39       *pwc = c;
40       return 1;
41     }
42     if (n < 2)
43       return RET_TOOFEW(0);
44     c = s[1];
45     if (c == 'u') {
46       wc = 0;
47       for (i = 2; i < 6; i++) {
48         if (n <= i)
49           return RET_TOOFEW(0);
50         c = s[i];
51         if (c >= '0' && c <= '9')
52           c -= '0';
53         else if (c >= 'A' && c <= 'Z')
54           c -= 'A'-10;
55         else if (c >= 'a' && c <= 'z')
56           c -= 'a'-10;
57         else
58           goto simply_backslash;
59         wc |= (ucs4_t) c << (4 * (5-i));
60       }
61       if ((wc >= 0x00a0 && !(wc >= 0xd800 && wc < 0xe000))
62           || wc == 0x0024 || wc == 0x0040 || wc == 0x0060) {
63         *pwc = wc;
64         return 6;
65       }
66     } else if (c == 'U') {
67       wc = 0;
68       for (i = 2; i < 10; i++) {
69         if (n <= i)
70           return RET_TOOFEW(0);
71         c = s[i];
72         if (c >= '0' && c <= '9')
73           c -= '0';
74         else if (c >= 'A' && c <= 'Z')
75           c -= 'A'-10;
76         else if (c >= 'a' && c <= 'z')
77           c -= 'a'-10;
78         else
79           goto simply_backslash;
80         wc |= (ucs4_t) c << (4 * (9-i));
81       }
82       if ((wc >= 0x00a0 && !(wc >= 0xd800 && wc < 0xe000))
83           || wc == 0x0024 || wc == 0x0040 || wc == 0x0060) {
84         *pwc = wc;
85         return 10;
86       }
87     } else
88       goto simply_backslash;
89   }
90   return RET_ILSEQ;
91 simply_backslash:
92   *pwc = '\\';
93   return 1;
94 }
95 
96 static int
97 c99_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
98 {
99   if (wc < 0xa0) {
100     *r = wc;
101     return 1;
102   } else {
103     int result;
104     unsigned char u;
105     if (wc < 0x10000) {
106       result = 6;
107       u = 'u';
108     } else {
109       result = 10;
110       u = 'U';
111     }
112     if (n >= result) {
113       int count;
114       r[0] = '\\';
115       r[1] = u;
116       r += 2;
117       for (count = result-3; count >= 0; count--) {
118         unsigned int i = (wc >> (4*count)) & 0x0f;
119         *r++ = (i < 10 ? '0'+i : 'a'-10+i);
120       }
121       return result;
122     } else
123       return RET_TOOSMALL;
124   }
125 }
126