1 /* 2 * Copyright (C) 1999-2002 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * C99 23 * This is ASCII with \uXXXX and \UXXXXXXXX escape sequences, denoting Unicode 24 * characters. See ISO/IEC 9899:1999, section 6.4.3. 25 * The treatment of control characters in the range U+0080..U+009F is not 26 * specified; we pass them through unmodified. 27 */ 28 29 static int 30 c99_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 31 { 32 unsigned char c; 33 ucs4_t wc; 34 int i; 35 36 c = s[0]; 37 if (c < 0xa0) { 38 if (c != '\\') { 39 *pwc = c; 40 return 1; 41 } 42 if (n < 2) 43 return RET_TOOFEW(0); 44 c = s[1]; 45 if (c == 'u') { 46 wc = 0; 47 for (i = 2; i < 6; i++) { 48 if (n <= i) 49 return RET_TOOFEW(0); 50 c = s[i]; 51 if (c >= '0' && c <= '9') 52 c -= '0'; 53 else if (c >= 'A' && c <= 'Z') 54 c -= 'A'-10; 55 else if (c >= 'a' && c <= 'z') 56 c -= 'a'-10; 57 else 58 goto simply_backslash; 59 wc |= (ucs4_t) c << (4 * (5-i)); 60 } 61 if ((wc >= 0x00a0 && !(wc >= 0xd800 && wc < 0xe000)) 62 || wc == 0x0024 || wc == 0x0040 || wc == 0x0060) { 63 *pwc = wc; 64 return 6; 65 } 66 } else if (c == 'U') { 67 wc = 0; 68 for (i = 2; i < 10; i++) { 69 if (n <= i) 70 return RET_TOOFEW(0); 71 c = s[i]; 72 if (c >= '0' && c <= '9') 73 c -= '0'; 74 else if (c >= 'A' && c <= 'Z') 75 c -= 'A'-10; 76 else if (c >= 'a' && c <= 'z') 77 c -= 'a'-10; 78 else 79 goto simply_backslash; 80 wc |= (ucs4_t) c << (4 * (9-i)); 81 } 82 if ((wc >= 0x00a0 && !(wc >= 0xd800 && wc < 0xe000)) 83 || wc == 0x0024 || wc == 0x0040 || wc == 0x0060) { 84 *pwc = wc; 85 return 10; 86 } 87 } else 88 goto simply_backslash; 89 } 90 return RET_ILSEQ; 91 simply_backslash: 92 *pwc = '\\'; 93 return 1; 94 } 95 96 static int 97 c99_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 98 { 99 if (wc < 0xa0) { 100 *r = wc; 101 return 1; 102 } else { 103 int result; 104 unsigned char u; 105 if (wc < 0x10000) { 106 result = 6; 107 u = 'u'; 108 } else { 109 result = 10; 110 u = 'U'; 111 } 112 if (n >= result) { 113 int count; 114 r[0] = '\\'; 115 r[1] = u; 116 r += 2; 117 for (count = result-3; count >= 0; count--) { 118 unsigned int i = (wc >> (4*count)) & 0x0f; 119 *r++ = (i < 10 ? '0'+i : 'a'-10+i); 120 } 121 return result; 122 } else 123 return RET_TOOSMALL; 124 } 125 } 126