1 /* 2 * Copyright (C) 1999-2001 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * UTF-32 23 */ 24 25 /* Specification: Unicode 3.1 Standard Annex #19 */ 26 27 /* Here we accept FFFE0000/0000FEFF marks as endianness indicators 28 everywhere in the stream, not just at the beginning. (This is contrary 29 to what #19 D36c specifies, but it allows concatenation of byte 30 sequences to work flawlessly, while disagreeing with #19 behaviour 31 only for strings containing U+FEFF characters, which is quite rare.) 32 The default is big-endian. */ 33 /* The state is 0 if big-endian, 1 if little-endian. */ 34 static int 35 utf32_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 36 { 37 state_t state = conv->istate; 38 int count = 0; 39 for (; n >= 4;) { 40 ucs4_t wc = (state 41 ? s[0] + (s[1] << 8) + (s[2] << 16) + (s[3] << 24) 42 : (s[0] << 24) + (s[1] << 16) + (s[2] << 8) + s[3]); 43 count += 4; 44 if (wc == 0x0000feff) { 45 } else if (wc == 0xfffe0000u) { 46 state ^= 1; 47 } else { 48 if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) { 49 *pwc = wc; 50 conv->istate = state; 51 return count; 52 } else 53 return RET_ILSEQ; 54 } 55 s += 4; n -= 4; 56 } 57 conv->istate = state; 58 return RET_TOOFEW(count); 59 } 60 61 /* We output UTF-32 in big-endian order, with byte-order mark. */ 62 /* The state is 0 at the beginning, 1 after the BOM has been written. */ 63 static int 64 utf32_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 65 { 66 if (wc < 0x110000 && !(wc >= 0xd800 && wc < 0xe000)) { 67 int count = 0; 68 if (!conv->ostate) { 69 if (n >= 4) { 70 r[0] = 0x00; 71 r[1] = 0x00; 72 r[2] = 0xFE; 73 r[3] = 0xFF; 74 r += 4; n -= 4; count += 4; 75 } else 76 return RET_TOOSMALL; 77 } 78 if (wc < 0x110000) { 79 if (n >= 4) { 80 r[0] = 0; 81 r[1] = (unsigned char) (wc >> 16); 82 r[2] = (unsigned char) (wc >> 8); 83 r[3] = (unsigned char) wc; 84 conv->ostate = 1; 85 return count+4; 86 } else 87 return RET_TOOSMALL; 88 } 89 } 90 return RET_ILUNI; 91 } 92