1 /* 2 * Copyright (C) 1999-2002, 2004-2007 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* This file defines all the converters. */ 22 23 24 /* Our own notion of wide character, as UCS-4, according to ISO-10646-1. */ 25 typedef unsigned int ucs4_t; 26 27 /* State used by a conversion. 0 denotes the initial state. */ 28 typedef unsigned int state_t; 29 30 /* iconv_t is an opaque type. This is the real iconv_t type. */ 31 typedef struct conv_struct * conv_t; 32 33 /* 34 * Data type for conversion multibyte -> unicode 35 */ 36 struct mbtowc_funcs { 37 int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n); 38 /* 39 * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n) 40 * converts the byte sequence starting at s to a wide character. Up to n bytes 41 * are available at s. n is >= 1. 42 * Result is number of bytes consumed (if a wide character was read), 43 * or -1 if invalid, or -2 if n too small, or -2-(number of bytes consumed) 44 * if only a shift sequence was read. 45 */ 46 int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc); 47 /* 48 * int xxx_flushwc (conv_t conv, ucs4_t *pwc) 49 * returns to the initial state and stores the pending wide character, if any. 50 * Result is 1 (if a wide character was read) or 0 if none was pending. 51 */ 52 }; 53 54 /* Return code if invalid. (xxx_mbtowc) */ 55 #define RET_ILSEQ -1 56 /* Return code if only a shift sequence of n bytes was read. (xxx_mbtowc) */ 57 #define RET_TOOFEW(n) (-2-(n)) 58 59 /* 60 * Data type for conversion unicode -> multibyte 61 */ 62 struct wctomb_funcs { 63 int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, int n); 64 /* 65 * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) 66 * converts the wide character wc to the character set xxx, and stores the 67 * result beginning at r. Up to n bytes may be written at r. n is >= 1. 68 * Result is number of bytes written, or -1 if invalid, or -2 if n too small. 69 */ 70 int (*xxx_reset) (conv_t conv, unsigned char *r, int n); 71 /* 72 * int xxx_reset (conv_t conv, unsigned char *r, int n) 73 * stores a shift sequences returning to the initial state beginning at r. 74 * Up to n bytes may be written at r. n is >= 0. 75 * Result is number of bytes written, or -2 if n too small. 76 */ 77 }; 78 79 /* Return code if invalid. (xxx_wctomb) */ 80 #define RET_ILUNI -1 81 /* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */ 82 #define RET_TOOSMALL -2 83 84 /* 85 * Contents of a conversion descriptor. 86 */ 87 struct conv_struct { 88 struct loop_funcs lfuncs; 89 /* Input (conversion multibyte -> unicode) */ 90 int iindex; 91 struct mbtowc_funcs ifuncs; 92 state_t istate; 93 /* Output (conversion unicode -> multibyte) */ 94 int oindex; 95 struct wctomb_funcs ofuncs; 96 int oflags; 97 state_t ostate; 98 /* Operation flags */ 99 int transliterate; 100 int discard_ilseq; 101 #ifndef LIBICONV_PLUG 102 struct iconv_fallbacks fallbacks; 103 struct iconv_hooks hooks; 104 #endif 105 }; 106 107 /* 108 * Include all the converters. 109 */ 110 111 #include "ascii.h" 112 113 /* General multi-byte encodings */ 114 #include "utf8.h" 115 #include "ucs2.h" 116 #include "ucs2be.h" 117 #include "ucs2le.h" 118 #include "ucs4.h" 119 #include "ucs4be.h" 120 #include "ucs4le.h" 121 #include "utf16.h" 122 #include "utf16be.h" 123 #include "utf16le.h" 124 #include "utf32.h" 125 #include "utf32be.h" 126 #include "utf32le.h" 127 #include "utf7.h" 128 #include "ucs2internal.h" 129 #include "ucs2swapped.h" 130 #include "ucs4internal.h" 131 #include "ucs4swapped.h" 132 #include "c99.h" 133 #include "java.h" 134 135 /* 8-bit encodings */ 136 #include "iso8859_1.h" 137 #include "iso8859_2.h" 138 #include "iso8859_3.h" 139 #include "iso8859_4.h" 140 #include "iso8859_5.h" 141 #include "iso8859_6.h" 142 #include "iso8859_7.h" 143 #include "iso8859_8.h" 144 #include "iso8859_9.h" 145 #include "iso8859_10.h" 146 #include "iso8859_11.h" 147 #include "iso8859_13.h" 148 #include "iso8859_14.h" 149 #include "iso8859_15.h" 150 #include "iso8859_16.h" 151 #include "koi8_r.h" 152 #include "koi8_u.h" 153 #include "koi8_ru.h" 154 #include "cp1250.h" 155 #include "cp1251.h" 156 #include "cp1252.h" 157 #include "cp1253.h" 158 #include "cp1254.h" 159 #include "cp1255.h" 160 #include "cp1256.h" 161 #include "cp1257.h" 162 #include "cp1258.h" 163 #include "cp850.h" 164 #include "cp862.h" 165 #include "cp866.h" 166 #include "mac_roman.h" 167 #include "mac_centraleurope.h" 168 #include "mac_iceland.h" 169 #include "mac_croatian.h" 170 #include "mac_romania.h" 171 #include "mac_cyrillic.h" 172 #include "mac_ukraine.h" 173 #include "mac_greek.h" 174 #include "mac_turkish.h" 175 #include "mac_hebrew.h" 176 #include "mac_arabic.h" 177 #include "mac_thai.h" 178 #include "hp_roman8.h" 179 #include "nextstep.h" 180 #include "armscii_8.h" 181 #include "georgian_academy.h" 182 #include "georgian_ps.h" 183 #include "koi8_t.h" 184 #include "pt154.h" 185 #include "rk1048.h" 186 #include "mulelao.h" 187 #include "cp1133.h" 188 #include "tis620.h" 189 #include "cp874.h" 190 #include "viscii.h" 191 #include "tcvn.h" 192 193 /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */ 194 195 typedef struct { 196 unsigned short indx; /* index into big table */ 197 unsigned short used; /* bitmask of used entries */ 198 } Summary16; 199 200 #include "iso646_jp.h" 201 #include "jisx0201.h" 202 #include "jisx0208.h" 203 #include "jisx0212.h" 204 205 #include "iso646_cn.h" 206 #include "gb2312.h" 207 #include "isoir165.h" 208 /*#include "gb12345.h"*/ 209 #include "gbk.h" 210 #include "cns11643.h" 211 #include "big5.h" 212 213 #include "ksc5601.h" 214 #include "johab_hangul.h" 215 216 /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */ 217 218 #include "euc_jp.h" 219 #include "sjis.h" 220 #include "cp932.h" 221 #include "iso2022_jp.h" 222 #include "iso2022_jp1.h" 223 #include "iso2022_jp2.h" 224 225 #include "euc_cn.h" 226 #include "ces_gbk.h" 227 #include "cp936.h" 228 #include "gb18030.h" 229 #include "iso2022_cn.h" 230 #include "iso2022_cnext.h" 231 #include "hz.h" 232 #include "euc_tw.h" 233 #include "ces_big5.h" 234 #include "cp950.h" 235 #include "big5hkscs1999.h" 236 #include "big5hkscs2001.h" 237 #include "big5hkscs2004.h" 238 239 #include "euc_kr.h" 240 #include "cp949.h" 241 #include "johab.h" 242 #include "iso2022_kr.h" 243 244 /* Encodings used by system dependent locales. */ 245 246 #ifdef USE_AIX 247 #include "cp856.h" 248 #include "cp922.h" 249 #include "cp943.h" 250 #include "cp1046.h" 251 #include "cp1124.h" 252 #include "cp1129.h" 253 #include "cp1161.h" 254 #include "cp1162.h" 255 #include "cp1163.h" 256 #endif 257 258 #ifdef USE_OSF1 259 #include "dec_kanji.h" 260 #include "dec_hanyu.h" 261 #endif 262 263 #ifdef USE_DOS 264 #include "cp437.h" 265 #include "cp737.h" 266 #include "cp775.h" 267 #include "cp852.h" 268 #include "cp853.h" 269 #include "cp855.h" 270 #include "cp857.h" 271 #include "cp858.h" 272 #include "cp860.h" 273 #include "cp861.h" 274 #include "cp863.h" 275 #include "cp864.h" 276 #include "cp865.h" 277 #include "cp869.h" 278 #include "cp1125.h" 279 #endif 280 281 #ifdef USE_EXTRA 282 #include "euc_jisx0213.h" 283 #include "shift_jisx0213.h" 284 #include "iso2022_jp3.h" 285 #include "big5_2003.h" 286 #include "tds565.h" 287 #include "atarist.h" 288 #include "riscos1.h" 289 #endif 290 291