xref: /haiku/src/libs/iconv/converters.h (revision f2b4344867e97c3f4e742a1b4a15e6879644601a)
1 /*
2  * Copyright (C) 1999-2002, 2004-2007 Free Software Foundation, Inc.
3  * This file is part of the GNU LIBICONV Library.
4  *
5  * The GNU LIBICONV Library is free software; you can redistribute it
6  * and/or modify it under the terms of the GNU Library General Public
7  * License as published by the Free Software Foundation; either version 2
8  * of the License, or (at your option) any later version.
9  *
10  * The GNU LIBICONV Library is distributed in the hope that it will be
11  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * Library General Public License for more details.
14  *
15  * You should have received a copy of the GNU Library General Public
16  * License along with the GNU LIBICONV Library; see the file COPYING.LIB.
17  * If not, write to the Free Software Foundation, Inc., 51 Franklin Street,
18  * Fifth Floor, Boston, MA 02110-1301, USA.
19  */
20 
21 /* This file defines all the converters. */
22 
23 
/*
 * Our own notion of wide character, as UCS-4, according to ISO-10646-1.
 * This is the type the converter entry points exchange through their
 * `pwc` and `wc` parameters (see struct mbtowc_funcs, struct wctomb_funcs).
 * NOTE(review): the C standard only guarantees 16 bits for unsigned int,
 * which is too narrow for UCS-4; this presumably relies on every supported
 * platform having an int of at least 32 bits -- confirm for new ports.
 */
typedef unsigned int ucs4_t;
26 
/*
 * State used by a conversion. 0 denotes the initial state.
 * struct conv_struct keeps one value per direction (istate and ostate).
 */
typedef unsigned int state_t;
29 
/*
 * iconv_t is an opaque type. This is the real iconv_t type: a pointer to
 * the conversion descriptor, struct conv_struct, defined further down in
 * this file. Every converter entry point takes it as its first argument.
 */
typedef struct conv_struct * conv_t;
32 
/*
 * Data type for conversion multibyte -> unicode
 *
 * Holds the two entry points of a decoder. "xxx" in the member names
 * stands for the name of the character set being decoded.
 */
struct mbtowc_funcs {
  int (*xxx_mbtowc) (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n);
  /*
   * int xxx_mbtowc (conv_t conv, ucs4_t *pwc, unsigned char const *s, int n)
   * converts the byte sequence starting at s to a wide character. Up to n bytes
   * are available at s. n is >= 1.
   * Result is number of bytes consumed (if a wide character was read),
   * or -1 (RET_ILSEQ) if invalid, or -2 (RET_TOOFEW(0)) if n too small,
   * or -2-(number of bytes consumed) (RET_TOOFEW(k)) if only a shift
   * sequence was read.
   * NOTE(review): the wide character is presumably stored through pwc --
   * confirm against a converter implementation.
   */
  int (*xxx_flushwc) (conv_t conv, ucs4_t *pwc);
  /*
   * int xxx_flushwc (conv_t conv, ucs4_t *pwc)
   * returns to the initial state and stores the pending wide character, if any.
   * Result is 1 (if a wide character was read) or 0 if none was pending.
   */
};
53 
/*
 * Return code if invalid. (xxx_mbtowc)
 * The expansion is parenthesized so that it stays a single operand in
 * whatever expression the macro is used in.
 */
#define RET_ILSEQ      (-1)
/*
 * Return code if only a shift sequence of n bytes was read. (xxx_mbtowc)
 * RET_TOOFEW(0) equals -2, the "n too small" result documented for
 * xxx_mbtowc; the consumed byte count can be recovered as -2 - result.
 */
#define RET_TOOFEW(n)  (-2-(n))
58 
/*
 * Data type for conversion unicode -> multibyte
 *
 * Holds the two entry points of an encoder. "xxx" in the member names
 * stands for the name of the character set being produced.
 */
struct wctomb_funcs {
  int (*xxx_wctomb) (conv_t conv, unsigned char *r, ucs4_t wc, int n);
  /*
   * int xxx_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n)
   * converts the wide character wc to the character set xxx, and stores the
   * result beginning at r. Up to n bytes may be written at r. n is >= 1.
   * Result is number of bytes written, or -1 (RET_ILUNI) if invalid,
   * or -2 (RET_TOOSMALL) if n too small.
   */
  int (*xxx_reset) (conv_t conv, unsigned char *r, int n);
  /*
   * int xxx_reset (conv_t conv, unsigned char *r, int n)
   * stores a shift sequence returning to the initial state beginning at r.
   * Up to n bytes may be written at r. n is >= 0.
   * Result is number of bytes written, or -2 (RET_TOOSMALL) if n too small.
   */
};
78 
/*
 * Return code if invalid. (xxx_wctomb)
 * The expansion is parenthesized so that it stays a single operand in
 * whatever expression the macro is used in.
 */
#define RET_ILUNI      (-1)
/* Return code if output buffer is too small. (xxx_wctomb, xxx_reset) */
#define RET_TOOSMALL   (-2)
83 
/*
 * Contents of a conversion descriptor.
 *
 * This is the structure a conv_t points to. It groups the decoder
 * (multibyte -> unicode) and the encoder (unicode -> multibyte) of one
 * conversion, the state of each direction, and the operation flags.
 * The last two members exist only when LIBICONV_PLUG is not defined, so
 * the size of the structure differs between the two configurations.
 */
struct conv_struct {
  /* NOTE(review): struct loop_funcs is declared outside this file;
     presumably the entry points of the conversion loop -- confirm. */
  struct loop_funcs lfuncs;
  /* Input (conversion multibyte -> unicode) */
  /* NOTE(review): iindex presumably identifies the input encoding -- confirm. */
  int iindex;
  struct mbtowc_funcs ifuncs;
  /* Input-side conversion state; 0 denotes the initial state. */
  state_t istate;
  /* Output (conversion unicode -> multibyte) */
  /* NOTE(review): oindex presumably identifies the output encoding -- confirm. */
  int oindex;
  struct wctomb_funcs ofuncs;
  /* NOTE(review): the meaning of the oflags bits is not visible in this file. */
  int oflags;
  /* Output-side conversion state; 0 denotes the initial state. */
  state_t ostate;
  /* Operation flags */
  /* NOTE(review): by their names, these two select transliteration and the
     dropping of invalid sequences; exact semantics are defined by the code
     that reads them -- confirm there. */
  int transliterate;
  int discard_ilseq;
  #ifndef LIBICONV_PLUG
  /* struct iconv_fallbacks and struct iconv_hooks are declared outside
     this file. */
  struct iconv_fallbacks fallbacks;
  struct iconv_hooks hooks;
  #endif
};
106 
107 /*
108  * Include all the converters.
109  */
110 
111 #include "ascii.h"
112 
113 /* General multi-byte encodings */
114 #include "utf8.h"
115 #include "ucs2.h"
116 #include "ucs2be.h"
117 #include "ucs2le.h"
118 #include "ucs4.h"
119 #include "ucs4be.h"
120 #include "ucs4le.h"
121 #include "utf16.h"
122 #include "utf16be.h"
123 #include "utf16le.h"
124 #include "utf32.h"
125 #include "utf32be.h"
126 #include "utf32le.h"
127 #include "utf7.h"
128 #include "ucs2internal.h"
129 #include "ucs2swapped.h"
130 #include "ucs4internal.h"
131 #include "ucs4swapped.h"
132 #include "c99.h"
133 #include "java.h"
134 
135 /* 8-bit encodings */
136 #include "iso8859_1.h"
137 #include "iso8859_2.h"
138 #include "iso8859_3.h"
139 #include "iso8859_4.h"
140 #include "iso8859_5.h"
141 #include "iso8859_6.h"
142 #include "iso8859_7.h"
143 #include "iso8859_8.h"
144 #include "iso8859_9.h"
145 #include "iso8859_10.h"
146 #include "iso8859_11.h"
147 #include "iso8859_13.h"
148 #include "iso8859_14.h"
149 #include "iso8859_15.h"
150 #include "iso8859_16.h"
151 #include "koi8_r.h"
152 #include "koi8_u.h"
153 #include "koi8_ru.h"
154 #include "cp1250.h"
155 #include "cp1251.h"
156 #include "cp1252.h"
157 #include "cp1253.h"
158 #include "cp1254.h"
159 #include "cp1255.h"
160 #include "cp1256.h"
161 #include "cp1257.h"
162 #include "cp1258.h"
163 #include "cp850.h"
164 #include "cp862.h"
165 #include "cp866.h"
166 #include "mac_roman.h"
167 #include "mac_centraleurope.h"
168 #include "mac_iceland.h"
169 #include "mac_croatian.h"
170 #include "mac_romania.h"
171 #include "mac_cyrillic.h"
172 #include "mac_ukraine.h"
173 #include "mac_greek.h"
174 #include "mac_turkish.h"
175 #include "mac_hebrew.h"
176 #include "mac_arabic.h"
177 #include "mac_thai.h"
178 #include "hp_roman8.h"
179 #include "nextstep.h"
180 #include "armscii_8.h"
181 #include "georgian_academy.h"
182 #include "georgian_ps.h"
183 #include "koi8_t.h"
184 #include "pt154.h"
185 #include "rk1048.h"
186 #include "mulelao.h"
187 #include "cp1133.h"
188 #include "tis620.h"
189 #include "cp874.h"
190 #include "viscii.h"
191 #include "tcvn.h"
192 
193 /* CJK character sets [CCS = coded character set] [CJKV.INF chapter 3] */
194 
/*
 * Summary record with two 16-bit-or-wider fields.
 * It is declared ahead of the CJK character set headers included below.
 * NOTE(review): those headers presumably use it to describe which entries
 * of a block of a large table are populated -- confirm in one of them.
 */
typedef struct {
  unsigned short indx; /* index into big table */
  unsigned short used; /* bitmask of used entries */
} Summary16;
199 
200 #include "iso646_jp.h"
201 #include "jisx0201.h"
202 #include "jisx0208.h"
203 #include "jisx0212.h"
204 
205 #include "iso646_cn.h"
206 #include "gb2312.h"
207 #include "isoir165.h"
208 /*#include "gb12345.h"*/
209 #include "gbk.h"
210 #include "cns11643.h"
211 #include "big5.h"
212 
213 #include "ksc5601.h"
214 #include "johab_hangul.h"
215 
216 /* CJK encodings [CES = character encoding scheme] [CJKV.INF chapter 4] */
217 
218 #include "euc_jp.h"
219 #include "sjis.h"
220 #include "cp932.h"
221 #include "iso2022_jp.h"
222 #include "iso2022_jp1.h"
223 #include "iso2022_jp2.h"
224 
225 #include "euc_cn.h"
226 #include "ces_gbk.h"
227 #include "cp936.h"
228 #include "gb18030.h"
229 #include "iso2022_cn.h"
230 #include "iso2022_cnext.h"
231 #include "hz.h"
232 #include "euc_tw.h"
233 #include "ces_big5.h"
234 #include "cp950.h"
235 #include "big5hkscs1999.h"
236 #include "big5hkscs2001.h"
237 #include "big5hkscs2004.h"
238 
239 #include "euc_kr.h"
240 #include "cp949.h"
241 #include "johab.h"
242 #include "iso2022_kr.h"
243 
244 /* Encodings used by system dependent locales. */
245 
246 #ifdef USE_AIX
247 #include "cp856.h"
248 #include "cp922.h"
249 #include "cp943.h"
250 #include "cp1046.h"
251 #include "cp1124.h"
252 #include "cp1129.h"
253 #include "cp1161.h"
254 #include "cp1162.h"
255 #include "cp1163.h"
256 #endif
257 
258 #ifdef USE_OSF1
259 #include "dec_kanji.h"
260 #include "dec_hanyu.h"
261 #endif
262 
263 #ifdef USE_DOS
264 #include "cp437.h"
265 #include "cp737.h"
266 #include "cp775.h"
267 #include "cp852.h"
268 #include "cp853.h"
269 #include "cp855.h"
270 #include "cp857.h"
271 #include "cp858.h"
272 #include "cp860.h"
273 #include "cp861.h"
274 #include "cp863.h"
275 #include "cp864.h"
276 #include "cp865.h"
277 #include "cp869.h"
278 #include "cp1125.h"
279 #endif
280 
281 #ifdef USE_EXTRA
282 #include "euc_jisx0213.h"
283 #include "shift_jisx0213.h"
284 #include "iso2022_jp3.h"
285 #include "big5_2003.h"
286 #include "tds565.h"
287 #include "atarist.h"
288 #include "riscos1.h"
289 #endif
290 
291