xref: /haiku/src/system/libroot/posix/glibc/libio/iofwide.c (revision 820dca4df6c7bf955c46e8f6521b9408f50b2900)
1 /* Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    The GNU C Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License as published by the Free Software Foundation; either
7    version 2.1 of the License, or (at your option) any later version.
8 
9    The GNU C Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13 
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU C Library; if not, write to the Free
16    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17    02111-1307 USA.
18 
19    As a special exception, if you link the code in this file with
20    files compiled with a GNU compiler to produce an executable,
21    that does not cause the resulting executable to be covered by
22    the GNU Lesser General Public License.  This exception does not
23    however invalidate any other reasons why the executable file
24    might be covered by the GNU Lesser General Public License.
25    This exception applies to code released by its copyright holders
26    in files containing the exception.  */
27 
28 #include <libioP.h>
29 #ifdef _LIBC
30 # include <dlfcn.h>
31 # include <wchar.h>
32 #endif
33 #include <assert.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #ifdef _LIBC
38 # include <langinfo.h>
39 # include <locale/localeinfo.h>
40 # include <wcsmbs/wcsmbsload.h>
41 # include <iconv/gconv_int.h>
42 # include <shlib-compat.h>
43 #endif
44 
45 /* Prototypes of libio's codecvt functions.  */
46 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
47 				     __mbstate_t *statep,
48 				     const wchar_t *from_start,
49 				     const wchar_t *from_end,
50 				     const wchar_t **from_stop, char *to_start,
51 				     char *to_end, char **to_stop);
52 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
53 					 __mbstate_t *statep, char *to_start,
54 					 char *to_end, char **to_stop);
55 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
56 				    __mbstate_t *statep,
57 				    const char *from_start,
58 				    const char *from_end,
59 				    const char **from_stop, wchar_t *to_start,
60 				    wchar_t *to_end, wchar_t **to_stop);
61 static int do_encoding (struct _IO_codecvt *codecvt);
62 static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
63 		      const char *from_start,
64 		      const char *from_end, _IO_size_t max);
65 static int do_max_length (struct _IO_codecvt *codecvt);
66 static int do_always_noconv (struct _IO_codecvt *codecvt);
67 
68 
69 /* The functions used in `codecvt' for libio are always the same.  */
70 struct _IO_codecvt __libio_codecvt =
71 {
72   .__codecvt_destr = NULL,		/* Destructor, never used.  */
73   .__codecvt_do_out = do_out,
74   .__codecvt_do_unshift = do_unshift,
75   .__codecvt_do_in = do_in,
76   .__codecvt_do_encoding = do_encoding,
77   .__codecvt_do_always_noconv = do_always_noconv,
78   .__codecvt_do_length = do_length,
79   .__codecvt_do_max_length = do_max_length
80 };
81 
82 
83 #ifdef _LIBC
84 struct __gconv_trans_data __libio_translit attribute_hidden =
85 {
86   .__trans_fct = NULL
87 };
88 #endif
89 
90 /* Return orientation of stream.  If mode is nonzero try to change
91  * the orientation first.
92  */
93 
94 #undef _IO_fwide
95 
96 int
97 _IO_fwide(_IO_FILE *fp, int mode)
98 {
99 	/* Normalize the value.  */
100 	mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
101 
102 	if (mode == 0) {
103 		/* The caller simply wants to know about the current orientation. */
104 		return fp->_mode;
105 	}
106 
107 #if defined SHARED && defined _LIBC \
108     && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
109   if (__builtin_expect (&_IO_stdin_used == NULL, 0)
110       && (fp == _IO_stdin ||  fp == _IO_stdout || fp == _IO_stderr))
111     /* This is for a stream in the glibc 2.0 format.  */
112     return -1;
113 #endif
114 
115 	if (fp->_mode != 0) {
116 		/* The orientation already has been determined.  */
117 		return fp->_mode;
118 	}
119 
120 	{
121 		struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
122 
123 		fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
124 		fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
125 
126 		/* Get the character conversion functions based on the currently
127 		 * selected locale for LC_CTYPE.
128 		 */
129 #ifdef _LIBC
130       {
131 	struct gconv_fcts fcts;
132 
133 	/* Clear the state.  We start all over again.  */
134 	memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
135 	memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
136 
137 	__wcsmbs_clone_conv (&fcts);
138 	assert (fcts.towc_nsteps == 1);
139 	assert (fcts.tomb_nsteps == 1);
140 
141 	/* The functions are always the same.  */
142 	*cc = __libio_codecvt;
143 
144 	cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
145 	cc->__cd_in.__cd.__steps = fcts.towc;
146 
147 	cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
148 	cc->__cd_in.__cd.__data[0].__internal_use = 1;
149 	cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
150 	cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
151 
152 	/* XXX For now no transliteration.  */
153 	cc->__cd_in.__cd.__data[0].__trans = NULL;
154 
155 	cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
156 	cc->__cd_out.__cd.__steps = fcts.tomb;
157 
158 	cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
159 	cc->__cd_out.__cd.__data[0].__internal_use = 1;
160 	cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
161 	cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
162 
163 	/* And now the transliteration.  */
164 	cc->__cd_out.__cd.__data[0].__trans = &__libio_translit;
165       }
166 #else
167 # ifdef _GLIBCPP_USE_WCHAR_T
168       {
169 	/* Determine internal and external character sets.
170 
171 	   XXX For now we make our life easy: we assume a fixed internal
172 	   encoding (as most sane systems have; hi HP/UX!).  If somebody
173 	   cares about systems which changing internal charsets they
174 	   should come up with a solution for the determination of the
175 	   currently used internal character set.  */
176 	const char *internal_ccs = _G_INTERNAL_CCS;
177 	const char *external_ccs = NULL;
178 
179 #  ifdef HAVE_NL_LANGINFO
180 	external_ccs = nl_langinfo (CODESET);
181 #  endif
182 	if (external_ccs == NULL)
183 	  external_ccs = "ISO-8859-1";
184 
185 	cc->__cd_in = iconv_open (internal_ccs, external_ccs);
186 	if (cc->__cd_in != (iconv_t) -1)
187 	  cc->__cd_out = iconv_open (external_ccs, internal_ccs);
188 
189 	if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
190 	  {
191 	    if (cc->__cd_in != (iconv_t) -1)
192 	      iconv_close (cc->__cd_in);
193 	    /* XXX */
194 	    abort ();
195 	  }
196       }
197 # else
198 #  error "somehow determine this from LC_CTYPE"
199 # endif
200 #endif
201 
202       /* From now on use the wide character callback functions.  */
203       ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
204 
205       /* One last twist: we get the current stream position.  The wide
206 	 char streams have much more problems with not knowing the
207 	 current position and so we should disable the optimization
208 	 which allows the functions without knowing the position.  */
209       fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur);
210     }
211 	/* Set the mode now.  */
212 	fp->_mode = mode;
213 
214 	return mode;
215 }
216 
217 static enum __codecvt_result
218 do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
219 	const wchar_t *from_start, const wchar_t *from_end,
220 	const wchar_t **from_stop, char *to_start, char *to_end,
221 	char **to_stop)
222 {
223   enum __codecvt_result result;
224 
225 #ifdef _LIBC
226   struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
227   int status;
228   size_t dummy;
229   const unsigned char *from_start_copy = (unsigned char *) from_start;
230 
231   codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
232   codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
233   codecvt->__cd_out.__cd.__data[0].__statep = statep;
234 
235   status = DL_CALL_FCT (gs->__fct,
236 			(gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
237 			 (const unsigned char *) from_end, NULL,
238 			 &dummy, 0, 0));
239 
240   *from_stop = (wchar_t *) from_start_copy;
241   *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
242 
243   switch (status)
244     {
245     case __GCONV_OK:
246     case __GCONV_EMPTY_INPUT:
247       result = __codecvt_ok;
248       break;
249 
250     case __GCONV_FULL_OUTPUT:
251     case __GCONV_INCOMPLETE_INPUT:
252       result = __codecvt_partial;
253       break;
254 
255     default:
256       result = __codecvt_error;
257       break;
258     }
259 #else
260 # ifdef _GLIBCPP_USE_WCHAR_T
261   size_t res;
262   const char *from_start_copy = (const char *) from_start;
263   size_t from_len = from_end - from_start;
264   char *to_start_copy = to_start;
265   size_t to_len = to_end - to_start;
266   res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
267 	       &to_start_copy, &to_len);
268 
269   if (res == 0 || from_len == 0)
270     result = __codecvt_ok;
271   else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
272     result = __codecvt_partial;
273   else
274     result = __codecvt_error;
275 
276 # else
277   /* Decide what to do.  */
278   result = __codecvt_error;
279 # endif
280 #endif
281 
282   return result;
283 }
284 
285 
286 static enum __codecvt_result
287 do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
288 	    char *to_start, char *to_end, char **to_stop)
289 {
290   enum __codecvt_result result;
291 
292 #ifdef _LIBC
293   struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
294   int status;
295   size_t dummy;
296 
297   codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
298   codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
299   codecvt->__cd_out.__cd.__data[0].__statep = statep;
300 
301   status = DL_CALL_FCT (gs->__fct,
302 			(gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
303 			 NULL, &dummy, 1, 0));
304 
305   *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
306 
307   switch (status)
308     {
309     case __GCONV_OK:
310     case __GCONV_EMPTY_INPUT:
311       result = __codecvt_ok;
312       break;
313 
314     case __GCONV_FULL_OUTPUT:
315     case __GCONV_INCOMPLETE_INPUT:
316       result = __codecvt_partial;
317       break;
318 
319     default:
320       result = __codecvt_error;
321       break;
322     }
323 #else
324 # ifdef _GLIBCPP_USE_WCHAR_T
325   size_t res;
326   char *to_start_copy = (char *) to_start;
327   size_t to_len = to_end - to_start;
328 
329   res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
330 
331   if (res == 0)
332     result = __codecvt_ok;
333   else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
334     result = __codecvt_partial;
335   else
336     result = __codecvt_error;
337 # else
338   /* Decide what to do.  */
339   result = __codecvt_error;
340 # endif
341 #endif
342 
343   return result;
344 }
345 
346 
347 static enum __codecvt_result
348 do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
349        const char *from_start, const char *from_end, const char **from_stop,
350        wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
351 {
352   enum __codecvt_result result;
353 
354 #ifdef _LIBC
355   struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
356   int status;
357   size_t dummy;
358   const unsigned char *from_start_copy = (unsigned char *) from_start;
359 
360   codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start;
361   codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end;
362   codecvt->__cd_in.__cd.__data[0].__statep = statep;
363 
364   status = DL_CALL_FCT (gs->__fct,
365 			(gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
366 			 from_end, NULL, &dummy, 0, 0));
367 
368   *from_stop = from_start_copy;
369   *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
370 
371   switch (status)
372     {
373     case __GCONV_OK:
374     case __GCONV_EMPTY_INPUT:
375       result = __codecvt_ok;
376       break;
377 
378     case __GCONV_FULL_OUTPUT:
379     case __GCONV_INCOMPLETE_INPUT:
380       result = __codecvt_partial;
381       break;
382 
383     default:
384       result = __codecvt_error;
385       break;
386     }
387 #else
388 # ifdef _GLIBCPP_USE_WCHAR_T
389   size_t res;
390   const char *from_start_copy = (const char *) from_start;
391   size_t from_len = from_end - from_start;
392   char *to_start_copy = (char *) from_start;
393   size_t to_len = to_end - to_start;
394 
395   res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
396 	       &to_start_copy, &to_len);
397 
398   if (res == 0)
399     result = __codecvt_ok;
400   else if (to_len == 0)
401     result = __codecvt_partial;
402   else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
403     result = __codecvt_partial;
404   else
405     result = __codecvt_error;
406 # else
407   /* Decide what to do.  */
408   result = __codecvt_error;
409 # endif
410 #endif
411 
412   return result;
413 }
414 
415 
/* Describe the external encoding: -1 if it is stateful, 0 if the number
   of bytes per character varies, otherwise that fixed number of bytes.
   Without _LIBC the answer is always the pessimistic -1.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding has no meaningful per-character size.  */
  if (step->__stateful)
    return -1;

  /* Only a constant input size per wide character can be reported.  */
  if (step->__min_needed_from == step->__max_needed_from)
    return step->__min_needed_from;

  return 0;
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
436 
437 
/* Always answers 0; the CODECVT argument is not consulted.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  (void) codecvt;

  return 0;
}
443 
444 
445 static int
446 do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
447 	   const char *from_start, const char *from_end, _IO_size_t max)
448 {
449   int result;
450 #ifdef _LIBC
451   const unsigned char *cp = (const unsigned char *) from_start;
452   wchar_t to_buf[max];
453   struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
454   int status;
455   size_t dummy;
456 
457   codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf;
458   codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max];
459   codecvt->__cd_in.__cd.__data[0].__statep = statep;
460 
461   status = DL_CALL_FCT (gs->__fct,
462 			(gs, codecvt->__cd_in.__cd.__data, &cp, from_end,
463 			 NULL, &dummy, 0, 0));
464 
465   result = cp - (const unsigned char *) from_start;
466 #else
467 # ifdef _GLIBCPP_USE_WCHAR_T
468   const char *from_start_copy = (const char *) from_start;
469   size_t from_len = from_end - from_start;
470   wchar_t to_buf[max];
471   size_t res;
472   char *to_start = (char *) to_buf;
473 
474   res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
475 	       &to_start, &max);
476 
477   result = from_start_copy - (char *) from_start;
478 # else
479   /* Decide what to do.  */
480   result = 0;
481 # endif
482 #endif
483 
484   return result;
485 }
486 
487 
/* Return the largest number of external bytes one character can take:
   the input step's __max_needed_from under _LIBC, MB_CUR_MAX otherwise.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
  int longest;

#ifdef _LIBC
  longest = codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  longest = MB_CUR_MAX;
#endif

  return longest;
}
497