xref: /haiku/src/system/libroot/posix/glibc/libio/iofwide.c (revision 1d9d47fc72028bb71b5f232a877231e59cfe2438)
1 /* Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
2    This file is part of the GNU C Library.
3 
4    The GNU C Library is free software; you can redistribute it and/or
5    modify it under the terms of the GNU Lesser General Public
6    License as published by the Free Software Foundation; either
7    version 2.1 of the License, or (at your option) any later version.
8 
9    The GNU C Library is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12    Lesser General Public License for more details.
13 
14    You should have received a copy of the GNU Lesser General Public
15    License along with the GNU C Library; if not, write to the Free
16    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17    02111-1307 USA.
18 
19    As a special exception, if you link the code in this file with
20    files compiled with a GNU compiler to produce an executable,
21    that does not cause the resulting executable to be covered by
22    the GNU Lesser General Public License.  This exception does not
23    however invalidate any other reasons why the executable file
24    might be covered by the GNU Lesser General Public License.
25    This exception applies to code released by its copyright holders
26    in files containing the exception.  */
27 
28 #include <libioP.h>
29 #ifdef _LIBC
30 # include <dlfcn.h>
31 # include <wchar.h>
32 #endif
33 #include <assert.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
37 #ifdef _LIBC
38 # include <langinfo.h>
39 # include <locale/localeinfo.h>
40 # include <wcsmbs/wcsmbsload.h>
41 # include <iconv/gconv_int.h>
42 # include <shlib-compat.h>
43 #endif
44 
45 #if 0
46 /* Prototypes of libio's codecvt functions.  */
47 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
48 				     __mbstate_t *statep,
49 				     const wchar_t *from_start,
50 				     const wchar_t *from_end,
51 				     const wchar_t **from_stop, char *to_start,
52 				     char *to_end, char **to_stop);
53 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
54 					 __mbstate_t *statep, char *to_start,
55 					 char *to_end, char **to_stop);
56 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
57 				    __mbstate_t *statep,
58 				    const char *from_start,
59 				    const char *from_end,
60 				    const char **from_stop, wchar_t *to_start,
61 				    wchar_t *to_end, wchar_t **to_stop);
62 static int do_encoding (struct _IO_codecvt *codecvt);
63 static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
64 		      const char *from_start,
65 		      const char *from_end, _IO_size_t max);
66 static int do_max_length (struct _IO_codecvt *codecvt);
67 static int do_always_noconv (struct _IO_codecvt *codecvt);
68 
69 
70 /* The functions used in `codecvt' for libio are always the same.  */
71 struct _IO_codecvt __libio_codecvt =
72 {
73   .__codecvt_destr = NULL,		/* Destructor, never used.  */
74   .__codecvt_do_out = do_out,
75   .__codecvt_do_unshift = do_unshift,
76   .__codecvt_do_in = do_in,
77   .__codecvt_do_encoding = do_encoding,
78   .__codecvt_do_always_noconv = do_always_noconv,
79   .__codecvt_do_length = do_length,
80   .__codecvt_do_max_length = do_max_length
81 };
82 
83 
84 #ifdef _LIBC
85 struct __gconv_trans_data __libio_translit attribute_hidden =
86 {
87   .__trans_fct = __gconv_transliterate
88 };
89 #endif
90 #endif
91 
92 /* Return orientation of stream.  If mode is nonzero try to change
93  * the orientation first.
94  */
95 
96 #undef _IO_fwide
97 
98 int
99 _IO_fwide(_IO_FILE *fp, int mode)
100 {
101 	/* Normalize the value.  */
102 	mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
103 
104 	if (mode == 0) {
105 		/* The caller simply wants to know about the current orientation. */
106 		return fp->_mode;
107 	}
108 
109 #if defined SHARED && defined _LIBC \
110     && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
111   if (__builtin_expect (&_IO_stdin_used == NULL, 0)
112       && (fp == _IO_stdin ||  fp == _IO_stdout || fp == _IO_stderr))
113     /* This is for a stream in the glibc 2.0 format.  */
114     return -1;
115 #endif
116 
117 	if (fp->_mode != 0) {
118 		/* The orientation already has been determined.  */
119 		return fp->_mode;
120 	}
121 
122 	/* Set the orientation appropriately. */
123 	if (mode > 0) {
124 		// wide-orientation is currently disabled!
125 		return -1;
126 	}
127 #if 0
128 		struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
129 
130 		fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
131 		fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
132 
133 		/* Get the character conversion functions based on the currently
134 		 * selected locale for LC_CTYPE.
135 		 */
136 #ifdef _LIBC
137       {
138 	struct gconv_fcts fcts;
139 
140 	/* Clear the state.  We start all over again.  */
141 	memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
142 	memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
143 
144 	__wcsmbs_clone_conv (&fcts);
145 	assert (fcts.towc_nsteps == 1);
146 	assert (fcts.tomb_nsteps == 1);
147 
148 	/* The functions are always the same.  */
149 	*cc = __libio_codecvt;
150 
151 	cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
152 	cc->__cd_in.__cd.__steps = fcts.towc;
153 
154 	cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
155 	cc->__cd_in.__cd.__data[0].__internal_use = 1;
156 	cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
157 	cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
158 
159 	/* XXX For now no transliteration.  */
160 	cc->__cd_in.__cd.__data[0].__trans = NULL;
161 
162 	cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
163 	cc->__cd_out.__cd.__steps = fcts.tomb;
164 
165 	cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
166 	cc->__cd_out.__cd.__data[0].__internal_use = 1;
167 	cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST;
168 	cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
169 
170 	/* And now the transliteration.  */
171 	cc->__cd_out.__cd.__data[0].__trans = &__libio_translit;
172       }
173 #else
174 # ifdef _GLIBCPP_USE_WCHAR_T
175       {
176 	/* Determine internal and external character sets.
177 
178 	   XXX For now we make our life easy: we assume a fixed internal
179 	   encoding (as most sane systems have; hi HP/UX!).  If somebody
180 	   cares about systems which changing internal charsets they
181 	   should come up with a solution for the determination of the
182 	   currently used internal character set.  */
183 	const char *internal_ccs = _G_INTERNAL_CCS;
184 	const char *external_ccs = NULL;
185 
186 #  ifdef HAVE_NL_LANGINFO
187 	external_ccs = nl_langinfo (CODESET);
188 #  endif
189 	if (external_ccs == NULL)
190 	  external_ccs = "ISO-8859-1";
191 
192 	cc->__cd_in = iconv_open (internal_ccs, external_ccs);
193 	if (cc->__cd_in != (iconv_t) -1)
194 	  cc->__cd_out = iconv_open (external_ccs, internal_ccs);
195 
196 	if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
197 	  {
198 	    if (cc->__cd_in != (iconv_t) -1)
199 	      iconv_close (cc->__cd_in);
200 	    /* XXX */
201 	    abort ();
202 	  }
203       }
204 # else
205 #  error "somehow determine this from LC_CTYPE"
206 # endif
207 #endif
208 
209       /* From now on use the wide character callback functions.  */
210       ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable;
211 
212       /* One last twist: we get the current stream position.  The wide
213 	 char streams have much more problems with not knowing the
214 	 current position and so we should disable the optimization
215 	 which allows the functions without knowing the position.  */
216       fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur);
217     }
218 #endif
219 
220 	/* Set the mode now.  */
221 	fp->_mode = mode;
222 
223 	return mode;
224 }
225 
226 #if 0
227 static enum __codecvt_result
228 do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
229 	const wchar_t *from_start, const wchar_t *from_end,
230 	const wchar_t **from_stop, char *to_start, char *to_end,
231 	char **to_stop)
232 {
233   enum __codecvt_result result;
234 
235 #ifdef _LIBC
236   struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
237   int status;
238   size_t dummy;
239   const unsigned char *from_start_copy = (unsigned char *) from_start;
240 
241   codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
242   codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
243   codecvt->__cd_out.__cd.__data[0].__statep = statep;
244 
245   status = DL_CALL_FCT (gs->__fct,
246 			(gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
247 			 (const unsigned char *) from_end, NULL,
248 			 &dummy, 0, 0));
249 
250   *from_stop = (wchar_t *) from_start_copy;
251   *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
252 
253   switch (status)
254     {
255     case __GCONV_OK:
256     case __GCONV_EMPTY_INPUT:
257       result = __codecvt_ok;
258       break;
259 
260     case __GCONV_FULL_OUTPUT:
261     case __GCONV_INCOMPLETE_INPUT:
262       result = __codecvt_partial;
263       break;
264 
265     default:
266       result = __codecvt_error;
267       break;
268     }
269 #else
270 # ifdef _GLIBCPP_USE_WCHAR_T
271   size_t res;
272   const char *from_start_copy = (const char *) from_start;
273   size_t from_len = from_end - from_start;
274   char *to_start_copy = to_start;
275   size_t to_len = to_end - to_start;
276   res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
277 	       &to_start_copy, &to_len);
278 
279   if (res == 0 || from_len == 0)
280     result = __codecvt_ok;
281   else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
282     result = __codecvt_partial;
283   else
284     result = __codecvt_error;
285 
286 # else
287   /* Decide what to do.  */
288   result = __codecvt_error;
289 # endif
290 #endif
291 
292   return result;
293 }
294 
295 
296 static enum __codecvt_result
297 do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
298 	    char *to_start, char *to_end, char **to_stop)
299 {
300   enum __codecvt_result result;
301 
302 #ifdef _LIBC
303   struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
304   int status;
305   size_t dummy;
306 
307   codecvt->__cd_out.__cd.__data[0].__outbuf = to_start;
308   codecvt->__cd_out.__cd.__data[0].__outbufend = to_end;
309   codecvt->__cd_out.__cd.__data[0].__statep = statep;
310 
311   status = DL_CALL_FCT (gs->__fct,
312 			(gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
313 			 NULL, &dummy, 1, 0));
314 
315   *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf;
316 
317   switch (status)
318     {
319     case __GCONV_OK:
320     case __GCONV_EMPTY_INPUT:
321       result = __codecvt_ok;
322       break;
323 
324     case __GCONV_FULL_OUTPUT:
325     case __GCONV_INCOMPLETE_INPUT:
326       result = __codecvt_partial;
327       break;
328 
329     default:
330       result = __codecvt_error;
331       break;
332     }
333 #else
334 # ifdef _GLIBCPP_USE_WCHAR_T
335   size_t res;
336   char *to_start_copy = (char *) to_start;
337   size_t to_len = to_end - to_start;
338 
339   res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
340 
341   if (res == 0)
342     result = __codecvt_ok;
343   else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
344     result = __codecvt_partial;
345   else
346     result = __codecvt_error;
347 # else
348   /* Decide what to do.  */
349   result = __codecvt_error;
350 # endif
351 #endif
352 
353   return result;
354 }
355 
356 
357 static enum __codecvt_result
358 do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
359        const char *from_start, const char *from_end, const char **from_stop,
360        wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
361 {
362   enum __codecvt_result result;
363 
364 #ifdef _LIBC
365   struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
366   int status;
367   size_t dummy;
368   const unsigned char *from_start_copy = (unsigned char *) from_start;
369 
370   codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start;
371   codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end;
372   codecvt->__cd_in.__cd.__data[0].__statep = statep;
373 
374   status = DL_CALL_FCT (gs->__fct,
375 			(gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
376 			 from_end, NULL, &dummy, 0, 0));
377 
378   *from_stop = from_start_copy;
379   *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
380 
381   switch (status)
382     {
383     case __GCONV_OK:
384     case __GCONV_EMPTY_INPUT:
385       result = __codecvt_ok;
386       break;
387 
388     case __GCONV_FULL_OUTPUT:
389     case __GCONV_INCOMPLETE_INPUT:
390       result = __codecvt_partial;
391       break;
392 
393     default:
394       result = __codecvt_error;
395       break;
396     }
397 #else
398 # ifdef _GLIBCPP_USE_WCHAR_T
399   size_t res;
400   const char *from_start_copy = (const char *) from_start;
401   size_t from_len = from_end - from_start;
402   char *to_start_copy = (char *) from_start;
403   size_t to_len = to_end - to_start;
404 
405   res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
406 	       &to_start_copy, &to_len);
407 
408   if (res == 0)
409     result = __codecvt_ok;
410   else if (to_len == 0)
411     result = __codecvt_partial;
412   else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
413     result = __codecvt_partial;
414   else
415     result = __codecvt_error;
416 # else
417   /* Decide what to do.  */
418   result = __codecvt_error;
419 # endif
420 #endif
421 
422   return result;
423 }
424 
425 
/* Describe the external encoding handled by CODECVT.

   With _LIBC: returns -1 if the first input conversion step is stateful,
   0 if the number of input bytes needed per wide character is not
   constant, and that constant otherwise.
   Without _LIBC: always returns -1, the worst case.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  const struct __gconv_step *step = &codecvt->__cd_in.__cd.__steps[0];

  /* A stateful encoding is reported as such right away.  */
  if (step->__stateful)
    return -1;

  /* Only a fixed width is reported; a variable width yields 0.  */
  return (step->__min_needed_from == step->__max_needed_from
	  ? step->__min_needed_from
	  : 0);
#else
  /* Worst case scenario.  */
  return -1;
#endif
}
446 
447 
/* Always returns 0; CODECVT is not examined.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  (void) codecvt;

  return 0;
}
453 
454 
455 static int
456 do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
457 	   const char *from_start, const char *from_end, _IO_size_t max)
458 {
459   int result;
460 #ifdef _LIBC
461   const unsigned char *cp = (const unsigned char *) from_start;
462   wchar_t to_buf[max];
463   struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
464   int status;
465   size_t dummy;
466 
467   codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf;
468   codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max];
469   codecvt->__cd_in.__cd.__data[0].__statep = statep;
470 
471   status = DL_CALL_FCT (gs->__fct,
472 			(gs, codecvt->__cd_in.__cd.__data, &cp, from_end,
473 			 NULL, &dummy, 0, 0));
474 
475   result = cp - (const unsigned char *) from_start;
476 #else
477 # ifdef _GLIBCPP_USE_WCHAR_T
478   const char *from_start_copy = (const char *) from_start;
479   size_t from_len = from_end - from_start;
480   wchar_t to_buf[max];
481   size_t res;
482   char *to_start = (char *) to_buf;
483 
484   res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
485 	       &to_start, &max);
486 
487   result = from_start_copy - (char *) from_start;
488 # else
489   /* Decide what to do.  */
490   result = 0;
491 # endif
492 #endif
493 
494   return result;
495 }
496 
497 
/* Return the largest number of external bytes needed for one wide
   character: the __max_needed_from of the first input conversion step
   with _LIBC, MB_CUR_MAX otherwise.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
  int longest;

#ifdef _LIBC
  longest = codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  longest = MB_CUR_MAX;
#endif

  return longest;
}
507 #endif
508