1 /* Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. 2 This file is part of the GNU C Library. 3 4 The GNU C Library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Lesser General Public 6 License as published by the Free Software Foundation; either 7 version 2.1 of the License, or (at your option) any later version. 8 9 The GNU C Library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Lesser General Public License for more details. 13 14 You should have received a copy of the GNU Lesser General Public 15 License along with the GNU C Library; if not, write to the Free 16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 17 02111-1307 USA. 18 19 As a special exception, if you link the code in this file with 20 files compiled with a GNU compiler to produce an executable, 21 that does not cause the resulting executable to be covered by 22 the GNU Lesser General Public License. This exception does not 23 however invalidate any other reasons why the executable file 24 might be covered by the GNU Lesser General Public License. 25 This exception applies to code released by its copyright holders 26 in files containing the exception. */ 27 28 #include <libioP.h> 29 #ifdef _LIBC 30 # include <dlfcn.h> 31 # include <wchar.h> 32 #endif 33 #include <assert.h> 34 #include <stdlib.h> 35 #include <string.h> 36 37 #ifdef _LIBC 38 # include <langinfo.h> 39 # include <locale/localeinfo.h> 40 # include <wcsmbs/wcsmbsload.h> 41 # include <iconv/gconv_int.h> 42 # include <shlib-compat.h> 43 #endif 44 45 /* Prototypes of libio's codecvt functions. */ 46 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt, 47 __mbstate_t *statep, 48 const wchar_t *from_start, 49 const wchar_t *from_end, 50 const wchar_t **from_stop, char *to_start, 51 char *to_end, char **to_stop); 52 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt, 53 __mbstate_t *statep, char *to_start, 54 char *to_end, char **to_stop); 55 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt, 56 __mbstate_t *statep, 57 const char *from_start, 58 const char *from_end, 59 const char **from_stop, wchar_t *to_start, 60 wchar_t *to_end, wchar_t **to_stop); 61 static int do_encoding (struct _IO_codecvt *codecvt); 62 static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep, 63 const char *from_start, 64 const char *from_end, _IO_size_t max); 65 static int do_max_length (struct _IO_codecvt *codecvt); 66 static int do_always_noconv (struct _IO_codecvt *codecvt); 67 68 69 /* The functions used in `codecvt' for libio are always the same. */ 70 struct _IO_codecvt __libio_codecvt = 71 { 72 .__codecvt_destr = NULL, /* Destructor, never used. */ 73 .__codecvt_do_out = do_out, 74 .__codecvt_do_unshift = do_unshift, 75 .__codecvt_do_in = do_in, 76 .__codecvt_do_encoding = do_encoding, 77 .__codecvt_do_always_noconv = do_always_noconv, 78 .__codecvt_do_length = do_length, 79 .__codecvt_do_max_length = do_max_length 80 }; 81 82 83 #ifdef _LIBC 84 struct __gconv_trans_data __libio_translit attribute_hidden = 85 { 86 .__trans_fct = NULL 87 }; 88 #endif 89 90 /* Return orientation of stream. If mode is nonzero try to change 91 * the orientation first. 92 */ 93 94 #undef _IO_fwide 95 96 int 97 _IO_fwide(_IO_FILE *fp, int mode) 98 { 99 /* Normalize the value. */ 100 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1); 101 102 if (mode == 0) { 103 /* The caller simply wants to know about the current orientation. */ 104 return fp->_mode; 105 } 106 107 #if defined SHARED && defined _LIBC \ 108 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) 109 if (__builtin_expect (&_IO_stdin_used == NULL, 0) 110 && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr)) 111 /* This is for a stream in the glibc 2.0 format. */ 112 return -1; 113 #endif 114 115 if (fp->_mode != 0) { 116 /* The orientation already has been determined. */ 117 return fp->_mode; 118 } 119 120 { 121 struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt; 122 123 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end; 124 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base; 125 126 /* Get the character conversion functions based on the currently 127 * selected locale for LC_CTYPE. 128 */ 129 #ifdef _LIBC 130 { 131 struct gconv_fcts fcts; 132 133 /* Clear the state. We start all over again. */ 134 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t)); 135 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t)); 136 137 __wcsmbs_clone_conv (&fcts); 138 assert (fcts.towc_nsteps == 1); 139 assert (fcts.tomb_nsteps == 1); 140 141 /* The functions are always the same. */ 142 *cc = __libio_codecvt; 143 144 cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps; 145 cc->__cd_in.__cd.__steps = fcts.towc; 146 147 cc->__cd_in.__cd.__data[0].__invocation_counter = 0; 148 cc->__cd_in.__cd.__data[0].__internal_use = 1; 149 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST; 150 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state; 151 152 /* XXX For now no transliteration. */ 153 cc->__cd_in.__cd.__data[0].__trans = NULL; 154 155 cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps; 156 cc->__cd_out.__cd.__steps = fcts.tomb; 157 158 cc->__cd_out.__cd.__data[0].__invocation_counter = 0; 159 cc->__cd_out.__cd.__data[0].__internal_use = 1; 160 cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST; 161 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state; 162 163 /* And now the transliteration. */ 164 cc->__cd_out.__cd.__data[0].__trans = &__libio_translit; 165 } 166 #else 167 # ifdef _GLIBCPP_USE_WCHAR_T 168 { 169 /* Determine internal and external character sets. 170 171 XXX For now we make our life easy: we assume a fixed internal 172 encoding (as most sane systems have; hi HP/UX!). If somebody 173 cares about systems which changing internal charsets they 174 should come up with a solution for the determination of the 175 currently used internal character set. */ 176 const char *internal_ccs = _G_INTERNAL_CCS; 177 const char *external_ccs = NULL; 178 179 # ifdef HAVE_NL_LANGINFO 180 external_ccs = nl_langinfo (CODESET); 181 # endif 182 if (external_ccs == NULL) 183 external_ccs = "ISO-8859-1"; 184 185 cc->__cd_in = iconv_open (internal_ccs, external_ccs); 186 if (cc->__cd_in != (iconv_t) -1) 187 cc->__cd_out = iconv_open (external_ccs, internal_ccs); 188 189 if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1) 190 { 191 if (cc->__cd_in != (iconv_t) -1) 192 iconv_close (cc->__cd_in); 193 /* XXX */ 194 abort (); 195 } 196 } 197 # else 198 # error "somehow determine this from LC_CTYPE" 199 # endif 200 #endif 201 202 /* From now on use the wide character callback functions. */ 203 ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable; 204 205 /* One last twist: we get the current stream position. The wide 206 char streams have much more problems with not knowing the 207 current position and so we should disable the optimization 208 which allows the functions without knowing the position. */ 209 fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur); 210 } 211 /* Set the mode now. */ 212 fp->_mode = mode; 213 214 return mode; 215 } 216 217 static enum __codecvt_result 218 do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep, 219 const wchar_t *from_start, const wchar_t *from_end, 220 const wchar_t **from_stop, char *to_start, char *to_end, 221 char **to_stop) 222 { 223 enum __codecvt_result result; 224 225 #ifdef _LIBC 226 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps; 227 int status; 228 size_t dummy; 229 const unsigned char *from_start_copy = (unsigned char *) from_start; 230 231 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start; 232 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end; 233 codecvt->__cd_out.__cd.__data[0].__statep = statep; 234 235 status = DL_CALL_FCT (gs->__fct, 236 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy, 237 (const unsigned char *) from_end, NULL, 238 &dummy, 0, 0)); 239 240 *from_stop = (wchar_t *) from_start_copy; 241 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf; 242 243 switch (status) 244 { 245 case __GCONV_OK: 246 case __GCONV_EMPTY_INPUT: 247 result = __codecvt_ok; 248 break; 249 250 case __GCONV_FULL_OUTPUT: 251 case __GCONV_INCOMPLETE_INPUT: 252 result = __codecvt_partial; 253 break; 254 255 default: 256 result = __codecvt_error; 257 break; 258 } 259 #else 260 # ifdef _GLIBCPP_USE_WCHAR_T 261 size_t res; 262 const char *from_start_copy = (const char *) from_start; 263 size_t from_len = from_end - from_start; 264 char *to_start_copy = to_start; 265 size_t to_len = to_end - to_start; 266 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len, 267 &to_start_copy, &to_len); 268 269 if (res == 0 || from_len == 0) 270 result = __codecvt_ok; 271 else if (to_len < codecvt->__codecvt_do_max_length (codecvt)) 272 result = __codecvt_partial; 273 else 274 result = __codecvt_error; 275 276 # else 277 /* Decide what to do. */ 278 result = __codecvt_error; 279 # endif 280 #endif 281 282 return result; 283 } 284 285 286 static enum __codecvt_result 287 do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep, 288 char *to_start, char *to_end, char **to_stop) 289 { 290 enum __codecvt_result result; 291 292 #ifdef _LIBC 293 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps; 294 int status; 295 size_t dummy; 296 297 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start; 298 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end; 299 codecvt->__cd_out.__cd.__data[0].__statep = statep; 300 301 status = DL_CALL_FCT (gs->__fct, 302 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL, 303 NULL, &dummy, 1, 0)); 304 305 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf; 306 307 switch (status) 308 { 309 case __GCONV_OK: 310 case __GCONV_EMPTY_INPUT: 311 result = __codecvt_ok; 312 break; 313 314 case __GCONV_FULL_OUTPUT: 315 case __GCONV_INCOMPLETE_INPUT: 316 result = __codecvt_partial; 317 break; 318 319 default: 320 result = __codecvt_error; 321 break; 322 } 323 #else 324 # ifdef _GLIBCPP_USE_WCHAR_T 325 size_t res; 326 char *to_start_copy = (char *) to_start; 327 size_t to_len = to_end - to_start; 328 329 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len); 330 331 if (res == 0) 332 result = __codecvt_ok; 333 else if (to_len < codecvt->__codecvt_do_max_length (codecvt)) 334 result = __codecvt_partial; 335 else 336 result = __codecvt_error; 337 # else 338 /* Decide what to do. */ 339 result = __codecvt_error; 340 # endif 341 #endif 342 343 return result; 344 } 345 346 347 static enum __codecvt_result 348 do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep, 349 const char *from_start, const char *from_end, const char **from_stop, 350 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop) 351 { 352 enum __codecvt_result result; 353 354 #ifdef _LIBC 355 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps; 356 int status; 357 size_t dummy; 358 const unsigned char *from_start_copy = (unsigned char *) from_start; 359 360 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start; 361 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end; 362 codecvt->__cd_in.__cd.__data[0].__statep = statep; 363 364 status = DL_CALL_FCT (gs->__fct, 365 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy, 366 from_end, NULL, &dummy, 0, 0)); 367 368 *from_stop = from_start_copy; 369 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf; 370 371 switch (status) 372 { 373 case __GCONV_OK: 374 case __GCONV_EMPTY_INPUT: 375 result = __codecvt_ok; 376 break; 377 378 case __GCONV_FULL_OUTPUT: 379 case __GCONV_INCOMPLETE_INPUT: 380 result = __codecvt_partial; 381 break; 382 383 default: 384 result = __codecvt_error; 385 break; 386 } 387 #else 388 # ifdef _GLIBCPP_USE_WCHAR_T 389 size_t res; 390 const char *from_start_copy = (const char *) from_start; 391 size_t from_len = from_end - from_start; 392 char *to_start_copy = (char *) from_start; 393 size_t to_len = to_end - to_start; 394 395 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len, 396 &to_start_copy, &to_len); 397 398 if (res == 0) 399 result = __codecvt_ok; 400 else if (to_len == 0) 401 result = __codecvt_partial; 402 else if (from_len < codecvt->__codecvt_do_max_length (codecvt)) 403 result = __codecvt_partial; 404 else 405 result = __codecvt_error; 406 # else 407 /* Decide what to do. */ 408 result = __codecvt_error; 409 # endif 410 #endif 411 412 return result; 413 } 414 415 416 static int 417 do_encoding (struct _IO_codecvt *codecvt) 418 { 419 #ifdef _LIBC 420 /* See whether the encoding is stateful. */ 421 if (codecvt->__cd_in.__cd.__steps[0].__stateful) 422 return -1; 423 /* Fortunately not. Now determine the input bytes for the conversion 424 necessary for each wide character. */ 425 if (codecvt->__cd_in.__cd.__steps[0].__min_needed_from 426 != codecvt->__cd_in.__cd.__steps[0].__max_needed_from) 427 /* Not a constant value. */ 428 return 0; 429 430 return codecvt->__cd_in.__cd.__steps[0].__min_needed_from; 431 #else 432 /* Worst case scenario. */ 433 return -1; 434 #endif 435 } 436 437 438 static int 439 do_always_noconv (struct _IO_codecvt *codecvt) 440 { 441 return 0; 442 } 443 444 445 static int 446 do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep, 447 const char *from_start, const char *from_end, _IO_size_t max) 448 { 449 int result; 450 #ifdef _LIBC 451 const unsigned char *cp = (const unsigned char *) from_start; 452 wchar_t to_buf[max]; 453 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps; 454 int status; 455 size_t dummy; 456 457 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf; 458 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max]; 459 codecvt->__cd_in.__cd.__data[0].__statep = statep; 460 461 status = DL_CALL_FCT (gs->__fct, 462 (gs, codecvt->__cd_in.__cd.__data, &cp, from_end, 463 NULL, &dummy, 0, 0)); 464 465 result = cp - (const unsigned char *) from_start; 466 #else 467 # ifdef _GLIBCPP_USE_WCHAR_T 468 const char *from_start_copy = (const char *) from_start; 469 size_t from_len = from_end - from_start; 470 wchar_t to_buf[max]; 471 size_t res; 472 char *to_start = (char *) to_buf; 473 474 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len, 475 &to_start, &max); 476 477 result = from_start_copy - (char *) from_start; 478 # else 479 /* Decide what to do. */ 480 result = 0; 481 # endif 482 #endif 483 484 return result; 485 } 486 487 488 static int 489 do_max_length (struct _IO_codecvt *codecvt) 490 { 491 #ifdef _LIBC 492 return codecvt->__cd_in.__cd.__steps[0].__max_needed_from; 493 #else 494 return MB_CUR_MAX; 495 #endif 496 } 497