1 /* Copyright (C) 1999, 2000, 2001, 2002 Free Software Foundation, Inc. 2 This file is part of the GNU C Library. 3 4 The GNU C Library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Lesser General Public 6 License as published by the Free Software Foundation; either 7 version 2.1 of the License, or (at your option) any later version. 8 9 The GNU C Library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Lesser General Public License for more details. 13 14 You should have received a copy of the GNU Lesser General Public 15 License along with the GNU C Library; if not, write to the Free 16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 17 02111-1307 USA. 18 19 As a special exception, if you link the code in this file with 20 files compiled with a GNU compiler to produce an executable, 21 that does not cause the resulting executable to be covered by 22 the GNU Lesser General Public License. This exception does not 23 however invalidate any other reasons why the executable file 24 might be covered by the GNU Lesser General Public License. 25 This exception applies to code released by its copyright holders 26 in files containing the exception. */ 27 28 #include <libioP.h> 29 #ifdef _LIBC 30 # include <dlfcn.h> 31 # include <wchar.h> 32 #endif 33 #include <assert.h> 34 #include <stdlib.h> 35 #include <string.h> 36 37 #ifdef _LIBC 38 # include <langinfo.h> 39 # include <locale/localeinfo.h> 40 # include <wcsmbs/wcsmbsload.h> 41 # include <iconv/gconv_int.h> 42 # include <shlib-compat.h> 43 #endif 44 45 #if 0 46 /* Prototypes of libio's codecvt functions. */ 47 static enum __codecvt_result do_out (struct _IO_codecvt *codecvt, 48 __mbstate_t *statep, 49 const wchar_t *from_start, 50 const wchar_t *from_end, 51 const wchar_t **from_stop, char *to_start, 52 char *to_end, char **to_stop); 53 static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt, 54 __mbstate_t *statep, char *to_start, 55 char *to_end, char **to_stop); 56 static enum __codecvt_result do_in (struct _IO_codecvt *codecvt, 57 __mbstate_t *statep, 58 const char *from_start, 59 const char *from_end, 60 const char **from_stop, wchar_t *to_start, 61 wchar_t *to_end, wchar_t **to_stop); 62 static int do_encoding (struct _IO_codecvt *codecvt); 63 static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep, 64 const char *from_start, 65 const char *from_end, _IO_size_t max); 66 static int do_max_length (struct _IO_codecvt *codecvt); 67 static int do_always_noconv (struct _IO_codecvt *codecvt); 68 69 70 /* The functions used in `codecvt' for libio are always the same. */ 71 struct _IO_codecvt __libio_codecvt = 72 { 73 .__codecvt_destr = NULL, /* Destructor, never used. */ 74 .__codecvt_do_out = do_out, 75 .__codecvt_do_unshift = do_unshift, 76 .__codecvt_do_in = do_in, 77 .__codecvt_do_encoding = do_encoding, 78 .__codecvt_do_always_noconv = do_always_noconv, 79 .__codecvt_do_length = do_length, 80 .__codecvt_do_max_length = do_max_length 81 }; 82 83 84 #ifdef _LIBC 85 struct __gconv_trans_data __libio_translit attribute_hidden = 86 { 87 .__trans_fct = __gconv_transliterate 88 }; 89 #endif 90 #endif 91 92 /* Return orientation of stream. If mode is nonzero try to change 93 * the orientation first. 94 */ 95 96 #undef _IO_fwide 97 98 int 99 _IO_fwide(_IO_FILE *fp, int mode) 100 { 101 /* Normalize the value. */ 102 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1); 103 104 if (mode == 0) { 105 /* The caller simply wants to know about the current orientation. */ 106 return fp->_mode; 107 } 108 109 #if defined SHARED && defined _LIBC \ 110 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1) 111 if (__builtin_expect (&_IO_stdin_used == NULL, 0) 112 && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr)) 113 /* This is for a stream in the glibc 2.0 format. */ 114 return -1; 115 #endif 116 117 if (fp->_mode != 0) { 118 /* The orientation already has been determined. */ 119 return fp->_mode; 120 } 121 122 /* Set the orientation appropriately. */ 123 if (mode > 0) { 124 // wide-orientation is currently disabled! 125 return -1; 126 } 127 #if 0 128 struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt; 129 130 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end; 131 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base; 132 133 /* Get the character conversion functions based on the currently 134 * selected locale for LC_CTYPE. 135 */ 136 #ifdef _LIBC 137 { 138 struct gconv_fcts fcts; 139 140 /* Clear the state. We start all over again. */ 141 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t)); 142 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t)); 143 144 __wcsmbs_clone_conv (&fcts); 145 assert (fcts.towc_nsteps == 1); 146 assert (fcts.tomb_nsteps == 1); 147 148 /* The functions are always the same. */ 149 *cc = __libio_codecvt; 150 151 cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps; 152 cc->__cd_in.__cd.__steps = fcts.towc; 153 154 cc->__cd_in.__cd.__data[0].__invocation_counter = 0; 155 cc->__cd_in.__cd.__data[0].__internal_use = 1; 156 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST; 157 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state; 158 159 /* XXX For now no transliteration. */ 160 cc->__cd_in.__cd.__data[0].__trans = NULL; 161 162 cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps; 163 cc->__cd_out.__cd.__steps = fcts.tomb; 164 165 cc->__cd_out.__cd.__data[0].__invocation_counter = 0; 166 cc->__cd_out.__cd.__data[0].__internal_use = 1; 167 cc->__cd_out.__cd.__data[0].__flags = __GCONV_IS_LAST; 168 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state; 169 170 /* And now the transliteration. */ 171 cc->__cd_out.__cd.__data[0].__trans = &__libio_translit; 172 } 173 #else 174 # ifdef _GLIBCPP_USE_WCHAR_T 175 { 176 /* Determine internal and external character sets. 177 178 XXX For now we make our life easy: we assume a fixed internal 179 encoding (as most sane systems have; hi HP/UX!). If somebody 180 cares about systems which changing internal charsets they 181 should come up with a solution for the determination of the 182 currently used internal character set. */ 183 const char *internal_ccs = _G_INTERNAL_CCS; 184 const char *external_ccs = NULL; 185 186 # ifdef HAVE_NL_LANGINFO 187 external_ccs = nl_langinfo (CODESET); 188 # endif 189 if (external_ccs == NULL) 190 external_ccs = "ISO-8859-1"; 191 192 cc->__cd_in = iconv_open (internal_ccs, external_ccs); 193 if (cc->__cd_in != (iconv_t) -1) 194 cc->__cd_out = iconv_open (external_ccs, internal_ccs); 195 196 if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1) 197 { 198 if (cc->__cd_in != (iconv_t) -1) 199 iconv_close (cc->__cd_in); 200 /* XXX */ 201 abort (); 202 } 203 } 204 # else 205 # error "somehow determine this from LC_CTYPE" 206 # endif 207 #endif 208 209 /* From now on use the wide character callback functions. */ 210 ((struct _IO_FILE_plus *) fp)->vtable = fp->_wide_data->_wide_vtable; 211 212 /* One last twist: we get the current stream position. The wide 213 char streams have much more problems with not knowing the 214 current position and so we should disable the optimization 215 which allows the functions without knowing the position. */ 216 fp->_offset = _IO_SYSSEEK (fp, 0, _IO_seek_cur); 217 } 218 #endif 219 220 /* Set the mode now. */ 221 fp->_mode = mode; 222 223 return mode; 224 } 225 226 #if 0 227 static enum __codecvt_result 228 do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep, 229 const wchar_t *from_start, const wchar_t *from_end, 230 const wchar_t **from_stop, char *to_start, char *to_end, 231 char **to_stop) 232 { 233 enum __codecvt_result result; 234 235 #ifdef _LIBC 236 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps; 237 int status; 238 size_t dummy; 239 const unsigned char *from_start_copy = (unsigned char *) from_start; 240 241 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start; 242 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end; 243 codecvt->__cd_out.__cd.__data[0].__statep = statep; 244 245 status = DL_CALL_FCT (gs->__fct, 246 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy, 247 (const unsigned char *) from_end, NULL, 248 &dummy, 0, 0)); 249 250 *from_stop = (wchar_t *) from_start_copy; 251 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf; 252 253 switch (status) 254 { 255 case __GCONV_OK: 256 case __GCONV_EMPTY_INPUT: 257 result = __codecvt_ok; 258 break; 259 260 case __GCONV_FULL_OUTPUT: 261 case __GCONV_INCOMPLETE_INPUT: 262 result = __codecvt_partial; 263 break; 264 265 default: 266 result = __codecvt_error; 267 break; 268 } 269 #else 270 # ifdef _GLIBCPP_USE_WCHAR_T 271 size_t res; 272 const char *from_start_copy = (const char *) from_start; 273 size_t from_len = from_end - from_start; 274 char *to_start_copy = to_start; 275 size_t to_len = to_end - to_start; 276 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len, 277 &to_start_copy, &to_len); 278 279 if (res == 0 || from_len == 0) 280 result = __codecvt_ok; 281 else if (to_len < codecvt->__codecvt_do_max_length (codecvt)) 282 result = __codecvt_partial; 283 else 284 result = __codecvt_error; 285 286 # else 287 /* Decide what to do. */ 288 result = __codecvt_error; 289 # endif 290 #endif 291 292 return result; 293 } 294 295 296 static enum __codecvt_result 297 do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep, 298 char *to_start, char *to_end, char **to_stop) 299 { 300 enum __codecvt_result result; 301 302 #ifdef _LIBC 303 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps; 304 int status; 305 size_t dummy; 306 307 codecvt->__cd_out.__cd.__data[0].__outbuf = to_start; 308 codecvt->__cd_out.__cd.__data[0].__outbufend = to_end; 309 codecvt->__cd_out.__cd.__data[0].__statep = statep; 310 311 status = DL_CALL_FCT (gs->__fct, 312 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL, 313 NULL, &dummy, 1, 0)); 314 315 *to_stop = codecvt->__cd_out.__cd.__data[0].__outbuf; 316 317 switch (status) 318 { 319 case __GCONV_OK: 320 case __GCONV_EMPTY_INPUT: 321 result = __codecvt_ok; 322 break; 323 324 case __GCONV_FULL_OUTPUT: 325 case __GCONV_INCOMPLETE_INPUT: 326 result = __codecvt_partial; 327 break; 328 329 default: 330 result = __codecvt_error; 331 break; 332 } 333 #else 334 # ifdef _GLIBCPP_USE_WCHAR_T 335 size_t res; 336 char *to_start_copy = (char *) to_start; 337 size_t to_len = to_end - to_start; 338 339 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len); 340 341 if (res == 0) 342 result = __codecvt_ok; 343 else if (to_len < codecvt->__codecvt_do_max_length (codecvt)) 344 result = __codecvt_partial; 345 else 346 result = __codecvt_error; 347 # else 348 /* Decide what to do. */ 349 result = __codecvt_error; 350 # endif 351 #endif 352 353 return result; 354 } 355 356 357 static enum __codecvt_result 358 do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep, 359 const char *from_start, const char *from_end, const char **from_stop, 360 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop) 361 { 362 enum __codecvt_result result; 363 364 #ifdef _LIBC 365 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps; 366 int status; 367 size_t dummy; 368 const unsigned char *from_start_copy = (unsigned char *) from_start; 369 370 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_start; 371 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) to_end; 372 codecvt->__cd_in.__cd.__data[0].__statep = statep; 373 374 status = DL_CALL_FCT (gs->__fct, 375 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy, 376 from_end, NULL, &dummy, 0, 0)); 377 378 *from_stop = from_start_copy; 379 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf; 380 381 switch (status) 382 { 383 case __GCONV_OK: 384 case __GCONV_EMPTY_INPUT: 385 result = __codecvt_ok; 386 break; 387 388 case __GCONV_FULL_OUTPUT: 389 case __GCONV_INCOMPLETE_INPUT: 390 result = __codecvt_partial; 391 break; 392 393 default: 394 result = __codecvt_error; 395 break; 396 } 397 #else 398 # ifdef _GLIBCPP_USE_WCHAR_T 399 size_t res; 400 const char *from_start_copy = (const char *) from_start; 401 size_t from_len = from_end - from_start; 402 char *to_start_copy = (char *) from_start; 403 size_t to_len = to_end - to_start; 404 405 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len, 406 &to_start_copy, &to_len); 407 408 if (res == 0) 409 result = __codecvt_ok; 410 else if (to_len == 0) 411 result = __codecvt_partial; 412 else if (from_len < codecvt->__codecvt_do_max_length (codecvt)) 413 result = __codecvt_partial; 414 else 415 result = __codecvt_error; 416 # else 417 /* Decide what to do. */ 418 result = __codecvt_error; 419 # endif 420 #endif 421 422 return result; 423 } 424 425 426 static int 427 do_encoding (struct _IO_codecvt *codecvt) 428 { 429 #ifdef _LIBC 430 /* See whether the encoding is stateful. */ 431 if (codecvt->__cd_in.__cd.__steps[0].__stateful) 432 return -1; 433 /* Fortunately not. Now determine the input bytes for the conversion 434 necessary for each wide character. */ 435 if (codecvt->__cd_in.__cd.__steps[0].__min_needed_from 436 != codecvt->__cd_in.__cd.__steps[0].__max_needed_from) 437 /* Not a constant value. */ 438 return 0; 439 440 return codecvt->__cd_in.__cd.__steps[0].__min_needed_from; 441 #else 442 /* Worst case scenario. */ 443 return -1; 444 #endif 445 } 446 447 448 static int 449 do_always_noconv (struct _IO_codecvt *codecvt) 450 { 451 return 0; 452 } 453 454 455 static int 456 do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep, 457 const char *from_start, const char *from_end, _IO_size_t max) 458 { 459 int result; 460 #ifdef _LIBC 461 const unsigned char *cp = (const unsigned char *) from_start; 462 wchar_t to_buf[max]; 463 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps; 464 int status; 465 size_t dummy; 466 467 codecvt->__cd_in.__cd.__data[0].__outbuf = (char *) to_buf; 468 codecvt->__cd_in.__cd.__data[0].__outbufend = (char *) &to_buf[max]; 469 codecvt->__cd_in.__cd.__data[0].__statep = statep; 470 471 status = DL_CALL_FCT (gs->__fct, 472 (gs, codecvt->__cd_in.__cd.__data, &cp, from_end, 473 NULL, &dummy, 0, 0)); 474 475 result = cp - (const unsigned char *) from_start; 476 #else 477 # ifdef _GLIBCPP_USE_WCHAR_T 478 const char *from_start_copy = (const char *) from_start; 479 size_t from_len = from_end - from_start; 480 wchar_t to_buf[max]; 481 size_t res; 482 char *to_start = (char *) to_buf; 483 484 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len, 485 &to_start, &max); 486 487 result = from_start_copy - (char *) from_start; 488 # else 489 /* Decide what to do. */ 490 result = 0; 491 # endif 492 #endif 493 494 return result; 495 } 496 497 498 static int 499 do_max_length (struct _IO_codecvt *codecvt) 500 { 501 #ifdef _LIBC 502 return codecvt->__cd_in.__cd.__steps[0].__max_needed_from; 503 #else 504 return MB_CUR_MAX; 505 #endif 506 } 507 #endif 508