1 /* 2 * Copyright (C) 2000-2002, 2005-2006 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* This file defines three conversion loops: 22 - from wchar_t to anything else, 23 - from anything else to wchar_t, 24 - from wchar_t to wchar_t. 25 */ 26 27 #if HAVE_WCRTOMB || HAVE_MBRTOWC 28 # include <wchar.h> 29 # define BUF_SIZE 64 /* assume MB_LEN_MAX <= 64 */ 30 /* Some systems, like BeOS, have multibyte encodings but lack mbstate_t. */ 31 extern size_t mbrtowc (); 32 # ifdef mbstate_t 33 # define mbrtowc(pwc, s, n, ps) (mbrtowc)(pwc, s, n, 0) 34 # define mbsinit(ps) 1 35 # endif 36 # ifndef mbsinit 37 # if !HAVE_MBSINIT 38 # define mbsinit(ps) 1 39 # endif 40 # endif 41 #else 42 # ifndef mbstate_t 43 typedef int mbstate_t; 44 # endif 45 #endif 46 47 /* 48 * The first two conversion loops have an extended conversion descriptor. 49 */ 50 struct wchar_conv_struct { 51 struct conv_struct parent; 52 mbstate_t state; 53 }; 54 55 56 #if HAVE_WCRTOMB 57 58 /* From wchar_t to anything else. */ 59 60 #ifndef LIBICONV_PLUG 61 62 #if 0 63 64 struct wc_to_mb_fallback_locals { 65 struct wchar_conv_struct * l_wcd; 66 char* l_outbuf; 67 size_t l_outbytesleft; 68 int l_errno; 69 }; 70 71 /* A callback that writes a string given in the locale encoding. */ 72 static void wc_to_mb_write_replacement (const char *buf, size_t buflen, 73 void* callback_arg) 74 { 75 struct wc_to_mb_fallback_locals * plocals = 76 (struct wc_to_mb_fallback_locals *) callback_arg; 77 /* Do nothing if already encountered an error in a previous call. */ 78 if (plocals->l_errno == 0) { 79 /* Attempt to convert the passed buffer to the target encoding. 80 Here we don't support characters split across multiple calls. */ 81 const char* bufptr = buf; 82 size_t bufleft = buflen; 83 size_t res = unicode_loop_convert(&plocals->l_wcd->parent, 84 &bufptr,&bufleft, 85 &plocals->l_outbuf,&plocals->l_outbytesleft); 86 if (res == (size_t)(-1)) { 87 if (errno == EILSEQ || errno == EINVAL) 88 /* Invalid buf contents. */ 89 plocals->l_errno = EILSEQ; 90 else if (errno == E2BIG) 91 /* Output buffer too small. */ 92 plocals->l_errno = E2BIG; 93 else 94 abort(); 95 } else { 96 /* Successful conversion. */ 97 if (bufleft > 0) 98 abort(); 99 } 100 } 101 } 102 103 #else 104 105 struct wc_to_mb_fallback_locals { 106 char* l_outbuf; 107 size_t l_outbytesleft; 108 int l_errno; 109 }; 110 111 /* A callback that writes a string given in the target encoding. */ 112 static void wc_to_mb_write_replacement (const char *buf, size_t buflen, 113 void* callback_arg) 114 { 115 struct wc_to_mb_fallback_locals * plocals = 116 (struct wc_to_mb_fallback_locals *) callback_arg; 117 /* Do nothing if already encountered an error in a previous call. */ 118 if (plocals->l_errno == 0) { 119 /* Attempt to copy the passed buffer to the output buffer. */ 120 if (plocals->l_outbytesleft < buflen) 121 plocals->l_errno = E2BIG; 122 else { 123 memcpy(plocals->l_outbuf, buf, buflen); 124 plocals->l_outbuf += buflen; 125 plocals->l_outbytesleft -= buflen; 126 } 127 } 128 } 129 130 #endif 131 132 #endif /* !LIBICONV_PLUG */ 133 134 static size_t wchar_from_loop_convert (iconv_t icd, 135 const char* * inbuf, size_t *inbytesleft, 136 char* * outbuf, size_t *outbytesleft) 137 { 138 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 139 size_t result = 0; 140 while (*inbytesleft >= sizeof(wchar_t)) { 141 const wchar_t * inptr = (const wchar_t *) *inbuf; 142 size_t inleft = *inbytesleft; 143 char buf[BUF_SIZE]; 144 mbstate_t state = wcd->state; 145 size_t bufcount = 0; 146 while (inleft >= sizeof(wchar_t)) { 147 /* Convert one wchar_t to multibyte representation. */ 148 size_t count = wcrtomb(buf+bufcount,*inptr,&state); 149 if (count == (size_t)(-1)) { 150 /* Invalid input. */ 151 if (wcd->parent.discard_ilseq) { 152 count = 0; 153 } 154 #ifndef LIBICONV_PLUG 155 else if (wcd->parent.fallbacks.wc_to_mb_fallback != NULL) { 156 /* Drop the contents of buf[] accumulated so far, and instead 157 pass all queued wide characters to the fallback handler. */ 158 struct wc_to_mb_fallback_locals locals; 159 const wchar_t * fallback_inptr; 160 #if 0 161 locals.l_wcd = wcd; 162 #endif 163 locals.l_outbuf = *outbuf; 164 locals.l_outbytesleft = *outbytesleft; 165 locals.l_errno = 0; 166 for (fallback_inptr = (const wchar_t *) *inbuf; 167 fallback_inptr <= inptr; 168 fallback_inptr++) 169 wcd->parent.fallbacks.wc_to_mb_fallback(*fallback_inptr, 170 wc_to_mb_write_replacement, 171 &locals, 172 wcd->parent.fallbacks.data); 173 if (locals.l_errno != 0) { 174 errno = locals.l_errno; 175 return -1; 176 } 177 wcd->state = state; 178 *inbuf = (const char *) (inptr + 1); 179 *inbytesleft = inleft - sizeof(wchar_t); 180 *outbuf = locals.l_outbuf; 181 *outbytesleft = locals.l_outbytesleft; 182 result += 1; 183 break; 184 } 185 #endif 186 else { 187 errno = EILSEQ; 188 return -1; 189 } 190 } 191 inptr++; 192 inleft -= sizeof(wchar_t); 193 bufcount += count; 194 if (count == 0) { 195 /* Continue, append next wchar_t. */ 196 } else { 197 /* Attempt to convert the accumulated multibyte representations 198 to the target encoding. */ 199 const char* bufptr = buf; 200 size_t bufleft = bufcount; 201 char* outptr = *outbuf; 202 size_t outleft = *outbytesleft; 203 size_t res = unicode_loop_convert(&wcd->parent, 204 &bufptr,&bufleft, 205 &outptr,&outleft); 206 if (res == (size_t)(-1)) { 207 if (errno == EILSEQ) 208 /* Invalid input. */ 209 return -1; 210 else if (errno == E2BIG) 211 /* Output buffer too small. */ 212 return -1; 213 else if (errno == EINVAL) { 214 /* Continue, append next wchar_t, but avoid buffer overrun. */ 215 if (bufcount + MB_CUR_MAX > BUF_SIZE) 216 abort(); 217 } else 218 abort(); 219 } else { 220 /* Successful conversion. */ 221 wcd->state = state; 222 *inbuf = (const char *) inptr; 223 *inbytesleft = inleft; 224 *outbuf = outptr; 225 *outbytesleft = outleft; 226 result += res; 227 break; 228 } 229 } 230 } 231 } 232 return result; 233 } 234 235 static size_t wchar_from_loop_reset (iconv_t icd, 236 char* * outbuf, size_t *outbytesleft) 237 { 238 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 239 if (outbuf == NULL || *outbuf == NULL) { 240 /* Reset the states. */ 241 memset(&wcd->state,'\0',sizeof(mbstate_t)); 242 return unicode_loop_reset(&wcd->parent,NULL,NULL); 243 } else { 244 if (!mbsinit(&wcd->state)) { 245 mbstate_t state = wcd->state; 246 char buf[BUF_SIZE]; 247 size_t bufcount = wcrtomb(buf,(wchar_t)0,&state); 248 if (bufcount == (size_t)(-1) || bufcount == 0 || buf[bufcount-1] != '\0') 249 abort(); 250 else { 251 const char* bufptr = buf; 252 size_t bufleft = bufcount-1; 253 char* outptr = *outbuf; 254 size_t outleft = *outbytesleft; 255 size_t res = unicode_loop_convert(&wcd->parent, 256 &bufptr,&bufleft, 257 &outptr,&outleft); 258 if (res == (size_t)(-1)) { 259 if (errno == E2BIG) 260 return -1; 261 else 262 abort(); 263 } else { 264 res = unicode_loop_reset(&wcd->parent,&outptr,&outleft); 265 if (res == (size_t)(-1)) 266 return res; 267 else { 268 /* Successful. */ 269 wcd->state = state; 270 *outbuf = outptr; 271 *outbytesleft = outleft; 272 return 0; 273 } 274 } 275 } 276 } else 277 return unicode_loop_reset(&wcd->parent,outbuf,outbytesleft); 278 } 279 } 280 281 #endif 282 283 284 #if HAVE_MBRTOWC 285 286 /* From anything else to wchar_t. */ 287 288 #ifndef LIBICONV_PLUG 289 290 struct mb_to_wc_fallback_locals { 291 char* l_outbuf; 292 size_t l_outbytesleft; 293 int l_errno; 294 }; 295 296 static void mb_to_wc_write_replacement (const wchar_t *buf, size_t buflen, 297 void* callback_arg) 298 { 299 struct mb_to_wc_fallback_locals * plocals = 300 (struct mb_to_wc_fallback_locals *) callback_arg; 301 /* Do nothing if already encountered an error in a previous call. */ 302 if (plocals->l_errno == 0) { 303 /* Attempt to copy the passed buffer to the output buffer. */ 304 if (plocals->l_outbytesleft < sizeof(wchar_t)*buflen) 305 plocals->l_errno = E2BIG; 306 else { 307 for (; buflen > 0; buf++, buflen--) { 308 *(wchar_t*) plocals->l_outbuf = *buf; 309 plocals->l_outbuf += sizeof(wchar_t); 310 plocals->l_outbytesleft -= sizeof(wchar_t); 311 } 312 } 313 } 314 } 315 316 #endif /* !LIBICONV_PLUG */ 317 318 static size_t wchar_to_loop_convert (iconv_t icd, 319 const char* * inbuf, size_t *inbytesleft, 320 char* * outbuf, size_t *outbytesleft) 321 { 322 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 323 size_t result = 0; 324 while (*inbytesleft > 0) { 325 size_t incount; 326 for (incount = 1; incount <= *inbytesleft; incount++) { 327 char buf[BUF_SIZE]; 328 const char* inptr = *inbuf; 329 size_t inleft = incount; 330 char* bufptr = buf; 331 size_t bufleft = BUF_SIZE; 332 size_t res = unicode_loop_convert(&wcd->parent, 333 &inptr,&inleft, 334 &bufptr,&bufleft); 335 if (res == (size_t)(-1)) { 336 if (errno == EILSEQ) 337 /* Invalid input. */ 338 return -1; 339 else if (errno == EINVAL) { 340 /* Incomplete input. Next try with one more input byte. */ 341 } else 342 /* E2BIG shouldn't occur. */ 343 abort(); 344 } else { 345 /* Successful conversion. */ 346 size_t bufcount = bufptr-buf; /* = BUF_SIZE-bufleft */ 347 mbstate_t state = wcd->state; 348 wchar_t wc; 349 res = mbrtowc(&wc,buf,bufcount,&state); 350 if (res == (size_t)(-2)) { 351 /* Next try with one more input byte. */ 352 } else { 353 if (res == (size_t)(-1)) { 354 /* Invalid input. */ 355 if (wcd->parent.discard_ilseq) { 356 } 357 #ifndef LIBICONV_PLUG 358 else if (wcd->parent.fallbacks.mb_to_wc_fallback != NULL) { 359 /* Drop the contents of buf[] accumulated so far, and instead 360 pass all queued chars to the fallback handler. */ 361 struct mb_to_wc_fallback_locals locals; 362 locals.l_outbuf = *outbuf; 363 locals.l_outbytesleft = *outbytesleft; 364 locals.l_errno = 0; 365 wcd->parent.fallbacks.mb_to_wc_fallback(*inbuf, incount, 366 mb_to_wc_write_replacement, 367 &locals, 368 wcd->parent.fallbacks.data); 369 if (locals.l_errno != 0) { 370 errno = locals.l_errno; 371 return -1; 372 } 373 /* Restoring the state is not needed because it is the initial 374 state anyway: For all known locale encodings, the multibyte 375 to wchar_t conversion doesn't have shift state, and we have 376 excluded partial accumulated characters. */ 377 /* wcd->state = state; */ 378 *inbuf += incount; 379 *inbytesleft -= incount; 380 *outbuf = locals.l_outbuf; 381 *outbytesleft = locals.l_outbytesleft; 382 result += 1; 383 break; 384 } 385 #endif 386 else 387 return -1; 388 } else { 389 if (*outbytesleft < sizeof(wchar_t)) { 390 errno = E2BIG; 391 return -1; 392 } 393 *(wchar_t*) *outbuf = wc; 394 /* Restoring the state is not needed because it is the initial 395 state anyway: For all known locale encodings, the multibyte 396 to wchar_t conversion doesn't have shift state, and we have 397 excluded partial accumulated characters. */ 398 /* wcd->state = state; */ 399 *outbuf += sizeof(wchar_t); 400 *outbytesleft -= sizeof(wchar_t); 401 } 402 *inbuf += incount; 403 *inbytesleft -= incount; 404 result += res; 405 break; 406 } 407 } 408 } 409 } 410 return result; 411 } 412 413 static size_t wchar_to_loop_reset (iconv_t icd, 414 char* * outbuf, size_t *outbytesleft) 415 { 416 struct wchar_conv_struct * wcd = (struct wchar_conv_struct *) icd; 417 size_t res = unicode_loop_reset(&wcd->parent,outbuf,outbytesleft); 418 if (res == (size_t)(-1)) 419 return res; 420 memset(&wcd->state,0,sizeof(mbstate_t)); 421 return 0; 422 } 423 424 #endif 425 426 427 /* From wchar_t to wchar_t. */ 428 429 static size_t wchar_id_loop_convert (iconv_t icd, 430 const char* * inbuf, size_t *inbytesleft, 431 char* * outbuf, size_t *outbytesleft) 432 { 433 struct conv_struct * cd = (struct conv_struct *) icd; 434 const wchar_t* inptr = (const wchar_t*) *inbuf; 435 size_t inleft = *inbytesleft / sizeof(wchar_t); 436 wchar_t* outptr = (wchar_t*) *outbuf; 437 size_t outleft = *outbytesleft / sizeof(wchar_t); 438 size_t count = (inleft <= outleft ? inleft : outleft); 439 if (count > 0) { 440 *inbytesleft -= count * sizeof(wchar_t); 441 *outbytesleft -= count * sizeof(wchar_t); 442 do { 443 wchar_t wc = *inptr++; 444 *outptr++ = wc; 445 #ifndef LIBICONV_PLUG 446 if (cd->hooks.wc_hook) 447 (*cd->hooks.wc_hook)(wc, cd->hooks.data); 448 #endif 449 } while (--count > 0); 450 *inbuf = (const char*) inptr; 451 *outbuf = (char*) outptr; 452 } 453 return 0; 454 } 455 456 static size_t wchar_id_loop_reset (iconv_t icd, 457 char* * outbuf, size_t *outbytesleft) 458 { 459 return 0; 460 } 461