1 /* Skeleton for a conversion module. 2 Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. 5 6 The GNU C Library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Lesser General Public 8 License as published by the Free Software Foundation; either 9 version 2.1 of the License, or (at your option) any later version. 10 11 The GNU C Library is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Lesser General Public License for more details. 15 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, write to the Free 18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 19 02111-1307 USA. */ 20 21 /* This file can be included to provide definitions of several things 22 many modules have in common. It can be customized using the following 23 macros: 24 25 DEFINE_INIT define the default initializer. This requires the 26 following symbol to be defined. 27 28 CHARSET_NAME string with official name of the coded character 29 set (in all-caps) 30 31 DEFINE_FINI define the default destructor function. 32 33 MIN_NEEDED_FROM minimal number of bytes needed for the from-charset. 34 MIN_NEEDED_TO likewise for the to-charset. 35 36 MAX_NEEDED_FROM maximal number of bytes needed for the from-charset. 37 This macro is optional, it defaults to MIN_NEEDED_FROM. 38 MAX_NEEDED_TO likewise for the to-charset. 39 40 FROM_LOOP_MIN_NEEDED_FROM 41 FROM_LOOP_MAX_NEEDED_FROM 42 minimal/maximal number of bytes needed on input 43 of one round through the FROM_LOOP. Defaults 44 to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively. 45 FROM_LOOP_MIN_NEEDED_TO 46 FROM_LOOP_MAX_NEEDED_TO 47 minimal/maximal number of bytes needed on output 48 of one round through the FROM_LOOP. Defaults 49 to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively. 50 TO_LOOP_MIN_NEEDED_FROM 51 TO_LOOP_MAX_NEEDED_FROM 52 minimal/maximal number of bytes needed on input 53 of one round through the TO_LOOP. Defaults 54 to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively. 55 TO_LOOP_MIN_NEEDED_TO 56 TO_LOOP_MAX_NEEDED_TO 57 minimal/maximal number of bytes needed on output 58 of one round through the TO_LOOP. Defaults 59 to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively. 60 61 DEFINE_DIRECTION_OBJECTS 62 two objects will be defined to be used when the 63 `gconv' function must only distinguish two 64 directions. This is implied by DEFINE_INIT. 65 If this macro is not defined the following 66 macro must be available. 67 68 FROM_DIRECTION this macro is supposed to return a value != 0 69 if we convert from the current character set, 70 otherwise it return 0. 71 72 EMIT_SHIFT_TO_INIT this symbol is optional. If it is defined it 73 defines some code which writes out a sequence 74 of bytes which bring the current state into 75 the initial state. 76 77 FROM_LOOP name of the function implementing the conversion 78 from the current character set. 79 TO_LOOP likewise for the other direction 80 81 ONE_DIRECTION optional. If defined to 1, only one conversion 82 direction is defined instead of two. In this 83 case, FROM_DIRECTION should be defined to 1, and 84 FROM_LOOP and TO_LOOP should have the same value. 85 86 SAVE_RESET_STATE in case of an error we must reset the state for 87 the rerun so this macro must be defined for 88 stateful encodings. It takes an argument which 89 is nonzero when saving. 90 91 RESET_INPUT_BUFFER If the input character sets allow this the macro 92 can be defined to reset the input buffer pointers 93 to cover only those characters up to the error. 94 95 FUNCTION_NAME if not set the conversion function is named `gconv'. 96 97 PREPARE_LOOP optional code preparing the conversion loop. Can 98 contain variable definitions. 99 END_LOOP also optional, may be used to store information 100 101 EXTRA_LOOP_ARGS optional macro specifying extra arguments passed 102 to loop function. 103 104 STORE_REST optional, needed only when MAX_NEEDED_FROM > 4. 105 This macro stores the seen but unconverted input bytes 106 in the state. 107 108 FROM_ONEBYTE optional. If defined, should be the name of a 109 specialized conversion function for a single byte 110 from the current character set to INTERNAL. This 111 function has prototype 112 wint_t 113 FROM_ONEBYTE (struct __gconv_step *, unsigned char); 114 and does a special conversion: 115 - The input is a single byte. 116 - The output is a single uint32_t. 117 - The state before the conversion is the initial state; 118 the state after the conversion is irrelevant. 119 - No transliteration. 120 - __invocation_counter = 0. 121 - __internal_use = 1. 122 - do_flush = 0. 123 124 Modules can use mbstate_t to store conversion state as follows: 125 126 * Bits 2..0 of '__count' contain the number of lookahead input bytes 127 stored in __value.__wchb. Always zero if the converter never 128 returns __GCONV_INCOMPLETE_INPUT. 129 130 * Bits 31..3 of '__count' are module dependent shift state. 131 132 * __value: When STORE_REST/UNPACK_BYTES aren't defined and when the 133 converter has returned __GCONV_INCOMPLETE_INPUT, this contains 134 at most 4 lookahead bytes. Converters with an mb_cur_max > 4 135 (currently only UTF-8) must find a way to store their state 136 in __value.__wch and define STORE_REST/UNPACK_BYTES appropriately. 137 138 When __value contains lookahead, __count must not be zero, because 139 the converter is not in the initial state then, and mbsinit() -- 140 defined as a (__count == 0) test -- must reflect this. 141 */ 142 143 #include <assert.h> 144 #include <gconv.h> 145 #include <string.h> 146 #define __need_size_t 147 #define __need_NULL 148 #include <stddef.h> 149 150 #ifndef STATIC_GCONV 151 # include <dlfcn.h> 152 #endif 153 154 #ifndef DL_CALL_FCT 155 # define DL_CALL_FCT(fct, args) fct args 156 #endif 157 158 /* The direction objects. */ 159 #if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT 160 static int from_object; 161 static int to_object; 162 163 # ifndef FROM_DIRECTION 164 # define FROM_DIRECTION (step->__data == &from_object) 165 # endif 166 #else 167 # ifndef FROM_DIRECTION 168 # error "FROM_DIRECTION must be provided if direction objects are not used" 169 # endif 170 #endif 171 172 173 /* How many bytes are needed at most for the from-charset. */ 174 #ifndef MAX_NEEDED_FROM 175 # define MAX_NEEDED_FROM MIN_NEEDED_FROM 176 #endif 177 178 /* Same for the to-charset. */ 179 #ifndef MAX_NEEDED_TO 180 # define MAX_NEEDED_TO MIN_NEEDED_TO 181 #endif 182 183 /* Defaults for the per-direction min/max constants. */ 184 #ifndef FROM_LOOP_MIN_NEEDED_FROM 185 # define FROM_LOOP_MIN_NEEDED_FROM MIN_NEEDED_FROM 186 #endif 187 #ifndef FROM_LOOP_MAX_NEEDED_FROM 188 # define FROM_LOOP_MAX_NEEDED_FROM MAX_NEEDED_FROM 189 #endif 190 #ifndef FROM_LOOP_MIN_NEEDED_TO 191 # define FROM_LOOP_MIN_NEEDED_TO MIN_NEEDED_TO 192 #endif 193 #ifndef FROM_LOOP_MAX_NEEDED_TO 194 # define FROM_LOOP_MAX_NEEDED_TO MAX_NEEDED_TO 195 #endif 196 #ifndef TO_LOOP_MIN_NEEDED_FROM 197 # define TO_LOOP_MIN_NEEDED_FROM MIN_NEEDED_TO 198 #endif 199 #ifndef TO_LOOP_MAX_NEEDED_FROM 200 # define TO_LOOP_MAX_NEEDED_FROM MAX_NEEDED_TO 201 #endif 202 #ifndef TO_LOOP_MIN_NEEDED_TO 203 # define TO_LOOP_MIN_NEEDED_TO MIN_NEEDED_FROM 204 #endif 205 #ifndef TO_LOOP_MAX_NEEDED_TO 206 # define TO_LOOP_MAX_NEEDED_TO MAX_NEEDED_FROM 207 #endif 208 209 210 /* Define macros which can access unaligned buffers. These macros are 211 supposed to be used only in code outside the inner loops. For the inner 212 loops we have other definitions which allow optimized access. */ 213 #ifdef _STRING_ARCH_unaligned 214 /* We can handle unaligned memory access. */ 215 # define get16u(addr) *((__const uint16_t *) (addr)) 216 # define get32u(addr) *((__const uint32_t *) (addr)) 217 218 /* We need no special support for writing values either. */ 219 # define put16u(addr, val) *((uint16_t *) (addr)) = (val) 220 # define put32u(addr, val) *((uint32_t *) (addr)) = (val) 221 #else 222 /* Distinguish between big endian and little endian. */ 223 # if __BYTE_ORDER == __LITTLE_ENDIAN 224 # define get16u(addr) \ 225 (((__const unsigned char *) (addr))[1] << 8 \ 226 | ((__const unsigned char *) (addr))[0]) 227 # define get32u(addr) \ 228 (((((__const unsigned char *) (addr))[3] << 8 \ 229 | ((__const unsigned char *) (addr))[2]) << 8 \ 230 | ((__const unsigned char *) (addr))[1]) << 8 \ 231 | ((__const unsigned char *) (addr))[0]) 232 233 # define put16u(addr, val) \ 234 ({ uint16_t __val = (val); \ 235 ((unsigned char *) (addr))[0] = __val; \ 236 ((unsigned char *) (addr))[1] = __val >> 8; \ 237 (void) 0; }) 238 # define put32u(addr, val) \ 239 ({ uint32_t __val = (val); \ 240 ((unsigned char *) (addr))[0] = __val; \ 241 __val >>= 8; \ 242 ((unsigned char *) (addr))[1] = __val; \ 243 __val >>= 8; \ 244 ((unsigned char *) (addr))[2] = __val; \ 245 __val >>= 8; \ 246 ((unsigned char *) (addr))[3] = __val; \ 247 (void) 0; }) 248 # else 249 # define get16u(addr) \ 250 (((__const unsigned char *) (addr))[0] << 8 \ 251 | ((__const unsigned char *) (addr))[1]) 252 # define get32u(addr) \ 253 (((((__const unsigned char *) (addr))[0] << 8 \ 254 | ((__const unsigned char *) (addr))[1]) << 8 \ 255 | ((__const unsigned char *) (addr))[2]) << 8 \ 256 | ((__const unsigned char *) (addr))[3]) 257 258 # define put16u(addr, val) \ 259 ({ uint16_t __val = (val); \ 260 ((unsigned char *) (addr))[1] = __val; \ 261 ((unsigned char *) (addr))[0] = __val >> 8; \ 262 (void) 0; }) 263 # define put32u(addr, val) \ 264 ({ uint32_t __val = (val); \ 265 ((unsigned char *) (addr))[3] = __val; \ 266 __val >>= 8; \ 267 ((unsigned char *) (addr))[2] = __val; \ 268 __val >>= 8; \ 269 ((unsigned char *) (addr))[1] = __val; \ 270 __val >>= 8; \ 271 ((unsigned char *) (addr))[0] = __val; \ 272 (void) 0; }) 273 # endif 274 #endif 275 276 277 /* For conversions from a fixed width character set to another fixed width 278 character set we can define RESET_INPUT_BUFFER in a very fast way. */ 279 #if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE 280 # if FROM_LOOP_MIN_NEEDED_FROM == FROM_LOOP_MAX_NEEDED_FROM \ 281 && FROM_LOOP_MIN_NEEDED_TO == FROM_LOOP_MAX_NEEDED_TO \ 282 && TO_LOOP_MIN_NEEDED_FROM == TO_LOOP_MAX_NEEDED_FROM \ 283 && TO_LOOP_MIN_NEEDED_TO == TO_LOOP_MAX_NEEDED_TO 284 /* We have to use these `if's here since the compiler cannot know that 285 (outbuf - outerr) is always divisible by FROM/TO_LOOP_MIN_NEEDED_TO. 286 The ?:1 avoids division by zero warnings that gcc 3.2 emits even for 287 obviously unreachable code. */ 288 # define RESET_INPUT_BUFFER \ 289 if (FROM_DIRECTION) \ 290 { \ 291 if (FROM_LOOP_MIN_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_TO == 0) \ 292 *inptrp -= (outbuf - outerr) \ 293 * (FROM_LOOP_MIN_NEEDED_FROM / FROM_LOOP_MIN_NEEDED_TO); \ 294 else if (FROM_LOOP_MIN_NEEDED_TO % FROM_LOOP_MIN_NEEDED_FROM == 0) \ 295 *inptrp -= (outbuf - outerr) \ 296 / (FROM_LOOP_MIN_NEEDED_TO / FROM_LOOP_MIN_NEEDED_FROM \ 297 ? : 1); \ 298 else \ 299 *inptrp -= ((outbuf - outerr) / FROM_LOOP_MIN_NEEDED_TO) \ 300 * FROM_LOOP_MIN_NEEDED_FROM; \ 301 } \ 302 else \ 303 { \ 304 if (TO_LOOP_MIN_NEEDED_FROM % TO_LOOP_MIN_NEEDED_TO == 0) \ 305 *inptrp -= (outbuf - outerr) \ 306 * (TO_LOOP_MIN_NEEDED_FROM / TO_LOOP_MIN_NEEDED_TO); \ 307 else if (TO_LOOP_MIN_NEEDED_TO % TO_LOOP_MIN_NEEDED_FROM == 0) \ 308 *inptrp -= (outbuf - outerr) \ 309 / (TO_LOOP_MIN_NEEDED_TO / TO_LOOP_MIN_NEEDED_FROM ? : 1); \ 310 else \ 311 *inptrp -= ((outbuf - outerr) / TO_LOOP_MIN_NEEDED_TO) \ 312 * TO_LOOP_MIN_NEEDED_FROM; \ 313 } 314 # endif 315 #endif 316 317 318 /* The default init function. It simply matches the name and initializes 319 the step data to point to one of the objects above. */ 320 #if DEFINE_INIT 321 # ifndef CHARSET_NAME 322 # error "CHARSET_NAME not defined" 323 # endif 324 325 extern int gconv_init (struct __gconv_step *step); 326 int 327 gconv_init (struct __gconv_step *step) 328 { 329 /* Determine which direction. */ 330 if (strcmp (step->__from_name, CHARSET_NAME) == 0) 331 { 332 step->__data = &from_object; 333 334 step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM; 335 step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM; 336 step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO; 337 step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO; 338 339 #ifdef FROM_ONEBYTE 340 step->__btowc_fct = FROM_ONEBYTE; 341 #endif 342 } 343 else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0) 344 { 345 step->__data = &to_object; 346 347 step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM; 348 step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM; 349 step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO; 350 step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO; 351 } 352 else 353 return __GCONV_NOCONV; 354 355 #ifdef SAVE_RESET_STATE 356 step->__stateful = 1; 357 #else 358 step->__stateful = 0; 359 #endif 360 361 return __GCONV_OK; 362 } 363 #endif 364 365 366 /* The default destructor function does nothing in the moment and so 367 we don't define it at all. But we still provide the macro just in 368 case we need it some day. */ 369 #if DEFINE_FINI 370 #endif 371 372 373 /* If no arguments have to passed to the loop function define the macro 374 as empty. */ 375 #ifndef EXTRA_LOOP_ARGS 376 # define EXTRA_LOOP_ARGS 377 #endif 378 379 380 /* This is the actual conversion function. */ 381 #ifndef FUNCTION_NAME 382 # define FUNCTION_NAME gconv 383 #endif 384 385 /* The macros are used to access the function to convert single characters. */ 386 #define SINGLE(fct) SINGLE2 (fct) 387 #define SINGLE2(fct) fct##_single 388 389 390 extern int FUNCTION_NAME (struct __gconv_step *step, 391 struct __gconv_step_data *data, 392 const unsigned char **inptrp, 393 const unsigned char *inend, 394 unsigned char **outbufstart, size_t *irreversible, 395 int do_flush, int consume_incomplete); 396 int 397 FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data, 398 const unsigned char **inptrp, const unsigned char *inend, 399 unsigned char **outbufstart, size_t *irreversible, int do_flush, 400 int consume_incomplete) 401 { 402 struct __gconv_step *next_step = step + 1; 403 struct __gconv_step_data *next_data = data + 1; 404 __gconv_fct fct; 405 int status; 406 407 fct = (data->__flags & __GCONV_IS_LAST) ? NULL : next_step->__fct; 408 409 /* If the function is called with no input this means we have to reset 410 to the initial state. The possibly partly converted input is 411 dropped. */ 412 if (__builtin_expect (do_flush, 0)) 413 { 414 /* This should never happen during error handling. */ 415 assert (outbufstart == NULL); 416 417 status = __GCONV_OK; 418 419 #ifdef EMIT_SHIFT_TO_INIT 420 if (do_flush == 1) 421 { 422 /* We preserve the initial values of the pointer variables. */ 423 unsigned char *outbuf = data->__outbuf; 424 unsigned char *outstart = outbuf; 425 unsigned char *outend = data->__outbufend; 426 427 # ifdef PREPARE_LOOP 428 PREPARE_LOOP 429 # endif 430 431 # ifdef SAVE_RESET_STATE 432 SAVE_RESET_STATE (1); 433 # endif 434 435 /* Emit the escape sequence to reset the state. */ 436 EMIT_SHIFT_TO_INIT; 437 438 /* Call the steps down the chain if there are any but only if we 439 successfully emitted the escape sequence. This should only 440 fail if the output buffer is full. If the input is invalid 441 it should be discarded since the user wants to start from a 442 clean state. */ 443 if (status == __GCONV_OK) 444 { 445 if (data->__flags & __GCONV_IS_LAST) 446 /* Store information about how many bytes are available. */ 447 data->__outbuf = outbuf; 448 else 449 { 450 /* Write out all output which was produced. */ 451 if (outbuf > outstart) 452 { 453 const unsigned char *outerr = outstart; 454 int result; 455 456 result = DL_CALL_FCT (fct, (next_step, next_data, 457 &outerr, outbuf, NULL, 458 irreversible, 0, 459 consume_incomplete)); 460 461 if (result != __GCONV_EMPTY_INPUT) 462 { 463 if (__builtin_expect (outerr != outbuf, 0)) 464 { 465 /* We have a problem. Undo the conversion. */ 466 outbuf = outstart; 467 468 /* Restore the state. */ 469 # ifdef SAVE_RESET_STATE 470 SAVE_RESET_STATE (0); 471 # endif 472 } 473 474 /* Change the status. */ 475 status = result; 476 } 477 } 478 479 if (status == __GCONV_OK) 480 /* Now flush the remaining steps. */ 481 status = DL_CALL_FCT (fct, (next_step, next_data, NULL, 482 NULL, NULL, irreversible, 1, 483 consume_incomplete)); 484 } 485 } 486 } 487 else 488 #endif 489 { 490 /* Clear the state object. There might be bytes in there from 491 previous calls with CONSUME_INCOMPLETE == 1. But don't emit 492 escape sequences. */ 493 memset (data->__statep, '\0', sizeof (*data->__statep)); 494 495 if (! (data->__flags & __GCONV_IS_LAST)) 496 /* Now flush the remaining steps. */ 497 status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL, 498 NULL, irreversible, do_flush, 499 consume_incomplete)); 500 } 501 } 502 else 503 { 504 /* We preserve the initial values of the pointer variables. */ 505 const unsigned char *inptr = *inptrp; 506 unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1) 507 ? data->__outbuf : *outbufstart); 508 unsigned char *outend = data->__outbufend; 509 unsigned char *outstart; 510 /* This variable is used to count the number of characters we 511 actually converted. */ 512 size_t lirreversible = 0; 513 size_t *lirreversiblep = irreversible ? &lirreversible : NULL; 514 515 /* The following assumes that encodings, which have a variable length 516 what might unalign a buffer even though it is a aligned in the 517 beginning, either don't have the minimal number of bytes as a divisor 518 of the maximum length or have a minimum length of 1. This is true 519 for all known and supported encodings. 520 We use && instead of || to combine the subexpression for the FROM 521 encoding and for the TO encoding, because usually one of them is 522 INTERNAL, for which the subexpression evaluates to 1, but INTERNAL 523 buffers are always aligned correctly. */ 524 #define POSSIBLY_UNALIGNED \ 525 (!defined _STRING_ARCH_unaligned \ 526 && (((FROM_LOOP_MIN_NEEDED_FROM != 1 \ 527 && FROM_LOOP_MAX_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_FROM == 0) \ 528 && (FROM_LOOP_MIN_NEEDED_TO != 1 \ 529 && FROM_LOOP_MAX_NEEDED_TO % FROM_LOOP_MIN_NEEDED_TO == 0)) \ 530 || ((TO_LOOP_MIN_NEEDED_FROM != 1 \ 531 && TO_LOOP_MAX_NEEDED_FROM % TO_LOOP_MIN_NEEDED_FROM == 0) \ 532 && (TO_LOOP_MIN_NEEDED_TO != 1 \ 533 && TO_LOOP_MAX_NEEDED_TO % TO_LOOP_MIN_NEEDED_TO == 0)))) 534 #if POSSIBLY_UNALIGNED 535 int unaligned; 536 # define GEN_unaligned(name) GEN_unaligned2 (name) 537 # define GEN_unaligned2(name) name##_unaligned 538 #else 539 # define unaligned 0 540 #endif 541 542 #ifdef PREPARE_LOOP 543 PREPARE_LOOP 544 #endif 545 546 #if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1 547 /* If the function is used to implement the mb*towc*() or wc*tomb*() 548 functions we must test whether any bytes from the last call are 549 stored in the `state' object. */ 550 if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1) 551 || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION) 552 || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION)) 553 && consume_incomplete && (data->__statep->__count & 7) != 0) 554 { 555 /* Yep, we have some bytes left over. Process them now. 556 But this must not happen while we are called from an 557 error handler. */ 558 assert (outbufstart == NULL); 559 560 # if FROM_LOOP_MAX_NEEDED_FROM > 1 561 if (TO_LOOP_MAX_NEEDED_FROM == 1 || FROM_DIRECTION) 562 status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf, 563 outend, lirreversiblep 564 EXTRA_LOOP_ARGS); 565 # endif 566 # if !ONE_DIRECTION 567 # if FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1 568 else 569 # endif 570 # if TO_LOOP_MAX_NEEDED_FROM > 1 571 status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf, 572 outend, lirreversiblep EXTRA_LOOP_ARGS); 573 # endif 574 # endif 575 576 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK) 577 return status; 578 } 579 #endif 580 581 #if POSSIBLY_UNALIGNED 582 unaligned = 583 ((FROM_DIRECTION 584 && ((uintptr_t) inptr % FROM_LOOP_MIN_NEEDED_FROM != 0 585 || ((data->__flags & __GCONV_IS_LAST) 586 && (uintptr_t) outbuf % FROM_LOOP_MIN_NEEDED_TO != 0))) 587 || (!FROM_DIRECTION 588 && (((data->__flags & __GCONV_IS_LAST) 589 && (uintptr_t) outbuf % TO_LOOP_MIN_NEEDED_TO != 0) 590 || (uintptr_t) inptr % TO_LOOP_MIN_NEEDED_FROM != 0))); 591 #endif 592 593 while (1) 594 { 595 struct __gconv_trans_data *trans; 596 597 /* Remember the start value for this round. */ 598 inptr = *inptrp; 599 /* The outbuf buffer is empty. */ 600 outstart = outbuf; 601 602 #ifdef SAVE_RESET_STATE 603 SAVE_RESET_STATE (1); 604 #endif 605 606 if (__builtin_expect (!unaligned, 1)) 607 { 608 if (FROM_DIRECTION) 609 /* Run the conversion loop. */ 610 status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend, 611 lirreversiblep EXTRA_LOOP_ARGS); 612 else 613 /* Run the conversion loop. */ 614 status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend, 615 lirreversiblep EXTRA_LOOP_ARGS); 616 } 617 #if POSSIBLY_UNALIGNED 618 else 619 { 620 if (FROM_DIRECTION) 621 /* Run the conversion loop. */ 622 status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend, 623 &outbuf, outend, 624 lirreversiblep 625 EXTRA_LOOP_ARGS); 626 else 627 /* Run the conversion loop. */ 628 status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend, 629 &outbuf, outend, 630 lirreversiblep 631 EXTRA_LOOP_ARGS); 632 } 633 #endif 634 635 /* If we were called as part of an error handling module we 636 don't do anything else here. */ 637 if (__builtin_expect (outbufstart != NULL, 0)) 638 { 639 *outbufstart = outbuf; 640 return status; 641 } 642 643 /* Give the transliteration module the chance to store the 644 original text and the result in case it needs a context. */ 645 for (trans = data->__trans; trans != NULL; trans = trans->__next) 646 if (trans->__trans_context_fct != NULL) 647 DL_CALL_FCT (trans->__trans_context_fct, 648 (trans->__data, inptr, *inptrp, outstart, outbuf)); 649 650 /* We finished one use of the loops. */ 651 ++data->__invocation_counter; 652 653 /* If this is the last step leave the loop, there is nothing 654 we can do. */ 655 if (__builtin_expect (data->__flags & __GCONV_IS_LAST, 0)) 656 { 657 /* Store information about how many bytes are available. */ 658 data->__outbuf = outbuf; 659 660 /* Remember how many non-identical characters we 661 converted in a irreversible way. */ 662 *irreversible += lirreversible; 663 664 break; 665 } 666 667 /* Write out all output which was produced. */ 668 if (__builtin_expect (outbuf > outstart, 1)) 669 { 670 const unsigned char *outerr = data->__outbuf; 671 int result; 672 673 result = DL_CALL_FCT (fct, (next_step, next_data, &outerr, 674 outbuf, NULL, irreversible, 0, 675 consume_incomplete)); 676 677 if (result != __GCONV_EMPTY_INPUT) 678 { 679 if (__builtin_expect (outerr != outbuf, 0)) 680 { 681 #ifdef RESET_INPUT_BUFFER 682 RESET_INPUT_BUFFER; 683 #else 684 /* We have a problem with the in on of the functions 685 below. Undo the conversion upto the error point. */ 686 size_t nstatus; 687 688 /* Reload the pointers. */ 689 *inptrp = inptr; 690 outbuf = outstart; 691 692 /* Restore the state. */ 693 # ifdef SAVE_RESET_STATE 694 SAVE_RESET_STATE (0); 695 # endif 696 697 if (__builtin_expect (!unaligned, 1)) 698 { 699 if (FROM_DIRECTION) 700 /* Run the conversion loop. */ 701 nstatus = FROM_LOOP (step, data, inptrp, inend, 702 &outbuf, outerr, 703 lirreversiblep 704 EXTRA_LOOP_ARGS); 705 else 706 /* Run the conversion loop. */ 707 nstatus = TO_LOOP (step, data, inptrp, inend, 708 &outbuf, outerr, 709 lirreversiblep 710 EXTRA_LOOP_ARGS); 711 } 712 # if POSSIBLY_UNALIGNED 713 else 714 { 715 if (FROM_DIRECTION) 716 /* Run the conversion loop. */ 717 nstatus = GEN_unaligned (FROM_LOOP) (step, data, 718 inptrp, inend, 719 &outbuf, 720 outerr, 721 lirreversiblep 722 EXTRA_LOOP_ARGS); 723 else 724 /* Run the conversion loop. */ 725 nstatus = GEN_unaligned (TO_LOOP) (step, data, 726 inptrp, inend, 727 &outbuf, outerr, 728 lirreversiblep 729 EXTRA_LOOP_ARGS); 730 } 731 # endif 732 733 /* We must run out of output buffer space in this 734 rerun. */ 735 assert (outbuf == outerr); 736 assert (nstatus == __GCONV_FULL_OUTPUT); 737 738 /* If we haven't consumed a single byte decrement 739 the invocation counter. */ 740 if (__builtin_expect (outbuf == outstart, 0)) 741 --data->__invocation_counter; 742 #endif /* reset input buffer */ 743 } 744 745 /* Change the status. */ 746 status = result; 747 } 748 else 749 /* All the output is consumed, we can make another run 750 if everything was ok. */ 751 if (status == __GCONV_FULL_OUTPUT) 752 { 753 status = __GCONV_OK; 754 outbuf = data->__outbuf; 755 } 756 } 757 758 if (status != __GCONV_OK) 759 break; 760 761 /* Reset the output buffer pointer for the next round. */ 762 outbuf = data->__outbuf; 763 } 764 765 #ifdef END_LOOP 766 END_LOOP 767 #endif 768 769 /* If we are supposed to consume all character store now all of the 770 remaining characters in the `state' object. */ 771 #if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1 772 if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1) 773 || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION) 774 || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION)) 775 && __builtin_expect (consume_incomplete, 0) 776 && status == __GCONV_INCOMPLETE_INPUT) 777 { 778 # ifdef STORE_REST 779 mbstate_t *state = data->__statep; 780 781 STORE_REST 782 # else 783 size_t cnt; 784 785 /* Make sure the remaining bytes fit into the state objects 786 buffer. */ 787 assert (inend - *inptrp < 4); 788 789 for (cnt = 0; *inptrp < inend; ++cnt) 790 data->__statep->__value.__wchb[cnt] = *(*inptrp)++; 791 data->__statep->__count &= ~7; 792 data->__statep->__count |= cnt; 793 # endif 794 } 795 #endif 796 #undef unaligned 797 #undef POSSIBLY_UNALIGNED 798 } 799 800 return status; 801 } 802 803 #undef DEFINE_INIT 804 #undef CHARSET_NAME 805 #undef DEFINE_FINI 806 #undef MIN_NEEDED_FROM 807 #undef MIN_NEEDED_TO 808 #undef MAX_NEEDED_FROM 809 #undef MAX_NEEDED_TO 810 #undef FROM_LOOP_MIN_NEEDED_FROM 811 #undef FROM_LOOP_MAX_NEEDED_FROM 812 #undef FROM_LOOP_MIN_NEEDED_TO 813 #undef FROM_LOOP_MAX_NEEDED_TO 814 #undef TO_LOOP_MIN_NEEDED_FROM 815 #undef TO_LOOP_MAX_NEEDED_FROM 816 #undef TO_LOOP_MIN_NEEDED_TO 817 #undef TO_LOOP_MAX_NEEDED_TO 818 #undef DEFINE_DIRECTION_OBJECTS 819 #undef FROM_DIRECTION 820 #undef EMIT_SHIFT_TO_INIT 821 #undef FROM_LOOP 822 #undef TO_LOOP 823 #undef ONE_DIRECTION 824 #undef SAVE_RESET_STATE 825 #undef RESET_INPUT_BUFFER 826 #undef FUNCTION_NAME 827 #undef PREPARE_LOOP 828 #undef END_LOOP 829 #undef EXTRA_LOOP_ARGS 830 #undef STORE_REST 831 #undef FROM_ONEBYTE 832