1 /* Skeleton for a conversion module.
2 Copyright (C) 1998, 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, write to the Free
18 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
19 02111-1307 USA. */
20
21 /* This file can be included to provide definitions of several things
22 many modules have in common. It can be customized using the following
23 macros:
24
25 DEFINE_INIT define the default initializer. This requires the
26 following symbol to be defined.
27
28 CHARSET_NAME string with official name of the coded character
29 set (in all-caps)
30
31 DEFINE_FINI define the default destructor function.
32
33 MIN_NEEDED_FROM minimal number of bytes needed for the from-charset.
34 MIN_NEEDED_TO likewise for the to-charset.
35
36 MAX_NEEDED_FROM maximal number of bytes needed for the from-charset.
37 This macro is optional, it defaults to MIN_NEEDED_FROM.
38 MAX_NEEDED_TO likewise for the to-charset.
39
40 FROM_LOOP_MIN_NEEDED_FROM
41 FROM_LOOP_MAX_NEEDED_FROM
42 minimal/maximal number of bytes needed on input
43 of one round through the FROM_LOOP. Defaults
44 to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively.
45 FROM_LOOP_MIN_NEEDED_TO
46 FROM_LOOP_MAX_NEEDED_TO
47 minimal/maximal number of bytes needed on output
48 of one round through the FROM_LOOP. Defaults
49 to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively.
50 TO_LOOP_MIN_NEEDED_FROM
51 TO_LOOP_MAX_NEEDED_FROM
52 minimal/maximal number of bytes needed on input
53 of one round through the TO_LOOP. Defaults
54 to MIN_NEEDED_TO and MAX_NEEDED_TO, respectively.
55 TO_LOOP_MIN_NEEDED_TO
56 TO_LOOP_MAX_NEEDED_TO
57 minimal/maximal number of bytes needed on output
58 of one round through the TO_LOOP. Defaults
59 to MIN_NEEDED_FROM and MAX_NEEDED_FROM, respectively.
60
61 DEFINE_DIRECTION_OBJECTS
62 two objects will be defined to be used when the
63 `gconv' function must only distinguish two
64 directions. This is implied by DEFINE_INIT.
65 If this macro is not defined the following
66 macro must be available.
67
     FROM_DIRECTION     this macro is supposed to return a value != 0
                        if we convert from the current character set,
                        otherwise it returns 0.
71
72 EMIT_SHIFT_TO_INIT this symbol is optional. If it is defined it
73 defines some code which writes out a sequence
74 of bytes which bring the current state into
75 the initial state.
76
77 FROM_LOOP name of the function implementing the conversion
78 from the current character set.
79 TO_LOOP likewise for the other direction
80
81 ONE_DIRECTION optional. If defined to 1, only one conversion
82 direction is defined instead of two. In this
83 case, FROM_DIRECTION should be defined to 1, and
84 FROM_LOOP and TO_LOOP should have the same value.
85
86 SAVE_RESET_STATE in case of an error we must reset the state for
87 the rerun so this macro must be defined for
88 stateful encodings. It takes an argument which
89 is nonzero when saving.
90
91 RESET_INPUT_BUFFER If the input character sets allow this the macro
92 can be defined to reset the input buffer pointers
93 to cover only those characters up to the error.
94
95 FUNCTION_NAME if not set the conversion function is named `gconv'.
96
97 PREPARE_LOOP optional code preparing the conversion loop. Can
98 contain variable definitions.
99 END_LOOP also optional, may be used to store information
100
101 EXTRA_LOOP_ARGS optional macro specifying extra arguments passed
102 to loop function.
103
104 STORE_REST optional, needed only when MAX_NEEDED_FROM > 4.
105 This macro stores the seen but unconverted input bytes
106 in the state.
107
108 FROM_ONEBYTE optional. If defined, should be the name of a
109 specialized conversion function for a single byte
110 from the current character set to INTERNAL. This
111 function has prototype
112 wint_t
113 FROM_ONEBYTE (struct __gconv_step *, unsigned char);
114 and does a special conversion:
115 - The input is a single byte.
116 - The output is a single uint32_t.
117 - The state before the conversion is the initial state;
118 the state after the conversion is irrelevant.
119 - No transliteration.
120 - __invocation_counter = 0.
121 - __internal_use = 1.
122 - do_flush = 0.
123
124 Modules can use mbstate_t to store conversion state as follows:
125
126 * Bits 2..0 of '__count' contain the number of lookahead input bytes
127 stored in __value.__wchb. Always zero if the converter never
128 returns __GCONV_INCOMPLETE_INPUT.
129
130 * Bits 31..3 of '__count' are module dependent shift state.
131
132 * __value: When STORE_REST/UNPACK_BYTES aren't defined and when the
133 converter has returned __GCONV_INCOMPLETE_INPUT, this contains
134 at most 4 lookahead bytes. Converters with an mb_cur_max > 4
135 (currently only UTF-8) must find a way to store their state
136 in __value.__wch and define STORE_REST/UNPACK_BYTES appropriately.
137
138 When __value contains lookahead, __count must not be zero, because
139 the converter is not in the initial state then, and mbsinit() --
140 defined as a (__count == 0) test -- must reflect this.
141 */
142
143 #include <assert.h>
144 #include <gconv.h>
145 #include <string.h>
146 #define __need_size_t
147 #define __need_NULL
148 #include <stddef.h>
149
150 #ifndef STATIC_GCONV
151 # include <dlfcn.h>
152 #endif
153
/* Fallback for DL_CALL_FCT, used only when no definition is in effect yet.
   It calls FCT directly with the parenthesized argument list ARGS.  FCT is
   wrapped in parentheses so that an arbitrary expression yielding a function
   (e.g. a conditional expression) can be passed safely.  */
#ifndef DL_CALL_FCT
# define DL_CALL_FCT(fct, args) (fct) args
#endif
157
158 /* The direction objects. */
159 #if DEFINE_DIRECTION_OBJECTS || DEFINE_INIT
160 static int from_object;
161 static int to_object;
162
163 # ifndef FROM_DIRECTION
164 # define FROM_DIRECTION (step->__data == &from_object)
165 # endif
166 #else
167 # ifndef FROM_DIRECTION
168 # error "FROM_DIRECTION must be provided if direction objects are not used"
169 # endif
170 #endif
171
172
/* How many bytes are needed at most for the from-charset.  Defaults to
   the minimum.  */
#ifndef MAX_NEEDED_FROM
# define MAX_NEEDED_FROM        MIN_NEEDED_FROM
#endif

/* Same for the to-charset.  */
#ifndef MAX_NEEDED_TO
# define MAX_NEEDED_TO          MIN_NEEDED_TO
#endif

/* Defaults for the per-direction min/max constants.  The FROM_LOOP reads
   the from-charset and writes the to-charset; for the TO_LOOP the roles
   are swapped, hence its *_FROM values default to the *_NEEDED_TO values
   and vice versa.  */
#ifndef FROM_LOOP_MIN_NEEDED_FROM
# define FROM_LOOP_MIN_NEEDED_FROM      MIN_NEEDED_FROM
#endif
#ifndef FROM_LOOP_MAX_NEEDED_FROM
# define FROM_LOOP_MAX_NEEDED_FROM      MAX_NEEDED_FROM
#endif
#ifndef FROM_LOOP_MIN_NEEDED_TO
# define FROM_LOOP_MIN_NEEDED_TO        MIN_NEEDED_TO
#endif
#ifndef FROM_LOOP_MAX_NEEDED_TO
# define FROM_LOOP_MAX_NEEDED_TO        MAX_NEEDED_TO
#endif
#ifndef TO_LOOP_MIN_NEEDED_FROM
# define TO_LOOP_MIN_NEEDED_FROM        MIN_NEEDED_TO
#endif
#ifndef TO_LOOP_MAX_NEEDED_FROM
# define TO_LOOP_MAX_NEEDED_FROM        MAX_NEEDED_TO
#endif
#ifndef TO_LOOP_MIN_NEEDED_TO
# define TO_LOOP_MIN_NEEDED_TO          MIN_NEEDED_FROM
#endif
#ifndef TO_LOOP_MAX_NEEDED_TO
# define TO_LOOP_MAX_NEEDED_TO          MAX_NEEDED_FROM
#endif
208
209
/* Define macros which can access unaligned buffers.  These macros are
   supposed to be used only in code outside the inner loops.  For the inner
   loops we have other definitions which allow optimized access.

   get16u/get32u read a 16/32-bit value in host byte order from ADDR,
   put16u/put32u store VAL at ADDR.  The byte-wise variants evaluate ADDR
   more than once, so ADDR must not have side effects.  */
#ifdef _STRING_ARCH_unaligned
/* We can handle unaligned memory access.  */
# define get16u(addr) (*((const uint16_t *) (addr)))
# define get32u(addr) (*((const uint32_t *) (addr)))

/* We need no special support for writing values either.  */
# define put16u(addr, val) (*((uint16_t *) (addr)) = (val))
# define put32u(addr, val) (*((uint32_t *) (addr)) = (val))
#else
/* Distinguish between big endian and little endian.  */
# if __BYTE_ORDER == __LITTLE_ENDIAN
#  define get16u(addr) \
     (((const unsigned char *) (addr))[1] << 8 \
      | ((const unsigned char *) (addr))[0])
/* The most significant byte is converted to uint32_t before shifting:
   shifting the promoted `int' by 24 bits in total would overflow into the
   sign bit for bytes >= 0x80.  */
#  define get32u(addr) \
     ((((uint32_t) ((const unsigned char *) (addr))[3] << 8 \
        | ((const unsigned char *) (addr))[2]) << 8 \
       | ((const unsigned char *) (addr))[1]) << 8 \
      | ((const unsigned char *) (addr))[0])

#  define put16u(addr, val) \
     ({ uint16_t __val = (val); \
        ((unsigned char *) (addr))[0] = __val; \
        ((unsigned char *) (addr))[1] = __val >> 8; \
        (void) 0; })
#  define put32u(addr, val) \
     ({ uint32_t __val = (val); \
        ((unsigned char *) (addr))[0] = __val; \
        __val >>= 8; \
        ((unsigned char *) (addr))[1] = __val; \
        __val >>= 8; \
        ((unsigned char *) (addr))[2] = __val; \
        __val >>= 8; \
        ((unsigned char *) (addr))[3] = __val; \
        (void) 0; })
# else
#  define get16u(addr) \
     (((const unsigned char *) (addr))[0] << 8 \
      | ((const unsigned char *) (addr))[1])
/* See the little endian variant for the reason of the cast.  */
#  define get32u(addr) \
     ((((uint32_t) ((const unsigned char *) (addr))[0] << 8 \
        | ((const unsigned char *) (addr))[1]) << 8 \
       | ((const unsigned char *) (addr))[2]) << 8 \
      | ((const unsigned char *) (addr))[3])

#  define put16u(addr, val) \
     ({ uint16_t __val = (val); \
        ((unsigned char *) (addr))[1] = __val; \
        ((unsigned char *) (addr))[0] = __val >> 8; \
        (void) 0; })
#  define put32u(addr, val) \
     ({ uint32_t __val = (val); \
        ((unsigned char *) (addr))[3] = __val; \
        __val >>= 8; \
        ((unsigned char *) (addr))[2] = __val; \
        __val >>= 8; \
        ((unsigned char *) (addr))[1] = __val; \
        __val >>= 8; \
        ((unsigned char *) (addr))[0] = __val; \
        (void) 0; })
# endif
#endif
275
276
/* For conversions from a fixed width character set to another fixed width
   character set we can define RESET_INPUT_BUFFER in a very fast way.

   The macro expects `inptrp', `outbuf' and `outerr' to be in scope.  It
   moves *inptrp back by the number of input bytes which correspond to the
   (outbuf - outerr) output bytes the next step did not consume.  The body
   is wrapped in do { } while (0) so that the macro behaves like a single
   statement, also as the branch of an enclosing if/else.  */
#if !defined RESET_INPUT_BUFFER && !defined SAVE_RESET_STATE
# if FROM_LOOP_MIN_NEEDED_FROM == FROM_LOOP_MAX_NEEDED_FROM \
     && FROM_LOOP_MIN_NEEDED_TO == FROM_LOOP_MAX_NEEDED_TO \
     && TO_LOOP_MIN_NEEDED_FROM == TO_LOOP_MAX_NEEDED_FROM \
     && TO_LOOP_MIN_NEEDED_TO == TO_LOOP_MAX_NEEDED_TO
/* We have to use these `if's here since the compiler cannot know that
   (outbuf - outerr) is always divisible by FROM/TO_LOOP_MIN_NEEDED_TO.
   The ?:1 avoids division by zero warnings that gcc 3.2 emits even for
   obviously unreachable code.  */
#  define RESET_INPUT_BUFFER \
  do \
    { \
      if (FROM_DIRECTION) \
        { \
          if (FROM_LOOP_MIN_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_TO == 0) \
            *inptrp -= (outbuf - outerr) \
                       * (FROM_LOOP_MIN_NEEDED_FROM \
                          / FROM_LOOP_MIN_NEEDED_TO); \
          else if (FROM_LOOP_MIN_NEEDED_TO % FROM_LOOP_MIN_NEEDED_FROM == 0) \
            *inptrp -= (outbuf - outerr) \
                       / (FROM_LOOP_MIN_NEEDED_TO \
                          / FROM_LOOP_MIN_NEEDED_FROM ? : 1); \
          else \
            *inptrp -= ((outbuf - outerr) / FROM_LOOP_MIN_NEEDED_TO) \
                       * FROM_LOOP_MIN_NEEDED_FROM; \
        } \
      else \
        { \
          if (TO_LOOP_MIN_NEEDED_FROM % TO_LOOP_MIN_NEEDED_TO == 0) \
            *inptrp -= (outbuf - outerr) \
                       * (TO_LOOP_MIN_NEEDED_FROM / TO_LOOP_MIN_NEEDED_TO); \
          else if (TO_LOOP_MIN_NEEDED_TO % TO_LOOP_MIN_NEEDED_FROM == 0) \
            *inptrp -= (outbuf - outerr) \
                       / (TO_LOOP_MIN_NEEDED_TO \
                          / TO_LOOP_MIN_NEEDED_FROM ? : 1); \
          else \
            *inptrp -= ((outbuf - outerr) / TO_LOOP_MIN_NEEDED_TO) \
                       * TO_LOOP_MIN_NEEDED_FROM; \
        } \
    } \
  while (0)
# endif
#endif
316
317
/* The default init function.  It simply matches the name and initializes
   the step data to point to one of the objects above.

   STEP->__from_name is compared with CHARSET_NAME first; on a match the
   step is set up for the FROM_LOOP direction (including the optional
   FROM_ONEBYTE fast path).  Otherwise STEP->__to_name is compared and the
   step is set up for the TO_LOOP direction.  STEP->__stateful is set
   according to whether SAVE_RESET_STATE is defined.

   Returns __GCONV_OK on success and __GCONV_NOCONV if neither name
   matches CHARSET_NAME.  */
#if DEFINE_INIT
# ifndef CHARSET_NAME
#  error "CHARSET_NAME not defined"
# endif

extern int gconv_init (struct __gconv_step *step);
int
gconv_init (struct __gconv_step *step)
{
  /* Determine which direction.  */
  if (strcmp (step->__from_name, CHARSET_NAME) == 0)
    {
      step->__data = &from_object;

      step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
      step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
      step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
      step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;

#ifdef FROM_ONEBYTE
      step->__btowc_fct = FROM_ONEBYTE;
#endif
    }
  else if (__builtin_expect (strcmp (step->__to_name, CHARSET_NAME), 0) == 0)
    {
      step->__data = &to_object;

      step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
      step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
      step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
      step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
    }
  else
    return __GCONV_NOCONV;

  /* A module which has to save and restore its state is stateful.  */
#ifdef SAVE_RESET_STATE
  step->__stateful = 1;
#else
  step->__stateful = 0;
#endif

  return __GCONV_OK;
}
#endif
364
365
/* The default destructor function does nothing at the moment and so
   we don't define it at all.  But we still provide the macro just in
   case we need it some day.  */
#if DEFINE_FINI
#endif


/* If no arguments have to be passed to the loop function define the macro
   as empty.  */
#ifndef EXTRA_LOOP_ARGS
# define EXTRA_LOOP_ARGS
#endif


/* This is the actual conversion function.  Unless the including module
   chooses a different name it is called `gconv'.  */
#ifndef FUNCTION_NAME
# define FUNCTION_NAME  gconv
#endif

/* The macros are used to access the function to convert single characters.
   The extra level of indirection makes sure a macro argument such as
   FROM_LOOP is expanded before `_single' is appended.  */
#define SINGLE(fct) SINGLE2 (fct)
#define SINGLE2(fct) fct##_single
389
390 extern int FUNCTION_NAME (struct __gconv_step *step,
391 struct __gconv_step_data *data,
392 const unsigned char **inptrp,
393 const unsigned char *inend,
394 unsigned char **outbufstart, size_t *irreversible,
395 int do_flush, int consume_incomplete);
396 int
FUNCTION_NAME(struct __gconv_step * step,struct __gconv_step_data * data,const unsigned char ** inptrp,const unsigned char * inend,unsigned char ** outbufstart,size_t * irreversible,int do_flush,int consume_incomplete)397 FUNCTION_NAME (struct __gconv_step *step, struct __gconv_step_data *data,
398 const unsigned char **inptrp, const unsigned char *inend,
399 unsigned char **outbufstart, size_t *irreversible, int do_flush,
400 int consume_incomplete)
401 {
402 struct __gconv_step *next_step = step + 1;
403 struct __gconv_step_data *next_data = data + 1;
404 __gconv_fct fct;
405 int status;
406
407 fct = (data->__flags & __GCONV_IS_LAST) ? NULL : next_step->__fct;
408
409 /* If the function is called with no input this means we have to reset
410 to the initial state. The possibly partly converted input is
411 dropped. */
412 if (__builtin_expect (do_flush, 0))
413 {
414 /* This should never happen during error handling. */
415 assert (outbufstart == NULL);
416
417 status = __GCONV_OK;
418
419 #ifdef EMIT_SHIFT_TO_INIT
420 if (do_flush == 1)
421 {
422 /* We preserve the initial values of the pointer variables. */
423 unsigned char *outbuf = data->__outbuf;
424 unsigned char *outstart = outbuf;
425 unsigned char *outend = data->__outbufend;
426
427 # ifdef PREPARE_LOOP
428 PREPARE_LOOP
429 # endif
430
431 # ifdef SAVE_RESET_STATE
432 SAVE_RESET_STATE (1);
433 # endif
434
435 /* Emit the escape sequence to reset the state. */
436 EMIT_SHIFT_TO_INIT;
437
438 /* Call the steps down the chain if there are any but only if we
439 successfully emitted the escape sequence. This should only
440 fail if the output buffer is full. If the input is invalid
441 it should be discarded since the user wants to start from a
442 clean state. */
443 if (status == __GCONV_OK)
444 {
445 if (data->__flags & __GCONV_IS_LAST)
446 /* Store information about how many bytes are available. */
447 data->__outbuf = outbuf;
448 else
449 {
450 /* Write out all output which was produced. */
451 if (outbuf > outstart)
452 {
453 const unsigned char *outerr = outstart;
454 int result;
455
456 result = DL_CALL_FCT (fct, (next_step, next_data,
457 &outerr, outbuf, NULL,
458 irreversible, 0,
459 consume_incomplete));
460
461 if (result != __GCONV_EMPTY_INPUT)
462 {
463 if (__builtin_expect (outerr != outbuf, 0))
464 {
465 /* We have a problem. Undo the conversion. */
466 outbuf = outstart;
467
468 /* Restore the state. */
469 # ifdef SAVE_RESET_STATE
470 SAVE_RESET_STATE (0);
471 # endif
472 }
473
474 /* Change the status. */
475 status = result;
476 }
477 }
478
479 if (status == __GCONV_OK)
480 /* Now flush the remaining steps. */
481 status = DL_CALL_FCT (fct, (next_step, next_data, NULL,
482 NULL, NULL, irreversible, 1,
483 consume_incomplete));
484 }
485 }
486 }
487 else
488 #endif
489 {
490 /* Clear the state object. There might be bytes in there from
491 previous calls with CONSUME_INCOMPLETE == 1. But don't emit
492 escape sequences. */
493 memset (data->__statep, '\0', sizeof (*data->__statep));
494
495 if (! (data->__flags & __GCONV_IS_LAST))
496 /* Now flush the remaining steps. */
497 status = DL_CALL_FCT (fct, (next_step, next_data, NULL, NULL,
498 NULL, irreversible, do_flush,
499 consume_incomplete));
500 }
501 }
502 else
503 {
504 /* We preserve the initial values of the pointer variables. */
505 const unsigned char *inptr = *inptrp;
506 unsigned char *outbuf = (__builtin_expect (outbufstart == NULL, 1)
507 ? data->__outbuf : *outbufstart);
508 unsigned char *outend = data->__outbufend;
509 unsigned char *outstart;
510 /* This variable is used to count the number of characters we
511 actually converted. */
512 size_t lirreversible = 0;
513 size_t *lirreversiblep = irreversible ? &lirreversible : NULL;
514
515 /* The following assumes that encodings, which have a variable length
516 what might unalign a buffer even though it is a aligned in the
517 beginning, either don't have the minimal number of bytes as a divisor
518 of the maximum length or have a minimum length of 1. This is true
519 for all known and supported encodings.
520 We use && instead of || to combine the subexpression for the FROM
521 encoding and for the TO encoding, because usually one of them is
522 INTERNAL, for which the subexpression evaluates to 1, but INTERNAL
523 buffers are always aligned correctly. */
524 #define POSSIBLY_UNALIGNED \
525 (!defined _STRING_ARCH_unaligned \
526 && (((FROM_LOOP_MIN_NEEDED_FROM != 1 \
527 && FROM_LOOP_MAX_NEEDED_FROM % FROM_LOOP_MIN_NEEDED_FROM == 0) \
528 && (FROM_LOOP_MIN_NEEDED_TO != 1 \
529 && FROM_LOOP_MAX_NEEDED_TO % FROM_LOOP_MIN_NEEDED_TO == 0)) \
530 || ((TO_LOOP_MIN_NEEDED_FROM != 1 \
531 && TO_LOOP_MAX_NEEDED_FROM % TO_LOOP_MIN_NEEDED_FROM == 0) \
532 && (TO_LOOP_MIN_NEEDED_TO != 1 \
533 && TO_LOOP_MAX_NEEDED_TO % TO_LOOP_MIN_NEEDED_TO == 0))))
534 #if POSSIBLY_UNALIGNED
535 int unaligned;
536 # define GEN_unaligned(name) GEN_unaligned2 (name)
537 # define GEN_unaligned2(name) name##_unaligned
538 #else
539 # define unaligned 0
540 #endif
541
542 #ifdef PREPARE_LOOP
543 PREPARE_LOOP
544 #endif
545
546 #if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1
547 /* If the function is used to implement the mb*towc*() or wc*tomb*()
548 functions we must test whether any bytes from the last call are
549 stored in the `state' object. */
550 if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1)
551 || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
552 || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION))
553 && consume_incomplete && (data->__statep->__count & 7) != 0)
554 {
555 /* Yep, we have some bytes left over. Process them now.
556 But this must not happen while we are called from an
557 error handler. */
558 assert (outbufstart == NULL);
559
560 # if FROM_LOOP_MAX_NEEDED_FROM > 1
561 if (TO_LOOP_MAX_NEEDED_FROM == 1 || FROM_DIRECTION)
562 status = SINGLE(FROM_LOOP) (step, data, inptrp, inend, &outbuf,
563 outend, lirreversiblep
564 EXTRA_LOOP_ARGS);
565 # endif
566 # if !ONE_DIRECTION
567 # if FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1
568 else
569 # endif
570 # if TO_LOOP_MAX_NEEDED_FROM > 1
571 status = SINGLE(TO_LOOP) (step, data, inptrp, inend, &outbuf,
572 outend, lirreversiblep EXTRA_LOOP_ARGS);
573 # endif
574 # endif
575
576 if (__builtin_expect (status, __GCONV_OK) != __GCONV_OK)
577 return status;
578 }
579 #endif
580
581 #if POSSIBLY_UNALIGNED
582 unaligned =
583 ((FROM_DIRECTION
584 && ((uintptr_t) inptr % FROM_LOOP_MIN_NEEDED_FROM != 0
585 || ((data->__flags & __GCONV_IS_LAST)
586 && (uintptr_t) outbuf % FROM_LOOP_MIN_NEEDED_TO != 0)))
587 || (!FROM_DIRECTION
588 && (((data->__flags & __GCONV_IS_LAST)
589 && (uintptr_t) outbuf % TO_LOOP_MIN_NEEDED_TO != 0)
590 || (uintptr_t) inptr % TO_LOOP_MIN_NEEDED_FROM != 0)));
591 #endif
592
593 while (1)
594 {
595 struct __gconv_trans_data *trans;
596
597 /* Remember the start value for this round. */
598 inptr = *inptrp;
599 /* The outbuf buffer is empty. */
600 outstart = outbuf;
601
602 #ifdef SAVE_RESET_STATE
603 SAVE_RESET_STATE (1);
604 #endif
605
606 if (__builtin_expect (!unaligned, 1))
607 {
608 if (FROM_DIRECTION)
609 /* Run the conversion loop. */
610 status = FROM_LOOP (step, data, inptrp, inend, &outbuf, outend,
611 lirreversiblep EXTRA_LOOP_ARGS);
612 else
613 /* Run the conversion loop. */
614 status = TO_LOOP (step, data, inptrp, inend, &outbuf, outend,
615 lirreversiblep EXTRA_LOOP_ARGS);
616 }
617 #if POSSIBLY_UNALIGNED
618 else
619 {
620 if (FROM_DIRECTION)
621 /* Run the conversion loop. */
622 status = GEN_unaligned (FROM_LOOP) (step, data, inptrp, inend,
623 &outbuf, outend,
624 lirreversiblep
625 EXTRA_LOOP_ARGS);
626 else
627 /* Run the conversion loop. */
628 status = GEN_unaligned (TO_LOOP) (step, data, inptrp, inend,
629 &outbuf, outend,
630 lirreversiblep
631 EXTRA_LOOP_ARGS);
632 }
633 #endif
634
635 /* If we were called as part of an error handling module we
636 don't do anything else here. */
637 if (__builtin_expect (outbufstart != NULL, 0))
638 {
639 *outbufstart = outbuf;
640 return status;
641 }
642
643 /* Give the transliteration module the chance to store the
644 original text and the result in case it needs a context. */
645 for (trans = data->__trans; trans != NULL; trans = trans->__next)
646 if (trans->__trans_context_fct != NULL)
647 DL_CALL_FCT (trans->__trans_context_fct,
648 (trans->__data, inptr, *inptrp, outstart, outbuf));
649
650 /* We finished one use of the loops. */
651 ++data->__invocation_counter;
652
653 /* If this is the last step leave the loop, there is nothing
654 we can do. */
655 if (__builtin_expect (data->__flags & __GCONV_IS_LAST, 0))
656 {
657 /* Store information about how many bytes are available. */
658 data->__outbuf = outbuf;
659
660 /* Remember how many non-identical characters we
661 converted in a irreversible way. */
662 *irreversible += lirreversible;
663
664 break;
665 }
666
667 /* Write out all output which was produced. */
668 if (__builtin_expect (outbuf > outstart, 1))
669 {
670 const unsigned char *outerr = data->__outbuf;
671 int result;
672
673 result = DL_CALL_FCT (fct, (next_step, next_data, &outerr,
674 outbuf, NULL, irreversible, 0,
675 consume_incomplete));
676
677 if (result != __GCONV_EMPTY_INPUT)
678 {
679 if (__builtin_expect (outerr != outbuf, 0))
680 {
681 #ifdef RESET_INPUT_BUFFER
682 RESET_INPUT_BUFFER;
683 #else
684 /* We have a problem with the in on of the functions
685 below. Undo the conversion upto the error point. */
686 size_t nstatus;
687
688 /* Reload the pointers. */
689 *inptrp = inptr;
690 outbuf = outstart;
691
692 /* Restore the state. */
693 # ifdef SAVE_RESET_STATE
694 SAVE_RESET_STATE (0);
695 # endif
696
697 if (__builtin_expect (!unaligned, 1))
698 {
699 if (FROM_DIRECTION)
700 /* Run the conversion loop. */
701 nstatus = FROM_LOOP (step, data, inptrp, inend,
702 &outbuf, outerr,
703 lirreversiblep
704 EXTRA_LOOP_ARGS);
705 else
706 /* Run the conversion loop. */
707 nstatus = TO_LOOP (step, data, inptrp, inend,
708 &outbuf, outerr,
709 lirreversiblep
710 EXTRA_LOOP_ARGS);
711 }
712 # if POSSIBLY_UNALIGNED
713 else
714 {
715 if (FROM_DIRECTION)
716 /* Run the conversion loop. */
717 nstatus = GEN_unaligned (FROM_LOOP) (step, data,
718 inptrp, inend,
719 &outbuf,
720 outerr,
721 lirreversiblep
722 EXTRA_LOOP_ARGS);
723 else
724 /* Run the conversion loop. */
725 nstatus = GEN_unaligned (TO_LOOP) (step, data,
726 inptrp, inend,
727 &outbuf, outerr,
728 lirreversiblep
729 EXTRA_LOOP_ARGS);
730 }
731 # endif
732
733 /* We must run out of output buffer space in this
734 rerun. */
735 assert (outbuf == outerr);
736 assert (nstatus == __GCONV_FULL_OUTPUT);
737
738 /* If we haven't consumed a single byte decrement
739 the invocation counter. */
740 if (__builtin_expect (outbuf == outstart, 0))
741 --data->__invocation_counter;
742 #endif /* reset input buffer */
743 }
744
745 /* Change the status. */
746 status = result;
747 }
748 else
749 /* All the output is consumed, we can make another run
750 if everything was ok. */
751 if (status == __GCONV_FULL_OUTPUT)
752 {
753 status = __GCONV_OK;
754 outbuf = data->__outbuf;
755 }
756 }
757
758 if (status != __GCONV_OK)
759 break;
760
761 /* Reset the output buffer pointer for the next round. */
762 outbuf = data->__outbuf;
763 }
764
765 #ifdef END_LOOP
766 END_LOOP
767 #endif
768
769 /* If we are supposed to consume all character store now all of the
770 remaining characters in the `state' object. */
771 #if FROM_LOOP_MAX_NEEDED_FROM > 1 || TO_LOOP_MAX_NEEDED_FROM > 1
772 if (((FROM_LOOP_MAX_NEEDED_FROM > 1 && TO_LOOP_MAX_NEEDED_FROM > 1)
773 || (FROM_LOOP_MAX_NEEDED_FROM > 1 && FROM_DIRECTION)
774 || (TO_LOOP_MAX_NEEDED_FROM > 1 && !FROM_DIRECTION))
775 && __builtin_expect (consume_incomplete, 0)
776 && status == __GCONV_INCOMPLETE_INPUT)
777 {
778 # ifdef STORE_REST
779 mbstate_t *state = data->__statep;
780
781 STORE_REST
782 # else
783 size_t cnt;
784
785 /* Make sure the remaining bytes fit into the state objects
786 buffer. */
787 assert (inend - *inptrp < 4);
788
789 for (cnt = 0; *inptrp < inend; ++cnt)
790 data->__statep->__value.__wchb[cnt] = *(*inptrp)++;
791 data->__statep->__count &= ~7;
792 data->__statep->__count |= cnt;
793 # endif
794 }
795 #endif
796 #undef unaligned
797 #undef POSSIBLY_UNALIGNED
798 }
799
800 return status;
801 }
802
/* Remove all customization macros so that they do not leak into whatever
   follows the inclusion of this file.  */
#undef DEFINE_INIT
#undef CHARSET_NAME
#undef DEFINE_FINI
#undef MIN_NEEDED_FROM
#undef MIN_NEEDED_TO
#undef MAX_NEEDED_FROM
#undef MAX_NEEDED_TO
#undef FROM_LOOP_MIN_NEEDED_FROM
#undef FROM_LOOP_MAX_NEEDED_FROM
#undef FROM_LOOP_MIN_NEEDED_TO
#undef FROM_LOOP_MAX_NEEDED_TO
#undef TO_LOOP_MIN_NEEDED_FROM
#undef TO_LOOP_MAX_NEEDED_FROM
#undef TO_LOOP_MIN_NEEDED_TO
#undef TO_LOOP_MAX_NEEDED_TO
#undef DEFINE_DIRECTION_OBJECTS
#undef FROM_DIRECTION
#undef EMIT_SHIFT_TO_INIT
#undef FROM_LOOP
#undef TO_LOOP
#undef ONE_DIRECTION
#undef SAVE_RESET_STATE
#undef RESET_INPUT_BUFFER
#undef FUNCTION_NAME
#undef PREPARE_LOOP
#undef END_LOOP
#undef EXTRA_LOOP_ARGS
#undef STORE_REST
#undef FROM_ONEBYTE
832