1 /* Copyright (C) 1991-1999, 2000, 2001 Free Software Foundation, Inc. 2 This file is part of the GNU C Library. 3 4 The GNU C Library is free software; you can redistribute it and/or 5 modify it under the terms of the GNU Lesser General Public 6 License as published by the Free Software Foundation; either 7 version 2.1 of the License, or (at your option) any later version. 8 9 The GNU C Library is distributed in the hope that it will be useful, 10 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 Lesser General Public License for more details. 13 14 You should have received a copy of the GNU Lesser General Public 15 License along with the GNU C Library; if not, write to the Free 16 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 17 02111-1307 USA. */ 18 19 #include <assert.h> 20 #include <errno.h> 21 #include <limits.h> 22 #include <ctype.h> 23 #include <stdarg.h> 24 #include <stdio.h> 25 #include <stdint.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <wchar.h> 29 #include <wctype.h> 30 #include <bits/libc-lock.h> 31 #include <locale/localeinfo.h> 32 33 #ifdef __GNUC__ 34 # define HAVE_LONGLONG 35 # define LONGLONG long long 36 #else 37 # define LONGLONG long 38 #endif 39 40 /* Determine whether we have to handle `long long' at all. */ 41 #if LONG_MAX == LONG_LONG_MAX 42 # define need_longlong 0 43 #else 44 # define need_longlong 1 45 #endif 46 47 /* Determine whether we have to handle `long'. */ 48 #if INT_MAX == LONG_MAX 49 # define need_long 0 50 #else 51 # define need_long 1 52 #endif 53 54 /* Those are flags in the conversion format. 
*/ 55 #define LONG 0x001 /* l: long or double */ 56 #define LONGDBL 0x002 /* L: long long or long double */ 57 #define SHORT 0x004 /* h: short */ 58 #define SUPPRESS 0x008 /* *: suppress assignment */ 59 #define POINTER 0x010 /* weird %p pointer (`fake hex') */ 60 #define NOSKIP 0x020 /* do not skip blanks */ 61 #define WIDTH 0x040 /* width was given */ 62 #define GROUP 0x080 /* ': group numbers */ 63 #define MALLOC 0x100 /* a: malloc strings */ 64 #define CHAR 0x200 /* hh: char */ 65 #define I18N 0x400 /* I: use locale's digits */ 66 67 68 #ifdef USE_IN_LIBIO 69 # include <libioP.h> 70 # include <libio.h> 71 72 # undef va_list 73 # define va_list _IO_va_list 74 75 # ifdef COMPILE_WSCANF 76 # define ungetc(c, s) ((void) (c == WEOF \ 77 || (--read_in, \ 78 _IO_sputbackwc (s, c)))) 79 # define ungetc_not_eof(c, s) ((void) (--read_in, \ 80 _IO_sputbackwc (s, c))) 81 # define inchar() (c == WEOF ? WEOF \ 82 : ((c = _IO_getwc_unlocked (s)), \ 83 (void) (c != WEOF && ++read_in), c)) 84 85 # define MEMCPY(d, s, n) __wmemcpy (d, s, n) 86 # define ISSPACE(Ch) iswspace (Ch) 87 # define ISDIGIT(Ch) iswdigit (Ch) 88 # define ISXDIGIT(Ch) iswxdigit (Ch) 89 # define TOLOWER(Ch) towlower (Ch) 90 # define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF 91 # define __strtoll_internal __wcstoll_internal 92 # define __strtoull_internal __wcstoull_internal 93 # define __strtol_internal __wcstol_internal 94 # define __strtoul_internal __wcstoul_internal 95 # define __strtold_internal __wcstold_internal 96 # define __strtod_internal __wcstod_internal 97 # define __strtof_internal __wcstof_internal 98 99 # define L_(Str) L##Str 100 # define CHAR_T wchar_t 101 # define UCHAR_T unsigned int 102 # define WINT_T wint_t 103 # undef EOF 104 # define EOF WEOF 105 # else 106 # define ungetc(c, s) ((void) ((int) c == EOF \ 107 || (--read_in, \ 108 _IO_sputbackc (s, (unsigned char) c)))) 109 # define ungetc_not_eof(c, s) ((void) (--read_in, \ 110 _IO_sputbackc (s, (unsigned char) c))) 111 # define 
inchar() (c == EOF ? EOF \ 112 : ((c = _IO_getc_unlocked (s)), \ 113 (void) (c != EOF && ++read_in), c)) 114 # define MEMCPY(d, s, n) memcpy (d, s, n) 115 # define ISSPACE(Ch) isspace (Ch) 116 # define ISDIGIT(Ch) isdigit (Ch) 117 # define ISXDIGIT(Ch) isxdigit (Ch) 118 # define TOLOWER(Ch) tolower (Ch) 119 # define ORIENT if (s->_vtable_offset == 0 \ 120 && _IO_fwide (s, -1) != -1) \ 121 return EOF 122 123 # define L_(Str) Str 124 # define CHAR_T char 125 # define UCHAR_T unsigned char 126 # define WINT_T int 127 # endif 128 129 # define encode_error() do { \ 130 if (errp != NULL) *errp |= 4; \ 131 _IO_funlockfile (s); \ 132 __libc_cleanup_end (0); \ 133 __set_errno (EILSEQ); \ 134 return done; \ 135 } while (0) 136 # define conv_error() do { \ 137 if (errp != NULL) *errp |= 2; \ 138 _IO_funlockfile (s); \ 139 __libc_cleanup_end (0); \ 140 return done; \ 141 } while (0) 142 # define input_error() do { \ 143 _IO_funlockfile (s); \ 144 if (errp != NULL) *errp |= 1; \ 145 __libc_cleanup_end (0); \ 146 return done ?: EOF; \ 147 } while (0) 148 # define memory_error() do { \ 149 _IO_funlockfile (s); \ 150 __set_errno (ENOMEM); \ 151 __libc_cleanup_end (0); \ 152 return EOF; \ 153 } while (0) 154 # define ARGCHECK(s, format) \ 155 do \ 156 { \ 157 /* Check file argument for consistence. */ \ 158 CHECK_FILE (s, EOF); \ 159 if (s->_flags & _IO_NO_READS) \ 160 { \ 161 __set_errno (EBADF); \ 162 return EOF; \ 163 } \ 164 else if (format == NULL) \ 165 { \ 166 MAYBE_SET_EINVAL; \ 167 return EOF; \ 168 } \ 169 } while (0) 170 # define LOCK_STREAM(S) \ 171 __libc_cleanup_region_start (1, (void (*) (void *)) &_IO_funlockfile, (S)); \ 172 _IO_flockfile (S) 173 # define UNLOCK_STREAM(S) \ 174 _IO_funlockfile (S); \ 175 __libc_cleanup_region_end (0) 176 #else 177 # define ungetc(c, s) ((void) (c != EOF && --read_in), ungetc (c, s)) 178 # define ungetc_not_eof(c, s) (--read_in, (ungetc) (c, s)) 179 # define inchar() (c == EOF ? 
EOF \ 180 : ((c = getc (s)), (void) (c != EOF && ++read_in), c)) 181 # define MEMCPY(d, s, n) memcpy (d, s, n) 182 # define ISSPACE(Ch) isspace (Ch) 183 # define ISDIGIT(Ch) isdigit (Ch) 184 # define ISXDIGIT(Ch) isxdigit (Ch) 185 # define TOLOWER(Ch) tolower (Ch) 186 187 # define L_(Str) Str 188 # define CHAR_T char 189 # define UCHAR_T unsigned char 190 # define WINT_T int 191 192 # define encode_error() do { \ 193 funlockfile (s); \ 194 __set_errno (EILSEQ); \ 195 return done; \ 196 } while (0) 197 # define conv_error() do { \ 198 funlockfile (s); \ 199 return done; \ 200 } while (0) 201 # define input_error() do { \ 202 funlockfile (s); \ 203 return done ?: EOF; \ 204 } while (0) 205 # define memory_error() do { \ 206 funlockfile (s); \ 207 __set_errno (ENOMEM); \ 208 return EOF; \ 209 } while (0) 210 # define ARGCHECK(s, format) \ 211 do \ 212 { \ 213 /* Check file argument for consistence. */ \ 214 if (!__validfp (s) || !s->__mode.__read) \ 215 { \ 216 __set_errno (EBADF); \ 217 return EOF; \ 218 } \ 219 else if (format == NULL) \ 220 { \ 221 __set_errno (EINVAL); \ 222 return EOF; \ 223 } \ 224 } while (0) 225 #if 1 226 /* XXX For now !!! */ 227 # define flockfile(S) /* nothing */ 228 # define funlockfile(S) /* nothing */ 229 # define LOCK_STREAM(S) 230 # define UNLOCK_STREAM(S) 231 #else 232 # define LOCK_STREAM(S) \ 233 __libc_cleanup_region_start (&__funlockfile, (S)); \ 234 __flockfile (S) 235 # define UNLOCK_STREAM(S) \ 236 __funlockfile (S); \ 237 __libc_cleanup_region_end (0) 238 #endif 239 #endif 240 241 242 /* Read formatted input from S according to the format string 243 FORMAT, using the argument list in ARG. 244 Return the number of assignments made, or -1 for an input error. 
*/ 245 #ifdef USE_IN_LIBIO 246 # ifdef COMPILE_WSCANF 247 int 248 _IO_vfwscanf (s, format, argptr, errp) 249 _IO_FILE *s; 250 const wchar_t *format; 251 _IO_va_list argptr; 252 int *errp; 253 # else 254 int 255 _IO_vfscanf (s, format, argptr, errp) 256 _IO_FILE *s; 257 const char *format; 258 _IO_va_list argptr; 259 int *errp; 260 # endif 261 #else 262 int 263 __vfscanf (FILE *s, const char *format, va_list argptr) 264 #endif 265 { 266 va_list arg; 267 register const CHAR_T *f = format; 268 register UCHAR_T fc; /* Current character of the format. */ 269 register WINT_T done = 0; /* Assignments done. */ 270 register size_t read_in = 0; /* Chars read in. */ 271 register WINT_T c = 0; /* Last char read. */ 272 register int width; /* Maximum field width. */ 273 register int flags; /* Modifiers for current format element. */ 274 275 /* Status for reading F-P nums. */ 276 char got_dot, got_e, negative; 277 /* If a [...] is a [^...]. */ 278 CHAR_T not_in; 279 #define exp_char not_in 280 /* Base for integral numbers. */ 281 int base; 282 /* Signedness for integral numbers. */ 283 int number_signed; 284 #define is_hexa number_signed 285 /* Decimal point character. */ 286 #ifdef COMPILE_WSCANF 287 wchar_t decimal; 288 #else 289 const char *decimal; 290 #endif 291 /* The thousands character of the current locale. */ 292 #ifdef COMPILE_WSCANF 293 wchar_t thousands; 294 #else 295 const char *thousands; 296 #endif 297 /* State for the conversions. */ 298 mbstate_t state; 299 /* Integral holding variables. */ 300 union 301 { 302 long long int q; 303 unsigned long long int uq; 304 long int l; 305 unsigned long int ul; 306 } num; 307 /* Character-buffer pointer. */ 308 char *str = NULL; 309 wchar_t *wstr = NULL; 310 char **strptr = NULL; 311 ssize_t strsize = 0; 312 /* We must not react on white spaces immediately because they can 313 possibly be matched even if in the input stream no character is 314 available anymore. 
*/ 315 int skip_space = 0; 316 /* Nonzero if we are reading a pointer. */ 317 int read_pointer; 318 /* Workspace. */ 319 CHAR_T *tw; /* Temporary pointer. */ 320 CHAR_T *wp = NULL; /* Workspace. */ 321 size_t wpmax = 0; /* Maximal size of workspace. */ 322 size_t wpsize; /* Currently used bytes in workspace. */ 323 #define ADDW(Ch) \ 324 do \ 325 { \ 326 if (wpsize == wpmax) \ 327 { \ 328 CHAR_T *old = wp; \ 329 wpmax = (UCHAR_MAX + 1 > 2 * wpmax ? UCHAR_MAX + 1 : 2 * wpmax); \ 330 wp = (CHAR_T *) alloca (wpmax * sizeof (wchar_t)); \ 331 if (old != NULL) \ 332 MEMCPY (wp, old, wpsize); \ 333 } \ 334 wp[wpsize++] = (Ch); \ 335 } \ 336 while (0) 337 338 #ifdef __va_copy 339 __va_copy (arg, argptr); 340 #else 341 arg = (va_list) argptr; 342 #endif 343 344 #ifdef ORIENT 345 ORIENT; 346 #endif 347 348 ARGCHECK (s, format); 349 350 /* Figure out the decimal point character. */ 351 #ifdef COMPILE_WSCANF 352 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC); 353 #else 354 decimal = _NL_CURRENT (LC_NUMERIC, DECIMAL_POINT); 355 #endif 356 /* Figure out the thousands separator character. */ 357 #ifdef COMPILE_WSCANF 358 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC); 359 #else 360 thousands = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP); 361 if (*thousands == '\0') 362 thousands = NULL; 363 #endif 364 365 /* Lock the stream. */ 366 LOCK_STREAM (s); 367 368 369 #ifndef COMPILE_WSCANF 370 /* From now on we use `state' to convert the format string. */ 371 memset (&state, '\0', sizeof (state)); 372 #endif 373 374 /* Run through the format string. */ 375 while (*f != '\0') 376 { 377 unsigned int argpos; 378 /* Extract the next argument, which is of type TYPE. 379 For a %N$... spec, this is the Nth argument from the beginning; 380 otherwise it is the next argument after the state now in ARG. */ 381 #ifdef __va_copy 382 # define ARG(type) (argpos == 0 ? 
va_arg (arg, type) : \ 383 ({ unsigned int pos = argpos; \ 384 va_list arg; \ 385 __va_copy (arg, argptr); \ 386 while (--pos > 0) \ 387 (void) va_arg (arg, void *); \ 388 va_arg (arg, type); \ 389 })) 390 #else 391 # if 0 392 /* XXX Possible optimization. */ 393 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \ 394 ({ va_list arg = (va_list) argptr; \ 395 arg = (va_list) ((char *) arg \ 396 + (argpos - 1) \ 397 * __va_rounded_size (void *)); \ 398 va_arg (arg, type); \ 399 })) 400 # else 401 # define ARG(type) (argpos == 0 ? va_arg (arg, type) : \ 402 ({ unsigned int pos = argpos; \ 403 va_list arg = (va_list) argptr; \ 404 while (--pos > 0) \ 405 (void) va_arg (arg, void *); \ 406 va_arg (arg, type); \ 407 })) 408 # endif 409 #endif 410 411 #ifndef COMPILE_WSCANF 412 if (!isascii ((unsigned char) *f)) 413 { 414 /* Non-ASCII, may be a multibyte. */ 415 int len = __mbrlen (f, strlen (f), &state); 416 if (len > 0) 417 { 418 do 419 { 420 c = inchar (); 421 if (c == EOF) 422 input_error (); 423 else if (c != (unsigned char) *f++) 424 { 425 ungetc_not_eof (c, s); 426 conv_error (); 427 } 428 } 429 while (--len > 0); 430 continue; 431 } 432 } 433 #endif 434 435 fc = *f++; 436 if (fc != '%') 437 { 438 /* Remember to skip spaces. */ 439 if (ISSPACE (fc)) 440 { 441 skip_space = 1; 442 continue; 443 } 444 445 /* Read a character. */ 446 c = inchar (); 447 448 /* Characters other than format specs must just match. */ 449 if (c == EOF) 450 input_error (); 451 452 /* We saw white space char as the last character in the format 453 string. Now it's time to skip all leading white space. */ 454 if (skip_space) 455 { 456 while (ISSPACE (c)) 457 if (inchar () == EOF && errno == EINTR) 458 conv_error (); 459 skip_space = 0; 460 } 461 462 if (c != fc) 463 { 464 ungetc (c, s); 465 conv_error (); 466 } 467 468 continue; 469 } 470 471 /* This is the start of the conversion string. */ 472 flags = 0; 473 474 /* Not yet decided whether we read a pointer or not. 
*/ 475 read_pointer = 0; 476 477 /* Initialize state of modifiers. */ 478 argpos = 0; 479 480 /* Prepare temporary buffer. */ 481 wpsize = 0; 482 483 /* Check for a positional parameter specification. */ 484 if (ISDIGIT ((UCHAR_T) *f)) 485 { 486 argpos = (UCHAR_T) *f++ - L_('0'); 487 while (ISDIGIT ((UCHAR_T) *f)) 488 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0')); 489 if (*f == L_('$')) 490 ++f; 491 else 492 { 493 /* Oops; that was actually the field width. */ 494 width = argpos; 495 flags |= WIDTH; 496 argpos = 0; 497 goto got_width; 498 } 499 } 500 501 /* Check for the assignment-suppressing, the number grouping flag, 502 and the signal to use the locale's digit representation. */ 503 while (*f == L_('*') || *f == L_('\'') || *f == L_('I')) 504 switch (*f++) 505 { 506 case L_('*'): 507 flags |= SUPPRESS; 508 break; 509 case L_('\''): 510 flags |= GROUP; 511 break; 512 case L_('I'): 513 flags |= I18N; 514 break; 515 } 516 517 /* We have seen width. */ 518 if (ISDIGIT ((UCHAR_T) *f)) 519 flags |= WIDTH; 520 521 /* Find the maximum field width. */ 522 width = 0; 523 while (ISDIGIT ((UCHAR_T) *f)) 524 { 525 width *= 10; 526 width += (UCHAR_T) *f++ - L_('0'); 527 } 528 got_width: 529 if (width == 0) 530 width = -1; 531 532 /* Check for type modifiers. */ 533 switch (*f++) 534 { 535 case L_('h'): 536 /* ints are short ints or chars. */ 537 if (*f == L_('h')) 538 { 539 ++f; 540 flags |= CHAR; 541 } 542 else 543 flags |= SHORT; 544 break; 545 case L_('l'): 546 if (*f == L_('l')) 547 { 548 /* A double `l' is equivalent to an `L'. */ 549 ++f; 550 flags |= LONGDBL | LONG; 551 } 552 else 553 /* ints are long ints. */ 554 flags |= LONG; 555 break; 556 case L_('q'): 557 case L_('L'): 558 /* doubles are long doubles, and ints are long long ints. */ 559 flags |= LONGDBL | LONG; 560 break; 561 case L_('a'): 562 /* The `a' is used as a flag only if followed by `s', `S' or 563 `['. 
*/ 564 if (*f != L_('s') && *f != L_('S') && *f != L_('[')) 565 { 566 --f; 567 break; 568 } 569 /* String conversions (%s, %[) take a `char **' 570 arg and fill it in with a malloc'd pointer. */ 571 flags |= MALLOC; 572 break; 573 case L_('z'): 574 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int)) 575 flags |= LONGDBL; 576 else if (sizeof (size_t) > sizeof (unsigned int)) 577 flags |= LONG; 578 break; 579 case L_('j'): 580 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int)) 581 flags |= LONGDBL; 582 else if (sizeof (uintmax_t) > sizeof (unsigned int)) 583 flags |= LONG; 584 break; 585 case L_('t'): 586 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int)) 587 flags |= LONGDBL; 588 else if (sizeof (ptrdiff_t) > sizeof (int)) 589 flags |= LONG; 590 break; 591 default: 592 /* Not a recognized modifier. Backup. */ 593 --f; 594 break; 595 } 596 597 /* End of the format string? */ 598 if (*f == L_('\0')) 599 conv_error (); 600 601 /* Find the conversion specifier. */ 602 fc = *f++; 603 if (skip_space || (fc != L_('[') && fc != L_('c') 604 && fc != L_('C') && fc != L_('n'))) 605 { 606 /* Eat whitespace. */ 607 int save_errno = errno; 608 __set_errno(0); 609 do 610 if (inchar () == EOF && errno == EINTR) 611 input_error (); 612 while (ISSPACE (c)); 613 __set_errno(save_errno); 614 ungetc (c, s); 615 skip_space = 0; 616 } 617 618 switch (fc) 619 { 620 case L_('%'): /* Must match a literal '%'. */ 621 c = inchar (); 622 if (c == EOF) 623 input_error (); 624 if (c != fc) 625 { 626 ungetc_not_eof (c, s); 627 conv_error (); 628 } 629 break; 630 631 case L_('n'): /* Answer number of assignments done. */ 632 /* Corrigendum 1 to ISO C 1990 describes the allowed flags 633 with the 'n' conversion specifier. */ 634 if (!(flags & SUPPRESS)) 635 { 636 /* Don't count the read-ahead. 
*/ 637 if (need_longlong && (flags & LONGDBL)) 638 *ARG (long long int *) = read_in; 639 else if (need_long && (flags & LONG)) 640 *ARG (long int *) = read_in; 641 else if (flags & SHORT) 642 *ARG (short int *) = read_in; 643 else if (!(flags & CHAR)) 644 *ARG (int *) = read_in; 645 else 646 *ARG (char *) = read_in; 647 648 #ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1 649 /* We have a severe problem here. The ISO C standard 650 contradicts itself in explaining the effect of the %n 651 format in `scanf'. While in ISO C:1990 and the ISO C 652 Amendement 1:1995 the result is described as 653 654 Execution of a %n directive does not effect the 655 assignment count returned at the completion of 656 execution of the f(w)scanf function. 657 658 in ISO C Corrigendum 1:1994 the following was added: 659 660 Subclause 7.9.6.2 661 Add the following fourth example: 662 In: 663 #include <stdio.h> 664 int d1, d2, n1, n2, i; 665 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2); 666 the value 123 is assigned to d1 and the value3 to n1. 667 Because %n can never get an input failure the value 668 of 3 is also assigned to n2. The value of d2 is not 669 affected. The value 3 is assigned to i. 670 671 We go for now with the historically correct code from ISO C, 672 i.e., we don't count the %n assignments. When it ever 673 should proof to be wrong just remove the #ifdef above. */ 674 ++done; 675 #endif 676 } 677 break; 678 679 case L_('c'): /* Match characters. */ 680 if ((flags & LONG) == 0) 681 { 682 if (!(flags & SUPPRESS)) 683 { 684 str = ARG (char *); 685 if (str == NULL) 686 conv_error (); 687 } 688 689 c = inchar (); 690 if (c == EOF) 691 input_error (); 692 693 if (width == -1) 694 width = 1; 695 696 #ifdef COMPILE_WSCANF 697 /* We have to convert the wide character(s) into multibyte 698 characters and store the result. */ 699 memset (&state, '\0', sizeof (state)); 700 701 do 702 { 703 size_t n; 704 705 n = __wcrtomb (!(flags & SUPPRESS) ? 
str : NULL, c, &state); 706 if (n == (size_t) -1) 707 /* No valid wide character. */ 708 input_error (); 709 710 /* Increment the output pointer. Even if we don't 711 write anything. */ 712 str += n; 713 } 714 while (--width > 0 && inchar () != EOF); 715 #else 716 if (!(flags & SUPPRESS)) 717 { 718 do 719 *str++ = c; 720 while (--width > 0 && inchar () != EOF); 721 } 722 else 723 while (--width > 0 && inchar () != EOF); 724 #endif 725 726 if (!(flags & SUPPRESS)) 727 ++done; 728 729 break; 730 } 731 /* FALLTHROUGH */ 732 case L_('C'): 733 if (!(flags & SUPPRESS)) 734 { 735 wstr = ARG (wchar_t *); 736 if (wstr == NULL) 737 conv_error (); 738 } 739 740 c = inchar (); 741 if (c == EOF) 742 input_error (); 743 744 #ifdef COMPILE_WSCANF 745 /* Just store the incoming wide characters. */ 746 if (!(flags & SUPPRESS)) 747 { 748 do 749 *wstr++ = c; 750 while (--width > 0 && inchar () != EOF); 751 } 752 else 753 while (--width > 0 && inchar () != EOF); 754 #else 755 { 756 /* We have to convert the multibyte input sequence to wide 757 characters. */ 758 char buf[1]; 759 mbstate_t cstate; 760 761 memset (&cstate, '\0', sizeof (cstate)); 762 763 do 764 { 765 /* This is what we present the mbrtowc function first. */ 766 buf[0] = c; 767 768 while (1) 769 { 770 size_t n; 771 772 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL, 773 buf, 1, &cstate); 774 775 if (n == (size_t) -2) 776 { 777 /* Possibly correct character, just not enough 778 input. */ 779 if (inchar () == EOF) 780 encode_error (); 781 782 buf[0] = c; 783 continue; 784 } 785 786 if (n != 1) 787 encode_error (); 788 789 /* We have a match. */ 790 break; 791 } 792 793 /* Advance the result pointer. */ 794 ++wstr; 795 } 796 while (--width > 0 && inchar () != EOF); 797 } 798 #endif 799 800 if (!(flags & SUPPRESS)) 801 ++done; 802 803 break; 804 805 case L_('s'): /* Read a string. 
*/ 806 if (!(flags & LONG)) 807 { 808 #define STRING_ARG(Str, Type) \ 809 do if (!(flags & SUPPRESS)) \ 810 { \ 811 if (flags & MALLOC) \ 812 { \ 813 /* The string is to be stored in a malloc'd buffer. */ \ 814 strptr = ARG (char **); \ 815 if (strptr == NULL) \ 816 conv_error (); \ 817 /* Allocate an initial buffer. */ \ 818 strsize = 100; \ 819 *strptr = (char *) malloc (strsize * sizeof (Type)); \ 820 Str = (Type *) *strptr; \ 821 } \ 822 else \ 823 Str = ARG (Type *); \ 824 if (Str == NULL) \ 825 conv_error (); \ 826 } while (0) 827 STRING_ARG (str, char); 828 829 c = inchar (); 830 if (c == EOF) 831 input_error (); 832 833 #ifdef COMPILE_WSCANF 834 memset (&state, '\0', sizeof (state)); 835 #endif 836 837 do 838 { 839 if (ISSPACE (c)) 840 { 841 ungetc_not_eof (c, s); 842 break; 843 } 844 845 #ifdef COMPILE_WSCANF 846 /* This is quite complicated. We have to convert the 847 wide characters into multibyte characters and then 848 store them. */ 849 { 850 size_t n; 851 852 if (!(flags & SUPPRESS) && (flags & MALLOC) 853 && str + MB_CUR_MAX >= *strptr + strsize) 854 { 855 /* We have to enlarge the buffer if the `a' flag 856 was given. */ 857 size_t strleng = str - *strptr; 858 char *newstr; 859 860 newstr = (char *) realloc (*strptr, strsize * 2); 861 if (newstr == NULL) 862 { 863 /* Can't allocate that much. Last-ditch 864 effort. */ 865 newstr = (char *) realloc (*strptr, 866 strleng + MB_CUR_MAX); 867 if (newstr == NULL) 868 { 869 /* We lose. Oh well. Terminate the 870 string and stop converting, 871 so at least we don't skip any input. */ 872 ((char *) (*strptr))[strleng] = '\0'; 873 ++done; 874 conv_error (); 875 } 876 else 877 { 878 *strptr = newstr; 879 str = newstr + strleng; 880 strsize = strleng + MB_CUR_MAX; 881 } 882 } 883 else 884 { 885 *strptr = newstr; 886 str = newstr + strleng; 887 strsize *= 2; 888 } 889 } 890 891 n = __wcrtomb (!(flags & SUPPRESS) ? 
str : NULL, c, 892 &state); 893 if (n == (size_t) -1) 894 encode_error (); 895 896 assert (n <= MB_CUR_MAX); 897 str += n; 898 } 899 #else 900 /* This is easy. */ 901 if (!(flags & SUPPRESS)) 902 { 903 *str++ = c; 904 if ((flags & MALLOC) 905 && (char *) str == *strptr + strsize) 906 { 907 /* Enlarge the buffer. */ 908 str = (char *) realloc (*strptr, 2 * strsize); 909 if (str == NULL) 910 { 911 /* Can't allocate that much. Last-ditch 912 effort. */ 913 str = (char *) realloc (*strptr, strsize + 1); 914 if (str == NULL) 915 { 916 /* We lose. Oh well. Terminate the 917 string and stop converting, 918 so at least we don't skip any input. */ 919 ((char *) (*strptr))[strsize - 1] = '\0'; 920 ++done; 921 conv_error (); 922 } 923 else 924 { 925 *strptr = (char *) str; 926 str += strsize; 927 ++strsize; 928 } 929 } 930 else 931 { 932 *strptr = (char *) str; 933 str += strsize; 934 strsize *= 2; 935 } 936 } 937 } 938 #endif 939 } 940 while ((width <= 0 || --width > 0) && inchar () != EOF); 941 942 if (!(flags & SUPPRESS)) 943 { 944 #ifdef COMPILE_WSCANF 945 /* We have to emit the code to get into the initial 946 state. */ 947 char buf[MB_LEN_MAX]; 948 size_t n = __wcrtomb (buf, L'\0', &state); 949 if (n > 0 && (flags & MALLOC) 950 && str + n >= *strptr + strsize) 951 { 952 /* Enlarge the buffer. */ 953 size_t strleng = str - *strptr; 954 char *newstr; 955 956 newstr = (char *) realloc (*strptr, strleng + n + 1); 957 if (newstr == NULL) 958 { 959 /* We lose. Oh well. Terminate the string 960 and stop converting, so at least we don't 961 skip any input. 
*/ 962 ((char *) (*strptr))[strleng] = '\0'; 963 ++done; 964 conv_error (); 965 } 966 else 967 { 968 *strptr = newstr; 969 str = newstr + strleng; 970 strsize = strleng + n + 1; 971 } 972 } 973 974 str = __mempcpy (str, buf, n); 975 #endif 976 *str++ = '\0'; 977 978 if ((flags & MALLOC) && str - *strptr != strsize) 979 { 980 char *cp = (char *) realloc (*strptr, str - *strptr); 981 if (cp != NULL) 982 *strptr = cp; 983 } 984 985 ++done; 986 } 987 break; 988 } 989 /* FALLTHROUGH */ 990 991 case L_('S'): 992 { 993 #ifndef COMPILE_WSCANF 994 mbstate_t cstate; 995 #endif 996 997 /* Wide character string. */ 998 STRING_ARG (wstr, wchar_t); 999 1000 c = inchar (); 1001 if (c == EOF) 1002 input_error (); 1003 1004 #ifndef COMPILE_WSCANF 1005 memset (&cstate, '\0', sizeof (cstate)); 1006 #endif 1007 1008 do 1009 { 1010 if (ISSPACE (c)) 1011 { 1012 ungetc_not_eof (c, s); 1013 break; 1014 } 1015 1016 #ifdef COMPILE_WSCANF 1017 /* This is easy. */ 1018 if (!(flags & SUPPRESS)) 1019 { 1020 *wstr++ = c; 1021 if ((flags & MALLOC) 1022 && wstr == (wchar_t *) *strptr + strsize) 1023 { 1024 /* Enlarge the buffer. */ 1025 wstr = (wchar_t *) realloc (*strptr, 1026 (2 * strsize) 1027 * sizeof (wchar_t)); 1028 if (wstr == NULL) 1029 { 1030 /* Can't allocate that much. Last-ditch 1031 effort. */ 1032 wstr = (wchar_t *) realloc (*strptr, 1033 (strsize + 1) 1034 * sizeof (wchar_t)); 1035 if (wstr == NULL) 1036 { 1037 /* We lose. Oh well. Terminate the string 1038 and stop converting, so at least we don't 1039 skip any input. */ 1040 ((wchar_t *) (*strptr))[strsize - 1] = L'\0'; 1041 ++done; 1042 conv_error (); 1043 } 1044 else 1045 { 1046 *strptr = (char *) wstr; 1047 wstr += strsize; 1048 ++strsize; 1049 } 1050 } 1051 else 1052 { 1053 *strptr = (char *) wstr; 1054 wstr += strsize; 1055 strsize *= 2; 1056 } 1057 } 1058 } 1059 #else 1060 { 1061 char buf[1]; 1062 1063 buf[0] = c; 1064 1065 while (1) 1066 { 1067 size_t n; 1068 1069 n = __mbrtowc (!(flags & SUPPRESS) ? 
wstr : NULL, 1070 buf, 1, &cstate); 1071 1072 if (n == (size_t) -2) 1073 { 1074 /* Possibly correct character, just not enough 1075 input. */ 1076 if (inchar () == EOF) 1077 encode_error (); 1078 1079 buf[0] = c; 1080 continue; 1081 } 1082 1083 if (n != 1) 1084 encode_error (); 1085 1086 /* We have a match. */ 1087 ++wstr; 1088 break; 1089 } 1090 1091 if (!(flags & SUPPRESS) && (flags & MALLOC) 1092 && wstr == (wchar_t *) *strptr + strsize) 1093 { 1094 /* Enlarge the buffer. */ 1095 wstr = (wchar_t *) realloc (*strptr, 1096 (2 * strsize 1097 * sizeof (wchar_t))); 1098 if (wstr == NULL) 1099 { 1100 /* Can't allocate that much. Last-ditch effort. */ 1101 wstr = (wchar_t *) realloc (*strptr, 1102 ((strsize + 1) 1103 * sizeof (wchar_t))); 1104 if (wstr == NULL) 1105 { 1106 /* We lose. Oh well. Terminate the 1107 string and stop converting, so at 1108 least we don't skip any input. */ 1109 ((wchar_t *) (*strptr))[strsize - 1] = L'\0'; 1110 ++done; 1111 conv_error (); 1112 } 1113 else 1114 { 1115 *strptr = (char *) wstr; 1116 wstr += strsize; 1117 ++strsize; 1118 } 1119 } 1120 else 1121 { 1122 *strptr = (char *) wstr; 1123 wstr += strsize; 1124 strsize *= 2; 1125 } 1126 } 1127 } 1128 #endif 1129 } 1130 while ((width <= 0 || --width > 0) && inchar () != EOF); 1131 1132 if (!(flags & SUPPRESS)) 1133 { 1134 *wstr++ = L'\0'; 1135 1136 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize) 1137 { 1138 wchar_t *cp = (wchar_t *) realloc (*strptr, 1139 ((wstr 1140 - (wchar_t *) *strptr) 1141 * sizeof(wchar_t))); 1142 if (cp != NULL) 1143 *strptr = (char *) cp; 1144 } 1145 1146 ++done; 1147 } 1148 } 1149 break; 1150 1151 case L_('x'): /* Hexadecimal integer. */ 1152 case L_('X'): /* Ditto. */ 1153 base = 16; 1154 number_signed = 0; 1155 goto number; 1156 1157 case L_('o'): /* Octal integer. */ 1158 base = 8; 1159 number_signed = 0; 1160 goto number; 1161 1162 case L_('u'): /* Unsigned decimal integer. 
*/ 1163 base = 10; 1164 number_signed = 0; 1165 goto number; 1166 1167 case L_('d'): /* Signed decimal integer. */ 1168 base = 10; 1169 number_signed = 1; 1170 goto number; 1171 1172 case L_('i'): /* Generic number. */ 1173 base = 0; 1174 number_signed = 1; 1175 1176 number: 1177 c = inchar (); 1178 if (c == EOF) 1179 input_error (); 1180 1181 /* Check for a sign. */ 1182 if (c == L_('-') || c == L_('+')) 1183 { 1184 ADDW (c); 1185 if (width > 0) 1186 --width; 1187 c = inchar (); 1188 } 1189 1190 /* Look for a leading indication of base. */ 1191 if (width != 0 && c == L_('0')) 1192 { 1193 if (width > 0) 1194 --width; 1195 1196 ADDW (c); 1197 c = inchar (); 1198 1199 if (width != 0 && TOLOWER (c) == L_('x')) 1200 { 1201 if (base == 0) 1202 base = 16; 1203 if (base == 16) 1204 { 1205 if (width > 0) 1206 --width; 1207 c = inchar (); 1208 } 1209 } 1210 else if (base == 0) 1211 base = 8; 1212 } 1213 1214 if (base == 0) 1215 base = 10; 1216 1217 if (base == 10 && (flags & I18N) != 0) 1218 { 1219 int from_level; 1220 int to_level; 1221 int level; 1222 #ifdef COMPILE_WSCANF 1223 const wchar_t *wcdigits[10]; 1224 #else 1225 const char *mbdigits[10]; 1226 #endif 1227 int n; 1228 1229 from_level = 0; 1230 #ifdef COMPILE_WSCANF 1231 to_level = _NL_CURRENT_WORD (LC_CTYPE, 1232 _NL_CTYPE_INDIGITS_WC_LEN) - 1; 1233 #else 1234 to_level = _NL_CURRENT_WORD (LC_CTYPE, 1235 _NL_CTYPE_INDIGITS_MB_LEN) - 1; 1236 #endif 1237 1238 /* Read the number into workspace. */ 1239 while (c != EOF && width != 0) 1240 { 1241 /* In this round we get the pointer to the digit strings 1242 and also perform the first round of comparisons. */ 1243 for (n = 0; n < 10; ++n) 1244 { 1245 /* Get the string for the digits with value N. 
*/ 1246 #ifdef COMPILE_WSCANF 1247 wcdigits[n] = (const wchar_t *) 1248 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n); 1249 wcdigits[n] += from_level; 1250 1251 if (c == *wcdigits[n]) 1252 { 1253 to_level = from_level; 1254 break; 1255 } 1256 1257 /* Advance the pointer to the next string. */ 1258 ++wcdigits[n]; 1259 #else 1260 const char *cmpp; 1261 int avail = width > 0 ? width : INT_MAX; 1262 1263 mbdigits[n] = _NL_CURRENT (LC_CTYPE, 1264 _NL_CTYPE_INDIGITS0_MB + n); 1265 1266 for (level = 0; level < from_level; level++) 1267 mbdigits[n] = strchr (mbdigits[n], '\0') + 1; 1268 1269 cmpp = mbdigits[n]; 1270 while ((unsigned char) *cmpp == c && avail > 0) 1271 { 1272 if (*++cmpp == '\0') 1273 break; 1274 else 1275 { 1276 if ((c = inchar ()) == EOF) 1277 break; 1278 --avail; 1279 } 1280 } 1281 1282 if (*cmpp == '\0') 1283 { 1284 if (width > 0) 1285 width = avail; 1286 to_level = from_level; 1287 break; 1288 } 1289 1290 /* We are pushing all read characters back. */ 1291 if (cmpp > mbdigits[n]) 1292 { 1293 ungetc (c, s); 1294 while (--cmpp > mbdigits[n]) 1295 ungetc_not_eof ((unsigned char) *cmpp, s); 1296 c = (unsigned char) *cmpp; 1297 } 1298 1299 /* Advance the pointer to the next string. */ 1300 mbdigits[n] = strchr (mbdigits[n], '\0') + 1; 1301 #endif 1302 } 1303 1304 if (n == 10) 1305 { 1306 /* Have not yet found the digit. */ 1307 for (level = from_level + 1; level <= to_level; ++level) 1308 { 1309 /* Search all ten digits of this level. */ 1310 for (n = 0; n < 10; ++n) 1311 { 1312 #ifdef COMPILE_WSCANF 1313 if (c == *wcdigits[n]) 1314 break; 1315 1316 /* Advance the pointer to the next string. */ 1317 ++wcdigits[n]; 1318 #else 1319 const char *cmpp; 1320 int avail = width > 0 ? 
width : INT_MAX; 1321 1322 cmpp = mbdigits[n]; 1323 while ((unsigned char) *cmpp == c && avail > 0) 1324 { 1325 if (*++cmpp == '\0') 1326 break; 1327 else 1328 { 1329 if ((c = inchar ()) == EOF) 1330 break; 1331 --avail; 1332 } 1333 } 1334 1335 if (*cmpp == '\0') 1336 { 1337 if (width > 0) 1338 width = avail; 1339 break; 1340 } 1341 1342 /* We are pushing all read characters back. */ 1343 if (cmpp > mbdigits[n]) 1344 { 1345 ungetc (c, s); 1346 while (--cmpp > mbdigits[n]) 1347 ungetc_not_eof ((unsigned char) *cmpp, s); 1348 c = (unsigned char) *cmpp; 1349 } 1350 1351 /* Advance the pointer to the next string. */ 1352 mbdigits[n] = strchr (mbdigits[n], '\0') + 1; 1353 #endif 1354 } 1355 1356 if (n < 10) 1357 { 1358 /* Found it. */ 1359 from_level = level; 1360 to_level = level; 1361 break; 1362 } 1363 } 1364 } 1365 1366 if (n < 10) 1367 c = L_('0') + n; 1368 else if ((flags & GROUP) 1369 #ifdef COMPILE_WSCANF 1370 && thousands != L'\0' 1371 #else 1372 && thousands != NULL 1373 #endif 1374 ) 1375 { 1376 /* Try matching against the thousands separator. */ 1377 #ifdef COMPILE_WSCANF 1378 if (c != thousands) 1379 break; 1380 #else 1381 const char *cmpp = thousands; 1382 int avail = width > 0 ? width : INT_MAX; 1383 1384 while ((unsigned char) *cmpp == c && avail > 0) 1385 { 1386 ADDW (c); 1387 if (*++cmpp == '\0') 1388 break; 1389 else 1390 { 1391 if ((c = inchar ()) == EOF) 1392 break; 1393 --avail; 1394 } 1395 } 1396 1397 if (*cmpp != '\0') 1398 { 1399 /* We are pushing all read characters back. */ 1400 if (cmpp > thousands) 1401 { 1402 wpsize -= cmpp - thousands; 1403 ungetc (c, s); 1404 while (--cmpp > thousands) 1405 ungetc_not_eof ((unsigned char) *cmpp, s); 1406 c = (unsigned char) *cmpp; 1407 } 1408 break; 1409 } 1410 1411 if (width > 0) 1412 width = avail; 1413 1414 /* The last thousands character will be added back by 1415 the ADDW below. 
*/ 1416 --wpsize; 1417 #endif 1418 } 1419 else 1420 break; 1421 1422 ADDW (c); 1423 if (width > 0) 1424 --width; 1425 1426 c = inchar (); 1427 } 1428 } 1429 else 1430 /* Read the number into workspace. */ 1431 while (c != EOF && width != 0) 1432 { 1433 if (base == 16) 1434 { 1435 if (!ISXDIGIT (c)) 1436 break; 1437 } 1438 else if (!ISDIGIT (c) || c - L_('0') >= base) 1439 { 1440 if (base == 10 && (flags & GROUP) 1441 #ifdef COMPILE_WSCANF 1442 && thousands != L'\0' 1443 #else 1444 && thousands != NULL 1445 #endif 1446 ) 1447 { 1448 /* Try matching against the thousands separator. */ 1449 #ifdef COMPILE_WSCANF 1450 if (c != thousands) 1451 break; 1452 #else 1453 const char *cmpp = thousands; 1454 int avail = width > 0 ? width : INT_MAX; 1455 1456 while ((unsigned char) *cmpp == c && avail > 0) 1457 { 1458 ADDW (c); 1459 if (*++cmpp == '\0') 1460 break; 1461 else 1462 { 1463 if ((c = inchar ()) == EOF) 1464 break; 1465 --avail; 1466 } 1467 } 1468 1469 if (*cmpp != '\0') 1470 { 1471 /* We are pushing all read characters back. */ 1472 if (cmpp > thousands) 1473 { 1474 wpsize -= cmpp - thousands; 1475 ungetc (c, s); 1476 while (--cmpp > thousands) 1477 ungetc_not_eof ((unsigned char) *cmpp, s); 1478 c = (unsigned char) *cmpp; 1479 } 1480 break; 1481 } 1482 1483 if (width > 0) 1484 width = avail; 1485 1486 /* The last thousands character will be added back by 1487 the ADDW below. */ 1488 --wpsize; 1489 #endif 1490 } 1491 else 1492 break; 1493 } 1494 ADDW (c); 1495 if (width > 0) 1496 --width; 1497 1498 c = inchar (); 1499 } 1500 1501 if (wpsize == 0 1502 || (wpsize == 1 && (wp[0] == L_('+') || wp[0] == L_('-')))) 1503 { 1504 /* There was no number. If we are supposed to read a pointer 1505 we must recognize "(nil)" as well. 
*/ 1506 if (wpsize == 0 && read_pointer && (width < 0 || width >= 0) 1507 && c == '(' 1508 && TOLOWER (inchar ()) == L_('n') 1509 && TOLOWER (inchar ()) == L_('i') 1510 && TOLOWER (inchar ()) == L_('l') 1511 && inchar () == L_(')')) 1512 /* We must produce the value of a NULL pointer. A single 1513 '0' digit is enough. */ 1514 ADDW (L_('0')); 1515 else 1516 { 1517 /* The last read character is not part of the number 1518 anymore. */ 1519 ungetc (c, s); 1520 1521 conv_error (); 1522 } 1523 } 1524 else 1525 /* The just read character is not part of the number anymore. */ 1526 ungetc (c, s); 1527 1528 /* Convert the number. */ 1529 ADDW (L_('\0')); 1530 if (need_longlong && (flags & LONGDBL)) 1531 { 1532 if (number_signed) 1533 num.q = __strtoll_internal (wp, &tw, base, flags & GROUP); 1534 else 1535 num.uq = __strtoull_internal (wp, &tw, base, flags & GROUP); 1536 } 1537 else 1538 { 1539 if (number_signed) 1540 num.l = __strtol_internal (wp, &tw, base, flags & GROUP); 1541 else 1542 num.ul = __strtoul_internal (wp, &tw, base, flags & GROUP); 1543 } 1544 if (wp == tw) 1545 conv_error (); 1546 1547 if (!(flags & SUPPRESS)) 1548 { 1549 if (! 
number_signed) 1550 { 1551 if (need_longlong && (flags & LONGDBL)) 1552 *ARG (unsigned LONGLONG int *) = num.uq; 1553 else if (need_long && (flags & LONG)) 1554 *ARG (unsigned long int *) = num.ul; 1555 else if (flags & SHORT) 1556 *ARG (unsigned short int *) 1557 = (unsigned short int) num.ul; 1558 else if (!(flags & CHAR)) 1559 *ARG (unsigned int *) = (unsigned int) num.ul; 1560 else 1561 *ARG (unsigned char *) = (unsigned char) num.ul; 1562 } 1563 else 1564 { 1565 if (need_longlong && (flags & LONGDBL)) 1566 *ARG (LONGLONG int *) = num.q; 1567 else if (need_long && (flags & LONG)) 1568 *ARG (long int *) = num.l; 1569 else if (flags & SHORT) 1570 *ARG (short int *) = (short int) num.l; 1571 else if (!(flags & CHAR)) 1572 *ARG (int *) = (int) num.l; 1573 else 1574 *ARG (signed char *) = (signed char) num.ul; 1575 } 1576 ++done; 1577 } 1578 break; 1579 1580 case L_('e'): /* Floating-point numbers. */ 1581 case L_('E'): 1582 case L_('f'): 1583 case L_('F'): 1584 case L_('g'): 1585 case L_('G'): 1586 case L_('a'): 1587 case L_('A'): 1588 c = inchar (); 1589 if (c == EOF) 1590 input_error (); 1591 1592 /* Check for a sign. */ 1593 if (c == L_('-') || c == L_('+')) 1594 { 1595 negative = c == L_('-'); 1596 if (width == 0 || inchar () == EOF) 1597 /* EOF is only an input error before we read any chars. */ 1598 conv_error (); 1599 if (! ISDIGIT (c) && TOLOWER (c) != L_('i') 1600 && TOLOWER (c) != L_('n')) 1601 { 1602 #ifdef COMPILE_WSCANF 1603 if (c != decimal) 1604 { 1605 /* This is no valid number. */ 1606 ungetc (c, s); 1607 conv_error (); 1608 } 1609 #else 1610 /* Match against the decimal point. At this point 1611 we are taking advantage of the fact that we can 1612 push more than one character back. This is 1613 (almost) never necessary since the decimal point 1614 string hopefully never contains more than one 1615 byte. */ 1616 const char *cmpp = decimal; 1617 int avail = width > 0 ? 
width : INT_MAX; 1618 1619 while ((unsigned char) *cmpp == c && avail > 0) 1620 if (*++cmpp == '\0') 1621 break; 1622 else 1623 { 1624 if (inchar () == EOF) 1625 break; 1626 --avail; 1627 } 1628 1629 if (*cmpp != '\0') 1630 { 1631 /* This is no valid number. */ 1632 while (1) 1633 { 1634 ungetc (c, s); 1635 if (cmpp == decimal) 1636 break; 1637 c = (unsigned char) *--cmpp; 1638 } 1639 1640 conv_error (); 1641 } 1642 if (width > 0) 1643 width = avail; 1644 #endif 1645 } 1646 if (width > 0) 1647 --width; 1648 } 1649 else 1650 negative = 0; 1651 1652 /* Take care for the special arguments "nan" and "inf". */ 1653 if (TOLOWER (c) == L_('n')) 1654 { 1655 /* Maybe "nan". */ 1656 ADDW (c); 1657 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('a')) 1658 conv_error (); 1659 if (width > 0) 1660 --width; 1661 ADDW (c); 1662 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n')) 1663 conv_error (); 1664 if (width > 0) 1665 --width; 1666 ADDW (c); 1667 /* It is "nan". */ 1668 goto scan_float; 1669 } 1670 else if (TOLOWER (c) == L_('i')) 1671 { 1672 /* Maybe "inf" or "infinity". */ 1673 ADDW (c); 1674 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('n')) 1675 conv_error (); 1676 if (width > 0) 1677 --width; 1678 ADDW (c); 1679 if (width == 0 || inchar () == EOF || TOLOWER (c) != L_('f')) 1680 conv_error (); 1681 if (width > 0) 1682 --width; 1683 ADDW (c); 1684 /* It is as least "inf". */ 1685 if (width != 0 && inchar () != EOF) 1686 { 1687 if (TOLOWER (c) == L_('i')) 1688 { 1689 if (width > 0) 1690 --width; 1691 /* Now we have to read the rest as well. 
*/ 1692 ADDW (c); 1693 if (width == 0 || inchar () == EOF 1694 || TOLOWER (c) != L_('n')) 1695 conv_error (); 1696 if (width > 0) 1697 --width; 1698 ADDW (c); 1699 if (width == 0 || inchar () == EOF 1700 || TOLOWER (c) != L_('i')) 1701 conv_error (); 1702 if (width > 0) 1703 --width; 1704 ADDW (c); 1705 if (width == 0 || inchar () == EOF 1706 || TOLOWER (c) != L_('t')) 1707 conv_error (); 1708 if (width > 0) 1709 --width; 1710 ADDW (c); 1711 if (width == 0 || inchar () == EOF 1712 || TOLOWER (c) != L_('y')) 1713 conv_error (); 1714 if (width > 0) 1715 --width; 1716 ADDW (c); 1717 } 1718 else 1719 /* Never mind. */ 1720 ungetc (c, s); 1721 } 1722 goto scan_float; 1723 } 1724 1725 is_hexa = 0; 1726 exp_char = L_('e'); 1727 if (width != 0 && c == L_('0')) 1728 { 1729 ADDW (c); 1730 c = inchar (); 1731 if (width > 0) 1732 --width; 1733 if (width != 0 && TOLOWER (c) == L_('x')) 1734 { 1735 /* It is a number in hexadecimal format. */ 1736 ADDW (c); 1737 1738 is_hexa = 1; 1739 exp_char = L_('p'); 1740 1741 /* Grouping is not allowed. */ 1742 flags &= ~GROUP; 1743 c = inchar (); 1744 if (width > 0) 1745 --width; 1746 } 1747 } 1748 1749 got_dot = got_e = 0; 1750 do 1751 { 1752 if (ISDIGIT (c)) 1753 ADDW (c); 1754 else if (!got_e && is_hexa && ISXDIGIT (c)) 1755 ADDW (c); 1756 else if (got_e && wp[wpsize - 1] == exp_char 1757 && (c == L_('-') || c == L_('+'))) 1758 ADDW (c); 1759 else if (wpsize > 0 && !got_e && TOLOWER (c) == exp_char) 1760 { 1761 ADDW (exp_char); 1762 got_e = got_dot = 1; 1763 } 1764 else 1765 { 1766 #ifdef COMPILE_WSCANF 1767 if (! got_dot && c == decimal) 1768 { 1769 ADDW (c); 1770 got_dot = 1; 1771 } 1772 else if (thousands != L'\0' && ! got_dot && c == thousands) 1773 ADDW (c); 1774 else 1775 { 1776 /* The last read character is not part of the number 1777 anymore. */ 1778 ungetc (c, s); 1779 break; 1780 } 1781 #else 1782 const char *cmpp = decimal; 1783 int avail = width > 0 ? width : INT_MAX; 1784 1785 if (! 
got_dot) 1786 { 1787 while ((unsigned char) *cmpp == c && avail > 0) 1788 if (*++cmpp == '\0') 1789 break; 1790 else 1791 { 1792 if (inchar () == EOF) 1793 break; 1794 --avail; 1795 } 1796 } 1797 1798 if (*cmpp == '\0') 1799 { 1800 /* Add all the characters. */ 1801 for (cmpp = decimal; *cmpp != '\0'; ++cmpp) 1802 ADDW ((unsigned char) *cmpp); 1803 if (width > 0) 1804 width = avail; 1805 got_dot = 1; 1806 } 1807 else 1808 { 1809 /* Figure out whether it is a thousands separator. 1810 There is one problem: we possibly read more than 1811 one character. We cannot push them back but since 1812 we know that parts of the `decimal' string matched, 1813 we can compare against it. */ 1814 const char *cmp2p = thousands; 1815 1816 if (thousands != NULL && ! got_dot) 1817 { 1818 while (cmp2p < cmpp 1819 && *cmp2p == decimal[cmp2p - thousands]) 1820 ++cmp2p; 1821 if (cmp2p == cmpp) 1822 { 1823 while ((unsigned char) *cmp2p == c && avail > 0) 1824 if (*++cmp2p == '\0') 1825 break; 1826 else 1827 { 1828 if (inchar () == EOF) 1829 break; 1830 --avail; 1831 } 1832 } 1833 } 1834 1835 if (cmp2p != NULL && *cmp2p == '\0') 1836 { 1837 /* Add all the characters. */ 1838 for (cmpp = thousands; *cmpp != '\0'; ++cmpp) 1839 ADDW ((unsigned char) *cmpp); 1840 if (width > 0) 1841 width = avail; 1842 } 1843 else 1844 { 1845 /* The last read character is not part of the number 1846 anymore. */ 1847 ungetc (c, s); 1848 break; 1849 } 1850 } 1851 #endif 1852 } 1853 if (width > 0) 1854 --width; 1855 } 1856 while (width != 0 && inchar () != EOF); 1857 1858 /* Have we read any character? If we try to read a number 1859 in hexadecimal notation and we have read only the `0x' 1860 prefix or no exponent this is an error. */ 1861 if (wpsize == 0 || (is_hexa && (wpsize == 2 || ! got_e))) 1862 conv_error (); 1863 1864 scan_float: 1865 /* Convert the number. 
*/ 1866 ADDW (L_('\0')); 1867 if (flags & LONGDBL) 1868 { 1869 long double d = __strtold_internal (wp, &tw, flags & GROUP); 1870 if (!(flags & SUPPRESS) && tw != wp) 1871 *ARG (long double *) = negative ? -d : d; 1872 } 1873 else if (flags & LONG) 1874 { 1875 double d = __strtod_internal (wp, &tw, flags & GROUP); 1876 if (!(flags & SUPPRESS) && tw != wp) 1877 *ARG (double *) = negative ? -d : d; 1878 } 1879 else 1880 { 1881 float d = __strtof_internal (wp, &tw, flags & GROUP); 1882 if (!(flags & SUPPRESS) && tw != wp) 1883 *ARG (float *) = negative ? -d : d; 1884 } 1885 1886 if (tw == wp) 1887 conv_error (); 1888 1889 if (!(flags & SUPPRESS)) 1890 ++done; 1891 break; 1892 1893 case L_('['): /* Character class. */ 1894 if (flags & LONG) 1895 STRING_ARG (wstr, wchar_t); 1896 else 1897 STRING_ARG (str, char); 1898 1899 if (*f == L_('^')) 1900 { 1901 ++f; 1902 not_in = 1; 1903 } 1904 else 1905 not_in = 0; 1906 1907 if (width < 0) 1908 /* There is no width given so there is also no limit on the 1909 number of characters we read. Therefore we set width to 1910 a very high value to make the algorithm easier. */ 1911 width = INT_MAX; 1912 1913 #ifdef COMPILE_WSCANF 1914 /* Find the beginning and the end of the scanlist. We are not 1915 creating a lookup table since it would have to be too large. 1916 Instead we search each time through the string. This is not 1917 a constant lookup time but who uses this feature deserves to 1918 be punished. */ 1919 tw = (wchar_t *) f; /* Marks the beginning. */ 1920 1921 if (*f == L']') 1922 ++f; 1923 1924 while ((fc = *f++) != L'\0' && fc != L']'); 1925 1926 if (fc == L'\0') 1927 conv_error (); 1928 wp = (wchar_t *) f - 1; 1929 #else 1930 /* Fill WP with byte flags indexed by character. 1931 We will use this flag map for matching input characters. 
*/ 1932 if (wpmax < UCHAR_MAX + 1) 1933 { 1934 wpmax = UCHAR_MAX + 1; 1935 wp = (char *) alloca (wpmax); 1936 } 1937 memset (wp, '\0', UCHAR_MAX + 1); 1938 1939 fc = *f; 1940 if (fc == ']' || fc == '-') 1941 { 1942 /* If ] or - appears before any char in the set, it is not 1943 the terminator or separator, but the first char in the 1944 set. */ 1945 wp[fc] = 1; 1946 ++f; 1947 } 1948 1949 while ((fc = *f++) != '\0' && fc != ']') 1950 if (fc == '-' && *f != '\0' && *f != ']' 1951 && (unsigned char) f[-2] <= (unsigned char) *f) 1952 { 1953 /* Add all characters from the one before the '-' 1954 up to (but not including) the next format char. */ 1955 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc) 1956 wp[fc] = 1; 1957 } 1958 else 1959 /* Add the character to the flag map. */ 1960 wp[fc] = 1; 1961 1962 if (fc == '\0') 1963 conv_error(); 1964 #endif 1965 1966 if (flags & LONG) 1967 { 1968 size_t now = read_in; 1969 #ifdef COMPILE_WSCANF 1970 if (inchar () == WEOF) 1971 input_error (); 1972 1973 do 1974 { 1975 wchar_t *runp; 1976 1977 /* Test whether it's in the scanlist. */ 1978 runp = tw; 1979 while (runp < wp) 1980 { 1981 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp 1982 && runp != tw 1983 && (unsigned int) runp[-1] <= (unsigned int) runp[1]) 1984 { 1985 /* Match against all characters in between the 1986 first and last character of the sequence. */ 1987 wchar_t wc; 1988 1989 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc) 1990 if (wc == c) 1991 break; 1992 1993 if (wc <= runp[1] && !not_in) 1994 break; 1995 if (wc <= runp[1] && not_in) 1996 { 1997 /* The current character is not in the 1998 scanset. 
*/ 1999 ungetc (c, s); 2000 goto out; 2001 } 2002 2003 runp += 2; 2004 } 2005 else 2006 { 2007 if (*runp == c && !not_in) 2008 break; 2009 if (*runp == c && not_in) 2010 { 2011 ungetc (c, s); 2012 goto out; 2013 } 2014 2015 ++runp; 2016 } 2017 } 2018 2019 if (runp == wp && !not_in) 2020 { 2021 ungetc (c, s); 2022 goto out; 2023 } 2024 2025 if (!(flags & SUPPRESS)) 2026 { 2027 *wstr++ = c; 2028 2029 if ((flags & MALLOC) 2030 && wstr == (wchar_t *) *strptr + strsize) 2031 { 2032 /* Enlarge the buffer. */ 2033 wstr = (wchar_t *) realloc (*strptr, 2034 (2 * strsize) 2035 * sizeof (wchar_t)); 2036 if (wstr == NULL) 2037 { 2038 /* Can't allocate that much. Last-ditch 2039 effort. */ 2040 wstr = (wchar_t *) 2041 realloc (*strptr, (strsize + 1) 2042 * sizeof (wchar_t)); 2043 if (wstr == NULL) 2044 { 2045 /* We lose. Oh well. Terminate the string 2046 and stop converting, so at least we don't 2047 skip any input. */ 2048 ((wchar_t *) (*strptr))[strsize - 1] = L'\0'; 2049 ++done; 2050 conv_error (); 2051 } 2052 else 2053 { 2054 *strptr = (char *) wstr; 2055 wstr += strsize; 2056 ++strsize; 2057 } 2058 } 2059 else 2060 { 2061 *strptr = (char *) wstr; 2062 wstr += strsize; 2063 strsize *= 2; 2064 } 2065 } 2066 } 2067 } 2068 while (--width > 0 && inchar () != WEOF); 2069 out: 2070 #else 2071 char buf[MB_LEN_MAX]; 2072 size_t cnt = 0; 2073 mbstate_t cstate; 2074 2075 if (inchar () == EOF) 2076 input_error (); 2077 2078 memset (&cstate, '\0', sizeof (cstate)); 2079 2080 do 2081 { 2082 if (wp[c] == not_in) 2083 { 2084 ungetc_not_eof (c, s); 2085 break; 2086 } 2087 2088 /* This is easy. */ 2089 if (!(flags & SUPPRESS)) 2090 { 2091 size_t n; 2092 2093 /* Convert it into a wide character. */ 2094 buf[0] = c; 2095 n = __mbrtowc (wstr, buf, 1, &cstate); 2096 2097 if (n == (size_t) -2) 2098 { 2099 /* Possibly correct character, just not enough 2100 input. 
*/ 2101 ++cnt; 2102 assert (cnt < MB_CUR_MAX); 2103 continue; 2104 } 2105 2106 ++wstr; 2107 if ((flags & MALLOC) 2108 && wstr == (wchar_t *) *strptr + strsize) 2109 { 2110 /* Enlarge the buffer. */ 2111 wstr = (wchar_t *) realloc (*strptr, 2112 (2 * strsize 2113 * sizeof (wchar_t))); 2114 if (wstr == NULL) 2115 { 2116 /* Can't allocate that much. Last-ditch 2117 effort. */ 2118 wstr = (wchar_t *) 2119 realloc (*strptr, ((strsize + 1) 2120 * sizeof (wchar_t))); 2121 if (wstr == NULL) 2122 { 2123 /* We lose. Oh well. Terminate the 2124 string and stop converting, 2125 so at least we don't skip any input. */ 2126 ((wchar_t *) (*strptr))[strsize - 1] = L'\0'; 2127 ++done; 2128 conv_error (); 2129 } 2130 else 2131 { 2132 *strptr = (char *) wstr; 2133 wstr += strsize; 2134 ++strsize; 2135 } 2136 } 2137 else 2138 { 2139 *strptr = (char *) wstr; 2140 wstr += strsize; 2141 strsize *= 2; 2142 } 2143 } 2144 } 2145 2146 if (--width <= 0) 2147 break; 2148 } 2149 while (inchar () != EOF); 2150 2151 if (cnt != 0) 2152 /* We stopped in the middle of recognizing another 2153 character. That's a problem. */ 2154 encode_error (); 2155 #endif 2156 2157 if (now == read_in) 2158 /* We haven't succesfully read any character. */ 2159 conv_error (); 2160 2161 if (!(flags & SUPPRESS)) 2162 { 2163 *wstr++ = L'\0'; 2164 2165 if ((flags & MALLOC) 2166 && wstr - (wchar_t *) *strptr != strsize) 2167 { 2168 wchar_t *cp = (wchar_t *) 2169 realloc (*strptr, ((wstr - (wchar_t *) *strptr) 2170 * sizeof(wchar_t))); 2171 if (cp != NULL) 2172 *strptr = (char *) cp; 2173 } 2174 2175 ++done; 2176 } 2177 } 2178 else 2179 { 2180 size_t now = read_in; 2181 2182 if (inchar () == EOF) 2183 input_error (); 2184 2185 #ifdef COMPILE_WSCANF 2186 2187 memset (&state, '\0', sizeof (state)); 2188 2189 do 2190 { 2191 wchar_t *runp; 2192 size_t n; 2193 2194 /* Test whether it's in the scanlist. 
*/ 2195 runp = tw; 2196 while (runp < wp) 2197 { 2198 if (runp[0] == L'-' && runp[1] != '\0' && runp + 1 != wp 2199 && runp != tw 2200 && (unsigned int) runp[-1] <= (unsigned int) runp[1]) 2201 { 2202 /* Match against all characters in between the 2203 first and last character of the sequence. */ 2204 wchar_t wc; 2205 2206 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc) 2207 if (wc == c) 2208 break; 2209 2210 if (wc <= runp[1] && !not_in) 2211 break; 2212 if (wc <= runp[1] && not_in) 2213 { 2214 /* The current character is not in the 2215 scanset. */ 2216 ungetc (c, s); 2217 goto out2; 2218 } 2219 2220 runp += 2; 2221 } 2222 else 2223 { 2224 if (*runp == c && !not_in) 2225 break; 2226 if (*runp == c && not_in) 2227 { 2228 ungetc (c, s); 2229 goto out2; 2230 } 2231 2232 ++runp; 2233 } 2234 } 2235 2236 if (runp == wp && !not_in) 2237 { 2238 ungetc (c, s); 2239 goto out2; 2240 } 2241 2242 if (!(flags & SUPPRESS)) 2243 { 2244 if ((flags & MALLOC) 2245 && str + MB_CUR_MAX >= *strptr + strsize) 2246 { 2247 /* Enlarge the buffer. */ 2248 size_t strleng = str - *strptr; 2249 char *newstr; 2250 2251 newstr = (char *) realloc (*strptr, 2 * strsize); 2252 if (newstr == NULL) 2253 { 2254 /* Can't allocate that much. Last-ditch 2255 effort. */ 2256 newstr = (char *) realloc (*strptr, 2257 strleng + MB_CUR_MAX); 2258 if (newstr == NULL) 2259 { 2260 /* We lose. Oh well. Terminate the string 2261 and stop converting, so at least we don't 2262 skip any input. */ 2263 ((char *) (*strptr))[strleng] = '\0'; 2264 ++done; 2265 conv_error (); 2266 } 2267 else 2268 { 2269 *strptr = newstr; 2270 str = newstr + strleng; 2271 strsize = strleng + MB_CUR_MAX; 2272 } 2273 } 2274 else 2275 { 2276 *strptr = newstr; 2277 str = newstr + strleng; 2278 strsize *= 2; 2279 } 2280 } 2281 } 2282 2283 n = __wcrtomb (!(flags & SUPPRESS) ? 
str : NULL, c, &state); 2284 if (n == (size_t) -1) 2285 encode_error (); 2286 2287 assert (n <= MB_CUR_MAX); 2288 str += n; 2289 } 2290 while (--width > 0 && inchar () != WEOF); 2291 out2: 2292 #else 2293 do 2294 { 2295 if (wp[c] == not_in) 2296 { 2297 ungetc_not_eof (c, s); 2298 break; 2299 } 2300 2301 /* This is easy. */ 2302 if (!(flags & SUPPRESS)) 2303 { 2304 *str++ = c; 2305 if ((flags & MALLOC) 2306 && (char *) str == *strptr + strsize) 2307 { 2308 /* Enlarge the buffer. */ 2309 str = (char *) realloc (*strptr, 2 * strsize); 2310 if (str == NULL) 2311 { 2312 /* Can't allocate that much. Last-ditch 2313 effort. */ 2314 str = (char *) realloc (*strptr, strsize + 1); 2315 if (str == NULL) 2316 { 2317 /* We lose. Oh well. Terminate the 2318 string and stop converting, 2319 so at least we don't skip any input. */ 2320 ((char *) (*strptr))[strsize - 1] = '\0'; 2321 ++done; 2322 conv_error (); 2323 } 2324 else 2325 { 2326 *strptr = (char *) str; 2327 str += strsize; 2328 ++strsize; 2329 } 2330 } 2331 else 2332 { 2333 *strptr = (char *) str; 2334 str += strsize; 2335 strsize *= 2; 2336 } 2337 } 2338 } 2339 } 2340 while (--width > 0 && inchar () != EOF); 2341 #endif 2342 2343 if (now == read_in) 2344 /* We haven't succesfully read any character. */ 2345 conv_error (); 2346 2347 if (!(flags & SUPPRESS)) 2348 { 2349 #ifdef COMPILE_WSCANF 2350 /* We have to emit the code to get into the initial 2351 state. */ 2352 char buf[MB_LEN_MAX]; 2353 size_t n = __wcrtomb (buf, L'\0', &state); 2354 if (n > 0 && (flags & MALLOC) 2355 && str + n >= *strptr + strsize) 2356 { 2357 /* Enlarge the buffer. */ 2358 size_t strleng = str - *strptr; 2359 char *newstr; 2360 2361 newstr = (char *) realloc (*strptr, strleng + n + 1); 2362 if (newstr == NULL) 2363 { 2364 /* We lose. Oh well. Terminate the string 2365 and stop converting, so at least we don't 2366 skip any input. 
*/ 2367 ((char *) (*strptr))[strleng] = '\0'; 2368 ++done; 2369 conv_error (); 2370 } 2371 else 2372 { 2373 *strptr = newstr; 2374 str = newstr + strleng; 2375 strsize = strleng + n + 1; 2376 } 2377 } 2378 2379 str = __mempcpy (str, buf, n); 2380 #endif 2381 *str++ = '\0'; 2382 /* For a malloc'ed result, try to shrink the buffer to the number of bytes actually stored; if realloc fails the existing buffer is kept. */ 2383 if ((flags & MALLOC) && str - *strptr != strsize) 2384 { 2385 char *cp = (char *) realloc (*strptr, str - *strptr); 2386 if (cp != NULL) 2387 *strptr = cp; 2388 } 2389 2390 ++done; 2391 } 2392 } 2393 break; 2394 2395 case L_('p'): /* Generic pointer: set up an unsigned base-16 conversion, mark it as a pointer read, and continue at the `number' label. */ 2396 base = 16; 2397 /* A PTR must be the same size as a `long int'. */ 2398 flags &= ~(SHORT|LONGDBL); 2399 if (need_long) 2400 flags |= LONG; 2401 number_signed = 0; 2402 read_pointer = 1; 2403 goto number; 2404 2405 default: 2406 /* If this is an unknown format character punt. */ 2407 conv_error (); 2408 } 2409 } 2410 2411 /* The last thing we saw in the format string was white space. 2412 Read input characters for as long as they are white space, then push back the first one that is not. */ 2413 if (skip_space) 2414 { 2415 do 2416 c = inchar (); 2417 while (ISSPACE (c)); 2418 ungetc (c, s); 2419 } 2420 2421 /* Unlock stream. */ 2422 UNLOCK_STREAM (s); 2423 2424 return done; 2425 } 2426 2427 #ifdef USE_IN_LIBIO 2428 # ifdef COMPILE_WSCANF 2429 /* Wide-character entry point: forwards S, FORMAT and ARGPTR to _IO_vfwscanf, passing NULL as the fourth argument, and returns its result. */ int 2430 __vfwscanf (FILE *s, const wchar_t *format, va_list argptr) 2431 { 2432 return _IO_vfwscanf (s, format, argptr, NULL); 2433 } 2434 # else 2435 /* Narrow-character entry point: forwards S, FORMAT and ARGPTR to _IO_vfscanf, passing NULL as the fourth argument, and returns its result. */ int 2436 __vfscanf (FILE *s, const char *format, va_list argptr) 2437 { 2438 return _IO_vfscanf (s, format, argptr, NULL); 2439 } 2440 # endif 2441 #endif 2442 /* Make the unprefixed name (vfwscanf or vfscanf) available through weak_alias for whichever variant this file is compiled as. */ 2443 #ifdef COMPILE_WSCANF 2444 weak_alias (__vfwscanf, vfwscanf) 2445 #else 2446 weak_alias (__vfscanf, vfscanf) 2447 #endif 2448