1 /* Internal header for parsing printf format strings. 2 Copyright (C) 1995-1999, 2000 Free Software Foundation, Inc. 3 This file is part of th GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, write to the Free 17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 18 02111-1307 USA. */ 19 20 #include <ctype.h> 21 #include <limits.h> 22 #include <printf.h> 23 #include <stdint.h> 24 #include <stddef.h> 25 #include <string.h> 26 27 #define NDEBUG 1 28 #include <assert.h> 29 30 31 struct printf_spec 32 { 33 /* Information parsed from the format spec. */ 34 struct printf_info info; 35 36 /* Pointers into the format string for the end of this format 37 spec and the next (or to the end of the string if no more). */ 38 const UCHAR_T *end_of_fmt, *next_fmt; 39 40 /* Position of arguments for precision and width, or -1 if `info' has 41 the constant value. */ 42 int prec_arg, width_arg; 43 44 int data_arg; /* Position of data argument. */ 45 int data_arg_type; /* Type of first argument. */ 46 /* Number of arguments consumed by this format specifier. */ 47 size_t ndata_args; 48 }; 49 50 51 /* The various kinds off arguments that can be passed to printf. */ 52 union printf_arg 53 { 54 unsigned char pa_char; 55 wchar_t pa_wchar; 56 short int pa_short_int; 57 int pa_int; 58 long int pa_long_int; 59 long long int pa_long_long_int; 60 unsigned short int pa_u_short_int; 61 unsigned int pa_u_int; 62 unsigned long int pa_u_long_int; 63 unsigned long long int pa_u_long_long_int; 64 float pa_float; 65 double pa_double; 66 long double pa_long_double; 67 const char *pa_string; 68 const wchar_t *pa_wstring; 69 void *pa_pointer; 70 }; 71 72 73 /* Read a simple integer from a string and update the string pointer. 74 It is assumed that the first character is a digit. */ 75 static inline unsigned int 76 read_int (const UCHAR_T * *pstr) 77 { 78 unsigned int retval = **pstr - L_('0'); 79 80 while (ISDIGIT (*++(*pstr))) 81 { 82 retval *= 10; 83 retval += **pstr - L_('0'); 84 } 85 86 return retval; 87 } 88 89 90 91 /* Find the next spec in FORMAT, or the end of the string. Returns 92 a pointer into FORMAT, to a '%' or a '\0'. */ 93 static inline const UCHAR_T * 94 #ifdef COMPILE_WPRINTF 95 find_spec (const UCHAR_T *format) 96 #else 97 find_spec (const UCHAR_T *format, mbstate_t *ps) 98 #endif 99 { 100 #ifdef COMPILE_WPRINTF 101 return (const UCHAR_T *) __wcschrnul ((const CHAR_T *) format, L'%'); 102 #else 103 while (*format != L_('\0') && *format != L_('%')) 104 { 105 int len; 106 107 /* Remove any hints of a wrong encoding. */ 108 ps->__count = 0; 109 if (! isascii (*format) && (len = __mbrlen (format, MB_CUR_MAX, ps)) > 0) 110 format += len; 111 else 112 ++format; 113 } 114 return format; 115 #endif 116 } 117 118 119 /* These are defined in reg-printf.c. */ 120 extern printf_arginfo_function *__printf_arginfo_table[]; 121 extern printf_function **__printf_function_table; 122 123 124 /* FORMAT must point to a '%' at the beginning of a spec. Fills in *SPEC 125 with the parsed details. POSN is the number of arguments already 126 consumed. At most MAXTYPES - POSN types are filled in TYPES. Return 127 the number of args consumed by this spec; *MAX_REF_ARG is updated so it 128 remains the highest argument index used. */ 129 static inline size_t 130 #ifdef COMPILE_WPRINTF 131 parse_one_spec (const UCHAR_T *format, size_t posn, struct printf_spec *spec, 132 size_t *max_ref_arg) 133 #else 134 parse_one_spec (const UCHAR_T *format, size_t posn, struct printf_spec *spec, 135 size_t *max_ref_arg, mbstate_t *ps) 136 #endif 137 { 138 unsigned int n; 139 size_t nargs = 0; 140 141 /* Skip the '%'. */ 142 ++format; 143 144 /* Clear information structure. */ 145 spec->data_arg = -1; 146 spec->info.alt = 0; 147 spec->info.space = 0; 148 spec->info.left = 0; 149 spec->info.showsign = 0; 150 spec->info.group = 0; 151 spec->info.i18n = 0; 152 spec->info.pad = ' '; 153 spec->info.wide = sizeof (UCHAR_T) > 1; 154 155 /* Test for positional argument. */ 156 if (ISDIGIT (*format)) 157 { 158 const UCHAR_T *begin = format; 159 160 n = read_int (&format); 161 162 if (n > 0 && *format == L_('$')) 163 /* Is positional parameter. */ 164 { 165 ++format; /* Skip the '$'. */ 166 spec->data_arg = n - 1; 167 *max_ref_arg = MAX (*max_ref_arg, n); 168 } 169 else 170 /* Oops; that was actually the width and/or 0 padding flag. 171 Step back and read it again. */ 172 format = begin; 173 } 174 175 /* Check for spec modifiers. */ 176 do 177 { 178 switch (*format) 179 { 180 case L_(' '): 181 /* Output a space in place of a sign, when there is no sign. */ 182 spec->info.space = 1; 183 continue; 184 case L_('+'): 185 /* Always output + or - for numbers. */ 186 spec->info.showsign = 1; 187 continue; 188 case L_('-'): 189 /* Left-justify things. */ 190 spec->info.left = 1; 191 continue; 192 case L_('#'): 193 /* Use the "alternate form": 194 Hex has 0x or 0X, FP always has a decimal point. */ 195 spec->info.alt = 1; 196 continue; 197 case L_('0'): 198 /* Pad with 0s. */ 199 spec->info.pad = '0'; 200 continue; 201 case L_('\''): 202 /* Show grouping in numbers if the locale information 203 indicates any. */ 204 spec->info.group = 1; 205 continue; 206 case L_('I'): 207 /* Use the internationalized form of the output. Currently 208 means to use the `outdigits' of the current locale. */ 209 spec->info.i18n = 1; 210 continue; 211 default: 212 break; 213 } 214 break; 215 } 216 while (*++format); 217 218 if (spec->info.left) 219 spec->info.pad = ' '; 220 221 /* Get the field width. */ 222 spec->width_arg = -1; 223 spec->info.width = 0; 224 if (*format == L_('*')) 225 { 226 /* The field width is given in an argument. 227 A negative field width indicates left justification. */ 228 const UCHAR_T *begin = ++format; 229 230 if (ISDIGIT (*format)) 231 { 232 /* The width argument might be found in a positional parameter. */ 233 n = read_int (&format); 234 235 if (n > 0 && *format == L_('$')) 236 { 237 spec->width_arg = n - 1; 238 *max_ref_arg = MAX (*max_ref_arg, n); 239 ++format; /* Skip '$'. */ 240 } 241 } 242 243 if (spec->width_arg < 0) 244 { 245 /* Not in a positional parameter. Consume one argument. */ 246 spec->width_arg = posn++; 247 ++nargs; 248 format = begin; /* Step back and reread. */ 249 } 250 } 251 else if (ISDIGIT (*format)) 252 /* Constant width specification. */ 253 spec->info.width = read_int (&format); 254 255 /* Get the precision. */ 256 spec->prec_arg = -1; 257 /* -1 means none given; 0 means explicit 0. */ 258 spec->info.prec = -1; 259 if (*format == L_('.')) 260 { 261 ++format; 262 if (*format == L_('*')) 263 { 264 /* The precision is given in an argument. */ 265 const UCHAR_T *begin = ++format; 266 267 if (ISDIGIT (*format)) 268 { 269 n = read_int (&format); 270 271 if (n > 0 && *format == L_('$')) 272 { 273 spec->prec_arg = n - 1; 274 *max_ref_arg = MAX (*max_ref_arg, n); 275 ++format; 276 } 277 } 278 279 if (spec->prec_arg < 0) 280 { 281 /* Not in a positional parameter. */ 282 spec->prec_arg = posn++; 283 ++nargs; 284 format = begin; 285 } 286 } 287 else if (ISDIGIT (*format)) 288 spec->info.prec = read_int (&format); 289 else 290 /* "%.?" is treated like "%.0?". */ 291 spec->info.prec = 0; 292 } 293 294 /* Check for type modifiers. */ 295 spec->info.is_long_double = 0; 296 spec->info.is_short = 0; 297 spec->info.is_long = 0; 298 spec->info.is_char = 0; 299 300 switch (*format++) 301 { 302 case L_('h'): 303 /* ints are short ints or chars. */ 304 if (*format != L_('h')) 305 spec->info.is_short = 1; 306 else 307 { 308 ++format; 309 spec->info.is_char = 1; 310 } 311 break; 312 case L_('l'): 313 /* ints are long ints. */ 314 spec->info.is_long = 1; 315 if (*format != L_('l')) 316 break; 317 ++format; 318 /* FALLTHROUGH */ 319 case L_('L'): 320 /* doubles are long doubles, and ints are long long ints. */ 321 case L_('q'): 322 /* 4.4 uses this for long long. */ 323 spec->info.is_long_double = 1; 324 break; 325 case L_('z'): 326 case L_('Z'): 327 /* ints are size_ts. */ 328 assert (sizeof (size_t) <= sizeof (unsigned long long int)); 329 #if LONG_MAX != LONG_LONG_MAX 330 spec->info.is_long_double = sizeof (size_t) > sizeof (unsigned long int); 331 #endif 332 spec->info.is_long = sizeof (size_t) > sizeof (unsigned int); 333 break; 334 case L_('t'): 335 assert (sizeof (ptrdiff_t) <= sizeof (long long int)); 336 #if LONG_MAX != LONG_LONG_MAX 337 spec->info.is_long_double = (sizeof (ptrdiff_t) > sizeof (long int)); 338 #endif 339 spec->info.is_long = sizeof (ptrdiff_t) > sizeof (int); 340 break; 341 case L_('j'): 342 assert (sizeof (uintmax_t) <= sizeof (unsigned long long int)); 343 #if LONG_MAX != LONG_LONG_MAX 344 spec->info.is_long_double = (sizeof (uintmax_t) 345 > sizeof (unsigned long int)); 346 #endif 347 spec->info.is_long = sizeof (uintmax_t) > sizeof (unsigned int); 348 break; 349 default: 350 /* Not a recognized modifier. Backup. */ 351 --format; 352 break; 353 } 354 355 /* Get the format specification. */ 356 spec->info.spec = (wchar_t) *format++; 357 if (__printf_function_table != NULL 358 && spec->info.spec <= UCHAR_MAX 359 && __printf_arginfo_table[spec->info.spec] != NULL) 360 /* We don't try to get the types for all arguments if the format 361 uses more than one. The normal case is covered though. */ 362 spec->ndata_args = (*__printf_arginfo_table[spec->info.spec]) 363 (&spec->info, 1, &spec->data_arg_type); 364 else 365 { 366 /* Find the data argument types of a built-in spec. */ 367 spec->ndata_args = 1; 368 369 switch (spec->info.spec) 370 { 371 case L'i': 372 case L'd': 373 case L'u': 374 case L'o': 375 case L'X': 376 case L'x': 377 #if LONG_MAX != LONG_LONG_MAX 378 if (spec->info.is_long_double) 379 spec->data_arg_type = PA_INT|PA_FLAG_LONG_LONG; 380 else 381 #endif 382 if (spec->info.is_long) 383 spec->data_arg_type = PA_INT|PA_FLAG_LONG; 384 else if (spec->info.is_short) 385 spec->data_arg_type = PA_INT|PA_FLAG_SHORT; 386 else if (spec->info.is_char) 387 spec->data_arg_type = PA_CHAR; 388 else 389 spec->data_arg_type = PA_INT; 390 break; 391 case L'e': 392 case L'E': 393 case L'f': 394 case L'F': 395 case L'g': 396 case L'G': 397 case L'a': 398 case L'A': 399 if (spec->info.is_long_double) 400 spec->data_arg_type = PA_DOUBLE|PA_FLAG_LONG_DOUBLE; 401 else 402 spec->data_arg_type = PA_DOUBLE; 403 break; 404 case L'c': 405 spec->data_arg_type = PA_CHAR; 406 break; 407 case L'C': 408 spec->data_arg_type = PA_WCHAR; 409 break; 410 case L's': 411 spec->data_arg_type = PA_STRING; 412 break; 413 case L'S': 414 spec->data_arg_type = PA_WSTRING; 415 break; 416 case L'p': 417 spec->data_arg_type = PA_POINTER; 418 break; 419 case L'n': 420 spec->data_arg_type = PA_INT|PA_FLAG_PTR; 421 break; 422 423 case L'm': 424 default: 425 /* An unknown spec will consume no args. */ 426 spec->ndata_args = 0; 427 break; 428 } 429 } 430 431 if (spec->data_arg == -1 && spec->ndata_args > 0) 432 { 433 /* There are args consumed, but no positional spec. Use the 434 next sequential arg position. */ 435 spec->data_arg = posn; 436 nargs += spec->ndata_args; 437 } 438 439 if (spec->info.spec == L'\0') 440 /* Format ended before this spec was complete. */ 441 spec->end_of_fmt = spec->next_fmt = format - 1; 442 else 443 { 444 /* Find the next format spec. */ 445 spec->end_of_fmt = format; 446 #ifdef COMPILE_WPRINTF 447 spec->next_fmt = find_spec (format); 448 #else 449 spec->next_fmt = find_spec (format, ps); 450 #endif 451 } 452 453 return nargs; 454 } 455