1 /* 2 * Copyright 2006-2009 Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Ingo Weinhold <bonefish@cs.tu-berlin.de> 7 * Stephan Aßmus <superstippi@gmx.de> 8 */ 9 10 #include <ExpressionParser.h> 11 12 #include <ctype.h> 13 #include <math.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 18 #include <m_apm.h> 19 20 21 static const int32 kMaxDecimalPlaces = 32; 22 23 enum { 24 TOKEN_IDENTIFIER = 0, 25 26 TOKEN_CONSTANT, 27 28 TOKEN_PLUS, 29 TOKEN_MINUS, 30 31 TOKEN_STAR, 32 TOKEN_SLASH, 33 TOKEN_MODULO, 34 35 TOKEN_POWER, 36 TOKEN_FACTORIAL, 37 38 TOKEN_OPENING_BRACKET, 39 TOKEN_CLOSING_BRACKET, 40 41 TOKEN_AND, 42 TOKEN_OR, 43 TOKEN_NOT, 44 45 TOKEN_NONE, 46 TOKEN_END_OF_LINE 47 }; 48 49 struct Token { 50 Token() 51 : string(""), 52 type(TOKEN_NONE), 53 value(0), 54 position(0) 55 { 56 } 57 58 Token(const Token& other) 59 : string(other.string), 60 type(other.type), 61 value(other.value), 62 position(other.position) 63 { 64 } 65 66 Token(const char* string, int32 length, int32 position, int32 type) 67 : string(string, length), 68 type(type), 69 value(0), 70 position(position) 71 { 72 } 73 74 Token& operator=(const Token& other) 75 { 76 string = other.string; 77 type = other.type; 78 value = other.value; 79 position = other.position; 80 return *this; 81 } 82 83 BString string; 84 int32 type; 85 MAPM value; 86 87 int32 position; 88 }; 89 90 91 class Tokenizer { 92 public: 93 Tokenizer() 94 : fString(""), 95 fCurrentChar(NULL), 96 fCurrentToken(), 97 fReuseToken(false), 98 fHexSupport(false) 99 { 100 } 101 102 void SetSupportHexInput(bool enabled) 103 { 104 fHexSupport = enabled; 105 } 106 107 void SetTo(const char* string) 108 { 109 fString = string; 110 fCurrentChar = fString.String(); 111 fCurrentToken = Token(); 112 fReuseToken = false; 113 } 114 115 const Token& NextToken() 116 { 117 if (fCurrentToken.type == TOKEN_END_OF_LINE) 118 return fCurrentToken; 119 120 if (fReuseToken) { 121 fReuseToken = false; 122 //printf("next token (recycled): '%s'\n", fCurrentToken.string.String()); 123 return fCurrentToken; 124 } 125 126 while (*fCurrentChar != 0 && isspace(*fCurrentChar)) 127 fCurrentChar++; 128 129 if (*fCurrentChar == 0) 130 return fCurrentToken = Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE); 131 132 bool decimal = *fCurrentChar == '.' || *fCurrentChar == ','; 133 134 if (decimal || isdigit(*fCurrentChar)) { 135 if (fHexSupport && *fCurrentChar == '0' && fCurrentChar[1] == 'x') 136 return _ParseHexNumber(); 137 138 BString temp; 139 140 const char* begin = fCurrentChar; 141 142 // optional digits before the comma 143 while (isdigit(*fCurrentChar)) { 144 temp << *fCurrentChar; 145 fCurrentChar++; 146 } 147 148 // optional post comma part 149 // (required if there are no digits before the comma) 150 if (*fCurrentChar == '.' || *fCurrentChar == ',') { 151 temp << '.'; 152 fCurrentChar++; 153 154 // optional post comma digits 155 while (isdigit(*fCurrentChar)) { 156 temp << *fCurrentChar; 157 fCurrentChar++; 158 } 159 } 160 161 // optional exponent part 162 if (*fCurrentChar == 'e' || *fCurrentChar == 'E') { 163 temp << *fCurrentChar; 164 fCurrentChar++; 165 166 // optional exponent sign 167 if (*fCurrentChar == '+' || *fCurrentChar == '-') { 168 temp << *fCurrentChar; 169 fCurrentChar++; 170 } 171 172 // required exponent digits 173 if (!isdigit(*fCurrentChar)) { 174 throw ParseException("missing exponent in constant", 175 fCurrentChar - begin); 176 } 177 178 while (isdigit(*fCurrentChar)) { 179 temp << *fCurrentChar; 180 fCurrentChar++; 181 } 182 } 183 184 int32 length = fCurrentChar - begin; 185 BString test = temp; 186 test << "&_"; 187 double value; 188 char t[2]; 189 int32 matches = sscanf(test.String(), "%lf&%s", &value, t); 190 if (matches != 2) { 191 throw ParseException("error in constant", 192 _CurrentPos() - length); 193 } 194 195 fCurrentToken = Token(begin, length, _CurrentPos() - length, 196 TOKEN_CONSTANT); 197 fCurrentToken.value = temp.String(); 198 199 } else if (isalpha(*fCurrentChar) && *fCurrentChar != 'x') { 200 const char* begin = fCurrentChar; 201 while (*fCurrentChar != 0 && (isalpha(*fCurrentChar) 202 || isdigit(*fCurrentChar))) { 203 fCurrentChar++; 204 } 205 int32 length = fCurrentChar - begin; 206 fCurrentToken = Token(begin, length, _CurrentPos() - length, 207 TOKEN_IDENTIFIER); 208 209 } else { 210 int32 type = TOKEN_NONE; 211 212 switch (*fCurrentChar) { 213 case '+': 214 type = TOKEN_PLUS; 215 break; 216 case '-': 217 type = TOKEN_MINUS; 218 break; 219 case '*': 220 type = TOKEN_STAR; 221 break; 222 case '/': 223 case '\\': 224 case ':': 225 type = TOKEN_SLASH; 226 break; 227 228 case '%': 229 type = TOKEN_MODULO; 230 break; 231 case '^': 232 type = TOKEN_POWER; 233 break; 234 case '!': 235 type = TOKEN_FACTORIAL; 236 break; 237 238 case '(': 239 type = TOKEN_OPENING_BRACKET; 240 break; 241 case ')': 242 type = TOKEN_CLOSING_BRACKET; 243 break; 244 245 case '&': 246 type = TOKEN_AND; 247 break; 248 case '|': 249 type = TOKEN_OR; 250 break; 251 case '~': 252 type = TOKEN_NOT; 253 break; 254 255 case '\n': 256 type = TOKEN_END_OF_LINE; 257 break; 258 259 case 'x': 260 if (!fHexSupport) { 261 type = TOKEN_STAR; 262 break; 263 } 264 // fall through 265 266 default: 267 throw ParseException("unexpected character", _CurrentPos()); 268 } 269 fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(), type); 270 fCurrentChar++; 271 } 272 273 //printf("next token: '%s'\n", fCurrentToken.string.String()); 274 return fCurrentToken; 275 } 276 277 void RewindToken() 278 { 279 fReuseToken = true; 280 } 281 282 private: 283 static bool _IsHexDigit(char c) 284 { 285 return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 286 } 287 288 Token& _ParseHexNumber() 289 { 290 const char* begin = fCurrentChar; 291 fCurrentChar += 2; 292 // skip "0x" 293 294 if (!_IsHexDigit(*fCurrentChar)) 295 throw ParseException("expected hex digit", _CurrentPos()); 296 297 fCurrentChar++; 298 while (_IsHexDigit(*fCurrentChar)) 299 fCurrentChar++; 300 301 int32 length = fCurrentChar - begin; 302 fCurrentToken = Token(begin, length, _CurrentPos() - length, 303 TOKEN_CONSTANT); 304 305 // MAPM has no conversion from long long, so we need to improvise. 306 uint64 value = strtoll(fCurrentToken.string.String(), NULL, 0); 307 if (value <= 0x7fffffff) { 308 fCurrentToken.value = (long)value; 309 } else { 310 fCurrentToken.value = (int)(value >> 60); 311 fCurrentToken.value *= 1 << 30; 312 fCurrentToken.value += (int)((value >> 30) & 0x3fffffff); 313 fCurrentToken.value *= 1 << 30; 314 fCurrentToken.value += (int)(value& 0x3fffffff); 315 } 316 317 return fCurrentToken; 318 } 319 320 int32 _CurrentPos() const 321 { 322 return fCurrentChar - fString.String(); 323 } 324 325 BString fString; 326 const char* fCurrentChar; 327 Token fCurrentToken; 328 bool fReuseToken; 329 bool fHexSupport; 330 }; 331 332 333 ExpressionParser::ExpressionParser() 334 : fTokenizer(new Tokenizer()) 335 { 336 } 337 338 339 ExpressionParser::~ExpressionParser() 340 { 341 delete fTokenizer; 342 } 343 344 345 void 346 ExpressionParser::SetSupportHexInput(bool enabled) 347 { 348 fTokenizer->SetSupportHexInput(enabled); 349 } 350 351 352 BString 353 ExpressionParser::Evaluate(const char* expressionString) 354 { 355 fTokenizer->SetTo(expressionString); 356 357 MAPM value = _ParseBinary(); 358 Token token = fTokenizer->NextToken(); 359 if (token.type != TOKEN_END_OF_LINE) 360 throw ParseException("parse error", token.position); 361 362 if (value == 0) 363 return BString("0"); 364 365 char* buffer = value.toFixPtStringExp(kMaxDecimalPlaces, '.', 0, 0); 366 if (buffer == NULL) 367 throw ParseException("out of memory", 0); 368 369 // remove surplus zeros 370 int32 lastChar = strlen(buffer) - 1; 371 if (strchr(buffer, '.')) { 372 while (buffer[lastChar] == '0') 373 lastChar--; 374 if (buffer[lastChar] == '.') 375 lastChar--; 376 } 377 378 BString result(buffer, lastChar + 1); 379 free(buffer); 380 return result; 381 } 382 383 384 int64 385 ExpressionParser::EvaluateToInt64(const char* expressionString) 386 { 387 fTokenizer->SetTo(expressionString); 388 389 MAPM value = _ParseBinary(); 390 Token token = fTokenizer->NextToken(); 391 if (token.type != TOKEN_END_OF_LINE) 392 throw ParseException("parse error", token.position); 393 394 char buffer[128]; 395 value.toIntegerString(buffer); 396 397 return strtoll(buffer, NULL, 0); 398 } 399 400 401 double 402 ExpressionParser::EvaluateToDouble(const char* expressionString) 403 { 404 fTokenizer->SetTo(expressionString); 405 406 MAPM value = _ParseBinary(); 407 Token token = fTokenizer->NextToken(); 408 if (token.type != TOKEN_END_OF_LINE) 409 throw ParseException("parse error", token.position); 410 411 char buffer[1024]; 412 value.toString(buffer, sizeof(buffer) - 4); 413 414 return strtod(buffer, NULL); 415 } 416 417 418 MAPM 419 ExpressionParser::_ParseBinary() 420 { 421 return _ParseSum(); 422 // binary operation appearantly not supported by m_apm library, 423 // should not be too hard to implement though.... 424 425 // double value = _ParseSum(); 426 // 427 // while (true) { 428 // Token token = fTokenizer->NextToken(); 429 // switch (token.type) { 430 // case TOKEN_AND: 431 // value = (uint64)value & (uint64)_ParseSum(); 432 // break; 433 // case TOKEN_OR: 434 // value = (uint64)value | (uint64)_ParseSum(); 435 // break; 436 // 437 // default: 438 // fTokenizer->RewindToken(); 439 // return value; 440 // } 441 // } 442 } 443 444 445 MAPM 446 ExpressionParser::_ParseSum() 447 { 448 // TODO: check isnan()... 449 MAPM value = _ParseProduct(); 450 451 while (true) { 452 Token token = fTokenizer->NextToken(); 453 switch (token.type) { 454 case TOKEN_PLUS: 455 value = value + _ParseProduct(); 456 break; 457 case TOKEN_MINUS: 458 value = value - _ParseProduct(); 459 break; 460 461 default: 462 fTokenizer->RewindToken(); 463 return _ParseFactorial(value); 464 } 465 } 466 } 467 468 469 MAPM 470 ExpressionParser::_ParseProduct() 471 { 472 // TODO: check isnan()... 473 MAPM value = _ParsePower(); 474 475 while (true) { 476 Token token = fTokenizer->NextToken(); 477 switch (token.type) { 478 case TOKEN_STAR: 479 value = value * _ParsePower(); 480 break; 481 case TOKEN_SLASH: { 482 MAPM rhs = _ParsePower(); 483 if (rhs == MAPM(0)) 484 throw ParseException("division by zero", token.position); 485 value = value / rhs; 486 break; 487 } 488 case TOKEN_MODULO: { 489 MAPM rhs = _ParsePower(); 490 if (rhs == MAPM(0)) 491 throw ParseException("modulo by zero", token.position); 492 value = value % rhs; 493 break; 494 } 495 496 default: 497 fTokenizer->RewindToken(); 498 return _ParseFactorial(value); 499 } 500 } 501 } 502 503 504 MAPM 505 ExpressionParser::_ParsePower() 506 { 507 MAPM value = _ParseUnary(); 508 509 while (true) { 510 Token token = fTokenizer->NextToken(); 511 if (token.type != TOKEN_POWER) { 512 fTokenizer->RewindToken(); 513 return _ParseFactorial(value); 514 } 515 value = value.pow(_ParseUnary()); 516 } 517 } 518 519 520 MAPM 521 ExpressionParser::_ParseUnary() 522 { 523 Token token = fTokenizer->NextToken(); 524 if (token.type == TOKEN_END_OF_LINE) 525 throw ParseException("unexpected end of expression", token.position); 526 527 switch (token.type) { 528 case TOKEN_PLUS: 529 return _ParseUnary(); 530 case TOKEN_MINUS: 531 return -_ParseUnary(); 532 // TODO: Implement ! 533 // case TOKEN_NOT: 534 // return ~(uint64)_ParseUnary(); 535 536 case TOKEN_IDENTIFIER: 537 return _ParseFunction(token); 538 539 default: 540 fTokenizer->RewindToken(); 541 return _ParseAtom(); 542 } 543 544 return MAPM(0); 545 } 546 547 548 struct Function { 549 const char* name; 550 int argumentCount; 551 void* function; 552 MAPM value; 553 }; 554 555 556 void 557 ExpressionParser::_InitArguments(MAPM values[], int32 argumentCount) 558 { 559 _EatToken(TOKEN_OPENING_BRACKET); 560 561 for (int32 i = 0; i < argumentCount; i++) 562 values[i] = _ParseBinary(); 563 564 _EatToken(TOKEN_CLOSING_BRACKET); 565 } 566 567 568 MAPM 569 ExpressionParser::_ParseFunction(const Token& token) 570 { 571 if (strcasecmp("e", token.string.String()) == 0) 572 return _ParseFactorial(MAPM(MM_E)); 573 else if (strcasecmp("pi", token.string.String()) == 0) 574 return _ParseFactorial(MAPM(MM_PI)); 575 576 // hard coded cases for different count of arguments 577 // supports functions with 3 arguments at most 578 579 MAPM values[3]; 580 581 if (strcasecmp("abs", token.string.String()) == 0) { 582 _InitArguments(values, 1); 583 return _ParseFactorial(values[0].abs()); 584 } else if (strcasecmp("acos", token.string.String()) == 0) { 585 _InitArguments(values, 1); 586 if (values[0] < -1 || values[0] > 1) 587 throw ParseException("out of domain", token.position); 588 return _ParseFactorial(values[0].acos()); 589 } else if (strcasecmp("asin", token.string.String()) == 0) { 590 _InitArguments(values, 1); 591 if (values[0] < -1 || values[0] > 1) 592 throw ParseException("out of domain", token.position); 593 return _ParseFactorial(values[0].asin()); 594 } else if (strcasecmp("atan", token.string.String()) == 0) { 595 _InitArguments(values, 1); 596 return _ParseFactorial(values[0].atan()); 597 } else if (strcasecmp("atan2", token.string.String()) == 0) { 598 _InitArguments(values, 2); 599 return _ParseFactorial(values[0].atan2(values[1])); 600 } else if (strcasecmp("cbrt", token.string.String()) == 0) { 601 _InitArguments(values, 1); 602 return _ParseFactorial(values[0].cbrt()); 603 } else if (strcasecmp("ceil", token.string.String()) == 0) { 604 _InitArguments(values, 1); 605 return _ParseFactorial(values[0].ceil()); 606 } else if (strcasecmp("cos", token.string.String()) == 0) { 607 _InitArguments(values, 1); 608 return _ParseFactorial(values[0].cos()); 609 } else if (strcasecmp("cosh", token.string.String()) == 0) { 610 _InitArguments(values, 1); 611 return _ParseFactorial(values[0].cosh()); 612 } else if (strcasecmp("exp", token.string.String()) == 0) { 613 _InitArguments(values, 1); 614 return _ParseFactorial(values[0].exp()); 615 } else if (strcasecmp("floor", token.string.String()) == 0) { 616 _InitArguments(values, 1); 617 return _ParseFactorial(values[0].floor()); 618 } else if (strcasecmp("ln", token.string.String()) == 0) { 619 _InitArguments(values, 1); 620 if (values[0] <= 0) 621 throw ParseException("out of domain", token.position); 622 return _ParseFactorial(values[0].log()); 623 } else if (strcasecmp("log", token.string.String()) == 0) { 624 _InitArguments(values, 1); 625 if (values[0] <= 0) 626 throw ParseException("out of domain", token.position); 627 return _ParseFactorial(values[0].log10()); 628 } else if (strcasecmp("pow", token.string.String()) == 0) { 629 _InitArguments(values, 2); 630 return _ParseFactorial(values[0].pow(values[1])); 631 } else if (strcasecmp("sin", token.string.String()) == 0) { 632 _InitArguments(values, 1); 633 return _ParseFactorial(values[0].sin()); 634 } else if (strcasecmp("sinh", token.string.String()) == 0) { 635 _InitArguments(values, 1); 636 return _ParseFactorial(values[0].sinh()); 637 } else if (strcasecmp("sqrt", token.string.String()) == 0) { 638 _InitArguments(values, 1); 639 if (values[0] < 0) 640 throw ParseException("out of domain", token.position); 641 return _ParseFactorial(values[0].sqrt()); 642 } else if (strcasecmp("tan", token.string.String()) == 0) { 643 _InitArguments(values, 1); 644 return _ParseFactorial(values[0].tan()); 645 } else if (strcasecmp("tanh", token.string.String()) == 0) { 646 _InitArguments(values, 1); 647 return _ParseFactorial(values[0].tanh()); 648 } 649 650 throw ParseException("unknown identifier", token.position); 651 } 652 653 654 MAPM 655 ExpressionParser::_ParseAtom() 656 { 657 Token token = fTokenizer->NextToken(); 658 if (token.type == TOKEN_END_OF_LINE) 659 throw ParseException("unexpected end of expression", token.position); 660 661 if (token.type == TOKEN_CONSTANT) 662 return _ParseFactorial(token.value); 663 664 fTokenizer->RewindToken(); 665 666 _EatToken(TOKEN_OPENING_BRACKET); 667 668 MAPM value = _ParseBinary(); 669 670 _EatToken(TOKEN_CLOSING_BRACKET); 671 672 return _ParseFactorial(value); 673 } 674 675 676 MAPM 677 ExpressionParser::_ParseFactorial(MAPM value) 678 { 679 if (fTokenizer->NextToken().type == TOKEN_FACTORIAL) { 680 fTokenizer->RewindToken(); 681 _EatToken(TOKEN_FACTORIAL); 682 return value.factorial(); 683 } 684 685 fTokenizer->RewindToken(); 686 return value; 687 } 688 689 690 void 691 ExpressionParser::_EatToken(int32 type) 692 { 693 Token token = fTokenizer->NextToken(); 694 if (token.type != type) { 695 BString temp("expected '"); 696 temp << (char)type << "' got '" << token.string << "'"; 697 throw ParseException(temp.String(), token.position); 698 } 699 } 700 701