1 /* 2 * Copyright 2006-2012 Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Stephan Aßmus <superstippi@gmx.de> 7 * John Scipione <jscipione@gmail.com> 8 * Ingo Weinhold <bonefish@cs.tu-berlin.de> 9 */ 10 11 #include <ExpressionParser.h> 12 13 #include <ctype.h> 14 #include <math.h> 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <strings.h> 18 19 #include <m_apm.h> 20 21 22 static const int32 kMaxDecimalPlaces = 32; 23 24 25 enum { 26 TOKEN_NONE = 0, 27 TOKEN_IDENTIFIER, 28 TOKEN_CONSTANT, 29 30 TOKEN_END_OF_LINE = '\n', 31 32 TOKEN_PLUS = '+', 33 TOKEN_MINUS = '-', 34 35 TOKEN_STAR = '*', 36 TOKEN_SLASH = '/', 37 TOKEN_MODULO = '%', 38 39 TOKEN_POWER = '^', 40 TOKEN_FACTORIAL = '!', 41 42 TOKEN_OPENING_BRACKET = '(', 43 TOKEN_CLOSING_BRACKET = ')', 44 45 TOKEN_AND = '&', 46 TOKEN_OR = '|', 47 TOKEN_NOT = '~' 48 }; 49 50 51 struct ExpressionParser::Token { 52 Token() 53 : string(""), 54 type(TOKEN_NONE), 55 value(0), 56 position(0) 57 { 58 } 59 60 Token(const Token& other) 61 : string(other.string), 62 type(other.type), 63 value(other.value), 64 position(other.position) 65 { 66 } 67 68 Token(const char* string, int32 length, int32 position, int32 type) 69 : string(string, length), 70 type(type), 71 value(0), 72 position(position) 73 { 74 } 75 76 Token& operator=(const Token& other) 77 { 78 string = other.string; 79 type = other.type; 80 value = other.value; 81 position = other.position; 82 return *this; 83 } 84 85 BString string; 86 int32 type; 87 MAPM value; 88 89 int32 position; 90 }; 91 92 93 class ExpressionParser::Tokenizer { 94 public: 95 Tokenizer() 96 : fString(""), 97 fCurrentChar(NULL), 98 fCurrentToken(), 99 fReuseToken(false), 100 fHexSupport(false) 101 { 102 } 103 104 void SetSupportHexInput(bool enabled) 105 { 106 fHexSupport = enabled; 107 } 108 109 void SetTo(const char* string) 110 { 111 fString = string; 112 fCurrentChar = fString.String(); 113 fCurrentToken = Token(); 114 fReuseToken = false; 115 } 116 117 const Token& NextToken() 118 { 119 if (fCurrentToken.type == TOKEN_END_OF_LINE) 120 return fCurrentToken; 121 122 if (fReuseToken) { 123 fReuseToken = false; 124 //printf("next token (recycled): '%s'\n", fCurrentToken.string.String()); 125 return fCurrentToken; 126 } 127 128 while (*fCurrentChar != 0 && isspace(*fCurrentChar)) 129 fCurrentChar++; 130 131 if (*fCurrentChar == 0) 132 return fCurrentToken = Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE); 133 134 bool decimal = *fCurrentChar == '.' || *fCurrentChar == ','; 135 136 if (decimal || isdigit(*fCurrentChar)) { 137 if (fHexSupport && *fCurrentChar == '0' && fCurrentChar[1] == 'x') 138 return _ParseHexNumber(); 139 140 BString temp; 141 142 const char* begin = fCurrentChar; 143 144 // optional digits before the comma 145 while (isdigit(*fCurrentChar)) { 146 temp << *fCurrentChar; 147 fCurrentChar++; 148 } 149 150 // optional post comma part 151 // (required if there are no digits before the comma) 152 if (*fCurrentChar == '.' || *fCurrentChar == ',') { 153 temp << '.'; 154 fCurrentChar++; 155 156 // optional post comma digits 157 while (isdigit(*fCurrentChar)) { 158 temp << *fCurrentChar; 159 fCurrentChar++; 160 } 161 } 162 163 // optional exponent part 164 if (*fCurrentChar == 'E') { 165 temp << *fCurrentChar; 166 fCurrentChar++; 167 168 // optional exponent sign 169 if (*fCurrentChar == '+' || *fCurrentChar == '-') { 170 temp << *fCurrentChar; 171 fCurrentChar++; 172 } 173 174 // required exponent digits 175 if (!isdigit(*fCurrentChar)) { 176 throw ParseException("missing exponent in constant", 177 fCurrentChar - begin); 178 } 179 180 while (isdigit(*fCurrentChar)) { 181 temp << *fCurrentChar; 182 fCurrentChar++; 183 } 184 } 185 186 int32 length = fCurrentChar - begin; 187 BString test = temp; 188 test << "&_"; 189 double value; 190 char t[2]; 191 int32 matches = sscanf(test.String(), "%lf&%s", &value, t); 192 if (matches != 2) { 193 throw ParseException("error in constant", 194 _CurrentPos() - length); 195 } 196 197 fCurrentToken = Token(begin, length, _CurrentPos() - length, 198 TOKEN_CONSTANT); 199 fCurrentToken.value = temp.String(); 200 } else if (isalpha(*fCurrentChar) && *fCurrentChar != 'x') { 201 const char* begin = fCurrentChar; 202 while (*fCurrentChar != 0 && (isalpha(*fCurrentChar) 203 || isdigit(*fCurrentChar))) { 204 fCurrentChar++; 205 } 206 int32 length = fCurrentChar - begin; 207 fCurrentToken = Token(begin, length, _CurrentPos() - length, 208 TOKEN_IDENTIFIER); 209 } else if (strncmp(fCurrentChar, "π", 2) == 0) { 210 fCurrentToken = Token(fCurrentChar, 2, _CurrentPos() - 1, 211 TOKEN_IDENTIFIER); 212 fCurrentChar += 2; 213 } else { 214 int32 type = TOKEN_NONE; 215 216 switch (*fCurrentChar) { 217 case TOKEN_PLUS: 218 case TOKEN_MINUS: 219 case TOKEN_STAR: 220 case TOKEN_SLASH: 221 case TOKEN_MODULO: 222 case TOKEN_POWER: 223 case TOKEN_FACTORIAL: 224 case TOKEN_OPENING_BRACKET: 225 case TOKEN_CLOSING_BRACKET: 226 case TOKEN_AND: 227 case TOKEN_OR: 228 case TOKEN_NOT: 229 case TOKEN_END_OF_LINE: 230 type = *fCurrentChar; 231 break; 232 233 case '\\': 234 case ':': 235 type = TOKEN_SLASH; 236 break; 237 238 case 'x': 239 if (!fHexSupport) { 240 type = TOKEN_STAR; 241 break; 242 } 243 // fall through 244 245 default: 246 throw ParseException("unexpected character", _CurrentPos()); 247 } 248 fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(), type); 249 fCurrentChar++; 250 } 251 252 //printf("next token: '%s'\n", fCurrentToken.string.String()); 253 return fCurrentToken; 254 } 255 256 void RewindToken() 257 { 258 fReuseToken = true; 259 } 260 261 private: 262 static bool _IsHexDigit(char c) 263 { 264 return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 265 } 266 267 Token& _ParseHexNumber() 268 { 269 const char* begin = fCurrentChar; 270 fCurrentChar += 2; 271 // skip "0x" 272 273 if (!_IsHexDigit(*fCurrentChar)) 274 throw ParseException("expected hex digit", _CurrentPos()); 275 276 fCurrentChar++; 277 while (_IsHexDigit(*fCurrentChar)) 278 fCurrentChar++; 279 280 int32 length = fCurrentChar - begin; 281 fCurrentToken = Token(begin, length, _CurrentPos() - length, 282 TOKEN_CONSTANT); 283 284 // MAPM has no conversion from long long, so we need to improvise. 285 uint64 value = strtoll(fCurrentToken.string.String(), NULL, 0); 286 if (value <= 0x7fffffff) { 287 fCurrentToken.value = (long)value; 288 } else { 289 fCurrentToken.value = (int)(value >> 60); 290 fCurrentToken.value *= 1 << 30; 291 fCurrentToken.value += (int)((value >> 30) & 0x3fffffff); 292 fCurrentToken.value *= 1 << 30; 293 fCurrentToken.value += (int)(value& 0x3fffffff); 294 } 295 296 return fCurrentToken; 297 } 298 299 int32 _CurrentPos() const 300 { 301 return fCurrentChar - fString.String(); 302 } 303 304 BString fString; 305 const char* fCurrentChar; 306 Token fCurrentToken; 307 bool fReuseToken; 308 bool fHexSupport; 309 }; 310 311 312 ExpressionParser::ExpressionParser() 313 : fTokenizer(new Tokenizer()), 314 fDegreeMode(false) 315 { 316 } 317 318 319 ExpressionParser::~ExpressionParser() 320 { 321 delete fTokenizer; 322 } 323 324 325 bool 326 ExpressionParser::DegreeMode() 327 { 328 return fDegreeMode; 329 } 330 331 332 void 333 ExpressionParser::SetDegreeMode(bool degrees) 334 { 335 fDegreeMode = degrees; 336 } 337 338 339 void 340 ExpressionParser::SetSupportHexInput(bool enabled) 341 { 342 fTokenizer->SetSupportHexInput(enabled); 343 } 344 345 346 BString 347 ExpressionParser::Evaluate(const char* expressionString) 348 { 349 fTokenizer->SetTo(expressionString); 350 351 MAPM value = _ParseBinary(); 352 Token token = fTokenizer->NextToken(); 353 if (token.type != TOKEN_END_OF_LINE) 354 throw ParseException("parse error", token.position); 355 356 if (value == 0) 357 return BString("0"); 358 359 char* buffer = value.toFixPtStringExp(kMaxDecimalPlaces, '.', 0, 0); 360 if (buffer == NULL) 361 throw ParseException("out of memory", 0); 362 363 // remove surplus zeros 364 int32 lastChar = strlen(buffer) - 1; 365 if (strchr(buffer, '.')) { 366 while (buffer[lastChar] == '0') 367 lastChar--; 368 if (buffer[lastChar] == '.') 369 lastChar--; 370 } 371 372 BString result(buffer, lastChar + 1); 373 free(buffer); 374 return result; 375 } 376 377 378 int64 379 ExpressionParser::EvaluateToInt64(const char* expressionString) 380 { 381 fTokenizer->SetTo(expressionString); 382 383 MAPM value = _ParseBinary(); 384 Token token = fTokenizer->NextToken(); 385 if (token.type != TOKEN_END_OF_LINE) 386 throw ParseException("parse error", token.position); 387 388 char buffer[128]; 389 value.toIntegerString(buffer); 390 391 return strtoll(buffer, NULL, 0); 392 } 393 394 395 double 396 ExpressionParser::EvaluateToDouble(const char* expressionString) 397 { 398 fTokenizer->SetTo(expressionString); 399 400 MAPM value = _ParseBinary(); 401 Token token = fTokenizer->NextToken(); 402 if (token.type != TOKEN_END_OF_LINE) 403 throw ParseException("parse error", token.position); 404 405 char buffer[1024]; 406 value.toString(buffer, sizeof(buffer) - 4); 407 408 return strtod(buffer, NULL); 409 } 410 411 412 MAPM 413 ExpressionParser::_ParseBinary() 414 { 415 return _ParseSum(); 416 // binary operation appearantly not supported by m_apm library, 417 // should not be too hard to implement though.... 418 419 // double value = _ParseSum(); 420 // 421 // while (true) { 422 // Token token = fTokenizer->NextToken(); 423 // switch (token.type) { 424 // case TOKEN_AND: 425 // value = (uint64)value & (uint64)_ParseSum(); 426 // break; 427 // case TOKEN_OR: 428 // value = (uint64)value | (uint64)_ParseSum(); 429 // break; 430 // 431 // default: 432 // fTokenizer->RewindToken(); 433 // return value; 434 // } 435 // } 436 } 437 438 439 MAPM 440 ExpressionParser::_ParseSum() 441 { 442 // TODO: check isnan()... 443 MAPM value = _ParseProduct(); 444 445 while (true) { 446 Token token = fTokenizer->NextToken(); 447 switch (token.type) { 448 case TOKEN_PLUS: 449 value = value + _ParseProduct(); 450 break; 451 case TOKEN_MINUS: 452 value = value - _ParseProduct(); 453 break; 454 455 default: 456 fTokenizer->RewindToken(); 457 return _ParseFactorial(value); 458 } 459 } 460 } 461 462 463 MAPM 464 ExpressionParser::_ParseProduct() 465 { 466 // TODO: check isnan()... 467 MAPM value = _ParsePower(); 468 469 while (true) { 470 Token token = fTokenizer->NextToken(); 471 switch (token.type) { 472 case TOKEN_STAR: 473 value = value * _ParsePower(); 474 break; 475 case TOKEN_SLASH: { 476 MAPM rhs = _ParsePower(); 477 if (rhs == MAPM(0)) 478 throw ParseException("division by zero", token.position); 479 value = value / rhs; 480 break; 481 } 482 case TOKEN_MODULO: { 483 MAPM rhs = _ParsePower(); 484 if (rhs == MAPM(0)) 485 throw ParseException("modulo by zero", token.position); 486 value = value % rhs; 487 break; 488 } 489 490 default: 491 fTokenizer->RewindToken(); 492 return _ParseFactorial(value); 493 } 494 } 495 } 496 497 498 MAPM 499 ExpressionParser::_ParsePower() 500 { 501 MAPM value = _ParseUnary(); 502 503 while (true) { 504 Token token = fTokenizer->NextToken(); 505 if (token.type != TOKEN_POWER) { 506 fTokenizer->RewindToken(); 507 return _ParseFactorial(value); 508 } 509 value = value.pow(_ParseUnary()); 510 } 511 } 512 513 514 MAPM 515 ExpressionParser::_ParseUnary() 516 { 517 Token token = fTokenizer->NextToken(); 518 if (token.type == TOKEN_END_OF_LINE) 519 throw ParseException("unexpected end of expression", token.position); 520 521 switch (token.type) { 522 case TOKEN_PLUS: 523 return _ParseUnary(); 524 case TOKEN_MINUS: 525 return -_ParseUnary(); 526 // TODO: Implement ! 527 // case TOKEN_NOT: 528 // return ~(uint64)_ParseUnary(); 529 530 case TOKEN_IDENTIFIER: 531 return _ParseFunction(token); 532 533 default: 534 fTokenizer->RewindToken(); 535 return _ParseAtom(); 536 } 537 538 return MAPM(0); 539 } 540 541 542 struct Function { 543 const char* name; 544 int argumentCount; 545 void* function; 546 MAPM value; 547 }; 548 549 550 void 551 ExpressionParser::_InitArguments(MAPM values[], int32 argumentCount) 552 { 553 _EatToken(TOKEN_OPENING_BRACKET); 554 555 for (int32 i = 0; i < argumentCount; i++) 556 values[i] = _ParseBinary(); 557 558 _EatToken(TOKEN_CLOSING_BRACKET); 559 } 560 561 562 MAPM 563 ExpressionParser::_ParseFunction(const Token& token) 564 { 565 if (token.string == "e") 566 return _ParseFactorial(MAPM(MM_E)); 567 else if (token.string.ICompare("pi") == 0 || token.string == "π") 568 return _ParseFactorial(MAPM(MM_PI)); 569 570 // hard coded cases for different count of arguments 571 // supports functions with 3 arguments at most 572 573 MAPM values[3]; 574 575 if (strcasecmp("abs", token.string.String()) == 0) { 576 _InitArguments(values, 1); 577 return _ParseFactorial(values[0].abs()); 578 } else if (strcasecmp("acos", token.string.String()) == 0) { 579 _InitArguments(values, 1); 580 if (fDegreeMode) 581 values[0] = values[0] * MM_PI / 180; 582 583 if (values[0] < -1 || values[0] > 1) 584 throw ParseException("out of domain", token.position); 585 586 return _ParseFactorial(values[0].acos()); 587 } else if (strcasecmp("asin", token.string.String()) == 0) { 588 _InitArguments(values, 1); 589 if (fDegreeMode) 590 values[0] = values[0] * MM_PI / 180; 591 592 if (values[0] < -1 || values[0] > 1) 593 throw ParseException("out of domain", token.position); 594 595 return _ParseFactorial(values[0].asin()); 596 } else if (strcasecmp("atan", token.string.String()) == 0) { 597 _InitArguments(values, 1); 598 if (fDegreeMode) 599 values[0] = values[0] * MM_PI / 180; 600 601 return _ParseFactorial(values[0].atan()); 602 } else if (strcasecmp("atan2", token.string.String()) == 0) { 603 _InitArguments(values, 2); 604 605 if (fDegreeMode) { 606 values[0] = values[0] * MM_PI / 180; 607 values[1] = values[1] * MM_PI / 180; 608 } 609 610 return _ParseFactorial(values[0].atan2(values[1])); 611 } else if (strcasecmp("cbrt", token.string.String()) == 0) { 612 _InitArguments(values, 1); 613 return _ParseFactorial(values[0].cbrt()); 614 } else if (strcasecmp("ceil", token.string.String()) == 0) { 615 _InitArguments(values, 1); 616 return _ParseFactorial(values[0].ceil()); 617 } else if (strcasecmp("cos", token.string.String()) == 0) { 618 _InitArguments(values, 1); 619 if (fDegreeMode) 620 values[0] = values[0] * MM_PI / 180; 621 622 return _ParseFactorial(values[0].cos()); 623 } else if (strcasecmp("cosh", token.string.String()) == 0) { 624 _InitArguments(values, 1); 625 // This function always uses radians 626 return _ParseFactorial(values[0].cosh()); 627 } else if (strcasecmp("exp", token.string.String()) == 0) { 628 _InitArguments(values, 1); 629 return _ParseFactorial(values[0].exp()); 630 } else if (strcasecmp("floor", token.string.String()) == 0) { 631 _InitArguments(values, 1); 632 return _ParseFactorial(values[0].floor()); 633 } else if (strcasecmp("ln", token.string.String()) == 0) { 634 _InitArguments(values, 1); 635 if (values[0] <= 0) 636 throw ParseException("out of domain", token.position); 637 638 return _ParseFactorial(values[0].log()); 639 } else if (strcasecmp("log", token.string.String()) == 0) { 640 _InitArguments(values, 1); 641 if (values[0] <= 0) 642 throw ParseException("out of domain", token.position); 643 644 return _ParseFactorial(values[0].log10()); 645 } else if (strcasecmp("pow", token.string.String()) == 0) { 646 _InitArguments(values, 2); 647 return _ParseFactorial(values[0].pow(values[1])); 648 } else if (strcasecmp("sin", token.string.String()) == 0) { 649 _InitArguments(values, 1); 650 if (fDegreeMode) 651 values[0] = values[0] * MM_PI / 180; 652 653 return _ParseFactorial(values[0].sin()); 654 } else if (strcasecmp("sinh", token.string.String()) == 0) { 655 _InitArguments(values, 1); 656 // This function always uses radians 657 return _ParseFactorial(values[0].sinh()); 658 } else if (strcasecmp("sqrt", token.string.String()) == 0) { 659 _InitArguments(values, 1); 660 if (values[0] < 0) 661 throw ParseException("out of domain", token.position); 662 663 return _ParseFactorial(values[0].sqrt()); 664 } else if (strcasecmp("tan", token.string.String()) == 0) { 665 _InitArguments(values, 1); 666 if (fDegreeMode) 667 values[0] = values[0] * MM_PI / 180; 668 669 MAPM divided_by_half_pi = values[0] / MM_HALF_PI; 670 if (divided_by_half_pi.is_integer() && divided_by_half_pi.is_odd()) 671 throw ParseException("out of domain", token.position); 672 673 return _ParseFactorial(values[0].tan()); 674 } else if (strcasecmp("tanh", token.string.String()) == 0) { 675 _InitArguments(values, 1); 676 // This function always uses radians 677 return _ParseFactorial(values[0].tanh()); 678 } 679 680 throw ParseException("unknown identifier", token.position); 681 } 682 683 684 MAPM 685 ExpressionParser::_ParseAtom() 686 { 687 Token token = fTokenizer->NextToken(); 688 if (token.type == TOKEN_END_OF_LINE) 689 throw ParseException("unexpected end of expression", token.position); 690 691 if (token.type == TOKEN_CONSTANT) 692 return _ParseFactorial(token.value); 693 694 fTokenizer->RewindToken(); 695 696 _EatToken(TOKEN_OPENING_BRACKET); 697 698 MAPM value = _ParseBinary(); 699 700 _EatToken(TOKEN_CLOSING_BRACKET); 701 702 return _ParseFactorial(value); 703 } 704 705 706 MAPM 707 ExpressionParser::_ParseFactorial(MAPM value) 708 { 709 if (fTokenizer->NextToken().type == TOKEN_FACTORIAL) { 710 fTokenizer->RewindToken(); 711 _EatToken(TOKEN_FACTORIAL); 712 if (value < 1000) 713 return value.factorial(); 714 else { 715 // Use Stirling's approximation (9 term expansion) 716 // http://en.wikipedia.org/wiki/Stirling%27s_approximation 717 // http://www.wolframalpha.com/input/?i=stirling%27s+series 718 // all constants must fit in a signed long for MAPM 719 // (LONG_MAX = 2147483647) 720 return value.pow(value) / value.exp() 721 * (MAPM(2) * MAPM(MM_PI) * value).sqrt() 722 * (MAPM(1) + (MAPM(1) / (MAPM(12) * value)) 723 + (MAPM(1) / (MAPM(288) * value.pow(2))) 724 - (MAPM(139) / (MAPM(51840) * value.pow(3))) 725 - (MAPM(571) / (MAPM(2488320) * value.pow(4))) 726 + (MAPM(163879) / (MAPM(209018880) * value.pow(5))) 727 // 2147483647 * 35 + 84869155 = 75246796800 728 + (MAPM(5246819) / ((MAPM(2147483647) * MAPM(35) 729 + MAPM(84869155)) * value.pow(6))) 730 // 2147483647 * 420 + 1018429860 = 902961561600 731 - (MAPM(534703531) / ((MAPM(2147483647) * MAPM(420) 732 + MAPM(1018429860)) * value.pow(7))) 733 // 2147483647 * 2 + 188163965 = 4483131259 734 // 2147483647 * 40366 + 985018798 = 86686309913600 735 - ((MAPM(2147483647) * MAPM(2) + MAPM(188163965)) 736 / ((MAPM(2147483647) * MAPM(40366) + MAPM(985018798)) 737 * value.pow(8))) 738 // 2147483647 * 201287 + 1380758682 = 432261921612371 739 // 2147483647 * 239771232 + 1145740896 740 // = 514904800886784000 741 + ((MAPM(2147483647) * MAPM(201287) + MAPM(1380758682)) 742 / ((MAPM(2147483647) * MAPM(239771232) 743 + MAPM(1145740896)) 744 * value.pow(9)))); 745 } 746 } 747 748 fTokenizer->RewindToken(); 749 return value; 750 } 751 752 753 void 754 ExpressionParser::_EatToken(int32 type) 755 { 756 Token token = fTokenizer->NextToken(); 757 if (token.type != type) { 758 BString expected; 759 switch (type) { 760 case TOKEN_IDENTIFIER: 761 expected = "an identifier"; 762 break; 763 764 case TOKEN_CONSTANT: 765 expected = "a constant"; 766 break; 767 768 case TOKEN_PLUS: 769 case TOKEN_MINUS: 770 case TOKEN_STAR: 771 case TOKEN_MODULO: 772 case TOKEN_POWER: 773 case TOKEN_FACTORIAL: 774 case TOKEN_OPENING_BRACKET: 775 case TOKEN_CLOSING_BRACKET: 776 case TOKEN_AND: 777 case TOKEN_OR: 778 case TOKEN_NOT: 779 expected << "'" << (char)type << "'"; 780 break; 781 782 case TOKEN_SLASH: 783 expected = "'/', '\\', or ':'"; 784 break; 785 786 case TOKEN_END_OF_LINE: 787 expected = "'\\n'"; 788 break; 789 } 790 BString temp; 791 temp << "Expected " << expected.String() << " got '" << token.string << "'"; 792 throw ParseException(temp.String(), token.position); 793 } 794 } 795