1 /* 2 * Copyright 2006-2012 Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Stephan Aßmus <superstippi@gmx.de> 7 * John Scipione <jscipione@gmail.com> 8 * Ingo Weinhold <bonefish@cs.tu-berlin.de> 9 */ 10 11 #include <ExpressionParser.h> 12 13 #include <ctype.h> 14 #include <math.h> 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 19 #include <m_apm.h> 20 21 22 static const int32 kMaxDecimalPlaces = 32; 23 24 enum { 25 TOKEN_IDENTIFIER = 0, 26 27 TOKEN_CONSTANT, 28 29 TOKEN_PLUS, 30 TOKEN_MINUS, 31 32 TOKEN_STAR, 33 TOKEN_SLASH, 34 TOKEN_MODULO, 35 36 TOKEN_POWER, 37 TOKEN_FACTORIAL, 38 39 TOKEN_OPENING_BRACKET, 40 TOKEN_CLOSING_BRACKET, 41 42 TOKEN_AND, 43 TOKEN_OR, 44 TOKEN_NOT, 45 46 TOKEN_NONE, 47 TOKEN_END_OF_LINE 48 }; 49 50 struct Token { 51 Token() 52 : string(""), 53 type(TOKEN_NONE), 54 value(0), 55 position(0) 56 { 57 } 58 59 Token(const Token& other) 60 : string(other.string), 61 type(other.type), 62 value(other.value), 63 position(other.position) 64 { 65 } 66 67 Token(const char* string, int32 length, int32 position, int32 type) 68 : string(string, length), 69 type(type), 70 value(0), 71 position(position) 72 { 73 } 74 75 Token& operator=(const Token& other) 76 { 77 string = other.string; 78 type = other.type; 79 value = other.value; 80 position = other.position; 81 return *this; 82 } 83 84 BString string; 85 int32 type; 86 MAPM value; 87 88 int32 position; 89 }; 90 91 92 class Tokenizer { 93 public: 94 Tokenizer() 95 : fString(""), 96 fCurrentChar(NULL), 97 fCurrentToken(), 98 fReuseToken(false), 99 fHexSupport(false) 100 { 101 } 102 103 void SetSupportHexInput(bool enabled) 104 { 105 fHexSupport = enabled; 106 } 107 108 void SetTo(const char* string) 109 { 110 fString = string; 111 fCurrentChar = fString.String(); 112 fCurrentToken = Token(); 113 fReuseToken = false; 114 } 115 116 const Token& NextToken() 117 { 118 if (fCurrentToken.type == TOKEN_END_OF_LINE) 119 return fCurrentToken; 120 121 if (fReuseToken) { 122 fReuseToken = false; 123 //printf("next token (recycled): '%s'\n", fCurrentToken.string.String()); 124 return fCurrentToken; 125 } 126 127 while (*fCurrentChar != 0 && isspace(*fCurrentChar)) 128 fCurrentChar++; 129 130 if (*fCurrentChar == 0) 131 return fCurrentToken = Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE); 132 133 bool decimal = *fCurrentChar == '.' || *fCurrentChar == ','; 134 135 if (decimal || isdigit(*fCurrentChar)) { 136 if (fHexSupport && *fCurrentChar == '0' && fCurrentChar[1] == 'x') 137 return _ParseHexNumber(); 138 139 BString temp; 140 141 const char* begin = fCurrentChar; 142 143 // optional digits before the comma 144 while (isdigit(*fCurrentChar)) { 145 temp << *fCurrentChar; 146 fCurrentChar++; 147 } 148 149 // optional post comma part 150 // (required if there are no digits before the comma) 151 if (*fCurrentChar == '.' || *fCurrentChar == ',') { 152 temp << '.'; 153 fCurrentChar++; 154 155 // optional post comma digits 156 while (isdigit(*fCurrentChar)) { 157 temp << *fCurrentChar; 158 fCurrentChar++; 159 } 160 } 161 162 // optional exponent part 163 if (*fCurrentChar == 'E') { 164 temp << *fCurrentChar; 165 fCurrentChar++; 166 167 // optional exponent sign 168 if (*fCurrentChar == '+' || *fCurrentChar == '-') { 169 temp << *fCurrentChar; 170 fCurrentChar++; 171 } 172 173 // required exponent digits 174 if (!isdigit(*fCurrentChar)) { 175 throw ParseException("missing exponent in constant", 176 fCurrentChar - begin); 177 } 178 179 while (isdigit(*fCurrentChar)) { 180 temp << *fCurrentChar; 181 fCurrentChar++; 182 } 183 } 184 185 int32 length = fCurrentChar - begin; 186 BString test = temp; 187 test << "&_"; 188 double value; 189 char t[2]; 190 int32 matches = sscanf(test.String(), "%lf&%s", &value, t); 191 if (matches != 2) { 192 throw ParseException("error in constant", 193 _CurrentPos() - length); 194 } 195 196 fCurrentToken = Token(begin, length, _CurrentPos() - length, 197 TOKEN_CONSTANT); 198 fCurrentToken.value = temp.String(); 199 200 } else if (isalpha(*fCurrentChar) && *fCurrentChar != 'x') { 201 const char* begin = fCurrentChar; 202 while (*fCurrentChar != 0 && (isalpha(*fCurrentChar) 203 || isdigit(*fCurrentChar))) { 204 fCurrentChar++; 205 } 206 int32 length = fCurrentChar - begin; 207 fCurrentToken = Token(begin, length, _CurrentPos() - length, 208 TOKEN_IDENTIFIER); 209 210 } else if ((unsigned char)fCurrentChar[0] == 0xCF 211 && (unsigned char)fCurrentChar[1] == 0x80) { 212 // UTF-8 small greek letter PI 213 fCurrentToken = Token(fCurrentChar, 2, _CurrentPos() - 1, 214 TOKEN_IDENTIFIER); 215 fCurrentChar += 2; 216 217 } else { 218 int32 type = TOKEN_NONE; 219 220 switch (*fCurrentChar) { 221 case '+': 222 type = TOKEN_PLUS; 223 break; 224 case '-': 225 type = TOKEN_MINUS; 226 break; 227 case '*': 228 type = TOKEN_STAR; 229 break; 230 case '/': 231 case '\\': 232 case ':': 233 type = TOKEN_SLASH; 234 break; 235 236 case '%': 237 type = TOKEN_MODULO; 238 break; 239 case '^': 240 type = TOKEN_POWER; 241 break; 242 case '!': 243 type = TOKEN_FACTORIAL; 244 break; 245 246 case '(': 247 type = TOKEN_OPENING_BRACKET; 248 break; 249 case ')': 250 type = TOKEN_CLOSING_BRACKET; 251 break; 252 253 case '&': 254 type = TOKEN_AND; 255 break; 256 case '|': 257 type = TOKEN_OR; 258 break; 259 case '~': 260 type = TOKEN_NOT; 261 break; 262 263 case '\n': 264 type = TOKEN_END_OF_LINE; 265 break; 266 267 case 'x': 268 if (!fHexSupport) { 269 type = TOKEN_STAR; 270 break; 271 } 272 // fall through 273 274 default: 275 throw ParseException("unexpected character", _CurrentPos()); 276 } 277 fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(), type); 278 fCurrentChar++; 279 } 280 281 //printf("next token: '%s'\n", fCurrentToken.string.String()); 282 return fCurrentToken; 283 } 284 285 void RewindToken() 286 { 287 fReuseToken = true; 288 } 289 290 private: 291 static bool _IsHexDigit(char c) 292 { 293 return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 294 } 295 296 Token& _ParseHexNumber() 297 { 298 const char* begin = fCurrentChar; 299 fCurrentChar += 2; 300 // skip "0x" 301 302 if (!_IsHexDigit(*fCurrentChar)) 303 throw ParseException("expected hex digit", _CurrentPos()); 304 305 fCurrentChar++; 306 while (_IsHexDigit(*fCurrentChar)) 307 fCurrentChar++; 308 309 int32 length = fCurrentChar - begin; 310 fCurrentToken = Token(begin, length, _CurrentPos() - length, 311 TOKEN_CONSTANT); 312 313 // MAPM has no conversion from long long, so we need to improvise. 314 uint64 value = strtoll(fCurrentToken.string.String(), NULL, 0); 315 if (value <= 0x7fffffff) { 316 fCurrentToken.value = (long)value; 317 } else { 318 fCurrentToken.value = (int)(value >> 60); 319 fCurrentToken.value *= 1 << 30; 320 fCurrentToken.value += (int)((value >> 30) & 0x3fffffff); 321 fCurrentToken.value *= 1 << 30; 322 fCurrentToken.value += (int)(value& 0x3fffffff); 323 } 324 325 return fCurrentToken; 326 } 327 328 int32 _CurrentPos() const 329 { 330 return fCurrentChar - fString.String(); 331 } 332 333 BString fString; 334 const char* fCurrentChar; 335 Token fCurrentToken; 336 bool fReuseToken; 337 bool fHexSupport; 338 }; 339 340 341 ExpressionParser::ExpressionParser() 342 : fTokenizer(new Tokenizer()), 343 fDegreeMode(false) 344 { 345 } 346 347 348 ExpressionParser::~ExpressionParser() 349 { 350 delete fTokenizer; 351 } 352 353 354 bool 355 ExpressionParser::DegreeMode() 356 { 357 return fDegreeMode; 358 } 359 360 361 void 362 ExpressionParser::SetDegreeMode(bool degrees) 363 { 364 fDegreeMode = degrees; 365 } 366 367 368 void 369 ExpressionParser::SetSupportHexInput(bool enabled) 370 { 371 fTokenizer->SetSupportHexInput(enabled); 372 } 373 374 375 BString 376 ExpressionParser::Evaluate(const char* expressionString) 377 { 378 fTokenizer->SetTo(expressionString); 379 380 MAPM value = _ParseBinary(); 381 Token token = fTokenizer->NextToken(); 382 if (token.type != TOKEN_END_OF_LINE) 383 throw ParseException("parse error", token.position); 384 385 if (value == 0) 386 return BString("0"); 387 388 char* buffer = value.toFixPtStringExp(kMaxDecimalPlaces, '.', 0, 0); 389 if (buffer == NULL) 390 throw ParseException("out of memory", 0); 391 392 // remove surplus zeros 393 int32 lastChar = strlen(buffer) - 1; 394 if (strchr(buffer, '.')) { 395 while (buffer[lastChar] == '0') 396 lastChar--; 397 if (buffer[lastChar] == '.') 398 lastChar--; 399 } 400 401 BString result(buffer, lastChar + 1); 402 free(buffer); 403 return result; 404 } 405 406 407 int64 408 ExpressionParser::EvaluateToInt64(const char* expressionString) 409 { 410 fTokenizer->SetTo(expressionString); 411 412 MAPM value = _ParseBinary(); 413 Token token = fTokenizer->NextToken(); 414 if (token.type != TOKEN_END_OF_LINE) 415 throw ParseException("parse error", token.position); 416 417 char buffer[128]; 418 value.toIntegerString(buffer); 419 420 return strtoll(buffer, NULL, 0); 421 } 422 423 424 double 425 ExpressionParser::EvaluateToDouble(const char* expressionString) 426 { 427 fTokenizer->SetTo(expressionString); 428 429 MAPM value = _ParseBinary(); 430 Token token = fTokenizer->NextToken(); 431 if (token.type != TOKEN_END_OF_LINE) 432 throw ParseException("parse error", token.position); 433 434 char buffer[1024]; 435 value.toString(buffer, sizeof(buffer) - 4); 436 437 return strtod(buffer, NULL); 438 } 439 440 441 MAPM 442 ExpressionParser::_ParseBinary() 443 { 444 return _ParseSum(); 445 // binary operation appearantly not supported by m_apm library, 446 // should not be too hard to implement though.... 447 448 // double value = _ParseSum(); 449 // 450 // while (true) { 451 // Token token = fTokenizer->NextToken(); 452 // switch (token.type) { 453 // case TOKEN_AND: 454 // value = (uint64)value & (uint64)_ParseSum(); 455 // break; 456 // case TOKEN_OR: 457 // value = (uint64)value | (uint64)_ParseSum(); 458 // break; 459 // 460 // default: 461 // fTokenizer->RewindToken(); 462 // return value; 463 // } 464 // } 465 } 466 467 468 MAPM 469 ExpressionParser::_ParseSum() 470 { 471 // TODO: check isnan()... 472 MAPM value = _ParseProduct(); 473 474 while (true) { 475 Token token = fTokenizer->NextToken(); 476 switch (token.type) { 477 case TOKEN_PLUS: 478 value = value + _ParseProduct(); 479 break; 480 case TOKEN_MINUS: 481 value = value - _ParseProduct(); 482 break; 483 484 default: 485 fTokenizer->RewindToken(); 486 return _ParseFactorial(value); 487 } 488 } 489 } 490 491 492 MAPM 493 ExpressionParser::_ParseProduct() 494 { 495 // TODO: check isnan()... 496 MAPM value = _ParsePower(); 497 498 while (true) { 499 Token token = fTokenizer->NextToken(); 500 switch (token.type) { 501 case TOKEN_STAR: 502 value = value * _ParsePower(); 503 break; 504 case TOKEN_SLASH: { 505 MAPM rhs = _ParsePower(); 506 if (rhs == MAPM(0)) 507 throw ParseException("division by zero", token.position); 508 value = value / rhs; 509 break; 510 } 511 case TOKEN_MODULO: { 512 MAPM rhs = _ParsePower(); 513 if (rhs == MAPM(0)) 514 throw ParseException("modulo by zero", token.position); 515 value = value % rhs; 516 break; 517 } 518 519 default: 520 fTokenizer->RewindToken(); 521 return _ParseFactorial(value); 522 } 523 } 524 } 525 526 527 MAPM 528 ExpressionParser::_ParsePower() 529 { 530 MAPM value = _ParseUnary(); 531 532 while (true) { 533 Token token = fTokenizer->NextToken(); 534 if (token.type != TOKEN_POWER) { 535 fTokenizer->RewindToken(); 536 return _ParseFactorial(value); 537 } 538 value = value.pow(_ParseUnary()); 539 } 540 } 541 542 543 MAPM 544 ExpressionParser::_ParseUnary() 545 { 546 Token token = fTokenizer->NextToken(); 547 if (token.type == TOKEN_END_OF_LINE) 548 throw ParseException("unexpected end of expression", token.position); 549 550 switch (token.type) { 551 case TOKEN_PLUS: 552 return _ParseUnary(); 553 case TOKEN_MINUS: 554 return -_ParseUnary(); 555 // TODO: Implement ! 556 // case TOKEN_NOT: 557 // return ~(uint64)_ParseUnary(); 558 559 case TOKEN_IDENTIFIER: 560 return _ParseFunction(token); 561 562 default: 563 fTokenizer->RewindToken(); 564 return _ParseAtom(); 565 } 566 567 return MAPM(0); 568 } 569 570 571 struct Function { 572 const char* name; 573 int argumentCount; 574 void* function; 575 MAPM value; 576 }; 577 578 579 void 580 ExpressionParser::_InitArguments(MAPM values[], int32 argumentCount) 581 { 582 _EatToken(TOKEN_OPENING_BRACKET); 583 584 for (int32 i = 0; i < argumentCount; i++) 585 values[i] = _ParseBinary(); 586 587 _EatToken(TOKEN_CLOSING_BRACKET); 588 } 589 590 591 MAPM 592 ExpressionParser::_ParseFunction(const Token& token) 593 { 594 if (strcmp("e", token.string.String()) == 0) 595 return _ParseFactorial(MAPM(MM_E)); 596 else if (strcasecmp("pi", token.string.String()) == 0 597 || ((unsigned char)token.string.String()[0] == 0xCF 598 && (unsigned char)token.string.String()[1] == 0x80)) { 599 // UTF-8 small greek letter PI 600 return _ParseFactorial(MAPM(MM_PI)); 601 } 602 603 // hard coded cases for different count of arguments 604 // supports functions with 3 arguments at most 605 606 MAPM values[3]; 607 608 if (strcasecmp("abs", token.string.String()) == 0) { 609 _InitArguments(values, 1); 610 return _ParseFactorial(values[0].abs()); 611 } else if (strcasecmp("acos", token.string.String()) == 0) { 612 _InitArguments(values, 1); 613 if (fDegreeMode) 614 values[0] = values[0] * MM_PI / 180; 615 616 if (values[0] < -1 || values[0] > 1) 617 throw ParseException("out of domain", token.position); 618 619 return _ParseFactorial(values[0].acos()); 620 } else if (strcasecmp("asin", token.string.String()) == 0) { 621 _InitArguments(values, 1); 622 if (fDegreeMode) 623 values[0] = values[0] * MM_PI / 180; 624 625 if (values[0] < -1 || values[0] > 1) 626 throw ParseException("out of domain", token.position); 627 628 return _ParseFactorial(values[0].asin()); 629 } else if (strcasecmp("atan", token.string.String()) == 0) { 630 _InitArguments(values, 1); 631 if (fDegreeMode) 632 values[0] = values[0] * MM_PI / 180; 633 634 return _ParseFactorial(values[0].atan()); 635 } else if (strcasecmp("atan2", token.string.String()) == 0) { 636 _InitArguments(values, 2); 637 638 if (fDegreeMode) { 639 values[0] = values[0] * MM_PI / 180; 640 values[1] = values[1] * MM_PI / 180; 641 } 642 643 return _ParseFactorial(values[0].atan2(values[1])); 644 } else if (strcasecmp("cbrt", token.string.String()) == 0) { 645 _InitArguments(values, 1); 646 return _ParseFactorial(values[0].cbrt()); 647 } else if (strcasecmp("ceil", token.string.String()) == 0) { 648 _InitArguments(values, 1); 649 return _ParseFactorial(values[0].ceil()); 650 } else if (strcasecmp("cos", token.string.String()) == 0) { 651 _InitArguments(values, 1); 652 if (fDegreeMode) 653 values[0] = values[0] * MM_PI / 180; 654 655 return _ParseFactorial(values[0].cos()); 656 } else if (strcasecmp("cosh", token.string.String()) == 0) { 657 _InitArguments(values, 1); 658 // This function always uses radians 659 return _ParseFactorial(values[0].cosh()); 660 } else if (strcasecmp("exp", token.string.String()) == 0) { 661 _InitArguments(values, 1); 662 return _ParseFactorial(values[0].exp()); 663 } else if (strcasecmp("floor", token.string.String()) == 0) { 664 _InitArguments(values, 1); 665 return _ParseFactorial(values[0].floor()); 666 } else if (strcasecmp("ln", token.string.String()) == 0) { 667 _InitArguments(values, 1); 668 if (values[0] <= 0) 669 throw ParseException("out of domain", token.position); 670 671 return _ParseFactorial(values[0].log()); 672 } else if (strcasecmp("log", token.string.String()) == 0) { 673 _InitArguments(values, 1); 674 if (values[0] <= 0) 675 throw ParseException("out of domain", token.position); 676 677 return _ParseFactorial(values[0].log10()); 678 } else if (strcasecmp("pow", token.string.String()) == 0) { 679 _InitArguments(values, 2); 680 return _ParseFactorial(values[0].pow(values[1])); 681 } else if (strcasecmp("sin", token.string.String()) == 0) { 682 _InitArguments(values, 1); 683 if (fDegreeMode) 684 values[0] = values[0] * MM_PI / 180; 685 686 return _ParseFactorial(values[0].sin()); 687 } else if (strcasecmp("sinh", token.string.String()) == 0) { 688 _InitArguments(values, 1); 689 // This function always uses radians 690 return _ParseFactorial(values[0].sinh()); 691 } else if (strcasecmp("sqrt", token.string.String()) == 0) { 692 _InitArguments(values, 1); 693 if (values[0] < 0) 694 throw ParseException("out of domain", token.position); 695 696 return _ParseFactorial(values[0].sqrt()); 697 } else if (strcasecmp("tan", token.string.String()) == 0) { 698 _InitArguments(values, 1); 699 if (fDegreeMode) 700 values[0] = values[0] * MM_PI / 180; 701 702 MAPM divided_by_half_pi = values[0] / MM_HALF_PI; 703 if (divided_by_half_pi.is_integer() && divided_by_half_pi.is_odd()) 704 throw ParseException("out of domain", token.position); 705 706 return _ParseFactorial(values[0].tan()); 707 } else if (strcasecmp("tanh", token.string.String()) == 0) { 708 _InitArguments(values, 1); 709 // This function always uses radians 710 return _ParseFactorial(values[0].tanh()); 711 } 712 713 throw ParseException("unknown identifier", token.position); 714 } 715 716 717 MAPM 718 ExpressionParser::_ParseAtom() 719 { 720 Token token = fTokenizer->NextToken(); 721 if (token.type == TOKEN_END_OF_LINE) 722 throw ParseException("unexpected end of expression", token.position); 723 724 if (token.type == TOKEN_CONSTANT) 725 return _ParseFactorial(token.value); 726 727 fTokenizer->RewindToken(); 728 729 _EatToken(TOKEN_OPENING_BRACKET); 730 731 MAPM value = _ParseBinary(); 732 733 _EatToken(TOKEN_CLOSING_BRACKET); 734 735 return _ParseFactorial(value); 736 } 737 738 739 MAPM 740 ExpressionParser::_ParseFactorial(MAPM value) 741 { 742 if (fTokenizer->NextToken().type == TOKEN_FACTORIAL) { 743 fTokenizer->RewindToken(); 744 _EatToken(TOKEN_FACTORIAL); 745 return value.factorial(); 746 } 747 748 fTokenizer->RewindToken(); 749 return value; 750 } 751 752 753 void 754 ExpressionParser::_EatToken(int32 type) 755 { 756 Token token = fTokenizer->NextToken(); 757 if (token.type != type) { 758 BString temp("expected '"); 759 temp << (char)type << "' got '" << token.string << "'"; 760 throw ParseException(temp.String(), token.position); 761 } 762 } 763 764