1 /* 2 * Copyright 2006-2011 Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Ingo Weinhold <bonefish@cs.tu-berlin.de> 7 * Stephan Aßmus <superstippi@gmx.de> 8 */ 9 10 #include <ExpressionParser.h> 11 12 #include <ctype.h> 13 #include <math.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 18 #include <m_apm.h> 19 20 21 static const int32 kMaxDecimalPlaces = 32; 22 23 enum { 24 TOKEN_IDENTIFIER = 0, 25 26 TOKEN_CONSTANT, 27 28 TOKEN_PLUS, 29 TOKEN_MINUS, 30 31 TOKEN_STAR, 32 TOKEN_SLASH, 33 TOKEN_MODULO, 34 35 TOKEN_POWER, 36 TOKEN_FACTORIAL, 37 38 TOKEN_OPENING_BRACKET, 39 TOKEN_CLOSING_BRACKET, 40 41 TOKEN_AND, 42 TOKEN_OR, 43 TOKEN_NOT, 44 45 TOKEN_NONE, 46 TOKEN_END_OF_LINE 47 }; 48 49 struct Token { 50 Token() 51 : string(""), 52 type(TOKEN_NONE), 53 value(0), 54 position(0) 55 { 56 } 57 58 Token(const Token& other) 59 : string(other.string), 60 type(other.type), 61 value(other.value), 62 position(other.position) 63 { 64 } 65 66 Token(const char* string, int32 length, int32 position, int32 type) 67 : string(string, length), 68 type(type), 69 value(0), 70 position(position) 71 { 72 } 73 74 Token& operator=(const Token& other) 75 { 76 string = other.string; 77 type = other.type; 78 value = other.value; 79 position = other.position; 80 return *this; 81 } 82 83 BString string; 84 int32 type; 85 MAPM value; 86 87 int32 position; 88 }; 89 90 91 class Tokenizer { 92 public: 93 Tokenizer() 94 : fString(""), 95 fCurrentChar(NULL), 96 fCurrentToken(), 97 fReuseToken(false), 98 fHexSupport(false) 99 { 100 } 101 102 void SetSupportHexInput(bool enabled) 103 { 104 fHexSupport = enabled; 105 } 106 107 void SetTo(const char* string) 108 { 109 fString = string; 110 fCurrentChar = fString.String(); 111 fCurrentToken = Token(); 112 fReuseToken = false; 113 } 114 115 const Token& NextToken() 116 { 117 if (fCurrentToken.type == TOKEN_END_OF_LINE) 118 return fCurrentToken; 119 120 if (fReuseToken) { 121 fReuseToken = false; 122 //printf("next token (recycled): '%s'\n", fCurrentToken.string.String()); 123 return fCurrentToken; 124 } 125 126 while (*fCurrentChar != 0 && isspace(*fCurrentChar)) 127 fCurrentChar++; 128 129 if (*fCurrentChar == 0) 130 return fCurrentToken = Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE); 131 132 bool decimal = *fCurrentChar == '.' || *fCurrentChar == ','; 133 134 if (decimal || isdigit(*fCurrentChar)) { 135 if (fHexSupport && *fCurrentChar == '0' && fCurrentChar[1] == 'x') 136 return _ParseHexNumber(); 137 138 BString temp; 139 140 const char* begin = fCurrentChar; 141 142 // optional digits before the comma 143 while (isdigit(*fCurrentChar)) { 144 temp << *fCurrentChar; 145 fCurrentChar++; 146 } 147 148 // optional post comma part 149 // (required if there are no digits before the comma) 150 if (*fCurrentChar == '.' || *fCurrentChar == ',') { 151 temp << '.'; 152 fCurrentChar++; 153 154 // optional post comma digits 155 while (isdigit(*fCurrentChar)) { 156 temp << *fCurrentChar; 157 fCurrentChar++; 158 } 159 } 160 161 // optional exponent part 162 if (*fCurrentChar == 'E') { 163 temp << *fCurrentChar; 164 fCurrentChar++; 165 166 // optional exponent sign 167 if (*fCurrentChar == '+' || *fCurrentChar == '-') { 168 temp << *fCurrentChar; 169 fCurrentChar++; 170 } 171 172 // required exponent digits 173 if (!isdigit(*fCurrentChar)) { 174 throw ParseException("missing exponent in constant", 175 fCurrentChar - begin); 176 } 177 178 while (isdigit(*fCurrentChar)) { 179 temp << *fCurrentChar; 180 fCurrentChar++; 181 } 182 } 183 184 int32 length = fCurrentChar - begin; 185 BString test = temp; 186 test << "&_"; 187 double value; 188 char t[2]; 189 int32 matches = sscanf(test.String(), "%lf&%s", &value, t); 190 if (matches != 2) { 191 throw ParseException("error in constant", 192 _CurrentPos() - length); 193 } 194 195 fCurrentToken = Token(begin, length, _CurrentPos() - length, 196 TOKEN_CONSTANT); 197 fCurrentToken.value = temp.String(); 198 199 } else if (isalpha(*fCurrentChar) && *fCurrentChar != 'x') { 200 const char* begin = fCurrentChar; 201 while (*fCurrentChar != 0 && (isalpha(*fCurrentChar) 202 || isdigit(*fCurrentChar))) { 203 fCurrentChar++; 204 } 205 int32 length = fCurrentChar - begin; 206 fCurrentToken = Token(begin, length, _CurrentPos() - length, 207 TOKEN_IDENTIFIER); 208 209 } else if ((unsigned char)fCurrentChar[0] == 0xCF 210 && (unsigned char)fCurrentChar[1] == 0x80) { 211 // UTF-8 small greek letter PI 212 fCurrentToken = Token(fCurrentChar, 2, _CurrentPos() - 1, 213 TOKEN_IDENTIFIER); 214 fCurrentChar += 2; 215 216 } else { 217 int32 type = TOKEN_NONE; 218 219 switch (*fCurrentChar) { 220 case '+': 221 type = TOKEN_PLUS; 222 break; 223 case '-': 224 type = TOKEN_MINUS; 225 break; 226 case '*': 227 type = TOKEN_STAR; 228 break; 229 case '/': 230 case '\\': 231 case ':': 232 type = TOKEN_SLASH; 233 break; 234 235 case '%': 236 type = TOKEN_MODULO; 237 break; 238 case '^': 239 type = TOKEN_POWER; 240 break; 241 case '!': 242 type = TOKEN_FACTORIAL; 243 break; 244 245 case '(': 246 type = TOKEN_OPENING_BRACKET; 247 break; 248 case ')': 249 type = TOKEN_CLOSING_BRACKET; 250 break; 251 252 case '&': 253 type = TOKEN_AND; 254 break; 255 case '|': 256 type = TOKEN_OR; 257 break; 258 case '~': 259 type = TOKEN_NOT; 260 break; 261 262 case '\n': 263 type = TOKEN_END_OF_LINE; 264 break; 265 266 case 'x': 267 if (!fHexSupport) { 268 type = TOKEN_STAR; 269 break; 270 } 271 // fall through 272 273 default: 274 throw ParseException("unexpected character", _CurrentPos()); 275 } 276 fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(), type); 277 fCurrentChar++; 278 } 279 280 //printf("next token: '%s'\n", fCurrentToken.string.String()); 281 return fCurrentToken; 282 } 283 284 void RewindToken() 285 { 286 fReuseToken = true; 287 } 288 289 private: 290 static bool _IsHexDigit(char c) 291 { 292 return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 293 } 294 295 Token& _ParseHexNumber() 296 { 297 const char* begin = fCurrentChar; 298 fCurrentChar += 2; 299 // skip "0x" 300 301 if (!_IsHexDigit(*fCurrentChar)) 302 throw ParseException("expected hex digit", _CurrentPos()); 303 304 fCurrentChar++; 305 while (_IsHexDigit(*fCurrentChar)) 306 fCurrentChar++; 307 308 int32 length = fCurrentChar - begin; 309 fCurrentToken = Token(begin, length, _CurrentPos() - length, 310 TOKEN_CONSTANT); 311 312 // MAPM has no conversion from long long, so we need to improvise. 313 uint64 value = strtoll(fCurrentToken.string.String(), NULL, 0); 314 if (value <= 0x7fffffff) { 315 fCurrentToken.value = (long)value; 316 } else { 317 fCurrentToken.value = (int)(value >> 60); 318 fCurrentToken.value *= 1 << 30; 319 fCurrentToken.value += (int)((value >> 30) & 0x3fffffff); 320 fCurrentToken.value *= 1 << 30; 321 fCurrentToken.value += (int)(value& 0x3fffffff); 322 } 323 324 return fCurrentToken; 325 } 326 327 int32 _CurrentPos() const 328 { 329 return fCurrentChar - fString.String(); 330 } 331 332 BString fString; 333 const char* fCurrentChar; 334 Token fCurrentToken; 335 bool fReuseToken; 336 bool fHexSupport; 337 }; 338 339 340 ExpressionParser::ExpressionParser() 341 : fTokenizer(new Tokenizer()) 342 { 343 } 344 345 346 ExpressionParser::~ExpressionParser() 347 { 348 delete fTokenizer; 349 } 350 351 352 void 353 ExpressionParser::SetSupportHexInput(bool enabled) 354 { 355 fTokenizer->SetSupportHexInput(enabled); 356 } 357 358 359 BString 360 ExpressionParser::Evaluate(const char* expressionString) 361 { 362 fTokenizer->SetTo(expressionString); 363 364 MAPM value = _ParseBinary(); 365 Token token = fTokenizer->NextToken(); 366 if (token.type != TOKEN_END_OF_LINE) 367 throw ParseException("parse error", token.position); 368 369 if (value == 0) 370 return BString("0"); 371 372 char* buffer = value.toFixPtStringExp(kMaxDecimalPlaces, '.', 0, 0); 373 if (buffer == NULL) 374 throw ParseException("out of memory", 0); 375 376 // remove surplus zeros 377 int32 lastChar = strlen(buffer) - 1; 378 if (strchr(buffer, '.')) { 379 while (buffer[lastChar] == '0') 380 lastChar--; 381 if (buffer[lastChar] == '.') 382 lastChar--; 383 } 384 385 BString result(buffer, lastChar + 1); 386 free(buffer); 387 return result; 388 } 389 390 391 int64 392 ExpressionParser::EvaluateToInt64(const char* expressionString) 393 { 394 fTokenizer->SetTo(expressionString); 395 396 MAPM value = _ParseBinary(); 397 Token token = fTokenizer->NextToken(); 398 if (token.type != TOKEN_END_OF_LINE) 399 throw ParseException("parse error", token.position); 400 401 char buffer[128]; 402 value.toIntegerString(buffer); 403 404 return strtoll(buffer, NULL, 0); 405 } 406 407 408 double 409 ExpressionParser::EvaluateToDouble(const char* expressionString) 410 { 411 fTokenizer->SetTo(expressionString); 412 413 MAPM value = _ParseBinary(); 414 Token token = fTokenizer->NextToken(); 415 if (token.type != TOKEN_END_OF_LINE) 416 throw ParseException("parse error", token.position); 417 418 char buffer[1024]; 419 value.toString(buffer, sizeof(buffer) - 4); 420 421 return strtod(buffer, NULL); 422 } 423 424 425 MAPM 426 ExpressionParser::_ParseBinary() 427 { 428 return _ParseSum(); 429 // binary operation appearantly not supported by m_apm library, 430 // should not be too hard to implement though.... 431 432 // double value = _ParseSum(); 433 // 434 // while (true) { 435 // Token token = fTokenizer->NextToken(); 436 // switch (token.type) { 437 // case TOKEN_AND: 438 // value = (uint64)value & (uint64)_ParseSum(); 439 // break; 440 // case TOKEN_OR: 441 // value = (uint64)value | (uint64)_ParseSum(); 442 // break; 443 // 444 // default: 445 // fTokenizer->RewindToken(); 446 // return value; 447 // } 448 // } 449 } 450 451 452 MAPM 453 ExpressionParser::_ParseSum() 454 { 455 // TODO: check isnan()... 456 MAPM value = _ParseProduct(); 457 458 while (true) { 459 Token token = fTokenizer->NextToken(); 460 switch (token.type) { 461 case TOKEN_PLUS: 462 value = value + _ParseProduct(); 463 break; 464 case TOKEN_MINUS: 465 value = value - _ParseProduct(); 466 break; 467 468 default: 469 fTokenizer->RewindToken(); 470 return _ParseFactorial(value); 471 } 472 } 473 } 474 475 476 MAPM 477 ExpressionParser::_ParseProduct() 478 { 479 // TODO: check isnan()... 480 MAPM value = _ParsePower(); 481 482 while (true) { 483 Token token = fTokenizer->NextToken(); 484 switch (token.type) { 485 case TOKEN_STAR: 486 value = value * _ParsePower(); 487 break; 488 case TOKEN_SLASH: { 489 MAPM rhs = _ParsePower(); 490 if (rhs == MAPM(0)) 491 throw ParseException("division by zero", token.position); 492 value = value / rhs; 493 break; 494 } 495 case TOKEN_MODULO: { 496 MAPM rhs = _ParsePower(); 497 if (rhs == MAPM(0)) 498 throw ParseException("modulo by zero", token.position); 499 value = value % rhs; 500 break; 501 } 502 503 default: 504 fTokenizer->RewindToken(); 505 return _ParseFactorial(value); 506 } 507 } 508 } 509 510 511 MAPM 512 ExpressionParser::_ParsePower() 513 { 514 MAPM value = _ParseUnary(); 515 516 while (true) { 517 Token token = fTokenizer->NextToken(); 518 if (token.type != TOKEN_POWER) { 519 fTokenizer->RewindToken(); 520 return _ParseFactorial(value); 521 } 522 value = value.pow(_ParseUnary()); 523 } 524 } 525 526 527 MAPM 528 ExpressionParser::_ParseUnary() 529 { 530 Token token = fTokenizer->NextToken(); 531 if (token.type == TOKEN_END_OF_LINE) 532 throw ParseException("unexpected end of expression", token.position); 533 534 switch (token.type) { 535 case TOKEN_PLUS: 536 return _ParseUnary(); 537 case TOKEN_MINUS: 538 return -_ParseUnary(); 539 // TODO: Implement ! 540 // case TOKEN_NOT: 541 // return ~(uint64)_ParseUnary(); 542 543 case TOKEN_IDENTIFIER: 544 return _ParseFunction(token); 545 546 default: 547 fTokenizer->RewindToken(); 548 return _ParseAtom(); 549 } 550 551 return MAPM(0); 552 } 553 554 555 struct Function { 556 const char* name; 557 int argumentCount; 558 void* function; 559 MAPM value; 560 }; 561 562 563 void 564 ExpressionParser::_InitArguments(MAPM values[], int32 argumentCount) 565 { 566 _EatToken(TOKEN_OPENING_BRACKET); 567 568 for (int32 i = 0; i < argumentCount; i++) 569 values[i] = _ParseBinary(); 570 571 _EatToken(TOKEN_CLOSING_BRACKET); 572 } 573 574 575 MAPM 576 ExpressionParser::_ParseFunction(const Token& token) 577 { 578 if (strcmp("e", token.string.String()) == 0) 579 return _ParseFactorial(MAPM(MM_E)); 580 else if (strcasecmp("pi", token.string.String()) == 0 581 || ((unsigned char)token.string.String()[0] == 0xCF 582 && (unsigned char)token.string.String()[1] == 0x80)) { 583 // UTF-8 small greek letter PI 584 return _ParseFactorial(MAPM(MM_PI)); 585 } 586 587 // hard coded cases for different count of arguments 588 // supports functions with 3 arguments at most 589 590 MAPM values[3]; 591 592 if (strcasecmp("abs", token.string.String()) == 0) { 593 _InitArguments(values, 1); 594 return _ParseFactorial(values[0].abs()); 595 } else if (strcasecmp("acos", token.string.String()) == 0) { 596 _InitArguments(values, 1); 597 if (values[0] < -1 || values[0] > 1) 598 throw ParseException("out of domain", token.position); 599 return _ParseFactorial(values[0].acos()); 600 } else if (strcasecmp("asin", token.string.String()) == 0) { 601 _InitArguments(values, 1); 602 if (values[0] < -1 || values[0] > 1) 603 throw ParseException("out of domain", token.position); 604 return _ParseFactorial(values[0].asin()); 605 } else if (strcasecmp("atan", token.string.String()) == 0) { 606 _InitArguments(values, 1); 607 return _ParseFactorial(values[0].atan()); 608 } else if (strcasecmp("atan2", token.string.String()) == 0) { 609 _InitArguments(values, 2); 610 return _ParseFactorial(values[0].atan2(values[1])); 611 } else if (strcasecmp("cbrt", token.string.String()) == 0) { 612 _InitArguments(values, 1); 613 return _ParseFactorial(values[0].cbrt()); 614 } else if (strcasecmp("ceil", token.string.String()) == 0) { 615 _InitArguments(values, 1); 616 return _ParseFactorial(values[0].ceil()); 617 } else if (strcasecmp("cos", token.string.String()) == 0) { 618 _InitArguments(values, 1); 619 return _ParseFactorial(values[0].cos()); 620 } else if (strcasecmp("cosh", token.string.String()) == 0) { 621 _InitArguments(values, 1); 622 return _ParseFactorial(values[0].cosh()); 623 } else if (strcasecmp("exp", token.string.String()) == 0) { 624 _InitArguments(values, 1); 625 return _ParseFactorial(values[0].exp()); 626 } else if (strcasecmp("floor", token.string.String()) == 0) { 627 _InitArguments(values, 1); 628 return _ParseFactorial(values[0].floor()); 629 } else if (strcasecmp("ln", token.string.String()) == 0) { 630 _InitArguments(values, 1); 631 if (values[0] <= 0) 632 throw ParseException("out of domain", token.position); 633 return _ParseFactorial(values[0].log()); 634 } else if (strcasecmp("log", token.string.String()) == 0) { 635 _InitArguments(values, 1); 636 if (values[0] <= 0) 637 throw ParseException("out of domain", token.position); 638 return _ParseFactorial(values[0].log10()); 639 } else if (strcasecmp("pow", token.string.String()) == 0) { 640 _InitArguments(values, 2); 641 return _ParseFactorial(values[0].pow(values[1])); 642 } else if (strcasecmp("sin", token.string.String()) == 0) { 643 _InitArguments(values, 1); 644 return _ParseFactorial(values[0].sin()); 645 } else if (strcasecmp("sinh", token.string.String()) == 0) { 646 _InitArguments(values, 1); 647 return _ParseFactorial(values[0].sinh()); 648 } else if (strcasecmp("sqrt", token.string.String()) == 0) { 649 _InitArguments(values, 1); 650 if (values[0] < 0) 651 throw ParseException("out of domain", token.position); 652 return _ParseFactorial(values[0].sqrt()); 653 } else if (strcasecmp("tan", token.string.String()) == 0) { 654 _InitArguments(values, 1); 655 MAPM divided_by_half_pi = values[0] / MM_HALF_PI; 656 if (divided_by_half_pi.is_integer() && divided_by_half_pi.is_odd()) 657 throw ParseException("out of domain", token.position); 658 return _ParseFactorial(values[0].tan()); 659 } else if (strcasecmp("tanh", token.string.String()) == 0) { 660 _InitArguments(values, 1); 661 return _ParseFactorial(values[0].tanh()); 662 } 663 664 throw ParseException("unknown identifier", token.position); 665 } 666 667 668 MAPM 669 ExpressionParser::_ParseAtom() 670 { 671 Token token = fTokenizer->NextToken(); 672 if (token.type == TOKEN_END_OF_LINE) 673 throw ParseException("unexpected end of expression", token.position); 674 675 if (token.type == TOKEN_CONSTANT) 676 return _ParseFactorial(token.value); 677 678 fTokenizer->RewindToken(); 679 680 _EatToken(TOKEN_OPENING_BRACKET); 681 682 MAPM value = _ParseBinary(); 683 684 _EatToken(TOKEN_CLOSING_BRACKET); 685 686 return _ParseFactorial(value); 687 } 688 689 690 MAPM 691 ExpressionParser::_ParseFactorial(MAPM value) 692 { 693 if (fTokenizer->NextToken().type == TOKEN_FACTORIAL) { 694 fTokenizer->RewindToken(); 695 _EatToken(TOKEN_FACTORIAL); 696 return value.factorial(); 697 } 698 699 fTokenizer->RewindToken(); 700 return value; 701 } 702 703 704 void 705 ExpressionParser::_EatToken(int32 type) 706 { 707 Token token = fTokenizer->NextToken(); 708 if (token.type != type) { 709 BString temp("expected '"); 710 temp << (char)type << "' got '" << token.string << "'"; 711 throw ParseException(temp.String(), token.position); 712 } 713 } 714 715