1 /* 2 * Copyright 2006-2009 Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Ingo Weinhold <bonefish@cs.tu-berlin.de> 7 * Stephan Aßmus <superstippi@gmx.de> 8 */ 9 10 #include <ExpressionParser.h> 11 12 #include <ctype.h> 13 #include <math.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 18 #include <m_apm.h> 19 20 21 static const int32 kMaxDecimalPlaces = 32; 22 23 enum { 24 TOKEN_IDENTIFIER = 0, 25 26 TOKEN_CONSTANT, 27 28 TOKEN_PLUS, 29 TOKEN_MINUS, 30 31 TOKEN_STAR, 32 TOKEN_SLASH, 33 TOKEN_MODULO, 34 35 TOKEN_POWER, 36 37 TOKEN_OPENING_BRACKET, 38 TOKEN_CLOSING_BRACKET, 39 40 TOKEN_AND, 41 TOKEN_OR, 42 TOKEN_NOT, 43 44 TOKEN_NONE, 45 TOKEN_END_OF_LINE 46 }; 47 48 struct Token { 49 Token() 50 : string(""), 51 type(TOKEN_NONE), 52 value(0), 53 position(0) 54 { 55 } 56 57 Token(const Token& other) 58 : string(other.string), 59 type(other.type), 60 value(other.value), 61 position(other.position) 62 { 63 } 64 65 Token(const char* string, int32 length, int32 position, int32 type) 66 : string(string, length), 67 type(type), 68 value(0), 69 position(position) 70 { 71 } 72 73 Token& operator=(const Token& other) 74 { 75 string = other.string; 76 type = other.type; 77 value = other.value; 78 position = other.position; 79 return *this; 80 } 81 82 BString string; 83 int32 type; 84 MAPM value; 85 86 int32 position; 87 }; 88 89 90 class Tokenizer { 91 public: 92 Tokenizer() 93 : fString(""), 94 fCurrentChar(NULL), 95 fCurrentToken(), 96 fReuseToken(false), 97 fHexSupport(false) 98 { 99 } 100 101 void SetSupportHexInput(bool enabled) 102 { 103 fHexSupport = enabled; 104 } 105 106 void SetTo(const char* string) 107 { 108 fString = string; 109 fCurrentChar = fString.String(); 110 fCurrentToken = Token(); 111 fReuseToken = false; 112 } 113 114 const Token& NextToken() 115 { 116 if (fCurrentToken.type == TOKEN_END_OF_LINE) 117 return fCurrentToken; 118 119 if (fReuseToken) { 120 fReuseToken = false; 121 //printf("next token (recycled): '%s'\n", fCurrentToken.string.String()); 122 return fCurrentToken; 123 } 124 125 while (*fCurrentChar != 0 && isspace(*fCurrentChar)) 126 fCurrentChar++; 127 128 if (*fCurrentChar == 0) 129 return fCurrentToken = Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE); 130 131 bool decimal = *fCurrentChar == '.' || *fCurrentChar == ','; 132 133 if (decimal || isdigit(*fCurrentChar)) { 134 if (fHexSupport && *fCurrentChar == '0' && fCurrentChar[1] == 'x') 135 return _ParseHexNumber(); 136 137 BString temp; 138 139 const char* begin = fCurrentChar; 140 141 // optional digits before the comma 142 while (isdigit(*fCurrentChar)) { 143 temp << *fCurrentChar; 144 fCurrentChar++; 145 } 146 147 // optional post comma part 148 // (required if there are no digits before the comma) 149 if (*fCurrentChar == '.' || *fCurrentChar == ',') { 150 temp << '.'; 151 fCurrentChar++; 152 153 // optional post comma digits 154 while (isdigit(*fCurrentChar)) { 155 temp << *fCurrentChar; 156 fCurrentChar++; 157 } 158 } 159 160 // optional exponent part 161 if (*fCurrentChar == 'e' || *fCurrentChar == 'E') { 162 temp << *fCurrentChar; 163 fCurrentChar++; 164 165 // optional exponent sign 166 if (*fCurrentChar == '+' || *fCurrentChar == '-') { 167 temp << *fCurrentChar; 168 fCurrentChar++; 169 } 170 171 // required exponent digits 172 if (!isdigit(*fCurrentChar)) { 173 throw ParseException("missing exponent in constant", 174 fCurrentChar - begin); 175 } 176 177 while (isdigit(*fCurrentChar)) { 178 temp << *fCurrentChar; 179 fCurrentChar++; 180 } 181 } 182 183 int32 length = fCurrentChar - begin; 184 BString test = temp; 185 test << "&_"; 186 double value; 187 char t[2]; 188 int32 matches = sscanf(test.String(), "%lf&%s", &value, t); 189 if (matches != 2) { 190 throw ParseException("error in constant", 191 _CurrentPos() - length); 192 } 193 194 fCurrentToken = Token(begin, length, _CurrentPos() - length, 195 TOKEN_CONSTANT); 196 fCurrentToken.value = temp.String(); 197 198 } else if (isalpha(*fCurrentChar) && *fCurrentChar != 'x') { 199 const char* begin = fCurrentChar; 200 while (*fCurrentChar != 0 && (isalpha(*fCurrentChar) 201 || isdigit(*fCurrentChar))) { 202 fCurrentChar++; 203 } 204 int32 length = fCurrentChar - begin; 205 fCurrentToken = Token(begin, length, _CurrentPos() - length, 206 TOKEN_IDENTIFIER); 207 208 } else { 209 int32 type = TOKEN_NONE; 210 211 switch (*fCurrentChar) { 212 case '+': 213 type = TOKEN_PLUS; 214 break; 215 case '-': 216 type = TOKEN_MINUS; 217 break; 218 case '*': 219 type = TOKEN_STAR; 220 break; 221 case '/': 222 case '\\': 223 case ':': 224 type = TOKEN_SLASH; 225 break; 226 227 case '%': 228 type = TOKEN_MODULO; 229 break; 230 case '^': 231 type = TOKEN_POWER; 232 break; 233 234 case '(': 235 type = TOKEN_OPENING_BRACKET; 236 break; 237 case ')': 238 type = TOKEN_CLOSING_BRACKET; 239 break; 240 241 case '&': 242 type = TOKEN_AND; 243 break; 244 case '|': 245 type = TOKEN_OR; 246 break; 247 case '~': 248 type = TOKEN_NOT; 249 break; 250 251 case '\n': 252 type = TOKEN_END_OF_LINE; 253 break; 254 255 case 'x': 256 if (!fHexSupport) { 257 type = TOKEN_STAR; 258 break; 259 } 260 // fall through 261 262 default: 263 throw ParseException("unexpected character", _CurrentPos()); 264 } 265 fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(), type); 266 fCurrentChar++; 267 } 268 269 //printf("next token: '%s'\n", fCurrentToken.string.String()); 270 return fCurrentToken; 271 } 272 273 void RewindToken() 274 { 275 fReuseToken = true; 276 } 277 278 private: 279 static bool _IsHexDigit(char c) 280 { 281 return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 282 } 283 284 Token& _ParseHexNumber() 285 { 286 const char* begin = fCurrentChar; 287 fCurrentChar += 2; 288 // skip "0x" 289 290 if (!_IsHexDigit(*fCurrentChar)) 291 throw ParseException("expected hex digit", _CurrentPos()); 292 293 fCurrentChar++; 294 while (_IsHexDigit(*fCurrentChar)) 295 fCurrentChar++; 296 297 int32 length = fCurrentChar - begin; 298 fCurrentToken = Token(begin, length, _CurrentPos() - length, 299 TOKEN_CONSTANT); 300 301 // MAPM has no conversion from long long, so we need to improvise. 302 uint64 value = strtoll(fCurrentToken.string.String(), NULL, 0); 303 if (value <= 0x7fffffff) { 304 fCurrentToken.value = (long)value; 305 } else { 306 fCurrentToken.value = (int)(value >> 60); 307 fCurrentToken.value *= 1 << 30; 308 fCurrentToken.value += (int)((value >> 30) & 0x3fffffff); 309 fCurrentToken.value *= 1 << 30; 310 fCurrentToken.value += (int)(value& 0x3fffffff); 311 } 312 313 return fCurrentToken; 314 } 315 316 int32 _CurrentPos() const 317 { 318 return fCurrentChar - fString.String(); 319 } 320 321 BString fString; 322 const char* fCurrentChar; 323 Token fCurrentToken; 324 bool fReuseToken; 325 bool fHexSupport; 326 }; 327 328 329 ExpressionParser::ExpressionParser() 330 : fTokenizer(new Tokenizer()) 331 { 332 } 333 334 335 ExpressionParser::~ExpressionParser() 336 { 337 delete fTokenizer; 338 } 339 340 341 void 342 ExpressionParser::SetSupportHexInput(bool enabled) 343 { 344 fTokenizer->SetSupportHexInput(enabled); 345 } 346 347 348 BString 349 ExpressionParser::Evaluate(const char* expressionString) 350 { 351 fTokenizer->SetTo(expressionString); 352 353 MAPM value = _ParseBinary(); 354 Token token = fTokenizer->NextToken(); 355 if (token.type != TOKEN_END_OF_LINE) 356 throw ParseException("parse error", token.position); 357 358 if (value == 0) 359 return BString("0"); 360 361 char* buffer = value.toFixPtStringExp(kMaxDecimalPlaces, '.', 0, 0); 362 if (buffer == NULL) 363 throw ParseException("out of memory", 0); 364 365 // remove surplus zeros 366 int32 lastChar = strlen(buffer) - 1; 367 if (strchr(buffer, '.')) { 368 while (buffer[lastChar] == '0') 369 lastChar--; 370 if (buffer[lastChar] == '.') 371 lastChar--; 372 } 373 374 BString result(buffer, lastChar + 1); 375 free(buffer); 376 return result; 377 } 378 379 380 int64 381 ExpressionParser::EvaluateToInt64(const char* expressionString) 382 { 383 fTokenizer->SetTo(expressionString); 384 385 MAPM value = _ParseBinary(); 386 Token token = fTokenizer->NextToken(); 387 if (token.type != TOKEN_END_OF_LINE) 388 throw ParseException("parse error", token.position); 389 390 char buffer[128]; 391 value.toIntegerString(buffer); 392 393 return strtoll(buffer, NULL, 0); 394 } 395 396 397 double 398 ExpressionParser::EvaluateToDouble(const char* expressionString) 399 { 400 fTokenizer->SetTo(expressionString); 401 402 MAPM value = _ParseBinary(); 403 Token token = fTokenizer->NextToken(); 404 if (token.type != TOKEN_END_OF_LINE) 405 throw ParseException("parse error", token.position); 406 407 char buffer[1024]; 408 value.toString(buffer, sizeof(buffer) - 4); 409 410 return strtod(buffer, NULL); 411 } 412 413 414 MAPM 415 ExpressionParser::_ParseBinary() 416 { 417 return _ParseSum(); 418 // binary operation appearantly not supported by m_apm library, 419 // should not be too hard to implement though.... 420 421 // double value = _ParseSum(); 422 // 423 // while (true) { 424 // Token token = fTokenizer->NextToken(); 425 // switch (token.type) { 426 // case TOKEN_AND: 427 // value = (uint64)value & (uint64)_ParseSum(); 428 // break; 429 // case TOKEN_OR: 430 // value = (uint64)value | (uint64)_ParseSum(); 431 // break; 432 // 433 // default: 434 // fTokenizer->RewindToken(); 435 // return value; 436 // } 437 // } 438 } 439 440 441 MAPM 442 ExpressionParser::_ParseSum() 443 { 444 // TODO: check isnan()... 445 MAPM value = _ParseProduct(); 446 447 while (true) { 448 Token token = fTokenizer->NextToken(); 449 switch (token.type) { 450 case TOKEN_PLUS: 451 value = value + _ParseProduct(); 452 break; 453 case TOKEN_MINUS: 454 value = value - _ParseProduct(); 455 break; 456 457 default: 458 fTokenizer->RewindToken(); 459 return value; 460 } 461 } 462 } 463 464 465 MAPM 466 ExpressionParser::_ParseProduct() 467 { 468 // TODO: check isnan()... 469 MAPM value = _ParsePower(); 470 471 while (true) { 472 Token token = fTokenizer->NextToken(); 473 switch (token.type) { 474 case TOKEN_STAR: 475 value = value * _ParsePower(); 476 break; 477 case TOKEN_SLASH: { 478 MAPM rhs = _ParsePower(); 479 if (rhs == MAPM(0)) 480 throw ParseException("division by zero", token.position); 481 value = value / rhs; 482 break; 483 } 484 case TOKEN_MODULO: { 485 MAPM rhs = _ParsePower(); 486 if (rhs == MAPM(0)) 487 throw ParseException("modulo by zero", token.position); 488 value = value % rhs; 489 break; 490 } 491 492 default: 493 fTokenizer->RewindToken(); 494 return value; 495 } 496 } 497 } 498 499 500 MAPM 501 ExpressionParser::_ParsePower() 502 { 503 MAPM value = _ParseUnary(); 504 505 while (true) { 506 Token token = fTokenizer->NextToken(); 507 if (token.type != TOKEN_POWER) { 508 fTokenizer->RewindToken(); 509 return value; 510 } 511 value = value.pow(_ParseUnary()); 512 } 513 } 514 515 516 MAPM 517 ExpressionParser::_ParseUnary() 518 { 519 Token token = fTokenizer->NextToken(); 520 if (token.type == TOKEN_END_OF_LINE) 521 throw ParseException("unexpected end of expression", token.position); 522 523 switch (token.type) { 524 case TOKEN_PLUS: 525 return _ParseUnary(); 526 case TOKEN_MINUS: 527 return -_ParseUnary(); 528 // TODO: Implement ! 529 // case TOKEN_NOT: 530 // return ~(uint64)_ParseUnary(); 531 532 case TOKEN_IDENTIFIER: 533 return _ParseFunction(token); 534 535 default: 536 fTokenizer->RewindToken(); 537 return _ParseAtom(); 538 } 539 540 return MAPM(0); 541 } 542 543 544 struct Function { 545 const char* name; 546 int argumentCount; 547 void* function; 548 MAPM value; 549 }; 550 551 552 void 553 ExpressionParser::_InitArguments(MAPM values[], int32 argumentCount) 554 { 555 _EatToken(TOKEN_OPENING_BRACKET); 556 557 for (int32 i = 0; i < argumentCount; i++) 558 values[i] = _ParseBinary(); 559 560 _EatToken(TOKEN_CLOSING_BRACKET); 561 } 562 563 564 MAPM 565 ExpressionParser::_ParseFunction(const Token& token) 566 { 567 if (strcasecmp("e", token.string.String()) == 0) 568 return MAPM(MM_E); 569 else if (strcasecmp("pi", token.string.String()) == 0) 570 return MAPM(MM_PI); 571 572 // hard coded cases for different count of arguments 573 // supports functions with 3 arguments at most 574 575 MAPM values[3]; 576 577 if (strcasecmp("abs", token.string.String()) == 0) { 578 _InitArguments(values, 1); 579 return values[0].abs(); 580 } else if (strcasecmp("acos", token.string.String()) == 0) { 581 _InitArguments(values, 1); 582 if (values[0] < -1 || values[0] > 1) 583 throw ParseException("out of domain", token.position); 584 return values[0].acos(); 585 } else if (strcasecmp("asin", token.string.String()) == 0) { 586 _InitArguments(values, 1); 587 if (values[0] < -1 || values[0] > 1) 588 throw ParseException("out of domain", token.position); 589 return values[0].asin(); 590 } else if (strcasecmp("atan", token.string.String()) == 0) { 591 _InitArguments(values, 1); 592 return values[0].atan(); 593 } else if (strcasecmp("atan2", token.string.String()) == 0) { 594 _InitArguments(values, 2); 595 return values[0].atan2(values[1]); 596 } else if (strcasecmp("ceil", token.string.String()) == 0) { 597 _InitArguments(values, 1); 598 return values[0].ceil(); 599 } else if (strcasecmp("cos", token.string.String()) == 0) { 600 _InitArguments(values, 1); 601 return values[0].cos(); 602 } else if (strcasecmp("cosh", token.string.String()) == 0) { 603 _InitArguments(values, 1); 604 return values[0].cosh(); 605 } else if (strcasecmp("exp", token.string.String()) == 0) { 606 _InitArguments(values, 1); 607 return values[0].exp(); 608 } else if (strcasecmp("floor", token.string.String()) == 0) { 609 _InitArguments(values, 1); 610 return values[0].floor(); 611 } else if (strcasecmp("ln", token.string.String()) == 0) { 612 _InitArguments(values, 1); 613 if (values[0] <= 0) 614 throw ParseException("out of domain", token.position); 615 return values[0].log(); 616 } else if (strcasecmp("log", token.string.String()) == 0) { 617 _InitArguments(values, 1); 618 if (values[0] <= 0) 619 throw ParseException("out of domain", token.position); 620 return values[0].log10(); 621 } else if (strcasecmp("pow", token.string.String()) == 0) { 622 _InitArguments(values, 2); 623 return values[0].pow(values[1]); 624 } else if (strcasecmp("sin", token.string.String()) == 0) { 625 _InitArguments(values, 1); 626 return values[0].sin(); 627 } else if (strcasecmp("sinh", token.string.String()) == 0) { 628 _InitArguments(values, 1); 629 return values[0].sinh(); 630 } else if (strcasecmp("sqrt", token.string.String()) == 0) { 631 _InitArguments(values, 1); 632 if (values[0] < 0) 633 throw ParseException("out of domain", token.position); 634 return values[0].sqrt(); 635 } else if (strcasecmp("tan", token.string.String()) == 0) { 636 _InitArguments(values, 1); 637 return values[0].tan(); 638 } else if (strcasecmp("tanh", token.string.String()) == 0) { 639 _InitArguments(values, 1); 640 return values[0].tanh(); 641 } 642 643 throw ParseException("unknown identifier", token.position); 644 } 645 646 647 MAPM 648 ExpressionParser::_ParseAtom() 649 { 650 Token token = fTokenizer->NextToken(); 651 if (token.type == TOKEN_END_OF_LINE) 652 throw ParseException("unexpected end of expression", token.position); 653 654 if (token.type == TOKEN_CONSTANT) 655 return token.value; 656 657 fTokenizer->RewindToken(); 658 659 _EatToken(TOKEN_OPENING_BRACKET); 660 661 MAPM value = _ParseBinary(); 662 663 _EatToken(TOKEN_CLOSING_BRACKET); 664 665 return value; 666 } 667 668 669 void 670 ExpressionParser::_EatToken(int32 type) 671 { 672 Token token = fTokenizer->NextToken(); 673 if (token.type != type) { 674 BString temp("expected '"); 675 temp << (char)type << "' got '" << token.string << "'"; 676 throw ParseException(temp.String(), token.position); 677 } 678 } 679 680