1 /* 2 * Copyright 2006-2009 Haiku, Inc. All Rights Reserved. 3 * Distributed under the terms of the MIT License. 4 * 5 * Authors: 6 * Ingo Weinhold <bonefish@cs.tu-berlin.de> 7 * Stephan Aßmus <superstippi@gmx.de> 8 */ 9 10 #include <ExpressionParser.h> 11 12 #include <ctype.h> 13 #include <math.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 18 #include <m_apm.h> 19 20 21 static const int32 kMaxDecimalPlaces = 32; 22 23 enum { 24 TOKEN_IDENTIFIER = 0, 25 26 TOKEN_CONSTANT, 27 28 TOKEN_PLUS, 29 TOKEN_MINUS, 30 31 TOKEN_STAR, 32 TOKEN_SLASH, 33 TOKEN_MODULO, 34 35 TOKEN_POWER, 36 37 TOKEN_OPENING_BRACKET, 38 TOKEN_CLOSING_BRACKET, 39 40 TOKEN_AND, 41 TOKEN_OR, 42 TOKEN_NOT, 43 44 TOKEN_NONE, 45 TOKEN_END_OF_LINE 46 }; 47 48 struct Token { 49 Token() 50 : string(""), 51 type(TOKEN_NONE), 52 value(0), 53 position(0) 54 { 55 } 56 57 Token(const Token& other) 58 : string(other.string), 59 type(other.type), 60 value(other.value), 61 position(other.position) 62 { 63 } 64 65 Token(const char* string, int32 length, int32 position, int32 type) 66 : string(string, length), 67 type(type), 68 value(0), 69 position(position) 70 { 71 } 72 73 Token& operator=(const Token& other) 74 { 75 string = other.string; 76 type = other.type; 77 value = other.value; 78 position = other.position; 79 return *this; 80 } 81 82 BString string; 83 int32 type; 84 MAPM value; 85 86 int32 position; 87 }; 88 89 90 class Tokenizer { 91 public: 92 Tokenizer() 93 : fString(""), 94 fCurrentChar(NULL), 95 fCurrentToken(), 96 fReuseToken(false), 97 fHexSupport(false) 98 { 99 } 100 101 void SetSupportHexInput(bool enabled) 102 { 103 fHexSupport = enabled; 104 } 105 106 void SetTo(const char* string) 107 { 108 fString = string; 109 fCurrentChar = fString.String(); 110 fCurrentToken = Token(); 111 fReuseToken = false; 112 } 113 114 const Token& NextToken() 115 { 116 if (fCurrentToken.type == TOKEN_END_OF_LINE) 117 return fCurrentToken; 118 119 if (fReuseToken) { 120 fReuseToken = false; 121 //printf("next token (recycled): '%s'\n", fCurrentToken.string.String()); 122 return fCurrentToken; 123 } 124 125 while (*fCurrentChar != 0 && isspace(*fCurrentChar)) 126 fCurrentChar++; 127 128 if (*fCurrentChar == 0) 129 return fCurrentToken = Token("", 0, _CurrentPos(), TOKEN_END_OF_LINE); 130 131 bool decimal = *fCurrentChar == '.' || *fCurrentChar == ','; 132 133 if (decimal || isdigit(*fCurrentChar)) { 134 if (fHexSupport && *fCurrentChar == '0' && fCurrentChar[1] == 'x') 135 return _ParseHexNumber(); 136 137 BString temp; 138 139 const char* begin = fCurrentChar; 140 while (*fCurrentChar != 0) { 141 if (!isdigit(*fCurrentChar)) { 142 if (!(*fCurrentChar == '.' || *fCurrentChar == ',' 143 || *fCurrentChar == 'e' || *fCurrentChar == 'E')) 144 break; 145 } 146 if (*fCurrentChar == ',') 147 temp << '.'; 148 else 149 temp << *fCurrentChar; 150 fCurrentChar++; 151 } 152 int32 length = fCurrentChar - begin; 153 BString test = temp; 154 test << "&_"; 155 double value; 156 char t[2]; 157 int32 matches = sscanf(test.String(), "%lf&%s", &value, t); 158 if (matches != 2) { 159 throw ParseException("error in constant", 160 _CurrentPos() - length); 161 } 162 163 fCurrentToken = Token(begin, length, _CurrentPos() - length, 164 TOKEN_CONSTANT); 165 fCurrentToken.value = temp.String(); 166 167 } else if (isalpha(*fCurrentChar) && *fCurrentChar != 'x') { 168 const char* begin = fCurrentChar; 169 while (*fCurrentChar != 0 && (isalpha(*fCurrentChar) 170 || isdigit(*fCurrentChar))) { 171 fCurrentChar++; 172 } 173 int32 length = fCurrentChar - begin; 174 fCurrentToken = Token(begin, length, _CurrentPos() - length, 175 TOKEN_IDENTIFIER); 176 177 } else { 178 int32 type = TOKEN_NONE; 179 180 switch (*fCurrentChar) { 181 case '+': 182 type = TOKEN_PLUS; 183 break; 184 case '-': 185 type = TOKEN_MINUS; 186 break; 187 case '*': 188 type = TOKEN_STAR; 189 break; 190 case '/': 191 case '\\': 192 case ':': 193 type = TOKEN_SLASH; 194 break; 195 196 case '%': 197 type = TOKEN_MODULO; 198 break; 199 case '^': 200 type = TOKEN_POWER; 201 break; 202 203 case '(': 204 type = TOKEN_OPENING_BRACKET; 205 break; 206 case ')': 207 type = TOKEN_CLOSING_BRACKET; 208 break; 209 210 case '&': 211 type = TOKEN_AND; 212 break; 213 case '|': 214 type = TOKEN_OR; 215 break; 216 case '~': 217 type = TOKEN_NOT; 218 break; 219 220 case '\n': 221 type = TOKEN_END_OF_LINE; 222 break; 223 224 case 'x': 225 if (!fHexSupport) { 226 type = TOKEN_STAR; 227 break; 228 } 229 // fall through 230 231 default: 232 throw ParseException("unexpected character", _CurrentPos()); 233 } 234 fCurrentToken = Token(fCurrentChar, 1, _CurrentPos(), type); 235 fCurrentChar++; 236 } 237 238 //printf("next token: '%s'\n", fCurrentToken.string.String()); 239 return fCurrentToken; 240 } 241 242 void RewindToken() 243 { 244 fReuseToken = true; 245 } 246 247 private: 248 static bool _IsHexDigit(char c) 249 { 250 return isdigit(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); 251 } 252 253 Token& _ParseHexNumber() 254 { 255 const char* begin = fCurrentChar; 256 fCurrentChar += 2; 257 // skip "0x" 258 259 if (!_IsHexDigit(*fCurrentChar)) 260 throw ParseException("expected hex digit", _CurrentPos()); 261 262 fCurrentChar++; 263 while (_IsHexDigit(*fCurrentChar)) 264 fCurrentChar++; 265 266 int32 length = fCurrentChar - begin; 267 fCurrentToken = Token(begin, length, _CurrentPos() - length, 268 TOKEN_CONSTANT); 269 270 // MAPM has no conversion from long long, so we need to improvise. 271 uint64 value = strtoll(fCurrentToken.string.String(), NULL, 0); 272 if (value <= 0x7fffffff) { 273 fCurrentToken.value = (long)value; 274 } else { 275 fCurrentToken.value = (int)(value >> 60); 276 fCurrentToken.value *= 1 << 30; 277 fCurrentToken.value += (int)((value >> 30) & 0x3fffffff); 278 fCurrentToken.value *= 1 << 30; 279 fCurrentToken.value += (int)(value& 0x3fffffff); 280 } 281 282 return fCurrentToken; 283 } 284 285 int32 _CurrentPos() const 286 { 287 return fCurrentChar - fString.String(); 288 } 289 290 BString fString; 291 const char* fCurrentChar; 292 Token fCurrentToken; 293 bool fReuseToken; 294 bool fHexSupport; 295 }; 296 297 298 ExpressionParser::ExpressionParser() 299 : fTokenizer(new Tokenizer()) 300 { 301 } 302 303 304 ExpressionParser::~ExpressionParser() 305 { 306 delete fTokenizer; 307 } 308 309 310 void 311 ExpressionParser::SetSupportHexInput(bool enabled) 312 { 313 fTokenizer->SetSupportHexInput(enabled); 314 } 315 316 317 BString 318 ExpressionParser::Evaluate(const char* expressionString) 319 { 320 fTokenizer->SetTo(expressionString); 321 322 MAPM value = _ParseBinary(); 323 Token token = fTokenizer->NextToken(); 324 if (token.type != TOKEN_END_OF_LINE) 325 throw ParseException("parse error", token.position); 326 327 if (value == 0) 328 return BString("0"); 329 330 char* buffer = value.toFixPtStringExp(kMaxDecimalPlaces, '.', 0, 0); 331 if (buffer == NULL) 332 throw ParseException("out of memory", 0); 333 334 // remove surplus zeros 335 int32 lastChar = strlen(buffer) - 1; 336 if (strchr(buffer, '.')) { 337 while (buffer[lastChar] == '0') 338 lastChar--; 339 if (buffer[lastChar] == '.') 340 lastChar--; 341 } 342 343 BString result(buffer, lastChar + 1); 344 free(buffer); 345 return result; 346 } 347 348 349 int64 350 ExpressionParser::EvaluateToInt64(const char* expressionString) 351 { 352 fTokenizer->SetTo(expressionString); 353 354 MAPM value = _ParseBinary(); 355 Token token = fTokenizer->NextToken(); 356 if (token.type != TOKEN_END_OF_LINE) 357 throw ParseException("parse error", token.position); 358 359 char buffer[128]; 360 value.toIntegerString(buffer); 361 362 return strtoll(buffer, NULL, 0); 363 } 364 365 366 double 367 ExpressionParser::EvaluateToDouble(const char* expressionString) 368 { 369 fTokenizer->SetTo(expressionString); 370 371 MAPM value = _ParseBinary(); 372 Token token = fTokenizer->NextToken(); 373 if (token.type != TOKEN_END_OF_LINE) 374 throw ParseException("parse error", token.position); 375 376 char buffer[1024]; 377 value.toString(buffer, sizeof(buffer) - 4); 378 379 return strtod(buffer, NULL); 380 } 381 382 383 MAPM 384 ExpressionParser::_ParseBinary() 385 { 386 return _ParseSum(); 387 // binary operation appearantly not supported by m_apm library, 388 // should not be too hard to implement though.... 389 390 // double value = _ParseSum(); 391 // 392 // while (true) { 393 // Token token = fTokenizer->NextToken(); 394 // switch (token.type) { 395 // case TOKEN_AND: 396 // value = (uint64)value & (uint64)_ParseSum(); 397 // break; 398 // case TOKEN_OR: 399 // value = (uint64)value | (uint64)_ParseSum(); 400 // break; 401 // 402 // default: 403 // fTokenizer->RewindToken(); 404 // return value; 405 // } 406 // } 407 } 408 409 410 MAPM 411 ExpressionParser::_ParseSum() 412 { 413 // TODO: check isnan()... 414 MAPM value = _ParseProduct(); 415 416 while (true) { 417 Token token = fTokenizer->NextToken(); 418 switch (token.type) { 419 case TOKEN_PLUS: 420 value = value + _ParseProduct(); 421 break; 422 case TOKEN_MINUS: 423 value = value - _ParseProduct(); 424 break; 425 426 default: 427 fTokenizer->RewindToken(); 428 return value; 429 } 430 } 431 } 432 433 434 MAPM 435 ExpressionParser::_ParseProduct() 436 { 437 // TODO: check isnan()... 438 MAPM value = _ParsePower(); 439 440 while (true) { 441 Token token = fTokenizer->NextToken(); 442 switch (token.type) { 443 case TOKEN_STAR: 444 value = value * _ParsePower(); 445 break; 446 case TOKEN_SLASH: { 447 MAPM rhs = _ParsePower(); 448 if (rhs == MAPM(0)) 449 throw ParseException("division by zero", token.position); 450 value = value / rhs; 451 break; 452 } 453 case TOKEN_MODULO: { 454 MAPM rhs = _ParsePower(); 455 if (rhs == MAPM(0)) 456 throw ParseException("modulo by zero", token.position); 457 value = value % rhs; 458 break; 459 } 460 461 default: 462 fTokenizer->RewindToken(); 463 return value; 464 } 465 } 466 } 467 468 469 MAPM 470 ExpressionParser::_ParsePower() 471 { 472 MAPM value = _ParseUnary(); 473 474 while (true) { 475 Token token = fTokenizer->NextToken(); 476 if (token.type != TOKEN_POWER) { 477 fTokenizer->RewindToken(); 478 return value; 479 } 480 value = value.pow(_ParseUnary()); 481 } 482 } 483 484 485 MAPM 486 ExpressionParser::_ParseUnary() 487 { 488 Token token = fTokenizer->NextToken(); 489 if (token.type == TOKEN_END_OF_LINE) 490 throw ParseException("unexpected end of expression", token.position); 491 492 switch (token.type) { 493 case TOKEN_PLUS: 494 return _ParseUnary(); 495 case TOKEN_MINUS: 496 return -_ParseUnary(); 497 // TODO: Implement ! 498 // case TOKEN_NOT: 499 // return ~(uint64)_ParseUnary(); 500 501 case TOKEN_IDENTIFIER: 502 return _ParseFunction(token); 503 504 default: 505 fTokenizer->RewindToken(); 506 return _ParseAtom(); 507 } 508 509 return MAPM(0); 510 } 511 512 513 struct Function { 514 const char* name; 515 int argumentCount; 516 void* function; 517 MAPM value; 518 }; 519 520 521 void 522 ExpressionParser::_InitArguments(MAPM values[], int32 argumentCount) 523 { 524 _EatToken(TOKEN_OPENING_BRACKET); 525 526 for (int32 i = 0; i < argumentCount; i++) 527 values[i] = _ParseBinary(); 528 529 _EatToken(TOKEN_CLOSING_BRACKET); 530 } 531 532 533 MAPM 534 ExpressionParser::_ParseFunction(const Token& token) 535 { 536 if (strcasecmp("e", token.string.String()) == 0) 537 return MAPM(M_E); 538 else if (strcasecmp("pi", token.string.String()) == 0) 539 return MAPM(M_PI); 540 541 // hard coded cases for different count of arguments 542 // supports functions with 3 arguments at most 543 544 MAPM values[3]; 545 546 if (strcasecmp("abs", token.string.String()) == 0) { 547 _InitArguments(values, 1); 548 return values[0].abs(); 549 } else if (strcasecmp("acos", token.string.String()) == 0) { 550 _InitArguments(values, 1); 551 return values[0].acos(); 552 } else if (strcasecmp("asin", token.string.String()) == 0) { 553 _InitArguments(values, 1); 554 return values[0].asin(); 555 } else if (strcasecmp("atan", token.string.String()) == 0) { 556 _InitArguments(values, 1); 557 return values[0].atan(); 558 } else if (strcasecmp("atan2", token.string.String()) == 0) { 559 _InitArguments(values, 2); 560 return values[0].atan2(values[1]); 561 } else if (strcasecmp("ceil", token.string.String()) == 0) { 562 _InitArguments(values, 1); 563 return values[0].ceil(); 564 } else if (strcasecmp("cos", token.string.String()) == 0) { 565 _InitArguments(values, 1); 566 return values[0].cos(); 567 } else if (strcasecmp("cosh", token.string.String()) == 0) { 568 _InitArguments(values, 1); 569 return values[0].cosh(); 570 } else if (strcasecmp("exp", token.string.String()) == 0) { 571 _InitArguments(values, 1); 572 return values[0].exp(); 573 } else if (strcasecmp("floor", token.string.String()) == 0) { 574 _InitArguments(values, 1); 575 return values[0].floor(); 576 } else if (strcasecmp("log", token.string.String()) == 0) { 577 _InitArguments(values, 1); 578 return values[0].log(); 579 } else if (strcasecmp("log10", token.string.String()) == 0) { 580 _InitArguments(values, 1); 581 return values[0].log10(); 582 } else if (strcasecmp("pow", token.string.String()) == 0) { 583 _InitArguments(values, 2); 584 return values[0].pow(values[1]); 585 } else if (strcasecmp("sin", token.string.String()) == 0) { 586 _InitArguments(values, 1); 587 return values[0].sin(); 588 } else if (strcasecmp("sinh", token.string.String()) == 0) { 589 _InitArguments(values, 1); 590 return values[0].sinh(); 591 } else if (strcasecmp("sqrt", token.string.String()) == 0) { 592 _InitArguments(values, 1); 593 return values[0].sqrt(); 594 } else if (strcasecmp("tan", token.string.String()) == 0) { 595 _InitArguments(values, 1); 596 return values[0].tan(); 597 } else if (strcasecmp("tanh", token.string.String()) == 0) { 598 _InitArguments(values, 1); 599 return values[0].tanh(); 600 } 601 602 throw ParseException("unknown identifier", token.position); 603 } 604 605 606 MAPM 607 ExpressionParser::_ParseAtom() 608 { 609 Token token = fTokenizer->NextToken(); 610 if (token.type == TOKEN_END_OF_LINE) 611 throw ParseException("unexpected end of expression", token.position); 612 613 if (token.type == TOKEN_CONSTANT) 614 return token.value; 615 616 fTokenizer->RewindToken(); 617 618 _EatToken(TOKEN_OPENING_BRACKET); 619 620 MAPM value = _ParseBinary(); 621 622 _EatToken(TOKEN_CLOSING_BRACKET); 623 624 return value; 625 } 626 627 628 void 629 ExpressionParser::_EatToken(int32 type) 630 { 631 Token token = fTokenizer->NextToken(); 632 if (token.type != type) { 633 BString temp("expected '"); 634 temp << (char)type << "' got '" << token.string << "'"; 635 throw ParseException(temp.String(), token.position); 636 } 637 } 638 639