1 // MimeSnifferTest.cpp 2 3 #include <MimeSnifferTest.h> 4 #include <cppunit/Test.h> 5 #include <cppunit/TestSuite.h> 6 #include <cppunit/TestCaller.h> 7 #include <sniffer/Rule.h> 8 #include <sniffer/Parser.h> 9 #include <DataIO.h> 10 #include <Mime.h> 11 #include <String.h> // BString 12 #include <TestUtils.h> 13 14 #include <stdio.h> 15 #include <string> 16 17 using namespace BPrivate::Storage::Sniffer; 18 19 // Suite 20 CppUnit::Test* 21 MimeSnifferTest::Suite() { 22 CppUnit::TestSuite *suite = new CppUnit::TestSuite(); 23 typedef CppUnit::TestCaller<MimeSnifferTest> TC; 24 25 suite->addTest( new TC("Mime Sniffer::Scanner Test", 26 &MimeSnifferTest::ScannerTest) ); 27 suite->addTest( new TC("Mime Sniffer::Parser Test", 28 &MimeSnifferTest::ParserTest) ); 29 suite->addTest( new TC("Mime Sniffer::Sniffer Test", 30 &MimeSnifferTest::SnifferTest) ); 31 32 return suite; 33 } 34 35 // Scanner Test 36 void 37 MimeSnifferTest::ScannerTest() { 38 #if TEST_R5 39 Outputf("(no tests actually performed for R5 version)\n"); 40 #else // TEST_R5 41 42 43 // tests: 44 // Internal TokenStream and CharStream classes 45 46 // Define some useful macros for dynamically allocating 47 // various Token classes 48 #define T(type) (new Token(type, -1)) 49 #define S(str) (new StringToken(str, -1)) 50 #define I(val) (new IntToken(val, -1)) 51 #define F(val) (new FloatToken(val, -1)) 52 53 struct test_case { 54 const char *rule; 55 int tokenCount; 56 Token *tokens[256]; 57 } testCases[] = { 58 { "'Hey'[]:", 4, 59 { S("Hey"), 60 T(LeftBracket), 61 T(RightBracket), 62 T(Colon) 63 } 64 }, 65 { "1", 1, { I(1) } }, 66 { "1.0", 1, { F(1.0) } }, 67 68 { "1.0 (\"ABCD\")", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 69 { "1.0 ('ABCD')", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 70 { " 1.0 ('ABCD') ", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 71 { "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", 11, 72 { F(0.8), 73 T(LeftBracket), 74 I(0), 75 T(Colon), 76 I(3), 77 T(RightBracket), 78 T(LeftParen), 79 S("ABCDEFG"), 80 T(Divider), 81 S("abcdefghij"), 82 T(RightParen) 83 } 84 }, 85 { "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 17, 86 { F(0.5), 87 T(LeftParen), 88 T(LeftBracket), 89 I(10), 90 T(RightBracket), 91 S("ABCD"), 92 T(Divider), 93 T(LeftBracket), 94 I(17), 95 T(RightBracket), 96 S("abcd"), 97 T(Divider), 98 T(LeftBracket), 99 I(13), 100 T(RightBracket), 101 S("EFGH"), 102 T(RightParen) 103 } 104 }, 105 { "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", 13, 106 { F(0.5), 107 T(LeftBracket), 108 I(0), 109 T(Colon), 110 I(3), 111 T(RightBracket), 112 T(LeftParen), 113 S("ABCD"), 114 T(Divider), 115 S("abcd"), 116 T(Divider), 117 S("EFGH"), 118 T(RightParen) 119 } 120 }, 121 { "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", 11, 122 { F(0.8), 123 T(LeftBracket), 124 I(0), 125 T(Colon), 126 I(3), 127 T(RightBracket), 128 T(LeftParen), 129 S("ABCDEFG"), 130 T(Divider), 131 S("abcdefghij"), 132 T(RightParen) 133 } 134 }, 135 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", 11, 136 { F(0.8), 137 T(LeftBracket), 138 I(0), 139 T(Colon), 140 I(3), 141 T(RightBracket), 142 T(LeftParen), 143 S("ABCDEFG"), 144 T(Ampersand), 145 S("abcdefg"), 146 T(RightParen) 147 } 148 }, 149 { "1.0 ('ABCD') | ('EFGH')", 8, 150 { F(1.0), 151 T(LeftParen), 152 S("ABCD"), 153 T(RightParen), 154 T(Divider), 155 T(LeftParen), 156 S("EFGH"), 157 T(RightParen) 158 } 159 }, 160 { "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", 18, 161 { F(1.0), 162 T(LeftBracket), 163 I(0), 164 T(Colon), 165 I(3), 166 T(RightBracket), 167 T(LeftParen), 168 S("ABCD"), 169 T(RightParen), 170 T(Divider), 171 T(LeftBracket), 172 I(2), 173 T(Colon), 174 I(4), 175 T(RightBracket), 176 T(LeftParen), 177 S("EFGH"), 178 T(RightParen) 179 } 180 }, 181 { "0.8 [0:4] (\\077Mkj0x34 & 'abcdefgh')", 11, 182 { F(0.8), 183 T(LeftBracket), 184 I(0), 185 T(Colon), 186 I(4), 187 T(RightBracket), 188 T(LeftParen), 189 S("\077Mkj0x34"), 190 T(Ampersand), 191 S("abcdefgh"), 192 T(RightParen) 193 } 194 }, 195 { "0.8 [0:4] (\\077Mkj\\x34 & 'abcdefgh')", 11, 196 { F(0.8), 197 T(LeftBracket), 198 I(0), 199 T(Colon), 200 I(4), 201 T(RightBracket), 202 T(LeftParen), 203 S("\077Mkj\x34"), 204 T(Ampersand), 205 S("abcdefgh"), 206 T(RightParen) 207 } 208 }, 209 { "0.8 [0:3] (\\077034 & 'abcd')", 11, 210 { F(0.8), 211 T(LeftBracket), 212 I(0), 213 T(Colon), 214 I(3), 215 T(RightBracket), 216 T(LeftParen), 217 S("\077034"), 218 T(Ampersand), 219 S("abcd"), 220 T(RightParen) 221 } 222 }, 223 { "0.8 [0:3] (\\077\\034 & 'ab')", 11, 224 { F(0.8), 225 T(LeftBracket), 226 I(0), 227 T(Colon), 228 I(3), 229 T(RightBracket), 230 T(LeftParen), 231 S("\077\034"), 232 T(Ampersand), 233 S("ab"), 234 T(RightParen) 235 } 236 }, 237 { "0.8 [0:3] (\\77\\034 & 'ab')", 11, 238 { F(0.8), 239 T(LeftBracket), 240 I(0), 241 T(Colon), 242 I(3), 243 T(RightBracket), 244 T(LeftParen), 245 S("\077\034"), 246 T(Ampersand), 247 S("ab"), 248 T(RightParen) 249 } 250 }, 251 { "0.8 [0:3] (\\7 & 'a')", 11, 252 { F(0.8), 253 T(LeftBracket), 254 I(0), 255 T(Colon), 256 I(3), 257 T(RightBracket), 258 T(LeftParen), 259 S("\007"), 260 T(Ampersand), 261 S("a"), 262 T(RightParen) 263 } 264 }, 265 { "0.8 [0:3] (\"\\17\" & 'a')", 11, 266 { F(0.8), 267 T(LeftBracket), 268 I(0), 269 T(Colon), 270 I(3), 271 T(RightBracket), 272 T(LeftParen), 273 S("\017"), 274 T(Ampersand), 275 S("a"), 276 T(RightParen) 277 } 278 }, 279 { "0.8 [0:3] ('\\17' & 'a')", 11, 280 { F(0.8), 281 T(LeftBracket), 282 I(0), 283 T(Colon), 284 I(3), 285 T(RightBracket), 286 T(LeftParen), 287 S("\017"), 288 T(Ampersand), 289 S("a"), 290 T(RightParen) 291 } 292 }, 293 { "0.8 [0:3] (\\g & 'a')", 11, 294 { F(0.8), 295 T(LeftBracket), 296 I(0), 297 T(Colon), 298 I(3), 299 T(RightBracket), 300 T(LeftParen), 301 S("g"), 302 T(Ampersand), 303 S("a"), 304 T(RightParen) 305 } 306 }, 307 { "0.8 [0:3] (\\g&\\b)", 11, 308 { F(0.8), 309 T(LeftBracket), 310 I(0), 311 T(Colon), 312 I(3), 313 T(RightBracket), 314 T(LeftParen), 315 S("g"), 316 T(Ampersand), 317 S("\b"), 318 T(RightParen) 319 } 320 }, 321 { "0.8 [0:3] (\\g\\&b & 'abc')", 11, 322 { F(0.8), 323 T(LeftBracket), 324 I(0), 325 T(Colon), 326 I(3), 327 T(RightBracket), 328 T(LeftParen), 329 S("g&b"), 330 T(Ampersand), 331 S("abc"), 332 T(RightParen) 333 } 334 }, 335 { "0.8 [0:3] (0x3457 & 'ab')", 11, 336 { F(0.8), 337 T(LeftBracket), 338 I(0), 339 T(Colon), 340 I(3), 341 T(RightBracket), 342 T(LeftParen), 343 S("\x34\x57"), 344 T(Ampersand), 345 S("ab"), 346 T(RightParen) 347 } 348 }, 349 { "0.8 [0:3] (\\x34\\x57 & 'ab')", 11, 350 { F(0.8), 351 T(LeftBracket), 352 I(0), 353 T(Colon), 354 I(3), 355 T(RightBracket), 356 T(LeftParen), 357 S("\x34\x57"), 358 T(Ampersand), 359 S("ab"), 360 T(RightParen) 361 } 362 }, 363 { "0.8 [0:3] (0xA4b7 & 'ab')", 11, 364 { F(0.8), 365 T(LeftBracket), 366 I(0), 367 T(Colon), 368 I(3), 369 T(RightBracket), 370 T(LeftParen), 371 S("\xA4\xb7"), 372 T(Ampersand), 373 S("ab"), 374 T(RightParen) 375 } 376 }, 377 { "0.8 [0:3] (\\xA4\\xb7 & 'ab')", 11, 378 { F(0.8), 379 T(LeftBracket), 380 I(0), 381 T(Colon), 382 I(3), 383 T(RightBracket), 384 T(LeftParen), 385 S("\xA4\xb7"), 386 T(Ampersand), 387 S("ab"), 388 T(RightParen) 389 } 390 }, 391 { "0.8 [0:3] (\"\\xA4\\xb7\" & 'ab')", 11, 392 { F(0.8), 393 T(LeftBracket), 394 I(0), 395 T(Colon), 396 I(3), 397 T(RightBracket), 398 T(LeftParen), 399 S("\xA4\xb7"), 400 T(Ampersand), 401 S("ab"), 402 T(RightParen) 403 } 404 }, 405 { "0.8 [0:3] (\'\\xA4\\xb7\' & 'ab')", 11, 406 { F(0.8), 407 T(LeftBracket), 408 I(0), 409 T(Colon), 410 I(3), 411 T(RightBracket), 412 T(LeftParen), 413 S("\xA4\xb7"), 414 T(Ampersand), 415 S("ab"), 416 T(RightParen) 417 } 418 }, 419 { "0.8 [0:3] ('ab\"' & 'abc')", 11, 420 { F(0.8), 421 T(LeftBracket), 422 I(0), 423 T(Colon), 424 I(3), 425 T(RightBracket), 426 T(LeftParen), 427 S("ab\""), 428 T(Ampersand), 429 S("abc"), 430 T(RightParen) 431 } 432 }, 433 { "0.8 [0:3] (\"ab\\\"\" & 'abc')", 11, 434 { F(0.8), 435 T(LeftBracket), 436 I(0), 437 T(Colon), 438 I(3), 439 T(RightBracket), 440 T(LeftParen), 441 S("ab\""), 442 T(Ampersand), 443 S("abc"), 444 T(RightParen) 445 } 446 }, 447 { "0.8 [0:3] (\"ab\\A\" & 'abc')", 11, 448 { F(0.8), 449 T(LeftBracket), 450 I(0), 451 T(Colon), 452 I(3), 453 T(RightBracket), 454 T(LeftParen), 455 S("abA"), 456 T(Ampersand), 457 S("abc"), 458 T(RightParen) 459 } 460 }, 461 { "0.8 [0:3] (\"ab'\" & 'abc')", 11, 462 { F(0.8), 463 T(LeftBracket), 464 I(0), 465 T(Colon), 466 I(3), 467 T(RightBracket), 468 T(LeftParen), 469 S("ab'"), 470 T(Ampersand), 471 S("abc"), 472 T(RightParen) 473 } 474 }, 475 { "0.8 [0:3] (\"ab\\\\\" & 'abc')", 11, 476 { F(0.8), 477 T(LeftBracket), 478 I(0), 479 T(Colon), 480 I(3), 481 T(RightBracket), 482 T(LeftParen), 483 S("ab\\"), 484 T(Ampersand), 485 S("abc"), 486 T(RightParen) 487 } 488 }, 489 { "0.8 [-5:-3] (\"abc\" & 'abc')", 11, 490 { F(0.8), 491 T(LeftBracket), 492 I(-5), 493 T(Colon), 494 I(-3), 495 T(RightBracket), 496 T(LeftParen), 497 S("abc"), 498 T(Ampersand), 499 S("abc"), 500 T(RightParen) 501 } 502 }, 503 { "0.8 [5:3] (\"abc\" & 'abc')", 11, 504 { F(0.8), 505 T(LeftBracket), 506 I(5), 507 T(Colon), 508 I(3), 509 T(RightBracket), 510 T(LeftParen), 511 S("abc"), 512 T(Ampersand), 513 S("abc"), 514 T(RightParen) 515 } 516 }, 517 { "1.2 ('ABCD')", 4, 518 { F(1.2), 519 T(LeftParen), 520 S("ABCD"), 521 T(RightParen) 522 } 523 }, 524 { ".2 ('ABCD')", 4, 525 { F(0.2), 526 T(LeftParen), 527 S("ABCD"), 528 T(RightParen) 529 } 530 }, 531 { "0. ('ABCD')", 4, 532 { F(0.0), 533 T(LeftParen), 534 S("ABCD"), 535 T(RightParen) 536 } 537 }, 538 // Signed integers 539 { "-1 ('ABCD')", 4, 540 { I(-1), 541 T(LeftParen), 542 S("ABCD"), 543 T(RightParen) 544 } 545 }, 546 { "+1 ('ABCD')", 4, 547 { I(1), 548 T(LeftParen), 549 S("ABCD"), 550 T(RightParen) 551 } 552 }, 553 // Unsigned extended floats 554 { "1E25 ('ABCD')", 4, 555 { F(1e25), 556 T(LeftParen), 557 S("ABCD"), 558 T(RightParen) 559 } 560 }, 561 { "1e25 ('ABCD')", 4, 562 { F(1e25), 563 T(LeftParen), 564 S("ABCD"), 565 T(RightParen) 566 } 567 }, 568 { "1E+25 ('ABCD')", 4, 569 { F(1e25), 570 T(LeftParen), 571 S("ABCD"), 572 T(RightParen) 573 } 574 }, 575 { "1e+25 ('ABCD')", 4, 576 { F(1e25), 577 T(LeftParen), 578 S("ABCD"), 579 T(RightParen) 580 } 581 }, 582 { "1E-25 ('ABCD')", 4, 583 { F(1e-25), 584 T(LeftParen), 585 S("ABCD"), 586 T(RightParen) 587 } 588 }, 589 { "1e-25 ('ABCD')", 4, 590 { F(1e-25), 591 T(LeftParen), 592 S("ABCD"), 593 T(RightParen) 594 } 595 }, 596 // Positive signed extended floats 597 { "+1E25 ('ABCD')", 4, 598 { F(1e25), 599 T(LeftParen), 600 S("ABCD"), 601 T(RightParen) 602 } 603 }, 604 { "+1e25 ('ABCD')", 4, 605 { F(1e25), 606 T(LeftParen), 607 S("ABCD"), 608 T(RightParen) 609 } 610 }, 611 { "+1E+25 ('ABCD')", 4, 612 { F(1e25), 613 T(LeftParen), 614 S("ABCD"), 615 T(RightParen) 616 } 617 }, 618 { "+1e+25 ('ABCD')", 4, 619 { F(1e25), 620 T(LeftParen), 621 S("ABCD"), 622 T(RightParen) 623 } 624 }, 625 { "+1E-25 ('ABCD')", 4, 626 { F(1e-25), 627 T(LeftParen), 628 S("ABCD"), 629 T(RightParen) 630 } 631 }, 632 { "+1e-25 ('ABCD')", 4, 633 { F(1e-25), 634 T(LeftParen), 635 S("ABCD"), 636 T(RightParen) 637 } 638 }, 639 // Negative signed extended floats 640 { "-1E25 ('ABCD')", 4, 641 { F(-1e25), 642 T(LeftParen), 643 S("ABCD"), 644 T(RightParen) 645 } 646 }, 647 { "-1e25 ('ABCD')", 4, 648 { F(-1e25), 649 T(LeftParen), 650 S("ABCD"), 651 T(RightParen) 652 } 653 }, 654 { "-1E+25 ('ABCD')", 4, 655 { F(-1e25), 656 T(LeftParen), 657 S("ABCD"), 658 T(RightParen) 659 } 660 }, 661 { "-1e+25 ('ABCD')", 4, 662 { F(-1e25), 663 T(LeftParen), 664 S("ABCD"), 665 T(RightParen) 666 } 667 }, 668 { "-1E-25 ('ABCD')", 4, 669 { F(-1e-25), 670 T(LeftParen), 671 S("ABCD"), 672 T(RightParen) 673 } 674 }, 675 { "-1e-25 ('ABCD')", 4, 676 { F(-1e-25), 677 T(LeftParen), 678 S("ABCD"), 679 T(RightParen) 680 } 681 }, 682 // Miscellaneous extended floats 683 { ".1E-25 ('ABCD')", 4, 684 { F(0.1e-25), 685 T(LeftParen), 686 S("ABCD"), 687 T(RightParen) 688 } 689 }, 690 { "-.1e-25 ('ABCD')", 4, 691 { F(-0.1e-25), 692 T(LeftParen), 693 S("ABCD"), 694 T(RightParen) 695 } 696 }, 697 // Signed floats 698 { "-1.0 ('ABCD')", 4, 699 { F(-1.0), 700 T(LeftParen), 701 S("ABCD"), 702 T(RightParen) 703 } 704 }, 705 { "+1.0 ('ABCD')", 4, 706 { F(1.0), 707 T(LeftParen), 708 S("ABCD"), 709 T(RightParen) 710 } 711 }, 712 // The uber test 713 { "0 -0 +0 1 -2 +3 0. -0. +0. 1. -2. +3. 0.0 -0.1 +0.2 1.0 -2.1 +3.2 " 714 "0.e0 0.e-1 0.e+2 1.e1 2.e-2 3.e+3 -1.e1 -2.e-2 -3.e+3 +1.e1 +2.e-2 +3.e+3 " 715 "0.012345 1.23456 ( ) [ ] | & : -i " 716 " \"abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \\\" ' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 \" " 717 " 'abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" \\' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 ' " 718 " \\000abc_xyz123\"'\"'456 \\xA1a1 \\!\\?\\\\ " 719 " 0x00 0x12 0xabCD 0xaBcD 0x0123456789aBcDeFfEdCbA", 50, 720 { I(0), I(0), I(0), I(1), I(-2), I(3), F(0.0), F(0.0), F(0.0), 721 F(1.0), F(-2.0), F(3.0), F(0.0), F(-0.1), F(0.2), F(1.0), F(-2.1), F(3.2), 722 F(0.0), F(0.0e-1), F(0.0e2), F(1.0e1), F(2.0e-2), F(3.0e3), 723 F(-1.0e1), F(-2.0e-2), F(-3.0e3), F(1.0e1), F(2.0e-2), F(3.0e3), 724 F(0.012345), F(1.23456), T(LeftParen), T(RightParen), T(LeftBracket), 725 T(RightBracket), T(Divider), T(Ampersand), T(Colon), T(CaseInsensitiveFlag), 726 S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)), 727 S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)), 728 S(std::string("\000abc_xyz123\"'\"'456", 18)), 729 S("\241a1"), 730 S("!?\\"), 731 S(std::string("\x00", 1)), S("\x12"), S("\xAB\xCD"), S("\xAB\xCD"), 732 S("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA") 733 } 734 }, 735 }; 736 737 // Undefine our nasty macros 738 #undef T(type) 739 #undef S(str) 740 #undef I(val) 741 #undef F(val) 742 743 const int testCaseCount = sizeof(testCases) / sizeof(test_case); 744 for (int i = 0; i < testCaseCount; i++) { 745 NextSubTest(); 746 // cout << endl << testCases[i].rule << endl; 747 TokenStream stream; 748 try { 749 stream.SetTo(testCases[i].rule); 750 751 CHK(stream.InitCheck() == B_OK); 752 for (int j = 0; j < testCases[i].tokenCount; j++) { 753 const Token *token = stream.Get(); 754 CHK(token); 755 /* 756 cout << tokenTypeToString(token->Type()) << endl; 757 758 if (token->Type() == CharacterString) 759 cout << " token1 == " << token->String() << endl; 760 if (testCases[i].tokens[j]->Type() == CharacterString) 761 cout << " token2 == " << (testCases[i].tokens[j])->String() << endl; 762 763 if (token->Type() == CharacterString) 764 { 765 const std::string &str = token->String(); 766 printf("parser: "); 767 for (int i = 0; i < str.length(); i++) 768 printf("%x ", str[i]); 769 printf("\n"); 770 } 771 if (testCases[i].tokens[j]->Type() == CharacterString) 772 { 773 const std::string &str = (testCases[i].tokens[j])->String(); 774 printf("tester: "); 775 for (int i = 0; i < str.length(); i++) 776 printf("%x ", str[i]); 777 printf("\n"); 778 } 779 780 switch (token->Type()) { 781 case CharacterString: 782 cout << " string == " << token->String() << endl; 783 break; 784 case Integer: 785 cout << " int == " << token->Int() << endl; 786 break; 787 case FloatingPoint: 788 cout << " float == " << token->Float() << endl; 789 break; 790 } 791 */ 792 CHK(*token == *(testCases[i].tokens[j])); 793 delete testCases[i].tokens[j]; 794 } 795 CHK(stream.IsEmpty()); 796 } catch (Err *e) { 797 CppUnit::Exception *err = new CppUnit::Exception(e->Msg()); 798 delete e; 799 throw *err; 800 } 801 } 802 803 #endif // !TEST_R5 804 } 805 806 // Parser Test 807 void 808 MimeSnifferTest::ParserTest() { 809 // test a couple of valid and invalid rules 810 struct test_case { 811 const char *rule; 812 const char *error; // NULL, if valid 813 } testCases[] = { 814 // valid rules 815 { "1.0 (\"ABCD\")", NULL }, 816 { "1.0 ('ABCD')", NULL }, 817 { " 1.0 ('ABCD') ", NULL }, 818 { "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", NULL }, 819 { "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", NULL } , 820 { "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", NULL }, 821 { "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", NULL }, 822 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", NULL }, 823 // These two rules are accepted by the R5 sniffer checker, but not 824 // by the parser. Thus, we're not accepting them with either. 825 // { "1.0 ('ABCD') | ('EFGH')", NULL }, 826 // { "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", NULL }, 827 { "0.8 [0:3] (\\077Mkl0x34 & 'abcdefgh')", NULL }, 828 { "0.8 [0:3] (\\077034 & 'abcd')", NULL }, 829 { "0.8 [0:3] (\\077\\034 & 'ab')", NULL }, 830 { "0.8 [0:3] (\\77\\034 & 'ab')", NULL }, 831 { "0.8 [0:3] (\\7 & 'a')", NULL }, 832 { "0.8 [0:3] (\"\\17\" & 'a')", NULL }, 833 { "0.8 [0:3] ('\\17' & 'a')", NULL }, 834 { "0.8 [0:3] (\\g & 'a')", NULL }, 835 { "0.8 [0:3] (\\g&\\b)", NULL }, 836 { "0.8 [0:3] (\\g\\&b & 'abc')", NULL }, 837 { "0.8 [0:3] (0x3457 & 'ab')", NULL }, 838 { "0.8 [0:3] (0xA4b7 & 'ab')", NULL }, 839 { "0.8 [0:3] ('ab\"' & 'abc')", NULL }, 840 { "0.8 [0:3] (\"ab\\\"\" & 'abc')", NULL }, 841 { "0.8 [0:3] (\"ab\\A\" & 'abc')", NULL }, 842 { "0.8 [0:3] (\"ab'\" & 'abc')", NULL }, 843 { "0.8 [0:3] (\"ab\\\\\" & 'abc')", NULL }, 844 { "0.8 [-5:-3] (\"abc\" & 'abc')", NULL }, 845 // Also accepted by the R5 sniffer but not the R5 parser. We reject. 846 // { "0.8 [5:3] (\"abc\" & 'abc')", NULL }, 847 { "1.0 ('ABCD')", NULL }, 848 { ".2 ('ABCD')", NULL }, 849 { "0. ('ABCD')", NULL }, 850 { "1 ('ABCD')", NULL }, 851 { "+1 ('ABCD')", NULL }, 852 // We accept extended notation floating point numbers now, but 853 // not invalid priorities. 854 // { "1E25 ('ABCD')", NULL }, 855 // { "1e25 ('ABCD')", NULL }, 856 // R5 chokes on this rule :-( 857 #if !TEST_R5 858 { "1e-3 ('ABCD')", NULL }, 859 #endif 860 { "+.003e2 ('ABCD')", NULL }, 861 // This one too. See how much better our parser is? :-) 862 #if !TEST_R5 863 { "-123e-9999999999 ('ABCD')", NULL }, // Hooray for the stunning accuracy of floating point ;-) 864 #endif 865 // invalid rules 866 { "0.0 ('')", 867 "Sniffer pattern error: illegal empty pattern" }, 868 { "('ABCD')", 869 "Sniffer pattern error: match level expected" }, 870 { "[0:3] ('ABCD')", 871 "Sniffer pattern error: match level expected" }, 872 { "0.8 [0:3] ( | 'abcdefghij')", 873 "Sniffer pattern error: missing pattern" }, 874 { "0.8 [0:3] ('ABCDEFG' | )", 875 "Sniffer pattern error: missing pattern" }, 876 { "[0:3] ('ABCD')", 877 "Sniffer pattern error: match level expected" }, 878 { "1.0 (ABCD')", 879 #if TEST_R5 880 "Sniffer pattern error: misplaced single quote" 881 #else 882 "Sniffer pattern error: invalid character 'A'" 883 #endif 884 }, 885 { "1.0 ('ABCD)", 886 #if TEST_R5 887 "Sniffer pattern error: unterminated rule" 888 #else 889 "Sniffer pattern error: unterminated single-quoted string" 890 #endif 891 }, 892 { "1.0 (ABCD)", 893 #if TEST_R5 894 "Sniffer pattern error: missing pattern" 895 #else 896 "Sniffer pattern error: invalid character 'A'" 897 #endif 898 }, 899 { "1.0 (ABCD 'ABCD')", 900 #if TEST_R5 901 "Sniffer pattern error: missing pattern" 902 #else 903 "Sniffer pattern error: invalid character 'A'" 904 #endif 905 }, 906 { "1.0 'ABCD')", 907 #if TEST_R5 908 "Sniffer pattern error: missing pattern" 909 #else 910 "Sniffer pattern error: missing pattern" 911 #endif 912 }, 913 { "1.0 ('ABCD'", 914 "Sniffer pattern error: unterminated rule" }, 915 { "1.0 'ABCD'", 916 #if TEST_R5 917 "Sniffer pattern error: missing sniff pattern" 918 #else 919 "Sniffer pattern error: missing pattern" 920 #endif 921 }, 922 { "0.5 [0:3] ('ABCD' | 'abcd' | [13] 'EFGH')", 923 "Sniffer pattern error: missing pattern" }, 924 { "0.5('ABCD'|'abcd'|[13]'EFGH')", 925 "Sniffer pattern error: missing pattern" }, 926 { "0.5[0:3]([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 927 "Sniffer pattern error: missing pattern" }, 928 { "0.8 [0x10:3] ('ABCDEFG' | 'abcdefghij')", 929 "Sniffer pattern error: pattern offset expected" }, 930 { "0.8 [0:A] ('ABCDEFG' | 'abcdefghij')", 931 #if TEST_R5 932 "Sniffer pattern error: pattern range end expected" 933 #else 934 "Sniffer pattern error: invalid character 'A'" 935 #endif 936 }, 937 { "0.8 [0:3] ('ABCDEFG' & 'abcdefghij')", 938 "Sniffer pattern error: pattern and mask lengths do not match" }, 939 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg' & 'xyzwmno')", 940 #if TEST_R5 941 "Sniffer pattern error: unterminated rule" 942 #else 943 "Sniffer pattern error: expecting '|', ')', or possibly '&'" 944 #endif 945 }, 946 { "0.8 [0:3] (\\g&b & 'a')", 947 #if TEST_R5 948 "Sniffer pattern error: missing mask" 949 #else 950 "Sniffer pattern error: invalid character 'b'" 951 #endif 952 }, 953 { "0.8 [0:3] (\\19 & 'a')", 954 "Sniffer pattern error: pattern and mask lengths do not match" }, 955 { "0.8 [0:3] (0x345 & 'ab')", 956 "Sniffer pattern error: bad hex literal" }, 957 { "0.8 [0:3] (0x3457M & 'abc')", 958 #if TEST_R5 959 "Sniffer pattern error: expecting '|' or '&'" 960 #else 961 "Sniffer pattern error: invalid character 'M'" 962 #endif 963 }, 964 { "0.8 [0:3] (0x3457\\7 & 'abc')", 965 #if TEST_R5 966 "Sniffer pattern error: expecting '|' or '&'" 967 #else 968 "Sniffer pattern error: expecting '|', ')', or possibly '&'" 969 #endif 970 }, 971 972 // Miscellaneous tests designed to hit every remaining 973 // relevant "throw new Err()" statement in the scanner. 974 // R5 versions will come later... 975 #if !TEST_R5 976 { "\x03 ", "Sniffer pattern error: invalid character '\x03'" }, 977 { "\"blah", "Sniffer pattern error: unterminated double-quoted string" }, 978 { "0xThisIsNotAHexCode", "Sniffer pattern error: incomplete hex code" }, 979 { "0xAndNeitherIsThis:-)", "Sniffer pattern error: bad hex literal" }, 980 { ".NotAFloat", "Sniffer pattern error: incomplete floating point number" }, 981 { "-NotANumber", "Sniffer pattern error: incomplete signed number" }, 982 { "+NotANumber", "Sniffer pattern error: incomplete signed number" }, 983 984 { "0.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 985 { "1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 986 { ".0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 987 { "0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 988 { "1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 989 { "-1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 990 { "+1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 991 { "-1.e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 992 { "+1.e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 993 { "-1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 994 { "+1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 995 996 { "0.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 997 { "1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 998 { ".0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 999 { "0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1000 { "1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1001 { "-1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1002 { "+1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1003 { "-1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1004 { "+1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1005 { "-1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1006 { "+1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1007 1008 { "0.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1009 { "1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1010 { ".0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1011 { "0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1012 { "1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1013 { "-1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1014 { "+1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1015 { "-1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1016 { "+1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1017 { "-1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1018 { "+1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1019 1020 { "\\11\\", "Sniffer pattern error: incomplete escape sequence" }, 1021 { "\"Escape!! \\", "Sniffer pattern error: incomplete escape sequence" }, 1022 { "'Escape!! \\", "Sniffer pattern error: incomplete escape sequence" }, 1023 1024 { "\\x", "Sniffer pattern error: incomplete escaped hex code" }, 1025 { "\\xNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" }, 1026 { "\\xAlsoNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" }, 1027 { "\\x0", "Sniffer pattern error: incomplete escaped hex code" }, 1028 1029 { "1.0 (\\377)", NULL }, 1030 { "\\400", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" }, 1031 { "\\777", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" }, 1032 { "1.0 (\\800)", NULL }, 1033 1034 { NULL, "Sniffer pattern error: NULL pattern" }, 1035 1036 { "-2", "Sniffer pattern error: invalid priority" }, 1037 { "+2", "Sniffer pattern error: invalid priority" }, 1038 1039 { "1.0", "Sniffer pattern error: missing expression" }, 1040 #endif // !TEST_R5 1041 1042 1043 // { "1E-25 ('ABCD')", "Sniffer pattern error: missing pattern" }, 1044 // I don't currently understand what's wrong with the above rule... R5 1045 // rejects it though, for some reason. 1046 }; 1047 const int testCaseCount = sizeof(testCases) / sizeof(test_case); 1048 BMimeType type; 1049 for (int32 i = 0; i < testCaseCount; i++) { 1050 //cout << endl << "----------------------------------------------------------------------" << endl; 1051 NextSubTest(); 1052 test_case &testCase = testCases[i]; 1053 //cout << endl << testCase.rule << endl; 1054 BString parseError; 1055 status_t error = BMimeType::CheckSnifferRule(testCase.rule, 1056 &parseError); 1057 if (testCase.error == NULL) { 1058 if (error != B_OK) { 1059 cout << endl << "This sucker's gonna fail..." 1060 << endl << "RULE: '" << testCase.rule << "'" 1061 << endl << "ERROR: " 1062 << endl << parseError.String() 1063 << endl; 1064 } 1065 CHK(error == B_OK); 1066 } else { 1067 1068 // if (parseError.FindLast(testCase.error) >= 0) { 1069 // cout << endl << parseError.String(); // << endl; 1070 // cout << endl << testCase.error << endl; 1071 // } 1072 // cout << endl << parseError.String(); // << endl; 1073 /* 1074 if (parseError.FindLast(testCase.error) >= 0) { 1075 cout << " -- OKAY" << endl; 1076 } else { 1077 cout << " -- NOGO" << endl; 1078 cout << testCase.error << endl; 1079 } 1080 */ 1081 if (testCase.rule && error != B_BAD_MIME_SNIFFER_RULE) { 1082 printf("rule: `%s'", testCase.rule); 1083 RES(error); 1084 } 1085 CHK(error == (testCase.rule ? B_BAD_MIME_SNIFFER_RULE : B_BAD_VALUE)); 1086 CHK(parseError.FindLast(testCase.error) >= 0); 1087 } 1088 } 1089 } 1090 1091 void dumpStr(const std::string &string, const char *label = NULL) { 1092 if (label) 1093 printf("%s: ", label); 1094 for (uint i = 0; i < string.length(); i++) 1095 printf("%x ", string[i]); 1096 printf("\n"); 1097 } 1098 1099 1100 void 1101 MimeSnifferTest::SnifferTest() { 1102 #if TEST_R5 1103 Outputf("(no tests actually performed for R5 version)\n"); 1104 #else // TEST_R5 1105 const char *rules[] = { 1106 // General tests 1107 "1.0 ('#include')", 1108 "0.0 [0:32] ('#include')", 1109 "0.e-230 [0:32] (\\#include | \\#ifndef)", 1110 ".2 ([0:32] \"#include\" | [0] '#define' | [0:200] 'int main(')", 1111 "1.0 [0:32] ('<html>' | '<head>' | '<body>')", 1112 // Range tests 1113 "1.0 [0:9] ('rock')", 1114 "1.0 ([0:9] 'roll')", 1115 "1.0 ([0:9] 'rock' | [0:9] 'roll')", 1116 "1.0 [0:9] ('rock' | 'roll')", 1117 "1.0 ([0] 'rock')", 1118 "1.0 ([0] 'rock' | [0:9] 'roll')", 1119 "1.0 ([9] 'rock' | [10] 'roll')", 1120 // Mask, octal, and hex tests 1121 "1.0 (\\xFF\\xFF & '\\xF0\\xF0')", 1122 "1.0 ('\\33\\34' & \\xFF\\x00)", 1123 "1.0 (\\33\\34 & \"\\x00\\xFF\")", 1124 "1.0 (\\xFF & \\x05)", 1125 // Conjunctions 1126 "1.0 ([4] 'rock') ([9] 'roll')", 1127 "1.0 [5] ('roll') [10] ('rock')", 1128 "1.0 [4] ('rock' | 'roll') ([9] 'rock' | [10] 'roll')", 1129 // Case insensitivity tests 1130 "1.0 [4] (-i 'Rock' | 'Roll')", 1131 "1.0 [9] ('Rock' | -i 'Roll')", 1132 "1.0 (-i [4] 'Rock' | [9] 'Roll')", 1133 "1.0 ([9] 'Rock' | -i [4] 'Roll')", 1134 }; 1135 const int ruleCount = sizeof(rules)/sizeof(char*); 1136 struct test_case { 1137 const std::string data; 1138 const bool result[ruleCount]; 1139 } tests[] = { 1140 1141 //------------------------------ 1142 { 1143 "#include <stdio.h> \n\ 1144 #include <stdlib.h> \n\ 1145 \n\ 1146 int main() { \n\ 1147 return 0; \n\ 1148 } \n\ 1149 \n\ 1150 ", { true, true, true, true, false, 1151 false, false, false, false, false, false, false, 1152 false, false, false, false, 1153 false, false, false, 1154 false, false, false, false 1155 } 1156 }, 1157 //------------------------------ 1158 { 1159 " #include <stdio.h> \n\ 1160 #include <stdlib.h> \n\ 1161 \n\ 1162 int main() { \n\ 1163 return 0; \n\ 1164 } \n\ 1165 \n\ 1166 ", { false, true, true, true, false, 1167 false, false, false, false, false, false, false, 1168 false, false, false, false, 1169 false, false, false, 1170 false, false, false, false 1171 } 1172 }, 1173 //------------------------------ 1174 { 1175 "#ifndef SOME_TEST_H \n\ 1176 #define SOME_TEST_H \n\ 1177 \n\ 1178 void main(); \n\ 1179 \n\ 1180 #endif // SOME_TEST_H \n\ 1181 \n\ 1182 ", { false, false, true, false, false, 1183 false, false, false, false, false, false, false, 1184 false, false, false, false, 1185 false, false, false, 1186 false, false, false, false 1187 } 1188 }, 1189 //------------------------------ 1190 { 1191 "//------------------ \n\ 1192 // SomeTest.cpp \n\ 1193 //------------------ \n\ 1194 #include <stdio.h> \n\ 1195 \n\ 1196 int main() { \n\ 1197 return 0; \n\ 1198 } \n\ 1199 \n\ 1200 ", { false, false, false, true, false, 1201 false, false, false, false, false, false, false, 1202 false, false, false, true, 1203 // ^^^^ <= coincedence 1204 false, false, false, 1205 false, false, false, false 1206 } 1207 }, 1208 //------------------------------ 1209 { 1210 "<html> \n\ 1211 <body bgcolor='#ffffff'> \n\ 1212 HTML is boring as hell <br> \n\ 1213 when i write it too much <br> \n\ 1214 my head starts to swell <br> \n\ 1215 <br> \n\ 1216 HTML is stupid and dumb <br> \n\ 1217 running through traffic <br> \n\ 1218 is ten times as fun <br> \n\ 1219 </body> \n\ 1220 </html> \n\ 1221 ", { false, false, false, false, true, 1222 false, false, false, false, false, false, false, 1223 false, false, false, false, 1224 false, false, false, 1225 false, false, false, false 1226 } 1227 }, 1228 //--------- <= Ten characters in 1229 { 1230 " rock&roll", // 5,10 1231 { false, false, false, false, false, 1232 true, false, true, true, false, false, true, 1233 false, false, false, false, 1234 false, false, false, 1235 false, false, false, false 1236 } 1237 }, 1238 //--------- <= Ten characters in 1239 { 1240 " rock&roll", // 4,9 1241 { false, false, false, false, false, 1242 true, true, true, true, false, true, false, 1243 false, false, false, false, 1244 true, false, false, 1245 true, true, true, false 1246 } 1247 }, 1248 //--------- <= Ten characters in 1249 { 1250 " roll&rock", // 5,10 1251 { false, false, false, false, false, 1252 false, true, true, true, false, true, false, 1253 false, false, false, false, 1254 false, true, false, 1255 false, false, false, false 1256 } 1257 }, 1258 //--------- <= Ten characters in 1259 { 1260 " roll&rock", // 4,9 1261 { false, false, false, false, false, 1262 true, true, true, true, false, true, true, 1263 false, false, false, false, 1264 false, false, true, 1265 true, true, false, true 1266 } 1267 }, 1268 //--------- <= Ten characters in 1269 { 1270 " ROCK&ROLL", // 4,9 1271 { false, false, false, false, false, 1272 false, false, false, false, false, false, false, 1273 false, false, false, false, 1274 false, false, false, 1275 true, true, true, false 1276 } 1277 }, 1278 //--------- <= Ten characters in 1279 { 1280 " rOlL&RoCk", // 4,9 1281 { false, false, false, false, false, 1282 false, false, false, false, false, false, false, 1283 false, false, false, false, 1284 false, false, false, 1285 true, true, false, true 1286 } 1287 }, 1288 //------------------------------ 1289 { 1290 "\xFF\xFF FF FF", 1291 { false, false, false, false, false, 1292 false, false, false, false, false, false, false, 1293 true, false, false, true, 1294 false, false, false, 1295 false, false, false, false 1296 } 1297 }, 1298 //------------------------------ 1299 { 1300 "\xFA\xFA FA FA", 1301 { false, false, false, false, false, 1302 false, false, false, false, false, false, false, 1303 true, false, false, false, 1304 false, false, false, 1305 false, false, false, false 1306 } 1307 }, 1308 //------------------------------ 1309 { 1310 "\xAF\xAF AF AF", 1311 { false, false, false, false, false, 1312 false, false, false, false, false, false, false, 1313 false, false, false, true, 1314 false, false, false, 1315 false, false, false, false 1316 } 1317 }, 1318 //------------------------------ 1319 { 1320 std::string("\033\000 033 000", 10), // Otherwise, it thinks the NULL is the end of the string 1321 { false, false, false, false, false, 1322 false, false, false, false, false, false, false, 1323 false, true, false, false, 1324 false, false, false, 1325 false, false, false, false 1326 } 1327 }, 1328 //------------------------------ 1329 { 1330 std::string("\000\034 000 034", 10), // Otherwise, it thinks the NULL is the end of the string 1331 { false, false, false, false, false, 1332 false, false, false, false, false, false, false, 1333 false, false, true, false, 1334 false, false, false, 1335 false, false, false, false 1336 } 1337 }, 1338 //------------------------------ 1339 { 1340 "\033\034 033 034", 1341 { false, false, false, false, false, 1342 false, false, false, false, false, false, false, 1343 false, true, true, false, 1344 false, false, false, 1345 false, false, false, false 1346 } 1347 }, 1348 }; // tests[] 1349 const int32 testCount = sizeof(tests)/sizeof(test_case); 1350 1351 for (int i = 0; i < testCount; i++) { 1352 if (i > 0) 1353 NextSubTestBlock(); 1354 test_case &test = tests[i]; 1355 // cout << "--------------------------------------------------------------------------------" << endl; 1356 // cout << test.data << endl; 1357 1358 for (int j = 0; j < ruleCount; j++) { 1359 NextSubTest(); 1360 // cout << "############################################################" << endl; 1361 // cout << rules[j] << endl; 1362 // cout << test.result[j] << endl; 1363 Rule rule; 1364 BString errorMsg; 1365 status_t err = parse(rules[j], &rule, &errorMsg); 1366 // dumpStr(test.data, "str "); 1367 if (err) { 1368 // cout << "PARSE FAILURE!!!" << endl; 1369 // cout << errorMsg.String() << endl; 1370 } 1371 CHK(err == B_OK); 1372 if (!err) { 1373 BMallocIO data; 1374 data.Write(test.data.data(), test.data.length());//strlen(test.data)); 1375 bool match = rule.Sniff(&data); 1376 // cout << match << endl; 1377 // cout << "match == " << (match ? "yes" : "no") << ", " 1378 // << ((match == test.result[j]) ? "SUCCESS" : "FAILURE") << endl; 1379 CHK(match == test.result[j]); 1380 } 1381 } 1382 } 1383 #endif // !TEST_R5 1384 } 1385