1 // MimeSnifferTest.cpp 2 3 #include "MimeSnifferTest.h" 4 5 #include <cppunit/Test.h> 6 #include <cppunit/TestSuite.h> 7 #include <cppunit/TestCaller.h> 8 #include <sniffer/Rule.h> 9 #include <sniffer/Parser.h> 10 #include <DataIO.h> 11 #include <Mime.h> 12 #include <String.h> // BString 13 #include <TestUtils.h> 14 15 #include <stdio.h> 16 17 #include <iostream> 18 using std::cout; 19 using std::endl; 20 21 using namespace BPrivate::Storage::Sniffer; 22 23 // Suite 24 CppUnit::Test* 25 MimeSnifferTest::Suite() { 26 CppUnit::TestSuite *suite = new CppUnit::TestSuite(); 27 typedef CppUnit::TestCaller<MimeSnifferTest> TC; 28 29 suite->addTest( new TC("Mime Sniffer::Scanner Test", 30 &MimeSnifferTest::ScannerTest) ); 31 suite->addTest( new TC("Mime Sniffer::Parser Test", 32 &MimeSnifferTest::ParserTest) ); 33 suite->addTest( new TC("Mime Sniffer::Sniffer Test", 34 &MimeSnifferTest::SnifferTest) ); 35 36 return suite; 37 } 38 39 // Scanner Test 40 void 41 MimeSnifferTest::ScannerTest() { 42 #if TEST_R5 43 Outputf("(no tests actually performed for R5 version)\n"); 44 #else // TEST_R5 45 46 47 // tests: 48 // Internal TokenStream and CharStream classes 49 50 // Define some useful macros for dynamically allocating 51 // various Token classes 52 #define T(type) (new Token(type, -1)) 53 #define S(str) (new StringToken(str, -1)) 54 #define I(val) (new IntToken(val, -1)) 55 #define F(val) (new FloatToken(val, -1)) 56 57 struct test_case { 58 const char *rule; 59 int tokenCount; 60 Token *tokens[256]; 61 } testCases[] = { 62 { "'Hey'[]:", 4, 63 { S("Hey"), 64 T(LeftBracket), 65 T(RightBracket), 66 T(Colon) 67 } 68 }, 69 { "1", 1, { I(1) } }, 70 { "1.0", 1, { F(1.0) } }, 71 72 { "1.0 (\"ABCD\")", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 73 { "1.0 ('ABCD')", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 74 { " 1.0 ('ABCD') ", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 75 { "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", 11, 76 { F(0.8), 77 T(LeftBracket), 78 I(0), 79 T(Colon), 80 I(3), 81 T(RightBracket), 82 T(LeftParen), 83 S("ABCDEFG"), 84 T(Divider), 85 S("abcdefghij"), 86 T(RightParen) 87 } 88 }, 89 { "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 17, 90 { F(0.5), 91 T(LeftParen), 92 T(LeftBracket), 93 I(10), 94 T(RightBracket), 95 S("ABCD"), 96 T(Divider), 97 T(LeftBracket), 98 I(17), 99 T(RightBracket), 100 S("abcd"), 101 T(Divider), 102 T(LeftBracket), 103 I(13), 104 T(RightBracket), 105 S("EFGH"), 106 T(RightParen) 107 } 108 }, 109 { "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", 13, 110 { F(0.5), 111 T(LeftBracket), 112 I(0), 113 T(Colon), 114 I(3), 115 T(RightBracket), 116 T(LeftParen), 117 S("ABCD"), 118 T(Divider), 119 S("abcd"), 120 T(Divider), 121 S("EFGH"), 122 T(RightParen) 123 } 124 }, 125 { "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", 11, 126 { F(0.8), 127 T(LeftBracket), 128 I(0), 129 T(Colon), 130 I(3), 131 T(RightBracket), 132 T(LeftParen), 133 S("ABCDEFG"), 134 T(Divider), 135 S("abcdefghij"), 136 T(RightParen) 137 } 138 }, 139 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", 11, 140 { F(0.8), 141 T(LeftBracket), 142 I(0), 143 T(Colon), 144 I(3), 145 T(RightBracket), 146 T(LeftParen), 147 S("ABCDEFG"), 148 T(Ampersand), 149 S("abcdefg"), 150 T(RightParen) 151 } 152 }, 153 { "1.0 ('ABCD') | ('EFGH')", 8, 154 { F(1.0), 155 T(LeftParen), 156 S("ABCD"), 157 T(RightParen), 158 T(Divider), 159 T(LeftParen), 160 S("EFGH"), 161 T(RightParen) 162 } 163 }, 164 { "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", 18, 165 { F(1.0), 166 T(LeftBracket), 167 I(0), 168 T(Colon), 169 I(3), 170 T(RightBracket), 171 T(LeftParen), 172 S("ABCD"), 173 T(RightParen), 174 T(Divider), 175 T(LeftBracket), 176 I(2), 177 T(Colon), 178 I(4), 179 T(RightBracket), 180 T(LeftParen), 181 S("EFGH"), 182 T(RightParen) 183 } 184 }, 185 { "0.8 [0:4] (\\077Mkj0x34 & 'abcdefgh')", 11, 186 { F(0.8), 187 T(LeftBracket), 188 I(0), 189 T(Colon), 190 I(4), 191 T(RightBracket), 192 T(LeftParen), 193 S("\077Mkj0x34"), 194 T(Ampersand), 195 S("abcdefgh"), 196 T(RightParen) 197 } 198 }, 199 { "0.8 [0:4] (\\077Mkj\\x34 & 'abcdefgh')", 11, 200 { F(0.8), 201 T(LeftBracket), 202 I(0), 203 T(Colon), 204 I(4), 205 T(RightBracket), 206 T(LeftParen), 207 S("\077Mkj\x34"), 208 T(Ampersand), 209 S("abcdefgh"), 210 T(RightParen) 211 } 212 }, 213 { "0.8 [0:3] (\\077034 & 'abcd')", 11, 214 { F(0.8), 215 T(LeftBracket), 216 I(0), 217 T(Colon), 218 I(3), 219 T(RightBracket), 220 T(LeftParen), 221 S("\077034"), 222 T(Ampersand), 223 S("abcd"), 224 T(RightParen) 225 } 226 }, 227 { "0.8 [0:3] (\\077\\034 & 'ab')", 11, 228 { F(0.8), 229 T(LeftBracket), 230 I(0), 231 T(Colon), 232 I(3), 233 T(RightBracket), 234 T(LeftParen), 235 S("\077\034"), 236 T(Ampersand), 237 S("ab"), 238 T(RightParen) 239 } 240 }, 241 { "0.8 [0:3] (\\77\\034 & 'ab')", 11, 242 { F(0.8), 243 T(LeftBracket), 244 I(0), 245 T(Colon), 246 I(3), 247 T(RightBracket), 248 T(LeftParen), 249 S("\077\034"), 250 T(Ampersand), 251 S("ab"), 252 T(RightParen) 253 } 254 }, 255 { "0.8 [0:3] (\\7 & 'a')", 11, 256 { F(0.8), 257 T(LeftBracket), 258 I(0), 259 T(Colon), 260 I(3), 261 T(RightBracket), 262 T(LeftParen), 263 S("\007"), 264 T(Ampersand), 265 S("a"), 266 T(RightParen) 267 } 268 }, 269 { "0.8 [0:3] (\"\\17\" & 'a')", 11, 270 { F(0.8), 271 T(LeftBracket), 272 I(0), 273 T(Colon), 274 I(3), 275 T(RightBracket), 276 T(LeftParen), 277 S("\017"), 278 T(Ampersand), 279 S("a"), 280 T(RightParen) 281 } 282 }, 283 { "0.8 [0:3] ('\\17' & 'a')", 11, 284 { F(0.8), 285 T(LeftBracket), 286 I(0), 287 T(Colon), 288 I(3), 289 T(RightBracket), 290 T(LeftParen), 291 S("\017"), 292 T(Ampersand), 293 S("a"), 294 T(RightParen) 295 } 296 }, 297 { "0.8 [0:3] (\\g & 'a')", 11, 298 { F(0.8), 299 T(LeftBracket), 300 I(0), 301 T(Colon), 302 I(3), 303 T(RightBracket), 304 T(LeftParen), 305 S("g"), 306 T(Ampersand), 307 S("a"), 308 T(RightParen) 309 } 310 }, 311 { "0.8 [0:3] (\\g&\\b)", 11, 312 { F(0.8), 313 T(LeftBracket), 314 I(0), 315 T(Colon), 316 I(3), 317 T(RightBracket), 318 T(LeftParen), 319 S("g"), 320 T(Ampersand), 321 S("\b"), 322 T(RightParen) 323 } 324 }, 325 { "0.8 [0:3] (\\g\\&b & 'abc')", 11, 326 { F(0.8), 327 T(LeftBracket), 328 I(0), 329 T(Colon), 330 I(3), 331 T(RightBracket), 332 T(LeftParen), 333 S("g&b"), 334 T(Ampersand), 335 S("abc"), 336 T(RightParen) 337 } 338 }, 339 { "0.8 [0:3] (0x3457 & 'ab')", 11, 340 { F(0.8), 341 T(LeftBracket), 342 I(0), 343 T(Colon), 344 I(3), 345 T(RightBracket), 346 T(LeftParen), 347 S("\x34\x57"), 348 T(Ampersand), 349 S("ab"), 350 T(RightParen) 351 } 352 }, 353 { "0.8 [0:3] (\\x34\\x57 & 'ab')", 11, 354 { F(0.8), 355 T(LeftBracket), 356 I(0), 357 T(Colon), 358 I(3), 359 T(RightBracket), 360 T(LeftParen), 361 S("\x34\x57"), 362 T(Ampersand), 363 S("ab"), 364 T(RightParen) 365 } 366 }, 367 { "0.8 [0:3] (0xA4b7 & 'ab')", 11, 368 { F(0.8), 369 T(LeftBracket), 370 I(0), 371 T(Colon), 372 I(3), 373 T(RightBracket), 374 T(LeftParen), 375 S("\xA4\xb7"), 376 T(Ampersand), 377 S("ab"), 378 T(RightParen) 379 } 380 }, 381 { "0.8 [0:3] (\\xA4\\xb7 & 'ab')", 11, 382 { F(0.8), 383 T(LeftBracket), 384 I(0), 385 T(Colon), 386 I(3), 387 T(RightBracket), 388 T(LeftParen), 389 S("\xA4\xb7"), 390 T(Ampersand), 391 S("ab"), 392 T(RightParen) 393 } 394 }, 395 { "0.8 [0:3] (\"\\xA4\\xb7\" & 'ab')", 11, 396 { F(0.8), 397 T(LeftBracket), 398 I(0), 399 T(Colon), 400 I(3), 401 T(RightBracket), 402 T(LeftParen), 403 S("\xA4\xb7"), 404 T(Ampersand), 405 S("ab"), 406 T(RightParen) 407 } 408 }, 409 { "0.8 [0:3] (\'\\xA4\\xb7\' & 'ab')", 11, 410 { F(0.8), 411 T(LeftBracket), 412 I(0), 413 T(Colon), 414 I(3), 415 T(RightBracket), 416 T(LeftParen), 417 S("\xA4\xb7"), 418 T(Ampersand), 419 S("ab"), 420 T(RightParen) 421 } 422 }, 423 { "0.8 [0:3] ('ab\"' & 'abc')", 11, 424 { F(0.8), 425 T(LeftBracket), 426 I(0), 427 T(Colon), 428 I(3), 429 T(RightBracket), 430 T(LeftParen), 431 S("ab\""), 432 T(Ampersand), 433 S("abc"), 434 T(RightParen) 435 } 436 }, 437 { "0.8 [0:3] (\"ab\\\"\" & 'abc')", 11, 438 { F(0.8), 439 T(LeftBracket), 440 I(0), 441 T(Colon), 442 I(3), 443 T(RightBracket), 444 T(LeftParen), 445 S("ab\""), 446 T(Ampersand), 447 S("abc"), 448 T(RightParen) 449 } 450 }, 451 { "0.8 [0:3] (\"ab\\A\" & 'abc')", 11, 452 { F(0.8), 453 T(LeftBracket), 454 I(0), 455 T(Colon), 456 I(3), 457 T(RightBracket), 458 T(LeftParen), 459 S("abA"), 460 T(Ampersand), 461 S("abc"), 462 T(RightParen) 463 } 464 }, 465 { "0.8 [0:3] (\"ab'\" & 'abc')", 11, 466 { F(0.8), 467 T(LeftBracket), 468 I(0), 469 T(Colon), 470 I(3), 471 T(RightBracket), 472 T(LeftParen), 473 S("ab'"), 474 T(Ampersand), 475 S("abc"), 476 T(RightParen) 477 } 478 }, 479 { "0.8 [0:3] (\"ab\\\\\" & 'abc')", 11, 480 { F(0.8), 481 T(LeftBracket), 482 I(0), 483 T(Colon), 484 I(3), 485 T(RightBracket), 486 T(LeftParen), 487 S("ab\\"), 488 T(Ampersand), 489 S("abc"), 490 T(RightParen) 491 } 492 }, 493 { "0.8 [-5:-3] (\"abc\" & 'abc')", 11, 494 { F(0.8), 495 T(LeftBracket), 496 I(-5), 497 T(Colon), 498 I(-3), 499 T(RightBracket), 500 T(LeftParen), 501 S("abc"), 502 T(Ampersand), 503 S("abc"), 504 T(RightParen) 505 } 506 }, 507 { "0.8 [5:3] (\"abc\" & 'abc')", 11, 508 { F(0.8), 509 T(LeftBracket), 510 I(5), 511 T(Colon), 512 I(3), 513 T(RightBracket), 514 T(LeftParen), 515 S("abc"), 516 T(Ampersand), 517 S("abc"), 518 T(RightParen) 519 } 520 }, 521 { "1.2 ('ABCD')", 4, 522 { F(1.2), 523 T(LeftParen), 524 S("ABCD"), 525 T(RightParen) 526 } 527 }, 528 { ".2 ('ABCD')", 4, 529 { F(0.2), 530 T(LeftParen), 531 S("ABCD"), 532 T(RightParen) 533 } 534 }, 535 { "0. ('ABCD')", 4, 536 { F(0.0), 537 T(LeftParen), 538 S("ABCD"), 539 T(RightParen) 540 } 541 }, 542 // Signed integers 543 { "-1 ('ABCD')", 4, 544 { I(-1), 545 T(LeftParen), 546 S("ABCD"), 547 T(RightParen) 548 } 549 }, 550 { "+1 ('ABCD')", 4, 551 { I(1), 552 T(LeftParen), 553 S("ABCD"), 554 T(RightParen) 555 } 556 }, 557 // Unsigned extended floats 558 { "1E25 ('ABCD')", 4, 559 { F(1e25), 560 T(LeftParen), 561 S("ABCD"), 562 T(RightParen) 563 } 564 }, 565 { "1e25 ('ABCD')", 4, 566 { F(1e25), 567 T(LeftParen), 568 S("ABCD"), 569 T(RightParen) 570 } 571 }, 572 { "1E+25 ('ABCD')", 4, 573 { F(1e25), 574 T(LeftParen), 575 S("ABCD"), 576 T(RightParen) 577 } 578 }, 579 { "1e+25 ('ABCD')", 4, 580 { F(1e25), 581 T(LeftParen), 582 S("ABCD"), 583 T(RightParen) 584 } 585 }, 586 { "1E-25 ('ABCD')", 4, 587 { F(1e-25), 588 T(LeftParen), 589 S("ABCD"), 590 T(RightParen) 591 } 592 }, 593 { "1e-25 ('ABCD')", 4, 594 { F(1e-25), 595 T(LeftParen), 596 S("ABCD"), 597 T(RightParen) 598 } 599 }, 600 // Positive signed extended floats 601 { "+1E25 ('ABCD')", 4, 602 { F(1e25), 603 T(LeftParen), 604 S("ABCD"), 605 T(RightParen) 606 } 607 }, 608 { "+1e25 ('ABCD')", 4, 609 { F(1e25), 610 T(LeftParen), 611 S("ABCD"), 612 T(RightParen) 613 } 614 }, 615 { "+1E+25 ('ABCD')", 4, 616 { F(1e25), 617 T(LeftParen), 618 S("ABCD"), 619 T(RightParen) 620 } 621 }, 622 { "+1e+25 ('ABCD')", 4, 623 { F(1e25), 624 T(LeftParen), 625 S("ABCD"), 626 T(RightParen) 627 } 628 }, 629 { "+1E-25 ('ABCD')", 4, 630 { F(1e-25), 631 T(LeftParen), 632 S("ABCD"), 633 T(RightParen) 634 } 635 }, 636 { "+1e-25 ('ABCD')", 4, 637 { F(1e-25), 638 T(LeftParen), 639 S("ABCD"), 640 T(RightParen) 641 } 642 }, 643 // Negative signed extended floats 644 { "-1E25 ('ABCD')", 4, 645 { F(-1e25), 646 T(LeftParen), 647 S("ABCD"), 648 T(RightParen) 649 } 650 }, 651 { "-1e25 ('ABCD')", 4, 652 { F(-1e25), 653 T(LeftParen), 654 S("ABCD"), 655 T(RightParen) 656 } 657 }, 658 { "-1E+25 ('ABCD')", 4, 659 { F(-1e25), 660 T(LeftParen), 661 S("ABCD"), 662 T(RightParen) 663 } 664 }, 665 { "-1e+25 ('ABCD')", 4, 666 { F(-1e25), 667 T(LeftParen), 668 S("ABCD"), 669 T(RightParen) 670 } 671 }, 672 { "-1E-25 ('ABCD')", 4, 673 { F(-1e-25), 674 T(LeftParen), 675 S("ABCD"), 676 T(RightParen) 677 } 678 }, 679 { "-1e-25 ('ABCD')", 4, 680 { F(-1e-25), 681 T(LeftParen), 682 S("ABCD"), 683 T(RightParen) 684 } 685 }, 686 // Miscellaneous extended floats 687 { ".1E-25 ('ABCD')", 4, 688 { F(0.1e-25), 689 T(LeftParen), 690 S("ABCD"), 691 T(RightParen) 692 } 693 }, 694 { "-.1e-25 ('ABCD')", 4, 695 { F(-0.1e-25), 696 T(LeftParen), 697 S("ABCD"), 698 T(RightParen) 699 } 700 }, 701 // Signed floats 702 { "-1.0 ('ABCD')", 4, 703 { F(-1.0), 704 T(LeftParen), 705 S("ABCD"), 706 T(RightParen) 707 } 708 }, 709 { "+1.0 ('ABCD')", 4, 710 { F(1.0), 711 T(LeftParen), 712 S("ABCD"), 713 T(RightParen) 714 } 715 }, 716 // The uber test 717 { "0 -0 +0 1 -2 +3 0. -0. +0. 1. -2. +3. 0.0 -0.1 +0.2 1.0 -2.1 +3.2 " 718 "0.e0 0.e-1 0.e+2 1.e1 2.e-2 3.e+3 -1.e1 -2.e-2 -3.e+3 +1.e1 +2.e-2 +3.e+3 " 719 "0.012345 1.23456 ( ) [ ] | & : -i " 720 " \"abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \\\" ' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 \" " 721 " 'abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" \\' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 ' " 722 " \\000abc_xyz123\"'\"'456 \\xA1a1 \\!\\?\\\\ " 723 " 0x00 0x12 0xabCD 0xaBcD 0x0123456789aBcDeFfEdCbA", 50, 724 { I(0), I(0), I(0), I(1), I(-2), I(3), F(0.0), F(0.0), F(0.0), 725 F(1.0), F(-2.0), F(3.0), F(0.0), F(-0.1), F(0.2), F(1.0), F(-2.1), F(3.2), 726 F(0.0), F(0.0e-1), F(0.0e2), F(1.0e1), F(2.0e-2), F(3.0e3), 727 F(-1.0e1), F(-2.0e-2), F(-3.0e3), F(1.0e1), F(2.0e-2), F(3.0e3), 728 F(0.012345), F(1.23456), T(LeftParen), T(RightParen), T(LeftBracket), 729 T(RightBracket), T(Divider), T(Ampersand), T(Colon), T(CaseInsensitiveFlag), 730 S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)), 731 S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)), 732 S(std::string("\000abc_xyz123\"'\"'456", 18)), 733 S("\241a1"), 734 S("!?\\"), 735 S(std::string("\x00", 1)), S("\x12"), S("\xAB\xCD"), S("\xAB\xCD"), 736 S("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA") 737 } 738 }, 739 }; 740 741 // Undefine our nasty macros 742 #undef T 743 #undef S 744 #undef I 745 #undef F 746 747 const int testCaseCount = sizeof(testCases) / sizeof(test_case); 748 for (int i = 0; i < testCaseCount; i++) { 749 NextSubTest(); 750 // cout << endl << testCases[i].rule << endl; 751 TokenStream stream; 752 try { 753 stream.SetTo(testCases[i].rule); 754 755 CHK(stream.InitCheck() == B_OK); 756 for (int j = 0; j < testCases[i].tokenCount; j++) { 757 const Token *token = stream.Get(); 758 CHK(token); 759 /* 760 cout << tokenTypeToString(token->Type()) << endl; 761 762 if (token->Type() == CharacterString) 763 cout << " token1 == " << token->String() << endl; 764 if (testCases[i].tokens[j]->Type() == CharacterString) 765 cout << " token2 == " << (testCases[i].tokens[j])->String() << endl; 766 767 if (token->Type() == CharacterString) 768 { 769 const std::string &str = token->String(); 770 printf("parser: "); 771 for (int i = 0; i < str.length(); i++) 772 printf("%x ", str[i]); 773 printf("\n"); 774 } 775 if (testCases[i].tokens[j]->Type() == CharacterString) 776 { 777 const std::string &str = (testCases[i].tokens[j])->String(); 778 printf("tester: "); 779 for (int i = 0; i < str.length(); i++) 780 printf("%x ", str[i]); 781 printf("\n"); 782 } 783 784 switch (token->Type()) { 785 case CharacterString: 786 cout << " string == " << token->String() << endl; 787 break; 788 case Integer: 789 cout << " int == " << token->Int() << endl; 790 break; 791 case FloatingPoint: 792 cout << " float == " << token->Float() << endl; 793 break; 794 } 795 */ 796 CHK(*token == *(testCases[i].tokens[j])); 797 delete testCases[i].tokens[j]; 798 } 799 CHK(stream.IsEmpty()); 800 } catch (Err *e) { 801 CppUnit::Exception *err = new CppUnit::Exception(e->Msg()); 802 delete e; 803 throw *err; 804 } 805 } 806 807 #endif // !TEST_R5 808 } 809 810 // Parser Test 811 void 812 MimeSnifferTest::ParserTest() { 813 // test a couple of valid and invalid rules 814 struct test_case { 815 const char *rule; 816 const char *error; // NULL, if valid 817 } testCases[] = { 818 // valid rules 819 { "1.0 (\"ABCD\")", NULL }, 820 { "1.0 ('ABCD')", NULL }, 821 { " 1.0 ('ABCD') ", NULL }, 822 { "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", NULL }, 823 { "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", NULL } , 824 { "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", NULL }, 825 { "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", NULL }, 826 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", NULL }, 827 // These two rules are accepted by the R5 sniffer checker, but not 828 // by the parser. Thus, we're not accepting them with either. 829 // { "1.0 ('ABCD') | ('EFGH')", NULL }, 830 // { "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", NULL }, 831 { "0.8 [0:3] (\\077Mkl0x34 & 'abcdefgh')", NULL }, 832 { "0.8 [0:3] (\\077034 & 'abcd')", NULL }, 833 { "0.8 [0:3] (\\077\\034 & 'ab')", NULL }, 834 { "0.8 [0:3] (\\77\\034 & 'ab')", NULL }, 835 { "0.8 [0:3] (\\7 & 'a')", NULL }, 836 { "0.8 [0:3] (\"\\17\" & 'a')", NULL }, 837 { "0.8 [0:3] ('\\17' & 'a')", NULL }, 838 { "0.8 [0:3] (\\g & 'a')", NULL }, 839 { "0.8 [0:3] (\\g&\\b)", NULL }, 840 { "0.8 [0:3] (\\g\\&b & 'abc')", NULL }, 841 { "0.8 [0:3] (0x3457 & 'ab')", NULL }, 842 { "0.8 [0:3] (0xA4b7 & 'ab')", NULL }, 843 { "0.8 [0:3] ('ab\"' & 'abc')", NULL }, 844 { "0.8 [0:3] (\"ab\\\"\" & 'abc')", NULL }, 845 { "0.8 [0:3] (\"ab\\A\" & 'abc')", NULL }, 846 { "0.8 [0:3] (\"ab'\" & 'abc')", NULL }, 847 { "0.8 [0:3] (\"ab\\\\\" & 'abc')", NULL }, 848 { "0.8 [-5:-3] (\"abc\" & 'abc')", NULL }, 849 // Also accepted by the R5 sniffer but not the R5 parser. We reject. 850 // { "0.8 [5:3] (\"abc\" & 'abc')", NULL }, 851 { "1.0 ('ABCD')", NULL }, 852 { ".2 ('ABCD')", NULL }, 853 { "0. ('ABCD')", NULL }, 854 { "1 ('ABCD')", NULL }, 855 { "+1 ('ABCD')", NULL }, 856 // We accept extended notation floating point numbers now, but 857 // not invalid priorities. 858 // { "1E25 ('ABCD')", NULL }, 859 // { "1e25 ('ABCD')", NULL }, 860 // R5 chokes on this rule :-( 861 #if !TEST_R5 862 { "1e-3 ('ABCD')", NULL }, 863 #endif 864 { "+.003e2 ('ABCD')", NULL }, 865 // This one too. See how much better our parser is? :-) 866 #if !TEST_R5 867 { "-123e-9999999999 ('ABCD')", NULL }, // Hooray for the stunning accuracy of floating point ;-) 868 #endif 869 // invalid rules 870 { "0.0 ('')", 871 "Sniffer pattern error: illegal empty pattern" }, 872 { "('ABCD')", 873 "Sniffer pattern error: match level expected" }, 874 { "[0:3] ('ABCD')", 875 "Sniffer pattern error: match level expected" }, 876 { "0.8 [0:3] ( | 'abcdefghij')", 877 "Sniffer pattern error: missing pattern" }, 878 { "0.8 [0:3] ('ABCDEFG' | )", 879 "Sniffer pattern error: missing pattern" }, 880 { "[0:3] ('ABCD')", 881 "Sniffer pattern error: match level expected" }, 882 { "1.0 (ABCD')", 883 #if TEST_R5 884 "Sniffer pattern error: misplaced single quote" 885 #else 886 "Sniffer pattern error: invalid character 'A'" 887 #endif 888 }, 889 { "1.0 ('ABCD)", 890 #if TEST_R5 891 "Sniffer pattern error: unterminated rule" 892 #else 893 "Sniffer pattern error: unterminated single-quoted string" 894 #endif 895 }, 896 { "1.0 (ABCD)", 897 #if TEST_R5 898 "Sniffer pattern error: missing pattern" 899 #else 900 "Sniffer pattern error: invalid character 'A'" 901 #endif 902 }, 903 { "1.0 (ABCD 'ABCD')", 904 #if TEST_R5 905 "Sniffer pattern error: missing pattern" 906 #else 907 "Sniffer pattern error: invalid character 'A'" 908 #endif 909 }, 910 { "1.0 'ABCD')", 911 #if TEST_R5 912 "Sniffer pattern error: missing pattern" 913 #else 914 "Sniffer pattern error: missing pattern" 915 #endif 916 }, 917 { "1.0 ('ABCD'", 918 "Sniffer pattern error: unterminated rule" }, 919 { "1.0 'ABCD'", 920 #if TEST_R5 921 "Sniffer pattern error: missing sniff pattern" 922 #else 923 "Sniffer pattern error: missing pattern" 924 #endif 925 }, 926 { "0.5 [0:3] ('ABCD' | 'abcd' | [13] 'EFGH')", 927 "Sniffer pattern error: missing pattern" }, 928 { "0.5('ABCD'|'abcd'|[13]'EFGH')", 929 "Sniffer pattern error: missing pattern" }, 930 { "0.5[0:3]([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 931 "Sniffer pattern error: missing pattern" }, 932 { "0.8 [0x10:3] ('ABCDEFG' | 'abcdefghij')", 933 "Sniffer pattern error: pattern offset expected" }, 934 { "0.8 [0:A] ('ABCDEFG' | 'abcdefghij')", 935 #if TEST_R5 936 "Sniffer pattern error: pattern range end expected" 937 #else 938 "Sniffer pattern error: invalid character 'A'" 939 #endif 940 }, 941 { "0.8 [0:3] ('ABCDEFG' & 'abcdefghij')", 942 "Sniffer pattern error: pattern and mask lengths do not match" }, 943 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg' & 'xyzwmno')", 944 #if TEST_R5 945 "Sniffer pattern error: unterminated rule" 946 #else 947 "Sniffer pattern error: expecting '|', ')', or possibly '&'" 948 #endif 949 }, 950 { "0.8 [0:3] (\\g&b & 'a')", 951 #if TEST_R5 952 "Sniffer pattern error: missing mask" 953 #else 954 "Sniffer pattern error: invalid character 'b'" 955 #endif 956 }, 957 { "0.8 [0:3] (\\19 & 'a')", 958 "Sniffer pattern error: pattern and mask lengths do not match" }, 959 { "0.8 [0:3] (0x345 & 'ab')", 960 "Sniffer pattern error: bad hex literal" }, 961 { "0.8 [0:3] (0x3457M & 'abc')", 962 #if TEST_R5 963 "Sniffer pattern error: expecting '|' or '&'" 964 #else 965 "Sniffer pattern error: invalid character 'M'" 966 #endif 967 }, 968 { "0.8 [0:3] (0x3457\\7 & 'abc')", 969 #if TEST_R5 970 "Sniffer pattern error: expecting '|' or '&'" 971 #else 972 "Sniffer pattern error: expecting '|', ')', or possibly '&'" 973 #endif 974 }, 975 976 // Miscellaneous tests designed to hit every remaining 977 // relevant "throw new Err()" statement in the scanner. 978 // R5 versions will come later... 979 #if !TEST_R5 980 { "\x03 ", "Sniffer pattern error: invalid character '\x03'" }, 981 { "\"blah", "Sniffer pattern error: unterminated double-quoted string" }, 982 { "0xThisIsNotAHexCode", "Sniffer pattern error: incomplete hex code" }, 983 { "0xAndNeitherIsThis:-)", "Sniffer pattern error: bad hex literal" }, 984 { ".NotAFloat", "Sniffer pattern error: incomplete floating point number" }, 985 { "-NotANumber", "Sniffer pattern error: incomplete signed number" }, 986 { "+NotANumber", "Sniffer pattern error: incomplete signed number" }, 987 988 { "0.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 989 { "1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 990 { ".0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 991 { "0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 992 { "1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 993 { "-1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 994 { "+1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 995 { "-1.e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 996 { "+1.e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 997 { "-1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 998 { "+1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 999 1000 { "0.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1001 { "1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1002 { ".0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1003 { "0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1004 { "1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1005 { "-1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1006 { "+1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1007 { "-1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1008 { "+1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1009 { "-1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1010 { "+1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1011 1012 { "0.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1013 { "1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1014 { ".0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1015 { "0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1016 { "1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1017 { "-1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1018 { "+1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1019 { "-1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1020 { "+1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1021 { "-1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1022 { "+1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1023 1024 { "\\11\\", "Sniffer pattern error: incomplete escape sequence" }, 1025 { "\"Escape!! \\", "Sniffer pattern error: incomplete escape sequence" }, 1026 { "'Escape!! \\", "Sniffer pattern error: incomplete escape sequence" }, 1027 1028 { "\\x", "Sniffer pattern error: incomplete escaped hex code" }, 1029 { "\\xNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" }, 1030 { "\\xAlsoNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" }, 1031 { "\\x0", "Sniffer pattern error: incomplete escaped hex code" }, 1032 1033 { "1.0 (\\377)", NULL }, 1034 { "\\400", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" }, 1035 { "\\777", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" }, 1036 { "1.0 (\\800)", NULL }, 1037 1038 { NULL, "Sniffer pattern error: NULL pattern" }, 1039 1040 { "-2", "Sniffer pattern error: invalid priority" }, 1041 { "+2", "Sniffer pattern error: invalid priority" }, 1042 1043 { "1.0", "Sniffer pattern error: missing expression" }, 1044 #endif // !TEST_R5 1045 1046 1047 // { "1E-25 ('ABCD')", "Sniffer pattern error: missing pattern" }, 1048 // I don't currently understand what's wrong with the above rule... R5 1049 // rejects it though, for some reason. 1050 }; 1051 const int testCaseCount = sizeof(testCases) / sizeof(test_case); 1052 BMimeType type; 1053 for (int32 i = 0; i < testCaseCount; i++) { 1054 //cout << endl << "----------------------------------------------------------------------" << endl; 1055 NextSubTest(); 1056 test_case &testCase = testCases[i]; 1057 //cout << endl << testCase.rule << endl; 1058 BString parseError; 1059 status_t error = BMimeType::CheckSnifferRule(testCase.rule, 1060 &parseError); 1061 if (testCase.error == NULL) { 1062 if (error != B_OK) { 1063 cout << endl << "This sucker's gonna fail..." 1064 << endl << "RULE: '" << testCase.rule << "'" 1065 << endl << "ERROR: " 1066 << endl << parseError.String() 1067 << endl; 1068 } 1069 CHK(error == B_OK); 1070 } else { 1071 1072 // if (parseError.FindLast(testCase.error) >= 0) { 1073 // cout << endl << parseError.String(); // << endl; 1074 // cout << endl << testCase.error << endl; 1075 // } 1076 // cout << endl << parseError.String(); // << endl; 1077 /* 1078 if (parseError.FindLast(testCase.error) >= 0) { 1079 cout << " -- OKAY" << endl; 1080 } else { 1081 cout << " -- NOGO" << endl; 1082 cout << testCase.error << endl; 1083 } 1084 */ 1085 if (testCase.rule && error != B_BAD_MIME_SNIFFER_RULE) { 1086 printf("rule: `%s'", testCase.rule); 1087 RES(error); 1088 } 1089 CHK(error == (testCase.rule ? B_BAD_MIME_SNIFFER_RULE : B_BAD_VALUE)); 1090 CHK(parseError.FindLast(testCase.error) >= 0); 1091 } 1092 } 1093 } 1094 1095 void dumpStr(const std::string &string, const char *label = NULL) { 1096 if (label) 1097 printf("%s: ", label); 1098 for (uint i = 0; i < string.length(); i++) 1099 printf("%x ", string[i]); 1100 printf("\n"); 1101 } 1102 1103 1104 void 1105 MimeSnifferTest::SnifferTest() { 1106 #if TEST_R5 1107 Outputf("(no tests actually performed for R5 version)\n"); 1108 #else // TEST_R5 1109 const char *rules[] = { 1110 // General tests 1111 "1.0 ('#include')", 1112 "0.0 [0:32] ('#include')", 1113 "0.e-230 [0:32] (\\#include | \\#ifndef)", 1114 ".2 ([0:32] \"#include\" | [0] '#define' | [0:200] 'int main(')", 1115 "1.0 [0:32] ('<html>' | '<head>' | '<body>')", 1116 // Range tests 1117 "1.0 [0:9] ('rock')", 1118 "1.0 ([0:9] 'roll')", 1119 "1.0 ([0:9] 'rock' | [0:9] 'roll')", 1120 "1.0 [0:9] ('rock' | 'roll')", 1121 "1.0 ([0] 'rock')", 1122 "1.0 ([0] 'rock' | [0:9] 'roll')", 1123 "1.0 ([9] 'rock' | [10] 'roll')", 1124 // Mask, octal, and hex tests 1125 "1.0 (\\xFF\\xFF & '\\xF0\\xF0')", 1126 "1.0 ('\\33\\34' & \\xFF\\x00)", 1127 "1.0 (\\33\\34 & \"\\x00\\xFF\")", 1128 "1.0 (\\xFF & \\x05)", 1129 // Conjunctions 1130 "1.0 ([4] 'rock') ([9] 'roll')", 1131 "1.0 [5] ('roll') [10] ('rock')", 1132 "1.0 [4] ('rock' | 'roll') ([9] 'rock' | [10] 'roll')", 1133 // Case insensitivity tests 1134 "1.0 [4] (-i 'Rock' | 'Roll')", 1135 "1.0 [9] ('Rock' | -i 'Roll')", 1136 "1.0 (-i [4] 'Rock' | [9] 'Roll')", 1137 "1.0 ([9] 'Rock' | -i [4] 'Roll')", 1138 }; 1139 const int ruleCount = sizeof(rules)/sizeof(char*); 1140 struct test_case { 1141 const std::string data; 1142 const bool result[ruleCount]; 1143 } tests[] = { 1144 1145 //------------------------------ 1146 { 1147 "#include <stdio.h> \n\ 1148 #include <stdlib.h> \n\ 1149 \n\ 1150 int main() { \n\ 1151 return 0; \n\ 1152 } \n\ 1153 \n\ 1154 ", { true, true, true, true, false, 1155 false, false, false, false, false, false, false, 1156 false, false, false, false, 1157 false, false, false, 1158 false, false, false, false 1159 } 1160 }, 1161 //------------------------------ 1162 { 1163 " #include <stdio.h> \n\ 1164 #include <stdlib.h> \n\ 1165 \n\ 1166 int main() { \n\ 1167 return 0; \n\ 1168 } \n\ 1169 \n\ 1170 ", { false, true, true, true, false, 1171 false, false, false, false, false, false, false, 1172 false, false, false, false, 1173 false, false, false, 1174 false, false, false, false 1175 } 1176 }, 1177 //------------------------------ 1178 { 1179 "#ifndef SOME_TEST_H \n\ 1180 #define SOME_TEST_H \n\ 1181 \n\ 1182 void main(); \n\ 1183 \n\ 1184 #endif // SOME_TEST_H \n\ 1185 \n\ 1186 ", { false, false, true, false, false, 1187 false, false, false, false, false, false, false, 1188 false, false, false, false, 1189 false, false, false, 1190 false, false, false, false 1191 } 1192 }, 1193 //------------------------------ 1194 { 1195 "//------------------ \n\ 1196 // SomeTest.cpp \n\ 1197 //------------------ \n\ 1198 #include <stdio.h> \n\ 1199 \n\ 1200 int main() { \n\ 1201 return 0; \n\ 1202 } \n\ 1203 \n\ 1204 ", { false, false, false, true, false, 1205 false, false, false, false, false, false, false, 1206 false, false, false, true, 1207 // ^^^^ <= coincedence 1208 false, false, false, 1209 false, false, false, false 1210 } 1211 }, 1212 //------------------------------ 1213 { 1214 "<html> \n\ 1215 <body bgcolor='#ffffff'> \n\ 1216 HTML is boring as hell <br> \n\ 1217 when i write it too much <br> \n\ 1218 my head starts to swell <br> \n\ 1219 <br> \n\ 1220 HTML is stupid and dumb <br> \n\ 1221 running through traffic <br> \n\ 1222 is ten times as fun <br> \n\ 1223 </body> \n\ 1224 </html> \n\ 1225 ", { false, false, false, false, true, 1226 false, false, false, false, false, false, false, 1227 false, false, false, false, 1228 false, false, false, 1229 false, false, false, false 1230 } 1231 }, 1232 //--------- <= Ten characters in 1233 { 1234 " rock&roll", // 5,10 1235 { false, false, false, false, false, 1236 true, false, true, true, false, false, true, 1237 false, false, false, false, 1238 false, false, false, 1239 false, false, false, false 1240 } 1241 }, 1242 //--------- <= Ten characters in 1243 { 1244 " rock&roll", // 4,9 1245 { false, false, false, false, false, 1246 true, true, true, true, false, true, false, 1247 false, false, false, false, 1248 true, false, false, 1249 true, true, true, false 1250 } 1251 }, 1252 //--------- <= Ten characters in 1253 { 1254 " roll&rock", // 5,10 1255 { false, false, false, false, false, 1256 false, true, true, true, false, true, false, 1257 false, false, false, false, 1258 false, true, false, 1259 false, false, false, false 1260 } 1261 }, 1262 //--------- <= Ten characters in 1263 { 1264 " roll&rock", // 4,9 1265 { false, false, false, false, false, 1266 true, true, true, true, false, true, true, 1267 false, false, false, false, 1268 false, false, true, 1269 true, true, false, true 1270 } 1271 }, 1272 //--------- <= Ten characters in 1273 { 1274 " ROCK&ROLL", // 4,9 1275 { false, false, false, false, false, 1276 false, false, false, false, false, false, false, 1277 false, false, false, false, 1278 false, false, false, 1279 true, true, true, false 1280 } 1281 }, 1282 //--------- <= Ten characters in 1283 { 1284 " rOlL&RoCk", // 4,9 1285 { false, false, false, false, false, 1286 false, false, false, false, false, false, false, 1287 false, false, false, false, 1288 false, false, false, 1289 true, true, false, true 1290 } 1291 }, 1292 //------------------------------ 1293 { 1294 "\xFF\xFF FF FF", 1295 { false, false, false, false, false, 1296 false, false, false, false, false, false, false, 1297 true, false, false, true, 1298 false, false, false, 1299 false, false, false, false 1300 } 1301 }, 1302 //------------------------------ 1303 { 1304 "\xFA\xFA FA FA", 1305 { false, false, false, false, false, 1306 false, false, false, false, false, false, false, 1307 true, false, false, false, 1308 false, false, false, 1309 false, false, false, false 1310 } 1311 }, 1312 //------------------------------ 1313 { 1314 "\xAF\xAF AF AF", 1315 { false, false, false, false, false, 1316 false, false, false, false, false, false, false, 1317 false, false, false, true, 1318 false, false, false, 1319 false, false, false, false 1320 } 1321 }, 1322 //------------------------------ 1323 { 1324 std::string("\033\000 033 000", 10), // Otherwise, it thinks the NULL is the end of the string 1325 { false, false, false, false, false, 1326 false, false, false, false, false, false, false, 1327 false, true, false, false, 1328 false, false, false, 1329 false, false, false, false 1330 } 1331 }, 1332 //------------------------------ 1333 { 1334 std::string("\000\034 000 034", 10), // Otherwise, it thinks the NULL is the end of the string 1335 { false, false, false, false, false, 1336 false, false, false, false, false, false, false, 1337 false, false, true, false, 1338 false, false, false, 1339 false, false, false, false 1340 } 1341 }, 1342 //------------------------------ 1343 { 1344 "\033\034 033 034", 1345 { false, false, false, false, false, 1346 false, false, false, false, false, false, false, 1347 false, true, true, false, 1348 false, false, false, 1349 false, false, false, false 1350 } 1351 }, 1352 }; // tests[] 1353 const int32 testCount = sizeof(tests)/sizeof(test_case); 1354 1355 for (int i = 0; i < testCount; i++) { 1356 if (i > 0) 1357 NextSubTestBlock(); 1358 test_case &test = tests[i]; 1359 // cout << "--------------------------------------------------------------------------------" << endl; 1360 // cout << test.data << endl; 1361 1362 for (int j = 0; j < ruleCount; j++) { 1363 NextSubTest(); 1364 // cout << "############################################################" << endl; 1365 // cout << rules[j] << endl; 1366 // cout << test.result[j] << endl; 1367 Rule rule; 1368 BString errorMsg; 1369 status_t err = parse(rules[j], &rule, &errorMsg); 1370 // dumpStr(test.data, "str "); 1371 if (err) { 1372 // cout << "PARSE FAILURE!!!" << endl; 1373 // cout << errorMsg.String() << endl; 1374 } 1375 CHK(err == B_OK); 1376 if (!err) { 1377 BMallocIO data; 1378 data.Write(test.data.data(), test.data.length());//strlen(test.data)); 1379 bool match = rule.Sniff(&data); 1380 // cout << match << endl; 1381 // cout << "match == " << (match ? "yes" : "no") << ", " 1382 // << ((match == test.result[j]) ? "SUCCESS" : "FAILURE") << endl; 1383 CHK(match == test.result[j]); 1384 } 1385 } 1386 } 1387 #endif // !TEST_R5 1388 } 1389