1 // MimeSnifferTest.cpp 2 3 #include "MimeSnifferTest.h" 4 5 #include <cppunit/Test.h> 6 #include <cppunit/TestSuite.h> 7 #include <cppunit/TestCaller.h> 8 #include <sniffer/Rule.h> 9 #include <sniffer/Parser.h> 10 #include <DataIO.h> 11 #include <Mime.h> 12 #include <String.h> // BString 13 #include <TestUtils.h> 14 15 #include <stdio.h> 16 #include <string> 17 18 using namespace BPrivate::Storage::Sniffer; 19 20 // Suite 21 CppUnit::Test* 22 MimeSnifferTest::Suite() { 23 CppUnit::TestSuite *suite = new CppUnit::TestSuite(); 24 typedef CppUnit::TestCaller<MimeSnifferTest> TC; 25 26 suite->addTest( new TC("Mime Sniffer::Scanner Test", 27 &MimeSnifferTest::ScannerTest) ); 28 suite->addTest( new TC("Mime Sniffer::Parser Test", 29 &MimeSnifferTest::ParserTest) ); 30 suite->addTest( new TC("Mime Sniffer::Sniffer Test", 31 &MimeSnifferTest::SnifferTest) ); 32 33 return suite; 34 } 35 36 // Scanner Test 37 void 38 MimeSnifferTest::ScannerTest() { 39 #if TEST_R5 40 Outputf("(no tests actually performed for R5 version)\n"); 41 #else // TEST_R5 42 43 44 // tests: 45 // Internal TokenStream and CharStream classes 46 47 // Define some useful macros for dynamically allocating 48 // various Token classes 49 #define T(type) (new Token(type, -1)) 50 #define S(str) (new StringToken(str, -1)) 51 #define I(val) (new IntToken(val, -1)) 52 #define F(val) (new FloatToken(val, -1)) 53 54 struct test_case { 55 const char *rule; 56 int tokenCount; 57 Token *tokens[256]; 58 } testCases[] = { 59 { "'Hey'[]:", 4, 60 { S("Hey"), 61 T(LeftBracket), 62 T(RightBracket), 63 T(Colon) 64 } 65 }, 66 { "1", 1, { I(1) } }, 67 { "1.0", 1, { F(1.0) } }, 68 69 { "1.0 (\"ABCD\")", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 70 { "1.0 ('ABCD')", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 71 { " 1.0 ('ABCD') ", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } }, 72 { "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", 11, 73 { F(0.8), 74 T(LeftBracket), 75 I(0), 76 T(Colon), 77 I(3), 78 T(RightBracket), 79 T(LeftParen), 80 S("ABCDEFG"), 81 T(Divider), 82 S("abcdefghij"), 83 T(RightParen) 84 } 85 }, 86 { "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 17, 87 { F(0.5), 88 T(LeftParen), 89 T(LeftBracket), 90 I(10), 91 T(RightBracket), 92 S("ABCD"), 93 T(Divider), 94 T(LeftBracket), 95 I(17), 96 T(RightBracket), 97 S("abcd"), 98 T(Divider), 99 T(LeftBracket), 100 I(13), 101 T(RightBracket), 102 S("EFGH"), 103 T(RightParen) 104 } 105 }, 106 { "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", 13, 107 { F(0.5), 108 T(LeftBracket), 109 I(0), 110 T(Colon), 111 I(3), 112 T(RightBracket), 113 T(LeftParen), 114 S("ABCD"), 115 T(Divider), 116 S("abcd"), 117 T(Divider), 118 S("EFGH"), 119 T(RightParen) 120 } 121 }, 122 { "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", 11, 123 { F(0.8), 124 T(LeftBracket), 125 I(0), 126 T(Colon), 127 I(3), 128 T(RightBracket), 129 T(LeftParen), 130 S("ABCDEFG"), 131 T(Divider), 132 S("abcdefghij"), 133 T(RightParen) 134 } 135 }, 136 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", 11, 137 { F(0.8), 138 T(LeftBracket), 139 I(0), 140 T(Colon), 141 I(3), 142 T(RightBracket), 143 T(LeftParen), 144 S("ABCDEFG"), 145 T(Ampersand), 146 S("abcdefg"), 147 T(RightParen) 148 } 149 }, 150 { "1.0 ('ABCD') | ('EFGH')", 8, 151 { F(1.0), 152 T(LeftParen), 153 S("ABCD"), 154 T(RightParen), 155 T(Divider), 156 T(LeftParen), 157 S("EFGH"), 158 T(RightParen) 159 } 160 }, 161 { "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", 18, 162 { F(1.0), 163 T(LeftBracket), 164 I(0), 165 T(Colon), 166 I(3), 167 T(RightBracket), 168 T(LeftParen), 169 S("ABCD"), 170 T(RightParen), 171 T(Divider), 172 T(LeftBracket), 173 I(2), 174 T(Colon), 175 I(4), 176 T(RightBracket), 177 T(LeftParen), 178 S("EFGH"), 179 T(RightParen) 180 } 181 }, 182 { "0.8 [0:4] (\\077Mkj0x34 & 'abcdefgh')", 11, 183 { F(0.8), 184 T(LeftBracket), 185 I(0), 186 T(Colon), 187 I(4), 188 T(RightBracket), 189 T(LeftParen), 190 S("\077Mkj0x34"), 191 T(Ampersand), 192 S("abcdefgh"), 193 T(RightParen) 194 } 195 }, 196 { "0.8 [0:4] (\\077Mkj\\x34 & 'abcdefgh')", 11, 197 { F(0.8), 198 T(LeftBracket), 199 I(0), 200 T(Colon), 201 I(4), 202 T(RightBracket), 203 T(LeftParen), 204 S("\077Mkj\x34"), 205 T(Ampersand), 206 S("abcdefgh"), 207 T(RightParen) 208 } 209 }, 210 { "0.8 [0:3] (\\077034 & 'abcd')", 11, 211 { F(0.8), 212 T(LeftBracket), 213 I(0), 214 T(Colon), 215 I(3), 216 T(RightBracket), 217 T(LeftParen), 218 S("\077034"), 219 T(Ampersand), 220 S("abcd"), 221 T(RightParen) 222 } 223 }, 224 { "0.8 [0:3] (\\077\\034 & 'ab')", 11, 225 { F(0.8), 226 T(LeftBracket), 227 I(0), 228 T(Colon), 229 I(3), 230 T(RightBracket), 231 T(LeftParen), 232 S("\077\034"), 233 T(Ampersand), 234 S("ab"), 235 T(RightParen) 236 } 237 }, 238 { "0.8 [0:3] (\\77\\034 & 'ab')", 11, 239 { F(0.8), 240 T(LeftBracket), 241 I(0), 242 T(Colon), 243 I(3), 244 T(RightBracket), 245 T(LeftParen), 246 S("\077\034"), 247 T(Ampersand), 248 S("ab"), 249 T(RightParen) 250 } 251 }, 252 { "0.8 [0:3] (\\7 & 'a')", 11, 253 { F(0.8), 254 T(LeftBracket), 255 I(0), 256 T(Colon), 257 I(3), 258 T(RightBracket), 259 T(LeftParen), 260 S("\007"), 261 T(Ampersand), 262 S("a"), 263 T(RightParen) 264 } 265 }, 266 { "0.8 [0:3] (\"\\17\" & 'a')", 11, 267 { F(0.8), 268 T(LeftBracket), 269 I(0), 270 T(Colon), 271 I(3), 272 T(RightBracket), 273 T(LeftParen), 274 S("\017"), 275 T(Ampersand), 276 S("a"), 277 T(RightParen) 278 } 279 }, 280 { "0.8 [0:3] ('\\17' & 'a')", 11, 281 { F(0.8), 282 T(LeftBracket), 283 I(0), 284 T(Colon), 285 I(3), 286 T(RightBracket), 287 T(LeftParen), 288 S("\017"), 289 T(Ampersand), 290 S("a"), 291 T(RightParen) 292 } 293 }, 294 { "0.8 [0:3] (\\g & 'a')", 11, 295 { F(0.8), 296 T(LeftBracket), 297 I(0), 298 T(Colon), 299 I(3), 300 T(RightBracket), 301 T(LeftParen), 302 S("g"), 303 T(Ampersand), 304 S("a"), 305 T(RightParen) 306 } 307 }, 308 { "0.8 [0:3] (\\g&\\b)", 11, 309 { F(0.8), 310 T(LeftBracket), 311 I(0), 312 T(Colon), 313 I(3), 314 T(RightBracket), 315 T(LeftParen), 316 S("g"), 317 T(Ampersand), 318 S("\b"), 319 T(RightParen) 320 } 321 }, 322 { "0.8 [0:3] (\\g\\&b & 'abc')", 11, 323 { F(0.8), 324 T(LeftBracket), 325 I(0), 326 T(Colon), 327 I(3), 328 T(RightBracket), 329 T(LeftParen), 330 S("g&b"), 331 T(Ampersand), 332 S("abc"), 333 T(RightParen) 334 } 335 }, 336 { "0.8 [0:3] (0x3457 & 'ab')", 11, 337 { F(0.8), 338 T(LeftBracket), 339 I(0), 340 T(Colon), 341 I(3), 342 T(RightBracket), 343 T(LeftParen), 344 S("\x34\x57"), 345 T(Ampersand), 346 S("ab"), 347 T(RightParen) 348 } 349 }, 350 { "0.8 [0:3] (\\x34\\x57 & 'ab')", 11, 351 { F(0.8), 352 T(LeftBracket), 353 I(0), 354 T(Colon), 355 I(3), 356 T(RightBracket), 357 T(LeftParen), 358 S("\x34\x57"), 359 T(Ampersand), 360 S("ab"), 361 T(RightParen) 362 } 363 }, 364 { "0.8 [0:3] (0xA4b7 & 'ab')", 11, 365 { F(0.8), 366 T(LeftBracket), 367 I(0), 368 T(Colon), 369 I(3), 370 T(RightBracket), 371 T(LeftParen), 372 S("\xA4\xb7"), 373 T(Ampersand), 374 S("ab"), 375 T(RightParen) 376 } 377 }, 378 { "0.8 [0:3] (\\xA4\\xb7 & 'ab')", 11, 379 { F(0.8), 380 T(LeftBracket), 381 I(0), 382 T(Colon), 383 I(3), 384 T(RightBracket), 385 T(LeftParen), 386 S("\xA4\xb7"), 387 T(Ampersand), 388 S("ab"), 389 T(RightParen) 390 } 391 }, 392 { "0.8 [0:3] (\"\\xA4\\xb7\" & 'ab')", 11, 393 { F(0.8), 394 T(LeftBracket), 395 I(0), 396 T(Colon), 397 I(3), 398 T(RightBracket), 399 T(LeftParen), 400 S("\xA4\xb7"), 401 T(Ampersand), 402 S("ab"), 403 T(RightParen) 404 } 405 }, 406 { "0.8 [0:3] (\'\\xA4\\xb7\' & 'ab')", 11, 407 { F(0.8), 408 T(LeftBracket), 409 I(0), 410 T(Colon), 411 I(3), 412 T(RightBracket), 413 T(LeftParen), 414 S("\xA4\xb7"), 415 T(Ampersand), 416 S("ab"), 417 T(RightParen) 418 } 419 }, 420 { "0.8 [0:3] ('ab\"' & 'abc')", 11, 421 { F(0.8), 422 T(LeftBracket), 423 I(0), 424 T(Colon), 425 I(3), 426 T(RightBracket), 427 T(LeftParen), 428 S("ab\""), 429 T(Ampersand), 430 S("abc"), 431 T(RightParen) 432 } 433 }, 434 { "0.8 [0:3] (\"ab\\\"\" & 'abc')", 11, 435 { F(0.8), 436 T(LeftBracket), 437 I(0), 438 T(Colon), 439 I(3), 440 T(RightBracket), 441 T(LeftParen), 442 S("ab\""), 443 T(Ampersand), 444 S("abc"), 445 T(RightParen) 446 } 447 }, 448 { "0.8 [0:3] (\"ab\\A\" & 'abc')", 11, 449 { F(0.8), 450 T(LeftBracket), 451 I(0), 452 T(Colon), 453 I(3), 454 T(RightBracket), 455 T(LeftParen), 456 S("abA"), 457 T(Ampersand), 458 S("abc"), 459 T(RightParen) 460 } 461 }, 462 { "0.8 [0:3] (\"ab'\" & 'abc')", 11, 463 { F(0.8), 464 T(LeftBracket), 465 I(0), 466 T(Colon), 467 I(3), 468 T(RightBracket), 469 T(LeftParen), 470 S("ab'"), 471 T(Ampersand), 472 S("abc"), 473 T(RightParen) 474 } 475 }, 476 { "0.8 [0:3] (\"ab\\\\\" & 'abc')", 11, 477 { F(0.8), 478 T(LeftBracket), 479 I(0), 480 T(Colon), 481 I(3), 482 T(RightBracket), 483 T(LeftParen), 484 S("ab\\"), 485 T(Ampersand), 486 S("abc"), 487 T(RightParen) 488 } 489 }, 490 { "0.8 [-5:-3] (\"abc\" & 'abc')", 11, 491 { F(0.8), 492 T(LeftBracket), 493 I(-5), 494 T(Colon), 495 I(-3), 496 T(RightBracket), 497 T(LeftParen), 498 S("abc"), 499 T(Ampersand), 500 S("abc"), 501 T(RightParen) 502 } 503 }, 504 { "0.8 [5:3] (\"abc\" & 'abc')", 11, 505 { F(0.8), 506 T(LeftBracket), 507 I(5), 508 T(Colon), 509 I(3), 510 T(RightBracket), 511 T(LeftParen), 512 S("abc"), 513 T(Ampersand), 514 S("abc"), 515 T(RightParen) 516 } 517 }, 518 { "1.2 ('ABCD')", 4, 519 { F(1.2), 520 T(LeftParen), 521 S("ABCD"), 522 T(RightParen) 523 } 524 }, 525 { ".2 ('ABCD')", 4, 526 { F(0.2), 527 T(LeftParen), 528 S("ABCD"), 529 T(RightParen) 530 } 531 }, 532 { "0. ('ABCD')", 4, 533 { F(0.0), 534 T(LeftParen), 535 S("ABCD"), 536 T(RightParen) 537 } 538 }, 539 // Signed integers 540 { "-1 ('ABCD')", 4, 541 { I(-1), 542 T(LeftParen), 543 S("ABCD"), 544 T(RightParen) 545 } 546 }, 547 { "+1 ('ABCD')", 4, 548 { I(1), 549 T(LeftParen), 550 S("ABCD"), 551 T(RightParen) 552 } 553 }, 554 // Unsigned extended floats 555 { "1E25 ('ABCD')", 4, 556 { F(1e25), 557 T(LeftParen), 558 S("ABCD"), 559 T(RightParen) 560 } 561 }, 562 { "1e25 ('ABCD')", 4, 563 { F(1e25), 564 T(LeftParen), 565 S("ABCD"), 566 T(RightParen) 567 } 568 }, 569 { "1E+25 ('ABCD')", 4, 570 { F(1e25), 571 T(LeftParen), 572 S("ABCD"), 573 T(RightParen) 574 } 575 }, 576 { "1e+25 ('ABCD')", 4, 577 { F(1e25), 578 T(LeftParen), 579 S("ABCD"), 580 T(RightParen) 581 } 582 }, 583 { "1E-25 ('ABCD')", 4, 584 { F(1e-25), 585 T(LeftParen), 586 S("ABCD"), 587 T(RightParen) 588 } 589 }, 590 { "1e-25 ('ABCD')", 4, 591 { F(1e-25), 592 T(LeftParen), 593 S("ABCD"), 594 T(RightParen) 595 } 596 }, 597 // Positive signed extended floats 598 { "+1E25 ('ABCD')", 4, 599 { F(1e25), 600 T(LeftParen), 601 S("ABCD"), 602 T(RightParen) 603 } 604 }, 605 { "+1e25 ('ABCD')", 4, 606 { F(1e25), 607 T(LeftParen), 608 S("ABCD"), 609 T(RightParen) 610 } 611 }, 612 { "+1E+25 ('ABCD')", 4, 613 { F(1e25), 614 T(LeftParen), 615 S("ABCD"), 616 T(RightParen) 617 } 618 }, 619 { "+1e+25 ('ABCD')", 4, 620 { F(1e25), 621 T(LeftParen), 622 S("ABCD"), 623 T(RightParen) 624 } 625 }, 626 { "+1E-25 ('ABCD')", 4, 627 { F(1e-25), 628 T(LeftParen), 629 S("ABCD"), 630 T(RightParen) 631 } 632 }, 633 { "+1e-25 ('ABCD')", 4, 634 { F(1e-25), 635 T(LeftParen), 636 S("ABCD"), 637 T(RightParen) 638 } 639 }, 640 // Negative signed extended floats 641 { "-1E25 ('ABCD')", 4, 642 { F(-1e25), 643 T(LeftParen), 644 S("ABCD"), 645 T(RightParen) 646 } 647 }, 648 { "-1e25 ('ABCD')", 4, 649 { F(-1e25), 650 T(LeftParen), 651 S("ABCD"), 652 T(RightParen) 653 } 654 }, 655 { "-1E+25 ('ABCD')", 4, 656 { F(-1e25), 657 T(LeftParen), 658 S("ABCD"), 659 T(RightParen) 660 } 661 }, 662 { "-1e+25 ('ABCD')", 4, 663 { F(-1e25), 664 T(LeftParen), 665 S("ABCD"), 666 T(RightParen) 667 } 668 }, 669 { "-1E-25 ('ABCD')", 4, 670 { F(-1e-25), 671 T(LeftParen), 672 S("ABCD"), 673 T(RightParen) 674 } 675 }, 676 { "-1e-25 ('ABCD')", 4, 677 { F(-1e-25), 678 T(LeftParen), 679 S("ABCD"), 680 T(RightParen) 681 } 682 }, 683 // Miscellaneous extended floats 684 { ".1E-25 ('ABCD')", 4, 685 { F(0.1e-25), 686 T(LeftParen), 687 S("ABCD"), 688 T(RightParen) 689 } 690 }, 691 { "-.1e-25 ('ABCD')", 4, 692 { F(-0.1e-25), 693 T(LeftParen), 694 S("ABCD"), 695 T(RightParen) 696 } 697 }, 698 // Signed floats 699 { "-1.0 ('ABCD')", 4, 700 { F(-1.0), 701 T(LeftParen), 702 S("ABCD"), 703 T(RightParen) 704 } 705 }, 706 { "+1.0 ('ABCD')", 4, 707 { F(1.0), 708 T(LeftParen), 709 S("ABCD"), 710 T(RightParen) 711 } 712 }, 713 // The uber test 714 { "0 -0 +0 1 -2 +3 0. -0. +0. 1. -2. +3. 0.0 -0.1 +0.2 1.0 -2.1 +3.2 " 715 "0.e0 0.e-1 0.e+2 1.e1 2.e-2 3.e+3 -1.e1 -2.e-2 -3.e+3 +1.e1 +2.e-2 +3.e+3 " 716 "0.012345 1.23456 ( ) [ ] | & : -i " 717 " \"abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \\\" ' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 \" " 718 " 'abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" \\' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 ' " 719 " \\000abc_xyz123\"'\"'456 \\xA1a1 \\!\\?\\\\ " 720 " 0x00 0x12 0xabCD 0xaBcD 0x0123456789aBcDeFfEdCbA", 50, 721 { I(0), I(0), I(0), I(1), I(-2), I(3), F(0.0), F(0.0), F(0.0), 722 F(1.0), F(-2.0), F(3.0), F(0.0), F(-0.1), F(0.2), F(1.0), F(-2.1), F(3.2), 723 F(0.0), F(0.0e-1), F(0.0e2), F(1.0e1), F(2.0e-2), F(3.0e3), 724 F(-1.0e1), F(-2.0e-2), F(-3.0e3), F(1.0e1), F(2.0e-2), F(3.0e3), 725 F(0.012345), F(1.23456), T(LeftParen), T(RightParen), T(LeftBracket), 726 T(RightBracket), T(Divider), T(Ampersand), T(Colon), T(CaseInsensitiveFlag), 727 S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)), 728 S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)), 729 S(std::string("\000abc_xyz123\"'\"'456", 18)), 730 S("\241a1"), 731 S("!?\\"), 732 S(std::string("\x00", 1)), S("\x12"), S("\xAB\xCD"), S("\xAB\xCD"), 733 S("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA") 734 } 735 }, 736 }; 737 738 // Undefine our nasty macros 739 #undef T(type) 740 #undef S(str) 741 #undef I(val) 742 #undef F(val) 743 744 const int testCaseCount = sizeof(testCases) / sizeof(test_case); 745 for (int i = 0; i < testCaseCount; i++) { 746 NextSubTest(); 747 // cout << endl << testCases[i].rule << endl; 748 TokenStream stream; 749 try { 750 stream.SetTo(testCases[i].rule); 751 752 CHK(stream.InitCheck() == B_OK); 753 for (int j = 0; j < testCases[i].tokenCount; j++) { 754 const Token *token = stream.Get(); 755 CHK(token); 756 /* 757 cout << tokenTypeToString(token->Type()) << endl; 758 759 if (token->Type() == CharacterString) 760 cout << " token1 == " << token->String() << endl; 761 if (testCases[i].tokens[j]->Type() == CharacterString) 762 cout << " token2 == " << (testCases[i].tokens[j])->String() << endl; 763 764 if (token->Type() == CharacterString) 765 { 766 const std::string &str = token->String(); 767 printf("parser: "); 768 for (int i = 0; i < str.length(); i++) 769 printf("%x ", str[i]); 770 printf("\n"); 771 } 772 if (testCases[i].tokens[j]->Type() == CharacterString) 773 { 774 const std::string &str = (testCases[i].tokens[j])->String(); 775 printf("tester: "); 776 for (int i = 0; i < str.length(); i++) 777 printf("%x ", str[i]); 778 printf("\n"); 779 } 780 781 switch (token->Type()) { 782 case CharacterString: 783 cout << " string == " << token->String() << endl; 784 break; 785 case Integer: 786 cout << " int == " << token->Int() << endl; 787 break; 788 case FloatingPoint: 789 cout << " float == " << token->Float() << endl; 790 break; 791 } 792 */ 793 CHK(*token == *(testCases[i].tokens[j])); 794 delete testCases[i].tokens[j]; 795 } 796 CHK(stream.IsEmpty()); 797 } catch (Err *e) { 798 CppUnit::Exception *err = new CppUnit::Exception(e->Msg()); 799 delete e; 800 throw *err; 801 } 802 } 803 804 #endif // !TEST_R5 805 } 806 807 // Parser Test 808 void 809 MimeSnifferTest::ParserTest() { 810 // test a couple of valid and invalid rules 811 struct test_case { 812 const char *rule; 813 const char *error; // NULL, if valid 814 } testCases[] = { 815 // valid rules 816 { "1.0 (\"ABCD\")", NULL }, 817 { "1.0 ('ABCD')", NULL }, 818 { " 1.0 ('ABCD') ", NULL }, 819 { "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", NULL }, 820 { "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", NULL } , 821 { "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", NULL }, 822 { "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", NULL }, 823 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", NULL }, 824 // These two rules are accepted by the R5 sniffer checker, but not 825 // by the parser. Thus, we're not accepting them with either. 826 // { "1.0 ('ABCD') | ('EFGH')", NULL }, 827 // { "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", NULL }, 828 { "0.8 [0:3] (\\077Mkl0x34 & 'abcdefgh')", NULL }, 829 { "0.8 [0:3] (\\077034 & 'abcd')", NULL }, 830 { "0.8 [0:3] (\\077\\034 & 'ab')", NULL }, 831 { "0.8 [0:3] (\\77\\034 & 'ab')", NULL }, 832 { "0.8 [0:3] (\\7 & 'a')", NULL }, 833 { "0.8 [0:3] (\"\\17\" & 'a')", NULL }, 834 { "0.8 [0:3] ('\\17' & 'a')", NULL }, 835 { "0.8 [0:3] (\\g & 'a')", NULL }, 836 { "0.8 [0:3] (\\g&\\b)", NULL }, 837 { "0.8 [0:3] (\\g\\&b & 'abc')", NULL }, 838 { "0.8 [0:3] (0x3457 & 'ab')", NULL }, 839 { "0.8 [0:3] (0xA4b7 & 'ab')", NULL }, 840 { "0.8 [0:3] ('ab\"' & 'abc')", NULL }, 841 { "0.8 [0:3] (\"ab\\\"\" & 'abc')", NULL }, 842 { "0.8 [0:3] (\"ab\\A\" & 'abc')", NULL }, 843 { "0.8 [0:3] (\"ab'\" & 'abc')", NULL }, 844 { "0.8 [0:3] (\"ab\\\\\" & 'abc')", NULL }, 845 { "0.8 [-5:-3] (\"abc\" & 'abc')", NULL }, 846 // Also accepted by the R5 sniffer but not the R5 parser. We reject. 847 // { "0.8 [5:3] (\"abc\" & 'abc')", NULL }, 848 { "1.0 ('ABCD')", NULL }, 849 { ".2 ('ABCD')", NULL }, 850 { "0. ('ABCD')", NULL }, 851 { "1 ('ABCD')", NULL }, 852 { "+1 ('ABCD')", NULL }, 853 // We accept extended notation floating point numbers now, but 854 // not invalid priorities. 855 // { "1E25 ('ABCD')", NULL }, 856 // { "1e25 ('ABCD')", NULL }, 857 // R5 chokes on this rule :-( 858 #if !TEST_R5 859 { "1e-3 ('ABCD')", NULL }, 860 #endif 861 { "+.003e2 ('ABCD')", NULL }, 862 // This one too. See how much better our parser is? :-) 863 #if !TEST_R5 864 { "-123e-9999999999 ('ABCD')", NULL }, // Hooray for the stunning accuracy of floating point ;-) 865 #endif 866 // invalid rules 867 { "0.0 ('')", 868 "Sniffer pattern error: illegal empty pattern" }, 869 { "('ABCD')", 870 "Sniffer pattern error: match level expected" }, 871 { "[0:3] ('ABCD')", 872 "Sniffer pattern error: match level expected" }, 873 { "0.8 [0:3] ( | 'abcdefghij')", 874 "Sniffer pattern error: missing pattern" }, 875 { "0.8 [0:3] ('ABCDEFG' | )", 876 "Sniffer pattern error: missing pattern" }, 877 { "[0:3] ('ABCD')", 878 "Sniffer pattern error: match level expected" }, 879 { "1.0 (ABCD')", 880 #if TEST_R5 881 "Sniffer pattern error: misplaced single quote" 882 #else 883 "Sniffer pattern error: invalid character 'A'" 884 #endif 885 }, 886 { "1.0 ('ABCD)", 887 #if TEST_R5 888 "Sniffer pattern error: unterminated rule" 889 #else 890 "Sniffer pattern error: unterminated single-quoted string" 891 #endif 892 }, 893 { "1.0 (ABCD)", 894 #if TEST_R5 895 "Sniffer pattern error: missing pattern" 896 #else 897 "Sniffer pattern error: invalid character 'A'" 898 #endif 899 }, 900 { "1.0 (ABCD 'ABCD')", 901 #if TEST_R5 902 "Sniffer pattern error: missing pattern" 903 #else 904 "Sniffer pattern error: invalid character 'A'" 905 #endif 906 }, 907 { "1.0 'ABCD')", 908 #if TEST_R5 909 "Sniffer pattern error: missing pattern" 910 #else 911 "Sniffer pattern error: missing pattern" 912 #endif 913 }, 914 { "1.0 ('ABCD'", 915 "Sniffer pattern error: unterminated rule" }, 916 { "1.0 'ABCD'", 917 #if TEST_R5 918 "Sniffer pattern error: missing sniff pattern" 919 #else 920 "Sniffer pattern error: missing pattern" 921 #endif 922 }, 923 { "0.5 [0:3] ('ABCD' | 'abcd' | [13] 'EFGH')", 924 "Sniffer pattern error: missing pattern" }, 925 { "0.5('ABCD'|'abcd'|[13]'EFGH')", 926 "Sniffer pattern error: missing pattern" }, 927 { "0.5[0:3]([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 928 "Sniffer pattern error: missing pattern" }, 929 { "0.8 [0x10:3] ('ABCDEFG' | 'abcdefghij')", 930 "Sniffer pattern error: pattern offset expected" }, 931 { "0.8 [0:A] ('ABCDEFG' | 'abcdefghij')", 932 #if TEST_R5 933 "Sniffer pattern error: pattern range end expected" 934 #else 935 "Sniffer pattern error: invalid character 'A'" 936 #endif 937 }, 938 { "0.8 [0:3] ('ABCDEFG' & 'abcdefghij')", 939 "Sniffer pattern error: pattern and mask lengths do not match" }, 940 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg' & 'xyzwmno')", 941 #if TEST_R5 942 "Sniffer pattern error: unterminated rule" 943 #else 944 "Sniffer pattern error: expecting '|', ')', or possibly '&'" 945 #endif 946 }, 947 { "0.8 [0:3] (\\g&b & 'a')", 948 #if TEST_R5 949 "Sniffer pattern error: missing mask" 950 #else 951 "Sniffer pattern error: invalid character 'b'" 952 #endif 953 }, 954 { "0.8 [0:3] (\\19 & 'a')", 955 "Sniffer pattern error: pattern and mask lengths do not match" }, 956 { "0.8 [0:3] (0x345 & 'ab')", 957 "Sniffer pattern error: bad hex literal" }, 958 { "0.8 [0:3] (0x3457M & 'abc')", 959 #if TEST_R5 960 "Sniffer pattern error: expecting '|' or '&'" 961 #else 962 "Sniffer pattern error: invalid character 'M'" 963 #endif 964 }, 965 { "0.8 [0:3] (0x3457\\7 & 'abc')", 966 #if TEST_R5 967 "Sniffer pattern error: expecting '|' or '&'" 968 #else 969 "Sniffer pattern error: expecting '|', ')', or possibly '&'" 970 #endif 971 }, 972 973 // Miscellaneous tests designed to hit every remaining 974 // relevant "throw new Err()" statement in the scanner. 975 // R5 versions will come later... 976 #if !TEST_R5 977 { "\x03 ", "Sniffer pattern error: invalid character '\x03'" }, 978 { "\"blah", "Sniffer pattern error: unterminated double-quoted string" }, 979 { "0xThisIsNotAHexCode", "Sniffer pattern error: incomplete hex code" }, 980 { "0xAndNeitherIsThis:-)", "Sniffer pattern error: bad hex literal" }, 981 { ".NotAFloat", "Sniffer pattern error: incomplete floating point number" }, 982 { "-NotANumber", "Sniffer pattern error: incomplete signed number" }, 983 { "+NotANumber", "Sniffer pattern error: incomplete signed number" }, 984 985 { "0.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 986 { "1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 987 { ".0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 988 { "0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 989 { "1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 990 { "-1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 991 { "+1e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 992 { "-1.e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 993 { "+1.e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 994 { "-1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 995 { "+1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" }, 996 997 { "0.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 998 { "1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 999 { ".0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1000 { "0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1001 { "1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1002 { "-1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1003 { "+1e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1004 { "-1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1005 { "+1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1006 { "-1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1007 { "+1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1008 1009 { "0.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1010 { "1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1011 { ".0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1012 { "0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1013 { "1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1014 { "-1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1015 { "+1e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1016 { "-1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1017 { "+1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1018 { "-1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1019 { "+1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" }, 1020 1021 { "\\11\\", "Sniffer pattern error: incomplete escape sequence" }, 1022 { "\"Escape!! \\", "Sniffer pattern error: incomplete escape sequence" }, 1023 { "'Escape!! \\", "Sniffer pattern error: incomplete escape sequence" }, 1024 1025 { "\\x", "Sniffer pattern error: incomplete escaped hex code" }, 1026 { "\\xNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" }, 1027 { "\\xAlsoNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" }, 1028 { "\\x0", "Sniffer pattern error: incomplete escaped hex code" }, 1029 1030 { "1.0 (\\377)", NULL }, 1031 { "\\400", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" }, 1032 { "\\777", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" }, 1033 { "1.0 (\\800)", NULL }, 1034 1035 { NULL, "Sniffer pattern error: NULL pattern" }, 1036 1037 { "-2", "Sniffer pattern error: invalid priority" }, 1038 { "+2", "Sniffer pattern error: invalid priority" }, 1039 1040 { "1.0", "Sniffer pattern error: missing expression" }, 1041 #endif // !TEST_R5 1042 1043 1044 // { "1E-25 ('ABCD')", "Sniffer pattern error: missing pattern" }, 1045 // I don't currently understand what's wrong with the above rule... R5 1046 // rejects it though, for some reason. 1047 }; 1048 const int testCaseCount = sizeof(testCases) / sizeof(test_case); 1049 BMimeType type; 1050 for (int32 i = 0; i < testCaseCount; i++) { 1051 //cout << endl << "----------------------------------------------------------------------" << endl; 1052 NextSubTest(); 1053 test_case &testCase = testCases[i]; 1054 //cout << endl << testCase.rule << endl; 1055 BString parseError; 1056 status_t error = BMimeType::CheckSnifferRule(testCase.rule, 1057 &parseError); 1058 if (testCase.error == NULL) { 1059 if (error != B_OK) { 1060 cout << endl << "This sucker's gonna fail..." 1061 << endl << "RULE: '" << testCase.rule << "'" 1062 << endl << "ERROR: " 1063 << endl << parseError.String() 1064 << endl; 1065 } 1066 CHK(error == B_OK); 1067 } else { 1068 1069 // if (parseError.FindLast(testCase.error) >= 0) { 1070 // cout << endl << parseError.String(); // << endl; 1071 // cout << endl << testCase.error << endl; 1072 // } 1073 // cout << endl << parseError.String(); // << endl; 1074 /* 1075 if (parseError.FindLast(testCase.error) >= 0) { 1076 cout << " -- OKAY" << endl; 1077 } else { 1078 cout << " -- NOGO" << endl; 1079 cout << testCase.error << endl; 1080 } 1081 */ 1082 if (testCase.rule && error != B_BAD_MIME_SNIFFER_RULE) { 1083 printf("rule: `%s'", testCase.rule); 1084 RES(error); 1085 } 1086 CHK(error == (testCase.rule ? B_BAD_MIME_SNIFFER_RULE : B_BAD_VALUE)); 1087 CHK(parseError.FindLast(testCase.error) >= 0); 1088 } 1089 } 1090 } 1091 1092 void dumpStr(const std::string &string, const char *label = NULL) { 1093 if (label) 1094 printf("%s: ", label); 1095 for (uint i = 0; i < string.length(); i++) 1096 printf("%x ", string[i]); 1097 printf("\n"); 1098 } 1099 1100 1101 void 1102 MimeSnifferTest::SnifferTest() { 1103 #if TEST_R5 1104 Outputf("(no tests actually performed for R5 version)\n"); 1105 #else // TEST_R5 1106 const char *rules[] = { 1107 // General tests 1108 "1.0 ('#include')", 1109 "0.0 [0:32] ('#include')", 1110 "0.e-230 [0:32] (\\#include | \\#ifndef)", 1111 ".2 ([0:32] \"#include\" | [0] '#define' | [0:200] 'int main(')", 1112 "1.0 [0:32] ('<html>' | '<head>' | '<body>')", 1113 // Range tests 1114 "1.0 [0:9] ('rock')", 1115 "1.0 ([0:9] 'roll')", 1116 "1.0 ([0:9] 'rock' | [0:9] 'roll')", 1117 "1.0 [0:9] ('rock' | 'roll')", 1118 "1.0 ([0] 'rock')", 1119 "1.0 ([0] 'rock' | [0:9] 'roll')", 1120 "1.0 ([9] 'rock' | [10] 'roll')", 1121 // Mask, octal, and hex tests 1122 "1.0 (\\xFF\\xFF & '\\xF0\\xF0')", 1123 "1.0 ('\\33\\34' & \\xFF\\x00)", 1124 "1.0 (\\33\\34 & \"\\x00\\xFF\")", 1125 "1.0 (\\xFF & \\x05)", 1126 // Conjunctions 1127 "1.0 ([4] 'rock') ([9] 'roll')", 1128 "1.0 [5] ('roll') [10] ('rock')", 1129 "1.0 [4] ('rock' | 'roll') ([9] 'rock' | [10] 'roll')", 1130 // Case insensitivity tests 1131 "1.0 [4] (-i 'Rock' | 'Roll')", 1132 "1.0 [9] ('Rock' | -i 'Roll')", 1133 "1.0 (-i [4] 'Rock' | [9] 'Roll')", 1134 "1.0 ([9] 'Rock' | -i [4] 'Roll')", 1135 }; 1136 const int ruleCount = sizeof(rules)/sizeof(char*); 1137 struct test_case { 1138 const std::string data; 1139 const bool result[ruleCount]; 1140 } tests[] = { 1141 1142 //------------------------------ 1143 { 1144 "#include <stdio.h> \n\ 1145 #include <stdlib.h> \n\ 1146 \n\ 1147 int main() { \n\ 1148 return 0; \n\ 1149 } \n\ 1150 \n\ 1151 ", { true, true, true, true, false, 1152 false, false, false, false, false, false, false, 1153 false, false, false, false, 1154 false, false, false, 1155 false, false, false, false 1156 } 1157 }, 1158 //------------------------------ 1159 { 1160 " #include <stdio.h> \n\ 1161 #include <stdlib.h> \n\ 1162 \n\ 1163 int main() { \n\ 1164 return 0; \n\ 1165 } \n\ 1166 \n\ 1167 ", { false, true, true, true, false, 1168 false, false, false, false, false, false, false, 1169 false, false, false, false, 1170 false, false, false, 1171 false, false, false, false 1172 } 1173 }, 1174 //------------------------------ 1175 { 1176 "#ifndef SOME_TEST_H \n\ 1177 #define SOME_TEST_H \n\ 1178 \n\ 1179 void main(); \n\ 1180 \n\ 1181 #endif // SOME_TEST_H \n\ 1182 \n\ 1183 ", { false, false, true, false, false, 1184 false, false, false, false, false, false, false, 1185 false, false, false, false, 1186 false, false, false, 1187 false, false, false, false 1188 } 1189 }, 1190 //------------------------------ 1191 { 1192 "//------------------ \n\ 1193 // SomeTest.cpp \n\ 1194 //------------------ \n\ 1195 #include <stdio.h> \n\ 1196 \n\ 1197 int main() { \n\ 1198 return 0; \n\ 1199 } \n\ 1200 \n\ 1201 ", { false, false, false, true, false, 1202 false, false, false, false, false, false, false, 1203 false, false, false, true, 1204 // ^^^^ <= coincedence 1205 false, false, false, 1206 false, false, false, false 1207 } 1208 }, 1209 //------------------------------ 1210 { 1211 "<html> \n\ 1212 <body bgcolor='#ffffff'> \n\ 1213 HTML is boring as hell <br> \n\ 1214 when i write it too much <br> \n\ 1215 my head starts to swell <br> \n\ 1216 <br> \n\ 1217 HTML is stupid and dumb <br> \n\ 1218 running through traffic <br> \n\ 1219 is ten times as fun <br> \n\ 1220 </body> \n\ 1221 </html> \n\ 1222 ", { false, false, false, false, true, 1223 false, false, false, false, false, false, false, 1224 false, false, false, false, 1225 false, false, false, 1226 false, false, false, false 1227 } 1228 }, 1229 //--------- <= Ten characters in 1230 { 1231 " rock&roll", // 5,10 1232 { false, false, false, false, false, 1233 true, false, true, true, false, false, true, 1234 false, false, false, false, 1235 false, false, false, 1236 false, false, false, false 1237 } 1238 }, 1239 //--------- <= Ten characters in 1240 { 1241 " rock&roll", // 4,9 1242 { false, false, false, false, false, 1243 true, true, true, true, false, true, false, 1244 false, false, false, false, 1245 true, false, false, 1246 true, true, true, false 1247 } 1248 }, 1249 //--------- <= Ten characters in 1250 { 1251 " roll&rock", // 5,10 1252 { false, false, false, false, false, 1253 false, true, true, true, false, true, false, 1254 false, false, false, false, 1255 false, true, false, 1256 false, false, false, false 1257 } 1258 }, 1259 //--------- <= Ten characters in 1260 { 1261 " roll&rock", // 4,9 1262 { false, false, false, false, false, 1263 true, true, true, true, false, true, true, 1264 false, false, false, false, 1265 false, false, true, 1266 true, true, false, true 1267 } 1268 }, 1269 //--------- <= Ten characters in 1270 { 1271 " ROCK&ROLL", // 4,9 1272 { false, false, false, false, false, 1273 false, false, false, false, false, false, false, 1274 false, false, false, false, 1275 false, false, false, 1276 true, true, true, false 1277 } 1278 }, 1279 //--------- <= Ten characters in 1280 { 1281 " rOlL&RoCk", // 4,9 1282 { false, false, false, false, false, 1283 false, false, false, false, false, false, false, 1284 false, false, false, false, 1285 false, false, false, 1286 true, true, false, true 1287 } 1288 }, 1289 //------------------------------ 1290 { 1291 "\xFF\xFF FF FF", 1292 { false, false, false, false, false, 1293 false, false, false, false, false, false, false, 1294 true, false, false, true, 1295 false, false, false, 1296 false, false, false, false 1297 } 1298 }, 1299 //------------------------------ 1300 { 1301 "\xFA\xFA FA FA", 1302 { false, false, false, false, false, 1303 false, false, false, false, false, false, false, 1304 true, false, false, false, 1305 false, false, false, 1306 false, false, false, false 1307 } 1308 }, 1309 //------------------------------ 1310 { 1311 "\xAF\xAF AF AF", 1312 { false, false, false, false, false, 1313 false, false, false, false, false, false, false, 1314 false, false, false, true, 1315 false, false, false, 1316 false, false, false, false 1317 } 1318 }, 1319 //------------------------------ 1320 { 1321 std::string("\033\000 033 000", 10), // Otherwise, it thinks the NULL is the end of the string 1322 { false, false, false, false, false, 1323 false, false, false, false, false, false, false, 1324 false, true, false, false, 1325 false, false, false, 1326 false, false, false, false 1327 } 1328 }, 1329 //------------------------------ 1330 { 1331 std::string("\000\034 000 034", 10), // Otherwise, it thinks the NULL is the end of the string 1332 { false, false, false, false, false, 1333 false, false, false, false, false, false, false, 1334 false, false, true, false, 1335 false, false, false, 1336 false, false, false, false 1337 } 1338 }, 1339 //------------------------------ 1340 { 1341 "\033\034 033 034", 1342 { false, false, false, false, false, 1343 false, false, false, false, false, false, false, 1344 false, true, true, false, 1345 false, false, false, 1346 false, false, false, false 1347 } 1348 }, 1349 }; // tests[] 1350 const int32 testCount = sizeof(tests)/sizeof(test_case); 1351 1352 for (int i = 0; i < testCount; i++) { 1353 if (i > 0) 1354 NextSubTestBlock(); 1355 test_case &test = tests[i]; 1356 // cout << "--------------------------------------------------------------------------------" << endl; 1357 // cout << test.data << endl; 1358 1359 for (int j = 0; j < ruleCount; j++) { 1360 NextSubTest(); 1361 // cout << "############################################################" << endl; 1362 // cout << rules[j] << endl; 1363 // cout << test.result[j] << endl; 1364 Rule rule; 1365 BString errorMsg; 1366 status_t err = parse(rules[j], &rule, &errorMsg); 1367 // dumpStr(test.data, "str "); 1368 if (err) { 1369 // cout << "PARSE FAILURE!!!" << endl; 1370 // cout << errorMsg.String() << endl; 1371 } 1372 CHK(err == B_OK); 1373 if (!err) { 1374 BMallocIO data; 1375 data.Write(test.data.data(), test.data.length());//strlen(test.data)); 1376 bool match = rule.Sniff(&data); 1377 // cout << match << endl; 1378 // cout << "match == " << (match ? "yes" : "no") << ", " 1379 // << ((match == test.result[j]) ? "SUCCESS" : "FAILURE") << endl; 1380 CHK(match == test.result[j]); 1381 } 1382 } 1383 } 1384 #endif // !TEST_R5 1385 } 1386