1 // MimeSnifferTest.cpp
2
3 #include "MimeSnifferTest.h"
4
5 #include <cppunit/Test.h>
6 #include <cppunit/TestSuite.h>
7 #include <cppunit/TestCaller.h>
8 #include <sniffer/Rule.h>
9 #include <sniffer/Parser.h>
10 #include <DataIO.h>
11 #include <Mime.h>
12 #include <String.h> // BString
13 #include <TestUtils.h>
14
15 #include <stdio.h>
16
17 #include <iostream>
18 using std::cout;
19 using std::endl;
20
21 using namespace BPrivate::Storage::Sniffer;
22
23 // Suite
24 CppUnit::Test*
Suite()25 MimeSnifferTest::Suite() {
26 CppUnit::TestSuite *suite = new CppUnit::TestSuite();
27 typedef CppUnit::TestCaller<MimeSnifferTest> TC;
28
29 suite->addTest( new TC("Mime Sniffer::Scanner Test",
30 &MimeSnifferTest::ScannerTest) );
31 suite->addTest( new TC("Mime Sniffer::Parser Test",
32 &MimeSnifferTest::ParserTest) );
33 suite->addTest( new TC("Mime Sniffer::Sniffer Test",
34 &MimeSnifferTest::SnifferTest) );
35
36 return suite;
37 }
38
39 // Scanner Test
40 void
ScannerTest()41 MimeSnifferTest::ScannerTest() {
42 #if TEST_R5
43 Outputf("(no tests actually performed for R5 version)\n");
44 #else // TEST_R5
45
46
47 // tests:
48 // Internal TokenStream and CharStream classes
49
50 // Define some useful macros for dynamically allocating
51 // various Token classes
52 #define T(type) (new Token(type, -1))
53 #define S(str) (new StringToken(str, -1))
54 #define I(val) (new IntToken(val, -1))
55 #define F(val) (new FloatToken(val, -1))
56
57 struct test_case {
58 const char *rule;
59 int tokenCount;
60 Token *tokens[256];
61 } testCases[] = {
62 { "'Hey'[]:", 4,
63 { S("Hey"),
64 T(LeftBracket),
65 T(RightBracket),
66 T(Colon)
67 }
68 },
69 { "1", 1, { I(1) } },
70 { "1.0", 1, { F(1.0) } },
71
72 { "1.0 (\"ABCD\")", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
73 { "1.0 ('ABCD')", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
74 { " 1.0 ('ABCD') ", 4, { F(1.0), T(LeftParen), S("ABCD"), T(RightParen) } },
75 { "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", 11,
76 { F(0.8),
77 T(LeftBracket),
78 I(0),
79 T(Colon),
80 I(3),
81 T(RightBracket),
82 T(LeftParen),
83 S("ABCDEFG"),
84 T(Divider),
85 S("abcdefghij"),
86 T(RightParen)
87 }
88 },
89 { "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", 17,
90 { F(0.5),
91 T(LeftParen),
92 T(LeftBracket),
93 I(10),
94 T(RightBracket),
95 S("ABCD"),
96 T(Divider),
97 T(LeftBracket),
98 I(17),
99 T(RightBracket),
100 S("abcd"),
101 T(Divider),
102 T(LeftBracket),
103 I(13),
104 T(RightBracket),
105 S("EFGH"),
106 T(RightParen)
107 }
108 },
109 { "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", 13,
110 { F(0.5),
111 T(LeftBracket),
112 I(0),
113 T(Colon),
114 I(3),
115 T(RightBracket),
116 T(LeftParen),
117 S("ABCD"),
118 T(Divider),
119 S("abcd"),
120 T(Divider),
121 S("EFGH"),
122 T(RightParen)
123 }
124 },
125 { "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", 11,
126 { F(0.8),
127 T(LeftBracket),
128 I(0),
129 T(Colon),
130 I(3),
131 T(RightBracket),
132 T(LeftParen),
133 S("ABCDEFG"),
134 T(Divider),
135 S("abcdefghij"),
136 T(RightParen)
137 }
138 },
139 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", 11,
140 { F(0.8),
141 T(LeftBracket),
142 I(0),
143 T(Colon),
144 I(3),
145 T(RightBracket),
146 T(LeftParen),
147 S("ABCDEFG"),
148 T(Ampersand),
149 S("abcdefg"),
150 T(RightParen)
151 }
152 },
153 { "1.0 ('ABCD') | ('EFGH')", 8,
154 { F(1.0),
155 T(LeftParen),
156 S("ABCD"),
157 T(RightParen),
158 T(Divider),
159 T(LeftParen),
160 S("EFGH"),
161 T(RightParen)
162 }
163 },
164 { "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", 18,
165 { F(1.0),
166 T(LeftBracket),
167 I(0),
168 T(Colon),
169 I(3),
170 T(RightBracket),
171 T(LeftParen),
172 S("ABCD"),
173 T(RightParen),
174 T(Divider),
175 T(LeftBracket),
176 I(2),
177 T(Colon),
178 I(4),
179 T(RightBracket),
180 T(LeftParen),
181 S("EFGH"),
182 T(RightParen)
183 }
184 },
185 { "0.8 [0:4] (\\077Mkj0x34 & 'abcdefgh')", 11,
186 { F(0.8),
187 T(LeftBracket),
188 I(0),
189 T(Colon),
190 I(4),
191 T(RightBracket),
192 T(LeftParen),
193 S("\077Mkj0x34"),
194 T(Ampersand),
195 S("abcdefgh"),
196 T(RightParen)
197 }
198 },
199 { "0.8 [0:4] (\\077Mkj\\x34 & 'abcdefgh')", 11,
200 { F(0.8),
201 T(LeftBracket),
202 I(0),
203 T(Colon),
204 I(4),
205 T(RightBracket),
206 T(LeftParen),
207 S("\077Mkj\x34"),
208 T(Ampersand),
209 S("abcdefgh"),
210 T(RightParen)
211 }
212 },
213 { "0.8 [0:3] (\\077034 & 'abcd')", 11,
214 { F(0.8),
215 T(LeftBracket),
216 I(0),
217 T(Colon),
218 I(3),
219 T(RightBracket),
220 T(LeftParen),
221 S("\077034"),
222 T(Ampersand),
223 S("abcd"),
224 T(RightParen)
225 }
226 },
227 { "0.8 [0:3] (\\077\\034 & 'ab')", 11,
228 { F(0.8),
229 T(LeftBracket),
230 I(0),
231 T(Colon),
232 I(3),
233 T(RightBracket),
234 T(LeftParen),
235 S("\077\034"),
236 T(Ampersand),
237 S("ab"),
238 T(RightParen)
239 }
240 },
241 { "0.8 [0:3] (\\77\\034 & 'ab')", 11,
242 { F(0.8),
243 T(LeftBracket),
244 I(0),
245 T(Colon),
246 I(3),
247 T(RightBracket),
248 T(LeftParen),
249 S("\077\034"),
250 T(Ampersand),
251 S("ab"),
252 T(RightParen)
253 }
254 },
255 { "0.8 [0:3] (\\7 & 'a')", 11,
256 { F(0.8),
257 T(LeftBracket),
258 I(0),
259 T(Colon),
260 I(3),
261 T(RightBracket),
262 T(LeftParen),
263 S("\007"),
264 T(Ampersand),
265 S("a"),
266 T(RightParen)
267 }
268 },
269 { "0.8 [0:3] (\"\\17\" & 'a')", 11,
270 { F(0.8),
271 T(LeftBracket),
272 I(0),
273 T(Colon),
274 I(3),
275 T(RightBracket),
276 T(LeftParen),
277 S("\017"),
278 T(Ampersand),
279 S("a"),
280 T(RightParen)
281 }
282 },
283 { "0.8 [0:3] ('\\17' & 'a')", 11,
284 { F(0.8),
285 T(LeftBracket),
286 I(0),
287 T(Colon),
288 I(3),
289 T(RightBracket),
290 T(LeftParen),
291 S("\017"),
292 T(Ampersand),
293 S("a"),
294 T(RightParen)
295 }
296 },
297 { "0.8 [0:3] (\\g & 'a')", 11,
298 { F(0.8),
299 T(LeftBracket),
300 I(0),
301 T(Colon),
302 I(3),
303 T(RightBracket),
304 T(LeftParen),
305 S("g"),
306 T(Ampersand),
307 S("a"),
308 T(RightParen)
309 }
310 },
311 { "0.8 [0:3] (\\g&\\b)", 11,
312 { F(0.8),
313 T(LeftBracket),
314 I(0),
315 T(Colon),
316 I(3),
317 T(RightBracket),
318 T(LeftParen),
319 S("g"),
320 T(Ampersand),
321 S("\b"),
322 T(RightParen)
323 }
324 },
325 { "0.8 [0:3] (\\g\\&b & 'abc')", 11,
326 { F(0.8),
327 T(LeftBracket),
328 I(0),
329 T(Colon),
330 I(3),
331 T(RightBracket),
332 T(LeftParen),
333 S("g&b"),
334 T(Ampersand),
335 S("abc"),
336 T(RightParen)
337 }
338 },
339 { "0.8 [0:3] (0x3457 & 'ab')", 11,
340 { F(0.8),
341 T(LeftBracket),
342 I(0),
343 T(Colon),
344 I(3),
345 T(RightBracket),
346 T(LeftParen),
347 S("\x34\x57"),
348 T(Ampersand),
349 S("ab"),
350 T(RightParen)
351 }
352 },
353 { "0.8 [0:3] (\\x34\\x57 & 'ab')", 11,
354 { F(0.8),
355 T(LeftBracket),
356 I(0),
357 T(Colon),
358 I(3),
359 T(RightBracket),
360 T(LeftParen),
361 S("\x34\x57"),
362 T(Ampersand),
363 S("ab"),
364 T(RightParen)
365 }
366 },
367 { "0.8 [0:3] (0xA4b7 & 'ab')", 11,
368 { F(0.8),
369 T(LeftBracket),
370 I(0),
371 T(Colon),
372 I(3),
373 T(RightBracket),
374 T(LeftParen),
375 S("\xA4\xb7"),
376 T(Ampersand),
377 S("ab"),
378 T(RightParen)
379 }
380 },
381 { "0.8 [0:3] (\\xA4\\xb7 & 'ab')", 11,
382 { F(0.8),
383 T(LeftBracket),
384 I(0),
385 T(Colon),
386 I(3),
387 T(RightBracket),
388 T(LeftParen),
389 S("\xA4\xb7"),
390 T(Ampersand),
391 S("ab"),
392 T(RightParen)
393 }
394 },
395 { "0.8 [0:3] (\"\\xA4\\xb7\" & 'ab')", 11,
396 { F(0.8),
397 T(LeftBracket),
398 I(0),
399 T(Colon),
400 I(3),
401 T(RightBracket),
402 T(LeftParen),
403 S("\xA4\xb7"),
404 T(Ampersand),
405 S("ab"),
406 T(RightParen)
407 }
408 },
409 { "0.8 [0:3] (\'\\xA4\\xb7\' & 'ab')", 11,
410 { F(0.8),
411 T(LeftBracket),
412 I(0),
413 T(Colon),
414 I(3),
415 T(RightBracket),
416 T(LeftParen),
417 S("\xA4\xb7"),
418 T(Ampersand),
419 S("ab"),
420 T(RightParen)
421 }
422 },
423 { "0.8 [0:3] ('ab\"' & 'abc')", 11,
424 { F(0.8),
425 T(LeftBracket),
426 I(0),
427 T(Colon),
428 I(3),
429 T(RightBracket),
430 T(LeftParen),
431 S("ab\""),
432 T(Ampersand),
433 S("abc"),
434 T(RightParen)
435 }
436 },
437 { "0.8 [0:3] (\"ab\\\"\" & 'abc')", 11,
438 { F(0.8),
439 T(LeftBracket),
440 I(0),
441 T(Colon),
442 I(3),
443 T(RightBracket),
444 T(LeftParen),
445 S("ab\""),
446 T(Ampersand),
447 S("abc"),
448 T(RightParen)
449 }
450 },
451 { "0.8 [0:3] (\"ab\\A\" & 'abc')", 11,
452 { F(0.8),
453 T(LeftBracket),
454 I(0),
455 T(Colon),
456 I(3),
457 T(RightBracket),
458 T(LeftParen),
459 S("abA"),
460 T(Ampersand),
461 S("abc"),
462 T(RightParen)
463 }
464 },
465 { "0.8 [0:3] (\"ab'\" & 'abc')", 11,
466 { F(0.8),
467 T(LeftBracket),
468 I(0),
469 T(Colon),
470 I(3),
471 T(RightBracket),
472 T(LeftParen),
473 S("ab'"),
474 T(Ampersand),
475 S("abc"),
476 T(RightParen)
477 }
478 },
479 { "0.8 [0:3] (\"ab\\\\\" & 'abc')", 11,
480 { F(0.8),
481 T(LeftBracket),
482 I(0),
483 T(Colon),
484 I(3),
485 T(RightBracket),
486 T(LeftParen),
487 S("ab\\"),
488 T(Ampersand),
489 S("abc"),
490 T(RightParen)
491 }
492 },
493 { "0.8 [-5:-3] (\"abc\" & 'abc')", 11,
494 { F(0.8),
495 T(LeftBracket),
496 I(-5),
497 T(Colon),
498 I(-3),
499 T(RightBracket),
500 T(LeftParen),
501 S("abc"),
502 T(Ampersand),
503 S("abc"),
504 T(RightParen)
505 }
506 },
507 { "0.8 [5:3] (\"abc\" & 'abc')", 11,
508 { F(0.8),
509 T(LeftBracket),
510 I(5),
511 T(Colon),
512 I(3),
513 T(RightBracket),
514 T(LeftParen),
515 S("abc"),
516 T(Ampersand),
517 S("abc"),
518 T(RightParen)
519 }
520 },
521 { "1.2 ('ABCD')", 4,
522 { F(1.2),
523 T(LeftParen),
524 S("ABCD"),
525 T(RightParen)
526 }
527 },
528 { ".2 ('ABCD')", 4,
529 { F(0.2),
530 T(LeftParen),
531 S("ABCD"),
532 T(RightParen)
533 }
534 },
535 { "0. ('ABCD')", 4,
536 { F(0.0),
537 T(LeftParen),
538 S("ABCD"),
539 T(RightParen)
540 }
541 },
542 // Signed integers
543 { "-1 ('ABCD')", 4,
544 { I(-1),
545 T(LeftParen),
546 S("ABCD"),
547 T(RightParen)
548 }
549 },
550 { "+1 ('ABCD')", 4,
551 { I(1),
552 T(LeftParen),
553 S("ABCD"),
554 T(RightParen)
555 }
556 },
557 // Unsigned extended floats
558 { "1E25 ('ABCD')", 4,
559 { F(1e25),
560 T(LeftParen),
561 S("ABCD"),
562 T(RightParen)
563 }
564 },
565 { "1e25 ('ABCD')", 4,
566 { F(1e25),
567 T(LeftParen),
568 S("ABCD"),
569 T(RightParen)
570 }
571 },
572 { "1E+25 ('ABCD')", 4,
573 { F(1e25),
574 T(LeftParen),
575 S("ABCD"),
576 T(RightParen)
577 }
578 },
579 { "1e+25 ('ABCD')", 4,
580 { F(1e25),
581 T(LeftParen),
582 S("ABCD"),
583 T(RightParen)
584 }
585 },
586 { "1E-25 ('ABCD')", 4,
587 { F(1e-25),
588 T(LeftParen),
589 S("ABCD"),
590 T(RightParen)
591 }
592 },
593 { "1e-25 ('ABCD')", 4,
594 { F(1e-25),
595 T(LeftParen),
596 S("ABCD"),
597 T(RightParen)
598 }
599 },
600 // Positive signed extended floats
601 { "+1E25 ('ABCD')", 4,
602 { F(1e25),
603 T(LeftParen),
604 S("ABCD"),
605 T(RightParen)
606 }
607 },
608 { "+1e25 ('ABCD')", 4,
609 { F(1e25),
610 T(LeftParen),
611 S("ABCD"),
612 T(RightParen)
613 }
614 },
615 { "+1E+25 ('ABCD')", 4,
616 { F(1e25),
617 T(LeftParen),
618 S("ABCD"),
619 T(RightParen)
620 }
621 },
622 { "+1e+25 ('ABCD')", 4,
623 { F(1e25),
624 T(LeftParen),
625 S("ABCD"),
626 T(RightParen)
627 }
628 },
629 { "+1E-25 ('ABCD')", 4,
630 { F(1e-25),
631 T(LeftParen),
632 S("ABCD"),
633 T(RightParen)
634 }
635 },
636 { "+1e-25 ('ABCD')", 4,
637 { F(1e-25),
638 T(LeftParen),
639 S("ABCD"),
640 T(RightParen)
641 }
642 },
643 // Negative signed extended floats
644 { "-1E25 ('ABCD')", 4,
645 { F(-1e25),
646 T(LeftParen),
647 S("ABCD"),
648 T(RightParen)
649 }
650 },
651 { "-1e25 ('ABCD')", 4,
652 { F(-1e25),
653 T(LeftParen),
654 S("ABCD"),
655 T(RightParen)
656 }
657 },
658 { "-1E+25 ('ABCD')", 4,
659 { F(-1e25),
660 T(LeftParen),
661 S("ABCD"),
662 T(RightParen)
663 }
664 },
665 { "-1e+25 ('ABCD')", 4,
666 { F(-1e25),
667 T(LeftParen),
668 S("ABCD"),
669 T(RightParen)
670 }
671 },
672 { "-1E-25 ('ABCD')", 4,
673 { F(-1e-25),
674 T(LeftParen),
675 S("ABCD"),
676 T(RightParen)
677 }
678 },
679 { "-1e-25 ('ABCD')", 4,
680 { F(-1e-25),
681 T(LeftParen),
682 S("ABCD"),
683 T(RightParen)
684 }
685 },
686 // Miscellaneous extended floats
687 { ".1E-25 ('ABCD')", 4,
688 { F(0.1e-25),
689 T(LeftParen),
690 S("ABCD"),
691 T(RightParen)
692 }
693 },
694 { "-.1e-25 ('ABCD')", 4,
695 { F(-0.1e-25),
696 T(LeftParen),
697 S("ABCD"),
698 T(RightParen)
699 }
700 },
701 // Signed floats
702 { "-1.0 ('ABCD')", 4,
703 { F(-1.0),
704 T(LeftParen),
705 S("ABCD"),
706 T(RightParen)
707 }
708 },
709 { "+1.0 ('ABCD')", 4,
710 { F(1.0),
711 T(LeftParen),
712 S("ABCD"),
713 T(RightParen)
714 }
715 },
716 // The uber test
717 { "0 -0 +0 1 -2 +3 0. -0. +0. 1. -2. +3. 0.0 -0.1 +0.2 1.0 -2.1 +3.2 "
718 "0.e0 0.e-1 0.e+2 1.e1 2.e-2 3.e+3 -1.e1 -2.e-2 -3.e+3 +1.e1 +2.e-2 +3.e+3 "
719 "0.012345 1.23456 ( ) [ ] | & : -i "
720 " \"abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \\\" ' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 \" "
721 " 'abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" \\' \\012\\0\\377\\x00\\x12\\xab\\xCD\\xeF\\x1A\\xb2 ' "
722 " \\000abc_xyz123\"'\"'456 \\xA1a1 \\!\\?\\\\ "
723 " 0x00 0x12 0xabCD 0xaBcD 0x0123456789aBcDeFfEdCbA", 50,
724 { I(0), I(0), I(0), I(1), I(-2), I(3), F(0.0), F(0.0), F(0.0),
725 F(1.0), F(-2.0), F(3.0), F(0.0), F(-0.1), F(0.2), F(1.0), F(-2.1), F(3.2),
726 F(0.0), F(0.0e-1), F(0.0e2), F(1.0e1), F(2.0e-2), F(3.0e3),
727 F(-1.0e1), F(-2.0e-2), F(-3.0e3), F(1.0e1), F(2.0e-2), F(3.0e3),
728 F(0.012345), F(1.23456), T(LeftParen), T(RightParen), T(LeftBracket),
729 T(RightBracket), T(Divider), T(Ampersand), T(Colon), T(CaseInsensitiveFlag),
730 S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)),
731 S(std::string("abcxyzABCXYZ_ ( ) [ ] | & : -i \t\n \" ' \012\0\377\x00\x12\xab\xCD\xeF\x1A\xb2 ", 49)),
732 S(std::string("\000abc_xyz123\"'\"'456", 18)),
733 S("\241a1"),
734 S("!?\\"),
735 S(std::string("\x00", 1)), S("\x12"), S("\xAB\xCD"), S("\xAB\xCD"),
736 S("\x01\x23\x45\x67\x89\xAB\xCD\xEF\xFE\xDC\xBA")
737 }
738 },
739 };
740
741 // Undefine our nasty macros
742 #undef T
743 #undef S
744 #undef I
745 #undef F
746
747 const int testCaseCount = sizeof(testCases) / sizeof(test_case);
748 for (int i = 0; i < testCaseCount; i++) {
749 NextSubTest();
750 // cout << endl << testCases[i].rule << endl;
751 TokenStream stream;
752 try {
753 stream.SetTo(testCases[i].rule);
754
755 CHK(stream.InitCheck() == B_OK);
756 for (int j = 0; j < testCases[i].tokenCount; j++) {
757 const Token *token = stream.Get();
758 CHK(token);
759 /*
760 cout << tokenTypeToString(token->Type()) << endl;
761
762 if (token->Type() == CharacterString)
763 cout << " token1 == " << token->String() << endl;
764 if (testCases[i].tokens[j]->Type() == CharacterString)
765 cout << " token2 == " << (testCases[i].tokens[j])->String() << endl;
766
767 if (token->Type() == CharacterString)
768 {
769 const std::string &str = token->String();
770 printf("parser: ");
771 for (int i = 0; i < str.length(); i++)
772 printf("%x ", str[i]);
773 printf("\n");
774 }
775 if (testCases[i].tokens[j]->Type() == CharacterString)
776 {
777 const std::string &str = (testCases[i].tokens[j])->String();
778 printf("tester: ");
779 for (int i = 0; i < str.length(); i++)
780 printf("%x ", str[i]);
781 printf("\n");
782 }
783
784 switch (token->Type()) {
785 case CharacterString:
786 cout << " string == " << token->String() << endl;
787 break;
788 case Integer:
789 cout << " int == " << token->Int() << endl;
790 break;
791 case FloatingPoint:
792 cout << " float == " << token->Float() << endl;
793 break;
794 }
795 */
796 CHK(*token == *(testCases[i].tokens[j]));
797 delete testCases[i].tokens[j];
798 }
799 CHK(stream.IsEmpty());
800 } catch (Err *e) {
801 CppUnit::Exception *err = new CppUnit::Exception(e->Msg());
802 delete e;
803 throw *err;
804 }
805 }
806
807 #endif // !TEST_R5
808 }
809
810 // Parser Test
811 void
ParserTest()812 MimeSnifferTest::ParserTest() {
813 // test a couple of valid and invalid rules
814 struct test_case {
815 const char *rule;
816 const char *error; // NULL, if valid
817 } testCases[] = {
818 // valid rules
819 { "1.0 (\"ABCD\")", NULL },
820 { "1.0 ('ABCD')", NULL },
821 { " 1.0 ('ABCD') ", NULL },
822 { "0.8 [0:3] ('ABCDEFG' | 'abcdefghij')", NULL },
823 { "0.5([10]'ABCD'|[17]'abcd'|[13]'EFGH')", NULL } ,
824 { "0.5 \n [0:3] \t ('ABCD' \n | 'abcd' | 'EFGH')", NULL },
825 { "0.8 [ 0 : 3 ] ('ABCDEFG' | 'abcdefghij')", NULL },
826 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg')", NULL },
827 // These two rules are accepted by the R5 sniffer checker, but not
828 // by the parser. Thus, we're not accepting them with either.
829 // { "1.0 ('ABCD') | ('EFGH')", NULL },
830 // { "1.0 [0:3] ('ABCD') | [2:4] ('EFGH')", NULL },
831 { "0.8 [0:3] (\\077Mkl0x34 & 'abcdefgh')", NULL },
832 { "0.8 [0:3] (\\077034 & 'abcd')", NULL },
833 { "0.8 [0:3] (\\077\\034 & 'ab')", NULL },
834 { "0.8 [0:3] (\\77\\034 & 'ab')", NULL },
835 { "0.8 [0:3] (\\7 & 'a')", NULL },
836 { "0.8 [0:3] (\"\\17\" & 'a')", NULL },
837 { "0.8 [0:3] ('\\17' & 'a')", NULL },
838 { "0.8 [0:3] (\\g & 'a')", NULL },
839 { "0.8 [0:3] (\\g&\\b)", NULL },
840 { "0.8 [0:3] (\\g\\&b & 'abc')", NULL },
841 { "0.8 [0:3] (0x3457 & 'ab')", NULL },
842 { "0.8 [0:3] (0xA4b7 & 'ab')", NULL },
843 { "0.8 [0:3] ('ab\"' & 'abc')", NULL },
844 { "0.8 [0:3] (\"ab\\\"\" & 'abc')", NULL },
845 { "0.8 [0:3] (\"ab\\A\" & 'abc')", NULL },
846 { "0.8 [0:3] (\"ab'\" & 'abc')", NULL },
847 { "0.8 [0:3] (\"ab\\\\\" & 'abc')", NULL },
848 { "0.8 [-5:-3] (\"abc\" & 'abc')", NULL },
849 // Also accepted by the R5 sniffer but not the R5 parser. We reject.
850 // { "0.8 [5:3] (\"abc\" & 'abc')", NULL },
851 { "1.0 ('ABCD')", NULL },
852 { ".2 ('ABCD')", NULL },
853 { "0. ('ABCD')", NULL },
854 { "1 ('ABCD')", NULL },
855 { "+1 ('ABCD')", NULL },
856 // We accept extended notation floating point numbers now, but
857 // not invalid priorities.
858 // { "1E25 ('ABCD')", NULL },
859 // { "1e25 ('ABCD')", NULL },
860 // R5 chokes on this rule :-(
861 #if !TEST_R5
862 { "1e-3 ('ABCD')", NULL },
863 #endif
864 { "+.003e2 ('ABCD')", NULL },
865 // This one too. See how much better our parser is? :-)
866 #if !TEST_R5
867 { "-123e-9999999999 ('ABCD')", NULL }, // Hooray for the stunning accuracy of floating point ;-)
868 #endif
869 // invalid rules
870 { "0.0 ('')",
871 "Sniffer pattern error: illegal empty pattern" },
872 { "('ABCD')",
873 "Sniffer pattern error: match level expected" },
874 { "[0:3] ('ABCD')",
875 "Sniffer pattern error: match level expected" },
876 { "0.8 [0:3] ( | 'abcdefghij')",
877 "Sniffer pattern error: missing pattern" },
878 { "0.8 [0:3] ('ABCDEFG' | )",
879 "Sniffer pattern error: missing pattern" },
880 { "[0:3] ('ABCD')",
881 "Sniffer pattern error: match level expected" },
882 { "1.0 (ABCD')",
883 #if TEST_R5
884 "Sniffer pattern error: misplaced single quote"
885 #else
886 "Sniffer pattern error: invalid character 'A'"
887 #endif
888 },
889 { "1.0 ('ABCD)",
890 #if TEST_R5
891 "Sniffer pattern error: unterminated rule"
892 #else
893 "Sniffer pattern error: unterminated single-quoted string"
894 #endif
895 },
896 { "1.0 (ABCD)",
897 #if TEST_R5
898 "Sniffer pattern error: missing pattern"
899 #else
900 "Sniffer pattern error: invalid character 'A'"
901 #endif
902 },
903 { "1.0 (ABCD 'ABCD')",
904 #if TEST_R5
905 "Sniffer pattern error: missing pattern"
906 #else
907 "Sniffer pattern error: invalid character 'A'"
908 #endif
909 },
910 { "1.0 'ABCD')",
911 #if TEST_R5
912 "Sniffer pattern error: missing pattern"
913 #else
914 "Sniffer pattern error: missing pattern"
915 #endif
916 },
917 { "1.0 ('ABCD'",
918 "Sniffer pattern error: unterminated rule" },
919 { "1.0 'ABCD'",
920 #if TEST_R5
921 "Sniffer pattern error: missing sniff pattern"
922 #else
923 "Sniffer pattern error: missing pattern"
924 #endif
925 },
926 { "0.5 [0:3] ('ABCD' | 'abcd' | [13] 'EFGH')",
927 "Sniffer pattern error: missing pattern" },
928 { "0.5('ABCD'|'abcd'|[13]'EFGH')",
929 "Sniffer pattern error: missing pattern" },
930 { "0.5[0:3]([10]'ABCD'|[17]'abcd'|[13]'EFGH')",
931 "Sniffer pattern error: missing pattern" },
932 { "0.8 [0x10:3] ('ABCDEFG' | 'abcdefghij')",
933 "Sniffer pattern error: pattern offset expected" },
934 { "0.8 [0:A] ('ABCDEFG' | 'abcdefghij')",
935 #if TEST_R5
936 "Sniffer pattern error: pattern range end expected"
937 #else
938 "Sniffer pattern error: invalid character 'A'"
939 #endif
940 },
941 { "0.8 [0:3] ('ABCDEFG' & 'abcdefghij')",
942 "Sniffer pattern error: pattern and mask lengths do not match" },
943 { "0.8 [0:3] ('ABCDEFG' & 'abcdefg' & 'xyzwmno')",
944 #if TEST_R5
945 "Sniffer pattern error: unterminated rule"
946 #else
947 "Sniffer pattern error: expecting '|', ')', or possibly '&'"
948 #endif
949 },
950 { "0.8 [0:3] (\\g&b & 'a')",
951 #if TEST_R5
952 "Sniffer pattern error: missing mask"
953 #else
954 "Sniffer pattern error: invalid character 'b'"
955 #endif
956 },
957 { "0.8 [0:3] (\\19 & 'a')",
958 "Sniffer pattern error: pattern and mask lengths do not match" },
959 { "0.8 [0:3] (0x345 & 'ab')",
960 "Sniffer pattern error: bad hex literal" },
961 { "0.8 [0:3] (0x3457M & 'abc')",
962 #if TEST_R5
963 "Sniffer pattern error: expecting '|' or '&'"
964 #else
965 "Sniffer pattern error: invalid character 'M'"
966 #endif
967 },
968 { "0.8 [0:3] (0x3457\\7 & 'abc')",
969 #if TEST_R5
970 "Sniffer pattern error: expecting '|' or '&'"
971 #else
972 "Sniffer pattern error: expecting '|', ')', or possibly '&'"
973 #endif
974 },
975
976 // Miscellaneous tests designed to hit every remaining
977 // relevant "throw new Err()" statement in the scanner.
978 // R5 versions will come later...
979 #if !TEST_R5
980 { "\x03 ", "Sniffer pattern error: invalid character '\x03'" },
981 { "\"blah", "Sniffer pattern error: unterminated double-quoted string" },
982 { "0xThisIsNotAHexCode", "Sniffer pattern error: incomplete hex code" },
983 { "0xAndNeitherIsThis:-)", "Sniffer pattern error: bad hex literal" },
984 { ".NotAFloat", "Sniffer pattern error: incomplete floating point number" },
985 { "-NotANumber", "Sniffer pattern error: incomplete signed number" },
986 { "+NotANumber", "Sniffer pattern error: incomplete signed number" },
987
988 { "0.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
989 { "1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
990 { ".0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
991 { "0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
992 { "1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
993 { "-1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
994 { "+1e", "Sniffer pattern error: incomplete extended-notation floating point number" },
995 { "-1.e", "Sniffer pattern error: incomplete extended-notation floating point number" },
996 { "+1.e", "Sniffer pattern error: incomplete extended-notation floating point number" },
997 { "-1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
998 { "+1.0e", "Sniffer pattern error: incomplete extended-notation floating point number" },
999
1000 { "0.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1001 { "1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1002 { ".0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1003 { "0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1004 { "1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1005 { "-1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1006 { "+1e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1007 { "-1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1008 { "+1.e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1009 { "-1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1010 { "+1.0e-", "Sniffer pattern error: incomplete extended-notation floating point number" },
1011
1012 { "0.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1013 { "1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1014 { ".0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1015 { "0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1016 { "1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1017 { "-1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1018 { "+1e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1019 { "-1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1020 { "+1.e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1021 { "-1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1022 { "+1.0e+", "Sniffer pattern error: incomplete extended-notation floating point number" },
1023
1024 { "\\11\\", "Sniffer pattern error: incomplete escape sequence" },
1025 { "\"Escape!! \\", "Sniffer pattern error: incomplete escape sequence" },
1026 { "'Escape!! \\", "Sniffer pattern error: incomplete escape sequence" },
1027
1028 { "\\x", "Sniffer pattern error: incomplete escaped hex code" },
1029 { "\\xNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" },
1030 { "\\xAlsoNotAHexCode", "Sniffer pattern error: incomplete escaped hex code" },
1031 { "\\x0", "Sniffer pattern error: incomplete escaped hex code" },
1032
1033 { "1.0 (\\377)", NULL },
1034 { "\\400", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" },
1035 { "\\777", "Sniffer pattern error: invalid octal literal (octals must be between octal 0 and octal 377 inclusive)" },
1036 { "1.0 (\\800)", NULL },
1037
1038 { NULL, "Sniffer pattern error: NULL pattern" },
1039
1040 { "-2", "Sniffer pattern error: invalid priority" },
1041 { "+2", "Sniffer pattern error: invalid priority" },
1042
1043 { "1.0", "Sniffer pattern error: missing expression" },
1044 #endif // !TEST_R5
1045
1046
1047 // { "1E-25 ('ABCD')", "Sniffer pattern error: missing pattern" },
1048 // I don't currently understand what's wrong with the above rule... R5
1049 // rejects it though, for some reason.
1050 };
1051 const int testCaseCount = sizeof(testCases) / sizeof(test_case);
1052 BMimeType type;
1053 for (int32 i = 0; i < testCaseCount; i++) {
1054 //cout << endl << "----------------------------------------------------------------------" << endl;
1055 NextSubTest();
1056 test_case &testCase = testCases[i];
1057 //cout << endl << testCase.rule << endl;
1058 BString parseError;
1059 status_t error = BMimeType::CheckSnifferRule(testCase.rule,
1060 &parseError);
1061 if (testCase.error == NULL) {
1062 if (error != B_OK) {
1063 cout << endl << "This sucker's gonna fail..."
1064 << endl << "RULE: '" << testCase.rule << "'"
1065 << endl << "ERROR: "
1066 << endl << parseError.String()
1067 << endl;
1068 }
1069 CHK(error == B_OK);
1070 } else {
1071
1072 // if (parseError.FindLast(testCase.error) >= 0) {
1073 // cout << endl << parseError.String(); // << endl;
1074 // cout << endl << testCase.error << endl;
1075 // }
1076 // cout << endl << parseError.String(); // << endl;
1077 /*
1078 if (parseError.FindLast(testCase.error) >= 0) {
1079 cout << " -- OKAY" << endl;
1080 } else {
1081 cout << " -- NOGO" << endl;
1082 cout << testCase.error << endl;
1083 }
1084 */
1085 if (testCase.rule && error != B_BAD_MIME_SNIFFER_RULE) {
1086 printf("rule: `%s'", testCase.rule);
1087 RES(error);
1088 }
1089 CHK(error == (testCase.rule ? B_BAD_MIME_SNIFFER_RULE : B_BAD_VALUE));
1090 CHK(parseError.FindLast(testCase.error) >= 0);
1091 }
1092 }
1093 }
1094
// Debugging helper: prints every byte of `string` to stdout as lower-case
// hex without padding, each followed by a space, then a newline. If `label`
// is non-NULL the line is prefixed with "<label>: ".
void dumpStr(const std::string &string, const char *label = NULL) {
	if (label)
		printf("%s: ", label);
	for (std::string::size_type i = 0; i < string.length(); i++) {
		// Convert through unsigned char first: where plain char is signed,
		// a byte >= 0x80 would otherwise be sign-extended and printed as
		// "ffffffXX" instead of "XX".
		const unsigned char byte = static_cast<unsigned char>(string[i]);
		printf("%x ", static_cast<unsigned int>(byte));
	}
	printf("\n");
}
1102
1103
1104 void
SnifferTest()1105 MimeSnifferTest::SnifferTest() {
1106 #if TEST_R5
1107 Outputf("(no tests actually performed for R5 version)\n");
1108 #else // TEST_R5
1109 const char *rules[] = {
1110 // General tests
1111 "1.0 ('#include')",
1112 "0.0 [0:32] ('#include')",
1113 "0.e-230 [0:32] (\\#include | \\#ifndef)",
1114 ".2 ([0:32] \"#include\" | [0] '#define' | [0:200] 'int main(')",
1115 "1.0 [0:32] ('<html>' | '<head>' | '<body>')",
1116 // Range tests
1117 "1.0 [0:9] ('rock')",
1118 "1.0 ([0:9] 'roll')",
1119 "1.0 ([0:9] 'rock' | [0:9] 'roll')",
1120 "1.0 [0:9] ('rock' | 'roll')",
1121 "1.0 ([0] 'rock')",
1122 "1.0 ([0] 'rock' | [0:9] 'roll')",
1123 "1.0 ([9] 'rock' | [10] 'roll')",
1124 // Mask, octal, and hex tests
1125 "1.0 (\\xFF\\xFF & '\\xF0\\xF0')",
1126 "1.0 ('\\33\\34' & \\xFF\\x00)",
1127 "1.0 (\\33\\34 & \"\\x00\\xFF\")",
1128 "1.0 (\\xFF & \\x05)",
1129 // Conjunctions
1130 "1.0 ([4] 'rock') ([9] 'roll')",
1131 "1.0 [5] ('roll') [10] ('rock')",
1132 "1.0 [4] ('rock' | 'roll') ([9] 'rock' | [10] 'roll')",
1133 // Case insensitivity tests
1134 "1.0 [4] (-i 'Rock' | 'Roll')",
1135 "1.0 [9] ('Rock' | -i 'Roll')",
1136 "1.0 (-i [4] 'Rock' | [9] 'Roll')",
1137 "1.0 ([9] 'Rock' | -i [4] 'Roll')",
1138 };
1139 const int ruleCount = sizeof(rules)/sizeof(char*);
1140 struct test_case {
1141 const std::string data;
1142 const bool result[ruleCount];
1143 } tests[] = {
1144
1145 //------------------------------
1146 {
1147 "#include <stdio.h> \n\
1148 #include <stdlib.h> \n\
1149 \n\
1150 int main() { \n\
1151 return 0; \n\
1152 } \n\
1153 \n\
1154 ", { true, true, true, true, false,
1155 false, false, false, false, false, false, false,
1156 false, false, false, false,
1157 false, false, false,
1158 false, false, false, false
1159 }
1160 },
1161 //------------------------------
1162 {
1163 " #include <stdio.h> \n\
1164 #include <stdlib.h> \n\
1165 \n\
1166 int main() { \n\
1167 return 0; \n\
1168 } \n\
1169 \n\
1170 ", { false, true, true, true, false,
1171 false, false, false, false, false, false, false,
1172 false, false, false, false,
1173 false, false, false,
1174 false, false, false, false
1175 }
1176 },
1177 //------------------------------
1178 {
1179 "#ifndef SOME_TEST_H \n\
1180 #define SOME_TEST_H \n\
1181 \n\
1182 void main(); \n\
1183 \n\
1184 #endif // SOME_TEST_H \n\
1185 \n\
1186 ", { false, false, true, false, false,
1187 false, false, false, false, false, false, false,
1188 false, false, false, false,
1189 false, false, false,
1190 false, false, false, false
1191 }
1192 },
1193 //------------------------------
1194 {
1195 "//------------------ \n\
1196 // SomeTest.cpp \n\
1197 //------------------ \n\
1198 #include <stdio.h> \n\
1199 \n\
1200 int main() { \n\
1201 return 0; \n\
1202 } \n\
1203 \n\
1204 ", { false, false, false, true, false,
1205 false, false, false, false, false, false, false,
1206 false, false, false, true,
1207 // ^^^^ <= coincedence
1208 false, false, false,
1209 false, false, false, false
1210 }
1211 },
1212 //------------------------------
1213 {
1214 "<html> \n\
1215 <body bgcolor='#ffffff'> \n\
1216 HTML is boring as hell <br> \n\
1217 when i write it too much <br> \n\
1218 my head starts to swell <br> \n\
1219 <br> \n\
1220 HTML is stupid and dumb <br> \n\
1221 running through traffic <br> \n\
1222 is ten times as fun <br> \n\
1223 </body> \n\
1224 </html> \n\
1225 ", { false, false, false, false, true,
1226 false, false, false, false, false, false, false,
1227 false, false, false, false,
1228 false, false, false,
1229 false, false, false, false
1230 }
1231 },
1232 //--------- <= Ten characters in
1233 {
1234 " rock&roll", // 5,10
1235 { false, false, false, false, false,
1236 true, false, true, true, false, false, true,
1237 false, false, false, false,
1238 false, false, false,
1239 false, false, false, false
1240 }
1241 },
1242 //--------- <= Ten characters in
1243 {
1244 " rock&roll", // 4,9
1245 { false, false, false, false, false,
1246 true, true, true, true, false, true, false,
1247 false, false, false, false,
1248 true, false, false,
1249 true, true, true, false
1250 }
1251 },
1252 //--------- <= Ten characters in
1253 {
1254 " roll&rock", // 5,10
1255 { false, false, false, false, false,
1256 false, true, true, true, false, true, false,
1257 false, false, false, false,
1258 false, true, false,
1259 false, false, false, false
1260 }
1261 },
1262 //--------- <= Ten characters in
1263 {
1264 " roll&rock", // 4,9
1265 { false, false, false, false, false,
1266 true, true, true, true, false, true, true,
1267 false, false, false, false,
1268 false, false, true,
1269 true, true, false, true
1270 }
1271 },
1272 //--------- <= Ten characters in
1273 {
1274 " ROCK&ROLL", // 4,9
1275 { false, false, false, false, false,
1276 false, false, false, false, false, false, false,
1277 false, false, false, false,
1278 false, false, false,
1279 true, true, true, false
1280 }
1281 },
1282 //--------- <= Ten characters in
1283 {
1284 " rOlL&RoCk", // 4,9
1285 { false, false, false, false, false,
1286 false, false, false, false, false, false, false,
1287 false, false, false, false,
1288 false, false, false,
1289 true, true, false, true
1290 }
1291 },
1292 //------------------------------
1293 {
1294 "\xFF\xFF FF FF",
1295 { false, false, false, false, false,
1296 false, false, false, false, false, false, false,
1297 true, false, false, true,
1298 false, false, false,
1299 false, false, false, false
1300 }
1301 },
1302 //------------------------------
1303 {
1304 "\xFA\xFA FA FA",
1305 { false, false, false, false, false,
1306 false, false, false, false, false, false, false,
1307 true, false, false, false,
1308 false, false, false,
1309 false, false, false, false
1310 }
1311 },
1312 //------------------------------
1313 {
1314 "\xAF\xAF AF AF",
1315 { false, false, false, false, false,
1316 false, false, false, false, false, false, false,
1317 false, false, false, true,
1318 false, false, false,
1319 false, false, false, false
1320 }
1321 },
1322 //------------------------------
1323 {
1324 std::string("\033\000 033 000", 10), // Explicit length: otherwise the embedded NUL byte would be taken as the end of the string
1325 { false, false, false, false, false,
1326 false, false, false, false, false, false, false,
1327 false, true, false, false,
1328 false, false, false,
1329 false, false, false, false
1330 }
1331 },
1332 //------------------------------
1333 {
1334 std::string("\000\034 000 034", 10), // Explicit length: otherwise the leading NUL byte would be taken as the end of the string
1335 { false, false, false, false, false,
1336 false, false, false, false, false, false, false,
1337 false, false, true, false,
1338 false, false, false,
1339 false, false, false, false
1340 }
1341 },
1342 //------------------------------
1343 {
1344 "\033\034 033 034",
1345 { false, false, false, false, false,
1346 false, false, false, false, false, false, false,
1347 false, true, true, false,
1348 false, false, false,
1349 false, false, false, false
1350 }
1351 },
1352 }; // tests[]
1353 const int32 testCount = sizeof(tests)/sizeof(test_case);
1354
1355 for (int i = 0; i < testCount; i++) {
1356 if (i > 0)
1357 NextSubTestBlock();
1358 test_case &test = tests[i];
1359 // cout << "--------------------------------------------------------------------------------" << endl;
1360 // cout << test.data << endl;
1361
1362 for (int j = 0; j < ruleCount; j++) {
1363 NextSubTest();
1364 // cout << "############################################################" << endl;
1365 // cout << rules[j] << endl;
1366 // cout << test.result[j] << endl;
1367 Rule rule;
1368 BString errorMsg;
1369 status_t err = parse(rules[j], &rule, &errorMsg);
1370 // dumpStr(test.data, "str ");
1371 if (err) {
1372 // cout << "PARSE FAILURE!!!" << endl;
1373 // cout << errorMsg.String() << endl;
1374 }
1375 CHK(err == B_OK);
1376 if (!err) {
1377 BMallocIO data;
1378 data.Write(test.data.data(), test.data.length());//strlen(test.data));
1379 bool match = rule.Sniff(&data);
1380 // cout << match << endl;
1381 // cout << "match == " << (match ? "yes" : "no") << ", "
1382 // << ((match == test.result[j]) ? "SUCCESS" : "FAILURE") << endl;
1383 CHK(match == test.result[j]);
1384 }
1385 }
1386 }
1387 #endif // !TEST_R5
1388 }
1389