1 /*
2 * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de.
3 * Distributed under the terms of the MIT License.
4 */
5
6
7 #include "RTF.h"
8
9 #include <ctype.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13
14 #include <DataIO.h>
15
16
// Remove the comment marker on the next line to get debug output on stdout.
//#define TRACE_RTF
#if defined(TRACE_RTF)
#	define TRACE(x...) printf(x)
#else
	// tracing is compiled out; the macro arguments are never evaluated
#	define TRACE(x...) ;
#endif
23
24
// Control words that mark a group as a non-text destination.
// Group::DetermineDestination() looks names up with bsearch(), so the
// entries must stay sorted in strcmp() order.
static const char *kDestinationControlWords[] = {
	// a
	"aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor",
	"atndate", "atnicn", "atnid", "atnparent", "atnref", "atntime",
	"atrfend", "atrfstart", "author",
	// b - d
	"background", "bkmkend", "buptim",
	"colortbl", "comment", "creatim",
	"do", "doccomm", "docvar",
	// f
	"fonttbl", "footer", "footerf", "footerl", "footerr", "footnote",
	"ftncn", "ftnsep", "ftnsepc",
	// h - p
	"header", "headerf", "headerl", "headerr",
	"info", "keywords", "operator",
	"pict", "printim", "private1",
	// r - x
	"revtim", "rxe",
	"stylesheet", "subject",
	"tc", "title", "txe",
	"xe",
};
35
36 static char read_char(BDataIO &stream, bool endOfFileAllowed = false);
37 static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10);
38
39
40 using namespace RTF;
41
42
43 static char
read_char(BDataIO & stream,bool endOfFileAllowed)44 read_char(BDataIO &stream, bool endOfFileAllowed)
45 {
46 char c;
47 ssize_t bytesRead = stream.Read(&c, 1);
48
49 if (bytesRead < B_OK)
50 throw (status_t)bytesRead;
51
52 if (bytesRead == 0 && !endOfFileAllowed)
53 throw (status_t)B_ERROR;
54
55 return c;
56 }
57
58
59 static int32
parse_integer(char first,BDataIO & stream,char & _last,int32 base)60 parse_integer(char first, BDataIO &stream, char &_last, int32 base)
61 {
62 const char *kDigits = "0123456789abcdef";
63 int32 integer = 0;
64 int32 count = 0;
65
66 char digit = first;
67
68 if (digit == '\0')
69 digit = read_char(stream);
70
71 while (true) {
72 int32 pos = 0;
73 for (; pos < base; pos++) {
74 if (kDigits[pos] == tolower(digit)) {
75 integer = integer * base + pos;
76 count++;
77 break;
78 }
79 }
80 if (pos == base) {
81 _last = digit;
82 goto out;
83 }
84
85 digit = read_char(stream);
86 }
87
88 out:
89 if (count == 0)
90 throw (status_t)B_BAD_TYPE;
91
92 return integer;
93 }
94
95
/*!	Comparison callback for bsearch() over an array of C strings: compares
	\a key against the string that \a array currently points at.
*/
static int
string_array_compare(const char *key, const char **array)
{
	const char *candidate = *array;
	return strcmp(key, candidate);
}
101
102
103 static void
dump(Element & element,int32 level=0)104 dump(Element &element, int32 level = 0)
105 {
106 printf("%03" B_PRId32 " (%p):", level, &element);
107 for (int32 i = 0; i < level; i++)
108 printf(" ");
109
110 if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) {
111 printf("<RTF header, major version %" B_PRId32 ">\n", header->Version());
112 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) {
113 printf("<Command: %s", command->Name());
114 if (command->HasOption())
115 printf(", Option %" B_PRId32, command->Option());
116 puts(">");
117 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) {
118 printf("<Text>");
119 puts(text->String());
120 } else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element))
121 printf("<Group \"%s\">\n", group->Name());
122
123 if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) {
124 for (uint32 i = 0; i < group->CountElements(); i++)
125 dump(*group->ElementAt(i), level + 1);
126 }
127 }
128
129
130 // #pragma mark -
131
132
Parser(BPositionIO & stream)133 Parser::Parser(BPositionIO &stream)
134 :
135 fStream(&stream, 65536, false),
136 fIdentified(false)
137 {
138 }
139
140
141 status_t
Identify()142 Parser::Identify()
143 {
144 char header[5];
145 if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header))
146 return B_IO_ERROR;
147
148 if (strncmp(header, "{\\rtf", 5))
149 return B_BAD_TYPE;
150
151 fIdentified = true;
152 return B_OK;
153 }
154
155
156 status_t
Parse(Header & header)157 Parser::Parse(Header &header)
158 {
159 if (!fIdentified && Identify() != B_OK)
160 return B_BAD_TYPE;
161
162 try {
163 int32 openBrackets = 1;
164
165 // since we already preparsed parts of the RTF header, the header
166 // is handled here directly
167 char last;
168 header.Parse('\0', fStream, last);
169
170 Group *parent = &header;
171 char c = last;
172
173 while (true) {
174 Element *element = NULL;
175
176 // we'll just ignore the end of the stream
177 if (parent == NULL)
178 return B_OK;
179
180 switch (c) {
181 case '{':
182 openBrackets++;
183 parent->AddElement(element = new Group());
184 parent = static_cast<Group *>(element);
185 break;
186
187 case '\\':
188 parent->AddElement(element = new Command());
189 break;
190
191 case '}':
192 openBrackets--;
193 parent->DetermineDestination();
194 parent = parent->Parent();
195 // supposed to fall through
196 case '\n':
197 case '\r':
198 {
199 ssize_t bytesRead = fStream.Read(&c, 1);
200 if (bytesRead < B_OK)
201 throw (status_t)bytesRead;
202 else if (bytesRead != 1) {
203 // this is the only valid exit status
204 if (openBrackets == 0)
205 return B_OK;
206
207 throw (status_t)B_ERROR;
208 }
209 continue;
210 }
211
212 default:
213 parent->AddElement(element = new Text());
214 break;
215 }
216
217 if (element == NULL)
218 throw (status_t)B_ERROR;
219
220 element->Parse(c, fStream, last);
221 c = last;
222 }
223 } catch (status_t status) {
224 return status;
225 }
226
227 return B_OK;
228 }
229
230
231 // #pragma mark -
232
233
Element()234 Element::Element()
235 :
236 fParent(NULL)
237 {
238 }
239
240
~Element()241 Element::~Element()
242 {
243 }
244
245
246 void
SetParent(Group * parent)247 Element::SetParent(Group *parent)
248 {
249 fParent = parent;
250 }
251
252
253 Group *
Parent() const254 Element::Parent() const
255 {
256 return fParent;
257 }
258
259
260 bool
IsDefinitionDelimiter()261 Element::IsDefinitionDelimiter()
262 {
263 return false;
264 }
265
266
267 void
PrintToStream(int32 level)268 Element::PrintToStream(int32 level)
269 {
270 dump(*this, level);
271 }
272
273
274 // #pragma mark -
275
276
Group()277 Group::Group()
278 :
279 fDestination(TEXT_DESTINATION)
280 {
281 }
282
283
~Group()284 Group::~Group()
285 {
286 Element *element;
287 while ((element = (Element *)fElements.RemoveItem((int32)0)) != NULL) {
288 delete element;
289 }
290 }
291
292
293 void
Parse(char first,BDataIO & stream,char & last)294 Group::Parse(char first, BDataIO &stream, char &last)
295 {
296 if (first == '\0')
297 first = read_char(stream);
298
299 if (first != '{')
300 throw (status_t)B_BAD_TYPE;
301
302 last = read_char(stream);
303 }
304
305
306 status_t
AddElement(Element * element)307 Group::AddElement(Element *element)
308 {
309 if (element == NULL)
310 return B_BAD_VALUE;
311
312 if (fElements.AddItem(element)) {
313 element->SetParent(this);
314 return B_OK;
315 }
316
317 return B_NO_MEMORY;
318 }
319
320
321 uint32
CountElements() const322 Group::CountElements() const
323 {
324 return (uint32)fElements.CountItems();
325 }
326
327
328 Element *
ElementAt(uint32 index) const329 Group::ElementAt(uint32 index) const
330 {
331 return static_cast<Element *>(fElements.ItemAt(index));
332 }
333
334
335 Element *
FindDefinitionStart(int32 index,int32 * _startIndex) const336 Group::FindDefinitionStart(int32 index, int32 *_startIndex) const
337 {
338 if (index < 0)
339 return NULL;
340
341 Element *element;
342 int32 number = 0;
343 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
344 if (number == index) {
345 if (_startIndex)
346 *_startIndex = i;
347 return element;
348 }
349
350 if (element->IsDefinitionDelimiter())
351 number++;
352 }
353
354 return NULL;
355 }
356
357
358 Command *
FindDefinition(const char * name,int32 index) const359 Group::FindDefinition(const char *name, int32 index) const
360 {
361 int32 startIndex;
362 Element *element = FindDefinitionStart(index, &startIndex);
363 if (element == NULL)
364 return NULL;
365
366 for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) {
367 if (element->IsDefinitionDelimiter())
368 break;
369
370 if (Command *command = dynamic_cast<Command *>(element)) {
371 if (command != NULL && !strcmp(name, command->Name()))
372 return command;
373 }
374 }
375
376 return NULL;
377 }
378
379
380 Group *
FindGroup(const char * name) const381 Group::FindGroup(const char *name) const
382 {
383 Element *element;
384 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
385 Group *group = dynamic_cast<Group *>(element);
386 if (group == NULL)
387 continue;
388
389 Command *command = dynamic_cast<Command *>(group->ElementAt(0));
390 if (command != NULL && !strcmp(name, command->Name()))
391 return group;
392 }
393
394 return NULL;
395 }
396
397
398 const char *
Name() const399 Group::Name() const
400 {
401 Command *command = dynamic_cast<Command *>(ElementAt(0));
402 if (command != NULL)
403 return command->Name();
404
405 return NULL;
406 }
407
408
409 void
DetermineDestination()410 Group::DetermineDestination()
411 {
412 const char *name = Name();
413 if (name == NULL)
414 return;
415
416 if (!strcmp(name, "*")) {
417 fDestination = COMMENT_DESTINATION;
418 return;
419 }
420
421 // binary search for destination control words
422
423 if (bsearch(name, kDestinationControlWords,
424 sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]),
425 sizeof(kDestinationControlWords[0]),
426 (int (*)(const void *, const void *))string_array_compare) != NULL)
427 fDestination = OTHER_DESTINATION;
428 }
429
430
431 group_destination
Destination() const432 Group::Destination() const
433 {
434 return fDestination;
435 }
436
437
438 // #pragma mark -
439
440
Header()441 Header::Header()
442 :
443 fVersion(0)
444 {
445 }
446
447
~Header()448 Header::~Header()
449 {
450 }
451
452
453 void
Parse(char first,BDataIO & stream,char & last)454 Header::Parse(char first, BDataIO &stream, char &last)
455 {
456 // The stream has been peeked into by the parser already, and
457 // only the version follows in the stream -- let's pick it up
458
459 fVersion = parse_integer(first, stream, last);
460
461 // recreate "rtf" command to name this group
462
463 Command *command = new Command();
464 command->SetName("rtf");
465 command->SetOption(fVersion);
466
467 AddElement(command);
468 }
469
470
471 int32
Version() const472 Header::Version() const
473 {
474 return fVersion;
475 }
476
477
478 const char *
Charset() const479 Header::Charset() const
480 {
481 Command *command = dynamic_cast<Command *>(ElementAt(1));
482 if (command == NULL)
483 return NULL;
484
485 return command->Name();
486 }
487
488
489 rgb_color
Color(int32 index)490 Header::Color(int32 index)
491 {
492 rgb_color color = {0, 0, 0, 255};
493
494 Group *colorTable = FindGroup("colortbl");
495
496 if (colorTable != NULL) {
497 if (Command *gun = colorTable->FindDefinition("red", index))
498 color.red = gun->Option();
499 if (Command *gun = colorTable->FindDefinition("green", index))
500 color.green = gun->Option();
501 if (Command *gun = colorTable->FindDefinition("blue", index))
502 color.blue = gun->Option();
503 }
504
505 return color;
506 }
507
508
509 // #pragma mark -
510
511
Text()512 Text::Text()
513 {
514 }
515
516
~Text()517 Text::~Text()
518 {
519 SetTo(NULL);
520 }
521
522
523 bool
IsDefinitionDelimiter()524 Text::IsDefinitionDelimiter()
525 {
526 return fText == ";";
527 }
528
529
530 void
Parse(char first,BDataIO & stream,char & last)531 Text::Parse(char first, BDataIO &stream, char &last)
532 {
533 char c = first;
534 if (c == '\0')
535 c = read_char(stream);
536
537 if (c == ';') {
538 // definition delimiter
539 fText.SetTo(";");
540 last = read_char(stream);
541 return;
542 }
543
544 const size_t kBufferSteps = 1;
545 size_t maxSize = kBufferSteps;
546 char *text = fText.LockBuffer(maxSize);
547 if (text == NULL)
548 throw (status_t)B_NO_MEMORY;
549
550 size_t position = 0;
551
552 while (true) {
553 if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r')
554 break;
555
556 if (position >= maxSize) {
557 fText.UnlockBuffer(position);
558 text = fText.LockBuffer(maxSize += kBufferSteps);
559 if (text == NULL)
560 throw (status_t)B_NO_MEMORY;
561 }
562
563 text[position++] = c;
564
565 c = read_char(stream);
566 }
567 fText.UnlockBuffer(position);
568
569 // ToDo: add support for different charsets - right now, only ASCII is supported!
570 // To achieve this, we should just translate everything into UTF-8 here
571
572 last = c;
573 }
574
575
576 status_t
SetTo(const char * text)577 Text::SetTo(const char *text)
578 {
579 return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY;
580 }
581
582
583 const char *
String() const584 Text::String() const
585 {
586 return fText.String();
587 }
588
589
590 uint32
Length() const591 Text::Length() const
592 {
593 return fText.Length();
594 }
595
596
597 // #pragma mark -
598
599
Command()600 Command::Command()
601 :
602 fName(NULL),
603 fHasOption(false),
604 fOption(-1)
605 {
606 }
607
608
~Command()609 Command::~Command()
610 {
611 }
612
613
614 void
Parse(char first,BDataIO & stream,char & last)615 Command::Parse(char first, BDataIO &stream, char &last)
616 {
617 if (first == '\0')
618 first = read_char(stream);
619
620 if (first != '\\')
621 throw (status_t)B_BAD_TYPE;
622
623 // get name
624 char name[kCommandLength];
625 size_t length = 0;
626 char c;
627 while (isalpha(c = read_char(stream))) {
628 name[length++] = c;
629 if (length >= kCommandLength - 1)
630 throw (status_t)B_BAD_TYPE;
631 }
632
633 if (length == 0) {
634 if (c == '\n' || c == '\r') {
635 // we're a hard return
636 fName.SetTo("par");
637 } else
638 fName.SetTo(c, 1);
639
640 // read over character
641 c = read_char(stream);
642 } else
643 fName.SetTo(name, length);
644
645 TRACE("command: %s\n", fName.String());
646
647 // parse numeric option
648
649 if (c == '-')
650 c = read_char(stream);
651
652 last = c;
653
654 if (fName == "'") {
655 // hexadecimal
656 char bytes[2];
657 bytes[0] = read_char(stream);
658 bytes[1] = '\0';
659 BMemoryIO memory(bytes, 2);
660
661 SetOption(parse_integer(c, memory, last, 16));
662 last = read_char(stream);
663 } else {
664 // decimal
665 if (isdigit(c))
666 SetOption(parse_integer(c, stream, last));
667
668 // a space delimiter is eaten up by the command
669 if (isspace(last))
670 last = read_char(stream);
671 }
672
673 if (HasOption())
674 TRACE(" option: %ld\n", fOption);
675 }
676
677
678 status_t
SetName(const char * name)679 Command::SetName(const char *name)
680 {
681 return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY;
682 }
683
684
685 const char *
Name()686 Command::Name()
687 {
688 return fName.String();
689 }
690
691
692 void
UnsetOption()693 Command::UnsetOption()
694 {
695 fHasOption = false;
696 fOption = -1;
697 }
698
699
700 void
SetOption(int32 option)701 Command::SetOption(int32 option)
702 {
703 fOption = option;
704 fHasOption = true;
705 }
706
707
708 bool
HasOption() const709 Command::HasOption() const
710 {
711 return fHasOption;
712 }
713
714
715 int32
Option() const716 Command::Option() const
717 {
718 return fOption;
719 }
720
721
722 // #pragma mark -
723
724
Iterator(Element & start,group_destination destination)725 Iterator::Iterator(Element &start, group_destination destination)
726 {
727 SetTo(start, destination);
728 }
729
730
731 void
SetTo(Element & start,group_destination destination)732 Iterator::SetTo(Element &start, group_destination destination)
733 {
734 fStart = &start;
735 fDestination = destination;
736
737 Rewind();
738 }
739
740
741 void
Rewind()742 Iterator::Rewind()
743 {
744 fStack.MakeEmpty();
745 fStack.Push(fStart);
746 }
747
748
749 bool
HasNext() const750 Iterator::HasNext() const
751 {
752 return !fStack.IsEmpty();
753 }
754
755
756 Element *
Next()757 Iterator::Next()
758 {
759 Element *element;
760
761 if (!fStack.Pop(&element))
762 return NULL;
763
764 Group *group = dynamic_cast<Group *>(element);
765 if (group != NULL
766 && (fDestination == ALL_DESTINATIONS
767 || fDestination == group->Destination())) {
768 // put this group's children on the stack in
769 // reverse order, so that we iterate over
770 // the tree in in-order
771
772 for (int32 i = group->CountElements(); i-- > 0;) {
773 fStack.Push(group->ElementAt(i));
774 }
775 }
776
777 return element;
778 }
779
780
781 // #pragma mark -
782
783
Worker(RTF::Header & start)784 Worker::Worker(RTF::Header &start)
785 :
786 fStart(start)
787 {
788 }
789
790
~Worker()791 Worker::~Worker()
792 {
793 }
794
795
796 void
Dispatch(Element * element)797 Worker::Dispatch(Element *element)
798 {
799 if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) {
800 fSkip = false;
801 Group(group);
802
803 if (fSkip)
804 return;
805
806 for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++)
807 Dispatch(element);
808
809 GroupEnd(group);
810 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) {
811 Command(command);
812 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) {
813 Text(text);
814 }
815 }
816
817
818 void
Work()819 Worker::Work()
820 {
821 Dispatch(&fStart);
822 }
823
824
825 void
Group(RTF::Group * group)826 Worker::Group(RTF::Group *group)
827 {
828 }
829
830
831 void
GroupEnd(RTF::Group * group)832 Worker::GroupEnd(RTF::Group *group)
833 {
834 }
835
836
837 void
Command(RTF::Command * command)838 Worker::Command(RTF::Command *command)
839 {
840 }
841
842
843 void
Text(RTF::Text * text)844 Worker::Text(RTF::Text *text)
845 {
846 }
847
848
849 RTF::Header &
Start()850 Worker::Start()
851 {
852 return fStart;
853 }
854
855
856 void
Skip()857 Worker::Skip()
858 {
859 fSkip = true;
860 }
861
862
863 void
Abort(status_t status)864 Worker::Abort(status_t status)
865 {
866 throw status;
867 }
868
869