xref: /haiku/src/add-ons/translators/rtf/RTF.cpp (revision 4c8e85b316c35a9161f5a1c50ad70bc91c83a76f)
1 /*
2  * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "RTF.h"
8 
9 #include <ctype.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <DataIO.h>
15 
16 
17 //#define TRACE_RTF
18 #ifdef TRACE_RTF
19 #	define TRACE(x...) printf(x)
20 #else
21 #	define TRACE(x...) ;
22 #endif
23 
24 
// Control words that mark a group as a destination other than the plain
// document text. Group::DetermineDestination() looks names up in this table
// with bsearch(), so the entries must stay in ascending strcmp() order.
static const char *kDestinationControlWords[] = {
	// a
	"aftncn", "aftnsep", "aftnsepc", "annotation",
	"atnauthor", "atndate", "atnicn", "atnid",
	"atnparent", "atnref", "atntime", "atrfend",
	"atrfstart", "author",
	// b
	"background", "bkmkend", "buptim",
	// c
	"colortbl", "comment", "creatim",
	// d
	"do", "doccomm", "docvar",
	// f
	"fonttbl", "footer", "footerf", "footerl", "footerr", "footnote",
	"ftncn", "ftnsep", "ftnsepc",
	// h
	"header", "headerf", "headerl", "headerr",
	// i - o
	"info", "keywords", "operator",
	// p
	"pict", "printim", "private1",
	// r - s
	"revtim", "rxe", "stylesheet", "subject",
	// t - x
	"tc", "title", "txe", "xe",
};
35 
36 static char read_char(BDataIO &stream, bool endOfFileAllowed = false);
37 static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10);
38 
39 
40 using namespace RTF;
41 
42 
43 static char
44 read_char(BDataIO &stream, bool endOfFileAllowed)
45 {
46 	char c;
47 	ssize_t bytesRead = stream.Read(&c, 1);
48 
49 	if (bytesRead < B_OK)
50 		throw (status_t)bytesRead;
51 
52 	if (bytesRead == 0 && !endOfFileAllowed)
53 		throw (status_t)B_ERROR;
54 
55 	return c;
56 }
57 
58 
59 static int32
60 parse_integer(char first, BDataIO &stream, char &_last, int32 base)
61 {
62 	const char *kDigits = "0123456789abcdef";
63 	int32 integer = 0;
64 	int32 count = 0;
65 
66 	char digit = first;
67 
68 	if (digit == '\0')
69 		digit = read_char(stream);
70 
71 	while (true) {
72 		int32 pos = 0;
73 		for (; pos < base; pos++) {
74 			if (kDigits[pos] == tolower(digit)) {
75 				integer = integer * base + pos;
76 				count++;
77 				break;
78 			}
79 		}
80 		if (pos == base) {
81 			_last = digit;
82 			goto out;
83 		}
84 
85 		digit = read_char(stream);
86 	}
87 
88 out:
89 	if (count == 0)
90 		throw (status_t)B_BAD_TYPE;
91 
92 	return integer;
93 }
94 
95 
// bsearch() comparator for an array of C strings: key is the string that is
// searched for, array points to the array element (a const char *) it is
// compared against.
//
// The parameters have the exact types bsearch() calls its comparator with,
// so that the function is never invoked through a pointer to an incompatible
// function type.
static int
string_array_compare(const void *key, const void *array)
{
	const char *const *element = static_cast<const char *const *>(array);
	return strcmp(static_cast<const char *>(key), *element);
}
101 
102 
103 static void
104 dump(Element &element, int32 level = 0)
105 {
106 	printf("%03" B_PRId32 " (%p):", level, &element);
107 	for (int32 i = 0; i < level; i++)
108 		printf("  ");
109 
110 	if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) {
111 		printf("<RTF header, major version %" B_PRId32 ">\n", header->Version());
112 	} else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) {
113 		printf("<Command: %s", command->Name());
114 		if (command->HasOption())
115 			printf(", Option %" B_PRId32, command->Option());
116 		puts(">");
117 	} else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) {
118 		printf("<Text>");
119 		puts(text->String());
120 	} else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element))
121 		printf("<Group \"%s\">\n", group->Name());
122 
123 	if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) {
124 		for (uint32 i = 0; i < group->CountElements(); i++)
125 			dump(*group->ElementAt(i), level + 1);
126 	}
127 }
128 
129 
130 //	#pragma mark -
131 
132 
133 Parser::Parser(BPositionIO &stream)
134 	:
135 	fStream(&stream, 65536, false),
136 	fIdentified(false)
137 {
138 }
139 
140 
141 status_t
142 Parser::Identify()
143 {
144 	char header[5];
145 	if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header))
146 		return B_IO_ERROR;
147 
148 	if (strncmp(header, "{\\rtf", 5))
149 		return B_BAD_TYPE;
150 
151 	fIdentified = true;
152 	return B_OK;
153 }
154 
155 
156 status_t
157 Parser::Parse(Header &header)
158 {
159 	if (!fIdentified && Identify() != B_OK)
160 		return B_BAD_TYPE;
161 
162 	try {
163 		int32 openBrackets = 1;
164 
165 		// since we already preparsed parts of the RTF header, the header
166 		// is handled here directly
167 		char last;
168 		header.Parse('\0', fStream, last);
169 
170 		Group *parent = &header;
171 		char c = last;
172 
173 		while (true) {
174 			Element *element = NULL;
175 
176 			// we'll just ignore the end of the stream
177 			if (parent == NULL)
178 				return B_OK;
179 
180 			switch (c) {
181 				case '{':
182 					openBrackets++;
183 					parent->AddElement(element = new Group());
184 					parent = static_cast<Group *>(element);
185 					break;
186 
187 				case '\\':
188 					parent->AddElement(element = new Command());
189 					break;
190 
191 				case '}':
192 					openBrackets--;
193 					parent->DetermineDestination();
194 					parent = parent->Parent();
195 					// supposed to fall through
196 				case '\n':
197 				case '\r':
198 				{
199 					ssize_t bytesRead = fStream.Read(&c, 1);
200 					if (bytesRead < B_OK)
201 						throw (status_t)bytesRead;
202 					else if (bytesRead != 1) {
203 						// this is the only valid exit status
204 						if (openBrackets == 0)
205 							return B_OK;
206 
207 						throw (status_t)B_ERROR;
208 					}
209 					continue;
210 				}
211 
212 				default:
213 					parent->AddElement(element = new Text());
214 					break;
215 			}
216 
217 			if (element == NULL)
218 				throw (status_t)B_ERROR;
219 
220 			element->Parse(c, fStream, last);
221 			c = last;
222 		}
223 	} catch (status_t status) {
224 		return status;
225 	}
226 
227 	return B_OK;
228 }
229 
230 
231 //	#pragma mark -
232 
233 
234 Element::Element()
235 	:
236 	fParent(NULL)
237 {
238 }
239 
240 
241 Element::~Element()
242 {
243 }
244 
245 
246 void
247 Element::SetParent(Group *parent)
248 {
249 	fParent = parent;
250 }
251 
252 
253 Group *
254 Element::Parent() const
255 {
256 	return fParent;
257 }
258 
259 
260 bool
261 Element::IsDefinitionDelimiter()
262 {
263 	return false;
264 }
265 
266 
267 void
268 Element::PrintToStream(int32 level)
269 {
270 	dump(*this, level);
271 }
272 
273 
274 //	#pragma mark -
275 
276 
277 Group::Group()
278 	:
279 	fDestination(TEXT_DESTINATION)
280 {
281 }
282 
283 
284 Group::~Group()
285 {
286 	Element *element;
287 	while ((element = (Element *)fElements.RemoveItem((int32)0)) != NULL) {
288 		delete element;
289 	}
290 }
291 
292 
293 void
294 Group::Parse(char first, BDataIO &stream, char &last)
295 {
296 	if (first == '\0')
297 		first = read_char(stream);
298 
299 	if (first != '{')
300 		throw (status_t)B_BAD_TYPE;
301 
302 	last = read_char(stream);
303 }
304 
305 
306 status_t
307 Group::AddElement(Element *element)
308 {
309 	if (element == NULL)
310 		return B_BAD_VALUE;
311 
312 	if (fElements.AddItem(element)) {
313 		element->SetParent(this);
314 		return B_OK;
315 	}
316 
317 	return B_NO_MEMORY;
318 }
319 
320 
321 uint32
322 Group::CountElements() const
323 {
324 	return (uint32)fElements.CountItems();
325 }
326 
327 
328 Element *
329 Group::ElementAt(uint32 index) const
330 {
331 	return static_cast<Element *>(fElements.ItemAt(index));
332 }
333 
334 
335 Element *
336 Group::FindDefinitionStart(int32 index, int32 *_startIndex) const
337 {
338 	if (index < 0)
339 		return NULL;
340 
341 	Element *element;
342 	int32 number = 0;
343 	for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
344 		if (number == index) {
345 			if (_startIndex)
346 				*_startIndex = i;
347 			return element;
348 		}
349 
350 		if (element->IsDefinitionDelimiter())
351 			number++;
352 	}
353 
354 	return NULL;
355 }
356 
357 
358 Command *
359 Group::FindDefinition(const char *name, int32 index) const
360 {
361 	int32 startIndex;
362 	Element *element = FindDefinitionStart(index, &startIndex);
363 	if (element == NULL)
364 		return NULL;
365 
366 	for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) {
367 		if (element->IsDefinitionDelimiter())
368 			break;
369 
370 		if (Command *command = dynamic_cast<Command *>(element)) {
371 			if (command != NULL && !strcmp(name, command->Name()))
372 				return command;
373 		}
374 	}
375 
376 	return NULL;
377 }
378 
379 
380 Group *
381 Group::FindGroup(const char *name) const
382 {
383 	Element *element;
384 	for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
385 		Group *group = dynamic_cast<Group *>(element);
386 		if (group == NULL)
387 			continue;
388 
389 		Command *command = dynamic_cast<Command *>(group->ElementAt(0));
390 		if (command != NULL && !strcmp(name, command->Name()))
391 			return group;
392 	}
393 
394 	return NULL;
395 }
396 
397 
398 const char *
399 Group::Name() const
400 {
401 	Command *command = dynamic_cast<Command *>(ElementAt(0));
402 	if (command != NULL)
403 		return command->Name();
404 
405 	return NULL;
406 }
407 
408 
409 void
410 Group::DetermineDestination()
411 {
412 	const char *name = Name();
413 	if (name == NULL)
414 		return;
415 
416 	if (!strcmp(name, "*")) {
417 		fDestination = COMMENT_DESTINATION;
418 		return;
419 	}
420 
421 	// binary search for destination control words
422 
423 	if (bsearch(name, kDestinationControlWords,
424 			sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]),
425 			sizeof(kDestinationControlWords[0]),
426 			(int (*)(const void *, const void *))string_array_compare) != NULL)
427 		fDestination = OTHER_DESTINATION;
428 }
429 
430 
431 group_destination
432 Group::Destination() const
433 {
434 	return fDestination;
435 }
436 
437 
438 //	#pragma mark -
439 
440 
441 Header::Header()
442 	:
443 	fVersion(0)
444 {
445 }
446 
447 
448 Header::~Header()
449 {
450 }
451 
452 
453 void
454 Header::Parse(char first, BDataIO &stream, char &last)
455 {
456 	// The stream has been peeked into by the parser already, and
457 	// only the version follows in the stream -- let's pick it up
458 
459 	fVersion = parse_integer(first, stream, last);
460 
461 	// recreate "rtf" command to name this group
462 
463 	Command *command = new Command();
464 	command->SetName("rtf");
465 	command->SetOption(fVersion);
466 
467 	AddElement(command);
468 }
469 
470 
471 int32
472 Header::Version() const
473 {
474 	return fVersion;
475 }
476 
477 
478 const char *
479 Header::Charset() const
480 {
481 	Command *command = dynamic_cast<Command *>(ElementAt(1));
482 	if (command == NULL)
483 		return NULL;
484 
485 	return command->Name();
486 }
487 
488 
489 rgb_color
490 Header::Color(int32 index)
491 {
492 	rgb_color color = {0, 0, 0, 255};
493 
494 	Group *colorTable = FindGroup("colortbl");
495 
496 	if (colorTable != NULL) {
497 		if (Command *gun = colorTable->FindDefinition("red", index))
498 			color.red = gun->Option();
499 		if (Command *gun = colorTable->FindDefinition("green", index))
500 			color.green = gun->Option();
501 		if (Command *gun = colorTable->FindDefinition("blue", index))
502 			color.blue = gun->Option();
503 	}
504 
505 	return color;
506 }
507 
508 
509 //	#pragma mark -
510 
511 
512 Text::Text()
513 {
514 }
515 
516 
517 Text::~Text()
518 {
519 	SetTo(NULL);
520 }
521 
522 
523 bool
524 Text::IsDefinitionDelimiter()
525 {
526 	return fText == ";";
527 }
528 
529 
530 void
531 Text::Parse(char first, BDataIO &stream, char &last)
532 {
533 	char c = first;
534 	if (c == '\0')
535 		c = read_char(stream);
536 
537 	if (c == ';') {
538 		// definition delimiter
539 		fText.SetTo(";");
540 		last = read_char(stream);
541 		return;
542 	}
543 
544 	const size_t kBufferSteps = 1;
545 	size_t maxSize = kBufferSteps;
546 	char *text = fText.LockBuffer(maxSize);
547 	if (text == NULL)
548 		throw (status_t)B_NO_MEMORY;
549 
550 	size_t position = 0;
551 
552 	while (true) {
553 		if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r')
554 			break;
555 
556 		if (position >= maxSize) {
557 			fText.UnlockBuffer(position);
558 			text = fText.LockBuffer(maxSize += kBufferSteps);
559 			if (text == NULL)
560 				throw (status_t)B_NO_MEMORY;
561 		}
562 
563 		text[position++] = c;
564 
565 		c = read_char(stream);
566 	}
567 	fText.UnlockBuffer(position);
568 
569 	// ToDo: add support for different charsets - right now, only ASCII is supported!
570 	//	To achieve this, we should just translate everything into UTF-8 here
571 
572 	last = c;
573 }
574 
575 
576 status_t
577 Text::SetTo(const char *text)
578 {
579 	return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY;
580 }
581 
582 
583 const char *
584 Text::String() const
585 {
586 	return fText.String();
587 }
588 
589 
590 uint32
591 Text::Length() const
592 {
593 	return fText.Length();
594 }
595 
596 
597 //	#pragma mark -
598 
599 
600 Command::Command()
601 	:
602 	fName(NULL),
603 	fHasOption(false),
604 	fOption(-1)
605 {
606 }
607 
608 
609 Command::~Command()
610 {
611 }
612 
613 
614 void
615 Command::Parse(char first, BDataIO &stream, char &last)
616 {
617 	if (first == '\0')
618 		first = read_char(stream);
619 
620 	if (first != '\\')
621 		throw (status_t)B_BAD_TYPE;
622 
623 	// get name
624 	char name[kCommandLength];
625 	size_t length = 0;
626 	char c;
627 	while (isalpha(c = read_char(stream))) {
628 		name[length++] = c;
629 		if (length >= kCommandLength - 1)
630 			throw (status_t)B_BAD_TYPE;
631 	}
632 
633 	if (length == 0) {
634 		if (c == '\n' || c == '\r') {
635 			// we're a hard return
636 			fName.SetTo("par");
637 		} else
638 			fName.SetTo(c, 1);
639 
640 		// read over character
641 		c = read_char(stream);
642 	} else
643 		fName.SetTo(name, length);
644 
645 	TRACE("command: %s\n", fName.String());
646 
647 	// parse numeric option
648 
649 	if (c == '-')
650 		c = read_char(stream);
651 
652 	last = c;
653 
654 	if (fName == "'") {
655 		// hexadecimal
656 		char bytes[2];
657 		bytes[0] = read_char(stream);
658 		bytes[1] = '\0';
659 		BMemoryIO memory(bytes, 2);
660 
661 		SetOption(parse_integer(c, memory, last, 16));
662 		last = read_char(stream);
663 	} else {
664 		// decimal
665 		if (isdigit(c))
666 			SetOption(parse_integer(c, stream, last));
667 
668 		// a space delimiter is eaten up by the command
669 		if (isspace(last))
670 			last = read_char(stream);
671 	}
672 
673 	if (HasOption())
674 		TRACE("  option: %ld\n", fOption);
675 }
676 
677 
678 status_t
679 Command::SetName(const char *name)
680 {
681 	return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY;
682 }
683 
684 
685 const char *
686 Command::Name()
687 {
688 	return fName.String();
689 }
690 
691 
692 void
693 Command::UnsetOption()
694 {
695 	fHasOption = false;
696 	fOption = -1;
697 }
698 
699 
700 void
701 Command::SetOption(int32 option)
702 {
703 	fOption = option;
704 	fHasOption = true;
705 }
706 
707 
708 bool
709 Command::HasOption() const
710 {
711 	return fHasOption;
712 }
713 
714 
715 int32
716 Command::Option() const
717 {
718 	return fOption;
719 }
720 
721 
722 //	#pragma mark -
723 
724 
725 Iterator::Iterator(Element &start, group_destination destination)
726 {
727 	SetTo(start, destination);
728 }
729 
730 
731 void
732 Iterator::SetTo(Element &start, group_destination destination)
733 {
734 	fStart = &start;
735 	fDestination = destination;
736 
737 	Rewind();
738 }
739 
740 
741 void
742 Iterator::Rewind()
743 {
744 	fStack.MakeEmpty();
745 	fStack.Push(fStart);
746 }
747 
748 
749 bool
750 Iterator::HasNext() const
751 {
752 	return !fStack.IsEmpty();
753 }
754 
755 
756 Element *
757 Iterator::Next()
758 {
759 	Element *element;
760 
761 	if (!fStack.Pop(&element))
762 		return NULL;
763 
764 	Group *group = dynamic_cast<Group *>(element);
765 	if (group != NULL
766 		&& (fDestination == ALL_DESTINATIONS
767 			|| fDestination == group->Destination())) {
768 		// put this group's children on the stack in
769 		// reverse order, so that we iterate over
770 		// the tree in in-order
771 
772 		for (int32 i = group->CountElements(); i-- > 0;) {
773 			fStack.Push(group->ElementAt(i));
774 		}
775 	}
776 
777 	return element;
778 }
779 
780 
781 //	#pragma mark -
782 
783 
784 Worker::Worker(RTF::Header &start)
785 	:
786 	fStart(start)
787 {
788 }
789 
790 
791 Worker::~Worker()
792 {
793 }
794 
795 
796 void
797 Worker::Dispatch(Element *element)
798 {
799 	if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) {
800 		fSkip = false;
801 		Group(group);
802 
803 		if (fSkip)
804 			return;
805 
806 		for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++)
807 			Dispatch(element);
808 
809 		GroupEnd(group);
810 	} else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) {
811 		Command(command);
812 	} else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) {
813 		Text(text);
814 	}
815 }
816 
817 
818 void
819 Worker::Work()
820 {
821 	Dispatch(&fStart);
822 }
823 
824 
825 void
826 Worker::Group(RTF::Group *group)
827 {
828 }
829 
830 
831 void
832 Worker::GroupEnd(RTF::Group *group)
833 {
834 }
835 
836 
837 void
838 Worker::Command(RTF::Command *command)
839 {
840 }
841 
842 
843 void
844 Worker::Text(RTF::Text *text)
845 {
846 }
847 
848 
849 RTF::Header &
850 Worker::Start()
851 {
852 	return fStart;
853 }
854 
855 
856 void
857 Worker::Skip()
858 {
859 	fSkip = true;
860 }
861 
862 
863 void
864 Worker::Abort(status_t status)
865 {
866 	throw status;
867 }
868 
869