xref: /haiku/src/add-ons/translators/rtf/RTF.cpp (revision cbe0a0c436162d78cc3f92a305b64918c839d079)
1 /*
2  * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "RTF.h"
8 
9 #include <ctype.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 
14 #include <DataIO.h>
15 
16 
// Uncomment the following line to get debug output of the parser on stdout.
//#define TRACE_RTF
#ifdef TRACE_RTF
	// tracing enabled: TRACE() forwards its arguments to printf()
#	define TRACE(x...) printf(x)
#else
	// tracing disabled: TRACE() expands to an empty statement
#	define TRACE(x...) ;
#endif
23 
24 
// Names of the control words that Group::DetermineDestination() maps to
// OTHER_DESTINATION.
// The table is searched with bsearch() using strcmp() ordering, so it has to
// be kept sorted in ascending strcmp() order when entries are added.
static const char *kDestinationControlWords[] = {
	"aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate",
	"atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend",
	"atrfstart", "author", "background", "bkmkend", "buptim", "colortbl",
	"comment", "creatim", "do", "doccomm", "docvar", "fonttbl", "footer",
	"footerf", "footerl", "footerr", "footnote", "ftncn", "ftnsep",
	"ftnsepc", "header", "headerf", "headerl", "headerr", "info",
	"keywords", "operator", "pict", "printim", "private1", "revtim",
	"rxe", "stylesheet", "subject", "tc", "title", "txe", "xe",
};
35 
// Forward declarations of the stream helpers defined below (they carry the
// default arguments); both report failures by throwing a status_t.
static char read_char(BDataIO &stream, bool endOfFileAllowed = false) throw (status_t);
static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10) throw (status_t);
38 
39 
40 using namespace RTF;
41 
42 
43 static char
44 read_char(BDataIO &stream, bool endOfFileAllowed) throw (status_t)
45 {
46 	char c;
47 	ssize_t bytesRead = stream.Read(&c, 1);
48 
49 	if (bytesRead < B_OK)
50 		throw (status_t)bytesRead;
51 
52 	if (bytesRead == 0 && !endOfFileAllowed)
53 		throw (status_t)B_ERROR;
54 
55 	return c;
56 }
57 
58 
59 static int32
60 parse_integer(char first, BDataIO &stream, char &_last, int32 base)
61 	throw (status_t)
62 {
63 	const char *kDigits = "0123456789abcdef";
64 	int32 integer = 0;
65 	int32 count = 0;
66 
67 	char digit = first;
68 
69 	if (digit == '\0')
70 		digit = read_char(stream);
71 
72 	while (true) {
73 		int32 pos = 0;
74 		for (; pos < base; pos++) {
75 			if (kDigits[pos] == tolower(digit)) {
76 				integer = integer * base + pos;
77 				count++;
78 				break;
79 			}
80 		}
81 		if (pos == base) {
82 			_last = digit;
83 			goto out;
84 		}
85 
86 		digit = read_char(stream);
87 	}
88 
89 out:
90 	if (count == 0)
91 		throw (status_t)B_BAD_TYPE;
92 
93 	return integer;
94 }
95 
96 
// Comparison hook for searching an array of C strings with bsearch():
// \a key is the string looked for, \a array points to the array slot that
// is currently examined. Returns the strcmp() result of the two strings.
static int
string_array_compare(const char *key, const char **array)
{
	const char *candidate = *array;
	return strcmp(key, candidate);
}
102 
103 
104 static void
105 dump(Element &element, int32 level = 0)
106 {
107 	printf("%03" B_PRId32 " (%p):", level, &element);
108 	for (int32 i = 0; i < level; i++)
109 		printf("  ");
110 
111 	if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) {
112 		printf("<RTF header, major version %" B_PRId32 ">\n", header->Version());
113 	} else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) {
114 		printf("<Command: %s", command->Name());
115 		if (command->HasOption())
116 			printf(", Option %" B_PRId32, command->Option());
117 		puts(">");
118 	} else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) {
119 		printf("<Text>");
120 		puts(text->String());
121 	} else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element))
122 		printf("<Group \"%s\">\n", group->Name());
123 
124 	if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) {
125 		for (uint32 i = 0; i < group->CountElements(); i++)
126 			dump(*group->ElementAt(i), level + 1);
127 	}
128 }
129 
130 
131 //	#pragma mark -
132 
133 
// Creates a parser reading from \a stream; the stream has not been identified
// as RTF yet.
// NOTE(review): fStream is constructed with (&stream, 65536, false) -
// presumably a buffered reader with a 64 kB buffer that does not take
// ownership of the stream; confirm against the declaration in RTF.h.
Parser::Parser(BPositionIO &stream)
	:
	fStream(&stream, 65536, false),
	fIdentified(false)
{
}
140 
141 
142 status_t
143 Parser::Identify()
144 {
145 	char header[5];
146 	if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header))
147 		return B_IO_ERROR;
148 
149 	if (strncmp(header, "{\\rtf", 5))
150 		return B_BAD_TYPE;
151 
152 	fIdentified = true;
153 	return B_OK;
154 }
155 
156 
157 status_t
158 Parser::Parse(Header &header)
159 {
160 	if (!fIdentified && Identify() != B_OK)
161 		return B_BAD_TYPE;
162 
163 	try {
164 		int32 openBrackets = 1;
165 
166 		// since we already preparsed parts of the RTF header, the header
167 		// is handled here directly
168 		char last;
169 		header.Parse('\0', fStream, last);
170 
171 		Group *parent = &header;
172 		char c = last;
173 
174 		while (true) {
175 			Element *element = NULL;
176 
177 			// we'll just ignore the end of the stream
178 			if (parent == NULL)
179 				return B_OK;
180 
181 			switch (c) {
182 				case '{':
183 					openBrackets++;
184 					parent->AddElement(element = new Group());
185 					parent = static_cast<Group *>(element);
186 					break;
187 
188 				case '\\':
189 					parent->AddElement(element = new Command());
190 					break;
191 
192 				case '}':
193 					openBrackets--;
194 					parent->DetermineDestination();
195 					parent = parent->Parent();
196 					// supposed to fall through
197 				case '\n':
198 				case '\r':
199 				{
200 					ssize_t bytesRead = fStream.Read(&c, 1);
201 					if (bytesRead < B_OK)
202 						throw (status_t)bytesRead;
203 					else if (bytesRead != 1) {
204 						// this is the only valid exit status
205 						if (openBrackets == 0)
206 							return B_OK;
207 
208 						throw (status_t)B_ERROR;
209 					}
210 					continue;
211 				}
212 
213 				default:
214 					parent->AddElement(element = new Text());
215 					break;
216 			}
217 
218 			if (element == NULL)
219 				throw (status_t)B_ERROR;
220 
221 			element->Parse(c, fStream, last);
222 			c = last;
223 		}
224 	} catch (status_t status) {
225 		return status;
226 	}
227 
228 	return B_OK;
229 }
230 
231 
232 //	#pragma mark -
233 
234 
// Creates an element that does not have a parent group yet.
Element::Element()
	:
	fParent(NULL)
{
}
240 
241 
// Nothing to clean up at this level.
Element::~Element()
{
}
245 
246 
// Stores \a parent as the group this element belongs to; called by
// Group::AddElement(). The element is not added to the group by this method.
void
Element::SetParent(Group *parent)
{
	fParent = parent;
}
252 
253 
// Returns the group set via SetParent(), or NULL if there is none.
Group *
Element::Parent() const
{
	return fParent;
}
259 
260 
// Returns whether this element separates two definitions within a group.
// The base implementation always returns false; Text overrides it for ";".
bool
Element::IsDefinitionDelimiter()
{
	return false;
}
266 
267 
// Prints a description of this element (and, for groups, of all of its
// children) to stdout via dump(); \a level is the nesting depth used for
// the indentation.
void
Element::PrintToStream(int32 level)
{
	dump(*this, level);
}
273 
274 
275 //	#pragma mark -
276 
277 
// Creates an empty group; its destination is TEXT_DESTINATION until
// DetermineDestination() decides otherwise.
Group::Group()
	:
	fDestination(TEXT_DESTINATION)
{
}
283 
284 
285 Group::~Group()
286 {
287 	Element *element;
288 	while ((element = (Element *)fElements.RemoveItem((int32)0)) != NULL) {
289 		delete element;
290 	}
291 }
292 
293 
294 void
295 Group::Parse(char first, BDataIO &stream, char &last) throw (status_t)
296 {
297 	if (first == '\0')
298 		first = read_char(stream);
299 
300 	if (first != '{')
301 		throw (status_t)B_BAD_TYPE;
302 
303 	last = read_char(stream);
304 }
305 
306 
307 status_t
308 Group::AddElement(Element *element)
309 {
310 	if (element == NULL)
311 		return B_BAD_VALUE;
312 
313 	if (fElements.AddItem(element)) {
314 		element->SetParent(this);
315 		return B_OK;
316 	}
317 
318 	return B_NO_MEMORY;
319 }
320 
321 
// Returns the number of direct children of this group.
uint32
Group::CountElements() const
{
	return (uint32)fElements.CountItems();
}
327 
328 
// Returns the direct child at \a index.
// Callers in this file rely on NULL being returned for an index past the
// last element (they use it to terminate their loops).
Element *
Group::ElementAt(uint32 index) const
{
	return static_cast<Element *>(fElements.ItemAt(index));
}
334 
335 
336 Element *
337 Group::FindDefinitionStart(int32 index, int32 *_startIndex) const
338 {
339 	if (index < 0)
340 		return NULL;
341 
342 	Element *element;
343 	int32 number = 0;
344 	for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
345 		if (number == index) {
346 			if (_startIndex)
347 				*_startIndex = i;
348 			return element;
349 		}
350 
351 		if (element->IsDefinitionDelimiter())
352 			number++;
353 	}
354 
355 	return NULL;
356 }
357 
358 
359 Command *
360 Group::FindDefinition(const char *name, int32 index) const
361 {
362 	int32 startIndex;
363 	Element *element = FindDefinitionStart(index, &startIndex);
364 	if (element == NULL)
365 		return NULL;
366 
367 	for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) {
368 		if (element->IsDefinitionDelimiter())
369 			break;
370 
371 		if (Command *command = dynamic_cast<Command *>(element)) {
372 			if (command != NULL && !strcmp(name, command->Name()))
373 				return command;
374 		}
375 	}
376 
377 	return NULL;
378 }
379 
380 
381 Group *
382 Group::FindGroup(const char *name) const
383 {
384 	Element *element;
385 	for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
386 		Group *group = dynamic_cast<Group *>(element);
387 		if (group == NULL)
388 			continue;
389 
390 		Command *command = dynamic_cast<Command *>(group->ElementAt(0));
391 		if (command != NULL && !strcmp(name, command->Name()))
392 			return group;
393 	}
394 
395 	return NULL;
396 }
397 
398 
399 const char *
400 Group::Name() const
401 {
402 	Command *command = dynamic_cast<Command *>(ElementAt(0));
403 	if (command != NULL)
404 		return command->Name();
405 
406 	return NULL;
407 }
408 
409 
410 void
411 Group::DetermineDestination()
412 {
413 	const char *name = Name();
414 	if (name == NULL)
415 		return;
416 
417 	if (!strcmp(name, "*")) {
418 		fDestination = COMMENT_DESTINATION;
419 		return;
420 	}
421 
422 	// binary search for destination control words
423 
424 	if (bsearch(name, kDestinationControlWords,
425 			sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]),
426 			sizeof(kDestinationControlWords[0]),
427 			(int (*)(const void *, const void *))string_array_compare) != NULL)
428 		fDestination = OTHER_DESTINATION;
429 }
430 
431 
// Returns the destination of this group: TEXT_DESTINATION unless
// DetermineDestination() has set a different one.
group_destination
Group::Destination() const
{
	return fDestination;
}
437 
438 
439 //	#pragma mark -
440 
441 
// Creates an empty header; the version is 0 until Parse() has been called.
Header::Header()
	:
	fVersion(0)
{
}
447 
448 
// Nothing to do here; the elements are deleted by the Group destructor.
Header::~Header()
{
}
452 
453 
454 void
455 Header::Parse(char first, BDataIO &stream, char &last) throw (status_t)
456 {
457 	// The stream has been peeked into by the parser already, and
458 	// only the version follows in the stream -- let's pick it up
459 
460 	fVersion = parse_integer(first, stream, last);
461 
462 	// recreate "rtf" command to name this group
463 
464 	Command *command = new Command();
465 	command->SetName("rtf");
466 	command->SetOption(fVersion);
467 
468 	AddElement(command);
469 }
470 
471 
// Returns the version number read by Parse(), or 0 if nothing has been
// parsed yet.
int32
Header::Version() const
{
	return fVersion;
}
477 
478 
479 const char *
480 Header::Charset() const
481 {
482 	Command *command = dynamic_cast<Command *>(ElementAt(1));
483 	if (command == NULL)
484 		return NULL;
485 
486 	return command->Name();
487 }
488 
489 
490 rgb_color
491 Header::Color(int32 index)
492 {
493 	rgb_color color = {0, 0, 0, 255};
494 
495 	Group *colorTable = FindGroup("colortbl");
496 
497 	if (colorTable != NULL) {
498 		if (Command *gun = colorTable->FindDefinition("red", index))
499 			color.red = gun->Option();
500 		if (Command *gun = colorTable->FindDefinition("green", index))
501 			color.green = gun->Option();
502 		if (Command *gun = colorTable->FindDefinition("blue", index))
503 			color.blue = gun->Option();
504 	}
505 
506 	return color;
507 }
508 
509 
510 //	#pragma mark -
511 
512 
// Creates a text element; fText is default constructed.
Text::Text()
{
}
516 
517 
// Resets the text via SetTo(NULL); the result of that call is ignored.
Text::~Text()
{
	SetTo(NULL);
}
522 
523 
// A text element is a definition delimiter if it consists of a single
// semicolon only; Parse() stores semicolons as separate elements.
bool
Text::IsDefinitionDelimiter()
{
	return fText == ";";
}
529 
530 
531 void
532 Text::Parse(char first, BDataIO &stream, char &last) throw (status_t)
533 {
534 	char c = first;
535 	if (c == '\0')
536 		c = read_char(stream);
537 
538 	if (c == ';') {
539 		// definition delimiter
540 		fText.SetTo(";");
541 		last = read_char(stream);
542 		return;
543 	}
544 
545 	const size_t kBufferSteps = 1;
546 	size_t maxSize = kBufferSteps;
547 	char *text = fText.LockBuffer(maxSize);
548 	if (text == NULL)
549 		throw (status_t)B_NO_MEMORY;
550 
551 	size_t position = 0;
552 
553 	while (true) {
554 		if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r')
555 			break;
556 
557 		if (position >= maxSize) {
558 			fText.UnlockBuffer(position);
559 			text = fText.LockBuffer(maxSize += kBufferSteps);
560 			if (text == NULL)
561 				throw (status_t)B_NO_MEMORY;
562 		}
563 
564 		text[position++] = c;
565 
566 		c = read_char(stream);
567 	}
568 	fText.UnlockBuffer(position);
569 
570 	// ToDo: add support for different charsets - right now, only ASCII is supported!
571 	//	To achieve this, we should just translate everything into UTF-8 here
572 
573 	last = c;
574 }
575 
576 
577 status_t
578 Text::SetTo(const char *text)
579 {
580 	return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY;
581 }
582 
583 
// Returns the text as C string, as provided by fText.String().
const char *
Text::String() const
{
	return fText.String();
}
589 
590 
// Returns the length of the text, as reported by fText.Length().
uint32
Text::Length() const
{
	return fText.Length();
}
596 
597 
598 //	#pragma mark -
599 
600 
// Creates a command without name and without option; fOption is -1 as long
// as no option has been set.
Command::Command()
	:
	fName(NULL),
	fHasOption(false),
	fOption(-1)
{
}
608 
609 
// Nothing to clean up explicitly.
Command::~Command()
{
}
613 
614 
615 void
616 Command::Parse(char first, BDataIO &stream, char &last) throw (status_t)
617 {
618 	if (first == '\0')
619 		first = read_char(stream);
620 
621 	if (first != '\\')
622 		throw (status_t)B_BAD_TYPE;
623 
624 	// get name
625 	char name[kCommandLength];
626 	size_t length = 0;
627 	char c;
628 	while (isalpha(c = read_char(stream))) {
629 		name[length++] = c;
630 		if (length >= kCommandLength - 1)
631 			throw (status_t)B_BAD_TYPE;
632 	}
633 
634 	if (length == 0) {
635 		if (c == '\n' || c == '\r') {
636 			// we're a hard return
637 			fName.SetTo("par");
638 		} else
639 			fName.SetTo(c, 1);
640 
641 		// read over character
642 		c = read_char(stream);
643 	} else
644 		fName.SetTo(name, length);
645 
646 	TRACE("command: %s\n", fName.String());
647 
648 	// parse numeric option
649 
650 	if (c == '-')
651 		c = read_char(stream);
652 
653 	last = c;
654 
655 	if (fName == "'") {
656 		// hexadecimal
657 		char bytes[2];
658 		bytes[0] = read_char(stream);
659 		bytes[1] = '\0';
660 		BMemoryIO memory(bytes, 2);
661 
662 		SetOption(parse_integer(c, memory, last, 16));
663 		last = read_char(stream);
664 	} else {
665 		// decimal
666 		if (isdigit(c))
667 			SetOption(parse_integer(c, stream, last));
668 
669 		// a space delimiter is eaten up by the command
670 		if (isspace(last))
671 			last = read_char(stream);
672 	}
673 
674 	if (HasOption())
675 		TRACE("  option: %ld\n", fOption);
676 }
677 
678 
679 status_t
680 Command::SetName(const char *name)
681 {
682 	return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY;
683 }
684 
685 
// Returns the name of the command (without the leading backslash, as
// Parse() does not store it), as provided by fName.String().
const char *
Command::Name()
{
	return fName.String();
}
691 
692 
// Removes the option from this command, and resets its value to -1, the
// same state the constructor establishes.
void
Command::UnsetOption()
{
	fHasOption = false;
	fOption = -1;
}
699 
700 
// Sets the numeric option of this command to \a option, and marks the
// command as having one.
void
Command::SetOption(int32 option)
{
	fOption = option;
	fHasOption = true;
}
707 
708 
// Returns whether an option has been set for this command, either by
// Parse() or by SetOption().
bool
Command::HasOption() const
{
	return fHasOption;
}
714 
715 
// Returns the value of the option; this is -1 if no option has been set,
// use HasOption() to tell that apart from an actual value.
int32
Command::Option() const
{
	return fOption;
}
721 
722 
723 //	#pragma mark -
724 
725 
// Creates an iterator over the tree below \a start; see SetTo() for the
// meaning of \a destination.
Iterator::Iterator(Element &start, group_destination destination)
{
	SetTo(start, destination);
}
730 
731 
// Lets the iterator start over at \a start. Next() only descends into
// groups whose destination equals \a destination, or into all groups if it
// is ALL_DESTINATIONS.
// Only the address of \a start is stored, so the element has to stay valid
// for as long as the iterator is used.
void
Iterator::SetTo(Element &start, group_destination destination)
{
	fStart = &start;
	fDestination = destination;

	Rewind();
}
740 
741 
// Restarts the iteration: the stack of pending elements is emptied, and the
// start element is pushed as the only entry.
void
Iterator::Rewind()
{
	fStack.MakeEmpty();
	fStack.Push(fStart);
}
748 
749 
// Returns whether there are elements left to be returned by Next(), that is
// whether the stack of pending elements is non-empty.
bool
Iterator::HasNext() const
{
	return !fStack.IsEmpty();
}
755 
756 
757 Element *
758 Iterator::Next()
759 {
760 	Element *element;
761 
762 	if (!fStack.Pop(&element))
763 		return NULL;
764 
765 	Group *group = dynamic_cast<Group *>(element);
766 	if (group != NULL
767 		&& (fDestination == ALL_DESTINATIONS
768 			|| fDestination == group->Destination())) {
769 		// put this group's children on the stack in
770 		// reverse order, so that we iterate over
771 		// the tree in in-order
772 
773 		for (int32 i = group->CountElements(); i-- > 0;) {
774 			fStack.Push(group->ElementAt(i));
775 		}
776 	}
777 
778 	return element;
779 }
780 
781 
782 //	#pragma mark -
783 
784 
// Creates a worker operating on the tree below \a start.
// fSkip is not initialized here; Dispatch() resets it before every Group()
// hook call.
Worker::Worker(RTF::Header &start)
	:
	fStart(start)
{
}
790 
791 
// Nothing to clean up.
Worker::~Worker()
{
}
795 
796 
797 void
798 Worker::Dispatch(Element *element)
799 {
800 	if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) {
801 		fSkip = false;
802 		Group(group);
803 
804 		if (fSkip)
805 			return;
806 
807 		for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++)
808 			Dispatch(element);
809 
810 		GroupEnd(group);
811 	} else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) {
812 		Command(command);
813 	} else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) {
814 		Text(text);
815 	}
816 }
817 
818 
// Walks the whole tree, starting with the header passed to the constructor,
// and calls the hooks for all elements visited.
// A status_t thrown by a hook (for example via Abort()) is passed on to the
// caller.
void
Worker::Work() throw (status_t)
{
	Dispatch(&fStart);
}
824 
825 
// Hook called by Dispatch() for every group before its children are
// dispatched; calling Skip() from here makes Dispatch() ignore the children
// and the GroupEnd() call. The default implementation does nothing.
void
Worker::Group(RTF::Group *group)
{
}
830 
831 
// Hook called by Dispatch() after all children of a group have been
// dispatched; it is not called for skipped groups. The default
// implementation does nothing.
void
Worker::GroupEnd(RTF::Group *group)
{
}
836 
837 
// Hook called by Dispatch() for every command element. The default
// implementation does nothing.
void
Worker::Command(RTF::Command *command)
{
}
842 
843 
// Hook called by Dispatch() for every text element. The default
// implementation does nothing.
void
Worker::Text(RTF::Text *text)
{
}
848 
849 
// Returns the header this worker has been created with.
RTF::Header &
Worker::Start()
{
	return fStart;
}
855 
856 
// Sets the skip flag. Dispatch() only evaluates it directly after calling
// the Group() hook, so this only has an effect when called from there.
void
Worker::Skip()
{
	fSkip = true;
}
862 
863 
// Throws \a status. When called from one of the hooks, this ends the
// traversal, and the status is passed on to the caller of Work().
void
Worker::Abort(status_t status)
{
	throw status;
}
869 
870