xref: /haiku/src/add-ons/translators/rtf/RTF.cpp (revision 1e36cfc2721ef13a187c6f7354dc9cbc485e89d3)
1 /*
2  * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "RTF.h"
8 
9 #include <DataIO.h>
10 
11 #include <stdlib.h>
12 #include <stdio.h>
13 #include <string.h>
14 #include <ctype.h>
15 
16 
// Names that make Group::DetermineDestination() classify a group as
// OTHER_DESTINATION. The table is searched with bsearch(), so the entries
// have to stay sorted in strcmp() order.
static const char *kDestinationControlWords[] = {
	// a
	"aftncn", "aftnsep", "aftnsepc", "annotation",
	"atnauthor", "atndate", "atnicn", "atnid", "atnparent", "atnref",
	"atntime", "atrfend", "atrfstart", "author",
	// b - d
	"background", "bkmkend", "buptim",
	"colortbl", "comment", "creatim",
	"do", "doccomm", "docvar",
	// f
	"fonttbl", "footer", "footerf", "footerl", "footerr", "footnote",
	"ftncn", "ftnsep", "ftnsepc",
	// h - p
	"header", "headerf", "headerl", "headerr",
	"info", "keywords", "operator",
	"pict", "printim", "private1",
	// r - x
	"revtim", "rxe",
	"stylesheet", "subject",
	"tc", "title", "txe",
	"xe",
};
27 
28 static char read_char(BDataIO &stream, bool endOfFileAllowed = false) throw (status_t);
29 static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10) throw (status_t);
30 
31 
32 using namespace RTF;
33 
34 
35 static char
36 read_char(BDataIO &stream, bool endOfFileAllowed) throw (status_t)
37 {
38 	char c;
39 	ssize_t bytesRead = stream.Read(&c, 1);
40 
41 	if (bytesRead < B_OK)
42 		throw (status_t)bytesRead;
43 
44 	if (bytesRead == 0 && !endOfFileAllowed)
45 		throw (status_t)B_ERROR;
46 
47 	return c;
48 }
49 
50 
51 static int32
52 parse_integer(char first, BDataIO &stream, char &_last, int32 base) throw (status_t)
53 {
54 	const char *kDigits = "0123456789abcdef";
55 	int32 integer = 0;
56 	int32 count = 0;
57 
58 	char digit = first;
59 
60 	if (digit == '\0')
61 		digit = read_char(stream);
62 
63 	while (true) {
64 		int32 pos = 0;
65 		for (; pos < base; pos++) {
66 			if (kDigits[pos] == digit) {
67 				integer = integer * base + pos;
68 				count++;
69 				break;
70 			}
71 		}
72 		if (pos == base) {
73 			_last = digit;
74 			goto out;
75 		}
76 
77 		digit = read_char(stream);
78 	}
79 
80 out:
81 	if (count == 0)
82 		throw (status_t)B_BAD_TYPE;
83 
84 	return integer;
85 }
86 
87 
// Comparison hook for bsearch() over an array of C strings: "key" is the
// string searched for, "array" points to the array slot it is compared with.
// Returns the strcmp() result of the key and the string in that slot.
static int
string_array_compare(const char *key, const char **array)
{
	const char *candidate = *array;
	return strcmp(key, candidate);
}
93 
94 
95 static void
96 dump(Element &element, int32 level = 0)
97 {
98 	printf("%03ld (%p):", level, &element);
99 	for (int32 i = 0; i < level; i++)
100 		printf("  ");
101 
102 	if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) {
103 		printf("<RTF header, major version %ld>\n", header->Version());
104 	} else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) {
105 		printf("<Command: %s", command->Name());
106 		if (command->HasOption())
107 			printf(", Option %ld", command->Option());
108 		puts(">");
109 	} else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) {
110 		printf("<Text>");
111 		puts(text->String());
112 	} else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element))
113 		printf("<Group \"%s\">\n", group->Name());
114 
115 	if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) {
116 		for (uint32 i = 0; i < group->CountElements(); i++)
117 			dump(*group->ElementAt(i), level + 1);
118 	}
119 }
120 
121 
122 //	#pragma mark -
123 
124 
125 Parser::Parser(BPositionIO &stream)
126 	:
127 	fStream(&stream, 65536, false),
128 	fIdentified(false)
129 {
130 }
131 
132 
133 status_t
134 Parser::Identify()
135 {
136 	char header[5];
137 	if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header))
138 		return B_IO_ERROR;
139 
140 	if (strncmp(header, "{\\rtf", 5))
141 		return B_BAD_TYPE;
142 
143 	fIdentified = true;
144 	return B_OK;
145 }
146 
147 
148 status_t
149 Parser::Parse(Header &header)
150 {
151 	if (!fIdentified && Identify() != B_OK)
152 		return B_BAD_TYPE;
153 
154 	try {
155 		int32 openBrackets = 1;
156 
157 		// since we already preparsed parts of the RTF header, the header
158 		// is handled here directly
159 		char last;
160 		header.Parse('\0', fStream, last);
161 
162 		Group *parent = &header;
163 		char c = last;
164 
165 		while (true) {
166 			Element *element = NULL;
167 
168 			// we'll just ignore the end of the stream
169 			if (parent == NULL)
170 				return B_OK;
171 
172 			switch (c) {
173 				case '{':
174 					openBrackets++;
175 					parent->AddElement(element = new Group());
176 					parent = static_cast<Group *>(element);
177 					break;
178 
179 				case '\\':
180 					parent->AddElement(element = new Command());
181 					break;
182 
183 				case '}':
184 					openBrackets--;
185 					parent->DetermineDestination();
186 					parent = parent->Parent();
187 					// supposed to fall through
188 				case '\n':
189 				case '\r':
190 				{
191 					ssize_t bytesRead = fStream.Read(&c, 1);
192 					if (bytesRead < B_OK)
193 						throw (status_t)bytesRead;
194 					else if (bytesRead != 1) {
195 						// this is the only valid exit status
196 						if (openBrackets == 0)
197 							return B_OK;
198 
199 						throw B_ERROR;
200 					}
201 					continue;
202 				}
203 
204 				default:
205 					parent->AddElement(element = new Text());
206 					break;
207 			}
208 
209 			if (element == NULL)
210 				throw (status_t)B_ERROR;
211 
212 			element->Parse(c, fStream, last);
213 			c = last;
214 		}
215 	} catch (status_t status) {
216 		return status;
217 	}
218 
219 	return B_OK;
220 }
221 
222 
223 //	#pragma mark -
224 
225 
226 Element::Element()
227 	:
228 	fParent(NULL)
229 {
230 }
231 
232 
233 Element::~Element()
234 {
235 }
236 
237 
238 void
239 Element::SetParent(Group *parent)
240 {
241 	fParent = parent;
242 }
243 
244 
245 Group *
246 Element::Parent() const
247 {
248 	return fParent;
249 }
250 
251 
252 bool
253 Element::IsDefinitionDelimiter()
254 {
255 	return false;
256 }
257 
258 
259 void
260 Element::PrintToStream(int32 level)
261 {
262 	dump(*this, level);
263 }
264 
265 
266 //	#pragma mark -
267 
268 
269 Group::Group()
270 	:
271 	fDestination(TEXT_DESTINATION)
272 {
273 }
274 
275 
276 Group::~Group()
277 {
278 	Element *element;
279 	while ((element = (Element *)fElements.RemoveItem(0L)) != NULL) {
280 		delete element;
281 	}
282 }
283 
284 
285 void
286 Group::Parse(char first, BDataIO &stream, char &last) throw (status_t)
287 {
288 	if (first == '\0')
289 		first = read_char(stream);
290 
291 	if (first != '{')
292 		throw (status_t)B_BAD_TYPE;
293 
294 	last = read_char(stream);
295 }
296 
297 
298 status_t
299 Group::AddElement(Element *element)
300 {
301 	if (element == NULL)
302 		return B_BAD_VALUE;
303 
304 	if (fElements.AddItem(element)) {
305 		element->SetParent(this);
306 		return B_OK;
307 	}
308 
309 	return B_NO_MEMORY;
310 }
311 
312 
313 uint32
314 Group::CountElements() const
315 {
316 	return (uint32)fElements.CountItems();
317 }
318 
319 
320 Element *
321 Group::ElementAt(uint32 index) const
322 {
323 	return static_cast<Element *>(fElements.ItemAt(index));
324 }
325 
326 
327 Element *
328 Group::FindDefinitionStart(int32 index, int32 *_startIndex) const
329 {
330 	if (index < 0)
331 		return NULL;
332 
333 	Element *element;
334 	int32 number = 0;
335 	for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
336 		if (number == index) {
337 			if (_startIndex)
338 				*_startIndex = i;
339 			return element;
340 		}
341 
342 		if (element->IsDefinitionDelimiter())
343 			number++;
344 	}
345 
346 	return NULL;
347 }
348 
349 
350 Command *
351 Group::FindDefinition(const char *name, int32 index) const
352 {
353 	int32 startIndex;
354 	Element *element = FindDefinitionStart(index, &startIndex);
355 	if (element == NULL)
356 		return NULL;
357 
358 	for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) {
359 		if (element->IsDefinitionDelimiter())
360 			break;
361 
362 		if (Command *command = dynamic_cast<Command *>(element)) {
363 			if (command != NULL && !strcmp(name, command->Name()))
364 				return command;
365 		}
366 	}
367 
368 	return NULL;
369 }
370 
371 
372 Group *
373 Group::FindGroup(const char *name) const
374 {
375 	Element *element;
376 	for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) {
377 		Group *group = dynamic_cast<Group *>(element);
378 		if (group == NULL)
379 			continue;
380 
381 		Command *command = dynamic_cast<Command *>(group->ElementAt(0));
382 		if (command != NULL && !strcmp(name, command->Name()))
383 			return group;
384 	}
385 
386 	return NULL;
387 }
388 
389 
390 const char *
391 Group::Name() const
392 {
393 	Command *command = dynamic_cast<Command *>(ElementAt(0));
394 	if (command != NULL)
395 		return command->Name();
396 
397 	return NULL;
398 }
399 
400 
401 void
402 Group::DetermineDestination()
403 {
404 	const char *name = Name();
405 	if (name == NULL)
406 		return;
407 
408 	if (!strcmp(name, "*")) {
409 		fDestination = COMMENT_DESTINATION;
410 		return;
411 	}
412 
413 	// binary search for destination control words
414 
415 	if (bsearch(name, kDestinationControlWords,
416 			sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]),
417 			sizeof(kDestinationControlWords[0]),
418 			(int (*)(const void *, const void *))string_array_compare) != NULL)
419 		fDestination = OTHER_DESTINATION;
420 }
421 
422 
423 group_destination
424 Group::Destination() const
425 {
426 	return fDestination;
427 }
428 
429 
430 //	#pragma mark -
431 
432 
433 Header::Header()
434 	:
435 	fVersion(0)
436 {
437 }
438 
439 
440 Header::~Header()
441 {
442 }
443 
444 
445 void
446 Header::Parse(char first, BDataIO &stream, char &last) throw (status_t)
447 {
448 	// The stream has been peeked into by the parser already, and
449 	// only the version follows in the stream -- let's pick it up
450 
451 	fVersion = parse_integer(first, stream, last);
452 
453 	// recreate "rtf" command to name this group
454 
455 	Command *command = new Command();
456 	command->SetName("rtf");
457 	command->SetOption(fVersion);
458 
459 	AddElement(command);
460 }
461 
462 
463 int32
464 Header::Version() const
465 {
466 	return fVersion;
467 }
468 
469 
470 const char *
471 Header::Charset() const
472 {
473 	Command *command = dynamic_cast<Command *>(ElementAt(1));
474 	if (command == NULL)
475 		return NULL;
476 
477 	return command->Name();
478 }
479 
480 
481 rgb_color
482 Header::Color(int32 index)
483 {
484 	rgb_color color = {0, 0, 0, 255};
485 
486 	Group *colorTable = FindGroup("colortbl");
487 
488 	if (colorTable != NULL) {
489 		if (Command *gun = colorTable->FindDefinition("red", index))
490 			color.red = gun->Option();
491 		if (Command *gun = colorTable->FindDefinition("green", index))
492 			color.green = gun->Option();
493 		if (Command *gun = colorTable->FindDefinition("blue", index))
494 			color.blue = gun->Option();
495 	}
496 
497 	return color;
498 }
499 
500 
501 //	#pragma mark -
502 
503 
504 Text::Text()
505 {
506 }
507 
508 
509 Text::~Text()
510 {
511 	SetTo(NULL);
512 }
513 
514 
515 bool
516 Text::IsDefinitionDelimiter()
517 {
518 	return fText == ";";
519 }
520 
521 
522 void
523 Text::Parse(char first, BDataIO &stream, char &last) throw (status_t)
524 {
525 	char c = first;
526 	if (c == '\0')
527 		c = read_char(stream);
528 
529 	if (c == ';') {
530 		// definition delimiter
531 		fText.SetTo(";");
532 		last = read_char(stream);
533 		return;
534 	}
535 
536 	const size_t kBufferSteps = 1;
537 	size_t maxSize = kBufferSteps;
538 	char *text = fText.LockBuffer(maxSize);
539 	if (text == NULL)
540 		throw (status_t)B_NO_MEMORY;
541 
542 	size_t position = 0;
543 
544 	while (true) {
545 		if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r')
546 			break;
547 
548 		if (position >= maxSize) {
549 			fText.UnlockBuffer(position);
550 			text = fText.LockBuffer(maxSize += kBufferSteps);
551 			if (text == NULL)
552 				throw (status_t)B_NO_MEMORY;
553 		}
554 
555 		text[position++] = c;
556 
557 		c = read_char(stream);
558 	}
559 	fText.UnlockBuffer(position);
560 
561 	// ToDo: add support for different charsets - right now, only ASCII is supported!
562 	//	To achieve this, we should just translate everything into UTF-8 here
563 
564 	last = c;
565 }
566 
567 
568 status_t
569 Text::SetTo(const char *text)
570 {
571 	return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY;
572 }
573 
574 
575 const char *
576 Text::String() const
577 {
578 	return fText.String();
579 }
580 
581 
582 uint32
583 Text::Length() const
584 {
585 	return fText.Length();
586 }
587 
588 
589 //	#pragma mark -
590 
591 
592 Command::Command()
593 	:
594 	fName(NULL),
595 	fHasOption(false),
596 	fOption(-1)
597 {
598 }
599 
600 
601 Command::~Command()
602 {
603 }
604 
605 
606 void
607 Command::Parse(char first, BDataIO &stream, char &last) throw (status_t)
608 {
609 	if (first == '\0')
610 		first = read_char(stream);
611 
612 	if (first != '\\')
613 		throw (status_t)B_BAD_TYPE;
614 
615 	// get name
616 	char name[kCommandLength];
617 	size_t length = 0;
618 	char c;
619 	while (isalpha(c = read_char(stream))) {
620 		name[length++] = c;
621 		if (length >= kCommandLength - 1)
622 			throw (status_t)B_BAD_TYPE;
623 	}
624 
625 	if (length == 0) {
626 		if (c == '\n' || c == '\r') {
627 			// we're a hard return
628 			fName.SetTo("par");
629 		} else
630 			fName.SetTo(c, 1);
631 
632 		// read over character
633 		c = read_char(stream);
634 	} else
635 		fName.SetTo(name, length);
636 
637 	// parse numeric option
638 
639 	if (c == '-')
640 		c = read_char(stream);
641 
642 	last = c;
643 
644 	if (fName == "'") {
645 		// hexadecimal
646 		char bytes[2];
647 		bytes[0] = read_char(stream);
648 		bytes[1] = '\0';
649 		BMemoryIO memory(bytes, 2);
650 
651 		SetOption(parse_integer(c, memory, last, 16));
652 		last = read_char(stream);
653 	} else {
654 		// decimal
655 		if (isdigit(c))
656 			SetOption(parse_integer(c, stream, last));
657 
658 		// a space delimiter is eaten up by the command
659 		if (isspace(last))
660 			last = read_char(stream);
661 	}
662 }
663 
664 
665 status_t
666 Command::SetName(const char *name)
667 {
668 	return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY;
669 }
670 
671 
672 const char *
673 Command::Name()
674 {
675 	return fName.String();
676 }
677 
678 
679 void
680 Command::UnsetOption()
681 {
682 	fHasOption = false;
683 	fOption = -1;
684 }
685 
686 
687 void
688 Command::SetOption(int32 option)
689 {
690 	fOption = option;
691 	fHasOption = true;
692 }
693 
694 
695 bool
696 Command::HasOption() const
697 {
698 	return fHasOption;
699 }
700 
701 
702 int32
703 Command::Option() const
704 {
705 	return fOption;
706 }
707 
708 
709 //	#pragma mark -
710 
711 
712 Iterator::Iterator(Element &start, group_destination destination)
713 {
714 	SetTo(start, destination);
715 }
716 
717 
718 void
719 Iterator::SetTo(Element &start, group_destination destination)
720 {
721 	fStart = &start;
722 	fDestination = destination;
723 
724 	Rewind();
725 }
726 
727 
728 void
729 Iterator::Rewind()
730 {
731 	fStack.MakeEmpty();
732 	fStack.Push(fStart);
733 }
734 
735 
736 bool
737 Iterator::HasNext() const
738 {
739 	return !fStack.IsEmpty();
740 }
741 
742 
743 Element *
744 Iterator::Next()
745 {
746 	Element *element;
747 
748 	if (!fStack.Pop(&element))
749 		return NULL;
750 
751 	Group *group = dynamic_cast<Group *>(element);
752 	if (group != NULL
753 		&& (fDestination == ALL_DESTINATIONS
754 			|| fDestination == group->Destination())) {
755 		// put this group's children on the stack in
756 		// reverse order, so that we iterate over
757 		// the tree in in-order
758 
759 		for (int32 i = group->CountElements(); i-- > 0;) {
760 			fStack.Push(group->ElementAt(i));
761 		}
762 	}
763 
764 	return element;
765 }
766 
767 
768 //	#pragma mark -
769 
770 
771 Worker::Worker(RTF::Header &start)
772 	:
773 	fStart(start)
774 {
775 }
776 
777 
778 Worker::~Worker()
779 {
780 }
781 
782 
783 void
784 Worker::Dispatch(Element *element)
785 {
786 	if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) {
787 		fSkip = false;
788 		Group(group);
789 
790 		if (fSkip)
791 			return;
792 
793 		for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++)
794 			Dispatch(element);
795 
796 		GroupEnd(group);
797 	} else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) {
798 		Command(command);
799 	} else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) {
800 		Text(text);
801 	}
802 }
803 
804 
805 void
806 Worker::Work() throw (status_t)
807 {
808 	Dispatch(&fStart);
809 }
810 
811 
812 void
813 Worker::Group(RTF::Group *group)
814 {
815 }
816 
817 
818 void
819 Worker::GroupEnd(RTF::Group *group)
820 {
821 }
822 
823 
824 void
825 Worker::Command(RTF::Command *command)
826 {
827 }
828 
829 
830 void
831 Worker::Text(RTF::Text *text)
832 {
833 }
834 
835 
836 RTF::Header &
837 Worker::Start()
838 {
839 	return fStart;
840 }
841 
842 
843 void
844 Worker::Skip()
845 {
846 	fSkip = true;
847 }
848 
849 
850 void
851 Worker::Abort(status_t status)
852 {
853 	throw status;
854 }
855 
856