xref: /haiku/src/add-ons/translators/rtf/convert.cpp (revision 8195a5a835117ab2da405e0d477153570b75d921)
1 /*
2  * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
#include "convert.h"
#include "Stack.h"

#include <TranslatorFormats.h>

#include <Application.h>
#include <TextView.h>
#include <TypeConstants.h>
#include <ByteOrder.h>
#include <Node.h>
#include <Font.h>

#include <new>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
22 
23 
24 struct conversion_context {
25 	conversion_context()
26 	{
27 		Reset();
28 	}
29 
30 	void Reset();
31 
32 	int32	section;
33 	int32	page;
34 	int32	start_page;
35 	int32	first_line_indent;
36 	bool	new_line;
37 };
38 
39 
40 class TextOutput : public RTF::Worker {
41 	public:
42 		TextOutput(RTF::Header &start, BDataIO *stream, bool processRuns);
43 		~TextOutput();
44 
45 		size_t Length() const;
46 		void *FlattenedRunArray(int32 &size);
47 
48 	protected:
49 		virtual void Group(RTF::Group *group);
50 		virtual void GroupEnd(RTF::Group *group);
51 		virtual void Command(RTF::Command *command);
52 		virtual void Text(RTF::Text *text);
53 
54 	private:
55 		void PrepareTextRun(text_run *current) throw (status_t);
56 
57 		BDataIO				*fTarget;
58 		int32				fOffset;
59 		conversion_context	fContext;
60 		Stack<text_run *>	fGroupStack;
61 		bool				fProcessRuns;
62 		BList				fRuns;
63 		text_run			*fCurrentRun;
64 		BApplication		*fApplication;
65 };
66 
67 
68 void
69 conversion_context::Reset()
70 {
71 	section = 1;
72 	page = 1;
73 	start_page = page;
74 	first_line_indent = 0;
75 	new_line = true;
76 }
77 
78 
79 //	#pragma mark -
80 
81 
82 static size_t
83 write_text(conversion_context &context, const char *text, size_t length,
84 	BDataIO *target = NULL) throw (status_t)
85 {
86 	size_t prefix = 0;
87 	if (context.new_line) {
88 		prefix = context.first_line_indent;
89 		context.new_line = false;
90 	}
91 
92 	if (target == NULL)
93 		return prefix + length;
94 
95 	for (uint32 i = 0; i < prefix; i++) {
96 		write_text(context, " ", 1, target);
97 	}
98 
99 	ssize_t written = target->Write(text, length);
100 	if (written < B_OK)
101 		throw (status_t)written;
102 	else if ((size_t)written != length)
103 		throw (status_t)B_IO_ERROR;
104 
105 	return prefix + length;
106 }
107 
108 
109 static size_t
110 write_text(conversion_context &context, const char *text,
111 	BDataIO *target = NULL) throw (status_t)
112 {
113 	return write_text(context, text, strlen(text), target);
114 }
115 
116 
117 static size_t
118 next_line(conversion_context &context, const char *prefix,
119 	BDataIO *target) throw (status_t)
120 {
121 	size_t length = strlen(prefix);
122 	context.new_line = true;
123 
124 	if (target != NULL) {
125 		ssize_t written = target->Write(prefix, length);
126 		if (written < B_OK)
127 			throw (status_t)written;
128 		else if ((size_t)written != length)
129 			throw (status_t)B_IO_ERROR;
130 	}
131 
132 	return length;
133 }
134 
135 
136 static size_t
137 write_unicode_char(conversion_context &context, uint32 c,
138 	BDataIO *target) throw (status_t)
139 {
140 	size_t length = 1;
141 	char bytes[4];
142 
143 	if (c < 0x80)
144 		bytes[0] = c;
145 	else if (c < 0x800) {
146 		bytes[0] = 0xc0 | (c >> 6);
147 		bytes[1] = 0x80 | (c & 0x3f);
148 		length = 2;
149 	} else if (c < 0x10000) {
150 		bytes[0] = 0xe0 | (c >> 12);
151 		bytes[1] = 0x80 | ((c >> 6) & 0x3f);
152 		bytes[2] = 0x80 | (c & 0x3f);
153 		length = 3;
154 	} else if (c <= 0x10ffff) {
155 		bytes[0] = 0xf0 | (c >> 18);
156 		bytes[1] = 0x80 | ((c >> 12) & 0x3f);
157 		bytes[2] = 0x80 | ((c >> 6) & 0x3f);
158 		bytes[3] = 0x80 | (c & 0x3f);
159 		length = 4;
160 	}
161 
162 	return write_text(context, bytes, length, target);
163 }
164 
165 
166 static size_t
167 process_command(conversion_context &context, RTF::Command *command,
168 	BDataIO *target) throw (status_t)
169 {
170 	const char *name = command->Name();
171 
172 	if (!strcmp(name, "par") || !strcmp(name, "line")) {
173 		// paragraph ended
174 		return next_line(context, "\n", target);
175 	}
176 	if (!strcmp(name, "sect")) {
177 		// section ended
178 		context.section++;
179 		return next_line(context, "\n", target);
180 	}
181 	if (!strcmp(name, "page")) {
182 		// we just insert two carriage returns for a page break
183 		context.page++;
184 		return next_line(context, "\n\n", target);
185 	}
186 	if (!strcmp(name, "tab")) {
187 		return write_text(context, "\t", target);
188 	}
189 	if (!strcmp(name, "'")) {
190 		return write_unicode_char(context, command->Option(), target);
191 	}
192 
193 	if (!strcmp(name, "pard")) {
194 		// reset paragraph
195 		context.first_line_indent = 0;
196 		return 0;
197 	}
198 	if (!strcmp(name, "fi") || !strcmp(name, "cufi")) {
199 		// "cufi" first line indent in 1/100 space steps
200 		// "fi" is most probably specified in 1/20 pts
201 		// Currently, we don't differentiate between the two...
202 		context.first_line_indent = (command->Option() + 50) / 100;
203 		if (context.first_line_indent < 0)
204 			context.first_line_indent = 0;
205 		if (context.first_line_indent > 8)
206 			context.first_line_indent = 8;
207 
208 		return 0;
209 	}
210 
211 	// document variables
212 
213 	if (!strcmp(name, "sectnum")) {
214 		char buffer[64];
215 		snprintf(buffer, sizeof(buffer), "%ld", context.section);
216 		return write_text(context, buffer, target);
217 	}
218 	if (!strcmp(name, "pgnstarts")) {
219 		context.start_page = command->HasOption() ? command->Option() : 1;
220 		return 0;
221 	}
222 	if (!strcmp(name, "pgnrestart")) {
223 		context.page = context.start_page;
224 		return 0;
225 	}
226 	if (!strcmp(name, "chpgn")) {
227 		char buffer[64];
228 		snprintf(buffer, sizeof(buffer), "%ld", context.page);
229 		return write_text(context, buffer, target);
230 	}
231 	return 0;
232 }
233 
234 
235 static void
236 set_font_face(BFont &font, uint16 face, bool on)
237 {
238 	// Special handling for B_REGULAR_FACE, since BFont::SetFace(0)
239 	// just doesn't do anything
240 
241 	if (font.Face() == B_REGULAR_FACE && on)
242 		font.SetFace(face);
243 	else if ((font.Face() & ~face) == 0 && !on)
244 		font.SetFace(B_REGULAR_FACE);
245 	else if (on)
246 		font.SetFace(font.Face() | face);
247 	else
248 		font.SetFace(font.Face() & ~face);
249 }
250 
251 
252 static bool
253 text_runs_are_equal(text_run *a, text_run *b)
254 {
255 	if (a == NULL && b == NULL)
256 		return true;
257 
258 	if (a == NULL || b == NULL)
259 		return false;
260 
261 	return a->offset == b->offset
262 		&& *(uint32*)&a->color == *(uint32*)&b->color
263 		&& a->font == b->font;
264 }
265 
266 
267 static text_run *
268 copy_text_run(text_run *run)
269 {
270 	static const rgb_color kBlack = {0, 0, 0, 255};
271 
272 	text_run *newRun = new text_run();
273 	if (newRun == NULL)
274 		throw (status_t)B_NO_MEMORY;
275 
276 	if (run != NULL) {
277 		newRun->offset = run->offset;
278 		newRun->font = run->font;
279 		newRun->color = run->color;
280 	} else {
281 		newRun->offset = 0;
282 		newRun->color = kBlack;
283 	}
284 
285 	return newRun;
286 }
287 
288 
#if 0
// Debug helper (compiled out): prints offset, color, and font of a run.
void
dump_text_run(text_run *run)
{
	if (run != NULL) {
		printf("run: offset = %ld, color = {%d,%d,%d}, font = ",
			run->offset, run->color.red, run->color.green, run->color.blue);
		run->font.PrintToStream();
	}
}
#endif
301 
302 
303 //	#pragma mark -
304 
305 
306 TextOutput::TextOutput(RTF::Header &start, BDataIO *stream, bool processRuns)
307 	: RTF::Worker(start),
308 	fTarget(stream),
309 	fOffset(0),
310 	fProcessRuns(processRuns),
311 	fCurrentRun(NULL),
312 	fApplication(NULL)
313 {
314 	// This is not nice, but it's the only we can provide all features on command
315 	// line tools that don't create a BApplication - without a BApplication, we
316 	// could not support any text styles (colors and fonts)
317 
318 	if (processRuns && be_app == NULL)
319 		fApplication = new BApplication("application/x-vnd.Haiku-RTF-Translator");
320 }
321 
322 
323 TextOutput::~TextOutput()
324 {
325 	delete fApplication;
326 }
327 
328 
329 size_t
330 TextOutput::Length() const
331 {
332 	return (size_t)fOffset;
333 }
334 
335 
336 void *
337 TextOutput::FlattenedRunArray(int32 &_size)
338 {
339 	// are there any styles?
340 	if (fRuns.CountItems() == 0) {
341 		_size = 0;
342 		return NULL;
343 	}
344 
345 	// create array
346 
347 	text_run_array *array = (text_run_array *)malloc(sizeof(text_run_array)
348 		+ sizeof(text_run) * (fRuns.CountItems() - 1));
349 	if (array == NULL)
350 		throw (status_t)B_NO_MEMORY;
351 
352 	array->count = fRuns.CountItems();
353 
354 	for (int32 i = 0; i < array->count; i++) {
355 		text_run *run = (text_run *)fRuns.RemoveItem(0L);
356 		array->runs[i] = *run;
357 		delete run;
358 	}
359 
360 	return BTextView::FlattenRunArray(array, &_size);
361 }
362 
363 
364 void
365 TextOutput::PrepareTextRun(text_run *run) throw (status_t)
366 {
367 	if (run != NULL && fOffset == run->offset)
368 		return;
369 
370 	text_run *newRun = copy_text_run(run);
371 
372 	newRun->offset = fOffset;
373 
374 	fRuns.AddItem(newRun);
375 	fCurrentRun = newRun;
376 }
377 
378 
379 void
380 TextOutput::Group(RTF::Group *group)
381 {
382 	if (group->Destination() != RTF::TEXT_DESTINATION) {
383 		Skip();
384 		return;
385 	}
386 
387 	if (!fProcessRuns)
388 		return;
389 
390 	// We only push a copy of the run on the stack because the current
391 	// run may still be changed in the new group -- later, we'll just
392 	// see if that was the case, and either use the copied one then,
393 	// or throw it away
394 	text_run *run = NULL;
395 	if (fCurrentRun != NULL)
396 		run = copy_text_run(fCurrentRun);
397 
398 	fGroupStack.Push(run);
399 }
400 
401 
402 void
403 TextOutput::GroupEnd(RTF::Group *group)
404 {
405 	if (!fProcessRuns)
406 		return;
407 
408 	text_run *last;
409 	fGroupStack.Pop(&last);
410 
411 	// has the style been changed?
412 	if (!text_runs_are_equal(last, fCurrentRun)) {
413 		if (fCurrentRun != NULL && last != NULL
414 			&& fCurrentRun->offset == fOffset) {
415 			// replace the current one, we don't need it anymore
416 			fCurrentRun->color = last->color;
417 			fCurrentRun->font = last->font;
418 			delete last;
419 		} else if (last) {
420 			// adopt the text_run from the previous group
421 			last->offset = fOffset;
422 			fRuns.AddItem(last);
423 			fCurrentRun = last;
424 		}
425 	} else
426 		delete last;
427 }
428 
429 
430 void
431 TextOutput::Command(RTF::Command *command)
432 {
433 	if (!fProcessRuns) {
434 		fOffset += process_command(fContext, command, fTarget);
435 		return;
436 	}
437 
438 	const char *name = command->Name();
439 
440 	if (!strcmp(name, "cf")) {
441 		// foreground color
442 		PrepareTextRun(fCurrentRun);
443 		fCurrentRun->color = Start().Color(command->Option());
444 	} else if (!strcmp(name, "b")
445 		|| !strcmp(name, "embo") || !strcmp(name, "impr")) {
446 		// bold style ("emboss" and "engrave" are currently the same, too)
447 		PrepareTextRun(fCurrentRun);
448 		set_font_face(fCurrentRun->font, B_BOLD_FACE, command->Option() != 0);
449 	} else if (!strcmp(name, "i")) {
450 		// bold style
451 		PrepareTextRun(fCurrentRun);
452 		set_font_face(fCurrentRun->font, B_ITALIC_FACE, command->Option() != 0);
453 	} else if (!strcmp(name, "ul")) {
454 		// bold style
455 		PrepareTextRun(fCurrentRun);
456 		set_font_face(fCurrentRun->font, B_UNDERSCORE_FACE, command->Option() != 0);
457 	} else if (!strcmp(name, "fs")) {
458 		// font size in half points
459 		PrepareTextRun(fCurrentRun);
460 		fCurrentRun->font.SetSize(command->Option() / 2.0);
461 	} else if (!strcmp(name, "plain")) {
462 		// reset font to plain style
463 		PrepareTextRun(fCurrentRun);
464 		fCurrentRun->font = be_plain_font;
465 	} else if (!strcmp(name, "f")) {
466 		// font number
467 		RTF::Group *fonts = Start().FindGroup("fonttbl");
468 		if (fonts == NULL)
469 			return;
470 
471 		PrepareTextRun(fCurrentRun);
472 		BFont font;
473 			// missing font info will be replaced by the default font
474 
475 		RTF::Command *info;
476 		for (int32 index = 0; (info = fonts->FindDefinition("f", index)) != NULL; index++) {
477 			if (info->Option() != command->Option())
478 				continue;
479 
480 			// ToDo: really try to choose font by name and serif/sans-serif
481 			// ToDo: the font list should be built before once
482 
483 			// For now, it only differentiates fixed fonts from proportional ones
484 			if (fonts->FindDefinition("fmodern", index) != NULL)
485 				font = be_fixed_font;
486 		}
487 
488 		font_family family;
489 		font_style style;
490 		font.GetFamilyAndStyle(&family, &style);
491 
492 		fCurrentRun->font.SetFamilyAndFace(family, fCurrentRun->font.Face());
493 	} else
494 		fOffset += process_command(fContext, command, fTarget);
495 }
496 
497 
498 void
499 TextOutput::Text(RTF::Text *text)
500 {
501 	fOffset += write_text(fContext, text->String(), text->Length(), fTarget);
502 }
503 
504 
505 //	#pragma mark -
506 
507 
508 status_t
509 convert_to_stxt(RTF::Header &header, BDataIO &target)
510 {
511 	// count text bytes
512 
513 	size_t textSize = 0;
514 
515 	try {
516 		TextOutput counter(header, NULL, false);
517 
518 		counter.Work();
519 		textSize = counter.Length();
520 	} catch (status_t status) {
521 		return status;
522 	}
523 
524 	// put out header
525 
526 	TranslatorStyledTextStreamHeader stxtHeader;
527 	stxtHeader.header.magic = 'STXT';
528 	stxtHeader.header.header_size = sizeof(TranslatorStyledTextStreamHeader);
529 	stxtHeader.header.data_size = 0;
530 	stxtHeader.version = 100;
531 	status_t status = swap_data(B_UINT32_TYPE, &stxtHeader, sizeof(stxtHeader),
532 		B_SWAP_HOST_TO_BENDIAN);
533 	if (status != B_OK)
534 		return status;
535 
536 	ssize_t written = target.Write(&stxtHeader, sizeof(stxtHeader));
537 	if (written < B_OK)
538 		return written;
539 	if (written != sizeof(stxtHeader))
540 		return B_IO_ERROR;
541 
542 	TranslatorStyledTextTextHeader textHeader;
543 	textHeader.header.magic = 'TEXT';
544 	textHeader.header.header_size = sizeof(TranslatorStyledTextTextHeader);
545 	textHeader.header.data_size = textSize;
546 	textHeader.charset = B_UNICODE_UTF8;
547 	status = swap_data(B_UINT32_TYPE, &textHeader, sizeof(textHeader),
548 		B_SWAP_HOST_TO_BENDIAN);
549 	if (status != B_OK)
550 		return status;
551 
552 	written = target.Write(&textHeader, sizeof(textHeader));
553 	if (written < B_OK)
554 		return written;
555 	if (written != sizeof(textHeader))
556 		return B_IO_ERROR;
557 
558 	// put out main text
559 
560 	void *flattenedRuns = NULL;
561 	int32 flattenedSize = 0;
562 
563 	try {
564 		TextOutput output(header, &target, true);
565 
566 		output.Work();
567 		flattenedRuns = output.FlattenedRunArray(flattenedSize);
568 	} catch (status_t status) {
569 		return status;
570 	}
571 
572 	// put out styles
573 
574 	TranslatorStyledTextStyleHeader styleHeader;
575 	styleHeader.header.magic = 'STYL';
576 	styleHeader.header.header_size = sizeof(TranslatorStyledTextStyleHeader);
577 	styleHeader.header.data_size = flattenedSize;
578 	styleHeader.apply_offset = 0;
579 	styleHeader.apply_length = textSize;
580 
581 	status = swap_data(B_UINT32_TYPE, &styleHeader, sizeof(styleHeader),
582 		B_SWAP_HOST_TO_BENDIAN);
583 	if (status != B_OK)
584 		return status;
585 
586 	written = target.Write(&styleHeader, sizeof(styleHeader));
587 	if (written < B_OK)
588 		return written;
589 	if (written != sizeof(styleHeader))
590 		return B_IO_ERROR;
591 
592 	// output actual style information
593 	written = target.Write(flattenedRuns, flattenedSize);
594 
595 	free(flattenedRuns);
596 
597 	if (written < B_OK)
598 		return written;
599 	if (written != flattenedSize)
600 		return B_IO_ERROR;
601 
602 	return B_OK;
603 }
604 
605 
606 status_t
607 convert_to_plain_text(RTF::Header &header, BPositionIO &target)
608 {
609 	// put out main text
610 
611 	void *flattenedRuns = NULL;
612 	int32 flattenedSize = 0;
613 
614 	// ToDo: this is not really nice, we should adopt the BPositionIO class
615 	//	from Dano/Zeta which has meta data support
616 	BNode *node = dynamic_cast<BNode *>(&target);
617 
618 	try {
619 		TextOutput output(header, &target, node != NULL);
620 
621 		output.Work();
622 		flattenedRuns = output.FlattenedRunArray(flattenedSize);
623 	} catch (status_t status) {
624 		return status;
625 	}
626 
627 	if (node == NULL) {
628 		// we can't write the styles
629 		return B_OK;
630 	}
631 
632 	// put out styles
633 
634 	ssize_t written = node->WriteAttr("styles", B_RAW_TYPE, 0, flattenedRuns, flattenedSize);
635 	if (written >= B_OK && written != flattenedSize)
636 		node->RemoveAttr("styles");
637 
638 	free(flattenedRuns);
639 	return B_OK;
640 }
641