xref: /haiku/src/add-ons/translators/rtf/convert.cpp (revision 73254051b196497dfee9ab89eb0c2f60cc305819)
1 /*
2  * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
#include "convert.h"
#include "Stack.h"

#include <TranslatorFormats.h>

#include <Application.h>
#include <TextView.h>
#include <TypeConstants.h>
#include <ByteOrder.h>
#include <File.h>
#include <Font.h>

#include <AutoDeleter.h>

#include <new>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
24 
25 
26 struct conversion_context {
27 	conversion_context()
28 	{
29 		Reset();
30 	}
31 
32 	void Reset();
33 
34 	int32	section;
35 	int32	page;
36 	int32	start_page;
37 	int32	first_line_indent;
38 	bool	new_line;
39 };
40 
41 
42 class TextOutput : public RTF::Worker {
43 	public:
44 		TextOutput(RTF::Header &start, BDataIO *stream, bool processRuns);
45 		~TextOutput();
46 
47 		size_t Length() const;
48 		void *FlattenedRunArray(int32 &size);
49 
50 	protected:
51 		virtual void Group(RTF::Group *group);
52 		virtual void GroupEnd(RTF::Group *group);
53 		virtual void Command(RTF::Command *command);
54 		virtual void Text(RTF::Text *text);
55 
56 	private:
57 		void PrepareTextRun(text_run *current) throw (status_t);
58 
59 		BDataIO				*fTarget;
60 		int32				fOffset;
61 		conversion_context	fContext;
62 		Stack<text_run *>	fGroupStack;
63 		bool				fProcessRuns;
64 		BList				fRuns;
65 		text_run			*fCurrentRun;
66 		BApplication		*fApplication;
67 };
68 
69 
70 void
71 conversion_context::Reset()
72 {
73 	section = 1;
74 	page = 1;
75 	start_page = page;
76 	first_line_indent = 0;
77 	new_line = true;
78 }
79 
80 
81 //	#pragma mark -
82 
83 
84 static size_t
85 write_text(conversion_context &context, const char *text, size_t length,
86 	BDataIO *target = NULL) throw (status_t)
87 {
88 	size_t prefix = 0;
89 	if (context.new_line) {
90 		prefix = context.first_line_indent;
91 		context.new_line = false;
92 	}
93 
94 	if (target == NULL)
95 		return prefix + length;
96 
97 	for (uint32 i = 0; i < prefix; i++) {
98 		write_text(context, " ", 1, target);
99 	}
100 
101 	ssize_t written = target->Write(text, length);
102 	if (written < B_OK)
103 		throw (status_t)written;
104 	else if ((size_t)written != length)
105 		throw (status_t)B_IO_ERROR;
106 
107 	return prefix + length;
108 }
109 
110 
111 static size_t
112 write_text(conversion_context &context, const char *text,
113 	BDataIO *target = NULL) throw (status_t)
114 {
115 	return write_text(context, text, strlen(text), target);
116 }
117 
118 
119 static size_t
120 next_line(conversion_context &context, const char *prefix,
121 	BDataIO *target) throw (status_t)
122 {
123 	size_t length = strlen(prefix);
124 	context.new_line = true;
125 
126 	if (target != NULL) {
127 		ssize_t written = target->Write(prefix, length);
128 		if (written < B_OK)
129 			throw (status_t)written;
130 		else if ((size_t)written != length)
131 			throw (status_t)B_IO_ERROR;
132 	}
133 
134 	return length;
135 }
136 
137 
138 static size_t
139 write_unicode_char(conversion_context &context, uint32 c,
140 	BDataIO *target) throw (status_t)
141 {
142 	size_t length = 1;
143 	char bytes[4];
144 
145 	if (c < 0x80)
146 		bytes[0] = c;
147 	else if (c < 0x800) {
148 		bytes[0] = 0xc0 | (c >> 6);
149 		bytes[1] = 0x80 | (c & 0x3f);
150 		length = 2;
151 	} else if (c < 0x10000) {
152 		bytes[0] = 0xe0 | (c >> 12);
153 		bytes[1] = 0x80 | ((c >> 6) & 0x3f);
154 		bytes[2] = 0x80 | (c & 0x3f);
155 		length = 3;
156 	} else if (c <= 0x10ffff) {
157 		bytes[0] = 0xf0 | (c >> 18);
158 		bytes[1] = 0x80 | ((c >> 12) & 0x3f);
159 		bytes[2] = 0x80 | ((c >> 6) & 0x3f);
160 		bytes[3] = 0x80 | (c & 0x3f);
161 		length = 4;
162 	}
163 
164 	return write_text(context, bytes, length, target);
165 }
166 
167 
168 static size_t
169 process_command(conversion_context &context, RTF::Command *command,
170 	BDataIO *target) throw (status_t)
171 {
172 	const char *name = command->Name();
173 
174 	if (!strcmp(name, "par") || !strcmp(name, "line")) {
175 		// paragraph ended
176 		return next_line(context, "\n", target);
177 	}
178 	if (!strcmp(name, "sect")) {
179 		// section ended
180 		context.section++;
181 		return next_line(context, "\n", target);
182 	}
183 	if (!strcmp(name, "page")) {
184 		// we just insert two carriage returns for a page break
185 		context.page++;
186 		return next_line(context, "\n\n", target);
187 	}
188 	if (!strcmp(name, "tab")) {
189 		return write_text(context, "\t", target);
190 	}
191 	if (!strcmp(name, "'")) {
192 		return write_unicode_char(context, command->Option(), target);
193 	}
194 
195 	if (!strcmp(name, "pard")) {
196 		// reset paragraph
197 		context.first_line_indent = 0;
198 		return 0;
199 	}
200 	if (!strcmp(name, "fi") || !strcmp(name, "cufi")) {
201 		// "cufi" first line indent in 1/100 space steps
202 		// "fi" is most probably specified in 1/20 pts
203 		// Currently, we don't differentiate between the two...
204 		context.first_line_indent = (command->Option() + 50) / 100;
205 		if (context.first_line_indent < 0)
206 			context.first_line_indent = 0;
207 		if (context.first_line_indent > 8)
208 			context.first_line_indent = 8;
209 
210 		return 0;
211 	}
212 
213 	// document variables
214 
215 	if (!strcmp(name, "sectnum")) {
216 		char buffer[64];
217 		snprintf(buffer, sizeof(buffer), "%" B_PRId32, context.section);
218 		return write_text(context, buffer, target);
219 	}
220 	if (!strcmp(name, "pgnstarts")) {
221 		context.start_page = command->HasOption() ? command->Option() : 1;
222 		return 0;
223 	}
224 	if (!strcmp(name, "pgnrestart")) {
225 		context.page = context.start_page;
226 		return 0;
227 	}
228 	if (!strcmp(name, "chpgn")) {
229 		char buffer[64];
230 		snprintf(buffer, sizeof(buffer), "%" B_PRId32, context.page);
231 		return write_text(context, buffer, target);
232 	}
233 	return 0;
234 }
235 
236 
237 static void
238 set_font_face(BFont &font, uint16 face, bool on)
239 {
240 	// Special handling for B_REGULAR_FACE, since BFont::SetFace(0)
241 	// just doesn't do anything
242 
243 	if (font.Face() == B_REGULAR_FACE && on)
244 		font.SetFace(face);
245 	else if ((font.Face() & ~face) == 0 && !on)
246 		font.SetFace(B_REGULAR_FACE);
247 	else if (on)
248 		font.SetFace(font.Face() | face);
249 	else
250 		font.SetFace(font.Face() & ~face);
251 }
252 
253 
254 static bool
255 text_runs_are_equal(text_run *a, text_run *b)
256 {
257 	if (a == NULL && b == NULL)
258 		return true;
259 
260 	if (a == NULL || b == NULL)
261 		return false;
262 
263 	return a->offset == b->offset
264 		&& *(uint32*)&a->color == *(uint32*)&b->color
265 		&& a->font == b->font;
266 }
267 
268 
269 static text_run *
270 copy_text_run(text_run *run)
271 {
272 	static const rgb_color kBlack = {0, 0, 0, 255};
273 
274 	text_run *newRun = new text_run();
275 	if (newRun == NULL)
276 		throw (status_t)B_NO_MEMORY;
277 
278 	if (run != NULL) {
279 		newRun->offset = run->offset;
280 		newRun->font = run->font;
281 		newRun->color = run->color;
282 	} else {
283 		newRun->offset = 0;
284 		newRun->color = kBlack;
285 	}
286 
287 	return newRun;
288 }
289 
290 
#if 0
/*!	Debug helper: prints offset, color, and font of \a run to stdout.
	Does nothing if \a run is NULL.
*/
void
dump_text_run(text_run *run)
{
	if (run == NULL)
		return;

	// the offset is handled as int32 throughout this file, so "%ld" would
	// be wrong on 64 bit
	printf("run: offset = %" B_PRId32 ", color = {%d,%d,%d}, font = ",
		run->offset, run->color.red, run->color.green, run->color.blue);
	run->font.PrintToStream();
}
#endif
303 
304 
305 //	#pragma mark -
306 
307 
308 TextOutput::TextOutput(RTF::Header &start, BDataIO *stream, bool processRuns)
309 	: RTF::Worker(start),
310 	fTarget(stream),
311 	fOffset(0),
312 	fProcessRuns(processRuns),
313 	fCurrentRun(NULL),
314 	fApplication(NULL)
315 {
316 	// This is not nice, but it's the only we can provide all features on command
317 	// line tools that don't create a BApplication - without a BApplication, we
318 	// could not support any text styles (colors and fonts)
319 
320 	if (processRuns && be_app == NULL)
321 		fApplication = new BApplication("application/x-vnd.Haiku-RTFTranslator");
322 }
323 
324 
325 TextOutput::~TextOutput()
326 {
327 	delete fApplication;
328 }
329 
330 
331 size_t
332 TextOutput::Length() const
333 {
334 	return (size_t)fOffset;
335 }
336 
337 
338 void *
339 TextOutput::FlattenedRunArray(int32 &_size)
340 {
341 	// are there any styles?
342 	if (fRuns.CountItems() == 0) {
343 		_size = 0;
344 		return NULL;
345 	}
346 
347 	// create array
348 
349 	text_run_array *array = (text_run_array *)malloc(sizeof(text_run_array)
350 		+ sizeof(text_run) * (fRuns.CountItems() - 1));
351 	if (array == NULL)
352 		throw (status_t)B_NO_MEMORY;
353 
354 	array->count = fRuns.CountItems();
355 
356 	for (int32 i = 0; i < array->count; i++) {
357 		text_run *run = (text_run *)fRuns.RemoveItem((int32)0);
358 		array->runs[i] = *run;
359 		delete run;
360 	}
361 
362 	void *flattenedRunArray = BTextView::FlattenRunArray(array, &_size);
363 
364 	free(array);
365 
366 	return flattenedRunArray;
367 }
368 
369 
370 void
371 TextOutput::PrepareTextRun(text_run *run) throw (status_t)
372 {
373 	if (run != NULL && fOffset == run->offset)
374 		return;
375 
376 	text_run *newRun = copy_text_run(run);
377 
378 	newRun->offset = fOffset;
379 
380 	fRuns.AddItem(newRun);
381 	fCurrentRun = newRun;
382 }
383 
384 
385 void
386 TextOutput::Group(RTF::Group *group)
387 {
388 	if (group->Destination() != RTF::TEXT_DESTINATION) {
389 		Skip();
390 		return;
391 	}
392 
393 	if (!fProcessRuns)
394 		return;
395 
396 	// We only push a copy of the run on the stack because the current
397 	// run may still be changed in the new group -- later, we'll just
398 	// see if that was the case, and either use the copied one then,
399 	// or throw it away
400 	text_run *run = NULL;
401 	if (fCurrentRun != NULL)
402 		run = copy_text_run(fCurrentRun);
403 
404 	fGroupStack.Push(run);
405 }
406 
407 
408 void
409 TextOutput::GroupEnd(RTF::Group *group)
410 {
411 	if (!fProcessRuns)
412 		return;
413 
414 	text_run *last = NULL;
415 	fGroupStack.Pop(&last);
416 
417 	// has the style been changed?
418 	if (!text_runs_are_equal(last, fCurrentRun)) {
419 		if (fCurrentRun != NULL && last != NULL
420 			&& fCurrentRun->offset == fOffset) {
421 			// replace the current one, we don't need it anymore
422 			fCurrentRun->color = last->color;
423 			fCurrentRun->font = last->font;
424 			delete last;
425 		} else if (last) {
426 			// adopt the text_run from the previous group
427 			last->offset = fOffset;
428 			fRuns.AddItem(last);
429 			fCurrentRun = last;
430 		}
431 	} else
432 		delete last;
433 }
434 
435 
436 void
437 TextOutput::Command(RTF::Command *command)
438 {
439 	if (!fProcessRuns) {
440 		fOffset += process_command(fContext, command, fTarget);
441 		return;
442 	}
443 
444 	const char *name = command->Name();
445 
446 	if (!strcmp(name, "cf")) {
447 		// foreground color
448 		PrepareTextRun(fCurrentRun);
449 		fCurrentRun->color = Start().Color(command->Option());
450 	} else if (!strcmp(name, "b")
451 		|| !strcmp(name, "embo") || !strcmp(name, "impr")) {
452 		// bold style ("emboss" and "engrave" are currently the same, too)
453 		PrepareTextRun(fCurrentRun);
454 		set_font_face(fCurrentRun->font, B_BOLD_FACE, command->Option() != 0);
455 	} else if (!strcmp(name, "i")) {
456 		// bold style
457 		PrepareTextRun(fCurrentRun);
458 		set_font_face(fCurrentRun->font, B_ITALIC_FACE, command->Option() != 0);
459 	} else if (!strcmp(name, "ul")) {
460 		// bold style
461 		PrepareTextRun(fCurrentRun);
462 		set_font_face(fCurrentRun->font, B_UNDERSCORE_FACE, command->Option() != 0);
463 	} else if (!strcmp(name, "fs")) {
464 		// font size in half points
465 		PrepareTextRun(fCurrentRun);
466 		fCurrentRun->font.SetSize(command->Option() / 2.0);
467 	} else if (!strcmp(name, "plain")) {
468 		// reset font to plain style
469 		PrepareTextRun(fCurrentRun);
470 		fCurrentRun->font = be_plain_font;
471 	} else if (!strcmp(name, "f")) {
472 		// font number
473 		RTF::Group *fonts = Start().FindGroup("fonttbl");
474 		if (fonts == NULL)
475 			return;
476 
477 		PrepareTextRun(fCurrentRun);
478 		BFont font;
479 			// missing font info will be replaced by the default font
480 
481 		RTF::Command *info;
482 		for (int32 index = 0; (info = fonts->FindDefinition("f", index)) != NULL; index++) {
483 			if (info->Option() != command->Option())
484 				continue;
485 
486 			// ToDo: really try to choose font by name and serif/sans-serif
487 			// ToDo: the font list should be built before once
488 
489 			// For now, it only differentiates fixed fonts from proportional ones
490 			if (fonts->FindDefinition("fmodern", index) != NULL)
491 				font = be_fixed_font;
492 		}
493 
494 		font_family family;
495 		font_style style;
496 		font.GetFamilyAndStyle(&family, &style);
497 
498 		fCurrentRun->font.SetFamilyAndFace(family, fCurrentRun->font.Face());
499 	} else
500 		fOffset += process_command(fContext, command, fTarget);
501 }
502 
503 
504 void
505 TextOutput::Text(RTF::Text *text)
506 {
507 	fOffset += write_text(fContext, text->String(), text->Length(), fTarget);
508 }
509 
510 
511 //	#pragma mark -
512 
513 
514 status_t
515 convert_to_stxt(RTF::Header &header, BDataIO &target)
516 {
517 	// count text bytes
518 
519 	size_t textSize = 0;
520 
521 	try {
522 		TextOutput counter(header, NULL, false);
523 
524 		counter.Work();
525 		textSize = counter.Length();
526 	} catch (status_t status) {
527 		return status;
528 	}
529 
530 	// put out header
531 
532 	TranslatorStyledTextStreamHeader stxtHeader;
533 	stxtHeader.header.magic = 'STXT';
534 	stxtHeader.header.header_size = sizeof(TranslatorStyledTextStreamHeader);
535 	stxtHeader.header.data_size = 0;
536 	stxtHeader.version = 100;
537 	status_t status = swap_data(B_UINT32_TYPE, &stxtHeader, sizeof(stxtHeader),
538 		B_SWAP_HOST_TO_BENDIAN);
539 	if (status != B_OK)
540 		return status;
541 
542 	ssize_t written = target.Write(&stxtHeader, sizeof(stxtHeader));
543 	if (written < B_OK)
544 		return written;
545 	if (written != sizeof(stxtHeader))
546 		return B_IO_ERROR;
547 
548 	TranslatorStyledTextTextHeader textHeader;
549 	textHeader.header.magic = 'TEXT';
550 	textHeader.header.header_size = sizeof(TranslatorStyledTextTextHeader);
551 	textHeader.header.data_size = textSize;
552 	textHeader.charset = B_UNICODE_UTF8;
553 	status = swap_data(B_UINT32_TYPE, &textHeader, sizeof(textHeader),
554 		B_SWAP_HOST_TO_BENDIAN);
555 	if (status != B_OK)
556 		return status;
557 
558 	written = target.Write(&textHeader, sizeof(textHeader));
559 	if (written < B_OK)
560 		return written;
561 	if (written != sizeof(textHeader))
562 		return B_IO_ERROR;
563 
564 	// put out main text
565 
566 	void *flattenedRuns = NULL;
567 	int32 flattenedSize = 0;
568 
569 	try {
570 		TextOutput output(header, &target, true);
571 
572 		output.Work();
573 		flattenedRuns = output.FlattenedRunArray(flattenedSize);
574 	} catch (status_t status) {
575 		return status;
576 	}
577 
578 	BPrivate::MemoryDeleter _(flattenedRuns);
579 
580 	// put out styles
581 
582 	TranslatorStyledTextStyleHeader styleHeader;
583 	styleHeader.header.magic = 'STYL';
584 	styleHeader.header.header_size = sizeof(TranslatorStyledTextStyleHeader);
585 	styleHeader.header.data_size = flattenedSize;
586 	styleHeader.apply_offset = 0;
587 	styleHeader.apply_length = textSize;
588 
589 	status = swap_data(B_UINT32_TYPE, &styleHeader, sizeof(styleHeader),
590 		B_SWAP_HOST_TO_BENDIAN);
591 	if (status != B_OK)
592 		return status;
593 
594 	written = target.Write(&styleHeader, sizeof(styleHeader));
595 	if (written < B_OK)
596 		return written;
597 	if (written != sizeof(styleHeader))
598 		return B_IO_ERROR;
599 
600 	// output actual style information
601 	written = target.Write(flattenedRuns, flattenedSize);
602 
603 	if (written < B_OK)
604 		return written;
605 	if (written != flattenedSize)
606 		return B_IO_ERROR;
607 
608 	return B_OK;
609 }
610 
611 
612 status_t
613 convert_to_plain_text(RTF::Header &header, BPositionIO &target)
614 {
615 	// put out main text
616 
617 	void *flattenedRuns = NULL;
618 	int32 flattenedSize = 0;
619 
620 	// TODO: this is not really nice, we should adopt the BPositionIO class
621 	//	from Dano/Zeta which has meta data support
622 	BFile *file = dynamic_cast<BFile *>(&target);
623 
624 	try {
625 		TextOutput output(header, &target, file != NULL);
626 
627 		output.Work();
628 		flattenedRuns = output.FlattenedRunArray(flattenedSize);
629 	} catch (status_t status) {
630 		return status;
631 	}
632 
633 	if (file == NULL) {
634 		// we can't write the styles
635 		return B_OK;
636 	}
637 
638 	// put out styles
639 
640 	ssize_t written = file->WriteAttr("styles", B_RAW_TYPE, 0, flattenedRuns,
641 		flattenedSize);
642 	if (written >= B_OK && written != flattenedSize)
643 		file->RemoveAttr("styles");
644 
645 	free(flattenedRuns);
646 	return B_OK;
647 }
648