1 /* 2 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "convert.h" 8 #include "Stack.h" 9 10 #include <TranslatorFormats.h> 11 12 #include <Application.h> 13 #include <TextView.h> 14 #include <TypeConstants.h> 15 #include <ByteOrder.h> 16 #include <Node.h> 17 #include <Font.h> 18 19 #include <stdlib.h> 20 #include <stdio.h> 21 #include <string.h> 22 23 24 struct conversion_context { 25 conversion_context() 26 { 27 Reset(); 28 } 29 30 void Reset(); 31 32 int32 section; 33 int32 page; 34 int32 start_page; 35 int32 first_line_indent; 36 bool new_line; 37 }; 38 39 40 class TextOutput : public RTF::Worker { 41 public: 42 TextOutput(RTF::Header &start, BDataIO *stream, bool processRuns); 43 ~TextOutput(); 44 45 size_t Length() const; 46 void *FlattenedRunArray(int32 &size); 47 48 protected: 49 virtual void Group(RTF::Group *group); 50 virtual void GroupEnd(RTF::Group *group); 51 virtual void Command(RTF::Command *command); 52 virtual void Text(RTF::Text *text); 53 54 private: 55 void PrepareTextRun(text_run *current) throw (status_t); 56 57 BDataIO *fTarget; 58 int32 fOffset; 59 conversion_context fContext; 60 Stack<text_run *> fGroupStack; 61 bool fProcessRuns; 62 BList fRuns; 63 text_run *fCurrentRun; 64 BApplication *fApplication; 65 }; 66 67 68 void 69 conversion_context::Reset() 70 { 71 section = 1; 72 page = 1; 73 start_page = page; 74 first_line_indent = 0; 75 new_line = true; 76 } 77 78 79 // #pragma mark - 80 81 82 static size_t 83 write_text(conversion_context &context, const char *text, size_t length, 84 BDataIO *target = NULL) throw (status_t) 85 { 86 size_t prefix = 0; 87 if (context.new_line) { 88 prefix = context.first_line_indent; 89 context.new_line = false; 90 } 91 92 if (target == NULL) 93 return prefix + length; 94 95 for (uint32 i = 0; i < prefix; i++) { 96 write_text(context, " ", 1, target); 97 } 98 99 ssize_t written = target->Write(text, length); 100 if (written < B_OK) 101 throw (status_t)written; 102 else if ((size_t)written != length) 103 throw (status_t)B_IO_ERROR; 104 105 return prefix + length; 106 } 107 108 109 static size_t 110 write_text(conversion_context &context, const char *text, 111 BDataIO *target = NULL) throw (status_t) 112 { 113 return write_text(context, text, strlen(text), target); 114 } 115 116 117 static size_t 118 next_line(conversion_context &context, const char *prefix, 119 BDataIO *target) throw (status_t) 120 { 121 size_t length = strlen(prefix); 122 context.new_line = true; 123 124 if (target != NULL) { 125 ssize_t written = target->Write(prefix, length); 126 if (written < B_OK) 127 throw (status_t)written; 128 else if ((size_t)written != length) 129 throw (status_t)B_IO_ERROR; 130 } 131 132 return length; 133 } 134 135 136 static size_t 137 write_unicode_char(conversion_context &context, uint32 c, 138 BDataIO *target) throw (status_t) 139 { 140 size_t length = 1; 141 char bytes[4]; 142 143 if (c < 0x80) 144 bytes[0] = c; 145 else if (c < 0x800) { 146 bytes[0] = 0xc0 | (c >> 6); 147 bytes[1] = 0x80 | (c & 0x3f); 148 length = 2; 149 } else if (c < 0x10000) { 150 bytes[0] = 0xe0 | (c >> 12); 151 bytes[1] = 0x80 | ((c >> 6) & 0x3f); 152 bytes[2] = 0x80 | (c & 0x3f); 153 length = 3; 154 } else if (c <= 0x10ffff) { 155 bytes[0] = 0xf0 | (c >> 18); 156 bytes[1] = 0x80 | ((c >> 12) & 0x3f); 157 bytes[2] = 0x80 | ((c >> 6) & 0x3f); 158 bytes[3] = 0x80 | (c & 0x3f); 159 length = 4; 160 } 161 162 return write_text(context, bytes, length, target); 163 } 164 165 166 static size_t 167 process_command(conversion_context &context, RTF::Command *command, 168 BDataIO *target) throw (status_t) 169 { 170 const char *name = command->Name(); 171 172 if (!strcmp(name, "par") || !strcmp(name, "line")) { 173 // paragraph ended 174 return next_line(context, "\n", target); 175 } 176 if (!strcmp(name, "sect")) { 177 // section ended 178 context.section++; 179 return next_line(context, "\n", target); 180 } 181 if (!strcmp(name, "page")) { 182 // we just insert two carriage returns for a page break 183 context.page++; 184 return next_line(context, "\n\n", target); 185 } 186 if (!strcmp(name, "tab")) { 187 return write_text(context, "\t", target); 188 } 189 if (!strcmp(name, "'")) { 190 return write_unicode_char(context, command->Option(), target); 191 } 192 193 if (!strcmp(name, "pard")) { 194 // reset paragraph 195 context.first_line_indent = 0; 196 return 0; 197 } 198 if (!strcmp(name, "fi") || !strcmp(name, "cufi")) { 199 // "cufi" first line indent in 1/100 space steps 200 // "fi" is most probably specified in 1/20 pts 201 // Currently, we don't differentiate between the two... 202 context.first_line_indent = (command->Option() + 50) / 100; 203 if (context.first_line_indent < 0) 204 context.first_line_indent = 0; 205 if (context.first_line_indent > 8) 206 context.first_line_indent = 8; 207 208 return 0; 209 } 210 211 // document variables 212 213 if (!strcmp(name, "sectnum")) { 214 char buffer[64]; 215 snprintf(buffer, sizeof(buffer), "%ld", context.section); 216 return write_text(context, buffer, target); 217 } 218 if (!strcmp(name, "pgnstarts")) { 219 context.start_page = command->HasOption() ? command->Option() : 1; 220 return 0; 221 } 222 if (!strcmp(name, "pgnrestart")) { 223 context.page = context.start_page; 224 return 0; 225 } 226 if (!strcmp(name, "chpgn")) { 227 char buffer[64]; 228 snprintf(buffer, sizeof(buffer), "%ld", context.page); 229 return write_text(context, buffer, target); 230 } 231 return 0; 232 } 233 234 235 static void 236 set_font_face(BFont &font, uint16 face, bool on) 237 { 238 // Special handling for B_REGULAR_FACE, since BFont::SetFace(0) 239 // just doesn't do anything 240 241 if (font.Face() == B_REGULAR_FACE && on) 242 font.SetFace(face); 243 else if ((font.Face() & ~face) == 0 && !on) 244 font.SetFace(B_REGULAR_FACE); 245 else if (on) 246 font.SetFace(font.Face() | face); 247 else 248 font.SetFace(font.Face() & ~face); 249 } 250 251 252 static bool 253 text_runs_are_equal(text_run *a, text_run *b) 254 { 255 if (a == NULL && b == NULL) 256 return true; 257 258 if (a == NULL || b == NULL) 259 return false; 260 261 return a->offset == b->offset 262 && *(uint32*)&a->color == *(uint32*)&b->color 263 && a->font == b->font; 264 } 265 266 267 static text_run * 268 copy_text_run(text_run *run) 269 { 270 static const rgb_color kBlack = {0, 0, 0, 255}; 271 272 text_run *newRun = new text_run(); 273 if (newRun == NULL) 274 throw (status_t)B_NO_MEMORY; 275 276 if (run != NULL) { 277 newRun->offset = run->offset; 278 newRun->font = run->font; 279 newRun->color = run->color; 280 } else { 281 newRun->offset = 0; 282 newRun->color = kBlack; 283 } 284 285 return newRun; 286 } 287 288 289 #if 0 290 void 291 dump_text_run(text_run *run) 292 { 293 if (run == NULL) 294 return; 295 296 printf("run: offset = %ld, color = {%d,%d,%d}, font = ", 297 run->offset, run->color.red, run->color.green, run->color.blue); 298 run->font.PrintToStream(); 299 } 300 #endif 301 302 303 // #pragma mark - 304 305 306 TextOutput::TextOutput(RTF::Header &start, BDataIO *stream, bool processRuns) 307 : RTF::Worker(start), 308 fTarget(stream), 309 fOffset(0), 310 fProcessRuns(processRuns), 311 fCurrentRun(NULL), 312 fApplication(NULL) 313 { 314 // This is not nice, but it's the only we can provide all features on command 315 // line tools that don't create a BApplication - without a BApplication, we 316 // could not support any text styles (colors and fonts) 317 318 if (processRuns && be_app == NULL) 319 fApplication = new BApplication("application/x-vnd.Haiku-RTF-Translator"); 320 } 321 322 323 TextOutput::~TextOutput() 324 { 325 delete fApplication; 326 } 327 328 329 size_t 330 TextOutput::Length() const 331 { 332 return (size_t)fOffset; 333 } 334 335 336 void * 337 TextOutput::FlattenedRunArray(int32 &_size) 338 { 339 // are there any styles? 340 if (fRuns.CountItems() == 0) { 341 _size = 0; 342 return NULL; 343 } 344 345 // create array 346 347 text_run_array *array = (text_run_array *)malloc(sizeof(text_run_array) 348 + sizeof(text_run) * (fRuns.CountItems() - 1)); 349 if (array == NULL) 350 throw (status_t)B_NO_MEMORY; 351 352 array->count = fRuns.CountItems(); 353 354 for (int32 i = 0; i < array->count; i++) { 355 text_run *run = (text_run *)fRuns.RemoveItem(0L); 356 array->runs[i] = *run; 357 delete run; 358 } 359 360 return BTextView::FlattenRunArray(array, &_size); 361 } 362 363 364 void 365 TextOutput::PrepareTextRun(text_run *run) throw (status_t) 366 { 367 if (run != NULL && fOffset == run->offset) 368 return; 369 370 text_run *newRun = copy_text_run(run); 371 372 newRun->offset = fOffset; 373 374 fRuns.AddItem(newRun); 375 fCurrentRun = newRun; 376 } 377 378 379 void 380 TextOutput::Group(RTF::Group *group) 381 { 382 if (group->Destination() != RTF::TEXT_DESTINATION) { 383 Skip(); 384 return; 385 } 386 387 if (!fProcessRuns) 388 return; 389 390 // We only push a copy of the run on the stack because the current 391 // run may still be changed in the new group -- later, we'll just 392 // see if that was the case, and either use the copied one then, 393 // or throw it away 394 text_run *run = NULL; 395 if (fCurrentRun != NULL) 396 run = copy_text_run(fCurrentRun); 397 398 fGroupStack.Push(run); 399 } 400 401 402 void 403 TextOutput::GroupEnd(RTF::Group *group) 404 { 405 if (!fProcessRuns) 406 return; 407 408 text_run *last; 409 fGroupStack.Pop(&last); 410 411 // has the style been changed? 412 if (!text_runs_are_equal(last, fCurrentRun)) { 413 if (fCurrentRun != NULL && last != NULL 414 && fCurrentRun->offset == fOffset) { 415 // replace the current one, we don't need it anymore 416 fCurrentRun->color = last->color; 417 fCurrentRun->font = last->font; 418 delete last; 419 } else if (last) { 420 // adopt the text_run from the previous group 421 last->offset = fOffset; 422 fRuns.AddItem(last); 423 fCurrentRun = last; 424 } 425 } else 426 delete last; 427 } 428 429 430 void 431 TextOutput::Command(RTF::Command *command) 432 { 433 if (!fProcessRuns) { 434 fOffset += process_command(fContext, command, fTarget); 435 return; 436 } 437 438 const char *name = command->Name(); 439 440 if (!strcmp(name, "cf")) { 441 // foreground color 442 PrepareTextRun(fCurrentRun); 443 fCurrentRun->color = Start().Color(command->Option()); 444 } else if (!strcmp(name, "b") 445 || !strcmp(name, "embo") || !strcmp(name, "impr")) { 446 // bold style ("emboss" and "engrave" are currently the same, too) 447 PrepareTextRun(fCurrentRun); 448 set_font_face(fCurrentRun->font, B_BOLD_FACE, command->Option() != 0); 449 } else if (!strcmp(name, "i")) { 450 // bold style 451 PrepareTextRun(fCurrentRun); 452 set_font_face(fCurrentRun->font, B_ITALIC_FACE, command->Option() != 0); 453 } else if (!strcmp(name, "ul")) { 454 // bold style 455 PrepareTextRun(fCurrentRun); 456 set_font_face(fCurrentRun->font, B_UNDERSCORE_FACE, command->Option() != 0); 457 } else if (!strcmp(name, "fs")) { 458 // font size in half points 459 PrepareTextRun(fCurrentRun); 460 fCurrentRun->font.SetSize(command->Option() / 2.0); 461 } else if (!strcmp(name, "plain")) { 462 // reset font to plain style 463 PrepareTextRun(fCurrentRun); 464 fCurrentRun->font = be_plain_font; 465 } else if (!strcmp(name, "f")) { 466 // font number 467 RTF::Group *fonts = Start().FindGroup("fonttbl"); 468 if (fonts == NULL) 469 return; 470 471 PrepareTextRun(fCurrentRun); 472 BFont font; 473 // missing font info will be replaced by the default font 474 475 RTF::Command *info; 476 for (int32 index = 0; (info = fonts->FindDefinition("f", index)) != NULL; index++) { 477 if (info->Option() != command->Option()) 478 continue; 479 480 // ToDo: really try to choose font by name and serif/sans-serif 481 // ToDo: the font list should be built before once 482 483 // For now, it only differentiates fixed fonts from proportional ones 484 if (fonts->FindDefinition("fmodern", index) != NULL) 485 font = be_fixed_font; 486 } 487 488 font_family family; 489 font_style style; 490 font.GetFamilyAndStyle(&family, &style); 491 492 fCurrentRun->font.SetFamilyAndFace(family, fCurrentRun->font.Face()); 493 } else 494 fOffset += process_command(fContext, command, fTarget); 495 } 496 497 498 void 499 TextOutput::Text(RTF::Text *text) 500 { 501 fOffset += write_text(fContext, text->String(), text->Length(), fTarget); 502 } 503 504 505 // #pragma mark - 506 507 508 status_t 509 convert_to_stxt(RTF::Header &header, BDataIO &target) 510 { 511 // count text bytes 512 513 size_t textSize = 0; 514 515 try { 516 TextOutput counter(header, NULL, false); 517 518 counter.Work(); 519 textSize = counter.Length(); 520 } catch (status_t status) { 521 return status; 522 } 523 524 // put out header 525 526 TranslatorStyledTextStreamHeader stxtHeader; 527 stxtHeader.header.magic = 'STXT'; 528 stxtHeader.header.header_size = sizeof(TranslatorStyledTextStreamHeader); 529 stxtHeader.header.data_size = 0; 530 stxtHeader.version = 100; 531 status_t status = swap_data(B_UINT32_TYPE, &stxtHeader, sizeof(stxtHeader), 532 B_SWAP_HOST_TO_BENDIAN); 533 if (status != B_OK) 534 return status; 535 536 ssize_t written = target.Write(&stxtHeader, sizeof(stxtHeader)); 537 if (written < B_OK) 538 return written; 539 if (written != sizeof(stxtHeader)) 540 return B_IO_ERROR; 541 542 TranslatorStyledTextTextHeader textHeader; 543 textHeader.header.magic = 'TEXT'; 544 textHeader.header.header_size = sizeof(TranslatorStyledTextTextHeader); 545 textHeader.header.data_size = textSize; 546 textHeader.charset = B_UNICODE_UTF8; 547 status = swap_data(B_UINT32_TYPE, &textHeader, sizeof(textHeader), 548 B_SWAP_HOST_TO_BENDIAN); 549 if (status != B_OK) 550 return status; 551 552 written = target.Write(&textHeader, sizeof(textHeader)); 553 if (written < B_OK) 554 return written; 555 if (written != sizeof(textHeader)) 556 return B_IO_ERROR; 557 558 // put out main text 559 560 void *flattenedRuns = NULL; 561 int32 flattenedSize = 0; 562 563 try { 564 TextOutput output(header, &target, true); 565 566 output.Work(); 567 flattenedRuns = output.FlattenedRunArray(flattenedSize); 568 } catch (status_t status) { 569 return status; 570 } 571 572 // put out styles 573 574 TranslatorStyledTextStyleHeader styleHeader; 575 styleHeader.header.magic = 'STYL'; 576 styleHeader.header.header_size = sizeof(TranslatorStyledTextStyleHeader); 577 styleHeader.header.data_size = flattenedSize; 578 styleHeader.apply_offset = 0; 579 styleHeader.apply_length = textSize; 580 581 status = swap_data(B_UINT32_TYPE, &styleHeader, sizeof(styleHeader), 582 B_SWAP_HOST_TO_BENDIAN); 583 if (status != B_OK) 584 return status; 585 586 written = target.Write(&styleHeader, sizeof(styleHeader)); 587 if (written < B_OK) 588 return written; 589 if (written != sizeof(styleHeader)) 590 return B_IO_ERROR; 591 592 // output actual style information 593 written = target.Write(flattenedRuns, flattenedSize); 594 595 free(flattenedRuns); 596 597 if (written < B_OK) 598 return written; 599 if (written != flattenedSize) 600 return B_IO_ERROR; 601 602 return B_OK; 603 } 604 605 606 status_t 607 convert_to_plain_text(RTF::Header &header, BPositionIO &target) 608 { 609 // put out main text 610 611 void *flattenedRuns = NULL; 612 int32 flattenedSize = 0; 613 614 // ToDo: this is not really nice, we should adopt the BPositionIO class 615 // from Dano/Zeta which has meta data support 616 BNode *node = dynamic_cast<BNode *>(&target); 617 618 try { 619 TextOutput output(header, &target, node != NULL); 620 621 output.Work(); 622 flattenedRuns = output.FlattenedRunArray(flattenedSize); 623 } catch (status_t status) { 624 return status; 625 } 626 627 if (node == NULL) { 628 // we can't write the styles 629 return B_OK; 630 } 631 632 // put out styles 633 634 ssize_t written = node->WriteAttr("styles", B_RAW_TYPE, 0, flattenedRuns, flattenedSize); 635 if (written >= B_OK && written != flattenedSize) 636 node->RemoveAttr("styles"); 637 638 free(flattenedRuns); 639 return B_OK; 640 } 641