1 /* 2 * Copyright 2004-2009, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "convert.h" 8 #include "Stack.h" 9 10 #include <TranslatorFormats.h> 11 12 #include <Application.h> 13 #include <TextView.h> 14 #include <TypeConstants.h> 15 #include <ByteOrder.h> 16 #include <File.h> 17 #include <Font.h> 18 19 #include <AutoDeleter.h> 20 21 #include <stdlib.h> 22 #include <stdio.h> 23 #include <string.h> 24 25 26 struct conversion_context { 27 conversion_context() 28 { 29 Reset(); 30 } 31 32 void Reset(); 33 34 int32 section; 35 int32 page; 36 int32 start_page; 37 int32 first_line_indent; 38 bool new_line; 39 }; 40 41 42 class TextOutput : public RTF::Worker { 43 public: 44 TextOutput(RTF::Header &start, BDataIO *stream, bool processRuns); 45 ~TextOutput(); 46 47 size_t Length() const; 48 void *FlattenedRunArray(int32 &size); 49 50 protected: 51 virtual void Group(RTF::Group *group); 52 virtual void GroupEnd(RTF::Group *group); 53 virtual void Command(RTF::Command *command); 54 virtual void Text(RTF::Text *text); 55 56 private: 57 void PrepareTextRun(text_run *current) throw (status_t); 58 59 BDataIO *fTarget; 60 int32 fOffset; 61 conversion_context fContext; 62 Stack<text_run *> fGroupStack; 63 bool fProcessRuns; 64 BList fRuns; 65 text_run *fCurrentRun; 66 BApplication *fApplication; 67 }; 68 69 70 void 71 conversion_context::Reset() 72 { 73 section = 1; 74 page = 1; 75 start_page = page; 76 first_line_indent = 0; 77 new_line = true; 78 } 79 80 81 // #pragma mark - 82 83 84 static size_t 85 write_text(conversion_context &context, const char *text, size_t length, 86 BDataIO *target = NULL) throw (status_t) 87 { 88 size_t prefix = 0; 89 if (context.new_line) { 90 prefix = context.first_line_indent; 91 context.new_line = false; 92 } 93 94 if (target == NULL) 95 return prefix + length; 96 97 for (uint32 i = 0; i < prefix; i++) { 98 write_text(context, " ", 1, target); 99 } 100 101 ssize_t written = 
target->Write(text, length); 102 if (written < B_OK) 103 throw (status_t)written; 104 else if ((size_t)written != length) 105 throw (status_t)B_IO_ERROR; 106 107 return prefix + length; 108 } 109 110 111 static size_t 112 write_text(conversion_context &context, const char *text, 113 BDataIO *target = NULL) throw (status_t) 114 { 115 return write_text(context, text, strlen(text), target); 116 } 117 118 119 static size_t 120 next_line(conversion_context &context, const char *prefix, 121 BDataIO *target) throw (status_t) 122 { 123 size_t length = strlen(prefix); 124 context.new_line = true; 125 126 if (target != NULL) { 127 ssize_t written = target->Write(prefix, length); 128 if (written < B_OK) 129 throw (status_t)written; 130 else if ((size_t)written != length) 131 throw (status_t)B_IO_ERROR; 132 } 133 134 return length; 135 } 136 137 138 static size_t 139 write_unicode_char(conversion_context &context, uint32 c, 140 BDataIO *target) throw (status_t) 141 { 142 size_t length = 1; 143 char bytes[4]; 144 145 if (c < 0x80) 146 bytes[0] = c; 147 else if (c < 0x800) { 148 bytes[0] = 0xc0 | (c >> 6); 149 bytes[1] = 0x80 | (c & 0x3f); 150 length = 2; 151 } else if (c < 0x10000) { 152 bytes[0] = 0xe0 | (c >> 12); 153 bytes[1] = 0x80 | ((c >> 6) & 0x3f); 154 bytes[2] = 0x80 | (c & 0x3f); 155 length = 3; 156 } else if (c <= 0x10ffff) { 157 bytes[0] = 0xf0 | (c >> 18); 158 bytes[1] = 0x80 | ((c >> 12) & 0x3f); 159 bytes[2] = 0x80 | ((c >> 6) & 0x3f); 160 bytes[3] = 0x80 | (c & 0x3f); 161 length = 4; 162 } 163 164 return write_text(context, bytes, length, target); 165 } 166 167 168 static size_t 169 process_command(conversion_context &context, RTF::Command *command, 170 BDataIO *target) throw (status_t) 171 { 172 const char *name = command->Name(); 173 174 if (!strcmp(name, "par") || !strcmp(name, "line")) { 175 // paragraph ended 176 return next_line(context, "\n", target); 177 } 178 if (!strcmp(name, "sect")) { 179 // section ended 180 context.section++; 181 return 
next_line(context, "\n", target); 182 } 183 if (!strcmp(name, "page")) { 184 // we just insert two carriage returns for a page break 185 context.page++; 186 return next_line(context, "\n\n", target); 187 } 188 if (!strcmp(name, "tab")) { 189 return write_text(context, "\t", target); 190 } 191 if (!strcmp(name, "'")) { 192 return write_unicode_char(context, command->Option(), target); 193 } 194 195 if (!strcmp(name, "pard")) { 196 // reset paragraph 197 context.first_line_indent = 0; 198 return 0; 199 } 200 if (!strcmp(name, "fi") || !strcmp(name, "cufi")) { 201 // "cufi" first line indent in 1/100 space steps 202 // "fi" is most probably specified in 1/20 pts 203 // Currently, we don't differentiate between the two... 204 context.first_line_indent = (command->Option() + 50) / 100; 205 if (context.first_line_indent < 0) 206 context.first_line_indent = 0; 207 if (context.first_line_indent > 8) 208 context.first_line_indent = 8; 209 210 return 0; 211 } 212 213 // document variables 214 215 if (!strcmp(name, "sectnum")) { 216 char buffer[64]; 217 snprintf(buffer, sizeof(buffer), "%" B_PRId32, context.section); 218 return write_text(context, buffer, target); 219 } 220 if (!strcmp(name, "pgnstarts")) { 221 context.start_page = command->HasOption() ? 
command->Option() : 1; 222 return 0; 223 } 224 if (!strcmp(name, "pgnrestart")) { 225 context.page = context.start_page; 226 return 0; 227 } 228 if (!strcmp(name, "chpgn")) { 229 char buffer[64]; 230 snprintf(buffer, sizeof(buffer), "%" B_PRId32, context.page); 231 return write_text(context, buffer, target); 232 } 233 return 0; 234 } 235 236 237 static void 238 set_font_face(BFont &font, uint16 face, bool on) 239 { 240 // Special handling for B_REGULAR_FACE, since BFont::SetFace(0) 241 // just doesn't do anything 242 243 if (font.Face() == B_REGULAR_FACE && on) 244 font.SetFace(face); 245 else if ((font.Face() & ~face) == 0 && !on) 246 font.SetFace(B_REGULAR_FACE); 247 else if (on) 248 font.SetFace(font.Face() | face); 249 else 250 font.SetFace(font.Face() & ~face); 251 } 252 253 254 static bool 255 text_runs_are_equal(text_run *a, text_run *b) 256 { 257 if (a == NULL && b == NULL) 258 return true; 259 260 if (a == NULL || b == NULL) 261 return false; 262 263 return a->offset == b->offset 264 && *(uint32*)&a->color == *(uint32*)&b->color 265 && a->font == b->font; 266 } 267 268 269 static text_run * 270 copy_text_run(text_run *run) 271 { 272 static const rgb_color kBlack = {0, 0, 0, 255}; 273 274 text_run *newRun = new text_run(); 275 if (newRun == NULL) 276 throw (status_t)B_NO_MEMORY; 277 278 if (run != NULL) { 279 newRun->offset = run->offset; 280 newRun->font = run->font; 281 newRun->color = run->color; 282 } else { 283 newRun->offset = 0; 284 newRun->color = kBlack; 285 } 286 287 return newRun; 288 } 289 290 291 #if 0 292 void 293 dump_text_run(text_run *run) 294 { 295 if (run == NULL) 296 return; 297 298 printf("run: offset = %ld, color = {%d,%d,%d}, font = ", 299 run->offset, run->color.red, run->color.green, run->color.blue); 300 run->font.PrintToStream(); 301 } 302 #endif 303 304 305 // #pragma mark - 306 307 308 TextOutput::TextOutput(RTF::Header &start, BDataIO *stream, bool processRuns) 309 : RTF::Worker(start), 310 fTarget(stream), 311 fOffset(0), 312 
fProcessRuns(processRuns), 313 fCurrentRun(NULL), 314 fApplication(NULL) 315 { 316 // This is not nice, but it's the only we can provide all features on command 317 // line tools that don't create a BApplication - without a BApplication, we 318 // could not support any text styles (colors and fonts) 319 320 if (processRuns && be_app == NULL) 321 fApplication = new BApplication("application/x-vnd.Haiku-RTFTranslator"); 322 } 323 324 325 TextOutput::~TextOutput() 326 { 327 delete fApplication; 328 } 329 330 331 size_t 332 TextOutput::Length() const 333 { 334 return (size_t)fOffset; 335 } 336 337 338 void * 339 TextOutput::FlattenedRunArray(int32 &_size) 340 { 341 // are there any styles? 342 if (fRuns.CountItems() == 0) { 343 _size = 0; 344 return NULL; 345 } 346 347 // create array 348 349 text_run_array *array = (text_run_array *)malloc(sizeof(text_run_array) 350 + sizeof(text_run) * (fRuns.CountItems() - 1)); 351 if (array == NULL) 352 throw (status_t)B_NO_MEMORY; 353 354 array->count = fRuns.CountItems(); 355 356 for (int32 i = 0; i < array->count; i++) { 357 text_run *run = (text_run *)fRuns.RemoveItem((int32)0); 358 array->runs[i] = *run; 359 delete run; 360 } 361 362 void *flattenedRunArray = BTextView::FlattenRunArray(array, &_size); 363 364 free(array); 365 366 return flattenedRunArray; 367 } 368 369 370 void 371 TextOutput::PrepareTextRun(text_run *run) throw (status_t) 372 { 373 if (run != NULL && fOffset == run->offset) 374 return; 375 376 text_run *newRun = copy_text_run(run); 377 378 newRun->offset = fOffset; 379 380 fRuns.AddItem(newRun); 381 fCurrentRun = newRun; 382 } 383 384 385 void 386 TextOutput::Group(RTF::Group *group) 387 { 388 if (group->Destination() != RTF::TEXT_DESTINATION) { 389 Skip(); 390 return; 391 } 392 393 if (!fProcessRuns) 394 return; 395 396 // We only push a copy of the run on the stack because the current 397 // run may still be changed in the new group -- later, we'll just 398 // see if that was the case, and either use the 
copied one then, 399 // or throw it away 400 text_run *run = NULL; 401 if (fCurrentRun != NULL) 402 run = copy_text_run(fCurrentRun); 403 404 fGroupStack.Push(run); 405 } 406 407 408 void 409 TextOutput::GroupEnd(RTF::Group *group) 410 { 411 if (!fProcessRuns) 412 return; 413 414 text_run *last = NULL; 415 fGroupStack.Pop(&last); 416 417 // has the style been changed? 418 if (!text_runs_are_equal(last, fCurrentRun)) { 419 if (fCurrentRun != NULL && last != NULL 420 && fCurrentRun->offset == fOffset) { 421 // replace the current one, we don't need it anymore 422 fCurrentRun->color = last->color; 423 fCurrentRun->font = last->font; 424 delete last; 425 } else if (last) { 426 // adopt the text_run from the previous group 427 last->offset = fOffset; 428 fRuns.AddItem(last); 429 fCurrentRun = last; 430 } 431 } else 432 delete last; 433 } 434 435 436 void 437 TextOutput::Command(RTF::Command *command) 438 { 439 if (!fProcessRuns) { 440 fOffset += process_command(fContext, command, fTarget); 441 return; 442 } 443 444 const char *name = command->Name(); 445 446 if (!strcmp(name, "cf")) { 447 // foreground color 448 PrepareTextRun(fCurrentRun); 449 fCurrentRun->color = Start().Color(command->Option()); 450 } else if (!strcmp(name, "b") 451 || !strcmp(name, "embo") || !strcmp(name, "impr")) { 452 // bold style ("emboss" and "engrave" are currently the same, too) 453 PrepareTextRun(fCurrentRun); 454 set_font_face(fCurrentRun->font, B_BOLD_FACE, command->Option() != 0); 455 } else if (!strcmp(name, "i")) { 456 // bold style 457 PrepareTextRun(fCurrentRun); 458 set_font_face(fCurrentRun->font, B_ITALIC_FACE, command->Option() != 0); 459 } else if (!strcmp(name, "ul")) { 460 // bold style 461 PrepareTextRun(fCurrentRun); 462 set_font_face(fCurrentRun->font, B_UNDERSCORE_FACE, command->Option() != 0); 463 } else if (!strcmp(name, "fs")) { 464 // font size in half points 465 PrepareTextRun(fCurrentRun); 466 fCurrentRun->font.SetSize(command->Option() / 2.0); 467 } else if 
(!strcmp(name, "plain")) { 468 // reset font to plain style 469 PrepareTextRun(fCurrentRun); 470 fCurrentRun->font = be_plain_font; 471 } else if (!strcmp(name, "f")) { 472 // font number 473 RTF::Group *fonts = Start().FindGroup("fonttbl"); 474 if (fonts == NULL) 475 return; 476 477 PrepareTextRun(fCurrentRun); 478 BFont font; 479 // missing font info will be replaced by the default font 480 481 RTF::Command *info; 482 for (int32 index = 0; (info = fonts->FindDefinition("f", index)) != NULL; index++) { 483 if (info->Option() != command->Option()) 484 continue; 485 486 // ToDo: really try to choose font by name and serif/sans-serif 487 // ToDo: the font list should be built before once 488 489 // For now, it only differentiates fixed fonts from proportional ones 490 if (fonts->FindDefinition("fmodern", index) != NULL) 491 font = be_fixed_font; 492 } 493 494 font_family family; 495 font_style style; 496 font.GetFamilyAndStyle(&family, &style); 497 498 fCurrentRun->font.SetFamilyAndFace(family, fCurrentRun->font.Face()); 499 } else 500 fOffset += process_command(fContext, command, fTarget); 501 } 502 503 504 void 505 TextOutput::Text(RTF::Text *text) 506 { 507 fOffset += write_text(fContext, text->String(), text->Length(), fTarget); 508 } 509 510 511 // #pragma mark - 512 513 514 status_t 515 convert_to_stxt(RTF::Header &header, BDataIO &target) 516 { 517 // count text bytes 518 519 size_t textSize = 0; 520 521 try { 522 TextOutput counter(header, NULL, false); 523 524 counter.Work(); 525 textSize = counter.Length(); 526 } catch (status_t status) { 527 return status; 528 } 529 530 // put out header 531 532 TranslatorStyledTextStreamHeader stxtHeader; 533 stxtHeader.header.magic = 'STXT'; 534 stxtHeader.header.header_size = sizeof(TranslatorStyledTextStreamHeader); 535 stxtHeader.header.data_size = 0; 536 stxtHeader.version = 100; 537 status_t status = swap_data(B_UINT32_TYPE, &stxtHeader, sizeof(stxtHeader), 538 B_SWAP_HOST_TO_BENDIAN); 539 if (status != B_OK) 540 
return status; 541 542 ssize_t written = target.Write(&stxtHeader, sizeof(stxtHeader)); 543 if (written < B_OK) 544 return written; 545 if (written != sizeof(stxtHeader)) 546 return B_IO_ERROR; 547 548 TranslatorStyledTextTextHeader textHeader; 549 textHeader.header.magic = 'TEXT'; 550 textHeader.header.header_size = sizeof(TranslatorStyledTextTextHeader); 551 textHeader.header.data_size = textSize; 552 textHeader.charset = B_UNICODE_UTF8; 553 status = swap_data(B_UINT32_TYPE, &textHeader, sizeof(textHeader), 554 B_SWAP_HOST_TO_BENDIAN); 555 if (status != B_OK) 556 return status; 557 558 written = target.Write(&textHeader, sizeof(textHeader)); 559 if (written < B_OK) 560 return written; 561 if (written != sizeof(textHeader)) 562 return B_IO_ERROR; 563 564 // put out main text 565 566 void *flattenedRuns = NULL; 567 int32 flattenedSize = 0; 568 569 try { 570 TextOutput output(header, &target, true); 571 572 output.Work(); 573 flattenedRuns = output.FlattenedRunArray(flattenedSize); 574 } catch (status_t status) { 575 return status; 576 } 577 578 BPrivate::MemoryDeleter _(flattenedRuns); 579 580 // put out styles 581 582 TranslatorStyledTextStyleHeader styleHeader; 583 styleHeader.header.magic = 'STYL'; 584 styleHeader.header.header_size = sizeof(TranslatorStyledTextStyleHeader); 585 styleHeader.header.data_size = flattenedSize; 586 styleHeader.apply_offset = 0; 587 styleHeader.apply_length = textSize; 588 589 status = swap_data(B_UINT32_TYPE, &styleHeader, sizeof(styleHeader), 590 B_SWAP_HOST_TO_BENDIAN); 591 if (status != B_OK) 592 return status; 593 594 written = target.Write(&styleHeader, sizeof(styleHeader)); 595 if (written < B_OK) 596 return written; 597 if (written != sizeof(styleHeader)) 598 return B_IO_ERROR; 599 600 // output actual style information 601 written = target.Write(flattenedRuns, flattenedSize); 602 603 if (written < B_OK) 604 return written; 605 if (written != flattenedSize) 606 return B_IO_ERROR; 607 608 return B_OK; 609 } 610 611 612 
status_t
/*!	Writes the text of the document to \a target. If \a target is a BFile,
	the text styles are collected as well, and stored flattened in the
	"styles" attribute of the file.
	Failing to write the attribute is not treated as an error; a partially
	written attribute is removed again.

	\return B_OK on success, or the error thrown while writing the text.
*/
convert_to_plain_text(RTF::Header &header, BPositionIO &target)
{
	// TODO: this is not really nice, we should adopt the BPositionIO class
	//		from Dano/Zeta which has meta data support
	BFile *attributeFile = dynamic_cast<BFile *>(&target);
	const bool withStyles = attributeFile != NULL;

	// put out main text

	void *runData = NULL;
	int32 runDataSize = 0;

	try {
		TextOutput writer(header, &target, withStyles);

		writer.Work();
		runData = writer.FlattenedRunArray(runDataSize);
	} catch (status_t error) {
		return error;
	}

	if (withStyles) {
		// put out styles

		ssize_t result = attributeFile->WriteAttr("styles", B_RAW_TYPE, 0,
			runData, runDataSize);

		// an incomplete attribute is of no use to anyone
		const bool incomplete = result >= B_OK && result != runDataSize;
		if (incomplete)
			attributeFile->RemoveAttr("styles");

		free(runData);
	}

	// without a file to attach them to, the styles cannot be written
	return B_OK;
}