1 /* 2 * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "RTF.h" 8 9 #include <ctype.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include <DataIO.h> 15 16 17 //#define TRACE_RTF 18 #ifdef TRACE_RTF 19 # define TRACE(x...) printf(x) 20 #else 21 # define TRACE(x...) ; 22 #endif 23 24 25 static const char *kDestinationControlWords[] = { 26 "aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate", 27 "atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend", 28 "atrfstart", "author", "background", "bkmkend", "buptim", "colortbl", 29 "comment", "creatim", "do", "doccomm", "docvar", "fonttbl", "footer", 30 "footerf", "footerl", "footerr", "footnote", "ftncn", "ftnsep", 31 "ftnsepc", "header", "headerf", "headerl", "headerr", "info", 32 "keywords", "operator", "pict", "printim", "private1", "revtim", 33 "rxe", "stylesheet", "subject", "tc", "title", "txe", "xe", 34 }; 35 36 static char read_char(BDataIO &stream, bool endOfFileAllowed = false); 37 static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10); 38 39 40 using namespace RTF; 41 42 43 static char 44 read_char(BDataIO &stream, bool endOfFileAllowed) 45 { 46 char c; 47 ssize_t bytesRead = stream.Read(&c, 1); 48 49 if (bytesRead < B_OK) 50 throw (status_t)bytesRead; 51 52 if (bytesRead == 0 && !endOfFileAllowed) 53 throw (status_t)B_ERROR; 54 55 return c; 56 } 57 58 59 static int32 60 parse_integer(char first, BDataIO &stream, char &_last, int32 base) 61 { 62 const char *kDigits = "0123456789abcdef"; 63 int32 integer = 0; 64 int32 count = 0; 65 66 char digit = first; 67 68 if (digit == '\0') 69 digit = read_char(stream); 70 71 while (true) { 72 int32 pos = 0; 73 for (; pos < base; pos++) { 74 if (kDigits[pos] == tolower(digit)) { 75 integer = integer * base + pos; 76 count++; 77 break; 78 } 79 } 80 if (pos == base) { 81 _last = digit; 82 goto out; 83 } 84 85 digit = read_char(stream); 86 } 87 88 out: 89 if (count == 0) 90 throw (status_t)B_BAD_TYPE; 91 92 return integer; 93 } 94 95 96 static int 97 string_array_compare(const char *key, const char **array) 98 { 99 return strcmp(key, array[0]); 100 } 101 102 103 static void 104 dump(Element &element, int32 level = 0) 105 { 106 printf("%03" B_PRId32 " (%p):", level, &element); 107 for (int32 i = 0; i < level; i++) 108 printf(" "); 109 110 if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) { 111 printf("<RTF header, major version %" B_PRId32 ">\n", header->Version()); 112 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) { 113 printf("<Command: %s", command->Name()); 114 if (command->HasOption()) 115 printf(", Option %" B_PRId32, command->Option()); 116 puts(">"); 117 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) { 118 printf("<Text>"); 119 puts(text->String()); 120 } else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) 121 printf("<Group \"%s\">\n", group->Name()); 122 123 if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) { 124 for (uint32 i = 0; i < group->CountElements(); i++) 125 dump(*group->ElementAt(i), level + 1); 126 } 127 } 128 129 130 // #pragma mark - 131 132 133 Parser::Parser(BPositionIO &stream) 134 : 135 fStream(&stream, 65536, false), 136 fIdentified(false) 137 { 138 } 139 140 141 status_t 142 Parser::Identify() 143 { 144 char header[5]; 145 if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header)) 146 return B_IO_ERROR; 147 148 if (strncmp(header, "{\\rtf", 5)) 149 return B_BAD_TYPE; 150 151 fIdentified = true; 152 return B_OK; 153 } 154 155 156 status_t 157 Parser::Parse(Header &header) 158 { 159 if (!fIdentified && Identify() != B_OK) 160 return B_BAD_TYPE; 161 162 try { 163 int32 openBrackets = 1; 164 165 // since we already preparsed parts of the RTF header, the header 166 // is handled here directly 167 char last; 168 header.Parse('\0', fStream, last); 169 170 Group *parent = &header; 171 char c = last; 172 173 while (true) { 174 Element *element = NULL; 175 176 // we'll just ignore the end of the stream 177 if (parent == NULL) 178 return B_OK; 179 180 switch (c) { 181 case '{': 182 openBrackets++; 183 parent->AddElement(element = new Group()); 184 parent = static_cast<Group *>(element); 185 break; 186 187 case '\\': 188 parent->AddElement(element = new Command()); 189 break; 190 191 case '}': 192 openBrackets--; 193 parent->DetermineDestination(); 194 parent = parent->Parent(); 195 // supposed to fall through 196 case '\n': 197 case '\r': 198 { 199 ssize_t bytesRead = fStream.Read(&c, 1); 200 if (bytesRead < B_OK) 201 throw (status_t)bytesRead; 202 else if (bytesRead != 1) { 203 // this is the only valid exit status 204 if (openBrackets == 0) 205 return B_OK; 206 207 throw (status_t)B_ERROR; 208 } 209 continue; 210 } 211 212 default: 213 parent->AddElement(element = new Text()); 214 break; 215 } 216 217 if (element == NULL) 218 throw (status_t)B_ERROR; 219 220 element->Parse(c, fStream, last); 221 c = last; 222 } 223 } catch (status_t status) { 224 return status; 225 } 226 227 return B_OK; 228 } 229 230 231 // #pragma mark - 232 233 234 Element::Element() 235 : 236 fParent(NULL) 237 { 238 } 239 240 241 Element::~Element() 242 { 243 } 244 245 246 void 247 Element::SetParent(Group *parent) 248 { 249 fParent = parent; 250 } 251 252 253 Group * 254 Element::Parent() const 255 { 256 return fParent; 257 } 258 259 260 bool 261 Element::IsDefinitionDelimiter() 262 { 263 return false; 264 } 265 266 267 void 268 Element::PrintToStream(int32 level) 269 { 270 dump(*this, level); 271 } 272 273 274 // #pragma mark - 275 276 277 Group::Group() 278 : 279 fDestination(TEXT_DESTINATION) 280 { 281 } 282 283 284 Group::~Group() 285 { 286 Element *element; 287 while ((element = (Element *)fElements.RemoveItem((int32)0)) != NULL) { 288 delete element; 289 } 290 } 291 292 293 void 294 Group::Parse(char first, BDataIO &stream, char &last) 295 { 296 if (first == '\0') 297 first = read_char(stream); 298 299 if (first != '{') 300 throw (status_t)B_BAD_TYPE; 301 302 last = read_char(stream); 303 } 304 305 306 status_t 307 Group::AddElement(Element *element) 308 { 309 if (element == NULL) 310 return B_BAD_VALUE; 311 312 if (fElements.AddItem(element)) { 313 element->SetParent(this); 314 return B_OK; 315 } 316 317 return B_NO_MEMORY; 318 } 319 320 321 uint32 322 Group::CountElements() const 323 { 324 return (uint32)fElements.CountItems(); 325 } 326 327 328 Element * 329 Group::ElementAt(uint32 index) const 330 { 331 return static_cast<Element *>(fElements.ItemAt(index)); 332 } 333 334 335 Element * 336 Group::FindDefinitionStart(int32 index, int32 *_startIndex) const 337 { 338 if (index < 0) 339 return NULL; 340 341 Element *element; 342 int32 number = 0; 343 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { 344 if (number == index) { 345 if (_startIndex) 346 *_startIndex = i; 347 return element; 348 } 349 350 if (element->IsDefinitionDelimiter()) 351 number++; 352 } 353 354 return NULL; 355 } 356 357 358 Command * 359 Group::FindDefinition(const char *name, int32 index) const 360 { 361 int32 startIndex; 362 Element *element = FindDefinitionStart(index, &startIndex); 363 if (element == NULL) 364 return NULL; 365 366 for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) { 367 if (element->IsDefinitionDelimiter()) 368 break; 369 370 if (Command *command = dynamic_cast<Command *>(element)) { 371 if (command != NULL && !strcmp(name, command->Name())) 372 return command; 373 } 374 } 375 376 return NULL; 377 } 378 379 380 Group * 381 Group::FindGroup(const char *name) const 382 { 383 Element *element; 384 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { 385 Group *group = dynamic_cast<Group *>(element); 386 if (group == NULL) 387 continue; 388 389 Command *command = dynamic_cast<Command *>(group->ElementAt(0)); 390 if (command != NULL && !strcmp(name, command->Name())) 391 return group; 392 } 393 394 return NULL; 395 } 396 397 398 const char * 399 Group::Name() const 400 { 401 Command *command = dynamic_cast<Command *>(ElementAt(0)); 402 if (command != NULL) 403 return command->Name(); 404 405 return NULL; 406 } 407 408 409 void 410 Group::DetermineDestination() 411 { 412 const char *name = Name(); 413 if (name == NULL) 414 return; 415 416 if (!strcmp(name, "*")) { 417 fDestination = COMMENT_DESTINATION; 418 return; 419 } 420 421 // binary search for destination control words 422 423 if (bsearch(name, kDestinationControlWords, 424 sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]), 425 sizeof(kDestinationControlWords[0]), 426 (int (*)(const void *, const void *))string_array_compare) != NULL) 427 fDestination = OTHER_DESTINATION; 428 } 429 430 431 group_destination 432 Group::Destination() const 433 { 434 return fDestination; 435 } 436 437 438 // #pragma mark - 439 440 441 Header::Header() 442 : 443 fVersion(0) 444 { 445 } 446 447 448 Header::~Header() 449 { 450 } 451 452 453 void 454 Header::Parse(char first, BDataIO &stream, char &last) 455 { 456 // The stream has been peeked into by the parser already, and 457 // only the version follows in the stream -- let's pick it up 458 459 fVersion = parse_integer(first, stream, last); 460 461 // recreate "rtf" command to name this group 462 463 Command *command = new Command(); 464 command->SetName("rtf"); 465 command->SetOption(fVersion); 466 467 AddElement(command); 468 } 469 470 471 int32 472 Header::Version() const 473 { 474 return fVersion; 475 } 476 477 478 const char * 479 Header::Charset() const 480 { 481 Command *command = dynamic_cast<Command *>(ElementAt(1)); 482 if (command == NULL) 483 return NULL; 484 485 return command->Name(); 486 } 487 488 489 rgb_color 490 Header::Color(int32 index) 491 { 492 rgb_color color = {0, 0, 0, 255}; 493 494 Group *colorTable = FindGroup("colortbl"); 495 496 if (colorTable != NULL) { 497 if (Command *gun = colorTable->FindDefinition("red", index)) 498 color.red = gun->Option(); 499 if (Command *gun = colorTable->FindDefinition("green", index)) 500 color.green = gun->Option(); 501 if (Command *gun = colorTable->FindDefinition("blue", index)) 502 color.blue = gun->Option(); 503 } 504 505 return color; 506 } 507 508 509 // #pragma mark - 510 511 512 Text::Text() 513 { 514 } 515 516 517 Text::~Text() 518 { 519 SetTo(NULL); 520 } 521 522 523 bool 524 Text::IsDefinitionDelimiter() 525 { 526 return fText == ";"; 527 } 528 529 530 void 531 Text::Parse(char first, BDataIO &stream, char &last) 532 { 533 char c = first; 534 if (c == '\0') 535 c = read_char(stream); 536 537 if (c == ';') { 538 // definition delimiter 539 fText.SetTo(";"); 540 last = read_char(stream); 541 return; 542 } 543 544 const size_t kBufferSteps = 1; 545 size_t maxSize = kBufferSteps; 546 char *text = fText.LockBuffer(maxSize); 547 if (text == NULL) 548 throw (status_t)B_NO_MEMORY; 549 550 size_t position = 0; 551 552 while (true) { 553 if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r') 554 break; 555 556 if (position >= maxSize) { 557 fText.UnlockBuffer(position); 558 text = fText.LockBuffer(maxSize += kBufferSteps); 559 if (text == NULL) 560 throw (status_t)B_NO_MEMORY; 561 } 562 563 text[position++] = c; 564 565 c = read_char(stream); 566 } 567 fText.UnlockBuffer(position); 568 569 // ToDo: add support for different charsets - right now, only ASCII is supported! 570 // To achieve this, we should just translate everything into UTF-8 here 571 572 last = c; 573 } 574 575 576 status_t 577 Text::SetTo(const char *text) 578 { 579 return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY; 580 } 581 582 583 const char * 584 Text::String() const 585 { 586 return fText.String(); 587 } 588 589 590 uint32 591 Text::Length() const 592 { 593 return fText.Length(); 594 } 595 596 597 // #pragma mark - 598 599 600 Command::Command() 601 : 602 fName(NULL), 603 fHasOption(false), 604 fOption(-1) 605 { 606 } 607 608 609 Command::~Command() 610 { 611 } 612 613 614 void 615 Command::Parse(char first, BDataIO &stream, char &last) 616 { 617 if (first == '\0') 618 first = read_char(stream); 619 620 if (first != '\\') 621 throw (status_t)B_BAD_TYPE; 622 623 // get name 624 char name[kCommandLength]; 625 size_t length = 0; 626 char c; 627 while (isalpha(c = read_char(stream))) { 628 name[length++] = c; 629 if (length >= kCommandLength - 1) 630 throw (status_t)B_BAD_TYPE; 631 } 632 633 if (length == 0) { 634 if (c == '\n' || c == '\r') { 635 // we're a hard return 636 fName.SetTo("par"); 637 } else 638 fName.SetTo(c, 1); 639 640 // read over character 641 c = read_char(stream); 642 } else 643 fName.SetTo(name, length); 644 645 TRACE("command: %s\n", fName.String()); 646 647 // parse numeric option 648 649 if (c == '-') 650 c = read_char(stream); 651 652 last = c; 653 654 if (fName == "'") { 655 // hexadecimal 656 char bytes[2]; 657 bytes[0] = read_char(stream); 658 bytes[1] = '\0'; 659 BMemoryIO memory(bytes, 2); 660 661 SetOption(parse_integer(c, memory, last, 16)); 662 last = read_char(stream); 663 } else { 664 // decimal 665 if (isdigit(c)) 666 SetOption(parse_integer(c, stream, last)); 667 668 // a space delimiter is eaten up by the command 669 if (isspace(last)) 670 last = read_char(stream); 671 } 672 673 if (HasOption()) 674 TRACE(" option: %ld\n", fOption); 675 } 676 677 678 status_t 679 Command::SetName(const char *name) 680 { 681 return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY; 682 } 683 684 685 const char * 686 Command::Name() 687 { 688 return fName.String(); 689 } 690 691 692 void 693 Command::UnsetOption() 694 { 695 fHasOption = false; 696 fOption = -1; 697 } 698 699 700 void 701 Command::SetOption(int32 option) 702 { 703 fOption = option; 704 fHasOption = true; 705 } 706 707 708 bool 709 Command::HasOption() const 710 { 711 return fHasOption; 712 } 713 714 715 int32 716 Command::Option() const 717 { 718 return fOption; 719 } 720 721 722 // #pragma mark - 723 724 725 Iterator::Iterator(Element &start, group_destination destination) 726 { 727 SetTo(start, destination); 728 } 729 730 731 void 732 Iterator::SetTo(Element &start, group_destination destination) 733 { 734 fStart = &start; 735 fDestination = destination; 736 737 Rewind(); 738 } 739 740 741 void 742 Iterator::Rewind() 743 { 744 fStack.MakeEmpty(); 745 fStack.Push(fStart); 746 } 747 748 749 bool 750 Iterator::HasNext() const 751 { 752 return !fStack.IsEmpty(); 753 } 754 755 756 Element * 757 Iterator::Next() 758 { 759 Element *element; 760 761 if (!fStack.Pop(&element)) 762 return NULL; 763 764 Group *group = dynamic_cast<Group *>(element); 765 if (group != NULL 766 && (fDestination == ALL_DESTINATIONS 767 || fDestination == group->Destination())) { 768 // put this group's children on the stack in 769 // reverse order, so that we iterate over 770 // the tree in in-order 771 772 for (int32 i = group->CountElements(); i-- > 0;) { 773 fStack.Push(group->ElementAt(i)); 774 } 775 } 776 777 return element; 778 } 779 780 781 // #pragma mark - 782 783 784 Worker::Worker(RTF::Header &start) 785 : 786 fStart(start) 787 { 788 } 789 790 791 Worker::~Worker() 792 { 793 } 794 795 796 void 797 Worker::Dispatch(Element *element) 798 { 799 if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) { 800 fSkip = false; 801 Group(group); 802 803 if (fSkip) 804 return; 805 806 for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++) 807 Dispatch(element); 808 809 GroupEnd(group); 810 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) { 811 Command(command); 812 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) { 813 Text(text); 814 } 815 } 816 817 818 void 819 Worker::Work() 820 { 821 Dispatch(&fStart); 822 } 823 824 825 void 826 Worker::Group(RTF::Group *group) 827 { 828 } 829 830 831 void 832 Worker::GroupEnd(RTF::Group *group) 833 { 834 } 835 836 837 void 838 Worker::Command(RTF::Command *command) 839 { 840 } 841 842 843 void 844 Worker::Text(RTF::Text *text) 845 { 846 } 847 848 849 RTF::Header & 850 Worker::Start() 851 { 852 return fStart; 853 } 854 855 856 void 857 Worker::Skip() 858 { 859 fSkip = true; 860 } 861 862 863 void 864 Worker::Abort(status_t status) 865 { 866 throw status; 867 } 868 869