1 /* 2 * Copyright 2004-2010, Axel Dörfler, axeld@pinc-software.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "RTF.h" 8 9 #include <ctype.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 14 #include <DataIO.h> 15 16 17 //#define TRACE_RTF 18 #ifdef TRACE_RTF 19 # define TRACE(x...) printf(x) 20 #else 21 # define TRACE(x...) ; 22 #endif 23 24 25 static const char *kDestinationControlWords[] = { 26 "aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate", 27 "atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend", 28 "atrfstart", "author", "background", "bkmkend", "buptim", "colortbl", 29 "comment", "creatim", "do", "doccomm", "docvar", "fonttbl", "footer", 30 "footerf", "footerl", "footerr", "footnote", "ftncn", "ftnsep", 31 "ftnsepc", "header", "headerf", "headerl", "headerr", "info", 32 "keywords", "operator", "pict", "printim", "private1", "revtim", 33 "rxe", "stylesheet", "subject", "tc", "title", "txe", "xe", 34 }; 35 36 static char read_char(BDataIO &stream, bool endOfFileAllowed = false) throw (status_t); 37 static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10) throw (status_t); 38 39 40 using namespace RTF; 41 42 43 static char 44 read_char(BDataIO &stream, bool endOfFileAllowed) throw (status_t) 45 { 46 char c; 47 ssize_t bytesRead = stream.Read(&c, 1); 48 49 if (bytesRead < B_OK) 50 throw (status_t)bytesRead; 51 52 if (bytesRead == 0 && !endOfFileAllowed) 53 throw (status_t)B_ERROR; 54 55 return c; 56 } 57 58 59 static int32 60 parse_integer(char first, BDataIO &stream, char &_last, int32 base) 61 throw (status_t) 62 { 63 const char *kDigits = "0123456789abcdef"; 64 int32 integer = 0; 65 int32 count = 0; 66 67 char digit = first; 68 69 if (digit == '\0') 70 digit = read_char(stream); 71 72 while (true) { 73 int32 pos = 0; 74 for (; pos < base; pos++) { 75 if (kDigits[pos] == tolower(digit)) { 76 integer = integer * base + pos; 77 count++; 78 break; 79 } 80 } 81 if (pos == base) { 82 _last = digit; 83 goto out; 84 } 85 86 digit = read_char(stream); 87 } 88 89 out: 90 if (count == 0) 91 throw (status_t)B_BAD_TYPE; 92 93 return integer; 94 } 95 96 97 static int 98 string_array_compare(const char *key, const char **array) 99 { 100 return strcmp(key, array[0]); 101 } 102 103 104 static void 105 dump(Element &element, int32 level = 0) 106 { 107 printf("%03" B_PRId32 " (%p):", level, &element); 108 for (int32 i = 0; i < level; i++) 109 printf(" "); 110 111 if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) { 112 printf("<RTF header, major version %" B_PRId32 ">\n", header->Version()); 113 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) { 114 printf("<Command: %s", command->Name()); 115 if (command->HasOption()) 116 printf(", Option %" B_PRId32, command->Option()); 117 puts(">"); 118 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) { 119 printf("<Text>"); 120 puts(text->String()); 121 } else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) 122 printf("<Group \"%s\">\n", group->Name()); 123 124 if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) { 125 for (uint32 i = 0; i < group->CountElements(); i++) 126 dump(*group->ElementAt(i), level + 1); 127 } 128 } 129 130 131 // #pragma mark - 132 133 134 Parser::Parser(BPositionIO &stream) 135 : 136 fStream(&stream, 65536, false), 137 fIdentified(false) 138 { 139 } 140 141 142 status_t 143 Parser::Identify() 144 { 145 char header[5]; 146 if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header)) 147 return B_IO_ERROR; 148 149 if (strncmp(header, "{\\rtf", 5)) 150 return B_BAD_TYPE; 151 152 fIdentified = true; 153 return B_OK; 154 } 155 156 157 status_t 158 Parser::Parse(Header &header) 159 { 160 if (!fIdentified && Identify() != B_OK) 161 return B_BAD_TYPE; 162 163 try { 164 int32 openBrackets = 1; 165 166 // since we already preparsed parts of the RTF header, the header 167 // is handled here directly 168 char last; 169 header.Parse('\0', fStream, last); 170 171 Group *parent = &header; 172 char c = last; 173 174 while (true) { 175 Element *element = NULL; 176 177 // we'll just ignore the end of the stream 178 if (parent == NULL) 179 return B_OK; 180 181 switch (c) { 182 case '{': 183 openBrackets++; 184 parent->AddElement(element = new Group()); 185 parent = static_cast<Group *>(element); 186 break; 187 188 case '\\': 189 parent->AddElement(element = new Command()); 190 break; 191 192 case '}': 193 openBrackets--; 194 parent->DetermineDestination(); 195 parent = parent->Parent(); 196 // supposed to fall through 197 case '\n': 198 case '\r': 199 { 200 ssize_t bytesRead = fStream.Read(&c, 1); 201 if (bytesRead < B_OK) 202 throw (status_t)bytesRead; 203 else if (bytesRead != 1) { 204 // this is the only valid exit status 205 if (openBrackets == 0) 206 return B_OK; 207 208 throw (status_t)B_ERROR; 209 } 210 continue; 211 } 212 213 default: 214 parent->AddElement(element = new Text()); 215 break; 216 } 217 218 if (element == NULL) 219 throw (status_t)B_ERROR; 220 221 element->Parse(c, fStream, last); 222 c = last; 223 } 224 } catch (status_t status) { 225 return status; 226 } 227 228 return B_OK; 229 } 230 231 232 // #pragma mark - 233 234 235 Element::Element() 236 : 237 fParent(NULL) 238 { 239 } 240 241 242 Element::~Element() 243 { 244 } 245 246 247 void 248 Element::SetParent(Group *parent) 249 { 250 fParent = parent; 251 } 252 253 254 Group * 255 Element::Parent() const 256 { 257 return fParent; 258 } 259 260 261 bool 262 Element::IsDefinitionDelimiter() 263 { 264 return false; 265 } 266 267 268 void 269 Element::PrintToStream(int32 level) 270 { 271 dump(*this, level); 272 } 273 274 275 // #pragma mark - 276 277 278 Group::Group() 279 : 280 fDestination(TEXT_DESTINATION) 281 { 282 } 283 284 285 Group::~Group() 286 { 287 Element *element; 288 while ((element = (Element *)fElements.RemoveItem((int32)0)) != NULL) { 289 delete element; 290 } 291 } 292 293 294 void 295 Group::Parse(char first, BDataIO &stream, char &last) throw (status_t) 296 { 297 if (first == '\0') 298 first = read_char(stream); 299 300 if (first != '{') 301 throw (status_t)B_BAD_TYPE; 302 303 last = read_char(stream); 304 } 305 306 307 status_t 308 Group::AddElement(Element *element) 309 { 310 if (element == NULL) 311 return B_BAD_VALUE; 312 313 if (fElements.AddItem(element)) { 314 element->SetParent(this); 315 return B_OK; 316 } 317 318 return B_NO_MEMORY; 319 } 320 321 322 uint32 323 Group::CountElements() const 324 { 325 return (uint32)fElements.CountItems(); 326 } 327 328 329 Element * 330 Group::ElementAt(uint32 index) const 331 { 332 return static_cast<Element *>(fElements.ItemAt(index)); 333 } 334 335 336 Element * 337 Group::FindDefinitionStart(int32 index, int32 *_startIndex) const 338 { 339 if (index < 0) 340 return NULL; 341 342 Element *element; 343 int32 number = 0; 344 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { 345 if (number == index) { 346 if (_startIndex) 347 *_startIndex = i; 348 return element; 349 } 350 351 if (element->IsDefinitionDelimiter()) 352 number++; 353 } 354 355 return NULL; 356 } 357 358 359 Command * 360 Group::FindDefinition(const char *name, int32 index) const 361 { 362 int32 startIndex; 363 Element *element = FindDefinitionStart(index, &startIndex); 364 if (element == NULL) 365 return NULL; 366 367 for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) { 368 if (element->IsDefinitionDelimiter()) 369 break; 370 371 if (Command *command = dynamic_cast<Command *>(element)) { 372 if (command != NULL && !strcmp(name, command->Name())) 373 return command; 374 } 375 } 376 377 return NULL; 378 } 379 380 381 Group * 382 Group::FindGroup(const char *name) const 383 { 384 Element *element; 385 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { 386 Group *group = dynamic_cast<Group *>(element); 387 if (group == NULL) 388 continue; 389 390 Command *command = dynamic_cast<Command *>(group->ElementAt(0)); 391 if (command != NULL && !strcmp(name, command->Name())) 392 return group; 393 } 394 395 return NULL; 396 } 397 398 399 const char * 400 Group::Name() const 401 { 402 Command *command = dynamic_cast<Command *>(ElementAt(0)); 403 if (command != NULL) 404 return command->Name(); 405 406 return NULL; 407 } 408 409 410 void 411 Group::DetermineDestination() 412 { 413 const char *name = Name(); 414 if (name == NULL) 415 return; 416 417 if (!strcmp(name, "*")) { 418 fDestination = COMMENT_DESTINATION; 419 return; 420 } 421 422 // binary search for destination control words 423 424 if (bsearch(name, kDestinationControlWords, 425 sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]), 426 sizeof(kDestinationControlWords[0]), 427 (int (*)(const void *, const void *))string_array_compare) != NULL) 428 fDestination = OTHER_DESTINATION; 429 } 430 431 432 group_destination 433 Group::Destination() const 434 { 435 return fDestination; 436 } 437 438 439 // #pragma mark - 440 441 442 Header::Header() 443 : 444 fVersion(0) 445 { 446 } 447 448 449 Header::~Header() 450 { 451 } 452 453 454 void 455 Header::Parse(char first, BDataIO &stream, char &last) throw (status_t) 456 { 457 // The stream has been peeked into by the parser already, and 458 // only the version follows in the stream -- let's pick it up 459 460 fVersion = parse_integer(first, stream, last); 461 462 // recreate "rtf" command to name this group 463 464 Command *command = new Command(); 465 command->SetName("rtf"); 466 command->SetOption(fVersion); 467 468 AddElement(command); 469 } 470 471 472 int32 473 Header::Version() const 474 { 475 return fVersion; 476 } 477 478 479 const char * 480 Header::Charset() const 481 { 482 Command *command = dynamic_cast<Command *>(ElementAt(1)); 483 if (command == NULL) 484 return NULL; 485 486 return command->Name(); 487 } 488 489 490 rgb_color 491 Header::Color(int32 index) 492 { 493 rgb_color color = {0, 0, 0, 255}; 494 495 Group *colorTable = FindGroup("colortbl"); 496 497 if (colorTable != NULL) { 498 if (Command *gun = colorTable->FindDefinition("red", index)) 499 color.red = gun->Option(); 500 if (Command *gun = colorTable->FindDefinition("green", index)) 501 color.green = gun->Option(); 502 if (Command *gun = colorTable->FindDefinition("blue", index)) 503 color.blue = gun->Option(); 504 } 505 506 return color; 507 } 508 509 510 // #pragma mark - 511 512 513 Text::Text() 514 { 515 } 516 517 518 Text::~Text() 519 { 520 SetTo(NULL); 521 } 522 523 524 bool 525 Text::IsDefinitionDelimiter() 526 { 527 return fText == ";"; 528 } 529 530 531 void 532 Text::Parse(char first, BDataIO &stream, char &last) throw (status_t) 533 { 534 char c = first; 535 if (c == '\0') 536 c = read_char(stream); 537 538 if (c == ';') { 539 // definition delimiter 540 fText.SetTo(";"); 541 last = read_char(stream); 542 return; 543 } 544 545 const size_t kBufferSteps = 1; 546 size_t maxSize = kBufferSteps; 547 char *text = fText.LockBuffer(maxSize); 548 if (text == NULL) 549 throw (status_t)B_NO_MEMORY; 550 551 size_t position = 0; 552 553 while (true) { 554 if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r') 555 break; 556 557 if (position >= maxSize) { 558 fText.UnlockBuffer(position); 559 text = fText.LockBuffer(maxSize += kBufferSteps); 560 if (text == NULL) 561 throw (status_t)B_NO_MEMORY; 562 } 563 564 text[position++] = c; 565 566 c = read_char(stream); 567 } 568 fText.UnlockBuffer(position); 569 570 // ToDo: add support for different charsets - right now, only ASCII is supported! 571 // To achieve this, we should just translate everything into UTF-8 here 572 573 last = c; 574 } 575 576 577 status_t 578 Text::SetTo(const char *text) 579 { 580 return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY; 581 } 582 583 584 const char * 585 Text::String() const 586 { 587 return fText.String(); 588 } 589 590 591 uint32 592 Text::Length() const 593 { 594 return fText.Length(); 595 } 596 597 598 // #pragma mark - 599 600 601 Command::Command() 602 : 603 fName(NULL), 604 fHasOption(false), 605 fOption(-1) 606 { 607 } 608 609 610 Command::~Command() 611 { 612 } 613 614 615 void 616 Command::Parse(char first, BDataIO &stream, char &last) throw (status_t) 617 { 618 if (first == '\0') 619 first = read_char(stream); 620 621 if (first != '\\') 622 throw (status_t)B_BAD_TYPE; 623 624 // get name 625 char name[kCommandLength]; 626 size_t length = 0; 627 char c; 628 while (isalpha(c = read_char(stream))) { 629 name[length++] = c; 630 if (length >= kCommandLength - 1) 631 throw (status_t)B_BAD_TYPE; 632 } 633 634 if (length == 0) { 635 if (c == '\n' || c == '\r') { 636 // we're a hard return 637 fName.SetTo("par"); 638 } else 639 fName.SetTo(c, 1); 640 641 // read over character 642 c = read_char(stream); 643 } else 644 fName.SetTo(name, length); 645 646 TRACE("command: %s\n", fName.String()); 647 648 // parse numeric option 649 650 if (c == '-') 651 c = read_char(stream); 652 653 last = c; 654 655 if (fName == "'") { 656 // hexadecimal 657 char bytes[2]; 658 bytes[0] = read_char(stream); 659 bytes[1] = '\0'; 660 BMemoryIO memory(bytes, 2); 661 662 SetOption(parse_integer(c, memory, last, 16)); 663 last = read_char(stream); 664 } else { 665 // decimal 666 if (isdigit(c)) 667 SetOption(parse_integer(c, stream, last)); 668 669 // a space delimiter is eaten up by the command 670 if (isspace(last)) 671 last = read_char(stream); 672 } 673 674 if (HasOption()) 675 TRACE(" option: %ld\n", fOption); 676 } 677 678 679 status_t 680 Command::SetName(const char *name) 681 { 682 return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY; 683 } 684 685 686 const char * 687 Command::Name() 688 { 689 return fName.String(); 690 } 691 692 693 void 694 Command::UnsetOption() 695 { 696 fHasOption = false; 697 fOption = -1; 698 } 699 700 701 void 702 Command::SetOption(int32 option) 703 { 704 fOption = option; 705 fHasOption = true; 706 } 707 708 709 bool 710 Command::HasOption() const 711 { 712 return fHasOption; 713 } 714 715 716 int32 717 Command::Option() const 718 { 719 return fOption; 720 } 721 722 723 // #pragma mark - 724 725 726 Iterator::Iterator(Element &start, group_destination destination) 727 { 728 SetTo(start, destination); 729 } 730 731 732 void 733 Iterator::SetTo(Element &start, group_destination destination) 734 { 735 fStart = &start; 736 fDestination = destination; 737 738 Rewind(); 739 } 740 741 742 void 743 Iterator::Rewind() 744 { 745 fStack.MakeEmpty(); 746 fStack.Push(fStart); 747 } 748 749 750 bool 751 Iterator::HasNext() const 752 { 753 return !fStack.IsEmpty(); 754 } 755 756 757 Element * 758 Iterator::Next() 759 { 760 Element *element; 761 762 if (!fStack.Pop(&element)) 763 return NULL; 764 765 Group *group = dynamic_cast<Group *>(element); 766 if (group != NULL 767 && (fDestination == ALL_DESTINATIONS 768 || fDestination == group->Destination())) { 769 // put this group's children on the stack in 770 // reverse order, so that we iterate over 771 // the tree in in-order 772 773 for (int32 i = group->CountElements(); i-- > 0;) { 774 fStack.Push(group->ElementAt(i)); 775 } 776 } 777 778 return element; 779 } 780 781 782 // #pragma mark - 783 784 785 Worker::Worker(RTF::Header &start) 786 : 787 fStart(start) 788 { 789 } 790 791 792 Worker::~Worker() 793 { 794 } 795 796 797 void 798 Worker::Dispatch(Element *element) 799 { 800 if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) { 801 fSkip = false; 802 Group(group); 803 804 if (fSkip) 805 return; 806 807 for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++) 808 Dispatch(element); 809 810 GroupEnd(group); 811 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) { 812 Command(command); 813 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) { 814 Text(text); 815 } 816 } 817 818 819 void 820 Worker::Work() throw (status_t) 821 { 822 Dispatch(&fStart); 823 } 824 825 826 void 827 Worker::Group(RTF::Group *group) 828 { 829 } 830 831 832 void 833 Worker::GroupEnd(RTF::Group *group) 834 { 835 } 836 837 838 void 839 Worker::Command(RTF::Command *command) 840 { 841 } 842 843 844 void 845 Worker::Text(RTF::Text *text) 846 { 847 } 848 849 850 RTF::Header & 851 Worker::Start() 852 { 853 return fStart; 854 } 855 856 857 void 858 Worker::Skip() 859 { 860 fSkip = true; 861 } 862 863 864 void 865 Worker::Abort(status_t status) 866 { 867 throw status; 868 } 869 870