1 /* 2 * Copyright 2004-2005, Axel Dörfler, axeld@pinc-software.de. All rights reserved. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "RTF.h" 8 9 #include <DataIO.h> 10 11 #include <stdlib.h> 12 #include <stdio.h> 13 #include <string.h> 14 #include <ctype.h> 15 16 17 static const char *kDestinationControlWords[] = { 18 "aftncn", "aftnsep", "aftnsepc", "annotation", "atnauthor", "atndate", 19 "atnicn", "atnid", "atnparent", "atnref", "atntime", "atrfend", 20 "atrfstart", "author", "background", "bkmkend", "buptim", "colortbl", 21 "comment", "creatim", "do", "doccomm", "docvar", "fonttbl", "footer", 22 "footerf", "footerl", "footerr", "footnote", "ftncn", "ftnsep", 23 "ftnsepc", "header", "headerf", "headerl", "headerr", "info", 24 "keywords", "operator", "pict", "printim", "private1", "revtim", 25 "rxe", "stylesheet", "subject", "tc", "title", "txe", "xe", 26 }; 27 28 static char read_char(BDataIO &stream, bool endOfFileAllowed = false) throw (status_t); 29 static int32 parse_integer(char first, BDataIO &stream, char &_last, int32 base = 10) throw (status_t); 30 31 32 using namespace RTF; 33 34 35 static char 36 read_char(BDataIO &stream, bool endOfFileAllowed) throw (status_t) 37 { 38 char c; 39 ssize_t bytesRead = stream.Read(&c, 1); 40 41 if (bytesRead < B_OK) 42 throw (status_t)bytesRead; 43 44 if (bytesRead == 0 && !endOfFileAllowed) 45 throw (status_t)B_ERROR; 46 47 return c; 48 } 49 50 51 static int32 52 parse_integer(char first, BDataIO &stream, char &_last, int32 base) throw (status_t) 53 { 54 const char *kDigits = "0123456789abcdef"; 55 int32 integer = 0; 56 int32 count = 0; 57 58 char digit = first; 59 60 if (digit == '\0') 61 digit = read_char(stream); 62 63 while (true) { 64 int32 pos = 0; 65 for (; pos < base; pos++) { 66 if (kDigits[pos] == digit) { 67 integer = integer * base + pos; 68 count++; 69 break; 70 } 71 } 72 if (pos == base) { 73 _last = digit; 74 goto out; 75 } 76 77 digit = read_char(stream); 78 } 79 80 out: 81 if (count == 0) 82 throw (status_t)B_BAD_TYPE; 83 84 return integer; 85 } 86 87 88 static int 89 string_array_compare(const char *key, const char **array) 90 { 91 return strcmp(key, array[0]); 92 } 93 94 95 static void 96 dump(Element &element, int32 level = 0) 97 { 98 printf("%03ld (%p):", level, &element); 99 for (int32 i = 0; i < level; i++) 100 printf(" "); 101 102 if (RTF::Header *header = dynamic_cast<RTF::Header *>(&element)) { 103 printf("<RTF header, major version %ld>\n", header->Version()); 104 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(&element)) { 105 printf("<Command: %s", command->Name()); 106 if (command->HasOption()) 107 printf(", Option %ld", command->Option()); 108 puts(">"); 109 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(&element)) { 110 printf("<Text>"); 111 puts(text->String()); 112 } else if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) 113 printf("<Group \"%s\">\n", group->Name()); 114 115 if (RTF::Group *group = dynamic_cast<RTF::Group *>(&element)) { 116 for (uint32 i = 0; i < group->CountElements(); i++) 117 dump(*group->ElementAt(i), level + 1); 118 } 119 } 120 121 122 // #pragma mark - 123 124 125 Parser::Parser(BPositionIO &stream) 126 : 127 fStream(&stream, 65536, false), 128 fIdentified(false) 129 { 130 } 131 132 133 status_t 134 Parser::Identify() 135 { 136 char header[5]; 137 if (fStream.Read(header, sizeof(header)) < (ssize_t)sizeof(header)) 138 return B_IO_ERROR; 139 140 if (strncmp(header, "{\\rtf", 5)) 141 return B_BAD_TYPE; 142 143 fIdentified = true; 144 return B_OK; 145 } 146 147 148 status_t 149 Parser::Parse(Header &header) 150 { 151 if (!fIdentified && Identify() != B_OK) 152 return B_BAD_TYPE; 153 154 try { 155 int32 openBrackets = 1; 156 157 // since we already preparsed parts of the RTF header, the header 158 // is handled here directly 159 char last; 160 header.Parse('\0', fStream, last); 161 162 Group *parent = &header; 163 char c = last; 164 165 while (true) { 166 Element *element = NULL; 167 168 // we'll just ignore the end of the stream 169 if (parent == NULL) 170 return B_OK; 171 172 switch (c) { 173 case '{': 174 openBrackets++; 175 parent->AddElement(element = new Group()); 176 parent = static_cast<Group *>(element); 177 break; 178 179 case '\\': 180 parent->AddElement(element = new Command()); 181 break; 182 183 case '}': 184 openBrackets--; 185 parent->DetermineDestination(); 186 parent = parent->Parent(); 187 // supposed to fall through 188 case '\n': 189 case '\r': 190 { 191 ssize_t bytesRead = fStream.Read(&c, 1); 192 if (bytesRead < B_OK) 193 throw (status_t)bytesRead; 194 else if (bytesRead != 1) { 195 // this is the only valid exit status 196 if (openBrackets == 0) 197 return B_OK; 198 199 throw B_ERROR; 200 } 201 continue; 202 } 203 204 default: 205 parent->AddElement(element = new Text()); 206 break; 207 } 208 209 if (element == NULL) 210 throw (status_t)B_ERROR; 211 212 element->Parse(c, fStream, last); 213 c = last; 214 } 215 } catch (status_t status) { 216 return status; 217 } 218 219 return B_OK; 220 } 221 222 223 // #pragma mark - 224 225 226 Element::Element() 227 : 228 fParent(NULL) 229 { 230 } 231 232 233 Element::~Element() 234 { 235 } 236 237 238 void 239 Element::SetParent(Group *parent) 240 { 241 fParent = parent; 242 } 243 244 245 Group * 246 Element::Parent() const 247 { 248 return fParent; 249 } 250 251 252 bool 253 Element::IsDefinitionDelimiter() 254 { 255 return false; 256 } 257 258 259 void 260 Element::PrintToStream(int32 level) 261 { 262 dump(*this, level); 263 } 264 265 266 // #pragma mark - 267 268 269 Group::Group() 270 : 271 fDestination(TEXT_DESTINATION) 272 { 273 } 274 275 276 Group::~Group() 277 { 278 Element *element; 279 while ((element = (Element *)fElements.RemoveItem(0L)) != NULL) { 280 delete element; 281 } 282 } 283 284 285 void 286 Group::Parse(char first, BDataIO &stream, char &last) throw (status_t) 287 { 288 if (first == '\0') 289 first = read_char(stream); 290 291 if (first != '{') 292 throw (status_t)B_BAD_TYPE; 293 294 last = read_char(stream); 295 } 296 297 298 status_t 299 Group::AddElement(Element *element) 300 { 301 if (element == NULL) 302 return B_BAD_VALUE; 303 304 if (fElements.AddItem(element)) { 305 element->SetParent(this); 306 return B_OK; 307 } 308 309 return B_NO_MEMORY; 310 } 311 312 313 uint32 314 Group::CountElements() const 315 { 316 return (uint32)fElements.CountItems(); 317 } 318 319 320 Element * 321 Group::ElementAt(uint32 index) const 322 { 323 return static_cast<Element *>(fElements.ItemAt(index)); 324 } 325 326 327 Element * 328 Group::FindDefinitionStart(int32 index, int32 *_startIndex) const 329 { 330 if (index < 0) 331 return NULL; 332 333 Element *element; 334 int32 number = 0; 335 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { 336 if (number == index) { 337 if (_startIndex) 338 *_startIndex = i; 339 return element; 340 } 341 342 if (element->IsDefinitionDelimiter()) 343 number++; 344 } 345 346 return NULL; 347 } 348 349 350 Command * 351 Group::FindDefinition(const char *name, int32 index) const 352 { 353 int32 startIndex; 354 Element *element = FindDefinitionStart(index, &startIndex); 355 if (element == NULL) 356 return NULL; 357 358 for (uint32 i = startIndex; (element = ElementAt(i)) != NULL; i++) { 359 if (element->IsDefinitionDelimiter()) 360 break; 361 362 if (Command *command = dynamic_cast<Command *>(element)) { 363 if (command != NULL && !strcmp(name, command->Name())) 364 return command; 365 } 366 } 367 368 return NULL; 369 } 370 371 372 Group * 373 Group::FindGroup(const char *name) const 374 { 375 Element *element; 376 for (uint32 i = 0; (element = ElementAt(i)) != NULL; i++) { 377 Group *group = dynamic_cast<Group *>(element); 378 if (group == NULL) 379 continue; 380 381 Command *command = dynamic_cast<Command *>(group->ElementAt(0)); 382 if (command != NULL && !strcmp(name, command->Name())) 383 return group; 384 } 385 386 return NULL; 387 } 388 389 390 const char * 391 Group::Name() const 392 { 393 Command *command = dynamic_cast<Command *>(ElementAt(0)); 394 if (command != NULL) 395 return command->Name(); 396 397 return NULL; 398 } 399 400 401 void 402 Group::DetermineDestination() 403 { 404 const char *name = Name(); 405 if (name == NULL) 406 return; 407 408 if (!strcmp(name, "*")) { 409 fDestination = COMMENT_DESTINATION; 410 return; 411 } 412 413 // binary search for destination control words 414 415 if (bsearch(name, kDestinationControlWords, 416 sizeof(kDestinationControlWords) / sizeof(kDestinationControlWords[0]), 417 sizeof(kDestinationControlWords[0]), 418 (int (*)(const void *, const void *))string_array_compare) != NULL) 419 fDestination = OTHER_DESTINATION; 420 } 421 422 423 group_destination 424 Group::Destination() const 425 { 426 return fDestination; 427 } 428 429 430 // #pragma mark - 431 432 433 Header::Header() 434 : 435 fVersion(0) 436 { 437 } 438 439 440 Header::~Header() 441 { 442 } 443 444 445 void 446 Header::Parse(char first, BDataIO &stream, char &last) throw (status_t) 447 { 448 // The stream has been peeked into by the parser already, and 449 // only the version follows in the stream -- let's pick it up 450 451 fVersion = parse_integer(first, stream, last); 452 453 // recreate "rtf" command to name this group 454 455 Command *command = new Command(); 456 command->SetName("rtf"); 457 command->SetOption(fVersion); 458 459 AddElement(command); 460 } 461 462 463 int32 464 Header::Version() const 465 { 466 return fVersion; 467 } 468 469 470 const char * 471 Header::Charset() const 472 { 473 Command *command = dynamic_cast<Command *>(ElementAt(1)); 474 if (command == NULL) 475 return NULL; 476 477 return command->Name(); 478 } 479 480 481 rgb_color 482 Header::Color(int32 index) 483 { 484 rgb_color color = {0, 0, 0, 255}; 485 486 Group *colorTable = FindGroup("colortbl"); 487 488 if (colorTable != NULL) { 489 if (Command *gun = colorTable->FindDefinition("red", index)) 490 color.red = gun->Option(); 491 if (Command *gun = colorTable->FindDefinition("green", index)) 492 color.green = gun->Option(); 493 if (Command *gun = colorTable->FindDefinition("blue", index)) 494 color.blue = gun->Option(); 495 } 496 497 return color; 498 } 499 500 501 // #pragma mark - 502 503 504 Text::Text() 505 { 506 } 507 508 509 Text::~Text() 510 { 511 SetTo(NULL); 512 } 513 514 515 bool 516 Text::IsDefinitionDelimiter() 517 { 518 return fText == ";"; 519 } 520 521 522 void 523 Text::Parse(char first, BDataIO &stream, char &last) throw (status_t) 524 { 525 char c = first; 526 if (c == '\0') 527 c = read_char(stream); 528 529 if (c == ';') { 530 // definition delimiter 531 fText.SetTo(";"); 532 last = read_char(stream); 533 return; 534 } 535 536 const size_t kBufferSteps = 1; 537 size_t maxSize = kBufferSteps; 538 char *text = fText.LockBuffer(maxSize); 539 if (text == NULL) 540 throw (status_t)B_NO_MEMORY; 541 542 size_t position = 0; 543 544 while (true) { 545 if (c == '\\' || c == '}' || c == '{' || c == ';' || c == '\n' || c == '\r') 546 break; 547 548 if (position >= maxSize) { 549 fText.UnlockBuffer(position); 550 text = fText.LockBuffer(maxSize += kBufferSteps); 551 if (text == NULL) 552 throw (status_t)B_NO_MEMORY; 553 } 554 555 text[position++] = c; 556 557 c = read_char(stream); 558 } 559 fText.UnlockBuffer(position); 560 561 // ToDo: add support for different charsets - right now, only ASCII is supported! 562 // To achieve this, we should just translate everything into UTF-8 here 563 564 last = c; 565 } 566 567 568 status_t 569 Text::SetTo(const char *text) 570 { 571 return fText.SetTo(text) != NULL ? B_OK : B_NO_MEMORY; 572 } 573 574 575 const char * 576 Text::String() const 577 { 578 return fText.String(); 579 } 580 581 582 uint32 583 Text::Length() const 584 { 585 return fText.Length(); 586 } 587 588 589 // #pragma mark - 590 591 592 Command::Command() 593 : 594 fName(NULL), 595 fHasOption(false), 596 fOption(-1) 597 { 598 } 599 600 601 Command::~Command() 602 { 603 } 604 605 606 void 607 Command::Parse(char first, BDataIO &stream, char &last) throw (status_t) 608 { 609 if (first == '\0') 610 first = read_char(stream); 611 612 if (first != '\\') 613 throw (status_t)B_BAD_TYPE; 614 615 // get name 616 char name[kCommandLength]; 617 size_t length = 0; 618 char c; 619 while (isalpha(c = read_char(stream))) { 620 name[length++] = c; 621 if (length >= kCommandLength - 1) 622 throw (status_t)B_BAD_TYPE; 623 } 624 625 if (length == 0) { 626 if (c == '\n' || c == '\r') { 627 // we're a hard return 628 fName.SetTo("par"); 629 } else 630 fName.SetTo(c, 1); 631 632 // read over character 633 c = read_char(stream); 634 } else 635 fName.SetTo(name, length); 636 637 // parse numeric option 638 639 if (c == '-') 640 c = read_char(stream); 641 642 last = c; 643 644 if (fName == "'") { 645 // hexadecimal 646 char bytes[2]; 647 bytes[0] = read_char(stream); 648 bytes[1] = '\0'; 649 BMemoryIO memory(bytes, 2); 650 651 SetOption(parse_integer(c, memory, last, 16)); 652 last = read_char(stream); 653 } else { 654 // decimal 655 if (isdigit(c)) 656 SetOption(parse_integer(c, stream, last)); 657 658 // a space delimiter is eaten up by the command 659 if (isspace(last)) 660 last = read_char(stream); 661 } 662 } 663 664 665 status_t 666 Command::SetName(const char *name) 667 { 668 return fName.SetTo(name) != NULL ? B_OK : B_NO_MEMORY; 669 } 670 671 672 const char * 673 Command::Name() 674 { 675 return fName.String(); 676 } 677 678 679 void 680 Command::UnsetOption() 681 { 682 fHasOption = false; 683 fOption = -1; 684 } 685 686 687 void 688 Command::SetOption(int32 option) 689 { 690 fOption = option; 691 fHasOption = true; 692 } 693 694 695 bool 696 Command::HasOption() const 697 { 698 return fHasOption; 699 } 700 701 702 int32 703 Command::Option() const 704 { 705 return fOption; 706 } 707 708 709 // #pragma mark - 710 711 712 Iterator::Iterator(Element &start, group_destination destination) 713 { 714 SetTo(start, destination); 715 } 716 717 718 void 719 Iterator::SetTo(Element &start, group_destination destination) 720 { 721 fStart = &start; 722 fDestination = destination; 723 724 Rewind(); 725 } 726 727 728 void 729 Iterator::Rewind() 730 { 731 fStack.MakeEmpty(); 732 fStack.Push(fStart); 733 } 734 735 736 bool 737 Iterator::HasNext() const 738 { 739 return !fStack.IsEmpty(); 740 } 741 742 743 Element * 744 Iterator::Next() 745 { 746 Element *element; 747 748 if (!fStack.Pop(&element)) 749 return NULL; 750 751 Group *group = dynamic_cast<Group *>(element); 752 if (group != NULL 753 && (fDestination == ALL_DESTINATIONS 754 || fDestination == group->Destination())) { 755 // put this group's children on the stack in 756 // reverse order, so that we iterate over 757 // the tree in in-order 758 759 for (int32 i = group->CountElements(); i-- > 0;) { 760 fStack.Push(group->ElementAt(i)); 761 } 762 } 763 764 return element; 765 } 766 767 768 // #pragma mark - 769 770 771 Worker::Worker(RTF::Header &start) 772 : 773 fStart(start) 774 { 775 } 776 777 778 Worker::~Worker() 779 { 780 } 781 782 783 void 784 Worker::Dispatch(Element *element) 785 { 786 if (RTF::Group *group = dynamic_cast<RTF::Group *>(element)) { 787 fSkip = false; 788 Group(group); 789 790 if (fSkip) 791 return; 792 793 for (int32 i = 0; (element = group->ElementAt(i)) != NULL; i++) 794 Dispatch(element); 795 796 GroupEnd(group); 797 } else if (RTF::Command *command = dynamic_cast<RTF::Command *>(element)) { 798 Command(command); 799 } else if (RTF::Text *text = dynamic_cast<RTF::Text *>(element)) { 800 Text(text); 801 } 802 } 803 804 805 void 806 Worker::Work() throw (status_t) 807 { 808 Dispatch(&fStart); 809 } 810 811 812 void 813 Worker::Group(RTF::Group *group) 814 { 815 } 816 817 818 void 819 Worker::GroupEnd(RTF::Group *group) 820 { 821 } 822 823 824 void 825 Worker::Command(RTF::Command *command) 826 { 827 } 828 829 830 void 831 Worker::Text(RTF::Text *text) 832 { 833 } 834 835 836 RTF::Header & 837 Worker::Start() 838 { 839 return fStart; 840 } 841 842 843 void 844 Worker::Skip() 845 { 846 fSkip = true; 847 } 848 849 850 void 851 Worker::Abort(status_t status) 852 { 853 throw status; 854 } 855 856