1 /* (Text)Component - message component base class and plain text 2 ** 3 ** Copyright 2001 Dr. Zoidberg Enterprises. All rights reserved. 4 */ 5 6 7 #include <String.h> 8 #include <Mime.h> 9 10 #include <malloc.h> 11 #include <ctype.h> 12 13 class _EXPORT BMailComponent; 14 class _EXPORT BTextMailComponent; 15 16 #include <MailComponent.h> 17 #include <MailAttachment.h> 18 #include <MailContainer.h> 19 #include <mail_util.h> 20 21 #include <CharacterSet.h> 22 #include <CharacterSetRoster.h> 23 24 using namespace BPrivate ; 25 26 struct CharsetConversionEntry 27 { 28 const char *charset; 29 uint32 flavor; 30 }; 31 32 extern const CharsetConversionEntry mail_charsets[]; 33 34 35 extern const char *kHeaderCharsetString = "header-charset"; 36 extern const char *kHeaderEncodingString = "header-encoding"; 37 // Special field names in the headers which specify the character set (int32) 38 // and encoding (int8) to use when converting the headers from UTF-8 to the 39 // output e-mail format (rfc2047). Since they are numbers, not strings, the 40 // extra fields won't be output. 41 42 43 BMailComponent::BMailComponent(uint32 defaultCharSet) 44 : _charSetForTextDecoding (defaultCharSet) 45 { 46 } 47 48 BMailComponent::~BMailComponent() 49 { 50 } 51 52 uint32 BMailComponent::ComponentType() 53 { 54 if (NULL != dynamic_cast<BAttributedMailAttachment *> (this)) 55 return B_MAIL_ATTRIBUTED_ATTACHMENT; 56 57 BMimeType type, super; 58 MIMEType(&type); 59 type.GetSupertype(&super); 60 61 //---------ATT-This code *desperately* needs to be improved 62 if (super == "multipart") { 63 if (type == "multipart/x-bfile") // Not likely, they have the MIME 64 return B_MAIL_ATTRIBUTED_ATTACHMENT; // of their data contents. 65 else 66 return B_MAIL_MULTIPART_CONTAINER; 67 } else if (!IsAttachment() && (super == "text" || type.Type() == NULL)) 68 return B_MAIL_PLAIN_TEXT_BODY; 69 else 70 return B_MAIL_SIMPLE_ATTACHMENT; 71 } 72 73 BMailComponent *BMailComponent::WhatIsThis() { 74 switch (ComponentType()) 75 { 76 case B_MAIL_SIMPLE_ATTACHMENT: 77 return new BSimpleMailAttachment; 78 case B_MAIL_ATTRIBUTED_ATTACHMENT: 79 return new BAttributedMailAttachment; 80 case B_MAIL_MULTIPART_CONTAINER: 81 return new BMIMEMultipartMailContainer (NULL, NULL, _charSetForTextDecoding); 82 case B_MAIL_PLAIN_TEXT_BODY: 83 default: 84 return new BTextMailComponent (NULL, _charSetForTextDecoding); 85 } 86 } 87 88 bool BMailComponent::IsAttachment() { 89 const char *disposition = HeaderField("Content-Disposition"); 90 if ((disposition != NULL) && (strncasecmp(disposition,"Attachment",strlen("Attachment")) == 0)) 91 return true; 92 93 BMessage header; 94 HeaderField("Content-Type",&header); 95 if (header.HasString("name")) 96 return true; 97 98 if (HeaderField("Content-Location",&header) == B_OK) 99 return true; 100 101 BMimeType type; 102 MIMEType(&type); 103 if (type == "multipart/x-bfile") 104 return true; 105 106 return false; 107 } 108 109 110 void BMailComponent::SetHeaderField(const char *key, const char *value, uint32 charset, mail_encoding encoding, bool replace_existing) { 111 if (replace_existing) 112 headers.RemoveName(key); 113 if (value != NULL && value[0] != 0) // Empty or NULL strings mean delete header. 114 headers.AddString(key,value); 115 116 // Latest setting of the character set and encoding to use when outputting 117 // the headers is the one which affects all the headers. There used to be 118 // separate settings for each item in the headers, but it never actually 119 // worked (can't store multiple items of different types in a BMessage). 120 if (charset != B_MAIL_NULL_CONVERSION && 121 headers.ReplaceInt32 (kHeaderCharsetString, charset) != B_OK) 122 headers.AddInt32 (kHeaderCharsetString, charset); 123 if (encoding != null_encoding && 124 headers.ReplaceInt8 (kHeaderEncodingString, encoding) != B_OK) 125 headers.AddInt8 (kHeaderEncodingString, encoding); 126 } 127 128 129 void BMailComponent::SetHeaderField(const char *key, BMessage *structure, bool replace_existing) { 130 int32 charset = B_MAIL_NULL_CONVERSION; 131 int8 encoding = null_encoding; 132 const char *unlabeled = "unlabeled"; 133 134 if (replace_existing) 135 headers.RemoveName(key); 136 137 BString value; 138 if (structure->HasString(unlabeled)) 139 value << structure->FindString(unlabeled) << "; "; 140 141 const char *name, *sub_val; 142 type_code type; 143 for (int32 i = 0; structure->GetInfo(B_STRING_TYPE,i, 144 #if !defined(HAIKU_TARGET_PLATFORM_DANO) 145 (char**) 146 #endif 147 &name,&type) == B_OK; i++) 148 { 149 if (strcasecmp(name, unlabeled) == 0) 150 continue; 151 152 structure->FindString(name, &sub_val); 153 value << name << '='; 154 if (BString(sub_val).FindFirst(' ') > 0) 155 value << '\"' << sub_val << "\"; "; 156 else 157 value << sub_val << "; "; 158 } 159 160 value.Truncate(value.Length() - 2); //-----Remove the last "; " 161 162 if (structure->HasInt32(kHeaderCharsetString)) 163 structure->FindInt32(kHeaderCharsetString, &charset); 164 if (structure->HasInt8(kHeaderEncodingString)) 165 structure->FindInt8(kHeaderEncodingString, &encoding); 166 167 SetHeaderField(key,value.String(),(uint32) charset, (mail_encoding) encoding); 168 } 169 170 const char *BMailComponent::HeaderField(const char *key, int32 index) { 171 const char *string = NULL; 172 173 headers.FindString(key,index,&string); 174 return string; 175 } 176 177 status_t BMailComponent::HeaderField(const char *key, BMessage *structure, int32 index) { 178 BString string = HeaderField(key,index); 179 if (string == "") 180 return B_NAME_NOT_FOUND; 181 182 BString sub_cat,end_piece; 183 int32 i = 0, end = 0; 184 185 // Break the header into parts, they're separated by semicolons, like this: 186 // Content-Type: multipart/mixed;boundary= "----=_NextPart_000_00AA_354DB459.5977A1CA" 187 // There's also white space and quotes to be removed, and even comments in 188 // parenthesis like this, which can appear anywhere white space is: (header comment) 189 190 while (end < string.Length()) { 191 end = string.FindFirst(';',i); 192 if (end < 0) 193 end = string.Length(); 194 195 string.CopyInto(sub_cat,i,end - i); 196 i = end + 1; 197 198 //-------Trim spaces off of beginning and end of text 199 for (int32 h = 0; h < sub_cat.Length(); h++) { 200 if (!isspace(sub_cat.ByteAt(h))) { 201 sub_cat.Remove(0,h); 202 break; 203 } 204 } 205 for (int32 h = sub_cat.Length()-1; h >= 0; h--) { 206 if (!isspace(sub_cat.ByteAt(h))) { 207 sub_cat.Truncate(h+1); 208 break; 209 } 210 } 211 //--------Split along '=' 212 int32 first_equal = sub_cat.FindFirst('='); 213 if (first_equal >= 0) { 214 sub_cat.CopyInto(end_piece,first_equal+1,sub_cat.Length() - first_equal - 1); 215 sub_cat.Truncate(first_equal); 216 // Remove leading spaces from part after the equals sign. 217 while (isspace (end_piece.ByteAt(0))) 218 end_piece.Remove (0 /* index */, 1 /* number of chars */); 219 // Remove quote marks. 220 if (end_piece.ByteAt(0) == '\"') { 221 end_piece.Remove(0,1); 222 end_piece.Truncate(end_piece.Length() - 1); 223 } 224 sub_cat.ToLower(); 225 structure->AddString(sub_cat.String(),end_piece.String()); 226 } else { 227 structure->AddString("unlabeled",sub_cat.String()); 228 } 229 } 230 231 return B_OK; 232 } 233 234 status_t BMailComponent::RemoveHeader(const char *key) { 235 return headers.RemoveName(key); 236 } 237 238 const char *BMailComponent::HeaderAt(int32 index) { 239 #if defined(HAIKU_TARGET_PLATFORM_DANO) 240 const 241 #endif 242 char *name = NULL; 243 type_code type; 244 245 headers.GetInfo(B_STRING_TYPE,index,&name,&type); 246 return name; 247 } 248 249 status_t BMailComponent::GetDecodedData(BPositionIO *) {return B_OK;} 250 status_t BMailComponent::SetDecodedData(BPositionIO *) {return B_OK;} 251 252 status_t 253 BMailComponent::SetToRFC822(BPositionIO *data, size_t /*length*/, bool /*parse_now*/) 254 { 255 headers.MakeEmpty(); 256 257 // Only parse the header here 258 return parse_header(headers, *data); 259 } 260 261 262 status_t 263 BMailComponent::RenderToRFC822(BPositionIO *render_to) { 264 int32 charset = B_ISO15_CONVERSION; 265 int8 encoding = quoted_printable; 266 const char *key, *value; 267 char *allocd; 268 ssize_t amountWritten; 269 BString concat; 270 type_code stupidity_personified = B_STRING_TYPE; 271 int32 count = 0; 272 273 if (headers.HasInt32 (kHeaderCharsetString)) 274 headers.FindInt32 (kHeaderCharsetString, &charset); 275 if (headers.HasInt8 (kHeaderEncodingString)) 276 headers.FindInt8 (kHeaderEncodingString, &encoding); 277 278 for (int32 index = 0; headers.GetInfo(B_STRING_TYPE,index, 279 #if !defined(HAIKU_TARGET_PLATFORM_DANO) 280 (char**) 281 #endif 282 &key,&stupidity_personified,&count) == B_OK; index++) { 283 for (int32 g = 0; g < count; g++) { 284 headers.FindString(key,g,(const char **)&value); 285 allocd = (char *)malloc(strlen(value) + 1); 286 strcpy(allocd,value); 287 288 concat << key << ": "; 289 concat.CapitalizeEachWord(); 290 291 concat.Append(allocd,utf8_to_rfc2047(&allocd, strlen(value), charset, encoding)); 292 free(allocd); 293 FoldLineAtWhiteSpaceAndAddCRLF (concat); 294 295 amountWritten = render_to->Write(concat.String(), concat.Length()); 296 if (amountWritten < 0) 297 return amountWritten; // IO error happened, usually disk full. 298 concat = ""; 299 } 300 } 301 302 render_to->Write("\r\n", 2); 303 304 return B_OK; 305 } 306 307 308 status_t BMailComponent::MIMEType(BMimeType *mime) { 309 bool foundBestHeader; 310 const char *boundaryString; 311 unsigned int i; 312 BMessage msg; 313 const char *typeAsString = NULL; 314 char typeAsLowerCaseString [B_MIME_TYPE_LENGTH]; 315 316 // Find the best Content-Type header to use. There should really be just 317 // one, but evil spammers sneakily insert one for multipart (with no 318 // boundary string), then one for text/plain. We'll scan through them and 319 // only use the multipart one if there are no others, and it has a 320 // boundary. 321 322 foundBestHeader = false; 323 for (i = 0; msg.MakeEmpty(), HeaderField("Content-Type", &msg, i) == B_OK; i++) { 324 typeAsString = msg.FindString("unlabeled"); 325 if (typeAsString != NULL && strncasecmp (typeAsString, "multipart", 9) != 0) { 326 foundBestHeader = true; 327 break; 328 } 329 } 330 if (!foundBestHeader) { 331 for (i = 0; msg.MakeEmpty(), HeaderField("Content-Type", &msg, i) == B_OK; i++) { 332 typeAsString = msg.FindString("unlabeled"); 333 if (typeAsString != NULL && strncasecmp (typeAsString, "multipart", 9) == 0) { 334 boundaryString = msg.FindString("boundary"); 335 if (boundaryString != NULL && strlen (boundaryString) > 0) { 336 foundBestHeader = true; 337 break; 338 } 339 } 340 } 341 } 342 // At this point we have the good MIME type in typeAsString, but only if 343 // foundBestHeader is true. 344 345 if (!foundBestHeader) { 346 strcpy (typeAsLowerCaseString, "text/plain"); // Hope this is an OK default. 347 } else { 348 // Some extra processing to convert mixed or upper case MIME types into 349 // lower case, since the BeOS R5 BMimeType is case sensitive (but OpenBeOS 350 // isn't). Also truncate the string if it is too long. 351 for (i = 0; i < sizeof (typeAsLowerCaseString) - 1 && typeAsString[i] != 0; i++) 352 typeAsLowerCaseString[i] = tolower (typeAsString[i]); 353 typeAsLowerCaseString[i] = 0; 354 355 // Some old e-mail programs saved the type as just "TEXT", which we need to 356 // convert to "text/plain" since the rest of the code looks for that. 357 if (strcmp (typeAsLowerCaseString, "text") == 0) 358 strcpy (typeAsLowerCaseString, "text/plain"); 359 } 360 mime->SetTo(typeAsLowerCaseString); 361 return B_OK; 362 } 363 364 365 void BMailComponent::_ReservedComponent1() {} 366 void BMailComponent::_ReservedComponent2() {} 367 void BMailComponent::_ReservedComponent3() {} 368 void BMailComponent::_ReservedComponent4() {} 369 void BMailComponent::_ReservedComponent5() {} 370 371 372 //------------------------------------------------------------------------- 373 // #pragma mark - 374 375 376 BTextMailComponent::BTextMailComponent(const char *text, uint32 defaultCharSet) 377 : BMailComponent(defaultCharSet), 378 encoding(quoted_printable), 379 charset(B_ISO15_CONVERSION), 380 raw_data(NULL) 381 { 382 if (text != NULL) 383 SetText(text); 384 385 SetHeaderField("MIME-Version","1.0"); 386 } 387 388 BTextMailComponent::~BTextMailComponent() 389 { 390 } 391 392 void BTextMailComponent::SetEncoding(mail_encoding encoding, int32 charset) { 393 this->encoding = encoding; 394 this->charset = charset; 395 } 396 397 void BTextMailComponent::SetText(const char *text) { 398 this->text.SetTo(text); 399 400 raw_data = NULL; 401 } 402 403 void BTextMailComponent::AppendText(const char *text) { 404 ParseRaw(); 405 406 this->text << text; 407 } 408 409 const char *BTextMailComponent::Text() { 410 ParseRaw(); 411 412 return text.String(); 413 } 414 415 BString *BTextMailComponent::BStringText() { 416 ParseRaw(); 417 418 return &text; 419 } 420 421 void BTextMailComponent::Quote(const char *message, const char *quote_style) { 422 ParseRaw(); 423 424 BString string; 425 string << '\n' << quote_style; 426 text.ReplaceAll("\n",string.String()); 427 428 string = message; 429 string << '\n'; 430 text.Prepend(string.String()); 431 } 432 433 status_t BTextMailComponent::GetDecodedData(BPositionIO *data) { 434 ParseRaw(); 435 436 if (data == NULL) 437 return B_IO_ERROR; 438 439 BMimeType type; 440 BMimeType textAny ("text"); 441 ssize_t written; 442 if (MIMEType(&type) == B_OK && textAny.Contains (&type)) 443 // Write out the string which has been both decoded from quoted 444 // printable or base64 etc, and then converted to UTF-8 from whatever 445 // character set the message specified. Do it for text/html, 446 // text/plain and all other text datatypes. Of course, if the message 447 // is HTML and specifies a META tag for a character set, it will now be 448 // wrong. But then we don't display HTML in BeMail, yet. 449 written = data->Write(text.String(),text.Length()); 450 else 451 // Just write out whatever the binary contents are, only decoded from 452 // the quoted printable etc format. 453 written = data->Write(decoded.String(), decoded.Length()); 454 455 return written >= 0 ? B_OK : written; 456 } 457 458 status_t BTextMailComponent::SetDecodedData(BPositionIO *data) { 459 char buffer[255]; 460 size_t buf_len; 461 462 while ((buf_len = data->Read(buffer,254)) > 0) { 463 buffer[buf_len] = 0; 464 this->text << buffer; 465 } 466 467 raw_data = NULL; 468 469 return B_OK; 470 } 471 472 473 status_t 474 BTextMailComponent::SetToRFC822(BPositionIO *data, size_t length, bool parseNow) 475 { 476 off_t position = data->Position(); 477 BMailComponent::SetToRFC822(data, length); 478 479 // Some malformed MIME headers can have the header running into the 480 // boundary of the next MIME chunk, resulting in a negative length. 481 length -= data->Position() - position; 482 if ((ssize_t) length < 0) 483 length = 0; 484 485 raw_data = data; 486 raw_length = length; 487 raw_offset = data->Position(); 488 489 if (parseNow) { 490 // copies the data stream and sets the raw_data variable to NULL 491 return ParseRaw(); 492 } 493 494 return B_OK; 495 } 496 497 498 status_t 499 BTextMailComponent::ParseRaw() 500 { 501 if (raw_data == NULL) 502 return B_OK; 503 504 raw_data->Seek(raw_offset, SEEK_SET); 505 506 BMessage content_type; 507 HeaderField("Content-Type", &content_type); 508 509 charset = _charSetForTextDecoding; 510 if (charset == B_MAIL_NULL_CONVERSION && content_type.HasString("charset")) { 511 const char * charset_string = content_type.FindString("charset"); 512 if (strcasecmp(charset_string, "us-ascii") == 0) { 513 charset = B_MAIL_US_ASCII_CONVERSION; 514 } else if (strcasecmp(charset_string, "utf-8") == 0) { 515 charset = B_MAIL_UTF8_CONVERSION; 516 } else { 517 const BCharacterSet * cs = BCharacterSetRoster::FindCharacterSetByName(charset_string); 518 if (cs != NULL) { 519 charset = cs->GetConversionID(); 520 } 521 } 522 } 523 524 encoding = encoding_for_cte(HeaderField("Content-Transfer-Encoding")); 525 526 char *buffer = (char *)malloc(raw_length + 1); 527 if (buffer == NULL) 528 return B_NO_MEMORY; 529 530 int32 bytes; 531 if ((bytes = raw_data->Read(buffer, raw_length)) < 0) 532 return B_IO_ERROR; 533 534 char *string = decoded.LockBuffer(bytes + 1); 535 bytes = decode(encoding, string, buffer, bytes, 0); 536 free (buffer); 537 buffer = NULL; 538 539 // Change line ends from \r\n to just \n. Though this won't work properly 540 // for UTF-16 because \r takes up two bytes rather than one. 541 char *dest, *src; 542 char *end = string + bytes; 543 for (dest = src = string; src < end; src++) { 544 if (*src != '\r') 545 *dest++ = *src; 546 } 547 decoded.UnlockBuffer(dest - string); 548 bytes = decoded.Length(); // Might have shrunk a bit. 549 550 // If the character set wasn't specified, try to guess. ISO-2022-JP 551 // contains the escape sequences ESC $ B or ESC $ @ to turn on 2 byte 552 // Japanese, and ESC ( J to switch to Roman, or sometimes ESC ( B for 553 // ASCII. We'll just try looking for the two switch to Japanese sequences. 554 555 if (charset == B_MAIL_NULL_CONVERSION) { 556 if (decoded.FindFirst ("\e$B") >= 0 || decoded.FindFirst ("\e$@") >= 0) 557 charset = B_JIS_CONVERSION; 558 else // Just assume the usual Latin-9 character set. 559 charset = B_ISO15_CONVERSION; 560 } 561 562 int32 state = 0; 563 int32 destLength = bytes * 3 /* in case it grows */ + 1 /* +1 so it isn't zero which crashes */; 564 string = text.LockBuffer(destLength); 565 mail_convert_to_utf8(charset, decoded.String(), &bytes, string, &destLength, &state); 566 if (destLength > 0) 567 text.UnlockBuffer(destLength); 568 else { 569 text.UnlockBuffer(0); 570 text.SetTo(decoded); 571 } 572 573 raw_data = NULL; 574 return B_OK; 575 } 576 577 578 status_t 579 BTextMailComponent::RenderToRFC822(BPositionIO *render_to) 580 { 581 status_t status = ParseRaw(); 582 if (status < B_OK) 583 return status; 584 585 BMimeType type; 586 MIMEType(&type); 587 BString content_type; 588 content_type << type.Type(); // Preserve MIME type (e.g. text/html 589 590 for (uint32 i = 0; mail_charsets[i].charset != NULL; i++) { 591 if (mail_charsets[i].flavor == charset) { 592 content_type << "; charset=\"" << mail_charsets[i].charset << "\""; 593 break; 594 } 595 } 596 597 SetHeaderField("Content-Type", content_type.String()); 598 599 const char *transfer_encoding = NULL; 600 switch (encoding) { 601 case base64: 602 transfer_encoding = "base64"; 603 break; 604 case quoted_printable: 605 transfer_encoding = "quoted-printable"; 606 break; 607 case eight_bit: 608 transfer_encoding = "8bit"; 609 break; 610 case seven_bit: 611 default: 612 transfer_encoding = "7bit"; 613 break; 614 } 615 616 SetHeaderField("Content-Transfer-Encoding",transfer_encoding); 617 618 BMailComponent::RenderToRFC822(render_to); 619 620 BString modified = this->text; 621 BString alt; 622 623 int32 len = this->text.Length(); 624 if (len > 0) { 625 int32 dest_len = len * 5; 626 // Shift-JIS can have a 3 byte escape sequence and a 2 byte code for 627 // each character (which could just be 2 bytes in UTF-8, or even 1 byte 628 // if it's regular ASCII), so it can get quite a bit larger than the 629 // original text. Multiplying by 5 should make more than enough space. 630 char *raw = alt.LockBuffer(dest_len); 631 int32 state = 0; 632 mail_convert_from_utf8(charset,this->text.String(),&len,raw,&dest_len,&state); 633 alt.UnlockBuffer(dest_len); 634 635 raw = modified.LockBuffer((alt.Length()*3)+1); 636 switch (encoding) { 637 case base64: 638 len = encode_base64(raw,alt.String(),alt.Length(),false); 639 raw[len] = 0; 640 break; 641 case quoted_printable: 642 len = encode_qp(raw,alt.String(),alt.Length(),false); 643 raw[len] = 0; 644 break; 645 case eight_bit: 646 case seven_bit: 647 default: 648 len = alt.Length(); 649 strcpy(raw,alt.String()); 650 } 651 modified.UnlockBuffer(len); 652 653 if (encoding != base64) // encode_base64 already does CRLF line endings. 654 modified.ReplaceAll("\n","\r\n"); 655 656 // There seem to be a possibility of NULL bytes in the text, so lets 657 // filter them out, shouldn't be any after the encoding stage. 658 659 char *string = modified.LockBuffer(modified.Length()); 660 for (int32 i = modified.Length();i-- > 0;) 661 { 662 if (string[i] != '\0') 663 continue; 664 665 puts("BTextMailComponent::RenderToRFC822: NULL byte in text!!"); 666 string[i] = ' '; 667 } 668 modified.UnlockBuffer(); 669 670 // word wrapping is already done by BeMail (user-configurable) 671 // and it does it *MUCH* nicer. 672 673 // //------Desperate bid to wrap lines 674 // int32 curr_line_length = 0; 675 // int32 last_space = 0; 676 // 677 // for (int32 i = 0; i < modified.Length(); i++) { 678 // if (isspace(modified.ByteAt(i))) 679 // last_space = i; 680 // 681 // if ((modified.ByteAt(i) == '\r') && (modified.ByteAt(i+1) == '\n')) 682 // curr_line_length = 0; 683 // else 684 // curr_line_length++; 685 // 686 // if (curr_line_length > 80) { 687 // if (last_space >= 0) { 688 // modified.Insert("\r\n",last_space); 689 // last_space = -1; 690 // curr_line_length = 0; 691 // } 692 // } 693 // } 694 } 695 modified << "\r\n"; 696 697 render_to->Write(modified.String(),modified.Length()); 698 699 return B_OK; 700 } 701 702 703 void BTextMailComponent::_ReservedText1() {} 704 void BTextMailComponent::_ReservedText2() {} 705