1 /* 2 * Copyright 2011, Haiku, Inc. All rights reserved. 3 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved. 4 */ 5 6 7 #include <mail_util.h> 8 9 #include <stdlib.h> 10 #include <string.h> 11 #include <stdio.h> 12 #define __USE_GNU 13 #include <regex.h> 14 #include <ctype.h> 15 #include <errno.h> 16 17 #include <List.h> 18 #include <Locker.h> 19 #include <parsedate.h> 20 #include <String.h> 21 #include <UTF8.h> 22 23 #include <mail_encoding.h> 24 25 #include <CharacterSet.h> 26 #include <CharacterSetRoster.h> 27 28 29 using namespace BPrivate; 30 31 32 #define CRLF "\r\n" 33 34 struct CharsetConversionEntry { 35 const char *charset; 36 uint32 flavor; 37 }; 38 39 extern const CharsetConversionEntry mail_charsets[] = { 40 // In order of authority, so when searching for the name for a particular 41 // numbered conversion, start at the beginning of the array. 42 {"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD 43 {"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD 44 {"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD 45 {"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD 46 {"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD 47 {"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD 48 {"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD 49 {"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD 50 {"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD 51 {"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD 52 {"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD 53 {"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD 54 {"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD 55 56 {"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD 57 {"shift-jis", B_SJIS_CONVERSION}, 58 {"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD 59 {"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD 60 61 {"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD 62 {"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE? 63 {"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software 64 65 {"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD 66 {"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD 67 {"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD 68 69 {"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? ) 70 {"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? ) 71 {"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? ) 72 73 {"big5", 24}, // MIME STANDARD 74 75 {"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? ) 76 {"gb2312", 25}, // COMPATIBLE 77 {"gbk", 25}, // COMPATIBLE 78 79 /* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */ 80 {"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD 81 {"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD 82 83 {NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */ 84 }; 85 86 87 static int32 gLocker = 0; 88 static size_t gNsub = 1; 89 static re_pattern_buffer gRe; 90 static re_pattern_buffer *gRebuf = NULL; 91 static unsigned char gTranslation[256]; 92 93 94 static int 95 handle_non_rfc2047_encoding(char **buffer, size_t *bufferLength, 96 size_t *sourceLength) 97 { 98 char *string = *buffer; 99 int32 length = *sourceLength; 100 int32 i; 101 102 // check for 8-bit characters 103 for (i = 0;i < length;i++) 104 if (string[i] & 0x80) 105 break; 106 if (i == length) 107 return false; 108 109 // check for groups of 8-bit characters - this code is not very smart; 110 // it just can detect some sort of single-byte encoded stuff, the rest 111 // is regarded as UTF-8 112 113 int32 singletons = 0,doubles = 0; 114 115 for (i = 0;i < length;i++) 116 { 117 if (string[i] & 0x80) 118 { 119 if ((string[i + 1] & 0x80) == 0) 120 singletons++; 121 else doubles++; 122 i++; 123 } 124 } 125 126 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1 127 { 128 int32 state = 0; 129 // just to be sure 130 int32 destLength = length * 4 + 1; 131 int32 destBufferLength = destLength; 132 char *dest = (char*)malloc(destLength); 133 if (dest == NULL) 134 return 0; 135 136 if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest, 137 &destLength, &state) == B_OK) { 138 *buffer = dest; 139 *bufferLength = destBufferLength; 140 *sourceLength = destLength; 141 return true; 142 } 143 free(dest); 144 return false; 145 } 146 147 // we assume a valid UTF-8 string here, but yes, we don't check it 148 return true; 149 } 150 151 152 // #pragma mark - 153 154 155 status_t 156 write_read_attr(BNode& node, read_flags flag) 157 { 158 if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32)) 159 < 0) 160 return B_ERROR; 161 162 // manage the status string only if it currently has a "read" status 163 BString currentStatus; 164 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, ¤tStatus) == B_OK) { 165 if (currentStatus.ICompare("New") != 0 166 && currentStatus.ICompare("Read") != 0 167 && currentStatus.ICompare("Seen") != 0) 168 return B_OK; 169 } 170 171 const char* statusString = flag == B_READ ? "Read" 172 : flag == B_SEEN ? "Seen" : "New"; 173 if (node.WriteAttr(B_MAIL_ATTR_STATUS, B_STRING_TYPE, 0, statusString, 174 strlen(statusString)) < 0) 175 return B_ERROR; 176 177 return B_OK; 178 } 179 180 181 status_t 182 read_read_attr(BNode& node, read_flags& flag) 183 { 184 if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32)) 185 == sizeof(int32)) 186 return B_OK; 187 188 BString statusString; 189 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) { 190 if (statusString.ICompare("New")) 191 flag = B_UNREAD; 192 else 193 flag = B_READ; 194 195 return B_OK; 196 } 197 198 return B_ERROR; 199 } 200 201 202 // The next couple of functions are our wrapper around convert_to_utf8 and 203 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by 204 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation. 205 // It also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION. 206 207 208 status_t 209 mail_convert_to_utf8(uint32 srcEncoding, const char *src, int32 *srcLen, 210 char *dst, int32 *dstLen, int32 *state, char substitute) 211 { 212 int32 copyAmount; 213 char *originalDst = dst; 214 status_t returnCode = -1; 215 216 if (srcEncoding == B_MAIL_UTF8_CONVERSION) { 217 copyAmount = *srcLen; 218 if (*dstLen < copyAmount) 219 copyAmount = *dstLen; 220 memcpy (dst, src, copyAmount); 221 *srcLen = copyAmount; 222 *dstLen = copyAmount; 223 returnCode = B_OK; 224 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) { 225 int32 i; 226 unsigned char letter; 227 copyAmount = *srcLen; 228 if (*dstLen < copyAmount) 229 copyAmount = *dstLen; 230 for (i = 0; i < copyAmount; i++) { 231 letter = *src++; 232 if (letter > 0x80U) 233 // Invalid, could also use substitute, but better to strip high bit. 234 *dst++ = letter - 0x80U; 235 else if (letter == 0x80U) 236 // Can't convert to 0x00 since that's NUL, which would cause problems. 237 *dst++ = substitute; 238 else 239 *dst++ = letter; 240 } 241 *srcLen = copyAmount; 242 *dstLen = copyAmount; 243 returnCode = B_OK; 244 } else 245 returnCode = convert_to_utf8 (srcEncoding, src, srcLen, 246 dst, dstLen, state, substitute); 247 248 if (returnCode == B_OK) { 249 // Replace spurious NUL bytes, which should normally not be in the 250 // output of the decoding (not normal UTF-8 characters, and no NULs are 251 // in our usual input strings). They happen for some odd ISO-2022-JP 252 // byte pair combinations which are improperly handled by the BeOS 253 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the 254 // first ESC $ B switches to a Japanese character set, then the next 255 // two bytes "yD" specify a character, then ESC ( B switches back to 256 // the ASCII character set. The UTF-8 conversion yields a NUL byte. 257 int32 i; 258 for (i = 0; i < *dstLen; i++) 259 if (originalDst[i] == 0) 260 originalDst[i] = substitute; 261 } 262 return returnCode; 263 } 264 265 266 status_t 267 mail_convert_from_utf8(uint32 dstEncoding, const char *src, int32 *srcLen, 268 char *dst, int32 *dstLen, int32 *state, char substitute) 269 { 270 int32 copyAmount; 271 status_t errorCode; 272 int32 originalDstLen = *dstLen; 273 int32 tempDstLen; 274 int32 tempSrcLen; 275 276 if (dstEncoding == B_MAIL_UTF8_CONVERSION) { 277 copyAmount = *srcLen; 278 if (*dstLen < copyAmount) 279 copyAmount = *dstLen; 280 memcpy (dst, src, copyAmount); 281 *srcLen = copyAmount; 282 *dstLen = copyAmount; 283 return B_OK; 284 } 285 286 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) { 287 int32 characterLength; 288 int32 dstRemaining = *dstLen; 289 unsigned char letter; 290 int32 srcRemaining = *srcLen; 291 292 // state contains the number of source bytes to skip, left over from a 293 // partial UTF-8 character split over the end of the buffer from last 294 // time. 295 if (srcRemaining <= *state) { 296 *state -= srcRemaining; 297 *dstLen = 0; 298 return B_OK; 299 } 300 srcRemaining -= *state; 301 src += *state; 302 *state = 0; 303 304 while (true) { 305 if (srcRemaining <= 0 || dstRemaining <= 0) 306 break; 307 letter = *src; 308 if (letter < 0x80) 309 characterLength = 1; // Regular ASCII equivalent code. 310 else if (letter < 0xC0) 311 characterLength = 1; // Invalid in-between data byte 10xxxxxx. 312 else if (letter < 0xE0) 313 characterLength = 2; 314 else if (letter < 0xF0) 315 characterLength = 3; 316 else if (letter < 0xF8) 317 characterLength = 4; 318 else if (letter < 0xFC) 319 characterLength = 5; 320 else if (letter < 0xFE) 321 characterLength = 6; 322 else 323 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8. 324 if (letter < 0x80) 325 *dst++ = *src; 326 else 327 *dst++ = substitute; 328 dstRemaining--; 329 if (srcRemaining < characterLength) { 330 // Character split past the end of the buffer. 331 *state = characterLength - srcRemaining; 332 srcRemaining = 0; 333 } else { 334 src += characterLength; 335 srcRemaining -= characterLength; 336 } 337 } 338 // Update with the amounts used. 339 *srcLen = *srcLen - srcRemaining; 340 *dstLen = *dstLen - dstRemaining; 341 return B_OK; 342 } 343 344 errorCode = convert_from_utf8(dstEncoding, src, srcLen, dst, dstLen, state, 345 substitute); 346 if (errorCode != B_OK) 347 return errorCode; 348 349 if (dstEncoding != B_JIS_CONVERSION) 350 return B_OK; 351 352 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different 353 // character subsets. For E-mail headers (and other uses), it needs to be 354 // switched back to ASCII at the end (otherwise the last character gets 355 // lost or other weird things happen in the headers). Note that we can't 356 // just append the escape code since the convert_from_utf8 "state" will be 357 // wrong. So we append an ASCII letter and throw it away, leaving just the 358 // escape code. Well, it actually switches to the Roman character set, not 359 // ASCII, but that should be OK. 360 361 tempDstLen = originalDstLen - *dstLen; 362 if (tempDstLen < 3) // Not enough space remaining in the output. 363 return B_OK; // Sort of an error, but we did convert the rest OK. 364 tempSrcLen = 1; 365 errorCode = convert_from_utf8(dstEncoding, "a", &tempSrcLen, 366 dst + *dstLen, &tempDstLen, state, substitute); 367 if (errorCode != B_OK) 368 return errorCode; 369 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */; 370 return B_OK; 371 } 372 373 374 ssize_t 375 rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen) 376 { 377 char *head, *tail; 378 char *charset, *encoding, *end; 379 ssize_t ret = B_OK; 380 381 if (bufp == NULL || *bufp == NULL) 382 return -1; 383 384 char *string = *bufp; 385 386 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail 387 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen)) 388 return strLen; 389 390 // set up string length 391 if (strLen == 0) 392 strLen = strlen(*bufp); 393 char lastChar = (*bufp)[strLen]; 394 (*bufp)[strLen] = '\0'; 395 396 //---------Whew! Now for RFC compliant mail 397 bool encodedWordFoundPreviously = false; 398 for (head = tail = string; 399 ((charset = strstr(tail, "=?")) != NULL) 400 && (((encoding = strchr(charset + 2, '?')) != NULL) 401 && encoding[1] && (encoding[2] == '?') && encoding[3]) 402 && (end = strstr(encoding + 3, "?=")) != NULL; 403 // found "=?...charset...?e?...text...?= (e == encoding) 404 // ^charset ^encoding ^end 405 tail = end) 406 { 407 // Copy non-encoded text (from tail up to charset) to the output. 408 // Ignore spaces between two encoded "words". RFC2047 says the words 409 // should be concatenated without the space (designed for Asian 410 // sentences which have no spaces yet need to be broken into "words" to 411 // keep within the line length limits). 412 bool nonSpaceFound = false; 413 for (int i = 0; i < charset-tail; i++) { 414 if (!isspace (tail[i])) { 415 nonSpaceFound = true; 416 break; 417 } 418 } 419 if (!encodedWordFoundPreviously || nonSpaceFound) { 420 if (string != tail && tail != charset) 421 memmove(string, tail, charset-tail); 422 string += charset-tail; 423 } 424 tail = charset; 425 encodedWordFoundPreviously = true; 426 427 // move things to point at what they should: 428 // =?...charset...?e?...text...?= (e == encoding) 429 // ^charset ^encoding ^end 430 charset += 2; 431 encoding += 1; 432 end += 2; 433 434 // find the charset this text is in now 435 size_t cLen = encoding - 1 - charset; 436 bool base64encoded = toupper(*encoding) == 'B'; 437 438 uint32 convertID = B_MAIL_NULL_CONVERSION; 439 char charsetName[cLen + 1]; 440 memcpy(charsetName, charset, cLen); 441 charsetName[cLen] = '\0'; 442 if (strcasecmp(charsetName, "us-ascii") == 0) { 443 convertID = B_MAIL_US_ASCII_CONVERSION; 444 } else if (strcasecmp(charsetName, "utf-8") == 0) { 445 convertID = B_MAIL_UTF8_CONVERSION; 446 } else { 447 const BCharacterSet* charSet 448 = BCharacterSetRoster::FindCharacterSetByName(charsetName); 449 if (charSet != NULL) { 450 convertID = charSet->GetConversionID(); 451 } 452 } 453 if (convertID == B_MAIL_NULL_CONVERSION) { 454 // unidentified charset 455 // what to do? doing nothing skips the encoded text; 456 // but we should keep it: we copy it to the output. 457 if (string != tail && tail != end) 458 memmove(string, tail, end-tail); 459 string += end-tail; 460 continue; 461 } 462 // else we've successfully identified the charset 463 464 char *src = encoding+2; 465 int32 srcLen = end - 2 - src; 466 // encoded text: src..src+srcLen 467 468 // decode text, get decoded length (reducing xforms) 469 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1) 470 : decode_base64(src, src, srcLen); 471 472 // allocate space for the converted text 473 int32 dstLen = end-string + *bufLen-strLen; 474 char *dst = (char*)malloc(dstLen); 475 int32 cvLen = srcLen; 476 int32 convState = 0; 477 478 // 479 // do the conversion 480 // 481 ret = mail_convert_to_utf8(convertID, src, &cvLen, dst, &dstLen, 482 &convState); 483 if (ret != B_OK) { 484 // what to do? doing nothing skips the encoded text 485 // but we should keep it: we copy it to the output. 486 487 free(dst); 488 489 if (string != tail && tail != end) 490 memmove(string, tail, end-tail); 491 string += end-tail; 492 continue; 493 } 494 /* convert_to_ is either returning something wrong or my 495 test data is screwed up. Whatever it is, Not Enough 496 Space is not the only cause of the below, so we just 497 assume it succeeds if it converts anything at all. 498 else if (cvLen < srcLen) 499 { 500 // not enough room to convert the data; 501 // grow *buf and retry 502 503 free(dst); 504 505 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1)); 506 if (temp == NULL) 507 { 508 ret = B_NO_MEMORY; 509 break; 510 } 511 512 *bufp = temp; 513 *bufLen = 2*(*bufLen + 1); 514 515 string = *bufp + (string-head); 516 tail = *bufp + (tail-head); 517 charset = *bufp + (charset-head); 518 encoding = *bufp + (encoding-head); 519 end = *bufp + (end-head); 520 src = *bufp + (src-head); 521 head = *bufp; 522 continue; 523 } 524 */ 525 else { 526 if (dstLen > end-string) { 527 // copy the string forward... 528 memmove(string+dstLen, end, strLen - (end-head) + 1); 529 strLen += string+dstLen - end; 530 end = string + dstLen; 531 } 532 533 memcpy(string, dst, dstLen); 534 string += dstLen; 535 free(dst); 536 continue; 537 } 538 } 539 540 // copy everything that's left 541 size_t tailLen = strLen - (tail - head); 542 memmove(string, tail, tailLen+1); 543 string += tailLen; 544 545 // replace the last char 546 (*bufp)[strLen] = lastChar; 547 548 return ret < B_OK ? ret : string-head; 549 } 550 551 552 ssize_t 553 utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding) 554 { 555 struct word { 556 BString originalWord; 557 BString convertedWord; 558 bool needsEncoding; 559 560 // Convert the word from UTF-8 to the desired character set. The 561 // converted version also includes the escape codes to return to ASCII 562 // mode, if relevant. Also note if it uses unprintable characters, 563 // which means it will need that special encoding treatment later. 564 void ConvertWordToCharset (uint32 charset) { 565 int32 state = 0; 566 int32 originalLength = originalWord.Length(); 567 int32 convertedLength = originalLength * 5 + 1; 568 char *convertedBuffer = convertedWord.LockBuffer (convertedLength); 569 mail_convert_from_utf8 (charset, originalWord.String(), 570 &originalLength, convertedBuffer, &convertedLength, &state); 571 for (int i = 0; i < convertedLength; i++) { 572 if ((convertedBuffer[i] & (1 << 7)) || 573 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) { 574 needsEncoding = true; 575 break; 576 } 577 } 578 convertedWord.UnlockBuffer (convertedLength); 579 }; 580 }; 581 struct word *currentWord; 582 BList words; 583 584 // Break the header into words. White space characters (including tabs and 585 // newlines) separate the words. Each word includes any space before it as 586 // part of the word. Actually, quotes and other special characters 587 // (",()<>@) are treated as separate words of their own so that they don't 588 // get encoded (because MIME headers get the quotes parsed before character 589 // set unconversion is done). The reader is supposed to ignore all white 590 // space between encoded words, which can be inserted so that older mail 591 // parsers don't have overly long line length problems. 592 593 const char *source = *bufp; 594 const char *bufEnd = *bufp + length; 595 const char *specialChars = "\"()<>@,"; 596 597 while (source < bufEnd) { 598 currentWord = new struct word; 599 currentWord->needsEncoding = false; 600 601 int wordEnd = 0; 602 603 // Include leading spaces as part of the word. 604 while (source + wordEnd < bufEnd && isspace (source[wordEnd])) 605 wordEnd++; 606 607 if (source + wordEnd < bufEnd && 608 strchr (specialChars, source[wordEnd]) != NULL) { 609 // Got a quote mark or other special character, which is treated as 610 // a word in itself since it shouldn't be encoded, which would hide 611 // it from the mail system. 612 wordEnd++; 613 } else { 614 // Find the end of the word. Leave wordEnd pointing just after the 615 // last character in the word. 616 while (source + wordEnd < bufEnd) { 617 if (isspace(source[wordEnd]) || 618 strchr (specialChars, source[wordEnd]) != NULL) 619 break; 620 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ && 621 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) { 622 // No English words are that long (46 is the longest), 623 // break up what is likely Asian text (which has no spaces) 624 // at the start of the next non-ASCII UTF-8 character (high 625 // two bits are both ones). Note that two encoded words in 626 // a row get joined together, even if there is a space 627 // between them in the final output text, according to the 628 // standard. Next word will also be conveniently get 629 // encoded due to the 0xC0 test. 630 currentWord->needsEncoding = true; 631 break; 632 } 633 wordEnd++; 634 } 635 } 636 currentWord->originalWord.SetTo (source, wordEnd); 637 currentWord->ConvertWordToCharset (charset); 638 words.AddItem(currentWord); 639 source += wordEnd; 640 } 641 642 // Combine adjacent words which contain unprintable text so that the 643 // overhead of switching back and forth between regular text and specially 644 // encoded text is reduced. However, the combined word must be shorter 645 // than the maximum of 75 bytes, including character set specification and 646 // all those delimiters (worst case 22 bytes of overhead). 647 648 struct word *run; 649 650 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) { 651 if (!currentWord->needsEncoding) 652 continue; // No need to combine unencoded words. 653 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) { 654 if (!run->needsEncoding) 655 break; // Don't want to combine encoded and unencoded words. 656 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) { 657 currentWord->originalWord.Append (run->originalWord); 658 currentWord->ConvertWordToCharset (charset); 659 words.RemoveItem(g); 660 delete run; 661 g--; 662 } else // Can't merge this word, result would be too long. 663 break; 664 } 665 } 666 667 // Combine the encoded and unencoded words into one line, doing the 668 // quoted-printable or base64 encoding. Insert an extra space between 669 // words which are both encoded to make word wrapping easier, since there 670 // is normally none, and you're allowed to insert space (the receiver 671 // throws it away if it is between encoded words). 672 673 BString rfc2047; 674 bool previousWordNeededEncoding = false; 675 676 const char *charset_dec = "none-bug"; 677 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) { 678 if (mail_charsets[i].flavor == charset) { 679 charset_dec = mail_charsets[i].charset; 680 break; 681 } 682 } 683 684 while ((currentWord = (struct word *)words.RemoveItem((int32)0)) != NULL) { 685 if ((encoding != quoted_printable && encoding != base64) || 686 !currentWord->needsEncoding) { 687 rfc2047.Append (currentWord->convertedWord); 688 } else { 689 // This word needs encoding. Try to insert a space between it and 690 // the previous word. 691 if (previousWordNeededEncoding) 692 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words. 693 else { 694 // Previous word is not encoded, spaces are significant. Try 695 // to move a space from the start of this word to be outside of 696 // the encoded text, so that there is a bit of space between 697 // this word and the previous one to enhance word wrapping 698 // chances later on. 699 if (currentWord->originalWord.Length() > 1 && 700 isspace (currentWord->originalWord[0])) { 701 rfc2047 << currentWord->originalWord[0]; 702 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */); 703 currentWord->ConvertWordToCharset (charset); 704 } 705 } 706 707 char *encoded = NULL; 708 ssize_t encoded_len = 0; 709 int32 convertedLength = currentWord->convertedWord.Length (); 710 const char *convertedBuffer = currentWord->convertedWord.String (); 711 712 switch (encoding) { 713 case quoted_printable: 714 encoded = (char *) malloc (convertedLength * 3); 715 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */); 716 break; 717 case base64: 718 encoded = (char *) malloc (convertedLength * 2); 719 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */); 720 break; 721 default: // Unknown encoding type, shouldn't happen. 722 encoded = (char *) convertedBuffer; 723 encoded_len = convertedLength; 724 break; 725 } 726 727 rfc2047 << "=?" << charset_dec << '?' << encoding << '?'; 728 rfc2047.Append (encoded, encoded_len); 729 rfc2047 << "?="; 730 731 if (encoding == quoted_printable || encoding == base64) 732 free(encoded); 733 } 734 previousWordNeededEncoding = currentWord->needsEncoding; 735 delete currentWord; 736 } 737 738 free(*bufp); 739 740 ssize_t finalLength = rfc2047.Length (); 741 *bufp = (char *) (malloc (finalLength + 1)); 742 memcpy (*bufp, rfc2047.String(), finalLength); 743 (*bufp)[finalLength] = 0; 744 745 return finalLength; 746 } 747 748 749 void 750 FoldLineAtWhiteSpaceAndAddCRLF(BString &string) 751 { 752 int inputLength = string.Length(); 753 int lineStartIndex; 754 const int maxLineLength = 78; // Doesn't include CRLF. 755 BString output; 756 int splitIndex; 757 int tempIndex; 758 759 lineStartIndex = 0; 760 while (true) { 761 // If we don't need to wrap the text, just output the remainder, if any. 762 763 if (lineStartIndex + maxLineLength >= inputLength) { 764 if (lineStartIndex < inputLength) { 765 output.Insert (string, lineStartIndex /* source offset */, 766 inputLength - lineStartIndex /* count */, 767 output.Length() /* insert at */); 768 output.Append (CRLF); 769 } 770 break; 771 } 772 773 // Look ahead for a convenient spot to split it, between a comma and 774 // space, which you often see between e-mail addresses like this: 775 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com 776 777 tempIndex = lineStartIndex + maxLineLength; 778 if (tempIndex > inputLength) 779 tempIndex = inputLength; 780 splitIndex = string.FindLast (", ", tempIndex); 781 if (splitIndex >= lineStartIndex) 782 splitIndex++; // Point to the space character. 783 784 // If none of those exist, try splitting at any white space. 785 786 if (splitIndex <= lineStartIndex) 787 splitIndex = string.FindLast (" ", tempIndex); 788 if (splitIndex <= lineStartIndex) 789 splitIndex = string.FindLast ("\t", tempIndex); 790 791 // If none of those exist, allow for a longer word - split at the next 792 // available white space. 793 794 if (splitIndex <= lineStartIndex) 795 splitIndex = string.FindFirst (" ", lineStartIndex + 1); 796 if (splitIndex <= lineStartIndex) 797 splitIndex = string.FindFirst ("\t", lineStartIndex + 1); 798 799 // Give up, the whole rest of the line can't be split, just dump it 800 // out. 801 802 if (splitIndex <= lineStartIndex) { 803 if (lineStartIndex < inputLength) { 804 output.Insert (string, lineStartIndex /* source offset */, 805 inputLength - lineStartIndex /* count */, 806 output.Length() /* insert at */); 807 output.Append (CRLF); 808 } 809 break; 810 } 811 812 // Do the split. The current line up to but not including the space 813 // gets output, followed by a CRLF. The space remains to become the 814 // start of the next line (and that tells the message reader that it is 815 // a continuation line). 816 817 output.Insert (string, lineStartIndex /* source offset */, 818 splitIndex - lineStartIndex /* count */, 819 output.Length() /* insert at */); 820 output.Append (CRLF); 821 lineStartIndex = splitIndex; 822 } 823 string.SetTo (output); 824 } 825 826 827 ssize_t 828 readfoldedline(FILE *file, char **buffer, size_t *buflen) 829 { 830 ssize_t len = buflen && *buflen ? *buflen : 0; 831 char * buf = buffer && *buffer ? *buffer : NULL; 832 ssize_t cnt = 0; // Number of characters currently in the buffer. 833 int c; 834 835 while (true) { 836 // Make sure there is space in the buffer for two more characters (one 837 // for the next character, and one for the end of string NUL byte). 838 if (buf == NULL || cnt + 2 >= len) { 839 char *temp = (char *)realloc(buf, len + 64); 840 if (temp == NULL) { 841 // Out of memory, however existing buffer remains allocated. 842 cnt = ENOMEM; 843 break; 844 } 845 len += 64; 846 buf = temp; 847 } 848 849 // Read the next character, or end of file, or IO error. 850 if ((c = fgetc(file)) == EOF) { 851 if (ferror (file)) { 852 cnt = errno; 853 if (cnt >= 0) 854 cnt = -1; // Error codes must be negative. 855 } else { 856 // Really is end of file. Also make it end of line if there is 857 // some text already read in. If the first thing read was EOF, 858 // just return an empty string. 859 if (cnt > 0) { 860 buf[cnt++] = '\n'; 861 if (buf[cnt-2] == '\r') { 862 buf[cnt-2] = '\n'; 863 --cnt; 864 } 865 } 866 } 867 break; 868 } 869 870 buf[cnt++] = c; 871 872 if (c == '\n') { 873 // Convert CRLF end of line to just a LF. Do it before folding, in 874 // case we don't need to fold. 875 if (cnt >= 2 && buf[cnt-2] == '\r') { 876 buf[cnt-2] = '\n'; 877 --cnt; 878 } 879 // If the current line is empty then return it (so that empty lines 880 // don't disappear if the next line starts with a space). 881 if (cnt <= 1) 882 break; 883 // Fold if first character on the next line is whitespace. 884 c = fgetc(file); // Note it's OK to read EOF and ungetc it too. 885 if (c == ' ' || c == '\t') 886 buf[cnt-1] = c; // Replace \n with the white space character. 887 else { 888 // Not folding, we finished reading a line; break out of the loop 889 ungetc(c,file); 890 break; 891 } 892 } 893 } 894 895 if (buf != NULL && cnt >= 0) 896 buf[cnt] = '\0'; 897 898 if (buffer) 899 *buffer = buf; 900 else if (buf) 901 free(buf); 902 903 if (buflen) 904 *buflen = len; 905 906 return cnt; 907 } 908 909 910 ssize_t 911 readfoldedline(BPositionIO &in, char **buffer, size_t *buflen) 912 { 913 ssize_t len = buflen && *buflen ? *buflen : 0; 914 char * buf = buffer && *buffer ? *buffer : NULL; 915 ssize_t cnt = 0; // Number of characters currently in the buffer. 916 char c; 917 status_t errorCode; 918 919 while (true) { 920 // Make sure there is space in the buffer for two more characters (one 921 // for the next character, and one for the end of string NUL byte). 922 if (buf == NULL || cnt + 2 >= len) { 923 char *temp = (char *)realloc(buf, len + 64); 924 if (temp == NULL) { 925 // Out of memory, however existing buffer remains allocated. 926 cnt = ENOMEM; 927 break; 928 } 929 len += 64; 930 buf = temp; 931 } 932 933 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered. 934 if (errorCode != 1) { 935 if (errorCode < 0) { 936 cnt = errorCode; // IO error encountered, just return the code. 937 } else { 938 // Really is end of file. Also make it end of line if there is 939 // some text already read in. If the first thing read was EOF, 940 // just return an empty string. 941 if (cnt > 0) { 942 buf[cnt++] = '\n'; 943 if (buf[cnt-2] == '\r') { 944 buf[cnt-2] = '\n'; 945 --cnt; 946 } 947 } 948 } 949 break; 950 } 951 952 buf[cnt++] = c; 953 954 if (c == '\n') { 955 // Convert CRLF end of line to just a LF. Do it before folding, in 956 // case we don't need to fold. 957 if (cnt >= 2 && buf[cnt-2] == '\r') { 958 buf[cnt-2] = '\n'; 959 --cnt; 960 } 961 // If the current line is empty then return it (so that empty lines 962 // don't disappear if the next line starts with a space). 963 if (cnt <= 1) 964 break; 965 // if first character on the next line is whitespace, fold lines 966 errorCode = in.Read(&c,1); 967 if (errorCode == 1) { 968 if (c == ' ' || c == '\t') 969 buf[cnt-1] = c; // Replace \n with the white space character. 970 else { 971 // Not folding, we finished reading a whole line. 972 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read. 973 break; 974 } 975 } else if (errorCode < 0) { 976 cnt = errorCode; 977 break; 978 } else // No next line; at the end of the file. Return the line. 979 break; 980 } 981 } 982 983 if (buf != NULL && cnt >= 0) 984 buf[cnt] = '\0'; 985 986 if (buffer) 987 *buffer = buf; 988 else if (buf) 989 free(buf); 990 991 if (buflen) 992 *buflen = len; 993 994 return cnt; 995 } 996 997 998 ssize_t 999 nextfoldedline(const char** header, char **buffer, size_t *buflen) 1000 { 1001 ssize_t len = buflen && *buflen ? *buflen : 0; 1002 char * buf = buffer && *buffer ? *buffer : NULL; 1003 ssize_t cnt = 0; // Number of characters currently in the buffer. 1004 char c; 1005 1006 while (true) 1007 { 1008 // Make sure there is space in the buffer for two more characters (one 1009 // for the next character, and one for the end of string NUL byte). 1010 if (buf == NULL || cnt + 2 >= len) 1011 { 1012 char *temp = (char *)realloc(buf, len + 64); 1013 if (temp == NULL) { 1014 // Out of memory, however existing buffer remains allocated. 1015 cnt = ENOMEM; 1016 break; 1017 } 1018 len += 64; 1019 buf = temp; 1020 } 1021 1022 // Read the next character, or end of file. 1023 if ((c = *(*header)++) == 0) { 1024 // End of file. Also make it end of line if there is some text 1025 // already read in. If the first thing read was EOF, just return 1026 // an empty string. 1027 if (cnt > 0) { 1028 buf[cnt++] = '\n'; 1029 if (buf[cnt-2] == '\r') { 1030 buf[cnt-2] = '\n'; 1031 --cnt; 1032 } 1033 } 1034 break; 1035 } 1036 1037 buf[cnt++] = c; 1038 1039 if (c == '\n') { 1040 // Convert CRLF end of line to just a LF. Do it before folding, in 1041 // case we don't need to fold. 1042 if (cnt >= 2 && buf[cnt-2] == '\r') { 1043 buf[cnt-2] = '\n'; 1044 --cnt; 1045 } 1046 // If the current line is empty then return it (so that empty lines 1047 // don't disappear if the next line starts with a space). 1048 if (cnt <= 1) 1049 break; 1050 // if first character on the next line is whitespace, fold lines 1051 c = *(*header)++; 1052 if (c == ' ' || c == '\t') 1053 buf[cnt-1] = c; // Replace \n with the white space character. 1054 else { 1055 // Not folding, we finished reading a line; break out of the loop 1056 (*header)--; // Undo read of the non-whitespace. 1057 break; 1058 } 1059 } 1060 } 1061 1062 1063 if (buf != NULL && cnt >= 0) 1064 buf[cnt] = '\0'; 1065 1066 if (buffer) 1067 *buffer = buf; 1068 else if (buf) 1069 free(buf); 1070 1071 if (buflen) 1072 *buflen = len; 1073 1074 return cnt; 1075 } 1076 1077 1078 void 1079 trim_white_space(BString &string) 1080 { 1081 int32 i; 1082 int32 length = string.Length(); 1083 char *buffer = string.LockBuffer(length + 1); 1084 1085 while (length > 0 && isspace(buffer[length - 1])) 1086 length--; 1087 buffer[length] = '\0'; 1088 1089 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {} 1090 if (i != 0) { 1091 length -= i; 1092 memmove(buffer,buffer + i,length + 1); 1093 } 1094 string.UnlockBuffer(length); 1095 } 1096 1097 1098 /*! Tries to return a human-readable name from the specified 1099 header parameter (should be from "To:" or "From:"). 1100 Tries to return the name rather than the eMail address. 1101 */ 1102 void 1103 extract_address_name(BString &header) 1104 { 1105 BString name; 1106 const char *start = header.String(); 1107 const char *stop = start + strlen (start); 1108 1109 // Find a string S in the header (email foo) that matches: 1110 // Old style name in brackets: foo@bar.com (S) 1111 // New style quotes: "S" <foo@bar.com> 1112 // New style no quotes if nothing else found: S <foo@bar.com> 1113 // If nothing else found then use the whole thing: S 1114 1115 for (int i = 0; i <= 3; i++) { 1116 // Set p1 to the first letter in the name and p2 to just past the last 1117 // letter in the name. p2 stays NULL if a name wasn't found in this 1118 // pass. 1119 const char *p1 = NULL, *p2 = NULL; 1120 1121 switch (i) { 1122 case 0: // foo@bar.com (S) 1123 if ((p1 = strchr(start,'(')) != NULL) { 1124 p1++; // Advance to first letter in the name. 1125 size_t nest = 1; // Handle nested brackets. 1126 for (p2 = p1; p2 < stop; ++p2) 1127 { 1128 if (*p2 == ')') 1129 --nest; 1130 else if (*p2 == '(') 1131 ++nest; 1132 if (nest <= 0) 1133 break; 1134 } 1135 if (nest != 0) 1136 p2 = NULL; // False alarm, no terminating bracket. 1137 } 1138 break; 1139 case 1: // "S" <foo@bar.com> 1140 if ((p1 = strchr(start, '\"')) != NULL) 1141 p2 = strchr(++p1, '\"'); 1142 break; 1143 case 2: // S <foo@bar.com> 1144 p1 = start; 1145 if (name.Length() == 0) 1146 p2 = strchr(start, '<'); 1147 break; 1148 case 3: // S 1149 p1 = start; 1150 if (name.Length() == 0) 1151 p2 = stop; 1152 break; 1153 } 1154 1155 // Remove leading and trailing space-like characters and save the 1156 // result if it is longer than any other likely names found. 1157 if (p2 != NULL) { 1158 while (p1 < p2 && (isspace (*p1))) 1159 ++p1; 1160 1161 while (p1 < p2 && (isspace (p2[-1]))) 1162 --p2; 1163 1164 int newLength = p2 - p1; 1165 if (name.Length() < newLength) 1166 name.SetTo(p1, newLength); 1167 } 1168 } 1169 1170 int32 lessIndex = name.FindFirst('<'); 1171 int32 greaterIndex = name.FindLast('>'); 1172 1173 if (lessIndex == 0) { 1174 // Have an address of the form <address> and nothing else, so remove 1175 // the greater and less than signs, if any. 1176 if (greaterIndex > 0) 1177 name.Remove(greaterIndex, 1); 1178 name.Remove(lessIndex, 1); 1179 } else if (lessIndex > 0 && lessIndex < greaterIndex) { 1180 // Yahoo stupidly inserts the e-mail address into the name string, so 1181 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com> 1182 name.Remove(lessIndex, greaterIndex - lessIndex + 1); 1183 } 1184 1185 trim_white_space(name); 1186 header = name; 1187 } 1188 1189 1190 /*! Given a subject in a BString, remove the extraneous RE: re: and other stuff 1191 to get down to the core subject string, which should be identical for all 1192 messages posted about a topic. The input string is modified in place to 1193 become the output core subject string. 1194 */ 1195 void 1196 SubjectToThread (BString &string) 1197 { 1198 // a regex that matches a non-ASCII UTF8 character: 1199 #define U8C \ 1200 "[\302-\337][\200-\277]" \ 1201 "|\340[\302-\337][\200-\277]" \ 1202 "|[\341-\357][\200-\277][\200-\277]" \ 1203 "|\360[\220-\277][\200-\277][\200-\277]" \ 1204 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \ 1205 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \ 1206 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1207 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1208 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]" 1209 1210 #define PATTERN \ 1211 "^ +" \ 1212 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1213 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1214 "| *\\(fwd\\) *$" 1215 1216 if (gRebuf == NULL && atomic_add(&gLocker, 1) == 0) { 1217 // the idea is to compile the regexp once to speed up testing 1218 1219 for (int i=0; i<256; ++i) gTranslation[i]=i; 1220 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i); 1221 1222 gRe.translate = gTranslation; 1223 gRe.regs_allocated = REGS_FIXED; 1224 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED; 1225 1226 const char *pattern = PATTERN; 1227 // count subexpressions in PATTERN 1228 for (unsigned int i=0; pattern[i] != 0; ++i) 1229 { 1230 if (pattern[i] == '\\') 1231 ++i; 1232 else if (pattern[i] == '(') 1233 ++gNsub; 1234 } 1235 1236 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe); 1237 if (err == NULL) 1238 gRebuf = &gRe; 1239 else 1240 fprintf(stderr, "Failed to compile the regex: %s\n", err); 1241 } else { 1242 int32 tries = 200; 1243 while (gRebuf == NULL && tries-- > 0) 1244 snooze(10000); 1245 } 1246 1247 if (gRebuf) { 1248 struct re_registers regs; 1249 // can't be static if this function is to be thread-safe 1250 1251 regs.num_regs = gNsub; 1252 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1253 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1254 1255 for (int start = 0; (start = re_search(gRebuf, string.String(), 1256 string.Length(), 0, string.Length(), ®s)) >= 0;) { 1257 // 1258 // we found something 1259 // 1260 1261 // don't delete [bemaildaemon]... 1262 if (start == regs.start[1]) 1263 start = regs.start[2]; 1264 1265 string.Remove(start,regs.end[0]-start); 1266 if (start) 1267 string.Insert(' ',1,start); 1268 1269 // TODO: for some subjects this results in an endless loop, check 1270 // why this happen. 1271 if (regs.end[0] - start <= 1) 1272 break; 1273 } 1274 1275 free(regs.start); 1276 free(regs.end); 1277 } 1278 1279 // Finally remove leading and trailing space. Some software, like 1280 // tm-edit 1.8, appends a space to the subject, which would break 1281 // threading if we left it in. 1282 trim_white_space(string); 1283 } 1284 1285 1286 /*! Converts a date to a time. Handles numeric time zones too, unlike 1287 parsedate(). Returns -1 if it fails. 1288 */ 1289 time_t 1290 ParseDateWithTimeZone(const char *DateString) 1291 { 1292 time_t currentTime; 1293 time_t dateAsTime; 1294 char tempDateString[80]; 1295 char tempZoneString[6]; 1296 time_t zoneDeltaTime; 1297 int zoneIndex; 1298 char *zonePntr; 1299 1300 // See if we can remove the time zone portion. parsedate understands time 1301 // zone 3 letter names, but doesn't understand the numeric +9999 time zone 1302 // format. To do: see if a newer parsedate exists. 1303 1304 strncpy (tempDateString, DateString, sizeof (tempDateString)); 1305 tempDateString[sizeof (tempDateString) - 1] = 0; 1306 1307 // Remove trailing spaces. 1308 zonePntr = tempDateString + strlen (tempDateString) - 1; 1309 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1310 *zonePntr-- = 0; 1311 if (zonePntr < tempDateString) 1312 return -1; // Empty string. 1313 1314 // Remove the trailing time zone in round brackets, like in 1315 // Fri, 22 Feb 2002 15:22:42 EST (-0500) 1316 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT) 1317 if (tempDateString[strlen(tempDateString)-1] == ')') 1318 { 1319 zonePntr = strrchr (tempDateString, '('); 1320 if (zonePntr != NULL) 1321 { 1322 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces. 1323 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1324 *zonePntr-- = 0; 1325 if (zonePntr < tempDateString) 1326 return -1; // Empty string. 1327 } 1328 } 1329 1330 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000 1331 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--) 1332 { 1333 zonePntr = tempDateString + zoneIndex; 1334 if (zonePntr[0] == '+' || zonePntr[0] == '-') 1335 { 1336 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' && 1337 zonePntr[2] >= '0' && zonePntr[2] <= '9' && 1338 zonePntr[3] >= '0' && zonePntr[3] <= '9' && 1339 zonePntr[4] >= '0' && zonePntr[4] <= '9') 1340 break; 1341 } 1342 } 1343 if (zoneIndex >= 0) 1344 { 1345 // Remove the zone from the date string and any following time zone 1346 // letter codes. Also put in GMT so that the date gets parsed as GMT. 1347 memcpy (tempZoneString, zonePntr, 5); 1348 tempZoneString [5] = 0; 1349 strcpy (zonePntr, "GMT"); 1350 } 1351 else // No numeric time zone found. 1352 strcpy (tempZoneString, "+0000"); 1353 1354 time (¤tTime); 1355 dateAsTime = parsedate (tempDateString, currentTime); 1356 if (dateAsTime == (time_t) -1) 1357 return -1; // Failure. 1358 1359 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes. 1360 tempZoneString[3] = 0; 1361 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours. 1362 if (tempZoneString[0] == '+') 1363 zoneDeltaTime = 0 - zoneDeltaTime; 1364 dateAsTime += zoneDeltaTime; 1365 1366 return dateAsTime; 1367 } 1368 1369 1370 /*! Parses a mail header and fills the headers BMessage 1371 */ 1372 status_t 1373 parse_header(BMessage &headers, BPositionIO &input) 1374 { 1375 char *buffer = NULL; 1376 size_t bufferSize = 0; 1377 int32 length; 1378 1379 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) { 1380 --length; 1381 // Don't include the \n at the end of the buffer. 1382 1383 // convert to UTF-8 and null-terminate the buffer 1384 length = rfc2047_to_utf8(&buffer, &bufferSize, length); 1385 buffer[length] = '\0'; 1386 1387 const char *delimiter = strstr(buffer, ":"); 1388 if (delimiter == NULL) 1389 continue; 1390 1391 BString header(buffer, delimiter - buffer); 1392 header.CapitalizeEachWord(); 1393 // unified case for later fetch 1394 1395 delimiter++; // Skip the colon. 1396 // Skip over leading white space and tabs. 1397 // TODO: (comments in brackets). 1398 while (isspace(*delimiter)) 1399 delimiter++; 1400 1401 // TODO: implement joining of multiple header tags (i.e. multiple "Cc:"s) 1402 headers.AddString(header.String(), delimiter); 1403 } 1404 free(buffer); 1405 1406 return B_OK; 1407 } 1408 1409 1410 status_t 1411 extract_from_header(const BString& header, const BString& field, 1412 BString& target) 1413 { 1414 int32 headerLength = header.Length(); 1415 int32 fieldEndPos = 0; 1416 while (true) { 1417 int32 pos = header.IFindFirst(field, fieldEndPos); 1418 if (pos < 0) 1419 return B_BAD_VALUE; 1420 fieldEndPos = pos + field.Length(); 1421 1422 if (pos != 0 && header.ByteAt(pos - 1) != '\n') 1423 continue; 1424 if (header.ByteAt(fieldEndPos) == ':') 1425 break; 1426 } 1427 fieldEndPos++; 1428 1429 int32 crPos = fieldEndPos; 1430 while (true) { 1431 fieldEndPos = crPos; 1432 crPos = header.FindFirst('\n', crPos); 1433 if (crPos < 0) 1434 crPos = headerLength; 1435 BString temp; 1436 header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos); 1437 if (header.ByteAt(crPos - 1) == '\r') { 1438 temp.Truncate(temp.Length() - 1); 1439 temp += " "; 1440 } 1441 target += temp; 1442 crPos++; 1443 if (crPos >= headerLength) 1444 break; 1445 char nextByte = header.ByteAt(crPos); 1446 if (nextByte != ' ' && nextByte != '\t') 1447 break; 1448 crPos++; 1449 } 1450 1451 size_t bufferSize = target.Length(); 1452 char* buffer = target.LockBuffer(bufferSize); 1453 size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize); 1454 target.UnlockBuffer(length); 1455 1456 trim_white_space(target); 1457 1458 return B_OK; 1459 } 1460 1461 1462 void 1463 extract_address(BString &address) 1464 { 1465 const char *string = address.String(); 1466 int32 first; 1467 1468 // first, remove all quoted text 1469 1470 if ((first = address.FindFirst('"')) >= 0) { 1471 int32 last = first + 1; 1472 while (string[last] && string[last] != '"') 1473 last++; 1474 1475 if (string[last] == '"') 1476 address.Remove(first, last + 1 - first); 1477 } 1478 1479 // try to extract the address now 1480 1481 if ((first = address.FindFirst('<')) >= 0) { 1482 // the world likes us and we can just get the address the easy way... 1483 int32 last = address.FindFirst('>'); 1484 if (last >= 0) { 1485 address.Truncate(last); 1486 address.Remove(0, first + 1); 1487 1488 return; 1489 } 1490 } 1491 1492 // then, see if there is anything in parenthesis to throw away 1493 1494 if ((first = address.FindFirst('(')) >= 0) { 1495 int32 last = first + 1; 1496 while (string[last] && string[last] != ')') 1497 last++; 1498 1499 if (string[last] == ')') 1500 address.Remove(first, last + 1 - first); 1501 } 1502 1503 // now, there shouldn't be much else left 1504 1505 trim_white_space(address); 1506 } 1507 1508 1509 void 1510 get_address_list(BList &list, const char *string, 1511 void (*cleanupFunc)(BString &)) 1512 { 1513 if (string == NULL || !string[0]) 1514 return; 1515 1516 const char *start = string; 1517 1518 while (true) { 1519 if (string[0] == '"') { 1520 const char *quoteEnd = ++string; 1521 1522 while (quoteEnd[0] && quoteEnd[0] != '"') 1523 quoteEnd++; 1524 1525 if (!quoteEnd[0]) // string exceeds line! 1526 quoteEnd = string; 1527 1528 string = quoteEnd + 1; 1529 } 1530 1531 if (string[0] == ',' || string[0] == '\0') { 1532 BString address(start, string - start); 1533 trim_white_space(address); 1534 1535 if (cleanupFunc) 1536 cleanupFunc(address); 1537 1538 list.AddItem(strdup(address.String())); 1539 1540 start = string + 1; 1541 } 1542 1543 if (!string[0]) 1544 break; 1545 1546 string++; 1547 } 1548 } 1549 1550