1 /* 2 * Copyright 2011-2016, Haiku, Inc. All rights reserved. 3 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved. 4 */ 5 6 7 #include <mail_util.h> 8 9 #include <stdlib.h> 10 #include <strings.h> 11 #include <stdio.h> 12 #define __USE_GNU 13 #include <regex.h> 14 #include <ctype.h> 15 #include <errno.h> 16 17 #include <FindDirectory.h> 18 #include <List.h> 19 #include <Locker.h> 20 #include <parsedate.h> 21 #include <Path.h> 22 #include <String.h> 23 #include <UTF8.h> 24 25 #include <mail_encoding.h> 26 27 #include <AttributeUtilities.h> 28 #include <CharacterSet.h> 29 #include <CharacterSetRoster.h> 30 31 32 using namespace BPrivate; 33 34 35 #define CRLF "\r\n" 36 37 struct CharsetConversionEntry { 38 const char *charset; 39 uint32 flavor; 40 }; 41 42 extern const CharsetConversionEntry mail_charsets[] = { 43 // In order of authority, so when searching for the name for a particular 44 // numbered conversion, start at the beginning of the array. 45 {"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD 46 {"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD 47 {"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD 48 {"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD 49 {"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD 50 {"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD 51 {"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD 52 {"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD 53 {"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD 54 {"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD 55 {"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD 56 {"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD 57 {"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD 58 59 {"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD 60 {"shift-jis", B_SJIS_CONVERSION}, 61 {"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD 62 {"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD 63 64 {"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD 65 {"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE? 66 {"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software 67 68 {"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD 69 {"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD 70 {"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD 71 72 {"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? ) 73 {"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? ) 74 {"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? ) 75 76 {"big5", 24}, // MIME STANDARD 77 78 {"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? ) 79 {"gb2312", 25}, // COMPATIBLE 80 {"gbk", 25}, // COMPATIBLE 81 82 /* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */ 83 {"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD 84 {"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD 85 86 {NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */ 87 }; 88 89 90 static int32 gLocker = 0; 91 static size_t gNsub = 1; 92 static re_pattern_buffer gRe; 93 static re_pattern_buffer *gRebuf = NULL; 94 static unsigned char gTranslation[256]; 95 96 97 static int 98 handle_non_rfc2047_encoding(char **buffer, size_t *bufferLength, 99 size_t *sourceLength) 100 { 101 char *string = *buffer; 102 int32 length = *sourceLength; 103 int32 i; 104 105 // check for 8-bit characters 106 for (i = 0;i < length;i++) 107 if (string[i] & 0x80) 108 break; 109 if (i == length) 110 return false; 111 112 // check for groups of 8-bit characters - this code is not very smart; 113 // it just can detect some sort of single-byte encoded stuff, the rest 114 // is regarded as UTF-8 115 116 int32 singletons = 0,doubles = 0; 117 118 for (i = 0;i < length;i++) 119 { 120 if (string[i] & 0x80) 121 { 122 if ((string[i + 1] & 0x80) == 0) 123 singletons++; 124 else doubles++; 125 i++; 126 } 127 } 128 129 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1 130 { 131 int32 state = 0; 132 // just to be sure 133 int32 destLength = length * 4 + 1; 134 int32 destBufferLength = destLength; 135 char *dest = (char*)malloc(destLength); 136 if (dest == NULL) 137 return 0; 138 139 if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest, 140 &destLength, &state) == B_OK) { 141 *buffer = dest; 142 *bufferLength = destBufferLength; 143 *sourceLength = destLength; 144 return true; 145 } 146 free(dest); 147 return false; 148 } 149 150 // we assume a valid UTF-8 string here, but yes, we don't check it 151 return true; 152 } 153 154 155 // #pragma mark - 156 157 158 status_t 159 write_read_attr(BNode& node, read_flags flag) 160 { 161 if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32)) 162 < 0) 163 return B_ERROR; 164 165 // Manage the status string only if it currently has a known state 166 BString currentStatus; 167 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, ¤tStatus) == B_OK 168 && currentStatus.ICompare("New") != 0 169 && currentStatus.ICompare("Read") != 0 170 && currentStatus.ICompare("Seen") != 0) { 171 return B_OK; 172 } 173 174 const char* statusString = flag == B_READ ? "Read" 175 : flag == B_SEEN ? "Seen" : "New"; 176 if (node.WriteAttr(B_MAIL_ATTR_STATUS, B_STRING_TYPE, 0, statusString, 177 strlen(statusString)) < 0) 178 return B_ERROR; 179 180 return B_OK; 181 } 182 183 184 status_t 185 read_read_attr(BNode& node, read_flags& flag) 186 { 187 if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32)) 188 == sizeof(int32)) 189 return B_OK; 190 191 BString statusString; 192 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) { 193 if (statusString.ICompare("New")) 194 flag = B_UNREAD; 195 else 196 flag = B_READ; 197 198 return B_OK; 199 } 200 201 return B_ERROR; 202 } 203 204 205 // The next couple of functions are our wrapper around convert_to_utf8 and 206 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by 207 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation. 208 // It also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION. 209 210 211 status_t 212 mail_convert_to_utf8(uint32 srcEncoding, const char *src, int32 *srcLen, 213 char *dst, int32 *dstLen, int32 *state, char substitute) 214 { 215 int32 copyAmount; 216 char *originalDst = dst; 217 status_t returnCode = -1; 218 219 if (srcEncoding == B_MAIL_UTF8_CONVERSION) { 220 copyAmount = *srcLen; 221 if (*dstLen < copyAmount) 222 copyAmount = *dstLen; 223 memcpy (dst, src, copyAmount); 224 *srcLen = copyAmount; 225 *dstLen = copyAmount; 226 returnCode = B_OK; 227 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) { 228 int32 i; 229 unsigned char letter; 230 copyAmount = *srcLen; 231 if (*dstLen < copyAmount) 232 copyAmount = *dstLen; 233 for (i = 0; i < copyAmount; i++) { 234 letter = *src++; 235 if (letter > 0x80U) 236 // Invalid, could also use substitute, but better to strip high bit. 237 *dst++ = letter - 0x80U; 238 else if (letter == 0x80U) 239 // Can't convert to 0x00 since that's NUL, which would cause problems. 240 *dst++ = substitute; 241 else 242 *dst++ = letter; 243 } 244 *srcLen = copyAmount; 245 *dstLen = copyAmount; 246 returnCode = B_OK; 247 } else 248 returnCode = convert_to_utf8 (srcEncoding, src, srcLen, 249 dst, dstLen, state, substitute); 250 251 if (returnCode == B_OK) { 252 // Replace spurious NUL bytes, which should normally not be in the 253 // output of the decoding (not normal UTF-8 characters, and no NULs are 254 // in our usual input strings). They happen for some odd ISO-2022-JP 255 // byte pair combinations which are improperly handled by the BeOS 256 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the 257 // first ESC $ B switches to a Japanese character set, then the next 258 // two bytes "yD" specify a character, then ESC ( B switches back to 259 // the ASCII character set. The UTF-8 conversion yields a NUL byte. 260 int32 i; 261 for (i = 0; i < *dstLen; i++) 262 if (originalDst[i] == 0) 263 originalDst[i] = substitute; 264 } 265 return returnCode; 266 } 267 268 269 status_t 270 mail_convert_from_utf8(uint32 dstEncoding, const char *src, int32 *srcLen, 271 char *dst, int32 *dstLen, int32 *state, char substitute) 272 { 273 int32 copyAmount; 274 status_t errorCode; 275 int32 originalDstLen = *dstLen; 276 int32 tempDstLen; 277 int32 tempSrcLen; 278 279 if (dstEncoding == B_MAIL_UTF8_CONVERSION) { 280 copyAmount = *srcLen; 281 if (*dstLen < copyAmount) 282 copyAmount = *dstLen; 283 memcpy (dst, src, copyAmount); 284 *srcLen = copyAmount; 285 *dstLen = copyAmount; 286 return B_OK; 287 } 288 289 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) { 290 int32 characterLength; 291 int32 dstRemaining = *dstLen; 292 unsigned char letter; 293 int32 srcRemaining = *srcLen; 294 295 // state contains the number of source bytes to skip, left over from a 296 // partial UTF-8 character split over the end of the buffer from last 297 // time. 298 if (srcRemaining <= *state) { 299 *state -= srcRemaining; 300 *dstLen = 0; 301 return B_OK; 302 } 303 srcRemaining -= *state; 304 src += *state; 305 *state = 0; 306 307 while (true) { 308 if (srcRemaining <= 0 || dstRemaining <= 0) 309 break; 310 letter = *src; 311 if (letter < 0x80) 312 characterLength = 1; // Regular ASCII equivalent code. 313 else if (letter < 0xC0) 314 characterLength = 1; // Invalid in-between data byte 10xxxxxx. 315 else if (letter < 0xE0) 316 characterLength = 2; 317 else if (letter < 0xF0) 318 characterLength = 3; 319 else if (letter < 0xF8) 320 characterLength = 4; 321 else if (letter < 0xFC) 322 characterLength = 5; 323 else if (letter < 0xFE) 324 characterLength = 6; 325 else 326 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8. 327 if (letter < 0x80) 328 *dst++ = *src; 329 else 330 *dst++ = substitute; 331 dstRemaining--; 332 if (srcRemaining < characterLength) { 333 // Character split past the end of the buffer. 334 *state = characterLength - srcRemaining; 335 srcRemaining = 0; 336 } else { 337 src += characterLength; 338 srcRemaining -= characterLength; 339 } 340 } 341 // Update with the amounts used. 342 *srcLen = *srcLen - srcRemaining; 343 *dstLen = *dstLen - dstRemaining; 344 return B_OK; 345 } 346 347 errorCode = convert_from_utf8(dstEncoding, src, srcLen, dst, dstLen, state, 348 substitute); 349 if (errorCode != B_OK) 350 return errorCode; 351 352 if (dstEncoding != B_JIS_CONVERSION) 353 return B_OK; 354 355 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different 356 // character subsets. For E-mail headers (and other uses), it needs to be 357 // switched back to ASCII at the end (otherwise the last character gets 358 // lost or other weird things happen in the headers). Note that we can't 359 // just append the escape code since the convert_from_utf8 "state" will be 360 // wrong. So we append an ASCII letter and throw it away, leaving just the 361 // escape code. Well, it actually switches to the Roman character set, not 362 // ASCII, but that should be OK. 363 364 tempDstLen = originalDstLen - *dstLen; 365 if (tempDstLen < 3) // Not enough space remaining in the output. 366 return B_OK; // Sort of an error, but we did convert the rest OK. 367 tempSrcLen = 1; 368 errorCode = convert_from_utf8(dstEncoding, "a", &tempSrcLen, 369 dst + *dstLen, &tempDstLen, state, substitute); 370 if (errorCode != B_OK) 371 return errorCode; 372 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */; 373 return B_OK; 374 } 375 376 377 ssize_t 378 rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen) 379 { 380 char *head, *tail; 381 char *charset, *encoding, *end; 382 ssize_t ret = B_OK; 383 384 if (bufp == NULL || *bufp == NULL) 385 return -1; 386 387 char *string = *bufp; 388 389 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail 390 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen)) 391 return strLen; 392 393 // set up string length 394 if (strLen == 0) 395 strLen = strlen(*bufp); 396 char lastChar = (*bufp)[strLen]; 397 (*bufp)[strLen] = '\0'; 398 399 //---------Whew! Now for RFC compliant mail 400 bool encodedWordFoundPreviously = false; 401 for (head = tail = string; 402 ((charset = strstr(tail, "=?")) != NULL) 403 && (((encoding = strchr(charset + 2, '?')) != NULL) 404 && encoding[1] && (encoding[2] == '?') && encoding[3]) 405 && (end = strstr(encoding + 3, "?=")) != NULL; 406 // found "=?...charset...?e?...text...?= (e == encoding) 407 // ^charset ^encoding ^end 408 tail = end) 409 { 410 // Copy non-encoded text (from tail up to charset) to the output. 411 // Ignore spaces between two encoded "words". RFC2047 says the words 412 // should be concatenated without the space (designed for Asian 413 // sentences which have no spaces yet need to be broken into "words" to 414 // keep within the line length limits). 415 bool nonSpaceFound = false; 416 for (int i = 0; i < charset-tail; i++) { 417 if (!isspace (tail[i])) { 418 nonSpaceFound = true; 419 break; 420 } 421 } 422 if (!encodedWordFoundPreviously || nonSpaceFound) { 423 if (string != tail && tail != charset) 424 memmove(string, tail, charset-tail); 425 string += charset-tail; 426 } 427 tail = charset; 428 encodedWordFoundPreviously = true; 429 430 // move things to point at what they should: 431 // =?...charset...?e?...text...?= (e == encoding) 432 // ^charset ^encoding ^end 433 charset += 2; 434 encoding += 1; 435 end += 2; 436 437 // find the charset this text is in now 438 size_t cLen = encoding - 1 - charset; 439 bool base64encoded = toupper(*encoding) == 'B'; 440 441 uint32 convertID = B_MAIL_NULL_CONVERSION; 442 char charsetName[cLen + 1]; 443 memcpy(charsetName, charset, cLen); 444 charsetName[cLen] = '\0'; 445 if (strcasecmp(charsetName, "us-ascii") == 0) { 446 convertID = B_MAIL_US_ASCII_CONVERSION; 447 } else if (strcasecmp(charsetName, "utf-8") == 0) { 448 convertID = B_MAIL_UTF8_CONVERSION; 449 } else { 450 const BCharacterSet* charSet 451 = BCharacterSetRoster::FindCharacterSetByName(charsetName); 452 if (charSet != NULL) { 453 convertID = charSet->GetConversionID(); 454 } 455 } 456 if (convertID == B_MAIL_NULL_CONVERSION) { 457 // unidentified charset 458 // what to do? doing nothing skips the encoded text; 459 // but we should keep it: we copy it to the output. 460 if (string != tail && tail != end) 461 memmove(string, tail, end-tail); 462 string += end-tail; 463 continue; 464 } 465 // else we've successfully identified the charset 466 467 char *src = encoding+2; 468 int32 srcLen = end - 2 - src; 469 // encoded text: src..src+srcLen 470 471 // decode text, get decoded length (reducing xforms) 472 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1) 473 : decode_base64(src, src, srcLen); 474 475 // allocate space for the converted text 476 int32 dstLen = end-string + *bufLen-strLen; 477 char *dst = (char*)malloc(dstLen); 478 int32 cvLen = srcLen; 479 int32 convState = 0; 480 481 // 482 // do the conversion 483 // 484 ret = mail_convert_to_utf8(convertID, src, &cvLen, dst, &dstLen, 485 &convState); 486 if (ret != B_OK) { 487 // what to do? doing nothing skips the encoded text 488 // but we should keep it: we copy it to the output. 489 490 free(dst); 491 492 if (string != tail && tail != end) 493 memmove(string, tail, end-tail); 494 string += end-tail; 495 continue; 496 } 497 /* convert_to_ is either returning something wrong or my 498 test data is screwed up. Whatever it is, Not Enough 499 Space is not the only cause of the below, so we just 500 assume it succeeds if it converts anything at all. 501 else if (cvLen < srcLen) 502 { 503 // not enough room to convert the data; 504 // grow *buf and retry 505 506 free(dst); 507 508 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1)); 509 if (temp == NULL) 510 { 511 ret = B_NO_MEMORY; 512 break; 513 } 514 515 *bufp = temp; 516 *bufLen = 2*(*bufLen + 1); 517 518 string = *bufp + (string-head); 519 tail = *bufp + (tail-head); 520 charset = *bufp + (charset-head); 521 encoding = *bufp + (encoding-head); 522 end = *bufp + (end-head); 523 src = *bufp + (src-head); 524 head = *bufp; 525 continue; 526 } 527 */ 528 else { 529 if (dstLen > end-string) { 530 // copy the string forward... 531 memmove(string+dstLen, end, strLen - (end-head) + 1); 532 strLen += string+dstLen - end; 533 end = string + dstLen; 534 } 535 536 memcpy(string, dst, dstLen); 537 string += dstLen; 538 free(dst); 539 continue; 540 } 541 } 542 543 // copy everything that's left 544 size_t tailLen = strLen - (tail - head); 545 memmove(string, tail, tailLen+1); 546 string += tailLen; 547 548 // replace the last char 549 (*bufp)[strLen] = lastChar; 550 551 return ret < B_OK ? ret : string-head; 552 } 553 554 555 ssize_t 556 utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding) 557 { 558 struct word { 559 BString originalWord; 560 BString convertedWord; 561 bool needsEncoding; 562 563 // Convert the word from UTF-8 to the desired character set. The 564 // converted version also includes the escape codes to return to ASCII 565 // mode, if relevant. Also note if it uses unprintable characters, 566 // which means it will need that special encoding treatment later. 567 void ConvertWordToCharset (uint32 charset) { 568 int32 state = 0; 569 int32 originalLength = originalWord.Length(); 570 int32 convertedLength = originalLength * 5 + 1; 571 char *convertedBuffer = convertedWord.LockBuffer (convertedLength); 572 mail_convert_from_utf8 (charset, originalWord.String(), 573 &originalLength, convertedBuffer, &convertedLength, &state); 574 for (int i = 0; i < convertedLength; i++) { 575 if ((convertedBuffer[i] & (1 << 7)) || 576 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) { 577 needsEncoding = true; 578 break; 579 } 580 } 581 convertedWord.UnlockBuffer (convertedLength); 582 }; 583 }; 584 struct word *currentWord; 585 BList words; 586 587 // Break the header into words. White space characters (including tabs and 588 // newlines) separate the words. Each word includes any space before it as 589 // part of the word. Actually, quotes and other special characters 590 // (",()<>@) are treated as separate words of their own so that they don't 591 // get encoded (because MIME headers get the quotes parsed before character 592 // set unconversion is done). The reader is supposed to ignore all white 593 // space between encoded words, which can be inserted so that older mail 594 // parsers don't have overly long line length problems. 595 596 const char *source = *bufp; 597 const char *bufEnd = *bufp + length; 598 const char *specialChars = "\"()<>@,"; 599 600 while (source < bufEnd) { 601 currentWord = new struct word; 602 currentWord->needsEncoding = false; 603 604 int wordEnd = 0; 605 606 // Include leading spaces as part of the word. 607 while (source + wordEnd < bufEnd && isspace (source[wordEnd])) 608 wordEnd++; 609 610 if (source + wordEnd < bufEnd && 611 strchr (specialChars, source[wordEnd]) != NULL) { 612 // Got a quote mark or other special character, which is treated as 613 // a word in itself since it shouldn't be encoded, which would hide 614 // it from the mail system. 615 wordEnd++; 616 } else { 617 // Find the end of the word. Leave wordEnd pointing just after the 618 // last character in the word. 619 while (source + wordEnd < bufEnd) { 620 if (isspace(source[wordEnd]) || 621 strchr (specialChars, source[wordEnd]) != NULL) 622 break; 623 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ && 624 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) { 625 // No English words are that long (46 is the longest), 626 // break up what is likely Asian text (which has no spaces) 627 // at the start of the next non-ASCII UTF-8 character (high 628 // two bits are both ones). Note that two encoded words in 629 // a row get joined together, even if there is a space 630 // between them in the final output text, according to the 631 // standard. Next word will also be conveniently get 632 // encoded due to the 0xC0 test. 633 currentWord->needsEncoding = true; 634 break; 635 } 636 wordEnd++; 637 } 638 } 639 currentWord->originalWord.SetTo (source, wordEnd); 640 currentWord->ConvertWordToCharset (charset); 641 words.AddItem(currentWord); 642 source += wordEnd; 643 } 644 645 // Combine adjacent words which contain unprintable text so that the 646 // overhead of switching back and forth between regular text and specially 647 // encoded text is reduced. However, the combined word must be shorter 648 // than the maximum of 75 bytes, including character set specification and 649 // all those delimiters (worst case 22 bytes of overhead). 650 651 struct word *run; 652 653 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) { 654 if (!currentWord->needsEncoding) 655 continue; // No need to combine unencoded words. 656 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) { 657 if (!run->needsEncoding) 658 break; // Don't want to combine encoded and unencoded words. 659 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) { 660 currentWord->originalWord.Append (run->originalWord); 661 currentWord->ConvertWordToCharset (charset); 662 words.RemoveItem(g); 663 delete run; 664 g--; 665 } else // Can't merge this word, result would be too long. 666 break; 667 } 668 } 669 670 // Combine the encoded and unencoded words into one line, doing the 671 // quoted-printable or base64 encoding. Insert an extra space between 672 // words which are both encoded to make word wrapping easier, since there 673 // is normally none, and you're allowed to insert space (the receiver 674 // throws it away if it is between encoded words). 675 676 BString rfc2047; 677 bool previousWordNeededEncoding = false; 678 679 const char *charset_dec = "none-bug"; 680 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) { 681 if (mail_charsets[i].flavor == charset) { 682 charset_dec = mail_charsets[i].charset; 683 break; 684 } 685 } 686 687 while ((currentWord = (struct word *)words.RemoveItem((int32)0)) != NULL) { 688 if ((encoding != quoted_printable && encoding != base64) || 689 !currentWord->needsEncoding) { 690 rfc2047.Append (currentWord->convertedWord); 691 } else { 692 // This word needs encoding. Try to insert a space between it and 693 // the previous word. 694 if (previousWordNeededEncoding) 695 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words. 696 else { 697 // Previous word is not encoded, spaces are significant. Try 698 // to move a space from the start of this word to be outside of 699 // the encoded text, so that there is a bit of space between 700 // this word and the previous one to enhance word wrapping 701 // chances later on. 702 if (currentWord->originalWord.Length() > 1 && 703 isspace (currentWord->originalWord[0])) { 704 rfc2047 << currentWord->originalWord[0]; 705 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */); 706 currentWord->ConvertWordToCharset (charset); 707 } 708 } 709 710 char *encoded = NULL; 711 ssize_t encoded_len = 0; 712 int32 convertedLength = currentWord->convertedWord.Length (); 713 const char *convertedBuffer = currentWord->convertedWord.String (); 714 715 switch (encoding) { 716 case quoted_printable: 717 encoded = (char *) malloc (convertedLength * 3); 718 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */); 719 break; 720 case base64: 721 encoded = (char *) malloc (convertedLength * 2); 722 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */); 723 break; 724 default: // Unknown encoding type, shouldn't happen. 725 encoded = (char *) convertedBuffer; 726 encoded_len = convertedLength; 727 break; 728 } 729 730 rfc2047 << "=?" << charset_dec << '?' << encoding << '?'; 731 rfc2047.Append (encoded, encoded_len); 732 rfc2047 << "?="; 733 734 if (encoding == quoted_printable || encoding == base64) 735 free(encoded); 736 } 737 previousWordNeededEncoding = currentWord->needsEncoding; 738 delete currentWord; 739 } 740 741 free(*bufp); 742 743 ssize_t finalLength = rfc2047.Length (); 744 *bufp = (char *) (malloc (finalLength + 1)); 745 memcpy (*bufp, rfc2047.String(), finalLength); 746 (*bufp)[finalLength] = 0; 747 748 return finalLength; 749 } 750 751 752 void 753 FoldLineAtWhiteSpaceAndAddCRLF(BString &string) 754 { 755 int inputLength = string.Length(); 756 int lineStartIndex; 757 const int maxLineLength = 78; // Doesn't include CRLF. 758 BString output; 759 int splitIndex; 760 int tempIndex; 761 762 lineStartIndex = 0; 763 while (true) { 764 // If we don't need to wrap the text, just output the remainder, if any. 765 766 if (lineStartIndex + maxLineLength >= inputLength) { 767 if (lineStartIndex < inputLength) { 768 output.Insert (string, lineStartIndex /* source offset */, 769 inputLength - lineStartIndex /* count */, 770 output.Length() /* insert at */); 771 output.Append (CRLF); 772 } 773 break; 774 } 775 776 // Look ahead for a convenient spot to split it, between a comma and 777 // space, which you often see between e-mail addresses like this: 778 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com 779 780 tempIndex = lineStartIndex + maxLineLength; 781 if (tempIndex > inputLength) 782 tempIndex = inputLength; 783 splitIndex = string.FindLast (", ", tempIndex); 784 if (splitIndex >= lineStartIndex) 785 splitIndex++; // Point to the space character. 786 787 // If none of those exist, try splitting at any white space. 788 789 if (splitIndex <= lineStartIndex) 790 splitIndex = string.FindLast (" ", tempIndex); 791 if (splitIndex <= lineStartIndex) 792 splitIndex = string.FindLast ("\t", tempIndex); 793 794 // If none of those exist, allow for a longer word - split at the next 795 // available white space. 796 797 if (splitIndex <= lineStartIndex) 798 splitIndex = string.FindFirst (" ", lineStartIndex + 1); 799 if (splitIndex <= lineStartIndex) 800 splitIndex = string.FindFirst ("\t", lineStartIndex + 1); 801 802 // Give up, the whole rest of the line can't be split, just dump it 803 // out. 804 805 if (splitIndex <= lineStartIndex) { 806 if (lineStartIndex < inputLength) { 807 output.Insert (string, lineStartIndex /* source offset */, 808 inputLength - lineStartIndex /* count */, 809 output.Length() /* insert at */); 810 output.Append (CRLF); 811 } 812 break; 813 } 814 815 // Do the split. The current line up to but not including the space 816 // gets output, followed by a CRLF. The space remains to become the 817 // start of the next line (and that tells the message reader that it is 818 // a continuation line). 819 820 output.Insert (string, lineStartIndex /* source offset */, 821 splitIndex - lineStartIndex /* count */, 822 output.Length() /* insert at */); 823 output.Append (CRLF); 824 lineStartIndex = splitIndex; 825 } 826 string.SetTo (output); 827 } 828 829 830 ssize_t 831 readfoldedline(FILE *file, char **buffer, size_t *buflen) 832 { 833 ssize_t len = buflen && *buflen ? *buflen : 0; 834 char * buf = buffer && *buffer ? *buffer : NULL; 835 ssize_t cnt = 0; // Number of characters currently in the buffer. 836 int c; 837 838 while (true) { 839 // Make sure there is space in the buffer for two more characters (one 840 // for the next character, and one for the end of string NUL byte). 841 if (buf == NULL || cnt + 2 >= len) { 842 char *temp = (char *)realloc(buf, len + 64); 843 if (temp == NULL) { 844 // Out of memory, however existing buffer remains allocated. 845 cnt = ENOMEM; 846 break; 847 } 848 len += 64; 849 buf = temp; 850 } 851 852 // Read the next character, or end of file, or IO error. 853 if ((c = fgetc(file)) == EOF) { 854 if (ferror (file)) { 855 cnt = errno; 856 if (cnt >= 0) 857 cnt = -1; // Error codes must be negative. 858 } else { 859 // Really is end of file. Also make it end of line if there is 860 // some text already read in. If the first thing read was EOF, 861 // just return an empty string. 862 if (cnt > 0) { 863 buf[cnt++] = '\n'; 864 if (buf[cnt-2] == '\r') { 865 buf[cnt-2] = '\n'; 866 --cnt; 867 } 868 } 869 } 870 break; 871 } 872 873 buf[cnt++] = c; 874 875 if (c == '\n') { 876 // Convert CRLF end of line to just a LF. Do it before folding, in 877 // case we don't need to fold. 878 if (cnt >= 2 && buf[cnt-2] == '\r') { 879 buf[cnt-2] = '\n'; 880 --cnt; 881 } 882 // If the current line is empty then return it (so that empty lines 883 // don't disappear if the next line starts with a space). 884 if (cnt <= 1) 885 break; 886 // Fold if first character on the next line is whitespace. 887 c = fgetc(file); // Note it's OK to read EOF and ungetc it too. 888 if (c == ' ' || c == '\t') 889 buf[cnt-1] = c; // Replace \n with the white space character. 890 else { 891 // Not folding, we finished reading a line; break out of the loop 892 ungetc(c,file); 893 break; 894 } 895 } 896 } 897 898 if (buf != NULL && cnt >= 0) 899 buf[cnt] = '\0'; 900 901 if (buffer) 902 *buffer = buf; 903 else if (buf) 904 free(buf); 905 906 if (buflen) 907 *buflen = len; 908 909 return cnt; 910 } 911 912 913 ssize_t 914 readfoldedline(BPositionIO &in, char **buffer, size_t *buflen) 915 { 916 ssize_t len = buflen && *buflen ? *buflen : 0; 917 char * buf = buffer && *buffer ? *buffer : NULL; 918 ssize_t cnt = 0; // Number of characters currently in the buffer. 919 char c; 920 status_t errorCode; 921 922 while (true) { 923 // Make sure there is space in the buffer for two more characters (one 924 // for the next character, and one for the end of string NUL byte). 925 if (buf == NULL || cnt + 2 >= len) { 926 char *temp = (char *)realloc(buf, len + 64); 927 if (temp == NULL) { 928 // Out of memory, however existing buffer remains allocated. 929 cnt = ENOMEM; 930 break; 931 } 932 len += 64; 933 buf = temp; 934 } 935 936 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered. 937 if (errorCode != 1) { 938 if (errorCode < 0) { 939 cnt = errorCode; // IO error encountered, just return the code. 940 } else { 941 // Really is end of file. Also make it end of line if there is 942 // some text already read in. If the first thing read was EOF, 943 // just return an empty string. 944 if (cnt > 0) { 945 buf[cnt++] = '\n'; 946 if (buf[cnt-2] == '\r') { 947 buf[cnt-2] = '\n'; 948 --cnt; 949 } 950 } 951 } 952 break; 953 } 954 955 buf[cnt++] = c; 956 957 if (c == '\n') { 958 // Convert CRLF end of line to just a LF. Do it before folding, in 959 // case we don't need to fold. 960 if (cnt >= 2 && buf[cnt-2] == '\r') { 961 buf[cnt-2] = '\n'; 962 --cnt; 963 } 964 // If the current line is empty then return it (so that empty lines 965 // don't disappear if the next line starts with a space). 966 if (cnt <= 1) 967 break; 968 // if first character on the next line is whitespace, fold lines 969 errorCode = in.Read(&c,1); 970 if (errorCode == 1) { 971 if (c == ' ' || c == '\t') 972 buf[cnt-1] = c; // Replace \n with the white space character. 973 else { 974 // Not folding, we finished reading a whole line. 975 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read. 976 break; 977 } 978 } else if (errorCode < 0) { 979 cnt = errorCode; 980 break; 981 } else // No next line; at the end of the file. Return the line. 982 break; 983 } 984 } 985 986 if (buf != NULL && cnt >= 0) 987 buf[cnt] = '\0'; 988 989 if (buffer) 990 *buffer = buf; 991 else if (buf) 992 free(buf); 993 994 if (buflen) 995 *buflen = len; 996 997 return cnt; 998 } 999 1000 1001 ssize_t 1002 nextfoldedline(const char** header, char **buffer, size_t *buflen) 1003 { 1004 ssize_t len = buflen && *buflen ? *buflen : 0; 1005 char * buf = buffer && *buffer ? *buffer : NULL; 1006 ssize_t cnt = 0; // Number of characters currently in the buffer. 1007 char c; 1008 1009 while (true) 1010 { 1011 // Make sure there is space in the buffer for two more characters (one 1012 // for the next character, and one for the end of string NUL byte). 1013 if (buf == NULL || cnt + 2 >= len) 1014 { 1015 char *temp = (char *)realloc(buf, len + 64); 1016 if (temp == NULL) { 1017 // Out of memory, however existing buffer remains allocated. 1018 cnt = ENOMEM; 1019 break; 1020 } 1021 len += 64; 1022 buf = temp; 1023 } 1024 1025 // Read the next character, or end of file. 1026 if ((c = *(*header)++) == 0) { 1027 // End of file. Also make it end of line if there is some text 1028 // already read in. If the first thing read was EOF, just return 1029 // an empty string. 1030 if (cnt > 0) { 1031 buf[cnt++] = '\n'; 1032 if (buf[cnt-2] == '\r') { 1033 buf[cnt-2] = '\n'; 1034 --cnt; 1035 } 1036 } 1037 break; 1038 } 1039 1040 buf[cnt++] = c; 1041 1042 if (c == '\n') { 1043 // Convert CRLF end of line to just a LF. Do it before folding, in 1044 // case we don't need to fold. 1045 if (cnt >= 2 && buf[cnt-2] == '\r') { 1046 buf[cnt-2] = '\n'; 1047 --cnt; 1048 } 1049 // If the current line is empty then return it (so that empty lines 1050 // don't disappear if the next line starts with a space). 1051 if (cnt <= 1) 1052 break; 1053 // if first character on the next line is whitespace, fold lines 1054 c = *(*header)++; 1055 if (c == ' ' || c == '\t') 1056 buf[cnt-1] = c; // Replace \n with the white space character. 1057 else { 1058 // Not folding, we finished reading a line; break out of the loop 1059 (*header)--; // Undo read of the non-whitespace. 1060 break; 1061 } 1062 } 1063 } 1064 1065 1066 if (buf != NULL && cnt >= 0) 1067 buf[cnt] = '\0'; 1068 1069 if (buffer) 1070 *buffer = buf; 1071 else if (buf) 1072 free(buf); 1073 1074 if (buflen) 1075 *buflen = len; 1076 1077 return cnt; 1078 } 1079 1080 1081 void 1082 trim_white_space(BString &string) 1083 { 1084 int32 i; 1085 int32 length = string.Length(); 1086 char *buffer = string.LockBuffer(length + 1); 1087 1088 while (length > 0 && isspace(buffer[length - 1])) 1089 length--; 1090 buffer[length] = '\0'; 1091 1092 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {} 1093 if (i != 0) { 1094 length -= i; 1095 memmove(buffer,buffer + i,length + 1); 1096 } 1097 string.UnlockBuffer(length); 1098 } 1099 1100 1101 /*! Tries to return a human-readable name from the specified 1102 header parameter (should be from "To:" or "From:"). 1103 Tries to return the name rather than the eMail address. 1104 */ 1105 void 1106 extract_address_name(BString &header) 1107 { 1108 BString name; 1109 const char *start = header.String(); 1110 const char *stop = start + strlen (start); 1111 1112 // Find a string S in the header (email foo) that matches: 1113 // Old style name in brackets: foo@bar.com (S) 1114 // New style quotes: "S" <foo@bar.com> 1115 // New style no quotes if nothing else found: S <foo@bar.com> 1116 // If nothing else found then use the whole thing: S 1117 1118 for (int i = 0; i <= 3; i++) { 1119 // Set p1 to the first letter in the name and p2 to just past the last 1120 // letter in the name. p2 stays NULL if a name wasn't found in this 1121 // pass. 1122 const char *p1 = NULL, *p2 = NULL; 1123 1124 switch (i) { 1125 case 0: // foo@bar.com (S) 1126 if ((p1 = strchr(start,'(')) != NULL) { 1127 p1++; // Advance to first letter in the name. 1128 size_t nest = 1; // Handle nested brackets. 1129 for (p2 = p1; p2 < stop; ++p2) 1130 { 1131 if (*p2 == ')') 1132 --nest; 1133 else if (*p2 == '(') 1134 ++nest; 1135 if (nest <= 0) 1136 break; 1137 } 1138 if (nest != 0) 1139 p2 = NULL; // False alarm, no terminating bracket. 1140 } 1141 break; 1142 case 1: // "S" <foo@bar.com> 1143 if ((p1 = strchr(start, '\"')) != NULL) 1144 p2 = strchr(++p1, '\"'); 1145 break; 1146 case 2: // S <foo@bar.com> 1147 p1 = start; 1148 if (name.Length() == 0) 1149 p2 = strchr(start, '<'); 1150 break; 1151 case 3: // S 1152 p1 = start; 1153 if (name.Length() == 0) 1154 p2 = stop; 1155 break; 1156 } 1157 1158 // Remove leading and trailing space-like characters and save the 1159 // result if it is longer than any other likely names found. 1160 if (p2 != NULL) { 1161 while (p1 < p2 && (isspace (*p1))) 1162 ++p1; 1163 1164 while (p1 < p2 && (isspace (p2[-1]))) 1165 --p2; 1166 1167 int newLength = p2 - p1; 1168 if (name.Length() < newLength) 1169 name.SetTo(p1, newLength); 1170 } 1171 } 1172 1173 int32 lessIndex = name.FindFirst('<'); 1174 int32 greaterIndex = name.FindLast('>'); 1175 1176 if (lessIndex == 0) { 1177 // Have an address of the form <address> and nothing else, so remove 1178 // the greater and less than signs, if any. 1179 if (greaterIndex > 0) 1180 name.Remove(greaterIndex, 1); 1181 name.Remove(lessIndex, 1); 1182 } else if (lessIndex > 0 && lessIndex < greaterIndex) { 1183 // Yahoo stupidly inserts the e-mail address into the name string, so 1184 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com> 1185 name.Remove(lessIndex, greaterIndex - lessIndex + 1); 1186 } 1187 1188 trim_white_space(name); 1189 header = name; 1190 } 1191 1192 1193 /*! Given a subject in a BString, remove the extraneous RE: re: and other stuff 1194 to get down to the core subject string, which should be identical for all 1195 messages posted about a topic. The input string is modified in place to 1196 become the output core subject string. 1197 */ 1198 void 1199 SubjectToThread (BString &string) 1200 { 1201 // a regex that matches a non-ASCII UTF8 character: 1202 #define U8C \ 1203 "[\302-\337][\200-\277]" \ 1204 "|\340[\302-\337][\200-\277]" \ 1205 "|[\341-\357][\200-\277][\200-\277]" \ 1206 "|\360[\220-\277][\200-\277][\200-\277]" \ 1207 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \ 1208 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \ 1209 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1210 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1211 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]" 1212 1213 #define PATTERN \ 1214 "^ +" \ 1215 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1216 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1217 "| *\\(fwd\\) *$" 1218 1219 if (gRebuf == NULL && atomic_add(&gLocker, 1) == 0) { 1220 // the idea is to compile the regexp once to speed up testing 1221 1222 for (int i=0; i<256; ++i) gTranslation[i]=i; 1223 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i); 1224 1225 gRe.translate = gTranslation; 1226 gRe.regs_allocated = REGS_FIXED; 1227 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED; 1228 1229 const char *pattern = PATTERN; 1230 // count subexpressions in PATTERN 1231 for (unsigned int i=0; pattern[i] != 0; ++i) 1232 { 1233 if (pattern[i] == '\\') 1234 ++i; 1235 else if (pattern[i] == '(') 1236 ++gNsub; 1237 } 1238 1239 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe); 1240 if (err == NULL) 1241 gRebuf = &gRe; 1242 else 1243 fprintf(stderr, "Failed to compile the regex: %s\n", err); 1244 } else { 1245 int32 tries = 200; 1246 while (gRebuf == NULL && tries-- > 0) 1247 snooze(10000); 1248 } 1249 1250 if (gRebuf) { 1251 struct re_registers regs; 1252 // can't be static if this function is to be thread-safe 1253 1254 regs.num_regs = gNsub; 1255 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1256 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1257 1258 for (int start = 0; (start = re_search(gRebuf, string.String(), 1259 string.Length(), 0, string.Length(), ®s)) >= 0;) { 1260 // 1261 // we found something 1262 // 1263 1264 // don't delete [bemaildaemon]... 1265 if (start == regs.start[1]) 1266 start = regs.start[2]; 1267 1268 string.Remove(start,regs.end[0]-start); 1269 if (start) 1270 string.Insert(' ',1,start); 1271 1272 // TODO: for some subjects this results in an endless loop, check 1273 // why this happen. 1274 if (regs.end[0] - start <= 1) 1275 break; 1276 } 1277 1278 free(regs.start); 1279 free(regs.end); 1280 } 1281 1282 // Finally remove leading and trailing space. Some software, like 1283 // tm-edit 1.8, appends a space to the subject, which would break 1284 // threading if we left it in. 1285 trim_white_space(string); 1286 } 1287 1288 1289 /*! Converts a date to a time. Handles numeric time zones too, unlike 1290 parsedate(). Returns -1 if it fails. 1291 */ 1292 time_t 1293 ParseDateWithTimeZone(const char *DateString) 1294 { 1295 time_t currentTime; 1296 time_t dateAsTime; 1297 char tempDateString[80]; 1298 char tempZoneString[6]; 1299 time_t zoneDeltaTime; 1300 int zoneIndex; 1301 char *zonePntr; 1302 1303 // See if we can remove the time zone portion. parsedate understands time 1304 // zone 3 letter names, but doesn't understand the numeric +9999 time zone 1305 // format. To do: see if a newer parsedate exists. 1306 1307 strncpy (tempDateString, DateString, sizeof (tempDateString)); 1308 tempDateString[sizeof (tempDateString) - 1] = 0; 1309 1310 // Remove trailing spaces. 1311 zonePntr = tempDateString + strlen (tempDateString) - 1; 1312 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1313 *zonePntr-- = 0; 1314 if (zonePntr < tempDateString) 1315 return -1; // Empty string. 1316 1317 // Remove the trailing time zone in round brackets, like in 1318 // Fri, 22 Feb 2002 15:22:42 EST (-0500) 1319 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT) 1320 if (tempDateString[strlen(tempDateString)-1] == ')') 1321 { 1322 zonePntr = strrchr (tempDateString, '('); 1323 if (zonePntr != NULL) 1324 { 1325 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces. 1326 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1327 *zonePntr-- = 0; 1328 if (zonePntr < tempDateString) 1329 return -1; // Empty string. 1330 } 1331 } 1332 1333 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000 1334 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--) 1335 { 1336 zonePntr = tempDateString + zoneIndex; 1337 if (zonePntr[0] == '+' || zonePntr[0] == '-') 1338 { 1339 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' && 1340 zonePntr[2] >= '0' && zonePntr[2] <= '9' && 1341 zonePntr[3] >= '0' && zonePntr[3] <= '9' && 1342 zonePntr[4] >= '0' && zonePntr[4] <= '9') 1343 break; 1344 } 1345 } 1346 if (zoneIndex >= 0) 1347 { 1348 // Remove the zone from the date string and any following time zone 1349 // letter codes. Also put in GMT so that the date gets parsed as GMT. 1350 memcpy (tempZoneString, zonePntr, 5); 1351 tempZoneString [5] = 0; 1352 strcpy (zonePntr, "GMT"); 1353 } 1354 else // No numeric time zone found. 1355 strcpy (tempZoneString, "+0000"); 1356 1357 time (¤tTime); 1358 dateAsTime = parsedate (tempDateString, currentTime); 1359 if (dateAsTime == (time_t) -1) 1360 return -1; // Failure. 1361 1362 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes. 1363 tempZoneString[3] = 0; 1364 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours. 1365 if (tempZoneString[0] == '+') 1366 zoneDeltaTime = 0 - zoneDeltaTime; 1367 dateAsTime += zoneDeltaTime; 1368 1369 return dateAsTime; 1370 } 1371 1372 1373 /*! Parses a mail header and fills the headers BMessage 1374 */ 1375 status_t 1376 parse_header(BMessage &headers, BPositionIO &input) 1377 { 1378 char *buffer = NULL; 1379 size_t bufferSize = 0; 1380 int32 length; 1381 1382 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) { 1383 --length; 1384 // Don't include the \n at the end of the buffer. 1385 1386 // convert to UTF-8 and null-terminate the buffer 1387 length = rfc2047_to_utf8(&buffer, &bufferSize, length); 1388 buffer[length] = '\0'; 1389 1390 const char *delimiter = strstr(buffer, ":"); 1391 if (delimiter == NULL) 1392 continue; 1393 1394 BString header(buffer, delimiter - buffer); 1395 header.CapitalizeEachWord(); 1396 // unified case for later fetch 1397 1398 delimiter++; // Skip the colon. 1399 // Skip over leading white space and tabs. 1400 // TODO: (comments in brackets). 1401 while (isspace(*delimiter)) 1402 delimiter++; 1403 1404 // TODO: implement joining of multiple header tags (i.e. multiple "Cc:"s) 1405 headers.AddString(header.String(), delimiter); 1406 } 1407 free(buffer); 1408 1409 return B_OK; 1410 } 1411 1412 1413 status_t 1414 extract_from_header(const BString& header, const BString& field, 1415 BString& target) 1416 { 1417 int32 headerLength = header.Length(); 1418 int32 fieldEndPos = 0; 1419 while (true) { 1420 int32 pos = header.IFindFirst(field, fieldEndPos); 1421 if (pos < 0) 1422 return B_BAD_VALUE; 1423 fieldEndPos = pos + field.Length(); 1424 1425 if (pos != 0 && header.ByteAt(pos - 1) != '\n') 1426 continue; 1427 if (header.ByteAt(fieldEndPos) == ':') 1428 break; 1429 } 1430 fieldEndPos++; 1431 1432 int32 crPos = fieldEndPos; 1433 while (true) { 1434 fieldEndPos = crPos; 1435 crPos = header.FindFirst('\n', crPos); 1436 if (crPos < 0) 1437 crPos = headerLength; 1438 BString temp; 1439 header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos); 1440 if (header.ByteAt(crPos - 1) == '\r') { 1441 temp.Truncate(temp.Length() - 1); 1442 temp += " "; 1443 } 1444 target += temp; 1445 crPos++; 1446 if (crPos >= headerLength) 1447 break; 1448 char nextByte = header.ByteAt(crPos); 1449 if (nextByte != ' ' && nextByte != '\t') 1450 break; 1451 crPos++; 1452 } 1453 1454 size_t bufferSize = target.Length(); 1455 char* buffer = target.LockBuffer(bufferSize); 1456 size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize); 1457 target.UnlockBuffer(length); 1458 1459 trim_white_space(target); 1460 1461 return B_OK; 1462 } 1463 1464 1465 void 1466 extract_address(BString &address) 1467 { 1468 const char *string = address.String(); 1469 int32 first; 1470 1471 // first, remove all quoted text 1472 1473 if ((first = address.FindFirst('"')) >= 0) { 1474 int32 last = first + 1; 1475 while (string[last] && string[last] != '"') 1476 last++; 1477 1478 if (string[last] == '"') 1479 address.Remove(first, last + 1 - first); 1480 } 1481 1482 // try to extract the address now 1483 1484 if ((first = address.FindFirst('<')) >= 0) { 1485 // the world likes us and we can just get the address the easy way... 1486 int32 last = address.FindFirst('>'); 1487 if (last >= 0) { 1488 address.Truncate(last); 1489 address.Remove(0, first + 1); 1490 1491 return; 1492 } 1493 } 1494 1495 // then, see if there is anything in parenthesis to throw away 1496 1497 if ((first = address.FindFirst('(')) >= 0) { 1498 int32 last = first + 1; 1499 while (string[last] && string[last] != ')') 1500 last++; 1501 1502 if (string[last] == ')') 1503 address.Remove(first, last + 1 - first); 1504 } 1505 1506 // now, there shouldn't be much else left 1507 1508 trim_white_space(address); 1509 } 1510 1511 1512 void 1513 get_address_list(BList &list, const char *string, 1514 void (*cleanupFunc)(BString &)) 1515 { 1516 if (string == NULL || !string[0]) 1517 return; 1518 1519 const char *start = string; 1520 1521 while (true) { 1522 if (string[0] == '"') { 1523 const char *quoteEnd = ++string; 1524 1525 while (quoteEnd[0] && quoteEnd[0] != '"') 1526 quoteEnd++; 1527 1528 if (!quoteEnd[0]) // string exceeds line! 1529 quoteEnd = string; 1530 1531 string = quoteEnd + 1; 1532 } 1533 1534 if (string[0] == ',' || string[0] == '\0') { 1535 BString address(start, string - start); 1536 trim_white_space(address); 1537 1538 if (cleanupFunc) 1539 cleanupFunc(address); 1540 1541 list.AddItem(strdup(address.String())); 1542 1543 start = string + 1; 1544 } 1545 1546 if (!string[0]) 1547 break; 1548 1549 string++; 1550 } 1551 } 1552 1553 1554 status_t 1555 CopyMailFolderAttributes(const char* targetPath) 1556 { 1557 BPath path; 1558 status_t status = find_directory(B_USER_SETTINGS_DIRECTORY, &path); 1559 if (status != B_OK) 1560 return status; 1561 1562 path.Append("Tracker"); 1563 path.Append("DefaultQueryTemplates"); 1564 path.Append("text_x-email"); 1565 1566 BNode source(path.Path()); 1567 BNode target(targetPath); 1568 return BPrivate::CopyAttributes(source, target); 1569 } 1570