1 /* 2 * Copyright 2011-2016, Haiku, Inc. All rights reserved. 3 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved. 4 */ 5 6 7 #include <mail_util.h> 8 9 #include <stdlib.h> 10 #include <strings.h> 11 #include <stdio.h> 12 #define __USE_GNU 13 #include <regex.h> 14 #include <ctype.h> 15 #include <errno.h> 16 17 #include <FindDirectory.h> 18 #include <List.h> 19 #include <Locker.h> 20 #include <parsedate.h> 21 #include <Path.h> 22 #include <String.h> 23 #include <UTF8.h> 24 25 #include <mail_encoding.h> 26 27 #include <AttributeUtilities.h> 28 #include <CharacterSet.h> 29 #include <CharacterSetRoster.h> 30 31 32 using namespace BPrivate; 33 34 35 #define CRLF "\r\n" 36 37 struct CharsetConversionEntry { 38 const char *charset; 39 uint32 flavor; 40 }; 41 42 extern const CharsetConversionEntry mail_charsets[] = { 43 // In order of authority, so when searching for the name for a particular 44 // numbered conversion, start at the beginning of the array. 45 {"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD 46 {"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD 47 {"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD 48 {"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD 49 {"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD 50 {"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD 51 {"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD 52 {"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD 53 {"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD 54 {"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD 55 {"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD 56 {"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD 57 {"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD 58 59 {"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD 60 {"shift-jis", B_SJIS_CONVERSION}, 61 {"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD 62 {"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD 63 64 {"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD 65 {"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE? 66 {"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software 67 68 {"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD 69 {"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD 70 {"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD 71 72 {"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? ) 73 {"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? ) 74 {"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? ) 75 76 {"big5", 24}, // MIME STANDARD 77 78 {"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? ) 79 {"gb2312", 25}, // COMPATIBLE 80 {"gbk", 25}, // COMPATIBLE 81 82 /* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */ 83 {"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD 84 {"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD 85 86 {NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */ 87 }; 88 89 90 static int32 gLocker = 0; 91 static size_t gNsub = 1; 92 static re_pattern_buffer gRe; 93 static re_pattern_buffer *gRebuf = NULL; 94 static unsigned char gTranslation[256]; 95 96 97 static int 98 handle_non_rfc2047_encoding(char **buffer, size_t *bufferLength, 99 size_t *sourceLength) 100 { 101 char *string = *buffer; 102 int32 length = *sourceLength; 103 int32 i; 104 105 // check for 8-bit characters 106 for (i = 0;i < length;i++) 107 if (string[i] & 0x80) 108 break; 109 if (i == length) 110 return false; 111 112 // check for groups of 8-bit characters - this code is not very smart; 113 // it just can detect some sort of single-byte encoded stuff, the rest 114 // is regarded as UTF-8 115 116 int32 singletons = 0,doubles = 0; 117 118 for (i = 0;i < length;i++) 119 { 120 if (string[i] & 0x80) 121 { 122 if ((string[i + 1] & 0x80) == 0) 123 singletons++; 124 else doubles++; 125 i++; 126 } 127 } 128 129 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1 130 { 131 int32 state = 0; 132 // just to be sure 133 int32 destLength = length * 4 + 1; 134 int32 destBufferLength = destLength; 135 char *dest = (char*)malloc(destLength); 136 if (dest == NULL) 137 return 0; 138 139 if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest, 140 &destLength, &state) == B_OK) { 141 *buffer = dest; 142 *bufferLength = destBufferLength; 143 *sourceLength = destLength; 144 return true; 145 } 146 free(dest); 147 return false; 148 } 149 150 // we assume a valid UTF-8 string here, but yes, we don't check it 151 return true; 152 } 153 154 155 // #pragma mark - 156 157 158 status_t 159 write_read_attr(BNode& node, read_flags flag) 160 { 161 if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32)) 162 < 0) 163 return B_ERROR; 164 165 // Manage the status string only if it currently has a known state 166 BString currentStatus; 167 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, ¤tStatus) == B_OK 168 && currentStatus.ICompare("New") != 0 169 && currentStatus.ICompare("Read") != 0 170 && currentStatus.ICompare("Seen") != 0) { 171 return B_OK; 172 } 173 174 BString statusString = flag == B_READ ? "Read" 175 : flag == B_SEEN ? "Seen" : "New"; 176 if (node.WriteAttrString(B_MAIL_ATTR_STATUS, &statusString) < 0) 177 return B_ERROR; 178 179 return B_OK; 180 } 181 182 183 status_t 184 read_read_attr(BNode& node, read_flags& flag) 185 { 186 if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32)) 187 == sizeof(int32)) 188 return B_OK; 189 190 BString statusString; 191 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) { 192 if (statusString.ICompare("New") == 0) 193 flag = B_UNREAD; 194 else 195 flag = B_READ; 196 197 return B_OK; 198 } 199 200 return B_ERROR; 201 } 202 203 204 // The next couple of functions are our wrapper around convert_to_utf8 and 205 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by 206 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation. 207 // It also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION. 208 209 210 status_t 211 mail_convert_to_utf8(uint32 srcEncoding, const char *src, int32 *srcLen, 212 char *dst, int32 *dstLen, int32 *state, char substitute) 213 { 214 int32 copyAmount; 215 char *originalDst = dst; 216 status_t returnCode = -1; 217 218 if (srcEncoding == B_MAIL_UTF8_CONVERSION) { 219 copyAmount = *srcLen; 220 if (*dstLen < copyAmount) 221 copyAmount = *dstLen; 222 memcpy (dst, src, copyAmount); 223 *srcLen = copyAmount; 224 *dstLen = copyAmount; 225 returnCode = B_OK; 226 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) { 227 int32 i; 228 unsigned char letter; 229 copyAmount = *srcLen; 230 if (*dstLen < copyAmount) 231 copyAmount = *dstLen; 232 for (i = 0; i < copyAmount; i++) { 233 letter = *src++; 234 if (letter > 0x80U) 235 // Invalid, could also use substitute, but better to strip high bit. 236 *dst++ = letter - 0x80U; 237 else if (letter == 0x80U) 238 // Can't convert to 0x00 since that's NUL, which would cause problems. 239 *dst++ = substitute; 240 else 241 *dst++ = letter; 242 } 243 *srcLen = copyAmount; 244 *dstLen = copyAmount; 245 returnCode = B_OK; 246 } else 247 returnCode = convert_to_utf8 (srcEncoding, src, srcLen, 248 dst, dstLen, state, substitute); 249 250 if (returnCode == B_OK) { 251 // Replace spurious NUL bytes, which should normally not be in the 252 // output of the decoding (not normal UTF-8 characters, and no NULs are 253 // in our usual input strings). They happen for some odd ISO-2022-JP 254 // byte pair combinations which are improperly handled by the BeOS 255 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the 256 // first ESC $ B switches to a Japanese character set, then the next 257 // two bytes "yD" specify a character, then ESC ( B switches back to 258 // the ASCII character set. The UTF-8 conversion yields a NUL byte. 259 int32 i; 260 for (i = 0; i < *dstLen; i++) 261 if (originalDst[i] == 0) 262 originalDst[i] = substitute; 263 } 264 return returnCode; 265 } 266 267 268 status_t 269 mail_convert_from_utf8(uint32 dstEncoding, const char *src, int32 *srcLen, 270 char *dst, int32 *dstLen, int32 *state, char substitute) 271 { 272 int32 copyAmount; 273 status_t errorCode; 274 int32 originalDstLen = *dstLen; 275 int32 tempDstLen; 276 int32 tempSrcLen; 277 278 if (dstEncoding == B_MAIL_UTF8_CONVERSION) { 279 copyAmount = *srcLen; 280 if (*dstLen < copyAmount) 281 copyAmount = *dstLen; 282 memcpy (dst, src, copyAmount); 283 *srcLen = copyAmount; 284 *dstLen = copyAmount; 285 return B_OK; 286 } 287 288 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) { 289 int32 characterLength; 290 int32 dstRemaining = *dstLen; 291 unsigned char letter; 292 int32 srcRemaining = *srcLen; 293 294 // state contains the number of source bytes to skip, left over from a 295 // partial UTF-8 character split over the end of the buffer from last 296 // time. 297 if (srcRemaining <= *state) { 298 *state -= srcRemaining; 299 *dstLen = 0; 300 return B_OK; 301 } 302 srcRemaining -= *state; 303 src += *state; 304 *state = 0; 305 306 while (true) { 307 if (srcRemaining <= 0 || dstRemaining <= 0) 308 break; 309 letter = *src; 310 if (letter < 0x80) 311 characterLength = 1; // Regular ASCII equivalent code. 312 else if (letter < 0xC0) 313 characterLength = 1; // Invalid in-between data byte 10xxxxxx. 314 else if (letter < 0xE0) 315 characterLength = 2; 316 else if (letter < 0xF0) 317 characterLength = 3; 318 else if (letter < 0xF8) 319 characterLength = 4; 320 else if (letter < 0xFC) 321 characterLength = 5; 322 else if (letter < 0xFE) 323 characterLength = 6; 324 else 325 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8. 326 if (letter < 0x80) 327 *dst++ = *src; 328 else 329 *dst++ = substitute; 330 dstRemaining--; 331 if (srcRemaining < characterLength) { 332 // Character split past the end of the buffer. 333 *state = characterLength - srcRemaining; 334 srcRemaining = 0; 335 } else { 336 src += characterLength; 337 srcRemaining -= characterLength; 338 } 339 } 340 // Update with the amounts used. 341 *srcLen = *srcLen - srcRemaining; 342 *dstLen = *dstLen - dstRemaining; 343 return B_OK; 344 } 345 346 errorCode = convert_from_utf8(dstEncoding, src, srcLen, dst, dstLen, state, 347 substitute); 348 if (errorCode != B_OK) 349 return errorCode; 350 351 if (dstEncoding != B_JIS_CONVERSION) 352 return B_OK; 353 354 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different 355 // character subsets. For E-mail headers (and other uses), it needs to be 356 // switched back to ASCII at the end (otherwise the last character gets 357 // lost or other weird things happen in the headers). Note that we can't 358 // just append the escape code since the convert_from_utf8 "state" will be 359 // wrong. So we append an ASCII letter and throw it away, leaving just the 360 // escape code. Well, it actually switches to the Roman character set, not 361 // ASCII, but that should be OK. 362 363 tempDstLen = originalDstLen - *dstLen; 364 if (tempDstLen < 3) // Not enough space remaining in the output. 365 return B_OK; // Sort of an error, but we did convert the rest OK. 366 tempSrcLen = 1; 367 errorCode = convert_from_utf8(dstEncoding, "a", &tempSrcLen, 368 dst + *dstLen, &tempDstLen, state, substitute); 369 if (errorCode != B_OK) 370 return errorCode; 371 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */; 372 return B_OK; 373 } 374 375 376 ssize_t 377 rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen) 378 { 379 char *head, *tail; 380 char *charset, *encoding, *end; 381 ssize_t ret = B_OK; 382 383 if (bufp == NULL || *bufp == NULL) 384 return -1; 385 386 char *string = *bufp; 387 388 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail 389 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen)) 390 return strLen; 391 392 // set up string length 393 if (strLen == 0) 394 strLen = strlen(*bufp); 395 char lastChar = (*bufp)[strLen]; 396 (*bufp)[strLen] = '\0'; 397 398 //---------Whew! Now for RFC compliant mail 399 bool encodedWordFoundPreviously = false; 400 for (head = tail = string; 401 ((charset = strstr(tail, "=?")) != NULL) 402 && (((encoding = strchr(charset + 2, '?')) != NULL) 403 && encoding[1] && (encoding[2] == '?') && encoding[3]) 404 && (end = strstr(encoding + 3, "?=")) != NULL; 405 // found "=?...charset...?e?...text...?= (e == encoding) 406 // ^charset ^encoding ^end 407 tail = end) 408 { 409 // Copy non-encoded text (from tail up to charset) to the output. 410 // Ignore spaces between two encoded "words". RFC2047 says the words 411 // should be concatenated without the space (designed for Asian 412 // sentences which have no spaces yet need to be broken into "words" to 413 // keep within the line length limits). 414 bool nonSpaceFound = false; 415 for (int i = 0; i < charset-tail; i++) { 416 if (!isspace (tail[i])) { 417 nonSpaceFound = true; 418 break; 419 } 420 } 421 if (!encodedWordFoundPreviously || nonSpaceFound) { 422 if (string != tail && tail != charset) 423 memmove(string, tail, charset-tail); 424 string += charset-tail; 425 } 426 tail = charset; 427 encodedWordFoundPreviously = true; 428 429 // move things to point at what they should: 430 // =?...charset...?e?...text...?= (e == encoding) 431 // ^charset ^encoding ^end 432 charset += 2; 433 encoding += 1; 434 end += 2; 435 436 // find the charset this text is in now 437 size_t cLen = encoding - 1 - charset; 438 bool base64encoded = toupper(*encoding) == 'B'; 439 440 uint32 convertID = B_MAIL_NULL_CONVERSION; 441 char charsetName[cLen + 1]; 442 memcpy(charsetName, charset, cLen); 443 charsetName[cLen] = '\0'; 444 if (strcasecmp(charsetName, "us-ascii") == 0) { 445 convertID = B_MAIL_US_ASCII_CONVERSION; 446 } else if (strcasecmp(charsetName, "utf-8") == 0) { 447 convertID = B_MAIL_UTF8_CONVERSION; 448 } else { 449 const BCharacterSet* charSet 450 = BCharacterSetRoster::FindCharacterSetByName(charsetName); 451 if (charSet != NULL) { 452 convertID = charSet->GetConversionID(); 453 } 454 } 455 if (convertID == B_MAIL_NULL_CONVERSION) { 456 // unidentified charset 457 // what to do? doing nothing skips the encoded text; 458 // but we should keep it: we copy it to the output. 459 if (string != tail && tail != end) 460 memmove(string, tail, end-tail); 461 string += end-tail; 462 continue; 463 } 464 // else we've successfully identified the charset 465 466 char *src = encoding+2; 467 int32 srcLen = end - 2 - src; 468 // encoded text: src..src+srcLen 469 470 // decode text, get decoded length (reducing xforms) 471 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1) 472 : decode_base64(src, src, srcLen); 473 474 // allocate space for the converted text 475 int32 dstLen = end-string + *bufLen-strLen; 476 char *dst = (char*)malloc(dstLen); 477 int32 cvLen = srcLen; 478 int32 convState = 0; 479 480 // 481 // do the conversion 482 // 483 ret = mail_convert_to_utf8(convertID, src, &cvLen, dst, &dstLen, 484 &convState); 485 if (ret != B_OK) { 486 // what to do? doing nothing skips the encoded text 487 // but we should keep it: we copy it to the output. 488 489 free(dst); 490 491 if (string != tail && tail != end) 492 memmove(string, tail, end-tail); 493 string += end-tail; 494 continue; 495 } 496 /* convert_to_ is either returning something wrong or my 497 test data is screwed up. Whatever it is, Not Enough 498 Space is not the only cause of the below, so we just 499 assume it succeeds if it converts anything at all. 500 else if (cvLen < srcLen) 501 { 502 // not enough room to convert the data; 503 // grow *buf and retry 504 505 free(dst); 506 507 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1)); 508 if (temp == NULL) 509 { 510 ret = B_NO_MEMORY; 511 break; 512 } 513 514 *bufp = temp; 515 *bufLen = 2*(*bufLen + 1); 516 517 string = *bufp + (string-head); 518 tail = *bufp + (tail-head); 519 charset = *bufp + (charset-head); 520 encoding = *bufp + (encoding-head); 521 end = *bufp + (end-head); 522 src = *bufp + (src-head); 523 head = *bufp; 524 continue; 525 } 526 */ 527 else { 528 if (dstLen > end-string) { 529 // copy the string forward... 530 memmove(string+dstLen, end, strLen - (end-head) + 1); 531 strLen += string+dstLen - end; 532 end = string + dstLen; 533 } 534 535 memcpy(string, dst, dstLen); 536 string += dstLen; 537 free(dst); 538 continue; 539 } 540 } 541 542 // copy everything that's left 543 size_t tailLen = strLen - (tail - head); 544 memmove(string, tail, tailLen+1); 545 string += tailLen; 546 547 // replace the last char 548 (*bufp)[strLen] = lastChar; 549 550 return ret < B_OK ? ret : string-head; 551 } 552 553 554 ssize_t 555 utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding) 556 { 557 struct word { 558 BString originalWord; 559 BString convertedWord; 560 bool needsEncoding; 561 562 // Convert the word from UTF-8 to the desired character set. The 563 // converted version also includes the escape codes to return to ASCII 564 // mode, if relevant. Also note if it uses unprintable characters, 565 // which means it will need that special encoding treatment later. 566 void ConvertWordToCharset (uint32 charset) { 567 int32 state = 0; 568 int32 originalLength = originalWord.Length(); 569 int32 convertedLength = originalLength * 5 + 1; 570 char *convertedBuffer = convertedWord.LockBuffer (convertedLength); 571 mail_convert_from_utf8 (charset, originalWord.String(), 572 &originalLength, convertedBuffer, &convertedLength, &state); 573 for (int i = 0; i < convertedLength; i++) { 574 if ((convertedBuffer[i] & (1 << 7)) || 575 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) { 576 needsEncoding = true; 577 break; 578 } 579 } 580 convertedWord.UnlockBuffer (convertedLength); 581 }; 582 }; 583 struct word *currentWord; 584 BList words; 585 586 // Break the header into words. White space characters (including tabs and 587 // newlines) separate the words. Each word includes any space before it as 588 // part of the word. Actually, quotes and other special characters 589 // (",()<>@) are treated as separate words of their own so that they don't 590 // get encoded (because MIME headers get the quotes parsed before character 591 // set unconversion is done). The reader is supposed to ignore all white 592 // space between encoded words, which can be inserted so that older mail 593 // parsers don't have overly long line length problems. 594 595 const char *source = *bufp; 596 const char *bufEnd = *bufp + length; 597 const char *specialChars = "\"()<>@,"; 598 599 while (source < bufEnd) { 600 currentWord = new struct word; 601 currentWord->needsEncoding = false; 602 603 int wordEnd = 0; 604 605 // Include leading spaces as part of the word. 606 while (source + wordEnd < bufEnd && isspace (source[wordEnd])) 607 wordEnd++; 608 609 if (source + wordEnd < bufEnd && 610 strchr (specialChars, source[wordEnd]) != NULL) { 611 // Got a quote mark or other special character, which is treated as 612 // a word in itself since it shouldn't be encoded, which would hide 613 // it from the mail system. 614 wordEnd++; 615 } else { 616 // Find the end of the word. Leave wordEnd pointing just after the 617 // last character in the word. 618 while (source + wordEnd < bufEnd) { 619 if (isspace(source[wordEnd]) || 620 strchr (specialChars, source[wordEnd]) != NULL) 621 break; 622 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ && 623 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) { 624 // No English words are that long (46 is the longest), 625 // break up what is likely Asian text (which has no spaces) 626 // at the start of the next non-ASCII UTF-8 character (high 627 // two bits are both ones). Note that two encoded words in 628 // a row get joined together, even if there is a space 629 // between them in the final output text, according to the 630 // standard. Next word will also be conveniently get 631 // encoded due to the 0xC0 test. 632 currentWord->needsEncoding = true; 633 break; 634 } 635 wordEnd++; 636 } 637 } 638 currentWord->originalWord.SetTo (source, wordEnd); 639 currentWord->ConvertWordToCharset (charset); 640 words.AddItem(currentWord); 641 source += wordEnd; 642 } 643 644 // Combine adjacent words which contain unprintable text so that the 645 // overhead of switching back and forth between regular text and specially 646 // encoded text is reduced. However, the combined word must be shorter 647 // than the maximum of 75 bytes, including character set specification and 648 // all those delimiters (worst case 22 bytes of overhead). 649 650 struct word *run; 651 652 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) { 653 if (!currentWord->needsEncoding) 654 continue; // No need to combine unencoded words. 655 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) { 656 if (!run->needsEncoding) 657 break; // Don't want to combine encoded and unencoded words. 658 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) { 659 currentWord->originalWord.Append (run->originalWord); 660 currentWord->ConvertWordToCharset (charset); 661 words.RemoveItem(g); 662 delete run; 663 g--; 664 } else // Can't merge this word, result would be too long. 665 break; 666 } 667 } 668 669 // Combine the encoded and unencoded words into one line, doing the 670 // quoted-printable or base64 encoding. Insert an extra space between 671 // words which are both encoded to make word wrapping easier, since there 672 // is normally none, and you're allowed to insert space (the receiver 673 // throws it away if it is between encoded words). 674 675 BString rfc2047; 676 bool previousWordNeededEncoding = false; 677 678 const char *charset_dec = "none-bug"; 679 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) { 680 if (mail_charsets[i].flavor == charset) { 681 charset_dec = mail_charsets[i].charset; 682 break; 683 } 684 } 685 686 while ((currentWord = (struct word *)words.RemoveItem((int32)0)) != NULL) { 687 if ((encoding != quoted_printable && encoding != base64) || 688 !currentWord->needsEncoding) { 689 rfc2047.Append (currentWord->convertedWord); 690 } else { 691 // This word needs encoding. Try to insert a space between it and 692 // the previous word. 693 if (previousWordNeededEncoding) 694 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words. 695 else { 696 // Previous word is not encoded, spaces are significant. Try 697 // to move a space from the start of this word to be outside of 698 // the encoded text, so that there is a bit of space between 699 // this word and the previous one to enhance word wrapping 700 // chances later on. 701 if (currentWord->originalWord.Length() > 1 && 702 isspace (currentWord->originalWord[0])) { 703 rfc2047 << currentWord->originalWord[0]; 704 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */); 705 currentWord->ConvertWordToCharset (charset); 706 } 707 } 708 709 char *encoded = NULL; 710 ssize_t encoded_len = 0; 711 int32 convertedLength = currentWord->convertedWord.Length (); 712 const char *convertedBuffer = currentWord->convertedWord.String (); 713 714 switch (encoding) { 715 case quoted_printable: 716 encoded = (char *) malloc (convertedLength * 3); 717 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */); 718 break; 719 case base64: 720 encoded = (char *) malloc (convertedLength * 2); 721 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */); 722 break; 723 default: // Unknown encoding type, shouldn't happen. 724 encoded = (char *) convertedBuffer; 725 encoded_len = convertedLength; 726 break; 727 } 728 729 rfc2047 << "=?" << charset_dec << '?' << encoding << '?'; 730 rfc2047.Append (encoded, encoded_len); 731 rfc2047 << "?="; 732 733 if (encoding == quoted_printable || encoding == base64) 734 free(encoded); 735 } 736 previousWordNeededEncoding = currentWord->needsEncoding; 737 delete currentWord; 738 } 739 740 free(*bufp); 741 742 ssize_t finalLength = rfc2047.Length (); 743 *bufp = (char *) (malloc (finalLength + 1)); 744 memcpy (*bufp, rfc2047.String(), finalLength); 745 (*bufp)[finalLength] = 0; 746 747 return finalLength; 748 } 749 750 751 void 752 FoldLineAtWhiteSpaceAndAddCRLF(BString &string) 753 { 754 int inputLength = string.Length(); 755 int lineStartIndex; 756 const int maxLineLength = 78; // Doesn't include CRLF. 757 BString output; 758 int splitIndex; 759 int tempIndex; 760 761 lineStartIndex = 0; 762 while (true) { 763 // If we don't need to wrap the text, just output the remainder, if any. 764 765 if (lineStartIndex + maxLineLength >= inputLength) { 766 if (lineStartIndex < inputLength) { 767 output.Insert (string, lineStartIndex /* source offset */, 768 inputLength - lineStartIndex /* count */, 769 output.Length() /* insert at */); 770 output.Append (CRLF); 771 } 772 break; 773 } 774 775 // Look ahead for a convenient spot to split it, between a comma and 776 // space, which you often see between e-mail addresses like this: 777 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com 778 779 tempIndex = lineStartIndex + maxLineLength; 780 if (tempIndex > inputLength) 781 tempIndex = inputLength; 782 splitIndex = string.FindLast (", ", tempIndex); 783 if (splitIndex >= lineStartIndex) 784 splitIndex++; // Point to the space character. 785 786 // If none of those exist, try splitting at any white space. 787 788 if (splitIndex <= lineStartIndex) 789 splitIndex = string.FindLast (" ", tempIndex); 790 if (splitIndex <= lineStartIndex) 791 splitIndex = string.FindLast ("\t", tempIndex); 792 793 // If none of those exist, allow for a longer word - split at the next 794 // available white space. 795 796 if (splitIndex <= lineStartIndex) 797 splitIndex = string.FindFirst (" ", lineStartIndex + 1); 798 if (splitIndex <= lineStartIndex) 799 splitIndex = string.FindFirst ("\t", lineStartIndex + 1); 800 801 // Give up, the whole rest of the line can't be split, just dump it 802 // out. 803 804 if (splitIndex <= lineStartIndex) { 805 if (lineStartIndex < inputLength) { 806 output.Insert (string, lineStartIndex /* source offset */, 807 inputLength - lineStartIndex /* count */, 808 output.Length() /* insert at */); 809 output.Append (CRLF); 810 } 811 break; 812 } 813 814 // Do the split. The current line up to but not including the space 815 // gets output, followed by a CRLF. The space remains to become the 816 // start of the next line (and that tells the message reader that it is 817 // a continuation line). 818 819 output.Insert (string, lineStartIndex /* source offset */, 820 splitIndex - lineStartIndex /* count */, 821 output.Length() /* insert at */); 822 output.Append (CRLF); 823 lineStartIndex = splitIndex; 824 } 825 string.SetTo (output); 826 } 827 828 829 ssize_t 830 readfoldedline(FILE *file, char **buffer, size_t *buflen) 831 { 832 ssize_t len = buflen && *buflen ? *buflen : 0; 833 char * buf = buffer && *buffer ? *buffer : NULL; 834 ssize_t cnt = 0; // Number of characters currently in the buffer. 835 int c; 836 837 while (true) { 838 // Make sure there is space in the buffer for two more characters (one 839 // for the next character, and one for the end of string NUL byte). 840 if (buf == NULL || cnt + 2 >= len) { 841 char *temp = (char *)realloc(buf, len + 64); 842 if (temp == NULL) { 843 // Out of memory, however existing buffer remains allocated. 844 cnt = ENOMEM; 845 break; 846 } 847 len += 64; 848 buf = temp; 849 } 850 851 // Read the next character, or end of file, or IO error. 852 if ((c = fgetc(file)) == EOF) { 853 if (ferror (file)) { 854 cnt = errno; 855 if (cnt >= 0) 856 cnt = -1; // Error codes must be negative. 857 } else { 858 // Really is end of file. Also make it end of line if there is 859 // some text already read in. If the first thing read was EOF, 860 // just return an empty string. 861 if (cnt > 0) { 862 buf[cnt++] = '\n'; 863 if (buf[cnt-2] == '\r') { 864 buf[cnt-2] = '\n'; 865 --cnt; 866 } 867 } 868 } 869 break; 870 } 871 872 buf[cnt++] = c; 873 874 if (c == '\n') { 875 // Convert CRLF end of line to just a LF. Do it before folding, in 876 // case we don't need to fold. 877 if (cnt >= 2 && buf[cnt-2] == '\r') { 878 buf[cnt-2] = '\n'; 879 --cnt; 880 } 881 // If the current line is empty then return it (so that empty lines 882 // don't disappear if the next line starts with a space). 883 if (cnt <= 1) 884 break; 885 // Fold if first character on the next line is whitespace. 886 c = fgetc(file); // Note it's OK to read EOF and ungetc it too. 887 if (c == ' ' || c == '\t') 888 buf[cnt-1] = c; // Replace \n with the white space character. 889 else { 890 // Not folding, we finished reading a line; break out of the loop 891 ungetc(c,file); 892 break; 893 } 894 } 895 } 896 897 if (buf != NULL && cnt >= 0) 898 buf[cnt] = '\0'; 899 900 if (buffer) 901 *buffer = buf; 902 else if (buf) 903 free(buf); 904 905 if (buflen) 906 *buflen = len; 907 908 return cnt; 909 } 910 911 912 ssize_t 913 readfoldedline(BPositionIO &in, char **buffer, size_t *buflen) 914 { 915 ssize_t len = buflen && *buflen ? *buflen : 0; 916 char * buf = buffer && *buffer ? *buffer : NULL; 917 ssize_t cnt = 0; // Number of characters currently in the buffer. 918 char c; 919 status_t errorCode; 920 921 while (true) { 922 // Make sure there is space in the buffer for two more characters (one 923 // for the next character, and one for the end of string NUL byte). 924 if (buf == NULL || cnt + 2 >= len) { 925 char *temp = (char *)realloc(buf, len + 64); 926 if (temp == NULL) { 927 // Out of memory, however existing buffer remains allocated. 928 cnt = ENOMEM; 929 break; 930 } 931 len += 64; 932 buf = temp; 933 } 934 935 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered. 936 if (errorCode != 1) { 937 if (errorCode < 0) { 938 cnt = errorCode; // IO error encountered, just return the code. 939 } else { 940 // Really is end of file. Also make it end of line if there is 941 // some text already read in. If the first thing read was EOF, 942 // just return an empty string. 943 if (cnt > 0) { 944 buf[cnt++] = '\n'; 945 if (buf[cnt-2] == '\r') { 946 buf[cnt-2] = '\n'; 947 --cnt; 948 } 949 } 950 } 951 break; 952 } 953 954 buf[cnt++] = c; 955 956 if (c == '\n') { 957 // Convert CRLF end of line to just a LF. Do it before folding, in 958 // case we don't need to fold. 959 if (cnt >= 2 && buf[cnt-2] == '\r') { 960 buf[cnt-2] = '\n'; 961 --cnt; 962 } 963 // If the current line is empty then return it (so that empty lines 964 // don't disappear if the next line starts with a space). 965 if (cnt <= 1) 966 break; 967 // if first character on the next line is whitespace, fold lines 968 errorCode = in.Read(&c,1); 969 if (errorCode == 1) { 970 if (c == ' ' || c == '\t') 971 buf[cnt-1] = c; // Replace \n with the white space character. 972 else { 973 // Not folding, we finished reading a whole line. 974 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read. 975 break; 976 } 977 } else if (errorCode < 0) { 978 cnt = errorCode; 979 break; 980 } else // No next line; at the end of the file. Return the line. 981 break; 982 } 983 } 984 985 if (buf != NULL && cnt >= 0) 986 buf[cnt] = '\0'; 987 988 if (buffer) 989 *buffer = buf; 990 else if (buf) 991 free(buf); 992 993 if (buflen) 994 *buflen = len; 995 996 return cnt; 997 } 998 999 1000 ssize_t 1001 nextfoldedline(const char** header, char **buffer, size_t *buflen) 1002 { 1003 ssize_t len = buflen && *buflen ? *buflen : 0; 1004 char * buf = buffer && *buffer ? *buffer : NULL; 1005 ssize_t cnt = 0; // Number of characters currently in the buffer. 1006 char c; 1007 1008 while (true) 1009 { 1010 // Make sure there is space in the buffer for two more characters (one 1011 // for the next character, and one for the end of string NUL byte). 1012 if (buf == NULL || cnt + 2 >= len) 1013 { 1014 char *temp = (char *)realloc(buf, len + 64); 1015 if (temp == NULL) { 1016 // Out of memory, however existing buffer remains allocated. 1017 cnt = ENOMEM; 1018 break; 1019 } 1020 len += 64; 1021 buf = temp; 1022 } 1023 1024 // Read the next character, or end of file. 1025 if ((c = *(*header)++) == 0) { 1026 // End of file. Also make it end of line if there is some text 1027 // already read in. If the first thing read was EOF, just return 1028 // an empty string. 1029 if (cnt > 0) { 1030 buf[cnt++] = '\n'; 1031 if (buf[cnt-2] == '\r') { 1032 buf[cnt-2] = '\n'; 1033 --cnt; 1034 } 1035 } 1036 break; 1037 } 1038 1039 buf[cnt++] = c; 1040 1041 if (c == '\n') { 1042 // Convert CRLF end of line to just a LF. Do it before folding, in 1043 // case we don't need to fold. 1044 if (cnt >= 2 && buf[cnt-2] == '\r') { 1045 buf[cnt-2] = '\n'; 1046 --cnt; 1047 } 1048 // If the current line is empty then return it (so that empty lines 1049 // don't disappear if the next line starts with a space). 1050 if (cnt <= 1) 1051 break; 1052 // if first character on the next line is whitespace, fold lines 1053 c = *(*header)++; 1054 if (c == ' ' || c == '\t') 1055 buf[cnt-1] = c; // Replace \n with the white space character. 1056 else { 1057 // Not folding, we finished reading a line; break out of the loop 1058 (*header)--; // Undo read of the non-whitespace. 1059 break; 1060 } 1061 } 1062 } 1063 1064 1065 if (buf != NULL && cnt >= 0) 1066 buf[cnt] = '\0'; 1067 1068 if (buffer) 1069 *buffer = buf; 1070 else if (buf) 1071 free(buf); 1072 1073 if (buflen) 1074 *buflen = len; 1075 1076 return cnt; 1077 } 1078 1079 1080 void 1081 trim_white_space(BString &string) 1082 { 1083 int32 i; 1084 int32 length = string.Length(); 1085 char *buffer = string.LockBuffer(length + 1); 1086 1087 while (length > 0 && isspace(buffer[length - 1])) 1088 length--; 1089 buffer[length] = '\0'; 1090 1091 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {} 1092 if (i != 0) { 1093 length -= i; 1094 memmove(buffer,buffer + i,length + 1); 1095 } 1096 string.UnlockBuffer(length); 1097 } 1098 1099 1100 /*! Tries to return a human-readable name from the specified 1101 header parameter (should be from "To:" or "From:"). 1102 Tries to return the name rather than the eMail address. 1103 */ 1104 void 1105 extract_address_name(BString &header) 1106 { 1107 BString name; 1108 const char *start = header.String(); 1109 const char *stop = start + strlen (start); 1110 1111 // Find a string S in the header (email foo) that matches: 1112 // Old style name in brackets: foo@bar.com (S) 1113 // New style quotes: "S" <foo@bar.com> 1114 // New style no quotes if nothing else found: S <foo@bar.com> 1115 // If nothing else found then use the whole thing: S 1116 1117 for (int i = 0; i <= 3; i++) { 1118 // Set p1 to the first letter in the name and p2 to just past the last 1119 // letter in the name. p2 stays NULL if a name wasn't found in this 1120 // pass. 1121 const char *p1 = NULL, *p2 = NULL; 1122 1123 switch (i) { 1124 case 0: // foo@bar.com (S) 1125 if ((p1 = strchr(start,'(')) != NULL) { 1126 p1++; // Advance to first letter in the name. 1127 size_t nest = 1; // Handle nested brackets. 1128 for (p2 = p1; p2 < stop; ++p2) 1129 { 1130 if (*p2 == ')') 1131 --nest; 1132 else if (*p2 == '(') 1133 ++nest; 1134 if (nest <= 0) 1135 break; 1136 } 1137 if (nest != 0) 1138 p2 = NULL; // False alarm, no terminating bracket. 1139 } 1140 break; 1141 case 1: // "S" <foo@bar.com> 1142 if ((p1 = strchr(start, '\"')) != NULL) 1143 p2 = strchr(++p1, '\"'); 1144 break; 1145 case 2: // S <foo@bar.com> 1146 p1 = start; 1147 if (name.Length() == 0) 1148 p2 = strchr(start, '<'); 1149 break; 1150 case 3: // S 1151 p1 = start; 1152 if (name.Length() == 0) 1153 p2 = stop; 1154 break; 1155 } 1156 1157 // Remove leading and trailing space-like characters and save the 1158 // result if it is longer than any other likely names found. 1159 if (p2 != NULL) { 1160 while (p1 < p2 && (isspace (*p1))) 1161 ++p1; 1162 1163 while (p1 < p2 && (isspace (p2[-1]))) 1164 --p2; 1165 1166 int newLength = p2 - p1; 1167 if (name.Length() < newLength) 1168 name.SetTo(p1, newLength); 1169 } 1170 } 1171 1172 int32 lessIndex = name.FindFirst('<'); 1173 int32 greaterIndex = name.FindLast('>'); 1174 1175 if (lessIndex == 0) { 1176 // Have an address of the form <address> and nothing else, so remove 1177 // the greater and less than signs, if any. 1178 if (greaterIndex > 0) 1179 name.Remove(greaterIndex, 1); 1180 name.Remove(lessIndex, 1); 1181 } else if (lessIndex > 0 && lessIndex < greaterIndex) { 1182 // Yahoo stupidly inserts the e-mail address into the name string, so 1183 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com> 1184 name.Remove(lessIndex, greaterIndex - lessIndex + 1); 1185 } 1186 1187 trim_white_space(name); 1188 header = name; 1189 } 1190 1191 1192 /*! Given a subject in a BString, remove the extraneous RE: re: and other stuff 1193 to get down to the core subject string, which should be identical for all 1194 messages posted about a topic. The input string is modified in place to 1195 become the output core subject string. 1196 */ 1197 void 1198 SubjectToThread (BString &string) 1199 { 1200 // a regex that matches a non-ASCII UTF8 character: 1201 #define U8C \ 1202 "[\302-\337][\200-\277]" \ 1203 "|\340[\302-\337][\200-\277]" \ 1204 "|[\341-\357][\200-\277][\200-\277]" \ 1205 "|\360[\220-\277][\200-\277][\200-\277]" \ 1206 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \ 1207 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \ 1208 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1209 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1210 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]" 1211 1212 #define PATTERN \ 1213 "^ +" \ 1214 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1215 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1216 "| *\\(fwd\\) *$" 1217 1218 if (gRebuf == NULL && atomic_add(&gLocker, 1) == 0) { 1219 // the idea is to compile the regexp once to speed up testing 1220 1221 for (int i=0; i<256; ++i) gTranslation[i]=i; 1222 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i); 1223 1224 gRe.translate = gTranslation; 1225 gRe.regs_allocated = REGS_FIXED; 1226 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED; 1227 1228 const char *pattern = PATTERN; 1229 // count subexpressions in PATTERN 1230 for (unsigned int i=0; pattern[i] != 0; ++i) 1231 { 1232 if (pattern[i] == '\\') 1233 ++i; 1234 else if (pattern[i] == '(') 1235 ++gNsub; 1236 } 1237 1238 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe); 1239 if (err == NULL) 1240 gRebuf = &gRe; 1241 else 1242 fprintf(stderr, "Failed to compile the regex: %s\n", err); 1243 } else { 1244 int32 tries = 200; 1245 while (gRebuf == NULL && tries-- > 0) 1246 snooze(10000); 1247 } 1248 1249 if (gRebuf) { 1250 struct re_registers regs; 1251 // can't be static if this function is to be thread-safe 1252 1253 regs.num_regs = gNsub; 1254 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1255 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1256 1257 for (int start = 0; (start = re_search(gRebuf, string.String(), 1258 string.Length(), 0, string.Length(), ®s)) >= 0;) { 1259 // 1260 // we found something 1261 // 1262 1263 // don't delete [bemaildaemon]... 1264 if (start == regs.start[1]) 1265 start = regs.start[2]; 1266 1267 string.Remove(start,regs.end[0]-start); 1268 if (start) 1269 string.Insert(' ',1,start); 1270 1271 // TODO: for some subjects this results in an endless loop, check 1272 // why this happen. 1273 if (regs.end[0] - start <= 1) 1274 break; 1275 } 1276 1277 free(regs.start); 1278 free(regs.end); 1279 } 1280 1281 // Finally remove leading and trailing space. Some software, like 1282 // tm-edit 1.8, appends a space to the subject, which would break 1283 // threading if we left it in. 1284 trim_white_space(string); 1285 } 1286 1287 1288 /*! Converts a date to a time. Handles numeric time zones too, unlike 1289 parsedate(). Returns -1 if it fails. 1290 */ 1291 time_t 1292 ParseDateWithTimeZone(const char *DateString) 1293 { 1294 time_t currentTime; 1295 time_t dateAsTime; 1296 char tempDateString[80]; 1297 char tempZoneString[6]; 1298 time_t zoneDeltaTime; 1299 int zoneIndex; 1300 char *zonePntr; 1301 1302 // See if we can remove the time zone portion. parsedate understands time 1303 // zone 3 letter names, but doesn't understand the numeric +9999 time zone 1304 // format. To do: see if a newer parsedate exists. 1305 1306 strncpy (tempDateString, DateString, sizeof (tempDateString)); 1307 tempDateString[sizeof (tempDateString) - 1] = 0; 1308 1309 // Remove trailing spaces. 1310 zonePntr = tempDateString + strlen (tempDateString) - 1; 1311 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1312 *zonePntr-- = 0; 1313 if (zonePntr < tempDateString) 1314 return -1; // Empty string. 1315 1316 // Remove the trailing time zone in round brackets, like in 1317 // Fri, 22 Feb 2002 15:22:42 EST (-0500) 1318 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT) 1319 if (tempDateString[strlen(tempDateString)-1] == ')') 1320 { 1321 zonePntr = strrchr (tempDateString, '('); 1322 if (zonePntr != NULL) 1323 { 1324 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces. 1325 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1326 *zonePntr-- = 0; 1327 if (zonePntr < tempDateString) 1328 return -1; // Empty string. 1329 } 1330 } 1331 1332 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000 1333 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--) 1334 { 1335 zonePntr = tempDateString + zoneIndex; 1336 if (zonePntr[0] == '+' || zonePntr[0] == '-') 1337 { 1338 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' && 1339 zonePntr[2] >= '0' && zonePntr[2] <= '9' && 1340 zonePntr[3] >= '0' && zonePntr[3] <= '9' && 1341 zonePntr[4] >= '0' && zonePntr[4] <= '9') 1342 break; 1343 } 1344 } 1345 if (zoneIndex >= 0) 1346 { 1347 // Remove the zone from the date string and any following time zone 1348 // letter codes. Also put in GMT so that the date gets parsed as GMT. 1349 memcpy (tempZoneString, zonePntr, 5); 1350 tempZoneString [5] = 0; 1351 strcpy (zonePntr, "GMT"); 1352 } 1353 else // No numeric time zone found. 1354 strcpy (tempZoneString, "+0000"); 1355 1356 time (¤tTime); 1357 dateAsTime = parsedate (tempDateString, currentTime); 1358 if (dateAsTime == (time_t) -1) 1359 return -1; // Failure. 1360 1361 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes. 1362 tempZoneString[3] = 0; 1363 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours. 1364 if (tempZoneString[0] == '+') 1365 zoneDeltaTime = 0 - zoneDeltaTime; 1366 dateAsTime += zoneDeltaTime; 1367 1368 return dateAsTime; 1369 } 1370 1371 1372 /*! Parses a mail header and fills the headers BMessage 1373 */ 1374 status_t 1375 parse_header(BMessage &headers, BPositionIO &input) 1376 { 1377 char *buffer = NULL; 1378 size_t bufferSize = 0; 1379 int32 length; 1380 1381 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) { 1382 --length; 1383 // Don't include the \n at the end of the buffer. 1384 1385 // convert to UTF-8 and null-terminate the buffer 1386 length = rfc2047_to_utf8(&buffer, &bufferSize, length); 1387 buffer[length] = '\0'; 1388 1389 const char *delimiter = strstr(buffer, ":"); 1390 if (delimiter == NULL) 1391 continue; 1392 1393 BString header(buffer, delimiter - buffer); 1394 header.CapitalizeEachWord(); 1395 // unified case for later fetch 1396 1397 delimiter++; // Skip the colon. 1398 // Skip over leading white space and tabs. 1399 // TODO: (comments in brackets). 1400 while (isspace(*delimiter)) 1401 delimiter++; 1402 1403 // TODO: implement joining of multiple header tags (i.e. multiple "Cc:"s) 1404 headers.AddString(header.String(), delimiter); 1405 } 1406 free(buffer); 1407 1408 return B_OK; 1409 } 1410 1411 1412 status_t 1413 extract_from_header(const BString& header, const BString& field, 1414 BString& target) 1415 { 1416 int32 headerLength = header.Length(); 1417 int32 fieldEndPos = 0; 1418 while (true) { 1419 int32 pos = header.IFindFirst(field, fieldEndPos); 1420 if (pos < 0) 1421 return B_BAD_VALUE; 1422 fieldEndPos = pos + field.Length(); 1423 1424 if (pos != 0 && header.ByteAt(pos - 1) != '\n') 1425 continue; 1426 if (header.ByteAt(fieldEndPos) == ':') 1427 break; 1428 } 1429 fieldEndPos++; 1430 1431 int32 crPos = fieldEndPos; 1432 while (true) { 1433 fieldEndPos = crPos; 1434 crPos = header.FindFirst('\n', crPos); 1435 if (crPos < 0) 1436 crPos = headerLength; 1437 BString temp; 1438 header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos); 1439 if (header.ByteAt(crPos - 1) == '\r') { 1440 temp.Truncate(temp.Length() - 1); 1441 temp += " "; 1442 } 1443 target += temp; 1444 crPos++; 1445 if (crPos >= headerLength) 1446 break; 1447 char nextByte = header.ByteAt(crPos); 1448 if (nextByte != ' ' && nextByte != '\t') 1449 break; 1450 crPos++; 1451 } 1452 1453 size_t bufferSize = target.Length(); 1454 char* buffer = target.LockBuffer(bufferSize); 1455 size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize); 1456 target.UnlockBuffer(length); 1457 1458 trim_white_space(target); 1459 1460 return B_OK; 1461 } 1462 1463 1464 void 1465 extract_address(BString &address) 1466 { 1467 const char *string = address.String(); 1468 int32 first; 1469 1470 // first, remove all quoted text 1471 1472 if ((first = address.FindFirst('"')) >= 0) { 1473 int32 last = first + 1; 1474 while (string[last] && string[last] != '"') 1475 last++; 1476 1477 if (string[last] == '"') 1478 address.Remove(first, last + 1 - first); 1479 } 1480 1481 // try to extract the address now 1482 1483 if ((first = address.FindFirst('<')) >= 0) { 1484 // the world likes us and we can just get the address the easy way... 1485 int32 last = address.FindFirst('>'); 1486 if (last >= 0) { 1487 address.Truncate(last); 1488 address.Remove(0, first + 1); 1489 1490 return; 1491 } 1492 } 1493 1494 // then, see if there is anything in parenthesis to throw away 1495 1496 if ((first = address.FindFirst('(')) >= 0) { 1497 int32 last = first + 1; 1498 while (string[last] && string[last] != ')') 1499 last++; 1500 1501 if (string[last] == ')') 1502 address.Remove(first, last + 1 - first); 1503 } 1504 1505 // now, there shouldn't be much else left 1506 1507 trim_white_space(address); 1508 } 1509 1510 1511 void 1512 get_address_list(BList &list, const char *string, 1513 void (*cleanupFunc)(BString &)) 1514 { 1515 if (string == NULL || !string[0]) 1516 return; 1517 1518 const char *start = string; 1519 1520 while (true) { 1521 if (string[0] == '"') { 1522 const char *quoteEnd = ++string; 1523 1524 while (quoteEnd[0] && quoteEnd[0] != '"') 1525 quoteEnd++; 1526 1527 if (!quoteEnd[0]) // string exceeds line! 1528 quoteEnd = string; 1529 1530 string = quoteEnd + 1; 1531 } 1532 1533 if (string[0] == ',' || string[0] == '\0') { 1534 BString address(start, string - start); 1535 trim_white_space(address); 1536 1537 if (cleanupFunc) 1538 cleanupFunc(address); 1539 1540 list.AddItem(strdup(address.String())); 1541 1542 start = string + 1; 1543 } 1544 1545 if (!string[0]) 1546 break; 1547 1548 string++; 1549 } 1550 } 1551 1552 1553 status_t 1554 CopyMailFolderAttributes(const char* targetPath) 1555 { 1556 BPath path; 1557 status_t status = find_directory(B_USER_SETTINGS_DIRECTORY, &path); 1558 if (status != B_OK) 1559 return status; 1560 1561 path.Append("Tracker"); 1562 path.Append("DefaultQueryTemplates"); 1563 path.Append("text_x-email"); 1564 1565 BNode source(path.Path()); 1566 BNode target(targetPath); 1567 return BPrivate::CopyAttributes(source, target); 1568 } 1569