1 /* mail util - header parsing 2 ** 3 ** Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved. 4 */ 5 6 7 #include <UTF8.h> 8 #include <Message.h> 9 #include <String.h> 10 #include <Locker.h> 11 #include <DataIO.h> 12 #include <List.h> 13 14 #include <stdlib.h> 15 #include <string.h> 16 #include <stdio.h> 17 #define __USE_GNU 18 #include <regex.h> 19 #include <ctype.h> 20 #include <errno.h> 21 #include <parsedate.h> 22 23 #include <mail_encoding.h> 24 25 #include <mail_util.h> 26 27 #include <CharacterSet.h> 28 #include <CharacterSetRoster.h> 29 30 using namespace BPrivate; 31 32 #define CRLF "\r\n" 33 34 struct CharsetConversionEntry 35 { 36 const char *charset; 37 uint32 flavor; 38 }; 39 40 extern const CharsetConversionEntry mail_charsets [] = 41 { 42 // In order of authority, so when searching for the name for a particular 43 // numbered conversion, start at the beginning of the array. 44 {"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD 45 {"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD 46 {"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD 47 {"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD 48 {"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD 49 {"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD 50 {"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD 51 {"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD 52 {"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD 53 {"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD 54 {"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD 55 {"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD 56 {"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD 57 58 {"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD 59 {"shift-jis", B_SJIS_CONVERSION}, 60 {"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD 61 {"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD 62 63 {"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD 64 {"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE? 65 {"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software 66 67 {"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD 68 {"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD 69 {"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD 70 71 {"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? ) 72 {"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? ) 73 {"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? ) 74 75 {"big5", 24}, // MIME STANDARD 76 77 {"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? ) 78 {"gb2312", 25}, // COMPATIBLE 79 {"gbk", 25}, // COMPATIBLE 80 81 /* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */ 82 {"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD 83 {"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD 84 85 {NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */ 86 }; 87 88 89 status_t 90 write_read_attr(BNode& node, read_flags flag) 91 { 92 if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32)) 93 < 0) 94 return B_ERROR; 95 96 #if R5_COMPATIBLE 97 // manage the status string only if it currently has a "read" status 98 BString currentStatus; 99 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, ¤tStatus) == B_OK) { 100 if (currentStatus.ICompare("New") != 0 101 && currentStatus.ICompare("Read") != 0 102 && currentStatus.ICompare("Seen") != 0) 103 return B_OK; 104 } 105 106 const char* statusString = (flag == B_READ) ? "Read" 107 : (flag == B_SEEN) ? "Seen" : "New"; 108 if (node.WriteAttr(B_MAIL_ATTR_STATUS, B_STRING_TYPE, 0, statusString, 109 strlen(statusString)) < 0) 110 return B_ERROR; 111 #endif 112 return B_OK; 113 } 114 115 116 status_t 117 read_read_attr(BNode& node, read_flags& flag) 118 { 119 if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32)) 120 == sizeof(int32)) 121 return B_OK; 122 123 #if R5_COMPATIBLE 124 BString statusString; 125 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) { 126 if (statusString.ICompare("New")) 127 flag = B_UNREAD; 128 else 129 flag = B_READ; 130 131 return B_OK; 132 } 133 #endif 134 return B_ERROR; 135 } 136 137 138 // The next couple of functions are our wrapper around convert_to_utf8 and 139 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by 140 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation. It 141 // also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION. 142 143 _EXPORT status_t mail_convert_to_utf8 ( 144 uint32 srcEncoding, 145 const char *src, 146 int32 *srcLen, 147 char *dst, 148 int32 *dstLen, 149 int32 *state, 150 char substitute) 151 { 152 int32 copyAmount; 153 char *originalDst = dst; 154 status_t returnCode = -1; 155 156 if (srcEncoding == B_MAIL_UTF8_CONVERSION) { 157 copyAmount = *srcLen; 158 if (*dstLen < copyAmount) 159 copyAmount = *dstLen; 160 memcpy (dst, src, copyAmount); 161 *srcLen = copyAmount; 162 *dstLen = copyAmount; 163 returnCode = B_OK; 164 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) { 165 int32 i; 166 unsigned char letter; 167 copyAmount = *srcLen; 168 if (*dstLen < copyAmount) 169 copyAmount = *dstLen; 170 for (i = 0; i < copyAmount; i++) { 171 letter = *src++; 172 if (letter > 0x80U) 173 // Invalid, could also use substitute, but better to strip high bit. 174 *dst++ = letter - 0x80U; 175 else if (letter == 0x80U) 176 // Can't convert to 0x00 since that's NUL, which would cause problems. 177 *dst++ = substitute; 178 else 179 *dst++ = letter; 180 } 181 *srcLen = copyAmount; 182 *dstLen = copyAmount; 183 returnCode = B_OK; 184 } else 185 returnCode = convert_to_utf8 (srcEncoding, src, srcLen, 186 dst, dstLen, state, substitute); 187 188 if (returnCode == B_OK) { 189 // Replace spurious NUL bytes, which should normally not be in the 190 // output of the decoding (not normal UTF-8 characters, and no NULs are 191 // in our usual input strings). They happen for some odd ISO-2022-JP 192 // byte pair combinations which are improperly handled by the BeOS 193 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the 194 // first ESC $ B switches to a Japanese character set, then the next 195 // two bytes "yD" specify a character, then ESC ( B switches back to 196 // the ASCII character set. The UTF-8 conversion yields a NUL byte. 197 int32 i; 198 for (i = 0; i < *dstLen; i++) 199 if (originalDst[i] == 0) 200 originalDst[i] = substitute; 201 } 202 return returnCode; 203 } 204 205 206 _EXPORT status_t mail_convert_from_utf8 ( 207 uint32 dstEncoding, 208 const char *src, 209 int32 *srcLen, 210 char *dst, 211 int32 *dstLen, 212 int32 *state, 213 char substitute) 214 { 215 int32 copyAmount; 216 status_t errorCode; 217 int32 originalDstLen = *dstLen; 218 int32 tempDstLen; 219 int32 tempSrcLen; 220 221 if (dstEncoding == B_MAIL_UTF8_CONVERSION) 222 { 223 copyAmount = *srcLen; 224 if (*dstLen < copyAmount) 225 copyAmount = *dstLen; 226 memcpy (dst, src, copyAmount); 227 *srcLen = copyAmount; 228 *dstLen = copyAmount; 229 return B_OK; 230 } 231 232 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) 233 { 234 int32 characterLength; 235 int32 dstRemaining = *dstLen; 236 unsigned char letter; 237 int32 srcRemaining = *srcLen; 238 239 // state contains the number of source bytes to skip, left over from a 240 // partial UTF-8 character split over the end of the buffer from last 241 // time. 242 if (srcRemaining <= *state) { 243 *state -= srcRemaining; 244 *dstLen = 0; 245 return B_OK; 246 } 247 srcRemaining -= *state; 248 src += *state; 249 *state = 0; 250 251 while (true) { 252 if (srcRemaining <= 0 || dstRemaining <= 0) 253 break; 254 letter = *src; 255 if (letter < 0x80) 256 characterLength = 1; // Regular ASCII equivalent code. 257 else if (letter < 0xC0) 258 characterLength = 1; // Invalid in-between data byte 10xxxxxx. 259 else if (letter < 0xE0) 260 characterLength = 2; 261 else if (letter < 0xF0) 262 characterLength = 3; 263 else if (letter < 0xF8) 264 characterLength = 4; 265 else if (letter < 0xFC) 266 characterLength = 5; 267 else if (letter < 0xFE) 268 characterLength = 6; 269 else 270 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8. 271 if (letter < 0x80) 272 *dst++ = *src; 273 else 274 *dst++ = substitute; 275 dstRemaining--; 276 if (srcRemaining < characterLength) { 277 // Character split past the end of the buffer. 278 *state = characterLength - srcRemaining; 279 srcRemaining = 0; 280 } else { 281 src += characterLength; 282 srcRemaining -= characterLength; 283 } 284 } 285 // Update with the amounts used. 286 *srcLen = *srcLen - srcRemaining; 287 *dstLen = *dstLen - dstRemaining; 288 return B_OK; 289 } 290 291 errorCode = convert_from_utf8 (dstEncoding, src, srcLen, dst, dstLen, state, substitute); 292 if (errorCode != B_OK) 293 return errorCode; 294 295 if (dstEncoding != B_JIS_CONVERSION) 296 return B_OK; 297 298 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different 299 // character subsets. For E-mail headers (and other uses), it needs to be 300 // switched back to ASCII at the end (otherwise the last character gets 301 // lost or other weird things happen in the headers). Note that we can't 302 // just append the escape code since the convert_from_utf8 "state" will be 303 // wrong. So we append an ASCII letter and throw it away, leaving just the 304 // escape code. Well, it actually switches to the Roman character set, not 305 // ASCII, but that should be OK. 306 307 tempDstLen = originalDstLen - *dstLen; 308 if (tempDstLen < 3) // Not enough space remaining in the output. 309 return B_OK; // Sort of an error, but we did convert the rest OK. 310 tempSrcLen = 1; 311 errorCode = convert_from_utf8 (dstEncoding, "a", &tempSrcLen, 312 dst + *dstLen, &tempDstLen, state, substitute); 313 if (errorCode != B_OK) 314 return errorCode; 315 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */; 316 return B_OK; 317 } 318 319 320 321 static int handle_non_rfc2047_encoding(char **buffer,size_t *bufferLength,size_t *sourceLength) 322 { 323 char *string = *buffer; 324 int32 length = *sourceLength; 325 int32 i; 326 327 // check for 8-bit characters 328 for (i = 0;i < length;i++) 329 if (string[i] & 0x80) 330 break; 331 if (i == length) 332 return false; 333 334 // check for groups of 8-bit characters - this code is not very smart; 335 // it just can detect some sort of single-byte encoded stuff, the rest 336 // is regarded as UTF-8 337 338 int32 singletons = 0,doubles = 0; 339 340 for (i = 0;i < length;i++) 341 { 342 if (string[i] & 0x80) 343 { 344 if ((string[i + 1] & 0x80) == 0) 345 singletons++; 346 else doubles++; 347 i++; 348 } 349 } 350 351 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1 352 { 353 int32 state = 0; 354 // just to be sure 355 int32 destLength = length * 4 + 1; 356 int32 destBufferLength = destLength; 357 char *dest = (char*)malloc(destLength); 358 if (dest == NULL) 359 return 0; 360 361 if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest, 362 &destLength, &state) == B_OK) { 363 *buffer = dest; 364 *bufferLength = destBufferLength; 365 *sourceLength = destLength; 366 return true; 367 } 368 free(dest); 369 return false; 370 } 371 372 // we assume a valid UTF-8 string here, but yes, we don't check it 373 return true; 374 } 375 376 377 _EXPORT ssize_t rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen) 378 { 379 char *head, *tail; 380 char *charset, *encoding, *end; 381 ssize_t ret = B_OK; 382 383 if (bufp == NULL || *bufp == NULL) 384 return -1; 385 386 char *string = *bufp; 387 388 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail 389 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen)) 390 return strLen; 391 392 // set up string length 393 if (strLen == 0) 394 strLen = strlen(*bufp); 395 char lastChar = (*bufp)[strLen]; 396 (*bufp)[strLen] = '\0'; 397 398 //---------Whew! Now for RFC compliant mail 399 bool encodedWordFoundPreviously = false; 400 for (head = tail = string; 401 ((charset = strstr(tail, "=?")) != NULL) 402 && (((encoding = strchr(charset + 2, '?')) != NULL) 403 && encoding[1] && (encoding[2] == '?') && encoding[3]) 404 && (end = strstr(encoding + 3, "?=")) != NULL; 405 // found "=?...charset...?e?...text...?= (e == encoding) 406 // ^charset ^encoding ^end 407 tail = end) 408 { 409 // Copy non-encoded text (from tail up to charset) to the output. 410 // Ignore spaces between two encoded "words". RFC2047 says the words 411 // should be concatenated without the space (designed for Asian 412 // sentences which have no spaces yet need to be broken into "words" to 413 // keep within the line length limits). 414 bool nonSpaceFound = false; 415 for (int i = 0; i < charset-tail; i++) { 416 if (!isspace (tail[i])) { 417 nonSpaceFound = true; 418 break; 419 } 420 } 421 if (!encodedWordFoundPreviously || nonSpaceFound) { 422 if (string != tail && tail != charset) 423 memmove(string, tail, charset-tail); 424 string += charset-tail; 425 } 426 tail = charset; 427 encodedWordFoundPreviously = true; 428 429 // move things to point at what they should: 430 // =?...charset...?e?...text...?= (e == encoding) 431 // ^charset ^encoding ^end 432 charset += 2; 433 encoding += 1; 434 end += 2; 435 436 // find the charset this text is in now 437 size_t cLen = encoding - 1 - charset; 438 bool base64encoded = toupper(*encoding) == 'B'; 439 440 uint32 convert_id = B_MAIL_NULL_CONVERSION; 441 char charset_string[cLen+1]; 442 memcpy(charset_string, charset, cLen); 443 charset_string[cLen] = '\0'; 444 if (strcasecmp(charset_string, "us-ascii") == 0) { 445 convert_id = B_MAIL_US_ASCII_CONVERSION; 446 } else if (strcasecmp(charset_string, "utf-8") == 0) { 447 convert_id = B_MAIL_UTF8_CONVERSION; 448 } else { 449 const BCharacterSet * cs = BCharacterSetRoster::FindCharacterSetByName(charset_string); 450 if (cs != NULL) { 451 convert_id = cs->GetConversionID(); 452 } 453 } 454 if (convert_id == B_MAIL_NULL_CONVERSION) 455 { 456 // unidentified charset 457 // what to do? doing nothing skips the encoded text; 458 // but we should keep it: we copy it to the output. 459 if (string != tail && tail != end) 460 memmove(string, tail, end-tail); 461 string += end-tail; 462 continue; 463 } 464 // else we've successfully identified the charset 465 466 char *src = encoding+2; 467 int32 srcLen = end - 2 - src; 468 // encoded text: src..src+srcLen 469 470 // decode text, get decoded length (reducing xforms) 471 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1) 472 : decode_base64(src, src, srcLen); 473 474 // allocate space for the converted text 475 int32 dstLen = end-string + *bufLen-strLen; 476 char *dst = (char*)malloc(dstLen); 477 int32 cvLen = srcLen; 478 int32 convState = 0; 479 480 // 481 // do the conversion 482 // 483 ret = mail_convert_to_utf8(convert_id, src, &cvLen, dst, &dstLen, &convState); 484 if (ret != B_OK) 485 { 486 // what to do? doing nothing skips the encoded text 487 // but we should keep it: we copy it to the output. 488 489 free(dst); 490 491 if (string != tail && tail != end) 492 memmove(string, tail, end-tail); 493 string += end-tail; 494 continue; 495 } 496 /* convert_to_ is either returning something wrong or my 497 test data is screwed up. Whatever it is, Not Enough 498 Space is not the only cause of the below, so we just 499 assume it succeeds if it converts anything at all. 500 else if (cvLen < srcLen) 501 { 502 // not enough room to convert the data; 503 // grow *buf and retry 504 505 free(dst); 506 507 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1)); 508 if (temp == NULL) 509 { 510 ret = B_NO_MEMORY; 511 break; 512 } 513 514 *bufp = temp; 515 *bufLen = 2*(*bufLen + 1); 516 517 string = *bufp + (string-head); 518 tail = *bufp + (tail-head); 519 charset = *bufp + (charset-head); 520 encoding = *bufp + (encoding-head); 521 end = *bufp + (end-head); 522 src = *bufp + (src-head); 523 head = *bufp; 524 continue; 525 } 526 */ 527 else 528 { 529 if (dstLen > end-string) 530 { 531 // copy the string forward... 532 memmove(string+dstLen, end, strLen - (end-head) + 1); 533 strLen += string+dstLen - end; 534 end = string + dstLen; 535 } 536 537 memcpy(string, dst, dstLen); 538 string += dstLen; 539 free(dst); 540 continue; 541 } 542 } 543 544 // copy everything that's left 545 size_t tailLen = strLen - (tail - head); 546 memmove(string, tail, tailLen+1); 547 string += tailLen; 548 549 // replace the last char 550 (*bufp)[strLen] = lastChar; 551 552 return ret < B_OK ? ret : string-head; 553 } 554 555 556 _EXPORT ssize_t utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding) { 557 struct word { 558 BString originalWord; 559 BString convertedWord; 560 bool needsEncoding; 561 562 // Convert the word from UTF-8 to the desired character set. The 563 // converted version also includes the escape codes to return to ASCII 564 // mode, if relevant. Also note if it uses unprintable characters, 565 // which means it will need that special encoding treatment later. 566 void ConvertWordToCharset (uint32 charset) { 567 int32 state = 0; 568 int32 originalLength = originalWord.Length(); 569 int32 convertedLength = originalLength * 5 + 1; 570 char *convertedBuffer = convertedWord.LockBuffer (convertedLength); 571 mail_convert_from_utf8 (charset, originalWord.String(), 572 &originalLength, convertedBuffer, &convertedLength, &state); 573 for (int i = 0; i < convertedLength; i++) { 574 if ((convertedBuffer[i] & (1 << 7)) || 575 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) { 576 needsEncoding = true; 577 break; 578 } 579 } 580 convertedWord.UnlockBuffer (convertedLength); 581 }; 582 }; 583 struct word *currentWord; 584 BList words; 585 586 // Break the header into words. White space characters (including tabs and 587 // newlines) separate the words. Each word includes any space before it as 588 // part of the word. Actually, quotes and other special characters 589 // (",()<>@) are treated as separate words of their own so that they don't 590 // get encoded (because MIME headers get the quotes parsed before character 591 // set unconversion is done). The reader is supposed to ignore all white 592 // space between encoded words, which can be inserted so that older mail 593 // parsers don't have overly long line length problems. 594 595 const char *source = *bufp; 596 const char *bufEnd = *bufp + length; 597 const char *specialChars = "\"()<>@,"; 598 599 while (source < bufEnd) { 600 currentWord = new struct word; 601 currentWord->needsEncoding = false; 602 603 int wordEnd = 0; 604 605 // Include leading spaces as part of the word. 606 while (source + wordEnd < bufEnd && isspace (source[wordEnd])) 607 wordEnd++; 608 609 if (source + wordEnd < bufEnd && 610 strchr (specialChars, source[wordEnd]) != NULL) { 611 // Got a quote mark or other special character, which is treated as 612 // a word in itself since it shouldn't be encoded, which would hide 613 // it from the mail system. 614 wordEnd++; 615 } else { 616 // Find the end of the word. Leave wordEnd pointing just after the 617 // last character in the word. 618 while (source + wordEnd < bufEnd) { 619 if (isspace(source[wordEnd]) || 620 strchr (specialChars, source[wordEnd]) != NULL) 621 break; 622 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ && 623 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) { 624 // No English words are that long (46 is the longest), 625 // break up what is likely Asian text (which has no spaces) 626 // at the start of the next non-ASCII UTF-8 character (high 627 // two bits are both ones). Note that two encoded words in 628 // a row get joined together, even if there is a space 629 // between them in the final output text, according to the 630 // standard. Next word will also be conveniently get 631 // encoded due to the 0xC0 test. 632 currentWord->needsEncoding = true; 633 break; 634 } 635 wordEnd++; 636 } 637 } 638 currentWord->originalWord.SetTo (source, wordEnd); 639 currentWord->ConvertWordToCharset (charset); 640 words.AddItem(currentWord); 641 source += wordEnd; 642 } 643 644 // Combine adjacent words which contain unprintable text so that the 645 // overhead of switching back and forth between regular text and specially 646 // encoded text is reduced. However, the combined word must be shorter 647 // than the maximum of 75 bytes, including character set specification and 648 // all those delimiters (worst case 22 bytes of overhead). 649 650 struct word *run; 651 652 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) { 653 if (!currentWord->needsEncoding) 654 continue; // No need to combine unencoded words. 655 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) { 656 if (!run->needsEncoding) 657 break; // Don't want to combine encoded and unencoded words. 658 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) { 659 currentWord->originalWord.Append (run->originalWord); 660 currentWord->ConvertWordToCharset (charset); 661 words.RemoveItem(g); 662 delete run; 663 g--; 664 } else // Can't merge this word, result would be too long. 665 break; 666 } 667 } 668 669 // Combine the encoded and unencoded words into one line, doing the 670 // quoted-printable or base64 encoding. Insert an extra space between 671 // words which are both encoded to make word wrapping easier, since there 672 // is normally none, and you're allowed to insert space (the receiver 673 // throws it away if it is between encoded words). 674 675 BString rfc2047; 676 bool previousWordNeededEncoding = false; 677 678 const char *charset_dec = "none-bug"; 679 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) { 680 if (mail_charsets[i].flavor == charset) { 681 charset_dec = mail_charsets[i].charset; 682 break; 683 } 684 } 685 686 while ((currentWord = (struct word *)words.RemoveItem(0L)) != NULL) { 687 if ((encoding != quoted_printable && encoding != base64) || 688 !currentWord->needsEncoding) { 689 rfc2047.Append (currentWord->convertedWord); 690 } else { 691 // This word needs encoding. Try to insert a space between it and 692 // the previous word. 693 if (previousWordNeededEncoding) 694 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words. 695 else { 696 // Previous word is not encoded, spaces are significant. Try 697 // to move a space from the start of this word to be outside of 698 // the encoded text, so that there is a bit of space between 699 // this word and the previous one to enhance word wrapping 700 // chances later on. 701 if (currentWord->originalWord.Length() > 1 && 702 isspace (currentWord->originalWord[0])) { 703 rfc2047 << currentWord->originalWord[0]; 704 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */); 705 currentWord->ConvertWordToCharset (charset); 706 } 707 } 708 709 char *encoded = NULL; 710 ssize_t encoded_len = 0; 711 int32 convertedLength = currentWord->convertedWord.Length (); 712 const char *convertedBuffer = currentWord->convertedWord.String (); 713 714 switch (encoding) { 715 case quoted_printable: 716 encoded = (char *) malloc (convertedLength * 3); 717 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */); 718 break; 719 case base64: 720 encoded = (char *) malloc (convertedLength * 2); 721 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */); 722 break; 723 default: // Unknown encoding type, shouldn't happen. 724 encoded = (char *) convertedBuffer; 725 encoded_len = convertedLength; 726 break; 727 } 728 729 rfc2047 << "=?" << charset_dec << '?' << encoding << '?'; 730 rfc2047.Append (encoded, encoded_len); 731 rfc2047 << "?="; 732 733 if (encoding == quoted_printable || encoding == base64) 734 free(encoded); 735 } 736 previousWordNeededEncoding = currentWord->needsEncoding; 737 delete currentWord; 738 } 739 740 free(*bufp); 741 742 ssize_t finalLength = rfc2047.Length (); 743 *bufp = (char *) (malloc (finalLength + 1)); 744 memcpy (*bufp, rfc2047.String(), finalLength); 745 (*bufp)[finalLength] = 0; 746 747 return finalLength; 748 } 749 750 751 //==================================================================== 752 753 void FoldLineAtWhiteSpaceAndAddCRLF (BString &string) 754 { 755 int inputLength = string.Length(); 756 int lineStartIndex; 757 const int maxLineLength = 78; // Doesn't include CRLF. 758 BString output; 759 int splitIndex; 760 int tempIndex; 761 762 lineStartIndex = 0; 763 while (true) { 764 // If we don't need to wrap the text, just output the remainder, if any. 765 766 if (lineStartIndex + maxLineLength >= inputLength) { 767 if (lineStartIndex < inputLength) { 768 output.Insert (string, lineStartIndex /* source offset */, 769 inputLength - lineStartIndex /* count */, 770 output.Length() /* insert at */); 771 output.Append (CRLF); 772 } 773 break; 774 } 775 776 // Look ahead for a convenient spot to split it, between a comma and 777 // space, which you often see between e-mail addresses like this: 778 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com 779 780 tempIndex = lineStartIndex + maxLineLength; 781 if (tempIndex > inputLength) 782 tempIndex = inputLength; 783 splitIndex = string.FindLast (", ", tempIndex); 784 if (splitIndex >= lineStartIndex) 785 splitIndex++; // Point to the space character. 786 787 // If none of those exist, try splitting at any white space. 788 789 if (splitIndex <= lineStartIndex) 790 splitIndex = string.FindLast (" ", tempIndex); 791 if (splitIndex <= lineStartIndex) 792 splitIndex = string.FindLast ("\t", tempIndex); 793 794 // If none of those exist, allow for a longer word - split at the next 795 // available white space. 796 797 if (splitIndex <= lineStartIndex) 798 splitIndex = string.FindFirst (" ", lineStartIndex + 1); 799 if (splitIndex <= lineStartIndex) 800 splitIndex = string.FindFirst ("\t", lineStartIndex + 1); 801 802 // Give up, the whole rest of the line can't be split, just dump it 803 // out. 804 805 if (splitIndex <= lineStartIndex) { 806 if (lineStartIndex < inputLength) { 807 output.Insert (string, lineStartIndex /* source offset */, 808 inputLength - lineStartIndex /* count */, 809 output.Length() /* insert at */); 810 output.Append (CRLF); 811 } 812 break; 813 } 814 815 // Do the split. The current line up to but not including the space 816 // gets output, followed by a CRLF. The space remains to become the 817 // start of the next line (and that tells the message reader that it is 818 // a continuation line). 819 820 output.Insert (string, lineStartIndex /* source offset */, 821 splitIndex - lineStartIndex /* count */, 822 output.Length() /* insert at */); 823 output.Append (CRLF); 824 lineStartIndex = splitIndex; 825 } 826 string.SetTo (output); 827 } 828 829 830 //==================================================================== 831 832 _EXPORT ssize_t readfoldedline(FILE *file, char **buffer, size_t *buflen) 833 { 834 ssize_t len = buflen && *buflen ? *buflen : 0; 835 char * buf = buffer && *buffer ? *buffer : NULL; 836 ssize_t cnt = 0; // Number of characters currently in the buffer. 837 int c; 838 839 while (true) 840 { 841 // Make sure there is space in the buffer for two more characters (one 842 // for the next character, and one for the end of string NUL byte). 843 if (buf == NULL || cnt + 2 >= len) 844 { 845 char *temp = (char *)realloc(buf, len + 64); 846 if (temp == NULL) { 847 // Out of memory, however existing buffer remains allocated. 848 cnt = ENOMEM; 849 break; 850 } 851 len += 64; 852 buf = temp; 853 } 854 855 // Read the next character, or end of file, or IO error. 856 if ((c = fgetc(file)) == EOF) { 857 if (ferror (file)) { 858 cnt = errno; 859 if (cnt >= 0) 860 cnt = -1; // Error codes must be negative. 861 } else { 862 // Really is end of file. Also make it end of line if there is 863 // some text already read in. If the first thing read was EOF, 864 // just return an empty string. 865 if (cnt > 0) { 866 buf[cnt++] = '\n'; 867 if (buf[cnt-2] == '\r') { 868 buf[cnt-2] = '\n'; 869 --cnt; 870 } 871 } 872 } 873 break; 874 } 875 876 buf[cnt++] = c; 877 878 if (c == '\n') { 879 // Convert CRLF end of line to just a LF. Do it before folding, in 880 // case we don't need to fold. 881 if (cnt >= 2 && buf[cnt-2] == '\r') { 882 buf[cnt-2] = '\n'; 883 --cnt; 884 } 885 // If the current line is empty then return it (so that empty lines 886 // don't disappear if the next line starts with a space). 887 if (cnt <= 1) 888 break; 889 // Fold if first character on the next line is whitespace. 890 c = fgetc(file); // Note it's OK to read EOF and ungetc it too. 891 if (c == ' ' || c == '\t') 892 buf[cnt-1] = c; // Replace \n with the white space character. 893 else { 894 // Not folding, we finished reading a line; break out of the loop 895 ungetc(c,file); 896 break; 897 } 898 } 899 } 900 901 902 if (buf != NULL && cnt >= 0) 903 buf[cnt] = '\0'; 904 905 if (buffer) 906 *buffer = buf; 907 else if (buf) 908 free(buf); 909 910 if (buflen) 911 *buflen = len; 912 913 return cnt; 914 } 915 916 917 //==================================================================== 918 919 _EXPORT ssize_t readfoldedline(BPositionIO &in, char **buffer, size_t *buflen) 920 { 921 ssize_t len = buflen && *buflen ? *buflen : 0; 922 char * buf = buffer && *buffer ? *buffer : NULL; 923 ssize_t cnt = 0; // Number of characters currently in the buffer. 924 char c; 925 status_t errorCode; 926 927 while (true) 928 { 929 // Make sure there is space in the buffer for two more characters (one 930 // for the next character, and one for the end of string NUL byte). 931 if (buf == NULL || cnt + 2 >= len) 932 { 933 char *temp = (char *)realloc(buf, len + 64); 934 if (temp == NULL) { 935 // Out of memory, however existing buffer remains allocated. 936 cnt = ENOMEM; 937 break; 938 } 939 len += 64; 940 buf = temp; 941 } 942 943 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered. 944 if (errorCode != 1) { 945 if (errorCode < 0) { 946 cnt = errorCode; // IO error encountered, just return the code. 947 } else { 948 // Really is end of file. Also make it end of line if there is 949 // some text already read in. If the first thing read was EOF, 950 // just return an empty string. 951 if (cnt > 0) { 952 buf[cnt++] = '\n'; 953 if (buf[cnt-2] == '\r') { 954 buf[cnt-2] = '\n'; 955 --cnt; 956 } 957 } 958 } 959 break; 960 } 961 962 buf[cnt++] = c; 963 964 if (c == '\n') { 965 // Convert CRLF end of line to just a LF. Do it before folding, in 966 // case we don't need to fold. 967 if (cnt >= 2 && buf[cnt-2] == '\r') { 968 buf[cnt-2] = '\n'; 969 --cnt; 970 } 971 // If the current line is empty then return it (so that empty lines 972 // don't disappear if the next line starts with a space). 973 if (cnt <= 1) 974 break; 975 // if first character on the next line is whitespace, fold lines 976 errorCode = in.Read(&c,1); 977 if (errorCode == 1) { 978 if (c == ' ' || c == '\t') 979 buf[cnt-1] = c; // Replace \n with the white space character. 980 else { 981 // Not folding, we finished reading a whole line. 982 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read. 983 break; 984 } 985 } else if (errorCode < 0) { 986 cnt = errorCode; 987 break; 988 } else // No next line; at the end of the file. Return the line. 989 break; 990 } 991 } 992 993 if (buf != NULL && cnt >= 0) 994 buf[cnt] = '\0'; 995 996 if (buffer) 997 *buffer = buf; 998 else if (buf) 999 free(buf); 1000 1001 if (buflen) 1002 *buflen = len; 1003 1004 return cnt; 1005 } 1006 1007 1008 _EXPORT ssize_t 1009 nextfoldedline(const char** header, char **buffer, size_t *buflen) 1010 { 1011 ssize_t len = buflen && *buflen ? *buflen : 0; 1012 char * buf = buffer && *buffer ? *buffer : NULL; 1013 ssize_t cnt = 0; // Number of characters currently in the buffer. 1014 char c; 1015 1016 while (true) 1017 { 1018 // Make sure there is space in the buffer for two more characters (one 1019 // for the next character, and one for the end of string NUL byte). 1020 if (buf == NULL || cnt + 2 >= len) 1021 { 1022 char *temp = (char *)realloc(buf, len + 64); 1023 if (temp == NULL) { 1024 // Out of memory, however existing buffer remains allocated. 1025 cnt = ENOMEM; 1026 break; 1027 } 1028 len += 64; 1029 buf = temp; 1030 } 1031 1032 // Read the next character, or end of file. 1033 if ((c = *(*header)++) == 0) { 1034 // End of file. Also make it end of line if there is some text 1035 // already read in. If the first thing read was EOF, just return 1036 // an empty string. 1037 if (cnt > 0) { 1038 buf[cnt++] = '\n'; 1039 if (buf[cnt-2] == '\r') { 1040 buf[cnt-2] = '\n'; 1041 --cnt; 1042 } 1043 } 1044 break; 1045 } 1046 1047 buf[cnt++] = c; 1048 1049 if (c == '\n') { 1050 // Convert CRLF end of line to just a LF. Do it before folding, in 1051 // case we don't need to fold. 1052 if (cnt >= 2 && buf[cnt-2] == '\r') { 1053 buf[cnt-2] = '\n'; 1054 --cnt; 1055 } 1056 // If the current line is empty then return it (so that empty lines 1057 // don't disappear if the next line starts with a space). 1058 if (cnt <= 1) 1059 break; 1060 // if first character on the next line is whitespace, fold lines 1061 c = *(*header)++; 1062 if (c == ' ' || c == '\t') 1063 buf[cnt-1] = c; // Replace \n with the white space character. 1064 else { 1065 // Not folding, we finished reading a line; break out of the loop 1066 (*header)--; // Undo read of the non-whitespace. 1067 break; 1068 } 1069 } 1070 } 1071 1072 1073 if (buf != NULL && cnt >= 0) 1074 buf[cnt] = '\0'; 1075 1076 if (buffer) 1077 *buffer = buf; 1078 else if (buf) 1079 free(buf); 1080 1081 if (buflen) 1082 *buflen = len; 1083 1084 return cnt; 1085 } 1086 1087 1088 _EXPORT void 1089 trim_white_space(BString &string) 1090 { 1091 int32 i; 1092 int32 length = string.Length(); 1093 char *buffer = string.LockBuffer(length + 1); 1094 1095 while (length > 0 && isspace(buffer[length - 1])) 1096 length--; 1097 buffer[length] = '\0'; 1098 1099 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {} 1100 if (i != 0) { 1101 length -= i; 1102 memmove(buffer,buffer + i,length + 1); 1103 } 1104 string.UnlockBuffer(length); 1105 } 1106 1107 1108 /** Tries to return a human-readable name from the specified 1109 * header parameter (should be from "To:" or "From:"). 1110 * Tries to return the name rather than the eMail address. 1111 */ 1112 1113 _EXPORT void 1114 extract_address_name(BString &header) 1115 { 1116 BString name; 1117 const char *start = header.String(); 1118 const char *stop = start + strlen (start); 1119 1120 // Find a string S in the header (email foo) that matches: 1121 // Old style name in brackets: foo@bar.com (S) 1122 // New style quotes: "S" <foo@bar.com> 1123 // New style no quotes if nothing else found: S <foo@bar.com> 1124 // If nothing else found then use the whole thing: S 1125 1126 for (int i = 0; i <= 3; i++) { 1127 // Set p1 to the first letter in the name and p2 to just past the last 1128 // letter in the name. p2 stays NULL if a name wasn't found in this 1129 // pass. 1130 const char *p1 = NULL, *p2 = NULL; 1131 1132 switch (i) { 1133 case 0: // foo@bar.com (S) 1134 if ((p1 = strchr(start,'(')) != NULL) { 1135 p1++; // Advance to first letter in the name. 1136 size_t nest = 1; // Handle nested brackets. 1137 for (p2 = p1; p2 < stop; ++p2) 1138 { 1139 if (*p2 == ')') 1140 --nest; 1141 else if (*p2 == '(') 1142 ++nest; 1143 if (nest <= 0) 1144 break; 1145 } 1146 if (nest != 0) 1147 p2 = NULL; // False alarm, no terminating bracket. 1148 } 1149 break; 1150 case 1: // "S" <foo@bar.com> 1151 if ((p1 = strchr(start, '\"')) != NULL) 1152 p2 = strchr(++p1, '\"'); 1153 break; 1154 case 2: // S <foo@bar.com> 1155 p1 = start; 1156 if (name.Length() == 0) 1157 p2 = strchr(start, '<'); 1158 break; 1159 case 3: // S 1160 p1 = start; 1161 if (name.Length() == 0) 1162 p2 = stop; 1163 break; 1164 } 1165 1166 // Remove leading and trailing space-like characters and save the 1167 // result if it is longer than any other likely names found. 1168 if (p2 != NULL) { 1169 while (p1 < p2 && (isspace (*p1))) 1170 ++p1; 1171 1172 while (p1 < p2 && (isspace (p2[-1]))) 1173 --p2; 1174 1175 int newLength = p2 - p1; 1176 if (name.Length() < newLength) 1177 name.SetTo(p1, newLength); 1178 } 1179 } 1180 1181 int32 lessIndex = name.FindFirst('<'); 1182 int32 greaterIndex = name.FindLast('>'); 1183 1184 if (lessIndex == 0) { 1185 // Have an address of the form <address> and nothing else, so remove 1186 // the greater and less than signs, if any. 1187 if (greaterIndex > 0) 1188 name.Remove(greaterIndex, 1); 1189 name.Remove(lessIndex, 1); 1190 } else if (lessIndex > 0 && lessIndex < greaterIndex) { 1191 // Yahoo stupidly inserts the e-mail address into the name string, so 1192 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com> 1193 name.Remove(lessIndex, greaterIndex - lessIndex + 1); 1194 } 1195 1196 trim_white_space(name); 1197 header = name; 1198 } 1199 1200 1201 1202 // Given a subject in a BString, remove the extraneous RE: re: and other stuff 1203 // to get down to the core subject string, which should be identical for all 1204 // messages posted about a topic. The input string is modified in place to 1205 // become the output core subject string. 1206 1207 static int32 gLocker = 0; 1208 static size_t gNsub = 1; 1209 static re_pattern_buffer gRe; 1210 static re_pattern_buffer *gRebuf = NULL; 1211 static unsigned char gTranslation[256]; 1212 1213 _EXPORT void SubjectToThread (BString &string) 1214 { 1215 // a regex that matches a non-ASCII UTF8 character: 1216 #define U8C \ 1217 "[\302-\337][\200-\277]" \ 1218 "|\340[\302-\337][\200-\277]" \ 1219 "|[\341-\357][\200-\277][\200-\277]" \ 1220 "|\360[\220-\277][\200-\277][\200-\277]" \ 1221 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \ 1222 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \ 1223 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1224 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1225 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]" 1226 1227 #define PATTERN \ 1228 "^ +" \ 1229 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1230 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1231 "| *\\(fwd\\) *$" 1232 1233 if (gRebuf == NULL && atomic_add(&gLocker,1) == 0) 1234 { 1235 // the idea is to compile the regexp once to speed up testing 1236 1237 for (int i=0; i<256; ++i) gTranslation[i]=i; 1238 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i); 1239 1240 gRe.translate = gTranslation; 1241 gRe.regs_allocated = REGS_FIXED; 1242 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED; 1243 1244 const char *pattern = PATTERN; 1245 // count subexpressions in PATTERN 1246 for (unsigned int i=0; pattern[i] != 0; ++i) 1247 { 1248 if (pattern[i] == '\\') 1249 ++i; 1250 else if (pattern[i] == '(') 1251 ++gNsub; 1252 } 1253 1254 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe); 1255 if (err == NULL) 1256 gRebuf = &gRe; 1257 else 1258 fprintf(stderr, "Failed to compile the regex: %s\n", err); 1259 } 1260 else 1261 { 1262 int32 tries = 200; 1263 while (gRebuf == NULL && tries-- > 0) 1264 snooze(10000); 1265 } 1266 1267 if (gRebuf) 1268 { 1269 struct re_registers regs; 1270 // can't be static if this function is to be thread-safe 1271 1272 regs.num_regs = gNsub; 1273 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1274 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1275 1276 for (int start=0; 1277 (start=re_search(gRebuf, string.String(), string.Length(), 1278 0, string.Length(), ®s)) >= 0; 1279 ) 1280 { 1281 // 1282 // we found something 1283 // 1284 1285 // don't delete [bemaildaemon]... 1286 if (start == regs.start[1]) 1287 start = regs.start[2]; 1288 1289 string.Remove(start,regs.end[0]-start); 1290 if (start) string.Insert(' ',1,start); 1291 } 1292 1293 free(regs.start); 1294 free(regs.end); 1295 } 1296 1297 // Finally remove leading and trailing space. Some software, like 1298 // tm-edit 1.8, appends a space to the subject, which would break 1299 // threading if we left it in. 1300 trim_white_space(string); 1301 } 1302 1303 1304 1305 // Converts a date to a time. Handles numeric time zones too, unlike 1306 // parsedate. Returns -1 if it fails. 1307 1308 _EXPORT time_t ParseDateWithTimeZone (const char *DateString) 1309 { 1310 time_t currentTime; 1311 time_t dateAsTime; 1312 char tempDateString [80]; 1313 char tempZoneString [6]; 1314 time_t zoneDeltaTime; 1315 int zoneIndex; 1316 char *zonePntr; 1317 1318 // See if we can remove the time zone portion. parsedate understands time 1319 // zone 3 letter names, but doesn't understand the numeric +9999 time zone 1320 // format. To do: see if a newer parsedate exists. 1321 1322 strncpy (tempDateString, DateString, sizeof (tempDateString)); 1323 tempDateString[sizeof (tempDateString) - 1] = 0; 1324 1325 // Remove trailing spaces. 1326 zonePntr = tempDateString + strlen (tempDateString) - 1; 1327 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1328 *zonePntr-- = 0; 1329 if (zonePntr < tempDateString) 1330 return -1; // Empty string. 1331 1332 // Remove the trailing time zone in round brackets, like in 1333 // Fri, 22 Feb 2002 15:22:42 EST (-0500) 1334 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT) 1335 if (tempDateString[strlen(tempDateString)-1] == ')') 1336 { 1337 zonePntr = strrchr (tempDateString, '('); 1338 if (zonePntr != NULL) 1339 { 1340 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces. 1341 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1342 *zonePntr-- = 0; 1343 if (zonePntr < tempDateString) 1344 return -1; // Empty string. 1345 } 1346 } 1347 1348 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000 1349 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--) 1350 { 1351 zonePntr = tempDateString + zoneIndex; 1352 if (zonePntr[0] == '+' || zonePntr[0] == '-') 1353 { 1354 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' && 1355 zonePntr[2] >= '0' && zonePntr[2] <= '9' && 1356 zonePntr[3] >= '0' && zonePntr[3] <= '9' && 1357 zonePntr[4] >= '0' && zonePntr[4] <= '9') 1358 break; 1359 } 1360 } 1361 if (zoneIndex >= 0) 1362 { 1363 // Remove the zone from the date string and any following time zone 1364 // letter codes. Also put in GMT so that the date gets parsed as GMT. 1365 memcpy (tempZoneString, zonePntr, 5); 1366 tempZoneString [5] = 0; 1367 strcpy (zonePntr, "GMT"); 1368 } 1369 else // No numeric time zone found. 1370 strcpy (tempZoneString, "+0000"); 1371 1372 time (¤tTime); 1373 dateAsTime = parsedate (tempDateString, currentTime); 1374 if (dateAsTime == (time_t) -1) 1375 return -1; // Failure. 1376 1377 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes. 1378 tempZoneString[3] = 0; 1379 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours. 1380 if (tempZoneString[0] == '+') 1381 zoneDeltaTime = 0 - zoneDeltaTime; 1382 dateAsTime += zoneDeltaTime; 1383 1384 return dateAsTime; 1385 } 1386 1387 1388 /** Parses a mail header and fills the headers BMessage 1389 */ 1390 1391 _EXPORT status_t 1392 parse_header(BMessage &headers, BPositionIO &input) 1393 { 1394 char *buffer = NULL; 1395 size_t bufferSize = 0; 1396 int32 length; 1397 1398 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) { 1399 --length; 1400 // Don't include the \n at the end of the buffer. 1401 1402 // convert to UTF-8 and null-terminate the buffer 1403 length = rfc2047_to_utf8(&buffer, &bufferSize, length); 1404 buffer[length] = '\0'; 1405 1406 const char *delimiter = strstr(buffer, ":"); 1407 if (delimiter == NULL) 1408 continue; 1409 1410 BString header(buffer, delimiter - buffer); 1411 header.CapitalizeEachWord(); 1412 // unified case for later fetch 1413 1414 delimiter++; // Skip the colon. 1415 while (isspace (*delimiter)) 1416 delimiter++; // Skip over leading white space and tabs. To do: (comments in brackets). 1417 1418 // ToDo: implement joining of multiple header tags (i.e. multiple "Cc:"s) 1419 headers.AddString(header.String(), delimiter); 1420 } 1421 free(buffer); 1422 1423 return B_OK; 1424 } 1425 1426 1427 _EXPORT status_t 1428 extract_from_header(const BString& header, const BString& field, 1429 BString& target) 1430 { 1431 int32 headerLength = header.Length(); 1432 int32 fieldEndPos = 0; 1433 while (true) { 1434 int32 pos = header.IFindFirst(field, fieldEndPos); 1435 if (pos < 0) 1436 return B_BAD_VALUE; 1437 fieldEndPos = pos + field.Length(); 1438 1439 if (pos != 0 && header.ByteAt(pos - 1) != '\n') 1440 continue; 1441 if (header.ByteAt(fieldEndPos) == ':') 1442 break; 1443 } 1444 fieldEndPos++; 1445 1446 int32 crPos = fieldEndPos; 1447 while (true) { 1448 fieldEndPos = crPos; 1449 crPos = header.FindFirst('\n', crPos); 1450 if (crPos < 0) 1451 crPos = headerLength; 1452 BString temp; 1453 header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos); 1454 if (header.ByteAt(crPos - 1) == '\r') { 1455 temp.Truncate(temp.Length() - 1); 1456 temp += " "; 1457 } 1458 target += temp; 1459 crPos++; 1460 if (crPos >= headerLength) 1461 break; 1462 char nextByte = header.ByteAt(crPos); 1463 if (nextByte != ' ' && nextByte != '\t') 1464 break; 1465 crPos++; 1466 } 1467 1468 size_t bufferSize = target.Length(); 1469 char* buffer = target.LockBuffer(bufferSize); 1470 size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize); 1471 target.UnlockBuffer(length); 1472 1473 return B_OK; 1474 } 1475 1476 1477 _EXPORT void 1478 extract_address(BString &address) 1479 { 1480 const char *string = address.String(); 1481 int32 first; 1482 1483 // first, remove all quoted text 1484 1485 if ((first = address.FindFirst('"')) >= 0) { 1486 int32 last = first + 1; 1487 while (string[last] && string[last] != '"') 1488 last++; 1489 1490 if (string[last] == '"') 1491 address.Remove(first, last + 1 - first); 1492 } 1493 1494 // try to extract the address now 1495 1496 if ((first = address.FindFirst('<')) >= 0) { 1497 // the world likes us and we can just get the address the easy way... 1498 int32 last = address.FindFirst('>'); 1499 if (last >= 0) { 1500 address.Truncate(last); 1501 address.Remove(0, first + 1); 1502 1503 return; 1504 } 1505 } 1506 1507 // then, see if there is anything in parenthesis to throw away 1508 1509 if ((first = address.FindFirst('(')) >= 0) { 1510 int32 last = first + 1; 1511 while (string[last] && string[last] != ')') 1512 last++; 1513 1514 if (string[last] == ')') 1515 address.Remove(first, last + 1 - first); 1516 } 1517 1518 // now, there shouldn't be much else left 1519 1520 trim_white_space(address); 1521 } 1522 1523 1524 _EXPORT void 1525 get_address_list(BList &list, const char *string, void (*cleanupFunc)(BString &)) 1526 { 1527 if (string == NULL || !string[0]) 1528 return; 1529 1530 const char *start = string; 1531 1532 while (true) { 1533 if (string[0] == '"') { 1534 const char *quoteEnd = ++string; 1535 1536 while (quoteEnd[0] && quoteEnd[0] != '"') 1537 quoteEnd++; 1538 1539 if (!quoteEnd[0]) // string exceeds line! 1540 quoteEnd = string; 1541 1542 string = quoteEnd + 1; 1543 } 1544 1545 if (string[0] == ',' || string[0] == '\0') { 1546 BString address(start, string - start); 1547 trim_white_space(address); 1548 1549 if (cleanupFunc) 1550 cleanupFunc(address); 1551 1552 list.AddItem(strdup(address.String())); 1553 1554 start = string + 1; 1555 } 1556 1557 if (!string[0]) 1558 break; 1559 1560 string++; 1561 } 1562 } 1563 1564