1 /* mail util - header parsing 2 ** 3 ** Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved. 4 */ 5 6 7 #include <UTF8.h> 8 #include <Message.h> 9 #include <String.h> 10 #include <Locker.h> 11 #include <DataIO.h> 12 #include <List.h> 13 14 #include <stdlib.h> 15 #include <string.h> 16 #include <stdio.h> 17 #include <regex.h> 18 #include <ctype.h> 19 #include <errno.h> 20 #include <parsedate.h> 21 22 #include <mail_encoding.h> 23 24 #include <mail_util.h> 25 26 #include <CharacterSet.h> 27 #include <CharacterSetRoster.h> 28 29 using namespace BPrivate; 30 31 #define CRLF "\r\n" 32 33 struct CharsetConversionEntry 34 { 35 const char *charset; 36 uint32 flavor; 37 }; 38 39 extern const CharsetConversionEntry mail_charsets [] = 40 { 41 // In order of authority, so when searching for the name for a particular 42 // numbered conversion, start at the beginning of the array. 43 {"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD 44 {"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD 45 {"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD 46 {"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD 47 {"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD 48 {"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD 49 {"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD 50 {"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD 51 {"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD 52 {"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD 53 {"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD 54 {"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD 55 {"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD 56 57 {"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD 58 {"shift-jis", B_SJIS_CONVERSION}, 59 {"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD 60 {"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD 61 62 {"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD 63 {"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE? 64 {"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software 65 66 {"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD 67 {"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD 68 {"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD 69 70 {"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? ) 71 {"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? ) 72 {"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? ) 73 74 {"big5", 24}, // MIME STANDARD 75 76 {"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? ) 77 {"gb2312", 25}, // COMPATIBLE 78 {"gbk", 25}, // COMPATIBLE 79 80 /* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */ 81 {"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD 82 {"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD 83 84 {NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */ 85 }; 86 87 88 // The next couple of functions are our wrapper around convert_to_utf8 and 89 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by 90 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation. It 91 // also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION. 92 93 _EXPORT status_t mail_convert_to_utf8 ( 94 uint32 srcEncoding, 95 const char *src, 96 int32 *srcLen, 97 char *dst, 98 int32 *dstLen, 99 int32 *state, 100 char substitute) 101 { 102 int32 copyAmount; 103 char *originalDst = dst; 104 status_t returnCode = -1; 105 106 if (srcEncoding == B_MAIL_UTF8_CONVERSION) { 107 copyAmount = *srcLen; 108 if (*dstLen < copyAmount) 109 copyAmount = *dstLen; 110 memcpy (dst, src, copyAmount); 111 *srcLen = copyAmount; 112 *dstLen = copyAmount; 113 returnCode = B_OK; 114 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) { 115 int32 i; 116 unsigned char letter; 117 copyAmount = *srcLen; 118 if (*dstLen < copyAmount) 119 copyAmount = *dstLen; 120 for (i = 0; i < copyAmount; i++) { 121 letter = *src++; 122 if (letter > 0x80U) 123 // Invalid, could also use substitute, but better to strip high bit. 124 *dst++ = letter - 0x80U; 125 else if (letter == 0x80U) 126 // Can't convert to 0x00 since that's NUL, which would cause problems. 127 *dst++ = substitute; 128 else 129 *dst++ = letter; 130 } 131 *srcLen = copyAmount; 132 *dstLen = copyAmount; 133 returnCode = B_OK; 134 } else 135 returnCode = convert_to_utf8 (srcEncoding, src, srcLen, 136 dst, dstLen, state, substitute); 137 138 if (returnCode == B_OK) { 139 // Replace spurious NUL bytes, which should normally not be in the 140 // output of the decoding (not normal UTF-8 characters, and no NULs are 141 // in our usual input strings). They happen for some odd ISO-2022-JP 142 // byte pair combinations which are improperly handled by the BeOS 143 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the 144 // first ESC $ B switches to a Japanese character set, then the next 145 // two bytes "yD" specify a character, then ESC ( B switches back to 146 // the ASCII character set. The UTF-8 conversion yields a NUL byte. 147 int32 i; 148 for (i = 0; i < *dstLen; i++) 149 if (originalDst[i] == 0) 150 originalDst[i] = substitute; 151 } 152 return returnCode; 153 } 154 155 156 _EXPORT status_t mail_convert_from_utf8 ( 157 uint32 dstEncoding, 158 const char *src, 159 int32 *srcLen, 160 char *dst, 161 int32 *dstLen, 162 int32 *state, 163 char substitute) 164 { 165 int32 copyAmount; 166 status_t errorCode; 167 int32 originalDstLen = *dstLen; 168 int32 tempDstLen; 169 int32 tempSrcLen; 170 171 if (dstEncoding == B_MAIL_UTF8_CONVERSION) 172 { 173 copyAmount = *srcLen; 174 if (*dstLen < copyAmount) 175 copyAmount = *dstLen; 176 memcpy (dst, src, copyAmount); 177 *srcLen = copyAmount; 178 *dstLen = copyAmount; 179 return B_OK; 180 } 181 182 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) 183 { 184 int32 characterLength; 185 int32 dstRemaining = *dstLen; 186 unsigned char letter; 187 int32 srcRemaining = *srcLen; 188 189 // state contains the number of source bytes to skip, left over from a 190 // partial UTF-8 character split over the end of the buffer from last 191 // time. 192 if (srcRemaining <= *state) { 193 *state -= srcRemaining; 194 *dstLen = 0; 195 return B_OK; 196 } 197 srcRemaining -= *state; 198 src += *state; 199 *state = 0; 200 201 while (true) { 202 if (srcRemaining <= 0 || dstRemaining <= 0) 203 break; 204 letter = *src; 205 if (letter < 0x80) 206 characterLength = 1; // Regular ASCII equivalent code. 207 else if (letter < 0xC0) 208 characterLength = 1; // Invalid in-between data byte 10xxxxxx. 209 else if (letter < 0xE0) 210 characterLength = 2; 211 else if (letter < 0xF0) 212 characterLength = 3; 213 else if (letter < 0xF8) 214 characterLength = 4; 215 else if (letter < 0xFC) 216 characterLength = 5; 217 else if (letter < 0xFE) 218 characterLength = 6; 219 else 220 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8. 221 if (letter < 0x80) 222 *dst++ = *src; 223 else 224 *dst++ = substitute; 225 dstRemaining--; 226 if (srcRemaining < characterLength) { 227 // Character split past the end of the buffer. 228 *state = characterLength - srcRemaining; 229 srcRemaining = 0; 230 } else { 231 src += characterLength; 232 srcRemaining -= characterLength; 233 } 234 } 235 // Update with the amounts used. 236 *srcLen = *srcLen - srcRemaining; 237 *dstLen = *dstLen - dstRemaining; 238 return B_OK; 239 } 240 241 errorCode = convert_from_utf8 (dstEncoding, src, srcLen, dst, dstLen, state, substitute); 242 if (errorCode != B_OK) 243 return errorCode; 244 245 if (dstEncoding != B_JIS_CONVERSION) 246 return B_OK; 247 248 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different 249 // character subsets. For E-mail headers (and other uses), it needs to be 250 // switched back to ASCII at the end (otherwise the last character gets 251 // lost or other weird things happen in the headers). Note that we can't 252 // just append the escape code since the convert_from_utf8 "state" will be 253 // wrong. So we append an ASCII letter and throw it away, leaving just the 254 // escape code. Well, it actually switches to the Roman character set, not 255 // ASCII, but that should be OK. 256 257 tempDstLen = originalDstLen - *dstLen; 258 if (tempDstLen < 3) // Not enough space remaining in the output. 259 return B_OK; // Sort of an error, but we did convert the rest OK. 260 tempSrcLen = 1; 261 errorCode = convert_from_utf8 (dstEncoding, "a", &tempSrcLen, 262 dst + *dstLen, &tempDstLen, state, substitute); 263 if (errorCode != B_OK) 264 return errorCode; 265 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */; 266 return B_OK; 267 } 268 269 270 271 static int handle_non_rfc2047_encoding(char **buffer,size_t *bufferLength,size_t *sourceLength) 272 { 273 char *string = *buffer; 274 int32 length = *sourceLength; 275 int32 i; 276 277 // check for 8-bit characters 278 for (i = 0;i < length;i++) 279 if (string[i] & 0x80) 280 break; 281 if (i == length) 282 return false; 283 284 // check for groups of 8-bit characters - this code is not very smart; 285 // it just can detect some sort of single-byte encoded stuff, the rest 286 // is regarded as UTF-8 287 288 int32 singletons = 0,doubles = 0; 289 290 for (i = 0;i < length;i++) 291 { 292 if (string[i] & 0x80) 293 { 294 if ((string[i + 1] & 0x80) == 0) 295 singletons++; 296 else doubles++; 297 i++; 298 } 299 } 300 301 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1 302 { 303 int32 state = 0; 304 // just to be sure 305 int32 destLength = length * 4 + 1; 306 int32 destBufferLength = destLength; 307 char *dest = (char *)malloc(destLength); 308 if (dest == NULL) 309 return 0; 310 311 if (convert_to_utf8(B_ISO1_CONVERSION,string,&length,dest,&destLength,&state) == B_OK) 312 { 313 free(*buffer); 314 *buffer = dest; 315 *bufferLength = destBufferLength; 316 *sourceLength = destLength; 317 return true; 318 } 319 free(dest); 320 return false; 321 } 322 323 // we assume a valid UTF-8 string here, but yes, we don't check it 324 return true; 325 } 326 327 328 _EXPORT ssize_t rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen) 329 { 330 char *head, *tail; 331 char *charset, *encoding, *end; 332 ssize_t ret = B_OK; 333 334 if (bufp == NULL || *bufp == NULL) 335 return -1; 336 337 char *string = *bufp; 338 339 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail 340 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen)) 341 return strLen; 342 343 // set up string length 344 if (strLen == 0) 345 strLen = strlen(*bufp); 346 char lastChar = (*bufp)[strLen]; 347 (*bufp)[strLen] = '\0'; 348 349 //---------Whew! Now for RFC compliant mail 350 bool encodedWordFoundPreviously = false; 351 for (head = tail = string; 352 ((charset = strstr(tail, "=?")) != NULL) 353 && (((encoding = strchr(charset + 2, '?')) != NULL) 354 && encoding[1] && (encoding[2] == '?') && encoding[3]) 355 && (end = strstr(encoding + 3, "?=")) != NULL; 356 // found "=?...charset...?e?...text...?= (e == encoding) 357 // ^charset ^encoding ^end 358 tail = end) 359 { 360 // Copy non-encoded text (from tail up to charset) to the output. 361 // Ignore spaces between two encoded "words". RFC2047 says the words 362 // should be concatenated without the space (designed for Asian 363 // sentences which have no spaces yet need to be broken into "words" to 364 // keep within the line length limits). 365 bool nonSpaceFound = false; 366 for (int i = 0; i < charset-tail; i++) { 367 if (!isspace (tail[i])) { 368 nonSpaceFound = true; 369 break; 370 } 371 } 372 if (!encodedWordFoundPreviously || nonSpaceFound) { 373 if (string != tail && tail != charset) 374 memmove(string, tail, charset-tail); 375 string += charset-tail; 376 } 377 tail = charset; 378 encodedWordFoundPreviously = true; 379 380 // move things to point at what they should: 381 // =?...charset...?e?...text...?= (e == encoding) 382 // ^charset ^encoding ^end 383 charset += 2; 384 encoding += 1; 385 end += 2; 386 387 // find the charset this text is in now 388 size_t cLen = encoding - 1 - charset; 389 bool base64encoded = toupper(*encoding) == 'B'; 390 391 uint32 convert_id = B_MAIL_NULL_CONVERSION; 392 char charset_string[cLen+1]; 393 memcpy(charset_string, charset, cLen); 394 charset_string[cLen] = '\0'; 395 if (strcasecmp(charset_string, "us-ascii") == 0) { 396 convert_id = B_MAIL_US_ASCII_CONVERSION; 397 } else if (strcasecmp(charset_string, "utf-8") == 0) { 398 convert_id = B_MAIL_UTF8_CONVERSION; 399 } else { 400 const BCharacterSet * cs = BCharacterSetRoster::FindCharacterSetByName(charset_string); 401 if (cs != NULL) { 402 convert_id = cs->GetConversionID(); 403 } 404 } 405 if (convert_id == B_MAIL_NULL_CONVERSION) 406 { 407 // unidentified charset 408 // what to do? doing nothing skips the encoded text; 409 // but we should keep it: we copy it to the output. 410 if (string != tail && tail != end) 411 memmove(string, tail, end-tail); 412 string += end-tail; 413 continue; 414 } 415 // else we've successfully identified the charset 416 417 char *src = encoding+2; 418 int32 srcLen = end - 2 - src; 419 // encoded text: src..src+srcLen 420 421 // decode text, get decoded length (reducing xforms) 422 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1) 423 : decode_base64(src, src, srcLen); 424 425 // allocate space for the converted text 426 int32 dstLen = end-string + *bufLen-strLen; 427 char *dst = (char*)malloc(dstLen); 428 int32 cvLen = srcLen; 429 int32 convState = 0; 430 431 // 432 // do the conversion 433 // 434 ret = mail_convert_to_utf8(convert_id, src, &cvLen, dst, &dstLen, &convState); 435 if (ret != B_OK) 436 { 437 // what to do? doing nothing skips the encoded text 438 // but we should keep it: we copy it to the output. 439 440 free(dst); 441 442 if (string != tail && tail != end) 443 memmove(string, tail, end-tail); 444 string += end-tail; 445 continue; 446 } 447 /* convert_to_ is either returning something wrong or my 448 test data is screwed up. Whatever it is, Not Enough 449 Space is not the only cause of the below, so we just 450 assume it succeeds if it converts anything at all. 451 else if (cvLen < srcLen) 452 { 453 // not enough room to convert the data; 454 // grow *buf and retry 455 456 free(dst); 457 458 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1)); 459 if (temp == NULL) 460 { 461 ret = B_NO_MEMORY; 462 break; 463 } 464 465 *bufp = temp; 466 *bufLen = 2*(*bufLen + 1); 467 468 string = *bufp + (string-head); 469 tail = *bufp + (tail-head); 470 charset = *bufp + (charset-head); 471 encoding = *bufp + (encoding-head); 472 end = *bufp + (end-head); 473 src = *bufp + (src-head); 474 head = *bufp; 475 continue; 476 } 477 */ 478 else 479 { 480 if (dstLen > end-string) 481 { 482 // copy the string forward... 483 memmove(string+dstLen, end, strLen - (end-head) + 1); 484 strLen += string+dstLen - end; 485 end = string + dstLen; 486 } 487 488 memcpy(string, dst, dstLen); 489 string += dstLen; 490 free(dst); 491 continue; 492 } 493 } 494 495 // copy everything that's left 496 size_t tailLen = strLen - (tail - head); 497 memmove(string, tail, tailLen+1); 498 string += tailLen; 499 500 // replace the last char 501 (*bufp)[strLen] = lastChar; 502 503 return ret < B_OK ? ret : string-head; 504 } 505 506 507 _EXPORT ssize_t utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding) { 508 struct word { 509 BString originalWord; 510 BString convertedWord; 511 bool needsEncoding; 512 513 // Convert the word from UTF-8 to the desired character set. The 514 // converted version also includes the escape codes to return to ASCII 515 // mode, if relevant. Also note if it uses unprintable characters, 516 // which means it will need that special encoding treatment later. 517 void ConvertWordToCharset (uint32 charset) { 518 int32 state = 0; 519 int32 originalLength = originalWord.Length(); 520 int32 convertedLength = originalLength * 5 + 1; 521 char *convertedBuffer = convertedWord.LockBuffer (convertedLength); 522 mail_convert_from_utf8 (charset, originalWord.String(), 523 &originalLength, convertedBuffer, &convertedLength, &state); 524 for (int i = 0; i < convertedLength; i++) { 525 if ((convertedBuffer[i] & (1 << 7)) || 526 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) { 527 needsEncoding = true; 528 break; 529 } 530 } 531 convertedWord.UnlockBuffer (convertedLength); 532 }; 533 }; 534 struct word *currentWord; 535 BList words; 536 537 // Break the header into words. White space characters (including tabs and 538 // newlines) separate the words. Each word includes any space before it as 539 // part of the word. Actually, quotes and other special characters 540 // (",()<>@) are treated as separate words of their own so that they don't 541 // get encoded (because MIME headers get the quotes parsed before character 542 // set unconversion is done). The reader is supposed to ignore all white 543 // space between encoded words, which can be inserted so that older mail 544 // parsers don't have overly long line length problems. 545 546 const char *source = *bufp; 547 const char *bufEnd = *bufp + length; 548 const char *specialChars = "\"()<>@,"; 549 550 while (source < bufEnd) { 551 currentWord = new struct word; 552 currentWord->needsEncoding = false; 553 554 int wordEnd = 0; 555 556 // Include leading spaces as part of the word. 557 while (source + wordEnd < bufEnd && isspace (source[wordEnd])) 558 wordEnd++; 559 560 if (source + wordEnd < bufEnd && 561 strchr (specialChars, source[wordEnd]) != NULL) { 562 // Got a quote mark or other special character, which is treated as 563 // a word in itself since it shouldn't be encoded, which would hide 564 // it from the mail system. 565 wordEnd++; 566 } else { 567 // Find the end of the word. Leave wordEnd pointing just after the 568 // last character in the word. 569 while (source + wordEnd < bufEnd) { 570 if (isspace(source[wordEnd]) || 571 strchr (specialChars, source[wordEnd]) != NULL) 572 break; 573 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ && 574 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) { 575 // No English words are that long (46 is the longest), 576 // break up what is likely Asian text (which has no spaces) 577 // at the start of the next non-ASCII UTF-8 character (high 578 // two bits are both ones). Note that two encoded words in 579 // a row get joined together, even if there is a space 580 // between them in the final output text, according to the 581 // standard. Next word will also be conveniently get 582 // encoded due to the 0xC0 test. 583 currentWord->needsEncoding = true; 584 break; 585 } 586 wordEnd++; 587 } 588 } 589 currentWord->originalWord.SetTo (source, wordEnd); 590 currentWord->ConvertWordToCharset (charset); 591 words.AddItem(currentWord); 592 source += wordEnd; 593 } 594 595 // Combine adjacent words which contain unprintable text so that the 596 // overhead of switching back and forth between regular text and specially 597 // encoded text is reduced. However, the combined word must be shorter 598 // than the maximum of 75 bytes, including character set specification and 599 // all those delimiters (worst case 22 bytes of overhead). 600 601 struct word *run; 602 603 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) { 604 if (!currentWord->needsEncoding) 605 continue; // No need to combine unencoded words. 606 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) { 607 if (!run->needsEncoding) 608 break; // Don't want to combine encoded and unencoded words. 609 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) { 610 currentWord->originalWord.Append (run->originalWord); 611 currentWord->ConvertWordToCharset (charset); 612 words.RemoveItem(g); 613 delete run; 614 g--; 615 } else // Can't merge this word, result would be too long. 616 break; 617 } 618 } 619 620 // Combine the encoded and unencoded words into one line, doing the 621 // quoted-printable or base64 encoding. Insert an extra space between 622 // words which are both encoded to make word wrapping easier, since there 623 // is normally none, and you're allowed to insert space (the receiver 624 // throws it away if it is between encoded words). 625 626 BString rfc2047; 627 bool previousWordNeededEncoding = false; 628 629 const char *charset_dec = "none-bug"; 630 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) { 631 if (mail_charsets[i].flavor == charset) { 632 charset_dec = mail_charsets[i].charset; 633 break; 634 } 635 } 636 637 while ((currentWord = (struct word *)words.RemoveItem(0L)) != NULL) { 638 if ((encoding != quoted_printable && encoding != base64) || 639 !currentWord->needsEncoding) { 640 rfc2047.Append (currentWord->convertedWord); 641 } else { 642 // This word needs encoding. Try to insert a space between it and 643 // the previous word. 644 if (previousWordNeededEncoding) 645 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words. 646 else { 647 // Previous word is not encoded, spaces are significant. Try 648 // to move a space from the start of this word to be outside of 649 // the encoded text, so that there is a bit of space between 650 // this word and the previous one to enhance word wrapping 651 // chances later on. 652 if (currentWord->originalWord.Length() > 1 && 653 isspace (currentWord->originalWord[0])) { 654 rfc2047 << currentWord->originalWord[0]; 655 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */); 656 currentWord->ConvertWordToCharset (charset); 657 } 658 } 659 660 char *encoded = NULL; 661 ssize_t encoded_len = 0; 662 int32 convertedLength = currentWord->convertedWord.Length (); 663 const char *convertedBuffer = currentWord->convertedWord.String (); 664 665 switch (encoding) { 666 case quoted_printable: 667 encoded = (char *) malloc (convertedLength * 3); 668 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */); 669 break; 670 case base64: 671 encoded = (char *) malloc (convertedLength * 2); 672 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */); 673 break; 674 default: // Unknown encoding type, shouldn't happen. 675 encoded = (char *) convertedBuffer; 676 encoded_len = convertedLength; 677 break; 678 } 679 680 rfc2047 << "=?" << charset_dec << '?' << encoding << '?'; 681 rfc2047.Append (encoded, encoded_len); 682 rfc2047 << "?="; 683 684 if (encoding == quoted_printable || encoding == base64) 685 free(encoded); 686 } 687 previousWordNeededEncoding = currentWord->needsEncoding; 688 delete currentWord; 689 } 690 691 free(*bufp); 692 693 ssize_t finalLength = rfc2047.Length (); 694 *bufp = (char *) (malloc (finalLength + 1)); 695 memcpy (*bufp, rfc2047.String(), finalLength); 696 (*bufp)[finalLength] = 0; 697 698 return finalLength; 699 } 700 701 702 //==================================================================== 703 704 void FoldLineAtWhiteSpaceAndAddCRLF (BString &string) 705 { 706 int inputLength = string.Length(); 707 int lineStartIndex; 708 const int maxLineLength = 78; // Doesn't include CRLF. 709 BString output; 710 int splitIndex; 711 int tempIndex; 712 713 lineStartIndex = 0; 714 while (true) { 715 // If we don't need to wrap the text, just output the remainder, if any. 716 717 if (lineStartIndex + maxLineLength >= inputLength) { 718 if (lineStartIndex < inputLength) { 719 output.Insert (string, lineStartIndex /* source offset */, 720 inputLength - lineStartIndex /* count */, 721 output.Length() /* insert at */); 722 output.Append (CRLF); 723 } 724 break; 725 } 726 727 // Look ahead for a convenient spot to split it, between a comma and 728 // space, which you often see between e-mail addresses like this: 729 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com 730 731 tempIndex = lineStartIndex + maxLineLength; 732 if (tempIndex > inputLength) 733 tempIndex = inputLength; 734 splitIndex = string.FindLast (", ", tempIndex); 735 if (splitIndex >= lineStartIndex) 736 splitIndex++; // Point to the space character. 737 738 // If none of those exist, try splitting at any white space. 739 740 if (splitIndex <= lineStartIndex) 741 splitIndex = string.FindLast (" ", tempIndex); 742 if (splitIndex <= lineStartIndex) 743 splitIndex = string.FindLast ("\t", tempIndex); 744 745 // If none of those exist, allow for a longer word - split at the next 746 // available white space. 747 748 if (splitIndex <= lineStartIndex) 749 splitIndex = string.FindFirst (" ", lineStartIndex + 1); 750 if (splitIndex <= lineStartIndex) 751 splitIndex = string.FindFirst ("\t", lineStartIndex + 1); 752 753 // Give up, the whole rest of the line can't be split, just dump it 754 // out. 755 756 if (splitIndex <= lineStartIndex) { 757 if (lineStartIndex < inputLength) { 758 output.Insert (string, lineStartIndex /* source offset */, 759 inputLength - lineStartIndex /* count */, 760 output.Length() /* insert at */); 761 output.Append (CRLF); 762 } 763 break; 764 } 765 766 // Do the split. The current line up to but not including the space 767 // gets output, followed by a CRLF. The space remains to become the 768 // start of the next line (and that tells the message reader that it is 769 // a continuation line). 770 771 output.Insert (string, lineStartIndex /* source offset */, 772 splitIndex - lineStartIndex /* count */, 773 output.Length() /* insert at */); 774 output.Append (CRLF); 775 lineStartIndex = splitIndex; 776 } 777 string.SetTo (output); 778 } 779 780 781 //==================================================================== 782 783 _EXPORT ssize_t readfoldedline(FILE *file, char **buffer, size_t *buflen) 784 { 785 ssize_t len = buflen && *buflen ? *buflen : 0; 786 char * buf = buffer && *buffer ? *buffer : NULL; 787 ssize_t cnt = 0; // Number of characters currently in the buffer. 788 int c; 789 790 while (true) 791 { 792 // Make sure there is space in the buffer for two more characters (one 793 // for the next character, and one for the end of string NUL byte). 794 if (buf == NULL || cnt + 2 >= len) 795 { 796 char *temp = (char *)realloc(buf, len + 64); 797 if (temp == NULL) { 798 // Out of memory, however existing buffer remains allocated. 799 cnt = ENOMEM; 800 break; 801 } 802 len += 64; 803 buf = temp; 804 } 805 806 // Read the next character, or end of file, or IO error. 807 if ((c = fgetc(file)) == EOF) { 808 if (ferror (file)) { 809 cnt = errno; 810 if (cnt >= 0) 811 cnt = -1; // Error codes must be negative. 812 } else { 813 // Really is end of file. Also make it end of line if there is 814 // some text already read in. If the first thing read was EOF, 815 // just return an empty string. 816 if (cnt > 0) { 817 buf[cnt++] = '\n'; 818 if (buf[cnt-2] == '\r') { 819 buf[cnt-2] = '\n'; 820 --cnt; 821 } 822 } 823 } 824 break; 825 } 826 827 buf[cnt++] = c; 828 829 if (c == '\n') { 830 // Convert CRLF end of line to just a LF. Do it before folding, in 831 // case we don't need to fold. 832 if (cnt >= 2 && buf[cnt-2] == '\r') { 833 buf[cnt-2] = '\n'; 834 --cnt; 835 } 836 // If the current line is empty then return it (so that empty lines 837 // don't disappear if the next line starts with a space). 838 if (cnt <= 1) 839 break; 840 // Fold if first character on the next line is whitespace. 841 c = fgetc(file); // Note it's OK to read EOF and ungetc it too. 842 if (c == ' ' || c == '\t') 843 buf[cnt-1] = c; // Replace \n with the white space character. 844 else { 845 // Not folding, we finished reading a line; break out of the loop 846 ungetc(c,file); 847 break; 848 } 849 } 850 } 851 852 853 if (buf != NULL && cnt >= 0) 854 buf[cnt] = '\0'; 855 856 if (buffer) 857 *buffer = buf; 858 else if (buf) 859 free(buf); 860 861 if (buflen) 862 *buflen = len; 863 864 return cnt; 865 } 866 867 868 //==================================================================== 869 870 _EXPORT ssize_t readfoldedline(BPositionIO &in, char **buffer, size_t *buflen) 871 { 872 ssize_t len = buflen && *buflen ? *buflen : 0; 873 char * buf = buffer && *buffer ? *buffer : NULL; 874 ssize_t cnt = 0; // Number of characters currently in the buffer. 875 char c; 876 status_t errorCode; 877 878 while (true) 879 { 880 // Make sure there is space in the buffer for two more characters (one 881 // for the next character, and one for the end of string NUL byte). 882 if (buf == NULL || cnt + 2 >= len) 883 { 884 char *temp = (char *)realloc(buf, len + 64); 885 if (temp == NULL) { 886 // Out of memory, however existing buffer remains allocated. 887 cnt = ENOMEM; 888 break; 889 } 890 len += 64; 891 buf = temp; 892 } 893 894 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered. 895 if (errorCode != 1) { 896 if (errorCode < 0) { 897 cnt = errorCode; // IO error encountered, just return the code. 898 } else { 899 // Really is end of file. Also make it end of line if there is 900 // some text already read in. If the first thing read was EOF, 901 // just return an empty string. 902 if (cnt > 0) { 903 buf[cnt++] = '\n'; 904 if (buf[cnt-2] == '\r') { 905 buf[cnt-2] = '\n'; 906 --cnt; 907 } 908 } 909 } 910 break; 911 } 912 913 buf[cnt++] = c; 914 915 if (c == '\n') { 916 // Convert CRLF end of line to just a LF. Do it before folding, in 917 // case we don't need to fold. 918 if (cnt >= 2 && buf[cnt-2] == '\r') { 919 buf[cnt-2] = '\n'; 920 --cnt; 921 } 922 // If the current line is empty then return it (so that empty lines 923 // don't disappear if the next line starts with a space). 924 if (cnt <= 1) 925 break; 926 // if first character on the next line is whitespace, fold lines 927 errorCode = in.Read(&c,1); 928 if (errorCode == 1) { 929 if (c == ' ' || c == '\t') 930 buf[cnt-1] = c; // Replace \n with the white space character. 931 else { 932 // Not folding, we finished reading a whole line. 933 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read. 934 break; 935 } 936 } else if (errorCode < 0) { 937 cnt = errorCode; 938 break; 939 } else // No next line; at the end of the file. Return the line. 940 break; 941 } 942 } 943 944 if (buf != NULL && cnt >= 0) 945 buf[cnt] = '\0'; 946 947 if (buffer) 948 *buffer = buf; 949 else if (buf) 950 free(buf); 951 952 if (buflen) 953 *buflen = len; 954 955 return cnt; 956 } 957 958 959 _EXPORT ssize_t 960 nextfoldedline(const char** header, char **buffer, size_t *buflen) 961 { 962 ssize_t len = buflen && *buflen ? *buflen : 0; 963 char * buf = buffer && *buffer ? *buffer : NULL; 964 ssize_t cnt = 0; // Number of characters currently in the buffer. 965 char c; 966 967 while (true) 968 { 969 // Make sure there is space in the buffer for two more characters (one 970 // for the next character, and one for the end of string NUL byte). 971 if (buf == NULL || cnt + 2 >= len) 972 { 973 char *temp = (char *)realloc(buf, len + 64); 974 if (temp == NULL) { 975 // Out of memory, however existing buffer remains allocated. 976 cnt = ENOMEM; 977 break; 978 } 979 len += 64; 980 buf = temp; 981 } 982 983 // Read the next character, or end of file. 984 if ((c = *(*header)++) == 0) { 985 // End of file. Also make it end of line if there is some text 986 // already read in. If the first thing read was EOF, just return 987 // an empty string. 988 if (cnt > 0) { 989 buf[cnt++] = '\n'; 990 if (buf[cnt-2] == '\r') { 991 buf[cnt-2] = '\n'; 992 --cnt; 993 } 994 } 995 break; 996 } 997 998 buf[cnt++] = c; 999 1000 if (c == '\n') { 1001 // Convert CRLF end of line to just a LF. Do it before folding, in 1002 // case we don't need to fold. 1003 if (cnt >= 2 && buf[cnt-2] == '\r') { 1004 buf[cnt-2] = '\n'; 1005 --cnt; 1006 } 1007 // If the current line is empty then return it (so that empty lines 1008 // don't disappear if the next line starts with a space). 1009 if (cnt <= 1) 1010 break; 1011 // if first character on the next line is whitespace, fold lines 1012 c = *(*header)++; 1013 if (c == ' ' || c == '\t') 1014 buf[cnt-1] = c; // Replace \n with the white space character. 1015 else { 1016 // Not folding, we finished reading a line; break out of the loop 1017 (*header)--; // Undo read of the non-whitespace. 1018 break; 1019 } 1020 } 1021 } 1022 1023 1024 if (buf != NULL && cnt >= 0) 1025 buf[cnt] = '\0'; 1026 1027 if (buffer) 1028 *buffer = buf; 1029 else if (buf) 1030 free(buf); 1031 1032 if (buflen) 1033 *buflen = len; 1034 1035 return cnt; 1036 } 1037 1038 1039 _EXPORT void 1040 trim_white_space(BString &string) 1041 { 1042 int32 i; 1043 int32 length = string.Length(); 1044 char *buffer = string.LockBuffer(length + 1); 1045 1046 while (length > 0 && isspace(buffer[length - 1])) 1047 length--; 1048 buffer[length] = '\0'; 1049 1050 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {} 1051 if (i != 0) { 1052 length -= i; 1053 memmove(buffer,buffer + i,length + 1); 1054 } 1055 string.UnlockBuffer(length); 1056 } 1057 1058 1059 /** Tries to return a human-readable name from the specified 1060 * header parameter (should be from "To:" or "From:"). 1061 * Tries to return the name rather than the eMail address. 1062 */ 1063 1064 _EXPORT void 1065 extract_address_name(BString &header) 1066 { 1067 BString name; 1068 const char *start = header.String(); 1069 const char *stop = start + strlen (start); 1070 1071 // Find a string S in the header (email foo) that matches: 1072 // Old style name in brackets: foo@bar.com (S) 1073 // New style quotes: "S" <foo@bar.com> 1074 // New style no quotes if nothing else found: S <foo@bar.com> 1075 // If nothing else found then use the whole thing: S 1076 1077 for (int i = 0; i <= 3; i++) { 1078 // Set p1 to the first letter in the name and p2 to just past the last 1079 // letter in the name. p2 stays NULL if a name wasn't found in this 1080 // pass. 1081 const char *p1 = NULL, *p2 = NULL; 1082 1083 switch (i) { 1084 case 0: // foo@bar.com (S) 1085 if ((p1 = strchr(start,'(')) != NULL) { 1086 p1++; // Advance to first letter in the name. 1087 size_t nest = 1; // Handle nested brackets. 1088 for (p2 = p1; p2 < stop; ++p2) 1089 { 1090 if (*p2 == ')') 1091 --nest; 1092 else if (*p2 == '(') 1093 ++nest; 1094 if (nest <= 0) 1095 break; 1096 } 1097 if (nest != 0) 1098 p2 = NULL; // False alarm, no terminating bracket. 1099 } 1100 break; 1101 case 1: // "S" <foo@bar.com> 1102 if ((p1 = strchr(start, '\"')) != NULL) 1103 p2 = strchr(++p1, '\"'); 1104 break; 1105 case 2: // S <foo@bar.com> 1106 p1 = start; 1107 if (name.Length() == 0) 1108 p2 = strchr(start, '<'); 1109 break; 1110 case 3: // S 1111 p1 = start; 1112 if (name.Length() == 0) 1113 p2 = stop; 1114 break; 1115 } 1116 1117 // Remove leading and trailing space-like characters and save the 1118 // result if it is longer than any other likely names found. 1119 if (p2 != NULL) { 1120 while (p1 < p2 && (isspace (*p1))) 1121 ++p1; 1122 1123 while (p1 < p2 && (isspace (p2[-1]))) 1124 --p2; 1125 1126 int newLength = p2 - p1; 1127 if (name.Length() < newLength) 1128 name.SetTo(p1, newLength); 1129 } 1130 } 1131 1132 int32 lessIndex = name.FindFirst('<'); 1133 int32 greaterIndex = name.FindLast('>'); 1134 1135 if (lessIndex == 0) { 1136 // Have an address of the form <address> and nothing else, so remove 1137 // the greater and less than signs, if any. 1138 if (greaterIndex > 0) 1139 name.Remove(greaterIndex, 1); 1140 name.Remove(lessIndex, 1); 1141 } else if (lessIndex > 0 && lessIndex < greaterIndex) { 1142 // Yahoo stupidly inserts the e-mail address into the name string, so 1143 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com> 1144 name.Remove(lessIndex, greaterIndex - lessIndex + 1); 1145 } 1146 1147 trim_white_space(name); 1148 header = name; 1149 } 1150 1151 1152 1153 // Given a subject in a BString, remove the extraneous RE: re: and other stuff 1154 // to get down to the core subject string, which should be identical for all 1155 // messages posted about a topic. The input string is modified in place to 1156 // become the output core subject string. 1157 1158 static int32 gLocker = 0; 1159 static size_t gNsub = 1; 1160 static re_pattern_buffer gRe; 1161 static re_pattern_buffer *gRebuf = NULL; 1162 static char gTranslation[256]; 1163 1164 _EXPORT void SubjectToThread (BString &string) 1165 { 1166 // a regex that matches a non-ASCII UTF8 character: 1167 #define U8C \ 1168 "[\302-\337][\200-\277]" \ 1169 "|\340[\302-\337][\200-\277]" \ 1170 "|[\341-\357][\200-\277][\200-\277]" \ 1171 "|\360[\220-\277][\200-\277][\200-\277]" \ 1172 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \ 1173 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \ 1174 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1175 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1176 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]" 1177 1178 #define PATTERN \ 1179 "^ +" \ 1180 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1181 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1182 "| *\\(fwd\\) *$" 1183 1184 if (gRebuf == NULL && atomic_add(&gLocker,1) == 0) 1185 { 1186 // the idea is to compile the regexp once to speed up testing 1187 1188 for (int i=0; i<256; ++i) gTranslation[i]=i; 1189 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i); 1190 1191 gRe.translate = gTranslation; 1192 gRe.regs_allocated = REGS_FIXED; 1193 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED; 1194 1195 const char *pattern = PATTERN; 1196 // count subexpressions in PATTERN 1197 for (unsigned int i=0; pattern[i] != 0; ++i) 1198 { 1199 if (pattern[i] == '\\') 1200 ++i; 1201 else if (pattern[i] == '(') 1202 ++gNsub; 1203 } 1204 1205 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe); 1206 if (err == NULL) 1207 gRebuf = &gRe; 1208 else 1209 fprintf(stderr, "Failed to compile the regex: %s\n", err); 1210 } 1211 else 1212 { 1213 int32 tries = 200; 1214 while (gRebuf == NULL && tries-- > 0) 1215 snooze(10000); 1216 } 1217 1218 if (gRebuf) 1219 { 1220 struct re_registers regs; 1221 // can't be static if this function is to be thread-safe 1222 1223 regs.num_regs = gNsub; 1224 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1225 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1226 1227 for (int start=0; 1228 (start=re_search(gRebuf, string.String(), string.Length(), 1229 0, string.Length(), ®s)) >= 0; 1230 ) 1231 { 1232 // 1233 // we found something 1234 // 1235 1236 // don't delete [bemaildaemon]... 1237 if (start == regs.start[1]) 1238 start = regs.start[2]; 1239 1240 string.Remove(start,regs.end[0]-start); 1241 if (start) string.Insert(' ',1,start); 1242 } 1243 1244 free(regs.start); 1245 free(regs.end); 1246 } 1247 1248 // Finally remove leading and trailing space. Some software, like 1249 // tm-edit 1.8, appends a space to the subject, which would break 1250 // threading if we left it in. 1251 trim_white_space(string); 1252 } 1253 1254 1255 1256 // Converts a date to a time. Handles numeric time zones too, unlike 1257 // parsedate. Returns -1 if it fails. 1258 1259 _EXPORT time_t ParseDateWithTimeZone (const char *DateString) 1260 { 1261 time_t currentTime; 1262 time_t dateAsTime; 1263 char tempDateString [80]; 1264 char tempZoneString [6]; 1265 time_t zoneDeltaTime; 1266 int zoneIndex; 1267 char *zonePntr; 1268 1269 // See if we can remove the time zone portion. parsedate understands time 1270 // zone 3 letter names, but doesn't understand the numeric +9999 time zone 1271 // format. To do: see if a newer parsedate exists. 1272 1273 strncpy (tempDateString, DateString, sizeof (tempDateString)); 1274 tempDateString[sizeof (tempDateString) - 1] = 0; 1275 1276 // Remove trailing spaces. 1277 zonePntr = tempDateString + strlen (tempDateString) - 1; 1278 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1279 *zonePntr-- = 0; 1280 if (zonePntr < tempDateString) 1281 return -1; // Empty string. 1282 1283 // Remove the trailing time zone in round brackets, like in 1284 // Fri, 22 Feb 2002 15:22:42 EST (-0500) 1285 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT) 1286 if (tempDateString[strlen(tempDateString)-1] == ')') 1287 { 1288 zonePntr = strrchr (tempDateString, '('); 1289 if (zonePntr != NULL) 1290 { 1291 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces. 1292 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1293 *zonePntr-- = 0; 1294 if (zonePntr < tempDateString) 1295 return -1; // Empty string. 1296 } 1297 } 1298 1299 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000 1300 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--) 1301 { 1302 zonePntr = tempDateString + zoneIndex; 1303 if (zonePntr[0] == '+' || zonePntr[0] == '-') 1304 { 1305 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' && 1306 zonePntr[2] >= '0' && zonePntr[2] <= '9' && 1307 zonePntr[3] >= '0' && zonePntr[3] <= '9' && 1308 zonePntr[4] >= '0' && zonePntr[4] <= '9') 1309 break; 1310 } 1311 } 1312 if (zoneIndex >= 0) 1313 { 1314 // Remove the zone from the date string and any following time zone 1315 // letter codes. Also put in GMT so that the date gets parsed as GMT. 1316 memcpy (tempZoneString, zonePntr, 5); 1317 tempZoneString [5] = 0; 1318 strcpy (zonePntr, "GMT"); 1319 } 1320 else // No numeric time zone found. 1321 strcpy (tempZoneString, "+0000"); 1322 1323 time (¤tTime); 1324 dateAsTime = parsedate (tempDateString, currentTime); 1325 if (dateAsTime == (time_t) -1) 1326 return -1; // Failure. 1327 1328 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes. 1329 tempZoneString[3] = 0; 1330 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours. 1331 if (tempZoneString[0] == '+') 1332 zoneDeltaTime = 0 - zoneDeltaTime; 1333 dateAsTime += zoneDeltaTime; 1334 1335 return dateAsTime; 1336 } 1337 1338 1339 /** Parses a mail header and fills the headers BMessage 1340 */ 1341 1342 _EXPORT status_t 1343 parse_header(BMessage &headers, BPositionIO &input) 1344 { 1345 char *buffer = NULL; 1346 size_t bufferSize = 0; 1347 int32 length; 1348 1349 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) { 1350 --length; 1351 // Don't include the \n at the end of the buffer. 1352 1353 // convert to UTF-8 and null-terminate the buffer 1354 length = rfc2047_to_utf8(&buffer, &bufferSize, length); 1355 buffer[length] = '\0'; 1356 1357 const char *delimiter = strstr(buffer, ":"); 1358 if (delimiter == NULL) 1359 continue; 1360 1361 BString header(buffer, delimiter - buffer); 1362 header.CapitalizeEachWord(); 1363 // unified case for later fetch 1364 1365 delimiter++; // Skip the colon. 1366 while (isspace (*delimiter)) 1367 delimiter++; // Skip over leading white space and tabs. To do: (comments in brackets). 1368 1369 // ToDo: implement joining of multiple header tags (i.e. multiple "Cc:"s) 1370 headers.AddString(header.String(), delimiter); 1371 } 1372 free(buffer); 1373 1374 return B_OK; 1375 } 1376 1377 1378 _EXPORT void 1379 extract_address(BString &address) 1380 { 1381 const char *string = address.String(); 1382 int32 first; 1383 1384 // first, remove all quoted text 1385 1386 if ((first = address.FindFirst('"')) >= 0) { 1387 int32 last = first + 1; 1388 while (string[last] && string[last] != '"') 1389 last++; 1390 1391 if (string[last] == '"') 1392 address.Remove(first, last + 1 - first); 1393 } 1394 1395 // try to extract the address now 1396 1397 if ((first = address.FindFirst('<')) >= 0) { 1398 // the world likes us and we can just get the address the easy way... 1399 int32 last = address.FindFirst('>'); 1400 if (last >= 0) { 1401 address.Truncate(last); 1402 address.Remove(0, first + 1); 1403 1404 return; 1405 } 1406 } 1407 1408 // then, see if there is anything in parenthesis to throw away 1409 1410 if ((first = address.FindFirst('(')) >= 0) { 1411 int32 last = first + 1; 1412 while (string[last] && string[last] != ')') 1413 last++; 1414 1415 if (string[last] == ')') 1416 address.Remove(first, last + 1 - first); 1417 } 1418 1419 // now, there shouldn't be much else left 1420 1421 trim_white_space(address); 1422 } 1423 1424 1425 _EXPORT void 1426 get_address_list(BList &list, const char *string, void (*cleanupFunc)(BString &)) 1427 { 1428 if (string == NULL || !string[0]) 1429 return; 1430 1431 const char *start = string; 1432 1433 while (true) { 1434 if (string[0] == '"') { 1435 const char *quoteEnd = ++string; 1436 1437 while (quoteEnd[0] && quoteEnd[0] != '"') 1438 quoteEnd++; 1439 1440 if (!quoteEnd[0]) // string exceeds line! 1441 quoteEnd = string; 1442 1443 string = quoteEnd + 1; 1444 } 1445 1446 if (string[0] == ',' || string[0] == '\0') { 1447 BString address(start, string - start); 1448 trim_white_space(address); 1449 1450 if (cleanupFunc) 1451 cleanupFunc(address); 1452 1453 list.AddItem(strdup(address.String())); 1454 1455 start = string + 1; 1456 } 1457 1458 if (!string[0]) 1459 break; 1460 1461 string++; 1462 } 1463 } 1464 1465