1 /* mail util - header parsing 2 ** 3 ** Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved. 4 */ 5 6 7 #include <UTF8.h> 8 #include <Message.h> 9 #include <String.h> 10 #include <Locker.h> 11 #include <DataIO.h> 12 #include <List.h> 13 14 #include <stdlib.h> 15 #include <string.h> 16 #include <stdio.h> 17 #include <regex.h> 18 #include <ctype.h> 19 #include <errno.h> 20 #include <parsedate.h> 21 22 #include <mail_encoding.h> 23 24 #include <mail_util.h> 25 26 #include <CharacterSet.h> 27 #include <CharacterSetRoster.h> 28 29 using namespace BPrivate; 30 31 #define CRLF "\r\n" 32 33 struct CharsetConversionEntry 34 { 35 const char *charset; 36 uint32 flavor; 37 }; 38 39 extern const CharsetConversionEntry mail_charsets [] = 40 { 41 // In order of authority, so when searching for the name for a particular 42 // numbered conversion, start at the beginning of the array. 43 {"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD 44 {"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD 45 {"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD 46 {"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD 47 {"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD 48 {"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD 49 {"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD 50 {"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD 51 {"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD 52 {"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD 53 {"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD 54 {"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD 55 {"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD 56 57 {"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD 58 {"shift-jis", B_SJIS_CONVERSION}, 59 {"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD 60 {"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD 61 62 {"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD 63 {"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE? 64 {"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software 65 66 {"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD 67 {"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD 68 {"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD 69 70 {"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? ) 71 {"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? ) 72 {"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? ) 73 74 {"big5", 24}, // MIME STANDARD 75 76 {"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? ) 77 {"gb2312", 25}, // COMPATIBLE 78 {"gbk", 25}, // COMPATIBLE 79 80 /* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */ 81 {"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD 82 {"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD 83 84 {NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */ 85 }; 86 87 88 // The next couple of functions are our wrapper around convert_to_utf8 and 89 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by 90 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation. It 91 // also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION. 92 93 _EXPORT status_t mail_convert_to_utf8 ( 94 uint32 srcEncoding, 95 const char *src, 96 int32 *srcLen, 97 char *dst, 98 int32 *dstLen, 99 int32 *state, 100 char substitute) 101 { 102 int32 copyAmount; 103 char *originalDst = dst; 104 status_t returnCode = -1; 105 106 if (srcEncoding == B_MAIL_UTF8_CONVERSION) { 107 copyAmount = *srcLen; 108 if (*dstLen < copyAmount) 109 copyAmount = *dstLen; 110 memcpy (dst, src, copyAmount); 111 *srcLen = copyAmount; 112 *dstLen = copyAmount; 113 returnCode = B_OK; 114 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) { 115 int32 i; 116 unsigned char letter; 117 copyAmount = *srcLen; 118 if (*dstLen < copyAmount) 119 copyAmount = *dstLen; 120 for (i = 0; i < copyAmount; i++) { 121 letter = *src++; 122 if (letter > 0x80U) 123 // Invalid, could also use substitute, but better to strip high bit. 124 *dst++ = letter - 0x80U; 125 else if (letter == 0x80U) 126 // Can't convert to 0x00 since that's NUL, which would cause problems. 127 *dst++ = substitute; 128 else 129 *dst++ = letter; 130 } 131 *srcLen = copyAmount; 132 *dstLen = copyAmount; 133 returnCode = B_OK; 134 } else 135 returnCode = convert_to_utf8 (srcEncoding, src, srcLen, 136 dst, dstLen, state, substitute); 137 138 if (returnCode == B_OK) { 139 // Replace spurious NUL bytes, which should normally not be in the 140 // output of the decoding (not normal UTF-8 characters, and no NULs are 141 // in our usual input strings). They happen for some odd ISO-2022-JP 142 // byte pair combinations which are improperly handled by the BeOS 143 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the 144 // first ESC $ B switches to a Japanese character set, then the next 145 // two bytes "yD" specify a character, then ESC ( B switches back to 146 // the ASCII character set. The UTF-8 conversion yields a NUL byte. 147 int32 i; 148 for (i = 0; i < *dstLen; i++) 149 if (originalDst[i] == 0) 150 originalDst[i] = substitute; 151 } 152 return returnCode; 153 } 154 155 156 _EXPORT status_t mail_convert_from_utf8 ( 157 uint32 dstEncoding, 158 const char *src, 159 int32 *srcLen, 160 char *dst, 161 int32 *dstLen, 162 int32 *state, 163 char substitute) 164 { 165 int32 copyAmount; 166 status_t errorCode; 167 int32 originalDstLen = *dstLen; 168 int32 tempDstLen; 169 int32 tempSrcLen; 170 171 if (dstEncoding == B_MAIL_UTF8_CONVERSION) 172 { 173 copyAmount = *srcLen; 174 if (*dstLen < copyAmount) 175 copyAmount = *dstLen; 176 memcpy (dst, src, copyAmount); 177 *srcLen = copyAmount; 178 *dstLen = copyAmount; 179 return B_OK; 180 } 181 182 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) 183 { 184 int32 characterLength; 185 int32 dstRemaining = *dstLen; 186 unsigned char letter; 187 int32 srcRemaining = *srcLen; 188 189 // state contains the number of source bytes to skip, left over from a 190 // partial UTF-8 character split over the end of the buffer from last 191 // time. 192 if (srcRemaining <= *state) { 193 *state -= srcRemaining; 194 *dstLen = 0; 195 return B_OK; 196 } 197 srcRemaining -= *state; 198 src += *state; 199 *state = 0; 200 201 while (true) { 202 if (srcRemaining <= 0 || dstRemaining <= 0) 203 break; 204 letter = *src; 205 if (letter < 0x80) 206 characterLength = 1; // Regular ASCII equivalent code. 207 else if (letter < 0xC0) 208 characterLength = 1; // Invalid in-between data byte 10xxxxxx. 209 else if (letter < 0xE0) 210 characterLength = 2; 211 else if (letter < 0xF0) 212 characterLength = 3; 213 else if (letter < 0xF8) 214 characterLength = 4; 215 else if (letter < 0xFC) 216 characterLength = 5; 217 else if (letter < 0xFE) 218 characterLength = 6; 219 else 220 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8. 221 if (letter < 0x80) 222 *dst++ = *src; 223 else 224 *dst++ = substitute; 225 dstRemaining--; 226 if (srcRemaining < characterLength) { 227 // Character split past the end of the buffer. 228 *state = characterLength - srcRemaining; 229 srcRemaining = 0; 230 } else { 231 src += characterLength; 232 srcRemaining -= characterLength; 233 } 234 } 235 // Update with the amounts used. 236 *srcLen = *srcLen - srcRemaining; 237 *dstLen = *dstLen - dstRemaining; 238 return B_OK; 239 } 240 241 errorCode = convert_from_utf8 (dstEncoding, src, srcLen, dst, dstLen, state, substitute); 242 if (errorCode != B_OK) 243 return errorCode; 244 245 if (dstEncoding != B_JIS_CONVERSION) 246 return B_OK; 247 248 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different 249 // character subsets. For E-mail headers (and other uses), it needs to be 250 // switched back to ASCII at the end (otherwise the last character gets 251 // lost or other weird things happen in the headers). Note that we can't 252 // just append the escape code since the convert_from_utf8 "state" will be 253 // wrong. So we append an ASCII letter and throw it away, leaving just the 254 // escape code. Well, it actually switches to the Roman character set, not 255 // ASCII, but that should be OK. 256 257 tempDstLen = originalDstLen - *dstLen; 258 if (tempDstLen < 3) // Not enough space remaining in the output. 259 return B_OK; // Sort of an error, but we did convert the rest OK. 260 tempSrcLen = 1; 261 errorCode = convert_from_utf8 (dstEncoding, "a", &tempSrcLen, 262 dst + *dstLen, &tempDstLen, state, substitute); 263 if (errorCode != B_OK) 264 return errorCode; 265 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */; 266 return B_OK; 267 } 268 269 270 271 static int handle_non_rfc2047_encoding(char **buffer,size_t *bufferLength,size_t *sourceLength) 272 { 273 char *string = *buffer; 274 int32 length = *sourceLength; 275 int32 i; 276 277 // check for 8-bit characters 278 for (i = 0;i < length;i++) 279 if (string[i] & 0x80) 280 break; 281 if (i == length) 282 return false; 283 284 // check for groups of 8-bit characters - this code is not very smart; 285 // it just can detect some sort of single-byte encoded stuff, the rest 286 // is regarded as UTF-8 287 288 int32 singletons = 0,doubles = 0; 289 290 for (i = 0;i < length;i++) 291 { 292 if (string[i] & 0x80) 293 { 294 if ((string[i + 1] & 0x80) == 0) 295 singletons++; 296 else doubles++; 297 i++; 298 } 299 } 300 301 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1 302 { 303 int32 state = 0; 304 // just to be sure 305 int32 destLength = length * 4 + 1; 306 int32 destBufferLength = destLength; 307 char *dest = (char *)malloc(destLength); 308 if (dest == NULL) 309 return 0; 310 311 if (convert_to_utf8(B_ISO1_CONVERSION,string,&length,dest,&destLength,&state) == B_OK) 312 { 313 free(*buffer); 314 *buffer = dest; 315 *bufferLength = destBufferLength; 316 *sourceLength = destLength; 317 return true; 318 } 319 free(dest); 320 return false; 321 } 322 323 // we assume a valid UTF-8 string here, but yes, we don't check it 324 return true; 325 } 326 327 328 _EXPORT ssize_t rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen) 329 { 330 char *string = *bufp; 331 char *head, *tail; 332 char *charset, *encoding, *end; 333 ssize_t ret = B_OK; 334 335 if (bufp == NULL || *bufp == NULL) 336 return -1; 337 338 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail 339 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen)) 340 return strLen; 341 342 // set up string length 343 if (strLen == 0) 344 strLen = strlen(*bufp); 345 char lastChar = (*bufp)[strLen]; 346 (*bufp)[strLen] = '\0'; 347 348 //---------Whew! Now for RFC compliant mail 349 bool encodedWordFoundPreviously = false; 350 for (head = tail = string; 351 ((charset = strstr(tail, "=?")) != NULL) 352 && (((encoding = strchr(charset + 2, '?')) != NULL) 353 && encoding[1] && (encoding[2] == '?') && encoding[3]) 354 && (end = strstr(encoding + 3, "?=")) != NULL; 355 // found "=?...charset...?e?...text...?= (e == encoding) 356 // ^charset ^encoding ^end 357 tail = end) 358 { 359 // Copy non-encoded text (from tail up to charset) to the output. 360 // Ignore spaces between two encoded "words". RFC2047 says the words 361 // should be concatenated without the space (designed for Asian 362 // sentences which have no spaces yet need to be broken into "words" to 363 // keep within the line length limits). 364 bool nonSpaceFound = false; 365 for (int i = 0; i < charset-tail; i++) { 366 if (!isspace (tail[i])) { 367 nonSpaceFound = true; 368 break; 369 } 370 } 371 if (!encodedWordFoundPreviously || nonSpaceFound) { 372 if (string != tail && tail != charset) 373 memmove(string, tail, charset-tail); 374 string += charset-tail; 375 } 376 tail = charset; 377 encodedWordFoundPreviously = true; 378 379 // move things to point at what they should: 380 // =?...charset...?e?...text...?= (e == encoding) 381 // ^charset ^encoding ^end 382 charset += 2; 383 encoding += 1; 384 end += 2; 385 386 // find the charset this text is in now 387 size_t cLen = encoding - 1 - charset; 388 bool base64encoded = toupper(*encoding) == 'B'; 389 390 uint32 convert_id = B_MAIL_NULL_CONVERSION; 391 char charset_string[cLen+1]; 392 memcpy(charset_string, charset, cLen); 393 charset_string[cLen] = '\0'; 394 if (strcasecmp(charset_string, "us-ascii") == 0) { 395 convert_id = B_MAIL_US_ASCII_CONVERSION; 396 } else if (strcasecmp(charset_string, "utf-8") == 0) { 397 convert_id = B_MAIL_UTF8_CONVERSION; 398 } else { 399 const BCharacterSet * cs = BCharacterSetRoster::FindCharacterSetByName(charset_string); 400 if (cs != NULL) { 401 convert_id = cs->GetConversionID(); 402 } 403 } 404 if (convert_id == B_MAIL_NULL_CONVERSION) 405 { 406 // unidentified charset 407 // what to do? doing nothing skips the encoded text; 408 // but we should keep it: we copy it to the output. 409 if (string != tail && tail != end) 410 memmove(string, tail, end-tail); 411 string += end-tail; 412 continue; 413 } 414 // else we've successfully identified the charset 415 416 char *src = encoding+2; 417 int32 srcLen = end - 2 - src; 418 // encoded text: src..src+srcLen 419 420 // decode text, get decoded length (reducing xforms) 421 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1) 422 : decode_base64(src, src, srcLen); 423 424 // allocate space for the converted text 425 int32 dstLen = end-string + *bufLen-strLen; 426 char *dst = (char*)malloc(dstLen); 427 int32 cvLen = srcLen; 428 int32 convState = 0; 429 430 // 431 // do the conversion 432 // 433 ret = mail_convert_to_utf8(convert_id, src, &cvLen, dst, &dstLen, &convState); 434 if (ret != B_OK) 435 { 436 // what to do? doing nothing skips the encoded text 437 // but we should keep it: we copy it to the output. 438 439 free(dst); 440 441 if (string != tail && tail != end) 442 memmove(string, tail, end-tail); 443 string += end-tail; 444 continue; 445 } 446 /* convert_to_ is either returning something wrong or my 447 test data is screwed up. Whatever it is, Not Enough 448 Space is not the only cause of the below, so we just 449 assume it succeeds if it converts anything at all. 450 else if (cvLen < srcLen) 451 { 452 // not enough room to convert the data; 453 // grow *buf and retry 454 455 free(dst); 456 457 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1)); 458 if (temp == NULL) 459 { 460 ret = B_NO_MEMORY; 461 break; 462 } 463 464 *bufp = temp; 465 *bufLen = 2*(*bufLen + 1); 466 467 string = *bufp + (string-head); 468 tail = *bufp + (tail-head); 469 charset = *bufp + (charset-head); 470 encoding = *bufp + (encoding-head); 471 end = *bufp + (end-head); 472 src = *bufp + (src-head); 473 head = *bufp; 474 continue; 475 } 476 */ 477 else 478 { 479 if (dstLen > end-string) 480 { 481 // copy the string forward... 482 memmove(string+dstLen, end, strLen - (end-head) + 1); 483 strLen += string+dstLen - end; 484 end = string + dstLen; 485 } 486 487 memcpy(string, dst, dstLen); 488 string += dstLen; 489 free(dst); 490 continue; 491 } 492 } 493 494 // copy everything that's left 495 size_t tailLen = strLen - (tail - head); 496 memmove(string, tail, tailLen+1); 497 string += tailLen; 498 499 // replace the last char 500 (*bufp)[strLen] = lastChar; 501 502 return ret < B_OK ? ret : string-head; 503 } 504 505 506 _EXPORT ssize_t utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding) { 507 struct word { 508 BString originalWord; 509 BString convertedWord; 510 bool needsEncoding; 511 512 // Convert the word from UTF-8 to the desired character set. The 513 // converted version also includes the escape codes to return to ASCII 514 // mode, if relevant. Also note if it uses unprintable characters, 515 // which means it will need that special encoding treatment later. 516 void ConvertWordToCharset (uint32 charset) { 517 int32 state = 0; 518 int32 originalLength = originalWord.Length(); 519 int32 convertedLength = originalLength * 5 + 1; 520 char *convertedBuffer = convertedWord.LockBuffer (convertedLength); 521 mail_convert_from_utf8 (charset, originalWord.String(), 522 &originalLength, convertedBuffer, &convertedLength, &state); 523 for (int i = 0; i < convertedLength; i++) { 524 if ((convertedBuffer[i] & (1 << 7)) || 525 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) { 526 needsEncoding = true; 527 break; 528 } 529 } 530 convertedWord.UnlockBuffer (convertedLength); 531 }; 532 }; 533 struct word *currentWord; 534 BList words; 535 536 // Break the header into words. White space characters (including tabs and 537 // newlines) separate the words. Each word includes any space before it as 538 // part of the word. Actually, quotes and other special characters 539 // (",()<>@) are treated as separate words of their own so that they don't 540 // get encoded (because MIME headers get the quotes parsed before character 541 // set unconversion is done). The reader is supposed to ignore all white 542 // space between encoded words, which can be inserted so that older mail 543 // parsers don't have overly long line length problems. 544 545 const char *source = *bufp; 546 const char *bufEnd = *bufp + length; 547 const char *specialChars = "\"()<>@,"; 548 549 while (source < bufEnd) { 550 currentWord = new struct word; 551 currentWord->needsEncoding = false; 552 553 int wordEnd = 0; 554 555 // Include leading spaces as part of the word. 556 while (source + wordEnd < bufEnd && isspace (source[wordEnd])) 557 wordEnd++; 558 559 if (source + wordEnd < bufEnd && 560 strchr (specialChars, source[wordEnd]) != NULL) { 561 // Got a quote mark or other special character, which is treated as 562 // a word in itself since it shouldn't be encoded, which would hide 563 // it from the mail system. 564 wordEnd++; 565 } else { 566 // Find the end of the word. Leave wordEnd pointing just after the 567 // last character in the word. 568 while (source + wordEnd < bufEnd) { 569 if (isspace(source[wordEnd]) || 570 strchr (specialChars, source[wordEnd]) != NULL) 571 break; 572 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ && 573 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) { 574 // No English words are that long (46 is the longest), 575 // break up what is likely Asian text (which has no spaces) 576 // at the start of the next non-ASCII UTF-8 character (high 577 // two bits are both ones). Note that two encoded words in 578 // a row get joined together, even if there is a space 579 // between them in the final output text, according to the 580 // standard. Next word will also be conveniently get 581 // encoded due to the 0xC0 test. 582 currentWord->needsEncoding = true; 583 break; 584 } 585 wordEnd++; 586 } 587 } 588 currentWord->originalWord.SetTo (source, wordEnd); 589 currentWord->ConvertWordToCharset (charset); 590 words.AddItem(currentWord); 591 source += wordEnd; 592 } 593 594 // Combine adjacent words which contain unprintable text so that the 595 // overhead of switching back and forth between regular text and specially 596 // encoded text is reduced. However, the combined word must be shorter 597 // than the maximum of 75 bytes, including character set specification and 598 // all those delimiters (worst case 22 bytes of overhead). 599 600 struct word *run; 601 602 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) { 603 if (!currentWord->needsEncoding) 604 continue; // No need to combine unencoded words. 605 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) { 606 if (!run->needsEncoding) 607 break; // Don't want to combine encoded and unencoded words. 608 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) { 609 currentWord->originalWord.Append (run->originalWord); 610 currentWord->ConvertWordToCharset (charset); 611 words.RemoveItem(g); 612 delete run; 613 g--; 614 } else // Can't merge this word, result would be too long. 615 break; 616 } 617 } 618 619 // Combine the encoded and unencoded words into one line, doing the 620 // quoted-printable or base64 encoding. Insert an extra space between 621 // words which are both encoded to make word wrapping easier, since there 622 // is normally none, and you're allowed to insert space (the receiver 623 // throws it away if it is between encoded words). 624 625 BString rfc2047; 626 bool previousWordNeededEncoding = false; 627 628 const char *charset_dec = "none-bug"; 629 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) { 630 if (mail_charsets[i].flavor == charset) { 631 charset_dec = mail_charsets[i].charset; 632 break; 633 } 634 } 635 636 while ((currentWord = (struct word *)words.RemoveItem(0L)) != NULL) { 637 if ((encoding != quoted_printable && encoding != base64) || 638 !currentWord->needsEncoding) { 639 rfc2047.Append (currentWord->convertedWord); 640 } else { 641 // This word needs encoding. Try to insert a space between it and 642 // the previous word. 643 if (previousWordNeededEncoding) 644 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words. 645 else { 646 // Previous word is not encoded, spaces are significant. Try 647 // to move a space from the start of this word to be outside of 648 // the encoded text, so that there is a bit of space between 649 // this word and the previous one to enhance word wrapping 650 // chances later on. 651 if (currentWord->originalWord.Length() > 1 && 652 isspace (currentWord->originalWord[0])) { 653 rfc2047 << currentWord->originalWord[0]; 654 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */); 655 currentWord->ConvertWordToCharset (charset); 656 } 657 } 658 659 char *encoded = NULL; 660 ssize_t encoded_len = 0; 661 int32 convertedLength = currentWord->convertedWord.Length (); 662 const char *convertedBuffer = currentWord->convertedWord.String (); 663 664 switch (encoding) { 665 case quoted_printable: 666 encoded = (char *) malloc (convertedLength * 3); 667 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */); 668 break; 669 case base64: 670 encoded = (char *) malloc (convertedLength * 2); 671 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */); 672 break; 673 default: // Unknown encoding type, shouldn't happen. 674 encoded = (char *) convertedBuffer; 675 encoded_len = convertedLength; 676 break; 677 } 678 679 rfc2047 << "=?" << charset_dec << '?' << encoding << '?'; 680 rfc2047.Append (encoded, encoded_len); 681 rfc2047 << "?="; 682 683 if (encoding == quoted_printable || encoding == base64) 684 free(encoded); 685 } 686 previousWordNeededEncoding = currentWord->needsEncoding; 687 delete currentWord; 688 } 689 690 free(*bufp); 691 692 ssize_t finalLength = rfc2047.Length (); 693 *bufp = (char *) (malloc (finalLength + 1)); 694 memcpy (*bufp, rfc2047.String(), finalLength); 695 (*bufp)[finalLength] = 0; 696 697 return finalLength; 698 } 699 700 701 //==================================================================== 702 703 void FoldLineAtWhiteSpaceAndAddCRLF (BString &string) 704 { 705 int inputLength = string.Length(); 706 int lineStartIndex; 707 const int maxLineLength = 78; // Doesn't include CRLF. 708 BString output; 709 int splitIndex; 710 int tempIndex; 711 712 lineStartIndex = 0; 713 while (true) { 714 // If we don't need to wrap the text, just output the remainder, if any. 715 716 if (lineStartIndex + maxLineLength >= inputLength) { 717 if (lineStartIndex < inputLength) { 718 output.Insert (string, lineStartIndex /* source offset */, 719 inputLength - lineStartIndex /* count */, 720 output.Length() /* insert at */); 721 output.Append (CRLF); 722 } 723 break; 724 } 725 726 // Look ahead for a convenient spot to split it, between a comma and 727 // space, which you often see between e-mail addresses like this: 728 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com 729 730 tempIndex = lineStartIndex + maxLineLength; 731 if (tempIndex > inputLength) 732 tempIndex = inputLength; 733 splitIndex = string.FindLast (", ", tempIndex); 734 if (splitIndex >= lineStartIndex) 735 splitIndex++; // Point to the space character. 736 737 // If none of those exist, try splitting at any white space. 738 739 if (splitIndex <= lineStartIndex) 740 splitIndex = string.FindLast (" ", tempIndex); 741 if (splitIndex <= lineStartIndex) 742 splitIndex = string.FindLast ("\t", tempIndex); 743 744 // If none of those exist, allow for a longer word - split at the next 745 // available white space. 746 747 if (splitIndex <= lineStartIndex) 748 splitIndex = string.FindFirst (" ", lineStartIndex + 1); 749 if (splitIndex <= lineStartIndex) 750 splitIndex = string.FindFirst ("\t", lineStartIndex + 1); 751 752 // Give up, the whole rest of the line can't be split, just dump it 753 // out. 754 755 if (splitIndex <= lineStartIndex) { 756 if (lineStartIndex < inputLength) { 757 output.Insert (string, lineStartIndex /* source offset */, 758 inputLength - lineStartIndex /* count */, 759 output.Length() /* insert at */); 760 output.Append (CRLF); 761 } 762 break; 763 } 764 765 // Do the split. The current line up to but not including the space 766 // gets output, followed by a CRLF. The space remains to become the 767 // start of the next line (and that tells the message reader that it is 768 // a continuation line). 769 770 output.Insert (string, lineStartIndex /* source offset */, 771 splitIndex - lineStartIndex /* count */, 772 output.Length() /* insert at */); 773 output.Append (CRLF); 774 lineStartIndex = splitIndex; 775 } 776 string.SetTo (output); 777 } 778 779 780 //==================================================================== 781 782 _EXPORT ssize_t readfoldedline(FILE *file, char **buffer, size_t *buflen) 783 { 784 ssize_t len = buflen && *buflen ? *buflen : 0; 785 char * buf = buffer && *buffer ? *buffer : NULL; 786 ssize_t cnt = 0; // Number of characters currently in the buffer. 787 int c; 788 789 while (true) 790 { 791 // Make sure there is space in the buffer for two more characters (one 792 // for the next character, and one for the end of string NUL byte). 793 if (buf == NULL || cnt + 2 >= len) 794 { 795 char *temp = (char *)realloc(buf, len + 64); 796 if (temp == NULL) { 797 // Out of memory, however existing buffer remains allocated. 798 cnt = ENOMEM; 799 break; 800 } 801 len += 64; 802 buf = temp; 803 } 804 805 // Read the next character, or end of file, or IO error. 806 if ((c = fgetc(file)) == EOF) { 807 if (ferror (file)) { 808 cnt = errno; 809 if (cnt >= 0) 810 cnt = -1; // Error codes must be negative. 811 } else { 812 // Really is end of file. Also make it end of line if there is 813 // some text already read in. If the first thing read was EOF, 814 // just return an empty string. 815 if (cnt > 0) { 816 buf[cnt++] = '\n'; 817 if (buf[cnt-2] == '\r') { 818 buf[cnt-2] = '\n'; 819 --cnt; 820 } 821 } 822 } 823 break; 824 } 825 826 buf[cnt++] = c; 827 828 if (c == '\n') { 829 // Convert CRLF end of line to just a LF. Do it before folding, in 830 // case we don't need to fold. 831 if (cnt >= 2 && buf[cnt-2] == '\r') { 832 buf[cnt-2] = '\n'; 833 --cnt; 834 } 835 // If the current line is empty then return it (so that empty lines 836 // don't disappear if the next line starts with a space). 837 if (cnt <= 1) 838 break; 839 // Fold if first character on the next line is whitespace. 840 c = fgetc(file); // Note it's OK to read EOF and ungetc it too. 841 if (c == ' ' || c == '\t') 842 buf[cnt-1] = c; // Replace \n with the white space character. 843 else { 844 // Not folding, we finished reading a line; break out of the loop 845 ungetc(c,file); 846 break; 847 } 848 } 849 } 850 851 852 if (buf != NULL && cnt >= 0) 853 buf[cnt] = '\0'; 854 855 if (buffer) 856 *buffer = buf; 857 else if (buf) 858 free(buf); 859 860 if (buflen) 861 *buflen = len; 862 863 return cnt; 864 } 865 866 867 //==================================================================== 868 869 _EXPORT ssize_t readfoldedline(BPositionIO &in, char **buffer, size_t *buflen) 870 { 871 ssize_t len = buflen && *buflen ? *buflen : 0; 872 char * buf = buffer && *buffer ? *buffer : NULL; 873 ssize_t cnt = 0; // Number of characters currently in the buffer. 874 char c; 875 status_t errorCode; 876 877 while (true) 878 { 879 // Make sure there is space in the buffer for two more characters (one 880 // for the next character, and one for the end of string NUL byte). 881 if (buf == NULL || cnt + 2 >= len) 882 { 883 char *temp = (char *)realloc(buf, len + 64); 884 if (temp == NULL) { 885 // Out of memory, however existing buffer remains allocated. 886 cnt = ENOMEM; 887 break; 888 } 889 len += 64; 890 buf = temp; 891 } 892 893 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered. 894 if (errorCode != 1) { 895 if (errorCode < 0) { 896 cnt = errorCode; // IO error encountered, just return the code. 897 } else { 898 // Really is end of file. Also make it end of line if there is 899 // some text already read in. If the first thing read was EOF, 900 // just return an empty string. 901 if (cnt > 0) { 902 buf[cnt++] = '\n'; 903 if (buf[cnt-2] == '\r') { 904 buf[cnt-2] = '\n'; 905 --cnt; 906 } 907 } 908 } 909 break; 910 } 911 912 buf[cnt++] = c; 913 914 if (c == '\n') { 915 // Convert CRLF end of line to just a LF. Do it before folding, in 916 // case we don't need to fold. 917 if (cnt >= 2 && buf[cnt-2] == '\r') { 918 buf[cnt-2] = '\n'; 919 --cnt; 920 } 921 // If the current line is empty then return it (so that empty lines 922 // don't disappear if the next line starts with a space). 923 if (cnt <= 1) 924 break; 925 // if first character on the next line is whitespace, fold lines 926 errorCode = in.Read(&c,1); 927 if (errorCode == 1) { 928 if (c == ' ' || c == '\t') 929 buf[cnt-1] = c; // Replace \n with the white space character. 930 else { 931 // Not folding, we finished reading a whole line. 932 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read. 933 break; 934 } 935 } else if (errorCode < 0) { 936 cnt = errorCode; 937 break; 938 } else // No next line; at the end of the file. Return the line. 939 break; 940 } 941 } 942 943 if (buf != NULL && cnt >= 0) 944 buf[cnt] = '\0'; 945 946 if (buffer) 947 *buffer = buf; 948 else if (buf) 949 free(buf); 950 951 if (buflen) 952 *buflen = len; 953 954 return cnt; 955 } 956 957 958 _EXPORT ssize_t 959 nextfoldedline(const char** header, char **buffer, size_t *buflen) 960 { 961 ssize_t len = buflen && *buflen ? *buflen : 0; 962 char * buf = buffer && *buffer ? *buffer : NULL; 963 ssize_t cnt = 0; // Number of characters currently in the buffer. 964 char c; 965 966 while (true) 967 { 968 // Make sure there is space in the buffer for two more characters (one 969 // for the next character, and one for the end of string NUL byte). 970 if (buf == NULL || cnt + 2 >= len) 971 { 972 char *temp = (char *)realloc(buf, len + 64); 973 if (temp == NULL) { 974 // Out of memory, however existing buffer remains allocated. 975 cnt = ENOMEM; 976 break; 977 } 978 len += 64; 979 buf = temp; 980 } 981 982 // Read the next character, or end of file. 983 if ((c = *(*header)++) == 0) { 984 // End of file. Also make it end of line if there is some text 985 // already read in. If the first thing read was EOF, just return 986 // an empty string. 987 if (cnt > 0) { 988 buf[cnt++] = '\n'; 989 if (buf[cnt-2] == '\r') { 990 buf[cnt-2] = '\n'; 991 --cnt; 992 } 993 } 994 break; 995 } 996 997 buf[cnt++] = c; 998 999 if (c == '\n') { 1000 // Convert CRLF end of line to just a LF. Do it before folding, in 1001 // case we don't need to fold. 1002 if (cnt >= 2 && buf[cnt-2] == '\r') { 1003 buf[cnt-2] = '\n'; 1004 --cnt; 1005 } 1006 // If the current line is empty then return it (so that empty lines 1007 // don't disappear if the next line starts with a space). 1008 if (cnt <= 1) 1009 break; 1010 // if first character on the next line is whitespace, fold lines 1011 c = *(*header)++; 1012 if (c == ' ' || c == '\t') 1013 buf[cnt-1] = c; // Replace \n with the white space character. 1014 else { 1015 // Not folding, we finished reading a line; break out of the loop 1016 (*header)--; // Undo read of the non-whitespace. 1017 break; 1018 } 1019 } 1020 } 1021 1022 1023 if (buf != NULL && cnt >= 0) 1024 buf[cnt] = '\0'; 1025 1026 if (buffer) 1027 *buffer = buf; 1028 else if (buf) 1029 free(buf); 1030 1031 if (buflen) 1032 *buflen = len; 1033 1034 return cnt; 1035 } 1036 1037 1038 _EXPORT void 1039 trim_white_space(BString &string) 1040 { 1041 int32 i; 1042 int32 length = string.Length(); 1043 char *buffer = string.LockBuffer(length + 1); 1044 1045 while (length > 0 && isspace(buffer[length - 1])) 1046 length--; 1047 buffer[length] = '\0'; 1048 1049 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {} 1050 if (i != 0) { 1051 length -= i; 1052 memmove(buffer,buffer + i,length + 1); 1053 } 1054 string.UnlockBuffer(length); 1055 } 1056 1057 1058 /** Tries to return a human-readable name from the specified 1059 * header parameter (should be from "To:" or "From:"). 1060 * Tries to return the name rather than the eMail address. 1061 */ 1062 1063 _EXPORT void 1064 extract_address_name(BString &header) 1065 { 1066 BString name; 1067 const char *start = header.String(); 1068 const char *stop = start + strlen (start); 1069 1070 // Find a string S in the header (email foo) that matches: 1071 // Old style name in brackets: foo@bar.com (S) 1072 // New style quotes: "S" <foo@bar.com> 1073 // New style no quotes if nothing else found: S <foo@bar.com> 1074 // If nothing else found then use the whole thing: S 1075 1076 for (int i = 0; i <= 3; i++) { 1077 // Set p1 to the first letter in the name and p2 to just past the last 1078 // letter in the name. p2 stays NULL if a name wasn't found in this 1079 // pass. 1080 const char *p1 = NULL, *p2 = NULL; 1081 1082 switch (i) { 1083 case 0: // foo@bar.com (S) 1084 if ((p1 = strchr(start,'(')) != NULL) { 1085 p1++; // Advance to first letter in the name. 1086 size_t nest = 1; // Handle nested brackets. 1087 for (p2 = p1; p2 < stop; ++p2) 1088 { 1089 if (*p2 == ')') 1090 --nest; 1091 else if (*p2 == '(') 1092 ++nest; 1093 if (nest <= 0) 1094 break; 1095 } 1096 if (nest != 0) 1097 p2 = NULL; // False alarm, no terminating bracket. 1098 } 1099 break; 1100 case 1: // "S" <foo@bar.com> 1101 if ((p1 = strchr(start, '\"')) != NULL) 1102 p2 = strchr(++p1, '\"'); 1103 break; 1104 case 2: // S <foo@bar.com> 1105 p1 = start; 1106 if (name.Length() == 0) 1107 p2 = strchr(start, '<'); 1108 break; 1109 case 3: // S 1110 p1 = start; 1111 if (name.Length() == 0) 1112 p2 = stop; 1113 break; 1114 } 1115 1116 // Remove leading and trailing space-like characters and save the 1117 // result if it is longer than any other likely names found. 1118 if (p2 != NULL) { 1119 while (p1 < p2 && (isspace (*p1))) 1120 ++p1; 1121 1122 while (p1 < p2 && (isspace (p2[-1]))) 1123 --p2; 1124 1125 int newLength = p2 - p1; 1126 if (name.Length() < newLength) 1127 name.SetTo(p1, newLength); 1128 } 1129 } 1130 1131 int32 lessIndex = name.FindFirst('<'); 1132 int32 greaterIndex = name.FindLast('>'); 1133 1134 if (lessIndex == 0) { 1135 // Have an address of the form <address> and nothing else, so remove 1136 // the greater and less than signs, if any. 1137 if (greaterIndex > 0) 1138 name.Remove(greaterIndex, 1); 1139 name.Remove(lessIndex, 1); 1140 } else if (lessIndex > 0 && lessIndex < greaterIndex) { 1141 // Yahoo stupidly inserts the e-mail address into the name string, so 1142 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com> 1143 name.Remove(lessIndex, greaterIndex - lessIndex + 1); 1144 } 1145 1146 trim_white_space(name); 1147 header = name; 1148 } 1149 1150 1151 1152 // Given a subject in a BString, remove the extraneous RE: re: and other stuff 1153 // to get down to the core subject string, which should be identical for all 1154 // messages posted about a topic. The input string is modified in place to 1155 // become the output core subject string. 1156 1157 static int32 gLocker = 0; 1158 static size_t gNsub = 1; 1159 static re_pattern_buffer gRe; 1160 static re_pattern_buffer *gRebuf = NULL; 1161 static char gTranslation[256]; 1162 1163 _EXPORT void SubjectToThread (BString &string) 1164 { 1165 // a regex that matches a non-ASCII UTF8 character: 1166 #define U8C \ 1167 "[\302-\337][\200-\277]" \ 1168 "|\340[\302-\337][\200-\277]" \ 1169 "|[\341-\357][\200-\277][\200-\277]" \ 1170 "|\360[\220-\277][\200-\277][\200-\277]" \ 1171 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \ 1172 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \ 1173 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1174 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1175 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]" 1176 1177 #define PATTERN \ 1178 "^ +" \ 1179 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1180 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1181 "| *\\(fwd\\) *$" 1182 1183 if (gRebuf == NULL && atomic_add(&gLocker,1) == 0) 1184 { 1185 // the idea is to compile the regexp once to speed up testing 1186 1187 for (int i=0; i<256; ++i) gTranslation[i]=i; 1188 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i); 1189 1190 gRe.translate = gTranslation; 1191 gRe.regs_allocated = REGS_FIXED; 1192 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED; 1193 1194 const char *pattern = PATTERN; 1195 // count subexpressions in PATTERN 1196 for (unsigned int i=0; pattern[i] != 0; ++i) 1197 { 1198 if (pattern[i] == '\\') 1199 ++i; 1200 else if (pattern[i] == '(') 1201 ++gNsub; 1202 } 1203 1204 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe); 1205 if (err == NULL) 1206 gRebuf = &gRe; 1207 else 1208 fprintf(stderr, "Failed to compile the regex: %s\n", err); 1209 } 1210 else 1211 { 1212 int32 tries = 200; 1213 while (gRebuf == NULL && tries-- > 0) 1214 snooze(10000); 1215 } 1216 1217 if (gRebuf) 1218 { 1219 struct re_registers regs; 1220 // can't be static if this function is to be thread-safe 1221 1222 regs.num_regs = gNsub; 1223 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1224 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1225 1226 for (int start=0; 1227 (start=re_search(gRebuf, string.String(), string.Length(), 1228 0, string.Length(), ®s)) >= 0; 1229 ) 1230 { 1231 // 1232 // we found something 1233 // 1234 1235 // don't delete [bemaildaemon]... 1236 if (start == regs.start[1]) 1237 start = regs.start[2]; 1238 1239 string.Remove(start,regs.end[0]-start); 1240 if (start) string.Insert(' ',1,start); 1241 } 1242 1243 free(regs.start); 1244 free(regs.end); 1245 } 1246 1247 // Finally remove leading and trailing space. Some software, like 1248 // tm-edit 1.8, appends a space to the subject, which would break 1249 // threading if we left it in. 1250 trim_white_space(string); 1251 } 1252 1253 1254 1255 // Converts a date to a time. Handles numeric time zones too, unlike 1256 // parsedate. Returns -1 if it fails. 1257 1258 _EXPORT time_t ParseDateWithTimeZone (const char *DateString) 1259 { 1260 time_t currentTime; 1261 time_t dateAsTime; 1262 char tempDateString [80]; 1263 char tempZoneString [6]; 1264 time_t zoneDeltaTime; 1265 int zoneIndex; 1266 char *zonePntr; 1267 1268 // See if we can remove the time zone portion. parsedate understands time 1269 // zone 3 letter names, but doesn't understand the numeric +9999 time zone 1270 // format. To do: see if a newer parsedate exists. 1271 1272 strncpy (tempDateString, DateString, sizeof (tempDateString)); 1273 tempDateString[sizeof (tempDateString) - 1] = 0; 1274 1275 // Remove trailing spaces. 1276 zonePntr = tempDateString + strlen (tempDateString) - 1; 1277 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1278 *zonePntr-- = 0; 1279 if (zonePntr < tempDateString) 1280 return -1; // Empty string. 1281 1282 // Remove the trailing time zone in round brackets, like in 1283 // Fri, 22 Feb 2002 15:22:42 EST (-0500) 1284 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT) 1285 if (tempDateString[strlen(tempDateString)-1] == ')') 1286 { 1287 zonePntr = strrchr (tempDateString, '('); 1288 if (zonePntr != NULL) 1289 { 1290 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces. 1291 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1292 *zonePntr-- = 0; 1293 if (zonePntr < tempDateString) 1294 return -1; // Empty string. 1295 } 1296 } 1297 1298 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000 1299 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--) 1300 { 1301 zonePntr = tempDateString + zoneIndex; 1302 if (zonePntr[0] == '+' || zonePntr[0] == '-') 1303 { 1304 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' && 1305 zonePntr[2] >= '0' && zonePntr[2] <= '9' && 1306 zonePntr[3] >= '0' && zonePntr[3] <= '9' && 1307 zonePntr[4] >= '0' && zonePntr[4] <= '9') 1308 break; 1309 } 1310 } 1311 if (zoneIndex >= 0) 1312 { 1313 // Remove the zone from the date string and any following time zone 1314 // letter codes. Also put in GMT so that the date gets parsed as GMT. 1315 memcpy (tempZoneString, zonePntr, 5); 1316 tempZoneString [5] = 0; 1317 strcpy (zonePntr, "GMT"); 1318 } 1319 else // No numeric time zone found. 1320 strcpy (tempZoneString, "+0000"); 1321 1322 time (¤tTime); 1323 dateAsTime = parsedate (tempDateString, currentTime); 1324 if (dateAsTime == (time_t) -1) 1325 return -1; // Failure. 1326 1327 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes. 1328 tempZoneString[3] = 0; 1329 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours. 1330 if (tempZoneString[0] == '+') 1331 zoneDeltaTime = 0 - zoneDeltaTime; 1332 dateAsTime += zoneDeltaTime; 1333 1334 return dateAsTime; 1335 } 1336 1337 1338 /** Parses a mail header and fills the headers BMessage 1339 */ 1340 1341 _EXPORT status_t 1342 parse_header(BMessage &headers, BPositionIO &input) 1343 { 1344 char *buffer = NULL; 1345 size_t bufferSize = 0; 1346 int32 length; 1347 1348 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) { 1349 --length; 1350 // Don't include the \n at the end of the buffer. 1351 1352 // convert to UTF-8 and null-terminate the buffer 1353 length = rfc2047_to_utf8(&buffer, &bufferSize, length); 1354 buffer[length] = '\0'; 1355 1356 const char *delimiter = strstr(buffer, ":"); 1357 if (delimiter == NULL) 1358 continue; 1359 1360 BString header(buffer, delimiter - buffer); 1361 header.CapitalizeEachWord(); 1362 // unified case for later fetch 1363 1364 delimiter++; // Skip the colon. 1365 while (isspace (*delimiter)) 1366 delimiter++; // Skip over leading white space and tabs. To do: (comments in brackets). 1367 1368 // ToDo: implement joining of multiple header tags (i.e. multiple "Cc:"s) 1369 headers.AddString(header.String(), delimiter); 1370 } 1371 free(buffer); 1372 1373 return B_OK; 1374 } 1375 1376 1377 _EXPORT void 1378 extract_address(BString &address) 1379 { 1380 const char *string = address.String(); 1381 int32 first; 1382 1383 // first, remove all quoted text 1384 1385 if ((first = address.FindFirst('"')) >= 0) { 1386 int32 last = first + 1; 1387 while (string[last] && string[last] != '"') 1388 last++; 1389 1390 if (string[last] == '"') 1391 address.Remove(first, last + 1 - first); 1392 } 1393 1394 // try to extract the address now 1395 1396 if ((first = address.FindFirst('<')) >= 0) { 1397 // the world likes us and we can just get the address the easy way... 1398 int32 last = address.FindFirst('>'); 1399 if (last >= 0) { 1400 address.Truncate(last); 1401 address.Remove(0, first + 1); 1402 1403 return; 1404 } 1405 } 1406 1407 // then, see if there is anything in parenthesis to throw away 1408 1409 if ((first = address.FindFirst('(')) >= 0) { 1410 int32 last = first + 1; 1411 while (string[last] && string[last] != ')') 1412 last++; 1413 1414 if (string[last] == ')') 1415 address.Remove(first, last + 1 - first); 1416 } 1417 1418 // now, there shouldn't be much else left 1419 1420 trim_white_space(address); 1421 } 1422 1423 1424 _EXPORT void 1425 get_address_list(BList &list, const char *string, void (*cleanupFunc)(BString &)) 1426 { 1427 if (string == NULL || !string[0]) 1428 return; 1429 1430 const char *start = string; 1431 1432 while (true) { 1433 if (string[0] == '"') { 1434 const char *quoteEnd = ++string; 1435 1436 while (quoteEnd[0] && quoteEnd[0] != '"') 1437 quoteEnd++; 1438 1439 if (!quoteEnd[0]) // string exceeds line! 1440 quoteEnd = string; 1441 1442 string = quoteEnd + 1; 1443 } 1444 1445 if (string[0] == ',' || string[0] == '\0') { 1446 BString address(start, string - start); 1447 trim_white_space(address); 1448 1449 if (cleanupFunc) 1450 cleanupFunc(address); 1451 1452 list.AddItem(strdup(address.String())); 1453 1454 start = string + 1; 1455 } 1456 1457 if (!string[0]) 1458 break; 1459 1460 string++; 1461 } 1462 } 1463 1464