/* mail util - header parsing
**
** Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
*/


#include <UTF8.h>
#include <Message.h>
#include <String.h>
#include <Locker.h>
#include <DataIO.h>
#include <List.h>

#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#define __USE_GNU
#include <regex.h>
#include <ctype.h>
#include <errno.h>
#include <parsedate.h>

#include <mail_encoding.h>

#include <mail_util.h>

#include <CharacterSet.h>
#include <CharacterSetRoster.h>

using namespace BPrivate;

#define CRLF "\r\n"

// One row of the charset table below: a MIME charset name paired with the
// numeric conversion code ("flavor") used to select that conversion.
struct CharsetConversionEntry
{
	const char *charset;	// Charset name as it appears in mail headers.
	uint32 flavor;			// Conversion code associated with that name.
};

// Table mapping charset names to conversion codes.  Several names may share
// one code; utf8_to_rfc2047() in this file scans the table from the start and
// uses the first name whose flavor matches.  The table is terminated by an
// entry whose charset pointer is NULL.
extern const CharsetConversionEntry mail_charsets [] =
{
	// In order of authority, so when searching for the name for a particular
	// numbered conversion, start at the beginning of the array.
	{"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD
	{"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD
	{"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD
	{"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD
	{"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD
	{"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD
	{"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD
	{"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD
	{"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD
	{"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD
	{"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD
	{"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD
	{"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD

	{"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD
	{"shift-jis", B_SJIS_CONVERSION},
	{"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD
	{"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD

	{"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD
	{"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE?
	{"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software

	{"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD
	{"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD
	{"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD

	{"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? )
	{"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? )
	{"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? )

	// NOTE(review): 24 and 25 are raw conversion codes with no named
	// constant here; presumably the Big5 and GBK conversions - confirm
	// against UTF8.h.
	{"big5", 24}, // MIME STANDARD

	{"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? )
	{"gb2312", 25}, // COMPATIBLE
	{"gbk", 25}, // COMPATIBLE

	/* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */
	{"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD
	{"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD

	{NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */
};


// The next couple of functions are our wrapper around convert_to_utf8 and
// convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by
// specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation.  It
// also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION.
93 94 _EXPORT status_t mail_convert_to_utf8 ( 95 uint32 srcEncoding, 96 const char *src, 97 int32 *srcLen, 98 char *dst, 99 int32 *dstLen, 100 int32 *state, 101 char substitute) 102 { 103 int32 copyAmount; 104 char *originalDst = dst; 105 status_t returnCode = -1; 106 107 if (srcEncoding == B_MAIL_UTF8_CONVERSION) { 108 copyAmount = *srcLen; 109 if (*dstLen < copyAmount) 110 copyAmount = *dstLen; 111 memcpy (dst, src, copyAmount); 112 *srcLen = copyAmount; 113 *dstLen = copyAmount; 114 returnCode = B_OK; 115 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) { 116 int32 i; 117 unsigned char letter; 118 copyAmount = *srcLen; 119 if (*dstLen < copyAmount) 120 copyAmount = *dstLen; 121 for (i = 0; i < copyAmount; i++) { 122 letter = *src++; 123 if (letter > 0x80U) 124 // Invalid, could also use substitute, but better to strip high bit. 125 *dst++ = letter - 0x80U; 126 else if (letter == 0x80U) 127 // Can't convert to 0x00 since that's NUL, which would cause problems. 128 *dst++ = substitute; 129 else 130 *dst++ = letter; 131 } 132 *srcLen = copyAmount; 133 *dstLen = copyAmount; 134 returnCode = B_OK; 135 } else 136 returnCode = convert_to_utf8 (srcEncoding, src, srcLen, 137 dst, dstLen, state, substitute); 138 139 if (returnCode == B_OK) { 140 // Replace spurious NUL bytes, which should normally not be in the 141 // output of the decoding (not normal UTF-8 characters, and no NULs are 142 // in our usual input strings). They happen for some odd ISO-2022-JP 143 // byte pair combinations which are improperly handled by the BeOS 144 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the 145 // first ESC $ B switches to a Japanese character set, then the next 146 // two bytes "yD" specify a character, then ESC ( B switches back to 147 // the ASCII character set. The UTF-8 conversion yields a NUL byte. 
148 int32 i; 149 for (i = 0; i < *dstLen; i++) 150 if (originalDst[i] == 0) 151 originalDst[i] = substitute; 152 } 153 return returnCode; 154 } 155 156 157 _EXPORT status_t mail_convert_from_utf8 ( 158 uint32 dstEncoding, 159 const char *src, 160 int32 *srcLen, 161 char *dst, 162 int32 *dstLen, 163 int32 *state, 164 char substitute) 165 { 166 int32 copyAmount; 167 status_t errorCode; 168 int32 originalDstLen = *dstLen; 169 int32 tempDstLen; 170 int32 tempSrcLen; 171 172 if (dstEncoding == B_MAIL_UTF8_CONVERSION) 173 { 174 copyAmount = *srcLen; 175 if (*dstLen < copyAmount) 176 copyAmount = *dstLen; 177 memcpy (dst, src, copyAmount); 178 *srcLen = copyAmount; 179 *dstLen = copyAmount; 180 return B_OK; 181 } 182 183 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) 184 { 185 int32 characterLength; 186 int32 dstRemaining = *dstLen; 187 unsigned char letter; 188 int32 srcRemaining = *srcLen; 189 190 // state contains the number of source bytes to skip, left over from a 191 // partial UTF-8 character split over the end of the buffer from last 192 // time. 193 if (srcRemaining <= *state) { 194 *state -= srcRemaining; 195 *dstLen = 0; 196 return B_OK; 197 } 198 srcRemaining -= *state; 199 src += *state; 200 *state = 0; 201 202 while (true) { 203 if (srcRemaining <= 0 || dstRemaining <= 0) 204 break; 205 letter = *src; 206 if (letter < 0x80) 207 characterLength = 1; // Regular ASCII equivalent code. 208 else if (letter < 0xC0) 209 characterLength = 1; // Invalid in-between data byte 10xxxxxx. 210 else if (letter < 0xE0) 211 characterLength = 2; 212 else if (letter < 0xF0) 213 characterLength = 3; 214 else if (letter < 0xF8) 215 characterLength = 4; 216 else if (letter < 0xFC) 217 characterLength = 5; 218 else if (letter < 0xFE) 219 characterLength = 6; 220 else 221 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8. 
222 if (letter < 0x80) 223 *dst++ = *src; 224 else 225 *dst++ = substitute; 226 dstRemaining--; 227 if (srcRemaining < characterLength) { 228 // Character split past the end of the buffer. 229 *state = characterLength - srcRemaining; 230 srcRemaining = 0; 231 } else { 232 src += characterLength; 233 srcRemaining -= characterLength; 234 } 235 } 236 // Update with the amounts used. 237 *srcLen = *srcLen - srcRemaining; 238 *dstLen = *dstLen - dstRemaining; 239 return B_OK; 240 } 241 242 errorCode = convert_from_utf8 (dstEncoding, src, srcLen, dst, dstLen, state, substitute); 243 if (errorCode != B_OK) 244 return errorCode; 245 246 if (dstEncoding != B_JIS_CONVERSION) 247 return B_OK; 248 249 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different 250 // character subsets. For E-mail headers (and other uses), it needs to be 251 // switched back to ASCII at the end (otherwise the last character gets 252 // lost or other weird things happen in the headers). Note that we can't 253 // just append the escape code since the convert_from_utf8 "state" will be 254 // wrong. So we append an ASCII letter and throw it away, leaving just the 255 // escape code. Well, it actually switches to the Roman character set, not 256 // ASCII, but that should be OK. 257 258 tempDstLen = originalDstLen - *dstLen; 259 if (tempDstLen < 3) // Not enough space remaining in the output. 260 return B_OK; // Sort of an error, but we did convert the rest OK. 
261 tempSrcLen = 1; 262 errorCode = convert_from_utf8 (dstEncoding, "a", &tempSrcLen, 263 dst + *dstLen, &tempDstLen, state, substitute); 264 if (errorCode != B_OK) 265 return errorCode; 266 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */; 267 return B_OK; 268 } 269 270 271 272 static int handle_non_rfc2047_encoding(char **buffer,size_t *bufferLength,size_t *sourceLength) 273 { 274 char *string = *buffer; 275 int32 length = *sourceLength; 276 int32 i; 277 278 // check for 8-bit characters 279 for (i = 0;i < length;i++) 280 if (string[i] & 0x80) 281 break; 282 if (i == length) 283 return false; 284 285 // check for groups of 8-bit characters - this code is not very smart; 286 // it just can detect some sort of single-byte encoded stuff, the rest 287 // is regarded as UTF-8 288 289 int32 singletons = 0,doubles = 0; 290 291 for (i = 0;i < length;i++) 292 { 293 if (string[i] & 0x80) 294 { 295 if ((string[i + 1] & 0x80) == 0) 296 singletons++; 297 else doubles++; 298 i++; 299 } 300 } 301 302 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1 303 { 304 int32 state = 0; 305 // just to be sure 306 int32 destLength = length * 4 + 1; 307 int32 destBufferLength = destLength; 308 char *dest = (char *)malloc(destLength); 309 if (dest == NULL) 310 return 0; 311 312 if (convert_to_utf8(B_ISO1_CONVERSION,string,&length,dest,&destLength,&state) == B_OK) 313 { 314 free(*buffer); 315 *buffer = dest; 316 *bufferLength = destBufferLength; 317 *sourceLength = destLength; 318 return true; 319 } 320 free(dest); 321 return false; 322 } 323 324 // we assume a valid UTF-8 string here, but yes, we don't check it 325 return true; 326 } 327 328 329 _EXPORT ssize_t rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen) 330 { 331 char *head, *tail; 332 char *charset, *encoding, *end; 333 ssize_t ret = B_OK; 334 335 if (bufp == NULL || *bufp == NULL) 336 return -1; 337 338 char *string = *bufp; 339 340 //---------Handle *&&^%*&^ non-RFC compliant, 
8bit mail 341 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen)) 342 return strLen; 343 344 // set up string length 345 if (strLen == 0) 346 strLen = strlen(*bufp); 347 char lastChar = (*bufp)[strLen]; 348 (*bufp)[strLen] = '\0'; 349 350 //---------Whew! Now for RFC compliant mail 351 bool encodedWordFoundPreviously = false; 352 for (head = tail = string; 353 ((charset = strstr(tail, "=?")) != NULL) 354 && (((encoding = strchr(charset + 2, '?')) != NULL) 355 && encoding[1] && (encoding[2] == '?') && encoding[3]) 356 && (end = strstr(encoding + 3, "?=")) != NULL; 357 // found "=?...charset...?e?...text...?= (e == encoding) 358 // ^charset ^encoding ^end 359 tail = end) 360 { 361 // Copy non-encoded text (from tail up to charset) to the output. 362 // Ignore spaces between two encoded "words". RFC2047 says the words 363 // should be concatenated without the space (designed for Asian 364 // sentences which have no spaces yet need to be broken into "words" to 365 // keep within the line length limits). 
366 bool nonSpaceFound = false; 367 for (int i = 0; i < charset-tail; i++) { 368 if (!isspace (tail[i])) { 369 nonSpaceFound = true; 370 break; 371 } 372 } 373 if (!encodedWordFoundPreviously || nonSpaceFound) { 374 if (string != tail && tail != charset) 375 memmove(string, tail, charset-tail); 376 string += charset-tail; 377 } 378 tail = charset; 379 encodedWordFoundPreviously = true; 380 381 // move things to point at what they should: 382 // =?...charset...?e?...text...?= (e == encoding) 383 // ^charset ^encoding ^end 384 charset += 2; 385 encoding += 1; 386 end += 2; 387 388 // find the charset this text is in now 389 size_t cLen = encoding - 1 - charset; 390 bool base64encoded = toupper(*encoding) == 'B'; 391 392 uint32 convert_id = B_MAIL_NULL_CONVERSION; 393 char charset_string[cLen+1]; 394 memcpy(charset_string, charset, cLen); 395 charset_string[cLen] = '\0'; 396 if (strcasecmp(charset_string, "us-ascii") == 0) { 397 convert_id = B_MAIL_US_ASCII_CONVERSION; 398 } else if (strcasecmp(charset_string, "utf-8") == 0) { 399 convert_id = B_MAIL_UTF8_CONVERSION; 400 } else { 401 const BCharacterSet * cs = BCharacterSetRoster::FindCharacterSetByName(charset_string); 402 if (cs != NULL) { 403 convert_id = cs->GetConversionID(); 404 } 405 } 406 if (convert_id == B_MAIL_NULL_CONVERSION) 407 { 408 // unidentified charset 409 // what to do? doing nothing skips the encoded text; 410 // but we should keep it: we copy it to the output. 411 if (string != tail && tail != end) 412 memmove(string, tail, end-tail); 413 string += end-tail; 414 continue; 415 } 416 // else we've successfully identified the charset 417 418 char *src = encoding+2; 419 int32 srcLen = end - 2 - src; 420 // encoded text: src..src+srcLen 421 422 // decode text, get decoded length (reducing xforms) 423 srcLen = !base64encoded ? 
decode_qp(src, src, srcLen, 1) 424 : decode_base64(src, src, srcLen); 425 426 // allocate space for the converted text 427 int32 dstLen = end-string + *bufLen-strLen; 428 char *dst = (char*)malloc(dstLen); 429 int32 cvLen = srcLen; 430 int32 convState = 0; 431 432 // 433 // do the conversion 434 // 435 ret = mail_convert_to_utf8(convert_id, src, &cvLen, dst, &dstLen, &convState); 436 if (ret != B_OK) 437 { 438 // what to do? doing nothing skips the encoded text 439 // but we should keep it: we copy it to the output. 440 441 free(dst); 442 443 if (string != tail && tail != end) 444 memmove(string, tail, end-tail); 445 string += end-tail; 446 continue; 447 } 448 /* convert_to_ is either returning something wrong or my 449 test data is screwed up. Whatever it is, Not Enough 450 Space is not the only cause of the below, so we just 451 assume it succeeds if it converts anything at all. 452 else if (cvLen < srcLen) 453 { 454 // not enough room to convert the data; 455 // grow *buf and retry 456 457 free(dst); 458 459 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1)); 460 if (temp == NULL) 461 { 462 ret = B_NO_MEMORY; 463 break; 464 } 465 466 *bufp = temp; 467 *bufLen = 2*(*bufLen + 1); 468 469 string = *bufp + (string-head); 470 tail = *bufp + (tail-head); 471 charset = *bufp + (charset-head); 472 encoding = *bufp + (encoding-head); 473 end = *bufp + (end-head); 474 src = *bufp + (src-head); 475 head = *bufp; 476 continue; 477 } 478 */ 479 else 480 { 481 if (dstLen > end-string) 482 { 483 // copy the string forward... 
484 memmove(string+dstLen, end, strLen - (end-head) + 1); 485 strLen += string+dstLen - end; 486 end = string + dstLen; 487 } 488 489 memcpy(string, dst, dstLen); 490 string += dstLen; 491 free(dst); 492 continue; 493 } 494 } 495 496 // copy everything that's left 497 size_t tailLen = strLen - (tail - head); 498 memmove(string, tail, tailLen+1); 499 string += tailLen; 500 501 // replace the last char 502 (*bufp)[strLen] = lastChar; 503 504 return ret < B_OK ? ret : string-head; 505 } 506 507 508 _EXPORT ssize_t utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding) { 509 struct word { 510 BString originalWord; 511 BString convertedWord; 512 bool needsEncoding; 513 514 // Convert the word from UTF-8 to the desired character set. The 515 // converted version also includes the escape codes to return to ASCII 516 // mode, if relevant. Also note if it uses unprintable characters, 517 // which means it will need that special encoding treatment later. 518 void ConvertWordToCharset (uint32 charset) { 519 int32 state = 0; 520 int32 originalLength = originalWord.Length(); 521 int32 convertedLength = originalLength * 5 + 1; 522 char *convertedBuffer = convertedWord.LockBuffer (convertedLength); 523 mail_convert_from_utf8 (charset, originalWord.String(), 524 &originalLength, convertedBuffer, &convertedLength, &state); 525 for (int i = 0; i < convertedLength; i++) { 526 if ((convertedBuffer[i] & (1 << 7)) || 527 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) { 528 needsEncoding = true; 529 break; 530 } 531 } 532 convertedWord.UnlockBuffer (convertedLength); 533 }; 534 }; 535 struct word *currentWord; 536 BList words; 537 538 // Break the header into words. White space characters (including tabs and 539 // newlines) separate the words. Each word includes any space before it as 540 // part of the word. 
Actually, quotes and other special characters 541 // (",()<>@) are treated as separate words of their own so that they don't 542 // get encoded (because MIME headers get the quotes parsed before character 543 // set unconversion is done). The reader is supposed to ignore all white 544 // space between encoded words, which can be inserted so that older mail 545 // parsers don't have overly long line length problems. 546 547 const char *source = *bufp; 548 const char *bufEnd = *bufp + length; 549 const char *specialChars = "\"()<>@,"; 550 551 while (source < bufEnd) { 552 currentWord = new struct word; 553 currentWord->needsEncoding = false; 554 555 int wordEnd = 0; 556 557 // Include leading spaces as part of the word. 558 while (source + wordEnd < bufEnd && isspace (source[wordEnd])) 559 wordEnd++; 560 561 if (source + wordEnd < bufEnd && 562 strchr (specialChars, source[wordEnd]) != NULL) { 563 // Got a quote mark or other special character, which is treated as 564 // a word in itself since it shouldn't be encoded, which would hide 565 // it from the mail system. 566 wordEnd++; 567 } else { 568 // Find the end of the word. Leave wordEnd pointing just after the 569 // last character in the word. 570 while (source + wordEnd < bufEnd) { 571 if (isspace(source[wordEnd]) || 572 strchr (specialChars, source[wordEnd]) != NULL) 573 break; 574 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ && 575 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) { 576 // No English words are that long (46 is the longest), 577 // break up what is likely Asian text (which has no spaces) 578 // at the start of the next non-ASCII UTF-8 character (high 579 // two bits are both ones). Note that two encoded words in 580 // a row get joined together, even if there is a space 581 // between them in the final output text, according to the 582 // standard. Next word will also be conveniently get 583 // encoded due to the 0xC0 test. 
584 currentWord->needsEncoding = true; 585 break; 586 } 587 wordEnd++; 588 } 589 } 590 currentWord->originalWord.SetTo (source, wordEnd); 591 currentWord->ConvertWordToCharset (charset); 592 words.AddItem(currentWord); 593 source += wordEnd; 594 } 595 596 // Combine adjacent words which contain unprintable text so that the 597 // overhead of switching back and forth between regular text and specially 598 // encoded text is reduced. However, the combined word must be shorter 599 // than the maximum of 75 bytes, including character set specification and 600 // all those delimiters (worst case 22 bytes of overhead). 601 602 struct word *run; 603 604 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) { 605 if (!currentWord->needsEncoding) 606 continue; // No need to combine unencoded words. 607 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) { 608 if (!run->needsEncoding) 609 break; // Don't want to combine encoded and unencoded words. 610 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) { 611 currentWord->originalWord.Append (run->originalWord); 612 currentWord->ConvertWordToCharset (charset); 613 words.RemoveItem(g); 614 delete run; 615 g--; 616 } else // Can't merge this word, result would be too long. 617 break; 618 } 619 } 620 621 // Combine the encoded and unencoded words into one line, doing the 622 // quoted-printable or base64 encoding. Insert an extra space between 623 // words which are both encoded to make word wrapping easier, since there 624 // is normally none, and you're allowed to insert space (the receiver 625 // throws it away if it is between encoded words). 
626 627 BString rfc2047; 628 bool previousWordNeededEncoding = false; 629 630 const char *charset_dec = "none-bug"; 631 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) { 632 if (mail_charsets[i].flavor == charset) { 633 charset_dec = mail_charsets[i].charset; 634 break; 635 } 636 } 637 638 while ((currentWord = (struct word *)words.RemoveItem(0L)) != NULL) { 639 if ((encoding != quoted_printable && encoding != base64) || 640 !currentWord->needsEncoding) { 641 rfc2047.Append (currentWord->convertedWord); 642 } else { 643 // This word needs encoding. Try to insert a space between it and 644 // the previous word. 645 if (previousWordNeededEncoding) 646 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words. 647 else { 648 // Previous word is not encoded, spaces are significant. Try 649 // to move a space from the start of this word to be outside of 650 // the encoded text, so that there is a bit of space between 651 // this word and the previous one to enhance word wrapping 652 // chances later on. 653 if (currentWord->originalWord.Length() > 1 && 654 isspace (currentWord->originalWord[0])) { 655 rfc2047 << currentWord->originalWord[0]; 656 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */); 657 currentWord->ConvertWordToCharset (charset); 658 } 659 } 660 661 char *encoded = NULL; 662 ssize_t encoded_len = 0; 663 int32 convertedLength = currentWord->convertedWord.Length (); 664 const char *convertedBuffer = currentWord->convertedWord.String (); 665 666 switch (encoding) { 667 case quoted_printable: 668 encoded = (char *) malloc (convertedLength * 3); 669 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */); 670 break; 671 case base64: 672 encoded = (char *) malloc (convertedLength * 2); 673 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */); 674 break; 675 default: // Unknown encoding type, shouldn't happen. 
676 encoded = (char *) convertedBuffer; 677 encoded_len = convertedLength; 678 break; 679 } 680 681 rfc2047 << "=?" << charset_dec << '?' << encoding << '?'; 682 rfc2047.Append (encoded, encoded_len); 683 rfc2047 << "?="; 684 685 if (encoding == quoted_printable || encoding == base64) 686 free(encoded); 687 } 688 previousWordNeededEncoding = currentWord->needsEncoding; 689 delete currentWord; 690 } 691 692 free(*bufp); 693 694 ssize_t finalLength = rfc2047.Length (); 695 *bufp = (char *) (malloc (finalLength + 1)); 696 memcpy (*bufp, rfc2047.String(), finalLength); 697 (*bufp)[finalLength] = 0; 698 699 return finalLength; 700 } 701 702 703 //==================================================================== 704 705 void FoldLineAtWhiteSpaceAndAddCRLF (BString &string) 706 { 707 int inputLength = string.Length(); 708 int lineStartIndex; 709 const int maxLineLength = 78; // Doesn't include CRLF. 710 BString output; 711 int splitIndex; 712 int tempIndex; 713 714 lineStartIndex = 0; 715 while (true) { 716 // If we don't need to wrap the text, just output the remainder, if any. 717 718 if (lineStartIndex + maxLineLength >= inputLength) { 719 if (lineStartIndex < inputLength) { 720 output.Insert (string, lineStartIndex /* source offset */, 721 inputLength - lineStartIndex /* count */, 722 output.Length() /* insert at */); 723 output.Append (CRLF); 724 } 725 break; 726 } 727 728 // Look ahead for a convenient spot to split it, between a comma and 729 // space, which you often see between e-mail addresses like this: 730 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com 731 732 tempIndex = lineStartIndex + maxLineLength; 733 if (tempIndex > inputLength) 734 tempIndex = inputLength; 735 splitIndex = string.FindLast (", ", tempIndex); 736 if (splitIndex >= lineStartIndex) 737 splitIndex++; // Point to the space character. 738 739 // If none of those exist, try splitting at any white space. 
740 741 if (splitIndex <= lineStartIndex) 742 splitIndex = string.FindLast (" ", tempIndex); 743 if (splitIndex <= lineStartIndex) 744 splitIndex = string.FindLast ("\t", tempIndex); 745 746 // If none of those exist, allow for a longer word - split at the next 747 // available white space. 748 749 if (splitIndex <= lineStartIndex) 750 splitIndex = string.FindFirst (" ", lineStartIndex + 1); 751 if (splitIndex <= lineStartIndex) 752 splitIndex = string.FindFirst ("\t", lineStartIndex + 1); 753 754 // Give up, the whole rest of the line can't be split, just dump it 755 // out. 756 757 if (splitIndex <= lineStartIndex) { 758 if (lineStartIndex < inputLength) { 759 output.Insert (string, lineStartIndex /* source offset */, 760 inputLength - lineStartIndex /* count */, 761 output.Length() /* insert at */); 762 output.Append (CRLF); 763 } 764 break; 765 } 766 767 // Do the split. The current line up to but not including the space 768 // gets output, followed by a CRLF. The space remains to become the 769 // start of the next line (and that tells the message reader that it is 770 // a continuation line). 771 772 output.Insert (string, lineStartIndex /* source offset */, 773 splitIndex - lineStartIndex /* count */, 774 output.Length() /* insert at */); 775 output.Append (CRLF); 776 lineStartIndex = splitIndex; 777 } 778 string.SetTo (output); 779 } 780 781 782 //==================================================================== 783 784 _EXPORT ssize_t readfoldedline(FILE *file, char **buffer, size_t *buflen) 785 { 786 ssize_t len = buflen && *buflen ? *buflen : 0; 787 char * buf = buffer && *buffer ? *buffer : NULL; 788 ssize_t cnt = 0; // Number of characters currently in the buffer. 789 int c; 790 791 while (true) 792 { 793 // Make sure there is space in the buffer for two more characters (one 794 // for the next character, and one for the end of string NUL byte). 
795 if (buf == NULL || cnt + 2 >= len) 796 { 797 char *temp = (char *)realloc(buf, len + 64); 798 if (temp == NULL) { 799 // Out of memory, however existing buffer remains allocated. 800 cnt = ENOMEM; 801 break; 802 } 803 len += 64; 804 buf = temp; 805 } 806 807 // Read the next character, or end of file, or IO error. 808 if ((c = fgetc(file)) == EOF) { 809 if (ferror (file)) { 810 cnt = errno; 811 if (cnt >= 0) 812 cnt = -1; // Error codes must be negative. 813 } else { 814 // Really is end of file. Also make it end of line if there is 815 // some text already read in. If the first thing read was EOF, 816 // just return an empty string. 817 if (cnt > 0) { 818 buf[cnt++] = '\n'; 819 if (buf[cnt-2] == '\r') { 820 buf[cnt-2] = '\n'; 821 --cnt; 822 } 823 } 824 } 825 break; 826 } 827 828 buf[cnt++] = c; 829 830 if (c == '\n') { 831 // Convert CRLF end of line to just a LF. Do it before folding, in 832 // case we don't need to fold. 833 if (cnt >= 2 && buf[cnt-2] == '\r') { 834 buf[cnt-2] = '\n'; 835 --cnt; 836 } 837 // If the current line is empty then return it (so that empty lines 838 // don't disappear if the next line starts with a space). 839 if (cnt <= 1) 840 break; 841 // Fold if first character on the next line is whitespace. 842 c = fgetc(file); // Note it's OK to read EOF and ungetc it too. 843 if (c == ' ' || c == '\t') 844 buf[cnt-1] = c; // Replace \n with the white space character. 845 else { 846 // Not folding, we finished reading a line; break out of the loop 847 ungetc(c,file); 848 break; 849 } 850 } 851 } 852 853 854 if (buf != NULL && cnt >= 0) 855 buf[cnt] = '\0'; 856 857 if (buffer) 858 *buffer = buf; 859 else if (buf) 860 free(buf); 861 862 if (buflen) 863 *buflen = len; 864 865 return cnt; 866 } 867 868 869 //==================================================================== 870 871 _EXPORT ssize_t readfoldedline(BPositionIO &in, char **buffer, size_t *buflen) 872 { 873 ssize_t len = buflen && *buflen ? 
*buflen : 0; 874 char * buf = buffer && *buffer ? *buffer : NULL; 875 ssize_t cnt = 0; // Number of characters currently in the buffer. 876 char c; 877 status_t errorCode; 878 879 while (true) 880 { 881 // Make sure there is space in the buffer for two more characters (one 882 // for the next character, and one for the end of string NUL byte). 883 if (buf == NULL || cnt + 2 >= len) 884 { 885 char *temp = (char *)realloc(buf, len + 64); 886 if (temp == NULL) { 887 // Out of memory, however existing buffer remains allocated. 888 cnt = ENOMEM; 889 break; 890 } 891 len += 64; 892 buf = temp; 893 } 894 895 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered. 896 if (errorCode != 1) { 897 if (errorCode < 0) { 898 cnt = errorCode; // IO error encountered, just return the code. 899 } else { 900 // Really is end of file. Also make it end of line if there is 901 // some text already read in. If the first thing read was EOF, 902 // just return an empty string. 903 if (cnt > 0) { 904 buf[cnt++] = '\n'; 905 if (buf[cnt-2] == '\r') { 906 buf[cnt-2] = '\n'; 907 --cnt; 908 } 909 } 910 } 911 break; 912 } 913 914 buf[cnt++] = c; 915 916 if (c == '\n') { 917 // Convert CRLF end of line to just a LF. Do it before folding, in 918 // case we don't need to fold. 919 if (cnt >= 2 && buf[cnt-2] == '\r') { 920 buf[cnt-2] = '\n'; 921 --cnt; 922 } 923 // If the current line is empty then return it (so that empty lines 924 // don't disappear if the next line starts with a space). 925 if (cnt <= 1) 926 break; 927 // if first character on the next line is whitespace, fold lines 928 errorCode = in.Read(&c,1); 929 if (errorCode == 1) { 930 if (c == ' ' || c == '\t') 931 buf[cnt-1] = c; // Replace \n with the white space character. 932 else { 933 // Not folding, we finished reading a whole line. 934 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read. 
935 break; 936 } 937 } else if (errorCode < 0) { 938 cnt = errorCode; 939 break; 940 } else // No next line; at the end of the file. Return the line. 941 break; 942 } 943 } 944 945 if (buf != NULL && cnt >= 0) 946 buf[cnt] = '\0'; 947 948 if (buffer) 949 *buffer = buf; 950 else if (buf) 951 free(buf); 952 953 if (buflen) 954 *buflen = len; 955 956 return cnt; 957 } 958 959 960 _EXPORT ssize_t 961 nextfoldedline(const char** header, char **buffer, size_t *buflen) 962 { 963 ssize_t len = buflen && *buflen ? *buflen : 0; 964 char * buf = buffer && *buffer ? *buffer : NULL; 965 ssize_t cnt = 0; // Number of characters currently in the buffer. 966 char c; 967 968 while (true) 969 { 970 // Make sure there is space in the buffer for two more characters (one 971 // for the next character, and one for the end of string NUL byte). 972 if (buf == NULL || cnt + 2 >= len) 973 { 974 char *temp = (char *)realloc(buf, len + 64); 975 if (temp == NULL) { 976 // Out of memory, however existing buffer remains allocated. 977 cnt = ENOMEM; 978 break; 979 } 980 len += 64; 981 buf = temp; 982 } 983 984 // Read the next character, or end of file. 985 if ((c = *(*header)++) == 0) { 986 // End of file. Also make it end of line if there is some text 987 // already read in. If the first thing read was EOF, just return 988 // an empty string. 989 if (cnt > 0) { 990 buf[cnt++] = '\n'; 991 if (buf[cnt-2] == '\r') { 992 buf[cnt-2] = '\n'; 993 --cnt; 994 } 995 } 996 break; 997 } 998 999 buf[cnt++] = c; 1000 1001 if (c == '\n') { 1002 // Convert CRLF end of line to just a LF. Do it before folding, in 1003 // case we don't need to fold. 1004 if (cnt >= 2 && buf[cnt-2] == '\r') { 1005 buf[cnt-2] = '\n'; 1006 --cnt; 1007 } 1008 // If the current line is empty then return it (so that empty lines 1009 // don't disappear if the next line starts with a space). 
1010 if (cnt <= 1) 1011 break; 1012 // if first character on the next line is whitespace, fold lines 1013 c = *(*header)++; 1014 if (c == ' ' || c == '\t') 1015 buf[cnt-1] = c; // Replace \n with the white space character. 1016 else { 1017 // Not folding, we finished reading a line; break out of the loop 1018 (*header)--; // Undo read of the non-whitespace. 1019 break; 1020 } 1021 } 1022 } 1023 1024 1025 if (buf != NULL && cnt >= 0) 1026 buf[cnt] = '\0'; 1027 1028 if (buffer) 1029 *buffer = buf; 1030 else if (buf) 1031 free(buf); 1032 1033 if (buflen) 1034 *buflen = len; 1035 1036 return cnt; 1037 } 1038 1039 1040 _EXPORT void 1041 trim_white_space(BString &string) 1042 { 1043 int32 i; 1044 int32 length = string.Length(); 1045 char *buffer = string.LockBuffer(length + 1); 1046 1047 while (length > 0 && isspace(buffer[length - 1])) 1048 length--; 1049 buffer[length] = '\0'; 1050 1051 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {} 1052 if (i != 0) { 1053 length -= i; 1054 memmove(buffer,buffer + i,length + 1); 1055 } 1056 string.UnlockBuffer(length); 1057 } 1058 1059 1060 /** Tries to return a human-readable name from the specified 1061 * header parameter (should be from "To:" or "From:"). 1062 * Tries to return the name rather than the eMail address. 1063 */ 1064 1065 _EXPORT void 1066 extract_address_name(BString &header) 1067 { 1068 BString name; 1069 const char *start = header.String(); 1070 const char *stop = start + strlen (start); 1071 1072 // Find a string S in the header (email foo) that matches: 1073 // Old style name in brackets: foo@bar.com (S) 1074 // New style quotes: "S" <foo@bar.com> 1075 // New style no quotes if nothing else found: S <foo@bar.com> 1076 // If nothing else found then use the whole thing: S 1077 1078 for (int i = 0; i <= 3; i++) { 1079 // Set p1 to the first letter in the name and p2 to just past the last 1080 // letter in the name. p2 stays NULL if a name wasn't found in this 1081 // pass. 
1082 const char *p1 = NULL, *p2 = NULL; 1083 1084 switch (i) { 1085 case 0: // foo@bar.com (S) 1086 if ((p1 = strchr(start,'(')) != NULL) { 1087 p1++; // Advance to first letter in the name. 1088 size_t nest = 1; // Handle nested brackets. 1089 for (p2 = p1; p2 < stop; ++p2) 1090 { 1091 if (*p2 == ')') 1092 --nest; 1093 else if (*p2 == '(') 1094 ++nest; 1095 if (nest <= 0) 1096 break; 1097 } 1098 if (nest != 0) 1099 p2 = NULL; // False alarm, no terminating bracket. 1100 } 1101 break; 1102 case 1: // "S" <foo@bar.com> 1103 if ((p1 = strchr(start, '\"')) != NULL) 1104 p2 = strchr(++p1, '\"'); 1105 break; 1106 case 2: // S <foo@bar.com> 1107 p1 = start; 1108 if (name.Length() == 0) 1109 p2 = strchr(start, '<'); 1110 break; 1111 case 3: // S 1112 p1 = start; 1113 if (name.Length() == 0) 1114 p2 = stop; 1115 break; 1116 } 1117 1118 // Remove leading and trailing space-like characters and save the 1119 // result if it is longer than any other likely names found. 1120 if (p2 != NULL) { 1121 while (p1 < p2 && (isspace (*p1))) 1122 ++p1; 1123 1124 while (p1 < p2 && (isspace (p2[-1]))) 1125 --p2; 1126 1127 int newLength = p2 - p1; 1128 if (name.Length() < newLength) 1129 name.SetTo(p1, newLength); 1130 } 1131 } 1132 1133 int32 lessIndex = name.FindFirst('<'); 1134 int32 greaterIndex = name.FindLast('>'); 1135 1136 if (lessIndex == 0) { 1137 // Have an address of the form <address> and nothing else, so remove 1138 // the greater and less than signs, if any. 
1139 if (greaterIndex > 0) 1140 name.Remove(greaterIndex, 1); 1141 name.Remove(lessIndex, 1); 1142 } else if (lessIndex > 0 && lessIndex < greaterIndex) { 1143 // Yahoo stupidly inserts the e-mail address into the name string, so 1144 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com> 1145 name.Remove(lessIndex, greaterIndex - lessIndex + 1); 1146 } 1147 1148 trim_white_space(name); 1149 header = name; 1150 } 1151 1152 1153 1154 // Given a subject in a BString, remove the extraneous RE: re: and other stuff 1155 // to get down to the core subject string, which should be identical for all 1156 // messages posted about a topic. The input string is modified in place to 1157 // become the output core subject string. 1158 1159 static int32 gLocker = 0; 1160 static size_t gNsub = 1; 1161 static re_pattern_buffer gRe; 1162 static re_pattern_buffer *gRebuf = NULL; 1163 static unsigned char gTranslation[256]; 1164 1165 _EXPORT void SubjectToThread (BString &string) 1166 { 1167 // a regex that matches a non-ASCII UTF8 character: 1168 #define U8C \ 1169 "[\302-\337][\200-\277]" \ 1170 "|\340[\302-\337][\200-\277]" \ 1171 "|[\341-\357][\200-\277][\200-\277]" \ 1172 "|\360[\220-\277][\200-\277][\200-\277]" \ 1173 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \ 1174 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \ 1175 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1176 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \ 1177 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]" 1178 1179 #define PATTERN \ 1180 "^ +" \ 1181 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \ 1182 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? 
*:)+ *)" \ 1183 "| *\\(fwd\\) *$" 1184 1185 if (gRebuf == NULL && atomic_add(&gLocker,1) == 0) 1186 { 1187 // the idea is to compile the regexp once to speed up testing 1188 1189 for (int i=0; i<256; ++i) gTranslation[i]=i; 1190 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i); 1191 1192 gRe.translate = gTranslation; 1193 gRe.regs_allocated = REGS_FIXED; 1194 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED; 1195 1196 const char *pattern = PATTERN; 1197 // count subexpressions in PATTERN 1198 for (unsigned int i=0; pattern[i] != 0; ++i) 1199 { 1200 if (pattern[i] == '\\') 1201 ++i; 1202 else if (pattern[i] == '(') 1203 ++gNsub; 1204 } 1205 1206 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe); 1207 if (err == NULL) 1208 gRebuf = &gRe; 1209 else 1210 fprintf(stderr, "Failed to compile the regex: %s\n", err); 1211 } 1212 else 1213 { 1214 int32 tries = 200; 1215 while (gRebuf == NULL && tries-- > 0) 1216 snooze(10000); 1217 } 1218 1219 if (gRebuf) 1220 { 1221 struct re_registers regs; 1222 // can't be static if this function is to be thread-safe 1223 1224 regs.num_regs = gNsub; 1225 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1226 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t)); 1227 1228 for (int start=0; 1229 (start=re_search(gRebuf, string.String(), string.Length(), 1230 0, string.Length(), ®s)) >= 0; 1231 ) 1232 { 1233 // 1234 // we found something 1235 // 1236 1237 // don't delete [bemaildaemon]... 1238 if (start == regs.start[1]) 1239 start = regs.start[2]; 1240 1241 string.Remove(start,regs.end[0]-start); 1242 if (start) string.Insert(' ',1,start); 1243 } 1244 1245 free(regs.start); 1246 free(regs.end); 1247 } 1248 1249 // Finally remove leading and trailing space. Some software, like 1250 // tm-edit 1.8, appends a space to the subject, which would break 1251 // threading if we left it in. 1252 trim_white_space(string); 1253 } 1254 1255 1256 1257 // Converts a date to a time. 
Handles numeric time zones too, unlike 1258 // parsedate. Returns -1 if it fails. 1259 1260 _EXPORT time_t ParseDateWithTimeZone (const char *DateString) 1261 { 1262 time_t currentTime; 1263 time_t dateAsTime; 1264 char tempDateString [80]; 1265 char tempZoneString [6]; 1266 time_t zoneDeltaTime; 1267 int zoneIndex; 1268 char *zonePntr; 1269 1270 // See if we can remove the time zone portion. parsedate understands time 1271 // zone 3 letter names, but doesn't understand the numeric +9999 time zone 1272 // format. To do: see if a newer parsedate exists. 1273 1274 strncpy (tempDateString, DateString, sizeof (tempDateString)); 1275 tempDateString[sizeof (tempDateString) - 1] = 0; 1276 1277 // Remove trailing spaces. 1278 zonePntr = tempDateString + strlen (tempDateString) - 1; 1279 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1280 *zonePntr-- = 0; 1281 if (zonePntr < tempDateString) 1282 return -1; // Empty string. 1283 1284 // Remove the trailing time zone in round brackets, like in 1285 // Fri, 22 Feb 2002 15:22:42 EST (-0500) 1286 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT) 1287 if (tempDateString[strlen(tempDateString)-1] == ')') 1288 { 1289 zonePntr = strrchr (tempDateString, '('); 1290 if (zonePntr != NULL) 1291 { 1292 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces. 1293 while (zonePntr >= tempDateString && isspace (*zonePntr)) 1294 *zonePntr-- = 0; 1295 if (zonePntr < tempDateString) 1296 return -1; // Empty string. 
1297 } 1298 } 1299 1300 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000 1301 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--) 1302 { 1303 zonePntr = tempDateString + zoneIndex; 1304 if (zonePntr[0] == '+' || zonePntr[0] == '-') 1305 { 1306 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' && 1307 zonePntr[2] >= '0' && zonePntr[2] <= '9' && 1308 zonePntr[3] >= '0' && zonePntr[3] <= '9' && 1309 zonePntr[4] >= '0' && zonePntr[4] <= '9') 1310 break; 1311 } 1312 } 1313 if (zoneIndex >= 0) 1314 { 1315 // Remove the zone from the date string and any following time zone 1316 // letter codes. Also put in GMT so that the date gets parsed as GMT. 1317 memcpy (tempZoneString, zonePntr, 5); 1318 tempZoneString [5] = 0; 1319 strcpy (zonePntr, "GMT"); 1320 } 1321 else // No numeric time zone found. 1322 strcpy (tempZoneString, "+0000"); 1323 1324 time (¤tTime); 1325 dateAsTime = parsedate (tempDateString, currentTime); 1326 if (dateAsTime == (time_t) -1) 1327 return -1; // Failure. 1328 1329 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes. 1330 tempZoneString[3] = 0; 1331 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours. 1332 if (tempZoneString[0] == '+') 1333 zoneDeltaTime = 0 - zoneDeltaTime; 1334 dateAsTime += zoneDeltaTime; 1335 1336 return dateAsTime; 1337 } 1338 1339 1340 /** Parses a mail header and fills the headers BMessage 1341 */ 1342 1343 _EXPORT status_t 1344 parse_header(BMessage &headers, BPositionIO &input) 1345 { 1346 char *buffer = NULL; 1347 size_t bufferSize = 0; 1348 int32 length; 1349 1350 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) { 1351 --length; 1352 // Don't include the \n at the end of the buffer. 
1353 1354 // convert to UTF-8 and null-terminate the buffer 1355 length = rfc2047_to_utf8(&buffer, &bufferSize, length); 1356 buffer[length] = '\0'; 1357 1358 const char *delimiter = strstr(buffer, ":"); 1359 if (delimiter == NULL) 1360 continue; 1361 1362 BString header(buffer, delimiter - buffer); 1363 header.CapitalizeEachWord(); 1364 // unified case for later fetch 1365 1366 delimiter++; // Skip the colon. 1367 while (isspace (*delimiter)) 1368 delimiter++; // Skip over leading white space and tabs. To do: (comments in brackets). 1369 1370 // ToDo: implement joining of multiple header tags (i.e. multiple "Cc:"s) 1371 headers.AddString(header.String(), delimiter); 1372 } 1373 free(buffer); 1374 1375 return B_OK; 1376 } 1377 1378 1379 _EXPORT void 1380 extract_address(BString &address) 1381 { 1382 const char *string = address.String(); 1383 int32 first; 1384 1385 // first, remove all quoted text 1386 1387 if ((first = address.FindFirst('"')) >= 0) { 1388 int32 last = first + 1; 1389 while (string[last] && string[last] != '"') 1390 last++; 1391 1392 if (string[last] == '"') 1393 address.Remove(first, last + 1 - first); 1394 } 1395 1396 // try to extract the address now 1397 1398 if ((first = address.FindFirst('<')) >= 0) { 1399 // the world likes us and we can just get the address the easy way... 
1400 int32 last = address.FindFirst('>'); 1401 if (last >= 0) { 1402 address.Truncate(last); 1403 address.Remove(0, first + 1); 1404 1405 return; 1406 } 1407 } 1408 1409 // then, see if there is anything in parenthesis to throw away 1410 1411 if ((first = address.FindFirst('(')) >= 0) { 1412 int32 last = first + 1; 1413 while (string[last] && string[last] != ')') 1414 last++; 1415 1416 if (string[last] == ')') 1417 address.Remove(first, last + 1 - first); 1418 } 1419 1420 // now, there shouldn't be much else left 1421 1422 trim_white_space(address); 1423 } 1424 1425 1426 _EXPORT void 1427 get_address_list(BList &list, const char *string, void (*cleanupFunc)(BString &)) 1428 { 1429 if (string == NULL || !string[0]) 1430 return; 1431 1432 const char *start = string; 1433 1434 while (true) { 1435 if (string[0] == '"') { 1436 const char *quoteEnd = ++string; 1437 1438 while (quoteEnd[0] && quoteEnd[0] != '"') 1439 quoteEnd++; 1440 1441 if (!quoteEnd[0]) // string exceeds line! 1442 quoteEnd = string; 1443 1444 string = quoteEnd + 1; 1445 } 1446 1447 if (string[0] == ',' || string[0] == '\0') { 1448 BString address(start, string - start); 1449 trim_white_space(address); 1450 1451 if (cleanupFunc) 1452 cleanupFunc(address); 1453 1454 list.AddItem(strdup(address.String())); 1455 1456 start = string + 1; 1457 } 1458 1459 if (!string[0]) 1460 break; 1461 1462 string++; 1463 } 1464 } 1465 1466