1 /*
2 * Copyright 2011-2016, Haiku, Inc. All rights reserved.
3 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
4 */
5
6
7 #include <mail_util.h>
8
9 #include <stdlib.h>
10 #include <strings.h>
11 #include <stdio.h>
12 #define __USE_GNU
13 #include <regex.h>
14 #include <ctype.h>
15 #include <errno.h>
16
17 #include <FindDirectory.h>
18 #include <List.h>
19 #include <Locker.h>
20 #include <parsedate.h>
21 #include <Path.h>
22 #include <String.h>
23 #include <UTF8.h>
24
25 #include <mail_encoding.h>
26
27 #include <AttributeUtilities.h>
28 #include <CharacterSet.h>
29 #include <CharacterSetRoster.h>
30
31
32 using namespace BPrivate;
33
34
35 #define CRLF "\r\n"
36
// One row of the character set table below: pairs a MIME character set name
// with the numeric conversion identifier stored for it.
struct CharsetConversionEntry {
	// Character set name as a C string; NULL marks the end of the table.
	const char *charset;
	// Conversion identifier (a B_*_CONVERSION constant or a raw number).
	uint32 flavor;
};
41
// Table translating character set names into conversion identifiers. Several
// names may share one identifier; the table is terminated by an entry whose
// name pointer is NULL.
extern const CharsetConversionEntry mail_charsets[] = {
	// In order of authority, so when searching for the name for a particular
	// numbered conversion, start at the beginning of the array.
	{"iso-8859-1", B_ISO1_CONVERSION}, // MIME STANDARD
	{"iso-8859-2", B_ISO2_CONVERSION}, // MIME STANDARD
	{"iso-8859-3", B_ISO3_CONVERSION}, // MIME STANDARD
	{"iso-8859-4", B_ISO4_CONVERSION}, // MIME STANDARD
	{"iso-8859-5", B_ISO5_CONVERSION}, // MIME STANDARD
	{"iso-8859-6", B_ISO6_CONVERSION}, // MIME STANDARD
	{"iso-8859-7", B_ISO7_CONVERSION}, // MIME STANDARD
	{"iso-8859-8", B_ISO8_CONVERSION}, // MIME STANDARD
	{"iso-8859-9", B_ISO9_CONVERSION}, // MIME STANDARD
	{"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD
	{"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD
	{"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD
	{"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD

	{"shift_jis", B_SJIS_CONVERSION}, // MIME STANDARD
	{"shift-jis", B_SJIS_CONVERSION}, // Alternative spelling of the above.
	{"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD
	{"euc-jp", B_EUC_CONVERSION}, // MIME STANDARD

	{"euc-kr", B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD
	{"ksc5601", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE?
	{"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with some Microsoft software

	{"koi8-r", B_KOI8R_CONVERSION}, // MIME STANDARD
	{"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD
	{"windows-1252",B_MS_WINDOWS_CONVERSION}, // MIME STANDARD

	{"dos-437", B_MS_DOS_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? )
	{"dos-866", B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? )
	{"x-mac-roman", B_MAC_ROMAN_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? )

	// NOTE(review): 24 and 25 are raw conversion numbers rather than named
	// constants; confirm they match the conversion IDs of the system headers.
	{"big5", 24}, // MIME STANDARD

	{"gb18030", 25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? )
	{"gb2312", 25}, // COMPATIBLE
	{"gbk", 25}, // COMPATIBLE

	/* {"utf-16", B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */
	{"us-ascii", B_MAIL_US_ASCII_CONVERSION}, // MIME STANDARD
	{"utf-8", B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD

	{NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */
};
88
89
// File-scope state. None of these variables is referenced in the functions
// that directly follow; their users are further down in the file.
// NOTE(review): judging by the names and types this looks like a lazily
// compiled GNU regex (gRe/gRebuf, gNsub sub-expressions, gTranslation as its
// translate table) guarded by gLocker -- confirm against the code using them.
static int32 gLocker = 0;
static size_t gNsub = 1;
static re_pattern_buffer gRe;
static re_pattern_buffer *gRebuf = NULL;
static unsigned char gTranslation[256];
95
96
97 static int
handle_non_rfc2047_encoding(char ** buffer,size_t * bufferLength,size_t * sourceLength)98 handle_non_rfc2047_encoding(char **buffer, size_t *bufferLength,
99 size_t *sourceLength)
100 {
101 char *string = *buffer;
102 int32 length = *sourceLength;
103 int32 i;
104
105 // check for 8-bit characters
106 for (i = 0;i < length;i++)
107 if (string[i] & 0x80)
108 break;
109 if (i == length)
110 return false;
111
112 // check for groups of 8-bit characters - this code is not very smart;
113 // it just can detect some sort of single-byte encoded stuff, the rest
114 // is regarded as UTF-8
115
116 int32 singletons = 0,doubles = 0;
117
118 for (i = 0;i < length;i++)
119 {
120 if (string[i] & 0x80)
121 {
122 if ((string[i + 1] & 0x80) == 0)
123 singletons++;
124 else doubles++;
125 i++;
126 }
127 }
128
129 if (singletons != 0) // can't be valid UTF-8 anymore, so we assume ISO-Latin-1
130 {
131 int32 state = 0;
132 // just to be sure
133 int32 destLength = length * 4 + 1;
134 int32 destBufferLength = destLength;
135 char *dest = (char*)malloc(destLength);
136 if (dest == NULL)
137 return 0;
138
139 if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest,
140 &destLength, &state) == B_OK) {
141 *buffer = dest;
142 *bufferLength = destBufferLength;
143 *sourceLength = destLength;
144 return true;
145 }
146 free(dest);
147 return false;
148 }
149
150 // we assume a valid UTF-8 string here, but yes, we don't check it
151 return true;
152 }
153
154
155 // #pragma mark -
156
157
158 status_t
write_read_attr(BNode & node,read_flags flag)159 write_read_attr(BNode& node, read_flags flag)
160 {
161 if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
162 < 0)
163 return B_ERROR;
164
165 // Manage the status string only if it currently has a known state
166 BString currentStatus;
167 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, ¤tStatus) == B_OK
168 && currentStatus.ICompare("New") != 0
169 && currentStatus.ICompare("Read") != 0
170 && currentStatus.ICompare("Seen") != 0) {
171 return B_OK;
172 }
173
174 BString statusString = flag == B_READ ? "Read"
175 : flag == B_SEEN ? "Seen" : "New";
176 if (node.WriteAttrString(B_MAIL_ATTR_STATUS, &statusString) < 0)
177 return B_ERROR;
178
179 return B_OK;
180 }
181
182
183 status_t
read_read_attr(BNode & node,read_flags & flag)184 read_read_attr(BNode& node, read_flags& flag)
185 {
186 if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
187 == sizeof(int32))
188 return B_OK;
189
190 BString statusString;
191 if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) {
192 if (statusString.ICompare("New") == 0)
193 flag = B_UNREAD;
194 else
195 flag = B_READ;
196
197 return B_OK;
198 }
199
200 return B_ERROR;
201 }
202
203
// The next couple of functions are our wrappers around convert_to_utf8 and
// convert_from_utf8, so that they can also "convert" from UTF-8 to UTF-8 when
// the B_MAIL_UTF8_CONVERSION constant is specified as the conversion
// operation. They also let us add new conversions, like
// B_MAIL_US_ASCII_CONVERSION.
208
209
210 status_t
mail_convert_to_utf8(uint32 srcEncoding,const char * src,int32 * srcLen,char * dst,int32 * dstLen,int32 * state,char substitute)211 mail_convert_to_utf8(uint32 srcEncoding, const char *src, int32 *srcLen,
212 char *dst, int32 *dstLen, int32 *state, char substitute)
213 {
214 int32 copyAmount;
215 char *originalDst = dst;
216 status_t returnCode = -1;
217
218 if (srcEncoding == B_MAIL_UTF8_CONVERSION) {
219 copyAmount = *srcLen;
220 if (*dstLen < copyAmount)
221 copyAmount = *dstLen;
222 memcpy (dst, src, copyAmount);
223 *srcLen = copyAmount;
224 *dstLen = copyAmount;
225 returnCode = B_OK;
226 } else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) {
227 int32 i;
228 unsigned char letter;
229 copyAmount = *srcLen;
230 if (*dstLen < copyAmount)
231 copyAmount = *dstLen;
232 for (i = 0; i < copyAmount; i++) {
233 letter = *src++;
234 if (letter > 0x80U)
235 // Invalid, could also use substitute, but better to strip high bit.
236 *dst++ = letter - 0x80U;
237 else if (letter == 0x80U)
238 // Can't convert to 0x00 since that's NUL, which would cause problems.
239 *dst++ = substitute;
240 else
241 *dst++ = letter;
242 }
243 *srcLen = copyAmount;
244 *dstLen = copyAmount;
245 returnCode = B_OK;
246 } else
247 returnCode = convert_to_utf8 (srcEncoding, src, srcLen,
248 dst, dstLen, state, substitute);
249
250 if (returnCode == B_OK) {
251 // Replace spurious NUL bytes, which should normally not be in the
252 // output of the decoding (not normal UTF-8 characters, and no NULs are
253 // in our usual input strings). They happen for some odd ISO-2022-JP
254 // byte pair combinations which are improperly handled by the BeOS
255 // routines. Like "\e$ByD\e(B" where \e is the ESC character $1B, the
256 // first ESC $ B switches to a Japanese character set, then the next
257 // two bytes "yD" specify a character, then ESC ( B switches back to
258 // the ASCII character set. The UTF-8 conversion yields a NUL byte.
259 int32 i;
260 for (i = 0; i < *dstLen; i++)
261 if (originalDst[i] == 0)
262 originalDst[i] = substitute;
263 }
264 return returnCode;
265 }
266
267
268 status_t
mail_convert_from_utf8(uint32 dstEncoding,const char * src,int32 * srcLen,char * dst,int32 * dstLen,int32 * state,char substitute)269 mail_convert_from_utf8(uint32 dstEncoding, const char *src, int32 *srcLen,
270 char *dst, int32 *dstLen, int32 *state, char substitute)
271 {
272 int32 copyAmount;
273 status_t errorCode;
274 int32 originalDstLen = *dstLen;
275 int32 tempDstLen;
276 int32 tempSrcLen;
277
278 if (dstEncoding == B_MAIL_UTF8_CONVERSION) {
279 copyAmount = *srcLen;
280 if (*dstLen < copyAmount)
281 copyAmount = *dstLen;
282 memcpy (dst, src, copyAmount);
283 *srcLen = copyAmount;
284 *dstLen = copyAmount;
285 return B_OK;
286 }
287
288 if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) {
289 int32 characterLength;
290 int32 dstRemaining = *dstLen;
291 unsigned char letter;
292 int32 srcRemaining = *srcLen;
293
294 // state contains the number of source bytes to skip, left over from a
295 // partial UTF-8 character split over the end of the buffer from last
296 // time.
297 if (srcRemaining <= *state) {
298 *state -= srcRemaining;
299 *dstLen = 0;
300 return B_OK;
301 }
302 srcRemaining -= *state;
303 src += *state;
304 *state = 0;
305
306 while (true) {
307 if (srcRemaining <= 0 || dstRemaining <= 0)
308 break;
309 letter = *src;
310 if (letter < 0x80)
311 characterLength = 1; // Regular ASCII equivalent code.
312 else if (letter < 0xC0)
313 characterLength = 1; // Invalid in-between data byte 10xxxxxx.
314 else if (letter < 0xE0)
315 characterLength = 2;
316 else if (letter < 0xF0)
317 characterLength = 3;
318 else if (letter < 0xF8)
319 characterLength = 4;
320 else if (letter < 0xFC)
321 characterLength = 5;
322 else if (letter < 0xFE)
323 characterLength = 6;
324 else
325 characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8.
326 if (letter < 0x80)
327 *dst++ = *src;
328 else
329 *dst++ = substitute;
330 dstRemaining--;
331 if (srcRemaining < characterLength) {
332 // Character split past the end of the buffer.
333 *state = characterLength - srcRemaining;
334 srcRemaining = 0;
335 } else {
336 src += characterLength;
337 srcRemaining -= characterLength;
338 }
339 }
340 // Update with the amounts used.
341 *srcLen = *srcLen - srcRemaining;
342 *dstLen = *dstLen - dstRemaining;
343 return B_OK;
344 }
345
346 errorCode = convert_from_utf8(dstEncoding, src, srcLen, dst, dstLen, state,
347 substitute);
348 if (errorCode != B_OK)
349 return errorCode;
350
351 if (dstEncoding != B_JIS_CONVERSION)
352 return B_OK;
353
354 // B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different
355 // character subsets. For E-mail headers (and other uses), it needs to be
356 // switched back to ASCII at the end (otherwise the last character gets
357 // lost or other weird things happen in the headers). Note that we can't
358 // just append the escape code since the convert_from_utf8 "state" will be
359 // wrong. So we append an ASCII letter and throw it away, leaving just the
360 // escape code. Well, it actually switches to the Roman character set, not
361 // ASCII, but that should be OK.
362
363 tempDstLen = originalDstLen - *dstLen;
364 if (tempDstLen < 3) // Not enough space remaining in the output.
365 return B_OK; // Sort of an error, but we did convert the rest OK.
366 tempSrcLen = 1;
367 errorCode = convert_from_utf8(dstEncoding, "a", &tempSrcLen,
368 dst + *dstLen, &tempDstLen, state, substitute);
369 if (errorCode != B_OK)
370 return errorCode;
371 *dstLen += tempDstLen - 1 /* don't include the ASCII letter */;
372 return B_OK;
373 }
374
375
376 ssize_t
rfc2047_to_utf8(char ** bufp,size_t * bufLen,size_t strLen)377 rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen)
378 {
379 char *head, *tail;
380 char *charset, *encoding, *end;
381 ssize_t ret = B_OK;
382
383 if (bufp == NULL || *bufp == NULL)
384 return -1;
385
386 char *string = *bufp;
387
388 //---------Handle *&&^%*&^ non-RFC compliant, 8bit mail
389 if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen))
390 return strLen;
391
392 // set up string length
393 if (strLen == 0)
394 strLen = strlen(*bufp);
395 char lastChar = (*bufp)[strLen];
396 (*bufp)[strLen] = '\0';
397
398 //---------Whew! Now for RFC compliant mail
399 bool encodedWordFoundPreviously = false;
400 for (head = tail = string;
401 ((charset = strstr(tail, "=?")) != NULL)
402 && (((encoding = strchr(charset + 2, '?')) != NULL)
403 && encoding[1] && (encoding[2] == '?') && encoding[3])
404 && (end = strstr(encoding + 3, "?=")) != NULL;
405 // found "=?...charset...?e?...text...?= (e == encoding)
406 // ^charset ^encoding ^end
407 tail = end)
408 {
409 // Copy non-encoded text (from tail up to charset) to the output.
410 // Ignore spaces between two encoded "words". RFC2047 says the words
411 // should be concatenated without the space (designed for Asian
412 // sentences which have no spaces yet need to be broken into "words" to
413 // keep within the line length limits).
414 bool nonSpaceFound = false;
415 for (int i = 0; i < charset-tail; i++) {
416 if (!isspace (tail[i])) {
417 nonSpaceFound = true;
418 break;
419 }
420 }
421 if (!encodedWordFoundPreviously || nonSpaceFound) {
422 if (string != tail && tail != charset)
423 memmove(string, tail, charset-tail);
424 string += charset-tail;
425 }
426 tail = charset;
427 encodedWordFoundPreviously = true;
428
429 // move things to point at what they should:
430 // =?...charset...?e?...text...?= (e == encoding)
431 // ^charset ^encoding ^end
432 charset += 2;
433 encoding += 1;
434 end += 2;
435
436 // find the charset this text is in now
437 size_t cLen = encoding - 1 - charset;
438 bool base64encoded = toupper(*encoding) == 'B';
439
440 uint32 convertID = B_MAIL_NULL_CONVERSION;
441 char charsetName[cLen + 1];
442 memcpy(charsetName, charset, cLen);
443 charsetName[cLen] = '\0';
444 if (strcasecmp(charsetName, "us-ascii") == 0) {
445 convertID = B_MAIL_US_ASCII_CONVERSION;
446 } else if (strcasecmp(charsetName, "utf-8") == 0) {
447 convertID = B_MAIL_UTF8_CONVERSION;
448 } else {
449 const BCharacterSet* charSet
450 = BCharacterSetRoster::FindCharacterSetByName(charsetName);
451 if (charSet != NULL) {
452 convertID = charSet->GetConversionID();
453 }
454 }
455 if (convertID == B_MAIL_NULL_CONVERSION) {
456 // unidentified charset
457 // what to do? doing nothing skips the encoded text;
458 // but we should keep it: we copy it to the output.
459 if (string != tail && tail != end)
460 memmove(string, tail, end-tail);
461 string += end-tail;
462 continue;
463 }
464 // else we've successfully identified the charset
465
466 char *src = encoding+2;
467 int32 srcLen = end - 2 - src;
468 // encoded text: src..src+srcLen
469
470 // decode text, get decoded length (reducing xforms)
471 srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1)
472 : decode_base64(src, src, srcLen);
473
474 // allocate space for the converted text
475 int32 dstLen = end-string + *bufLen-strLen;
476 char *dst = (char*)malloc(dstLen);
477 int32 cvLen = srcLen;
478 int32 convState = 0;
479
480 //
481 // do the conversion
482 //
483 ret = mail_convert_to_utf8(convertID, src, &cvLen, dst, &dstLen,
484 &convState);
485 if (ret != B_OK) {
486 // what to do? doing nothing skips the encoded text
487 // but we should keep it: we copy it to the output.
488
489 free(dst);
490
491 if (string != tail && tail != end)
492 memmove(string, tail, end-tail);
493 string += end-tail;
494 continue;
495 }
496 /* convert_to_ is either returning something wrong or my
497 test data is screwed up. Whatever it is, Not Enough
498 Space is not the only cause of the below, so we just
499 assume it succeeds if it converts anything at all.
500 else if (cvLen < srcLen)
501 {
502 // not enough room to convert the data;
503 // grow *buf and retry
504
505 free(dst);
506
507 char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1));
508 if (temp == NULL)
509 {
510 ret = B_NO_MEMORY;
511 break;
512 }
513
514 *bufp = temp;
515 *bufLen = 2*(*bufLen + 1);
516
517 string = *bufp + (string-head);
518 tail = *bufp + (tail-head);
519 charset = *bufp + (charset-head);
520 encoding = *bufp + (encoding-head);
521 end = *bufp + (end-head);
522 src = *bufp + (src-head);
523 head = *bufp;
524 continue;
525 }
526 */
527 else {
528 if (dstLen > end-string) {
529 // copy the string forward...
530 memmove(string+dstLen, end, strLen - (end-head) + 1);
531 strLen += string+dstLen - end;
532 end = string + dstLen;
533 }
534
535 memcpy(string, dst, dstLen);
536 string += dstLen;
537 free(dst);
538 continue;
539 }
540 }
541
542 // copy everything that's left
543 size_t tailLen = strLen - (tail - head);
544 memmove(string, tail, tailLen+1);
545 string += tailLen;
546
547 // replace the last char
548 (*bufp)[strLen] = lastChar;
549
550 return ret < B_OK ? ret : string-head;
551 }
552
553
554 ssize_t
utf8_to_rfc2047(char ** bufp,ssize_t length,uint32 charset,char encoding)555 utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding)
556 {
557 struct word {
558 BString originalWord;
559 BString convertedWord;
560 bool needsEncoding;
561
562 // Convert the word from UTF-8 to the desired character set. The
563 // converted version also includes the escape codes to return to ASCII
564 // mode, if relevant. Also note if it uses unprintable characters,
565 // which means it will need that special encoding treatment later.
566 void ConvertWordToCharset (uint32 charset) {
567 int32 state = 0;
568 int32 originalLength = originalWord.Length();
569 int32 convertedLength = originalLength * 5 + 1;
570 char *convertedBuffer = convertedWord.LockBuffer (convertedLength);
571 mail_convert_from_utf8 (charset, originalWord.String(),
572 &originalLength, convertedBuffer, &convertedLength, &state);
573 for (int i = 0; i < convertedLength; i++) {
574 if ((convertedBuffer[i] & (1 << 7)) ||
575 (convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) {
576 needsEncoding = true;
577 break;
578 }
579 }
580 convertedWord.UnlockBuffer (convertedLength);
581 };
582 };
583 struct word *currentWord;
584 BList words;
585
586 // Break the header into words. White space characters (including tabs and
587 // newlines) separate the words. Each word includes any space before it as
588 // part of the word. Actually, quotes and other special characters
589 // (",()<>@) are treated as separate words of their own so that they don't
590 // get encoded (because MIME headers get the quotes parsed before character
591 // set unconversion is done). The reader is supposed to ignore all white
592 // space between encoded words, which can be inserted so that older mail
593 // parsers don't have overly long line length problems.
594
595 const char *source = *bufp;
596 const char *bufEnd = *bufp + length;
597 const char *specialChars = "\"()<>@,";
598
599 while (source < bufEnd) {
600 currentWord = new struct word;
601 currentWord->needsEncoding = false;
602
603 int wordEnd = 0;
604
605 // Include leading spaces as part of the word.
606 while (source + wordEnd < bufEnd && isspace (source[wordEnd]))
607 wordEnd++;
608
609 if (source + wordEnd < bufEnd &&
610 strchr (specialChars, source[wordEnd]) != NULL) {
611 // Got a quote mark or other special character, which is treated as
612 // a word in itself since it shouldn't be encoded, which would hide
613 // it from the mail system.
614 wordEnd++;
615 } else {
616 // Find the end of the word. Leave wordEnd pointing just after the
617 // last character in the word.
618 while (source + wordEnd < bufEnd) {
619 if (isspace(source[wordEnd]) ||
620 strchr (specialChars, source[wordEnd]) != NULL)
621 break;
622 if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ &&
623 0xC0 == (0xC0 & (unsigned int) source[wordEnd])) {
624 // No English words are that long (46 is the longest),
625 // break up what is likely Asian text (which has no spaces)
626 // at the start of the next non-ASCII UTF-8 character (high
627 // two bits are both ones). Note that two encoded words in
628 // a row get joined together, even if there is a space
629 // between them in the final output text, according to the
630 // standard. Next word will also be conveniently get
631 // encoded due to the 0xC0 test.
632 currentWord->needsEncoding = true;
633 break;
634 }
635 wordEnd++;
636 }
637 }
638 currentWord->originalWord.SetTo (source, wordEnd);
639 currentWord->ConvertWordToCharset (charset);
640 words.AddItem(currentWord);
641 source += wordEnd;
642 }
643
644 // Combine adjacent words which contain unprintable text so that the
645 // overhead of switching back and forth between regular text and specially
646 // encoded text is reduced. However, the combined word must be shorter
647 // than the maximum of 75 bytes, including character set specification and
648 // all those delimiters (worst case 22 bytes of overhead).
649
650 struct word *run;
651
652 for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) {
653 if (!currentWord->needsEncoding)
654 continue; // No need to combine unencoded words.
655 for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) {
656 if (!run->needsEncoding)
657 break; // Don't want to combine encoded and unencoded words.
658 if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) {
659 currentWord->originalWord.Append (run->originalWord);
660 currentWord->ConvertWordToCharset (charset);
661 words.RemoveItem(g);
662 delete run;
663 g--;
664 } else // Can't merge this word, result would be too long.
665 break;
666 }
667 }
668
669 // Combine the encoded and unencoded words into one line, doing the
670 // quoted-printable or base64 encoding. Insert an extra space between
671 // words which are both encoded to make word wrapping easier, since there
672 // is normally none, and you're allowed to insert space (the receiver
673 // throws it away if it is between encoded words).
674
675 BString rfc2047;
676 bool previousWordNeededEncoding = false;
677
678 const char *charset_dec = "none-bug";
679 for (int32 i = 0; mail_charsets[i].charset != NULL; i++) {
680 if (mail_charsets[i].flavor == charset) {
681 charset_dec = mail_charsets[i].charset;
682 break;
683 }
684 }
685
686 while ((currentWord = (struct word *)words.RemoveItem((int32)0)) != NULL) {
687 if ((encoding != quoted_printable && encoding != base64) ||
688 !currentWord->needsEncoding) {
689 rfc2047.Append (currentWord->convertedWord);
690 } else {
691 // This word needs encoding. Try to insert a space between it and
692 // the previous word.
693 if (previousWordNeededEncoding)
694 rfc2047 << ' '; // Can insert as many spaces as you want between encoded words.
695 else {
696 // Previous word is not encoded, spaces are significant. Try
697 // to move a space from the start of this word to be outside of
698 // the encoded text, so that there is a bit of space between
699 // this word and the previous one to enhance word wrapping
700 // chances later on.
701 if (currentWord->originalWord.Length() > 1 &&
702 isspace (currentWord->originalWord[0])) {
703 rfc2047 << currentWord->originalWord[0];
704 currentWord->originalWord.Remove (0 /* offset */, 1 /* length */);
705 currentWord->ConvertWordToCharset (charset);
706 }
707 }
708
709 char *encoded = NULL;
710 ssize_t encoded_len = 0;
711 int32 convertedLength = currentWord->convertedWord.Length ();
712 const char *convertedBuffer = currentWord->convertedWord.String ();
713
714 switch (encoding) {
715 case quoted_printable:
716 encoded = (char *) malloc (convertedLength * 3);
717 encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */);
718 break;
719 case base64:
720 encoded = (char *) malloc (convertedLength * 2);
721 encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */);
722 break;
723 default: // Unknown encoding type, shouldn't happen.
724 encoded = (char *) convertedBuffer;
725 encoded_len = convertedLength;
726 break;
727 }
728
729 rfc2047 << "=?" << charset_dec << '?' << encoding << '?';
730 rfc2047.Append (encoded, encoded_len);
731 rfc2047 << "?=";
732
733 if (encoding == quoted_printable || encoding == base64)
734 free(encoded);
735 }
736 previousWordNeededEncoding = currentWord->needsEncoding;
737 delete currentWord;
738 }
739
740 free(*bufp);
741
742 ssize_t finalLength = rfc2047.Length ();
743 *bufp = (char *) (malloc (finalLength + 1));
744 memcpy (*bufp, rfc2047.String(), finalLength);
745 (*bufp)[finalLength] = 0;
746
747 return finalLength;
748 }
749
750
751 void
FoldLineAtWhiteSpaceAndAddCRLF(BString & string)752 FoldLineAtWhiteSpaceAndAddCRLF(BString &string)
753 {
754 int inputLength = string.Length();
755 int lineStartIndex;
756 const int maxLineLength = 78; // Doesn't include CRLF.
757 BString output;
758 int splitIndex;
759 int tempIndex;
760
761 lineStartIndex = 0;
762 while (true) {
763 // If we don't need to wrap the text, just output the remainder, if any.
764
765 if (lineStartIndex + maxLineLength >= inputLength) {
766 if (lineStartIndex < inputLength) {
767 output.Insert (string, lineStartIndex /* source offset */,
768 inputLength - lineStartIndex /* count */,
769 output.Length() /* insert at */);
770 output.Append (CRLF);
771 }
772 break;
773 }
774
775 // Look ahead for a convenient spot to split it, between a comma and
776 // space, which you often see between e-mail addresses like this:
777 // "Joe Who" joe@dot.com, "Someone Else" else@blot.com
778
779 tempIndex = lineStartIndex + maxLineLength;
780 if (tempIndex > inputLength)
781 tempIndex = inputLength;
782 splitIndex = string.FindLast (", ", tempIndex);
783 if (splitIndex >= lineStartIndex)
784 splitIndex++; // Point to the space character.
785
786 // If none of those exist, try splitting at any white space.
787
788 if (splitIndex <= lineStartIndex)
789 splitIndex = string.FindLast (" ", tempIndex);
790 if (splitIndex <= lineStartIndex)
791 splitIndex = string.FindLast ("\t", tempIndex);
792
793 // If none of those exist, allow for a longer word - split at the next
794 // available white space.
795
796 if (splitIndex <= lineStartIndex)
797 splitIndex = string.FindFirst (" ", lineStartIndex + 1);
798 if (splitIndex <= lineStartIndex)
799 splitIndex = string.FindFirst ("\t", lineStartIndex + 1);
800
801 // Give up, the whole rest of the line can't be split, just dump it
802 // out.
803
804 if (splitIndex <= lineStartIndex) {
805 if (lineStartIndex < inputLength) {
806 output.Insert (string, lineStartIndex /* source offset */,
807 inputLength - lineStartIndex /* count */,
808 output.Length() /* insert at */);
809 output.Append (CRLF);
810 }
811 break;
812 }
813
814 // Do the split. The current line up to but not including the space
815 // gets output, followed by a CRLF. The space remains to become the
816 // start of the next line (and that tells the message reader that it is
817 // a continuation line).
818
819 output.Insert (string, lineStartIndex /* source offset */,
820 splitIndex - lineStartIndex /* count */,
821 output.Length() /* insert at */);
822 output.Append (CRLF);
823 lineStartIndex = splitIndex;
824 }
825 string.SetTo (output);
826 }
827
828
// Reads one logical header line from a stdio stream. Continuation lines
// (those starting with a space or tab) are joined to the line before them,
// and a CRLF line ending is turned into a plain LF.
//
// buffer - in/out: malloc()ed buffer to use, may point to NULL; it is grown
//          as needed and handed back. If "buffer" itself is NULL, the line
//          is read and thrown away.
// buflen - in/out: allocated size of *buffer.
//
// Returns the length of the line, including its line feed; 0 at the end of
// the file. On failure the ENOMEM value, the errno value (if negative), or -1
// is returned.
ssize_t
readfoldedline(FILE *file, char **buffer, size_t *buflen)
{
	ssize_t capacity = 0;
	if (buflen != NULL && *buflen != 0)
		capacity = *buflen;

	char *line = NULL;
	if (buffer != NULL && *buffer != NULL)
		line = *buffer;

	ssize_t used = 0;
		// Number of characters currently in the buffer.

	for (;;) {
		// There always has to be room for two more characters: the next one
		// read, and the terminating NUL byte.
		if (line == NULL || used + 2 >= capacity) {
			char *grown = (char *)realloc(line, capacity + 64);
			if (grown == NULL) {
				// Out of memory, however existing buffer remains allocated.
				used = ENOMEM;
				break;
			}
			line = grown;
			capacity += 64;
		}

		int ch = fgetc(file);
		if (ch == EOF) {
			if (ferror(file)) {
				used = errno;
				if (used >= 0)
					used = -1; // Error codes must be negative.
			} else if (used > 0) {
				// End of file in the middle of a line: end the line as well.
				// If nothing has been read at all, an empty string is
				// returned instead.
				line[used++] = '\n';
				if (line[used - 2] == '\r') {
					line[used - 2] = '\n';
					--used;
				}
			}
			break;
		}

		line[used++] = ch;
		if (ch != '\n')
			continue;

		// Turn a CRLF line ending into a LF. This is done before checking
		// for a continuation line, in case there is none.
		if (used >= 2 && line[used - 2] == '\r') {
			line[used - 2] = '\n';
			--used;
		}

		// An empty line is returned as is, so that it doesn't get swallowed
		// if the next line happens to start with white space.
		if (used <= 1)
			break;

		// Peek at the first character of the next line. Reading EOF here and
		// pushing it back is fine.
		int next = fgetc(file);
		if (next != ' ' && next != '\t') {
			// Not a continuation line, the line is complete.
			ungetc(next, file);
			break;
		}

		// Continuation line: the white space takes the place of the \n.
		line[used - 1] = next;
	}

	if (line != NULL && used >= 0)
		line[used] = '\0';

	if (buffer != NULL)
		*buffer = line;
	else if (line != NULL)
		free(line);

	if (buflen != NULL)
		*buflen = capacity;

	return used;
}
910
911
912 ssize_t
readfoldedline(BPositionIO & in,char ** buffer,size_t * buflen)913 readfoldedline(BPositionIO &in, char **buffer, size_t *buflen)
914 {
915 ssize_t len = buflen && *buflen ? *buflen : 0;
916 char * buf = buffer && *buffer ? *buffer : NULL;
917 ssize_t cnt = 0; // Number of characters currently in the buffer.
918 char c;
919 status_t errorCode;
920
921 while (true) {
922 // Make sure there is space in the buffer for two more characters (one
923 // for the next character, and one for the end of string NUL byte).
924 if (buf == NULL || cnt + 2 >= len) {
925 char *temp = (char *)realloc(buf, len + 64);
926 if (temp == NULL) {
927 // Out of memory, however existing buffer remains allocated.
928 cnt = ENOMEM;
929 break;
930 }
931 len += 64;
932 buf = temp;
933 }
934
935 errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered.
936 if (errorCode != 1) {
937 if (errorCode < 0) {
938 cnt = errorCode; // IO error encountered, just return the code.
939 } else {
940 // Really is end of file. Also make it end of line if there is
941 // some text already read in. If the first thing read was EOF,
942 // just return an empty string.
943 if (cnt > 0) {
944 buf[cnt++] = '\n';
945 if (buf[cnt-2] == '\r') {
946 buf[cnt-2] = '\n';
947 --cnt;
948 }
949 }
950 }
951 break;
952 }
953
954 buf[cnt++] = c;
955
956 if (c == '\n') {
957 // Convert CRLF end of line to just a LF. Do it before folding, in
958 // case we don't need to fold.
959 if (cnt >= 2 && buf[cnt-2] == '\r') {
960 buf[cnt-2] = '\n';
961 --cnt;
962 }
963 // If the current line is empty then return it (so that empty lines
964 // don't disappear if the next line starts with a space).
965 if (cnt <= 1)
966 break;
967 // if first character on the next line is whitespace, fold lines
968 errorCode = in.Read(&c,1);
969 if (errorCode == 1) {
970 if (c == ' ' || c == '\t')
971 buf[cnt-1] = c; // Replace \n with the white space character.
972 else {
973 // Not folding, we finished reading a whole line.
974 in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read.
975 break;
976 }
977 } else if (errorCode < 0) {
978 cnt = errorCode;
979 break;
980 } else // No next line; at the end of the file. Return the line.
981 break;
982 }
983 }
984
985 if (buf != NULL && cnt >= 0)
986 buf[cnt] = '\0';
987
988 if (buffer)
989 *buffer = buf;
990 else if (buf)
991 free(buf);
992
993 if (buflen)
994 *buflen = len;
995
996 return cnt;
997 }
998
999
/*!	Copies the next logical header line from the NUL terminated text at
	\a *header into a heap buffer, joining folded (continuation) lines.

	A CRLF line end is stored as a single '\n'. If the line following a
	non-empty line starts with a space or a tab, the stored '\n' is replaced
	by that white space character and reading goes on, so a folded header
	comes back as one line. An empty line is returned on its own, even if the
	next line starts with white space. If the text ends without a line break,
	a '\n' is appended to the characters read so far.

	\param header In/out read position. It is advanced past the consumed
		characters, but never past the terminating NUL, so the function can
		be called repeatedly until it returns 0. A NULL \a header or
		\a *header is treated like an empty text.
	\param buffer In/out heap buffer (may point to NULL). It is grown with
		realloc() in steps of 64 bytes. If \a buffer itself is NULL the
		internal buffer is freed before returning.
	\param buflen In/out allocated size of \a *buffer, may be NULL.
	\return The number of characters stored, including the final '\n' but not
		the terminating NUL; 0 at the end of the text; or the value of ENOMEM
		if the buffer could not be grown (the existing buffer stays valid).
		NOTE(review): the "cnt >= 0" test below assumes error codes are
		negative on the target platform - confirm.
*/
ssize_t
nextfoldedline(const char** header, char **buffer, size_t *buflen)
{
	ssize_t len = buflen && *buflen ? *buflen : 0;
	char *buf = buffer && *buffer ? *buffer : NULL;
	ssize_t cnt = 0;
		// Number of characters currently in the buffer.

	// Work on a local cursor; a missing input behaves like an empty text.
	const bool haveText = header != NULL && *header != NULL;
	const char *cursor = haveText ? *header : "";

	while (true) {
		// Make sure there is space in the buffer for two more characters (one
		// for the next character, and one for the end of string NUL byte).
		if (buf == NULL || cnt + 2 >= len) {
			char *temp = (char *)realloc(buf, len + 64);
			if (temp == NULL) {
				// Out of memory, however existing buffer remains allocated.
				cnt = ENOMEM;
				break;
			}
			len += 64;
			buf = temp;
		}

		// Look at the next character without consuming it, so that the
		// cursor is never moved beyond the terminating NUL.
		const char c = *cursor;
		if (c == '\0') {
			// End of text. Also make it end of line if there is some text
			// already read in. If the first thing read was the NUL, just
			// return an empty string.
			if (cnt > 0) {
				buf[cnt++] = '\n';
				if (buf[cnt - 2] == '\r') {
					buf[cnt - 2] = '\n';
					--cnt;
				}
			}
			break;
		}

		++cursor;
		buf[cnt++] = c;

		if (c == '\n') {
			// Convert CRLF end of line to just a LF. Do it before folding, in
			// case we don't need to fold.
			if (cnt >= 2 && buf[cnt - 2] == '\r') {
				buf[cnt - 2] = '\n';
				--cnt;
			}

			// If the current line is empty then return it (so that empty
			// lines don't disappear if the next line starts with a space).
			if (cnt <= 1)
				break;

			// Only a leading space or tab on the next line means folding;
			// anything else (including the end of the text) ends the line
			// and is left unread for the next call.
			if (*cursor != ' ' && *cursor != '\t')
				break;

			// Replace the '\n' with the white space character and consume it.
			buf[cnt - 1] = *cursor++;
		}
	}

	if (haveText)
		*header = cursor;

	if (buf != NULL && cnt >= 0)
		buf[cnt] = '\0';

	if (buffer)
		*buffer = buf;
	else if (buf)
		free(buf);

	if (buflen)
		*buflen = len;

	return cnt;
}
1078
1079
1080 void
trim_white_space(BString & string)1081 trim_white_space(BString &string)
1082 {
1083 int32 i;
1084 int32 length = string.Length();
1085 char *buffer = string.LockBuffer(length + 1);
1086
1087 while (length > 0 && isspace(buffer[length - 1]))
1088 length--;
1089 buffer[length] = '\0';
1090
1091 for (i = 0; buffer[i] && isspace(buffer[i]); i++) {}
1092 if (i != 0) {
1093 length -= i;
1094 memmove(buffer,buffer + i,length + 1);
1095 }
1096 string.UnlockBuffer(length);
1097 }
1098
1099
1100 /*! Tries to return a human-readable name from the specified
1101 header parameter (should be from "To:" or "From:").
1102 Tries to return the name rather than the eMail address.
1103 */
1104 void
extract_address_name(BString & header)1105 extract_address_name(BString &header)
1106 {
1107 BString name;
1108 const char *start = header.String();
1109 const char *stop = start + strlen (start);
1110
1111 // Find a string S in the header (email foo) that matches:
1112 // Old style name in brackets: foo@bar.com (S)
1113 // New style quotes: "S" <foo@bar.com>
1114 // New style no quotes if nothing else found: S <foo@bar.com>
1115 // If nothing else found then use the whole thing: S
1116
1117 for (int i = 0; i <= 3; i++) {
1118 // Set p1 to the first letter in the name and p2 to just past the last
1119 // letter in the name. p2 stays NULL if a name wasn't found in this
1120 // pass.
1121 const char *p1 = NULL, *p2 = NULL;
1122
1123 switch (i) {
1124 case 0: // foo@bar.com (S)
1125 if ((p1 = strchr(start,'(')) != NULL) {
1126 p1++; // Advance to first letter in the name.
1127 size_t nest = 1; // Handle nested brackets.
1128 for (p2 = p1; p2 < stop; ++p2)
1129 {
1130 if (*p2 == ')')
1131 --nest;
1132 else if (*p2 == '(')
1133 ++nest;
1134 if (nest <= 0)
1135 break;
1136 }
1137 if (nest != 0)
1138 p2 = NULL; // False alarm, no terminating bracket.
1139 }
1140 break;
1141 case 1: // "S" <foo@bar.com>
1142 if ((p1 = strchr(start, '\"')) != NULL)
1143 p2 = strchr(++p1, '\"');
1144 break;
1145 case 2: // S <foo@bar.com>
1146 p1 = start;
1147 if (name.Length() == 0)
1148 p2 = strchr(start, '<');
1149 break;
1150 case 3: // S
1151 p1 = start;
1152 if (name.Length() == 0)
1153 p2 = stop;
1154 break;
1155 }
1156
1157 // Remove leading and trailing space-like characters and save the
1158 // result if it is longer than any other likely names found.
1159 if (p2 != NULL) {
1160 while (p1 < p2 && (isspace (*p1)))
1161 ++p1;
1162
1163 while (p1 < p2 && (isspace (p2[-1])))
1164 --p2;
1165
1166 int newLength = p2 - p1;
1167 if (name.Length() < newLength)
1168 name.SetTo(p1, newLength);
1169 }
1170 }
1171
1172 int32 lessIndex = name.FindFirst('<');
1173 int32 greaterIndex = name.FindLast('>');
1174
1175 if (lessIndex == 0) {
1176 // Have an address of the form <address> and nothing else, so remove
1177 // the greater and less than signs, if any.
1178 if (greaterIndex > 0)
1179 name.Remove(greaterIndex, 1);
1180 name.Remove(lessIndex, 1);
1181 } else if (lessIndex > 0 && lessIndex < greaterIndex) {
1182 // Yahoo stupidly inserts the e-mail address into the name string, so
1183 // this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com>
1184 name.Remove(lessIndex, greaterIndex - lessIndex + 1);
1185 }
1186
1187 trim_white_space(name);
1188 header = name;
1189 }
1190
1191
1192 /*! Given a subject in a BString, remove the extraneous RE: re: and other stuff
1193 to get down to the core subject string, which should be identical for all
1194 messages posted about a topic. The input string is modified in place to
1195 become the output core subject string.
1196 */
1197 void
SubjectToThread(BString & string)1198 SubjectToThread (BString &string)
1199 {
1200 // a regex that matches a non-ASCII UTF8 character:
1201 #define U8C \
1202 "[\302-\337][\200-\277]" \
1203 "|\340[\302-\337][\200-\277]" \
1204 "|[\341-\357][\200-\277][\200-\277]" \
1205 "|\360[\220-\277][\200-\277][\200-\277]" \
1206 "|[\361-\367][\200-\277][\200-\277][\200-\277]" \
1207 "|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \
1208 "|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \
1209 "|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \
1210 "|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]"
1211
1212 #define PATTERN \
1213 "^ +" \
1214 "|^(\\[[^]]*\\])(\\<| +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1215 "|^( +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1216 "| *\\(fwd\\) *$"
1217
1218 if (gRebuf == NULL && atomic_add(&gLocker, 1) == 0) {
1219 // the idea is to compile the regexp once to speed up testing
1220
1221 for (int i=0; i<256; ++i) gTranslation[i]=i;
1222 for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i);
1223
1224 gRe.translate = gTranslation;
1225 gRe.regs_allocated = REGS_FIXED;
1226 re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;
1227
1228 const char *pattern = PATTERN;
1229 // count subexpressions in PATTERN
1230 for (unsigned int i=0; pattern[i] != 0; ++i)
1231 {
1232 if (pattern[i] == '\\')
1233 ++i;
1234 else if (pattern[i] == '(')
1235 ++gNsub;
1236 }
1237
1238 const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe);
1239 if (err == NULL)
1240 gRebuf = &gRe;
1241 else
1242 fprintf(stderr, "Failed to compile the regex: %s\n", err);
1243 } else {
1244 int32 tries = 200;
1245 while (gRebuf == NULL && tries-- > 0)
1246 snooze(10000);
1247 }
1248
1249 if (gRebuf) {
1250 struct re_registers regs;
1251 // can't be static if this function is to be thread-safe
1252
1253 regs.num_regs = gNsub;
1254 regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1255 regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1256
1257 for (int start = 0; (start = re_search(gRebuf, string.String(),
1258 string.Length(), 0, string.Length(), ®s)) >= 0;) {
1259 //
1260 // we found something
1261 //
1262
1263 // don't delete [bemaildaemon]...
1264 if (start == regs.start[1])
1265 start = regs.start[2];
1266
1267 string.Remove(start,regs.end[0]-start);
1268 if (start)
1269 string.Insert(' ',1,start);
1270
1271 // TODO: for some subjects this results in an endless loop, check
1272 // why this happen.
1273 if (regs.end[0] - start <= 1)
1274 break;
1275 }
1276
1277 free(regs.start);
1278 free(regs.end);
1279 }
1280
1281 // Finally remove leading and trailing space. Some software, like
1282 // tm-edit 1.8, appends a space to the subject, which would break
1283 // threading if we left it in.
1284 trim_white_space(string);
1285 }
1286
1287
1288 /*! Converts a date to a time. Handles numeric time zones too, unlike
1289 parsedate(). Returns -1 if it fails.
1290 */
1291 time_t
ParseDateWithTimeZone(const char * DateString)1292 ParseDateWithTimeZone(const char *DateString)
1293 {
1294 time_t currentTime;
1295 time_t dateAsTime;
1296 char tempDateString[80];
1297 char tempZoneString[6];
1298 time_t zoneDeltaTime;
1299 int zoneIndex;
1300 char *zonePntr;
1301
1302 // See if we can remove the time zone portion. parsedate understands time
1303 // zone 3 letter names, but doesn't understand the numeric +9999 time zone
1304 // format. To do: see if a newer parsedate exists.
1305
1306 strncpy (tempDateString, DateString, sizeof (tempDateString));
1307 tempDateString[sizeof (tempDateString) - 1] = 0;
1308
1309 // Remove trailing spaces.
1310 zonePntr = tempDateString + strlen (tempDateString) - 1;
1311 while (zonePntr >= tempDateString && isspace (*zonePntr))
1312 *zonePntr-- = 0;
1313 if (zonePntr < tempDateString)
1314 return -1; // Empty string.
1315
1316 // Remove the trailing time zone in round brackets, like in
1317 // Fri, 22 Feb 2002 15:22:42 EST (-0500)
1318 // Thu, 25 Apr 1996 11:44:19 -0400 (EDT)
1319 if (tempDateString[strlen(tempDateString)-1] == ')')
1320 {
1321 zonePntr = strrchr (tempDateString, '(');
1322 if (zonePntr != NULL)
1323 {
1324 *zonePntr-- = 0; // Zap the '(', then remove trailing spaces.
1325 while (zonePntr >= tempDateString && isspace (*zonePntr))
1326 *zonePntr-- = 0;
1327 if (zonePntr < tempDateString)
1328 return -1; // Empty string.
1329 }
1330 }
1331
1332 // Look for a numeric time zone like Tue, 30 Dec 2003 05:01:40 +0000
1333 for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--)
1334 {
1335 zonePntr = tempDateString + zoneIndex;
1336 if (zonePntr[0] == '+' || zonePntr[0] == '-')
1337 {
1338 if (zonePntr[1] >= '0' && zonePntr[1] <= '9' &&
1339 zonePntr[2] >= '0' && zonePntr[2] <= '9' &&
1340 zonePntr[3] >= '0' && zonePntr[3] <= '9' &&
1341 zonePntr[4] >= '0' && zonePntr[4] <= '9')
1342 break;
1343 }
1344 }
1345 if (zoneIndex >= 0)
1346 {
1347 // Remove the zone from the date string and any following time zone
1348 // letter codes. Also put in GMT so that the date gets parsed as GMT.
1349 memcpy (tempZoneString, zonePntr, 5);
1350 tempZoneString [5] = 0;
1351 strcpy (zonePntr, "GMT");
1352 }
1353 else // No numeric time zone found.
1354 strcpy (tempZoneString, "+0000");
1355
1356 time (¤tTime);
1357 dateAsTime = parsedate (tempDateString, currentTime);
1358 if (dateAsTime == (time_t) -1)
1359 return -1; // Failure.
1360
1361 zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes.
1362 tempZoneString[3] = 0;
1363 zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours.
1364 if (tempZoneString[0] == '+')
1365 zoneDeltaTime = 0 - zoneDeltaTime;
1366 dateAsTime += zoneDeltaTime;
1367
1368 return dateAsTime;
1369 }
1370
1371
1372 /*! Parses a mail header and fills the headers BMessage
1373 */
1374 status_t
parse_header(BMessage & headers,BPositionIO & input)1375 parse_header(BMessage &headers, BPositionIO &input)
1376 {
1377 char *buffer = NULL;
1378 size_t bufferSize = 0;
1379 int32 length;
1380
1381 while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) {
1382 --length;
1383 // Don't include the \n at the end of the buffer.
1384
1385 // convert to UTF-8 and null-terminate the buffer
1386 length = rfc2047_to_utf8(&buffer, &bufferSize, length);
1387 buffer[length] = '\0';
1388
1389 const char *delimiter = strstr(buffer, ":");
1390 if (delimiter == NULL)
1391 continue;
1392
1393 BString header(buffer, delimiter - buffer);
1394 header.CapitalizeEachWord();
1395 // unified case for later fetch
1396
1397 delimiter++; // Skip the colon.
1398 // Skip over leading white space and tabs.
1399 // TODO: (comments in brackets).
1400 while (isspace(*delimiter))
1401 delimiter++;
1402
1403 // TODO: implement joining of multiple header tags (i.e. multiple "Cc:"s)
1404 headers.AddString(header.String(), delimiter);
1405 }
1406 free(buffer);
1407
1408 return B_OK;
1409 }
1410
1411
1412 status_t
extract_from_header(const BString & header,const BString & field,BString & target)1413 extract_from_header(const BString& header, const BString& field,
1414 BString& target)
1415 {
1416 int32 headerLength = header.Length();
1417 int32 fieldEndPos = 0;
1418 while (true) {
1419 int32 pos = header.IFindFirst(field, fieldEndPos);
1420 if (pos < 0)
1421 return B_BAD_VALUE;
1422 fieldEndPos = pos + field.Length();
1423
1424 if (pos != 0 && header.ByteAt(pos - 1) != '\n')
1425 continue;
1426 if (header.ByteAt(fieldEndPos) == ':')
1427 break;
1428 }
1429 fieldEndPos++;
1430
1431 int32 crPos = fieldEndPos;
1432 while (true) {
1433 fieldEndPos = crPos;
1434 crPos = header.FindFirst('\n', crPos);
1435 if (crPos < 0)
1436 crPos = headerLength;
1437 BString temp;
1438 header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos);
1439 if (header.ByteAt(crPos - 1) == '\r') {
1440 temp.Truncate(temp.Length() - 1);
1441 temp += " ";
1442 }
1443 target += temp;
1444 crPos++;
1445 if (crPos >= headerLength)
1446 break;
1447 char nextByte = header.ByteAt(crPos);
1448 if (nextByte != ' ' && nextByte != '\t')
1449 break;
1450 crPos++;
1451 }
1452
1453 size_t bufferSize = target.Length();
1454 char* buffer = target.LockBuffer(bufferSize);
1455 size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize);
1456 target.UnlockBuffer(length);
1457
1458 trim_white_space(target);
1459
1460 return B_OK;
1461 }
1462
1463
1464 void
extract_address(BString & address)1465 extract_address(BString &address)
1466 {
1467 const char *string = address.String();
1468 int32 first;
1469
1470 // first, remove all quoted text
1471
1472 if ((first = address.FindFirst('"')) >= 0) {
1473 int32 last = first + 1;
1474 while (string[last] && string[last] != '"')
1475 last++;
1476
1477 if (string[last] == '"')
1478 address.Remove(first, last + 1 - first);
1479 }
1480
1481 // try to extract the address now
1482
1483 if ((first = address.FindFirst('<')) >= 0) {
1484 // the world likes us and we can just get the address the easy way...
1485 int32 last = address.FindFirst('>');
1486 if (last >= 0) {
1487 address.Truncate(last);
1488 address.Remove(0, first + 1);
1489
1490 return;
1491 }
1492 }
1493
1494 // then, see if there is anything in parenthesis to throw away
1495
1496 if ((first = address.FindFirst('(')) >= 0) {
1497 int32 last = first + 1;
1498 while (string[last] && string[last] != ')')
1499 last++;
1500
1501 if (string[last] == ')')
1502 address.Remove(first, last + 1 - first);
1503 }
1504
1505 // now, there shouldn't be much else left
1506
1507 trim_white_space(address);
1508 }
1509
1510
1511 void
get_address_list(BList & list,const char * string,void (* cleanupFunc)(BString &))1512 get_address_list(BList &list, const char *string,
1513 void (*cleanupFunc)(BString &))
1514 {
1515 if (string == NULL || !string[0])
1516 return;
1517
1518 const char *start = string;
1519
1520 while (true) {
1521 if (string[0] == '"') {
1522 const char *quoteEnd = ++string;
1523
1524 while (quoteEnd[0] && quoteEnd[0] != '"')
1525 quoteEnd++;
1526
1527 if (!quoteEnd[0]) // string exceeds line!
1528 quoteEnd = string;
1529
1530 string = quoteEnd + 1;
1531 }
1532
1533 if (string[0] == ',' || string[0] == '\0') {
1534 BString address(start, string - start);
1535 trim_white_space(address);
1536
1537 if (cleanupFunc)
1538 cleanupFunc(address);
1539
1540 list.AddItem(strdup(address.String()));
1541
1542 start = string + 1;
1543 }
1544
1545 if (!string[0])
1546 break;
1547
1548 string++;
1549 }
1550 }
1551
1552
1553 status_t
CopyMailFolderAttributes(const char * targetPath)1554 CopyMailFolderAttributes(const char* targetPath)
1555 {
1556 BPath path;
1557 status_t status = find_directory(B_USER_SETTINGS_DIRECTORY, &path);
1558 if (status != B_OK)
1559 return status;
1560
1561 path.Append("Tracker");
1562 path.Append("DefaultQueryTemplates");
1563 path.Append("text_x-email");
1564
1565 BNode source(path.Path());
1566 BNode target(targetPath);
1567 return BPrivate::CopyAttributes(source, target);
1568 }
1569