xref: /haiku/src/kits/mail/mail_util.cpp (revision f2df0cfe93a902842f6f4629ff614f5b3f9bf687)
1 /*
2  * Copyright 2011-2016, Haiku, Inc. All rights reserved.
3  * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
4  */
5 
6 
7 #include <mail_util.h>
8 
9 #include <stdlib.h>
10 #include <strings.h>
11 #include <stdio.h>
12 #define __USE_GNU
13 #include <regex.h>
14 #include <ctype.h>
15 #include <errno.h>
16 
17 #include <FindDirectory.h>
18 #include <List.h>
19 #include <Locker.h>
20 #include <parsedate.h>
21 #include <Path.h>
22 #include <String.h>
23 #include <UTF8.h>
24 
25 #include <mail_encoding.h>
26 
27 #include <AttributeUtilities.h>
28 #include <CharacterSet.h>
29 #include <CharacterSetRoster.h>
30 
31 
32 using namespace BPrivate;
33 
34 
35 #define CRLF   "\r\n"
36 
// One row of the mail_charsets table: associates a character set name with
// the numeric conversion identifier used for it.
struct CharsetConversionEntry {
	// Character set name; a NULL pointer marks the end of the table.
	const char *charset;
	// Conversion constant; utf8_to_rfc2047() compares it with its "charset"
	// argument to find the name to write into an encoded word.
	uint32 flavor;
};
41 
// Table of character set names and their conversion identifiers.  Several
// names may map to the same identifier; utf8_to_rfc2047() uses the first
// entry whose flavor matches, so the preferred name has to come first.
extern const CharsetConversionEntry mail_charsets[] = {
	// In order of authority, so when searching for the name for a particular
	// numbered conversion, start at the beginning of the array.
	{"iso-8859-1",  B_ISO1_CONVERSION}, // MIME STANDARD
	{"iso-8859-2",  B_ISO2_CONVERSION}, // MIME STANDARD
	{"iso-8859-3",  B_ISO3_CONVERSION}, // MIME STANDARD
	{"iso-8859-4",  B_ISO4_CONVERSION}, // MIME STANDARD
	{"iso-8859-5",  B_ISO5_CONVERSION}, // MIME STANDARD
	{"iso-8859-6",  B_ISO6_CONVERSION}, // MIME STANDARD
	{"iso-8859-7",  B_ISO7_CONVERSION}, // MIME STANDARD
	{"iso-8859-8",  B_ISO8_CONVERSION}, // MIME STANDARD
	{"iso-8859-9",  B_ISO9_CONVERSION}, // MIME STANDARD
	{"iso-8859-10", B_ISO10_CONVERSION}, // MIME STANDARD
	{"iso-8859-13", B_ISO13_CONVERSION}, // MIME STANDARD
	{"iso-8859-14", B_ISO14_CONVERSION}, // MIME STANDARD
	{"iso-8859-15", B_ISO15_CONVERSION}, // MIME STANDARD

	{"shift_jis",	B_SJIS_CONVERSION}, // MIME STANDARD
	{"shift-jis",	B_SJIS_CONVERSION},
	{"iso-2022-jp", B_JIS_CONVERSION}, // MIME STANDARD
	{"euc-jp",		B_EUC_CONVERSION}, // MIME STANDARD

	{"euc-kr",      B_EUC_KR_CONVERSION}, // Shift encoding 7 bit and KSC-5601 if bit 8 is on. // MIME STANDARD
	{"ksc5601",		B_EUC_KR_CONVERSION},    // Not sure if 7 or 8 bit. // COMPATIBLE?
	{"ks_c_5601-1987", B_EUC_KR_CONVERSION}, // Not sure if 7 or 8 bit. // COMPATIBLE with stupid MS software

	{"koi8-r",      B_KOI8R_CONVERSION},           // MIME STANDARD
	{"windows-1251",B_MS_WINDOWS_1251_CONVERSION}, // MIME STANDARD
	{"windows-1252",B_MS_WINDOWS_CONVERSION},      // MIME STANDARD

	{"dos-437",     B_MS_DOS_CONVERSION},     // WRONG NAME : MIME STANDARD NAME = NONE ( IBM437? )
	{"dos-866",     B_MS_DOS_866_CONVERSION}, // WRONG NAME : MIME STANDARD NAME = NONE ( IBM866? )
	{"x-mac-roman", B_MAC_ROMAN_CONVERSION},  // WRONG NAME : MIME STANDARD NAME = NONE ( macintosh? + x-mac-roman? )

	// NOTE(review): the literal 24 is presumably the value of
	// B_BIG5_CONVERSION - confirm against UTF8.h before replacing it.
    {"big5",        24}, // MIME STANDARD

	// NOTE(review): the literal 25 is presumably the value of
	// B_GBK_CONVERSION - confirm against UTF8.h before replacing it.
    {"gb18030",     25}, // WRONG NAME : MIME STANDARD NAME = NONE ( GB18030? )
    {"gb2312",      25}, // COMPATIBLE
    {"gbk",         25}, // COMPATIBLE

	/* {"utf-16",		B_UNICODE_CONVERSION}, Might not work due to NULs in text, needs testing. */
	{"us-ascii",	B_MAIL_US_ASCII_CONVERSION},                                  // MIME STANDARD
	{"utf-8",		B_MAIL_UTF8_CONVERSION /* Special code for no conversion */}, // MIME STANDARD

	{NULL, (uint32) -1} /* End of list marker, NULL string pointer is the key. */
};
88 
89 
// File-scope state.  NOTE(review): none of these are touched by the
// conversion and line folding functions that directly follow; judging by the
// re_pattern_buffer type they presumably belong to regex based helpers
// further down in this file - confirm before changing them.
static int32 gLocker = 0;
static size_t gNsub = 1;
static re_pattern_buffer gRe;
static re_pattern_buffer *gRebuf = NULL;
static unsigned char gTranslation[256];
95 
96 
97 static int
98 handle_non_rfc2047_encoding(char **buffer, size_t *bufferLength,
99 	size_t *sourceLength)
100 {
101 	char *string = *buffer;
102 	int32 length = *sourceLength;
103 	int32 i;
104 
105 	// check for 8-bit characters
106 	for (i = 0;i < length;i++)
107 		if (string[i] & 0x80)
108 			break;
109 	if (i == length)
110 		return false;
111 
112 	// check for groups of 8-bit characters - this code is not very smart;
113 	// it just can detect some sort of single-byte encoded stuff, the rest
114 	// is regarded as UTF-8
115 
116 	int32 singletons = 0,doubles = 0;
117 
118 	for (i = 0;i < length;i++)
119 	{
120 		if (string[i] & 0x80)
121 		{
122 			if ((string[i + 1] & 0x80) == 0)
123 				singletons++;
124 			else doubles++;
125 			i++;
126 		}
127 	}
128 
129 	if (singletons != 0)	// can't be valid UTF-8 anymore, so we assume ISO-Latin-1
130 	{
131 		int32 state = 0;
132 		// just to be sure
133 		int32 destLength = length * 4 + 1;
134 		int32 destBufferLength = destLength;
135 		char *dest = (char*)malloc(destLength);
136 		if (dest == NULL)
137 			return 0;
138 
139 		if (convert_to_utf8(B_ISO1_CONVERSION, string, &length,dest,
140 			&destLength, &state) == B_OK) {
141 			*buffer = dest;
142 			*bufferLength = destBufferLength;
143 			*sourceLength = destLength;
144 			return true;
145 		}
146 		free(dest);
147 		return false;
148 	}
149 
150 	// we assume a valid UTF-8 string here, but yes, we don't check it
151 	return true;
152 }
153 
154 
155 // #pragma mark -
156 
157 
158 status_t
159 write_read_attr(BNode& node, read_flags flag)
160 {
161 	if (node.WriteAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
162 			< 0)
163 		return B_ERROR;
164 
165 	// Manage the status string only if it currently has a known state
166 	BString currentStatus;
167 	if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &currentStatus) == B_OK
168 		&& currentStatus.ICompare("New") != 0
169 		&& currentStatus.ICompare("Read") != 0
170 		&& currentStatus.ICompare("Seen") != 0) {
171 		return B_OK;
172 	}
173 
174 	const char* statusString = flag == B_READ ? "Read"
175 		: flag  == B_SEEN ? "Seen" : "New";
176 	if (node.WriteAttr(B_MAIL_ATTR_STATUS, B_STRING_TYPE, 0, statusString,
177 			strlen(statusString)) < 0)
178 		return B_ERROR;
179 
180 	return B_OK;
181 }
182 
183 
184 status_t
185 read_read_attr(BNode& node, read_flags& flag)
186 {
187 	if (node.ReadAttr(B_MAIL_ATTR_READ, B_INT32_TYPE, 0, &flag, sizeof(int32))
188 			== sizeof(int32))
189 		return B_OK;
190 
191 	BString statusString;
192 	if (node.ReadAttrString(B_MAIL_ATTR_STATUS, &statusString) == B_OK) {
193 		if (statusString.ICompare("New"))
194 			flag = B_UNREAD;
195 		else
196 			flag = B_READ;
197 
198 		return B_OK;
199 	}
200 
201 	return B_ERROR;
202 }
203 
204 
205 // The next couple of functions are our wrapper around convert_to_utf8 and
206 // convert_from_utf8 so that they can also convert from UTF-8 to UTF-8 by
207 // specifying the B_MAIL_UTF8_CONVERSION constant as the conversion operation.
208 // It also lets us add new conversions, like B_MAIL_US_ASCII_CONVERSION.
209 
210 
211 status_t
212 mail_convert_to_utf8(uint32 srcEncoding, const char *src, int32 *srcLen,
213 	char *dst, int32 *dstLen, int32 *state, char substitute)
214 {
215 	int32 copyAmount;
216 	char *originalDst = dst;
217 	status_t returnCode = -1;
218 
219 	if (srcEncoding == B_MAIL_UTF8_CONVERSION) {
220 		copyAmount = *srcLen;
221 		if (*dstLen < copyAmount)
222 			copyAmount = *dstLen;
223 		memcpy (dst, src, copyAmount);
224 		*srcLen = copyAmount;
225 		*dstLen = copyAmount;
226 		returnCode = B_OK;
227 	} else if (srcEncoding == B_MAIL_US_ASCII_CONVERSION) {
228 		int32 i;
229 		unsigned char letter;
230 		copyAmount = *srcLen;
231 		if (*dstLen < copyAmount)
232 			copyAmount = *dstLen;
233 		for (i = 0; i < copyAmount; i++) {
234 			letter = *src++;
235 			if (letter > 0x80U)
236 				// Invalid, could also use substitute, but better to strip high bit.
237 				*dst++ = letter - 0x80U;
238 			else if (letter == 0x80U)
239 				// Can't convert to 0x00 since that's NUL, which would cause problems.
240 				*dst++ = substitute;
241 			else
242 				*dst++ = letter;
243 		}
244 		*srcLen = copyAmount;
245 		*dstLen = copyAmount;
246 		returnCode = B_OK;
247 	} else
248 		returnCode = convert_to_utf8 (srcEncoding, src, srcLen,
249 			dst, dstLen, state, substitute);
250 
251 	if (returnCode == B_OK) {
252 		// Replace spurious NUL bytes, which should normally not be in the
253 		// output of the decoding (not normal UTF-8 characters, and no NULs are
254 		// in our usual input strings).  They happen for some odd ISO-2022-JP
255 		// byte pair combinations which are improperly handled by the BeOS
256 		// routines.  Like "\e$ByD\e(B" where \e is the ESC character $1B, the
257 		// first ESC $ B switches to a Japanese character set, then the next
258 		// two bytes "yD" specify a character, then ESC ( B switches back to
259 		// the ASCII character set.  The UTF-8 conversion yields a NUL byte.
260 		int32 i;
261 		for (i = 0; i < *dstLen; i++)
262 			if (originalDst[i] == 0)
263 				originalDst[i] = substitute;
264 	}
265 	return returnCode;
266 }
267 
268 
269 status_t
270 mail_convert_from_utf8(uint32 dstEncoding, const char *src, int32 *srcLen,
271 	char *dst, int32 *dstLen, int32 *state, char substitute)
272 {
273 	int32 copyAmount;
274 	status_t errorCode;
275 	int32 originalDstLen = *dstLen;
276 	int32 tempDstLen;
277 	int32 tempSrcLen;
278 
279 	if (dstEncoding == B_MAIL_UTF8_CONVERSION) {
280 		copyAmount = *srcLen;
281 		if (*dstLen < copyAmount)
282 			copyAmount = *dstLen;
283 		memcpy (dst, src, copyAmount);
284 		*srcLen = copyAmount;
285 		*dstLen = copyAmount;
286 		return B_OK;
287 	}
288 
289 	if (dstEncoding == B_MAIL_US_ASCII_CONVERSION) {
290 		int32 characterLength;
291 		int32 dstRemaining = *dstLen;
292 		unsigned char letter;
293 		int32 srcRemaining = *srcLen;
294 
295 		// state contains the number of source bytes to skip, left over from a
296 		// partial UTF-8 character split over the end of the buffer from last
297 		// time.
298 		if (srcRemaining <= *state) {
299 			*state -= srcRemaining;
300 			*dstLen = 0;
301 			return B_OK;
302 		}
303 		srcRemaining -= *state;
304 		src += *state;
305 		*state = 0;
306 
307 		while (true) {
308 			if (srcRemaining <= 0 || dstRemaining <= 0)
309 				break;
310 			letter = *src;
311 			if (letter < 0x80)
312 				characterLength = 1; // Regular ASCII equivalent code.
313 			else if (letter < 0xC0)
314 				characterLength = 1; // Invalid in-between data byte 10xxxxxx.
315 			else if (letter < 0xE0)
316 				characterLength = 2;
317 			else if (letter < 0xF0)
318 				characterLength = 3;
319 			else if (letter < 0xF8)
320 				characterLength = 4;
321 			else if (letter < 0xFC)
322 				characterLength = 5;
323 			else if (letter < 0xFE)
324 				characterLength = 6;
325 			else
326 				characterLength = 1; // 0xFE and 0xFF are invalid in UTF-8.
327 			if (letter < 0x80)
328 				*dst++ = *src;
329 			else
330 				*dst++ = substitute;
331 			dstRemaining--;
332 			if (srcRemaining < characterLength) {
333 				// Character split past the end of the buffer.
334 				*state = characterLength - srcRemaining;
335 				srcRemaining = 0;
336 			} else {
337 				src += characterLength;
338 				srcRemaining -= characterLength;
339 			}
340 		}
341 		// Update with the amounts used.
342 		*srcLen = *srcLen - srcRemaining;
343 		*dstLen = *dstLen - dstRemaining;
344 		return B_OK;
345 	}
346 
347 	errorCode = convert_from_utf8(dstEncoding, src, srcLen, dst, dstLen, state,
348 		substitute);
349 	if (errorCode != B_OK)
350 		return errorCode;
351 
352 	if (dstEncoding != B_JIS_CONVERSION)
353 		return B_OK;
354 
355 	// B_JIS_CONVERSION (ISO-2022-JP) works by shifting between different
356 	// character subsets.  For E-mail headers (and other uses), it needs to be
357 	// switched back to ASCII at the end (otherwise the last character gets
358 	// lost or other weird things happen in the headers).  Note that we can't
359 	// just append the escape code since the convert_from_utf8 "state" will be
360 	// wrong.  So we append an ASCII letter and throw it away, leaving just the
361 	// escape code.  Well, it actually switches to the Roman character set, not
362 	// ASCII, but that should be OK.
363 
364 	tempDstLen = originalDstLen - *dstLen;
365 	if (tempDstLen < 3) // Not enough space remaining in the output.
366 		return B_OK; // Sort of an error, but we did convert the rest OK.
367 	tempSrcLen = 1;
368 	errorCode = convert_from_utf8(dstEncoding, "a", &tempSrcLen,
369 		dst + *dstLen, &tempDstLen, state, substitute);
370 	if (errorCode != B_OK)
371 		return errorCode;
372 	*dstLen += tempDstLen - 1 /* don't include the ASCII letter */;
373 	return B_OK;
374 }
375 
376 
377 ssize_t
378 rfc2047_to_utf8(char **bufp, size_t *bufLen, size_t strLen)
379 {
380 	char *head, *tail;
381 	char *charset, *encoding, *end;
382 	ssize_t ret = B_OK;
383 
384 	if (bufp == NULL || *bufp == NULL)
385 		return -1;
386 
387 	char *string = *bufp;
388 
389 	//---------Handle *&&^%*&^ non-RFC compliant, 8bit mail
390 	if (handle_non_rfc2047_encoding(bufp,bufLen,&strLen))
391 		return strLen;
392 
393 	// set up string length
394 	if (strLen == 0)
395 		strLen = strlen(*bufp);
396 	char lastChar = (*bufp)[strLen];
397 	(*bufp)[strLen] = '\0';
398 
399 	//---------Whew! Now for RFC compliant mail
400 	bool encodedWordFoundPreviously = false;
401 	for (head = tail = string;
402 		((charset = strstr(tail, "=?")) != NULL)
403 		&& (((encoding = strchr(charset + 2, '?')) != NULL)
404 			&& encoding[1] && (encoding[2] == '?') && encoding[3])
405 		&& (end = strstr(encoding + 3, "?=")) != NULL;
406 		// found "=?...charset...?e?...text...?=   (e == encoding)
407 		//        ^charset       ^encoding    ^end
408 		tail = end)
409 	{
410 		// Copy non-encoded text (from tail up to charset) to the output.
411 		// Ignore spaces between two encoded "words".  RFC2047 says the words
412 		// should be concatenated without the space (designed for Asian
413 		// sentences which have no spaces yet need to be broken into "words" to
414 		// keep within the line length limits).
415 		bool nonSpaceFound = false;
416 		for (int i = 0; i < charset-tail; i++) {
417 			if (!isspace (tail[i])) {
418 				nonSpaceFound = true;
419 				break;
420 			}
421 		}
422 		if (!encodedWordFoundPreviously || nonSpaceFound) {
423 			if (string != tail && tail != charset)
424 				memmove(string, tail, charset-tail);
425 			string += charset-tail;
426 		}
427 		tail = charset;
428 		encodedWordFoundPreviously = true;
429 
430 		// move things to point at what they should:
431 		//   =?...charset...?e?...text...?=   (e == encoding)
432 		//     ^charset      ^encoding     ^end
433 		charset += 2;
434 		encoding += 1;
435 		end += 2;
436 
437 		// find the charset this text is in now
438 		size_t cLen = encoding - 1 - charset;
439 		bool base64encoded = toupper(*encoding) == 'B';
440 
441 		uint32 convertID = B_MAIL_NULL_CONVERSION;
442 		char charsetName[cLen + 1];
443 		memcpy(charsetName, charset, cLen);
444 		charsetName[cLen] = '\0';
445 		if (strcasecmp(charsetName, "us-ascii") == 0) {
446 			convertID = B_MAIL_US_ASCII_CONVERSION;
447 		} else if (strcasecmp(charsetName, "utf-8") == 0) {
448 			convertID = B_MAIL_UTF8_CONVERSION;
449 		} else {
450 			const BCharacterSet* charSet
451 				= BCharacterSetRoster::FindCharacterSetByName(charsetName);
452 			if (charSet != NULL) {
453 				convertID = charSet->GetConversionID();
454 			}
455 		}
456 		if (convertID == B_MAIL_NULL_CONVERSION) {
457 			// unidentified charset
458 			// what to do? doing nothing skips the encoded text;
459 			// but we should keep it: we copy it to the output.
460 			if (string != tail && tail != end)
461 				memmove(string, tail, end-tail);
462 			string += end-tail;
463 			continue;
464 		}
465 		// else we've successfully identified the charset
466 
467 		char *src = encoding+2;
468 		int32 srcLen = end - 2 - src;
469 		// encoded text: src..src+srcLen
470 
471 		// decode text, get decoded length (reducing xforms)
472 		srcLen = !base64encoded ? decode_qp(src, src, srcLen, 1)
473 			: decode_base64(src, src, srcLen);
474 
475 		// allocate space for the converted text
476 		int32 dstLen = end-string + *bufLen-strLen;
477 		char *dst = (char*)malloc(dstLen);
478 		int32 cvLen = srcLen;
479 		int32 convState = 0;
480 
481 		//
482 		// do the conversion
483 		//
484 		ret = mail_convert_to_utf8(convertID, src, &cvLen, dst, &dstLen,
485 			&convState);
486 		if (ret != B_OK) {
487 			// what to do? doing nothing skips the encoded text
488 			// but we should keep it: we copy it to the output.
489 
490 			free(dst);
491 
492 			if (string != tail && tail != end)
493 				memmove(string, tail, end-tail);
494 			string += end-tail;
495 			continue;
496 		}
497 		/* convert_to_ is either returning something wrong or my
498 		   test data is screwed up.  Whatever it is, Not Enough
499 		   Space is not the only cause of the below, so we just
500 		   assume it succeeds if it converts anything at all.
501 		else if (cvLen < srcLen)
502 		{
503 			// not enough room to convert the data;
504 			// grow *buf and retry
505 
506 			free(dst);
507 
508 			char *temp = (char*)realloc(*bufp, 2*(*bufLen + 1));
509 			if (temp == NULL)
510 			{
511 				ret = B_NO_MEMORY;
512 				break;
513 			}
514 
515 			*bufp = temp;
516 			*bufLen = 2*(*bufLen + 1);
517 
518 			string = *bufp + (string-head);
519 			tail = *bufp + (tail-head);
520 			charset = *bufp + (charset-head);
521 			encoding = *bufp + (encoding-head);
522 			end = *bufp + (end-head);
523 			src = *bufp + (src-head);
524 			head = *bufp;
525 			continue;
526 		}
527 		*/
528 		else {
529 			if (dstLen > end-string) {
530 				// copy the string forward...
531 				memmove(string+dstLen, end, strLen - (end-head) + 1);
532 				strLen += string+dstLen - end;
533 				end = string + dstLen;
534 			}
535 
536 			memcpy(string, dst, dstLen);
537 			string += dstLen;
538 			free(dst);
539 			continue;
540 		}
541 	}
542 
543 	// copy everything that's left
544 	size_t tailLen = strLen - (tail - head);
545 	memmove(string, tail, tailLen+1);
546 	string += tailLen;
547 
548 	// replace the last char
549 	(*bufp)[strLen] = lastChar;
550 
551 	return ret < B_OK ? ret : string-head;
552 }
553 
554 
555 ssize_t
556 utf8_to_rfc2047 (char **bufp, ssize_t length, uint32 charset, char encoding)
557 {
558 	struct word {
559 		BString	originalWord;
560 		BString	convertedWord;
561 		bool	needsEncoding;
562 
563 		// Convert the word from UTF-8 to the desired character set.  The
564 		// converted version also includes the escape codes to return to ASCII
565 		// mode, if relevant.  Also note if it uses unprintable characters,
566 		// which means it will need that special encoding treatment later.
567 		void ConvertWordToCharset (uint32 charset) {
568 			int32 state = 0;
569 			int32 originalLength = originalWord.Length();
570 			int32 convertedLength = originalLength * 5 + 1;
571 			char *convertedBuffer = convertedWord.LockBuffer (convertedLength);
572 			mail_convert_from_utf8 (charset, originalWord.String(),
573 				&originalLength, convertedBuffer, &convertedLength, &state);
574 			for (int i = 0; i < convertedLength; i++) {
575 				if ((convertedBuffer[i] & (1 << 7)) ||
576 					(convertedBuffer[i] >= 0 && convertedBuffer[i] < 32)) {
577 					needsEncoding = true;
578 					break;
579 				}
580 			}
581 			convertedWord.UnlockBuffer (convertedLength);
582 		};
583 	};
584 	struct word *currentWord;
585 	BList words;
586 
587 	// Break the header into words.  White space characters (including tabs and
588 	// newlines) separate the words.  Each word includes any space before it as
589 	// part of the word.  Actually, quotes and other special characters
590 	// (",()<>@) are treated as separate words of their own so that they don't
591 	// get encoded (because MIME headers get the quotes parsed before character
592 	// set unconversion is done).  The reader is supposed to ignore all white
593 	// space between encoded words, which can be inserted so that older mail
594 	// parsers don't have overly long line length problems.
595 
596 	const char *source = *bufp;
597 	const char *bufEnd = *bufp + length;
598 	const char *specialChars = "\"()<>@,";
599 
600 	while (source < bufEnd) {
601 		currentWord = new struct word;
602 		currentWord->needsEncoding = false;
603 
604 		int wordEnd = 0;
605 
606 		// Include leading spaces as part of the word.
607 		while (source + wordEnd < bufEnd && isspace (source[wordEnd]))
608 			wordEnd++;
609 
610 		if (source + wordEnd < bufEnd &&
611 			strchr (specialChars, source[wordEnd]) != NULL) {
612 			// Got a quote mark or other special character, which is treated as
613 			// a word in itself since it shouldn't be encoded, which would hide
614 			// it from the mail system.
615 			wordEnd++;
616 		} else {
617 			// Find the end of the word.  Leave wordEnd pointing just after the
618 			// last character in the word.
619 			while (source + wordEnd < bufEnd) {
620 				if (isspace(source[wordEnd]) ||
621 					strchr (specialChars, source[wordEnd]) != NULL)
622 					break;
623 				if (wordEnd > 51 /* Makes Base64 ISO-2022-JP "word" a multiple of 4 bytes */ &&
624 					0xC0 == (0xC0 & (unsigned int) source[wordEnd])) {
625 					// No English words are that long (46 is the longest),
626 					// break up what is likely Asian text (which has no spaces)
627 					// at the start of the next non-ASCII UTF-8 character (high
628 					// two bits are both ones).  Note that two encoded words in
629 					// a row get joined together, even if there is a space
630 					// between them in the final output text, according to the
631 					// standard.  Next word will also be conveniently get
632 					// encoded due to the 0xC0 test.
633 					currentWord->needsEncoding = true;
634 					break;
635 				}
636 				wordEnd++;
637 			}
638 		}
639 		currentWord->originalWord.SetTo (source, wordEnd);
640 		currentWord->ConvertWordToCharset (charset);
641 		words.AddItem(currentWord);
642 		source += wordEnd;
643 	}
644 
645 	// Combine adjacent words which contain unprintable text so that the
646 	// overhead of switching back and forth between regular text and specially
647 	// encoded text is reduced.  However, the combined word must be shorter
648 	// than the maximum of 75 bytes, including character set specification and
649 	// all those delimiters (worst case 22 bytes of overhead).
650 
651 	struct word *run;
652 
653 	for (int32 i = 0; (currentWord = (struct word *) words.ItemAt (i)) != NULL; i++) {
654 		if (!currentWord->needsEncoding)
655 			continue; // No need to combine unencoded words.
656 		for (int32 g = i+1; (run = (struct word *) words.ItemAt (g)) != NULL; g++) {
657 			if (!run->needsEncoding)
658 				break; // Don't want to combine encoded and unencoded words.
659 			if ((currentWord->convertedWord.Length() + run->convertedWord.Length() <= 53)) {
660 				currentWord->originalWord.Append (run->originalWord);
661 				currentWord->ConvertWordToCharset (charset);
662 				words.RemoveItem(g);
663 				delete run;
664 				g--;
665 			} else // Can't merge this word, result would be too long.
666 				break;
667 		}
668 	}
669 
670 	// Combine the encoded and unencoded words into one line, doing the
671 	// quoted-printable or base64 encoding.  Insert an extra space between
672 	// words which are both encoded to make word wrapping easier, since there
673 	// is normally none, and you're allowed to insert space (the receiver
674 	// throws it away if it is between encoded words).
675 
676 	BString rfc2047;
677 	bool	previousWordNeededEncoding = false;
678 
679 	const char *charset_dec = "none-bug";
680 	for (int32 i = 0; mail_charsets[i].charset != NULL; i++) {
681 		if (mail_charsets[i].flavor == charset) {
682 			charset_dec = mail_charsets[i].charset;
683 			break;
684 		}
685 	}
686 
687 	while ((currentWord = (struct word *)words.RemoveItem((int32)0)) != NULL) {
688 		if ((encoding != quoted_printable && encoding != base64) ||
689 		!currentWord->needsEncoding) {
690 			rfc2047.Append (currentWord->convertedWord);
691 		} else {
692 			// This word needs encoding.  Try to insert a space between it and
693 			// the previous word.
694 			if (previousWordNeededEncoding)
695 				rfc2047 << ' '; // Can insert as many spaces as you want between encoded words.
696 			else {
697 				// Previous word is not encoded, spaces are significant.  Try
698 				// to move a space from the start of this word to be outside of
699 				// the encoded text, so that there is a bit of space between
700 				// this word and the previous one to enhance word wrapping
701 				// chances later on.
702 				if (currentWord->originalWord.Length() > 1 &&
703 					isspace (currentWord->originalWord[0])) {
704 					rfc2047 << currentWord->originalWord[0];
705 					currentWord->originalWord.Remove (0 /* offset */, 1 /* length */);
706 					currentWord->ConvertWordToCharset (charset);
707 				}
708 			}
709 
710 			char *encoded = NULL;
711 			ssize_t encoded_len = 0;
712 			int32 convertedLength = currentWord->convertedWord.Length ();
713 			const char *convertedBuffer = currentWord->convertedWord.String ();
714 
715 			switch (encoding) {
716 				case quoted_printable:
717 					encoded = (char *) malloc (convertedLength * 3);
718 					encoded_len = encode_qp (encoded, convertedBuffer, convertedLength, true /* headerMode */);
719 					break;
720 				case base64:
721 					encoded = (char *) malloc (convertedLength * 2);
722 					encoded_len = encode_base64 (encoded, convertedBuffer, convertedLength, true /* headerMode */);
723 					break;
724 				default: // Unknown encoding type, shouldn't happen.
725 					encoded = (char *) convertedBuffer;
726 					encoded_len = convertedLength;
727 					break;
728 			}
729 
730 			rfc2047 << "=?" << charset_dec << '?' << encoding << '?';
731 			rfc2047.Append (encoded, encoded_len);
732 			rfc2047 << "?=";
733 
734 			if (encoding == quoted_printable || encoding == base64)
735 				free(encoded);
736 		}
737 		previousWordNeededEncoding = currentWord->needsEncoding;
738 		delete currentWord;
739 	}
740 
741 	free(*bufp);
742 
743 	ssize_t finalLength = rfc2047.Length ();
744 	*bufp = (char *) (malloc (finalLength + 1));
745 	memcpy (*bufp, rfc2047.String(), finalLength);
746 	(*bufp)[finalLength] = 0;
747 
748 	return finalLength;
749 }
750 
751 
752 void
753 FoldLineAtWhiteSpaceAndAddCRLF(BString &string)
754 {
755 	int inputLength = string.Length();
756 	int lineStartIndex;
757 	const int maxLineLength = 78; // Doesn't include CRLF.
758 	BString output;
759 	int splitIndex;
760 	int tempIndex;
761 
762 	lineStartIndex = 0;
763 	while (true) {
764 		// If we don't need to wrap the text, just output the remainder, if any.
765 
766 		if (lineStartIndex + maxLineLength >= inputLength) {
767 			if (lineStartIndex < inputLength) {
768 				output.Insert (string, lineStartIndex /* source offset */,
769 					inputLength - lineStartIndex /* count */,
770 					output.Length() /* insert at */);
771 				output.Append (CRLF);
772 			}
773 			break;
774 		}
775 
776 		// Look ahead for a convenient spot to split it, between a comma and
777 		// space, which you often see between e-mail addresses like this:
778 		// "Joe Who" joe@dot.com, "Someone Else" else@blot.com
779 
780 		tempIndex = lineStartIndex + maxLineLength;
781 		if (tempIndex > inputLength)
782 			tempIndex = inputLength;
783 		splitIndex = string.FindLast (", ", tempIndex);
784 		if (splitIndex >= lineStartIndex)
785 			splitIndex++; // Point to the space character.
786 
787 		// If none of those exist, try splitting at any white space.
788 
789 		if (splitIndex <= lineStartIndex)
790 			splitIndex = string.FindLast (" ", tempIndex);
791 		if (splitIndex <= lineStartIndex)
792 			splitIndex = string.FindLast ("\t", tempIndex);
793 
794 		// If none of those exist, allow for a longer word - split at the next
795 		// available white space.
796 
797 		if (splitIndex <= lineStartIndex)
798 			splitIndex = string.FindFirst (" ", lineStartIndex + 1);
799 		if (splitIndex <= lineStartIndex)
800 			splitIndex = string.FindFirst ("\t", lineStartIndex + 1);
801 
802 		// Give up, the whole rest of the line can't be split, just dump it
803 		// out.
804 
805 		if (splitIndex <= lineStartIndex) {
806 			if (lineStartIndex < inputLength) {
807 				output.Insert (string, lineStartIndex /* source offset */,
808 					inputLength - lineStartIndex /* count */,
809 					output.Length() /* insert at */);
810 				output.Append (CRLF);
811 			}
812 			break;
813 		}
814 
815 		// Do the split.  The current line up to but not including the space
816 		// gets output, followed by a CRLF.  The space remains to become the
817 		// start of the next line (and that tells the message reader that it is
818 		// a continuation line).
819 
820 		output.Insert (string, lineStartIndex /* source offset */,
821 			splitIndex - lineStartIndex /* count */,
822 			output.Length() /* insert at */);
823 		output.Append (CRLF);
824 		lineStartIndex = splitIndex;
825 	}
826 	string.SetTo (output);
827 }
828 
829 
/*!	Reads one header line from \a file, joining folded continuation lines
	(lines that start with a space or tab) to it.

	CRLF line ends are converted to a single LF, and the returned line ends
	with a LF unless nothing could be read.  The buffer is grown with
	realloc() in steps of 64 bytes as needed.

	\param buffer In: an existing malloc()ed buffer or NULL.  Out: the
		(possibly reallocated) buffer.  If \a buffer itself is NULL, the
		line is discarded.
	\param buflen In/out: allocated size of the buffer.
	\return The number of characters in the line, 0 at the end of the file,
		ENOMEM if the buffer could not be grown, or a negative value on a
		read error.
*/
ssize_t
readfoldedline(FILE *file, char **buffer, size_t *buflen)
{
	ssize_t capacity = 0;
	if (buflen != NULL && *buflen != 0)
		capacity = *buflen;

	char *line = NULL;
	if (buffer != NULL && *buffer != NULL)
		line = *buffer;

	ssize_t used = 0;
		// number of characters currently in the buffer

	for (;;) {
		// There must always be room for two more characters: the next one
		// read, and the terminating NUL byte.
		if (line == NULL || used + 2 >= capacity) {
			char *grown = (char *)realloc(line, capacity + 64);
			if (grown == NULL) {
				// Out of memory, however the existing buffer stays allocated.
				used = ENOMEM;
				break;
			}
			capacity += 64;
			line = grown;
		}

		int next = fgetc(file);
		if (next == EOF) {
			if (ferror (file)) {
				used = errno;
				if (used >= 0)
					used = -1; // Error codes must be negative.
			} else if (used > 0) {
				// End of file also ends the line, if there is one.  If the
				// first thing read was EOF, an empty string is returned.
				line[used++] = '\n';
				if (line[used - 2] == '\r') {
					line[used - 2] = '\n';
					--used;
				}
			}
			break;
		}

		line[used++] = next;
		if (next != '\n')
			continue;

		// Convert a CRLF line end to just a LF.  Done before folding, in
		// case there is nothing to fold.
		if (used >= 2 && line[used - 2] == '\r') {
			line[used - 2] = '\n';
			--used;
		}

		// Return empty lines as they are, so that they don't disappear if
		// the next line starts with a space.
		if (used <= 1)
			break;

		// Peek at the first character of the next line; it is OK to read EOF
		// and ungetc() it too.
		int lookahead = fgetc(file);
		if (lookahead != ' ' && lookahead != '\t') {
			// Not folding, we finished reading a line.
			ungetc(lookahead, file);
			break;
		}

		// Fold: the white space character takes the place of the LF.
		line[used - 1] = lookahead;
	}

	if (line != NULL && used >= 0)
		line[used] = '\0';

	if (buffer != NULL)
		*buffer = line;
	else if (line != NULL)
		free(line);

	if (buflen != NULL)
		*buflen = capacity;

	return used;
}
911 
912 
913 ssize_t
914 readfoldedline(BPositionIO &in, char **buffer, size_t *buflen)
915 {
916 	ssize_t len = buflen && *buflen ? *buflen : 0;
917 	char * buf = buffer && *buffer ? *buffer : NULL;
918 	ssize_t cnt = 0; // Number of characters currently in the buffer.
919 	char c;
920 	status_t errorCode;
921 
922 	while (true) {
923 		// Make sure there is space in the buffer for two more characters (one
924 		// for the next character, and one for the end of string NUL byte).
925 		if (buf == NULL || cnt + 2 >= len) {
926 			char *temp = (char *)realloc(buf, len + 64);
927 			if (temp == NULL) {
928 				// Out of memory, however existing buffer remains allocated.
929 				cnt = ENOMEM;
930 				break;
931 			}
932 			len += 64;
933 			buf = temp;
934 		}
935 
936 		errorCode = in.Read (&c,1); // A really slow way of reading - unbuffered.
937 		if (errorCode != 1) {
938 			if (errorCode < 0) {
939 				cnt = errorCode; // IO error encountered, just return the code.
940 			} else {
941 				// Really is end of file.  Also make it end of line if there is
942 				// some text already read in.  If the first thing read was EOF,
943 				// just return an empty string.
944 				if (cnt > 0) {
945 					buf[cnt++] = '\n';
946 					if (buf[cnt-2] == '\r') {
947 						buf[cnt-2] = '\n';
948 						--cnt;
949 					}
950 				}
951 			}
952 			break;
953 		}
954 
955 		buf[cnt++] = c;
956 
957 		if (c == '\n') {
958 			// Convert CRLF end of line to just a LF.  Do it before folding, in
959 			// case we don't need to fold.
960 			if (cnt >= 2 && buf[cnt-2] == '\r') {
961 				buf[cnt-2] = '\n';
962 				--cnt;
963 			}
964 			// If the current line is empty then return it (so that empty lines
965 			// don't disappear if the next line starts with a space).
966 			if (cnt <= 1)
967 				break;
968 			// if first character on the next line is whitespace, fold lines
969 			errorCode = in.Read(&c,1);
970 			if (errorCode == 1) {
971 				if (c == ' ' || c == '\t')
972 					buf[cnt-1] = c; // Replace \n with the white space character.
973 				else {
974 					// Not folding, we finished reading a whole line.
975 					in.Seek(-1,SEEK_CUR); // Undo the look-ahead character read.
976 					break;
977 				}
978 			} else if (errorCode < 0) {
979 				cnt = errorCode;
980 				break;
981 			} else // No next line; at the end of the file.  Return the line.
982 				break;
983 		}
984 	}
985 
986 	if (buf != NULL && cnt >= 0)
987 		buf[cnt] = '\0';
988 
989 	if (buffer)
990 		*buffer = buf;
991 	else if (buf)
992 		free(buf);
993 
994 	if (buflen)
995 		*buflen = len;
996 
997 	return cnt;
998 }
999 
1000 
/*!	Extracts the next logical (unfolded) header line from a NUL terminated
	string and advances \a *header past the consumed text.

	A line break followed by a space or tab is treated as a fold: the line
	break is replaced by that white space character and reading continues.
	CRLF line ends are converted to a single LF.  An empty line is returned
	on its own and never folded into the following line.

	When the end of the string is reached, \a *header is left pointing at
	the terminating NUL (never beyond it), so calling the function again is
	safe and simply yields an empty line.

	\param header In/out: current read position within the header text.
	\param buffer In/out: optional existing buffer to reuse; receives the
		(possibly reallocated) buffer.  If NULL, the internal buffer is freed
		before returning.
	\param buflen In/out: size of \a buffer; receives the new size.
	\return Number of characters stored (including the trailing '\n'), 0 if
		the end of the string was hit before anything was read, or ENOMEM if
		the buffer could not be grown.
*/
ssize_t
nextfoldedline(const char** header, char **buffer, size_t *buflen)
{
	ssize_t len = buflen && *buflen ? *buflen : 0;
	char * buf = buffer && *buffer ? *buffer : NULL;
	ssize_t cnt = 0; // Number of characters currently in the buffer.
	char c;

	while (true) {
		// Make sure there is space in the buffer for two more characters (one
		// for the next character, and one for the end of string NUL byte).
		if (buf == NULL || cnt + 2 >= len) {
			char *temp = (char *)realloc(buf, len + 64);
			if (temp == NULL) {
				// Out of memory, however existing buffer remains allocated.
				cnt = ENOMEM;
				break;
			}
			len += 64;
			buf = temp;
		}

		// Peek at the next character; only consume it if it isn't the
		// terminating NUL, so that the read position never leaves the string.
		c = **header;
		if (c == 0) {
			// End of text.  Also make it end of line if there is some text
			// already read in.  If the first thing read was the end, just
			// return an empty string.
			if (cnt > 0) {
				buf[cnt++] = '\n';
				if (buf[cnt-2] == '\r') {
					buf[cnt-2] = '\n';
					--cnt;
				}
			}
			break;
		}
		(*header)++;

		buf[cnt++] = c;

		if (c == '\n') {
			// Convert CRLF end of line to just a LF.  Do it before folding, in
			// case we don't need to fold.
			if (cnt >= 2 && buf[cnt-2] == '\r') {
				buf[cnt-2] = '\n';
				--cnt;
			}
			// If the current line is empty then return it (so that empty lines
			// don't disappear if the next line starts with a space).
			if (cnt <= 1)
				break;
			// If the first character on the next line is white space, fold
			// the lines: consume it and let it replace the '\n'.
			c = **header;
			if (c == ' ' || c == '\t') {
				(*header)++;
				buf[cnt-1] = c;
			} else {
				// Not folding, we finished reading a line.
				break;
			}
		}
	}

	if (buf != NULL && cnt >= 0)
		buf[cnt] = '\0';

	if (buffer)
		*buffer = buf;
	else if (buf)
		free(buf);

	if (buflen)
		*buflen = len;

	return cnt;
}
1079 
1080 
1081 void
1082 trim_white_space(BString &string)
1083 {
1084 	int32 i;
1085 	int32 length = string.Length();
1086 	char *buffer = string.LockBuffer(length + 1);
1087 
1088 	while (length > 0 && isspace(buffer[length - 1]))
1089 		length--;
1090 	buffer[length] = '\0';
1091 
1092 	for (i = 0; buffer[i] && isspace(buffer[i]); i++) {}
1093 	if (i != 0) {
1094 		length -= i;
1095 		memmove(buffer,buffer + i,length + 1);
1096 	}
1097 	string.UnlockBuffer(length);
1098 }
1099 
1100 
1101 /*!	Tries to return a human-readable name from the specified
1102 	header parameter (should be from "To:" or "From:").
1103 	Tries to return the name rather than the eMail address.
1104 */
1105 void
1106 extract_address_name(BString &header)
1107 {
1108 	BString name;
1109 	const char *start = header.String();
1110 	const char *stop = start + strlen (start);
1111 
1112 	// Find a string S in the header (email foo) that matches:
1113 	//   Old style name in brackets: foo@bar.com (S)
1114 	//   New style quotes: "S" <foo@bar.com>
1115 	//   New style no quotes if nothing else found: S <foo@bar.com>
1116 	//   If nothing else found then use the whole thing: S
1117 
1118 	for (int i = 0; i <= 3; i++) {
1119 		// Set p1 to the first letter in the name and p2 to just past the last
1120 		// letter in the name.  p2 stays NULL if a name wasn't found in this
1121 		// pass.
1122 		const char *p1 = NULL, *p2 = NULL;
1123 
1124 		switch (i) {
1125 			case 0: // foo@bar.com (S)
1126 				if ((p1 = strchr(start,'(')) != NULL) {
1127 					p1++; // Advance to first letter in the name.
1128 					size_t nest = 1; // Handle nested brackets.
1129 					for (p2 = p1; p2 < stop; ++p2)
1130 					{
1131 						if (*p2 == ')')
1132 							--nest;
1133 						else if (*p2 == '(')
1134 							++nest;
1135 						if (nest <= 0)
1136 							break;
1137 					}
1138 					if (nest != 0)
1139 						p2 = NULL; // False alarm, no terminating bracket.
1140 				}
1141 				break;
1142 			case 1: // "S" <foo@bar.com>
1143 				if ((p1 = strchr(start, '\"')) != NULL)
1144 					p2 = strchr(++p1, '\"');
1145 				break;
1146 			case 2: // S <foo@bar.com>
1147 				p1 = start;
1148 				if (name.Length() == 0)
1149 					p2 = strchr(start, '<');
1150 				break;
1151 			case 3: // S
1152 				p1 = start;
1153 				if (name.Length() == 0)
1154 					p2 = stop;
1155 				break;
1156 		}
1157 
1158 		// Remove leading and trailing space-like characters and save the
1159 		// result if it is longer than any other likely names found.
1160 		if (p2 != NULL) {
1161 			while (p1 < p2 && (isspace (*p1)))
1162 				++p1;
1163 
1164 			while (p1 < p2 && (isspace (p2[-1])))
1165 				--p2;
1166 
1167 			int newLength = p2 - p1;
1168 			if (name.Length() < newLength)
1169 				name.SetTo(p1, newLength);
1170 		}
1171 	}
1172 
1173 	int32 lessIndex = name.FindFirst('<');
1174 	int32 greaterIndex = name.FindLast('>');
1175 
1176 	if (lessIndex == 0) {
1177 		// Have an address of the form <address> and nothing else, so remove
1178 		// the greater and less than signs, if any.
1179 		if (greaterIndex > 0)
1180 			name.Remove(greaterIndex, 1);
1181 		name.Remove(lessIndex, 1);
1182 	} else if (lessIndex > 0 && lessIndex < greaterIndex) {
1183 		// Yahoo stupidly inserts the e-mail address into the name string, so
1184 		// this bit of code fixes: "Joe <joe@yahoo.com>" <joe@yahoo.com>
1185 		name.Remove(lessIndex, greaterIndex - lessIndex + 1);
1186 	}
1187 
1188 	trim_white_space(name);
1189 	header = name;
1190 }
1191 
1192 
1193 /*!	Given a subject in a BString, remove the extraneous RE: re: and other stuff
1194 	to get down to the core subject string, which should be identical for all
1195 	messages posted about a topic.  The input string is modified in place to
1196 	become the output core subject string.
1197 */
1198 void
1199 SubjectToThread (BString &string)
1200 {
1201 // a regex that matches a non-ASCII UTF8 character:
1202 #define U8C \
1203 	"[\302-\337][\200-\277]" \
1204 	"|\340[\302-\337][\200-\277]" \
1205 	"|[\341-\357][\200-\277][\200-\277]" \
1206 	"|\360[\220-\277][\200-\277][\200-\277]" \
1207 	"|[\361-\367][\200-\277][\200-\277][\200-\277]" \
1208 	"|\370[\210-\277][\200-\277][\200-\277][\200-\277]" \
1209 	"|[\371-\373][\200-\277][\200-\277][\200-\277][\200-\277]" \
1210 	"|\374[\204-\277][\200-\277][\200-\277][\200-\277][\200-\277]" \
1211 	"|\375[\200-\277][\200-\277][\200-\277][\200-\277][\200-\277]"
1212 
1213 #define PATTERN \
1214 	"^ +" \
1215 	"|^(\\[[^]]*\\])(\\<|  +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1216 	"|^(  +| *(\\<(\\w|" U8C "){2,3} *(\\[[^\\]]*\\])? *:)+ *)" \
1217 	"| *\\(fwd\\) *$"
1218 
1219 	if (gRebuf == NULL && atomic_add(&gLocker, 1) == 0) {
1220 		// the idea is to compile the regexp once to speed up testing
1221 
1222 		for (int i=0; i<256; ++i) gTranslation[i]=i;
1223 		for (int i='a'; i<='z'; ++i) gTranslation[i]=toupper(i);
1224 
1225 		gRe.translate = gTranslation;
1226 		gRe.regs_allocated = REGS_FIXED;
1227 		re_syntax_options = RE_SYNTAX_POSIX_EXTENDED;
1228 
1229 		const char *pattern = PATTERN;
1230 		// count subexpressions in PATTERN
1231 		for (unsigned int i=0; pattern[i] != 0; ++i)
1232 		{
1233 			if (pattern[i] == '\\')
1234 				++i;
1235 			else if (pattern[i] == '(')
1236 				++gNsub;
1237 		}
1238 
1239 		const char *err = re_compile_pattern(pattern,strlen(pattern),&gRe);
1240 		if (err == NULL)
1241 			gRebuf = &gRe;
1242 		else
1243 			fprintf(stderr, "Failed to compile the regex: %s\n", err);
1244 	} else {
1245 		int32 tries = 200;
1246 		while (gRebuf == NULL && tries-- > 0)
1247 			snooze(10000);
1248 	}
1249 
1250 	if (gRebuf) {
1251 		struct re_registers regs;
1252 		// can't be static if this function is to be thread-safe
1253 
1254 		regs.num_regs = gNsub;
1255 		regs.start = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1256 		regs.end = (regoff_t*)malloc(gNsub*sizeof(regoff_t));
1257 
1258 		for (int start = 0; (start = re_search(gRebuf, string.String(),
1259 				string.Length(), 0, string.Length(), &regs)) >= 0;) {
1260 			//
1261 			// we found something
1262 			//
1263 
1264 			// don't delete [bemaildaemon]...
1265 			if (start == regs.start[1])
1266 				start = regs.start[2];
1267 
1268 			string.Remove(start,regs.end[0]-start);
1269 			if (start)
1270 				string.Insert(' ',1,start);
1271 
1272 			// TODO: for some subjects this results in an endless loop, check
1273 			// why this happen.
1274 			if (regs.end[0] - start <= 1)
1275 				break;
1276 		}
1277 
1278 		free(regs.start);
1279 		free(regs.end);
1280 	}
1281 
1282 	// Finally remove leading and trailing space.  Some software, like
1283 	// tm-edit 1.8, appends a space to the subject, which would break
1284 	// threading if we left it in.
1285 	trim_white_space(string);
1286 }
1287 
1288 
1289 /*!	Converts a date to a time.  Handles numeric time zones too, unlike
1290 	parsedate().  Returns -1 if it fails.
1291 */
1292 time_t
1293 ParseDateWithTimeZone(const char *DateString)
1294 {
1295 	time_t currentTime;
1296 	time_t dateAsTime;
1297 	char tempDateString[80];
1298 	char tempZoneString[6];
1299 	time_t zoneDeltaTime;
1300 	int zoneIndex;
1301 	char *zonePntr;
1302 
1303 	// See if we can remove the time zone portion.  parsedate understands time
1304 	// zone 3 letter names, but doesn't understand the numeric +9999 time zone
1305 	// format.  To do: see if a newer parsedate exists.
1306 
1307 	strncpy (tempDateString, DateString, sizeof (tempDateString));
1308 	tempDateString[sizeof (tempDateString) - 1] = 0;
1309 
1310 	// Remove trailing spaces.
1311 	zonePntr = tempDateString + strlen (tempDateString) - 1;
1312 	while (zonePntr >= tempDateString && isspace (*zonePntr))
1313 		*zonePntr-- = 0;
1314 	if (zonePntr < tempDateString)
1315 		return -1; // Empty string.
1316 
1317 	// Remove the trailing time zone in round brackets, like in
1318 	// Fri, 22 Feb 2002 15:22:42 EST (-0500)
1319 	// Thu, 25 Apr 1996 11:44:19 -0400 (EDT)
1320 	if (tempDateString[strlen(tempDateString)-1] == ')')
1321 	{
1322 		zonePntr = strrchr (tempDateString, '(');
1323 		if (zonePntr != NULL)
1324 		{
1325 			*zonePntr-- = 0; // Zap the '(', then remove trailing spaces.
1326 			while (zonePntr >= tempDateString && isspace (*zonePntr))
1327 				*zonePntr-- = 0;
1328 			if (zonePntr < tempDateString)
1329 				return -1; // Empty string.
1330 		}
1331 	}
1332 
1333 	// Look for a numeric time zone like  Tue, 30 Dec 2003 05:01:40 +0000
1334 	for (zoneIndex = strlen (tempDateString); zoneIndex >= 0; zoneIndex--)
1335 	{
1336 		zonePntr = tempDateString + zoneIndex;
1337 		if (zonePntr[0] == '+' || zonePntr[0] == '-')
1338 		{
1339 			if (zonePntr[1] >= '0' && zonePntr[1] <= '9' &&
1340 				zonePntr[2] >= '0' && zonePntr[2] <= '9' &&
1341 				zonePntr[3] >= '0' && zonePntr[3] <= '9' &&
1342 				zonePntr[4] >= '0' && zonePntr[4] <= '9')
1343 				break;
1344 		}
1345 	}
1346 	if (zoneIndex >= 0)
1347 	{
1348 		// Remove the zone from the date string and any following time zone
1349 		// letter codes.  Also put in GMT so that the date gets parsed as GMT.
1350 		memcpy (tempZoneString, zonePntr, 5);
1351 		tempZoneString [5] = 0;
1352 		strcpy (zonePntr, "GMT");
1353 	}
1354 	else // No numeric time zone found.
1355 		strcpy (tempZoneString, "+0000");
1356 
1357 	time (&currentTime);
1358 	dateAsTime = parsedate (tempDateString, currentTime);
1359 	if (dateAsTime == (time_t) -1)
1360 		return -1; // Failure.
1361 
1362 	zoneDeltaTime = 60 * atol (tempZoneString + 3); // Get the last two digits - minutes.
1363 	tempZoneString[3] = 0;
1364 	zoneDeltaTime += atol (tempZoneString + 1) * 60 * 60; // Get the first two digits - hours.
1365 	if (tempZoneString[0] == '+')
1366 		zoneDeltaTime = 0 - zoneDeltaTime;
1367 	dateAsTime += zoneDeltaTime;
1368 
1369 	return dateAsTime;
1370 }
1371 
1372 
1373 /*! Parses a mail header and fills the headers BMessage
1374 */
1375 status_t
1376 parse_header(BMessage &headers, BPositionIO &input)
1377 {
1378 	char *buffer = NULL;
1379 	size_t bufferSize = 0;
1380 	int32 length;
1381 
1382 	while ((length = readfoldedline(input, &buffer, &bufferSize)) >= 2) {
1383 		--length;
1384 			// Don't include the \n at the end of the buffer.
1385 
1386 		// convert to UTF-8 and null-terminate the buffer
1387 		length = rfc2047_to_utf8(&buffer, &bufferSize, length);
1388 		buffer[length] = '\0';
1389 
1390 		const char *delimiter = strstr(buffer, ":");
1391 		if (delimiter == NULL)
1392 			continue;
1393 
1394 		BString header(buffer, delimiter - buffer);
1395 		header.CapitalizeEachWord();
1396 			// unified case for later fetch
1397 
1398 		delimiter++; // Skip the colon.
1399 		// Skip over leading white space and tabs.
1400 		// TODO: (comments in brackets).
1401 		while (isspace(*delimiter))
1402 			delimiter++;
1403 
1404 		// TODO: implement joining of multiple header tags (i.e. multiple "Cc:"s)
1405 		headers.AddString(header.String(), delimiter);
1406 	}
1407 	free(buffer);
1408 
1409 	return B_OK;
1410 }
1411 
1412 
1413 status_t
1414 extract_from_header(const BString& header, const BString& field,
1415 	BString& target)
1416 {
1417 	int32 headerLength = header.Length();
1418 	int32 fieldEndPos = 0;
1419 	while (true) {
1420 		int32 pos = header.IFindFirst(field, fieldEndPos);
1421 		if (pos < 0)
1422 			return B_BAD_VALUE;
1423 		fieldEndPos = pos + field.Length();
1424 
1425 		if (pos != 0 && header.ByteAt(pos - 1) != '\n')
1426 			continue;
1427 		if (header.ByteAt(fieldEndPos) == ':')
1428 			break;
1429 	}
1430 	fieldEndPos++;
1431 
1432 	int32 crPos = fieldEndPos;
1433 	while (true) {
1434 		fieldEndPos = crPos;
1435 		crPos = header.FindFirst('\n', crPos);
1436 		if (crPos < 0)
1437 			crPos = headerLength;
1438 		BString temp;
1439 		header.CopyInto(temp, fieldEndPos, crPos - fieldEndPos);
1440 		if (header.ByteAt(crPos - 1) == '\r') {
1441 			temp.Truncate(temp.Length() - 1);
1442 			temp += " ";
1443 		}
1444 		target += temp;
1445 		crPos++;
1446 		if (crPos >= headerLength)
1447 			break;
1448 		char nextByte = header.ByteAt(crPos);
1449 		if (nextByte != ' ' && nextByte != '\t')
1450 			break;
1451 		crPos++;
1452 	}
1453 
1454 	size_t bufferSize = target.Length();
1455 	char* buffer = target.LockBuffer(bufferSize);
1456 	size_t length = rfc2047_to_utf8(&buffer, &bufferSize, bufferSize);
1457 	target.UnlockBuffer(length);
1458 
1459 	trim_white_space(target);
1460 
1461 	return B_OK;
1462 }
1463 
1464 
1465 void
1466 extract_address(BString &address)
1467 {
1468 	const char *string = address.String();
1469 	int32 first;
1470 
1471 	// first, remove all quoted text
1472 
1473 	if ((first = address.FindFirst('"')) >= 0) {
1474 		int32 last = first + 1;
1475 		while (string[last] && string[last] != '"')
1476 			last++;
1477 
1478 		if (string[last] == '"')
1479 			address.Remove(first, last + 1 - first);
1480 	}
1481 
1482 	// try to extract the address now
1483 
1484 	if ((first = address.FindFirst('<')) >= 0) {
1485 		// the world likes us and we can just get the address the easy way...
1486 		int32 last = address.FindFirst('>');
1487 		if (last >= 0) {
1488 			address.Truncate(last);
1489 			address.Remove(0, first + 1);
1490 
1491 			return;
1492 		}
1493 	}
1494 
1495 	// then, see if there is anything in parenthesis to throw away
1496 
1497 	if ((first = address.FindFirst('(')) >= 0) {
1498 		int32 last = first + 1;
1499 		while (string[last] && string[last] != ')')
1500 			last++;
1501 
1502 		if (string[last] == ')')
1503 			address.Remove(first, last + 1 - first);
1504 	}
1505 
1506 	// now, there shouldn't be much else left
1507 
1508 	trim_white_space(address);
1509 }
1510 
1511 
1512 void
1513 get_address_list(BList &list, const char *string,
1514 	void (*cleanupFunc)(BString &))
1515 {
1516 	if (string == NULL || !string[0])
1517 		return;
1518 
1519 	const char *start = string;
1520 
1521 	while (true) {
1522 		if (string[0] == '"') {
1523 			const char *quoteEnd = ++string;
1524 
1525 			while (quoteEnd[0] && quoteEnd[0] != '"')
1526 				quoteEnd++;
1527 
1528 			if (!quoteEnd[0])	// string exceeds line!
1529 				quoteEnd = string;
1530 
1531 			string = quoteEnd + 1;
1532 		}
1533 
1534 		if (string[0] == ',' || string[0] == '\0') {
1535 			BString address(start, string - start);
1536 			trim_white_space(address);
1537 
1538 			if (cleanupFunc)
1539 				cleanupFunc(address);
1540 
1541 			list.AddItem(strdup(address.String()));
1542 
1543 			start = string + 1;
1544 		}
1545 
1546 		if (!string[0])
1547 			break;
1548 
1549 		string++;
1550 	}
1551 }
1552 
1553 
1554 status_t
1555 CopyMailFolderAttributes(const char* targetPath)
1556 {
1557 	BPath path;
1558 	status_t status = find_directory(B_USER_SETTINGS_DIRECTORY, &path);
1559 	if (status != B_OK)
1560 		return status;
1561 
1562 	path.Append("Tracker");
1563 	path.Append("DefaultQueryTemplates");
1564 	path.Append("text_x-email");
1565 
1566 	BNode source(path.Path());
1567 	BNode target(targetPath);
1568 	return BPrivate::CopyAttributes(source, target);
1569 }
1570