xref: /haiku/src/kits/mail/mail_encoding.cpp (revision 9fc69d1b0015d7048ec051e2c0faa8e073ce847e)
1 /*
2  * Copyright 2011, Haiku, Inc. All rights reserved.
3  * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
4  */
5 
6 
7 #include <ctype.h>
8 #include <string.h>
9 
10 #include <SupportDefs.h>
11 
12 #include <mail_encoding.h>
13 
14 
// Converts one uuencoded character into the 6-bit value it carries: the
// offset of the space character is removed and only the low six bits kept.
#define DEC(c) (((c) - ' ') & 0x3f)
16 
17 
// Upper-case hexadecimal digits, indexed by nibble value (0 - 15). The array
// holds exactly 16 characters and is not NUL-terminated.
static const char kHexAlphabet[16] = {
	'0', '1', '2', '3',
	'4', '5', '6', '7',
	'8', '9', 'A', 'B',
	'C', 'D', 'E', 'F'
};
20 
21 
22 ssize_t
23 encode(mail_encoding encoding, char *out, const char *in, off_t length,
24 	int headerMode)
25 {
26 	switch (encoding) {
27 		case base64:
28 			return encode_base64(out,in,length,headerMode);
29 		case quoted_printable:
30 			return encode_qp(out,in,length,headerMode);
31 		case seven_bit:
32 		case eight_bit:
33 		case no_encoding:
34 			memcpy(out,in,length);
35 			return length;
36 		case uuencode:
37 		default:
38 			return -1;
39 	}
40 
41 	return -1;
42 }
43 
44 
45 ssize_t
46 decode(mail_encoding encoding, char *out, const char *in, off_t length,
47 	int underscoreIsSpace)
48 {
49 	switch (encoding) {
50 		case base64:
51 			return decode_base64(out, in, length);
52 		case uuencode:
53 			return uu_decode(out, in, length);
54 		case seven_bit:
55 		case eight_bit:
56 		case no_encoding:
57 			memcpy(out, in, length);
58 			return length;
59 		case quoted_printable:
60 			return decode_qp(out, in, length, underscoreIsSpace);
61 		default:
62 			break;
63 	}
64 
65 	return -1;
66 }
67 
68 
69 ssize_t
70 max_encoded_length(mail_encoding encoding, off_t length)
71 {
72 	switch (encoding) {
73 		case base64:
74 		{
75 			double result = length * 1.33333333333333;
76 			result += (result / BASE64_LINELENGTH) * 2 + 20;
77 			return (ssize_t)(result);
78 		}
79 		case quoted_printable:
80 			return length * 3;
81 		case seven_bit:
82 		case eight_bit:
83 		case no_encoding:
84 			return length;
85 		case uuencode:
86 		default:
87 			return -1;
88 	}
89 
90 	return -1;
91 }
92 
93 
94 mail_encoding
95 encoding_for_cte(const char *cte)
96 {
97 	if (cte == NULL)
98 		return no_encoding;
99 
100 	if (strcasecmp(cte,"uuencode") == 0)
101 		return uuencode;
102 	if (strcasecmp(cte,"base64") == 0)
103 		return base64;
104 	if (strcasecmp(cte,"quoted-printable") == 0)
105 		return quoted_printable;
106 	if (strcasecmp(cte,"7bit") == 0)
107 		return seven_bit;
108 	if (strcasecmp(cte,"8bit") == 0)
109 		return eight_bit;
110 
111 	return no_encoding;
112 }
113 
114 
// Returns the value (0 - 15) of the hexadecimal digit "c", accepting upper
// and lower case, or -1 if "c" is not a hexadecimal digit.
//
// Deliberately avoids <ctype.h>: passing a plain char with the high bit set
// to toupper() is undefined where char is signed, and the result would depend
// on the current locale.
static int
qp_hex_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}


// Decodes "length" bytes of quoted-printable data from "in" into "out".
//
// - "=XY" with two hexadecimal digits becomes the byte 0xXY.
// - "=<CR><LF>" and "=<LF>" are soft line breaks and produce no output.
// - '_' becomes a space if "underscoreIsSpace" is non-zero.
// - Everything else, including a '=' that starts none of the sequences above,
//   is copied unchanged.
//
// A NUL terminator is written after the decoded data, so "out" must provide
// room for up to length + 1 bytes. Returns the number of decoded bytes, not
// counting the terminator.
ssize_t
decode_qp(char *out, const char *in, off_t length, int underscoreIsSpace)
{
	char *dataout = out;
	const char *datain = in;
	const char *dataend = in + length;

	while (datain < dataend) {
		if (*datain == '=') {
			if (dataend - datain > 2) {
				int high = qp_hex_value(datain[1]);
				int low = qp_hex_value(datain[2]);

				if (high >= 0 && low >= 0) {
					*dataout++ = (char)((high << 4) | low);
					datain += 3;
					continue;
				}

				if (datain[1] == '\r' && datain[2] == '\n') {
					// strip =<CR><NL>
					datain += 3;
					continue;
				}
			}

			if (dataend - datain > 1 && datain[1] == '\n') {
				// Soft line break whose CR was lost to line ending
				// conversion: strip =<NL> as well.
				datain += 2;
				continue;
			}
		} else if (*datain == '_' && underscoreIsSpace) {
			*dataout++ = ' ';
			++datain;
			continue;
		}

		// Ordinary character, or a '=' that starts no valid sequence.
		*dataout++ = *datain++;
	}

	*dataout = '\0';
	return dataout - out;
}
153 
154 
155 ssize_t
156 encode_qp(char *out, const char *in, off_t length, int headerMode)
157 {
158 	int g = 0, i = 0;
159 
160 	for (; i < length; i++) {
161 		if (((uint8 *)(in))[i] > 127 || in[i] == '?' || in[i] == '='
162 			|| in[i] == '_'
163 			// Also encode the letter F in "From " at the start of the line,
164 			// which Unix systems use to mark the start of messages in their
165 			// mbox files.
166 			|| (in[i] == 'F' && i + 5 <= length && (i == 0 || in[i - 1] == '\n')
167 				&& in[i + 1] == 'r' && in[i + 2] == 'o' && in[i + 3] == 'm'
168 				&& in[i + 4] == ' ')) {
169 			out[g++] = '=';
170 			out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f];
171 			out[g++] = kHexAlphabet[in[i] & 0x0f];
172 		} else if (headerMode && (in[i] == ' ' || in[i] == '\t')) {
173 			out[g++] = '_';
174 		} else if (headerMode && in[i] >= 0 && in[i] < 32) {
175 			// Control codes in headers need to be sanitized, otherwise certain
176 			// Japanese ISPs mangle the headers badly.  But they don't mangle
177 			// the body.
178 			out[g++] = '=';
179 			out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f];
180 			out[g++] = kHexAlphabet[in[i] & 0x0f];
181 		} else
182 			out[g++] = in[i];
183 	}
184 
185 	return g;
186 }
187 
188 
// Returns the 6-bit value of the uuencoded character at "index" of a line
// that is "lineLength" characters long. Characters missing from the end of
// the line count as zero, which is the value of the trailing spaces some
// transports strip.
static unsigned int
uu_sextet(const unsigned char *line, off_t lineLength, off_t index)
{
	if (index >= lineLength)
		return 0;
	return (unsigned int)((line[index] - ' ') & 077);
}


// Returns the offset of the first CR, LF or NUL at or after "pos", or
// "length" if there is none.
static off_t
uu_line_end(const unsigned char *data, off_t pos, off_t length)
{
	while (pos < length && data[pos] != '\n' && data[pos] != '\r'
		&& data[pos] != '\0') {
		pos++;
	}
	return pos;
}


// Returns the offset of the first character at or after "pos" that is
// neither CR nor LF, or "length" if there is none.
static off_t
uu_skip_line_breaks(const unsigned char *data, off_t pos, off_t length)
{
	while (pos < length && (data[pos] == '\n' || data[pos] == '\r'))
		pos++;
	return pos;
}


// Decodes uuencoded data from "in" into "out" and returns the number of
// bytes written to "out".
//
// Everything up to and including the line containing the first "begin" is
// skipped. The lines after it are decoded until one starts with "end", a NUL
// byte is found, or "length" bytes have been consumed, whichever comes
// first. If no "begin" is found, nothing is decoded and 0 is returned.
//
// No byte outside in[0 .. length - 1] is read. "out" is not NUL-terminated;
// it has to hold three bytes for every four encoded characters.
ssize_t
uu_decode(char *out, const char *in, off_t length)
{
	const unsigned char *data = (const unsigned char *)in;
	unsigned char *dest = (unsigned char *)out;
	off_t pos = 0;

	// Look for the "begin" header without running past "length" or a NUL.
	while (pos + 5 <= length && data[pos] != '\0'
		&& memcmp(data + pos, "begin", 5) != 0) {
		pos++;
	}
	if (pos + 5 > length || data[pos] == '\0')
		return 0;

	// The remainder of the header line (mode and file name) is not needed.
	pos = uu_line_end(data, pos, length);
	pos = uu_skip_line_breaks(data, pos, length);

	while (pos < length && data[pos] != '\0') {
		const unsigned char *line = data + pos;
		off_t lineEnd;
		off_t lineLength;
		off_t index;
		long remaining;

		if (length - pos >= 3 && memcmp(line, "end", 3) == 0)
			break;

		lineEnd = uu_line_end(data, pos, length);
		lineLength = lineEnd - pos;

		// The first character of a line holds the number of data bytes
		// encoded on it; each group of four characters carries three bytes.
		remaining = (long)uu_sextet(line, lineLength, 0);

		for (index = 1; remaining > 0; index += 4, remaining -= 3) {
			unsigned int a = uu_sextet(line, lineLength, index);
			unsigned int b = uu_sextet(line, lineLength, index + 1);
			unsigned int c = uu_sextet(line, lineLength, index + 2);
			unsigned int d = uu_sextet(line, lineLength, index + 3);

			*dest++ = (unsigned char)(a << 2 | b >> 4);
			if (remaining >= 2)
				*dest++ = (unsigned char)(b << 4 | c >> 2);
			if (remaining >= 3)
				*dest++ = (unsigned char)(c << 6 | d);
		}

		pos = uu_skip_line_breaks(data, lineEnd, length);
	}

	// The result is the distance travelled in the output buffer.
	return (ssize_t)(dest - (unsigned char *)out);
}
231 
232