xref: /haiku/src/kits/mail/mail_encoding.cpp (revision 23d878482ed22e55dad6d1fca1df7bea42eb157c)
1 /*
2  * Copyright 2011, Haiku, Inc. All rights reserved.
3  * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
4  */
5 
6 
7 #include <ctype.h>
8 #include <string.h>
9 
10 #include <SupportDefs.h>
11 
12 #include <mail_encoding.h>
13 
14 
// Converts one character of uuencoded text to the 6-bit value it stands for:
// the offset from the space character, reduced to the low six bits.
#define DEC(c)	(((c) - ' ') & 0x3f)
16 
17 
// Lookup table from a 6-bit value (0-63) to the character that represents it
// in base64 output.
static const char kBase64Alphabet[64] = {
	// values 0 - 25
	'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
	'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
	// values 26 - 51
	'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
	'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
	// values 52 - 61
	'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	// values 62 and 63
	'+', '/'
};
27 
// Lookup table from a 4-bit value (0-15) to its upper case hexadecimal digit.
static const char kHexAlphabet[16] = {
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
30 
31 
32 ssize_t
33 encode(mail_encoding encoding, char *out, const char *in, off_t length,
34 	int headerMode)
35 {
36 	switch (encoding) {
37 		case base64:
38 			return encode_base64(out,in,length,headerMode);
39 		case quoted_printable:
40 			return encode_qp(out,in,length,headerMode);
41 		case seven_bit:
42 		case eight_bit:
43 		case no_encoding:
44 			memcpy(out,in,length);
45 			return length;
46 		case uuencode:
47 		default:
48 			return -1;
49 	}
50 
51 	return -1;
52 }
53 
54 
55 ssize_t
56 decode(mail_encoding encoding, char *out, const char *in, off_t length,
57 	int underscoreIsSpace)
58 {
59 	switch (encoding) {
60 		case base64:
61 			return decode_base64(out, in, length);
62 		case uuencode:
63 			return uu_decode(out, in, length);
64 		case seven_bit:
65 		case eight_bit:
66 		case no_encoding:
67 			memcpy(out, in, length);
68 			return length;
69 		case quoted_printable:
70 			return decode_qp(out, in, length, underscoreIsSpace);
71 		default:
72 			break;
73 	}
74 
75 	return -1;
76 }
77 
78 
79 ssize_t
80 max_encoded_length(mail_encoding encoding, off_t length)
81 {
82 	switch (encoding) {
83 		case base64:
84 		{
85 			double result = length * 1.33333333333333;
86 			result += (result / BASE64_LINELENGTH) * 2 + 20;
87 			return (ssize_t)(result);
88 		}
89 		case quoted_printable:
90 			return length * 3;
91 		case seven_bit:
92 		case eight_bit:
93 		case no_encoding:
94 			return length;
95 		case uuencode:
96 		default:
97 			return -1;
98 	}
99 
100 	return -1;
101 }
102 
103 
104 mail_encoding
105 encoding_for_cte(const char *cte)
106 {
107 	if (cte == NULL)
108 		return no_encoding;
109 
110 	if (strcasecmp(cte,"uuencode") == 0)
111 		return uuencode;
112 	if (strcasecmp(cte,"base64") == 0)
113 		return base64;
114 	if (strcasecmp(cte,"quoted-printable") == 0)
115 		return quoted_printable;
116 	if (strcasecmp(cte,"7bit") == 0)
117 		return seven_bit;
118 	if (strcasecmp(cte,"8bit") == 0)
119 		return eight_bit;
120 
121 	return no_encoding;
122 }
123 
124 
125 ssize_t
126 encode_base64(char *out, const char *in, off_t length, int headerMode)
127 {
128 	uint32 concat;
129 	int i = 0;
130 	int k = 0;
131 	int lineLength = 4;
132 		// Stop before it actually gets too long
133 
134 	while (i < length) {
135 		concat = ((in[i] & 0xff) << 16);
136 
137 		if ((i+1) < length)
138 			concat |= ((in[i+1] & 0xff) << 8);
139 		if ((i+2) < length)
140 			concat |= (in[i+2] & 0xff);
141 
142 		i += 3;
143 
144 		out[k++] = kBase64Alphabet[(concat >> 18) & 63];
145 		out[k++] = kBase64Alphabet[(concat >> 12) & 63];
146 		out[k++] = kBase64Alphabet[(concat >> 6) & 63];
147 		out[k++] = kBase64Alphabet[concat & 63];
148 
149 		if (i >= length) {
150 			int v;
151 			for (v = 0; v <= (i - length); v++)
152 				out[k-v] = '=';
153 		}
154 
155 		lineLength += 4;
156 
157 		// No line breaks in header mode, since the text is part of a Subject:
158 		// line or some other single header line.  The header code will do word
159 		// wrapping separately from this encoding stuff.
160 		if (!headerMode && lineLength > BASE64_LINELENGTH) {
161 			out[k++] = '\r';
162 			out[k++] = '\n';
163 
164 			lineLength = 4;
165 		}
166 	}
167 
168 	return k;
169 }
170 
171 
/*!	Decodes \a length bytes of base64 text from \a in into \a out.

	CR and LF characters are skipped wherever they occur. A '=' ends the
	current group of four characters. If a line contains a character that is
	not part of the base64 alphabet, everything decoded since the last line
	break is dropped and decoding resumes with the next line.

	\param out Destination buffer; its size is not checked here. The output
		is never longer than \a length and is not NUL terminated.
	\param in The base64 text.
	\param length Number of bytes in \a in.
	\return The number of bytes written to \a out.
*/
ssize_t
decode_base64(char *out, const char *in, off_t length)
{
	off_t i = 0;
	ssize_t outIndex = 0;
	ssize_t lastOutLine = 0;
		// output position at the most recent line break

	while (i < length) {
		unsigned int concat = 0;
		int j = 0;
			// number of alphabet characters collected for this group

		while (j < 4 && i + j < length) {
			const unsigned char c = (unsigned char)in[i + j];
			unsigned int value;

			if (c == '\n' || c == '\r') {
				// jump over line breaks
				lastOutLine = outIndex;
				i++;
				continue;
			}

			if (c >= 'A' && c <= 'Z')
				value = (unsigned int)(c - 'A');
			else if (c >= 'a' && c <= 'z')
				value = (unsigned int)(c - 'a') + 26;
			else if (c >= '0' && c <= '9')
				value = (unsigned int)(c - '0') + 52;
			else if (c == '+')
				value = 62;
			else if (c == '/')
				value = 63;
			else if (c == '=')
				break;
			else {
				// There is an invalid character in this line - ignore the
				// whole line: discard what it produced so far, move to its
				// end and start a fresh group there, so that neither the
				// invalid value nor a stale group offset leaks into the
				// next line.
				outIndex = lastOutLine;
				i += j;
				while (i < length && in[i] != '\n' && in[i] != '\r')
					i++;
				concat = 0;
				j = 0;
				continue;
			}

			concat |= value << ((3 - j) * 6);
			j++;
		}

		// Two characters carry one byte, three carry two, four carry three.
		if (j > 1)
			out[outIndex++] = (char)((concat >> 16) & 0xff);
		if (j > 2)
			out[outIndex++] = (char)((concat >> 8) & 0xff);
		if (j > 3)
			out[outIndex++] = (char)(concat & 0xff);

		i += 4;
	}

	return outIndex;
}
230 
231 
// Returns the value (0-15) of the hexadecimal digit, or -1 if the character
// is not a hexadecimal digit. Both upper and lower case are accepted.
static int
qp_hex_digit_value(char digit)
{
	// toupper() is only defined for values representable as unsigned char.
	const int c = toupper((unsigned char)digit);

	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;

	return -1;
}


/*!	Decodes \a length bytes of quoted-printable text from \a in into \a out.

	"=XX" with two hexadecimal digits becomes the byte with that value. Soft
	line breaks ("=" directly followed by CR LF, or by a bare LF) are
	removed. If \a underscoreIsSpace is non-zero, '_' is decoded as a space.
	Everything else, including a '=' that starts neither an escape nor a soft
	line break, is copied unchanged.

	\param out Destination buffer; its size is not checked here. Up to
		\a length bytes plus a terminating NUL are written.
	\param in The quoted-printable text.
	\param length Number of bytes in \a in.
	\param underscoreIsSpace Non-zero to translate '_' to ' '.
	\return The number of decoded bytes, not counting the terminating NUL.
*/
ssize_t
decode_qp(char *out, const char *in, off_t length, int underscoreIsSpace)
{
	char *dataout = out;
	const char *datain = in;
	const char *dataend = in + length;

	while (datain < dataend) {
		if (*datain == '=') {
			if (dataend - datain > 2) {
				const int high = qp_hex_digit_value(datain[1]);
				const int low = qp_hex_digit_value(datain[2]);

				if (high >= 0 && low >= 0) {
					*dataout++ = (char)((high << 4) + low);
					datain += 3;
					continue;
				}

				if (datain[1] == '\r' && datain[2] == '\n') {
					// strip =<CR><NL>
					datain += 3;
					continue;
				}
			}

			if (dataend - datain > 1 && datain[1] == '\n') {
				// strip =<NL>, a soft line break whose line ending has
				// been converted to a bare line feed
				datain += 2;
				continue;
			}
		} else if (*datain == '_' && underscoreIsSpace) {
			*dataout++ = ' ';
			++datain;
			continue;
		}

		*dataout++ = *datain++;
	}

	*dataout = '\0';
	return dataout - out;
}
270 
271 
272 ssize_t
273 encode_qp(char *out, const char *in, off_t length, int headerMode)
274 {
275 	int g = 0, i = 0;
276 
277 	for (; i < length; i++) {
278 		if (((uint8 *)(in))[i] > 127 || in[i] == '?' || in[i] == '='
279 			|| in[i] == '_'
280 			// Also encode the letter F in "From " at the start of the line,
281 			// which Unix systems use to mark the start of messages in their
282 			// mbox files.
283 			|| (in[i] == 'F' && i + 5 <= length && (i == 0 || in[i - 1] == '\n')
284 				&& in[i + 1] == 'r' && in[i + 2] == 'o' && in[i + 3] == 'm'
285 				&& in[i + 4] == ' ')) {
286 			out[g++] = '=';
287 			out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f];
288 			out[g++] = kHexAlphabet[in[i] & 0x0f];
289 		} else if (headerMode && (in[i] == ' ' || in[i] == '\t')) {
290 			out[g++] = '_';
291 		} else if (headerMode && in[i] >= 0 && in[i] < 32) {
292 			// Control codes in headers need to be sanitized, otherwise certain
293 			// Japanese ISPs mangle the headers badly.  But they don't mangle
294 			// the body.
295 			out[g++] = '=';
296 			out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f];
297 			out[g++] = kHexAlphabet[in[i] & 0x0f];
298 		} else
299 			out[g++] = in[i];
300 	}
301 
302 	return g;
303 }
304 
305 
// Converts one character of uuencoded text to its 6-bit value. Kept local to
// the decoder; it computes the same value as the DEC() macro.
static int
uu_char_value(unsigned char c)
{
	return (c - ' ') & 077;
}


/*!	Decodes uuencoded text from \a in into \a out.

	Decoding starts on the line after the first occurrence of "begin" and
	stops at a line starting with "end", at a NUL byte, or after \a length
	bytes, whichever comes first. The first character of each line gives the
	number of bytes the line encodes; every following group of four
	characters yields up to three of them. A line that ends before all of
	its announced data is present stops the decoding.

	\param out Destination buffer; its size is not checked here. The output
		is not NUL terminated.
	\param in The uuencoded text. It does not have to be NUL terminated.
	\param length Number of bytes in \a in.
	\return The number of bytes written to \a out, or -1 if an argument is
		invalid or no "begin" is found.
*/
ssize_t
uu_decode(char *out, const char *in, off_t length)
{
	const unsigned char *cursor = (const unsigned char *)in;
	const unsigned char *inputEnd;
	unsigned char *dest = (unsigned char *)out;

	if (in == NULL || out == NULL || length <= 0)
		return -1;

	// Never look beyond the given length, and stop at an embedded NUL like
	// a C string would.
	inputEnd = (const unsigned char *)memchr(in, '\0', (size_t)length);
	if (inputEnd == NULL)
		inputEnd = cursor + length;

	// Find the header; without one there is nothing to decode.
	while (inputEnd - cursor >= 5 && memcmp(cursor, "begin", 5) != 0)
		cursor++;
	if (inputEnd - cursor < 5)
		return -1;

	for (;;) {
		const unsigned char *group;
		long remaining;

		// Advance from the start of the current line (initially the header)
		// to the start of the next one.
		while (cursor < inputEnd && *cursor != '\n' && *cursor != '\r')
			cursor++;
		while (cursor < inputEnd && (*cursor == '\n' || *cursor == '\r'))
			cursor++;

		if (cursor >= inputEnd)
			break;
		if (inputEnd - cursor >= 3 && memcmp(cursor, "end", 3) == 0)
			break;

		remaining = uu_char_value(cursor[0]);
		group = cursor + 1;

		while (remaining > 0) {
			// A full group needs four characters, a final group with one
			// or two bytes left only needs two or three.
			const long needed = remaining >= 3 ? 4 : remaining + 1;

			if (inputEnd - group < needed) {
				// The input ends in the middle of a line.
				return (ssize_t)(dest - (unsigned char *)out);
			}

			*dest++ = (unsigned char)(uu_char_value(group[0]) << 2
				| uu_char_value(group[1]) >> 4);
			if (remaining >= 2) {
				*dest++ = (unsigned char)(uu_char_value(group[1]) << 4
					| uu_char_value(group[2]) >> 2);
			}
			if (remaining >= 3) {
				*dest++ = (unsigned char)(uu_char_value(group[2]) << 6
					| uu_char_value(group[3]));
			}

			group += needed;
			remaining -= 3;
		}
	}

	// The result is the amount of data produced, measured from the start of
	// the output buffer.
	return (ssize_t)(dest - (unsigned char *)out);
}
348 
349