xref: /haiku/src/kits/mail/mail_encoding.cpp (revision 41660cabf4f1cabafed672078cf3ee6d96eee073)
1 /*
2  * Copyright 2011, Haiku, Inc. All rights reserved.
3  * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
4  */
5 
6 
7 #include <ctype.h>
8 #include <string.h>
9 #include <strings.h>
10 
11 #include <SupportDefs.h>
12 
13 #include <mail_encoding.h>
14 
15 
// Converts one uuencoded character into the 6-bit value it stands for:
// the offset from ' ', reduced modulo 64.
#define DEC(c) (((c) - ' ') & 0x3f)


// Upper-case hexadecimal digits, indexed by nibble value (0-15).
static const char kHexAlphabet[16] = {
	'0', '1', '2', '3',
	'4', '5', '6', '7',
	'8', '9', 'A', 'B',
	'C', 'D', 'E', 'F'
};
21 
22 
23 ssize_t
24 encode(mail_encoding encoding, char *out, const char *in, off_t length,
25 	int headerMode)
26 {
27 	switch (encoding) {
28 		case base64:
29 			return encode_base64(out,in,length,headerMode);
30 		case quoted_printable:
31 			return encode_qp(out,in,length,headerMode);
32 		case seven_bit:
33 		case eight_bit:
34 		case no_encoding:
35 			memcpy(out,in,length);
36 			return length;
37 		case uuencode:
38 		default:
39 			return -1;
40 	}
41 
42 	return -1;
43 }
44 
45 
46 ssize_t
47 decode(mail_encoding encoding, char *out, const char *in, off_t length,
48 	int underscoreIsSpace)
49 {
50 	switch (encoding) {
51 		case base64:
52 			return decode_base64(out, in, length);
53 		case uuencode:
54 			return uu_decode(out, in, length);
55 		case seven_bit:
56 		case eight_bit:
57 		case no_encoding:
58 			memcpy(out, in, length);
59 			return length;
60 		case quoted_printable:
61 			return decode_qp(out, in, length, underscoreIsSpace);
62 		default:
63 			break;
64 	}
65 
66 	return -1;
67 }
68 
69 
70 ssize_t
71 max_encoded_length(mail_encoding encoding, off_t length)
72 {
73 	switch (encoding) {
74 		case base64:
75 		{
76 			double result = length * 1.33333333333333;
77 			result += (result / BASE64_LINELENGTH) * 2 + 20;
78 			return (ssize_t)(result);
79 		}
80 		case quoted_printable:
81 			return length * 3;
82 		case seven_bit:
83 		case eight_bit:
84 		case no_encoding:
85 			return length;
86 		case uuencode:
87 		default:
88 			return -1;
89 	}
90 
91 	return -1;
92 }
93 
94 
95 mail_encoding
96 encoding_for_cte(const char *cte)
97 {
98 	if (cte == NULL)
99 		return no_encoding;
100 
101 	if (strcasecmp(cte,"uuencode") == 0)
102 		return uuencode;
103 	if (strcasecmp(cte,"base64") == 0)
104 		return base64;
105 	if (strcasecmp(cte,"quoted-printable") == 0)
106 		return quoted_printable;
107 	if (strcasecmp(cte,"7bit") == 0)
108 		return seven_bit;
109 	if (strcasecmp(cte,"8bit") == 0)
110 		return eight_bit;
111 
112 	return no_encoding;
113 }
114 
115 
/*!	Returns the value (0-15) of the hexadecimal digit \a c, accepting both
	upper and lower case, or -1 if \a c is not a hexadecimal digit.

	This deliberately avoids toupper(): passing it a negative \c char is
	undefined, and its result depends on the current locale.
*/
static int
qp_hex_digit_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;

	return -1;
}


/*!	Decodes \a length bytes of quoted-printable data from \a in into \a out.

	- "=XX" with two hexadecimal digits becomes the byte with that value.
	- A soft line break, i.e. '=' directly followed by CR LF or by a bare LF,
	  is removed.
	- '_' becomes a space if \a underscoreIsSpace is non-zero.
	- Everything else, including a '=' that starts none of the sequences
	  above, is copied unchanged.

	A terminating NUL is appended, so \a out must have room for
	\a length + 1 bytes.

	\return The number of decoded bytes, not counting the terminating NUL.
*/
ssize_t
decode_qp(char *out, const char *in, off_t length, int underscoreIsSpace)
{
	char *dataout = out;
	const char *datain = in;
	const char *dataend = in + length;

	while (datain < dataend) {
		if (*datain == '=') {
			if (dataend - datain > 2) {
				int high = qp_hex_digit_value(datain[1]);
				int low = qp_hex_digit_value(datain[2]);

				if (high >= 0 && low >= 0) {
					*dataout++ = (char)((high << 4) | low);
					datain += 3;
					continue;
				}

				if (datain[1] == '\r' && datain[2] == '\n') {
					// strip =<CR><NL>
					datain += 3;
					continue;
				}
			}

			if (dataend - datain > 1 && datain[1] == '\n') {
				// strip =<NL>, a soft line break whose line ending has
				// already been converted to a bare line feed
				datain += 2;
				continue;
			}

			// not a valid escape sequence, the '=' is copied literally
		} else if (*datain == '_' && underscoreIsSpace) {
			*dataout++ = ' ';
			++datain;
			continue;
		}

		*dataout++ = *datain++;
	}

	*dataout = '\0';
	return dataout - out;
}
154 
155 
156 ssize_t
157 encode_qp(char *out, const char *in, off_t length, int headerMode)
158 {
159 	int g = 0, i = 0;
160 
161 	for (; i < length; i++) {
162 		if (((uint8 *)(in))[i] > 127 || in[i] == '?' || in[i] == '='
163 			|| in[i] == '_'
164 			// Also encode the letter F in "From " at the start of the line,
165 			// which Unix systems use to mark the start of messages in their
166 			// mbox files.
167 			|| (in[i] == 'F' && i + 5 <= length && (i == 0 || in[i - 1] == '\n')
168 				&& in[i + 1] == 'r' && in[i + 2] == 'o' && in[i + 3] == 'm'
169 				&& in[i + 4] == ' ')) {
170 			out[g++] = '=';
171 			out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f];
172 			out[g++] = kHexAlphabet[in[i] & 0x0f];
173 		} else if (headerMode && (in[i] == ' ' || in[i] == '\t')) {
174 			out[g++] = '_';
175 		} else if (headerMode && in[i] >= 0 && in[i] < 32) {
176 			// Control codes in headers need to be sanitized, otherwise certain
177 			// Japanese ISPs mangle the headers badly.  But they don't mangle
178 			// the body.
179 			out[g++] = '=';
180 			out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f];
181 			out[g++] = kHexAlphabet[in[i] & 0x0f];
182 		} else
183 			out[g++] = in[i];
184 	}
185 
186 	return g;
187 }
188 
189 
/*!	Returns the 6-bit value of the uuencoded character at \a index of the
	line starting at \a line, or 0 if \a index is not within the
	\a lineLength characters of that line. The latter makes lines whose
	trailing padding has been stripped decode as if it were still there,
	and keeps all reads within the line.
*/
static unsigned
uu_value_at(const unsigned char *line, size_t lineLength, size_t index)
{
	if (index >= lineLength)
		return 0;

	return ((unsigned)line[index] - ' ') & 077;
}


/*!	Decodes uuencoded data from \a in into \a out.

	At most \a length bytes of \a in are looked at, and a NUL byte within
	that range ends the data early. Everything up to and including the line
	that contains the first "begin" is skipped. Each following line is
	decoded until a line starting with "end" or the end of the data is
	reached. The first character of a line holds the number of bytes encoded
	in it, each following group of four characters holds up to three bytes.

	\a out is not NUL terminated. Its size is not checked; the caller has to
	provide a buffer large enough for the decoded data.

	\return The number of bytes written to \a out, or -1 if \a length is not
		positive or the data contains no "begin".
*/
ssize_t
uu_decode(char *out, const char *in, off_t length)
{
	const unsigned char *pos = (const unsigned char *)in;
	const unsigned char *end;
	const unsigned char *terminator;
	unsigned char *dest = (unsigned char *)out;

	if (length <= 0)
		return -1;

	// never look beyond the given length or a terminating NUL
	end = pos + length;
	terminator = (const unsigned char *)memchr(pos, '\0', (size_t)length);
	if (terminator != NULL)
		end = terminator;

	// find the header
	while (end - pos >= 5 && memcmp(pos, "begin", 5) != 0)
		pos++;
	if (end - pos < 5)
		return -1;

	for (;;) {
		const unsigned char *lineEnd;
		size_t lineLength;
		size_t index;
		unsigned remaining;

		// skip what is left of the current line and its line break
		while (pos < end && *pos != '\n' && *pos != '\r')
			pos++;
		while (pos < end && (*pos == '\n' || *pos == '\r'))
			pos++;

		if (pos >= end)
			break;
		if (end - pos >= 3 && memcmp(pos, "end", 3) == 0)
			break;

		lineEnd = pos;
		while (lineEnd < end && *lineEnd != '\n' && *lineEnd != '\r')
			lineEnd++;
		lineLength = (size_t)(lineEnd - pos);

		// the first character is the number of bytes encoded in this line
		remaining = uu_value_at(pos, lineLength, 0);

		for (index = 1; remaining > 0; index += 4) {
			unsigned a = uu_value_at(pos, lineLength, index);
			unsigned b = uu_value_at(pos, lineLength, index + 1);
			unsigned c = uu_value_at(pos, lineLength, index + 2);
			unsigned d = uu_value_at(pos, lineLength, index + 3);

			*dest++ = (unsigned char)(a << 2 | b >> 4);
			if (remaining >= 2)
				*dest++ = (unsigned char)(b << 4 | c >> 2);
			if (remaining >= 3)
				*dest++ = (unsigned char)(c << 6 | d);

			remaining = remaining > 3 ? remaining - 3 : 0;
		}

		pos = lineEnd;
	}

	return (ssize_t)(dest - (unsigned char *)out);
}
232 
233