/*
 * Copyright 2011, Haiku, Inc. All rights reserved.
 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved.
 */
5
6
7 #include <ctype.h>
8 #include <string.h>
9 #include <strings.h>
10
11 #include <SupportDefs.h>
12
13 #include <mail_encoding.h>
14
15
// Maps one character of a uuencoded line to its 6-bit value: the offset from
// the space character, masked to the low six bits (so both ' ' and '`'
// yield 0).
#define DEC(c) (((c) - ' ') & 077)


// Upper-case hexadecimal digits, indexed by nibble value (0..15). The table
// has exactly 16 entries and is not NUL-terminated.
static const char kHexAlphabet[16] = {'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
21
22
23 ssize_t
encode(mail_encoding encoding,char * out,const char * in,off_t length,int headerMode)24 encode(mail_encoding encoding, char *out, const char *in, off_t length,
25 int headerMode)
26 {
27 switch (encoding) {
28 case base64:
29 return encode_base64(out,in,length,headerMode);
30 case quoted_printable:
31 return encode_qp(out,in,length,headerMode);
32 case seven_bit:
33 case eight_bit:
34 case no_encoding:
35 memcpy(out,in,length);
36 return length;
37 case uuencode:
38 default:
39 return -1;
40 }
41
42 return -1;
43 }
44
45
46 ssize_t
decode(mail_encoding encoding,char * out,const char * in,off_t length,int underscoreIsSpace)47 decode(mail_encoding encoding, char *out, const char *in, off_t length,
48 int underscoreIsSpace)
49 {
50 switch (encoding) {
51 case base64:
52 return decode_base64(out, in, length);
53 case uuencode:
54 return uu_decode(out, in, length);
55 case seven_bit:
56 case eight_bit:
57 case no_encoding:
58 memcpy(out, in, length);
59 return length;
60 case quoted_printable:
61 return decode_qp(out, in, length, underscoreIsSpace);
62 default:
63 break;
64 }
65
66 return -1;
67 }
68
69
70 ssize_t
max_encoded_length(mail_encoding encoding,off_t length)71 max_encoded_length(mail_encoding encoding, off_t length)
72 {
73 switch (encoding) {
74 case base64:
75 {
76 double result = length * 1.33333333333333;
77 result += (result / BASE64_LINELENGTH) * 2 + 20;
78 return (ssize_t)(result);
79 }
80 case quoted_printable:
81 return length * 3;
82 case seven_bit:
83 case eight_bit:
84 case no_encoding:
85 return length;
86 case uuencode:
87 default:
88 return -1;
89 }
90
91 return -1;
92 }
93
94
95 mail_encoding
encoding_for_cte(const char * cte)96 encoding_for_cte(const char *cte)
97 {
98 if (cte == NULL)
99 return no_encoding;
100
101 if (strcasecmp(cte,"uuencode") == 0)
102 return uuencode;
103 if (strcasecmp(cte,"base64") == 0)
104 return base64;
105 if (strcasecmp(cte,"quoted-printable") == 0)
106 return quoted_printable;
107 if (strcasecmp(cte,"7bit") == 0)
108 return seven_bit;
109 if (strcasecmp(cte,"8bit") == 0)
110 return eight_bit;
111
112 return no_encoding;
113 }
114
115
// Returns the value (0..15) of the hexadecimal digit `c`, accepting both
// upper and lower case, or -1 if `c` is not a hexadecimal digit.
// The check is done by hand rather than via <ctype.h> so that it is
// independent of the locale and well defined for bytes >= 0x80, which are
// negative when `char` is signed.
static int
qp_hex_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}


// Decodes `length` bytes of quoted-printable text from `in` into `out`.
//
//	- "=XY" with two hexadecimal digits becomes the byte 0xXY.
//	- "=" followed by CR LF (a soft line break) is dropped.
//	- "_" becomes a space when `underscoreIsSpace` is non-zero.
//	- Everything else, including a "=" that starts neither of the sequences
//	  above or has fewer than two bytes after it, is copied unchanged.
//
// A terminating NUL is appended, so `out` must have room for `length` + 1
// bytes. Returns the number of decoded bytes, not counting the NUL.
ssize_t
decode_qp(char *out, const char *in, off_t length, int underscoreIsSpace)
{
	char *dataout = out;
	const char *datain = in;
	const char *dataend = in + length;

	while (datain < dataend) {
		if (*datain == '=' && dataend - datain > 2) {
			const int high = qp_hex_value(datain[1]);
			const int low = qp_hex_value(datain[2]);

			if (high >= 0 && low >= 0) {
				*dataout++ = (char)((high << 4) | low);
				datain += 3;
				continue;
			}

			if (datain[1] == '\r' && datain[2] == '\n') {
				// strip =<CR><NL>
				datain += 3;
				continue;
			}

			// Not a valid escape: fall through and copy the '=' literally.
		} else if (*datain == '_' && underscoreIsSpace) {
			*dataout++ = ' ';
			++datain;
			continue;
		}

		*dataout++ = *datain++;
	}

	*dataout = '\0';
	return dataout - out;
}
154
155
156 ssize_t
encode_qp(char * out,const char * in,off_t length,int headerMode)157 encode_qp(char *out, const char *in, off_t length, int headerMode)
158 {
159 int g = 0, i = 0;
160
161 for (; i < length; i++) {
162 if (((uint8 *)(in))[i] > 127 || in[i] == '?' || in[i] == '='
163 || in[i] == '_'
164 // Also encode the letter F in "From " at the start of the line,
165 // which Unix systems use to mark the start of messages in their
166 // mbox files.
167 || (in[i] == 'F' && i + 5 <= length && (i == 0 || in[i - 1] == '\n')
168 && in[i + 1] == 'r' && in[i + 2] == 'o' && in[i + 3] == 'm'
169 && in[i + 4] == ' ')) {
170 out[g++] = '=';
171 out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f];
172 out[g++] = kHexAlphabet[in[i] & 0x0f];
173 } else if (headerMode && (in[i] == ' ' || in[i] == '\t')) {
174 out[g++] = '_';
175 } else if (headerMode && in[i] >= 0 && in[i] < 32) {
176 // Control codes in headers need to be sanitized, otherwise certain
177 // Japanese ISPs mangle the headers badly. But they don't mangle
178 // the body.
179 out[g++] = '=';
180 out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f];
181 out[g++] = kHexAlphabet[in[i] & 0x0f];
182 } else
183 out[g++] = in[i];
184 }
185
186 return g;
187 }
188
189
// Returns the 6-bit value carried by one character of a uuencoded line: its
// offset from the space character, masked to six bits.
static unsigned int
uu_value(unsigned char c)
{
	return (unsigned int)(c - ' ') & 077;
}


// Advances `pos` to the start of the next line: first up to the next CR, LF
// or NUL, then past any run of CR/LF characters. Never moves beyond `end`.
static const unsigned char *
uu_skip_line(const unsigned char *pos, const unsigned char *end)
{
	while (pos < end && *pos != '\n' && *pos != '\r' && *pos != '\0')
		pos++;
	while (pos < end && (*pos == '\n' || *pos == '\r'))
		pos++;
	return pos;
}


// Decodes the uuencoded data found in the first `length` bytes of `in` and
// stores the result in `out`.
//
// Decoding starts on the line after the first occurrence of "begin" and
// stops at a line starting with "end", at a NUL byte, or at the end of the
// input, whichever comes first. The byte count announced by a line's first
// character is clamped to what the characters actually present on that line
// can encode, so no read goes past the line and the output never exceeds
// 3/4 of the input size.
//
// Returns the number of bytes written to `out`, or -1 if either pointer is
// NULL, `length` is not positive, or no "begin" marker is present.
ssize_t
uu_decode(char *out, const char *in, off_t length)
{
	const unsigned char *cursor = (const unsigned char *)in;
	const unsigned char *inputEnd;
	const unsigned char *header = NULL;
	unsigned char *dest = (unsigned char *)out;

	if (in == NULL || out == NULL || length <= 0)
		return -1;

	inputEnd = cursor + length;

	// Bounded search for the "begin" marker.
	for (; inputEnd - cursor >= 5 && *cursor != '\0'; cursor++) {
		if (memcmp(cursor, "begin", 5) == 0) {
			header = cursor;
			break;
		}
	}
	if (header == NULL)
		return -1;

	// The rest of the header line (mode and file name) is ignored.
	cursor = uu_skip_line(header, inputEnd);

	while (cursor < inputEnd && *cursor != '\0') {
		const unsigned char *line = cursor;
		size_t lineLength = 0;
		size_t usable;
		size_t index;
		long remaining;
		long capacity;

		if (inputEnd - line >= 3 && memcmp(line, "end", 3) == 0)
			break;

		while (line + lineLength < inputEnd && line[lineLength] != '\n'
			&& line[lineLength] != '\r' && line[lineLength] != '\0') {
			lineLength++;
		}

		// The first character announces how many bytes the line encodes.
		// Four characters carry three bytes; a line can announce at most
		// 63 bytes, i.e. 84 characters, so longer lines need no more.
		usable = lineLength - 1;
		if (usable > 84)
			usable = 84;
		capacity = (long)((usable * 3) / 4);

		remaining = (long)uu_value(line[0]);
		if (remaining > capacity)
			remaining = capacity;

		for (index = 1; remaining > 0; index += 4, remaining -= 3) {
			const unsigned int a = uu_value(line[index]);
			const unsigned int b = uu_value(line[index + 1]);

			*dest++ = (unsigned char)(a << 2 | b >> 4);

			if (remaining >= 2) {
				const unsigned int c = uu_value(line[index + 2]);

				*dest++ = (unsigned char)(b << 4 | c >> 2);

				if (remaining >= 3) {
					const unsigned int d = uu_value(line[index + 3]);

					*dest++ = (unsigned char)(c << 6 | d);
				}
			}
		}

		cursor = uu_skip_line(line + lineLength, inputEnd);
	}

	return (ssize_t)(dest - (unsigned char *)out);
}
232
233