1 /* 2 * Copyright 2011, Haiku, Inc. All rights reserved. 3 * Copyright 2001-2003 Dr. Zoidberg Enterprises. All rights reserved. 4 */ 5 6 7 #include <ctype.h> 8 #include <string.h> 9 10 #include <SupportDefs.h> 11 12 #include <mail_encoding.h> 13 14 15 #define DEC(c) (((c) - ' ') & 077) 16 17 18 static const char kBase64Alphabet[64] = { 19 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 20 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 21 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 22 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 23 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 24 '+', 25 '/' 26 }; 27 28 static const char kHexAlphabet[16] = {'0', '1', '2', '3', '4', '5', '6', '7', 29 '8','9','A','B','C','D','E','F'}; 30 31 32 ssize_t 33 encode(mail_encoding encoding, char *out, const char *in, off_t length, 34 int headerMode) 35 { 36 switch (encoding) { 37 case base64: 38 return encode_base64(out,in,length,headerMode); 39 case quoted_printable: 40 return encode_qp(out,in,length,headerMode); 41 case seven_bit: 42 case eight_bit: 43 case no_encoding: 44 memcpy(out,in,length); 45 return length; 46 case uuencode: 47 default: 48 return -1; 49 } 50 51 return -1; 52 } 53 54 55 ssize_t 56 decode(mail_encoding encoding, char *out, const char *in, off_t length, 57 int underscoreIsSpace) 58 { 59 switch (encoding) { 60 case base64: 61 return decode_base64(out, in, length); 62 case uuencode: 63 return uu_decode(out, in, length); 64 case seven_bit: 65 case eight_bit: 66 case no_encoding: 67 memcpy(out, in, length); 68 return length; 69 case quoted_printable: 70 return decode_qp(out, in, length, underscoreIsSpace); 71 default: 72 break; 73 } 74 75 return -1; 76 } 77 78 79 ssize_t 80 max_encoded_length(mail_encoding encoding, off_t length) 81 { 82 switch (encoding) { 83 case base64: 84 { 85 double result = length * 1.33333333333333; 86 result += (result / BASE64_LINELENGTH) * 2 + 20; 87 return (ssize_t)(result); 88 } 89 case quoted_printable: 90 return length * 3; 91 case seven_bit: 92 case eight_bit: 93 case no_encoding: 94 return length; 95 case uuencode: 96 default: 97 return -1; 98 } 99 100 return -1; 101 } 102 103 104 mail_encoding 105 encoding_for_cte(const char *cte) 106 { 107 if (cte == NULL) 108 return no_encoding; 109 110 if (strcasecmp(cte,"uuencode") == 0) 111 return uuencode; 112 if (strcasecmp(cte,"base64") == 0) 113 return base64; 114 if (strcasecmp(cte,"quoted-printable") == 0) 115 return quoted_printable; 116 if (strcasecmp(cte,"7bit") == 0) 117 return seven_bit; 118 if (strcasecmp(cte,"8bit") == 0) 119 return eight_bit; 120 121 return no_encoding; 122 } 123 124 125 ssize_t 126 encode_base64(char *out, const char *in, off_t length, int headerMode) 127 { 128 uint32 concat; 129 int i = 0; 130 int k = 0; 131 int lineLength = 4; 132 // Stop before it actually gets too long 133 134 while (i < length) { 135 concat = ((in[i] & 0xff) << 16); 136 137 if ((i+1) < length) 138 concat |= ((in[i+1] & 0xff) << 8); 139 if ((i+2) < length) 140 concat |= (in[i+2] & 0xff); 141 142 i += 3; 143 144 out[k++] = kBase64Alphabet[(concat >> 18) & 63]; 145 out[k++] = kBase64Alphabet[(concat >> 12) & 63]; 146 out[k++] = kBase64Alphabet[(concat >> 6) & 63]; 147 out[k++] = kBase64Alphabet[concat & 63]; 148 149 if (i >= length) { 150 int v; 151 for (v = 0; v <= (i - length); v++) 152 out[k-v] = '='; 153 } 154 155 lineLength += 4; 156 157 // No line breaks in header mode, since the text is part of a Subject: 158 // line or some other single header line. The header code will do word 159 // wrapping separately from this encoding stuff. 160 if (!headerMode && lineLength > BASE64_LINELENGTH) { 161 out[k++] = '\r'; 162 out[k++] = '\n'; 163 164 lineLength = 4; 165 } 166 } 167 168 return k; 169 } 170 171 172 ssize_t 173 decode_base64(char *out, const char *in, off_t length) 174 { 175 uint32 concat, value; 176 int lastOutLine = 0; 177 int i, j; 178 int outIndex = 0; 179 180 for (i = 0; i < length; i += 4) { 181 concat = 0; 182 183 for (j = 0; j < 4 && (i + j) < length; j++) { 184 value = in[i + j]; 185 186 if (value == '\n' || value == '\r') { 187 // jump over line breaks 188 lastOutLine = outIndex; 189 i++; 190 j--; 191 continue; 192 } 193 194 if ((value >= 'A') && (value <= 'Z')) 195 value -= 'A'; 196 else if ((value >= 'a') && (value <= 'z')) 197 value = value - 'a' + 26; 198 else if ((value >= '0') && (value <= '9')) 199 value = value - '0' + 52; 200 else if (value == '+') 201 value = 62; 202 else if (value == '/') 203 value = 63; 204 else if (value == '=') 205 break; 206 else { 207 // there is an invalid character in this line - we will 208 // ignore the whole line and go to the next 209 outIndex = lastOutLine; 210 while (i < length && in[i] != '\n' && in[i] != '\r') 211 i++; 212 concat = 0; 213 } 214 215 value = value << ((3-j)*6); 216 217 concat |= value; 218 } 219 220 if (j > 1) 221 out[outIndex++] = (concat & 0x00ff0000) >> 16; 222 if (j > 2) 223 out[outIndex++] = (concat & 0x0000ff00) >> 8; 224 if (j > 3) 225 out[outIndex++] = (concat & 0x000000ff); 226 } 227 228 return outIndex; 229 } 230 231 232 ssize_t 233 decode_qp(char *out, const char *in, off_t length, int underscoreIsSpace) 234 { 235 // decode Quoted Printable 236 char *dataout = out; 237 const char *datain = in, *dataend = in + length; 238 239 while (datain < dataend) { 240 if (*datain == '=' && dataend - datain > 2) { 241 int a = toupper(datain[1]); 242 a -= a >= '0' && a <= '9' ? '0' : (a >= 'A' && a <= 'F' 243 ? 'A' - 10 : a + 1); 244 245 int b = toupper(datain[2]); 246 b -= b >= '0' && b <= '9' ? '0' : (b >= 'A' && b <= 'F' 247 ? 'A' - 10 : b + 1); 248 249 if (a >= 0 && b >= 0) { 250 *dataout++ = (a << 4) + b; 251 datain += 3; 252 continue; 253 } else if (datain[1] == '\r' && datain[2] == '\n') { 254 // strip =<CR><NL> 255 datain += 3; 256 continue; 257 } 258 } else if (*datain == '_' && underscoreIsSpace) { 259 *dataout++ = ' '; 260 ++datain; 261 continue; 262 } 263 264 *dataout++ = *datain++; 265 } 266 267 *dataout = '\0'; 268 return dataout - out; 269 } 270 271 272 ssize_t 273 encode_qp(char *out, const char *in, off_t length, int headerMode) 274 { 275 int g = 0, i = 0; 276 277 for (; i < length; i++) { 278 if (((uint8 *)(in))[i] > 127 || in[i] == '?' || in[i] == '=' 279 || in[i] == '_' 280 // Also encode the letter F in "From " at the start of the line, 281 // which Unix systems use to mark the start of messages in their 282 // mbox files. 283 || (in[i] == 'F' && i + 5 <= length && (i == 0 || in[i - 1] == '\n') 284 && in[i + 1] == 'r' && in[i + 2] == 'o' && in[i + 3] == 'm' 285 && in[i + 4] == ' ')) { 286 out[g++] = '='; 287 out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f]; 288 out[g++] = kHexAlphabet[in[i] & 0x0f]; 289 } else if (headerMode && (in[i] == ' ' || in[i] == '\t')) { 290 out[g++] = '_'; 291 } else if (headerMode && in[i] >= 0 && in[i] < 32) { 292 // Control codes in headers need to be sanitized, otherwise certain 293 // Japanese ISPs mangle the headers badly. But they don't mangle 294 // the body. 295 out[g++] = '='; 296 out[g++] = kHexAlphabet[(in[i] >> 4) & 0x0f]; 297 out[g++] = kHexAlphabet[in[i] & 0x0f]; 298 } else 299 out[g++] = in[i]; 300 } 301 302 return g; 303 } 304 305 306 ssize_t 307 uu_decode(char *out, const char *in, off_t length) 308 { 309 long n; 310 uint8 *p, *inBuffer = (uint8 *)in; 311 uint8 *outBuffer = (uint8 *)out; 312 313 inBuffer = (uint8 *)strstr((char *)inBuffer, "begin"); 314 goto enterLoop; 315 316 while ((inBuffer - (uint8 *)in) <= length 317 && strncmp((char *)inBuffer, "end", 3)) { 318 p = inBuffer; 319 n = DEC(inBuffer[0]); 320 321 for (++inBuffer; n > 0; inBuffer += 4, n -= 3) { 322 if (n >= 3) { 323 *outBuffer++ = DEC(inBuffer[0]) << 2 | DEC (inBuffer[1]) >> 4; 324 *outBuffer++ = DEC(inBuffer[1]) << 4 | DEC (inBuffer[2]) >> 2; 325 *outBuffer++ = DEC(inBuffer[2]) << 6 | DEC (inBuffer[3]); 326 } else { 327 if (n >= 1) { 328 *outBuffer++ = DEC(inBuffer[0]) << 2 329 | DEC (inBuffer[1]) >> 4; 330 } 331 if (n >= 2) { 332 *outBuffer++ = DEC(inBuffer[1]) << 4 333 | DEC (inBuffer[2]) >> 2; 334 } 335 } 336 } 337 inBuffer = p; 338 339 enterLoop: 340 while (inBuffer[0] != '\n' && inBuffer[0] != '\r' && inBuffer[0] != 0) 341 inBuffer++; 342 while (inBuffer[0] == '\n' || inBuffer[0] == '\r') 343 inBuffer++; 344 } 345 346 return (ssize_t)(outBuffer - (uint8 *)in); 347 } 348 349