1 /* 2 * Copyright (C) 1999-2001 Free Software Foundation, Inc. 3 * This file is part of the GNU LIBICONV Library. 4 * 5 * The GNU LIBICONV Library is free software; you can redistribute it 6 * and/or modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either version 2 8 * of the License, or (at your option) any later version. 9 * 10 * The GNU LIBICONV Library is distributed in the hope that it will be 11 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public 16 * License along with the GNU LIBICONV Library; see the file COPYING.LIB. 17 * If not, write to the Free Software Foundation, Inc., 51 Franklin Street, 18 * Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 21 /* 22 * UTF-7 23 */ 24 25 /* Specification: RFC 2152 (and old RFC 1641, RFC 1642) */ 26 /* The original Base64 encoding is defined in RFC 2045. */ 27 28 /* Set of direct characters: 29 * A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr 30 */ 31 static const unsigned char direct_tab[128/8] = { 32 0x00, 0x26, 0x00, 0x00, 0x81, 0xf3, 0xff, 0x87, 33 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07, 34 }; 35 #define isdirect(ch) ((ch) < 128 && ((direct_tab[(ch)>>3] >> (ch & 7)) & 1)) 36 37 /* Set of direct and optional direct characters: 38 * A-Z a-z 0-9 ' ( ) , - . / : ? space tab lf cr 39 * ! " # $ % & * ; < = > @ [ ] ^ _ ` { | } 40 */ 41 static const unsigned char xdirect_tab[128/8] = { 42 0x00, 0x26, 0x00, 0x00, 0xff, 0xf7, 0xff, 0xff, 43 0xff, 0xff, 0xff, 0xef, 0xff, 0xff, 0xff, 0x3f, 44 }; 45 #define isxdirect(ch) ((ch) < 128 && ((xdirect_tab[(ch)>>3] >> (ch & 7)) & 1)) 46 47 /* Set of base64 characters, extended: 48 * A-Z a-z 0-9 + / - 49 */ 50 static const unsigned char xbase64_tab[128/8] = { 51 0x00, 0x00, 0x00, 0x00, 0x00, 0xa8, 0xff, 0x03, 52 0xfe, 0xff, 0xff, 0x07, 0xfe, 0xff, 0xff, 0x07, 53 }; 54 #define isxbase64(ch) ((ch) < 128 && ((xbase64_tab[(ch)>>3] >> (ch & 7)) & 1)) 55 56 /* 57 * The state is structured as follows: 58 * bit 1..0: shift 59 * bit 7..2: data 60 * Precise meaning: 61 * shift data 62 * 0 0 not inside base64 encoding 63 * 1 0 inside base64, no pending bits 64 * 2 XXXX00 inside base64, 4 bits remain from 2nd byte 65 * 3 XX0000 inside base64, 2 bits remain from 3rd byte 66 */ 67 68 static int 69 utf7_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n) 70 { 71 state_t state = conv->istate; 72 int count = 0; /* number of input bytes already read */ 73 if (state & 3) 74 goto active; 75 else 76 goto inactive; 77 78 inactive: 79 { 80 /* Here (state & 3) == 0 */ 81 if (n < count+1) 82 goto none; 83 { 84 unsigned char c = *s; 85 if (isxdirect(c)) { 86 *pwc = (ucs4_t) c; 87 conv->istate = state; 88 return count+1; 89 } 90 if (c == '+') { 91 if (n < count+2) 92 goto none; 93 if (s[1] == '-') { 94 *pwc = (ucs4_t) '+'; 95 conv->istate = state; 96 return count+2; 97 } 98 s++; count++; 99 state = 1; 100 goto active; 101 } 102 return RET_ILSEQ; 103 } 104 } 105 106 active: 107 { 108 /* base64 encoding active */ 109 unsigned int wc = 0; 110 state_t base64state = state; 111 unsigned int kmax = 2; /* number of payload bytes to read */ 112 unsigned int k = 0; /* number of payload bytes already read */ 113 unsigned int base64count = 0; /* number of base64 bytes already read */ 114 for (;;) { 115 unsigned char c = *s; 116 unsigned int i; 117 if (c >= 'A' && c <= 'Z') 118 i = c-'A'; 119 else if (c >= 'a' && c <= 'z') 120 i = c-'a'+26; 121 else if (c >= '0' && c <= '9') 122 i = c-'0'+52; 123 else if (c == '+') 124 i = 62; 125 else if (c == '/') 126 i = 63; 127 else { 128 /* c terminates base64 encoding */ 129 if (base64state & -4) 130 return RET_ILSEQ; /* data must be 0, otherwise illegal */ 131 if (base64count) 132 return RET_ILSEQ; /* partial UTF-16 characters are invalid */ 133 if (c == '-') { 134 s++; count++; 135 } 136 state = 0; 137 goto inactive; 138 } 139 s++; base64count++; 140 /* read 6 bits: 0 <= i < 64 */ 141 switch (base64state & 3) { 142 case 1: /* inside base64, no pending bits */ 143 base64state = (i << 2) | 0; break; 144 case 0: /* inside base64, 6 bits remain from 1st byte */ 145 wc = (wc << 8) | (base64state & -4) | (i >> 4); k++; 146 base64state = ((i & 15) << 4) | 2; break; 147 case 2: /* inside base64, 4 bits remain from 2nd byte */ 148 wc = (wc << 8) | (base64state & -4) | (i >> 2); k++; 149 base64state = ((i & 3) << 6) | 3; break; 150 case 3: /* inside base64, 2 bits remain from 3rd byte */ 151 wc = (wc << 8) | (base64state & -4) | i; k++; 152 base64state = 1; break; 153 } 154 if (k == kmax) { 155 /* UTF-16: When we see a High Surrogate, we must also decode 156 the following Low Surrogate. */ 157 if (kmax == 2 && (wc >= 0xd800 && wc < 0xdc00)) 158 kmax = 4; 159 else 160 break; 161 } 162 if (n < count+base64count+1) 163 goto none; 164 } 165 /* Here k = kmax > 0, hence base64count > 0. */ 166 if ((base64state & 3) == 0) abort(); 167 if (kmax == 4) { 168 ucs4_t wc1 = wc >> 16; 169 ucs4_t wc2 = wc & 0xffff; 170 if (!(wc1 >= 0xd800 && wc1 < 0xdc00)) abort(); 171 if (!(wc2 >= 0xdc00 && wc2 < 0xe000)) return RET_ILSEQ; 172 *pwc = 0x10000 + ((wc1 - 0xd800) << 10) + (wc2 - 0xdc00); 173 } else { 174 *pwc = wc; 175 } 176 conv->istate = base64state; 177 return count+base64count; 178 } 179 180 none: 181 conv->istate = state; 182 return RET_TOOFEW(count); 183 } 184 185 /* 186 * The state is structured as follows: 187 * bit 1..0: shift 188 * bit 7..2: data 189 * Precise meaning: 190 * shift data 191 * 0 0 not inside base64 encoding 192 * 1 0 inside base64, no pending bits 193 * 2 XX00 inside base64, 2 bits known for 2nd byte 194 * 3 XXXX inside base64, 4 bits known for 3rd byte 195 */ 196 197 /* Define this to 1 if you want the so-called "optional direct" characters 198 ! " # $ % & * ; < = > @ [ ] ^ _ ` { | } 199 to be encoded. Define to 0 if you want them to be passed straight through, 200 like the so-called "direct" characters. 201 We set this to 1 because it's safer. 202 */ 203 #define UTF7_ENCODE_OPTIONAL_CHARS 1 204 205 static int 206 utf7_wctomb (conv_t conv, unsigned char *r, ucs4_t iwc, int n) 207 { 208 state_t state = conv->ostate; 209 unsigned int wc = iwc; 210 int count = 0; 211 if (state & 3) 212 goto active; 213 214 /*inactive:*/ 215 { 216 if (UTF7_ENCODE_OPTIONAL_CHARS ? isdirect(wc) : isxdirect(wc)) { 217 r[0] = (unsigned char) wc; 218 /*conv->ostate = state;*/ 219 return 1; 220 } else { 221 *r++ = '+'; 222 if (wc == '+') { 223 if (n < 2) 224 return RET_TOOSMALL; 225 *r = '-'; 226 /*conv->ostate = state;*/ 227 return 2; 228 } 229 count = 1; 230 state = 1; 231 goto active; 232 } 233 } 234 235 active: 236 { 237 /* base64 encoding active */ 238 if (UTF7_ENCODE_OPTIONAL_CHARS ? isdirect(wc) : isxdirect(wc)) { 239 /* deactivate base64 encoding */ 240 count += ((state & 3) >= 2 ? 1 : 0) + (isxbase64(wc) ? 1 : 0) + 1; 241 if (n < count) 242 return RET_TOOSMALL; 243 if ((state & 3) >= 2) { 244 unsigned int i = state & -4; 245 unsigned char c; 246 if (i < 26) 247 c = i+'A'; 248 else if (i < 52) 249 c = i-26+'a'; 250 else if (i < 62) 251 c = i-52+'0'; 252 else if (i == 62) 253 c = '+'; 254 else if (i == 63) 255 c = '/'; 256 else 257 abort(); 258 *r++ = c; 259 } 260 if (isxbase64(wc)) 261 *r++ = '-'; 262 state = 0; 263 *r++ = (unsigned char) wc; 264 conv->ostate = state; 265 return count; 266 } else { 267 unsigned int k; /* number of payload bytes to write */ 268 if (wc < 0x10000) { 269 k = 2; 270 count += ((state & 3) >= 2 ? 3 : 2); 271 } else if (wc < 0x110000) { 272 unsigned int wc1 = 0xd800 + ((wc - 0x10000) >> 10); 273 unsigned int wc2 = 0xdc00 + ((wc - 0x10000) & 0x3ff); 274 wc = (wc1 << 16) | wc2; 275 k = 4; 276 count += ((state & 3) >= 3 ? 6 : 5); 277 } else 278 return RET_ILUNI; 279 if (n < count) 280 return RET_TOOSMALL; 281 for (;;) { 282 unsigned int i; 283 unsigned char c; 284 switch (state & 3) { 285 case 0: /* inside base64, 6 bits known for 4th byte */ 286 c = (state & -4) >> 2; state = 1; break; 287 case 1: /* inside base64, no pending bits */ 288 i = (wc >> (8 * --k)) & 0xff; 289 c = i >> 2; state = ((i & 3) << 4) | 2; break; 290 case 2: /* inside base64, 2 bits known for 2nd byte */ 291 i = (wc >> (8 * --k)) & 0xff; 292 c = (state & -4) | (i >> 4); state = ((i & 15) << 2) | 3; break; 293 case 3: /* inside base64, 4 bits known for 3rd byte */ 294 i = (wc >> (8 * --k)) & 0xff; 295 c = (state & -4) | (i >> 6); state = ((i & 63) << 2) | 0; break; 296 default: abort(); /* stupid gcc */ 297 } 298 if (c < 26) 299 c = c+'A'; 300 else if (c < 52) 301 c = c-26+'a'; 302 else if (c < 62) 303 c = c-52+'0'; 304 else if (c == 62) 305 c = '+'; 306 else if (c == 63) 307 c = '/'; 308 else 309 abort(); 310 *r++ = c; 311 if ((state & 3) && (k == 0)) 312 break; 313 } 314 conv->ostate = state; 315 return count; 316 } 317 } 318 } 319 320 static int 321 utf7_reset (conv_t conv, unsigned char *r, int n) 322 { 323 state_t state = conv->ostate; 324 if (state & 3) { 325 /* deactivate base64 encoding */ 326 unsigned int count = ((state & 3) >= 2 ? 1 : 0) + 1; 327 if (n < count) 328 return RET_TOOSMALL; 329 if ((state & 3) >= 2) { 330 unsigned int i = state & -4; 331 unsigned char c; 332 if (i < 26) 333 c = i+'A'; 334 else if (i < 52) 335 c = i-26+'a'; 336 else if (i < 62) 337 c = i-52+'0'; 338 else if (i == 62) 339 c = '+'; 340 else if (i == 63) 341 c = '/'; 342 else 343 abort(); 344 *r++ = c; 345 } 346 *r++ = '-'; 347 /* conv->ostate = 0; will be done by the caller */ 348 return count; 349 } else 350 return 0; 351 } 352