/* Test of conversion of string to wide string.
   Copyright (C) 2008-2011 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <http://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>, 2008.  */

/* The checks below are plain assert() calls, so assertions must stay
   enabled no matter how the file is compiled.  */
#undef NDEBUG
#include <assert.h>
#include <locale.h>
#include <stdio.h>
#include <string.h>
#include <wchar.h>


#define BUFSIZE 10

/* Sentinel written into wide-character slots before a conversion so the
   assertions can tell slots the library wrote from slots it left alone.  */
#define POISON ((wchar_t) 0xBADFACE)

/* Signature shared by the per-encoding tests.  UNLIMITED is non-zero when
   the destination length passed to mbsrtowcs() is the whole buffer, and
   zero when it is deliberately too small for the remaining input.  */
typedef void (*locale_test_fn)(int unlimited);

/* Fill all BUFSIZE slots of BUF with the POISON sentinel.  */
static void poison_buf(wchar_t *buf)
{
    size_t i;

    for (i = 0; i < BUFSIZE; i++) {
        buf[i] = POISON;
    }
}

/* Exercise mbrtowc()/mbsrtowcs() before any setlocale() call: plain ASCII
   input must convert, and input starting with 'B' followed by the byte
   0xFC must be rejected right after the 'B'.  */
static void test_posix(void)
{
    char input[] = "Buesser";
    char isoInput[] = "B\374\337er"; /* "Büßer" */
    wchar_t buf[BUFSIZE];
    mbstate_t state;
    mbstate_t temp_state;
    const char *src;
    wchar_t wc;
    size_t ret;

    printf("POSIX ...\n");

    memset(&state, '\0', sizeof(mbstate_t));
    poison_buf(buf);

    wc = POISON;
    ret = mbrtowc(&wc, input, 1, &state);
    assert(ret == 1);
    assert(wc == 'B');
    assert(mbsinit(&state));
    /* Consumed bytes are overwritten as the test advances; presumably this
       is meant to catch an implementation that re-reads earlier input.  */
    input[0] = '\0';

    wc = POISON;
    ret = mbrtowc(&wc, input + 1, 1, &state);
    assert(ret == 1);
    assert(wctob(wc) == (unsigned char) 'u');
    assert(mbsinit(&state));
    input[1] = '\0';

    /* Counting pass (NULL destination) on a copy of the state: neither
       the source pointer nor the real state may change.  */
    src = input + 2;
    temp_state = state;
    ret = mbsrtowcs(NULL, &src, BUFSIZE, &temp_state);
    assert(ret == 5);
    assert(src == input + 2);
    assert(mbsinit(&state));

    /* Converting pass: "esser" plus the terminating null, nothing more.  */
    src = input + 2;
    ret = mbsrtowcs(buf, &src, BUFSIZE, &state);
    assert(ret == 5);
    assert(src == NULL);
    assert(wctob(buf[0]) == (unsigned char) 'e');
    assert(buf[1] == 's');
    assert(buf[2] == 's');
    assert(buf[3] == 'e');
    assert(buf[4] == 'r');
    assert(buf[5] == 0);
    assert(buf[6] == POISON);
    assert(mbsinit(&state));

    /* The conversion must fail and leave SRC on the offending byte.  */
    src = isoInput;
    ret = mbsrtowcs(buf, &src, BUFSIZE, &state);
    assert(ret == (size_t)-1);
    assert(src == isoInput + 1);
}

/* Convert the empty string with every combination of NULL / non-NULL
   destination and length 0 / 1.  The result is always 0; the destination
   is written (with the terminating null) only when it is non-NULL and the
   length is 1.  */
static void test_nul_input(void)
{
    mbstate_t state;
    const char *src;
    wchar_t wc;
    size_t ret;

    memset(&state, '\0', sizeof(mbstate_t));

    src = "";
    ret = mbsrtowcs(NULL, &src, 0, &state);
    assert(ret == 0);
    assert(mbsinit(&state));

    src = "";
    ret = mbsrtowcs(NULL, &src, 1, &state);
    assert(ret == 0);
    assert(mbsinit(&state));

    wc = POISON;
    src = "";
    ret = mbsrtowcs(&wc, &src, 0, &state);
    assert(ret == 0);
    assert(wc == POISON);
    assert(mbsinit(&state));

    wc = POISON;
    src = "";
    ret = mbsrtowcs(&wc, &src, 1, &state);
    assert(ret == 0);
    assert(wc == 0);
    assert(mbsinit(&state));
}

/* Locale encoding is ISO-8859-1 or ISO-8859-15: every byte is one
   character, so the conversion state stays initial throughout.  The test
   is skipped (with a note on stderr) when the locale cannot be set.  */
static void test_iso8859_1(int unlimited)
{
    char input[] = "B\374\337er"; /* "Büßer" */
    const size_t limit = unlimited ? BUFSIZE : 1;
    wchar_t buf[BUFSIZE];
    mbstate_t state;
    mbstate_t temp_state;
    const char *src;
    wchar_t wc;
    size_t ret;

    poison_buf(buf);
    printf("ISO8859-1 ...\n");
    memset(&state, '\0', sizeof(mbstate_t));

    if (setlocale(LC_ALL, "en_US.ISO8859-1") == NULL) {
        fprintf(stderr,
                "unable to set ISO8859-1 locale, skipping\n");
        return;
    }

    wc = POISON;
    ret = mbrtowc(&wc, input, 1, &state);
    assert(ret == 1);
    assert(wc == 'B');
    assert(mbsinit(&state));
    input[0] = '\0';

    wc = POISON;
    ret = mbrtowc(&wc, input + 1, 1, &state);
    assert(ret == 1);
    assert(wctob(wc) == (unsigned char) '\374');
    assert(mbsinit(&state));
    input[1] = '\0';

    /* With a NULL destination the full count is expected even when LIMIT
       is small, and SRC must not move.  */
    src = input + 2;
    temp_state = state;
    ret = mbsrtowcs(NULL, &src, limit, &temp_state);
    assert(ret == 3);
    assert(src == input + 2);
    assert(mbsinit(&state));

    src = input + 2;
    ret = mbsrtowcs(buf, &src, limit, &state);
    assert(ret == (unlimited ? 3u : 1u));
    assert(src == (unlimited ? NULL : input + 3));
    assert(wctob(buf[0]) == (unsigned char) '\337');
    if (unlimited) {
        assert(buf[1] == 'e');
        assert(buf[2] == 'r');
        assert(buf[3] == 0);
        assert(buf[4] == POISON);
    } else {
        assert(buf[1] == POISON);
    }
    assert(mbsinit(&state));
}

/* Locale encoding is UTF-8.  The first byte of the two-byte character is
   fed to mbrtowc() alone so that mbsrtowcs() has to resume from a
   non-initial state.  The test is skipped (with a note on stderr) when
   the locale cannot be set.  */
static void test_utf8(int unlimited)
{
    char input[] = "B\303\274\303\237er"; /* "Büßer" */
    char isoInput[] = "B\374\337er"; /* "Büßer" */
    const size_t limit = unlimited ? BUFSIZE : 2;
    wchar_t buf[BUFSIZE];
    mbstate_t state;
    mbstate_t temp_state;
    const char *src;
    wchar_t wc;
    size_t ret;

    poison_buf(buf);
    printf("UTF-8 ...\n");
    memset(&state, '\0', sizeof(mbstate_t));

    if (setlocale(LC_ALL, "en_US.UTF-8") == NULL) {
        fprintf(stderr,
                "unable to set UTF-8 locale, skipping\n");
        return;
    }

    wc = POISON;
    ret = mbrtowc(&wc, input, 1, &state);
    assert(ret == 1);
    assert(wc == 'B');
    assert(mbsinit(&state));
    input[0] = '\0';

    /* Incomplete character: nothing is stored and the state is pending.  */
    wc = POISON;
    ret = mbrtowc(&wc, input + 1, 1, &state);
    assert(ret == (size_t)(-2));
    assert(wc == POISON);
    assert(!mbsinit(&state));
    input[1] = '\0';

    /* Counting pass on a copy of the pending state; the real state must
       still be pending afterwards.  */
    src = input + 2;
    temp_state = state;
    ret = mbsrtowcs(NULL, &src, limit, &temp_state);
    assert(ret == 4);
    assert(src == input + 2);
    assert(!mbsinit(&state));

    src = input + 2;
    ret = mbsrtowcs(buf, &src, limit, &state);
    assert(ret == (unlimited ? 4u : 2u));
    assert(src == (unlimited ? NULL : input + 5));
    assert(wctob(buf[0]) == EOF);
    assert(wctob(buf[1]) == EOF);
    if (unlimited) {
        assert(buf[2] == 'e');
        assert(buf[3] == 'r');
        assert(buf[4] == 0);
        assert(buf[5] == POISON);
    } else {
        assert(buf[2] == POISON);
    }
    assert(mbsinit(&state));

    /* ISO-8859-1 bytes must be rejected, with SRC left on the first
       offending byte.  */
    src = isoInput;
    ret = mbsrtowcs(buf, &src, BUFSIZE, &state);
    assert(ret == (size_t)-1);
    assert(src == isoInput + 1);
}

/* Locale encoding is EUC-JP.  One complete two-byte character is consumed
   with mbrtowc(), then the first byte of the next one, so mbsrtowcs()
   resumes from a non-initial state.  The test is skipped (with a note on
   stderr) when the locale cannot be set.  */
static void test_euc_jp(int unlimited)
{
    char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
    const size_t limit = unlimited ? BUFSIZE : 2;
    wchar_t buf[BUFSIZE];
    mbstate_t state;
    mbstate_t temp_state;
    const char *src;
    wchar_t wc;
    size_t ret;

    poison_buf(buf);
    printf("EUC-JP ...\n");
    memset(&state, '\0', sizeof(mbstate_t));

    if (setlocale(LC_ALL, "en_US.EUC-JP") == NULL) {
        fprintf(stderr,
                "unable to set EUC-JP locale, skipping\n");
        return;
    }

    wc = POISON;
    ret = mbrtowc(&wc, input, 1, &state);
    assert(ret == 1);
    assert(wc == '<');
    assert(mbsinit(&state));
    input[0] = '\0';

    wc = POISON;
    ret = mbrtowc(&wc, input + 1, 2, &state);
    assert(ret == 2);
    assert(wctob(wc) == EOF);
    assert(mbsinit(&state));
    input[1] = '\0';
    input[2] = '\0';

    /* Incomplete character: nothing is stored and the state is pending.  */
    wc = POISON;
    ret = mbrtowc(&wc, input + 3, 1, &state);
    assert(ret == (size_t)(-2));
    assert(wc == POISON);
    assert(!mbsinit(&state));
    input[3] = '\0';

    src = input + 4;
    temp_state = state;
    ret = mbsrtowcs(NULL, &src, limit, &temp_state);
    assert(ret == 3);
    assert(src == input + 4);
    assert(!mbsinit(&state));

    src = input + 4;
    ret = mbsrtowcs(buf, &src, limit, &state);
    assert(ret == (unlimited ? 3u : 2u));
    assert(src == (unlimited ? NULL : input + 7));
    assert(wctob(buf[0]) == EOF);
    assert(wctob(buf[1]) == EOF);
    if (unlimited) {
        assert(buf[2] == '>');
        assert(buf[3] == 0);
        assert(buf[4] == POISON);
    } else {
        assert(buf[2] == POISON);
    }
    assert(mbsinit(&state));
}

/* Locale encoding is GB18030.  The input holds a two-byte and a four-byte
   character; the first byte of the two-byte one is fed to mbrtowc() alone
   so mbsrtowcs() resumes from a non-initial state.  The test is skipped
   (with a note on stderr) when the locale cannot be set.  */
static void test_gb18030(int unlimited)
{
    char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
    const size_t limit = unlimited ? BUFSIZE : 2;
    wchar_t buf[BUFSIZE];
    mbstate_t state;
    mbstate_t temp_state;
    const char *src;
    wchar_t wc;
    size_t ret;

    poison_buf(buf);
    printf("GB18030 ...\n");
    memset(&state, '\0', sizeof(mbstate_t));

    if (setlocale(LC_ALL, "en_US.GB18030") == NULL) {
        fprintf(stderr,
                "unable to set GB18030 locale, skipping\n");
        return;
    }

    wc = POISON;
    ret = mbrtowc(&wc, input, 1, &state);
    assert(ret == 1);
    assert(wc == 'B');
    assert(mbsinit(&state));
    input[0] = '\0';

    /* Incomplete character: nothing is stored and the state is pending.  */
    wc = POISON;
    ret = mbrtowc(&wc, input + 1, 1, &state);
    assert(ret == (size_t)(-2));
    assert(wc == POISON);
    assert(!mbsinit(&state));
    input[1] = '\0';

    src = input + 2;
    temp_state = state;
    ret = mbsrtowcs(NULL, &src, limit, &temp_state);
    assert(ret == 4);
    assert(src == input + 2);
    assert(!mbsinit(&state));

    src = input + 2;
    ret = mbsrtowcs(buf, &src, limit, &state);
    assert(ret == (unlimited ? 4u : 2u));
    assert(src == (unlimited ? NULL : input + 7));
    assert(wctob(buf[0]) == EOF);
    assert(wctob(buf[1]) == EOF);
    if (unlimited) {
        assert(buf[2] == 'e');
        assert(buf[3] == 'r');
        assert(buf[4] == 0);
        assert(buf[5] == POISON);
    } else {
        assert(buf[2] == POISON);
    }
    assert(mbsinit(&state));
}

/* Run the pre-setlocale test, switch to the environment's locale, run the
   empty-string test, then run each per-encoding test twice: first with a
   destination length too small for the input, then with the full buffer.

   Returns 0 when every check passed (a failed assert() aborts instead),
   or 1 when setlocale (LC_ALL, "") fails.  */
int main(int argc, char *argv[])
{
    /* Run in this order: ISO-8859-1, UTF-8, EUC-JP, GB18030.  */
    static const locale_test_fn locale_tests[] = {
        test_iso8859_1,
        test_utf8,
        test_euc_jp,
        test_gb18030
    };
    size_t t;

    /* The command line is not consulted.  */
    (void) argc;
    (void) argv;

    test_posix();

    /* configure should already have checked that the locale is supported.  */
    if (setlocale(LC_ALL, "") == NULL) {
        fprintf(stderr, "unable to set standard locale\n");
        return 1;
    }

    /* Test NUL byte input.  */
    test_nul_input();

    for (t = 0; t < sizeof locale_tests / sizeof locale_tests[0]; t++) {
        int unlimited;

        for (unlimited = 0; unlimited < 2; unlimited++) {
            locale_tests[t](unlimited);
        }
    }

    return 0;
}