/* Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */

/* We always want assert to be fully defined.  The tests in this file
   place their mbrtowc() calls inside assert(), so NDEBUG must be
   undefined before <assert.h> is included.  */
#undef NDEBUG
#include <assert.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

/* Forward declaration; the definition appears later in this file.  */
static int check_ascii(const char *locname);
/* UTF-8 single byte feeding test for mbrtowc(),
   contributed by Markus Kuhn <mkuhn@acm.org>.

   Feeds multibyte sequences to mbrtowc() piece by piece and checks the
   return value of every call as well as the wide character that is
   finally stored.  The expected values are those of UTF-8 decoding, so
   a UTF-8 LC_CTYPE locale has to be in effect when this is called.

   Returns 0.  A failed check aborts the program through assert(); the
   mbrtowc() calls sit inside the assert() expressions, so this code
   must not be compiled with NDEBUG defined.  */
static int
utf8_test_1(void)
{
  wchar_t wc;
  mbstate_t s;

  const char str[] = "\xe0\xa0\x80";

  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2);	/* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2);	/* 2nd byte processed */
  assert (wc == 42);		/* nothing has been stored into wc yet */
  assert (mbrtowc (&wc, "\xA0", 1, &s) == 1);	/* 3rd byte processed */
  assert (wc == 0x2260);	/* E2 89 A0 = U+2260 (not equal) decoded correctly */
  assert (mbrtowc (&wc, "", 1, &s) == 0);	/* test final byte processing */
  assert (wc == 0);		/* test final byte decoding */

  /* The following test is by Al Viro <aviro@redhat.com>.
     One byte is supplied first, then the remaining two in a single call;
     the second call must report only the two bytes it consumed.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, str, 1, &s) == (size_t) -2);
  assert (mbrtowc (&wc, str + 1, 2, &s) == 2);
  assert (wc == 0x800);

  /* The same three bytes supplied in one call.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, str, 3, &s) == 3);
  assert (wc == 0x800);

  return 0;
}
/* Test for NUL byte processing via empty string.

   Passes the empty string "" (that is, a single NUL byte) to mbrtowc()
   at four points: in the initial state, after one byte of a three-byte
   sequence, after two bytes of it, and after the complete sequence.
   The NUL must be accepted (return value 0, state initial afterwards)
   in the first and last case and rejected with (size_t) -1 while a
   character is still incomplete.  The expected values are those of
   UTF-8 decoding, so a UTF-8 LC_CTYPE locale has to be in effect.

   Returns 0.  A failed check aborts the program through assert(); the
   mbrtowc() calls sit inside the assert() expressions, so this code
   must not be compiled with NDEBUG defined.  */
static int
utf8_test_2(void)
{
  wchar_t wc;
  mbstate_t s;

  /* NUL in the initial state.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (NULL, "", 1, &s) == 0);	/* valid terminator */
  assert (mbsinit (&s));

  /* NUL after the first byte of a three-byte character.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2);	/* 1st byte processed */
  assert (mbrtowc (NULL, "", 1, &s) == (size_t) -1);	/* invalid terminator */

  /* NUL after the first two bytes of a three-byte character.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2);	/* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2);	/* 2nd byte processed */
  assert (mbrtowc (NULL, "", 1, &s) == (size_t) -1);	/* invalid terminator */

  /* NUL after a complete three-byte character.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2);	/* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2);	/* 2nd byte processed */
  assert (mbrtowc (&wc, "\xA0", 1, &s) == 1);	/* 3rd byte processed */
  assert (mbrtowc (NULL, "", 1, &s) == 0);	/* valid terminator */
  assert (mbsinit (&s));

  return 0;
}
/* Test for NUL byte processing via NULL string.

   Same four scenarios as utf8_test_2(), but the terminator is supplied
   as mbrtowc (NULL, NULL, 0, &s), which ISO C defines as equivalent to
   mbrtowc (NULL, "", 1, &s).  The call must return 0 and leave the
   state initial when no character is pending, and return (size_t) -1
   while a character is still incomplete.  The expected values are those
   of UTF-8 decoding, so a UTF-8 LC_CTYPE locale has to be in effect.

   Returns 0.  A failed check aborts the program through assert(); the
   mbrtowc() calls sit inside the assert() expressions, so this code
   must not be compiled with NDEBUG defined.  */
static int
utf8_test_3(void)
{
  wchar_t wc;
  mbstate_t s;

  /* NULL string in the initial state.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (NULL, NULL, 0, &s) == 0);	/* valid terminator */
  assert (mbsinit (&s));

  /* NULL string after the first byte of a three-byte character.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2);	/* 1st byte processed */
  assert (mbrtowc (NULL, NULL, 0, &s) == (size_t) -1);	/* invalid terminator */

  /* NULL string after the first two bytes of a three-byte character.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2);	/* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2);	/* 2nd byte processed */
  assert (mbrtowc (NULL, NULL, 0, &s) == (size_t) -1);	/* invalid terminator */

  /* NULL string after a complete three-byte character.  */
  wc = 42;			/* arbitrary number */
  memset(&s, 0, sizeof(s));	/* get s into initial state */
  assert (mbrtowc (&wc, "\xE2", 1, &s) == (size_t) -2);	/* 1st byte processed */
  assert (mbrtowc (&wc, "\x89", 1, &s) == (size_t) -2);	/* 2nd byte processed */
  assert (mbrtowc (&wc, "\xA0", 1, &s) == 1);	/* 3rd byte processed */
  assert (mbrtowc (NULL, NULL, 0, &s) == 0);	/* valid terminator */
  assert (mbsinit (&s));

  return 0;
}


/* Select the de_DE.UTF-8 locale for LC_CTYPE and run the three UTF-8
   tests.  Terminates the program with status 1 if the locale cannot be
   selected.  Returns the OR of the individual test results.  */
static int
utf8_test(void)
{
  const char *locale = "de_DE.UTF-8";
  int error = 0;

  if (!setlocale(LC_CTYPE, locale)) {
    fprintf(stderr, "locale '%s' not available!\n", locale);
    exit(1);
  }

  error |= utf8_test_1();
  error |= utf8_test_2();
  error |= utf8_test_3();

  return error;
}


/* Check that mbrtowc() maps every byte value 0..127 to the wide
   character with the same value in the currently selected locale.

   LOCNAME is used only to label the output.  One line is printed to
   stdout for every byte value that fails, followed by a summary line
   with the number of failures.

   Returns 1 if at least one byte value failed, 0 otherwise.  */
static int
check_ascii(const char *locname)
{
  int c;
  int res = 0;

  printf("Testing locale \"%s\":\n", locname);

  for (c = 0; c <= 127; ++c) {
    char buf[MB_CUR_MAX];
    /* Sentinel that differs from every value C can take, so a call that
       stores nothing is caught by the final comparison.  */
    wchar_t wc = (wchar_t) 0xffffffff;
    mbstate_t s;
    size_t n, i;

    /* The byte under test comes first; the remaining bytes are filler
       that a correct conversion must leave unconsumed (see the
       "not 1 returned" check below).  */
    for (i = 0; i < MB_CUR_MAX; ++i)
      buf[i] = c + i;

    memset(&s, '\0', sizeof(s));

    n = mbrtowc(&wc, buf, MB_CUR_MAX, &s);
    if (n == (size_t) -1) {
      printf("%s: '\\x%x': encoding error\n", locname, c);
      ++res;
    } else if (n == (size_t) -2) {
      printf("%s: '\\x%x': incomplete character\n", locname, c);
      ++res;
    } else if (n == 0 && c != 0) {
      printf("%s: '\\x%x': 0 returned\n", locname, c);
      ++res;
    } else if (n != 0 && c == 0) {
      printf("%s: '\\x%x': not 0 returned\n", locname, c);
      ++res;
    } else if (c != 0 && n != 1) {
      printf("%s: '\\x%x': not 1 returned\n", locname, c);
      ++res;
    } else if (wc != (wchar_t) c) {
      printf("%s: '\\x%x': wc != L'\\x%x'\n", locname, c, c);
      ++res;
    }
  }

  printf(res == 1 ? "%d error\n" : "%d errors\n", res);

  return res != 0;
}


/* Switch all locale categories to NAME and run check_ascii() on the
   result.

   setlocale() returns NULL and leaves the locale unchanged when NAME is
   not available.  Without this check the previously selected locale
   would be tested a second time and the run would pass without ever
   exercising NAME, so an unavailable locale is reported on stderr and
   counted as a failure.

   Returns nonzero on failure, 0 on success.  */
static int
check_locale(const char *name)
{
  if (setlocale(LC_ALL, name) == NULL) {
    fprintf(stderr, "locale '%s' not available!\n", name);
    return 1;
  }

  /* Label the output with the name the C library reports for the
     locale now in effect.  */
  return check_ascii(setlocale(LC_ALL, NULL));
}


/* Run every test.  The exit status is 0 when everything passed and
   nonzero otherwise.  */
int
main(void)
{
  int result = 0;

  /* Check mapping of ASCII range for some character sets which have
     ASCII as a subset.  For those the wide char generated must have
     the same value.  */
  result |= check_locale("C");

  result |= check_locale("de_DE.UTF-8");
  result |= utf8_test();

  result |= check_locale("ja_JP.EUC-JP");

  return result;
}