/* xref: /haiku/src/tests/system/libroot/posix/tst-mbrtowc.c (revision a84e14ca84d32e9469c91372d71556488bd3d48b) */
/* Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, write to the Free
   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307 USA.  */
19 
/* We always want assert to be fully defined.  */
#undef NDEBUG
#include <assert.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
28 
29 static int check_ascii(const char *locname);
30 
31 
/* UTF-8 single byte feeding test for mbrtowc(),
   contributed by Markus Kuhn <mkuhn@acm.org>.

   The caller must already have selected a UTF-8 locale for LC_CTYPE.
   Every expectation is enforced with assert(), so a violated expectation
   aborts the program; the function itself always returns 0.  */
static int
utf8_test_1(void)
{
	wchar_t wc;
	mbstate_t s;

	const char str[] = "\xe0\xa0\x80"; /* E0 A0 80 = U+0800 */

	/* Feed the three bytes of U+2260 one call at a time.  */
	wc = 42; /* arbitrary number */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
	assert(mbrtowc(&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
	assert(wc == 42); /* no value has been stored into wc yet */
	assert(mbrtowc(&wc, "\xA0", 1, &s) == 1); /* 3rd byte processed */
	assert(wc == 0x2260); /* E2 89 A0 = U+2260 (not equal) decoded correctly */
	assert(mbrtowc(&wc, "", 1, &s) == 0); /* test final byte processing */
	assert(wc == 0); /* test final byte decoding */

	/* The following test is by Al Viro <aviro@redhat.com>.
	   It splits the character as one byte followed by the remaining two.  */
	wc = 42; /* arbitrary number */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, str, 1, &s) == (size_t) -2);
	assert(mbrtowc(&wc, str + 1, 2, &s) == 2);
	assert(wc == 0x800);

	/* The same character converted in a single call.  */
	wc = 42; /* arbitrary number */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, str, 3, &s) == 3);
	assert(wc == 0x800);

	return 0;
}
66 
67 
/* Test for NUL byte processing via empty string.

   Passing "" with a length of 1 feeds a single NUL byte to mbrtowc().
   That is accepted (result 0, state back to initial) when no character is
   pending, and rejected with (size_t) -1 when a multibyte character has
   only been fed partially.

   The caller must already have selected a UTF-8 locale for LC_CTYPE.
   Expectations are enforced with assert(); the function always returns 0.  */
static int
utf8_test_2(void)
{
	/* wc is only an output slot here and is never read back, so it is
	   initialised once instead of before every scenario.  */
	wchar_t wc = 42; /* arbitrary number */
	mbstate_t s;

	/* NUL in the initial state.  */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(NULL, "", 1, &s) == 0); /* valid terminator */
	assert(mbsinit(&s));

	/* NUL after one of three bytes.  */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
	assert(mbrtowc(NULL, "", 1, &s) == (size_t) -1); /* invalid terminator */

	/* NUL after two of three bytes.  */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
	assert(mbrtowc(&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
	assert(mbrtowc(NULL, "", 1, &s) == (size_t) -1); /* invalid terminator */

	/* NUL after a complete three byte character.  */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
	assert(mbrtowc(&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
	assert(mbrtowc(&wc, "\xA0", 1, &s) == 1); /* 3rd byte processed */
	assert(mbrtowc(NULL, "", 1, &s) == 0); /* valid terminator */
	assert(mbsinit(&s));

	return 0;
}
101 
102 
/* Test for NUL byte processing via NULL string.

   Calling mbrtowc() with a NULL input pointer is expected to succeed
   (result 0, state back to initial) when no character is pending, and to
   fail with (size_t) -1 when a multibyte character has only been fed
   partially.

   The caller must already have selected a UTF-8 locale for LC_CTYPE.
   Expectations are enforced with assert(); the function always returns 0.  */
static int
utf8_test_3(void)
{
	/* wc is only an output slot here and is never read back, so it is
	   initialised once instead of before every scenario.  */
	wchar_t wc = 42; /* arbitrary number */
	mbstate_t s;

	/* NULL input in the initial state.  */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(NULL, NULL, 0, &s) == 0); /* valid terminator */
	assert(mbsinit(&s));

	/* NULL input after one of three bytes.  */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
	assert(mbrtowc(NULL, NULL, 0, &s) == (size_t) -1); /* invalid terminator */

	/* NULL input after two of three bytes.  */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
	assert(mbrtowc(&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
	assert(mbrtowc(NULL, NULL, 0, &s) == (size_t) -1); /* invalid terminator */

	/* NULL input after a complete three byte character.  */
	memset(&s, 0, sizeof(s)); /* get s into initial state */
	assert(mbrtowc(&wc, "\xE2", 1, &s) == (size_t) -2); /* 1st byte processed */
	assert(mbrtowc(&wc, "\x89", 1, &s) == (size_t) -2); /* 2nd byte processed */
	assert(mbrtowc(&wc, "\xA0", 1, &s) == 1); /* 3rd byte processed */
	assert(mbrtowc(NULL, NULL, 0, &s) == 0); /* valid terminator */
	assert(mbsinit(&s));

	return 0;
}
136 
137 
/* Runs the UTF-8 specific mbrtowc() tests with LC_CTYPE set to
   "de_DE.UTF-8".

   If that locale cannot be selected, a message is written to stderr and
   the process exits with status 1. Otherwise the results of the three
   sub-tests are OR-ed together and returned, so 0 means all of them
   returned 0.  */
static int
utf8_test(void)
{
	const char *locale = "de_DE.UTF-8";
	int error = 0;

	if (setlocale(LC_CTYPE, locale) == NULL) {
		fprintf(stderr, "locale '%s' not available!\n", locale);
		exit(1);
	}

	error |= utf8_test_1();
	error |= utf8_test_2();
	error |= utf8_test_3();

	return error;
}
155 
156 
/* Selects `locale` for all categories and runs check_ascii() on it.

   If the locale cannot be selected, this is reported on stderr and the
   check is skipped, so the previously active locale is not tested a
   second time. A skipped locale is not counted as an error.

   Returns the result of check_ascii(), or 0 if the locale was skipped.  */
static int
check_ascii_in_locale(const char *locale)
{
	if (setlocale(LC_ALL, locale) == NULL) {
		fprintf(stderr, "locale '%s' not available, skipping ASCII check\n",
			locale);
		return 0;
	}

	/* Query the name back so the output shows the locale that is really
	   in effect.  */
	return check_ascii(setlocale(LC_ALL, NULL));
}


/* Test driver: returns 0 if every check passed, non-zero otherwise.  */
int
main(void)
{
	int result = 0;

	/* Check mapping of ASCII range for some character sets which have
	   ASCII as a subset.  For those the wide char generated must have
	   the same value.  */
	result |= check_ascii_in_locale("C");

	result |= check_ascii_in_locale("de_DE.UTF-8");
	/* utf8_test() selects its locale itself and exits if that fails, so
	   it runs regardless of the outcome above.  */
	result |= utf8_test();

	result |= check_ascii_in_locale("ja_JP.EUC-JP");

	return result;
}
177 
178 
/* Checks that mbrtowc() converts every ASCII byte (0 through 127) to the
   wide character with the same value in the currently selected locale.

   locname is only used to label the output. Each byte that fails is
   reported on stdout, followed by a summary line with the error count.

   Returns 0 if all bytes converted as expected, 1 otherwise.  */
static int
check_ascii(const char *locname)
{
	/* MB_CUR_MAX depends on the locale, not on the loop iteration.  */
	const size_t max_len = MB_CUR_MAX;
	int c;
	int res = 0;

	printf("Testing locale \"%s\":\n", locname);

	for (c = 0; c <= 127; ++c) {
		/* MB_LEN_MAX is an upper bound for MB_CUR_MAX in every locale,
		   so a fixed-size buffer avoids a variable-length array.  */
		char buf[MB_LEN_MAX];
		wchar_t wc = (wchar_t) 0xffffffff;
		mbstate_t s;
		size_t n, i;

		/* The byte under test comes first; the remaining bytes are
		   filler, so mbrtowc() has to stop after a single byte.  */
		for (i = 0; i < max_len; ++i)
			buf[i] = (char) (c + (int) i);

		memset(&s, '\0', sizeof(s));

		n = mbrtowc(&wc, buf, max_len, &s);
		if (n == (size_t) -1) {
			printf("%s: '\\x%x': encoding error\n", locname, c);
			++res;
		} else if (n == (size_t) -2) {
			printf("%s: '\\x%x': incomplete character\n", locname, c);
			++res;
		} else if (n == 0 && c != 0) {
			printf("%s: '\\x%x': 0 returned\n", locname, c);
			++res;
		} else if (n != 0 && c == 0) {
			printf("%s: '\\x%x': not 0 returned\n", locname, c);
			++res;
		} else if (c != 0 && n != 1) {
			printf("%s: '\\x%x': not 1 returned\n", locname, c);
			++res;
		} else if (wc != (wchar_t) c) {
			/* %x expects an unsigned int; wchar_t need not be one.  */
			printf("%s: '\\x%x' != wc != L'\\x%x'\n", locname, c,
				(unsigned int) wc);
			++res;
		}
	}

	printf(res == 1 ? "%d error\n" : "%d errors\n", res);

	return res != 0;
}
224