xref: /haiku/src/tests/system/libroot/posix/gnulib-test-mbrtowc.c (revision 323b65468e5836bb27a5e373b14027d902349437)
1 /* Test of conversion of multibyte character to wide character.
2    Copyright (C) 2008-2011 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16 
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
18 
19 #undef NDEBUG
20 #include <assert.h>
21 #include <locale.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <wchar.h>
25 
26 #include <Debug.h>
27 
/* Value stored in the output wide character before each conversion, so
   that the assertions can tell whether mbrtowc stored a result.  */
#define UNSET_WC ((wchar_t) 0xBADFACE)

/* Check mbrtowc on zero-length input: any of the three return values
   listed below is accepted, and the state must remain initial.  */
static void
test_zero_length_input (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("zero-length input ...\n");

  memset (&state, '\0', sizeof (mbstate_t));
  wc = UNSET_WC;
  ret = mbrtowc (&wc, "x", 0, &state);
  /* gnulib's implementation returns (size_t)(-2).
     The AIX 5.1 implementation returns (size_t)(-1).
     glibc's implementation returns 0.  */
  assert (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
  assert (mbsinit (&state));
}

/* Check that converting a NUL byte returns 0 and stores L'\0', both with
   and without a destination pointer.  */
static void
test_nul_byte_input (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("NUL byte input ...\n");

  memset (&state, '\0', sizeof (mbstate_t));
  wc = UNSET_WC;
  ret = mbrtowc (&wc, "", 1, &state);
  assert (ret == 0);
  assert (wc == 0);
  assert (mbsinit (&state));

  ret = mbrtowc (NULL, "", 1, &state);
  assert (ret == 0);
  assert (mbsinit (&state));
}

/* Check that every listed member of the ISO C "basic character set"
   converts to a wide character of the same value, consuming one byte.
   Byte values that are not listed are not tested.  */
static void
test_single_byte_input (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;
  char buf[1];
  int c;

  printf ("single-byte input ...\n");

  memset (&state, '\0', sizeof (mbstate_t));
  for (c = 0; c < 0x100; c++)
    switch (c)
      {
      case '\t': case '\v': case '\f':
      case ' ': case '!': case '"': case '#': case '%':
      case '&': case '\'': case '(': case ')': case '*':
      case '+': case ',': case '-': case '.': case '/':
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
      case ':': case ';': case '<': case '=': case '>':
      case '?':
      case 'A': case 'B': case 'C': case 'D': case 'E':
      case 'F': case 'G': case 'H': case 'I': case 'J':
      case 'K': case 'L': case 'M': case 'N': case 'O':
      case 'P': case 'Q': case 'R': case 'S': case 'T':
      case 'U': case 'V': case 'W': case 'X': case 'Y':
      case 'Z':
      case '[': case '\\': case ']': case '^': case '_':
      case 'a': case 'b': case 'c': case 'd': case 'e':
      case 'f': case 'g': case 'h': case 'i': case 'j':
      case 'k': case 'l': case 'm': case 'n': case 'o':
      case 'p': case 'q': case 'r': case 's': case 't':
      case 'u': case 'v': case 'w': case 'x': case 'y':
      case 'z': case '{': case '|': case '}': case '~':
        /* c is in the ISO C "basic character set".  */
        buf[0] = c;
        wc = UNSET_WC;
        ret = mbrtowc (&wc, buf, 1, &state);
        assert (ret == 1);
        assert (wc == c);
        assert (mbsinit (&state));
        ret = mbrtowc (NULL, buf, 1, &state);
        assert (ret == 1);
        assert (mbsinit (&state));
        break;

      default:
        /* Not in the tested set; nothing to check.  */
        break;
      }
}

/* Check the special calling convention with a NULL input pointer: the
   call must return 0, leave the destination untouched and leave the
   state initial.  */
static void
test_null_input_pointer (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("special calling convention, passing NULL ...\n");

  memset (&state, '\0', sizeof (mbstate_t));
  wc = UNSET_WC;
  ret = mbrtowc (&wc, NULL, 5, &state);
  assert (ret == 0);
  assert (wc == UNSET_WC);
  assert (mbsinit (&state));
}

/* Convert "Büßer" byte by byte in an ISO-8859-1 locale.  Every character
   is a single byte.  Returns without failing if the locale cannot be
   set.  After each conversion the consumed byte is overwritten with NUL
   (presumably so that a later call cannot succeed by re-reading it).  */
static void
test_iso8859_1 (void)
{
  char input[] = "B\374\337er"; /* "Büßer" */
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
  printf ("ISO8859-1 ...\n");

  if (setlocale (LC_ALL, "en_US.ISO8859-1") == NULL)
    {
      fprintf (stderr, "unable to set ISO8859-1 locale, skipping\n");
      return;
    }

  memset (&state, '\0', sizeof (mbstate_t));

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input, 1, &state);
  assert (ret == 1);
  assert (wc == 'B');
  assert (mbsinit (&state));
  input[0] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  assert (ret == 1);
  assert (wctob (wc) == (unsigned char) '\374');
  assert (mbsinit (&state));
  input[1] = '\0';

  /* Test support of NULL first argument.  */
  ret = mbrtowc (NULL, input + 2, 3, &state);
  assert (ret == 1);
  assert (mbsinit (&state));

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 2, 3, &state);
  assert (ret == 1);
  assert (wctob (wc) == (unsigned char) '\337');
  assert (mbsinit (&state));
  input[2] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 3, 2, &state);
  assert (ret == 1);
  assert (wc == 'e');
  assert (mbsinit (&state));
  input[3] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 4, 1, &state);
  assert (ret == 1);
  assert (wc == 'r');
  assert (mbsinit (&state));
}

/* Convert "Büßer" in a UTF-8 locale.  'ü' is fed one byte at a time to
   exercise the incomplete-character path ((size_t)(-2), non-initial
   state); 'ß' is converted in one call.  Returns without failing if the
   locale cannot be set.  */
static void
test_utf8 (void)
{
  char input[] = "B\303\274\303\237er"; /* "Büßer" */
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* Locale encoding is UTF-8.  */
  printf ("UTF-8 ...\n");

  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
    {
      fprintf (stderr, "unable to set UTF-8 locale, skipping\n");
      return;
    }

  memset (&state, '\0', sizeof (mbstate_t));

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input, 1, &state);
  assert (ret == 1);
  assert (wc == 'B');
  assert (mbsinit (&state));
  input[0] = '\0';

  /* Only the first byte of the two-byte sequence is supplied.  */
  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == UNSET_WC);
  assert (!mbsinit (&state));
  input[1] = '\0';

  /* The second byte completes the character; one more byte is consumed.  */
  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 2, 5, &state);
  assert (ret == 1);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[2] = '\0';

  /* Test support of NULL first argument.  */
  ret = mbrtowc (NULL, input + 3, 4, &state);
  assert (ret == 2);
  assert (mbsinit (&state));

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 3, 4, &state);
  assert (ret == 2);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[3] = '\0';
  input[4] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 5, 2, &state);
  assert (ret == 1);
  assert (wc == 'e');
  assert (mbsinit (&state));
  input[5] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 6, 1, &state);
  assert (ret == 1);
  assert (wc == 'r');
  assert (mbsinit (&state));
}

/* Convert "<日本語>" in an EUC-JP locale.  The second two-byte character
   is fed one byte at a time to exercise the incomplete-character path;
   the other two are converted in one call each.  Returns without failing
   if the locale cannot be set.  */
static void
test_euc_jp (void)
{
  char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* Locale encoding is EUC-JP.  */
  printf ("EUC-JP ...\n");

  if (setlocale (LC_ALL, "en_US.EUC-JP") == NULL)
    {
      fprintf (stderr, "unable to set EUC-JP locale, skipping\n");
      return;
    }

  memset (&state, '\0', sizeof (mbstate_t));

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input, 1, &state);
  assert (ret == 1);
  assert (wc == '<');
  assert (mbsinit (&state));
  input[0] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 1, 2, &state);
  assert (ret == 2);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[1] = '\0';
  input[2] = '\0';

  /* Only the first byte of the two-byte sequence is supplied.  */
  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 3, 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == UNSET_WC);
  assert (!mbsinit (&state));
  input[3] = '\0';

  /* The second byte completes the character; one more byte is consumed.  */
  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 4, 4, &state);
  assert (ret == 1);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[4] = '\0';

  /* Test support of NULL first argument.  */
  ret = mbrtowc (NULL, input + 5, 3, &state);
  assert (ret == 2);
  assert (mbsinit (&state));

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 5, 3, &state);
  assert (ret == 2);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[5] = '\0';
  input[6] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 7, 1, &state);
  assert (ret == 1);
  assert (wc == '>');
  assert (mbsinit (&state));
}

/* Convert "Büßer" in a GB18030 locale, where 'ü' occupies two bytes and
   'ß' four.  'ü' is fed one byte at a time to exercise the
   incomplete-character path.  Returns without failing if the locale
   cannot be set.  */
static void
test_gb18030 (void)
{
  char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  /* Locale encoding is GB18030.  */
  printf ("GB18030 ...\n");

  if (setlocale (LC_ALL, "en_US.GB18030") == NULL)
    {
      fprintf (stderr, "unable to set GB18030 locale, skipping\n");
      return;
    }

  memset (&state, '\0', sizeof (mbstate_t));

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input, 1, &state);
  assert (ret == 1);
  assert (wc == 'B');
  assert (mbsinit (&state));
  input[0] = '\0';

  /* Only the first byte of the two-byte sequence is supplied.  */
  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == UNSET_WC);
  assert (!mbsinit (&state));
  input[1] = '\0';

  /* The second byte completes the character; one more byte is consumed.  */
  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 2, 7, &state);
  assert (ret == 1);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[2] = '\0';

  /* Test support of NULL first argument.  */
  ret = mbrtowc (NULL, input + 3, 6, &state);
  assert (ret == 4);
  assert (mbsinit (&state));

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 3, 6, &state);
  assert (ret == 4);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[3] = '\0';
  input[4] = '\0';
  input[5] = '\0';
  input[6] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 7, 2, &state);
  assert (ret == 1);
  assert (wc == 'e');
  assert (mbsinit (&state));
  /* Clear the byte just consumed.  This used to clear input[5] a second
     time, leaving the 'e' at index 7 in place.  */
  input[7] = '\0';

  wc = UNSET_WC;
  ret = mbrtowc (&wc, input + 8, 1, &state);
  assert (ret == 1);
  assert (wc == 'r');
  assert (mbsinit (&state));
}

/* Run the locale-independent mbrtowc checks, then the per-encoding
   checks.  Returns 1 if the environment's default locale cannot be set
   and 0 otherwise; a failed check aborts through assert.  Encodings whose
   locale is not available are skipped with a message on stderr.  */
int
main (int argc, char *argv[])
{
  (void) argc;
  (void) argv;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    {
      fprintf (stderr, "unable to set standard locale\n");
      return 1;
    }

  test_zero_length_input ();
  test_nul_byte_input ();
  test_single_byte_input ();
  test_null_input_pointer ();

  test_iso8859_1 ();
  test_utf8 ();
  test_euc_jp ();
  test_gb18030 ();

  return 0;
}
371