xref: /haiku/src/tests/system/libroot/posix/gnulib-test-mbrtowc.c (revision e81a954787e50e56a7f06f72705b7859b6ab06d1)
1 /* Test of conversion of multibyte character to wide character.
2    Copyright (C) 2008-2011 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16 
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
18 
19 #undef NDEBUG
20 #include <assert.h>
21 #include <locale.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <wchar.h>
25 
26 #include <Debug.h>
27 
/* Test driver for mbrtowc().

   First runs locale-independent checks (zero-length input, the NUL byte,
   single bytes from the ISO C basic character set, and a NULL source
   pointer) in the locale selected by the environment.  Then tries, one
   after the other, an ISO-8859-1, a UTF-8, an EUC-JP and a GB18030 locale
   and decodes a short sample string byte by byte in each of them.  A
   locale that cannot be set is reported on stderr and skipped.

   ARGC and ARGV are not used.

   Returns 1 if the locale from the environment cannot be set, otherwise 0.
   Every expectation is checked with assert().  */
int
main (int argc, char *argv[])
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;
  int i;

  /* The command line is ignored; all locales are tried in turn below.  */
  (void) argc;
  (void) argv;

  /* configure should already have checked that the locale is supported.  */
  if (setlocale (LC_ALL, "") == NULL)
    {
      fprintf (stderr, "unable to set standard locale\n");
      return 1;
    }

  /* Test zero-length input.  */
  printf ("zero-length input ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "x", 0, &state);
    /* gnulib's implementation returns (size_t)(-2).
       The AIX 5.1 implementation returns (size_t)(-1).
       glibc's implementation returns 0.  */
    assert (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
    assert (mbsinit (&state));
  }

  /* Test NUL byte input.  */
  printf ("NUL byte input ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, "", 1, &state);
    assert (ret == 0);
    assert (wc == 0);
    assert (mbsinit (&state));
    /* Same input again, this time without a destination.  */
    ret = mbrtowc (NULL, "", 1, &state);
    assert (ret == 0);
    assert (mbsinit (&state));
  }

  /* Test single-byte input.  */
  printf ("single-byte input ...\n");
  {
    char buf[1];
    int c;

    memset (&state, '\0', sizeof (mbstate_t));
    for (c = 0; c < 0x100; c++)
      switch (c)
        {
        case '\t': case '\v': case '\f':
        case ' ': case '!': case '"': case '#': case '%':
        case '&': case '\'': case '(': case ')': case '*':
        case '+': case ',': case '-': case '.': case '/':
        case '0': case '1': case '2': case '3': case '4':
        case '5': case '6': case '7': case '8': case '9':
        case ':': case ';': case '<': case '=': case '>':
        case '?':
        case 'A': case 'B': case 'C': case 'D': case 'E':
        case 'F': case 'G': case 'H': case 'I': case 'J':
        case 'K': case 'L': case 'M': case 'N': case 'O':
        case 'P': case 'Q': case 'R': case 'S': case 'T':
        case 'U': case 'V': case 'W': case 'X': case 'Y':
        case 'Z':
        case '[': case '\\': case ']': case '^': case '_':
        case 'a': case 'b': case 'c': case 'd': case 'e':
        case 'f': case 'g': case 'h': case 'i': case 'j':
        case 'k': case 'l': case 'm': case 'n': case 'o':
        case 'p': case 'q': case 'r': case 's': case 't':
        case 'u': case 'v': case 'w': case 'x': case 'y':
        case 'z': case '{': case '|': case '}': case '~':
          /* c is in the ISO C "basic character set".  */
          buf[0] = c;
          wc = (wchar_t) 0xBADFACE;
          ret = mbrtowc (&wc, buf, 1, &state);
          assert (ret == 1);
          assert (wc == c);
          assert (mbsinit (&state));
          ret = mbrtowc (NULL, buf, 1, &state);
          assert (ret == 1);
          assert (mbsinit (&state));
          break;

        default:
          /* All other byte values are not checked here.  */
          break;
        }
  }

  /* Test special calling convention, passing a NULL pointer.  */
  printf ("special calling convention, passing NULL ...\n");
  {
    memset (&state, '\0', sizeof (mbstate_t));
    wc = (wchar_t) 0xBADFACE;
    ret = mbrtowc (&wc, NULL, 5, &state);
    assert (ret == 0);
    /* With a NULL source the destination must be left untouched.  */
    assert (wc == (wchar_t) 0xBADFACE);
    assert (mbsinit (&state));
  }

  /* Walk through the locale-specific tests.  In each of them, the bytes
     that have just been consumed are overwritten with NUL before the next
     call -- presumably so that a later call cannot pass by re-reading
     them.  */
  for (i = '1'; i <= '4'; ++i)
    {
      switch (i)
        {
        case '1':
          /* Locale encoding is ISO-8859-1 or ISO-8859-15.  */
          printf ("ISO8859-1 ...\n");
          {
            char input[] = "B\374\337er"; /* "Büßer" */
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.ISO8859-1") == NULL)
              {
                fprintf (stderr, "unable to set ISO8859-1 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == 1);
            assert (wctob (wc) == (unsigned char) '\374');
            assert (mbsinit (&state));
            input[1] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 2, 3, &state);
            assert (ret == 1);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 3, &state);
            assert (ret == 1);
            assert (wctob (wc) == (unsigned char) '\337');
            assert (mbsinit (&state));
            input[2] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            input[3] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 4, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));
          }
          break;

        case '2':
          /* Locale encoding is UTF-8.  */
          printf ("UTF-8 ...\n");
          {
            char input[] = "B\303\274\303\237er"; /* "Büßer" */
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
              {
                fprintf (stderr, "unable to set UTF-8 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            /* Only the first byte of the two-byte character is supplied:
               the call must report an incomplete character, leave wc
               alone and remember the byte in the state.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[1] = '\0';

            /* The second byte completes the character.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 5, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[2] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 3, 4, &state);
            assert (ret == 2);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 4, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[3] = '\0';
            input[4] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 5, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            input[5] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 6, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));

            /* reproduce a valid use case from readline (as used in our
               bash): */
            {
              char tooShort[] = "\303";
              char ok[] = "\303\274";
              /* make a backup of the state */
              mbstate_t stateBackup = state;
              /* try with a source that's too short */
              ret = mbrtowc (&wc, tooShort, 1, &state);
              assert (ret == (size_t)-2);
              /* restore the state from the backup */
              state = stateBackup;
              /* retry with enough source */
              ret = mbrtowc (&wc, ok, 2, &state);
              assert (ret == 2);
            }
          }
          break;

        case '3':
          /* Locale encoding is EUC-JP.  */
          printf ("EUC-JP ...\n");
          {
            char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.EUC-JP") == NULL)
              {
                fprintf (stderr, "unable to set EUC-JP locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == '<');
            assert (mbsinit (&state));
            input[0] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 2, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[1] = '\0';
            input[2] = '\0';

            /* Feed the second character one byte at a time: the first
               call must report an incomplete character.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[3] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 4, 4, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[4] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 5, 3, &state);
            assert (ret == 2);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 5, 3, &state);
            assert (ret == 2);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[5] = '\0';
            input[6] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 7, 1, &state);
            assert (ret == 1);
            assert (wc == '>');
            assert (mbsinit (&state));
          }
          break;

        case '4':
          /* Locale encoding is GB18030.  */
          printf ("GB18030 ...\n");
          {
            char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
            memset (&state, '\0', sizeof (mbstate_t));

            if (setlocale (LC_ALL, "en_US.GB18030") == NULL)
              {
                fprintf (stderr, "unable to set GB18030 locale, skipping\n");
                break;
              }

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input, 1, &state);
            assert (ret == 1);
            assert (wc == 'B');
            assert (mbsinit (&state));
            input[0] = '\0';

            /* First byte of a two-byte character only: incomplete.  */
            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 1, 1, &state);
            assert (ret == (size_t)(-2));
            assert (wc == (wchar_t) 0xBADFACE);
            assert (!mbsinit (&state));
            input[1] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 2, 7, &state);
            assert (ret == 1);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[2] = '\0';

            /* Test support of NULL first argument.  */
            ret = mbrtowc (NULL, input + 3, 6, &state);
            assert (ret == 4);
            assert (mbsinit (&state));

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 3, 6, &state);
            assert (ret == 4);
            assert (wctob (wc) == EOF);
            assert (mbsinit (&state));
            input[3] = '\0';
            input[4] = '\0';
            input[5] = '\0';
            input[6] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 7, 2, &state);
            assert (ret == 1);
            assert (wc == 'e');
            assert (mbsinit (&state));
            /* Clear the byte that was just consumed, i.e. index 7 (the
               'e'); index 5 has already been cleared above.  */
            input[7] = '\0';

            wc = (wchar_t) 0xBADFACE;
            ret = mbrtowc (&wc, input + 8, 1, &state);
            assert (ret == 1);
            assert (wc == 'r');
            assert (mbsinit (&state));
          }
          break;

        default:
          /* Not reachable: i only takes the values '1' through '4'.  */
          break;
        }
    }

  return 0;
}
387