xref: /haiku/src/tests/system/libroot/posix/gnulib-test-mbrtowc.c (revision fc7456e9b1ec38c941134ed6d01c438cf289381e)
1 /* Test of conversion of multibyte character to wide character.
2    Copyright (C) 2008-2011 Free Software Foundation, Inc.
3 
4    This program is free software: you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 3 of the License, or
7    (at your option) any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
16 
17 /* Written by Bruno Haible <bruno@clisp.org>, 2008.  */
18 
19 #undef NDEBUG
20 #include <assert.h>
21 #include <locale.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <wchar.h>
25 
26 #include <Debug.h>
27 
/* Every helper below exercises one aspect of mbrtowc() and checks its
   expectations with assert(); since NDEBUG is undefined at the top of the
   file, a failed expectation aborts the program.

   The helpers initialize their own conversion state, so they do not depend
   on each other's results.  The locale-specific helpers do change the
   process-wide locale and leave it changed when they return.  */

/* Test zero-length input: implementations disagree on the return value, but
   the conversion state must remain initial.  */
static void
test_zero_length_input (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("zero-length input ...\n");

  memset (&state, '\0', sizeof (mbstate_t));
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, "x", 0, &state);
  /* gnulib's implementation returns (size_t)(-2).
     The AIX 5.1 implementation returns (size_t)(-1).
     glibc's implementation returns 0.  */
  assert (ret == (size_t)(-2) || ret == (size_t)(-1) || ret == 0);
  assert (mbsinit (&state));
}

/* Test NUL byte input: the result is 0 and the stored wide character is
   L'\0', with and without a destination pointer.  */
static void
test_nul_byte_input (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("NUL byte input ...\n");

  memset (&state, '\0', sizeof (mbstate_t));
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, "", 1, &state);
  assert (ret == 0);
  assert (wc == 0);
  assert (mbsinit (&state));

  /* Same input, but without a place to store the result.  */
  ret = mbrtowc (NULL, "", 1, &state);
  assert (ret == 0);
  assert (mbsinit (&state));
}

/* Test single-byte input: every listed character of the ISO C "basic
   character set" must convert to a wide character with the same value.
   All other byte values are skipped.  */
static void
test_single_byte_input (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;
  char buf[1];
  int c;

  printf ("single-byte input ...\n");

  memset (&state, '\0', sizeof (mbstate_t));
  for (c = 0; c < 0x100; c++)
    switch (c)
      {
      case '\t': case '\v': case '\f':
      case ' ': case '!': case '"': case '#': case '%':
      case '&': case '\'': case '(': case ')': case '*':
      case '+': case ',': case '-': case '.': case '/':
      case '0': case '1': case '2': case '3': case '4':
      case '5': case '6': case '7': case '8': case '9':
      case ':': case ';': case '<': case '=': case '>':
      case '?':
      case 'A': case 'B': case 'C': case 'D': case 'E':
      case 'F': case 'G': case 'H': case 'I': case 'J':
      case 'K': case 'L': case 'M': case 'N': case 'O':
      case 'P': case 'Q': case 'R': case 'S': case 'T':
      case 'U': case 'V': case 'W': case 'X': case 'Y':
      case 'Z':
      case '[': case '\\': case ']': case '^': case '_':
      case 'a': case 'b': case 'c': case 'd': case 'e':
      case 'f': case 'g': case 'h': case 'i': case 'j':
      case 'k': case 'l': case 'm': case 'n': case 'o':
      case 'p': case 'q': case 'r': case 's': case 't':
      case 'u': case 'v': case 'w': case 'x': case 'y':
      case 'z': case '{': case '|': case '}': case '~':
        /* c is in the ISO C "basic character set".  */
        buf[0] = c;
        wc = (wchar_t) 0xBADFACE;
        ret = mbrtowc (&wc, buf, 1, &state);
        assert (ret == 1);
        assert (wc == c);
        assert (mbsinit (&state));
        ret = mbrtowc (NULL, buf, 1, &state);
        assert (ret == 1);
        assert (mbsinit (&state));
        break;

      default:
        /* Not one of the characters listed above: nothing to check.  */
        break;
      }
}

/* Test the special calling convention of passing a NULL source pointer:
   the call must return 0, leave the destination untouched and leave the
   state initial.  */
static void
test_null_source (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("special calling convention, passing NULL ...\n");

  memset (&state, '\0', sizeof (mbstate_t));
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, NULL, 5, &state);
  assert (ret == 0);
  assert (wc == (wchar_t) 0xBADFACE);
  assert (mbsinit (&state));
}

/* Check a 4-byte character that is fed to mbrtowc() one byte at a time.
   The bytes F0 9F 90 83 are the UTF-8 encoding of U+1F403.
   NOTE(review): this runs in whatever locale main() selected from the
   environment, so the expectations presumably only hold when that locale
   uses UTF-8 -- confirm for the target platform.  */
static void
test_four_byte_character (void)
{
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("4-byte character ...\n");

  memset (&state, '\0', sizeof (mbstate_t));

  /* The first three bytes are each an incomplete character: nothing is
     stored and the state is no longer initial.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, "\360", 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == (wchar_t) 0xBADFACE);
  assert (!mbsinit (&state));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, "\237", 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == (wchar_t) 0xBADFACE);
  assert (!mbsinit (&state));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, "\220", 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == (wchar_t) 0xBADFACE);
  assert (!mbsinit (&state));

  /* The fourth byte completes the character.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, "\203", 1, &state);
  assert (ret == 1);
  assert (wc == (wchar_t) 0x1F403);
  assert (mbsinit (&state));
}

/* Test a single-byte locale encoding (ISO-8859-1 or ISO-8859-15).  The test
   is skipped with a message on stderr if the locale cannot be set.

   In this and the following locale tests, bytes that have been consumed are
   overwritten with NUL -- presumably so that an implementation which reads
   already consumed input again produces a visible failure.  */
static void
test_iso8859_1 (void)
{
  char input[] = "B\374\337er"; /* "Büßer" */
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("ISO8859-1 ...\n");

  memset (&state, '\0', sizeof (mbstate_t));

  if (setlocale (LC_ALL, "en_US.ISO8859-1") == NULL)
    {
      fprintf (stderr, "unable to set ISO8859-1 locale, skipping\n");
      return;
    }

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input, 1, &state);
  assert (ret == 1);
  assert (wc == 'B');
  assert (mbsinit (&state));
  input[0] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  assert (ret == 1);
  assert (wctob (wc) == (unsigned char) '\374');
  assert (mbsinit (&state));
  input[1] = '\0';

  /* Test support of NULL first argument.  */
  ret = mbrtowc (NULL, input + 2, 3, &state);
  assert (ret == 1);
  assert (mbsinit (&state));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 2, 3, &state);
  assert (ret == 1);
  assert (wctob (wc) == (unsigned char) '\337');
  assert (mbsinit (&state));
  input[2] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 3, 2, &state);
  assert (ret == 1);
  assert (wc == 'e');
  assert (mbsinit (&state));
  input[3] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 4, 1, &state);
  assert (ret == 1);
  assert (wc == 'r');
  assert (mbsinit (&state));
}

/* Test the UTF-8 locale encoding, including a 2-byte character that is
   split across two calls.  The test is skipped with a message on stderr if
   the locale cannot be set.  */
static void
test_utf8 (void)
{
  char input[] = "B\303\274\303\237er"; /* "Büßer" */
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("UTF-8 ...\n");

  memset (&state, '\0', sizeof (mbstate_t));

  if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
    {
      fprintf (stderr, "unable to set UTF-8 locale, skipping\n");
      return;
    }

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input, 1, &state);
  assert (ret == 1);
  assert (wc == 'B');
  assert (mbsinit (&state));
  input[0] = '\0';

  /* Only the first byte of the 2-byte character: incomplete.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == (wchar_t) 0xBADFACE);
  assert (!mbsinit (&state));
  input[1] = '\0';

  /* The second byte completes it; only that one byte is consumed.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 2, 5, &state);
  assert (ret == 1);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[2] = '\0';

  /* Test support of NULL first argument.  */
  ret = mbrtowc (NULL, input + 3, 4, &state);
  assert (ret == 2);
  assert (mbsinit (&state));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 3, 4, &state);
  assert (ret == 2);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[3] = '\0';
  input[4] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 5, 2, &state);
  assert (ret == 1);
  assert (wc == 'e');
  assert (mbsinit (&state));
  input[5] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 6, 1, &state);
  assert (ret == 1);
  assert (wc == 'r');
  assert (mbsinit (&state));

  /* reproduce a valid use case from readline (as used in our bash): */
  {
    char tooShort[] = "\303";
    char ok[] = "\303\274";
    /* make a backup of the state */
    mbstate_t stateBackup = state;

    /* try with a source that's too short */
    ret = mbrtowc (&wc, tooShort, 1, &state);
    assert (ret == (size_t)-2);
    /* restore the state from the backup */
    state = stateBackup;
    /* retry with enough source */
    ret = mbrtowc (&wc, ok, 2, &state);
    assert (ret == 2);
  }
}

/* Test the EUC-JP locale encoding, including a 2-byte character that is
   split across two calls.  The test is skipped with a message on stderr if
   the locale cannot be set.  */
static void
test_euc_jp (void)
{
  char input[] = "<\306\374\313\334\270\354>"; /* "<日本語>" */
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("EUC-JP ...\n");

  memset (&state, '\0', sizeof (mbstate_t));

  if (setlocale (LC_ALL, "en_US.EUC-JP") == NULL)
    {
      fprintf (stderr, "unable to set EUC-JP locale, skipping\n");
      return;
    }

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input, 1, &state);
  assert (ret == 1);
  assert (wc == '<');
  assert (mbsinit (&state));
  input[0] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 1, 2, &state);
  assert (ret == 2);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[1] = '\0';
  input[2] = '\0';

  /* Only the first byte of the second character: incomplete.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 3, 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == (wchar_t) 0xBADFACE);
  assert (!mbsinit (&state));
  input[3] = '\0';

  /* The second byte completes it; only that one byte is consumed.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 4, 4, &state);
  assert (ret == 1);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[4] = '\0';

  /* Test support of NULL first argument.  */
  ret = mbrtowc (NULL, input + 5, 3, &state);
  assert (ret == 2);
  assert (mbsinit (&state));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 5, 3, &state);
  assert (ret == 2);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[5] = '\0';
  input[6] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 7, 1, &state);
  assert (ret == 1);
  assert (wc == '>');
  assert (mbsinit (&state));
}

/* Test the GB18030 locale encoding with a 2-byte character (split across
   two calls) and a 4-byte character.  The test is skipped with a message on
   stderr if the locale cannot be set.

   Layout of input: [0] 'B', [1..2] 2-byte character, [3..6] 4-byte
   character, [7] 'e', [8] 'r'.  */
static void
test_gb18030 (void)
{
  char input[] = "B\250\271\201\060\211\070er"; /* "Büßer" */
  mbstate_t state;
  wchar_t wc;
  size_t ret;

  printf ("GB18030 ...\n");

  memset (&state, '\0', sizeof (mbstate_t));

  if (setlocale (LC_ALL, "en_US.GB18030") == NULL)
    {
      fprintf (stderr, "unable to set GB18030 locale, skipping\n");
      return;
    }

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input, 1, &state);
  assert (ret == 1);
  assert (wc == 'B');
  assert (mbsinit (&state));
  input[0] = '\0';

  /* Only the first byte of the 2-byte character: incomplete.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 1, 1, &state);
  assert (ret == (size_t)(-2));
  assert (wc == (wchar_t) 0xBADFACE);
  assert (!mbsinit (&state));
  input[1] = '\0';

  /* The second byte completes it; only that one byte is consumed.  */
  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 2, 7, &state);
  assert (ret == 1);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[2] = '\0';

  /* Test support of NULL first argument.  */
  ret = mbrtowc (NULL, input + 3, 6, &state);
  assert (ret == 4);
  assert (mbsinit (&state));

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 3, 6, &state);
  assert (ret == 4);
  assert (wctob (wc) == EOF);
  assert (mbsinit (&state));
  input[3] = '\0';
  input[4] = '\0';
  input[5] = '\0';
  input[6] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 7, 2, &state);
  assert (ret == 1);
  assert (wc == 'e');
  assert (mbsinit (&state));
  /* Clear the byte that was just consumed ('e' lives at index 7; index 5
     was already cleared together with the 4-byte character above).  */
  input[7] = '\0';

  wc = (wchar_t) 0xBADFACE;
  ret = mbrtowc (&wc, input + 8, 1, &state);
  assert (ret == 1);
  assert (wc == 'r');
  assert (mbsinit (&state));
}

/* Run all mbrtowc() checks: first the locale-independent ones in the locale
   taken from the environment, then one check per specific locale encoding.

   argc and argv are not used.

   Returns 0 when every check that could run has passed, or 1 when the
   locale from the environment cannot be set.  A failed check aborts the
   program through assert() instead of returning.  */
int
main (int argc, char *argv[])
{
  (void) argc;
  (void) argv;

  /* Adopt the locale from the environment; the first group of checks runs
     in it.  */
  if (setlocale (LC_ALL, "") == NULL)
    {
      fprintf (stderr, "unable to set standard locale\n");
      return 1;
    }

  test_zero_length_input ();
  test_nul_byte_input ();
  test_single_byte_input ();
  test_null_source ();
  test_four_byte_character ();

  /* Each of these switches to its own locale, or skips itself when that
     locale is not available.  */
  test_iso8859_1 ();
  test_utf8 ();
  test_euc_jp ();
  test_gb18030 ();

  return 0;
}
416