xref: /haiku/src/add-ons/kernel/file_systems/ntfs/libntfs/unistr.c (revision 0490778ed1bf2e39eb9d9ea8d62a2b0ab378fecd)
1 /**
2  * unistr.c - Unicode string handling. Originated from the Linux-NTFS project.
3  *
4  * Copyright (c) 2000-2004 Anton Altaparmakov
5  * Copyright (c) 2002-2009 Szabolcs Szakacsits
6  * Copyright (c) 2008-2015 Jean-Pierre Andre
7  * Copyright (c) 2008      Bernhard Kaindl
8  *
9  * This program/include file is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU General Public License as published
11  * by the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program/include file is distributed in the hope that it will be
15  * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
16  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program (in the main directory of the NTFS-3G
21  * distribution in the file COPYING); if not, write to the Free Software
22  * Foundation,Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
23  */
24 
25 #ifdef HAVE_CONFIG_H
26 #include "config.h"
27 #endif
28 
29 #ifdef HAVE_STDIO_H
30 #include <stdio.h>
31 #endif
32 #ifdef HAVE_STDLIB_H
33 #include <stdlib.h>
34 #endif
35 #ifdef HAVE_WCHAR_H
36 #include <wchar.h>
37 #endif
38 #ifdef HAVE_STRING_H
39 #include <string.h>
40 #endif
41 #ifdef HAVE_ERRNO_H
42 #include <errno.h>
43 #endif
44 #ifdef HAVE_LOCALE_H
45 #include <locale.h>
46 #endif
47 
48 #if defined(__APPLE__) || defined(__DARWIN__)
49 #ifdef ENABLE_NFCONV
50 #include <CoreFoundation/CoreFoundation.h>
51 #endif /* ENABLE_NFCONV */
52 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
53 
54 #include "compat.h"
55 #include "attrib.h"
56 #include "types.h"
57 #include "unistr.h"
58 #include "debug.h"
59 #include "logging.h"
60 #include "misc.h"
61 
62 #ifndef ALLOW_BROKEN_UNICODE
/* Erik: broken UTF-16 surrogate pairs as well as U+FFFE and U+FFFF are
 * allowed by default; this choice is open to debate. */
65 #define ALLOW_BROKEN_UNICODE 1
66 #endif /* !defined(ALLOW_BROKEN_UNICODE) */
67 
68 /*
69  * IMPORTANT
70  * =========
71  *
72  * All these routines assume that the Unicode characters are in little endian
73  * encoding inside the strings!!!
74  */
75 
76 static int use_utf8 = 1; /* use UTF-8 encoding for file names */
77 
78 #if defined(__APPLE__) || defined(__DARWIN__)
79 #ifdef ENABLE_NFCONV
80 /**
81  * This variable controls whether or not automatic normalization form conversion
82  * should be performed when translating NTFS unicode file names to UTF-8.
83  * Defaults to on, but can be controlled from the outside using the function
84  *   int ntfs_macosx_normalize_filenames(int normalize);
85  */
86 static int nfconvert_utf8 = 1;
87 #endif /* ENABLE_NFCONV */
88 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
89 
90 /*
91  * This is used by the name collation functions to quickly determine what
92  * characters are (in)valid.
93  */
94 #if 0
95 static const u8 legal_ansi_char_array[0x40] = {
96 	0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
97 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
98 
99 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
100 	0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
101 
102 	0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17,
103 	0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00,
104 
105 	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17,
106 	0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18,
107 };
108 #endif
109 
110 /**
111  * ntfs_names_are_equal - compare two Unicode names for equality
112  * @s1:			name to compare to @s2
113  * @s1_len:		length in Unicode characters of @s1
114  * @s2:			name to compare to @s1
115  * @s2_len:		length in Unicode characters of @s2
116  * @ic:			ignore case bool
117  * @upcase:		upcase table (only if @ic == IGNORE_CASE)
118  * @upcase_size:	length in Unicode characters of @upcase (if present)
119  *
120  * Compare the names @s1 and @s2 and return TRUE (1) if the names are
121  * identical, or FALSE (0) if they are not identical. If @ic is IGNORE_CASE,
122  * the @upcase table is used to perform a case insensitive comparison.
123  */
ntfs_names_are_equal(const ntfschar * s1,size_t s1_len,const ntfschar * s2,size_t s2_len,const IGNORE_CASE_BOOL ic,const ntfschar * upcase,const u32 upcase_size)124 BOOL ntfs_names_are_equal(const ntfschar *s1, size_t s1_len,
125 		const ntfschar *s2, size_t s2_len,
126 		const IGNORE_CASE_BOOL ic,
127 		const ntfschar *upcase, const u32 upcase_size)
128 {
129 	if (s1_len != s2_len)
130 		return FALSE;
131 	if (!s1_len)
132 		return TRUE;
133 	if (ic == CASE_SENSITIVE)
134 		return ntfs_ucsncmp(s1, s2, s1_len) ? FALSE: TRUE;
135 	return ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size) ? FALSE:
136 								       TRUE;
137 }
138 
139 /*
140  * ntfs_names_full_collate() fully collate two Unicode names
141  *
142  * @name1:	first Unicode name to compare
143  * @name1_len:	length of first Unicode name to compare
144  * @name2:	second Unicode name to compare
145  * @name2_len:	length of second Unicode name to compare
146  * @ic:		either CASE_SENSITIVE or IGNORE_CASE (see below)
147  * @upcase:	upcase table
148  * @upcase_len:	upcase table size
149  *
150  * If @ic is CASE_SENSITIVE, then the names are compared primarily ignoring
151  * case, but if the names are equal ignoring case, then they are compared
152  * case-sensitively.  As an example, "abc" would collate before "BCD" (since
153  * "abc" and "BCD" differ ignoring case and 'A' < 'B') but after "ABC" (since
154  * "ABC" and "abc" are equal ignoring case and 'A' < 'a').  This matches the
155  * collation order of filenames as indexed in NTFS directories.
156  *
157  * If @ic is IGNORE_CASE, then the names are only compared case-insensitively
158  * and are considered to match if and only if they are equal ignoring case.
159  *
160  * Returns:
161  *  -1 if the first name collates before the second one,
162  *   0 if the names match, or
163  *   1 if the second name collates before the first one
164  */
ntfs_names_full_collate(const ntfschar * name1,const u32 name1_len,const ntfschar * name2,const u32 name2_len,const IGNORE_CASE_BOOL ic,const ntfschar * upcase,const u32 upcase_len)165 int ntfs_names_full_collate(const ntfschar *name1, const u32 name1_len,
166 		const ntfschar *name2, const u32 name2_len,
167 		const IGNORE_CASE_BOOL ic, const ntfschar *upcase,
168 		const u32 upcase_len)
169 {
170 	u32 cnt;
171 	u16 c1, c2;
172 	u16 u1, u2;
173 
174 #ifdef DEBUG
175 	if (!name1 || !name2 || !upcase || !upcase_len) {
176 		ntfs_log_debug("ntfs_names_collate received NULL pointer!\n");
177 		exit(1);
178 	}
179 #endif
180 	cnt = min(name1_len, name2_len);
181 	if (cnt > 0) {
182 		if (ic == CASE_SENSITIVE) {
183 			while (--cnt && (*name1 == *name2)) {
184 				name1++;
185 				name2++;
186 			}
187 			u1 = c1 = le16_to_cpu(*name1);
188 			u2 = c2 = le16_to_cpu(*name2);
189 			if (u1 < upcase_len)
190 				u1 = le16_to_cpu(upcase[u1]);
191 			if (u2 < upcase_len)
192 				u2 = le16_to_cpu(upcase[u2]);
193 			if ((u1 == u2) && cnt)
194 				do {
195 					name1++;
196 					u1 = le16_to_cpu(*name1);
197 					name2++;
198 					u2 = le16_to_cpu(*name2);
199 					if (u1 < upcase_len)
200 						u1 = le16_to_cpu(upcase[u1]);
201 					if (u2 < upcase_len)
202 						u2 = le16_to_cpu(upcase[u2]);
203 				} while ((u1 == u2) && --cnt);
204 			if (u1 < u2)
205 				return -1;
206 			if (u1 > u2)
207 				return 1;
208 			if (name1_len < name2_len)
209 				return -1;
210 			if (name1_len > name2_len)
211 				return 1;
212 			if (c1 < c2)
213 				return -1;
214 			if (c1 > c2)
215 				return 1;
216 		} else {
217 			do {
218 				u1 = le16_to_cpu(*name1);
219 				name1++;
220 				u2 = le16_to_cpu(*name2);
221 				name2++;
222 				if (u1 < upcase_len)
223 					u1 = le16_to_cpu(upcase[u1]);
224 				if (u2 < upcase_len)
225 					u2 = le16_to_cpu(upcase[u2]);
226 			} while ((u1 == u2) && --cnt);
227 			if (u1 < u2)
228 				return -1;
229 			if (u1 > u2)
230 				return 1;
231 			if (name1_len < name2_len)
232 				return -1;
233 			if (name1_len > name2_len)
234 				return 1;
235 		}
236 	} else {
237 		if (name1_len < name2_len)
238 			return -1;
239 		if (name1_len > name2_len)
240 			return 1;
241 	}
242 	return 0;
243 }
244 
245 /**
246  * ntfs_ucsncmp - compare two little endian Unicode strings
247  * @s1:		first string
248  * @s2:		second string
249  * @n:		maximum unicode characters to compare
250  *
251  * Compare the first @n characters of the Unicode strings @s1 and @s2,
252  * The strings in little endian format and appropriate le16_to_cpu()
253  * conversion is performed on non-little endian machines.
254  *
255  * The function returns an integer less than, equal to, or greater than zero
256  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
257  * to be less than, to match, or be greater than @s2.
258  */
ntfs_ucsncmp(const ntfschar * s1,const ntfschar * s2,size_t n)259 int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n)
260 {
261 	u16 c1, c2;
262 	size_t i;
263 
264 #ifdef DEBUG
265 	if (!s1 || !s2) {
266 		ntfs_log_debug("ntfs_wcsncmp() received NULL pointer!\n");
267 		exit(1);
268 	}
269 #endif
270 	for (i = 0; i < n; ++i) {
271 		c1 = le16_to_cpu(s1[i]);
272 		c2 = le16_to_cpu(s2[i]);
273 		if (c1 < c2)
274 			return -1;
275 		if (c1 > c2)
276 			return 1;
277 		if (!c1)
278 			break;
279 	}
280 	return 0;
281 }
282 
283 /**
284  * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case
285  * @s1:			first string
286  * @s2:			second string
287  * @n:			maximum unicode characters to compare
288  * @upcase:		upcase table
289  * @upcase_size:	upcase table size in Unicode characters
290  *
291  * Compare the first @n characters of the Unicode strings @s1 and @s2,
292  * ignoring case. The strings in little endian format and appropriate
293  * le16_to_cpu() conversion is performed on non-little endian machines.
294  *
295  * Each character is uppercased using the @upcase table before the comparison.
296  *
297  * The function returns an integer less than, equal to, or greater than zero
298  * if @s1 (or the first @n Unicode characters thereof) is found, respectively,
299  * to be less than, to match, or be greater than @s2.
300  */
ntfs_ucsncasecmp(const ntfschar * s1,const ntfschar * s2,size_t n,const ntfschar * upcase,const u32 upcase_size)301 int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n,
302 		const ntfschar *upcase, const u32 upcase_size)
303 {
304 	u16 c1, c2;
305 	size_t i;
306 
307 #ifdef DEBUG
308 	if (!s1 || !s2 || !upcase) {
309 		ntfs_log_debug("ntfs_wcsncasecmp() received NULL pointer!\n");
310 		exit(1);
311 	}
312 #endif
313 	for (i = 0; i < n; ++i) {
314 		if ((c1 = le16_to_cpu(s1[i])) < upcase_size)
315 			c1 = le16_to_cpu(upcase[c1]);
316 		if ((c2 = le16_to_cpu(s2[i])) < upcase_size)
317 			c2 = le16_to_cpu(upcase[c2]);
318 		if (c1 < c2)
319 			return -1;
320 		if (c1 > c2)
321 			return 1;
322 		if (!c1)
323 			break;
324 	}
325 	return 0;
326 }
327 
328 /**
329  * ntfs_ucsnlen - determine the length of a little endian Unicode string
330  * @s:		pointer to Unicode string
331  * @maxlen:	maximum length of string @s
332  *
333  * Return the number of Unicode characters in the little endian Unicode
334  * string @s up to a maximum of maxlen Unicode characters, not including
335  * the terminating (ntfschar)'\0'. If there is no (ntfschar)'\0' between @s
336  * and @s + @maxlen, @maxlen is returned.
337  *
338  * This function never looks beyond @s + @maxlen.
339  */
ntfs_ucsnlen(const ntfschar * s,u32 maxlen)340 u32 ntfs_ucsnlen(const ntfschar *s, u32 maxlen)
341 {
342 	u32 i;
343 
344 	for (i = 0; i < maxlen; i++) {
345 		if (!le16_to_cpu(s[i]))
346 			break;
347 	}
348 	return i;
349 }
350 
351 /**
352  * ntfs_ucsndup - duplicate little endian Unicode string
353  * @s:		pointer to Unicode string
354  * @maxlen:	maximum length of string @s
355  *
356  * Return a pointer to a new little endian Unicode string which is a duplicate
357  * of the string s.  Memory for the new string is obtained with ntfs_malloc(3),
358  * and can be freed with free(3).
359  *
360  * A maximum of @maxlen Unicode characters are copied and a terminating
361  * (ntfschar)'\0' little endian Unicode character is added.
362  *
363  * This function never looks beyond @s + @maxlen.
364  *
365  * Return a pointer to the new little endian Unicode string on success and NULL
366  * on failure with errno set to the error code.
367  */
ntfs_ucsndup(const ntfschar * s,u32 maxlen)368 ntfschar *ntfs_ucsndup(const ntfschar *s, u32 maxlen)
369 {
370 	ntfschar *dst;
371 	u32 len;
372 
373 	len = ntfs_ucsnlen(s, maxlen);
374 	dst = ntfs_malloc((len + 1) * sizeof(ntfschar));
375 	if (dst) {
376 		memcpy(dst, s, len * sizeof(ntfschar));
377 		dst[len] = const_cpu_to_le16(L'\0');
378 	}
379 	return dst;
380 }
381 
382 /**
383  * ntfs_name_upcase - Map an Unicode name to its uppercase equivalent
384  * @name:
385  * @name_len:
386  * @upcase:
387  * @upcase_len:
388  *
389  * Description...
390  *
391  * Returns:
392  */
ntfs_name_upcase(ntfschar * name,u32 name_len,const ntfschar * upcase,const u32 upcase_len)393 void ntfs_name_upcase(ntfschar *name, u32 name_len, const ntfschar *upcase,
394 		const u32 upcase_len)
395 {
396 	u32 i;
397 	u16 u;
398 
399 	for (i = 0; i < name_len; i++)
400 		if ((u = le16_to_cpu(name[i])) < upcase_len)
401 			name[i] = upcase[u];
402 }
403 
404 /**
405  * ntfs_name_locase - Map a Unicode name to its lowercase equivalent
406  */
ntfs_name_locase(ntfschar * name,u32 name_len,const ntfschar * locase,const u32 locase_len)407 void ntfs_name_locase(ntfschar *name, u32 name_len, const ntfschar *locase,
408 		const u32 locase_len)
409 {
410 	u32 i;
411 	u16 u;
412 
413 	if (locase)
414 		for (i = 0; i < name_len; i++)
415 			if ((u = le16_to_cpu(name[i])) < locase_len)
416 				name[i] = locase[u];
417 }
418 
419 /**
420  * ntfs_file_value_upcase - Convert a filename to upper case
421  * @file_name_attr:
422  * @upcase:
423  * @upcase_len:
424  *
425  * Description...
426  *
427  * Returns:
428  */
ntfs_file_value_upcase(FILE_NAME_ATTR * file_name_attr,const ntfschar * upcase,const u32 upcase_len)429 void ntfs_file_value_upcase(FILE_NAME_ATTR *file_name_attr,
430 		const ntfschar *upcase, const u32 upcase_len)
431 {
432 	ntfs_name_upcase((ntfschar*)&file_name_attr->file_name,
433 			file_name_attr->file_name_length, upcase, upcase_len);
434 }
435 
436 /*
   NTFS uses Unicode (UTF-16LE [NTFS-3G uses UCS-2LE, which is enough
   for now]) for path names, but the Unicode code points need to be
   converted before a path can be accessed under NTFS. For 7 bit ASCII/ANSI,
   glibc does this even without a locale in a hard-coded fashion, as that
   appears to be easy because the low 7-bit ASCII range appears to be
   available in all charsets, but it does not convert anything if
   there was some error with the locale setup or no locale was set up, like
   when mount is called during early boot where we (by policy) do
   not use locales (and they may not be available if /usr is not yet
   mounted), so this patch fixes the resulting issues for systems which use
   UTF-8 and for others, specifying the locale in fstab brings them
   the encoding which they want.
449 
   If no locale is defined or there was a problem with setting one
   up, and whenever nl_langinfo(CODESET) returns a string starting with
   "ANSI", use an internal UCS-2LE <-> UTF-8 codeset converter to fix
   the bug where NTFS-3G does not show any path names which include
   international characters!!! (and also fails on creating them) as a result.
455 
456    Author: Bernhard Kaindl <bk@suse.de>
457    Jean-Pierre Andre made it compliant with RFC3629/RFC2781.
458 */
459 
460 /*
461  * Return the number of bytes in UTF-8 needed (without the terminating null) to
462  * store the given UTF-16LE string.
463  *
464  * On error, -1 is returned, and errno is set to the error code. The following
465  * error codes can be expected:
466  *	EILSEQ		The input string is not valid UTF-16LE (only possible
467  *			if compiled without ALLOW_BROKEN_UNICODE).
468  *	ENAMETOOLONG	The length of the UTF-8 string in bytes (without the
469  *			terminating null) would exceed @outs_len.
470  */
utf16_to_utf8_size(const ntfschar * ins,const int ins_len,int outs_len)471 static int utf16_to_utf8_size(const ntfschar *ins, const int ins_len, int outs_len)
472 {
473 	int i, ret = -1;
474 	int count = 0;
475 	BOOL surrog;
476 
477 	surrog = FALSE;
478 	for (i = 0; i < ins_len && ins[i] && count <= outs_len; i++) {
479 		unsigned short c = le16_to_cpu(ins[i]);
480 		if (surrog) {
481 			if ((c >= 0xdc00) && (c < 0xe000)) {
482 				surrog = FALSE;
483 				count += 4;
484 			} else {
485 #if ALLOW_BROKEN_UNICODE
486 				/* The first UTF-16 unit of a surrogate pair has
487 				 * a value between 0xd800 and 0xdc00. It can be
488 				 * encoded as an individual UTF-8 sequence if we
489 				 * cannot combine it with the next UTF-16 unit
490 				 * unit as a surrogate pair. */
491 				surrog = FALSE;
492 				count += 3;
493 
494 				--i;
495 				continue;
496 #else
497 				goto fail;
498 #endif /* ALLOW_BROKEN_UNICODE */
499 			}
500 		} else
501 			if (c < 0x80)
502 				count++;
503 			else if (c < 0x800)
504 				count += 2;
505 			else if (c < 0xd800)
506 				count += 3;
507 			else if (c < 0xdc00)
508 				surrog = TRUE;
509 #if ALLOW_BROKEN_UNICODE
510 			else if (c < 0xe000)
511 				count += 3;
512 			else if (c >= 0xe000)
513 #else
514 			else if ((c >= 0xe000) && (c < 0xfffe))
515 #endif /* ALLOW_BROKEN_UNICODE */
516 				count += 3;
517 			else
518 				goto fail;
519 	}
520 
521 	if (surrog && count <= outs_len) {
522 #if ALLOW_BROKEN_UNICODE
523 		count += 3; /* ending with a single surrogate */
524 #else
525 		goto fail;
526 #endif /* ALLOW_BROKEN_UNICODE */
527 	}
528 
529 	if (count > outs_len) {
530 		errno = ENAMETOOLONG;
531 		goto out;
532 	}
533 
534 	ret = count;
535 out:
536 	return ret;
537 fail:
538 	errno = EILSEQ;
539 	goto out;
540 }
541 
542 /*
543  * ntfs_utf16_to_utf8 - convert a little endian UTF16LE string to an UTF-8 string
544  * @ins:	input utf16 string buffer
545  * @ins_len:	length of input string in utf16 characters
546  * @outs:	on return contains the (allocated) output multibyte string
547  * @outs_len:	length of output buffer in bytes (ignored if *@outs is NULL)
548  *
549  * Return -1 with errno set if string has invalid byte sequence or too long.
550  */
ntfs_utf16_to_utf8(const ntfschar * ins,const int ins_len,char ** outs,int outs_len)551 static int ntfs_utf16_to_utf8(const ntfschar *ins, const int ins_len,
552 			      char **outs, int outs_len)
553 {
554 #if defined(__APPLE__) || defined(__DARWIN__)
555 #ifdef ENABLE_NFCONV
556 	char *original_outs_value = *outs;
557 	int original_outs_len = outs_len;
558 #endif /* ENABLE_NFCONV */
559 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
560 
561 	char *t;
562 	int i, size, ret = -1;
563 	int halfpair;
564 
565 	halfpair = 0;
566 	if (!*outs) {
567 		/* If no output buffer was provided, we will allocate one and
568 		 * limit its length to PATH_MAX.  Note: we follow the standard
569 		 * convention of PATH_MAX including the terminating null. */
570 		outs_len = PATH_MAX;
571 	}
572 
573 	/* The size *with* the terminating null is limited to @outs_len,
574 	 * so the size *without* the terminating null is limited to one less. */
575 	size = utf16_to_utf8_size(ins, ins_len, outs_len - 1);
576 
577 	if (size < 0)
578 		goto out;
579 
580 	if (!*outs) {
581 		outs_len = size + 1;
582 		*outs = ntfs_malloc(outs_len);
583 		if (!*outs)
584 			goto out;
585 	}
586 
587 	t = *outs;
588 
589 	for (i = 0; i < ins_len && ins[i]; i++) {
590 	    unsigned short c = le16_to_cpu(ins[i]);
591 			/* size not double-checked */
592 		if (halfpair) {
593 			if ((c >= 0xdc00) && (c < 0xe000)) {
594 				*t++ = 0xf0 + (((halfpair + 64) >> 8) & 7);
595 				*t++ = 0x80 + (((halfpair + 64) >> 2) & 63);
596 				*t++ = 0x80 + ((c >> 6) & 15) + ((halfpair & 3) << 4);
597 				*t++ = 0x80 + (c & 63);
598 				halfpair = 0;
599 			} else {
600 #if ALLOW_BROKEN_UNICODE
601 				/* The first UTF-16 unit of a surrogate pair has
602 				 * a value between 0xd800 and 0xdc00. It can be
603 				 * encoded as an individual UTF-8 sequence if we
604 				 * cannot combine it with the next UTF-16 unit
605 				 * unit as a surrogate pair. */
606 				*t++ = 0xe0 | (halfpair >> 12);
607 				*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
608 				*t++ = 0x80 | (halfpair & 0x3f);
609 				halfpair = 0;
610 
611 				--i;
612 				continue;
613 #else
614 				goto fail;
615 #endif /* ALLOW_BROKEN_UNICODE */
616 			}
617 		} else if (c < 0x80) {
618 			*t++ = c;
619 	    	} else {
620 			if (c < 0x800) {
621 			   	*t++ = (0xc0 | ((c >> 6) & 0x3f));
622 			        *t++ = 0x80 | (c & 0x3f);
623 			} else if (c < 0xd800) {
624 			   	*t++ = 0xe0 | (c >> 12);
625 			   	*t++ = 0x80 | ((c >> 6) & 0x3f);
626 		        	*t++ = 0x80 | (c & 0x3f);
627 			} else if (c < 0xdc00)
628 				halfpair = c;
629 #if ALLOW_BROKEN_UNICODE
630 			else if (c < 0xe000) {
631 				*t++ = 0xe0 | (c >> 12);
632 				*t++ = 0x80 | ((c >> 6) & 0x3f);
633 				*t++ = 0x80 | (c & 0x3f);
634 			}
635 #endif /* ALLOW_BROKEN_UNICODE */
636 			else if (c >= 0xe000) {
637 				*t++ = 0xe0 | (c >> 12);
638 				*t++ = 0x80 | ((c >> 6) & 0x3f);
639 			        *t++ = 0x80 | (c & 0x3f);
640 			} else
641 				goto fail;
642 	        }
643 	}
644 #if ALLOW_BROKEN_UNICODE
645 	if (halfpair) { /* ending with a single surrogate */
646 		*t++ = 0xe0 | (halfpair >> 12);
647 		*t++ = 0x80 | ((halfpair >> 6) & 0x3f);
648 		*t++ = 0x80 | (halfpair & 0x3f);
649 	}
650 #endif /* ALLOW_BROKEN_UNICODE */
651 	*t = '\0';
652 
653 #if defined(__APPLE__) || defined(__DARWIN__)
654 #ifdef ENABLE_NFCONV
655 	if(nfconvert_utf8 && (t - *outs) > 0) {
656 		char *new_outs = NULL;
657 		int new_outs_len = ntfs_macosx_normalize_utf8(*outs, &new_outs, 0); // Normalize to decomposed form
658 		if(new_outs_len >= 0 && new_outs != NULL) {
659 			if(original_outs_value != *outs) {
660 				// We have allocated outs ourselves.
661 				free(*outs);
662 				*outs = new_outs;
663 				t = *outs + new_outs_len;
664 			}
665 			else {
666 				// We need to copy new_outs into the fixed outs buffer.
667 				memset(*outs, 0, original_outs_len);
668 				strncpy(*outs, new_outs, original_outs_len-1);
669 				t = *outs + original_outs_len;
670 				free(new_outs);
671 			}
672 		}
673 		else {
674 			ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFD: %s\n", *outs);
675 			ntfs_log_error("  new_outs=0x%p\n", new_outs);
676 			ntfs_log_error("  new_outs_len=%d\n", new_outs_len);
677 		}
678 	}
679 #endif /* ENABLE_NFCONV */
680 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
681 
682 	ret = t - *outs;
683 out:
684 	return ret;
685 fail:
686 	errno = EILSEQ;
687 	goto out;
688 }
689 
/*
 * Return the amount of 16-bit elements in UTF-16LE needed
 * (without the terminating null) to store given UTF-8 string.
 *
 * Every byte below 0xc0 counts for one element.  A lead byte of 0xc0 or
 * above counts for one element, or two when it is 0xf0 or above (surrogate
 * pair), and the bytes following it are skipped as continuation bytes
 * without being examined.  A string which ends in the middle of a sequence
 * simply ends the count.
 *
 * Return -1 with errno set to ENAMETOOLONG if the count reaches PATH_MAX,
 * or to EILSEQ if a byte of 0xf5 or above is found in lead position.
 *
 * Note: This does not check whether the input sequence is a valid utf8 string,
 *	 and should be used only in context where such check is made!
 */
static int utf8_to_utf16_size(const char *s)
{
	const unsigned char *p = (const unsigned char *)s;
	size_t units = 0;
	unsigned int lead;

	while ((lead = *p++) != 0) {
		if (++units >= PATH_MAX) {
			errno = ENAMETOOLONG;
			return -1;
		}
		if (lead < 0xc0)
			continue;	/* single byte: nothing to skip */
		if (lead >= 0xF5) {
			errno = EILSEQ;
			return -1;
		}
		/* First continuation byte. */
		if (!*p)
			break;
		p++;
		/* Second continuation byte (three and four byte forms). */
		if (!*p)
			break;
		if (lead >= 0xE0)
			p++;
		/* Third continuation byte (four byte form only). */
		if (!*p)
			break;
		if (lead >= 0xF0) {
			p++;
			/* A four byte sequence needs a surrogate pair. */
			if (++units >= PATH_MAX) {
				errno = ENAMETOOLONG;
				return -1;
			}
		}
	}
	return (int)units;
}
737 /*
738  * This converts one UTF-8 sequence to cpu-endian Unicode value
739  * within range U+0 .. U+10ffff and excluding U+D800 .. U+DFFF
740  *
741  * Return the number of used utf8 bytes or -1 with errno set
742  * if sequence is invalid.
743  */
utf8_to_unicode(u32 * wc,const char * s)744 static int utf8_to_unicode(u32 *wc, const char *s)
745 {
746     	unsigned int byte = *((const unsigned char *)s);
747 
748 					/* single byte */
749 	if (byte == 0) {
750 		*wc = (u32) 0;
751 		return 0;
752 	} else if (byte < 0x80) {
753 		*wc = (u32) byte;
754 		return 1;
755 					/* double byte */
756 	} else if (byte < 0xc2) {
757 		goto fail;
758 	} else if (byte < 0xE0) {
759 		if ((s[1] & 0xC0) == 0x80) {
760 			*wc = ((u32)(byte & 0x1F) << 6)
761 			    | ((u32)(s[1] & 0x3F));
762 			return 2;
763 		} else
764 			goto fail;
765 					/* three-byte */
766 	} else if (byte < 0xF0) {
767 		if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)) {
768 			*wc = ((u32)(byte & 0x0F) << 12)
769 			    | ((u32)(s[1] & 0x3F) << 6)
770 			    | ((u32)(s[2] & 0x3F));
771 			/* Check valid ranges */
772 #if ALLOW_BROKEN_UNICODE
773 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
774 			  || ((*wc >= 0xD800) && (*wc <= 0xDFFF))
775 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFF)))
776 				return 3;
777 #else
778 			if (((*wc >= 0x800) && (*wc <= 0xD7FF))
779 			  || ((*wc >= 0xe000) && (*wc <= 0xFFFD)))
780 				return 3;
781 #endif /* ALLOW_BROKEN_UNICODE */
782 		}
783 		goto fail;
784 					/* four-byte */
785 	} else if (byte < 0xF5) {
786 		if (((s[1] & 0xC0) == 0x80) && ((s[2] & 0xC0) == 0x80)
787 		  && ((s[3] & 0xC0) == 0x80)) {
788 			*wc = ((u32)(byte & 0x07) << 18)
789 			    | ((u32)(s[1] & 0x3F) << 12)
790 			    | ((u32)(s[2] & 0x3F) << 6)
791 			    | ((u32)(s[3] & 0x3F));
792 			/* Check valid ranges */
793 			if ((*wc <= 0x10ffff) && (*wc >= 0x10000))
794 				return 4;
795 		}
796 		goto fail;
797 	}
798 fail:
799 	errno = EILSEQ;
800 	return -1;
801 }
802 
803 /**
804  * ntfs_utf8_to_utf16 - convert a UTF-8 string to a UTF-16LE string
805  * @ins:	input multibyte string buffer
806  * @outs:	on return contains the (allocated) output utf16 string
807  * @outs_len:	length of output buffer in utf16 characters
808  *
809  * Return -1 with errno set.
810  */
ntfs_utf8_to_utf16(const char * ins,ntfschar ** outs)811 static int ntfs_utf8_to_utf16(const char *ins, ntfschar **outs)
812 {
813 #if defined(__APPLE__) || defined(__DARWIN__)
814 #ifdef ENABLE_NFCONV
815 	char *new_ins = NULL;
816 	if(nfconvert_utf8) {
817 		int new_ins_len;
818 		new_ins_len = ntfs_macosx_normalize_utf8(ins, &new_ins, 1); // Normalize to composed form
819 		if(new_ins_len >= 0)
820 			ins = new_ins;
821 		else
822 			ntfs_log_error("Failed to normalize NTFS string to UTF-8 NFC: %s\n", ins);
823 	}
824 #endif /* ENABLE_NFCONV */
825 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
826 	const char *t = ins;
827 	u32 wc;
828 	BOOL allocated;
829 	ntfschar *outpos;
830 	int shorts, ret = -1;
831 
832 	shorts = utf8_to_utf16_size(ins);
833 	if (shorts < 0)
834 		goto fail;
835 
836 	allocated = FALSE;
837 	if (!*outs) {
838 		*outs = ntfs_malloc((shorts + 1) * sizeof(ntfschar));
839 		if (!*outs)
840 			goto fail;
841 		allocated = TRUE;
842 	}
843 
844 	outpos = *outs;
845 
846 	while(1) {
847 		int m  = utf8_to_unicode(&wc, t);
848 		if (m <= 0) {
849 			if (m < 0) {
850 				/* do not leave space allocated if failed */
851 				if (allocated) {
852 					free(*outs);
853 					*outs = (ntfschar*)NULL;
854 				}
855 				goto fail;
856 			}
857 			*outpos++ = const_cpu_to_le16(0);
858 			break;
859 		}
860 		if (wc < 0x10000)
861 			*outpos++ = cpu_to_le16(wc);
862 		else {
863 			wc -= 0x10000;
864 			*outpos++ = cpu_to_le16((wc >> 10) + 0xd800);
865 			*outpos++ = cpu_to_le16((wc & 0x3ff) + 0xdc00);
866 		}
867 		t += m;
868 	}
869 
870 	ret = --outpos - *outs;
871 fail:
872 #if defined(__APPLE__) || defined(__DARWIN__)
873 #ifdef ENABLE_NFCONV
874 	if(new_ins != NULL)
875 		free(new_ins);
876 #endif /* ENABLE_NFCONV */
877 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
878 	return ret;
879 }
880 
881 /**
882  * ntfs_ucstombs - convert a little endian Unicode string to a multibyte string
883  * @ins:	input Unicode string buffer
884  * @ins_len:	length of input string in Unicode characters
885  * @outs:	on return contains the (allocated) output multibyte string
886  * @outs_len:	length of output buffer in bytes (ignored if *@outs is NULL)
887  *
888  * Convert the input little endian, 2-byte Unicode string @ins, of length
889  * @ins_len into the multibyte string format dictated by the current locale.
890  *
891  * If *@outs is NULL, the function allocates the string and the caller is
892  * responsible for calling free(*@outs); when finished with it.
893  *
894  * On success the function returns the number of bytes written to the output
895  * string *@outs (>= 0), not counting the terminating NULL byte. If the output
896  * string buffer was allocated, *@outs is set to it.
897  *
898  * On error, -1 is returned, and errno is set to the error code. The following
899  * error codes can be expected:
900  *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
901  *	EILSEQ		The input string cannot be represented as a multibyte
902  *			sequence according to the current locale.
903  *	ENAMETOOLONG	Destination buffer is too small for input string.
904  *	ENOMEM		Not enough memory to allocate destination buffer.
905  */
ntfs_ucstombs(const ntfschar * ins,const int ins_len,char ** outs,int outs_len)906 int ntfs_ucstombs(const ntfschar *ins, const int ins_len, char **outs,
907 		int outs_len)
908 {
909 	char *mbs;
910 	int mbs_len;
911 #ifdef MB_CUR_MAX
912 	wchar_t wc;
913 	int i, o;
914 	int cnt = 0;
915 #ifdef HAVE_MBSINIT
916 	mbstate_t mbstate;
917 #endif
918 #endif /* MB_CUR_MAX */
919 
920 	if (!ins || !outs) {
921 		errno = EINVAL;
922 		return -1;
923 	}
924 	mbs = *outs;
925 	mbs_len = outs_len;
926 	if (mbs && !mbs_len) {
927 		errno = ENAMETOOLONG;
928 		return -1;
929 	}
930 	if (use_utf8)
931 		return ntfs_utf16_to_utf8(ins, ins_len, outs, outs_len);
932 #ifdef MB_CUR_MAX
933 	if (!mbs) {
934 		mbs_len = (ins_len + 1) * MB_CUR_MAX;
935 		mbs = ntfs_malloc(mbs_len);
936 		if (!mbs)
937 			return -1;
938 	}
939 #ifdef HAVE_MBSINIT
940 	memset(&mbstate, 0, sizeof(mbstate));
941 #else
942 #ifndef __HAIKU__
943 	wctomb(NULL, 0);
944 #endif
945 #endif
946 	for (i = o = 0; i < ins_len; i++) {
947 		/* Reallocate memory if necessary or abort. */
948 		if ((int)(o + MB_CUR_MAX) > mbs_len) {
949 			char *tc;
950 			if (mbs == *outs) {
951 				errno = ENAMETOOLONG;
952 				return -1;
953 			}
954 			tc = ntfs_malloc((mbs_len + 64) & ~63);
955 			if (!tc)
956 				goto err_out;
957 			memcpy(tc, mbs, mbs_len);
958 			mbs_len = (mbs_len + 64) & ~63;
959 			free(mbs);
960 			mbs = tc;
961 		}
962 		/* Convert the LE Unicode character to a CPU wide character. */
963 		wc = (wchar_t)le16_to_cpu(ins[i]);
964 		if (!wc)
965 			break;
966 		/* Convert the CPU endian wide character to multibyte. */
967 #ifdef HAVE_MBSINIT
968 		cnt = wcrtomb(mbs + o, wc, &mbstate);
969 #elif defined(__HAIKU__)
970 		cnt = -1;
971 #else
972 		cnt = wctomb(mbs + o, wc);
973 #endif
974 		if (cnt == -1)
975 			goto err_out;
976 		if (cnt <= 0) {
977 			ntfs_log_debug("Eeek. cnt <= 0, cnt = %i\n", cnt);
978 			errno = EINVAL;
979 			goto err_out;
980 		}
981 		o += cnt;
982 	}
983 #ifdef HAVE_MBSINIT
984 	/* Make sure we are back in the initial state. */
985 	if (!mbsinit(&mbstate)) {
986 		ntfs_log_debug("Eeek. mbstate not in initial state!\n");
987 		errno = EILSEQ;
988 		goto err_out;
989 	}
990 #endif
991 	/* Now write the NULL character. */
992 	mbs[o] = '\0';
993 	if (*outs != mbs)
994 		*outs = mbs;
995 	return o;
996 err_out:
997 	if (mbs != *outs) {
998 		int eo = errno;
999 		free(mbs);
1000 		errno = eo;
1001 	}
1002 #else /* MB_CUR_MAX */
1003 	errno = EILSEQ;
1004 #endif /* MB_CUR_MAX */
1005 	return -1;
1006 }
1007 
1008 /**
1009  * ntfs_mbstoucs - convert a multibyte string to a little endian Unicode string
1010  * @ins:	input multibyte string buffer
1011  * @outs:	on return contains the (allocated) output Unicode string
1012  *
1013  * Convert the input multibyte string @ins, from the current locale into the
1014  * corresponding little endian, 2-byte Unicode string.
1015  *
1016  * The function allocates the string and the caller is responsible for calling
1017  * free(*@outs); when finished with it.
1018  *
1019  * On success the function returns the number of Unicode characters written to
1020  * the output string *@outs (>= 0), not counting the terminating Unicode NULL
1021  * character.
1022  *
1023  * On error, -1 is returned, and errno is set to the error code. The following
1024  * error codes can be expected:
1025  *	EINVAL		Invalid arguments (e.g. @ins or @outs is NULL).
1026  *	EILSEQ		The input string cannot be represented as a Unicode
1027  *			string according to the current locale.
1028  *	ENAMETOOLONG	Destination buffer is too small for input string.
1029  *	ENOMEM		Not enough memory to allocate destination buffer.
1030  */
ntfs_mbstoucs(const char * ins,ntfschar ** outs)1031 int ntfs_mbstoucs(const char *ins, ntfschar **outs)
1032 {
1033 #ifdef MB_CUR_MAX
1034 	ntfschar *ucs;
1035 	const char *s;
1036 	wchar_t wc;
1037 	int i, o, cnt, ins_len, ucs_len, ins_size;
1038 #ifdef HAVE_MBSINIT
1039 	mbstate_t mbstate;
1040 #endif
1041 #endif /* MB_CUR_MAX */
1042 
1043 	if (!ins || !outs) {
1044 		errno = EINVAL;
1045 		return -1;
1046 	}
1047 
1048 	if (use_utf8)
1049 		return ntfs_utf8_to_utf16(ins, outs);
1050 
1051 #ifdef MB_CUR_MAX
1052 	/* Determine the size of the multi-byte string in bytes. */
1053 	ins_size = strlen(ins);
1054 	/* Determine the length of the multi-byte string. */
1055 	s = ins;
1056 #if defined(HAVE_MBSINIT)
1057 	memset(&mbstate, 0, sizeof(mbstate));
1058 	ins_len = mbsrtowcs(NULL, (const char **)&s, 0, &mbstate);
1059 #ifdef __CYGWIN32__
1060 	if (!ins_len && *ins) {
1061 		/* Older Cygwin had broken mbsrtowcs() implementation. */
1062 		ins_len = strlen(ins);
1063 	}
1064 #endif
1065 #elif !defined(DJGPP) && !defined(__HAIKU__)
1066 	ins_len = mbstowcs(NULL, s, 0);
1067 #else
1068 	/* Eeek!!! DJGPP has broken mbstowcs() implementation!!! */
1069 	ins_len = strlen(ins);
1070 #endif
1071 	if (ins_len == -1)
1072 		return ins_len;
1073 #ifdef HAVE_MBSINIT
1074 	if ((s != ins) || !mbsinit(&mbstate)) {
1075 #else
1076 	if (s != ins) {
1077 #endif
1078 		errno = EILSEQ;
1079 		return -1;
1080 	}
1081 	/* Add the NULL terminator. */
1082 	ins_len++;
1083 	ucs_len = ins_len;
1084 	ucs = ntfs_malloc(ucs_len * sizeof(ntfschar));
1085 	if (!ucs)
1086 		return -1;
1087 #ifdef HAVE_MBSINIT
1088 	memset(&mbstate, 0, sizeof(mbstate));
1089 #else
1090 #ifndef __HAIKU__
1091 	mbtowc(NULL, NULL, 0);
1092 #endif
1093 #endif
1094 	for (i = o = cnt = 0; i < ins_size; i += cnt, o++) {
1095 		/* Reallocate memory if necessary. */
1096 		if (o >= ucs_len) {
1097 			ntfschar *tc;
1098 			ucs_len = (ucs_len * sizeof(ntfschar) + 64) & ~63;
1099 			tc = realloc(ucs, ucs_len);
1100 			if (!tc)
1101 				goto err_out;
1102 			ucs = tc;
1103 			ucs_len /= sizeof(ntfschar);
1104 		}
1105 		/* Convert the multibyte character to a wide character. */
1106 #ifdef HAVE_MBSINIT
1107 		cnt = mbrtowc(&wc, ins + i, ins_size - i, &mbstate);
1108 #elif defined(__HAIKU__)
1109 		cnt = -1;
1110 #else
1111 		cnt = mbtowc(&wc, ins + i, ins_size - i);
1112 #endif
1113 		if (!cnt)
1114 			break;
1115 		if (cnt == -1)
1116 			goto err_out;
1117 		if (cnt < -1) {
1118 			ntfs_log_trace("Eeek. cnt = %i\n", cnt);
1119 			errno = EINVAL;
1120 			goto err_out;
1121 		}
1122 		/* Make sure we are not overflowing the NTFS Unicode set. */
1123 		if ((unsigned long)wc >= (unsigned long)(1 <<
1124 				(8 * sizeof(ntfschar)))) {
1125 			errno = EILSEQ;
1126 			goto err_out;
1127 		}
1128 		/* Convert the CPU wide character to a LE Unicode character. */
1129 		ucs[o] = cpu_to_le16(wc);
1130 	}
1131 #ifdef HAVE_MBSINIT
1132 	/* Make sure we are back in the initial state. */
1133 	if (!mbsinit(&mbstate)) {
1134 		ntfs_log_trace("Eeek. mbstate not in initial state!\n");
1135 		errno = EILSEQ;
1136 		goto err_out;
1137 	}
1138 #endif
1139 	/* Now write the NULL character. */
1140 	ucs[o] = const_cpu_to_le16(L'\0');
1141 	*outs = ucs;
1142 	return o;
1143 err_out:
1144 	free(ucs);
1145 #else /* MB_CUR_MAX */
1146 	errno = EILSEQ;
1147 #endif /* MB_CUR_MAX */
1148 	return -1;
1149 }
1150 
1151 /*
1152  *		Turn a UTF8 name uppercase
1153  *
1154  *	Returns an allocated uppercase name which has to be freed by caller
1155  *	or NULL if there is an error (described by errno)
1156  */
1157 
1158 char *ntfs_uppercase_mbs(const char *low,
1159 			const ntfschar *upcase, u32 upcase_size)
1160 {
1161 	int size;
1162 	char *upp;
1163 	u32 wc;
1164 	int n;
1165 	const char *s;
1166 	char *t;
1167 
1168 	size = strlen(low);
1169 	upp = (char*)ntfs_malloc(3*size + 1);
1170 	if (upp) {
1171 		s = low;
1172 		t = upp;
1173 		do {
1174 			n = utf8_to_unicode(&wc, s);
1175 			if (n > 0) {
1176 				if (wc < upcase_size)
1177 					wc = le16_to_cpu(upcase[wc]);
1178 				if (wc < 0x80)
1179 					*t++ = wc;
1180 				else if (wc < 0x800) {
1181 					*t++ = (0xc0 | ((wc >> 6) & 0x3f));
1182 					*t++ = 0x80 | (wc & 0x3f);
1183 				} else if (wc < 0x10000) {
1184 					*t++ = 0xe0 | (wc >> 12);
1185 					*t++ = 0x80 | ((wc >> 6) & 0x3f);
1186 					*t++ = 0x80 | (wc & 0x3f);
1187 				} else {
1188 					*t++ = 0xf0 | ((wc >> 18) & 7);
1189 					*t++ = 0x80 | ((wc >> 12) & 63);
1190 					*t++ = 0x80 | ((wc >> 6) & 0x3f);
1191 					*t++ = 0x80 | (wc & 0x3f);
1192 				}
1193 			s += n;
1194 			}
1195 		} while (n > 0);
1196 		if (n < 0) {
1197 			free(upp);
1198 			upp = (char*)NULL;
1199 			errno = EILSEQ;
1200 		}
1201 		*t = 0;
1202 	}
1203 	return (upp);
1204 }
1205 
1206 /**
1207  * ntfs_upcase_table_build - build the default upcase table for NTFS
1208  * @uc:		destination buffer where to store the built table
1209  * @uc_len:	size of destination buffer in bytes
1210  *
1211  * ntfs_upcase_table_build() builds the default upcase table for NTFS and
1212  * stores it in the caller supplied buffer @uc of size @uc_len.
1213  *
1214  * Note, @uc_len must be at least 128kiB in size or bad things will happen!
1215  */
1216 void ntfs_upcase_table_build(ntfschar *uc, u32 uc_len)
1217 {
1218 	struct NEWUPPERCASE {
1219 		unsigned short first;
1220 		unsigned short last;
1221 		short diff;
1222 		unsigned char step;
1223 		unsigned char osmajor;
1224 		unsigned char osminor;
1225 	} ;
1226 
1227 	/*
1228 	 *	This is the table as defined by Windows XP
1229 	 */
1230 	static int uc_run_table[][3] = { /* Start, End, Add */
1231 	{0x0061, 0x007B,  -32}, {0x0451, 0x045D, -80}, {0x1F70, 0x1F72,  74},
1232 	{0x00E0, 0x00F7,  -32}, {0x045E, 0x0460, -80}, {0x1F72, 0x1F76,  86},
1233 	{0x00F8, 0x00FF,  -32}, {0x0561, 0x0587, -48}, {0x1F76, 0x1F78, 100},
1234 	{0x0256, 0x0258, -205}, {0x1F00, 0x1F08,   8}, {0x1F78, 0x1F7A, 128},
1235 	{0x028A, 0x028C, -217}, {0x1F10, 0x1F16,   8}, {0x1F7A, 0x1F7C, 112},
1236 	{0x03AC, 0x03AD,  -38}, {0x1F20, 0x1F28,   8}, {0x1F7C, 0x1F7E, 126},
1237 	{0x03AD, 0x03B0,  -37}, {0x1F30, 0x1F38,   8}, {0x1FB0, 0x1FB2,   8},
1238 	{0x03B1, 0x03C2,  -32}, {0x1F40, 0x1F46,   8}, {0x1FD0, 0x1FD2,   8},
1239 	{0x03C2, 0x03C3,  -31}, {0x1F51, 0x1F52,   8}, {0x1FE0, 0x1FE2,   8},
1240 	{0x03C3, 0x03CC,  -32}, {0x1F53, 0x1F54,   8}, {0x1FE5, 0x1FE6,   7},
1241 	{0x03CC, 0x03CD,  -64}, {0x1F55, 0x1F56,   8}, {0x2170, 0x2180, -16},
1242 	{0x03CD, 0x03CF,  -63}, {0x1F57, 0x1F58,   8}, {0x24D0, 0x24EA, -26},
1243 	{0x0430, 0x0450,  -32}, {0x1F60, 0x1F68,   8}, {0xFF41, 0xFF5B, -32},
1244 	{0}
1245 	};
1246 	static int uc_dup_table[][2] = { /* Start, End */
1247 	{0x0100, 0x012F}, {0x01A0, 0x01A6}, {0x03E2, 0x03EF}, {0x04CB, 0x04CC},
1248 	{0x0132, 0x0137}, {0x01B3, 0x01B7}, {0x0460, 0x0481}, {0x04D0, 0x04EB},
1249 	{0x0139, 0x0149}, {0x01CD, 0x01DD}, {0x0490, 0x04BF}, {0x04EE, 0x04F5},
1250 	{0x014A, 0x0178}, {0x01DE, 0x01EF}, {0x04BF, 0x04BF}, {0x04F8, 0x04F9},
1251 	{0x0179, 0x017E}, {0x01F4, 0x01F5}, {0x04C1, 0x04C4}, {0x1E00, 0x1E95},
1252 	{0x018B, 0x018B}, {0x01FA, 0x0218}, {0x04C7, 0x04C8}, {0x1EA0, 0x1EF9},
1253 	{0}
1254 	};
1255 	static int uc_byte_table[][2] = { /* Offset, Value */
1256 	{0x00FF, 0x0178}, {0x01AD, 0x01AC}, {0x01F3, 0x01F1}, {0x0269, 0x0196},
1257 	{0x0183, 0x0182}, {0x01B0, 0x01AF}, {0x0253, 0x0181}, {0x026F, 0x019C},
1258 	{0x0185, 0x0184}, {0x01B9, 0x01B8}, {0x0254, 0x0186}, {0x0272, 0x019D},
1259 	{0x0188, 0x0187}, {0x01BD, 0x01BC}, {0x0259, 0x018F}, {0x0275, 0x019F},
1260 	{0x018C, 0x018B}, {0x01C6, 0x01C4}, {0x025B, 0x0190}, {0x0283, 0x01A9},
1261 	{0x0192, 0x0191}, {0x01C9, 0x01C7}, {0x0260, 0x0193}, {0x0288, 0x01AE},
1262 	{0x0199, 0x0198}, {0x01CC, 0x01CA}, {0x0263, 0x0194}, {0x0292, 0x01B7},
1263 	{0x01A8, 0x01A7}, {0x01DD, 0x018E}, {0x0268, 0x0197},
1264 	{0}
1265 	};
1266 
1267 /*
1268  *		Changes which were applied to later Windows versions
1269  *
1270  *   md5 for $UpCase from Winxp : 6fa3db2468275286210751e869d36373
1271  *                        Vista : 2f03b5a69d486ff3864cecbd07f24440
1272  *                        Win8 :  7ff498a44e45e77374cc7c962b1b92f2
1273  */
1274 	static const struct NEWUPPERCASE newuppercase[] = {
1275 						/* from Windows 6.0 (Vista) */
1276 		{ 0x37b, 0x37d, 0x82, 1, 6, 0 },
1277 		{ 0x1f80, 0x1f87, 0x8, 1, 6, 0 },
1278 		{ 0x1f90, 0x1f97, 0x8, 1, 6, 0 },
1279 		{ 0x1fa0, 0x1fa7, 0x8, 1, 6, 0 },
1280 		{ 0x2c30, 0x2c5e, -0x30, 1, 6, 0 },
1281 		{ 0x2d00, 0x2d25, -0x1c60, 1, 6, 0 },
1282 		{ 0x2c68, 0x2c6c, -0x1, 2, 6, 0 },
1283 		{ 0x219, 0x21f, -0x1, 2, 6, 0 },
1284 		{ 0x223, 0x233, -0x1, 2, 6, 0 },
1285 		{ 0x247, 0x24f, -0x1, 2, 6, 0 },
1286 		{ 0x3d9, 0x3e1, -0x1, 2, 6, 0 },
1287 		{ 0x48b, 0x48f, -0x1, 2, 6, 0 },
1288 		{ 0x4fb, 0x513, -0x1, 2, 6, 0 },
1289 		{ 0x2c81, 0x2ce3, -0x1, 2, 6, 0 },
1290 		{ 0x3f8, 0x3fb, -0x1, 3, 6, 0 },
1291 		{ 0x4c6, 0x4ce, -0x1, 4, 6, 0 },
1292 		{ 0x23c, 0x242, -0x1, 6, 6, 0 },
1293 		{ 0x4ed, 0x4f7, -0x1, 10, 6, 0 },
1294 		{ 0x450, 0x45d, -0x50, 13, 6, 0 },
1295 		{ 0x2c61, 0x2c76, -0x1, 21, 6, 0 },
1296 		{ 0x1fcc, 0x1ffc, -0x9, 48, 6, 0 },
1297 		{ 0x180, 0x180, 0xc3, 1, 6, 0 },
1298 		{ 0x195, 0x195, 0x61, 1, 6, 0 },
1299 		{ 0x19a, 0x19a, 0xa3, 1, 6, 0 },
1300 		{ 0x19e, 0x19e, 0x82, 1, 6, 0 },
1301 		{ 0x1bf, 0x1bf, 0x38, 1, 6, 0 },
1302 		{ 0x1f9, 0x1f9, -0x1, 1, 6, 0 },
1303 		{ 0x23a, 0x23a, 0x2a2b, 1, 6, 0 },
1304 		{ 0x23e, 0x23e, 0x2a28, 1, 6, 0 },
1305 		{ 0x26b, 0x26b, 0x29f7, 1, 6, 0 },
1306 		{ 0x27d, 0x27d, 0x29e7, 1, 6, 0 },
1307 		{ 0x280, 0x280, -0xda, 1, 6, 0 },
1308 		{ 0x289, 0x289, -0x45, 1, 6, 0 },
1309 		{ 0x28c, 0x28c, -0x47, 1, 6, 0 },
1310 		{ 0x3f2, 0x3f2, 0x7, 1, 6, 0 },
1311 		{ 0x4cf, 0x4cf, -0xf, 1, 6, 0 },
1312 		{ 0x1d7d, 0x1d7d, 0xee6, 1, 6, 0 },
1313 		{ 0x1fb3, 0x1fb3, 0x9, 1, 6, 0 },
1314 		{ 0x214e, 0x214e, -0x1c, 1, 6, 0 },
1315 		{ 0x2184, 0x2184, -0x1, 1, 6, 0 },
1316 						/* from Windows 6.1 (Win7) */
1317 		{ 0x23a, 0x23e,  0x0, 4, 6, 1 },
1318 		{ 0x250, 0x250,  0x2a1f, 2, 6, 1 },
1319 		{ 0x251, 0x251,  0x2a1c, 2, 6, 1 },
1320 		{ 0x271, 0x271,  0x29fd, 2, 6, 1 },
1321 		{ 0x371, 0x373, -0x1, 2, 6, 1 },
1322 		{ 0x377, 0x377, -0x1, 2, 6, 1 },
1323 		{ 0x3c2, 0x3c2,  0x0, 2, 6, 1 },
1324 		{ 0x3d7, 0x3d7, -0x8, 2, 6, 1 },
1325 		{ 0x515, 0x523, -0x1, 2, 6, 1 },
1326 			/* below, -0x75fc stands for 0x8a04 and truncation */
1327 		{ 0x1d79, 0x1d79, -0x75fc, 2, 6, 1 },
1328 		{ 0x1efb, 0x1eff, -0x1, 2, 6, 1 },
1329 		{ 0x1fc3, 0x1ff3,  0x9, 48, 6, 1 },
1330 		{ 0x1fcc, 0x1ffc,  0x0, 48, 6, 1 },
1331 		{ 0x2c65, 0x2c65, -0x2a2b, 2, 6, 1 },
1332 		{ 0x2c66, 0x2c66, -0x2a28, 2, 6, 1 },
1333 		{ 0x2c73, 0x2c73, -0x1, 2, 6, 1 },
1334 		{ 0xa641, 0xa65f, -0x1, 2, 6, 1 },
1335 		{ 0xa663, 0xa66d, -0x1, 2, 6, 1 },
1336 		{ 0xa681, 0xa697, -0x1, 2, 6, 1 },
1337 		{ 0xa723, 0xa72f, -0x1, 2, 6, 1 },
1338 		{ 0xa733, 0xa76f, -0x1, 2, 6, 1 },
1339 		{ 0xa77a, 0xa77c, -0x1, 2, 6, 1 },
1340 		{ 0xa77f, 0xa787, -0x1, 2, 6, 1 },
1341 		{ 0xa78c, 0xa78c, -0x1, 2, 6, 1 },
1342 							/* end mark */
1343 		{ 0 }
1344 	} ;
1345 
1346 	int i, r;
1347 	int k, off;
1348 	const struct NEWUPPERCASE *puc;
1349 
1350 	memset((char*)uc, 0, uc_len);
1351 	uc_len >>= 1;
1352 	if (uc_len > 65536)
1353 		uc_len = 65536;
1354 	for (i = 0; (u32)i < uc_len; i++)
1355 		uc[i] = cpu_to_le16(i);
1356 	for (r = 0; uc_run_table[r][0]; r++) {
1357 		off = uc_run_table[r][2];
1358 		for (i = uc_run_table[r][0]; i < uc_run_table[r][1]; i++)
1359 			uc[i] = cpu_to_le16(i + off);
1360 	}
1361 	for (r = 0; uc_dup_table[r][0]; r++)
1362 		for (i = uc_dup_table[r][0]; i < uc_dup_table[r][1]; i += 2)
1363 			uc[i + 1] = cpu_to_le16(i);
1364 	for (r = 0; uc_byte_table[r][0]; r++) {
1365 		k = uc_byte_table[r][1];
1366 		uc[uc_byte_table[r][0]] = cpu_to_le16(k);
1367 	}
1368 	for (r=0; newuppercase[r].first; r++) {
1369 		puc = &newuppercase[r];
1370 		if ((puc->osmajor < UPCASE_MAJOR)
1371 		  || ((puc->osmajor == UPCASE_MAJOR)
1372 		     && (puc->osminor <= UPCASE_MINOR))) {
1373 			off = puc->diff;
1374 			for (i = puc->first; i <= puc->last; i += puc->step)
1375 				uc[i] = cpu_to_le16(i + off);
1376 		}
1377 	}
1378 }
1379 
1380 /*
1381  *		Allocate and build the default upcase table
1382  *
1383  *	Returns the number of entries
1384  *		0 if failed
1385  */
1386 
1387 #define UPCASE_LEN 65536 /* default number of entries in upcase */
1388 
1389 u32 ntfs_upcase_build_default(ntfschar **upcase)
1390 {
1391 	u32 upcase_len = 0;
1392 
1393 	*upcase = (ntfschar*)ntfs_malloc(UPCASE_LEN*2);
1394 	if (*upcase) {
1395 		ntfs_upcase_table_build(*upcase, UPCASE_LEN*2);
1396 		upcase_len = UPCASE_LEN;
1397 	}
1398 	return (upcase_len);
1399 }
1400 
1401 /*
1402  *		Build a table for converting to lower case
1403  *
1404  *	This is only meaningful when there is a single lower case
1405  *	character leading to an upper case one, and currently the
1406  *	only exception is the greek letter sigma which has a single
1407  *	upper case glyph (code U+03A3), but two lower case glyphs
1408  *	(code U+03C3 and U+03C2, the latter to be used at the end
1409  *	of a word). In the following implementation the upper case
1410  *	sigma will be lowercased as U+03C3.
1411  */
1412 
1413 ntfschar *ntfs_locase_table_build(const ntfschar *uc, u32 uc_cnt)
1414 {
1415 	ntfschar *lc;
1416 	u32 upp;
1417 	u32 i;
1418 
1419 	lc = (ntfschar*)ntfs_malloc(uc_cnt*sizeof(ntfschar));
1420 	if (lc) {
1421 		for (i=0; i<uc_cnt; i++)
1422 			lc[i] = cpu_to_le16(i);
1423 		for (i=0; i<uc_cnt; i++) {
1424 			upp = le16_to_cpu(uc[i]);
1425 			if ((upp != i) && (upp < uc_cnt))
1426 				lc[upp] = cpu_to_le16(i);
1427 		}
1428 	} else
1429 		ntfs_log_error("Could not build the locase table\n");
1430 	return (lc);
1431 }
1432 
1433 /**
1434  * ntfs_str2ucs - convert a string to a valid NTFS file name
1435  * @s:		input string
1436  * @len:	length of output buffer in Unicode characters
1437  *
1438  * Convert the input @s string into the corresponding little endian,
1439  * 2-byte Unicode string. The length of the converted string is less
1440  * or equal to the maximum length allowed by the NTFS format (255).
1441  *
1442  * If @s is NULL then return AT_UNNAMED.
1443  *
1444  * On success the function returns the Unicode string in an allocated
1445  * buffer and the caller is responsible to free it when it's not needed
1446  * anymore.
1447  *
1448  * On error NULL is returned and errno is set to the error code.
1449  */
1450 ntfschar *ntfs_str2ucs(const char *s, int *len)
1451 {
1452 	ntfschar *ucs = NULL;
1453 
1454 	if (s && ((*len = ntfs_mbstoucs(s, &ucs)) == -1)) {
1455 		ntfs_log_perror("Couldn't convert '%s' to Unicode", s);
1456 		return NULL;
1457 	}
1458 	if (*len > NTFS_MAX_NAME_LEN) {
1459 		free(ucs);
1460 		errno = ENAMETOOLONG;
1461 		return NULL;
1462 	}
1463 	if (!ucs || !*len) {
1464 		ucs  = AT_UNNAMED;
1465 		*len = 0;
1466 	}
1467 	return ucs;
1468 }
1469 
1470 /**
1471  * ntfs_ucsfree - free memory allocated by ntfs_str2ucs()
1472  * @ucs		input string to be freed
1473  *
1474  * Free memory at @ucs and which was allocated by ntfs_str2ucs.
1475  *
1476  * Return value: none.
1477  */
1478 void ntfs_ucsfree(ntfschar *ucs)
1479 {
1480 	if (ucs && (ucs != AT_UNNAMED))
1481 		free(ucs);
1482 }
1483 
1484 /*
1485  *		Check whether a name contains no chars forbidden
1486  *	for DOS or Win32 use
1487  *
1488  *	If @strict is TRUE, then trailing dots and spaces are forbidden.
1489  *	These names are technically allowed in the Win32 namespace, but
1490  *	they can be problematic.  See comment for FILE_NAME_WIN32.
1491  *
1492  *	If there is a bad char, errno is set to EINVAL
1493  */
1494 
1495 BOOL ntfs_forbidden_chars(const ntfschar *name, int len, BOOL strict)
1496 {
1497 	BOOL forbidden;
1498 	int ch;
1499 	int i;
1500 	static const u32 mainset = (1L << ('\"' - 0x20))
1501 			| (1L << ('*' - 0x20))
1502 			| (1L << ('/' - 0x20))
1503 			| (1L << (':' - 0x20))
1504 			| (1L << ('<' - 0x20))
1505 			| (1L << ('>' - 0x20))
1506 			| (1L << ('?' - 0x20));
1507 
1508 	forbidden = (len == 0) ||
1509 		    (strict && (name[len-1] == const_cpu_to_le16(' ') ||
1510 				name[len-1] == const_cpu_to_le16('.')));
1511 	for (i=0; i<len; i++) {
1512 		ch = le16_to_cpu(name[i]);
1513 		if ((ch < 0x20)
1514 		    || ((ch < 0x40)
1515 			&& ((1L << (ch - 0x20)) & mainset))
1516 		    || (ch == '\\')
1517 		    || (ch == '|'))
1518 			forbidden = TRUE;
1519 	}
1520 	if (forbidden)
1521 		errno = EINVAL;
1522 	return (forbidden);
1523 }
1524 
1525 /*
1526  *		Check whether a name contains no forbidden chars and
1527  *	is not a reserved name for DOS or Win32 use
1528  *
1529  *	The reserved names are CON, PRN, AUX, NUL, COM1..COM9, LPT1..LPT9
1530  *	with no suffix or any suffix.
1531  *
1532  *	If @strict is TRUE, then trailing dots and spaces are forbidden.
1533  *	These names are technically allowed in the Win32 namespace, but
1534  *	they can be problematic.  See comment for FILE_NAME_WIN32.
1535  *
1536  *	If the name is forbidden, errno is set to EINVAL
1537  */
1538 
1539 BOOL ntfs_forbidden_names(ntfs_volume *vol, const ntfschar *name, int len,
1540 			  BOOL strict)
1541 {
1542 	BOOL forbidden;
1543 	int h;
1544 	static const ntfschar dot = const_cpu_to_le16('.');
1545 	static const ntfschar con[] = { const_cpu_to_le16('c'),
1546 			const_cpu_to_le16('o'), const_cpu_to_le16('n') };
1547 	static const ntfschar prn[] = { const_cpu_to_le16('p'),
1548 			const_cpu_to_le16('r'), const_cpu_to_le16('n') };
1549 	static const ntfschar aux[] = { const_cpu_to_le16('a'),
1550 			const_cpu_to_le16('u'), const_cpu_to_le16('x') };
1551 	static const ntfschar nul[] = { const_cpu_to_le16('n'),
1552 			const_cpu_to_le16('u'), const_cpu_to_le16('l') };
1553 	static const ntfschar com[] = { const_cpu_to_le16('c'),
1554 			const_cpu_to_le16('o'), const_cpu_to_le16('m') };
1555 	static const ntfschar lpt[] = { const_cpu_to_le16('l'),
1556 			const_cpu_to_le16('p'), const_cpu_to_le16('t') };
1557 
1558 	forbidden = ntfs_forbidden_chars(name, len, strict);
1559 	if (!forbidden && (len >= 3)) {
1560 		/*
1561 		 * Rough hash check to tell whether the first couple of chars
1562 		 * may be one of CO PR AU NU LP or lowercase variants.
1563 		 */
1564 		h = ((le16_to_cpu(name[0]) & 31)*48)
1565 				^ ((le16_to_cpu(name[1]) & 31)*165);
1566 		if ((h % 23) == 17) {
1567 			/* do a full check, depending on the third char */
1568 			switch (le16_to_cpu(name[2]) & ~0x20) {
1569 			case 'N' :
1570 				if (((len == 3) || (name[3] == dot))
1571 				    && (!ntfs_ucsncasecmp(name, con, 3,
1572 						vol->upcase, vol->upcase_len)
1573 					|| !ntfs_ucsncasecmp(name, prn, 3,
1574 						vol->upcase, vol->upcase_len)))
1575 					forbidden = TRUE;
1576 				break;
1577 			case 'X' :
1578 				if (((len == 3) || (name[3] == dot))
1579 				    && !ntfs_ucsncasecmp(name, aux, 3,
1580 						vol->upcase, vol->upcase_len))
1581 					forbidden = TRUE;
1582 				break;
1583 			case 'L' :
1584 				if (((len == 3) || (name[3] == dot))
1585 				    && !ntfs_ucsncasecmp(name, nul, 3,
1586 						vol->upcase, vol->upcase_len))
1587 					forbidden = TRUE;
1588 				break;
1589 			case 'M' :
1590 				if ((len > 3)
1591 				    && (le16_to_cpu(name[3]) >= '1')
1592 				    && (le16_to_cpu(name[3]) <= '9')
1593 				    && ((len == 4) || (name[4] == dot))
1594 				    && !ntfs_ucsncasecmp(name, com, 3,
1595 						vol->upcase, vol->upcase_len))
1596 					forbidden = TRUE;
1597 				break;
1598 			case 'T' :
1599 				if ((len > 3)
1600 				    && (le16_to_cpu(name[3]) >= '1')
1601 				    && (le16_to_cpu(name[3]) <= '9')
1602 				    && ((len == 4) || (name[4] == dot))
1603 				    && !ntfs_ucsncasecmp(name, lpt, 3,
1604 						vol->upcase, vol->upcase_len))
1605 					forbidden = TRUE;
1606 				break;
1607 			}
1608 		}
1609 	}
1610 
1611 	if (forbidden)
1612 		errno = EINVAL;
1613 	return (forbidden);
1614 }
1615 
1616 /*
1617  *		Check whether the same name can be used as a DOS and
1618  *	a Win32 name
1619  *
1620  *	The names must be the same, or the short name the uppercase
1621  *	variant of the long name
1622  */
1623 
1624 BOOL ntfs_collapsible_chars(ntfs_volume *vol,
1625 			const ntfschar *shortname, int shortlen,
1626 			const ntfschar *longname, int longlen)
1627 {
1628 	BOOL collapsible;
1629 	unsigned int ch;
1630 	unsigned int cs;
1631 	int i;
1632 
1633 	collapsible = shortlen == longlen;
1634 	for (i=0; collapsible && (i<shortlen); i++) {
1635 		ch = le16_to_cpu(longname[i]);
1636 		cs = le16_to_cpu(shortname[i]);
1637 		if ((cs != ch)
1638 		    && ((ch >= vol->upcase_len)
1639 			|| (cs >= vol->upcase_len)
1640 			|| (vol->upcase[cs] != vol->upcase[ch])))
1641 				collapsible = FALSE;
1642 	}
1643 	return (collapsible);
1644 }
1645 
1646 /*
1647  * Define the character encoding to be used.
1648  * Use UTF-8 unless specified otherwise.
1649  */
1650 
1651 int ntfs_set_char_encoding(const char *locale)
1652 {
1653 	use_utf8 = 0;
1654 	if (!locale || strstr(locale,"utf8") || strstr(locale,"UTF8")
1655 	    || strstr(locale,"utf-8") || strstr(locale,"UTF-8"))
1656 		use_utf8 = 1;
1657 	else
1658 #ifndef __HAIKU__
1659 		if (setlocale(LC_ALL, locale))
1660 			use_utf8 = 0;
1661 		else
1662 #endif
1663 		{
1664 			ntfs_log_error("Invalid locale, encoding to UTF-8\n");
1665 			use_utf8 = 1;
1666 	 	}
1667 	return 0; /* always successful */
1668 }
1669 
1670 #if defined(__APPLE__) || defined(__DARWIN__)
1671 
/*
 * Enable (1) or disable (0) the normalization of file names.
 *
 * Returns 0 if the setting was recorded, or -1 if @normalize is neither
 * 0 nor 1 or if the support (ENABLE_NFCONV) was not compiled in.
 */
int ntfs_macosx_normalize_filenames(int normalize) {
#ifdef ENABLE_NFCONV
	if (normalize != 0 && normalize != 1)
		return -1;
	nfconvert_utf8 = normalize;
	return 0;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
1685 
/*
 * Normalize a UTF-8 string to the composed (@composed != 0) or decomposed
 * (@composed == 0) form.
 *
 * On success *@target is set to a newly allocated NUL-terminated buffer,
 * to be released with free(), and the length of the result in bytes (not
 * counting the terminator) is returned.
 *
 * On failure *@target is left untouched and a negative value is returned:
 * -2 if the source CFString could not be created, -3 if its mutable copy
 * could not be created, -1 for any other failure or if the support
 * (ENABLE_NFCONV) was not compiled in.
 */
int ntfs_macosx_normalize_utf8(const char *utf8_string, char **target,
		int composed)
{
#ifdef ENABLE_NFCONV
	/* For this code to compile, the CoreFoundation framework must be fed to
	 * the linker. */
	CFStringRef source;
	CFMutableStringRef work;
	CFRange whole;
	CFIndex needed;
	char *buf = NULL;
	int buf_size = -1;

	/* Convert the UTF-8 string to a CFString. */
	source = CFStringCreateWithCString(kCFAllocatorDefault,
		utf8_string, kCFStringEncodingUTF8);
	if (source == NULL) {
		ntfs_log_error("CFStringCreateWithCString failed!\n");
		return -2;
	}

	/* Work on a mutable copy; the source is no longer needed after it. */
	work = CFStringCreateMutableCopy(kCFAllocatorDefault, 0, source);
	CFRelease(source);
	if (work == NULL) {
		ntfs_log_error("CFStringCreateMutableCopy failed!\n");
		return -3;
	}

	/* Normalize the mutable string to the desired normalization form. */
	if (composed != 0)
		CFStringNormalize(work, kCFStringNormalizationFormC);
	else
		CFStringNormalize(work, kCFStringNormalizationFormD);

	/* First call only measures the UTF-8 output, second call fills a
	 * zeroed buffer one byte bigger, so the result is terminated. */
	whole = CFRangeMake(0, CFStringGetLength(work));
	if (CFStringGetBytes(work, whole,
		kCFStringEncodingUTF8, 0, false, NULL, 0,
		&needed) <= 0)
	{
		ntfs_log_error("Could not perform check for required length of "
			"UTF-8 conversion of normalized CFMutableString.\n");
	}
	else {
		buf_size = sizeof(char) * (needed + 1);
		buf = ntfs_calloc(buf_size);

		if (buf == NULL) {
			ntfs_log_error("Could not perform a ntfs_calloc of %d "
				"bytes for char *result.\n", buf_size);
		}
		else if (CFStringGetBytes(work, whole,
			kCFStringEncodingUTF8, 0, false,
			(UInt8*) buf, buf_size - 1,
			&needed) <= 0)
		{
			ntfs_log_error("Could not perform UTF-8 "
				"conversion of normalized "
				"CFMutableString.\n");
			free(buf);
			buf = NULL;
		}
	}

	CFRelease(work);

	if (buf == NULL)
		return -1;
	*target = buf;
	return buf_size - 1;
#else
	return -1;
#endif /* ENABLE_NFCONV */
}
1767 #endif /* defined(__APPLE__) || defined(__DARWIN__) */
1768