xref: /haiku/src/system/libroot/add-ons/icu/ICUCollateData.cpp (revision 445d4fd926c569e7b9ae28017da86280aaecbae2)
1 /*
2  * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "ICUCollateData.h"
8 
9 #include <assert.h>
10 #include <string.h>
11 #include <strings.h>
12 #include <wchar.h>
13 
14 #include <unicode/unistr.h>
15 
16 #include <AutoDeleter.h>
17 
18 
19 U_NAMESPACE_USE
20 
21 
22 namespace BPrivate {
23 namespace Libroot {
24 
25 
26 ICUCollateData::ICUCollateData(pthread_key_t tlsKey)
27 	:
28 	inherited(tlsKey),
29 	fCollator(NULL)
30 {
31 }
32 
33 
34 ICUCollateData::~ICUCollateData()
35 {
36 	delete fCollator;
37 }
38 
39 
40 status_t
41 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName)
42 {
43 	status_t result = inherited::SetTo(locale, posixLocaleName);
44 
45 	if (result == B_OK) {
46 		UErrorCode icuStatus = U_ZERO_ERROR;
47 		delete fCollator;
48 		fCollator = Collator::createInstance(fLocale, icuStatus);
49 		if (!U_SUCCESS(icuStatus))
50 			return B_NO_MEMORY;
51 	}
52 
53 	return result;
54 }
55 
56 
57 status_t
58 ICUCollateData::SetToPosix()
59 {
60 	status_t result = inherited::SetToPosix();
61 
62 	if (result == B_OK) {
63 		delete fCollator;
64 		fCollator = NULL;
65 	}
66 
67 	return result;
68 }
69 
70 
71 status_t
72 ICUCollateData::Strcoll(const char* a, const char* b, int& result)
73 {
74 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
75 		// handle POSIX here as the collator ICU uses for that (english) is
76 		// incompatible in too many ways
77 		result = strcmp(a, b);
78 		for (const char* aIter = a; *aIter != 0; ++aIter) {
79 			if (*aIter < 0)
80 				return B_BAD_VALUE;
81 		}
82 		for (const char* bIter = b; *bIter != 0; ++bIter) {
83 			if (*bIter < 0)
84 				return B_BAD_VALUE;
85 		}
86 		return B_OK;
87 	}
88 
89 	status_t status = B_OK;
90 	UErrorCode icuStatus = U_ZERO_ERROR;
91 
92 	if (strcasecmp(fGivenCharset, "utf-8") == 0) {
93 		UCharIterator aIter, bIter;
94 		uiter_setUTF8(&aIter, a, -1);
95 		uiter_setUTF8(&bIter, b, -1);
96 
97 		result = fCollator->compare(aIter, bIter, icuStatus);
98 	} else {
99 		UnicodeString unicodeA;
100 		UnicodeString unicodeB;
101 
102 		if (_ToUnicodeString(a, unicodeA) != B_OK
103 			|| _ToUnicodeString(b, unicodeB) != B_OK) {
104 			status = B_BAD_VALUE;
105 		}
106 
107 		result = fCollator->compare(unicodeA, unicodeB, icuStatus);
108 	}
109 
110 	if (!U_SUCCESS(icuStatus))
111 		status = B_BAD_VALUE;
112 
113 	return status;
114 }
115 
116 
117 status_t
118 ICUCollateData::Strxfrm(char* out, const char* in,
119 	size_t outSize, size_t& requiredSize)
120 {
121 	if (in == NULL) {
122 		requiredSize = 0;
123 		return B_OK;
124 	}
125 
126 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
127 		// handle POSIX here as the collator ICU uses for that (english) is
128 		// incompatible in too many ways
129 		requiredSize = strlcpy(out, in, outSize);
130 		for (const char* inIter = in; *inIter != 0; ++inIter) {
131 			if (*inIter < 0)
132 				return B_BAD_VALUE;
133 		}
134 		return B_OK;
135 	}
136 
137 	UnicodeString unicodeIn;
138 	if (_ToUnicodeString(in, unicodeIn) != B_OK)
139 		return B_BAD_VALUE;
140 
141 	requiredSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, outSize);
142 
143 	// Do not include terminating NULL byte in the required-size.
144 	if (requiredSize > 0) {
145 		if (outSize >= requiredSize)
146 			assert(out[requiredSize - 1] == '\0');
147 		requiredSize--;
148 	}
149 
150 	return B_OK;
151 }
152 
153 
154 status_t
155 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result)
156 {
157 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
158 		// handle POSIX here as the collator ICU uses for that (english) is
159 		// incompatible in too many ways
160 		result = wcscmp(a, b);
161 		for (const wchar_t* aIter = a; *aIter != 0; ++aIter) {
162 			if (*aIter > 127)
163 				return B_BAD_VALUE;
164 		}
165 		for (const wchar_t* bIter = b; *bIter != 0; ++bIter) {
166 			if (*bIter > 127)
167 				return B_BAD_VALUE;
168 		}
169 		return B_OK;
170 	}
171 
172 	UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1);
173 	UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1);
174 
175 	UErrorCode icuStatus = U_ZERO_ERROR;
176 	result = fCollator->compare(unicodeA, unicodeB, icuStatus);
177 
178 	if (!U_SUCCESS(icuStatus))
179 		return B_BAD_VALUE;
180 
181 	return B_OK;
182 }
183 
184 
185 status_t
186 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t outSize,
187 	size_t& requiredSize)
188 {
189 	if (in == NULL) {
190 		requiredSize = 0;
191 		return B_OK;
192 	}
193 
194 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
195 		// handle POSIX here as the collator ICU uses for that (english) is
196 		// incompatible in too many ways
197 		requiredSize = wcslcpy(out, in, outSize);
198 		for (const wchar_t* inIter = in; *inIter != 0; ++inIter) {
199 			if (*inIter > 127)
200 				return B_BAD_VALUE;
201 		}
202 		return B_OK;
203 	}
204 
205 	UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1);
206 	requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0);
207 
208 	if (outSize == 0)
209 		return B_OK;
210 
211 	uint8_t* buffer = (uint8_t*)out;
212 	fCollator->getSortKey(unicodeIn, buffer, outSize);
213 
214 	// convert 1-byte characters to 4-byte wide characters:
215 	for (size_t i = 0; i < outSize; ++i)
216 		out[outSize - 1 - i] = buffer[outSize - 1 - i];
217 
218 	// Do not include terminating NULL character in the required-size.
219 	if (requiredSize > 0) {
220 		if (outSize >= requiredSize)
221 			assert(out[requiredSize - 1] == 0);
222 		requiredSize--;
223 	}
224 
225 	return B_OK;
226 }
227 
228 
229 status_t
230 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
231 {
232 	out.remove();
233 
234 	if (in == NULL)
235 		return B_OK;
236 
237 	size_t inLen = strlen(in);
238 	if (inLen == 0)
239 		return B_OK;
240 
241 	UConverter* converter;
242 	status_t result = _GetConverter(converter);
243 	if (result != B_OK)
244 		return result;
245 
246 	UErrorCode icuStatus = U_ZERO_ERROR;
247 	int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus);
248 	if (icuStatus != U_BUFFER_OVERFLOW_ERROR)
249 		return B_BAD_VALUE;
250 	if (outLen < 0)
251 		return B_ERROR;
252 	if (outLen == 0)
253 		return B_OK;
254 
255 	UChar* outBuf = out.getBuffer(outLen + 1);
256 	icuStatus = U_ZERO_ERROR;
257 	outLen
258 		= ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus);
259 	if (!U_SUCCESS(icuStatus)) {
260 		out.releaseBuffer(0);
261 		return B_BAD_VALUE;
262 	}
263 
264 	out.releaseBuffer(outLen);
265 
266 	return B_OK;
267 }
268 
269 
270 }	// namespace Libroot
271 }	// namespace BPrivate
272