xref: /haiku/src/system/libroot/add-ons/icu/ICUCollateData.cpp (revision 9e25244c5e9051f6cd333820d6332397361abd6c)
1 /*
2  * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "ICUCollateData.h"
8 
9 #include <string.h>
10 #include <strings.h>
11 #include <wchar.h>
12 
13 #include <unicode/unistr.h>
14 
15 #include <AutoDeleter.h>
16 
17 
18 U_NAMESPACE_USE
19 
20 
21 namespace BPrivate {
22 namespace Libroot {
23 
24 
25 ICUCollateData::ICUCollateData(pthread_key_t tlsKey)
26 	:
27 	inherited(tlsKey),
28 	fCollator(NULL)
29 {
30 }
31 
32 
33 ICUCollateData::~ICUCollateData()
34 {
35 	delete fCollator;
36 }
37 
38 
39 status_t
40 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName)
41 {
42 	status_t result = inherited::SetTo(locale, posixLocaleName);
43 
44 	if (result == B_OK) {
45 		UErrorCode icuStatus = U_ZERO_ERROR;
46 		delete fCollator;
47 		fCollator = Collator::createInstance(fLocale, icuStatus);
48 		if (!U_SUCCESS(icuStatus))
49 			return B_NO_MEMORY;
50 	}
51 
52 	return result;
53 }
54 
55 
56 status_t
57 ICUCollateData::SetToPosix()
58 {
59 	status_t result = inherited::SetToPosix();
60 
61 	if (result == B_OK) {
62 		delete fCollator;
63 		fCollator = NULL;
64 	}
65 
66 	return result;
67 }
68 
69 
70 status_t
71 ICUCollateData::Strcoll(const char* a, const char* b, int& result)
72 {
73 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
74 		// handle POSIX here as the collator ICU uses for that (english) is
75 		// incompatible in too many ways
76 		result = strcmp(a, b);
77 		for (const char* aIter = a; *aIter != 0; ++aIter) {
78 			if (*aIter < 0)
79 				return B_BAD_VALUE;
80 		}
81 		for (const char* bIter = b; *bIter != 0; ++bIter) {
82 			if (*bIter < 0)
83 				return B_BAD_VALUE;
84 		}
85 		return B_OK;
86 	}
87 
88 	status_t status = B_OK;
89 	UErrorCode icuStatus = U_ZERO_ERROR;
90 
91 	if (strcasecmp(fGivenCharset, "utf-8") == 0) {
92 		UCharIterator aIter, bIter;
93 		uiter_setUTF8(&aIter, a, -1);
94 		uiter_setUTF8(&bIter, b, -1);
95 
96 		result = fCollator->compare(aIter, bIter, icuStatus);
97 	} else {
98 		UnicodeString unicodeA;
99 		UnicodeString unicodeB;
100 
101 		if (_ToUnicodeString(a, unicodeA) != B_OK
102 			|| _ToUnicodeString(b, unicodeB) != B_OK) {
103 			status = B_BAD_VALUE;
104 		}
105 
106 		result = fCollator->compare(unicodeA, unicodeB, icuStatus);
107 	}
108 
109 	if (!U_SUCCESS(icuStatus))
110 		status = B_BAD_VALUE;
111 
112 	return status;
113 }
114 
115 
116 status_t
117 ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize)
118 {
119 	if (in == NULL) {
120 		outSize = 0;
121 		return B_OK;
122 	}
123 
124 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
125 		// handle POSIX here as the collator ICU uses for that (english) is
126 		// incompatible in too many ways
127 		outSize = strlcpy(out, in, size);
128 		for (const char* inIter = in; *inIter != 0; ++inIter) {
129 			if (*inIter < 0)
130 				return B_BAD_VALUE;
131 		}
132 		return B_OK;
133 	}
134 
135 	UnicodeString unicodeIn;
136 	if (_ToUnicodeString(in, unicodeIn) != B_OK)
137 		return B_BAD_VALUE;
138 
139 	outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size);
140 
141 	return B_OK;
142 }
143 
144 
145 status_t
146 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result)
147 {
148 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
149 		// handle POSIX here as the collator ICU uses for that (english) is
150 		// incompatible in too many ways
151 		result = wcscmp(a, b);
152 		for (const wchar_t* aIter = a; *aIter != 0; ++aIter) {
153 			if (*aIter > 127)
154 				return B_BAD_VALUE;
155 		}
156 		for (const wchar_t* bIter = b; *bIter != 0; ++bIter) {
157 			if (*bIter > 127)
158 				return B_BAD_VALUE;
159 		}
160 		return B_OK;
161 	}
162 
163 	UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1);
164 	UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1);
165 
166 	UErrorCode icuStatus = U_ZERO_ERROR;
167 	result = fCollator->compare(unicodeA, unicodeB, icuStatus);
168 
169 	if (!U_SUCCESS(icuStatus))
170 		return B_BAD_VALUE;
171 
172 	return B_OK;
173 }
174 
175 
176 status_t
177 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t outSize,
178 	size_t& requiredSize)
179 {
180 	if (in == NULL) {
181 		requiredSize = 0;
182 		return B_OK;
183 	}
184 
185 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
186 		// handle POSIX here as the collator ICU uses for that (english) is
187 		// incompatible in too many ways
188 		requiredSize = wcslcpy(out, in, outSize);
189 		for (const wchar_t* inIter = in; *inIter != 0; ++inIter) {
190 			if (*inIter > 127)
191 				return B_BAD_VALUE;
192 		}
193 		return B_OK;
194 	}
195 
196 	UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1);
197 	requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0);
198 
199 	if (outSize == 0)
200 		return B_OK;
201 
202 	uint8_t* buffer = (uint8_t*)out;
203 	fCollator->getSortKey(unicodeIn, buffer, outSize);
204 
205 	// convert 1-byte characters to 4-byte wide characters:
206 	for (size_t i = 0; i < outSize; ++i)
207 		out[outSize - 1 - i] = buffer[outSize - 1 - i];
208 
209 	return B_OK;
210 }
211 
212 
213 status_t
214 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
215 {
216 	out.remove();
217 
218 	if (in == NULL)
219 		return B_OK;
220 
221 	size_t inLen = strlen(in);
222 	if (inLen == 0)
223 		return B_OK;
224 
225 	UConverter* converter;
226 	status_t result = _GetConverter(converter);
227 	if (result != B_OK)
228 		return result;
229 
230 	UErrorCode icuStatus = U_ZERO_ERROR;
231 	int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus);
232 	if (icuStatus != U_BUFFER_OVERFLOW_ERROR)
233 		return B_BAD_VALUE;
234 	if (outLen < 0)
235 		return B_ERROR;
236 	if (outLen == 0)
237 		return B_OK;
238 
239 	UChar* outBuf = out.getBuffer(outLen + 1);
240 	icuStatus = U_ZERO_ERROR;
241 	outLen
242 		= ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus);
243 	if (!U_SUCCESS(icuStatus)) {
244 		out.releaseBuffer(0);
245 		return B_BAD_VALUE;
246 	}
247 
248 	out.releaseBuffer(outLen);
249 
250 	return B_OK;
251 }
252 
253 
254 }	// namespace Libroot
255 }	// namespace BPrivate
256