xref: /haiku/src/system/libroot/add-ons/icu/ICUCollateData.cpp (revision 1026b0a1a76dc88927bb8175c470f638dc5464ee)
1 /*
2  * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "ICUCollateData.h"
8 
9 #include <string.h>
10 #include <wchar.h>
11 
12 #include <unicode/unistr.h>
13 
14 #include <AutoDeleter.h>
15 
16 
17 namespace BPrivate {
18 namespace Libroot {
19 
20 
21 ICUCollateData::ICUCollateData(pthread_key_t tlsKey)
22 	:
23 	inherited(tlsKey),
24 	fCollator(NULL)
25 {
26 }
27 
28 
29 ICUCollateData::~ICUCollateData()
30 {
31 	delete fCollator;
32 }
33 
34 
35 status_t
36 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName)
37 {
38 	status_t result = inherited::SetTo(locale, posixLocaleName);
39 
40 	if (result == B_OK) {
41 		UErrorCode icuStatus = U_ZERO_ERROR;
42 		delete fCollator;
43 		fCollator = Collator::createInstance(fLocale, icuStatus);
44 		if (!U_SUCCESS(icuStatus))
45 			return B_NO_MEMORY;
46 	}
47 
48 	return result;
49 }
50 
51 
52 status_t
53 ICUCollateData::SetToPosix()
54 {
55 	status_t result = inherited::SetToPosix();
56 
57 	if (result == B_OK) {
58 		delete fCollator;
59 		fCollator = NULL;
60 	}
61 
62 	return result;
63 }
64 
65 
66 status_t
67 ICUCollateData::Strcoll(const char* a, const char* b, int& result)
68 {
69 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
70 		// handle POSIX here as the collator ICU uses for that (english) is
71 		// incompatible in too many ways
72 		result = strcmp(a, b);
73 		for (const char* aIter = a; *aIter != 0; ++aIter) {
74 			if (*aIter < 0)
75 				return B_BAD_VALUE;
76 		}
77 		for (const char* bIter = b; *bIter != 0; ++bIter) {
78 			if (*bIter < 0)
79 				return B_BAD_VALUE;
80 		}
81 		return B_OK;
82 	}
83 
84 	status_t status = B_OK;
85 	UErrorCode icuStatus = U_ZERO_ERROR;
86 
87 	if (strcasecmp(fGivenCharset, "utf-8") == 0) {
88 		UCharIterator aIter, bIter;
89 		uiter_setUTF8(&aIter, a, -1);
90 		uiter_setUTF8(&bIter, b, -1);
91 
92 		result = fCollator->compare(aIter, bIter, icuStatus);
93 	} else {
94 		UnicodeString unicodeA;
95 		UnicodeString unicodeB;
96 
97 		if (_ToUnicodeString(a, unicodeA) != B_OK
98 			|| _ToUnicodeString(b, unicodeB) != B_OK) {
99 			status = B_BAD_VALUE;
100 		}
101 
102 		result = fCollator->compare(unicodeA, unicodeB, icuStatus);
103 	}
104 
105 	if (!U_SUCCESS(icuStatus))
106 		status = B_BAD_VALUE;
107 
108 	return status;
109 }
110 
111 
112 status_t
113 ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize)
114 {
115 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
116 		// handle POSIX here as the collator ICU uses for that (english) is
117 		// incompatible in too many ways
118 		outSize = strlcpy(out, in, size);
119 		for (const char* inIter = in; *inIter != 0; ++inIter) {
120 			if (*inIter < 0)
121 				return B_BAD_VALUE;
122 		}
123 		return B_OK;
124 	}
125 
126 	if (in == NULL) {
127 		outSize = 0;
128 		return B_OK;
129 	}
130 
131 	UnicodeString unicodeIn;
132 	if (_ToUnicodeString(in, unicodeIn) != B_OK)
133 		return B_BAD_VALUE;
134 
135 	outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size);
136 
137 	return B_OK;
138 }
139 
140 
141 status_t
142 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result)
143 {
144 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
145 		// handle POSIX here as the collator ICU uses for that (english) is
146 		// incompatible in too many ways
147 		result = wcscmp(a, b);
148 		for (const wchar_t* aIter = a; *aIter != 0; ++aIter) {
149 			if (*aIter > 127)
150 				return B_BAD_VALUE;
151 		}
152 		for (const wchar_t* bIter = b; *bIter != 0; ++bIter) {
153 			if (*bIter > 127)
154 				return B_BAD_VALUE;
155 		}
156 		return B_OK;
157 	}
158 
159 	UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1);
160 	UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1);
161 
162 	UErrorCode icuStatus = U_ZERO_ERROR;
163 	result = fCollator->compare(unicodeA, unicodeB, icuStatus);
164 
165 	if (!U_SUCCESS(icuStatus))
166 		return B_BAD_VALUE;
167 
168 	return B_OK;
169 }
170 
171 
172 status_t
173 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t size,
174 	size_t& outSize)
175 {
176 	if (in == NULL) {
177 		outSize = 0;
178 		return B_OK;
179 	}
180 
181 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
182 		// handle POSIX here as the collator ICU uses for that (english) is
183 		// incompatible in too many ways
184 		outSize = wcslcpy(out, in, size);
185 		for (const wchar_t* inIter = in; *inIter != 0; ++inIter) {
186 			if (*inIter > 127)
187 				return B_BAD_VALUE;
188 		}
189 		return B_OK;
190 	}
191 
192 	UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1);
193 	size_t requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0);
194 
195 	uint8_t* buffer = (uint8_t*)out;
196 	outSize = fCollator->getSortKey(unicodeIn, buffer, requiredSize);
197 
198 	// convert 1-byte characters to 4-byte wide characters:
199 	for (size_t i = 0; i < outSize; ++i)
200 		out[outSize - 1 - i] = buffer[outSize - 1 - i];
201 
202 	return B_OK;
203 }
204 
205 
206 status_t
207 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
208 {
209 	out.remove();
210 
211 	if (in == NULL)
212 		return B_OK;
213 
214 	size_t inLen = strlen(in);
215 	if (inLen == 0)
216 		return B_OK;
217 
218 	UConverter* converter;
219 	status_t result = _GetConverter(converter);
220 	if (result != B_OK)
221 		return result;
222 
223 	UErrorCode icuStatus = U_ZERO_ERROR;
224 	int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus);
225 	if (icuStatus != U_BUFFER_OVERFLOW_ERROR)
226 		return B_BAD_VALUE;
227 	if (outLen < 0)
228 		return B_ERROR;
229 	if (outLen == 0)
230 		return B_OK;
231 
232 	UChar* outBuf = out.getBuffer(outLen + 1);
233 	icuStatus = U_ZERO_ERROR;
234 	outLen
235 		= ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus);
236 	if (!U_SUCCESS(icuStatus)) {
237 		out.releaseBuffer(0);
238 		return B_BAD_VALUE;
239 	}
240 
241 	out.releaseBuffer(outLen);
242 
243 	return B_OK;
244 }
245 
246 
247 }	// namespace Libroot
248 }	// namespace BPrivate
249