xref: /haiku/src/system/libroot/add-ons/icu/ICUCollateData.cpp (revision 6aff37d1c79e20748c683ae224bd629f88a5b0be)
1 /*
2  * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de.
3  * Distributed under the terms of the MIT License.
4  */
5 
6 
7 #include "ICUCollateData.h"
8 
9 #include <string.h>
10 #include <strings.h>
11 #include <wchar.h>
12 
13 #include <unicode/unistr.h>
14 
15 #include <AutoDeleter.h>
16 
17 
18 namespace BPrivate {
19 namespace Libroot {
20 
21 
22 ICUCollateData::ICUCollateData(pthread_key_t tlsKey)
23 	:
24 	inherited(tlsKey),
25 	fCollator(NULL)
26 {
27 }
28 
29 
30 ICUCollateData::~ICUCollateData()
31 {
32 	delete fCollator;
33 }
34 
35 
36 status_t
37 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName)
38 {
39 	status_t result = inherited::SetTo(locale, posixLocaleName);
40 
41 	if (result == B_OK) {
42 		UErrorCode icuStatus = U_ZERO_ERROR;
43 		delete fCollator;
44 		fCollator = Collator::createInstance(fLocale, icuStatus);
45 		if (!U_SUCCESS(icuStatus))
46 			return B_NO_MEMORY;
47 	}
48 
49 	return result;
50 }
51 
52 
53 status_t
54 ICUCollateData::SetToPosix()
55 {
56 	status_t result = inherited::SetToPosix();
57 
58 	if (result == B_OK) {
59 		delete fCollator;
60 		fCollator = NULL;
61 	}
62 
63 	return result;
64 }
65 
66 
67 status_t
68 ICUCollateData::Strcoll(const char* a, const char* b, int& result)
69 {
70 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
71 		// handle POSIX here as the collator ICU uses for that (english) is
72 		// incompatible in too many ways
73 		result = strcmp(a, b);
74 		for (const char* aIter = a; *aIter != 0; ++aIter) {
75 			if (*aIter < 0)
76 				return B_BAD_VALUE;
77 		}
78 		for (const char* bIter = b; *bIter != 0; ++bIter) {
79 			if (*bIter < 0)
80 				return B_BAD_VALUE;
81 		}
82 		return B_OK;
83 	}
84 
85 	status_t status = B_OK;
86 	UErrorCode icuStatus = U_ZERO_ERROR;
87 
88 	if (strcasecmp(fGivenCharset, "utf-8") == 0) {
89 		UCharIterator aIter, bIter;
90 		uiter_setUTF8(&aIter, a, -1);
91 		uiter_setUTF8(&bIter, b, -1);
92 
93 		result = fCollator->compare(aIter, bIter, icuStatus);
94 	} else {
95 		UnicodeString unicodeA;
96 		UnicodeString unicodeB;
97 
98 		if (_ToUnicodeString(a, unicodeA) != B_OK
99 			|| _ToUnicodeString(b, unicodeB) != B_OK) {
100 			status = B_BAD_VALUE;
101 		}
102 
103 		result = fCollator->compare(unicodeA, unicodeB, icuStatus);
104 	}
105 
106 	if (!U_SUCCESS(icuStatus))
107 		status = B_BAD_VALUE;
108 
109 	return status;
110 }
111 
112 
113 status_t
114 ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize)
115 {
116 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
117 		// handle POSIX here as the collator ICU uses for that (english) is
118 		// incompatible in too many ways
119 		outSize = strlcpy(out, in, size);
120 		for (const char* inIter = in; *inIter != 0; ++inIter) {
121 			if (*inIter < 0)
122 				return B_BAD_VALUE;
123 		}
124 		return B_OK;
125 	}
126 
127 	if (in == NULL) {
128 		outSize = 0;
129 		return B_OK;
130 	}
131 
132 	UnicodeString unicodeIn;
133 	if (_ToUnicodeString(in, unicodeIn) != B_OK)
134 		return B_BAD_VALUE;
135 
136 	outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size);
137 
138 	return B_OK;
139 }
140 
141 
142 status_t
143 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result)
144 {
145 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
146 		// handle POSIX here as the collator ICU uses for that (english) is
147 		// incompatible in too many ways
148 		result = wcscmp(a, b);
149 		for (const wchar_t* aIter = a; *aIter != 0; ++aIter) {
150 			if (*aIter > 127)
151 				return B_BAD_VALUE;
152 		}
153 		for (const wchar_t* bIter = b; *bIter != 0; ++bIter) {
154 			if (*bIter > 127)
155 				return B_BAD_VALUE;
156 		}
157 		return B_OK;
158 	}
159 
160 	UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1);
161 	UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1);
162 
163 	UErrorCode icuStatus = U_ZERO_ERROR;
164 	result = fCollator->compare(unicodeA, unicodeB, icuStatus);
165 
166 	if (!U_SUCCESS(icuStatus))
167 		return B_BAD_VALUE;
168 
169 	return B_OK;
170 }
171 
172 
173 status_t
174 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t size,
175 	size_t& outSize)
176 {
177 	if (in == NULL) {
178 		outSize = 0;
179 		return B_OK;
180 	}
181 
182 	if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) {
183 		// handle POSIX here as the collator ICU uses for that (english) is
184 		// incompatible in too many ways
185 		outSize = wcslcpy(out, in, size);
186 		for (const wchar_t* inIter = in; *inIter != 0; ++inIter) {
187 			if (*inIter > 127)
188 				return B_BAD_VALUE;
189 		}
190 		return B_OK;
191 	}
192 
193 	UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1);
194 	size_t requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0);
195 
196 	uint8_t* buffer = (uint8_t*)out;
197 	outSize = fCollator->getSortKey(unicodeIn, buffer, requiredSize);
198 
199 	// convert 1-byte characters to 4-byte wide characters:
200 	for (size_t i = 0; i < outSize; ++i)
201 		out[outSize - 1 - i] = buffer[outSize - 1 - i];
202 
203 	return B_OK;
204 }
205 
206 
207 status_t
208 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out)
209 {
210 	out.remove();
211 
212 	if (in == NULL)
213 		return B_OK;
214 
215 	size_t inLen = strlen(in);
216 	if (inLen == 0)
217 		return B_OK;
218 
219 	UConverter* converter;
220 	status_t result = _GetConverter(converter);
221 	if (result != B_OK)
222 		return result;
223 
224 	UErrorCode icuStatus = U_ZERO_ERROR;
225 	int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus);
226 	if (icuStatus != U_BUFFER_OVERFLOW_ERROR)
227 		return B_BAD_VALUE;
228 	if (outLen < 0)
229 		return B_ERROR;
230 	if (outLen == 0)
231 		return B_OK;
232 
233 	UChar* outBuf = out.getBuffer(outLen + 1);
234 	icuStatus = U_ZERO_ERROR;
235 	outLen
236 		= ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus);
237 	if (!U_SUCCESS(icuStatus)) {
238 		out.releaseBuffer(0);
239 		return B_BAD_VALUE;
240 	}
241 
242 	out.releaseBuffer(outLen);
243 
244 	return B_OK;
245 }
246 
247 
248 }	// namespace Libroot
249 }	// namespace BPrivate
250