1 /* 2 * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "ICUCollateData.h" 8 9 #include <string.h> 10 #include <strings.h> 11 #include <wchar.h> 12 13 #include <unicode/unistr.h> 14 15 #include <AutoDeleter.h> 16 17 18 U_NAMESPACE_USE 19 20 21 namespace BPrivate { 22 namespace Libroot { 23 24 25 ICUCollateData::ICUCollateData(pthread_key_t tlsKey) 26 : 27 inherited(tlsKey), 28 fCollator(NULL) 29 { 30 } 31 32 33 ICUCollateData::~ICUCollateData() 34 { 35 delete fCollator; 36 } 37 38 39 status_t 40 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName) 41 { 42 status_t result = inherited::SetTo(locale, posixLocaleName); 43 44 if (result == B_OK) { 45 UErrorCode icuStatus = U_ZERO_ERROR; 46 delete fCollator; 47 fCollator = Collator::createInstance(fLocale, icuStatus); 48 if (!U_SUCCESS(icuStatus)) 49 return B_NO_MEMORY; 50 } 51 52 return result; 53 } 54 55 56 status_t 57 ICUCollateData::SetToPosix() 58 { 59 status_t result = inherited::SetToPosix(); 60 61 if (result == B_OK) { 62 delete fCollator; 63 fCollator = NULL; 64 } 65 66 return result; 67 } 68 69 70 status_t 71 ICUCollateData::Strcoll(const char* a, const char* b, int& result) 72 { 73 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 74 // handle POSIX here as the collator ICU uses for that (english) is 75 // incompatible in too many ways 76 result = strcmp(a, b); 77 for (const char* aIter = a; *aIter != 0; ++aIter) { 78 if (*aIter < 0) 79 return B_BAD_VALUE; 80 } 81 for (const char* bIter = b; *bIter != 0; ++bIter) { 82 if (*bIter < 0) 83 return B_BAD_VALUE; 84 } 85 return B_OK; 86 } 87 88 status_t status = B_OK; 89 UErrorCode icuStatus = U_ZERO_ERROR; 90 91 if (strcasecmp(fGivenCharset, "utf-8") == 0) { 92 UCharIterator aIter, bIter; 93 uiter_setUTF8(&aIter, a, -1); 94 uiter_setUTF8(&bIter, b, -1); 95 96 result = fCollator->compare(aIter, bIter, icuStatus); 97 } else { 98 UnicodeString unicodeA; 99 UnicodeString unicodeB; 100 101 if (_ToUnicodeString(a, unicodeA) != B_OK 102 || _ToUnicodeString(b, unicodeB) != B_OK) { 103 status = B_BAD_VALUE; 104 } 105 106 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 107 } 108 109 if (!U_SUCCESS(icuStatus)) 110 status = B_BAD_VALUE; 111 112 return status; 113 } 114 115 116 status_t 117 ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize) 118 { 119 if (in == NULL) { 120 outSize = 0; 121 return B_OK; 122 } 123 124 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 125 // handle POSIX here as the collator ICU uses for that (english) is 126 // incompatible in too many ways 127 outSize = strlcpy(out, in, size); 128 for (const char* inIter = in; *inIter != 0; ++inIter) { 129 if (*inIter < 0) 130 return B_BAD_VALUE; 131 } 132 return B_OK; 133 } 134 135 UnicodeString unicodeIn; 136 if (_ToUnicodeString(in, unicodeIn) != B_OK) 137 return B_BAD_VALUE; 138 139 outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size); 140 141 return B_OK; 142 } 143 144 145 status_t 146 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result) 147 { 148 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 149 // handle POSIX here as the collator ICU uses for that (english) is 150 // incompatible in too many ways 151 result = wcscmp(a, b); 152 for (const wchar_t* aIter = a; *aIter != 0; ++aIter) { 153 if (*aIter > 127) 154 return B_BAD_VALUE; 155 } 156 for (const wchar_t* bIter = b; *bIter != 0; ++bIter) { 157 if (*bIter > 127) 158 return B_BAD_VALUE; 159 } 160 return B_OK; 161 } 162 163 UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1); 164 UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1); 165 166 UErrorCode icuStatus = U_ZERO_ERROR; 167 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 168 169 if (!U_SUCCESS(icuStatus)) 170 return B_BAD_VALUE; 171 172 return B_OK; 173 } 174 175 176 status_t 177 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t size, 178 size_t& outSize) 179 { 180 if (in == NULL) { 181 outSize = 0; 182 return B_OK; 183 } 184 185 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 186 // handle POSIX here as the collator ICU uses for that (english) is 187 // incompatible in too many ways 188 outSize = wcslcpy(out, in, size); 189 for (const wchar_t* inIter = in; *inIter != 0; ++inIter) { 190 if (*inIter > 127) 191 return B_BAD_VALUE; 192 } 193 return B_OK; 194 } 195 196 UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1); 197 size_t requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0); 198 199 uint8_t* buffer = (uint8_t*)out; 200 outSize = fCollator->getSortKey(unicodeIn, buffer, requiredSize); 201 202 // convert 1-byte characters to 4-byte wide characters: 203 for (size_t i = 0; i < outSize; ++i) 204 out[outSize - 1 - i] = buffer[outSize - 1 - i]; 205 206 return B_OK; 207 } 208 209 210 status_t 211 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out) 212 { 213 out.remove(); 214 215 if (in == NULL) 216 return B_OK; 217 218 size_t inLen = strlen(in); 219 if (inLen == 0) 220 return B_OK; 221 222 UConverter* converter; 223 status_t result = _GetConverter(converter); 224 if (result != B_OK) 225 return result; 226 227 UErrorCode icuStatus = U_ZERO_ERROR; 228 int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus); 229 if (icuStatus != U_BUFFER_OVERFLOW_ERROR) 230 return B_BAD_VALUE; 231 if (outLen < 0) 232 return B_ERROR; 233 if (outLen == 0) 234 return B_OK; 235 236 UChar* outBuf = out.getBuffer(outLen + 1); 237 icuStatus = U_ZERO_ERROR; 238 outLen 239 = ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus); 240 if (!U_SUCCESS(icuStatus)) { 241 out.releaseBuffer(0); 242 return B_BAD_VALUE; 243 } 244 245 out.releaseBuffer(outLen); 246 247 return B_OK; 248 } 249 250 251 } // namespace Libroot 252 } // namespace BPrivate 253