1 /* 2 * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "ICUCollateData.h" 8 9 #include <string.h> 10 #include <wchar.h> 11 12 #include <unicode/unistr.h> 13 14 #include <AutoDeleter.h> 15 16 17 namespace BPrivate { 18 namespace Libroot { 19 20 21 ICUCollateData::ICUCollateData(pthread_key_t tlsKey) 22 : 23 inherited(tlsKey), 24 fCollator(NULL) 25 { 26 } 27 28 29 ICUCollateData::~ICUCollateData() 30 { 31 delete fCollator; 32 } 33 34 35 status_t 36 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName) 37 { 38 status_t result = inherited::SetTo(locale, posixLocaleName); 39 40 if (result == B_OK) { 41 UErrorCode icuStatus = U_ZERO_ERROR; 42 delete fCollator; 43 fCollator = Collator::createInstance(fLocale, icuStatus); 44 if (!U_SUCCESS(icuStatus)) 45 return B_NO_MEMORY; 46 } 47 48 return result; 49 } 50 51 52 status_t 53 ICUCollateData::SetToPosix() 54 { 55 status_t result = inherited::SetToPosix(); 56 57 if (result == B_OK) { 58 delete fCollator; 59 fCollator = NULL; 60 } 61 62 return result; 63 } 64 65 66 status_t 67 ICUCollateData::Strcoll(const char* a, const char* b, int& result) 68 { 69 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 70 // handle POSIX here as the collator ICU uses for that (english) is 71 // incompatible in too many ways 72 result = strcmp(a, b); 73 for (const char* aIter = a; *aIter != 0; ++aIter) { 74 if (*aIter < 0) 75 return B_BAD_VALUE; 76 } 77 for (const char* bIter = b; *bIter != 0; ++bIter) { 78 if (*bIter < 0) 79 return B_BAD_VALUE; 80 } 81 return B_OK; 82 } 83 84 status_t status = B_OK; 85 UErrorCode icuStatus = U_ZERO_ERROR; 86 87 if (strcasecmp(fGivenCharset, "utf-8") == 0) { 88 UCharIterator aIter, bIter; 89 uiter_setUTF8(&aIter, a, -1); 90 uiter_setUTF8(&bIter, b, -1); 91 92 result = fCollator->compare(aIter, bIter, icuStatus); 93 } else { 94 UnicodeString unicodeA; 95 UnicodeString unicodeB; 96 97 if (_ToUnicodeString(a, unicodeA) != B_OK 98 || _ToUnicodeString(b, unicodeB) != B_OK) { 99 status = B_BAD_VALUE; 100 } 101 102 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 103 } 104 105 if (!U_SUCCESS(icuStatus)) 106 status = B_BAD_VALUE; 107 108 return status; 109 } 110 111 112 status_t 113 ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize) 114 { 115 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 116 // handle POSIX here as the collator ICU uses for that (english) is 117 // incompatible in too many ways 118 outSize = strlcpy(out, in, size); 119 for (const char* inIter = in; *inIter != 0; ++inIter) { 120 if (*inIter < 0) 121 return B_BAD_VALUE; 122 } 123 return B_OK; 124 } 125 126 if (in == NULL) { 127 outSize = 0; 128 return B_OK; 129 } 130 131 UnicodeString unicodeIn; 132 if (_ToUnicodeString(in, unicodeIn) != B_OK) 133 return B_BAD_VALUE; 134 135 outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size); 136 137 return B_OK; 138 } 139 140 141 status_t 142 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result) 143 { 144 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 145 // handle POSIX here as the collator ICU uses for that (english) is 146 // incompatible in too many ways 147 result = wcscmp(a, b); 148 for (const wchar_t* aIter = a; *aIter != 0; ++aIter) { 149 if (*aIter > 127) 150 return B_BAD_VALUE; 151 } 152 for (const wchar_t* bIter = b; *bIter != 0; ++bIter) { 153 if (*bIter > 127) 154 return B_BAD_VALUE; 155 } 156 return B_OK; 157 } 158 159 UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1); 160 UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1); 161 162 UErrorCode icuStatus = U_ZERO_ERROR; 163 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 164 165 if (!U_SUCCESS(icuStatus)) 166 return B_BAD_VALUE; 167 168 return B_OK; 169 } 170 171 172 status_t 173 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t size, 174 size_t& outSize) 175 { 176 if (in == NULL) { 177 outSize = 0; 178 return B_OK; 179 } 180 181 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 182 // handle POSIX here as the collator ICU uses for that (english) is 183 // incompatible in too many ways 184 outSize = wcslcpy(out, in, size); 185 for (const wchar_t* inIter = in; *inIter != 0; ++inIter) { 186 if (*inIter > 127) 187 return B_BAD_VALUE; 188 } 189 return B_OK; 190 } 191 192 UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1); 193 size_t requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0); 194 195 uint8_t* buffer = (uint8_t*)out; 196 outSize = fCollator->getSortKey(unicodeIn, buffer, requiredSize); 197 198 // convert 1-byte characters to 4-byte wide characters: 199 for (size_t i = 0; i < outSize; ++i) 200 out[outSize - 1 - i] = buffer[outSize - 1 - i]; 201 202 return B_OK; 203 } 204 205 206 status_t 207 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out) 208 { 209 out.remove(); 210 211 if (in == NULL) 212 return B_OK; 213 214 size_t inLen = strlen(in); 215 if (inLen == 0) 216 return B_OK; 217 218 UConverter* converter; 219 status_t result = _GetConverter(converter); 220 if (result != B_OK) 221 return result; 222 223 UErrorCode icuStatus = U_ZERO_ERROR; 224 int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus); 225 if (icuStatus != U_BUFFER_OVERFLOW_ERROR) 226 return B_BAD_VALUE; 227 if (outLen < 0) 228 return B_ERROR; 229 if (outLen == 0) 230 return B_OK; 231 232 UChar* outBuf = out.getBuffer(outLen + 1); 233 icuStatus = U_ZERO_ERROR; 234 outLen 235 = ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus); 236 if (!U_SUCCESS(icuStatus)) { 237 out.releaseBuffer(0); 238 return B_BAD_VALUE; 239 } 240 241 out.releaseBuffer(outLen); 242 243 return B_OK; 244 } 245 246 247 } // namespace Libroot 248 } // namespace BPrivate 249