1 /* 2 * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "ICUCollateData.h" 8 9 #include <assert.h> 10 #include <string.h> 11 #include <strings.h> 12 #include <wchar.h> 13 14 #include <unicode/unistr.h> 15 16 #include <AutoDeleter.h> 17 18 19 U_NAMESPACE_USE 20 21 22 namespace BPrivate { 23 namespace Libroot { 24 25 26 ICUCollateData::ICUCollateData(pthread_key_t tlsKey) 27 : 28 inherited(tlsKey), 29 fCollator(NULL) 30 { 31 } 32 33 34 ICUCollateData::~ICUCollateData() 35 { 36 delete fCollator; 37 } 38 39 40 status_t 41 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName) 42 { 43 status_t result = inherited::SetTo(locale, posixLocaleName); 44 45 if (result == B_OK) { 46 UErrorCode icuStatus = U_ZERO_ERROR; 47 delete fCollator; 48 fCollator = Collator::createInstance(fLocale, icuStatus); 49 if (!U_SUCCESS(icuStatus)) 50 return B_NO_MEMORY; 51 } 52 53 return result; 54 } 55 56 57 status_t 58 ICUCollateData::SetToPosix() 59 { 60 status_t result = inherited::SetToPosix(); 61 62 if (result == B_OK) { 63 delete fCollator; 64 fCollator = NULL; 65 } 66 67 return result; 68 } 69 70 71 status_t 72 ICUCollateData::Strcoll(const char* a, const char* b, int& result) 73 { 74 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 75 // handle POSIX here as the collator ICU uses for that (english) is 76 // incompatible in too many ways 77 result = strcmp(a, b); 78 for (const char* aIter = a; *aIter != 0; ++aIter) { 79 if (*aIter < 0) 80 return B_BAD_VALUE; 81 } 82 for (const char* bIter = b; *bIter != 0; ++bIter) { 83 if (*bIter < 0) 84 return B_BAD_VALUE; 85 } 86 return B_OK; 87 } 88 89 status_t status = B_OK; 90 UErrorCode icuStatus = U_ZERO_ERROR; 91 92 if (strcasecmp(fGivenCharset, "utf-8") == 0) { 93 UCharIterator aIter, bIter; 94 uiter_setUTF8(&aIter, a, -1); 95 uiter_setUTF8(&bIter, b, -1); 96 97 result = fCollator->compare(aIter, bIter, icuStatus); 98 } else { 99 UnicodeString unicodeA; 100 UnicodeString unicodeB; 101 102 if (_ToUnicodeString(a, unicodeA) != B_OK 103 || _ToUnicodeString(b, unicodeB) != B_OK) { 104 status = B_BAD_VALUE; 105 } 106 107 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 108 } 109 110 if (!U_SUCCESS(icuStatus)) 111 status = B_BAD_VALUE; 112 113 return status; 114 } 115 116 117 status_t 118 ICUCollateData::Strxfrm(char* out, const char* in, 119 size_t outSize, size_t& requiredSize) 120 { 121 if (in == NULL) { 122 requiredSize = 0; 123 return B_OK; 124 } 125 126 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 127 // handle POSIX here as the collator ICU uses for that (english) is 128 // incompatible in too many ways 129 requiredSize = strlcpy(out, in, outSize); 130 for (const char* inIter = in; *inIter != 0; ++inIter) { 131 if (*inIter < 0) 132 return B_BAD_VALUE; 133 } 134 return B_OK; 135 } 136 137 UnicodeString unicodeIn; 138 if (_ToUnicodeString(in, unicodeIn) != B_OK) 139 return B_BAD_VALUE; 140 141 requiredSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, outSize); 142 143 // Do not include terminating NULL byte in the required-size. 144 if (requiredSize > 0) { 145 if (outSize >= requiredSize) 146 assert(out[requiredSize - 1] == '\0'); 147 requiredSize--; 148 } 149 150 return B_OK; 151 } 152 153 154 status_t 155 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result) 156 { 157 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 158 // handle POSIX here as the collator ICU uses for that (english) is 159 // incompatible in too many ways 160 result = wcscmp(a, b); 161 for (const wchar_t* aIter = a; *aIter != 0; ++aIter) { 162 if (*aIter > 127) 163 return B_BAD_VALUE; 164 } 165 for (const wchar_t* bIter = b; *bIter != 0; ++bIter) { 166 if (*bIter > 127) 167 return B_BAD_VALUE; 168 } 169 return B_OK; 170 } 171 172 UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1); 173 UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1); 174 175 UErrorCode icuStatus = U_ZERO_ERROR; 176 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 177 178 if (!U_SUCCESS(icuStatus)) 179 return B_BAD_VALUE; 180 181 return B_OK; 182 } 183 184 185 status_t 186 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t outSize, 187 size_t& requiredSize) 188 { 189 if (in == NULL) { 190 requiredSize = 0; 191 return B_OK; 192 } 193 194 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 195 // handle POSIX here as the collator ICU uses for that (english) is 196 // incompatible in too many ways 197 requiredSize = wcslcpy(out, in, outSize); 198 for (const wchar_t* inIter = in; *inIter != 0; ++inIter) { 199 if (*inIter > 127) 200 return B_BAD_VALUE; 201 } 202 return B_OK; 203 } 204 205 UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1); 206 requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0); 207 208 if (outSize == 0) 209 return B_OK; 210 211 uint8_t* buffer = (uint8_t*)out; 212 fCollator->getSortKey(unicodeIn, buffer, outSize); 213 214 // convert 1-byte characters to 4-byte wide characters: 215 for (size_t i = 0; i < outSize; ++i) 216 out[outSize - 1 - i] = buffer[outSize - 1 - i]; 217 218 // Do not include terminating NULL character in the required-size. 219 if (requiredSize > 0) { 220 if (outSize >= requiredSize) 221 assert(out[requiredSize - 1] == 0); 222 requiredSize--; 223 } 224 225 return B_OK; 226 } 227 228 229 status_t 230 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out) 231 { 232 out.remove(); 233 234 if (in == NULL) 235 return B_OK; 236 237 size_t inLen = strlen(in); 238 if (inLen == 0) 239 return B_OK; 240 241 UConverter* converter; 242 status_t result = _GetConverter(converter); 243 if (result != B_OK) 244 return result; 245 246 UErrorCode icuStatus = U_ZERO_ERROR; 247 int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus); 248 if (icuStatus != U_BUFFER_OVERFLOW_ERROR) 249 return B_BAD_VALUE; 250 if (outLen < 0) 251 return B_ERROR; 252 if (outLen == 0) 253 return B_OK; 254 255 UChar* outBuf = out.getBuffer(outLen + 1); 256 icuStatus = U_ZERO_ERROR; 257 outLen 258 = ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus); 259 if (!U_SUCCESS(icuStatus)) { 260 out.releaseBuffer(0); 261 return B_BAD_VALUE; 262 } 263 264 out.releaseBuffer(outLen); 265 266 return B_OK; 267 } 268 269 270 } // namespace Libroot 271 } // namespace BPrivate 272