1 /* 2 * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "ICUCollateData.h" 8 9 #include <string.h> 10 #include <strings.h> 11 #include <wchar.h> 12 13 #include <unicode/unistr.h> 14 15 #include <AutoDeleter.h> 16 17 18 namespace BPrivate { 19 namespace Libroot { 20 21 22 ICUCollateData::ICUCollateData(pthread_key_t tlsKey) 23 : 24 inherited(tlsKey), 25 fCollator(NULL) 26 { 27 } 28 29 30 ICUCollateData::~ICUCollateData() 31 { 32 delete fCollator; 33 } 34 35 36 status_t 37 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName) 38 { 39 status_t result = inherited::SetTo(locale, posixLocaleName); 40 41 if (result == B_OK) { 42 UErrorCode icuStatus = U_ZERO_ERROR; 43 delete fCollator; 44 fCollator = Collator::createInstance(fLocale, icuStatus); 45 if (!U_SUCCESS(icuStatus)) 46 return B_NO_MEMORY; 47 } 48 49 return result; 50 } 51 52 53 status_t 54 ICUCollateData::SetToPosix() 55 { 56 status_t result = inherited::SetToPosix(); 57 58 if (result == B_OK) { 59 delete fCollator; 60 fCollator = NULL; 61 } 62 63 return result; 64 } 65 66 67 status_t 68 ICUCollateData::Strcoll(const char* a, const char* b, int& result) 69 { 70 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 71 // handle POSIX here as the collator ICU uses for that (english) is 72 // incompatible in too many ways 73 result = strcmp(a, b); 74 for (const char* aIter = a; *aIter != 0; ++aIter) { 75 if (*aIter < 0) 76 return B_BAD_VALUE; 77 } 78 for (const char* bIter = b; *bIter != 0; ++bIter) { 79 if (*bIter < 0) 80 return B_BAD_VALUE; 81 } 82 return B_OK; 83 } 84 85 status_t status = B_OK; 86 UErrorCode icuStatus = U_ZERO_ERROR; 87 88 if (strcasecmp(fGivenCharset, "utf-8") == 0) { 89 UCharIterator aIter, bIter; 90 uiter_setUTF8(&aIter, a, -1); 91 uiter_setUTF8(&bIter, b, -1); 92 93 result = fCollator->compare(aIter, bIter, icuStatus); 94 } else { 95 UnicodeString unicodeA; 96 UnicodeString unicodeB; 97 98 if (_ToUnicodeString(a, unicodeA) != B_OK 99 || _ToUnicodeString(b, unicodeB) != B_OK) { 100 status = B_BAD_VALUE; 101 } 102 103 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 104 } 105 106 if (!U_SUCCESS(icuStatus)) 107 status = B_BAD_VALUE; 108 109 return status; 110 } 111 112 113 status_t 114 ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize) 115 { 116 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 117 // handle POSIX here as the collator ICU uses for that (english) is 118 // incompatible in too many ways 119 outSize = strlcpy(out, in, size); 120 for (const char* inIter = in; *inIter != 0; ++inIter) { 121 if (*inIter < 0) 122 return B_BAD_VALUE; 123 } 124 return B_OK; 125 } 126 127 if (in == NULL) { 128 outSize = 0; 129 return B_OK; 130 } 131 132 UnicodeString unicodeIn; 133 if (_ToUnicodeString(in, unicodeIn) != B_OK) 134 return B_BAD_VALUE; 135 136 outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size); 137 138 return B_OK; 139 } 140 141 142 status_t 143 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result) 144 { 145 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 146 // handle POSIX here as the collator ICU uses for that (english) is 147 // incompatible in too many ways 148 result = wcscmp(a, b); 149 for (const wchar_t* aIter = a; *aIter != 0; ++aIter) { 150 if (*aIter > 127) 151 return B_BAD_VALUE; 152 } 153 for (const wchar_t* bIter = b; *bIter != 0; ++bIter) { 154 if (*bIter > 127) 155 return B_BAD_VALUE; 156 } 157 return B_OK; 158 } 159 160 UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1); 161 UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1); 162 163 UErrorCode icuStatus = U_ZERO_ERROR; 164 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 165 166 if (!U_SUCCESS(icuStatus)) 167 return B_BAD_VALUE; 168 169 return B_OK; 170 } 171 172 173 status_t 174 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t size, 175 size_t& outSize) 176 { 177 if (in == NULL) { 178 outSize = 0; 179 return B_OK; 180 } 181 182 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 183 // handle POSIX here as the collator ICU uses for that (english) is 184 // incompatible in too many ways 185 outSize = wcslcpy(out, in, size); 186 for (const wchar_t* inIter = in; *inIter != 0; ++inIter) { 187 if (*inIter > 127) 188 return B_BAD_VALUE; 189 } 190 return B_OK; 191 } 192 193 UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1); 194 size_t requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0); 195 196 uint8_t* buffer = (uint8_t*)out; 197 outSize = fCollator->getSortKey(unicodeIn, buffer, requiredSize); 198 199 // convert 1-byte characters to 4-byte wide characters: 200 for (size_t i = 0; i < outSize; ++i) 201 out[outSize - 1 - i] = buffer[outSize - 1 - i]; 202 203 return B_OK; 204 } 205 206 207 status_t 208 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out) 209 { 210 out.remove(); 211 212 if (in == NULL) 213 return B_OK; 214 215 size_t inLen = strlen(in); 216 if (inLen == 0) 217 return B_OK; 218 219 UConverter* converter; 220 status_t result = _GetConverter(converter); 221 if (result != B_OK) 222 return result; 223 224 UErrorCode icuStatus = U_ZERO_ERROR; 225 int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus); 226 if (icuStatus != U_BUFFER_OVERFLOW_ERROR) 227 return B_BAD_VALUE; 228 if (outLen < 0) 229 return B_ERROR; 230 if (outLen == 0) 231 return B_OK; 232 233 UChar* outBuf = out.getBuffer(outLen + 1); 234 icuStatus = U_ZERO_ERROR; 235 outLen 236 = ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus); 237 if (!U_SUCCESS(icuStatus)) { 238 out.releaseBuffer(0); 239 return B_BAD_VALUE; 240 } 241 242 out.releaseBuffer(outLen); 243 244 return B_OK; 245 } 246 247 248 } // namespace Libroot 249 } // namespace BPrivate 250