1 /* 2 * Copyright 2010-2011, Oliver Tappe, zooey@hirschkaefer.de. 3 * Distributed under the terms of the MIT License. 4 */ 5 6 7 #include "ICUCollateData.h" 8 9 #include <string.h> 10 #include <strings.h> 11 #include <wchar.h> 12 13 #include <unicode/unistr.h> 14 15 #include <AutoDeleter.h> 16 17 18 U_NAMESPACE_USE 19 20 21 namespace BPrivate { 22 namespace Libroot { 23 24 25 ICUCollateData::ICUCollateData(pthread_key_t tlsKey) 26 : 27 inherited(tlsKey), 28 fCollator(NULL) 29 { 30 } 31 32 33 ICUCollateData::~ICUCollateData() 34 { 35 delete fCollator; 36 } 37 38 39 status_t 40 ICUCollateData::SetTo(const Locale& locale, const char* posixLocaleName) 41 { 42 status_t result = inherited::SetTo(locale, posixLocaleName); 43 44 if (result == B_OK) { 45 UErrorCode icuStatus = U_ZERO_ERROR; 46 delete fCollator; 47 fCollator = Collator::createInstance(fLocale, icuStatus); 48 if (!U_SUCCESS(icuStatus)) 49 return B_NO_MEMORY; 50 } 51 52 return result; 53 } 54 55 56 status_t 57 ICUCollateData::SetToPosix() 58 { 59 status_t result = inherited::SetToPosix(); 60 61 if (result == B_OK) { 62 delete fCollator; 63 fCollator = NULL; 64 } 65 66 return result; 67 } 68 69 70 status_t 71 ICUCollateData::Strcoll(const char* a, const char* b, int& result) 72 { 73 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 74 // handle POSIX here as the collator ICU uses for that (english) is 75 // incompatible in too many ways 76 result = strcmp(a, b); 77 for (const char* aIter = a; *aIter != 0; ++aIter) { 78 if (*aIter < 0) 79 return B_BAD_VALUE; 80 } 81 for (const char* bIter = b; *bIter != 0; ++bIter) { 82 if (*bIter < 0) 83 return B_BAD_VALUE; 84 } 85 return B_OK; 86 } 87 88 status_t status = B_OK; 89 UErrorCode icuStatus = U_ZERO_ERROR; 90 91 if (strcasecmp(fGivenCharset, "utf-8") == 0) { 92 UCharIterator aIter, bIter; 93 uiter_setUTF8(&aIter, a, -1); 94 uiter_setUTF8(&bIter, b, -1); 95 96 result = fCollator->compare(aIter, bIter, icuStatus); 97 } else { 98 UnicodeString unicodeA; 99 UnicodeString unicodeB; 100 101 if (_ToUnicodeString(a, unicodeA) != B_OK 102 || _ToUnicodeString(b, unicodeB) != B_OK) { 103 status = B_BAD_VALUE; 104 } 105 106 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 107 } 108 109 if (!U_SUCCESS(icuStatus)) 110 status = B_BAD_VALUE; 111 112 return status; 113 } 114 115 116 status_t 117 ICUCollateData::Strxfrm(char* out, const char* in, size_t size, size_t& outSize) 118 { 119 if (in == NULL) { 120 outSize = 0; 121 return B_OK; 122 } 123 124 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 125 // handle POSIX here as the collator ICU uses for that (english) is 126 // incompatible in too many ways 127 outSize = strlcpy(out, in, size); 128 for (const char* inIter = in; *inIter != 0; ++inIter) { 129 if (*inIter < 0) 130 return B_BAD_VALUE; 131 } 132 return B_OK; 133 } 134 135 UnicodeString unicodeIn; 136 if (_ToUnicodeString(in, unicodeIn) != B_OK) 137 return B_BAD_VALUE; 138 139 outSize = fCollator->getSortKey(unicodeIn, (uint8_t*)out, size); 140 141 return B_OK; 142 } 143 144 145 status_t 146 ICUCollateData::Wcscoll(const wchar_t* a, const wchar_t* b, int& result) 147 { 148 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 149 // handle POSIX here as the collator ICU uses for that (english) is 150 // incompatible in too many ways 151 result = wcscmp(a, b); 152 for (const wchar_t* aIter = a; *aIter != 0; ++aIter) { 153 if (*aIter > 127) 154 return B_BAD_VALUE; 155 } 156 for (const wchar_t* bIter = b; *bIter != 0; ++bIter) { 157 if (*bIter > 127) 158 return B_BAD_VALUE; 159 } 160 return B_OK; 161 } 162 163 UnicodeString unicodeA = UnicodeString::fromUTF32((UChar32*)a, -1); 164 UnicodeString unicodeB = UnicodeString::fromUTF32((UChar32*)b, -1); 165 166 UErrorCode icuStatus = U_ZERO_ERROR; 167 result = fCollator->compare(unicodeA, unicodeB, icuStatus); 168 169 if (!U_SUCCESS(icuStatus)) 170 return B_BAD_VALUE; 171 172 return B_OK; 173 } 174 175 176 status_t 177 ICUCollateData::Wcsxfrm(wchar_t* out, const wchar_t* in, size_t outSize, 178 size_t& requiredSize) 179 { 180 if (in == NULL) { 181 requiredSize = 0; 182 return B_OK; 183 } 184 185 if (fCollator == NULL || strcmp(fPosixLocaleName, "POSIX") == 0) { 186 // handle POSIX here as the collator ICU uses for that (english) is 187 // incompatible in too many ways 188 requiredSize = wcslcpy(out, in, outSize); 189 for (const wchar_t* inIter = in; *inIter != 0; ++inIter) { 190 if (*inIter > 127) 191 return B_BAD_VALUE; 192 } 193 return B_OK; 194 } 195 196 UnicodeString unicodeIn = UnicodeString::fromUTF32((UChar32*)in, -1); 197 requiredSize = fCollator->getSortKey(unicodeIn, NULL, 0); 198 199 if (outSize == 0) 200 return B_OK; 201 202 uint8_t* buffer = (uint8_t*)out; 203 fCollator->getSortKey(unicodeIn, buffer, outSize); 204 205 // convert 1-byte characters to 4-byte wide characters: 206 for (size_t i = 0; i < outSize; ++i) 207 out[outSize - 1 - i] = buffer[outSize - 1 - i]; 208 209 return B_OK; 210 } 211 212 213 status_t 214 ICUCollateData::_ToUnicodeString(const char* in, UnicodeString& out) 215 { 216 out.remove(); 217 218 if (in == NULL) 219 return B_OK; 220 221 size_t inLen = strlen(in); 222 if (inLen == 0) 223 return B_OK; 224 225 UConverter* converter; 226 status_t result = _GetConverter(converter); 227 if (result != B_OK) 228 return result; 229 230 UErrorCode icuStatus = U_ZERO_ERROR; 231 int32_t outLen = ucnv_toUChars(converter, NULL, 0, in, inLen, &icuStatus); 232 if (icuStatus != U_BUFFER_OVERFLOW_ERROR) 233 return B_BAD_VALUE; 234 if (outLen < 0) 235 return B_ERROR; 236 if (outLen == 0) 237 return B_OK; 238 239 UChar* outBuf = out.getBuffer(outLen + 1); 240 icuStatus = U_ZERO_ERROR; 241 outLen 242 = ucnv_toUChars(converter, outBuf, outLen + 1, in, inLen, &icuStatus); 243 if (!U_SUCCESS(icuStatus)) { 244 out.releaseBuffer(0); 245 return B_BAD_VALUE; 246 } 247 248 out.releaseBuffer(outLen); 249 250 return B_OK; 251 } 252 253 254 } // namespace Libroot 255 } // namespace BPrivate 256