xref: /haiku/src/kits/locale/Collator.cpp (revision fc7456e9b1ec38c941134ed6d01c438cf289381e)
1 /*
2 * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Copyright 2010, Adrien Destugues <pulkomandy@pulkomandy.ath.cx>
4 * Distributed under the terms of the MIT License.
5 */
6 
7 
8 #include <unicode/uversion.h>
9 #include <Collator.h>
10 
11 #include <ctype.h>
12 #include <stdlib.h>
13 
14 #include <new>
15 #include <typeinfo>
16 
17 #include <UnicodeChar.h>
18 #include <String.h>
19 #include <Message.h>
20 
21 #include <unicode/coll.h>
22 #include <unicode/tblcoll.h>
23 
24 
25 U_NAMESPACE_USE
26 
27 
28 BCollator::BCollator()
29 	:
30 	fIgnorePunctuation(true)
31 {
32 	// TODO: the collator construction will have to change; the default
33 	//	collator should be constructed by the Locale/LocaleRoster, so we
34 	//	only need a constructor where you specify all details
35 
36 	UErrorCode error = U_ZERO_ERROR;
37 	fICUCollator = Collator::createInstance(error);
38 	SetStrength(B_COLLATE_TERTIARY);
39 }
40 
41 
42 BCollator::BCollator(const char* locale, int8 strength, bool ignorePunctuation)
43 	:
44 	fIgnorePunctuation(ignorePunctuation)
45 {
46 	UErrorCode error = U_ZERO_ERROR;
47 	fICUCollator = Collator::createInstance(locale, error);
48 	SetStrength(strength);
49 }
50 
51 
52 BCollator::BCollator(BMessage* archive)
53 	:
54 	BArchivable(archive),
55 	fICUCollator(NULL),
56 	fIgnorePunctuation(true)
57 {
58 	archive->FindBool("loc:punctuation", &fIgnorePunctuation);
59 
60 	UErrorCode error = U_ZERO_ERROR;
61 	RuleBasedCollator* fallbackICUCollator
62 		= static_cast<RuleBasedCollator*>(Collator::createInstance(error));
63 
64 	ssize_t size;
65 	const void* buffer = NULL;
66 	if (archive->FindData("loc:collator", B_RAW_TYPE, &buffer, &size) == B_OK) {
67 		fICUCollator = new RuleBasedCollator((const uint8_t*)buffer, (int)size,
68 			fallbackICUCollator, error);
69 		if (fICUCollator == NULL) {
70 			fICUCollator = fallbackICUCollator;
71 				// Unarchiving failed, so we revert to the fallback collator
72 				// TODO: when can this happen, can it be avoided?
73 		}
74 	}
75 }
76 
77 
78 BCollator::BCollator(const BCollator& other)
79 	:
80 	fICUCollator(NULL)
81 {
82 	*this = other;
83 }
84 
85 
86 BCollator::~BCollator()
87 {
88 	delete fICUCollator;
89 }
90 
91 
92 BCollator& BCollator::operator=(const BCollator& source)
93 {
94 	if (&source != this) {
95 		delete fICUCollator;
96 
97 		fICUCollator = source.fICUCollator != NULL
98 			? source.fICUCollator->clone()
99 			: NULL;
100 		fIgnorePunctuation = source.fIgnorePunctuation;
101 	}
102 
103 	return *this;
104 }
105 
106 
107 void
108 BCollator::SetIgnorePunctuation(bool ignore)
109 {
110 	fIgnorePunctuation = ignore;
111 }
112 
113 
114 bool
115 BCollator::IgnorePunctuation() const
116 {
117 	return fIgnorePunctuation;
118 }
119 
120 
121 status_t
122 BCollator::SetNumericSorting(bool enable)
123 {
124 	if (fICUCollator == NULL)
125 		return B_NO_INIT;
126 
127 	UErrorCode error = U_ZERO_ERROR;
128 	fICUCollator->setAttribute(UCOL_NUMERIC_COLLATION,
129 		enable ? UCOL_ON : UCOL_OFF, error);
130 
131 	return error == U_ZERO_ERROR ? B_OK : B_ERROR;
132 }
133 
134 
135 status_t
136 BCollator::GetSortKey(const char* string, BString* key) const
137 {
138 	// TODO : handle fIgnorePunctuation
139 
140 	int length = strlen(string);
141 
142 	uint8_t* buffer = (uint8_t*)malloc(length * 2);
143 		// According to ICU documentation this should be enough in "most cases"
144 	if (buffer == NULL)
145 		return B_NO_MEMORY;
146 
147 	UErrorCode error = U_ZERO_ERROR;
148 	int requiredSize = fICUCollator->getSortKey(UnicodeString(string, length,
149 		NULL, error), buffer, length * 2);
150 	if (requiredSize > length * 2) {
151 		uint8_t* tmpBuffer = (uint8_t*)realloc(buffer, requiredSize);
152 		if (tmpBuffer == NULL) {
153 			free(buffer);
154 			buffer = NULL;
155 			return B_NO_MEMORY;
156 		} else {
157 			buffer = tmpBuffer;
158 		}
159 
160 		error = U_ZERO_ERROR;
161 		fICUCollator->getSortKey(UnicodeString(string, length, NULL, error),
162 			buffer,	requiredSize);
163 	}
164 
165 	key->SetTo((char*)buffer);
166 	free(buffer);
167 
168 	if (error == U_ZERO_ERROR)
169 		return B_OK;
170 
171 	return B_ERROR;
172 }
173 
174 
175 int
176 BCollator::Compare(const char* s1, const char* s2) const
177 {
178 	if (fICUCollator == NULL)
179 		return strcmp(s1, s2);
180 
181 	// TODO : handle fIgnorePunctuation
182 	UErrorCode error = U_ZERO_ERROR;
183 	return fICUCollator->compare(s1, s2, error);
184 }
185 
186 
187 status_t
188 BCollator::Archive(BMessage* archive, bool deep) const
189 {
190 	status_t status = BArchivable::Archive(archive, deep);
191 	if (status < B_OK)
192 		return status;
193 
194 	if (status == B_OK)
195 		status = archive->AddBool("loc:punctuation", fIgnorePunctuation);
196 
197 	UErrorCode error = U_ZERO_ERROR;
198 	int size = static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(NULL,
199 		0, error);
200 		// This WILL fail with U_BUFFER_OVERFLOW_ERROR. But we get the needed
201 		// size.
202 	error = U_ZERO_ERROR;
203 	uint8_t* buffer = (uint8_t*)malloc(size);
204 	static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(buffer, size,
205 		error);
206 
207 	if (status == B_OK && error == U_ZERO_ERROR)
208 		status = archive->AddData("loc:collator", B_RAW_TYPE, buffer, size);
209 	free(buffer);
210 
211 	if (error == U_ZERO_ERROR)
212 		return status;
213 	return B_ERROR;
214 }
215 
216 
217 BArchivable*
218 BCollator::Instantiate(BMessage* archive)
219 {
220 	if (validate_instantiation(archive, "BCollator"))
221 		return new(std::nothrow) BCollator(archive);
222 
223 	return NULL;
224 }
225 
226 
227 status_t
228 BCollator::SetStrength(int8 strength) const
229 {
230 	if (fICUCollator == NULL)
231 		return B_NO_INIT;
232 
233 	if (strength == B_COLLATE_DEFAULT)
234 		strength = B_COLLATE_TERTIARY;
235 
236 	Collator::ECollationStrength icuStrength;
237 	switch (strength) {
238 		case B_COLLATE_PRIMARY:
239 			icuStrength = Collator::PRIMARY;
240 			break;
241 		case B_COLLATE_SECONDARY:
242 			icuStrength = Collator::SECONDARY;
243 			break;
244 		case B_COLLATE_TERTIARY:
245 		default:
246 			icuStrength = Collator::TERTIARY;
247 			break;
248 		case B_COLLATE_QUATERNARY:
249 			icuStrength = Collator::QUATERNARY;
250 			break;
251 		case B_COLLATE_IDENTICAL:
252 			icuStrength = Collator::IDENTICAL;
253 			break;
254 	}
255 	fICUCollator->setStrength(icuStrength);
256 
257 	return B_OK;
258 }
259