xref: /haiku/src/kits/locale/Collator.cpp (revision 97dfeb96704e5dbc5bec32ad7b21379d0125e031)
1 /*
2 * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Copyright 2010, Adrien Destugues <pulkomandy@pulkomandy.ath.cx>
4 * Distributed under the terms of the MIT License.
5 */
6 
7 
8 #include <unicode/uversion.h>
9 #include <Collator.h>
10 
11 #include <ctype.h>
12 #include <stdlib.h>
13 
14 #include <new>
15 #include <typeinfo>
16 
17 #include <UnicodeChar.h>
18 #include <String.h>
19 #include <Message.h>
20 
21 #include <unicode/coll.h>
22 #include <unicode/tblcoll.h>
23 
24 
25 BCollator::BCollator()
26 	:
27 	fDefaultStrength(B_COLLATE_PRIMARY),
28 	fIgnorePunctuation(true)
29 {
30 	// TODO: the collator construction will have to change; the default
31 	//	collator should be constructed by the Locale/LocaleRoster, so we
32 	//	only need a constructor where you specify all details
33 
34 	UErrorCode error = U_ZERO_ERROR;
35 	fICUCollator = Collator::createInstance(error);
36 }
37 
38 
39 BCollator::BCollator(const char* locale, int8 strength, bool ignorePunctuation)
40 	:
41 	fDefaultStrength(strength),
42 	fIgnorePunctuation(ignorePunctuation)
43 {
44 	UErrorCode error = U_ZERO_ERROR;
45 	fICUCollator = Collator::createInstance(locale, error);
46 }
47 
48 
49 BCollator::BCollator(BMessage* archive)
50 	:
51 	BArchivable(archive),
52 	fICUCollator(NULL),
53 	fDefaultStrength(B_COLLATE_PRIMARY),
54 	fIgnorePunctuation(true)
55 {
56 	int32 data;
57 	if (archive->FindInt32("loc:strength", &data) == B_OK)
58 		fDefaultStrength = (uint8)data;
59 	else
60 		fDefaultStrength = B_COLLATE_PRIMARY;
61 
62 	archive->FindBool("loc:punctuation", &fIgnorePunctuation);
63 
64 	UErrorCode error = U_ZERO_ERROR;
65 	RuleBasedCollator* fallbackICUCollator
66 		= static_cast<RuleBasedCollator*>(Collator::createInstance(error));
67 
68 	ssize_t size;
69 	const void* buffer = NULL;
70 	if (archive->FindData("loc:collator", B_RAW_TYPE, &buffer, &size) == B_OK) {
71 		fICUCollator = new RuleBasedCollator((const uint8_t*)buffer, (int)size,
72 			fallbackICUCollator, error);
73 		if (fICUCollator == NULL) {
74 			fICUCollator = fallbackICUCollator;
75 				// Unarchiving failed, so we revert to the fallback collator
76 				// TODO: when can this happen, can it be avoided?
77 		}
78 	}
79 }
80 
81 
82 BCollator::BCollator(const BCollator& other)
83 	:
84 	fICUCollator(NULL)
85 {
86 	*this = other;
87 }
88 
89 
90 BCollator::~BCollator()
91 {
92 	delete fICUCollator;
93 }
94 
95 
96 BCollator& BCollator::operator=(const BCollator& source)
97 {
98 	if (&source != this) {
99 		delete fICUCollator;
100 
101 		fICUCollator = source.fICUCollator != NULL
102 			? source.fICUCollator->clone()
103 			: NULL;
104 		fDefaultStrength = source.fDefaultStrength;
105 		fIgnorePunctuation = source.fIgnorePunctuation;
106 	}
107 
108 	return *this;
109 }
110 
111 
112 void
113 BCollator::SetDefaultStrength(int8 strength)
114 {
115 	fDefaultStrength = strength;
116 }
117 
118 
119 int8
120 BCollator::DefaultStrength() const
121 {
122 	return fDefaultStrength;
123 }
124 
125 
126 void
127 BCollator::SetIgnorePunctuation(bool ignore)
128 {
129 	fIgnorePunctuation = ignore;
130 }
131 
132 
133 bool
134 BCollator::IgnorePunctuation() const
135 {
136 	return fIgnorePunctuation;
137 }
138 
139 
140 status_t
141 BCollator::GetSortKey(const char* string, BString* key, int8 strength) const
142 {
143 	_SetStrength(strength);
144 
145 	// TODO : handle fIgnorePunctuation
146 
147 	int length = strlen(string);
148 
149 	uint8_t* buffer = (uint8_t*)malloc(length * 2);
150 		// According to ICU documentation this should be enough in "most cases"
151 	if (buffer == NULL)
152 		return B_NO_MEMORY;
153 
154 	UErrorCode error = U_ZERO_ERROR;
155 	int requiredSize = fICUCollator->getSortKey(UnicodeString(string, length,
156 		NULL, error), buffer, length * 2);
157 	if (requiredSize > length * 2) {
158 		buffer = (uint8_t*)realloc(buffer, requiredSize);
159 		if (buffer == NULL)
160 			return B_NO_MEMORY;
161 
162 		error = U_ZERO_ERROR;
163 		fICUCollator->getSortKey(UnicodeString(string, length, NULL, error),
164 			buffer,	requiredSize);
165 	}
166 
167 	key->SetTo((char*)buffer);
168 	free(buffer);
169 
170 	if (error == U_ZERO_ERROR)
171 		return B_OK;
172 
173 	return B_ERROR;
174 }
175 
176 
177 int
178 BCollator::Compare(const char* s1, const char* s2, int8 strength) const
179 {
180 	_SetStrength(strength);
181 
182 	// TODO : handle fIgnorePunctuation
183 
184 	UErrorCode error = U_ZERO_ERROR;
185 	return fICUCollator->compare(s1, s2, error);
186 }
187 
188 
189 status_t
190 BCollator::Archive(BMessage* archive, bool deep) const
191 {
192 	status_t status = BArchivable::Archive(archive, deep);
193 	if (status < B_OK)
194 		return status;
195 
196 	if (status == B_OK)
197 		status = archive->AddInt32("loc:strength", fDefaultStrength);
198 	if (status == B_OK)
199 		status = archive->AddBool("loc:punctuation", fIgnorePunctuation);
200 
201 	UErrorCode error = U_ZERO_ERROR;
202 	int size = static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(NULL,
203 		0, error);
204 		// This WILL fail with U_BUFFER_OVERFLOW_ERROR. But we get the needed
205 		// size.
206 	error = U_ZERO_ERROR;
207 	uint8_t* buffer = (uint8_t*)malloc(size);
208 	static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(buffer, size,
209 		error);
210 
211 	if (status == B_OK && error == U_ZERO_ERROR)
212 		status = archive->AddData("loc:collator", B_RAW_TYPE, buffer, size);
213 	free(buffer);
214 
215 	if (error == U_ZERO_ERROR)
216 		return status;
217 	return B_ERROR;
218 }
219 
220 
221 BArchivable*
222 BCollator::Instantiate(BMessage* archive)
223 {
224 	if (validate_instantiation(archive, "BCollator"))
225 		return new(std::nothrow) BCollator(archive);
226 
227 	return NULL;
228 }
229 
230 
231 status_t
232 BCollator::_SetStrength(int8 strength) const
233 {
234 	if (strength == B_COLLATE_DEFAULT)
235 		strength = fDefaultStrength;
236 
237 	Collator::ECollationStrength icuStrength;
238 	switch (strength) {
239 		case B_COLLATE_PRIMARY:
240 			icuStrength = Collator::PRIMARY;
241 			break;
242 		case B_COLLATE_SECONDARY:
243 			icuStrength = Collator::SECONDARY;
244 			break;
245 		case B_COLLATE_TERTIARY:
246 		default:
247 			icuStrength = Collator::TERTIARY;
248 			break;
249 		case B_COLLATE_QUATERNARY:
250 			icuStrength = Collator::QUATERNARY;
251 			break;
252 		case B_COLLATE_IDENTICAL:
253 			icuStrength = Collator::IDENTICAL;
254 			break;
255 	}
256 	fICUCollator->setStrength(icuStrength);
257 
258 	return B_OK;
259 }
260