xref: /haiku/src/kits/locale/Collator.cpp (revision 323b65468e5836bb27a5e373b14027d902349437)
1 /*
2 * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Copyright 2010, Adrien Destugues <pulkomandy@pulkomandy.ath.cx>
4 * Distributed under the terms of the MIT License.
5 */
6 
7 
8 #include <ctype.h>
9 #include <stdlib.h>
10 
11 #include <new>
12 #include <typeinfo>
13 
14 #include <Collator.h>
15 #include <UnicodeChar.h>
16 #include <String.h>
17 #include <Message.h>
18 
19 #include <unicode/coll.h>
20 #include <unicode/tblcoll.h>
21 
22 
23 BCollator::BCollator()
24 	:
25 	fDefaultStrength(B_COLLATE_PRIMARY),
26 	fIgnorePunctuation(true)
27 {
28 	// TODO: the collator construction will have to change; the default
29 	//	collator should be constructed by the Locale/LocaleRoster, so we
30 	//	only need a constructor where you specify all details
31 
32 	UErrorCode error = U_ZERO_ERROR;
33 	fICUCollator = Collator::createInstance(error);
34 }
35 
36 
37 BCollator::BCollator(const char* locale, int8 strength, bool ignorePunctuation)
38 	:
39 	fDefaultStrength(strength),
40 	fIgnorePunctuation(ignorePunctuation)
41 {
42 	UErrorCode error = U_ZERO_ERROR;
43 	fICUCollator = Collator::createInstance(locale, error);
44 }
45 
46 
47 BCollator::BCollator(BMessage* archive)
48 	:
49 	BArchivable(archive),
50 	fICUCollator(NULL),
51 	fDefaultStrength(B_COLLATE_PRIMARY),
52 	fIgnorePunctuation(true)
53 {
54 	int32 data;
55 	if (archive->FindInt32("loc:strength", &data) == B_OK)
56 		fDefaultStrength = (uint8)data;
57 	else
58 		fDefaultStrength = B_COLLATE_PRIMARY;
59 
60 	archive->FindBool("loc:punctuation", &fIgnorePunctuation);
61 
62 	UErrorCode error = U_ZERO_ERROR;
63 	RuleBasedCollator* fallbackICUCollator
64 		= static_cast<RuleBasedCollator*>(Collator::createInstance(error));
65 
66 	ssize_t size;
67 	const void* buffer = NULL;
68 	if (archive->FindData("loc:collator", B_RAW_TYPE, &buffer, &size) == B_OK) {
69 		fICUCollator = new RuleBasedCollator((const uint8_t*)buffer, (int)size,
70 			fallbackICUCollator, error);
71 		if (fICUCollator == NULL) {
72 			fICUCollator = fallbackICUCollator;
73 				// Unarchiving failed, so we revert to the fallback collator
74 				// TODO: when can this happen, can it be avoided?
75 		}
76 	}
77 }
78 
79 
80 BCollator::BCollator(const BCollator& other)
81 	:
82 	fICUCollator(NULL)
83 {
84 	*this = other;
85 }
86 
87 
88 BCollator::~BCollator()
89 {
90 	delete fICUCollator;
91 }
92 
93 
94 BCollator& BCollator::operator=(const BCollator& source)
95 {
96 	if (&source != this) {
97 		delete fICUCollator;
98 
99 		fICUCollator = source.fICUCollator != NULL
100 			? source.fICUCollator->clone()
101 			: NULL;
102 		fDefaultStrength = source.fDefaultStrength;
103 		fIgnorePunctuation = source.fIgnorePunctuation;
104 	}
105 
106 	return *this;
107 }
108 
109 
110 void
111 BCollator::SetDefaultStrength(int8 strength)
112 {
113 	fDefaultStrength = strength;
114 }
115 
116 
117 int8
118 BCollator::DefaultStrength() const
119 {
120 	return fDefaultStrength;
121 }
122 
123 
124 void
125 BCollator::SetIgnorePunctuation(bool ignore)
126 {
127 	fIgnorePunctuation = ignore;
128 }
129 
130 
131 bool
132 BCollator::IgnorePunctuation() const
133 {
134 	return fIgnorePunctuation;
135 }
136 
137 
138 status_t
139 BCollator::GetSortKey(const char* string, BString* key, int8 strength) const
140 {
141 	_SetStrength(strength);
142 
143 	// TODO : handle fIgnorePunctuation
144 
145 	int length = strlen(string);
146 
147 	uint8_t* buffer = (uint8_t*)malloc(length * 2);
148 		// According to ICU documentation this should be enough in "most cases"
149 	if (buffer == NULL)
150 		return B_NO_MEMORY;
151 
152 	UErrorCode error = U_ZERO_ERROR;
153 	int requiredSize = fICUCollator->getSortKey(UnicodeString(string, length,
154 		NULL, error), buffer, length * 2);
155 	if (requiredSize > length * 2) {
156 		buffer = (uint8_t*)realloc(buffer, requiredSize);
157 		if (buffer == NULL)
158 			return B_NO_MEMORY;
159 
160 		error = U_ZERO_ERROR;
161 		fICUCollator->getSortKey(UnicodeString(string, length, NULL, error),
162 			buffer,	requiredSize);
163 	}
164 
165 	key->SetTo((char*)buffer);
166 	free(buffer);
167 
168 	if (error == U_ZERO_ERROR)
169 		return B_OK;
170 
171 	return B_ERROR;
172 }
173 
174 
175 int
176 BCollator::Compare(const char* s1, const char* s2, int8 strength) const
177 {
178 	_SetStrength(strength);
179 
180 	// TODO : handle fIgnorePunctuation
181 
182 	UErrorCode error = U_ZERO_ERROR;
183 	return fICUCollator->compare(s1, s2, error);
184 }
185 
186 
187 status_t
188 BCollator::Archive(BMessage* archive, bool deep) const
189 {
190 	status_t status = BArchivable::Archive(archive, deep);
191 	if (status < B_OK)
192 		return status;
193 
194 	if (status == B_OK)
195 		status = archive->AddInt32("loc:strength", fDefaultStrength);
196 	if (status == B_OK)
197 		status = archive->AddBool("loc:punctuation", fIgnorePunctuation);
198 
199 	UErrorCode error = U_ZERO_ERROR;
200 	int size = static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(NULL,
201 		0, error);
202 		// This WILL fail with U_BUFFER_OVERFLOW_ERROR. But we get the needed
203 		// size.
204 	error = U_ZERO_ERROR;
205 	uint8_t* buffer = (uint8_t*)malloc(size);
206 	static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(buffer, size,
207 		error);
208 
209 	if (status == B_OK && error == U_ZERO_ERROR)
210 		status = archive->AddData("loc:collator", B_RAW_TYPE, buffer, size);
211 	delete buffer;
212 
213 	if (error == U_ZERO_ERROR)
214 		return status;
215 	return B_ERROR;
216 }
217 
218 
219 BArchivable*
220 BCollator::Instantiate(BMessage* archive)
221 {
222 	if (validate_instantiation(archive, "BCollator"))
223 		return new(std::nothrow) BCollator(archive);
224 
225 	return NULL;
226 }
227 
228 
229 status_t
230 BCollator::_SetStrength(int8 strength) const
231 {
232 	if (strength == B_COLLATE_DEFAULT)
233 		strength = fDefaultStrength;
234 
235 	Collator::ECollationStrength icuStrength;
236 	switch (strength) {
237 		case B_COLLATE_PRIMARY:
238 			icuStrength = Collator::PRIMARY;
239 			break;
240 		case B_COLLATE_SECONDARY:
241 			icuStrength = Collator::SECONDARY;
242 			break;
243 		case B_COLLATE_TERTIARY:
244 		default:
245 			icuStrength = Collator::TERTIARY;
246 			break;
247 		case B_COLLATE_QUATERNARY:
248 			icuStrength = Collator::QUATERNARY;
249 			break;
250 		case B_COLLATE_IDENTICAL:
251 			icuStrength = Collator::IDENTICAL;
252 			break;
253 	}
254 	fICUCollator->setStrength(icuStrength);
255 
256 	return B_OK;
257 }
258