xref: /haiku/src/kits/locale/Collator.cpp (revision 0fae96c5a349db3761ac2a4ab4a7fbbf23a3b76c)
1 /*
2 * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Copyright 2010, Adrien Destugues <pulkomandy@pulkomandy.ath.cx>
4 * Distributed under the terms of the MIT License.
5 */
6 
7 
8 #include <unicode/uversion.h>
9 #include <Collator.h>
10 
11 #include <ctype.h>
12 #include <stdlib.h>
13 
14 #include <new>
15 #include <typeinfo>
16 
17 #include <UnicodeChar.h>
18 #include <String.h>
19 #include <Message.h>
20 
21 #include <unicode/coll.h>
22 #include <unicode/tblcoll.h>
23 
24 
25 U_NAMESPACE_USE
26 
27 
28 BCollator::BCollator()
29 	:
30 	fIgnorePunctuation(true)
31 {
32 	// TODO: the collator construction will have to change; the default
33 	//	collator should be constructed by the Locale/LocaleRoster, so we
34 	//	only need a constructor where you specify all details
35 
36 	UErrorCode error = U_ZERO_ERROR;
37 	fICUCollator = Collator::createInstance(error);
38 	SetStrength(B_COLLATE_TERTIARY);
39 }
40 
41 
42 BCollator::BCollator(const char* locale, int8 strength, bool ignorePunctuation)
43 	:
44 	fIgnorePunctuation(ignorePunctuation)
45 {
46 	UErrorCode error = U_ZERO_ERROR;
47 	fICUCollator = Collator::createInstance(locale, error);
48 	SetStrength(strength);
49 }
50 
51 
52 BCollator::BCollator(BMessage* archive)
53 	:
54 	BArchivable(archive),
55 	fICUCollator(NULL),
56 	fIgnorePunctuation(true)
57 {
58 	archive->FindBool("loc:punctuation", &fIgnorePunctuation);
59 
60 	UErrorCode error = U_ZERO_ERROR;
61 	RuleBasedCollator* fallbackICUCollator
62 		= static_cast<RuleBasedCollator*>(Collator::createInstance(error));
63 
64 	ssize_t size;
65 	const void* buffer = NULL;
66 	if (archive->FindData("loc:collator", B_RAW_TYPE, &buffer, &size) == B_OK) {
67 		fICUCollator = new RuleBasedCollator((const uint8_t*)buffer, (int)size,
68 			fallbackICUCollator, error);
69 		if (fICUCollator == NULL) {
70 			fICUCollator = fallbackICUCollator;
71 				// Unarchiving failed, so we revert to the fallback collator
72 				// TODO: when can this happen, can it be avoided?
73 		}
74 	}
75 }
76 
77 
78 BCollator::BCollator(const BCollator& other)
79 	:
80 	fICUCollator(NULL)
81 {
82 	*this = other;
83 }
84 
85 
86 BCollator::~BCollator()
87 {
88 	delete fICUCollator;
89 }
90 
91 
92 BCollator& BCollator::operator=(const BCollator& source)
93 {
94 	if (&source != this) {
95 		delete fICUCollator;
96 
97 		fICUCollator = source.fICUCollator != NULL
98 			? source.fICUCollator->clone()
99 			: NULL;
100 		fIgnorePunctuation = source.fIgnorePunctuation;
101 	}
102 
103 	return *this;
104 }
105 
106 
107 void
108 BCollator::SetIgnorePunctuation(bool ignore)
109 {
110 	fIgnorePunctuation = ignore;
111 }
112 
113 
114 bool
115 BCollator::IgnorePunctuation() const
116 {
117 	return fIgnorePunctuation;
118 }
119 
120 
121 status_t
122 BCollator::SetNumericSorting(bool enable)
123 {
124 	UErrorCode error = U_ZERO_ERROR;
125 	fICUCollator->setAttribute(UCOL_NUMERIC_COLLATION,
126 		enable ? UCOL_ON : UCOL_OFF, error);
127 
128 	return error == U_ZERO_ERROR ? B_OK : B_ERROR;
129 }
130 
131 
132 status_t
133 BCollator::GetSortKey(const char* string, BString* key) const
134 {
135 	// TODO : handle fIgnorePunctuation
136 
137 	int length = strlen(string);
138 
139 	uint8_t* buffer = (uint8_t*)malloc(length * 2);
140 		// According to ICU documentation this should be enough in "most cases"
141 	if (buffer == NULL)
142 		return B_NO_MEMORY;
143 
144 	UErrorCode error = U_ZERO_ERROR;
145 	int requiredSize = fICUCollator->getSortKey(UnicodeString(string, length,
146 		NULL, error), buffer, length * 2);
147 	if (requiredSize > length * 2) {
148 		uint8_t* tmpBuffer = (uint8_t*)realloc(buffer, requiredSize);
149 		if (tmpBuffer == NULL) {
150 			free(buffer);
151 			buffer = NULL;
152 			return B_NO_MEMORY;
153 		} else {
154 			buffer = tmpBuffer;
155 		}
156 
157 		error = U_ZERO_ERROR;
158 		fICUCollator->getSortKey(UnicodeString(string, length, NULL, error),
159 			buffer,	requiredSize);
160 	}
161 
162 	key->SetTo((char*)buffer);
163 	free(buffer);
164 
165 	if (error == U_ZERO_ERROR)
166 		return B_OK;
167 
168 	return B_ERROR;
169 }
170 
171 
172 int
173 BCollator::Compare(const char* s1, const char* s2) const
174 {
175 	// TODO : handle fIgnorePunctuation
176 
177 	UErrorCode error = U_ZERO_ERROR;
178 	return fICUCollator->compare(s1, s2, error);
179 }
180 
181 
182 status_t
183 BCollator::Archive(BMessage* archive, bool deep) const
184 {
185 	status_t status = BArchivable::Archive(archive, deep);
186 	if (status < B_OK)
187 		return status;
188 
189 	if (status == B_OK)
190 		status = archive->AddBool("loc:punctuation", fIgnorePunctuation);
191 
192 	UErrorCode error = U_ZERO_ERROR;
193 	int size = static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(NULL,
194 		0, error);
195 		// This WILL fail with U_BUFFER_OVERFLOW_ERROR. But we get the needed
196 		// size.
197 	error = U_ZERO_ERROR;
198 	uint8_t* buffer = (uint8_t*)malloc(size);
199 	static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(buffer, size,
200 		error);
201 
202 	if (status == B_OK && error == U_ZERO_ERROR)
203 		status = archive->AddData("loc:collator", B_RAW_TYPE, buffer, size);
204 	free(buffer);
205 
206 	if (error == U_ZERO_ERROR)
207 		return status;
208 	return B_ERROR;
209 }
210 
211 
212 BArchivable*
213 BCollator::Instantiate(BMessage* archive)
214 {
215 	if (validate_instantiation(archive, "BCollator"))
216 		return new(std::nothrow) BCollator(archive);
217 
218 	return NULL;
219 }
220 
221 
222 status_t
223 BCollator::SetStrength(int8 strength) const
224 {
225 	if (strength == B_COLLATE_DEFAULT)
226 		strength = B_COLLATE_TERTIARY;
227 
228 	Collator::ECollationStrength icuStrength;
229 	switch (strength) {
230 		case B_COLLATE_PRIMARY:
231 			icuStrength = Collator::PRIMARY;
232 			break;
233 		case B_COLLATE_SECONDARY:
234 			icuStrength = Collator::SECONDARY;
235 			break;
236 		case B_COLLATE_TERTIARY:
237 		default:
238 			icuStrength = Collator::TERTIARY;
239 			break;
240 		case B_COLLATE_QUATERNARY:
241 			icuStrength = Collator::QUATERNARY;
242 			break;
243 		case B_COLLATE_IDENTICAL:
244 			icuStrength = Collator::IDENTICAL;
245 			break;
246 	}
247 	fICUCollator->setStrength(icuStrength);
248 
249 	return B_OK;
250 }
251