xref: /haiku/src/kits/locale/Collator.cpp (revision 7457ccb4b2f4786525d3b7bda42598487d57ab7d)
1 /*
2 * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Copyright 2010, Adrien Destugues <pulkomandy@pulkomandy.ath.cx>
4 * Distributed under the terms of the MIT License.
5 */
6 
7 
8 #include <unicode/uversion.h>
9 #include <Collator.h>
10 
11 #include <ctype.h>
12 #include <stdlib.h>
13 
14 #include <new>
15 #include <typeinfo>
16 
17 #include <UnicodeChar.h>
18 #include <String.h>
19 #include <Message.h>
20 
21 #include <unicode/coll.h>
22 #include <unicode/tblcoll.h>
23 
24 
25 BCollator::BCollator()
26 	:
27 	fIgnorePunctuation(true)
28 {
29 	// TODO: the collator construction will have to change; the default
30 	//	collator should be constructed by the Locale/LocaleRoster, so we
31 	//	only need a constructor where you specify all details
32 
33 	UErrorCode error = U_ZERO_ERROR;
34 	fICUCollator = Collator::createInstance(error);
35 	SetStrength(B_COLLATE_TERTIARY);
36 }
37 
38 
39 BCollator::BCollator(const char* locale, int8 strength, bool ignorePunctuation)
40 	:
41 	fIgnorePunctuation(ignorePunctuation)
42 {
43 	UErrorCode error = U_ZERO_ERROR;
44 	fICUCollator = Collator::createInstance(locale, error);
45 	SetStrength(strength);
46 }
47 
48 
49 BCollator::BCollator(BMessage* archive)
50 	:
51 	BArchivable(archive),
52 	fICUCollator(NULL),
53 	fIgnorePunctuation(true)
54 {
55 	archive->FindBool("loc:punctuation", &fIgnorePunctuation);
56 
57 	UErrorCode error = U_ZERO_ERROR;
58 	RuleBasedCollator* fallbackICUCollator
59 		= static_cast<RuleBasedCollator*>(Collator::createInstance(error));
60 
61 	ssize_t size;
62 	const void* buffer = NULL;
63 	if (archive->FindData("loc:collator", B_RAW_TYPE, &buffer, &size) == B_OK) {
64 		fICUCollator = new RuleBasedCollator((const uint8_t*)buffer, (int)size,
65 			fallbackICUCollator, error);
66 		if (fICUCollator == NULL) {
67 			fICUCollator = fallbackICUCollator;
68 				// Unarchiving failed, so we revert to the fallback collator
69 				// TODO: when can this happen, can it be avoided?
70 		}
71 	}
72 }
73 
74 
75 BCollator::BCollator(const BCollator& other)
76 	:
77 	fICUCollator(NULL)
78 {
79 	*this = other;
80 }
81 
82 
83 BCollator::~BCollator()
84 {
85 	delete fICUCollator;
86 }
87 
88 
89 BCollator& BCollator::operator=(const BCollator& source)
90 {
91 	if (&source != this) {
92 		delete fICUCollator;
93 
94 		fICUCollator = source.fICUCollator != NULL
95 			? source.fICUCollator->clone()
96 			: NULL;
97 		fIgnorePunctuation = source.fIgnorePunctuation;
98 	}
99 
100 	return *this;
101 }
102 
103 
104 void
105 BCollator::SetIgnorePunctuation(bool ignore)
106 {
107 	fIgnorePunctuation = ignore;
108 }
109 
110 
111 bool
112 BCollator::IgnorePunctuation() const
113 {
114 	return fIgnorePunctuation;
115 }
116 
117 
118 status_t
119 BCollator::SetNumericSorting(bool enable)
120 {
121 	UErrorCode error = U_ZERO_ERROR;
122 	fICUCollator->setAttribute(UCOL_NUMERIC_COLLATION,
123 		enable ? UCOL_ON : UCOL_OFF, error);
124 
125 	return error == U_ZERO_ERROR ? B_OK : B_ERROR;
126 }
127 
128 
129 status_t
130 BCollator::GetSortKey(const char* string, BString* key) const
131 {
132 	// TODO : handle fIgnorePunctuation
133 
134 	int length = strlen(string);
135 
136 	uint8_t* buffer = (uint8_t*)malloc(length * 2);
137 		// According to ICU documentation this should be enough in "most cases"
138 	if (buffer == NULL)
139 		return B_NO_MEMORY;
140 
141 	UErrorCode error = U_ZERO_ERROR;
142 	int requiredSize = fICUCollator->getSortKey(UnicodeString(string, length,
143 		NULL, error), buffer, length * 2);
144 	if (requiredSize > length * 2) {
145 		buffer = (uint8_t*)realloc(buffer, requiredSize);
146 		if (buffer == NULL)
147 			return B_NO_MEMORY;
148 
149 		error = U_ZERO_ERROR;
150 		fICUCollator->getSortKey(UnicodeString(string, length, NULL, error),
151 			buffer,	requiredSize);
152 	}
153 
154 	key->SetTo((char*)buffer);
155 	free(buffer);
156 
157 	if (error == U_ZERO_ERROR)
158 		return B_OK;
159 
160 	return B_ERROR;
161 }
162 
163 
164 int
165 BCollator::Compare(const char* s1, const char* s2) const
166 {
167 	// TODO : handle fIgnorePunctuation
168 
169 	UErrorCode error = U_ZERO_ERROR;
170 	return fICUCollator->compare(s1, s2, error);
171 }
172 
173 
174 status_t
175 BCollator::Archive(BMessage* archive, bool deep) const
176 {
177 	status_t status = BArchivable::Archive(archive, deep);
178 	if (status < B_OK)
179 		return status;
180 
181 	if (status == B_OK)
182 		status = archive->AddBool("loc:punctuation", fIgnorePunctuation);
183 
184 	UErrorCode error = U_ZERO_ERROR;
185 	int size = static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(NULL,
186 		0, error);
187 		// This WILL fail with U_BUFFER_OVERFLOW_ERROR. But we get the needed
188 		// size.
189 	error = U_ZERO_ERROR;
190 	uint8_t* buffer = (uint8_t*)malloc(size);
191 	static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(buffer, size,
192 		error);
193 
194 	if (status == B_OK && error == U_ZERO_ERROR)
195 		status = archive->AddData("loc:collator", B_RAW_TYPE, buffer, size);
196 	free(buffer);
197 
198 	if (error == U_ZERO_ERROR)
199 		return status;
200 	return B_ERROR;
201 }
202 
203 
204 BArchivable*
205 BCollator::Instantiate(BMessage* archive)
206 {
207 	if (validate_instantiation(archive, "BCollator"))
208 		return new(std::nothrow) BCollator(archive);
209 
210 	return NULL;
211 }
212 
213 
214 status_t
215 BCollator::SetStrength(int8 strength) const
216 {
217 	if (strength == B_COLLATE_DEFAULT)
218 		strength = B_COLLATE_TERTIARY;
219 
220 	Collator::ECollationStrength icuStrength;
221 	switch (strength) {
222 		case B_COLLATE_PRIMARY:
223 			icuStrength = Collator::PRIMARY;
224 			break;
225 		case B_COLLATE_SECONDARY:
226 			icuStrength = Collator::SECONDARY;
227 			break;
228 		case B_COLLATE_TERTIARY:
229 		default:
230 			icuStrength = Collator::TERTIARY;
231 			break;
232 		case B_COLLATE_QUATERNARY:
233 			icuStrength = Collator::QUATERNARY;
234 			break;
235 		case B_COLLATE_IDENTICAL:
236 			icuStrength = Collator::IDENTICAL;
237 			break;
238 	}
239 	fICUCollator->setStrength(icuStrength);
240 
241 	return B_OK;
242 }
243