xref: /haiku/src/kits/locale/Collator.cpp (revision a72f3582be00f2151800fa7da036d7adc14e3272)
1 /*
2 * Copyright 2003, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
3 * Copyright 2010, Adrien Destugues <pulkomandy@pulkomandy.ath.cx>
4 * Distributed under the terms of the MIT License.
5 */
6 
7 
#include <unicode/uversion.h>
#include <Collator.h>

#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include <new>
#include <typeinfo>

#include <UnicodeChar.h>
#include <String.h>
#include <Message.h>

#include <unicode/coll.h>
#include <unicode/tblcoll.h>
23 
24 
25 BCollator::BCollator()
26 	:
27 	fIgnorePunctuation(true)
28 {
29 	// TODO: the collator construction will have to change; the default
30 	//	collator should be constructed by the Locale/LocaleRoster, so we
31 	//	only need a constructor where you specify all details
32 
33 	UErrorCode error = U_ZERO_ERROR;
34 	fICUCollator = Collator::createInstance(error);
35 	SetStrength(B_COLLATE_TERTIARY);
36 }
37 
38 
39 BCollator::BCollator(const char* locale, int8 strength, bool ignorePunctuation)
40 	:
41 	fIgnorePunctuation(ignorePunctuation)
42 {
43 	UErrorCode error = U_ZERO_ERROR;
44 	fICUCollator = Collator::createInstance(locale, error);
45 	SetStrength(strength);
46 }
47 
48 
49 BCollator::BCollator(BMessage* archive)
50 	:
51 	BArchivable(archive),
52 	fICUCollator(NULL),
53 	fIgnorePunctuation(true)
54 {
55 	archive->FindBool("loc:punctuation", &fIgnorePunctuation);
56 
57 	UErrorCode error = U_ZERO_ERROR;
58 	RuleBasedCollator* fallbackICUCollator
59 		= static_cast<RuleBasedCollator*>(Collator::createInstance(error));
60 
61 	ssize_t size;
62 	const void* buffer = NULL;
63 	if (archive->FindData("loc:collator", B_RAW_TYPE, &buffer, &size) == B_OK) {
64 		fICUCollator = new RuleBasedCollator((const uint8_t*)buffer, (int)size,
65 			fallbackICUCollator, error);
66 		if (fICUCollator == NULL) {
67 			fICUCollator = fallbackICUCollator;
68 				// Unarchiving failed, so we revert to the fallback collator
69 				// TODO: when can this happen, can it be avoided?
70 		}
71 	}
72 }
73 
74 
75 BCollator::BCollator(const BCollator& other)
76 	:
77 	fICUCollator(NULL)
78 {
79 	*this = other;
80 }
81 
82 
83 BCollator::~BCollator()
84 {
85 	delete fICUCollator;
86 }
87 
88 
89 BCollator& BCollator::operator=(const BCollator& source)
90 {
91 	if (&source != this) {
92 		delete fICUCollator;
93 
94 		fICUCollator = source.fICUCollator != NULL
95 			? source.fICUCollator->clone()
96 			: NULL;
97 		fIgnorePunctuation = source.fIgnorePunctuation;
98 	}
99 
100 	return *this;
101 }
102 
103 
104 void
105 BCollator::SetIgnorePunctuation(bool ignore)
106 {
107 	fIgnorePunctuation = ignore;
108 }
109 
110 
111 bool
112 BCollator::IgnorePunctuation() const
113 {
114 	return fIgnorePunctuation;
115 }
116 
117 
118 status_t
119 BCollator::SetNumericSorting(bool enable)
120 {
121 	UErrorCode error = U_ZERO_ERROR;
122 	fICUCollator->setAttribute(UCOL_NUMERIC_COLLATION,
123 		enable ? UCOL_ON : UCOL_OFF, error);
124 
125 	return error == U_ZERO_ERROR ? B_OK : B_ERROR;
126 }
127 
128 
129 status_t
130 BCollator::GetSortKey(const char* string, BString* key) const
131 {
132 	// TODO : handle fIgnorePunctuation
133 
134 	int length = strlen(string);
135 
136 	uint8_t* buffer = (uint8_t*)malloc(length * 2);
137 		// According to ICU documentation this should be enough in "most cases"
138 	if (buffer == NULL)
139 		return B_NO_MEMORY;
140 
141 	UErrorCode error = U_ZERO_ERROR;
142 	int requiredSize = fICUCollator->getSortKey(UnicodeString(string, length,
143 		NULL, error), buffer, length * 2);
144 	if (requiredSize > length * 2) {
145 		uint8_t* tmpBuffer = (uint8_t*)realloc(buffer, requiredSize);
146 		if (tmpBuffer == NULL) {
147 			free(buffer);
148 			buffer = NULL;
149 			return B_NO_MEMORY;
150 		} else {
151 			buffer = tmpBuffer;
152 		}
153 
154 		error = U_ZERO_ERROR;
155 		fICUCollator->getSortKey(UnicodeString(string, length, NULL, error),
156 			buffer,	requiredSize);
157 	}
158 
159 	key->SetTo((char*)buffer);
160 	free(buffer);
161 
162 	if (error == U_ZERO_ERROR)
163 		return B_OK;
164 
165 	return B_ERROR;
166 }
167 
168 
169 int
170 BCollator::Compare(const char* s1, const char* s2) const
171 {
172 	// TODO : handle fIgnorePunctuation
173 
174 	UErrorCode error = U_ZERO_ERROR;
175 	return fICUCollator->compare(s1, s2, error);
176 }
177 
178 
179 status_t
180 BCollator::Archive(BMessage* archive, bool deep) const
181 {
182 	status_t status = BArchivable::Archive(archive, deep);
183 	if (status < B_OK)
184 		return status;
185 
186 	if (status == B_OK)
187 		status = archive->AddBool("loc:punctuation", fIgnorePunctuation);
188 
189 	UErrorCode error = U_ZERO_ERROR;
190 	int size = static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(NULL,
191 		0, error);
192 		// This WILL fail with U_BUFFER_OVERFLOW_ERROR. But we get the needed
193 		// size.
194 	error = U_ZERO_ERROR;
195 	uint8_t* buffer = (uint8_t*)malloc(size);
196 	static_cast<RuleBasedCollator*>(fICUCollator)->cloneBinary(buffer, size,
197 		error);
198 
199 	if (status == B_OK && error == U_ZERO_ERROR)
200 		status = archive->AddData("loc:collator", B_RAW_TYPE, buffer, size);
201 	free(buffer);
202 
203 	if (error == U_ZERO_ERROR)
204 		return status;
205 	return B_ERROR;
206 }
207 
208 
209 BArchivable*
210 BCollator::Instantiate(BMessage* archive)
211 {
212 	if (validate_instantiation(archive, "BCollator"))
213 		return new(std::nothrow) BCollator(archive);
214 
215 	return NULL;
216 }
217 
218 
219 status_t
220 BCollator::SetStrength(int8 strength) const
221 {
222 	if (strength == B_COLLATE_DEFAULT)
223 		strength = B_COLLATE_TERTIARY;
224 
225 	Collator::ECollationStrength icuStrength;
226 	switch (strength) {
227 		case B_COLLATE_PRIMARY:
228 			icuStrength = Collator::PRIMARY;
229 			break;
230 		case B_COLLATE_SECONDARY:
231 			icuStrength = Collator::SECONDARY;
232 			break;
233 		case B_COLLATE_TERTIARY:
234 		default:
235 			icuStrength = Collator::TERTIARY;
236 			break;
237 		case B_COLLATE_QUATERNARY:
238 			icuStrength = Collator::QUATERNARY;
239 			break;
240 		case B_COLLATE_IDENTICAL:
241 			icuStrength = Collator::IDENTICAL;
242 			break;
243 	}
244 	fICUCollator->setStrength(icuStrength);
245 
246 	return B_OK;
247 }
248