xref: /haiku/headers/private/shared/OpenHashTable.h (revision 820dca4df6c7bf955c46e8f6521b9408f50b2900)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 // bonefish:
36 // * removed need for exceptions
37 // * fixed warnings
38 // * implemented rehashing
39 // * added RemoveAll()
40 // TODO:
41 // * shrinking of element vectors
42 
43 // Hash table with open addresssing
44 
45 #ifndef __OPEN_HASH_TABLE__
46 #define __OPEN_HASH_TABLE__
47 
48 #include <stdlib.h>
49 #include <new>
50 
51 // don't include <Debug.h>
52 #ifndef _OPEN_HASH_TABLE_ASSERT
53 #	define _OPEN_HASH_TABLE_ASSERT(E)	(void)0
54 #endif
55 #ifndef _OPEN_HASH_TABLE_TRESPASS
56 #	define _OPEN_HASH_TABLE_TRESPASS()	(void)0
57 #endif
58 
59 namespace BPrivate {
60 
61 template <class Element>
62 class ElementVector {
63 	// element vector for OpenHashTable needs to implement this
64 	// interface
65 public:
66 	Element &At(int32 index);
67 	Element *Add();
68 	int32 IndexOf(const Element &) const;
69 	void Remove(int32 index);
70 };
71 
72 class OpenHashElement {
73 public:
74 	uint32 Hash() const;
75 	bool operator==(const OpenHashElement &) const;
76 	void Adopt(OpenHashElement &);
77 		// low overhead copy, original element is in undefined state
78 		// after call (calls Adopt on BString members, etc.)
79 	int32 fNext;
80 };
81 
82 const uint32 kPrimes [] = {
83 	509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071, 262139,
84 	524287, 1048573, 2097143, 4194301, 8388593, 16777213, 33554393, 67108859,
85 	134217689, 268435399, 536870909, 1073741789, 2147483647, 0
86 };
87 
88 template <class Element, class ElementVec = ElementVector<Element> >
89 class OpenHashTable {
90 public:
91 	OpenHashTable(int32 minSize, ElementVec *elementVector = 0,
92 				  float maxLoadFactor = 0.8);
93 		// it is up to the subclass of OpenHashTable to supply
94 		// elementVector
95 	~OpenHashTable();
96 
97 	bool InitCheck() const;
98 
99 	void SetElementVector(ElementVec *elementVector);
100 
101 	Element *FindFirst(uint32 elementHash) const;
102 	Element *Add(uint32 elementHash);
103 
104 	void Remove(Element *element, bool dontRehash = false);
105 	void RemoveAll();
106 
107 	// when calling Add, any outstanding element pointer may become
108 	// invalid; to deal with this, get the element index and restore
109 	// it after the add
110 	int32 ElementIndex(const Element *) const;
111 	Element *ElementAt(int32 index) const;
112 
113 	int32 ArraySize() const;
114 	int32 VectorSize() const;
115 	int32 CountElements() const;
116 
117 protected:
118 	static int32 OptimalSize(int32 minSize);
119 
120 private:
121 	bool _RehashIfNeeded();
122 	bool _Rehash();
123 
124 	int32 fArraySize;
125 	int32 fInitialSize;
126 	int32 fElementCount;
127 	int32 *fHashArray;
128 	ElementVec *fElementVector;
129 	float fMaxLoadFactor;
130 };
131 
132 template <class Element>
133 class OpenHashElementArray : public ElementVector<Element> {
134 	// this is a straightforward implementation of an element vector
135 	// deleting is handled by linking deleted elements into a free list
136 	// the vector never shrinks
137 public:
138 	OpenHashElementArray(int32 initialSize);
139 	~OpenHashElementArray();
140 
141 	bool InitCheck() const;
142 
143 	Element &At(int32 index);
144 	const Element &At(int32 index) const;
145 	Element *Add(const Element &);
146 	Element *Add();
147 	void Remove(int32 index);
148 	int32 IndexOf(const Element &) const;
149 	int32 Size() const;
150 
151 private:
152 	Element *fData;
153 	int32 fSize;
154 	int32 fNextFree;
155 	int32 fNextDeleted;
156 };
157 
158 
159 //-----------------------------------
160 
161 template<class Element, class ElementVec>
162 OpenHashTable<Element, ElementVec>::OpenHashTable(int32 minSize,
163 	ElementVec *elementVector, float maxLoadFactor)
164 	:	fArraySize(OptimalSize(minSize)),
165 		fInitialSize(fArraySize),
166 		fElementCount(0),
167 		fElementVector(elementVector),
168 		fMaxLoadFactor(maxLoadFactor)
169 {
170 	// sanity check the maximal load factor
171 	if (fMaxLoadFactor < 0.5)
172 		fMaxLoadFactor = 0.5;
173 	// allocate and init the array
174 	fHashArray = (int32*)calloc(fArraySize, sizeof(int32));
175 	if (fHashArray) {
176 		for (int32 index = 0; index < fArraySize; index++)
177 			fHashArray[index] = -1;
178 	}
179 }
180 
181 template<class Element, class ElementVec>
182 OpenHashTable<Element, ElementVec>::~OpenHashTable()
183 {
184 	RemoveAll();
185 	free(fHashArray);
186 }
187 
188 template<class Element, class ElementVec>
189 bool
190 OpenHashTable<Element, ElementVec>::InitCheck() const
191 {
192 	return (fHashArray && fElementVector);
193 }
194 
195 template<class Element, class ElementVec>
196 int32
197 OpenHashTable<Element, ElementVec>::OptimalSize(int32 minSize)
198 {
199 	for (int32 index = 0; ; index++)
200 		if (!kPrimes[index] || kPrimes[index] >= (uint32)minSize)
201 			return (int32)kPrimes[index];
202 
203 	return 0;
204 }
205 
206 template<class Element, class ElementVec>
207 Element *
208 OpenHashTable<Element, ElementVec>::FindFirst(uint32 hash) const
209 {
210 	_OPEN_HASH_TABLE_ASSERT(fElementVector);
211 	hash %= fArraySize;
212 	if (fHashArray[hash] < 0)
213 		return 0;
214 
215 	return &fElementVector->At(fHashArray[hash]);
216 }
217 
218 template<class Element, class ElementVec>
219 int32
220 OpenHashTable<Element, ElementVec>::ElementIndex(const Element *element) const
221 {
222 	return fElementVector->IndexOf(*element);
223 }
224 
225 template<class Element, class ElementVec>
226 Element *
227 OpenHashTable<Element, ElementVec>::ElementAt(int32 index) const
228 {
229 	return &fElementVector->At(index);
230 }
231 
232 template<class Element, class ElementVec>
233 int32
234 OpenHashTable<Element, ElementVec>::ArraySize() const
235 {
236 	 return fArraySize;
237 }
238 
239 template<class Element, class ElementVec>
240 int32
241 OpenHashTable<Element, ElementVec>::VectorSize() const
242 {
243 	 return fElementVector->Size();
244 }
245 
246 template<class Element, class ElementVec>
247 int32
248 OpenHashTable<Element, ElementVec>::CountElements() const
249 {
250 	 return fElementCount;
251 }
252 
253 
254 template<class Element, class ElementVec>
255 Element *
256 OpenHashTable<Element, ElementVec>::Add(uint32 hash)
257 {
258 	_OPEN_HASH_TABLE_ASSERT(fElementVector);
259 	_RehashIfNeeded();
260 	hash %= fArraySize;
261 	Element *result = fElementVector->Add();
262 	if (result) {
263 		result->fNext = fHashArray[hash];
264 		fHashArray[hash] = fElementVector->IndexOf(*result);
265 		fElementCount++;
266 	}
267 	return result;
268 }
269 
270 template<class Element, class ElementVec>
271 void
272 OpenHashTable<Element, ElementVec>::Remove(Element *element, bool dontRehash)
273 {
274 	if (!dontRehash)
275 		_RehashIfNeeded();
276 	uint32 hash = element->Hash() % fArraySize;
277 	int32 next = fHashArray[hash];
278 	_OPEN_HASH_TABLE_ASSERT(next >= 0);
279 
280 	if (&fElementVector->At(next) == element) {
281 		fHashArray[hash] = element->fNext;
282 		fElementVector->Remove(next);
283 		fElementCount--;
284 		return;
285 	}
286 
287 	for (int32 index = next; index >= 0; ) {
288 		// look for an existing match in table
289 		next = fElementVector->At(index).fNext;
290 		if (next < 0) {
291 			_OPEN_HASH_TABLE_TRESPASS();
292 			return;
293 		}
294 
295 		if (&fElementVector->At(next) == element) {
296 			fElementVector->At(index).fNext = element->fNext;
297 			fElementVector->Remove(next);
298 			fElementCount--;
299 			return;
300 		}
301 		index = next;
302 	}
303 }
304 
305 template<class Element, class ElementVec>
306 void
307 OpenHashTable<Element, ElementVec>::RemoveAll()
308 {
309 	for (int32 i = 0; fElementCount > 0 && i < fArraySize; i++) {
310 		int32 index = fHashArray[i];
311 		while (index >= 0) {
312 			Element* element = &fElementVector->At(index);
313 			int32 next = element->fNext;
314 			fElementVector->Remove(index);
315 			fElementCount--;
316 			index = next;
317 		}
318 		fHashArray[i] = -1;
319 	}
320 	_RehashIfNeeded();
321 }
322 
323 template<class Element, class ElementVec>
324 void
325 OpenHashTable<Element, ElementVec>::SetElementVector(ElementVec *elementVector)
326 {
327 	fElementVector = elementVector;
328 }
329 
330 // _RehashIfNeeded
331 template<class Element, class ElementVec>
332 bool
333 OpenHashTable<Element, ElementVec>::_RehashIfNeeded()
334 {
335 	// The load factor range [fMaxLoadFactor / 3, fMaxLoadFactor] is fine,
336 	// I think. After rehashing the load factor will be about
337 	// fMaxLoadFactor * 2 / 3, respectively fMaxLoadFactor / 2.
338 	float loadFactor = (float)fElementCount / (float)fArraySize;
339 	if (loadFactor > fMaxLoadFactor
340 		|| (fArraySize > fInitialSize && loadFactor < fMaxLoadFactor / 3)) {
341 		return _Rehash();
342 	}
343 	return true;
344 }
345 
346 // _Rehash
347 template<class Element, class ElementVec>
348 bool
349 OpenHashTable<Element, ElementVec>::_Rehash()
350 {
351 	bool result = true;
352 	int32 newSize = int32(fElementCount * 1.73 * fMaxLoadFactor);
353 	newSize = (fInitialSize > newSize ? fInitialSize : newSize);
354 	if (newSize != fArraySize) {
355 		// allocate a new array
356 		int32 *newHashArray = (int32*)calloc(newSize, sizeof(int32));
357 		if (newHashArray) {
358 			// init the new hash array
359 			for (int32 index = 0; index < newSize; index++)
360 				newHashArray[index] = -1;
361 			// iterate through all elements and put them into the new
362 			// hash array
363 			for (int i = 0; i < fArraySize; i++) {
364 				int32 index = fHashArray[i];
365 				while (index >= 0) {
366 					// insert the element in the new array
367 					Element &element = fElementVector->At(index);
368 					int32 next = element.fNext;
369 					uint32 hash = (element.Hash() % newSize);
370 					element.fNext = newHashArray[hash];
371 					newHashArray[hash] = index;
372 					// next element in old list
373 					index = next;
374 				}
375 			}
376 			// delete the old array and set the new one
377 			free(fHashArray);
378 			fHashArray = newHashArray;
379 			fArraySize = newSize;
380 		} else
381 			result = false;
382 	}
383 	return result;
384 }
385 
386 
387 template<class Element>
388 OpenHashElementArray<Element>::OpenHashElementArray(int32 initialSize)
389 	:	fSize(initialSize),
390 		fNextFree(0),
391 		fNextDeleted(-1)
392 {
393 	fData = (Element*)calloc((size_t)initialSize, sizeof(Element));
394 }
395 
396 template<class Element>
397 OpenHashElementArray<Element>::~OpenHashElementArray()
398 {
399 	free(fData);
400 }
401 
402 template<class Element>
403 bool
404 OpenHashElementArray<Element>::InitCheck() const
405 {
406 	return fData;
407 }
408 
409 template<class Element>
410 Element &
411 OpenHashElementArray<Element>::At(int32 index)
412 {
413 	_OPEN_HASH_TABLE_ASSERT(index < fSize);
414 	return fData[index];
415 }
416 
417 template<class Element>
418 const Element &
419 OpenHashElementArray<Element>::At(int32 index) const
420 {
421 	_OPEN_HASH_TABLE_ASSERT(index < fSize);
422 	return fData[index];
423 }
424 
425 template<class Element>
426 int32
427 OpenHashElementArray<Element>::IndexOf(const Element &element) const
428 {
429 	int32 result = &element - fData;
430 	if (result < 0 || result > fSize)
431 		return -1;
432 
433 	return result;
434 }
435 
436 template<class Element>
437 int32
438 OpenHashElementArray<Element>::Size() const
439 {
440 	return fSize;
441 }
442 
443 
444 template<class Element>
445 Element *
446 OpenHashElementArray<Element>::Add(const Element &newElement)
447 {
448 	Element *element = Add();
449 	if (element)
450 		element->Adopt(newElement);
451 	return element;
452 }
453 
454 #if DEBUG
455 const int32 kGrowChunk = 10;
456 #else
457 const int32 kGrowChunk = 1024;
458 #endif
459 
460 template<class Element>
461 Element *
462 OpenHashElementArray<Element>::Add()
463 {
464 	int32 index = fNextFree;
465 	if (fNextDeleted >= 0) {
466 		index = fNextDeleted;
467 		fNextDeleted = At(index).fNext;
468 	} else if (fNextFree >= fSize - 1) {
469 		int32 newSize = fSize + kGrowChunk;
470 /*
471 		Element *newData = (Element *)calloc((size_t)newSize , sizeof(Element));
472 		if (!newData)
473 			return NULL;
474 		memcpy(newData, fData, fSize * sizeof(Element));
475 		free(fData);
476 */
477 		Element *newData = (Element*)realloc(fData,
478 			(size_t)newSize * sizeof(Element));
479 		if (!newData)
480 			return NULL;
481 
482 		fData = newData;
483 		fSize = newSize;
484 		index = fNextFree;
485 		fNextFree++;
486 	} else
487 		fNextFree++;
488 
489 	new (&At(index)) Element;
490 		// call placement new to initialize the element properly
491 	_OPEN_HASH_TABLE_ASSERT(At(index).fNext == -1);
492 
493 	return &At(index);
494 }
495 
496 template<class Element>
497 void
498 OpenHashElementArray<Element>::Remove(int32 index)
499 {
500 	// delete by chaining empty elements in a single linked
501 	// list, reusing the next field
502 	_OPEN_HASH_TABLE_ASSERT(index < fSize);
503 	At(index).~Element();
504 		// call the destructor explicitly to destroy the element
505 		// properly
506 	At(index).fNext = fNextDeleted;
507 	fNextDeleted = index;
508 }
509 
510 } // namespace BPrivate
511 
512 using BPrivate::OpenHashTable;
513 
514 #endif // __OPEN_HASH_TABLE__
515