xref: /haiku/headers/private/shared/OpenHashTable.h (revision 1214ef1b2100f2b3299fc9d8d6142e46f70a4c3f)
1 /*
2 Open Tracker License
3 
4 Terms and Conditions
5 
6 Copyright (c) 1991-2000, Be Incorporated. All rights reserved.
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy of
9 this software and associated documentation files (the "Software"), to deal in
10 the Software without restriction, including without limitation the rights to
11 use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
12 of the Software, and to permit persons to whom the Software is furnished to do
13 so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice applies to all licensees
16 and shall be included in all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF TITLE, MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 BE INCORPORATED BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
22 AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF, OR IN CONNECTION
23 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
24 
25 Except as contained in this notice, the name of Be Incorporated shall not be
26 used in advertising or otherwise to promote the sale, use or other dealings in
27 this Software without prior written authorization from Be Incorporated.
28 
29 Tracker(TM), Be(R), BeOS(R), and BeIA(TM) are trademarks or registered trademarks
30 of Be Incorporated in the United States and other countries. Other brand product
31 names are registered trademarks or trademarks of their respective holders.
32 All rights reserved.
33 */
34 
35 // bonefish:
36 // * removed need for exceptions
37 // * fixed warnings
38 // * implemented rehashing
39 // * added RemoveAll()
40 // TODO:
41 // * shrinking of element vectors
42 
43 // Hash table with open addresssing
44 
45 #ifndef __OPEN_HASH_TABLE__
46 #define __OPEN_HASH_TABLE__
47 
48 #include <malloc.h>
49 #include <new.h>
50 
51 // don't include <Debug.h>
52 #define ASSERT(E)               (void)0
53 #define TRESPASS()              (void)0
54 
55 namespace BPrivate {
56 
57 template <class Element>
58 class ElementVector {
59 	// element vector for OpenHashTable needs to implement this
60 	// interface
61 public:
62 	Element &At(int32 index);
63 	Element *Add();
64 	int32 IndexOf(const Element &) const;
65 	void Remove(int32 index);
66 };
67 
68 class OpenHashElement {
69 public:
70 	uint32 Hash() const;
71 	bool operator==(const OpenHashElement &) const;
72 	void Adopt(OpenHashElement &);
73 		// low overhead copy, original element is in undefined state
74 		// after call (calls Adopt on BString members, etc.)
75 	int32 fNext;
76 };
77 
78 const uint32 kPrimes [] = {
79 	509, 1021, 2039, 4093, 8191, 16381, 32749, 65521, 131071, 262139,
80 	524287, 1048573, 2097143, 4194301, 8388593, 16777213, 33554393, 67108859,
81 	134217689, 268435399, 536870909, 1073741789, 2147483647, 0
82 };
83 
84 template <class Element, class ElementVec = ElementVector<Element> >
85 class OpenHashTable {
86 public:
87 	OpenHashTable(int32 minSize, ElementVec *elementVector = 0,
88 				  float maxLoadFactor = 0.8);
89 		// it is up to the subclass of OpenHashTable to supply
90 		// elementVector
91 	~OpenHashTable();
92 
93 	bool InitCheck() const;
94 
95 	void SetElementVector(ElementVec *elementVector);
96 
97 	Element *FindFirst(uint32 elementHash) const;
98 	Element *Add(uint32 elementHash);
99 
100 	void Remove(Element *element, bool dontRehash = false);
101 	void RemoveAll();
102 
103 	// when calling Add, any outstanding element pointer may become
104 	// invalid; to deal with this, get the element index and restore
105 	// it after the add
106 	int32 ElementIndex(const Element *) const;
107 	Element *ElementAt(int32 index) const;
108 
109 	int32 ArraySize() const;
110 	int32 VectorSize() const;
111 	int32 CountElements() const;
112 
113 protected:
114 	static int32 OptimalSize(int32 minSize);
115 
116 private:
117 	bool _RehashIfNeeded();
118 	bool _Rehash();
119 
120 	int32 fArraySize;
121 	int32 fInitialSize;
122 	int32 fElementCount;
123 	int32 *fHashArray;
124 	ElementVec *fElementVector;
125 	float fMaxLoadFactor;
126 };
127 
128 template <class Element>
129 class OpenHashElementArray : public ElementVector<Element> {
130 	// this is a straightforward implementation of an element vector
131 	// deleting is handled by linking deleted elements into a free list
132 	// the vector never shrinks
133 public:
134 	OpenHashElementArray(int32 initialSize);
135 	~OpenHashElementArray();
136 
137 	bool InitCheck() const;
138 
139 	Element &At(int32 index);
140 	const Element &At(int32 index) const;
141 	Element *Add(const Element &);
142 	Element *Add();
143 	void Remove(int32 index);
144 	int32 IndexOf(const Element &) const;
145 	int32 Size() const;
146 
147 private:
148 	Element *fData;
149 	int32 fSize;
150 	int32 fNextFree;
151 	int32 fNextDeleted;
152 };
153 
154 
155 //-----------------------------------
156 
157 template<class Element, class ElementVec>
158 OpenHashTable<Element, ElementVec>::OpenHashTable(int32 minSize,
159 	ElementVec *elementVector, float maxLoadFactor)
160 	:	fArraySize(OptimalSize(minSize)),
161 		fInitialSize(fArraySize),
162 		fElementCount(0),
163 		fElementVector(elementVector),
164 		fMaxLoadFactor(maxLoadFactor)
165 {
166 	// sanity check the maximal load factor
167 	if (fMaxLoadFactor < 0.5)
168 		fMaxLoadFactor = 0.5;
169 	// allocate and init the array
170 	fHashArray = (int32*)calloc(fArraySize, sizeof(int32));
171 	if (fHashArray) {
172 		for (int32 index = 0; index < fArraySize; index++)
173 			fHashArray[index] = -1;
174 	}
175 }
176 
177 template<class Element, class ElementVec>
178 OpenHashTable<Element, ElementVec>::~OpenHashTable()
179 {
180 	RemoveAll();
181 	free(fHashArray);
182 }
183 
184 template<class Element, class ElementVec>
185 bool
186 OpenHashTable<Element, ElementVec>::InitCheck() const
187 {
188 	return (fHashArray && fElementVector);
189 }
190 
191 template<class Element, class ElementVec>
192 int32
193 OpenHashTable<Element, ElementVec>::OptimalSize(int32 minSize)
194 {
195 	for (int32 index = 0; ; index++)
196 		if (!kPrimes[index] || kPrimes[index] >= (uint32)minSize)
197 			return (int32)kPrimes[index];
198 
199 	return 0;
200 }
201 
202 template<class Element, class ElementVec>
203 Element *
204 OpenHashTable<Element, ElementVec>::FindFirst(uint32 hash) const
205 {
206 	ASSERT(fElementVector);
207 	hash %= fArraySize;
208 	if (fHashArray[hash] < 0)
209 		return 0;
210 
211 	return &fElementVector->At(fHashArray[hash]);
212 }
213 
214 template<class Element, class ElementVec>
215 int32
216 OpenHashTable<Element, ElementVec>::ElementIndex(const Element *element) const
217 {
218 	return fElementVector->IndexOf(*element);
219 }
220 
221 template<class Element, class ElementVec>
222 Element *
223 OpenHashTable<Element, ElementVec>::ElementAt(int32 index) const
224 {
225 	return &fElementVector->At(index);
226 }
227 
228 template<class Element, class ElementVec>
229 int32
230 OpenHashTable<Element, ElementVec>::ArraySize() const
231 {
232 	 return fArraySize;
233 }
234 
235 template<class Element, class ElementVec>
236 int32
237 OpenHashTable<Element, ElementVec>::VectorSize() const
238 {
239 	 return fElementVector->Size();
240 }
241 
242 template<class Element, class ElementVec>
243 int32
244 OpenHashTable<Element, ElementVec>::CountElements() const
245 {
246 	 return fElementCount;
247 }
248 
249 
250 template<class Element, class ElementVec>
251 Element *
252 OpenHashTable<Element, ElementVec>::Add(uint32 hash)
253 {
254 	ASSERT(fElementVector);
255 	_RehashIfNeeded();
256 	hash %= fArraySize;
257 	Element *result = fElementVector->Add();
258 	if (result) {
259 		result->fNext = fHashArray[hash];
260 		fHashArray[hash] = fElementVector->IndexOf(*result);
261 		fElementCount++;
262 	}
263 	return result;
264 }
265 
266 template<class Element, class ElementVec>
267 void
268 OpenHashTable<Element, ElementVec>::Remove(Element *element, bool dontRehash)
269 {
270 	if (!dontRehash)
271 		_RehashIfNeeded();
272 	uint32 hash = element->Hash() % fArraySize;
273 	int32 next = fHashArray[hash];
274 	ASSERT(next >= 0);
275 
276 	if (&fElementVector->At(next) == element) {
277 		fHashArray[hash] = element->fNext;
278 		fElementVector->Remove(next);
279 		fElementCount--;
280 		return;
281 	}
282 
283 	for (int32 index = next; index >= 0; ) {
284 		// look for an existing match in table
285 		next = fElementVector->At(index).fNext;
286 		if (next < 0) {
287 			TRESPASS();
288 			return;
289 		}
290 
291 		if (&fElementVector->At(next) == element) {
292 			fElementVector->At(index).fNext = element->fNext;
293 			fElementVector->Remove(next);
294 			fElementCount--;
295 			return;
296 		}
297 		index = next;
298 	}
299 }
300 
301 template<class Element, class ElementVec>
302 void
303 OpenHashTable<Element, ElementVec>::RemoveAll()
304 {
305 	for (int32 i = 0; fElementCount > 0 && i < fArraySize; i++) {
306 		int32 index = fHashArray[i];
307 		while (index >= 0) {
308 			Element* element = &fElementVector->At(index);
309 			int32 next = element->fNext;
310 			fElementVector->Remove(index);
311 			fElementCount--;
312 			index = next;
313 		}
314 		fHashArray[i] = -1;
315 	}
316 	_RehashIfNeeded();
317 }
318 
319 template<class Element, class ElementVec>
320 void
321 OpenHashTable<Element, ElementVec>::SetElementVector(ElementVec *elementVector)
322 {
323 	fElementVector = elementVector;
324 }
325 
326 // _RehashIfNeeded
327 template<class Element, class ElementVec>
328 bool
329 OpenHashTable<Element, ElementVec>::_RehashIfNeeded()
330 {
331 	// The load factor range [fMaxLoadFactor / 3, fMaxLoadFactor] is fine,
332 	// I think. After rehashing the load factor will be about
333 	// fMaxLoadFactor * 2 / 3, respectively fMaxLoadFactor / 2.
334 	float loadFactor = (float)fElementCount / (float)fArraySize;
335 	if (loadFactor > fMaxLoadFactor
336 		|| (fArraySize > fInitialSize && loadFactor < fMaxLoadFactor / 3)) {
337 		return _Rehash();
338 	}
339 	return true;
340 }
341 
342 // _Rehash
343 template<class Element, class ElementVec>
344 bool
345 OpenHashTable<Element, ElementVec>::_Rehash()
346 {
347 	bool result = true;
348 	int32 newSize = int32(fElementCount * 1.73 * fMaxLoadFactor);
349 	newSize = (fInitialSize > newSize ? fInitialSize : newSize);
350 	if (newSize != fArraySize) {
351 		// allocate a new array
352 		int32 *newHashArray = (int32*)calloc(newSize, sizeof(int32));
353 		if (newHashArray) {
354 			// init the new hash array
355 			for (int32 index = 0; index < newSize; index++)
356 				newHashArray[index] = -1;
357 			// iterate through all elements and put them into the new
358 			// hash array
359 			for (int i = 0; i < fArraySize; i++) {
360 				int32 index = fHashArray[i];
361 				while (index >= 0) {
362 					// insert the element in the new array
363 					Element &element = fElementVector->At(index);
364 					int32 next = element.fNext;
365 					uint32 hash = (element.Hash() % newSize);
366 					element.fNext = newHashArray[hash];
367 					newHashArray[hash] = index;
368 					// next element in old list
369 					index = next;
370 				}
371 			}
372 			// delete the old array and set the new one
373 			free(fHashArray);
374 			fHashArray = newHashArray;
375 			fArraySize = newSize;
376 		} else
377 			result = false;
378 	}
379 	return result;
380 }
381 
382 
383 template<class Element>
384 OpenHashElementArray<Element>::OpenHashElementArray(int32 initialSize)
385 	:	fSize(initialSize),
386 		fNextFree(0),
387 		fNextDeleted(-1)
388 {
389 	fData = (Element*)calloc((size_t)initialSize, sizeof(Element));
390 }
391 
392 template<class Element>
393 OpenHashElementArray<Element>::~OpenHashElementArray()
394 {
395 	free(fData);
396 }
397 
398 template<class Element>
399 bool
400 OpenHashElementArray<Element>::InitCheck() const
401 {
402 	return fData;
403 }
404 
405 template<class Element>
406 Element &
407 OpenHashElementArray<Element>::At(int32 index)
408 {
409 	ASSERT(index < fSize);
410 	return fData[index];
411 }
412 
413 template<class Element>
414 const Element &
415 OpenHashElementArray<Element>::At(int32 index) const
416 {
417 	ASSERT(index < fSize);
418 	return fData[index];
419 }
420 
421 template<class Element>
422 int32
423 OpenHashElementArray<Element>::IndexOf(const Element &element) const
424 {
425 	int32 result = &element - fData;
426 	if (result < 0 || result > fSize)
427 		return -1;
428 
429 	return result;
430 }
431 
432 template<class Element>
433 int32
434 OpenHashElementArray<Element>::Size() const
435 {
436 	return fSize;
437 }
438 
439 
440 template<class Element>
441 Element *
442 OpenHashElementArray<Element>::Add(const Element &newElement)
443 {
444 	Element *element = Add();
445 	if (element)
446 		element.Adopt(newElement);
447 	return element;
448 }
449 
450 #if DEBUG
451 const int32 kGrowChunk = 10;
452 #else
453 const int32 kGrowChunk = 1024;
454 #endif
455 
456 template<class Element>
457 Element *
458 OpenHashElementArray<Element>::Add()
459 {
460 	int32 index = fNextFree;
461 	if (fNextDeleted >= 0) {
462 		index = fNextDeleted;
463 		fNextDeleted = At(index).fNext;
464 	} else if (fNextFree >= fSize - 1) {
465 		int32 newSize = fSize + kGrowChunk;
466 /*
467 		Element *newData = (Element *)calloc((size_t)newSize , sizeof(Element));
468 		if (!newData)
469 			return NULL;
470 		memcpy(newData, fData, fSize * sizeof(Element));
471 		free(fData);
472 */
473 		Element *newData = (Element*)realloc(fData,
474 			(size_t)newSize * sizeof(Element));
475 		if (!newData)
476 			return NULL;
477 
478 		fData = newData;
479 		fSize = newSize;
480 		index = fNextFree;
481 		fNextFree++;
482 	} else
483 		fNextFree++;
484 
485 	new (&At(index)) Element;
486 		// call placement new to initialize the element properly
487 	ASSERT(At(index).fNext == -1);
488 
489 	return &At(index);
490 }
491 
492 template<class Element>
493 void
494 OpenHashElementArray<Element>::Remove(int32 index)
495 {
496 	// delete by chaining empty elements in a single linked
497 	// list, reusing the next field
498 	ASSERT(index < fSize);
499 	At(index).~Element();
500 		// call the destructor explicitly to destroy the element
501 		// properly
502 	At(index).fNext = fNextDeleted;
503 	fNextDeleted = index;
504 }
505 
506 } // namespace BPrivate
507 
508 using BPrivate::OpenHashTable;
509 
510 #endif // __OPEN_HASH_TABLE__
511