xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision 9eb55bc1d104b8fda80898f8b25c94d8000c8255)
1 #ifndef B_PLUS_TREE_H
2 #define B_PLUS_TREE_H
3 /* BPlusTree - BFS B+Tree implementation
4 **
5 ** Initial version by Axel Dörfler, axeld@pinc-software.de
6 ** Roughly based on 'btlib' written by Marcus J. Ranum
7 **
8 ** Copyright (c) 2001-2004 pinc Software. All Rights Reserved.
9 ** This file may be used under the terms of the OpenBeOS License.
10 */
11 
12 
13 #include "bfs.h"
14 #include "Journal.h"
15 #include "Chain.h"
16 
17 #include <string.h>
18 
19 
20 //****************** on-disk structures ********************
21 
22 #define BPLUSTREE_NULL			-1LL
23 #define BPLUSTREE_FREE			-2LL
24 
25 struct bplustree_header {
26 	uint32		magic;
27 	uint32		node_size;
28 	uint32		max_number_of_levels;
29 	uint32		data_type;
30 	off_t		root_node_pointer;
31 	off_t		free_node_pointer;
32 	off_t		maximum_size;
33 
34 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
35 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
36 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
37 	off_t RootNode() const { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
38 	off_t FreeNode() const { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
39 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
40 	uint32 MaxNumberOfLevels() const { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
41 
42 	inline bool IsValidLink(off_t link);
43 } _PACKED;
44 
45 #define BPLUSTREE_MAGIC 			0x69f6c2e8
46 #define BPLUSTREE_NODE_SIZE 		1024
47 #define BPLUSTREE_MAX_KEY_LENGTH	256
48 #define BPLUSTREE_MIN_KEY_LENGTH	1
49 
50 enum bplustree_types {
51 	BPLUSTREE_STRING_TYPE	= 0,
52 	BPLUSTREE_INT32_TYPE	= 1,
53 	BPLUSTREE_UINT32_TYPE	= 2,
54 	BPLUSTREE_INT64_TYPE	= 3,
55 	BPLUSTREE_UINT64_TYPE	= 4,
56 	BPLUSTREE_FLOAT_TYPE	= 5,
57 	BPLUSTREE_DOUBLE_TYPE	= 6
58 };
59 
60 struct sorted_array;
61 typedef sorted_array duplicate_array;
62 
63 struct bplustree_node {
64 	off_t	left_link;
65 	off_t	right_link;
66 	off_t	overflow_link;
67 	uint16	all_key_count;
68 	uint16	all_key_length;
69 
70 	off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); }
71 	off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); }
72 	off_t OverflowLink() const { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); }
73 	uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); }
74 	uint16 AllKeyLength() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); }
75 
76 	inline uint16 *KeyLengths() const;
77 	inline off_t *Values() const;
78 	inline uint8 *Keys() const;
79 	inline int32 Used() const;
80 	uint8 *KeyAt(int32 index, uint16 *keyLength) const;
81 
82 	inline bool IsLeaf() const;
83 
84 	void Initialize();
85 	uint8 CountDuplicates(off_t offset, bool isFragment) const;
86 	off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const;
87 	int32 FragmentsUsed(uint32 nodeSize);
88 	inline duplicate_array *FragmentAt(int8 index);
89 	inline duplicate_array *DuplicateArray();
90 
91 	static inline uint8 LinkType(off_t link);
92 	static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0);
93 	static inline bool IsDuplicate(off_t link);
94 	static inline off_t FragmentOffset(off_t link);
95 	static inline uint32 FragmentIndex(off_t link);
96 
97 #ifdef DEBUG
98 	void CheckIntegrity(uint32 nodeSize);
99 #endif
100 } _PACKED;
101 
102 //#define BPLUSTREE_NODE 0
103 #define BPLUSTREE_DUPLICATE_NODE 2
104 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
105 
106 #define NUM_FRAGMENT_VALUES 7
107 #define NUM_DUPLICATE_VALUES 125
108 
109 //**************************************
110 
111 enum bplustree_traversing {
112 	BPLUSTREE_FORWARD = 1,
113 	BPLUSTREE_BACKWARD = -1,
114 
115 	BPLUSTREE_BEGIN = 0,
116 	BPLUSTREE_END = 1
117 };
118 
119 
120 //****************** in-memory structures ********************
121 
122 template<class T> class Stack;
123 class BPlusTree;
124 class TreeIterator;
125 class CachedNode;
126 class Inode;
127 
128 // needed for searching (utilizing a stack)
129 struct node_and_key {
130 	off_t	nodeOffset;
131 	uint16	keyIndex;
132 };
133 
134 
135 //***** Cache handling *****
136 
137 class CachedNode {
138 	public:
139 		CachedNode(BPlusTree *tree)
140 			:
141 			fTree(tree),
142 			fNode(NULL),
143 			fBlock(NULL)
144 		{
145 		}
146 
147 		CachedNode(BPlusTree *tree, off_t offset, bool check = true)
148 			:
149 			fTree(tree),
150 			fNode(NULL),
151 			fBlock(NULL)
152 		{
153 			SetTo(offset, check);
154 		}
155 
156 		~CachedNode()
157 		{
158 			Unset();
159 		}
160 
161 		bplustree_node *SetTo(off_t offset, bool check = true);
162 		bplustree_header *SetToHeader();
163 		void Unset();
164 
165 		status_t Free(Transaction *transaction, off_t offset);
166 		status_t Allocate(Transaction *transaction, bplustree_node **node, off_t *offset);
167 		status_t WriteBack(Transaction *transaction);
168 
169 		bplustree_node *Node() const { return fNode; }
170 
171 	protected:
172 		bplustree_node	*InternalSetTo(off_t offset);
173 
174 		BPlusTree		*fTree;
175 		bplustree_node	*fNode;
176 		uint8			*fBlock;
177 		off_t			fBlockNumber;
178 };
179 
180 
181 //******** B+tree class *********
182 
183 class BPlusTree {
184 	public:
185 		BPlusTree(Transaction *transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
186 		BPlusTree(Inode *stream);
187 		BPlusTree();
188 		~BPlusTree();
189 
190 		status_t	SetTo(Transaction *transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
191 		status_t	SetTo(Inode *stream);
192 		status_t	SetStream(Inode *stream);
193 
194 		status_t	InitCheck();
195 		status_t	Validate();
196 
197 		status_t	Remove(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
198 		status_t	Insert(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
199 
200 		status_t	Remove(Transaction *transaction, const char *key, off_t value);
201 		status_t	Insert(Transaction *transaction, const char *key, off_t value);
202 		status_t	Insert(Transaction *transaction, int32 key, off_t value);
203 		status_t	Insert(Transaction *transaction, uint32 key, off_t value);
204 		status_t	Insert(Transaction *transaction, int64 key, off_t value);
205 		status_t	Insert(Transaction *transaction, uint64 key, off_t value);
206 		status_t	Insert(Transaction *transaction, float key, off_t value);
207 		status_t	Insert(Transaction *transaction, double key, off_t value);
208 
209 		status_t	Replace(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value);
210 		status_t	Find(const uint8 *key, uint16 keyLength, off_t *value);
211 
212 		static int32 TypeCodeToKeyType(type_code code);
213 		static int32 ModeToKeyType(mode_t mode);
214 
215 	private:
216 		BPlusTree(const BPlusTree &);
217 		BPlusTree &operator=(const BPlusTree &);
218 			// no implementation
219 
220 		int32		CompareKeys(const void *key1, int keylength1, const void *key2, int keylength2);
221 		status_t	FindKey(bplustree_node *node, const uint8 *key, uint16 keyLength,
222 						uint16 *index = NULL, off_t *next = NULL);
223 		status_t	SeekDown(Stack<node_and_key> &stack, const uint8 *key, uint16 keyLength);
224 
225 		status_t	FindFreeDuplicateFragment(bplustree_node *node, CachedNode *cached,
226 						off_t *_offset, bplustree_node **_fragment, uint32 *_index);
227 		status_t	InsertDuplicate(Transaction *transaction, CachedNode *cached,
228 						bplustree_node *node, uint16 index, off_t value);
229 		void		InsertKey(bplustree_node *node, uint16 index, uint8 *key, uint16 keyLength,
230 						off_t value);
231 		status_t	SplitNode(bplustree_node *node, off_t nodeOffset, bplustree_node *other,
232 						off_t otherOffset, uint16 *_keyIndex, uint8 *key, uint16 *_keyLength,
233 						off_t *_value);
234 
235 		status_t	RemoveDuplicate(Transaction *transaction, bplustree_node *node,
236 						CachedNode *cached, uint16 keyIndex, off_t value);
237 		void		RemoveKey(bplustree_node *node, uint16 index);
238 
239 		void		UpdateIterators(off_t offset, off_t nextOffset, uint16 keyIndex,
240 						uint16 splitAt, int8 change);
241 		void		AddIterator(TreeIterator *iterator);
242 		void		RemoveIterator(TreeIterator *iterator);
243 
244 	private:
245 		friend TreeIterator;
246 		friend CachedNode;
247 
248 		Inode		*fStream;
249 		bplustree_header *fHeader;
250 		CachedNode	fCachedHeader;
251 		int32		fNodeSize;
252 		bool		fAllowDuplicates;
253 		status_t	fStatus;
254 		SimpleLock	fIteratorLock;
255 		Chain<TreeIterator> fIterators;
256 };
257 
258 
259 //***** helper classes/functions *****
260 
261 extern int32 compareKeys(type_code type, const void *key1, int keyLength1,
262 				const void *key2, int keyLength2);
263 
264 class TreeIterator {
265 	public:
266 		TreeIterator(BPlusTree *tree);
267 		~TreeIterator();
268 
269 		status_t	Goto(int8 to);
270 		status_t	Traverse(int8 direction, void *key, uint16 *keyLength, uint16 maxLength,
271 						off_t *value, uint16 *duplicate = NULL);
272 		status_t	Find(const uint8 *key, uint16 keyLength);
273 
274 		status_t	Rewind();
275 		status_t	GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
276 						off_t *value, uint16 *duplicate = NULL);
277 		status_t	GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
278 						off_t *value, uint16 *duplicate = NULL);
279 		void		SkipDuplicates();
280 
281 #ifdef DEBUG
282 		void		Dump();
283 #endif
284 
285 	private:
286 		BPlusTree	*fTree;
287 
288 		off_t		fCurrentNodeOffset;	// traverse position
289 		int32		fCurrentKey;
290 		off_t		fDuplicateNode;
291 		uint16		fDuplicate, fNumDuplicates;
292 		bool		fIsFragment;
293 
294 	private:
295 		friend Chain<TreeIterator>;
296 		friend BPlusTree;
297 
298 		void Update(off_t offset, off_t nextOffset, uint16 keyIndex, uint16 splitAt, int8 change);
299 		void Stop();
300 		TreeIterator *fNext;
301 };
302 
303 // BPlusTree's inline functions (most of them may not be needed)
304 
305 inline status_t
306 BPlusTree::Remove(Transaction *transaction, const char *key, off_t value)
307 {
308 	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
309 		return B_BAD_TYPE;
310 	return Remove(transaction, (uint8 *)key, strlen(key), value);
311 }
312 
313 inline status_t
314 BPlusTree::Insert(Transaction *transaction, const char *key, off_t value)
315 {
316 	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
317 		return B_BAD_TYPE;
318 	return Insert(transaction, (uint8 *)key, strlen(key), value);
319 }
320 
321 inline status_t
322 BPlusTree::Insert(Transaction *transaction, int32 key, off_t value)
323 {
324 	if (fHeader->data_type != BPLUSTREE_INT32_TYPE)
325 		return B_BAD_TYPE;
326 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
327 }
328 
329 inline status_t
330 BPlusTree::Insert(Transaction *transaction, uint32 key, off_t value)
331 {
332 	if (fHeader->data_type != BPLUSTREE_UINT32_TYPE)
333 		return B_BAD_TYPE;
334 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
335 }
336 
337 inline status_t
338 BPlusTree::Insert(Transaction *transaction, int64 key, off_t value)
339 {
340 	if (fHeader->data_type != BPLUSTREE_INT64_TYPE)
341 		return B_BAD_TYPE;
342 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
343 }
344 
345 inline status_t
346 BPlusTree::Insert(Transaction *transaction, uint64 key, off_t value)
347 {
348 	if (fHeader->data_type != BPLUSTREE_UINT64_TYPE)
349 		return B_BAD_TYPE;
350 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
351 }
352 
353 inline status_t
354 BPlusTree::Insert(Transaction *transaction, float key, off_t value)
355 {
356 	if (fHeader->data_type != BPLUSTREE_FLOAT_TYPE)
357 		return B_BAD_TYPE;
358 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
359 }
360 
361 inline status_t
362 BPlusTree::Insert(Transaction *transaction, double key, off_t value)
363 {
364 	if (fHeader->data_type != BPLUSTREE_DOUBLE_TYPE)
365 		return B_BAD_TYPE;
366 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
367 }
368 
369 
370 /************************ TreeIterator inline functions ************************/
371 //	#pragma mark -
372 
373 inline status_t
374 TreeIterator::Rewind()
375 {
376 	return Goto(BPLUSTREE_BEGIN);
377 }
378 
379 inline status_t
380 TreeIterator::GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
381 	off_t *value, uint16 *duplicate)
382 {
383 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, duplicate);
384 }
385 
386 inline status_t
387 TreeIterator::GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
388 	off_t *value, uint16 *duplicate)
389 {
390 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, duplicate);
391 }
392 
393 /************************ bplustree_header inline functions ************************/
394 //	#pragma mark -
395 
396 
397 inline bool
398 bplustree_header::IsValidLink(off_t link)
399 {
400 	return link == BPLUSTREE_NULL || (link > 0 && link <= MaximumSize() - NodeSize());
401 }
402 
403 
404 /************************ bplustree_node inline functions ************************/
405 //	#pragma mark -
406 
407 
408 inline uint16 *
409 bplustree_node::KeyLengths() const
410 {
411 	return (uint16 *)(((char *)this) + round_up(sizeof(bplustree_node) + AllKeyLength()));
412 }
413 
414 
415 inline off_t *
416 bplustree_node::Values() const
417 {
418 	return (off_t *)((char *)KeyLengths() + NumKeys() * sizeof(uint16));
419 }
420 
421 
422 inline uint8 *
423 bplustree_node::Keys() const
424 {
425 	return (uint8 *)this + sizeof(bplustree_node);
426 }
427 
428 
429 inline int32
430 bplustree_node::Used() const
431 {
432 	return round_up(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() * (sizeof(uint16) + sizeof(off_t));
433 }
434 
435 
436 inline bool
437 bplustree_node::IsLeaf() const
438 {
439 	return OverflowLink() == BPLUSTREE_NULL;
440 }
441 
442 
443 inline duplicate_array *
444 bplustree_node::FragmentAt(int8 index)
445 {
446 	return (duplicate_array *)((off_t *)this + index * (NUM_FRAGMENT_VALUES + 1));
447 }
448 
449 
450 inline duplicate_array *
451 bplustree_node::DuplicateArray()
452 {
453 	return (duplicate_array *)&this->overflow_link;
454 }
455 
456 
457 inline uint8
458 bplustree_node::LinkType(off_t link)
459 {
460 	return *(uint64 *)&link >> 62;
461 }
462 
463 
464 inline off_t
465 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
466 {
467 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) | (fragmentIndex & 0x3ff);
468 }
469 
470 
471 inline bool
472 bplustree_node::IsDuplicate(off_t link)
473 {
474 	return (LinkType(link) & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
475 }
476 
477 
478 inline off_t
479 bplustree_node::FragmentOffset(off_t link)
480 {
481 	return link & 0x3ffffffffffffc00LL;
482 }
483 
484 
485 inline uint32
486 bplustree_node::FragmentIndex(off_t link)
487 {
488 	return (uint32)(link & 0x3ff);
489 }
490 
491 #endif	/* B_PLUS_TREE_H */
492