xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision fef6144999c2fa611f59ee6ffe6dd7999501385c)
1 /* BPlusTree - BFS B+Tree implementation
2  *
3  * Roughly based on 'btlib' written by Marcus J. Ranum - it shares
4  * no code but achieves binary compatibility with the on disk format.
5  *
6  * Copyright 2001-2005, Axel Dörfler, axeld@pinc-software.de.
7  * This file may be used under the terms of the MIT License.
8  */
9 #ifndef B_PLUS_TREE_H
10 #define B_PLUS_TREE_H
11 
12 
13 #include "bfs.h"
14 #include "Journal.h"
15 #include "Chain.h"
16 
17 #include <string.h>
18 
19 
20 //****************** on-disk structures ********************
21 
22 struct bplustree_node;
23 
24 #define BPLUSTREE_NULL			-1LL
25 #define BPLUSTREE_FREE			-2LL
26 
27 struct bplustree_header {
28 	uint32		magic;
29 	uint32		node_size;
30 	uint32		max_number_of_levels;
31 	uint32		data_type;
32 	off_t		root_node_pointer;
33 	off_t		free_node_pointer;
34 	off_t		maximum_size;
35 
36 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
37 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
38 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
39 	off_t RootNode() const { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
40 	off_t FreeNode() const { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
41 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
42 	uint32 MaxNumberOfLevels() const { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
43 
44 	inline bool CheckNode(bplustree_node *node);
45 	inline bool IsValidLink(off_t link);
46 } _PACKED;
47 
48 #define BPLUSTREE_MAGIC 			0x69f6c2e8
49 #define BPLUSTREE_NODE_SIZE 		1024
50 #define BPLUSTREE_MAX_KEY_LENGTH	256
51 #define BPLUSTREE_MIN_KEY_LENGTH	1
52 
53 enum bplustree_types {
54 	BPLUSTREE_STRING_TYPE	= 0,
55 	BPLUSTREE_INT32_TYPE	= 1,
56 	BPLUSTREE_UINT32_TYPE	= 2,
57 	BPLUSTREE_INT64_TYPE	= 3,
58 	BPLUSTREE_UINT64_TYPE	= 4,
59 	BPLUSTREE_FLOAT_TYPE	= 5,
60 	BPLUSTREE_DOUBLE_TYPE	= 6
61 };
62 
63 struct sorted_array;
64 typedef sorted_array duplicate_array;
65 
66 struct bplustree_node {
67 	off_t	left_link;
68 	off_t	right_link;
69 	off_t	overflow_link;
70 	uint16	all_key_count;
71 	uint16	all_key_length;
72 
73 	off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); }
74 	off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); }
75 	off_t OverflowLink() const { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); }
76 	uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); }
77 	uint16 AllKeyLength() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); }
78 
79 	inline uint16 *KeyLengths() const;
80 	inline off_t *Values() const;
81 	inline uint8 *Keys() const;
82 	inline int32 Used() const;
83 	uint8 *KeyAt(int32 index, uint16 *keyLength) const;
84 
85 	inline bool IsLeaf() const;
86 
87 	void Initialize();
88 	uint8 CountDuplicates(off_t offset, bool isFragment) const;
89 	off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const;
90 	uint32 FragmentsUsed(uint32 nodeSize);
91 	inline duplicate_array *FragmentAt(int8 index);
92 	inline duplicate_array *DuplicateArray();
93 
94 	static inline uint8 LinkType(off_t link);
95 	static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0);
96 	static inline bool IsDuplicate(off_t link);
97 	static inline off_t FragmentOffset(off_t link);
98 	static inline uint32 FragmentIndex(off_t link);
99 	static inline uint32 MaxFragments(uint32 nodeSize);
100 
101 #ifdef DEBUG
102 	status_t CheckIntegrity(uint32 nodeSize);
103 #endif
104 } _PACKED;
105 
106 //#define BPLUSTREE_NODE 0
107 #define BPLUSTREE_DUPLICATE_NODE 2
108 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
109 
110 #define NUM_FRAGMENT_VALUES 7
111 #define NUM_DUPLICATE_VALUES 125
112 
113 //**************************************
114 
115 enum bplustree_traversing {
116 	BPLUSTREE_FORWARD = 1,
117 	BPLUSTREE_BACKWARD = -1,
118 
119 	BPLUSTREE_BEGIN = 0,
120 	BPLUSTREE_END = 1
121 };
122 
123 
124 //****************** in-memory structures ********************
125 
126 template<class T> class Stack;
127 class BPlusTree;
128 class TreeIterator;
129 class CachedNode;
130 class Inode;
131 
132 // needed for searching (utilizing a stack)
133 struct node_and_key {
134 	off_t	nodeOffset;
135 	uint16	keyIndex;
136 };
137 
138 
139 //***** Cache handling *****
140 
141 class CachedNode {
142 	public:
143 		CachedNode(BPlusTree *tree)
144 			:
145 			fTree(tree),
146 			fNode(NULL)
147 		{
148 		}
149 
150 		CachedNode(BPlusTree *tree, off_t offset, bool check = true)
151 			:
152 			fTree(tree),
153 			fNode(NULL)
154 		{
155 			SetTo(offset, check);
156 		}
157 
158 		~CachedNode()
159 		{
160 			Unset();
161 		}
162 
163 		bplustree_node *SetTo(off_t offset, bool check = true);
164 		bplustree_node *SetToWritable(Transaction &transaction, off_t offset, bool check = true);
165 		bplustree_header *SetToWritableHeader(Transaction &transaction);
166 		bplustree_header *SetToHeader();
167 		status_t MakeWritable(Transaction &transaction);
168 		void UnsetUnchanged(Transaction &transaction);
169 		void Unset();
170 
171 		status_t Free(Transaction &transaction, off_t offset);
172 		status_t Allocate(Transaction &transaction, bplustree_node **node, off_t *offset);
173 
174 		bool IsWritable() const { return fWritable; }
175 		bplustree_node *Node() const { return fNode; }
176 
177 	protected:
178 		bplustree_node	*InternalSetTo(Transaction *transaction, off_t offset);
179 
180 		BPlusTree		*fTree;
181 		bplustree_node	*fNode;
182 		off_t			fOffset;
183 		off_t			fBlockNumber;
184 		bool			fWritable;
185 };
186 
187 
188 //******** B+tree class *********
189 
190 class BPlusTree {
191 	public:
192 		BPlusTree(Transaction &transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
193 		BPlusTree(Inode *stream);
194 		BPlusTree();
195 		~BPlusTree();
196 
197 		status_t	SetTo(Transaction &transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE);
198 		status_t	SetTo(Inode *stream);
199 		status_t	SetStream(Inode *stream);
200 
201 		status_t	InitCheck();
202 		status_t	Validate();
203 
204 		status_t	Remove(Transaction &transaction, const uint8 *key, uint16 keyLength, off_t value);
205 		status_t	Insert(Transaction &transaction, const uint8 *key, uint16 keyLength, off_t value);
206 
207 		status_t	Remove(Transaction &transaction, const char *key, off_t value);
208 		status_t	Insert(Transaction &transaction, const char *key, off_t value);
209 		status_t	Insert(Transaction &transaction, int32 key, off_t value);
210 		status_t	Insert(Transaction &transaction, uint32 key, off_t value);
211 		status_t	Insert(Transaction &transaction, int64 key, off_t value);
212 		status_t	Insert(Transaction &transaction, uint64 key, off_t value);
213 		status_t	Insert(Transaction &transaction, float key, off_t value);
214 		status_t	Insert(Transaction &transaction, double key, off_t value);
215 
216 		status_t	Replace(Transaction &transaction, const uint8 *key, uint16 keyLength, off_t value);
217 		status_t	Find(const uint8 *key, uint16 keyLength, off_t *value);
218 
219 		static int32 TypeCodeToKeyType(type_code code);
220 		static int32 ModeToKeyType(mode_t mode);
221 
222 	private:
223 		BPlusTree(const BPlusTree &);
224 		BPlusTree &operator=(const BPlusTree &);
225 			// no implementation
226 
227 		int32		CompareKeys(const void *key1, int keylength1, const void *key2, int keylength2);
228 		status_t	FindKey(bplustree_node *node, const uint8 *key, uint16 keyLength,
229 						uint16 *index = NULL, off_t *next = NULL);
230 		status_t	SeekDown(Stack<node_and_key> &stack, const uint8 *key, uint16 keyLength);
231 
232 		status_t	FindFreeDuplicateFragment(bplustree_node *node, CachedNode &cached,
233 						off_t *_offset, bplustree_node **_fragment, uint32 *_index);
234 		status_t	InsertDuplicate(Transaction &transaction, CachedNode &cached,
235 						bplustree_node *node, uint16 index, off_t value);
236 		void		InsertKey(bplustree_node *node, uint16 index, uint8 *key, uint16 keyLength,
237 						off_t value);
238 		status_t	SplitNode(bplustree_node *node, off_t nodeOffset, bplustree_node *other,
239 						off_t otherOffset, uint16 *_keyIndex, uint8 *key, uint16 *_keyLength,
240 						off_t *_value);
241 
242 		status_t	RemoveDuplicate(Transaction &transaction, bplustree_node *node,
243 						CachedNode &cached, uint16 keyIndex, off_t value);
244 		void		RemoveKey(bplustree_node *node, uint16 index);
245 
246 		void		UpdateIterators(off_t offset, off_t nextOffset, uint16 keyIndex,
247 						uint16 splitAt, int8 change);
248 		void		AddIterator(TreeIterator *iterator);
249 		void		RemoveIterator(TreeIterator *iterator);
250 
251 	private:
252 		friend class TreeIterator;
253 		friend class CachedNode;
254 
255 		Inode		*fStream;
256 		bplustree_header *fHeader;
257 		CachedNode	fCachedHeader;
258 		int32		fNodeSize;
259 		bool		fAllowDuplicates;
260 		status_t	fStatus;
261 		SimpleLock	fIteratorLock;
262 		Chain<TreeIterator> fIterators;
263 };
264 
265 
266 //***** helper classes/functions *****
267 
268 extern int32 compareKeys(type_code type, const void *key1, int keyLength1,
269 				const void *key2, int keyLength2);
270 
271 class TreeIterator {
272 	public:
273 		TreeIterator(BPlusTree *tree);
274 		~TreeIterator();
275 
276 		status_t	Goto(int8 to);
277 		status_t	Traverse(int8 direction, void *key, uint16 *keyLength, uint16 maxLength,
278 						off_t *value, uint16 *duplicate = NULL);
279 		status_t	Find(const uint8 *key, uint16 keyLength);
280 
281 		status_t	Rewind();
282 		status_t	GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
283 						off_t *value, uint16 *duplicate = NULL);
284 		status_t	GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
285 						off_t *value, uint16 *duplicate = NULL);
286 		void		SkipDuplicates();
287 
288 #ifdef DEBUG
289 		void		Dump();
290 #endif
291 
292 	private:
293 		BPlusTree	*fTree;
294 
295 		off_t		fCurrentNodeOffset;	// traverse position
296 		int32		fCurrentKey;
297 		off_t		fDuplicateNode;
298 		uint16		fDuplicate, fNumDuplicates;
299 		bool		fIsFragment;
300 
301 	private:
302 		friend class Chain<TreeIterator>;
303 		friend class BPlusTree;
304 
305 		void Update(off_t offset, off_t nextOffset, uint16 keyIndex, uint16 splitAt, int8 change);
306 		void Stop();
307 		TreeIterator *fNext;
308 };
309 
310 // BPlusTree's inline functions (most of them may not be needed)
311 
312 inline status_t
313 BPlusTree::Remove(Transaction &transaction, const char *key, off_t value)
314 {
315 	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
316 		return B_BAD_TYPE;
317 	return Remove(transaction, (uint8 *)key, strlen(key), value);
318 }
319 
320 inline status_t
321 BPlusTree::Insert(Transaction &transaction, const char *key, off_t value)
322 {
323 	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
324 		return B_BAD_TYPE;
325 	return Insert(transaction, (uint8 *)key, strlen(key), value);
326 }
327 
328 inline status_t
329 BPlusTree::Insert(Transaction &transaction, int32 key, off_t value)
330 {
331 	if (fHeader->data_type != BPLUSTREE_INT32_TYPE)
332 		return B_BAD_TYPE;
333 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
334 }
335 
336 inline status_t
337 BPlusTree::Insert(Transaction &transaction, uint32 key, off_t value)
338 {
339 	if (fHeader->data_type != BPLUSTREE_UINT32_TYPE)
340 		return B_BAD_TYPE;
341 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
342 }
343 
344 inline status_t
345 BPlusTree::Insert(Transaction &transaction, int64 key, off_t value)
346 {
347 	if (fHeader->data_type != BPLUSTREE_INT64_TYPE)
348 		return B_BAD_TYPE;
349 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
350 }
351 
352 inline status_t
353 BPlusTree::Insert(Transaction &transaction, uint64 key, off_t value)
354 {
355 	if (fHeader->data_type != BPLUSTREE_UINT64_TYPE)
356 		return B_BAD_TYPE;
357 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
358 }
359 
360 inline status_t
361 BPlusTree::Insert(Transaction &transaction, float key, off_t value)
362 {
363 	if (fHeader->data_type != BPLUSTREE_FLOAT_TYPE)
364 		return B_BAD_TYPE;
365 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
366 }
367 
368 inline status_t
369 BPlusTree::Insert(Transaction &transaction, double key, off_t value)
370 {
371 	if (fHeader->data_type != BPLUSTREE_DOUBLE_TYPE)
372 		return B_BAD_TYPE;
373 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
374 }
375 
376 
377 /************************ TreeIterator inline functions ************************/
378 //	#pragma mark -
379 
380 inline status_t
381 TreeIterator::Rewind()
382 {
383 	return Goto(BPLUSTREE_BEGIN);
384 }
385 
386 inline status_t
387 TreeIterator::GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
388 	off_t *value, uint16 *duplicate)
389 {
390 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, duplicate);
391 }
392 
393 inline status_t
394 TreeIterator::GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
395 	off_t *value, uint16 *duplicate)
396 {
397 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, duplicate);
398 }
399 
400 /************************ bplustree_header inline functions ************************/
401 //	#pragma mark -
402 
403 
404 inline bool
405 bplustree_header::CheckNode(bplustree_node *node)
406 {
407 	// sanity checks (links, all_key_count)
408 	return IsValidLink(node->LeftLink())
409 		&& IsValidLink(node->RightLink())
410 		&& IsValidLink(node->OverflowLink())
411 		&& (int8 *)node->Values() + node->NumKeys() * sizeof(off_t) <= (int8 *)node + NodeSize();
412 }
413 
414 
415 inline bool
416 bplustree_header::IsValidLink(off_t link)
417 {
418 	return link == BPLUSTREE_NULL || (link > 0 && link <= MaximumSize() - NodeSize());
419 }
420 
421 
422 /************************ bplustree_node inline functions ************************/
423 //	#pragma mark -
424 
425 
426 inline uint16 *
427 bplustree_node::KeyLengths() const
428 {
429 	return (uint16 *)(((char *)this) + round_up(sizeof(bplustree_node) + AllKeyLength()));
430 }
431 
432 
433 inline off_t *
434 bplustree_node::Values() const
435 {
436 	return (off_t *)((char *)KeyLengths() + NumKeys() * sizeof(uint16));
437 }
438 
439 
440 inline uint8 *
441 bplustree_node::Keys() const
442 {
443 	return (uint8 *)this + sizeof(bplustree_node);
444 }
445 
446 
447 inline int32
448 bplustree_node::Used() const
449 {
450 	return round_up(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() * (sizeof(uint16) + sizeof(off_t));
451 }
452 
453 
454 inline bool
455 bplustree_node::IsLeaf() const
456 {
457 	return OverflowLink() == BPLUSTREE_NULL;
458 }
459 
460 
461 inline duplicate_array *
462 bplustree_node::FragmentAt(int8 index)
463 {
464 	return (duplicate_array *)((off_t *)this + index * (NUM_FRAGMENT_VALUES + 1));
465 }
466 
467 
468 inline duplicate_array *
469 bplustree_node::DuplicateArray()
470 {
471 	return (duplicate_array *)&this->overflow_link;
472 }
473 
474 
475 inline uint8
476 bplustree_node::LinkType(off_t link)
477 {
478 	return *(uint64 *)&link >> 62;
479 }
480 
481 
482 inline off_t
483 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
484 {
485 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) | (fragmentIndex & 0x3ff);
486 }
487 
488 
489 inline bool
490 bplustree_node::IsDuplicate(off_t link)
491 {
492 	return (LinkType(link) & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
493 }
494 
495 
496 inline off_t
497 bplustree_node::FragmentOffset(off_t link)
498 {
499 	return link & 0x3ffffffffffffc00LL;
500 }
501 
502 
503 inline uint32
504 bplustree_node::FragmentIndex(off_t link)
505 {
506 	return (uint32)(link & 0x3ff);
507 }
508 
509 
510 inline uint32
511 bplustree_node::MaxFragments(uint32 nodeSize)
512 {
513 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
514 }
515 
516 #endif	/* B_PLUS_TREE_H */
517