xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision 14b32de1d5efe99b4c6d4ef8c25df47eb009cf0f)
1 /*
2  * Copyright 2001-2012, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7 
8 
9 #include "bfs.h"
10 
11 #include "Utility.h"
12 
13 #if !_BOOT_MODE
14 #include "Journal.h"
15 class Inode;
16 #else
17 #define Inode BFS::Stream
18 
19 namespace BFS {
20 
21 class Stream;
22 class Transaction;
23 class TransactionListener {};
24 #endif // _BOOT_MODE
25 
26 //	#pragma mark - on-disk structures
27 
28 struct bplustree_node;
29 
30 #define BPLUSTREE_NULL			-1LL
31 #define BPLUSTREE_FREE			-2LL
32 
33 struct bplustree_header {
34 	uint32		magic;
35 	uint32		node_size;
36 	uint32		max_number_of_levels;
37 	uint32		data_type;
38 	int64		root_node_pointer;
39 	int64		free_node_pointer;
40 	int64		maximum_size;
41 
42 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
43 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
44 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
45 	off_t RootNode() const
46 		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
47 	off_t FreeNode() const
48 		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
49 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
50 	uint32 MaxNumberOfLevels() const
51 		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
52 
53 	inline bool CheckNode(bplustree_node* node) const;
54 	inline bool IsValidLink(off_t link) const;
55 	bool IsValid() const;
56 } _PACKED;
57 
58 #define BPLUSTREE_MAGIC 			0x69f6c2e8
59 #define BPLUSTREE_NODE_SIZE 		1024
60 #define BPLUSTREE_MAX_KEY_LENGTH	256
61 #define BPLUSTREE_MIN_KEY_LENGTH	1
62 
63 enum bplustree_types {
64 	BPLUSTREE_STRING_TYPE	= 0,
65 	BPLUSTREE_INT32_TYPE	= 1,
66 	BPLUSTREE_UINT32_TYPE	= 2,
67 	BPLUSTREE_INT64_TYPE	= 3,
68 	BPLUSTREE_UINT64_TYPE	= 4,
69 	BPLUSTREE_FLOAT_TYPE	= 5,
70 	BPLUSTREE_DOUBLE_TYPE	= 6
71 };
72 
73 
74 struct duplicate_array;
75 
76 
77 struct bplustree_node {
78 			int64				left_link;
79 			int64				right_link;
80 			int64				overflow_link;
81 			uint16				all_key_count;
82 			uint16				all_key_length;
83 
84 			off_t				LeftLink() const
85 									{ return BFS_ENDIAN_TO_HOST_INT64(
86 										left_link); }
87 			off_t				RightLink() const
88 									{ return BFS_ENDIAN_TO_HOST_INT64(
89 										right_link); }
90 			off_t				OverflowLink() const
91 									{ return BFS_ENDIAN_TO_HOST_INT64(
92 										overflow_link); }
93 			uint16				NumKeys() const
94 									{ return BFS_ENDIAN_TO_HOST_INT16(
95 										all_key_count); }
96 			uint16				AllKeyLength() const
97 									{ return BFS_ENDIAN_TO_HOST_INT16(
98 										all_key_length); }
99 
100 	inline	uint16*				KeyLengths() const;
101 	inline	off_t*				Values() const;
102 	inline	uint8*				Keys() const;
103 	inline	int32				Used() const;
104 			uint8*				KeyAt(int32 index, uint16* keyLength) const;
105 
106 	inline	bool				IsLeaf() const;
107 
108 			void				Initialize();
109 			uint8				CountDuplicates(off_t offset,
110 									bool isFragment) const;
111 			off_t				DuplicateAt(off_t offset, bool isFragment,
112 									int8 index) const;
113 			uint32				FragmentsUsed(uint32 nodeSize) const;
114 	inline	duplicate_array*	FragmentAt(int8 index) const;
115 	inline	duplicate_array*	DuplicateArray() const;
116 
117 	static inline uint8			LinkType(off_t link);
118 	static inline off_t			MakeLink(uint8 type, off_t link,
119 									uint32 fragmentIndex = 0);
120 	static inline bool			IsDuplicate(off_t link);
121 	static inline off_t			FragmentOffset(off_t link);
122 	static inline uint32		FragmentIndex(off_t link);
123 	static inline uint32		MaxFragments(uint32 nodeSize);
124 
125 			status_t			CheckIntegrity(uint32 nodeSize) const;
126 } _PACKED;
127 
128 //#define BPLUSTREE_NODE 0
129 #define BPLUSTREE_DUPLICATE_NODE 2
130 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
131 
132 #define NUM_FRAGMENT_VALUES 7
133 #define NUM_DUPLICATE_VALUES 125
134 
135 //**************************************
136 
137 enum bplustree_traversing {
138 	BPLUSTREE_FORWARD = 1,
139 	BPLUSTREE_BACKWARD = -1,
140 
141 	BPLUSTREE_BEGIN = 0,
142 	BPLUSTREE_END = 1
143 };
144 
145 
146 //	#pragma mark - in-memory structures
147 
148 
149 class BPlusTree;
150 struct TreeCheck;
151 class TreeIterator;
152 
153 
154 #if !_BOOT_MODE
155 template<class T> class Stack;
156 
157 // needed for searching (utilizing a stack)
158 struct node_and_key {
159 	off_t	nodeOffset;
160 	uint16	keyIndex;
161 };
162 #endif // !_BOOT_MODE
163 
164 
165 class CachedNode {
166 public:
167 	CachedNode(BPlusTree* tree)
168 		:
169 		fTree(tree),
170 		fNode(NULL)
171 	{
172 #if _BOOT_MODE
173 		fBlock = NULL;
174 #endif
175 	}
176 
177 	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
178 		:
179 		fTree(tree),
180 		fNode(NULL)
181 	{
182 #if _BOOT_MODE
183 		fBlock = NULL;
184 #endif
185 		SetTo(offset, check);
186 	}
187 
188 	~CachedNode()
189 	{
190 		Unset();
191 #if _BOOT_MODE
192 		free(fBlock);
193 #endif
194 	}
195 
196 			const bplustree_node* SetTo(off_t offset, bool check = true);
197 			status_t			SetTo(off_t offset,
198 									const bplustree_node** _node,
199 									bool check = true);
200 
201 			const bplustree_header* SetToHeader();
202 			void				Unset();
203 
204 #if !_BOOT_MODE
205 			bplustree_node*		SetToWritable(Transaction& transaction,
206 									off_t offset, bool check = true);
207 			bplustree_node*		MakeWritable(Transaction& transaction);
208 			bplustree_header*	SetToWritableHeader(Transaction& transaction);
209 
210 			void				UnsetUnchanged(Transaction& transaction);
211 
212 			status_t			Free(Transaction& transaction, off_t offset);
213 			status_t			Allocate(Transaction& transaction,
214 									bplustree_node** _node, off_t* _offset);
215 #endif // !_BOOT_MODE
216 
217 			bool				IsWritable() const { return fWritable; }
218 			bplustree_node*		Node() const { return fNode; }
219 
220 protected:
221 			bplustree_node*		InternalSetTo(Transaction* transaction,
222 									off_t offset);
223 
224 			BPlusTree*			fTree;
225 			bplustree_node*		fNode;
226 			off_t				fOffset;
227 			off_t				fBlockNumber;
228 			bool				fWritable;
229 #if _BOOT_MODE
230 			uint8*				fBlock;
231 #endif
232 };
233 
234 
235 class BPlusTree : public TransactionListener {
236 public:
237 #if !_BOOT_MODE
238 								BPlusTree(Transaction& transaction,
239 									Inode* stream,
240 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
241 #endif
242 								BPlusTree(Inode* stream);
243 								BPlusTree();
244 								~BPlusTree();
245 
246 #if !_BOOT_MODE
247 			status_t			SetTo(Transaction& transaction, Inode* stream,
248 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
249 #endif
250 			status_t			SetTo(Inode* stream);
251 			status_t			SetStream(Inode* stream);
252 
253 			status_t			InitCheck();
254 
255 			size_t				NodeSize() const { return fNodeSize; }
256 			Inode*				Stream() const { return fStream; }
257 
258 #if !_BOOT_MODE
259 			status_t			Validate(bool repair, bool& _errorsFound);
260 			status_t			MakeEmpty();
261 
262 			status_t			Remove(Transaction& transaction,
263 									const uint8* key, uint16 keyLength,
264 									off_t value);
265 			status_t			Insert(Transaction& transaction,
266 									const uint8* key, uint16 keyLength,
267 									off_t value);
268 
269 			status_t			Remove(Transaction& transaction, const char* key,
270 									off_t value);
271 			status_t			Insert(Transaction& transaction, const char* key,
272 									off_t value);
273 			status_t			Insert(Transaction& transaction, int32 key,
274 									off_t value);
275 			status_t			Insert(Transaction& transaction, uint32 key,
276 									off_t value);
277 			status_t			Insert(Transaction& transaction, int64 key,
278 									off_t value);
279 			status_t			Insert(Transaction& transaction, uint64 key,
280 									off_t value);
281 			status_t			Insert(Transaction& transaction, float key,
282 									off_t value);
283 			status_t			Insert(Transaction& transaction, double key,
284 									off_t value);
285 
286 			status_t			Replace(Transaction& transaction,
287 									const uint8* key, uint16 keyLength,
288 									off_t value);
289 #endif // !_BOOT_MODE
290 
291 			status_t			Find(const uint8* key, uint16 keyLength,
292 									off_t* value);
293 
294 #if !_BOOT_MODE
295 	static	int32				TypeCodeToKeyType(type_code code);
296 	static	int32				ModeToKeyType(mode_t mode);
297 
298 protected:
299 	virtual void				TransactionDone(bool success);
300 	virtual void				RemovedFromTransaction();
301 #endif // !_BOOT_MODE
302 
303 private:
304 								BPlusTree(const BPlusTree& other);
305 								BPlusTree& operator=(const BPlusTree& other);
306 									// no implementation
307 
308 			int32				_CompareKeys(const void* key1, int keylength1,
309 									const void* key2, int keylength2);
310 			status_t			_FindKey(const bplustree_node* node,
311 									const uint8* key, uint16 keyLength,
312 									uint16* index = NULL, off_t* next = NULL);
313 #if !_BOOT_MODE
314 			status_t			_SeekDown(Stack<node_and_key>& stack,
315 									const uint8* key, uint16 keyLength);
316 
317 			status_t			_FindFreeDuplicateFragment(
318 									Transaction& transaction,
319 									const bplustree_node* node,
320 									CachedNode& cached, off_t* _offset,
321 									bplustree_node** _fragment, uint32* _index);
322 			status_t			_InsertDuplicate(Transaction& transaction,
323 									CachedNode& cached,
324 									const bplustree_node* node, uint16 index,
325 									off_t value);
326 			void				_InsertKey(bplustree_node* node, uint16 index,
327 									uint8* key, uint16 keyLength, off_t value);
328 			status_t			_SplitNode(bplustree_node* node,
329 									off_t nodeOffset, bplustree_node* other,
330 									off_t otherOffset, uint16* _keyIndex,
331 									uint8* key, uint16* _keyLength,
332 									off_t* _value);
333 
334 			status_t			_RemoveDuplicate(Transaction& transaction,
335 									const bplustree_node* node,
336 									CachedNode& cached, uint16 keyIndex,
337 									off_t value);
338 			void				_RemoveKey(bplustree_node* node, uint16 index);
339 
340 			void				_UpdateIterators(off_t offset, off_t nextOffset,
341 									uint16 keyIndex, uint16 splitAt,
342 									int8 change);
343 			void				_AddIterator(TreeIterator* iterator);
344 			void				_RemoveIterator(TreeIterator* iterator);
345 
346 			status_t			_ValidateChildren(TreeCheck& check,
347 									uint32 level, off_t offset,
348 									const uint8* largestKey, uint16 keyLength,
349 									const bplustree_node* parent);
350 			status_t			_ValidateChild(TreeCheck& check,
351 									CachedNode& cached, uint32 level,
352 									off_t offset, off_t lastOffset,
353 									off_t nextOffset, const uint8* key,
354 									uint16 keyLength);
355 #endif // !_BOOT_MODE
356 
357 private:
358 			friend class TreeIterator;
359 			friend class CachedNode;
360 			friend class TreeCheck;
361 
362 			Inode*				fStream;
363 			bplustree_header	fHeader;
364 			int32				fNodeSize;
365 			bool				fAllowDuplicates;
366 			bool				fInTransaction;
367 			status_t			fStatus;
368 
369 #if !_BOOT_MODE
370 			mutex				fIteratorLock;
371 			SinglyLinkedList<TreeIterator> fIterators;
372 #endif
373 };
374 
375 
376 //	#pragma mark - helper classes/functions
377 
378 
379 extern int32 compareKeys(type_code type, const void* key1, int keyLength1,
380 	const void* key2, int keyLength2);
381 
382 
383 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
384 public:
385 								TreeIterator(BPlusTree* tree);
386 								~TreeIterator();
387 
388 			status_t			Goto(int8 to);
389 			status_t			Traverse(int8 direction, void* key,
390 									uint16* keyLength, uint16 maxLength,
391 									off_t* value, uint16* duplicate = NULL);
392 			status_t			Find(const uint8* key, uint16 keyLength);
393 
394 			status_t			Rewind();
395 			status_t			GetNextEntry(void* key, uint16* keyLength,
396 									uint16 maxLength, off_t* value,
397 									uint16* duplicate = NULL);
398 			status_t			GetPreviousEntry(void* key, uint16* keyLength,
399 									uint16 maxLength, off_t* value,
400 									uint16* duplicate = NULL);
401 			void				SkipDuplicates();
402 
403 			BPlusTree*			Tree() const { return fTree; }
404 
405 #ifdef DEBUG
406 			void				Dump();
407 #endif
408 
409 private:
410 			friend class BPlusTree;
411 
412 			// called by BPlusTree
413 			void				Update(off_t offset, off_t nextOffset,
414 									uint16 keyIndex, uint16 splitAt,
415 									int8 change);
416 			void				Stop();
417 
418 private:
419 			BPlusTree*			fTree;
420 			off_t				fCurrentNodeOffset;
421 									// traverse position
422 			int32				fCurrentKey;
423 			off_t				fDuplicateNode;
424 			uint16				fDuplicate;
425 			uint16				fNumDuplicates;
426 			bool				fIsFragment;
427 };
428 
429 
430 //	#pragma mark - BPlusTree's inline functions
431 //	(most of them may not be needed)
432 
433 
434 #if !_BOOT_MODE
435 inline status_t
436 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
437 {
438 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
439 		return B_BAD_TYPE;
440 	return Remove(transaction, (uint8*)key, strlen(key), value);
441 }
442 
443 
444 inline status_t
445 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
446 {
447 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
448 		return B_BAD_TYPE;
449 	return Insert(transaction, (uint8*)key, strlen(key), value);
450 }
451 
452 
453 inline status_t
454 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
455 {
456 	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
457 		return B_BAD_TYPE;
458 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
459 }
460 
461 
462 inline status_t
463 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
464 {
465 	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
466 		return B_BAD_TYPE;
467 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
468 }
469 
470 
471 inline status_t
472 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
473 {
474 	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
475 		return B_BAD_TYPE;
476 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
477 }
478 
479 
480 inline status_t
481 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
482 {
483 	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
484 		return B_BAD_TYPE;
485 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
486 }
487 
488 
489 inline status_t
490 BPlusTree::Insert(Transaction& transaction, float key, off_t value)
491 {
492 	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
493 		return B_BAD_TYPE;
494 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
495 }
496 
497 
498 inline status_t
499 BPlusTree::Insert(Transaction& transaction, double key, off_t value)
500 {
501 	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
502 		return B_BAD_TYPE;
503 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
504 }
505 #endif // !_BOOT_MODE
506 
507 
508 //	#pragma mark - TreeIterator inline functions
509 
510 
511 inline status_t
512 TreeIterator::Rewind()
513 {
514 	return Goto(BPLUSTREE_BEGIN);
515 }
516 
517 
518 inline status_t
519 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
520 	off_t* value, uint16* duplicate)
521 {
522 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
523 		duplicate);
524 }
525 
526 
527 inline status_t
528 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
529 	off_t* value, uint16* duplicate)
530 {
531 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
532 		duplicate);
533 }
534 
535 
536 //	#pragma mark - bplustree_header inline functions
537 
538 
539 inline bool
540 bplustree_header::CheckNode(bplustree_node* node) const
541 {
542 	// sanity checks (links, all_key_count)
543 	return IsValidLink(node->LeftLink())
544 		&& IsValidLink(node->RightLink())
545 		&& IsValidLink(node->OverflowLink())
546 		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
547 				<= (int8*)node + NodeSize();
548 }
549 
550 
551 inline bool
552 bplustree_header::IsValidLink(off_t link) const
553 {
554 	return link == BPLUSTREE_NULL
555 		|| (link > 0 && link <= MaximumSize() - NodeSize());
556 }
557 
558 
559 //	#pragma mark - bplustree_node inline functions
560 
561 
562 inline uint16*
563 bplustree_node::KeyLengths() const
564 {
565 	return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node)
566 		+ AllKeyLength()));
567 }
568 
569 
570 inline off_t*
571 bplustree_node::Values() const
572 {
573 	return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16));
574 }
575 
576 
577 inline uint8*
578 bplustree_node::Keys() const
579 {
580 	return (uint8*)this + sizeof(bplustree_node);
581 }
582 
583 
584 inline int32
585 bplustree_node::Used() const
586 {
587 	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
588 		* (sizeof(uint16) + sizeof(off_t));
589 }
590 
591 
592 inline bool
593 bplustree_node::IsLeaf() const
594 {
595 	return OverflowLink() == BPLUSTREE_NULL;
596 }
597 
598 
599 inline duplicate_array*
600 bplustree_node::FragmentAt(int8 index) const
601 {
602 	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
603 }
604 
605 
606 inline duplicate_array*
607 bplustree_node::DuplicateArray() const
608 {
609 	return (duplicate_array*)&overflow_link;
610 }
611 
612 
613 inline uint8
614 bplustree_node::LinkType(off_t link)
615 {
616 	return *(uint64*)&link >> 62;
617 }
618 
619 
620 inline off_t
621 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
622 {
623 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
624 		| (fragmentIndex & 0x3ff);
625 }
626 
627 
628 inline bool
629 bplustree_node::IsDuplicate(off_t link)
630 {
631 	return (LinkType(link)
632 		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
633 }
634 
635 
636 inline off_t
637 bplustree_node::FragmentOffset(off_t link)
638 {
639 	return link & 0x3ffffffffffffc00LL;
640 }
641 
642 
643 inline uint32
644 bplustree_node::FragmentIndex(off_t link)
645 {
646 	return (uint32)(link & 0x3ff);
647 }
648 
649 
650 inline uint32
651 bplustree_node::MaxFragments(uint32 nodeSize)
652 {
653 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
654 }
655 
656 
657 #if _BOOT_MODE
658 } // namespace BFS
659 
660 #undef Inode
661 #endif
662 
663 #endif	// B_PLUS_TREE_H
664