xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision 5e7964b0a929555415798dea3373db9ac4611caa)
1 /*
2  * Copyright 2001-2015, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7 
8 
9 #include "bfs.h"
10 
11 #include "Utility.h"
12 
13 #if !_BOOT_MODE
14 #include "Journal.h"
15 class Inode;
16 #else
17 #define Inode BFS::Stream
18 
19 namespace BFS {
20 
21 class Stream;
22 class Transaction;
23 class TransactionListener {};
24 #endif // _BOOT_MODE
25 
26 //	#pragma mark - on-disk structures
27 
28 struct bplustree_node;
29 
30 #define BPLUSTREE_NULL			-1LL
31 #define BPLUSTREE_FREE			-2LL
32 
33 struct bplustree_header {
34 	uint32		magic;
35 	uint32		node_size;
36 	uint32		max_number_of_levels;
37 	uint32		data_type;
38 	int64		root_node_pointer;
39 	int64		free_node_pointer;
40 	int64		maximum_size;
41 
42 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
43 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
44 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
45 	off_t RootNode() const
46 		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
47 	off_t FreeNode() const
48 		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
49 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
50 	uint32 MaxNumberOfLevels() const
51 		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
52 
53 	inline bool CheckNode(const bplustree_node* node) const;
54 	inline bool IsValidLink(off_t link) const;
55 	bool IsValid() const;
56 } _PACKED;
57 
58 #define BPLUSTREE_MAGIC 			0x69f6c2e8
59 #define BPLUSTREE_NODE_SIZE 		1024
60 #define BPLUSTREE_MAX_KEY_LENGTH	256
61 #define BPLUSTREE_MIN_KEY_LENGTH	1
62 
63 enum bplustree_types {
64 	BPLUSTREE_STRING_TYPE	= 0,
65 	BPLUSTREE_INT32_TYPE	= 1,
66 	BPLUSTREE_UINT32_TYPE	= 2,
67 	BPLUSTREE_INT64_TYPE	= 3,
68 	BPLUSTREE_UINT64_TYPE	= 4,
69 	BPLUSTREE_FLOAT_TYPE	= 5,
70 	BPLUSTREE_DOUBLE_TYPE	= 6
71 };
72 
73 
74 struct duplicate_array;
75 
76 
77 struct bplustree_node {
78 			int64				left_link;
79 			int64				right_link;
80 			int64				overflow_link;
81 			uint16				all_key_count;
82 			uint16				all_key_length;
83 
84 			off_t				LeftLink() const
85 									{ return BFS_ENDIAN_TO_HOST_INT64(
86 										left_link); }
87 			off_t				RightLink() const
88 									{ return BFS_ENDIAN_TO_HOST_INT64(
89 										right_link); }
90 			off_t				OverflowLink() const
91 									{ return BFS_ENDIAN_TO_HOST_INT64(
92 										overflow_link); }
93 			uint16				NumKeys() const
94 									{ return BFS_ENDIAN_TO_HOST_INT16(
95 										all_key_count); }
96 			uint16				AllKeyLength() const
97 									{ return BFS_ENDIAN_TO_HOST_INT16(
98 										all_key_length); }
99 
100 	inline	uint16*				KeyLengths() const;
101 	inline	off_t*				Values() const;
102 	inline	uint8*				Keys() const;
103 	inline	int32				Used() const;
104 			uint8*				KeyAt(int32 index, uint16* keyLength) const;
105 
106 	inline	bool				IsLeaf() const;
107 
108 			void				Initialize();
109 			uint8				CountDuplicates(off_t offset,
110 									bool isFragment) const;
111 			off_t				DuplicateAt(off_t offset, bool isFragment,
112 									int8 index) const;
113 			uint32				FragmentsUsed(uint32 nodeSize) const;
114 	inline	duplicate_array*	FragmentAt(int8 index) const;
115 	inline	duplicate_array*	DuplicateArray() const;
116 
117 	static inline uint8			LinkType(off_t link);
118 	static inline off_t			MakeLink(uint8 type, off_t link,
119 									uint32 fragmentIndex = 0);
120 	static inline bool			IsDuplicate(off_t link);
121 	static inline off_t			FragmentOffset(off_t link);
122 	static inline uint32		FragmentIndex(off_t link);
123 	static inline uint32		MaxFragments(uint32 nodeSize);
124 
125 			status_t			CheckIntegrity(uint32 nodeSize) const;
126 } _PACKED;
127 
128 //#define BPLUSTREE_NODE 0
129 #define BPLUSTREE_DUPLICATE_NODE 2
130 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
131 
132 #define NUM_FRAGMENT_VALUES 7
133 #define NUM_DUPLICATE_VALUES 125
134 
135 //**************************************
136 
137 enum bplustree_traversing {
138 	BPLUSTREE_FORWARD = 1,
139 	BPLUSTREE_BACKWARD = -1,
140 
141 	BPLUSTREE_BEGIN = 0,
142 	BPLUSTREE_END = 1
143 };
144 
145 
146 //	#pragma mark - in-memory structures
147 
148 
149 class BPlusTree;
150 struct TreeCheck;
151 class TreeIterator;
152 
153 
154 #if !_BOOT_MODE
155 // needed for searching (utilizing a stack)
156 struct node_and_key {
157 	off_t	nodeOffset;
158 	uint16	keyIndex;
159 };
160 #endif // !_BOOT_MODE
161 
162 
163 class CachedNode {
164 public:
165 	CachedNode(BPlusTree* tree)
166 		:
167 		fTree(tree),
168 		fNode(NULL),
169 		fOffset(0),
170 		fBlockNumber(0),
171 		fWritable(false)
172 	{
173 #if _BOOT_MODE
174 		fBlock = NULL;
175 #endif
176 	}
177 
178 	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
179 		:
180 		fTree(tree),
181 		fNode(NULL),
182 		fOffset(0),
183 		fBlockNumber(0),
184 		fWritable(false)
185 	{
186 #if _BOOT_MODE
187 		fBlock = NULL;
188 #endif
189 		SetTo(offset, check);
190 	}
191 
192 	~CachedNode()
193 	{
194 		Unset();
195 #if _BOOT_MODE
196 		free(fBlock);
197 #endif
198 	}
199 
200 			const bplustree_node* SetTo(off_t offset, bool check = true);
201 			status_t			SetTo(off_t offset,
202 									const bplustree_node** _node,
203 									bool check = true);
204 
205 			const bplustree_header* SetToHeader();
206 			void				Unset();
207 
208 #if !_BOOT_MODE
209 			bplustree_node*		SetToWritable(Transaction& transaction,
210 									off_t offset, bool check = true);
211 			bplustree_node*		MakeWritable(Transaction& transaction);
212 			bplustree_header*	SetToWritableHeader(Transaction& transaction);
213 
214 			void				UnsetUnchanged(Transaction& transaction);
215 
216 			status_t			Free(Transaction& transaction, off_t offset);
217 			status_t			Allocate(Transaction& transaction,
218 									bplustree_node** _node, off_t* _offset);
219 #endif // !_BOOT_MODE
220 
221 			bool				IsWritable() const { return fWritable; }
222 			bplustree_node*		Node() const { return fNode; }
223 
224 protected:
225 			bplustree_node*		InternalSetTo(Transaction* transaction,
226 									off_t offset);
227 
228 			BPlusTree*			fTree;
229 			bplustree_node*		fNode;
230 			off_t				fOffset;
231 			off_t				fBlockNumber;
232 			bool				fWritable;
233 #if _BOOT_MODE
234 			uint8*				fBlock;
235 #endif
236 };
237 
238 
239 class BPlusTree : public TransactionListener {
240 public:
241 #if !_BOOT_MODE
242 								BPlusTree(Transaction& transaction,
243 									Inode* stream,
244 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
245 #endif
246 								BPlusTree(Inode* stream);
247 								BPlusTree();
248 								~BPlusTree();
249 
250 #if !_BOOT_MODE
251 			status_t			SetTo(Transaction& transaction, Inode* stream,
252 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
253 #endif
254 			status_t			SetTo(Inode* stream);
255 			status_t			SetStream(Inode* stream);
256 
257 			status_t			InitCheck();
258 
259 			size_t				NodeSize() const { return fNodeSize; }
260 			Inode*				Stream() const { return fStream; }
261 
262 #if !_BOOT_MODE
263 			status_t			Validate(bool repair, bool& _errorsFound);
264 			status_t			MakeEmpty();
265 
266 			status_t			Remove(Transaction& transaction,
267 									const uint8* key, uint16 keyLength,
268 									off_t value);
269 			status_t			Insert(Transaction& transaction,
270 									const uint8* key, uint16 keyLength,
271 									off_t value);
272 
273 			status_t			Remove(Transaction& transaction, const char* key,
274 									off_t value);
275 			status_t			Insert(Transaction& transaction, const char* key,
276 									off_t value);
277 			status_t			Insert(Transaction& transaction, int32 key,
278 									off_t value);
279 			status_t			Insert(Transaction& transaction, uint32 key,
280 									off_t value);
281 			status_t			Insert(Transaction& transaction, int64 key,
282 									off_t value);
283 			status_t			Insert(Transaction& transaction, uint64 key,
284 									off_t value);
285 			status_t			Insert(Transaction& transaction, float key,
286 									off_t value);
287 			status_t			Insert(Transaction& transaction, double key,
288 									off_t value);
289 
290 			status_t			Replace(Transaction& transaction,
291 									const uint8* key, uint16 keyLength,
292 									off_t value);
293 #endif // !_BOOT_MODE
294 
295 			status_t			Find(const uint8* key, uint16 keyLength,
296 									off_t* value);
297 
298 #if !_BOOT_MODE
299 	static	int32				TypeCodeToKeyType(type_code code);
300 	static	int32				ModeToKeyType(mode_t mode);
301 
302 protected:
303 	virtual void				TransactionDone(bool success);
304 	virtual void				RemovedFromTransaction();
305 #endif // !_BOOT_MODE
306 
307 private:
308 								BPlusTree(const BPlusTree& other);
309 								BPlusTree& operator=(const BPlusTree& other);
310 									// no implementation
311 
312 			int32				_CompareKeys(const void* key1, int keylength1,
313 									const void* key2, int keylength2);
314 			status_t			_FindKey(const bplustree_node* node,
315 									const uint8* key, uint16 keyLength,
316 									uint16* index = NULL, off_t* next = NULL);
317 #if !_BOOT_MODE
318 			status_t			_SeekDown(Stack<node_and_key>& stack,
319 									const uint8* key, uint16 keyLength);
320 
321 			status_t			_FindFreeDuplicateFragment(
322 									Transaction& transaction,
323 									const bplustree_node* node,
324 									CachedNode& cached, off_t* _offset,
325 									bplustree_node** _fragment, uint32* _index);
326 			status_t			_InsertDuplicate(Transaction& transaction,
327 									CachedNode& cached,
328 									const bplustree_node* node, uint16 index,
329 									off_t value);
330 			void				_InsertKey(bplustree_node* node, uint16 index,
331 									uint8* key, uint16 keyLength, off_t value);
332 			status_t			_SplitNode(bplustree_node* node,
333 									off_t nodeOffset, bplustree_node* other,
334 									off_t otherOffset, uint16* _keyIndex,
335 									uint8* key, uint16* _keyLength,
336 									off_t* _value);
337 
338 			status_t			_RemoveDuplicate(Transaction& transaction,
339 									const bplustree_node* node,
340 									CachedNode& cached, uint16 keyIndex,
341 									off_t value);
342 			void				_RemoveKey(bplustree_node* node, uint16 index);
343 
344 			void				_UpdateIterators(off_t offset, off_t nextOffset,
345 									uint16 keyIndex, uint16 splitAt,
346 									int8 change);
347 			void				_AddIterator(TreeIterator* iterator);
348 			void				_RemoveIterator(TreeIterator* iterator);
349 
350 			status_t			_ValidateChildren(TreeCheck& check,
351 									uint32 level, off_t offset,
352 									const uint8* largestKey, uint16 keyLength,
353 									const bplustree_node* parent);
354 			status_t			_ValidateChild(TreeCheck& check,
355 									CachedNode& cached, uint32 level,
356 									off_t offset, off_t lastOffset,
357 									off_t nextOffset, const uint8* key,
358 									uint16 keyLength);
359 #endif // !_BOOT_MODE
360 
361 private:
362 			friend class TreeIterator;
363 			friend class CachedNode;
364 			friend class TreeCheck;
365 
366 			Inode*				fStream;
367 			bplustree_header	fHeader;
368 			int32				fNodeSize;
369 			bool				fAllowDuplicates;
370 			bool				fInTransaction;
371 			status_t			fStatus;
372 
373 #if !_BOOT_MODE
374 			mutex				fIteratorLock;
375 			SinglyLinkedList<TreeIterator> fIterators;
376 #endif
377 };
378 
379 
380 //	#pragma mark - helper classes/functions
381 
382 
383 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
384 public:
385 								TreeIterator(BPlusTree* tree);
386 								~TreeIterator();
387 
388 			status_t			Goto(int8 to);
389 			status_t			Traverse(int8 direction, void* key,
390 									uint16* keyLength, uint16 maxLength,
391 									off_t* value, uint16* duplicate = NULL);
392 			status_t			Find(const uint8* key, uint16 keyLength);
393 
394 			status_t			Rewind();
395 			status_t			GetNextEntry(void* key, uint16* keyLength,
396 									uint16 maxLength, off_t* value,
397 									uint16* duplicate = NULL);
398 			status_t			GetPreviousEntry(void* key, uint16* keyLength,
399 									uint16 maxLength, off_t* value,
400 									uint16* duplicate = NULL);
401 			void				SkipDuplicates();
402 
403 			BPlusTree*			Tree() const { return fTree; }
404 
405 #ifdef DEBUG
406 			void				Dump();
407 #endif
408 
409 private:
410 			friend class BPlusTree;
411 
412 			// called by BPlusTree
413 			void				Update(off_t offset, off_t nextOffset,
414 									uint16 keyIndex, uint16 splitAt,
415 									int8 change);
416 			void				Stop();
417 
418 private:
419 			BPlusTree*			fTree;
420 			off_t				fCurrentNodeOffset;
421 									// traverse position
422 			int32				fCurrentKey;
423 			off_t				fDuplicateNode;
424 			uint16				fDuplicate;
425 			uint16				fNumDuplicates;
426 			bool				fIsFragment;
427 };
428 
429 
430 //	#pragma mark - BPlusTree's inline functions
431 //	(most of them may not be needed)
432 
433 
434 #if !_BOOT_MODE
435 inline status_t
436 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
437 {
438 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
439 		return B_BAD_TYPE;
440 	return Remove(transaction, (uint8*)key, strlen(key), value);
441 }
442 
443 
444 inline status_t
445 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
446 {
447 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
448 		return B_BAD_TYPE;
449 	return Insert(transaction, (uint8*)key, strlen(key), value);
450 }
451 
452 
453 inline status_t
454 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
455 {
456 	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
457 		return B_BAD_TYPE;
458 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
459 }
460 
461 
462 inline status_t
463 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
464 {
465 	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
466 		return B_BAD_TYPE;
467 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
468 }
469 
470 
471 inline status_t
472 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
473 {
474 	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
475 		return B_BAD_TYPE;
476 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
477 }
478 
479 
480 inline status_t
481 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
482 {
483 	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
484 		return B_BAD_TYPE;
485 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
486 }
487 
488 
489 inline status_t
490 BPlusTree::Insert(Transaction& transaction, float key, off_t value)
491 {
492 	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
493 		return B_BAD_TYPE;
494 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
495 }
496 
497 
498 inline status_t
499 BPlusTree::Insert(Transaction& transaction, double key, off_t value)
500 {
501 	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
502 		return B_BAD_TYPE;
503 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
504 }
505 #endif // !_BOOT_MODE
506 
507 
508 //	#pragma mark - TreeIterator inline functions
509 
510 
511 inline status_t
512 TreeIterator::Rewind()
513 {
514 	return Goto(BPLUSTREE_BEGIN);
515 }
516 
517 
518 inline status_t
519 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
520 	off_t* value, uint16* duplicate)
521 {
522 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
523 		duplicate);
524 }
525 
526 
527 inline status_t
528 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
529 	off_t* value, uint16* duplicate)
530 {
531 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
532 		duplicate);
533 }
534 
535 
536 //	#pragma mark - bplustree_header inline functions
537 
538 
539 inline bool
540 bplustree_header::CheckNode(const bplustree_node* node) const
541 {
542 	// sanity checks (links, all_key_count)
543 	return IsValidLink(node->LeftLink())
544 		&& IsValidLink(node->RightLink())
545 		&& IsValidLink(node->OverflowLink())
546 		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
547 				<= (int8*)node + NodeSize();
548 }
549 
550 
551 inline bool
552 bplustree_header::IsValidLink(off_t link) const
553 {
554 	return link == BPLUSTREE_NULL
555 		|| (link > 0 && link <= MaximumSize() - NodeSize());
556 }
557 
558 
559 //	#pragma mark - bplustree_node inline functions
560 
561 
562 inline uint16*
563 bplustree_node::KeyLengths() const
564 {
565 	return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node)
566 		+ AllKeyLength()));
567 }
568 
569 
570 inline off_t*
571 bplustree_node::Values() const
572 {
573 	return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16));
574 }
575 
576 
577 inline uint8*
578 bplustree_node::Keys() const
579 {
580 	return (uint8*)this + sizeof(bplustree_node);
581 }
582 
583 
584 inline int32
585 bplustree_node::Used() const
586 {
587 	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
588 		* (sizeof(uint16) + sizeof(off_t));
589 }
590 
591 
592 inline bool
593 bplustree_node::IsLeaf() const
594 {
595 	return OverflowLink() == BPLUSTREE_NULL;
596 }
597 
598 
599 inline duplicate_array*
600 bplustree_node::FragmentAt(int8 index) const
601 {
602 	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
603 }
604 
605 
606 inline duplicate_array*
607 bplustree_node::DuplicateArray() const
608 {
609 	return (duplicate_array*)&overflow_link;
610 }
611 
612 
613 inline uint8
614 bplustree_node::LinkType(off_t link)
615 {
616 	return *(uint64*)&link >> 62;
617 }
618 
619 
620 inline off_t
621 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
622 {
623 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
624 		| (fragmentIndex & 0x3ff);
625 }
626 
627 
628 inline bool
629 bplustree_node::IsDuplicate(off_t link)
630 {
631 	return (LinkType(link)
632 		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
633 }
634 
635 
636 inline off_t
637 bplustree_node::FragmentOffset(off_t link)
638 {
639 	return link & 0x3ffffffffffffc00LL;
640 }
641 
642 
643 inline uint32
644 bplustree_node::FragmentIndex(off_t link)
645 {
646 	return (uint32)(link & 0x3ff);
647 }
648 
649 
650 inline uint32
651 bplustree_node::MaxFragments(uint32 nodeSize)
652 {
653 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
654 }
655 
656 
657 #if _BOOT_MODE
658 } // namespace BFS
659 
660 #undef Inode
661 #endif
662 
663 #endif	// B_PLUS_TREE_H
664