xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision 04a0e9c7b68cbe3a43d38e2bca8e860fd80936fb)
1 /*
2  * Copyright 2001-2012, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7 
8 
9 #include "bfs.h"
10 
11 #include "Utility.h"
12 
13 #if !_BOOT_MODE
14 #include "Journal.h"
15 class Inode;
16 #else
17 #define Inode BFS::Stream
18 
19 namespace BFS {
20 
21 class Stream;
22 class Transaction;
23 class TransactionListener {};
24 #endif // _BOOT_MODE
25 
26 //	#pragma mark - on-disk structures
27 
28 struct bplustree_node;
29 
30 #define BPLUSTREE_NULL			-1LL
31 #define BPLUSTREE_FREE			-2LL
32 
33 struct bplustree_header {
34 	uint32		magic;
35 	uint32		node_size;
36 	uint32		max_number_of_levels;
37 	uint32		data_type;
38 	int64		root_node_pointer;
39 	int64		free_node_pointer;
40 	int64		maximum_size;
41 
42 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
43 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
44 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
45 	off_t RootNode() const
46 		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
47 	off_t FreeNode() const
48 		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
49 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
50 	uint32 MaxNumberOfLevels() const
51 		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
52 
53 	inline bool CheckNode(bplustree_node* node) const;
54 	inline bool IsValidLink(off_t link) const;
55 	bool IsValid() const;
56 } _PACKED;
57 
58 #define BPLUSTREE_MAGIC 			0x69f6c2e8
59 #define BPLUSTREE_NODE_SIZE 		1024
60 #define BPLUSTREE_MAX_KEY_LENGTH	256
61 #define BPLUSTREE_MIN_KEY_LENGTH	1
62 
63 enum bplustree_types {
64 	BPLUSTREE_STRING_TYPE	= 0,
65 	BPLUSTREE_INT32_TYPE	= 1,
66 	BPLUSTREE_UINT32_TYPE	= 2,
67 	BPLUSTREE_INT64_TYPE	= 3,
68 	BPLUSTREE_UINT64_TYPE	= 4,
69 	BPLUSTREE_FLOAT_TYPE	= 5,
70 	BPLUSTREE_DOUBLE_TYPE	= 6
71 };
72 
73 
74 struct duplicate_array;
75 
76 
77 struct bplustree_node {
78 			int64				left_link;
79 			int64				right_link;
80 			int64				overflow_link;
81 			uint16				all_key_count;
82 			uint16				all_key_length;
83 
84 			off_t				LeftLink() const
85 									{ return BFS_ENDIAN_TO_HOST_INT64(
86 										left_link); }
87 			off_t				RightLink() const
88 									{ return BFS_ENDIAN_TO_HOST_INT64(
89 										right_link); }
90 			off_t				OverflowLink() const
91 									{ return BFS_ENDIAN_TO_HOST_INT64(
92 										overflow_link); }
93 			uint16				NumKeys() const
94 									{ return BFS_ENDIAN_TO_HOST_INT16(
95 										all_key_count); }
96 			uint16				AllKeyLength() const
97 									{ return BFS_ENDIAN_TO_HOST_INT16(
98 										all_key_length); }
99 
100 	inline	uint16*				KeyLengths() const;
101 	inline	off_t*				Values() const;
102 	inline	uint8*				Keys() const;
103 	inline	int32				Used() const;
104 			uint8*				KeyAt(int32 index, uint16* keyLength) const;
105 
106 	inline	bool				IsLeaf() const;
107 
108 			void				Initialize();
109 			uint8				CountDuplicates(off_t offset,
110 									bool isFragment) const;
111 			off_t				DuplicateAt(off_t offset, bool isFragment,
112 									int8 index) const;
113 			uint32				FragmentsUsed(uint32 nodeSize) const;
114 	inline	duplicate_array*	FragmentAt(int8 index) const;
115 	inline	duplicate_array*	DuplicateArray() const;
116 
117 	static inline uint8			LinkType(off_t link);
118 	static inline off_t			MakeLink(uint8 type, off_t link,
119 									uint32 fragmentIndex = 0);
120 	static inline bool			IsDuplicate(off_t link);
121 	static inline off_t			FragmentOffset(off_t link);
122 	static inline uint32		FragmentIndex(off_t link);
123 	static inline uint32		MaxFragments(uint32 nodeSize);
124 
125 			status_t			CheckIntegrity(uint32 nodeSize) const;
126 } _PACKED;
127 
128 //#define BPLUSTREE_NODE 0
129 #define BPLUSTREE_DUPLICATE_NODE 2
130 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
131 
132 #define NUM_FRAGMENT_VALUES 7
133 #define NUM_DUPLICATE_VALUES 125
134 
135 //**************************************
136 
137 enum bplustree_traversing {
138 	BPLUSTREE_FORWARD = 1,
139 	BPLUSTREE_BACKWARD = -1,
140 
141 	BPLUSTREE_BEGIN = 0,
142 	BPLUSTREE_END = 1
143 };
144 
145 
146 //	#pragma mark - in-memory structures
147 
148 
149 class BPlusTree;
150 struct TreeCheck;
151 class TreeIterator;
152 
153 
154 #if !_BOOT_MODE
155 // needed for searching (utilizing a stack)
156 struct node_and_key {
157 	off_t	nodeOffset;
158 	uint16	keyIndex;
159 };
160 #endif // !_BOOT_MODE
161 
162 
163 class CachedNode {
164 public:
165 	CachedNode(BPlusTree* tree)
166 		:
167 		fTree(tree),
168 		fNode(NULL)
169 	{
170 #if _BOOT_MODE
171 		fBlock = NULL;
172 #endif
173 	}
174 
175 	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
176 		:
177 		fTree(tree),
178 		fNode(NULL)
179 	{
180 #if _BOOT_MODE
181 		fBlock = NULL;
182 #endif
183 		SetTo(offset, check);
184 	}
185 
186 	~CachedNode()
187 	{
188 		Unset();
189 #if _BOOT_MODE
190 		free(fBlock);
191 #endif
192 	}
193 
194 			const bplustree_node* SetTo(off_t offset, bool check = true);
195 			status_t			SetTo(off_t offset,
196 									const bplustree_node** _node,
197 									bool check = true);
198 
199 			const bplustree_header* SetToHeader();
200 			void				Unset();
201 
202 #if !_BOOT_MODE
203 			bplustree_node*		SetToWritable(Transaction& transaction,
204 									off_t offset, bool check = true);
205 			bplustree_node*		MakeWritable(Transaction& transaction);
206 			bplustree_header*	SetToWritableHeader(Transaction& transaction);
207 
208 			void				UnsetUnchanged(Transaction& transaction);
209 
210 			status_t			Free(Transaction& transaction, off_t offset);
211 			status_t			Allocate(Transaction& transaction,
212 									bplustree_node** _node, off_t* _offset);
213 #endif // !_BOOT_MODE
214 
215 			bool				IsWritable() const { return fWritable; }
216 			bplustree_node*		Node() const { return fNode; }
217 
218 protected:
219 			bplustree_node*		InternalSetTo(Transaction* transaction,
220 									off_t offset);
221 
222 			BPlusTree*			fTree;
223 			bplustree_node*		fNode;
224 			off_t				fOffset;
225 			off_t				fBlockNumber;
226 			bool				fWritable;
227 #if _BOOT_MODE
228 			uint8*				fBlock;
229 #endif
230 };
231 
232 
233 class BPlusTree : public TransactionListener {
234 public:
235 #if !_BOOT_MODE
236 								BPlusTree(Transaction& transaction,
237 									Inode* stream,
238 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
239 #endif
240 								BPlusTree(Inode* stream);
241 								BPlusTree();
242 								~BPlusTree();
243 
244 #if !_BOOT_MODE
245 			status_t			SetTo(Transaction& transaction, Inode* stream,
246 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
247 #endif
248 			status_t			SetTo(Inode* stream);
249 			status_t			SetStream(Inode* stream);
250 
251 			status_t			InitCheck();
252 
253 			size_t				NodeSize() const { return fNodeSize; }
254 			Inode*				Stream() const { return fStream; }
255 
256 #if !_BOOT_MODE
257 			status_t			Validate(bool repair, bool& _errorsFound);
258 			status_t			MakeEmpty();
259 
260 			status_t			Remove(Transaction& transaction,
261 									const uint8* key, uint16 keyLength,
262 									off_t value);
263 			status_t			Insert(Transaction& transaction,
264 									const uint8* key, uint16 keyLength,
265 									off_t value);
266 
267 			status_t			Remove(Transaction& transaction, const char* key,
268 									off_t value);
269 			status_t			Insert(Transaction& transaction, const char* key,
270 									off_t value);
271 			status_t			Insert(Transaction& transaction, int32 key,
272 									off_t value);
273 			status_t			Insert(Transaction& transaction, uint32 key,
274 									off_t value);
275 			status_t			Insert(Transaction& transaction, int64 key,
276 									off_t value);
277 			status_t			Insert(Transaction& transaction, uint64 key,
278 									off_t value);
279 			status_t			Insert(Transaction& transaction, float key,
280 									off_t value);
281 			status_t			Insert(Transaction& transaction, double key,
282 									off_t value);
283 
284 			status_t			Replace(Transaction& transaction,
285 									const uint8* key, uint16 keyLength,
286 									off_t value);
287 #endif // !_BOOT_MODE
288 
289 			status_t			Find(const uint8* key, uint16 keyLength,
290 									off_t* value);
291 
292 #if !_BOOT_MODE
293 	static	int32				TypeCodeToKeyType(type_code code);
294 	static	int32				ModeToKeyType(mode_t mode);
295 
296 protected:
297 	virtual void				TransactionDone(bool success);
298 	virtual void				RemovedFromTransaction();
299 #endif // !_BOOT_MODE
300 
301 private:
302 								BPlusTree(const BPlusTree& other);
303 								BPlusTree& operator=(const BPlusTree& other);
304 									// no implementation
305 
306 			int32				_CompareKeys(const void* key1, int keylength1,
307 									const void* key2, int keylength2);
308 			status_t			_FindKey(const bplustree_node* node,
309 									const uint8* key, uint16 keyLength,
310 									uint16* index = NULL, off_t* next = NULL);
311 #if !_BOOT_MODE
312 			status_t			_SeekDown(Stack<node_and_key>& stack,
313 									const uint8* key, uint16 keyLength);
314 
315 			status_t			_FindFreeDuplicateFragment(
316 									Transaction& transaction,
317 									const bplustree_node* node,
318 									CachedNode& cached, off_t* _offset,
319 									bplustree_node** _fragment, uint32* _index);
320 			status_t			_InsertDuplicate(Transaction& transaction,
321 									CachedNode& cached,
322 									const bplustree_node* node, uint16 index,
323 									off_t value);
324 			void				_InsertKey(bplustree_node* node, uint16 index,
325 									uint8* key, uint16 keyLength, off_t value);
326 			status_t			_SplitNode(bplustree_node* node,
327 									off_t nodeOffset, bplustree_node* other,
328 									off_t otherOffset, uint16* _keyIndex,
329 									uint8* key, uint16* _keyLength,
330 									off_t* _value);
331 
332 			status_t			_RemoveDuplicate(Transaction& transaction,
333 									const bplustree_node* node,
334 									CachedNode& cached, uint16 keyIndex,
335 									off_t value);
336 			void				_RemoveKey(bplustree_node* node, uint16 index);
337 
338 			void				_UpdateIterators(off_t offset, off_t nextOffset,
339 									uint16 keyIndex, uint16 splitAt,
340 									int8 change);
341 			void				_AddIterator(TreeIterator* iterator);
342 			void				_RemoveIterator(TreeIterator* iterator);
343 
344 			status_t			_ValidateChildren(TreeCheck& check,
345 									uint32 level, off_t offset,
346 									const uint8* largestKey, uint16 keyLength,
347 									const bplustree_node* parent);
348 			status_t			_ValidateChild(TreeCheck& check,
349 									CachedNode& cached, uint32 level,
350 									off_t offset, off_t lastOffset,
351 									off_t nextOffset, const uint8* key,
352 									uint16 keyLength);
353 #endif // !_BOOT_MODE
354 
355 private:
356 			friend class TreeIterator;
357 			friend class CachedNode;
358 			friend class TreeCheck;
359 
360 			Inode*				fStream;
361 			bplustree_header	fHeader;
362 			int32				fNodeSize;
363 			bool				fAllowDuplicates;
364 			bool				fInTransaction;
365 			status_t			fStatus;
366 
367 #if !_BOOT_MODE
368 			mutex				fIteratorLock;
369 			SinglyLinkedList<TreeIterator> fIterators;
370 #endif
371 };
372 
373 
374 //	#pragma mark - helper classes/functions
375 
376 
377 extern int32 compareKeys(type_code type, const void* key1, int keyLength1,
378 	const void* key2, int keyLength2);
379 
380 
381 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
382 public:
383 								TreeIterator(BPlusTree* tree);
384 								~TreeIterator();
385 
386 			status_t			Goto(int8 to);
387 			status_t			Traverse(int8 direction, void* key,
388 									uint16* keyLength, uint16 maxLength,
389 									off_t* value, uint16* duplicate = NULL);
390 			status_t			Find(const uint8* key, uint16 keyLength);
391 
392 			status_t			Rewind();
393 			status_t			GetNextEntry(void* key, uint16* keyLength,
394 									uint16 maxLength, off_t* value,
395 									uint16* duplicate = NULL);
396 			status_t			GetPreviousEntry(void* key, uint16* keyLength,
397 									uint16 maxLength, off_t* value,
398 									uint16* duplicate = NULL);
399 			void				SkipDuplicates();
400 
401 			BPlusTree*			Tree() const { return fTree; }
402 
403 #ifdef DEBUG
404 			void				Dump();
405 #endif
406 
407 private:
408 			friend class BPlusTree;
409 
410 			// called by BPlusTree
411 			void				Update(off_t offset, off_t nextOffset,
412 									uint16 keyIndex, uint16 splitAt,
413 									int8 change);
414 			void				Stop();
415 
416 private:
417 			BPlusTree*			fTree;
418 			off_t				fCurrentNodeOffset;
419 									// traverse position
420 			int32				fCurrentKey;
421 			off_t				fDuplicateNode;
422 			uint16				fDuplicate;
423 			uint16				fNumDuplicates;
424 			bool				fIsFragment;
425 };
426 
427 
428 //	#pragma mark - BPlusTree's inline functions
429 //	(most of them may not be needed)
430 
431 
432 #if !_BOOT_MODE
433 inline status_t
434 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
435 {
436 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
437 		return B_BAD_TYPE;
438 	return Remove(transaction, (uint8*)key, strlen(key), value);
439 }
440 
441 
442 inline status_t
443 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
444 {
445 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
446 		return B_BAD_TYPE;
447 	return Insert(transaction, (uint8*)key, strlen(key), value);
448 }
449 
450 
451 inline status_t
452 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
453 {
454 	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
455 		return B_BAD_TYPE;
456 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
457 }
458 
459 
460 inline status_t
461 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
462 {
463 	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
464 		return B_BAD_TYPE;
465 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
466 }
467 
468 
469 inline status_t
470 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
471 {
472 	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
473 		return B_BAD_TYPE;
474 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
475 }
476 
477 
478 inline status_t
479 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
480 {
481 	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
482 		return B_BAD_TYPE;
483 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
484 }
485 
486 
487 inline status_t
488 BPlusTree::Insert(Transaction& transaction, float key, off_t value)
489 {
490 	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
491 		return B_BAD_TYPE;
492 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
493 }
494 
495 
496 inline status_t
497 BPlusTree::Insert(Transaction& transaction, double key, off_t value)
498 {
499 	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
500 		return B_BAD_TYPE;
501 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
502 }
503 #endif // !_BOOT_MODE
504 
505 
506 //	#pragma mark - TreeIterator inline functions
507 
508 
509 inline status_t
510 TreeIterator::Rewind()
511 {
512 	return Goto(BPLUSTREE_BEGIN);
513 }
514 
515 
516 inline status_t
517 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
518 	off_t* value, uint16* duplicate)
519 {
520 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
521 		duplicate);
522 }
523 
524 
525 inline status_t
526 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
527 	off_t* value, uint16* duplicate)
528 {
529 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
530 		duplicate);
531 }
532 
533 
534 //	#pragma mark - bplustree_header inline functions
535 
536 
537 inline bool
538 bplustree_header::CheckNode(bplustree_node* node) const
539 {
540 	// sanity checks (links, all_key_count)
541 	return IsValidLink(node->LeftLink())
542 		&& IsValidLink(node->RightLink())
543 		&& IsValidLink(node->OverflowLink())
544 		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
545 				<= (int8*)node + NodeSize();
546 }
547 
548 
549 inline bool
550 bplustree_header::IsValidLink(off_t link) const
551 {
552 	return link == BPLUSTREE_NULL
553 		|| (link > 0 && link <= MaximumSize() - NodeSize());
554 }
555 
556 
557 //	#pragma mark - bplustree_node inline functions
558 
559 
560 inline uint16*
561 bplustree_node::KeyLengths() const
562 {
563 	return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node)
564 		+ AllKeyLength()));
565 }
566 
567 
568 inline off_t*
569 bplustree_node::Values() const
570 {
571 	return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16));
572 }
573 
574 
575 inline uint8*
576 bplustree_node::Keys() const
577 {
578 	return (uint8*)this + sizeof(bplustree_node);
579 }
580 
581 
582 inline int32
583 bplustree_node::Used() const
584 {
585 	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
586 		* (sizeof(uint16) + sizeof(off_t));
587 }
588 
589 
590 inline bool
591 bplustree_node::IsLeaf() const
592 {
593 	return OverflowLink() == BPLUSTREE_NULL;
594 }
595 
596 
597 inline duplicate_array*
598 bplustree_node::FragmentAt(int8 index) const
599 {
600 	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
601 }
602 
603 
604 inline duplicate_array*
605 bplustree_node::DuplicateArray() const
606 {
607 	return (duplicate_array*)&overflow_link;
608 }
609 
610 
611 inline uint8
612 bplustree_node::LinkType(off_t link)
613 {
614 	return *(uint64*)&link >> 62;
615 }
616 
617 
618 inline off_t
619 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
620 {
621 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
622 		| (fragmentIndex & 0x3ff);
623 }
624 
625 
626 inline bool
627 bplustree_node::IsDuplicate(off_t link)
628 {
629 	return (LinkType(link)
630 		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
631 }
632 
633 
634 inline off_t
635 bplustree_node::FragmentOffset(off_t link)
636 {
637 	return link & 0x3ffffffffffffc00LL;
638 }
639 
640 
641 inline uint32
642 bplustree_node::FragmentIndex(off_t link)
643 {
644 	return (uint32)(link & 0x3ff);
645 }
646 
647 
648 inline uint32
649 bplustree_node::MaxFragments(uint32 nodeSize)
650 {
651 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
652 }
653 
654 
655 #if _BOOT_MODE
656 } // namespace BFS
657 
658 #undef Inode
659 #endif
660 
661 #endif	// B_PLUS_TREE_H
662