xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision 529cd177b573aaba391c8adc9c9f5ad76a14bf81)
1 /*
2  * Copyright 2001-2014, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7 
8 
9 #include "bfs.h"
10 
11 #include "Utility.h"
12 
13 #if !_BOOT_MODE
14 #include "Journal.h"
15 class Inode;
16 #else
17 #define Inode BFS::Stream
18 
19 namespace BFS {
20 
21 class Stream;
22 class Transaction;
23 class TransactionListener {};
24 #endif // _BOOT_MODE
25 
26 //	#pragma mark - on-disk structures
27 
28 struct bplustree_node;
29 
30 #define BPLUSTREE_NULL			-1LL
31 #define BPLUSTREE_FREE			-2LL
32 
33 struct bplustree_header {
34 	uint32		magic;
35 	uint32		node_size;
36 	uint32		max_number_of_levels;
37 	uint32		data_type;
38 	int64		root_node_pointer;
39 	int64		free_node_pointer;
40 	int64		maximum_size;
41 
42 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
43 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
44 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
45 	off_t RootNode() const
46 		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
47 	off_t FreeNode() const
48 		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
49 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
50 	uint32 MaxNumberOfLevels() const
51 		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
52 
53 	inline bool CheckNode(bplustree_node* node) const;
54 	inline bool IsValidLink(off_t link) const;
55 	bool IsValid() const;
56 } _PACKED;
57 
58 #define BPLUSTREE_MAGIC 			0x69f6c2e8
59 #define BPLUSTREE_NODE_SIZE 		1024
60 #define BPLUSTREE_MAX_KEY_LENGTH	256
61 #define BPLUSTREE_MIN_KEY_LENGTH	1
62 
63 enum bplustree_types {
64 	BPLUSTREE_STRING_TYPE	= 0,
65 	BPLUSTREE_INT32_TYPE	= 1,
66 	BPLUSTREE_UINT32_TYPE	= 2,
67 	BPLUSTREE_INT64_TYPE	= 3,
68 	BPLUSTREE_UINT64_TYPE	= 4,
69 	BPLUSTREE_FLOAT_TYPE	= 5,
70 	BPLUSTREE_DOUBLE_TYPE	= 6
71 };
72 
73 
74 struct duplicate_array;
75 
76 
77 struct bplustree_node {
78 			int64				left_link;
79 			int64				right_link;
80 			int64				overflow_link;
81 			uint16				all_key_count;
82 			uint16				all_key_length;
83 
84 			off_t				LeftLink() const
85 									{ return BFS_ENDIAN_TO_HOST_INT64(
86 										left_link); }
87 			off_t				RightLink() const
88 									{ return BFS_ENDIAN_TO_HOST_INT64(
89 										right_link); }
90 			off_t				OverflowLink() const
91 									{ return BFS_ENDIAN_TO_HOST_INT64(
92 										overflow_link); }
93 			uint16				NumKeys() const
94 									{ return BFS_ENDIAN_TO_HOST_INT16(
95 										all_key_count); }
96 			uint16				AllKeyLength() const
97 									{ return BFS_ENDIAN_TO_HOST_INT16(
98 										all_key_length); }
99 
100 	inline	uint16*				KeyLengths() const;
101 	inline	off_t*				Values() const;
102 	inline	uint8*				Keys() const;
103 	inline	int32				Used() const;
104 			uint8*				KeyAt(int32 index, uint16* keyLength) const;
105 
106 	inline	bool				IsLeaf() const;
107 
108 			void				Initialize();
109 			uint8				CountDuplicates(off_t offset,
110 									bool isFragment) const;
111 			off_t				DuplicateAt(off_t offset, bool isFragment,
112 									int8 index) const;
113 			uint32				FragmentsUsed(uint32 nodeSize) const;
114 	inline	duplicate_array*	FragmentAt(int8 index) const;
115 	inline	duplicate_array*	DuplicateArray() const;
116 
117 	static inline uint8			LinkType(off_t link);
118 	static inline off_t			MakeLink(uint8 type, off_t link,
119 									uint32 fragmentIndex = 0);
120 	static inline bool			IsDuplicate(off_t link);
121 	static inline off_t			FragmentOffset(off_t link);
122 	static inline uint32		FragmentIndex(off_t link);
123 	static inline uint32		MaxFragments(uint32 nodeSize);
124 
125 			status_t			CheckIntegrity(uint32 nodeSize) const;
126 } _PACKED;
127 
128 //#define BPLUSTREE_NODE 0
129 #define BPLUSTREE_DUPLICATE_NODE 2
130 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
131 
132 #define NUM_FRAGMENT_VALUES 7
133 #define NUM_DUPLICATE_VALUES 125
134 
135 //**************************************
136 
137 enum bplustree_traversing {
138 	BPLUSTREE_FORWARD = 1,
139 	BPLUSTREE_BACKWARD = -1,
140 
141 	BPLUSTREE_BEGIN = 0,
142 	BPLUSTREE_END = 1
143 };
144 
145 
146 //	#pragma mark - in-memory structures
147 
148 
149 class BPlusTree;
150 struct TreeCheck;
151 class TreeIterator;
152 
153 
154 #if !_BOOT_MODE
155 // needed for searching (utilizing a stack)
156 struct node_and_key {
157 	off_t	nodeOffset;
158 	uint16	keyIndex;
159 };
160 #endif // !_BOOT_MODE
161 
162 
163 class CachedNode {
164 public:
165 	CachedNode(BPlusTree* tree)
166 		:
167 		fTree(tree),
168 		fNode(NULL)
169 	{
170 #if _BOOT_MODE
171 		fBlock = NULL;
172 #endif
173 	}
174 
175 	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
176 		:
177 		fTree(tree),
178 		fNode(NULL)
179 	{
180 #if _BOOT_MODE
181 		fBlock = NULL;
182 #endif
183 		SetTo(offset, check);
184 	}
185 
186 	~CachedNode()
187 	{
188 		Unset();
189 #if _BOOT_MODE
190 		free(fBlock);
191 #endif
192 	}
193 
194 			const bplustree_node* SetTo(off_t offset, bool check = true);
195 			status_t			SetTo(off_t offset,
196 									const bplustree_node** _node,
197 									bool check = true);
198 
199 			const bplustree_header* SetToHeader();
200 			void				Unset();
201 
202 #if !_BOOT_MODE
203 			bplustree_node*		SetToWritable(Transaction& transaction,
204 									off_t offset, bool check = true);
205 			bplustree_node*		MakeWritable(Transaction& transaction);
206 			bplustree_header*	SetToWritableHeader(Transaction& transaction);
207 
208 			void				UnsetUnchanged(Transaction& transaction);
209 
210 			status_t			Free(Transaction& transaction, off_t offset);
211 			status_t			Allocate(Transaction& transaction,
212 									bplustree_node** _node, off_t* _offset);
213 #endif // !_BOOT_MODE
214 
215 			bool				IsWritable() const { return fWritable; }
216 			bplustree_node*		Node() const { return fNode; }
217 
218 protected:
219 			bplustree_node*		InternalSetTo(Transaction* transaction,
220 									off_t offset);
221 
222 			BPlusTree*			fTree;
223 			bplustree_node*		fNode;
224 			off_t				fOffset;
225 			off_t				fBlockNumber;
226 			bool				fWritable;
227 #if _BOOT_MODE
228 			uint8*				fBlock;
229 #endif
230 };
231 
232 
233 class BPlusTree : public TransactionListener {
234 public:
235 #if !_BOOT_MODE
236 								BPlusTree(Transaction& transaction,
237 									Inode* stream,
238 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
239 #endif
240 								BPlusTree(Inode* stream);
241 								BPlusTree();
242 								~BPlusTree();
243 
244 #if !_BOOT_MODE
245 			status_t			SetTo(Transaction& transaction, Inode* stream,
246 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
247 #endif
248 			status_t			SetTo(Inode* stream);
249 			status_t			SetStream(Inode* stream);
250 
251 			status_t			InitCheck();
252 
253 			size_t				NodeSize() const { return fNodeSize; }
254 			Inode*				Stream() const { return fStream; }
255 
256 #if !_BOOT_MODE
257 			status_t			Validate(bool repair, bool& _errorsFound);
258 			status_t			MakeEmpty();
259 
260 			status_t			Remove(Transaction& transaction,
261 									const uint8* key, uint16 keyLength,
262 									off_t value);
263 			status_t			Insert(Transaction& transaction,
264 									const uint8* key, uint16 keyLength,
265 									off_t value);
266 
267 			status_t			Remove(Transaction& transaction, const char* key,
268 									off_t value);
269 			status_t			Insert(Transaction& transaction, const char* key,
270 									off_t value);
271 			status_t			Insert(Transaction& transaction, int32 key,
272 									off_t value);
273 			status_t			Insert(Transaction& transaction, uint32 key,
274 									off_t value);
275 			status_t			Insert(Transaction& transaction, int64 key,
276 									off_t value);
277 			status_t			Insert(Transaction& transaction, uint64 key,
278 									off_t value);
279 			status_t			Insert(Transaction& transaction, float key,
280 									off_t value);
281 			status_t			Insert(Transaction& transaction, double key,
282 									off_t value);
283 
284 			status_t			Replace(Transaction& transaction,
285 									const uint8* key, uint16 keyLength,
286 									off_t value);
287 #endif // !_BOOT_MODE
288 
289 			status_t			Find(const uint8* key, uint16 keyLength,
290 									off_t* value);
291 
292 #if !_BOOT_MODE
293 	static	int32				TypeCodeToKeyType(type_code code);
294 	static	int32				ModeToKeyType(mode_t mode);
295 
296 protected:
297 	virtual void				TransactionDone(bool success);
298 	virtual void				RemovedFromTransaction();
299 #endif // !_BOOT_MODE
300 
301 private:
302 								BPlusTree(const BPlusTree& other);
303 								BPlusTree& operator=(const BPlusTree& other);
304 									// no implementation
305 
306 			int32				_CompareKeys(const void* key1, int keylength1,
307 									const void* key2, int keylength2);
308 			status_t			_FindKey(const bplustree_node* node,
309 									const uint8* key, uint16 keyLength,
310 									uint16* index = NULL, off_t* next = NULL);
311 #if !_BOOT_MODE
312 			status_t			_SeekDown(Stack<node_and_key>& stack,
313 									const uint8* key, uint16 keyLength);
314 
315 			status_t			_FindFreeDuplicateFragment(
316 									Transaction& transaction,
317 									const bplustree_node* node,
318 									CachedNode& cached, off_t* _offset,
319 									bplustree_node** _fragment, uint32* _index);
320 			status_t			_InsertDuplicate(Transaction& transaction,
321 									CachedNode& cached,
322 									const bplustree_node* node, uint16 index,
323 									off_t value);
324 			void				_InsertKey(bplustree_node* node, uint16 index,
325 									uint8* key, uint16 keyLength, off_t value);
326 			status_t			_SplitNode(bplustree_node* node,
327 									off_t nodeOffset, bplustree_node* other,
328 									off_t otherOffset, uint16* _keyIndex,
329 									uint8* key, uint16* _keyLength,
330 									off_t* _value);
331 
332 			status_t			_RemoveDuplicate(Transaction& transaction,
333 									const bplustree_node* node,
334 									CachedNode& cached, uint16 keyIndex,
335 									off_t value);
336 			void				_RemoveKey(bplustree_node* node, uint16 index);
337 
338 			void				_UpdateIterators(off_t offset, off_t nextOffset,
339 									uint16 keyIndex, uint16 splitAt,
340 									int8 change);
341 			void				_AddIterator(TreeIterator* iterator);
342 			void				_RemoveIterator(TreeIterator* iterator);
343 
344 			status_t			_ValidateChildren(TreeCheck& check,
345 									uint32 level, off_t offset,
346 									const uint8* largestKey, uint16 keyLength,
347 									const bplustree_node* parent);
348 			status_t			_ValidateChild(TreeCheck& check,
349 									CachedNode& cached, uint32 level,
350 									off_t offset, off_t lastOffset,
351 									off_t nextOffset, const uint8* key,
352 									uint16 keyLength);
353 #endif // !_BOOT_MODE
354 
355 private:
356 			friend class TreeIterator;
357 			friend class CachedNode;
358 			friend class TreeCheck;
359 
360 			Inode*				fStream;
361 			bplustree_header	fHeader;
362 			int32				fNodeSize;
363 			bool				fAllowDuplicates;
364 			bool				fInTransaction;
365 			status_t			fStatus;
366 
367 #if !_BOOT_MODE
368 			mutex				fIteratorLock;
369 			SinglyLinkedList<TreeIterator> fIterators;
370 #endif
371 };
372 
373 
374 //	#pragma mark - helper classes/functions
375 
376 
377 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
378 public:
379 								TreeIterator(BPlusTree* tree);
380 								~TreeIterator();
381 
382 			status_t			Goto(int8 to);
383 			status_t			Traverse(int8 direction, void* key,
384 									uint16* keyLength, uint16 maxLength,
385 									off_t* value, uint16* duplicate = NULL);
386 			status_t			Find(const uint8* key, uint16 keyLength);
387 
388 			status_t			Rewind();
389 			status_t			GetNextEntry(void* key, uint16* keyLength,
390 									uint16 maxLength, off_t* value,
391 									uint16* duplicate = NULL);
392 			status_t			GetPreviousEntry(void* key, uint16* keyLength,
393 									uint16 maxLength, off_t* value,
394 									uint16* duplicate = NULL);
395 			void				SkipDuplicates();
396 
397 			BPlusTree*			Tree() const { return fTree; }
398 
399 #ifdef DEBUG
400 			void				Dump();
401 #endif
402 
403 private:
404 			friend class BPlusTree;
405 
406 			// called by BPlusTree
407 			void				Update(off_t offset, off_t nextOffset,
408 									uint16 keyIndex, uint16 splitAt,
409 									int8 change);
410 			void				Stop();
411 
412 private:
413 			BPlusTree*			fTree;
414 			off_t				fCurrentNodeOffset;
415 									// traverse position
416 			int32				fCurrentKey;
417 			off_t				fDuplicateNode;
418 			uint16				fDuplicate;
419 			uint16				fNumDuplicates;
420 			bool				fIsFragment;
421 };
422 
423 
424 //	#pragma mark - BPlusTree's inline functions
425 //	(most of them may not be needed)
426 
427 
428 #if !_BOOT_MODE
429 inline status_t
430 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
431 {
432 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
433 		return B_BAD_TYPE;
434 	return Remove(transaction, (uint8*)key, strlen(key), value);
435 }
436 
437 
438 inline status_t
439 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
440 {
441 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
442 		return B_BAD_TYPE;
443 	return Insert(transaction, (uint8*)key, strlen(key), value);
444 }
445 
446 
447 inline status_t
448 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
449 {
450 	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
451 		return B_BAD_TYPE;
452 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
453 }
454 
455 
456 inline status_t
457 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
458 {
459 	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
460 		return B_BAD_TYPE;
461 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
462 }
463 
464 
465 inline status_t
466 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
467 {
468 	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
469 		return B_BAD_TYPE;
470 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
471 }
472 
473 
474 inline status_t
475 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
476 {
477 	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
478 		return B_BAD_TYPE;
479 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
480 }
481 
482 
483 inline status_t
484 BPlusTree::Insert(Transaction& transaction, float key, off_t value)
485 {
486 	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
487 		return B_BAD_TYPE;
488 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
489 }
490 
491 
492 inline status_t
493 BPlusTree::Insert(Transaction& transaction, double key, off_t value)
494 {
495 	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
496 		return B_BAD_TYPE;
497 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
498 }
499 #endif // !_BOOT_MODE
500 
501 
502 //	#pragma mark - TreeIterator inline functions
503 
504 
505 inline status_t
506 TreeIterator::Rewind()
507 {
508 	return Goto(BPLUSTREE_BEGIN);
509 }
510 
511 
512 inline status_t
513 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
514 	off_t* value, uint16* duplicate)
515 {
516 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
517 		duplicate);
518 }
519 
520 
521 inline status_t
522 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
523 	off_t* value, uint16* duplicate)
524 {
525 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
526 		duplicate);
527 }
528 
529 
530 //	#pragma mark - bplustree_header inline functions
531 
532 
533 inline bool
534 bplustree_header::CheckNode(bplustree_node* node) const
535 {
536 	// sanity checks (links, all_key_count)
537 	return IsValidLink(node->LeftLink())
538 		&& IsValidLink(node->RightLink())
539 		&& IsValidLink(node->OverflowLink())
540 		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
541 				<= (int8*)node + NodeSize();
542 }
543 
544 
545 inline bool
546 bplustree_header::IsValidLink(off_t link) const
547 {
548 	return link == BPLUSTREE_NULL
549 		|| (link > 0 && link <= MaximumSize() - NodeSize());
550 }
551 
552 
553 //	#pragma mark - bplustree_node inline functions
554 
555 
556 inline uint16*
557 bplustree_node::KeyLengths() const
558 {
559 	return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node)
560 		+ AllKeyLength()));
561 }
562 
563 
564 inline off_t*
565 bplustree_node::Values() const
566 {
567 	return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16));
568 }
569 
570 
571 inline uint8*
572 bplustree_node::Keys() const
573 {
574 	return (uint8*)this + sizeof(bplustree_node);
575 }
576 
577 
578 inline int32
579 bplustree_node::Used() const
580 {
581 	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
582 		* (sizeof(uint16) + sizeof(off_t));
583 }
584 
585 
586 inline bool
587 bplustree_node::IsLeaf() const
588 {
589 	return OverflowLink() == BPLUSTREE_NULL;
590 }
591 
592 
593 inline duplicate_array*
594 bplustree_node::FragmentAt(int8 index) const
595 {
596 	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
597 }
598 
599 
600 inline duplicate_array*
601 bplustree_node::DuplicateArray() const
602 {
603 	return (duplicate_array*)&overflow_link;
604 }
605 
606 
607 inline uint8
608 bplustree_node::LinkType(off_t link)
609 {
610 	return *(uint64*)&link >> 62;
611 }
612 
613 
614 inline off_t
615 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
616 {
617 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
618 		| (fragmentIndex & 0x3ff);
619 }
620 
621 
622 inline bool
623 bplustree_node::IsDuplicate(off_t link)
624 {
625 	return (LinkType(link)
626 		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
627 }
628 
629 
630 inline off_t
631 bplustree_node::FragmentOffset(off_t link)
632 {
633 	return link & 0x3ffffffffffffc00LL;
634 }
635 
636 
637 inline uint32
638 bplustree_node::FragmentIndex(off_t link)
639 {
640 	return (uint32)(link & 0x3ff);
641 }
642 
643 
644 inline uint32
645 bplustree_node::MaxFragments(uint32 nodeSize)
646 {
647 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
648 }
649 
650 
651 #if _BOOT_MODE
652 } // namespace BFS
653 
654 #undef Inode
655 #endif
656 
657 #endif	// B_PLUS_TREE_H
658