xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision 4fd62caa9acc437534c41bbb7d3fc9d53e915005)
1 /*
2  * Copyright 2001-2012, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7 
8 
9 #include "bfs.h"
10 #include "Journal.h"
11 
12 
13 //	#pragma mark - on-disk structures
14 
15 struct bplustree_node;
16 
17 #define BPLUSTREE_NULL			-1LL
18 #define BPLUSTREE_FREE			-2LL
19 
20 struct bplustree_header {
21 	uint32		magic;
22 	uint32		node_size;
23 	uint32		max_number_of_levels;
24 	uint32		data_type;
25 	int64		root_node_pointer;
26 	int64		free_node_pointer;
27 	int64		maximum_size;
28 
29 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
30 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
31 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
32 	off_t RootNode() const
33 		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
34 	off_t FreeNode() const
35 		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
36 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
37 	uint32 MaxNumberOfLevels() const
38 		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
39 
40 	inline bool CheckNode(bplustree_node* node) const;
41 	inline bool IsValidLink(off_t link) const;
42 	bool IsValid() const;
43 } _PACKED;
44 
45 #define BPLUSTREE_MAGIC 			0x69f6c2e8
46 #define BPLUSTREE_NODE_SIZE 		1024
47 #define BPLUSTREE_MAX_KEY_LENGTH	256
48 #define BPLUSTREE_MIN_KEY_LENGTH	1
49 
50 enum bplustree_types {
51 	BPLUSTREE_STRING_TYPE	= 0,
52 	BPLUSTREE_INT32_TYPE	= 1,
53 	BPLUSTREE_UINT32_TYPE	= 2,
54 	BPLUSTREE_INT64_TYPE	= 3,
55 	BPLUSTREE_UINT64_TYPE	= 4,
56 	BPLUSTREE_FLOAT_TYPE	= 5,
57 	BPLUSTREE_DOUBLE_TYPE	= 6
58 };
59 
60 
61 struct duplicate_array;
62 
63 
64 struct bplustree_node {
65 			int64				left_link;
66 			int64				right_link;
67 			int64				overflow_link;
68 			uint16				all_key_count;
69 			uint16				all_key_length;
70 
71 			off_t				LeftLink() const
72 									{ return BFS_ENDIAN_TO_HOST_INT64(
73 										left_link); }
74 			off_t				RightLink() const
75 									{ return BFS_ENDIAN_TO_HOST_INT64(
76 										right_link); }
77 			off_t				OverflowLink() const
78 									{ return BFS_ENDIAN_TO_HOST_INT64(
79 										overflow_link); }
80 			uint16				NumKeys() const
81 									{ return BFS_ENDIAN_TO_HOST_INT16(
82 										all_key_count); }
83 			uint16				AllKeyLength() const
84 									{ return BFS_ENDIAN_TO_HOST_INT16(
85 										all_key_length); }
86 
87 	inline	uint16*				KeyLengths() const;
88 	inline	off_t*				Values() const;
89 	inline	uint8*				Keys() const;
90 	inline	int32				Used() const;
91 			uint8*				KeyAt(int32 index, uint16* keyLength) const;
92 
93 	inline	bool				IsLeaf() const;
94 
95 			void				Initialize();
96 			uint8				CountDuplicates(off_t offset,
97 									bool isFragment) const;
98 			off_t				DuplicateAt(off_t offset, bool isFragment,
99 									int8 index) const;
100 			uint32				FragmentsUsed(uint32 nodeSize) const;
101 	inline	duplicate_array*	FragmentAt(int8 index) const;
102 	inline	duplicate_array*	DuplicateArray() const;
103 
104 	static inline uint8			LinkType(off_t link);
105 	static inline off_t			MakeLink(uint8 type, off_t link,
106 									uint32 fragmentIndex = 0);
107 	static inline bool			IsDuplicate(off_t link);
108 	static inline off_t			FragmentOffset(off_t link);
109 	static inline uint32		FragmentIndex(off_t link);
110 	static inline uint32		MaxFragments(uint32 nodeSize);
111 
112 			status_t			CheckIntegrity(uint32 nodeSize) const;
113 } _PACKED;
114 
115 //#define BPLUSTREE_NODE 0
116 #define BPLUSTREE_DUPLICATE_NODE 2
117 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
118 
119 #define NUM_FRAGMENT_VALUES 7
120 #define NUM_DUPLICATE_VALUES 125
121 
122 //**************************************
123 
124 enum bplustree_traversing {
125 	BPLUSTREE_FORWARD = 1,
126 	BPLUSTREE_BACKWARD = -1,
127 
128 	BPLUSTREE_BEGIN = 0,
129 	BPLUSTREE_END = 1
130 };
131 
132 
133 //	#pragma mark - in-memory structures
134 
135 template<class T> class Stack;
136 class BPlusTree;
137 class TreeIterator;
138 class CachedNode;
139 class Inode;
140 struct TreeCheck;
141 
142 // needed for searching (utilizing a stack)
143 struct node_and_key {
144 	off_t	nodeOffset;
145 	uint16	keyIndex;
146 };
147 
148 
149 class CachedNode {
150 public:
151 	CachedNode(BPlusTree* tree)
152 		:
153 		fTree(tree),
154 		fNode(NULL)
155 	{
156 	}
157 
158 	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
159 		:
160 		fTree(tree),
161 		fNode(NULL)
162 	{
163 		SetTo(offset, check);
164 	}
165 
166 	~CachedNode()
167 	{
168 		Unset();
169 	}
170 
171 			const bplustree_node* SetTo(off_t offset, bool check = true);
172 			status_t			SetTo(off_t offset,
173 									const bplustree_node** _node,
174 									bool check = true);
175 			bplustree_node*		SetToWritable(Transaction& transaction,
176 									off_t offset, bool check = true);
177 			bplustree_node*		MakeWritable(Transaction& transaction);
178 			const bplustree_header* SetToHeader();
179 			bplustree_header*	SetToWritableHeader(Transaction& transaction);
180 
181 			void				UnsetUnchanged(Transaction& transaction);
182 			void				Unset();
183 
184 			status_t			Free(Transaction& transaction, off_t offset);
185 			status_t			Allocate(Transaction& transaction,
186 									bplustree_node** _node, off_t* _offset);
187 
188 			bool				IsWritable() const { return fWritable; }
189 			bplustree_node*		Node() const { return fNode; }
190 
191 protected:
192 			bplustree_node*		InternalSetTo(Transaction* transaction,
193 									off_t offset);
194 
195 			BPlusTree*			fTree;
196 			bplustree_node*		fNode;
197 			off_t				fOffset;
198 			off_t				fBlockNumber;
199 			bool				fWritable;
200 };
201 
202 
203 class BPlusTree : public TransactionListener {
204 public:
205 								BPlusTree(Transaction& transaction,
206 									Inode* stream,
207 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
208 								BPlusTree(Inode* stream);
209 								BPlusTree();
210 								~BPlusTree();
211 
212 			status_t			SetTo(Transaction& transaction, Inode* stream,
213 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
214 			status_t			SetTo(Inode* stream);
215 			status_t			SetStream(Inode* stream);
216 
217 			status_t			InitCheck();
218 
219 			size_t				NodeSize() const { return fNodeSize; }
220 			Inode*				Stream() const { return fStream; }
221 
222 			status_t			Validate(bool repair, bool& _errorsFound);
223 			status_t			MakeEmpty();
224 
225 			status_t			Remove(Transaction& transaction,
226 									const uint8* key, uint16 keyLength,
227 									off_t value);
228 			status_t			Insert(Transaction& transaction,
229 									const uint8* key, uint16 keyLength,
230 									off_t value);
231 
232 			status_t			Remove(Transaction& transaction, const char* key,
233 									off_t value);
234 			status_t			Insert(Transaction& transaction, const char* key,
235 									off_t value);
236 			status_t			Insert(Transaction& transaction, int32 key,
237 									off_t value);
238 			status_t			Insert(Transaction& transaction, uint32 key,
239 									off_t value);
240 			status_t			Insert(Transaction& transaction, int64 key,
241 									off_t value);
242 			status_t			Insert(Transaction& transaction, uint64 key,
243 									off_t value);
244 			status_t			Insert(Transaction& transaction, float key,
245 									off_t value);
246 			status_t			Insert(Transaction& transaction, double key,
247 									off_t value);
248 
249 			status_t			Replace(Transaction& transaction,
250 									const uint8* key, uint16 keyLength,
251 									off_t value);
252 			status_t			Find(const uint8* key, uint16 keyLength,
253 									off_t* value);
254 
255 	static	int32				TypeCodeToKeyType(type_code code);
256 	static	int32				ModeToKeyType(mode_t mode);
257 
258 protected:
259 	virtual void				TransactionDone(bool success);
260 	virtual void				RemovedFromTransaction();
261 
262 private:
263 								BPlusTree(const BPlusTree& other);
264 								BPlusTree& operator=(const BPlusTree& other);
265 									// no implementation
266 
267 			int32				_CompareKeys(const void* key1, int keylength1,
268 									const void* key2, int keylength2);
269 			status_t			_FindKey(const bplustree_node* node,
270 									const uint8* key, uint16 keyLength,
271 									uint16* index = NULL, off_t* next = NULL);
272 			status_t			_SeekDown(Stack<node_and_key>& stack,
273 									const uint8* key, uint16 keyLength);
274 
275 			status_t			_FindFreeDuplicateFragment(
276 									Transaction& transaction,
277 									const bplustree_node* node,
278 									CachedNode& cached, off_t* _offset,
279 									bplustree_node** _fragment, uint32* _index);
280 			status_t			_InsertDuplicate(Transaction& transaction,
281 									CachedNode& cached,
282 									const bplustree_node* node, uint16 index,
283 									off_t value);
284 			void				_InsertKey(bplustree_node* node, uint16 index,
285 									uint8* key, uint16 keyLength, off_t value);
286 			status_t			_SplitNode(bplustree_node* node,
287 									off_t nodeOffset, bplustree_node* other,
288 									off_t otherOffset, uint16* _keyIndex,
289 									uint8* key, uint16* _keyLength,
290 									off_t* _value);
291 
292 			status_t			_RemoveDuplicate(Transaction& transaction,
293 									const bplustree_node* node,
294 									CachedNode& cached, uint16 keyIndex,
295 									off_t value);
296 			void				_RemoveKey(bplustree_node* node, uint16 index);
297 
298 			void				_UpdateIterators(off_t offset, off_t nextOffset,
299 									uint16 keyIndex, uint16 splitAt,
300 									int8 change);
301 			void				_AddIterator(TreeIterator* iterator);
302 			void				_RemoveIterator(TreeIterator* iterator);
303 
304 			status_t			_ValidateChildren(TreeCheck& check,
305 									uint32 level, off_t offset,
306 									const uint8* largestKey, uint16 keyLength,
307 									const bplustree_node* parent);
308 			status_t			_ValidateChild(TreeCheck& check,
309 									CachedNode& cached, uint32 level,
310 									off_t offset, off_t lastOffset,
311 									off_t nextOffset, const uint8* key,
312 									uint16 keyLength);
313 
314 private:
315 			friend class TreeIterator;
316 			friend class CachedNode;
317 			friend class TreeCheck;
318 
319 			Inode*				fStream;
320 			bplustree_header	fHeader;
321 			int32				fNodeSize;
322 			bool				fAllowDuplicates;
323 			bool				fInTransaction;
324 			status_t			fStatus;
325 			mutex				fIteratorLock;
326 			SinglyLinkedList<TreeIterator> fIterators;
327 };
328 
329 
330 //	#pragma mark - helper classes/functions
331 
332 
333 extern int32 compareKeys(type_code type, const void* key1, int keyLength1,
334 	const void* key2, int keyLength2);
335 
336 
337 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
338 public:
339 								TreeIterator(BPlusTree* tree);
340 								~TreeIterator();
341 
342 			status_t			Goto(int8 to);
343 			status_t			Traverse(int8 direction, void* key,
344 									uint16* keyLength, uint16 maxLength,
345 									off_t* value, uint16* duplicate = NULL);
346 			status_t			Find(const uint8* key, uint16 keyLength);
347 
348 			status_t			Rewind();
349 			status_t			GetNextEntry(void* key, uint16* keyLength,
350 									uint16 maxLength, off_t* value,
351 									uint16* duplicate = NULL);
352 			status_t			GetPreviousEntry(void* key, uint16* keyLength,
353 									uint16 maxLength, off_t* value,
354 									uint16* duplicate = NULL);
355 			void				SkipDuplicates();
356 
357 			BPlusTree*			Tree() const { return fTree; }
358 
359 #ifdef DEBUG
360 			void				Dump();
361 #endif
362 
363 private:
364 			friend class BPlusTree;
365 
366 			// called by BPlusTree
367 			void				Update(off_t offset, off_t nextOffset,
368 									uint16 keyIndex, uint16 splitAt,
369 									int8 change);
370 			void				Stop();
371 
372 private:
373 			BPlusTree*			fTree;
374 			off_t				fCurrentNodeOffset;
375 									// traverse position
376 			int32				fCurrentKey;
377 			off_t				fDuplicateNode;
378 			uint16				fDuplicate;
379 			uint16				fNumDuplicates;
380 			bool				fIsFragment;
381 };
382 
383 
384 //	#pragma mark - BPlusTree's inline functions
385 //	(most of them may not be needed)
386 
387 
388 inline status_t
389 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
390 {
391 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
392 		return B_BAD_TYPE;
393 	return Remove(transaction, (uint8*)key, strlen(key), value);
394 }
395 
396 
397 inline status_t
398 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
399 {
400 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
401 		return B_BAD_TYPE;
402 	return Insert(transaction, (uint8*)key, strlen(key), value);
403 }
404 
405 
406 inline status_t
407 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
408 {
409 	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
410 		return B_BAD_TYPE;
411 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
412 }
413 
414 
415 inline status_t
416 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
417 {
418 	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
419 		return B_BAD_TYPE;
420 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
421 }
422 
423 
424 inline status_t
425 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
426 {
427 	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
428 		return B_BAD_TYPE;
429 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
430 }
431 
432 
433 inline status_t
434 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
435 {
436 	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
437 		return B_BAD_TYPE;
438 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
439 }
440 
441 
442 inline status_t
443 BPlusTree::Insert(Transaction& transaction, float key, off_t value)
444 {
445 	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
446 		return B_BAD_TYPE;
447 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
448 }
449 
450 
451 inline status_t
452 BPlusTree::Insert(Transaction& transaction, double key, off_t value)
453 {
454 	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
455 		return B_BAD_TYPE;
456 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
457 }
458 
459 
460 //	#pragma mark - TreeIterator inline functions
461 
462 
463 inline status_t
464 TreeIterator::Rewind()
465 {
466 	return Goto(BPLUSTREE_BEGIN);
467 }
468 
469 
470 inline status_t
471 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
472 	off_t* value, uint16* duplicate)
473 {
474 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
475 		duplicate);
476 }
477 
478 
479 inline status_t
480 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
481 	off_t* value, uint16* duplicate)
482 {
483 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
484 		duplicate);
485 }
486 
487 
488 //	#pragma mark - bplustree_header inline functions
489 
490 
491 inline bool
492 bplustree_header::CheckNode(bplustree_node* node) const
493 {
494 	// sanity checks (links, all_key_count)
495 	return IsValidLink(node->LeftLink())
496 		&& IsValidLink(node->RightLink())
497 		&& IsValidLink(node->OverflowLink())
498 		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
499 				<= (int8*)node + NodeSize();
500 }
501 
502 
503 inline bool
504 bplustree_header::IsValidLink(off_t link) const
505 {
506 	return link == BPLUSTREE_NULL
507 		|| (link > 0 && link <= MaximumSize() - NodeSize());
508 }
509 
510 
511 //	#pragma mark - bplustree_node inline functions
512 
513 
514 inline uint16*
515 bplustree_node::KeyLengths() const
516 {
517 	return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node)
518 		+ AllKeyLength()));
519 }
520 
521 
522 inline off_t*
523 bplustree_node::Values() const
524 {
525 	return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16));
526 }
527 
528 
529 inline uint8*
530 bplustree_node::Keys() const
531 {
532 	return (uint8*)this + sizeof(bplustree_node);
533 }
534 
535 
536 inline int32
537 bplustree_node::Used() const
538 {
539 	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
540 		* (sizeof(uint16) + sizeof(off_t));
541 }
542 
543 
544 inline bool
545 bplustree_node::IsLeaf() const
546 {
547 	return OverflowLink() == BPLUSTREE_NULL;
548 }
549 
550 
551 inline duplicate_array*
552 bplustree_node::FragmentAt(int8 index) const
553 {
554 	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
555 }
556 
557 
558 inline duplicate_array*
559 bplustree_node::DuplicateArray() const
560 {
561 	return (duplicate_array*)&overflow_link;
562 }
563 
564 
565 inline uint8
566 bplustree_node::LinkType(off_t link)
567 {
568 	return *(uint64*)&link >> 62;
569 }
570 
571 
572 inline off_t
573 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
574 {
575 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
576 		| (fragmentIndex & 0x3ff);
577 }
578 
579 
580 inline bool
581 bplustree_node::IsDuplicate(off_t link)
582 {
583 	return (LinkType(link)
584 		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
585 }
586 
587 
588 inline off_t
589 bplustree_node::FragmentOffset(off_t link)
590 {
591 	return link & 0x3ffffffffffffc00LL;
592 }
593 
594 
595 inline uint32
596 bplustree_node::FragmentIndex(off_t link)
597 {
598 	return (uint32)(link & 0x3ff);
599 }
600 
601 
602 inline uint32
603 bplustree_node::MaxFragments(uint32 nodeSize)
604 {
605 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
606 }
607 
608 
609 #endif	// B_PLUS_TREE_H
610