xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision f2b4344867e97c3f4e742a1b4a15e6879644601a)
1 /*
2  * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7 
8 
9 #include "bfs.h"
10 #include "Journal.h"
11 
12 
13 //	#pragma mark - on-disk structures
14 
15 struct bplustree_node;
16 
17 #define BPLUSTREE_NULL			-1LL
18 #define BPLUSTREE_FREE			-2LL
19 
20 struct bplustree_header {
21 	uint32		magic;
22 	uint32		node_size;
23 	uint32		max_number_of_levels;
24 	uint32		data_type;
25 	int64		root_node_pointer;
26 	int64		free_node_pointer;
27 	int64		maximum_size;
28 
29 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
30 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
31 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
32 	off_t RootNode() const
33 		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
34 	off_t FreeNode() const
35 		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
36 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
37 	uint32 MaxNumberOfLevels() const
38 		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
39 
40 	inline bool CheckNode(bplustree_node* node) const;
41 	inline bool IsValidLink(off_t link) const;
42 } _PACKED;
43 
44 #define BPLUSTREE_MAGIC 			0x69f6c2e8
45 #define BPLUSTREE_NODE_SIZE 		1024
46 #define BPLUSTREE_MAX_KEY_LENGTH	256
47 #define BPLUSTREE_MIN_KEY_LENGTH	1
48 
49 enum bplustree_types {
50 	BPLUSTREE_STRING_TYPE	= 0,
51 	BPLUSTREE_INT32_TYPE	= 1,
52 	BPLUSTREE_UINT32_TYPE	= 2,
53 	BPLUSTREE_INT64_TYPE	= 3,
54 	BPLUSTREE_UINT64_TYPE	= 4,
55 	BPLUSTREE_FLOAT_TYPE	= 5,
56 	BPLUSTREE_DOUBLE_TYPE	= 6
57 };
58 
59 struct sorted_array;
60 typedef sorted_array duplicate_array;
61 
62 struct bplustree_node {
63 			int64				left_link;
64 			int64				right_link;
65 			int64				overflow_link;
66 			uint16				all_key_count;
67 			uint16				all_key_length;
68 
69 			off_t				LeftLink() const
70 									{ return BFS_ENDIAN_TO_HOST_INT64(
71 										left_link); }
72 			off_t				RightLink() const
73 									{ return BFS_ENDIAN_TO_HOST_INT64(
74 										right_link); }
75 			off_t				OverflowLink() const
76 									{ return BFS_ENDIAN_TO_HOST_INT64(
77 										overflow_link); }
78 			uint16				NumKeys() const
79 									{ return BFS_ENDIAN_TO_HOST_INT16(
80 										all_key_count); }
81 			uint16				AllKeyLength() const
82 									{ return BFS_ENDIAN_TO_HOST_INT16(
83 										all_key_length); }
84 
85 	inline	uint16*				KeyLengths() const;
86 	inline	off_t*				Values() const;
87 	inline	uint8*				Keys() const;
88 	inline	int32				Used() const;
89 			uint8*				KeyAt(int32 index, uint16* keyLength) const;
90 
91 	inline	bool				IsLeaf() const;
92 
93 			void				Initialize();
94 			uint8				CountDuplicates(off_t offset,
95 									bool isFragment) const;
96 			off_t				DuplicateAt(off_t offset, bool isFragment,
97 									int8 index) const;
98 			uint32				FragmentsUsed(uint32 nodeSize) const;
99 	inline	duplicate_array*	FragmentAt(int8 index) const;
100 	inline	duplicate_array*	DuplicateArray() const;
101 
102 	static inline uint8			LinkType(off_t link);
103 	static inline off_t			MakeLink(uint8 type, off_t link,
104 									uint32 fragmentIndex = 0);
105 	static inline bool			IsDuplicate(off_t link);
106 	static inline off_t			FragmentOffset(off_t link);
107 	static inline uint32		FragmentIndex(off_t link);
108 	static inline uint32		MaxFragments(uint32 nodeSize);
109 
110 #ifdef DEBUG
111 			status_t			CheckIntegrity(uint32 nodeSize) const;
112 #endif
113 } _PACKED;
114 
115 //#define BPLUSTREE_NODE 0
116 #define BPLUSTREE_DUPLICATE_NODE 2
117 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
118 
119 #define NUM_FRAGMENT_VALUES 7
120 #define NUM_DUPLICATE_VALUES 125
121 
122 //**************************************
123 
124 enum bplustree_traversing {
125 	BPLUSTREE_FORWARD = 1,
126 	BPLUSTREE_BACKWARD = -1,
127 
128 	BPLUSTREE_BEGIN = 0,
129 	BPLUSTREE_END = 1
130 };
131 
132 
133 //	#pragma mark - in-memory structures
134 
135 template<class T> class Stack;
136 class BPlusTree;
137 class TreeIterator;
138 class CachedNode;
139 class Inode;
140 
141 // needed for searching (utilizing a stack)
142 struct node_and_key {
143 	off_t	nodeOffset;
144 	uint16	keyIndex;
145 };
146 
147 
148 class CachedNode {
149 public:
150 	CachedNode(BPlusTree* tree)
151 		:
152 		fTree(tree),
153 		fNode(NULL)
154 	{
155 	}
156 
157 	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
158 		:
159 		fTree(tree),
160 		fNode(NULL)
161 	{
162 		SetTo(offset, check);
163 	}
164 
165 	~CachedNode()
166 	{
167 		Unset();
168 	}
169 
170 			const bplustree_node* SetTo(off_t offset, bool check = true);
171 			bplustree_node*		SetToWritable(Transaction& transaction,
172 									off_t offset, bool check = true);
173 			bplustree_node*		MakeWritable(Transaction& transaction);
174 			const bplustree_header* SetToHeader();
175 			bplustree_header*	SetToWritableHeader(Transaction& transaction);
176 
177 			void				UnsetUnchanged(Transaction& transaction);
178 			void				Unset();
179 
180 			status_t			Free(Transaction& transaction, off_t offset);
181 			status_t			Allocate(Transaction& transaction,
182 									bplustree_node** _node, off_t* _offset);
183 
184 			bool				IsWritable() const { return fWritable; }
185 			bplustree_node*		Node() const { return fNode; }
186 
187 protected:
188 			bplustree_node*		InternalSetTo(Transaction* transaction,
189 									off_t offset);
190 
191 			BPlusTree*			fTree;
192 			bplustree_node*		fNode;
193 			off_t				fOffset;
194 			off_t				fBlockNumber;
195 			bool				fWritable;
196 };
197 
198 
199 class BPlusTree : public TransactionListener {
200 public:
201 								BPlusTree(Transaction& transaction,
202 									Inode* stream,
203 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
204 								BPlusTree(Inode* stream);
205 								BPlusTree();
206 								~BPlusTree();
207 
208 			status_t			SetTo(Transaction& transaction, Inode* stream,
209 									int32 nodeSize = BPLUSTREE_NODE_SIZE);
210 			status_t			SetTo(Inode* stream);
211 			status_t			SetStream(Inode* stream);
212 
213 			status_t			InitCheck();
214 			status_t			Validate();
215 
216 			status_t			Remove(Transaction& transaction,
217 									const uint8* key, uint16 keyLength,
218 									off_t value);
219 			status_t			Insert(Transaction& transaction,
220 									const uint8* key, uint16 keyLength,
221 									off_t value);
222 
223 			status_t			Remove(Transaction& transaction, const char* key,
224 									off_t value);
225 			status_t			Insert(Transaction& transaction, const char* key,
226 									off_t value);
227 			status_t			Insert(Transaction& transaction, int32 key,
228 									off_t value);
229 			status_t			Insert(Transaction& transaction, uint32 key,
230 									off_t value);
231 			status_t			Insert(Transaction& transaction, int64 key,
232 									off_t value);
233 			status_t			Insert(Transaction& transaction, uint64 key,
234 									off_t value);
235 			status_t			Insert(Transaction& transaction, float key,
236 									off_t value);
237 			status_t			Insert(Transaction& transaction, double key,
238 									off_t value);
239 
240 			status_t			Replace(Transaction& transaction,
241 									const uint8* key, uint16 keyLength,
242 									off_t value);
243 			status_t			Find(const uint8* key, uint16 keyLength,
244 									off_t* value);
245 
246 	static	int32				TypeCodeToKeyType(type_code code);
247 	static	int32				ModeToKeyType(mode_t mode);
248 
249 protected:
250 	virtual void				TransactionDone(bool success);
251 	virtual void				RemovedFromTransaction();
252 
253 private:
254 								BPlusTree(const BPlusTree& other);
255 								BPlusTree& operator=(const BPlusTree& other);
256 									// no implementation
257 
258 			int32				_CompareKeys(const void* key1, int keylength1,
259 									const void* key2, int keylength2);
260 			status_t			_FindKey(const bplustree_node* node,
261 									const uint8* key, uint16 keyLength,
262 									uint16* index = NULL, off_t* next = NULL);
263 			status_t			_SeekDown(Stack<node_and_key>& stack,
264 									const uint8* key, uint16 keyLength);
265 
266 			status_t			_FindFreeDuplicateFragment(
267 									Transaction& transaction,
268 									const bplustree_node* node,
269 									CachedNode& cached, off_t* _offset,
270 									bplustree_node** _fragment, uint32* _index);
271 			status_t			_InsertDuplicate(Transaction& transaction,
272 									CachedNode& cached,
273 									const bplustree_node* node, uint16 index,
274 									off_t value);
275 			void				_InsertKey(bplustree_node* node, uint16 index,
276 									uint8* key, uint16 keyLength, off_t value);
277 			status_t			_SplitNode(bplustree_node* node,
278 									off_t nodeOffset, bplustree_node* other,
279 									off_t otherOffset, uint16* _keyIndex,
280 									uint8* key, uint16* _keyLength,
281 									off_t* _value);
282 
283 			status_t			_RemoveDuplicate(Transaction& transaction,
284 									const bplustree_node* node,
285 									CachedNode& cached, uint16 keyIndex,
286 									off_t value);
287 			void				_RemoveKey(bplustree_node* node, uint16 index);
288 
289 			void				_UpdateIterators(off_t offset, off_t nextOffset,
290 									uint16 keyIndex, uint16 splitAt,
291 									int8 change);
292 			void				_AddIterator(TreeIterator* iterator);
293 			void				_RemoveIterator(TreeIterator* iterator);
294 
295 private:
296 			friend class TreeIterator;
297 			friend class CachedNode;
298 
299 			Inode*				fStream;
300 			bplustree_header	fHeader;
301 			int32				fNodeSize;
302 			bool				fAllowDuplicates;
303 			bool				fInTransaction;
304 			status_t			fStatus;
305 			mutex				fIteratorLock;
306 			SinglyLinkedList<TreeIterator> fIterators;
307 };
308 
309 
310 //	#pragma mark - helper classes/functions
311 
312 extern int32 compareKeys(type_code type, const void* key1, int keyLength1,
313 	const void* key2, int keyLength2);
314 
315 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
316 public:
317 								TreeIterator(BPlusTree* tree);
318 								~TreeIterator();
319 
320 			status_t			Goto(int8 to);
321 			status_t			Traverse(int8 direction, void* key,
322 									uint16* keyLength, uint16 maxLength,
323 									off_t* value, uint16* duplicate = NULL);
324 			status_t			Find(const uint8* key, uint16 keyLength);
325 
326 			status_t			Rewind();
327 			status_t			GetNextEntry(void* key, uint16* keyLength,
328 									uint16 maxLength, off_t* value,
329 									uint16* duplicate = NULL);
330 			status_t			GetPreviousEntry(void* key, uint16* keyLength,
331 									uint16 maxLength, off_t* value,
332 									uint16* duplicate = NULL);
333 			void				SkipDuplicates();
334 
335 			BPlusTree*			Tree() const { return fTree; }
336 
337 #ifdef DEBUG
338 			void				Dump();
339 #endif
340 
341 private:
342 			friend class BPlusTree;
343 
344 			// called by BPlusTree
345 			void				Update(off_t offset, off_t nextOffset,
346 									uint16 keyIndex, uint16 splitAt,
347 									int8 change);
348 			void				Stop();
349 
350 private:
351 			BPlusTree*			fTree;
352 			off_t				fCurrentNodeOffset;
353 									// traverse position
354 			int32				fCurrentKey;
355 			off_t				fDuplicateNode;
356 			uint16				fDuplicate;
357 			uint16				fNumDuplicates;
358 			bool				fIsFragment;
359 };
360 
361 
362 //	#pragma mark - BPlusTree's inline functions
363 //	(most of them may not be needed)
364 
365 
366 inline status_t
367 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
368 {
369 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
370 		return B_BAD_TYPE;
371 	return Remove(transaction, (uint8*)key, strlen(key), value);
372 }
373 
374 inline status_t
375 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
376 {
377 	if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
378 		return B_BAD_TYPE;
379 	return Insert(transaction, (uint8*)key, strlen(key), value);
380 }
381 
382 inline status_t
383 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
384 {
385 	if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
386 		return B_BAD_TYPE;
387 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
388 }
389 
390 inline status_t
391 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
392 {
393 	if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
394 		return B_BAD_TYPE;
395 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
396 }
397 
398 inline status_t
399 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
400 {
401 	if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
402 		return B_BAD_TYPE;
403 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
404 }
405 
406 inline status_t
407 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
408 {
409 	if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
410 		return B_BAD_TYPE;
411 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
412 }
413 
414 inline status_t
415 BPlusTree::Insert(Transaction& transaction, float key, off_t value)
416 {
417 	if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
418 		return B_BAD_TYPE;
419 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
420 }
421 
422 inline status_t
423 BPlusTree::Insert(Transaction& transaction, double key, off_t value)
424 {
425 	if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
426 		return B_BAD_TYPE;
427 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
428 }
429 
430 
431 //	#pragma mark - TreeIterator inline functions
432 
433 
434 inline status_t
435 TreeIterator::Rewind()
436 {
437 	return Goto(BPLUSTREE_BEGIN);
438 }
439 
440 inline status_t
441 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
442 	off_t* value, uint16* duplicate)
443 {
444 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
445 		duplicate);
446 }
447 
448 inline status_t
449 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
450 	off_t* value, uint16* duplicate)
451 {
452 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
453 		duplicate);
454 }
455 
456 
457 //	#pragma mark - bplustree_header inline functions
458 
459 
460 inline bool
461 bplustree_header::CheckNode(bplustree_node* node) const
462 {
463 	// sanity checks (links, all_key_count)
464 	return IsValidLink(node->LeftLink())
465 		&& IsValidLink(node->RightLink())
466 		&& IsValidLink(node->OverflowLink())
467 		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
468 				<= (int8*)node + NodeSize();
469 }
470 
471 
472 inline bool
473 bplustree_header::IsValidLink(off_t link) const
474 {
475 	return link == BPLUSTREE_NULL
476 		|| (link > 0 && link <= MaximumSize() - NodeSize());
477 }
478 
479 
480 //	#pragma mark - bplustree_node inline functions
481 
482 
483 inline uint16*
484 bplustree_node::KeyLengths() const
485 {
486 	return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node)
487 		+ AllKeyLength()));
488 }
489 
490 
491 inline off_t*
492 bplustree_node::Values() const
493 {
494 	return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16));
495 }
496 
497 
498 inline uint8*
499 bplustree_node::Keys() const
500 {
501 	return (uint8*)this + sizeof(bplustree_node);
502 }
503 
504 
505 inline int32
506 bplustree_node::Used() const
507 {
508 	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
509 		* (sizeof(uint16) + sizeof(off_t));
510 }
511 
512 
513 inline bool
514 bplustree_node::IsLeaf() const
515 {
516 	return OverflowLink() == BPLUSTREE_NULL;
517 }
518 
519 
520 inline duplicate_array*
521 bplustree_node::FragmentAt(int8 index) const
522 {
523 	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
524 }
525 
526 
527 inline duplicate_array*
528 bplustree_node::DuplicateArray() const
529 {
530 	return (duplicate_array*)&overflow_link;
531 }
532 
533 
534 inline uint8
535 bplustree_node::LinkType(off_t link)
536 {
537 	return *(uint64*)&link >> 62;
538 }
539 
540 
541 inline off_t
542 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
543 {
544 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
545 		| (fragmentIndex & 0x3ff);
546 }
547 
548 
549 inline bool
550 bplustree_node::IsDuplicate(off_t link)
551 {
552 	return (LinkType(link)
553 		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
554 }
555 
556 
557 inline off_t
558 bplustree_node::FragmentOffset(off_t link)
559 {
560 	return link & 0x3ffffffffffffc00LL;
561 }
562 
563 
564 inline uint32
565 bplustree_node::FragmentIndex(off_t link)
566 {
567 	return (uint32)(link & 0x3ff);
568 }
569 
570 
571 inline uint32
572 bplustree_node::MaxFragments(uint32 nodeSize)
573 {
574 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
575 }
576 
577 #endif	// B_PLUS_TREE_H
578