xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision b55a57da7173b9af0432bd3e148d03f06161d036)
1 /*
2  * Copyright 2001-2009, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7 
8 
9 #include "bfs.h"
10 #include "Journal.h"
11 
12 
13 //	#pragma mark - on-disk structures
14 
15 struct bplustree_node;
16 
17 #define BPLUSTREE_NULL			-1LL
18 #define BPLUSTREE_FREE			-2LL
19 
20 struct bplustree_header {
21 	uint32		magic;
22 	uint32		node_size;
23 	uint32		max_number_of_levels;
24 	uint32		data_type;
25 	int64		root_node_pointer;
26 	int64		free_node_pointer;
27 	int64		maximum_size;
28 
29 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
30 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
31 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
32 	off_t RootNode() const
33 		{ return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
34 	off_t FreeNode() const
35 		{ return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
36 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
37 	uint32 MaxNumberOfLevels() const
38 		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
39 
40 	inline bool CheckNode(bplustree_node* node) const;
41 	inline bool IsValidLink(off_t link) const;
42 } _PACKED;
43 
44 #define BPLUSTREE_MAGIC 			0x69f6c2e8
45 #define BPLUSTREE_NODE_SIZE 		1024
46 #define BPLUSTREE_MAX_KEY_LENGTH	256
47 #define BPLUSTREE_MIN_KEY_LENGTH	1
48 
49 enum bplustree_types {
50 	BPLUSTREE_STRING_TYPE	= 0,
51 	BPLUSTREE_INT32_TYPE	= 1,
52 	BPLUSTREE_UINT32_TYPE	= 2,
53 	BPLUSTREE_INT64_TYPE	= 3,
54 	BPLUSTREE_UINT64_TYPE	= 4,
55 	BPLUSTREE_FLOAT_TYPE	= 5,
56 	BPLUSTREE_DOUBLE_TYPE	= 6
57 };
58 
59 struct sorted_array;
60 typedef sorted_array duplicate_array;
61 
62 struct bplustree_node {
63 	int64	left_link;
64 	int64	right_link;
65 	int64	overflow_link;
66 	uint16	all_key_count;
67 	uint16	all_key_length;
68 
69 	off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); }
70 	off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); }
71 	off_t OverflowLink() const
72 		{ return BFS_ENDIAN_TO_HOST_INT64(overflow_link); }
73 	uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); }
74 	uint16 AllKeyLength() const
75 		{ return BFS_ENDIAN_TO_HOST_INT16(all_key_length); }
76 
77 	inline uint16* KeyLengths() const;
78 	inline off_t* Values() const;
79 	inline uint8* Keys() const;
80 	inline int32 Used() const;
81 	uint8* KeyAt(int32 index, uint16* keyLength) const;
82 
83 	inline bool IsLeaf() const;
84 
85 	void Initialize();
86 	uint8 CountDuplicates(off_t offset, bool isFragment) const;
87 	off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const;
88 	uint32 FragmentsUsed(uint32 nodeSize) const;
89 	inline duplicate_array* FragmentAt(int8 index) const;
90 	inline duplicate_array* DuplicateArray() const;
91 
92 	static inline uint8 LinkType(off_t link);
93 	static inline off_t MakeLink(uint8 type, off_t link,
94 		uint32 fragmentIndex = 0);
95 	static inline bool IsDuplicate(off_t link);
96 	static inline off_t FragmentOffset(off_t link);
97 	static inline uint32 FragmentIndex(off_t link);
98 	static inline uint32 MaxFragments(uint32 nodeSize);
99 
100 #ifdef DEBUG
101 	status_t CheckIntegrity(uint32 nodeSize) const;
102 #endif
103 } _PACKED;
104 
105 //#define BPLUSTREE_NODE 0
106 #define BPLUSTREE_DUPLICATE_NODE 2
107 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
108 
109 #define NUM_FRAGMENT_VALUES 7
110 #define NUM_DUPLICATE_VALUES 125
111 
112 //**************************************
113 
114 enum bplustree_traversing {
115 	BPLUSTREE_FORWARD = 1,
116 	BPLUSTREE_BACKWARD = -1,
117 
118 	BPLUSTREE_BEGIN = 0,
119 	BPLUSTREE_END = 1
120 };
121 
122 
123 //	#pragma mark - in-memory structures
124 
125 template<class T> class Stack;
126 class BPlusTree;
127 class TreeIterator;
128 class CachedNode;
129 class Inode;
130 
131 // needed for searching (utilizing a stack)
132 struct node_and_key {
133 	off_t	nodeOffset;
134 	uint16	keyIndex;
135 };
136 
137 
138 class CachedNode {
139 public:
140 	CachedNode(BPlusTree* tree)
141 		:
142 		fTree(tree),
143 		fNode(NULL)
144 	{
145 	}
146 
147 	CachedNode(BPlusTree* tree, off_t offset, bool check = true)
148 		:
149 		fTree(tree),
150 		fNode(NULL)
151 	{
152 		SetTo(offset, check);
153 	}
154 
155 	~CachedNode()
156 	{
157 		Unset();
158 	}
159 
160 	const bplustree_node* SetTo(off_t offset, bool check = true);
161 	bplustree_node* SetToWritable(Transaction& transaction, off_t offset,
162 		bool check = true);
163 	bplustree_node* MakeWritable(Transaction& transaction);
164 	const bplustree_header* SetToHeader();
165 	bplustree_header* SetToWritableHeader(Transaction& transaction);
166 	bplustree_header* MakeWritableHeader(Transaction& transaction);
167 
168 	void UnsetUnchanged(Transaction& transaction);
169 	void Unset();
170 
171 	status_t Free(Transaction& transaction, off_t offset);
172 	status_t Allocate(Transaction& transaction, bplustree_node** node,
173 		off_t* offset);
174 
175 	bool IsWritable() const { return fWritable; }
176 	bplustree_node* Node() const { return fNode; }
177 
178 protected:
179 	bplustree_node*	InternalSetTo(Transaction* transaction, off_t offset);
180 
181 	BPlusTree*		fTree;
182 	bplustree_node*	fNode;
183 	off_t			fOffset;
184 	off_t			fBlockNumber;
185 	bool			fWritable;
186 };
187 
188 
189 class BPlusTree {
190 public:
191 						BPlusTree(Transaction& transaction, Inode* stream,
192 							int32 nodeSize = BPLUSTREE_NODE_SIZE);
193 						BPlusTree(Inode* stream);
194 						BPlusTree();
195 						~BPlusTree();
196 
197 			status_t	SetTo(Transaction& transaction, Inode* stream,
198 							int32 nodeSize = BPLUSTREE_NODE_SIZE);
199 			status_t	SetTo(Inode* stream);
200 			status_t	SetStream(Inode* stream);
201 
202 			status_t	InitCheck();
203 			status_t	Validate();
204 
205 			status_t	Remove(Transaction& transaction, const uint8* key,
206 							uint16 keyLength, off_t value);
207 			status_t	Insert(Transaction& transaction, const uint8* key,
208 							uint16 keyLength, off_t value);
209 
210 			status_t	Remove(Transaction& transaction, const char* key,
211 							off_t value);
212 			status_t	Insert(Transaction& transaction, const char* key,
213 							off_t value);
214 			status_t	Insert(Transaction& transaction, int32 key,
215 							off_t value);
216 			status_t	Insert(Transaction& transaction, uint32 key,
217 							off_t value);
218 			status_t	Insert(Transaction& transaction, int64 key,
219 							off_t value);
220 			status_t	Insert(Transaction& transaction, uint64 key,
221 							off_t value);
222 			status_t	Insert(Transaction& transaction, float key,
223 							off_t value);
224 			status_t	Insert(Transaction& transaction, double key,
225 							off_t value);
226 
227 			status_t	Replace(Transaction& transaction, const uint8* key,
228 							uint16 keyLength, off_t value);
229 			status_t	Find(const uint8* key, uint16 keyLength, off_t* value);
230 
231 	static	int32		TypeCodeToKeyType(type_code code);
232 	static	int32		ModeToKeyType(mode_t mode);
233 
234 private:
235 						BPlusTree(const BPlusTree& other);
236 						BPlusTree& operator=(const BPlusTree& other);
237 							// no implementation
238 
239 			int32		_CompareKeys(const void* key1, int keylength1,
240 							const void* key2, int keylength2);
241 			status_t	_FindKey(const bplustree_node* node, const uint8* key,
242 							uint16 keyLength, uint16* index = NULL,
243 							off_t* next = NULL);
244 			status_t	_SeekDown(Stack<node_and_key>& stack, const uint8* key,
245 							uint16 keyLength);
246 
247 			status_t	_FindFreeDuplicateFragment(Transaction& transaction,
248 							const bplustree_node* node, CachedNode& cached,
249 							off_t* _offset, bplustree_node** _fragment,
250 							uint32* _index);
251 			status_t	_InsertDuplicate(Transaction& transaction,
252 							CachedNode& cached, const bplustree_node* node,
253 							uint16 index, off_t value);
254 			void		_InsertKey(bplustree_node* node, uint16 index,
255 							uint8* key, uint16 keyLength, off_t value);
256 			status_t	_SplitNode(bplustree_node* node, off_t nodeOffset,
257 							bplustree_node* other, off_t otherOffset,
258 							uint16* _keyIndex, uint8* key, uint16* _keyLength,
259 							off_t* _value);
260 
261 			status_t	_RemoveDuplicate(Transaction& transaction,
262 							const bplustree_node* node, CachedNode& cached,
263 							uint16 keyIndex, off_t value);
264 			void		_RemoveKey(bplustree_node* node, uint16 index);
265 
266 			void		_UpdateIterators(off_t offset, off_t nextOffset,
267 							uint16 keyIndex, uint16 splitAt, int8 change);
268 			void		_AddIterator(TreeIterator* iterator);
269 			void		_RemoveIterator(TreeIterator* iterator);
270 
271 private:
272 	friend class TreeIterator;
273 	friend class CachedNode;
274 
275 			Inode*		fStream;
276 			const bplustree_header* fHeader;
277 			CachedNode	fCachedHeader;
278 			int32		fNodeSize;
279 			bool		fAllowDuplicates;
280 			status_t	fStatus;
281 			mutex		fIteratorLock;
282 			SinglyLinkedList<TreeIterator> fIterators;
283 };
284 
285 
286 //	#pragma mark - helper classes/functions
287 
288 extern int32 compareKeys(type_code type, const void* key1, int keyLength1,
289 	const void* key2, int keyLength2);
290 
291 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
292 public:
293 						TreeIterator(BPlusTree* tree);
294 						~TreeIterator();
295 
296 			status_t	Goto(int8 to);
297 			status_t	Traverse(int8 direction, void* key, uint16* keyLength,
298 							uint16 maxLength, off_t* value,
299 							uint16* duplicate = NULL);
300 			status_t	Find(const uint8* key, uint16 keyLength);
301 
302 			status_t	Rewind();
303 			status_t	GetNextEntry(void* key, uint16* keyLength,
304 							uint16 maxLength, off_t* value,
305 							uint16* duplicate = NULL);
306 			status_t	GetPreviousEntry(void* key, uint16* keyLength,
307 							uint16 maxLength, off_t* value,
308 							uint16* duplicate = NULL);
309 			void		SkipDuplicates();
310 
311 			BPlusTree*	Tree() const { return fTree; }
312 
313 #ifdef DEBUG
314 			void		Dump();
315 #endif
316 
317 private:
318 	friend class BPlusTree;
319 
320 			// called by BPlusTree
321 			void		Update(off_t offset, off_t nextOffset, uint16 keyIndex,
322 							uint16 splitAt, int8 change);
323 			void		Stop();
324 
325 private:
326 			BPlusTree*	fTree;
327 			off_t		fCurrentNodeOffset;
328 							// traverse position
329 			int32		fCurrentKey;
330 			off_t		fDuplicateNode;
331 			uint16		fDuplicate, fNumDuplicates;
332 			bool		fIsFragment;
333 };
334 
335 
336 //	#pragma mark - BPlusTree's inline functions
337 //	(most of them may not be needed)
338 
339 
340 inline status_t
341 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
342 {
343 	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
344 		return B_BAD_TYPE;
345 	return Remove(transaction, (uint8*)key, strlen(key), value);
346 }
347 
348 inline status_t
349 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
350 {
351 	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
352 		return B_BAD_TYPE;
353 	return Insert(transaction, (uint8*)key, strlen(key), value);
354 }
355 
356 inline status_t
357 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
358 {
359 	if (fHeader->data_type != BPLUSTREE_INT32_TYPE)
360 		return B_BAD_TYPE;
361 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
362 }
363 
364 inline status_t
365 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
366 {
367 	if (fHeader->data_type != BPLUSTREE_UINT32_TYPE)
368 		return B_BAD_TYPE;
369 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
370 }
371 
372 inline status_t
373 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
374 {
375 	if (fHeader->data_type != BPLUSTREE_INT64_TYPE)
376 		return B_BAD_TYPE;
377 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
378 }
379 
380 inline status_t
381 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
382 {
383 	if (fHeader->data_type != BPLUSTREE_UINT64_TYPE)
384 		return B_BAD_TYPE;
385 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
386 }
387 
388 inline status_t
389 BPlusTree::Insert(Transaction& transaction, float key, off_t value)
390 {
391 	if (fHeader->data_type != BPLUSTREE_FLOAT_TYPE)
392 		return B_BAD_TYPE;
393 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
394 }
395 
396 inline status_t
397 BPlusTree::Insert(Transaction& transaction, double key, off_t value)
398 {
399 	if (fHeader->data_type != BPLUSTREE_DOUBLE_TYPE)
400 		return B_BAD_TYPE;
401 	return Insert(transaction, (uint8*)&key, sizeof(key), value);
402 }
403 
404 
405 //	#pragma mark - TreeIterator inline functions
406 
407 
408 inline status_t
409 TreeIterator::Rewind()
410 {
411 	return Goto(BPLUSTREE_BEGIN);
412 }
413 
414 inline status_t
415 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
416 	off_t* value, uint16* duplicate)
417 {
418 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
419 		duplicate);
420 }
421 
422 inline status_t
423 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
424 	off_t* value, uint16* duplicate)
425 {
426 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
427 		duplicate);
428 }
429 
430 
431 //	#pragma mark - bplustree_header inline functions
432 
433 
434 inline bool
435 bplustree_header::CheckNode(bplustree_node* node) const
436 {
437 	// sanity checks (links, all_key_count)
438 	return IsValidLink(node->LeftLink())
439 		&& IsValidLink(node->RightLink())
440 		&& IsValidLink(node->OverflowLink())
441 		&& (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
442 				<= (int8*)node + NodeSize();
443 }
444 
445 
446 inline bool
447 bplustree_header::IsValidLink(off_t link) const
448 {
449 	return link == BPLUSTREE_NULL
450 		|| (link > 0 && link <= MaximumSize() - NodeSize());
451 }
452 
453 
454 //	#pragma mark - bplustree_node inline functions
455 
456 
457 inline uint16*
458 bplustree_node::KeyLengths() const
459 {
460 	return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node)
461 		+ AllKeyLength()));
462 }
463 
464 
465 inline off_t*
466 bplustree_node::Values() const
467 {
468 	return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16));
469 }
470 
471 
472 inline uint8*
473 bplustree_node::Keys() const
474 {
475 	return (uint8*)this + sizeof(bplustree_node);
476 }
477 
478 
479 inline int32
480 bplustree_node::Used() const
481 {
482 	return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
483 		* (sizeof(uint16) + sizeof(off_t));
484 }
485 
486 
487 inline bool
488 bplustree_node::IsLeaf() const
489 {
490 	return OverflowLink() == BPLUSTREE_NULL;
491 }
492 
493 
494 inline duplicate_array*
495 bplustree_node::FragmentAt(int8 index) const
496 {
497 	return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
498 }
499 
500 
501 inline duplicate_array*
502 bplustree_node::DuplicateArray() const
503 {
504 	return (duplicate_array*)&overflow_link;
505 }
506 
507 
508 inline uint8
509 bplustree_node::LinkType(off_t link)
510 {
511 	return *(uint64*)&link >> 62;
512 }
513 
514 
515 inline off_t
516 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
517 {
518 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
519 		| (fragmentIndex & 0x3ff);
520 }
521 
522 
523 inline bool
524 bplustree_node::IsDuplicate(off_t link)
525 {
526 	return (LinkType(link)
527 		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
528 }
529 
530 
531 inline off_t
532 bplustree_node::FragmentOffset(off_t link)
533 {
534 	return link & 0x3ffffffffffffc00LL;
535 }
536 
537 
538 inline uint32
539 bplustree_node::FragmentIndex(off_t link)
540 {
541 	return (uint32)(link & 0x3ff);
542 }
543 
544 
545 inline uint32
546 bplustree_node::MaxFragments(uint32 nodeSize)
547 {
548 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
549 }
550 
551 #endif	// B_PLUS_TREE_H
552