xref: /haiku/src/add-ons/kernel/file_systems/bfs/BPlusTree.h (revision 652f187eee454506a61c85631eb5d8415f3e22d4)
1 /*
2  * Copyright 2001-2007, Axel Dörfler, axeld@pinc-software.de.
3  * This file may be used under the terms of the MIT License.
4  */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7 
8 
9 #include "bfs.h"
10 #include "Journal.h"
11 #include "Chain.h"
12 
13 
14 //	#pragma mark - on-disk structures
15 
16 struct bplustree_node;
17 
18 #define BPLUSTREE_NULL			-1LL
19 #define BPLUSTREE_FREE			-2LL
20 
21 struct bplustree_header {
22 	uint32		magic;
23 	uint32		node_size;
24 	uint32		max_number_of_levels;
25 	uint32		data_type;
26 	off_t		root_node_pointer;
27 	off_t		free_node_pointer;
28 	off_t		maximum_size;
29 
30 	uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
31 	uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
32 	uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
33 	off_t RootNode() const { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
34 	off_t FreeNode() const { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
35 	off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
36 	uint32 MaxNumberOfLevels() const
37 		{ return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
38 
39 	inline bool CheckNode(bplustree_node *node) const;
40 	inline bool IsValidLink(off_t link) const;
41 } _PACKED;
42 
43 #define BPLUSTREE_MAGIC 			0x69f6c2e8
44 #define BPLUSTREE_NODE_SIZE 		1024
45 #define BPLUSTREE_MAX_KEY_LENGTH	256
46 #define BPLUSTREE_MIN_KEY_LENGTH	1
47 
48 enum bplustree_types {
49 	BPLUSTREE_STRING_TYPE	= 0,
50 	BPLUSTREE_INT32_TYPE	= 1,
51 	BPLUSTREE_UINT32_TYPE	= 2,
52 	BPLUSTREE_INT64_TYPE	= 3,
53 	BPLUSTREE_UINT64_TYPE	= 4,
54 	BPLUSTREE_FLOAT_TYPE	= 5,
55 	BPLUSTREE_DOUBLE_TYPE	= 6
56 };
57 
58 struct sorted_array;
59 typedef sorted_array duplicate_array;
60 
61 struct bplustree_node {
62 	off_t	left_link;
63 	off_t	right_link;
64 	off_t	overflow_link;
65 	uint16	all_key_count;
66 	uint16	all_key_length;
67 
68 	off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); }
69 	off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); }
70 	off_t OverflowLink() const { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); }
71 	uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); }
72 	uint16 AllKeyLength() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); }
73 
74 	inline uint16 *KeyLengths() const;
75 	inline off_t *Values() const;
76 	inline uint8 *Keys() const;
77 	inline int32 Used() const;
78 	uint8 *KeyAt(int32 index, uint16 *keyLength) const;
79 
80 	inline bool IsLeaf() const;
81 
82 	void Initialize();
83 	uint8 CountDuplicates(off_t offset, bool isFragment) const;
84 	off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const;
85 	uint32 FragmentsUsed(uint32 nodeSize) const;
86 	inline duplicate_array *FragmentAt(int8 index) const;
87 	inline duplicate_array *DuplicateArray() const;
88 
89 	static inline uint8 LinkType(off_t link);
90 	static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0);
91 	static inline bool IsDuplicate(off_t link);
92 	static inline off_t FragmentOffset(off_t link);
93 	static inline uint32 FragmentIndex(off_t link);
94 	static inline uint32 MaxFragments(uint32 nodeSize);
95 
96 #ifdef DEBUG
97 	status_t CheckIntegrity(uint32 nodeSize) const;
98 #endif
99 } _PACKED;
100 
101 //#define BPLUSTREE_NODE 0
102 #define BPLUSTREE_DUPLICATE_NODE 2
103 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
104 
105 #define NUM_FRAGMENT_VALUES 7
106 #define NUM_DUPLICATE_VALUES 125
107 
108 //**************************************
109 
110 enum bplustree_traversing {
111 	BPLUSTREE_FORWARD = 1,
112 	BPLUSTREE_BACKWARD = -1,
113 
114 	BPLUSTREE_BEGIN = 0,
115 	BPLUSTREE_END = 1
116 };
117 
118 
119 //	#pragma mark - in-memory structures
120 
121 template<class T> class Stack;
122 class BPlusTree;
123 class TreeIterator;
124 class CachedNode;
125 class Inode;
126 
127 // needed for searching (utilizing a stack)
128 struct node_and_key {
129 	off_t	nodeOffset;
130 	uint16	keyIndex;
131 };
132 
133 
134 class CachedNode {
135 	public:
136 		CachedNode(BPlusTree *tree)
137 			:
138 			fTree(tree),
139 			fNode(NULL)
140 		{
141 		}
142 
143 		CachedNode(BPlusTree *tree, off_t offset, bool check = true)
144 			:
145 			fTree(tree),
146 			fNode(NULL)
147 		{
148 			SetTo(offset, check);
149 		}
150 
151 		~CachedNode()
152 		{
153 			Unset();
154 		}
155 
156 		const bplustree_node *SetTo(off_t offset, bool check = true);
157 		bplustree_node *SetToWritable(Transaction &transaction, off_t offset,
158 			bool check = true);
159 		bplustree_node *MakeWritable(Transaction &transaction);
160 		const bplustree_header *SetToHeader();
161 		bplustree_header *SetToWritableHeader(Transaction &transaction);
162 		bplustree_header *MakeWritableHeader(Transaction &transaction);
163 
164 		void UnsetUnchanged(Transaction &transaction);
165 		void Unset();
166 
167 		status_t Free(Transaction &transaction, off_t offset);
168 		status_t Allocate(Transaction &transaction, bplustree_node **node,
169 			off_t *offset);
170 
171 		bool IsWritable() const { return fWritable; }
172 		bplustree_node *Node() const { return fNode; }
173 
174 	protected:
175 		bplustree_node	*InternalSetTo(Transaction *transaction, off_t offset);
176 
177 		BPlusTree		*fTree;
178 		bplustree_node	*fNode;
179 		off_t			fOffset;
180 		off_t			fBlockNumber;
181 		bool			fWritable;
182 };
183 
184 
185 class BPlusTree {
186 	public:
187 		BPlusTree(Transaction &transaction, Inode *stream,
188 			int32 nodeSize = BPLUSTREE_NODE_SIZE);
189 		BPlusTree(Inode *stream);
190 		BPlusTree();
191 		~BPlusTree();
192 
193 		status_t	SetTo(Transaction &transaction, Inode *stream,
194 						int32 nodeSize = BPLUSTREE_NODE_SIZE);
195 		status_t	SetTo(Inode *stream);
196 		status_t	SetStream(Inode *stream);
197 
198 		status_t	InitCheck();
199 		status_t	Validate();
200 
201 		status_t	Remove(Transaction &transaction, const uint8 *key,
202 						uint16 keyLength, off_t value);
203 		status_t	Insert(Transaction &transaction, const uint8 *key,
204 						uint16 keyLength, off_t value);
205 
206 		status_t	Remove(Transaction &transaction, const char *key,
207 						off_t value);
208 		status_t	Insert(Transaction &transaction, const char *key,
209 						off_t value);
210 		status_t	Insert(Transaction &transaction, int32 key, off_t value);
211 		status_t	Insert(Transaction &transaction, uint32 key, off_t value);
212 		status_t	Insert(Transaction &transaction, int64 key, off_t value);
213 		status_t	Insert(Transaction &transaction, uint64 key, off_t value);
214 		status_t	Insert(Transaction &transaction, float key, off_t value);
215 		status_t	Insert(Transaction &transaction, double key, off_t value);
216 
217 		status_t	Replace(Transaction &transaction, const uint8 *key,
218 						uint16 keyLength, off_t value);
219 		status_t	Find(const uint8 *key, uint16 keyLength, off_t *value);
220 
221 		static int32 TypeCodeToKeyType(type_code code);
222 		static int32 ModeToKeyType(mode_t mode);
223 
224 	private:
225 		BPlusTree(const BPlusTree &);
226 		BPlusTree &operator=(const BPlusTree &);
227 			// no implementation
228 
229 		int32		_CompareKeys(const void *key1, int keylength1,
230 						const void *key2, int keylength2);
231 		status_t	_FindKey(const bplustree_node *node, const uint8 *key,
232 						uint16 keyLength, uint16 *index = NULL,
233 						off_t *next = NULL);
234 		status_t	_SeekDown(Stack<node_and_key> &stack, const uint8 *key,
235 						uint16 keyLength);
236 
237 		status_t	_FindFreeDuplicateFragment(Transaction &transaction,
238 						const bplustree_node *node, CachedNode &cached,
239 						off_t *_offset, bplustree_node **_fragment,
240 						uint32 *_index);
241 		status_t	_InsertDuplicate(Transaction &transaction,
242 						CachedNode &cached, const bplustree_node *node,
243 						uint16 index, off_t value);
244 		void		_InsertKey(bplustree_node *node, uint16 index, uint8 *key,
245 						uint16 keyLength, off_t value);
246 		status_t	_SplitNode(bplustree_node *node, off_t nodeOffset,
247 						bplustree_node *other, off_t otherOffset,
248 						uint16 *_keyIndex, uint8 *key, uint16 *_keyLength,
249 						off_t *_value);
250 
251 		status_t	_RemoveDuplicate(Transaction &transaction,
252 						const bplustree_node *node, CachedNode &cached,
253 						uint16 keyIndex, off_t value);
254 		void		_RemoveKey(bplustree_node *node, uint16 index);
255 
256 		void		_UpdateIterators(off_t offset, off_t nextOffset,
257 						uint16 keyIndex, uint16 splitAt, int8 change);
258 		void		_AddIterator(TreeIterator *iterator);
259 		void		_RemoveIterator(TreeIterator *iterator);
260 
261 	private:
262 		friend class TreeIterator;
263 		friend class CachedNode;
264 
265 		Inode		*fStream;
266 		const bplustree_header *fHeader;
267 		CachedNode	fCachedHeader;
268 		int32		fNodeSize;
269 		bool		fAllowDuplicates;
270 		status_t	fStatus;
271 		SimpleLock	fIteratorLock;
272 		Chain<TreeIterator> fIterators;
273 };
274 
275 
276 //	#pragma mark - helper classes/functions
277 
278 extern int32 compareKeys(type_code type, const void *key1, int keyLength1,
279 				const void *key2, int keyLength2);
280 
281 class TreeIterator {
282 	public:
283 		TreeIterator(BPlusTree *tree);
284 		~TreeIterator();
285 
286 		status_t	Goto(int8 to);
287 		status_t	Traverse(int8 direction, void *key, uint16 *keyLength,
288 						uint16 maxLength, off_t *value,
289 						uint16 *duplicate = NULL);
290 		status_t	Find(const uint8 *key, uint16 keyLength);
291 
292 		status_t	Rewind();
293 		status_t	GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
294 						off_t *value, uint16 *duplicate = NULL);
295 		status_t	GetPreviousEntry(void *key, uint16 *keyLength,
296 						uint16 maxLength, off_t *value,
297 						uint16 *duplicate = NULL);
298 		void		SkipDuplicates();
299 
300 #ifdef DEBUG
301 		void		Dump();
302 #endif
303 
304 	private:
305 		BPlusTree	*fTree;
306 
307 		off_t		fCurrentNodeOffset;	// traverse position
308 		int32		fCurrentKey;
309 		off_t		fDuplicateNode;
310 		uint16		fDuplicate, fNumDuplicates;
311 		bool		fIsFragment;
312 
313 	private:
314 		friend class Chain<TreeIterator>;
315 		friend class BPlusTree;
316 
317 		void		Update(off_t offset, off_t nextOffset, uint16 keyIndex,
318 						uint16 splitAt, int8 change);
319 		void Stop();
320 		TreeIterator *fNext;
321 };
322 
323 
324 //	#pragma mark - BPlusTree's inline functions
325 //	(most of them may not be needed)
326 
327 
328 inline status_t
329 BPlusTree::Remove(Transaction &transaction, const char *key, off_t value)
330 {
331 	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
332 		return B_BAD_TYPE;
333 	return Remove(transaction, (uint8 *)key, strlen(key), value);
334 }
335 
336 inline status_t
337 BPlusTree::Insert(Transaction &transaction, const char *key, off_t value)
338 {
339 	if (fHeader->data_type != BPLUSTREE_STRING_TYPE)
340 		return B_BAD_TYPE;
341 	return Insert(transaction, (uint8 *)key, strlen(key), value);
342 }
343 
344 inline status_t
345 BPlusTree::Insert(Transaction &transaction, int32 key, off_t value)
346 {
347 	if (fHeader->data_type != BPLUSTREE_INT32_TYPE)
348 		return B_BAD_TYPE;
349 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
350 }
351 
352 inline status_t
353 BPlusTree::Insert(Transaction &transaction, uint32 key, off_t value)
354 {
355 	if (fHeader->data_type != BPLUSTREE_UINT32_TYPE)
356 		return B_BAD_TYPE;
357 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
358 }
359 
360 inline status_t
361 BPlusTree::Insert(Transaction &transaction, int64 key, off_t value)
362 {
363 	if (fHeader->data_type != BPLUSTREE_INT64_TYPE)
364 		return B_BAD_TYPE;
365 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
366 }
367 
368 inline status_t
369 BPlusTree::Insert(Transaction &transaction, uint64 key, off_t value)
370 {
371 	if (fHeader->data_type != BPLUSTREE_UINT64_TYPE)
372 		return B_BAD_TYPE;
373 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
374 }
375 
376 inline status_t
377 BPlusTree::Insert(Transaction &transaction, float key, off_t value)
378 {
379 	if (fHeader->data_type != BPLUSTREE_FLOAT_TYPE)
380 		return B_BAD_TYPE;
381 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
382 }
383 
384 inline status_t
385 BPlusTree::Insert(Transaction &transaction, double key, off_t value)
386 {
387 	if (fHeader->data_type != BPLUSTREE_DOUBLE_TYPE)
388 		return B_BAD_TYPE;
389 	return Insert(transaction, (uint8 *)&key, sizeof(key), value);
390 }
391 
392 
393 //	#pragma mark - TreeIterator inline functions
394 
395 
396 inline status_t
397 TreeIterator::Rewind()
398 {
399 	return Goto(BPLUSTREE_BEGIN);
400 }
401 
402 inline status_t
403 TreeIterator::GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength,
404 	off_t *value, uint16 *duplicate)
405 {
406 	return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, duplicate);
407 }
408 
409 inline status_t
410 TreeIterator::GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength,
411 	off_t *value, uint16 *duplicate)
412 {
413 	return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, duplicate);
414 }
415 
416 
417 //	#pragma mark - bplustree_header inline functions
418 
419 
420 inline bool
421 bplustree_header::CheckNode(bplustree_node *node) const
422 {
423 	// sanity checks (links, all_key_count)
424 	return IsValidLink(node->LeftLink())
425 		&& IsValidLink(node->RightLink())
426 		&& IsValidLink(node->OverflowLink())
427 		&& (int8 *)node->Values() + node->NumKeys() * sizeof(off_t) <= (int8 *)node + NodeSize();
428 }
429 
430 
431 inline bool
432 bplustree_header::IsValidLink(off_t link) const
433 {
434 	return link == BPLUSTREE_NULL || (link > 0 && link <= MaximumSize() - NodeSize());
435 }
436 
437 
438 //	#pragma mark - bplustree_node inline functions
439 
440 
441 inline uint16 *
442 bplustree_node::KeyLengths() const
443 {
444 	return (uint16 *)(((char *)this) + round_up(sizeof(bplustree_node)
445 		+ AllKeyLength()));
446 }
447 
448 
449 inline off_t *
450 bplustree_node::Values() const
451 {
452 	return (off_t *)((char *)KeyLengths() + NumKeys() * sizeof(uint16));
453 }
454 
455 
456 inline uint8 *
457 bplustree_node::Keys() const
458 {
459 	return (uint8 *)this + sizeof(bplustree_node);
460 }
461 
462 
463 inline int32
464 bplustree_node::Used() const
465 {
466 	return round_up(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
467 		* (sizeof(uint16) + sizeof(off_t));
468 }
469 
470 
471 inline bool
472 bplustree_node::IsLeaf() const
473 {
474 	return OverflowLink() == BPLUSTREE_NULL;
475 }
476 
477 
478 inline duplicate_array *
479 bplustree_node::FragmentAt(int8 index) const
480 {
481 	return (duplicate_array *)((off_t *)this
482 		+ index * (NUM_FRAGMENT_VALUES + 1));
483 }
484 
485 
486 inline duplicate_array *
487 bplustree_node::DuplicateArray() const
488 {
489 	return (duplicate_array *)&this->overflow_link;
490 }
491 
492 
493 inline uint8
494 bplustree_node::LinkType(off_t link)
495 {
496 	return *(uint64 *)&link >> 62;
497 }
498 
499 
500 inline off_t
501 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
502 {
503 	return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
504 		| (fragmentIndex & 0x3ff);
505 }
506 
507 
508 inline bool
509 bplustree_node::IsDuplicate(off_t link)
510 {
511 	return (LinkType(link)
512 		& (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
513 }
514 
515 
516 inline off_t
517 bplustree_node::FragmentOffset(off_t link)
518 {
519 	return link & 0x3ffffffffffffc00LL;
520 }
521 
522 
523 inline uint32
524 bplustree_node::FragmentIndex(off_t link)
525 {
526 	return (uint32)(link & 0x3ff);
527 }
528 
529 
530 inline uint32
531 bplustree_node::MaxFragments(uint32 nodeSize)
532 {
533 	return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
534 }
535 
536 #endif	// B_PLUS_TREE_H
537