1 /*
2 * Copyright 2001-2015, Axel Dörfler, axeld@pinc-software.de.
3 * This file may be used under the terms of the MIT License.
4 */
5 #ifndef B_PLUS_TREE_H
6 #define B_PLUS_TREE_H
7
8
9 #include "bfs.h"
10
11 #include "Utility.h"
12
13 #if !_BOOT_MODE
14 #include "Journal.h"
15 class Inode;
16 #else
17 #define Inode BFS::Stream
18
19 namespace BFS {
20
21 class Stream;
22 class Transaction;
23 class TransactionListener {};
24 #endif // _BOOT_MODE
25
26 // #pragma mark - on-disk structures
27
28 struct bplustree_node;
29
30 #define BPLUSTREE_NULL -1LL
31 #define BPLUSTREE_FREE -2LL
32
33 struct bplustree_header {
34 uint32 magic;
35 uint32 node_size;
36 uint32 max_number_of_levels;
37 uint32 data_type;
38 int64 root_node_pointer;
39 int64 free_node_pointer;
40 int64 maximum_size;
41
Magicbplustree_header42 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); }
NodeSizebplustree_header43 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); }
DataTypebplustree_header44 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); }
RootNodebplustree_header45 off_t RootNode() const
46 { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); }
FreeNodebplustree_header47 off_t FreeNode() const
48 { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); }
MaximumSizebplustree_header49 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); }
MaxNumberOfLevelsbplustree_header50 uint32 MaxNumberOfLevels() const
51 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); }
52
53 inline bool CheckNode(const bplustree_node* node) const;
54 inline bool IsValidLink(off_t link) const;
55 bool IsValid() const;
56 } _PACKED;
57
58 #define BPLUSTREE_MAGIC 0x69f6c2e8
59 #define BPLUSTREE_NODE_SIZE 1024
60 #define BPLUSTREE_MAX_KEY_LENGTH 256
61 #define BPLUSTREE_MIN_KEY_LENGTH 1
62
63 enum bplustree_types {
64 BPLUSTREE_STRING_TYPE = 0,
65 BPLUSTREE_INT32_TYPE = 1,
66 BPLUSTREE_UINT32_TYPE = 2,
67 BPLUSTREE_INT64_TYPE = 3,
68 BPLUSTREE_UINT64_TYPE = 4,
69 BPLUSTREE_FLOAT_TYPE = 5,
70 BPLUSTREE_DOUBLE_TYPE = 6
71 };
72
73
74 struct duplicate_array;
75
76
77 template <typename T>
78 struct __attribute__((packed)) Unaligned {
79 T value;
80
81 Unaligned<T>& operator=(const T& newValue)
82 {
83 value = newValue; return *this;
84 }
TUnaligned85 operator T() const { return value; }
86 };
87
88
89 struct bplustree_node {
90 int64 left_link;
91 int64 right_link;
92 int64 overflow_link;
93 uint16 all_key_count;
94 uint16 all_key_length;
95
LeftLinkbplustree_node96 off_t LeftLink() const
97 { return BFS_ENDIAN_TO_HOST_INT64(
98 left_link); }
RightLinkbplustree_node99 off_t RightLink() const
100 { return BFS_ENDIAN_TO_HOST_INT64(
101 right_link); }
OverflowLinkbplustree_node102 off_t OverflowLink() const
103 { return BFS_ENDIAN_TO_HOST_INT64(
104 overflow_link); }
NumKeysbplustree_node105 uint16 NumKeys() const
106 { return BFS_ENDIAN_TO_HOST_INT16(
107 all_key_count); }
AllKeyLengthbplustree_node108 uint16 AllKeyLength() const
109 { return BFS_ENDIAN_TO_HOST_INT16(
110 all_key_length); }
111
112 inline Unaligned<uint16>* KeyLengths() const;
113 inline Unaligned<off_t>* Values() const;
114 inline uint8* Keys() const;
115 inline int32 Used() const;
116 uint8* KeyAt(int32 index, uint16* keyLength) const;
117
118 inline bool IsLeaf() const;
119
120 void Initialize();
121 uint8 CountDuplicates(off_t offset,
122 bool isFragment) const;
123 off_t DuplicateAt(off_t offset, bool isFragment,
124 int8 index) const;
125 uint32 FragmentsUsed(uint32 nodeSize) const;
126 inline duplicate_array* FragmentAt(int8 index) const;
127 inline duplicate_array* DuplicateArray() const;
128
129 static inline uint8 LinkType(off_t link);
130 static inline off_t MakeLink(uint8 type, off_t link,
131 uint32 fragmentIndex = 0);
132 static inline bool IsDuplicate(off_t link);
133 static inline off_t FragmentOffset(off_t link);
134 static inline uint32 FragmentIndex(off_t link);
135 static inline uint32 MaxFragments(uint32 nodeSize);
136
137 status_t CheckIntegrity(uint32 nodeSize) const;
138 } _PACKED;
139
140 //#define BPLUSTREE_NODE 0
141 #define BPLUSTREE_DUPLICATE_NODE 2
142 #define BPLUSTREE_DUPLICATE_FRAGMENT 3
143
144 #define NUM_FRAGMENT_VALUES 7
145 #define NUM_DUPLICATE_VALUES 125
146
147 //**************************************
148
149 enum bplustree_traversing {
150 BPLUSTREE_FORWARD = 1,
151 BPLUSTREE_BACKWARD = -1,
152
153 BPLUSTREE_BEGIN = 0,
154 BPLUSTREE_END = 1
155 };
156
157
158 // #pragma mark - in-memory structures
159
160
161 class BPlusTree;
162 struct TreeCheck;
163 class TreeIterator;
164
165
166 #if !_BOOT_MODE
167 // needed for searching (utilizing a stack)
168 struct node_and_key {
169 off_t nodeOffset;
170 uint16 keyIndex;
171 };
172 #endif // !_BOOT_MODE
173
174
175 class CachedNode {
176 public:
CachedNode(BPlusTree * tree)177 CachedNode(BPlusTree* tree)
178 :
179 fTree(tree),
180 fNode(NULL),
181 fOffset(0),
182 fBlockNumber(0),
183 fWritable(false)
184 {
185 #if _BOOT_MODE
186 fBlock = NULL;
187 #endif
188 }
189
190 CachedNode(BPlusTree* tree, off_t offset, bool check = true)
191 :
fTree(tree)192 fTree(tree),
193 fNode(NULL),
194 fOffset(0),
195 fBlockNumber(0),
196 fWritable(false)
197 {
198 #if _BOOT_MODE
199 fBlock = NULL;
200 #endif
201 SetTo(offset, check);
202 }
203
~CachedNode()204 ~CachedNode()
205 {
206 Unset();
207 #if _BOOT_MODE
208 free(fBlock);
209 #endif
210 }
211
212 const bplustree_node* SetTo(off_t offset, bool check = true);
213 status_t SetTo(off_t offset,
214 const bplustree_node** _node,
215 bool check = true);
216
217 const bplustree_header* SetToHeader();
218 void Unset();
219
220 #if !_BOOT_MODE
221 bplustree_node* SetToWritable(Transaction& transaction,
222 off_t offset, bool check = true);
223 bplustree_node* MakeWritable(Transaction& transaction);
224 bplustree_header* SetToWritableHeader(Transaction& transaction);
225
226 void UnsetUnchanged(Transaction& transaction);
227
228 status_t Free(Transaction& transaction, off_t offset);
229 status_t Allocate(Transaction& transaction,
230 bplustree_node** _node, off_t* _offset);
231 #endif // !_BOOT_MODE
232
IsWritable()233 bool IsWritable() const { return fWritable; }
Node()234 bplustree_node* Node() const { return fNode; }
235
236 protected:
237 bplustree_node* InternalSetTo(Transaction* transaction,
238 off_t offset);
239
240 BPlusTree* fTree;
241 bplustree_node* fNode;
242 off_t fOffset;
243 off_t fBlockNumber;
244 bool fWritable;
245 #if _BOOT_MODE
246 uint8* fBlock;
247 #endif
248 };
249
250
251 class BPlusTree : public TransactionListener {
252 public:
253 #if !_BOOT_MODE
254 BPlusTree(Transaction& transaction,
255 Inode* stream,
256 int32 nodeSize = BPLUSTREE_NODE_SIZE);
257 #endif
258 BPlusTree(Inode* stream);
259 BPlusTree();
260 ~BPlusTree();
261
262 #if !_BOOT_MODE
263 status_t SetTo(Transaction& transaction, Inode* stream,
264 int32 nodeSize = BPLUSTREE_NODE_SIZE);
265 #endif
266 status_t SetTo(Inode* stream);
267 status_t SetStream(Inode* stream);
268
269 status_t InitCheck();
270
NodeSize()271 size_t NodeSize() const { return fNodeSize; }
Stream()272 Inode* Stream() const { return fStream; }
273
274 #if !_BOOT_MODE
275 status_t Validate(bool repair, bool& _errorsFound);
276 status_t MakeEmpty();
277
278 status_t Remove(Transaction& transaction,
279 const uint8* key, uint16 keyLength,
280 off_t value);
281 status_t Insert(Transaction& transaction,
282 const uint8* key, uint16 keyLength,
283 off_t value);
284
285 status_t Remove(Transaction& transaction, const char* key,
286 off_t value);
287 status_t Insert(Transaction& transaction, const char* key,
288 off_t value);
289 status_t Insert(Transaction& transaction, int32 key,
290 off_t value);
291 status_t Insert(Transaction& transaction, uint32 key,
292 off_t value);
293 status_t Insert(Transaction& transaction, int64 key,
294 off_t value);
295 status_t Insert(Transaction& transaction, uint64 key,
296 off_t value);
297 status_t Insert(Transaction& transaction, float key,
298 off_t value);
299 status_t Insert(Transaction& transaction, double key,
300 off_t value);
301
302 status_t Replace(Transaction& transaction,
303 const uint8* key, uint16 keyLength,
304 off_t value);
305 #endif // !_BOOT_MODE
306
307 status_t Find(const uint8* key, uint16 keyLength,
308 off_t* value);
309
310 #if !_BOOT_MODE
311 static int32 TypeCodeToKeyType(type_code code);
312 static int32 ModeToKeyType(mode_t mode);
313
314 protected:
315 virtual void TransactionDone(bool success);
316 virtual void RemovedFromTransaction();
317 #endif // !_BOOT_MODE
318
319 private:
320 BPlusTree(const BPlusTree& other);
321 BPlusTree& operator=(const BPlusTree& other);
322 // no implementation
323
324 int32 _CompareKeys(const void* key1, int keylength1,
325 const void* key2, int keylength2);
326 status_t _FindKey(const bplustree_node* node,
327 const uint8* key, uint16 keyLength,
328 uint16* index = NULL, off_t* next = NULL);
329 #if !_BOOT_MODE
330 status_t _SeekDown(Stack<node_and_key>& stack,
331 const uint8* key, uint16 keyLength);
332
333 status_t _FindFreeDuplicateFragment(
334 Transaction& transaction,
335 const bplustree_node* node,
336 CachedNode& cached, off_t* _offset,
337 bplustree_node** _fragment, uint32* _index);
338 status_t _InsertDuplicate(Transaction& transaction,
339 CachedNode& cached,
340 const bplustree_node* node, uint16 index,
341 off_t value);
342 void _InsertKey(bplustree_node* node, uint16 index,
343 uint8* key, uint16 keyLength, off_t value);
344 status_t _SplitNode(bplustree_node* node,
345 off_t nodeOffset, bplustree_node* other,
346 off_t otherOffset, uint16* _keyIndex,
347 uint8* key, uint16* _keyLength,
348 off_t* _value);
349
350 status_t _RemoveDuplicate(Transaction& transaction,
351 const bplustree_node* node,
352 CachedNode& cached, uint16 keyIndex,
353 off_t value);
354 void _RemoveKey(bplustree_node* node, uint16 index);
355
356 void _UpdateIterators(off_t offset, off_t nextOffset,
357 uint16 keyIndex, uint16 splitAt,
358 int8 change);
359 void _AddIterator(TreeIterator* iterator);
360 void _RemoveIterator(TreeIterator* iterator);
361
362 status_t _ValidateChildren(TreeCheck& check,
363 uint32 level, off_t offset,
364 const uint8* largestKey, uint16 keyLength,
365 const bplustree_node* parent);
366 status_t _ValidateChild(TreeCheck& check,
367 CachedNode& cached, uint32 level,
368 off_t offset, off_t lastOffset,
369 off_t nextOffset, const uint8* key,
370 uint16 keyLength);
371 #endif // !_BOOT_MODE
372
373 private:
374 friend class TreeIterator;
375 friend class CachedNode;
376 friend struct TreeCheck;
377
378 Inode* fStream;
379 bplustree_header fHeader;
380 int32 fNodeSize;
381 bool fAllowDuplicates;
382 bool fInTransaction;
383 status_t fStatus;
384
385 #if !_BOOT_MODE
386 mutex fIteratorLock;
387 SinglyLinkedList<TreeIterator> fIterators;
388 #endif
389 };
390
391
392 // #pragma mark - helper classes/functions
393
394
395 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> {
396 public:
397 TreeIterator(BPlusTree* tree);
398 ~TreeIterator();
399
400 status_t Goto(int8 to);
401 status_t Traverse(int8 direction, void* key,
402 uint16* keyLength, uint16 maxLength,
403 off_t* value, uint16* duplicate = NULL);
404 status_t Find(const uint8* key, uint16 keyLength);
405
406 status_t Rewind();
407 status_t GetNextEntry(void* key, uint16* keyLength,
408 uint16 maxLength, off_t* value,
409 uint16* duplicate = NULL);
410 status_t GetPreviousEntry(void* key, uint16* keyLength,
411 uint16 maxLength, off_t* value,
412 uint16* duplicate = NULL);
413 void SkipDuplicates();
414
Tree()415 BPlusTree* Tree() const { return fTree; }
416
417 #ifdef DEBUG
418 void Dump();
419 #endif
420
421 private:
422 friend class BPlusTree;
423
424 // called by BPlusTree
425 void Update(off_t offset, off_t nextOffset,
426 uint16 keyIndex, uint16 splitAt,
427 int8 change);
428 void Stop();
429
430 private:
431 BPlusTree* fTree;
432 off_t fCurrentNodeOffset;
433 // traverse position
434 int32 fCurrentKey;
435 off_t fDuplicateNode;
436 uint16 fDuplicate;
437 uint16 fNumDuplicates;
438 bool fIsFragment;
439 };
440
441
442 // #pragma mark - BPlusTree's inline functions
443 // (most of them may not be needed)
444
445
446 #if !_BOOT_MODE
447 inline status_t
Remove(Transaction & transaction,const char * key,off_t value)448 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value)
449 {
450 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
451 return B_BAD_TYPE;
452 return Remove(transaction, (uint8*)key, strlen(key), value);
453 }
454
455
456 inline status_t
Insert(Transaction & transaction,const char * key,off_t value)457 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value)
458 {
459 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE)
460 return B_BAD_TYPE;
461 return Insert(transaction, (uint8*)key, strlen(key), value);
462 }
463
464
465 inline status_t
Insert(Transaction & transaction,int32 key,off_t value)466 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value)
467 {
468 if (fHeader.DataType() != BPLUSTREE_INT32_TYPE)
469 return B_BAD_TYPE;
470 return Insert(transaction, (uint8*)&key, sizeof(key), value);
471 }
472
473
474 inline status_t
Insert(Transaction & transaction,uint32 key,off_t value)475 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value)
476 {
477 if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE)
478 return B_BAD_TYPE;
479 return Insert(transaction, (uint8*)&key, sizeof(key), value);
480 }
481
482
483 inline status_t
Insert(Transaction & transaction,int64 key,off_t value)484 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value)
485 {
486 if (fHeader.DataType() != BPLUSTREE_INT64_TYPE)
487 return B_BAD_TYPE;
488 return Insert(transaction, (uint8*)&key, sizeof(key), value);
489 }
490
491
492 inline status_t
Insert(Transaction & transaction,uint64 key,off_t value)493 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value)
494 {
495 if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE)
496 return B_BAD_TYPE;
497 return Insert(transaction, (uint8*)&key, sizeof(key), value);
498 }
499
500
501 inline status_t
Insert(Transaction & transaction,float key,off_t value)502 BPlusTree::Insert(Transaction& transaction, float key, off_t value)
503 {
504 if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE)
505 return B_BAD_TYPE;
506 return Insert(transaction, (uint8*)&key, sizeof(key), value);
507 }
508
509
510 inline status_t
Insert(Transaction & transaction,double key,off_t value)511 BPlusTree::Insert(Transaction& transaction, double key, off_t value)
512 {
513 if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE)
514 return B_BAD_TYPE;
515 return Insert(transaction, (uint8*)&key, sizeof(key), value);
516 }
517 #endif // !_BOOT_MODE
518
519
520 // #pragma mark - TreeIterator inline functions
521
522
523 inline status_t
Rewind()524 TreeIterator::Rewind()
525 {
526 return Goto(BPLUSTREE_BEGIN);
527 }
528
529
530 inline status_t
GetNextEntry(void * key,uint16 * keyLength,uint16 maxLength,off_t * value,uint16 * duplicate)531 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength,
532 off_t* value, uint16* duplicate)
533 {
534 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value,
535 duplicate);
536 }
537
538
539 inline status_t
GetPreviousEntry(void * key,uint16 * keyLength,uint16 maxLength,off_t * value,uint16 * duplicate)540 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength,
541 off_t* value, uint16* duplicate)
542 {
543 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value,
544 duplicate);
545 }
546
547
548 // #pragma mark - bplustree_header inline functions
549
550
551 inline bool
CheckNode(const bplustree_node * node)552 bplustree_header::CheckNode(const bplustree_node* node) const
553 {
554 // sanity checks (links, all_key_count)
555 return IsValidLink(node->LeftLink())
556 && IsValidLink(node->RightLink())
557 && IsValidLink(node->OverflowLink())
558 && (int8*)node->Values() + node->NumKeys() * sizeof(off_t)
559 <= (int8*)node + NodeSize();
560 }
561
562
563 inline bool
IsValidLink(off_t link)564 bplustree_header::IsValidLink(off_t link) const
565 {
566 return link == BPLUSTREE_NULL
567 || (link > 0 && link <= MaximumSize() - NodeSize());
568 }
569
570
571 // #pragma mark - bplustree_node inline functions
572
573
574 inline Unaligned<uint16>*
KeyLengths()575 bplustree_node::KeyLengths() const
576 {
577 return (Unaligned<uint16>*)(((char*)this) + key_align(sizeof(bplustree_node)
578 + AllKeyLength()));
579 }
580
581
582 inline Unaligned<off_t>*
Values()583 bplustree_node::Values() const
584 {
585 return (Unaligned<off_t>*)(
586 (char*)KeyLengths() + NumKeys() * sizeof(uint16));
587 }
588
589
590 inline uint8*
Keys()591 bplustree_node::Keys() const
592 {
593 return (uint8*)this + sizeof(bplustree_node);
594 }
595
596
597 inline int32
Used()598 bplustree_node::Used() const
599 {
600 return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys()
601 * (sizeof(uint16) + sizeof(off_t));
602 }
603
604
605 inline bool
IsLeaf()606 bplustree_node::IsLeaf() const
607 {
608 return OverflowLink() == BPLUSTREE_NULL;
609 }
610
611
612 inline duplicate_array*
FragmentAt(int8 index)613 bplustree_node::FragmentAt(int8 index) const
614 {
615 return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1));
616 }
617
618
619 inline duplicate_array*
DuplicateArray()620 bplustree_node::DuplicateArray() const
621 {
622 return (duplicate_array*)&overflow_link;
623 }
624
625
626 inline uint8
LinkType(off_t link)627 bplustree_node::LinkType(off_t link)
628 {
629 return *(uint64*)&link >> 62;
630 }
631
632
633 inline off_t
MakeLink(uint8 type,off_t link,uint32 fragmentIndex)634 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex)
635 {
636 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL)
637 | (fragmentIndex & 0x3ff);
638 }
639
640
641 inline bool
IsDuplicate(off_t link)642 bplustree_node::IsDuplicate(off_t link)
643 {
644 return (LinkType(link)
645 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0;
646 }
647
648
649 inline off_t
FragmentOffset(off_t link)650 bplustree_node::FragmentOffset(off_t link)
651 {
652 return link & 0x3ffffffffffffc00LL;
653 }
654
655
656 inline uint32
FragmentIndex(off_t link)657 bplustree_node::FragmentIndex(off_t link)
658 {
659 return (uint32)(link & 0x3ff);
660 }
661
662
663 inline uint32
MaxFragments(uint32 nodeSize)664 bplustree_node::MaxFragments(uint32 nodeSize)
665 {
666 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t));
667 }
668
669
670 #if _BOOT_MODE
671 } // namespace BFS
672
673 #undef Inode
674 #endif
675
676 #endif // B_PLUS_TREE_H
677