1 /* 2 * Copyright 2001-2015, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 #ifndef B_PLUS_TREE_H 6 #define B_PLUS_TREE_H 7 8 9 #include "bfs.h" 10 11 #include "Utility.h" 12 13 #if !_BOOT_MODE 14 #include "Journal.h" 15 class Inode; 16 #else 17 #define Inode BFS::Stream 18 19 namespace BFS { 20 21 class Stream; 22 class Transaction; 23 class TransactionListener {}; 24 #endif // _BOOT_MODE 25 26 // #pragma mark - on-disk structures 27 28 struct bplustree_node; 29 30 #define BPLUSTREE_NULL -1LL 31 #define BPLUSTREE_FREE -2LL 32 33 struct bplustree_header { 34 uint32 magic; 35 uint32 node_size; 36 uint32 max_number_of_levels; 37 uint32 data_type; 38 int64 root_node_pointer; 39 int64 free_node_pointer; 40 int64 maximum_size; 41 42 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 43 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 44 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 45 off_t RootNode() const 46 { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 47 off_t FreeNode() const 48 { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 49 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 50 uint32 MaxNumberOfLevels() const 51 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 52 53 inline bool CheckNode(const bplustree_node* node) const; 54 inline bool IsValidLink(off_t link) const; 55 bool IsValid() const; 56 } _PACKED; 57 58 #define BPLUSTREE_MAGIC 0x69f6c2e8 59 #define BPLUSTREE_NODE_SIZE 1024 60 #define BPLUSTREE_MAX_KEY_LENGTH 256 61 #define BPLUSTREE_MIN_KEY_LENGTH 1 62 63 enum bplustree_types { 64 BPLUSTREE_STRING_TYPE = 0, 65 BPLUSTREE_INT32_TYPE = 1, 66 BPLUSTREE_UINT32_TYPE = 2, 67 BPLUSTREE_INT64_TYPE = 3, 68 BPLUSTREE_UINT64_TYPE = 4, 69 BPLUSTREE_FLOAT_TYPE = 5, 70 BPLUSTREE_DOUBLE_TYPE = 6 71 }; 72 73 74 struct duplicate_array; 75 76 77 template <typename T> 78 struct __attribute__((packed)) Unaligned { 79 T value; 80 81 Unaligned<T>& operator=(const T& newValue) 82 { 83 value = newValue; return *this; 84 } 85 operator T() const { return value; } 86 }; 87 88 89 struct bplustree_node { 90 int64 left_link; 91 int64 right_link; 92 int64 overflow_link; 93 uint16 all_key_count; 94 uint16 all_key_length; 95 96 off_t LeftLink() const 97 { return BFS_ENDIAN_TO_HOST_INT64( 98 left_link); } 99 off_t RightLink() const 100 { return BFS_ENDIAN_TO_HOST_INT64( 101 right_link); } 102 off_t OverflowLink() const 103 { return BFS_ENDIAN_TO_HOST_INT64( 104 overflow_link); } 105 uint16 NumKeys() const 106 { return BFS_ENDIAN_TO_HOST_INT16( 107 all_key_count); } 108 uint16 AllKeyLength() const 109 { return BFS_ENDIAN_TO_HOST_INT16( 110 all_key_length); } 111 112 inline Unaligned<uint16>* KeyLengths() const; 113 inline Unaligned<off_t>* Values() const; 114 inline uint8* Keys() const; 115 inline int32 Used() const; 116 uint8* KeyAt(int32 index, uint16* keyLength) const; 117 118 inline bool IsLeaf() const; 119 120 void Initialize(); 121 uint8 CountDuplicates(off_t offset, 122 bool isFragment) const; 123 off_t DuplicateAt(off_t offset, bool isFragment, 124 int8 index) const; 125 uint32 FragmentsUsed(uint32 nodeSize) const; 126 inline duplicate_array* FragmentAt(int8 index) const; 127 inline duplicate_array* DuplicateArray() const; 128 129 static inline uint8 LinkType(off_t link); 130 static inline off_t MakeLink(uint8 type, off_t link, 131 uint32 fragmentIndex = 0); 132 static inline bool IsDuplicate(off_t link); 133 static inline off_t FragmentOffset(off_t link); 134 static inline uint32 FragmentIndex(off_t link); 135 static inline uint32 MaxFragments(uint32 nodeSize); 136 137 status_t CheckIntegrity(uint32 nodeSize) const; 138 } _PACKED; 139 140 //#define BPLUSTREE_NODE 0 141 #define BPLUSTREE_DUPLICATE_NODE 2 142 #define BPLUSTREE_DUPLICATE_FRAGMENT 3 143 144 #define NUM_FRAGMENT_VALUES 7 145 #define NUM_DUPLICATE_VALUES 125 146 147 //************************************** 148 149 enum bplustree_traversing { 150 BPLUSTREE_FORWARD = 1, 151 BPLUSTREE_BACKWARD = -1, 152 153 BPLUSTREE_BEGIN = 0, 154 BPLUSTREE_END = 1 155 }; 156 157 158 // #pragma mark - in-memory structures 159 160 161 class BPlusTree; 162 struct TreeCheck; 163 class TreeIterator; 164 165 166 #if !_BOOT_MODE 167 // needed for searching (utilizing a stack) 168 struct node_and_key { 169 off_t nodeOffset; 170 uint16 keyIndex; 171 }; 172 #endif // !_BOOT_MODE 173 174 175 class CachedNode { 176 public: 177 CachedNode(BPlusTree* tree) 178 : 179 fTree(tree), 180 fNode(NULL), 181 fOffset(0), 182 fBlockNumber(0), 183 fWritable(false) 184 { 185 #if _BOOT_MODE 186 fBlock = NULL; 187 #endif 188 } 189 190 CachedNode(BPlusTree* tree, off_t offset, bool check = true) 191 : 192 fTree(tree), 193 fNode(NULL), 194 fOffset(0), 195 fBlockNumber(0), 196 fWritable(false) 197 { 198 #if _BOOT_MODE 199 fBlock = NULL; 200 #endif 201 SetTo(offset, check); 202 } 203 204 ~CachedNode() 205 { 206 Unset(); 207 #if _BOOT_MODE 208 free(fBlock); 209 #endif 210 } 211 212 const bplustree_node* SetTo(off_t offset, bool check = true); 213 status_t SetTo(off_t offset, 214 const bplustree_node** _node, 215 bool check = true); 216 217 const bplustree_header* SetToHeader(); 218 void Unset(); 219 220 #if !_BOOT_MODE 221 bplustree_node* SetToWritable(Transaction& transaction, 222 off_t offset, bool check = true); 223 bplustree_node* MakeWritable(Transaction& transaction); 224 bplustree_header* SetToWritableHeader(Transaction& transaction); 225 226 void UnsetUnchanged(Transaction& transaction); 227 228 status_t Free(Transaction& transaction, off_t offset); 229 status_t Allocate(Transaction& transaction, 230 bplustree_node** _node, off_t* _offset); 231 #endif // !_BOOT_MODE 232 233 bool IsWritable() const { return fWritable; } 234 bplustree_node* Node() const { return fNode; } 235 236 protected: 237 bplustree_node* InternalSetTo(Transaction* transaction, 238 off_t offset); 239 240 BPlusTree* fTree; 241 bplustree_node* fNode; 242 off_t fOffset; 243 off_t fBlockNumber; 244 bool fWritable; 245 #if _BOOT_MODE 246 uint8* fBlock; 247 #endif 248 }; 249 250 251 class BPlusTree : public TransactionListener { 252 public: 253 #if !_BOOT_MODE 254 BPlusTree(Transaction& transaction, 255 Inode* stream, 256 int32 nodeSize = BPLUSTREE_NODE_SIZE); 257 #endif 258 BPlusTree(Inode* stream); 259 BPlusTree(); 260 ~BPlusTree(); 261 262 #if !_BOOT_MODE 263 status_t SetTo(Transaction& transaction, Inode* stream, 264 int32 nodeSize = BPLUSTREE_NODE_SIZE); 265 #endif 266 status_t SetTo(Inode* stream); 267 status_t SetStream(Inode* stream); 268 269 status_t InitCheck(); 270 271 size_t NodeSize() const { return fNodeSize; } 272 Inode* Stream() const { return fStream; } 273 274 #if !_BOOT_MODE 275 status_t Validate(bool repair, bool& _errorsFound); 276 status_t MakeEmpty(); 277 278 status_t Remove(Transaction& transaction, 279 const uint8* key, uint16 keyLength, 280 off_t value); 281 status_t Insert(Transaction& transaction, 282 const uint8* key, uint16 keyLength, 283 off_t value); 284 285 status_t Remove(Transaction& transaction, const char* key, 286 off_t value); 287 status_t Insert(Transaction& transaction, const char* key, 288 off_t value); 289 status_t Insert(Transaction& transaction, int32 key, 290 off_t value); 291 status_t Insert(Transaction& transaction, uint32 key, 292 off_t value); 293 status_t Insert(Transaction& transaction, int64 key, 294 off_t value); 295 status_t Insert(Transaction& transaction, uint64 key, 296 off_t value); 297 status_t Insert(Transaction& transaction, float key, 298 off_t value); 299 status_t Insert(Transaction& transaction, double key, 300 off_t value); 301 302 status_t Replace(Transaction& transaction, 303 const uint8* key, uint16 keyLength, 304 off_t value); 305 #endif // !_BOOT_MODE 306 307 status_t Find(const uint8* key, uint16 keyLength, 308 off_t* value); 309 310 #if !_BOOT_MODE 311 static int32 TypeCodeToKeyType(type_code code); 312 static int32 ModeToKeyType(mode_t mode); 313 314 protected: 315 virtual void TransactionDone(bool success); 316 virtual void RemovedFromTransaction(); 317 #endif // !_BOOT_MODE 318 319 private: 320 BPlusTree(const BPlusTree& other); 321 BPlusTree& operator=(const BPlusTree& other); 322 // no implementation 323 324 int32 _CompareKeys(const void* key1, int keylength1, 325 const void* key2, int keylength2); 326 status_t _FindKey(const bplustree_node* node, 327 const uint8* key, uint16 keyLength, 328 uint16* index = NULL, off_t* next = NULL); 329 #if !_BOOT_MODE 330 status_t _SeekDown(Stack<node_and_key>& stack, 331 const uint8* key, uint16 keyLength); 332 333 status_t _FindFreeDuplicateFragment( 334 Transaction& transaction, 335 const bplustree_node* node, 336 CachedNode& cached, off_t* _offset, 337 bplustree_node** _fragment, uint32* _index); 338 status_t _InsertDuplicate(Transaction& transaction, 339 CachedNode& cached, 340 const bplustree_node* node, uint16 index, 341 off_t value); 342 void _InsertKey(bplustree_node* node, uint16 index, 343 uint8* key, uint16 keyLength, off_t value); 344 status_t _SplitNode(bplustree_node* node, 345 off_t nodeOffset, bplustree_node* other, 346 off_t otherOffset, uint16* _keyIndex, 347 uint8* key, uint16* _keyLength, 348 off_t* _value); 349 350 status_t _RemoveDuplicate(Transaction& transaction, 351 const bplustree_node* node, 352 CachedNode& cached, uint16 keyIndex, 353 off_t value); 354 void _RemoveKey(bplustree_node* node, uint16 index); 355 356 void _UpdateIterators(off_t offset, off_t nextOffset, 357 uint16 keyIndex, uint16 splitAt, 358 int8 change); 359 void _AddIterator(TreeIterator* iterator); 360 void _RemoveIterator(TreeIterator* iterator); 361 362 status_t _ValidateChildren(TreeCheck& check, 363 uint32 level, off_t offset, 364 const uint8* largestKey, uint16 keyLength, 365 const bplustree_node* parent); 366 status_t _ValidateChild(TreeCheck& check, 367 CachedNode& cached, uint32 level, 368 off_t offset, off_t lastOffset, 369 off_t nextOffset, const uint8* key, 370 uint16 keyLength); 371 #endif // !_BOOT_MODE 372 373 private: 374 friend class TreeIterator; 375 friend class CachedNode; 376 friend struct TreeCheck; 377 378 Inode* fStream; 379 bplustree_header fHeader; 380 int32 fNodeSize; 381 bool fAllowDuplicates; 382 bool fInTransaction; 383 status_t fStatus; 384 385 #if !_BOOT_MODE 386 mutex fIteratorLock; 387 SinglyLinkedList<TreeIterator> fIterators; 388 #endif 389 }; 390 391 392 // #pragma mark - helper classes/functions 393 394 395 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> { 396 public: 397 TreeIterator(BPlusTree* tree); 398 ~TreeIterator(); 399 400 status_t Goto(int8 to); 401 status_t Traverse(int8 direction, void* key, 402 uint16* keyLength, uint16 maxLength, 403 off_t* value, uint16* duplicate = NULL); 404 status_t Find(const uint8* key, uint16 keyLength); 405 406 status_t Rewind(); 407 status_t GetNextEntry(void* key, uint16* keyLength, 408 uint16 maxLength, off_t* value, 409 uint16* duplicate = NULL); 410 status_t GetPreviousEntry(void* key, uint16* keyLength, 411 uint16 maxLength, off_t* value, 412 uint16* duplicate = NULL); 413 void SkipDuplicates(); 414 415 BPlusTree* Tree() const { return fTree; } 416 417 #ifdef DEBUG 418 void Dump(); 419 #endif 420 421 private: 422 friend class BPlusTree; 423 424 // called by BPlusTree 425 void Update(off_t offset, off_t nextOffset, 426 uint16 keyIndex, uint16 splitAt, 427 int8 change); 428 void Stop(); 429 430 private: 431 BPlusTree* fTree; 432 off_t fCurrentNodeOffset; 433 // traverse position 434 int32 fCurrentKey; 435 off_t fDuplicateNode; 436 uint16 fDuplicate; 437 uint16 fNumDuplicates; 438 bool fIsFragment; 439 }; 440 441 442 // #pragma mark - BPlusTree's inline functions 443 // (most of them may not be needed) 444 445 446 #if !_BOOT_MODE 447 inline status_t 448 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value) 449 { 450 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 451 return B_BAD_TYPE; 452 return Remove(transaction, (uint8*)key, strlen(key), value); 453 } 454 455 456 inline status_t 457 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value) 458 { 459 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 460 return B_BAD_TYPE; 461 return Insert(transaction, (uint8*)key, strlen(key), value); 462 } 463 464 465 inline status_t 466 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value) 467 { 468 if (fHeader.DataType() != BPLUSTREE_INT32_TYPE) 469 return B_BAD_TYPE; 470 return Insert(transaction, (uint8*)&key, sizeof(key), value); 471 } 472 473 474 inline status_t 475 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value) 476 { 477 if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE) 478 return B_BAD_TYPE; 479 return Insert(transaction, (uint8*)&key, sizeof(key), value); 480 } 481 482 483 inline status_t 484 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value) 485 { 486 if (fHeader.DataType() != BPLUSTREE_INT64_TYPE) 487 return B_BAD_TYPE; 488 return Insert(transaction, (uint8*)&key, sizeof(key), value); 489 } 490 491 492 inline status_t 493 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value) 494 { 495 if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE) 496 return B_BAD_TYPE; 497 return Insert(transaction, (uint8*)&key, sizeof(key), value); 498 } 499 500 501 inline status_t 502 BPlusTree::Insert(Transaction& transaction, float key, off_t value) 503 { 504 if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE) 505 return B_BAD_TYPE; 506 return Insert(transaction, (uint8*)&key, sizeof(key), value); 507 } 508 509 510 inline status_t 511 BPlusTree::Insert(Transaction& transaction, double key, off_t value) 512 { 513 if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE) 514 return B_BAD_TYPE; 515 return Insert(transaction, (uint8*)&key, sizeof(key), value); 516 } 517 #endif // !_BOOT_MODE 518 519 520 // #pragma mark - TreeIterator inline functions 521 522 523 inline status_t 524 TreeIterator::Rewind() 525 { 526 return Goto(BPLUSTREE_BEGIN); 527 } 528 529 530 inline status_t 531 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength, 532 off_t* value, uint16* duplicate) 533 { 534 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, 535 duplicate); 536 } 537 538 539 inline status_t 540 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength, 541 off_t* value, uint16* duplicate) 542 { 543 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, 544 duplicate); 545 } 546 547 548 // #pragma mark - bplustree_header inline functions 549 550 551 inline bool 552 bplustree_header::CheckNode(const bplustree_node* node) const 553 { 554 // sanity checks (links, all_key_count) 555 return IsValidLink(node->LeftLink()) 556 && IsValidLink(node->RightLink()) 557 && IsValidLink(node->OverflowLink()) 558 && (int8*)node->Values() + node->NumKeys() * sizeof(off_t) 559 <= (int8*)node + NodeSize(); 560 } 561 562 563 inline bool 564 bplustree_header::IsValidLink(off_t link) const 565 { 566 return link == BPLUSTREE_NULL 567 || (link > 0 && link <= MaximumSize() - NodeSize()); 568 } 569 570 571 // #pragma mark - bplustree_node inline functions 572 573 574 inline Unaligned<uint16>* 575 bplustree_node::KeyLengths() const 576 { 577 return (Unaligned<uint16>*)(((char*)this) + key_align(sizeof(bplustree_node) 578 + AllKeyLength())); 579 } 580 581 582 inline Unaligned<off_t>* 583 bplustree_node::Values() const 584 { 585 return (Unaligned<off_t>*)( 586 (char*)KeyLengths() + NumKeys() * sizeof(uint16)); 587 } 588 589 590 inline uint8* 591 bplustree_node::Keys() const 592 { 593 return (uint8*)this + sizeof(bplustree_node); 594 } 595 596 597 inline int32 598 bplustree_node::Used() const 599 { 600 return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() 601 * (sizeof(uint16) + sizeof(off_t)); 602 } 603 604 605 inline bool 606 bplustree_node::IsLeaf() const 607 { 608 return OverflowLink() == BPLUSTREE_NULL; 609 } 610 611 612 inline duplicate_array* 613 bplustree_node::FragmentAt(int8 index) const 614 { 615 return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1)); 616 } 617 618 619 inline duplicate_array* 620 bplustree_node::DuplicateArray() const 621 { 622 return (duplicate_array*)&overflow_link; 623 } 624 625 626 inline uint8 627 bplustree_node::LinkType(off_t link) 628 { 629 return *(uint64*)&link >> 62; 630 } 631 632 633 inline off_t 634 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 635 { 636 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) 637 | (fragmentIndex & 0x3ff); 638 } 639 640 641 inline bool 642 bplustree_node::IsDuplicate(off_t link) 643 { 644 return (LinkType(link) 645 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 646 } 647 648 649 inline off_t 650 bplustree_node::FragmentOffset(off_t link) 651 { 652 return link & 0x3ffffffffffffc00LL; 653 } 654 655 656 inline uint32 657 bplustree_node::FragmentIndex(off_t link) 658 { 659 return (uint32)(link & 0x3ff); 660 } 661 662 663 inline uint32 664 bplustree_node::MaxFragments(uint32 nodeSize) 665 { 666 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 667 } 668 669 670 #if _BOOT_MODE 671 } // namespace BFS 672 673 #undef Inode 674 #endif 675 676 #endif // B_PLUS_TREE_H 677