1 /* 2 * Copyright 2001-2012, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 #ifndef B_PLUS_TREE_H 6 #define B_PLUS_TREE_H 7 8 9 #include "bfs.h" 10 11 #include "Utility.h" 12 13 #if !_BOOT_MODE 14 #include "Journal.h" 15 class Inode; 16 #else 17 #define Inode BFS::Stream 18 19 namespace BFS { 20 21 class Stream; 22 class Transaction; 23 class TransactionListener {}; 24 #endif // _BOOT_MODE 25 26 // #pragma mark - on-disk structures 27 28 struct bplustree_node; 29 30 #define BPLUSTREE_NULL -1LL 31 #define BPLUSTREE_FREE -2LL 32 33 struct bplustree_header { 34 uint32 magic; 35 uint32 node_size; 36 uint32 max_number_of_levels; 37 uint32 data_type; 38 int64 root_node_pointer; 39 int64 free_node_pointer; 40 int64 maximum_size; 41 42 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 43 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 44 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 45 off_t RootNode() const 46 { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 47 off_t FreeNode() const 48 { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 49 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 50 uint32 MaxNumberOfLevels() const 51 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 52 53 inline bool CheckNode(bplustree_node* node) const; 54 inline bool IsValidLink(off_t link) const; 55 bool IsValid() const; 56 } _PACKED; 57 58 #define BPLUSTREE_MAGIC 0x69f6c2e8 59 #define BPLUSTREE_NODE_SIZE 1024 60 #define BPLUSTREE_MAX_KEY_LENGTH 256 61 #define BPLUSTREE_MIN_KEY_LENGTH 1 62 63 enum bplustree_types { 64 BPLUSTREE_STRING_TYPE = 0, 65 BPLUSTREE_INT32_TYPE = 1, 66 BPLUSTREE_UINT32_TYPE = 2, 67 BPLUSTREE_INT64_TYPE = 3, 68 BPLUSTREE_UINT64_TYPE = 4, 69 BPLUSTREE_FLOAT_TYPE = 5, 70 BPLUSTREE_DOUBLE_TYPE = 6 71 }; 72 73 74 struct duplicate_array; 75 76 77 struct bplustree_node { 78 int64 left_link; 79 int64 right_link; 80 int64 overflow_link; 81 uint16 all_key_count; 82 uint16 all_key_length; 83 84 off_t LeftLink() const 85 { return BFS_ENDIAN_TO_HOST_INT64( 86 left_link); } 87 off_t RightLink() const 88 { return BFS_ENDIAN_TO_HOST_INT64( 89 right_link); } 90 off_t OverflowLink() const 91 { return BFS_ENDIAN_TO_HOST_INT64( 92 overflow_link); } 93 uint16 NumKeys() const 94 { return BFS_ENDIAN_TO_HOST_INT16( 95 all_key_count); } 96 uint16 AllKeyLength() const 97 { return BFS_ENDIAN_TO_HOST_INT16( 98 all_key_length); } 99 100 inline uint16* KeyLengths() const; 101 inline off_t* Values() const; 102 inline uint8* Keys() const; 103 inline int32 Used() const; 104 uint8* KeyAt(int32 index, uint16* keyLength) const; 105 106 inline bool IsLeaf() const; 107 108 void Initialize(); 109 uint8 CountDuplicates(off_t offset, 110 bool isFragment) const; 111 off_t DuplicateAt(off_t offset, bool isFragment, 112 int8 index) const; 113 uint32 FragmentsUsed(uint32 nodeSize) const; 114 inline duplicate_array* FragmentAt(int8 index) const; 115 inline duplicate_array* DuplicateArray() const; 116 117 static inline uint8 LinkType(off_t link); 118 static inline off_t MakeLink(uint8 type, off_t link, 119 uint32 fragmentIndex = 0); 120 static inline bool IsDuplicate(off_t link); 121 static inline off_t FragmentOffset(off_t link); 122 static inline uint32 FragmentIndex(off_t link); 123 static inline uint32 MaxFragments(uint32 nodeSize); 124 125 status_t CheckIntegrity(uint32 nodeSize) const; 126 } _PACKED; 127 128 //#define BPLUSTREE_NODE 0 129 #define BPLUSTREE_DUPLICATE_NODE 2 130 #define BPLUSTREE_DUPLICATE_FRAGMENT 3 131 132 #define NUM_FRAGMENT_VALUES 7 133 #define NUM_DUPLICATE_VALUES 125 134 135 //************************************** 136 137 enum bplustree_traversing { 138 BPLUSTREE_FORWARD = 1, 139 BPLUSTREE_BACKWARD = -1, 140 141 BPLUSTREE_BEGIN = 0, 142 BPLUSTREE_END = 1 143 }; 144 145 146 // #pragma mark - in-memory structures 147 148 149 class BPlusTree; 150 struct TreeCheck; 151 class TreeIterator; 152 153 154 #if !_BOOT_MODE 155 template<class T> class Stack; 156 157 // needed for searching (utilizing a stack) 158 struct node_and_key { 159 off_t nodeOffset; 160 uint16 keyIndex; 161 }; 162 #endif // !_BOOT_MODE 163 164 165 class CachedNode { 166 public: 167 CachedNode(BPlusTree* tree) 168 : 169 fTree(tree), 170 fNode(NULL) 171 { 172 #if _BOOT_MODE 173 fBlock = NULL; 174 #endif 175 } 176 177 CachedNode(BPlusTree* tree, off_t offset, bool check = true) 178 : 179 fTree(tree), 180 fNode(NULL) 181 { 182 #if _BOOT_MODE 183 fBlock = NULL; 184 #endif 185 SetTo(offset, check); 186 } 187 188 ~CachedNode() 189 { 190 Unset(); 191 #if _BOOT_MODE 192 free(fBlock); 193 #endif 194 } 195 196 const bplustree_node* SetTo(off_t offset, bool check = true); 197 status_t SetTo(off_t offset, 198 const bplustree_node** _node, 199 bool check = true); 200 201 const bplustree_header* SetToHeader(); 202 void Unset(); 203 204 #if !_BOOT_MODE 205 bplustree_node* SetToWritable(Transaction& transaction, 206 off_t offset, bool check = true); 207 bplustree_node* MakeWritable(Transaction& transaction); 208 bplustree_header* SetToWritableHeader(Transaction& transaction); 209 210 void UnsetUnchanged(Transaction& transaction); 211 212 status_t Free(Transaction& transaction, off_t offset); 213 status_t Allocate(Transaction& transaction, 214 bplustree_node** _node, off_t* _offset); 215 #endif // !_BOOT_MODE 216 217 bool IsWritable() const { return fWritable; } 218 bplustree_node* Node() const { return fNode; } 219 220 protected: 221 bplustree_node* InternalSetTo(Transaction* transaction, 222 off_t offset); 223 224 BPlusTree* fTree; 225 bplustree_node* fNode; 226 off_t fOffset; 227 off_t fBlockNumber; 228 bool fWritable; 229 #if _BOOT_MODE 230 uint8* fBlock; 231 #endif 232 }; 233 234 235 class BPlusTree : public TransactionListener { 236 public: 237 #if !_BOOT_MODE 238 BPlusTree(Transaction& transaction, 239 Inode* stream, 240 int32 nodeSize = BPLUSTREE_NODE_SIZE); 241 #endif 242 BPlusTree(Inode* stream); 243 BPlusTree(); 244 ~BPlusTree(); 245 246 #if !_BOOT_MODE 247 status_t SetTo(Transaction& transaction, Inode* stream, 248 int32 nodeSize = BPLUSTREE_NODE_SIZE); 249 #endif 250 status_t SetTo(Inode* stream); 251 status_t SetStream(Inode* stream); 252 253 status_t InitCheck(); 254 255 size_t NodeSize() const { return fNodeSize; } 256 Inode* Stream() const { return fStream; } 257 258 #if !_BOOT_MODE 259 status_t Validate(bool repair, bool& _errorsFound); 260 status_t MakeEmpty(); 261 262 status_t Remove(Transaction& transaction, 263 const uint8* key, uint16 keyLength, 264 off_t value); 265 status_t Insert(Transaction& transaction, 266 const uint8* key, uint16 keyLength, 267 off_t value); 268 269 status_t Remove(Transaction& transaction, const char* key, 270 off_t value); 271 status_t Insert(Transaction& transaction, const char* key, 272 off_t value); 273 status_t Insert(Transaction& transaction, int32 key, 274 off_t value); 275 status_t Insert(Transaction& transaction, uint32 key, 276 off_t value); 277 status_t Insert(Transaction& transaction, int64 key, 278 off_t value); 279 status_t Insert(Transaction& transaction, uint64 key, 280 off_t value); 281 status_t Insert(Transaction& transaction, float key, 282 off_t value); 283 status_t Insert(Transaction& transaction, double key, 284 off_t value); 285 286 status_t Replace(Transaction& transaction, 287 const uint8* key, uint16 keyLength, 288 off_t value); 289 #endif // !_BOOT_MODE 290 291 status_t Find(const uint8* key, uint16 keyLength, 292 off_t* value); 293 294 #if !_BOOT_MODE 295 static int32 TypeCodeToKeyType(type_code code); 296 static int32 ModeToKeyType(mode_t mode); 297 298 protected: 299 virtual void TransactionDone(bool success); 300 virtual void RemovedFromTransaction(); 301 #endif // !_BOOT_MODE 302 303 private: 304 BPlusTree(const BPlusTree& other); 305 BPlusTree& operator=(const BPlusTree& other); 306 // no implementation 307 308 int32 _CompareKeys(const void* key1, int keylength1, 309 const void* key2, int keylength2); 310 status_t _FindKey(const bplustree_node* node, 311 const uint8* key, uint16 keyLength, 312 uint16* index = NULL, off_t* next = NULL); 313 #if !_BOOT_MODE 314 status_t _SeekDown(Stack<node_and_key>& stack, 315 const uint8* key, uint16 keyLength); 316 317 status_t _FindFreeDuplicateFragment( 318 Transaction& transaction, 319 const bplustree_node* node, 320 CachedNode& cached, off_t* _offset, 321 bplustree_node** _fragment, uint32* _index); 322 status_t _InsertDuplicate(Transaction& transaction, 323 CachedNode& cached, 324 const bplustree_node* node, uint16 index, 325 off_t value); 326 void _InsertKey(bplustree_node* node, uint16 index, 327 uint8* key, uint16 keyLength, off_t value); 328 status_t _SplitNode(bplustree_node* node, 329 off_t nodeOffset, bplustree_node* other, 330 off_t otherOffset, uint16* _keyIndex, 331 uint8* key, uint16* _keyLength, 332 off_t* _value); 333 334 status_t _RemoveDuplicate(Transaction& transaction, 335 const bplustree_node* node, 336 CachedNode& cached, uint16 keyIndex, 337 off_t value); 338 void _RemoveKey(bplustree_node* node, uint16 index); 339 340 void _UpdateIterators(off_t offset, off_t nextOffset, 341 uint16 keyIndex, uint16 splitAt, 342 int8 change); 343 void _AddIterator(TreeIterator* iterator); 344 void _RemoveIterator(TreeIterator* iterator); 345 346 status_t _ValidateChildren(TreeCheck& check, 347 uint32 level, off_t offset, 348 const uint8* largestKey, uint16 keyLength, 349 const bplustree_node* parent); 350 status_t _ValidateChild(TreeCheck& check, 351 CachedNode& cached, uint32 level, 352 off_t offset, off_t lastOffset, 353 off_t nextOffset, const uint8* key, 354 uint16 keyLength); 355 #endif // !_BOOT_MODE 356 357 private: 358 friend class TreeIterator; 359 friend class CachedNode; 360 friend class TreeCheck; 361 362 Inode* fStream; 363 bplustree_header fHeader; 364 int32 fNodeSize; 365 bool fAllowDuplicates; 366 bool fInTransaction; 367 status_t fStatus; 368 369 #if !_BOOT_MODE 370 mutex fIteratorLock; 371 SinglyLinkedList<TreeIterator> fIterators; 372 #endif 373 }; 374 375 376 // #pragma mark - helper classes/functions 377 378 379 extern int32 compareKeys(type_code type, const void* key1, int keyLength1, 380 const void* key2, int keyLength2); 381 382 383 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> { 384 public: 385 TreeIterator(BPlusTree* tree); 386 ~TreeIterator(); 387 388 status_t Goto(int8 to); 389 status_t Traverse(int8 direction, void* key, 390 uint16* keyLength, uint16 maxLength, 391 off_t* value, uint16* duplicate = NULL); 392 status_t Find(const uint8* key, uint16 keyLength); 393 394 status_t Rewind(); 395 status_t GetNextEntry(void* key, uint16* keyLength, 396 uint16 maxLength, off_t* value, 397 uint16* duplicate = NULL); 398 status_t GetPreviousEntry(void* key, uint16* keyLength, 399 uint16 maxLength, off_t* value, 400 uint16* duplicate = NULL); 401 void SkipDuplicates(); 402 403 BPlusTree* Tree() const { return fTree; } 404 405 #ifdef DEBUG 406 void Dump(); 407 #endif 408 409 private: 410 friend class BPlusTree; 411 412 // called by BPlusTree 413 void Update(off_t offset, off_t nextOffset, 414 uint16 keyIndex, uint16 splitAt, 415 int8 change); 416 void Stop(); 417 418 private: 419 BPlusTree* fTree; 420 off_t fCurrentNodeOffset; 421 // traverse position 422 int32 fCurrentKey; 423 off_t fDuplicateNode; 424 uint16 fDuplicate; 425 uint16 fNumDuplicates; 426 bool fIsFragment; 427 }; 428 429 430 // #pragma mark - BPlusTree's inline functions 431 // (most of them may not be needed) 432 433 434 #if !_BOOT_MODE 435 inline status_t 436 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value) 437 { 438 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 439 return B_BAD_TYPE; 440 return Remove(transaction, (uint8*)key, strlen(key), value); 441 } 442 443 444 inline status_t 445 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value) 446 { 447 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 448 return B_BAD_TYPE; 449 return Insert(transaction, (uint8*)key, strlen(key), value); 450 } 451 452 453 inline status_t 454 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value) 455 { 456 if (fHeader.DataType() != BPLUSTREE_INT32_TYPE) 457 return B_BAD_TYPE; 458 return Insert(transaction, (uint8*)&key, sizeof(key), value); 459 } 460 461 462 inline status_t 463 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value) 464 { 465 if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE) 466 return B_BAD_TYPE; 467 return Insert(transaction, (uint8*)&key, sizeof(key), value); 468 } 469 470 471 inline status_t 472 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value) 473 { 474 if (fHeader.DataType() != BPLUSTREE_INT64_TYPE) 475 return B_BAD_TYPE; 476 return Insert(transaction, (uint8*)&key, sizeof(key), value); 477 } 478 479 480 inline status_t 481 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value) 482 { 483 if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE) 484 return B_BAD_TYPE; 485 return Insert(transaction, (uint8*)&key, sizeof(key), value); 486 } 487 488 489 inline status_t 490 BPlusTree::Insert(Transaction& transaction, float key, off_t value) 491 { 492 if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE) 493 return B_BAD_TYPE; 494 return Insert(transaction, (uint8*)&key, sizeof(key), value); 495 } 496 497 498 inline status_t 499 BPlusTree::Insert(Transaction& transaction, double key, off_t value) 500 { 501 if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE) 502 return B_BAD_TYPE; 503 return Insert(transaction, (uint8*)&key, sizeof(key), value); 504 } 505 #endif // !_BOOT_MODE 506 507 508 // #pragma mark - TreeIterator inline functions 509 510 511 inline status_t 512 TreeIterator::Rewind() 513 { 514 return Goto(BPLUSTREE_BEGIN); 515 } 516 517 518 inline status_t 519 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength, 520 off_t* value, uint16* duplicate) 521 { 522 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, 523 duplicate); 524 } 525 526 527 inline status_t 528 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength, 529 off_t* value, uint16* duplicate) 530 { 531 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, 532 duplicate); 533 } 534 535 536 // #pragma mark - bplustree_header inline functions 537 538 539 inline bool 540 bplustree_header::CheckNode(bplustree_node* node) const 541 { 542 // sanity checks (links, all_key_count) 543 return IsValidLink(node->LeftLink()) 544 && IsValidLink(node->RightLink()) 545 && IsValidLink(node->OverflowLink()) 546 && (int8*)node->Values() + node->NumKeys() * sizeof(off_t) 547 <= (int8*)node + NodeSize(); 548 } 549 550 551 inline bool 552 bplustree_header::IsValidLink(off_t link) const 553 { 554 return link == BPLUSTREE_NULL 555 || (link > 0 && link <= MaximumSize() - NodeSize()); 556 } 557 558 559 // #pragma mark - bplustree_node inline functions 560 561 562 inline uint16* 563 bplustree_node::KeyLengths() const 564 { 565 return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node) 566 + AllKeyLength())); 567 } 568 569 570 inline off_t* 571 bplustree_node::Values() const 572 { 573 return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16)); 574 } 575 576 577 inline uint8* 578 bplustree_node::Keys() const 579 { 580 return (uint8*)this + sizeof(bplustree_node); 581 } 582 583 584 inline int32 585 bplustree_node::Used() const 586 { 587 return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() 588 * (sizeof(uint16) + sizeof(off_t)); 589 } 590 591 592 inline bool 593 bplustree_node::IsLeaf() const 594 { 595 return OverflowLink() == BPLUSTREE_NULL; 596 } 597 598 599 inline duplicate_array* 600 bplustree_node::FragmentAt(int8 index) const 601 { 602 return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1)); 603 } 604 605 606 inline duplicate_array* 607 bplustree_node::DuplicateArray() const 608 { 609 return (duplicate_array*)&overflow_link; 610 } 611 612 613 inline uint8 614 bplustree_node::LinkType(off_t link) 615 { 616 return *(uint64*)&link >> 62; 617 } 618 619 620 inline off_t 621 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 622 { 623 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) 624 | (fragmentIndex & 0x3ff); 625 } 626 627 628 inline bool 629 bplustree_node::IsDuplicate(off_t link) 630 { 631 return (LinkType(link) 632 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 633 } 634 635 636 inline off_t 637 bplustree_node::FragmentOffset(off_t link) 638 { 639 return link & 0x3ffffffffffffc00LL; 640 } 641 642 643 inline uint32 644 bplustree_node::FragmentIndex(off_t link) 645 { 646 return (uint32)(link & 0x3ff); 647 } 648 649 650 inline uint32 651 bplustree_node::MaxFragments(uint32 nodeSize) 652 { 653 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 654 } 655 656 657 #if _BOOT_MODE 658 } // namespace BFS 659 660 #undef Inode 661 #endif 662 663 #endif // B_PLUS_TREE_H 664