1 /* 2 * Copyright 2001-2012, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 #ifndef B_PLUS_TREE_H 6 #define B_PLUS_TREE_H 7 8 9 #include "bfs.h" 10 11 #include "Utility.h" 12 13 #if !_BOOT_MODE 14 #include "Journal.h" 15 class Inode; 16 #else 17 #define Inode BFS::Stream 18 19 namespace BFS { 20 21 class Stream; 22 class Transaction; 23 class TransactionListener {}; 24 #endif // _BOOT_MODE 25 26 // #pragma mark - on-disk structures 27 28 struct bplustree_node; 29 30 #define BPLUSTREE_NULL -1LL 31 #define BPLUSTREE_FREE -2LL 32 33 struct bplustree_header { 34 uint32 magic; 35 uint32 node_size; 36 uint32 max_number_of_levels; 37 uint32 data_type; 38 int64 root_node_pointer; 39 int64 free_node_pointer; 40 int64 maximum_size; 41 42 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 43 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 44 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 45 off_t RootNode() const 46 { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 47 off_t FreeNode() const 48 { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 49 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 50 uint32 MaxNumberOfLevels() const 51 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 52 53 inline bool CheckNode(bplustree_node* node) const; 54 inline bool IsValidLink(off_t link) const; 55 bool IsValid() const; 56 } _PACKED; 57 58 #define BPLUSTREE_MAGIC 0x69f6c2e8 59 #define BPLUSTREE_NODE_SIZE 1024 60 #define BPLUSTREE_MAX_KEY_LENGTH 256 61 #define BPLUSTREE_MIN_KEY_LENGTH 1 62 63 enum bplustree_types { 64 BPLUSTREE_STRING_TYPE = 0, 65 BPLUSTREE_INT32_TYPE = 1, 66 BPLUSTREE_UINT32_TYPE = 2, 67 BPLUSTREE_INT64_TYPE = 3, 68 BPLUSTREE_UINT64_TYPE = 4, 69 BPLUSTREE_FLOAT_TYPE = 5, 70 BPLUSTREE_DOUBLE_TYPE = 6 71 }; 72 73 74 struct duplicate_array; 75 76 77 struct bplustree_node { 78 int64 left_link; 79 int64 right_link; 80 int64 overflow_link; 81 uint16 all_key_count; 82 uint16 all_key_length; 83 84 off_t LeftLink() const 85 { return BFS_ENDIAN_TO_HOST_INT64( 86 left_link); } 87 off_t RightLink() const 88 { return BFS_ENDIAN_TO_HOST_INT64( 89 right_link); } 90 off_t OverflowLink() const 91 { return BFS_ENDIAN_TO_HOST_INT64( 92 overflow_link); } 93 uint16 NumKeys() const 94 { return BFS_ENDIAN_TO_HOST_INT16( 95 all_key_count); } 96 uint16 AllKeyLength() const 97 { return BFS_ENDIAN_TO_HOST_INT16( 98 all_key_length); } 99 100 inline uint16* KeyLengths() const; 101 inline off_t* Values() const; 102 inline uint8* Keys() const; 103 inline int32 Used() const; 104 uint8* KeyAt(int32 index, uint16* keyLength) const; 105 106 inline bool IsLeaf() const; 107 108 void Initialize(); 109 uint8 CountDuplicates(off_t offset, 110 bool isFragment) const; 111 off_t DuplicateAt(off_t offset, bool isFragment, 112 int8 index) const; 113 uint32 FragmentsUsed(uint32 nodeSize) const; 114 inline duplicate_array* FragmentAt(int8 index) const; 115 inline duplicate_array* DuplicateArray() const; 116 117 static inline uint8 LinkType(off_t link); 118 static inline off_t MakeLink(uint8 type, off_t link, 119 uint32 fragmentIndex = 0); 120 static inline bool IsDuplicate(off_t link); 121 static inline off_t FragmentOffset(off_t link); 122 static inline uint32 FragmentIndex(off_t link); 123 static inline uint32 MaxFragments(uint32 nodeSize); 124 125 status_t CheckIntegrity(uint32 nodeSize) const; 126 } _PACKED; 127 128 //#define BPLUSTREE_NODE 0 129 #define BPLUSTREE_DUPLICATE_NODE 2 130 #define BPLUSTREE_DUPLICATE_FRAGMENT 3 131 132 #define NUM_FRAGMENT_VALUES 7 133 #define NUM_DUPLICATE_VALUES 125 134 135 //************************************** 136 137 enum bplustree_traversing { 138 BPLUSTREE_FORWARD = 1, 139 BPLUSTREE_BACKWARD = -1, 140 141 BPLUSTREE_BEGIN = 0, 142 BPLUSTREE_END = 1 143 }; 144 145 146 // #pragma mark - in-memory structures 147 148 149 class BPlusTree; 150 struct TreeCheck; 151 class TreeIterator; 152 153 154 #if !_BOOT_MODE 155 // needed for searching (utilizing a stack) 156 struct node_and_key { 157 off_t nodeOffset; 158 uint16 keyIndex; 159 }; 160 #endif // !_BOOT_MODE 161 162 163 class CachedNode { 164 public: 165 CachedNode(BPlusTree* tree) 166 : 167 fTree(tree), 168 fNode(NULL) 169 { 170 #if _BOOT_MODE 171 fBlock = NULL; 172 #endif 173 } 174 175 CachedNode(BPlusTree* tree, off_t offset, bool check = true) 176 : 177 fTree(tree), 178 fNode(NULL) 179 { 180 #if _BOOT_MODE 181 fBlock = NULL; 182 #endif 183 SetTo(offset, check); 184 } 185 186 ~CachedNode() 187 { 188 Unset(); 189 #if _BOOT_MODE 190 free(fBlock); 191 #endif 192 } 193 194 const bplustree_node* SetTo(off_t offset, bool check = true); 195 status_t SetTo(off_t offset, 196 const bplustree_node** _node, 197 bool check = true); 198 199 const bplustree_header* SetToHeader(); 200 void Unset(); 201 202 #if !_BOOT_MODE 203 bplustree_node* SetToWritable(Transaction& transaction, 204 off_t offset, bool check = true); 205 bplustree_node* MakeWritable(Transaction& transaction); 206 bplustree_header* SetToWritableHeader(Transaction& transaction); 207 208 void UnsetUnchanged(Transaction& transaction); 209 210 status_t Free(Transaction& transaction, off_t offset); 211 status_t Allocate(Transaction& transaction, 212 bplustree_node** _node, off_t* _offset); 213 #endif // !_BOOT_MODE 214 215 bool IsWritable() const { return fWritable; } 216 bplustree_node* Node() const { return fNode; } 217 218 protected: 219 bplustree_node* InternalSetTo(Transaction* transaction, 220 off_t offset); 221 222 BPlusTree* fTree; 223 bplustree_node* fNode; 224 off_t fOffset; 225 off_t fBlockNumber; 226 bool fWritable; 227 #if _BOOT_MODE 228 uint8* fBlock; 229 #endif 230 }; 231 232 233 class BPlusTree : public TransactionListener { 234 public: 235 #if !_BOOT_MODE 236 BPlusTree(Transaction& transaction, 237 Inode* stream, 238 int32 nodeSize = BPLUSTREE_NODE_SIZE); 239 #endif 240 BPlusTree(Inode* stream); 241 BPlusTree(); 242 ~BPlusTree(); 243 244 #if !_BOOT_MODE 245 status_t SetTo(Transaction& transaction, Inode* stream, 246 int32 nodeSize = BPLUSTREE_NODE_SIZE); 247 #endif 248 status_t SetTo(Inode* stream); 249 status_t SetStream(Inode* stream); 250 251 status_t InitCheck(); 252 253 size_t NodeSize() const { return fNodeSize; } 254 Inode* Stream() const { return fStream; } 255 256 #if !_BOOT_MODE 257 status_t Validate(bool repair, bool& _errorsFound); 258 status_t MakeEmpty(); 259 260 status_t Remove(Transaction& transaction, 261 const uint8* key, uint16 keyLength, 262 off_t value); 263 status_t Insert(Transaction& transaction, 264 const uint8* key, uint16 keyLength, 265 off_t value); 266 267 status_t Remove(Transaction& transaction, const char* key, 268 off_t value); 269 status_t Insert(Transaction& transaction, const char* key, 270 off_t value); 271 status_t Insert(Transaction& transaction, int32 key, 272 off_t value); 273 status_t Insert(Transaction& transaction, uint32 key, 274 off_t value); 275 status_t Insert(Transaction& transaction, int64 key, 276 off_t value); 277 status_t Insert(Transaction& transaction, uint64 key, 278 off_t value); 279 status_t Insert(Transaction& transaction, float key, 280 off_t value); 281 status_t Insert(Transaction& transaction, double key, 282 off_t value); 283 284 status_t Replace(Transaction& transaction, 285 const uint8* key, uint16 keyLength, 286 off_t value); 287 #endif // !_BOOT_MODE 288 289 status_t Find(const uint8* key, uint16 keyLength, 290 off_t* value); 291 292 #if !_BOOT_MODE 293 static int32 TypeCodeToKeyType(type_code code); 294 static int32 ModeToKeyType(mode_t mode); 295 296 protected: 297 virtual void TransactionDone(bool success); 298 virtual void RemovedFromTransaction(); 299 #endif // !_BOOT_MODE 300 301 private: 302 BPlusTree(const BPlusTree& other); 303 BPlusTree& operator=(const BPlusTree& other); 304 // no implementation 305 306 int32 _CompareKeys(const void* key1, int keylength1, 307 const void* key2, int keylength2); 308 status_t _FindKey(const bplustree_node* node, 309 const uint8* key, uint16 keyLength, 310 uint16* index = NULL, off_t* next = NULL); 311 #if !_BOOT_MODE 312 status_t _SeekDown(Stack<node_and_key>& stack, 313 const uint8* key, uint16 keyLength); 314 315 status_t _FindFreeDuplicateFragment( 316 Transaction& transaction, 317 const bplustree_node* node, 318 CachedNode& cached, off_t* _offset, 319 bplustree_node** _fragment, uint32* _index); 320 status_t _InsertDuplicate(Transaction& transaction, 321 CachedNode& cached, 322 const bplustree_node* node, uint16 index, 323 off_t value); 324 void _InsertKey(bplustree_node* node, uint16 index, 325 uint8* key, uint16 keyLength, off_t value); 326 status_t _SplitNode(bplustree_node* node, 327 off_t nodeOffset, bplustree_node* other, 328 off_t otherOffset, uint16* _keyIndex, 329 uint8* key, uint16* _keyLength, 330 off_t* _value); 331 332 status_t _RemoveDuplicate(Transaction& transaction, 333 const bplustree_node* node, 334 CachedNode& cached, uint16 keyIndex, 335 off_t value); 336 void _RemoveKey(bplustree_node* node, uint16 index); 337 338 void _UpdateIterators(off_t offset, off_t nextOffset, 339 uint16 keyIndex, uint16 splitAt, 340 int8 change); 341 void _AddIterator(TreeIterator* iterator); 342 void _RemoveIterator(TreeIterator* iterator); 343 344 status_t _ValidateChildren(TreeCheck& check, 345 uint32 level, off_t offset, 346 const uint8* largestKey, uint16 keyLength, 347 const bplustree_node* parent); 348 status_t _ValidateChild(TreeCheck& check, 349 CachedNode& cached, uint32 level, 350 off_t offset, off_t lastOffset, 351 off_t nextOffset, const uint8* key, 352 uint16 keyLength); 353 #endif // !_BOOT_MODE 354 355 private: 356 friend class TreeIterator; 357 friend class CachedNode; 358 friend class TreeCheck; 359 360 Inode* fStream; 361 bplustree_header fHeader; 362 int32 fNodeSize; 363 bool fAllowDuplicates; 364 bool fInTransaction; 365 status_t fStatus; 366 367 #if !_BOOT_MODE 368 mutex fIteratorLock; 369 SinglyLinkedList<TreeIterator> fIterators; 370 #endif 371 }; 372 373 374 // #pragma mark - helper classes/functions 375 376 377 extern int32 compareKeys(type_code type, const void* key1, int keyLength1, 378 const void* key2, int keyLength2); 379 380 381 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> { 382 public: 383 TreeIterator(BPlusTree* tree); 384 ~TreeIterator(); 385 386 status_t Goto(int8 to); 387 status_t Traverse(int8 direction, void* key, 388 uint16* keyLength, uint16 maxLength, 389 off_t* value, uint16* duplicate = NULL); 390 status_t Find(const uint8* key, uint16 keyLength); 391 392 status_t Rewind(); 393 status_t GetNextEntry(void* key, uint16* keyLength, 394 uint16 maxLength, off_t* value, 395 uint16* duplicate = NULL); 396 status_t GetPreviousEntry(void* key, uint16* keyLength, 397 uint16 maxLength, off_t* value, 398 uint16* duplicate = NULL); 399 void SkipDuplicates(); 400 401 BPlusTree* Tree() const { return fTree; } 402 403 #ifdef DEBUG 404 void Dump(); 405 #endif 406 407 private: 408 friend class BPlusTree; 409 410 // called by BPlusTree 411 void Update(off_t offset, off_t nextOffset, 412 uint16 keyIndex, uint16 splitAt, 413 int8 change); 414 void Stop(); 415 416 private: 417 BPlusTree* fTree; 418 off_t fCurrentNodeOffset; 419 // traverse position 420 int32 fCurrentKey; 421 off_t fDuplicateNode; 422 uint16 fDuplicate; 423 uint16 fNumDuplicates; 424 bool fIsFragment; 425 }; 426 427 428 // #pragma mark - BPlusTree's inline functions 429 // (most of them may not be needed) 430 431 432 #if !_BOOT_MODE 433 inline status_t 434 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value) 435 { 436 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 437 return B_BAD_TYPE; 438 return Remove(transaction, (uint8*)key, strlen(key), value); 439 } 440 441 442 inline status_t 443 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value) 444 { 445 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 446 return B_BAD_TYPE; 447 return Insert(transaction, (uint8*)key, strlen(key), value); 448 } 449 450 451 inline status_t 452 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value) 453 { 454 if (fHeader.DataType() != BPLUSTREE_INT32_TYPE) 455 return B_BAD_TYPE; 456 return Insert(transaction, (uint8*)&key, sizeof(key), value); 457 } 458 459 460 inline status_t 461 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value) 462 { 463 if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE) 464 return B_BAD_TYPE; 465 return Insert(transaction, (uint8*)&key, sizeof(key), value); 466 } 467 468 469 inline status_t 470 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value) 471 { 472 if (fHeader.DataType() != BPLUSTREE_INT64_TYPE) 473 return B_BAD_TYPE; 474 return Insert(transaction, (uint8*)&key, sizeof(key), value); 475 } 476 477 478 inline status_t 479 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value) 480 { 481 if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE) 482 return B_BAD_TYPE; 483 return Insert(transaction, (uint8*)&key, sizeof(key), value); 484 } 485 486 487 inline status_t 488 BPlusTree::Insert(Transaction& transaction, float key, off_t value) 489 { 490 if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE) 491 return B_BAD_TYPE; 492 return Insert(transaction, (uint8*)&key, sizeof(key), value); 493 } 494 495 496 inline status_t 497 BPlusTree::Insert(Transaction& transaction, double key, off_t value) 498 { 499 if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE) 500 return B_BAD_TYPE; 501 return Insert(transaction, (uint8*)&key, sizeof(key), value); 502 } 503 #endif // !_BOOT_MODE 504 505 506 // #pragma mark - TreeIterator inline functions 507 508 509 inline status_t 510 TreeIterator::Rewind() 511 { 512 return Goto(BPLUSTREE_BEGIN); 513 } 514 515 516 inline status_t 517 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength, 518 off_t* value, uint16* duplicate) 519 { 520 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, 521 duplicate); 522 } 523 524 525 inline status_t 526 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength, 527 off_t* value, uint16* duplicate) 528 { 529 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, 530 duplicate); 531 } 532 533 534 // #pragma mark - bplustree_header inline functions 535 536 537 inline bool 538 bplustree_header::CheckNode(bplustree_node* node) const 539 { 540 // sanity checks (links, all_key_count) 541 return IsValidLink(node->LeftLink()) 542 && IsValidLink(node->RightLink()) 543 && IsValidLink(node->OverflowLink()) 544 && (int8*)node->Values() + node->NumKeys() * sizeof(off_t) 545 <= (int8*)node + NodeSize(); 546 } 547 548 549 inline bool 550 bplustree_header::IsValidLink(off_t link) const 551 { 552 return link == BPLUSTREE_NULL 553 || (link > 0 && link <= MaximumSize() - NodeSize()); 554 } 555 556 557 // #pragma mark - bplustree_node inline functions 558 559 560 inline uint16* 561 bplustree_node::KeyLengths() const 562 { 563 return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node) 564 + AllKeyLength())); 565 } 566 567 568 inline off_t* 569 bplustree_node::Values() const 570 { 571 return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16)); 572 } 573 574 575 inline uint8* 576 bplustree_node::Keys() const 577 { 578 return (uint8*)this + sizeof(bplustree_node); 579 } 580 581 582 inline int32 583 bplustree_node::Used() const 584 { 585 return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() 586 * (sizeof(uint16) + sizeof(off_t)); 587 } 588 589 590 inline bool 591 bplustree_node::IsLeaf() const 592 { 593 return OverflowLink() == BPLUSTREE_NULL; 594 } 595 596 597 inline duplicate_array* 598 bplustree_node::FragmentAt(int8 index) const 599 { 600 return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1)); 601 } 602 603 604 inline duplicate_array* 605 bplustree_node::DuplicateArray() const 606 { 607 return (duplicate_array*)&overflow_link; 608 } 609 610 611 inline uint8 612 bplustree_node::LinkType(off_t link) 613 { 614 return *(uint64*)&link >> 62; 615 } 616 617 618 inline off_t 619 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 620 { 621 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) 622 | (fragmentIndex & 0x3ff); 623 } 624 625 626 inline bool 627 bplustree_node::IsDuplicate(off_t link) 628 { 629 return (LinkType(link) 630 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 631 } 632 633 634 inline off_t 635 bplustree_node::FragmentOffset(off_t link) 636 { 637 return link & 0x3ffffffffffffc00LL; 638 } 639 640 641 inline uint32 642 bplustree_node::FragmentIndex(off_t link) 643 { 644 return (uint32)(link & 0x3ff); 645 } 646 647 648 inline uint32 649 bplustree_node::MaxFragments(uint32 nodeSize) 650 { 651 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 652 } 653 654 655 #if _BOOT_MODE 656 } // namespace BFS 657 658 #undef Inode 659 #endif 660 661 #endif // B_PLUS_TREE_H 662