1 /* 2 * Copyright 2001-2012, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 #ifndef B_PLUS_TREE_H 6 #define B_PLUS_TREE_H 7 8 9 #include "bfs.h" 10 #include "Journal.h" 11 12 13 // #pragma mark - on-disk structures 14 15 struct bplustree_node; 16 17 #define BPLUSTREE_NULL -1LL 18 #define BPLUSTREE_FREE -2LL 19 20 struct bplustree_header { 21 uint32 magic; 22 uint32 node_size; 23 uint32 max_number_of_levels; 24 uint32 data_type; 25 int64 root_node_pointer; 26 int64 free_node_pointer; 27 int64 maximum_size; 28 29 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 30 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 31 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 32 off_t RootNode() const 33 { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 34 off_t FreeNode() const 35 { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 36 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 37 uint32 MaxNumberOfLevels() const 38 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 39 40 inline bool CheckNode(bplustree_node* node) const; 41 inline bool IsValidLink(off_t link) const; 42 bool IsValid() const; 43 } _PACKED; 44 45 #define BPLUSTREE_MAGIC 0x69f6c2e8 46 #define BPLUSTREE_NODE_SIZE 1024 47 #define BPLUSTREE_MAX_KEY_LENGTH 256 48 #define BPLUSTREE_MIN_KEY_LENGTH 1 49 50 enum bplustree_types { 51 BPLUSTREE_STRING_TYPE = 0, 52 BPLUSTREE_INT32_TYPE = 1, 53 BPLUSTREE_UINT32_TYPE = 2, 54 BPLUSTREE_INT64_TYPE = 3, 55 BPLUSTREE_UINT64_TYPE = 4, 56 BPLUSTREE_FLOAT_TYPE = 5, 57 BPLUSTREE_DOUBLE_TYPE = 6 58 }; 59 60 61 struct duplicate_array; 62 63 64 struct bplustree_node { 65 int64 left_link; 66 int64 right_link; 67 int64 overflow_link; 68 uint16 all_key_count; 69 uint16 all_key_length; 70 71 off_t LeftLink() const 72 { return BFS_ENDIAN_TO_HOST_INT64( 73 left_link); } 74 off_t RightLink() const 75 { return BFS_ENDIAN_TO_HOST_INT64( 76 right_link); } 77 off_t OverflowLink() const 78 { return BFS_ENDIAN_TO_HOST_INT64( 79 overflow_link); } 80 uint16 NumKeys() const 81 { return BFS_ENDIAN_TO_HOST_INT16( 82 all_key_count); } 83 uint16 AllKeyLength() const 84 { return BFS_ENDIAN_TO_HOST_INT16( 85 all_key_length); } 86 87 inline uint16* KeyLengths() const; 88 inline off_t* Values() const; 89 inline uint8* Keys() const; 90 inline int32 Used() const; 91 uint8* KeyAt(int32 index, uint16* keyLength) const; 92 93 inline bool IsLeaf() const; 94 95 void Initialize(); 96 uint8 CountDuplicates(off_t offset, 97 bool isFragment) const; 98 off_t DuplicateAt(off_t offset, bool isFragment, 99 int8 index) const; 100 uint32 FragmentsUsed(uint32 nodeSize) const; 101 inline duplicate_array* FragmentAt(int8 index) const; 102 inline duplicate_array* DuplicateArray() const; 103 104 static inline uint8 LinkType(off_t link); 105 static inline off_t MakeLink(uint8 type, off_t link, 106 uint32 fragmentIndex = 0); 107 static inline bool IsDuplicate(off_t link); 108 static inline off_t FragmentOffset(off_t link); 109 static inline uint32 FragmentIndex(off_t link); 110 static inline uint32 MaxFragments(uint32 nodeSize); 111 112 status_t CheckIntegrity(uint32 nodeSize) const; 113 } _PACKED; 114 115 //#define BPLUSTREE_NODE 0 116 #define BPLUSTREE_DUPLICATE_NODE 2 117 #define BPLUSTREE_DUPLICATE_FRAGMENT 3 118 119 #define NUM_FRAGMENT_VALUES 7 120 #define NUM_DUPLICATE_VALUES 125 121 122 //************************************** 123 124 enum bplustree_traversing { 125 BPLUSTREE_FORWARD = 1, 126 BPLUSTREE_BACKWARD = -1, 127 128 BPLUSTREE_BEGIN = 0, 129 BPLUSTREE_END = 1 130 }; 131 132 133 // #pragma mark - in-memory structures 134 135 template<class T> class Stack; 136 class BPlusTree; 137 class TreeIterator; 138 class CachedNode; 139 class Inode; 140 struct TreeCheck; 141 142 // needed for searching (utilizing a stack) 143 struct node_and_key { 144 off_t nodeOffset; 145 uint16 keyIndex; 146 }; 147 148 149 class CachedNode { 150 public: 151 CachedNode(BPlusTree* tree) 152 : 153 fTree(tree), 154 fNode(NULL) 155 { 156 } 157 158 CachedNode(BPlusTree* tree, off_t offset, bool check = true) 159 : 160 fTree(tree), 161 fNode(NULL) 162 { 163 SetTo(offset, check); 164 } 165 166 ~CachedNode() 167 { 168 Unset(); 169 } 170 171 const bplustree_node* SetTo(off_t offset, bool check = true); 172 status_t SetTo(off_t offset, 173 const bplustree_node** _node, 174 bool check = true); 175 bplustree_node* SetToWritable(Transaction& transaction, 176 off_t offset, bool check = true); 177 bplustree_node* MakeWritable(Transaction& transaction); 178 const bplustree_header* SetToHeader(); 179 bplustree_header* SetToWritableHeader(Transaction& transaction); 180 181 void UnsetUnchanged(Transaction& transaction); 182 void Unset(); 183 184 status_t Free(Transaction& transaction, off_t offset); 185 status_t Allocate(Transaction& transaction, 186 bplustree_node** _node, off_t* _offset); 187 188 bool IsWritable() const { return fWritable; } 189 bplustree_node* Node() const { return fNode; } 190 191 protected: 192 bplustree_node* InternalSetTo(Transaction* transaction, 193 off_t offset); 194 195 BPlusTree* fTree; 196 bplustree_node* fNode; 197 off_t fOffset; 198 off_t fBlockNumber; 199 bool fWritable; 200 }; 201 202 203 class BPlusTree : public TransactionListener { 204 public: 205 BPlusTree(Transaction& transaction, 206 Inode* stream, 207 int32 nodeSize = BPLUSTREE_NODE_SIZE); 208 BPlusTree(Inode* stream); 209 BPlusTree(); 210 ~BPlusTree(); 211 212 status_t SetTo(Transaction& transaction, Inode* stream, 213 int32 nodeSize = BPLUSTREE_NODE_SIZE); 214 status_t SetTo(Inode* stream); 215 status_t SetStream(Inode* stream); 216 217 status_t InitCheck(); 218 219 size_t NodeSize() const { return fNodeSize; } 220 Inode* Stream() const { return fStream; } 221 222 status_t Validate(bool repair, bool& _errorsFound); 223 status_t MakeEmpty(); 224 225 status_t Remove(Transaction& transaction, 226 const uint8* key, uint16 keyLength, 227 off_t value); 228 status_t Insert(Transaction& transaction, 229 const uint8* key, uint16 keyLength, 230 off_t value); 231 232 status_t Remove(Transaction& transaction, const char* key, 233 off_t value); 234 status_t Insert(Transaction& transaction, const char* key, 235 off_t value); 236 status_t Insert(Transaction& transaction, int32 key, 237 off_t value); 238 status_t Insert(Transaction& transaction, uint32 key, 239 off_t value); 240 status_t Insert(Transaction& transaction, int64 key, 241 off_t value); 242 status_t Insert(Transaction& transaction, uint64 key, 243 off_t value); 244 status_t Insert(Transaction& transaction, float key, 245 off_t value); 246 status_t Insert(Transaction& transaction, double key, 247 off_t value); 248 249 status_t Replace(Transaction& transaction, 250 const uint8* key, uint16 keyLength, 251 off_t value); 252 status_t Find(const uint8* key, uint16 keyLength, 253 off_t* value); 254 255 static int32 TypeCodeToKeyType(type_code code); 256 static int32 ModeToKeyType(mode_t mode); 257 258 protected: 259 virtual void TransactionDone(bool success); 260 virtual void RemovedFromTransaction(); 261 262 private: 263 BPlusTree(const BPlusTree& other); 264 BPlusTree& operator=(const BPlusTree& other); 265 // no implementation 266 267 int32 _CompareKeys(const void* key1, int keylength1, 268 const void* key2, int keylength2); 269 status_t _FindKey(const bplustree_node* node, 270 const uint8* key, uint16 keyLength, 271 uint16* index = NULL, off_t* next = NULL); 272 status_t _SeekDown(Stack<node_and_key>& stack, 273 const uint8* key, uint16 keyLength); 274 275 status_t _FindFreeDuplicateFragment( 276 Transaction& transaction, 277 const bplustree_node* node, 278 CachedNode& cached, off_t* _offset, 279 bplustree_node** _fragment, uint32* _index); 280 status_t _InsertDuplicate(Transaction& transaction, 281 CachedNode& cached, 282 const bplustree_node* node, uint16 index, 283 off_t value); 284 void _InsertKey(bplustree_node* node, uint16 index, 285 uint8* key, uint16 keyLength, off_t value); 286 status_t _SplitNode(bplustree_node* node, 287 off_t nodeOffset, bplustree_node* other, 288 off_t otherOffset, uint16* _keyIndex, 289 uint8* key, uint16* _keyLength, 290 off_t* _value); 291 292 status_t _RemoveDuplicate(Transaction& transaction, 293 const bplustree_node* node, 294 CachedNode& cached, uint16 keyIndex, 295 off_t value); 296 void _RemoveKey(bplustree_node* node, uint16 index); 297 298 void _UpdateIterators(off_t offset, off_t nextOffset, 299 uint16 keyIndex, uint16 splitAt, 300 int8 change); 301 void _AddIterator(TreeIterator* iterator); 302 void _RemoveIterator(TreeIterator* iterator); 303 304 status_t _ValidateChildren(TreeCheck& check, 305 uint32 level, off_t offset, 306 const uint8* largestKey, uint16 keyLength, 307 const bplustree_node* parent); 308 status_t _ValidateChild(TreeCheck& check, 309 CachedNode& cached, uint32 level, 310 off_t offset, off_t lastOffset, 311 off_t nextOffset, const uint8* key, 312 uint16 keyLength); 313 314 private: 315 friend class TreeIterator; 316 friend class CachedNode; 317 friend class TreeCheck; 318 319 Inode* fStream; 320 bplustree_header fHeader; 321 int32 fNodeSize; 322 bool fAllowDuplicates; 323 bool fInTransaction; 324 status_t fStatus; 325 mutex fIteratorLock; 326 SinglyLinkedList<TreeIterator> fIterators; 327 }; 328 329 330 // #pragma mark - helper classes/functions 331 332 333 extern int32 compareKeys(type_code type, const void* key1, int keyLength1, 334 const void* key2, int keyLength2); 335 336 337 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> { 338 public: 339 TreeIterator(BPlusTree* tree); 340 ~TreeIterator(); 341 342 status_t Goto(int8 to); 343 status_t Traverse(int8 direction, void* key, 344 uint16* keyLength, uint16 maxLength, 345 off_t* value, uint16* duplicate = NULL); 346 status_t Find(const uint8* key, uint16 keyLength); 347 348 status_t Rewind(); 349 status_t GetNextEntry(void* key, uint16* keyLength, 350 uint16 maxLength, off_t* value, 351 uint16* duplicate = NULL); 352 status_t GetPreviousEntry(void* key, uint16* keyLength, 353 uint16 maxLength, off_t* value, 354 uint16* duplicate = NULL); 355 void SkipDuplicates(); 356 357 BPlusTree* Tree() const { return fTree; } 358 359 #ifdef DEBUG 360 void Dump(); 361 #endif 362 363 private: 364 friend class BPlusTree; 365 366 // called by BPlusTree 367 void Update(off_t offset, off_t nextOffset, 368 uint16 keyIndex, uint16 splitAt, 369 int8 change); 370 void Stop(); 371 372 private: 373 BPlusTree* fTree; 374 off_t fCurrentNodeOffset; 375 // traverse position 376 int32 fCurrentKey; 377 off_t fDuplicateNode; 378 uint16 fDuplicate; 379 uint16 fNumDuplicates; 380 bool fIsFragment; 381 }; 382 383 384 // #pragma mark - BPlusTree's inline functions 385 // (most of them may not be needed) 386 387 388 inline status_t 389 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value) 390 { 391 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 392 return B_BAD_TYPE; 393 return Remove(transaction, (uint8*)key, strlen(key), value); 394 } 395 396 397 inline status_t 398 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value) 399 { 400 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 401 return B_BAD_TYPE; 402 return Insert(transaction, (uint8*)key, strlen(key), value); 403 } 404 405 406 inline status_t 407 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value) 408 { 409 if (fHeader.DataType() != BPLUSTREE_INT32_TYPE) 410 return B_BAD_TYPE; 411 return Insert(transaction, (uint8*)&key, sizeof(key), value); 412 } 413 414 415 inline status_t 416 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value) 417 { 418 if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE) 419 return B_BAD_TYPE; 420 return Insert(transaction, (uint8*)&key, sizeof(key), value); 421 } 422 423 424 inline status_t 425 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value) 426 { 427 if (fHeader.DataType() != BPLUSTREE_INT64_TYPE) 428 return B_BAD_TYPE; 429 return Insert(transaction, (uint8*)&key, sizeof(key), value); 430 } 431 432 433 inline status_t 434 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value) 435 { 436 if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE) 437 return B_BAD_TYPE; 438 return Insert(transaction, (uint8*)&key, sizeof(key), value); 439 } 440 441 442 inline status_t 443 BPlusTree::Insert(Transaction& transaction, float key, off_t value) 444 { 445 if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE) 446 return B_BAD_TYPE; 447 return Insert(transaction, (uint8*)&key, sizeof(key), value); 448 } 449 450 451 inline status_t 452 BPlusTree::Insert(Transaction& transaction, double key, off_t value) 453 { 454 if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE) 455 return B_BAD_TYPE; 456 return Insert(transaction, (uint8*)&key, sizeof(key), value); 457 } 458 459 460 // #pragma mark - TreeIterator inline functions 461 462 463 inline status_t 464 TreeIterator::Rewind() 465 { 466 return Goto(BPLUSTREE_BEGIN); 467 } 468 469 470 inline status_t 471 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength, 472 off_t* value, uint16* duplicate) 473 { 474 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, 475 duplicate); 476 } 477 478 479 inline status_t 480 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength, 481 off_t* value, uint16* duplicate) 482 { 483 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, 484 duplicate); 485 } 486 487 488 // #pragma mark - bplustree_header inline functions 489 490 491 inline bool 492 bplustree_header::CheckNode(bplustree_node* node) const 493 { 494 // sanity checks (links, all_key_count) 495 return IsValidLink(node->LeftLink()) 496 && IsValidLink(node->RightLink()) 497 && IsValidLink(node->OverflowLink()) 498 && (int8*)node->Values() + node->NumKeys() * sizeof(off_t) 499 <= (int8*)node + NodeSize(); 500 } 501 502 503 inline bool 504 bplustree_header::IsValidLink(off_t link) const 505 { 506 return link == BPLUSTREE_NULL 507 || (link > 0 && link <= MaximumSize() - NodeSize()); 508 } 509 510 511 // #pragma mark - bplustree_node inline functions 512 513 514 inline uint16* 515 bplustree_node::KeyLengths() const 516 { 517 return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node) 518 + AllKeyLength())); 519 } 520 521 522 inline off_t* 523 bplustree_node::Values() const 524 { 525 return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16)); 526 } 527 528 529 inline uint8* 530 bplustree_node::Keys() const 531 { 532 return (uint8*)this + sizeof(bplustree_node); 533 } 534 535 536 inline int32 537 bplustree_node::Used() const 538 { 539 return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() 540 * (sizeof(uint16) + sizeof(off_t)); 541 } 542 543 544 inline bool 545 bplustree_node::IsLeaf() const 546 { 547 return OverflowLink() == BPLUSTREE_NULL; 548 } 549 550 551 inline duplicate_array* 552 bplustree_node::FragmentAt(int8 index) const 553 { 554 return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1)); 555 } 556 557 558 inline duplicate_array* 559 bplustree_node::DuplicateArray() const 560 { 561 return (duplicate_array*)&overflow_link; 562 } 563 564 565 inline uint8 566 bplustree_node::LinkType(off_t link) 567 { 568 return *(uint64*)&link >> 62; 569 } 570 571 572 inline off_t 573 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 574 { 575 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) 576 | (fragmentIndex & 0x3ff); 577 } 578 579 580 inline bool 581 bplustree_node::IsDuplicate(off_t link) 582 { 583 return (LinkType(link) 584 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 585 } 586 587 588 inline off_t 589 bplustree_node::FragmentOffset(off_t link) 590 { 591 return link & 0x3ffffffffffffc00LL; 592 } 593 594 595 inline uint32 596 bplustree_node::FragmentIndex(off_t link) 597 { 598 return (uint32)(link & 0x3ff); 599 } 600 601 602 inline uint32 603 bplustree_node::MaxFragments(uint32 nodeSize) 604 { 605 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 606 } 607 608 609 #endif // B_PLUS_TREE_H 610