1 /* 2 * Copyright 2001-2010, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 #ifndef B_PLUS_TREE_H 6 #define B_PLUS_TREE_H 7 8 9 #include "bfs.h" 10 #include "Journal.h" 11 12 13 // #pragma mark - on-disk structures 14 15 struct bplustree_node; 16 17 #define BPLUSTREE_NULL -1LL 18 #define BPLUSTREE_FREE -2LL 19 20 struct bplustree_header { 21 uint32 magic; 22 uint32 node_size; 23 uint32 max_number_of_levels; 24 uint32 data_type; 25 int64 root_node_pointer; 26 int64 free_node_pointer; 27 int64 maximum_size; 28 29 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 30 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 31 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 32 off_t RootNode() const 33 { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 34 off_t FreeNode() const 35 { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 36 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 37 uint32 MaxNumberOfLevels() const 38 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 39 40 inline bool CheckNode(bplustree_node* node) const; 41 inline bool IsValidLink(off_t link) const; 42 } _PACKED; 43 44 #define BPLUSTREE_MAGIC 0x69f6c2e8 45 #define BPLUSTREE_NODE_SIZE 1024 46 #define BPLUSTREE_MAX_KEY_LENGTH 256 47 #define BPLUSTREE_MIN_KEY_LENGTH 1 48 49 enum bplustree_types { 50 BPLUSTREE_STRING_TYPE = 0, 51 BPLUSTREE_INT32_TYPE = 1, 52 BPLUSTREE_UINT32_TYPE = 2, 53 BPLUSTREE_INT64_TYPE = 3, 54 BPLUSTREE_UINT64_TYPE = 4, 55 BPLUSTREE_FLOAT_TYPE = 5, 56 BPLUSTREE_DOUBLE_TYPE = 6 57 }; 58 59 struct sorted_array; 60 typedef sorted_array duplicate_array; 61 62 struct bplustree_node { 63 int64 left_link; 64 int64 right_link; 65 int64 overflow_link; 66 uint16 all_key_count; 67 uint16 all_key_length; 68 69 off_t LeftLink() const 70 { return BFS_ENDIAN_TO_HOST_INT64( 71 left_link); } 72 off_t RightLink() const 73 { return BFS_ENDIAN_TO_HOST_INT64( 74 right_link); } 75 off_t OverflowLink() const 76 { return BFS_ENDIAN_TO_HOST_INT64( 77 overflow_link); } 78 uint16 NumKeys() const 79 { return BFS_ENDIAN_TO_HOST_INT16( 80 all_key_count); } 81 uint16 AllKeyLength() const 82 { return BFS_ENDIAN_TO_HOST_INT16( 83 all_key_length); } 84 85 inline uint16* KeyLengths() const; 86 inline off_t* Values() const; 87 inline uint8* Keys() const; 88 inline int32 Used() const; 89 uint8* KeyAt(int32 index, uint16* keyLength) const; 90 91 inline bool IsLeaf() const; 92 93 void Initialize(); 94 uint8 CountDuplicates(off_t offset, 95 bool isFragment) const; 96 off_t DuplicateAt(off_t offset, bool isFragment, 97 int8 index) const; 98 uint32 FragmentsUsed(uint32 nodeSize) const; 99 inline duplicate_array* FragmentAt(int8 index) const; 100 inline duplicate_array* DuplicateArray() const; 101 102 static inline uint8 LinkType(off_t link); 103 static inline off_t MakeLink(uint8 type, off_t link, 104 uint32 fragmentIndex = 0); 105 static inline bool IsDuplicate(off_t link); 106 static inline off_t FragmentOffset(off_t link); 107 static inline uint32 FragmentIndex(off_t link); 108 static inline uint32 MaxFragments(uint32 nodeSize); 109 110 #ifdef DEBUG 111 status_t CheckIntegrity(uint32 nodeSize) const; 112 #endif 113 } _PACKED; 114 115 //#define BPLUSTREE_NODE 0 116 #define BPLUSTREE_DUPLICATE_NODE 2 117 #define BPLUSTREE_DUPLICATE_FRAGMENT 3 118 119 #define NUM_FRAGMENT_VALUES 7 120 #define NUM_DUPLICATE_VALUES 125 121 122 //************************************** 123 124 enum bplustree_traversing { 125 BPLUSTREE_FORWARD = 1, 126 BPLUSTREE_BACKWARD = -1, 127 128 BPLUSTREE_BEGIN = 0, 129 BPLUSTREE_END = 1 130 }; 131 132 133 // #pragma mark - in-memory structures 134 135 template<class T> class Stack; 136 class BPlusTree; 137 class TreeIterator; 138 class CachedNode; 139 class Inode; 140 141 // needed for searching (utilizing a stack) 142 struct node_and_key { 143 off_t nodeOffset; 144 uint16 keyIndex; 145 }; 146 147 148 class CachedNode { 149 public: 150 CachedNode(BPlusTree* tree) 151 : 152 fTree(tree), 153 fNode(NULL) 154 { 155 } 156 157 CachedNode(BPlusTree* tree, off_t offset, bool check = true) 158 : 159 fTree(tree), 160 fNode(NULL) 161 { 162 SetTo(offset, check); 163 } 164 165 ~CachedNode() 166 { 167 Unset(); 168 } 169 170 const bplustree_node* SetTo(off_t offset, bool check = true); 171 bplustree_node* SetToWritable(Transaction& transaction, 172 off_t offset, bool check = true); 173 bplustree_node* MakeWritable(Transaction& transaction); 174 const bplustree_header* SetToHeader(); 175 bplustree_header* SetToWritableHeader(Transaction& transaction); 176 177 void UnsetUnchanged(Transaction& transaction); 178 void Unset(); 179 180 status_t Free(Transaction& transaction, off_t offset); 181 status_t Allocate(Transaction& transaction, 182 bplustree_node** _node, off_t* _offset); 183 184 bool IsWritable() const { return fWritable; } 185 bplustree_node* Node() const { return fNode; } 186 187 protected: 188 bplustree_node* InternalSetTo(Transaction* transaction, 189 off_t offset); 190 191 BPlusTree* fTree; 192 bplustree_node* fNode; 193 off_t fOffset; 194 off_t fBlockNumber; 195 bool fWritable; 196 }; 197 198 199 class BPlusTree : public TransactionListener { 200 public: 201 BPlusTree(Transaction& transaction, 202 Inode* stream, 203 int32 nodeSize = BPLUSTREE_NODE_SIZE); 204 BPlusTree(Inode* stream); 205 BPlusTree(); 206 ~BPlusTree(); 207 208 status_t SetTo(Transaction& transaction, Inode* stream, 209 int32 nodeSize = BPLUSTREE_NODE_SIZE); 210 status_t SetTo(Inode* stream); 211 status_t SetStream(Inode* stream); 212 213 status_t InitCheck(); 214 status_t Validate(); 215 216 status_t Remove(Transaction& transaction, 217 const uint8* key, uint16 keyLength, 218 off_t value); 219 status_t Insert(Transaction& transaction, 220 const uint8* key, uint16 keyLength, 221 off_t value); 222 223 status_t Remove(Transaction& transaction, const char* key, 224 off_t value); 225 status_t Insert(Transaction& transaction, const char* key, 226 off_t value); 227 status_t Insert(Transaction& transaction, int32 key, 228 off_t value); 229 status_t Insert(Transaction& transaction, uint32 key, 230 off_t value); 231 status_t Insert(Transaction& transaction, int64 key, 232 off_t value); 233 status_t Insert(Transaction& transaction, uint64 key, 234 off_t value); 235 status_t Insert(Transaction& transaction, float key, 236 off_t value); 237 status_t Insert(Transaction& transaction, double key, 238 off_t value); 239 240 status_t Replace(Transaction& transaction, 241 const uint8* key, uint16 keyLength, 242 off_t value); 243 status_t Find(const uint8* key, uint16 keyLength, 244 off_t* value); 245 246 static int32 TypeCodeToKeyType(type_code code); 247 static int32 ModeToKeyType(mode_t mode); 248 249 protected: 250 virtual void TransactionDone(bool success); 251 virtual void RemovedFromTransaction(); 252 253 private: 254 BPlusTree(const BPlusTree& other); 255 BPlusTree& operator=(const BPlusTree& other); 256 // no implementation 257 258 int32 _CompareKeys(const void* key1, int keylength1, 259 const void* key2, int keylength2); 260 status_t _FindKey(const bplustree_node* node, 261 const uint8* key, uint16 keyLength, 262 uint16* index = NULL, off_t* next = NULL); 263 status_t _SeekDown(Stack<node_and_key>& stack, 264 const uint8* key, uint16 keyLength); 265 266 status_t _FindFreeDuplicateFragment( 267 Transaction& transaction, 268 const bplustree_node* node, 269 CachedNode& cached, off_t* _offset, 270 bplustree_node** _fragment, uint32* _index); 271 status_t _InsertDuplicate(Transaction& transaction, 272 CachedNode& cached, 273 const bplustree_node* node, uint16 index, 274 off_t value); 275 void _InsertKey(bplustree_node* node, uint16 index, 276 uint8* key, uint16 keyLength, off_t value); 277 status_t _SplitNode(bplustree_node* node, 278 off_t nodeOffset, bplustree_node* other, 279 off_t otherOffset, uint16* _keyIndex, 280 uint8* key, uint16* _keyLength, 281 off_t* _value); 282 283 status_t _RemoveDuplicate(Transaction& transaction, 284 const bplustree_node* node, 285 CachedNode& cached, uint16 keyIndex, 286 off_t value); 287 void _RemoveKey(bplustree_node* node, uint16 index); 288 289 void _UpdateIterators(off_t offset, off_t nextOffset, 290 uint16 keyIndex, uint16 splitAt, 291 int8 change); 292 void _AddIterator(TreeIterator* iterator); 293 void _RemoveIterator(TreeIterator* iterator); 294 295 private: 296 friend class TreeIterator; 297 friend class CachedNode; 298 299 Inode* fStream; 300 bplustree_header fHeader; 301 int32 fNodeSize; 302 bool fAllowDuplicates; 303 bool fInTransaction; 304 status_t fStatus; 305 mutex fIteratorLock; 306 SinglyLinkedList<TreeIterator> fIterators; 307 }; 308 309 310 // #pragma mark - helper classes/functions 311 312 extern int32 compareKeys(type_code type, const void* key1, int keyLength1, 313 const void* key2, int keyLength2); 314 315 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> { 316 public: 317 TreeIterator(BPlusTree* tree); 318 ~TreeIterator(); 319 320 status_t Goto(int8 to); 321 status_t Traverse(int8 direction, void* key, 322 uint16* keyLength, uint16 maxLength, 323 off_t* value, uint16* duplicate = NULL); 324 status_t Find(const uint8* key, uint16 keyLength); 325 326 status_t Rewind(); 327 status_t GetNextEntry(void* key, uint16* keyLength, 328 uint16 maxLength, off_t* value, 329 uint16* duplicate = NULL); 330 status_t GetPreviousEntry(void* key, uint16* keyLength, 331 uint16 maxLength, off_t* value, 332 uint16* duplicate = NULL); 333 void SkipDuplicates(); 334 335 BPlusTree* Tree() const { return fTree; } 336 337 #ifdef DEBUG 338 void Dump(); 339 #endif 340 341 private: 342 friend class BPlusTree; 343 344 // called by BPlusTree 345 void Update(off_t offset, off_t nextOffset, 346 uint16 keyIndex, uint16 splitAt, 347 int8 change); 348 void Stop(); 349 350 private: 351 BPlusTree* fTree; 352 off_t fCurrentNodeOffset; 353 // traverse position 354 int32 fCurrentKey; 355 off_t fDuplicateNode; 356 uint16 fDuplicate; 357 uint16 fNumDuplicates; 358 bool fIsFragment; 359 }; 360 361 362 // #pragma mark - BPlusTree's inline functions 363 // (most of them may not be needed) 364 365 366 inline status_t 367 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value) 368 { 369 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 370 return B_BAD_TYPE; 371 return Remove(transaction, (uint8*)key, strlen(key), value); 372 } 373 374 inline status_t 375 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value) 376 { 377 if (fHeader.DataType() != BPLUSTREE_STRING_TYPE) 378 return B_BAD_TYPE; 379 return Insert(transaction, (uint8*)key, strlen(key), value); 380 } 381 382 inline status_t 383 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value) 384 { 385 if (fHeader.DataType() != BPLUSTREE_INT32_TYPE) 386 return B_BAD_TYPE; 387 return Insert(transaction, (uint8*)&key, sizeof(key), value); 388 } 389 390 inline status_t 391 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value) 392 { 393 if (fHeader.DataType() != BPLUSTREE_UINT32_TYPE) 394 return B_BAD_TYPE; 395 return Insert(transaction, (uint8*)&key, sizeof(key), value); 396 } 397 398 inline status_t 399 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value) 400 { 401 if (fHeader.DataType() != BPLUSTREE_INT64_TYPE) 402 return B_BAD_TYPE; 403 return Insert(transaction, (uint8*)&key, sizeof(key), value); 404 } 405 406 inline status_t 407 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value) 408 { 409 if (fHeader.DataType() != BPLUSTREE_UINT64_TYPE) 410 return B_BAD_TYPE; 411 return Insert(transaction, (uint8*)&key, sizeof(key), value); 412 } 413 414 inline status_t 415 BPlusTree::Insert(Transaction& transaction, float key, off_t value) 416 { 417 if (fHeader.DataType() != BPLUSTREE_FLOAT_TYPE) 418 return B_BAD_TYPE; 419 return Insert(transaction, (uint8*)&key, sizeof(key), value); 420 } 421 422 inline status_t 423 BPlusTree::Insert(Transaction& transaction, double key, off_t value) 424 { 425 if (fHeader.DataType() != BPLUSTREE_DOUBLE_TYPE) 426 return B_BAD_TYPE; 427 return Insert(transaction, (uint8*)&key, sizeof(key), value); 428 } 429 430 431 // #pragma mark - TreeIterator inline functions 432 433 434 inline status_t 435 TreeIterator::Rewind() 436 { 437 return Goto(BPLUSTREE_BEGIN); 438 } 439 440 inline status_t 441 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength, 442 off_t* value, uint16* duplicate) 443 { 444 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, 445 duplicate); 446 } 447 448 inline status_t 449 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength, 450 off_t* value, uint16* duplicate) 451 { 452 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, 453 duplicate); 454 } 455 456 457 // #pragma mark - bplustree_header inline functions 458 459 460 inline bool 461 bplustree_header::CheckNode(bplustree_node* node) const 462 { 463 // sanity checks (links, all_key_count) 464 return IsValidLink(node->LeftLink()) 465 && IsValidLink(node->RightLink()) 466 && IsValidLink(node->OverflowLink()) 467 && (int8*)node->Values() + node->NumKeys() * sizeof(off_t) 468 <= (int8*)node + NodeSize(); 469 } 470 471 472 inline bool 473 bplustree_header::IsValidLink(off_t link) const 474 { 475 return link == BPLUSTREE_NULL 476 || (link > 0 && link <= MaximumSize() - NodeSize()); 477 } 478 479 480 // #pragma mark - bplustree_node inline functions 481 482 483 inline uint16* 484 bplustree_node::KeyLengths() const 485 { 486 return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node) 487 + AllKeyLength())); 488 } 489 490 491 inline off_t* 492 bplustree_node::Values() const 493 { 494 return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16)); 495 } 496 497 498 inline uint8* 499 bplustree_node::Keys() const 500 { 501 return (uint8*)this + sizeof(bplustree_node); 502 } 503 504 505 inline int32 506 bplustree_node::Used() const 507 { 508 return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() 509 * (sizeof(uint16) + sizeof(off_t)); 510 } 511 512 513 inline bool 514 bplustree_node::IsLeaf() const 515 { 516 return OverflowLink() == BPLUSTREE_NULL; 517 } 518 519 520 inline duplicate_array* 521 bplustree_node::FragmentAt(int8 index) const 522 { 523 return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1)); 524 } 525 526 527 inline duplicate_array* 528 bplustree_node::DuplicateArray() const 529 { 530 return (duplicate_array*)&overflow_link; 531 } 532 533 534 inline uint8 535 bplustree_node::LinkType(off_t link) 536 { 537 return *(uint64*)&link >> 62; 538 } 539 540 541 inline off_t 542 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 543 { 544 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) 545 | (fragmentIndex & 0x3ff); 546 } 547 548 549 inline bool 550 bplustree_node::IsDuplicate(off_t link) 551 { 552 return (LinkType(link) 553 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 554 } 555 556 557 inline off_t 558 bplustree_node::FragmentOffset(off_t link) 559 { 560 return link & 0x3ffffffffffffc00LL; 561 } 562 563 564 inline uint32 565 bplustree_node::FragmentIndex(off_t link) 566 { 567 return (uint32)(link & 0x3ff); 568 } 569 570 571 inline uint32 572 bplustree_node::MaxFragments(uint32 nodeSize) 573 { 574 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 575 } 576 577 #endif // B_PLUS_TREE_H 578