1 /* 2 * Copyright 2001-2009, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 #ifndef B_PLUS_TREE_H 6 #define B_PLUS_TREE_H 7 8 9 #include "bfs.h" 10 #include "Journal.h" 11 12 13 // #pragma mark - on-disk structures 14 15 struct bplustree_node; 16 17 #define BPLUSTREE_NULL -1LL 18 #define BPLUSTREE_FREE -2LL 19 20 struct bplustree_header { 21 uint32 magic; 22 uint32 node_size; 23 uint32 max_number_of_levels; 24 uint32 data_type; 25 int64 root_node_pointer; 26 int64 free_node_pointer; 27 int64 maximum_size; 28 29 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 30 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 31 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 32 off_t RootNode() const 33 { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 34 off_t FreeNode() const 35 { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 36 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 37 uint32 MaxNumberOfLevels() const 38 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 39 40 inline bool CheckNode(bplustree_node* node) const; 41 inline bool IsValidLink(off_t link) const; 42 } _PACKED; 43 44 #define BPLUSTREE_MAGIC 0x69f6c2e8 45 #define BPLUSTREE_NODE_SIZE 1024 46 #define BPLUSTREE_MAX_KEY_LENGTH 256 47 #define BPLUSTREE_MIN_KEY_LENGTH 1 48 49 enum bplustree_types { 50 BPLUSTREE_STRING_TYPE = 0, 51 BPLUSTREE_INT32_TYPE = 1, 52 BPLUSTREE_UINT32_TYPE = 2, 53 BPLUSTREE_INT64_TYPE = 3, 54 BPLUSTREE_UINT64_TYPE = 4, 55 BPLUSTREE_FLOAT_TYPE = 5, 56 BPLUSTREE_DOUBLE_TYPE = 6 57 }; 58 59 struct sorted_array; 60 typedef sorted_array duplicate_array; 61 62 struct bplustree_node { 63 int64 left_link; 64 int64 right_link; 65 int64 overflow_link; 66 uint16 all_key_count; 67 uint16 all_key_length; 68 69 off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); } 70 off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); } 71 off_t OverflowLink() const 72 { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); } 73 uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); } 74 uint16 AllKeyLength() const 75 { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); } 76 77 inline uint16* KeyLengths() const; 78 inline off_t* Values() const; 79 inline uint8* Keys() const; 80 inline int32 Used() const; 81 uint8* KeyAt(int32 index, uint16* keyLength) const; 82 83 inline bool IsLeaf() const; 84 85 void Initialize(); 86 uint8 CountDuplicates(off_t offset, bool isFragment) const; 87 off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const; 88 uint32 FragmentsUsed(uint32 nodeSize) const; 89 inline duplicate_array* FragmentAt(int8 index) const; 90 inline duplicate_array* DuplicateArray() const; 91 92 static inline uint8 LinkType(off_t link); 93 static inline off_t MakeLink(uint8 type, off_t link, 94 uint32 fragmentIndex = 0); 95 static inline bool IsDuplicate(off_t link); 96 static inline off_t FragmentOffset(off_t link); 97 static inline uint32 FragmentIndex(off_t link); 98 static inline uint32 MaxFragments(uint32 nodeSize); 99 100 #ifdef DEBUG 101 status_t CheckIntegrity(uint32 nodeSize) const; 102 #endif 103 } _PACKED; 104 105 //#define BPLUSTREE_NODE 0 106 #define BPLUSTREE_DUPLICATE_NODE 2 107 #define BPLUSTREE_DUPLICATE_FRAGMENT 3 108 109 #define NUM_FRAGMENT_VALUES 7 110 #define NUM_DUPLICATE_VALUES 125 111 112 //************************************** 113 114 enum bplustree_traversing { 115 BPLUSTREE_FORWARD = 1, 116 BPLUSTREE_BACKWARD = -1, 117 118 BPLUSTREE_BEGIN = 0, 119 BPLUSTREE_END = 1 120 }; 121 122 123 // #pragma mark - in-memory structures 124 125 template<class T> class Stack; 126 class BPlusTree; 127 class TreeIterator; 128 class CachedNode; 129 class Inode; 130 131 // needed for searching (utilizing a stack) 132 struct node_and_key { 133 off_t nodeOffset; 134 uint16 keyIndex; 135 }; 136 137 138 class CachedNode { 139 public: 140 CachedNode(BPlusTree* tree) 141 : 142 fTree(tree), 143 fNode(NULL) 144 { 145 } 146 147 CachedNode(BPlusTree* tree, off_t offset, bool check = true) 148 : 149 fTree(tree), 150 fNode(NULL) 151 { 152 SetTo(offset, check); 153 } 154 155 ~CachedNode() 156 { 157 Unset(); 158 } 159 160 const bplustree_node* SetTo(off_t offset, bool check = true); 161 bplustree_node* SetToWritable(Transaction& transaction, off_t offset, 162 bool check = true); 163 bplustree_node* MakeWritable(Transaction& transaction); 164 const bplustree_header* SetToHeader(); 165 bplustree_header* SetToWritableHeader(Transaction& transaction); 166 bplustree_header* MakeWritableHeader(Transaction& transaction); 167 168 void UnsetUnchanged(Transaction& transaction); 169 void Unset(); 170 171 status_t Free(Transaction& transaction, off_t offset); 172 status_t Allocate(Transaction& transaction, bplustree_node** node, 173 off_t* offset); 174 175 bool IsWritable() const { return fWritable; } 176 bplustree_node* Node() const { return fNode; } 177 178 protected: 179 bplustree_node* InternalSetTo(Transaction* transaction, off_t offset); 180 181 BPlusTree* fTree; 182 bplustree_node* fNode; 183 off_t fOffset; 184 off_t fBlockNumber; 185 bool fWritable; 186 }; 187 188 189 class BPlusTree { 190 public: 191 BPlusTree(Transaction& transaction, Inode* stream, 192 int32 nodeSize = BPLUSTREE_NODE_SIZE); 193 BPlusTree(Inode* stream); 194 BPlusTree(); 195 ~BPlusTree(); 196 197 status_t SetTo(Transaction& transaction, Inode* stream, 198 int32 nodeSize = BPLUSTREE_NODE_SIZE); 199 status_t SetTo(Inode* stream); 200 status_t SetStream(Inode* stream); 201 202 status_t InitCheck(); 203 status_t Validate(); 204 205 status_t Remove(Transaction& transaction, const uint8* key, 206 uint16 keyLength, off_t value); 207 status_t Insert(Transaction& transaction, const uint8* key, 208 uint16 keyLength, off_t value); 209 210 status_t Remove(Transaction& transaction, const char* key, 211 off_t value); 212 status_t Insert(Transaction& transaction, const char* key, 213 off_t value); 214 status_t Insert(Transaction& transaction, int32 key, 215 off_t value); 216 status_t Insert(Transaction& transaction, uint32 key, 217 off_t value); 218 status_t Insert(Transaction& transaction, int64 key, 219 off_t value); 220 status_t Insert(Transaction& transaction, uint64 key, 221 off_t value); 222 status_t Insert(Transaction& transaction, float key, 223 off_t value); 224 status_t Insert(Transaction& transaction, double key, 225 off_t value); 226 227 status_t Replace(Transaction& transaction, const uint8* key, 228 uint16 keyLength, off_t value); 229 status_t Find(const uint8* key, uint16 keyLength, off_t* value); 230 231 static int32 TypeCodeToKeyType(type_code code); 232 static int32 ModeToKeyType(mode_t mode); 233 234 private: 235 BPlusTree(const BPlusTree& other); 236 BPlusTree& operator=(const BPlusTree& other); 237 // no implementation 238 239 int32 _CompareKeys(const void* key1, int keylength1, 240 const void* key2, int keylength2); 241 status_t _FindKey(const bplustree_node* node, const uint8* key, 242 uint16 keyLength, uint16* index = NULL, 243 off_t* next = NULL); 244 status_t _SeekDown(Stack<node_and_key>& stack, const uint8* key, 245 uint16 keyLength); 246 247 status_t _FindFreeDuplicateFragment(Transaction& transaction, 248 const bplustree_node* node, CachedNode& cached, 249 off_t* _offset, bplustree_node** _fragment, 250 uint32* _index); 251 status_t _InsertDuplicate(Transaction& transaction, 252 CachedNode& cached, const bplustree_node* node, 253 uint16 index, off_t value); 254 void _InsertKey(bplustree_node* node, uint16 index, 255 uint8* key, uint16 keyLength, off_t value); 256 status_t _SplitNode(bplustree_node* node, off_t nodeOffset, 257 bplustree_node* other, off_t otherOffset, 258 uint16* _keyIndex, uint8* key, uint16* _keyLength, 259 off_t* _value); 260 261 status_t _RemoveDuplicate(Transaction& transaction, 262 const bplustree_node* node, CachedNode& cached, 263 uint16 keyIndex, off_t value); 264 void _RemoveKey(bplustree_node* node, uint16 index); 265 266 void _UpdateIterators(off_t offset, off_t nextOffset, 267 uint16 keyIndex, uint16 splitAt, int8 change); 268 void _AddIterator(TreeIterator* iterator); 269 void _RemoveIterator(TreeIterator* iterator); 270 271 private: 272 friend class TreeIterator; 273 friend class CachedNode; 274 275 Inode* fStream; 276 const bplustree_header* fHeader; 277 CachedNode fCachedHeader; 278 int32 fNodeSize; 279 bool fAllowDuplicates; 280 status_t fStatus; 281 mutex fIteratorLock; 282 SinglyLinkedList<TreeIterator> fIterators; 283 }; 284 285 286 // #pragma mark - helper classes/functions 287 288 extern int32 compareKeys(type_code type, const void* key1, int keyLength1, 289 const void* key2, int keyLength2); 290 291 class TreeIterator : public SinglyLinkedListLinkImpl<TreeIterator> { 292 public: 293 TreeIterator(BPlusTree* tree); 294 ~TreeIterator(); 295 296 status_t Goto(int8 to); 297 status_t Traverse(int8 direction, void* key, uint16* keyLength, 298 uint16 maxLength, off_t* value, 299 uint16* duplicate = NULL); 300 status_t Find(const uint8* key, uint16 keyLength); 301 302 status_t Rewind(); 303 status_t GetNextEntry(void* key, uint16* keyLength, 304 uint16 maxLength, off_t* value, 305 uint16* duplicate = NULL); 306 status_t GetPreviousEntry(void* key, uint16* keyLength, 307 uint16 maxLength, off_t* value, 308 uint16* duplicate = NULL); 309 void SkipDuplicates(); 310 311 BPlusTree* Tree() const { return fTree; } 312 313 #ifdef DEBUG 314 void Dump(); 315 #endif 316 317 private: 318 friend class BPlusTree; 319 320 // called by BPlusTree 321 void Update(off_t offset, off_t nextOffset, uint16 keyIndex, 322 uint16 splitAt, int8 change); 323 void Stop(); 324 325 private: 326 BPlusTree* fTree; 327 off_t fCurrentNodeOffset; 328 // traverse position 329 int32 fCurrentKey; 330 off_t fDuplicateNode; 331 uint16 fDuplicate, fNumDuplicates; 332 bool fIsFragment; 333 }; 334 335 336 // #pragma mark - BPlusTree's inline functions 337 // (most of them may not be needed) 338 339 340 inline status_t 341 BPlusTree::Remove(Transaction& transaction, const char* key, off_t value) 342 { 343 if (fHeader->data_type != BPLUSTREE_STRING_TYPE) 344 return B_BAD_TYPE; 345 return Remove(transaction, (uint8*)key, strlen(key), value); 346 } 347 348 inline status_t 349 BPlusTree::Insert(Transaction& transaction, const char* key, off_t value) 350 { 351 if (fHeader->data_type != BPLUSTREE_STRING_TYPE) 352 return B_BAD_TYPE; 353 return Insert(transaction, (uint8*)key, strlen(key), value); 354 } 355 356 inline status_t 357 BPlusTree::Insert(Transaction& transaction, int32 key, off_t value) 358 { 359 if (fHeader->data_type != BPLUSTREE_INT32_TYPE) 360 return B_BAD_TYPE; 361 return Insert(transaction, (uint8*)&key, sizeof(key), value); 362 } 363 364 inline status_t 365 BPlusTree::Insert(Transaction& transaction, uint32 key, off_t value) 366 { 367 if (fHeader->data_type != BPLUSTREE_UINT32_TYPE) 368 return B_BAD_TYPE; 369 return Insert(transaction, (uint8*)&key, sizeof(key), value); 370 } 371 372 inline status_t 373 BPlusTree::Insert(Transaction& transaction, int64 key, off_t value) 374 { 375 if (fHeader->data_type != BPLUSTREE_INT64_TYPE) 376 return B_BAD_TYPE; 377 return Insert(transaction, (uint8*)&key, sizeof(key), value); 378 } 379 380 inline status_t 381 BPlusTree::Insert(Transaction& transaction, uint64 key, off_t value) 382 { 383 if (fHeader->data_type != BPLUSTREE_UINT64_TYPE) 384 return B_BAD_TYPE; 385 return Insert(transaction, (uint8*)&key, sizeof(key), value); 386 } 387 388 inline status_t 389 BPlusTree::Insert(Transaction& transaction, float key, off_t value) 390 { 391 if (fHeader->data_type != BPLUSTREE_FLOAT_TYPE) 392 return B_BAD_TYPE; 393 return Insert(transaction, (uint8*)&key, sizeof(key), value); 394 } 395 396 inline status_t 397 BPlusTree::Insert(Transaction& transaction, double key, off_t value) 398 { 399 if (fHeader->data_type != BPLUSTREE_DOUBLE_TYPE) 400 return B_BAD_TYPE; 401 return Insert(transaction, (uint8*)&key, sizeof(key), value); 402 } 403 404 405 // #pragma mark - TreeIterator inline functions 406 407 408 inline status_t 409 TreeIterator::Rewind() 410 { 411 return Goto(BPLUSTREE_BEGIN); 412 } 413 414 inline status_t 415 TreeIterator::GetNextEntry(void* key, uint16* keyLength, uint16 maxLength, 416 off_t* value, uint16* duplicate) 417 { 418 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, 419 duplicate); 420 } 421 422 inline status_t 423 TreeIterator::GetPreviousEntry(void* key, uint16* keyLength, uint16 maxLength, 424 off_t* value, uint16* duplicate) 425 { 426 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, 427 duplicate); 428 } 429 430 431 // #pragma mark - bplustree_header inline functions 432 433 434 inline bool 435 bplustree_header::CheckNode(bplustree_node* node) const 436 { 437 // sanity checks (links, all_key_count) 438 return IsValidLink(node->LeftLink()) 439 && IsValidLink(node->RightLink()) 440 && IsValidLink(node->OverflowLink()) 441 && (int8*)node->Values() + node->NumKeys() * sizeof(off_t) 442 <= (int8*)node + NodeSize(); 443 } 444 445 446 inline bool 447 bplustree_header::IsValidLink(off_t link) const 448 { 449 return link == BPLUSTREE_NULL 450 || (link > 0 && link <= MaximumSize() - NodeSize()); 451 } 452 453 454 // #pragma mark - bplustree_node inline functions 455 456 457 inline uint16* 458 bplustree_node::KeyLengths() const 459 { 460 return (uint16*)(((char*)this) + key_align(sizeof(bplustree_node) 461 + AllKeyLength())); 462 } 463 464 465 inline off_t* 466 bplustree_node::Values() const 467 { 468 return (off_t*)((char*)KeyLengths() + NumKeys() * sizeof(uint16)); 469 } 470 471 472 inline uint8* 473 bplustree_node::Keys() const 474 { 475 return (uint8*)this + sizeof(bplustree_node); 476 } 477 478 479 inline int32 480 bplustree_node::Used() const 481 { 482 return key_align(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() 483 * (sizeof(uint16) + sizeof(off_t)); 484 } 485 486 487 inline bool 488 bplustree_node::IsLeaf() const 489 { 490 return OverflowLink() == BPLUSTREE_NULL; 491 } 492 493 494 inline duplicate_array* 495 bplustree_node::FragmentAt(int8 index) const 496 { 497 return (duplicate_array*)((off_t*)this + index * (NUM_FRAGMENT_VALUES + 1)); 498 } 499 500 501 inline duplicate_array* 502 bplustree_node::DuplicateArray() const 503 { 504 return (duplicate_array*)&overflow_link; 505 } 506 507 508 inline uint8 509 bplustree_node::LinkType(off_t link) 510 { 511 return *(uint64*)&link >> 62; 512 } 513 514 515 inline off_t 516 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 517 { 518 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) 519 | (fragmentIndex & 0x3ff); 520 } 521 522 523 inline bool 524 bplustree_node::IsDuplicate(off_t link) 525 { 526 return (LinkType(link) 527 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 528 } 529 530 531 inline off_t 532 bplustree_node::FragmentOffset(off_t link) 533 { 534 return link & 0x3ffffffffffffc00LL; 535 } 536 537 538 inline uint32 539 bplustree_node::FragmentIndex(off_t link) 540 { 541 return (uint32)(link & 0x3ff); 542 } 543 544 545 inline uint32 546 bplustree_node::MaxFragments(uint32 nodeSize) 547 { 548 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 549 } 550 551 #endif // B_PLUS_TREE_H 552