1 /* 2 * Copyright 2001-2007, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 #ifndef B_PLUS_TREE_H 6 #define B_PLUS_TREE_H 7 8 9 #include "bfs.h" 10 #include "Journal.h" 11 #include "Chain.h" 12 13 14 // #pragma mark - on-disk structures 15 16 struct bplustree_node; 17 18 #define BPLUSTREE_NULL -1LL 19 #define BPLUSTREE_FREE -2LL 20 21 struct bplustree_header { 22 uint32 magic; 23 uint32 node_size; 24 uint32 max_number_of_levels; 25 uint32 data_type; 26 off_t root_node_pointer; 27 off_t free_node_pointer; 28 off_t maximum_size; 29 30 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 31 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 32 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 33 off_t RootNode() const { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 34 off_t FreeNode() const { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 35 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 36 uint32 MaxNumberOfLevels() const 37 { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 38 39 inline bool CheckNode(bplustree_node *node) const; 40 inline bool IsValidLink(off_t link) const; 41 } _PACKED; 42 43 #define BPLUSTREE_MAGIC 0x69f6c2e8 44 #define BPLUSTREE_NODE_SIZE 1024 45 #define BPLUSTREE_MAX_KEY_LENGTH 256 46 #define BPLUSTREE_MIN_KEY_LENGTH 1 47 48 enum bplustree_types { 49 BPLUSTREE_STRING_TYPE = 0, 50 BPLUSTREE_INT32_TYPE = 1, 51 BPLUSTREE_UINT32_TYPE = 2, 52 BPLUSTREE_INT64_TYPE = 3, 53 BPLUSTREE_UINT64_TYPE = 4, 54 BPLUSTREE_FLOAT_TYPE = 5, 55 BPLUSTREE_DOUBLE_TYPE = 6 56 }; 57 58 struct sorted_array; 59 typedef sorted_array duplicate_array; 60 61 struct bplustree_node { 62 off_t left_link; 63 off_t right_link; 64 off_t overflow_link; 65 uint16 all_key_count; 66 uint16 all_key_length; 67 68 off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); } 69 off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); } 70 off_t OverflowLink() const { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); } 71 uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); } 72 uint16 AllKeyLength() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); } 73 74 inline uint16 *KeyLengths() const; 75 inline off_t *Values() const; 76 inline uint8 *Keys() const; 77 inline int32 Used() const; 78 uint8 *KeyAt(int32 index, uint16 *keyLength) const; 79 80 inline bool IsLeaf() const; 81 82 void Initialize(); 83 uint8 CountDuplicates(off_t offset, bool isFragment) const; 84 off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const; 85 uint32 FragmentsUsed(uint32 nodeSize) const; 86 inline duplicate_array *FragmentAt(int8 index) const; 87 inline duplicate_array *DuplicateArray() const; 88 89 static inline uint8 LinkType(off_t link); 90 static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0); 91 static inline bool IsDuplicate(off_t link); 92 static inline off_t FragmentOffset(off_t link); 93 static inline uint32 FragmentIndex(off_t link); 94 static inline uint32 MaxFragments(uint32 nodeSize); 95 96 #ifdef DEBUG 97 status_t CheckIntegrity(uint32 nodeSize) const; 98 #endif 99 } _PACKED; 100 101 //#define BPLUSTREE_NODE 0 102 #define BPLUSTREE_DUPLICATE_NODE 2 103 #define BPLUSTREE_DUPLICATE_FRAGMENT 3 104 105 #define NUM_FRAGMENT_VALUES 7 106 #define NUM_DUPLICATE_VALUES 125 107 108 //************************************** 109 110 enum bplustree_traversing { 111 BPLUSTREE_FORWARD = 1, 112 BPLUSTREE_BACKWARD = -1, 113 114 BPLUSTREE_BEGIN = 0, 115 BPLUSTREE_END = 1 116 }; 117 118 119 // #pragma mark - in-memory structures 120 121 template<class T> class Stack; 122 class BPlusTree; 123 class TreeIterator; 124 class CachedNode; 125 class Inode; 126 127 // needed for searching (utilizing a stack) 128 struct node_and_key { 129 off_t nodeOffset; 130 uint16 keyIndex; 131 }; 132 133 134 class CachedNode { 135 public: 136 CachedNode(BPlusTree *tree) 137 : 138 fTree(tree), 139 fNode(NULL) 140 { 141 } 142 143 CachedNode(BPlusTree *tree, off_t offset, bool check = true) 144 : 145 fTree(tree), 146 fNode(NULL) 147 { 148 SetTo(offset, check); 149 } 150 151 ~CachedNode() 152 { 153 Unset(); 154 } 155 156 const bplustree_node *SetTo(off_t offset, bool check = true); 157 bplustree_node *SetToWritable(Transaction &transaction, off_t offset, 158 bool check = true); 159 bplustree_node *MakeWritable(Transaction &transaction); 160 const bplustree_header *SetToHeader(); 161 bplustree_header *SetToWritableHeader(Transaction &transaction); 162 bplustree_header *MakeWritableHeader(Transaction &transaction); 163 164 void UnsetUnchanged(Transaction &transaction); 165 void Unset(); 166 167 status_t Free(Transaction &transaction, off_t offset); 168 status_t Allocate(Transaction &transaction, bplustree_node **node, 169 off_t *offset); 170 171 bool IsWritable() const { return fWritable; } 172 bplustree_node *Node() const { return fNode; } 173 174 protected: 175 bplustree_node *InternalSetTo(Transaction *transaction, off_t offset); 176 177 BPlusTree *fTree; 178 bplustree_node *fNode; 179 off_t fOffset; 180 off_t fBlockNumber; 181 bool fWritable; 182 }; 183 184 185 class BPlusTree { 186 public: 187 BPlusTree(Transaction &transaction, Inode *stream, 188 int32 nodeSize = BPLUSTREE_NODE_SIZE); 189 BPlusTree(Inode *stream); 190 BPlusTree(); 191 ~BPlusTree(); 192 193 status_t SetTo(Transaction &transaction, Inode *stream, 194 int32 nodeSize = BPLUSTREE_NODE_SIZE); 195 status_t SetTo(Inode *stream); 196 status_t SetStream(Inode *stream); 197 198 status_t InitCheck(); 199 status_t Validate(); 200 201 status_t Remove(Transaction &transaction, const uint8 *key, 202 uint16 keyLength, off_t value); 203 status_t Insert(Transaction &transaction, const uint8 *key, 204 uint16 keyLength, off_t value); 205 206 status_t Remove(Transaction &transaction, const char *key, 207 off_t value); 208 status_t Insert(Transaction &transaction, const char *key, 209 off_t value); 210 status_t Insert(Transaction &transaction, int32 key, off_t value); 211 status_t Insert(Transaction &transaction, uint32 key, off_t value); 212 status_t Insert(Transaction &transaction, int64 key, off_t value); 213 status_t Insert(Transaction &transaction, uint64 key, off_t value); 214 status_t Insert(Transaction &transaction, float key, off_t value); 215 status_t Insert(Transaction &transaction, double key, off_t value); 216 217 status_t Replace(Transaction &transaction, const uint8 *key, 218 uint16 keyLength, off_t value); 219 status_t Find(const uint8 *key, uint16 keyLength, off_t *value); 220 221 static int32 TypeCodeToKeyType(type_code code); 222 static int32 ModeToKeyType(mode_t mode); 223 224 private: 225 BPlusTree(const BPlusTree &); 226 BPlusTree &operator=(const BPlusTree &); 227 // no implementation 228 229 int32 _CompareKeys(const void *key1, int keylength1, 230 const void *key2, int keylength2); 231 status_t _FindKey(const bplustree_node *node, const uint8 *key, 232 uint16 keyLength, uint16 *index = NULL, 233 off_t *next = NULL); 234 status_t _SeekDown(Stack<node_and_key> &stack, const uint8 *key, 235 uint16 keyLength); 236 237 status_t _FindFreeDuplicateFragment(Transaction &transaction, 238 const bplustree_node *node, CachedNode &cached, 239 off_t *_offset, bplustree_node **_fragment, 240 uint32 *_index); 241 status_t _InsertDuplicate(Transaction &transaction, 242 CachedNode &cached, const bplustree_node *node, 243 uint16 index, off_t value); 244 void _InsertKey(bplustree_node *node, uint16 index, uint8 *key, 245 uint16 keyLength, off_t value); 246 status_t _SplitNode(bplustree_node *node, off_t nodeOffset, 247 bplustree_node *other, off_t otherOffset, 248 uint16 *_keyIndex, uint8 *key, uint16 *_keyLength, 249 off_t *_value); 250 251 status_t _RemoveDuplicate(Transaction &transaction, 252 const bplustree_node *node, CachedNode &cached, 253 uint16 keyIndex, off_t value); 254 void _RemoveKey(bplustree_node *node, uint16 index); 255 256 void _UpdateIterators(off_t offset, off_t nextOffset, 257 uint16 keyIndex, uint16 splitAt, int8 change); 258 void _AddIterator(TreeIterator *iterator); 259 void _RemoveIterator(TreeIterator *iterator); 260 261 private: 262 friend class TreeIterator; 263 friend class CachedNode; 264 265 Inode *fStream; 266 const bplustree_header *fHeader; 267 CachedNode fCachedHeader; 268 int32 fNodeSize; 269 bool fAllowDuplicates; 270 status_t fStatus; 271 SimpleLock fIteratorLock; 272 Chain<TreeIterator> fIterators; 273 }; 274 275 276 // #pragma mark - helper classes/functions 277 278 extern int32 compareKeys(type_code type, const void *key1, int keyLength1, 279 const void *key2, int keyLength2); 280 281 class TreeIterator { 282 public: 283 TreeIterator(BPlusTree *tree); 284 ~TreeIterator(); 285 286 status_t Goto(int8 to); 287 status_t Traverse(int8 direction, void *key, uint16 *keyLength, 288 uint16 maxLength, off_t *value, 289 uint16 *duplicate = NULL); 290 status_t Find(const uint8 *key, uint16 keyLength); 291 292 status_t Rewind(); 293 status_t GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength, 294 off_t *value, uint16 *duplicate = NULL); 295 status_t GetPreviousEntry(void *key, uint16 *keyLength, 296 uint16 maxLength, off_t *value, 297 uint16 *duplicate = NULL); 298 void SkipDuplicates(); 299 300 #ifdef DEBUG 301 void Dump(); 302 #endif 303 304 private: 305 BPlusTree *fTree; 306 307 off_t fCurrentNodeOffset; // traverse position 308 int32 fCurrentKey; 309 off_t fDuplicateNode; 310 uint16 fDuplicate, fNumDuplicates; 311 bool fIsFragment; 312 313 private: 314 friend class Chain<TreeIterator>; 315 friend class BPlusTree; 316 317 void Update(off_t offset, off_t nextOffset, uint16 keyIndex, 318 uint16 splitAt, int8 change); 319 void Stop(); 320 TreeIterator *fNext; 321 }; 322 323 324 // #pragma mark - BPlusTree's inline functions 325 // (most of them may not be needed) 326 327 328 inline status_t 329 BPlusTree::Remove(Transaction &transaction, const char *key, off_t value) 330 { 331 if (fHeader->data_type != BPLUSTREE_STRING_TYPE) 332 return B_BAD_TYPE; 333 return Remove(transaction, (uint8 *)key, strlen(key), value); 334 } 335 336 inline status_t 337 BPlusTree::Insert(Transaction &transaction, const char *key, off_t value) 338 { 339 if (fHeader->data_type != BPLUSTREE_STRING_TYPE) 340 return B_BAD_TYPE; 341 return Insert(transaction, (uint8 *)key, strlen(key), value); 342 } 343 344 inline status_t 345 BPlusTree::Insert(Transaction &transaction, int32 key, off_t value) 346 { 347 if (fHeader->data_type != BPLUSTREE_INT32_TYPE) 348 return B_BAD_TYPE; 349 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 350 } 351 352 inline status_t 353 BPlusTree::Insert(Transaction &transaction, uint32 key, off_t value) 354 { 355 if (fHeader->data_type != BPLUSTREE_UINT32_TYPE) 356 return B_BAD_TYPE; 357 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 358 } 359 360 inline status_t 361 BPlusTree::Insert(Transaction &transaction, int64 key, off_t value) 362 { 363 if (fHeader->data_type != BPLUSTREE_INT64_TYPE) 364 return B_BAD_TYPE; 365 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 366 } 367 368 inline status_t 369 BPlusTree::Insert(Transaction &transaction, uint64 key, off_t value) 370 { 371 if (fHeader->data_type != BPLUSTREE_UINT64_TYPE) 372 return B_BAD_TYPE; 373 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 374 } 375 376 inline status_t 377 BPlusTree::Insert(Transaction &transaction, float key, off_t value) 378 { 379 if (fHeader->data_type != BPLUSTREE_FLOAT_TYPE) 380 return B_BAD_TYPE; 381 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 382 } 383 384 inline status_t 385 BPlusTree::Insert(Transaction &transaction, double key, off_t value) 386 { 387 if (fHeader->data_type != BPLUSTREE_DOUBLE_TYPE) 388 return B_BAD_TYPE; 389 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 390 } 391 392 393 // #pragma mark - TreeIterator inline functions 394 395 396 inline status_t 397 TreeIterator::Rewind() 398 { 399 return Goto(BPLUSTREE_BEGIN); 400 } 401 402 inline status_t 403 TreeIterator::GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength, 404 off_t *value, uint16 *duplicate) 405 { 406 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, duplicate); 407 } 408 409 inline status_t 410 TreeIterator::GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength, 411 off_t *value, uint16 *duplicate) 412 { 413 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, duplicate); 414 } 415 416 417 // #pragma mark - bplustree_header inline functions 418 419 420 inline bool 421 bplustree_header::CheckNode(bplustree_node *node) const 422 { 423 // sanity checks (links, all_key_count) 424 return IsValidLink(node->LeftLink()) 425 && IsValidLink(node->RightLink()) 426 && IsValidLink(node->OverflowLink()) 427 && (int8 *)node->Values() + node->NumKeys() * sizeof(off_t) <= (int8 *)node + NodeSize(); 428 } 429 430 431 inline bool 432 bplustree_header::IsValidLink(off_t link) const 433 { 434 return link == BPLUSTREE_NULL || (link > 0 && link <= MaximumSize() - NodeSize()); 435 } 436 437 438 // #pragma mark - bplustree_node inline functions 439 440 441 inline uint16 * 442 bplustree_node::KeyLengths() const 443 { 444 return (uint16 *)(((char *)this) + round_up(sizeof(bplustree_node) 445 + AllKeyLength())); 446 } 447 448 449 inline off_t * 450 bplustree_node::Values() const 451 { 452 return (off_t *)((char *)KeyLengths() + NumKeys() * sizeof(uint16)); 453 } 454 455 456 inline uint8 * 457 bplustree_node::Keys() const 458 { 459 return (uint8 *)this + sizeof(bplustree_node); 460 } 461 462 463 inline int32 464 bplustree_node::Used() const 465 { 466 return round_up(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() 467 * (sizeof(uint16) + sizeof(off_t)); 468 } 469 470 471 inline bool 472 bplustree_node::IsLeaf() const 473 { 474 return OverflowLink() == BPLUSTREE_NULL; 475 } 476 477 478 inline duplicate_array * 479 bplustree_node::FragmentAt(int8 index) const 480 { 481 return (duplicate_array *)((off_t *)this 482 + index * (NUM_FRAGMENT_VALUES + 1)); 483 } 484 485 486 inline duplicate_array * 487 bplustree_node::DuplicateArray() const 488 { 489 return (duplicate_array *)&this->overflow_link; 490 } 491 492 493 inline uint8 494 bplustree_node::LinkType(off_t link) 495 { 496 return *(uint64 *)&link >> 62; 497 } 498 499 500 inline off_t 501 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 502 { 503 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) 504 | (fragmentIndex & 0x3ff); 505 } 506 507 508 inline bool 509 bplustree_node::IsDuplicate(off_t link) 510 { 511 return (LinkType(link) 512 & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 513 } 514 515 516 inline off_t 517 bplustree_node::FragmentOffset(off_t link) 518 { 519 return link & 0x3ffffffffffffc00LL; 520 } 521 522 523 inline uint32 524 bplustree_node::FragmentIndex(off_t link) 525 { 526 return (uint32)(link & 0x3ff); 527 } 528 529 530 inline uint32 531 bplustree_node::MaxFragments(uint32 nodeSize) 532 { 533 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 534 } 535 536 #endif // B_PLUS_TREE_H 537