1 /* BPlusTree - BFS B+Tree implementation 2 * 3 * Roughly based on 'btlib' written by Marcus J. Ranum - it shares 4 * no code but achieves binary compatibility with the on disk format. 5 * 6 * Copyright 2001-2005, Axel Dörfler, axeld@pinc-software.de. 7 * This file may be used under the terms of the MIT License. 8 */ 9 #ifndef B_PLUS_TREE_H 10 #define B_PLUS_TREE_H 11 12 13 #include "bfs.h" 14 #include "Journal.h" 15 #include "Chain.h" 16 17 #include <string.h> 18 19 20 //****************** on-disk structures ******************** 21 22 struct bplustree_node; 23 24 #define BPLUSTREE_NULL -1LL 25 #define BPLUSTREE_FREE -2LL 26 27 struct bplustree_header { 28 uint32 magic; 29 uint32 node_size; 30 uint32 max_number_of_levels; 31 uint32 data_type; 32 off_t root_node_pointer; 33 off_t free_node_pointer; 34 off_t maximum_size; 35 36 uint32 Magic() const { return BFS_ENDIAN_TO_HOST_INT32(magic); } 37 uint32 NodeSize() const { return BFS_ENDIAN_TO_HOST_INT32(node_size); } 38 uint32 DataType() const { return BFS_ENDIAN_TO_HOST_INT32(data_type); } 39 off_t RootNode() const { return BFS_ENDIAN_TO_HOST_INT64(root_node_pointer); } 40 off_t FreeNode() const { return BFS_ENDIAN_TO_HOST_INT64(free_node_pointer); } 41 off_t MaximumSize() const { return BFS_ENDIAN_TO_HOST_INT64(maximum_size); } 42 uint32 MaxNumberOfLevels() const { return BFS_ENDIAN_TO_HOST_INT32(max_number_of_levels); } 43 44 inline bool CheckNode(bplustree_node *node); 45 inline bool IsValidLink(off_t link); 46 } _PACKED; 47 48 #define BPLUSTREE_MAGIC 0x69f6c2e8 49 #define BPLUSTREE_NODE_SIZE 1024 50 #define BPLUSTREE_MAX_KEY_LENGTH 256 51 #define BPLUSTREE_MIN_KEY_LENGTH 1 52 53 enum bplustree_types { 54 BPLUSTREE_STRING_TYPE = 0, 55 BPLUSTREE_INT32_TYPE = 1, 56 BPLUSTREE_UINT32_TYPE = 2, 57 BPLUSTREE_INT64_TYPE = 3, 58 BPLUSTREE_UINT64_TYPE = 4, 59 BPLUSTREE_FLOAT_TYPE = 5, 60 BPLUSTREE_DOUBLE_TYPE = 6 61 }; 62 63 struct sorted_array; 64 typedef sorted_array duplicate_array; 65 66 struct bplustree_node { 67 off_t left_link; 68 off_t right_link; 69 off_t overflow_link; 70 uint16 all_key_count; 71 uint16 all_key_length; 72 73 off_t LeftLink() const { return BFS_ENDIAN_TO_HOST_INT64(left_link); } 74 off_t RightLink() const { return BFS_ENDIAN_TO_HOST_INT64(right_link); } 75 off_t OverflowLink() const { return BFS_ENDIAN_TO_HOST_INT64(overflow_link); } 76 uint16 NumKeys() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_count); } 77 uint16 AllKeyLength() const { return BFS_ENDIAN_TO_HOST_INT16(all_key_length); } 78 79 inline uint16 *KeyLengths() const; 80 inline off_t *Values() const; 81 inline uint8 *Keys() const; 82 inline int32 Used() const; 83 uint8 *KeyAt(int32 index, uint16 *keyLength) const; 84 85 inline bool IsLeaf() const; 86 87 void Initialize(); 88 uint8 CountDuplicates(off_t offset, bool isFragment) const; 89 off_t DuplicateAt(off_t offset, bool isFragment, int8 index) const; 90 uint32 FragmentsUsed(uint32 nodeSize); 91 inline duplicate_array *FragmentAt(int8 index); 92 inline duplicate_array *DuplicateArray(); 93 94 static inline uint8 LinkType(off_t link); 95 static inline off_t MakeLink(uint8 type, off_t link, uint32 fragmentIndex = 0); 96 static inline bool IsDuplicate(off_t link); 97 static inline off_t FragmentOffset(off_t link); 98 static inline uint32 FragmentIndex(off_t link); 99 static inline uint32 MaxFragments(uint32 nodeSize); 100 101 #ifdef DEBUG 102 status_t CheckIntegrity(uint32 nodeSize); 103 #endif 104 } _PACKED; 105 106 //#define BPLUSTREE_NODE 0 107 #define BPLUSTREE_DUPLICATE_NODE 2 108 #define BPLUSTREE_DUPLICATE_FRAGMENT 3 109 110 #define NUM_FRAGMENT_VALUES 7 111 #define NUM_DUPLICATE_VALUES 125 112 113 //************************************** 114 115 enum bplustree_traversing { 116 BPLUSTREE_FORWARD = 1, 117 BPLUSTREE_BACKWARD = -1, 118 119 BPLUSTREE_BEGIN = 0, 120 BPLUSTREE_END = 1 121 }; 122 123 124 //****************** in-memory structures ******************** 125 126 template<class T> class Stack; 127 class BPlusTree; 128 class TreeIterator; 129 class CachedNode; 130 class Inode; 131 132 // needed for searching (utilizing a stack) 133 struct node_and_key { 134 off_t nodeOffset; 135 uint16 keyIndex; 136 }; 137 138 139 //***** Cache handling ***** 140 141 class CachedNode { 142 public: 143 CachedNode(BPlusTree *tree) 144 : 145 fTree(tree), 146 fNode(NULL) 147 { 148 } 149 150 CachedNode(BPlusTree *tree, off_t offset, bool check = true) 151 : 152 fTree(tree), 153 fNode(NULL) 154 { 155 SetTo(offset, check); 156 } 157 158 ~CachedNode() 159 { 160 Unset(); 161 } 162 163 bplustree_node *SetTo(off_t offset, bool check = true); 164 bplustree_node *SetToWritable(Transaction &transaction, off_t offset, bool check = true); 165 bplustree_header *SetToWritableHeader(Transaction &transaction); 166 bplustree_header *SetToHeader(); 167 status_t MakeWritable(Transaction &transaction); 168 void UnsetUnchanged(Transaction &transaction); 169 void Unset(); 170 171 status_t Free(Transaction &transaction, off_t offset); 172 status_t Allocate(Transaction &transaction, bplustree_node **node, off_t *offset); 173 174 bool IsWritable() const { return fWritable; } 175 bplustree_node *Node() const { return fNode; } 176 177 protected: 178 bplustree_node *InternalSetTo(Transaction *transaction, off_t offset); 179 180 BPlusTree *fTree; 181 bplustree_node *fNode; 182 off_t fOffset; 183 off_t fBlockNumber; 184 bool fWritable; 185 }; 186 187 188 //******** B+tree class ********* 189 190 class BPlusTree { 191 public: 192 BPlusTree(Transaction &transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE); 193 BPlusTree(Inode *stream); 194 BPlusTree(); 195 ~BPlusTree(); 196 197 status_t SetTo(Transaction &transaction, Inode *stream, int32 nodeSize = BPLUSTREE_NODE_SIZE); 198 status_t SetTo(Inode *stream); 199 status_t SetStream(Inode *stream); 200 201 status_t InitCheck(); 202 status_t Validate(); 203 204 status_t Remove(Transaction &transaction, const uint8 *key, uint16 keyLength, off_t value); 205 status_t Insert(Transaction &transaction, const uint8 *key, uint16 keyLength, off_t value); 206 207 status_t Remove(Transaction &transaction, const char *key, off_t value); 208 status_t Insert(Transaction &transaction, const char *key, off_t value); 209 status_t Insert(Transaction &transaction, int32 key, off_t value); 210 status_t Insert(Transaction &transaction, uint32 key, off_t value); 211 status_t Insert(Transaction &transaction, int64 key, off_t value); 212 status_t Insert(Transaction &transaction, uint64 key, off_t value); 213 status_t Insert(Transaction &transaction, float key, off_t value); 214 status_t Insert(Transaction &transaction, double key, off_t value); 215 216 status_t Replace(Transaction &transaction, const uint8 *key, uint16 keyLength, off_t value); 217 status_t Find(const uint8 *key, uint16 keyLength, off_t *value); 218 219 static int32 TypeCodeToKeyType(type_code code); 220 static int32 ModeToKeyType(mode_t mode); 221 222 private: 223 BPlusTree(const BPlusTree &); 224 BPlusTree &operator=(const BPlusTree &); 225 // no implementation 226 227 int32 CompareKeys(const void *key1, int keylength1, const void *key2, int keylength2); 228 status_t FindKey(bplustree_node *node, const uint8 *key, uint16 keyLength, 229 uint16 *index = NULL, off_t *next = NULL); 230 status_t SeekDown(Stack<node_and_key> &stack, const uint8 *key, uint16 keyLength); 231 232 status_t FindFreeDuplicateFragment(bplustree_node *node, CachedNode &cached, 233 off_t *_offset, bplustree_node **_fragment, uint32 *_index); 234 status_t InsertDuplicate(Transaction &transaction, CachedNode &cached, 235 bplustree_node *node, uint16 index, off_t value); 236 void InsertKey(bplustree_node *node, uint16 index, uint8 *key, uint16 keyLength, 237 off_t value); 238 status_t SplitNode(bplustree_node *node, off_t nodeOffset, bplustree_node *other, 239 off_t otherOffset, uint16 *_keyIndex, uint8 *key, uint16 *_keyLength, 240 off_t *_value); 241 242 status_t RemoveDuplicate(Transaction &transaction, bplustree_node *node, 243 CachedNode &cached, uint16 keyIndex, off_t value); 244 void RemoveKey(bplustree_node *node, uint16 index); 245 246 void UpdateIterators(off_t offset, off_t nextOffset, uint16 keyIndex, 247 uint16 splitAt, int8 change); 248 void AddIterator(TreeIterator *iterator); 249 void RemoveIterator(TreeIterator *iterator); 250 251 private: 252 friend class TreeIterator; 253 friend class CachedNode; 254 255 Inode *fStream; 256 bplustree_header *fHeader; 257 CachedNode fCachedHeader; 258 int32 fNodeSize; 259 bool fAllowDuplicates; 260 status_t fStatus; 261 SimpleLock fIteratorLock; 262 Chain<TreeIterator> fIterators; 263 }; 264 265 266 //***** helper classes/functions ***** 267 268 extern int32 compareKeys(type_code type, const void *key1, int keyLength1, 269 const void *key2, int keyLength2); 270 271 class TreeIterator { 272 public: 273 TreeIterator(BPlusTree *tree); 274 ~TreeIterator(); 275 276 status_t Goto(int8 to); 277 status_t Traverse(int8 direction, void *key, uint16 *keyLength, uint16 maxLength, 278 off_t *value, uint16 *duplicate = NULL); 279 status_t Find(const uint8 *key, uint16 keyLength); 280 281 status_t Rewind(); 282 status_t GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength, 283 off_t *value, uint16 *duplicate = NULL); 284 status_t GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength, 285 off_t *value, uint16 *duplicate = NULL); 286 void SkipDuplicates(); 287 288 #ifdef DEBUG 289 void Dump(); 290 #endif 291 292 private: 293 BPlusTree *fTree; 294 295 off_t fCurrentNodeOffset; // traverse position 296 int32 fCurrentKey; 297 off_t fDuplicateNode; 298 uint16 fDuplicate, fNumDuplicates; 299 bool fIsFragment; 300 301 private: 302 friend class Chain<TreeIterator>; 303 friend class BPlusTree; 304 305 void Update(off_t offset, off_t nextOffset, uint16 keyIndex, uint16 splitAt, int8 change); 306 void Stop(); 307 TreeIterator *fNext; 308 }; 309 310 // BPlusTree's inline functions (most of them may not be needed) 311 312 inline status_t 313 BPlusTree::Remove(Transaction &transaction, const char *key, off_t value) 314 { 315 if (fHeader->data_type != BPLUSTREE_STRING_TYPE) 316 return B_BAD_TYPE; 317 return Remove(transaction, (uint8 *)key, strlen(key), value); 318 } 319 320 inline status_t 321 BPlusTree::Insert(Transaction &transaction, const char *key, off_t value) 322 { 323 if (fHeader->data_type != BPLUSTREE_STRING_TYPE) 324 return B_BAD_TYPE; 325 return Insert(transaction, (uint8 *)key, strlen(key), value); 326 } 327 328 inline status_t 329 BPlusTree::Insert(Transaction &transaction, int32 key, off_t value) 330 { 331 if (fHeader->data_type != BPLUSTREE_INT32_TYPE) 332 return B_BAD_TYPE; 333 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 334 } 335 336 inline status_t 337 BPlusTree::Insert(Transaction &transaction, uint32 key, off_t value) 338 { 339 if (fHeader->data_type != BPLUSTREE_UINT32_TYPE) 340 return B_BAD_TYPE; 341 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 342 } 343 344 inline status_t 345 BPlusTree::Insert(Transaction &transaction, int64 key, off_t value) 346 { 347 if (fHeader->data_type != BPLUSTREE_INT64_TYPE) 348 return B_BAD_TYPE; 349 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 350 } 351 352 inline status_t 353 BPlusTree::Insert(Transaction &transaction, uint64 key, off_t value) 354 { 355 if (fHeader->data_type != BPLUSTREE_UINT64_TYPE) 356 return B_BAD_TYPE; 357 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 358 } 359 360 inline status_t 361 BPlusTree::Insert(Transaction &transaction, float key, off_t value) 362 { 363 if (fHeader->data_type != BPLUSTREE_FLOAT_TYPE) 364 return B_BAD_TYPE; 365 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 366 } 367 368 inline status_t 369 BPlusTree::Insert(Transaction &transaction, double key, off_t value) 370 { 371 if (fHeader->data_type != BPLUSTREE_DOUBLE_TYPE) 372 return B_BAD_TYPE; 373 return Insert(transaction, (uint8 *)&key, sizeof(key), value); 374 } 375 376 377 /************************ TreeIterator inline functions ************************/ 378 // #pragma mark - 379 380 inline status_t 381 TreeIterator::Rewind() 382 { 383 return Goto(BPLUSTREE_BEGIN); 384 } 385 386 inline status_t 387 TreeIterator::GetNextEntry(void *key, uint16 *keyLength, uint16 maxLength, 388 off_t *value, uint16 *duplicate) 389 { 390 return Traverse(BPLUSTREE_FORWARD, key, keyLength, maxLength, value, duplicate); 391 } 392 393 inline status_t 394 TreeIterator::GetPreviousEntry(void *key, uint16 *keyLength, uint16 maxLength, 395 off_t *value, uint16 *duplicate) 396 { 397 return Traverse(BPLUSTREE_BACKWARD, key, keyLength, maxLength, value, duplicate); 398 } 399 400 /************************ bplustree_header inline functions ************************/ 401 // #pragma mark - 402 403 404 inline bool 405 bplustree_header::CheckNode(bplustree_node *node) 406 { 407 // sanity checks (links, all_key_count) 408 return IsValidLink(node->LeftLink()) 409 && IsValidLink(node->RightLink()) 410 && IsValidLink(node->OverflowLink()) 411 && (int8 *)node->Values() + node->NumKeys() * sizeof(off_t) <= (int8 *)node + NodeSize(); 412 } 413 414 415 inline bool 416 bplustree_header::IsValidLink(off_t link) 417 { 418 return link == BPLUSTREE_NULL || (link > 0 && link <= MaximumSize() - NodeSize()); 419 } 420 421 422 /************************ bplustree_node inline functions ************************/ 423 // #pragma mark - 424 425 426 inline uint16 * 427 bplustree_node::KeyLengths() const 428 { 429 return (uint16 *)(((char *)this) + round_up(sizeof(bplustree_node) + AllKeyLength())); 430 } 431 432 433 inline off_t * 434 bplustree_node::Values() const 435 { 436 return (off_t *)((char *)KeyLengths() + NumKeys() * sizeof(uint16)); 437 } 438 439 440 inline uint8 * 441 bplustree_node::Keys() const 442 { 443 return (uint8 *)this + sizeof(bplustree_node); 444 } 445 446 447 inline int32 448 bplustree_node::Used() const 449 { 450 return round_up(sizeof(bplustree_node) + AllKeyLength()) + NumKeys() * (sizeof(uint16) + sizeof(off_t)); 451 } 452 453 454 inline bool 455 bplustree_node::IsLeaf() const 456 { 457 return OverflowLink() == BPLUSTREE_NULL; 458 } 459 460 461 inline duplicate_array * 462 bplustree_node::FragmentAt(int8 index) 463 { 464 return (duplicate_array *)((off_t *)this + index * (NUM_FRAGMENT_VALUES + 1)); 465 } 466 467 468 inline duplicate_array * 469 bplustree_node::DuplicateArray() 470 { 471 return (duplicate_array *)&this->overflow_link; 472 } 473 474 475 inline uint8 476 bplustree_node::LinkType(off_t link) 477 { 478 return *(uint64 *)&link >> 62; 479 } 480 481 482 inline off_t 483 bplustree_node::MakeLink(uint8 type, off_t link, uint32 fragmentIndex) 484 { 485 return ((off_t)type << 62) | (link & 0x3ffffffffffffc00LL) | (fragmentIndex & 0x3ff); 486 } 487 488 489 inline bool 490 bplustree_node::IsDuplicate(off_t link) 491 { 492 return (LinkType(link) & (BPLUSTREE_DUPLICATE_NODE | BPLUSTREE_DUPLICATE_FRAGMENT)) > 0; 493 } 494 495 496 inline off_t 497 bplustree_node::FragmentOffset(off_t link) 498 { 499 return link & 0x3ffffffffffffc00LL; 500 } 501 502 503 inline uint32 504 bplustree_node::FragmentIndex(off_t link) 505 { 506 return (uint32)(link & 0x3ff); 507 } 508 509 510 inline uint32 511 bplustree_node::MaxFragments(uint32 nodeSize) 512 { 513 return nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 514 } 515 516 #endif /* B_PLUS_TREE_H */ 517