1 /* BPlusTree - BFS B+Tree implementation 2 ** 3 ** Initial version by Axel Dörfler, axeld@pinc-software.de 4 ** Roughly based on 'btlib' written by Marcus J. Ranum 5 ** 6 ** Copyright (c) 2001-2002 pinc Software. All Rights Reserved. 7 ** This file may be used under the terms of the OpenBeOS License. 8 */ 9 10 11 #include "Debug.h" 12 #include "cpp.h" 13 #include "BPlusTree.h" 14 #include "Inode.h" 15 #include "Utility.h" 16 #include "Stack.h" 17 18 #include <TypeConstants.h> 19 20 #include <string.h> 21 #include <stdlib.h> 22 #include <stdio.h> 23 24 25 #ifdef DEBUG 26 class NodeChecker { 27 public: 28 NodeChecker(bplustree_node *node, int32 nodeSize) 29 : 30 fNode(node), 31 fSize(nodeSize) 32 { 33 } 34 35 ~NodeChecker() 36 { 37 fNode->CheckIntegrity(fSize); 38 } 39 40 private: 41 bplustree_node *fNode; 42 int32 fSize; 43 }; 44 #endif 45 46 // Node Caching for the BPlusTree class 47 // 48 // With write support, there is the need for a function that allocates new 49 // nodes by either returning empty nodes, or by growing the file's data stream 50 // 51 // !! The CachedNode class assumes that you have properly locked the stream 52 // !! before asking for nodes. 53 // 54 // Note: This code will fail if the block size is smaller than the node size! 55 // Since BFS supports block sizes of 1024 bytes or greater, and the node size 56 // is hard-coded to 1024 bytes, that's not an issue now. 57 58 void 59 CachedNode::Unset() 60 { 61 if (fTree == NULL || fTree->fStream == NULL) 62 return; 63 64 if (fBlock != NULL) { 65 release_block(fTree->fStream->GetVolume()->Device(),fBlockNumber); 66 67 fBlock = NULL; 68 fNode = NULL; 69 } 70 } 71 72 73 bplustree_node * 74 CachedNode::SetTo(off_t offset,bool check) 75 { 76 if (fTree == NULL || fTree->fStream == NULL) { 77 REPORT_ERROR(B_BAD_VALUE); 78 return NULL; 79 } 80 81 Unset(); 82 83 // You can only ask for nodes at valid positions - you can't 84 // even access the b+tree header with this method (use SetToHeader() 85 // instead) 86 if (offset > fTree->fHeader->MaximumSize() - fTree->fNodeSize 87 || offset <= 0 88 || (offset % fTree->fNodeSize) != 0) 89 return NULL; 90 91 if (InternalSetTo(offset) != NULL && check) { 92 // sanity checks (links, all_key_count) 93 bplustree_header *header = fTree->fHeader; 94 if (!header->IsValidLink(fNode->LeftLink()) 95 || !header->IsValidLink(fNode->RightLink()) 96 || !header->IsValidLink(fNode->OverflowLink()) 97 || (int8 *)fNode->Values() + fNode->NumKeys() * sizeof(off_t) > 98 (int8 *)fNode + fTree->fNodeSize) { 99 FATAL(("invalid node read from offset %Ld, inode at %Ld\n", 100 offset,fTree->fStream->ID())); 101 return NULL; 102 } 103 } 104 return fNode; 105 } 106 107 108 bplustree_header * 109 CachedNode::SetToHeader() 110 { 111 if (fTree == NULL || fTree->fStream == NULL) { 112 REPORT_ERROR(B_BAD_VALUE); 113 return NULL; 114 } 115 116 Unset(); 117 118 InternalSetTo(0LL); 119 return (bplustree_header *)fNode; 120 } 121 122 123 bplustree_node * 124 CachedNode::InternalSetTo(off_t offset) 125 { 126 fNode = NULL; 127 128 off_t fileOffset; 129 block_run run; 130 if (offset < fTree->fStream->Size() 131 && fTree->fStream->FindBlockRun(offset,run,fileOffset) == B_OK) { 132 Volume *volume = fTree->fStream->GetVolume(); 133 134 int32 blockOffset = (offset - fileOffset) / volume->BlockSize(); 135 fBlockNumber = volume->ToBlock(run) + blockOffset; 136 137 fBlock = (uint8 *)get_block(volume->Device(),fBlockNumber,volume->BlockSize()); 138 if (fBlock) { 139 // the node is somewhere in that block... (confusing offset calculation) 140 fNode = (bplustree_node *)(fBlock + offset - 141 (fileOffset + blockOffset * volume->BlockSize())); 142 } else 143 REPORT_ERROR(B_IO_ERROR); 144 } 145 return fNode; 146 } 147 148 149 status_t 150 CachedNode::Free(Transaction *transaction,off_t offset) 151 { 152 if (transaction == NULL || fTree == NULL || fTree->fStream == NULL 153 || offset == BPLUSTREE_NULL) 154 RETURN_ERROR(B_BAD_VALUE); 155 156 // ToDo: scan the free nodes list and remove all nodes at the end 157 // of the tree - perhaps that shouldn't be done everytime that 158 // function is called, perhaps it should be done when the directory 159 // inode is closed or based on some calculation or whatever... 160 161 // if the node is the last one in the tree, we shrink 162 // the tree and file size by one node 163 off_t lastOffset = fTree->fHeader->MaximumSize() - fTree->fNodeSize; 164 if (offset == lastOffset) { 165 fTree->fHeader->maximum_size = HOST_ENDIAN_TO_BFS_INT64(lastOffset); 166 167 status_t status = fTree->fStream->SetFileSize(transaction,lastOffset); 168 if (status < B_OK) 169 return status; 170 171 return fTree->fCachedHeader.WriteBack(transaction); 172 } 173 174 // add the node to the free nodes list 175 fNode->left_link = fTree->fHeader->free_node_pointer; 176 fNode->overflow_link = HOST_ENDIAN_TO_BFS_INT64((uint64)BPLUSTREE_FREE); 177 178 if (WriteBack(transaction) == B_OK) { 179 fTree->fHeader->free_node_pointer = HOST_ENDIAN_TO_BFS_INT64(offset); 180 return fTree->fCachedHeader.WriteBack(transaction); 181 } 182 return B_ERROR; 183 } 184 185 186 status_t 187 CachedNode::Allocate(Transaction *transaction, bplustree_node **_node, off_t *_offset) 188 { 189 if (transaction == NULL || fTree == NULL || fTree->fHeader == NULL 190 || fTree->fStream == NULL) { 191 RETURN_ERROR(B_BAD_VALUE); 192 } 193 194 status_t status; 195 196 // if there are any free nodes, recycle them 197 if (SetTo(fTree->fHeader->FreeNode(), false) != NULL) { 198 *_offset = fTree->fHeader->FreeNode(); 199 200 // set new free node pointer 201 fTree->fHeader->free_node_pointer = fNode->left_link; 202 if ((status = fTree->fCachedHeader.WriteBack(transaction)) == B_OK) { 203 fNode->Initialize(); 204 *_node = fNode; 205 return B_OK; 206 } 207 return status; 208 } 209 // allocate space for a new node 210 Inode *stream = fTree->fStream; 211 if ((status = stream->Append(transaction, fTree->fNodeSize)) < B_OK) 212 return status; 213 214 // the maximum_size has to be changed before the call to SetTo() - or 215 // else it will fail because the requested node is out of bounds 216 off_t offset = fTree->fHeader->MaximumSize(); 217 fTree->fHeader->maximum_size = HOST_ENDIAN_TO_BFS_INT64(fTree->fHeader->MaximumSize() + fTree->fNodeSize); 218 219 if (SetTo(offset, false) != NULL) { 220 *_offset = offset; 221 222 if (fTree->fCachedHeader.WriteBack(transaction) >= B_OK) { 223 fNode->Initialize(); 224 *_node = fNode; 225 return B_OK; 226 } 227 } 228 RETURN_ERROR(B_ERROR); 229 } 230 231 232 status_t 233 CachedNode::WriteBack(Transaction *transaction) 234 { 235 if (transaction == NULL || fTree == NULL || fTree->fStream == NULL || fNode == NULL) 236 RETURN_ERROR(B_BAD_VALUE); 237 238 return transaction->WriteBlocks(fBlockNumber, fBlock); 239 } 240 241 242 // #pragma mark - 243 244 245 BPlusTree::BPlusTree(Transaction *transaction,Inode *stream,int32 nodeSize) 246 : 247 fStream(NULL), 248 fHeader(NULL), 249 fCachedHeader(this) 250 { 251 SetTo(transaction,stream); 252 } 253 254 255 BPlusTree::BPlusTree(Inode *stream) 256 : 257 fStream(NULL), 258 fHeader(NULL), 259 fCachedHeader(this) 260 { 261 SetTo(stream); 262 } 263 264 265 BPlusTree::BPlusTree() 266 : 267 fStream(NULL), 268 fHeader(NULL), 269 fCachedHeader(this), 270 fNodeSize(BPLUSTREE_NODE_SIZE), 271 fAllowDuplicates(true), 272 fStatus(B_NO_INIT) 273 { 274 } 275 276 277 BPlusTree::~BPlusTree() 278 { 279 // if there are any TreeIterators left, we need to stop them 280 // (can happen when the tree's inode gets deleted while 281 // traversing the tree - a TreeIterator doesn't lock the inode) 282 if (fIteratorLock.Lock() < B_OK) 283 return; 284 285 TreeIterator *iterator = NULL; 286 while ((iterator = fIterators.Next(iterator)) != NULL) 287 iterator->Stop(); 288 289 fIteratorLock.Unlock(); 290 } 291 292 293 /** Create a new B+Tree on the specified stream */ 294 295 status_t 296 BPlusTree::SetTo(Transaction *transaction, Inode *stream, int32 nodeSize) 297 { 298 // initializes in-memory B+Tree 299 300 fCachedHeader.Unset(); 301 fStream = stream; 302 303 fHeader = fCachedHeader.SetToHeader(); 304 if (fHeader == NULL) { 305 // allocate space for new header + node! 306 fStatus = stream->SetFileSize(transaction, nodeSize * 2); 307 if (fStatus < B_OK) 308 RETURN_ERROR(fStatus); 309 310 fHeader = fCachedHeader.SetToHeader(); 311 if (fHeader == NULL) 312 RETURN_ERROR(fStatus = B_ERROR); 313 } 314 315 fAllowDuplicates = ((stream->Mode() & S_INDEX_DIR) == S_INDEX_DIR 316 && stream->BlockRun() != stream->Parent()) 317 || (stream->Mode() & S_ALLOW_DUPS) != 0; 318 319 fNodeSize = nodeSize; 320 321 // initialize b+tree header 322 fHeader->magic = HOST_ENDIAN_TO_BFS_INT32(BPLUSTREE_MAGIC); 323 fHeader->node_size = HOST_ENDIAN_TO_BFS_INT32(fNodeSize); 324 fHeader->max_number_of_levels = HOST_ENDIAN_TO_BFS_INT32(1); 325 fHeader->data_type = HOST_ENDIAN_TO_BFS_INT32(ModeToKeyType(stream->Mode())); 326 fHeader->root_node_pointer = HOST_ENDIAN_TO_BFS_INT64(nodeSize); 327 fHeader->free_node_pointer = HOST_ENDIAN_TO_BFS_INT64((uint64)BPLUSTREE_NULL); 328 fHeader->maximum_size = HOST_ENDIAN_TO_BFS_INT64(nodeSize * 2); 329 330 if (fCachedHeader.WriteBack(transaction) < B_OK) 331 RETURN_ERROR(fStatus = B_ERROR); 332 333 // initialize b+tree root node 334 CachedNode cached(this, fHeader->RootNode(), false); 335 if (cached.Node() == NULL) 336 RETURN_ERROR(B_ERROR); 337 338 cached.Node()->Initialize(); 339 return fStatus = cached.WriteBack(transaction); 340 } 341 342 343 status_t 344 BPlusTree::SetTo(Inode *stream) 345 { 346 if (stream == NULL || stream->Node() == NULL) 347 RETURN_ERROR(fStatus = B_BAD_VALUE); 348 349 // get on-disk B+Tree header 350 351 fCachedHeader.Unset(); 352 fStream = stream; 353 354 fHeader = fCachedHeader.SetToHeader(); 355 if (fHeader == NULL) 356 RETURN_ERROR(fStatus = B_NO_INIT); 357 358 // is header valid? 359 360 if (fHeader->Magic() != BPLUSTREE_MAGIC 361 || fHeader->MaximumSize() != stream->Size() 362 || (fHeader->RootNode() % fHeader->NodeSize()) != 0 363 || !fHeader->IsValidLink(fHeader->RootNode()) 364 || !fHeader->IsValidLink(fHeader->FreeNode())) 365 RETURN_ERROR(fStatus = B_BAD_DATA); 366 367 fNodeSize = fHeader->NodeSize(); 368 369 { 370 uint32 toMode[] = {S_STR_INDEX, S_INT_INDEX, S_UINT_INDEX, S_LONG_LONG_INDEX, 371 S_ULONG_LONG_INDEX, S_FLOAT_INDEX, S_DOUBLE_INDEX}; 372 uint32 mode = stream->Mode() & (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX 373 | S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX); 374 375 if (fHeader->DataType() > BPLUSTREE_DOUBLE_TYPE 376 || (stream->Mode() & S_INDEX_DIR) && toMode[fHeader->DataType()] != mode 377 || !stream->IsContainer()) { 378 D( dump_bplustree_header(fHeader); 379 dump_inode(stream->Node()); 380 ); 381 RETURN_ERROR(fStatus = B_BAD_TYPE); 382 } 383 384 // although it's in stat.h, the S_ALLOW_DUPS flag is obviously unused 385 // in the original BFS code - we will honour it nevertheless 386 fAllowDuplicates = ((stream->Mode() & S_INDEX_DIR) == S_INDEX_DIR 387 && stream->BlockRun() != stream->Parent()) 388 || (stream->Mode() & S_ALLOW_DUPS) != 0; 389 } 390 391 CachedNode cached(this, fHeader->RootNode()); 392 RETURN_ERROR(fStatus = cached.Node() ? B_OK : B_BAD_DATA); 393 } 394 395 396 status_t 397 BPlusTree::InitCheck() 398 { 399 return fStatus; 400 } 401 402 403 int32 404 BPlusTree::TypeCodeToKeyType(type_code code) 405 { 406 switch (code) { 407 case B_STRING_TYPE: 408 return BPLUSTREE_STRING_TYPE; 409 case B_SSIZE_T_TYPE: 410 case B_INT32_TYPE: 411 return BPLUSTREE_INT32_TYPE; 412 case B_SIZE_T_TYPE: 413 case B_UINT32_TYPE: 414 return BPLUSTREE_UINT32_TYPE; 415 case B_OFF_T_TYPE: 416 case B_INT64_TYPE: 417 return BPLUSTREE_INT64_TYPE; 418 case B_UINT64_TYPE: 419 return BPLUSTREE_UINT64_TYPE; 420 case B_FLOAT_TYPE: 421 return BPLUSTREE_FLOAT_TYPE; 422 case B_DOUBLE_TYPE: 423 return BPLUSTREE_DOUBLE_TYPE; 424 } 425 return -1; 426 } 427 428 429 int32 430 BPlusTree::ModeToKeyType(mode_t mode) 431 { 432 switch (mode & (S_STR_INDEX | S_INT_INDEX | S_UINT_INDEX | S_LONG_LONG_INDEX 433 | S_ULONG_LONG_INDEX | S_FLOAT_INDEX | S_DOUBLE_INDEX)) { 434 case S_INT_INDEX: 435 return BPLUSTREE_INT32_TYPE; 436 case S_UINT_INDEX: 437 return BPLUSTREE_UINT32_TYPE; 438 case S_LONG_LONG_INDEX: 439 return BPLUSTREE_INT64_TYPE; 440 case S_ULONG_LONG_INDEX: 441 return BPLUSTREE_UINT64_TYPE; 442 case S_FLOAT_INDEX: 443 return BPLUSTREE_FLOAT_TYPE; 444 case S_DOUBLE_INDEX: 445 return BPLUSTREE_DOUBLE_TYPE; 446 case S_STR_INDEX: 447 default: 448 // default is for standard directories 449 return BPLUSTREE_STRING_TYPE; 450 } 451 } 452 453 454 // #pragma mark - 455 456 457 void 458 BPlusTree::UpdateIterators(off_t offset, off_t nextOffset, uint16 keyIndex, uint16 splitAt, 459 int8 change) 460 { 461 // Although every iterator which is affected by this update currently 462 // waits on a semaphore, other iterators could be added/removed at 463 // any time, so we need to protect this loop 464 if (fIteratorLock.Lock() < B_OK) 465 return; 466 467 TreeIterator *iterator = NULL; 468 while ((iterator = fIterators.Next(iterator)) != NULL) 469 iterator->Update(offset, nextOffset, keyIndex, splitAt, change); 470 471 fIteratorLock.Unlock(); 472 } 473 474 475 void 476 BPlusTree::AddIterator(TreeIterator *iterator) 477 { 478 if (fIteratorLock.Lock() < B_OK) 479 return; 480 481 fIterators.Add(iterator); 482 483 fIteratorLock.Unlock(); 484 } 485 486 487 void 488 BPlusTree::RemoveIterator(TreeIterator *iterator) 489 { 490 if (fIteratorLock.Lock() < B_OK) 491 return; 492 493 fIterators.Remove(iterator); 494 495 fIteratorLock.Unlock(); 496 } 497 498 499 int32 500 BPlusTree::CompareKeys(const void *key1, int keyLength1, const void *key2, int keyLength2) 501 { 502 type_code type = 0; 503 switch (fHeader->data_type) 504 { 505 case BPLUSTREE_STRING_TYPE: 506 type = B_STRING_TYPE; 507 break; 508 case BPLUSTREE_INT32_TYPE: 509 type = B_INT32_TYPE; 510 break; 511 case BPLUSTREE_UINT32_TYPE: 512 type = B_UINT32_TYPE; 513 break; 514 case BPLUSTREE_INT64_TYPE: 515 type = B_INT64_TYPE; 516 break; 517 case BPLUSTREE_UINT64_TYPE: 518 type = B_UINT64_TYPE; 519 break; 520 case BPLUSTREE_FLOAT_TYPE: 521 type = B_FLOAT_TYPE; 522 break; 523 case BPLUSTREE_DOUBLE_TYPE: 524 type = B_DOUBLE_TYPE; 525 break; 526 } 527 return compareKeys(type, key1, keyLength1, key2, keyLength2); 528 } 529 530 531 status_t 532 BPlusTree::FindKey(bplustree_node *node, const uint8 *key, uint16 keyLength, uint16 *index, 533 off_t *next) 534 { 535 if (node->all_key_count == 0) { 536 if (index) 537 *index = 0; 538 if (next) 539 *next = node->OverflowLink(); 540 return B_ENTRY_NOT_FOUND; 541 } 542 543 off_t *values = node->Values(); 544 int16 saveIndex = -1; 545 546 // binary search in the key array 547 for (int16 first = 0, last = node->NumKeys() - 1; first <= last;) { 548 uint16 i = (first + last) >> 1; 549 550 uint16 searchLength; 551 uint8 *searchKey = node->KeyAt(i, &searchLength); 552 if (searchKey + searchLength + sizeof(off_t) + sizeof(uint16) > (uint8 *)node + fNodeSize 553 || searchLength > BPLUSTREE_MAX_KEY_LENGTH) { 554 fStream->GetVolume()->Panic(); 555 RETURN_ERROR(B_BAD_DATA); 556 } 557 558 int32 cmp = CompareKeys(key, keyLength, searchKey, searchLength); 559 if (cmp < 0) { 560 last = i - 1; 561 saveIndex = i; 562 } else if (cmp > 0) { 563 saveIndex = first = i + 1; 564 } else { 565 if (index) 566 *index = i; 567 if (next) 568 *next = BFS_ENDIAN_TO_HOST_INT64(values[i]); 569 return B_OK; 570 } 571 } 572 573 if (index) 574 *index = saveIndex; 575 if (next) { 576 if (saveIndex == node->NumKeys()) 577 *next = node->OverflowLink(); 578 else 579 *next = BFS_ENDIAN_TO_HOST_INT64(values[saveIndex]); 580 } 581 return B_ENTRY_NOT_FOUND; 582 } 583 584 585 /** Prepares the stack to contain all nodes that were passed while 586 * following the key, from the root node to the leaf node that could 587 * or should contain that key. 588 */ 589 590 status_t 591 BPlusTree::SeekDown(Stack<node_and_key> &stack, const uint8 *key, uint16 keyLength) 592 { 593 // set the root node to begin with 594 node_and_key nodeAndKey; 595 nodeAndKey.nodeOffset = fHeader->RootNode(); 596 597 CachedNode cached(this); 598 bplustree_node *node; 599 while ((node = cached.SetTo(nodeAndKey.nodeOffset)) != NULL) { 600 // if we are already on leaf level, we're done 601 if (node->OverflowLink() == BPLUSTREE_NULL) { 602 // node that the keyIndex is not properly set here (but it's not 603 // needed in the calling functions anyway)! 604 nodeAndKey.keyIndex = 0; 605 stack.Push(nodeAndKey); 606 return B_OK; 607 } 608 609 off_t nextOffset; 610 status_t status = FindKey(node, key, keyLength, &nodeAndKey.keyIndex, &nextOffset); 611 612 if (status == B_ENTRY_NOT_FOUND && nextOffset == nodeAndKey.nodeOffset) 613 RETURN_ERROR(B_ERROR); 614 615 // put the node offset & the correct keyIndex on the stack 616 stack.Push(nodeAndKey); 617 618 nodeAndKey.nodeOffset = nextOffset; 619 } 620 RETURN_ERROR(B_ERROR); 621 } 622 623 624 status_t 625 BPlusTree::FindFreeDuplicateFragment(bplustree_node *node, CachedNode *cached, off_t *_offset, 626 bplustree_node **_fragment, uint32 *_index) 627 { 628 off_t *values = node->Values(); 629 for (int32 i = 0; i < node->NumKeys(); i++) { 630 // does the value link to a duplicate fragment? 631 if (bplustree_node::LinkType(values[i]) != BPLUSTREE_DUPLICATE_FRAGMENT) 632 continue; 633 634 bplustree_node *fragment = cached->SetTo(bplustree_node::FragmentOffset(values[i]), false); 635 if (fragment == NULL) { 636 FATAL(("Could not get duplicate fragment at %Ld\n",values[i])); 637 continue; 638 } 639 640 // see if there is some space left for us 641 int32 num = (fNodeSize >> 3) / (NUM_FRAGMENT_VALUES + 1); 642 for (int32 j = 0;j < num;j++) { 643 duplicate_array *array = fragment->FragmentAt(j); 644 645 if (array->count == 0) { 646 *_offset = bplustree_node::FragmentOffset(values[i]); 647 *_fragment = fragment; 648 *_index = j; 649 return B_OK; 650 } 651 } 652 } 653 return B_ENTRY_NOT_FOUND; 654 } 655 656 657 status_t 658 BPlusTree::InsertDuplicate(Transaction *transaction, CachedNode *cached, bplustree_node *node, 659 uint16 index, off_t value) 660 { 661 CachedNode cachedDuplicate(this); 662 off_t *values = node->Values(); 663 off_t oldValue = values[index]; 664 status_t status; 665 off_t offset; 666 667 if (bplustree_node::IsDuplicate(oldValue)) { 668 // 669 // If it's a duplicate fragment, try to insert it into that, or if it 670 // doesn't fit anymore, create a new duplicate node 671 // 672 if (bplustree_node::LinkType(oldValue) == BPLUSTREE_DUPLICATE_FRAGMENT) { 673 bplustree_node *duplicate = cachedDuplicate.SetTo(bplustree_node::FragmentOffset(oldValue),false); 674 if (duplicate == NULL) 675 return B_IO_ERROR; 676 677 duplicate_array *array = duplicate->FragmentAt(bplustree_node::FragmentIndex(oldValue)); 678 if (array->count > NUM_FRAGMENT_VALUES 679 || array->count < 1) { 680 FATAL(("insertDuplicate: Invalid array[%ld] size in fragment %Ld == %Ld!\n",bplustree_node::FragmentIndex(oldValue),bplustree_node::FragmentOffset(oldValue),array->count)); 681 return B_BAD_DATA; 682 } 683 684 if (array->count < NUM_FRAGMENT_VALUES) { 685 array->Insert(value); 686 } else { 687 // test if the fragment will be empty if we remove this key's values 688 if (duplicate->FragmentsUsed(fNodeSize) < 2) { 689 // the node will be empty without our values, so let us 690 // reuse it as a duplicate node 691 offset = bplustree_node::FragmentOffset(oldValue); 692 693 memmove(duplicate->DuplicateArray(), array, (NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 694 duplicate->left_link = duplicate->right_link = HOST_ENDIAN_TO_BFS_INT64((uint64)BPLUSTREE_NULL); 695 696 array = duplicate->DuplicateArray(); 697 array->Insert(value); 698 } else { 699 // create a new duplicate node 700 CachedNode cachedNewDuplicate(this); 701 bplustree_node *newDuplicate; 702 status = cachedNewDuplicate.Allocate(transaction, &newDuplicate, &offset); 703 if (status < B_OK) 704 return status; 705 706 // copy the array from the fragment node to the duplicate node 707 // and free the old entry (by zero'ing all values) 708 newDuplicate->overflow_link = HOST_ENDIAN_TO_BFS_INT64(array->count); 709 memcpy(&newDuplicate->all_key_count, &array->values[0], 710 array->count * sizeof(off_t)); 711 memset(array,0,(NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 712 713 array = newDuplicate->DuplicateArray(); 714 array->Insert(value); 715 716 // if this fails, the old fragments node will contain wrong 717 // data... (but since it couldn't be written, it shouldn't 718 // be fatal) 719 if ((status = cachedNewDuplicate.WriteBack(transaction)) < B_OK) 720 return status; 721 } 722 723 // update the main pointer to link to a duplicate node 724 values[index] = bplustree_node::MakeLink(BPLUSTREE_DUPLICATE_NODE, offset); 725 if ((status = cached->WriteBack(transaction)) < B_OK) 726 return status; 727 } 728 729 return cachedDuplicate.WriteBack(transaction); 730 } 731 732 // 733 // Put the value into a dedicated duplicate node 734 // 735 736 // search for free space in the duplicate nodes of that key 737 duplicate_array *array; 738 bplustree_node *duplicate; 739 off_t duplicateOffset; 740 do { 741 duplicateOffset = bplustree_node::FragmentOffset(oldValue); 742 duplicate = cachedDuplicate.SetTo(duplicateOffset, false); 743 if (duplicate == NULL) 744 return B_IO_ERROR; 745 746 array = duplicate->DuplicateArray(); 747 if (array->count > NUM_DUPLICATE_VALUES || array->count < 0) { 748 FATAL(("removeDuplicate: Invalid array size in duplicate %Ld == %Ld!\n", 749 duplicateOffset,array->count)); 750 return B_BAD_DATA; 751 } 752 } while (array->count >= NUM_DUPLICATE_VALUES 753 && (oldValue = duplicate->RightLink()) != BPLUSTREE_NULL); 754 755 if (array->count < NUM_DUPLICATE_VALUES) { 756 array->Insert(value); 757 } else { 758 // no space left - add a new duplicate node 759 760 CachedNode cachedNewDuplicate(this); 761 bplustree_node *newDuplicate; 762 status = cachedNewDuplicate.Allocate(transaction, &newDuplicate, &offset); 763 if (status < B_OK) 764 return status; 765 766 // link the two nodes together 767 duplicate->right_link = HOST_ENDIAN_TO_BFS_INT64(offset); 768 newDuplicate->left_link = HOST_ENDIAN_TO_BFS_INT64(duplicateOffset); 769 770 array = newDuplicate->DuplicateArray(); 771 array->count = 0; 772 array->Insert(value); 773 774 status = cachedNewDuplicate.WriteBack(transaction); 775 if (status < B_OK) 776 return status; 777 } 778 return cachedDuplicate.WriteBack(transaction); 779 } 780 781 // 782 // Search for a free duplicate fragment or create a new one 783 // to insert the duplicate value into 784 // 785 786 uint32 fragmentIndex = 0; 787 bplustree_node *fragment; 788 if (FindFreeDuplicateFragment(node, &cachedDuplicate, &offset, &fragment, &fragmentIndex) < B_OK) { 789 // allocate a new duplicate fragment node 790 if ((status = cachedDuplicate.Allocate(transaction, &fragment, &offset)) < B_OK) 791 return status; 792 793 memset(fragment, 0, fNodeSize); 794 } 795 duplicate_array *array = fragment->FragmentAt(fragmentIndex); 796 array->Insert(oldValue); 797 array->Insert(value); 798 799 if ((status = cachedDuplicate.WriteBack(transaction)) < B_OK) 800 return status; 801 802 values[index] = bplustree_node::MakeLink(BPLUSTREE_DUPLICATE_FRAGMENT, offset, fragmentIndex); 803 804 return cached->WriteBack(transaction); 805 } 806 807 808 void 809 BPlusTree::InsertKey(bplustree_node *node, uint16 index, uint8 *key, uint16 keyLength, 810 off_t value) 811 { 812 // should never happen, but who knows? 813 if (index > node->NumKeys()) 814 return; 815 816 off_t *values = node->Values(); 817 uint16 *keyLengths = node->KeyLengths(); 818 uint8 *keys = node->Keys(); 819 820 node->all_key_count = HOST_ENDIAN_TO_BFS_INT16(node->NumKeys() + 1); 821 node->all_key_length = HOST_ENDIAN_TO_BFS_INT16(node->AllKeyLength() + keyLength); 822 823 off_t *newValues = node->Values(); 824 uint16 *newKeyLengths = node->KeyLengths(); 825 826 // move values and copy new value into them 827 memmove(newValues + index + 1, values + index, sizeof(off_t) * (node->NumKeys() - 1 - index)); 828 memmove(newValues, values, sizeof(off_t) * index); 829 830 newValues[index] = value; 831 832 // move and update key length index 833 for (uint16 i = node->NumKeys(); i-- > index + 1;) 834 newKeyLengths[i] = keyLengths[i - 1] + keyLength; 835 memmove(newKeyLengths,keyLengths,sizeof(uint16) * index); 836 837 int32 keyStart; 838 newKeyLengths[index] = keyLength + (keyStart = index > 0 ? newKeyLengths[index - 1] : 0); 839 840 // move keys and copy new key into them 841 int32 size = node->AllKeyLength() - newKeyLengths[index]; 842 if (size > 0) 843 memmove(keys + newKeyLengths[index],keys + newKeyLengths[index] - keyLength,size); 844 845 memcpy(keys + keyStart, key, keyLength); 846 } 847 848 849 status_t 850 BPlusTree::SplitNode(bplustree_node *node,off_t nodeOffset,bplustree_node *other, 851 off_t otherOffset, uint16 *_keyIndex, uint8 *key, uint16 *_keyLength, off_t *_value) 852 { 853 if (*_keyIndex > node->NumKeys() + 1) 854 return B_BAD_VALUE; 855 856 uint16 *inKeyLengths = node->KeyLengths(); 857 off_t *inKeyValues = node->Values(); 858 uint8 *inKeys = node->Keys(); 859 uint8 *outKeys = other->Keys(); 860 int32 keyIndex = *_keyIndex; // can become less than zero! 861 862 // how many keys will fit in one (half) page? 863 // that loop will find the answer to this question and 864 // change the key lengths indices for their new home 865 866 // "bytes" is the number of bytes written for the new key, 867 // "bytesBefore" are the bytes before that key 868 // "bytesAfter" are the bytes after the new key, if any 869 int32 bytes = 0, bytesBefore = 0, bytesAfter = 0; 870 871 size_t size = fNodeSize >> 1; 872 int32 out,in; 873 for (in = out = 0; in < node->NumKeys() + 1;) { 874 if (!bytes) 875 bytesBefore = in > 0 ? inKeyLengths[in - 1] : 0; 876 877 if (in == keyIndex && !bytes) { 878 bytes = *_keyLength; 879 } else { 880 if (keyIndex < out) 881 bytesAfter = inKeyLengths[in] - bytesBefore; 882 883 in++; 884 } 885 out++; 886 887 if (round_up(sizeof(bplustree_node) + bytesBefore + bytesAfter + bytes) + 888 out * (sizeof(uint16) + sizeof(off_t)) >= size) { 889 // we have found the number of keys in the new node! 890 break; 891 } 892 } 893 894 // if the new key was not inserted, set the length of the keys 895 // that can be copied directly 896 if (keyIndex >= out && in > 0) 897 bytesBefore = inKeyLengths[in - 1]; 898 899 if (bytesBefore < 0 || bytesAfter < 0) 900 return B_BAD_DATA; 901 902 other->left_link = node->left_link; 903 other->right_link = HOST_ENDIAN_TO_BFS_INT64(nodeOffset); 904 other->all_key_length = HOST_ENDIAN_TO_BFS_INT16(bytes + bytesBefore + bytesAfter); 905 other->all_key_count = HOST_ENDIAN_TO_BFS_INT16(out); 906 907 uint16 *outKeyLengths = other->KeyLengths(); 908 off_t *outKeyValues = other->Values(); 909 int32 keys = out > keyIndex ? keyIndex : out; 910 911 if (bytesBefore) { 912 // copy the keys 913 memcpy(outKeys, inKeys, bytesBefore); 914 memcpy(outKeyLengths, inKeyLengths, keys * sizeof(uint16)); 915 memcpy(outKeyValues, inKeyValues, keys * sizeof(off_t)); 916 } 917 if (bytes) { 918 // copy the newly inserted key 919 memcpy(outKeys + bytesBefore, key, bytes); 920 outKeyLengths[keyIndex] = bytes + bytesBefore; 921 outKeyValues[keyIndex] = *_value; 922 923 if (bytesAfter) { 924 // copy the keys after the new key 925 memcpy(outKeys + bytesBefore + bytes, inKeys + bytesBefore, bytesAfter); 926 keys = out - keyIndex - 1; 927 for (int32 i = 0;i < keys;i++) 928 outKeyLengths[keyIndex + i + 1] = inKeyLengths[keyIndex + i] + bytes; 929 memcpy(outKeyValues + keyIndex + 1, inKeyValues + keyIndex, keys * sizeof(off_t)); 930 } 931 } 932 933 // if the new key was already inserted, we shouldn't use it again 934 if (in != out) 935 keyIndex--; 936 937 int32 total = bytesBefore + bytesAfter; 938 939 // these variables are for the key that will be returned 940 // to the parent node 941 uint8 *newKey = NULL; 942 uint16 newLength; 943 bool newAllocated = false; 944 945 // If we have split an index node, we have to drop the first key 946 // of the next node (which can also be the new key to insert). 947 // The dropped key is also the one which has to be inserted in 948 // the parent node, so we will set the "newKey" already here. 949 if (node->OverflowLink() != BPLUSTREE_NULL) { 950 if (in == keyIndex) { 951 newKey = key; 952 newLength = *_keyLength; 953 954 other->overflow_link = HOST_ENDIAN_TO_BFS_INT64(*_value); 955 keyIndex--; 956 } else { 957 // If a key is dropped (is not the new key), we have to copy 958 // it, because it would be lost if not. 959 uint8 *droppedKey = node->KeyAt(in,&newLength); 960 if (droppedKey + newLength + sizeof(off_t) + sizeof(uint16) > (uint8 *)node + fNodeSize 961 || newLength > BPLUSTREE_MAX_KEY_LENGTH) { 962 fStream->GetVolume()->Panic(); 963 RETURN_ERROR(B_BAD_DATA); 964 } 965 newKey = (uint8 *)malloc(newLength); 966 if (newKey == NULL) 967 return B_NO_MEMORY; 968 memcpy(newKey, droppedKey, newLength); 969 970 other->overflow_link = inKeyValues[in]; 971 total = inKeyLengths[in++]; 972 } 973 } 974 975 // and now the same game for the other page and the rest of the keys 976 // (but with memmove() instead of memcpy(), because they may overlap) 977 978 bytesBefore = bytesAfter = bytes = 0; 979 out = 0; 980 int32 skip = in; 981 while (in < node->NumKeys() + 1) { 982 if (in == keyIndex && !bytes) { 983 // it's enough to set bytesBefore once here, because we do 984 // not need to know the exact length of all keys in this 985 // loop 986 bytesBefore = in > skip ? inKeyLengths[in - 1] : 0; 987 bytes = *_keyLength; 988 } else { 989 if (in < node->NumKeys()) { 990 inKeyLengths[in] -= total; 991 if (bytes) { 992 inKeyLengths[in] += bytes; 993 bytesAfter = inKeyLengths[in] - bytesBefore - bytes; 994 } 995 } 996 in++; 997 } 998 999 out++; 1000 1001 // break out when all keys are done 1002 if (in > node->NumKeys() && keyIndex < in) 1003 break; 1004 } 1005 1006 // adjust the byte counts (since we were a bit lazy in the loop) 1007 if (keyIndex >= in && keyIndex - skip < out) 1008 bytesAfter = inKeyLengths[in] - bytesBefore - total; 1009 else if (keyIndex < skip) 1010 bytesBefore = node->AllKeyLength() - total; 1011 1012 if (bytesBefore < 0 || bytesAfter < 0) 1013 return B_BAD_DATA; 1014 1015 node->left_link = HOST_ENDIAN_TO_BFS_INT64(otherOffset); 1016 // right link, and overflow link can stay the same 1017 node->all_key_length = HOST_ENDIAN_TO_BFS_INT16(bytes + bytesBefore + bytesAfter); 1018 node->all_key_count = HOST_ENDIAN_TO_BFS_INT16(out - 1); 1019 1020 // array positions have changed 1021 outKeyLengths = node->KeyLengths(); 1022 outKeyValues = node->Values(); 1023 1024 // move the keys in the old node: the order is important here, 1025 // because we don't want to overwrite any contents 1026 1027 keys = keyIndex <= skip ? out : keyIndex - skip; 1028 keyIndex -= skip; 1029 1030 if (bytesBefore) 1031 memmove(inKeys, inKeys + total, bytesBefore); 1032 if (bytesAfter) 1033 memmove(inKeys + bytesBefore + bytes, inKeys + total + bytesBefore, bytesAfter); 1034 1035 if (bytesBefore) 1036 memmove(outKeyLengths, inKeyLengths + skip, keys * sizeof(uint16)); 1037 in = out - keyIndex - 1; 1038 if (bytesAfter) 1039 memmove(outKeyLengths + keyIndex + 1, inKeyLengths + skip + keyIndex, in * sizeof(uint16)); 1040 1041 if (bytesBefore) 1042 memmove(outKeyValues, inKeyValues + skip, keys * sizeof(off_t)); 1043 if (bytesAfter) 1044 memmove(outKeyValues + keyIndex + 1, inKeyValues + skip + keyIndex, in * sizeof(off_t)); 1045 1046 if (bytes) { 1047 // finally, copy the newly inserted key (don't overwrite anything) 1048 memcpy(inKeys + bytesBefore, key, bytes); 1049 outKeyLengths[keyIndex] = bytes + bytesBefore; 1050 outKeyValues[keyIndex] = *_value; 1051 } 1052 1053 // Prepare the key that will be inserted in the parent node which 1054 // is either the dropped key or the last of the other node. 1055 // If it's the dropped key, "newKey" was already set earlier. 1056 1057 if (newKey == NULL) 1058 newKey = other->KeyAt(other->NumKeys() - 1, &newLength); 1059 1060 memcpy(key,newKey,newLength); 1061 *_keyLength = newLength; 1062 *_value = otherOffset; 1063 1064 if (newAllocated) 1065 free(newKey); 1066 1067 return B_OK; 1068 } 1069 1070 1071 status_t 1072 BPlusTree::Insert(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value) 1073 { 1074 if (keyLength < BPLUSTREE_MIN_KEY_LENGTH || keyLength > BPLUSTREE_MAX_KEY_LENGTH) 1075 RETURN_ERROR(B_BAD_VALUE); 1076 1077 // lock access to stream 1078 WriteLocked locked(fStream->Lock()); 1079 1080 Stack<node_and_key> stack; 1081 if (SeekDown(stack, key, keyLength) != B_OK) 1082 RETURN_ERROR(B_ERROR); 1083 1084 uint8 keyBuffer[BPLUSTREE_MAX_KEY_LENGTH + 1]; 1085 1086 memcpy(keyBuffer, key, keyLength); 1087 keyBuffer[keyLength] = 0; 1088 1089 node_and_key nodeAndKey; 1090 bplustree_node *node; 1091 1092 CachedNode cached(this); 1093 while (stack.Pop(&nodeAndKey) && (node = cached.SetTo(nodeAndKey.nodeOffset)) != NULL) { 1094 #ifdef DEBUG 1095 NodeChecker checker(node, fNodeSize); 1096 #endif 1097 if (node->IsLeaf()) { 1098 // first round, check for duplicate entries 1099 status_t status = FindKey(node, key, keyLength, &nodeAndKey.keyIndex); 1100 1101 // is this a duplicate entry? 1102 if (status == B_OK) { 1103 if (fAllowDuplicates) 1104 return InsertDuplicate(transaction, &cached, node, nodeAndKey.keyIndex, value); 1105 1106 RETURN_ERROR(B_NAME_IN_USE); 1107 } 1108 } 1109 1110 // is the node big enough to hold the pair? 1111 if (int32(round_up(sizeof(bplustree_node) + node->AllKeyLength() + keyLength) 1112 + (node->NumKeys() + 1) * (sizeof(uint16) + sizeof(off_t))) < fNodeSize) 1113 { 1114 InsertKey(node, nodeAndKey.keyIndex, keyBuffer, keyLength, value); 1115 UpdateIterators(nodeAndKey.nodeOffset, BPLUSTREE_NULL, nodeAndKey.keyIndex, 0, 1); 1116 1117 return cached.WriteBack(transaction); 1118 } else { 1119 CachedNode cachedNewRoot(this); 1120 CachedNode cachedOther(this); 1121 1122 // do we need to allocate a new root node? if so, then do 1123 // it now 1124 off_t newRoot = BPLUSTREE_NULL; 1125 if (nodeAndKey.nodeOffset == fHeader->RootNode()) { 1126 bplustree_node *root; 1127 status_t status = cachedNewRoot.Allocate(transaction, &root, &newRoot); 1128 if (status < B_OK) { 1129 // The tree is most likely corrupted! 1130 // But it's still sane at leaf level - we could set 1131 // a flag in the header that forces the tree to be 1132 // rebuild next time... 1133 // But since we will have journaling, that's not a big 1134 // problem anyway. 1135 RETURN_ERROR(status); 1136 } 1137 } 1138 1139 // reserve space for the other node 1140 bplustree_node *other; 1141 off_t otherOffset; 1142 status_t status = cachedOther.Allocate(transaction, &other, &otherOffset); 1143 if (status < B_OK) { 1144 cachedNewRoot.Free(transaction, newRoot); 1145 RETURN_ERROR(status); 1146 } 1147 1148 if (SplitNode(node, nodeAndKey.nodeOffset, other, otherOffset, 1149 &nodeAndKey.keyIndex, keyBuffer, &keyLength, &value) < B_OK) { 1150 // free root node & other node here 1151 cachedNewRoot.Free(transaction, newRoot); 1152 cachedOther.Free(transaction, otherOffset); 1153 1154 RETURN_ERROR(B_ERROR); 1155 } 1156 1157 // write the updated nodes back 1158 1159 if (cached.WriteBack(transaction) < B_OK 1160 || cachedOther.WriteBack(transaction) < B_OK) 1161 RETURN_ERROR(B_ERROR); 1162 1163 UpdateIterators(nodeAndKey.nodeOffset, otherOffset, nodeAndKey.keyIndex, 1164 node->NumKeys(), 1); 1165 1166 // update the right link of the node in the left of the new node 1167 if ((other = cachedOther.SetTo(other->LeftLink())) != NULL) { 1168 other->right_link = HOST_ENDIAN_TO_BFS_INT64(otherOffset); 1169 if (cachedOther.WriteBack(transaction) < B_OK) 1170 RETURN_ERROR(B_ERROR); 1171 } 1172 1173 // create a new root if necessary 1174 if (newRoot != BPLUSTREE_NULL) { 1175 bplustree_node *root = cachedNewRoot.Node(); 1176 1177 InsertKey(root, 0, keyBuffer, keyLength, node->LeftLink()); 1178 root->overflow_link = HOST_ENDIAN_TO_BFS_INT64(nodeAndKey.nodeOffset); 1179 1180 if (cachedNewRoot.WriteBack(transaction) < B_OK) 1181 RETURN_ERROR(B_ERROR); 1182 1183 // finally, update header to point to the new root 1184 fHeader->root_node_pointer = HOST_ENDIAN_TO_BFS_INT64(newRoot); 1185 fHeader->max_number_of_levels = HOST_ENDIAN_TO_BFS_INT32(fHeader->MaxNumberOfLevels() + 1); 1186 1187 return fCachedHeader.WriteBack(transaction); 1188 } 1189 } 1190 } 1191 RETURN_ERROR(B_ERROR); 1192 } 1193 1194 1195 status_t 1196 BPlusTree::RemoveDuplicate(Transaction *transaction, bplustree_node *node, CachedNode *cached, 1197 uint16 index, off_t value) 1198 { 1199 CachedNode cachedDuplicate(this); 1200 off_t *values = node->Values(); 1201 off_t oldValue = values[index]; 1202 status_t status; 1203 1204 off_t duplicateOffset = bplustree_node::FragmentOffset(oldValue); 1205 bplustree_node *duplicate = cachedDuplicate.SetTo(duplicateOffset, false); 1206 if (duplicate == NULL) 1207 return B_IO_ERROR; 1208 1209 // if it's a duplicate fragment, remove the entry from there 1210 if (bplustree_node::LinkType(oldValue) == BPLUSTREE_DUPLICATE_FRAGMENT) { 1211 duplicate_array *array = duplicate->FragmentAt(bplustree_node::FragmentIndex(oldValue)); 1212 1213 if (array->count > NUM_FRAGMENT_VALUES 1214 || array->count < 1) { 1215 FATAL(("removeDuplicate: Invalid array[%ld] size in fragment %Ld == %Ld!\n",bplustree_node::FragmentIndex(oldValue),duplicateOffset,array->count)); 1216 return B_BAD_DATA; 1217 } 1218 if (!array->Remove(value)) 1219 FATAL(("Oh no, value %Ld not found in fragments of node %Ld...\n",value,duplicateOffset)); 1220 1221 // remove the array from the fragment node if it is empty 1222 if (array->count == 1) { 1223 // set the link to the remaining value 1224 values[index] = array->values[0]; 1225 1226 // Remove the whole fragment node, if this was the only array, 1227 // otherwise free the array and write the changes back 1228 if (duplicate->FragmentsUsed(fNodeSize) == 1) 1229 status = cachedDuplicate.Free(transaction,duplicateOffset); 1230 else { 1231 array->count = 0; 1232 status = cachedDuplicate.WriteBack(transaction); 1233 } 1234 if (status < B_OK) 1235 return status; 1236 1237 return cached->WriteBack(transaction); 1238 } 1239 return cachedDuplicate.WriteBack(transaction); 1240 } 1241 1242 // 1243 // Remove value from a duplicate node! 1244 // 1245 1246 duplicate_array *array; 1247 1248 if (duplicate->LeftLink() != BPLUSTREE_NULL) { 1249 FATAL(("invalid duplicate node: first left link points to %Ld!\n", duplicate->LeftLink())); 1250 return B_BAD_DATA; 1251 } 1252 1253 // Search the duplicate nodes until the entry could be found (and removed) 1254 while (duplicate != NULL) { 1255 array = duplicate->DuplicateArray(); 1256 if (array->count > NUM_DUPLICATE_VALUES 1257 || array->count < 0) { 1258 FATAL(("removeDuplicate: Invalid array size in duplicate %Ld == %Ld!\n",duplicateOffset,array->count)); 1259 return B_BAD_DATA; 1260 } 1261 1262 if (array->Remove(value)) 1263 break; 1264 1265 if ((duplicateOffset = duplicate->RightLink()) == BPLUSTREE_NULL) 1266 RETURN_ERROR(B_ENTRY_NOT_FOUND); 1267 1268 duplicate = cachedDuplicate.SetTo(duplicateOffset,false); 1269 } 1270 if (duplicate == NULL) 1271 RETURN_ERROR(B_IO_ERROR); 1272 1273 while (true) { 1274 off_t left = duplicate->LeftLink(); 1275 off_t right = duplicate->RightLink(); 1276 bool isLast = left == BPLUSTREE_NULL && right == BPLUSTREE_NULL; 1277 1278 if (isLast && array->count == 1 || array->count == 0) { 1279 // Free empty duplicate page, link their siblings together, and 1280 // update the duplicate link if needed (which should not be, if 1281 // we are the only one working on that tree...) 1282 1283 if (duplicateOffset == bplustree_node::FragmentOffset(oldValue) 1284 || array->count == 1) { 1285 if (array->count == 1 && isLast) 1286 values[index] = array->values[0]; 1287 else if (isLast) { 1288 FATAL(("removed last value from duplicate!\n")); 1289 } else 1290 values[index] = bplustree_node::MakeLink(BPLUSTREE_DUPLICATE_NODE,right); 1291 1292 if ((status = cached->WriteBack(transaction)) < B_OK) 1293 return status; 1294 } 1295 1296 if ((status = cachedDuplicate.Free(transaction,duplicateOffset)) < B_OK) 1297 return status; 1298 1299 if (left != BPLUSTREE_NULL 1300 && (duplicate = cachedDuplicate.SetTo(left,false)) != NULL) { 1301 duplicate->right_link = HOST_ENDIAN_TO_BFS_INT64(right); 1302 1303 // If the next node is the last node, we need to free that node 1304 // and convert the duplicate entry back into a normal entry 1305 if (right == BPLUSTREE_NULL && duplicate->LeftLink() == BPLUSTREE_NULL 1306 && duplicate->DuplicateArray()->count <= NUM_FRAGMENT_VALUES) { 1307 duplicateOffset = left; 1308 continue; 1309 } 1310 1311 status = cachedDuplicate.WriteBack(transaction); 1312 if (status < B_OK) 1313 return status; 1314 } 1315 if (right != BPLUSTREE_NULL 1316 && (duplicate = cachedDuplicate.SetTo(right, false)) != NULL) { 1317 duplicate->left_link = HOST_ENDIAN_TO_BFS_INT64(left); 1318 1319 // Again, we may need to turn the duplicate entry back into a normal entry 1320 array = duplicate->DuplicateArray(); 1321 if (left == BPLUSTREE_NULL && duplicate->RightLink() == BPLUSTREE_NULL 1322 && duplicate->DuplicateArray()->count <= NUM_FRAGMENT_VALUES) { 1323 duplicateOffset = right; 1324 continue; 1325 } 1326 1327 return cachedDuplicate.WriteBack(transaction); 1328 } 1329 return status; 1330 } else if (isLast && array->count <= NUM_FRAGMENT_VALUES) { 1331 // If the number of entries fits in a duplicate fragment, then 1332 // either find a free fragment node, or convert this node to a 1333 // fragment node. 1334 CachedNode cachedOther(this); 1335 1336 bplustree_node *fragment = NULL; 1337 uint32 fragmentIndex = 0; 1338 off_t offset; 1339 if (FindFreeDuplicateFragment(node,&cachedOther,&offset,&fragment,&fragmentIndex) < B_OK) { 1340 // convert node 1341 memmove(duplicate,array,(NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 1342 memset((off_t *)duplicate + NUM_FRAGMENT_VALUES + 1,0,fNodeSize - (NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 1343 } else { 1344 // move to other node 1345 duplicate_array *target = fragment->FragmentAt(fragmentIndex); 1346 memcpy(target,array,(NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); 1347 1348 cachedDuplicate.Free(transaction,duplicateOffset); 1349 duplicateOffset = offset; 1350 } 1351 values[index] = bplustree_node::MakeLink(BPLUSTREE_DUPLICATE_FRAGMENT,duplicateOffset,fragmentIndex); 1352 1353 if ((status = cached->WriteBack(transaction)) < B_OK) 1354 return status; 1355 1356 if (fragment != NULL) 1357 return cachedOther.WriteBack(transaction); 1358 } 1359 return cachedDuplicate.WriteBack(transaction); 1360 } 1361 } 1362 1363 1364 /** Removes the key with the given index from the specified node. 1365 * Since it has to get the key from the node anyway (to obtain it's 1366 * pointer), it's not needed to pass the key & its length, although 1367 * the calling method (BPlusTree::Remove()) have this data. 1368 */ 1369 1370 void 1371 BPlusTree::RemoveKey(bplustree_node *node,uint16 index) 1372 { 1373 // should never happen, but who knows? 1374 if (index > node->NumKeys() && node->NumKeys() > 0) { 1375 FATAL(("Asked me to remove key outer limits: %u\n",index)); 1376 return; 1377 } 1378 1379 off_t *values = node->Values(); 1380 1381 // if we would have to drop the overflow link, drop 1382 // the last key instead and update the overflow link 1383 // to the value of that one 1384 if (!node->IsLeaf() && index == node->NumKeys()) 1385 node->overflow_link = values[--index]; 1386 1387 uint16 length; 1388 uint8 *key = node->KeyAt(index,&length); 1389 if (key + length + sizeof(off_t) + sizeof(uint16) > (uint8 *)node + fNodeSize 1390 || length > BPLUSTREE_MAX_KEY_LENGTH) { 1391 FATAL(("Key length to long: %s, %u (inode at %ld,%u)\n", key, length, fStream->BlockRun().allocation_group, fStream->BlockRun().start)); 1392 fStream->GetVolume()->Panic(); 1393 return; 1394 } 1395 1396 uint16 *keyLengths = node->KeyLengths(); 1397 uint8 *keys = node->Keys(); 1398 1399 node->all_key_count = HOST_ENDIAN_TO_BFS_INT16(node->NumKeys() - 1); 1400 node->all_key_length = HOST_ENDIAN_TO_BFS_INT64(node->AllKeyLength() - length); 1401 1402 off_t *newValues = node->Values(); 1403 uint16 *newKeyLengths = node->KeyLengths(); 1404 1405 // move key data 1406 memmove(key, key + length, node->AllKeyLength() - (key - keys)); 1407 1408 // move and update key lengths 1409 if (index > 0 && newKeyLengths != keyLengths) 1410 memmove(newKeyLengths, keyLengths, index * sizeof(uint16)); 1411 for (uint16 i = index; i < node->NumKeys(); i++) 1412 newKeyLengths[i] = keyLengths[i + 1] - length; 1413 1414 // move values 1415 if (index > 0) 1416 memmove(newValues,values,index * sizeof(off_t)); 1417 if (node->NumKeys() > index) 1418 memmove(newValues + index, values + index + 1, (node->NumKeys() - index) * sizeof(off_t)); 1419 } 1420 1421 1422 /** Removes the specified key from the tree. The "value" parameter is only used 1423 * for trees which allow duplicates, so you may safely ignore it. 1424 * It's not an optional parameter, so at least you have to think about it. 1425 */ 1426 1427 status_t 1428 BPlusTree::Remove(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value) 1429 { 1430 if (keyLength < BPLUSTREE_MIN_KEY_LENGTH || keyLength > BPLUSTREE_MAX_KEY_LENGTH) 1431 RETURN_ERROR(B_BAD_VALUE); 1432 1433 // lock access to stream 1434 WriteLocked locked(fStream->Lock()); 1435 1436 Stack<node_and_key> stack; 1437 if (SeekDown(stack,key,keyLength) != B_OK) 1438 RETURN_ERROR(B_ERROR); 1439 1440 node_and_key nodeAndKey; 1441 bplustree_node *node; 1442 1443 CachedNode cached(this); 1444 while (stack.Pop(&nodeAndKey) && (node = cached.SetTo(nodeAndKey.nodeOffset)) != NULL) 1445 { 1446 #ifdef DEBUG 1447 NodeChecker checker(node, fNodeSize); 1448 #endif 1449 if (node->IsLeaf()) // first round, check for duplicate entries 1450 { 1451 status_t status = FindKey(node,key,keyLength,&nodeAndKey.keyIndex); 1452 if (status < B_OK) 1453 RETURN_ERROR(status); 1454 1455 // If we will remove the last key, the iterator will be set 1456 // to the next node after the current - if there aren't any 1457 // more nodes, we need a way to prevent the TreeIterators to 1458 // touch the old node again, we use BPLUSTREE_FREE for this 1459 off_t next = node->RightLink() == BPLUSTREE_NULL ? BPLUSTREE_FREE : node->RightLink(); 1460 UpdateIterators(nodeAndKey.nodeOffset, node->NumKeys() == 1 ? 1461 next : BPLUSTREE_NULL,nodeAndKey.keyIndex,0 , -1); 1462 1463 // is this a duplicate entry? 1464 if (bplustree_node::IsDuplicate(node->Values()[nodeAndKey.keyIndex])) { 1465 if (fAllowDuplicates) 1466 return RemoveDuplicate(transaction, node, &cached, nodeAndKey.keyIndex, value); 1467 else 1468 RETURN_ERROR(B_NAME_IN_USE); 1469 } 1470 } 1471 1472 // if it's an empty root node, we have to convert it 1473 // to a leaf node by dropping the overflow link, or, 1474 // if it's a leaf node, just empty it 1475 if (nodeAndKey.nodeOffset == fHeader->RootNode() 1476 && node->NumKeys() == 0 1477 || node->NumKeys() == 1 && node->IsLeaf()) { 1478 node->overflow_link = HOST_ENDIAN_TO_BFS_INT64((uint64)BPLUSTREE_NULL); 1479 node->all_key_count = 0; 1480 node->all_key_length = 0; 1481 1482 if (cached.WriteBack(transaction) < B_OK) 1483 return B_IO_ERROR; 1484 1485 // if we've cleared the root node, reset the maximum 1486 // number of levels in the header 1487 if (nodeAndKey.nodeOffset == fHeader->RootNode()) { 1488 fHeader->max_number_of_levels = HOST_ENDIAN_TO_BFS_INT32(1); 1489 return fCachedHeader.WriteBack(transaction); 1490 } 1491 return B_OK; 1492 } 1493 1494 // if there is only one key left, we don't have to remove 1495 // it, we can just dump the node (index nodes still have 1496 // the overflow link, so we have to drop the last key) 1497 if (node->NumKeys() > 1 1498 || !node->IsLeaf() && node->NumKeys() == 1) { 1499 RemoveKey(node,nodeAndKey.keyIndex); 1500 return cached.WriteBack(transaction); 1501 } 1502 1503 // when we are here, we can just free the node, but 1504 // we have to update the right/left link of the 1505 // siblings first 1506 CachedNode otherCached(this); 1507 bplustree_node *other = otherCached.SetTo(node->LeftLink()); 1508 if (other != NULL) { 1509 other->right_link = node->right_link; 1510 if (otherCached.WriteBack(transaction) < B_OK) 1511 return B_IO_ERROR; 1512 } 1513 1514 if ((other = otherCached.SetTo(node->RightLink())) != NULL) { 1515 other->left_link = node->left_link; 1516 if (otherCached.WriteBack(transaction) < B_OK) 1517 return B_IO_ERROR; 1518 } 1519 1520 cached.Free(transaction,nodeAndKey.nodeOffset); 1521 } 1522 RETURN_ERROR(B_ERROR); 1523 } 1524 1525 1526 /** Replaces the value for the key in the tree. 1527 * Returns B_OK if the key could be found and its value replaced, 1528 * B_ENTRY_NOT_FOUND if the key couldn't be found, and other errors 1529 * to indicate that something went terribly wrong. 1530 * Note that this doesn't work with duplicates - it will just 1531 * return B_BAD_TYPE if you call this function on a tree where 1532 * duplicates are allowed. 1533 */ 1534 1535 status_t 1536 BPlusTree::Replace(Transaction *transaction, const uint8 *key, uint16 keyLength, off_t value) 1537 { 1538 if (keyLength < BPLUSTREE_MIN_KEY_LENGTH || keyLength > BPLUSTREE_MAX_KEY_LENGTH 1539 || key == NULL) 1540 RETURN_ERROR(B_BAD_VALUE); 1541 1542 if (fAllowDuplicates) 1543 RETURN_ERROR(B_BAD_TYPE); 1544 1545 // lock access to stream (a read lock is okay for this purpose) 1546 ReadLocked locked(fStream->Lock()); 1547 1548 off_t nodeOffset = fHeader->RootNode(); 1549 CachedNode cached(this); 1550 bplustree_node *node; 1551 1552 while ((node = cached.SetTo(nodeOffset)) != NULL) { 1553 uint16 keyIndex = 0; 1554 off_t nextOffset; 1555 status_t status = FindKey(node, key, keyLength, &keyIndex, &nextOffset); 1556 1557 if (node->OverflowLink() == BPLUSTREE_NULL) { 1558 if (status == B_OK) { 1559 node->Values()[keyIndex] = value; 1560 return cached.WriteBack(transaction); 1561 } 1562 1563 return status; 1564 } else if (nextOffset == nodeOffset) 1565 RETURN_ERROR(B_ERROR); 1566 1567 nodeOffset = nextOffset; 1568 } 1569 RETURN_ERROR(B_ERROR); 1570 } 1571 1572 1573 /** Searches the key in the tree, and stores the offset found in 1574 * _value, if successful. 1575 * It's very similar to BPlusTree::SeekDown(), but doesn't fill 1576 * a stack while it descends the tree. 1577 * Returns B_OK when the key could be found, B_ENTRY_NOT_FOUND 1578 * if not. It can also return other errors to indicate that 1579 * something went wrong. 1580 * Note that this doesn't work with duplicates - it will just 1581 * return B_BAD_TYPE if you call this function on a tree where 1582 * duplicates are allowed. 1583 */ 1584 1585 status_t 1586 BPlusTree::Find(const uint8 *key, uint16 keyLength, off_t *_value) 1587 { 1588 if (keyLength < BPLUSTREE_MIN_KEY_LENGTH || keyLength > BPLUSTREE_MAX_KEY_LENGTH 1589 || key == NULL) 1590 RETURN_ERROR(B_BAD_VALUE); 1591 1592 if (fAllowDuplicates) 1593 RETURN_ERROR(B_BAD_TYPE); 1594 1595 // lock access to stream 1596 ReadLocked locked(fStream->Lock()); 1597 1598 off_t nodeOffset = fHeader->RootNode(); 1599 CachedNode cached(this); 1600 bplustree_node *node; 1601 1602 #ifdef DEBUG 1603 int32 levels = 0; 1604 #endif 1605 1606 while ((node = cached.SetTo(nodeOffset)) != NULL) { 1607 uint16 keyIndex = 0; 1608 off_t nextOffset; 1609 status_t status = FindKey(node, key, keyLength, &keyIndex, &nextOffset); 1610 1611 #ifdef DEBUG 1612 levels++; 1613 #endif 1614 if (node->OverflowLink() == BPLUSTREE_NULL) { 1615 if (status == B_OK && _value != NULL) 1616 *_value = BFS_ENDIAN_TO_HOST_INT64(node->Values()[keyIndex]); 1617 1618 #ifdef DEBUG 1619 if (levels != (int32)fHeader->MaxNumberOfLevels()) 1620 DEBUGGER(("levels don't match")); 1621 #endif 1622 return status; 1623 } else if (nextOffset == nodeOffset) 1624 RETURN_ERROR(B_ERROR); 1625 1626 nodeOffset = nextOffset; 1627 } 1628 FATAL(("b+tree node at %Ld could not be loaded\n", nodeOffset)); 1629 RETURN_ERROR(B_ERROR); 1630 } 1631 1632 1633 // #pragma mark - 1634 1635 1636 TreeIterator::TreeIterator(BPlusTree *tree) 1637 : 1638 fTree(tree), 1639 fCurrentNodeOffset(BPLUSTREE_NULL), 1640 fNext(NULL) 1641 { 1642 tree->AddIterator(this); 1643 } 1644 1645 1646 TreeIterator::~TreeIterator() 1647 { 1648 if (fTree) 1649 fTree->RemoveIterator(this); 1650 } 1651 1652 1653 status_t 1654 TreeIterator::Goto(int8 to) 1655 { 1656 if (fTree == NULL || fTree->fHeader == NULL) 1657 RETURN_ERROR(B_BAD_VALUE); 1658 1659 // lock access to stream 1660 ReadLocked locked(fTree->fStream->Lock()); 1661 1662 off_t nodeOffset = fTree->fHeader->RootNode(); 1663 CachedNode cached(fTree); 1664 bplustree_node *node; 1665 1666 while ((node = cached.SetTo(nodeOffset)) != NULL) { 1667 // is the node a leaf node? 1668 if (node->OverflowLink() == BPLUSTREE_NULL) { 1669 fCurrentNodeOffset = nodeOffset; 1670 fCurrentKey = to == BPLUSTREE_BEGIN ? -1 : node->NumKeys(); 1671 fDuplicateNode = BPLUSTREE_NULL; 1672 1673 return B_OK; 1674 } 1675 1676 // get the next node offset depending on the direction (and if there 1677 // are any keys in that node at all) 1678 off_t nextOffset; 1679 if (to == BPLUSTREE_END || node->all_key_count == 0) 1680 nextOffset = node->OverflowLink(); 1681 else { 1682 if (node->AllKeyLength() > fTree->fNodeSize 1683 || (uint32)node->Values() > (uint32)node + fTree->fNodeSize - 8 * node->NumKeys()) 1684 RETURN_ERROR(B_ERROR); 1685 1686 nextOffset = BFS_ENDIAN_TO_HOST_INT64(node->Values()[0]); 1687 } 1688 if (nextOffset == nodeOffset) 1689 break; 1690 1691 nodeOffset = nextOffset; 1692 } 1693 #if __MWERKS__ 1694 FATAL(("TreeIterator::Goto(int8 to) fails\n")); 1695 #else 1696 FATAL(("%s fails\n",__PRETTY_FUNCTION__)); 1697 #endif 1698 RETURN_ERROR(B_ERROR); 1699 } 1700 1701 1702 /** Iterates through the tree in the specified direction. 1703 * When it iterates through duplicates, the "key" is only updated for the 1704 * first entry - if you need to know when this happens, use the "duplicate" 1705 * parameter which is 0 for no duplicate, 1 for the first, and 2 for all 1706 * the other duplicates. 1707 * That's not too nice, but saves the 256 bytes that would be needed to 1708 * store the last key - if this will ever become an issue, it will be 1709 * easy to change. 1710 * The other advantage of this is, that the queries can skip all duplicates 1711 * at once when they are not relevant to them. 1712 */ 1713 1714 status_t 1715 TreeIterator::Traverse(int8 direction, void *key, uint16 *keyLength, uint16 maxLength, 1716 off_t *value, uint16 *duplicate) 1717 { 1718 if (fTree == NULL) 1719 return B_INTERRUPTED; 1720 if (fCurrentNodeOffset == BPLUSTREE_NULL 1721 && Goto(direction == BPLUSTREE_FORWARD ? BPLUSTREE_BEGIN : BPLUSTREE_END) < B_OK) 1722 RETURN_ERROR(B_ERROR); 1723 1724 // if the tree was emptied since the last call 1725 if (fCurrentNodeOffset == BPLUSTREE_FREE) 1726 return B_ENTRY_NOT_FOUND; 1727 1728 // lock access to stream 1729 ReadLocked locked(fTree->fStream->Lock()); 1730 1731 CachedNode cached(fTree); 1732 bplustree_node *node; 1733 1734 if (fDuplicateNode != BPLUSTREE_NULL) 1735 { 1736 // regardless of traverse direction the duplicates are always presented in 1737 // the same order; since they are all considered as equal, this shouldn't 1738 // cause any problems 1739 1740 if (!fIsFragment || fDuplicate < fNumDuplicates) 1741 node = cached.SetTo(bplustree_node::FragmentOffset(fDuplicateNode), false); 1742 else 1743 node = NULL; 1744 1745 if (node != NULL) 1746 { 1747 if (!fIsFragment && fDuplicate >= fNumDuplicates) 1748 { 1749 // if the node is out of duplicates, we go directly to the next one 1750 fDuplicateNode = node->RightLink(); 1751 if (fDuplicateNode != BPLUSTREE_NULL 1752 && (node = cached.SetTo(fDuplicateNode, false)) != NULL) 1753 { 1754 fNumDuplicates = node->CountDuplicates(fDuplicateNode, false); 1755 fDuplicate = 0; 1756 } 1757 } 1758 if (fDuplicate < fNumDuplicates) 1759 { 1760 *value = node->DuplicateAt(fDuplicateNode, fIsFragment, fDuplicate++); 1761 if (duplicate) 1762 *duplicate = 2; 1763 return B_OK; 1764 } 1765 } 1766 fDuplicateNode = BPLUSTREE_NULL; 1767 } 1768 1769 off_t savedNodeOffset = fCurrentNodeOffset; 1770 if ((node = cached.SetTo(fCurrentNodeOffset)) == NULL) 1771 RETURN_ERROR(B_ERROR); 1772 1773 if (duplicate) 1774 *duplicate = 0; 1775 1776 fCurrentKey += direction; 1777 1778 // is the current key in the current node? 1779 while ((direction == BPLUSTREE_FORWARD && fCurrentKey >= node->NumKeys()) 1780 || (direction == BPLUSTREE_BACKWARD && fCurrentKey < 0)) 1781 { 1782 fCurrentNodeOffset = direction == BPLUSTREE_FORWARD ? node->RightLink() : node->LeftLink(); 1783 1784 // are there any more nodes? 1785 if (fCurrentNodeOffset != BPLUSTREE_NULL) 1786 { 1787 node = cached.SetTo(fCurrentNodeOffset); 1788 if (!node) 1789 RETURN_ERROR(B_ERROR); 1790 1791 // reset current key 1792 fCurrentKey = direction == BPLUSTREE_FORWARD ? 0 : node->NumKeys(); 1793 } 1794 else 1795 { 1796 // there are no nodes left, so turn back to the last key 1797 fCurrentNodeOffset = savedNodeOffset; 1798 fCurrentKey = direction == BPLUSTREE_FORWARD ? node->NumKeys() : -1; 1799 1800 return B_ENTRY_NOT_FOUND; 1801 } 1802 } 1803 1804 if (node->all_key_count == 0) 1805 RETURN_ERROR(B_ERROR); // B_ENTRY_NOT_FOUND ? 1806 1807 uint16 length; 1808 uint8 *keyStart = node->KeyAt(fCurrentKey, &length); 1809 if (keyStart + length + sizeof(off_t) + sizeof(uint16) > (uint8 *)node + fTree->fNodeSize 1810 || length > BPLUSTREE_MAX_KEY_LENGTH) { 1811 fTree->fStream->GetVolume()->Panic(); 1812 RETURN_ERROR(B_BAD_DATA); 1813 } 1814 1815 length = min_c(length, maxLength); 1816 memcpy(key, keyStart, length); 1817 1818 if (fTree->fHeader->data_type == BPLUSTREE_STRING_TYPE) // terminate string type 1819 { 1820 if (length == maxLength) 1821 length--; 1822 ((char *)key)[length] = '\0'; 1823 } 1824 *keyLength = length; 1825 1826 off_t offset = BFS_ENDIAN_TO_HOST_INT64(node->Values()[fCurrentKey]); 1827 1828 // duplicate fragments? 1829 uint8 type = bplustree_node::LinkType(offset); 1830 if (type == BPLUSTREE_DUPLICATE_FRAGMENT || type == BPLUSTREE_DUPLICATE_NODE) 1831 { 1832 fDuplicateNode = offset; 1833 1834 node = cached.SetTo(bplustree_node::FragmentOffset(fDuplicateNode), false); 1835 if (node == NULL) 1836 RETURN_ERROR(B_ERROR); 1837 1838 fIsFragment = type == BPLUSTREE_DUPLICATE_FRAGMENT; 1839 1840 fNumDuplicates = node->CountDuplicates(offset, fIsFragment); 1841 if (fNumDuplicates) 1842 { 1843 offset = node->DuplicateAt(offset, fIsFragment, 0); 1844 fDuplicate = 1; 1845 if (duplicate) 1846 *duplicate = 1; 1847 } 1848 else 1849 { 1850 // shouldn't happen, but we're dealing here with potentially corrupt disks... 1851 fDuplicateNode = BPLUSTREE_NULL; 1852 offset = 0; 1853 } 1854 } 1855 *value = offset; 1856 1857 return B_OK; 1858 } 1859 1860 1861 /** This is more or less a copy of BPlusTree::Find() - but it just 1862 * sets the current position in the iterator, regardless of if the 1863 * key could be found or not. 1864 */ 1865 1866 status_t 1867 TreeIterator::Find(const uint8 *key, uint16 keyLength) 1868 { 1869 if (fTree == NULL) 1870 return B_INTERRUPTED; 1871 if (keyLength < BPLUSTREE_MIN_KEY_LENGTH || keyLength > BPLUSTREE_MAX_KEY_LENGTH 1872 || key == NULL) 1873 RETURN_ERROR(B_BAD_VALUE); 1874 1875 // lock access to stream 1876 ReadLocked locked(fTree->fStream->Lock()); 1877 1878 off_t nodeOffset = fTree->fHeader->RootNode(); 1879 1880 CachedNode cached(fTree); 1881 bplustree_node *node; 1882 while ((node = cached.SetTo(nodeOffset)) != NULL) { 1883 uint16 keyIndex = 0; 1884 off_t nextOffset; 1885 status_t status = fTree->FindKey(node, key, keyLength, &keyIndex, &nextOffset); 1886 1887 if (node->OverflowLink() == BPLUSTREE_NULL) { 1888 fCurrentNodeOffset = nodeOffset; 1889 fCurrentKey = keyIndex - 1; 1890 fDuplicateNode = BPLUSTREE_NULL; 1891 1892 return status; 1893 } else if (nextOffset == nodeOffset) 1894 RETURN_ERROR(B_ERROR); 1895 1896 nodeOffset = nextOffset; 1897 } 1898 RETURN_ERROR(B_ERROR); 1899 } 1900 1901 1902 void 1903 TreeIterator::SkipDuplicates() 1904 { 1905 fDuplicateNode = BPLUSTREE_NULL; 1906 } 1907 1908 1909 void 1910 TreeIterator::Update(off_t offset, off_t nextOffset, uint16 keyIndex, uint16 splitAt, int8 change) 1911 { 1912 if (offset != fCurrentNodeOffset) 1913 return; 1914 1915 if (nextOffset != BPLUSTREE_NULL) { 1916 fCurrentNodeOffset = nextOffset; 1917 if (splitAt <= fCurrentKey) { 1918 fCurrentKey -= splitAt; 1919 keyIndex -= splitAt; 1920 } 1921 } 1922 1923 // Adjust fCurrentKey to point to the same key as before. 1924 // Note, that if a key is inserted at the current position 1925 // it won't be included in this tree transition. 1926 if (keyIndex <= fCurrentKey) 1927 fCurrentKey += change; 1928 1929 // ToDo: duplicate handling! 1930 } 1931 1932 1933 void 1934 TreeIterator::Stop() 1935 { 1936 fTree = NULL; 1937 } 1938 1939 1940 #ifdef DEBUG 1941 void 1942 TreeIterator::Dump() 1943 { 1944 __out("TreeIterator at %p:\n", this); 1945 __out("\tfTree = %p\n", fTree); 1946 __out("\tfCurrentNodeOffset = %Ld\n", fCurrentNodeOffset); 1947 __out("\tfCurrentKey = %ld\n", fCurrentKey); 1948 __out("\tfDuplicateNode = %Ld (%Ld, 0x%Lx)\n", bplustree_node::FragmentOffset(fDuplicateNode), fDuplicateNode, fDuplicateNode); 1949 __out("\tfDuplicate = %u\n", fDuplicate); 1950 __out("\tfNumDuplicates = %u\n", fNumDuplicates); 1951 __out("\tfIsFragment = %s\n", fIsFragment ? "true" : "false"); 1952 } 1953 #endif 1954 1955 1956 // #pragma mark - 1957 1958 1959 void 1960 bplustree_node::Initialize() 1961 { 1962 left_link = right_link = overflow_link = HOST_ENDIAN_TO_BFS_INT64((uint64)BPLUSTREE_NULL); 1963 all_key_count = 0; 1964 all_key_length = 0; 1965 } 1966 1967 1968 uint8 * 1969 bplustree_node::KeyAt(int32 index, uint16 *keyLength) const 1970 { 1971 if (index < 0 || index > NumKeys()) 1972 return NULL; 1973 1974 uint8 *keyStart = Keys(); 1975 uint16 *keyLengths = KeyLengths(); 1976 1977 *keyLength = BFS_ENDIAN_TO_HOST_INT16(keyLengths[index]) 1978 - (index != 0 ? BFS_ENDIAN_TO_HOST_INT16(keyLengths[index - 1]) : 0); 1979 if (index > 0) 1980 keyStart += BFS_ENDIAN_TO_HOST_INT16(keyLengths[index - 1]); 1981 1982 return keyStart; 1983 } 1984 1985 1986 uint8 1987 bplustree_node::CountDuplicates(off_t offset, bool isFragment) const 1988 { 1989 // the duplicate fragment handling is currently hard-coded to a node size 1990 // of 1024 bytes - with future versions of BFS, this may be a problem 1991 1992 if (isFragment) { 1993 uint32 fragment = (NUM_FRAGMENT_VALUES + 1) * ((uint64)offset & 0x3ff); 1994 1995 return ((off_t *)this)[fragment]; 1996 } 1997 return OverflowLink(); 1998 } 1999 2000 2001 off_t 2002 bplustree_node::DuplicateAt(off_t offset, bool isFragment, int8 index) const 2003 { 2004 uint32 start; 2005 if (isFragment) 2006 start = 8 * ((uint64)offset & 0x3ff); 2007 else 2008 start = 2; 2009 2010 return ((off_t *)this)[start + 1 + index]; 2011 } 2012 2013 2014 /** Although the name suggests it, this function doesn't return the real 2015 * used fragment count; at least, it can only count to two: it returns 2016 * 0, if there is no fragment used, 1 if there is only one fragment 2017 * used, and 2 if there are at least 2 fragments used. 2018 */ 2019 2020 int32 2021 bplustree_node::FragmentsUsed(uint32 nodeSize) 2022 { 2023 uint32 used = 0; 2024 for (uint32 i = 0; i < nodeSize / ((NUM_FRAGMENT_VALUES + 1) * sizeof(off_t)); i++) { 2025 duplicate_array *array = FragmentAt(i); 2026 if (array->count > 0 && ++used > 1) 2027 return used; 2028 } 2029 return used; 2030 } 2031 2032 2033 #ifdef DEBUG 2034 void 2035 bplustree_node::CheckIntegrity(uint32 nodeSize) 2036 { 2037 if (NumKeys() > nodeSize || AllKeyLength() > nodeSize) 2038 DEBUGGER(("invalid node: key/length count")); 2039 2040 for (int32 i = 0; i < NumKeys(); i++) { 2041 uint16 length; 2042 uint8 *key = KeyAt(i, &length); 2043 if (key + length + sizeof(off_t) + sizeof(uint16) > (uint8 *)this + nodeSize 2044 || length > BPLUSTREE_MAX_KEY_LENGTH) 2045 DEBUGGER(("invalid node: keys corrupted")); 2046 } 2047 } 2048 #endif 2049 2050 2051 // #pragma mark - 2052 2053 2054 int32 2055 compareKeys(type_code type,const void *key1, int keyLength1, const void *key2, int keyLength2) 2056 { 2057 // if one of the keys is NULL, bail out gracefully 2058 if (key1 == NULL || key2 == NULL) 2059 return -1; 2060 2061 switch (type) 2062 { 2063 case B_STRING_TYPE: 2064 { 2065 int len = min_c(keyLength1, keyLength2); 2066 int result = strncmp((const char *)key1, (const char *)key2, len); 2067 2068 if (result == 0 2069 && !(((const char *)key1)[len] == '\0' && ((const char *)key2)[len] == '\0')) 2070 result = keyLength1 - keyLength2; 2071 2072 return result; 2073 } 2074 2075 case B_SSIZE_T_TYPE: 2076 case B_INT32_TYPE: 2077 return *(int32 *)key1 - *(int32 *)key2; 2078 2079 case B_SIZE_T_TYPE: 2080 case B_UINT32_TYPE: 2081 if (*(uint32 *)key1 == *(uint32 *)key2) 2082 return 0; 2083 else if (*(uint32 *)key1 > *(uint32 *)key2) 2084 return 1; 2085 2086 return -1; 2087 2088 case B_OFF_T_TYPE: 2089 case B_INT64_TYPE: 2090 if (*(int64 *)key1 == *(int64 *)key2) 2091 return 0; 2092 else if (*(int64 *)key1 > *(int64 *)key2) 2093 return 1; 2094 2095 return -1; 2096 2097 case B_UINT64_TYPE: 2098 if (*(uint64 *)key1 == *(uint64 *)key2) 2099 return 0; 2100 else if (*(uint64 *)key1 > *(uint64 *)key2) 2101 return 1; 2102 2103 return -1; 2104 2105 case B_FLOAT_TYPE: 2106 { 2107 float result = *(float *)key1 - *(float *)key2; 2108 if (result == 0.0f) 2109 return 0; 2110 2111 return (result < 0.0f) ? -1 : 1; 2112 } 2113 2114 case B_DOUBLE_TYPE: 2115 { 2116 double result = *(double *)key1 - *(double *)key2; 2117 if (result == 0.0) 2118 return 0; 2119 2120 return (result < 0.0) ? -1 : 1; 2121 } 2122 } 2123 2124 // if the type is unknown, the entries don't match... 2125 return -1; 2126 } 2127 2128 2129