1 /* 2 * Copyright 2001-2017, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 7 //! Block bitmap handling and allocation policies 8 9 10 #include "BlockAllocator.h" 11 12 #include "bfs_control.h" 13 #include "BPlusTree.h" 14 #include "Debug.h" 15 #include "Inode.h" 16 #include "Volume.h" 17 18 19 // Things the BlockAllocator should do: 20 21 // - find a range of blocks of a certain size nearby a specific position 22 // - allocating an unsharp range of blocks for pre-allocation 23 // - free blocks 24 // - know how to deal with each allocation, special handling for directories, 25 // files, symlinks, etc. (type sensitive allocation policies) 26 27 // What makes the code complicated is the fact that we are not just reading 28 // in the whole bitmap and operate on that in memory - e.g. a 13 GB partition 29 // with a block size of 2048 bytes already has a 800kB bitmap, and the size 30 // of partitions will grow even more - so that's not an option. 31 // Instead we are reading in every block when it's used - since an allocation 32 // group can span several blocks in the block bitmap, the AllocationBlock 33 // class is there to make handling those easier. 34 35 // The current implementation is only slightly optimized and could probably 36 // be improved a lot. Furthermore, the allocation policies used here should 37 // have some real world tests. 38 39 #if BFS_TRACING && !defined(FS_SHELL) 40 namespace BFSBlockTracing { 41 42 43 class Allocate : public AbstractTraceEntry { 44 public: 45 Allocate(block_run run) 46 : 47 fRun(run) 48 { 49 Initialized(); 50 } 51 52 virtual void AddDump(TraceOutput& out) 53 { 54 out.Print("bfs:alloc %lu.%u.%u", fRun.AllocationGroup(), 55 fRun.Start(), fRun.Length()); 56 } 57 58 const block_run& Run() const { return fRun; } 59 60 private: 61 block_run fRun; 62 }; 63 64 65 class Free : public AbstractTraceEntry { 66 public: 67 Free(block_run run) 68 : 69 fRun(run) 70 { 71 Initialized(); 72 } 73 74 virtual void AddDump(TraceOutput& out) 75 { 76 out.Print("bfs:free %lu.%u.%u", fRun.AllocationGroup(), 77 fRun.Start(), fRun.Length()); 78 } 79 80 const block_run& Run() const { return fRun; } 81 82 private: 83 block_run fRun; 84 }; 85 86 87 static uint32 88 checksum(const uint8* data, size_t size) 89 { 90 const uint32* data4 = (const uint32*)data; 91 uint32 sum = 0; 92 while (size >= 4) { 93 sum += *data4; 94 data4++; 95 size -= 4; 96 } 97 return sum; 98 } 99 100 101 class Block : public AbstractTraceEntry { 102 public: 103 Block(const char* label, off_t blockNumber, const uint8* data, 104 size_t size, uint32 start = 0, uint32 length = 0) 105 : 106 fBlock(blockNumber), 107 fData(data), 108 fStart(start), 109 fLength(length), 110 fLabel(label) 111 { 112 fSum = checksum(data, size); 113 Initialized(); 114 } 115 116 virtual void AddDump(TraceOutput& out) 117 { 118 out.Print("bfs:%s: block %Ld (%p), sum %lu, s/l %lu/%lu", fLabel, 119 fBlock, fData, fSum, fStart, fLength); 120 } 121 122 private: 123 off_t fBlock; 124 const uint8* fData; 125 uint32 fStart; 126 uint32 fLength; 127 uint32 fSum; 128 const char* fLabel; 129 }; 130 131 132 class BlockChange : public AbstractTraceEntry { 133 public: 134 BlockChange(const char* label, int32 block, uint32 oldData, uint32 newData) 135 : 136 fBlock(block), 137 fOldData(oldData), 138 fNewData(newData), 139 fLabel(label) 140 { 141 Initialized(); 142 } 143 144 virtual void AddDump(TraceOutput& out) 145 { 146 out.Print("bfs:%s: block %ld, %08lx -> %08lx", fLabel, 147 fBlock, fOldData, fNewData); 148 } 149 150 private: 151 int32 fBlock; 152 uint32 fOldData; 153 uint32 fNewData; 154 const char* fLabel; 155 }; 156 157 158 } // namespace BFSBlockTracing 159 160 # define T(x) new(std::nothrow) BFSBlockTracing::x; 161 #else 162 # define T(x) ; 163 #endif 164 165 #ifdef DEBUG_ALLOCATION_GROUPS 166 # define CHECK_ALLOCATION_GROUP(group) _CheckGroup(group) 167 #else 168 # define CHECK_ALLOCATION_GROUP(group) ; 169 #endif 170 171 172 struct check_index { 173 check_index() 174 : 175 inode(NULL) 176 { 177 } 178 179 char name[B_FILE_NAME_LENGTH]; 180 block_run run; 181 Inode* inode; 182 }; 183 184 185 struct check_cookie { 186 check_cookie() 187 { 188 } 189 190 uint32 pass; 191 block_run current; 192 Inode* parent; 193 mode_t parent_mode; 194 Stack<block_run> stack; 195 TreeIterator* iterator; 196 check_control control; 197 Stack<check_index*> indices; 198 }; 199 200 201 class AllocationBlock : public CachedBlock { 202 public: 203 AllocationBlock(Volume* volume); 204 205 inline void Allocate(uint16 start, uint16 numBlocks); 206 inline void Free(uint16 start, uint16 numBlocks); 207 inline bool IsUsed(uint16 block); 208 209 status_t SetTo(AllocationGroup& group, uint16 block); 210 status_t SetToWritable(Transaction& transaction, AllocationGroup& group, 211 uint16 block); 212 213 uint32 NumBlockBits() const { return fNumBits; } 214 uint32& Block(int32 index) { return ((uint32*)fBlock)[index]; } 215 uint8* Block() const { return (uint8*)fBlock; } 216 217 private: 218 uint32 fNumBits; 219 #ifdef DEBUG 220 bool fWritable; 221 #endif 222 }; 223 224 225 class AllocationGroup { 226 public: 227 AllocationGroup(); 228 229 void AddFreeRange(int32 start, int32 blocks); 230 bool IsFull() const { return fFreeBits == 0; } 231 232 status_t Allocate(Transaction& transaction, uint16 start, int32 length); 233 status_t Free(Transaction& transaction, uint16 start, int32 length); 234 235 uint32 NumBits() const { return fNumBits; } 236 uint32 NumBlocks() const { return fNumBlocks; } 237 int32 Start() const { return fStart; } 238 239 private: 240 friend class BlockAllocator; 241 242 uint32 fNumBits; 243 uint32 fNumBlocks; 244 int32 fStart; 245 int32 fFirstFree; 246 int32 fFreeBits; 247 248 int32 fLargestStart; 249 int32 fLargestLength; 250 bool fLargestValid; 251 }; 252 253 254 AllocationBlock::AllocationBlock(Volume* volume) 255 : CachedBlock(volume) 256 { 257 } 258 259 260 status_t 261 AllocationBlock::SetTo(AllocationGroup& group, uint16 block) 262 { 263 // 8 blocks per byte 264 fNumBits = fVolume->BlockSize() << 3; 265 // the last group may have less bits than the others 266 if ((block + 1) * fNumBits > group.NumBits()) 267 fNumBits = group.NumBits() % fNumBits; 268 269 #ifdef DEBUG 270 fWritable = false; 271 #endif 272 return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR; 273 } 274 275 276 status_t 277 AllocationBlock::SetToWritable(Transaction& transaction, AllocationGroup& group, 278 uint16 block) 279 { 280 // 8 blocks per byte 281 fNumBits = fVolume->BlockSize() << 3; 282 // the last group may have less bits in the last block 283 if ((block + 1) * fNumBits > group.NumBits()) 284 fNumBits = group.NumBits() % fNumBits; 285 286 #ifdef DEBUG 287 fWritable = true; 288 #endif 289 return CachedBlock::SetToWritable(transaction, group.Start() + block) 290 != NULL ? B_OK : B_ERROR; 291 } 292 293 294 bool 295 AllocationBlock::IsUsed(uint16 block) 296 { 297 if (block > fNumBits) 298 return true; 299 300 // the block bitmap is accessed in 32-bit blocks 301 return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32)); 302 } 303 304 305 void 306 AllocationBlock::Allocate(uint16 start, uint16 numBlocks) 307 { 308 ASSERT(start < fNumBits); 309 ASSERT(uint32(start + numBlocks) <= fNumBits); 310 #ifdef DEBUG 311 ASSERT(fWritable); 312 #endif 313 314 T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(), 315 start, numBlocks)); 316 317 int32 block = start >> 5; 318 319 while (numBlocks > 0) { 320 uint32 mask = 0; 321 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 322 mask |= 1UL << i; 323 324 T(BlockChange("b-alloc", block, Block(block), 325 Block(block) | HOST_ENDIAN_TO_BFS_INT32(mask))); 326 327 #if KDEBUG 328 // check for already set blocks 329 if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32*)fBlock)[block]) { 330 FATAL(("AllocationBlock::Allocate(): some blocks are already " 331 "allocated, start = %u, numBlocks = %u\n", start, numBlocks)); 332 panic("blocks already set!"); 333 } 334 #endif 335 336 Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask); 337 start = 0; 338 } 339 T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(), 340 start, numBlocks)); 341 } 342 343 344 void 345 AllocationBlock::Free(uint16 start, uint16 numBlocks) 346 { 347 ASSERT(start < fNumBits); 348 ASSERT(uint32(start + numBlocks) <= fNumBits); 349 #ifdef DEBUG 350 ASSERT(fWritable); 351 #endif 352 353 int32 block = start >> 5; 354 355 while (numBlocks > 0) { 356 uint32 mask = 0; 357 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 358 mask |= 1UL << (i % 32); 359 360 T(BlockChange("b-free", block, Block(block), 361 Block(block) & HOST_ENDIAN_TO_BFS_INT32(~mask))); 362 363 Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask); 364 start = 0; 365 } 366 } 367 368 369 // #pragma mark - 370 371 372 /*! The allocation groups are created and initialized in 373 BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap() 374 respectively. 375 */ 376 AllocationGroup::AllocationGroup() 377 : 378 fFirstFree(-1), 379 fFreeBits(0), 380 fLargestValid(false) 381 { 382 } 383 384 385 void 386 AllocationGroup::AddFreeRange(int32 start, int32 blocks) 387 { 388 //D(if (blocks > 512) 389 // PRINT(("range of %ld blocks starting at %ld\n",blocks,start))); 390 391 if (fFirstFree == -1) 392 fFirstFree = start; 393 394 if (!fLargestValid || fLargestLength < blocks) { 395 fLargestStart = start; 396 fLargestLength = blocks; 397 fLargestValid = true; 398 } 399 400 fFreeBits += blocks; 401 } 402 403 404 /*! Allocates the specified run in the allocation group. 405 Doesn't check if the run is valid or already allocated partially, nor 406 does it maintain the free ranges hints or the volume's used blocks count. 407 It only does the low-level work of allocating some bits in the block bitmap. 408 Assumes that the block bitmap lock is hold. 409 */ 410 status_t 411 AllocationGroup::Allocate(Transaction& transaction, uint16 start, int32 length) 412 { 413 ASSERT(start + length <= (int32)fNumBits); 414 415 // Update the allocation group info 416 // TODO: this info will be incorrect if something goes wrong later 417 // Note, the fFirstFree block doesn't have to be really free 418 if (start == fFirstFree) 419 fFirstFree = start + length; 420 fFreeBits -= length; 421 422 if (fLargestValid) { 423 bool cut = false; 424 if (fLargestStart == start) { 425 // cut from start 426 fLargestStart += length; 427 fLargestLength -= length; 428 cut = true; 429 } else if (start > fLargestStart 430 && start < fLargestStart + fLargestLength) { 431 // cut from end 432 fLargestLength = start - fLargestStart; 433 cut = true; 434 } 435 if (cut && (fLargestLength < fLargestStart 436 || fLargestLength 437 < (int32)fNumBits - (fLargestStart + fLargestLength))) { 438 // might not be the largest block anymore 439 fLargestValid = false; 440 } 441 } 442 443 Volume* volume = transaction.GetVolume(); 444 445 // calculate block in the block bitmap and position within 446 uint32 bitsPerBlock = volume->BlockSize() << 3; 447 uint32 block = start / bitsPerBlock; 448 start = start % bitsPerBlock; 449 450 AllocationBlock cached(volume); 451 452 while (length > 0) { 453 if (cached.SetToWritable(transaction, *this, block) < B_OK) { 454 fLargestValid = false; 455 RETURN_ERROR(B_IO_ERROR); 456 } 457 458 uint32 numBlocks = length; 459 if (start + numBlocks > cached.NumBlockBits()) 460 numBlocks = cached.NumBlockBits() - start; 461 462 cached.Allocate(start, numBlocks); 463 464 length -= numBlocks; 465 start = 0; 466 block++; 467 } 468 469 return B_OK; 470 } 471 472 473 /*! Frees the specified run in the allocation group. 474 Doesn't check if the run is valid or was not completely allocated, nor 475 does it maintain the free ranges hints or the volume's used blocks count. 476 It only does the low-level work of freeing some bits in the block bitmap. 477 Assumes that the block bitmap lock is hold. 478 */ 479 status_t 480 AllocationGroup::Free(Transaction& transaction, uint16 start, int32 length) 481 { 482 ASSERT(start + length <= (int32)fNumBits); 483 484 // Update the allocation group info 485 // TODO: this info will be incorrect if something goes wrong later 486 if (fFirstFree > start) 487 fFirstFree = start; 488 fFreeBits += length; 489 490 // The range to be freed cannot be part of the valid largest range 491 ASSERT(!fLargestValid || start + length <= fLargestStart 492 || start > fLargestStart); 493 494 if (fLargestValid 495 && (start + length == fLargestStart 496 || fLargestStart + fLargestLength == start 497 || (start < fLargestStart && fLargestStart > fLargestLength) 498 || (start > fLargestStart 499 && (int32)fNumBits - (fLargestStart + fLargestLength) 500 > fLargestLength))) { 501 fLargestValid = false; 502 } 503 504 Volume* volume = transaction.GetVolume(); 505 506 // calculate block in the block bitmap and position within 507 uint32 bitsPerBlock = volume->BlockSize() << 3; 508 uint32 block = start / bitsPerBlock; 509 start = start % bitsPerBlock; 510 511 AllocationBlock cached(volume); 512 513 while (length > 0) { 514 if (cached.SetToWritable(transaction, *this, block) < B_OK) 515 RETURN_ERROR(B_IO_ERROR); 516 517 T(Block("free-1", block, cached.Block(), volume->BlockSize())); 518 uint16 freeLength = length; 519 if (uint32(start + length) > cached.NumBlockBits()) 520 freeLength = cached.NumBlockBits() - start; 521 522 cached.Free(start, freeLength); 523 524 length -= freeLength; 525 start = 0; 526 T(Block("free-2", block, cached.Block(), volume->BlockSize())); 527 block++; 528 } 529 return B_OK; 530 } 531 532 533 // #pragma mark - 534 535 536 BlockAllocator::BlockAllocator(Volume* volume) 537 : 538 fVolume(volume), 539 fGroups(NULL), 540 fCheckBitmap(NULL), 541 fCheckCookie(NULL) 542 { 543 recursive_lock_init(&fLock, "bfs allocator"); 544 } 545 546 547 BlockAllocator::~BlockAllocator() 548 { 549 recursive_lock_destroy(&fLock); 550 delete[] fGroups; 551 } 552 553 554 status_t 555 BlockAllocator::Initialize(bool full) 556 { 557 fNumGroups = fVolume->AllocationGroups(); 558 fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup(); 559 fNumBlocks = (fVolume->NumBlocks() + fVolume->BlockSize() * 8 - 1) 560 / (fVolume->BlockSize() * 8); 561 562 fGroups = new(std::nothrow) AllocationGroup[fNumGroups]; 563 if (fGroups == NULL) 564 return B_NO_MEMORY; 565 566 if (!full) 567 return B_OK; 568 569 recursive_lock_lock(&fLock); 570 // the lock will be released by the _Initialize() method 571 572 thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize, 573 "bfs block allocator", B_LOW_PRIORITY, this); 574 if (id < B_OK) 575 return _Initialize(this); 576 577 recursive_lock_transfer_lock(&fLock, id); 578 579 return resume_thread(id); 580 } 581 582 583 status_t 584 BlockAllocator::InitializeAndClearBitmap(Transaction& transaction) 585 { 586 status_t status = Initialize(false); 587 if (status != B_OK) 588 return status; 589 590 uint32 numBits = 8 * fBlocksPerGroup * fVolume->BlockSize(); 591 uint32 blockShift = fVolume->BlockShift(); 592 593 uint32* buffer = (uint32*)malloc(numBits >> 3); 594 if (buffer == NULL) 595 RETURN_ERROR(B_NO_MEMORY); 596 597 memset(buffer, 0, numBits >> 3); 598 599 off_t offset = 1; 600 // the bitmap starts directly after the superblock 601 602 // initialize the AllocationGroup objects and clear the on-disk bitmap 603 604 for (int32 i = 0; i < fNumGroups; i++) { 605 if (write_pos(fVolume->Device(), offset << blockShift, buffer, 606 fBlocksPerGroup << blockShift) < B_OK) { 607 free(buffer); 608 return B_ERROR; 609 } 610 611 // the last allocation group may contain less blocks than the others 612 if (i == fNumGroups - 1) { 613 fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits; 614 fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1) 615 >> (blockShift + 3)); 616 } else { 617 fGroups[i].fNumBits = numBits; 618 fGroups[i].fNumBlocks = fBlocksPerGroup; 619 } 620 fGroups[i].fStart = offset; 621 fGroups[i].fFirstFree = fGroups[i].fLargestStart = 0; 622 fGroups[i].fFreeBits = fGroups[i].fLargestLength = fGroups[i].fNumBits; 623 fGroups[i].fLargestValid = true; 624 625 offset += fBlocksPerGroup; 626 } 627 free(buffer); 628 629 // reserve the boot block, the log area, and the block bitmap itself 630 uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length(); 631 632 if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 633 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 634 return B_ERROR; 635 } 636 fVolume->SuperBlock().used_blocks 637 = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks); 638 639 return B_OK; 640 } 641 642 643 status_t 644 BlockAllocator::_Initialize(BlockAllocator* allocator) 645 { 646 // The lock must already be held at this point 647 RecursiveLocker locker(allocator->fLock, true); 648 649 Volume* volume = allocator->fVolume; 650 uint32 blocks = allocator->fBlocksPerGroup; 651 uint32 blockShift = volume->BlockShift(); 652 off_t freeBlocks = 0; 653 654 uint32* buffer = (uint32*)malloc(blocks << blockShift); 655 if (buffer == NULL) 656 RETURN_ERROR(B_NO_MEMORY); 657 658 AllocationGroup* groups = allocator->fGroups; 659 off_t offset = 1; 660 uint32 bitsPerGroup = 8 * (blocks << blockShift); 661 int32 numGroups = allocator->fNumGroups; 662 663 for (int32 i = 0; i < numGroups; i++) { 664 if (read_pos(volume->Device(), offset << blockShift, buffer, 665 blocks << blockShift) < B_OK) 666 break; 667 668 // the last allocation group may contain less blocks than the others 669 if (i == numGroups - 1) { 670 groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup; 671 groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1) 672 >> (blockShift + 3)); 673 } else { 674 groups[i].fNumBits = bitsPerGroup; 675 groups[i].fNumBlocks = blocks; 676 } 677 groups[i].fStart = offset; 678 679 // finds all free ranges in this allocation group 680 int32 start = -1, range = 0; 681 int32 numBits = groups[i].fNumBits, bit = 0; 682 int32 count = (numBits + 31) / 32; 683 684 for (int32 k = 0; k < count; k++) { 685 for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) { 686 if (buffer[k] & (1UL << j)) { 687 // block is in use 688 if (range > 0) { 689 groups[i].AddFreeRange(start, range); 690 range = 0; 691 } 692 } else if (range++ == 0) { 693 // block is free, start new free range 694 start = bit; 695 } 696 } 697 } 698 if (range) 699 groups[i].AddFreeRange(start, range); 700 701 freeBlocks += groups[i].fFreeBits; 702 703 offset += blocks; 704 } 705 free(buffer); 706 707 // check if block bitmap and log area are reserved 708 uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length(); 709 710 if (allocator->CheckBlocks(0, reservedBlocks) != B_OK) { 711 if (volume->IsReadOnly()) { 712 FATAL(("Space for block bitmap or log area is not reserved " 713 "(volume is mounted read-only)!\n")); 714 } else { 715 Transaction transaction(volume, 0); 716 if (groups[0].Allocate(transaction, 0, reservedBlocks) != B_OK) { 717 FATAL(("Could not allocate reserved space for block " 718 "bitmap/log!\n")); 719 volume->Panic(); 720 } else { 721 transaction.Done(); 722 FATAL(("Space for block bitmap or log area was not " 723 "reserved!\n")); 724 } 725 } 726 } 727 728 off_t usedBlocks = volume->NumBlocks() - freeBlocks; 729 if (volume->UsedBlocks() != usedBlocks) { 730 // If the disk in a dirty state at mount time, it's 731 // normal that the values don't match 732 INFORM(("volume reports %" B_PRIdOFF " used blocks, correct is %" 733 B_PRIdOFF "\n", volume->UsedBlocks(), usedBlocks)); 734 volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 735 } 736 737 return B_OK; 738 } 739 740 741 void 742 BlockAllocator::Uninitialize() 743 { 744 // We only have to make sure that the initializer thread isn't running 745 // anymore. 746 recursive_lock_lock(&fLock); 747 } 748 749 750 /*! Tries to allocate between \a minimum, and \a maximum blocks starting 751 at group \a groupIndex with offset \a start. The resulting allocation 752 is put into \a run. 753 754 The number of allocated blocks is always a multiple of \a minimum which 755 has to be a power of two value. 756 */ 757 status_t 758 BlockAllocator::AllocateBlocks(Transaction& transaction, int32 groupIndex, 759 uint16 start, uint16 maximum, uint16 minimum, block_run& run) 760 { 761 if (maximum == 0) 762 return B_BAD_VALUE; 763 764 FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", 765 groupIndex, start, maximum, minimum)); 766 767 AllocationBlock cached(fVolume); 768 RecursiveLocker lock(fLock); 769 770 uint32 bitsPerFullBlock = fVolume->BlockSize() << 3; 771 772 // Find the block_run that can fulfill the request best 773 int32 bestGroup = -1; 774 int32 bestStart = -1; 775 int32 bestLength = -1; 776 777 for (int32 i = 0; i < fNumGroups + 1; i++, groupIndex++, start = 0) { 778 groupIndex = groupIndex % fNumGroups; 779 AllocationGroup& group = fGroups[groupIndex]; 780 781 CHECK_ALLOCATION_GROUP(groupIndex); 782 783 if (start >= group.NumBits() || group.IsFull()) 784 continue; 785 786 // The wanted maximum is smaller than the largest free block in the 787 // group or already smaller than the minimum 788 789 if (start < group.fFirstFree) 790 start = group.fFirstFree; 791 792 if (group.fLargestValid) { 793 if (group.fLargestLength < bestLength) 794 continue; 795 796 if (group.fLargestStart >= start) { 797 if (group.fLargestLength >= bestLength) { 798 bestGroup = groupIndex; 799 bestStart = group.fLargestStart; 800 bestLength = group.fLargestLength; 801 802 if (bestLength >= maximum) 803 break; 804 } 805 806 // We know everything about this group we have to, let's skip 807 // to the next 808 continue; 809 } 810 } 811 812 // There may be more than one block per allocation group - and 813 // we iterate through it to find a place for the allocation. 814 // (one allocation can't exceed one allocation group) 815 816 uint32 block = start / (fVolume->BlockSize() << 3); 817 int32 currentStart = 0, currentLength = 0; 818 int32 groupLargestStart = -1; 819 int32 groupLargestLength = -1; 820 int32 currentBit = start; 821 bool canFindGroupLargest = start == 0; 822 823 for (; block < group.NumBlocks(); block++) { 824 if (cached.SetTo(group, block) < B_OK) 825 RETURN_ERROR(B_ERROR); 826 827 T(Block("alloc-in", group.Start() + block, cached.Block(), 828 fVolume->BlockSize(), groupIndex, currentStart)); 829 830 // find a block large enough to hold the allocation 831 for (uint32 bit = start % bitsPerFullBlock; 832 bit < cached.NumBlockBits(); bit++) { 833 if (!cached.IsUsed(bit)) { 834 if (currentLength == 0) { 835 // start new range 836 currentStart = currentBit; 837 } 838 839 // have we found a range large enough to hold numBlocks? 840 if (++currentLength >= maximum) { 841 bestGroup = groupIndex; 842 bestStart = currentStart; 843 bestLength = currentLength; 844 break; 845 } 846 } else { 847 if (currentLength) { 848 // end of a range 849 if (currentLength > bestLength) { 850 bestGroup = groupIndex; 851 bestStart = currentStart; 852 bestLength = currentLength; 853 } 854 if (currentLength > groupLargestLength) { 855 groupLargestStart = currentStart; 856 groupLargestLength = currentLength; 857 } 858 currentLength = 0; 859 } 860 if ((int32)group.NumBits() - currentBit 861 <= groupLargestLength) { 862 // We can't find a bigger block in this group anymore, 863 // let's skip the rest. 864 block = group.NumBlocks(); 865 break; 866 } 867 } 868 currentBit++; 869 } 870 871 T(Block("alloc-out", block, cached.Block(), 872 fVolume->BlockSize(), groupIndex, currentStart)); 873 874 if (bestLength >= maximum) { 875 canFindGroupLargest = false; 876 break; 877 } 878 879 // start from the beginning of the next block 880 start = 0; 881 } 882 883 if (currentBit == (int32)group.NumBits()) { 884 if (currentLength > bestLength) { 885 bestGroup = groupIndex; 886 bestStart = currentStart; 887 bestLength = currentLength; 888 } 889 if (canFindGroupLargest && currentLength > groupLargestLength) { 890 groupLargestStart = currentStart; 891 groupLargestLength = currentLength; 892 } 893 } 894 895 if (canFindGroupLargest && !group.fLargestValid 896 && groupLargestLength >= 0) { 897 group.fLargestStart = groupLargestStart; 898 group.fLargestLength = groupLargestLength; 899 group.fLargestValid = true; 900 } 901 902 if (bestLength >= maximum) 903 break; 904 } 905 906 // If we found a suitable range, mark the blocks as in use, and 907 // write the updated block bitmap back to disk 908 if (bestLength < minimum) 909 return B_DEVICE_FULL; 910 911 if (bestLength > maximum) 912 bestLength = maximum; 913 else if (minimum > 1) { 914 // make sure bestLength is a multiple of minimum 915 bestLength = round_down(bestLength, minimum); 916 } 917 918 if (fGroups[bestGroup].Allocate(transaction, bestStart, bestLength) != B_OK) 919 RETURN_ERROR(B_IO_ERROR); 920 921 CHECK_ALLOCATION_GROUP(bestGroup); 922 923 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(bestGroup); 924 run.start = HOST_ENDIAN_TO_BFS_INT16(bestStart); 925 run.length = HOST_ENDIAN_TO_BFS_INT16(bestLength); 926 927 fVolume->SuperBlock().used_blocks 928 = HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + bestLength); 929 // We are not writing back the disk's superblock - it's 930 // either done by the journaling code, or when the disk 931 // is unmounted. 932 // If the value is not correct at mount time, it will be 933 // fixed anyway. 934 935 // We need to flush any remaining blocks in the new allocation to make sure 936 // they won't interfere with the file cache. 937 block_cache_discard(fVolume->BlockCache(), fVolume->ToBlock(run), 938 run.Length()); 939 940 T(Allocate(run)); 941 return B_OK; 942 } 943 944 945 status_t 946 BlockAllocator::AllocateForInode(Transaction& transaction, 947 const block_run* parent, mode_t type, block_run& run) 948 { 949 // Apply some allocation policies here (AllocateBlocks() will break them 950 // if necessary) - we will start with those described in Dominic Giampaolo's 951 // "Practical File System Design", and see how good they work 952 953 // Files are going in the same allocation group as its parent, 954 // sub-directories will be inserted 8 allocation groups after 955 // the one of the parent 956 uint16 group = parent->AllocationGroup(); 957 if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY) 958 group += 8; 959 960 return AllocateBlocks(transaction, group, 0, 1, 1, run); 961 } 962 963 964 status_t 965 BlockAllocator::Allocate(Transaction& transaction, Inode* inode, 966 off_t numBlocks, block_run& run, uint16 minimum) 967 { 968 if (numBlocks <= 0) 969 return B_ERROR; 970 971 // one block_run can't hold more data than there is in one allocation group 972 if (numBlocks > fGroups[0].NumBits()) 973 numBlocks = fGroups[0].NumBits(); 974 975 // since block_run.length is uint16, the largest number of blocks that 976 // can be covered by a block_run is 65535 977 // TODO: if we drop compatibility, couldn't we do this any better? 978 // There are basically two possibilities: 979 // a) since a length of zero doesn't have any sense, take that for 65536 - 980 // but that could cause many problems (bugs) in other areas 981 // b) reduce the maximum amount of blocks per block_run, so that the 982 // remaining number of free blocks can be used in a useful manner 983 // (like 4 blocks) - but that would also reduce the maximum file size 984 // c) have BlockRun::Length() return (length + 1). 985 if (numBlocks > MAX_BLOCK_RUN_LENGTH) 986 numBlocks = MAX_BLOCK_RUN_LENGTH; 987 988 // Apply some allocation policies here (AllocateBlocks() will break them 989 // if necessary) 990 uint16 group = inode->BlockRun().AllocationGroup(); 991 uint16 start = 0; 992 993 // Are there already allocated blocks? (then just try to allocate near the 994 // last one) 995 if (inode->Size() > 0) { 996 const data_stream& data = inode->Node().data; 997 // TODO: we currently don't care for when the data stream 998 // is already grown into the indirect ranges 999 if (data.max_double_indirect_range == 0 1000 && data.max_indirect_range == 0) { 1001 // Since size > 0, there must be a valid block run in this stream 1002 int32 last = 0; 1003 for (; last < NUM_DIRECT_BLOCKS - 1; last++) 1004 if (data.direct[last + 1].IsZero()) 1005 break; 1006 1007 group = data.direct[last].AllocationGroup(); 1008 start = data.direct[last].Start() + data.direct[last].Length(); 1009 } 1010 } else if (inode->IsContainer() || inode->IsSymLink()) { 1011 // directory and symbolic link data will go in the same allocation 1012 // group as the inode is in but after the inode data 1013 start = inode->BlockRun().Start(); 1014 } else { 1015 // file data will start in the next allocation group 1016 group = inode->BlockRun().AllocationGroup() + 1; 1017 } 1018 1019 return AllocateBlocks(transaction, group, start, numBlocks, minimum, run); 1020 } 1021 1022 1023 status_t 1024 BlockAllocator::Free(Transaction& transaction, block_run run) 1025 { 1026 RecursiveLocker lock(fLock); 1027 1028 int32 group = run.AllocationGroup(); 1029 uint16 start = run.Start(); 1030 uint16 length = run.Length(); 1031 1032 FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, 1033 length)); 1034 T(Free(run)); 1035 1036 // doesn't use Volume::IsValidBlockRun() here because it can check better 1037 // against the group size (the last group may have a different length) 1038 if (group < 0 || group >= fNumGroups 1039 || start > fGroups[group].NumBits() 1040 || uint32(start + length) > fGroups[group].NumBits() 1041 || length == 0) { 1042 FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group, 1043 start, length)); 1044 DEBUGGER(("tried to free invalid block_run")); 1045 return B_BAD_VALUE; 1046 } 1047 // check if someone tries to free reserved areas at the beginning of the 1048 // drive 1049 if (group == 0 1050 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) { 1051 FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group, 1052 start, length)); 1053 DEBUGGER(("tried to free reserved block")); 1054 return B_BAD_VALUE; 1055 } 1056 #ifdef DEBUG 1057 if (CheckBlockRun(run) != B_OK) 1058 return B_BAD_DATA; 1059 #endif 1060 1061 CHECK_ALLOCATION_GROUP(group); 1062 1063 if (fGroups[group].Free(transaction, start, length) != B_OK) 1064 RETURN_ERROR(B_IO_ERROR); 1065 1066 CHECK_ALLOCATION_GROUP(group); 1067 1068 #ifdef DEBUG 1069 if (CheckBlockRun(run, NULL, false) != B_OK) { 1070 DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just " 1071 "freed)\n")); 1072 } 1073 #endif 1074 1075 fVolume->SuperBlock().used_blocks = 1076 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length()); 1077 return B_OK; 1078 } 1079 1080 1081 size_t 1082 BlockAllocator::BitmapSize() const 1083 { 1084 return fVolume->BlockSize() * fNumBlocks; 1085 } 1086 1087 1088 #ifdef DEBUG_FRAGMENTER 1089 void 1090 BlockAllocator::Fragment() 1091 { 1092 AllocationBlock cached(fVolume); 1093 RecursiveLocker lock(fLock); 1094 1095 // only leave 4 block holes 1096 static const uint32 kMask = 0x0f0f0f0f; 1097 uint32 valuesPerBlock = fVolume->BlockSize() / 4; 1098 1099 for (int32 i = 0; i < fNumGroups; i++) { 1100 AllocationGroup& group = fGroups[i]; 1101 1102 for (uint32 block = 0; block < group.NumBlocks(); block++) { 1103 Transaction transaction(fVolume, 0); 1104 1105 if (cached.SetToWritable(transaction, group, block) != B_OK) 1106 return; 1107 1108 for (int32 index = 0; index < valuesPerBlock; index++) { 1109 cached.Block(index) |= HOST_ENDIAN_TO_BFS_INT32(kMask); 1110 } 1111 1112 transaction.Done(); 1113 } 1114 } 1115 } 1116 #endif // DEBUG_FRAGMENTER 1117 1118 1119 #ifdef DEBUG_ALLOCATION_GROUPS 1120 void 1121 BlockAllocator::_CheckGroup(int32 groupIndex) const 1122 { 1123 AllocationBlock cached(fVolume); 1124 ASSERT_LOCKED_RECURSIVE(&fLock); 1125 1126 AllocationGroup& group = fGroups[groupIndex]; 1127 1128 int32 currentStart = 0, currentLength = 0; 1129 int32 firstFree = -1; 1130 int32 largestStart = -1; 1131 int32 largestLength = 0; 1132 int32 currentBit = 0; 1133 1134 for (uint32 block = 0; block < group.NumBlocks(); block++) { 1135 if (cached.SetTo(group, block) < B_OK) { 1136 panic("setting group block %d failed\n", (int)block); 1137 return; 1138 } 1139 1140 for (uint32 bit = 0; bit < cached.NumBlockBits(); bit++) { 1141 if (!cached.IsUsed(bit)) { 1142 if (firstFree < 0) { 1143 firstFree = currentBit; 1144 if (!group.fLargestValid) { 1145 if (firstFree >= 0 && firstFree < group.fFirstFree) { 1146 // mostly harmless but noteworthy 1147 dprintf("group %d first free too late: should be " 1148 "%d, is %d\n", (int)groupIndex, (int)firstFree, 1149 (int)group.fFirstFree); 1150 } 1151 return; 1152 } 1153 } 1154 1155 if (currentLength == 0) { 1156 // start new range 1157 currentStart = currentBit; 1158 } 1159 currentLength++; 1160 } else if (currentLength) { 1161 // end of a range 1162 if (currentLength > largestLength) { 1163 largestStart = currentStart; 1164 largestLength = currentLength; 1165 } 1166 currentLength = 0; 1167 } 1168 currentBit++; 1169 } 1170 } 1171 1172 if (currentLength > largestLength) { 1173 largestStart = currentStart; 1174 largestLength = currentLength; 1175 } 1176 1177 if (firstFree >= 0 && firstFree < group.fFirstFree) { 1178 // mostly harmless but noteworthy 1179 dprintf("group %d first free too late: should be %d, is %d\n", 1180 (int)groupIndex, (int)firstFree, (int)group.fFirstFree); 1181 } 1182 if (group.fLargestValid 1183 && (largestStart != group.fLargestStart 1184 || largestLength != group.fLargestLength)) { 1185 panic("bfs %p: group %d largest differs: %d.%d, checked %d.%d.\n", 1186 fVolume, (int)groupIndex, (int)group.fLargestStart, 1187 (int)group.fLargestLength, (int)largestStart, (int)largestLength); 1188 } 1189 } 1190 #endif // DEBUG_ALLOCATION_GROUPS 1191 1192 1193 status_t 1194 BlockAllocator::Trim(uint64 offset, uint64 size, uint64& trimmedSize) 1195 { 1196 const uint32 kTrimRanges = 128; 1197 fs_trim_data* trimData = (fs_trim_data*)malloc(sizeof(fs_trim_data) 1198 + sizeof(uint64) * kTrimRanges); 1199 if (trimData == NULL) 1200 return B_NO_MEMORY; 1201 1202 MemoryDeleter deleter(trimData); 1203 RecursiveLocker locker(fLock); 1204 1205 // TODO: take given offset and size into account! 1206 int32 lastGroup = fNumGroups - 1; 1207 uint32 firstBlock = 0; 1208 uint32 firstBit = 0; 1209 uint64 currentBlock = 0; 1210 uint32 blockShift = fVolume->BlockShift(); 1211 1212 uint64 firstFree = 0; 1213 size_t freeLength = 0; 1214 1215 trimData->range_count = 0; 1216 trimmedSize = 0; 1217 1218 AllocationBlock cached(fVolume); 1219 for (int32 groupIndex = 0; groupIndex <= lastGroup; groupIndex++) { 1220 AllocationGroup& group = fGroups[groupIndex]; 1221 1222 for (uint32 block = firstBlock; block < group.NumBlocks(); block++) { 1223 cached.SetTo(group, block); 1224 1225 for (uint32 i = firstBit; i < cached.NumBlockBits(); i++) { 1226 if (cached.IsUsed(i)) { 1227 // Block is in use 1228 if (freeLength > 0) { 1229 status_t status = _TrimNext(*trimData, kTrimRanges, 1230 firstFree << blockShift, freeLength << blockShift, 1231 false, trimmedSize); 1232 if (status != B_OK) 1233 return status; 1234 1235 freeLength = 0; 1236 } 1237 } else if (freeLength++ == 0) { 1238 // Block is free, start new free range 1239 firstFree = currentBlock; 1240 } 1241 1242 currentBlock++; 1243 } 1244 } 1245 1246 firstBlock = 0; 1247 firstBit = 0; 1248 } 1249 1250 return _TrimNext(*trimData, kTrimRanges, firstFree << blockShift, 1251 freeLength << blockShift, true, trimmedSize); 1252 } 1253 1254 1255 // #pragma mark - Bitmap validity checking 1256 1257 // TODO: implement new FS checking API 1258 // Functions to check the validity of the bitmap - they are used from 1259 // the "checkfs" command (since this does even a bit more, maybe we should 1260 // move this some place else?) 1261 1262 1263 bool 1264 BlockAllocator::_IsValidCheckControl(const check_control* control) 1265 { 1266 if (control == NULL 1267 || control->magic != BFS_IOCTL_CHECK_MAGIC) { 1268 FATAL(("invalid check_control (%p)!\n", control)); 1269 return false; 1270 } 1271 1272 return true; 1273 } 1274 1275 1276 status_t 1277 BlockAllocator::StartChecking(const check_control* control) 1278 { 1279 if (!_IsValidCheckControl(control)) 1280 return B_BAD_VALUE; 1281 1282 fVolume->GetJournal(0)->Lock(NULL, true); 1283 // Lock the volume's journal 1284 1285 recursive_lock_lock(&fLock); 1286 1287 size_t size = BitmapSize(); 1288 fCheckBitmap = (uint32*)malloc(size); 1289 if (fCheckBitmap == NULL) { 1290 recursive_lock_unlock(&fLock); 1291 fVolume->GetJournal(0)->Unlock(NULL, true); 1292 return B_NO_MEMORY; 1293 } 1294 1295 fCheckCookie = new(std::nothrow) check_cookie(); 1296 if (fCheckCookie == NULL) { 1297 free(fCheckBitmap); 1298 fCheckBitmap = NULL; 1299 recursive_lock_unlock(&fLock); 1300 fVolume->GetJournal(0)->Unlock(NULL, true); 1301 1302 return B_NO_MEMORY; 1303 } 1304 1305 memcpy(&fCheckCookie->control, control, sizeof(check_control)); 1306 memset(&fCheckCookie->control.stats, 0, sizeof(control->stats)); 1307 1308 // initialize bitmap 1309 memset(fCheckBitmap, 0, size); 1310 for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); 1311 block-- > 0;) { 1312 _SetCheckBitmapAt(block); 1313 } 1314 1315 fCheckCookie->pass = BFS_CHECK_PASS_BITMAP; 1316 fCheckCookie->stack.Push(fVolume->Root()); 1317 fCheckCookie->stack.Push(fVolume->Indices()); 1318 fCheckCookie->iterator = NULL; 1319 fCheckCookie->control.stats.block_size = fVolume->BlockSize(); 1320 1321 // Put removed vnodes to the stack -- they are not reachable by traversing 1322 // the file system anymore. 1323 InodeList::Iterator iterator = fVolume->RemovedInodes().GetIterator(); 1324 while (Inode* inode = iterator.Next()) { 1325 fCheckCookie->stack.Push(inode->BlockRun()); 1326 } 1327 1328 // TODO: check reserved area in bitmap! 1329 1330 return B_OK; 1331 } 1332 1333 1334 status_t 1335 BlockAllocator::StopChecking(check_control* control) 1336 { 1337 if (fCheckCookie == NULL) 1338 return B_NO_INIT; 1339 1340 if (fCheckCookie->iterator != NULL) { 1341 delete fCheckCookie->iterator; 1342 fCheckCookie->iterator = NULL; 1343 1344 // the current directory inode is still locked in memory 1345 put_vnode(fVolume->FSVolume(), fVolume->ToVnode(fCheckCookie->current)); 1346 } 1347 1348 if (fVolume->IsReadOnly()) { 1349 // We can't fix errors on this volume 1350 fCheckCookie->control.flags = 0; 1351 } 1352 1353 if (fCheckCookie->control.status != B_ENTRY_NOT_FOUND) 1354 FATAL(("BlockAllocator::CheckNextNode() didn't run through\n")); 1355 1356 switch (fCheckCookie->pass) { 1357 case BFS_CHECK_PASS_BITMAP: 1358 // if CheckNextNode() could completely work through, we can 1359 // fix any damages of the bitmap 1360 if (fCheckCookie->control.status == B_ENTRY_NOT_FOUND) 1361 _WriteBackCheckBitmap(); 1362 break; 1363 1364 case BFS_CHECK_PASS_INDEX: 1365 _FreeIndices(); 1366 break; 1367 } 1368 1369 fVolume->SetCheckingThread(-1); 1370 1371 if (control != NULL) 1372 user_memcpy(control, &fCheckCookie->control, sizeof(check_control)); 1373 1374 free(fCheckBitmap); 1375 fCheckBitmap = NULL; 1376 delete fCheckCookie; 1377 fCheckCookie = NULL; 1378 recursive_lock_unlock(&fLock); 1379 fVolume->GetJournal(0)->Unlock(NULL, true); 1380 1381 return B_OK; 1382 } 1383 1384 1385 status_t 1386 BlockAllocator::CheckNextNode(check_control* control) 1387 { 1388 if (fCheckCookie == NULL) 1389 return B_NO_INIT; 1390 1391 fVolume->SetCheckingThread(find_thread(NULL)); 1392 1393 // Make sure the user control is copied on exit 1394 class CopyControlOnExit { 1395 public: 1396 CopyControlOnExit(check_control* source, check_control* userTarget) 1397 : 1398 fSource(source), 1399 fTarget(userTarget) 1400 { 1401 } 1402 1403 ~CopyControlOnExit() 1404 { 1405 if (fTarget != NULL) 1406 user_memcpy(fTarget, fSource, sizeof(check_control)); 1407 } 1408 1409 private: 1410 check_control* fSource; 1411 check_control* fTarget; 1412 } copyControl(&fCheckCookie->control, control); 1413 1414 while (true) { 1415 if (fCheckCookie->iterator == NULL) { 1416 if (!fCheckCookie->stack.Pop(&fCheckCookie->current)) { 1417 // No more runs on the stack, we might be finished! 1418 if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP 1419 && !fCheckCookie->indices.IsEmpty()) { 1420 // Start second pass to repair indices 1421 _WriteBackCheckBitmap(); 1422 1423 fCheckCookie->pass = BFS_CHECK_PASS_INDEX; 1424 fCheckCookie->control.pass = BFS_CHECK_PASS_INDEX; 1425 1426 status_t status = _PrepareIndices(); 1427 if (status != B_OK) { 1428 fCheckCookie->control.status = status; 1429 return status; 1430 } 1431 1432 fCheckCookie->stack.Push(fVolume->Root()); 1433 continue; 1434 } 1435 1436 fCheckCookie->control.status = B_ENTRY_NOT_FOUND; 1437 return B_ENTRY_NOT_FOUND; 1438 } 1439 1440 Vnode vnode(fVolume, fCheckCookie->current); 1441 Inode* inode; 1442 if (vnode.Get(&inode) != B_OK) { 1443 FATAL(("check: Could not open inode at %" B_PRIdOFF "\n", 1444 fVolume->ToBlock(fCheckCookie->current))); 1445 continue; 1446 } 1447 1448 fCheckCookie->control.inode = inode->ID(); 1449 fCheckCookie->control.mode = inode->Mode(); 1450 1451 if (!inode->IsContainer()) { 1452 // Check file 1453 fCheckCookie->control.errors = 0; 1454 fCheckCookie->control.status = CheckInode(inode, NULL); 1455 1456 if (inode->GetName(fCheckCookie->control.name) < B_OK) 1457 strcpy(fCheckCookie->control.name, "(node has no name)"); 1458 1459 return B_OK; 1460 } 1461 1462 // Check directory 1463 BPlusTree* tree = inode->Tree(); 1464 if (tree == NULL) { 1465 FATAL(("check: could not open b+tree from inode at %" B_PRIdOFF 1466 "\n", fVolume->ToBlock(fCheckCookie->current))); 1467 continue; 1468 } 1469 1470 fCheckCookie->parent = inode; 1471 fCheckCookie->parent_mode = inode->Mode(); 1472 1473 // get iterator for the next directory 1474 fCheckCookie->iterator = new(std::nothrow) TreeIterator(tree); 1475 if (fCheckCookie->iterator == NULL) 1476 RETURN_ERROR(B_NO_MEMORY); 1477 1478 // the inode must stay locked in memory until the iterator is freed 1479 vnode.Keep(); 1480 1481 // check the inode of the directory 1482 fCheckCookie->control.errors = 0; 1483 fCheckCookie->control.status = CheckInode(inode, NULL); 1484 1485 if (inode->GetName(fCheckCookie->control.name) != B_OK) 1486 strcpy(fCheckCookie->control.name, "(dir has no name)"); 1487 1488 return B_OK; 1489 } 1490 1491 char name[B_FILE_NAME_LENGTH]; 1492 uint16 length; 1493 ino_t id; 1494 1495 status_t status = fCheckCookie->iterator->GetNextEntry(name, &length, 1496 B_FILE_NAME_LENGTH, &id); 1497 if (status != B_OK) { 1498 // we no longer need this iterator 1499 delete fCheckCookie->iterator; 1500 fCheckCookie->iterator = NULL; 1501 1502 // unlock the directory's inode from memory 1503 put_vnode(fVolume->FSVolume(), 1504 fVolume->ToVnode(fCheckCookie->current)); 1505 1506 if (status == B_ENTRY_NOT_FOUND) { 1507 // We iterated over all entries already, just go on to the next 1508 continue; 1509 } 1510 1511 // Iterating over the B+tree failed - we let the checkfs run 1512 // fail completely, as we would delete all files we cannot 1513 // access. 1514 // TODO: maybe have a force parameter that actually does that. 1515 // TODO: we also need to be able to repair broken B+trees! 1516 return status; 1517 } 1518 1519 // ignore "." and ".." entries 1520 if (!strcmp(name, ".") || !strcmp(name, "..")) 1521 continue; 1522 1523 // fill in the control data as soon as we have them 1524 strlcpy(fCheckCookie->control.name, name, B_FILE_NAME_LENGTH); 1525 fCheckCookie->control.inode = id; 1526 fCheckCookie->control.errors = 0; 1527 1528 Vnode vnode(fVolume, id); 1529 Inode* inode; 1530 if (vnode.Get(&inode) != B_OK) { 1531 FATAL(("Could not open inode ID %" B_PRIdINO "!\n", id)); 1532 fCheckCookie->control.errors |= BFS_COULD_NOT_OPEN; 1533 1534 if ((fCheckCookie->control.flags & BFS_REMOVE_INVALID) != 0) { 1535 status = _RemoveInvalidNode(fCheckCookie->parent, 1536 fCheckCookie->iterator->Tree(), NULL, name); 1537 } else 1538 status = B_ERROR; 1539 1540 fCheckCookie->control.status = status; 1541 return B_OK; 1542 } 1543 1544 // check if the inode's name is the same as in the b+tree 1545 if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP 1546 && inode->IsRegularNode()) { 1547 RecursiveLocker locker(inode->SmallDataLock()); 1548 NodeGetter node(fVolume, inode); 1549 if (node.Node() == NULL) { 1550 fCheckCookie->control.errors |= BFS_COULD_NOT_OPEN; 1551 fCheckCookie->control.status = B_IO_ERROR; 1552 return B_OK; 1553 } 1554 1555 const char* localName = inode->Name(node.Node()); 1556 if (localName == NULL || strcmp(localName, name)) { 1557 fCheckCookie->control.errors |= BFS_NAMES_DONT_MATCH; 1558 FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, 1559 localName)); 1560 1561 if ((fCheckCookie->control.flags & BFS_FIX_NAME_MISMATCHES) 1562 != 0) { 1563 // Rename the inode 1564 Transaction transaction(fVolume, inode->BlockNumber()); 1565 1566 // Note, this may need extra blocks, but the inode will 1567 // only be checked afterwards, so that it won't be lost 1568 status = inode->SetName(transaction, name); 1569 if (status == B_OK) 1570 status = inode->WriteBack(transaction); 1571 if (status == B_OK) 1572 status = transaction.Done(); 1573 if (status != B_OK) { 1574 fCheckCookie->control.status = status; 1575 return B_OK; 1576 } 1577 } 1578 } 1579 } 1580 1581 fCheckCookie->control.mode = inode->Mode(); 1582 1583 // Check for the correct mode of the node (if the mode of the 1584 // file don't fit to its parent, there is a serious problem) 1585 if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP 1586 && (((fCheckCookie->parent_mode & S_ATTR_DIR) != 0 1587 && !inode->IsAttribute()) 1588 || ((fCheckCookie->parent_mode & S_INDEX_DIR) != 0 1589 && !inode->IsIndex()) 1590 || (is_directory(fCheckCookie->parent_mode) 1591 && !inode->IsRegularNode()))) { 1592 FATAL(("inode at %" B_PRIdOFF " is of wrong type: %o (parent " 1593 "%o at %" B_PRIdOFF ")!\n", inode->BlockNumber(), 1594 inode->Mode(), fCheckCookie->parent_mode, 1595 fCheckCookie->parent->BlockNumber())); 1596 1597 // if we are allowed to fix errors, we should remove the file 1598 if ((fCheckCookie->control.flags & BFS_REMOVE_WRONG_TYPES) != 0 1599 && (fCheckCookie->control.flags & BFS_FIX_BITMAP_ERRORS) 1600 != 0) { 1601 status = _RemoveInvalidNode(fCheckCookie->parent, NULL, 1602 inode, name); 1603 } else 1604 status = B_ERROR; 1605 1606 fCheckCookie->control.errors |= BFS_WRONG_TYPE; 1607 fCheckCookie->control.status = status; 1608 return B_OK; 1609 } 1610 1611 // push the directory on the stack so that it will be scanned later 1612 if (inode->IsContainer() && !inode->IsIndex()) 1613 fCheckCookie->stack.Push(inode->BlockRun()); 1614 else { 1615 // check it now 1616 fCheckCookie->control.status = CheckInode(inode, name); 1617 return B_OK; 1618 } 1619 } 1620 // is never reached 1621 } 1622 1623 1624 status_t 1625 BlockAllocator::_RemoveInvalidNode(Inode* parent, BPlusTree* tree, Inode* inode, 1626 const char* name) 1627 { 1628 // It's safe to start a transaction, because Inode::Remove() 1629 // won't touch the block bitmap (which we hold the lock for) 1630 // if we set the INODE_DONT_FREE_SPACE flag - since we fix 1631 // the bitmap anyway. 1632 Transaction transaction(fVolume, parent->BlockNumber()); 1633 status_t status; 1634 1635 if (inode != NULL) { 1636 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE); 1637 1638 status = parent->Remove(transaction, name, NULL, false, true); 1639 } else { 1640 parent->WriteLockInTransaction(transaction); 1641 1642 // does the file even exist? 1643 off_t id; 1644 status = tree->Find((uint8*)name, (uint16)strlen(name), &id); 1645 if (status == B_OK) 1646 status = tree->Remove(transaction, name, id); 1647 } 1648 1649 if (status == B_OK) { 1650 entry_cache_remove(fVolume->ID(), parent->ID(), name); 1651 transaction.Done(); 1652 } 1653 1654 return status; 1655 } 1656 1657 1658 bool 1659 BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const 1660 { 1661 size_t size = BitmapSize(); 1662 uint32 index = block / 32; // 32bit resolution 1663 if (index > size / 4) 1664 return false; 1665 1666 return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) 1667 & (1UL << (block & 0x1f)); 1668 } 1669 1670 1671 void 1672 BlockAllocator::_SetCheckBitmapAt(off_t block) 1673 { 1674 size_t size = BitmapSize(); 1675 uint32 index = block / 32; // 32bit resolution 1676 if (index > size / 4) 1677 return; 1678 1679 fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f)); 1680 } 1681 1682 1683 status_t 1684 BlockAllocator::_WriteBackCheckBitmap() 1685 { 1686 if (fVolume->IsReadOnly()) 1687 return B_OK; 1688 1689 // calculate the number of used blocks in the check bitmap 1690 size_t size = BitmapSize(); 1691 off_t usedBlocks = 0LL; 1692 1693 // TODO: update the allocation groups used blocks info 1694 for (uint32 i = size >> 2; i-- > 0;) { 1695 uint32 compare = 1; 1696 // Count the number of bits set 1697 for (int16 j = 0; j < 32; j++, compare <<= 1) { 1698 if ((compare & fCheckBitmap[i]) != 0) 1699 usedBlocks++; 1700 } 1701 } 1702 1703 fCheckCookie->control.stats.freed = fVolume->UsedBlocks() - usedBlocks 1704 + fCheckCookie->control.stats.missing; 1705 if (fCheckCookie->control.stats.freed < 0) 1706 fCheckCookie->control.stats.freed = 0; 1707 1708 // Should we fix errors? Were there any errors we can fix? 1709 if ((fCheckCookie->control.flags & BFS_FIX_BITMAP_ERRORS) != 0 1710 && (fCheckCookie->control.stats.freed != 0 1711 || fCheckCookie->control.stats.missing != 0)) { 1712 // If so, write the check bitmap back over the original one, 1713 // and use transactions here to play safe - we even use several 1714 // transactions, so that we don't blow the maximum log size 1715 // on large disks, since we don't need to make this atomic. 1716 #if 0 1717 // prints the blocks that differ 1718 off_t block = 0; 1719 for (int32 i = 0; i < fNumGroups; i++) { 1720 AllocationBlock cached(fVolume); 1721 for (uint32 j = 0; j < fGroups[i].NumBlocks(); j++) { 1722 cached.SetTo(fGroups[i], j); 1723 for (uint32 k = 0; k < cached.NumBlockBits(); k++) { 1724 if (cached.IsUsed(k) != _CheckBitmapIsUsedAt(block)) { 1725 dprintf("differ block %lld (should be %d)\n", block, 1726 _CheckBitmapIsUsedAt(block)); 1727 } 1728 block++; 1729 } 1730 } 1731 } 1732 #endif 1733 1734 fVolume->SuperBlock().used_blocks 1735 = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 1736 1737 size_t blockSize = fVolume->BlockSize(); 1738 1739 for (uint32 i = 0; i < fNumBlocks; i += 512) { 1740 Transaction transaction(fVolume, 1 + i); 1741 1742 uint32 blocksToWrite = 512; 1743 if (blocksToWrite + i > fNumBlocks) 1744 blocksToWrite = fNumBlocks - i; 1745 1746 status_t status = transaction.WriteBlocks(1 + i, 1747 (uint8*)fCheckBitmap + i * blockSize, blocksToWrite); 1748 if (status < B_OK) { 1749 FATAL(("error writing bitmap: %s\n", strerror(status))); 1750 return status; 1751 } 1752 transaction.Done(); 1753 } 1754 } 1755 1756 return B_OK; 1757 } 1758 1759 1760 /*! Checks whether or not the specified block range is allocated or not, 1761 depending on the \a allocated argument. 1762 */ 1763 status_t 1764 BlockAllocator::CheckBlocks(off_t start, off_t length, bool allocated) 1765 { 1766 if (start < 0 || start + length > fVolume->NumBlocks()) 1767 return B_BAD_VALUE; 1768 1769 int32 group = start >> fVolume->AllocationGroupShift(); 1770 uint32 bitmapBlock = start / (fVolume->BlockSize() << 3); 1771 uint32 blockOffset = start % (fVolume->BlockSize() << 3); 1772 1773 uint32 groupBlock = bitmapBlock % fBlocksPerGroup; 1774 1775 AllocationBlock cached(fVolume); 1776 1777 while (groupBlock < fGroups[group].NumBlocks() && length > 0) { 1778 if (cached.SetTo(fGroups[group], groupBlock) != B_OK) 1779 RETURN_ERROR(B_IO_ERROR); 1780 1781 for (; blockOffset < cached.NumBlockBits() && length > 0; 1782 blockOffset++, length--) { 1783 if (cached.IsUsed(blockOffset) != allocated) { 1784 RETURN_ERROR(B_BAD_DATA); 1785 } 1786 } 1787 1788 blockOffset = 0; 1789 1790 if (++groupBlock >= fGroups[group].NumBlocks()) { 1791 groupBlock = 0; 1792 group++; 1793 } 1794 } 1795 1796 return B_OK; 1797 } 1798 1799 1800 status_t 1801 BlockAllocator::CheckBlockRun(block_run run, const char* type, bool allocated) 1802 { 1803 if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups 1804 || run.Start() > fGroups[run.AllocationGroup()].fNumBits 1805 || uint32(run.Start() + run.Length()) 1806 > fGroups[run.AllocationGroup()].fNumBits 1807 || run.length == 0) { 1808 PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, 1809 run.AllocationGroup(), run.Start(), run.Length())); 1810 if (fCheckCookie == NULL) 1811 return B_BAD_DATA; 1812 1813 fCheckCookie->control.errors |= BFS_INVALID_BLOCK_RUN; 1814 return B_OK; 1815 } 1816 1817 uint32 bitsPerBlock = fVolume->BlockSize() << 3; 1818 uint32 block = run.Start() / bitsPerBlock; 1819 uint32 pos = run.Start() % bitsPerBlock; 1820 int32 length = 0; 1821 off_t firstMissing = -1, firstSet = -1; 1822 off_t firstGroupBlock 1823 = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift(); 1824 1825 AllocationBlock cached(fVolume); 1826 1827 for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) { 1828 if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK) 1829 RETURN_ERROR(B_IO_ERROR); 1830 1831 if (pos >= cached.NumBlockBits()) { 1832 // something very strange has happened... 1833 RETURN_ERROR(B_ERROR); 1834 } 1835 1836 while (length < run.Length() && pos < cached.NumBlockBits()) { 1837 if (cached.IsUsed(pos) != allocated) { 1838 if (fCheckCookie == NULL) { 1839 PRINT(("%s: block_run(%ld, %u, %u) is only partially " 1840 "allocated (pos = %ld, length = %ld)!\n", type, 1841 run.AllocationGroup(), run.Start(), run.Length(), 1842 pos, length)); 1843 return B_BAD_DATA; 1844 } 1845 if (firstMissing == -1) { 1846 firstMissing = firstGroupBlock + pos + block * bitsPerBlock; 1847 fCheckCookie->control.errors |= BFS_MISSING_BLOCKS; 1848 } 1849 fCheckCookie->control.stats.missing++; 1850 } else if (firstMissing != -1) { 1851 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are " 1852 "%sallocated!\n", type, run.AllocationGroup(), run.Start(), 1853 run.Length(), firstMissing, 1854 firstGroupBlock + pos + block * bitsPerBlock - 1, 1855 allocated ? "not " : "")); 1856 firstMissing = -1; 1857 } 1858 1859 if (fCheckBitmap != NULL) { 1860 // Set the block in the check bitmap as well, but have a look 1861 // if it is already allocated first 1862 uint32 offset = pos + block * bitsPerBlock; 1863 if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) { 1864 if (firstSet == -1) { 1865 firstSet = firstGroupBlock + offset; 1866 fCheckCookie->control.errors |= BFS_BLOCKS_ALREADY_SET; 1867 dprintf("block %" B_PRIdOFF " is already set!!!\n", 1868 firstGroupBlock + offset); 1869 } 1870 fCheckCookie->control.stats.already_set++; 1871 } else { 1872 if (firstSet != -1) { 1873 FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF 1874 " - %" B_PRIdOFF " are already set!\n", type, 1875 (int)run.AllocationGroup(), run.Start(), 1876 run.Length(), firstSet, 1877 firstGroupBlock + offset - 1)); 1878 firstSet = -1; 1879 } 1880 _SetCheckBitmapAt(firstGroupBlock + offset); 1881 } 1882 } 1883 length++; 1884 pos++; 1885 } 1886 1887 if (block + 1 >= fBlocksPerGroup || length >= run.Length()) { 1888 if (firstMissing != -1) { 1889 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not " 1890 "allocated!\n", type, run.AllocationGroup(), run.Start(), 1891 run.Length(), firstMissing, 1892 firstGroupBlock + pos + block * bitsPerBlock - 1)); 1893 } 1894 if (firstSet != -1) { 1895 FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF " - %" 1896 B_PRIdOFF " are already set!\n", type, 1897 (int)run.AllocationGroup(), run.Start(), run.Length(), 1898 firstSet, 1899 firstGroupBlock + pos + block * bitsPerBlock - 1)); 1900 } 1901 } 1902 } 1903 1904 return B_OK; 1905 } 1906 1907 1908 status_t 1909 BlockAllocator::CheckInode(Inode* inode, const char* name) 1910 { 1911 if (fCheckCookie != NULL && fCheckBitmap == NULL) 1912 return B_NO_INIT; 1913 if (inode == NULL) 1914 return B_BAD_VALUE; 1915 1916 switch (fCheckCookie->pass) { 1917 case BFS_CHECK_PASS_BITMAP: 1918 { 1919 status_t status = _CheckInodeBlocks(inode, name); 1920 if (status != B_OK) 1921 return status; 1922 1923 // Check the B+tree as well 1924 if (inode->IsContainer()) { 1925 bool repairErrors 1926 = (fCheckCookie->control.flags & BFS_FIX_BPLUSTREES) != 0; 1927 bool errorsFound = false; 1928 status = inode->Tree()->Validate(repairErrors, errorsFound); 1929 if (errorsFound) { 1930 fCheckCookie->control.errors |= BFS_INVALID_BPLUSTREE; 1931 if (inode->IsIndex() && name != NULL && repairErrors) { 1932 // We completely rebuild corrupt indices 1933 check_index* index = new(std::nothrow) check_index; 1934 if (index == NULL) 1935 return B_NO_MEMORY; 1936 1937 strlcpy(index->name, name, sizeof(index->name)); 1938 index->run = inode->BlockRun(); 1939 fCheckCookie->indices.Push(index); 1940 } 1941 } 1942 } 1943 1944 return status; 1945 } 1946 1947 case BFS_CHECK_PASS_INDEX: 1948 return _AddInodeToIndex(inode); 1949 } 1950 1951 return B_OK; 1952 } 1953 1954 1955 status_t 1956 BlockAllocator::_CheckInodeBlocks(Inode* inode, const char* name) 1957 { 1958 status_t status = CheckBlockRun(inode->BlockRun(), "inode"); 1959 if (status != B_OK) 1960 return status; 1961 1962 // If the inode has an attribute directory, push it on the stack 1963 if (!inode->Attributes().IsZero()) 1964 fCheckCookie->stack.Push(inode->Attributes()); 1965 1966 if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) { 1967 // symlinks may not have a valid data stream 1968 if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH) 1969 return B_BAD_DATA; 1970 1971 return B_OK; 1972 } 1973 1974 data_stream* data = &inode->Node().data; 1975 1976 // check the direct range 1977 1978 if (data->max_direct_range) { 1979 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1980 if (data->direct[i].IsZero()) 1981 break; 1982 1983 status = CheckBlockRun(data->direct[i], "direct"); 1984 if (status < B_OK) 1985 return status; 1986 1987 fCheckCookie->control.stats.direct_block_runs++; 1988 fCheckCookie->control.stats.blocks_in_direct 1989 += data->direct[i].Length(); 1990 } 1991 } 1992 1993 CachedBlock cached(fVolume); 1994 1995 // check the indirect range 1996 1997 if (data->max_indirect_range) { 1998 status = CheckBlockRun(data->indirect, "indirect"); 1999 if (status < B_OK) 2000 return status; 2001 2002 off_t block = fVolume->ToBlock(data->indirect); 2003 2004 for (int32 i = 0; i < data->indirect.Length(); i++) { 2005 block_run* runs = (block_run*)cached.SetTo(block + i); 2006 if (runs == NULL) 2007 RETURN_ERROR(B_IO_ERROR); 2008 2009 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 2010 int32 index = 0; 2011 for (; index < runsPerBlock; index++) { 2012 if (runs[index].IsZero()) 2013 break; 2014 2015 status = CheckBlockRun(runs[index], "indirect->run"); 2016 if (status < B_OK) 2017 return status; 2018 2019 fCheckCookie->control.stats.indirect_block_runs++; 2020 fCheckCookie->control.stats.blocks_in_indirect 2021 += runs[index].Length(); 2022 } 2023 fCheckCookie->control.stats.indirect_array_blocks++; 2024 2025 if (index < runsPerBlock) 2026 break; 2027 } 2028 } 2029 2030 // check the double indirect range 2031 2032 if (data->max_double_indirect_range) { 2033 status = CheckBlockRun(data->double_indirect, "double indirect"); 2034 if (status != B_OK) 2035 return status; 2036 2037 int32 runsPerBlock = runs_per_block(fVolume->BlockSize()); 2038 int32 runsPerArray = runsPerBlock * data->double_indirect.Length(); 2039 2040 CachedBlock cachedDirect(fVolume); 2041 2042 for (int32 indirectIndex = 0; indirectIndex < runsPerArray; 2043 indirectIndex++) { 2044 // get the indirect array block 2045 block_run* array = (block_run*)cached.SetTo( 2046 fVolume->ToBlock(data->double_indirect) 2047 + indirectIndex / runsPerBlock); 2048 if (array == NULL) 2049 return B_IO_ERROR; 2050 2051 block_run indirect = array[indirectIndex % runsPerBlock]; 2052 // are we finished yet? 2053 if (indirect.IsZero()) 2054 return B_OK; 2055 2056 status = CheckBlockRun(indirect, "double indirect->runs"); 2057 if (status != B_OK) 2058 return status; 2059 2060 int32 maxIndex 2061 = ((uint32)indirect.Length() << fVolume->BlockShift()) 2062 / sizeof(block_run); 2063 2064 for (int32 index = 0; index < maxIndex; ) { 2065 block_run* runs = (block_run*)cachedDirect.SetTo( 2066 fVolume->ToBlock(indirect) + index / runsPerBlock); 2067 if (runs == NULL) 2068 return B_IO_ERROR; 2069 2070 do { 2071 // are we finished yet? 2072 if (runs[index % runsPerBlock].IsZero()) 2073 return B_OK; 2074 2075 status = CheckBlockRun(runs[index % runsPerBlock], 2076 "double indirect->runs->run"); 2077 if (status != B_OK) 2078 return status; 2079 2080 fCheckCookie->control.stats.double_indirect_block_runs++; 2081 fCheckCookie->control.stats.blocks_in_double_indirect 2082 += runs[index % runsPerBlock].Length(); 2083 } while ((++index % runsPerBlock) != 0); 2084 } 2085 2086 fCheckCookie->control.stats.double_indirect_array_blocks++; 2087 } 2088 } 2089 2090 return B_OK; 2091 } 2092 2093 2094 status_t 2095 BlockAllocator::_PrepareIndices() 2096 { 2097 int32 count = 0; 2098 2099 for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) { 2100 check_index* index = fCheckCookie->indices.Array()[i]; 2101 Vnode vnode(fVolume, index->run); 2102 Inode* inode; 2103 status_t status = vnode.Get(&inode); 2104 if (status != B_OK) { 2105 FATAL(("check: Could not open index at %" B_PRIdOFF "\n", 2106 fVolume->ToBlock(index->run))); 2107 return status; 2108 } 2109 2110 BPlusTree* tree = inode->Tree(); 2111 if (tree == NULL) { 2112 // TODO: We can't yet repair those 2113 continue; 2114 } 2115 2116 status = tree->MakeEmpty(); 2117 if (status != B_OK) 2118 return status; 2119 2120 index->inode = inode; 2121 vnode.Keep(); 2122 count++; 2123 } 2124 2125 return count == 0 ? B_ENTRY_NOT_FOUND : B_OK; 2126 } 2127 2128 2129 void 2130 BlockAllocator::_FreeIndices() 2131 { 2132 for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) { 2133 check_index* index = fCheckCookie->indices.Array()[i]; 2134 if (index->inode != NULL) { 2135 put_vnode(fVolume->FSVolume(), 2136 fVolume->ToVnode(index->inode->BlockRun())); 2137 } 2138 } 2139 fCheckCookie->indices.MakeEmpty(); 2140 } 2141 2142 2143 status_t 2144 BlockAllocator::_AddInodeToIndex(Inode* inode) 2145 { 2146 Transaction transaction(fVolume, inode->BlockNumber()); 2147 2148 for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) { 2149 check_index* index = fCheckCookie->indices.Array()[i]; 2150 if (index->inode == NULL) 2151 continue; 2152 2153 index->inode->WriteLockInTransaction(transaction); 2154 2155 BPlusTree* tree = index->inode->Tree(); 2156 if (tree == NULL) 2157 return B_ERROR; 2158 2159 status_t status = B_OK; 2160 2161 if (!strcmp(index->name, "name")) { 2162 if (inode->InNameIndex()) { 2163 char name[B_FILE_NAME_LENGTH]; 2164 if (inode->GetName(name, B_FILE_NAME_LENGTH) != B_OK) 2165 return B_ERROR; 2166 2167 status = tree->Insert(transaction, name, inode->ID()); 2168 } 2169 } else if (!strcmp(index->name, "last_modified")) { 2170 if (inode->InLastModifiedIndex()) { 2171 status = tree->Insert(transaction, inode->OldLastModified(), 2172 inode->ID()); 2173 } 2174 } else if (!strcmp(index->name, "size")) { 2175 if (inode->InSizeIndex()) 2176 status = tree->Insert(transaction, inode->Size(), inode->ID()); 2177 } else { 2178 uint8 key[BPLUSTREE_MAX_KEY_LENGTH]; 2179 size_t keyLength = BPLUSTREE_MAX_KEY_LENGTH; 2180 if (inode->ReadAttribute(index->name, B_ANY_TYPE, 0, key, 2181 &keyLength) == B_OK) { 2182 status = tree->Insert(transaction, key, keyLength, inode->ID()); 2183 } 2184 } 2185 2186 if (status != B_OK) 2187 return status; 2188 } 2189 2190 return transaction.Done(); 2191 } 2192 2193 2194 status_t 2195 BlockAllocator::_AddTrim(fs_trim_data& trimData, uint32 maxRanges, 2196 uint64 offset, uint64 size) 2197 { 2198 if (trimData.range_count < maxRanges && size > 0) { 2199 trimData.ranges[trimData.range_count].offset = offset; 2200 trimData.ranges[trimData.range_count].size = size; 2201 trimData.range_count++; 2202 return true; 2203 } 2204 2205 return false; 2206 } 2207 2208 2209 status_t 2210 BlockAllocator::_TrimNext(fs_trim_data& trimData, uint32 maxRanges, 2211 uint64 offset, uint64 size, bool force, uint64& trimmedSize) 2212 { 2213 PRINT(("_TrimNext(index %" B_PRIu32 ", offset %" B_PRIu64 ", size %" 2214 B_PRIu64 ")\n", trimData.range_count, offset, size)); 2215 2216 bool pushed = _AddTrim(trimData, maxRanges, offset, size); 2217 2218 if (!pushed || force) { 2219 // Trim now 2220 trimData.trimmed_size = 0; 2221 dprintf("TRIM FS:\n"); 2222 for (uint32 i = 0; i < trimData.range_count; i++) { 2223 dprintf("[%3" B_PRIu32 "] %" B_PRIu64 " : %" B_PRIu64 "\n", i, 2224 trimData.ranges[i].offset, trimData.ranges[i].size); 2225 } 2226 if (ioctl(fVolume->Device(), B_TRIM_DEVICE, &trimData, 2227 sizeof(fs_trim_data)) != 0) { 2228 return errno; 2229 } 2230 2231 trimmedSize += trimData.trimmed_size; 2232 trimData.range_count = 0; 2233 } 2234 2235 if (!pushed) 2236 _AddTrim(trimData, maxRanges, offset, size); 2237 2238 return B_OK; 2239 } 2240 2241 2242 // #pragma mark - debugger commands 2243 2244 2245 #ifdef BFS_DEBUGGER_COMMANDS 2246 2247 void 2248 BlockAllocator::Dump(int32 index) 2249 { 2250 kprintf("allocation groups: %" B_PRId32 " (base %p)\n", fNumGroups, fGroups); 2251 kprintf("blocks per group: %" B_PRId32 "\n", fBlocksPerGroup); 2252 2253 for (int32 i = 0; i < fNumGroups; i++) { 2254 if (index != -1 && i != index) 2255 continue; 2256 2257 AllocationGroup& group = fGroups[i]; 2258 2259 kprintf("[%3" B_PRId32 "] num bits: %" B_PRIu32 " (%p)\n", i, 2260 group.NumBits(), &group); 2261 kprintf(" num blocks: %" B_PRIu32 "\n", group.NumBlocks()); 2262 kprintf(" start: %" B_PRId32 "\n", group.Start()); 2263 kprintf(" first free: %" B_PRId32 "\n", group.fFirstFree); 2264 kprintf(" largest start: %" B_PRId32 "%s\n", group.fLargestStart, 2265 group.fLargestValid ? "" : " (invalid)"); 2266 kprintf(" largest length: %" B_PRId32 "\n", group.fLargestLength); 2267 kprintf(" free bits: %" B_PRId32 "\n", group.fFreeBits); 2268 } 2269 } 2270 2271 2272 #if BFS_TRACING 2273 static char kTraceBuffer[256]; 2274 2275 2276 int 2277 dump_block_allocator_blocks(int argc, char** argv) 2278 { 2279 if (argc != 3 || !strcmp(argv[1], "--help")) { 2280 kprintf("usage: %s <ptr-to-volume> <block>\n", argv[0]); 2281 return 0; 2282 } 2283 2284 Volume* volume = (Volume*)parse_expression(argv[1]); 2285 off_t block = parse_expression(argv[2]); 2286 2287 // iterate over all tracing entries to find overlapping actions 2288 2289 using namespace BFSBlockTracing; 2290 2291 LazyTraceOutput out(kTraceBuffer, sizeof(kTraceBuffer), 0); 2292 TraceEntryIterator iterator; 2293 while (TraceEntry* _entry = iterator.Next()) { 2294 if (Allocate* entry = dynamic_cast<Allocate*>(_entry)) { 2295 off_t first = volume->ToBlock(entry->Run()); 2296 off_t last = first - 1 + entry->Run().Length(); 2297 if (block >= first && block <= last) { 2298 out.Clear(); 2299 const char* dump = out.DumpEntry(entry); 2300 kprintf("%5ld. %s\n", iterator.Index(), dump); 2301 } 2302 } else if (Free* entry = dynamic_cast<Free*>(_entry)) { 2303 off_t first = volume->ToBlock(entry->Run()); 2304 off_t last = first - 1 + entry->Run().Length(); 2305 if (block >= first && block <= last) { 2306 out.Clear(); 2307 const char* dump = out.DumpEntry(entry); 2308 kprintf("%5ld. %s\n", iterator.Index(), dump); 2309 } 2310 } 2311 } 2312 2313 return 0; 2314 } 2315 #endif 2316 2317 2318 int 2319 dump_block_allocator(int argc, char** argv) 2320 { 2321 int32 group = -1; 2322 if (argc == 3) { 2323 group = parse_expression(argv[2]); 2324 argc--; 2325 } 2326 2327 if (argc != 2 || !strcmp(argv[1], "--help")) { 2328 kprintf("usage: %s <ptr-to-volume> [group]\n", argv[0]); 2329 return 0; 2330 } 2331 2332 Volume* volume = (Volume*)parse_expression(argv[1]); 2333 BlockAllocator& allocator = volume->Allocator(); 2334 2335 allocator.Dump(group); 2336 return 0; 2337 } 2338 2339 #endif // BFS_DEBUGGER_COMMANDS 2340 2341