1 /* 2 * Copyright 2001-2013, Axel Dörfler, axeld@pinc-software.de. 3 * This file may be used under the terms of the MIT License. 4 */ 5 6 7 //! Block bitmap handling and allocation policies 8 9 10 #include "BlockAllocator.h" 11 12 #include "bfs_control.h" 13 #include "BPlusTree.h" 14 #include "Debug.h" 15 #include "Inode.h" 16 #include "Volume.h" 17 18 19 // Things the BlockAllocator should do: 20 21 // - find a range of blocks of a certain size nearby a specific position 22 // - allocating an unsharp range of blocks for pre-allocation 23 // - free blocks 24 // - know how to deal with each allocation, special handling for directories, 25 // files, symlinks, etc. (type sensitive allocation policies) 26 27 // What makes the code complicated is the fact that we are not just reading 28 // in the whole bitmap and operate on that in memory - e.g. a 13 GB partition 29 // with a block size of 2048 bytes already has a 800kB bitmap, and the size 30 // of partitions will grow even more - so that's not an option. 31 // Instead we are reading in every block when it's used - since an allocation 32 // group can span several blocks in the block bitmap, the AllocationBlock 33 // class is there to make handling those easier. 34 35 // The current implementation is only slightly optimized and could probably 36 // be improved a lot. Furthermore, the allocation policies used here should 37 // have some real world tests. 38 39 #if BFS_TRACING && !defined(BFS_SHELL) 40 namespace BFSBlockTracing { 41 42 43 class Allocate : public AbstractTraceEntry { 44 public: 45 Allocate(block_run run) 46 : 47 fRun(run) 48 { 49 Initialized(); 50 } 51 52 virtual void AddDump(TraceOutput& out) 53 { 54 out.Print("bfs:alloc %lu.%u.%u", fRun.AllocationGroup(), 55 fRun.Start(), fRun.Length()); 56 } 57 58 const block_run& Run() const { return fRun; } 59 60 private: 61 block_run fRun; 62 }; 63 64 65 class Free : public AbstractTraceEntry { 66 public: 67 Free(block_run run) 68 : 69 fRun(run) 70 { 71 Initialized(); 72 } 73 74 virtual void AddDump(TraceOutput& out) 75 { 76 out.Print("bfs:free %lu.%u.%u", fRun.AllocationGroup(), 77 fRun.Start(), fRun.Length()); 78 } 79 80 const block_run& Run() const { return fRun; } 81 82 private: 83 block_run fRun; 84 }; 85 86 87 static uint32 88 checksum(const uint8* data, size_t size) 89 { 90 const uint32* data4 = (const uint32*)data; 91 uint32 sum = 0; 92 while (size >= 4) { 93 sum += *data4; 94 data4++; 95 size -= 4; 96 } 97 return sum; 98 } 99 100 101 class Block : public AbstractTraceEntry { 102 public: 103 Block(const char* label, off_t blockNumber, const uint8* data, 104 size_t size, uint32 start = 0, uint32 length = 0) 105 : 106 fBlock(blockNumber), 107 fData(data), 108 fStart(start), 109 fLength(length), 110 fLabel(label) 111 { 112 fSum = checksum(data, size); 113 Initialized(); 114 } 115 116 virtual void AddDump(TraceOutput& out) 117 { 118 out.Print("bfs:%s: block %Ld (%p), sum %lu, s/l %lu/%lu", fLabel, 119 fBlock, fData, fSum, fStart, fLength); 120 } 121 122 private: 123 off_t fBlock; 124 const uint8* fData; 125 uint32 fStart; 126 uint32 fLength; 127 uint32 fSum; 128 const char* fLabel; 129 }; 130 131 132 class BlockChange : public AbstractTraceEntry { 133 public: 134 BlockChange(const char* label, int32 block, uint32 oldData, uint32 newData) 135 : 136 fBlock(block), 137 fOldData(oldData), 138 fNewData(newData), 139 fLabel(label) 140 { 141 Initialized(); 142 } 143 144 virtual void AddDump(TraceOutput& out) 145 { 146 out.Print("bfs:%s: block %ld, %08lx -> %08lx", fLabel, 147 fBlock, fOldData, fNewData); 148 } 149 150 private: 151 int32 fBlock; 152 uint32 fOldData; 153 uint32 fNewData; 154 const char* fLabel; 155 }; 156 157 158 } // namespace BFSBlockTracing 159 160 # define T(x) new(std::nothrow) BFSBlockTracing::x; 161 #else 162 # define T(x) ; 163 #endif 164 165 #ifdef DEBUG_ALLOCATION_GROUPS 166 # define CHECK_ALLOCATION_GROUP(group) _CheckGroup(group) 167 #else 168 # define CHECK_ALLOCATION_GROUP(group) ; 169 #endif 170 171 172 struct check_index { 173 check_index() 174 : 175 inode(NULL) 176 { 177 } 178 179 char name[B_FILE_NAME_LENGTH]; 180 block_run run; 181 Inode* inode; 182 }; 183 184 185 struct check_cookie { 186 check_cookie() 187 { 188 } 189 190 uint32 pass; 191 block_run current; 192 Inode* parent; 193 mode_t parent_mode; 194 Stack<block_run> stack; 195 TreeIterator* iterator; 196 check_control control; 197 Stack<check_index*> indices; 198 }; 199 200 201 class AllocationBlock : public CachedBlock { 202 public: 203 AllocationBlock(Volume* volume); 204 205 inline void Allocate(uint16 start, uint16 numBlocks); 206 inline void Free(uint16 start, uint16 numBlocks); 207 inline bool IsUsed(uint16 block); 208 209 status_t SetTo(AllocationGroup& group, uint16 block); 210 status_t SetToWritable(Transaction& transaction, AllocationGroup& group, 211 uint16 block); 212 213 uint32 NumBlockBits() const { return fNumBits; } 214 uint32& Block(int32 index) { return ((uint32*)fBlock)[index]; } 215 uint8* Block() const { return (uint8*)fBlock; } 216 217 private: 218 uint32 fNumBits; 219 #ifdef DEBUG 220 bool fWritable; 221 #endif 222 }; 223 224 225 class AllocationGroup { 226 public: 227 AllocationGroup(); 228 229 void AddFreeRange(int32 start, int32 blocks); 230 bool IsFull() const { return fFreeBits == 0; } 231 232 status_t Allocate(Transaction& transaction, uint16 start, int32 length); 233 status_t Free(Transaction& transaction, uint16 start, int32 length); 234 235 uint32 NumBits() const { return fNumBits; } 236 uint32 NumBlocks() const { return fNumBlocks; } 237 int32 Start() const { return fStart; } 238 239 private: 240 friend class BlockAllocator; 241 242 uint32 fNumBits; 243 uint32 fNumBlocks; 244 int32 fStart; 245 int32 fFirstFree; 246 int32 fFreeBits; 247 248 int32 fLargestStart; 249 int32 fLargestLength; 250 bool fLargestValid; 251 }; 252 253 254 AllocationBlock::AllocationBlock(Volume* volume) 255 : CachedBlock(volume) 256 { 257 } 258 259 260 status_t 261 AllocationBlock::SetTo(AllocationGroup& group, uint16 block) 262 { 263 // 8 blocks per byte 264 fNumBits = fVolume->BlockSize() << 3; 265 // the last group may have less bits than the others 266 if ((block + 1) * fNumBits > group.NumBits()) 267 fNumBits = group.NumBits() % fNumBits; 268 269 #ifdef DEBUG 270 fWritable = false; 271 #endif 272 return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR; 273 } 274 275 276 status_t 277 AllocationBlock::SetToWritable(Transaction& transaction, AllocationGroup& group, 278 uint16 block) 279 { 280 // 8 blocks per byte 281 fNumBits = fVolume->BlockSize() << 3; 282 // the last group may have less bits in the last block 283 if ((block + 1) * fNumBits > group.NumBits()) 284 fNumBits = group.NumBits() % fNumBits; 285 286 #ifdef DEBUG 287 fWritable = true; 288 #endif 289 return CachedBlock::SetToWritable(transaction, group.Start() + block) 290 != NULL ? B_OK : B_ERROR; 291 } 292 293 294 bool 295 AllocationBlock::IsUsed(uint16 block) 296 { 297 if (block > fNumBits) 298 return true; 299 300 // the block bitmap is accessed in 32-bit blocks 301 return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32)); 302 } 303 304 305 void 306 AllocationBlock::Allocate(uint16 start, uint16 numBlocks) 307 { 308 ASSERT(start < fNumBits); 309 ASSERT(uint32(start + numBlocks) <= fNumBits); 310 #ifdef DEBUG 311 ASSERT(fWritable); 312 #endif 313 314 T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(), 315 start, numBlocks)); 316 317 int32 block = start >> 5; 318 319 while (numBlocks > 0) { 320 uint32 mask = 0; 321 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 322 mask |= 1UL << i; 323 324 T(BlockChange("b-alloc", block, Block(block), 325 Block(block) | HOST_ENDIAN_TO_BFS_INT32(mask))); 326 327 #if KDEBUG 328 // check for already set blocks 329 if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32*)fBlock)[block]) { 330 FATAL(("AllocationBlock::Allocate(): some blocks are already " 331 "allocated, start = %u, numBlocks = %u\n", start, numBlocks)); 332 panic("blocks already set!"); 333 } 334 #endif 335 336 Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask); 337 start = 0; 338 } 339 T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(), 340 start, numBlocks)); 341 } 342 343 344 void 345 AllocationBlock::Free(uint16 start, uint16 numBlocks) 346 { 347 ASSERT(start < fNumBits); 348 ASSERT(uint32(start + numBlocks) <= fNumBits); 349 #ifdef DEBUG 350 ASSERT(fWritable); 351 #endif 352 353 int32 block = start >> 5; 354 355 while (numBlocks > 0) { 356 uint32 mask = 0; 357 for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--) 358 mask |= 1UL << (i % 32); 359 360 T(BlockChange("b-free", block, Block(block), 361 Block(block) & HOST_ENDIAN_TO_BFS_INT32(~mask))); 362 363 Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask); 364 start = 0; 365 } 366 } 367 368 369 // #pragma mark - 370 371 372 /*! The allocation groups are created and initialized in 373 BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap() 374 respectively. 375 */ 376 AllocationGroup::AllocationGroup() 377 : 378 fFirstFree(-1), 379 fFreeBits(0), 380 fLargestValid(false) 381 { 382 } 383 384 385 void 386 AllocationGroup::AddFreeRange(int32 start, int32 blocks) 387 { 388 //D(if (blocks > 512) 389 // PRINT(("range of %ld blocks starting at %ld\n",blocks,start))); 390 391 if (fFirstFree == -1) 392 fFirstFree = start; 393 394 if (!fLargestValid || fLargestLength < blocks) { 395 fLargestStart = start; 396 fLargestLength = blocks; 397 fLargestValid = true; 398 } 399 400 fFreeBits += blocks; 401 } 402 403 404 /*! Allocates the specified run in the allocation group. 405 Doesn't check if the run is valid or already allocated partially, nor 406 does it maintain the free ranges hints or the volume's used blocks count. 407 It only does the low-level work of allocating some bits in the block bitmap. 408 Assumes that the block bitmap lock is hold. 409 */ 410 status_t 411 AllocationGroup::Allocate(Transaction& transaction, uint16 start, int32 length) 412 { 413 ASSERT(start + length <= (int32)fNumBits); 414 415 // Update the allocation group info 416 // TODO: this info will be incorrect if something goes wrong later 417 // Note, the fFirstFree block doesn't have to be really free 418 if (start == fFirstFree) 419 fFirstFree = start + length; 420 fFreeBits -= length; 421 422 if (fLargestValid) { 423 bool cut = false; 424 if (fLargestStart == start) { 425 // cut from start 426 fLargestStart += length; 427 fLargestLength -= length; 428 cut = true; 429 } else if (start > fLargestStart 430 && start < fLargestStart + fLargestLength) { 431 // cut from end 432 fLargestLength = start - fLargestStart; 433 cut = true; 434 } 435 if (cut && (fLargestLength < fLargestStart 436 || fLargestLength 437 < (int32)fNumBits - (fLargestStart + fLargestLength))) { 438 // might not be the largest block anymore 439 fLargestValid = false; 440 } 441 } 442 443 Volume* volume = transaction.GetVolume(); 444 445 // calculate block in the block bitmap and position within 446 uint32 bitsPerBlock = volume->BlockSize() << 3; 447 uint32 block = start / bitsPerBlock; 448 start = start % bitsPerBlock; 449 450 AllocationBlock cached(volume); 451 452 while (length > 0) { 453 if (cached.SetToWritable(transaction, *this, block) < B_OK) { 454 fLargestValid = false; 455 RETURN_ERROR(B_IO_ERROR); 456 } 457 458 uint32 numBlocks = length; 459 if (start + numBlocks > cached.NumBlockBits()) 460 numBlocks = cached.NumBlockBits() - start; 461 462 cached.Allocate(start, numBlocks); 463 464 length -= numBlocks; 465 start = 0; 466 block++; 467 } 468 469 return B_OK; 470 } 471 472 473 /*! Frees the specified run in the allocation group. 474 Doesn't check if the run is valid or was not completely allocated, nor 475 does it maintain the free ranges hints or the volume's used blocks count. 476 It only does the low-level work of freeing some bits in the block bitmap. 477 Assumes that the block bitmap lock is hold. 478 */ 479 status_t 480 AllocationGroup::Free(Transaction& transaction, uint16 start, int32 length) 481 { 482 ASSERT(start + length <= (int32)fNumBits); 483 484 // Update the allocation group info 485 // TODO: this info will be incorrect if something goes wrong later 486 if (fFirstFree > start) 487 fFirstFree = start; 488 fFreeBits += length; 489 490 // The range to be freed cannot be part of the valid largest range 491 ASSERT(!fLargestValid || start + length <= fLargestStart 492 || start > fLargestStart); 493 494 if (fLargestValid 495 && (start + length == fLargestStart 496 || fLargestStart + fLargestLength == start 497 || (start < fLargestStart && fLargestStart > fLargestLength) 498 || (start > fLargestStart 499 && (int32)fNumBits - (fLargestStart + fLargestLength) 500 > fLargestLength))) { 501 fLargestValid = false; 502 } 503 504 Volume* volume = transaction.GetVolume(); 505 506 // calculate block in the block bitmap and position within 507 uint32 bitsPerBlock = volume->BlockSize() << 3; 508 uint32 block = start / bitsPerBlock; 509 start = start % bitsPerBlock; 510 511 AllocationBlock cached(volume); 512 513 while (length > 0) { 514 if (cached.SetToWritable(transaction, *this, block) < B_OK) 515 RETURN_ERROR(B_IO_ERROR); 516 517 T(Block("free-1", block, cached.Block(), volume->BlockSize())); 518 uint16 freeLength = length; 519 if (uint32(start + length) > cached.NumBlockBits()) 520 freeLength = cached.NumBlockBits() - start; 521 522 cached.Free(start, freeLength); 523 524 length -= freeLength; 525 start = 0; 526 T(Block("free-2", block, cached.Block(), volume->BlockSize())); 527 block++; 528 } 529 return B_OK; 530 } 531 532 533 // #pragma mark - 534 535 536 BlockAllocator::BlockAllocator(Volume* volume) 537 : 538 fVolume(volume), 539 fGroups(NULL), 540 fCheckBitmap(NULL), 541 fCheckCookie(NULL) 542 { 543 recursive_lock_init(&fLock, "bfs allocator"); 544 } 545 546 547 BlockAllocator::~BlockAllocator() 548 { 549 recursive_lock_destroy(&fLock); 550 delete[] fGroups; 551 } 552 553 554 status_t 555 BlockAllocator::Initialize(bool full) 556 { 557 fNumGroups = fVolume->AllocationGroups(); 558 fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup(); 559 fNumBlocks = (fVolume->NumBlocks() + fVolume->BlockSize() * 8 - 1) 560 / (fVolume->BlockSize() * 8); 561 562 fGroups = new(std::nothrow) AllocationGroup[fNumGroups]; 563 if (fGroups == NULL) 564 return B_NO_MEMORY; 565 566 if (!full) 567 return B_OK; 568 569 recursive_lock_lock(&fLock); 570 // the lock will be released by the _Initialize() method 571 572 thread_id id = spawn_kernel_thread((thread_func)BlockAllocator::_Initialize, 573 "bfs block allocator", B_LOW_PRIORITY, this); 574 if (id < B_OK) 575 return _Initialize(this); 576 577 recursive_lock_transfer_lock(&fLock, id); 578 579 return resume_thread(id); 580 } 581 582 583 status_t 584 BlockAllocator::InitializeAndClearBitmap(Transaction& transaction) 585 { 586 status_t status = Initialize(false); 587 if (status != B_OK) 588 return status; 589 590 uint32 numBits = 8 * fBlocksPerGroup * fVolume->BlockSize(); 591 uint32 blockShift = fVolume->BlockShift(); 592 593 uint32* buffer = (uint32*)malloc(numBits >> 3); 594 if (buffer == NULL) 595 RETURN_ERROR(B_NO_MEMORY); 596 597 memset(buffer, 0, numBits >> 3); 598 599 off_t offset = 1; 600 // the bitmap starts directly after the superblock 601 602 // initialize the AllocationGroup objects and clear the on-disk bitmap 603 604 for (int32 i = 0; i < fNumGroups; i++) { 605 if (write_pos(fVolume->Device(), offset << blockShift, buffer, 606 fBlocksPerGroup << blockShift) < B_OK) { 607 free(buffer); 608 return B_ERROR; 609 } 610 611 // the last allocation group may contain less blocks than the others 612 if (i == fNumGroups - 1) { 613 fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits; 614 fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1) 615 >> (blockShift + 3)); 616 } else { 617 fGroups[i].fNumBits = numBits; 618 fGroups[i].fNumBlocks = fBlocksPerGroup; 619 } 620 fGroups[i].fStart = offset; 621 fGroups[i].fFirstFree = fGroups[i].fLargestStart = 0; 622 fGroups[i].fFreeBits = fGroups[i].fLargestLength = fGroups[i].fNumBits; 623 fGroups[i].fLargestValid = true; 624 625 offset += fBlocksPerGroup; 626 } 627 free(buffer); 628 629 // reserve the boot block, the log area, and the block bitmap itself 630 uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length(); 631 632 if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) { 633 FATAL(("could not allocate reserved space for block bitmap/log!\n")); 634 return B_ERROR; 635 } 636 fVolume->SuperBlock().used_blocks 637 = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks); 638 639 return B_OK; 640 } 641 642 643 status_t 644 BlockAllocator::_Initialize(BlockAllocator* allocator) 645 { 646 // The lock must already be held at this point 647 RecursiveLocker locker(allocator->fLock, true); 648 649 Volume* volume = allocator->fVolume; 650 uint32 blocks = allocator->fBlocksPerGroup; 651 uint32 blockShift = volume->BlockShift(); 652 off_t freeBlocks = 0; 653 654 uint32* buffer = (uint32*)malloc(blocks << blockShift); 655 if (buffer == NULL) 656 RETURN_ERROR(B_NO_MEMORY); 657 658 AllocationGroup* groups = allocator->fGroups; 659 off_t offset = 1; 660 uint32 bitsPerGroup = 8 * (blocks << blockShift); 661 int32 numGroups = allocator->fNumGroups; 662 663 for (int32 i = 0; i < numGroups; i++) { 664 if (read_pos(volume->Device(), offset << blockShift, buffer, 665 blocks << blockShift) < B_OK) 666 break; 667 668 // the last allocation group may contain less blocks than the others 669 if (i == numGroups - 1) { 670 groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup; 671 groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1) 672 >> (blockShift + 3)); 673 } else { 674 groups[i].fNumBits = bitsPerGroup; 675 groups[i].fNumBlocks = blocks; 676 } 677 groups[i].fStart = offset; 678 679 // finds all free ranges in this allocation group 680 int32 start = -1, range = 0; 681 int32 numBits = groups[i].fNumBits, bit = 0; 682 int32 count = (numBits + 31) / 32; 683 684 for (int32 k = 0; k < count; k++) { 685 for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) { 686 if (buffer[k] & (1UL << j)) { 687 // block is in use 688 if (range > 0) { 689 groups[i].AddFreeRange(start, range); 690 range = 0; 691 } 692 } else if (range++ == 0) { 693 // block is free, start new free range 694 start = bit; 695 } 696 } 697 } 698 if (range) 699 groups[i].AddFreeRange(start, range); 700 701 freeBlocks += groups[i].fFreeBits; 702 703 offset += blocks; 704 } 705 free(buffer); 706 707 // check if block bitmap and log area are reserved 708 uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length(); 709 710 if (allocator->CheckBlocks(0, reservedBlocks) != B_OK) { 711 if (volume->IsReadOnly()) { 712 FATAL(("Space for block bitmap or log area is not reserved " 713 "(volume is mounted read-only)!\n")); 714 } else { 715 Transaction transaction(volume, 0); 716 if (groups[0].Allocate(transaction, 0, reservedBlocks) != B_OK) { 717 FATAL(("Could not allocate reserved space for block " 718 "bitmap/log!\n")); 719 volume->Panic(); 720 } else { 721 transaction.Done(); 722 FATAL(("Space for block bitmap or log area was not " 723 "reserved!\n")); 724 } 725 } 726 } 727 728 off_t usedBlocks = volume->NumBlocks() - freeBlocks; 729 if (volume->UsedBlocks() != usedBlocks) { 730 // If the disk in a dirty state at mount time, it's 731 // normal that the values don't match 732 INFORM(("volume reports %" B_PRIdOFF " used blocks, correct is %" 733 B_PRIdOFF "\n", volume->UsedBlocks(), usedBlocks)); 734 volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 735 } 736 737 return B_OK; 738 } 739 740 741 void 742 BlockAllocator::Uninitialize() 743 { 744 // We only have to make sure that the initializer thread isn't running 745 // anymore. 746 recursive_lock_lock(&fLock); 747 } 748 749 750 /*! Tries to allocate between \a minimum, and \a maximum blocks starting 751 at group \a groupIndex with offset \a start. The resulting allocation 752 is put into \a run. 753 754 The number of allocated blocks is always a multiple of \a minimum which 755 has to be a power of two value. 756 */ 757 status_t 758 BlockAllocator::AllocateBlocks(Transaction& transaction, int32 groupIndex, 759 uint16 start, uint16 maximum, uint16 minimum, block_run& run) 760 { 761 if (maximum == 0) 762 return B_BAD_VALUE; 763 764 FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n", 765 groupIndex, start, maximum, minimum)); 766 767 AllocationBlock cached(fVolume); 768 RecursiveLocker lock(fLock); 769 770 uint32 bitsPerFullBlock = fVolume->BlockSize() << 3; 771 772 // Find the block_run that can fulfill the request best 773 int32 bestGroup = -1; 774 int32 bestStart = -1; 775 int32 bestLength = -1; 776 777 for (int32 i = 0; i < fNumGroups + 1; i++, groupIndex++, start = 0) { 778 groupIndex = groupIndex % fNumGroups; 779 AllocationGroup& group = fGroups[groupIndex]; 780 781 CHECK_ALLOCATION_GROUP(groupIndex); 782 783 if (start >= group.NumBits() || group.IsFull()) 784 continue; 785 786 // The wanted maximum is smaller than the largest free block in the 787 // group or already smaller than the minimum 788 789 if (start < group.fFirstFree) 790 start = group.fFirstFree; 791 792 if (group.fLargestValid) { 793 if (group.fLargestLength < bestLength) 794 continue; 795 796 if (group.fLargestStart >= start) { 797 if (group.fLargestLength >= bestLength) { 798 bestGroup = groupIndex; 799 bestStart = group.fLargestStart; 800 bestLength = group.fLargestLength; 801 802 if (bestLength >= maximum) 803 break; 804 } 805 806 // We know everything about this group we have to, let's skip 807 // to the next 808 continue; 809 } 810 } 811 812 // There may be more than one block per allocation group - and 813 // we iterate through it to find a place for the allocation. 814 // (one allocation can't exceed one allocation group) 815 816 uint32 block = start / (fVolume->BlockSize() << 3); 817 int32 currentStart = 0, currentLength = 0; 818 int32 groupLargestStart = -1; 819 int32 groupLargestLength = -1; 820 int32 currentBit = start; 821 bool canFindGroupLargest = start == 0; 822 823 for (; block < group.NumBlocks(); block++) { 824 if (cached.SetTo(group, block) < B_OK) 825 RETURN_ERROR(B_ERROR); 826 827 T(Block("alloc-in", group.Start() + block, cached.Block(), 828 fVolume->BlockSize(), groupIndex, currentStart)); 829 830 // find a block large enough to hold the allocation 831 for (uint32 bit = start % bitsPerFullBlock; 832 bit < cached.NumBlockBits(); bit++) { 833 if (!cached.IsUsed(bit)) { 834 if (currentLength == 0) { 835 // start new range 836 currentStart = currentBit; 837 } 838 839 // have we found a range large enough to hold numBlocks? 840 if (++currentLength >= maximum) { 841 bestGroup = groupIndex; 842 bestStart = currentStart; 843 bestLength = currentLength; 844 break; 845 } 846 } else { 847 if (currentLength) { 848 // end of a range 849 if (currentLength > bestLength) { 850 bestGroup = groupIndex; 851 bestStart = currentStart; 852 bestLength = currentLength; 853 } 854 if (currentLength > groupLargestLength) { 855 groupLargestStart = currentStart; 856 groupLargestLength = currentLength; 857 } 858 currentLength = 0; 859 } 860 if ((int32)group.NumBits() - currentBit 861 <= groupLargestLength) { 862 // We can't find a bigger block in this group anymore, 863 // let's skip the rest. 864 block = group.NumBlocks(); 865 break; 866 } 867 } 868 currentBit++; 869 } 870 871 T(Block("alloc-out", block, cached.Block(), 872 fVolume->BlockSize(), groupIndex, currentStart)); 873 874 if (bestLength >= maximum) { 875 canFindGroupLargest = false; 876 break; 877 } 878 879 // start from the beginning of the next block 880 start = 0; 881 } 882 883 if (currentBit == (int32)group.NumBits()) { 884 if (currentLength > bestLength) { 885 bestGroup = groupIndex; 886 bestStart = currentStart; 887 bestLength = currentLength; 888 } 889 if (canFindGroupLargest && currentLength > groupLargestLength) { 890 groupLargestStart = currentStart; 891 groupLargestLength = currentLength; 892 } 893 } 894 895 if (canFindGroupLargest && !group.fLargestValid 896 && groupLargestLength >= 0) { 897 group.fLargestStart = groupLargestStart; 898 group.fLargestLength = groupLargestLength; 899 group.fLargestValid = true; 900 } 901 902 if (bestLength >= maximum) 903 break; 904 } 905 906 // If we found a suitable range, mark the blocks as in use, and 907 // write the updated block bitmap back to disk 908 if (bestLength < minimum) 909 return B_DEVICE_FULL; 910 911 if (bestLength > maximum) 912 bestLength = maximum; 913 else if (minimum > 1) { 914 // make sure bestLength is a multiple of minimum 915 bestLength = round_down(bestLength, minimum); 916 } 917 918 if (fGroups[bestGroup].Allocate(transaction, bestStart, bestLength) != B_OK) 919 RETURN_ERROR(B_IO_ERROR); 920 921 CHECK_ALLOCATION_GROUP(bestGroup); 922 923 run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(bestGroup); 924 run.start = HOST_ENDIAN_TO_BFS_INT16(bestStart); 925 run.length = HOST_ENDIAN_TO_BFS_INT16(bestLength); 926 927 fVolume->SuperBlock().used_blocks 928 = HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + bestLength); 929 // We are not writing back the disk's superblock - it's 930 // either done by the journaling code, or when the disk 931 // is unmounted. 932 // If the value is not correct at mount time, it will be 933 // fixed anyway. 934 935 // We need to flush any remaining blocks in the new allocation to make sure 936 // they won't interfere with the file cache. 937 block_cache_discard(fVolume->BlockCache(), fVolume->ToBlock(run), 938 run.Length()); 939 940 T(Allocate(run)); 941 return B_OK; 942 } 943 944 945 status_t 946 BlockAllocator::AllocateForInode(Transaction& transaction, 947 const block_run* parent, mode_t type, block_run& run) 948 { 949 // Apply some allocation policies here (AllocateBlocks() will break them 950 // if necessary) - we will start with those described in Dominic Giampaolo's 951 // "Practical File System Design", and see how good they work 952 953 // Files are going in the same allocation group as its parent, 954 // sub-directories will be inserted 8 allocation groups after 955 // the one of the parent 956 uint16 group = parent->AllocationGroup(); 957 if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY) 958 group += 8; 959 960 return AllocateBlocks(transaction, group, 0, 1, 1, run); 961 } 962 963 964 status_t 965 BlockAllocator::Allocate(Transaction& transaction, Inode* inode, 966 off_t numBlocks, block_run& run, uint16 minimum) 967 { 968 if (numBlocks <= 0) 969 return B_ERROR; 970 971 // one block_run can't hold more data than there is in one allocation group 972 if (numBlocks > fGroups[0].NumBits()) 973 numBlocks = fGroups[0].NumBits(); 974 975 // since block_run.length is uint16, the largest number of blocks that 976 // can be covered by a block_run is 65535 977 // TODO: if we drop compatibility, couldn't we do this any better? 978 // There are basically two possibilities: 979 // a) since a length of zero doesn't have any sense, take that for 65536 - 980 // but that could cause many problems (bugs) in other areas 981 // b) reduce the maximum amount of blocks per block_run, so that the 982 // remaining number of free blocks can be used in a useful manner 983 // (like 4 blocks) - but that would also reduce the maximum file size 984 // c) have BlockRun::Length() return (length + 1). 985 if (numBlocks > MAX_BLOCK_RUN_LENGTH) 986 numBlocks = MAX_BLOCK_RUN_LENGTH; 987 988 // Apply some allocation policies here (AllocateBlocks() will break them 989 // if necessary) 990 uint16 group = inode->BlockRun().AllocationGroup(); 991 uint16 start = 0; 992 993 // Are there already allocated blocks? (then just try to allocate near the 994 // last one) 995 if (inode->Size() > 0) { 996 const data_stream& data = inode->Node().data; 997 // TODO: we currently don't care for when the data stream 998 // is already grown into the indirect ranges 999 if (data.max_double_indirect_range == 0 1000 && data.max_indirect_range == 0) { 1001 // Since size > 0, there must be a valid block run in this stream 1002 int32 last = 0; 1003 for (; last < NUM_DIRECT_BLOCKS - 1; last++) 1004 if (data.direct[last + 1].IsZero()) 1005 break; 1006 1007 group = data.direct[last].AllocationGroup(); 1008 start = data.direct[last].Start() + data.direct[last].Length(); 1009 } 1010 } else if (inode->IsContainer() || inode->IsSymLink()) { 1011 // directory and symbolic link data will go in the same allocation 1012 // group as the inode is in but after the inode data 1013 start = inode->BlockRun().Start(); 1014 } else { 1015 // file data will start in the next allocation group 1016 group = inode->BlockRun().AllocationGroup() + 1; 1017 } 1018 1019 return AllocateBlocks(transaction, group, start, numBlocks, minimum, run); 1020 } 1021 1022 1023 status_t 1024 BlockAllocator::Free(Transaction& transaction, block_run run) 1025 { 1026 RecursiveLocker lock(fLock); 1027 1028 int32 group = run.AllocationGroup(); 1029 uint16 start = run.Start(); 1030 uint16 length = run.Length(); 1031 1032 FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start, 1033 length)); 1034 T(Free(run)); 1035 1036 // doesn't use Volume::IsValidBlockRun() here because it can check better 1037 // against the group size (the last group may have a different length) 1038 if (group < 0 || group >= fNumGroups 1039 || start > fGroups[group].NumBits() 1040 || uint32(start + length) > fGroups[group].NumBits() 1041 || length == 0) { 1042 FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group, 1043 start, length)); 1044 DEBUGGER(("tried to free invalid block_run")); 1045 return B_BAD_VALUE; 1046 } 1047 // check if someone tries to free reserved areas at the beginning of the 1048 // drive 1049 if (group == 0 1050 && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) { 1051 FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group, 1052 start, length)); 1053 DEBUGGER(("tried to free reserved block")); 1054 return B_BAD_VALUE; 1055 } 1056 #ifdef DEBUG 1057 if (CheckBlockRun(run) != B_OK) 1058 return B_BAD_DATA; 1059 #endif 1060 1061 CHECK_ALLOCATION_GROUP(group); 1062 1063 if (fGroups[group].Free(transaction, start, length) != B_OK) 1064 RETURN_ERROR(B_IO_ERROR); 1065 1066 CHECK_ALLOCATION_GROUP(group); 1067 1068 #ifdef DEBUG 1069 if (CheckBlockRun(run, NULL, false) != B_OK) { 1070 DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just " 1071 "freed)\n")); 1072 } 1073 #endif 1074 1075 fVolume->SuperBlock().used_blocks = 1076 HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length()); 1077 return B_OK; 1078 } 1079 1080 1081 size_t 1082 BlockAllocator::BitmapSize() const 1083 { 1084 return fVolume->BlockSize() * fNumBlocks; 1085 } 1086 1087 1088 #ifdef DEBUG_FRAGMENTER 1089 void 1090 BlockAllocator::Fragment() 1091 { 1092 AllocationBlock cached(fVolume); 1093 RecursiveLocker lock(fLock); 1094 1095 // only leave 4 block holes 1096 static const uint32 kMask = 0x0f0f0f0f; 1097 uint32 valuesPerBlock = fVolume->BlockSize() / 4; 1098 1099 for (int32 i = 0; i < fNumGroups; i++) { 1100 AllocationGroup& group = fGroups[i]; 1101 1102 for (uint32 block = 0; block < group.NumBlocks(); block++) { 1103 Transaction transaction(fVolume, 0); 1104 1105 if (cached.SetToWritable(transaction, group, block) != B_OK) 1106 return; 1107 1108 for (int32 index = 0; index < valuesPerBlock; index++) { 1109 cached.Block(index) |= HOST_ENDIAN_TO_BFS_INT32(kMask); 1110 } 1111 1112 transaction.Done(); 1113 } 1114 } 1115 } 1116 #endif // DEBUG_FRAGMENTER 1117 1118 1119 #ifdef DEBUG_ALLOCATION_GROUPS 1120 void 1121 BlockAllocator::_CheckGroup(int32 groupIndex) const 1122 { 1123 AllocationBlock cached(fVolume); 1124 ASSERT_LOCKED_RECURSIVE(&fLock); 1125 1126 AllocationGroup& group = fGroups[groupIndex]; 1127 1128 int32 currentStart = 0, currentLength = 0; 1129 int32 firstFree = -1; 1130 int32 largestStart = -1; 1131 int32 largestLength = 0; 1132 int32 currentBit = 0; 1133 1134 for (uint32 block = 0; block < group.NumBlocks(); block++) { 1135 if (cached.SetTo(group, block) < B_OK) { 1136 panic("setting group block %d failed\n", (int)block); 1137 return; 1138 } 1139 1140 for (uint32 bit = 0; bit < cached.NumBlockBits(); bit++) { 1141 if (!cached.IsUsed(bit)) { 1142 if (firstFree < 0) { 1143 firstFree = currentBit; 1144 if (!group.fLargestValid) { 1145 if (firstFree >= 0 && firstFree < group.fFirstFree) { 1146 // mostly harmless but noteworthy 1147 dprintf("group %d first free too late: should be " 1148 "%d, is %d\n", (int)groupIndex, (int)firstFree, 1149 (int)group.fFirstFree); 1150 } 1151 return; 1152 } 1153 } 1154 1155 if (currentLength == 0) { 1156 // start new range 1157 currentStart = currentBit; 1158 } 1159 currentLength++; 1160 } else if (currentLength) { 1161 // end of a range 1162 if (currentLength > largestLength) { 1163 largestStart = currentStart; 1164 largestLength = currentLength; 1165 } 1166 currentLength = 0; 1167 } 1168 currentBit++; 1169 } 1170 } 1171 1172 if (currentLength > largestLength) { 1173 largestStart = currentStart; 1174 largestLength = currentLength; 1175 } 1176 1177 if (firstFree >= 0 && firstFree < group.fFirstFree) { 1178 // mostly harmless but noteworthy 1179 dprintf("group %d first free too late: should be %d, is %d\n", 1180 (int)groupIndex, (int)firstFree, (int)group.fFirstFree); 1181 } 1182 if (group.fLargestValid 1183 && (largestStart != group.fLargestStart 1184 || largestLength != group.fLargestLength)) { 1185 panic("bfs %p: group %d largest differs: %d.%d, checked %d.%d.\n", 1186 fVolume, (int)groupIndex, (int)group.fLargestStart, 1187 (int)group.fLargestLength, (int)largestStart, (int)largestLength); 1188 } 1189 } 1190 #endif // DEBUG_ALLOCATION_GROUPS 1191 1192 1193 status_t 1194 BlockAllocator::Trim(uint64 offset, uint64 size, uint64& trimmedSize) 1195 { 1196 const uint32 kTrimRanges = 128; 1197 fs_trim_data* trimData = (fs_trim_data*)malloc(sizeof(fs_trim_data) 1198 + sizeof(uint64) * kTrimRanges); 1199 if (trimData == NULL) 1200 return B_NO_MEMORY; 1201 1202 MemoryDeleter deleter(trimData); 1203 RecursiveLocker locker(fLock); 1204 1205 // TODO: take given offset and size into account! 1206 int32 lastGroup = fNumGroups - 1; 1207 uint32 firstBlock = 0; 1208 uint32 firstBit = 0; 1209 uint64 currentBlock = 0; 1210 uint32 blockShift = fVolume->BlockShift(); 1211 1212 uint64 firstFree = 0; 1213 size_t freeLength = 0; 1214 1215 trimData->range_count = 0; 1216 trimmedSize = 0; 1217 1218 AllocationBlock cached(fVolume); 1219 for (int32 groupIndex = 0; groupIndex <= lastGroup; groupIndex++) { 1220 AllocationGroup& group = fGroups[groupIndex]; 1221 1222 for (uint32 block = firstBlock; block < group.NumBlocks(); block++) { 1223 cached.SetTo(group, block); 1224 1225 for (uint32 i = firstBit; i < cached.NumBlockBits(); i++) { 1226 if (cached.IsUsed(i)) { 1227 // Block is in use 1228 if (freeLength > 0) { 1229 status_t status = _TrimNext(*trimData, kTrimRanges, 1230 firstFree << blockShift, freeLength << blockShift, 1231 false, trimmedSize); 1232 if (status != B_OK) 1233 return status; 1234 1235 freeLength = 0; 1236 } 1237 } else if (freeLength++ == 0) { 1238 // Block is free, start new free range 1239 firstFree = currentBlock; 1240 } 1241 1242 currentBlock++; 1243 } 1244 } 1245 1246 firstBlock = 0; 1247 firstBit = 0; 1248 } 1249 1250 return _TrimNext(*trimData, kTrimRanges, firstFree << blockShift, 1251 freeLength << blockShift, true, trimmedSize); 1252 } 1253 1254 1255 // #pragma mark - Bitmap validity checking 1256 1257 // TODO: implement new FS checking API 1258 // Functions to check the validity of the bitmap - they are used from 1259 // the "checkfs" command (since this does even a bit more, maybe we should 1260 // move this some place else?) 1261 1262 1263 bool 1264 BlockAllocator::_IsValidCheckControl(const check_control* control) 1265 { 1266 if (control == NULL 1267 || control->magic != BFS_IOCTL_CHECK_MAGIC) { 1268 FATAL(("invalid check_control (%p)!\n", control)); 1269 return false; 1270 } 1271 1272 return true; 1273 } 1274 1275 1276 status_t 1277 BlockAllocator::StartChecking(const check_control* control) 1278 { 1279 if (!_IsValidCheckControl(control)) 1280 return B_BAD_VALUE; 1281 1282 fVolume->GetJournal(0)->Lock(NULL, true); 1283 // Lock the volume's journal 1284 1285 recursive_lock_lock(&fLock); 1286 1287 size_t size = BitmapSize(); 1288 fCheckBitmap = (uint32*)malloc(size); 1289 if (fCheckBitmap == NULL) { 1290 recursive_lock_unlock(&fLock); 1291 fVolume->GetJournal(0)->Unlock(NULL, true); 1292 return B_NO_MEMORY; 1293 } 1294 1295 fCheckCookie = new(std::nothrow) check_cookie(); 1296 if (fCheckCookie == NULL) { 1297 free(fCheckBitmap); 1298 fCheckBitmap = NULL; 1299 recursive_lock_unlock(&fLock); 1300 fVolume->GetJournal(0)->Unlock(NULL, true); 1301 1302 return B_NO_MEMORY; 1303 } 1304 1305 memcpy(&fCheckCookie->control, control, sizeof(check_control)); 1306 memset(&fCheckCookie->control.stats, 0, sizeof(control->stats)); 1307 1308 // initialize bitmap 1309 memset(fCheckBitmap, 0, size); 1310 for (int32 block = fVolume->Log().Start() + fVolume->Log().Length(); 1311 block-- > 0;) { 1312 _SetCheckBitmapAt(block); 1313 } 1314 1315 fCheckCookie->pass = BFS_CHECK_PASS_BITMAP; 1316 fCheckCookie->stack.Push(fVolume->Root()); 1317 fCheckCookie->stack.Push(fVolume->Indices()); 1318 fCheckCookie->iterator = NULL; 1319 fCheckCookie->control.stats.block_size = fVolume->BlockSize(); 1320 1321 // Put removed vnodes to the stack -- they are not reachable by traversing 1322 // the file system anymore. 1323 InodeList::Iterator iterator = fVolume->RemovedInodes().GetIterator(); 1324 while (Inode* inode = iterator.Next()) { 1325 fCheckCookie->stack.Push(inode->BlockRun()); 1326 } 1327 1328 // TODO: check reserved area in bitmap! 1329 1330 return B_OK; 1331 } 1332 1333 1334 status_t 1335 BlockAllocator::StopChecking(check_control* control) 1336 { 1337 if (fCheckCookie == NULL) 1338 return B_NO_INIT; 1339 1340 if (fCheckCookie->iterator != NULL) { 1341 delete fCheckCookie->iterator; 1342 fCheckCookie->iterator = NULL; 1343 1344 // the current directory inode is still locked in memory 1345 put_vnode(fVolume->FSVolume(), fVolume->ToVnode(fCheckCookie->current)); 1346 } 1347 1348 if (fVolume->IsReadOnly()) { 1349 // We can't fix errors on this volume 1350 fCheckCookie->control.flags = 0; 1351 } 1352 1353 if (fCheckCookie->control.status != B_ENTRY_NOT_FOUND) 1354 FATAL(("BlockAllocator::CheckNextNode() didn't run through\n")); 1355 1356 switch (fCheckCookie->pass) { 1357 case BFS_CHECK_PASS_BITMAP: 1358 // if CheckNextNode() could completely work through, we can 1359 // fix any damages of the bitmap 1360 if (fCheckCookie->control.status == B_ENTRY_NOT_FOUND) 1361 _WriteBackCheckBitmap(); 1362 break; 1363 1364 case BFS_CHECK_PASS_INDEX: 1365 _FreeIndices(); 1366 break; 1367 } 1368 1369 fVolume->SetCheckingThread(-1); 1370 1371 if (control != NULL) 1372 user_memcpy(control, &fCheckCookie->control, sizeof(check_control)); 1373 1374 free(fCheckBitmap); 1375 fCheckBitmap = NULL; 1376 delete fCheckCookie; 1377 fCheckCookie = NULL; 1378 recursive_lock_unlock(&fLock); 1379 fVolume->GetJournal(0)->Unlock(NULL, true); 1380 1381 return B_OK; 1382 } 1383 1384 1385 status_t 1386 BlockAllocator::CheckNextNode(check_control* control) 1387 { 1388 if (fCheckCookie == NULL) 1389 return B_NO_INIT; 1390 1391 fVolume->SetCheckingThread(find_thread(NULL)); 1392 1393 // Make sure the user control is copied on exit 1394 class CopyControlOnExit { 1395 public: 1396 CopyControlOnExit(check_control* source, check_control* userTarget) 1397 : 1398 fSource(source), 1399 fTarget(userTarget) 1400 { 1401 } 1402 1403 ~CopyControlOnExit() 1404 { 1405 if (fTarget != NULL) 1406 user_memcpy(fTarget, fSource, sizeof(check_control)); 1407 } 1408 1409 private: 1410 check_control* fSource; 1411 check_control* fTarget; 1412 } copyControl(&fCheckCookie->control, control); 1413 1414 while (true) { 1415 if (fCheckCookie->iterator == NULL) { 1416 if (!fCheckCookie->stack.Pop(&fCheckCookie->current)) { 1417 // No more runs on the stack, we might be finished! 1418 if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP 1419 && !fCheckCookie->indices.IsEmpty()) { 1420 // Start second pass to repair indices 1421 _WriteBackCheckBitmap(); 1422 1423 fCheckCookie->pass = BFS_CHECK_PASS_INDEX; 1424 fCheckCookie->control.pass = BFS_CHECK_PASS_INDEX; 1425 1426 status_t status = _PrepareIndices(); 1427 if (status != B_OK) { 1428 fCheckCookie->control.status = status; 1429 return status; 1430 } 1431 1432 fCheckCookie->stack.Push(fVolume->Root()); 1433 continue; 1434 } 1435 1436 fCheckCookie->control.status = B_ENTRY_NOT_FOUND; 1437 return B_ENTRY_NOT_FOUND; 1438 } 1439 1440 Vnode vnode(fVolume, fCheckCookie->current); 1441 Inode* inode; 1442 if (vnode.Get(&inode) != B_OK) { 1443 FATAL(("check: Could not open inode at %" B_PRIdOFF "\n", 1444 fVolume->ToBlock(fCheckCookie->current))); 1445 continue; 1446 } 1447 1448 fCheckCookie->control.inode = inode->ID(); 1449 fCheckCookie->control.mode = inode->Mode(); 1450 1451 if (!inode->IsContainer()) { 1452 // Check file 1453 fCheckCookie->control.errors = 0; 1454 fCheckCookie->control.status = CheckInode(inode, NULL); 1455 1456 if (inode->GetName(fCheckCookie->control.name) < B_OK) 1457 strcpy(fCheckCookie->control.name, "(node has no name)"); 1458 1459 return B_OK; 1460 } 1461 1462 // Check directory 1463 BPlusTree* tree = inode->Tree(); 1464 if (tree == NULL) { 1465 FATAL(("check: could not open b+tree from inode at %" B_PRIdOFF 1466 "\n", fVolume->ToBlock(fCheckCookie->current))); 1467 continue; 1468 } 1469 1470 fCheckCookie->parent = inode; 1471 fCheckCookie->parent_mode = inode->Mode(); 1472 1473 // get iterator for the next directory 1474 fCheckCookie->iterator = new(std::nothrow) TreeIterator(tree); 1475 if (fCheckCookie->iterator == NULL) 1476 RETURN_ERROR(B_NO_MEMORY); 1477 1478 // the inode must stay locked in memory until the iterator is freed 1479 vnode.Keep(); 1480 1481 // check the inode of the directory 1482 fCheckCookie->control.errors = 0; 1483 fCheckCookie->control.status = CheckInode(inode, NULL); 1484 1485 if (inode->GetName(fCheckCookie->control.name) != B_OK) 1486 strcpy(fCheckCookie->control.name, "(dir has no name)"); 1487 1488 return B_OK; 1489 } 1490 1491 char name[B_FILE_NAME_LENGTH]; 1492 uint16 length; 1493 ino_t id; 1494 1495 status_t status = fCheckCookie->iterator->GetNextEntry(name, &length, 1496 B_FILE_NAME_LENGTH, &id); 1497 if (status != B_OK) { 1498 // we no longer need this iterator 1499 delete fCheckCookie->iterator; 1500 fCheckCookie->iterator = NULL; 1501 1502 // unlock the directory's inode from memory 1503 put_vnode(fVolume->FSVolume(), 1504 fVolume->ToVnode(fCheckCookie->current)); 1505 1506 if (status == B_ENTRY_NOT_FOUND) { 1507 // We iterated over all entries already, just go on to the next 1508 continue; 1509 } 1510 1511 // Iterating over the B+tree failed - we let the checkfs run 1512 // fail completely, as we would delete all files we cannot 1513 // access. 1514 // TODO: maybe have a force parameter that actually does that. 1515 // TODO: we also need to be able to repair broken B+trees! 1516 return status; 1517 } 1518 1519 // ignore "." and ".." entries 1520 if (!strcmp(name, ".") || !strcmp(name, "..")) 1521 continue; 1522 1523 // fill in the control data as soon as we have them 1524 strlcpy(fCheckCookie->control.name, name, B_FILE_NAME_LENGTH); 1525 fCheckCookie->control.inode = id; 1526 fCheckCookie->control.errors = 0; 1527 1528 Vnode vnode(fVolume, id); 1529 Inode* inode; 1530 if (vnode.Get(&inode) != B_OK) { 1531 FATAL(("Could not open inode ID %" B_PRIdINO "!\n", id)); 1532 fCheckCookie->control.errors |= BFS_COULD_NOT_OPEN; 1533 1534 if ((fCheckCookie->control.flags & BFS_REMOVE_INVALID) != 0) { 1535 status = _RemoveInvalidNode(fCheckCookie->parent, 1536 fCheckCookie->iterator->Tree(), NULL, name); 1537 } else 1538 status = B_ERROR; 1539 1540 fCheckCookie->control.status = status; 1541 return B_OK; 1542 } 1543 1544 // check if the inode's name is the same as in the b+tree 1545 if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP 1546 && inode->IsRegularNode()) { 1547 RecursiveLocker locker(inode->SmallDataLock()); 1548 NodeGetter node(fVolume, inode); 1549 1550 const char* localName = inode->Name(node.Node()); 1551 if (localName == NULL || strcmp(localName, name)) { 1552 fCheckCookie->control.errors |= BFS_NAMES_DONT_MATCH; 1553 FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name, 1554 localName)); 1555 1556 if ((fCheckCookie->control.flags & BFS_FIX_NAME_MISMATCHES) 1557 != 0) { 1558 // Rename the inode 1559 Transaction transaction(fVolume, inode->BlockNumber()); 1560 1561 // Note, this may need extra blocks, but the inode will 1562 // only be checked afterwards, so that it won't be lost 1563 status = inode->SetName(transaction, name); 1564 if (status == B_OK) 1565 status = inode->WriteBack(transaction); 1566 if (status == B_OK) 1567 status = transaction.Done(); 1568 if (status != B_OK) { 1569 fCheckCookie->control.status = status; 1570 return B_OK; 1571 } 1572 } 1573 } 1574 } 1575 1576 fCheckCookie->control.mode = inode->Mode(); 1577 1578 // Check for the correct mode of the node (if the mode of the 1579 // file don't fit to its parent, there is a serious problem) 1580 if (fCheckCookie->pass == BFS_CHECK_PASS_BITMAP 1581 && (((fCheckCookie->parent_mode & S_ATTR_DIR) != 0 1582 && !inode->IsAttribute()) 1583 || ((fCheckCookie->parent_mode & S_INDEX_DIR) != 0 1584 && !inode->IsIndex()) 1585 || (is_directory(fCheckCookie->parent_mode) 1586 && !inode->IsRegularNode()))) { 1587 FATAL(("inode at %" B_PRIdOFF " is of wrong type: %o (parent " 1588 "%o at %" B_PRIdOFF ")!\n", inode->BlockNumber(), 1589 inode->Mode(), fCheckCookie->parent_mode, 1590 fCheckCookie->parent->BlockNumber())); 1591 1592 // if we are allowed to fix errors, we should remove the file 1593 if ((fCheckCookie->control.flags & BFS_REMOVE_WRONG_TYPES) != 0 1594 && (fCheckCookie->control.flags & BFS_FIX_BITMAP_ERRORS) 1595 != 0) { 1596 status = _RemoveInvalidNode(fCheckCookie->parent, NULL, 1597 inode, name); 1598 } else 1599 status = B_ERROR; 1600 1601 fCheckCookie->control.errors |= BFS_WRONG_TYPE; 1602 fCheckCookie->control.status = status; 1603 return B_OK; 1604 } 1605 1606 // push the directory on the stack so that it will be scanned later 1607 if (inode->IsContainer() && !inode->IsIndex()) 1608 fCheckCookie->stack.Push(inode->BlockRun()); 1609 else { 1610 // check it now 1611 fCheckCookie->control.status = CheckInode(inode, name); 1612 return B_OK; 1613 } 1614 } 1615 // is never reached 1616 } 1617 1618 1619 status_t 1620 BlockAllocator::_RemoveInvalidNode(Inode* parent, BPlusTree* tree, Inode* inode, 1621 const char* name) 1622 { 1623 // It's safe to start a transaction, because Inode::Remove() 1624 // won't touch the block bitmap (which we hold the lock for) 1625 // if we set the INODE_DONT_FREE_SPACE flag - since we fix 1626 // the bitmap anyway. 1627 Transaction transaction(fVolume, parent->BlockNumber()); 1628 status_t status; 1629 1630 if (inode != NULL) { 1631 inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE); 1632 1633 status = parent->Remove(transaction, name, NULL, false, true); 1634 } else { 1635 parent->WriteLockInTransaction(transaction); 1636 1637 // does the file even exist? 1638 off_t id; 1639 status = tree->Find((uint8*)name, (uint16)strlen(name), &id); 1640 if (status == B_OK) 1641 status = tree->Remove(transaction, name, id); 1642 } 1643 1644 if (status == B_OK) { 1645 entry_cache_remove(fVolume->ID(), parent->ID(), name); 1646 transaction.Done(); 1647 } 1648 1649 return status; 1650 } 1651 1652 1653 bool 1654 BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const 1655 { 1656 size_t size = BitmapSize(); 1657 uint32 index = block / 32; // 32bit resolution 1658 if (index > size / 4) 1659 return false; 1660 1661 return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index]) 1662 & (1UL << (block & 0x1f)); 1663 } 1664 1665 1666 void 1667 BlockAllocator::_SetCheckBitmapAt(off_t block) 1668 { 1669 size_t size = BitmapSize(); 1670 uint32 index = block / 32; // 32bit resolution 1671 if (index > size / 4) 1672 return; 1673 1674 fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f)); 1675 } 1676 1677 1678 status_t 1679 BlockAllocator::_WriteBackCheckBitmap() 1680 { 1681 if (fVolume->IsReadOnly()) 1682 return B_OK; 1683 1684 // calculate the number of used blocks in the check bitmap 1685 size_t size = BitmapSize(); 1686 off_t usedBlocks = 0LL; 1687 1688 // TODO: update the allocation groups used blocks info 1689 for (uint32 i = size >> 2; i-- > 0;) { 1690 uint32 compare = 1; 1691 // Count the number of bits set 1692 for (int16 j = 0; j < 32; j++, compare <<= 1) { 1693 if ((compare & fCheckBitmap[i]) != 0) 1694 usedBlocks++; 1695 } 1696 } 1697 1698 fCheckCookie->control.stats.freed = fVolume->UsedBlocks() - usedBlocks 1699 + fCheckCookie->control.stats.missing; 1700 if (fCheckCookie->control.stats.freed < 0) 1701 fCheckCookie->control.stats.freed = 0; 1702 1703 // Should we fix errors? Were there any errors we can fix? 1704 if ((fCheckCookie->control.flags & BFS_FIX_BITMAP_ERRORS) != 0 1705 && (fCheckCookie->control.stats.freed != 0 1706 || fCheckCookie->control.stats.missing != 0)) { 1707 // If so, write the check bitmap back over the original one, 1708 // and use transactions here to play safe - we even use several 1709 // transactions, so that we don't blow the maximum log size 1710 // on large disks, since we don't need to make this atomic. 1711 #if 0 1712 // prints the blocks that differ 1713 off_t block = 0; 1714 for (int32 i = 0; i < fNumGroups; i++) { 1715 AllocationBlock cached(fVolume); 1716 for (uint32 j = 0; j < fGroups[i].NumBlocks(); j++) { 1717 cached.SetTo(fGroups[i], j); 1718 for (uint32 k = 0; k < cached.NumBlockBits(); k++) { 1719 if (cached.IsUsed(k) != _CheckBitmapIsUsedAt(block)) { 1720 dprintf("differ block %lld (should be %d)\n", block, 1721 _CheckBitmapIsUsedAt(block)); 1722 } 1723 block++; 1724 } 1725 } 1726 } 1727 #endif 1728 1729 fVolume->SuperBlock().used_blocks 1730 = HOST_ENDIAN_TO_BFS_INT64(usedBlocks); 1731 1732 size_t blockSize = fVolume->BlockSize(); 1733 1734 for (uint32 i = 0; i < fNumBlocks; i += 512) { 1735 Transaction transaction(fVolume, 1 + i); 1736 1737 uint32 blocksToWrite = 512; 1738 if (blocksToWrite + i > fNumBlocks) 1739 blocksToWrite = fNumBlocks - i; 1740 1741 status_t status = transaction.WriteBlocks(1 + i, 1742 (uint8*)fCheckBitmap + i * blockSize, blocksToWrite); 1743 if (status < B_OK) { 1744 FATAL(("error writing bitmap: %s\n", strerror(status))); 1745 return status; 1746 } 1747 transaction.Done(); 1748 } 1749 } 1750 1751 return B_OK; 1752 } 1753 1754 1755 /*! Checks whether or not the specified block range is allocated or not, 1756 depending on the \a allocated argument. 1757 */ 1758 status_t 1759 BlockAllocator::CheckBlocks(off_t start, off_t length, bool allocated) 1760 { 1761 if (start < 0 || start + length > fVolume->NumBlocks()) 1762 return B_BAD_VALUE; 1763 1764 int32 group = start >> fVolume->AllocationGroupShift(); 1765 uint32 bitmapBlock = start / (fVolume->BlockSize() << 3); 1766 uint32 blockOffset = start % (fVolume->BlockSize() << 3); 1767 1768 uint32 groupBlock = bitmapBlock % fBlocksPerGroup; 1769 1770 AllocationBlock cached(fVolume); 1771 1772 while (groupBlock < fGroups[group].NumBlocks() && length > 0) { 1773 if (cached.SetTo(fGroups[group], groupBlock) != B_OK) 1774 RETURN_ERROR(B_IO_ERROR); 1775 1776 for (; blockOffset < cached.NumBlockBits() && length > 0; 1777 blockOffset++, length--) { 1778 if (cached.IsUsed(blockOffset) != allocated) { 1779 RETURN_ERROR(B_BAD_DATA); 1780 } 1781 } 1782 1783 blockOffset = 0; 1784 1785 if (++groupBlock >= fGroups[group].NumBlocks()) { 1786 groupBlock = 0; 1787 group++; 1788 } 1789 } 1790 1791 return B_OK; 1792 } 1793 1794 1795 status_t 1796 BlockAllocator::CheckBlockRun(block_run run, const char* type, bool allocated) 1797 { 1798 if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups 1799 || run.Start() > fGroups[run.AllocationGroup()].fNumBits 1800 || uint32(run.Start() + run.Length()) 1801 > fGroups[run.AllocationGroup()].fNumBits 1802 || run.length == 0) { 1803 PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type, 1804 run.AllocationGroup(), run.Start(), run.Length())); 1805 if (fCheckCookie == NULL) 1806 return B_BAD_DATA; 1807 1808 fCheckCookie->control.errors |= BFS_INVALID_BLOCK_RUN; 1809 return B_OK; 1810 } 1811 1812 uint32 bitsPerBlock = fVolume->BlockSize() << 3; 1813 uint32 block = run.Start() / bitsPerBlock; 1814 uint32 pos = run.Start() % bitsPerBlock; 1815 int32 length = 0; 1816 off_t firstMissing = -1, firstSet = -1; 1817 off_t firstGroupBlock 1818 = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift(); 1819 1820 AllocationBlock cached(fVolume); 1821 1822 for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) { 1823 if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK) 1824 RETURN_ERROR(B_IO_ERROR); 1825 1826 if (pos >= cached.NumBlockBits()) { 1827 // something very strange has happened... 1828 RETURN_ERROR(B_ERROR); 1829 } 1830 1831 while (length < run.Length() && pos < cached.NumBlockBits()) { 1832 if (cached.IsUsed(pos) != allocated) { 1833 if (fCheckCookie == NULL) { 1834 PRINT(("%s: block_run(%ld, %u, %u) is only partially " 1835 "allocated (pos = %ld, length = %ld)!\n", type, 1836 run.AllocationGroup(), run.Start(), run.Length(), 1837 pos, length)); 1838 return B_BAD_DATA; 1839 } 1840 if (firstMissing == -1) { 1841 firstMissing = firstGroupBlock + pos + block * bitsPerBlock; 1842 fCheckCookie->control.errors |= BFS_MISSING_BLOCKS; 1843 } 1844 fCheckCookie->control.stats.missing++; 1845 } else if (firstMissing != -1) { 1846 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are " 1847 "%sallocated!\n", type, run.AllocationGroup(), run.Start(), 1848 run.Length(), firstMissing, 1849 firstGroupBlock + pos + block * bitsPerBlock - 1, 1850 allocated ? "not " : "")); 1851 firstMissing = -1; 1852 } 1853 1854 if (fCheckBitmap != NULL) { 1855 // Set the block in the check bitmap as well, but have a look 1856 // if it is already allocated first 1857 uint32 offset = pos + block * bitsPerBlock; 1858 if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) { 1859 if (firstSet == -1) { 1860 firstSet = firstGroupBlock + offset; 1861 fCheckCookie->control.errors |= BFS_BLOCKS_ALREADY_SET; 1862 dprintf("block %" B_PRIdOFF " is already set!!!\n", 1863 firstGroupBlock + offset); 1864 } 1865 fCheckCookie->control.stats.already_set++; 1866 } else { 1867 if (firstSet != -1) { 1868 FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF 1869 " - %" B_PRIdOFF " are already set!\n", type, 1870 (int)run.AllocationGroup(), run.Start(), 1871 run.Length(), firstSet, 1872 firstGroupBlock + offset - 1)); 1873 firstSet = -1; 1874 } 1875 _SetCheckBitmapAt(firstGroupBlock + offset); 1876 } 1877 } 1878 length++; 1879 pos++; 1880 } 1881 1882 if (block + 1 >= fBlocksPerGroup || length >= run.Length()) { 1883 if (firstMissing != -1) { 1884 PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not " 1885 "allocated!\n", type, run.AllocationGroup(), run.Start(), 1886 run.Length(), firstMissing, 1887 firstGroupBlock + pos + block * bitsPerBlock - 1)); 1888 } 1889 if (firstSet != -1) { 1890 FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF " - %" 1891 B_PRIdOFF " are already set!\n", type, 1892 (int)run.AllocationGroup(), run.Start(), run.Length(), 1893 firstSet, 1894 firstGroupBlock + pos + block * bitsPerBlock - 1)); 1895 } 1896 } 1897 } 1898 1899 return B_OK; 1900 } 1901 1902 1903 status_t 1904 BlockAllocator::CheckInode(Inode* inode, const char* name) 1905 { 1906 if (fCheckCookie != NULL && fCheckBitmap == NULL) 1907 return B_NO_INIT; 1908 if (inode == NULL) 1909 return B_BAD_VALUE; 1910 1911 switch (fCheckCookie->pass) { 1912 case BFS_CHECK_PASS_BITMAP: 1913 { 1914 status_t status = _CheckInodeBlocks(inode, name); 1915 if (status != B_OK) 1916 return status; 1917 1918 // Check the B+tree as well 1919 if (inode->IsContainer()) { 1920 bool repairErrors 1921 = (fCheckCookie->control.flags & BFS_FIX_BPLUSTREES) != 0; 1922 bool errorsFound = false; 1923 status = inode->Tree()->Validate(repairErrors, errorsFound); 1924 if (errorsFound) { 1925 fCheckCookie->control.errors |= BFS_INVALID_BPLUSTREE; 1926 if (inode->IsIndex() && name != NULL && repairErrors) { 1927 // We completely rebuild corrupt indices 1928 check_index* index = new(std::nothrow) check_index; 1929 if (index == NULL) 1930 return B_NO_MEMORY; 1931 1932 strlcpy(index->name, name, sizeof(index->name)); 1933 index->run = inode->BlockRun(); 1934 fCheckCookie->indices.Push(index); 1935 } 1936 } 1937 } 1938 1939 return status; 1940 } 1941 1942 case BFS_CHECK_PASS_INDEX: 1943 return _AddInodeToIndex(inode); 1944 } 1945 1946 return B_OK; 1947 } 1948 1949 1950 status_t 1951 BlockAllocator::_CheckInodeBlocks(Inode* inode, const char* name) 1952 { 1953 status_t status = CheckBlockRun(inode->BlockRun(), "inode"); 1954 if (status != B_OK) 1955 return status; 1956 1957 // If the inode has an attribute directory, push it on the stack 1958 if (!inode->Attributes().IsZero()) 1959 fCheckCookie->stack.Push(inode->Attributes()); 1960 1961 if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) { 1962 // symlinks may not have a valid data stream 1963 if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH) 1964 return B_BAD_DATA; 1965 1966 return B_OK; 1967 } 1968 1969 data_stream* data = &inode->Node().data; 1970 1971 // check the direct range 1972 1973 if (data->max_direct_range) { 1974 for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) { 1975 if (data->direct[i].IsZero()) 1976 break; 1977 1978 status = CheckBlockRun(data->direct[i], "direct"); 1979 if (status < B_OK) 1980 return status; 1981 1982 fCheckCookie->control.stats.direct_block_runs++; 1983 fCheckCookie->control.stats.blocks_in_direct 1984 += data->direct[i].Length(); 1985 } 1986 } 1987 1988 CachedBlock cached(fVolume); 1989 1990 // check the indirect range 1991 1992 if (data->max_indirect_range) { 1993 status = CheckBlockRun(data->indirect, "indirect"); 1994 if (status < B_OK) 1995 return status; 1996 1997 off_t block = fVolume->ToBlock(data->indirect); 1998 1999 for (int32 i = 0; i < data->indirect.Length(); i++) { 2000 block_run* runs = (block_run*)cached.SetTo(block + i); 2001 if (runs == NULL) 2002 RETURN_ERROR(B_IO_ERROR); 2003 2004 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 2005 int32 index = 0; 2006 for (; index < runsPerBlock; index++) { 2007 if (runs[index].IsZero()) 2008 break; 2009 2010 status = CheckBlockRun(runs[index], "indirect->run"); 2011 if (status < B_OK) 2012 return status; 2013 2014 fCheckCookie->control.stats.indirect_block_runs++; 2015 fCheckCookie->control.stats.blocks_in_indirect 2016 += runs[index].Length(); 2017 } 2018 fCheckCookie->control.stats.indirect_array_blocks++; 2019 2020 if (index < runsPerBlock) 2021 break; 2022 } 2023 } 2024 2025 // check the double indirect range 2026 2027 if (data->max_double_indirect_range) { 2028 status = CheckBlockRun(data->double_indirect, "double indirect"); 2029 if (status != B_OK) 2030 return status; 2031 2032 int32 runsPerBlock = runs_per_block(fVolume->BlockSize()); 2033 int32 runsPerArray = runsPerBlock * data->double_indirect.Length(); 2034 2035 CachedBlock cachedDirect(fVolume); 2036 2037 for (int32 indirectIndex = 0; indirectIndex < runsPerArray; 2038 indirectIndex++) { 2039 // get the indirect array block 2040 block_run* array = (block_run*)cached.SetTo( 2041 fVolume->ToBlock(data->double_indirect) 2042 + indirectIndex / runsPerBlock); 2043 if (array == NULL) 2044 return B_IO_ERROR; 2045 2046 block_run indirect = array[indirectIndex % runsPerBlock]; 2047 // are we finished yet? 2048 if (indirect.IsZero()) 2049 return B_OK; 2050 2051 status = CheckBlockRun(indirect, "double indirect->runs"); 2052 if (status != B_OK) 2053 return status; 2054 2055 int32 maxIndex 2056 = ((uint32)indirect.Length() << fVolume->BlockShift()) 2057 / sizeof(block_run); 2058 2059 for (int32 index = 0; index < maxIndex; ) { 2060 block_run* runs = (block_run*)cachedDirect.SetTo( 2061 fVolume->ToBlock(indirect) + index / runsPerBlock); 2062 if (runs == NULL) 2063 return B_IO_ERROR; 2064 2065 do { 2066 // are we finished yet? 2067 if (runs[index % runsPerBlock].IsZero()) 2068 return B_OK; 2069 2070 status = CheckBlockRun(runs[index % runsPerBlock], 2071 "double indirect->runs->run"); 2072 if (status != B_OK) 2073 return status; 2074 2075 fCheckCookie->control.stats.double_indirect_block_runs++; 2076 fCheckCookie->control.stats.blocks_in_double_indirect 2077 += runs[index % runsPerBlock].Length(); 2078 } while ((++index % runsPerBlock) != 0); 2079 } 2080 2081 fCheckCookie->control.stats.double_indirect_array_blocks++; 2082 } 2083 } 2084 2085 return B_OK; 2086 } 2087 2088 2089 status_t 2090 BlockAllocator::_PrepareIndices() 2091 { 2092 int32 count = 0; 2093 2094 for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) { 2095 check_index* index = fCheckCookie->indices.Array()[i]; 2096 Vnode vnode(fVolume, index->run); 2097 Inode* inode; 2098 status_t status = vnode.Get(&inode); 2099 if (status != B_OK) { 2100 FATAL(("check: Could not open index at %" B_PRIdOFF "\n", 2101 fVolume->ToBlock(index->run))); 2102 return status; 2103 } 2104 2105 BPlusTree* tree = inode->Tree(); 2106 if (tree == NULL) { 2107 // TODO: We can't yet repair those 2108 continue; 2109 } 2110 2111 status = tree->MakeEmpty(); 2112 if (status != B_OK) 2113 return status; 2114 2115 index->inode = inode; 2116 vnode.Keep(); 2117 count++; 2118 } 2119 2120 return count == 0 ? B_ENTRY_NOT_FOUND : B_OK; 2121 } 2122 2123 2124 void 2125 BlockAllocator::_FreeIndices() 2126 { 2127 for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) { 2128 check_index* index = fCheckCookie->indices.Array()[i]; 2129 if (index->inode != NULL) { 2130 put_vnode(fVolume->FSVolume(), 2131 fVolume->ToVnode(index->inode->BlockRun())); 2132 } 2133 } 2134 fCheckCookie->indices.MakeEmpty(); 2135 } 2136 2137 2138 status_t 2139 BlockAllocator::_AddInodeToIndex(Inode* inode) 2140 { 2141 Transaction transaction(fVolume, inode->BlockNumber()); 2142 2143 for (int32 i = 0; i < fCheckCookie->indices.CountItems(); i++) { 2144 check_index* index = fCheckCookie->indices.Array()[i]; 2145 if (index->inode == NULL) 2146 continue; 2147 2148 index->inode->WriteLockInTransaction(transaction); 2149 2150 BPlusTree* tree = index->inode->Tree(); 2151 if (tree == NULL) 2152 return B_ERROR; 2153 2154 status_t status = B_OK; 2155 2156 if (!strcmp(index->name, "name")) { 2157 if (inode->InNameIndex()) { 2158 char name[B_FILE_NAME_LENGTH]; 2159 if (inode->GetName(name, B_FILE_NAME_LENGTH) != B_OK) 2160 return B_ERROR; 2161 2162 status = tree->Insert(transaction, name, inode->ID()); 2163 } 2164 } else if (!strcmp(index->name, "last_modified")) { 2165 if (inode->InLastModifiedIndex()) { 2166 status = tree->Insert(transaction, inode->OldLastModified(), 2167 inode->ID()); 2168 } 2169 } else if (!strcmp(index->name, "size")) { 2170 if (inode->InSizeIndex()) 2171 status = tree->Insert(transaction, inode->Size(), inode->ID()); 2172 } else { 2173 uint8 key[BPLUSTREE_MAX_KEY_LENGTH]; 2174 size_t keyLength = BPLUSTREE_MAX_KEY_LENGTH; 2175 if (inode->ReadAttribute(index->name, B_ANY_TYPE, 0, key, 2176 &keyLength) == B_OK) { 2177 status = tree->Insert(transaction, key, keyLength, inode->ID()); 2178 } 2179 } 2180 2181 if (status != B_OK) 2182 return status; 2183 } 2184 2185 return transaction.Done(); 2186 } 2187 2188 2189 status_t 2190 BlockAllocator::_AddTrim(fs_trim_data& trimData, uint32 maxRanges, 2191 uint64 offset, uint64 size) 2192 { 2193 if (trimData.range_count < maxRanges && size > 0) { 2194 trimData.ranges[trimData.range_count].offset = offset; 2195 trimData.ranges[trimData.range_count].size = size; 2196 trimData.range_count++; 2197 return true; 2198 } 2199 2200 return false; 2201 } 2202 2203 2204 status_t 2205 BlockAllocator::_TrimNext(fs_trim_data& trimData, uint32 maxRanges, 2206 uint64 offset, uint64 size, bool force, uint64& trimmedSize) 2207 { 2208 PRINT(("_TrimNext(index %" B_PRIu32 ", offset %" B_PRIu64 ", size %" 2209 B_PRIu64 ")\n", trimData.range_count, offset, size)); 2210 2211 bool pushed = _AddTrim(trimData, maxRanges, offset, size); 2212 2213 if (!pushed || force) { 2214 // Trim now 2215 trimData.trimmed_size = 0; 2216 if (ioctl(fVolume->Device(), B_TRIM_DEVICE, &trimData, 2217 sizeof(fs_trim_data)) != 0) { 2218 return errno; 2219 } 2220 2221 trimmedSize += trimData.trimmed_size; 2222 trimData.range_count = 0; 2223 } 2224 2225 if (!pushed) 2226 _AddTrim(trimData, maxRanges, offset, size); 2227 2228 return B_OK; 2229 } 2230 2231 2232 // #pragma mark - debugger commands 2233 2234 2235 #ifdef BFS_DEBUGGER_COMMANDS 2236 2237 void 2238 BlockAllocator::Dump(int32 index) 2239 { 2240 kprintf("allocation groups: %" B_PRId32 " (base %p)\n", fNumGroups, fGroups); 2241 kprintf("blocks per group: %" B_PRId32 "\n", fBlocksPerGroup); 2242 2243 for (int32 i = 0; i < fNumGroups; i++) { 2244 if (index != -1 && i != index) 2245 continue; 2246 2247 AllocationGroup& group = fGroups[i]; 2248 2249 kprintf("[%3" B_PRId32 "] num bits: %" B_PRIu32 " (%p)\n", i, 2250 group.NumBits(), &group); 2251 kprintf(" num blocks: %" B_PRIu32 "\n", group.NumBlocks()); 2252 kprintf(" start: %" B_PRId32 "\n", group.Start()); 2253 kprintf(" first free: %" B_PRId32 "\n", group.fFirstFree); 2254 kprintf(" largest start: %" B_PRId32 "%s\n", group.fLargestStart, 2255 group.fLargestValid ? "" : " (invalid)"); 2256 kprintf(" largest length: %" B_PRId32 "\n", group.fLargestLength); 2257 kprintf(" free bits: %" B_PRId32 "\n", group.fFreeBits); 2258 } 2259 } 2260 2261 2262 #if BFS_TRACING 2263 static char kTraceBuffer[256]; 2264 2265 2266 int 2267 dump_block_allocator_blocks(int argc, char** argv) 2268 { 2269 if (argc != 3 || !strcmp(argv[1], "--help")) { 2270 kprintf("usage: %s <ptr-to-volume> <block>\n", argv[0]); 2271 return 0; 2272 } 2273 2274 Volume* volume = (Volume*)parse_expression(argv[1]); 2275 off_t block = parse_expression(argv[2]); 2276 2277 // iterate over all tracing entries to find overlapping actions 2278 2279 using namespace BFSBlockTracing; 2280 2281 LazyTraceOutput out(kTraceBuffer, sizeof(kTraceBuffer), 0); 2282 TraceEntryIterator iterator; 2283 while (TraceEntry* _entry = iterator.Next()) { 2284 if (Allocate* entry = dynamic_cast<Allocate*>(_entry)) { 2285 off_t first = volume->ToBlock(entry->Run()); 2286 off_t last = first - 1 + entry->Run().Length(); 2287 if (block >= first && block <= last) { 2288 out.Clear(); 2289 const char* dump = out.DumpEntry(entry); 2290 kprintf("%5ld. %s\n", iterator.Index(), dump); 2291 } 2292 } else if (Free* entry = dynamic_cast<Free*>(_entry)) { 2293 off_t first = volume->ToBlock(entry->Run()); 2294 off_t last = first - 1 + entry->Run().Length(); 2295 if (block >= first && block <= last) { 2296 out.Clear(); 2297 const char* dump = out.DumpEntry(entry); 2298 kprintf("%5ld. %s\n", iterator.Index(), dump); 2299 } 2300 } 2301 } 2302 2303 return 0; 2304 } 2305 #endif 2306 2307 2308 int 2309 dump_block_allocator(int argc, char** argv) 2310 { 2311 int32 group = -1; 2312 if (argc == 3) { 2313 group = parse_expression(argv[2]); 2314 argc--; 2315 } 2316 2317 if (argc != 2 || !strcmp(argv[1], "--help")) { 2318 kprintf("usage: %s <ptr-to-volume> [group]\n", argv[0]); 2319 return 0; 2320 } 2321 2322 Volume* volume = (Volume*)parse_expression(argv[1]); 2323 BlockAllocator& allocator = volume->Allocator(); 2324 2325 allocator.Dump(group); 2326 return 0; 2327 } 2328 2329 #endif // BFS_DEBUGGER_COMMANDS 2330 2331