/*
 * Copyright 2001-2009, Axel Dörfler, axeld@pinc-software.de.
 * This file may be used under the terms of the MIT License.
 */

//! block bitmap handling and allocation policies


#include "BlockAllocator.h"

#include "bfs_control.h"
#include "BPlusTree.h"
#include "Debug.h"
#include "Inode.h"
#include "Volume.h"


// Things the BlockAllocator should do:

// - find a range of blocks of a certain size near a specific position
// - allocate a loosely bounded range of blocks for pre-allocation
// - free blocks
// - know how to deal with each allocation, special handling for directories,
//   files, symlinks, etc. (type sensitive allocation policies)

// What makes the code complicated is the fact that we are not just reading
// in the whole bitmap and operating on it in memory - e.g. a 13 GB partition
// with a block size of 2048 bytes already has an 800 kB bitmap, and the size
// of partitions will grow even more - so that's not an option.
// Instead, we read in every bitmap block when it's used - since an allocation
// group can span several blocks in the block bitmap, the AllocationBlock
// class is there to make handling those easier.

// The current implementation is only slightly optimized and could probably
// be improved a lot. Furthermore, the allocation policies used here should
// get some real-world testing.
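
// A quick sanity check of the numbers above: the bitmap needs one bit per
// block, so it occupies NumBlocks() / 8 bytes. For the 13 GB example with
// 2048 byte blocks that is (13 * 2^30 / 2048) / 8 bytes, which indeed comes
// out at roughly 800 kB - too much to simply keep in memory as a whole.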


#if BFS_TRACING && !defined(BFS_SHELL)
namespace BFSBlockTracing {


class Allocate : public AbstractTraceEntry {
public:
    Allocate(block_run run)
        :
        fRun(run)
    {
        Initialized();
    }

    virtual void AddDump(TraceOutput& out)
    {
        out.Print("bfs:alloc %lu.%u.%u", fRun.AllocationGroup(),
            fRun.Start(), fRun.Length());
    }

    const block_run& Run() const { return fRun; }

private:
    block_run fRun;
};


class Free : public AbstractTraceEntry {
public:
    Free(block_run run)
        :
        fRun(run)
    {
        Initialized();
    }

    virtual void AddDump(TraceOutput& out)
    {
        out.Print("bfs:free %lu.%u.%u", fRun.AllocationGroup(),
            fRun.Start(), fRun.Length());
    }

    const block_run& Run() const { return fRun; }

private:
    block_run fRun;
};


static uint32
checksum(const uint8* data, size_t size)
{
    const uint32* data4 = (const uint32*)data;
    uint32 sum = 0;
    while (size >= 4) {
        sum += *data4;
        data4++;
        size -= 4;
    }
    // any trailing bytes that don't fill a whole uint32 are ignored
    return sum;
}


class Block : public AbstractTraceEntry {
public:
    Block(const char* label, off_t blockNumber, const uint8* data,
            size_t size, uint32 start = 0, uint32 length = 0)
        :
        fBlock(blockNumber),
        fData(data),
        fStart(start),
        fLength(length),
        fLabel(label)
    {
        fSum = checksum(data, size);
        Initialized();
    }

    virtual void AddDump(TraceOutput& out)
    {
        out.Print("bfs:%s: block %Ld (%p), sum %lu, s/l %lu/%lu", fLabel,
            fBlock, fData, fSum, fStart, fLength);
    }

private:
    off_t fBlock;
    const uint8* fData;
    uint32 fStart;
    uint32 fLength;
    uint32 fSum;
    const char* fLabel;
};


class BlockChange : public AbstractTraceEntry {
public:
    BlockChange(const char* label, int32 block, uint32 oldData,
            uint32 newData)
        :
        fBlock(block),
        fOldData(oldData),
        fNewData(newData),
        fLabel(label)
    {
        Initialized();
    }

    virtual void AddDump(TraceOutput& out)
    {
        out.Print("bfs:%s: block %ld, %08lx -> %08lx", fLabel,
            fBlock, fOldData, fNewData);
    }

private:
    int32 fBlock;
    uint32 fOldData;
    uint32 fNewData;
    const char* fLabel;
};

} // namespace BFSBlockTracing

# define T(x) new(std::nothrow) BFSBlockTracing::x;
#else
# define T(x) ;
#endif

#ifdef DEBUG_ALLOCATION_GROUPS
# define CHECK_ALLOCATION_GROUP(group) _CheckGroup(group)
#else
# define CHECK_ALLOCATION_GROUP(group) ;
#endif


struct check_cookie {
    check_cookie() {}

    block_run current;
    Inode* parent;
    mode_t parent_mode;
    Stack<block_run> stack;
    TreeIterator* iterator;
};


class AllocationBlock : public CachedBlock {
public:
    AllocationBlock(Volume* volume);

    inline void Allocate(uint16 start, uint16 numBlocks);
    inline void Free(uint16 start, uint16 numBlocks);
    inline bool IsUsed(uint16 block);

    status_t SetTo(AllocationGroup& group, uint16 block);
    status_t SetToWritable(Transaction& transaction, AllocationGroup& group,
        uint16 block);

    uint32 NumBlockBits() const { return fNumBits; }
    uint32& Block(int32 index) { return ((uint32*)fBlock)[index]; }
    uint8* Block() const { return (uint8*)fBlock; }

private:
    uint32 fNumBits;
#ifdef DEBUG
    bool fWritable;
#endif
};


class AllocationGroup {
public:
    AllocationGroup();

    void AddFreeRange(int32 start, int32 blocks);
    bool IsFull() const { return fFreeBits == 0; }

    status_t Allocate(Transaction& transaction, uint16 start, int32 length);
    status_t Free(Transaction& transaction, uint16 start, int32 length);

    uint32 NumBits() const { return fNumBits; }
    uint32 NumBlocks() const { return fNumBlocks; }
    int32 Start() const { return fStart; }

private:
    friend class BlockAllocator;

    uint32 fNumBits;
    uint32 fNumBlocks;
    int32 fStart;
    int32 fFirstFree;
    int32 fFreeBits;

    int32 fLargestStart;
    int32 fLargestLength;
    bool fLargestValid;
};


AllocationBlock::AllocationBlock(Volume* volume)
    : CachedBlock(volume)
{
}


status_t
AllocationBlock::SetTo(AllocationGroup& group, uint16 block)
{
    // 8 blocks per byte
    fNumBits = fVolume->BlockSize() << 3;
    // the last group may have fewer bits than the others
    if ((block + 1) * fNumBits > group.NumBits())
        fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
    fWritable = false;
#endif
    return CachedBlock::SetTo(group.Start() + block) != NULL ? B_OK : B_ERROR;
}


status_t
AllocationBlock::SetToWritable(Transaction& transaction,
    AllocationGroup& group, uint16 block)
{
    // 8 blocks per byte
    fNumBits = fVolume->BlockSize() << 3;
    // the last group may have fewer bits in its last block
    if ((block + 1) * fNumBits > group.NumBits())
        fNumBits = group.NumBits() % fNumBits;

#ifdef DEBUG
    fWritable = true;
#endif
    return CachedBlock::SetToWritable(transaction, group.Start() + block)
        != NULL ? B_OK : B_ERROR;
}
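
// An example of the bit counting above, assuming a 2048 byte block size:
// each bitmap block then covers 2048 * 8 = 16384 blocks. If a group has,
// say, 20000 bits, its first bitmap block holds the full 16384 bits, and
// the second (last) one only 20000 % 16384 = 3616 bits.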


bool
AllocationBlock::IsUsed(uint16 block)
{
    // valid bit indices are 0 .. fNumBits - 1; report anything beyond as used
    if (block >= fNumBits)
        return true;

    // the block bitmap is accessed in 32-bit blocks
    return Block(block >> 5) & HOST_ENDIAN_TO_BFS_INT32(1UL << (block % 32));
}


void
AllocationBlock::Allocate(uint16 start, uint16 numBlocks)
{
    ASSERT(start < fNumBits);
    ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
    ASSERT(fWritable);
#endif

    T(Block("b-alloc-in", fBlockNumber, fBlock, fVolume->BlockSize(),
        start, numBlocks));

    int32 block = start >> 5;

    while (numBlocks > 0) {
        uint32 mask = 0;
        for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
            mask |= 1UL << i;

        T(BlockChange("b-alloc", block, Block(block),
            Block(block) | HOST_ENDIAN_TO_BFS_INT32(mask)));

#if KDEBUG
        // check for already set blocks
        if (HOST_ENDIAN_TO_BFS_INT32(mask) & ((uint32*)fBlock)[block]) {
            FATAL(("AllocationBlock::Allocate(): some blocks are already "
                "allocated, start = %u, numBlocks = %u\n", start, numBlocks));
            panic("blocks already set!");
        }
#endif

        Block(block++) |= HOST_ENDIAN_TO_BFS_INT32(mask);
        start = 0;
    }
    T(Block("b-alloc-out", fBlockNumber, fBlock, fVolume->BlockSize(),
        start, numBlocks));
}


void
AllocationBlock::Free(uint16 start, uint16 numBlocks)
{
    ASSERT(start < fNumBits);
    ASSERT(uint32(start + numBlocks) <= fNumBits);
#ifdef DEBUG
    ASSERT(fWritable);
#endif

    int32 block = start >> 5;

    while (numBlocks > 0) {
        uint32 mask = 0;
        for (int32 i = start % 32; i < 32 && numBlocks; i++, numBlocks--)
            mask |= 1UL << i;

        T(BlockChange("b-free", block, Block(block),
            Block(block) & HOST_ENDIAN_TO_BFS_INT32(~mask)));

        Block(block++) &= HOST_ENDIAN_TO_BFS_INT32(~mask);
        start = 0;
    }
}


// #pragma mark -


/*! The allocation groups are created and initialized in
    BlockAllocator::Initialize() and BlockAllocator::InitializeAndClearBitmap()
    respectively.
*/
AllocationGroup::AllocationGroup()
    :
    fFirstFree(-1),
    fFreeBits(0),
    fLargestValid(false)
{
}


void
AllocationGroup::AddFreeRange(int32 start, int32 blocks)
{
    //D(if (blocks > 512)
    //  PRINT(("range of %ld blocks starting at %ld\n", blocks, start)));

    if (fFirstFree == -1)
        fFirstFree = start;

    if (!fLargestValid || fLargestLength < blocks) {
        fLargestStart = start;
        fLargestLength = blocks;
        fLargestValid = true;
    }

    fFreeBits += blocks;
}
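
// For illustration: feeding AddFreeRange() the ranges (100, 8) and then
// (300, 50) of a freshly scanned group leaves fFirstFree at 100 (the first
// range wins), moves the largest range hint on to start 300 with length 50,
// and accumulates fFreeBits to 58.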


/*! Allocates the specified run in the allocation group.
    Doesn't check if the run is valid or already partially allocated, nor
    does it maintain the free ranges hints or the volume's used blocks count.
    It only does the low-level work of allocating some bits in the block
    bitmap.
    Assumes that the block bitmap lock is held.
*/
status_t
AllocationGroup::Allocate(Transaction& transaction, uint16 start, int32 length)
{
    ASSERT(start + length <= (int32)fNumBits);

    // Update the allocation group info
    // TODO: this info will be incorrect if something goes wrong later
    // Note, the fFirstFree block doesn't have to be really free
    if (start == fFirstFree)
        fFirstFree = start + length;
    fFreeBits -= length;

    if (fLargestValid) {
        bool cut = false;
        if (fLargestStart == start) {
            // cut from start
            fLargestStart += length;
            fLargestLength -= length;
            cut = true;
        } else if (start > fLargestStart
            && start < fLargestStart + fLargestLength) {
            // cut from end
            fLargestLength = start - fLargestStart;
            cut = true;
        }
        if (cut && (fLargestLength < fLargestStart
                || fLargestLength
                    < (int32)fNumBits - (fLargestStart + fLargestLength))) {
            // might not be the largest block anymore
            fLargestValid = false;
        }
    }

    Volume* volume = transaction.GetVolume();

    // calculate block in the block bitmap and position within
    uint32 bitsPerBlock = volume->BlockSize() << 3;
    uint32 block = start / bitsPerBlock;
    start = start % bitsPerBlock;

    AllocationBlock cached(volume);

    while (length > 0) {
        if (cached.SetToWritable(transaction, *this, block) < B_OK) {
            fLargestValid = false;
            RETURN_ERROR(B_IO_ERROR);
        }

        uint32 numBlocks = length;
        if (start + numBlocks > cached.NumBlockBits())
            numBlocks = cached.NumBlockBits() - start;

        cached.Allocate(start, numBlocks);

        length -= numBlocks;
        start = 0;
        block++;
    }

    return B_OK;
}
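
// A worked example of the largest range bookkeeping above: with a largest
// range of (start 100, length 50), allocating (100, 10) cuts it from the
// front to (110, 40), while allocating (130, 20) cuts it from the end,
// leaving (100, 30). In both cases fLargestValid is cleared whenever the
// remainder could now be smaller than the untracked space before or after
// it - the next full scan in AllocateBlocks() then recomputes the hint.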


/*! Frees the specified run in the allocation group.
    Doesn't check if the run is valid or even fully allocated, nor does it
    maintain the free ranges hints or the volume's used blocks count.
    It only does the low-level work of freeing some bits in the block bitmap.
    Assumes that the block bitmap lock is held.
*/
status_t
AllocationGroup::Free(Transaction& transaction, uint16 start, int32 length)
{
    ASSERT(start + length <= (int32)fNumBits);

    // Update the allocation group info
    // TODO: this info will be incorrect if something goes wrong later
    if (fFirstFree > start)
        fFirstFree = start;
    fFreeBits += length;

    // The range to be freed cannot be part of the valid largest range
    ASSERT(!fLargestValid || start + length <= fLargestStart
        || start > fLargestStart);

    if (fLargestValid
        && (start + length == fLargestStart
            || fLargestStart + fLargestLength == start
            || (start < fLargestStart && fLargestStart > fLargestLength)
            || (start > fLargestStart
                && (int32)fNumBits - (fLargestStart + fLargestLength)
                    > fLargestLength))) {
        fLargestValid = false;
    }

    Volume* volume = transaction.GetVolume();

    // calculate block in the block bitmap and position within
    uint32 bitsPerBlock = volume->BlockSize() << 3;
    uint32 block = start / bitsPerBlock;
    start = start % bitsPerBlock;

    AllocationBlock cached(volume);

    while (length > 0) {
        if (cached.SetToWritable(transaction, *this, block) < B_OK)
            RETURN_ERROR(B_IO_ERROR);

        T(Block("free-1", block, cached.Block(), volume->BlockSize()));
        uint16 freeLength = length;
        if (uint32(start + length) > cached.NumBlockBits())
            freeLength = cached.NumBlockBits() - start;

        cached.Free(start, freeLength);

        length -= freeLength;
        start = 0;
        T(Block("free-2", block, cached.Block(), volume->BlockSize()));
        block++;
    }
    return B_OK;
}


// #pragma mark -


BlockAllocator::BlockAllocator(Volume* volume)
    :
    fVolume(volume),
    fGroups(NULL),
    fCheckBitmap(NULL)
{
    mutex_init(&fLock, "bfs allocator");
}


BlockAllocator::~BlockAllocator()
{
    mutex_destroy(&fLock);
    delete[] fGroups;
}


status_t
BlockAllocator::Initialize(bool full)
{
    fNumGroups = fVolume->AllocationGroups();
    fBlocksPerGroup = fVolume->SuperBlock().BlocksPerAllocationGroup();
    // round up to get the number of blocks the whole bitmap occupies
    fNumBlocks = (fVolume->NumBlocks() + fVolume->BlockSize() * 8 - 1)
        / (fVolume->BlockSize() * 8);

    fGroups = new(std::nothrow) AllocationGroup[fNumGroups];
    if (fGroups == NULL)
        return B_NO_MEMORY;

    if (!full)
        return B_OK;

    mutex_lock(&fLock);
        // the lock will be released by the _Initialize() method

    thread_id id = spawn_kernel_thread(
        (thread_func)BlockAllocator::_Initialize, "bfs block allocator",
        B_LOW_PRIORITY, this);
    if (id < B_OK)
        return _Initialize(this);

    mutex_transfer_lock(&fLock, id);

    return resume_thread(id);
}
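
// The rounding in Initialize() above is a plain ceiling division: with
// 8 * BlockSize() bits per bitmap block, a volume of, say, 100000 blocks
// and a block size of 2048 bytes needs (100000 + 16384 - 1) / 16384 = 7
// bitmap blocks - six full ones plus one that is only partially used.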


status_t
BlockAllocator::InitializeAndClearBitmap(Transaction& transaction)
{
    status_t status = Initialize(false);
    if (status != B_OK)
        return status;

    uint32 numBits = 8 * fBlocksPerGroup * fVolume->BlockSize();
    uint32 blockShift = fVolume->BlockShift();

    uint32* buffer = (uint32*)malloc(numBits >> 3);
    if (buffer == NULL)
        RETURN_ERROR(B_NO_MEMORY);

    memset(buffer, 0, numBits >> 3);

    off_t offset = 1;
        // the bitmap starts directly after the super block

    // initialize the AllocationGroup objects and clear the on-disk bitmap

    for (int32 i = 0; i < fNumGroups; i++) {
        if (write_pos(fVolume->Device(), offset << blockShift, buffer,
                fBlocksPerGroup << blockShift) < B_OK) {
            free(buffer);
            return B_ERROR;
        }

        // the last allocation group may contain fewer blocks than the others
        if (i == fNumGroups - 1) {
            fGroups[i].fNumBits = fVolume->NumBlocks() - i * numBits;
            fGroups[i].fNumBlocks = 1 + ((fGroups[i].NumBits() - 1)
                >> (blockShift + 3));
        } else {
            fGroups[i].fNumBits = numBits;
            fGroups[i].fNumBlocks = fBlocksPerGroup;
        }
        fGroups[i].fStart = offset;
        fGroups[i].fFirstFree = fGroups[i].fLargestStart = 0;
        fGroups[i].fFreeBits = fGroups[i].fLargestLength = fGroups[i].fNumBits;
        fGroups[i].fLargestValid = true;

        offset += fBlocksPerGroup;
    }
    free(buffer);

    // reserve the boot block, the log area, and the block bitmap itself
    uint32 reservedBlocks = fVolume->Log().Start() + fVolume->Log().Length();

    if (fGroups[0].Allocate(transaction, 0, reservedBlocks) < B_OK) {
        FATAL(("could not allocate reserved space for block bitmap/log!\n"));
        return B_ERROR;
    }
    fVolume->SuperBlock().used_blocks
        = HOST_ENDIAN_TO_BFS_INT64(reservedBlocks);

    return B_OK;
}
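
// A note on the reserved area above: Log().Start() is the first block of
// the log, i.e. everything in front of it is the super block plus the block
// bitmap, so Log().Start() + Log().Length() covers the whole range from
// block 0 up to the end of the log - exactly the range that is allocated
// in group 0 and accounted for in used_blocks.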


status_t
BlockAllocator::_Initialize(BlockAllocator* allocator)
{
    // The lock must already be held at this point

    Volume* volume = allocator->fVolume;
    uint32 blocks = allocator->fBlocksPerGroup;
    uint32 blockShift = volume->BlockShift();
    off_t freeBlocks = 0;

    uint32* buffer = (uint32*)malloc(blocks << blockShift);
    if (buffer == NULL) {
        mutex_unlock(&allocator->fLock);
        RETURN_ERROR(B_NO_MEMORY);
    }

    AllocationGroup* groups = allocator->fGroups;
    off_t offset = 1;
    uint32 bitsPerGroup = 8 * (blocks << blockShift);
    int32 numGroups = allocator->fNumGroups;

    for (int32 i = 0; i < numGroups; i++) {
        if (read_pos(volume->Device(), offset << blockShift, buffer,
                blocks << blockShift) < B_OK)
            break;

        // the last allocation group may contain fewer blocks than the others
        if (i == numGroups - 1) {
            groups[i].fNumBits = volume->NumBlocks() - i * bitsPerGroup;
            groups[i].fNumBlocks = 1 + ((groups[i].NumBits() - 1)
                >> (blockShift + 3));
        } else {
            groups[i].fNumBits = bitsPerGroup;
            groups[i].fNumBlocks = blocks;
        }
        groups[i].fStart = offset;

        // find all free ranges in this allocation group
        int32 start = -1, range = 0;
        int32 numBits = groups[i].fNumBits, bit = 0;
        int32 count = (numBits + 31) / 32;

        for (int32 k = 0; k < count; k++) {
            for (int32 j = 0; j < 32 && bit < numBits; j++, bit++) {
                if (buffer[k] & (1UL << j)) {
                    // block is in use
                    if (range > 0) {
                        groups[i].AddFreeRange(start, range);
                        range = 0;
                    }
                } else if (range++ == 0) {
                    // block is free, start a new free range
                    start = bit;
                }
            }
        }
        if (range)
            groups[i].AddFreeRange(start, range);

        freeBlocks += groups[i].fFreeBits;

        offset += blocks;
    }
    free(buffer);

    // check if block bitmap and log area are reserved
    uint32 reservedBlocks = volume->Log().Start() + volume->Log().Length();

    if (allocator->CheckBlocks(0, reservedBlocks) != B_OK) {
        if (volume->IsReadOnly()) {
            FATAL(("Space for block bitmap or log area is not reserved "
                "(volume is mounted read-only)!\n"));
        } else {
            Transaction transaction(volume, 0);
            if (groups[0].Allocate(transaction, 0, reservedBlocks) != B_OK) {
                FATAL(("Could not allocate reserved space for block "
                    "bitmap/log!\n"));
                volume->Panic();
            } else {
                transaction.Done();
                FATAL(("Space for block bitmap or log area was not "
                    "reserved!\n"));
            }
        }
    }

    off_t usedBlocks = volume->NumBlocks() - freeBlocks;
    if (volume->UsedBlocks() != usedBlocks) {
        // If the disk was in a dirty state at mount time, it's normal that
        // the values don't match
        INFORM(("volume reports %" B_PRIdOFF " used blocks, correct is %"
            B_PRIdOFF "\n", volume->UsedBlocks(), usedBlocks));
        volume->SuperBlock().used_blocks = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);
    }

    mutex_unlock(&allocator->fLock);
    return B_OK;
}
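
// A small walk-through of the free range scan above: for a bitmap word with
// the bit pattern ...11100111 (bit 0 rightmost), bits 0-2 are in use, bits
// 3-4 start a free range (start = 3, range = 2), and bit 5 ends it again -
// at which point AddFreeRange(3, 2) is called and the scan goes on looking
// for the next zero bit.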


void
BlockAllocator::Uninitialize()
{
    // We only have to make sure that the initializer thread isn't running
    // anymore.
    mutex_lock(&fLock);
}


/*! Tries to allocate between \a minimum, and \a maximum blocks starting
    at group \a groupIndex with offset \a start. The resulting allocation
    is put into \a run.

    The number of allocated blocks is always a multiple of \a minimum which
    has to be a power of two.
*/
status_t
BlockAllocator::AllocateBlocks(Transaction& transaction, int32 groupIndex,
    uint16 start, uint16 maximum, uint16 minimum, block_run& run)
{
    if (maximum == 0)
        return B_BAD_VALUE;

    FUNCTION_START(("group = %ld, start = %u, maximum = %u, minimum = %u\n",
        groupIndex, start, maximum, minimum));

    AllocationBlock cached(fVolume);
    MutexLocker lock(fLock);

    uint32 bitsPerFullBlock = fVolume->BlockSize() << 3;

    // Find the block_run that can fulfill the request best
    int32 bestGroup = -1;
    int32 bestStart = -1;
    int32 bestLength = -1;

    // "+ 1" so that the first group is visited a second time with start == 0
    for (int32 i = 0; i < fNumGroups + 1; i++, groupIndex++, start = 0) {
        groupIndex = groupIndex % fNumGroups;
        AllocationGroup& group = fGroups[groupIndex];

        CHECK_ALLOCATION_GROUP(groupIndex);

        if (start >= group.NumBits() || group.IsFull())
            continue;

        // The wanted maximum is smaller than the largest free block in the
        // group or already smaller than the minimum

        if (start < group.fFirstFree)
            start = group.fFirstFree;

        if (group.fLargestValid) {
            if (group.fLargestLength < bestLength)
                continue;

            if (group.fLargestStart >= start) {
                if (group.fLargestLength >= bestLength) {
                    bestGroup = groupIndex;
                    bestStart = group.fLargestStart;
                    bestLength = group.fLargestLength;

                    if (bestLength >= maximum)
                        break;
                }

                // We know everything we need to about this group, let's
                // skip to the next one
                continue;
            }
        }

        // There may be more than one block per allocation group - and
        // we iterate through them to find a place for the allocation.
        // (one allocation can't exceed one allocation group)

        uint32 block = start / (fVolume->BlockSize() << 3);
        int32 currentStart = 0, currentLength = 0;
        int32 groupLargestStart = -1;
        int32 groupLargestLength = -1;
        int32 currentBit = start;
        bool canFindGroupLargest = start == 0;

        for (; block < group.NumBlocks(); block++) {
            if (cached.SetTo(group, block) < B_OK)
                RETURN_ERROR(B_ERROR);

            T(Block("alloc-in", group.Start() + block, cached.Block(),
                fVolume->BlockSize(), groupIndex, currentStart));

            // find a block large enough to hold the allocation
            for (uint32 bit = start % bitsPerFullBlock;
                    bit < cached.NumBlockBits(); bit++) {
                if (!cached.IsUsed(bit)) {
                    if (currentLength == 0) {
                        // start a new range
                        currentStart = currentBit;
                    }

                    // have we found a range large enough to hold the
                    // wanted maximum?
                    if (++currentLength >= maximum) {
                        bestGroup = groupIndex;
                        bestStart = currentStart;
                        bestLength = currentLength;
                        break;
                    }
                } else {
                    if (currentLength) {
                        // end of a range
                        if (currentLength > bestLength) {
                            bestGroup = groupIndex;
                            bestStart = currentStart;
                            bestLength = currentLength;
                        }
                        if (currentLength > groupLargestLength) {
                            groupLargestStart = currentStart;
                            groupLargestLength = currentLength;
                        }
                        currentLength = 0;
                    }
                    if ((int32)group.NumBits() - currentBit
                            <= groupLargestLength) {
                        // We can't find a bigger block in this group
                        // anymore, let's skip the rest.
                        block = group.NumBlocks();
                        break;
                    }
                }
                currentBit++;
            }

            T(Block("alloc-out", block, cached.Block(),
                fVolume->BlockSize(), groupIndex, currentStart));

            if (bestLength >= maximum) {
                canFindGroupLargest = false;
                break;
            }

            // start from the beginning of the next block
            start = 0;
        }

        if (currentBit == (int32)group.NumBits()) {
            if (currentLength > bestLength) {
                bestGroup = groupIndex;
                bestStart = currentStart;
                bestLength = currentLength;
            }
            if (canFindGroupLargest && currentLength > groupLargestLength) {
                groupLargestStart = currentStart;
                groupLargestLength = currentLength;
            }
        }

        if (canFindGroupLargest && !group.fLargestValid
            && groupLargestLength >= 0) {
            group.fLargestStart = groupLargestStart;
            group.fLargestLength = groupLargestLength;
            group.fLargestValid = true;
        }

        if (bestLength >= maximum)
            break;
    }

    // If we found a suitable range, mark the blocks as in use, and
    // write the updated block bitmap back to disk
    if (bestLength < minimum)
        return B_DEVICE_FULL;

    if (bestLength > maximum)
        bestLength = maximum;
    else if (minimum > 1) {
        // make sure bestLength is a multiple of minimum
        bestLength = round_down(bestLength, minimum);
    }

    if (fGroups[bestGroup].Allocate(transaction, bestStart, bestLength) != B_OK)
        RETURN_ERROR(B_IO_ERROR);

    CHECK_ALLOCATION_GROUP(bestGroup);

    run.allocation_group = HOST_ENDIAN_TO_BFS_INT32(bestGroup);
    run.start = HOST_ENDIAN_TO_BFS_INT16(bestStart);
    run.length = HOST_ENDIAN_TO_BFS_INT16(bestLength);

    fVolume->SuperBlock().used_blocks
        = HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() + bestLength);
        // We are not writing back the disk's super block - it's
        // either done by the journaling code, or when the disk
        // is unmounted.
        // If the value is not correct at mount time, it will be
        // fixed anyway.

    // We need to flush any remaining blocks in the new allocation to make
    // sure they won't interfere with the file cache.
    block_cache_discard(fVolume->BlockCache(), fVolume->ToBlock(run),
        run.Length());

    T(Allocate(run));
    return B_OK;
}
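
// An example of the minimum/maximum policy above: asking for maximum = 11
// with minimum = 4 on a group whose best free range is 11 blocks long
// results in round_down(11, 4) = 8 allocated blocks; only if fewer than 4
// contiguous blocks were available anywhere would the request fail with
// B_DEVICE_FULL.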


status_t
BlockAllocator::AllocateForInode(Transaction& transaction,
    const block_run* parent, mode_t type, block_run& run)
{
    // Apply some allocation policies here (AllocateBlocks() will break them
    // if necessary) - we will start with those described in Dominic
    // Giampaolo's "Practical File System Design", and see how well they work

    // Files go into the same allocation group as their parent,
    // sub-directories are placed 8 allocation groups after the one of
    // their parent
    uint16 group = parent->AllocationGroup();
    if ((type & (S_DIRECTORY | S_INDEX_DIR | S_ATTR_DIR)) == S_DIRECTORY)
        group += 8;

    return AllocateBlocks(transaction, group, 0, 1, 1, run);
}


status_t
BlockAllocator::Allocate(Transaction& transaction, Inode* inode,
    off_t numBlocks, block_run& run, uint16 minimum)
{
    if (numBlocks <= 0)
        return B_ERROR;

    // one block_run can't hold more data than there is in one allocation
    // group
    if (numBlocks > fGroups[0].NumBits())
        numBlocks = fGroups[0].NumBits();

    // since block_run.length is uint16, the largest number of blocks that
    // can be covered by a block_run is 65535
    // TODO: if we drop compatibility, couldn't we do this any better?
    // There are basically three possibilities:
    // a) since a length of zero doesn't make any sense, take that for
    //    65536 - but that could cause many problems (bugs) in other areas
    // b) reduce the maximum amount of blocks per block_run, so that the
    //    remaining number of free blocks can be used in a useful manner
    //    (like 4 blocks) - but that would also reduce the maximum file size
    // c) have BlockRun::Length() return (length + 1).
    if (numBlocks > MAX_BLOCK_RUN_LENGTH)
        numBlocks = MAX_BLOCK_RUN_LENGTH;

    // Apply some allocation policies here (AllocateBlocks() will break them
    // if necessary)
    uint16 group = inode->BlockRun().AllocationGroup();
    uint16 start = 0;

    // Are there already allocated blocks? (then just try to allocate near
    // the last one)
    if (inode->Size() > 0) {
        const data_stream& data = inode->Node().data;
        // TODO: we currently don't care for when the data stream
        // is already grown into the indirect ranges
        if (data.max_double_indirect_range == 0
            && data.max_indirect_range == 0) {
            // Since size > 0, there must be a valid block run in this stream
            int32 last = 0;
            for (; last < NUM_DIRECT_BLOCKS - 1; last++)
                if (data.direct[last + 1].IsZero())
                    break;

            group = data.direct[last].AllocationGroup();
            start = data.direct[last].Start() + data.direct[last].Length();
        }
    } else if (inode->IsContainer() || inode->IsSymLink()) {
        // directory and symbolic link data will go in the same allocation
        // group as the inode is in but after the inode data
        start = inode->BlockRun().Start();
    } else {
        // file data will start in the next allocation group
        group = inode->BlockRun().AllocationGroup() + 1;
    }

    return AllocateBlocks(transaction, group, start, numBlocks, minimum, run);
}
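
// To illustrate the "allocate near the last block" policy above: if the
// last used direct run of a file is (group 5, start 200, length 16), the
// search starts at group 5, bit 216 - so if the blocks right behind the
// file happen to be free, the file simply grows contiguously.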


status_t
BlockAllocator::Free(Transaction& transaction, block_run run)
{
    MutexLocker lock(fLock);

    int32 group = run.AllocationGroup();
    uint16 start = run.Start();
    uint16 length = run.Length();

    FUNCTION_START(("group = %ld, start = %u, length = %u\n", group, start,
        length));
    T(Free(run));

    // We don't use Volume::IsValidBlockRun() here, because this allows us
    // to check against the real group size (the last group may have a
    // different length)
    if (group < 0 || group >= fNumGroups
        || start > fGroups[group].NumBits()
        || uint32(start + length) > fGroups[group].NumBits()
        || length == 0) {
        FATAL(("tried to free an invalid block_run (%d, %u, %u)\n", (int)group,
            start, length));
        DEBUGGER(("tried to free invalid block_run"));
        return B_BAD_VALUE;
    }
    // check if someone tries to free reserved areas at the beginning of the
    // drive
    if (group == 0
        && start < uint32(fVolume->Log().Start() + fVolume->Log().Length())) {
        FATAL(("tried to free a reserved block_run (%d, %u, %u)\n", (int)group,
            start, length));
        DEBUGGER(("tried to free reserved block"));
        return B_BAD_VALUE;
    }
#ifdef DEBUG
    if (CheckBlockRun(run) != B_OK)
        return B_BAD_DATA;
#endif

    CHECK_ALLOCATION_GROUP(group);

    if (fGroups[group].Free(transaction, start, length) != B_OK)
        RETURN_ERROR(B_IO_ERROR);

    CHECK_ALLOCATION_GROUP(group);

#ifdef DEBUG
    if (CheckBlockRun(run, NULL, NULL, false) != B_OK) {
        DEBUGGER(("CheckBlockRun() reports allocated blocks (which were just "
            "freed)\n"));
    }
#endif

    fVolume->SuperBlock().used_blocks =
        HOST_ENDIAN_TO_BFS_INT64(fVolume->UsedBlocks() - run.Length());
    return B_OK;
}


size_t
BlockAllocator::BitmapSize() const
{
    return fVolume->BlockSize() * fNumBlocks;
}


#ifdef DEBUG_FRAGMENTER
void
BlockAllocator::Fragment()
{
    AllocationBlock cached(fVolume);
    MutexLocker lock(fLock);

    // only leave 4 block holes
    static const uint32 kMask = 0x0f0f0f0f;
    uint32 valuesPerBlock = fVolume->BlockSize() / 4;

    for (int32 i = 0; i < fNumGroups; i++) {
        AllocationGroup& group = fGroups[i];

        for (uint32 block = 0; block < group.NumBlocks(); block++) {
            Transaction transaction(fVolume, 0);

            if (cached.SetToWritable(transaction, group, block) != B_OK)
                return;

            for (uint32 index = 0; index < valuesPerBlock; index++) {
                cached.Block(index) |= HOST_ENDIAN_TO_BFS_INT32(kMask);
            }

            transaction.Done();
        }
    }
}
#endif // DEBUG_FRAGMENTER
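
// About the mask used in Fragment() above: 0x0f0f0f0f sets the lower four
// bits of every byte in each 32 bit bitmap word, so after OR-ing it in, the
// bitmap alternates between four allocated and four free blocks - a
// worst-case layout that is handy for exercising the allocator on heavily
// fragmented volumes.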


#ifdef DEBUG_ALLOCATION_GROUPS
void
BlockAllocator::_CheckGroup(int32 groupIndex) const
{
    AllocationBlock cached(fVolume);
    ASSERT_LOCKED_MUTEX(&fLock);

    AllocationGroup& group = fGroups[groupIndex];

    int32 currentStart = 0, currentLength = 0;
    int32 firstFree = -1;
    int32 largestStart = -1;
    int32 largestLength = 0;
    int32 currentBit = 0;

    for (uint32 block = 0; block < group.NumBlocks(); block++) {
        if (cached.SetTo(group, block) < B_OK) {
            panic("setting group block %d failed\n", (int)block);
            return;
        }

        for (uint32 bit = 0; bit < cached.NumBlockBits(); bit++) {
            if (!cached.IsUsed(bit)) {
                if (firstFree < 0) {
                    firstFree = currentBit;
                    if (!group.fLargestValid) {
                        if (firstFree >= 0 && firstFree < group.fFirstFree) {
                            // mostly harmless but noteworthy
                            dprintf("group %d first free too late: should be "
                                "%d, is %d\n", (int)groupIndex, (int)firstFree,
                                (int)group.fFirstFree);
                        }
                        return;
                    }
                }

                if (currentLength == 0) {
                    // start new range
                    currentStart = currentBit;
                }
                currentLength++;
            } else if (currentLength) {
                // end of a range
                if (currentLength > largestLength) {
                    largestStart = currentStart;
                    largestLength = currentLength;
                }
                currentLength = 0;
            }
            currentBit++;
        }
    }

    if (currentLength > largestLength) {
        largestStart = currentStart;
        largestLength = currentLength;
    }

    if (firstFree >= 0 && firstFree < group.fFirstFree) {
        // mostly harmless but noteworthy
        dprintf("group %d first free too late: should be %d, is %d\n",
            (int)groupIndex, (int)firstFree, (int)group.fFirstFree);
    }
    if (group.fLargestValid
        && (largestStart != group.fLargestStart
            || largestLength != group.fLargestLength)) {
        panic("bfs %p: group %d largest differs: %d.%d, checked %d.%d.\n",
            fVolume, (int)groupIndex, (int)group.fLargestStart,
            (int)group.fLargestLength, (int)largestStart, (int)largestLength);
    }
}
#endif // DEBUG_ALLOCATION_GROUPS


// #pragma mark - Bitmap validity checking

// TODO: implement new FS checking API
// Functions to check the validity of the bitmap - they are used from
// the "checkfs" command (since this does even a bit more, maybe we should
// move it someplace else?)
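
// A minimal sketch of how the check functions below are driven from a
// "checkfs" style loop, given some Volume* "volume" (assumed to exist in
// the caller; error handling omitted). CheckNextNode() reports one node
// per call and B_ENTRY_NOT_FOUND once the whole tree has been visited:
#if 0
    check_control control;
    memset(&control, 0, sizeof(control));
    control.magic = BFS_IOCTL_CHECK_MAGIC;
    control.flags = BFS_FIX_BITMAP_ERRORS;
        // or 0 for a read-only check

    BlockAllocator& allocator = volume->Allocator();
    if (allocator.StartChecking(&control) == B_OK) {
        while (allocator.CheckNextNode(&control) == B_OK) {
            // control.name, control.inode, and control.status describe
            // the node that was just checked
        }
        allocator.StopChecking(&control);
    }
#endif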


bool
BlockAllocator::_IsValidCheckControl(check_control* control)
{
    if (control == NULL
        || control->magic != BFS_IOCTL_CHECK_MAGIC) {
        FATAL(("invalid check_control (%p)!\n", control));
        return false;
    }

    return true;
}


status_t
BlockAllocator::StartChecking(check_control* control)
{
    if (!_IsValidCheckControl(control))
        return B_BAD_VALUE;

    fVolume->GetJournal(0)->Lock(NULL, true);
        // Lock the volume's journal

    status_t status = mutex_lock(&fLock);
    if (status != B_OK) {
        fVolume->GetJournal(0)->Unlock(NULL, true);
        return status;
    }

    size_t size = BitmapSize();
    fCheckBitmap = (uint32*)malloc(size);
    if (fCheckBitmap == NULL) {
        mutex_unlock(&fLock);
        fVolume->GetJournal(0)->Unlock(NULL, true);
        return B_NO_MEMORY;
    }

    check_cookie* cookie = new(std::nothrow) check_cookie();
    if (cookie == NULL) {
        free(fCheckBitmap);
        fCheckBitmap = NULL;
        mutex_unlock(&fLock);
        fVolume->GetJournal(0)->Unlock(NULL, true);

        return B_NO_MEMORY;
    }

    // initialize the bitmap, marking the reserved area (super block,
    // block bitmap, and log) as used
    memset(fCheckBitmap, 0, size);
    for (int32 block = fVolume->Log().Start() + fVolume->Log().Length();
            block-- > 0;) {
        _SetCheckBitmapAt(block);
    }

    cookie->stack.Push(fVolume->Root());
    cookie->stack.Push(fVolume->Indices());
    cookie->iterator = NULL;
    control->cookie = cookie;

    fCheckCookie = cookie;
        // to be able to restore nicely if "chkbfs" exited abnormally

    // Put removed vnodes on the stack -- they are not reachable by
    // traversing the file system anymore.
    InodeList::Iterator iterator = fVolume->RemovedInodes().GetIterator();
    while (Inode* inode = iterator.Next()) {
        cookie->stack.Push(inode->BlockRun());
    }

    // TODO: check the reserved area in the bitmap!

    return B_OK;
}


status_t
BlockAllocator::StopChecking(check_control* control)
{
    check_cookie* cookie;
    if (control == NULL)
        cookie = fCheckCookie;
    else
        cookie = (check_cookie*)control->cookie;

    if (cookie == NULL)
        return B_ERROR;

    if (cookie->iterator != NULL) {
        delete cookie->iterator;
        cookie->iterator = NULL;

        // the current directory inode is still locked in memory
        put_vnode(fVolume->FSVolume(), fVolume->ToVnode(cookie->current));
    }

    if (control != NULL && fVolume->IsReadOnly()) {
        // We can't fix errors on this volume
        control->flags &= ~BFS_FIX_BITMAP_ERRORS;
    }

    // If CheckNextNode() managed to work through completely, we can
    // fix any damage to the bitmap
    if (control != NULL && control->status == B_ENTRY_NOT_FOUND) {
        // calculate the number of used blocks in the check bitmap
        size_t size = BitmapSize();
        off_t usedBlocks = 0LL;

        // TODO: update the allocation groups' used blocks info
        for (uint32 i = size >> 2; i-- > 0;) {
            uint32 compare = 1;
            // count the number of bits set
            for (int16 j = 0; j < 32; j++, compare <<= 1) {
                if ((compare & fCheckBitmap[i]) != 0)
                    usedBlocks++;
            }
        }

        control->stats.freed = fVolume->UsedBlocks() - usedBlocks
            + control->stats.missing;
        if (control->stats.freed < 0)
            control->stats.freed = 0;

        // Should we fix errors? Were there any errors we can fix?
        if ((control->flags & BFS_FIX_BITMAP_ERRORS) != 0
            && (control->stats.freed != 0 || control->stats.missing != 0)) {
            // If so, write the check bitmap back over the original one,
            // and use transactions here to play safe - we even use several
            // transactions, so that we don't blow the maximum log size
            // on large disks, since we don't need to make this atomic.
#if 0
            // prints the blocks that differ
            off_t block = 0;
            for (int32 i = 0; i < fNumGroups; i++) {
                AllocationBlock cached(fVolume);
                for (uint32 j = 0; j < fGroups[i].NumBlocks(); j++) {
                    cached.SetTo(fGroups[i], j);
                    for (uint32 k = 0; k < cached.NumBlockBits(); k++) {
                        if (cached.IsUsed(k) != _CheckBitmapIsUsedAt(block)) {
                            dprintf("differ block %lld (should be %d)\n",
                                block, _CheckBitmapIsUsedAt(block));
                        }
                        block++;
                    }
                }
            }
#endif

            fVolume->SuperBlock().used_blocks
                = HOST_ENDIAN_TO_BFS_INT64(usedBlocks);

            int32 blocksInBitmap = fNumGroups * fBlocksPerGroup;
            size_t blockSize = fVolume->BlockSize();

            for (int32 i = 0; i < blocksInBitmap; i += 512) {
                Transaction transaction(fVolume, 1 + i);

                int32 blocksToWrite = 512;
                if (blocksToWrite + i > blocksInBitmap)
                    blocksToWrite = blocksInBitmap - i;

                status_t status = transaction.WriteBlocks(1 + i,
                    (uint8*)fCheckBitmap + i * blockSize, blocksToWrite);
                if (status < B_OK) {
                    FATAL(("error writing bitmap: %s\n", strerror(status)));
                    break;
                }
                transaction.Done();
            }
        }
    } else
        FATAL(("BlockAllocator::CheckNextNode() didn't run through\n"));

    fVolume->SetCheckingThread(-1);

    free(fCheckBitmap);
    fCheckBitmap = NULL;
    fCheckCookie = NULL;
    delete cookie;
    mutex_unlock(&fLock);
    fVolume->GetJournal(0)->Unlock(NULL, true);

    return B_OK;
}
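
// Why the bitmap is written back in chunks of 512 blocks above: with a
// 2048 byte block size, for instance, each chunk is only 1 MB per
// transaction, which keeps every transaction well below the log size even
// on large volumes - and since each chunk is consistent on its own, there
// is no need to make the whole write-back atomic.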


status_t
BlockAllocator::CheckNextNode(check_control* control)
{
    if (!_IsValidCheckControl(control))
        return B_BAD_VALUE;

    check_cookie* cookie = (check_cookie*)control->cookie;
    fVolume->SetCheckingThread(find_thread(NULL));

    while (true) {
        if (cookie->iterator == NULL) {
            if (!cookie->stack.Pop(&cookie->current)) {
                // no more runs on the stack, we are obviously finished!
                control->status = B_ENTRY_NOT_FOUND;
                return B_ENTRY_NOT_FOUND;
            }

            Vnode vnode(fVolume, cookie->current);
            Inode* inode;
            if (vnode.Get(&inode) != B_OK) {
                FATAL(("check: Could not open inode at %" B_PRIdOFF "\n",
                    fVolume->ToBlock(cookie->current)));
                continue;
            }

            control->inode = inode->ID();
            control->mode = inode->Mode();

            if (!inode->IsContainer()) {
                // check a file
                control->errors = 0;
                control->status = CheckInode(inode, control);

                if (inode->GetName(control->name) < B_OK)
                    strcpy(control->name, "(node has no name)");

                return B_OK;
            }

            // get an iterator for the next directory

            BPlusTree* tree = inode->Tree();
            if (tree == NULL) {
                FATAL(("check: could not open b+tree from inode at %"
                    B_PRIdOFF "\n", fVolume->ToBlock(cookie->current)));
                continue;
            }

            cookie->parent = inode;
            cookie->parent_mode = inode->Mode();

            cookie->iterator = new(std::nothrow) TreeIterator(tree);
            if (cookie->iterator == NULL)
                RETURN_ERROR(B_NO_MEMORY);

            // the inode must stay locked in memory until the iterator
            // is freed
            vnode.Keep();

            // check the inode of the directory
            control->errors = 0;
            control->status = CheckInode(inode, control);

            if (inode->GetName(control->name) < B_OK)
                strcpy(control->name, "(dir has no name)");

            return B_OK;
        }

        char name[B_FILE_NAME_LENGTH];
        uint16 length;
        ino_t id;

        status_t status = cookie->iterator->GetNextEntry(name, &length,
            B_FILE_NAME_LENGTH, &id);
        if (status == B_ENTRY_NOT_FOUND) {
            // there are no more entries in this iterator, free it and go on
            delete cookie->iterator;
            cookie->iterator = NULL;

            // unlock the directory's inode from memory
            put_vnode(fVolume->FSVolume(), fVolume->ToVnode(cookie->current));

            continue;
        } else if (status == B_OK) {
            // ignore "." and ".." entries
            if (!strcmp(name, ".") || !strcmp(name, ".."))
                continue;

            // fill in the control data as soon as we have it
            strlcpy(control->name, name, B_FILE_NAME_LENGTH);
            control->inode = id;
            control->errors = 0;

            Vnode vnode(fVolume, id);
            Inode* inode;
            if (vnode.Get(&inode) != B_OK) {
                FATAL(("Could not open inode ID %" B_PRIdINO "!\n", id));
                control->errors |= BFS_COULD_NOT_OPEN;

                if ((control->flags & BFS_REMOVE_INVALID) != 0) {
                    status = _RemoveInvalidNode(cookie->parent,
                        cookie->iterator->Tree(), NULL, name);
                } else
                    status = B_ERROR;

                control->status = status;
                return B_OK;
            }

            // check if the inode's name is the same as in the b+tree
            if (inode->IsRegularNode()) {
                RecursiveLocker locker(inode->SmallDataLock());
                NodeGetter node(fVolume, inode);

                const char* localName = inode->Name(node.Node());
                if (localName == NULL || strcmp(localName, name)) {
                    control->errors |= BFS_NAMES_DONT_MATCH;
                    FATAL(("Names differ: tree \"%s\", inode \"%s\"\n", name,
                        localName));

                    if ((control->flags & BFS_FIX_NAME_MISMATCHES) != 0) {
                        // rename the inode
                        Transaction transaction(fVolume, inode->BlockNumber());

                        status = inode->SetName(transaction, name);
                        if (status == B_OK)
                            status = inode->WriteBack(transaction);
                        if (status == B_OK)
                            status = transaction.Done();
                        if (status != B_OK) {
                            control->status = status;
                            return B_OK;
                        }
                    }
                }
            }

            control->mode = inode->Mode();

            // Check for the correct mode of the node (if the mode of the
            // file doesn't fit its parent, there is a serious problem)
            if (((cookie->parent_mode & S_ATTR_DIR) != 0
                    && !inode->IsAttribute())
                || ((cookie->parent_mode & S_INDEX_DIR) != 0
                    && !inode->IsIndex())
                || (is_directory(cookie->parent_mode)
                    && !inode->IsRegularNode())) {
                FATAL(("inode at %" B_PRIdOFF " is of wrong type: %o (parent "
                    "%o at %" B_PRIdOFF ")!\n", inode->BlockNumber(),
                    inode->Mode(), cookie->parent_mode,
                    cookie->parent->BlockNumber()));

                // if we are allowed to fix errors, we should remove the file
                if ((control->flags & BFS_REMOVE_WRONG_TYPES) != 0
                    && (control->flags & BFS_FIX_BITMAP_ERRORS) != 0) {
                    status = _RemoveInvalidNode(cookie->parent, NULL, inode,
                        name);
                } else
                    status = B_ERROR;

                control->errors |= BFS_WRONG_TYPE;
                control->status = status;
                return B_OK;
            }

            // push the directory on the stack so that it will be scanned
            // later
            if (inode->IsContainer() && !inode->IsIndex())
                cookie->stack.Push(inode->BlockRun());
            else {
                // check it now
                control->status = CheckInode(inode, control);

                return B_OK;
            }
        }
    }
    // is never reached
}


status_t
BlockAllocator::_RemoveInvalidNode(Inode* parent, BPlusTree* tree,
    Inode* inode, const char* name)
{
    // It's safe to start a transaction, because Inode::Remove()
    // won't touch the block bitmap (which we hold the lock for)
    // if we set the INODE_DONT_FREE_SPACE flag - since we fix
    // the bitmap anyway.
    Transaction transaction(fVolume, parent->BlockNumber());
    status_t status;

    if (inode != NULL) {
        inode->Node().flags |= HOST_ENDIAN_TO_BFS_INT32(INODE_DONT_FREE_SPACE);

        status = parent->Remove(transaction, name, NULL, false, true);
    } else {
        parent->WriteLockInTransaction(transaction);

        // does the file even exist?
        off_t id;
        status = tree->Find((uint8*)name, (uint16)strlen(name), &id);
        if (status == B_OK)
            status = tree->Remove(transaction, name, id);
    }

    if (status == B_OK)
        transaction.Done();

    return status;
}


bool
BlockAllocator::_CheckBitmapIsUsedAt(off_t block) const
{
    size_t size = BitmapSize();
    uint32 index = block / 32; // 32bit resolution
    if (index >= size / 4)
        return false;

    return BFS_ENDIAN_TO_HOST_INT32(fCheckBitmap[index])
        & (1UL << (block & 0x1f));
}


void
BlockAllocator::_SetCheckBitmapAt(off_t block)
{
    size_t size = BitmapSize();
    uint32 index = block / 32; // 32bit resolution
    if (index >= size / 4)
        return;

    fCheckBitmap[index] |= HOST_ENDIAN_TO_BFS_INT32(1UL << (block & 0x1f));
}


/*! Checks whether the specified block range is allocated or free,
    depending on the \a allocated argument.
*/
status_t
BlockAllocator::CheckBlocks(off_t start, off_t length, bool allocated)
{
    if (start < 0 || start + length > fVolume->NumBlocks())
        return B_BAD_VALUE;

    uint32 group = start >> fVolume->AllocationGroupShift();
    uint32 groupBlock = start / (fVolume->BlockSize() << 3);
    uint32 blockOffset = start % (fVolume->BlockSize() << 3);

    AllocationBlock cached(fVolume);

    while (groupBlock < fGroups[group].NumBlocks() && length > 0) {
        if (cached.SetTo(fGroups[group], groupBlock) != B_OK)
            RETURN_ERROR(B_IO_ERROR);

        for (; blockOffset < cached.NumBlockBits() && length > 0;
                blockOffset++, length--) {
            if (cached.IsUsed(blockOffset) != allocated) {
                RETURN_ERROR(B_BAD_DATA);
            }
        }

        blockOffset = 0;

        if (++groupBlock >= fGroups[group].NumBlocks()) {
            groupBlock = 0;
            group++;
        }
    }

    return B_OK;
}


status_t
BlockAllocator::CheckBlockRun(block_run run, const char* type,
    check_control* control, bool allocated)
{
    if (run.AllocationGroup() < 0 || run.AllocationGroup() >= fNumGroups
        || run.Start() > fGroups[run.AllocationGroup()].fNumBits
        || uint32(run.Start() + run.Length())
            > fGroups[run.AllocationGroup()].fNumBits
        || run.length == 0) {
        PRINT(("%s: block_run(%ld, %u, %u) is invalid!\n", type,
            run.AllocationGroup(), run.Start(), run.Length()));
        if (control == NULL)
            return B_BAD_DATA;

        control->errors |= BFS_INVALID_BLOCK_RUN;
        return B_OK;
    }

    uint32 bitsPerBlock = fVolume->BlockSize() << 3;
    uint32 block = run.Start() / bitsPerBlock;
    uint32 pos = run.Start() % bitsPerBlock;
    int32 length = 0;
    off_t firstMissing = -1, firstSet = -1;
    off_t firstGroupBlock
        = (off_t)run.AllocationGroup() << fVolume->AllocationGroupShift();

    AllocationBlock cached(fVolume);

    for (; block < fBlocksPerGroup && length < run.Length(); block++, pos = 0) {
        if (cached.SetTo(fGroups[run.AllocationGroup()], block) < B_OK)
            RETURN_ERROR(B_IO_ERROR);

        if (pos >= cached.NumBlockBits()) {
            // something very strange has happened...
            RETURN_ERROR(B_ERROR);
        }

        while (length < run.Length() && pos < cached.NumBlockBits()) {
            if (cached.IsUsed(pos) != allocated) {
                if (control == NULL) {
                    PRINT(("%s: block_run(%ld, %u, %u) is only partially "
                        "allocated (pos = %ld, length = %ld)!\n", type,
                        run.AllocationGroup(), run.Start(), run.Length(),
                        pos, length));
                    return B_BAD_DATA;
                }
                if (firstMissing == -1) {
                    firstMissing = firstGroupBlock + pos + block * bitsPerBlock;
                    control->errors |= BFS_MISSING_BLOCKS;
                }
                control->stats.missing++;
            } else if (firstMissing != -1) {
                PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are "
                    "%sallocated!\n", type, run.AllocationGroup(), run.Start(),
                    run.Length(), firstMissing,
                    firstGroupBlock + pos + block * bitsPerBlock - 1,
                    allocated ? "not " : ""));
                firstMissing = -1;
            }

            if (fCheckBitmap != NULL) {
                // Set the block in the check bitmap as well, but have a look
                // if it is already allocated first
                uint32 offset = pos + block * bitsPerBlock;
                if (_CheckBitmapIsUsedAt(firstGroupBlock + offset)) {
                    if (firstSet == -1) {
                        firstSet = firstGroupBlock + offset;
                        control->errors |= BFS_BLOCKS_ALREADY_SET;
                        dprintf("block %" B_PRIdOFF " is already set!!!\n",
                            firstGroupBlock + offset);
                    }
                    control->stats.already_set++;
                } else {
                    if (firstSet != -1) {
                        FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF
                            " - %" B_PRIdOFF " are already set!\n", type,
                            (int)run.AllocationGroup(), run.Start(),
                            run.Length(), firstSet,
                            firstGroupBlock + offset - 1));
                        firstSet = -1;
                    }
                    _SetCheckBitmapAt(firstGroupBlock + offset);
                }
            }
            length++;
            pos++;
        }

        if (block + 1 >= fBlocksPerGroup || length >= run.Length()) {
            if (firstMissing != -1) {
                PRINT(("%s: block_run(%ld, %u, %u): blocks %Ld - %Ld are not "
                    "allocated!\n", type, run.AllocationGroup(), run.Start(),
                    run.Length(), firstMissing,
                    firstGroupBlock + pos + block * bitsPerBlock - 1));
            }
            if (firstSet != -1) {
                FATAL(("%s: block_run(%d, %u, %u): blocks %" B_PRIdOFF " - %"
                    B_PRIdOFF " are already set!\n", type,
                    (int)run.AllocationGroup(), run.Start(), run.Length(),
                    firstSet,
                    firstGroupBlock + pos + block * bitsPerBlock - 1));
            }
        }
    }

    return B_OK;
}


status_t
BlockAllocator::CheckInode(Inode* inode, check_control* control)
{
    if (control != NULL && fCheckBitmap == NULL)
        return B_NO_INIT;
    if (inode == NULL)
        return B_BAD_VALUE;

    status_t status = CheckBlockRun(inode->BlockRun(), "inode", control);
    if (status != B_OK)
        return status;

    // If the inode has an attribute directory, push it on the stack
    if (control != NULL && !inode->Attributes().IsZero()) {
        check_cookie* cookie = (check_cookie*)control->cookie;
        cookie->stack.Push(inode->Attributes());
    }

    if (inode->IsSymLink() && (inode->Flags() & INODE_LONG_SYMLINK) == 0) {
        // symlinks may not have a valid data stream
        if (strlen(inode->Node().short_symlink) >= SHORT_SYMLINK_NAME_LENGTH)
            return B_BAD_DATA;

        return B_OK;
    }

    data_stream* data = &inode->Node().data;

    // check the direct range

    if (data->max_direct_range) {
        for (int32 i = 0; i < NUM_DIRECT_BLOCKS; i++) {
            if (data->direct[i].IsZero())
                break;

            status = CheckBlockRun(data->direct[i], "direct", control);
            if (status < B_OK)
                return status;
        }
    }
"direct", control); 1782 if (status < B_OK) 1783 return status; 1784 } 1785 } 1786 1787 CachedBlock cached(fVolume); 1788 1789 // check the indirect range 1790 1791 if (data->max_indirect_range) { 1792 status = CheckBlockRun(data->indirect, "indirect", control); 1793 if (status < B_OK) 1794 return status; 1795 1796 off_t block = fVolume->ToBlock(data->indirect); 1797 1798 for (int32 i = 0; i < data->indirect.Length(); i++) { 1799 block_run* runs = (block_run*)cached.SetTo(block + i); 1800 if (runs == NULL) 1801 RETURN_ERROR(B_IO_ERROR); 1802 1803 int32 runsPerBlock = fVolume->BlockSize() / sizeof(block_run); 1804 int32 index = 0; 1805 for (; index < runsPerBlock; index++) { 1806 if (runs[index].IsZero()) 1807 break; 1808 1809 status = CheckBlockRun(runs[index], "indirect->run", control); 1810 if (status < B_OK) 1811 return status; 1812 } 1813 if (index < runsPerBlock) 1814 break; 1815 } 1816 } 1817 1818 // check the double indirect range 1819 1820 if (data->max_double_indirect_range) { 1821 status = CheckBlockRun(data->double_indirect, "double indirect", 1822 control); 1823 if (status != B_OK) 1824 return status; 1825 1826 int32 runsPerBlock = runs_per_block(fVolume->BlockSize()); 1827 int32 runsPerArray = runsPerBlock * data->double_indirect.Length(); 1828 1829 CachedBlock cachedDirect(fVolume); 1830 1831 for (int32 indirectIndex = 0; indirectIndex < runsPerArray; 1832 indirectIndex++) { 1833 // get the indirect array block 1834 block_run* array = (block_run*)cached.SetTo( 1835 fVolume->ToBlock(data->double_indirect) 1836 + indirectIndex / runsPerBlock); 1837 if (array == NULL) 1838 return B_IO_ERROR; 1839 1840 block_run indirect = array[indirectIndex % runsPerBlock]; 1841 // are we finished yet? 1842 if (indirect.IsZero()) 1843 return B_OK; 1844 1845 status = CheckBlockRun(indirect, "double indirect->runs", control); 1846 if (status != B_OK) 1847 return status; 1848 1849 int32 maxIndex = (indirect.Length() << fVolume->BlockShift()) 1850 / sizeof(block_run); 1851 1852 for (int32 index = 0; index < maxIndex; ) { 1853 block_run* runs = (block_run*)cachedDirect.SetTo( 1854 fVolume->ToBlock(indirect) + index / runsPerBlock); 1855 if (runs == NULL) 1856 return B_IO_ERROR; 1857 1858 do { 1859 // are we finished yet? 1860 if (runs[index % runsPerBlock].IsZero()) 1861 return B_OK; 1862 1863 status = CheckBlockRun(runs[index % runsPerBlock], 1864 "double indirect->runs->run", control); 1865 if (status != B_OK) 1866 return status; 1867 } while ((++index % runsPerArray) != 0); 1868 } 1869 } 1870 } 1871 1872 return B_OK; 1873 } 1874 1875 1876 // #pragma mark - debugger commands 1877 1878 1879 #ifdef BFS_DEBUGGER_COMMANDS 1880 1881 void 1882 BlockAllocator::Dump(int32 index) 1883 { 1884 kprintf("allocation groups: %ld (base %p)\n", fNumGroups, fGroups); 1885 kprintf("blocks per group: %ld\n", fBlocksPerGroup); 1886 1887 for (int32 i = 0; i < fNumGroups; i++) { 1888 if (index != -1 && i != index) 1889 continue; 1890 1891 AllocationGroup& group = fGroups[i]; 1892 1893 kprintf("[%3ld] num bits: %lu (%p)\n", i, group.NumBits(), 1894 &group); 1895 kprintf(" num blocks: %lu\n", group.NumBlocks()); 1896 kprintf(" start: %ld\n", group.Start()); 1897 kprintf(" first free: %ld\n", group.fFirstFree); 1898 kprintf(" largest start: %ld%s\n", group.fLargestStart, 1899 group.fLargestValid ? 
"" : " (invalid)"); 1900 kprintf(" largest length: %ld\n", group.fLargestLength); 1901 kprintf(" free bits: %ld\n", group.fFreeBits); 1902 } 1903 } 1904 1905 1906 #if BFS_TRACING 1907 static char kTraceBuffer[256]; 1908 1909 1910 int 1911 dump_block_allocator_blocks(int argc, char** argv) 1912 { 1913 if (argc != 3 || !strcmp(argv[1], "--help")) { 1914 kprintf("usage: %s <ptr-to-volume> <block>\n", argv[0]); 1915 return 0; 1916 } 1917 1918 Volume* volume = (Volume*)parse_expression(argv[1]); 1919 off_t block = parse_expression(argv[2]); 1920 1921 // iterate over all tracing entries to find overlapping actions 1922 1923 using namespace BFSBlockTracing; 1924 1925 LazyTraceOutput out(kTraceBuffer, sizeof(kTraceBuffer), 0); 1926 TraceEntryIterator iterator; 1927 while (TraceEntry* _entry = iterator.Next()) { 1928 if (Allocate* entry = dynamic_cast<Allocate*>(_entry)) { 1929 off_t first = volume->ToBlock(entry->Run()); 1930 off_t last = first - 1 + entry->Run().Length(); 1931 if (block >= first && block <= last) { 1932 out.Clear(); 1933 const char* dump = out.DumpEntry(entry); 1934 kprintf("%5ld. %s\n", iterator.Index(), dump); 1935 } 1936 } else if (Free* entry = dynamic_cast<Free*>(_entry)) { 1937 off_t first = volume->ToBlock(entry->Run()); 1938 off_t last = first - 1 + entry->Run().Length(); 1939 if (block >= first && block <= last) { 1940 out.Clear(); 1941 const char* dump = out.DumpEntry(entry); 1942 kprintf("%5ld. %s\n", iterator.Index(), dump); 1943 } 1944 } 1945 } 1946 1947 return 0; 1948 } 1949 #endif 1950 1951 1952 int 1953 dump_block_allocator(int argc, char** argv) 1954 { 1955 int32 group = -1; 1956 if (argc == 3) { 1957 group = parse_expression(argv[2]); 1958 argc--; 1959 } 1960 1961 if (argc != 2 || !strcmp(argv[1], "--help")) { 1962 kprintf("usage: %s <ptr-to-volume> [group]\n", argv[0]); 1963 return 0; 1964 } 1965 1966 Volume* volume = (Volume*)parse_expression(argv[1]); 1967 BlockAllocator& allocator = volume->Allocator(); 1968 1969 allocator.Dump(group); 1970 return 0; 1971 } 1972 1973 #endif // BFS_DEBUGGER_COMMANDS 1974 1975